diff --git a/README.md b/README.md
index 682d5961226399a3e1b7c343ca88298b89219a92..4a9096f842541985d121b87f165913f6f63a892e 100644
--- a/README.md
+++ b/README.md
@@ -6,21 +6,28 @@ The Xenomai Cobalt real-time core depends on a patch to the mainline Linux kerne
 Kernel version: openeuler kernel-4.19.90-2012.4.0.0053
 Xenomai version: xenomai-3.1
- patch names:
- 1.ipipe-core-4.19.55-oe1.patch
- 2.enable_irq.patch
- 3.cobalt-core-3.1-4.19.90.patch
- 4.cobalt-core-3.1-4.19.90-oe1.patch
+ arm64 patch names:
+ 1.ipipe-core-4.19.55-oe1_arm64.patch
+ 2.enable_irq_arm64.patch
+ 3.cobalt-core-3.1-4.19.90_arm64.patch
+ 4.cobalt-core-3.1-4.19.90-oe1_arm64.patch
 5.openeuler_defconfig_arm64.patch
 6.openeuler_defconfig_arm64_2.patch
+ x86_64 patch names:
+ 1.cobalt-core-3.1-4.19.90-oe1_x86.patch
+ 2.cobalt-core-3.1-4.19.90_x86.patch
+ 3.ipipe-core-4.19.90-oe1_x86.patch
+ 4.openeuler_defconfig_x86_2.patch
+ 5.openeuler_defconfig_x86.patch
+
+
 
 #### Software Architecture
-Software architecture notes: this release targets the ARM architecture, specifically Kunpeng 920 and FT 2000/4 machines.
+Software architecture notes: ARM architecture: Kunpeng 920 and FT 2000/4 machines.
 
 #### Known Issues
-
+ARM architecture:
 1. Phytium NIC driver issue
 2. Serial port driver issue in the RTOS environment
-3. Xenomai real-time kernel patches for the x86 architecture will be uploaded later
 
 #### Installation
 1. xxxx
diff --git a/cobalt-core-3.1-4.19.90-oe1.patch b/cobalt-core-3.1-4.19.90-oe1_arm64.patch
similarity index 100%
rename from cobalt-core-3.1-4.19.90-oe1.patch
rename to cobalt-core-3.1-4.19.90-oe1_arm64.patch
diff --git a/cobalt-core-3.1-4.19.90-oe1_x86.patch b/cobalt-core-3.1-4.19.90-oe1_x86.patch
new file mode 100755
index 0000000000000000000000000000000000000000..51abbbca9a5bc87d689e4dc9c2d36934a22f0478
--- /dev/null
+++ b/cobalt-core-3.1-4.19.90-oe1_x86.patch
@@ -0,0 +1,16 @@
+--- kernel/include/asm-generic/xenomai/syscall.h 2020-02-04 01:35:56.000000000 +0800
++++ kernel_new/include/asm-generic/xenomai/syscall.h 2021-04-07 13:58:24.939549909 +0800
+@@ -27,13 +27,8 @@
+ #include
+ #include
+
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
+ #define access_rok(addr, size) access_ok((addr), (size))
+ #define access_wok(addr, size) access_ok((addr), (size))
+-#else
+-#define access_rok(addr, size) access_ok(VERIFY_READ, (addr), (size))
+-#define access_wok(addr, size) access_ok(VERIFY_WRITE, (addr), (size))
+-#endif
+
+ #define __xn_reg_arglist(regs) \
+ __xn_reg_arg1(regs), \
diff --git a/cobalt-core-3.1-4.19.90.patch b/cobalt-core-3.1-4.19.90_arm64.patch
similarity index 100%
rename from cobalt-core-3.1-4.19.90.patch
rename to cobalt-core-3.1-4.19.90_arm64.patch
diff --git a/cobalt-core-3.1-4.19.90_x86.patch b/cobalt-core-3.1-4.19.90_x86.patch
new file mode 100755
index 0000000000000000000000000000000000000000..cfba05e8d6c109ff2839cb5ab9a875f18ffe3689
--- /dev/null
+++ b/cobalt-core-3.1-4.19.90_x86.patch
@@ -0,0 +1,264343 @@
+--- linux/include/xenomai/version.h 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/include/xenomai/version.h 2021-04-07 16:01:28.509632334 +0800
+@@ -0,0 +1,34 @@
++/*
++ * Copyright (C) 2001-2013 Philippe Gerum .
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _XENOMAI_VERSION_H ++#define _XENOMAI_VERSION_H ++ ++#ifndef __KERNEL__ ++#include ++#include ++#endif ++ ++#define XENO_VERSION(maj, min, rev) (((maj)<<16)|((min)<<8)|(rev)) ++ ++#define XENO_VERSION_CODE XENO_VERSION(CONFIG_XENO_VERSION_MAJOR, \ ++ CONFIG_XENO_VERSION_MINOR, \ ++ CONFIG_XENO_REVISION_LEVEL) ++ ++#define XENO_VERSION_STRING CONFIG_XENO_VERSION_STRING ++ ++#endif /* _XENOMAI_VERSION_H */ +--- linux/include/xenomai/rtdm/uapi/udd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/udd.h 2021-04-07 16:01:28.502632344 +0800 +@@ -0,0 +1,98 @@ ++/** ++ * @file ++ * This file is part of the Xenomai project. ++ * ++ * @author Copyright (C) 2014 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _RTDM_UAPI_UDD_H ++#define _RTDM_UAPI_UDD_H ++ ++/** ++ * @addtogroup rtdm_udd ++ * ++ * @{ ++ */ ++ ++/** ++ * @anchor udd_signotify ++ * @brief UDD event notification descriptor ++ * ++ * This structure shall be used to pass the information required to ++ * enable/disable the notification by signal upon interrupt receipt. ++ * ++ * If PID is zero or negative, the notification is disabled. ++ * Otherwise, the Cobalt thread whose PID is given will receive the ++ * Cobalt signal also mentioned, along with the count of interrupts at ++ * the time of the receipt stored in siginfo.si_int. A Cobalt thread ++ * must explicitly wait for notifications using the sigwaitinfo() or ++ * sigtimedwait() services (no asynchronous mode available). ++ */ ++struct udd_signotify { ++ /** ++ * PID of the Cobalt thread to notify upon interrupt ++ * receipt. If @a pid is zero or negative, the notification is ++ * disabled. ++ */ ++ pid_t pid; ++ /** ++ * Signal number to send to PID for notifying, which must be ++ * in the range [SIGRTMIN .. SIGRTMAX] inclusive. This value ++ * is not considered if @a pid is zero or negative. ++ */ ++ int sig; ++}; ++ ++/** ++ * @anchor udd_ioctl_codes @name UDD_IOCTL ++ * IOCTL requests ++ * ++ * @{ ++ */ ++ ++/** ++ * Enable the interrupt line. The UDD-class mini-driver should handle ++ * this request when received through its ->ioctl() handler if ++ * provided. Otherwise, the UDD core enables the interrupt line in the ++ * interrupt controller before returning to the caller. ++ */ ++#define UDD_RTIOC_IRQEN _IO(RTDM_CLASS_UDD, 0) ++/** ++ * Disable the interrupt line. The UDD-class mini-driver should handle ++ * this request when received through its ->ioctl() handler if ++ * provided. 
Otherwise, the UDD core disables the interrupt line in ++ * the interrupt controller before returning to the caller. ++ * ++ * @note The mini-driver must handle the UDD_RTIOC_IRQEN request for a ++ * custom IRQ from its ->ioctl() handler, otherwise such request ++ * receives -EIO from the UDD core. ++ */ ++#define UDD_RTIOC_IRQDIS _IO(RTDM_CLASS_UDD, 1) ++/** ++ * Enable/Disable signal notification upon interrupt event. A valid ++ * @ref udd_signotify "notification descriptor" must be passed along ++ * with this request, which is handled by the UDD core directly. ++ * ++ * @note The mini-driver must handle the UDD_RTIOC_IRQDIS request for ++ * a custom IRQ from its ->ioctl() handler, otherwise such request ++ * receives -EIO from the UDD core. ++ */ ++#define UDD_RTIOC_IRQSIG _IOW(RTDM_CLASS_UDD, 2, struct udd_signotify) ++ ++/** @} */ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_UDD_H */ +--- linux/include/xenomai/rtdm/uapi/spi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/spi.h 2021-04-07 16:01:28.497632352 +0800 +@@ -0,0 +1,42 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_SPI_H ++#define _RTDM_UAPI_SPI_H ++ ++#include ++ ++struct rtdm_spi_config { ++ __u32 speed_hz; ++ __u16 mode; ++ __u8 bits_per_word; ++}; ++ ++struct rtdm_spi_iobufs { ++ __u32 io_len; ++ __u32 i_offset; ++ __u32 o_offset; ++ __u32 map_len; ++}; ++ ++#define SPI_RTIOC_SET_CONFIG _IOW(RTDM_CLASS_SPI, 0, struct rtdm_spi_config) ++#define SPI_RTIOC_GET_CONFIG _IOR(RTDM_CLASS_SPI, 1, struct rtdm_spi_config) ++#define SPI_RTIOC_SET_IOBUFS _IOR(RTDM_CLASS_SPI, 2, struct rtdm_spi_iobufs) ++#define SPI_RTIOC_TRANSFER _IO(RTDM_CLASS_SPI, 3) ++#define SPI_RTIOC_TRANSFER_N _IOR(RTDM_CLASS_SPI, 4, int) ++ ++#endif /* !_RTDM_UAPI_SPI_H */ +--- linux/include/xenomai/rtdm/uapi/serial.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/serial.h 2021-04-07 16:01:28.493632357 +0800 +@@ -0,0 +1,407 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, serial device profile header ++ * ++ * @note Copyright (C) 2005-2007 Jan Kiszka ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ * ++ * @ingroup rtserial ++ */ ++#ifndef _RTDM_UAPI_SERIAL_H ++#define _RTDM_UAPI_SERIAL_H ++ ++#define RTSER_PROFILE_VER 3 ++ ++/*! ++ * @anchor RTSER_DEF_BAUD @name RTSER_DEF_BAUD ++ * Default baud rate ++ * @{ */ ++#define RTSER_DEF_BAUD 9600 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_PARITY @name RTSER_xxx_PARITY ++ * Number of parity bits ++ * @{ */ ++#define RTSER_NO_PARITY 0x00 ++#define RTSER_ODD_PARITY 0x01 ++#define RTSER_EVEN_PARITY 0x03 ++#define RTSER_DEF_PARITY RTSER_NO_PARITY ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_BITS @name RTSER_xxx_BITS ++ * Number of data bits ++ * @{ */ ++#define RTSER_5_BITS 0x00 ++#define RTSER_6_BITS 0x01 ++#define RTSER_7_BITS 0x02 ++#define RTSER_8_BITS 0x03 ++#define RTSER_DEF_BITS RTSER_8_BITS ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_STOPB @name RTSER_xxx_STOPB ++ * Number of stop bits ++ * @{ */ ++#define RTSER_1_STOPB 0x00 ++/** valid only in combination with 5 data bits */ ++#define RTSER_1_5_STOPB 0x01 ++#define RTSER_2_STOPB 0x01 ++#define RTSER_DEF_STOPB RTSER_1_STOPB ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_HAND @name RTSER_xxx_HAND ++ * Handshake mechanisms ++ * @{ */ ++#define RTSER_NO_HAND 0x00 ++#define RTSER_RTSCTS_HAND 0x01 ++#define RTSER_DEF_HAND RTSER_NO_HAND ++/** @} */ ++ ++/*! ++ * @anchor RTSER_RS485_xxx @name RTSER_RS485_xxx ++ * RS485 mode with automatic RTS handling ++ * @{ */ ++#define RTSER_RS485_DISABLE 0x00 ++#define RTSER_RS485_ENABLE 0x01 ++#define RTSER_DEF_RS485 RTSER_RS485_DISABLE ++/** @} */ ++ ++/*! ++ * @anchor RTSER_FIFO_xxx @name RTSER_FIFO_xxx ++ * Reception FIFO interrupt threshold ++ * @{ */ ++#define RTSER_FIFO_DEPTH_1 0x00 ++#define RTSER_FIFO_DEPTH_4 0x40 ++#define RTSER_FIFO_DEPTH_8 0x80 ++#define RTSER_FIFO_DEPTH_14 0xC0 ++#define RTSER_DEF_FIFO_DEPTH RTSER_FIFO_DEPTH_1 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_TIMEOUT_xxx @name RTSER_TIMEOUT_xxx ++ * Special timeout values, see also @ref RTDM_TIMEOUT_xxx ++ * @{ */ ++#define RTSER_TIMEOUT_INFINITE RTDM_TIMEOUT_INFINITE ++#define RTSER_TIMEOUT_NONE RTDM_TIMEOUT_NONE ++#define RTSER_DEF_TIMEOUT RTDM_TIMEOUT_INFINITE ++/** @} */ ++ ++/*! ++ * @anchor RTSER_xxx_TIMESTAMP_HISTORY @name RTSER_xxx_TIMESTAMP_HISTORY ++ * Timestamp history control ++ * @{ */ ++#define RTSER_RX_TIMESTAMP_HISTORY 0x01 ++#define RTSER_DEF_TIMESTAMP_HISTORY 0x00 ++/** @} */ ++ ++/*! ++ * @anchor RTSER_EVENT_xxx @name RTSER_EVENT_xxx ++ * Events bits ++ * @{ */ ++#define RTSER_EVENT_RXPEND 0x01 ++#define RTSER_EVENT_ERRPEND 0x02 ++#define RTSER_EVENT_MODEMHI 0x04 ++#define RTSER_EVENT_MODEMLO 0x08 ++#define RTSER_EVENT_TXEMPTY 0x10 ++#define RTSER_DEF_EVENT_MASK 0x00 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_SET_xxx @name RTSER_SET_xxx ++ * Configuration mask bits ++ * @{ */ ++#define RTSER_SET_BAUD 0x0001 ++#define RTSER_SET_PARITY 0x0002 ++#define RTSER_SET_DATA_BITS 0x0004 ++#define RTSER_SET_STOP_BITS 0x0008 ++#define RTSER_SET_HANDSHAKE 0x0010 ++#define RTSER_SET_FIFO_DEPTH 0x0020 ++#define RTSER_SET_TIMEOUT_RX 0x0100 ++#define RTSER_SET_TIMEOUT_TX 0x0200 ++#define RTSER_SET_TIMEOUT_EVENT 0x0400 ++#define RTSER_SET_TIMESTAMP_HISTORY 0x0800 ++#define RTSER_SET_EVENT_MASK 0x1000 ++#define RTSER_SET_RS485 0x2000 ++/** @} */ ++ ++ ++/*! 
++ * @anchor RTSER_LSR_xxx @name RTSER_LSR_xxx ++ * Line status bits ++ * @{ */ ++#define RTSER_LSR_DATA 0x01 ++#define RTSER_LSR_OVERRUN_ERR 0x02 ++#define RTSER_LSR_PARITY_ERR 0x04 ++#define RTSER_LSR_FRAMING_ERR 0x08 ++#define RTSER_LSR_BREAK_IND 0x10 ++#define RTSER_LSR_THR_EMTPY 0x20 ++#define RTSER_LSR_TRANSM_EMPTY 0x40 ++#define RTSER_LSR_FIFO_ERR 0x80 ++#define RTSER_SOFT_OVERRUN_ERR 0x0100 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_MSR_xxx @name RTSER_MSR_xxx ++ * Modem status bits ++ * @{ */ ++#define RTSER_MSR_DCTS 0x01 ++#define RTSER_MSR_DDSR 0x02 ++#define RTSER_MSR_TERI 0x04 ++#define RTSER_MSR_DDCD 0x08 ++#define RTSER_MSR_CTS 0x10 ++#define RTSER_MSR_DSR 0x20 ++#define RTSER_MSR_RI 0x40 ++#define RTSER_MSR_DCD 0x80 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_MCR_xxx @name RTSER_MCR_xxx ++ * Modem control bits ++ * @{ */ ++#define RTSER_MCR_DTR 0x01 ++#define RTSER_MCR_RTS 0x02 ++#define RTSER_MCR_OUT1 0x04 ++#define RTSER_MCR_OUT2 0x08 ++#define RTSER_MCR_LOOP 0x10 ++/** @} */ ++ ++ ++/*! ++ * @anchor RTSER_BREAK_xxx @name RTSER_BREAK_xxx ++ * Break control ++ * @{ */ ++#define RTSER_BREAK_CLR 0x00 ++#define RTSER_BREAK_SET 0x01 ++ ++ ++/** ++ * Serial device configuration ++ */ ++typedef struct rtser_config { ++ /** mask specifying valid fields, see @ref RTSER_SET_xxx */ ++ int config_mask; ++ ++ /** baud rate, default @ref RTSER_DEF_BAUD */ ++ int baud_rate; ++ ++ /** number of parity bits, see @ref RTSER_xxx_PARITY */ ++ int parity; ++ ++ /** number of data bits, see @ref RTSER_xxx_BITS */ ++ int data_bits; ++ ++ /** number of stop bits, see @ref RTSER_xxx_STOPB */ ++ int stop_bits; ++ ++ /** handshake mechanisms, see @ref RTSER_xxx_HAND */ ++ int handshake; ++ ++ /** reception FIFO interrupt threshold, see @ref RTSER_FIFO_xxx */ ++ int fifo_depth; ++ ++ int reserved; ++ ++ /** reception timeout, see @ref RTSER_TIMEOUT_xxx for special ++ * values */ ++ nanosecs_rel_t rx_timeout; ++ ++ /** transmission timeout, see @ref RTSER_TIMEOUT_xxx for special ++ * values */ ++ nanosecs_rel_t tx_timeout; ++ ++ /** event timeout, see @ref RTSER_TIMEOUT_xxx for special values */ ++ nanosecs_rel_t event_timeout; ++ ++ /** enable timestamp history, see @ref RTSER_xxx_TIMESTAMP_HISTORY */ ++ int timestamp_history; ++ ++ /** event mask to be used with @ref RTSER_RTIOC_WAIT_EVENT, see ++ * @ref RTSER_EVENT_xxx */ ++ int event_mask; ++ ++ /** enable RS485 mode, see @ref RTSER_RS485_xxx */ ++ int rs485; ++} rtser_config_t; ++ ++/** ++ * Serial device status ++ */ ++typedef struct rtser_status { ++ /** line status register, see @ref RTSER_LSR_xxx */ ++ int line_status; ++ ++ /** modem status register, see @ref RTSER_MSR_xxx */ ++ int modem_status; ++} rtser_status_t; ++ ++/** ++ * Additional information about serial device events ++ */ ++typedef struct rtser_event { ++ /** signalled events, see @ref RTSER_EVENT_xxx */ ++ int events; ++ ++ /** number of pending input characters */ ++ int rx_pending; ++ ++ /** last interrupt timestamp */ ++ nanosecs_abs_t last_timestamp; ++ ++ /** reception timestamp of oldest character in input queue */ ++ nanosecs_abs_t rxpend_timestamp; ++} rtser_event_t; ++ ++ ++#define RTIOC_TYPE_SERIAL RTDM_CLASS_SERIAL ++ ++ ++/*! ++ * @name Sub-Classes of RTDM_CLASS_SERIAL ++ * @{ */ ++#define RTDM_SUBCLASS_16550A 0 ++/** @} */ ++ ++ ++/*! 
++ * @anchor SERIOCTLs @name IOCTLs ++ * Serial device IOCTLs ++ * @{ */ ++ ++/** ++ * Get serial device configuration ++ * ++ * @param[out] arg Pointer to configuration buffer (struct rtser_config) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_GET_CONFIG \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x00, struct rtser_config) ++ ++/** ++ * Set serial device configuration ++ * ++ * @param[in] arg Pointer to configuration buffer (struct rtser_config) ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EPERM is returned if the caller's context is invalid, see note below. ++ * ++ * - -ENOMEM is returned if a new history buffer for timestamps cannot be ++ * allocated. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note If rtser_config contains a valid timestamp_history and the ++ * addressed device has been opened in non-real-time context, this IOCTL must ++ * be issued in non-real-time context as well. Otherwise, this command will ++ * fail. ++ */ ++#define RTSER_RTIOC_SET_CONFIG \ ++ _IOW(RTIOC_TYPE_SERIAL, 0x01, struct rtser_config) ++ ++/** ++ * Get serial device status ++ * ++ * @param[out] arg Pointer to status buffer (struct rtser_status) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note The error states @c RTSER_LSR_OVERRUN_ERR, @c RTSER_LSR_PARITY_ERR, ++ * @c RTSER_LSR_FRAMING_ERR, and @c RTSER_SOFT_OVERRUN_ERR that may have ++ * occured during previous read accesses to the device will be saved for being ++ * reported via this IOCTL. Upon return from @c RTSER_RTIOC_GET_STATUS, the ++ * saved state will be cleared. ++ */ ++#define RTSER_RTIOC_GET_STATUS \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x02, struct rtser_status) ++ ++/** ++ * Get serial device's modem contol register ++ * ++ * @param[out] arg Pointer to variable receiving the content (int, see ++ * @ref RTSER_MCR_xxx) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_GET_CONTROL \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x03, int) ++ ++/** ++ * Set serial device's modem contol register ++ * ++ * @param[in] arg New control register content (int, see @ref RTSER_MCR_xxx) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTSER_RTIOC_SET_CONTROL \ ++ _IOW(RTIOC_TYPE_SERIAL, 0x04, int) ++ ++/** ++ * Wait on serial device events according to previously set mask ++ * ++ * @param[out] arg Pointer to event information buffer (struct rtser_event) ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBUSY is returned if another task is already waiting on events of this ++ * device. ++ * ++ * - -EBADF is returned if the file descriptor is invalid or the device has ++ * just been closed. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++#define RTSER_RTIOC_WAIT_EVENT \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x05, struct rtser_event) ++/** @} */ ++ ++/** ++ * Set or clear break on UART output line ++ * ++ * @param[in] arg @c RTSER_BREAK_SET or @c RTSER_BREAK_CLR (int) ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note A set break condition may also be cleared on UART line ++ * reconfiguration. ++ */ ++#define RTSER_RTIOC_BREAK_CTL \ ++ _IOR(RTIOC_TYPE_SERIAL, 0x06, int) ++/** @} */ ++ ++/*! 
++ * @anchor SERutils @name RT Serial example and utility programs ++ * @{ */ ++/** @example cross-link.c */ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_SERIAL_H */ +--- linux/include/xenomai/rtdm/uapi/gpio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/gpio.h 2021-04-07 16:01:28.478632379 +0800 +@@ -0,0 +1,41 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_GPIO_H ++#define _RTDM_UAPI_GPIO_H ++ ++struct rtdm_gpio_readout { ++ nanosecs_abs_t timestamp; ++ __s32 value; ++}; ++ ++#define GPIO_RTIOC_DIR_OUT _IOW(RTDM_CLASS_GPIO, 0, int) ++#define GPIO_RTIOC_DIR_IN _IO(RTDM_CLASS_GPIO, 1) ++#define GPIO_RTIOC_IRQEN _IOW(RTDM_CLASS_GPIO, 2, int) /* GPIO trigger */ ++#define GPIO_RTIOC_IRQDIS _IO(RTDM_CLASS_GPIO, 3) ++#define GPIO_RTIOC_REQS _IO(RTDM_CLASS_GPIO, 4) ++#define GPIO_RTIOC_RELS _IO(RTDM_CLASS_GPIO, 5) ++#define GPIO_RTIOC_TS _IOR(RTDM_CLASS_GPIO, 7, int) ++ ++#define GPIO_TRIGGER_NONE 0x0 /* unspecified */ ++#define GPIO_TRIGGER_EDGE_RISING 0x1 ++#define GPIO_TRIGGER_EDGE_FALLING 0x2 ++#define GPIO_TRIGGER_LEVEL_HIGH 0x4 ++#define GPIO_TRIGGER_LEVEL_LOW 0x8 ++#define GPIO_TRIGGER_MASK 0xf ++ ++#endif /* !_RTDM_UAPI_GPIO_H */ +--- linux/include/xenomai/rtdm/uapi/testing.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/testing.h 2021-04-07 16:01:28.473632386 +0800 +@@ -0,0 +1,198 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, testing device profile header ++ * ++ * @note Copyright (C) 2005 Jan Kiszka ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * ++ * @ingroup rttesting ++ */ ++#ifndef _RTDM_UAPI_TESTING_H ++#define _RTDM_UAPI_TESTING_H ++ ++#include ++ ++#define RTTST_PROFILE_VER 2 ++ ++typedef struct rttst_bench_res { ++ __s32 avg; ++ __s32 min; ++ __s32 max; ++ __s32 overruns; ++ __s32 test_loops; ++} rttst_bench_res_t; ++ ++typedef struct rttst_interm_bench_res { ++ struct rttst_bench_res last; ++ struct rttst_bench_res overall; ++} rttst_interm_bench_res_t; ++ ++typedef struct rttst_overall_bench_res { ++ struct rttst_bench_res result; ++ __s32 *histogram_avg; ++ __s32 *histogram_min; ++ __s32 *histogram_max; ++} rttst_overall_bench_res_t; ++ ++#define RTTST_TMBENCH_INVALID -1 /* internal use only */ ++#define RTTST_TMBENCH_TASK 0 ++#define RTTST_TMBENCH_HANDLER 1 ++ ++typedef struct rttst_tmbench_config { ++ int mode; ++ int priority; ++ __u64 period; ++ int warmup_loops; ++ int histogram_size; ++ int histogram_bucketsize; ++ int freeze_max; ++} rttst_tmbench_config_t; ++ ++struct rttst_swtest_task { ++ unsigned int index; ++ unsigned int flags; ++}; ++ ++/* Possible values for struct rttst_swtest_task::flags. */ ++#define RTTST_SWTEST_FPU 0x1 ++#define RTTST_SWTEST_USE_FPU 0x2 /* Only for kernel-space tasks. */ ++#define RTTST_SWTEST_FREEZE 0x4 /* Only for kernel-space tasks. */ ++ ++struct rttst_swtest_dir { ++ unsigned int from; ++ unsigned int to; ++}; ++ ++struct rttst_swtest_error { ++ struct rttst_swtest_dir last_switch; ++ unsigned int fp_val; ++}; ++ ++#define RTTST_RTDM_NORMAL_CLOSE 0 ++#define RTTST_RTDM_DEFER_CLOSE_CONTEXT 1 ++ ++#define RTTST_RTDM_MAGIC_PRIMARY 0xfefbfefb ++#define RTTST_RTDM_MAGIC_SECONDARY 0xa5b9a5b9 ++ ++#define RTTST_HEAPCHECK_ZEROOVRD 1 ++#define RTTST_HEAPCHECK_SHUFFLE 2 ++#define RTTST_HEAPCHECK_PATTERN 4 ++#define RTTST_HEAPCHECK_HOT 8 ++ ++struct rttst_heap_parms { ++ __u64 heap_size; ++ __u64 block_size; ++ int flags; ++ int nrstats; ++}; ++ ++struct rttst_heap_stats { ++ __u64 heap_size; ++ __u64 user_size; ++ __u64 block_size; ++ __s64 alloc_avg_ns; ++ __s64 alloc_max_ns; ++ __s64 free_avg_ns; ++ __s64 free_max_ns; ++ __u64 maximum_free; ++ __u64 largest_free; ++ int nrblocks; ++ int flags; ++}; ++ ++struct rttst_heap_stathdr { ++ int nrstats; ++ struct rttst_heap_stats *buf; ++}; ++ ++#define RTIOC_TYPE_TESTING RTDM_CLASS_TESTING ++ ++/*! ++ * @name Sub-Classes of RTDM_CLASS_TESTING ++ * @{ */ ++/** subclass name: "timerbench" */ ++#define RTDM_SUBCLASS_TIMERBENCH 0 ++/** subclass name: "irqbench" */ ++#define RTDM_SUBCLASS_IRQBENCH 1 ++/** subclass name: "switchtest" */ ++#define RTDM_SUBCLASS_SWITCHTEST 2 ++/** subclase name: "rtdm" */ ++#define RTDM_SUBCLASS_RTDMTEST 3 ++/** subclase name: "heapcheck" */ ++#define RTDM_SUBCLASS_HEAPCHECK 4 ++/** @} */ ++ ++/*! 
++ * @anchor TSTIOCTLs @name IOCTLs ++ * Testing device IOCTLs ++ * @{ */ ++#define RTTST_RTIOC_INTERM_BENCH_RES \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x00, struct rttst_interm_bench_res) ++ ++#define RTTST_RTIOC_TMBENCH_START \ ++ _IOW(RTIOC_TYPE_TESTING, 0x10, struct rttst_tmbench_config) ++ ++#define RTTST_RTIOC_TMBENCH_STOP \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x11, struct rttst_overall_bench_res) ++ ++#define RTTST_RTIOC_SWTEST_SET_TASKS_COUNT \ ++ _IOW(RTIOC_TYPE_TESTING, 0x30, __u32) ++ ++#define RTTST_RTIOC_SWTEST_SET_CPU \ ++ _IOW(RTIOC_TYPE_TESTING, 0x31, __u32) ++ ++#define RTTST_RTIOC_SWTEST_REGISTER_UTASK \ ++ _IOW(RTIOC_TYPE_TESTING, 0x32, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_CREATE_KTASK \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x33, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_PEND \ ++ _IOR(RTIOC_TYPE_TESTING, 0x34, struct rttst_swtest_task) ++ ++#define RTTST_RTIOC_SWTEST_SWITCH_TO \ ++ _IOR(RTIOC_TYPE_TESTING, 0x35, struct rttst_swtest_dir) ++ ++#define RTTST_RTIOC_SWTEST_GET_SWITCHES_COUNT \ ++ _IOR(RTIOC_TYPE_TESTING, 0x36, __u32) ++ ++#define RTTST_RTIOC_SWTEST_GET_LAST_ERROR \ ++ _IOR(RTIOC_TYPE_TESTING, 0x37, struct rttst_swtest_error) ++ ++#define RTTST_RTIOC_SWTEST_SET_PAUSE \ ++ _IOW(RTIOC_TYPE_TESTING, 0x38, __u32) ++ ++#define RTTST_RTIOC_RTDM_DEFER_CLOSE \ ++ _IOW(RTIOC_TYPE_TESTING, 0x40, __u32) ++ ++#define RTTST_RTIOC_RTDM_ACTOR_GET_CPU \ ++ _IOR(RTIOC_TYPE_TESTING, 0x41, __u32) ++ ++#define RTTST_RTIOC_RTDM_PING_PRIMARY \ ++ _IOR(RTIOC_TYPE_TESTING, 0x42, __u32) ++ ++#define RTTST_RTIOC_RTDM_PING_SECONDARY \ ++ _IOR(RTIOC_TYPE_TESTING, 0x43, __u32) ++ ++#define RTTST_RTIOC_HEAP_CHECK \ ++ _IOR(RTIOC_TYPE_TESTING, 0x44, struct rttst_heap_parms) ++ ++#define RTTST_RTIOC_HEAP_STAT_COLLECT \ ++ _IOR(RTIOC_TYPE_TESTING, 0x45, int) ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_TESTING_H */ +--- linux/include/xenomai/rtdm/uapi/analogy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/analogy.h 2021-04-07 16:01:28.464632399 +0800 +@@ -0,0 +1,743 @@ ++/** ++ * @file ++ * Analogy for Linux, UAPI bits ++ * @note Copyright (C) 1997-2000 David A. Schleef ++ * @note Copyright (C) 2008 Alexis Berlemont ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _RTDM_UAPI_ANALOGY_H ++#define _RTDM_UAPI_ANALOGY_H ++ ++/* --- Misc precompilation constant --- */ ++#define A4L_NAMELEN 20 ++ ++#define A4L_INFINITE 0 ++#define A4L_NONBLOCK (-1) ++ ++/* --- Common Analogy types --- */ ++ ++typedef unsigned short sampl_t; ++typedef unsigned long lsampl_t; ++ ++/* MMAP ioctl argument structure */ ++struct a4l_mmap_arg { ++ unsigned int idx_subd; ++ unsigned long size; ++ void *ptr; ++}; ++typedef struct a4l_mmap_arg a4l_mmap_t; ++ ++/* Constants related with buffer size ++ (might be used with BUFCFG ioctl) */ ++#define A4L_BUF_MAXSIZE 0x1000000 ++#define A4L_BUF_DEFSIZE 0x10000 ++#define A4L_BUF_DEFMAGIC 0xffaaff55 ++ ++/* BUFCFG ioctl argument structure */ ++struct a4l_buffer_config { ++ /* NOTE: with the last buffer implementation, the field ++ idx_subd became useless; the buffer are now ++ per-context. So, the buffer size configuration is specific ++ to an opened device. There is a little exception: we can ++ define a default buffer size for a device. ++ So far, a hack is used to implement the configuration of ++ the default buffer size */ ++ unsigned int idx_subd; ++ unsigned long buf_size; ++}; ++typedef struct a4l_buffer_config a4l_bufcfg_t; ++ ++/* BUFINFO ioctl argument structure */ ++struct a4l_buffer_info { ++ unsigned int idx_subd; ++ unsigned long buf_size; ++ unsigned long rw_count; ++}; ++typedef struct a4l_buffer_info a4l_bufinfo_t; ++ ++/* BUFCFG2 / BUFINFO2 ioctl argument structure */ ++struct a4l_buffer_config2 { ++ unsigned long wake_count; ++ unsigned long reserved[3]; ++}; ++typedef struct a4l_buffer_config2 a4l_bufcfg2_t; ++ ++/* POLL ioctl argument structure */ ++struct a4l_poll { ++ unsigned int idx_subd; ++ unsigned long arg; ++}; ++typedef struct a4l_poll a4l_poll_t; ++ ++/* DEVCFG ioctl argument structure */ ++struct a4l_link_desc { ++ unsigned char bname_size; ++ char *bname; ++ unsigned int opts_size; ++ void *opts; ++}; ++typedef struct a4l_link_desc a4l_lnkdesc_t; ++ ++/* DEVINFO ioctl argument structure */ ++struct a4l_dev_info { ++ char board_name[A4L_NAMELEN]; ++ char driver_name[A4L_NAMELEN]; ++ int nb_subd; ++ int idx_read_subd; ++ int idx_write_subd; ++}; ++typedef struct a4l_dev_info a4l_dvinfo_t; ++ ++#define CIO 'd' ++#define A4L_DEVCFG _IOW(CIO,0,a4l_lnkdesc_t) ++#define A4L_DEVINFO _IOR(CIO,1,a4l_dvinfo_t) ++#define A4L_SUBDINFO _IOR(CIO,2,a4l_sbinfo_t) ++#define A4L_CHANINFO _IOR(CIO,3,a4l_chinfo_arg_t) ++#define A4L_RNGINFO _IOR(CIO,4,a4l_rnginfo_arg_t) ++#define A4L_CMD _IOWR(CIO,5,a4l_cmd_t) ++#define A4L_CANCEL _IOR(CIO,6,unsigned int) ++#define A4L_INSNLIST _IOR(CIO,7,unsigned int) ++#define A4L_INSN _IOR(CIO,8,unsigned int) ++#define A4L_BUFCFG _IOR(CIO,9,a4l_bufcfg_t) ++#define A4L_BUFINFO _IOWR(CIO,10,a4l_bufinfo_t) ++#define A4L_POLL _IOR(CIO,11,unsigned int) ++#define A4L_MMAP _IOWR(CIO,12,unsigned int) ++#define A4L_NBCHANINFO _IOR(CIO,13,a4l_chinfo_arg_t) ++#define A4L_NBRNGINFO _IOR(CIO,14,a4l_rnginfo_arg_t) ++ ++/* These IOCTLs are bound to be merged with A4L_BUFCFG and A4L_BUFINFO ++ at the next major release */ ++#define A4L_BUFCFG2 _IOR(CIO,15,a4l_bufcfg_t) ++#define A4L_BUFINFO2 _IOWR(CIO,16,a4l_bufcfg_t) ++ ++/*! ++ * @addtogroup analogy_lib_async1 ++ * @{ ++ */ ++ ++/*! 
++ * @anchor ANALOGY_CMD_xxx @name ANALOGY_CMD_xxx ++ * @brief Common command flags definitions ++ * @{ ++ */ ++ ++/** ++ * Do not execute the command, just check it ++ */ ++#define A4L_CMD_SIMUL 0x1 ++/** ++ * Perform data recovery / transmission in bulk mode ++ */ ++#define A4L_CMD_BULK 0x2 ++/** ++ * Perform a command which will write data to the device ++ */ ++#define A4L_CMD_WRITE 0x4 ++ ++ /*! @} ANALOGY_CMD_xxx */ ++ ++/*! ++ * @anchor TRIG_xxx @name TRIG_xxx ++ * @brief Command triggers flags definitions ++ * @{ ++ */ ++ ++/** ++ * Never trigger ++ */ ++#define TRIG_NONE 0x00000001 ++/** ++ * Trigger now + N ns ++ */ ++#define TRIG_NOW 0x00000002 ++/** ++ * Trigger on next lower level trig ++ */ ++#define TRIG_FOLLOW 0x00000004 ++/** ++ * Trigger at time N ns ++ */ ++#define TRIG_TIME 0x00000008 ++/** ++ * Trigger at rate N ns ++ */ ++#define TRIG_TIMER 0x00000010 ++/** ++ * Trigger when count reaches N ++ */ ++#define TRIG_COUNT 0x00000020 ++/** ++ * Trigger on external signal N ++ */ ++#define TRIG_EXT 0x00000040 ++/** ++ * Trigger on analogy-internal signal N ++ */ ++#define TRIG_INT 0x00000080 ++/** ++ * Driver defined trigger ++ */ ++#define TRIG_OTHER 0x00000100 ++/** ++ * Wake up on end-of-scan ++ */ ++#define TRIG_WAKE_EOS 0x0020 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_MASK 0x00030000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_NEAREST 0x00000000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_DOWN 0x00010000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_UP 0x00020000 ++/** ++ * Trigger not implemented yet ++ */ ++#define TRIG_ROUND_UP_NEXT 0x00030000 ++ ++ /*! @} TRIG_xxx */ ++ ++/*! ++ * @anchor CHAN_RNG_AREF @name Channel macros ++ * @brief Specific precompilation macros and constants useful for the ++ * channels descriptors tab located in the command structure ++ * @{ ++ */ ++ ++/** ++ * Channel indication macro ++ */ ++#define CHAN(a) ((a) & 0xffff) ++/** ++ * Range definition macro ++ */ ++#define RNG(a) (((a) & 0xff) << 16) ++/** ++ * Reference definition macro ++ */ ++#define AREF(a) (((a) & 0x03) << 24) ++/** ++ * Flags definition macro ++ */ ++#define FLAGS(a) ((a) & CR_FLAGS_MASK) ++/** ++ * Channel + range + reference definition macro ++ */ ++#define PACK(a, b, c) (a | RNG(b) | AREF(c)) ++/** ++ * Channel + range + reference + flags definition macro ++ */ ++#define PACK_FLAGS(a, b, c, d) (PACK(a, b, c) | FLAGS(d)) ++ ++/** ++ * Analog reference is analog ground ++ */ ++#define AREF_GROUND 0x00 ++/** ++ * Analog reference is analog common ++ */ ++#define AREF_COMMON 0x01 ++/** ++ * Analog reference is differential ++ */ ++#define AREF_DIFF 0x02 ++/** ++ * Analog reference is undefined ++ */ ++#define AREF_OTHER 0x03 ++ ++ /*! @} CHAN_RNG_AREF */ ++ ++#if !defined(DOXYGEN_CPP) ++ ++#define CR_FLAGS_MASK 0xfc000000 ++#define CR_ALT_FILTER (1<<26) ++#define CR_DITHER CR_ALT_FILTER ++#define CR_DEGLITCH CR_ALT_FILTER ++#define CR_ALT_SOURCE (1<<27) ++#define CR_EDGE (1<<30) ++#define CR_INVERT (1<<31) ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/*! ++ * @brief Structure describing the asynchronous instruction ++ * @see a4l_snd_command() ++ */ ++ ++struct a4l_cmd_desc { ++ unsigned char idx_subd; ++ /**< Subdevice to which the command will be applied. 
*/ ++ ++ unsigned long flags; ++ /**< Command flags */ ++ ++ /* Command trigger characteristics */ ++ unsigned int start_src; ++ /**< Start trigger type */ ++ unsigned int start_arg; ++ /**< Start trigger argument */ ++ unsigned int scan_begin_src; ++ /**< Scan begin trigger type */ ++ unsigned int scan_begin_arg; ++ /**< Scan begin trigger argument */ ++ unsigned int convert_src; ++ /**< Convert trigger type */ ++ unsigned int convert_arg; ++ /**< Convert trigger argument */ ++ unsigned int scan_end_src; ++ /**< Scan end trigger type */ ++ unsigned int scan_end_arg; ++ /**< Scan end trigger argument */ ++ unsigned int stop_src; ++ /**< Stop trigger type */ ++ unsigned int stop_arg; ++ /**< Stop trigger argument */ ++ ++ unsigned char nb_chan; ++ /**< Count of channels related with the command */ ++ unsigned int *chan_descs; ++ /**< Tab containing channels descriptors */ ++ ++ /* Driver specific fields */ ++ unsigned int valid_simul_stages; ++ /** < cmd simulation valid stages (driver dependent) */ ++ ++ unsigned int data_len; ++ /**< Driver specific buffer size */ ++ sampl_t *data; ++ /**< Driver specific buffer pointer */ ++}; ++typedef struct a4l_cmd_desc a4l_cmd_t; ++ ++/*! @} analogy_lib_async1 */ ++ ++/* --- Range section --- */ ++ ++/** Constant for internal use only (must not be used by driver ++ developer). */ ++#define A4L_RNG_FACTOR 1000000 ++ ++/** ++ * Volt unit range flag ++ */ ++#define A4L_RNG_VOLT_UNIT 0x0 ++/** ++ * MilliAmpere unit range flag ++ */ ++#define A4L_RNG_MAMP_UNIT 0x1 ++/** ++ * No unit range flag ++ */ ++#define A4L_RNG_NO_UNIT 0x2 ++/** ++ * External unit range flag ++ */ ++#define A4L_RNG_EXT_UNIT 0x4 ++ ++/** ++ * Macro to retrieve the range unit from the range flags ++ */ ++#define A4L_RNG_UNIT(x) (x & (A4L_RNG_VOLT_UNIT | \ ++ A4L_RNG_MAMP_UNIT | \ ++ A4L_RNG_NO_UNIT | \ ++ A4L_RNG_EXT_UNIT)) ++ ++/* --- Subdevice flags desc stuff --- */ ++ ++/* TODO: replace ANALOGY_SUBD_AI with ANALOGY_SUBD_ANALOG ++ and ANALOGY_SUBD_INPUT */ ++ ++/* Subdevice types masks */ ++#define A4L_SUBD_MASK_READ 0x80000000 ++#define A4L_SUBD_MASK_WRITE 0x40000000 ++#define A4L_SUBD_MASK_SPECIAL 0x20000000 ++ ++/*! ++ * @addtogroup analogy_subdevice ++ * @{ ++ */ ++ ++/*! 
++ * @anchor ANALOGY_SUBD_xxx @name Subdevices types ++ * @brief Flags to define the subdevice type ++ * @{ ++ */ ++ ++/** ++ * Unused subdevice ++ */ ++#define A4L_SUBD_UNUSED (A4L_SUBD_MASK_SPECIAL|0x1) ++/** ++ * Analog input subdevice ++ */ ++#define A4L_SUBD_AI (A4L_SUBD_MASK_READ|0x2) ++/** ++ * Analog output subdevice ++ */ ++#define A4L_SUBD_AO (A4L_SUBD_MASK_WRITE|0x4) ++/** ++ * Digital input subdevice ++ */ ++#define A4L_SUBD_DI (A4L_SUBD_MASK_READ|0x8) ++/** ++ * Digital output subdevice ++ */ ++#define A4L_SUBD_DO (A4L_SUBD_MASK_WRITE|0x10) ++/** ++ * Digital input/output subdevice ++ */ ++#define A4L_SUBD_DIO (A4L_SUBD_MASK_SPECIAL|0x20) ++/** ++ * Counter subdevice ++ */ ++#define A4L_SUBD_COUNTER (A4L_SUBD_MASK_SPECIAL|0x40) ++/** ++ * Timer subdevice ++ */ ++#define A4L_SUBD_TIMER (A4L_SUBD_MASK_SPECIAL|0x80) ++/** ++ * Memory, EEPROM, DPRAM ++ */ ++#define A4L_SUBD_MEMORY (A4L_SUBD_MASK_SPECIAL|0x100) ++/** ++ * Calibration subdevice DACs ++ */ ++#define A4L_SUBD_CALIB (A4L_SUBD_MASK_SPECIAL|0x200) ++/** ++ * Processor, DSP ++ */ ++#define A4L_SUBD_PROC (A4L_SUBD_MASK_SPECIAL|0x400) ++/** ++ * Serial IO subdevice ++ */ ++#define A4L_SUBD_SERIAL (A4L_SUBD_MASK_SPECIAL|0x800) ++/** ++ * Mask which gathers all the types ++ */ ++#define A4L_SUBD_TYPES (A4L_SUBD_UNUSED | \ ++ A4L_SUBD_AI | \ ++ A4L_SUBD_AO | \ ++ A4L_SUBD_DI | \ ++ A4L_SUBD_DO | \ ++ A4L_SUBD_DIO | \ ++ A4L_SUBD_COUNTER | \ ++ A4L_SUBD_TIMER | \ ++ A4L_SUBD_MEMORY | \ ++ A4L_SUBD_CALIB | \ ++ A4L_SUBD_PROC | \ ++ A4L_SUBD_SERIAL) ++ ++/*! @} ANALOGY_SUBD_xxx */ ++ ++/*! ++ * @anchor ANALOGY_SUBD_FT_xxx @name Subdevice features ++ * @brief Flags to define the subdevice's capabilities ++ * @{ ++ */ ++ ++/* Subdevice capabilities */ ++/** ++ * The subdevice can handle command (i.e it can perform asynchronous ++ * acquisition) ++ */ ++#define A4L_SUBD_CMD 0x1000 ++/** ++ * The subdevice support mmap operations (technically, any driver can ++ * do it; however, the developer might want that his driver must be ++ * accessed through read / write ++ */ ++#define A4L_SUBD_MMAP 0x8000 ++ ++/*! @} ANALOGY_SUBD_FT_xxx */ ++ ++/*! ++ * @anchor ANALOGY_SUBD_ST_xxx @name Subdevice status ++ * @brief Flags to define the subdevice's status ++ * @{ ++ */ ++ ++/* Subdevice status flag(s) */ ++/** ++ * The subdevice is busy, a synchronous or an asynchronous acquisition ++ * is occuring ++ */ ++#define A4L_SUBD_BUSY_NR 0 ++#define A4L_SUBD_BUSY (1 << A4L_SUBD_BUSY_NR) ++ ++/** ++ * The subdevice is about to be cleaned in the middle of the detach ++ * procedure ++ */ ++#define A4L_SUBD_CLEAN_NR 1 ++#define A4L_SUBD_CLEAN (1 << A4L_SUBD_CLEAN_NR) ++ ++ ++/*! 
@} ANALOGY_SUBD_ST_xxx */ ++ ++/* --- Subdevice related IOCTL arguments structures --- */ ++ ++/* SUDBINFO IOCTL argument */ ++struct a4l_subd_info { ++ unsigned long flags; ++ unsigned long status; ++ unsigned char nb_chan; ++}; ++typedef struct a4l_subd_info a4l_sbinfo_t; ++ ++/* CHANINFO / NBCHANINFO IOCTL arguments */ ++struct a4l_chan_info { ++ unsigned long chan_flags; ++ unsigned char nb_rng; ++ unsigned char nb_bits; ++}; ++typedef struct a4l_chan_info a4l_chinfo_t; ++ ++struct a4l_chinfo_arg { ++ unsigned int idx_subd; ++ void *info; ++}; ++typedef struct a4l_chinfo_arg a4l_chinfo_arg_t; ++ ++/* RNGINFO / NBRNGINFO IOCTL arguments */ ++struct a4l_rng_info { ++ long min; ++ long max; ++ unsigned long flags; ++}; ++typedef struct a4l_rng_info a4l_rnginfo_t; ++ ++struct a4l_rng_info_arg { ++ unsigned int idx_subd; ++ unsigned int idx_chan; ++ void *info; ++}; ++typedef struct a4l_rng_info_arg a4l_rnginfo_arg_t; ++ ++/*! @} */ ++ ++#define A4L_INSN_MASK_READ 0x8000000 ++#define A4L_INSN_MASK_WRITE 0x4000000 ++#define A4L_INSN_MASK_SPECIAL 0x2000000 ++ ++/*! ++ * @addtogroup analogy_lib_sync1 ++ * @{ ++ */ ++ ++/*! ++ * @anchor ANALOGY_INSN_xxx @name Instruction type ++ * @brief Flags to define the type of instruction ++ * @{ ++ */ ++ ++/** ++ * Read instruction ++ */ ++#define A4L_INSN_READ (0 | A4L_INSN_MASK_READ) ++/** ++ * Write instruction ++ */ ++#define A4L_INSN_WRITE (1 | A4L_INSN_MASK_WRITE) ++/** ++ * "Bits" instruction ++ */ ++#define A4L_INSN_BITS (2 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_WRITE) ++/** ++ * Configuration instruction ++ */ ++#define A4L_INSN_CONFIG (3 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_WRITE) ++/** ++ * Get time instruction ++ */ ++#define A4L_INSN_GTOD (4 | A4L_INSN_MASK_READ | \ ++ A4L_INSN_MASK_SPECIAL) ++/** ++ * Wait instruction ++ */ ++#define A4L_INSN_WAIT (5 | A4L_INSN_MASK_WRITE | \ ++ A4L_INSN_MASK_SPECIAL) ++/** ++ * Trigger instruction (to start asynchronous acquisition) ++ */ ++#define A4L_INSN_INTTRIG (6 | A4L_INSN_MASK_WRITE | \ ++ A4L_INSN_MASK_SPECIAL) ++ ++ /*! @} ANALOGY_INSN_xxx */ ++ ++/** ++ * Maximal wait duration ++ */ ++#define A4L_INSN_WAIT_MAX 100000 ++ ++/*! 
++ * @anchor INSN_CONFIG_xxx @name Configuration instruction type ++ * @brief Values to define the type of configuration instruction ++ * @{ ++ */ ++ ++#define A4L_INSN_CONFIG_DIO_INPUT 0 ++#define A4L_INSN_CONFIG_DIO_OUTPUT 1 ++#define A4L_INSN_CONFIG_DIO_OPENDRAIN 2 ++#define A4L_INSN_CONFIG_ANALOG_TRIG 16 ++#define A4L_INSN_CONFIG_ALT_SOURCE 20 ++#define A4L_INSN_CONFIG_DIGITAL_TRIG 21 ++#define A4L_INSN_CONFIG_BLOCK_SIZE 22 ++#define A4L_INSN_CONFIG_TIMER_1 23 ++#define A4L_INSN_CONFIG_FILTER 24 ++#define A4L_INSN_CONFIG_CHANGE_NOTIFY 25 ++#define A4L_INSN_CONFIG_SERIAL_CLOCK 26 ++#define A4L_INSN_CONFIG_BIDIRECTIONAL_DATA 27 ++#define A4L_INSN_CONFIG_DIO_QUERY 28 ++#define A4L_INSN_CONFIG_PWM_OUTPUT 29 ++#define A4L_INSN_CONFIG_GET_PWM_OUTPUT 30 ++#define A4L_INSN_CONFIG_ARM 31 ++#define A4L_INSN_CONFIG_DISARM 32 ++#define A4L_INSN_CONFIG_GET_COUNTER_STATUS 33 ++#define A4L_INSN_CONFIG_RESET 34 ++#define A4L_INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR 1001 /* Use CTR as single pulsegenerator */ ++#define A4L_INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR 1002 /* Use CTR as pulsetraingenerator */ ++#define A4L_INSN_CONFIG_GPCT_QUADRATURE_ENCODER 1003 /* Use the counter as encoder */ ++#define A4L_INSN_CONFIG_SET_GATE_SRC 2001 /* Set gate source */ ++#define A4L_INSN_CONFIG_GET_GATE_SRC 2002 /* Get gate source */ ++#define A4L_INSN_CONFIG_SET_CLOCK_SRC 2003 /* Set master clock source */ ++#define A4L_INSN_CONFIG_GET_CLOCK_SRC 2004 /* Get master clock source */ ++#define A4L_INSN_CONFIG_SET_OTHER_SRC 2005 /* Set other source */ ++#define A4L_INSN_CONFIG_SET_COUNTER_MODE 4097 ++#define A4L_INSN_CONFIG_SET_ROUTING 4099 ++#define A4L_INSN_CONFIG_GET_ROUTING 4109 ++ ++/*! @} INSN_CONFIG_xxx */ ++ ++/*! ++ * @anchor ANALOGY_COUNTER_xxx @name Counter status bits ++ * @brief Status bits for INSN_CONFIG_GET_COUNTER_STATUS ++ * @{ ++ */ ++ ++#define A4L_COUNTER_ARMED 0x1 ++#define A4L_COUNTER_COUNTING 0x2 ++#define A4L_COUNTER_TERMINAL_COUNT 0x4 ++ ++ /*! @} ANALOGY_COUNTER_xxx */ ++ ++/*! ++ * @anchor ANALOGY_IO_DIRECTION @name IO direction ++ * @brief Values to define the IO polarity ++ * @{ ++ */ ++ ++#define A4L_INPUT 0 ++#define A4L_OUTPUT 1 ++#define A4L_OPENDRAIN 2 ++ ++ /*! @} ANALOGY_IO_DIRECTION */ ++ ++ ++/*! ++ * @anchor ANALOGY_EV_xxx @name Events types ++ * @brief Values to define the Analogy events. They might used to send ++ * some specific events through the instruction interface. ++ * @{ ++ */ ++ ++#define A4L_EV_START 0x00040000 ++#define A4L_EV_SCAN_BEGIN 0x00080000 ++#define A4L_EV_CONVERT 0x00100000 ++#define A4L_EV_SCAN_END 0x00200000 ++#define A4L_EV_STOP 0x00400000 ++ ++/*! @} ANALOGY_EV_xxx */ ++ ++/*! ++ * @brief Structure describing the synchronous instruction ++ * @see a4l_snd_insn() ++ */ ++ ++struct a4l_instruction { ++ unsigned int type; ++ /**< Instruction type */ ++ unsigned int idx_subd; ++ /**< Subdevice to which the instruction will be applied. */ ++ unsigned int chan_desc; ++ /**< Channel descriptor */ ++ unsigned int data_size; ++ /**< Size of the intruction data */ ++ void *data; ++ /**< Instruction data */ ++}; ++typedef struct a4l_instruction a4l_insn_t; ++ ++/*! ++ * @brief Structure describing the list of synchronous instructions ++ * @see a4l_snd_insnlist() ++ */ ++ ++struct a4l_instruction_list { ++ unsigned int count; ++ /**< Instructions count */ ++ a4l_insn_t *insns; ++ /**< Tab containing the instructions pointers */ ++}; ++typedef struct a4l_instruction_list a4l_insnlst_t; ++ ++/*! 
@} analogy_lib_sync1 */ ++ ++struct a4l_calibration_subdev { ++ a4l_sbinfo_t *info; ++ char *name; ++ int slen; ++ int idx; ++}; ++ ++struct a4l_calibration_subdev_data { ++ int index; ++ int channel; ++ int range; ++ int expansion; ++ int nb_coeff; ++ double *coeff; ++ ++}; ++ ++struct a4l_calibration_data { ++ char *driver_name; ++ char *board_name; ++ int nb_ai; ++ struct a4l_calibration_subdev_data *ai; ++ int nb_ao; ++ struct a4l_calibration_subdev_data *ao; ++}; ++ ++struct a4l_polynomial { ++ int expansion; ++ int order; ++ int nb_coeff; ++ double *coeff; ++}; ++ ++ ++#endif /* _RTDM_UAPI_ANALOGY_H */ +--- linux/include/xenomai/rtdm/uapi/gpiopwm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/gpiopwm.h 2021-04-07 16:01:28.459632406 +0800 +@@ -0,0 +1,56 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, pwm header ++ * ++ * @note Copyright (C) 2015 Jorge Ramirez ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ * ++ * @ingroup rttesting ++ */ ++#ifndef _RTDM_UAPI_PWM_H ++#define _RTDM_UAPI_PWM_H ++ ++#include ++ ++#define RTPWM_PROFILE_VER 1 ++ ++struct gpiopwm { ++ unsigned int duty_cycle; ++ unsigned int range_min; ++ unsigned int range_max; ++ unsigned int period; ++ unsigned int gpio; ++}; ++ ++#define RTIOC_TYPE_PWM RTDM_CLASS_PWM ++ ++#define GPIOPWM_RTIOC_SET_CONFIG \ ++ _IOW(RTIOC_TYPE_PWM, 0x00, struct gpiopwm) ++ ++#define GPIOPWM_RTIOC_GET_CONFIG \ ++ _IOR(RTIOC_TYPE_PWM, 0x10, struct gpiopwm) ++ ++#define GPIOPWM_RTIOC_START \ ++ _IO(RTIOC_TYPE_PWM, 0x20) ++ ++#define GPIOPWM_RTIOC_STOP \ ++ _IO(RTIOC_TYPE_PWM, 0x30) ++ ++#define GPIOPWM_RTIOC_CHANGE_DUTY_CYCLE \ ++ _IOW(RTIOC_TYPE_PWM, 0x40, unsigned int) ++ ++ ++#endif /* !_RTDM_UAPI_TESTING_H */ +--- linux/include/xenomai/rtdm/uapi/net.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/net.h 2021-04-07 16:01:28.455632411 +0800 +@@ -0,0 +1,75 @@ ++/*** ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2005-2011 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ * As a special exception to the GNU General Public license, the RTnet ++ * project allows you to use this header file in unmodified form to produce ++ * application programs executing in user-space which use RTnet services by ++ * normal system calls. The resulting executable will not be covered by the ++ * GNU General Public License merely as a result of this header file use. ++ * Instead, this header file use will be considered normal use of RTnet and ++ * not a "derived work" in the sense of the GNU General Public License. ++ * ++ * This exception does not apply when the application code is built as a ++ * static or dynamically loadable portion of the Linux kernel nor does the ++ * exception override other reasons justifying application of the GNU General ++ * Public License. ++ * ++ * This exception applies only to the code released by the RTnet project ++ * under the name RTnet and bearing this exception notice. If you copy code ++ * from other sources into a copy of RTnet, the exception does not apply to ++ * the code that you add in this way. ++ * ++ */ ++ ++#ifndef _RTDM_UAPI_NET_H ++#define _RTDM_UAPI_NET_H ++ ++/* sub-classes: RTDM_CLASS_NETWORK */ ++#define RTDM_SUBCLASS_RTNET 0 ++ ++#define RTIOC_TYPE_NETWORK RTDM_CLASS_NETWORK ++ ++/* RTnet-specific IOCTLs */ ++#define RTNET_RTIOC_XMITPARAMS _IOW(RTIOC_TYPE_NETWORK, 0x10, unsigned int) ++#define RTNET_RTIOC_PRIORITY RTNET_RTIOC_XMITPARAMS /* legacy */ ++#define RTNET_RTIOC_TIMEOUT _IOW(RTIOC_TYPE_NETWORK, 0x11, int64_t) ++/* RTNET_RTIOC_CALLBACK _IOW(RTIOC_TYPE_NETWORK, 0x12, ... ++ * IOCTL only usable inside the kernel. */ ++/* RTNET_RTIOC_NONBLOCK _IOW(RTIOC_TYPE_NETWORK, 0x13, unsigned int) ++ * This IOCTL is no longer supported (and it was buggy anyway). ++ * Use RTNET_RTIOC_TIMEOUT with any negative timeout value instead. 
*/ ++#define RTNET_RTIOC_EXTPOOL _IOW(RTIOC_TYPE_NETWORK, 0x14, unsigned int) ++#define RTNET_RTIOC_SHRPOOL _IOW(RTIOC_TYPE_NETWORK, 0x15, unsigned int) ++ ++/* socket transmission priorities */ ++#define SOCK_MAX_PRIO 0 ++#define SOCK_DEF_PRIO SOCK_MAX_PRIO + \ ++ (SOCK_MIN_PRIO-SOCK_MAX_PRIO+1)/2 ++#define SOCK_MIN_PRIO SOCK_NRT_PRIO - 1 ++#define SOCK_NRT_PRIO 31 ++ ++/* socket transmission channels */ ++#define SOCK_DEF_RT_CHANNEL 0 /* default rt xmit channel */ ++#define SOCK_DEF_NRT_CHANNEL 1 /* default non-rt xmit channel */ ++#define SOCK_USER_CHANNEL 2 /* first user-defined channel */ ++ ++/* argument construction for RTNET_RTIOC_XMITPARAMS */ ++#define SOCK_XMIT_PARAMS(priority, channel) ((priority) | ((channel) << 16)) ++ ++#endif /* !_RTDM_UAPI_NET_H */ +--- linux/include/xenomai/rtdm/uapi/can.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/can.h 2021-04-07 16:01:28.450632419 +0800 +@@ -0,0 +1,905 @@ ++/** ++ * @file ++ * Real-Time Driver Model for RT-Socket-CAN, CAN device profile header ++ * ++ * @note Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * @note Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * This RTDM CAN device profile header is based on: ++ * ++ * include/linux/can.h, include/linux/socket.h, net/can/pf_can.h in ++ * linux-can.patch, a CAN socket framework for Linux ++ * ++ * Copyright (C) 2004, 2005, ++ * Robert Schwebel, Benedikt Spranger, Marc Kleine-Budde, Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_UAPI_CAN_H ++#define _RTDM_UAPI_CAN_H ++ ++/** ++ * @addtogroup rtdm_can ++ * @{ ++ */ ++ ++#define RTCAN_PROFILE_VER 2 ++ ++#ifndef AF_CAN ++ ++/** CAN address family */ ++#define AF_CAN 29 ++ ++/** CAN protocol family */ ++#define PF_CAN AF_CAN ++ ++#endif ++ ++/** CAN socket levels ++ * ++ * Used for @ref Sockopts for the particular protocols. ++ */ ++#define SOL_CAN_RAW 103 ++ ++/** Type of CAN id (see @ref CAN_xxx_MASK and @ref CAN_xxx_FLAG) */ ++typedef uint32_t can_id_t; ++typedef uint32_t canid_t; ++ ++/** Type of CAN error mask */ ++typedef can_id_t can_err_mask_t; ++ ++/*! ++ * @anchor CAN_xxx_MASK @name CAN ID masks ++ * Bit masks for masking CAN IDs ++ * @{ */ ++ ++/** Bit mask for extended CAN IDs */ ++#define CAN_EFF_MASK 0x1FFFFFFF ++ ++/** Bit mask for standard CAN IDs */ ++#define CAN_SFF_MASK 0x000007FF ++ ++/** @} */ ++ ++/*! 
++ * @anchor CAN_xxx_FLAG @name CAN ID flags ++ * Flags within a CAN ID indicating special CAN frame attributes ++ * @{ */ ++/** Extended frame */ ++#define CAN_EFF_FLAG 0x80000000 ++/** Remote transmission frame */ ++#define CAN_RTR_FLAG 0x40000000 ++/** Error frame (see @ref Errors), not valid in struct can_filter */ ++#define CAN_ERR_FLAG 0x20000000 ++/** Invert CAN filter definition, only valid in struct can_filter */ ++#define CAN_INV_FILTER CAN_ERR_FLAG ++ ++/** @} */ ++ ++/*! ++ * @anchor CAN_PROTO @name Particular CAN protocols ++ * Possible protocols for the PF_CAN protocol family ++ * ++ * Currently only the RAW protocol is supported. ++ * @{ */ ++/** Raw protocol of @c PF_CAN, applicable to socket type @c SOCK_RAW */ ++#define CAN_RAW 1 ++/** @} */ ++ ++#define CAN_BAUDRATE_UNKNOWN ((uint32_t)-1) ++#define CAN_BAUDRATE_UNCONFIGURED 0 ++ ++/** ++ * Baudrate definition in bits per second ++ */ ++typedef uint32_t can_baudrate_t; ++ ++/** ++ * Supported CAN bit-time types ++ */ ++enum CAN_BITTIME_TYPE { ++ /** Standard bit-time definition according to Bosch */ ++ CAN_BITTIME_STD, ++ /** Hardware-specific BTR bit-time definition */ ++ CAN_BITTIME_BTR ++}; ++ ++/** ++ * See @ref CAN_BITTIME_TYPE ++ */ ++typedef enum CAN_BITTIME_TYPE can_bittime_type_t; ++ ++/** ++ * Standard bit-time parameters according to Bosch ++ */ ++struct can_bittime_std { ++ uint32_t brp; /**< Baud rate prescaler */ ++ uint8_t prop_seg; /**< from 1 to 8 */ ++ uint8_t phase_seg1; /**< from 1 to 8 */ ++ uint8_t phase_seg2; /**< from 1 to 8 */ ++ uint8_t sjw:7; /**< from 1 to 4 */ ++ uint8_t sam:1; /**< 1 - enable triple sampling */ ++}; ++ ++/** ++ * Hardware-specific BTR bit-times ++ */ ++struct can_bittime_btr { ++ ++ uint8_t btr0; /**< Bus timing register 0 */ ++ uint8_t btr1; /**< Bus timing register 1 */ ++}; ++ ++/** ++ * Custom CAN bit-time definition ++ */ ++struct can_bittime { ++ /** Type of bit-time definition */ ++ can_bittime_type_t type; ++ ++ union { ++ /** Standard bit-time */ ++ struct can_bittime_std std; ++ /** Hardware-spcific BTR bit-time */ ++ struct can_bittime_btr btr; ++ }; ++}; ++ ++/*! ++ * @anchor CAN_MODE @name CAN operation modes ++ * Modes into which CAN controllers can be set ++ * @{ */ ++enum CAN_MODE { ++ /*! Set controller in Stop mode (no reception / transmission possible) */ ++ CAN_MODE_STOP = 0, ++ ++ /*! Set controller into normal operation. @n ++ * Coming from stopped mode or bus off, the controller begins with no ++ * errors in @ref CAN_STATE_ACTIVE. */ ++ CAN_MODE_START, ++ ++ /*! Set controller into Sleep mode. @n ++ * This is only possible if the controller is not stopped or bus-off. @n ++ * Notice that sleep mode will only be entered when there is no bus ++ * activity. If the controller detects bus activity while "sleeping" ++ * it will go into operating mode again. @n ++ * To actively leave sleep mode again trigger @c CAN_MODE_START. */ ++ CAN_MODE_SLEEP ++}; ++/** @} */ ++ ++/** See @ref CAN_MODE */ ++typedef enum CAN_MODE can_mode_t; ++ ++/*! ++ * @anchor CAN_CTRLMODE @name CAN controller modes ++ * Special CAN controllers modes, which can be or'ed together. ++ * ++ * @note These modes are hardware-dependent. Please consult the hardware ++ * manual of the CAN controller for more detailed information. ++ * ++ * @{ */ ++ ++/*! Listen-Only mode ++ * ++ * In this mode the CAN controller would give no acknowledge to the CAN-bus, ++ * even if a message is received successfully and messages would not be ++ * transmitted. 
This mode might be useful for bus-monitoring, hot-plugging ++ * or throughput analysis. */ ++#define CAN_CTRLMODE_LISTENONLY 0x1 ++ ++/*! Loopback mode ++ * ++ * In this mode the CAN controller does an internal loop-back, a message is ++ * transmitted and simultaneously received. That mode can be used for self ++ * test operation. */ ++#define CAN_CTRLMODE_LOOPBACK 0x2 ++ ++/*! Triple sampling mode ++ * ++ * In this mode the CAN controller uses Triple sampling. */ ++#define CAN_CTRLMODE_3_SAMPLES 0x4 ++ ++/** @} */ ++ ++/** See @ref CAN_CTRLMODE */ ++typedef int can_ctrlmode_t; ++ ++/*! ++ * @anchor CAN_STATE @name CAN controller states ++ * States a CAN controller can be in. ++ * @{ */ ++enum CAN_STATE { ++ /** CAN controller is error active */ ++ CAN_STATE_ERROR_ACTIVE = 0, ++ /** CAN controller is active */ ++ CAN_STATE_ACTIVE = 0, ++ ++ /** CAN controller is error active, warning level is reached */ ++ CAN_STATE_ERROR_WARNING = 1, ++ /** CAN controller is error active, warning level is reached */ ++ CAN_STATE_BUS_WARNING = 1, ++ ++ /** CAN controller is error passive */ ++ CAN_STATE_ERROR_PASSIVE = 2, ++ /** CAN controller is error passive */ ++ CAN_STATE_BUS_PASSIVE = 2, ++ ++ /** CAN controller went into Bus Off */ ++ CAN_STATE_BUS_OFF, ++ ++ /** CAN controller is scanning to get the baudrate */ ++ CAN_STATE_SCANNING_BAUDRATE, ++ ++ /** CAN controller is in stopped mode */ ++ CAN_STATE_STOPPED, ++ ++ /** CAN controller is in Sleep mode */ ++ CAN_STATE_SLEEPING, ++}; ++/** @} */ ++ ++/** See @ref CAN_STATE */ ++typedef enum CAN_STATE can_state_t; ++ ++#define CAN_STATE_OPERATING(state) ((state) < CAN_STATE_BUS_OFF) ++ ++/** ++ * Filter for reception of CAN messages. ++ * ++ * This filter works as follows: ++ * A received CAN ID is AND'ed bitwise with @c can_mask and then compared to ++ * @c can_id. This also includes the @ref CAN_EFF_FLAG and @ref CAN_RTR_FLAG ++ * of @ref CAN_xxx_FLAG. If this comparison is true, the message will be ++ * received by the socket. The logic can be inverted with the @c can_id flag ++ * @ref CAN_INV_FILTER : ++ * ++ * @code ++ * if (can_id & CAN_INV_FILTER) { ++ * if ((received_can_id & can_mask) != (can_id & ~CAN_INV_FILTER)) ++ * accept-message; ++ * } else { ++ * if ((received_can_id & can_mask) == can_id) ++ * accept-message; ++ * } ++ * @endcode ++ * ++ * Multiple filters can be arranged in a filter list and set with ++ * @ref Sockopts. If one of these filters matches a CAN ID upon reception ++ * of a CAN frame, this frame is accepted. ++ * ++ */ ++typedef struct can_filter { ++ /** CAN ID which must match with incoming IDs after passing the mask. ++ * The filter logic can be inverted with the flag @ref CAN_INV_FILTER. */ ++ uint32_t can_id; ++ ++ /** Mask which is applied to incoming IDs. See @ref CAN_xxx_MASK ++ * "CAN ID masks" if exactly one CAN ID should come through. */ ++ uint32_t can_mask; ++} can_filter_t; ++ ++/** ++ * Socket address structure for the CAN address family ++ */ ++struct sockaddr_can { ++ /** CAN address family, must be @c AF_CAN */ ++ sa_family_t can_family; ++ ++ /** Interface index of CAN controller. See @ref SIOCGIFINDEX. */ ++ int can_ifindex; ++}; ++ ++/** ++ * Raw CAN frame ++ * ++ * Central structure for receiving and sending CAN frames. ++ */ ++typedef struct can_frame { ++ /** CAN ID of the frame ++ * ++ * See @ref CAN_xxx_FLAG "CAN ID flags" for special bits. 
++ */ ++ can_id_t can_id; ++ ++ /** Size of the payload in bytes */ ++ uint8_t can_dlc; ++ ++ /** Payload data bytes */ ++ uint8_t data[8] __attribute__ ((aligned(8))); ++} can_frame_t; ++ ++/** ++ * CAN interface request descriptor ++ * ++ * Parameter block for submitting CAN control requests. ++ */ ++struct can_ifreq { ++ union { ++ char ifrn_name[IFNAMSIZ]; ++ } ifr_ifrn; ++ ++ union { ++ struct can_bittime bittime; ++ can_baudrate_t baudrate; ++ can_ctrlmode_t ctrlmode; ++ can_mode_t mode; ++ can_state_t state; ++ int ifru_ivalue; ++ } ifr_ifru; ++}; ++ ++/*! ++ * @anchor RTCAN_TIMESTAMPS @name Timestamp switches ++ * Arguments to pass to @ref RTCAN_RTIOC_TAKE_TIMESTAMP ++ * @{ */ ++#define RTCAN_TAKE_NO_TIMESTAMPS 0 /**< Switch off taking timestamps */ ++#define RTCAN_TAKE_TIMESTAMPS 1 /**< Do take timestamps */ ++/** @} */ ++ ++#define RTIOC_TYPE_CAN RTDM_CLASS_CAN ++ ++/*! ++ * @anchor Rawsockopts @name RAW socket options ++ * Setting and getting CAN RAW socket options. ++ * @{ */ ++ ++/** ++ * CAN filter definition ++ * ++ * A CAN raw filter list with elements of struct can_filter can be installed ++ * with @c setsockopt. This list is used upon reception of CAN frames to ++ * decide whether the bound socket will receive a frame. An empty filter list ++ * can also be defined using optlen = 0, which is recommanded for write-only ++ * sockets. ++ * @n ++ * If the socket was already bound with @ref Bind, the old filter list ++ * gets replaced with the new one. Be aware that already received, but ++ * not read out CAN frames may stay in the socket buffer. ++ * @n ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_FILTER ++ * ++ * @param [in] optval Pointer to array of struct can_filter. ++ * ++ * @param [in] optlen Size of filter list: count * sizeof( struct can_filter). ++ * @n ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -ENOMEM (Not enough memory to fulfill the operation) ++ * - -EINVAL (Invalid length "optlen") ++ * - -ENOSPC (No space to store filter list, check RT-Socket-CAN kernel ++ * parameters) ++ * . ++ */ ++#define CAN_RAW_FILTER 0x1 ++ ++/** ++ * CAN error mask ++ * ++ * A CAN error mask (see @ref Errors) can be set with @c setsockopt. This ++ * mask is then used to decide if error frames are delivered to this socket ++ * in case of error condidtions. The error frames are marked with the ++ * @ref CAN_ERR_FLAG of @ref CAN_xxx_FLAG and must be handled by the ++ * application properly. A detailed description of the errors can be ++ * found in the @c can_id and the @c data fields of struct can_frame ++ * (see @ref Errors for futher details). ++ * ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_ERR_FILTER ++ * ++ * @param [in] optval Pointer to error mask of type can_err_mask_t. ++ * ++ * @param [in] optlen Size of error mask: sizeof(can_err_mask_t). ++ * ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -EINVAL (Invalid length "optlen") ++ * . ++ */ ++#define CAN_RAW_ERR_FILTER 0x2 ++ ++/** ++ * CAN TX loopback ++ * ++ * The TX loopback to other local sockets can be selected with this ++ * @c setsockopt. 
++ * ++ * @note The TX loopback feature must be enabled in the kernel and then ++ * the loopback to other local TX sockets is enabled by default. ++ * ++ * @n ++ * @param [in] level @b SOL_CAN_RAW ++ * ++ * @param [in] optname @b CAN_RAW_LOOPBACK ++ * ++ * @param [in] optval Pointer to integer value. ++ * ++ * @param [in] optlen Size of int: sizeof(int). ++ * ++ * @coretags{task-unrestricted} ++ * @n ++ * Specific return values: ++ * - -EFAULT (It was not possible to access user space memory area at the ++ * specified address.) ++ * - -EINVAL (Invalid length "optlen") ++ * - -EOPNOTSUPP (not supported, check RT-Socket-CAN kernel parameters). ++ */ ++#define CAN_RAW_LOOPBACK 0x3 ++ ++/** ++ * CAN receive own messages ++ * ++ * Not supported by RT-Socket-CAN, but defined for compatibility with ++ * Socket-CAN. ++ */ ++#define CAN_RAW_RECV_OWN_MSGS 0x4 ++ ++/** @} */ ++ ++/*! ++ * @anchor CANIOCTLs @name IOCTLs ++ * CAN device IOCTLs ++ * ++ * @deprecated Passing \c struct \c ifreq as a request descriptor ++ * for CAN IOCTLs is still accepted for backward compatibility, ++ * however it is recommended to switch to \c struct \c can_ifreq at ++ * the first opportunity. ++ * ++ * @{ */ ++ ++/** ++ * Get CAN interface index by name ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). If ++ * ifr_name holds a valid CAN interface ++ * name ifr_ifindex will be filled with ++ * the corresponding interface index. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#ifdef DOXYGEN_CPP /* For Doxygen only, already defined by kernel headers */ ++#define SIOCGIFINDEX defined_by_kernel_header_file ++#endif ++ ++/** ++ * Set baud rate ++ * ++ * The baudrate must be specified in bits per second. The driver will ++ * try to calculate resonable CAN bit-timing parameters. You can use ++ * @ref SIOCSCANCUSTOMBITTIME to set custom bit-timing. ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_baudrate_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EDOM : Baud rate not possible. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting the baud rate is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANBAUDRATE _IOW(RTIOC_TYPE_CAN, 0x01, struct can_ifreq) ++ ++/** ++ * Get baud rate ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * @ref can_baudrate_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++#define SIOCGCANBAUDRATE _IOWR(RTIOC_TYPE_CAN, 0x02, struct can_ifreq) ++ ++/** ++ * Set custom bit time parameter ++ * ++ * Custem-bit time could be defined in various formats (see ++ * struct can_bittime). ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * struct can_bittime. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting the bit-time is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANCUSTOMBITTIME _IOW(RTIOC_TYPE_CAN, 0x03, struct can_ifreq) ++ ++/** ++ * Get custom bit-time parameters ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * struct can_bittime. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define SIOCGCANCUSTOMBITTIME _IOWR(RTIOC_TYPE_CAN, 0x04, struct can_ifreq) ++ ++/** ++ * Set operation mode of CAN controller ++ * ++ * See @ref CAN_MODE "CAN controller modes" for available modes. ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_mode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EAGAIN: (@ref CAN_MODE_START, @ref CAN_MODE_STOP) Could not successfully ++ * set mode, hardware is busy. Try again. ++ * - -EINVAL: (@ref CAN_MODE_START) Cannot start controller, ++ * set baud rate first. ++ * - -ENETDOWN: (@ref CAN_MODE_SLEEP) Cannot go into sleep mode because ++ controller is stopped or bus off. ++ * - -EOPNOTSUPP: unknown mode ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting a CAN controller into normal operation after a bus-off can ++ * take some time (128 occurrences of 11 consecutive recessive bits). ++ * In such a case, although this IOCTL will return immediately with success ++ * and @ref SIOCGCANSTATE will report @ref CAN_STATE_ACTIVE, ++ * bus-off recovery may still be in progress. @n ++ * If a controller is bus-off, setting it into stop mode will return no error ++ * but the controller remains bus-off. ++ */ ++#define SIOCSCANMODE _IOW(RTIOC_TYPE_CAN, 0x05, struct can_ifreq) ++ ++/** ++ * Get current state of CAN controller ++ * ++ * States are divided into main states and additional error indicators. A CAN ++ * controller is always in exactly one main state. CAN bus errors are ++ * registered by the CAN hardware and collected by the driver. There is one ++ * error indicator (bit) per error type. 
If this IOCTL is triggered the error ++ * types which occured since the last call of this IOCTL are reported and ++ * thereafter the error indicators are cleared. See also ++ * @ref CAN_STATE "CAN controller states". ++ * ++ * @param [in,out] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru will be filled with an instance of ++ * @ref can_mode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++#define SIOCGCANSTATE _IOWR(RTIOC_TYPE_CAN, 0x06, struct can_ifreq) ++ ++/** ++ * Set special controller modes ++ * ++ * Various special controller modes could be or'ed together (see ++ * @ref CAN_CTRLMODE for further information). ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_ctrlmode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No valid baud rate, see @ref can_baudrate_t. ++ * - -EAGAIN: Request could not be successully fulfilled. Try again. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note Setting special controller modes is a configuration task. It should ++ * be done deliberately or otherwise CAN messages will likely be lost. ++ */ ++#define SIOCSCANCTRLMODE _IOW(RTIOC_TYPE_CAN, 0x07, struct can_ifreq) ++ ++/** ++ * Get special controller modes ++ * ++ * ++ * @param [in] arg Pointer to interface request structure buffer ++ * (struct can_ifreq). ++ * ifr_name must hold a valid CAN interface name, ++ * ifr_ifru must be filled with an instance of ++ * @ref can_ctrlmode_t. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * - -ENODEV: No device with specified name exists. ++ * - -EINVAL: No baud rate was set yet. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++#define SIOCGCANCTRLMODE _IOWR(RTIOC_TYPE_CAN, 0x08, struct can_ifreq) ++ ++/** ++ * Enable or disable storing a high precision timestamp upon reception of ++ * a CAN frame. ++ * ++ * A newly created socket takes no timestamps by default. ++ * ++ * @param [in] arg int variable, see @ref RTCAN_TIMESTAMPS "Timestamp switches" ++ * ++ * @return 0 on success. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Activating taking timestamps only has an effect on newly received ++ * CAN messages from the bus. Frames that already are in the socket buffer do ++ * not have timestamps if it was deactivated before. See @ref Recv "Receive" ++ * for more details. ++ */ ++#define RTCAN_RTIOC_TAKE_TIMESTAMP _IOW(RTIOC_TYPE_CAN, 0x09, int) ++ ++/** ++ * Specify a reception timeout for a socket ++ * ++ * Defines a timeout for all receive operations via a ++ * socket which will take effect when one of the @ref Recv "receive functions" ++ * is called without the @c MSG_DONTWAIT flag set. ++ * ++ * The default value for a newly created socket is an infinite timeout. ++ * ++ * @note The setting of the timeout value is not done atomically to avoid ++ * locks. 
Please set the value before receiving messages from the socket. ++ * ++ * @param [in] arg Pointer to @ref nanosecs_rel_t variable. The value is ++ * interpreted as relative timeout in nanoseconds in case ++ * of a positive value. ++ * See @ref RTDM_TIMEOUT_xxx "Timeouts" for special timeouts. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTCAN_RTIOC_RCV_TIMEOUT _IOW(RTIOC_TYPE_CAN, 0x0A, nanosecs_rel_t) ++ ++/** ++ * Specify a transmission timeout for a socket ++ * ++ * Defines a timeout for all send operations via a ++ * socket which will take effect when one of the @ref Send "send functions" ++ * is called without the @c MSG_DONTWAIT flag set. ++ * ++ * The default value for a newly created socket is an infinite timeout. ++ * ++ * @note The setting of the timeout value is not done atomically to avoid ++ * locks. Please set the value before sending messages to the socket. ++ * ++ * @param [in] arg Pointer to @ref nanosecs_rel_t variable. The value is ++ * interpreted as relative timeout in nanoseconds in case ++ * of a positive value. ++ * See @ref RTDM_TIMEOUT_xxx "Timeouts" for special timeouts. ++ * ++ * @return 0 on success, otherwise: ++ * - -EFAULT: It was not possible to access user space memory area at the ++ * specified address. ++ * ++ * @coretags{task-unrestricted} ++ */ ++#define RTCAN_RTIOC_SND_TIMEOUT _IOW(RTIOC_TYPE_CAN, 0x0B, nanosecs_rel_t) ++/** @} */ ++ ++#define CAN_ERR_DLC 8 /* dlc for error frames */ ++ ++/*! ++ * @anchor Errors @name Error mask ++ * Error class (mask) in @c can_id field of struct can_frame to ++ * be used with @ref CAN_RAW_ERR_FILTER. ++ * ++ * @b Note: Error reporting is hardware dependent and most CAN controllers ++ * report less detailed error conditions than the SJA1000. ++ * ++ * @b Note: In case of a bus-off error condition (@ref CAN_ERR_BUSOFF), the ++ * CAN controller is @b not restarted automatically. It is the application's ++ * responsibility to react appropriately, e.g. calling @ref CAN_MODE_START. ++ * ++ * @b Note: Bus error interrupts (@ref CAN_ERR_BUSERROR) are enabled when an ++ * application is calling a @ref Recv function on a socket listening ++ * on bus errors (using @ref CAN_RAW_ERR_FILTER). After one bus error has ++ * occured, the interrupt will be disabled to allow the application time for ++ * error processing and to efficiently avoid bus error interrupt flooding. ++ * @{ */ ++ ++/** TX timeout (netdevice driver) */ ++#define CAN_ERR_TX_TIMEOUT 0x00000001U ++ ++/** Lost arbitration (see @ref Error0 "data[0]") */ ++#define CAN_ERR_LOSTARB 0x00000002U ++ ++/** Controller problems (see @ref Error1 "data[1]") */ ++#define CAN_ERR_CRTL 0x00000004U ++ ++/** Protocol violations (see @ref Error2 "data[2]", ++ @ref Error3 "data[3]") */ ++#define CAN_ERR_PROT 0x00000008U ++ ++/** Transceiver status (see @ref Error4 "data[4]") */ ++#define CAN_ERR_TRX 0x00000010U ++ ++/** Received no ACK on transmission */ ++#define CAN_ERR_ACK 0x00000020U ++ ++/** Bus off */ ++#define CAN_ERR_BUSOFF 0x00000040U ++ ++/** Bus error (may flood!) */ ++#define CAN_ERR_BUSERROR 0x00000080U ++ ++/** Controller restarted */ ++#define CAN_ERR_RESTARTED 0x00000100U ++ ++/** Omit EFF, RTR, ERR flags */ ++#define CAN_ERR_MASK 0x1FFFFFFFU ++ ++/** @} */ ++ ++/*! ++ * @anchor Error0 @name Arbitration lost error ++ * Error in the data[0] field of struct can_frame. ++ * @{ */ ++/* arbitration lost in bit ... 
/ data[0] */ ++#define CAN_ERR_LOSTARB_UNSPEC 0x00 /**< unspecified */ ++ /**< else bit number in bitstream */ ++/** @} */ ++ ++/*! ++ * @anchor Error1 @name Controller problems ++ * Error in the data[1] field of struct can_frame. ++ * @{ */ ++/* error status of CAN-controller / data[1] */ ++#define CAN_ERR_CRTL_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_CRTL_RX_OVERFLOW 0x01 /**< RX buffer overflow */ ++#define CAN_ERR_CRTL_TX_OVERFLOW 0x02 /**< TX buffer overflow */ ++#define CAN_ERR_CRTL_RX_WARNING 0x04 /**< reached warning level for RX errors */ ++#define CAN_ERR_CRTL_TX_WARNING 0x08 /**< reached warning level for TX errors */ ++#define CAN_ERR_CRTL_RX_PASSIVE 0x10 /**< reached passive level for RX errors */ ++#define CAN_ERR_CRTL_TX_PASSIVE 0x20 /**< reached passive level for TX errors */ ++/** @} */ ++ ++/*! ++ * @anchor Error2 @name Protocol error type ++ * Error in the data[2] field of struct can_frame. ++ * @{ */ ++/* error in CAN protocol (type) / data[2] */ ++#define CAN_ERR_PROT_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_PROT_BIT 0x01 /**< single bit error */ ++#define CAN_ERR_PROT_FORM 0x02 /**< frame format error */ ++#define CAN_ERR_PROT_STUFF 0x04 /**< bit stuffing error */ ++#define CAN_ERR_PROT_BIT0 0x08 /**< unable to send dominant bit */ ++#define CAN_ERR_PROT_BIT1 0x10 /**< unable to send recessive bit */ ++#define CAN_ERR_PROT_OVERLOAD 0x20 /**< bus overload */ ++#define CAN_ERR_PROT_ACTIVE 0x40 /**< active error announcement */ ++#define CAN_ERR_PROT_TX 0x80 /**< error occured on transmission */ ++/** @} */ ++ ++/*! ++ * @anchor Error3 @name Protocol error location ++ * Error in the data[3] field of struct can_frame. ++ * @{ */ ++/* error in CAN protocol (location) / data[3] */ ++#define CAN_ERR_PROT_LOC_UNSPEC 0x00 /**< unspecified */ ++#define CAN_ERR_PROT_LOC_SOF 0x03 /**< start of frame */ ++#define CAN_ERR_PROT_LOC_ID28_21 0x02 /**< ID bits 28 - 21 (SFF: 10 - 3) */ ++#define CAN_ERR_PROT_LOC_ID20_18 0x06 /**< ID bits 20 - 18 (SFF: 2 - 0 )*/ ++#define CAN_ERR_PROT_LOC_SRTR 0x04 /**< substitute RTR (SFF: RTR) */ ++#define CAN_ERR_PROT_LOC_IDE 0x05 /**< identifier extension */ ++#define CAN_ERR_PROT_LOC_ID17_13 0x07 /**< ID bits 17-13 */ ++#define CAN_ERR_PROT_LOC_ID12_05 0x0F /**< ID bits 12-5 */ ++#define CAN_ERR_PROT_LOC_ID04_00 0x0E /**< ID bits 4-0 */ ++#define CAN_ERR_PROT_LOC_RTR 0x0C /**< RTR */ ++#define CAN_ERR_PROT_LOC_RES1 0x0D /**< reserved bit 1 */ ++#define CAN_ERR_PROT_LOC_RES0 0x09 /**< reserved bit 0 */ ++#define CAN_ERR_PROT_LOC_DLC 0x0B /**< data length code */ ++#define CAN_ERR_PROT_LOC_DATA 0x0A /**< data section */ ++#define CAN_ERR_PROT_LOC_CRC_SEQ 0x08 /**< CRC sequence */ ++#define CAN_ERR_PROT_LOC_CRC_DEL 0x18 /**< CRC delimiter */ ++#define CAN_ERR_PROT_LOC_ACK 0x19 /**< ACK slot */ ++#define CAN_ERR_PROT_LOC_ACK_DEL 0x1B /**< ACK delimiter */ ++#define CAN_ERR_PROT_LOC_EOF 0x1A /**< end of frame */ ++#define CAN_ERR_PROT_LOC_INTERM 0x12 /**< intermission */ ++/** @} */ ++ ++/*! ++ * @anchor Error4 @name Protocol error location ++ * Error in the data[4] field of struct can_frame. 
++ * @{ */ ++/* error status of CAN-transceiver / data[4] */ ++/* CANH CANL */ ++#define CAN_ERR_TRX_UNSPEC 0x00 /**< 0000 0000 */ ++#define CAN_ERR_TRX_CANH_NO_WIRE 0x04 /**< 0000 0100 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_BAT 0x05 /**< 0000 0101 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_VCC 0x06 /**< 0000 0110 */ ++#define CAN_ERR_TRX_CANH_SHORT_TO_GND 0x07 /**< 0000 0111 */ ++#define CAN_ERR_TRX_CANL_NO_WIRE 0x40 /**< 0100 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_BAT 0x50 /**< 0101 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_VCC 0x60 /**< 0110 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /**< 0111 0000 */ ++#define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /**< 1000 0000 */ ++/** @} */ ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_CAN_H */ +--- linux/include/xenomai/rtdm/uapi/autotune.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/autotune.h 2021-04-07 16:01:28.445632426 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _RTDM_UAPI_AUTOTUNE_H ++#define _RTDM_UAPI_AUTOTUNE_H ++ ++#include ++ ++#define RTDM_CLASS_AUTOTUNE RTDM_CLASS_MISC ++#define RTDM_SUBCLASS_AUTOTUNE 0 ++ ++struct autotune_setup { ++ __u32 period; ++ __u32 quiet; ++}; ++ ++#define AUTOTUNE_RTIOC_IRQ _IOW(RTDM_CLASS_AUTOTUNE, 0, struct autotune_setup) ++#define AUTOTUNE_RTIOC_KERN _IOW(RTDM_CLASS_AUTOTUNE, 1, struct autotune_setup) ++#define AUTOTUNE_RTIOC_USER _IOW(RTDM_CLASS_AUTOTUNE, 2, struct autotune_setup) ++#define AUTOTUNE_RTIOC_PULSE _IOW(RTDM_CLASS_AUTOTUNE, 3, __u64) ++#define AUTOTUNE_RTIOC_RUN _IOR(RTDM_CLASS_AUTOTUNE, 4, __u32) ++#define AUTOTUNE_RTIOC_RESET _IO(RTDM_CLASS_AUTOTUNE, 5) ++ ++#endif /* !_RTDM_UAPI_AUTOTUNE_H */ +--- linux/include/xenomai/rtdm/uapi/ipc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/ipc.h 2021-04-07 16:01:28.440632433 +0800 +@@ -0,0 +1,881 @@ ++/** ++ * @file ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++ ++#ifndef _RTDM_UAPI_IPC_H ++#define _RTDM_UAPI_IPC_H ++ ++/** ++ * @ingroup rtdm_profiles ++ * @defgroup rtdm_ipc Real-time IPC ++ * ++ * @b Profile @b Revision: 1 ++ * @n ++ * @n ++ * @par Device Characteristics ++ * @n ++ * @ref rtdm_driver_flags "Device Flags": @c RTDM_PROTOCOL_DEVICE @n ++ * @n ++ * @ref rtdm_driver.protocol_family "Protocol Family": @c PF_RTIPC @n ++ * @n ++ * @ref rtdm_driver.socket_type "Socket Type": @c SOCK_DGRAM @n ++ * @n ++ * @ref rtdm_driver_profile "Device Class": @c RTDM_CLASS_RTIPC @n ++ * @n ++ * @{ ++ * ++ * @anchor rtipc_operations @name Supported operations ++ * Standard socket operations supported by the RTIPC protocols. ++ * @{ ++ */ ++ ++/** Create an endpoint for communication in the AF_RTIPC domain. ++ * ++ * @param[in] domain The communication domain. Must be AF_RTIPC. ++ * ++ * @param[in] type The socket type. Must be SOCK_DGRAM. ++ * ++ * @param [in] protocol Any of @ref IPCPROTO_XDDP, @ref IPCPROTO_IDDP, ++ * or @ref IPCPROTO_BUFP. @ref IPCPROTO_IPC is also valid, and refers ++ * to the default RTIPC protocol, namely @ref IPCPROTO_IDDP. ++ * ++ * @return In addition to the standard error codes for @c socket(2), ++ * the following specific error code may be returned: ++ * - -ENOPROTOOPT (Protocol is known, but not compiled in the RTIPC driver). ++ * See @ref RTIPC_PROTO "RTIPC protocols" ++ * for available protocols. ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int socket__AF_RTIPC(int domain =AF_RTIPC, int type =SOCK_DGRAM, int protocol); ++#endif ++ ++/** ++ * Close a RTIPC socket descriptor. ++ * ++ * Blocking calls to any of the @ref sendmsg__AF_RTIPC "sendmsg" or @ref ++ * recvmsg__AF_RTIPC "recvmsg" functions will be unblocked when the socket ++ * is closed and return with an error. ++ * ++ * @param[in] sockfd The socket descriptor to close. ++ * ++ * @return In addition to the standard error codes for @c close(2), ++ * the following specific error code may be returned: ++ * none ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int close__AF_RTIPC(int sockfd); ++#endif ++ ++/** ++ * Bind a RTIPC socket to a port. ++ * ++ * Bind the socket to a destination port. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param [in] addr The address to bind the socket to (see struct ++ * sockaddr_ipc). The meaning of such address depends on the RTIPC ++ * protocol in use for the socket: ++ * ++ * - IPCPROTO_XDDP ++ * ++ * This action creates an endpoint for channelling traffic between ++ * the Xenomai and Linux domains. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and ++ * CONFIG_XENO_OPT_PIPE_NRDEV-1. ++ * ++ * If @em sipc_port is -1, a free port will be assigned automatically. ++ * ++ * Upon success, the pseudo-device /dev/rtp@em N will be reserved ++ * for this communication channel, where @em N is the assigned port ++ * number. The non real-time side shall open this device to exchange ++ * data over the bound socket. 
++ * ++ * @anchor xddp_label_binding ++ * If a label was assigned (see @ref XDDP_LABEL) prior to ++ * binding the socket to a port, a registry link referring to the ++ * created pseudo-device will be automatically set up as ++ * @c /proc/xenomai/registry/rtipc/xddp/@em label, where @em label is the ++ * label string passed to setsockopt() for the @ref XDDP_LABEL option. ++ * ++ * - IPCPROTO_IDDP ++ * ++ * This action creates an endpoint for exchanging datagrams within ++ * the Xenomai domain. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and ++ * CONFIG_XENO_OPT_IDDP_NRPORT-1. ++ * ++ * If @em sipc_port is -1, a free port will be assigned ++ * automatically. The real-time peer shall connect to the same port ++ * for exchanging data over the bound socket. ++ * ++ * @anchor iddp_label_binding ++ * If a label was assigned (see @ref IDDP_LABEL) prior to binding ++ * the socket to a port, a registry link referring to the assigned ++ * port number will be automatically set up as @c ++ * /proc/xenomai/registry/rtipc/iddp/@em label, where @em label is ++ * the label string passed to setsockopt() for the @ref IDDP_LABEL ++ * option. ++ * ++ * - IPCPROTO_BUFP ++ * ++ * This action creates an endpoint for a one-way byte ++ * stream within the Xenomai domain. ++ * ++ * @em sipc_family must be AF_RTIPC, @em sipc_port is either -1, ++ * or a valid free port number between 0 and CONFIG_XENO_OPT_BUFP_NRPORT-1. ++ * ++ * If @em sipc_port is -1, an available port will be assigned ++ * automatically. The real-time peer shall connect to the same port ++ * for exchanging data over the bound socket. ++ * ++ * @anchor bufp_label_binding ++ * If a label was assigned (see @ref BUFP_LABEL) prior to binding ++ * the socket to a port, a registry link referring to the assigned ++ * port number will be automatically set up as @c ++ * /proc/xenomai/registry/rtipc/bufp/@em label, where @em label is ++ * the label string passed to setsockopt() for the @a BUFP_LABEL ++ * option. ++ * ++ * @param[in] addrlen The size in bytes of the structure pointed to by ++ * @a addr. ++ * ++ * @return In addition to the standard error codes for @c ++ * bind(2), the following specific error code may be returned: ++ * - -EFAULT (Invalid data address given) ++ * - -ENOMEM (Not enough memory) ++ * - -EINVAL (Invalid parameter) ++ * - -EADDRINUSE (Socket already bound to a port, or no port available) ++ * - -EAGAIN (no registry slot available, check/raise ++ * CONFIG_XENO_OPT_REGISTRY_NRSLOTS) . ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int bind__AF_RTIPC(int sockfd, const struct sockaddr_ipc *addr, ++ socklen_t addrlen); ++#endif ++ ++/** ++ * Initiate a connection on a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param [in] addr The address to connect the socket to (see struct ++ * sockaddr_ipc). ++ * ++ * - If sipc_port is a valid port for the protocol, it is used ++ * verbatim and the connection succeeds immediately, regardless of ++ * whether the destination is bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was assigned to the socket, ++ * connect() blocks for the requested amount of time (see @ref ++ * SO_RCVTIMEO) until a socket is bound to the same label via @c ++ * bind(2) (see @ref XDDP_LABEL, @ref IDDP_LABEL, @ref BUFP_LABEL), in ++ * which case a connection is established between both endpoints. 
++ * ++ * - If sipc_port is -1 and no label was assigned to the socket, the ++ * default destination address is cleared, meaning that any subsequent ++ * write to the socket will return -EDESTADDRREQ, until a valid ++ * destination address is set via @c connect(2) or @c bind(2). ++ * ++ * @param[in] addrlen The size in bytes of the structure pointed to by ++ * @a addr. ++ * ++ * @return In addition to the standard error codes for @c connect(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int connect__AF_RTIPC(int sockfd, const struct sockaddr_ipc *addr, ++ socklen_t addrlen); ++#endif ++ ++/** ++ * Set options on RTIPC sockets. ++ * ++ * These functions allow to set various socket options. ++ * Supported Levels and Options: ++ * ++ * - Level @ref sockopts_socket "SOL_SOCKET" ++ * - Level @ref sockopts_xddp "SOL_XDDP" ++ * - Level @ref sockopts_iddp "SOL_IDDP" ++ * - Level @ref sockopts_bufp "SOL_BUFP" ++ * . ++ * ++ * @return In addition to the standard error codes for @c ++ * setsockopt(2), the following specific error code may ++ * be returned: ++ * follow the option links above. ++ * ++ * @par Calling context: ++ * non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int setsockopt__AF_RTIPC(int sockfd, int level, int optname, ++ const void *optval, socklen_t optlen); ++#endif ++/** ++ * Get options on RTIPC sockets. ++ * ++ * These functions allow to get various socket options. ++ * Supported Levels and Options: ++ * ++ * - Level @ref sockopts_socket "SOL_SOCKET" ++ * - Level @ref sockopts_xddp "SOL_XDDP" ++ * - Level @ref sockopts_iddp "SOL_IDDP" ++ * - Level @ref sockopts_bufp "SOL_BUFP" ++ * . ++ * ++ * @return In addition to the standard error codes for @c ++ * getsockopt(2), the following specific error code may ++ * be returned: ++ * follow the option links above. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getsockopt__AF_RTIPC(int sockfd, int level, int optname, ++ void *optval, socklen_t *optlen); ++#endif ++ ++/** ++ * Send a message on a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param[in] msg The address of the message header conveying the ++ * datagram. ++ * ++ * @param [in] flags Operation flags: ++ * ++ * - MSG_OOB Send out-of-band message. For all RTIPC protocols except ++ * @ref IPCPROTO_BUFP, sending out-of-band data actually means ++ * pushing them to the head of the receiving queue, so that the ++ * reader will always receive them before normal messages. @ref ++ * IPCPROTO_BUFP does not support out-of-band sending. ++ * ++ * - MSG_DONTWAIT Non-blocking I/O operation. The caller will not be ++ * blocked whenever the message cannot be sent immediately at the ++ * time of the call (e.g. memory shortage), but will rather return ++ * with -EWOULDBLOCK. Unlike other RTIPC protocols, @ref ++ * IPCPROTO_XDDP accepts but never considers MSG_DONTWAIT since ++ * writing to a real-time XDDP endpoint is inherently a non-blocking ++ * operation. ++ * ++ * - MSG_MORE Accumulate data before sending. This flag is accepted by ++ * the @ref IPCPROTO_XDDP protocol only, and tells the send service ++ * to accumulate the outgoing data into an internal streaming ++ * buffer, instead of issuing a datagram immediately for it. See ++ * @ref XDDP_BUFSZ for more. ++ * ++ * @note No RTIPC protocol allows for short writes, and only complete ++ * messages are sent to the peer. 
++ * ++ * @return In addition to the standard error codes for @c sendmsg(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT ++ */ ++#ifdef DOXYGEN_CPP ++ssize_t sendmsg__AF_RTIPC(int sockfd, const struct msghdr *msg, int flags); ++#endif ++ ++/** ++ * Receive a message from a RTIPC socket. ++ * ++ * @param[in] sockfd The RTDM file descriptor obtained from the socket ++ * creation call. ++ * ++ * @param[out] msg The address the message header will be copied at. ++ * ++ * @param [in] flags Operation flags: ++ * ++ * - MSG_DONTWAIT Non-blocking I/O operation. The caller will not be ++ * blocked whenever no message is immediately available for receipt ++ * at the time of the call, but will rather return with ++ * -EWOULDBLOCK. ++ * ++ * @note @ref IPCPROTO_BUFP does not allow for short reads and always ++ * returns the requested amount of bytes, except in one situation: ++ * whenever some writer is waiting for sending data upon a buffer full ++ * condition, while the caller would have to wait for receiving a ++ * complete message. This is usually the sign of a pathological use ++ * of the BUFP socket, like defining an incorrect buffer size via @ref ++ * BUFP_BUFSZ. In that case, a short read is allowed to prevent a ++ * deadlock. ++ * ++ * @return In addition to the standard error codes for @c recvmsg(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT ++ */ ++#ifdef DOXYGEN_CPP ++ssize_t recvmsg__AF_RTIPC(int sockfd, struct msghdr *msg, int flags); ++#endif ++ ++/** ++ * Get socket name. ++ * ++ * The name of the local endpoint for the socket is copied back (see ++ * struct sockaddr_ipc). ++ * ++ * @return In addition to the standard error codes for @c getsockname(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getsockname__AF_RTIPC(int sockfd, struct sockaddr_ipc *addr, socklen_t *addrlen); ++#endif ++ ++/** ++ * Get socket peer. ++ * ++ * The name of the remote endpoint for the socket is copied back (see ++ * struct sockaddr_ipc). This is the default destination address for ++ * messages sent on the socket. It can be set either explicitly via @c ++ * connect(2), or implicitly via @c bind(2) if no @c connect(2) was ++ * called prior to binding the socket to a port, in which case both ++ * the local and remote names are equal. ++ * ++ * @return In addition to the standard error codes for @c getpeername(2), ++ * the following specific error code may be returned: ++ * none. ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#ifdef DOXYGEN_CPP ++int getpeername__AF_RTIPC(int sockfd, struct sockaddr_ipc *addr, socklen_t *addrlen); ++#endif ++ ++/** @} */ ++ ++#include ++#include ++#include ++ ++/* Address family */ ++#define AF_RTIPC 111 ++ ++/* Protocol family */ ++#define PF_RTIPC AF_RTIPC ++ ++/** ++ * @anchor RTIPC_PROTO @name RTIPC protocol list ++ * protocols for the PF_RTIPC protocol family ++ * ++ * @{ */ ++enum { ++/** Default protocol (IDDP) */ ++ IPCPROTO_IPC = 0, ++/** ++ * Cross-domain datagram protocol (RT <-> non-RT). ++ * ++ * Real-time Xenomai threads and regular Linux threads may want to ++ * exchange data in a way that does not require the former to leave ++ * the real-time domain (i.e. primary mode). The RTDM-based XDDP ++ * protocol is available for this purpose. 
++ * ++ * On the Linux domain side, pseudo-device files named /dev/rtp@em \ ++ * give regular POSIX threads access to non real-time communication ++ * endpoints, via the standard character-based I/O interface. On the ++ * Xenomai domain side, sockets may be bound to XDDP ports, which act ++ * as proxies to send and receive data to/from the associated ++ * pseudo-device files. Ports and pseudo-device minor numbers are ++ * paired, meaning that e.g. socket port 7 will proxy the traffic to/from ++ * /dev/rtp7. ++ * ++ * All data sent through a bound/connected XDDP socket via @c ++ * sendto(2) or @c write(2) will be passed to the peer endpoint in the ++ * Linux domain, and made available for reading via the standard @c ++ * read(2) system call. Conversely, all data sent using @c write(2) ++ * through the non real-time endpoint will be conveyed to the ++ * real-time socket endpoint, and made available to the @c recvfrom(2) ++ * or @c read(2) system calls. ++ */ ++ IPCPROTO_XDDP = 1, ++/** ++ * Intra-domain datagram protocol (RT <-> RT). ++ * ++ * The RTDM-based IDDP protocol enables real-time threads to exchange ++ * datagrams within the Xenomai domain, via socket endpoints. ++ */ ++ IPCPROTO_IDDP = 2, ++/** ++ * Buffer protocol (RT <-> RT, byte-oriented). ++ * ++ * The RTDM-based BUFP protocol implements a lightweight, ++ * byte-oriented, one-way Producer-Consumer data path. All messages ++ * written are buffered into a single memory area in strict FIFO ++ * order, until read by the consumer. ++ * ++ * This protocol always prevents short writes, and only allows short ++ * reads when a potential deadlock situation arises (i.e. readers and ++ * writers waiting for each other indefinitely). ++ */ ++ IPCPROTO_BUFP = 3, ++ IPCPROTO_MAX ++}; ++/** @} */ ++ ++/** ++ * Port number type for the RTIPC address family. ++ */ ++typedef int16_t rtipc_port_t; ++ ++/** ++ * Port label information structure. ++ */ ++struct rtipc_port_label { ++ /** Port label string, null-terminated. */ ++ char label[XNOBJECT_NAME_LEN]; ++}; ++ ++/** ++ * Socket address structure for the RTIPC address family. ++ */ ++struct sockaddr_ipc { ++ /** RTIPC address family, must be @c AF_RTIPC */ ++ sa_family_t sipc_family; ++ /** Port number. */ ++ rtipc_port_t sipc_port; ++}; ++ ++#define SOL_XDDP 311 ++/** ++ * @anchor sockopts_xddp @name XDDP socket options ++ * Setting and getting XDDP socket options. ++ * @{ */ ++/** ++ * XDDP label assignment ++ * ++ * ASCII label strings can be attached to XDDP ports, so that opening ++ * the non-RT endpoint can be done by specifying this symbolic device ++ * name rather than referring to a raw pseudo-device entry ++ * (i.e. /dev/rtp@em N). ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref xddp_label_binding ++ * "XDDP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_LABEL 1 ++/** ++ * XDDP local pool size configuration ++ * ++ * By default, the memory needed to convey the data is pulled from ++ * Xenomai's system pool. Setting a local pool size overrides this ++ * default for the socket. ++ * ++ * If a non-zero size was configured, a local pool is allocated at ++ * binding time. This pool will provide storage for pending datagrams. ++ * ++ * It is not allowed to configure a local pool size after the socket ++ * was bound. However, multiple configuration calls are allowed prior ++ * to the binding; the last value set will be used. ++ * ++ * @note: the pool memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_POOLSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the local pool to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_POOLSZ 2 ++/** ++ * XDDP streaming buffer size configuration ++ * ++ * In addition to sending datagrams, real-time threads may stream data ++ * in a byte-oriented mode through the port as well. This increases ++ * the bandwidth and reduces the overhead, when the overall data to ++ * send to the Linux domain is collected by bits, and keeping the ++ * message boundaries is not required. ++ * ++ * This feature is enabled when a non-zero buffer size is set for the ++ * socket. In that case, the real-time data accumulates into the ++ * streaming buffer when MSG_MORE is passed to any of the @ref ++ * sendmsg__AF_RTIPC "send functions", until: ++ * ++ * - the receiver from the Linux domain wakes up and consumes it, ++ * - a different source port attempts to send data to the same ++ * destination port, ++ * - MSG_MORE is absent from the send flags, ++ * - the buffer is full, ++ * . ++ * whichever comes first. ++ * ++ * Setting *@a optval to zero disables the streaming buffer, in which ++ * case all sendings are conveyed in separate datagrams, regardless of ++ * MSG_MORE. ++ * ++ * @note only a single streaming buffer exists per socket. When this ++ * buffer is full, the real-time data stops accumulating and sending ++ * operations resume in mere datagram mode. Accumulation may happen ++ * again after some or all data in the streaming buffer is consumed ++ * from the Linux domain endpoint. ++ * ++ * The streaming buffer size may be adjusted multiple times during the ++ * socket lifetime; the latest configuration change will take effect ++ * when the accumulation resumes after the previous buffer was ++ * flushed. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_BUFSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the streaming buffer ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -ENOMEM (Not enough memory) ++ * - -EINVAL (@a optlen is invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define XDDP_BUFSZ 3 ++/** ++ * XDDP monitoring callback ++ * ++ * Other RTDM drivers may install a user-defined callback via the @ref ++ * rtdm_setsockopt call from the inter-driver API, in order to collect ++ * particular events occurring on the channel. ++ * ++ * This notification mechanism is particularly useful to monitor a ++ * channel asynchronously while performing other tasks. ++ * ++ * The user-provided routine will be passed the RTDM file descriptor ++ * of the socket receiving the event, the event code, and an optional ++ * argument. Four events are currently defined, see @ref XDDP_EVENTS. ++ * ++ * The XDDP_EVTIN and XDDP_EVTOUT events are fired on behalf of a ++ * fully atomic context; therefore, care must be taken to keep their ++ * overhead low. In those cases, the Xenomai services that may be ++ * called from the callback are restricted to the set allowed to a ++ * real-time interrupt handler. ++ * ++ * @param [in] level @ref sockopts_xddp "SOL_XDDP" ++ * @param [in] optname @b XDDP_MONITOR ++ * @param [in] optval Pointer to a pointer to function of type int ++ * (*)(int fd, int event, long arg), containing the address of the ++ * user-defined callback.Passing a NULL callback pointer ++ * in @a optval disables monitoring. ++ * @param [in] optlen sizeof(int (*)(int fd, int event, long arg)) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EPERM (Operation not allowed from user-space) ++ * - -EINVAL (@a optlen is invalid) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT, kernel space only ++ */ ++#define XDDP_MONITOR 4 ++/** @} */ ++ ++/** ++ * @anchor XDDP_EVENTS @name XDDP events ++ * Specific events occurring on XDDP channels, which can be monitored ++ * via the @ref XDDP_MONITOR socket option. ++ * ++ * @{ */ ++/** ++ * @ref XDDP_MONITOR "Monitor" writes to the non real-time endpoint. ++ * ++ * XDDP_EVTIN is sent when data is written to the non real-time ++ * endpoint the socket is bound to (i.e. via /dev/rtp@em N), which ++ * means that some input is pending for the real-time endpoint. The ++ * argument is the size of the incoming message. ++ */ ++#define XDDP_EVTIN 1 ++/** ++ * @ref XDDP_MONITOR "Monitor" reads from the non real-time endpoint. ++ * ++ * XDDP_EVTOUT is sent when the non real-time endpoint successfully ++ * reads a complete message (i.e. via /dev/rtp@em N). The argument is ++ * the size of the outgoing message. ++ */ ++#define XDDP_EVTOUT 2 ++/** ++ * @ref XDDP_MONITOR "Monitor" close from the non real-time endpoint. ++ * ++ * XDDP_EVTDOWN is sent when the non real-time endpoint is closed. The ++ * argument is always 0. ++ */ ++#define XDDP_EVTDOWN 3 ++/** ++ * @ref XDDP_MONITOR "Monitor" memory shortage for non real-time ++ * datagrams. ++ * ++ * XDDP_EVTNOBUF is sent when no memory is available from the pool to ++ * hold the message currently sent from the non real-time ++ * endpoint. The argument is the size of the failed allocation. Upon ++ * return from the callback, the caller will block and retry until ++ * enough space is available from the pool; during that process, the ++ * callback might be invoked multiple times, each time a new attempt ++ * to get the required memory fails. ++ */ ++#define XDDP_EVTNOBUF 4 ++/** @} */ ++ ++#define SOL_IDDP 312 ++/** ++ * @anchor sockopts_iddp @name IDDP socket options ++ * Setting and getting IDDP socket options. 
++ * @{ */ ++/** ++ * IDDP label assignment ++ * ++ * ASCII label strings can be attached to IDDP ports, in order to ++ * connect sockets to them in a more descriptive way than using plain ++ * numeric port values. ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref iddp_label_binding ++ * "IDDP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_iddp "SOL_IDDP" ++ * @param [in] optname @b IDDP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define IDDP_LABEL 1 ++/** ++ * IDDP local pool size configuration ++ * ++ * By default, the memory needed to convey the data is pulled from ++ * Xenomai's system pool. Setting a local pool size overrides this ++ * default for the socket. ++ * ++ * If a non-zero size was configured, a local pool is allocated at ++ * binding time. This pool will provide storage for pending datagrams. ++ * ++ * It is not allowed to configure a local pool size after the socket ++ * was bound. However, multiple configuration calls are allowed prior ++ * to the binding; the last value set will be used. ++ * ++ * @note: the pool memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_iddp "SOL_IDDP" ++ * @param [in] optname @b IDDP_POOLSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the local pool to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define IDDP_POOLSZ 2 ++/** @} */ ++ ++#define SOL_BUFP 313 ++/** ++ * @anchor sockopts_bufp @name BUFP socket options ++ * Setting and getting BUFP socket options. ++ * @{ */ ++/** ++ * BUFP label assignment ++ * ++ * ASCII label strings can be attached to BUFP ports, in order to ++ * connect sockets to them in a more descriptive way than using plain ++ * numeric port values. ++ * ++ * When available, this label will be registered when binding, in ++ * addition to the port number (see @ref bufp_label_binding ++ * "BUFP port binding"). ++ * ++ * It is not allowed to assign a label after the socket was ++ * bound. However, multiple assignment calls are allowed prior to the ++ * binding; the last label set will be used. ++ * ++ * @param [in] level @ref sockopts_bufp "SOL_BUFP" ++ * @param [in] optname @b BUFP_LABEL ++ * @param [in] optval Pointer to struct rtipc_port_label ++ * @param [in] optlen sizeof(struct rtipc_port_label) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid) ++ * . 
++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define BUFP_LABEL 1 ++/** ++ * BUFP buffer size configuration ++ * ++ * All messages written to a BUFP socket are buffered in a single ++ * per-socket memory area. Configuring the size of such buffer prior ++ * to binding the socket to a destination port is mandatory. ++ * ++ * It is not allowed to configure a buffer size after the socket was ++ * bound. However, multiple configuration calls are allowed prior to ++ * the binding; the last value set will be used. ++ * ++ * @note: the buffer memory is obtained from the host allocator by the ++ * @ref bind__AF_RTIPC "bind call". ++ * ++ * @param [in] level @ref sockopts_bufp "SOL_BUFP" ++ * @param [in] optname @b BUFP_BUFSZ ++ * @param [in] optval Pointer to a variable of type size_t, containing ++ * the required size of the buffer to reserve at binding time ++ * @param [in] optlen sizeof(size_t) ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EFAULT (Invalid data address given) ++ * - -EALREADY (socket already bound) ++ * - -EINVAL (@a optlen is invalid or *@a optval is zero) ++ * . ++ * ++ * @par Calling context: ++ * RT/non-RT ++ */ ++#define BUFP_BUFSZ 2 ++/** @} */ ++ ++/** ++ * @anchor sockopts_socket @name Socket level options ++ * Setting and getting supported standard socket level options. ++ * @{ */ ++/** ++ * ++ * @ref IPCPROTO_IDDP and @ref IPCPROTO_BUFP protocols support the ++ * standard SO_SNDTIMEO socket option, from the @c SOL_SOCKET level. ++ * ++ * @see @c setsockopt(), @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399/ ++ */ ++#ifdef DOXYGEN_CPP ++#define SO_SNDTIMEO defined_by_kernel_header_file ++#endif ++/** ++ * ++ * All RTIPC protocols support the standard SO_RCVTIMEO socket option, ++ * from the @c SOL_SOCKET level. ++ * ++ * @see @c setsockopt(), @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399/ ++ */ ++#ifdef DOXYGEN_CPP ++#define SO_RCVTIMEO defined_by_kernel_header_file ++#endif ++/** @} */ ++ ++/** ++ * @anchor rtdm_ipc_examples @name RTIPC examples ++ * @{ */ ++/** @example bufp-readwrite.c */ ++/** @example bufp-label.c */ ++/** @example iddp-label.c */ ++/** @example iddp-sendrecv.c */ ++/** @example xddp-echo.c */ ++/** @example xddp-label.c */ ++/** @example xddp-stream.c */ ++/** @} */ ++ ++/** @} */ ++ ++#endif /* !_RTDM_UAPI_IPC_H */ +--- linux/include/xenomai/rtdm/uapi/rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/uapi/rtdm.h 2021-04-07 16:01:28.436632439 +0800 +@@ -0,0 +1,203 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, user API header. ++ * ++ * @note Copyright (C) 2005, 2006 Jan Kiszka ++ * @note Copyright (C) 2005 Joerg Langenberg ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ * @ingroup rtdm_user_api ++ */ ++#ifndef _RTDM_UAPI_RTDM_H ++#define _RTDM_UAPI_RTDM_H ++ ++/*! ++ * @addtogroup rtdm ++ * @{ ++ */ ++ ++/*! ++ * @anchor rtdm_api_versioning @name API Versioning ++ * @{ */ ++/** Common user and driver API version */ ++#define RTDM_API_VER 9 ++ ++/** Minimum API revision compatible with the current release */ ++#define RTDM_API_MIN_COMPAT_VER 9 ++/** @} API Versioning */ ++ ++/** RTDM type for representing absolute dates. Its base type is a 64 bit ++ * unsigned integer. The unit is 1 nanosecond. */ ++typedef uint64_t nanosecs_abs_t; ++ ++/** RTDM type for representing relative intervals. Its base type is a 64 bit ++ * signed integer. The unit is 1 nanosecond. Relative intervals can also ++ * encode the special timeouts "infinite" and "non-blocking", see ++ * @ref RTDM_TIMEOUT_xxx. */ ++typedef int64_t nanosecs_rel_t; ++ ++/*! ++ * @anchor RTDM_TIMEOUT_xxx @name RTDM_TIMEOUT_xxx ++ * Special timeout values ++ * @{ */ ++/** Block forever. */ ++#define RTDM_TIMEOUT_INFINITE 0 ++ ++/** Any negative timeout means non-blocking. */ ++#define RTDM_TIMEOUT_NONE (-1) ++/** @} RTDM_TIMEOUT_xxx */ ++/** @} rtdm */ ++ ++/*! ++ * @addtogroup rtdm_profiles ++ * @{ ++ */ ++ ++/*! ++ * @anchor RTDM_CLASS_xxx @name RTDM_CLASS_xxx ++ * Device classes ++ * @{ */ ++#define RTDM_CLASS_PARPORT 1 ++#define RTDM_CLASS_SERIAL 2 ++#define RTDM_CLASS_CAN 3 ++#define RTDM_CLASS_NETWORK 4 ++#define RTDM_CLASS_RTMAC 5 ++#define RTDM_CLASS_TESTING 6 ++#define RTDM_CLASS_RTIPC 7 ++#define RTDM_CLASS_COBALT 8 ++#define RTDM_CLASS_UDD 9 ++#define RTDM_CLASS_MEMORY 10 ++#define RTDM_CLASS_GPIO 11 ++#define RTDM_CLASS_SPI 12 ++#define RTDM_CLASS_PWM 13 ++ ++#define RTDM_CLASS_MISC 223 ++#define RTDM_CLASS_EXPERIMENTAL 224 ++#define RTDM_CLASS_MAX 255 ++/** @} RTDM_CLASS_xxx */ ++ ++#define RTDM_SUBCLASS_GENERIC (-1) ++ ++#define RTIOC_TYPE_COMMON 0 ++ ++/*! ++ * @anchor device_naming @name Device Naming ++ * Maximum length of device names (excluding the final null character) ++ * @{ ++ */ ++#define RTDM_MAX_DEVNAME_LEN 31 ++/** @} Device Naming */ ++ ++/** ++ * Device information ++ */ ++typedef struct rtdm_device_info { ++ /** Device flags, see @ref dev_flags "Device Flags" for details */ ++ int device_flags; ++ ++ /** Device class ID, see @ref RTDM_CLASS_xxx */ ++ int device_class; ++ ++ /** Device sub-class, either RTDM_SUBCLASS_GENERIC or a ++ * RTDM_SUBCLASS_xxx definition of the related @ref rtdm_profiles ++ * "Device Profile" */ ++ int device_sub_class; ++ ++ /** Supported device profile version */ ++ int profile_version; ++} rtdm_device_info_t; ++ ++/*! ++ * @anchor RTDM_PURGE_xxx_BUFFER @name RTDM_PURGE_xxx_BUFFER ++ * Flags selecting buffers to be purged ++ * @{ */ ++#define RTDM_PURGE_RX_BUFFER 0x0001 ++#define RTDM_PURGE_TX_BUFFER 0x0002 ++/** @} RTDM_PURGE_xxx_BUFFER*/ ++ ++/*! ++ * @anchor common_IOCTLs @name Common IOCTLs ++ * The following IOCTLs are common to all device rtdm_profiles. ++ * @{ ++ */ ++ ++/** ++ * Retrieve information about a device or socket. ++ * @param[out] arg Pointer to information buffer (struct rtdm_device_info) ++ */ ++#define RTIOC_DEVICE_INFO \ ++ _IOR(RTIOC_TYPE_COMMON, 0x00, struct rtdm_device_info) ++ ++/** ++ * Purge internal device or socket buffers. 
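++ *
++ * @par Example
++ * An illustrative sketch (added here, not part of the original header),
++ * flushing both directions on an already opened RTDM file descriptor;
++ * the purge mask is passed directly as the ioctl argument, as described
++ * by the parameter documentation below:
++ * @code
++ * ret = ioctl(fd, RTIOC_PURGE, RTDM_PURGE_RX_BUFFER | RTDM_PURGE_TX_BUFFER);
++ * @endcode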
++ * @param[in] arg Purge mask, see @ref RTDM_PURGE_xxx_BUFFER ++ */ ++#define RTIOC_PURGE _IOW(RTIOC_TYPE_COMMON, 0x10, int) ++/** @} Common IOCTLs */ ++/** @} rtdm */ ++ ++/* Internally used for mapping socket functions on IOCTLs */ ++struct _rtdm_getsockopt_args { ++ int level; ++ int optname; ++ void *optval; ++ socklen_t *optlen; ++}; ++ ++struct _rtdm_setsockopt_args { ++ int level; ++ int optname; ++ const void *optval; ++ socklen_t optlen; ++}; ++ ++struct _rtdm_getsockaddr_args { ++ struct sockaddr *addr; ++ socklen_t *addrlen; ++}; ++ ++struct _rtdm_setsockaddr_args { ++ const struct sockaddr *addr; ++ socklen_t addrlen; ++}; ++ ++#define _RTIOC_GETSOCKOPT _IOW(RTIOC_TYPE_COMMON, 0x20, \ ++ struct _rtdm_getsockopt_args) ++#define _RTIOC_SETSOCKOPT _IOW(RTIOC_TYPE_COMMON, 0x21, \ ++ struct _rtdm_setsockopt_args) ++#define _RTIOC_BIND _IOW(RTIOC_TYPE_COMMON, 0x22, \ ++ struct _rtdm_setsockaddr_args) ++#define _RTIOC_CONNECT _IOW(RTIOC_TYPE_COMMON, 0x23, \ ++ struct _rtdm_setsockaddr_args) ++#define _RTIOC_LISTEN _IOW(RTIOC_TYPE_COMMON, 0x24, \ ++ int) ++#define _RTIOC_ACCEPT _IOW(RTIOC_TYPE_COMMON, 0x25, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_GETSOCKNAME _IOW(RTIOC_TYPE_COMMON, 0x26, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_GETPEERNAME _IOW(RTIOC_TYPE_COMMON, 0x27, \ ++ struct _rtdm_getsockaddr_args) ++#define _RTIOC_SHUTDOWN _IOW(RTIOC_TYPE_COMMON, 0x28, \ ++ int) ++ ++/* Internally used for mmap() */ ++struct _rtdm_mmap_request { ++ __u64 offset; ++ size_t length; ++ int prot; ++ int flags; ++}; ++ ++#endif /* !_RTDM_UAPI_RTDM_H */ +--- linux/include/xenomai/rtdm/udd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/udd.h 2021-04-07 16:01:28.319632606 +0800 +@@ -0,0 +1,340 @@ ++/** ++ * @file ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_UDD_H ++#define _COBALT_RTDM_UDD_H ++ ++#include ++#include ++#include ++ ++/** ++ * @ingroup rtdm_profiles ++ * @defgroup rtdm_udd User-space driver core ++ * ++ * This profile includes all mini-drivers sitting on top of the ++ * User-space Device Driver framework (UDD). The generic UDD core ++ * driver enables interrupt control and I/O memory access interfaces ++ * to user-space device drivers, as defined by the mini-drivers when ++ * registering. ++ * ++ * A mini-driver supplements the UDD core with ancillary functions for ++ * dealing with @ref udd_memory_region "memory mappings" and @ref ++ * udd_irq_handler "interrupt control" for a particular I/O ++ * card/device. ++ * ++ * UDD-compliant mini-drivers only have to provide the basic support ++ * for dealing with the interrupt sources present in the device, so ++ * that most part of the device requests can be handled from a Xenomai ++ * application running in user-space. 
Typically, a mini-driver would ++ * handle the interrupt top-half, and the user-space application would ++ * handle the bottom-half. ++ * ++ * This profile is reminiscent of the UIO framework available with the ++ * Linux kernel, adapted to the dual kernel Cobalt environment. ++ * ++ * @{ ++ */ ++ ++/** ++ * @anchor udd_irq_special ++ * Special IRQ values for udd_device.irq ++ * ++ * @{ ++ */ ++/** ++ * No IRQ managed. Passing this code implicitly disables all ++ * interrupt-related services, including control (disable/enable) and ++ * notification. ++ */ ++#define UDD_IRQ_NONE 0 ++/** ++ * IRQ directly managed from the mini-driver on top of the UDD ++ * core. The mini-driver is in charge of attaching the handler(s) to ++ * the IRQ(s) it manages, notifying the Cobalt threads waiting for IRQ ++ * events by calling the udd_notify_event() service. ++ */ ++#define UDD_IRQ_CUSTOM (-1) ++/** @} */ ++ ++/** ++ * @anchor udd_memory_types @name Memory types for mapping ++ * Types of memory for mapping ++ * ++ * The UDD core implements a default ->mmap() handler which first ++ * attempts to hand over the request to the corresponding handler ++ * defined by the mini-driver. If not present, the UDD core ++ * establishes the mapping automatically, depending on the memory ++ * type defined for the region. ++ * ++ * @{ ++ */ ++/** ++ * No memory region. Use this type code to disable an entry in the ++ * array of memory mappings, i.e. udd_device.mem_regions[]. ++ */ ++#define UDD_MEM_NONE 0 ++/** ++ * Physical I/O memory region. By default, the UDD core maps such ++ * memory to a virtual user range by calling the rtdm_mmap_iomem() ++ * service. ++ */ ++#define UDD_MEM_PHYS 1 ++/** ++ * Kernel logical memory region (e.g. kmalloc()). By default, the UDD ++ * core maps such memory to a virtual user range by calling the ++ * rtdm_mmap_kmem() service. */ ++#define UDD_MEM_LOGICAL 2 ++/** ++ * Virtual memory region with no direct physical mapping ++ * (e.g. vmalloc()). By default, the UDD core maps such memory to a ++ * virtual user range by calling the rtdm_mmap_vmem() service. ++ */ ++#define UDD_MEM_VIRTUAL 3 ++/** @} */ ++ ++#define UDD_NR_MAPS 5 ++ ++/** ++ * @anchor udd_memory_region ++ * UDD memory region descriptor. ++ * ++ * This descriptor defines the characteristics of a memory region ++ * declared to the UDD core by the mini-driver. All valid regions ++ * should be declared in the udd_device.mem_regions[] array, ++ * invalid/unassigned ones should bear the UDD_MEM_NONE type. ++ * ++ * The UDD core exposes each region via the mmap(2) interface to the ++ * application. To this end, a companion mapper device is created ++ * automatically when registering the mini-driver. ++ * ++ * The mapper device creates special files in the RTDM namespace for ++ * reaching the individual regions, which the application can open ++ * then map to its address space via the mmap(2) system call. ++ * ++ * For instance, declaring a region of physical memory at index #2 of ++ * the memory region array could be done as follows: ++ * ++ * @code ++ * static struct udd_device udd; ++ * ++ * static int foocard_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) ++ * { ++ * udd.device_name = "foocard"; ++ * ... ++ * udd.mem_regions[2].name = "ADC"; ++ * udd.mem_regions[2].addr = pci_resource_start(dev, 1); ++ * udd.mem_regions[2].len = pci_resource_len(dev, 1); ++ * udd.mem_regions[2].type = UDD_MEM_PHYS; ++ * ... 
++ * return udd_register_device(&udd); ++ * } ++ * @endcode ++ * ++ * This will make such region accessible via the mapper device using ++ * the following sequence of code (see note), via the default ++ * ->mmap() handler from the UDD core: ++ * ++ * @code ++ * int fd, fdm; ++ * void *p; ++ * ++ * fd = open("/dev/rtdm/foocard", O_RDWR); ++ * fdm = open("/dev/rtdm/foocard,mapper2", O_RDWR); ++ * p = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fdm, 0); ++ * @endcode ++ * ++ * if no valid region has been declared in the ++ * udd_device.mem_regions[] array, no mapper device is created. ++ * ++ * @note The example code assumes that @ref cobalt_api POSIX symbol ++ * wrapping is in effect, so that RTDM performs the memory mapping ++ * operation (not the regular kernel). ++ */ ++struct udd_memregion { ++ /** Name of the region (informational but required) */ ++ const char *name; ++ /** ++ * Start address of the region. This may be a physical or ++ * virtual address, depending on the @ref udd_memory_types ++ * "memory type". ++ */ ++ unsigned long addr; ++ /** ++ * Length (in bytes) of the region. This value must be ++ * PAGE_SIZE aligned. ++ */ ++ size_t len; ++ /** ++ * Type of the region. See the discussion about @ref ++ * udd_memory_types "UDD memory types" for possible values. ++ */ ++ int type; ++}; ++ ++/** ++ * @anchor udd_device ++ * UDD device descriptor. ++ * ++ * This descriptor defines the characteristics of a UDD-based ++ * mini-driver when registering via a call to udd_register_device(). ++ */ ++struct udd_device { ++ /** ++ * Name of the device managed by the mini-driver, appears ++ * automatically in the /dev/rtdm namespace upon creation. ++ */ ++ const char *device_name; ++ /** ++ * Additional device flags (e.g. RTDM_EXCLUSIVE) ++ * RTDM_NAMED_DEVICE may be omitted). ++ */ ++ int device_flags; ++ /** ++ * Subclass code of the device managed by the mini-driver (see ++ * RTDM_SUBCLASS_xxx definition in the @ref rtdm_profiles ++ * "Device Profiles"). The main class code is pre-set to ++ * RTDM_CLASS_UDD. ++ */ ++ int device_subclass; ++ struct { ++ /** ++ * Ancillary open() handler, optional. See ++ * rtdm_open_handler(). ++ * ++ * @note This handler is called from secondary mode ++ * only. ++ */ ++ int (*open)(struct rtdm_fd *fd, int oflags); ++ /** ++ * Ancillary close() handler, optional. See ++ * rtdm_close_handler(). ++ * ++ * @note This handler is called from secondary mode ++ * only. ++ */ ++ void (*close)(struct rtdm_fd *fd); ++ /** ++ * Ancillary ioctl() handler, optional. See ++ * rtdm_ioctl_handler(). ++ * ++ * If this routine returns -ENOSYS, the default action ++ * implemented by the UDD core for the corresponding ++ * request will be applied, as if no ioctl handler had ++ * been defined. ++ * ++ * @note This handler is called from primary mode ++ * only. ++ */ ++ int (*ioctl)(struct rtdm_fd *fd, ++ unsigned int request, void *arg); ++ /** ++ * Ancillary mmap() handler for the mapper device, ++ * optional. See rtdm_mmap_handler(). The mapper ++ * device operates on a valid region defined in the @a ++ * mem_regions[] array. A pointer to the region ++ * can be obtained by a call to udd_get_region(). ++ * ++ * If this handler is NULL, the UDD core establishes ++ * the mapping automatically, depending on the memory ++ * type defined for the region. ++ * ++ * @note This handler is called from secondary mode ++ * only. 
++ */ ++ int (*mmap)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++ /** ++ * @anchor udd_irq_handler ++ * ++ * Ancillary handler for receiving interrupts. This ++ * handler must be provided if the mini-driver hands ++ * over IRQ handling to the UDD core, by setting the ++ * @a irq field to a valid value, different from ++ * UDD_IRQ_CUSTOM and UDD_IRQ_NONE. ++ * ++ * The ->interrupt() handler shall return one of the ++ * following status codes: ++ * ++ * - RTDM_IRQ_HANDLED, if the mini-driver successfully ++ * handled the IRQ. This flag can be combined with ++ * RTDM_IRQ_DISABLE to prevent the Cobalt kernel from ++ * re-enabling the interrupt line upon return, ++ * otherwise it is re-enabled automatically. ++ * ++ * - RTDM_IRQ_NONE, if the interrupt does not match ++ * any IRQ the mini-driver can handle. ++ * ++ * Once the ->interrupt() handler has returned, the ++ * UDD core notifies user-space Cobalt threads waiting ++ * for IRQ events (if any). ++ * ++ * @note This handler is called from primary mode ++ * only. ++ */ ++ int (*interrupt)(struct udd_device *udd); ++ } ops; ++ /** ++ * IRQ number. If valid, the UDD core manages the ++ * corresponding interrupt line, installing a base handler. ++ * Otherwise, a special value can be passed for declaring ++ * @ref udd_irq_special "unmanaged IRQs". ++ */ ++ int irq; ++ /** ++ * Array of memory regions defined by the device. The array ++ * can be sparse, with some entries bearing the UDD_MEM_NONE ++ * type interleaved with valid ones. See the discussion about ++ * @ref udd_memory_region "UDD memory regions". ++ */ ++ struct udd_memregion mem_regions[UDD_NR_MAPS]; ++ /** Reserved to the UDD core. */ ++ struct udd_reserved { ++ rtdm_irq_t irqh; ++ u32 event_count; ++ struct udd_signotify signfy; ++ struct rtdm_event pulse; ++ struct rtdm_driver driver; ++ struct rtdm_device device; ++ struct rtdm_driver mapper_driver; ++ struct udd_mapper { ++ struct udd_device *udd; ++ struct rtdm_device dev; ++ } mapdev[UDD_NR_MAPS]; ++ char *mapper_name; ++ int nr_maps; ++ } __reserved; ++}; ++ ++int udd_register_device(struct udd_device *udd); ++ ++int udd_unregister_device(struct udd_device *udd); ++ ++struct udd_device *udd_get_device(struct rtdm_fd *fd); ++ ++void udd_notify_event(struct udd_device *udd); ++ ++void udd_enable_irq(struct udd_device *udd, ++ rtdm_event_t *done); ++ ++void udd_disable_irq(struct udd_device *udd, ++ rtdm_event_t *done); ++ ++/** @} */ ++ ++#endif /* !_COBALT_RTDM_UDD_H */ +--- linux/include/xenomai/rtdm/serial.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/serial.h 2021-04-07 16:01:28.315632612 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_SERIAL_H ++#define _COBALT_RTDM_SERIAL_H ++ ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_SERIAL_H */ +--- linux/include/xenomai/rtdm/analogy/device.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/device.h 2021-04-07 16:01:28.310632619 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Analogy for Linux, device related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_DEVICE_H ++#define _COBALT_RTDM_ANALOGY_DEVICE_H ++ ++#include ++#include ++#include ++ ++#define A4L_NB_DEVICES 10 ++ ++#define A4L_DEV_ATTACHED_NR 0 ++ ++struct a4l_device { ++ ++ /* Spinlock for global device use */ ++ rtdm_lock_t lock; ++ ++ /* Device specific flags */ ++ unsigned long flags; ++ ++ /* Driver assigned to this device thanks to attaching ++ procedure */ ++ struct a4l_driver *driver; ++ ++ /* Hidden description stuff */ ++ struct list_head subdvsq; ++ ++ /* Context-dependent stuff */ ++ struct a4l_transfer transfer; ++ ++ /* Private data useful for drivers functioning */ ++ void *priv; ++}; ++ ++/* --- Devices tab related functions --- */ ++void a4l_init_devs(void); ++int a4l_check_cleanup_devs(void); ++int a4l_rdproc_devs(struct seq_file *p, void *data); ++ ++/* --- Context related function / macro --- */ ++void a4l_set_dev(struct a4l_device_context *cxt); ++#define a4l_get_dev(x) ((x)->dev) ++ ++/* --- Upper layer functions --- */ ++int a4l_ioctl_devcfg(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_devinfo(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_DEVICE_H */ +--- linux/include/xenomai/rtdm/analogy/instruction.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/instruction.h 2021-04-07 16:01:28.305632626 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Analogy for Linux, instruction related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_ANALOGY_INSTRUCTION_H ++#define _COBALT_RTDM_ANALOGY_INSTRUCTION_H ++ ++struct a4l_kernel_instruction { ++ unsigned int type; ++ unsigned int idx_subd; ++ unsigned int chan_desc; ++ unsigned int data_size; ++ void *data; ++ void *__udata; ++}; ++ ++struct a4l_kernel_instruction_list { ++ unsigned int count; ++ struct a4l_kernel_instruction *insns; ++ a4l_insn_t *__uinsns; ++}; ++ ++/* Instruction related functions */ ++ ++/* Upper layer functions */ ++int a4l_ioctl_insnlist(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_insn(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_BUFFER_H */ +--- linux/include/xenomai/rtdm/analogy/channel_range.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/channel_range.h 2021-04-07 16:01:28.301632632 +0800 +@@ -0,0 +1,272 @@ ++/** ++ * @file ++ * Analogy for Linux, channel, range related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H ++#define _COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H ++ ++#include ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_channel_range Channels and ranges ++ * ++ * Channels ++ * ++ * According to the Analogy nomenclature, the channel is the elementary ++ * acquisition entity. One channel is supposed to acquire one data at ++ * a time. A channel can be: ++ * - an analog input or an analog ouput; ++ * - a digital input or a digital ouput; ++ * ++ * Channels are defined by their type and by some other ++ * characteristics like: ++ * - their resolutions for analog channels (which usually ranges from ++ 8 to 32 bits); ++ * - their references; ++ * ++ * Such parameters must be declared for each channel composing a ++ * subdevice. The structure a4l_channel (struct a4l_channel) is used to ++ * define one channel. ++ * ++ * Another structure named a4l_channels_desc (struct a4l_channels_desc) ++ * gathers all channels for a specific subdevice. This latter ++ * structure also stores : ++ * - the channels count; ++ * - the channels declaration mode (A4L_CHAN_GLOBAL_CHANDESC or ++ A4L_CHAN_PERCHAN_CHANDESC): if all the channels composing a ++ subdevice are identical, there is no need to declare the ++ parameters for each channel; the global declaration mode eases ++ the structure composition. 
++ * ++ * Usually the channels descriptor looks like this: ++ * @verbatim ++struct a4l_channels_desc example_chan = { ++ mode: A4L_CHAN_GLOBAL_CHANDESC, -> Global declaration ++ mode is set ++ length: 8, -> 8 channels ++ chans: { ++ {A4L_CHAN_AREF_GROUND, 16}, -> Each channel is 16 bits ++ wide with the ground as ++ reference ++ }, ++}; ++@endverbatim ++ * ++ * Ranges ++ * ++ * So as to perform conversion from logical values acquired by the ++ * device to physical units, some range structure(s) must be declared ++ * on the driver side. ++ * ++ * Such structures contain: ++ * - the physical unit type (Volt, Ampere, none); ++ * - the minimal and maximal values; ++ * ++ * These range structures must be associated with the channels at ++ * subdevice registration time as a channel can work with many ++ * ranges. At configuration time (thanks to an Analogy command), one ++ * range will be selected for each enabled channel. ++ * ++ * Consequently, for each channel, the developer must declare all the ++ * possible ranges in a structure called struct a4l_rngtab. Here is an ++ * example: ++ * @verbatim ++struct a4l_rngtab example_tab = { ++ length: 2, ++ rngs: { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++@endverbatim ++ * ++ * For each subdevice, a specific structure is designed to gather all ++ * the ranges tabs of all the channels. In this structure, called ++ * struct a4l_rngdesc, three fields must be filled: ++ * - the declaration mode (A4L_RNG_GLOBAL_RNGDESC or ++ * A4L_RNG_PERCHAN_RNGDESC); ++ * - the number of ranges tab; ++ * - the tab of ranges tabs pointers; ++ * ++ * Most of the time, the channels which belong to the same subdevice ++ * use the same set of ranges. So, there is no need to declare the ++ * same ranges for each channel. A macro is defined to prevent ++ * redundant declarations: RNG_GLOBAL(). ++ * ++ * Here is an example: ++ * @verbatim ++struct a4l_rngdesc example_rng = RNG_GLOBAL(example_tab); ++@endverbatim ++ * ++ * @{ ++ */ ++ ++ ++/* --- Channel section --- */ ++ ++/*! ++ * @anchor A4L_CHAN_AREF_xxx @name Channel reference ++ * @brief Flags to define the channel's reference ++ * @{ ++ */ ++ ++/** ++ * Ground reference ++ */ ++#define A4L_CHAN_AREF_GROUND 0x1 ++/** ++ * Common reference ++ */ ++#define A4L_CHAN_AREF_COMMON 0x2 ++/** ++ * Differential reference ++ */ ++#define A4L_CHAN_AREF_DIFF 0x4 ++/** ++ * Misc reference ++ */ ++#define A4L_CHAN_AREF_OTHER 0x8 ++ ++ /*! @} A4L_CHAN_AREF_xxx */ ++ ++/** ++ * Internal use flag (must not be used by driver developer) ++ */ ++#define A4L_CHAN_GLOBAL 0x10 ++ ++/*! ++ * @brief Structure describing some channel's characteristics ++ */ ++ ++struct a4l_channel { ++ unsigned long flags; /*!< Channel flags to define the reference. */ ++ unsigned long nb_bits; /*!< Channel resolution. */ ++}; ++ ++/*! ++ * @anchor A4L_CHAN_xxx @name Channels declaration mode ++ * @brief Constant to define whether the channels in a descriptor are ++ * identical ++ * @{ ++ */ ++ ++/** ++ * Global declaration, the set contains channels with similar ++ * characteristics ++ */ ++#define A4L_CHAN_GLOBAL_CHANDESC 0 ++/** ++ * Per channel declaration, the decriptor gathers differents channels ++ */ ++#define A4L_CHAN_PERCHAN_CHANDESC 1 ++ ++ /*! @} A4L_CHAN_xxx */ ++ ++/*! 
++ * @brief Structure describing a channels set ++ */ ++ ++struct a4l_channels_desc { ++ unsigned long mode; /*!< Declaration mode (global or per channel) */ ++ unsigned long length; /*!< Channels count */ ++ struct a4l_channel chans[]; /*!< Channels tab */ ++}; ++ ++/** ++ * Internal use flag (must not be used by driver developer) ++ */ ++#define A4L_RNG_GLOBAL 0x8 ++ ++/*! ++ * @brief Structure describing a (unique) range ++ */ ++ ++struct a4l_range { ++ long min; /*!< Minimal value */ ++ long max; /*!< Maximal falue */ ++ unsigned long flags; /*!< Range flags (unit, etc.) */ ++}; ++ ++/** ++ * Macro to declare a (unique) range with no unit defined ++ */ ++#define RANGE(x,y) {(x * A4L_RNG_FACTOR), (y * A4L_RNG_FACTOR), \ ++ A4L_RNG_NO_UNIT} ++/** ++ * Macro to declare a (unique) range in Volt ++ */ ++#define RANGE_V(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_VOLT_UNIT} ++/** ++ * Macro to declare a (unique) range in milliAmpere ++ */ ++#define RANGE_mA(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_MAMP_UNIT} ++/** ++ * Macro to declare a (unique) range in some external reference ++ */ ++#define RANGE_ext(x,y) {(x * A4L_RNG_FACTOR),(y * A4L_RNG_FACTOR), \ ++ A4L_RNG_EXT_UNIT} ++ ++ ++/* Ranges tab descriptor */ ++struct a4l_rngtab { ++ unsigned char length; ++ struct a4l_range rngs[]; ++}; ++ ++/** ++ * Constant to define a ranges descriptor as global (inter-channel) ++ */ ++#define A4L_RNG_GLOBAL_RNGDESC 0 ++/** ++ * Constant to define a ranges descriptor as specific for a channel ++ */ ++#define A4L_RNG_PERCHAN_RNGDESC 1 ++ ++/* Global ranges descriptor */ ++struct a4l_rngdesc { ++ unsigned char mode; ++ unsigned char length; ++ struct a4l_rngtab *rngtabs[]; ++}; ++ ++/** ++ * Macro to declare a ranges global descriptor in one line ++ */ ++#define RNG_GLOBAL(x) { \ ++ .mode = A4L_RNG_GLOBAL_RNGDESC, \ ++ .length = 1, \ ++ .rngtabs = {&(x)}, \ ++} ++ ++extern struct a4l_rngdesc a4l_range_bipolar10; ++extern struct a4l_rngdesc a4l_range_bipolar5; ++extern struct a4l_rngdesc a4l_range_unipolar10; ++extern struct a4l_rngdesc a4l_range_unipolar5; ++extern struct a4l_rngdesc a4l_range_unknown; ++extern struct a4l_rngdesc a4l_range_fake; ++ ++#define range_digital a4l_range_unipolar5 ++ ++/*! @} channelrange */ ++ ++#endif /* !_COBALT_RTDM_ANALOGY_CHANNEL_RANGE_H */ +--- linux/include/xenomai/rtdm/analogy/buffer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/buffer.h 2021-04-07 16:01:28.296632639 +0800 +@@ -0,0 +1,461 @@ ++/* ++ * Analogy for Linux, buffer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_ANALOGY_BUFFER_H ++#define _COBALT_RTDM_ANALOGY_BUFFER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Events bits / flags --- */ ++ ++#define A4L_BUF_EOBUF_NR 0 ++#define A4L_BUF_EOBUF (1 << A4L_BUF_EOBUF_NR) ++ ++#define A4L_BUF_ERROR_NR 1 ++#define A4L_BUF_ERROR (1 << A4L_BUF_ERROR_NR) ++ ++#define A4L_BUF_EOA_NR 2 ++#define A4L_BUF_EOA (1 << A4L_BUF_EOA_NR) ++ ++/* --- Status bits / flags --- */ ++ ++#define A4L_BUF_BULK_NR 8 ++#define A4L_BUF_BULK (1 << A4L_BUF_BULK_NR) ++ ++#define A4L_BUF_MAP_NR 9 ++#define A4L_BUF_MAP (1 << A4L_BUF_MAP_NR) ++ ++ ++/* Buffer descriptor structure */ ++struct a4l_buffer { ++ ++ /* Added by the structure update */ ++ struct a4l_subdevice *subd; ++ ++ /* Buffer's first virtual page pointer */ ++ void *buf; ++ ++ /* Buffer's global size */ ++ unsigned long size; ++ /* Tab containing buffer's pages pointers */ ++ unsigned long *pg_list; ++ ++ /* RT/NRT synchronization element */ ++ struct a4l_sync sync; ++ ++ /* Counters needed for transfer */ ++ unsigned long end_count; ++ unsigned long prd_count; ++ unsigned long cns_count; ++ unsigned long tmp_count; ++ ++ /* Status + events occuring during transfer */ ++ unsigned long flags; ++ ++ /* Command on progress */ ++ struct a4l_cmd_desc *cur_cmd; ++ ++ /* Munge counter */ ++ unsigned long mng_count; ++ ++ /* Theshold below which the user process should not be ++ awakened */ ++ unsigned long wake_count; ++}; ++ ++static inline void __dump_buffer_counters(struct a4l_buffer *buf) ++{ ++ __a4l_dbg(1, core_dbg, "a4l_buffer=0x%p, p=0x%p \n", buf, buf->buf); ++ __a4l_dbg(1, core_dbg, "end=%06ld, prd=%06ld, cns=%06ld, tmp=%06ld \n", ++ buf->end_count, buf->prd_count, buf->cns_count, buf->tmp_count); ++} ++ ++/* --- Static inline functions related with ++ user<->kernel data transfers --- */ ++ ++/* The function __produce is an inline function which copies data into ++ the asynchronous buffer and takes care of the non-contiguous issue ++ when looping. This function is used in read and write operations */ ++static inline int __produce(struct a4l_device_context *cxt, ++ struct a4l_buffer *buf, void *pin, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->prd_count % buf->size); ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ unsigned long tmp_cnt = count; ++ int ret = 0; ++ ++ while (ret == 0 && tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? ++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the copy */ ++ if (cxt == NULL) ++ memcpy(buf->buf + start_ptr, pin, blk_size); ++ else ++ ret = rtdm_safe_copy_from_user(fd, ++ buf->buf + start_ptr, ++ pin, blk_size); ++ ++ /* Update pointers/counts */ ++ pin += blk_size; ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++ ++ return ret; ++} ++ ++/* The function __consume is an inline function which copies data from ++ the asynchronous buffer and takes care of the non-contiguous issue ++ when looping. This function is used in read and write operations */ ++static inline int __consume(struct a4l_device_context *cxt, ++ struct a4l_buffer *buf, void *pout, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->cns_count % buf->size); ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ unsigned long tmp_cnt = count; ++ int ret = 0; ++ ++ while (ret == 0 && tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? 
++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the copy */ ++ if (cxt == NULL) ++ memcpy(pout, buf->buf + start_ptr, blk_size); ++ else ++ ret = rtdm_safe_copy_to_user(fd, ++ pout, ++ buf->buf + start_ptr, ++ blk_size); ++ ++ /* Update pointers/counts */ ++ pout += blk_size; ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++ ++ return ret; ++} ++ ++/* The function __munge is an inline function which calls the ++ subdevice specific munge callback on contiguous windows within the ++ whole buffer. This function is used in read and write operations */ ++static inline void __munge(struct a4l_subdevice * subd, ++ void (*munge) (struct a4l_subdevice *, ++ void *, unsigned long), ++ struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long start_ptr = (buf->mng_count % buf->size); ++ unsigned long tmp_cnt = count; ++ ++ while (tmp_cnt != 0) { ++ /* Check the data copy can be performed contiguously */ ++ unsigned long blk_size = (start_ptr + tmp_cnt > buf->size) ? ++ buf->size - start_ptr : tmp_cnt; ++ ++ /* Perform the munge operation */ ++ munge(subd, buf->buf + start_ptr, blk_size); ++ ++ /* Update the start pointer and the count */ ++ tmp_cnt -= blk_size; ++ start_ptr = 0; ++ } ++} ++ ++/* The function __handle_event can only be called from process context ++ (not interrupt service routine). It allows the client process to ++ retrieve the buffer status which has been updated by the driver */ ++static inline int __handle_event(struct a4l_buffer * buf) ++{ ++ int ret = 0; ++ ++ /* The event "End of acquisition" must not be cleaned ++ before the complete flush of the buffer */ ++ if (test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ ret = -ENOENT; ++ ++ if (test_bit(A4L_BUF_ERROR_NR, &buf->flags)) ++ ret = -EPIPE; ++ ++ return ret; ++} ++ ++/* --- Counters management functions --- */ ++ ++/* Here, we may wonder why we need more than two counters / pointers. ++ ++ Theoretically, we only need two counters (or two pointers): ++ - one which tells where the reader should be within the buffer ++ - one which tells where the writer should be within the buffer ++ ++ With these two counters (or pointers), we just have to check that ++ the writer does not overtake the reader inside the ring buffer ++ BEFORE any read / write operations. ++ ++ However, if one element is a DMA controller, we have to be more ++ careful. Generally a DMA transfer occurs like this: ++ DMA shot ++ |-> then DMA interrupt ++ |-> then DMA soft handler which checks the counter ++ ++ So, the checkings occur AFTER the write operations. ++ ++ Let's take an example: the reader is a software task and the writer ++ is a DMA controller. At the end of the DMA shot, the write counter ++ is higher than the read counter. Unfortunately, a read operation ++ occurs between the DMA shot and the DMA interrupt, so the handler ++ will not notice that an overflow occured. ++ ++ That is why tmp_count comes into play: tmp_count records the ++ read/consumer current counter before the next DMA shot and once the ++ next DMA shot is done, we check that the updated writer/producer ++ counter is not higher than tmp_count. Thus we are sure that the DMA ++ writer has not overtaken the reader because it was not able to ++ overtake the n-1 value. 
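++
++   A short worked illustration (added for clarity; the numbers are
++   hypothetical): consider the output direction, where the application
++   produces data and a DMA channel consumes it. Suppose the previous
++   call to __pre_abs_get() recorded tmp_count = prd_count = 4096, i.e.
++   4096 bytes were known to be produced at that checkpoint. If the next
++   DMA shot then reports a consumer position of count = 5000, the test
++   (long)(count - tmp_count) > 0 triggers: 904 of the bytes fetched by
++   the DMA were not known to be produced at the previous checkpoint, so
++   A4L_BUF_ERROR is set and -EPIPE returned, even if prd_count has
++   meanwhile caught up.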
*/ ++ ++static inline int __pre_abs_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ if (count - buf->tmp_count > buf->size) { ++ set_bit(A4L_BUF_ERROR_NR, &buf->flags); ++ return -EPIPE; ++ } ++ ++ buf->tmp_count = buf->cns_count; ++ ++ return 0; ++} ++ ++static inline int __pre_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __pre_abs_put(buf, buf->tmp_count + count); ++} ++ ++static inline int __pre_abs_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ /* The first time, we expect the buffer to be properly filled ++ before the trigger occurence; by the way, we need tmp_count to ++ have been initialized and tmp_count is updated right here */ ++ if (buf->tmp_count == 0 || buf->cns_count == 0) ++ goto out; ++ ++ /* At the end of the acquisition, the user application has ++ written the defined amount of data into the buffer; so the ++ last time, the DMA channel can easily overtake the tmp ++ frontier because no more data were sent from user space; ++ therefore no useless alarm should be sent */ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) > 0) ++ goto out; ++ ++ /* Once the exception are passed, we check that the DMA ++ transfer has not overtaken the last record of the production ++ count (tmp_count was updated with prd_count the last time ++ __pre_abs_get was called). We must understand that we cannot ++ compare the current DMA count with the current production ++ count because even if, right now, the production count is ++ higher than the DMA count, it does not mean that the DMA count ++ was not greater a few cycles before; in such case, the DMA ++ channel would have retrieved the wrong data */ ++ if ((long)(count - buf->tmp_count) > 0) { ++ set_bit(A4L_BUF_ERROR_NR, &buf->flags); ++ return -EPIPE; ++ } ++ ++out: ++ buf->tmp_count = buf->prd_count; ++ ++ return 0; ++} ++ ++static inline int __pre_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __pre_abs_get(buf, buf->tmp_count + count); ++} ++ ++static inline int __abs_put(struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long old = buf->prd_count; ++ ++ if ((long)(buf->prd_count - count) >= 0) ++ return -EINVAL; ++ ++ buf->prd_count = count; ++ ++ if ((old / buf->size) != (count / buf->size)) ++ set_bit(A4L_BUF_EOBUF_NR, &buf->flags); ++ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) >= 0) ++ set_bit(A4L_BUF_EOA_NR, &buf->flags); ++ ++ return 0; ++} ++ ++static inline int __put(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __abs_put(buf, buf->prd_count + count); ++} ++ ++static inline int __abs_get(struct a4l_buffer * buf, unsigned long count) ++{ ++ unsigned long old = buf->cns_count; ++ ++ if ((long)(buf->cns_count - count) >= 0) ++ return -EINVAL; ++ ++ buf->cns_count = count; ++ ++ if ((old / buf->size) != count / buf->size) ++ set_bit(A4L_BUF_EOBUF_NR, &buf->flags); ++ ++ if (buf->end_count != 0 && (long)(count - buf->end_count) >= 0) ++ set_bit(A4L_BUF_EOA_NR, &buf->flags); ++ ++ return 0; ++} ++ ++static inline int __get(struct a4l_buffer * buf, unsigned long count) ++{ ++ return __abs_get(buf, buf->cns_count + count); ++} ++ ++static inline unsigned long __count_to_put(struct a4l_buffer * buf) ++{ ++ unsigned long ret; ++ ++ if ((long) (buf->size + buf->cns_count - buf->prd_count) > 0) ++ ret = buf->size + buf->cns_count - buf->prd_count; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++static inline unsigned long __count_to_get(struct a4l_buffer * buf) ++{ ++ unsigned long ret; ++ ++ /* If the acquisition is unlimited (end_count 
== 0), we must ++ not take into account end_count */ ++ if (buf->end_count == 0 || (long)(buf->end_count - buf->prd_count) > 0) ++ ret = buf->prd_count; ++ else ++ ret = buf->end_count; ++ ++ if ((long)(ret - buf->cns_count) > 0) ++ ret -= buf->cns_count; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++static inline unsigned long __count_to_end(struct a4l_buffer * buf) ++{ ++ unsigned long ret = buf->end_count - buf->cns_count; ++ ++ if (buf->end_count == 0) ++ return ULONG_MAX; ++ ++ return ((long)ret) < 0 ? 0 : ret; ++} ++ ++/* --- Buffer internal functions --- */ ++ ++int a4l_alloc_buffer(struct a4l_buffer *buf_desc, int buf_size); ++ ++void a4l_free_buffer(struct a4l_buffer *buf_desc); ++ ++void a4l_init_buffer(struct a4l_buffer * buf_desc); ++ ++void a4l_cleanup_buffer(struct a4l_buffer * buf_desc); ++ ++int a4l_setup_buffer(struct a4l_device_context *cxt, struct a4l_cmd_desc *cmd); ++ ++void a4l_cancel_buffer(struct a4l_device_context *cxt); ++ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_put(struct a4l_subdevice *subd, ++ void *bufdata, unsigned long count); ++ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, ++ unsigned long count); ++ ++int a4l_buf_get(struct a4l_subdevice *subd, ++ void *bufdata, unsigned long count); ++ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts); ++ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd); ++ ++/* --- Current Command management function --- */ ++ ++static inline struct a4l_cmd_desc *a4l_get_cmd(struct a4l_subdevice *subd) ++{ ++ return (subd->buf) ? subd->buf->cur_cmd : NULL; ++} ++ ++/* --- Munge related function --- */ ++ ++int a4l_get_chan(struct a4l_subdevice *subd); ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_mmap(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufcfg(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufcfg2(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufinfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_bufinfo2(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_poll(struct a4l_device_context * cxt, void *arg); ++ssize_t a4l_read_buffer(struct a4l_device_context * cxt, void *bufdata, size_t nbytes); ++ssize_t a4l_write_buffer(struct a4l_device_context * cxt, const void *bufdata, size_t nbytes); ++int a4l_select(struct a4l_device_context *cxt, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_BUFFER_H */ +--- linux/include/xenomai/rtdm/analogy/context.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/context.h 2021-04-07 16:01:28.292632644 +0800 +@@ -0,0 +1,48 @@ ++/* ++ * Analogy for Linux, context structure / macros declarations ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_CONTEXT_H ++#define _COBALT_RTDM_ANALOGY_CONTEXT_H ++ ++#include ++ ++struct a4l_device; ++struct a4l_buffer; ++ ++struct a4l_device_context { ++ /* The adequate device pointer ++ (retrieved thanks to minor at open time) */ ++ struct a4l_device *dev; ++ ++ /* The buffer structure contains everything to transfer data ++ from asynchronous acquisition operations on a specific ++ subdevice */ ++ struct a4l_buffer *buffer; ++}; ++ ++static inline int a4l_get_minor(struct a4l_device_context *cxt) ++{ ++ /* Get a pointer on the container structure */ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ /* Get the minor index */ ++ return rtdm_fd_minor(fd); ++} ++ ++#endif /* !_COBALT_RTDM_ANALOGY_CONTEXT_H */ +--- linux/include/xenomai/rtdm/analogy/driver.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/driver.h 2021-04-07 16:01:28.287632652 +0800 +@@ -0,0 +1,74 @@ ++/** ++ * @file ++ * Analogy for Linux, driver facilities ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_DRIVER_H ++#define _COBALT_RTDM_ANALOGY_DRIVER_H ++ ++#include ++#include ++#include ++#include ++ ++struct seq_file; ++struct a4l_link_desc; ++struct a4l_device; ++ ++/** Structure containing driver declaration data. 
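++ *
++ * A minimal declaration sketch (added for illustration; the foo_*
++ * names and struct foo_priv are hypothetical). The descriptor is then
++ * registered with a4l_register_drv(), typically from the module init
++ * routine:
++ * @verbatim
++static struct a4l_driver foo_drv = {
++	.owner = THIS_MODULE,
++	.board_name = "foo_board",
++	.driver_name = "foo_driver",
++	.privdata_size = sizeof(struct foo_priv),
++	.attach = foo_attach,
++	.detach = foo_detach,
++};
++
++ret = a4l_register_drv(&foo_drv);
++@endverbatim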
++ * ++ * @see rt_task_inquire() ++ */ ++/* Analogy driver descriptor */ ++struct a4l_driver { ++ ++ /* List stuff */ ++ struct list_head list; ++ /**< List stuff */ ++ ++ /* Visible description stuff */ ++ struct module *owner; ++ /**< Pointer to module containing the code */ ++ unsigned int flags; ++ /**< Type / status driver's flags */ ++ char *board_name; ++ /**< Board name */ ++ char *driver_name; ++ /**< driver name */ ++ int privdata_size; ++ /**< Size of the driver's private data */ ++ ++ /* Init/destroy procedures */ ++ int (*attach) (struct a4l_device *, struct a4l_link_desc *); ++ /**< Attach procedure */ ++ int (*detach) (struct a4l_device *); ++ /**< Detach procedure */ ++ ++}; ++ ++/* Driver list related functions */ ++ ++int a4l_register_drv(struct a4l_driver * drv); ++int a4l_unregister_drv(struct a4l_driver * drv); ++int a4l_lct_drv(char *pin, struct a4l_driver ** pio); ++#ifdef CONFIG_PROC_FS ++int a4l_rdproc_drvs(struct seq_file *p, void *data); ++#endif /* CONFIG_PROC_FS */ ++ ++#endif /* !_COBALT_RTDM_ANALOGY_DRIVER_H */ +--- linux/include/xenomai/rtdm/analogy/command.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/command.h 2021-04-07 16:01:28.282632659 +0800 +@@ -0,0 +1,35 @@ ++/** ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_COMMAND_H ++#define _COBALT_RTDM_ANALOGY_COMMAND_H ++ ++#include ++#include ++ ++#define CR_CHAN(a) CHAN(a) ++#define CR_RNG(a) (((a)>>16)&0xff) ++#define CR_AREF(a) (((a)>>24)&0xf) ++ ++/* --- Command related function --- */ ++void a4l_free_cmddesc(struct a4l_cmd_desc * desc); ++ ++/* --- Upper layer functions --- */ ++int a4l_check_cmddesc(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc); ++int a4l_ioctl_cmd(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_COMMAND_H */ +--- linux/include/xenomai/rtdm/analogy/subdevice.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/subdevice.h 2021-04-07 16:01:28.278632664 +0800 +@@ -0,0 +1,118 @@ ++/** ++ * @file ++ * Analogy for Linux, subdevice related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_SUBDEVICE_H ++#define _COBALT_RTDM_ANALOGY_SUBDEVICE_H ++ ++#include ++#include ++#include ++#include ++ ++/* --- Subdevice descriptor structure --- */ ++ ++struct a4l_device; ++struct a4l_buffer; ++ ++/*! ++ * @brief Structure describing the subdevice ++ * @see a4l_add_subd() ++ */ ++ ++struct a4l_subdevice { ++ ++ struct list_head list; ++ /**< List stuff */ ++ ++ struct a4l_device *dev; ++ /**< Containing device */ ++ ++ unsigned int idx; ++ /**< Subdevice index */ ++ ++ struct a4l_buffer *buf; ++ /**< Linked buffer */ ++ ++ /* Subdevice's status (busy, linked?) */ ++ unsigned long status; ++ /**< Subdevice's status */ ++ ++ /* Descriptors stuff */ ++ unsigned long flags; ++ /**< Type flags */ ++ struct a4l_channels_desc *chan_desc; ++ /**< Tab of channels descriptors pointers */ ++ struct a4l_rngdesc *rng_desc; ++ /**< Tab of ranges descriptors pointers */ ++ struct a4l_cmd_desc *cmd_mask; ++ /**< Command capabilities mask */ ++ ++ /* Functions stuff */ ++ int (*insn_read) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "read" */ ++ int (*insn_write) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "write" */ ++ int (*insn_bits) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the instruction "bits" */ ++ int (*insn_config) (struct a4l_subdevice *, struct a4l_kernel_instruction *); ++ /**< Callback for the configuration instruction */ ++ int (*do_cmd) (struct a4l_subdevice *, struct a4l_cmd_desc *); ++ /**< Callback for command handling */ ++ int (*do_cmdtest) (struct a4l_subdevice *, struct a4l_cmd_desc *); ++ /**< Callback for command checking */ ++ void (*cancel) (struct a4l_subdevice *); ++ /**< Callback for asynchronous transfer cancellation */ ++ void (*munge) (struct a4l_subdevice *, void *, unsigned long); ++ /**< Callback for munge operation */ ++ int (*trigger) (struct a4l_subdevice *, lsampl_t); ++ /**< Callback for trigger operation */ ++ ++ char priv[0]; ++ /**< Private data */ ++}; ++ ++/* --- Subdevice related functions and macros --- */ ++ ++struct a4l_channel *a4l_get_chfeat(struct a4l_subdevice * sb, int idx); ++struct a4l_range *a4l_get_rngfeat(struct a4l_subdevice * sb, int chidx, int rngidx); ++int a4l_check_chanlist(struct a4l_subdevice * subd, ++ unsigned char nb_chan, unsigned int *chans); ++ ++#define a4l_subd_is_input(x) ((A4L_SUBD_MASK_READ & (x)->flags) != 0) ++/* The following macro considers that a DIO subdevice is firstly an ++ output subdevice */ ++#define a4l_subd_is_output(x) \ ++ ((A4L_SUBD_MASK_WRITE & (x)->flags) != 0 || \ ++ (A4L_SUBD_DIO & (x)->flags) != 0) ++ ++/* --- Upper layer functions --- */ ++ ++struct a4l_subdevice * a4l_get_subd(struct a4l_device *dev, int idx); ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)); ++int a4l_add_subd(struct a4l_device *dev, struct a4l_subdevice * subd); ++int a4l_ioctl_subdinfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_chaninfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_rnginfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_nbchaninfo(struct a4l_device_context * cxt, void *arg); ++int a4l_ioctl_nbrnginfo(struct a4l_device_context * cxt, 
void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_SUBDEVICE_H */ +--- linux/include/xenomai/rtdm/analogy/transfer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/transfer.h 2021-04-07 16:01:28.273632672 +0800 +@@ -0,0 +1,78 @@ ++/* ++ * Analogy for Linux, transfer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_TRANSFER_H ++#define _COBALT_RTDM_ANALOGY_TRANSFER_H ++ ++#include ++ ++/* IRQ types */ ++#define A4L_IRQ_DISABLED 0 ++ ++/* Fields init values */ ++#define A4L_IRQ_UNUSED (unsigned int)((unsigned short)(~0)) ++#define A4L_IDX_UNUSED (unsigned int)(~0) ++ ++/* TODO: IRQ handling must leave transfer for os_facilities */ ++ ++struct a4l_device; ++/* Analogy transfer descriptor */ ++struct a4l_transfer { ++ ++ /* Subdevices desc */ ++ unsigned int nb_subd; ++ struct a4l_subdevice **subds; ++ ++ /* Buffer stuff: the default size */ ++ unsigned int default_bufsize; ++ ++ /* IRQ in use */ ++ /* TODO: irq_desc should vanish */ ++ struct a4l_irq_descriptor irq_desc; ++}; ++ ++/* --- Proc function --- */ ++ ++int a4l_rdproc_transfer(struct seq_file *p, void *data); ++ ++/* --- Upper layer functions --- */ ++ ++void a4l_presetup_transfer(struct a4l_device_context * cxt); ++int a4l_setup_transfer(struct a4l_device_context * cxt); ++int a4l_precleanup_transfer(struct a4l_device_context * cxt); ++int a4l_cleanup_transfer(struct a4l_device_context * cxt); ++int a4l_reserve_transfer(struct a4l_device_context * cxt, int idx_subd); ++int a4l_init_transfer(struct a4l_device_context * cxt, struct a4l_cmd_desc * cmd); ++int a4l_cancel_transfer(struct a4l_device_context * cxt, int idx_subd); ++int a4l_cancel_transfers(struct a4l_device_context * cxt); ++ ++ssize_t a4l_put(struct a4l_device_context * cxt, void *buf, size_t nbytes); ++ssize_t a4l_get(struct a4l_device_context * cxt, void *buf, size_t nbytes); ++ ++int a4l_request_irq(struct a4l_device *dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++int a4l_free_irq(struct a4l_device *dev, unsigned int irq); ++unsigned int a4l_get_irq(struct a4l_device *dev); ++ ++int a4l_ioctl_cancel(struct a4l_device_context * cxt, void *arg); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_TRANSFER_H */ +--- linux/include/xenomai/rtdm/analogy/rtdm_helpers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/analogy/rtdm_helpers.h 2021-04-07 16:01:28.268632679 +0800 +@@ -0,0 +1,143 @@ ++/* ++ * Analogy for Linux, Operation system facilities ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_ANALOGY_RTDM_HELPERS_H ++#define _COBALT_RTDM_ANALOGY_RTDM_HELPERS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Trace section --- */ ++#define A4L_PROMPT "Analogy: " ++ ++#define RTDM_SUBCLASS_ANALOGY 0 ++ ++#define __a4l_err(fmt, args...) rtdm_printk(KERN_ERR A4L_PROMPT fmt, ##args) ++#define __a4l_warn(fmt, args...) rtdm_printk(KERN_WARNING A4L_PROMPT fmt, ##args) ++ ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++#define __a4l_info(fmt, args...) trace_printk(fmt, ##args) ++#else ++#define __a4l_info(fmt, args...) \ ++ rtdm_printk(KERN_INFO A4L_PROMPT "%s: " fmt, __FUNCTION__, ##args) ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG ++#ifdef CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++#define __a4l_dbg(level, debug, fmt, args...) \ ++ do { \ ++ if ((debug) >= (level)) \ ++ trace_printk(fmt, ##args); \ ++ } while (0) ++#else ++#define __a4l_dbg(level, debug, fmt, args...) \ ++ do { \ ++ if ((debug) >= (level)) \ ++ rtdm_printk(KERN_DEBUG A4L_PROMPT "%s: " fmt, __FUNCTION__ , ##args); \ ++ } while (0) ++#endif ++ ++#define core_dbg CONFIG_XENO_DRIVERS_ANALOGY_DEBUG_LEVEL ++#define drv_dbg CONFIG_XENO_DRIVERS_ANALOGY_DRIVER_DEBUG_LEVEL ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_DEBUG */ ++ ++#define __a4l_dbg(level, debug, fmt, args...) ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_DEBUG */ ++ ++#define __a4l_dev_name(dev) \ ++ (dev->driver == NULL) ? "unattached dev" : dev->driver->board_name ++ ++#define a4l_err(dev, fmt, args...) \ ++ __a4l_err("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_warn(dev, fmt, args...) \ ++ __a4l_warn("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_info(dev, fmt, args...) \ ++ __a4l_info("%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++#define a4l_dbg(level, debug, dev, fmt, args...) 
\ ++ __a4l_dbg(level, debug, "%s: " fmt, __a4l_dev_name(dev), ##args) ++ ++ ++/* --- Time section --- */ ++static inline void a4l_udelay(unsigned int us) ++{ ++ rtdm_task_busy_sleep(((nanosecs_rel_t) us) * 1000); ++} ++ ++/* Function which gives absolute time */ ++nanosecs_abs_t a4l_get_time(void); ++ ++/* Function for setting up the absolute time recovery */ ++void a4l_init_time(void); ++ ++/* --- IRQ section --- */ ++#define A4L_IRQ_DISABLED 0 ++ ++typedef int (*a4l_irq_hdlr_t) (unsigned int irq, void *d); ++ ++struct a4l_irq_descriptor { ++ /* These fields are useful to launch the IRQ trampoline; ++ that is the reason why a structure has been defined */ ++ a4l_irq_hdlr_t handler; ++ unsigned int irq; ++ void *cookie; ++ rtdm_irq_t rtdm_desc; ++}; ++ ++int __a4l_request_irq(struct a4l_irq_descriptor * dsc, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++int __a4l_free_irq(struct a4l_irq_descriptor * dsc); ++ ++/* --- Synchronization section --- */ ++#define __NRT_WAITER 1 ++#define __RT_WAITER 2 ++#define __EVT_PDING 3 ++ ++struct a4l_sync { ++ unsigned long status; ++ rtdm_event_t rtdm_evt; ++ rtdm_nrtsig_t nrt_sig; ++ wait_queue_head_t wq; ++}; ++ ++#define a4l_select_sync(snc, slr, type, fd) \ ++ rtdm_event_select(&((snc)->rtdm_evt), slr, type, fd) ++ ++int a4l_init_sync(struct a4l_sync * snc); ++void a4l_cleanup_sync(struct a4l_sync * snc); ++void a4l_flush_sync(struct a4l_sync * snc); ++int a4l_wait_sync(struct a4l_sync * snc, int rt); ++int a4l_timedwait_sync(struct a4l_sync * snc, ++ int rt, unsigned long long ns_timeout); ++void a4l_signal_sync(struct a4l_sync * snc); ++ ++#endif /* !_COBALT_RTDM_ANALOGY_RTDM_HELPERS_H */ +--- linux/include/xenomai/rtdm/gpio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/gpio.h 2021-04-07 16:01:28.264632685 +0800 +@@ -0,0 +1,77 @@ ++/** ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_GPIO_H ++#define _COBALT_RTDM_GPIO_H ++ ++#include ++#include ++#include ++ ++struct class; ++struct device_node; ++struct gpio_desc; ++ ++struct rtdm_gpio_pin { ++ struct rtdm_device dev; ++ struct list_head next; ++ rtdm_irq_t irqh; ++ rtdm_event_t event; ++ char *name; ++ struct gpio_desc *desc; ++ nanosecs_abs_t timestamp; ++}; ++ ++struct rtdm_gpio_chip { ++ struct gpio_chip *gc; ++ struct rtdm_driver driver; ++ struct class *devclass; ++ struct list_head next; ++ rtdm_lock_t lock; ++ struct rtdm_gpio_pin pins[0]; ++}; ++ ++int rtdm_gpiochip_add(struct rtdm_gpio_chip *rgc, ++ struct gpio_chip *gc, ++ int gpio_subclass); ++ ++struct rtdm_gpio_chip * ++rtdm_gpiochip_alloc(struct gpio_chip *gc, ++ int gpio_subclass); ++ ++void rtdm_gpiochip_remove(struct rtdm_gpio_chip *rgc); ++ ++int rtdm_gpiochip_add_by_name(struct rtdm_gpio_chip *rgc, ++ const char *label, int gpio_subclass); ++ ++int rtdm_gpiochip_post_event(struct rtdm_gpio_chip *rgc, ++ unsigned int offset); ++ ++#ifdef CONFIG_OF ++ ++int rtdm_gpiochip_scan_of(struct device_node *from, ++ const char *compat, int type); ++ ++int rtdm_gpiochip_scan_array_of(struct device_node *from, ++ const char *compat[], ++ int nentries, int type); ++ ++void rtdm_gpiochip_remove_of(int type); ++ ++#endif ++ ++#endif /* !_COBALT_RTDM_GPIO_H */ +--- linux/include/xenomai/rtdm/driver.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/driver.h 2021-04-07 16:01:28.259632692 +0800 +@@ -0,0 +1,1342 @@ ++/** ++ * @file ++ * Real-Time Driver Model for Xenomai, driver API header ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * @ingroup driverapi ++ */ ++#ifndef _COBALT_RTDM_DRIVER_H ++#define _COBALT_RTDM_DRIVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* debug support */ ++#include ++#include ++#ifdef CONFIG_PCI ++#include ++#endif /* CONFIG_PCI */ ++#include ++ ++struct class; ++typedef struct xnselector rtdm_selector_t; ++enum rtdm_selecttype; ++ ++/*! ++ * @addtogroup rtdm_device_register ++ * @{ ++ */ ++ ++/*! ++ * @anchor dev_flags @name Device Flags ++ * Static flags describing a RTDM device ++ * @{ ++ */ ++/** If set, only a single instance of the device can be requested by an ++ * application. */ ++#define RTDM_EXCLUSIVE 0x0001 ++ ++/** ++ * Use fixed minor provided in the rtdm_device description for ++ * registering. If this flag is absent, the RTDM core assigns minor ++ * numbers to devices managed by a driver in order of registration. 
++ */ ++#define RTDM_FIXED_MINOR 0x0002 ++ ++/** If set, the device is addressed via a clear-text name. */ ++#define RTDM_NAMED_DEVICE 0x0010 ++ ++/** If set, the device is addressed via a combination of protocol ID and ++ * socket type. */ ++#define RTDM_PROTOCOL_DEVICE 0x0020 ++ ++/** Mask selecting the device type. */ ++#define RTDM_DEVICE_TYPE_MASK 0x00F0 ++ ++/** Flag indicating a secure variant of RTDM (not supported here) */ ++#define RTDM_SECURE_DEVICE 0x80000000 ++/** @} Device Flags */ ++ ++/** Maximum number of named devices per driver. */ ++#define RTDM_MAX_MINOR 4096 ++ ++/** @} rtdm_device_register */ ++ ++/*! ++ * @addtogroup rtdm_sync ++ * @{ ++ */ ++ ++/*! ++ * @anchor RTDM_SELECTTYPE_xxx @name RTDM_SELECTTYPE_xxx ++ * Event types select can bind to ++ * @{ ++ */ ++enum rtdm_selecttype { ++ /** Select input data availability events */ ++ RTDM_SELECTTYPE_READ = XNSELECT_READ, ++ ++ /** Select ouput buffer availability events */ ++ RTDM_SELECTTYPE_WRITE = XNSELECT_WRITE, ++ ++ /** Select exceptional events */ ++ RTDM_SELECTTYPE_EXCEPT = XNSELECT_EXCEPT ++}; ++/** @} RTDM_SELECTTYPE_xxx */ ++ ++/** @} rtdm_sync */ ++ ++/** ++ * @brief Device context ++ * ++ * A device context structure is associated with every open device instance. ++ * RTDM takes care of its creation and destruction and passes it to the ++ * operation handlers when being invoked. ++ * ++ * Drivers can attach arbitrary data immediately after the official ++ * structure. The size of this data is provided via ++ * rtdm_driver.context_size during device registration. ++ */ ++struct rtdm_dev_context { ++ struct rtdm_fd fd; ++ ++ /** Set of active device operation handlers */ ++ /** Reference to owning device */ ++ struct rtdm_device *device; ++ ++ /** Begin of driver defined context data structure */ ++ char dev_private[0]; ++}; ++ ++static inline struct rtdm_dev_context *rtdm_fd_to_context(struct rtdm_fd *fd) ++{ ++ return container_of(fd, struct rtdm_dev_context, fd); ++} ++ ++/** ++ * Locate the driver private area associated to a device context structure ++ * ++ * @param[in] fd File descriptor structure associated with opened ++ * device instance ++ * ++ * @return The address of the private driver area associated to @a ++ * file descriptor. ++ */ ++static inline void *rtdm_fd_to_private(struct rtdm_fd *fd) ++{ ++ return &rtdm_fd_to_context(fd)->dev_private[0]; ++} ++ ++/** ++ * Locate a device file descriptor structure from its driver private area ++ * ++ * @param[in] dev_private Address of a private context area ++ * ++ * @return The address of the file descriptor structure defining @a ++ * dev_private. ++ */ ++static inline struct rtdm_fd *rtdm_private_to_fd(void *dev_private) ++{ ++ struct rtdm_dev_context *ctx; ++ ctx = container_of(dev_private, struct rtdm_dev_context, dev_private); ++ return &ctx->fd; ++} ++ ++/** ++ * Tell whether the passed file descriptor belongs to an application. ++ * ++ * @param[in] fd File descriptor ++ * ++ * @return true if passed file descriptor belongs to an application, ++ * false otherwise. ++ */ ++static inline bool rtdm_fd_is_user(struct rtdm_fd *fd) ++{ ++ return rtdm_fd_owner(fd) != &cobalt_kernel_ppd; ++} ++ ++/** ++ * Locate a device structure from a file descriptor. ++ * ++ * @param[in] fd File descriptor ++ * ++ * @return The address of the device structure to which this file ++ * descriptor is attached. 
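/*
 * Usage sketch (hypothetical foo_* names, nothing mandated by the RTDM
 * API): the per-connection state a driver keeps in the private area
 * trailing struct rtdm_dev_context. Later sketches in this header reuse
 * this structure; its size is advertised via rtdm_driver.context_size.
 */
struct foo_context {
	bool nonblocking;	/* cached O_NONBLOCK from open() */
	int minor;		/* device instance backing this fd */
	void *ring;		/* data ring, set up outside real-time context */
	size_t ring_avail;	/* bytes currently readable */
};

static void foo_wakeup(struct foo_context *ctx)
{
	/* Map the private area back to its file descriptor... */
	struct rtdm_fd *fd = rtdm_private_to_fd(ctx);

	/* ...and tell user-originated calls from kernel-originated ones. */
	if (rtdm_fd_is_user(fd))
		pr_debug("foo%d: waking up a user-space caller\n", ctx->minor);
}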
++ */ ++static inline struct rtdm_device *rtdm_fd_device(struct rtdm_fd *fd) ++{ ++ return rtdm_fd_to_context(fd)->device; ++} ++ ++/** ++ * @brief RTDM profile information ++ * ++ * This descriptor details the profile information associated to a ++ * RTDM class of device managed by a driver. ++ * ++ * @anchor rtdm_profile_info ++ */ ++struct rtdm_profile_info { ++ /** Device class name */ ++ const char *name; ++ /** Device class ID, see @ref RTDM_CLASS_xxx */ ++ int class_id; ++ /** Device sub-class, see RTDM_SUBCLASS_xxx definition in the ++ @ref rtdm_profiles "Device Profiles" */ ++ int subclass_id; ++ /** Supported device profile version */ ++ int version; ++ /** Reserved */ ++ unsigned int magic; ++ struct module *owner; ++ struct class *kdev_class; ++}; ++ ++struct rtdm_driver; ++ ++/** ++ * @brief RTDM state management handlers ++ */ ++struct rtdm_sm_ops { ++ /** Handler called upon transition to COBALT_STATE_WARMUP */ ++ int (*start)(struct rtdm_driver *drv); ++ /** Handler called upon transition to COBALT_STATE_TEARDOWN */ ++ int (*stop)(struct rtdm_driver *drv); ++}; ++ ++/** ++ * @brief RTDM driver ++ * ++ * This descriptor describes a RTDM device driver. The structure holds ++ * runtime data, therefore it must reside in writable memory. ++ */ ++struct rtdm_driver { ++ /** ++ * Class profile information. The RTDM_PROFILE_INFO() macro @b ++ * must be used for filling up this field. ++ * @anchor rtdm_driver_profile ++ */ ++ struct rtdm_profile_info profile_info; ++ /** ++ * Device flags, see @ref dev_flags "Device Flags" for details ++ * @anchor rtdm_driver_flags ++ */ ++ int device_flags; ++ /** ++ * Size of the private memory area the core should ++ * automatically allocate for each open file descriptor, which ++ * is usable for storing the context data associated to each ++ * connection. The allocated memory is zero-initialized. The ++ * start of this area can be retrieved by a call to ++ * rtdm_fd_to_private(). ++ */ ++ size_t context_size; ++ /** Protocol device identification: protocol family (PF_xxx) */ ++ int protocol_family; ++ /** Protocol device identification: socket type (SOCK_xxx) */ ++ int socket_type; ++ /** I/O operation handlers */ ++ struct rtdm_fd_ops ops; ++ /** State management handlers */ ++ struct rtdm_sm_ops smops; ++ /** ++ * Count of devices this driver manages. This value is used to ++ * allocate a chrdev region for named devices. ++ */ ++ int device_count; ++ /** Base minor for named devices. */ ++ int base_minor; ++ /** Reserved area */ ++ struct { ++ union { ++ struct { ++ struct cdev cdev; ++ int major; ++ } named; ++ }; ++ atomic_t refcount; ++ struct notifier_block nb_statechange; ++ DECLARE_BITMAP(minor_map, RTDM_MAX_MINOR); ++ }; ++}; ++ ++#define RTDM_CLASS_MAGIC 0x8284636c ++ ++/** ++ * @brief Initializer for class profile information. ++ * ++ * This macro must be used to fill in the @ref rtdm_profile_info ++ * "class profile information" field from a RTDM driver. ++ * ++ * @param __name Class name (unquoted). ++ * ++ * @param __id Class major identification number ++ * (profile_version.class_id). ++ * ++ * @param __subid Class minor identification number ++ * (profile_version.subclass_id). ++ * ++ * @param __version Profile version number. ++ * ++ * @note See @ref rtdm_profiles "Device Profiles". 
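/*
 * Usage sketch (hypothetical foo_* driver, assuming <linux/module.h> in
 * addition to this header): a minimal named-device driver descriptor.
 * The class/subclass constants come from rtdm/uapi/rtdm.h; struct
 * rtdm_device and rtdm_dev_register()/rtdm_dev_unregister() are declared
 * further down, and the referenced handlers are sketched later on.
 */
static int foo_open(struct rtdm_fd *fd, int oflags);
static int foo_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
			void __user *arg);
static ssize_t foo_read_rt(struct rtdm_fd *fd, void __user *buf, size_t size);

static struct rtdm_driver foo_driver = {
	.profile_info	= RTDM_PROFILE_INFO(foo, RTDM_CLASS_EXPERIMENTAL,
					    RTDM_SUBCLASS_GENERIC, 1),
	.device_flags	= RTDM_NAMED_DEVICE,
	.device_count	= 1,
	.context_size	= sizeof(struct foo_context),	/* see sketch above */
	.ops = {
		.open		= foo_open,
		.ioctl_rt	= foo_ioctl_rt,
		.read_rt	= foo_read_rt,
	},
};

static struct rtdm_device foo_device = {
	.driver = &foo_driver,
	.label = "foo%d",	/* %d expands to the assigned minor */
};

static int __init foo_init(void)
{
	return rtdm_dev_register(&foo_device);
}

static void __exit foo_exit(void)
{
	rtdm_dev_unregister(&foo_device);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");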
++ */ ++#define RTDM_PROFILE_INFO(__name, __id, __subid, __version) \ ++{ \ ++ .name = ( # __name ), \ ++ .class_id = (__id), \ ++ .subclass_id = (__subid), \ ++ .version = (__version), \ ++ .magic = ~RTDM_CLASS_MAGIC, \ ++ .owner = THIS_MODULE, \ ++ .kdev_class = NULL, \ ++} ++ ++int rtdm_drv_set_sysclass(struct rtdm_driver *drv, struct class *cls); ++ ++/** ++ * @brief RTDM device ++ * ++ * This descriptor describes a RTDM device instance. The structure ++ * holds runtime data, therefore it must reside in writable memory. ++ */ ++struct rtdm_device { ++ /** Device driver. */ ++ struct rtdm_driver *driver; ++ /** Driver definable device data */ ++ void *device_data; ++ /** ++ * Device label template for composing the device name. A ++ * limited printf-like format string is assumed, with a ++ * provision for replacing the first %d/%i placeholder found ++ * in the string by the device minor number. It is up to the ++ * driver to actually mention this placeholder or not, ++ * depending on the naming convention for its devices. For ++ * named devices, the corresponding device node will ++ * automatically appear in the /dev/rtdm hierachy with ++ * hotplug-enabled device filesystems (DEVTMPFS). ++ */ ++ const char *label; ++ /** ++ * Minor number of the device. If RTDM_FIXED_MINOR is present ++ * in the driver flags, the value stored in this field is used ++ * verbatim by rtdm_dev_register(). Otherwise, the RTDM core ++ * automatically assigns minor numbers to all devices managed ++ * by the driver referred to by @a driver, in order of ++ * registration, storing the resulting values into this field. ++ * ++ * Device nodes created for named devices in the Linux /dev ++ * hierarchy are assigned this minor number. ++ * ++ * The minor number of the current device handling an I/O ++ * request can be retreived by a call to rtdm_fd_minor(). ++ */ ++ int minor; ++ /** Reserved area. */ ++ struct { ++ unsigned int magic; ++ char *name; ++ union { ++ struct { ++ xnhandle_t handle; ++ } named; ++ struct { ++ struct xnid id; ++ } proto; ++ }; ++ dev_t rdev; ++ struct device *kdev; ++ struct class *kdev_class; ++ atomic_t refcount; ++ struct rtdm_fd_ops ops; ++ wait_queue_head_t putwq; ++ struct list_head openfd_list; ++ }; ++}; ++ ++/* --- device registration --- */ ++ ++int rtdm_dev_register(struct rtdm_device *device); ++ ++void rtdm_dev_unregister(struct rtdm_device *device); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++ ++static inline struct device *rtdm_dev_to_kdev(struct rtdm_device *device) ++{ ++ return device->kdev; ++} ++ ++/* --- clock services --- */ ++static inline nanosecs_abs_t rtdm_clock_read(void) ++{ ++ return xnclock_read_realtime(&nkclock); ++} ++ ++static inline nanosecs_abs_t rtdm_clock_read_monotonic(void) ++{ ++ return xnclock_read_monotonic(&nkclock); ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- timeout sequences */ ++ ++typedef nanosecs_abs_t rtdm_toseq_t; ++ ++void rtdm_toseq_init(rtdm_toseq_t *timeout_seq, nanosecs_rel_t timeout); ++ ++/*! ++ * @addtogroup rtdm_sync ++ * @{ ++ */ ++ ++/*! ++ * @defgroup rtdm_sync_biglock Big dual kernel lock ++ * @{ ++ */ ++ ++/** ++ * @brief Enter atomic section (dual kernel only) ++ * ++ * This call opens a fully atomic section, serializing execution with ++ * respect to all interrupt handlers (including for real-time IRQs) ++ * and Xenomai threads running on all CPUs. ++ * ++ * @param __context name of local variable to store the context ++ * in. 
This variable updated by the real-time core will hold the ++ * information required to leave the atomic section properly. ++ * ++ * @note Atomic sections may be nested. The caller is allowed to sleep ++ * on a blocking Xenomai service from primary mode within an atomic ++ * section delimited by cobalt_atomic_enter/cobalt_atomic_leave calls. ++ * On the contrary, sleeping on a regular Linux kernel service while ++ * holding such lock is NOT valid. ++ * ++ * @note Since the strongest lock is acquired by this service, it can ++ * be used to synchronize real-time and non-real-time contexts. ++ * ++ * @warning This service is not portable to the Mercury core, and ++ * should be restricted to Cobalt-specific use cases, mainly for the ++ * purpose of porting existing dual-kernel drivers which still depend ++ * on the obsolete RTDM_EXECUTE_ATOMICALLY() construct. ++ */ ++#define cobalt_atomic_enter(__context) \ ++ do { \ ++ xnlock_get_irqsave(&nklock, (__context)); \ ++ xnsched_lock(); \ ++ } while (0) ++ ++/** ++ * @brief Leave atomic section (dual kernel only) ++ * ++ * This call closes an atomic section previously opened by a call to ++ * cobalt_atomic_enter(), restoring the preemption and interrupt state ++ * which prevailed prior to entering the exited section. ++ * ++ * @param __context name of local variable which stored the context. ++ * ++ * @warning This service is not portable to the Mercury core, and ++ * should be restricted to Cobalt-specific use cases. ++ */ ++#define cobalt_atomic_leave(__context) \ ++ do { \ ++ xnsched_unlock(); \ ++ xnlock_put_irqrestore(&nklock, (__context)); \ ++ } while (0) ++ ++/** ++ * @brief Execute code block atomically (DEPRECATED) ++ * ++ * Generally, it is illegal to suspend the current task by calling ++ * rtdm_task_sleep(), rtdm_event_wait(), etc. while holding a spinlock. In ++ * contrast, this macro allows to combine several operations including ++ * a potentially rescheduling call to an atomic code block with respect to ++ * other RTDM_EXECUTE_ATOMICALLY() blocks. The macro is a light-weight ++ * alternative for protecting code blocks via mutexes, and it can even be used ++ * to synchronise real-time and non-real-time contexts. ++ * ++ * @param code_block Commands to be executed atomically ++ * ++ * @note It is not allowed to leave the code block explicitly by using ++ * @c break, @c return, @c goto, etc. This would leave the global lock held ++ * during the code block execution in an inconsistent state. Moreover, do not ++ * embed complex operations into the code bock. Consider that they will be ++ * executed under preemption lock with interrupts switched-off. Also note that ++ * invocation of rescheduling calls may break the atomicity until the task ++ * gains the CPU again. ++ * ++ * @coretags{unrestricted} ++ * ++ * @deprecated This construct will be phased out in Xenomai ++ * 3.0. Please use rtdm_waitqueue services instead. ++ * ++ * @see cobalt_atomic_enter(). 
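/*
 * Usage sketch (hypothetical foo_* names): a short critical section
 * under the big dual-kernel lock, typical of code being ported away
 * from the deprecated RTDM_EXECUTE_ATOMICALLY() construct defined
 * below.
 */
static unsigned long foo_hits;

static void foo_account_hit(void)
{
	spl_t s;

	cobalt_atomic_enter(s);
	/* Serialized against Xenomai threads and RT IRQs on all CPUs. */
	foo_hits++;
	cobalt_atomic_leave(s);
}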
++ */ ++#ifdef DOXYGEN_CPP /* Beautify doxygen output */ ++#define RTDM_EXECUTE_ATOMICALLY(code_block) \ ++{ \ ++ \ ++ code_block; \ ++ \ ++} ++#else /* This is how it really works */ ++static inline __attribute__((deprecated)) void ++rtdm_execute_atomically(void) { } ++ ++#define RTDM_EXECUTE_ATOMICALLY(code_block) \ ++{ \ ++ spl_t __rtdm_s; \ ++ \ ++ rtdm_execute_atomically(); \ ++ xnlock_get_irqsave(&nklock, __rtdm_s); \ ++ xnsched_lock(); \ ++ code_block; \ ++ xnsched_unlock(); \ ++ xnlock_put_irqrestore(&nklock, __rtdm_s); \ ++} ++#endif ++ ++/** @} Big dual kernel lock */ ++ ++/** ++ * @defgroup rtdm_sync_spinlock Spinlock with preemption deactivation ++ * @{ ++ */ ++ ++/** ++ * Static lock initialisation ++ */ ++#define RTDM_LOCK_UNLOCKED(__name) IPIPE_SPIN_LOCK_UNLOCKED ++ ++#define DEFINE_RTDM_LOCK(__name) \ ++ rtdm_lock_t __name = RTDM_LOCK_UNLOCKED(__name) ++ ++/** Lock variable */ ++typedef ipipe_spinlock_t rtdm_lock_t; ++ ++/** Variable to save the context while holding a lock */ ++typedef unsigned long rtdm_lockctx_t; ++ ++/** ++ * Dynamic lock initialisation ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{task-unrestricted} ++ */ ++static inline void rtdm_lock_init(rtdm_lock_t *lock) ++{ ++ raw_spin_lock_init(lock); ++} ++ ++/** ++ * Acquire lock from non-preemptible contexts ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{unrestricted} ++ */ ++static inline void rtdm_lock_get(rtdm_lock_t *lock) ++{ ++ XENO_BUG_ON(COBALT, !spltest()); ++ raw_spin_lock(lock); ++ xnsched_lock(); ++} ++ ++/** ++ * Release lock without preemption restoration ++ * ++ * @param lock Address of lock variable ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++static inline void rtdm_lock_put(rtdm_lock_t *lock) ++{ ++ raw_spin_unlock(lock); ++ xnsched_unlock(); ++} ++ ++/** ++ * Acquire lock and disable preemption, by stalling the head domain. 
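/*
 * Usage sketch (hypothetical foo_* names): protecting shared driver
 * data with an RTDM spinlock. The plain get/put pair is reserved for
 * contexts that are already non-preemptible (e.g. RTDM interrupt
 * handlers); the *_irqsave/_irqrestore variants declared just below are
 * the safe choice anywhere else.
 */
static DEFINE_RTDM_LOCK(foo_lock);
static unsigned int foo_pending;

static void foo_update_from_thread(unsigned int value)
{
	rtdm_lockctx_t ctx;

	rtdm_lock_get_irqsave(&foo_lock, ctx);
	foo_pending = value;
	rtdm_lock_put_irqrestore(&foo_lock, ctx);
}

static void foo_update_from_irq(unsigned int value)
{
	/* Real-time IRQs are already off in this context. */
	rtdm_lock_get(&foo_lock);
	foo_pending = value;
	rtdm_lock_put(&foo_lock);
}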
++ * ++ * @param __lock Address of lock variable ++ * @param __context name of local variable to store the context in ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_get_irqsave(__lock, __context) \ ++ ((__context) = __rtdm_lock_get_irqsave(__lock)) ++ ++static inline rtdm_lockctx_t __rtdm_lock_get_irqsave(rtdm_lock_t *lock) ++{ ++ rtdm_lockctx_t context; ++ ++ context = ipipe_test_and_stall_head(); ++ raw_spin_lock(lock); ++ xnsched_lock(); ++ ++ return context; ++} ++ ++/** ++ * Release lock and restore preemption state ++ * ++ * @param lock Address of lock variable ++ * @param context name of local variable which stored the context ++ * ++ * @coretags{unrestricted} ++ */ ++static inline ++void rtdm_lock_put_irqrestore(rtdm_lock_t *lock, rtdm_lockctx_t context) ++{ ++ raw_spin_unlock(lock); ++ xnsched_unlock(); ++ ipipe_restore_head(context); ++} ++ ++/** ++ * Disable preemption locally ++ * ++ * @param __context name of local variable to store the context in ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_irqsave(__context) \ ++ splhigh(__context) ++ ++/** ++ * Restore preemption state ++ * ++ * @param __context name of local variable which stored the context ++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_lock_irqrestore(__context) \ ++ splexit(__context) ++ ++/** @} Spinlock with Preemption Deactivation */ ++ ++#ifndef DOXYGEN_CPP ++ ++struct rtdm_waitqueue { ++ struct xnsynch wait; ++}; ++typedef struct rtdm_waitqueue rtdm_waitqueue_t; ++ ++#define RTDM_WAITQUEUE_INITIALIZER(__name) { \ ++ .wait = XNSYNCH_WAITQUEUE_INITIALIZER((__name).wait), \ ++ } ++ ++#define DEFINE_RTDM_WAITQUEUE(__name) \ ++ struct rtdm_waitqueue __name = RTDM_WAITQUEUE_INITIALIZER(__name) ++ ++#define DEFINE_RTDM_WAITQUEUE_ONSTACK(__name) \ ++ DEFINE_RTDM_WAITQUEUE(__name) ++ ++static inline void rtdm_waitqueue_init(struct rtdm_waitqueue *wq) ++{ ++ *wq = (struct rtdm_waitqueue)RTDM_WAITQUEUE_INITIALIZER(*wq); ++} ++ ++static inline void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq) ++{ ++ xnsynch_destroy(&wq->wait); ++} ++ ++static inline int __rtdm_dowait(struct rtdm_waitqueue *wq, ++ nanosecs_rel_t timeout, xntmode_t timeout_mode) ++{ ++ int ret; ++ ++ ret = xnsynch_sleep_on(&wq->wait, timeout, timeout_mode); ++ if (ret & XNBREAK) ++ return -EINTR; ++ if (ret & XNTIMEO) ++ return -ETIMEDOUT; ++ if (ret & XNRMID) ++ return -EIDRM; ++ return 0; ++} ++ ++static inline int __rtdm_timedwait(struct rtdm_waitqueue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++{ ++ if (toseq && timeout > 0) ++ return __rtdm_dowait(wq, *toseq, XN_ABSOLUTE); ++ ++ return __rtdm_dowait(wq, timeout, XN_RELATIVE); ++} ++ ++#define rtdm_timedwait_condition_locked(__wq, __cond, __timeout, __toseq) \ ++ ({ \ ++ int __ret = 0; \ ++ while (__ret == 0 && !(__cond)) \ ++ __ret = __rtdm_timedwait(__wq, __timeout, __toseq); \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait_condition_locked(__wq, __cond) \ ++ ({ \ ++ int __ret = 0; \ ++ while (__ret == 0 && !(__cond)) \ ++ __ret = __rtdm_dowait(__wq, \ ++ XN_INFINITE, XN_RELATIVE); \ ++ __ret; \ ++ }) ++ ++#define rtdm_timedwait_condition(__wq, __cond, __timeout, __toseq) \ ++ ({ \ ++ spl_t __s; \ ++ int __ret; \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ __ret = rtdm_timedwait_condition_locked(__wq, __cond, \ ++ __timeout, __toseq); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __ret; \ ++ }) ++ ++#define rtdm_timedwait(__wq, __timeout, __toseq) \ ++ __rtdm_timedwait(__wq, __timeout, __toseq) ++ ++#define rtdm_timedwait_locked(__wq, __timeout, __toseq) \ ++ 
rtdm_timedwait(__wq, __timeout, __toseq) ++ ++#define rtdm_wait_condition(__wq, __cond) \ ++ ({ \ ++ spl_t __s; \ ++ int __ret; \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ __ret = rtdm_wait_condition_locked(__wq, __cond); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait(__wq) \ ++ __rtdm_dowait(__wq, XN_INFINITE, XN_RELATIVE) ++ ++#define rtdm_wait_locked(__wq) rtdm_wait(__wq) ++ ++#define rtdm_waitqueue_lock(__wq, __context) cobalt_atomic_enter(__context) ++ ++#define rtdm_waitqueue_unlock(__wq, __context) cobalt_atomic_leave(__context) ++ ++#define rtdm_waitqueue_signal(__wq) \ ++ ({ \ ++ struct xnthread *__waiter; \ ++ __waiter = xnsynch_wakeup_one_sleeper(&(__wq)->wait); \ ++ xnsched_run(); \ ++ __waiter != NULL; \ ++ }) ++ ++#define __rtdm_waitqueue_flush(__wq, __reason) \ ++ ({ \ ++ int __ret; \ ++ __ret = xnsynch_flush(&(__wq)->wait, __reason); \ ++ xnsched_run(); \ ++ __ret == XNSYNCH_RESCHED; \ ++ }) ++ ++#define rtdm_waitqueue_broadcast(__wq) \ ++ __rtdm_waitqueue_flush(__wq, 0) ++ ++#define rtdm_waitqueue_flush(__wq) \ ++ __rtdm_waitqueue_flush(__wq, XNBREAK) ++ ++#define rtdm_waitqueue_wakeup(__wq, __waiter) \ ++ do { \ ++ xnsynch_wakeup_this_sleeper(&(__wq)->wait, __waiter); \ ++ xnsched_run(); \ ++ } while (0) ++ ++#define rtdm_for_each_waiter(__pos, __wq) \ ++ xnsynch_for_each_sleeper(__pos, &(__wq)->wait) ++ ++#define rtdm_for_each_waiter_safe(__pos, __tmp, __wq) \ ++ xnsynch_for_each_sleeper_safe(__pos, __tmp, &(__wq)->wait) ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/** @} rtdm_sync */ ++ ++/* --- Interrupt management services --- */ ++/*! ++ * @addtogroup rtdm_irq ++ * @{ ++ */ ++ ++typedef struct xnintr rtdm_irq_t; ++ ++/*! ++ * @anchor RTDM_IRQTYPE_xxx @name RTDM_IRQTYPE_xxx ++ * Interrupt registrations flags ++ * @{ ++ */ ++/** Enable IRQ-sharing with other real-time drivers */ ++#define RTDM_IRQTYPE_SHARED XN_IRQTYPE_SHARED ++/** Mark IRQ as edge-triggered, relevant for correct handling of shared ++ * edge-triggered IRQs */ ++#define RTDM_IRQTYPE_EDGE XN_IRQTYPE_EDGE ++/** @} RTDM_IRQTYPE_xxx */ ++ ++/** ++ * Interrupt handler ++ * ++ * @param[in] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 or a combination of @ref RTDM_IRQ_xxx flags ++ */ ++typedef int (*rtdm_irq_handler_t)(rtdm_irq_t *irq_handle); ++ ++/*! ++ * @anchor RTDM_IRQ_xxx @name RTDM_IRQ_xxx ++ * Return flags of interrupt handlers ++ * @{ ++ */ ++/** Unhandled interrupt */ ++#define RTDM_IRQ_NONE XN_IRQ_NONE ++/** Denote handled interrupt */ ++#define RTDM_IRQ_HANDLED XN_IRQ_HANDLED ++/** Request interrupt disabling on exit */ ++#define RTDM_IRQ_DISABLE XN_IRQ_DISABLE ++/** @} RTDM_IRQ_xxx */ ++ ++/** ++ * Retrieve IRQ handler argument ++ * ++ * @param irq_handle IRQ handle ++ * @param type Type of the pointer to return ++ * ++ * @return The argument pointer registered on rtdm_irq_request() is returned, ++ * type-casted to the specified @a type. 
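/*
 * Usage sketch (hypothetical foo_* names): an interrupt handler
 * retrieving its opaque cookie with rtdm_irq_get_arg(), attached with
 * rtdm_irq_request() as declared just below. Hardware acknowledge is
 * left out.
 */
struct foo_irq_state {
	rtdm_irq_t irq_handle;
	unsigned long count;
};

static int foo_isr(rtdm_irq_t *irq_handle)
{
	struct foo_irq_state *st =
		rtdm_irq_get_arg(irq_handle, struct foo_irq_state);

	st->count++;	/* typically: ack the device, push data, wake waiters */

	return RTDM_IRQ_HANDLED;
}

static int foo_attach_irq(struct foo_irq_state *st, unsigned int irq)
{
	return rtdm_irq_request(&st->irq_handle, irq, foo_isr, 0,
				"foo", st);
}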
++ * ++ * @coretags{unrestricted} ++ */ ++#define rtdm_irq_get_arg(irq_handle, type) ((type *)irq_handle->cookie) ++/** @} rtdm_irq */ ++ ++int rtdm_irq_request(rtdm_irq_t *irq_handle, unsigned int irq_no, ++ rtdm_irq_handler_t handler, unsigned long flags, ++ const char *device_name, void *arg); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline int rtdm_irq_free(rtdm_irq_t *irq_handle) ++{ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ xnintr_detach(irq_handle); ++ return 0; ++} ++ ++static inline int rtdm_irq_enable(rtdm_irq_t *irq_handle) ++{ ++ xnintr_enable(irq_handle); ++ return 0; ++} ++ ++static inline int rtdm_irq_disable(rtdm_irq_t *irq_handle) ++{ ++ xnintr_disable(irq_handle); ++ return 0; ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- non-real-time signalling services --- */ ++ ++/*! ++ * @addtogroup rtdm_nrtsignal ++ * @{ ++ */ ++ ++typedef struct rtdm_nrtsig rtdm_nrtsig_t; ++/** ++ * Non-real-time signal handler ++ * ++ * @param[in] nrt_sig Signal handle pointer as passed to rtdm_nrtsig_init() ++ * @param[in] arg Argument as passed to rtdm_nrtsig_init() ++ * ++ * @note The signal handler will run in soft-IRQ context of the non-real-time ++ * subsystem. Note the implications of this context, e.g. no invocation of ++ * blocking operations. ++ */ ++typedef void (*rtdm_nrtsig_handler_t)(rtdm_nrtsig_t *nrt_sig, void *arg); ++ ++struct rtdm_nrtsig { ++ rtdm_nrtsig_handler_t handler; ++ void *arg; ++}; ++ ++void rtdm_schedule_nrt_work(struct work_struct *lostage_work); ++/** @} rtdm_nrtsignal */ ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void rtdm_nrtsig_init(rtdm_nrtsig_t *nrt_sig, ++ rtdm_nrtsig_handler_t handler, void *arg) ++{ ++ nrt_sig->handler = handler; ++ nrt_sig->arg = arg; ++} ++ ++static inline void rtdm_nrtsig_destroy(rtdm_nrtsig_t *nrt_sig) ++{ ++ nrt_sig->handler = NULL; ++ nrt_sig->arg = NULL; ++} ++ ++void rtdm_nrtsig_pend(rtdm_nrtsig_t *nrt_sig); ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- timer services --- */ ++ ++/*! ++ * @addtogroup rtdm_timer ++ * @{ ++ */ ++ ++typedef struct xntimer rtdm_timer_t; ++ ++/** ++ * Timer handler ++ * ++ * @param[in] timer Timer handle as returned by rtdm_timer_init() ++ */ ++typedef void (*rtdm_timer_handler_t)(rtdm_timer_t *timer); ++ ++/*! 
++ * @anchor RTDM_TIMERMODE_xxx @name RTDM_TIMERMODE_xxx ++ * Timer operation modes ++ * @{ ++ */ ++enum rtdm_timer_mode { ++ /** Monotonic timer with relative timeout */ ++ RTDM_TIMERMODE_RELATIVE = XN_RELATIVE, ++ ++ /** Monotonic timer with absolute timeout */ ++ RTDM_TIMERMODE_ABSOLUTE = XN_ABSOLUTE, ++ ++ /** Adjustable timer with absolute timeout */ ++ RTDM_TIMERMODE_REALTIME = XN_REALTIME ++}; ++/** @} RTDM_TIMERMODE_xxx */ ++ ++/** @} rtdm_timer */ ++ ++int rtdm_timer_init(rtdm_timer_t *timer, rtdm_timer_handler_t handler, ++ const char *name); ++ ++void rtdm_timer_destroy(rtdm_timer_t *timer); ++ ++int rtdm_timer_start(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, enum rtdm_timer_mode mode); ++ ++void rtdm_timer_stop(rtdm_timer_t *timer); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline int rtdm_timer_start_in_handler(rtdm_timer_t *timer, ++ nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, ++ enum rtdm_timer_mode mode) ++{ ++ return xntimer_start(timer, expiry, interval, (xntmode_t)mode); ++} ++ ++static inline void rtdm_timer_stop_in_handler(rtdm_timer_t *timer) ++{ ++ xntimer_stop(timer); ++} ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- task services --- */ ++/*! ++ * @addtogroup rtdm_task ++ * @{ ++ */ ++ ++typedef struct xnthread rtdm_task_t; ++ ++/** ++ * Real-time task procedure ++ * ++ * @param[in,out] arg argument as passed to rtdm_task_init() ++ */ ++typedef void (*rtdm_task_proc_t)(void *arg); ++ ++/** ++ * @anchor rtdmtaskprio @name Task Priority Range ++ * Maximum and minimum task priorities ++ * @{ */ ++#define RTDM_TASK_LOWEST_PRIORITY 0 ++#define RTDM_TASK_HIGHEST_PRIORITY 99 ++/** @} Task Priority Range */ ++ ++/** ++ * @anchor rtdmchangetaskprio @name Task Priority Modification ++ * Raise or lower task priorities by one level ++ * @{ */ ++#define RTDM_TASK_RAISE_PRIORITY (+1) ++#define RTDM_TASK_LOWER_PRIORITY (-1) ++/** @} Task Priority Modification */ ++ ++/** @} rtdm_task */ ++ ++int rtdm_task_init(rtdm_task_t *task, const char *name, ++ rtdm_task_proc_t task_proc, void *arg, ++ int priority, nanosecs_rel_t period); ++int __rtdm_task_sleep(xnticks_t timeout, xntmode_t mode); ++void rtdm_task_busy_sleep(nanosecs_rel_t delay); ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void rtdm_task_destroy(rtdm_task_t *task) ++{ ++ xnthread_cancel(task); ++ xnthread_join(task, true); ++} ++ ++static inline int rtdm_task_should_stop(void) ++{ ++ return xnthread_test_info(xnthread_current(), XNCANCELD); ++} ++ ++void rtdm_task_join(rtdm_task_t *task); ++ ++static inline void __deprecated rtdm_task_join_nrt(rtdm_task_t *task, ++ unsigned int poll_delay) ++{ ++ rtdm_task_join(task); ++} ++ ++static inline void rtdm_task_set_priority(rtdm_task_t *task, int priority) ++{ ++ union xnsched_policy_param param = { .rt = { .prio = priority } }; ++ spl_t s; ++ ++ splhigh(s); ++ xnthread_set_schedparam(task, &xnsched_class_rt, ¶m); ++ xnsched_run(); ++ splexit(s); ++} ++ ++static inline int rtdm_task_set_period(rtdm_task_t *task, ++ nanosecs_abs_t start_date, ++ nanosecs_rel_t period) ++{ ++ if (period < 0) ++ period = 0; ++ if (start_date == 0) ++ start_date = XN_INFINITE; ++ ++ return xnthread_set_periodic(task, start_date, XN_ABSOLUTE, period); ++} ++ ++static inline int rtdm_task_unblock(rtdm_task_t *task) ++{ ++ spl_t s; ++ int res; ++ ++ splhigh(s); ++ res = xnthread_unblock(task); ++ xnsched_run(); ++ splexit(s); ++ ++ return res; ++} ++ ++static inline rtdm_task_t 
*rtdm_task_current(void) ++{ ++ return xnthread_current(); ++} ++ ++static inline int rtdm_task_wait_period(unsigned long *overruns_r) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ return xnthread_wait_period(overruns_r); ++} ++ ++static inline int rtdm_task_sleep(nanosecs_rel_t delay) ++{ ++ return __rtdm_task_sleep(delay, XN_RELATIVE); ++} ++ ++static inline int ++rtdm_task_sleep_abs(nanosecs_abs_t wakeup_date, enum rtdm_timer_mode mode) ++{ ++ /* For the sake of a consistent API usage... */ ++ if (mode != RTDM_TIMERMODE_ABSOLUTE && mode != RTDM_TIMERMODE_REALTIME) ++ return -EINVAL; ++ return __rtdm_task_sleep(wakeup_date, (xntmode_t)mode); ++} ++ ++/* rtdm_task_sleep_abs shall be used instead */ ++static inline int __deprecated rtdm_task_sleep_until(nanosecs_abs_t wakeup_time) ++{ ++ return __rtdm_task_sleep(wakeup_time, XN_REALTIME); ++} ++ ++#define rtdm_task_busy_wait(__condition, __spin_ns, __sleep_ns) \ ++ ({ \ ++ __label__ done; \ ++ nanosecs_abs_t __end; \ ++ int __ret = 0; \ ++ for (;;) { \ ++ __end = rtdm_clock_read_monotonic() + __spin_ns; \ ++ for (;;) { \ ++ if (__condition) \ ++ goto done; \ ++ if (rtdm_clock_read_monotonic() >= __end) \ ++ break; \ ++ } \ ++ __ret = rtdm_task_sleep(__sleep_ns); \ ++ if (__ret) \ ++ break; \ ++ } \ ++ done: \ ++ __ret; \ ++ }) ++ ++#define rtdm_wait_context xnthread_wait_context ++ ++static inline ++void rtdm_wait_complete(struct rtdm_wait_context *wc) ++{ ++ xnthread_complete_wait(wc); ++} ++ ++static inline ++int rtdm_wait_is_completed(struct rtdm_wait_context *wc) ++{ ++ return xnthread_wait_complete_p(wc); ++} ++ ++static inline void rtdm_wait_prepare(struct rtdm_wait_context *wc) ++{ ++ xnthread_prepare_wait(wc); ++} ++ ++static inline ++struct rtdm_wait_context *rtdm_wait_get_context(rtdm_task_t *task) ++{ ++ return xnthread_get_wait_context(task); ++} ++ ++#endif /* !DOXYGEN_CPP */ ++ ++/* --- event services --- */ ++ ++typedef struct rtdm_event { ++ struct xnsynch synch_base; ++ DECLARE_XNSELECT(select_block); ++} rtdm_event_t; ++ ++#define RTDM_EVENT_PENDING XNSYNCH_SPARE1 ++ ++void rtdm_event_init(rtdm_event_t *event, unsigned long pending); ++int rtdm_event_select(rtdm_event_t *event, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++int rtdm_event_wait(rtdm_event_t *event); ++int rtdm_event_timedwait(rtdm_event_t *event, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void rtdm_event_signal(rtdm_event_t *event); ++ ++void rtdm_event_clear(rtdm_event_t *event); ++ ++void rtdm_event_pulse(rtdm_event_t *event); ++ ++void rtdm_event_destroy(rtdm_event_t *event); ++ ++/* --- semaphore services --- */ ++ ++typedef struct rtdm_sem { ++ unsigned long value; ++ struct xnsynch synch_base; ++ DECLARE_XNSELECT(select_block); ++} rtdm_sem_t; ++ ++void rtdm_sem_init(rtdm_sem_t *sem, unsigned long value); ++int rtdm_sem_select(rtdm_sem_t *sem, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++int rtdm_sem_down(rtdm_sem_t *sem); ++int rtdm_sem_timeddown(rtdm_sem_t *sem, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void rtdm_sem_up(rtdm_sem_t *sem); ++ ++void rtdm_sem_destroy(rtdm_sem_t *sem); ++ ++/* --- mutex services --- */ ++ ++typedef struct rtdm_mutex { ++ struct xnsynch synch_base; ++ atomic_t fastlock; ++} rtdm_mutex_t; ++ ++void rtdm_mutex_init(rtdm_mutex_t *mutex); ++int rtdm_mutex_lock(rtdm_mutex_t *mutex); ++int rtdm_mutex_timedlock(rtdm_mutex_t *mutex, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq); ++void 
rtdm_mutex_unlock(rtdm_mutex_t *mutex); ++void rtdm_mutex_destroy(rtdm_mutex_t *mutex); ++ ++/* --- utility functions --- */ ++ ++#define rtdm_printk(format, ...) printk(format, ##__VA_ARGS__) ++ ++#define rtdm_printk_ratelimited(fmt, ...) do { \ ++ if (xnclock_ratelimit()) \ ++ printk(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#ifndef DOXYGEN_CPP /* Avoid static inline tags for RTDM in doxygen */ ++static inline void *rtdm_malloc(size_t size) ++{ ++ return xnmalloc(size); ++} ++ ++static inline void rtdm_free(void *ptr) ++{ ++ xnfree(ptr); ++} ++ ++int rtdm_mmap_to_user(struct rtdm_fd *fd, ++ void *src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data); ++ ++int rtdm_iomap_to_user(struct rtdm_fd *fd, ++ phys_addr_t src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data); ++ ++int rtdm_mmap_kmem(struct vm_area_struct *vma, void *va); ++ ++int rtdm_mmap_vmem(struct vm_area_struct *vma, void *va); ++ ++int rtdm_mmap_iomem(struct vm_area_struct *vma, phys_addr_t pa); ++ ++int rtdm_munmap(void *ptr, size_t len); ++ ++static inline int rtdm_read_user_ok(struct rtdm_fd *fd, ++ const void __user *ptr, size_t size) ++{ ++ return access_rok(ptr, size); ++} ++ ++static inline int rtdm_rw_user_ok(struct rtdm_fd *fd, ++ const void __user *ptr, size_t size) ++{ ++ return access_wok(ptr, size); ++} ++ ++static inline int rtdm_copy_from_user(struct rtdm_fd *fd, ++ void *dst, const void __user *src, ++ size_t size) ++{ ++ return __xn_copy_from_user(dst, src, size) ? -EFAULT : 0; ++} ++ ++static inline int rtdm_safe_copy_from_user(struct rtdm_fd *fd, ++ void *dst, const void __user *src, ++ size_t size) ++{ ++ return cobalt_copy_from_user(dst, src, size); ++} ++ ++static inline int rtdm_copy_to_user(struct rtdm_fd *fd, ++ void __user *dst, const void *src, ++ size_t size) ++{ ++ return __xn_copy_to_user(dst, src, size) ? 
-EFAULT : 0; ++} ++ ++static inline int rtdm_safe_copy_to_user(struct rtdm_fd *fd, ++ void __user *dst, const void *src, ++ size_t size) ++{ ++ return cobalt_copy_to_user(dst, src, size); ++} ++ ++static inline int rtdm_strncpy_from_user(struct rtdm_fd *fd, ++ char *dst, ++ const char __user *src, size_t count) ++{ ++ return cobalt_strncpy_from_user(dst, src, count); ++} ++ ++static inline bool rtdm_available(void) ++{ ++ return realtime_core_enabled(); ++} ++ ++static inline int rtdm_rt_capable(struct rtdm_fd *fd) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_interrupt_p())) ++ return 0; ++ ++ if (!rtdm_fd_is_user(fd)) ++ return !xnsched_root_p(); ++ ++ return xnthread_current() != NULL; ++} ++ ++static inline int rtdm_in_rt_context(void) ++{ ++ return (ipipe_current_domain != ipipe_root_domain); ++} ++ ++#define RTDM_IOV_FASTMAX 16 ++ ++int rtdm_get_iovec(struct rtdm_fd *fd, struct iovec **iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast); ++ ++int rtdm_put_iovec(struct rtdm_fd *fd, struct iovec *iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast); ++ ++static inline ++void rtdm_drop_iovec(struct iovec *iov, struct iovec *iov_fast) ++{ ++ if (iov != iov_fast) ++ xnfree(iov); ++} ++ ++ssize_t rtdm_get_iov_flatlen(struct iovec *iov, int iovlen); ++ ++#endif /* !DOXYGEN_CPP */ ++ ++#endif /* _COBALT_RTDM_DRIVER_H */ +--- linux/include/xenomai/rtdm/testing.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/testing.h 2021-04-07 16:01:28.254632699 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_TESTING_H ++#define _COBALT_RTDM_TESTING_H ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++ ++struct compat_rttst_overall_bench_res { ++ struct rttst_bench_res result; ++ compat_uptr_t histogram_avg; ++ compat_uptr_t histogram_min; ++ compat_uptr_t histogram_max; ++}; ++ ++#define RTTST_RTIOC_TMBENCH_STOP_COMPAT \ ++ _IOWR(RTIOC_TYPE_TESTING, 0x11, struct compat_rttst_overall_bench_res) ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++#endif /* !_COBALT_RTDM_TESTING_H */ +--- linux/include/xenomai/rtdm/net.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/net.h 2021-04-07 16:01:28.250632704 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2005-2011 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
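/*
 * Usage sketch (hypothetical foo_* names and a private 'F' ioctl magic):
 * exchanging data with user space from an ioctl handler by means of the
 * rtdm_safe_copy_{from,to}_user() helpers declared in rtdm/driver.h
 * above.
 */
struct foo_config {
	int channel;
	int rate_hz;
};

#define FOO_RTIOC_SET_CONFIG	_IOW('F', 0x00, struct foo_config)

static int foo_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
			void __user *arg)
{
	struct foo_config cfg;
	int ret;

	switch (request) {
	case FOO_RTIOC_SET_CONFIG:
		ret = rtdm_safe_copy_from_user(fd, &cfg, arg, sizeof(cfg));
		if (ret)
			return ret;
		/* ...apply cfg to the hardware... */
		return 0;
	default:
		/* Give the non-real-time handler a chance, if any. */
		return -ENOSYS;
	}
}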
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#ifndef _COBALT_RTDM_NET_H ++#define _COBALT_RTDM_NET_H ++ ++#include ++#include ++#include ++ ++struct rtnet_callback { ++ void (*func)(struct rtdm_fd *, void *); ++ void *arg; ++}; ++ ++#define RTNET_RTIOC_CALLBACK _IOW(RTIOC_TYPE_NETWORK, 0x12, \ ++ struct rtnet_callback) ++ ++/* utility functions */ ++ ++/* provided by rt_ipv4 */ ++unsigned long rt_inet_aton(const char *ip); ++ ++/* provided by rt_packet */ ++int rt_eth_aton(unsigned char *addr_buf, const char *mac); ++ ++#define RTNET_RTDM_VER 914 ++ ++#endif /* _COBALT_RTDM_NET_H */ +--- linux/include/xenomai/rtdm/can.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/can.h 2021-04-07 16:01:28.245632712 +0800 +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_CAN_H ++#define _COBALT_RTDM_CAN_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* _COBALT_RTDM_CAN_H */ +--- linux/include/xenomai/rtdm/compat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/compat.h 2021-04-07 16:01:28.240632719 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_RTDM_COMPAT_H ++#define _COBALT_RTDM_COMPAT_H ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++#include ++ ++struct compat_rtdm_getsockopt_args { ++ int level; ++ int optname; ++ compat_uptr_t optval; ++ compat_uptr_t optlen; ++}; ++ ++struct compat_rtdm_setsockopt_args { ++ int level; ++ int optname; ++ const compat_uptr_t optval; ++ socklen_t optlen; ++}; ++ ++struct compat_rtdm_getsockaddr_args { ++ compat_uptr_t addr; ++ compat_uptr_t addrlen; ++}; ++ ++struct compat_rtdm_setsockaddr_args { ++ const compat_uptr_t addr; ++ socklen_t addrlen; ++}; ++ ++#define _RTIOC_GETSOCKOPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x20, \ ++ struct compat_rtdm_getsockopt_args) ++#define _RTIOC_SETSOCKOPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x21, \ ++ struct compat_rtdm_setsockopt_args) ++#define _RTIOC_BIND_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x22, \ ++ struct compat_rtdm_setsockaddr_args) ++#define _RTIOC_CONNECT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x23, \ ++ struct compat_rtdm_setsockaddr_args) ++#define _RTIOC_ACCEPT_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x25, \ ++ struct compat_rtdm_getsockaddr_args) ++#define _RTIOC_GETSOCKNAME_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x26, \ ++ struct compat_rtdm_getsockaddr_args) ++#define _RTIOC_GETPEERNAME_COMPAT _IOW(RTIOC_TYPE_COMMON, 0x27, \ ++ struct compat_rtdm_getsockaddr_args) ++ ++#define __COMPAT_CASE(__op) : case __op ++ ++#else /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++#define __COMPAT_CASE(__op) ++ ++#endif /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++#define COMPAT_CASE(__op) case __op __COMPAT_CASE(__op ## _COMPAT) ++ ++#endif /* !_COBALT_RTDM_COMPAT_H */ +--- linux/include/xenomai/rtdm/fd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/fd.h 2021-04-07 16:01:28.235632726 +0800 +@@ -0,0 +1,410 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008,2013,2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_FD_H ++#define _COBALT_KERNEL_FD_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct vm_area_struct; ++struct rtdm_fd; ++struct _rtdm_mmap_request; ++struct xnselector; ++struct cobalt_ppd; ++struct rtdm_device; ++ ++/** ++ * @file ++ * @anchor File operation handlers ++ * @addtogroup rtdm_device_register ++ * @{ ++ */ ++ ++/** ++ * Open handler for named devices ++ * ++ * @param[in] fd File descriptor associated with opened device instance ++ * @param[in] oflags Open flags as passed by the user ++ * ++ * The file descriptor carries a device minor information which can be ++ * retrieved by a call to rtdm_fd_minor(fd). The minor number can be ++ * used for distinguishing devices managed by a driver. ++ * ++ * @return 0 on success. On failure, a negative error code is returned. 
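/*
 * Usage sketch (hypothetical foo_* driver): a minimal open handler.
 * The private area returned by rtdm_fd_to_private() is zero-initialized
 * by the core, so only non-zero defaults need to be set up here.
 */
static int foo_open(struct rtdm_fd *fd, int oflags)
{
	struct foo_context *ctx = rtdm_fd_to_private(fd);

	ctx->nonblocking = !!(oflags & O_NONBLOCK);
	ctx->minor = rtdm_fd_minor(fd);	/* which device instance was opened */

	return 0;
}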
++ * ++ * @see @c open() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_open_handler(struct rtdm_fd *fd, int oflags); ++ ++/** ++ * Socket creation handler for protocol devices ++ * ++ * @param[in] fd File descriptor associated with opened device instance ++ * @param[in] protocol Protocol number as passed by the user ++ * ++ * @return 0 on success. On failure, a negative error code is returned. ++ * ++ * @see @c socket() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_socket_handler(struct rtdm_fd *fd, int protocol); ++ ++/** ++ * Close handler ++ * ++ * @param[in] fd File descriptor associated with opened ++ * device instance. ++ * ++ * @see @c close() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++void rtdm_close_handler(struct rtdm_fd *fd); ++ ++/** ++ * IOCTL handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] request Request number as passed by the user ++ * @param[in,out] arg Request argument as passed by the user ++ * ++ * @return A positive value or 0 on success. On failure return either ++ * -ENOSYS, to request that the function be called again from the opposite ++ * realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c ioctl() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++int rtdm_ioctl_handler(struct rtdm_fd *fd, unsigned int request, void __user *arg); ++ ++/** ++ * Read handler ++ * ++ * @param[in] fd File descriptor ++ * @param[out] buf Input buffer as passed by the user ++ * @param[in] size Number of bytes the user requests to read ++ * ++ * @return On success, the number of bytes read. On failure return either ++ * -ENOSYS, to request that this handler be called again from the opposite ++ * realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c read() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_read_handler(struct rtdm_fd *fd, void __user *buf, size_t size); ++ ++/** ++ * Write handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] buf Output buffer as passed by the user ++ * @param[in] size Number of bytes the user requests to write ++ * ++ * @return On success, the number of bytes written. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c write() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_write_handler(struct rtdm_fd *fd, const void __user *buf, size_t size); ++ ++/** ++ * Receive message handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in,out] msg Message descriptor as passed by the user, automatically ++ * mirrored to safe kernel memory in case of user mode call ++ * @param[in] flags Message flags as passed by the user ++ * ++ * @return On success, the number of bytes received. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. 
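/*
 * Usage sketch (hypothetical foo_* driver): the -ENOSYS re-dispatch
 * convention. The real-time read handler bails out to its non-real-time
 * sibling as long as the data ring still has to be set up with regular
 * Linux services.
 */
static ssize_t foo_read_rt(struct rtdm_fd *fd, void __user *buf, size_t size)
{
	struct foo_context *ctx = rtdm_fd_to_private(fd);
	int ret;

	if (ctx->ring == NULL)
		return -ENOSYS;	/* re-dispatched to the read_nrt handler */

	if (size > ctx->ring_avail)
		size = ctx->ring_avail;

	ret = rtdm_safe_copy_to_user(fd, buf, ctx->ring, size);

	return ret ? ret : (ssize_t)size;
}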
++ * ++ * @see @c recvmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_recvmsg_handler(struct rtdm_fd *fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * Transmit message handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] msg Message descriptor as passed by the user, automatically ++ * mirrored to safe kernel memory in case of user mode call ++ * @param[in] flags Message flags as passed by the user ++ * ++ * @return On success, the number of bytes transmitted. On failure return ++ * either -ENOSYS, to request that this handler be called again from the ++ * opposite realtime/non-realtime context, or another negative error code. ++ * ++ * @see @c sendmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ */ ++ssize_t rtdm_sendmsg_handler(struct rtdm_fd *fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * Select handler ++ * ++ * @param[in] fd File descriptor ++ * @param selector Pointer to the selector structure ++ * @param type Type of events (@a XNSELECT_READ, @a XNSELECT_WRITE, or @a ++ * XNSELECT_EXCEPT) ++ * @param index Index of the file descriptor ++ * ++ * @return 0 on success. On failure, a negative error code is ++ * returned. ++ * ++ * @see @c select() in POSIX.1-2001, ++ * http://pubs.opengroup.org/onlinepubs/007908799/xsh/select.html ++ */ ++int rtdm_select_handler(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index); ++ ++/** ++ * Memory mapping handler ++ * ++ * @param[in] fd File descriptor ++ * @param[in] vma Virtual memory area descriptor ++ * ++ * @return 0 on success. On failure, a negative error code is ++ * returned. ++ * ++ * @see @c mmap() in POSIX.1-2001, ++ * http://pubs.opengroup.org/onlinepubs/7908799/xsh/mmap.html ++ * ++ * @note The address hint passed to the mmap() request is deliberately ++ * ignored by RTDM. ++ */ ++int rtdm_mmap_handler(struct rtdm_fd *fd, struct vm_area_struct *vma); ++ ++/** ++ * Allocate mapping region in address space ++ * ++ * When present, this optional handler should return the start address ++ * of a free region in the process's address space, large enough to ++ * cover the ongoing mmap() operation. If unspecified, the default ++ * architecture-defined handler is invoked. ++ * ++ * Most drivers can omit this handler, except on MMU-less platforms ++ * (see second note). ++ * ++ * @param[in] fd File descriptor ++ * @param[in] len Length of the requested region ++ * @param[in] pgoff Page frame number to map to (see second note). ++ * @param[in] flags Requested mapping flags ++ * ++ * @return The start address of the mapping region on success. On ++ * failure, a negative error code should be returned, with -ENOSYS ++ * meaning that the driver does not want to provide such information, ++ * in which case the ongoing mmap() operation will fail. ++ * ++ * @note The address hint passed to the mmap() request is deliberately ++ * ignored by RTDM, and therefore not passed to this handler. ++ * ++ * @note On MMU-less platforms, this handler is required because RTDM ++ * issues mapping requests over a shareable character device ++ * internally. In such context, the RTDM core may pass a null @a pgoff ++ * argument to the handler, for probing for the logical start address ++ * of the memory region to map to. Otherwise, when @a pgoff is ++ * non-zero, pgoff << PAGE_SHIFT is usually returned. 
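/*
 * Usage sketch (hypothetical foo_* names; FOO_IOMEM_BASE is a made-up
 * physical address): a mapping handler exporting a device register
 * window to the caller with rtdm_mmap_iomem(), declared in
 * rtdm/driver.h above.
 */
#define FOO_IOMEM_BASE	0xfe000000UL

static int foo_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma)
{
	return rtdm_mmap_iomem(vma, FOO_IOMEM_BASE);
}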
++ */ ++unsigned long ++rtdm_get_unmapped_area_handler(struct rtdm_fd *fd, ++ unsigned long len, unsigned long pgoff, ++ unsigned long flags); ++/** ++ * @anchor rtdm_fd_ops ++ * @brief RTDM file operation descriptor. ++ * ++ * This structure describes the operations available with a RTDM ++ * device, defining handlers for submitting I/O requests. Those ++ * handlers are implemented by RTDM device drivers. ++ */ ++struct rtdm_fd_ops { ++ /** See rtdm_open_handler(). */ ++ int (*open)(struct rtdm_fd *fd, int oflags); ++ /** See rtdm_socket_handler(). */ ++ int (*socket)(struct rtdm_fd *fd, int protocol); ++ /** See rtdm_close_handler(). */ ++ void (*close)(struct rtdm_fd *fd); ++ /** See rtdm_ioctl_handler(). */ ++ int (*ioctl_rt)(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ /** See rtdm_ioctl_handler(). */ ++ int (*ioctl_nrt)(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ /** See rtdm_read_handler(). */ ++ ssize_t (*read_rt)(struct rtdm_fd *fd, ++ void __user *buf, size_t size); ++ /** See rtdm_read_handler(). */ ++ ssize_t (*read_nrt)(struct rtdm_fd *fd, ++ void __user *buf, size_t size); ++ /** See rtdm_write_handler(). */ ++ ssize_t (*write_rt)(struct rtdm_fd *fd, ++ const void __user *buf, size_t size); ++ /** See rtdm_write_handler(). */ ++ ssize_t (*write_nrt)(struct rtdm_fd *fd, ++ const void __user *buf, size_t size); ++ /** See rtdm_recvmsg_handler(). */ ++ ssize_t (*recvmsg_rt)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ /** See rtdm_recvmsg_handler(). */ ++ ssize_t (*recvmsg_nrt)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ /** See rtdm_sendmsg_handler(). */ ++ ssize_t (*sendmsg_rt)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ /** See rtdm_sendmsg_handler(). */ ++ ssize_t (*sendmsg_nrt)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ /** See rtdm_select_handler(). */ ++ int (*select)(struct rtdm_fd *fd, ++ struct xnselector *selector, ++ unsigned int type, unsigned int index); ++ /** See rtdm_mmap_handler(). */ ++ int (*mmap)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++ /** See rtdm_get_unmapped_area_handler(). 
*/ ++ unsigned long (*get_unmapped_area)(struct rtdm_fd *fd, ++ unsigned long len, ++ unsigned long pgoff, ++ unsigned long flags); ++}; ++ ++/** @} File operation handlers */ ++ ++struct rtdm_fd { ++ unsigned int magic; ++ struct rtdm_fd_ops *ops; ++ struct cobalt_ppd *owner; ++ unsigned int refs; ++ int ufd; ++ int minor; ++ int oflags; ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ int compat; ++#endif ++ bool stale; ++ struct list_head cleanup; ++ struct list_head next; /* in dev->openfd_list */ ++}; ++ ++#define RTDM_FD_MAGIC 0x52544446 ++ ++#define RTDM_FD_COMPAT __COBALT_COMPAT_BIT ++#define RTDM_FD_COMPATX __COBALT_COMPATX_BIT ++ ++int __rtdm_anon_getfd(const char *name, int flags); ++ ++void __rtdm_anon_putfd(int ufd); ++ ++static inline struct cobalt_ppd *rtdm_fd_owner(const struct rtdm_fd *fd) ++{ ++ return fd->owner; ++} ++ ++static inline int rtdm_fd_ufd(const struct rtdm_fd *fd) ++{ ++ return fd->ufd; ++} ++ ++static inline int rtdm_fd_minor(const struct rtdm_fd *fd) ++{ ++ return fd->minor; ++} ++ ++static inline int rtdm_fd_flags(const struct rtdm_fd *fd) ++{ ++ return fd->oflags; ++} ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++static inline int rtdm_fd_is_compat(const struct rtdm_fd *fd) ++{ ++ return fd->compat; ++} ++#else ++static inline int rtdm_fd_is_compat(const struct rtdm_fd *fd) ++{ ++ return 0; ++} ++#endif ++ ++int rtdm_fd_enter(struct rtdm_fd *rtdm_fd, int ufd, ++ unsigned int magic, struct rtdm_fd_ops *ops); ++ ++int rtdm_fd_register(struct rtdm_fd *fd, int ufd); ++ ++struct rtdm_fd *rtdm_fd_get(int ufd, unsigned int magic); ++ ++int rtdm_fd_lock(struct rtdm_fd *fd); ++ ++void rtdm_fd_put(struct rtdm_fd *fd); ++ ++void rtdm_fd_unlock(struct rtdm_fd *fd); ++ ++int rtdm_fd_fcntl(int ufd, int cmd, ...); ++ ++int rtdm_fd_ioctl(int ufd, unsigned int request, ...); ++ ++ssize_t rtdm_fd_read(int ufd, void __user *buf, size_t size); ++ ++ssize_t rtdm_fd_write(int ufd, const void __user *buf, size_t size); ++ ++int rtdm_fd_close(int ufd, unsigned int magic); ++ ++ssize_t rtdm_fd_recvmsg(int ufd, struct user_msghdr *msg, int flags); ++ ++int __rtdm_fd_recvmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, void __user *u_timeout, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg), ++ int (*get_timespec)(struct timespec *ts, const void __user *u_ts)); ++ ++ssize_t rtdm_fd_sendmsg(int ufd, const struct user_msghdr *msg, ++ int flags); ++ ++int __rtdm_fd_sendmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg)); ++ ++int rtdm_fd_mmap(int ufd, struct _rtdm_mmap_request *rma, ++ void **u_addrp); ++ ++int rtdm_fd_valid_p(int ufd); ++ ++int rtdm_fd_select(int ufd, struct xnselector *selector, ++ unsigned int type); ++ ++int rtdm_device_new_fd(struct rtdm_fd *fd, int ufd, ++ struct rtdm_device *dev); ++ ++void rtdm_device_flush_fds(struct rtdm_device *dev); ++ ++void rtdm_fd_cleanup(struct cobalt_ppd *p); ++ ++void rtdm_fd_init(void); ++ ++#endif /* _COBALT_KERNEL_FD_H */ +--- linux/include/xenomai/rtdm/autotune.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/autotune.h 2021-04-07 16:01:28.231632732 +0800 +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as 
++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_AUTOTUNE_H ++#define _COBALT_RTDM_AUTOTUNE_H ++ ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_AUTOTUNE_H */ +--- linux/include/xenomai/rtdm/ipc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/ipc.h 2021-04-07 16:01:28.226632739 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2009 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_RTDM_IPC_H ++#define _COBALT_RTDM_IPC_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_IPC_H */ +--- linux/include/xenomai/rtdm/rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/rtdm.h 2021-04-07 16:01:28.221632746 +0800 +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (C) 2005, 2006 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_RTDM_H ++#define _COBALT_RTDM_RTDM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++typedef __u32 socklen_t; ++ ++#include ++ ++int __rtdm_dev_open(const char *path, int oflag); ++ ++int __rtdm_dev_socket(int protocol_family, ++ int socket_type, int protocol); ++ ++static inline int rtdm_open(const char *path, int oflag, ...) 
++{ ++ return __rtdm_dev_open(path, oflag); ++} ++ ++static inline int rtdm_socket(int protocol_family, ++ int socket_type, int protocol) ++{ ++ return __rtdm_dev_socket(protocol_family, socket_type, protocol); ++} ++ ++static inline int rtdm_close(int fd) ++{ ++ return rtdm_fd_close(fd, RTDM_FD_MAGIC); ++} ++ ++#define rtdm_fcntl(__fd, __cmd, __args...) \ ++ rtdm_fd_fcntl(__fd, __cmd, ##__args) ++ ++#define rtdm_ioctl(__fd, __request, __args...) \ ++ rtdm_fd_ioctl(__fd, __request, ##__args) ++ ++static inline ssize_t rtdm_read(int fd, void *buf, size_t count) ++{ ++ return rtdm_fd_read(fd, buf, count); ++} ++ ++static inline ssize_t rtdm_write(int fd, const void *buf, size_t count) ++{ ++ return rtdm_fd_write(fd, buf, count); ++} ++ ++static inline ssize_t rtdm_recvmsg(int s, struct user_msghdr *msg, int flags) ++{ ++ return rtdm_fd_recvmsg(s, msg, flags); ++} ++ ++static inline ssize_t rtdm_sendmsg(int s, const struct user_msghdr *msg, int flags) ++{ ++ return rtdm_fd_sendmsg(s, msg, flags); ++} ++ ++static inline ++ssize_t rtdm_recvfrom(int s, void *buf, size_t len, int flags, ++ struct sockaddr *from, ++ socklen_t *fromlen) ++{ ++ struct user_msghdr msg; ++ struct iovec iov; ++ ssize_t ret; ++ ++ iov.iov_base = buf; ++ iov.iov_len = len; ++ msg.msg_name = from; ++ msg.msg_namelen = from ? *fromlen : 0; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ ++ ret = rtdm_recvmsg(s, &msg, flags); ++ if (ret < 0) ++ return ret; ++ ++ if (from) ++ *fromlen = msg.msg_namelen; ++ ++ return ret; ++} ++ ++static inline ssize_t rtdm_recv(int s, void *buf, size_t len, int flags) ++{ ++ return rtdm_recvfrom(s, buf, len, flags, NULL, NULL); ++} ++ ++static inline ssize_t rtdm_sendto(int s, const void *buf, size_t len, ++ int flags, const struct sockaddr *to, ++ socklen_t tolen) ++{ ++ struct user_msghdr msg; ++ struct iovec iov; ++ ++ iov.iov_base = (void *)buf; ++ iov.iov_len = len; ++ msg.msg_name = (struct sockaddr *)to; ++ msg.msg_namelen = tolen; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ ++ return rtdm_sendmsg(s, &msg, flags); ++} ++ ++static inline ssize_t rtdm_send(int s, const void *buf, size_t len, int flags) ++{ ++ return rtdm_sendto(s, buf, len, flags, NULL, 0); ++} ++ ++static inline int rtdm_getsockopt(int s, int level, int optname, ++ void *optval, socklen_t *optlen) ++{ ++ struct _rtdm_getsockopt_args args = { ++ level, optname, optval, optlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETSOCKOPT, &args); ++} ++ ++static inline int rtdm_setsockopt(int s, int level, int optname, ++ const void *optval, socklen_t optlen) ++{ ++ struct _rtdm_setsockopt_args args = { ++ level, optname, (void *)optval, optlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_SETSOCKOPT, &args); ++} ++ ++static inline int rtdm_bind(int s, const struct sockaddr *my_addr, ++ socklen_t addrlen) ++{ ++ struct _rtdm_setsockaddr_args args = { ++ my_addr, addrlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_BIND, &args); ++} ++ ++static inline int rtdm_connect(int s, const struct sockaddr *serv_addr, ++ socklen_t addrlen) ++{ ++ struct _rtdm_setsockaddr_args args = { ++ serv_addr, addrlen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_CONNECT, &args); ++} ++ ++static inline int rtdm_listen(int s, int backlog) ++{ ++ return rtdm_ioctl(s, _RTIOC_LISTEN, backlog); ++} ++ ++static inline int rtdm_accept(int s, struct sockaddr *addr, ++ socklen_t *addrlen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ addr, addrlen ++ }; ++ ++ 
return rtdm_ioctl(s, _RTIOC_ACCEPT, &args); ++} ++ ++static inline int rtdm_getsockname(int s, struct sockaddr *name, ++ socklen_t *namelen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ name, namelen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETSOCKNAME, &args); ++} ++ ++static inline int rtdm_getpeername(int s, struct sockaddr *name, ++ socklen_t *namelen) ++{ ++ struct _rtdm_getsockaddr_args args = { ++ name, namelen ++ }; ++ ++ return rtdm_ioctl(s, _RTIOC_GETPEERNAME, &args); ++} ++ ++static inline int rtdm_shutdown(int s, int how) ++{ ++ return rtdm_ioctl(s, _RTIOC_SHUTDOWN, how); ++} ++ ++#endif /* _COBALT_RTDM_RTDM_H */ +--- linux/include/xenomai/rtdm/cobalt.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/rtdm/cobalt.h 2021-04-07 16:01:28.217632752 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2013 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_RTDM_COBALT_H ++#define _COBALT_RTDM_COBALT_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* !_COBALT_RTDM_COBALT_H */ +--- linux/include/xenomai/cobalt/uapi/corectl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/corectl.h 2021-04-07 16:01:28.429632449 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2015 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_CORECTL_H ++#define _COBALT_UAPI_CORECTL_H ++ ++#define _CC_COBALT_GET_VERSION 0 ++#define _CC_COBALT_GET_NR_PIPES 1 ++#define _CC_COBALT_GET_NR_TIMERS 2 ++ ++#define _CC_COBALT_GET_DEBUG 3 ++# define _CC_COBALT_DEBUG_ASSERT 1 ++# define _CC_COBALT_DEBUG_CONTEXT 2 ++# define _CC_COBALT_DEBUG_LOCKING 4 ++# define _CC_COBALT_DEBUG_USER 8 ++# define _CC_COBALT_DEBUG_MUTEX_RELAXED 16 ++# define _CC_COBALT_DEBUG_MUTEX_SLEEP 32 ++/* bit 6 (64) formerly used for DEBUG_POSIX_SYNCHRO */ ++# define _CC_COBALT_DEBUG_LEGACY 128 ++# define _CC_COBALT_DEBUG_TRACE_RELAX 256 ++# define _CC_COBALT_DEBUG_NET 512 ++ ++#define _CC_COBALT_GET_POLICIES 4 ++# define _CC_COBALT_SCHED_FIFO 1 ++# define _CC_COBALT_SCHED_RR 2 ++# define _CC_COBALT_SCHED_WEAK 4 ++# define _CC_COBALT_SCHED_SPORADIC 8 ++# define _CC_COBALT_SCHED_QUOTA 16 ++# define _CC_COBALT_SCHED_TP 32 ++ ++#define _CC_COBALT_GET_WATCHDOG 5 ++#define _CC_COBALT_GET_CORE_STATUS 6 ++#define _CC_COBALT_START_CORE 7 ++#define _CC_COBALT_STOP_CORE 8 ++ ++#define _CC_COBALT_GET_NET_CONFIG 9 ++# define _CC_COBALT_NET 0x00000001 ++# define _CC_COBALT_NET_ETH_P_ALL 0x00000002 ++# define _CC_COBALT_NET_IPV4 0x00000004 ++# define _CC_COBALT_NET_ICMP 0x00000008 ++# define _CC_COBALT_NET_NETROUTING 0x00000010 ++# define _CC_COBALT_NET_ROUTER 0x00000020 ++# define _CC_COBALT_NET_UDP 0x00000040 ++# define _CC_COBALT_NET_AF_PACKET 0x00000080 ++# define _CC_COBALT_NET_TDMA 0x00000100 ++# define _CC_COBALT_NET_NOMAC 0x00000200 ++# define _CC_COBALT_NET_CFG 0x00000400 ++# define _CC_COBALT_NET_CAP 0x00000800 ++# define _CC_COBALT_NET_PROXY 0x00001000 ++ ++ ++enum cobalt_run_states { ++ COBALT_STATE_DISABLED, ++ COBALT_STATE_RUNNING, ++ COBALT_STATE_STOPPED, ++ COBALT_STATE_TEARDOWN, ++ COBALT_STATE_WARMUP, ++}; ++ ++#endif /* !_COBALT_UAPI_CORECTL_H */ +--- linux/include/xenomai/cobalt/uapi/cond.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/cond.h 2021-04-07 16:01:28.424632456 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_COND_H ++#define _COBALT_UAPI_COND_H ++ ++#include ++ ++#define COBALT_COND_MAGIC 0x86860505 ++ ++struct cobalt_cond_state { ++ __u32 pending_signals; ++ __u32 mutex_state_offset; ++}; ++ ++union cobalt_cond_union { ++ pthread_cond_t native_cond; ++ struct cobalt_cond_shadow { ++ __u32 magic; ++ __u32 state_offset; ++ xnhandle_t handle; ++ } shadow_cond; ++}; ++ ++#endif /* !_COBALT_UAPI_COND_H */ +--- linux/include/xenomai/cobalt/uapi/event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/event.h 2021-04-07 16:01:28.419632463 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . 
++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_EVENT_H ++#define _COBALT_UAPI_EVENT_H ++ ++#include ++ ++struct cobalt_event_state { ++ __u32 value; ++ __u32 flags; ++#define COBALT_EVENT_PENDED 0x1 ++ __u32 nwaiters; ++}; ++ ++struct cobalt_event; ++ ++/* Creation flags. */ ++#define COBALT_EVENT_FIFO 0x0 ++#define COBALT_EVENT_PRIO 0x1 ++#define COBALT_EVENT_SHARED 0x2 ++ ++/* Wait mode. */ ++#define COBALT_EVENT_ALL 0x0 ++#define COBALT_EVENT_ANY 0x1 ++ ++struct cobalt_event_shadow { ++ __u32 state_offset; ++ __u32 flags; ++ xnhandle_t handle; ++}; ++ ++struct cobalt_event_info { ++ unsigned int value; ++ int flags; ++ int nrwait; ++}; ++ ++typedef struct cobalt_event_shadow cobalt_event_t; ++ ++#endif /* !_COBALT_UAPI_EVENT_H */ +--- linux/include/xenomai/cobalt/uapi/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/syscall.h 2021-04-07 16:01:28.415632469 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_SYSCALL_H ++#define _COBALT_UAPI_SYSCALL_H ++ ++#include ++ ++#define sc_cobalt_bind 0 ++#define sc_cobalt_thread_create 1 ++#define sc_cobalt_thread_getpid 2 ++#define sc_cobalt_thread_setmode 3 ++#define sc_cobalt_thread_setname 4 ++#define sc_cobalt_thread_join 5 ++#define sc_cobalt_thread_kill 6 ++#define sc_cobalt_thread_setschedparam_ex 7 ++#define sc_cobalt_thread_getschedparam_ex 8 ++#define sc_cobalt_thread_getstat 9 ++#define sc_cobalt_sem_init 10 ++#define sc_cobalt_sem_destroy 11 ++#define sc_cobalt_sem_post 12 ++#define sc_cobalt_sem_wait 13 ++#define sc_cobalt_sem_trywait 14 ++#define sc_cobalt_sem_getvalue 15 ++#define sc_cobalt_sem_open 16 ++#define sc_cobalt_sem_close 17 ++#define sc_cobalt_sem_unlink 18 ++#define sc_cobalt_sem_timedwait 19 ++#define sc_cobalt_sem_inquire 20 ++#define sc_cobalt_sem_broadcast_np 21 ++#define sc_cobalt_clock_getres 22 ++#define sc_cobalt_clock_gettime 23 ++#define sc_cobalt_clock_settime 24 ++#define sc_cobalt_clock_nanosleep 25 ++#define sc_cobalt_mutex_init 26 ++#define sc_cobalt_mutex_check_init 27 ++#define sc_cobalt_mutex_destroy 28 ++#define sc_cobalt_mutex_lock 29 ++#define sc_cobalt_mutex_timedlock 30 ++#define sc_cobalt_mutex_trylock 31 ++#define sc_cobalt_mutex_unlock 32 ++#define sc_cobalt_cond_init 33 ++#define sc_cobalt_cond_destroy 34 ++#define sc_cobalt_cond_wait_prologue 35 ++#define sc_cobalt_cond_wait_epilogue 36 ++#define sc_cobalt_mq_open 37 ++#define sc_cobalt_mq_close 38 ++#define sc_cobalt_mq_unlink 39 ++#define sc_cobalt_mq_getattr 40 ++#define sc_cobalt_mq_timedsend 41 ++#define sc_cobalt_mq_timedreceive 42 ++#define sc_cobalt_mq_notify 43 ++#define sc_cobalt_sched_minprio 44 ++#define sc_cobalt_sched_maxprio 45 ++#define sc_cobalt_sched_weightprio 46 ++#define sc_cobalt_sched_yield 47 ++#define sc_cobalt_sched_setscheduler_ex 48 ++#define sc_cobalt_sched_getscheduler_ex 49 ++#define sc_cobalt_sched_setconfig_np 50 ++#define sc_cobalt_sched_getconfig_np 51 ++#define sc_cobalt_timer_create 52 ++#define sc_cobalt_timer_delete 53 ++#define sc_cobalt_timer_settime 54 ++#define sc_cobalt_timer_gettime 55 ++#define sc_cobalt_timer_getoverrun 56 ++#define sc_cobalt_timerfd_create 57 ++#define sc_cobalt_timerfd_settime 58 ++#define sc_cobalt_timerfd_gettime 59 ++#define sc_cobalt_sigwait 60 ++#define sc_cobalt_sigwaitinfo 61 ++#define sc_cobalt_sigtimedwait 62 ++#define sc_cobalt_sigpending 63 ++#define sc_cobalt_kill 64 ++#define sc_cobalt_sigqueue 65 ++#define sc_cobalt_monitor_init 66 ++#define sc_cobalt_monitor_destroy 67 ++#define sc_cobalt_monitor_enter 68 ++#define sc_cobalt_monitor_wait 69 ++#define sc_cobalt_monitor_sync 70 ++#define sc_cobalt_monitor_exit 71 ++#define sc_cobalt_event_init 72 ++#define sc_cobalt_event_wait 73 ++#define sc_cobalt_event_sync 74 ++#define sc_cobalt_event_destroy 75 ++#define sc_cobalt_event_inquire 76 ++#define sc_cobalt_open 77 ++#define sc_cobalt_socket 78 ++#define sc_cobalt_close 79 ++#define sc_cobalt_ioctl 80 ++#define sc_cobalt_read 81 ++#define sc_cobalt_write 82 ++#define sc_cobalt_recvmsg 83 ++#define sc_cobalt_sendmsg 84 ++#define sc_cobalt_mmap 85 ++#define sc_cobalt_select 86 ++#define sc_cobalt_fcntl 87 ++#define sc_cobalt_migrate 88 ++#define sc_cobalt_archcall 89 ++#define sc_cobalt_trace 90 ++#define sc_cobalt_corectl 91 ++#define sc_cobalt_get_current 92 ++/* 93: formerly mayday */ ++#define sc_cobalt_backtrace 94 ++#define sc_cobalt_serialdbg 95 ++#define sc_cobalt_extend 96 ++#define sc_cobalt_ftrace_puts 97 ++#define sc_cobalt_recvmmsg 
98 ++#define sc_cobalt_sendmmsg 99 ++#define sc_cobalt_clock_adjtime 100 ++#define sc_cobalt_thread_setschedprio 101 ++ ++#define __NR_COBALT_SYSCALLS 128 /* Power of 2 */ ++ ++#endif /* !_COBALT_UAPI_SYSCALL_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/urw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/urw.h 2021-04-07 16:01:28.410632476 +0800 +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_URW_H ++#define _COBALT_UAPI_KERNEL_URW_H ++ ++#include ++ ++/* ++ * A restricted version of the kernel seqlocks with a slightly ++ * different interface, allowing for unsynced reads with concurrent ++ * write detection, without serializing writers. Caller should ++ * provide for proper locking to deal with concurrent updates. ++ * ++ * urw_t lock = URW_INITIALIZER; ++ * urwstate_t tmp; ++ * ++ * unsynced_read_block(&tmp, &lock) { ++ * (will redo until clean read)... ++ * } ++ * ++ * unsynced_write_block(&tmp, &lock) { ++ * ... ++ * } ++ * ++ * This code was inspired by Wolfgang Mauerer's linux/seqlock.h ++ * adaptation for Xenomai 2.6 to support the VDSO feature. 
++ */ ++ ++typedef struct { ++ __u32 sequence; ++} urw_t; ++ ++typedef struct { ++ __u32 token; ++ __u32 dirty; ++} urwstate_t; ++ ++#define URW_INITIALIZER { 0 } ++#define DEFINE_URW(__name) urw_t __name = URW_INITIALIZER ++ ++#ifndef READ_ONCE ++#define READ_ONCE ACCESS_ONCE ++#endif ++ ++static inline void __try_read_start(const urw_t *urw, urwstate_t *tmp) ++{ ++ __u32 token; ++repeat: ++ token = READ_ONCE(urw->sequence); ++ smp_rmb(); ++ if (token & 1) { ++ cpu_relax(); ++ goto repeat; ++ } ++ ++ tmp->token = token; ++ tmp->dirty = 1; ++} ++ ++static inline void __try_read_end(const urw_t *urw, urwstate_t *tmp) ++{ ++ smp_rmb(); ++ if (urw->sequence != tmp->token) { ++ __try_read_start(urw, tmp); ++ return; ++ } ++ ++ tmp->dirty = 0; ++} ++ ++static inline void __do_write_start(urw_t *urw, urwstate_t *tmp) ++{ ++ urw->sequence++; ++ tmp->dirty = 1; ++ smp_wmb(); ++} ++ ++static inline void __do_write_end(urw_t *urw, urwstate_t *tmp) ++{ ++ smp_wmb(); ++ tmp->dirty = 0; ++ urw->sequence++; ++} ++ ++static inline void unsynced_rw_init(urw_t *urw) ++{ ++ urw->sequence = 0; ++} ++ ++#define unsynced_read_block(__tmp, __urw) \ ++ for (__try_read_start(__urw, __tmp); \ ++ (__tmp)->dirty; __try_read_end(__urw, __tmp)) ++ ++#define unsynced_write_block(__tmp, __urw) \ ++ for (__do_write_start(__urw, __tmp); \ ++ (__tmp)->dirty; __do_write_end(__urw, __tmp)) ++ ++#endif /* !_COBALT_UAPI_KERNEL_URW_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/vdso.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/vdso.h 2021-04-07 16:01:28.405632483 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Mauerer . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_VDSO_H ++#define _COBALT_UAPI_KERNEL_VDSO_H ++ ++#include ++ ++struct xnvdso_hostrt_data { ++ __u64 wall_sec; ++ __u64 wtom_sec; ++ __u64 cycle_last; ++ __u64 mask; ++ __u32 wall_nsec; ++ __u32 wtom_nsec; ++ __u32 mult; ++ __u32 shift; ++ __u32 live; ++ urw_t lock; ++}; ++ ++/* ++ * Data shared between the Cobalt kernel and applications, which lives ++ * in the shared memory heap (COBALT_MEMDEV_SHARED). ++ * xnvdso_hostrt_data.features tells which data is present. Notice ++ * that struct xnvdso may only grow, but never shrink. ++ */ ++struct xnvdso { ++ __u64 features; ++ /* XNVDSO_FEAT_HOST_REALTIME */ ++ struct xnvdso_hostrt_data hostrt_data; ++ /* XNVDSO_FEAT_WALLCLOCK_OFFSET */ ++ __u64 wallclock_offset; ++}; ++ ++/* For each shared feature, add a flag below. 
*/ ++ ++#define XNVDSO_FEAT_HOST_REALTIME 0x0000000000000001ULL ++#define XNVDSO_FEAT_WALLCLOCK_OFFSET 0x0000000000000002ULL ++ ++static inline int xnvdso_test_feature(struct xnvdso *vdso, ++ __u64 feature) ++{ ++ return (vdso->features & feature) != 0; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_VDSO_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/synch.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/synch.h 2021-04-07 16:01:28.401632489 +0800 +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2008, 2009 Jan Kiszka . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_SYNCH_H ++#define _COBALT_UAPI_KERNEL_SYNCH_H ++ ++#include ++ ++/* Creation flags */ ++#define XNSYNCH_FIFO 0x0 ++#define XNSYNCH_PRIO 0x1 ++#define XNSYNCH_PI 0x2 ++#define XNSYNCH_DREORD 0x4 ++#define XNSYNCH_OWNER 0x8 ++#define XNSYNCH_PP 0x10 ++ ++/* Fast lock API */ ++static inline int xnsynch_fast_is_claimed(xnhandle_t handle) ++{ ++ return (handle & XNSYNCH_FLCLAIM) != 0; ++} ++ ++static inline xnhandle_t xnsynch_fast_claimed(xnhandle_t handle) ++{ ++ return handle | XNSYNCH_FLCLAIM; ++} ++ ++static inline xnhandle_t xnsynch_fast_ceiling(xnhandle_t handle) ++{ ++ return handle | XNSYNCH_FLCEIL; ++} ++ ++static inline int ++xnsynch_fast_owner_check(atomic_t *fastlock, xnhandle_t ownerh) ++{ ++ return (xnhandle_get_id(atomic_read(fastlock)) == ownerh) ? ++ 0 : -EPERM; ++} ++ ++static inline ++int xnsynch_fast_acquire(atomic_t *fastlock, xnhandle_t new_ownerh) ++{ ++ xnhandle_t h; ++ ++ h = atomic_cmpxchg(fastlock, XN_NO_HANDLE, new_ownerh); ++ if (h != XN_NO_HANDLE) { ++ if (xnhandle_get_id(h) == new_ownerh) ++ return -EBUSY; ++ ++ return -EAGAIN; ++ } ++ ++ return 0; ++} ++ ++static inline ++int xnsynch_fast_release(atomic_t *fastlock, xnhandle_t cur_ownerh) ++{ ++ return atomic_cmpxchg(fastlock, cur_ownerh, XN_NO_HANDLE) ++ == cur_ownerh; ++} ++ ++/* Local/shared property */ ++static inline int xnsynch_is_shared(xnhandle_t handle) ++{ ++ return (handle & XNSYNCH_PSHARED) != 0; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_SYNCH_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/pipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/pipe.h 2021-04-07 16:01:28.396632496 +0800 +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_PIPE_H ++#define _COBALT_UAPI_KERNEL_PIPE_H ++ ++#define XNPIPE_IOCTL_BASE 'p' ++ ++#define XNPIPEIOC_GET_NRDEV _IOW(XNPIPE_IOCTL_BASE, 0, int) ++#define XNPIPEIOC_IFLUSH _IO(XNPIPE_IOCTL_BASE, 1) ++#define XNPIPEIOC_OFLUSH _IO(XNPIPE_IOCTL_BASE, 2) ++#define XNPIPEIOC_FLUSH XNPIPEIOC_OFLUSH ++#define XNPIPEIOC_SETSIG _IO(XNPIPE_IOCTL_BASE, 3) ++ ++#define XNPIPE_NORMAL 0x0 ++#define XNPIPE_URGENT 0x1 ++ ++#define XNPIPE_IFLUSH 0x1 ++#define XNPIPE_OFLUSH 0x2 ++ ++#define XNPIPE_MINOR_AUTO (-1) ++ ++#endif /* !_COBALT_UAPI_KERNEL_PIPE_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/types.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/types.h 2021-04-07 16:01:28.392632502 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_TYPES_H ++#define _COBALT_UAPI_KERNEL_TYPES_H ++ ++#include ++#include ++ ++typedef __u64 xnticks_t; ++ ++typedef __s64 xnsticks_t; ++ ++typedef __u32 xnhandle_t; ++ ++#define XN_NO_HANDLE ((xnhandle_t)0) ++#define XN_HANDLE_INDEX_MASK ((xnhandle_t)0xf0000000) ++ ++/* Fixed bits (part of the identifier) */ ++#define XNSYNCH_PSHARED ((xnhandle_t)0x40000000) ++ ++/* Transient bits (expressing a status) */ ++#define XNSYNCH_FLCLAIM ((xnhandle_t)0x80000000) /* Contended. */ ++#define XNSYNCH_FLCEIL ((xnhandle_t)0x20000000) /* Ceiling active. */ ++ ++#define XN_HANDLE_TRANSIENT_MASK (XNSYNCH_FLCLAIM|XNSYNCH_FLCEIL) ++ ++/* ++ * Strip all special bits from the handle, only retaining the object ++ * index value in the registry. ++ */ ++static inline xnhandle_t xnhandle_get_index(xnhandle_t handle) ++{ ++ return handle & ~XN_HANDLE_INDEX_MASK; ++} ++ ++/* ++ * Strip the transient bits from the handle, only retaining the fixed ++ * part making the identifier. ++ */ ++static inline xnhandle_t xnhandle_get_id(xnhandle_t handle) ++{ ++ return handle & ~XN_HANDLE_TRANSIENT_MASK; ++} ++ ++#endif /* !_COBALT_UAPI_KERNEL_TYPES_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/trace.h 2021-04-07 16:01:28.387632509 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . 
++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_TRACE_H ++#define _COBALT_UAPI_KERNEL_TRACE_H ++ ++#define __xntrace_op_max_begin 0 ++#define __xntrace_op_max_end 1 ++#define __xntrace_op_max_reset 2 ++#define __xntrace_op_user_start 3 ++#define __xntrace_op_user_stop 4 ++#define __xntrace_op_user_freeze 5 ++#define __xntrace_op_special 6 ++#define __xntrace_op_special_u64 7 ++ ++#endif /* !_COBALT_UAPI_KERNEL_TRACE_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/limits.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/limits.h 2021-04-07 16:01:28.382632516 +0800 +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_LIMITS_H ++#define _COBALT_UAPI_KERNEL_LIMITS_H ++ ++#define XNOBJECT_NAME_LEN 32 ++ ++#endif /* !_COBALT_UAPI_KERNEL_LIMITS_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/heap.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/heap.h 2021-04-07 16:01:28.378632521 +0800 +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_KERNEL_HEAP_H ++#define _COBALT_UAPI_KERNEL_HEAP_H ++ ++#include ++ ++#define COBALT_MEMDEV_PRIVATE "memdev-private" ++#define COBALT_MEMDEV_SHARED "memdev-shared" ++#define COBALT_MEMDEV_SYS "memdev-sys" ++ ++struct cobalt_memdev_stat { ++ __u32 size; ++ __u32 free; ++}; ++ ++#define MEMDEV_RTIOC_STAT _IOR(RTDM_CLASS_MEMORY, 0, struct cobalt_memdev_stat) ++ ++#endif /* !_COBALT_UAPI_KERNEL_HEAP_H */ +--- linux/include/xenomai/cobalt/uapi/kernel/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/kernel/thread.h 2021-04-07 16:01:28.373632529 +0800 +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_KERNEL_THREAD_H ++#define _COBALT_UAPI_KERNEL_THREAD_H ++ ++#include ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_thread_states Thread state flags ++ * @brief Bits reporting permanent or transient states of threads ++ * @{ ++ */ ++ ++/* State flags (shared) */ ++ ++#define XNSUSP 0x00000001 /**< Suspended. */ ++#define XNPEND 0x00000002 /**< Sleep-wait for a resource. */ ++#define XNDELAY 0x00000004 /**< Delayed */ ++#define XNREADY 0x00000008 /**< Linked to the ready queue. */ ++#define XNDORMANT 0x00000010 /**< Not started yet */ ++#define XNZOMBIE 0x00000020 /**< Zombie thread in deletion process */ ++#define XNMAPPED 0x00000040 /**< Thread is mapped to a linux task */ ++#define XNRELAX 0x00000080 /**< Relaxed shadow thread (blocking bit) */ ++#define XNMIGRATE 0x00000100 /**< Thread is currently migrating to another CPU. */ ++#define XNHELD 0x00000200 /**< Thread is held to process emergency. */ ++#define XNBOOST 0x00000400 /**< PI/PP boost undergoing */ ++#define XNSSTEP 0x00000800 /**< Single-stepped by debugger */ ++#define XNLOCK 0x00001000 /**< Scheduler lock control (pseudo-bit, not in ->state) */ ++#define XNRRB 0x00002000 /**< Undergoes a round-robin scheduling */ ++#define XNWARN 0x00004000 /**< Issue SIGDEBUG on error detection */ ++#define XNFPU 0x00008000 /**< Thread uses FPU */ ++#define XNROOT 0x00010000 /**< Root thread (that is, Linux/IDLE) */ ++#define XNWEAK 0x00020000 /**< Non real-time shadow (from the WEAK class) */ ++#define XNUSER 0x00040000 /**< Shadow thread running in userland */ ++#define XNJOINED 0x00080000 /**< Another thread waits for joining this thread */ ++#define XNTRAPLB 0x00100000 /**< Trap lock break (i.e. 
may not sleep with sched lock) */ ++#define XNDEBUG 0x00200000 /**< User-level debugging enabled */ ++#define XNDBGSTOP 0x00400000 /**< Stopped for synchronous debugging */ ++ ++/** @} */ ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_thread_info Thread information flags ++ * @brief Bits reporting events notified to threads ++ * @{ ++ */ ++ ++/* Information flags (shared) */ ++ ++#define XNTIMEO 0x00000001 /**< Woken up due to a timeout condition */ ++#define XNRMID 0x00000002 /**< Pending on a removed resource */ ++#define XNBREAK 0x00000004 /**< Forcibly awaken from a wait state */ ++#define XNKICKED 0x00000008 /**< Forced out of primary mode */ ++#define XNWAKEN 0x00000010 /**< Thread waken up upon resource availability */ ++#define XNROBBED 0x00000020 /**< Robbed from resource ownership */ ++#define XNCANCELD 0x00000040 /**< Cancellation request is pending */ ++#define XNPIALERT 0x00000080 /**< Priority inversion alert (SIGDEBUG sent) */ ++#define XNSCHEDP 0x00000100 /**< schedparam propagation is pending */ ++#define XNCONTHI 0x00000200 /**< Continue in primary mode after debugging */ ++ ++/* Local information flags (private to current thread) */ ++ ++#define XNMOVED 0x00000001 /**< CPU migration in primary mode occurred */ ++#define XNLBALERT 0x00000002 /**< Scheduler lock break alert (SIGDEBUG sent) */ ++#define XNDESCENT 0x00000004 /**< Adaptive transitioning to secondary mode */ ++#define XNSYSRST 0x00000008 /**< Thread awaiting syscall restart after signal */ ++#define XNHICCUP 0x00000010 /**< Just left from ptracing */ ++ ++/** @} */ ++ ++/* ++ * Must follow strictly the declaration order of the state flags ++ * defined above. Status symbols are defined as follows: ++ * ++ * 'S' -> Forcibly suspended. ++ * 'w'/'W' -> Waiting for a resource, with or without timeout. ++ * 'D' -> Delayed (without any other wait condition). ++ * 'R' -> Runnable. ++ * 'U' -> Unstarted or dormant. ++ * 'X' -> Relaxed shadow. ++ * 'H' -> Held in emergency. ++ * 'b' -> Priority boost undergoing. ++ * 'T' -> Ptraced and stopped. ++ * 'l' -> Locks scheduler. ++ * 'r' -> Undergoes round-robin. ++ * 't' -> Runtime mode errors notified. ++ * 'L' -> Lock breaks trapped. ++ * 's' -> Ptraced, stopped synchronously. ++ */ ++#define XNTHREAD_STATE_LABELS "SWDRU..X.HbTlrt.....L.s" ++ ++struct xnthread_user_window { ++ __u32 state; ++ __u32 info; ++ __u32 grant_value; ++ __u32 pp_pending; ++}; ++ ++#endif /* !_COBALT_UAPI_KERNEL_THREAD_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/syscall.h 2021-04-07 16:01:28.368632536 +0800 +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_SYSCALL_H ++#define _COBALT_UAPI_ASM_GENERIC_SYSCALL_H ++ ++#include ++#include ++#include ++ ++#define __COBALT_SYSCALL_BIT 0x10000000 ++ ++struct cobalt_bindreq { ++ /** Features userland requires. */ ++ __u32 feat_req; ++ /** ABI revision userland uses. */ ++ __u32 abi_rev; ++ /** Features the Cobalt core provides. */ ++ struct cobalt_featinfo feat_ret; ++}; ++ ++#define COBALT_SECONDARY 0 ++#define COBALT_PRIMARY 1 ++ ++#endif /* !_COBALT_UAPI_ASM_GENERIC_SYSCALL_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/features.h 2021-04-07 16:01:28.364632542 +0800 +@@ -0,0 +1,114 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_FEATURES_H ++#define _COBALT_UAPI_ASM_GENERIC_FEATURES_H ++ ++#include ++ ++#define XNFEAT_STRING_LEN 64 ++ ++struct cobalt_featinfo { ++ /** Real-time clock frequency */ ++ __u64 clock_freq; ++ /** Offset of nkvdso in the sem heap. */ ++ __u32 vdso_offset; ++ /** ABI revision level. */ ++ __u32 feat_abirev; ++ /** Available feature set. */ ++ __u32 feat_all; ++ /** Mandatory features (when requested). */ ++ __u32 feat_man; ++ /** Requested feature set. */ ++ __u32 feat_req; ++ /** Missing features. */ ++ __u32 feat_mis; ++ char feat_all_s[XNFEAT_STRING_LEN]; ++ char feat_man_s[XNFEAT_STRING_LEN]; ++ char feat_req_s[XNFEAT_STRING_LEN]; ++ char feat_mis_s[XNFEAT_STRING_LEN]; ++ /* Architecture-specific features. */ ++ struct cobalt_featinfo_archdep feat_arch; ++}; ++ ++#define __xn_feat_smp 0x80000000 ++#define __xn_feat_nosmp 0x40000000 ++#define __xn_feat_fastsynch 0x20000000 ++#define __xn_feat_nofastsynch 0x10000000 ++#define __xn_feat_control 0x08000000 ++#define __xn_feat_prioceiling 0x04000000 ++ ++#ifdef CONFIG_SMP ++#define __xn_feat_smp_mask __xn_feat_smp ++#else ++#define __xn_feat_smp_mask __xn_feat_nosmp ++#endif ++ ++/* ++ * Revisit: all archs currently support fast locking, and there is no ++ * reason for any future port not to provide this. This will be ++ * written in stone at the next ABI update, when fastsynch support is ++ * dropped from the optional feature set. ++ */ ++#define __xn_feat_fastsynch_mask __xn_feat_fastsynch ++ ++/* List of generic features kernel or userland may support */ ++#define __xn_feat_generic_mask \ ++ (__xn_feat_smp_mask | \ ++ __xn_feat_fastsynch_mask | \ ++ __xn_feat_prioceiling) ++ ++/* ++ * List of features both sides have to agree on: If userland supports ++ * it, the kernel has to provide it, too. 
This means backward ++ * compatibility between older userland and newer kernel may be ++ * supported for those features, but forward compatibility between ++ * newer userland and older kernel cannot. ++ */ ++#define __xn_feat_generic_man_mask \ ++ (__xn_feat_fastsynch | \ ++ __xn_feat_nofastsynch | \ ++ __xn_feat_nosmp | \ ++ __xn_feat_prioceiling) ++ ++static inline ++const char *get_generic_feature_label(unsigned int feature) ++{ ++ switch (feature) { ++ case __xn_feat_smp: ++ return "smp"; ++ case __xn_feat_nosmp: ++ return "nosmp"; ++ case __xn_feat_fastsynch: ++ return "fastsynch"; ++ case __xn_feat_nofastsynch: ++ return "nofastsynch"; ++ case __xn_feat_control: ++ return "control"; ++ case __xn_feat_prioceiling: ++ return "prioceiling"; ++ default: ++ return 0; ++ } ++} ++ ++static inline int check_abi_revision(unsigned long abirev) ++{ ++ return abirev == XENOMAI_ABI_REV; ++} ++ ++#endif /* !_COBALT_UAPI_ASM_GENERIC_FEATURES_H */ +--- linux/include/xenomai/cobalt/uapi/asm-generic/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/asm-generic/arith.h 2021-04-07 16:01:28.359632549 +0800 +@@ -0,0 +1,365 @@ ++/** ++ * Generic arithmetic/conversion routines. ++ * Copyright © 2005 Stelian Pop. ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_UAPI_ASM_GENERIC_ARITH_H ++#define _COBALT_UAPI_ASM_GENERIC_ARITH_H ++ ++#ifndef xnarch_u64tou32 ++#define xnarch_u64tou32(ull, h, l) ({ \ ++ union { \ ++ unsigned long long _ull; \ ++ struct endianstruct _s; \ ++ } _u; \ ++ _u._ull = (ull); \ ++ (h) = _u._s._h; \ ++ (l) = _u._s._l; \ ++}) ++#endif /* !xnarch_u64tou32 */ ++ ++#ifndef xnarch_u64fromu32 ++#define xnarch_u64fromu32(h, l) ({ \ ++ union { \ ++ unsigned long long _ull; \ ++ struct endianstruct _s; \ ++ } _u; \ ++ _u._s._h = (h); \ ++ _u._s._l = (l); \ ++ _u._ull; \ ++}) ++#endif /* !xnarch_u64fromu32 */ ++ ++#ifndef xnarch_ullmul ++static inline __attribute__((__const__)) unsigned long long ++xnarch_generic_ullmul(const unsigned m0, const unsigned m1) ++{ ++ return (unsigned long long) m0 * m1; ++} ++#define xnarch_ullmul(m0,m1) xnarch_generic_ullmul((m0),(m1)) ++#endif /* !xnarch_ullmul */ ++ ++#ifndef xnarch_ulldiv ++static inline unsigned long long xnarch_generic_ulldiv (unsigned long long ull, ++ const unsigned uld, ++ unsigned long *const rp) ++{ ++ const unsigned r = do_div(ull, uld); ++ ++ if (rp) ++ *rp = r; ++ ++ return ull; ++} ++#define xnarch_ulldiv(ull,uld,rp) xnarch_generic_ulldiv((ull),(uld),(rp)) ++#endif /* !xnarch_ulldiv */ ++ ++#ifndef xnarch_uldivrem ++#define xnarch_uldivrem(ull,ul,rp) ((unsigned) xnarch_ulldiv((ull),(ul),(rp))) ++#endif /* !xnarch_uldivrem */ ++ ++#ifndef xnarch_divmod64 ++static inline unsigned long long ++xnarch_generic_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem) ++{ ++ unsigned long long q; ++#if defined(__KERNEL__) && BITS_PER_LONG < 64 ++ unsigned long long ++ xnarch_generic_full_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem); ++ if (b <= 0xffffffffULL) { ++ unsigned long r; ++ q = xnarch_ulldiv(a, b, &r); ++ if (rem) ++ *rem = r; ++ } else { ++ if (a < b) { ++ if (rem) ++ *rem = a; ++ return 0; ++ } ++ ++ return xnarch_generic_full_divmod64(a, b, rem); ++ } ++#else /* !(__KERNEL__ && BITS_PER_LONG < 64) */ ++ q = a / b; ++ if (rem) ++ *rem = a % b; ++#endif /* !(__KERNEL__ && BITS_PER_LONG < 64) */ ++ return q; ++} ++#define xnarch_divmod64(a,b,rp) xnarch_generic_divmod64((a),(b),(rp)) ++#endif /* !xnarch_divmod64 */ ++ ++#ifndef xnarch_imuldiv ++static inline __attribute__((__const__)) int xnarch_generic_imuldiv(int i, ++ int mult, ++ int div) ++{ ++ /* (int)i = (unsigned long long)i*(unsigned)(mult)/(unsigned)div. */ ++ const unsigned long long ull = xnarch_ullmul(i, mult); ++ return xnarch_uldivrem(ull, div, NULL); ++} ++#define xnarch_imuldiv(i,m,d) xnarch_generic_imuldiv((i),(m),(d)) ++#endif /* !xnarch_imuldiv */ ++ ++#ifndef xnarch_imuldiv_ceil ++static inline __attribute__((__const__)) int xnarch_generic_imuldiv_ceil(int i, ++ int mult, ++ int div) ++{ ++ /* Same as xnarch_generic_imuldiv, rounding up. */ ++ const unsigned long long ull = xnarch_ullmul(i, mult); ++ return xnarch_uldivrem(ull + (unsigned)div - 1, div, NULL); ++} ++#define xnarch_imuldiv_ceil(i,m,d) xnarch_generic_imuldiv_ceil((i),(m),(d)) ++#endif /* !xnarch_imuldiv_ceil */ ++ ++/* Division of an unsigned 96 bits ((h << 32) + l) by an unsigned 32 bits. ++ Building block for llimd. Without const qualifiers, gcc reload registers ++ after each call to uldivrem. 
*/ ++static inline unsigned long long ++xnarch_generic_div96by32(const unsigned long long h, ++ const unsigned l, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long rh; ++ const unsigned qh = xnarch_uldivrem(h, d, &rh); ++ const unsigned long long t = xnarch_u64fromu32(rh, l); ++ const unsigned ql = xnarch_uldivrem(t, d, rp); ++ ++ return xnarch_u64fromu32(qh, ql); ++} ++ ++#ifndef xnarch_llimd ++static inline __attribute__((__const__)) ++unsigned long long xnarch_generic_ullimd(const unsigned long long op, ++ const unsigned m, ++ const unsigned d) ++{ ++ unsigned int oph, opl, tlh, tll; ++ unsigned long long th, tl; ++ ++ xnarch_u64tou32(op, oph, opl); ++ tl = xnarch_ullmul(opl, m); ++ xnarch_u64tou32(tl, tlh, tll); ++ th = xnarch_ullmul(oph, m); ++ th += tlh; ++ ++ return xnarch_generic_div96by32(th, tll, d, NULL); ++} ++ ++static inline __attribute__((__const__)) long long ++xnarch_generic_llimd (long long op, unsigned m, unsigned d) ++{ ++ long long ret; ++ int sign = 0; ++ ++ if (op < 0LL) { ++ sign = 1; ++ op = -op; ++ } ++ ret = xnarch_generic_ullimd(op, m, d); ++ ++ return sign ? -ret : ret; ++} ++#define xnarch_llimd(ll,m,d) xnarch_generic_llimd((ll),(m),(d)) ++#endif /* !xnarch_llimd */ ++ ++#ifndef _xnarch_u96shift ++#define xnarch_u96shift(h, m, l, s) ({ \ ++ unsigned int _l = (l); \ ++ unsigned int _m = (m); \ ++ unsigned int _s = (s); \ ++ _l >>= _s; \ ++ _l |= (_m << (32 - _s)); \ ++ _m >>= _s; \ ++ _m |= ((h) << (32 - _s)); \ ++ xnarch_u64fromu32(_m, _l); \ ++}) ++#endif /* !xnarch_u96shift */ ++ ++static inline long long xnarch_llmi(int i, int j) ++{ ++ /* Fast 32x32->64 signed multiplication */ ++ return (long long) i * j; ++} ++ ++#ifndef xnarch_llmulshft ++/* Fast scaled-math-based replacement for long long multiply-divide */ ++static inline long long ++xnarch_generic_llmulshft(const long long op, ++ const unsigned m, ++ const unsigned s) ++{ ++ unsigned int oph, opl, tlh, tll, thh, thl; ++ unsigned long long th, tl; ++ ++ xnarch_u64tou32(op, oph, opl); ++ tl = xnarch_ullmul(opl, m); ++ xnarch_u64tou32(tl, tlh, tll); ++ th = xnarch_llmi(oph, m); ++ th += tlh; ++ xnarch_u64tou32(th, thh, thl); ++ ++ return xnarch_u96shift(thh, thl, tll, s); ++} ++#define xnarch_llmulshft(ll, m, s) xnarch_generic_llmulshft((ll), (m), (s)) ++#endif /* !xnarch_llmulshft */ ++ ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ ++/* Representation of a 32 bits fraction. */ ++struct xnarch_u32frac { ++ unsigned long long frac; ++ unsigned integ; ++}; ++ ++static inline void xnarch_init_u32frac(struct xnarch_u32frac *const f, ++ const unsigned m, ++ const unsigned d) ++{ ++ /* ++ * Avoid clever compiler optimizations to occur when d is ++ * known at compile-time. The performance of this function is ++ * not critical since it is only called at init time. 
++ */ ++ volatile unsigned vol_d = d; ++ f->integ = m / d; ++ f->frac = xnarch_generic_div96by32 ++ (xnarch_u64fromu32(m % d, 0), 0, vol_d, NULL); ++} ++ ++#ifndef xnarch_nodiv_imuldiv ++static inline __attribute__((__const__)) unsigned ++xnarch_generic_nodiv_imuldiv(unsigned op, const struct xnarch_u32frac f) ++{ ++ return (xnarch_ullmul(op, f.frac >> 32) >> 32) + f.integ * op; ++} ++#define xnarch_nodiv_imuldiv(op, f) xnarch_generic_nodiv_imuldiv((op),(f)) ++#endif /* xnarch_nodiv_imuldiv */ ++ ++#ifndef xnarch_nodiv_imuldiv_ceil ++static inline __attribute__((__const__)) unsigned ++xnarch_generic_nodiv_imuldiv_ceil(unsigned op, const struct xnarch_u32frac f) ++{ ++ unsigned long long full = xnarch_ullmul(op, f.frac >> 32) + ~0U; ++ return (full >> 32) + f.integ * op; ++} ++#define xnarch_nodiv_imuldiv_ceil(op, f) \ ++ xnarch_generic_nodiv_imuldiv_ceil((op),(f)) ++#endif /* xnarch_nodiv_imuldiv_ceil */ ++ ++#ifndef xnarch_nodiv_ullimd ++ ++#ifndef xnarch_add96and64 ++#error "xnarch_add96and64 must be implemented." ++#endif ++ ++static inline __attribute__((__const__)) unsigned long long ++xnarch_mul64by64_high(const unsigned long long op, const unsigned long long m) ++{ ++ /* Compute high 64 bits of multiplication 64 bits x 64 bits. */ ++ register unsigned long long t0, t1, t2, t3; ++ register unsigned int oph, opl, mh, ml, t0h, t0l, t1h, t1l, t2h, t2l, t3h, t3l; ++ ++ xnarch_u64tou32(op, oph, opl); ++ xnarch_u64tou32(m, mh, ml); ++ t0 = xnarch_ullmul(opl, ml); ++ xnarch_u64tou32(t0, t0h, t0l); ++ t3 = xnarch_ullmul(oph, mh); ++ xnarch_u64tou32(t3, t3h, t3l); ++ xnarch_add96and64(t3h, t3l, t0h, 0, t0l >> 31); ++ t1 = xnarch_ullmul(oph, ml); ++ xnarch_u64tou32(t1, t1h, t1l); ++ xnarch_add96and64(t3h, t3l, t0h, t1h, t1l); ++ t2 = xnarch_ullmul(opl, mh); ++ xnarch_u64tou32(t2, t2h, t2l); ++ xnarch_add96and64(t3h, t3l, t0h, t2h, t2l); ++ ++ return xnarch_u64fromu32(t3h, t3l); ++} ++ ++static inline unsigned long long ++xnarch_generic_nodiv_ullimd(const unsigned long long op, ++ const unsigned long long frac, ++ unsigned int integ) ++{ ++ return xnarch_mul64by64_high(op, frac) + integ * op; ++} ++#define xnarch_nodiv_ullimd(op, f, i) xnarch_generic_nodiv_ullimd((op),(f), (i)) ++#endif /* !xnarch_nodiv_ullimd */ ++ ++#ifndef xnarch_nodiv_llimd ++static inline __attribute__((__const__)) long long ++xnarch_generic_nodiv_llimd(long long op, unsigned long long frac, ++ unsigned int integ) ++{ ++ long long ret; ++ int sign = 0; ++ ++ if (op < 0LL) { ++ sign = 1; ++ op = -op; ++ } ++ ret = xnarch_nodiv_ullimd(op, frac, integ); ++ ++ return sign ? -ret : ret; ++} ++#define xnarch_nodiv_llimd(ll,frac,integ) xnarch_generic_nodiv_llimd((ll),(frac),(integ)) ++#endif /* !xnarch_nodiv_llimd */ ++ ++#endif /* XNARCH_HAVE_NODIV_LLIMD */ ++ ++static inline void xnarch_init_llmulshft(const unsigned m_in, ++ const unsigned d_in, ++ unsigned *m_out, ++ unsigned *s_out) ++{ ++ /* ++ * Avoid clever compiler optimizations to occur when d is ++ * known at compile-time. The performance of this function is ++ * not critical since it is only called at init time. 
++ */ ++ volatile unsigned int vol_d = d_in; ++ unsigned long long mult; ++ ++ *s_out = 31; ++ while (1) { ++ mult = ((unsigned long long)m_in) << *s_out; ++ do_div(mult, vol_d); ++ if (mult <= 0x7FFFFFFF) ++ break; ++ (*s_out)--; ++ } ++ *m_out = (unsigned int)mult; ++} ++ ++#define xnarch_ullmod(ull,uld,rem) ({ xnarch_ulldiv(ull,uld,rem); (*rem); }) ++#define xnarch_uldiv(ull, d) xnarch_uldivrem(ull, d, NULL) ++#define xnarch_ulmod(ull, d) ({ unsigned long _rem; \ ++ xnarch_uldivrem(ull,d,&_rem); _rem; }) ++ ++#define xnarch_div64(a,b) xnarch_divmod64((a),(b),NULL) ++#define xnarch_mod64(a,b) ({ unsigned long long _rem; \ ++ xnarch_divmod64((a),(b),&_rem); _rem; }) ++ ++#endif /* _COBALT_UAPI_ASM_GENERIC_ARITH_H */ +--- linux/include/xenomai/cobalt/uapi/mutex.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/mutex.h 2021-04-07 16:01:28.354632556 +0800 +@@ -0,0 +1,44 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_MUTEX_H ++#define _COBALT_UAPI_MUTEX_H ++ ++#include ++ ++#define COBALT_MUTEX_MAGIC 0x86860303 ++ ++struct cobalt_mutex_state { ++ atomic_t owner; ++ __u32 flags; ++#define COBALT_MUTEX_COND_SIGNAL 0x00000001 ++#define COBALT_MUTEX_ERRORCHECK 0x00000002 ++ __u32 ceiling; ++}; ++ ++union cobalt_mutex_union { ++ pthread_mutex_t native_mutex; ++ struct cobalt_mutex_shadow { ++ __u32 magic; ++ __u32 lockcnt; ++ __u32 state_offset; ++ xnhandle_t handle; ++ struct cobalt_mutexattr attr; ++ } shadow_mutex; ++}; ++ ++#endif /* !_COBALT_UAPI_MUTEX_H */ +--- linux/include/xenomai/cobalt/uapi/time.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/time.h 2021-04-07 16:01:28.350632562 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
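The fixed-point helpers defined in this hunk (xnarch_init_llmulshft() picks a multiplier/shift pair once, xnarch_llmulshft() then scales 64-bit values without dividing on the hot path) are the usual way Cobalt converts clock ticks to nanoseconds. The standalone userspace sketch below reproduces the technique only, not the patch code: the 24 MHz timebase is an arbitrary assumption and __int128 stands in for the 96-bit helpers above.

#include <stdio.h>
#include <stdint.h>

/* Pick the largest shift s such that (m_in << s) / d still fits in 31
 * bits, mirroring xnarch_init_llmulshft() above; afterwards
 * (op * m) >> s approximates op * m_in / d with no division. */
static void demo_init_llmulshft(uint32_t m_in, uint32_t d,
				uint32_t *m, uint32_t *s)
{
	uint64_t mult;

	*s = 31;
	for (;;) {
		mult = ((uint64_t)m_in << *s) / d;
		if (mult <= 0x7FFFFFFF)
			break;
		(*s)--;
	}
	*m = (uint32_t)mult;
}

int main(void)
{
	uint32_t m, s;
	uint64_t ticks = 240000000ULL;	/* 10 s worth of 24 MHz ticks */

	/* Assumed 24 MHz timebase -> nanoseconds (factor 1e9 / 24e6). */
	demo_init_llmulshft(1000000000u, 24000000u, &m, &s);

	/* __int128 (GCC/Clang extension) replaces the 96-bit math here. */
	unsigned long long ns =
		(unsigned long long)(((__int128)ticks * m) >> s);

	printf("m=%u s=%u -> %llu ns (exact: %llu ns)\n",
	       m, s, ns, ticks * 1000ULL / 24ULL);
	return 0;
}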
++ */ ++#ifndef _COBALT_UAPI_TIME_H ++#define _COBALT_UAPI_TIME_H ++ ++#ifndef CLOCK_MONOTONIC_RAW ++#define CLOCK_MONOTONIC_RAW 4 ++#endif ++ ++/* ++ * Additional clock ids we manage are supposed not to collide with any ++ * of the POSIX and Linux kernel definitions so that no ambiguities ++ * arise when porting applications in both directions. ++ * ++ * 0 .. 31 regular POSIX/linux clock ids. ++ * 32 .. 63 statically reserved Cobalt clocks ++ * 64 .. 127 dynamically registered Cobalt clocks (external) ++ * ++ * CAUTION: clock ids must fit within a 7bit value, see ++ * include/cobalt/uapi/thread.h (e.g. cobalt_condattr). ++ */ ++#define __COBALT_CLOCK_STATIC(nr) ((clockid_t)(nr + 32)) ++ ++#define CLOCK_HOST_REALTIME __COBALT_CLOCK_STATIC(0) ++ ++#define COBALT_MAX_EXTCLOCKS 64 ++ ++#define __COBALT_CLOCK_EXT(nr) ((clockid_t)(nr) | (1 << 6)) ++#define __COBALT_CLOCK_EXT_P(id) ((int)(id) >= 64 && (int)(id) < 128) ++#define __COBALT_CLOCK_EXT_INDEX(id) ((int)(id) & ~(1 << 6)) ++ ++/* ++ * Additional timerfd defines ++ * ++ * when passing TFD_WAKEUP to timer_settime, any timer expiration ++ * unblocks the thread having issued timer_settime. ++ */ ++#define TFD_WAKEUP (1 << 2) ++ ++#endif /* !_COBALT_UAPI_TIME_H */ +--- linux/include/xenomai/cobalt/uapi/sem.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/sem.h 2021-04-07 16:01:28.345632569 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SEM_H ++#define _COBALT_UAPI_SEM_H ++ ++#include ++ ++#define COBALT_SEM_MAGIC (0x86860707) ++#define COBALT_NAMED_SEM_MAGIC (0x86860D0D) ++ ++struct cobalt_sem; ++ ++struct cobalt_sem_state { ++ atomic_t value; ++ __u32 flags; ++}; ++ ++union cobalt_sem_union { ++ sem_t native_sem; ++ struct cobalt_sem_shadow { ++ __u32 magic; ++ __s32 state_offset; ++ xnhandle_t handle; ++ } shadow_sem; ++}; ++ ++struct cobalt_sem_info { ++ unsigned int value; ++ int flags; ++ int nrwait; ++}; ++ ++#define SEM_FIFO 0x1 ++#define SEM_PULSE 0x2 ++#define SEM_PSHARED 0x4 ++#define SEM_REPORT 0x8 ++#define SEM_WARNDEL 0x10 ++#define SEM_RAWCLOCK 0x20 ++#define SEM_NOBUSYDEL 0x40 ++ ++#endif /* !_COBALT_UAPI_SEM_H */ +--- linux/include/xenomai/cobalt/uapi/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/thread.h 2021-04-07 16:01:28.341632574 +0800 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
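The clock id map spelled out above (0-31 POSIX/Linux, 32-63 static Cobalt clocks, 64-127 dynamically registered external clocks) keeps every id within 7 bits so it fits bitfields such as cobalt_condattr.clock. A minimal standalone illustration of the external-clock encoding follows, with the macros copied under DEMO_ names so the snippet compiles without the UAPI headers:

#include <stdio.h>

/* Local copies of __COBALT_CLOCK_EXT and friends, renamed for the demo. */
#define DEMO_CLOCK_EXT(nr)       ((nr) | (1 << 6))
#define DEMO_CLOCK_EXT_P(id)     ((id) >= 64 && (id) < 128)
#define DEMO_CLOCK_EXT_INDEX(id) ((id) & ~(1 << 6))

int main(void)
{
	for (int nr = 0; nr < 3; nr++) {
		int id = DEMO_CLOCK_EXT(nr);

		printf("external clock #%d -> id %d (external? %d, index back %d)\n",
		       nr, id, DEMO_CLOCK_EXT_P(id), DEMO_CLOCK_EXT_INDEX(id));
	}
	return 0;
}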
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_THREAD_H ++#define _COBALT_UAPI_THREAD_H ++ ++#include ++ ++#define PTHREAD_WARNSW XNWARN ++#define PTHREAD_LOCK_SCHED XNLOCK ++#define PTHREAD_DISABLE_LOCKBREAK XNTRAPLB ++#define PTHREAD_CONFORMING 0 ++ ++struct cobalt_mutexattr { ++ int type : 3; ++ int protocol : 3; ++ int pshared : 1; ++ int __pad : 1; ++ int ceiling : 8; /* prio-1, (XN)SCHED_FIFO range. */ ++}; ++ ++struct cobalt_condattr { ++ int clock : 7; ++ int pshared : 1; ++}; ++ ++struct cobalt_threadstat { ++ __u64 xtime; ++ __u64 timeout; ++ __u64 msw; ++ __u64 csw; ++ __u64 xsc; ++ __u32 status; ++ __u32 pf; ++ int cpu; ++ int cprio; ++ char name[XNOBJECT_NAME_LEN]; ++ char personality[XNOBJECT_NAME_LEN]; ++}; ++ ++#endif /* !_COBALT_UAPI_THREAD_H */ +--- linux/include/xenomai/cobalt/uapi/monitor.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/monitor.h 2021-04-07 16:01:28.336632582 +0800 +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_MONITOR_H ++#define _COBALT_UAPI_MONITOR_H ++ ++#include ++ ++struct cobalt_monitor_state { ++ atomic_t owner; ++ __u32 flags; ++#define COBALT_MONITOR_GRANTED 0x01 ++#define COBALT_MONITOR_DRAINED 0x02 ++#define COBALT_MONITOR_SIGNALED 0x03 /* i.e. GRANTED or DRAINED */ ++#define COBALT_MONITOR_BROADCAST 0x04 ++#define COBALT_MONITOR_PENDED 0x08 ++}; ++ ++struct cobalt_monitor; ++ ++struct cobalt_monitor_shadow { ++ __u32 state_offset; ++ __u32 flags; ++ xnhandle_t handle; ++#define COBALT_MONITOR_SHARED 0x1 ++#define COBALT_MONITOR_WAITGRANT 0x0 ++#define COBALT_MONITOR_WAITDRAIN 0x1 ++}; ++ ++typedef struct cobalt_monitor_shadow cobalt_monitor_t; ++ ++#endif /* !_COBALT_UAPI_MONITOR_H */ +--- linux/include/xenomai/cobalt/uapi/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/sched.h 2021-04-07 16:01:28.331632589 +0800 +@@ -0,0 +1,136 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SCHED_H ++#define _COBALT_UAPI_SCHED_H ++ ++#define SCHED_COBALT 42 ++#define SCHED_WEAK 43 ++ ++#ifndef SCHED_SPORADIC ++#define SCHED_SPORADIC 10 ++#define sched_ss_low_priority sched_u.ss.__sched_low_priority ++#define sched_ss_repl_period sched_u.ss.__sched_repl_period ++#define sched_ss_init_budget sched_u.ss.__sched_init_budget ++#define sched_ss_max_repl sched_u.ss.__sched_max_repl ++#endif /* !SCHED_SPORADIC */ ++ ++struct __sched_ss_param { ++ int __sched_low_priority; ++ struct timespec __sched_repl_period; ++ struct timespec __sched_init_budget; ++ int __sched_max_repl; ++}; ++ ++#define sched_rr_quantum sched_u.rr.__sched_rr_quantum ++ ++struct __sched_rr_param { ++ struct timespec __sched_rr_quantum; ++}; ++ ++#ifndef SCHED_TP ++#define SCHED_TP 11 ++#define sched_tp_partition sched_u.tp.__sched_partition ++#endif /* !SCHED_TP */ ++ ++struct __sched_tp_param { ++ int __sched_partition; ++}; ++ ++struct sched_tp_window { ++ struct timespec offset; ++ struct timespec duration; ++ int ptid; ++}; ++ ++enum { ++ sched_tp_install, ++ sched_tp_uninstall, ++ sched_tp_start, ++ sched_tp_stop, ++}; ++ ++struct __sched_config_tp { ++ int op; ++ int nr_windows; ++ struct sched_tp_window windows[0]; ++}; ++ ++#define sched_tp_confsz(nr_win) \ ++ (sizeof(struct __sched_config_tp) + nr_win * sizeof(struct sched_tp_window)) ++ ++#ifndef SCHED_QUOTA ++#define SCHED_QUOTA 12 ++#define sched_quota_group sched_u.quota.__sched_group ++#endif /* !SCHED_QUOTA */ ++ ++struct __sched_quota_param { ++ int __sched_group; ++}; ++ ++enum { ++ sched_quota_add, ++ sched_quota_remove, ++ sched_quota_force_remove, ++ sched_quota_set, ++ sched_quota_get, ++}; ++ ++struct __sched_config_quota { ++ int op; ++ union { ++ struct { ++ int pshared; ++ } add; ++ struct { ++ int tgid; ++ } remove; ++ struct { ++ int tgid; ++ int quota; ++ int quota_peak; ++ } set; ++ struct { ++ int tgid; ++ } get; ++ }; ++ struct __sched_quota_info { ++ int tgid; ++ int quota; ++ int quota_peak; ++ int quota_sum; ++ } info; ++}; ++ ++#define sched_quota_confsz() sizeof(struct __sched_config_quota) ++ ++struct sched_param_ex { ++ int sched_priority; ++ union { ++ struct __sched_ss_param ss; ++ struct __sched_rr_param rr; ++ struct __sched_tp_param tp; ++ struct __sched_quota_param quota; ++ } sched_u; ++}; ++ ++union sched_config { ++ struct __sched_config_tp tp; ++ struct __sched_config_quota quota; ++}; ++ ++#endif /* !_COBALT_UAPI_SCHED_H */ +--- linux/include/xenomai/cobalt/uapi/signal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/uapi/signal.h 2021-04-07 16:01:28.327632594 +0800 +@@ -0,0 +1,141 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. 
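struct sched_param_ex and union sched_config above are the userland-visible descriptors for Cobalt's extended policies (SCHED_TP, SCHED_QUOTA, SCHED_SPORADIC, ...). Below is a hedged sketch of filling in a two-window SCHED_TP configuration sized with sched_tp_confsz(); it assumes the definitions above are in scope, and the libcobalt call that would normally consume such a buffer (sched_setconfig_np()) is only mentioned here, not exercised.

#include <stdlib.h>
#include <string.h>

/* Build a time-partitioning table: 1 ms for partition 0, then 3 ms for
 * partition 1, repeating. The caller owns the returned buffer. */
static union sched_config *make_tp_config(size_t *lenp)
{
	size_t len = sched_tp_confsz(2);
	union sched_config *cf = malloc(len);

	if (cf == NULL)
		return NULL;

	memset(cf, 0, len);
	cf->tp.op = sched_tp_install;
	cf->tp.nr_windows = 2;
	cf->tp.windows[0].offset.tv_nsec   = 0;
	cf->tp.windows[0].duration.tv_nsec = 1000000;
	cf->tp.windows[0].ptid = 0;
	cf->tp.windows[1].offset.tv_nsec   = 1000000;
	cf->tp.windows[1].duration.tv_nsec = 3000000;
	cf->tp.windows[1].ptid = 1;
	*lenp = len;

	return cf;
}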
++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_UAPI_SIGNAL_H ++#define _COBALT_UAPI_SIGNAL_H ++ ++/* ++ * Those are pseudo-signals only available with pthread_kill() to ++ * suspend/resume/unblock threads synchronously, force them out of ++ * primary mode or even demote them to the SCHED_OTHER class via the ++ * low-level nucleus interface. Can't block those signals, queue them, ++ * or even set them in a sigset. Those are nasty, strictly anti-POSIX ++ * things; we do provide them nevertheless only because we are mean ++ * people doing harmful code for no valid reason. Can't go against ++ * your nature, right? Nah... (this said, don't blame us for POSIX, ++ * we are not _that_ mean). ++ */ ++#define SIGSUSP (SIGRTMAX + 1) ++#define SIGRESM (SIGRTMAX + 2) ++#define SIGRELS (SIGRTMAX + 3) ++#define SIGKICK (SIGRTMAX + 4) ++#define SIGDEMT (SIGRTMAX + 5) ++ ++/* ++ * Regular POSIX signals with specific handling by Xenomai. ++ */ ++#define SIGSHADOW SIGWINCH ++#define sigshadow_action(code) ((code) & 0xff) ++#define sigshadow_arg(code) (((code) >> 8) & 0xff) ++#define sigshadow_int(action, arg) ((action) | ((arg) << 8)) ++ ++/* SIGSHADOW action codes. */ ++#define SIGSHADOW_ACTION_HARDEN 1 ++#define SIGSHADOW_ACTION_BACKTRACE 2 ++#define SIGSHADOW_ACTION_HOME 3 ++#define SIGSHADOW_BACKTRACE_DEPTH 16 ++ ++#define SIGDEBUG SIGXCPU ++#define sigdebug_code(si) ((si)->si_value.sival_int) ++#define sigdebug_reason(si) (sigdebug_code(si) & 0xff) ++#define sigdebug_marker 0xfccf0000 ++#define sigdebug_marked(si) \ ++ ((sigdebug_code(si) & 0xffff0000) == sigdebug_marker) ++ ++/* Possible values of sigdebug_reason() */ ++#define SIGDEBUG_UNDEFINED 0 ++#define SIGDEBUG_MIGRATE_SIGNAL 1 ++#define SIGDEBUG_MIGRATE_SYSCALL 2 ++#define SIGDEBUG_MIGRATE_FAULT 3 ++#define SIGDEBUG_MIGRATE_PRIOINV 4 ++#define SIGDEBUG_NOMLOCK 5 ++#define SIGDEBUG_WATCHDOG 6 ++#define SIGDEBUG_RESCNT_IMBALANCE 7 ++#define SIGDEBUG_LOCK_BREAK 8 ++#define SIGDEBUG_MUTEX_SLEEP 9 ++ ++#define COBALT_DELAYMAX 2147483647U ++ ++/* ++ * Internal accessors to extra siginfo/sigevent fields, extending some ++ * existing base field. The extra data should be grouped in a ++ * dedicated struct type. The extra space is taken from the padding ++ * area available from the original structure definitions. ++ * ++ * e.g. getting the address of the following extension to ++ * _sifields._rt from siginfo_t, ++ * ++ * struct bar { ++ * int foo; ++ * }; ++ * ++ * would be noted as: ++ * ++ * siginfo_t si; ++ * struct bar *p = __cobalt_si_extra(&si, _rt, struct bar); ++ * ++ * This code is shared between kernel and user space. Proper ++ * definitions of siginfo_t and sigevent_t should have been read prior ++ * to including this file. ++ * ++ * CAUTION: this macro does not handle alignment issues for the extra ++ * data. The extra type definition should take care of this. 
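SIGDEBUG (aliased to SIGXCPU above) is the notification a Cobalt thread receives when it unexpectedly leaves primary mode, and sigdebug_reason()/sigdebug_marked() decode why. Here is a userspace handler sketch built only from the definitions above; it assumes those UAPI definitions are visible to the application, and the message strings are merely descriptive.

#include <signal.h>
#include <stdio.h>

static const char *reason_str[] = {
	[SIGDEBUG_UNDEFINED]        = "undefined",
	[SIGDEBUG_MIGRATE_SIGNAL]   = "signal received",
	[SIGDEBUG_MIGRATE_SYSCALL]  = "regular Linux syscall",
	[SIGDEBUG_MIGRATE_FAULT]    = "processor fault",
	[SIGDEBUG_MIGRATE_PRIOINV]  = "priority inversion",
	[SIGDEBUG_NOMLOCK]          = "missing mlockall()",
	[SIGDEBUG_WATCHDOG]         = "watchdog triggered",
	[SIGDEBUG_RESCNT_IMBALANCE] = "resource count imbalance",
	[SIGDEBUG_LOCK_BREAK]       = "scheduler lock break",
	[SIGDEBUG_MUTEX_SLEEP]      = "sleeping while holding a mutex",
};

static void sigdebug_handler(int sig, siginfo_t *si, void *context)
{
	unsigned int reason = sigdebug_reason(si);

	(void)sig;
	(void)context;

	if (!sigdebug_marked(si) ||
	    reason >= sizeof(reason_str) / sizeof(reason_str[0]))
		return;

	/* printf() is not async-signal-safe; good enough for a debug aid. */
	printf("SIGDEBUG: switched to secondary mode (%s)\n",
	       reason_str[reason]);
}

/* Typical installation, e.g. early in main():
 *	struct sigaction sa = { .sa_sigaction = sigdebug_handler,
 *				.sa_flags = SA_SIGINFO };
 *	sigaction(SIGDEBUG, &sa, NULL);
 */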
++ */ ++#ifdef __OPTIMIZE__ ++extern void *__siginfo_overflow(void); ++static inline ++const void *__check_si_overflow(size_t fldsz, size_t extrasz, const void *p) ++{ ++ siginfo_t *si __attribute__((unused)); ++ ++ if (fldsz + extrasz <= sizeof(si->_sifields)) ++ return p; ++ ++ return __siginfo_overflow(); ++} ++#define __cobalt_si_extra(__si, __basefield, __type) \ ++ ((__type *)__check_si_overflow(sizeof(__si->_sifields.__basefield), \ ++ sizeof(__type), &(__si->_sifields.__basefield) + 1)) ++#else ++#define __cobalt_si_extra(__si, __basefield, __type) \ ++ ((__type *)((&__si->_sifields.__basefield) + 1)) ++#endif ++ ++/* Same approach, this time for extending sigevent_t. */ ++ ++#ifdef __OPTIMIZE__ ++extern void *__sigevent_overflow(void); ++static inline ++const void *__check_sev_overflow(size_t fldsz, size_t extrasz, const void *p) ++{ ++ sigevent_t *sev __attribute__((unused)); ++ ++ if (fldsz + extrasz <= sizeof(sev->_sigev_un)) ++ return p; ++ ++ return __sigevent_overflow(); ++} ++#define __cobalt_sev_extra(__sev, __basefield, __type) \ ++ ((__type *)__check_sev_overflow(sizeof(__sev->_sigev_un.__basefield), \ ++ sizeof(__type), &(__sev->_sigev_un.__basefield) + 1)) ++#else ++#define __cobalt_sev_extra(__sev, __basefield, __type) \ ++ ((__type *)((&__sev->_sigev_un.__basefield) + 1)) ++#endif ++ ++#endif /* !_COBALT_UAPI_SIGNAL_H */ +--- linux/include/xenomai/cobalt/kernel/schedqueue.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/schedqueue.h 2021-04-07 16:01:28.210632762 +0800 +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHEDQUEUE_H ++#define _COBALT_KERNEL_SCHEDQUEUE_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#define XNSCHED_CLASS_WEIGHT_FACTOR 1024 ++ ++#ifdef CONFIG_XENO_OPT_SCALABLE_SCHED ++ ++#include ++ ++/* ++ * Multi-level priority queue, suitable for handling the runnable ++ * thread queue of the core scheduling class with O(1) property. We ++ * only manage a descending queuing order, i.e. highest numbered ++ * priorities come first. ++ */ ++#define XNSCHED_MLQ_LEVELS 260 /* i.e. 
XNSCHED_CORE_NR_PRIO */ ++ ++struct xnsched_mlq { ++ int elems; ++ DECLARE_BITMAP(prio_map, XNSCHED_MLQ_LEVELS); ++ struct list_head heads[XNSCHED_MLQ_LEVELS]; ++}; ++ ++struct xnthread; ++ ++void xnsched_initq(struct xnsched_mlq *q); ++ ++void xnsched_addq(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++void xnsched_addq_tail(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++void xnsched_delq(struct xnsched_mlq *q, ++ struct xnthread *thread); ++ ++struct xnthread *xnsched_getq(struct xnsched_mlq *q); ++ ++static inline int xnsched_emptyq_p(struct xnsched_mlq *q) ++{ ++ return q->elems == 0; ++} ++ ++static inline int xnsched_weightq(struct xnsched_mlq *q) ++{ ++ return find_first_bit(q->prio_map, XNSCHED_MLQ_LEVELS); ++} ++ ++typedef struct xnsched_mlq xnsched_queue_t; ++ ++#else /* ! CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++typedef struct list_head xnsched_queue_t; ++ ++#define xnsched_initq(__q) INIT_LIST_HEAD(__q) ++#define xnsched_emptyq_p(__q) list_empty(__q) ++#define xnsched_addq(__q, __t) list_add_prilf(__t, __q, cprio, rlink) ++#define xnsched_addq_tail(__q, __t) list_add_priff(__t, __q, cprio, rlink) ++#define xnsched_delq(__q, __t) (void)(__q), list_del(&(__t)->rlink) ++#define xnsched_getq(__q) \ ++ ({ \ ++ struct xnthread *__t = NULL; \ ++ if (!list_empty(__q)) \ ++ __t = list_get_entry(__q, struct xnthread, rlink); \ ++ __t; \ ++ }) ++#define xnsched_weightq(__q) \ ++ ({ \ ++ struct xnthread *__t; \ ++ __t = list_first_entry(__q, struct xnthread, rlink); \ ++ __t->cprio; \ ++ }) ++ ++ ++#endif /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++struct xnthread *xnsched_findq(xnsched_queue_t *q, int prio); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHEDQUEUE_H */ +--- linux/include/xenomai/cobalt/kernel/stat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/stat.h 2021-04-07 16:01:28.205632769 +0800 +@@ -0,0 +1,152 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . ++ * Copyright (C) 2006 Dmitry Adamushko . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_STAT_H ++#define _COBALT_KERNEL_STAT_H ++ ++#include ++ ++/** ++ * @ingroup cobalt_core_thread ++ * @defgroup cobalt_core_stat Thread runtime statistics ++ * @{ ++ */ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++typedef struct xnstat_exectime { ++ ++ xnticks_t start; /* Start of execution time accumulation */ ++ ++ xnticks_t total; /* Accumulated execution time */ ++ ++} xnstat_exectime_t; ++ ++#define xnstat_percpu_data raw_cpu_ptr(nktimer.stats) ++ ++/* Return current date which can be passed to other xnstat services for ++ immediate or lazy accounting. */ ++#define xnstat_exectime_now() xnclock_core_read_raw() ++ ++/* Accumulate exectime of the current account until the given date. 
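struct xnsched_mlq above is the O(1) run queue the comment describes: a per-level list plus a priority bitmap, so that picking the next runnable level is a single find_first_bit(). The following self-contained userspace snippet illustrates only that bitmap technique (simple counters replace the kernel's per-level lists, and the GCC builtin __builtin_ctzll plays the role of find_first_bit()); it is not the patch code.

#include <stdio.h>
#include <string.h>

#define DEMO_LEVELS   260	/* mirrors XNSCHED_MLQ_LEVELS */
#define DEMO_WORDBITS 64

struct demo_mlq {
	unsigned long long map[(DEMO_LEVELS + DEMO_WORDBITS - 1) / DEMO_WORDBITS];
	int count[DEMO_LEVELS];	/* stand-in for the per-level lists */
};

static void demo_add(struct demo_mlq *q, int level)
{
	q->count[level]++;
	q->map[level / DEMO_WORDBITS] |= 1ULL << (level % DEMO_WORDBITS);
}

/* Lowest set bit first, i.e. level 0 is picked before level 200. */
static int demo_pick(struct demo_mlq *q)
{
	for (size_t w = 0; w < sizeof(q->map) / sizeof(q->map[0]); w++) {
		if (q->map[w] == 0)
			continue;

		int level = (int)(w * DEMO_WORDBITS) + __builtin_ctzll(q->map[w]);

		if (--q->count[level] == 0)
			q->map[w] &= ~(1ULL << (level % DEMO_WORDBITS));
		return level;
	}
	return -1;	/* queue empty */
}

int main(void)
{
	struct demo_mlq q;

	memset(&q, 0, sizeof(q));
	demo_add(&q, 200);
	demo_add(&q, 5);
	printf("%d %d %d\n", demo_pick(&q), demo_pick(&q), demo_pick(&q));
	return 0;	/* prints: 5 200 -1 */
}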
*/ ++#define xnstat_exectime_update(sched, date) \ ++do { \ ++ (sched)->current_account->total += \ ++ date - (sched)->last_account_switch; \ ++ (sched)->last_account_switch = date; \ ++ /* All changes must be committed before changing the current_account \ ++ reference in sched (required for xnintr_sync_stat_references) */ \ ++ smp_wmb(); \ ++} while (0) ++ ++/* Update the current account reference, returning the previous one. */ ++#define xnstat_exectime_set_current(sched, new_account) \ ++({ \ ++ xnstat_exectime_t *__prev; \ ++ __prev = (xnstat_exectime_t *)atomic_long_xchg(&(sched)->current_account, (long)(new_account)); \ ++ __prev; \ ++}) ++ ++/* Return the currently active accounting entity. */ ++#define xnstat_exectime_get_current(sched) ((sched)->current_account) ++ ++/* Finalize an account (no need to accumulate the exectime, just mark the ++ switch date and set the new account). */ ++#define xnstat_exectime_finalize(sched, new_account) \ ++do { \ ++ (sched)->last_account_switch = xnclock_core_read_raw(); \ ++ (sched)->current_account = (new_account); \ ++} while (0) ++ ++/* Obtain content of xnstat_exectime_t */ ++#define xnstat_exectime_get_start(account) ((account)->start) ++#define xnstat_exectime_get_total(account) ((account)->total) ++ ++/* Obtain last account switch date of considered sched */ ++#define xnstat_exectime_get_last_switch(sched) ((sched)->last_account_switch) ++ ++/* Reset statistics from inside the accounted entity (e.g. after CPU ++ migration). */ ++#define xnstat_exectime_reset_stats(stat) \ ++do { \ ++ (stat)->total = 0; \ ++ (stat)->start = xnclock_core_read_raw(); \ ++} while (0) ++ ++ ++typedef struct xnstat_counter { ++ unsigned long counter; ++} xnstat_counter_t; ++ ++static inline unsigned long xnstat_counter_inc(xnstat_counter_t *c) ++{ ++ return c->counter++; ++} ++ ++static inline unsigned long xnstat_counter_get(xnstat_counter_t *c) ++{ ++ return c->counter; ++} ++ ++static inline void xnstat_counter_set(xnstat_counter_t *c, unsigned long value) ++{ ++ c->counter = value; ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++typedef struct xnstat_exectime { ++} xnstat_exectime_t; ++ ++#define xnstat_percpu_data NULL ++#define xnstat_exectime_now() ({ 0; }) ++#define xnstat_exectime_update(sched, date) do { } while (0) ++#define xnstat_exectime_set_current(sched, new_account) ({ (void)sched; NULL; }) ++#define xnstat_exectime_get_current(sched) ({ (void)sched; NULL; }) ++#define xnstat_exectime_finalize(sched, new_account) do { } while (0) ++#define xnstat_exectime_get_start(account) ({ 0; }) ++#define xnstat_exectime_get_total(account) ({ 0; }) ++#define xnstat_exectime_get_last_switch(sched) ({ 0; }) ++#define xnstat_exectime_reset_stats(account) do { } while (0) ++ ++typedef struct xnstat_counter { ++} xnstat_counter_t; ++ ++#define xnstat_counter_inc(c) ({ do { } while(0); 0; }) ++#define xnstat_counter_get(c) ({ 0; }) ++#define xnstat_counter_set(c, value) do { } while (0) ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++/* Account the exectime of the current account until now, switch to ++ new_account, and return the previous one. */ ++#define xnstat_exectime_switch(sched, new_account) \ ++({ \ ++ xnstat_exectime_update(sched, xnstat_exectime_now()); \ ++ xnstat_exectime_set_current(sched, new_account); \ ++}) ++ ++/* Account the exectime of the current account until given start time, switch ++ to new_account, and return the previous one. 
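The xnstat_exectime_* macros above implement per-entity CPU time accounting: whenever the scheduler switches accounts, the time elapsed since the last switch is charged to the outgoing account before the new one becomes current. A hedged userspace mock of that pattern follows, where CLOCK_MONOTONIC stands in for xnclock_core_read_raw() and every name is invented:

#include <stdio.h>
#include <time.h>

struct demo_account {
	unsigned long long total;	/* accumulated ns, like xnstat_exectime.total */
};

static struct demo_account *current_account;
static unsigned long long last_switch;

static unsigned long long demo_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Mirrors the xnstat_exectime_switch() sequence: update, then set. */
static struct demo_account *demo_account_switch(struct demo_account *new_account)
{
	unsigned long long date = demo_now();
	struct demo_account *prev = current_account;

	if (prev)
		prev->total += date - last_switch;
	last_switch = date;
	current_account = new_account;

	return prev;
}

int main(void)
{
	struct demo_account a = { 0 }, b = { 0 };

	last_switch = demo_now();
	current_account = &a;
	/* ... imagine "a" running here, then a switch to "b" ... */
	demo_account_switch(&b);
	printf("a consumed %llu ns so far\n", a.total);
	return 0;
}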
*/ ++#define xnstat_exectime_lazy_switch(sched, new_account, date) \ ++({ \ ++ xnstat_exectime_update(sched, date); \ ++ xnstat_exectime_set_current(sched, new_account); \ ++}) ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_STAT_H */ +--- linux/include/xenomai/cobalt/kernel/sched-idle.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-idle.h 2021-04-07 16:01:28.200632776 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_IDLE_H ++#define _COBALT_KERNEL_SCHED_IDLE_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-idle.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* Idle priority level - actually never used for indexing. */ ++#define XNSCHED_IDLE_PRIO -1 ++ ++extern struct xnsched_class xnsched_class_idle; ++ ++static inline bool __xnsched_idle_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ xnthread_clear_state(thread, XNWEAK); ++ return xnsched_set_effective_priority(thread, p->idle.prio); ++} ++ ++static inline void __xnsched_idle_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->idle.prio = thread->cprio; ++} ++ ++static inline void __xnsched_idle_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ /* Inheriting a priority-less class makes no sense. */ ++ XENO_WARN_ON_ONCE(COBALT, 1); ++ else ++ thread->cprio = XNSCHED_IDLE_PRIO; ++} ++ ++static inline void __xnsched_idle_protectprio(struct xnthread *thread, int prio) ++{ ++ XENO_WARN_ON_ONCE(COBALT, 1); ++} ++ ++static inline int xnsched_idle_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_IDLE_H */ +--- linux/include/xenomai/cobalt/kernel/vfile.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/vfile.h 2021-04-07 16:01:28.196632782 +0800 +@@ -0,0 +1,667 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_KERNEL_VFILE_H ++#define _COBALT_KERNEL_VFILE_H ++ ++#if defined(CONFIG_XENO_OPT_VFILE) || defined(DOXYGEN_CPP) ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_vfile ++ * @{ ++ */ ++ ++struct xnvfile_directory; ++struct xnvfile_regular_iterator; ++struct xnvfile_snapshot_iterator; ++struct xnvfile_lock_ops; ++ ++struct xnvfile { ++ struct proc_dir_entry *pde; ++ struct file *file; ++ struct xnvfile_lock_ops *lockops; ++ int refcnt; ++ void *private; ++}; ++ ++/** ++ * @brief Vfile locking operations ++ * @anchor vfile_lockops ++ * ++ * This structure describes the operations to be provided for ++ * implementing locking support on vfiles. They apply to both ++ * snapshot-driven and regular vfiles. ++ */ ++struct xnvfile_lock_ops { ++ /** ++ * @anchor lockops_get ++ * This handler should grab the desired lock. ++ * ++ * @param vfile A pointer to the virtual file which needs ++ * locking. ++ * ++ * @return zero should be returned if the call ++ * succeeds. Otherwise, a negative error code can be returned; ++ * upon error, the current vfile operation is aborted, and the ++ * user-space caller is passed back the error value. ++ */ ++ int (*get)(struct xnvfile *vfile); ++ /** ++ * @anchor lockops_put This handler should release the lock ++ * previously grabbed by the @ref lockops_get "get() handler". ++ * ++ * @param vfile A pointer to the virtual file which currently ++ * holds the lock to release. ++ */ ++ void (*put)(struct xnvfile *vfile); ++}; ++ ++struct xnvfile_hostlock_class { ++ struct xnvfile_lock_ops ops; ++ struct mutex mutex; ++}; ++ ++struct xnvfile_nklock_class { ++ struct xnvfile_lock_ops ops; ++ spl_t s; ++}; ++ ++struct xnvfile_input { ++ const char __user *u_buf; ++ size_t size; ++ struct xnvfile *vfile; ++}; ++ ++/** ++ * @brief Regular vfile operation descriptor ++ * @anchor regular_ops ++ * ++ * This structure describes the operations available with a regular ++ * vfile. It defines handlers for sending back formatted kernel data ++ * upon a user-space read request, and for obtaining user data upon a ++ * user-space write request. ++ */ ++struct xnvfile_regular_ops { ++ /** ++ * @anchor regular_rewind This handler is called only once, ++ * when the virtual file is opened, before the @ref ++ * regular_begin "begin() handler" is invoked. ++ * ++ * @param it A pointer to the vfile iterator which will be ++ * used to read the file contents. ++ * ++ * @return Zero should be returned upon success. Otherwise, a ++ * negative error code aborts the operation, and is passed ++ * back to the reader. ++ * ++ * @note This handler is optional. It should not be used to ++ * allocate resources but rather to perform consistency ++ * checks, since no closure call is issued in case the open ++ * sequence eventually fails. ++ */ ++ int (*rewind)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_begin ++ * This handler should prepare for iterating over the records ++ * upon a read request, starting from the specified position. ++ * ++ * @param it A pointer to the current vfile iterator. On ++ * entry, it->pos is set to the (0-based) position of the ++ * first record to output. This handler may be called multiple ++ * times with different position requests. ++ * ++ * @return A pointer to the first record to format and output, ++ * to be passed to the @ref regular_show "show() handler" as ++ * its @a data parameter, if the call succeeds. 
Otherwise: ++ * ++ * - NULL in case no record is available, in which case the ++ * read operation will terminate immediately with no output. ++ * ++ * - VFILE_SEQ_START, a special value indicating that @ref ++ * regular_show "the show() handler" should receive a NULL ++ * data pointer first, in order to output a header. ++ * ++ * - ERR_PTR(errno), where errno is a negative error code; ++ * upon error, the current operation will be aborted ++ * immediately. ++ * ++ * @note This handler is optional; if none is given in the ++ * operation descriptor (i.e. NULL value), the @ref ++ * regular_show "show() handler()" will be called only once ++ * for a read operation, with a NULL @a data parameter. This ++ * particular setting is convenient for simple regular vfiles ++ * having a single, fixed record to output. ++ */ ++ void *(*begin)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_next ++ * This handler should return the address of the next record ++ * to format and output by the @ref regular_show "show() ++ * handler". ++ * ++ * @param it A pointer to the current vfile iterator. On ++ * entry, it->pos is set to the (0-based) position of the ++ * next record to output. ++ * ++ * @return A pointer to the next record to format and output, ++ * to be passed to the @ref regular_show "show() handler" as ++ * its @a data parameter, if the call succeeds. Otherwise: ++ * ++ * - NULL in case no record is available, in which case the ++ * read operation will terminate immediately with no output. ++ * ++ * - ERR_PTR(errno), where errno is a negative error code; ++ * upon error, the current operation will be aborted ++ * immediately. ++ * ++ * @note This handler is optional; if none is given in the ++ * operation descriptor (i.e. NULL value), the read operation ++ * will stop after the first invocation of the @ref regular_show ++ * "show() handler". ++ */ ++ void *(*next)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_end ++ * This handler is called after all records have been output. ++ * ++ * @param it A pointer to the current vfile iterator. ++ * ++ * @note This handler is optional and the pointer may be NULL. ++ */ ++ void (*end)(struct xnvfile_regular_iterator *it); ++ /** ++ * @anchor regular_show ++ * This handler should format and output a record. ++ * ++ * xnvfile_printf(), xnvfile_write(), xnvfile_puts() and ++ * xnvfile_putc() are available to format and/or emit the ++ * output. All routines take the iterator argument @a it as ++ * their first parameter. ++ * ++ * @param it A pointer to the current vfile iterator. ++ * ++ * @param data A pointer to the record to format then ++ * output. The first call to the handler may receive a NULL @a ++ * data pointer, depending on the presence and/or return of a ++ * @ref regular_begin "hander"; the show handler should test ++ * this special value to output any header that fits, prior to ++ * receiving more calls with actual records. ++ * ++ * @return zero if the call succeeds, also indicating that the ++ * handler should be called for the next record if ++ * any. Otherwise: ++ * ++ * - A negative error code. This will abort the output phase, ++ * and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped and will not be output. 
++ */ ++ int (*show)(struct xnvfile_regular_iterator *it, void *data); ++ /** ++ * @anchor regular_store ++ * This handler receives data written to the vfile, likely for ++ * updating some kernel setting, or triggering any other ++ * action which fits. This is the only handler which deals ++ * with the write-side of a vfile. It is called when writing ++ * to the /proc entry of the vfile from a user-space process. ++ * ++ * The input data is described by a descriptor passed to the ++ * handler, which may be subsequently passed to parsing helper ++ * routines. For instance, xnvfile_get_string() will accept ++ * the input descriptor for returning the written data as a ++ * null-terminated character string. On the other hand, ++ * xnvfile_get_integer() will attempt to return a long integer ++ * from the input data. ++ * ++ * @param input A pointer to an input descriptor. It refers to ++ * an opaque data from the handler's standpoint. ++ * ++ * @return the number of bytes read from the input descriptor ++ * if the call succeeds. Otherwise, a negative error code. ++ * Return values from parsing helper routines are commonly ++ * passed back to the caller by the @ref regular_store ++ * "store() handler". ++ * ++ * @note This handler is optional, and may be omitted for ++ * read-only vfiles. ++ */ ++ ssize_t (*store)(struct xnvfile_input *input); ++}; ++ ++struct xnvfile_regular { ++ struct xnvfile entry; ++ size_t privsz; ++ struct xnvfile_regular_ops *ops; ++}; ++ ++struct xnvfile_regular_template { ++ size_t privsz; ++ struct xnvfile_regular_ops *ops; ++ struct xnvfile_lock_ops *lockops; ++}; ++ ++/** ++ * @brief Regular vfile iterator ++ * @anchor regular_iterator ++ * ++ * This structure defines an iterator over a regular vfile. ++ */ ++struct xnvfile_regular_iterator { ++ /** Current record position while iterating. */ ++ loff_t pos; ++ /** Backlink to the host sequential file supporting the vfile. */ ++ struct seq_file *seq; ++ /** Backlink to the vfile being read. */ ++ struct xnvfile_regular *vfile; ++ /** ++ * Start of private area. Use xnvfile_iterator_priv() to ++ * address it. ++ */ ++ char private[0]; ++}; ++ ++/** ++ * @brief Snapshot vfile operation descriptor ++ * @anchor snapshot_ops ++ * ++ * This structure describes the operations available with a ++ * snapshot-driven vfile. It defines handlers for returning a ++ * printable snapshot of some Xenomai object contents upon a ++ * user-space read request, and for updating this object upon a ++ * user-space write request. ++ */ ++struct xnvfile_snapshot_ops { ++ /** ++ * @anchor snapshot_rewind ++ * This handler (re-)initializes the data collection, moving ++ * the seek pointer at the first record. When the file ++ * revision tag is touched while collecting data, the current ++ * reading is aborted, all collected data dropped, and the ++ * vfile is eventually rewound. ++ * ++ * @param it A pointer to the current snapshot iterator. Two ++ * useful information can be retrieved from this iterator in ++ * this context: ++ * ++ * - it->vfile is a pointer to the descriptor of the virtual ++ * file being rewound. ++ * ++ * - xnvfile_iterator_priv(it) returns a pointer to the ++ * private data area, available from the descriptor, which ++ * size is vfile->privsz. If the latter size is zero, the ++ * returned pointer is meaningless and should not be used. ++ * ++ * @return A negative error code aborts the data collection, ++ * and is passed back to the reader. 
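As the handler descriptions above point out, a regular vfile that provides no begin()/next() handlers gets exactly one show() call with a NULL data pointer, which is the natural shape for one-record /proc entries. Below is a kernel-side sketch of that minimal read-only case; every name is invented for illustration and the registration call is only shown as a comment.

/* Minimal single-record regular vfile (illustrative, not patch code). */
static int demo_vfile_show(struct xnvfile_regular_iterator *it, void *data)
{
	/* Called once with data == NULL; emit the whole record here. */
	xnvfile_printf(it, "hello from a regular vfile\n");

	return 0;
}

static struct xnvfile_regular_ops demo_vfile_ops = {
	.show = demo_vfile_show,
};

static struct xnvfile_regular demo_vfile = {
	.ops = &demo_vfile_ops,
};

/* Registration from some init path, e.g.:
 *	xnvfile_init_regular("demo", &demo_vfile, &cobalt_vfroot);
 */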
Otherwise: ++ * ++ * - a strictly positive value is interpreted as the total ++ * number of records which will be returned by the @ref ++ * snapshot_next "next() handler" during the data collection ++ * phase. If no @ref snapshot_begin "begin() handler" is ++ * provided in the @ref snapshot_ops "operation descriptor", ++ * this value is used to allocate the snapshot buffer ++ * internally. The size of this buffer would then be ++ * vfile->datasz * value. ++ * ++ * - zero leaves the allocation to the @ref snapshot_begin ++ * "begin() handler" if present, or indicates that no record ++ * is to be output in case such handler is not given. ++ * ++ * @note This handler is optional; a NULL value indicates that ++ * nothing needs to be done for rewinding the vfile. It is ++ * called with the vfile lock held. ++ */ ++ int (*rewind)(struct xnvfile_snapshot_iterator *it); ++ /** ++ * @anchor snapshot_begin ++ * This handler should allocate the snapshot buffer to hold ++ * records during the data collection phase. When specified, ++ * all records collected via the @ref snapshot_next "next() ++ * handler" will be written to a cell from the memory area ++ * returned by begin(). ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @return A pointer to the record buffer, if the call ++ * succeeds. Otherwise: ++ * ++ * - NULL in case of allocation error. This will abort the data ++ * collection, and return -ENOMEM to the reader. ++ * ++ * - VFILE_SEQ_EMPTY, a special value indicating that no ++ * record will be output. In such a case, the @ref ++ * snapshot_next "next() handler" will not be called, and the ++ * data collection will stop immediately. However, the @ref ++ * snapshot_show "show() handler" will still be called once, ++ * with a NULL data pointer (i.e. header display request). ++ * ++ * @note This handler is optional; if none is given, an ++ * internal allocation depending on the value returned by the ++ * @ref snapshot_rewind "rewind() handler" can be obtained. ++ */ ++ void *(*begin)(struct xnvfile_snapshot_iterator *it); ++ /** ++ * @anchor snapshot_end ++ * This handler releases the memory buffer previously obtained ++ * from begin(). It is usually called after the snapshot data ++ * has been output by show(), but it may also be called before ++ * rewinding the vfile after a revision change, to release the ++ * dropped buffer. ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param buf A pointer to the buffer to release. ++ * ++ * @note This routine is optional and the pointer may be ++ * NULL. It is not needed upon internal buffer allocation; ++ * see the description of the @ref snapshot_rewind "rewind() ++ * handler". ++ */ ++ void (*end)(struct xnvfile_snapshot_iterator *it, void *buf); ++ /** ++ * @anchor snapshot_next ++ * This handler fetches the next record, as part of the ++ * snapshot data to be sent back to the reader via the ++ * show(). ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param data A pointer to the record to fill in. ++ * ++ * @return a strictly positive value, if the call succeeds and ++ * leaves a valid record into @a data, which should be passed ++ * to the @ref snapshot_show "show() handler()" during the ++ * formatting and output phase. Otherwise: ++ * ++ * - A negative error code. This will abort the data ++ * collection, and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped. 
In such a case, the @a ++ * data pointer is not advanced to the next position before ++ * the @ref snapshot_next "next() handler" is called anew. ++ * ++ * @note This handler is called with the vfile lock ++ * held. Before each invocation of this handler, the vfile ++ * core checks whether the revision tag has been touched, in ++ * which case the data collection is restarted from scratch. A ++ * data collection phase succeeds whenever all records can be ++ * fetched via the @ref snapshot_next "next() handler", while ++ * the revision tag remains unchanged, which indicates that a ++ * consistent snapshot of the object state was taken. ++ */ ++ int (*next)(struct xnvfile_snapshot_iterator *it, void *data); ++ /** ++ * @anchor snapshot_show ++ * This handler should format and output a record from the ++ * collected data. ++ * ++ * xnvfile_printf(), xnvfile_write(), xnvfile_puts() and ++ * xnvfile_putc() are available to format and/or emit the ++ * output. All routines take the iterator argument @a it as ++ * their first parameter. ++ * ++ * @param it A pointer to the current snapshot iterator. ++ * ++ * @param data A pointer to the record to format then ++ * output. The first call to the handler is always passed a ++ * NULL @a data pointer; the show handler should test this ++ * special value to output any header that fits, prior to ++ * receiving more calls with actual records. ++ * ++ * @return zero if the call succeeds, also indicating that the ++ * handler should be called for the next record if ++ * any. Otherwise: ++ * ++ * - A negative error code. This will abort the output phase, ++ * and return this status to the reader. ++ * ++ * - VFILE_SEQ_SKIP, a special value indicating that the ++ * current record should be skipped and will not be output. ++ */ ++ int (*show)(struct xnvfile_snapshot_iterator *it, void *data); ++ /** ++ * @anchor snapshot_store ++ * This handler receives data written to the vfile, likely for ++ * updating the associated Xenomai object's state, or ++ * triggering any other action which fits. This is the only ++ * handler which deals with the write-side of a vfile. It is ++ * called when writing to the /proc entry of the vfile ++ * from a user-space process. ++ * ++ * The input data is described by a descriptor passed to the ++ * handler, which may be subsequently passed to parsing helper ++ * routines. For instance, xnvfile_get_string() will accept ++ * the input descriptor for returning the written data as a ++ * null-terminated character string. On the other hand, ++ * xnvfile_get_integer() will attempt to return a long integer ++ * from the input data. ++ * ++ * @param input A pointer to an input descriptor. It refers to ++ * an opaque data from the handler's standpoint. ++ * ++ * @return the number of bytes read from the input descriptor ++ * if the call succeeds. Otherwise, a negative error code. ++ * Return values from parsing helper routines are commonly ++ * passed back to the caller by the @ref snapshot_store ++ * "store() handler". ++ * ++ * @note This handler is optional, and may be omitted for ++ * read-only vfiles. ++ */ ++ ssize_t (*store)(struct xnvfile_input *input); ++}; ++ ++/** ++ * @brief Snapshot revision tag ++ * @anchor revision_tag ++ * ++ * This structure defines a revision tag to be used with @ref ++ * snapshot_vfile "snapshot-driven vfiles". ++ */ ++struct xnvfile_rev_tag { ++ /** Current revision number. 
*/ ++ int rev; ++}; ++ ++struct xnvfile_snapshot_template { ++ size_t privsz; ++ size_t datasz; ++ struct xnvfile_rev_tag *tag; ++ struct xnvfile_snapshot_ops *ops; ++ struct xnvfile_lock_ops *lockops; ++}; ++ ++/** ++ * @brief Snapshot vfile descriptor ++ * @anchor snapshot_vfile ++ * ++ * This structure describes a snapshot-driven vfile. Reading from ++ * such a vfile involves a preliminary data collection phase under ++ * lock protection, and a subsequent formatting and output phase of ++ * the collected data records. Locking is done in a way that does not ++ * increase worst-case latency, regardless of the number of records to ++ * be collected for output. ++ */ ++struct xnvfile_snapshot { ++ struct xnvfile entry; ++ size_t privsz; ++ size_t datasz; ++ struct xnvfile_rev_tag *tag; ++ struct xnvfile_snapshot_ops *ops; ++}; ++ ++/** ++ * @brief Snapshot-driven vfile iterator ++ * @anchor snapshot_iterator ++ * ++ * This structure defines an iterator over a snapshot-driven vfile. ++ */ ++struct xnvfile_snapshot_iterator { ++ /** Number of collected records. */ ++ int nrdata; ++ /** Address of record buffer. */ ++ caddr_t databuf; ++ /** Backlink to the host sequential file supporting the vfile. */ ++ struct seq_file *seq; ++ /** Backlink to the vfile being read. */ ++ struct xnvfile_snapshot *vfile; ++ /** Buffer release handler. */ ++ void (*endfn)(struct xnvfile_snapshot_iterator *it, void *buf); ++ /** ++ * Start of private area. Use xnvfile_iterator_priv() to ++ * address it. ++ */ ++ char private[0]; ++}; ++ ++struct xnvfile_directory { ++ struct xnvfile entry; ++}; ++ ++struct xnvfile_link { ++ struct xnvfile entry; ++}; ++ ++/* vfile.begin()=> */ ++#define VFILE_SEQ_EMPTY ((void *)-1) ++/* =>vfile.show() */ ++#define VFILE_SEQ_START SEQ_START_TOKEN ++/* vfile.next/show()=> */ ++#define VFILE_SEQ_SKIP 2 ++ ++#define xnvfile_printf(it, args...) 
seq_printf((it)->seq, ##args) ++#define xnvfile_write(it, data, len) seq_write((it)->seq, (data),(len)) ++#define xnvfile_puts(it, s) seq_puts((it)->seq, (s)) ++#define xnvfile_putc(it, c) seq_putc((it)->seq, (c)) ++ ++static inline void xnvfile_touch_tag(struct xnvfile_rev_tag *tag) ++{ ++ tag->rev++; ++} ++ ++static inline void xnvfile_touch(struct xnvfile_snapshot *vfile) ++{ ++ xnvfile_touch_tag(vfile->tag); ++} ++ ++#define xnvfile_noentry \ ++ { \ ++ .pde = NULL, \ ++ .private = NULL, \ ++ .file = NULL, \ ++ .refcnt = 0, \ ++ } ++ ++#define xnvfile_nodir { .entry = xnvfile_noentry } ++#define xnvfile_nolink { .entry = xnvfile_noentry } ++#define xnvfile_nofile { .entry = xnvfile_noentry } ++ ++#define xnvfile_priv(e) ((e)->entry.private) ++#define xnvfile_nref(e) ((e)->entry.refcnt) ++#define xnvfile_file(e) ((e)->entry.file) ++#define xnvfile_iterator_priv(it) ((void *)(&(it)->private)) ++ ++extern struct xnvfile_nklock_class xnvfile_nucleus_lock; ++ ++extern struct xnvfile_directory cobalt_vfroot; ++ ++int xnvfile_init_root(void); ++ ++void xnvfile_destroy_root(void); ++ ++int xnvfile_init_snapshot(const char *name, ++ struct xnvfile_snapshot *vfile, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_regular(const char *name, ++ struct xnvfile_regular *vfile, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_dir(const char *name, ++ struct xnvfile_directory *vdir, ++ struct xnvfile_directory *parent); ++ ++int xnvfile_init_link(const char *from, ++ const char *to, ++ struct xnvfile_link *vlink, ++ struct xnvfile_directory *parent); ++ ++void xnvfile_destroy(struct xnvfile *vfile); ++ ++ssize_t xnvfile_get_blob(struct xnvfile_input *input, ++ void *data, size_t size); ++ ++ssize_t xnvfile_get_string(struct xnvfile_input *input, ++ char *s, size_t maxlen); ++ ++ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp); ++ ++int __vfile_hostlock_get(struct xnvfile *vfile); ++ ++void __vfile_hostlock_put(struct xnvfile *vfile); ++ ++static inline ++void xnvfile_destroy_snapshot(struct xnvfile_snapshot *vfile) ++{ ++ xnvfile_destroy(&vfile->entry); ++} ++ ++static inline ++void xnvfile_destroy_regular(struct xnvfile_regular *vfile) ++{ ++ xnvfile_destroy(&vfile->entry); ++} ++ ++static inline ++void xnvfile_destroy_dir(struct xnvfile_directory *vdir) ++{ ++ xnvfile_destroy(&vdir->entry); ++} ++ ++static inline ++void xnvfile_destroy_link(struct xnvfile_link *vlink) ++{ ++ xnvfile_destroy(&vlink->entry); ++} ++ ++#define DEFINE_VFILE_HOSTLOCK(name) \ ++ struct xnvfile_hostlock_class name = { \ ++ .ops = { \ ++ .get = __vfile_hostlock_get, \ ++ .put = __vfile_hostlock_put, \ ++ }, \ ++ .mutex = __MUTEX_INITIALIZER(name.mutex), \ ++ } ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++#define xnvfile_touch_tag(tag) do { } while (0) ++ ++#define xnvfile_touch(vfile) do { } while (0) ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_VFILE_H */ +--- linux/include/xenomai/cobalt/kernel/sched-rt.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-rt.h 2021-04-07 16:01:28.191632789 +0800 +@@ -0,0 +1,150 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_RT_H ++#define _COBALT_KERNEL_SCHED_RT_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-rt.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* ++ * Global priority scale for Xenomai's core scheduling class, ++ * available to SCHED_COBALT members. ++ */ ++#define XNSCHED_CORE_MIN_PRIO 0 ++#define XNSCHED_CORE_MAX_PRIO 259 ++#define XNSCHED_CORE_NR_PRIO \ ++ (XNSCHED_CORE_MAX_PRIO - XNSCHED_CORE_MIN_PRIO + 1) ++ ++/* ++ * Priority range for SCHED_FIFO, and all other classes Cobalt ++ * implements except SCHED_COBALT. ++ */ ++#define XNSCHED_FIFO_MIN_PRIO 1 ++#define XNSCHED_FIFO_MAX_PRIO 256 ++ ++#if XNSCHED_CORE_NR_PRIO > XNSCHED_CLASS_WEIGHT_FACTOR || \ ++ (defined(CONFIG_XENO_OPT_SCALABLE_SCHED) && \ ++ XNSCHED_CORE_NR_PRIO > XNSCHED_MLQ_LEVELS) ++#error "XNSCHED_MLQ_LEVELS is too low" ++#endif ++ ++extern struct xnsched_class xnsched_class_rt; ++ ++static inline void __xnsched_rt_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->sched->rt.runnable, thread); ++} ++ ++static inline void __xnsched_rt_track_weakness(struct xnthread *thread) ++{ ++ /* ++ * We have to track threads exiting weak scheduling, i.e. any ++ * thread leaving the WEAK class code if compiled in, or ++ * assigned a zero priority if weak threads are hosted by the ++ * RT class. ++ * ++ * CAUTION: since we need to check the effective priority ++ * level for determining the weakness state, this can only ++ * apply to non-boosted threads. ++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_WEAK) || thread->cprio) ++ xnthread_clear_state(thread, XNWEAK); ++ else ++ xnthread_set_state(thread, XNWEAK); ++} ++ ++static inline bool __xnsched_rt_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ bool ret = xnsched_set_effective_priority(thread, p->rt.prio); ++ ++ if (!xnthread_test_state(thread, XNBOOST)) ++ __xnsched_rt_track_weakness(thread); ++ ++ return ret; ++} ++ ++static inline void __xnsched_rt_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->rt.prio = thread->cprio; ++} ++ ++static inline void __xnsched_rt_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->rt.prio; /* Force update. */ ++ else { ++ thread->cprio = thread->bprio; ++ /* Leaving PI/PP, so non-boosted by definition. */ ++ __xnsched_rt_track_weakness(thread); ++ } ++} ++ ++static inline void __xnsched_rt_protectprio(struct xnthread *thread, int prio) ++{ ++ /* ++ * The RT class supports the widest priority range from ++ * XNSCHED_CORE_MIN_PRIO to XNSCHED_CORE_MAX_PRIO inclusive, ++ * no need to cap the input value which is guaranteed to be in ++ * the range [1..XNSCHED_CORE_MAX_PRIO]. 
++ */ ++ thread->cprio = prio; ++} ++ ++static inline void __xnsched_rt_forget(struct xnthread *thread) ++{ ++} ++ ++static inline int xnsched_rt_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++struct xnthread *xnsched_rt_pick(struct xnsched *sched); ++#else ++static inline struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ return xnsched_getq(&sched->rt.runnable); ++} ++#endif ++ ++void xnsched_rt_tick(struct xnsched *sched); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_RT_H */ +--- linux/include/xenomai/cobalt/kernel/vdso.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/vdso.h 2021-04-07 16:01:28.186632796 +0800 +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Mauerer . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_VDSO_H ++#define _COBALT_KERNEL_VDSO_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Define the available feature set here. We have a single feature ++ * defined for now. ++ */ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++#define XNVDSO_FEATURES XNVDSO_FEAT_HOST_REALTIME ++#else ++#define XNVDSO_FEATURES 0 ++#endif /* CONFIG_XENO_OPT_HOSTRT */ ++ ++extern struct xnvdso *nkvdso; ++ ++static inline struct xnvdso_hostrt_data *get_hostrt_data(void) ++{ ++ return &nkvdso->hostrt_data; ++} ++ ++#endif /* _COBALT_KERNEL_VDSO_H */ +--- linux/include/xenomai/cobalt/kernel/synch.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/synch.h 2021-04-07 16:01:28.181632803 +0800 +@@ -0,0 +1,179 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_SYNCH_H ++#define _COBALT_KERNEL_SYNCH_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_synch ++ * @{ ++ */ ++#define XNSYNCH_CLAIMED 0x100 /* Claimed by other thread(s) (PI) */ ++#define XNSYNCH_CEILING 0x200 /* Actively boosting (PP) */ ++ ++/* Spare flags usable by upper interfaces */ ++#define XNSYNCH_SPARE0 0x01000000 ++#define XNSYNCH_SPARE1 0x02000000 ++#define XNSYNCH_SPARE2 0x04000000 ++#define XNSYNCH_SPARE3 0x08000000 ++#define XNSYNCH_SPARE4 0x10000000 ++#define XNSYNCH_SPARE5 0x20000000 ++#define XNSYNCH_SPARE6 0x40000000 ++#define XNSYNCH_SPARE7 0x80000000 ++ ++/* Statuses */ ++#define XNSYNCH_DONE 0 /* Resource available / operation complete */ ++#define XNSYNCH_WAIT 1 /* Calling thread blocked -- start rescheduling */ ++#define XNSYNCH_RESCHED 2 /* Force rescheduling */ ++ ++struct xnthread; ++struct xnsynch; ++ ++struct xnsynch { ++ /** wait (weighted) prio in thread->boosters */ ++ int wprio; ++ /** thread->boosters */ ++ struct list_head next; ++ /** ++ * &variable holding the current priority ceiling value ++ * (xnsched_class_rt-based, [1..255], XNSYNCH_PP). ++ */ ++ u32 *ceiling_ref; ++ /** Status word */ ++ unsigned long status; ++ /** Pending threads */ ++ struct list_head pendq; ++ /** Thread which owns the resource */ ++ struct xnthread *owner; ++ /** Pointer to fast lock word */ ++ atomic_t *fastlock; ++ /* Cleanup handler */ ++ void (*cleanup)(struct xnsynch *synch); ++}; ++ ++#define XNSYNCH_WAITQUEUE_INITIALIZER(__name) { \ ++ .status = XNSYNCH_PRIO, \ ++ .wprio = -1, \ ++ .pendq = LIST_HEAD_INIT((__name).pendq), \ ++ .owner = NULL, \ ++ .cleanup = NULL, \ ++ .fastlock = NULL, \ ++ } ++ ++#define DEFINE_XNWAITQ(__name) \ ++ struct xnsynch __name = XNSYNCH_WAITQUEUE_INITIALIZER(__name) ++ ++static inline void xnsynch_set_status(struct xnsynch *synch, int bits) ++{ ++ synch->status |= bits; ++} ++ ++static inline void xnsynch_clear_status(struct xnsynch *synch, int bits) ++{ ++ synch->status &= ~bits; ++} ++ ++#define xnsynch_for_each_sleeper(__pos, __synch) \ ++ list_for_each_entry(__pos, &(__synch)->pendq, plink) ++ ++#define xnsynch_for_each_sleeper_safe(__pos, __tmp, __synch) \ ++ list_for_each_entry_safe(__pos, __tmp, &(__synch)->pendq, plink) ++ ++static inline int xnsynch_pended_p(struct xnsynch *synch) ++{ ++ return !list_empty(&synch->pendq); ++} ++ ++static inline struct xnthread *xnsynch_owner(struct xnsynch *synch) ++{ ++ return synch->owner; ++} ++ ++#define xnsynch_fastlock(synch) ((synch)->fastlock) ++#define xnsynch_fastlock_p(synch) ((synch)->fastlock != NULL) ++#define xnsynch_owner_check(synch, thread) \ ++ xnsynch_fast_owner_check((synch)->fastlock, thread->handle) ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED ++ ++void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper); ++ ++void xnsynch_detect_boosted_relax(struct xnthread *owner); ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++static inline void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper) { } ++ ++static inline void xnsynch_detect_boosted_relax(struct xnthread *owner) { } ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++void xnsynch_init(struct xnsynch *synch, int flags, ++ atomic_t *fastlock); ++ ++void xnsynch_init_protect(struct xnsynch *synch, int flags, ++ atomic_t *fastlock, u32 *ceiling_ref); ++ ++int xnsynch_destroy(struct xnsynch *synch); ++ ++void xnsynch_commit_ceiling(struct xnthread *curr); ++ ++static inline 
void xnsynch_register_cleanup(struct xnsynch *synch, ++ void (*handler)(struct xnsynch *)) ++{ ++ synch->cleanup = handler; ++} ++ ++int __must_check xnsynch_sleep_on(struct xnsynch *synch, ++ xnticks_t timeout, ++ xntmode_t timeout_mode); ++ ++struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch); ++ ++int xnsynch_wakeup_many_sleepers(struct xnsynch *synch, int nr); ++ ++void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, ++ struct xnthread *sleeper); ++ ++int __must_check xnsynch_acquire(struct xnsynch *synch, ++ xnticks_t timeout, ++ xntmode_t timeout_mode); ++ ++int __must_check xnsynch_try_acquire(struct xnsynch *synch); ++ ++bool xnsynch_release(struct xnsynch *synch, struct xnthread *thread); ++ ++struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch); ++ ++int xnsynch_flush(struct xnsynch *synch, int reason); ++ ++void xnsynch_requeue_sleeper(struct xnthread *thread); ++ ++void xnsynch_forget_sleeper(struct xnthread *thread); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SYNCH_H_ */ +--- linux/include/xenomai/cobalt/kernel/list.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/list.h 2021-04-07 16:01:28.177632809 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_LIST_H ++#define _COBALT_KERNEL_LIST_H ++ ++#include ++ ++#define __list_add_pri(__new, __head, __member_pri, __member_next, __relop) \ ++do { \ ++ typeof(*__new) *__pos; \ ++ if (list_empty(__head)) \ ++ list_add(&(__new)->__member_next, __head); \ ++ else { \ ++ list_for_each_entry_reverse(__pos, __head, __member_next) { \ ++ if ((__new)->__member_pri __relop __pos->__member_pri) \ ++ break; \ ++ } \ ++ list_add(&(__new)->__member_next, &__pos->__member_next); \ ++ } \ ++} while (0) ++ ++#define list_add_priff(__new, __head, __member_pri, __member_next) \ ++ __list_add_pri(__new, __head, __member_pri, __member_next, <=) ++ ++#define list_add_prilf(__new, __head, __member_pri, __member_next) \ ++ __list_add_pri(__new, __head, __member_pri, __member_next, <) ++ ++#define list_get_entry(__head, __type, __member) \ ++ ({ \ ++ __type *__item; \ ++ __item = list_first_entry(__head, __type, __member); \ ++ list_del(&__item->__member); \ ++ __item; \ ++ }) ++ ++#define list_get_entry_init(__head, __type, __member) \ ++ ({ \ ++ __type *__item; \ ++ __item = list_first_entry(__head, __type, __member); \ ++ list_del_init(&__item->__member); \ ++ __item; \ ++ }) ++ ++#ifndef list_next_entry ++#define list_next_entry(__item, __member) \ ++ list_entry((__item)->__member.next, typeof(*(__item)), __member) ++#endif ++ ++#endif /* !_COBALT_KERNEL_LIST_H_ */ +--- linux/include/xenomai/cobalt/kernel/sched-weak.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-weak.h 2021-04-07 16:01:28.172632816 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_WEAK_H ++#define _COBALT_KERNEL_SCHED_WEAK_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-weak.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ ++#define XNSCHED_WEAK_MIN_PRIO 0 ++#define XNSCHED_WEAK_MAX_PRIO 99 ++#define XNSCHED_WEAK_NR_PRIO \ ++ (XNSCHED_WEAK_MAX_PRIO - XNSCHED_WEAK_MIN_PRIO + 1) ++ ++#if XNSCHED_WEAK_NR_PRIO > XNSCHED_CLASS_WEIGHT_FACTOR || \ ++ (defined(CONFIG_XENO_OPT_SCALABLE_SCHED) && \ ++ XNSCHED_WEAK_NR_PRIO > XNSCHED_MLQ_LEVELS) ++#error "WEAK class has too many priority levels" ++#endif ++ ++extern struct xnsched_class xnsched_class_weak; ++ ++struct xnsched_weak { ++ xnsched_queue_t runnable; /*!< Runnable thread queue. 
*/ ++}; ++ ++static inline int xnsched_weak_init_thread(struct xnthread *thread) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_WEAK */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_WEAK_H */ +--- linux/include/xenomai/cobalt/kernel/ancillaries.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/ancillaries.h 2021-04-07 16:01:28.167632823 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ANCILLARIES_H ++#define _COBALT_KERNEL_ANCILLARIES_H ++ ++#include ++#include ++#include ++#include ++ ++#define ksformat(__dst, __len, __fmt, __args...) \ ++ ({ \ ++ size_t __ret; \ ++ __ret = snprintf(__dst, __len, __fmt, ##__args); \ ++ if (__ret >= __len) \ ++ __dst[__len-1] = '\0'; \ ++ __ret; \ ++ }) ++ ++#define kasformat(__fmt, __args...) \ ++ ({ \ ++ kasprintf(GFP_KERNEL, __fmt, ##__args); \ ++ }) ++ ++#define kvsformat(__dst, __len, __fmt, __ap) \ ++ ({ \ ++ size_t __ret; \ ++ __ret = vsnprintf(__dst, __len, __fmt, __ap); \ ++ if (__ret >= __len) \ ++ __dst[__len-1] = '\0'; \ ++ __ret; \ ++ }) ++ ++#define kvasformat(__fmt, __ap) \ ++ ({ \ ++ kvasprintf(GFP_KERNEL, __fmt, __ap); \ ++ }) ++ ++void __knamecpy_requires_character_array_as_destination(void); ++ ++#define knamecpy(__dst, __src) \ ++ ({ \ ++ if (!__builtin_types_compatible_p(typeof(__dst), char[])) \ ++ __knamecpy_requires_character_array_as_destination(); \ ++ strncpy((__dst), __src, sizeof(__dst)); \ ++ __dst[sizeof(__dst) - 1] = '\0'; \ ++ __dst; \ ++ }) ++ ++#define get_current_uuid() from_kuid_munged(current_user_ns(), current_uid()) ++ ++#endif /* !_COBALT_KERNEL_ANCILLARIES_H */ +--- linux/include/xenomai/cobalt/kernel/map.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/map.h 2021-04-07 16:01:28.163632829 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_MAP_H ++#define _COBALT_KERNEL_MAP_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_map ++ * @{ ++ */ ++ ++#define XNMAP_MAX_KEYS (BITS_PER_LONG * BITS_PER_LONG) ++ ++struct xnmap { ++ int nkeys; ++ int ukeys; ++ int offset; ++ unsigned long himask; ++ unsigned long himap; ++#define __IDMAP_LONGS ((XNMAP_MAX_KEYS+BITS_PER_LONG-1)/BITS_PER_LONG) ++ unsigned long lomap[__IDMAP_LONGS]; ++#undef __IDMAP_LONGS ++ void *objarray[1]; ++}; ++ ++struct xnmap *xnmap_create(int nkeys, ++ int reserve, ++ int offset); ++ ++void xnmap_delete(struct xnmap *map); ++ ++int xnmap_enter(struct xnmap *map, ++ int key, ++ void *objaddr); ++ ++int xnmap_remove(struct xnmap *map, ++ int key); ++ ++static inline void *xnmap_fetch_nocheck(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset; ++ return map->objarray[ofkey]; ++} ++ ++static inline void *xnmap_fetch(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset; ++ ++ if (ofkey < 0 || ofkey >= map->nkeys) ++ return NULL; ++ ++ return map->objarray[ofkey]; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_MAP_H */ +--- linux/include/xenomai/cobalt/kernel/bufd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/bufd.h 2021-04-07 16:01:28.158632836 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_BUFD_H ++#define _COBALT_KERNEL_BUFD_H ++ ++#include ++ ++/** ++ * @addtogroup cobalt_core_bufd ++ * ++ * @{ ++ */ ++ ++struct mm_struct; ++ ++struct xnbufd { ++ caddr_t b_ptr; /* src/dst buffer address */ ++ size_t b_len; /* total length of buffer */ ++ off_t b_off; /* # of bytes read/written */ ++ struct mm_struct *b_mm; /* src/dst address space */ ++ caddr_t b_carry; /* pointer to carry over area */ ++ char b_buf[64]; /* fast carry over area */ ++}; ++ ++void xnbufd_map_umem(struct xnbufd *bufd, ++ void __user *ptr, size_t len); ++ ++static inline void xnbufd_map_uread(struct xnbufd *bufd, ++ const void __user *ptr, size_t len) ++{ ++ xnbufd_map_umem(bufd, (void __user *)ptr, len); ++} ++ ++static inline void xnbufd_map_uwrite(struct xnbufd *bufd, ++ void __user *ptr, size_t len) ++{ ++ xnbufd_map_umem(bufd, ptr, len); ++} ++ ++ssize_t xnbufd_unmap_uread(struct xnbufd *bufd); ++ ++ssize_t xnbufd_unmap_uwrite(struct xnbufd *bufd); ++ ++void xnbufd_map_kmem(struct xnbufd *bufd, ++ void *ptr, size_t len); ++ ++static inline void xnbufd_map_kread(struct xnbufd *bufd, ++ const void *ptr, size_t len) ++{ ++ xnbufd_map_kmem(bufd, (void *)ptr, len); ++} ++ ++static inline void xnbufd_map_kwrite(struct xnbufd *bufd, ++ void *ptr, size_t len) ++{ ++ xnbufd_map_kmem(bufd, ptr, len); ++} ++ ++ssize_t xnbufd_unmap_kread(struct xnbufd *bufd); ++ ++ssize_t xnbufd_unmap_kwrite(struct xnbufd *bufd); ++ ++ssize_t xnbufd_copy_to_kmem(void *ptr, ++ struct xnbufd *bufd, size_t len); ++ ++ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, ++ void *from, size_t len); ++ ++void xnbufd_invalidate(struct xnbufd *bufd); ++ ++static inline void xnbufd_reset(struct xnbufd *bufd) ++{ ++ bufd->b_off = 0; ++} ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_BUFD_H */ +--- linux/include/xenomai/cobalt/kernel/clock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/clock.h 2021-04-07 16:01:28.153632843 +0800 +@@ -0,0 +1,361 @@ ++/* ++ * Copyright (C) 2006,2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_CLOCK_H ++#define _COBALT_KERNEL_CLOCK_H ++ ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_clock ++ * @{ ++ */ ++ ++struct xnsched; ++struct xntimerdata; ++ ++struct xnclock_gravity { ++ unsigned long irq; ++ unsigned long kernel; ++ unsigned long user; ++}; ++ ++struct xnclock { ++ /** (ns) */ ++ xnticks_t wallclock_offset; ++ /** (ns) */ ++ xnticks_t resolution; ++ /** (raw clock ticks). */ ++ struct xnclock_gravity gravity; ++ /** Clock name. 
*/ ++ const char *name; ++ struct { ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ xnticks_t (*read_raw)(struct xnclock *clock); ++ xnticks_t (*read_monotonic)(struct xnclock *clock); ++ int (*set_time)(struct xnclock *clock, ++ const struct timespec *ts); ++ xnsticks_t (*ns_to_ticks)(struct xnclock *clock, ++ xnsticks_t ns); ++ xnsticks_t (*ticks_to_ns)(struct xnclock *clock, ++ xnsticks_t ticks); ++ xnsticks_t (*ticks_to_ns_rounded)(struct xnclock *clock, ++ xnsticks_t ticks); ++ void (*program_local_shot)(struct xnclock *clock, ++ struct xnsched *sched); ++ void (*program_remote_shot)(struct xnclock *clock, ++ struct xnsched *sched); ++#endif ++ int (*adjust_time)(struct xnclock *clock, ++ struct timex *tx); ++ int (*set_gravity)(struct xnclock *clock, ++ const struct xnclock_gravity *p); ++ void (*reset_gravity)(struct xnclock *clock); ++#ifdef CONFIG_XENO_OPT_VFILE ++ void (*print_status)(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it); ++#endif ++ } ops; ++ /* Private section. */ ++ struct xntimerdata *timerdata; ++ int id; ++#ifdef CONFIG_SMP ++ /** Possible CPU affinity of clock beat. */ ++ cpumask_t affinity; ++#endif ++#ifdef CONFIG_XENO_OPT_STATS ++ struct xnvfile_snapshot timer_vfile; ++ struct xnvfile_rev_tag timer_revtag; ++ struct list_head timerq; ++ int nrtimers; ++#endif /* CONFIG_XENO_OPT_STATS */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_regular vfile; ++#endif ++}; ++ ++struct xnclock_ratelimit_state { ++ xnticks_t interval; ++ xnticks_t begin; ++ int burst; ++ int printed; ++ int missed; ++}; ++ ++extern struct xnclock nkclock; ++ ++extern unsigned long nktimerlat; ++ ++int xnclock_register(struct xnclock *clock, ++ const cpumask_t *affinity); ++ ++void xnclock_deregister(struct xnclock *clock); ++ ++void xnclock_tick(struct xnclock *clock); ++ ++void xnclock_adjust(struct xnclock *clock, ++ xnsticks_t delta); ++ ++void xnclock_core_local_shot(struct xnsched *sched); ++ ++void xnclock_core_remote_shot(struct xnsched *sched); ++ ++xnsticks_t xnclock_core_ns_to_ticks(xnsticks_t ns); ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks); ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks); ++ ++xnticks_t xnclock_core_read_monotonic(void); ++ ++static inline xnticks_t xnclock_core_read_raw(void) ++{ ++ unsigned long long t; ++ ipipe_read_tsc(t); ++ return t; ++} ++ ++/* We use the Linux defaults */ ++#define XN_RATELIMIT_INTERVAL 5000000000LL ++#define XN_RATELIMIT_BURST 10 ++ ++int __xnclock_ratelimit(struct xnclock_ratelimit_state *rs, const char *func); ++ ++#define xnclock_ratelimit() ({ \ ++ static struct xnclock_ratelimit_state __state = { \ ++ .interval = XN_RATELIMIT_INTERVAL, \ ++ .burst = XN_RATELIMIT_BURST, \ ++ }; \ ++ __xnclock_ratelimit(&__state, __func__); \ ++}) ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++static inline void xnclock_program_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++ if (likely(clock == &nkclock)) ++ xnclock_core_local_shot(sched); ++ else if (clock->ops.program_local_shot) ++ clock->ops.program_local_shot(clock, sched); ++} ++ ++static inline void xnclock_remote_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++#ifdef CONFIG_SMP ++ if (likely(clock == &nkclock)) ++ xnclock_core_remote_shot(sched); ++ else if (clock->ops.program_remote_shot) ++ clock->ops.program_remote_shot(clock, sched); ++#endif ++} ++ ++static inline xnticks_t xnclock_read_raw(struct xnclock *clock) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_read_raw(); ++ ++ return clock->ops.read_raw(clock); 
++} ++ ++static inline xnsticks_t xnclock_ns_to_ticks(struct xnclock *clock, ++ xnsticks_t ns) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ns_to_ticks(ns); ++ ++ return clock->ops.ns_to_ticks(clock, ns); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ticks_to_ns(ticks); ++ ++ return clock->ops.ticks_to_ns(clock, ticks); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns_rounded(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_ticks_to_ns_rounded(ticks); ++ ++ return clock->ops.ticks_to_ns_rounded(clock, ticks); ++} ++ ++static inline xnticks_t xnclock_read_monotonic(struct xnclock *clock) ++{ ++ if (likely(clock == &nkclock)) ++ return xnclock_core_read_monotonic(); ++ ++ return clock->ops.read_monotonic(clock); ++} ++ ++static inline int xnclock_set_time(struct xnclock *clock, ++ const struct timespec *ts) ++{ ++ if (likely(clock == &nkclock)) ++ return -EINVAL; ++ ++ return clock->ops.set_time(clock, ts); ++} ++ ++#else /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline void xnclock_program_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++ xnclock_core_local_shot(sched); ++} ++ ++static inline void xnclock_remote_shot(struct xnclock *clock, ++ struct xnsched *sched) ++{ ++#ifdef CONFIG_SMP ++ xnclock_core_remote_shot(sched); ++#endif ++} ++ ++static inline xnticks_t xnclock_read_raw(struct xnclock *clock) ++{ ++ return xnclock_core_read_raw(); ++} ++ ++static inline xnsticks_t xnclock_ns_to_ticks(struct xnclock *clock, ++ xnsticks_t ns) ++{ ++ return xnclock_core_ns_to_ticks(ns); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ return xnclock_core_ticks_to_ns(ticks); ++} ++ ++static inline xnsticks_t xnclock_ticks_to_ns_rounded(struct xnclock *clock, ++ xnsticks_t ticks) ++{ ++ return xnclock_core_ticks_to_ns_rounded(ticks); ++} ++ ++static inline xnticks_t xnclock_read_monotonic(struct xnclock *clock) ++{ ++ return xnclock_core_read_monotonic(); ++} ++ ++static inline int xnclock_set_time(struct xnclock *clock, ++ const struct timespec *ts) ++{ ++ /* ++ * There is no way to change the core clock's idea of time. 
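++ * The request is therefore rejected with -EINVAL below, matching what
++ * the CONFIG_XENO_OPT_EXTCLOCK variant of this helper returns for
++ * &nkclock.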
++ */ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline int xnclock_adjust_time(struct xnclock *clock, ++ struct timex *tx) ++{ ++ if (clock->ops.adjust_time == NULL) ++ return -EOPNOTSUPP; ++ ++ return clock->ops.adjust_time(clock, tx); ++} ++ ++static inline xnticks_t xnclock_get_offset(struct xnclock *clock) ++{ ++ return clock->wallclock_offset; ++} ++ ++static inline xnticks_t xnclock_get_resolution(struct xnclock *clock) ++{ ++ return clock->resolution; /* ns */ ++} ++ ++static inline void xnclock_set_resolution(struct xnclock *clock, ++ xnticks_t resolution) ++{ ++ clock->resolution = resolution; /* ns */ ++} ++ ++static inline int xnclock_set_gravity(struct xnclock *clock, ++ const struct xnclock_gravity *gravity) ++{ ++ if (clock->ops.set_gravity) ++ return clock->ops.set_gravity(clock, gravity); ++ ++ return -EINVAL; ++} ++ ++static inline void xnclock_reset_gravity(struct xnclock *clock) ++{ ++ if (clock->ops.reset_gravity) ++ clock->ops.reset_gravity(clock); ++} ++ ++#define xnclock_get_gravity(__clock, __type) ((__clock)->gravity.__type) ++ ++static inline xnticks_t xnclock_read_realtime(struct xnclock *clock) ++{ ++ /* ++ * Return an adjusted value of the monotonic time with the ++ * translated system wallclock offset. ++ */ ++ return xnclock_read_monotonic(clock) + xnclock_get_offset(clock); ++} ++ ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem); ++ ++xnticks_t xnclock_get_host_time(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++void xnclock_init_proc(void); ++ ++void xnclock_cleanup_proc(void); ++ ++static inline void xnclock_print_status(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it) ++{ ++ if (clock->ops.print_status) ++ clock->ops.print_status(clock, it); ++} ++ ++#else ++static inline void xnclock_init_proc(void) { } ++static inline void xnclock_cleanup_proc(void) { } ++#endif ++ ++void xnclock_update_freq(unsigned long long freq); ++ ++int xnclock_init(unsigned long long freq); ++ ++void xnclock_cleanup(void); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_CLOCK_H */ +--- linux/include/xenomai/cobalt/kernel/pipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/pipe.h 2021-04-07 16:01:28.149632849 +0800 +@@ -0,0 +1,136 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA ++ * 02139, USA; either version 2 of the License, or (at your option) ++ * any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_PIPE_H ++#define _COBALT_KERNEL_PIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define XNPIPE_NDEVS CONFIG_XENO_OPT_PIPE_NRDEV ++#define XNPIPE_DEV_MAJOR 150 ++ ++#define XNPIPE_KERN_CONN 0x1 ++#define XNPIPE_KERN_LCLOSE 0x2 ++#define XNPIPE_USER_CONN 0x4 ++#define XNPIPE_USER_SIGIO 0x8 ++#define XNPIPE_USER_WREAD 0x10 ++#define XNPIPE_USER_WREAD_READY 0x20 ++#define XNPIPE_USER_WSYNC 0x40 ++#define XNPIPE_USER_WSYNC_READY 0x80 ++#define XNPIPE_USER_LCONN 0x100 ++ ++#define XNPIPE_USER_ALL_WAIT \ ++(XNPIPE_USER_WREAD|XNPIPE_USER_WSYNC) ++ ++#define XNPIPE_USER_ALL_READY \ ++(XNPIPE_USER_WREAD_READY|XNPIPE_USER_WSYNC_READY) ++ ++struct xnpipe_mh { ++ size_t size; ++ size_t rdoff; ++ struct list_head link; ++}; ++ ++struct xnpipe_state; ++ ++struct xnpipe_operations { ++ void (*output)(struct xnpipe_mh *mh, void *xstate); ++ int (*input)(struct xnpipe_mh *mh, int retval, void *xstate); ++ void *(*alloc_ibuf)(size_t size, void *xstate); ++ void (*free_ibuf)(void *buf, void *xstate); ++ void (*free_obuf)(void *buf, void *xstate); ++ void (*release)(void *xstate); ++}; ++ ++struct xnpipe_state { ++ struct list_head slink; /* Link on sleep queue */ ++ struct list_head alink; /* Link on async queue */ ++ ++ struct list_head inq; /* From user-space to kernel */ ++ int nrinq; ++ struct list_head outq; /* From kernel to user-space */ ++ int nroutq; ++ struct xnsynch synchbase; ++ struct xnpipe_operations ops; ++ void *xstate; /* Extra state managed by caller */ ++ ++ /* Linux kernel part */ ++ unsigned long status; ++ struct fasync_struct *asyncq; ++ wait_queue_head_t readq; /* open/read/poll waiters */ ++ wait_queue_head_t syncq; /* sync waiters */ ++ int wcount; /* number of waiters on this minor */ ++ size_t ionrd; ++}; ++ ++extern struct xnpipe_state xnpipe_states[]; ++ ++#define xnminor_from_state(s) (s - xnpipe_states) ++ ++#ifdef CONFIG_XENO_OPT_PIPE ++int xnpipe_mount(void); ++void xnpipe_umount(void); ++#else /* !CONFIG_XENO_OPT_PIPE */ ++static inline int xnpipe_mount(void) { return 0; } ++static inline void xnpipe_umount(void) { } ++#endif /* !CONFIG_XENO_OPT_PIPE */ ++ ++/* Entry points of the kernel interface. */ ++ ++int xnpipe_connect(int minor, ++ struct xnpipe_operations *ops, void *xstate); ++ ++int xnpipe_disconnect(int minor); ++ ++ssize_t xnpipe_send(int minor, ++ struct xnpipe_mh *mh, size_t size, int flags); ++ ++ssize_t xnpipe_mfixup(int minor, struct xnpipe_mh *mh, ssize_t size); ++ ++ssize_t xnpipe_recv(int minor, ++ struct xnpipe_mh **pmh, xnticks_t timeout); ++ ++int xnpipe_flush(int minor, int mode); ++ ++int xnpipe_pollstate(int minor, unsigned int *mask_r); ++ ++static inline unsigned int __xnpipe_pollstate(int minor) ++{ ++ struct xnpipe_state *state = xnpipe_states + minor; ++ unsigned int mask = POLLOUT; ++ ++ if (!list_empty(&state->inq)) ++ mask |= POLLIN; ++ ++ return mask; ++} ++ ++static inline char *xnpipe_m_data(struct xnpipe_mh *mh) ++{ ++ return (char *)(mh + 1); ++} ++ ++#define xnpipe_m_size(mh) ((mh)->size) ++ ++#define xnpipe_m_rdoff(mh) ((mh)->rdoff) ++ ++#endif /* !_COBALT_KERNEL_PIPE_H */ +--- linux/include/xenomai/cobalt/kernel/sched-tp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-tp.h 2021-04-07 16:01:28.144632856 +0800 +@@ -0,0 +1,99 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_TP_H ++#define _COBALT_KERNEL_SCHED_TP_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-tp.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ++#define XNSCHED_TP_MIN_PRIO 1 ++#define XNSCHED_TP_MAX_PRIO 255 ++#define XNSCHED_TP_NR_PRIO \ ++ (XNSCHED_TP_MAX_PRIO - XNSCHED_TP_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_tp; ++ ++struct xnsched_tp_window { ++ xnticks_t w_offset; ++ int w_part; ++}; ++ ++struct xnsched_tp_schedule { ++ int pwin_nr; ++ xnticks_t tf_duration; ++ atomic_t refcount; ++ struct xnsched_tp_window pwins[0]; ++}; ++ ++struct xnsched_tp { ++ struct xnsched_tpslot { ++ /** Per-partition runqueue. */ ++ xnsched_queue_t runnable; ++ } partitions[CONFIG_XENO_OPT_SCHED_TP_NRPART]; ++ /** Idle slot for passive windows. */ ++ struct xnsched_tpslot idle; ++ /** Active partition slot */ ++ struct xnsched_tpslot *tps; ++ /** Time frame timer */ ++ struct xntimer tf_timer; ++ /** Global partition schedule */ ++ struct xnsched_tp_schedule *gps; ++ /** Window index of next partition */ ++ int wnext; ++ /** Start of next time frame */ ++ xnticks_t tf_start; ++ /** Assigned thread queue */ ++ struct list_head threads; ++}; ++ ++static inline int xnsched_tp_init_thread(struct xnthread *thread) ++{ ++ thread->tps = NULL; ++ ++ return 0; ++} ++ ++struct xnsched_tp_schedule * ++xnsched_tp_set_schedule(struct xnsched *sched, ++ struct xnsched_tp_schedule *gps); ++ ++void xnsched_tp_start_schedule(struct xnsched *sched); ++ ++void xnsched_tp_stop_schedule(struct xnsched *sched); ++ ++int xnsched_tp_get_partition(struct xnsched *sched); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_get_schedule(struct xnsched *sched); ++ ++void xnsched_tp_put_schedule(struct xnsched_tp_schedule *gps); ++ ++#endif /* CONFIG_XENO_OPT_SCHED_TP */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_TP_H */ +--- linux/include/xenomai/cobalt/kernel/registry.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/registry.h 2021-04-07 16:01:28.139632863 +0800 +@@ -0,0 +1,200 @@ ++/* ++ * Copyright (C) 2004 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_REGISTRY_H ++#define _COBALT_KERNEL_REGISTRY_H ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_registry ++ * ++ * @{ ++ */ ++struct xnpnode; ++ ++struct xnobject { ++ void *objaddr; ++ const char *key; /* !< Hash key. May be NULL if anonynous. */ ++ unsigned long cstamp; /* !< Creation stamp. */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnpnode *pnode; /* !< v-file information class. */ ++ union { ++ struct { ++ struct xnvfile_rev_tag tag; ++ struct xnvfile_snapshot file; ++ } vfsnap; /* !< virtual snapshot file. */ ++ struct xnvfile_regular vfreg; /* !< virtual regular file */ ++ struct xnvfile_link link; /* !< virtual link. */ ++ } vfile_u; ++ struct xnvfile *vfilp; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ struct hlist_node hlink; /* !< Link in h-table */ ++ struct list_head link; ++}; ++ ++int xnregistry_init(void); ++ ++void xnregistry_cleanup(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#define XNOBJECT_EXPORT_SCHEDULED ((struct xnvfile *)1L) ++#define XNOBJECT_EXPORT_INPROGRESS ((struct xnvfile *)2L) ++#define XNOBJECT_EXPORT_ABORTED ((struct xnvfile *)3L) ++ ++struct xnptree { ++ const char *dirname; ++ /* hidden */ ++ int entries; ++ struct xnvfile_directory vdir; ++}; ++ ++#define DEFINE_XNPTREE(__var, __name) \ ++ struct xnptree __var = { \ ++ .dirname = __name, \ ++ .entries = 0, \ ++ .vdir = xnvfile_nodir, \ ++ } ++ ++struct xnpnode_ops { ++ int (*export)(struct xnobject *object, struct xnpnode *pnode); ++ void (*unexport)(struct xnobject *object, struct xnpnode *pnode); ++ void (*touch)(struct xnobject *object); ++}; ++ ++struct xnpnode { ++ const char *dirname; ++ struct xnptree *root; ++ struct xnpnode_ops *ops; ++ /* hidden */ ++ int entries; ++ struct xnvfile_directory vdir; ++}; ++ ++struct xnpnode_snapshot { ++ struct xnpnode node; ++ struct xnvfile_snapshot_template vfile; ++}; ++ ++struct xnpnode_regular { ++ struct xnpnode node; ++ struct xnvfile_regular_template vfile; ++}; ++ ++struct xnpnode_link { ++ struct xnpnode node; ++ char *(*target)(void *obj); ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++#define DEFINE_XNPTREE(__var, __name); ++ ++/* Placeholders. */ ++ ++struct xnpnode { ++ const char *dirname; ++}; ++ ++struct xnpnode_snapshot { ++ struct xnpnode node; ++}; ++ ++struct xnpnode_regular { ++ struct xnpnode node; ++}; ++ ++struct xnpnode_link { ++ struct xnpnode node; ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/* Public interface. */ ++ ++extern struct xnobject *registry_obj_slots; ++ ++static inline struct xnobject *xnregistry_validate(xnhandle_t handle) ++{ ++ struct xnobject *object; ++ /* ++ * Careful: a removed object which is still in flight to be ++ * unexported carries a NULL objaddr, so we have to check this ++ * as well. ++ */ ++ handle = xnhandle_get_index(handle); ++ if (likely(handle && handle < CONFIG_XENO_OPT_REGISTRY_NRSLOTS)) { ++ object = ®istry_obj_slots[handle]; ++ return object->objaddr ? object : NULL; ++ } ++ ++ return NULL; ++} ++ ++static inline const char *xnregistry_key(xnhandle_t handle) ++{ ++ struct xnobject *object = xnregistry_validate(handle); ++ return object ? 
object->key : NULL; ++} ++ ++int xnregistry_enter(const char *key, ++ void *objaddr, ++ xnhandle_t *phandle, ++ struct xnpnode *pnode); ++ ++static inline int ++xnregistry_enter_anon(void *objaddr, xnhandle_t *phandle) ++{ ++ return xnregistry_enter(NULL, objaddr, phandle, NULL); ++} ++ ++int xnregistry_bind(const char *key, ++ xnticks_t timeout, ++ int timeout_mode, ++ xnhandle_t *phandle); ++ ++int xnregistry_remove(xnhandle_t handle); ++ ++static inline ++void *xnregistry_lookup(xnhandle_t handle, ++ unsigned long *cstamp_r) ++{ ++ struct xnobject *object = xnregistry_validate(handle); ++ ++ if (object == NULL) ++ return NULL; ++ ++ if (cstamp_r) ++ *cstamp_r = object->cstamp; ++ ++ return object->objaddr; ++} ++ ++int xnregistry_unlink(const char *key); ++ ++unsigned xnregistry_hash_size(void); ++ ++extern struct xnpnode_ops xnregistry_vfsnap_ops; ++ ++extern struct xnpnode_ops xnregistry_vlink_ops; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_REGISTRY_H */ +--- linux/include/xenomai/cobalt/kernel/trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/trace.h 2021-04-07 16:01:28.134632870 +0800 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_TRACE_H ++#define _COBALT_KERNEL_TRACE_H ++ ++#include ++#include ++#include ++ ++static inline int xntrace_max_begin(unsigned long v) ++{ ++ ipipe_trace_begin(v); ++ return 0; ++} ++ ++static inline int xntrace_max_end(unsigned long v) ++{ ++ ipipe_trace_end(v); ++ return 0; ++} ++ ++static inline int xntrace_max_reset(void) ++{ ++ ipipe_trace_max_reset(); ++ return 0; ++} ++ ++static inline int xntrace_user_start(void) ++{ ++ return ipipe_trace_frozen_reset(); ++} ++ ++static inline int xntrace_user_stop(unsigned long v) ++{ ++ ipipe_trace_freeze(v); ++ return 0; ++} ++ ++static inline int xntrace_user_freeze(unsigned long v, int once) ++{ ++ int ret = 0; ++ ++ if (!once) ++ ret = ipipe_trace_frozen_reset(); ++ ++ ipipe_trace_freeze(v); ++ ++ return ret; ++} ++ ++static inline int xntrace_special(unsigned char id, unsigned long v) ++{ ++ ipipe_trace_special(id, v); ++ return 0; ++} ++ ++static inline int xntrace_special_u64(unsigned char id, ++ unsigned long long v) ++{ ++ ipipe_trace_special(id, (unsigned long)(v >> 32)); ++ ipipe_trace_special(id, (unsigned long)(v & 0xFFFFFFFF)); ++ return 0; ++} ++ ++static inline int xntrace_pid(pid_t pid, short prio) ++{ ++ ipipe_trace_pid(pid, prio); ++ return 0; ++} ++ ++static inline int xntrace_tick(unsigned long delay_ticks) ++{ ++ ipipe_trace_event(0, delay_ticks); ++ return 0; ++} ++ ++static inline int xntrace_panic_freeze(void) ++{ ++ ipipe_trace_panic_freeze(); ++ return 0; ++} ++ ++static inline int xntrace_panic_dump(void) ++{ ++ ipipe_trace_panic_dump(); ++ return 0; ++} ++ ++#endif /* !_COBALT_KERNEL_TRACE_H */ +--- linux/include/xenomai/cobalt/kernel/init.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/init.h 2021-04-07 16:01:28.130632876 +0800 +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_INIT_H ++#define _COBALT_KERNEL_INIT_H ++ ++#include ++#include ++#include ++ ++extern atomic_t cobalt_runstate; ++ ++static inline enum cobalt_run_states realtime_core_state(void) ++{ ++ return atomic_read(&cobalt_runstate); ++} ++ ++static inline int realtime_core_enabled(void) ++{ ++ return atomic_read(&cobalt_runstate) != COBALT_STATE_DISABLED; ++} ++ ++static inline int realtime_core_running(void) ++{ ++ return atomic_read(&cobalt_runstate) == COBALT_STATE_RUNNING; ++} ++ ++static inline void set_realtime_core_state(enum cobalt_run_states state) ++{ ++ atomic_set(&cobalt_runstate, state); ++} ++ ++void cobalt_add_state_chain(struct notifier_block *nb); ++ ++void cobalt_remove_state_chain(struct notifier_block *nb); ++ ++void cobalt_call_state_chain(enum cobalt_run_states newstate); ++ ++#endif /* !_COBALT_KERNEL_INIT_H_ */ +--- linux/include/xenomai/cobalt/kernel/select.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/select.h 2021-04-07 16:01:28.125632883 +0800 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) 2008 Efixo ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_SELECT_H ++#define _COBALT_KERNEL_SELECT_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_select ++ * @{ ++ */ ++ ++#define XNSELECT_READ 0 ++#define XNSELECT_WRITE 1 ++#define XNSELECT_EXCEPT 2 ++#define XNSELECT_MAX_TYPES 3 ++ ++struct xnselector { ++ struct xnsynch synchbase; ++ struct fds { ++ fd_set expected; ++ fd_set pending; ++ } fds [XNSELECT_MAX_TYPES]; ++ struct list_head destroy_link; ++ struct list_head bindings; /* only used by xnselector_destroy */ ++}; ++ ++#define __NFDBITS__ (8 * sizeof(unsigned long)) ++#define __FDSET_LONGS__ (__FD_SETSIZE/__NFDBITS__) ++#define __FDELT__(d) ((d) / __NFDBITS__) ++#define __FDMASK__(d) (1UL << ((d) % __NFDBITS__)) ++ ++static inline void __FD_SET__(unsigned long __fd, __kernel_fd_set *__fdsetp) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); ++} ++ ++static inline void __FD_CLR__(unsigned long __fd, __kernel_fd_set *__fdsetp) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); ++} ++ ++static inline int __FD_ISSET__(unsigned long __fd, const __kernel_fd_set *__p) ++{ ++ unsigned long __tmp = __fd / __NFDBITS__; ++ unsigned long __rem = __fd % __NFDBITS__; ++ return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; ++} ++ ++static inline void __FD_ZERO__(__kernel_fd_set *__p) ++{ ++ unsigned long *__tmp = __p->fds_bits; ++ int __i; ++ ++ __i = __FDSET_LONGS__; ++ while (__i) { ++ __i--; ++ *__tmp = 0; ++ __tmp++; ++ } ++} ++ ++struct xnselect { ++ struct list_head bindings; ++}; ++ ++#define DECLARE_XNSELECT(name) struct xnselect name ++ ++struct xnselect_binding { ++ struct xnselector *selector; ++ struct xnselect *fd; ++ unsigned int type; ++ unsigned int bit_index; ++ struct list_head link; /* link in selected fds list. */ ++ struct list_head slink; /* link in selector list */ ++}; ++ ++void xnselect_init(struct xnselect *select_block); ++ ++int xnselect_bind(struct xnselect *select_block, ++ struct xnselect_binding *binding, ++ struct xnselector *selector, ++ unsigned int type, ++ unsigned int bit_index, ++ unsigned int state); ++ ++int __xnselect_signal(struct xnselect *select_block, unsigned int state); ++ ++/** ++ * Signal a file descriptor state change. ++ * ++ * @param select_block pointer to an @a xnselect structure representing the file ++ * descriptor whose state changed; ++ * @param state new value of the state. ++ * ++ * @retval 1 if rescheduling is needed; ++ * @retval 0 otherwise. 
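++ *
++ * A minimal usage sketch (illustrative only; @a rtdm_dev and
++ * @a new_state are hypothetical names, not part of this patch):
++ * a driver flagging a readability change would typically do
++ *
++ * @code
++ * if (xnselect_signal(&rtdm_dev->read_block, new_state))
++ *         xnsched_run();
++ * @endcode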
++ */ ++static inline int ++xnselect_signal(struct xnselect *select_block, unsigned int state) ++{ ++ if (!list_empty(&select_block->bindings)) ++ return __xnselect_signal(select_block, state); ++ ++ return 0; ++} ++ ++void xnselect_destroy(struct xnselect *select_block); ++ ++int xnselector_init(struct xnselector *selector); ++ ++int xnselect(struct xnselector *selector, ++ fd_set *out_fds[XNSELECT_MAX_TYPES], ++ fd_set *in_fds[XNSELECT_MAX_TYPES], ++ int nfds, ++ xnticks_t timeout, xntmode_t timeout_mode); ++ ++void xnselector_destroy(struct xnselector *selector); ++ ++int xnselect_mount(void); ++ ++int xnselect_umount(void); ++ ++/** @} */ ++ ++#endif /* _COBALT_KERNEL_SELECT_H */ +--- linux/include/xenomai/cobalt/kernel/compat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/compat.h 2021-04-07 16:01:28.120632890 +0800 +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_COMPAT_H ++#define _COBALT_KERNEL_COMPAT_H ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++#include ++#include ++#include ++#include ++ ++struct mq_attr; ++ ++struct __compat_sched_ss_param { ++ int __sched_low_priority; ++ struct compat_timespec __sched_repl_period; ++ struct compat_timespec __sched_init_budget; ++ int __sched_max_repl; ++}; ++ ++struct __compat_sched_rr_param { ++ struct compat_timespec __sched_rr_quantum; ++}; ++ ++struct compat_sched_param_ex { ++ int sched_priority; ++ union { ++ struct __compat_sched_ss_param ss; ++ struct __compat_sched_rr_param rr; ++ struct __sched_tp_param tp; ++ struct __sched_quota_param quota; ++ } sched_u; ++}; ++ ++struct compat_mq_attr { ++ compat_long_t mq_flags; ++ compat_long_t mq_maxmsg; ++ compat_long_t mq_msgsize; ++ compat_long_t mq_curmsgs; ++}; ++ ++struct compat_sched_tp_window { ++ struct compat_timespec offset; ++ struct compat_timespec duration; ++ int ptid; ++}; ++ ++struct __compat_sched_config_tp { ++ int op; ++ int nr_windows; ++ struct compat_sched_tp_window windows[0]; ++}; ++ ++union compat_sched_config { ++ struct __compat_sched_config_tp tp; ++ struct __sched_config_quota quota; ++}; ++ ++#define compat_sched_tp_confsz(nr_win) \ ++ (sizeof(struct __compat_sched_config_tp) + nr_win * sizeof(struct compat_sched_tp_window)) ++ ++typedef struct { ++ compat_ulong_t fds_bits[__FD_SETSIZE / (8 * sizeof(compat_long_t))]; ++} compat_fd_set; ++ ++struct compat_rtdm_mmap_request { ++ u64 offset; ++ compat_size_t length; ++ int prot; ++ int flags; ++}; ++ ++int sys32_get_timespec(struct timespec *ts, ++ const struct compat_timespec __user *cts); ++ ++int sys32_put_timespec(struct compat_timespec __user *cts, ++ const struct timespec *ts); ++ ++int sys32_get_itimerspec(struct itimerspec *its, ++ const struct compat_itimerspec __user *cits); ++ ++int 
sys32_put_itimerspec(struct compat_itimerspec __user *cits, ++ const struct itimerspec *its); ++ ++int sys32_get_timeval(struct timeval *tv, ++ const struct compat_timeval __user *ctv); ++ ++int sys32_put_timeval(struct compat_timeval __user *ctv, ++ const struct timeval *tv); ++ ++int sys32_get_timex(struct timex *tx, ++ const struct compat_timex __user *ctx); ++ ++int sys32_put_timex(struct compat_timex __user *ctx, ++ const struct timex *tx); ++ ++ssize_t sys32_get_fdset(fd_set *fds, const compat_fd_set __user *cfds, ++ size_t cfdsize); ++ ++ssize_t sys32_put_fdset(compat_fd_set __user *cfds, const fd_set *fds, ++ size_t fdsize); ++ ++int sys32_get_param_ex(int policy, ++ struct sched_param_ex *p, ++ const struct compat_sched_param_ex __user *u_cp); ++ ++int sys32_put_param_ex(int policy, ++ struct compat_sched_param_ex __user *u_cp, ++ const struct sched_param_ex *p); ++ ++int sys32_get_mqattr(struct mq_attr *ap, ++ const struct compat_mq_attr __user *u_cap); ++ ++int sys32_put_mqattr(struct compat_mq_attr __user *u_cap, ++ const struct mq_attr *ap); ++ ++int sys32_get_sigevent(struct sigevent *ev, ++ const struct compat_sigevent *__user u_cev); ++ ++int sys32_get_sigset(sigset_t *set, const compat_sigset_t *u_cset); ++ ++int sys32_put_sigset(compat_sigset_t *u_cset, const sigset_t *set); ++ ++int sys32_get_sigval(union sigval *val, const union compat_sigval *u_cval); ++ ++int sys32_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun); ++ ++int sys32_get_msghdr(struct user_msghdr *msg, ++ const struct compat_msghdr __user *u_cmsg); ++ ++int sys32_get_mmsghdr(struct mmsghdr *mmsg, ++ const struct compat_mmsghdr __user *u_cmmsg); ++ ++int sys32_put_msghdr(struct compat_msghdr __user *u_cmsg, ++ const struct user_msghdr *msg); ++ ++int sys32_put_mmsghdr(struct compat_mmsghdr __user *u_cmmsg, ++ const struct mmsghdr *mmsg); ++ ++int sys32_get_iovec(struct iovec *iov, ++ const struct compat_iovec __user *ciov, ++ int ciovlen); ++ ++int sys32_put_iovec(struct compat_iovec __user *u_ciov, ++ const struct iovec *iov, ++ int iovlen); ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++#endif /* !_COBALT_KERNEL_COMPAT_H */ +--- linux/include/xenomai/cobalt/kernel/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/timer.h 2021-04-07 16:01:28.116632896 +0800 +@@ -0,0 +1,566 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_KERNEL_TIMER_H ++#define _COBALT_KERNEL_TIMER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_timer ++ * @{ ++ */ ++#define XN_INFINITE ((xnticks_t)0) ++#define XN_NONBLOCK ((xnticks_t)-1) ++ ++/* Timer modes */ ++typedef enum xntmode { ++ XN_RELATIVE, ++ XN_ABSOLUTE, ++ XN_REALTIME ++} xntmode_t; ++ ++/* Timer status */ ++#define XNTIMER_DEQUEUED 0x00000001 ++#define XNTIMER_KILLED 0x00000002 ++#define XNTIMER_PERIODIC 0x00000004 ++#define XNTIMER_REALTIME 0x00000008 ++#define XNTIMER_FIRED 0x00000010 ++#define XNTIMER_RUNNING 0x00000020 ++#define XNTIMER_KGRAVITY 0x00000040 ++#define XNTIMER_UGRAVITY 0x00000080 ++#define XNTIMER_IGRAVITY 0 /* most conservative */ ++ ++#define XNTIMER_GRAVITY_MASK (XNTIMER_KGRAVITY|XNTIMER_UGRAVITY) ++#define XNTIMER_INIT_MASK XNTIMER_GRAVITY_MASK ++ ++/* These flags are available to the real-time interfaces */ ++#define XNTIMER_SPARE0 0x01000000 ++#define XNTIMER_SPARE1 0x02000000 ++#define XNTIMER_SPARE2 0x04000000 ++#define XNTIMER_SPARE3 0x08000000 ++#define XNTIMER_SPARE4 0x10000000 ++#define XNTIMER_SPARE5 0x20000000 ++#define XNTIMER_SPARE6 0x40000000 ++#define XNTIMER_SPARE7 0x80000000 ++ ++/* Timer priorities */ ++#define XNTIMER_LOPRIO (-999999999) ++#define XNTIMER_STDPRIO 0 ++#define XNTIMER_HIPRIO 999999999 ++ ++struct xntlholder { ++ struct list_head link; ++ xnticks_t key; ++ int prio; ++}; ++ ++#define xntlholder_date(h) ((h)->key) ++#define xntlholder_prio(h) ((h)->prio) ++#define xntlist_init(q) INIT_LIST_HEAD(q) ++#define xntlist_empty(q) list_empty(q) ++ ++static inline struct xntlholder *xntlist_head(struct list_head *q) ++{ ++ if (list_empty(q)) ++ return NULL; ++ ++ return list_first_entry(q, struct xntlholder, link); ++} ++ ++static inline struct xntlholder *xntlist_next(struct list_head *q, ++ struct xntlholder *h) ++{ ++ if (list_is_last(&h->link, q)) ++ return NULL; ++ ++ return list_entry(h->link.next, struct xntlholder, link); ++} ++ ++static inline struct xntlholder *xntlist_second(struct list_head *q, ++ struct xntlholder *h) ++{ ++ return xntlist_next(q, h); ++} ++ ++static inline void xntlist_insert(struct list_head *q, struct xntlholder *holder) ++{ ++ struct xntlholder *p; ++ ++ if (list_empty(q)) { ++ list_add(&holder->link, q); ++ return; ++ } ++ ++ /* ++ * Insert the new timer at the proper place in the single ++ * queue. O(N) here, but this is the price for the increased ++ * flexibility... ++ */ ++ list_for_each_entry_reverse(p, q, link) { ++ if ((xnsticks_t) (holder->key - p->key) > 0 || ++ (holder->key == p->key && holder->prio <= p->prio)) ++ break; ++ } ++ ++ list_add(&holder->link, &p->link); ++} ++ ++#define xntlist_remove(q, h) \ ++ do { \ ++ (void)(q); \ ++ list_del(&(h)->link); \ ++ } while (0) ++ ++#if defined(CONFIG_XENO_OPT_TIMER_RBTREE) ++ ++#include ++ ++typedef struct { ++ unsigned long long date; ++ unsigned prio; ++ struct rb_node link; ++} xntimerh_t; ++ ++#define xntimerh_date(h) ((h)->date) ++#define xntimerh_prio(h) ((h)->prio) ++#define xntimerh_init(h) do { } while (0) ++ ++typedef struct { ++ struct rb_root root; ++ xntimerh_t *head; ++} xntimerq_t; ++ ++#define xntimerq_init(q) \ ++ ({ \ ++ xntimerq_t *_q = (q); \ ++ _q->root = RB_ROOT; \ ++ _q->head = NULL; \ ++ }) ++ ++#define xntimerq_destroy(q) do { } while (0) ++#define xntimerq_empty(q) ((q)->head == NULL) ++ ++#define xntimerq_head(q) ((q)->head) ++ ++#define xntimerq_next(q, h) \ ++ ({ \ ++ struct rb_node *_node = rb_next(&(h)->link); \ ++ _node ? 
(container_of(_node, xntimerh_t, link)) : NULL; \ ++ }) ++ ++#define xntimerq_second(q, h) xntimerq_next(q, h) ++ ++void xntimerq_insert(xntimerq_t *q, xntimerh_t *holder); ++ ++static inline void xntimerq_remove(xntimerq_t *q, xntimerh_t *holder) ++{ ++ if (holder == q->head) ++ q->head = xntimerq_second(q, holder); ++ ++ rb_erase(&holder->link, &q->root); ++} ++ ++typedef struct { } xntimerq_it_t; ++ ++#define xntimerq_it_begin(q,i) ((void) (i), xntimerq_head(q)) ++#define xntimerq_it_next(q,i,h) ((void) (i), xntimerq_next((q),(h))) ++ ++#else /* CONFIG_XENO_OPT_TIMER_LIST */ ++ ++typedef struct xntlholder xntimerh_t; ++ ++#define xntimerh_date(h) xntlholder_date(h) ++#define xntimerh_prio(h) xntlholder_prio(h) ++#define xntimerh_init(h) do { } while (0) ++ ++typedef struct list_head xntimerq_t; ++ ++#define xntimerq_init(q) xntlist_init(q) ++#define xntimerq_destroy(q) do { } while (0) ++#define xntimerq_empty(q) xntlist_empty(q) ++#define xntimerq_head(q) xntlist_head(q) ++#define xntimerq_second(q, h) xntlist_second((q),(h)) ++#define xntimerq_insert(q, h) xntlist_insert((q),(h)) ++#define xntimerq_remove(q, h) xntlist_remove((q),(h)) ++ ++typedef struct { } xntimerq_it_t; ++ ++#define xntimerq_it_begin(q,i) ((void) (i), xntlist_head(q)) ++#define xntimerq_it_next(q,i,h) ((void) (i), xntlist_next((q),(h))) ++ ++#endif /* CONFIG_XENO_OPT_TIMER_LIST */ ++ ++struct xnsched; ++ ++struct xntimerdata { ++ xntimerq_t q; ++}; ++ ++static inline struct xntimerdata * ++xnclock_percpu_timerdata(struct xnclock *clock, int cpu) ++{ ++ return per_cpu_ptr(clock->timerdata, cpu); ++} ++ ++static inline struct xntimerdata * ++xnclock_this_timerdata(struct xnclock *clock) ++{ ++ return raw_cpu_ptr(clock->timerdata); ++} ++ ++struct xntimer { ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ struct xnclock *clock; ++#endif ++ /** Link in timers list. */ ++ xntimerh_t aplink; ++ struct list_head adjlink; ++ /** Timer status. */ ++ unsigned long status; ++ /** Periodic interval (clock ticks, 0 == one shot). */ ++ xnticks_t interval; ++ /** Periodic interval (nanoseconds, 0 == one shot). */ ++ xnticks_t interval_ns; ++ /** Count of timer ticks in periodic mode. */ ++ xnticks_t periodic_ticks; ++ /** First tick date in periodic mode. */ ++ xnticks_t start_date; ++ /** Date of next periodic release point (timer ticks). */ ++ xnticks_t pexpect_ticks; ++ /** Sched structure to which the timer is attached. */ ++ struct xnsched *sched; ++ /** Timeout handler. */ ++ void (*handler)(struct xntimer *timer); ++#ifdef CONFIG_XENO_OPT_STATS ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ struct xnclock *tracker; ++#endif ++ /** Timer name to be displayed. */ ++ char name[XNOBJECT_NAME_LEN]; ++ /** Timer holder in timebase. */ ++ struct list_head next_stat; ++ /** Number of timer schedules. */ ++ xnstat_counter_t scheduled; ++ /** Number of timer events. 
*/ ++ xnstat_counter_t fired; ++#endif /* CONFIG_XENO_OPT_STATS */ ++}; ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++static inline struct xnclock *xntimer_clock(struct xntimer *timer) ++{ ++ return timer->clock; ++} ++ ++void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock); ++ ++#else /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++static inline struct xnclock *xntimer_clock(struct xntimer *timer) ++{ ++ return &nkclock; ++} ++ ++static inline void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ XENO_BUG_ON(COBALT, newclock != &nkclock); ++} ++ ++#endif /* !CONFIG_XENO_OPT_EXTCLOCK */ ++ ++#ifdef CONFIG_SMP ++static inline struct xnsched *xntimer_sched(struct xntimer *timer) ++{ ++ return timer->sched; ++} ++#else /* !CONFIG_SMP */ ++#define xntimer_sched(t) xnsched_current() ++#endif /* !CONFIG_SMP */ ++ ++#define xntimer_percpu_queue(__timer) \ ++ ({ \ ++ struct xntimerdata *tmd; \ ++ int cpu = xnsched_cpu((__timer)->sched); \ ++ tmd = xnclock_percpu_timerdata(xntimer_clock(__timer), cpu); \ ++ &tmd->q; \ ++ }) ++ ++static inline unsigned long xntimer_gravity(struct xntimer *timer) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ ++ if (timer->status & XNTIMER_KGRAVITY) ++ return clock->gravity.kernel; ++ ++ if (timer->status & XNTIMER_UGRAVITY) ++ return clock->gravity.user; ++ ++ return clock->gravity.irq; ++} ++ ++static inline void xntimer_update_date(struct xntimer *timer) ++{ ++ xntimerh_date(&timer->aplink) = timer->start_date ++ + xnclock_ns_to_ticks(xntimer_clock(timer), ++ timer->periodic_ticks * timer->interval_ns) ++ - xntimer_gravity(timer); ++} ++ ++static inline xnticks_t xntimer_pexpect(struct xntimer *timer) ++{ ++ return timer->start_date + ++ xnclock_ns_to_ticks(xntimer_clock(timer), ++ timer->pexpect_ticks * timer->interval_ns); ++} ++ ++static inline void xntimer_set_priority(struct xntimer *timer, ++ int prio) ++{ ++ xntimerh_prio(&timer->aplink) = prio; ++} ++ ++static inline int xntimer_active_p(struct xntimer *timer) ++{ ++ return timer->sched != NULL; ++} ++ ++static inline int xntimer_running_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_RUNNING) != 0; ++} ++ ++static inline int xntimer_fired_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_FIRED) != 0; ++} ++ ++static inline int xntimer_periodic_p(struct xntimer *timer) ++{ ++ return (timer->status & XNTIMER_PERIODIC) != 0; ++} ++ ++void __xntimer_init(struct xntimer *timer, ++ struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags); ++ ++void xntimer_set_gravity(struct xntimer *timer, ++ int gravity); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++#define xntimer_init(__timer, __clock, __handler, __sched, __flags) \ ++do { \ ++ __xntimer_init(__timer, __clock, __handler, __sched, __flags); \ ++ xntimer_set_name(__timer, #__handler); \ ++} while (0) ++ ++static inline void xntimer_reset_stats(struct xntimer *timer) ++{ ++ xnstat_counter_set(&timer->scheduled, 0); ++ xnstat_counter_set(&timer->fired, 0); ++} ++ ++static inline void xntimer_account_scheduled(struct xntimer *timer) ++{ ++ xnstat_counter_inc(&timer->scheduled); ++} ++ ++static inline void xntimer_account_fired(struct xntimer *timer) ++{ ++ xnstat_counter_inc(&timer->fired); ++} ++ ++static inline void xntimer_set_name(struct xntimer *timer, const char *name) ++{ ++ knamecpy(timer->name, name); ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++ ++#define xntimer_init __xntimer_init ++ ++static inline void xntimer_reset_stats(struct 
xntimer *timer) { } ++ ++static inline void xntimer_account_scheduled(struct xntimer *timer) { } ++ ++static inline void xntimer_account_fired(struct xntimer *timer) { } ++ ++static inline void xntimer_set_name(struct xntimer *timer, const char *name) { } ++ ++#endif /* !CONFIG_XENO_OPT_STATS */ ++ ++#if defined(CONFIG_XENO_OPT_EXTCLOCK) && defined(CONFIG_XENO_OPT_STATS) ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock); ++#else ++static inline ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) { } ++#endif ++ ++void xntimer_destroy(struct xntimer *timer); ++ ++/** ++ * @fn xnticks_t xntimer_interval(struct xntimer *timer) ++ * ++ * @brief Return the timer interval value. ++ * ++ * Return the timer interval value in nanoseconds. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The duration of a period in nanoseconds. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled or ++ * one shot. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++static inline xnticks_t xntimer_interval(struct xntimer *timer) ++{ ++ return timer->interval_ns; ++} ++ ++static inline xnticks_t xntimer_expiry(struct xntimer *timer) ++{ ++ /* Real expiry date in ticks without anticipation (no gravity) */ ++ return xntimerh_date(&timer->aplink) + xntimer_gravity(timer); ++} ++ ++int xntimer_start(struct xntimer *timer, ++ xnticks_t value, ++ xnticks_t interval, ++ xntmode_t mode); ++ ++void __xntimer_stop(struct xntimer *timer); ++ ++xnticks_t xntimer_get_date(struct xntimer *timer); ++ ++xnticks_t __xntimer_get_timeout(struct xntimer *timer); ++ ++xnticks_t xntimer_get_interval(struct xntimer *timer); ++ ++int xntimer_heading_p(struct xntimer *timer); ++ ++static inline void xntimer_stop(struct xntimer *timer) ++{ ++ if (timer->status & XNTIMER_RUNNING) ++ __xntimer_stop(timer); ++} ++ ++static inline xnticks_t xntimer_get_timeout(struct xntimer *timer) ++{ ++ if (!xntimer_running_p(timer)) ++ return XN_INFINITE; ++ ++ return __xntimer_get_timeout(timer); ++} ++ ++static inline xnticks_t xntimer_get_timeout_stopped(struct xntimer *timer) ++{ ++ return __xntimer_get_timeout(timer); ++} ++ ++static inline void xntimer_enqueue(struct xntimer *timer, ++ xntimerq_t *q) ++{ ++ xntimerq_insert(q, &timer->aplink); ++ timer->status &= ~XNTIMER_DEQUEUED; ++ xntimer_account_scheduled(timer); ++} ++ ++static inline void xntimer_dequeue(struct xntimer *timer, ++ xntimerq_t *q) ++{ ++ xntimerq_remove(q, &timer->aplink); ++ timer->status |= XNTIMER_DEQUEUED; ++} ++ ++unsigned long long xntimer_get_overruns(struct xntimer *timer, ++ struct xnthread *waiter, ++ xnticks_t now); ++ ++#ifdef CONFIG_SMP ++ ++void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched); ++ ++static inline ++void xntimer_migrate(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ if (timer->sched != sched) ++ __xntimer_migrate(timer, sched); ++} ++ ++int xntimer_setup_ipi(void); ++ ++void xntimer_release_ipi(void); ++ ++void __xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched); ++ ++static inline void xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ if (sched != xntimer_sched(timer)) ++ __xntimer_set_affinity(timer, sched); ++} ++ ++#else /* ! 
CONFIG_SMP */ ++ ++static inline void xntimer_migrate(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ timer->sched = sched; ++} ++ ++static inline int xntimer_setup_ipi(void) ++{ ++ return 0; ++} ++ ++static inline void xntimer_release_ipi(void) { } ++ ++static inline void xntimer_set_affinity(struct xntimer *timer, ++ struct xnsched *sched) ++{ ++ xntimer_migrate(timer, sched); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++char *xntimer_format_time(xnticks_t ns, ++ char *buf, size_t bufsz); ++ ++int xntimer_grab_hardware(void); ++ ++void xntimer_release_hardware(void); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_TIMER_H */ +--- linux/include/xenomai/cobalt/kernel/tree.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/tree.h 2021-04-07 16:01:28.111632903 +0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_TREE_H ++#define _COBALT_KERNEL_TREE_H ++ ++#include ++#include ++#include ++ ++typedef unsigned long long xnkey_t; ++ ++static inline xnkey_t PTR_KEY(void *p) ++{ ++ return (xnkey_t)(long)p; ++} ++ ++struct xnid { ++ xnkey_t key; ++ struct rb_node link; ++}; ++ ++#define xnid_entry(ptr, type, member) \ ++ ({ \ ++ typeof(ptr) _ptr = (ptr); \ ++ (_ptr ? container_of(_ptr, type, member.link) : NULL); \ ++ }) ++ ++#define xnid_next_entry(ptr, member) \ ++ xnid_entry(rb_next(&ptr->member.link), typeof(*ptr), member) ++ ++static inline void xntree_init(struct rb_root *t) ++{ ++ *t = RB_ROOT; ++} ++ ++#define xntree_for_each_entry(pos, root, member) \ ++ for (pos = xnid_entry(rb_first(root), typeof(*pos), member); \ ++ pos; pos = xnid_next_entry(pos, member)) ++ ++void xntree_cleanup(struct rb_root *t, void *cookie, ++ void (*destroy)(void *cookie, struct xnid *id)); ++ ++int xnid_enter(struct rb_root *t, struct xnid *xnid, xnkey_t key); ++ ++static inline xnkey_t xnid_key(struct xnid *i) ++{ ++ return i->key; ++} ++ ++static inline ++struct xnid *xnid_fetch(struct rb_root *t, xnkey_t key) ++{ ++ struct rb_node *node = t->rb_node; ++ ++ while (node) { ++ struct xnid *i = container_of(node, struct xnid, link); ++ ++ if (key < i->key) ++ node = node->rb_left; ++ else if (key > i->key) ++ node = node->rb_right; ++ else ++ return i; ++ } ++ ++ return NULL; ++} ++ ++static inline int xnid_remove(struct rb_root *t, struct xnid *xnid) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ if (xnid_fetch(t, xnid->key) != xnid) ++ return -ENOENT; ++#endif ++ rb_erase(&xnid->link, t); ++ return 0; ++} ++ ++#endif /* _COBALT_KERNEL_TREE_H */ +--- linux/include/xenomai/cobalt/kernel/lock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/lock.h 2021-04-07 16:01:28.106632910 +0800 +@@ -0,0 +1,288 @@ ++/* ++ * Copyright (C) 2001-2008,2012 Philippe Gerum . 
++ * Copyright (C) 2004,2005 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_LOCK_H ++#define _COBALT_KERNEL_LOCK_H ++ ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_lock ++ * ++ * @{ ++ */ ++typedef unsigned long spl_t; ++ ++/** ++ * Hard disable interrupts on the local processor, saving previous state. ++ * ++ * @param[out] x An unsigned long integer context variable ++ */ ++#define splhigh(x) ((x) = ipipe_test_and_stall_head() & 1) ++#ifdef CONFIG_SMP ++/** ++ * Restore the saved hard interrupt state on the local processor. ++ * ++ * @param[in] x The context variable previously updated by splhigh() ++ */ ++#define splexit(x) ipipe_restore_head(x & 1) ++#else /* !CONFIG_SMP */ ++#define splexit(x) ipipe_restore_head(x) ++#endif /* !CONFIG_SMP */ ++/** ++ * Hard disable interrupts on the local processor. ++ */ ++#define splmax() ipipe_stall_head() ++/** ++ * Hard enable interrupts on the local processor. ++ */ ++#define splnone() ipipe_unstall_head() ++/** ++ * Test hard interrupt state on the local processor. ++ * ++ * @return Zero if the local processor currently accepts interrupts, ++ * non-zero otherwise. 
++ */ ++#define spltest() ipipe_test_head() ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++struct xnlock { ++ unsigned owner; ++ arch_spinlock_t alock; ++ const char *file; ++ const char *function; ++ unsigned int line; ++ int cpu; ++ unsigned long long spin_time; ++ unsigned long long lock_date; ++}; ++ ++struct xnlockinfo { ++ unsigned long long spin_time; ++ unsigned long long lock_time; ++ const char *file; ++ const char *function; ++ unsigned int line; ++}; ++ ++#define XNARCH_LOCK_UNLOCKED (struct xnlock) { \ ++ ~0, \ ++ __ARCH_SPIN_LOCK_UNLOCKED, \ ++ NULL, \ ++ NULL, \ ++ 0, \ ++ -1, \ ++ 0LL, \ ++ 0LL, \ ++} ++ ++#define XNLOCK_DBG_CONTEXT , __FILE__, __LINE__, __FUNCTION__ ++#define XNLOCK_DBG_CONTEXT_ARGS \ ++ , const char *file, int line, const char *function ++#define XNLOCK_DBG_PASS_CONTEXT , file, line, function ++ ++void xnlock_dbg_prepare_acquire(unsigned long long *start); ++void xnlock_dbg_prepare_spin(unsigned int *spin_limit); ++void xnlock_dbg_acquired(struct xnlock *lock, int cpu, ++ unsigned long long *start, ++ const char *file, int line, ++ const char *function); ++int xnlock_dbg_release(struct xnlock *lock, ++ const char *file, int line, ++ const char *function); ++ ++DECLARE_PER_CPU(struct xnlockinfo, xnlock_stats); ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++struct xnlock { ++ unsigned owner; ++ arch_spinlock_t alock; ++}; ++ ++#define XNARCH_LOCK_UNLOCKED \ ++ (struct xnlock) { \ ++ ~0, \ ++ __ARCH_SPIN_LOCK_UNLOCKED, \ ++ } ++ ++#define XNLOCK_DBG_CONTEXT ++#define XNLOCK_DBG_CONTEXT_ARGS ++#define XNLOCK_DBG_PASS_CONTEXT ++ ++static inline ++void xnlock_dbg_prepare_acquire(unsigned long long *start) ++{ ++} ++ ++static inline ++void xnlock_dbg_prepare_spin(unsigned int *spin_limit) ++{ ++} ++ ++static inline void ++xnlock_dbg_acquired(struct xnlock *lock, int cpu, ++ unsigned long long *start) ++{ ++} ++ ++static inline int xnlock_dbg_release(struct xnlock *lock) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++ ++#define xnlock_get(lock) __xnlock_get(lock XNLOCK_DBG_CONTEXT) ++#define xnlock_put(lock) __xnlock_put(lock XNLOCK_DBG_CONTEXT) ++#define xnlock_get_irqsave(lock,x) \ ++ ((x) = __xnlock_get_irqsave(lock XNLOCK_DBG_CONTEXT)) ++#define xnlock_put_irqrestore(lock,x) \ ++ __xnlock_put_irqrestore(lock,x XNLOCK_DBG_CONTEXT) ++#define xnlock_clear_irqoff(lock) xnlock_put_irqrestore(lock, 1) ++#define xnlock_clear_irqon(lock) xnlock_put_irqrestore(lock, 0) ++ ++static inline void xnlock_init (struct xnlock *lock) ++{ ++ *lock = XNARCH_LOCK_UNLOCKED; ++} ++ ++#define DECLARE_XNLOCK(lock) struct xnlock lock ++#define DECLARE_EXTERN_XNLOCK(lock) extern struct xnlock lock ++#define DEFINE_XNLOCK(lock) struct xnlock lock = XNARCH_LOCK_UNLOCKED ++#define DEFINE_PRIVATE_XNLOCK(lock) static DEFINE_XNLOCK(lock) ++ ++static inline int ____xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ int cpu = ipipe_processor_id(); ++ unsigned long long start; ++ ++ if (lock->owner == cpu) ++ return 2; ++ ++ xnlock_dbg_prepare_acquire(&start); ++ ++ arch_spin_lock(&lock->alock); ++ lock->owner = cpu; ++ ++ xnlock_dbg_acquired(lock, cpu, &start /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return 0; ++} ++ ++static inline void ____xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (xnlock_dbg_release(lock /*, */ XNLOCK_DBG_PASS_CONTEXT)) ++ return; ++ ++ lock->owner = ~0U; ++ arch_spin_unlock(&lock->alock); ++} ++ ++#ifndef 
CONFIG_XENO_ARCH_OUTOFLINE_XNLOCK ++#define ___xnlock_get ____xnlock_get ++#define ___xnlock_put ____xnlock_put ++#else /* out of line xnlock */ ++int ___xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS); ++ ++void ___xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS); ++#endif /* out of line xnlock */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++/* Disable UP-over-SMP kernel optimization in debug mode. */ ++#define __locking_active__ 1 ++#else ++#define __locking_active__ ipipe_smp_p ++#endif ++ ++static inline spl_t ++__xnlock_get_irqsave(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ unsigned long flags; ++ ++ splhigh(flags); ++ ++ if (__locking_active__) ++ flags |= ___xnlock_get(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return flags; ++} ++ ++static inline void __xnlock_put_irqrestore(struct xnlock *lock, spl_t flags ++ /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ /* Only release the lock if we didn't take it recursively. */ ++ if (__locking_active__ && !(flags & 2)) ++ ___xnlock_put(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ splexit(flags & 1); ++} ++ ++static inline int xnlock_is_owner(struct xnlock *lock) ++{ ++ if (__locking_active__) ++ return lock->owner == ipipe_processor_id(); ++ ++ return 1; ++} ++ ++static inline int __xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (__locking_active__) ++ return ___xnlock_get(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++ ++ return 0; ++} ++ ++static inline void __xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ if (__locking_active__) ++ ___xnlock_put(lock /*, */ XNLOCK_DBG_PASS_CONTEXT); ++} ++ ++#undef __locking_active__ ++ ++#else /* !(CONFIG_SMP || CONFIG_XENO_OPT_DEBUG_LOCKING) */ ++ ++#define xnlock_init(lock) do { } while(0) ++#define xnlock_get(lock) do { } while(0) ++#define xnlock_put(lock) do { } while(0) ++#define xnlock_get_irqsave(lock,x) splhigh(x) ++#define xnlock_put_irqrestore(lock,x) splexit(x) ++#define xnlock_clear_irqoff(lock) splmax() ++#define xnlock_clear_irqon(lock) splnone() ++#define xnlock_is_owner(lock) 1 ++ ++#define DECLARE_XNLOCK(lock) ++#define DECLARE_EXTERN_XNLOCK(lock) ++#define DEFINE_XNLOCK(lock) ++#define DEFINE_PRIVATE_XNLOCK(lock) ++ ++#endif /* !(CONFIG_SMP || CONFIG_XENO_OPT_DEBUG_LOCKING) */ ++ ++DECLARE_EXTERN_XNLOCK(nklock); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_LOCK_H */ +--- linux/include/xenomai/cobalt/kernel/apc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/apc.h 2021-04-07 16:01:28.101632917 +0800 +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_APC_H ++#define _COBALT_KERNEL_APC_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_apc ++ * @{ ++ */ ++ ++int xnapc_alloc(const char *name, ++ void (*handler)(void *cookie), ++ void *cookie); ++ ++void xnapc_free(int apc); ++ ++static inline void __xnapc_schedule(int apc) ++{ ++ unsigned long *p = &raw_cpu_ptr(&cobalt_machine_cpudata)->apc_pending; ++ ++ if (!__test_and_set_bit(apc, p)) ++ ipipe_post_irq_root(cobalt_pipeline.apc_virq); ++} ++ ++/** ++ * @fn static inline int xnapc_schedule(int apc) ++ * ++ * @brief Schedule an APC invocation. ++ * ++ * This service marks the APC as pending for the Linux domain, so that ++ * its handler will be called as soon as possible, when the Linux ++ * domain gets back in control. ++ * ++ * When posted from the Linux domain, the APC handler is fired as soon ++ * as the interrupt mask is explicitly cleared by some kernel ++ * code. When posted from the Xenomai domain, the APC handler is ++ * fired as soon as the Linux domain is resumed, i.e. after Xenomai has ++ * completed all its pending duties. ++ * ++ * @param apc The APC id. to schedule. ++ * ++ * This service can be called from: ++ * ++ * - Any domain context, albeit the usual calling place is from the ++ * Xenomai domain. ++ */ ++static inline void xnapc_schedule(int apc) ++{ ++ unsigned long flags; ++ ++ flags = ipipe_test_and_stall_head() & 1; ++ __xnapc_schedule(apc); ++ ipipe_restore_head(flags); ++} ++ ++void apc_dispatch(unsigned int virq, void *arg); ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_APC_H */ +--- linux/include/xenomai/cobalt/kernel/heap.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/heap.h 2021-04-07 16:01:28.097632923 +0800 +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_HEAP_H ++#define _COBALT_KERNEL_HEAP_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_heap ++ * @{ ++ */ ++ ++#define XNHEAP_PAGE_SHIFT 9 /* 2^9 => 512 bytes */ ++#define XNHEAP_PAGE_SIZE (1UL << XNHEAP_PAGE_SHIFT) ++#define XNHEAP_PAGE_MASK (~(XNHEAP_PAGE_SIZE - 1)) ++#define XNHEAP_MIN_LOG2 4 /* 16 bytes */ ++/* ++ * Use bucketed memory for sizes between 2^XNHEAP_MIN_LOG2 and ++ * 2^(XNHEAP_PAGE_SHIFT-1). ++ */ ++#define XNHEAP_MAX_BUCKETS (XNHEAP_PAGE_SHIFT - XNHEAP_MIN_LOG2) ++#define XNHEAP_MIN_ALIGN (1U << XNHEAP_MIN_LOG2) ++/* Maximum size of a heap (4Gb - PAGE_SIZE). */ ++#define XNHEAP_MAX_HEAPSZ (4294967295U - PAGE_SIZE + 1) ++/* Bits we need for encoding a page # */ ++#define XNHEAP_PGENT_BITS (32 - XNHEAP_PAGE_SHIFT) ++/* Each page is represented by a page map entry. 
*/ ++#define XNHEAP_PGMAP_BYTES sizeof(struct xnheap_pgentry) ++ ++struct xnheap_pgentry { ++ /* Linkage in bucket list. */ ++ unsigned int prev : XNHEAP_PGENT_BITS; ++ unsigned int next : XNHEAP_PGENT_BITS; ++ /* page_list or log2. */ ++ unsigned int type : 6; ++ /* ++ * We hold either a spatial map of busy blocks within the page ++ * for bucketed memory (up to 32 blocks per page), or the ++ * overall size of the multi-page block if entry.type == ++ * page_list. ++ */ ++ union { ++ u32 map; ++ u32 bsize; ++ }; ++}; ++ ++/* ++ * A range descriptor is stored at the beginning of the first page of ++ * a range of free pages. xnheap_range.size is nrpages * ++ * XNHEAP_PAGE_SIZE. Ranges are indexed by address and size in ++ * rbtrees. ++ */ ++struct xnheap_range { ++ struct rb_node addr_node; ++ struct rb_node size_node; ++ size_t size; ++}; ++ ++struct xnheap { ++ void *membase; ++ struct rb_root addr_tree; ++ struct rb_root size_tree; ++ struct xnheap_pgentry *pagemap; ++ size_t usable_size; ++ size_t used_size; ++ u32 buckets[XNHEAP_MAX_BUCKETS]; ++ char name[XNOBJECT_NAME_LEN]; ++ DECLARE_XNLOCK(lock); ++ struct list_head next; ++}; ++ ++extern struct xnheap cobalt_heap; ++ ++#define xnmalloc(size) xnheap_alloc(&cobalt_heap, size) ++#define xnfree(ptr) xnheap_free(&cobalt_heap, ptr) ++ ++static inline void *xnheap_get_membase(const struct xnheap *heap) ++{ ++ return heap->membase; ++} ++ ++static inline ++size_t xnheap_get_size(const struct xnheap *heap) ++{ ++ return heap->usable_size; ++} ++ ++static inline ++size_t xnheap_get_used(const struct xnheap *heap) ++{ ++ return heap->used_size; ++} ++ ++static inline ++size_t xnheap_get_free(const struct xnheap *heap) ++{ ++ return heap->usable_size - heap->used_size; ++} ++ ++int xnheap_init(struct xnheap *heap, ++ void *membase, size_t size); ++ ++void xnheap_destroy(struct xnheap *heap); ++ ++void *xnheap_alloc(struct xnheap *heap, size_t size); ++ ++void xnheap_free(struct xnheap *heap, void *block); ++ ++ssize_t xnheap_check_block(struct xnheap *heap, void *block); ++ ++void xnheap_set_name(struct xnheap *heap, ++ const char *name, ...); ++ ++void *xnheap_vmalloc(size_t size); ++ ++void xnheap_vfree(void *p); ++ ++static inline void *xnheap_zalloc(struct xnheap *heap, size_t size) ++{ ++ void *p; ++ ++ p = xnheap_alloc(heap, size); ++ if (p) ++ memset(p, 0, size); ++ ++ return p; ++} ++ ++static inline char *xnstrdup(const char *s) ++{ ++ char *p; ++ ++ p = xnmalloc(strlen(s) + 1); ++ if (p == NULL) ++ return NULL; ++ ++ return strcpy(p, s); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++void xnheap_init_proc(void); ++void xnheap_cleanup_proc(void); ++#else /* !CONFIG_XENO_OPT_VFILE */ ++static inline void xnheap_init_proc(void) { } ++static inline void xnheap_cleanup_proc(void) { } ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_HEAP_H */ +--- linux/include/xenomai/cobalt/kernel/schedparam.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/schedparam.h 2021-04-07 16:01:28.092632930 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHEDPARAM_H ++#define _COBALT_KERNEL_SCHEDPARAM_H ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++struct xnsched_idle_param { ++ int prio; ++}; ++ ++struct xnsched_weak_param { ++ int prio; ++}; ++ ++struct xnsched_rt_param { ++ int prio; ++}; ++ ++struct xnsched_tp_param { ++ int prio; ++ int ptid; /* partition id. */ ++}; ++ ++struct xnsched_sporadic_param { ++ xnticks_t init_budget; ++ xnticks_t repl_period; ++ int max_repl; ++ int low_prio; ++ int normal_prio; ++ int current_prio; ++}; ++ ++struct xnsched_quota_param { ++ int prio; ++ int tgid; /* thread group id. */ ++}; ++ ++union xnsched_policy_param { ++ struct xnsched_idle_param idle; ++ struct xnsched_rt_param rt; ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ struct xnsched_weak_param weak; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ struct xnsched_tp_param tp; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ struct xnsched_sporadic_param pss; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_param quota; ++#endif ++}; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHEDPARAM_H */ +--- linux/include/xenomai/cobalt/kernel/ppd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/ppd.h 2021-04-07 16:01:28.087632937 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright © 2006 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_PPD_H ++#define _COBALT_KERNEL_PPD_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_umm { ++ struct xnheap heap; ++ atomic_t refcount; ++ void (*release)(struct cobalt_umm *umm); ++}; ++ ++struct cobalt_ppd { ++ struct cobalt_umm umm; ++ atomic_t refcnt; ++ char *exe_path; ++ struct rb_root fds; ++}; ++ ++extern struct cobalt_ppd cobalt_kernel_ppd; ++ ++#endif /* _COBALT_KERNEL_PPD_H */ +--- linux/include/xenomai/cobalt/kernel/sched-quota.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-quota.h 2021-04-07 16:01:28.082632945 +0800 +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_QUOTA_H ++#define _COBALT_KERNEL_SCHED_QUOTA_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-quota.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++#define XNSCHED_QUOTA_MIN_PRIO 1 ++#define XNSCHED_QUOTA_MAX_PRIO 255 ++#define XNSCHED_QUOTA_NR_PRIO \ ++ (XNSCHED_QUOTA_MAX_PRIO - XNSCHED_QUOTA_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_quota; ++ ++struct xnsched_quota_group { ++ struct xnsched *sched; ++ xnticks_t quota_ns; ++ xnticks_t quota_peak_ns; ++ xnticks_t run_start_ns; ++ xnticks_t run_budget_ns; ++ xnticks_t run_credit_ns; ++ struct list_head members; ++ struct list_head expired; ++ struct list_head next; ++ int nr_active; ++ int nr_threads; ++ int tgid; ++ int quota_percent; ++ int quota_peak_percent; ++}; ++ ++struct xnsched_quota { ++ xnticks_t period_ns; ++ struct xntimer refill_timer; ++ struct xntimer limit_timer; ++ struct list_head groups; ++}; ++ ++static inline int xnsched_quota_init_thread(struct xnthread *thread) ++{ ++ thread->quota = NULL; ++ INIT_LIST_HEAD(&thread->quota_expired); ++ ++ return 0; ++} ++ ++int xnsched_quota_create_group(struct xnsched_quota_group *tg, ++ struct xnsched *sched, ++ int *quota_sum_r); ++ ++int xnsched_quota_destroy_group(struct xnsched_quota_group *tg, ++ int force, ++ int *quota_sum_r); ++ ++void xnsched_quota_set_limit(struct xnsched_quota_group *tg, ++ int quota_percent, int quota_peak_percent, ++ int *quota_sum_r); ++ ++struct xnsched_quota_group * ++xnsched_quota_find_group(struct xnsched *sched, int tgid); ++ ++int xnsched_quota_sum_all(struct xnsched *sched); ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_QUOTA_H */ +--- linux/include/xenomai/cobalt/kernel/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/thread.h 2021-04-07 16:01:28.078632950 +0800 +@@ -0,0 +1,570 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_KERNEL_THREAD_H ++#define _COBALT_KERNEL_THREAD_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_thread ++ * @{ ++ */ ++#define XNTHREAD_BLOCK_BITS (XNSUSP|XNPEND|XNDELAY|XNDORMANT|XNRELAX|XNMIGRATE|XNHELD|XNDBGSTOP) ++#define XNTHREAD_MODE_BITS (XNRRB|XNWARN|XNTRAPLB) ++ ++struct xnthread; ++struct xnsched; ++struct xnselector; ++struct xnsched_class; ++struct xnsched_tpslot; ++struct xnthread_personality; ++struct completion; ++ ++struct xnthread_init_attr { ++ struct xnthread_personality *personality; ++ cpumask_t affinity; ++ int flags; ++ const char *name; ++}; ++ ++struct xnthread_start_attr { ++ int mode; ++ void (*entry)(void *cookie); ++ void *cookie; ++}; ++ ++struct xnthread_wait_context { ++ int posted; ++}; ++ ++struct xnthread_personality { ++ const char *name; ++ unsigned int magic; ++ int xid; ++ atomic_t refcnt; ++ struct { ++ void *(*attach_process)(void); ++ void (*detach_process)(void *arg); ++ void (*map_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*relax_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*harden_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*move_thread)(struct xnthread *thread, ++ int dest_cpu); ++ struct xnthread_personality *(*exit_thread)(struct xnthread *thread); ++ struct xnthread_personality *(*finalize_thread)(struct xnthread *thread); ++ } ops; ++ struct module *module; ++}; ++ ++struct xnthread { ++ struct xnarchtcb tcb; /* Architecture-dependent block */ ++ ++ __u32 state; /* Thread state flags */ ++ __u32 info; /* Thread information flags */ ++ __u32 local_info; /* Local thread information flags */ ++ ++ struct xnsched *sched; /* Thread scheduler */ ++ struct xnsched_class *sched_class; /* Current scheduling class */ ++ struct xnsched_class *base_class; /* Base scheduling class */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ struct xnsched_tpslot *tps; /* Current partition slot for TP scheduling */ ++ struct list_head tp_link; /* Link in per-sched TP thread queue */ ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ struct xnsched_sporadic_data *pss; /* Sporadic scheduling data. */ ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_group *quota; /* Quota scheduling group. */ ++ struct list_head quota_expired; ++ struct list_head quota_next; ++#endif ++ cpumask_t affinity; /* Processor affinity. */ ++ ++ /** Base priority (before PI/PP boost) */ ++ int bprio; ++ ++ /** Current (effective) priority */ ++ int cprio; ++ ++ /** ++ * Weighted priority (cprio + scheduling class weight). ++ */ ++ int wprio; ++ ++ int lock_count; /** Scheduler lock count. */ ++ ++ /** ++ * Thread holder in xnsched run queue. Ordered by ++ * thread->cprio. ++ */ ++ struct list_head rlink; ++ ++ /** ++ * Thread holder in xnsynch pendq. Prioritized by ++ * thread->cprio + scheduling class weight. ++ */ ++ struct list_head plink; ++ ++ /** Thread holder in global queue. */ ++ struct list_head glink; ++ ++ /** ++ * List of xnsynch owned by this thread which cause a priority ++ * boost due to one of the following reasons: ++ * ++ * - they are currently claimed by other thread(s) when ++ * enforcing the priority inheritance protocol (XNSYNCH_PI). ++ * ++ * - they require immediate priority ceiling (XNSYNCH_PP). ++ * ++ * This list is ordered by decreasing (weighted) thread ++ * priorities. 
++ */ ++ struct list_head boosters; ++ ++ struct xnsynch *wchan; /* Resource the thread pends on */ ++ ++ struct xnsynch *wwake; /* Wait channel the thread was resumed from */ ++ ++ int res_count; /* Held resources count */ ++ ++ struct xntimer rtimer; /* Resource timer */ ++ ++ struct xntimer ptimer; /* Periodic timer */ ++ ++ xnticks_t rrperiod; /* Allotted round-robin period (ns) */ ++ ++ struct xnthread_wait_context *wcontext; /* Active wait context. */ ++ ++ struct { ++ xnstat_counter_t ssw; /* Primary -> secondary mode switch count */ ++ xnstat_counter_t csw; /* Context switches (includes secondary -> primary switches) */ ++ xnstat_counter_t xsc; /* Xenomai syscalls */ ++ xnstat_counter_t pf; /* Number of page faults */ ++ xnstat_exectime_t account; /* Execution time accounting entity */ ++ xnstat_exectime_t lastperiod; /* Interval marker for execution time reports */ ++ } stat; ++ ++ struct xnselector *selector; /* For select. */ ++ ++ xnhandle_t handle; /* Handle in registry */ ++ ++ char name[XNOBJECT_NAME_LEN]; /* Symbolic name of thread */ ++ ++ void (*entry)(void *cookie); /* Thread entry routine */ ++ void *cookie; /* Cookie to pass to the entry routine */ ++ ++ /** ++ * Thread data visible from userland through a window on the ++ * global heap. ++ */ ++ struct xnthread_user_window *u_window; ++ ++ struct xnthread_personality *personality; ++ ++ struct completion exited; ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++ const char *exe_path; /* Executable path */ ++ u32 proghash; /* Hash value for exe_path */ ++#endif ++}; ++ ++static inline int xnthread_get_state(const struct xnthread *thread) ++{ ++ return thread->state; ++} ++ ++static inline int xnthread_test_state(struct xnthread *thread, int bits) ++{ ++ return thread->state & bits; ++} ++ ++static inline void xnthread_set_state(struct xnthread *thread, int bits) ++{ ++ thread->state |= bits; ++} ++ ++static inline void xnthread_clear_state(struct xnthread *thread, int bits) ++{ ++ thread->state &= ~bits; ++} ++ ++static inline int xnthread_test_info(struct xnthread *thread, int bits) ++{ ++ return thread->info & bits; ++} ++ ++static inline void xnthread_set_info(struct xnthread *thread, int bits) ++{ ++ thread->info |= bits; ++} ++ ++static inline void xnthread_clear_info(struct xnthread *thread, int bits) ++{ ++ thread->info &= ~bits; ++} ++ ++static inline int xnthread_test_localinfo(struct xnthread *curr, int bits) ++{ ++ return curr->local_info & bits; ++} ++ ++static inline void xnthread_set_localinfo(struct xnthread *curr, int bits) ++{ ++ curr->local_info |= bits; ++} ++ ++static inline void xnthread_clear_localinfo(struct xnthread *curr, int bits) ++{ ++ curr->local_info &= ~bits; ++} ++ ++static inline struct xnarchtcb *xnthread_archtcb(struct xnthread *thread) ++{ ++ return &thread->tcb; ++} ++ ++static inline int xnthread_base_priority(const struct xnthread *thread) ++{ ++ return thread->bprio; ++} ++ ++static inline int xnthread_current_priority(const struct xnthread *thread) ++{ ++ return thread->cprio; ++} ++ ++static inline struct task_struct *xnthread_host_task(struct xnthread *thread) ++{ ++ return xnthread_archtcb(thread)->core.host_task; ++} ++ ++#define xnthread_for_each_booster(__pos, __thread) \ ++ list_for_each_entry(__pos, &(__thread)->boosters, next) ++ ++#define xnthread_for_each_booster_safe(__pos, __tmp, __thread) \ ++ list_for_each_entry_safe(__pos, __tmp, &(__thread)->boosters, next) ++ ++#define xnthread_run_handler(__t, __h, __a...) 
\ ++ do { \ ++ struct xnthread_personality *__p__ = (__t)->personality; \ ++ if ((__p__)->ops.__h) \ ++ (__p__)->ops.__h(__t, ##__a); \ ++ } while (0) ++ ++#define xnthread_run_handler_stack(__t, __h, __a...) \ ++ do { \ ++ struct xnthread_personality *__p__ = (__t)->personality; \ ++ do { \ ++ if ((__p__)->ops.__h == NULL) \ ++ break; \ ++ __p__ = (__p__)->ops.__h(__t, ##__a); \ ++ } while (__p__); \ ++ } while (0) ++ ++static inline ++struct xnthread_wait_context *xnthread_get_wait_context(struct xnthread *thread) ++{ ++ return thread->wcontext; ++} ++ ++static inline ++int xnthread_register(struct xnthread *thread, const char *name) ++{ ++ return xnregistry_enter(name, thread, &thread->handle, NULL); ++} ++ ++static inline ++struct xnthread *xnthread_lookup(xnhandle_t threadh) ++{ ++ struct xnthread *thread = xnregistry_lookup(threadh, NULL); ++ return thread && thread->handle == xnhandle_get_index(threadh) ? thread : NULL; ++} ++ ++static inline void xnthread_sync_window(struct xnthread *thread) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline ++void xnthread_clear_sync_window(struct xnthread *thread, int state_bits) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state & ~state_bits; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline ++void xnthread_set_sync_window(struct xnthread *thread, int state_bits) ++{ ++ if (thread->u_window) { ++ thread->u_window->state = thread->state | state_bits; ++ thread->u_window->info = thread->info; ++ } ++} ++ ++static inline int normalize_priority(int prio) ++{ ++ return prio < MAX_RT_PRIO ? prio : MAX_RT_PRIO - 1; ++} ++ ++int __xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++void __xnthread_test_cancel(struct xnthread *curr); ++ ++void __xnthread_cleanup(struct xnthread *curr); ++ ++void __xnthread_discard(struct xnthread *thread); ++ ++/** ++ * @fn struct xnthread *xnthread_current(void) ++ * @brief Retrieve the current Cobalt core TCB. ++ * ++ * Returns the address of the current Cobalt core thread descriptor, ++ * or NULL if running over a regular Linux task. This call is not ++ * affected by the current runtime mode of the core thread. ++ * ++ * @note The returned value may differ from xnsched_current_thread() ++ * called from the same context, since the latter returns the root ++ * thread descriptor for the current CPU if the caller is running in ++ * secondary mode. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline struct xnthread *xnthread_current(void) ++{ ++ return ipipe_current_threadinfo()->thread; ++} ++ ++/** ++ * @fn struct xnthread *xnthread_from_task(struct task_struct *p) ++ * @brief Retrieve the Cobalt core TCB attached to a Linux task. ++ * ++ * Returns the address of the Cobalt core thread descriptor attached ++ * to the Linux task @a p, or NULL if @a p is a regular Linux ++ * task. This call is not affected by the current runtime mode of the ++ * core thread. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline struct xnthread *xnthread_from_task(struct task_struct *p) ++{ ++ return ipipe_task_threadinfo(p)->thread; ++} ++ ++/** ++ * @fn void xnthread_test_cancel(void) ++ * @brief Introduce a thread cancellation point. ++ * ++ * Terminates the current thread if a cancellation request is pending ++ * for it, i.e. 
if xnthread_cancel() was called. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++static inline void xnthread_test_cancel(void) ++{ ++ struct xnthread *curr = xnthread_current(); ++ ++ if (curr && xnthread_test_info(curr, XNCANCELD)) ++ __xnthread_test_cancel(curr); ++} ++ ++static inline ++void xnthread_complete_wait(struct xnthread_wait_context *wc) ++{ ++ wc->posted = 1; ++} ++ ++static inline ++int xnthread_wait_complete_p(struct xnthread_wait_context *wc) ++{ ++ return wc->posted; ++} ++ ++#ifdef CONFIG_XENO_ARCH_FPU ++void xnthread_switch_fpu(struct xnsched *sched); ++#else ++static inline void xnthread_switch_fpu(struct xnsched *sched) { } ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ ++void xnthread_init_shadow_tcb(struct xnthread *thread); ++ ++void xnthread_init_root_tcb(struct xnthread *thread); ++ ++void xnthread_deregister(struct xnthread *thread); ++ ++char *xnthread_format_status(unsigned long status, ++ char *buf, int size); ++ ++pid_t xnthread_host_pid(struct xnthread *thread); ++ ++int xnthread_set_clock(struct xnthread *thread, ++ struct xnclock *newclock); ++ ++xnticks_t xnthread_get_timeout(struct xnthread *thread, ++ xnticks_t ns); ++ ++xnticks_t xnthread_get_period(struct xnthread *thread); ++ ++void xnthread_prepare_wait(struct xnthread_wait_context *wc); ++ ++int xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_start(struct xnthread *thread, ++ const struct xnthread_start_attr *attr); ++ ++int xnthread_set_mode(int clrmask, ++ int setmask); ++ ++void xnthread_suspend(struct xnthread *thread, ++ int mask, ++ xnticks_t timeout, ++ xntmode_t timeout_mode, ++ struct xnsynch *wchan); ++ ++void xnthread_resume(struct xnthread *thread, ++ int mask); ++ ++int xnthread_unblock(struct xnthread *thread); ++ ++int xnthread_set_periodic(struct xnthread *thread, ++ xnticks_t idate, ++ xntmode_t timeout_mode, ++ xnticks_t period); ++ ++int xnthread_wait_period(unsigned long *overruns_r); ++ ++int xnthread_set_slice(struct xnthread *thread, ++ xnticks_t quantum); ++ ++void xnthread_cancel(struct xnthread *thread); ++ ++int xnthread_join(struct xnthread *thread, bool uninterruptible); ++ ++int xnthread_harden(void); ++ ++void xnthread_relax(int notify, int reason); ++ ++void __xnthread_kick(struct xnthread *thread); ++ ++void xnthread_kick(struct xnthread *thread); ++ ++void __xnthread_demote(struct xnthread *thread); ++ ++void xnthread_demote(struct xnthread *thread); ++ ++void xnthread_signal(struct xnthread *thread, ++ int sig, int arg); ++ ++void xnthread_pin_initial(struct xnthread *thread); ++ ++int xnthread_map(struct xnthread *thread, ++ struct completion *done); ++ ++void xnthread_call_mayday(struct xnthread *thread, int reason); ++ ++static inline void xnthread_get_resource(struct xnthread *curr) ++{ ++ if (xnthread_test_state(curr, XNWEAK|XNDEBUG)) ++ curr->res_count++; ++} ++ ++static inline int xnthread_put_resource(struct xnthread *curr) ++{ ++ if (xnthread_test_state(curr, XNWEAK) || ++ IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) { ++ if (unlikely(curr->res_count == 0)) { ++ if (xnthread_test_state(curr, XNWARN)) ++ xnthread_signal(curr, SIGDEBUG, ++ SIGDEBUG_RESCNT_IMBALANCE); ++ return -EPERM; ++ } ++ curr->res_count--; ++ } ++ ++ return 0; ++} ++ ++static inline void xnthread_commit_ceiling(struct xnthread *curr) ++{ ++ if (curr->u_window->pp_pending) ++ xnsynch_commit_ceiling(curr); ++} ++ ++#ifdef CONFIG_SMP ++ ++void 
xnthread_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched); ++#else ++ ++static inline void xnthread_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched) ++{ } ++ ++#endif ++ ++int __xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param); ++ ++int xnthread_killall(int grace, int mask); ++ ++void __xnthread_propagate_schedparam(struct xnthread *curr); ++ ++static inline void xnthread_propagate_schedparam(struct xnthread *curr) ++{ ++ if (xnthread_test_info(curr, XNSCHEDP)) ++ __xnthread_propagate_schedparam(curr); ++} ++ ++extern struct xnthread_personality xenomai_personality; ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_THREAD_H */ +--- linux/include/xenomai/cobalt/kernel/assert.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/assert.h 2021-04-07 16:01:28.073632957 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (C) 2006 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ASSERT_H ++#define _COBALT_KERNEL_ASSERT_H ++ ++#include ++#include ++#include ++ ++#define XENO_INFO KERN_INFO "[Xenomai] " ++#define XENO_WARNING KERN_WARNING "[Xenomai] " ++#define XENO_ERR KERN_ERR "[Xenomai] " ++ ++#define XENO_DEBUG(__subsys) \ ++ IS_ENABLED(CONFIG_XENO_OPT_DEBUG_##__subsys) ++#define XENO_ASSERT(__subsys, __cond) \ ++ (!WARN_ON(XENO_DEBUG(__subsys) && !(__cond))) ++#define XENO_BUG(__subsys) \ ++ BUG_ON(XENO_DEBUG(__subsys)) ++#define XENO_BUG_ON(__subsys, __cond) \ ++ BUG_ON(XENO_DEBUG(__subsys) && (__cond)) ++#define XENO_WARN(__subsys, __cond, __fmt...) 
\ ++ WARN(XENO_DEBUG(__subsys) && (__cond), __fmt) ++#define XENO_WARN_ON(__subsys, __cond) \ ++ WARN_ON(XENO_DEBUG(__subsys) && (__cond)) ++#define XENO_WARN_ON_ONCE(__subsys, __cond) \ ++ WARN_ON_ONCE(XENO_DEBUG(__subsys) && (__cond)) ++#ifdef CONFIG_SMP ++#define XENO_BUG_ON_SMP(__subsys, __cond) \ ++ XENO_BUG_ON(__subsys, __cond) ++#define XENO_WARN_ON_SMP(__subsys, __cond) \ ++ XENO_WARN_ON(__subsys, __cond) ++#define XENO_WARN_ON_ONCE_SMP(__subsys, __cond) \ ++ XENO_WARN_ON_ONCE(__subsys, __cond) ++#else ++#define XENO_BUG_ON_SMP(__subsys, __cond) \ ++ do { } while (0) ++#define XENO_WARN_ON_SMP(__subsys, __cond) \ ++ do { } while (0) ++#define XENO_WARN_ON_ONCE_SMP(__subsys, __cond) \ ++ do { } while (0) ++#endif ++ ++#define primary_mode_only() XENO_BUG_ON(CONTEXT, ipipe_root_p) ++#define secondary_mode_only() XENO_BUG_ON(CONTEXT, !ipipe_root_p) ++#define interrupt_only() XENO_BUG_ON(CONTEXT, !xnsched_interrupt_p()) ++#define realtime_cpu_only() XENO_BUG_ON(CONTEXT, !xnsched_supported_cpu(ipipe_processor_id())) ++#define thread_only() XENO_BUG_ON(CONTEXT, xnsched_interrupt_p()) ++#define irqoff_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() == 0) ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++#define atomic_only() XENO_BUG_ON(CONTEXT, (xnlock_is_owner(&nklock) && hard_irqs_disabled()) == 0) ++#define preemptible_only() XENO_BUG_ON(CONTEXT, xnlock_is_owner(&nklock) || hard_irqs_disabled()) ++#else ++#define atomic_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() == 0) ++#define preemptible_only() XENO_BUG_ON(CONTEXT, hard_irqs_disabled() != 0) ++#endif ++ ++#endif /* !_COBALT_KERNEL_ASSERT_H */ +--- linux/include/xenomai/cobalt/kernel/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/arith.h 2021-04-07 16:01:28.068632964 +0800 +@@ -0,0 +1,35 @@ ++/* ++ * Generic arithmetic/conversion routines. ++ * Copyright © 2005 Stelian Pop. ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_ARITH_H ++#define _COBALT_KERNEL_ARITH_H ++ ++#include ++#include ++ ++#ifdef __BIG_ENDIAN ++#define endianstruct { unsigned int _h; unsigned int _l; } ++#else /* __LITTLE_ENDIAN */ ++#define endianstruct { unsigned int _l; unsigned int _h; } ++#endif ++ ++#include ++ ++#endif /* _COBALT_KERNEL_ARITH_H */ +--- linux/include/xenomai/cobalt/kernel/intr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/intr.h 2021-04-07 16:01:28.063632972 +0800 +@@ -0,0 +1,164 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_INTR_H ++#define _COBALT_KERNEL_INTR_H ++ ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_irq ++ * @{ ++ */ ++ ++/* Possible return values of a handler. */ ++#define XN_IRQ_NONE 0x1 ++#define XN_IRQ_HANDLED 0x2 ++#define XN_IRQ_STATMASK (XN_IRQ_NONE|XN_IRQ_HANDLED) ++#define XN_IRQ_PROPAGATE 0x100 ++#define XN_IRQ_DISABLE 0x200 ++ ++/* Init flags. */ ++#define XN_IRQTYPE_SHARED 0x1 ++#define XN_IRQTYPE_EDGE 0x2 ++ ++/* Status bits. */ ++#define XN_IRQSTAT_ATTACHED 0 ++#define _XN_IRQSTAT_ATTACHED (1 << XN_IRQSTAT_ATTACHED) ++#define XN_IRQSTAT_DISABLED 1 ++#define _XN_IRQSTAT_DISABLED (1 << XN_IRQSTAT_DISABLED) ++ ++struct xnintr; ++struct xnsched; ++ ++typedef int (*xnisr_t)(struct xnintr *intr); ++ ++typedef void (*xniack_t)(unsigned irq, void *arg); ++ ++struct xnirqstat { ++ /** Number of handled receipts since attachment. */ ++ xnstat_counter_t hits; ++ /** Runtime accounting entity */ ++ xnstat_exectime_t account; ++ /** Accumulated accounting entity */ ++ xnstat_exectime_t sum; ++}; ++ ++struct xnintr { ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ /** Next object in the IRQ-sharing chain. */ ++ struct xnintr *next; ++#endif ++ /** Number of consequent unhandled interrupts */ ++ unsigned int unhandled; ++ /** Interrupt service routine. */ ++ xnisr_t isr; ++ /** User-defined cookie value. */ ++ void *cookie; ++ /** runtime status */ ++ unsigned long status; ++ /** Creation flags. */ ++ int flags; ++ /** IRQ number. */ ++ unsigned int irq; ++ /** Interrupt acknowledge routine. */ ++ xniack_t iack; ++ /** Symbolic name. */ ++ const char *name; ++ /** Descriptor maintenance lock. */ ++ raw_spinlock_t lock; ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ /** Statistics. */ ++ struct xnirqstat *stats; ++#endif ++}; ++ ++struct xnintr_iterator { ++ int cpu; /** Current CPU in iteration. */ ++ unsigned long hits; /** Current hit counter. */ ++ xnticks_t exectime_period; /** Used CPU time in current accounting period. */ ++ xnticks_t account_period; /** Length of accounting period. */ ++ xnticks_t exectime_total; /** Overall CPU time consumed. */ ++ int list_rev; /** System-wide xnintr list revision (internal use). */ ++ struct xnintr *prev; /** Previously visited xnintr object (internal use). */ ++}; ++ ++int xnintr_mount(void); ++ ++void xnintr_core_clock_handler(void); ++ ++void xnintr_host_tick(struct xnsched *sched); ++ ++void xnintr_init_proc(void); ++ ++void xnintr_cleanup_proc(void); ++ ++ /* Public interface. 
*/ ++ ++int xnintr_init(struct xnintr *intr, ++ const char *name, ++ unsigned irq, ++ xnisr_t isr, ++ xniack_t iack, ++ int flags); ++ ++void xnintr_destroy(struct xnintr *intr); ++ ++int xnintr_attach(struct xnintr *intr, ++ void *cookie); ++ ++void xnintr_detach(struct xnintr *intr); ++ ++void xnintr_enable(struct xnintr *intr); ++ ++void xnintr_disable(struct xnintr *intr); ++ ++void xnintr_affinity(struct xnintr *intr, ++ cpumask_t cpumask); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++extern struct xnintr nktimer; ++ ++int xnintr_query_init(struct xnintr_iterator *iterator); ++ ++int xnintr_get_query_lock(void); ++ ++void xnintr_put_query_lock(void); ++ ++int xnintr_query_next(int irq, struct xnintr_iterator *iterator, ++ char *name_buf); ++ ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static inline int xnintr_query_init(struct xnintr_iterator *iterator) ++{ ++ return 0; ++} ++ ++static inline int xnintr_get_query_lock(void) ++{ ++ return 0; ++} ++ ++static inline void xnintr_put_query_lock(void) {} ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_INTR_H */ +--- linux/include/xenomai/cobalt/kernel/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched.h 2021-04-07 16:01:28.059632978 +0800 +@@ -0,0 +1,704 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_H ++#define _COBALT_KERNEL_SCHED_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++/* Sched status flags */ ++#define XNRESCHED 0x10000000 /* Needs rescheduling */ ++#define XNINSW 0x20000000 /* In context switch */ ++#define XNINTCK 0x40000000 /* In master tick handler context */ ++ ++/* Sched local flags */ ++#define XNIDLE 0x00010000 /* Idle (no outstanding timer) */ ++#define XNHTICK 0x00008000 /* Host tick pending */ ++#define XNINIRQ 0x00004000 /* In IRQ handling context */ ++#define XNHDEFER 0x00002000 /* Host tick deferred */ ++ ++struct xnsched_rt { ++ xnsched_queue_t runnable; /*!< Runnable thread queue. */ ++}; ++ ++/*! ++ * \brief Scheduling information structure. ++ */ ++ ++struct xnsched { ++ /*!< Scheduler specific status bitmask. */ ++ unsigned long status; ++ /*!< Scheduler specific local flags bitmask. */ ++ unsigned long lflags; ++ /*!< Current thread. */ ++ struct xnthread *curr; ++#ifdef CONFIG_SMP ++ /*!< Owner CPU id. */ ++ int cpu; ++ /*!< Mask of CPUs needing rescheduling. */ ++ cpumask_t resched; ++#endif ++ /*!< Context of built-in real-time class. */ ++ struct xnsched_rt rt; ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ /*!< Context of weak scheduling class. 
*/ ++ struct xnsched_weak weak; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ /*!< Context of TP class. */ ++ struct xnsched_tp tp; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ /*!< Context of sporadic scheduling class. */ ++ struct xnsched_sporadic pss; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ /*!< Context of runtime quota scheduling. */ ++ struct xnsched_quota quota; ++#endif ++ /*!< Interrupt nesting level. */ ++ volatile unsigned inesting; ++ /*!< Host timer. */ ++ struct xntimer htimer; ++ /*!< Round-robin timer. */ ++ struct xntimer rrbtimer; ++ /*!< Root thread control block. */ ++ struct xnthread rootcb; ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ struct xnthread *last; ++#endif ++#ifdef CONFIG_XENO_ARCH_FPU ++ /*!< Thread owning the current FPU context. */ ++ struct xnthread *fpuholder; ++#endif ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ /*!< Watchdog timer object. */ ++ struct xntimer wdtimer; ++#endif ++#ifdef CONFIG_XENO_OPT_STATS ++ /*!< Last account switch date (ticks). */ ++ xnticks_t last_account_switch; ++ /*!< Currently active account */ ++ xnstat_exectime_t *current_account; ++#endif ++}; ++ ++DECLARE_PER_CPU(struct xnsched, nksched); ++ ++extern cpumask_t cobalt_cpu_affinity; ++ ++extern struct list_head nkthreadq; ++ ++extern int cobalt_nrthreads; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_rev_tag nkthreadlist_tag; ++#endif ++ ++union xnsched_policy_param; ++ ++struct xnsched_class { ++ void (*sched_init)(struct xnsched *sched); ++ void (*sched_enqueue)(struct xnthread *thread); ++ void (*sched_dequeue)(struct xnthread *thread); ++ void (*sched_requeue)(struct xnthread *thread); ++ struct xnthread *(*sched_pick)(struct xnsched *sched); ++ void (*sched_tick)(struct xnsched *sched); ++ void (*sched_rotate)(struct xnsched *sched, ++ const union xnsched_policy_param *p); ++ void (*sched_migrate)(struct xnthread *thread, ++ struct xnsched *sched); ++ int (*sched_chkparam)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ /** ++ * Set base scheduling parameters. This routine is indirectly ++ * called upon a change of base scheduling settings through ++ * __xnthread_set_schedparam() -> xnsched_set_policy(), ++ * exclusively. ++ * ++ * The scheduling class implementation should do the necessary ++ * housekeeping to comply with the new settings. ++ * thread->base_class is up to date before the call is made, ++ * and should be considered for the new weighted priority ++ * calculation. On the contrary, thread->sched_class should ++ * NOT be referred to by this handler. ++ * ++ * sched_setparam() is NEVER involved in PI or PP ++ * management. However it must deny a priority update if it ++ * contradicts an ongoing boost for @a thread. This is ++ * typically what the xnsched_set_effective_priority() helper ++ * does for such handler. ++ * ++ * @param thread Affected thread. ++ * @param p New base policy settings. ++ * ++ * @return True if the effective priority was updated ++ * (thread->cprio). 
++ */ ++ bool (*sched_setparam)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_getparam)(struct xnthread *thread, ++ union xnsched_policy_param *p); ++ void (*sched_trackprio)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_protectprio)(struct xnthread *thread, int prio); ++ int (*sched_declare)(struct xnthread *thread, ++ const union xnsched_policy_param *p); ++ void (*sched_forget)(struct xnthread *thread); ++ void (*sched_kick)(struct xnthread *thread); ++#ifdef CONFIG_XENO_OPT_VFILE ++ int (*sched_init_vfile)(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot); ++ void (*sched_cleanup_vfile)(struct xnsched_class *schedclass); ++#endif ++ int nthreads; ++ struct xnsched_class *next; ++ int weight; ++ int policy; ++ const char *name; ++}; ++ ++#define XNSCHED_CLASS_WEIGHT(n) (n * XNSCHED_CLASS_WEIGHT_FACTOR) ++ ++/* Placeholder for current thread priority */ ++#define XNSCHED_RUNPRIO 0x80000000 ++ ++#define xnsched_for_each_thread(__thread) \ ++ list_for_each_entry(__thread, &nkthreadq, glink) ++ ++#ifdef CONFIG_SMP ++static inline int xnsched_cpu(struct xnsched *sched) ++{ ++ return sched->cpu; ++} ++#else /* !CONFIG_SMP */ ++static inline int xnsched_cpu(struct xnsched *sched) ++{ ++ return 0; ++} ++#endif /* CONFIG_SMP */ ++ ++static inline struct xnsched *xnsched_struct(int cpu) ++{ ++ return &per_cpu(nksched, cpu); ++} ++ ++static inline struct xnsched *xnsched_current(void) ++{ ++ /* IRQs off */ ++ return raw_cpu_ptr(&nksched); ++} ++ ++static inline struct xnthread *xnsched_current_thread(void) ++{ ++ return xnsched_current()->curr; ++} ++ ++/* Test resched flag of given sched. */ ++static inline int xnsched_resched_p(struct xnsched *sched) ++{ ++ return sched->status & XNRESCHED; ++} ++ ++/* Set self resched flag for the current scheduler. */ ++static inline void xnsched_set_self_resched(struct xnsched *sched) ++{ ++ sched->status |= XNRESCHED; ++} ++ ++#define xnsched_realtime_domain cobalt_pipeline.domain ++ ++/* Set resched flag for the given scheduler. */ ++#ifdef CONFIG_SMP ++ ++static inline void xnsched_set_resched(struct xnsched *sched) ++{ ++ struct xnsched *current_sched = xnsched_current(); ++ ++ if (current_sched == sched) ++ current_sched->status |= XNRESCHED; ++ else if (!xnsched_resched_p(sched)) { ++ cpumask_set_cpu(xnsched_cpu(sched), ¤t_sched->resched); ++ sched->status |= XNRESCHED; ++ current_sched->status |= XNRESCHED; ++ } ++} ++ ++#define xnsched_realtime_cpus cobalt_pipeline.supported_cpus ++ ++static inline int xnsched_supported_cpu(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &xnsched_realtime_cpus); ++} ++ ++static inline int xnsched_threading_cpu(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &cobalt_cpu_affinity); ++} ++ ++#else /* !CONFIG_SMP */ ++ ++static inline void xnsched_set_resched(struct xnsched *sched) ++{ ++ xnsched_set_self_resched(sched); ++} ++ ++#define xnsched_realtime_cpus CPU_MASK_ALL ++ ++static inline int xnsched_supported_cpu(int cpu) ++{ ++ return 1; ++} ++ ++static inline int xnsched_threading_cpu(int cpu) ++{ ++ return 1; ++} ++ ++#endif /* !CONFIG_SMP */ ++ ++#define for_each_realtime_cpu(cpu) \ ++ for_each_online_cpu(cpu) \ ++ if (xnsched_supported_cpu(cpu)) \ ++ ++int ___xnsched_run(struct xnsched *sched); ++ ++void __xnsched_run_handler(void); ++ ++static inline int __xnsched_run(struct xnsched *sched) ++{ ++ /* ++ * Reschedule if XNSCHED is pending, but never over an IRQ ++ * handler or in the middle of unlocked context switch. 
++ */ ++ if (((sched->status|sched->lflags) & ++ (XNINIRQ|XNINSW|XNRESCHED)) != XNRESCHED) ++ return 0; ++ ++ return ___xnsched_run(sched); ++} ++ ++static inline int xnsched_run(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ /* ++ * sched->curr is shared locklessly with ___xnsched_run(). ++ * READ_ONCE() makes sure the compiler never uses load tearing ++ * for reading this pointer piecemeal, so that multiple stores ++ * occurring concurrently on remote CPUs never yield a ++ * spurious merged value on the local one. ++ */ ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ /* ++ * If running over the root thread, hard irqs must be off ++ * (asserted out of line in ___xnsched_run()). ++ */ ++ return curr->lock_count > 0 ? 0 : __xnsched_run(sched); ++} ++ ++void xnsched_lock(void); ++ ++void xnsched_unlock(void); ++ ++static inline int xnsched_interrupt_p(void) ++{ ++ return xnsched_current()->lflags & XNINIRQ; ++} ++ ++static inline int xnsched_root_p(void) ++{ ++ return xnthread_test_state(xnsched_current_thread(), XNROOT); ++} ++ ++static inline int xnsched_unblockable_p(void) ++{ ++ return xnsched_interrupt_p() || xnsched_root_p(); ++} ++ ++static inline int xnsched_primary_p(void) ++{ ++ return !xnsched_unblockable_p(); ++} ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++struct xnsched *xnsched_finish_unlocked_switch(struct xnsched *sched); ++ ++#define xnsched_resched_after_unlocked_switch() xnsched_run() ++ ++static inline ++int xnsched_maybe_resched_after_unlocked_switch(struct xnsched *sched) ++{ ++ return sched->status & XNRESCHED; ++} ++ ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++static inline struct xnsched * ++xnsched_finish_unlocked_switch(struct xnsched *sched) ++{ ++ XENO_BUG_ON(COBALT, !hard_irqs_disabled()); ++ return xnsched_current(); ++} ++ ++static inline void xnsched_resched_after_unlocked_switch(void) { } ++ ++static inline int ++xnsched_maybe_resched_after_unlocked_switch(struct xnsched *sched) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++bool xnsched_set_effective_priority(struct xnthread *thread, ++ int prio); ++ ++#include ++#include ++ ++int xnsched_init_proc(void); ++ ++void xnsched_cleanup_proc(void); ++ ++void xnsched_register_classes(void); ++ ++void xnsched_init_all(void); ++ ++void xnsched_destroy_all(void); ++ ++struct xnthread *xnsched_pick_next(struct xnsched *sched); ++ ++void xnsched_putback(struct xnthread *thread); ++ ++int xnsched_set_policy(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *p); ++ ++void xnsched_track_policy(struct xnthread *thread, ++ struct xnthread *target); ++ ++void xnsched_protect_priority(struct xnthread *thread, ++ int prio); ++ ++void xnsched_migrate(struct xnthread *thread, ++ struct xnsched *sched); ++ ++void xnsched_migrate_passive(struct xnthread *thread, ++ struct xnsched *sched); ++ ++/** ++ * @fn void xnsched_rotate(struct xnsched *sched, struct xnsched_class *sched_class, const union xnsched_policy_param *sched_param) ++ * @brief Rotate a scheduler runqueue. ++ * ++ * The specified scheduling class is requested to rotate its runqueue ++ * for the given scheduler. Rotation is performed according to the ++ * scheduling parameter specified by @a sched_param. ++ * ++ * @note The nucleus supports round-robin scheduling for the members ++ * of the RT class. ++ * ++ * @param sched The per-CPU scheduler hosting the target scheduling ++ * class. 
++ * ++ * @param sched_class The scheduling class which should rotate its ++ * runqueue. ++ * ++ * @param sched_param The scheduling parameter providing rotation ++ * information to the specified scheduling class. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++static inline void xnsched_rotate(struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ sched_class->sched_rotate(sched, sched_param); ++} ++ ++static inline int xnsched_init_thread(struct xnthread *thread) ++{ ++ int ret = 0; ++ ++ xnsched_idle_init_thread(thread); ++ xnsched_rt_init_thread(thread); ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ret = xnsched_tp_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_TP */ ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ ret = xnsched_sporadic_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_SPORADIC */ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ret = xnsched_quota_init_thread(thread); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++ return ret; ++} ++ ++static inline int xnsched_root_priority(struct xnsched *sched) ++{ ++ return sched->rootcb.cprio; ++} ++ ++static inline struct xnsched_class *xnsched_root_class(struct xnsched *sched) ++{ ++ return sched->rootcb.sched_class; ++} ++ ++static inline void xnsched_tick(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr; ++ struct xnsched_class *sched_class = curr->sched_class; ++ /* ++ * A thread that undergoes round-robin scheduling only ++ * consumes its time slice when it runs within its own ++ * scheduling class, which excludes temporary PI boosts, and ++ * does not hold the scheduler lock. ++ */ ++ if (sched_class == curr->base_class && ++ sched_class->sched_tick && ++ xnthread_test_state(curr, XNTHREAD_BLOCK_BITS|XNRRB) == XNRRB && ++ curr->lock_count == 0) ++ sched_class->sched_tick(sched); ++} ++ ++static inline int xnsched_chkparam(struct xnsched_class *sched_class, ++ struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (sched_class->sched_chkparam) ++ return sched_class->sched_chkparam(thread, p); ++ ++ return 0; ++} ++ ++static inline int xnsched_declare(struct xnsched_class *sched_class, ++ struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ int ret; ++ ++ if (sched_class->sched_declare) { ++ ret = sched_class->sched_declare(thread, p); ++ if (ret) ++ return ret; ++ } ++ if (sched_class != thread->base_class) ++ sched_class->nthreads++; ++ ++ return 0; ++} ++ ++static inline int xnsched_calc_wprio(struct xnsched_class *sched_class, ++ int prio) ++{ ++ return prio + sched_class->weight; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++static inline void xnsched_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_enqueue(thread); ++} ++ ++static inline void xnsched_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_dequeue(thread); ++} ++ ++static inline void xnsched_requeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ sched_class->sched_requeue(thread); ++} ++ ++static inline ++bool xnsched_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return 
thread->base_class->sched_setparam(thread, p); ++} ++ ++static inline void xnsched_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ thread->sched_class->sched_getparam(thread, p); ++} ++ ++static inline void xnsched_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ thread->sched_class->sched_trackprio(thread, p); ++ thread->wprio = xnsched_calc_wprio(thread->sched_class, thread->cprio); ++} ++ ++static inline void xnsched_protectprio(struct xnthread *thread, int prio) ++{ ++ thread->sched_class->sched_protectprio(thread, prio); ++ thread->wprio = xnsched_calc_wprio(thread->sched_class, thread->cprio); ++} ++ ++static inline void xnsched_forget(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ --sched_class->nthreads; ++ ++ if (sched_class->sched_forget) ++ sched_class->sched_forget(thread); ++} ++ ++static inline void xnsched_kick(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ xnthread_set_info(thread, XNKICKED); ++ ++ if (sched_class->sched_kick) ++ sched_class->sched_kick(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++/* ++ * If only the RT and IDLE scheduling classes are compiled in, we can ++ * fully inline common helpers for dealing with those. ++ */ ++ ++static inline void xnsched_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_enqueue(thread); ++} ++ ++static inline void xnsched_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_dequeue(thread); ++} ++ ++static inline void xnsched_requeue(struct xnthread *thread) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class != &xnsched_class_idle) ++ __xnsched_rt_requeue(thread); ++} ++ ++static inline bool xnsched_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->base_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ return __xnsched_idle_setparam(thread, p); ++ ++ return __xnsched_rt_setparam(thread, p); ++} ++ ++static inline void xnsched_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_getparam(thread, p); ++ else ++ __xnsched_rt_getparam(thread, p); ++} ++ ++static inline void xnsched_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_trackprio(thread, p); ++ else ++ __xnsched_rt_trackprio(thread, p); ++ ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++} ++ ++static inline void xnsched_protectprio(struct xnthread *thread, int prio) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (sched_class == &xnsched_class_idle) ++ __xnsched_idle_protectprio(thread, prio); ++ else ++ __xnsched_rt_protectprio(thread, prio); ++ ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++} ++ ++static inline void xnsched_forget(struct xnthread *thread) ++{ ++ --thread->base_class->nthreads; ++ __xnsched_rt_forget(thread); ++} ++ ++static inline void xnsched_kick(struct 
xnthread *thread) ++{ ++ xnthread_set_info(thread, XNKICKED); ++ xnsched_set_resched(thread->sched); ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_H */ +--- linux/include/xenomai/cobalt/kernel/sched-sporadic.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/xenomai/cobalt/kernel/sched-sporadic.h 2021-04-07 16:01:28.053632986 +0800 +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_KERNEL_SCHED_SPORADIC_H ++#define _COBALT_KERNEL_SCHED_SPORADIC_H ++ ++#ifndef _COBALT_KERNEL_SCHED_H ++#error "please don't include cobalt/kernel/sched-sporadic.h directly" ++#endif ++ ++/** ++ * @addtogroup cobalt_core_sched ++ * @{ ++ */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ ++#define XNSCHED_SPORADIC_MIN_PRIO 1 ++#define XNSCHED_SPORADIC_MAX_PRIO 255 ++#define XNSCHED_SPORADIC_NR_PRIO \ ++ (XNSCHED_SPORADIC_MAX_PRIO - XNSCHED_SPORADIC_MIN_PRIO + 1) ++ ++extern struct xnsched_class xnsched_class_sporadic; ++ ++struct xnsched_sporadic_repl { ++ xnticks_t date; ++ xnticks_t amount; ++}; ++ ++struct xnsched_sporadic_data { ++ xnticks_t resume_date; ++ xnticks_t budget; ++ int repl_in; ++ int repl_out; ++ int repl_pending; ++ struct xntimer repl_timer; ++ struct xntimer drop_timer; ++ struct xnsched_sporadic_repl repl_data[CONFIG_XENO_OPT_SCHED_SPORADIC_MAXREPL]; ++ struct xnsched_sporadic_param param; ++ struct xnthread *thread; ++}; ++ ++struct xnsched_sporadic { ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ unsigned long drop_retries; ++#endif ++}; ++ ++static inline int xnsched_sporadic_init_thread(struct xnthread *thread) ++{ ++ thread->pss = NULL; ++ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_SPORADIC */ ++ ++/** @} */ ++ ++#endif /* !_COBALT_KERNEL_SCHED_SPORADIC_H */ +--- linux/include/linux/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/linux/xenomai/wrappers.h 2021-04-07 16:01:25.949635992 +0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2017 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_LINUX_WRAPPERS_H ++#define _COBALT_LINUX_WRAPPERS_H ++ ++#include ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0) ++#include ++#include ++ ++#define cobalt_set_task_state(tsk, state_value) \ ++ set_task_state(tsk, state_value) ++#else ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/* ++ * The co-kernel can still do this sanely for a thread which is ++ * currently active on the head stage. ++ */ ++#define cobalt_set_task_state(tsk, state_value) \ ++ smp_store_mb((tsk)->state, (state_value)) ++#endif ++ ++#include ++ ++#ifndef ipipe_root_nr_syscalls ++#define ipipe_root_nr_syscalls(ti) NR_syscalls ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) ++typedef siginfo_t kernel_siginfo_t; ++#endif ++ ++#endif /* !_COBALT_LINUX_WRAPPERS_H */ +--- linux/include/asm-generic/xenomai/pci_ids.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/pci_ids.h 2021-04-07 16:01:25.942636002 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2009 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_PCI_IDS_H ++#define _COBALT_ASM_GENERIC_PCI_IDS_H ++ ++#include ++ ++/* SMI */ ++#ifndef PCI_DEVICE_ID_INTEL_ESB2_0 ++#define PCI_DEVICE_ID_INTEL_ESB2_0 0x2670 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH7_0 ++#define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH7_1 ++#define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH8_4 ++#define PCI_DEVICE_ID_INTEL_ICH8_4 0x2815 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH9_1 ++#define PCI_DEVICE_ID_INTEL_ICH9_1 0x2917 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH9_5 ++#define PCI_DEVICE_ID_INTEL_ICH9_5 0x2919 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_ICH10_1 ++#define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 ++#endif ++#ifndef PCI_DEVICE_ID_INTEL_PCH_LPC_MIN ++#define PCI_DEVICE_ID_INTEL_PCH_LPC_MIN 0x3b00 ++#endif ++ ++/* RTCAN */ ++#ifndef PCI_VENDOR_ID_ESDGMBH ++#define PCI_VENDOR_ID_ESDGMBH 0x12fe ++#endif ++#ifndef PCI_DEVICE_ID_PLX_9030 ++#define PCI_DEVICE_ID_PLX_9030 0x9030 ++#endif ++#ifndef PCI_DEVICE_ID_PLX_9056 ++#define PCI_DEVICE_ID_PLX_9056 0x9056 ++#endif ++ ++#endif /* _COBALT_ASM_GENERIC_PCI_IDS_H */ +--- linux/include/asm-generic/xenomai/machine.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/machine.h 2021-04-07 16:01:25.938636008 +0800 +@@ -0,0 +1,77 @@ ++/** ++ * Copyright © 2012 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_MACHINE_H ++#define _COBALT_ASM_GENERIC_MACHINE_H ++ ++#include ++#include ++#include ++#include ++ ++struct vm_area_struct; ++ ++struct cobalt_machine { ++ const char *name; ++ int (*init)(void); ++ int (*late_init)(void); ++ void (*cleanup)(void); ++ void (*prefault)(struct vm_area_struct *vma); ++ unsigned long (*calibrate)(void); ++ const char *const *fault_labels; ++}; ++ ++extern struct cobalt_machine cobalt_machine; ++ ++struct cobalt_machine_cpudata { ++ unsigned long apc_pending; ++ unsigned long apc_shots[BITS_PER_LONG]; ++ unsigned int faults[IPIPE_NR_FAULTS]; ++}; ++ ++DECLARE_PER_CPU(struct cobalt_machine_cpudata, cobalt_machine_cpudata); ++ ++struct cobalt_pipeline { ++ struct ipipe_domain domain; ++ unsigned long timer_freq; ++ unsigned long clock_freq; ++ unsigned int apc_virq; ++ unsigned long apc_map; ++ unsigned int escalate_virq; ++ struct { ++ void (*handler)(void *cookie); ++ void *cookie; ++ const char *name; ++ } apc_table[BITS_PER_LONG]; ++#ifdef CONFIG_SMP ++ cpumask_t supported_cpus; ++#endif ++}; ++ ++extern struct cobalt_pipeline cobalt_pipeline; ++ ++static inline unsigned long xnarch_timer_calibrate(void) ++{ ++ return cobalt_machine.calibrate(); ++} ++ ++#ifndef xnarch_cache_aliasing ++#define xnarch_cache_aliasing() 0 ++#endif ++ ++#endif /* !_COBALT_ASM_GENERIC_MACHINE_H */ +--- linux/include/asm-generic/xenomai/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/syscall32.h 2021-04-07 16:01:25.933636015 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_SYSCALL32_H ++#define _COBALT_ASM_GENERIC_SYSCALL32_H ++ ++#define __COBALT_CALL32_INITHAND(__handler) ++ ++#define __COBALT_CALL32_INITMODE(__mode) ++ ++#define __COBALT_CALL32_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL_COMPAT(__reg) 0 ++ ++#endif /* !_COBALT_ASM_GENERIC_SYSCALL32_H */ +--- linux/include/asm-generic/xenomai/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/syscall.h 2021-04-07 16:01:25.928636022 +0800 +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004,2005 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_SYSCALL_H ++#define _COBALT_ASM_GENERIC_SYSCALL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) ++#define access_rok(addr, size) access_ok((addr), (size)) ++#define access_wok(addr, size) access_ok((addr), (size)) ++#else ++#define access_rok(addr, size) access_ok(VERIFY_READ, (addr), (size)) ++#define access_wok(addr, size) access_ok(VERIFY_WRITE, (addr), (size)) ++#endif ++ ++#define __xn_reg_arglist(regs) \ ++ __xn_reg_arg1(regs), \ ++ __xn_reg_arg2(regs), \ ++ __xn_reg_arg3(regs), \ ++ __xn_reg_arg4(regs), \ ++ __xn_reg_arg5(regs) ++ ++#define __xn_copy_from_user(dstP, srcP, n) raw_copy_from_user(dstP, srcP, n) ++#define __xn_copy_to_user(dstP, srcP, n) raw_copy_to_user(dstP, srcP, n) ++#define __xn_put_user(src, dstP) __put_user(src, dstP) ++#define __xn_get_user(dst, srcP) __get_user(dst, srcP) ++#define __xn_strncpy_from_user(dstP, srcP, n) strncpy_from_user(dstP, srcP, n) ++ ++static inline int cobalt_copy_from_user(void *dst, const void __user *src, ++ size_t size) ++{ ++ size_t remaining = size; ++ ++ if (likely(access_rok(src, size))) ++ remaining = __xn_copy_from_user(dst, src, size); ++ ++ if (unlikely(remaining > 0)) { ++ memset(dst + (size - remaining), 0, remaining); ++ return -EFAULT; ++ } ++ return 0; ++} ++ ++static inline int cobalt_copy_to_user(void __user *dst, const void *src, ++ size_t size) ++{ ++ if (unlikely(!access_wok(dst, size) || ++ __xn_copy_to_user(dst, src, size))) ++ return -EFAULT; ++ return 0; ++} ++ ++static inline int cobalt_strncpy_from_user(char *dst, const char __user *src, ++ size_t count) ++{ ++ if (unlikely(!access_rok(src, 1))) ++ return -EFAULT; ++ ++ return __xn_strncpy_from_user(dst, src, count); ++} ++ ++/* 32bit syscall emulation */ ++#define __COBALT_COMPAT_BIT 0x1 ++/* 32bit syscall emulation - extended form */ ++#define __COBALT_COMPATX_BIT 0x2 ++ ++#endif /* !_COBALT_ASM_GENERIC_SYSCALL_H */ +--- linux/include/asm-generic/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/wrappers.h 2021-04-07 16:01:25.924636028 +0800 +@@ -0,0 +1,180 @@ ++/* ++ * Copyright (C) 2005-2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_WRAPPERS_H ++ ++#include ++ ++#ifdef CONFIG_IPIPE_LEGACY ++#error "CONFIG_IPIPE_LEGACY must be switched off" ++#endif ++ ++#define COBALT_BACKPORT(__sym) __cobalt_backport_ ##__sym ++ ++/* ++ * To keep the #ifdefery as readable as possible, please: ++ * ++ * - keep the conditional structure flat, no nesting (e.g. do not fold ++ * the pre-3.11 conditions into the pre-3.14 ones). ++ * - group all wrappers for a single kernel revision. ++ * - list conditional blocks in order of kernel release, latest first ++ * - identify the first kernel release for which the wrapper should ++ * be defined, instead of testing the existence of a preprocessor ++ * symbol, so that obsolete wrappers can be spotted. ++ */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0) ++#define get_compat_sigset(set, compat) \ ++({ \ ++ compat_sigset_t set32; \ ++ int ret; \ ++ \ ++ ret = cobalt_copy_from_user(&set32, compat, sizeof(compat_sigset_t)); \ ++ if (!ret) \ ++ sigset_from_compat(set, &set32); \ ++ ret; \ ++}) ++ ++#define put_compat_sigset(compat, set, size) \ ++({ \ ++ compat_sigset_t set32; \ ++ \ ++ sigset_to_compat(&set32, set); \ ++ cobalt_copy_to_user(compat, &set32, size); \ ++}) ++#endif /* < 4.15 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,11,0) ++#define raw_copy_to_user(__to, __from, __n) __copy_to_user_inatomic(__to, __from, __n) ++#define raw_copy_from_user(__to, __from, __n) __copy_from_user_inatomic(__to, __from, __n) ++#define raw_put_user(__from, __to) __put_user_inatomic(__from, __to) ++#define raw_get_user(__to, __from) __get_user_inatomic(__to, __from) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) ++#define in_ia32_syscall() (current_thread_info()->status & TS_COMPAT) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0) ++#define cobalt_gpiochip_dev(__gc) ((__gc)->dev) ++#else ++#define cobalt_gpiochip_dev(__gc) ((__gc)->parent) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0) ++#define cobalt_get_restart_block(p) (&task_thread_info(p)->restart_block) ++#else ++#define cobalt_get_restart_block(p) (&(p)->restart_block) ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0) ++#define user_msghdr msghdr ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,17,0) ++#include ++ ++#undef alloc_netdev ++#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ ++ alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1) ++ ++#include ++ ++static inline unsigned char * ++trace_seq_buffer_ptr(struct trace_seq *s) ++{ ++ return s->buffer + s->len; ++} ++ ++#endif /* < 3.17 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0) ++#define smp_mb__before_atomic() smp_mb() ++#define smp_mb__after_atomic() smp_mb() ++#endif /* < 3.16 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0) ++#define raw_cpu_ptr(v) __this_cpu_ptr(v) ++#endif /* < 3.15 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ++#include ++ ++#ifdef CONFIG_PCI ++#define pci_enable_msix_range COBALT_BACKPORT(pci_enable_msix_range) ++#ifdef CONFIG_PCI_MSI ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec); ++#else /* !CONFIG_PCI_MSI */ ++static inline ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec) ++{ ++ return 
-ENOSYS; ++} ++#endif /* !CONFIG_PCI_MSI */ ++#endif /* CONFIG_PCI */ ++#endif /* < 3.14 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) ++#include ++#include ++ ++#define dma_set_mask_and_coherent COBALT_BACKPORT(dma_set_mask_and_coherent) ++static inline ++int dma_set_mask_and_coherent(struct device *dev, u64 mask) ++{ ++ int rc = dma_set_mask(dev, mask); ++ if (rc == 0) ++ dma_set_coherent_mask(dev, mask); ++ return rc; ++} ++ ++#ifdef CONFIG_HWMON ++#define hwmon_device_register_with_groups \ ++ COBALT_BACKPORT(hwmon_device_register_with_groups) ++struct device * ++hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups); ++ ++#define devm_hwmon_device_register_with_groups \ ++ COBALT_BACKPORT(devm_hwmon_device_register_with_groups) ++struct device * ++devm_hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups); ++#endif /* !CONFIG_HWMON */ ++ ++#define reinit_completion(__x) INIT_COMPLETION(*(__x)) ++ ++#endif /* < 3.13 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0) ++#define DEVICE_ATTR_RW(_name) __ATTR_RW(_name) ++#define DEVICE_ATTR_RO(_name) __ATTR_RO(_name) ++#define DEVICE_ATTR_WO(_name) __ATTR_WO(_name) ++#endif /* < 3.11 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ++#error "Xenomai/cobalt requires Linux kernel 3.10 or above" ++#endif /* < 3.10 */ ++ ++#endif /* _COBALT_ASM_GENERIC_WRAPPERS_H */ +--- linux/include/asm-generic/xenomai/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/asm-generic/xenomai/thread.h 2021-04-07 16:01:25.919636035 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_ASM_GENERIC_THREAD_H ++#define _COBALT_ASM_GENERIC_THREAD_H ++ ++#include ++#include ++ ++struct task_struct; ++ ++struct xnthread; ++struct xnarchtcb; ++ ++struct xntcb { ++ struct task_struct *host_task; ++ struct thread_struct *tsp; ++ struct mm_struct *mm; ++ struct mm_struct *active_mm; ++ struct thread_struct ts; ++ struct thread_info *tip; ++#ifdef CONFIG_XENO_ARCH_FPU ++ struct task_struct *user_fpu_owner; ++#endif ++}; ++ ++#endif /* !_COBALT_ASM_GENERIC_THREAD_H */ +--- linux/include/trace/events/cobalt-posix.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-posix.h 2021-04-07 16:01:25.912636045 +0800 +@@ -0,0 +1,1180 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_posix ++ ++#if !defined(_TRACE_COBALT_POSIX_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_POSIX_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define __timespec_fields(__name) \ ++ __field(__kernel_time_t, tv_sec_##__name) \ ++ __field(long, tv_nsec_##__name) ++ ++#define __assign_timespec(__to, __from) \ ++ do { \ ++ __entry->tv_sec_##__to = (__from)->tv_sec; \ ++ __entry->tv_nsec_##__to = (__from)->tv_nsec; \ ++ } while (0) ++ ++#define __timespec_args(__name) \ ++ __entry->tv_sec_##__name, __entry->tv_nsec_##__name ++ ++#ifdef CONFIG_X86_X32 ++#define __sc_x32(__name) , { sc_cobalt_##__name + __COBALT_X32_BASE, "x32-" #__name } ++#else ++#define __sc_x32(__name) ++#endif ++ ++#ifdef CONFIG_IA32_EMULATION ++#define __sc_compat(__name) , { sc_cobalt_##__name + __COBALT_IA32_BASE, "compat-" #__name } ++#else ++#define __sc_compat(__name) ++#endif ++ ++#define __cobalt_symbolic_syscall(__name) \ ++ { sc_cobalt_##__name, #__name } \ ++ __sc_x32(__name) \ ++ __sc_compat(__name) \ ++ ++#define __cobalt_syscall_name(__nr) \ ++ __print_symbolic((__nr), \ ++ __cobalt_symbolic_syscall(bind), \ ++ __cobalt_symbolic_syscall(thread_create), \ ++ __cobalt_symbolic_syscall(thread_getpid), \ ++ __cobalt_symbolic_syscall(thread_setmode), \ ++ __cobalt_symbolic_syscall(thread_setname), \ ++ __cobalt_symbolic_syscall(thread_join), \ ++ __cobalt_symbolic_syscall(thread_kill), \ ++ __cobalt_symbolic_syscall(thread_setschedparam_ex), \ ++ __cobalt_symbolic_syscall(thread_getschedparam_ex), \ ++ __cobalt_symbolic_syscall(thread_setschedprio), \ ++ __cobalt_symbolic_syscall(thread_getstat), \ ++ __cobalt_symbolic_syscall(sem_init), \ ++ __cobalt_symbolic_syscall(sem_destroy), \ ++ __cobalt_symbolic_syscall(sem_post), \ ++ __cobalt_symbolic_syscall(sem_wait), \ ++ __cobalt_symbolic_syscall(sem_trywait), \ ++ __cobalt_symbolic_syscall(sem_getvalue), \ ++ __cobalt_symbolic_syscall(sem_open), \ ++ __cobalt_symbolic_syscall(sem_close), \ ++ __cobalt_symbolic_syscall(sem_unlink), \ ++ __cobalt_symbolic_syscall(sem_timedwait), \ ++ __cobalt_symbolic_syscall(sem_inquire), \ ++ __cobalt_symbolic_syscall(sem_broadcast_np), \ ++ __cobalt_symbolic_syscall(clock_getres), \ ++ __cobalt_symbolic_syscall(clock_gettime), \ ++ __cobalt_symbolic_syscall(clock_settime), \ ++ __cobalt_symbolic_syscall(clock_nanosleep), \ ++ __cobalt_symbolic_syscall(mutex_init), \ ++ __cobalt_symbolic_syscall(mutex_check_init), \ ++ __cobalt_symbolic_syscall(mutex_destroy), \ ++ __cobalt_symbolic_syscall(mutex_lock), \ ++ __cobalt_symbolic_syscall(mutex_timedlock), \ ++ __cobalt_symbolic_syscall(mutex_trylock), \ ++ __cobalt_symbolic_syscall(mutex_unlock), \ ++ __cobalt_symbolic_syscall(cond_init), \ ++ __cobalt_symbolic_syscall(cond_destroy), \ ++ __cobalt_symbolic_syscall(cond_wait_prologue), \ ++ __cobalt_symbolic_syscall(cond_wait_epilogue), \ ++ __cobalt_symbolic_syscall(mq_open), \ ++ __cobalt_symbolic_syscall(mq_close), \ ++ __cobalt_symbolic_syscall(mq_unlink), \ ++ 
__cobalt_symbolic_syscall(mq_getattr), \ ++ __cobalt_symbolic_syscall(mq_timedsend), \ ++ __cobalt_symbolic_syscall(mq_timedreceive), \ ++ __cobalt_symbolic_syscall(mq_notify), \ ++ __cobalt_symbolic_syscall(sched_minprio), \ ++ __cobalt_symbolic_syscall(sched_maxprio), \ ++ __cobalt_symbolic_syscall(sched_weightprio), \ ++ __cobalt_symbolic_syscall(sched_yield), \ ++ __cobalt_symbolic_syscall(sched_setscheduler_ex), \ ++ __cobalt_symbolic_syscall(sched_getscheduler_ex), \ ++ __cobalt_symbolic_syscall(sched_setconfig_np), \ ++ __cobalt_symbolic_syscall(sched_getconfig_np), \ ++ __cobalt_symbolic_syscall(timer_create), \ ++ __cobalt_symbolic_syscall(timer_delete), \ ++ __cobalt_symbolic_syscall(timer_settime), \ ++ __cobalt_symbolic_syscall(timer_gettime), \ ++ __cobalt_symbolic_syscall(timer_getoverrun), \ ++ __cobalt_symbolic_syscall(timerfd_create), \ ++ __cobalt_symbolic_syscall(timerfd_settime), \ ++ __cobalt_symbolic_syscall(timerfd_gettime), \ ++ __cobalt_symbolic_syscall(sigwait), \ ++ __cobalt_symbolic_syscall(sigwaitinfo), \ ++ __cobalt_symbolic_syscall(sigtimedwait), \ ++ __cobalt_symbolic_syscall(sigpending), \ ++ __cobalt_symbolic_syscall(kill), \ ++ __cobalt_symbolic_syscall(sigqueue), \ ++ __cobalt_symbolic_syscall(monitor_init), \ ++ __cobalt_symbolic_syscall(monitor_destroy), \ ++ __cobalt_symbolic_syscall(monitor_enter), \ ++ __cobalt_symbolic_syscall(monitor_wait), \ ++ __cobalt_symbolic_syscall(monitor_sync), \ ++ __cobalt_symbolic_syscall(monitor_exit), \ ++ __cobalt_symbolic_syscall(event_init), \ ++ __cobalt_symbolic_syscall(event_wait), \ ++ __cobalt_symbolic_syscall(event_sync), \ ++ __cobalt_symbolic_syscall(event_destroy), \ ++ __cobalt_symbolic_syscall(event_inquire), \ ++ __cobalt_symbolic_syscall(open), \ ++ __cobalt_symbolic_syscall(socket), \ ++ __cobalt_symbolic_syscall(close), \ ++ __cobalt_symbolic_syscall(ioctl), \ ++ __cobalt_symbolic_syscall(read), \ ++ __cobalt_symbolic_syscall(write), \ ++ __cobalt_symbolic_syscall(recvmsg), \ ++ __cobalt_symbolic_syscall(sendmsg), \ ++ __cobalt_symbolic_syscall(mmap), \ ++ __cobalt_symbolic_syscall(select), \ ++ __cobalt_symbolic_syscall(fcntl), \ ++ __cobalt_symbolic_syscall(migrate), \ ++ __cobalt_symbolic_syscall(archcall), \ ++ __cobalt_symbolic_syscall(trace), \ ++ __cobalt_symbolic_syscall(corectl), \ ++ __cobalt_symbolic_syscall(get_current), \ ++ __cobalt_symbolic_syscall(backtrace), \ ++ __cobalt_symbolic_syscall(serialdbg), \ ++ __cobalt_symbolic_syscall(extend), \ ++ __cobalt_symbolic_syscall(ftrace_puts), \ ++ __cobalt_symbolic_syscall(recvmmsg), \ ++ __cobalt_symbolic_syscall(sendmmsg), \ ++ __cobalt_symbolic_syscall(clock_adjtime)) ++ ++DECLARE_EVENT_CLASS(syscall_entry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, nr) ++ ), ++ ++ TP_fast_assign( ++ __entry->nr = nr; ++ ), ++ ++ TP_printk("syscall=%s", __cobalt_syscall_name(__entry->nr)) ++); ++ ++DECLARE_EVENT_CLASS(syscall_exit, ++ TP_PROTO(long result), ++ TP_ARGS(result), ++ ++ TP_STRUCT__entry( ++ __field(long, result) ++ ), ++ ++ TP_fast_assign( ++ __entry->result = result; ++ ), ++ ++ TP_printk("result=%ld", __entry->result) ++); ++ ++#define cobalt_print_sched_policy(__policy) \ ++ __print_symbolic(__policy, \ ++ {SCHED_NORMAL, "normal"}, \ ++ {SCHED_FIFO, "fifo"}, \ ++ {SCHED_RR, "rr"}, \ ++ {SCHED_TP, "tp"}, \ ++ {SCHED_QUOTA, "quota"}, \ ++ {SCHED_SPORADIC, "sporadic"}, \ ++ {SCHED_COBALT, "cobalt"}, \ ++ {SCHED_WEAK, "weak"}) ++ ++const char *cobalt_trace_parse_sched_params(struct trace_seq 
*, int, ++ struct sched_param_ex *); ++ ++#define __parse_sched_params(policy, params) \ ++ cobalt_trace_parse_sched_params(p, policy, \ ++ (struct sched_param_ex *)(params)) ++ ++DECLARE_EVENT_CLASS(cobalt_posix_schedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, policy) ++ __dynamic_array(char, param_ex, sizeof(struct sched_param_ex)) ++ ), ++ ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->policy = policy; ++ memcpy(__get_dynamic_array(param_ex), param_ex, sizeof(*param_ex)); ++ ), ++ ++ TP_printk("pth=%p policy=%s param={ %s }", ++ (void *)__entry->pth, ++ cobalt_print_sched_policy(__entry->policy), ++ __parse_sched_params(__entry->policy, ++ __get_dynamic_array(param_ex)) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_scheduler, ++ TP_PROTO(pid_t pid, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pid, policy, param_ex), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(int, policy) ++ __dynamic_array(char, param_ex, sizeof(struct sched_param_ex)) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = pid; ++ __entry->policy = policy; ++ memcpy(__get_dynamic_array(param_ex), param_ex, sizeof(*param_ex)); ++ ), ++ ++ TP_printk("pid=%d policy=%s param={ %s }", ++ __entry->pid, ++ cobalt_print_sched_policy(__entry->policy), ++ __parse_sched_params(__entry->policy, ++ __get_dynamic_array(param_ex)) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_void, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy), ++ TP_STRUCT__entry( ++ __field(int, dummy) ++ ), ++ TP_fast_assign( ++ (void)dummy; ++ ), ++ TP_printk("%s", "") ++); ++ ++DEFINE_EVENT(syscall_entry, cobalt_head_sysentry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr) ++); ++ ++DEFINE_EVENT(syscall_exit, cobalt_head_sysexit, ++ TP_PROTO(long result), ++ TP_ARGS(result) ++); ++ ++DEFINE_EVENT(syscall_entry, cobalt_root_sysentry, ++ TP_PROTO(unsigned int nr), ++ TP_ARGS(nr) ++); ++ ++DEFINE_EVENT(syscall_exit, cobalt_root_sysexit, ++ TP_PROTO(long result), ++ TP_ARGS(result) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_create, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_setschedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_schedparam, cobalt_pthread_getschedparam, ++ TP_PROTO(unsigned long pth, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pth, policy, param_ex) ++); ++ ++TRACE_EVENT(cobalt_pthread_setschedprio, ++ TP_PROTO(unsigned long pth, int prio), ++ TP_ARGS(pth, prio), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, prio) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->prio = prio; ++ ), ++ TP_printk("pth=%p prio=%d", (void *)__entry->pth, __entry->prio) ++); ++ ++#define cobalt_print_thread_mode(__mode) \ ++ __print_flags(__mode, "|", \ ++ {PTHREAD_WARNSW, "warnsw"}, \ ++ {PTHREAD_LOCK_SCHED, "lock"}, \ ++ {PTHREAD_DISABLE_LOCKBREAK, "nolockbreak"}) ++ ++TRACE_EVENT(cobalt_pthread_setmode, ++ TP_PROTO(int clrmask, int setmask), ++ TP_ARGS(clrmask, setmask), ++ TP_STRUCT__entry( ++ __field(int, clrmask) ++ __field(int, setmask) ++ ), ++ TP_fast_assign( ++ __entry->clrmask = clrmask; ++ __entry->setmask = setmask; ++ ), ++ TP_printk("clrmask=%#x(%s) 
setmask=%#x(%s)", ++ __entry->clrmask, cobalt_print_thread_mode(__entry->clrmask), ++ __entry->setmask, cobalt_print_thread_mode(__entry->setmask)) ++); ++ ++TRACE_EVENT(cobalt_pthread_setname, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p name=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_pid, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid), ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ ), ++ TP_fast_assign( ++ __entry->pid = pid; ++ ), ++ TP_printk("pid=%d", __entry->pid) ++); ++ ++DEFINE_EVENT(cobalt_posix_pid, cobalt_pthread_stat, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid) ++); ++ ++TRACE_EVENT(cobalt_pthread_kill, ++ TP_PROTO(unsigned long pth, int sig), ++ TP_ARGS(pth, sig), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __field(int, sig) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __entry->sig = sig; ++ ), ++ TP_printk("pth=%p sig=%d", (void *)__entry->pth, __entry->sig) ++); ++ ++TRACE_EVENT(cobalt_pthread_join, ++ TP_PROTO(unsigned long pth), ++ TP_ARGS(pth), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ ), ++ TP_printk("pth=%p", (void *)__entry->pth) ++); ++ ++TRACE_EVENT(cobalt_pthread_pid, ++ TP_PROTO(unsigned long pth), ++ TP_ARGS(pth), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ ), ++ TP_printk("pth=%p", (void *)__entry->pth) ++); ++ ++TRACE_EVENT(cobalt_pthread_extend, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p +personality=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++TRACE_EVENT(cobalt_pthread_restrict, ++ TP_PROTO(unsigned long pth, const char *name), ++ TP_ARGS(pth, name), ++ TP_STRUCT__entry( ++ __field(unsigned long, pth) ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __entry->pth = pth; ++ __assign_str(name, name); ++ ), ++ TP_printk("pth=%p -personality=%s", (void *)__entry->pth, __get_str(name)) ++); ++ ++DEFINE_EVENT(cobalt_void, cobalt_pthread_yield, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy) ++); ++ ++TRACE_EVENT(cobalt_sched_setconfig, ++ TP_PROTO(int cpu, int policy, size_t len), ++ TP_ARGS(cpu, policy, len), ++ TP_STRUCT__entry( ++ __field(int, cpu) ++ __field(int, policy) ++ __field(size_t, len) ++ ), ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ __entry->policy = policy; ++ __entry->len = len; ++ ), ++ TP_printk("cpu=%d policy=%d(%s) len=%zu", ++ __entry->cpu, __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->len) ++); ++ ++TRACE_EVENT(cobalt_sched_get_config, ++ TP_PROTO(int cpu, int policy, size_t rlen), ++ TP_ARGS(cpu, policy, rlen), ++ TP_STRUCT__entry( ++ __field(int, cpu) ++ __field(int, policy) ++ __field(ssize_t, rlen) ++ ), ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ __entry->policy = policy; ++ __entry->rlen = rlen; ++ ), ++ TP_printk("cpu=%d policy=%d(%s) rlen=%Zd", ++ __entry->cpu, __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->rlen) ++); ++ ++DEFINE_EVENT(cobalt_posix_scheduler, cobalt_sched_setscheduler, ++ TP_PROTO(pid_t pid, int policy, ++ const struct sched_param_ex *param_ex), ++ TP_ARGS(pid, policy, 
param_ex) ++); ++ ++DEFINE_EVENT(cobalt_posix_pid, cobalt_sched_getscheduler, ++ TP_PROTO(pid_t pid), ++ TP_ARGS(pid) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_prio_bound, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio), ++ TP_STRUCT__entry( ++ __field(int, policy) ++ __field(int, prio) ++ ), ++ TP_fast_assign( ++ __entry->policy = policy; ++ __entry->prio = prio; ++ ), ++ TP_printk("policy=%d(%s) prio=%d", ++ __entry->policy, ++ cobalt_print_sched_policy(__entry->policy), ++ __entry->prio) ++); ++ ++DEFINE_EVENT(cobalt_posix_prio_bound, cobalt_sched_min_prio, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio) ++); ++ ++DEFINE_EVENT(cobalt_posix_prio_bound, cobalt_sched_max_prio, ++ TP_PROTO(int policy, int prio), ++ TP_ARGS(policy, prio) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_sem, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle), ++ TP_STRUCT__entry( ++ __field(xnhandle_t, handle) ++ ), ++ TP_fast_assign( ++ __entry->handle = handle; ++ ), ++ TP_printk("sem=%#x", __entry->handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_wait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_trywait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_timedwait, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_post, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_destroy, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_broadcast, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_inquire, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++TRACE_EVENT(cobalt_psem_getvalue, ++ TP_PROTO(xnhandle_t handle, int value), ++ TP_ARGS(handle, value), ++ TP_STRUCT__entry( ++ __field(xnhandle_t, handle) ++ __field(int, value) ++ ), ++ TP_fast_assign( ++ __entry->handle = handle; ++ __entry->value = value; ++ ), ++ TP_printk("sem=%#x value=%d", __entry->handle, __entry->value) ++); ++ ++#define cobalt_print_sem_flags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {SEM_FIFO, "fifo"}, \ ++ {SEM_PULSE, "pulse"}, \ ++ {SEM_PSHARED, "pshared"}, \ ++ {SEM_REPORT, "report"}, \ ++ {SEM_WARNDEL, "warndel"}, \ ++ {SEM_RAWCLOCK, "rawclock"}, \ ++ {SEM_NOBUSYDEL, "nobusydel"}) ++ ++TRACE_EVENT(cobalt_psem_init, ++ TP_PROTO(const char *name, xnhandle_t handle, ++ int flags, unsigned int value), ++ TP_ARGS(name, handle, flags, value), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(xnhandle_t, handle) ++ __field(int, flags) ++ __field(unsigned int, value) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->handle = handle; ++ __entry->flags = flags; ++ __entry->value = value; ++ ), ++ TP_printk("sem=%#x(%s) flags=%#x(%s) value=%u", ++ __entry->handle, ++ __get_str(name), ++ __entry->flags, ++ cobalt_print_sem_flags(__entry->flags), ++ __entry->value) ++); ++ ++TRACE_EVENT(cobalt_psem_init_failed, ++ TP_PROTO(const char *name, int flags, unsigned int value, int status), ++ TP_ARGS(name, flags, value, status), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, flags) ++ __field(unsigned int, value) ++ __field(int, status) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->flags = flags; ++ __entry->value = value; ++ __entry->status = status; ++ ), ++ TP_printk("name=%s flags=%#x(%s) value=%u error=%d", ++ 
__get_str(name), ++ __entry->flags, ++ cobalt_print_sem_flags(__entry->flags), ++ __entry->value, __entry->status) ++); ++ ++#define cobalt_print_oflags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {O_RDONLY, "rdonly"}, \ ++ {O_WRONLY, "wronly"}, \ ++ {O_RDWR, "rdwr"}, \ ++ {O_CREAT, "creat"}, \ ++ {O_EXCL, "excl"}, \ ++ {O_DIRECT, "direct"}, \ ++ {O_NONBLOCK, "nonblock"}, \ ++ {O_TRUNC, "trunc"}) ++ ++TRACE_EVENT(cobalt_psem_open, ++ TP_PROTO(const char *name, xnhandle_t handle, ++ int oflags, mode_t mode, unsigned int value), ++ TP_ARGS(name, handle, oflags, mode, value), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(xnhandle_t, handle) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ __field(unsigned int, value) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->handle = handle; ++ __entry->oflags = oflags; ++ if (oflags & O_CREAT) { ++ __entry->mode = mode; ++ __entry->value = value; ++ } else { ++ __entry->mode = 0; ++ __entry->value = 0; ++ } ++ ), ++ TP_printk("named_sem=%#x=(%s) oflags=%#x(%s) mode=%o value=%u", ++ __entry->handle, __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode, __entry->value) ++); ++ ++TRACE_EVENT(cobalt_psem_open_failed, ++ TP_PROTO(const char *name, int oflags, mode_t mode, ++ unsigned int value, int status), ++ TP_ARGS(name, oflags, mode, value, status), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ __field(unsigned int, value) ++ __field(int, status) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->oflags = oflags; ++ __entry->status = status; ++ if (oflags & O_CREAT) { ++ __entry->mode = mode; ++ __entry->value = value; ++ } else { ++ __entry->mode = 0; ++ __entry->value = 0; ++ } ++ ), ++ TP_printk("named_sem=%s oflags=%#x(%s) mode=%o value=%u error=%d", ++ __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode, __entry->value, __entry->status) ++); ++ ++DEFINE_EVENT(cobalt_posix_sem, cobalt_psem_close, ++ TP_PROTO(xnhandle_t handle), ++ TP_ARGS(handle) ++); ++ ++TRACE_EVENT(cobalt_psem_unlink, ++ TP_PROTO(const char *name), ++ TP_ARGS(name), ++ TP_STRUCT__entry( ++ __string(name, name) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ ), ++ TP_printk("name=%s", __get_str(name)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_clock_timespec, ++ TP_PROTO(clockid_t clk_id, const struct timespec *val), ++ TP_ARGS(clk_id, val), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __timespec_fields(val) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __assign_timespec(val, val); ++ ), ++ ++ TP_printk("clock_id=%d timeval=(%ld.%09ld)", ++ __entry->clk_id, ++ __timespec_args(val) ++ ) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_getres, ++ TP_PROTO(clockid_t clk_id, const struct timespec *res), ++ TP_ARGS(clk_id, res) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_gettime, ++ TP_PROTO(clockid_t clk_id, const struct timespec *time), ++ TP_ARGS(clk_id, time) ++); ++ ++DEFINE_EVENT(cobalt_clock_timespec, cobalt_clock_settime, ++ TP_PROTO(clockid_t clk_id, const struct timespec *time), ++ TP_ARGS(clk_id, time) ++); ++ ++TRACE_EVENT(cobalt_clock_adjtime, ++ TP_PROTO(clockid_t clk_id, struct timex *tx), ++ TP_ARGS(clk_id, tx), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __field(struct timex *, tx) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __entry->tx = tx; ++ ), ++ ++ TP_printk("clock_id=%d timex=%p", ++ __entry->clk_id, ++ 
__entry->tx ++ ) ++); ++ ++#define cobalt_print_timer_flags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {TIMER_ABSTIME, "TIMER_ABSTIME"}) ++ ++TRACE_EVENT(cobalt_clock_nanosleep, ++ TP_PROTO(clockid_t clk_id, int flags, const struct timespec *time), ++ TP_ARGS(clk_id, flags, time), ++ ++ TP_STRUCT__entry( ++ __field(clockid_t, clk_id) ++ __field(int, flags) ++ __timespec_fields(time) ++ ), ++ ++ TP_fast_assign( ++ __entry->clk_id = clk_id; ++ __entry->flags = flags; ++ __assign_timespec(time, time); ++ ), ++ ++ TP_printk("clock_id=%d flags=%#x(%s) rqt=(%ld.%09ld)", ++ __entry->clk_id, ++ __entry->flags, cobalt_print_timer_flags(__entry->flags), ++ __timespec_args(time) ++ ) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_clock_ident, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id), ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(clockid_t, clk_id) ++ ), ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->clk_id = clk_id; ++ ), ++ TP_printk("name=%s, id=%#x", __get_str(name), __entry->clk_id) ++); ++ ++DEFINE_EVENT(cobalt_clock_ident, cobalt_clock_register, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id) ++); ++ ++DEFINE_EVENT(cobalt_clock_ident, cobalt_clock_deregister, ++ TP_PROTO(const char *name, clockid_t clk_id), ++ TP_ARGS(name, clk_id) ++); ++ ++#define cobalt_print_clock(__clk_id) \ ++ __print_symbolic(__clk_id, \ ++ {CLOCK_MONOTONIC, "CLOCK_MONOTONIC"}, \ ++ {CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW"}, \ ++ {CLOCK_REALTIME, "CLOCK_REALTIME"}) ++ ++TRACE_EVENT(cobalt_cond_init, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr *attr), ++ TP_ARGS(u_cnd, attr), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(clockid_t, clk_id) ++ __field(int, pshared) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->clk_id = attr->clock; ++ __entry->pshared = attr->pshared; ++ ), ++ TP_printk("cond=%p attr={ .clock=%s, .pshared=%d }", ++ __entry->u_cnd, ++ cobalt_print_clock(__entry->clk_id), ++ __entry->pshared) ++); ++ ++TRACE_EVENT(cobalt_cond_destroy, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd), ++ TP_ARGS(u_cnd), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ ), ++ TP_printk("cond=%p", __entry->u_cnd) ++); ++ ++TRACE_EVENT(cobalt_cond_timedwait, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec *timeout), ++ TP_ARGS(u_cnd, u_mx, timeout), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(const struct cobalt_mutex_shadow __user *, u_mx) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->u_mx = u_mx; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("cond=%p, mutex=%p, timeout=(%ld.%09ld)", ++ __entry->u_cnd, __entry->u_mx, __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_cond_wait, ++ TP_PROTO(const struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_mutex_shadow __user *u_mx), ++ TP_ARGS(u_cnd, u_mx), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_cond_shadow __user *, u_cnd) ++ __field(const struct cobalt_mutex_shadow __user *, u_mx) ++ ), ++ TP_fast_assign( ++ __entry->u_cnd = u_cnd; ++ __entry->u_mx = u_mx; ++ ), ++ TP_printk("cond=%p, mutex=%p", ++ __entry->u_cnd, __entry->u_mx) ++); ++ ++TRACE_EVENT(cobalt_mq_open, ++ TP_PROTO(const 
char *name, int oflags, mode_t mode), ++ TP_ARGS(name, oflags, mode), ++ ++ TP_STRUCT__entry( ++ __string(name, name) ++ __field(int, oflags) ++ __field(mode_t, mode) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, name); ++ __entry->oflags = oflags; ++ __entry->mode = (oflags & O_CREAT) ? mode : 0; ++ ), ++ ++ TP_printk("name=%s oflags=%#x(%s) mode=%o", ++ __get_str(name), ++ __entry->oflags, cobalt_print_oflags(__entry->oflags), ++ __entry->mode) ++); ++ ++TRACE_EVENT(cobalt_mq_notify, ++ TP_PROTO(mqd_t mqd, const struct sigevent *sev), ++ TP_ARGS(mqd, sev), ++ ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(int, signo) ++ ), ++ ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->signo = sev && sev->sigev_notify != SIGEV_NONE ? ++ sev->sigev_signo : 0; ++ ), ++ ++ TP_printk("mqd=%d signo=%d", ++ __entry->mqd, __entry->signo) ++); ++ ++TRACE_EVENT(cobalt_mq_close, ++ TP_PROTO(mqd_t mqd), ++ TP_ARGS(mqd), ++ ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ ), ++ ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ ), ++ ++ TP_printk("mqd=%d", __entry->mqd) ++); ++ ++TRACE_EVENT(cobalt_mq_unlink, ++ TP_PROTO(const char *name), ++ TP_ARGS(name), ++ ++ TP_STRUCT__entry( ++ __string(name, name) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, name); ++ ), ++ ++ TP_printk("name=%s", __get_str(name)) ++); ++ ++TRACE_EVENT(cobalt_mq_send, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len, ++ unsigned int prio), ++ TP_ARGS(mqd, u_buf, len, prio), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ __field(unsigned int, prio) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ __entry->prio = prio; ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu prio=%u", ++ __entry->mqd, __entry->u_buf, __entry->len, ++ __entry->prio) ++); ++ ++TRACE_EVENT(cobalt_mq_timedreceive, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len, ++ const struct timespec *timeout), ++ TP_ARGS(mqd, u_buf, len, timeout), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu timeout=(%ld.%09ld)", ++ __entry->mqd, __entry->u_buf, __entry->len, ++ __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_mq_receive, ++ TP_PROTO(mqd_t mqd, const void __user *u_buf, size_t len), ++ TP_ARGS(mqd, u_buf, len), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(const void __user *, u_buf) ++ __field(size_t, len) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->u_buf = u_buf; ++ __entry->len = len; ++ ), ++ TP_printk("mqd=%d buf=%p len=%zu", ++ __entry->mqd, __entry->u_buf, __entry->len) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_posix_mqattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr), ++ TP_STRUCT__entry( ++ __field(mqd_t, mqd) ++ __field(long, flags) ++ __field(long, curmsgs) ++ __field(long, msgsize) ++ __field(long, maxmsg) ++ ), ++ TP_fast_assign( ++ __entry->mqd = mqd; ++ __entry->flags = attr->mq_flags; ++ __entry->curmsgs = attr->mq_curmsgs; ++ __entry->msgsize = attr->mq_msgsize; ++ __entry->maxmsg = attr->mq_maxmsg; ++ ), ++ TP_printk("mqd=%d flags=%#lx(%s) curmsgs=%ld msgsize=%ld maxmsg=%ld", ++ __entry->mqd, ++ __entry->flags, cobalt_print_oflags(__entry->flags), ++ __entry->curmsgs, ++ __entry->msgsize, ++ 
__entry->maxmsg ++ ) ++); ++ ++DEFINE_EVENT(cobalt_posix_mqattr, cobalt_mq_getattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr) ++); ++ ++DEFINE_EVENT(cobalt_posix_mqattr, cobalt_mq_setattr, ++ TP_PROTO(mqd_t mqd, const struct mq_attr *attr), ++ TP_ARGS(mqd, attr) ++); ++ ++#define cobalt_print_evflags(__flags) \ ++ __print_flags(__flags, "|", \ ++ {COBALT_EVENT_SHARED, "shared"}, \ ++ {COBALT_EVENT_PRIO, "prio"}) ++ ++TRACE_EVENT(cobalt_event_init, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long value, int flags), ++ TP_ARGS(u_event, value, flags), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, value) ++ __field(int, flags) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->value = value; ++ __entry->flags = flags; ++ ), ++ TP_printk("event=%p value=%lu flags=%#x(%s)", ++ __entry->u_event, __entry->value, ++ __entry->flags, cobalt_print_evflags(__entry->flags)) ++); ++ ++#define cobalt_print_evmode(__mode) \ ++ __print_symbolic(__mode, \ ++ {COBALT_EVENT_ANY, "any"}, \ ++ {COBALT_EVENT_ALL, "all"}) ++ ++TRACE_EVENT(cobalt_event_timedwait, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long bits, int mode, ++ const struct timespec *timeout), ++ TP_ARGS(u_event, bits, mode, timeout), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, bits) ++ __field(int, mode) ++ __timespec_fields(timeout) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->bits = bits; ++ __entry->mode = mode; ++ __assign_timespec(timeout, timeout); ++ ), ++ TP_printk("event=%p bits=%#lx mode=%#x(%s) timeout=(%ld.%09ld)", ++ __entry->u_event, __entry->bits, __entry->mode, ++ cobalt_print_evmode(__entry->mode), ++ __timespec_args(timeout)) ++); ++ ++TRACE_EVENT(cobalt_event_wait, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event, ++ unsigned long bits, int mode), ++ TP_ARGS(u_event, bits, mode), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ __field(unsigned long, bits) ++ __field(int, mode) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ __entry->bits = bits; ++ __entry->mode = mode; ++ ), ++ TP_printk("event=%p bits=%#lx mode=%#x(%s)", ++ __entry->u_event, __entry->bits, __entry->mode, ++ cobalt_print_evmode(__entry->mode)) ++); ++ ++DECLARE_EVENT_CLASS(cobalt_event_ident, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event), ++ TP_STRUCT__entry( ++ __field(const struct cobalt_event_shadow __user *, u_event) ++ ), ++ TP_fast_assign( ++ __entry->u_event = u_event; ++ ), ++ TP_printk("event=%p", __entry->u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_destroy, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_sync, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++DEFINE_EVENT(cobalt_event_ident, cobalt_event_inquire, ++ TP_PROTO(const struct cobalt_event_shadow __user *u_event), ++ TP_ARGS(u_event) ++); ++ ++#endif /* _TRACE_COBALT_POSIX_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-posix ++#include +--- linux/include/trace/events/cobalt-rtdm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-rtdm.h 2021-04-07 
16:01:25.905636055 +0800 +@@ -0,0 +1,554 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_rtdm ++ ++#if !defined(_TRACE_COBALT_RTDM_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_RTDM_H ++ ++#include ++#include ++#include ++ ++struct rtdm_fd; ++struct rtdm_event; ++struct rtdm_sem; ++struct rtdm_mutex; ++struct xnthread; ++struct rtdm_device; ++struct rtdm_dev_context; ++struct _rtdm_mmap_request; ++ ++DECLARE_EVENT_CLASS(fd_event, ++ TP_PROTO(struct rtdm_fd *fd, int ufd), ++ TP_ARGS(fd, ufd), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ ), ++ ++ TP_printk("device=%p fd=%d", ++ __entry->dev, __entry->ufd) ++); ++ ++DECLARE_EVENT_CLASS(fd_request, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, unsigned long arg), ++ TP_ARGS(task, fd, ufd, arg), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ __field(unsigned long, arg) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ __entry->arg = arg; ++ ), ++ ++ TP_printk("device=%p fd=%d arg=%#lx pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->arg, ++ __entry->pid, __entry->comm) ++); ++ ++DECLARE_EVENT_CLASS(fd_request_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, int status), ++ TP_ARGS(task, fd, ufd, status), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = ++ !IS_ERR(fd) ? 
rtdm_fd_to_context(fd)->device : NULL; ++ __entry->ufd = ufd; ++ ), ++ ++ TP_printk("device=%p fd=%d pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->pid, __entry->comm) ++); ++ ++DECLARE_EVENT_CLASS(task_op, ++ TP_PROTO(struct xnthread *task), ++ TP_ARGS(task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ ), ++ ++ TP_printk("task %p(%s)", __entry->task, __get_str(task_name)) ++); ++ ++DECLARE_EVENT_CLASS(event_op, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_event *, ev) ++ ), ++ ++ TP_fast_assign( ++ __entry->ev = ev; ++ ), ++ ++ TP_printk("event=%p", __entry->ev) ++); ++ ++DECLARE_EVENT_CLASS(sem_op, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_sem *, sem) ++ ), ++ ++ TP_fast_assign( ++ __entry->sem = sem; ++ ), ++ ++ TP_printk("sem=%p", __entry->sem) ++); ++ ++DECLARE_EVENT_CLASS(mutex_op, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_mutex *, mutex) ++ ), ++ ++ TP_fast_assign( ++ __entry->mutex = mutex; ++ ), ++ ++ TP_printk("mutex=%p", __entry->mutex) ++); ++ ++TRACE_EVENT(cobalt_device_register, ++ TP_PROTO(struct rtdm_device *dev), ++ TP_ARGS(dev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __string(device_name, dev->name) ++ __field(int, flags) ++ __field(int, class_id) ++ __field(int, subclass_id) ++ __field(int, profile_version) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = dev; ++ __assign_str(device_name, dev->name); ++ __entry->flags = dev->driver->device_flags; ++ __entry->class_id = dev->driver->profile_info.class_id; ++ __entry->subclass_id = dev->driver->profile_info.subclass_id; ++ __entry->profile_version = dev->driver->profile_info.version; ++ ), ++ ++ TP_printk("%s device %s=%p flags=0x%x, class=%d.%d profile=%d", ++ (__entry->flags & RTDM_DEVICE_TYPE_MASK) ++ == RTDM_NAMED_DEVICE ? 
"named" : "protocol", ++ __get_str(device_name), __entry->dev, ++ __entry->flags, __entry->class_id, __entry->subclass_id, ++ __entry->profile_version) ++); ++ ++TRACE_EVENT(cobalt_device_unregister, ++ TP_PROTO(struct rtdm_device *dev), ++ TP_ARGS(dev), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_device *, dev) ++ __string(device_name, dev->name) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = dev; ++ __assign_str(device_name, dev->name); ++ ), ++ ++ TP_printk("device %s=%p", ++ __get_str(device_name), __entry->dev) ++); ++ ++DEFINE_EVENT(fd_event, cobalt_fd_created, ++ TP_PROTO(struct rtdm_fd *fd, int ufd), ++ TP_ARGS(fd, ufd) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_open, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long oflags), ++ TP_ARGS(task, fd, ufd, oflags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_close, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long lock_count), ++ TP_ARGS(task, fd, ufd, lock_count) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_socket, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long protocol_family), ++ TP_ARGS(task, fd, ufd, protocol_family) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_read, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long len), ++ TP_ARGS(task, fd, ufd, len) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_write, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long len), ++ TP_ARGS(task, fd, ufd, len) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_ioctl, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long request), ++ TP_ARGS(task, fd, ufd, request) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_sendmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_sendmmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_recvmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++DEFINE_EVENT(fd_request, cobalt_fd_recvmmsg, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ unsigned long flags), ++ TP_ARGS(task, fd, ufd, flags) ++); ++ ++#define cobalt_print_protbits(__prot) \ ++ __print_flags(__prot, "|", \ ++ {PROT_EXEC, "exec"}, \ ++ {PROT_READ, "read"}, \ ++ {PROT_WRITE, "write"}) ++ ++#define cobalt_print_mapbits(__flags) \ ++ __print_flags(__flags, "|", \ ++ {MAP_SHARED, "shared"}, \ ++ {MAP_PRIVATE, "private"}, \ ++ {MAP_ANONYMOUS, "anon"}, \ ++ {MAP_FIXED, "fixed"}, \ ++ {MAP_HUGETLB, "huge"}, \ ++ {MAP_NONBLOCK, "nonblock"}, \ ++ {MAP_NORESERVE, "noreserve"}, \ ++ {MAP_POPULATE, "populate"}, \ ++ {MAP_UNINITIALIZED, "uninit"}) ++ ++TRACE_EVENT(cobalt_fd_mmap, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, struct _rtdm_mmap_request *rma), ++ TP_ARGS(task, fd, ufd, rma), ++ ++ TP_STRUCT__entry( ++ __array(char, comm, TASK_COMM_LEN) ++ __field(pid_t, pid) ++ __field(struct rtdm_device *, dev) ++ __field(int, ufd) ++ __field(size_t, length) ++ __field(off_t, offset) ++ __field(int, prot) ++ __field(int, flags) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ __entry->pid = task_pid_nr(task); ++ __entry->dev = 
rtdm_fd_to_context(fd)->device; ++ __entry->ufd = ufd; ++ __entry->length = rma->length; ++ __entry->offset = rma->offset; ++ __entry->prot = rma->prot; ++ __entry->flags = rma->flags; ++ ), ++ ++ TP_printk("device=%p fd=%d area={ len:%zu, off:%Lu }" ++ " prot=%#x(%s) flags=%#x(%s) pid=%d comm=%s", ++ __entry->dev, __entry->ufd, __entry->length, ++ (unsigned long long)__entry->offset, ++ __entry->prot, cobalt_print_protbits(__entry->prot), ++ __entry->flags, cobalt_print_mapbits(__entry->flags), ++ __entry->pid, __entry->comm) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_ioctl_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_read_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_write_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_recvmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_recvmmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_sendmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_sendmmsg_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(fd_request_status, cobalt_fd_mmap_status, ++ TP_PROTO(struct task_struct *task, ++ struct rtdm_fd *fd, int ufd, ++ int status), ++ TP_ARGS(task, fd, ufd, status) ++); ++ ++DEFINE_EVENT(task_op, cobalt_driver_task_join, ++ TP_PROTO(struct xnthread *task), ++ TP_ARGS(task) ++); ++ ++TRACE_EVENT(cobalt_driver_event_init, ++ TP_PROTO(struct rtdm_event *ev, unsigned long pending), ++ TP_ARGS(ev, pending), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_event *, ev) ++ __field(unsigned long, pending) ++ ), ++ ++ TP_fast_assign( ++ __entry->ev = ev; ++ __entry->pending = pending; ++ ), ++ ++ TP_printk("event=%p pending=%#lx", ++ __entry->ev, __entry->pending) ++); ++ ++TRACE_EVENT(cobalt_driver_event_wait, ++ TP_PROTO(struct rtdm_event *ev, struct xnthread *task), ++ TP_ARGS(ev, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_event *, ev) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->ev = ev; ++ ), ++ ++ TP_printk("event=%p task=%p(%s)", ++ __entry->ev, __entry->task, __get_str(task_name)) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_signal, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_clear, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_pulse, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++DEFINE_EVENT(event_op, cobalt_driver_event_destroy, ++ TP_PROTO(struct rtdm_event *ev), ++ TP_ARGS(ev) ++); ++ ++TRACE_EVENT(cobalt_driver_sem_init, ++ TP_PROTO(struct rtdm_sem *sem, unsigned long value), ++ 
TP_ARGS(sem, value), ++ ++ TP_STRUCT__entry( ++ __field(struct rtdm_sem *, sem) ++ __field(unsigned long, value) ++ ), ++ ++ TP_fast_assign( ++ __entry->sem = sem; ++ __entry->value = value; ++ ), ++ ++ TP_printk("sem=%p value=%lu", ++ __entry->sem, __entry->value) ++); ++ ++TRACE_EVENT(cobalt_driver_sem_wait, ++ TP_PROTO(struct rtdm_sem *sem, struct xnthread *task), ++ TP_ARGS(sem, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_sem *, sem) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->sem = sem; ++ ), ++ ++ TP_printk("sem=%p task=%p(%s)", ++ __entry->sem, __entry->task, __get_str(task_name)) ++); ++ ++DEFINE_EVENT(sem_op, cobalt_driver_sem_up, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem) ++); ++ ++DEFINE_EVENT(sem_op, cobalt_driver_sem_destroy, ++ TP_PROTO(struct rtdm_sem *sem), ++ TP_ARGS(sem) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_init, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_release, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++DEFINE_EVENT(mutex_op, cobalt_driver_mutex_destroy, ++ TP_PROTO(struct rtdm_mutex *mutex), ++ TP_ARGS(mutex) ++); ++ ++TRACE_EVENT(cobalt_driver_mutex_wait, ++ TP_PROTO(struct rtdm_mutex *mutex, struct xnthread *task), ++ TP_ARGS(mutex, task), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, task) ++ __string(task_name, task->name) ++ __field(struct rtdm_mutex *, mutex) ++ ), ++ ++ TP_fast_assign( ++ __entry->task = task; ++ __assign_str(task_name, task->name); ++ __entry->mutex = mutex; ++ ), ++ ++ TP_printk("mutex=%p task=%p(%s)", ++ __entry->mutex, __entry->task, __get_str(task_name)) ++); ++ ++#endif /* _TRACE_COBALT_RTDM_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-rtdm ++#include +--- linux/include/trace/events/cobalt-core.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/include/trace/events/cobalt-core.h 2021-04-07 16:01:25.897636066 +0800 +@@ -0,0 +1,777 @@ ++/* ++ * Copyright (C) 2014 Jan Kiszka . ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cobalt_core ++ ++#if !defined(_TRACE_COBALT_CORE_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_COBALT_CORE_H ++ ++#include ++ ++DECLARE_EVENT_CLASS(thread_event, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(unsigned long, state) ++ __field(unsigned long, info) ++ ), ++ ++ TP_fast_assign( ++ __entry->state = thread->state; ++ __entry->info = thread->info; ++ __entry->pid = xnthread_host_pid(thread); ++ ), ++ ++ TP_printk("pid=%d state=0x%lx info=0x%lx", ++ __entry->pid, __entry->state, __entry->info) ++); ++ ++DECLARE_EVENT_CLASS(curr_thread_event, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(unsigned long, state) ++ __field(unsigned long, info) ++ ), ++ ++ TP_fast_assign( ++ __entry->state = thread->state; ++ __entry->info = thread->info; ++ ), ++ ++ TP_printk("state=0x%lx info=0x%lx", ++ __entry->state, __entry->info) ++); ++ ++DECLARE_EVENT_CLASS(synch_wait_event, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch), ++ ++ TP_STRUCT__entry( ++ __field(struct xnsynch *, synch) ++ ), ++ ++ TP_fast_assign( ++ __entry->synch = synch; ++ ), ++ ++ TP_printk("synch=%p", __entry->synch) ++); ++ ++DECLARE_EVENT_CLASS(synch_post_event, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch), ++ ++ TP_STRUCT__entry( ++ __field(struct xnsynch *, synch) ++ ), ++ ++ TP_fast_assign( ++ __entry->synch = synch; ++ ), ++ ++ TP_printk("synch=%p", __entry->synch) ++); ++ ++DECLARE_EVENT_CLASS(irq_event, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, irq) ++ ), ++ ++ TP_fast_assign( ++ __entry->irq = irq; ++ ), ++ ++ TP_printk("irq=%u", __entry->irq) ++); ++ ++DECLARE_EVENT_CLASS(clock_event, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, irq) ++ ), ++ ++ TP_fast_assign( ++ __entry->irq = irq; ++ ), ++ ++ TP_printk("clock_irq=%u", __entry->irq) ++); ++ ++DECLARE_EVENT_CLASS(timer_event, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++ ), ++ ++ TP_printk("timer=%p", __entry->timer) ++); ++ ++TRACE_EVENT(cobalt_schedule, ++ TP_PROTO(struct xnsched *sched), ++ TP_ARGS(sched), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, status) ++ ), ++ ++ TP_fast_assign( ++ __entry->status = sched->status; ++ ), ++ ++ TP_printk("status=0x%lx", __entry->status) ++); ++ ++TRACE_EVENT(cobalt_schedule_remote, ++ TP_PROTO(struct xnsched *sched), ++ TP_ARGS(sched), ++ ++ TP_STRUCT__entry( ++ __field(unsigned long, status) ++ ), ++ ++ TP_fast_assign( ++ __entry->status = sched->status; ++ ), ++ ++ TP_printk("status=0x%lx", __entry->status) ++); ++ ++TRACE_EVENT(cobalt_switch_context, ++ TP_PROTO(struct xnthread *prev, struct xnthread *next), ++ TP_ARGS(prev, next), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, prev) ++ __string(prev_name, prev->name) ++ __field(pid_t, prev_pid) ++ __field(int, prev_prio) ++ __field(unsigned long, prev_state) ++ __field(struct xnthread *, next) ++ __string(next_name, next->name) ++ __field(pid_t, next_pid) ++ __field(int, next_prio) ++ ), ++ ++ TP_fast_assign( ++ __entry->prev = prev; ++ __assign_str(prev_name, prev->name); ++ __entry->prev_pid = xnthread_host_pid(prev); ++ __entry->prev_prio = xnthread_current_priority(prev); ++ __entry->prev_state = 
prev->state; ++ __entry->next = next; ++ __assign_str(next_name, next->name); ++ __entry->next_pid = xnthread_host_pid(next); ++ __entry->next_prio = xnthread_current_priority(next); ++ ), ++ ++ TP_printk("prev_name=%s prev_pid=%d prev_prio=%d prev_state=0x%lx ==> next_name=%s next_pid=%d next_prio=%d", ++ __get_str(prev_name), __entry->prev_pid, ++ __entry->prev_prio, __entry->prev_state, ++ __get_str(next_name), __entry->next_pid, __entry->next_prio) ++); ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++TRACE_EVENT(cobalt_schedquota_refill, ++ TP_PROTO(int dummy), ++ TP_ARGS(dummy), ++ ++ TP_STRUCT__entry( ++ __field(int, dummy) ++ ), ++ ++ TP_fast_assign( ++ (void)dummy; ++ ), ++ ++ TP_printk("%s", "") ++); ++ ++DECLARE_EVENT_CLASS(schedquota_group_event, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ ), ++ ++ TP_printk("tgid=%d", ++ __entry->tgid) ++); ++ ++DEFINE_EVENT(schedquota_group_event, cobalt_schedquota_create_group, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg) ++); ++ ++DEFINE_EVENT(schedquota_group_event, cobalt_schedquota_destroy_group, ++ TP_PROTO(struct xnsched_quota_group *tg), ++ TP_ARGS(tg) ++); ++ ++TRACE_EVENT(cobalt_schedquota_set_limit, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ int percent, ++ int peak_percent), ++ TP_ARGS(tg, percent, peak_percent), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ __field(int, percent) ++ __field(int, peak_percent) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ __entry->percent = percent; ++ __entry->peak_percent = peak_percent; ++ ), ++ ++ TP_printk("tgid=%d percent=%d peak_percent=%d", ++ __entry->tgid, __entry->percent, __entry->peak_percent) ++); ++ ++DECLARE_EVENT_CLASS(schedquota_thread_event, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread), ++ ++ TP_STRUCT__entry( ++ __field(int, tgid) ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ ), ++ ++ TP_fast_assign( ++ __entry->tgid = tg->tgid; ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ ), ++ ++ TP_printk("tgid=%d thread=%p pid=%d", ++ __entry->tgid, __entry->thread, __entry->pid) ++); ++ ++DEFINE_EVENT(schedquota_thread_event, cobalt_schedquota_add_thread, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread) ++); ++ ++DEFINE_EVENT(schedquota_thread_event, cobalt_schedquota_remove_thread, ++ TP_PROTO(struct xnsched_quota_group *tg, ++ struct xnthread *thread), ++ TP_ARGS(tg, thread) ++); ++ ++#endif /* CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++TRACE_EVENT(cobalt_thread_init, ++ TP_PROTO(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class), ++ TP_ARGS(thread, attr, sched_class), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __string(thread_name, thread->name) ++ __string(class_name, sched_class->name) ++ __field(unsigned long, flags) ++ __field(int, cprio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __assign_str(thread_name, thread->name); ++ __entry->flags = attr->flags; ++ __assign_str(class_name, sched_class->name); ++ __entry->cprio = thread->cprio; ++ ), ++ ++ TP_printk("thread=%p name=%s flags=0x%lx class=%s prio=%d", ++ __entry->thread, __get_str(thread_name), __entry->flags, ++ __get_str(class_name), __entry->cprio) ++); ++ ++TRACE_EVENT(cobalt_thread_suspend, ++ TP_PROTO(struct xnthread *thread, unsigned long 
mask, xnticks_t timeout, ++ xntmode_t timeout_mode, struct xnsynch *wchan), ++ TP_ARGS(thread, mask, timeout, timeout_mode, wchan), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __field(unsigned long, mask) ++ __field(xnticks_t, timeout) ++ __field(xntmode_t, timeout_mode) ++ __field(struct xnsynch *, wchan) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->mask = mask; ++ __entry->timeout = timeout; ++ __entry->timeout_mode = timeout_mode; ++ __entry->wchan = wchan; ++ ), ++ ++ TP_printk("pid=%d mask=0x%lx timeout=%Lu timeout_mode=%d wchan=%p", ++ __entry->pid, __entry->mask, ++ __entry->timeout, __entry->timeout_mode, __entry->wchan) ++); ++ ++TRACE_EVENT(cobalt_thread_resume, ++ TP_PROTO(struct xnthread *thread, unsigned long mask), ++ TP_ARGS(thread, mask), ++ ++ TP_STRUCT__entry( ++ __string(name, thread->name) ++ __field(pid_t, pid) ++ __field(unsigned long, mask) ++ ), ++ ++ TP_fast_assign( ++ __assign_str(name, thread->name); ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->mask = mask; ++ ), ++ ++ TP_printk("name=%s pid=%d mask=0x%lx", ++ __get_str(name), __entry->pid, __entry->mask) ++); ++ ++TRACE_EVENT(cobalt_thread_fault, ++ TP_PROTO(struct ipipe_trap_data *td), ++ TP_ARGS(td), ++ ++ TP_STRUCT__entry( ++ __field(void *, ip) ++ __field(unsigned int, type) ++ ), ++ ++ TP_fast_assign( ++ __entry->ip = (void *)xnarch_fault_pc(td); ++ __entry->type = xnarch_fault_trap(td); ++ ), ++ ++ TP_printk("ip=%p type=%x", ++ __entry->ip, __entry->type) ++); ++ ++TRACE_EVENT(cobalt_thread_set_current_prio, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(int, cprio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->cprio = xnthread_current_priority(thread); ++ ), ++ ++ TP_printk("thread=%p pid=%d prio=%d", ++ __entry->thread, __entry->pid, __entry->cprio) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_start, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_cancel, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_join, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(thread_event, cobalt_thread_unblock, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_wait_period, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_missed_period, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_thread_set_mode, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_thread_migrate, ++ TP_PROTO(unsigned int cpu), ++ TP_ARGS(cpu), ++ ++ TP_STRUCT__entry( ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("cpu=%u", __entry->cpu) ++); ++ ++TRACE_EVENT(cobalt_thread_migrate_passive, ++ TP_PROTO(struct xnthread *thread, unsigned int cpu), ++ TP_ARGS(thread, cpu), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("thread=%p pid=%d cpu=%u", ++ __entry->thread, __entry->pid, __entry->cpu) 
++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_gohard, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_watchdog_signal, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_hardened, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++#define cobalt_print_relax_reason(reason) \ ++ __print_symbolic(reason, \ ++ { SIGDEBUG_UNDEFINED, "undefined" }, \ ++ { SIGDEBUG_MIGRATE_SIGNAL, "signal" }, \ ++ { SIGDEBUG_MIGRATE_SYSCALL, "syscall" }, \ ++ { SIGDEBUG_MIGRATE_FAULT, "fault" }) ++ ++TRACE_EVENT(cobalt_shadow_gorelax, ++ TP_PROTO(int reason), ++ TP_ARGS(reason), ++ ++ TP_STRUCT__entry( ++ __field(int, reason) ++ ), ++ ++ TP_fast_assign( ++ __entry->reason = reason; ++ ), ++ ++ TP_printk("reason=%s", cobalt_print_relax_reason(__entry->reason)) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_relaxed, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_entry, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_shadow_map, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread), ++ ++ TP_STRUCT__entry( ++ __field(struct xnthread *, thread) ++ __field(pid_t, pid) ++ __field(int, prio) ++ ), ++ ++ TP_fast_assign( ++ __entry->thread = thread; ++ __entry->pid = xnthread_host_pid(thread); ++ __entry->prio = xnthread_base_priority(thread); ++ ), ++ ++ TP_printk("thread=%p pid=%d prio=%d", ++ __entry->thread, __entry->pid, __entry->prio) ++); ++ ++DEFINE_EVENT(curr_thread_event, cobalt_shadow_unmap, ++ TP_PROTO(struct xnthread *thread), ++ TP_ARGS(thread) ++); ++ ++TRACE_EVENT(cobalt_lostage_request, ++ TP_PROTO(const char *type, struct task_struct *task), ++ TP_ARGS(type, task), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ __field(const char *, type) ++ ), ++ ++ TP_fast_assign( ++ __entry->type = type; ++ __entry->pid = task_pid_nr(task); ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("request=%s pid=%d comm=%s", ++ __entry->type, __entry->pid, __entry->comm) ++); ++ ++TRACE_EVENT(cobalt_lostage_wakeup, ++ TP_PROTO(struct task_struct *task), ++ TP_ARGS(task), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = task_pid_nr(task); ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("pid=%d comm=%s", ++ __entry->pid, __entry->comm) ++); ++ ++TRACE_EVENT(cobalt_lostage_signal, ++ TP_PROTO(struct task_struct *task, int sig), ++ TP_ARGS(task, sig), ++ ++ TP_STRUCT__entry( ++ __field(pid_t, pid) ++ __array(char, comm, TASK_COMM_LEN) ++ __field(int, sig) ++ ), ++ ++ TP_fast_assign( ++ __entry->pid = task_pid_nr(task); ++ __entry->sig = sig; ++ memcpy(__entry->comm, task->comm, TASK_COMM_LEN); ++ ), ++ ++ TP_printk("pid=%d comm=%s sig=%d", ++ __entry->pid, __entry->comm, __entry->sig) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_entry, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_exit, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_attach, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_detach, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(irq_event, cobalt_irq_enable, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ 
++DEFINE_EVENT(irq_event, cobalt_irq_disable, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(clock_event, cobalt_clock_entry, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(clock_event, cobalt_clock_exit, ++ TP_PROTO(unsigned int irq), ++ TP_ARGS(irq) ++); ++ ++DEFINE_EVENT(timer_event, cobalt_timer_stop, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer) ++); ++ ++DEFINE_EVENT(timer_event, cobalt_timer_expire, ++ TP_PROTO(struct xntimer *timer), ++ TP_ARGS(timer) ++); ++ ++#define cobalt_print_timer_mode(mode) \ ++ __print_symbolic(mode, \ ++ { XN_RELATIVE, "rel" }, \ ++ { XN_ABSOLUTE, "abs" }, \ ++ { XN_REALTIME, "rt" }) ++ ++TRACE_EVENT(cobalt_timer_start, ++ TP_PROTO(struct xntimer *timer, xnticks_t value, xnticks_t interval, ++ xntmode_t mode), ++ TP_ARGS(timer, value, interval, mode), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++#ifdef CONFIG_XENO_OPT_STATS ++ __string(name, timer->name) ++#endif ++ __field(xnticks_t, value) ++ __field(xnticks_t, interval) ++ __field(xntmode_t, mode) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++#ifdef CONFIG_XENO_OPT_STATS ++ __assign_str(name, timer->name); ++#endif ++ __entry->value = value; ++ __entry->interval = interval; ++ __entry->mode = mode; ++ ), ++ ++ TP_printk("timer=%p(%s) value=%Lu interval=%Lu mode=%s", ++ __entry->timer, ++#ifdef CONFIG_XENO_OPT_STATS ++ __get_str(name), ++#else ++ "(anon)", ++#endif ++ __entry->value, __entry->interval, ++ cobalt_print_timer_mode(__entry->mode)) ++); ++ ++#ifdef CONFIG_SMP ++ ++TRACE_EVENT(cobalt_timer_migrate, ++ TP_PROTO(struct xntimer *timer, unsigned int cpu), ++ TP_ARGS(timer, cpu), ++ ++ TP_STRUCT__entry( ++ __field(struct xntimer *, timer) ++ __field(unsigned int, cpu) ++ ), ++ ++ TP_fast_assign( ++ __entry->timer = timer; ++ __entry->cpu = cpu; ++ ), ++ ++ TP_printk("timer=%p cpu=%u", ++ __entry->timer, __entry->cpu) ++); ++ ++#endif /* CONFIG_SMP */ ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_sleepon, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_try_acquire, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_wait_event, cobalt_synch_acquire, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_release, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_wakeup, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_wakeup_many, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_flush, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++DEFINE_EVENT(synch_post_event, cobalt_synch_forget, ++ TP_PROTO(struct xnsynch *synch), ++ TP_ARGS(synch) ++); ++ ++#endif /* _TRACE_COBALT_CORE_H */ ++ ++/* This part must be outside protection */ ++#undef TRACE_INCLUDE_PATH ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cobalt-core ++#include +--- linux/kernel/xenomai/rtdm/wrappers.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/wrappers.c 2021-04-07 16:01:26.185635655 +0800 +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 2013 Hauke Mehrtens ++ * Copyright (c) 2013 Hannes Frederic Sowa ++ * Copyright (c) 2014 Luis R. Rodriguez ++ * ++ * Backport functionality introduced in Linux 3.13. 
++ * ++ * Copyright (c) 2014 Hauke Mehrtens ++ * ++ * Backport functionality introduced in Linux 3.14. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Same rules as kernel/cobalt/include/asm-generic/xenomai/wrappers.h ++ * apply to reduce #ifdefery. ++ */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ++#ifdef CONFIG_PCI_MSI ++int pci_enable_msix_range(struct pci_dev *dev, ++ struct msix_entry *entries, ++ int minvec, int maxvec) ++{ ++ int nvec = maxvec; ++ int rc; ++ ++ if (maxvec < minvec) ++ return -ERANGE; ++ ++ do { ++ rc = pci_enable_msix(dev, entries, nvec); ++ if (rc < 0) { ++ return rc; ++ } else if (rc > 0) { ++ if (rc < minvec) ++ return -ENOSPC; ++ nvec = rc; ++ } ++ } while (rc); ++ ++ return nvec; ++} ++EXPORT_SYMBOL(pci_enable_msix_range); ++#endif ++#endif /* < 3.14 */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,13,0) ++#ifdef CONFIG_HWMON ++struct device* ++hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups) ++{ ++ struct device *hwdev; ++ ++ hwdev = hwmon_device_register(dev); ++ hwdev->groups = groups; ++ dev_set_drvdata(hwdev, drvdata); ++ return hwdev; ++} ++ ++static void devm_hwmon_release(struct device *dev, void *res) ++{ ++ struct device *hwdev = *(struct device **)res; ++ ++ hwmon_device_unregister(hwdev); ++} ++ ++struct device * ++devm_hwmon_device_register_with_groups(struct device *dev, const char *name, ++ void *drvdata, ++ const struct attribute_group **groups) ++{ ++ struct device **ptr, *hwdev; ++ ++ if (!dev) ++ return ERR_PTR(-EINVAL); ++ ++ ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL); ++ if (!ptr) ++ return ERR_PTR(-ENOMEM); ++ ++ hwdev = hwmon_device_register_with_groups(dev, name, drvdata, groups); ++ if (IS_ERR(hwdev)) ++ goto error; ++ ++ *ptr = hwdev; ++ devres_add(dev, ptr); ++ return hwdev; ++ ++error: ++ devres_free(ptr); ++ return hwdev; ++} ++EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups); ++#endif ++#endif /* < 3.13 */ +--- linux/kernel/xenomai/rtdm/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/Makefile 2021-04-07 16:01:26.180635662 +0800 +@@ -0,0 +1,10 @@ ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++ ++xenomai-y := core.o \ ++ device.o \ ++ drvlib.o \ ++ fd.o \ ++ wrappers.o ++ ++ccflags-y += -I$(src)/.. -Ikernel +--- linux/kernel/xenomai/rtdm/drvlib.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/drvlib.c 2021-04-07 16:01:26.176635668 +0800 +@@ -0,0 +1,2446 @@ ++/* ++ * Real-Time Driver Model for Xenomai, driver library ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_clock Clock Services ++ * @{ ++ */ ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Get system time ++ * ++ * @return The system time in nanoseconds is returned ++ * ++ * @note The resolution of this service depends on the system timer. In ++ * particular, if the system timer is running in periodic mode, the return ++ * value will be limited to multiples of the timer tick period. ++ * ++ * @note The system timer may have to be started to obtain valid results. ++ * Whether this happens automatically (as on Xenomai) or is controlled by the ++ * application depends on the RTDM host environment. ++ * ++ * @coretags{unrestricted} ++ */ ++nanosecs_abs_t rtdm_clock_read(void); ++ ++/** ++ * @brief Get monotonic time ++ * ++ * @return The monotonic time in nanoseconds is returned ++ * ++ * @note The resolution of this service depends on the system timer. In ++ * particular, if the system timer is running in periodic mode, the return ++ * value will be limited to multiples of the timer tick period. ++ * ++ * @note The system timer may have to be started to obtain valid results. ++ * Whether this happens automatically (as on Xenomai) or is controlled by the ++ * application depends on the RTDM host environment. ++ * ++ * @coretags{unrestricted} ++ */ ++nanosecs_abs_t rtdm_clock_read_monotonic(void); ++#endif /* DOXYGEN_CPP */ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_task Task Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise and start a real-time task ++ * ++ * After initialising a task, the task handle remains valid and can be ++ * passed to RTDM services until either rtdm_task_destroy() or ++ * rtdm_task_join() was invoked. ++ * ++ * @param[in,out] task Task handle ++ * @param[in] name Optional task name ++ * @param[in] task_proc Procedure to be executed by the task ++ * @param[in] arg Custom argument passed to @c task_proc() on entry ++ * @param[in] priority Priority of the task, see also ++ * @ref rtdmtaskprio "Task Priority Range" ++ * @param[in] period Period in nanoseconds of a cyclic task, 0 for non-cyclic ++ * mode. Waiting for the first and subsequent periodic events is ++ * done using rtdm_task_wait_period(). ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_task_init(rtdm_task_t *task, const char *name, ++ rtdm_task_proc_t task_proc, void *arg, ++ int priority, nanosecs_rel_t period) ++{ ++ union xnsched_policy_param param; ++ struct xnthread_start_attr sattr; ++ struct xnthread_init_attr iattr; ++ int err; ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ iattr.name = name; ++ iattr.flags = 0; ++ iattr.personality = &xenomai_personality; ++ iattr.affinity = CPU_MASK_ALL; ++ param.rt.prio = priority; ++ ++ err = xnthread_init(task, &iattr, &xnsched_class_rt, ¶m); ++ if (err) ++ return err; ++ ++ /* We need an anonymous registry entry to obtain a handle for fast ++ mutex locking. 
*/ ++ err = xnthread_register(task, ""); ++ if (err) ++ goto cleanup_out; ++ ++ if (period > 0) { ++ err = xnthread_set_periodic(task, XN_INFINITE, ++ XN_RELATIVE, period); ++ if (err) ++ goto cleanup_out; ++ } ++ ++ sattr.mode = 0; ++ sattr.entry = task_proc; ++ sattr.cookie = arg; ++ err = xnthread_start(task, &sattr); ++ if (err) ++ goto cleanup_out; ++ ++ return 0; ++ ++ cleanup_out: ++ xnthread_cancel(task); ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_init); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Destroy a real-time task ++ * ++ * This call sends a termination request to @a task, then waits for it ++ * to exit. All RTDM task should check for pending termination ++ * requests by calling rtdm_task_should_stop() from their work loop. ++ * ++ * If @a task is current, rtdm_task_destroy() terminates the current ++ * context, and does not return to the caller. ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * ++ * @note Passing the same task handle to RTDM services after the completion of ++ * this function is not allowed. ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++void rtdm_task_destroy(rtdm_task_t *task); ++ ++/** ++ * @brief Check for pending termination request ++ * ++ * Check whether a termination request was received by the current ++ * RTDM task. Termination requests are sent by calling ++ * rtdm_task_destroy(). ++ * ++ * @return Non-zero indicates that a termination request is pending, ++ * in which case the caller should wrap up and exit. ++ * ++ * @coretags{rtdm-task, might-switch} ++ */ ++int rtdm_task_should_stop(void); ++ ++/** ++ * @brief Adjust real-time task priority ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * @param[in] priority New priority of the task, see also ++ * @ref rtdmtaskprio "Task Priority Range" ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_task_set_priority(rtdm_task_t *task, int priority); ++ ++/** ++ * @brief Adjust real-time task period ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init(), or ++ * NULL for referring to the current RTDM task or Cobalt thread. ++ * ++ * @param[in] start_date The initial (absolute) date of the first ++ * release point, expressed in nanoseconds. @a task will be delayed ++ * by the first call to rtdm_task_wait_period() until this point is ++ * reached. If @a start_date is zero, the first release point is set ++ * to @a period nanoseconds after the current date. ++ ++ * @param[in] period New period in nanoseconds of a cyclic task, zero ++ * to disable cyclic mode for @a task. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_task_set_period(rtdm_task_t *task, nanosecs_abs_t start_date, ++ nanosecs_rel_t period); ++ ++/** ++ * @brief Wait on next real-time task period ++ * ++ * @param[in] overruns_r Address of a long word receiving the count of ++ * overruns if -ETIMEDOUT is returned, or NULL if the caller don't ++ * need that information. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if calling task is not in periodic mode. ++ * ++ * - -ETIMEDOUT is returned if a timer overrun occurred, which indicates ++ * that a previous release point has been missed by the calling task. 
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_wait_period(unsigned long *overruns_r); ++ ++/** ++ * @brief Activate a blocked real-time task ++ * ++ * @return Non-zero is returned if the task was actually unblocked from a ++ * pending wait state, 0 otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++int rtdm_task_unblock(rtdm_task_t *task); ++ ++/** ++ * @brief Get current real-time task ++ * ++ * @return Pointer to task handle ++ * ++ * @coretags{mode-unrestricted} ++ */ ++rtdm_task_t *rtdm_task_current(void); ++ ++/** ++ * @brief Sleep a specified amount of time ++ * ++ * @param[in] delay Delay in nanoseconds, see @ref RTDM_TIMEOUT_xxx for ++ * special values. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep(nanosecs_rel_t delay); ++ ++/** ++ * @brief Sleep until a specified absolute time ++ * ++ * @deprecated Use rtdm_task_sleep_abs instead! ++ * ++ * @param[in] wakeup_time Absolute timeout in nanoseconds ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep_until(nanosecs_abs_t wakeup_time); ++ ++/** ++ * @brief Sleep until a specified absolute time ++ * ++ * @param[in] wakeup_time Absolute timeout in nanoseconds ++ * @param[in] mode Selects the timer mode, see RTDM_TIMERMODE_xxx for details ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * - -EINVAL is returned if an invalid parameter was passed. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_sleep_abs(nanosecs_abs_t wakeup_time, enum rtdm_timer_mode mode); ++ ++/** ++ * @brief Safe busy waiting ++ * ++ * This service alternates active spinning and sleeping within a wait ++ * loop, until a condition is satisfied. While sleeping, a task is ++ * scheduled out and does not consume any CPU time. ++ * ++ * rtdm_task_busy_wait() is particularly useful for waiting for a ++ * state change reading an I/O register, which usually happens shortly ++ * after the wait starts, without incurring the adverse effects of ++ * long busy waiting if it doesn't. ++ * ++ * @param[in] condition The C expression to be tested for detecting ++ * completion. ++ * @param[in] spin_ns The time to spin on @a condition before ++ * sleeping, expressed as a count of nanoseconds. ++ * @param[in] sleep_ns The time to sleep for before spinning again, ++ * expressed as a count of nanoseconds. ++ * ++ * @return 0 on success if @a condition is satisfied, otherwise: ++ * ++ * - -EINTR is returned if the calling task has been unblocked by a ++ * Linux signal or explicitly via rtdm_task_unblock(). ++ * ++ * - -EPERM may be returned if an illegal invocation environment is ++ * detected. 
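++ *
++ * A short usage sketch, assuming a memory-mapped status register with
++ * a ready bit (@c ctx->regs, @c REG_STAT and @c STAT_READY are
++ * illustrative):
++ *
++ * @code
++ * // spin 2 us on the ready bit, then sleep 50 us between further polls
++ * ret = rtdm_task_busy_wait(ioread32(ctx->regs + REG_STAT) & STAT_READY,
++ *			      2000, 50000);
++ * @endcode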
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_task_busy_wait(bool condition, nanosecs_rel_t spin_ns, ++ nanosecs_rel_t sleep_ns); ++ ++/** ++ * @brief Register wait context ++ * ++ * rtdm_wait_prepare() registers a wait context structure for the ++ * caller, which can be later retrieved by a call to ++ * rtdm_wait_get_context(). This call is normally issued before the ++ * current task blocks on a wait object, waiting for some (producer) ++ * code to wake it up. Arbitrary data can be exchanged between both ++ * sites via the wait context structure, which is allocated by the ++ * waiter (consumer) side. ++ * ++ * @a wc is the address of an anchor object which is commonly embedded ++ * into a larger structure with arbitrary contents, which needs to be ++ * shared between the consumer (waiter) and the producer for ++ * implementing the wait code. ++ * ++ * A typical implementation pattern for the wait side is: ++ * ++ * @code ++ * struct rtdm_waitqueue wq; ++ * struct some_wait_context { ++ * int input_value; ++ * int output_value; ++ * struct rtdm_wait_context wc; ++ * } wait_context; ++ * ++ * wait_context.input_value = 42; ++ * rtdm_wait_prepare(&wait_context); ++ * ret = rtdm_wait_condition(&wq, rtdm_wait_is_completed(&wait_context)); ++ * if (ret) ++ * goto wait_failed; ++ * handle_event(wait_context.output_value); ++ * @endcode ++ * ++ * On the producer side, the implementation would look like: ++ * ++ * @code ++ * struct rtdm_waitqueue wq; ++ * struct some_wait_context { ++ * int input_value; ++ * int output_value; ++ * struct rtdm_wait_context wc; ++ * } *wait_context_ptr; ++ * struct rtdm_wait_context *wc; ++ * rtdm_task_t *task; ++ * ++ * rtdm_for_each_waiter(task, &wq) { ++ * wc = rtdm_wait_get_context(task); ++ * wait_context_ptr = container_of(wc, struct some_wait_context, wc); ++ * wait_context_ptr->output_value = 12; ++ * } ++ * rtdm_waitqueue_broadcast(&wq); ++ * @endcode ++ * ++ * @param wc Wait context to register. ++ */ ++void rtdm_wait_prepare(struct rtdm_wait_context *wc); ++ ++/** ++ * @brief Mark completion for a wait context ++ * ++ * rtdm_complete_wait() marks a wait context as completed, so that ++ * rtdm_wait_is_completed() returns true for such context. ++ * ++ * @param wc Wait context to complete. ++ */ ++void rtdm_wait_complete(struct rtdm_wait_context *wc); ++ ++/** ++ * @brief Test completion of a wait context ++ * ++ * rtdm_wait_is_completed() returns true if rtdm_complete_wait() was ++ * called for @a wc. The completion mark is reset each time ++ * rtdm_wait_prepare() is called for a wait context. ++ * ++ * @param wc Wait context to check for completion. ++ * ++ * @return non-zero/true if rtdm_wait_complete() was called for @a wc, ++ * zero otherwise. ++ */ ++int rtdm_wait_is_completed(struct rtdm_wait_context *wc); ++ ++#endif /* DOXYGEN_CPP */ ++ ++int __rtdm_task_sleep(xnticks_t timeout, xntmode_t mode) ++{ ++ struct xnthread *thread; ++ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ thread = xnthread_current(); ++ xnthread_suspend(thread, XNDELAY, timeout, mode, NULL); ++ ++ return xnthread_test_info(thread, XNBREAK) ? -EINTR : 0; ++} ++ ++EXPORT_SYMBOL_GPL(__rtdm_task_sleep); ++ ++/** ++ * @brief Wait on a real-time task to terminate ++ * ++ * @param[in,out] task Task handle as returned by rtdm_task_init() ++ * ++ * @note Passing the same task handle to RTDM services after the ++ * completion of this function is not allowed. 
++ * ++ * @note This service does not trigger the termination of the targeted ++ * task. The user has to take of this, otherwise rtdm_task_join() ++ * will never return. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++void rtdm_task_join(rtdm_task_t *task) ++{ ++ trace_cobalt_driver_task_join(task); ++ ++ xnthread_join(task, true); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_join); ++ ++/** ++ * @brief Busy-wait a specified amount of time ++ * ++ * This service does not schedule out the caller, but rather spins in ++ * a tight loop, burning CPU cycles until the timeout elapses. ++ * ++ * @param[in] delay Delay in nanoseconds. Note that a zero delay does @b not ++ * have the meaning of @c RTDM_TIMEOUT_INFINITE here. ++ * ++ * @note The caller must not be migratable to different CPUs while executing ++ * this service. Otherwise, the actual delay will be undefined. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_task_busy_sleep(nanosecs_rel_t delay) ++{ ++ xnticks_t wakeup; ++ ++ wakeup = xnclock_read_raw(&nkclock) + ++ xnclock_ns_to_ticks(&nkclock, delay); ++ ++ while ((xnsticks_t)(xnclock_read_raw(&nkclock) - wakeup) < 0) ++ cpu_relax(); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_task_busy_sleep); ++/** @} */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_timer Timer Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a timer ++ * ++ * @param[in,out] timer Timer handle ++ * @param[in] handler Handler to be called on timer expiry ++ * @param[in] name Optional timer name ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_timer_init(rtdm_timer_t *timer, rtdm_timer_handler_t handler, ++ const char *name) ++{ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ xntimer_init((timer), &nkclock, handler, NULL, XNTIMER_IGRAVITY); ++ xntimer_set_name((timer), (name)); ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_init); ++ ++/** ++ * @brief Destroy a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_timer_destroy(rtdm_timer_t *timer) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(timer); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_destroy); ++ ++/** ++ * @brief Start a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * @param[in] expiry Firing time of the timer, @c mode defines if relative or ++ * absolute ++ * @param[in] interval Relative reload value, > 0 if the timer shall work in ++ * periodic mode with the specific interval, 0 for one-shot timers ++ * @param[in] mode Defines the operation mode, see @ref RTDM_TIMERMODE_xxx for ++ * possible values ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if @c expiry describes an absolute date in ++ * the past. In such an event, the timer is nevertheless armed for the ++ * next shot in the timeline if @a interval is non-zero. 
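++ *
++ * A minimal sketch of arming a 1 ms periodic timer; the handler and
++ * the @c ctx fields are illustrative, and the handler prototype is
++ * assumed to follow rtdm_timer_handler_t (taking the timer handle):
++ *
++ * @code
++ * static void my_timer_handler(rtdm_timer_t *timer)
++ * {
++ *	// runs on each expiry, in timer interrupt context
++ * }
++ *
++ * rtdm_timer_init(&ctx->timer, my_timer_handler, "my-timer");
++ *
++ * // first shot in 1 ms, then every 1 ms
++ * ret = rtdm_timer_start(&ctx->timer, 1000000, 1000000,
++ *			   RTDM_TIMERMODE_RELATIVE);
++ * @endcode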
++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_timer_start(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, enum rtdm_timer_mode mode) ++{ ++ spl_t s; ++ int err; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xntimer_start(timer, expiry, interval, (xntmode_t)mode); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_start); ++ ++/** ++ * @brief Stop a timer ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_timer_stop(rtdm_timer_t *timer) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_stop(timer); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_timer_stop); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Start a timer from inside a timer handler ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * @param[in] expiry Firing time of the timer, @c mode defines if relative or ++ * absolute ++ * @param[in] interval Relative reload value, > 0 if the timer shall work in ++ * periodic mode with the specific interval, 0 for one-shot timers ++ * @param[in] mode Defines the operation mode, see @ref RTDM_TIMERMODE_xxx for ++ * possible values ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if @c expiry describes an absolute date in the ++ * past. ++ * ++ * @coretags{coreirq-only} ++ */ ++int rtdm_timer_start_in_handler(rtdm_timer_t *timer, nanosecs_abs_t expiry, ++ nanosecs_rel_t interval, ++ enum rtdm_timer_mode mode); ++ ++/** ++ * @brief Stop a timer from inside a timer handler ++ * ++ * @param[in,out] timer Timer handle as returned by rtdm_timer_init() ++ * ++ * @coretags{coreirq-only} ++ */ ++void rtdm_timer_stop_in_handler(rtdm_timer_t *timer); ++#endif /* DOXYGEN_CPP */ ++/** @} */ ++ ++/* --- IPC cleanup helper --- */ ++ ++#define RTDM_SYNCH_DELETED XNSYNCH_SPARE0 ++ ++void __rtdm_synch_flush(struct xnsynch *synch, unsigned long reason) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (reason == XNRMID) ++ xnsynch_set_status(synch, RTDM_SYNCH_DELETED); ++ ++ if (likely(xnsynch_flush(synch, reason) == XNSYNCH_RESCHED)) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(__rtdm_synch_flush); ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_sync Synchronisation Services ++ * @{ ++ */ ++ ++/*! ++ * @name Timeout Sequence Management ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a timeout sequence ++ * ++ * This service initialises a timeout sequence handle according to the given ++ * timeout value. Timeout sequences allow to maintain a continuous @a timeout ++ * across multiple calls of blocking synchronisation services. A typical ++ * application scenario is given below. ++ * ++ * @param[in,out] timeout_seq Timeout sequence handle ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * ++ * Application Scenario: ++ * @code ++int device_service_routine(...) ++{ ++ rtdm_toseq_t timeout_seq; ++ ... ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ... ++ while (received < requested) { ++ ret = rtdm_event_timedwait(&data_available, timeout, &timeout_seq); ++ if (ret < 0) // including -ETIMEDOUT ++ break; ++ ++ // receive some data ++ ... ++ } ++ ... 
++} ++ * @endcode ++ * Using a timeout sequence in such a scenario avoids that the user-provided ++ * relative @c timeout is restarted on every call to rtdm_event_timedwait(), ++ * potentially causing an overall delay that is larger than specified by ++ * @c timeout. Moreover, all functions supporting timeout sequences also ++ * interpret special timeout values (infinite and non-blocking), ++ * disburdening the driver developer from handling them separately. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_toseq_init(rtdm_toseq_t *timeout_seq, nanosecs_rel_t timeout) ++{ ++ XENO_WARN_ON(COBALT, xnsched_unblockable_p()); /* only warn here */ ++ ++ *timeout_seq = xnclock_read_monotonic(&nkclock) + timeout; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_toseq_init); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_event Event Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise an event ++ * ++ * @param[in,out] event Event handle ++ * @param[in] pending Non-zero if event shall be initialised as set, 0 otherwise ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_event_init(rtdm_event_t *event, unsigned long pending) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_event_init(event, pending); ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_init(&event->synch_base, XNSYNCH_PRIO, NULL); ++ if (pending) ++ xnsynch_set_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_init(&event->select_block); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_init); ++ ++/** ++ * @brief Destroy an event ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_event_destroy(rtdm_event_t *event) ++{ ++ trace_cobalt_driver_event_destroy(event); ++ if (realtime_core_enabled()) { ++ __rtdm_synch_flush(&event->synch_base, XNRMID); ++ xnselect_destroy(&event->select_block); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdm_event_destroy); ++ ++/** ++ * @brief Signal an event occurrence to currently listening waiters ++ * ++ * This function wakes up all current waiters of the given event, but it does ++ * not change the event state. Subsequently callers of rtdm_event_wait() or ++ * rtdm_event_timedwait() will therefore be blocked first. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_event_pulse(rtdm_event_t *event) ++{ ++ trace_cobalt_driver_event_pulse(event); ++ __rtdm_synch_flush(&event->synch_base, 0); ++} ++EXPORT_SYMBOL_GPL(rtdm_event_pulse); ++ ++/** ++ * @brief Signal an event occurrence ++ * ++ * This function sets the given event and wakes up all current waiters. If no ++ * waiter is presently registered, the next call to rtdm_event_wait() or ++ * rtdm_event_timedwait() will return immediately. 
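++ *
++ * A typical producer/consumer pairing (@c ctx->rx_event is
++ * illustrative):
++ *
++ * @code
++ * // consumer side, e.g. in a read handler:
++ * ret = rtdm_event_wait(&ctx->rx_event);
++ *
++ * // producer side, e.g. in the interrupt handler:
++ * rtdm_event_signal(&ctx->rx_event);
++ * @endcode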
++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_event_signal(rtdm_event_t *event) ++{ ++ int resched = 0; ++ spl_t s; ++ ++ trace_cobalt_driver_event_signal(event); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_set_status(&event->synch_base, RTDM_EVENT_PENDING); ++ if (xnsynch_flush(&event->synch_base, 0)) ++ resched = 1; ++ if (xnselect_signal(&event->select_block, 1)) ++ resched = 1; ++ if (resched) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_signal); ++ ++/** ++ * @brief Wait on event occurrence ++ * ++ * This is the light-weight version of rtdm_event_timedwait(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a event has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_event_wait(rtdm_event_t *event) ++{ ++ return rtdm_event_timedwait(event, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_wait); ++ ++/** ++ * @brief Wait on event occurrence with timeout ++ * ++ * This function waits or tests for the occurence of the given event, taking ++ * the provided timeout into account. On successful return, the event is ++ * reset. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a event has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * - -EWOULDBLOCK is returned if a negative @a timeout (i.e., non-blocking ++ * operation) has been specified. 
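++ *
++ * For instance, passing a negative @a timeout turns the call into a
++ * plain test of the event state (@c ctx->rx_event is illustrative):
++ *
++ * @code
++ * ret = rtdm_event_timedwait(&ctx->rx_event, -1, NULL);
++ * if (ret == -EWOULDBLOCK)
++ *	return -EAGAIN;	// no event pending, tell the caller to retry
++ * @endcode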
++ * ++ * @coretags{primary-timed, might-switch} ++ */ ++int rtdm_event_timedwait(rtdm_event_t *event, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *thread; ++ int err = 0, ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, timeout < 0 || !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ trace_cobalt_driver_event_wait(event, xnthread_current()); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(event->synch_base.status & RTDM_SYNCH_DELETED)) ++ err = -EIDRM; ++ else if (likely(event->synch_base.status & RTDM_EVENT_PENDING)) { ++ xnsynch_clear_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ } else { ++ /* non-blocking mode */ ++ if (timeout < 0) { ++ err = -EWOULDBLOCK; ++ goto unlock_out; ++ } ++ ++ thread = xnthread_current(); ++ ++ if (timeout_seq && (timeout > 0)) ++ /* timeout sequence */ ++ ret = xnsynch_sleep_on(&event->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else ++ /* infinite or relative timeout */ ++ ret = xnsynch_sleep_on(&event->synch_base, timeout, XN_RELATIVE); ++ ++ if (likely(ret == 0)) { ++ xnsynch_clear_status(&event->synch_base, ++ RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ } else if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ else if (ret & XNRMID) ++ err = -EIDRM; ++ else /* XNBREAK */ ++ err = -EINTR; ++ } ++ ++unlock_out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_timedwait); ++ ++/** ++ * @brief Clear event state ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_event_clear(rtdm_event_t *event) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_event_clear(event); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnsynch_clear_status(&event->synch_base, RTDM_EVENT_PENDING); ++ xnselect_signal(&event->select_block, 0); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_event_clear); ++ ++/** ++ * @brief Bind a selector to an event ++ * ++ * This functions binds the given selector to an event so that the former is ++ * notified when the event state changes. Typically the select binding handler ++ * will invoke this service. ++ * ++ * @param[in,out] event Event handle as returned by rtdm_event_init() ++ * @param[in,out] selector Selector as passed to the select binding handler ++ * @param[in] type Type of the bound event as passed to the select binding handler ++ * @param[in] fd_index File descriptor index as passed to the select binding ++ * handler ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ENOMEM is returned if there is insufficient memory to establish the ++ * dynamic binding. ++ * ++ * - -EINVAL is returned if @a type or @a fd_index are invalid. 
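++ *
++ * A select binding handler usually just forwards its arguments to this
++ * service; the sketch below assumes the common rtdm_fd_ops select
++ * prototype and an illustrative @c rx_event field:
++ *
++ * @code
++ * static int my_select(struct rtdm_fd *fd, struct xnselector *selector,
++ *			unsigned int type, unsigned int fd_index)
++ * {
++ *	struct my_context *ctx = rtdm_fd_to_private(fd);
++ *
++ *	return rtdm_event_select(&ctx->rx_event, selector, type, fd_index);
++ * }
++ * @endcode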
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_event_select(rtdm_event_t *event, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned int fd_index) ++{ ++ struct xnselect_binding *binding; ++ int err; ++ spl_t s; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnselect_bind(&event->select_block, ++ binding, selector, type, fd_index, ++ event->synch_base.status & (RTDM_SYNCH_DELETED | ++ RTDM_EVENT_PENDING)); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err) ++ xnfree(binding); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_event_select); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_sem Semaphore Services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a semaphore ++ * ++ * @param[in,out] sem Semaphore handle ++ * @param[in] value Initial value of the semaphore ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_sem_init(rtdm_sem_t *sem, unsigned long value) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_sem_init(sem, value); ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem->value = value; ++ xnsynch_init(&sem->synch_base, XNSYNCH_PRIO, NULL); ++ xnselect_init(&sem->select_block); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_init); ++ ++/** ++ * @brief Destroy a semaphore ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_sem_destroy(rtdm_sem_t *sem) ++{ ++ trace_cobalt_driver_sem_destroy(sem); ++ if (realtime_core_enabled()) { ++ __rtdm_synch_flush(&sem->synch_base, XNRMID); ++ xnselect_destroy(&sem->select_block); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdm_sem_destroy); ++ ++/** ++ * @brief Decrement a semaphore ++ * ++ * This is the light-weight version of rtdm_sem_timeddown(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a sem has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_sem_down(rtdm_sem_t *sem) ++{ ++ return rtdm_sem_timeddown(sem, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_down); ++ ++/** ++ * @brief Decrement a semaphore with timeout ++ * ++ * This function tries to decrement the given semphore's value if it is ++ * positive on entry. If not, the caller is blocked unless non-blocking ++ * operation was selected. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is negative and the semaphore ++ * value is currently not positive. ++ * ++ * - -EINTR is returned if calling task has been unblock by a signal or ++ * explicitly via rtdm_task_unblock(). ++ * ++ * - -EIDRM is returned if @a sem has been destroyed. 
++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-timed, might-switch} ++ */ ++int rtdm_sem_timeddown(rtdm_sem_t *sem, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *thread; ++ int err = 0, ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, timeout < 0 || !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ trace_cobalt_driver_sem_wait(sem, xnthread_current()); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(sem->synch_base.status & RTDM_SYNCH_DELETED)) ++ err = -EIDRM; ++ else if (sem->value > 0) { ++ if(!--sem->value) ++ xnselect_signal(&sem->select_block, 0); ++ } else if (timeout < 0) /* non-blocking mode */ ++ err = -EWOULDBLOCK; ++ else { ++ thread = xnthread_current(); ++ ++ if (timeout_seq && timeout > 0) ++ /* timeout sequence */ ++ ret = xnsynch_sleep_on(&sem->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else ++ /* infinite or relative timeout */ ++ ret = xnsynch_sleep_on(&sem->synch_base, timeout, XN_RELATIVE); ++ ++ if (ret) { ++ if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ else if (ret & XNRMID) ++ err = -EIDRM; ++ else /* XNBREAK */ ++ err = -EINTR; ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_timeddown); ++ ++/** ++ * @brief Increment a semaphore ++ * ++ * This function increments the given semphore's value, waking up a potential ++ * waiter which was blocked upon rtdm_sem_down(). ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_sem_up(rtdm_sem_t *sem) ++{ ++ spl_t s; ++ ++ trace_cobalt_driver_sem_up(sem); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnsynch_wakeup_one_sleeper(&sem->synch_base)) ++ xnsched_run(); ++ else ++ if (sem->value++ == 0 ++ && xnselect_signal(&sem->select_block, 1)) ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_sem_up); ++ ++/** ++ * @brief Bind a selector to a semaphore ++ * ++ * This functions binds the given selector to the semaphore so that the former ++ * is notified when the semaphore state changes. Typically the select binding ++ * handler will invoke this service. ++ * ++ * @param[in,out] sem Semaphore handle as returned by rtdm_sem_init() ++ * @param[in,out] selector Selector as passed to the select binding handler ++ * @param[in] type Type of the bound event as passed to the select binding handler ++ * @param[in] fd_index File descriptor index as passed to the select binding ++ * handler ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ENOMEM is returned if there is insufficient memory to establish the ++ * dynamic binding. ++ * ++ * - -EINVAL is returned if @a type or @a fd_index are invalid. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_sem_select(rtdm_sem_t *sem, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned int fd_index) ++{ ++ struct xnselect_binding *binding; ++ int err; ++ spl_t s; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnselect_bind(&sem->select_block, binding, selector, ++ type, fd_index, ++ (sem->value > 0) || ++ sem->synch_base.status & RTDM_SYNCH_DELETED); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err) ++ xnfree(binding); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_sem_select); ++ ++/** @} */ ++ ++/** ++ * @ingroup rtdm_sync ++ * @defgroup rtdm_sync_mutex Mutex services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialise a mutex ++ * ++ * This function initalises a basic mutex with priority inversion protection. ++ * "Basic", as it does not allow a mutex owner to recursively lock the same ++ * mutex again. ++ * ++ * @param[in,out] mutex Mutex handle ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_mutex_init(rtdm_mutex_t *mutex) ++{ ++ spl_t s; ++ ++ /* Make atomic for re-initialisation support */ ++ xnlock_get_irqsave(&nklock, s); ++ xnsynch_init(&mutex->synch_base, XNSYNCH_PI, &mutex->fastlock); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_init); ++ ++/** ++ * @brief Destroy a mutex ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void rtdm_mutex_destroy(rtdm_mutex_t *mutex) ++{ ++ trace_cobalt_driver_mutex_destroy(mutex); ++ ++ if (realtime_core_enabled()) ++ __rtdm_synch_flush(&mutex->synch_base, XNRMID); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_destroy); ++ ++/** ++ * @brief Release a mutex ++ * ++ * This function releases the given mutex, waking up a potential waiter which ++ * was blocked upon rtdm_mutex_lock() or rtdm_mutex_timedlock(). ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_mutex_unlock(rtdm_mutex_t *mutex) ++{ ++ if (!XENO_ASSERT(COBALT, !xnsched_interrupt_p())) ++ return; ++ ++ trace_cobalt_driver_mutex_release(mutex); ++ ++ if (unlikely(xnsynch_release(&mutex->synch_base, ++ xnsched_current_thread()))) ++ xnsched_run(); ++} ++EXPORT_SYMBOL_GPL(rtdm_mutex_unlock); ++ ++/** ++ * @brief Request a mutex ++ * ++ * This is the light-weight version of rtdm_mutex_timedlock(), implying an ++ * infinite timeout. ++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EIDRM is returned if @a mutex has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_mutex_lock(rtdm_mutex_t *mutex) ++{ ++ return rtdm_mutex_timedlock(mutex, 0, NULL); ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_mutex_lock); ++ ++/** ++ * @brief Request a mutex with timeout ++ * ++ * This function tries to acquire the given mutex. If it is not available, the ++ * caller is blocked unless non-blocking operation was selected. 
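++ *
++ * A minimal critical-section sketch using the companion services
++ * (@c ctx->lock is illustrative):
++ *
++ * @code
++ * ret = rtdm_mutex_lock(&ctx->lock);
++ * if (ret)
++ *	return ret;
++ *
++ * // ... manipulate shared driver state ...
++ *
++ * rtdm_mutex_unlock(&ctx->lock);
++ * @endcode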
++ * ++ * @param[in,out] mutex Mutex handle as returned by rtdm_mutex_init() ++ * @param[in] timeout Relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values ++ * @param[in,out] timeout_seq Handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is negative and the semaphore ++ * value is currently not positive. ++ * ++ * - -EIDRM is returned if @a mutex has been destroyed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int rtdm_mutex_timedlock(rtdm_mutex_t *mutex, nanosecs_rel_t timeout, ++ rtdm_toseq_t *timeout_seq) ++{ ++ struct xnthread *curr; ++ int ret; ++ spl_t s; ++ ++ if (!XENO_ASSERT(COBALT, !xnsched_unblockable_p())) ++ return -EPERM; ++ ++ curr = xnthread_current(); ++ trace_cobalt_driver_mutex_wait(mutex, curr); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(mutex->synch_base.status & RTDM_SYNCH_DELETED)) { ++ ret = -EIDRM; ++ goto out; ++ } ++ ++ ret = xnsynch_try_acquire(&mutex->synch_base); ++ if (ret != -EBUSY) ++ goto out; ++ ++ if (timeout < 0) { ++ ret = -EWOULDBLOCK; ++ goto out; ++ } ++ ++ for (;;) { ++ if (timeout_seq && timeout > 0) /* timeout sequence */ ++ ret = xnsynch_acquire(&mutex->synch_base, *timeout_seq, ++ XN_ABSOLUTE); ++ else /* infinite or relative timeout */ ++ ret = xnsynch_acquire(&mutex->synch_base, timeout, ++ XN_RELATIVE); ++ if (ret == 0) ++ break; ++ if (ret & XNBREAK) ++ continue; ++ ret = ret & XNTIMEO ? -ETIMEDOUT : -EIDRM; ++ break; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_mutex_timedlock); ++/** @} */ ++ ++/** @} Synchronisation services */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_irq Interrupt Management Services ++ * @{ ++ */ ++ ++/** ++ * @brief Register an interrupt handler ++ * ++ * This function registers the provided handler with an IRQ line and enables ++ * the line. ++ * ++ * @param[in,out] irq_handle IRQ handle ++ * @param[in] irq_no Line number of the addressed IRQ ++ * @param[in] handler Interrupt handler ++ * @param[in] flags Registration flags, see @ref RTDM_IRQTYPE_xxx for details ++ * @param[in] device_name Device name to show up in real-time IRQ lists ++ * @param[in] arg Pointer to be passed to the interrupt handler on invocation ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if an invalid parameter was passed. ++ * ++ * - -EBUSY is returned if the specified IRQ line is already in use. ++ * ++ * - -ENOSYS is returned if the real-time core is disabled. 
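++ *
++ * A registration sketch from a driver's open or probe path; the
++ * handler and device fields are illustrative, and rtdm_irq_get_arg()
++ * is assumed to be the usual accessor for the registration argument:
++ *
++ * @code
++ * static int my_isr(rtdm_irq_t *irq_handle)
++ * {
++ *	struct my_device *dev = rtdm_irq_get_arg(irq_handle, struct my_device);
++ *
++ *	// acknowledge the device, wake up any waiter, ...
++ *	return RTDM_IRQ_HANDLED;
++ * }
++ *
++ * ret = rtdm_irq_request(&dev->irq_handle, dev->irq_no, my_isr,
++ *			   0, "my_device", dev);
++ * @endcode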
++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_request(rtdm_irq_t *irq_handle, unsigned int irq_no, ++ rtdm_irq_handler_t handler, unsigned long flags, ++ const char *device_name, void *arg) ++{ ++ int err; ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ err = xnintr_init(irq_handle, device_name, irq_no, handler, NULL, flags); ++ if (err) ++ return err; ++ ++ err = xnintr_attach(irq_handle, arg); ++ if (err) { ++ xnintr_destroy(irq_handle); ++ return err; ++ } ++ ++ xnintr_enable(irq_handle); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtdm_irq_request); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++/** ++ * @brief Release an interrupt handler ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note The caller is responsible for shutting down the IRQ source at device ++ * level before invoking this service. In turn, rtdm_irq_free ensures that any ++ * pending event on the given IRQ line is fully processed on return from this ++ * service. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_free(rtdm_irq_t *irq_handle); ++ ++/** ++ * @brief Enable interrupt line ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note This service is for exceptional use only. Drivers should ++ * always prefer interrupt masking at device level (via corresponding ++ * control registers etc.) over masking at line level. Keep in mind ++ * that the latter is incompatible with IRQ line sharing and can also ++ * be more costly as interrupt controller access requires broader ++ * synchronization. Also, such service is solely available from ++ * secondary mode. The caller is responsible for excluding such ++ * conflicts. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_enable(rtdm_irq_t *irq_handle); ++ ++/** ++ * @brief Disable interrupt line ++ * ++ * @param[in,out] irq_handle IRQ handle as returned by rtdm_irq_request() ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * @note This service is for exceptional use only. Drivers should ++ * always prefer interrupt masking at device level (via corresponding ++ * control registers etc.) over masking at line level. Keep in mind ++ * that the latter is incompatible with IRQ line sharing and can also ++ * be more costly as interrupt controller access requires broader ++ * synchronization. Also, such service is solely available from ++ * secondary mode. The caller is responsible for excluding such ++ * conflicts. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_irq_disable(rtdm_irq_t *irq_handle); ++#endif /* DOXYGEN_CPP */ ++ ++/** @} Interrupt Management Services */ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_nrtsignal Non-Real-Time Signalling Services ++ * ++ * These services provide a mechanism to request the execution of a specified ++ * handler in non-real-time context. The triggering can safely be performed in ++ * real-time context without suffering from unknown delays. The handler ++ * execution will be deferred until the next time the real-time subsystem ++ * releases the CPU to the non-real-time part. 
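++ *
++ * A minimal sketch (the handler and @c ctx fields are illustrative):
++ *
++ * @code
++ * static void my_nrt_handler(rtdm_nrtsig_t *nrt_sig, void *arg)
++ * {
++ *	// runs in Linux (non-real-time) context
++ * }
++ *
++ * rtdm_nrtsig_init(&ctx->nrt_sig, my_nrt_handler, ctx);
++ * ...
++ * // later, from real-time context:
++ * rtdm_nrtsig_pend(&ctx->nrt_sig);
++ * @endcode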
++ * @{ ++ */ ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Register a non-real-time signal handler ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * @param[in] handler Non-real-time signal handler ++ * @param[in] arg Custom argument passed to @c handler() on each invocation ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EAGAIN is returned if no free signal slot is available. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_nrtsig_init(rtdm_nrtsig_t *nrt_sig, rtdm_nrtsig_handler_t handler, ++ void *arg); ++ ++/** ++ * @brief Release a non-realtime signal handler ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_nrtsig_destroy(rtdm_nrtsig_t *nrt_sig); ++#endif /* DOXYGEN_CPP */ ++ ++struct nrtsig_work { ++ struct ipipe_work_header work; ++ struct rtdm_nrtsig *nrtsig; ++}; ++ ++static void nrtsig_execute(struct ipipe_work_header *work) ++{ ++ struct rtdm_nrtsig *nrtsig; ++ struct nrtsig_work *w; ++ ++ w = container_of(work, typeof(*w), work); ++ nrtsig = w->nrtsig; ++ nrtsig->handler(nrtsig, nrtsig->arg); ++} ++ ++/** ++ * Trigger non-real-time signal ++ * ++ * @param[in,out] nrt_sig Signal handle ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_nrtsig_pend(rtdm_nrtsig_t *nrt_sig) ++{ ++ struct nrtsig_work nrtsig_work = { ++ .work = { ++ .size = sizeof(nrtsig_work), ++ .handler = nrtsig_execute, ++ }, ++ .nrtsig = nrt_sig, ++ }; ++ ipipe_post_work_root(&nrtsig_work, work); ++} ++EXPORT_SYMBOL_GPL(rtdm_nrtsig_pend); ++ ++struct lostage_schedule_work { ++ struct ipipe_work_header work; ++ struct work_struct *lostage_work; ++}; ++ ++static void lostage_schedule_work(struct ipipe_work_header *work) ++{ ++ struct lostage_schedule_work *w; ++ ++ w = container_of(work, typeof(*w), work); ++ schedule_work(w->lostage_work); ++} ++ ++/** ++ * Put a work task in Linux non real-time global workqueue from primary mode. ++ * ++ * @param lostage_work ++ */ ++void rtdm_schedule_nrt_work(struct work_struct *lostage_work) ++{ ++ struct lostage_schedule_work ipipe_work = { ++ .work = { ++ .size = sizeof(ipipe_work), ++ .handler = lostage_schedule_work, ++ }, ++ .lostage_work = lostage_work, ++ }; ++ ++ if (ipipe_root_p) ++ schedule_work(lostage_work); ++ else ++ ipipe_post_work_root(&ipipe_work, work); ++} ++EXPORT_SYMBOL_GPL(rtdm_schedule_nrt_work); ++ ++/** @} Non-Real-Time Signalling Services */ ++ ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_util Utility Services ++ * @{ ++ */ ++ ++struct mmap_tramp_data { ++ struct rtdm_fd *fd; ++ struct file_operations *fops; ++ int (*mmap_handler)(struct rtdm_fd *fd, ++ struct vm_area_struct *vma); ++}; ++ ++struct mmap_helper_data { ++ void *src_vaddr; ++ phys_addr_t src_paddr; ++ struct vm_operations_struct *vm_ops; ++ void *vm_private_data; ++ struct mmap_tramp_data tramp_data; ++}; ++ ++static int mmap_kmem_helper(struct vm_area_struct *vma, void *va) ++{ ++ unsigned long addr, len, pfn, to; ++ int ret = 0; ++ ++ to = (unsigned long)va; ++ addr = vma->vm_start; ++ len = vma->vm_end - vma->vm_start; ++ ++ if (to != PAGE_ALIGN(to) || (len & ~PAGE_MASK) != 0) ++ return -EINVAL; ++ ++#ifndef CONFIG_MMU ++ pfn = __pa(to) >> PAGE_SHIFT; ++ ret = remap_pfn_range(vma, addr, pfn, len, PAGE_SHARED); ++#else ++ if (to < VMALLOC_START || to >= VMALLOC_END) { ++ /* logical address. */ ++ pfn = __pa(to) >> PAGE_SHIFT; ++ ret = remap_pfn_range(vma, addr, pfn, len, PAGE_SHARED); ++ if (ret) ++ return ret; ++ } else { ++ /* vmalloc memory. 
*/ ++ while (len > 0) { ++ struct page *page = vmalloc_to_page((void *)to); ++ if (vm_insert_page(vma, addr, page)) ++ return -EAGAIN; ++ addr += PAGE_SIZE; ++ to += PAGE_SIZE; ++ len -= PAGE_SIZE; ++ } ++ } ++ ++ if (cobalt_machine.prefault) ++ cobalt_machine.prefault(vma); ++#endif ++ ++ return ret; ++} ++ ++static int mmap_iomem_helper(struct vm_area_struct *vma, phys_addr_t pa) ++{ ++ pgprot_t prot = PAGE_SHARED; ++ unsigned long len; ++ ++ len = vma->vm_end - vma->vm_start; ++#ifndef CONFIG_MMU ++ vma->vm_pgoff = pa >> PAGE_SHIFT; ++#endif /* CONFIG_MMU */ ++ ++#ifdef __HAVE_PHYS_MEM_ACCESS_PROT ++ if (vma->vm_file) ++ prot = phys_mem_access_prot(vma->vm_file, pa >> PAGE_SHIFT, ++ len, prot); ++#endif ++ vma->vm_page_prot = pgprot_noncached(prot); ++ ++ return remap_pfn_range(vma, vma->vm_start, pa >> PAGE_SHIFT, ++ len, vma->vm_page_prot); ++} ++ ++static int mmap_buffer_helper(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct mmap_tramp_data *tramp_data = vma->vm_private_data; ++ struct mmap_helper_data *helper_data; ++ int ret; ++ ++ helper_data = container_of(tramp_data, struct mmap_helper_data, tramp_data); ++ vma->vm_ops = helper_data->vm_ops; ++ vma->vm_private_data = helper_data->vm_private_data; ++ ++ if (helper_data->src_paddr) ++ ret = mmap_iomem_helper(vma, helper_data->src_paddr); ++ else ++ ret = mmap_kmem_helper(vma, helper_data->src_vaddr); ++ ++ return ret; ++} ++ ++static int mmap_trampoline(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ int ret; ++ ++ vma->vm_private_data = tramp_data; ++ ++ ret = tramp_data->mmap_handler(tramp_data->fd, vma); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++#ifndef CONFIG_MMU ++ ++static unsigned long ++internal_get_unmapped_area(struct file *filp, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ struct mmap_helper_data *helper_data; ++ unsigned long pa; ++ ++ helper_data = container_of(tramp_data, struct mmap_helper_data, tramp_data); ++ pa = helper_data->src_paddr; ++ if (pa) ++ return (unsigned long)__va(pa); ++ ++ return (unsigned long)helper_data->src_vaddr; ++} ++ ++static int do_rtdm_mmap(struct mmap_tramp_data *tramp_data, ++ size_t len, off_t offset, int prot, int flags, ++ void **pptr) ++{ ++ const struct file_operations *old_fops; ++ unsigned long u_addr; ++ struct file *filp; ++ ++ filp = filp_open("/dev/mem", O_RDWR, 0); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ old_fops = filp->f_op; ++ filp->f_op = tramp_data->fops; ++ filp->private_data = tramp_data; ++ u_addr = vm_mmap(filp, (unsigned long)*pptr, len, prot, flags, offset); ++ filp_close(filp, current->files); ++ filp->f_op = old_fops; ++ ++ if (IS_ERR_VALUE(u_addr)) ++ return (int)u_addr; ++ ++ *pptr = (void *)u_addr; ++ ++ return 0; ++} ++ ++#else /* CONFIG_MMU */ ++ ++static int do_rtdm_mmap(struct mmap_tramp_data *tramp_data, ++ size_t len, off_t offset, int prot, int flags, ++ void **pptr) ++{ ++ unsigned long u_addr; ++ struct file *filp; ++ ++ filp = anon_inode_getfile("[rtdm]", tramp_data->fops, tramp_data, O_RDWR); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ u_addr = vm_mmap(filp, (unsigned long)*pptr, len, prot, flags, offset); ++ filp_close(filp, current->files); ++ ++ if (IS_ERR_VALUE(u_addr)) ++ return (int)u_addr; ++ ++ *pptr = (void *)u_addr; ++ ++ return 0; ++} ++ ++#define internal_get_unmapped_area NULL ++ ++#endif /* CONFIG_MMU */ ++ ++static 
struct file_operations internal_mmap_fops = { ++ .mmap = mmap_trampoline, ++ .get_unmapped_area = internal_get_unmapped_area ++}; ++ ++static unsigned long ++driver_get_unmapped_area(struct file *filp, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ struct mmap_tramp_data *tramp_data = filp->private_data; ++ struct rtdm_fd *fd = tramp_data->fd; ++ ++ if (fd->ops->get_unmapped_area) ++ return fd->ops->get_unmapped_area(fd, len, pgoff, flags); ++ ++#ifdef CONFIG_MMU ++ /* Run default handler. */ ++ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); ++#else ++ return -ENODEV; ++#endif ++} ++ ++static struct file_operations driver_mmap_fops = { ++ .mmap = mmap_trampoline, ++ .get_unmapped_area = driver_get_unmapped_area ++}; ++ ++int __rtdm_mmap_from_fdop(struct rtdm_fd *fd, size_t len, off_t offset, ++ int prot, int flags, void **pptr) ++{ ++ struct mmap_tramp_data tramp_data = { ++ .fd = fd, ++ .fops = &driver_mmap_fops, ++ .mmap_handler = fd->ops->mmap, ++ }; ++ ++#ifndef CONFIG_MMU ++ /* ++ * XXX: A .get_unmapped_area handler must be provided in the ++ * nommu case. We use this to force the memory management code ++ * not to share VM regions for distinct areas to map to, as it ++ * would otherwise do since all requests currently apply to ++ * the same file (i.e. from /dev/mem, see do_mmap_pgoff() in ++ * the nommu case). ++ */ ++ if (fd->ops->get_unmapped_area) ++ offset = fd->ops->get_unmapped_area(fd, len, 0, flags); ++#endif ++ ++ return do_rtdm_mmap(&tramp_data, len, offset, prot, flags, pptr); ++} ++ ++/** ++ * Map a kernel memory range into the address space of the user. ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] src_addr Kernel virtual address to be mapped ++ * @param[in] len Length of the memory range ++ * @param[in] prot Protection flags for the user's memory range, typically ++ * either PROT_READ or PROT_READ|PROT_WRITE ++ * @param[in,out] pptr Address of a pointer containing the desired user ++ * address or NULL on entry and the finally assigned address on return ++ * @param[in] vm_ops vm_operations to be executed on the vm_area of the ++ * user memory range or NULL ++ * @param[in] vm_private_data Private data to be stored in the vm_area, ++ * primarily useful for vm_operation handlers ++ * ++ * @return 0 on success, otherwise (most common values): ++ * ++ * - -EINVAL is returned if an invalid start address, size, or destination ++ * address was passed. ++ * ++ * - -ENOMEM is returned if there is insufficient free memory or the limit of ++ * memory mapping for the user process was reached. ++ * ++ * - -EAGAIN is returned if too much memory has been already locked by the ++ * user process. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @note This service only works on memory regions allocated via kmalloc() or ++ * vmalloc(). To map physical I/O memory to user-space use ++ * rtdm_iomap_to_user() instead. ++ * ++ * @note RTDM supports two models for unmapping the memory area: ++ * - manual unmapping via rtdm_munmap(), which may be issued from a ++ * driver in response to an IOCTL call, or by a call to the regular ++ * munmap() call from the application. ++ * - automatic unmapping, triggered by the termination of the process ++ * which owns the mapping. 
++ * To track the number of references pending on the resource mapped, ++ * the driver can pass the address of a close handler for the vm_area ++ * considered, in the @a vm_ops descriptor. See the relevant Linux ++ * kernel programming documentation (e.g. Linux Device Drivers book) ++ * on virtual memory management for details. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_to_user(struct rtdm_fd *fd, ++ void *src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data) ++{ ++ struct mmap_helper_data helper_data = { ++ .tramp_data = { ++ .fd = fd, ++ .fops = &internal_mmap_fops, ++ .mmap_handler = mmap_buffer_helper, ++ }, ++ .src_vaddr = src_addr, ++ .src_paddr = 0, ++ .vm_ops = vm_ops, ++ .vm_private_data = vm_private_data ++ }; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return do_rtdm_mmap(&helper_data.tramp_data, len, 0, prot, MAP_SHARED, pptr); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_to_user); ++ ++/** ++ * Map an I/O memory range into the address space of the user. ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] src_addr physical I/O address to be mapped ++ * @param[in] len Length of the memory range ++ * @param[in] prot Protection flags for the user's memory range, typically ++ * either PROT_READ or PROT_READ|PROT_WRITE ++ * @param[in,out] pptr Address of a pointer containing the desired user ++ * address or NULL on entry and the finally assigned address on return ++ * @param[in] vm_ops vm_operations to be executed on the vm_area of the ++ * user memory range or NULL ++ * @param[in] vm_private_data Private data to be stored in the vm_area, ++ * primarily useful for vm_operation handlers ++ * ++ * @return 0 on success, otherwise (most common values): ++ * ++ * - -EINVAL is returned if an invalid start address, size, or destination ++ * address was passed. ++ * ++ * - -ENOMEM is returned if there is insufficient free memory or the limit of ++ * memory mapping for the user process was reached. ++ * ++ * - -EAGAIN is returned if too much memory has been already locked by the ++ * user process. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @note RTDM supports two models for unmapping the memory area: ++ * - manual unmapping via rtdm_munmap(), which may be issued from a ++ * driver in response to an IOCTL call, or by a call to the regular ++ * munmap() call from the application. ++ * - automatic unmapping, triggered by the termination of the process ++ * which owns the mapping. ++ * To track the number of references pending on the resource mapped, ++ * the driver can pass the address of a close handler for the vm_area ++ * considered, in the @a vm_ops descriptor. See the relevant Linux ++ * kernel programming documentation (e.g. Linux Device Drivers book) ++ * on virtual memory management for details. 
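++ *
++ * A short sketch of exporting a device register window from an IOCTL
++ * handler (@c ctx->regs_paddr and the single-page length are
++ * illustrative):
++ *
++ * @code
++ * void *uptr = NULL;
++ *
++ * ret = rtdm_iomap_to_user(fd, ctx->regs_paddr, PAGE_SIZE,
++ *			     PROT_READ | PROT_WRITE, &uptr, NULL, NULL);
++ * // on success, uptr holds the user-space address of the mapping
++ * @endcode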
++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_iomap_to_user(struct rtdm_fd *fd, ++ phys_addr_t src_addr, size_t len, ++ int prot, void **pptr, ++ struct vm_operations_struct *vm_ops, ++ void *vm_private_data) ++{ ++ struct mmap_helper_data helper_data = { ++ .tramp_data = { ++ .fd = fd, ++ .fops = &internal_mmap_fops, ++ .mmap_handler = mmap_buffer_helper, ++ }, ++ .src_vaddr = NULL, ++ .src_paddr = src_addr, ++ .vm_ops = vm_ops, ++ .vm_private_data = vm_private_data ++ }; ++ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return do_rtdm_mmap(&helper_data.tramp_data, len, 0, prot, MAP_SHARED, pptr); ++} ++EXPORT_SYMBOL_GPL(rtdm_iomap_to_user); ++ ++/** ++ * Map a kernel logical memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping a virtual memory area with a direct physical ++ * mapping over the user address space referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] va The kernel logical address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note This service works on memory regions allocated via ++ * kmalloc(). To map a chunk of virtual space with no direct physical ++ * mapping, or a physical I/O memory to a VMA, call rtdm_mmap_vmem() ++ * or rtdm_mmap_iomem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_kmem(struct vm_area_struct *vma, void *va) ++{ ++ return mmap_kmem_helper(vma, va); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_kmem); ++ ++/** ++ * Map a virtual memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping a purely virtual memory area over the user ++ * address space referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] va The virtual address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note This service works on memory regions allocated via ++ * vmalloc(). To map a chunk of logical space obtained from kmalloc(), ++ * or a physical I/O memory to a VMA, call rtdm_mmap_kmem() or ++ * rtdm_mmap_iomem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_vmem(struct vm_area_struct *vma, void *va) ++{ ++ /* ++ * Our helper handles both of directly mapped to physical and ++ * purely virtual memory ranges. ++ */ ++ return mmap_kmem_helper(vma, va); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_vmem); ++ ++/** ++ * Map an I/O memory range to a virtual user area. ++ * ++ * This routine is commonly used from a ->mmap() handler of a RTDM ++ * driver, for mapping an I/O memory area over the user address space ++ * referred to by @a vma. ++ * ++ * @param[in] vma The VMA descriptor to receive the mapping. ++ * @param[in] pa The physical I/O address to be mapped. ++ * ++ * @return 0 on success, otherwise a negated error code is returned. ++ * ++ * @note To map a chunk of logical space obtained from kmalloc(), or a ++ * purely virtual area with no direct physical mapping to a VMA, call ++ * rtdm_mmap_kmem() or rtdm_mmap_vmem() respectively instead. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_mmap_iomem(struct vm_area_struct *vma, phys_addr_t pa) ++{ ++ return mmap_iomem_helper(vma, pa); ++} ++EXPORT_SYMBOL_GPL(rtdm_mmap_iomem); ++ ++/** ++ * Unmap a user memory range. 
++ * ++ * @param[in] ptr User address or the memory range ++ * @param[in] len Length of the memory range ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINVAL is returned if an invalid address or size was passed. ++ * ++ * - -EPERM @e may be returned if an illegal invocation environment is ++ * detected. ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_munmap(void *ptr, size_t len) ++{ ++ if (!XENO_ASSERT(COBALT, xnsched_root_p())) ++ return -EPERM; ++ ++ return vm_munmap((unsigned long)ptr, len); ++} ++EXPORT_SYMBOL_GPL(rtdm_munmap); ++ ++int rtdm_get_iovec(struct rtdm_fd *fd, struct iovec **iovp, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast) ++{ ++ size_t len = sizeof(struct iovec) * msg->msg_iovlen; ++ struct iovec *iov = iov_fast; ++ ++ /* ++ * If the I/O vector doesn't fit in the fast memory, allocate ++ * a chunk from the system heap which is large enough to hold ++ * it. ++ */ ++ if (msg->msg_iovlen > RTDM_IOV_FASTMAX) { ++ iov = xnmalloc(len); ++ if (iov == NULL) ++ return -ENOMEM; ++ } ++ ++ *iovp = iov; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(iov, msg->msg_iov, len); ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ return sys32_get_iovec(iov, ++ (struct compat_iovec __user *)msg->msg_iov, ++ msg->msg_iovlen); ++#endif ++ ++ return rtdm_copy_from_user(fd, iov, msg->msg_iov, len); ++} ++EXPORT_SYMBOL_GPL(rtdm_get_iovec); ++ ++int rtdm_put_iovec(struct rtdm_fd *fd, struct iovec *iov, ++ const struct user_msghdr *msg, ++ struct iovec *iov_fast) ++{ ++ size_t len = sizeof(iov[0]) * msg->msg_iovlen; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(msg->msg_iov, iov, len); ++ ret = 0; ++ } else ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ ret = sys32_put_iovec((struct compat_iovec __user *)msg->msg_iov, ++ iov, msg->msg_iovlen); ++ else ++#endif ++ ret = rtdm_copy_to_user(fd, msg->msg_iov, iov, len); ++ ++ if (iov != iov_fast) ++ xnfree(iov); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_put_iovec); ++ ++ssize_t rtdm_get_iov_flatlen(struct iovec *iov, int iovlen) ++{ ++ ssize_t len; ++ int nvec; ++ ++ /* Return the flattened vector length. */ ++ for (len = 0, nvec = 0; nvec < iovlen; nvec++) { ++ ssize_t l = iov[nvec].iov_len; ++ if (l < 0 || len + l < len) /* SuS wants this. */ ++ return -EINVAL; ++ len += l; ++ } ++ ++ return len; ++} ++EXPORT_SYMBOL_GPL(rtdm_get_iov_flatlen); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * Real-time safe rate-limited message printing on kernel console ++ * ++ * @param[in] format Format string (conforming standard @c printf()) ++ * @param ... Arguments referred by @a format ++ * ++ * @return On success, this service returns the number of characters printed. ++ * Otherwise, a negative error code is returned. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_printk_ratelimited(const char *format, ...); ++ ++/** ++ * Real-time safe message printing on kernel console ++ * ++ * @param[in] format Format string (conforming standard @c printf()) ++ * @param ... Arguments referred by @a format ++ * ++ * @return On success, this service returns the number of characters printed. ++ * Otherwise, a negative error code is returned. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_printk(const char *format, ...); ++ ++/** ++ * Allocate memory block ++ * ++ * @param[in] size Requested size of the memory block ++ * ++ * @return The pointer to the allocated block is returned on success, NULL ++ * otherwise. 
++ * ++ * @coretags{unrestricted} ++ */ ++void *rtdm_malloc(size_t size); ++ ++/** ++ * Release real-time memory block ++ * ++ * @param[in] ptr Pointer to memory block as returned by rtdm_malloc() ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_free(void *ptr); ++ ++/** ++ * Check if read access to user-space memory block is safe ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] ptr Address of the user-provided memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return Non-zero is return when it is safe to read from the specified ++ * memory block, 0 otherwise. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_read_user_ok(struct rtdm_fd *fd, const void __user *ptr, ++ size_t size); ++ ++/** ++ * Check if read/write access to user-space memory block is safe ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] ptr Address of the user-provided memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return Non-zero is return when it is safe to read from or write to the ++ * specified memory block, 0 otherwise. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_rw_user_ok(struct rtdm_fd *fd, const void __user *ptr, ++ size_t size); ++ ++/** ++ * Copy user-space memory block to specified buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note Before invoking this service, verify via rtdm_read_user_ok() that the ++ * provided user-space address can securely be accessed. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_copy_from_user(struct rtdm_fd *fd, void *dst, ++ const void __user *src, size_t size); ++ ++/** ++ * Check if read access to user-space memory block and copy it to specified ++ * buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space memory block ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This service is a combination of rtdm_read_user_ok and ++ * rtdm_copy_from_user. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_safe_copy_from_user(struct rtdm_fd *fd, void *dst, ++ const void __user *src, size_t size); ++ ++/** ++ * Copy specified buffer to user-space memory block ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Address of the user-space memory block ++ * @param[in] src Source buffer address ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note Before invoking this service, verify via rtdm_rw_user_ok() that the ++ * provided user-space address can securely be accessed. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_copy_to_user(struct rtdm_fd *fd, void __user *dst, ++ const void *src, size_t size); ++ ++/** ++ * Check if read/write access to user-space memory block is safe and copy ++ * specified buffer to it ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Address of the user-space memory block ++ * @param[in] src Source buffer address ++ * @param[in] size Size of the memory block ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This service is a combination of rtdm_rw_user_ok and ++ * rtdm_copy_to_user. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_safe_copy_to_user(struct rtdm_fd *fd, void __user *dst, ++ const void *src, size_t size); ++ ++/** ++ * Copy user-space string to specified buffer ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * @param[in] dst Destination buffer address ++ * @param[in] src Address of the user-space string ++ * @param[in] count Maximum number of bytes to copy, including the trailing ++ * '0' ++ * ++ * @return Length of the string on success (not including the trailing '0'), ++ * otherwise: ++ * ++ * - -EFAULT is returned if an invalid memory area was accessed. ++ * ++ * @note This services already includes a check of the source address, ++ * calling rtdm_read_user_ok() for @a src explicitly is not required. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_strncpy_from_user(struct rtdm_fd *fd, char *dst, ++ const char __user *src, size_t count); ++ ++/** ++ * Test if running in a real-time task ++ * ++ * @return Non-zero is returned if the caller resides in real-time context, 0 ++ * otherwise. ++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_in_rt_context(void); ++ ++/** ++ * Test if the caller is capable of running in real-time context ++ * ++ * @param[in] fd RTDM file descriptor as passed to the invoked ++ * device operation handler ++ * ++ * @return Non-zero is returned if the caller is able to execute in real-time ++ * context (independent of its current execution mode), 0 otherwise. ++ * ++ * @note This function can be used by drivers that provide different ++ * implementations for the same service depending on the execution mode of ++ * the caller. If a caller requests such a service in non-real-time context ++ * but is capable of running in real-time as well, it might be appropriate ++ * for the driver to reject the request via -ENOSYS so that RTDM can switch ++ * the caller and restart the request in real-time context. ++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_rt_capable(struct rtdm_fd *fd); ++ ++/** ++ * Test if the real-time core is available ++ * ++ * @return True if the real-time is available, false if it is disabled or in ++ * error state. ++ * ++ * @note Drivers should query the core state during initialization if they ++ * perform hardware setup operations or interact with RTDM services such as ++ * locks prior to calling an RTDM service that has a built-in state check of ++ * the real-time core (e.g. rtdm_dev_register() or rtdm_task_init()). 
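
A sketch of how the user-memory helpers above are typically combined in an ioctl handler; the request codes, the configuration structure and the mydrv_* names are hypothetical, and access to the shared settings is left unserialized for brevity:

#include <rtdm/driver.h>
#include <linux/ioctl.h>

struct mydrv_config {
        int channel;
        unsigned int rate_hz;
};

/* Hypothetical request codes. */
#define MYDRV_RTIOC_SET_CONFIG  _IOW('x', 0x01, struct mydrv_config)
#define MYDRV_RTIOC_GET_CONFIG  _IOR('x', 0x02, struct mydrv_config)

static struct mydrv_config mydrv_cfg;   /* driver-global settings, for brevity */

static int mydrv_ioctl_rt(struct rtdm_fd *fd, unsigned int request,
                          void __user *arg)
{
        struct mydrv_config tmp;
        int ret;

        switch (request) {
        case MYDRV_RTIOC_SET_CONFIG:
                /* Checks the source range, then copies it, in one call. */
                ret = rtdm_safe_copy_from_user(fd, &tmp, arg, sizeof(tmp));
                if (ret)
                        return ret;
                mydrv_cfg = tmp;
                return 0;
        case MYDRV_RTIOC_GET_CONFIG:
                return rtdm_safe_copy_to_user(fd, arg, &mydrv_cfg,
                                              sizeof(mydrv_cfg));
        default:
                /* Unknown request: let the non-real-time path handle it. */
                return -ENOSYS;
        }
}
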
++ * ++ * @coretags{unrestricted} ++ */ ++bool rtdm_available(void); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** @} Utility Services */ +--- linux/kernel/xenomai/rtdm/core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/core.c 2021-04-07 16:01:26.171635675 +0800 +@@ -0,0 +1,1374 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#define CREATE_TRACE_POINTS ++#include ++#include "posix/process.h" ++ ++/** ++ * @ingroup rtdm ++ * @defgroup rtdm_driver_interface Driver programming interface ++ * RTDM driver programming interface ++ * @{ ++ */ ++ ++static void cleanup_instance(struct rtdm_device *dev, ++ struct rtdm_dev_context *context) ++{ ++ if (context) ++ kfree(context); ++ ++ __rtdm_put_device(dev); ++} ++ ++void __rtdm_dev_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_dev_context *context = rtdm_fd_to_context(fd); ++ struct rtdm_device *dev = context->device; ++ struct rtdm_driver *drv = dev->driver; ++ ++ if (!fd->stale && drv->ops.close) ++ drv->ops.close(fd); ++ ++ cleanup_instance(dev, context); ++} ++ ++int __rtdm_anon_getfd(const char *name, int flags) ++{ ++ return anon_inode_getfd(name, &rtdm_dumb_fops, NULL, flags); ++} ++ ++void __rtdm_anon_putfd(int ufd) ++{ ++ __close_fd(current->files, ufd); ++} ++ ++static int create_instance(int ufd, struct rtdm_device *dev, ++ struct rtdm_dev_context **context_ptr) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ struct rtdm_dev_context *context; ++ ++ /* ++ * Reset to NULL so that we can always use cleanup_files/instance to ++ * revert also partially successful allocations. 
++ */ ++ *context_ptr = NULL; ++ ++ if ((drv->device_flags & RTDM_EXCLUSIVE) != 0 && ++ atomic_read(&dev->refcount) > 1) ++ return -EBUSY; ++ ++ context = kzalloc(sizeof(struct rtdm_dev_context) + ++ drv->context_size, GFP_KERNEL); ++ if (unlikely(context == NULL)) ++ return -ENOMEM; ++ ++ context->device = dev; ++ *context_ptr = context; ++ ++ return rtdm_fd_enter(&context->fd, ufd, RTDM_FD_MAGIC, &dev->ops); ++} ++ ++#ifdef CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE ++ ++static inline struct file * ++open_devnode(struct rtdm_device *dev, const char *path, int oflag) ++{ ++ struct file *filp; ++ char *filename; ++ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LEGACY) && ++ strncmp(path, "/dev/rtdm/", 10)) ++ printk(XENO_WARNING ++ "%s[%d] opens obsolete device path: %s\n", ++ current->comm, task_pid_nr(current), path); ++ ++ filename = kasprintf(GFP_KERNEL, "/dev/rtdm/%s", dev->name); ++ if (filename == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ filp = filp_open(filename, oflag, 0); ++ kfree(filename); ++ ++ return filp; ++} ++ ++#else /* !CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE */ ++ ++static inline struct file * ++open_devnode(struct rtdm_device *dev, const char *path, int oflag) ++{ ++ return filp_open(path, oflag, 0); ++} ++ ++#endif /* !CONFIG_XENO_OPT_RTDM_COMPAT_DEVNODE */ ++ ++int __rtdm_dev_open(const char *path, int oflag) ++{ ++ struct rtdm_dev_context *context; ++ struct rtdm_device *dev; ++ struct file *filp; ++ int ufd, ret; ++ ++ secondary_mode_only(); ++ ++ /* ++ * CAUTION: we do want a lookup into the registry to happen ++ * before any attempt is made to open the devnode, so that we ++ * don't inadvertently open a regular (i.e. non-RTDM) device. ++ * Reason is that opening, then closing a device - because we ++ * don't manage it - may incur side-effects we don't want, ++ * e.g. opening then closing one end of a pipe would cause the ++ * other side to read the EOF condition. This is basically ++ * why we keep a RTDM registry for named devices, so that we ++ * can figure out whether an open() request is going to be ++ * valid, without having to open the devnode yet. 
++ */ ++ dev = __rtdm_get_namedev(path); ++ if (dev == NULL) ++ return -EADV; ++ ++ ufd = get_unused_fd_flags(oflag); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_fd; ++ } ++ ++ filp = open_devnode(dev, path, oflag); ++ if (IS_ERR(filp)) { ++ ret = PTR_ERR(filp); ++ goto fail_fopen; ++ } ++ ++ ret = create_instance(ufd, dev, &context); ++ if (ret < 0) ++ goto fail_create; ++ ++ context->fd.minor = dev->minor; ++ context->fd.oflags = oflag; ++ ++ trace_cobalt_fd_open(current, &context->fd, ufd, oflag); ++ ++ if (dev->ops.open) { ++ ret = dev->ops.open(&context->fd, oflag); ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ if (ret < 0) ++ goto fail_open; ++ } ++ ++ ret = rtdm_device_new_fd(&context->fd, ufd, context->device); ++ if (ret < 0) ++ goto fail_open; ++ ++ fd_install(ufd, filp); ++ ++ return ufd; ++ ++fail_open: ++ cleanup_instance(dev, context); ++fail_create: ++ filp_close(filp, current->files); ++fail_fopen: ++ put_unused_fd(ufd); ++fail_fd: ++ __rtdm_put_device(dev); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__rtdm_dev_open); ++ ++int __rtdm_dev_socket(int protocol_family, int socket_type, ++ int protocol) ++{ ++ struct rtdm_dev_context *context; ++ struct rtdm_device *dev; ++ int ufd, ret; ++ ++ secondary_mode_only(); ++ ++ dev = __rtdm_get_protodev(protocol_family, socket_type); ++ if (dev == NULL) ++ return -EAFNOSUPPORT; ++ ++ ufd = __rtdm_anon_getfd("[rtdm-socket]", O_RDWR); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_getfd; ++ } ++ ++ ret = create_instance(ufd, dev, &context); ++ if (ret < 0) ++ goto fail_create; ++ ++ trace_cobalt_fd_socket(current, &context->fd, ufd, protocol_family); ++ ++ if (dev->ops.socket) { ++ ret = dev->ops.socket(&context->fd, protocol); ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ if (ret < 0) ++ goto fail_socket; ++ } ++ ++ ret = rtdm_device_new_fd(&context->fd, ufd, context->device); ++ if (ret < 0) ++ goto fail_socket; ++ ++ return ufd; ++ ++fail_socket: ++ cleanup_instance(dev, context); ++fail_create: ++ __close_fd(current->files, ufd); ++fail_getfd: ++ __rtdm_put_device(dev); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__rtdm_dev_socket); ++ ++int __rtdm_dev_ioctl_core(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_driver *drv; ++ struct rtdm_device_info dev_info; ++ ++ if (fd->magic != RTDM_FD_MAGIC || request != RTIOC_DEVICE_INFO) ++ return -EADV; ++ ++ drv = dev->driver; ++ dev_info.device_flags = drv->device_flags; ++ dev_info.device_class = drv->profile_info.class_id; ++ dev_info.device_sub_class = drv->profile_info.subclass_id; ++ dev_info.profile_version = drv->profile_info.version; ++ ++ return rtdm_safe_copy_to_user(fd, arg, &dev_info, sizeof(dev_info)); ++} ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @addtogroup rtdm_sync ++ *@{ ++ */ ++ ++/** ++ * @fn void rtdm_waitqueue_init(struct rtdm_waitqueue *wq) ++ * @brief Initialize a RTDM wait queue ++ * ++ * Sets up a wait queue structure for further use. ++ * ++ * @param wq waitqueue to initialize. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_waitqueue_init(struct rtdm_waitqueue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq) ++ * @brief Deletes a RTDM wait queue ++ * ++ * Dismantles a wait queue structure, releasing all resources attached ++ * to it. ++ * ++ * @param wq waitqueue to delete. 
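
A waitqueue is typically embedded in the driver's private context and tied to the open/close handlers. A minimal sketch with hypothetical mydrv_* names (rtdm_waitqueue_flush(), used here, is documented further below):

#include <rtdm/driver.h>

/* Hypothetical per-connection state (see rtdm_driver.context_size). */
struct mydrv_state {
        struct rtdm_waitqueue wq;
        int data_ready;         /* wait condition, set by the IRQ handler */
        u32 sample;             /* last value captured from the hardware */
};

static int mydrv_open(struct rtdm_fd *fd, int oflags)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);

        rtdm_waitqueue_init(&st->wq);
        st->data_ready = 0;
        return 0;
}

static void mydrv_close(struct rtdm_fd *fd)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);

        /* Kick out any sleeper with -EINTR before dismantling the queue. */
        rtdm_waitqueue_flush(&st->wq);
        rtdm_waitqueue_destroy(&st->wq);
}
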
++ * ++ * @coretags{task-unrestricted} ++ */ ++void rtdm_waitqueue_destroy(struct rtdm_waitqueue *wq); ++ ++/** ++ * @fn rtdm_timedwait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a locked waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true or a timeout occurs. The condition is checked each time the ++ * waitqueue @a wq is signaled. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_timedwait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn rtdm_wait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition) ++ * @brief Sleep on a locked waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true. The condition is checked each time the waitqueue @a wq is ++ * signaled. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_wait_condition_locked(struct rtdm_wait_queue *wq, C_expr condition); ++ ++/** ++ * @fn rtdm_timedwait_condition(struct rtdm_wait_queue *wq, C_expr condition, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true or a timeout occurs. The condition is checked each time the ++ * waitqueue @a wq is signaled. ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. 
++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_timedwait_condition(struct rtdm_wait_queue *wq, C_expr condition, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn void rtdm_timedwait(struct rtdm_wait_queue *wq, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(), or a timeout occurs. ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_timedwait(struct rtdm_wait_queue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn void rtdm_timedwait_locked(struct rtdm_wait_queue *wq, nanosecs_rel_t timeout, rtdm_toseq_t *toseq) ++ * @brief Timed sleep on a locked waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(), or a timeout occurs. ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @param timeout relative timeout in nanoseconds, see ++ * @ref RTDM_TIMEOUT_xxx for special values. ++ * ++ * @param[in,out] toseq handle of a timeout sequence as returned by ++ * rtdm_toseq_init() or NULL. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * - -ETIMEDOUT is returned if the if the request has not been satisfied ++ * within the specified amount of time. ++ * ++ * @note Passing RTDM_TIMEOUT_NONE to @a timeout makes no sense for ++ * such service, and might cause unexpected behavior. 
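
A sketch of a blocking ->read_rt() handler built on the timed wait services above, reusing the hypothetical mydrv_state from the previous sketch; the 1 ms timeout is arbitrary:

#include <rtdm/driver.h>

static ssize_t mydrv_read_rt(struct rtdm_fd *fd, void __user *buf, size_t len)
{
        struct mydrv_state *st = rtdm_fd_to_private(fd);
        nanosecs_rel_t timeout = 1000000;       /* 1 ms, arbitrary */
        rtdm_toseq_t toseq;
        rtdm_lockctx_t ctx;
        u32 sample = 0;
        int ret;

        if (len < sizeof(sample))
                return -EINVAL;

        /* Keep the overall deadline stable across consecutive waits. */
        rtdm_toseq_init(&toseq, timeout);

        rtdm_waitqueue_lock(&st->wq, ctx);
        ret = rtdm_timedwait_condition_locked(&st->wq, st->data_ready,
                                              timeout, &toseq);
        if (ret == 0) {
                sample = st->sample;
                st->data_ready = 0;     /* consume the event */
        }
        rtdm_waitqueue_unlock(&st->wq, ctx);
        if (ret)
                return ret;             /* -EINTR or -ETIMEDOUT */

        if (rtdm_safe_copy_to_user(fd, buf, &sample, sizeof(sample)))
                return -EFAULT;

        return sizeof(sample);
}
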
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_timedwait_locked(struct rtdm_wait_queue *wq, ++ nanosecs_rel_t timeout, rtdm_toseq_t *toseq); ++ ++/** ++ * @fn rtdm_wait_condition(struct rtdm_wait_queue *wq, C_expr condition) ++ * @brief Sleep on a waitqueue until a condition gets true ++ * ++ * The calling task is put to sleep until @a condition evaluates to ++ * true. The condition is checked each time the waitqueue @a wq is ++ * signaled. ++ * ++ * @param wq waitqueue to wait on ++ * ++ * @param condition C expression for the event to wait for. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if calling task has received a Linux signal or ++ * has been forcibly unblocked by a call to rtdm_task_unblock(). ++ * ++ * @note rtdm_waitqueue_signal() has to be called after changing any ++ * variable that could change the result of the wait condition. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++rtdm_wait_condition(struct rtdm_wait_queue *wq, C_expr condition); ++ ++/** ++ * @fn void rtdm_wait(struct rtdm_wait_queue *wq) ++ * @brief Sleep on a waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(). ++ * ++ * @param wq waitqueue to wait on. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_wait(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_wait_locked(struct rtdm_wait_queue *wq) ++ * @brief Sleep on a locked waitqueue unconditionally ++ * ++ * The calling task is put to sleep until the waitqueue is signaled by ++ * either rtdm_waitqueue_signal() or rtdm_waitqueue_broadcast(), or ++ * flushed by a call to rtdm_waitqueue_flush(). ++ * ++ * The waitqueue must have been locked by a call to ++ * rtdm_waitqueue_lock() prior to calling this service. ++ * ++ * @param wq locked waitqueue to wait on. The waitqueue lock is ++ * dropped when sleeping, then reacquired before this service returns ++ * to the caller. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EINTR is returned if the waitqueue has been flushed, or the ++ * calling task has received a Linux signal or has been forcibly ++ * unblocked by a call to rtdm_task_unblock(). ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++void rtdm_wait_locked(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_lock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context) ++ * @brief Lock a waitqueue ++ * ++ * Acquires the lock on the waitqueue @a wq. ++ * ++ * @param wq waitqueue to lock. ++ * ++ * @param context name of local variable to store the context in. ++ * ++ * @note Recursive locking might lead to unexpected behavior, ++ * including lock up. ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_waitqueue_lock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context); ++ ++/** ++ * @fn void rtdm_waitqueue_unlock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context) ++ * @brief Unlock a waitqueue ++ * ++ * Releases the lock on the waitqueue @a wq. ++ * ++ * @param wq waitqueue to unlock. ++ * ++ * @param context name of local variable to retrieve the context from. 
++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_waitqueue_unlock(struct rtdm_wait_queue *wq, rtdm_lockctx_t context); ++ ++/** ++ * @fn void rtdm_waitqueue_signal(struct rtdm_wait_queue *wq) ++ * @brief Signal a waitqueue ++ * ++ * Signals the waitqueue @a wq, waking up a single waiter (if ++ * any). ++ * ++ * @param wq waitqueue to signal. ++ * ++ * @return non-zero if a task has been readied as a result of this ++ * call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_signal(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_broadcast(struct rtdm_wait_queue *wq) ++ * @brief Broadcast a waitqueue ++ * ++ * Broadcast the waitqueue @a wq, waking up all waiters. Each ++ * readied task may assume to have received the wake up event. ++ * ++ * @param wq waitqueue to broadcast. ++ * ++ * @return non-zero if at least one task has been readied as a result ++ * of this call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_broadcast(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_flush(struct rtdm_wait_queue *wq) ++ * @brief Flush a waitqueue ++ * ++ * Flushes the waitqueue @a wq, unblocking all waiters with an error ++ * status (-EINTR). ++ * ++ * @param wq waitqueue to flush. ++ * ++ * @return non-zero if at least one task has been readied as a result ++ * of this call, zero otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_flush(struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn void rtdm_waitqueue_wakeup(struct rtdm_wait_queue *wq, rtdm_task_t waiter) ++ * @brief Signal a particular waiter on a waitqueue ++ * ++ * Signals the waitqueue @a wq, waking up waiter @a waiter only, ++ * which must be currently sleeping on the waitqueue. ++ * ++ * @param wq waitqueue to signal. ++ * ++ * @param waiter RTDM task to wake up. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void rtdm_waitqueue_wakeup(struct rtdm_wait_queue *wq, rtdm_task_t waiter); ++ ++/** ++ * @fn rtdm_for_each_waiter(rtdm_task_t pos, struct rtdm_wait_queue *wq) ++ * @brief Simple iterator for waitqueues ++ * ++ * This construct traverses the wait list of a given waitqueue ++ * @a wq, assigning each RTDM task pointer to the cursor variable ++ * @a pos, which must be of type rtdm_task_t. ++ * ++ * @a wq must have been locked by a call to rtdm_waitqueue_lock() ++ * prior to traversing its wait list. ++ * ++ * @param pos cursor variable holding a pointer to the RTDM task ++ * being fetched. ++ * ++ * @param wq waitqueue to scan. ++ * ++ * @note The waitqueue should not be signaled, broadcast or flushed ++ * during the traversal, unless the loop is aborted immediately ++ * after. Should multiple waiters be readied while iterating, the safe ++ * form rtdm_for_each_waiter_safe() must be used for traversal ++ * instead. ++ * ++ * @coretags{unrestricted} ++ */ ++rtdm_for_each_waiter(rtdm_task_t pos, struct rtdm_wait_queue *wq); ++ ++/** ++ * @fn rtdm_for_each_waiter_safe(rtdm_task_t pos, rtdm_task_t tmp, struct rtdm_wait_queue *wq) ++ * @brief Safe iterator for waitqueues ++ * ++ * This construct traverses the wait list of a given waitqueue ++ * @a wq, assigning each RTDM task pointer to the cursor variable ++ * @a pos, which must be of type rtdm_task_t. ++ * ++ * Unlike with rtdm_for_each_waiter(), the waitqueue may be signaled, ++ * broadcast or flushed during the traversal. 
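
The producer side of the previous sketches: an interrupt handler updates the wait condition under the waitqueue lock, then signals the sleeper. The register read is stubbed out and the mydrv_* names remain hypothetical:

#include <rtdm/driver.h>

static int mydrv_interrupt(rtdm_irq_t *irq_handle)
{
        struct mydrv_state *st = rtdm_irq_get_arg(irq_handle, struct mydrv_state);
        rtdm_lockctx_t ctx;

        rtdm_waitqueue_lock(&st->wq, ctx);
        st->sample = 0xdeadbeef;        /* would be read from the device here */
        st->data_ready = 1;             /* update the condition first... */
        rtdm_waitqueue_unlock(&st->wq, ctx);

        /* ...then wake up a sleeper blocked in mydrv_read_rt(). */
        rtdm_waitqueue_signal(&st->wq);

        return RTDM_IRQ_HANDLED;
}
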
++ * ++ * @a wq must have been locked by a call to rtdm_waitqueue_lock() ++ * prior to traversing its wait list. ++ * ++ * @param pos cursor variable holding a pointer to the RTDM task ++ * being fetched. ++ * ++ * @param tmp temporary cursor variable. ++ * ++ * @param wq waitqueue to scan. ++ * ++ * @coretags{unrestricted} ++ */ ++rtdm_for_each_waiter_safe(rtdm_task_t pos, rtdm_task_t tmp, struct rtdm_wait_queue *wq); ++ ++/** @} rtdm_sync */ ++ ++/** ++ * @defgroup rtdm_interdriver_api Driver to driver services ++ * Inter-driver interface ++ *@{ ++ */ ++ ++/** ++ * @brief Open a device ++ * ++ * Refer to rtdm_open() for parameters and return values ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_open(const char *path, int oflag, ...); ++ ++/** ++ * @brief Create a socket ++ * ++ * Refer to rtdm_socket() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_socket(int protocol_family, int socket_type, int protocol); ++ ++/** ++ * @brief Close a device or socket ++ * ++ * Refer to rtdm_close() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_close(int fd); ++ ++/** ++ * @brief Issue an IOCTL ++ * ++ * Refer to rtdm_ioctl() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_ioctl(int fd, int request, ...); ++ ++/** ++ * @brief Read from device ++ * ++ * Refer to rtdm_read() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_read(int fd, void *buf, size_t nbyte); ++ ++/** ++ * @brief Write to device ++ * ++ * Refer to rtdm_write() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_write(int fd, const void *buf, size_t nbyte); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recvmsg() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvmsg(int fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recvfrom() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvfrom(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *from, socklen_t *fromlen); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * Refer to rtdm_recv() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recv(int fd, void *buf, size_t len, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_sendmsg() for parameters and return values. 
Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendmsg(int fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_sendto() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendto(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *to, socklen_t tolen); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * Refer to rtdm_send() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_send(int fd, const void *buf, size_t len, int flags); ++ ++/** ++ * @brief Bind to local address ++ * ++ * Refer to rtdm_bind() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_bind(int fd, const struct sockaddr *my_addr, socklen_t addrlen); ++ ++/** ++ * @brief Connect to remote address ++ * ++ * Refer to rtdm_connect() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_connect(int fd, const struct sockaddr *serv_addr, socklen_t addrlen); ++ ++/** ++ * @brief Listen to incoming connection requests ++ * ++ * Refer to rtdm_listen() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_listen(int fd, int backlog); ++ ++/** ++ * @brief Accept a connection request ++ * ++ * Refer to rtdm_accept() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_accept(int fd, struct sockaddr *addr, socklen_t *addrlen); ++ ++/** ++ * @brief Shut down parts of a connection ++ * ++ * Refer to rtdm_shutdown() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_shutdown(int fd, int how); ++ ++/** ++ * @brief Get socket option ++ * ++ * Refer to rtdm_getsockopt() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockopt(int fd, int level, int optname, void *optval, ++ socklen_t *optlen); ++ ++/** ++ * @brief Set socket option ++ * ++ * Refer to rtdm_setsockopt() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_setsockopt(int fd, int level, int optname, const void *optval, ++ socklen_t optlen); ++ ++/** ++ * @brief Get local socket address ++ * ++ * Refer to rtdm_getsockname() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
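
A sketch of the inter-driver calls above: a stacked driver attaches to another RTDM device from a non-real-time context (module init or a state-management start handler) and releases it symmetrically. The device path and the request code are made up, and it is assumed the kernel-side prototypes are available via <rtdm/rtdm.h>:

#include <linux/fcntl.h>
#include <rtdm/driver.h>
#include <rtdm/rtdm.h>

static int lowlevel_fd = -1;    /* connection to the underlying RTDM device */

static int stacked_attach(void)
{
        int ret;

        /* rtdm_open() is secondary-only: call it from module init or ->start(). */
        lowlevel_fd = rtdm_open("/dev/rtdm/lowlevel0", O_RDWR);
        if (lowlevel_fd < 0)
                return lowlevel_fd;

        /* Configure the lower device; 0x4711 is a made-up request code. */
        ret = rtdm_ioctl(lowlevel_fd, 0x4711, 115200UL);
        if (ret < 0) {
                rtdm_close(lowlevel_fd);
                lowlevel_fd = -1;
                return ret;
        }

        return 0;
}

static void stacked_detach(void)
{
        if (lowlevel_fd >= 0)
                rtdm_close(lowlevel_fd);        /* secondary-only as well */
}
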
++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockname(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** ++ * @brief Get socket destination address ++ * ++ * Refer to rtdm_getpeername() for parameters and return values. Action ++ * depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getpeername(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** @} Inter-driver calls */ ++ ++/** @} */ ++ ++/*! ++ * @addtogroup rtdm_user_api ++ * @{ ++ */ ++ ++/** ++ * @brief Open a device ++ * ++ * @param[in] path Device name ++ * @param[in] oflag Open flags ++ * @param ... Further parameters will be ignored. ++ * ++ * @return Positive file descriptor value on success, otherwise a negative ++ * error code. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c open() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_open(const char *path, int oflag, ...); ++ ++/** ++ * @brief Create a socket ++ * ++ * @param[in] protocol_family Protocol family (@c PF_xxx) ++ * @param[in] socket_type Socket type (@c SOCK_xxx) ++ * @param[in] protocol Protocol ID, 0 for default ++ * ++ * @return Positive file descriptor value on success, otherwise a negative ++ * error code. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c socket() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_socket(int protocol_family, int socket_type, int protocol); ++ ++/** ++ * @brief Close a device or socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() or rtdm_socket() ++ * ++ * @return 0 on success, otherwise a negative error code. ++ * ++ * @note If the matching rtdm_open() or rtdm_socket() call took place in ++ * non-real-time context, rtdm_close() must be issued within non-real-time ++ * as well. Otherwise, the call will fail. ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c close() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_close(int fd); ++ ++/** ++ * @brief Issue an IOCTL ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() or rtdm_socket() ++ * @param[in] request IOCTL code ++ * @param ... Optional third argument, depending on IOCTL function ++ * (@c void @c * or @c unsigned @c long) ++ * ++ * @return Positiv value on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c ioctl() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_ioctl(int fd, int request, ...); ++ ++/** ++ * @brief Read from device ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() ++ * @param[out] buf Input buffer ++ * @param[in] nbyte Number of bytes to read ++ * ++ * @return Number of bytes read, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
++ * ++ * @see @c read() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_read(int fd, void *buf, size_t nbyte); ++ ++/** ++ * @brief Write to device ++ * ++ * @param[in] fd File descriptor as returned by rtdm_open() ++ * @param[in] buf Output buffer ++ * @param[in] nbyte Number of bytes to write ++ * ++ * @return Number of bytes written, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c write() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_write(int fd, const void *buf, size_t nbyte); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in,out] msg Message descriptor ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recvmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvmsg(int fd, struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * @param[out] from Buffer for message sender address ++ * @param[in,out] fromlen Address buffer size ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recvfrom() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recvfrom(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *from, socklen_t *fromlen); ++ ++/** ++ * @brief Receive message from socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes received, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c recv() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_recv(int fd, void *buf, size_t len, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] msg Message descriptor ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
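
On the application side, these services are reached through the regular POSIX names once the program is linked against libcobalt, which redirects them to the real-time core for RTDM devices. A short user-space sketch; the device node name is hypothetical:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[64];
        ssize_t n;
        int fd;

        fd = open("/dev/rtdm/mydev0", O_RDWR); /* hypothetical device node */
        if (fd < 0) {
                perror("open");
                return 1;
        }

        n = read(fd, buf, sizeof(buf)); /* served by the driver's ->read_rt/_nrt */
        if (n > 0)
                printf("read %zd bytes\n", n);

        close(fd);
        return 0;
}
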
++ * ++ * @see @c sendmsg() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendmsg(int fd, const struct user_msghdr *msg, int flags); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * @param[in] to Buffer for message destination address ++ * @param[in] tolen Address buffer size ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c sendto() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_sendto(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *to, socklen_t tolen); ++ ++/** ++ * @brief Transmit message to socket ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] buf Message buffer ++ * @param[in] len Message buffer size ++ * @param[in] flags Message flags ++ * ++ * @return Number of bytes sent, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c send() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++ssize_t rtdm_send(int fd, const void *buf, size_t len, int flags); ++ ++/** ++ * @brief Bind to local address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] my_addr Address buffer ++ * @param[in] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c bind() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_bind(int fd, const struct sockaddr *my_addr, socklen_t addrlen); ++ ++/** ++ * @brief Connect to remote address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] serv_addr Address buffer ++ * @param[in] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c connect() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_connect(int fd, const struct sockaddr *serv_addr, ++ socklen_t addrlen); ++ ++/** ++ * @brief Listen for incomming connection requests ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] backlog Maximum queue length ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
++ * ++ * @see @c listen() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_listen(int fd, int backlog); ++ ++/** ++ * @brief Accept connection requests ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] addr Buffer for remote address ++ * @param[in,out] addrlen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c accept() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{mode-unrestricted, might-switch} ++ */ ++int rtdm_accept(int fd, struct sockaddr *addr, socklen_t *addrlen); ++ ++/** ++ * @brief Shut down parts of a connection ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] how Specifies the part to be shut down (@c SHUT_xxx) ++* ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c shutdown() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int rtdm_shutdown(int fd, int how); ++ ++/** ++ * @brief Get socket option ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] level Addressed stack level ++ * @param[in] optname Option name ID ++ * @param[out] optval Value buffer ++ * @param[in,out] optlen Value buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c getsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockopt(int fd, int level, int optname, void *optval, ++ socklen_t *optlen); ++ ++/** ++ * @brief Set socket option ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[in] level Addressed stack level ++ * @param[in] optname Option name ID ++ * @param[in] optval Value buffer ++ * @param[in] optlen Value buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c setsockopt() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_setsockopt(int fd, int level, int optname, const void *optval, ++ socklen_t optlen); ++ ++/** ++ * @brief Get local socket address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] name Address buffer ++ * @param[in,out] namelen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". 
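
A user-space socket sketch for the calls above, assuming a protocol driver such as RTnet has registered AF_INET/SOCK_DGRAM support with RTDM; with libcobalt wrapping, socket() is first resolved against the RTDM protocol registry and falls back to the regular Linux stack otherwise. The address and port are made up:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

ssize_t send_datagram(const void *payload, size_t len)
{
        struct sockaddr_in dst;
        ssize_t ret;
        int fd;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -1;

        memset(&dst, 0, sizeof(dst));
        dst.sin_family = AF_INET;
        dst.sin_port = htons(7777);                     /* made up */
        dst.sin_addr.s_addr = inet_addr("192.168.0.2"); /* made up */

        ret = sendto(fd, payload, len, 0,
                     (struct sockaddr *)&dst, sizeof(dst));
        close(fd);

        return ret;
}
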
++ * ++ * @see @c getsockname() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getsockname(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++/** ++ * @brief Get socket destination address ++ * ++ * @param[in] fd File descriptor as returned by rtdm_socket() ++ * @param[out] name Address buffer ++ * @param[in,out] namelen Address buffer size ++ * ++ * @return 0 on success, otherwise negative error code ++ * ++ * Action depends on driver implementation, see @ref rtdm_profiles ++ * "Device Profiles". ++ * ++ * @see @c getpeername() in IEEE Std 1003.1, ++ * http://www.opengroup.org/onlinepubs/009695399 ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int rtdm_getpeername(int fd, struct sockaddr *name, socklen_t *namelen); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** @} */ +--- linux/kernel/xenomai/rtdm/device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/device.c 2021-04-07 16:01:26.166635682 +0800 +@@ -0,0 +1,649 @@ ++/* ++ * Real-Time Driver Model for Xenomai, device management ++ * ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#include ++#include ++ ++/** ++ * @ingroup rtdm ++ * @defgroup rtdm_profiles Device Profiles ++ * ++ * Pre-defined classes of real-time devices ++ * ++ * Device profiles define which operation handlers a driver of a ++ * certain class of devices has to implement, which name or protocol ++ * it has to register, which IOCTLs it has to provide, and further ++ * details. Sub-classes can be defined in order to extend a device ++ * profile with more hardware-specific functions. 
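
Putting the pieces together, a driver describes its profile and handlers in a struct rtdm_driver / struct rtdm_device pair. The sketch below wires up the hypothetical handlers from the earlier examples; the class/subclass choices and the device label are arbitrary:

#include <rtdm/driver.h>

static struct rtdm_driver mydrv_driver = {
        .profile_info   = RTDM_PROFILE_INFO(mydrv,
                                            RTDM_CLASS_EXPERIMENTAL,
                                            RTDM_SUBCLASS_GENERIC,
                                            1),
        .device_flags   = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE,
        .device_count   = 1,
        .context_size   = sizeof(struct mydrv_state),
        .ops = {
                .open           = mydrv_open,
                .close          = mydrv_close,
                .ioctl_rt       = mydrv_ioctl_rt,
                .read_rt        = mydrv_read_rt,
        },
};

static struct rtdm_device mydrv_device = {
        .driver = &mydrv_driver,
        .label  = "mydev%d",    /* shows up as /dev/rtdm/mydev0 */
};
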
++ */ ++ ++/** ++ * @addtogroup rtdm_driver_interface ++ * @{ ++ */ ++ ++#define RTDM_DEVICE_MAGIC 0x82846877 ++ ++static struct rb_root protocol_devices; ++ ++static DEFINE_MUTEX(register_lock); ++static DECLARE_BITMAP(protocol_devices_minor_map, RTDM_MAX_MINOR); ++ ++static struct class *rtdm_class; ++ ++static int enosys(void) ++{ ++ return -ENOSYS; ++} ++ ++void __rtdm_put_device(struct rtdm_device *dev) ++{ ++ secondary_mode_only(); ++ ++ if (atomic_dec_and_test(&dev->refcount)) ++ wake_up(&dev->putwq); ++} ++ ++static inline xnkey_t get_proto_id(int pf, int type) ++{ ++ xnkey_t llpf = (unsigned int)pf; ++ return (llpf << 32) | (unsigned int)type; ++} ++ ++struct rtdm_device *__rtdm_get_namedev(const char *path) ++{ ++ struct rtdm_device *dev; ++ xnhandle_t handle; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ /* skip common /dev prefix */ ++ if (strncmp(path, "/dev/", 5) == 0) ++ path += 5; ++ ++ /* skip RTDM devnode root */ ++ if (strncmp(path, "rtdm/", 5) == 0) ++ path += 5; ++ ++ ret = xnregistry_bind(path, XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (ret) ++ return NULL; ++ ++ mutex_lock(®ister_lock); ++ ++ dev = xnregistry_lookup(handle, NULL); ++ if (dev && dev->magic == RTDM_DEVICE_MAGIC) ++ __rtdm_get_device(dev); ++ else ++ dev = NULL; ++ ++ mutex_unlock(®ister_lock); ++ ++ return dev; ++} ++ ++struct rtdm_device *__rtdm_get_protodev(int protocol_family, int socket_type) ++{ ++ struct rtdm_device *dev = NULL; ++ struct xnid *xnid; ++ xnkey_t id; ++ ++ secondary_mode_only(); ++ ++ id = get_proto_id(protocol_family, socket_type); ++ ++ mutex_lock(®ister_lock); ++ ++ xnid = xnid_fetch(&protocol_devices, id); ++ if (xnid) { ++ dev = container_of(xnid, struct rtdm_device, proto.id); ++ __rtdm_get_device(dev); ++ } ++ ++ mutex_unlock(®ister_lock); ++ ++ return dev; ++} ++ ++/** ++ * @ingroup rtdm_driver_interface ++ * @defgroup rtdm_device_register Device Registration Services ++ * @{ ++ */ ++ ++static char *rtdm_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s", dev_name(dev)); ++} ++ ++static ssize_t profile_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ ++ return sprintf(buf, "%d,%d\n", ++ dev->driver->profile_info.class_id, ++ dev->driver->profile_info.subclass_id); ++} ++ ++static ssize_t refcount_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ ++ return sprintf(buf, "%d\n", atomic_read(&dev->refcount)); ++} ++ ++#define cat_count(__buf, __str) \ ++ ({ \ ++ int __ret = sizeof(__str) - 1; \ ++ strcat(__buf, __str); \ ++ __ret; \ ++ }) ++ ++static ssize_t flags_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ struct rtdm_driver *drv = dev->driver; ++ ++ return sprintf(buf, "%#x\n", drv->device_flags); ++ ++} ++ ++static ssize_t type_show(struct device *kdev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(kdev); ++ struct rtdm_driver *drv = dev->driver; ++ int ret; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) ++ ret = cat_count(buf, "named\n"); ++ else ++ ret = cat_count(buf, "protocol\n"); ++ ++ return ret; ++ ++} ++ ++#ifdef ATTRIBUTE_GROUPS ++ ++static DEVICE_ATTR_RO(profile); ++static DEVICE_ATTR_RO(refcount); ++static DEVICE_ATTR_RO(flags); ++static DEVICE_ATTR_RO(type); ++ ++static struct attribute *rtdm_attrs[] = { ++ &dev_attr_profile.attr, 
++ &dev_attr_refcount.attr, ++ &dev_attr_flags.attr, ++ &dev_attr_type.attr, ++ NULL, ++}; ++ATTRIBUTE_GROUPS(rtdm); ++ ++#else /* !ATTRIBUTE_GROUPS */ ++ ++/* ++ * Cope with legacy sysfs attributes. Scheduled for removal when 3.10 ++ * is at EOL for us. ++ */ ++static struct device_attribute rtdm_attrs[] = { ++ DEVICE_ATTR_RO(profile), ++ DEVICE_ATTR_RO(refcount), ++ DEVICE_ATTR_RO(flags), ++ DEVICE_ATTR_RO(type), ++ __ATTR_NULL ++}; ++ ++#define dev_groups dev_attrs ++#define rtdm_groups rtdm_attrs ++ ++#endif /* !ATTRIBUTE_GROUPS */ ++ ++static int state_change_notifier(struct notifier_block *nb, ++ unsigned long action, void *data) ++{ ++ struct rtdm_driver *drv; ++ int ret; ++ ++ drv = container_of(nb, struct rtdm_driver, nb_statechange); ++ ++ switch (action) { ++ case COBALT_STATE_WARMUP: ++ if (drv->smops.start == NULL) ++ return NOTIFY_DONE; ++ ret = drv->smops.start(drv); ++ if (ret) ++ printk(XENO_WARNING ++ "failed starting driver %s (%d)\n", ++ drv->profile_info.name, ret); ++ break; ++ case COBALT_STATE_TEARDOWN: ++ if (drv->smops.stop == NULL) ++ return NOTIFY_DONE; ++ ret = drv->smops.stop(drv); ++ if (ret) ++ printk(XENO_WARNING ++ "failed stopping driver %s (%d)\n", ++ drv->profile_info.name, ret); ++ break; ++ default: ++ return NOTIFY_DONE; ++ } ++ ++ return NOTIFY_OK; ++} ++ ++static int register_driver(struct rtdm_driver *drv) ++{ ++ dev_t rdev; ++ int ret; ++ ++ if (drv->profile_info.magic == RTDM_CLASS_MAGIC) { ++ atomic_inc(&drv->refcount); ++ return 0; ++ } ++ ++ if (drv->profile_info.magic != ~RTDM_CLASS_MAGIC) { ++ XENO_WARN_ON_ONCE(COBALT, 1); ++ return -EINVAL; ++ } ++ ++ switch (drv->device_flags & RTDM_DEVICE_TYPE_MASK) { ++ case RTDM_NAMED_DEVICE: ++ case RTDM_PROTOCOL_DEVICE: ++ break; ++ default: ++ printk(XENO_WARNING "%s has invalid device type (%#x)\n", ++ drv->profile_info.name, ++ drv->device_flags & RTDM_DEVICE_TYPE_MASK); ++ return -EINVAL; ++ } ++ ++ if (drv->device_count <= 0 || ++ drv->device_count > RTDM_MAX_MINOR) { ++ printk(XENO_WARNING "%s has invalid device count (%d)\n", ++ drv->profile_info.name, drv->device_count); ++ return -EINVAL; ++ } ++ ++ if ((drv->device_flags & RTDM_NAMED_DEVICE) == 0) ++ goto done; ++ ++ if (drv->base_minor < 0 || ++ drv->base_minor >= RTDM_MAX_MINOR) { ++ printk(XENO_WARNING "%s has invalid base minor (%d)\n", ++ drv->profile_info.name, drv->base_minor); ++ return -EINVAL; ++ } ++ ++ ret = alloc_chrdev_region(&rdev, drv->base_minor, drv->device_count, ++ drv->profile_info.name); ++ if (ret) { ++ printk(XENO_WARNING "cannot allocate chrdev region %s[%d..%d]\n", ++ drv->profile_info.name, drv->base_minor, ++ drv->base_minor + drv->device_count - 1); ++ return ret; ++ } ++ ++ cdev_init(&drv->named.cdev, &rtdm_dumb_fops); ++ ret = cdev_add(&drv->named.cdev, rdev, drv->device_count); ++ if (ret) { ++ printk(XENO_WARNING "cannot create cdev series for %s\n", ++ drv->profile_info.name); ++ goto fail_cdev; ++ } ++ ++ drv->named.major = MAJOR(rdev); ++ bitmap_zero(drv->minor_map, RTDM_MAX_MINOR); ++ ++done: ++ atomic_set(&drv->refcount, 1); ++ drv->nb_statechange.notifier_call = state_change_notifier; ++ drv->nb_statechange.priority = 0; ++ cobalt_add_state_chain(&drv->nb_statechange); ++ drv->profile_info.magic = RTDM_CLASS_MAGIC; ++ ++ return 0; ++ ++fail_cdev: ++ unregister_chrdev_region(rdev, drv->device_count); ++ ++ return ret; ++} ++ ++static void unregister_driver(struct rtdm_driver *drv) ++{ ++ XENO_BUG_ON(COBALT, drv->profile_info.magic != RTDM_CLASS_MAGIC); ++ ++ if (!atomic_dec_and_test(&drv->refcount)) ++ 
return; ++ ++ cobalt_remove_state_chain(&drv->nb_statechange); ++ ++ drv->profile_info.magic = ~RTDM_CLASS_MAGIC; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ cdev_del(&drv->named.cdev); ++ unregister_chrdev_region(MKDEV(drv->named.major, drv->base_minor), ++ drv->device_count); ++ } ++} ++ ++/** ++ * @brief Register a RTDM device ++ * ++ * Registers a device in the RTDM namespace. ++ * ++ * @param[in] dev Device descriptor. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EINVAL is returned if the descriptor contains invalid ++ * entries. RTDM_PROFILE_INFO() must appear in the list of ++ * initializers for the driver properties. ++ * ++ * - -EEXIST is returned if the specified device name of protocol ID is ++ * already in use. ++ * ++ * - -ENOMEM is returned if a memory allocation failed in the process ++ * of registering the device. ++ * ++ * - -EAGAIN is returned if no registry slot is available (check/raise ++ * CONFIG_XENO_OPT_REGISTRY_NRSLOTS). ++ * ++ * - -ENOSYS is returned if the real-time core is disabled. ++ * ++ * - -ENXIO is returned if no valid minor could be assigned ++ * ++ * @coretags{secondary-only} ++ */ ++int rtdm_dev_register(struct rtdm_device *dev) ++{ ++ struct class *kdev_class = rtdm_class; ++ struct device *kdev = NULL; ++ struct rtdm_driver *drv; ++ int ret, major, minor; ++ xnkey_t id; ++ dev_t rdev; ++ const char *dev_name; ++ ++ secondary_mode_only(); ++ ++ if (!realtime_core_enabled()) ++ return -ENOSYS; ++ ++ mutex_lock(®ister_lock); ++ ++ dev->name = NULL; ++ drv = dev->driver; ++ ret = register_driver(drv); ++ if (ret) { ++ mutex_unlock(®ister_lock); ++ return ret; ++ } ++ ++ dev->ops = drv->ops; ++ if (drv->device_flags & RTDM_NAMED_DEVICE) ++ dev->ops.socket = (typeof(dev->ops.socket))enosys; ++ else ++ dev->ops.open = (typeof(dev->ops.open))enosys; ++ ++ INIT_LIST_HEAD(&dev->openfd_list); ++ init_waitqueue_head(&dev->putwq); ++ dev->ops.close = __rtdm_dev_close; /* Interpose on driver's handler. 
*/ ++ atomic_set(&dev->refcount, 0); ++ ++ if (drv->profile_info.kdev_class) ++ kdev_class = drv->profile_info.kdev_class; ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ if (drv->device_flags & RTDM_FIXED_MINOR) { ++ minor = dev->minor; ++ if (minor < 0 || ++ minor >= drv->base_minor + drv->device_count) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ } else { ++ minor = find_first_zero_bit(drv->minor_map, RTDM_MAX_MINOR); ++ if (minor >= RTDM_MAX_MINOR) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ dev->minor = minor; ++ } ++ ++ major = drv->named.major; ++ dev->name = kasformat(dev->label, minor); ++ if (dev->name == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ if (dev->name[0] == '/') { ++ dev_name = dev->name+1; ++ } else { ++ dev_name = dev->name; ++ } ++ ret = xnregistry_enter(dev_name, dev, ++ &dev->named.handle, NULL); ++ if (ret) ++ goto fail; ++ ++ rdev = MKDEV(major, minor); ++ kdev = device_create(kdev_class, NULL, rdev, ++ dev, kbasename(dev->label), minor); ++ if (IS_ERR(kdev)) { ++ xnregistry_remove(dev->named.handle); ++ ret = PTR_ERR(kdev); ++ goto fail2; ++ } ++ __set_bit(minor, drv->minor_map); ++ } else { ++ minor = find_first_zero_bit(protocol_devices_minor_map, ++ RTDM_MAX_MINOR); ++ if (minor >= RTDM_MAX_MINOR) { ++ ret = -ENXIO; ++ goto fail; ++ } ++ dev->minor = minor; ++ ++ dev->name = kstrdup(dev->label, GFP_KERNEL); ++ if (dev->name == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ rdev = MKDEV(0, minor); ++ kdev = device_create(kdev_class, NULL, rdev, ++ dev, dev->name); ++ if (IS_ERR(kdev)) { ++ ret = PTR_ERR(kdev); ++ goto fail2; ++ } ++ ++ id = get_proto_id(drv->protocol_family, drv->socket_type); ++ ret = xnid_enter(&protocol_devices, &dev->proto.id, id); ++ if (ret < 0) ++ goto fail; ++ __set_bit(minor, protocol_devices_minor_map); ++ } ++ ++ dev->rdev = rdev; ++ dev->kdev = kdev; ++ dev->magic = RTDM_DEVICE_MAGIC; ++ dev->kdev_class = kdev_class; ++ ++ mutex_unlock(®ister_lock); ++ ++ trace_cobalt_device_register(dev); ++ ++ return 0; ++fail: ++ if (kdev) ++ device_destroy(kdev_class, rdev); ++fail2: ++ unregister_driver(drv); ++ ++ mutex_unlock(®ister_lock); ++ ++ if (dev->name) ++ kfree(dev->name); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_dev_register); ++ ++/** ++ * @brief Unregister a RTDM device ++ * ++ * Removes the device from the RTDM namespace. This routine first ++ * attempts to teardown all active connections to the @a device prior ++ * to unregistering. ++ * ++ * @param[in] dev Device descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void rtdm_dev_unregister(struct rtdm_device *dev) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ ++ secondary_mode_only(); ++ ++ trace_cobalt_device_unregister(dev); ++ ++ /* Lock out any further connection. */ ++ dev->magic = ~RTDM_DEVICE_MAGIC; ++ ++ /* Flush all fds from this device. */ ++ rtdm_device_flush_fds(dev); ++ ++ /* Then wait for the ongoing connections to finish. */ ++ wait_event(dev->putwq, ++ atomic_read(&dev->refcount) == 0); ++ ++ mutex_lock(®ister_lock); ++ ++ if (drv->device_flags & RTDM_NAMED_DEVICE) { ++ xnregistry_remove(dev->named.handle); ++ __clear_bit(dev->minor, drv->minor_map); ++ } else { ++ xnid_remove(&protocol_devices, &dev->proto.id); ++ __clear_bit(dev->minor, protocol_devices_minor_map); ++ } ++ ++ device_destroy(dev->kdev_class, dev->rdev); ++ ++ unregister_driver(drv); ++ ++ mutex_unlock(®ister_lock); ++ ++ kfree(dev->name); ++} ++EXPORT_SYMBOL_GPL(rtdm_dev_unregister); ++ ++/** ++ * @brief Set the kernel device class of a RTDM driver. 
++ * ++ * Set the kernel device class assigned to the RTDM driver. By ++ * default, RTDM drivers belong to Linux's "rtdm" device class, ++ * creating a device node hierarchy rooted at /dev/rtdm, and sysfs ++ * nodes under /sys/class/rtdm. ++ * ++ * This call assigns a user-defined kernel device class to the RTDM ++ * driver, so that its devices are created into a different system ++ * hierarchy. ++ * ++ * rtdm_drv_set_sysclass() is meaningful only before the first device ++ * which is attached to @a drv is registered by a call to ++ * rtdm_dev_register(). ++ * ++ * @param[in] drv Address of the RTDM driver descriptor. ++ * ++ * @param[in] cls Pointer to the kernel device class. NULL is allowed ++ * to clear a previous setting, switching back to the default "rtdm" ++ * device class. ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBUSY is returned if the kernel device class has already been ++ * set for @a drv, or some device(s) attached to @a drv are currently ++ * registered. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @attention The kernel device class set by this call is not related to ++ * the RTDM class identification as defined by the @ref rtdm_profiles ++ * "RTDM profiles" in any way. This is strictly related to the Linux ++ * kernel device hierarchy. ++ */ ++int rtdm_drv_set_sysclass(struct rtdm_driver *drv, struct class *cls) ++{ ++ if ((cls && drv->profile_info.kdev_class) || ++ atomic_read(&drv->refcount)) ++ return -EBUSY; ++ ++ drv->profile_info.kdev_class = cls; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_drv_set_sysclass); ++ ++/** @} */ ++ ++int __init rtdm_init(void) ++{ ++ xntree_init(&protocol_devices); ++ ++ rtdm_class = class_create(THIS_MODULE, "rtdm"); ++ if (IS_ERR(rtdm_class)) { ++ printk(XENO_ERR "cannot create RTDM sysfs class\n"); ++ return PTR_ERR(rtdm_class); ++ } ++ rtdm_class->dev_groups = rtdm_groups; ++ rtdm_class->devnode = rtdm_devnode; ++ ++ bitmap_zero(protocol_devices_minor_map, RTDM_MAX_MINOR); ++ ++ return 0; ++} ++ ++void rtdm_cleanup(void) ++{ ++ class_destroy(rtdm_class); ++ /* ++ * NOTE: no need to flush the cleanup_queue as no device is ++ * allowed to unregister as long as there are references. ++ */ ++} ++ ++/** @} */ +--- linux/kernel/xenomai/rtdm/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/internal.h 2021-04-07 16:01:26.161635689 +0800 +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _RTDM_INTERNAL_H ++#define _RTDM_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline void __rtdm_get_device(struct rtdm_device *device) ++{ ++ atomic_inc(&device->refcount); ++} ++ ++void __rtdm_put_device(struct rtdm_device *device); ++ ++struct rtdm_device *__rtdm_get_namedev(const char *path); ++ ++struct rtdm_device *__rtdm_get_protodev(int protocol_family, ++ int socket_type); ++ ++void __rtdm_dev_close(struct rtdm_fd *fd); ++ ++int __rtdm_dev_ioctl_core(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg); ++ ++int __rtdm_mmap_from_fdop(struct rtdm_fd *fd, size_t len, off_t offset, ++ int prot, int flags, void **pptr); ++ ++/* nklock held, irqs off. */ ++static inline void rtdm_fd_get_light(struct rtdm_fd *fd) ++{ ++ ++fd->refs; ++} ++ ++int rtdm_init(void); ++ ++void rtdm_cleanup(void); ++ ++extern const struct file_operations rtdm_dumb_fops; ++ ++#endif /* _RTDM_INTERNAL_H */ +--- linux/kernel/xenomai/rtdm/fd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/rtdm/fd.c 2021-04-07 16:01:26.156635696 +0800 +@@ -0,0 +1,1037 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka ++ * Copyright (C) 2005 Joerg Langenberg ++ * Copyright (C) 2013,2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "posix/process.h" ++#include "posix/syscall.h" ++#include "posix/clock.h" ++ ++#define RTDM_SETFL_MASK (O_NONBLOCK) ++ ++DEFINE_PRIVATE_XNLOCK(fdtree_lock); ++static LIST_HEAD(rtdm_fd_cleanup_queue); ++static struct semaphore rtdm_fd_cleanup_sem; ++ ++struct rtdm_fd_index { ++ struct xnid id; ++ struct rtdm_fd *fd; ++}; ++ ++static int enosys(void) ++{ ++ return -ENOSYS; ++} ++ ++static int eadv(void) ++{ ++ return -EADV; ++} ++ ++static inline struct rtdm_fd_index * ++fetch_fd_index(struct cobalt_ppd *p, int ufd) ++{ ++ struct xnid *id = xnid_fetch(&p->fds, ufd); ++ if (id == NULL) ++ return NULL; ++ ++ return container_of(id, struct rtdm_fd_index, id); ++} ++ ++static struct rtdm_fd *fetch_fd(struct cobalt_ppd *p, int ufd) ++{ ++ struct rtdm_fd_index *idx = fetch_fd_index(p, ufd); ++ if (idx == NULL) ++ return NULL; ++ ++ return idx->fd; ++} ++ ++#define assign_invalid_handler(__handler) \ ++ do \ ++ (__handler) = (typeof(__handler))eadv; \ ++ while (0) ++ ++#define __assign_default_handler(__handler, __placeholder) \ ++ do \ ++ if ((__handler) == NULL) \ ++ (__handler) = (typeof(__handler))__placeholder; \ ++ while (0) ++ ++/* Calling this handler should beget EADV if not implemented. 
*/ ++#define assign_invalid_default_handler(__handler) \ ++ __assign_default_handler(__handler, eadv) ++ ++/* Calling this handler should beget ENOSYS if not implemented. */ ++#define assign_default_handler(__handler) \ ++ __assign_default_handler(__handler, enosys) ++ ++#define __rt(__handler) __handler ## _rt ++#define __nrt(__handler) __handler ## _nrt ++ ++/* ++ * Install a placeholder returning EADV if none of the dual handlers ++ * are implemented, ENOSYS otherwise for NULL handlers to trigger the ++ * adaptive switch. ++ */ ++#define assign_default_dual_handlers(__handler) \ ++ do \ ++ if (__rt(__handler) || __nrt(__handler)) { \ ++ assign_default_handler(__rt(__handler)); \ ++ assign_default_handler(__nrt(__handler)); \ ++ } else { \ ++ assign_invalid_handler(__rt(__handler)); \ ++ assign_invalid_handler(__nrt(__handler)); \ ++ } \ ++ while (0) ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++static inline void set_compat_bit(struct rtdm_fd *fd) ++{ ++ struct pt_regs *regs; ++ ++ if (cobalt_ppd_get(0) == &cobalt_kernel_ppd) ++ fd->compat = 0; ++ else { ++ regs = task_pt_regs(current); ++ XENO_BUG_ON(COBALT, !__xn_syscall_p(regs)); ++ fd->compat = __COBALT_CALL_COMPAT(__xn_reg_sys(regs)); ++ } ++} ++ ++#else /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++static inline void set_compat_bit(struct rtdm_fd *fd) ++{ ++} ++ ++#endif /* !CONFIG_XENO_ARCH_SYS3264 */ ++ ++int rtdm_fd_enter(struct rtdm_fd *fd, int ufd, unsigned int magic, ++ struct rtdm_fd_ops *ops) ++{ ++ struct cobalt_ppd *ppd; ++ ++ secondary_mode_only(); ++ ++ if (magic == 0) ++ return -EINVAL; ++ ++ assign_default_dual_handlers(ops->ioctl); ++ assign_default_dual_handlers(ops->read); ++ assign_default_dual_handlers(ops->write); ++ assign_default_dual_handlers(ops->recvmsg); ++ assign_default_dual_handlers(ops->sendmsg); ++ assign_invalid_default_handler(ops->select); ++ assign_invalid_default_handler(ops->mmap); ++ ++ ppd = cobalt_ppd_get(0); ++ fd->magic = magic; ++ fd->ops = ops; ++ fd->owner = ppd; ++ fd->ufd = ufd; ++ fd->refs = 1; ++ fd->stale = false; ++ set_compat_bit(fd); ++ INIT_LIST_HEAD(&fd->next); ++ ++ return 0; ++} ++ ++int rtdm_fd_register(struct rtdm_fd *fd, int ufd) ++{ ++ struct rtdm_fd_index *idx; ++ struct cobalt_ppd *ppd; ++ spl_t s; ++ int ret = 0; ++ ++ ppd = cobalt_ppd_get(0); ++ idx = kmalloc(sizeof(*idx), GFP_KERNEL); ++ if (idx == NULL) ++ return -ENOMEM; ++ ++ idx->fd = fd; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ ret = xnid_enter(&ppd->fds, &idx->id, ufd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ if (ret < 0) { ++ kfree(idx); ++ ret = -EBUSY; ++ } ++ ++ return ret; ++} ++ ++int rtdm_device_new_fd(struct rtdm_fd *fd, int ufd, ++ struct rtdm_device *device) ++{ ++ spl_t s; ++ int ret; ++ ++ ret = rtdm_fd_register(fd, ufd); ++ if (ret < 0) ++ return ret; ++ ++ trace_cobalt_fd_created(fd, ufd); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ list_add(&fd->next, &device->openfd_list); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return 0; ++} ++ ++/** ++ * @brief Retrieve and lock a RTDM file descriptor ++ * ++ * @param[in] ufd User-side file descriptor ++ * @param[in] magic Magic word for lookup validation ++ * ++ * @return Pointer to the RTDM file descriptor matching @a ++ * ufd. Otherwise: ++ * ++ * - ERR_PTR(-EADV) if the use-space handle is either invalid, or not ++ * managed by RTDM. ++ * ++ * - ERR_PTR(-EBADF) if the underlying device is being torned down at ++ * the time of the call. ++ * ++ * @note The file descriptor returned must be later released by a call ++ * to rtdm_fd_put(). 
++ * ++ * @coretags{unrestricted} ++ */ ++struct rtdm_fd *rtdm_fd_get(int ufd, unsigned int magic) ++{ ++ struct cobalt_ppd *p = cobalt_ppd_get(0); ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = fetch_fd(p, ufd); ++ if (fd == NULL || (magic != 0 && fd->magic != magic)) { ++ fd = ERR_PTR(-EADV); ++ goto out; ++ } ++ ++ if (fd->stale) { ++ fd = ERR_PTR(-EBADF); ++ goto out; ++ } ++ ++ ++fd->refs; ++out: ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return fd; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_get); ++ ++struct lostage_trigger_close { ++ struct ipipe_work_header work; /* Must be first */ ++}; ++ ++static int fd_cleanup_thread(void *data) ++{ ++ struct rtdm_fd *fd; ++ int err; ++ spl_t s; ++ ++ for (;;) { ++ set_cpus_allowed_ptr(current, cpu_online_mask); ++ ++ do { ++ err = down_interruptible(&rtdm_fd_cleanup_sem); ++ if (kthread_should_stop()) ++ return 0; ++ } while (err); ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = list_first_entry(&rtdm_fd_cleanup_queue, ++ struct rtdm_fd, cleanup); ++ list_del(&fd->cleanup); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ fd->ops->close(fd); ++ } ++ ++ return 0; ++} ++ ++static void lostage_trigger_close(struct ipipe_work_header *work) ++{ ++ up(&rtdm_fd_cleanup_sem); ++} ++ ++static void __put_fd(struct rtdm_fd *fd, spl_t s) ++{ ++ bool destroy; ++ ++ XENO_WARN_ON(COBALT, fd->refs <= 0); ++ destroy = --fd->refs == 0; ++ if (destroy && !list_empty(&fd->next)) ++ list_del_init(&fd->next); ++ ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ if (!destroy) ++ return; ++ ++ if (ipipe_root_p) ++ fd->ops->close(fd); ++ else { ++ struct lostage_trigger_close closework = { ++ .work = { ++ .size = sizeof(closework), ++ .handler = lostage_trigger_close, ++ }, ++ }; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ list_add_tail(&fd->cleanup, &rtdm_fd_cleanup_queue); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ ipipe_post_work_root(&closework, work); ++ } ++} ++ ++void rtdm_device_flush_fds(struct rtdm_device *dev) ++{ ++ struct rtdm_driver *drv = dev->driver; ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ ++ while (!list_empty(&dev->openfd_list)) { ++ fd = list_get_entry_init(&dev->openfd_list, struct rtdm_fd, next); ++ fd->stale = true; ++ if (drv->ops.close) { ++ rtdm_fd_get_light(fd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ drv->ops.close(fd); ++ rtdm_fd_put(fd); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ } ++ } ++ ++ xnlock_put_irqrestore(&fdtree_lock, s); ++} ++ ++/** ++ * @brief Release a RTDM file descriptor obtained via rtdm_fd_get() ++ * ++ * @param[in] fd RTDM file descriptor to release ++ * ++ * @note Every call to rtdm_fd_get() must be matched by a call to ++ * rtdm_fd_put(). ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_fd_put(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __put_fd(fd, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_put); ++ ++/** ++ * @brief Hold a reference on a RTDM file descriptor ++ * ++ * @param[in] fd Target file descriptor ++ * ++ * @note rtdm_fd_lock() increments the reference counter of @a fd. You ++ * only need to call this function in special scenarios, e.g. when ++ * keeping additional references to the file descriptor that have ++ * different lifetimes. Only use rtdm_fd_lock() on descriptors that ++ * are currently locked via an earlier rtdm_fd_get()/rtdm_fd_lock() or ++ * while running a device operation handler. 
++ * ++ * @coretags{unrestricted} ++ */ ++int rtdm_fd_lock(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ if (fd->refs == 0) { ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ return -EIDRM; ++ } ++ ++fd->refs; ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_lock); ++ ++/** ++ * @brief Drop a reference on a RTDM file descriptor ++ * ++ * @param[in] fd Target file descriptor ++ * ++ * @note Every call to rtdm_fd_lock() must be matched by a call to ++ * rtdm_fd_unlock(). ++ * ++ * @coretags{unrestricted} ++ */ ++void rtdm_fd_unlock(struct rtdm_fd *fd) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __put_fd(fd, s); ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_unlock); ++ ++int rtdm_fd_fcntl(int ufd, int cmd, ...) ++{ ++ struct rtdm_fd *fd; ++ va_list ap; ++ long arg; ++ int ret; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return PTR_ERR(fd); ++ ++ va_start(ap, cmd); ++ arg = va_arg(ap, long); ++ va_end(ap); ++ ++ switch (cmd) { ++ case F_GETFL: ++ ret = fd->oflags; ++ break; ++ case F_SETFL: ++ fd->oflags = (fd->oflags & ~RTDM_SETFL_MASK) | ++ (arg & RTDM_SETFL_MASK); ++ ret = 0; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ rtdm_fd_put(fd); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_fcntl); ++ ++static struct rtdm_fd *get_fd_fixup_mode(int ufd) ++{ ++ struct xnthread *thread; ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return fd; ++ ++ /* ++ * Mode is selected according to the following convention: ++ * ++ * - Cobalt threads must try running the syscall from primary ++ * mode as a first attempt, regardless of their scheduling ++ * class. The driver handler may ask for demoting the caller ++ * to secondary mode by returning -ENOSYS. ++ * ++ * - Regular threads (i.e. not bound to Cobalt) may only run ++ * the syscall from secondary mode. ++ */ ++ thread = xnthread_current(); ++ if (unlikely(ipipe_root_p)) { ++ if (thread == NULL || ++ xnthread_test_localinfo(thread, XNDESCENT)) ++ return fd; ++ } else if (likely(thread)) ++ return fd; ++ ++ /* ++ * We need to switch to the converse mode. Since all callers ++ * bear the "adaptive" tag, we just pass -ENOSYS back to the ++ * syscall dispatcher to get switched to the next mode. ++ */ ++ rtdm_fd_put(fd); ++ ++ return ERR_PTR(-ENOSYS); ++} ++ ++int rtdm_fd_ioctl(int ufd, unsigned int request, ...) 
++{ ++ struct rtdm_fd *fd; ++ void __user *arg; ++ va_list args; ++ int err, ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ err = PTR_ERR(fd); ++ goto out; ++ } ++ ++ va_start(args, request); ++ arg = va_arg(args, void __user *); ++ va_end(args); ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_ioctl(current, fd, ufd, request); ++ ++ if (ipipe_root_p) ++ err = fd->ops->ioctl_nrt(fd, request, arg); ++ else ++ err = fd->ops->ioctl_rt(fd, request, arg); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ if (err < 0) { ++ ret = __rtdm_dev_ioctl_core(fd, request, arg); ++ if (ret != -EADV) ++ err = ret; ++ } ++ ++ rtdm_fd_put(fd); ++ out: ++ if (err < 0) ++ trace_cobalt_fd_ioctl_status(current, fd, ufd, err); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_ioctl); ++ ++ssize_t ++rtdm_fd_read(int ufd, void __user *buf, size_t size) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_read(current, fd, ufd, size); ++ ++ if (ipipe_root_p) ++ ret = fd->ops->read_nrt(fd, buf, size); ++ else ++ ret = fd->ops->read_rt(fd, buf, size); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ out: ++ if (ret < 0) ++ trace_cobalt_fd_read_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_read); ++ ++ssize_t rtdm_fd_write(int ufd, const void __user *buf, size_t size) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_write(current, fd, ufd, size); ++ ++ if (ipipe_root_p) ++ ret = fd->ops->write_nrt(fd, buf, size); ++ else ++ ret = fd->ops->write_rt(fd, buf, size); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ out: ++ if (ret < 0) ++ trace_cobalt_fd_write_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_write); ++ ++ssize_t rtdm_fd_recvmsg(int ufd, struct user_msghdr *msg, int flags) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_recvmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ if (ipipe_root_p) ++ ret = fd->ops->recvmsg_nrt(fd, msg, flags); ++ else ++ ret = fd->ops->recvmsg_rt(fd, msg, flags); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++out: ++ if (ret < 0) ++ trace_cobalt_fd_recvmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_recvmsg); ++ ++struct cobalt_recvmmsg_timer { ++ struct xntimer timer; ++ struct xnthread *waiter; ++}; ++ ++static void recvmmsg_timeout_handler(struct xntimer *timer) ++{ ++ struct cobalt_recvmmsg_timer *rq; ++ ++ rq = container_of(timer, struct cobalt_recvmmsg_timer, timer); ++ xnthread_set_info(rq->waiter, XNTIMEO); ++ xnthread_resume(rq->waiter, XNDELAY); ++} ++ ++int __rtdm_fd_recvmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, void __user *u_timeout, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg), ++ int (*get_timespec)(struct timespec *ts, const void __user *u_ts)) ++{ ++ struct cobalt_recvmmsg_timer rq; ++ xntmode_t tmode = XN_RELATIVE; ++ struct timespec ts = { 0 }; ++ 
int ret = 0, datagrams = 0; ++ xnticks_t timeout = 0; ++ struct mmsghdr mmsg; ++ struct rtdm_fd *fd; ++ void __user *u_p; ++ ssize_t len; ++ spl_t s; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_recvmmsg(current, fd, ufd, flags); ++ ++ if (u_timeout) { ++ ret = get_timespec(&ts, u_timeout); ++ if (ret) ++ goto fail; ++ ++ if ((unsigned long)ts.tv_nsec >= ONE_BILLION) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ tmode = XN_ABSOLUTE; ++ timeout = ts2ns(&ts); ++ if (timeout == 0) ++ flags |= MSG_DONTWAIT; ++ else { ++ timeout += xnclock_read_monotonic(&nkclock); ++ rq.waiter = xnthread_current(); ++ xntimer_init(&rq.timer, &nkclock, ++ recvmmsg_timeout_handler, ++ NULL, XNTIMER_IGRAVITY); ++ xnlock_get_irqsave(&nklock, s); ++ ret = xntimer_start(&rq.timer, timeout, ++ XN_INFINITE, tmode); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ for (u_p = u_msgvec; vlen > 0; vlen--) { ++ ret = get_mmsg(&mmsg, u_p); ++ if (ret) ++ break; ++ len = fd->ops->recvmsg_rt(fd, &mmsg.msg_hdr, flags); ++ if (len < 0) { ++ ret = len; ++ break; ++ } ++ mmsg.msg_len = (unsigned int)len; ++ ret = put_mmsg(&u_p, &mmsg); ++ if (ret) ++ break; ++ datagrams++; ++ /* OOB data requires immediate handling. */ ++ if (mmsg.msg_hdr.msg_flags & MSG_OOB) ++ break; ++ if (flags & MSG_WAITFORONE) ++ flags |= MSG_DONTWAIT; ++ } ++ ++ if (timeout) { ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(&rq.timer); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++fail: ++ rtdm_fd_put(fd); ++ ++ if (datagrams > 0) ++ ret = datagrams; ++ ++out: ++ trace_cobalt_fd_recvmmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ ++ssize_t rtdm_fd_sendmsg(int ufd, const struct user_msghdr *msg, int flags) ++{ ++ struct rtdm_fd *fd; ++ ssize_t ret; ++ ++ fd = get_fd_fixup_mode(ufd); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_sendmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ if (ipipe_root_p) ++ ret = fd->ops->sendmsg_nrt(fd, msg, flags); ++ else ++ ret = fd->ops->sendmsg_rt(fd, msg, flags); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++out: ++ if (ret < 0) ++ trace_cobalt_fd_sendmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_sendmsg); ++ ++int __rtdm_fd_sendmmsg(int ufd, void __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, ++ int (*get_mmsg)(struct mmsghdr *mmsg, void __user *u_mmsg), ++ int (*put_mmsg)(void __user **u_mmsg_p, const struct mmsghdr *mmsg)) ++{ ++ int ret = 0, datagrams = 0; ++ struct mmsghdr mmsg; ++ struct rtdm_fd *fd; ++ void __user *u_p; ++ ssize_t len; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_sendmmsg(current, fd, ufd, flags); ++ ++ if (fd->oflags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ ++ for (u_p = u_msgvec; vlen > 0; vlen--) { ++ ret = get_mmsg(&mmsg, u_p); ++ if (ret) ++ break; ++ len = fd->ops->sendmsg_rt(fd, &mmsg.msg_hdr, flags); ++ if (len < 0) { ++ ret = len; ++ break; ++ } ++ mmsg.msg_len = (unsigned int)len; ++ ret = put_mmsg(&u_p, &mmsg); ++ if (ret) ++ break; ++ datagrams++; ++ } ++ ++ rtdm_fd_put(fd); ++ ++ if (datagrams > 0) ++ ret = datagrams; ++ ++out: ++ trace_cobalt_fd_sendmmsg_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ 
++static void ++__fd_close(struct cobalt_ppd *p, struct rtdm_fd_index *idx, spl_t s) ++{ ++ xnid_remove(&p->fds, &idx->id); ++ __put_fd(idx->fd, s); ++ ++ kfree(idx); ++} ++ ++int rtdm_fd_close(int ufd, unsigned int magic) ++{ ++ struct rtdm_fd_index *idx; ++ struct cobalt_ppd *ppd; ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ ppd = cobalt_ppd_get(0); ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ idx = fetch_fd_index(ppd, ufd); ++ if (idx == NULL) ++ goto eadv; ++ ++ fd = idx->fd; ++ if (magic != 0 && fd->magic != magic) { ++eadv: ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ return -EADV; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_close(current, fd, ufd, fd->refs); ++ ++ /* ++ * In dual kernel mode, the linux-side fdtable and the RTDM ++ * ->close() handler are asynchronously managed, i.e. the ++ * handler execution may be deferred after the regular file ++ * descriptor was removed from the fdtable if some refs on ++ * rtdm_fd are still pending. ++ */ ++ __fd_close(ppd, idx, s); ++ __close_fd(current->files, ufd); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_fd_close); ++ ++int rtdm_fd_mmap(int ufd, struct _rtdm_mmap_request *rma, ++ void **u_addrp) ++{ ++ struct rtdm_fd *fd; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) { ++ ret = PTR_ERR(fd); ++ goto out; ++ } ++ ++ set_compat_bit(fd); ++ ++ trace_cobalt_fd_mmap(current, fd, ufd, rma); ++ ++ if (rma->flags & (MAP_FIXED|MAP_ANONYMOUS)) { ++ ret = -EADV; ++ goto unlock; ++ } ++ ++ ret = __rtdm_mmap_from_fdop(fd, rma->length, rma->offset, ++ rma->prot, rma->flags, u_addrp); ++unlock: ++ rtdm_fd_put(fd); ++out: ++ if (ret) ++ trace_cobalt_fd_mmap_status(current, fd, ufd, ret); ++ ++ return ret; ++} ++ ++int rtdm_fd_valid_p(int ufd) ++{ ++ struct rtdm_fd *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&fdtree_lock, s); ++ fd = fetch_fd(cobalt_ppd_get(0), ufd); ++ xnlock_put_irqrestore(&fdtree_lock, s); ++ ++ return fd != NULL; ++} ++ ++/** ++ * @brief Bind a selector to specified event types of a given file descriptor ++ * @internal ++ * ++ * This function is invoked by higher RTOS layers implementing select-like ++ * services. It shall not be called directly by RTDM drivers. ++ * ++ * @param[in] ufd User-side file descriptor to bind to ++ * @param[in,out] selector Selector object that shall be bound to the given ++ * event ++ * @param[in] type Event type the caller is interested in ++ * ++ * @return 0 on success, otherwise: ++ * ++ * - -EBADF is returned if the file descriptor @a ufd cannot be resolved. ++ * - -EINVAL is returned if @a type is invalid. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int rtdm_fd_select(int ufd, struct xnselector *selector, ++ unsigned int type) ++{ ++ struct rtdm_fd *fd; ++ int ret; ++ ++ fd = rtdm_fd_get(ufd, 0); ++ if (IS_ERR(fd)) ++ return PTR_ERR(fd); ++ ++ set_compat_bit(fd); ++ ++ ret = fd->ops->select(fd, selector, type, ufd); ++ ++ if (!XENO_ASSERT(COBALT, !spltest())) ++ splnone(); ++ ++ rtdm_fd_put(fd); ++ ++ return ret; ++} ++ ++static void destroy_fd(void *cookie, struct xnid *id) ++{ ++ struct cobalt_ppd *p = cookie; ++ struct rtdm_fd_index *idx; ++ spl_t s; ++ ++ idx = container_of(id, struct rtdm_fd_index, id); ++ xnlock_get_irqsave(&fdtree_lock, s); ++ __fd_close(p, idx, 0); ++} ++ ++void rtdm_fd_cleanup(struct cobalt_ppd *p) ++{ ++ /* ++ * This is called on behalf of a (userland) task exit handler, ++ * so we don't have to deal with the regular file descriptors, ++ * we only have to empty our own index. 
++ */ ++ xntree_cleanup(&p->fds, p, destroy_fd); ++} ++ ++void rtdm_fd_init(void) ++{ ++ sema_init(&rtdm_fd_cleanup_sem, 0); ++ kthread_run(fd_cleanup_thread, NULL, "rtdm_fd"); ++} ++ ++static inline void warn_user(struct file *file, const char *call) ++{ ++ struct dentry *dentry = file->f_path.dentry; ++ ++ printk(XENO_WARNING ++ "%s[%d] called regular %s() on /dev/rtdm/%s\n", ++ current->comm, task_pid_nr(current), call + 5, dentry->d_name.name); ++} ++ ++static ssize_t dumb_read(struct file *file, char __user *buf, ++ size_t count, loff_t __user *ppos) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static ssize_t dumb_write(struct file *file, const char __user *buf, ++ size_t count, loff_t __user *ppos) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static unsigned int dumb_poll(struct file *file, poll_table *pt) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++static long dumb_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ warn_user(file, __func__); ++ return -EINVAL; ++} ++ ++const struct file_operations rtdm_dumb_fops = { ++ .read = dumb_read, ++ .write = dumb_write, ++ .poll = dumb_poll, ++ .unlocked_ioctl = dumb_ioctl, ++}; +--- linux/kernel/xenomai/posix/corectl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/corectl.h 2021-04-07 16:01:26.149635706 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) 2016 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_CORECTL_H ++#define _COBALT_POSIX_CORECTL_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_config_vector { ++ void __user *u_buf; ++ size_t u_bufsz; ++}; ++ ++COBALT_SYSCALL_DECL(corectl, ++ (int request, void __user *u_buf, size_t u_bufsz)); ++ ++void cobalt_add_config_chain(struct notifier_block *nb); ++ ++void cobalt_remove_config_chain(struct notifier_block *nb); ++ ++#endif /* !_COBALT_POSIX_CORECTL_H */ +--- linux/kernel/xenomai/posix/sched.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sched.c 2021-04-07 16:01:26.145635712 +0800 +@@ -0,0 +1,852 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "clock.h" ++#include ++ ++struct xnsched_class * ++cobalt_sched_policy_param(union xnsched_policy_param *param, ++ int u_policy, const struct sched_param_ex *param_ex, ++ xnticks_t *tslice_r) ++{ ++ struct xnsched_class *sched_class; ++ int prio, policy; ++ xnticks_t tslice; ++ ++ prio = param_ex->sched_priority; ++ tslice = XN_INFINITE; ++ policy = u_policy; ++ ++ /* ++ * NOTE: The user-defined policy may be different than ours, ++ * e.g. SCHED_FIFO,prio=-7 from userland would be interpreted ++ * as SCHED_WEAK,prio=7 in kernel space. ++ */ ++ if (prio < 0) { ++ prio = -prio; ++ policy = SCHED_WEAK; ++ } ++ sched_class = &xnsched_class_rt; ++ param->rt.prio = prio; ++ ++ switch (policy) { ++ case SCHED_NORMAL: ++ if (prio) ++ return NULL; ++ /* ++ * When the weak scheduling class is compiled in, ++ * SCHED_WEAK and SCHED_NORMAL threads are scheduled ++ * by xnsched_class_weak, at their respective priority ++ * levels. Otherwise, SCHED_NORMAL is scheduled by ++ * xnsched_class_rt at priority level #0. ++ */ ++ case SCHED_WEAK: ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ if (prio < XNSCHED_WEAK_MIN_PRIO || ++ prio > XNSCHED_WEAK_MAX_PRIO) ++ return NULL; ++ param->weak.prio = prio; ++ sched_class = &xnsched_class_weak; ++#else ++ if (prio) ++ return NULL; ++#endif ++ break; ++ case SCHED_RR: ++ /* if unspecified, use current one. */ ++ tslice = ts2ns(¶m_ex->sched_rr_quantum); ++ if (tslice == XN_INFINITE && tslice_r) ++ tslice = *tslice_r; ++ /* falldown wanted */ ++ case SCHED_FIFO: ++ if (prio < XNSCHED_FIFO_MIN_PRIO || ++ prio > XNSCHED_FIFO_MAX_PRIO) ++ return NULL; ++ break; ++ case SCHED_COBALT: ++ if (prio < XNSCHED_CORE_MIN_PRIO || ++ prio > XNSCHED_CORE_MAX_PRIO) ++ return NULL; ++ break; ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ case SCHED_SPORADIC: ++ param->pss.normal_prio = param_ex->sched_priority; ++ param->pss.low_prio = param_ex->sched_ss_low_priority; ++ param->pss.current_prio = param->pss.normal_prio; ++ param->pss.init_budget = ts2ns(¶m_ex->sched_ss_init_budget); ++ param->pss.repl_period = ts2ns(¶m_ex->sched_ss_repl_period); ++ param->pss.max_repl = param_ex->sched_ss_max_repl; ++ sched_class = &xnsched_class_sporadic; ++ break; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ case SCHED_TP: ++ param->tp.prio = param_ex->sched_priority; ++ param->tp.ptid = param_ex->sched_tp_partition; ++ sched_class = &xnsched_class_tp; ++ break; ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ case SCHED_QUOTA: ++ param->quota.prio = param_ex->sched_priority; ++ param->quota.tgid = param_ex->sched_quota_group; ++ sched_class = &xnsched_class_quota; ++ break; ++#endif ++ default: ++ return NULL; ++ } ++ ++ if (tslice_r) ++ *tslice_r = tslice; ++ ++ return sched_class; ++} ++ ++COBALT_SYSCALL(sched_minprio, current, (int policy)) ++{ ++ int ret; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ case SCHED_SPORADIC: ++ case SCHED_TP: ++ case SCHED_QUOTA: ++ ret = XNSCHED_FIFO_MIN_PRIO; ++ break; ++ case SCHED_COBALT: ++ ret = XNSCHED_CORE_MIN_PRIO; ++ break; ++ case SCHED_NORMAL: ++ case SCHED_WEAK: ++ ret = 0; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_min_prio(policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_maxprio, current, 
(int policy)) ++{ ++ int ret; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ case SCHED_SPORADIC: ++ case SCHED_TP: ++ case SCHED_QUOTA: ++ ret = XNSCHED_FIFO_MAX_PRIO; ++ break; ++ case SCHED_COBALT: ++ ret = XNSCHED_CORE_MAX_PRIO; ++ break; ++ case SCHED_NORMAL: ++ ret = 0; ++ break; ++ case SCHED_WEAK: ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ ret = XNSCHED_FIFO_MAX_PRIO; ++#else ++ ret = 0; ++#endif ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_max_prio(policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_yield, primary, (void)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ int ret = 0; ++ ++ trace_cobalt_pthread_yield(0); ++ ++ /* Maybe some extension wants to handle this. */ ++ if (cobalt_call_extension(sched_yield, &curr->extref, ret) && ret) ++ return ret > 0 ? 0 : ret; ++ ++ xnthread_resume(&curr->threadbase, 0); ++ if (xnsched_run()) ++ return 0; ++ ++ /* ++ * If the round-robin move did not beget any context switch to ++ * a thread running in primary mode, then wait for the next ++ * linux context switch to happen. ++ * ++ * Rationale: it is most probably unexpected that ++ * sched_yield() does not cause any context switch, since this ++ * service is commonly used for implementing a poor man's ++ * cooperative scheduling. By waiting for a context switch to ++ * happen in the regular kernel, we guarantee that the CPU has ++ * been relinquished for a while. ++ * ++ * Typically, this behavior allows a thread running in primary ++ * mode to effectively yield the CPU to a thread of ++ * same/higher priority stuck in secondary mode. ++ * ++ * NOTE: calling cobalt_yield() with no timeout ++ * (i.e. XN_INFINITE) is probably never a good idea. This ++ * means that a SCHED_FIFO non-rt thread stuck in a tight loop ++ * would prevent the caller from waking up, since no ++ * linux-originated schedule event would happen for unblocking ++ * it on the current CPU. For this reason, we pass the ++ * arbitrary TICK_NSEC value to limit the wait time to a ++ * reasonable amount. ++ */ ++ return cobalt_yield(TICK_NSEC, TICK_NSEC); ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ ++static inline ++int set_tp_config(int cpu, union sched_config *config, size_t len) ++{ ++ xnticks_t offset, duration, next_offset; ++ struct xnsched_tp_schedule *gps, *ogps; ++ struct xnsched_tp_window *w; ++ struct sched_tp_window *p; ++ struct xnsched *sched; ++ spl_t s; ++ int n; ++ ++ if (len < sizeof(config->tp)) ++ return -EINVAL; ++ ++ sched = xnsched_struct(cpu); ++ ++ switch (config->tp.op) { ++ case sched_tp_install: ++ if (config->tp.nr_windows > 0) ++ break; ++ /* Fallback wanted. */ ++ case sched_tp_uninstall: ++ gps = NULL; ++ goto set_schedule; ++ case sched_tp_start: ++ xnlock_get_irqsave(&nklock, s); ++ xnsched_tp_start_schedule(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ case sched_tp_stop: ++ xnlock_get_irqsave(&nklock, s); ++ xnsched_tp_stop_schedule(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ ++ /* Install a new TP schedule on CPU. */ ++ ++ gps = xnmalloc(sizeof(*gps) + config->tp.nr_windows * sizeof(*w)); ++ if (gps == NULL) ++ return -ENOMEM; ++ ++ for (n = 0, p = config->tp.windows, w = gps->pwins, next_offset = 0; ++ n < config->tp.nr_windows; n++, p++, w++) { ++ /* ++ * Time windows must be strictly contiguous. Holes may ++ * be defined using windows assigned to the pseudo ++ * partition #-1. 
++ */ ++ offset = ts2ns(&p->offset); ++ if (offset != next_offset) ++ goto cleanup_and_fail; ++ ++ duration = ts2ns(&p->duration); ++ if (duration <= 0) ++ goto cleanup_and_fail; ++ ++ if (p->ptid < -1 || ++ p->ptid >= CONFIG_XENO_OPT_SCHED_TP_NRPART) ++ goto cleanup_and_fail; ++ ++ w->w_offset = next_offset; ++ w->w_part = p->ptid; ++ next_offset += duration; ++ } ++ ++ atomic_set(&gps->refcount, 1); ++ gps->pwin_nr = n; ++ gps->tf_duration = next_offset; ++set_schedule: ++ xnlock_get_irqsave(&nklock, s); ++ ogps = xnsched_tp_set_schedule(sched, gps); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (ogps) ++ xnsched_tp_put_schedule(ogps); ++ ++ return 0; ++ ++cleanup_and_fail: ++ xnfree(gps); ++ ++ return -EINVAL; ++} ++ ++static inline ++ssize_t get_tp_config(int cpu, void __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ struct xnsched_tp_window *pw, *w; ++ struct xnsched_tp_schedule *gps; ++ struct sched_tp_window *pp, *p; ++ union sched_config *config; ++ struct xnsched *sched; ++ ssize_t ret, elen; ++ spl_t s; ++ int n; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = xnsched_struct(cpu); ++ gps = xnsched_tp_get_schedule(sched); ++ if (gps == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ elen = sched_tp_confsz(gps->pwin_nr); ++ config = xnmalloc(elen); ++ if (config == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ config->tp.op = sched_tp_install; ++ config->tp.nr_windows = gps->pwin_nr; ++ for (n = 0, pp = p = config->tp.windows, pw = w = gps->pwins; ++ n < gps->pwin_nr; pp = p, p++, pw = w, w++, n++) { ++ ns2ts(&p->offset, w->w_offset); ++ ns2ts(&pp->duration, w->w_offset - pw->w_offset); ++ p->ptid = w->w_part; ++ } ++ ns2ts(&pp->duration, gps->tf_duration - pw->w_offset); ++ ret = put_config(SCHED_TP, u_config, len, config, elen); ++ xnfree(config); ++out: ++ xnsched_tp_put_schedule(gps); ++ ++ return ret; ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_TP */ ++ ++static inline int ++set_tp_config(int cpu, union sched_config *config, size_t len) ++{ ++ return -EINVAL; ++} ++ ++static inline ssize_t ++get_tp_config(int cpu, union sched_config __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_TP */ ++ ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ ++static inline ++int set_quota_config(int cpu, union sched_config *config, size_t len) ++{ ++ struct __sched_config_quota *p = &config->quota; ++ struct __sched_quota_info *iq = &p->info; ++ struct cobalt_sched_group *group; ++ struct xnsched_quota_group *tg; ++ struct xnsched *sched; ++ int ret, quota_sum; ++ spl_t s; ++ ++ if (len < sizeof(*p)) ++ return -EINVAL; ++ ++ switch (p->op) { ++ case sched_quota_add: ++ group = xnmalloc(sizeof(*group)); ++ if (group == NULL) ++ return -ENOMEM; ++ tg = &group->quota; ++ group->pshared = p->add.pshared != 0; ++ group->scope = cobalt_current_resources(group->pshared); ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ ret = xnsched_quota_create_group(tg, sched, "a_sum); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ 
xnfree(group); ++ return ret; ++ } ++ list_add(&group->next, &group->scope->schedq); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ case sched_quota_remove: ++ case sched_quota_force_remove: ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, p->remove.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ ret = xnsched_quota_destroy_group(tg, ++ p->op == sched_quota_force_remove, ++ "a_sum); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ list_del(&group->next); ++ xnlock_put_irqrestore(&nklock, s); ++ iq->tgid = tg->tgid; ++ iq->quota = tg->quota_percent; ++ iq->quota_peak = tg->quota_peak_percent; ++ iq->quota_sum = quota_sum; ++ xnfree(group); ++ return 0; ++ case sched_quota_set: ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, p->set.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ xnsched_quota_set_limit(tg, p->set.quota, p->set.quota_peak, ++ "a_sum); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ iq->tgid = tg->tgid; ++ iq->quota = tg->quota_percent; ++ iq->quota_peak = tg->quota_peak_percent; ++ iq->quota_sum = quota_sum; ++ ++ return 0; ++bad_tgid: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -ESRCH; ++} ++ ++static inline ++ssize_t get_quota_config(int cpu, void __user *u_config, size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ struct cobalt_sched_group *group; ++ struct xnsched_quota_group *tg; ++ union sched_config *config; ++ struct xnsched *sched; ++ ssize_t ret; ++ spl_t s; ++ ++ config = fetch_config(SCHED_QUOTA, u_config, &len); ++ if (IS_ERR(config)) ++ return PTR_ERR(config); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ tg = xnsched_quota_find_group(sched, config->quota.get.tgid); ++ if (tg == NULL) ++ goto bad_tgid; ++ ++ group = container_of(tg, struct cobalt_sched_group, quota); ++ if (group->scope != cobalt_current_resources(group->pshared)) ++ goto bad_tgid; ++ ++ config->quota.info.tgid = tg->tgid; ++ config->quota.info.quota = tg->quota_percent; ++ config->quota.info.quota_peak = tg->quota_peak_percent; ++ config->quota.info.quota_sum = xnsched_quota_sum_all(sched); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = put_config(SCHED_QUOTA, u_config, len, config, sizeof(*config)); ++ xnfree(config); ++ ++ return ret; ++bad_tgid: ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(config); ++ ++ return -ESRCH; ++} ++ ++#else /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++static inline ++int set_quota_config(int cpu, union sched_config *config, size_t len) ++{ ++ return -EINVAL; ++} ++ ++static inline ++ssize_t get_quota_config(int cpu, void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ return -EINVAL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_SCHED_QUOTA */ ++ ++static 
union sched_config * ++sched_fetch_config(int policy, const void __user *u_config, size_t *len) ++{ ++ union sched_config *buf; ++ int ret; ++ ++ if (u_config == NULL) ++ return ERR_PTR(-EFAULT); ++ ++ if (policy == SCHED_QUOTA && *len < sizeof(buf->quota)) ++ return ERR_PTR(-EINVAL); ++ ++ buf = xnmalloc(*len); ++ if (buf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = cobalt_copy_from_user(buf, u_config, *len); ++ if (ret) { ++ xnfree(buf); ++ return ERR_PTR(ret); ++ } ++ ++ return buf; ++} ++ ++static int sched_ack_config(int policy, const union sched_config *config, ++ void __user *u_config) ++{ ++ union sched_config __user *u_p = u_config; ++ ++ if (policy != SCHED_QUOTA) ++ return 0; ++ ++ return u_p == NULL ? -EFAULT : ++ cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)); ++} ++ ++static ssize_t sched_put_config(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, size_t len) ++{ ++ union sched_config *u_p = u_config; ++ ++ if (u_config == NULL) ++ return -EFAULT; ++ ++ if (policy == SCHED_QUOTA) { ++ if (u_len < sizeof(config->quota)) ++ return -EINVAL; ++ return cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)) ?: ++ sizeof(u_p->quota.info); ++ } ++ ++ return cobalt_copy_to_user(u_config, config, len) ?: len; ++} ++ ++int __cobalt_sched_setconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ int (*ack_config)(int policy, ++ const union sched_config *config, ++ void __user *u_config)) ++{ ++ union sched_config *buf; ++ int ret; ++ ++ trace_cobalt_sched_setconfig(cpu, policy, len); ++ ++ if (cpu < 0 || cpu >= NR_CPUS || !xnsched_threading_cpu(cpu)) ++ return -EINVAL; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ buf = fetch_config(policy, u_config, &len); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); ++ ++ switch (policy) { ++ case SCHED_TP: ++ ret = set_tp_config(cpu, buf, len); ++ break; ++ case SCHED_QUOTA: ++ ret = set_quota_config(cpu, buf, len); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ if (ret == 0) ++ ret = ack_config(policy, buf, u_config); ++ ++ xnfree(buf); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_setconfig_np, conforming, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_setconfig_np(cpu, policy, u_config, len, ++ sched_fetch_config, sched_ack_config); ++} ++ ++ssize_t __cobalt_sched_getconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, ++ void __user *u_config, ++ size_t u_len, ++ const union sched_config *config, ++ size_t len)) ++{ ++ ssize_t ret; ++ ++ switch (policy) { ++ case SCHED_TP: ++ ret = get_tp_config(cpu, u_config, len, ++ fetch_config, put_config); ++ break; ++ case SCHED_QUOTA: ++ ret = get_quota_config(cpu, u_config, len, ++ fetch_config, put_config); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ trace_cobalt_sched_get_config(cpu, policy, ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sched_getconfig_np, conforming, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_getconfig_np(cpu, policy, u_config, len, ++ sched_fetch_config, sched_put_config); ++} ++ ++int __cobalt_sched_weightprio(int policy, ++ const struct sched_param_ex 
*param_ex) ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ int prio; ++ ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, NULL); ++ if (sched_class == NULL) ++ return -EINVAL; ++ ++ prio = param_ex->sched_priority; ++ if (prio < 0) ++ prio = -prio; ++ ++ return prio + sched_class->weight; ++} ++ ++COBALT_SYSCALL(sched_weightprio, current, ++ (int policy, const struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return __cobalt_sched_weightprio(policy, ¶m_ex); ++} ++ ++int cobalt_sched_setscheduler_ex(pid_t pid, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret, promoted = 0; ++ spl_t s; ++ ++ trace_cobalt_sched_setscheduler(pid, policy, param_ex); ++ ++ if (pid) { ++ xnlock_get_irqsave(&nklock, s); ++ thread = cobalt_thread_find(pid); ++ xnlock_put_irqrestore(&nklock, s); ++ } else ++ thread = cobalt_current_thread(); ++ ++ if (thread == NULL) { ++ if (u_winoff == NULL || pid != task_pid_vnr(current)) ++ return -ESRCH; ++ ++ thread = cobalt_thread_shadow(&hkey, u_winoff); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ promoted = 1; ++ } ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(sched_setscheduler_ex, conforming, ++ (pid_t pid, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return cobalt_sched_setscheduler_ex(pid, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++int cobalt_sched_getscheduler_ex(pid_t pid, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ trace_cobalt_sched_getscheduler(pid); ++ ++ if (pid) { ++ xnlock_get_irqsave(&nklock, s); ++ thread = cobalt_thread_find(pid); ++ xnlock_put_irqrestore(&nklock, s); ++ } else ++ thread = cobalt_current_thread(); ++ ++ if (thread == NULL) ++ return -ESRCH; ++ ++ return __cobalt_thread_getschedparam_ex(thread, policy_r, param_ex); ++} ++ ++COBALT_SYSCALL(sched_getscheduler_ex, current, ++ (pid_t pid, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_sched_getscheduler_ex(pid, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_param, ¶m_ex, sizeof(param_ex)) || ++ cobalt_copy_to_user(u_policy, &policy, sizeof(policy))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++void cobalt_sched_reclaim(struct cobalt_process *process) ++{ ++ struct cobalt_resources *p = &process->resources; ++ struct cobalt_sched_group *group; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (!list_empty(&p->schedq)) { ++ group = list_get_entry(&p->schedq, struct cobalt_sched_group, next); ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ xnsched_quota_destroy_group(&group->quota, 1, NULL); ++#endif ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(group); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} +--- linux/kernel/xenomai/posix/io.h 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/kernel/xenomai/posix/io.h 2021-04-07 16:01:26.140635719 +0800 +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_IO_H ++#define _COBALT_POSIX_IO_H ++ ++#include ++#include ++#include ++ ++int __cobalt_first_fd_valid_p(fd_set *fds[XNSELECT_MAX_TYPES], int nfds); ++ ++int __cobalt_select_bind_all(struct xnselector *selector, ++ fd_set *fds[XNSELECT_MAX_TYPES], int nfds); ++ ++COBALT_SYSCALL_DECL(open, ++ (const char __user *u_path, int oflag)); ++ ++COBALT_SYSCALL_DECL(socket, ++ (int protocol_family, ++ int socket_type, int protocol)); ++ ++COBALT_SYSCALL_DECL(close, (int fd)); ++ ++COBALT_SYSCALL_DECL(fcntl, (int fd, int cmd, long arg)); ++ ++COBALT_SYSCALL_DECL(ioctl, ++ (int fd, unsigned int request, void __user *arg)); ++ ++COBALT_SYSCALL_DECL(read, ++ (int fd, void __user *buf, size_t size)); ++ ++COBALT_SYSCALL_DECL(write, ++ (int fd, const void __user *buf, size_t size)); ++ ++COBALT_SYSCALL_DECL(recvmsg, ++ (int fd, struct user_msghdr __user *umsg, int flags)); ++ ++COBALT_SYSCALL_DECL(recvmmsg, ++ (int fd, struct mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct timespec *u_timeout)); ++ ++COBALT_SYSCALL_DECL(sendmsg, ++ (int fd, struct user_msghdr __user *umsg, int flags)); ++ ++COBALT_SYSCALL_DECL(sendmmsg, ++ (int fd, struct mmsghdr __user *u_msgvec, ++ unsigned int vlen, unsigned int flags)); ++ ++COBALT_SYSCALL_DECL(mmap, ++ (int fd, struct _rtdm_mmap_request __user *u_rma, ++ void __user * __user *u_addrp)); ++ ++COBALT_SYSCALL_DECL(select, ++ (int nfds, ++ fd_set __user *u_rfds, ++ fd_set __user *u_wfds, ++ fd_set __user *u_xfds, ++ struct timeval __user *u_tv)); ++ ++#endif /* !_COBALT_POSIX_IO_H */ +--- linux/kernel/xenomai/posix/timerfd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timerfd.c 2021-04-07 16:01:26.135635727 +0800 +@@ -0,0 +1,334 @@ ++/* ++ * Copyright (C) 2013 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "clock.h" ++#include "timer.h" ++#include "timerfd.h" ++ ++struct cobalt_tfd { ++ int flags; ++ clockid_t clockid; ++ struct rtdm_fd fd; ++ struct xntimer timer; ++ DECLARE_XNSELECT(read_select); ++ struct itimerspec value; ++ struct xnsynch readers; ++ struct xnthread *target; ++}; ++ ++#define COBALT_TFD_TICKED (1 << 2) ++ ++#define COBALT_TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_WAKEUP) ++ ++static ssize_t timerfd_read(struct rtdm_fd *fd, void __user *buf, size_t size) ++{ ++ struct cobalt_tfd *tfd; ++ __u64 __user *u_ticks; ++ __u64 ticks = 0; ++ bool aligned; ++ spl_t s; ++ int err; ++ ++ if (size < sizeof(ticks)) ++ return -EINVAL; ++ ++ u_ticks = buf; ++ if (!access_wok(u_ticks, sizeof(*u_ticks))) ++ return -EFAULT; ++ ++ aligned = (((unsigned long)buf) & (sizeof(ticks) - 1)) == 0; ++ ++ tfd = container_of(fd, struct cobalt_tfd, fd); ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (tfd->flags & COBALT_TFD_TICKED) { ++ err = 0; ++ goto out; ++ } ++ if (rtdm_fd_flags(fd) & O_NONBLOCK) { ++ err = -EAGAIN; ++ goto out; ++ } ++ ++ do { ++ err = xnsynch_sleep_on(&tfd->readers, XN_INFINITE, XN_RELATIVE); ++ } while (err == 0 && (tfd->flags & COBALT_TFD_TICKED) == 0); ++ ++ if (err & XNBREAK) ++ err = -EINTR; ++ out: ++ if (err == 0) { ++ xnticks_t now; ++ ++ if (xntimer_periodic_p(&tfd->timer)) { ++ now = xnclock_read_raw(xntimer_clock(&tfd->timer)); ++ ticks = 1 + xntimer_get_overruns(&tfd->timer, ++ xnthread_current(), now); ++ } else ++ ticks = 1; ++ ++ tfd->flags &= ~COBALT_TFD_TICKED; ++ xnselect_signal(&tfd->read_select, 0); ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (err == 0) { ++ err = aligned ? __xn_put_user(ticks, u_ticks) : ++ __xn_copy_to_user(buf, &ticks, sizeof(ticks)); ++ if (err) ++ err =-EFAULT; ++ } ++ ++ return err ?: sizeof(ticks); ++} ++ ++static int ++timerfd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned type, unsigned index) ++{ ++ struct cobalt_tfd *tfd = container_of(fd, struct cobalt_tfd, fd); ++ struct xnselect_binding *binding; ++ spl_t s; ++ int err; ++ ++ if (type != XNSELECT_READ) ++ return -EBADF; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (binding == NULL) ++ return -ENOMEM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_set_affinity(&tfd->timer, xnthread_current()->sched); ++ err = xnselect_bind(&tfd->read_select, binding, selector, type, ++ index, tfd->flags & COBALT_TFD_TICKED); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static void timerfd_close(struct rtdm_fd *fd) ++{ ++ struct cobalt_tfd *tfd = container_of(fd, struct cobalt_tfd, fd); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_destroy(&tfd->timer); ++ xnsynch_destroy(&tfd->readers); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ xnselect_destroy(&tfd->read_select); /* Reschedules. 
*/ ++ xnfree(tfd); ++} ++ ++static struct rtdm_fd_ops timerfd_ops = { ++ .read_rt = timerfd_read, ++ .select = timerfd_select, ++ .close = timerfd_close, ++}; ++ ++static void timerfd_handler(struct xntimer *xntimer) ++{ ++ struct cobalt_tfd *tfd; ++ ++ tfd = container_of(xntimer, struct cobalt_tfd, timer); ++ tfd->flags |= COBALT_TFD_TICKED; ++ xnselect_signal(&tfd->read_select, 1); ++ xnsynch_wakeup_one_sleeper(&tfd->readers); ++ if (tfd->target) ++ xnthread_unblock(tfd->target); ++} ++ ++COBALT_SYSCALL(timerfd_create, lostage, (int clockid, int flags)) ++{ ++ struct cobalt_tfd *tfd; ++ struct xnthread *curr; ++ struct xnclock *clock; ++ int ret, ufd; ++ ++ if (flags & ~TFD_CREATE_FLAGS) ++ return -EINVAL; ++ ++ clock = cobalt_clock_find(clockid); ++ if (IS_ERR(clock)) ++ return PTR_ERR(clock); ++ ++ tfd = xnmalloc(sizeof(*tfd)); ++ if (tfd == NULL) ++ return -ENOMEM; ++ ++ ufd = __rtdm_anon_getfd("[cobalt-timerfd]", ++ O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); ++ if (ufd < 0) { ++ ret = ufd; ++ goto fail_getfd; ++ } ++ ++ tfd->flags = flags & ~TFD_NONBLOCK; ++ tfd->fd.oflags = (flags & TFD_NONBLOCK) ? O_NONBLOCK : 0; ++ tfd->clockid = clockid; ++ curr = xnthread_current(); ++ xntimer_init(&tfd->timer, clock, timerfd_handler, ++ curr ? curr->sched : NULL, XNTIMER_UGRAVITY); ++ xnsynch_init(&tfd->readers, XNSYNCH_PRIO, NULL); ++ xnselect_init(&tfd->read_select); ++ tfd->target = NULL; ++ ++ ret = rtdm_fd_enter(&tfd->fd, ufd, COBALT_TIMERFD_MAGIC, &timerfd_ops); ++ if (ret < 0) ++ goto fail; ++ ++ ret = rtdm_fd_register(&tfd->fd, ufd); ++ if (ret < 0) ++ goto fail; ++ ++ return ufd; ++fail: ++ xnselect_destroy(&tfd->read_select); ++ xnsynch_destroy(&tfd->readers); ++ xntimer_destroy(&tfd->timer); ++ __rtdm_anon_putfd(ufd); ++fail_getfd: ++ xnfree(tfd); ++ ++ return ret; ++} ++ ++static inline struct cobalt_tfd *tfd_get(int ufd) ++{ ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, COBALT_TIMERFD_MAGIC); ++ if (IS_ERR(fd)) { ++ int err = PTR_ERR(fd); ++ if (err == -EBADF && cobalt_current_process() == NULL) ++ err = -EPERM; ++ return ERR_PTR(err); ++ } ++ ++ return container_of(fd, struct cobalt_tfd, fd); ++} ++ ++static inline void tfd_put(struct cobalt_tfd *tfd) ++{ ++ rtdm_fd_put(&tfd->fd); ++} ++ ++int __cobalt_timerfd_settime(int fd, int flags, ++ const struct itimerspec *value, ++ struct itimerspec *ovalue) ++{ ++ struct cobalt_tfd *tfd; ++ int cflag, ret; ++ spl_t s; ++ ++ if (flags & ~COBALT_TFD_SETTIME_FLAGS) ++ return -EINVAL; ++ ++ tfd = tfd_get(fd); ++ if (IS_ERR(tfd)) ++ return PTR_ERR(tfd); ++ ++ cflag = (flags & TFD_TIMER_ABSTIME) ? 
TIMER_ABSTIME : 0; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ tfd->target = NULL; ++ if (flags & TFD_WAKEUP) { ++ tfd->target = xnthread_current(); ++ if (tfd->target == NULL) { ++ ret = -EPERM; ++ goto out; ++ } ++ } ++ ++ if (ovalue) ++ __cobalt_timer_getval(&tfd->timer, ovalue); ++ ++ xntimer_set_affinity(&tfd->timer, xnthread_current()->sched); ++ ++ ret = __cobalt_timer_setval(&tfd->timer, ++ clock_flag(cflag, tfd->clockid), value); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ tfd_put(tfd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(timerfd_settime, primary, ++ (int fd, int flags, ++ const struct itimerspec __user *new_value, ++ struct itimerspec __user *old_value)) ++{ ++ struct itimerspec ovalue, value; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&value, new_value, sizeof(value)); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timerfd_settime(fd, flags, &value, &ovalue); ++ if (ret) ++ return ret; ++ ++ if (old_value) { ++ ret = cobalt_copy_to_user(old_value, &ovalue, sizeof(ovalue)); ++ value.it_value.tv_sec = 0; ++ value.it_value.tv_nsec = 0; ++ __cobalt_timerfd_settime(fd, flags, &value, NULL); ++ } ++ ++ return ret; ++} ++ ++int __cobalt_timerfd_gettime(int fd, struct itimerspec *value) ++{ ++ struct cobalt_tfd *tfd; ++ spl_t s; ++ ++ tfd = tfd_get(fd); ++ if (IS_ERR(tfd)) ++ return PTR_ERR(tfd); ++ ++ xnlock_get_irqsave(&nklock, s); ++ __cobalt_timer_getval(&tfd->timer, value); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ tfd_put(tfd); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timerfd_gettime, current, ++ (int fd, struct itimerspec __user *curr_value)) ++{ ++ struct itimerspec value; ++ int ret; ++ ++ ret = __cobalt_timerfd_gettime(fd, &value); ++ ++ return ret ?: cobalt_copy_to_user(curr_value, &value, sizeof(value)); ++} +--- linux/kernel/xenomai/posix/process.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/process.h 2021-04-07 16:01:26.131635732 +0800 +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_PROCESS_H ++#define _COBALT_POSIX_PROCESS_H ++ ++#include ++#include ++#include ++ ++#define KEVENT_PROPAGATE 0 ++#define KEVENT_STOP 1 ++ ++#define NR_PERSONALITIES 4 ++#if BITS_PER_LONG < NR_PERSONALITIES ++#error "NR_PERSONALITIES overflows internal bitmap" ++#endif ++ ++struct mm_struct; ++struct xnthread_personality; ++struct cobalt_timer; ++ ++struct cobalt_resources { ++ struct list_head condq; ++ struct list_head mutexq; ++ struct list_head semq; ++ struct list_head monitorq; ++ struct list_head eventq; ++ struct list_head schedq; ++}; ++ ++struct cobalt_process { ++ struct mm_struct *mm; ++ struct hlist_node hlink; ++ struct cobalt_ppd sys_ppd; ++ unsigned long permap; ++ struct rb_root usems; ++ struct list_head sigwaiters; ++ struct cobalt_resources resources; ++ struct list_head thread_list; ++ DECLARE_BITMAP(timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ struct cobalt_timer *timers[CONFIG_XENO_OPT_NRTIMERS]; ++ void *priv[NR_PERSONALITIES]; ++ int ufeatures; ++ unsigned int debugged_threads; ++}; ++ ++struct cobalt_resnode { ++ struct cobalt_resources *scope; ++ struct cobalt_process *owner; ++ struct list_head next; ++ xnhandle_t handle; ++}; ++ ++int cobalt_register_personality(struct xnthread_personality *personality); ++ ++int cobalt_unregister_personality(int xid); ++ ++struct xnthread_personality *cobalt_push_personality(int xid); ++ ++void cobalt_pop_personality(struct xnthread_personality *prev); ++ ++int cobalt_bind_core(int ufeatures); ++ ++int cobalt_bind_personality(unsigned int magic); ++ ++struct cobalt_process *cobalt_search_process(struct mm_struct *mm); ++ ++int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff); ++ ++void *cobalt_get_context(int xid); ++ ++int cobalt_yield(xnticks_t min, xnticks_t max); ++ ++int cobalt_process_init(void); ++ ++extern struct list_head cobalt_global_thread_list; ++ ++extern struct cobalt_resources cobalt_global_resources; ++ ++static inline struct cobalt_process *cobalt_current_process(void) ++{ ++ return ipipe_current_threadinfo()->process; ++} ++ ++static inline struct cobalt_process * ++cobalt_set_process(struct cobalt_process *process) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct cobalt_process *old; ++ ++ old = p->process; ++ p->process = process; ++ ++ return old; ++} ++ ++static inline struct cobalt_ppd *cobalt_ppd_get(int global) ++{ ++ struct cobalt_process *process; ++ ++ if (global || (process = cobalt_current_process()) == NULL) ++ return &cobalt_kernel_ppd; ++ ++ return &process->sys_ppd; ++} ++ ++static inline struct cobalt_resources *cobalt_current_resources(int pshared) ++{ ++ struct cobalt_process *process; ++ ++ if (pshared || (process = cobalt_current_process()) == NULL) ++ return &cobalt_global_resources; ++ ++ return &process->resources; ++} ++ ++static inline ++void __cobalt_add_resource(struct cobalt_resnode *node, int pshared) ++{ ++ node->owner = cobalt_current_process(); ++ node->scope = cobalt_current_resources(pshared); ++} ++ ++#define cobalt_add_resource(__node, __type, __pshared) \ ++ do { \ ++ __cobalt_add_resource(__node, __pshared); \ ++ list_add_tail(&(__node)->next, \ ++ &((__node)->scope)->__type ## q); \ ++ } while (0) ++ ++static inline ++void cobalt_del_resource(struct cobalt_resnode *node) ++{ ++ list_del(&node->next); ++} ++ ++extern struct xnthread_personality *cobalt_personalities[]; ++ ++extern struct xnthread_personality cobalt_personality; ++ ++#endif /* !_COBALT_POSIX_PROCESS_H */ +--- 
linux/kernel/xenomai/posix/memory.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/memory.c 2021-04-07 16:01:26.126635739 +0800 +@@ -0,0 +1,353 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "memory.h" ++ ++#define UMM_PRIVATE 0 /* Per-process user-mapped memory heap */ ++#define UMM_SHARED 1 /* Shared user-mapped memory heap */ ++#define SYS_GLOBAL 2 /* System heap (not mmapped) */ ++ ++struct xnvdso *nkvdso; ++EXPORT_SYMBOL_GPL(nkvdso); ++ ++static void umm_vmopen(struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm = vma->vm_private_data; ++ ++ atomic_inc(&umm->refcount); ++} ++ ++static void umm_vmclose(struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm = vma->vm_private_data; ++ ++ cobalt_umm_destroy(umm); ++} ++ ++static struct vm_operations_struct umm_vmops = { ++ .open = umm_vmopen, ++ .close = umm_vmclose, ++}; ++ ++static struct cobalt_umm *umm_from_fd(struct rtdm_fd *fd) ++{ ++ struct cobalt_process *process; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return NULL; ++ ++ if (rtdm_fd_minor(fd) == UMM_PRIVATE) ++ return &process->sys_ppd.umm; ++ ++ return &cobalt_kernel_ppd.umm; ++} ++ ++static int umm_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct cobalt_umm *umm; ++ size_t len; ++ int ret; ++ ++ umm = umm_from_fd(fd); ++ if (fd == NULL) ++ return -ENODEV; ++ ++ len = vma->vm_end - vma->vm_start; ++ if (len != xnheap_get_size(&umm->heap)) ++ return -EINVAL; ++ ++ vma->vm_private_data = umm; ++ vma->vm_ops = &umm_vmops; ++ if (xnarch_cache_aliasing()) ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ ret = rtdm_mmap_vmem(vma, xnheap_get_membase(&umm->heap)); ++ if (ret) ++ return ret; ++ ++ atomic_inc(&umm->refcount); ++ ++ return 0; ++} ++ ++#ifndef CONFIG_MMU ++static unsigned long umm_get_unmapped_area(struct rtdm_fd *fd, ++ unsigned long len, ++ unsigned long pgoff, ++ unsigned long flags) ++{ ++ struct cobalt_umm *umm; ++ ++ umm = umm_from_fd(fd); ++ if (umm == NULL) ++ return -ENODEV; ++ ++ if (pgoff == 0) ++ return (unsigned long)xnheap_get_membase(&umm->heap); ++ ++ return pgoff << PAGE_SHIFT; ++} ++#else ++#define umm_get_unmapped_area NULL ++#endif ++ ++static int stat_umm(struct rtdm_fd *fd, ++ struct cobalt_umm __user *u_stat) ++{ ++ struct cobalt_memdev_stat stat; ++ struct cobalt_umm *umm; ++ spl_t s; ++ ++ umm = umm_from_fd(fd); ++ if (umm == NULL) ++ return -ENODEV; ++ ++ xnlock_get_irqsave(&umm->heap.lock, s); ++ stat.size = xnheap_get_size(&umm->heap); ++ stat.free = xnheap_get_free(&umm->heap); ++ xnlock_put_irqrestore(&umm->heap.lock, s); ++ ++ return 
rtdm_safe_copy_to_user(fd, u_stat, &stat, sizeof(stat)); ++} ++ ++static int do_umm_ioctls(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ case MEMDEV_RTIOC_STAT: ++ ret = stat_umm(fd, arg); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int umm_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_umm_ioctls(fd, request, arg); ++} ++ ++static int umm_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_umm_ioctls(fd, request, arg); ++} ++ ++static int sysmem_open(struct rtdm_fd *fd, int oflags) ++{ ++ if ((oflags & O_ACCMODE) != O_RDONLY) ++ return -EACCES; ++ ++ return 0; ++} ++ ++static int do_sysmem_ioctls(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct cobalt_memdev_stat stat; ++ spl_t s; ++ int ret; ++ ++ switch (request) { ++ case MEMDEV_RTIOC_STAT: ++ xnlock_get_irqsave(&cobalt_heap.lock, s); ++ stat.size = xnheap_get_size(&cobalt_heap); ++ stat.free = xnheap_get_free(&cobalt_heap); ++ xnlock_put_irqrestore(&cobalt_heap.lock, s); ++ ret = rtdm_safe_copy_to_user(fd, arg, &stat, sizeof(stat)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int sysmem_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_sysmem_ioctls(fd, request, arg); ++} ++ ++static int sysmem_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ return do_sysmem_ioctls(fd, request, arg); ++} ++ ++static struct rtdm_driver umm_driver = { ++ .profile_info = RTDM_PROFILE_INFO(umm, ++ RTDM_CLASS_MEMORY, ++ RTDM_SUBCLASS_GENERIC, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR, ++ .device_count = 2, ++ .ops = { ++ .ioctl_rt = umm_ioctl_rt, ++ .ioctl_nrt = umm_ioctl_nrt, ++ .mmap = umm_mmap, ++ .get_unmapped_area = umm_get_unmapped_area, ++ }, ++}; ++ ++static struct rtdm_device umm_devices[] = { ++ [ UMM_PRIVATE ] = { ++ .driver = &umm_driver, ++ .label = COBALT_MEMDEV_PRIVATE, ++ .minor = UMM_PRIVATE, ++ }, ++ [ UMM_SHARED ] = { ++ .driver = &umm_driver, ++ .label = COBALT_MEMDEV_SHARED, ++ .minor = UMM_SHARED, ++ }, ++}; ++ ++static struct rtdm_driver sysmem_driver = { ++ .profile_info = RTDM_PROFILE_INFO(sysmem, ++ RTDM_CLASS_MEMORY, ++ SYS_GLOBAL, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .ops = { ++ .open = sysmem_open, ++ .ioctl_rt = sysmem_ioctl_rt, ++ .ioctl_nrt = sysmem_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device sysmem_device = { ++ .driver = &sysmem_driver, ++ .label = COBALT_MEMDEV_SYS, ++}; ++ ++static inline void init_vdso(void) ++{ ++ nkvdso->features = XNVDSO_FEATURES; ++ nkvdso->wallclock_offset = nkclock.wallclock_offset; ++} ++ ++int cobalt_memdev_init(void) ++{ ++ int ret; ++ ++ ret = cobalt_umm_init(&cobalt_kernel_ppd.umm, ++ CONFIG_XENO_OPT_SHARED_HEAPSZ * 1024, NULL); ++ if (ret) ++ return ret; ++ ++ cobalt_umm_set_name(&cobalt_kernel_ppd.umm, "shared heap"); ++ ++ nkvdso = cobalt_umm_alloc(&cobalt_kernel_ppd.umm, sizeof(*nkvdso)); ++ if (nkvdso == NULL) { ++ ret = -ENOMEM; ++ goto fail_vdso; ++ } ++ ++ init_vdso(); ++ ++ ret = rtdm_dev_register(umm_devices + UMM_PRIVATE); ++ if (ret) ++ goto fail_private; ++ ++ ret = rtdm_dev_register(umm_devices + UMM_SHARED); ++ if (ret) ++ goto fail_shared; ++ ++ ret = rtdm_dev_register(&sysmem_device); ++ if (ret) ++ goto fail_sysmem; ++ ++ return 0; ++ ++fail_sysmem: ++ rtdm_dev_unregister(umm_devices + UMM_SHARED); 
++fail_shared: ++ rtdm_dev_unregister(umm_devices + UMM_PRIVATE); ++fail_private: ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, nkvdso); ++fail_vdso: ++ cobalt_umm_destroy(&cobalt_kernel_ppd.umm); ++ ++ return ret; ++} ++ ++void cobalt_memdev_cleanup(void) ++{ ++ rtdm_dev_unregister(&sysmem_device); ++ rtdm_dev_unregister(umm_devices + UMM_SHARED); ++ rtdm_dev_unregister(umm_devices + UMM_PRIVATE); ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, nkvdso); ++ cobalt_umm_destroy(&cobalt_kernel_ppd.umm); ++} ++ ++int cobalt_umm_init(struct cobalt_umm *umm, u32 size, ++ void (*release)(struct cobalt_umm *umm)) ++{ ++ void *basemem; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ size = PAGE_ALIGN(size); ++ basemem = __vmalloc(size, GFP_KERNEL|__GFP_ZERO, ++ xnarch_cache_aliasing() ? ++ pgprot_noncached(PAGE_KERNEL) : PAGE_KERNEL); ++ if (basemem == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&umm->heap, basemem, size); ++ if (ret) { ++ vfree(basemem); ++ return ret; ++ } ++ ++ umm->release = release; ++ atomic_set(&umm->refcount, 1); ++ smp_mb(); ++ ++ return 0; ++} ++ ++void cobalt_umm_destroy(struct cobalt_umm *umm) ++{ ++ secondary_mode_only(); ++ ++ if (atomic_dec_and_test(&umm->refcount)) { ++ xnheap_destroy(&umm->heap); ++ vfree(xnheap_get_membase(&umm->heap)); ++ if (umm->release) ++ umm->release(umm); ++ } ++} +--- linux/kernel/xenomai/posix/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall32.h 2021-04-07 16:01:26.121635747 +0800 +@@ -0,0 +1,234 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_SYSCALL32_H ++#define _COBALT_POSIX_SYSCALL32_H ++ ++#include ++ ++struct cobalt_mutex_shadow; ++struct cobalt_event_shadow; ++struct cobalt_cond_shadow; ++struct cobalt_sem_shadow; ++struct cobalt_monitor_shadow; ++ ++COBALT_SYSCALL32emu_DECL(thread_create, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ int xid, ++ __u32 __user *u_winoff)); ++ ++COBALT_SYSCALL32emu_DECL(thread_setschedparam_ex, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(thread_getschedparam_ex, ++ (compat_ulong_t pth, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL32emu_DECL(thread_setschedprio, ++ (compat_ulong_t pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(clock_getres, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_gettime, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_settime, ++ (clockid_t clock_id, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(clock_adjtime, ++ (clockid_t clock_id, ++ struct compat_timex __user *u_tx)); ++ ++COBALT_SYSCALL32emu_DECL(clock_nanosleep, ++ (clockid_t clock_id, int flags, ++ const struct compat_timespec __user *u_rqt, ++ struct compat_timespec __user *u_rmt)); ++ ++COBALT_SYSCALL32emu_DECL(mutex_timedlock, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(cond_wait_prologue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_open, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct compat_mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL32emu_DECL(mq_getattr, ++ (mqd_t uqd, struct compat_mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL32emu_DECL(mq_timedsend, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32x_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(mq_notify, ++ (mqd_t fd, const struct compat_sigevent *__user u_cev)); ++ ++COBALT_SYSCALL32emu_DECL(sched_weightprio, ++ (int policy, ++ const struct compat_sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL32emu_DECL(sched_setconfig_np, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL32emu_DECL(sched_getconfig_np, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL32emu_DECL(sched_setscheduler_ex, ++ (compat_pid_t pid, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL32emu_DECL(sched_getscheduler_ex, ++ (compat_pid_t pid, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)); ++ 
++COBALT_SYSCALL32emu_DECL(timer_create, ++ (clockid_t clock, ++ const struct compat_sigevent __user *u_sev, ++ timer_t __user *u_tm)); ++ ++COBALT_SYSCALL32emu_DECL(timer_settime, ++ (timer_t tm, int flags, ++ const struct compat_itimerspec __user *u_newval, ++ struct compat_itimerspec __user *u_oldval)); ++ ++COBALT_SYSCALL32emu_DECL(timer_gettime, ++ (timer_t tm, ++ struct compat_itimerspec __user *u_val)); ++ ++COBALT_SYSCALL32emu_DECL(timerfd_settime, ++ (int fd, int flags, ++ const struct compat_itimerspec __user *new_value, ++ struct compat_itimerspec __user *old_value)); ++ ++COBALT_SYSCALL32emu_DECL(timerfd_gettime, ++ (int fd, struct compat_itimerspec __user *value)); ++ ++COBALT_SYSCALL32emu_DECL(sigwait, ++ (const compat_sigset_t __user *u_set, ++ int __user *u_sig)); ++ ++COBALT_SYSCALL32emu_DECL(sigtimedwait, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si, ++ const struct compat_timespec __user *u_timeout)); ++ ++COBALT_SYSCALL32emu_DECL(sigwaitinfo, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si)); ++ ++COBALT_SYSCALL32emu_DECL(sigpending, ++ (compat_old_sigset_t __user *u_set)); ++ ++COBALT_SYSCALL32emu_DECL(sigqueue, ++ (pid_t pid, int sig, ++ const union compat_sigval __user *u_value)); ++ ++COBALT_SYSCALL32emu_DECL(monitor_wait, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct compat_timespec __user *u_ts, ++ int __user *u_ret)); ++ ++COBALT_SYSCALL32emu_DECL(event_wait, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct compat_timespec __user *u_ts)); ++ ++COBALT_SYSCALL32emu_DECL(select, ++ (int nfds, ++ compat_fd_set __user *u_rfds, ++ compat_fd_set __user *u_wfds, ++ compat_fd_set __user *u_xfds, ++ struct compat_timeval __user *u_tv)); ++ ++COBALT_SYSCALL32emu_DECL(recvmsg, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)); ++ ++COBALT_SYSCALL32emu_DECL(recvmmsg, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, ++ unsigned int vlen, ++ unsigned int flags, struct compat_timespec *u_timeout)); ++ ++COBALT_SYSCALL32emu_DECL(sendmsg, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)); ++ ++COBALT_SYSCALL32emu_DECL(sendmmsg, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags)); ++ ++COBALT_SYSCALL32emu_DECL(mmap, ++ (int fd, ++ struct compat_rtdm_mmap_request __user *u_rma, ++ compat_uptr_t __user *u_addrp)); ++ ++COBALT_SYSCALL32emu_DECL(backtrace, ++ (int nr, compat_ulong_t __user *u_backtrace, ++ int reason)); ++ ++COBALT_SYSCALL32emu_DECL(sem_open, ++ (compat_uptr_t __user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)); ++ ++COBALT_SYSCALL32emu_DECL(sem_timedwait, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct compat_timespec __user *u_ts)); ++ ++#endif /* !_COBALT_POSIX_SYSCALL32_H */ +--- linux/kernel/xenomai/posix/cond.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/cond.h 2021-04-07 16:01:26.116635754 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_COND_H ++#define _COBALT_POSIX_COND_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_mutex; ++ ++struct cobalt_cond { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ struct list_head mutex_link; ++ struct cobalt_cond_state *state; ++ struct cobalt_condattr attr; ++ struct cobalt_mutex *mutex; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_cond_wait_prologue(struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++COBALT_SYSCALL_DECL(cond_init, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(cond_destroy, ++ (struct cobalt_cond_shadow __user *u_cnd)); ++ ++COBALT_SYSCALL_DECL(cond_wait_prologue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(cond_wait_epilogue, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx)); ++ ++int cobalt_cond_deferred_signals(struct cobalt_cond *cond); ++ ++void cobalt_cond_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_COND_H */ +--- linux/kernel/xenomai/posix/timerfd.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timerfd.h 2021-04-07 16:01:26.112635759 +0800 +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef TIMERFD_H ++#define TIMERFD_H ++ ++#include ++#include ++ ++int __cobalt_timerfd_settime(int fd, int flags, ++ const struct itimerspec *new_value, ++ struct itimerspec *old_value); ++ ++int __cobalt_timerfd_gettime(int fd, ++ struct itimerspec *value); ++ ++COBALT_SYSCALL_DECL(timerfd_create, ++ (int clockid, int flags)); ++ ++COBALT_SYSCALL_DECL(timerfd_settime, ++ (int fd, int flags, ++ const struct itimerspec __user *new_value, ++ struct itimerspec __user *old_value)); ++ ++COBALT_SYSCALL_DECL(timerfd_gettime, ++ (int fd, struct itimerspec __user *curr_value)); ++ ++#endif /* TIMERFD_H */ +--- linux/kernel/xenomai/posix/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/Makefile 2021-04-07 16:01:26.107635766 +0800 +@@ -0,0 +1,38 @@ ++ ++ccflags-y += -Ikernel ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++ ++xenomai-y := \ ++ clock.o \ ++ cond.o \ ++ corectl.o \ ++ event.o \ ++ io.o \ ++ memory.o \ ++ monitor.o \ ++ mqueue.o \ ++ mutex.o \ ++ nsem.o \ ++ process.o \ ++ sched.o \ ++ sem.o \ ++ signal.o \ ++ syscall.o \ ++ thread.o \ ++ timer.o \ ++ timerfd.o ++ ++syscall_entries := $(srctree)/$(src)/gen-syscall-entries.sh ++ ++quiet_cmd_syscall_entries = GEN $@ ++ cmd_syscall_entries = $(CONFIG_SHELL) '$(syscall_entries)' $^ > $@ ++ ++$(obj)/syscall_entries.h: $(syscall_entries) $(wildcard $(srctree)/$(src)/*.c) ++ $(call if_changed,syscall_entries) ++ ++target += syscall_entries.h ++ ++$(obj)/syscall.o: $(obj)/syscall_entries.h ++ ++xenomai-$(CONFIG_XENO_ARCH_SYS3264) += compat.o syscall32.o +--- linux/kernel/xenomai/posix/process.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/process.c 2021-04-07 16:01:26.102635774 +0800 +@@ -0,0 +1,1710 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * Copyright (C) 2001-2014 The Xenomai project ++ * Copyright (C) 2006 Gilles Chanteperdrix ++ * ++ * SMP support Copyright (C) 2004 The HYADES project ++ * RTAI/fusion Copyright (C) 2004 The RTAI project ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../debug.h" ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "mutex.h" ++#include "cond.h" ++#include "mqueue.h" ++#include "sem.h" ++#include "signal.h" ++#include "timer.h" ++#include "monitor.h" ++#include "clock.h" ++#include "event.h" ++#include "timerfd.h" ++#include "io.h" ++ ++static int gid_arg = -1; ++module_param_named(allowed_group, gid_arg, int, 0644); ++ ++static DEFINE_MUTEX(personality_lock); ++ ++static struct hlist_head *process_hash; ++DEFINE_PRIVATE_XNLOCK(process_hash_lock); ++#define PROCESS_HASH_SIZE 13 ++ ++struct xnthread_personality *cobalt_personalities[NR_PERSONALITIES]; ++ ++static struct xnsynch yield_sync; ++ ++LIST_HEAD(cobalt_global_thread_list); ++ ++struct cobalt_resources cobalt_global_resources = { ++ .condq = LIST_HEAD_INIT(cobalt_global_resources.condq), ++ .mutexq = LIST_HEAD_INIT(cobalt_global_resources.mutexq), ++ .semq = LIST_HEAD_INIT(cobalt_global_resources.semq), ++ .monitorq = LIST_HEAD_INIT(cobalt_global_resources.monitorq), ++ .eventq = LIST_HEAD_INIT(cobalt_global_resources.eventq), ++ .schedq = LIST_HEAD_INIT(cobalt_global_resources.schedq), ++}; ++ ++static inline struct cobalt_process * ++process_from_thread(struct xnthread *thread) ++{ ++ return container_of(thread, struct cobalt_thread, threadbase)->process; ++} ++ ++static unsigned __attribute__((pure)) process_hash_crunch(struct mm_struct *mm) ++{ ++ unsigned long hash = ((unsigned long)mm - PAGE_OFFSET) / sizeof(*mm); ++ return hash % PROCESS_HASH_SIZE; ++} ++ ++static struct cobalt_process *__process_hash_search(struct mm_struct *mm) ++{ ++ unsigned int bucket = process_hash_crunch(mm); ++ struct cobalt_process *p; ++ ++ hlist_for_each_entry(p, &process_hash[bucket], hlink) ++ if (p->mm == mm) ++ return p; ++ ++ return NULL; ++} ++ ++static int process_hash_enter(struct cobalt_process *p) ++{ ++ struct mm_struct *mm = current->mm; ++ unsigned int bucket = process_hash_crunch(mm); ++ int err; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ if (__process_hash_search(mm)) { ++ err = -EBUSY; ++ goto out; ++ } ++ ++ p->mm = mm; ++ hlist_add_head(&p->hlink, &process_hash[bucket]); ++ err = 0; ++ out: ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ return err; ++} ++ ++static void process_hash_remove(struct cobalt_process *p) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ if (p->mm) ++ hlist_del(&p->hlink); ++ xnlock_put_irqrestore(&process_hash_lock, s); ++} ++ ++struct cobalt_process *cobalt_search_process(struct mm_struct *mm) ++{ ++ struct cobalt_process *process; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ process = __process_hash_search(mm); ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ ++ return process; ++} ++ ++static void *lookup_context(int xid) ++{ ++ struct cobalt_process *process = cobalt_current_process(); ++ void *priv = NULL; ++ spl_t s; ++ ++ xnlock_get_irqsave(&process_hash_lock, s); ++ /* ++ * First try matching the process context attached to the ++ * (usually main) thread which issued sc_cobalt_bind. If not ++ * found, try matching by mm context, which should point us ++ * back to the latter. 
If none match, then the current process ++ * is unbound. ++ */ ++ if (process == NULL && current->mm) ++ process = __process_hash_search(current->mm); ++ if (process) ++ priv = process->priv[xid]; ++ ++ xnlock_put_irqrestore(&process_hash_lock, s); ++ ++ return priv; ++} ++ ++static void remove_process(struct cobalt_process *process) ++{ ++ struct xnthread_personality *personality; ++ void *priv; ++ int xid; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = NR_PERSONALITIES - 1; xid >= 0; xid--) { ++ if (!__test_and_clear_bit(xid, &process->permap)) ++ continue; ++ personality = cobalt_personalities[xid]; ++ priv = process->priv[xid]; ++ if (priv == NULL) ++ continue; ++ /* ++ * CAUTION: process potentially refers to stale memory ++ * upon return from detach_process() for the Cobalt ++ * personality, so don't dereference it afterwards. ++ */ ++ if (xid) ++ process->priv[xid] = NULL; ++ __clear_bit(personality->xid, &process->permap); ++ personality->ops.detach_process(priv); ++ atomic_dec(&personality->refcnt); ++ XENO_WARN_ON(COBALT, atomic_read(&personality->refcnt) < 0); ++ if (personality->module) ++ module_put(personality->module); ++ } ++ ++ cobalt_set_process(NULL); ++ ++ mutex_unlock(&personality_lock); ++} ++ ++static void post_ppd_release(struct cobalt_umm *umm) ++{ ++ struct cobalt_process *process; ++ ++ process = container_of(umm, struct cobalt_process, sys_ppd.umm); ++ kfree(process); ++} ++ ++static inline char *get_exe_path(struct task_struct *p) ++{ ++ struct file *exe_file; ++ char *pathname, *buf; ++ struct mm_struct *mm; ++ struct path path; ++ ++ /* ++ * PATH_MAX is fairly large, and in any case won't fit on the ++ * caller's stack happily; since we are mapping a shadow, ++ * which is a heavyweight operation anyway, let's pick the ++ * memory from the page allocator. ++ */ ++ buf = (char *)__get_free_page(GFP_KERNEL); ++ if (buf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ mm = get_task_mm(p); ++ if (mm == NULL) { ++ pathname = "vmlinux"; ++ goto copy; /* kernel thread */ ++ } ++ ++ exe_file = get_mm_exe_file(mm); ++ mmput(mm); ++ if (exe_file == NULL) { ++ pathname = ERR_PTR(-ENOENT); ++ goto out; /* no luck. */ ++ } ++ ++ path = exe_file->f_path; ++ path_get(&exe_file->f_path); ++ fput(exe_file); ++ pathname = d_path(&path, buf, PATH_MAX); ++ path_put(&path); ++ if (IS_ERR(pathname)) ++ goto out; /* mmmh... */ ++copy: ++ /* caution: d_path() may start writing anywhere in the buffer. */ ++ pathname = kstrdup(pathname, GFP_KERNEL); ++out: ++ free_page((unsigned long)buf); ++ ++ return pathname; ++} ++ ++static inline int raise_cap(int cap) ++{ ++ struct cred *new; ++ ++ new = prepare_creds(); ++ if (new == NULL) ++ return -ENOMEM; ++ ++ cap_raise(new->cap_effective, cap); ++ ++ return commit_creds(new); ++} ++ ++static int bind_personality(struct xnthread_personality *personality) ++{ ++ struct cobalt_process *process; ++ void *priv; ++ ++ /* ++ * We also check capabilities for stacking a Cobalt extension, ++ * in case the process dropped the supervisor privileges after ++ * a successful initial binding to the Cobalt interface. ++ */ ++ if (!capable(CAP_SYS_NICE) && ++ (gid_arg == -1 || !in_group_p(KGIDT_INIT(gid_arg)))) ++ return -EPERM; ++ /* ++ * Protect from the same process binding to the same interface ++ * several times. 
++ */ ++ priv = lookup_context(personality->xid); ++ if (priv) ++ return 0; ++ ++ priv = personality->ops.attach_process(); ++ if (IS_ERR(priv)) ++ return PTR_ERR(priv); ++ ++ process = cobalt_current_process(); ++ /* ++ * We are still covered by the personality_lock, so we may ++ * safely bump the module refcount after the attach handler ++ * has returned. ++ */ ++ if (personality->module && !try_module_get(personality->module)) { ++ personality->ops.detach_process(priv); ++ return -EAGAIN; ++ } ++ ++ __set_bit(personality->xid, &process->permap); ++ atomic_inc(&personality->refcnt); ++ process->priv[personality->xid] = priv; ++ ++ raise_cap(CAP_SYS_NICE); ++ raise_cap(CAP_IPC_LOCK); ++ raise_cap(CAP_SYS_RAWIO); ++ ++ return 0; ++} ++ ++int cobalt_bind_personality(unsigned int magic) ++{ ++ struct xnthread_personality *personality; ++ int xid, ret = -ESRCH; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = 1; xid < NR_PERSONALITIES; xid++) { ++ personality = cobalt_personalities[xid]; ++ if (personality && personality->magic == magic) { ++ ret = bind_personality(personality); ++ break; ++ } ++ } ++ ++ mutex_unlock(&personality_lock); ++ ++ return ret ?: xid; ++} ++ ++int cobalt_bind_core(int ufeatures) ++{ ++ struct cobalt_process *process; ++ int ret; ++ ++ mutex_lock(&personality_lock); ++ ret = bind_personality(&cobalt_personality); ++ mutex_unlock(&personality_lock); ++ if (ret) ++ return ret; ++ ++ process = cobalt_current_process(); ++ /* Feature set userland knows about. */ ++ process->ufeatures = ufeatures; ++ ++ return 0; ++} ++ ++/** ++ * @fn int cobalt_register_personality(struct xnthread_personality *personality) ++ * @internal ++ * @brief Register a new interface personality. ++ * ++ * - personality->ops.attach_process() is called when a user-space ++ * process binds to the personality, on behalf of one of its ++ * threads. The attach_process() handler may return: ++ * ++ * . an opaque pointer, representing the context of the calling ++ * process for this personality; ++ * ++ * . a NULL pointer, meaning that no per-process structure should be ++ * attached to this process for this personality; ++ * ++ * . ERR_PTR(negative value) indicating an error, the binding ++ * process will then abort. ++ * ++ * - personality->ops.detach_process() is called on behalf of an ++ * exiting user-space process which has previously attached to the ++ * personality. This handler is passed a pointer to the per-process ++ * data received earlier from the ops->attach_process() handler. ++ * ++ * @return the personality (extension) identifier. ++ * ++ * @note cobalt_get_context() is NULL when ops.detach_process() is ++ * invoked for the personality the caller detaches from. ++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_register_personality(struct xnthread_personality *personality) ++{ ++ int xid; ++ ++ mutex_lock(&personality_lock); ++ ++ for (xid = 0; xid < NR_PERSONALITIES; xid++) { ++ if (cobalt_personalities[xid] == NULL) { ++ personality->xid = xid; ++ atomic_set(&personality->refcnt, 0); ++ cobalt_personalities[xid] = personality; ++ goto out; ++ } ++ } ++ ++ xid = -EAGAIN; ++out: ++ mutex_unlock(&personality_lock); ++ ++ return xid; ++} ++EXPORT_SYMBOL_GPL(cobalt_register_personality); ++ ++/* ++ * @brief Unregister an interface personality. 
++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_unregister_personality(int xid) ++{ ++ struct xnthread_personality *personality; ++ int ret = 0; ++ ++ if (xid < 0 || xid >= NR_PERSONALITIES) ++ return -EINVAL; ++ ++ mutex_lock(&personality_lock); ++ ++ personality = cobalt_personalities[xid]; ++ if (atomic_read(&personality->refcnt) > 0) ++ ret = -EBUSY; ++ else ++ cobalt_personalities[xid] = NULL; ++ ++ mutex_unlock(&personality_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(cobalt_unregister_personality); ++ ++/** ++ * Stack a new personality over Cobalt for the current thread. ++ * ++ * This service registers the current thread as a member of the ++ * additional personality identified by @a xid. If the current thread ++ * is already assigned this personality, the call returns successfully ++ * with no effect. ++ * ++ * @param xid the identifier of the additional personality. ++ * ++ * @return A handle to the previous personality. The caller should ++ * save this handle for unstacking @a xid when applicable via a call ++ * to cobalt_pop_personality(). ++ * ++ * @coretags{secondary-only} ++ */ ++struct xnthread_personality * ++cobalt_push_personality(int xid) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct xnthread_personality *prev, *next; ++ struct xnthread *thread = p->thread; ++ ++ secondary_mode_only(); ++ ++ mutex_lock(&personality_lock); ++ ++ if (xid < 0 || xid >= NR_PERSONALITIES || ++ p->process == NULL || !test_bit(xid, &p->process->permap)) { ++ mutex_unlock(&personality_lock); ++ return NULL; ++ } ++ ++ next = cobalt_personalities[xid]; ++ prev = thread->personality; ++ if (next == prev) { ++ mutex_unlock(&personality_lock); ++ return prev; ++ } ++ ++ thread->personality = next; ++ mutex_unlock(&personality_lock); ++ xnthread_run_handler(thread, map_thread); ++ ++ return prev; ++} ++EXPORT_SYMBOL_GPL(cobalt_push_personality); ++ ++/** ++ * Pop the topmost personality from the current thread. ++ * ++ * This service pops the topmost personality off the current thread. ++ * ++ * @param prev the previous personality which was returned by the ++ * latest call to cobalt_push_personality() for the current thread. ++ * ++ * @coretags{secondary-only} ++ */ ++void cobalt_pop_personality(struct xnthread_personality *prev) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ struct xnthread *thread = p->thread; ++ ++ secondary_mode_only(); ++ thread->personality = prev; ++} ++EXPORT_SYMBOL_GPL(cobalt_pop_personality); ++ ++/** ++ * Return the per-process data attached to the calling user process. ++ * ++ * This service returns the per-process data attached to the calling ++ * user process for the personality whose xid is @a xid. ++ * ++ * The per-process data was obtained from the ->attach_process() ++ * handler defined for the personality @a xid refers to. ++ * ++ * See cobalt_register_personality() documentation for information on ++ * the way to attach a per-process data to a process. ++ * ++ * @param xid the personality identifier. ++ * ++ * @return the per-process data if the current context is a user-space ++ * process; @return NULL otherwise. As a special case, ++ * cobalt_get_context(0) returns the current Cobalt process ++ * descriptor, which is strictly identical to calling ++ * cobalt_current_process(). 
++ * ++ * @coretags{task-unrestricted} ++ */ ++void *cobalt_get_context(int xid) ++{ ++ return lookup_context(xid); ++} ++EXPORT_SYMBOL_GPL(cobalt_get_context); ++ ++int cobalt_yield(xnticks_t min, xnticks_t max) ++{ ++ xnticks_t start; ++ int ret; ++ ++ start = xnclock_read_monotonic(&nkclock); ++ max += start; ++ min += start; ++ ++ do { ++ ret = xnsynch_sleep_on(&yield_sync, max, XN_ABSOLUTE); ++ if (ret & XNBREAK) ++ return -EINTR; ++ } while (ret == 0 && xnclock_read_monotonic(&nkclock) < min); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_yield); ++ ++static inline void init_uthread_info(struct xnthread *thread) ++{ ++ struct ipipe_threadinfo *p; ++ ++ p = ipipe_current_threadinfo(); ++ p->thread = thread; ++ p->process = cobalt_search_process(current->mm); ++} ++ ++static inline void clear_threadinfo(void) ++{ ++ struct ipipe_threadinfo *p = ipipe_current_threadinfo(); ++ p->thread = NULL; ++ p->process = NULL; ++} ++ ++#ifdef CONFIG_MMU ++ ++static inline int disable_ondemand_memory(void) ++{ ++ struct task_struct *p = current; ++ kernel_siginfo_t si; ++ ++ if ((p->mm->def_flags & VM_LOCKED) == 0) { ++ memset(&si, 0, sizeof(si)); ++ si.si_signo = SIGDEBUG; ++ si.si_code = SI_QUEUE; ++ si.si_int = SIGDEBUG_NOMLOCK | sigdebug_marker; ++ send_sig_info(SIGDEBUG, &si, p); ++ return 0; ++ } ++ ++ return __ipipe_disable_ondemand_mappings(p); ++} ++ ++static inline int get_mayday_prot(void) ++{ ++ return PROT_READ|PROT_EXEC; ++} ++ ++#else /* !CONFIG_MMU */ ++ ++static inline int disable_ondemand_memory(void) ++{ ++ return 0; ++} ++ ++static inline int get_mayday_prot(void) ++{ ++ /* ++ * Until we stop backing /dev/mem with the mayday page, we ++ * can't ask for PROT_EXEC since the former does not define ++ * mmap capabilities, and default ones won't allow an ++ * executable mapping with MAP_SHARED. In the NOMMU case, this ++ * is (currently) not an issue. ++ */ ++ return PROT_READ; ++} ++ ++#endif /* !CONFIG_MMU */ ++ ++/** ++ * @fn int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff) ++ * @internal ++ * @brief Create a shadow thread context over a user task. ++ * ++ * This call maps a Xenomai thread to the current regular Linux task ++ * running in userland. The priority and scheduling class of the ++ * underlying Linux task are not affected; it is assumed that the ++ * interface library did set them appropriately before issuing the ++ * shadow mapping request. ++ * ++ * @param thread The descriptor address of the new shadow thread to be ++ * mapped to current. This descriptor must have been previously ++ * initialized by a call to xnthread_init(). ++ * ++ * @param u_winoff will receive the offset of the per-thread ++ * "u_window" structure in the global heap associated to @a ++ * thread. This structure reflects thread state information visible ++ * from userland through a shared memory window. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -EINVAL is returned if the thread control block does not bear the ++ * XNUSER bit. ++ * ++ * - -EBUSY is returned if either the current Linux task or the ++ * associated shadow thread is already involved in a shadow mapping. 
++ * ++ * @coretags{secondary-only} ++ */ ++int cobalt_map_user(struct xnthread *thread, __u32 __user *u_winoff) ++{ ++ struct xnthread_user_window *u_window; ++ struct xnthread_start_attr attr; ++ struct cobalt_ppd *sys_ppd; ++ struct cobalt_umm *umm; ++ int ret; ++ ++ if (!xnthread_test_state(thread, XNUSER)) ++ return -EINVAL; ++ ++ if (xnthread_current() || xnthread_test_state(thread, XNMAPPED)) ++ return -EBUSY; ++ ++ if (!access_wok(u_winoff, sizeof(*u_winoff))) ++ return -EFAULT; ++ ++ ret = disable_ondemand_memory(); ++ if (ret) ++ return ret; ++ ++ umm = &cobalt_kernel_ppd.umm; ++ u_window = cobalt_umm_zalloc(umm, sizeof(*u_window)); ++ if (u_window == NULL) ++ return -ENOMEM; ++ ++ thread->u_window = u_window; ++ __xn_put_user(cobalt_umm_offset(umm, u_window), u_winoff); ++ xnthread_pin_initial(thread); ++ ++ /* ++ * CAUTION: we enable the pipeline notifier only when our ++ * shadow TCB is consistent, so that we won't trigger false ++ * positive in debug code from handle_schedule_event() and ++ * friends. ++ */ ++ xnthread_init_shadow_tcb(thread); ++ xnthread_suspend(thread, XNRELAX, XN_INFINITE, XN_RELATIVE, NULL); ++ init_uthread_info(thread); ++ xnthread_set_state(thread, XNMAPPED); ++ xndebug_shadow_init(thread); ++ sys_ppd = cobalt_ppd_get(0); ++ atomic_inc(&sys_ppd->refcnt); ++ /* ++ * ->map_thread() handler is invoked after the TCB is fully ++ * built, and when we know for sure that current will go ++ * through our task-exit handler, because it has a shadow ++ * extension and I-pipe notifications will soon be enabled for ++ * it. ++ */ ++ xnthread_run_handler(thread, map_thread); ++ ipipe_enable_notifier(current); ++ ++ attr.mode = 0; ++ attr.entry = NULL; ++ attr.cookie = NULL; ++ ret = xnthread_start(thread, &attr); ++ if (ret) ++ return ret; ++ ++ xnthread_sync_window(thread); ++ ++ xntrace_pid(xnthread_host_pid(thread), ++ xnthread_current_priority(thread)); ++ ++ return 0; ++} ++ ++#ifdef IPIPE_KEVT_PTRESUME ++static void stop_debugged_process(struct xnthread *thread) ++{ ++ struct cobalt_process *process = process_from_thread(thread); ++ struct cobalt_thread *cth; ++ ++ if (process->debugged_threads > 0) ++ return; ++ ++ list_for_each_entry(cth, &process->thread_list, next) { ++ if (&cth->threadbase == thread) ++ continue; ++ ++ xnthread_suspend(&cth->threadbase, XNDBGSTOP, XN_INFINITE, ++ XN_RELATIVE, NULL); ++ } ++} ++ ++static void resume_debugged_process(struct cobalt_process *process) ++{ ++ struct cobalt_thread *cth; ++ ++ xnsched_lock(); ++ ++ list_for_each_entry(cth, &process->thread_list, next) ++ if (xnthread_test_state(&cth->threadbase, XNDBGSTOP)) ++ xnthread_resume(&cth->threadbase, XNDBGSTOP); ++ ++ xnsched_unlock(); ++} ++ ++#else /* IPIPE_KEVT_PTRESUME unavailable */ ++ ++static inline void stop_debugged_process(struct xnthread *thread) ++{ ++} ++ ++static inline void resume_debugged_process(struct cobalt_process *process) ++{ ++} ++#endif /* IPIPE_KEVT_PTRESUME unavailable */ ++ ++/* called with nklock held */ ++static void cobalt_register_debugged_thread(struct xnthread *thread) ++{ ++ struct cobalt_process *process = process_from_thread(thread); ++ ++ xnthread_set_state(thread, XNSSTEP); ++ ++ stop_debugged_process(thread); ++ process->debugged_threads++; ++ ++ if (xnthread_test_state(thread, XNRELAX)) ++ xnthread_suspend(thread, XNDBGSTOP, XN_INFINITE, XN_RELATIVE, ++ NULL); ++} ++ ++/* called with nklock held */ ++static void cobalt_unregister_debugged_thread(struct xnthread *thread) ++{ ++ struct cobalt_process *process = 
process_from_thread(thread); ++ ++ process->debugged_threads--; ++ xnthread_clear_state(thread, XNSSTEP); ++ ++ if (process->debugged_threads == 0) ++ resume_debugged_process(process); ++} ++ ++static inline int handle_exception(struct ipipe_trap_data *d) ++{ ++ struct xnthread *thread; ++ struct xnsched *sched; ++ ++ sched = xnsched_current(); ++ thread = sched->curr; ++ ++ trace_cobalt_thread_fault(d); ++ ++ if (xnthread_test_state(thread, XNROOT)) ++ return 0; ++ ++#ifdef IPIPE_KEVT_USERINTRET ++ if (xnarch_fault_bp_p(d) && user_mode(d->regs)) { ++ spl_t s; ++ ++ XENO_WARN_ON(CORE, xnthread_test_state(thread, XNRELAX)); ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_set_info(thread, XNCONTHI); ++ ipipe_enable_user_intret_notifier(); ++ stop_debugged_process(thread); ++ xnlock_put_irqrestore(&nklock, s); ++ xnsched_run(); ++ } ++#endif ++ ++ if (xnarch_fault_fpu_p(d)) { ++#ifdef CONFIG_XENO_ARCH_FPU ++ spl_t s; ++ ++ /* FPU exception received in primary mode. */ ++ splhigh(s); ++ if (xnarch_handle_fpu_fault(sched->fpuholder, thread, d)) { ++ sched->fpuholder = thread; ++ splexit(s); ++ return 1; ++ } ++ splexit(s); ++#endif /* CONFIG_XENO_ARCH_FPU */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) ++ printk("invalid use of FPU in Xenomai context at %pS\n", ++ (void *)xnarch_fault_pc(d)); ++#else ++ print_symbol("invalid use of FPU in Xenomai context at %s\n", ++ xnarch_fault_pc(d)); ++#endif ++ } ++ ++ /* ++ * If we experienced a trap on behalf of a shadow thread ++ * running in primary mode, move it to the Linux domain, ++ * leaving the kernel process the exception. ++ */ ++#if defined(CONFIG_XENO_OPT_DEBUG_COBALT) || defined(CONFIG_XENO_OPT_DEBUG_USER) ++ if (!user_mode(d->regs)) { ++ xntrace_panic_freeze(); ++ printk(XENO_WARNING ++ "switching %s to secondary mode after exception #%u in " ++ "kernel-space at 0x%lx (pid %d)\n", thread->name, ++ xnarch_fault_trap(d), ++ xnarch_fault_pc(d), ++ xnthread_host_pid(thread)); ++ xntrace_panic_dump(); ++ } else if (xnarch_fault_notify(d)) /* Don't report debug traps */ ++ printk(XENO_WARNING ++ "switching %s to secondary mode after exception #%u from " ++ "user-space at 0x%lx (pid %d)\n", thread->name, ++ xnarch_fault_trap(d), ++ xnarch_fault_pc(d), ++ xnthread_host_pid(thread)); ++#endif ++ ++ if (xnarch_fault_pf_p(d)) ++ /* ++ * The page fault counter is not SMP-safe, but it's a ++ * simple indicator that something went wrong wrt ++ * memory locking anyway. ++ */ ++ xnstat_counter_inc(&thread->stat.pf); ++ ++ xnthread_relax(xnarch_fault_notify(d), SIGDEBUG_MIGRATE_FAULT); ++ ++ return 0; ++} ++ ++static int handle_mayday_event(struct pt_regs *regs) ++{ ++ XENO_BUG_ON(COBALT, !xnthread_test_state(xnthread_current(), XNUSER)); ++ ++ xnthread_relax(0, 0); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++int ipipe_trap_hook(struct ipipe_trap_data *data) ++{ ++ if (data->exception == IPIPE_TRAP_MAYDAY) ++ return handle_mayday_event(data->regs); ++ ++ /* ++ * No migration is possible on behalf of the head domain, so ++ * the following access is safe. ++ */ ++ raw_cpu_ptr(&cobalt_machine_cpudata)->faults[data->exception]++; ++ ++ if (handle_exception(data)) ++ return KEVENT_STOP; ++ ++ /* ++ * CAUTION: access faults must be propagated downstream ++ * whichever domain caused them, so that we don't spuriously ++ * raise a fatal error when some Linux fixup code is available ++ * to recover from the fault. ++ */ ++ return KEVENT_PROPAGATE; ++} ++ ++/* ++ * Legacy idle hook, unconditionally allow entering the idle state. 
++ */ ++bool ipipe_enter_idle_hook(void) ++{ ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++ ++static int handle_setaffinity_event(struct ipipe_cpu_migration_data *d) ++{ ++ struct task_struct *p = d->task; ++ struct xnthread *thread; ++ spl_t s; ++ ++ thread = xnthread_from_task(p); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ /* ++ * Detect a Cobalt thread sleeping in primary mode which is ++ * required to migrate to another CPU by the host kernel. ++ * ++ * We may NOT fix up thread->sched immediately using the ++ * passive migration call, because that latter always has to ++ * take place on behalf of the target thread itself while ++ * running in secondary mode. Therefore, that thread needs to ++ * go through secondary mode first, then move back to primary ++ * mode, so that affinity_ok() does the fixup work. ++ * ++ * We force this by sending a SIGSHADOW signal to the migrated ++ * thread, asking it to switch back to primary mode from the ++ * handler, at which point the interrupted syscall may be ++ * restarted. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNTHREAD_BLOCK_BITS & ~XNRELAX)) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HARDEN); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline bool affinity_ok(struct task_struct *p) /* nklocked, IRQs off */ ++{ ++ struct xnthread *thread = xnthread_from_task(p); ++ struct xnsched *sched; ++ int cpu = task_cpu(p); ++ ++ /* ++ * To maintain consistency between both Cobalt and host ++ * schedulers, reflecting a thread migration to another CPU ++ * into the Cobalt scheduler state must happen from secondary ++ * mode only, on behalf of the migrated thread itself once it ++ * runs on the target CPU. ++ * ++ * This means that the Cobalt scheduler state regarding the ++ * CPU information lags behind the host scheduler state until ++ * the migrated thread switches back to primary mode ++ * (i.e. task_cpu(p) != xnsched_cpu(xnthread_from_task(p)->sched)). ++ * This is ok since Cobalt does not schedule such thread until then. ++ * ++ * check_affinity() detects when a Cobalt thread switching ++ * back to primary mode did move to another CPU earlier while ++ * in secondary mode. If so, do the fixups to reflect the ++ * change. ++ */ ++ if (!xnsched_threading_cpu(cpu)) { ++ /* ++ * The thread is about to switch to primary mode on a ++ * non-rt CPU, which is damn wrong and hopeless. ++ * Whine and cancel that thread. ++ */ ++ printk(XENO_WARNING "thread %s[%d] switched to non-rt CPU%d, aborted.\n", ++ thread->name, xnthread_host_pid(thread), cpu); ++ /* ++ * Can't call xnthread_cancel() from a migration ++ * point, that would break. Since we are on the wakeup ++ * path to hardening, just raise XNCANCELD to catch it ++ * in xnthread_harden(). ++ */ ++ xnthread_set_info(thread, XNCANCELD); ++ return false; ++ } ++ ++ sched = xnsched_struct(cpu); ++ if (sched == thread->sched) ++ return true; ++ ++ /* ++ * The current thread moved to a supported real-time CPU, ++ * which is not part of its original affinity mask ++ * though. Assume user wants to extend this mask. 
++ */ ++ if (!cpumask_test_cpu(cpu, &thread->affinity)) ++ cpumask_set_cpu(cpu, &thread->affinity); ++ ++ xnthread_run_handler_stack(thread, move_thread, cpu); ++ xnthread_migrate_passive(thread, sched); ++ ++ return true; ++} ++ ++#else /* !CONFIG_SMP */ ++ ++struct ipipe_cpu_migration_data; ++ ++static int handle_setaffinity_event(struct ipipe_cpu_migration_data *d) ++{ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline bool affinity_ok(struct task_struct *p) ++{ ++ return true; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++void ipipe_migration_hook(struct task_struct *p) /* hw IRQs off */ ++{ ++ struct xnthread *thread = xnthread_from_task(p); ++ ++ xnlock_get(&nklock); ++ ++ /* ++ * We fire the handler before the thread is migrated, so that ++ * thread->sched does not change between paired invocations of ++ * relax_thread/harden_thread handlers. ++ */ ++ xnthread_run_handler_stack(thread, harden_thread); ++ if (affinity_ok(p)) ++ xnthread_resume(thread, XNRELAX); ++ ++#ifdef IPIPE_KEVT_USERINTRET ++ /* ++ * In case we migrated independently of the user return notifier, clear ++ * XNCONTHI here and also disable the notifier - we are already done. ++ */ ++ if (unlikely(xnthread_test_info(thread, XNCONTHI))) { ++ xnthread_clear_info(thread, XNCONTHI); ++ ipipe_disable_user_intret_notifier(); ++ } ++#endif ++ ++ /* Unregister as debugged thread in case we postponed this. */ ++ if (unlikely(xnthread_test_state(thread, XNSSTEP))) ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnlock_put(&nklock); ++ ++ xnsched_run(); ++} ++ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++ ++static IPIPE_DEFINE_SPINLOCK(__hostrtlock); ++ ++static int handle_hostrt_event(struct ipipe_hostrt_data *hostrt) ++{ ++ unsigned long flags; ++ urwstate_t tmp; ++ ++ /* ++ * The locking strategy is twofold: ++ * - The spinlock protects against concurrent updates from within the ++ * Linux kernel and against preemption by Xenomai ++ * - The unsynced R/W block is for lockless read-only access. ++ */ ++ raw_spin_lock_irqsave(&__hostrtlock, flags); ++ ++ unsynced_write_block(&tmp, &nkvdso->hostrt_data.lock) { ++ nkvdso->hostrt_data.live = 1; ++ nkvdso->hostrt_data.cycle_last = hostrt->cycle_last; ++ nkvdso->hostrt_data.mask = hostrt->mask; ++ nkvdso->hostrt_data.mult = hostrt->mult; ++ nkvdso->hostrt_data.shift = hostrt->shift; ++ nkvdso->hostrt_data.wall_sec = hostrt->wall_time_sec; ++ nkvdso->hostrt_data.wall_nsec = hostrt->wall_time_nsec; ++ nkvdso->hostrt_data.wtom_sec = hostrt->wall_to_monotonic.tv_sec; ++ nkvdso->hostrt_data.wtom_nsec = hostrt->wall_to_monotonic.tv_nsec; ++ } ++ ++ raw_spin_unlock_irqrestore(&__hostrtlock, flags); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void init_hostrt(void) ++{ ++ unsynced_rw_init(&nkvdso->hostrt_data.lock); ++ nkvdso->hostrt_data.live = 0; ++} ++ ++#else /* !CONFIG_XENO_OPT_HOSTRT */ ++ ++struct ipipe_hostrt_data; ++ ++static inline int handle_hostrt_event(struct ipipe_hostrt_data *hostrt) ++{ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void init_hostrt(void) { } ++ ++#endif /* !CONFIG_XENO_OPT_HOSTRT */ ++ ++static void __handle_taskexit_event(struct task_struct *p) ++{ ++ struct cobalt_ppd *sys_ppd; ++ struct xnthread *thread; ++ spl_t s; ++ ++ /* ++ * We are called for both kernel and user shadows over the ++ * root thread. 
++ */ ++ secondary_mode_only(); ++ ++ thread = xnthread_current(); ++ XENO_BUG_ON(COBALT, thread == NULL); ++ trace_cobalt_shadow_unmap(thread); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNSSTEP)) ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_run_handler_stack(thread, exit_thread); ++ ++ if (xnthread_test_state(thread, XNUSER)) { ++ cobalt_umm_free(&cobalt_kernel_ppd.umm, thread->u_window); ++ thread->u_window = NULL; ++ sys_ppd = cobalt_ppd_get(0); ++ if (atomic_dec_and_test(&sys_ppd->refcnt)) ++ remove_process(cobalt_current_process()); ++ } ++} ++ ++static int handle_taskexit_event(struct task_struct *p) /* p == current */ ++{ ++ __handle_taskexit_event(p); ++ ++ /* ++ * __xnthread_cleanup() -> ... -> finalize_thread ++ * handler. From that point, the TCB is dropped. Be careful of ++ * not treading on stale memory within @thread. ++ */ ++ __xnthread_cleanup(xnthread_current()); ++ ++ clear_threadinfo(); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline void signal_yield(void) ++{ ++ spl_t s; ++ ++ if (!xnsynch_pended_p(&yield_sync)) ++ return; ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (xnsynch_pended_p(&yield_sync)) { ++ xnsynch_flush(&yield_sync, 0); ++ xnsched_run(); ++ } ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static int handle_schedule_event(struct task_struct *next_task) ++{ ++ struct task_struct *prev_task; ++ struct xnthread *next; ++ sigset_t pending; ++ spl_t s; ++ ++ signal_yield(); ++ ++ prev_task = current; ++ next = xnthread_from_task(next_task); ++ if (next == NULL) ++ goto out; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * Track tasks leaving the ptraced state. Check both SIGSTOP ++ * (NPTL) and SIGINT (LinuxThreads) to detect ptrace ++ * continuation. ++ */ ++ if (xnthread_test_state(next, XNSSTEP)) { ++ if (signal_pending(next_task)) { ++ /* ++ * Do not grab the sighand lock here: it's ++ * useless, and we already own the runqueue ++ * lock, so this would expose us to deadlock ++ * situations on SMP. ++ */ ++ sigorsets(&pending, ++ &next_task->pending.signal, ++ &next_task->signal->shared_pending.signal); ++ if (sigismember(&pending, SIGSTOP) || ++ sigismember(&pending, SIGINT)) ++ goto no_ptrace; ++ } ++ ++ /* ++ * Do not unregister before the thread migrated. ++ * cobalt_unregister_debugged_thread will then be called by our ++ * ipipe_migration_hook. ++ */ ++ if (!xnthread_test_info(next, XNCONTHI)) ++ cobalt_unregister_debugged_thread(next); ++ ++ xnthread_set_localinfo(next, XNHICCUP); ++ } ++ ++no_ptrace: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Do basic sanity checks on the incoming thread state. ++ * NOTE: we allow ptraced threads to run shortly in order to ++ * properly recover from a stopped state. ++ */ ++ if (!XENO_WARN(COBALT, !xnthread_test_state(next, XNRELAX), ++ "hardened thread %s[%d] running in Linux domain?! " ++ "(status=0x%x, sig=%d, prev=%s[%d])", ++ next->name, task_pid_nr(next_task), ++ xnthread_get_state(next), ++ signal_pending(next_task), ++ prev_task->comm, task_pid_nr(prev_task))) ++ XENO_WARN(COBALT, ++ !(next_task->ptrace & PT_PTRACED) && ++ !xnthread_test_state(next, XNDORMANT) ++ && xnthread_test_state(next, XNPEND), ++ "blocked thread %s[%d] rescheduled?! 
" ++ "(status=0x%x, sig=%d, prev=%s[%d])", ++ next->name, task_pid_nr(next_task), ++ xnthread_get_state(next), ++ signal_pending(next_task), prev_task->comm, ++ task_pid_nr(prev_task)); ++out: ++ return KEVENT_PROPAGATE; ++} ++ ++static int handle_sigwake_event(struct task_struct *p) ++{ ++ struct xnthread *thread; ++ sigset_t pending; ++ spl_t s; ++ ++ thread = xnthread_from_task(p); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * CAUTION: __TASK_TRACED is not set in p->state yet. This ++ * state bit will be set right after we return, when the task ++ * is woken up. ++ */ ++ if ((p->ptrace & PT_PTRACED) && !xnthread_test_state(thread, XNSSTEP)) { ++ /* We already own the siglock. */ ++ sigorsets(&pending, ++ &p->pending.signal, ++ &p->signal->shared_pending.signal); ++ ++ if (sigismember(&pending, SIGTRAP) || ++ sigismember(&pending, SIGSTOP) ++ || sigismember(&pending, SIGINT)) ++ cobalt_register_debugged_thread(thread); ++ } ++ ++ if (xnthread_test_state(thread, XNRELAX)) ++ goto out; ++ ++ /* ++ * If kicking a shadow thread in primary mode, make sure Linux ++ * won't schedule in its mate under our feet as a result of ++ * running signal_wake_up(). The Xenomai scheduler must remain ++ * in control for now, until we explicitly relax the shadow ++ * thread to allow for processing the pending signals. Make ++ * sure we keep the additional state flags unmodified so that ++ * we don't break any undergoing ptrace. ++ */ ++ if (p->state & (TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE)) ++ cobalt_set_task_state(p, p->state | TASK_NOWAKEUP); ++ ++ /* ++ * Allow a thread stopped for debugging to resume briefly in order to ++ * migrate to secondary mode. xnthread_relax will reapply XNDBGSTOP. ++ */ ++ if (xnthread_test_state(thread, XNDBGSTOP)) ++ xnthread_resume(thread, XNDBGSTOP); ++ ++ __xnthread_kick(thread); ++out: ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static int handle_cleanup_event(struct mm_struct *mm) ++{ ++ struct cobalt_process *old, *process; ++ struct cobalt_ppd *sys_ppd; ++ struct xnthread *curr; ++ ++ /* ++ * We are NOT called for exiting kernel shadows. ++ * cobalt_current_process() is cleared if we get there after ++ * handle_task_exit(), so we need to restore this context ++ * pointer temporarily. ++ */ ++ process = cobalt_search_process(mm); ++ old = cobalt_set_process(process); ++ sys_ppd = cobalt_ppd_get(0); ++ if (sys_ppd != &cobalt_kernel_ppd) { ++ bool running_exec; ++ ++ /* ++ * Detect a userland shadow running exec(), i.e. still ++ * attached to the current linux task (no prior ++ * clear_threadinfo). In this case, we emulate a task ++ * exit, since the Xenomai binding shall not survive ++ * the exec() syscall. Since the process will keep on ++ * running though, we have to disable the event ++ * notifier manually for it. ++ */ ++ curr = xnthread_current(); ++ running_exec = curr && (current->flags & PF_EXITING) == 0; ++ if (running_exec) { ++ __handle_taskexit_event(current); ++ ipipe_disable_notifier(current); ++ } ++ if (atomic_dec_and_test(&sys_ppd->refcnt)) ++ remove_process(process); ++ if (running_exec) { ++ __xnthread_cleanup(curr); ++ clear_threadinfo(); ++ } ++ } ++ ++ /* ++ * CAUTION: Do not override a state change caused by ++ * remove_process(). 
++ */ ++ if (cobalt_current_process() == process) ++ cobalt_set_process(old); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++static inline int handle_clockfreq_event(unsigned int *p) ++{ ++ unsigned int newfreq = *p; ++ ++ xnclock_update_freq(newfreq); ++ ++ return KEVENT_PROPAGATE; ++} ++ ++#ifdef IPIPE_KEVT_USERINTRET ++static int handle_user_return(struct task_struct *task) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ int err; ++ ++ ipipe_disable_user_intret_notifier(); ++ ++ thread = xnthread_from_task(task); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ if (xnthread_test_info(thread, XNCONTHI)) { ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_clear_info(thread, XNCONTHI); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ err = xnthread_harden(); ++ ++ /* ++ * XNCONTHI may or may not have been re-applied if ++ * harden bailed out due to pending signals. Make sure ++ * it is set in that case. ++ */ ++ if (err == -ERESTARTSYS) { ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_set_info(thread, XNCONTHI); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ ++ return KEVENT_PROPAGATE; ++} ++#endif /* IPIPE_KEVT_USERINTRET */ ++ ++#ifdef IPIPE_KEVT_PTRESUME ++int handle_ptrace_resume(struct ipipe_ptrace_resume_data *resume) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ thread = xnthread_from_task(resume->task); ++ if (thread == NULL) ++ return KEVENT_PROPAGATE; ++ ++ if (resume->request == PTRACE_SINGLESTEP && ++ xnthread_test_state(thread, XNSSTEP)) { ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_resume(thread, XNDBGSTOP); ++ cobalt_unregister_debugged_thread(thread); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return KEVENT_PROPAGATE; ++} ++#endif /* IPIPE_KEVT_PTRESUME */ ++ ++int ipipe_kevent_hook(int kevent, void *data) ++{ ++ int ret; ++ ++ switch (kevent) { ++ case IPIPE_KEVT_SCHEDULE: ++ ret = handle_schedule_event(data); ++ break; ++ case IPIPE_KEVT_SIGWAKE: ++ ret = handle_sigwake_event(data); ++ break; ++ case IPIPE_KEVT_EXIT: ++ ret = handle_taskexit_event(data); ++ break; ++ case IPIPE_KEVT_CLEANUP: ++ ret = handle_cleanup_event(data); ++ break; ++ case IPIPE_KEVT_HOSTRT: ++ ret = handle_hostrt_event(data); ++ break; ++ case IPIPE_KEVT_SETAFFINITY: ++ ret = handle_setaffinity_event(data); ++ break; ++#ifdef IPIPE_KEVT_CLOCKFREQ ++ case IPIPE_KEVT_CLOCKFREQ: ++ ret = handle_clockfreq_event(data); ++ break; ++#endif ++#ifdef IPIPE_KEVT_USERINTRET ++ case IPIPE_KEVT_USERINTRET: ++ ret = handle_user_return(data); ++ break; ++#endif ++#ifdef IPIPE_KEVT_PTRESUME ++ case IPIPE_KEVT_PTRESUME: ++ ret = handle_ptrace_resume(data); ++ break; ++#endif ++ default: ++ ret = KEVENT_PROPAGATE; ++ } ++ ++ return ret; ++} ++ ++static int attach_process(struct cobalt_process *process) ++{ ++ struct cobalt_ppd *p = &process->sys_ppd; ++ char *exe_path; ++ int ret; ++ ++ ret = cobalt_umm_init(&p->umm, CONFIG_XENO_OPT_PRIVATE_HEAPSZ * 1024, ++ post_ppd_release); ++ if (ret) ++ return ret; ++ ++ cobalt_umm_set_name(&p->umm, "private heap[%d]", task_pid_nr(current)); ++ ++ exe_path = get_exe_path(current); ++ if (IS_ERR(exe_path)) { ++ printk(XENO_WARNING ++ "%s[%d] can't find exe path\n", ++ current->comm, task_pid_nr(current)); ++ exe_path = NULL; /* Not lethal, but weird. 
*/ ++ } ++ p->exe_path = exe_path; ++ xntree_init(&p->fds); ++ atomic_set(&p->refcnt, 1); ++ ++ ret = process_hash_enter(process); ++ if (ret) ++ goto fail_hash; ++ ++ return 0; ++fail_hash: ++ if (p->exe_path) ++ kfree(p->exe_path); ++ cobalt_umm_destroy(&p->umm); ++ ++ return ret; ++} ++ ++static void *cobalt_process_attach(void) ++{ ++ struct cobalt_process *process; ++ int ret; ++ ++ process = kzalloc(sizeof(*process), GFP_KERNEL); ++ if (process == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = attach_process(process); ++ if (ret) { ++ kfree(process); ++ return ERR_PTR(ret); ++ } ++ ++ INIT_LIST_HEAD(&process->resources.condq); ++ INIT_LIST_HEAD(&process->resources.mutexq); ++ INIT_LIST_HEAD(&process->resources.semq); ++ INIT_LIST_HEAD(&process->resources.monitorq); ++ INIT_LIST_HEAD(&process->resources.eventq); ++ INIT_LIST_HEAD(&process->resources.schedq); ++ INIT_LIST_HEAD(&process->sigwaiters); ++ INIT_LIST_HEAD(&process->thread_list); ++ xntree_init(&process->usems); ++ bitmap_fill(process->timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ cobalt_set_process(process); ++ ++ return process; ++} ++ ++static void detach_process(struct cobalt_process *process) ++{ ++ struct cobalt_ppd *p = &process->sys_ppd; ++ ++ if (p->exe_path) ++ kfree(p->exe_path); ++ ++ rtdm_fd_cleanup(p); ++ process_hash_remove(process); ++ /* ++ * CAUTION: the process descriptor might be immediately ++ * released as a result of calling cobalt_umm_destroy(), so we ++ * must do this last, not to tread on stale memory. ++ */ ++ cobalt_umm_destroy(&p->umm); ++} ++ ++static void __reclaim_resource(struct cobalt_process *process, ++ void (*reclaim)(struct cobalt_resnode *node, spl_t s), ++ struct list_head *local, ++ struct list_head *global) ++{ ++ struct cobalt_resnode *node, *tmp; ++ LIST_HEAD(stash); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(global)) ++ goto flush_local; ++ ++ list_for_each_entry_safe(node, tmp, global, next) { ++ if (node->owner == process) { ++ list_del(&node->next); ++ list_add(&node->next, &stash); ++ } ++ } ++ ++ list_for_each_entry_safe(node, tmp, &stash, next) { ++ reclaim(node, s); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ XENO_BUG_ON(COBALT, !list_empty(&stash)); ++ ++flush_local: ++ if (list_empty(local)) ++ goto out; ++ ++ list_for_each_entry_safe(node, tmp, local, next) { ++ reclaim(node, s); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++#define cobalt_reclaim_resource(__process, __reclaim, __type) \ ++ __reclaim_resource(__process, __reclaim, \ ++ &(__process)->resources.__type ## q, \ ++ &cobalt_global_resources.__type ## q) ++ ++static void cobalt_process_detach(void *arg) ++{ ++ struct cobalt_process *process = arg; ++ ++ cobalt_nsem_reclaim(process); ++ cobalt_timer_reclaim(process); ++ cobalt_sched_reclaim(process); ++ cobalt_reclaim_resource(process, cobalt_cond_reclaim, cond); ++ cobalt_reclaim_resource(process, cobalt_mutex_reclaim, mutex); ++ cobalt_reclaim_resource(process, cobalt_event_reclaim, event); ++ cobalt_reclaim_resource(process, cobalt_monitor_reclaim, monitor); ++ cobalt_reclaim_resource(process, cobalt_sem_reclaim, sem); ++ detach_process(process); ++ /* ++ * The cobalt_process descriptor release may be deferred until ++ * the last mapping on the private heap is gone. However, this ++ * is potentially stale memory already. 
++ */ ++} ++ ++struct xnthread_personality cobalt_personality = { ++ .name = "cobalt", ++ .magic = 0, ++ .ops = { ++ .attach_process = cobalt_process_attach, ++ .detach_process = cobalt_process_detach, ++ .map_thread = cobalt_thread_map, ++ .exit_thread = cobalt_thread_exit, ++ .finalize_thread = cobalt_thread_finalize, ++ }, ++}; ++EXPORT_SYMBOL_GPL(cobalt_personality); ++ ++__init int cobalt_init(void) ++{ ++ unsigned int i, size; ++ int ret; ++ ++ size = sizeof(*process_hash) * PROCESS_HASH_SIZE; ++ process_hash = kmalloc(size, GFP_KERNEL); ++ if (process_hash == NULL) { ++ printk(XENO_ERR "cannot allocate processes hash table\n"); ++ return -ENOMEM; ++ } ++ ++ ret = xndebug_init(); ++ if (ret) ++ goto fail_debug; ++ ++ for (i = 0; i < PROCESS_HASH_SIZE; i++) ++ INIT_HLIST_HEAD(&process_hash[i]); ++ ++ xnsynch_init(&yield_sync, XNSYNCH_FIFO, NULL); ++ ++ ret = cobalt_memdev_init(); ++ if (ret) ++ goto fail_memdev; ++ ++ ret = cobalt_register_personality(&cobalt_personality); ++ if (ret) ++ goto fail_register; ++ ++ ret = cobalt_signal_init(); ++ if (ret) ++ goto fail_siginit; ++ ++ init_hostrt(); ++ ipipe_set_hooks(ipipe_root_domain, IPIPE_SYSCALL|IPIPE_KEVENT); ++ ipipe_set_hooks(&xnsched_realtime_domain, IPIPE_SYSCALL|IPIPE_TRAP); ++ ++ if (gid_arg != -1) ++ printk(XENO_INFO "allowing access to group %d\n", gid_arg); ++ ++ return 0; ++fail_siginit: ++ cobalt_unregister_personality(0); ++fail_register: ++ cobalt_memdev_cleanup(); ++fail_memdev: ++ xnsynch_destroy(&yield_sync); ++ xndebug_cleanup(); ++fail_debug: ++ kfree(process_hash); ++ ++ return ret; ++} +--- linux/kernel/xenomai/posix/syscall32.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall32.c 2021-04-07 16:01:26.098635779 +0800 +@@ -0,0 +1,945 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "syscall32.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "sem.h" ++#include "sched.h" ++#include "clock.h" ++#include "timer.h" ++#include "timerfd.h" ++#include "signal.h" ++#include "monitor.h" ++#include "event.h" ++#include "mqueue.h" ++#include "io.h" ++#include "../debug.h" ++ ++COBALT_SYSCALL32emu(thread_create, init, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ int xid, ++ __u32 __user *u_winoff)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return __cobalt_thread_create(pth, policy, ¶m_ex, xid, u_winoff); ++} ++ ++COBALT_SYSCALL32emu(thread_setschedparam_ex, conforming, ++ (compat_ulong_t pth, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_thread_setschedparam_ex(pth, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++COBALT_SYSCALL32emu(thread_getschedparam_ex, current, ++ (compat_ulong_t pth, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_thread_getschedparam_ex(pth, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ ++ return ret ?: sys32_put_param_ex(policy, u_param, ¶m_ex); ++} ++ ++COBALT_SYSCALL32emu(thread_setschedprio, conforming, ++ (compat_ulong_t pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ return cobalt_thread_setschedprio(pth, prio, u_winoff, u_promoted); ++} ++ ++static inline int sys32_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ sys32_get_timespec(ts, u_ts); ++} ++ ++COBALT_SYSCALL32emu(sem_open, lostage, ++ (compat_uptr_t __user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)) ++{ ++ struct cobalt_sem_shadow __user *usm; ++ compat_uptr_t cusm; ++ ++ if (__xn_get_user(cusm, u_addrp)) ++ return -EFAULT; ++ ++ usm = __cobalt_sem_open(compat_ptr(cusm), u_name, oflags, mode, value); ++ if (IS_ERR(usm)) ++ return PTR_ERR(usm); ++ ++ return __xn_put_user(ptr_to_compat(usm), u_addrp) ? -EFAULT : 0; ++} ++ ++COBALT_SYSCALL32emu(sem_timedwait, primary, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_sem_timedwait(u_sem, u_ts, sys32_fetch_timeout); ++} ++ ++COBALT_SYSCALL32emu(clock_getres, current, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_getres(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ return u_ts ? 
sys32_put_timespec(u_ts, &ts) : 0; ++} ++ ++COBALT_SYSCALL32emu(clock_gettime, current, ++ (clockid_t clock_id, ++ struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_gettime(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ return sys32_put_timespec(u_ts, &ts); ++} ++ ++COBALT_SYSCALL32emu(clock_settime, current, ++ (clockid_t clock_id, ++ const struct compat_timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ ++ return __cobalt_clock_settime(clock_id, &ts); ++} ++ ++COBALT_SYSCALL32emu(clock_adjtime, current, ++ (clockid_t clock_id, struct compat_timex __user *u_tx)) ++{ ++ struct timex tx; ++ int ret; ++ ++ ret = sys32_get_timex(&tx, u_tx); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_clock_adjtime(clock_id, &tx); ++ if (ret) ++ return ret; ++ ++ return sys32_put_timex(u_tx, &tx); ++} ++ ++COBALT_SYSCALL32emu(clock_nanosleep, nonrestartable, ++ (clockid_t clock_id, int flags, ++ const struct compat_timespec __user *u_rqt, ++ struct compat_timespec __user *u_rmt)) ++{ ++ struct timespec rqt, rmt, *rmtp = NULL; ++ int ret; ++ ++ if (u_rmt) ++ rmtp = &rmt; ++ ++ ret = sys32_get_timespec(&rqt, u_rqt); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_clock_nanosleep(clock_id, flags, &rqt, rmtp); ++ if (ret == -EINTR && flags == 0 && rmtp) ++ ret = sys32_put_timespec(u_rmt, rmtp); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(mutex_timedlock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, u_ts, sys32_fetch_timeout); ++} ++ ++COBALT_SYSCALL32emu(cond_wait_prologue, nonrestartable, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_cond_wait_prologue(u_cnd, u_mx, u_err, u_ts, ++ timed ? sys32_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL32emu(mq_open, lostage, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct compat_mq_attr __user *u_attr)) ++{ ++ struct mq_attr _attr, *attr = &_attr; ++ int ret; ++ ++ if ((oflags & O_CREAT) && u_attr) { ++ ret = sys32_get_mqattr(&_attr, u_attr); ++ if (ret) ++ return ret; ++ } else ++ attr = NULL; ++ ++ return __cobalt_mq_open(u_name, oflags, mode, attr); ++} ++ ++COBALT_SYSCALL32emu(mq_getattr, current, ++ (mqd_t uqd, struct compat_mq_attr __user *u_attr)) ++{ ++ struct mq_attr attr; ++ int ret; ++ ++ ret = __cobalt_mq_getattr(uqd, &attr); ++ if (ret) ++ return ret; ++ ++ return sys32_put_mqattr(u_attr, &attr); ++} ++ ++COBALT_SYSCALL32emu(mq_timedsend, primary, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, ++ const struct compat_timespec __user *u_ts)) ++{ ++ return __cobalt_mq_timedsend(uqd, u_buf, len, prio, ++ u_ts, u_ts ? sys32_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL32emu(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct compat_timespec __user *u_ts)) ++{ ++ compat_ssize_t clen; ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&clen, u_len, sizeof(*u_len)); ++ if (ret) ++ return ret; ++ ++ len = clen; ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? 
sys32_fetch_timeout : NULL); ++ clen = len; ++ ++ return ret ?: cobalt_copy_to_user(u_len, &clen, sizeof(*u_len)); ++} ++ ++static inline int mq_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++ ++} ++ ++COBALT_SYSCALL32emu(mq_notify, primary, ++ (mqd_t fd, const struct compat_sigevent *__user u_cev)) ++{ ++ struct sigevent sev; ++ int ret; ++ ++ if (u_cev) { ++ ret = sys32_get_sigevent(&sev, u_cev); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_mq_notify(fd, u_cev ? &sev : NULL); ++} ++ ++COBALT_SYSCALL32emu(sched_weightprio, current, ++ (int policy, ++ const struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sched_weightprio(policy, ¶m_ex); ++} ++ ++static union sched_config * ++sys32_fetch_config(int policy, const void __user *u_config, size_t *len) ++{ ++ union compat_sched_config *cbuf; ++ union sched_config *buf; ++ int ret, n; ++ ++ if (u_config == NULL) ++ return ERR_PTR(-EFAULT); ++ ++ if (policy == SCHED_QUOTA && *len < sizeof(cbuf->quota)) ++ return ERR_PTR(-EINVAL); ++ ++ cbuf = xnmalloc(*len); ++ if (cbuf == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = cobalt_copy_from_user(cbuf, u_config, *len); ++ if (ret) { ++ buf = ERR_PTR(ret); ++ goto out; ++ } ++ ++ switch (policy) { ++ case SCHED_TP: ++ *len = sched_tp_confsz(cbuf->tp.nr_windows); ++ break; ++ case SCHED_QUOTA: ++ break; ++ default: ++ buf = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ buf = xnmalloc(*len); ++ if (buf == NULL) { ++ buf = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ ++ if (policy == SCHED_QUOTA) ++ memcpy(&buf->quota, &cbuf->quota, sizeof(cbuf->quota)); ++ else { ++ buf->tp.op = cbuf->tp.op; ++ buf->tp.nr_windows = cbuf->tp.nr_windows; ++ for (n = 0; n < buf->tp.nr_windows; n++) { ++ buf->tp.windows[n].ptid = cbuf->tp.windows[n].ptid; ++ buf->tp.windows[n].offset.tv_sec = cbuf->tp.windows[n].offset.tv_sec; ++ buf->tp.windows[n].offset.tv_nsec = cbuf->tp.windows[n].offset.tv_nsec; ++ buf->tp.windows[n].duration.tv_sec = cbuf->tp.windows[n].duration.tv_sec; ++ buf->tp.windows[n].duration.tv_nsec = cbuf->tp.windows[n].duration.tv_nsec; ++ } ++ } ++out: ++ xnfree(cbuf); ++ ++ return buf; ++} ++ ++static int sys32_ack_config(int policy, const union sched_config *config, ++ void __user *u_config) ++{ ++ union compat_sched_config __user *u_p = u_config; ++ ++ if (policy != SCHED_QUOTA) ++ return 0; ++ ++ return u_config == NULL ? 
-EFAULT : ++ cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)); ++} ++ ++static ssize_t sys32_put_config(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, size_t len) ++{ ++ union compat_sched_config __user *u_p = u_config; ++ int n, ret; ++ ++ if (u_config == NULL) ++ return -EFAULT; ++ ++ if (policy == SCHED_QUOTA) { ++ if (u_len < sizeof(u_p->quota)) ++ return -EINVAL; ++ return cobalt_copy_to_user(&u_p->quota.info, &config->quota.info, ++ sizeof(u_p->quota.info)) ?: ++ sizeof(u_p->quota.info); ++ } ++ ++ /* SCHED_TP */ ++ ++ if (u_len < compat_sched_tp_confsz(config->tp.nr_windows)) ++ return -ENOSPC; ++ ++ __xn_put_user(config->tp.op, &u_p->tp.op); ++ __xn_put_user(config->tp.nr_windows, &u_p->tp.nr_windows); ++ ++ for (n = 0, ret = 0; n < config->tp.nr_windows; n++) { ++ ret |= __xn_put_user(config->tp.windows[n].ptid, ++ &u_p->tp.windows[n].ptid); ++ ret |= __xn_put_user(config->tp.windows[n].offset.tv_sec, ++ &u_p->tp.windows[n].offset.tv_sec); ++ ret |= __xn_put_user(config->tp.windows[n].offset.tv_nsec, ++ &u_p->tp.windows[n].offset.tv_nsec); ++ ret |= __xn_put_user(config->tp.windows[n].duration.tv_sec, ++ &u_p->tp.windows[n].duration.tv_sec); ++ ret |= __xn_put_user(config->tp.windows[n].duration.tv_nsec, ++ &u_p->tp.windows[n].duration.tv_nsec); ++ } ++ ++ return ret ?: u_len; ++} ++ ++COBALT_SYSCALL32emu(sched_setconfig_np, conforming, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_setconfig_np(cpu, policy, u_config, len, ++ sys32_fetch_config, sys32_ack_config); ++} ++ ++COBALT_SYSCALL32emu(sched_getconfig_np, conformin, ++ (int cpu, int policy, ++ union compat_sched_config __user *u_config, ++ size_t len)) ++{ ++ return __cobalt_sched_getconfig_np(cpu, policy, u_config, len, ++ sys32_fetch_config, sys32_put_config); ++} ++ ++COBALT_SYSCALL32emu(sched_setscheduler_ex, conforming, ++ (compat_pid_t pid, ++ int policy, ++ const struct compat_sched_param_ex __user *u_param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = sys32_get_param_ex(policy, ¶m_ex, u_param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_sched_setscheduler_ex(pid, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++COBALT_SYSCALL32emu(sched_getscheduler_ex, current, ++ (compat_pid_t pid, ++ int __user *u_policy, ++ struct compat_sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_sched_getscheduler_ex(pid, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ ++ return ret ?: sys32_put_param_ex(policy, u_param, ¶m_ex); ++} ++ ++COBALT_SYSCALL32emu(timer_create, current, ++ (clockid_t clock, ++ const struct compat_sigevent __user *u_sev, ++ timer_t __user *u_tm)) ++{ ++ struct sigevent sev, *evp = NULL; ++ int ret; ++ ++ if (u_sev) { ++ evp = &sev; ++ ret = sys32_get_sigevent(&sev, u_sev); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_timer_create(clock, evp, u_tm); ++} ++ ++COBALT_SYSCALL32emu(timer_settime, primary, ++ (timer_t tm, int flags, ++ const struct compat_itimerspec __user *u_newval, ++ struct compat_itimerspec __user *u_oldval)) ++{ ++ struct itimerspec newv, oldv, *oldvp = &oldv; ++ int ret; ++ ++ if (u_oldval == NULL) ++ oldvp = NULL; ++ ++ ret = sys32_get_itimerspec(&newv, u_newval); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timer_settime(tm, 
flags, &newv, oldvp); ++ if (ret) ++ return ret; ++ ++ if (oldvp) { ++ ret = sys32_put_itimerspec(u_oldval, oldvp); ++ if (ret) ++ __cobalt_timer_settime(tm, flags, oldvp, NULL); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(timer_gettime, current, ++ (timer_t tm, struct compat_itimerspec __user *u_val)) ++{ ++ struct itimerspec val; ++ int ret; ++ ++ ret = __cobalt_timer_gettime(tm, &val); ++ ++ return ret ?: sys32_put_itimerspec(u_val, &val); ++} ++ ++COBALT_SYSCALL32emu(timerfd_settime, primary, ++ (int fd, int flags, ++ const struct compat_itimerspec __user *new_value, ++ struct compat_itimerspec __user *old_value)) ++{ ++ struct itimerspec ovalue, value; ++ int ret; ++ ++ ret = sys32_get_itimerspec(&value, new_value); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_timerfd_settime(fd, flags, &value, &ovalue); ++ if (ret) ++ return ret; ++ ++ if (old_value) { ++ ret = sys32_put_itimerspec(old_value, &ovalue); ++ value.it_value.tv_sec = 0; ++ value.it_value.tv_nsec = 0; ++ __cobalt_timerfd_settime(fd, flags, &value, NULL); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL32emu(timerfd_gettime, current, ++ (int fd, struct compat_itimerspec __user *curr_value)) ++{ ++ struct itimerspec value; ++ int ret; ++ ++ ret = __cobalt_timerfd_gettime(fd, &value); ++ ++ return ret ?: sys32_put_itimerspec(curr_value, &value); ++} ++ ++COBALT_SYSCALL32emu(sigwait, primary, ++ (const compat_sigset_t __user *u_set, ++ int __user *u_sig)) ++{ ++ sigset_t set; ++ int ret, sig; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ sig = __cobalt_sigwait(&set); ++ if (sig < 0) ++ return sig; ++ ++ return cobalt_copy_to_user(u_sig, &sig, sizeof(*u_sig)); ++} ++ ++COBALT_SYSCALL32emu(sigtimedwait, nonrestartable, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si, ++ const struct compat_timespec __user *u_timeout)) ++{ ++ struct timespec timeout; ++ sigset_t set; ++ int ret; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ ret = sys32_get_timespec(&timeout, u_timeout); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sigtimedwait(&set, &timeout, u_si, true); ++} ++ ++COBALT_SYSCALL32emu(sigwaitinfo, nonrestartable, ++ (const compat_sigset_t __user *u_set, ++ struct compat_siginfo __user *u_si)) ++{ ++ sigset_t set; ++ int ret; ++ ++ ret = sys32_get_sigset(&set, u_set); ++ if (ret) ++ return ret; ++ ++ return __cobalt_sigwaitinfo(&set, u_si, true); ++} ++ ++COBALT_SYSCALL32emu(sigpending, primary, (compat_old_sigset_t __user *u_set)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ ++ return sys32_put_sigset((compat_sigset_t *)u_set, &curr->sigpending); ++} ++ ++COBALT_SYSCALL32emu(sigqueue, conforming, ++ (pid_t pid, int sig, ++ const union compat_sigval __user *u_value)) ++{ ++ union sigval val; ++ int ret; ++ ++ ret = sys32_get_sigval(&val, u_value); ++ ++ return ret ?: __cobalt_sigqueue(pid, sig, &val); ++} ++ ++COBALT_SYSCALL32emu(monitor_wait, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct compat_timespec __user *u_ts, ++ int __user *u_ret)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_monitor_wait(u_mon, event, tsp, u_ret); ++} ++ ++COBALT_SYSCALL32emu(event_wait, primary, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct compat_timespec __user *u_ts)) ++{ 
++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = sys32_get_timespec(&ts, u_ts); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_event_wait(u_event, bits, u_bits_r, mode, tsp); ++} ++ ++COBALT_SYSCALL32emu(select, nonrestartable, ++ (int nfds, ++ compat_fd_set __user *u_rfds, ++ compat_fd_set __user *u_wfds, ++ compat_fd_set __user *u_xfds, ++ struct compat_timeval __user *u_tv)) ++{ ++ compat_fd_set __user *ufd_sets[XNSELECT_MAX_TYPES] = { ++ [XNSELECT_READ] = u_rfds, ++ [XNSELECT_WRITE] = u_wfds, ++ [XNSELECT_EXCEPT] = u_xfds ++ }; ++ fd_set *in_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set *out_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set in_fds_storage[XNSELECT_MAX_TYPES], ++ out_fds_storage[XNSELECT_MAX_TYPES]; ++ xnticks_t timeout = XN_INFINITE; ++ xntmode_t mode = XN_RELATIVE; ++ struct xnselector *selector; ++ struct xnthread *curr; ++ struct timeval tv; ++ xnsticks_t diff; ++ size_t fds_size; ++ int i, err; ++ ++ curr = xnthread_current(); ++ ++ if (u_tv) { ++ err = sys32_get_timeval(&tv, u_tv); ++ if (err) ++ return err; ++ ++ if (tv.tv_usec >= 1000000) ++ return -EINVAL; ++ ++ timeout = clock_get_ticks(CLOCK_MONOTONIC) + tv2ns(&tv); ++ mode = XN_ABSOLUTE; ++ } ++ ++ fds_size = __FDELT__(nfds + __NFDBITS__ - 1) * sizeof(compat_ulong_t); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i]) { ++ in_fds[i] = &in_fds_storage[i]; ++ out_fds[i] = & out_fds_storage[i]; ++ if (sys32_get_fdset(in_fds[i], ufd_sets[i], fds_size) < 0) ++ return -EFAULT; ++ } ++ ++ selector = curr->selector; ++ if (selector == NULL) { ++ /* Bail out if non-RTDM fildes is found. */ ++ if (!__cobalt_first_fd_valid_p(in_fds, nfds)) ++ return -EBADF; ++ ++ selector = xnmalloc(sizeof(*curr->selector)); ++ if (selector == NULL) ++ return -ENOMEM; ++ xnselector_init(selector); ++ curr->selector = selector; ++ ++ /* Bind directly the file descriptors, we do not need to go ++ through xnselect returning -ECHRNG */ ++ err = __cobalt_select_bind_all(selector, in_fds, nfds); ++ if (err) ++ return err; ++ } ++ ++ do { ++ err = xnselect(selector, out_fds, in_fds, nfds, timeout, mode); ++ if (err == -ECHRNG) { ++ int err = __cobalt_select_bind_all(selector, out_fds, nfds); ++ if (err) ++ return err; ++ } ++ } while (err == -ECHRNG); ++ ++ if (u_tv && (err > 0 || err == -EINTR)) { ++ diff = timeout - clock_get_ticks(CLOCK_MONOTONIC); ++ if (diff > 0) ++ ticks2tv(&tv, diff); ++ else ++ tv.tv_sec = tv.tv_usec = 0; ++ ++ if (sys32_put_timeval(u_tv, &tv)) ++ return -EFAULT; ++ } ++ ++ if (err >= 0) ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i] && ++ sys32_put_fdset(ufd_sets[i], out_fds[i], ++ sizeof(fd_set)) < 0) ++ return -EFAULT; ++ return err; ++} ++ ++COBALT_SYSCALL32emu(recvmsg, handover, ++ (int fd, struct compat_msghdr __user *umsg, ++ int flags)) ++{ ++ struct user_msghdr m; ++ ssize_t ret; ++ ++ ret = sys32_get_msghdr(&m, umsg); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_recvmsg(fd, &m, flags); ++ if (ret < 0) ++ return ret; ++ ++ return sys32_put_msghdr(umsg, &m) ?: ret; ++} ++ ++static int get_timespec32(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return sys32_get_timespec(ts, u_ts); ++} ++ ++static int get_mmsg32(struct mmsghdr *mmsg, void __user *u_mmsg) ++{ ++ return sys32_get_mmsghdr(mmsg, u_mmsg); ++} ++ ++static int put_mmsg32(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct compat_mmsghdr __user **p = (struct compat_mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return 
sys32_put_mmsghdr(q, mmsg); ++} ++ ++COBALT_SYSCALL32emu(recvmmsg, primary, ++ (int ufd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct compat_timespec *u_timeout)) ++{ ++ return __rtdm_fd_recvmmsg(ufd, u_msgvec, vlen, flags, u_timeout, ++ get_mmsg32, put_mmsg32, ++ get_timespec32); ++} ++ ++COBALT_SYSCALL32emu(sendmsg, handover, ++ (int fd, struct compat_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ int ret; ++ ++ ret = sys32_get_msghdr(&m, umsg); ++ ++ return ret ?: rtdm_fd_sendmsg(fd, &m, flags); ++} ++ ++static int put_mmsglen32(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct compat_mmsghdr __user **p = (struct compat_mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return __xn_put_user(mmsg->msg_len, &q->msg_len); ++} ++ ++COBALT_SYSCALL32emu(sendmmsg, primary, ++ (int fd, struct compat_mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags)) ++{ ++ return __rtdm_fd_sendmmsg(fd, u_msgvec, vlen, flags, ++ get_mmsg32, put_mmsglen32); ++} ++ ++COBALT_SYSCALL32emu(mmap, lostage, ++ (int fd, struct compat_rtdm_mmap_request __user *u_crma, ++ compat_uptr_t __user *u_caddrp)) ++{ ++ struct _rtdm_mmap_request rma; ++ compat_uptr_t u_caddr; ++ void *u_addr = NULL; ++ int ret; ++ ++ if (u_crma == NULL || ++ !access_rok(u_crma, sizeof(*u_crma)) || ++ __xn_get_user(rma.length, &u_crma->length) || ++ __xn_get_user(rma.offset, &u_crma->offset) || ++ __xn_get_user(rma.prot, &u_crma->prot) || ++ __xn_get_user(rma.flags, &u_crma->flags)) ++ return -EFAULT; ++ ++ ret = rtdm_fd_mmap(fd, &rma, &u_addr); ++ if (ret) ++ return ret; ++ ++ u_caddr = ptr_to_compat(u_addr); ++ ++ return cobalt_copy_to_user(u_caddrp, &u_caddr, sizeof(u_caddr)); ++} ++ ++COBALT_SYSCALL32emu(backtrace, current, ++ (int nr, compat_ulong_t __user *u_backtrace, ++ int reason)) ++{ ++ compat_ulong_t cbacktrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ unsigned long backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ int ret, n; ++ ++ if (nr <= 0) ++ return 0; ++ ++ if (nr > SIGSHADOW_BACKTRACE_DEPTH) ++ nr = SIGSHADOW_BACKTRACE_DEPTH; ++ ++ ret = cobalt_copy_from_user(cbacktrace, u_backtrace, ++ nr * sizeof(compat_ulong_t)); ++ if (ret) ++ return ret; ++ ++ for (n = 0; n < nr; n++) ++ backtrace [n] = cbacktrace[n]; ++ ++ xndebug_trace_relax(nr, backtrace, reason); ++ ++ return 0; ++} ++ ++#ifdef COBALT_SYSCALL32x ++ ++COBALT_SYSCALL32x(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ compat_ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)) ++{ ++ compat_ssize_t clen; ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&clen, u_len, sizeof(*u_len)); ++ if (ret) ++ return ret; ++ ++ len = clen; ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++ clen = len; ++ ++ return ret ?: cobalt_copy_to_user(u_len, &clen, sizeof(*u_len)); ++} ++ ++#endif /* COBALT_SYSCALL32x */ +--- linux/kernel/xenomai/posix/event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/event.c 2021-04-07 16:01:26.093635787 +0800 +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "event.h" ++#include ++ ++/* ++ * Cobalt event notification services ++ * ++ * An event flag group is a synchronization object represented by a ++ * regular native integer; every available bit in such word can be ++ * used to map a user-defined event flag. When a flag is set, the ++ * associated event is said to have occurred. ++ * ++ * Xenomai threads and interrupt handlers can use event flags to ++ * signal the occurrence of events to other threads; those threads can ++ * either wait for the events to occur in a conjunctive manner (all ++ * awaited events must have occurred to wake up), or in a disjunctive ++ * way (at least one of the awaited events must have occurred to wake ++ * up). ++ * ++ * We expose this non-POSIX feature through the internal API, as a ++ * fast IPC mechanism available to the Copperplate interface. ++ */ ++ ++struct event_wait_context { ++ struct xnthread_wait_context wc; ++ unsigned int value; ++ int mode; ++}; ++ ++COBALT_SYSCALL(event_init, current, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int value, int flags)) ++{ ++ struct cobalt_event_shadow shadow; ++ struct cobalt_event_state *state; ++ int pshared, synflags, ret; ++ struct cobalt_event *event; ++ struct cobalt_umm *umm; ++ unsigned long stateoff; ++ spl_t s; ++ ++ trace_cobalt_event_init(u_event, value, flags); ++ ++ event = xnmalloc(sizeof(*event)); ++ if (event == NULL) ++ return -ENOMEM; ++ ++ pshared = (flags & COBALT_EVENT_SHARED) != 0; ++ umm = &cobalt_ppd_get(pshared)->umm; ++ state = cobalt_umm_alloc(umm, sizeof(*state)); ++ if (state == NULL) { ++ xnfree(event); ++ return -EAGAIN; ++ } ++ ++ ret = xnregistry_enter_anon(event, &event->resnode.handle); ++ if (ret) { ++ cobalt_umm_free(umm, state); ++ xnfree(event); ++ return ret; ++ } ++ ++ event->state = state; ++ event->flags = flags; ++ synflags = (flags & COBALT_EVENT_PRIO) ? 
XNSYNCH_PRIO : XNSYNCH_FIFO; ++ xnsynch_init(&event->synch, synflags, NULL); ++ state->value = value; ++ state->flags = 0; ++ state->nwaiters = 0; ++ stateoff = cobalt_umm_offset(umm, state); ++ XENO_BUG_ON(COBALT, stateoff != (__u32)stateoff); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&event->resnode, event, pshared); ++ event->magic = COBALT_EVENT_MAGIC; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ shadow.flags = flags; ++ shadow.handle = event->resnode.handle; ++ shadow.state_offset = (__u32)stateoff; ++ ++ return cobalt_copy_to_user(u_event, &shadow, sizeof(*u_event)); ++} ++ ++int __cobalt_event_wait(struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec *ts) ++{ ++ unsigned int rbits = 0, testval; ++ xnticks_t timeout = XN_INFINITE; ++ struct cobalt_event_state *state; ++ xntmode_t tmode = XN_RELATIVE; ++ struct event_wait_context ewc; ++ struct cobalt_event *event; ++ xnhandle_t handle; ++ int ret = 0, info; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ if (ts) { ++ if ((unsigned long)ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ timeout = ts2ns(ts); ++ if (timeout) { ++ timeout++; ++ tmode = XN_ABSOLUTE; ++ } else ++ timeout = XN_NONBLOCK; ++ trace_cobalt_event_timedwait(u_event, bits, mode, ts); ++ } else ++ trace_cobalt_event_wait(u_event, bits, mode); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ state = event->state; ++ ++ if (bits == 0) { ++ /* ++ * Special case: we don't wait for any event, we only ++ * return the current flag group value. ++ */ ++ rbits = state->value; ++ goto out; ++ } ++ ++ state->flags |= COBALT_EVENT_PENDED; ++ rbits = state->value & bits; ++ testval = mode & COBALT_EVENT_ANY ? rbits : bits; ++ if (rbits && rbits == testval) ++ goto done; ++ ++ if (timeout == XN_NONBLOCK) { ++ ret = -EWOULDBLOCK; ++ goto done; ++ } ++ ++ ewc.value = bits; ++ ewc.mode = mode; ++ xnthread_prepare_wait(&ewc.wc); ++ state->nwaiters++; ++ info = xnsynch_sleep_on(&event->synch, timeout, tmode); ++ if (info & XNRMID) { ++ ret = -EIDRM; ++ goto out; ++ } ++ if (info & (XNBREAK|XNTIMEO)) { ++ state->nwaiters--; ++ ret = (info & XNBREAK) ? 
-EINTR : -ETIMEDOUT; ++ } else ++ rbits = ewc.value; ++done: ++ if (!xnsynch_pended_p(&event->synch)) ++ state->flags &= ~COBALT_EVENT_PENDED; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (ret == 0 && ++ cobalt_copy_to_user(u_bits_r, &rbits, sizeof(rbits))) ++ return -EFAULT; ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(event_wait, primary, ++ (struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec __user *u_ts)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = cobalt_copy_from_user(&ts, u_ts, sizeof(ts)); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_event_wait(u_event, bits, u_bits_r, mode, tsp); ++} ++ ++COBALT_SYSCALL(event_sync, current, ++ (struct cobalt_event_shadow __user *u_event)) ++{ ++ unsigned int bits, waitval, testval; ++ struct xnthread_wait_context *wc; ++ struct cobalt_event_state *state; ++ struct event_wait_context *ewc; ++ struct cobalt_event *event; ++ struct xnthread *p, *tmp; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Userland has already updated the bitmask, our job is to ++ * wake up any thread which could be satisfied by its current ++ * value. ++ */ ++ state = event->state; ++ bits = state->value; ++ ++ xnsynch_for_each_sleeper_safe(p, tmp, &event->synch) { ++ wc = xnthread_get_wait_context(p); ++ ewc = container_of(wc, struct event_wait_context, wc); ++ waitval = ewc->value & bits; ++ testval = ewc->mode & COBALT_EVENT_ANY ? waitval : ewc->value; ++ if (waitval && waitval == testval) { ++ state->nwaiters--; ++ ewc->value = waitval; ++ xnsynch_wakeup_this_sleeper(&event->synch, p); ++ } ++ } ++ ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(event_destroy, current, ++ (struct cobalt_event_shadow __user *u_event)) ++{ ++ struct cobalt_event *event; ++ xnhandle_t handle; ++ spl_t s; ++ ++ trace_cobalt_event_destroy(u_event); ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ event = xnregistry_lookup(handle, NULL); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ cobalt_event_reclaim(&event->resnode, s); /* drops lock */ ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(event_inquire, current, ++ (struct cobalt_event_shadow __user *u_event, ++ struct cobalt_event_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)) ++{ ++ int nrpend = 0, nrwait = 0, nrpids, ret = 0; ++ unsigned long pstamp, nstamp = 0; ++ struct cobalt_event_info info; ++ struct cobalt_event *event; ++ pid_t *t = NULL, fbuf[16]; ++ struct xnthread *thread; ++ xnhandle_t handle; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_event->handle); ++ ++ nrpids = waitsz / sizeof(pid_t); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ pstamp = nstamp; ++ event = xnregistry_lookup(handle, &nstamp); ++ if (event == NULL || event->magic != COBALT_EVENT_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * Allocate memory to return the wait list without ++ * holding any lock, then revalidate the handle. 
++ */ ++ if (t == NULL) { ++ nrpend = 0; ++ if (!xnsynch_pended_p(&event->synch)) ++ break; ++ xnsynch_for_each_sleeper(thread, &event->synch) ++ nrpend++; ++ if (u_waitlist == NULL) ++ break; ++ xnlock_put_irqrestore(&nklock, s); ++ if (nrpids > nrpend) ++ nrpids = nrpend; ++ if (nrpend <= ARRAY_SIZE(fbuf)) ++ t = fbuf; /* Use fast buffer. */ ++ else { ++ t = xnmalloc(nrpend * sizeof(pid_t)); ++ if (t == NULL) ++ return -ENOMEM; ++ } ++ xnlock_get_irqsave(&nklock, s); ++ } else if (pstamp == nstamp) ++ break; ++ else { ++ xnlock_put_irqrestore(&nklock, s); ++ if (t != fbuf) ++ xnfree(t); ++ t = NULL; ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ ++ info.flags = event->flags; ++ info.value = event->value; ++ info.nrwait = nrpend; ++ ++ if (xnsynch_pended_p(&event->synch) && u_waitlist != NULL) { ++ xnsynch_for_each_sleeper(thread, &event->synch) { ++ if (nrwait >= nrpids) ++ break; ++ t[nrwait++] = xnthread_host_pid(thread); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = cobalt_copy_to_user(u_info, &info, sizeof(info)); ++ if (ret == 0 && nrwait > 0) ++ ret = cobalt_copy_to_user(u_waitlist, t, nrwait * sizeof(pid_t)); ++ ++ if (t && t != fbuf) ++ xnfree(t); ++ ++ return ret ?: nrwait; ++} ++ ++void cobalt_event_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_event *event; ++ struct cobalt_umm *umm; ++ int pshared; ++ ++ event = container_of(node, struct cobalt_event, resnode); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&event->synch); ++ pshared = (event->flags & COBALT_EVENT_SHARED) != 0; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ umm = &cobalt_ppd_get(pshared)->umm; ++ cobalt_umm_free(umm, event->state); ++ xnfree(event); ++} +--- linux/kernel/xenomai/posix/event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/event.h 2021-04-07 16:01:26.088635794 +0800 +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (C) 2012 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_POSIX_EVENT_H ++#define _COBALT_POSIX_EVENT_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_event { ++ unsigned int magic; ++ unsigned int value; ++ int flags; ++ struct xnsynch synch; ++ struct cobalt_event_state *state; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_event_wait(struct cobalt_event_shadow __user *u_event, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, const struct timespec *ts); ++ ++COBALT_SYSCALL_DECL(event_init, ++ (struct cobalt_event_shadow __user *u_evtsh, ++ unsigned int value, ++ int flags)); ++ ++COBALT_SYSCALL_DECL(event_wait, ++ (struct cobalt_event_shadow __user *u_evtsh, ++ unsigned int bits, ++ unsigned int __user *u_bits_r, ++ int mode, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(event_sync, ++ (struct cobalt_event_shadow __user *u_evtsh)); ++ ++COBALT_SYSCALL_DECL(event_destroy, ++ (struct cobalt_event_shadow __user *u_evtsh)); ++ ++COBALT_SYSCALL_DECL(event_inquire, ++ (struct cobalt_event_shadow __user *u_event, ++ struct cobalt_event_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)); ++ ++void cobalt_event_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_EVENT_H */ +--- linux/kernel/xenomai/posix/memory.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/memory.h 2021-04-07 16:01:26.084635799 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_MEMORY_H ++#define _COBALT_POSIX_MEMORY_H ++ ++#include ++ ++#define cobalt_umm_set_name(__umm, __fmt, __args...) \ ++ xnheap_set_name(&(__umm)->heap, (__fmt), ## __args) ++ ++static inline ++void *cobalt_umm_alloc(struct cobalt_umm *umm, __u32 size) ++{ ++ return xnheap_alloc(&umm->heap, size); ++} ++ ++static inline ++void *cobalt_umm_zalloc(struct cobalt_umm *umm, __u32 size) ++{ ++ return xnheap_zalloc(&umm->heap, size); ++} ++ ++static inline ++void cobalt_umm_free(struct cobalt_umm *umm, void *p) ++{ ++ xnheap_free(&umm->heap, p); ++} ++ ++static inline ++__u32 cobalt_umm_offset(struct cobalt_umm *umm, void *p) ++{ ++ return p - xnheap_get_membase(&umm->heap); ++} ++ ++int cobalt_memdev_init(void); ++ ++void cobalt_memdev_cleanup(void); ++ ++int cobalt_umm_init(struct cobalt_umm *umm, u32 size, ++ void (*release)(struct cobalt_umm *umm)); ++ ++void cobalt_umm_destroy(struct cobalt_umm *umm); ++ ++#endif /* !_COBALT_POSIX_MEMORY_H */ +--- linux/kernel/xenomai/posix/clock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/clock.h 2021-04-07 16:01:26.079635806 +0800 +@@ -0,0 +1,125 @@ ++/* ++ * Written by Gilles Chanteperdrix . 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_CLOCK_H ++#define _COBALT_POSIX_CLOCK_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define ONE_BILLION 1000000000 ++ ++struct xnclock; ++ ++static inline void ns2ts(struct timespec *ts, xnticks_t nsecs) ++{ ++ ts->tv_sec = xnclock_divrem_billion(nsecs, &ts->tv_nsec); ++} ++ ++static inline xnticks_t ts2ns(const struct timespec *ts) ++{ ++ xnticks_t nsecs = ts->tv_nsec; ++ ++ if (ts->tv_sec) ++ nsecs += (xnticks_t)ts->tv_sec * ONE_BILLION; ++ ++ return nsecs; ++} ++ ++static inline xnticks_t tv2ns(const struct timeval *tv) ++{ ++ xnticks_t nsecs = tv->tv_usec * 1000; ++ ++ if (tv->tv_sec) ++ nsecs += (xnticks_t)tv->tv_sec * ONE_BILLION; ++ ++ return nsecs; ++} ++ ++static inline void ticks2tv(struct timeval *tv, xnticks_t ticks) ++{ ++ unsigned long nsecs; ++ ++ tv->tv_sec = xnclock_divrem_billion(ticks, &nsecs); ++ tv->tv_usec = nsecs / 1000; ++} ++ ++static inline xnticks_t clock_get_ticks(clockid_t clock_id) ++{ ++ return clock_id == CLOCK_REALTIME ? ++ xnclock_read_realtime(&nkclock) : ++ xnclock_read_monotonic(&nkclock); ++} ++ ++static inline int clock_flag(int flag, clockid_t clock_id) ++{ ++ if ((flag & TIMER_ABSTIME) == 0) ++ return XN_RELATIVE; ++ ++ if (clock_id == CLOCK_REALTIME) ++ return XN_REALTIME; ++ ++ return XN_ABSOLUTE; ++} ++ ++int __cobalt_clock_getres(clockid_t clock_id, ++ struct timespec *ts); ++ ++int __cobalt_clock_gettime(clockid_t clock_id, ++ struct timespec *ts); ++ ++int __cobalt_clock_settime(clockid_t clock_id, ++ const struct timespec *ts); ++ ++int __cobalt_clock_adjtime(clockid_t clock_id, ++ struct timex *tx); ++ ++int __cobalt_clock_nanosleep(clockid_t clock_id, int flags, ++ const struct timespec *rqt, ++ struct timespec *rmt); ++ ++COBALT_SYSCALL_DECL(clock_getres, ++ (clockid_t clock_id, struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_gettime, ++ (clockid_t clock_id, struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_settime, ++ (clockid_t clock_id, const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(clock_adjtime, ++ (clockid_t clock_id, struct timex __user *u_tx)); ++ ++COBALT_SYSCALL_DECL(clock_nanosleep, ++ (clockid_t clock_id, int flags, ++ const struct timespec __user *u_rqt, ++ struct timespec __user *u_rmt)); ++ ++int cobalt_clock_register(struct xnclock *clock, ++ const cpumask_t *affinity, ++ clockid_t *clk_id); ++ ++void cobalt_clock_deregister(struct xnclock *clock); ++ ++struct xnclock *cobalt_clock_find(clockid_t clock_id); ++ ++extern DECLARE_BITMAP(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ ++#endif /* !_COBALT_POSIX_CLOCK_H */ +--- linux/kernel/xenomai/posix/mutex.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mutex.c 2021-04-07 16:01:26.074635814 +0800 +@@ -0,0 +1,421 @@ ++/* ++ * 
Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include "internal.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "clock.h" ++ ++static int cobalt_mutex_init_inner(struct cobalt_mutex_shadow *shadow, ++ struct cobalt_mutex *mutex, ++ struct cobalt_mutex_state *state, ++ const struct cobalt_mutexattr *attr) ++{ ++ int synch_flags = XNSYNCH_PRIO | XNSYNCH_OWNER; ++ struct cobalt_umm *umm; ++ spl_t s; ++ int ret; ++ ++ ret = xnregistry_enter_anon(mutex, &mutex->resnode.handle); ++ if (ret < 0) ++ return ret; ++ ++ umm = &cobalt_ppd_get(attr->pshared)->umm; ++ shadow->handle = mutex->resnode.handle; ++ shadow->magic = COBALT_MUTEX_MAGIC; ++ shadow->lockcnt = 0; ++ shadow->attr = *attr; ++ shadow->state_offset = cobalt_umm_offset(umm, state); ++ ++ mutex->magic = COBALT_MUTEX_MAGIC; ++ ++ if (attr->protocol == PTHREAD_PRIO_PROTECT) { ++ state->ceiling = attr->ceiling + 1; ++ xnsynch_init_protect(&mutex->synchbase, synch_flags, ++ &state->owner, &state->ceiling); ++ } else { ++ state->ceiling = 0; ++ if (attr->protocol == PTHREAD_PRIO_INHERIT) ++ synch_flags |= XNSYNCH_PI; ++ xnsynch_init(&mutex->synchbase, synch_flags, &state->owner); ++ } ++ ++ state->flags = (attr->type == PTHREAD_MUTEX_ERRORCHECK ++ ? COBALT_MUTEX_ERRORCHECK : 0); ++ mutex->attr = *attr; ++ INIT_LIST_HEAD(&mutex->conds); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&mutex->resnode, mutex, attr->pshared); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++/* must be called with nklock locked, interrupts off. */ ++int __cobalt_mutex_acquire_unchecked(struct xnthread *cur, ++ struct cobalt_mutex *mutex, ++ const struct timespec *ts) ++{ ++ int ret; ++ ++ if (ts) { ++ if (ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ret = xnsynch_acquire(&mutex->synchbase, ts2ns(ts) + 1, XN_REALTIME); ++ } else ++ ret = xnsynch_acquire(&mutex->synchbase, XN_INFINITE, XN_RELATIVE); ++ ++ if (ret) { ++ if (ret & XNBREAK) ++ return -EINTR; ++ if (ret & XNTIMEO) ++ return -ETIMEDOUT; ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int cobalt_mutex_release(struct xnthread *curr, ++ struct cobalt_mutex *mutex) ++{ /* nklock held, irqs off */ ++ struct cobalt_mutex_state *state; ++ struct cobalt_cond *cond; ++ unsigned long flags; ++ int need_resched; ++ ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex)) ++ return -EINVAL; ++ ++ if (mutex->resnode.scope != ++ cobalt_current_resources(mutex->attr.pshared)) ++ return -EPERM; ++ ++ /* ++ * We are about to release a mutex which is still pending PP ++ * (i.e. we never got scheduled out while holding it). Clear ++ * the lazy handle. 
++ */ ++ if (mutex->resnode.handle == curr->u_window->pp_pending) ++ curr->u_window->pp_pending = XN_NO_HANDLE; ++ ++ state = container_of(mutex->synchbase.fastlock, struct cobalt_mutex_state, owner); ++ flags = state->flags; ++ need_resched = 0; ++ if ((flags & COBALT_MUTEX_COND_SIGNAL)) { ++ state->flags = flags & ~COBALT_MUTEX_COND_SIGNAL; ++ if (!list_empty(&mutex->conds)) { ++ list_for_each_entry(cond, &mutex->conds, mutex_link) ++ need_resched |= ++ cobalt_cond_deferred_signals(cond); ++ } ++ } ++ need_resched |= xnsynch_release(&mutex->synchbase, curr); ++ ++ return need_resched; ++} ++ ++int __cobalt_mutex_timedlock_break(struct cobalt_mutex_shadow __user *u_mx, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ struct timespec ts, *tsp = NULL; ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ /* We need a valid thread handle for the fast lock. */ ++ if (curr->handle == XN_NO_HANDLE) ++ return -EPERM; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (mutex->resnode.scope != ++ cobalt_current_resources(mutex->attr.pshared)) { ++ ret = -EPERM; ++ goto out; ++ } ++ ++ xnthread_commit_ceiling(curr); ++ ++ if (xnsynch_owner_check(&mutex->synchbase, curr)) { ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ret; ++ ++ fetch_timeout = NULL; ++ tsp = &ts; ++ goto redo; /* Revalidate handle. */ ++ } ++ ret = __cobalt_mutex_acquire_unchecked(curr, mutex, tsp); ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ /* We already own the mutex, something looks wrong. */ ++ ++ ret = -EBUSY; ++ switch(mutex->attr.type) { ++ case PTHREAD_MUTEX_NORMAL: ++ /* Attempting to relock a normal mutex, deadlock. */ ++ if (IS_ENABLED(XENO_OPT_DEBUG_USER)) ++ printk(XENO_WARNING ++ "thread %s deadlocks on non-recursive mutex\n", ++ curr->name); ++ /* Make the caller hang. */ ++ __cobalt_mutex_acquire_unchecked(curr, mutex, NULL); ++ break; ++ ++ case PTHREAD_MUTEX_ERRORCHECK: ++ case PTHREAD_MUTEX_RECURSIVE: ++ /* ++ * Recursive mutexes are handled in user-space, so ++ * these cases should never happen. ++ */ ++ ret = -EINVAL; ++ break; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_check_init, current, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ int err; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ mutex = xnregistry_lookup(handle, NULL); ++ if (cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) ++ /* mutex is already in a queue. 
*/ ++ err = -EBUSY; ++ else ++ err = 0; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ return err; ++} ++ ++COBALT_SYSCALL(mutex_init, current, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct cobalt_mutexattr __user *u_attr)) ++{ ++ struct cobalt_mutex_state *state; ++ struct cobalt_mutex_shadow mx; ++ struct cobalt_mutexattr attr; ++ struct cobalt_mutex *mutex; ++ int ret; ++ ++ if (cobalt_copy_from_user(&mx, u_mx, sizeof(mx))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&attr, u_attr, sizeof(attr))) ++ return -EFAULT; ++ ++ mutex = xnmalloc(sizeof(*mutex)); ++ if (mutex == NULL) ++ return -ENOMEM; ++ ++ state = cobalt_umm_alloc(&cobalt_ppd_get(attr.pshared)->umm, ++ sizeof(*state)); ++ if (state == NULL) { ++ xnfree(mutex); ++ return -EAGAIN; ++ } ++ ++ ret = cobalt_mutex_init_inner(&mx, mutex, state, &attr); ++ if (ret) { ++ xnfree(mutex); ++ cobalt_umm_free(&cobalt_ppd_get(attr.pshared)->umm, state); ++ return ret; ++ } ++ ++ return cobalt_copy_to_user(u_mx, &mx, sizeof(*u_mx)); ++} ++ ++COBALT_SYSCALL(mutex_destroy, current, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex_shadow mx; ++ struct cobalt_mutex *mutex; ++ spl_t s; ++ int ret; ++ ++ if (cobalt_copy_from_user(&mx, u_mx, sizeof(mx))) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(mx.handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ if (cobalt_current_resources(mutex->attr.pshared) != ++ mutex->resnode.scope) { ++ ret = -EPERM; ++ goto fail; ++ } ++ if (xnsynch_fast_owner_check(mutex->synchbase.fastlock, ++ XN_NO_HANDLE) != 0 || ++ !list_empty(&mutex->conds)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ cobalt_mutex_reclaim(&mutex->resnode, s); /* drops lock */ ++ ++ cobalt_mark_deleted(&mx); ++ ++ return cobalt_copy_to_user(u_mx, &mx, sizeof(*u_mx)); ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_trylock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ struct cobalt_mutex *mutex; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, typeof(*mutex))) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ xnthread_commit_ceiling(curr); ++ ++ ret = xnsynch_try_acquire(&mutex->synchbase); ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mutex_lock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, NULL, NULL); ++} ++ ++static inline int mutex_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++COBALT_SYSCALL(mutex_timedlock, primary, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec __user *u_ts)) ++{ ++ return __cobalt_mutex_timedlock_break(u_mx, u_ts, mutex_fetch_timeout); ++} ++ ++COBALT_SYSCALL(mutex_unlock, nonrestartable, ++ (struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct cobalt_mutex *mutex; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ ret = cobalt_mutex_release(curr, mutex); ++ if (ret > 0) { ++ xnsched_run(); ++ ret = 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void cobalt_mutex_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_mutex_state *state; ++ struct cobalt_mutex *mutex; ++ int pshared; ++ ++ mutex = container_of(node, struct cobalt_mutex, resnode); ++ state = container_of(mutex->synchbase.fastlock, struct cobalt_mutex_state, owner); ++ pshared = mutex->attr.pshared; ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&mutex->synchbase); ++ cobalt_mark_deleted(mutex); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ cobalt_umm_free(&cobalt_ppd_get(pshared)->umm, state); ++ xnfree(mutex); ++} ++ ++struct xnsynch *lookup_lazy_pp(xnhandle_t handle) ++{ /* nklock held, irqs off */ ++ struct cobalt_mutex *mutex; ++ ++ /* Only mutexes may be PP-enabled. */ ++ ++ mutex = xnregistry_lookup(handle, NULL); ++ if (mutex == NULL || ++ !cobalt_obj_active(mutex, COBALT_MUTEX_MAGIC, struct cobalt_mutex) || ++ mutex->attr.protocol != PTHREAD_PRIO_PROTECT) ++ return NULL; ++ ++ return &mutex->synchbase; ++} +--- linux/kernel/xenomai/posix/io.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/io.c 2021-04-07 16:01:26.069635821 +0800 +@@ -0,0 +1,342 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * Copyright (C) 2005 Joerg Langenberg . ++ * Copyright (C) 2008 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "internal.h" ++#include "clock.h" ++#include "io.h" ++ ++COBALT_SYSCALL(open, lostage, ++ (const char __user *u_path, int oflag)) ++{ ++ struct filename *filename; ++ int ufd; ++ ++ filename = getname(u_path); ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ ufd = __rtdm_dev_open(filename->name, oflag); ++ putname(filename); ++ ++ return ufd; ++} ++ ++COBALT_SYSCALL(socket, lostage, ++ (int protocol_family, int socket_type, int protocol)) ++{ ++ return __rtdm_dev_socket(protocol_family, socket_type, protocol); ++} ++ ++COBALT_SYSCALL(close, lostage, (int fd)) ++{ ++ return rtdm_fd_close(fd, 0); ++} ++ ++COBALT_SYSCALL(fcntl, current, (int fd, int cmd, long arg)) ++{ ++ return rtdm_fd_fcntl(fd, cmd, arg); ++} ++ ++COBALT_SYSCALL(ioctl, handover, ++ (int fd, unsigned int request, void __user *arg)) ++{ ++ return rtdm_fd_ioctl(fd, request, arg); ++} ++ ++COBALT_SYSCALL(read, handover, ++ (int fd, void __user *buf, size_t size)) ++{ ++ return rtdm_fd_read(fd, buf, size); ++} ++ ++COBALT_SYSCALL(write, handover, ++ (int fd, const void __user *buf, size_t size)) ++{ ++ return rtdm_fd_write(fd, buf, size); ++} ++ ++COBALT_SYSCALL(recvmsg, handover, ++ (int fd, struct user_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ ssize_t ret; ++ ++ ret = cobalt_copy_from_user(&m, umsg, sizeof(m)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_recvmsg(fd, &m, flags); ++ if (ret < 0) ++ return ret; ++ ++ return cobalt_copy_to_user(umsg, &m, sizeof(*umsg)) ?: ret; ++} ++ ++static int get_timespec(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++static int get_mmsg(struct mmsghdr *mmsg, void __user *u_mmsg) ++{ ++ return cobalt_copy_from_user(mmsg, u_mmsg, sizeof(*mmsg)); ++} ++ ++static int put_mmsg(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct mmsghdr __user **p = (struct mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return cobalt_copy_to_user(q, mmsg, sizeof(*q)); ++} ++ ++COBALT_SYSCALL(recvmmsg, primary, ++ (int fd, struct mmsghdr __user *u_msgvec, unsigned int vlen, ++ unsigned int flags, struct timespec *u_timeout)) ++{ ++ return __rtdm_fd_recvmmsg(fd, u_msgvec, vlen, flags, u_timeout, ++ get_mmsg, put_mmsg, get_timespec); ++} ++ ++COBALT_SYSCALL(sendmsg, handover, ++ (int fd, struct user_msghdr __user *umsg, int flags)) ++{ ++ struct user_msghdr m; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&m, umsg, sizeof(m)); ++ ++ return ret ?: rtdm_fd_sendmsg(fd, &m, flags); ++} ++ ++static int put_mmsglen(void __user **u_mmsg_p, const struct mmsghdr *mmsg) ++{ ++ struct mmsghdr __user **p = (struct mmsghdr **)u_mmsg_p, ++ *q __user = (*p)++; ++ ++ return __xn_put_user(mmsg->msg_len, &q->msg_len); ++} ++ ++COBALT_SYSCALL(sendmmsg, primary, ++ (int fd, struct mmsghdr __user *u_msgvec, ++ unsigned int vlen, unsigned int flags)) ++{ ++ return __rtdm_fd_sendmmsg(fd, u_msgvec, vlen, flags, ++ get_mmsg, put_mmsglen); ++} ++ ++COBALT_SYSCALL(mmap, lostage, ++ (int fd, struct _rtdm_mmap_request __user *u_rma, ++ void __user **u_addrp)) ++{ ++ struct _rtdm_mmap_request rma; ++ void *u_addr = NULL; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&rma, u_rma, sizeof(rma)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_fd_mmap(fd, &rma, &u_addr); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_addrp, &u_addr, sizeof(u_addr)); ++} ++ ++int __cobalt_first_fd_valid_p(fd_set *fds[XNSELECT_MAX_TYPES], int nfds) ++{ 
++ int i, fd; ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (fds[i] ++ && (fd = find_first_bit(fds[i]->fds_bits, nfds)) < nfds) ++ return rtdm_fd_valid_p(fd); ++ ++ /* All empty is correct, used as a "sleep" mechanism by strange ++ applications. */ ++ return 1; ++} ++ ++static int select_bind_one(struct xnselector *selector, unsigned type, int fd) ++{ ++ int rc; ++ ++ rc = rtdm_fd_select(fd, selector, type); ++ if (rc != -ENOENT) ++ return rc; ++ ++ return -EBADF; ++} ++ ++int __cobalt_select_bind_all(struct xnselector *selector, ++ fd_set *fds[XNSELECT_MAX_TYPES], int nfds) ++{ ++ unsigned fd, type; ++ int err; ++ ++ for (type = 0; type < XNSELECT_MAX_TYPES; type++) { ++ fd_set *set = fds[type]; ++ if (set) ++ for (fd = find_first_bit(set->fds_bits, nfds); ++ fd < nfds; ++ fd = find_next_bit(set->fds_bits, nfds, fd + 1)) { ++ err = select_bind_one(selector, type, fd); ++ if (err) ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++/* int select(int, fd_set *, fd_set *, fd_set *, struct timeval *) */ ++COBALT_SYSCALL(select, primary, ++ (int nfds, ++ fd_set __user *u_rfds, ++ fd_set __user *u_wfds, ++ fd_set __user *u_xfds, ++ struct timeval __user *u_tv)) ++{ ++ fd_set __user *ufd_sets[XNSELECT_MAX_TYPES] = { ++ [XNSELECT_READ] = u_rfds, ++ [XNSELECT_WRITE] = u_wfds, ++ [XNSELECT_EXCEPT] = u_xfds ++ }; ++ fd_set *in_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set *out_fds[XNSELECT_MAX_TYPES] = {NULL, NULL, NULL}; ++ fd_set in_fds_storage[XNSELECT_MAX_TYPES], ++ out_fds_storage[XNSELECT_MAX_TYPES]; ++ xnticks_t timeout = XN_INFINITE; ++ struct restart_block *restart; ++ xntmode_t mode = XN_RELATIVE; ++ struct xnselector *selector; ++ struct xnthread *curr; ++ struct timeval tv; ++ size_t fds_size; ++ int i, err; ++ ++ curr = xnthread_current(); ++ ++ if (u_tv) { ++ if (xnthread_test_localinfo(curr, XNSYSRST)) { ++ xnthread_clear_localinfo(curr, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ timeout = restart->nanosleep.expires; ++ ++ if (restart->fn != cobalt_restart_syscall_placeholder) { ++ err = -EINTR; ++ goto out; ++ } ++ } else { ++ if (!access_wok(u_tv, sizeof(tv)) ++ || cobalt_copy_from_user(&tv, u_tv, sizeof(tv))) ++ return -EFAULT; ++ ++ if (tv.tv_usec >= 1000000) ++ return -EINVAL; ++ ++ timeout = clock_get_ticks(CLOCK_MONOTONIC) + tv2ns(&tv); ++ } ++ ++ mode = XN_ABSOLUTE; ++ } ++ ++ fds_size = __FDELT__(nfds + __NFDBITS__ - 1) * sizeof(long); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i]) { ++ in_fds[i] = &in_fds_storage[i]; ++ out_fds[i] = & out_fds_storage[i]; ++ if (!access_wok((void __user *) ufd_sets[i], ++ sizeof(fd_set)) ++ || cobalt_copy_from_user(in_fds[i], ++ (void __user *) ufd_sets[i], ++ fds_size)) ++ return -EFAULT; ++ } ++ ++ selector = curr->selector; ++ if (!selector) { ++ /* This function may be called from pure Linux fd_sets, we want ++ to avoid the xnselector allocation in this case, so, we do a ++ simple test: test if the first file descriptor we find in the ++ fd_set is an RTDM descriptor or a message queue descriptor. 
*/ ++ if (!__cobalt_first_fd_valid_p(in_fds, nfds)) ++ return -EBADF; ++ ++ selector = xnmalloc(sizeof(*curr->selector)); ++ if (selector == NULL) ++ return -ENOMEM; ++ xnselector_init(selector); ++ curr->selector = selector; ++ ++ /* Bind directly the file descriptors, we do not need to go ++ through xnselect returning -ECHRNG */ ++ if ((err = __cobalt_select_bind_all(selector, in_fds, nfds))) ++ return err; ++ } ++ ++ do { ++ err = xnselect(selector, out_fds, in_fds, nfds, timeout, mode); ++ ++ if (err == -ECHRNG) { ++ int err = __cobalt_select_bind_all(selector, out_fds, nfds); ++ if (err) ++ return err; ++ } ++ } while (err == -ECHRNG); ++ ++ if (err == -EINTR && signal_pending(current)) { ++ xnthread_set_localinfo(curr, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ restart->fn = cobalt_restart_syscall_placeholder; ++ restart->nanosleep.expires = timeout; ++ ++ return -ERESTARTSYS; ++ } ++ ++out: ++ if (u_tv && (err > 0 || err == -EINTR)) { ++ xnsticks_t diff = timeout - clock_get_ticks(CLOCK_MONOTONIC); ++ if (diff > 0) ++ ticks2tv(&tv, diff); ++ else ++ tv.tv_sec = tv.tv_usec = 0; ++ ++ if (cobalt_copy_to_user(u_tv, &tv, sizeof(tv))) ++ return -EFAULT; ++ } ++ ++ if (err >= 0) ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (ufd_sets[i] ++ && cobalt_copy_to_user((void __user *) ufd_sets[i], ++ out_fds[i], sizeof(fd_set))) ++ return -EFAULT; ++ return err; ++} +--- linux/kernel/xenomai/posix/compat.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/compat.c 2021-04-07 16:01:26.065635827 +0800 +@@ -0,0 +1,486 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int sys32_get_timespec(struct timespec *ts, ++ const struct compat_timespec __user *cts) ++{ ++ return (cts == NULL || ++ !access_rok(cts, sizeof(*cts)) || ++ __xn_get_user(ts->tv_sec, &cts->tv_sec) || ++ __xn_get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timespec); ++ ++int sys32_put_timespec(struct compat_timespec __user *cts, ++ const struct timespec *ts) ++{ ++ return (cts == NULL || ++ !access_wok(cts, sizeof(*cts)) || ++ __xn_put_user(ts->tv_sec, &cts->tv_sec) || ++ __xn_put_user(ts->tv_nsec, &cts->tv_nsec)) ? 
-EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timespec); ++ ++int sys32_get_itimerspec(struct itimerspec *its, ++ const struct compat_itimerspec __user *cits) ++{ ++ int ret = sys32_get_timespec(&its->it_value, &cits->it_value); ++ ++ return ret ?: sys32_get_timespec(&its->it_interval, &cits->it_interval); ++} ++EXPORT_SYMBOL_GPL(sys32_get_itimerspec); ++ ++int sys32_put_itimerspec(struct compat_itimerspec __user *cits, ++ const struct itimerspec *its) ++{ ++ int ret = sys32_put_timespec(&cits->it_value, &its->it_value); ++ ++ return ret ?: sys32_put_timespec(&cits->it_interval, &its->it_interval); ++} ++EXPORT_SYMBOL_GPL(sys32_put_itimerspec); ++ ++int sys32_get_timeval(struct timeval *tv, ++ const struct compat_timeval __user *ctv) ++{ ++ return (ctv == NULL || ++ !access_rok(ctv, sizeof(*ctv)) || ++ __xn_get_user(tv->tv_sec, &ctv->tv_sec) || ++ __xn_get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timeval); ++ ++int sys32_put_timeval(struct compat_timeval __user *ctv, ++ const struct timeval *tv) ++{ ++ return (ctv == NULL || ++ !access_wok(ctv, sizeof(*ctv)) || ++ __xn_put_user(tv->tv_sec, &ctv->tv_sec) || ++ __xn_put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timeval); ++ ++int sys32_get_timex(struct timex *tx, ++ const struct compat_timex __user *ctx) ++{ ++ memset(tx, 0, sizeof(*tx)); ++ ++ if (!access_rok(ctx, sizeof(*ctx)) || ++ __xn_get_user(tx->modes, &ctx->modes) || ++ __xn_get_user(tx->offset, &ctx->offset) || ++ __xn_get_user(tx->freq, &ctx->freq) || ++ __xn_get_user(tx->maxerror, &ctx->maxerror) || ++ __xn_get_user(tx->esterror, &ctx->esterror) || ++ __xn_get_user(tx->status, &ctx->status) || ++ __xn_get_user(tx->constant, &ctx->constant) || ++ __xn_get_user(tx->precision, &ctx->precision) || ++ __xn_get_user(tx->tolerance, &ctx->tolerance) || ++ __xn_get_user(tx->time.tv_sec, &ctx->time.tv_sec) || ++ __xn_get_user(tx->time.tv_usec, &ctx->time.tv_usec) || ++ __xn_get_user(tx->tick, &ctx->tick) || ++ __xn_get_user(tx->ppsfreq, &ctx->ppsfreq) || ++ __xn_get_user(tx->jitter, &ctx->jitter) || ++ __xn_get_user(tx->shift, &ctx->shift) || ++ __xn_get_user(tx->stabil, &ctx->stabil) || ++ __xn_get_user(tx->jitcnt, &ctx->jitcnt) || ++ __xn_get_user(tx->calcnt, &ctx->calcnt) || ++ __xn_get_user(tx->errcnt, &ctx->errcnt) || ++ __xn_get_user(tx->stbcnt, &ctx->stbcnt)) ++ return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_timex); ++ ++int sys32_put_timex(struct compat_timex __user *ctx, ++ const struct timex *tx) ++{ ++ if (!access_wok(ctx, sizeof(*ctx)) || ++ __xn_put_user(tx->modes, &ctx->modes) || ++ __xn_put_user(tx->offset, &ctx->offset) || ++ __xn_put_user(tx->freq, &ctx->freq) || ++ __xn_put_user(tx->maxerror, &ctx->maxerror) || ++ __xn_put_user(tx->esterror, &ctx->esterror) || ++ __xn_put_user(tx->status, &ctx->status) || ++ __xn_put_user(tx->constant, &ctx->constant) || ++ __xn_put_user(tx->precision, &ctx->precision) || ++ __xn_put_user(tx->tolerance, &ctx->tolerance) || ++ __xn_put_user(tx->time.tv_sec, &ctx->time.tv_sec) || ++ __xn_put_user(tx->time.tv_usec, &ctx->time.tv_usec) || ++ __xn_put_user(tx->tick, &ctx->tick) || ++ __xn_put_user(tx->ppsfreq, &ctx->ppsfreq) || ++ __xn_put_user(tx->jitter, &ctx->jitter) || ++ __xn_put_user(tx->shift, &ctx->shift) || ++ __xn_put_user(tx->stabil, &ctx->stabil) || ++ __xn_put_user(tx->jitcnt, &ctx->jitcnt) || ++ __xn_put_user(tx->calcnt, &ctx->calcnt) || ++ __xn_put_user(tx->errcnt, &ctx->errcnt) || ++ __xn_put_user(tx->stbcnt, &ctx->stbcnt)) ++ 
return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_timex); ++ ++ssize_t sys32_get_fdset(fd_set *fds, const compat_fd_set __user *cfds, ++ size_t cfdsize) ++{ ++ int rdpos, wrpos, rdlim = cfdsize / sizeof(compat_ulong_t); ++ ++ if (cfds == NULL || !access_rok(cfds, cfdsize)) ++ return -EFAULT; ++ ++ for (rdpos = 0, wrpos = 0; rdpos < rdlim; rdpos++, wrpos++) ++ if (__xn_get_user(fds->fds_bits[wrpos], cfds->fds_bits + rdpos)) ++ return -EFAULT; ++ ++ return (ssize_t)rdlim * sizeof(long); ++} ++EXPORT_SYMBOL_GPL(sys32_get_fdset); ++ ++ssize_t sys32_put_fdset(compat_fd_set __user *cfds, const fd_set *fds, ++ size_t fdsize) ++{ ++ int rdpos, wrpos, wrlim = fdsize / sizeof(long); ++ ++ if (cfds == NULL || !access_wok(cfds, wrlim * sizeof(compat_ulong_t))) ++ return -EFAULT; ++ ++ for (rdpos = 0, wrpos = 0; wrpos < wrlim; rdpos++, wrpos++) ++ if (__xn_put_user(fds->fds_bits[rdpos], cfds->fds_bits + wrpos)) ++ return -EFAULT; ++ ++ return (ssize_t)wrlim * sizeof(compat_ulong_t); ++} ++EXPORT_SYMBOL_GPL(sys32_put_fdset); ++ ++int sys32_get_param_ex(int policy, ++ struct sched_param_ex *p, ++ const struct compat_sched_param_ex __user *u_cp) ++{ ++ struct compat_sched_param_ex cpex; ++ ++ if (u_cp == NULL || cobalt_copy_from_user(&cpex, u_cp, sizeof(cpex))) ++ return -EFAULT; ++ ++ p->sched_priority = cpex.sched_priority; ++ ++ switch (policy) { ++ case SCHED_SPORADIC: ++ p->sched_ss_low_priority = cpex.sched_ss_low_priority; ++ p->sched_ss_max_repl = cpex.sched_ss_max_repl; ++ p->sched_ss_repl_period.tv_sec = cpex.sched_ss_repl_period.tv_sec; ++ p->sched_ss_repl_period.tv_nsec = cpex.sched_ss_repl_period.tv_nsec; ++ p->sched_ss_init_budget.tv_sec = cpex.sched_ss_init_budget.tv_sec; ++ p->sched_ss_init_budget.tv_nsec = cpex.sched_ss_init_budget.tv_nsec; ++ break; ++ case SCHED_RR: ++ p->sched_rr_quantum.tv_sec = cpex.sched_rr_quantum.tv_sec; ++ p->sched_rr_quantum.tv_nsec = cpex.sched_rr_quantum.tv_nsec; ++ break; ++ case SCHED_TP: ++ p->sched_tp_partition = cpex.sched_tp_partition; ++ break; ++ case SCHED_QUOTA: ++ p->sched_quota_group = cpex.sched_quota_group; ++ break; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_param_ex); ++ ++int sys32_put_param_ex(int policy, ++ struct compat_sched_param_ex __user *u_cp, ++ const struct sched_param_ex *p) ++{ ++ struct compat_sched_param_ex cpex; ++ ++ if (u_cp == NULL) ++ return -EFAULT; ++ ++ cpex.sched_priority = p->sched_priority; ++ ++ switch (policy) { ++ case SCHED_SPORADIC: ++ cpex.sched_ss_low_priority = p->sched_ss_low_priority; ++ cpex.sched_ss_max_repl = p->sched_ss_max_repl; ++ cpex.sched_ss_repl_period.tv_sec = p->sched_ss_repl_period.tv_sec; ++ cpex.sched_ss_repl_period.tv_nsec = p->sched_ss_repl_period.tv_nsec; ++ cpex.sched_ss_init_budget.tv_sec = p->sched_ss_init_budget.tv_sec; ++ cpex.sched_ss_init_budget.tv_nsec = p->sched_ss_init_budget.tv_nsec; ++ break; ++ case SCHED_RR: ++ cpex.sched_rr_quantum.tv_sec = p->sched_rr_quantum.tv_sec; ++ cpex.sched_rr_quantum.tv_nsec = p->sched_rr_quantum.tv_nsec; ++ break; ++ case SCHED_TP: ++ cpex.sched_tp_partition = p->sched_tp_partition; ++ break; ++ case SCHED_QUOTA: ++ cpex.sched_quota_group = p->sched_quota_group; ++ break; ++ } ++ ++ return cobalt_copy_to_user(u_cp, &cpex, sizeof(cpex)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_param_ex); ++ ++int sys32_get_mqattr(struct mq_attr *ap, ++ const struct compat_mq_attr __user *u_cap) ++{ ++ struct compat_mq_attr cattr; ++ ++ if (u_cap == NULL || ++ cobalt_copy_from_user(&cattr, u_cap, sizeof(cattr))) ++ return -EFAULT; ++ ++ 
ap->mq_flags = cattr.mq_flags; ++ ap->mq_maxmsg = cattr.mq_maxmsg; ++ ap->mq_msgsize = cattr.mq_msgsize; ++ ap->mq_curmsgs = cattr.mq_curmsgs; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_mqattr); ++ ++int sys32_put_mqattr(struct compat_mq_attr __user *u_cap, ++ const struct mq_attr *ap) ++{ ++ struct compat_mq_attr cattr; ++ ++ cattr.mq_flags = ap->mq_flags; ++ cattr.mq_maxmsg = ap->mq_maxmsg; ++ cattr.mq_msgsize = ap->mq_msgsize; ++ cattr.mq_curmsgs = ap->mq_curmsgs; ++ ++ return u_cap == NULL ? -EFAULT : ++ cobalt_copy_to_user(u_cap, &cattr, sizeof(cattr)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_mqattr); ++ ++int sys32_get_sigevent(struct sigevent *ev, ++ const struct compat_sigevent *__user u_cev) ++{ ++ struct compat_sigevent cev; ++ compat_int_t *cp; ++ int ret, *p; ++ ++ if (u_cev == NULL) ++ return -EFAULT; ++ ++ ret = cobalt_copy_from_user(&cev, u_cev, sizeof(cev)); ++ if (ret) ++ return ret; ++ ++ memset(ev, 0, sizeof(*ev)); ++ ev->sigev_value.sival_ptr = compat_ptr(cev.sigev_value.sival_ptr); ++ ev->sigev_signo = cev.sigev_signo; ++ ev->sigev_notify = cev.sigev_notify; ++ /* ++ * Extensions may define extra fields we don't know about in ++ * the padding area, so we have to load it entirely. ++ */ ++ p = ev->_sigev_un._pad; ++ cp = cev._sigev_un._pad; ++ while (p < &ev->_sigev_un._pad[ARRAY_SIZE(ev->_sigev_un._pad)] && ++ cp < &cev._sigev_un._pad[ARRAY_SIZE(cev._sigev_un._pad)]) ++ *p++ = *cp++; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigevent); ++ ++int sys32_get_sigset(sigset_t *set, const compat_sigset_t *u_cset) ++{ ++ return get_compat_sigset(set, u_cset); ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigset); ++ ++int sys32_put_sigset(compat_sigset_t *u_cset, const sigset_t *set) ++{ ++ return put_compat_sigset(u_cset, set, sizeof(*u_cset)); ++} ++EXPORT_SYMBOL_GPL(sys32_put_sigset); ++ ++int sys32_get_sigval(union sigval *val, const union compat_sigval *u_cval) ++{ ++ union compat_sigval cval; ++ int ret; ++ ++ if (u_cval == NULL) ++ return -EFAULT; ++ ++ ret = cobalt_copy_from_user(&cval, u_cval, sizeof(cval)); ++ if (ret) ++ return ret; ++ ++ val->sival_ptr = compat_ptr(cval.sival_ptr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_sigval); ++ ++int sys32_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun) ++{ ++ struct compat_siginfo __user *u_p = u_si; ++ int ret; ++ ++ if (u_p == NULL) ++ return -EFAULT; ++ ++ ret = __xn_put_user(si->si_signo, &u_p->si_signo); ++ ret |= __xn_put_user(si->si_errno, &u_p->si_errno); ++ ret |= __xn_put_user(si->si_code, &u_p->si_code); ++ ++ /* ++ * Copy the generic/standard siginfo bits to userland. ++ */ ++ switch (si->si_code) { ++ case SI_TIMER: ++ ret |= __xn_put_user(si->si_tid, &u_p->si_tid); ++ ret |= __xn_put_user(ptr_to_compat(si->si_ptr), &u_p->si_ptr); ++ ret |= __xn_put_user(overrun, &u_p->si_overrun); ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ ret |= __xn_put_user(ptr_to_compat(si->si_ptr), &u_p->si_ptr); ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ ret |= __xn_put_user(si->si_pid, &u_p->si_pid); ++ ret |= __xn_put_user(si->si_uid, &u_p->si_uid); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(sys32_put_siginfo); ++ ++int sys32_get_msghdr(struct user_msghdr *msg, ++ const struct compat_msghdr __user *u_cmsg) ++{ ++ compat_uptr_t tmp1, tmp2, tmp3; ++ ++ if (u_cmsg == NULL || ++ !access_rok(u_cmsg, sizeof(*u_cmsg)) || ++ __xn_get_user(tmp1, &u_cmsg->msg_name) || ++ __xn_get_user(msg->msg_namelen, &u_cmsg->msg_namelen) || ++ __xn_get_user(tmp2, &u_cmsg->msg_iov) || ++ __xn_get_user(msg->msg_iovlen, &u_cmsg->msg_iovlen) || ++ __xn_get_user(tmp3, &u_cmsg->msg_control) || ++ __xn_get_user(msg->msg_controllen, &u_cmsg->msg_controllen) || ++ __xn_get_user(msg->msg_flags, &u_cmsg->msg_flags)) ++ return -EFAULT; ++ ++ if (msg->msg_namelen > sizeof(struct sockaddr_storage)) ++ msg->msg_namelen = sizeof(struct sockaddr_storage); ++ ++ msg->msg_name = compat_ptr(tmp1); ++ msg->msg_iov = compat_ptr(tmp2); ++ msg->msg_control = compat_ptr(tmp3); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_msghdr); ++ ++int sys32_get_mmsghdr(struct mmsghdr *mmsg, ++ const struct compat_mmsghdr __user *u_cmmsg) ++{ ++ if (u_cmmsg == NULL || ++ !access_rok(u_cmmsg, sizeof(*u_cmmsg)) || ++ __xn_get_user(mmsg->msg_len, &u_cmmsg->msg_len)) ++ return -EFAULT; ++ ++ return sys32_get_msghdr(&mmsg->msg_hdr, &u_cmmsg->msg_hdr); ++} ++EXPORT_SYMBOL_GPL(sys32_get_mmsghdr); ++ ++int sys32_put_msghdr(struct compat_msghdr __user *u_cmsg, ++ const struct user_msghdr *msg) ++{ ++ if (u_cmsg == NULL || ++ !access_wok(u_cmsg, sizeof(*u_cmsg)) || ++ __xn_put_user(ptr_to_compat(msg->msg_name), &u_cmsg->msg_name) || ++ __xn_put_user(msg->msg_namelen, &u_cmsg->msg_namelen) || ++ __xn_put_user(ptr_to_compat(msg->msg_iov), &u_cmsg->msg_iov) || ++ __xn_put_user(msg->msg_iovlen, &u_cmsg->msg_iovlen) || ++ __xn_put_user(ptr_to_compat(msg->msg_control), &u_cmsg->msg_control) || ++ __xn_put_user(msg->msg_controllen, &u_cmsg->msg_controllen) || ++ __xn_put_user(msg->msg_flags, &u_cmsg->msg_flags)) ++ return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_msghdr); ++ ++int sys32_put_mmsghdr(struct compat_mmsghdr __user *u_cmmsg, ++ const struct mmsghdr *mmsg) ++{ ++ if (u_cmmsg == NULL || ++ !access_wok(u_cmmsg, sizeof(*u_cmmsg)) || ++ __xn_put_user(mmsg->msg_len, &u_cmmsg->msg_len)) ++ return -EFAULT; ++ ++ return sys32_put_msghdr(&u_cmmsg->msg_hdr, &mmsg->msg_hdr); ++} ++EXPORT_SYMBOL_GPL(sys32_put_mmsghdr); ++ ++int sys32_get_iovec(struct iovec *iov, ++ const struct compat_iovec __user *u_ciov, ++ int ciovlen) ++{ ++ const struct compat_iovec __user *p; ++ struct compat_iovec ciov; ++ int ret, n; ++ ++ for (n = 0, p = u_ciov; n < ciovlen; n++, p++) { ++ ret = cobalt_copy_from_user(&ciov, p, sizeof(ciov)); ++ if (ret) ++ return ret; ++ iov[n].iov_base = compat_ptr(ciov.iov_base); ++ iov[n].iov_len = ciov.iov_len; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_get_iovec); ++ ++int sys32_put_iovec(struct compat_iovec __user *u_ciov, ++ const struct iovec *iov, ++ int iovlen) ++{ ++ struct compat_iovec __user *p; ++ struct compat_iovec ciov; ++ int ret, n; ++ ++ for (n = 0, p = u_ciov; n < iovlen; n++, p++) { ++ ciov.iov_base = ptr_to_compat(iov[n].iov_base); ++ ciov.iov_len = iov[n].iov_len; ++ ret = cobalt_copy_to_user(p, &ciov, sizeof(*p)); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(sys32_put_iovec); +--- linux/kernel/xenomai/posix/monitor.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/monitor.c 
2021-04-07 16:01:26.060635834 +0800 +@@ -0,0 +1,435 @@ ++/* ++ * Copyright (C) 2011 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "monitor.h" ++#include ++ ++/* ++ * The Cobalt monitor is a double-wait condition object, serializing ++ * accesses through a gate. It behaves like a mutex + two condition ++ * variables combo with extended signaling logic. Folding several ++ * conditions and the serialization support into a single object ++ * performs better on low end hw caches and allows for specific ++ * optimizations, compared to using separate general-purpose mutex and ++ * condvars. This object is used by the Copperplate interface ++ * internally when it runs over the Cobalt core. ++ * ++ * Threads can wait for some resource(s) to be granted (consumer ++ * side), or wait for the available resource(s) to drain (producer ++ * side). Therefore, signals are thread-directed for the grant side, ++ * and monitor-directed for the drain side. ++ * ++ * Typically, a consumer would wait for the GRANT condition to be ++ * satisfied, signaling the DRAINED condition when more resources ++ * could be made available if the protocol implements output ++ * contention (e.g. the write side of a message queue waiting for the ++ * consumer to release message slots). Conversely, a producer would ++ * wait for the DRAINED condition to be satisfied, issuing GRANT ++ * signals once more resources have been made available to the ++ * consumer. ++ * ++ * Implementation-wise, the monitor logic is shared with the Cobalt ++ * thread object. 
++ */ ++COBALT_SYSCALL(monitor_init, current, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ clockid_t clk_id, int flags)) ++{ ++ struct cobalt_monitor_shadow shadow; ++ struct cobalt_monitor_state *state; ++ struct cobalt_monitor *mon; ++ int pshared, tmode, ret; ++ struct cobalt_umm *umm; ++ unsigned long stateoff; ++ spl_t s; ++ ++ tmode = clock_flag(TIMER_ABSTIME, clk_id); ++ if (tmode < 0) ++ return -EINVAL; ++ ++ mon = xnmalloc(sizeof(*mon)); ++ if (mon == NULL) ++ return -ENOMEM; ++ ++ pshared = (flags & COBALT_MONITOR_SHARED) != 0; ++ umm = &cobalt_ppd_get(pshared)->umm; ++ state = cobalt_umm_alloc(umm, sizeof(*state)); ++ if (state == NULL) { ++ xnfree(mon); ++ return -EAGAIN; ++ } ++ ++ ret = xnregistry_enter_anon(mon, &mon->resnode.handle); ++ if (ret) { ++ cobalt_umm_free(umm, state); ++ xnfree(mon); ++ return ret; ++ } ++ ++ mon->state = state; ++ xnsynch_init(&mon->gate, XNSYNCH_PI, &state->owner); ++ xnsynch_init(&mon->drain, XNSYNCH_PRIO, NULL); ++ mon->flags = flags; ++ mon->tmode = tmode; ++ INIT_LIST_HEAD(&mon->waiters); ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_add_resource(&mon->resnode, monitor, pshared); ++ mon->magic = COBALT_MONITOR_MAGIC; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ state->flags = 0; ++ stateoff = cobalt_umm_offset(umm, state); ++ XENO_BUG_ON(COBALT, stateoff != (__u32)stateoff); ++ shadow.flags = flags; ++ shadow.handle = mon->resnode.handle; ++ shadow.state_offset = (__u32)stateoff; ++ ++ return cobalt_copy_to_user(u_mon, &shadow, sizeof(*u_mon)); ++} ++ ++/* nklock held, irqs off */ ++static int monitor_enter(xnhandle_t handle, struct xnthread *curr) ++{ ++ struct cobalt_monitor *mon; ++ int info; ++ ++ mon = xnregistry_lookup(handle, NULL); /* (Re)validate. */ ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ return -EINVAL; ++ ++ info = xnsynch_acquire(&mon->gate, XN_INFINITE, XN_RELATIVE); ++ if (info) ++ /* Break or error, no timeout possible. */ ++ return info & XNBREAK ? -EINTR : -EINVAL; ++ ++ mon->state->flags &= ~(COBALT_MONITOR_SIGNALED|COBALT_MONITOR_BROADCAST); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(monitor_enter, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct xnthread *curr = xnthread_current(); ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ret = monitor_enter(handle, curr); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++/* nklock held, irqs off */ ++static void monitor_wakeup(struct cobalt_monitor *mon) ++{ ++ struct cobalt_monitor_state *state = mon->state; ++ struct cobalt_thread *thread, *tmp; ++ struct xnthread *p; ++ int bcast; ++ ++ /* ++ * Having the GRANT signal pending does not necessarily mean ++ * that somebody is actually waiting for it, so we have to ++ * check both conditions below. ++ */ ++ bcast = (state->flags & COBALT_MONITOR_BROADCAST) != 0; ++ if ((state->flags & COBALT_MONITOR_GRANTED) == 0 || ++ list_empty(&mon->waiters)) ++ goto drain; ++ ++ /* ++ * Unblock waiters requesting a grant, either those who ++ * received it only or all of them, depending on the broadcast ++ * bit. ++ * ++ * We update the PENDED flag to inform userland about the ++ * presence of waiters, so that it may decide not to issue any ++ * syscall for exiting the monitor if nobody else is waiting ++ * at the gate. 
++ */ ++ list_for_each_entry_safe(thread, tmp, &mon->waiters, monitor_link) { ++ p = &thread->threadbase; ++ /* ++ * A thread might receive a grant signal albeit it ++ * does not wait on a monitor, or it might have timed ++ * out before we got there, so we really have to check ++ * that ->wchan does match our sleep queue. ++ */ ++ if (bcast || ++ (p->u_window->grant_value && p->wchan == &thread->monitor_synch)) { ++ xnsynch_wakeup_this_sleeper(&thread->monitor_synch, p); ++ list_del_init(&thread->monitor_link); ++ } ++ } ++drain: ++ /* ++ * Unblock threads waiting for a drain event if that signal is ++ * pending, either one or all, depending on the broadcast ++ * flag. ++ */ ++ if ((state->flags & COBALT_MONITOR_DRAINED) != 0 && ++ xnsynch_pended_p(&mon->drain)) { ++ if (bcast) ++ xnsynch_flush(&mon->drain, 0); ++ else ++ xnsynch_wakeup_one_sleeper(&mon->drain); ++ } ++ ++ if (list_empty(&mon->waiters) && !xnsynch_pended_p(&mon->drain)) ++ state->flags &= ~COBALT_MONITOR_PENDED; ++} ++ ++int __cobalt_monitor_wait(struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec *ts, ++ int __user *u_ret) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ struct cobalt_monitor_state *state; ++ xnticks_t timeout = XN_INFINITE; ++ int ret = 0, opret = 0, info; ++ struct cobalt_monitor *mon; ++ struct xnsynch *synch; ++ xnhandle_t handle; ++ xntmode_t tmode; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ ++ if (ts) ++ timeout = ts2ns(ts) + 1; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The current thread might have sent signals to the monitor ++ * it wants to sleep on: wake up satisfied waiters before ++ * going to sleep. ++ */ ++ state = mon->state; ++ if (state->flags & COBALT_MONITOR_SIGNALED) ++ monitor_wakeup(mon); ++ ++ synch = &curr->monitor_synch; ++ if (event & COBALT_MONITOR_WAITDRAIN) ++ synch = &mon->drain; ++ else { ++ curr->threadbase.u_window->grant_value = 0; ++ list_add_tail(&curr->monitor_link, &mon->waiters); ++ } ++ ++ /* ++ * Tell userland that somebody is now waiting for a signal, so ++ * that later exiting the monitor on the producer side will ++ * trigger a wakeup syscall. ++ * ++ * CAUTION: we must raise the PENDED flag while holding the ++ * gate mutex, to prevent a signal from sneaking in from a ++ * remote CPU without the producer issuing the corresponding ++ * wakeup call when dropping the gate lock. ++ */ ++ state->flags |= COBALT_MONITOR_PENDED; ++ ++ tmode = ts ? mon->tmode : XN_RELATIVE; ++ ++ /* Release the gate prior to waiting, all atomically. 
*/ ++ xnsynch_release(&mon->gate, &curr->threadbase); ++ ++ info = xnsynch_sleep_on(synch, timeout, tmode); ++ if (info) { ++ if ((event & COBALT_MONITOR_WAITDRAIN) == 0 && ++ !list_empty(&curr->monitor_link)) ++ list_del_init(&curr->monitor_link); ++ ++ if (list_empty(&mon->waiters) && !xnsynch_pended_p(&mon->drain)) ++ state->flags &= ~COBALT_MONITOR_PENDED; ++ ++ if (info & XNBREAK) { ++ opret = -EINTR; ++ goto out; ++ } ++ if (info & XNTIMEO) ++ opret = -ETIMEDOUT; ++ } ++ ++ ret = monitor_enter(handle, &curr->threadbase); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ __xn_put_user(opret, u_ret); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_wait, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec __user *u_ts, ++ int __user *u_ret)) ++{ ++ struct timespec ts, *tsp = NULL; ++ int ret; ++ ++ if (u_ts) { ++ tsp = &ts; ++ ret = cobalt_copy_from_user(&ts, u_ts, sizeof(ts)); ++ if (ret) ++ return ret; ++ } ++ ++ return __cobalt_monitor_wait(u_mon, event, tsp, u_ret); ++} ++ ++COBALT_SYSCALL(monitor_sync, nonrestartable, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ ret = -EINVAL; ++ else if (mon->state->flags & COBALT_MONITOR_SIGNALED) { ++ monitor_wakeup(mon); ++ xnsynch_release(&mon->gate, curr); ++ xnsched_run(); ++ ret = monitor_enter(handle, curr); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_exit, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) ++ ret = -EINVAL; ++ else { ++ if (mon->state->flags & COBALT_MONITOR_SIGNALED) ++ monitor_wakeup(mon); ++ ++ xnsynch_release(&mon->gate, curr); ++ xnsched_run(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(monitor_destroy, primary, ++ (struct cobalt_monitor_shadow __user *u_mon)) ++{ ++ struct cobalt_monitor_state *state; ++ struct cobalt_monitor *mon; ++ struct xnthread *curr; ++ xnhandle_t handle; ++ int ret = 0; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_mon->handle); ++ curr = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ mon = xnregistry_lookup(handle, NULL); ++ if (mon == NULL || mon->magic != COBALT_MONITOR_MAGIC) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ state = mon->state; ++ if ((state->flags & COBALT_MONITOR_PENDED) != 0 || ++ xnsynch_pended_p(&mon->drain) || !list_empty(&mon->waiters)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ /* ++ * A monitor must be destroyed by the thread currently holding ++ * its gate lock. 
++ */ ++ if (xnsynch_owner_check(&mon->gate, curr)) { ++ ret = -EPERM; ++ goto fail; ++ } ++ ++ cobalt_monitor_reclaim(&mon->resnode, s); /* drops lock */ ++ ++ xnsched_run(); ++ ++ return 0; ++ fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void cobalt_monitor_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_monitor *mon; ++ struct cobalt_umm *umm; ++ int pshared; ++ ++ mon = container_of(node, struct cobalt_monitor, resnode); ++ pshared = (mon->flags & COBALT_MONITOR_SHARED) != 0; ++ xnsynch_destroy(&mon->gate); ++ xnsynch_destroy(&mon->drain); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ cobalt_mark_deleted(mon); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ umm = &cobalt_ppd_get(pshared)->umm; ++ cobalt_umm_free(umm, mon->state); ++ xnfree(mon); ++} +--- linux/kernel/xenomai/posix/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall.h 2021-04-07 16:01:26.055635841 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SYSCALL_H ++#define _COBALT_POSIX_SYSCALL_H ++ ++#include ++ ++/* Regular (native) syscall handler implementation. */ ++#define COBALT_SYSCALL(__name, __mode, __args) \ ++ long CoBaLt_ ## __name __args ++ ++/* Regular (native) syscall handler declaration. */ ++#define COBALT_SYSCALL_DECL(__name, __args) \ ++ long CoBaLt_ ## __name __args ++ ++#include ++ ++#endif /* !_COBALT_POSIX_SYSCALL_H */ +--- linux/kernel/xenomai/posix/sem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sem.c 2021-04-07 16:01:26.051635847 +0800 +@@ -0,0 +1,618 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * Copyright (C) 2014,2015 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include "sem.h" ++#include ++ ++static inline struct cobalt_resources *sem_kqueue(struct cobalt_sem *sem) ++{ ++ int pshared = !!(sem->flags & SEM_PSHARED); ++ return cobalt_current_resources(pshared); ++} ++ ++static inline int sem_check(struct cobalt_sem *sem) ++{ ++ if (sem == NULL || sem->magic != COBALT_SEM_MAGIC) ++ return -EINVAL; ++ ++ if (sem->resnode.scope && sem->resnode.scope != sem_kqueue(sem)) ++ return -EPERM; ++ ++ return 0; ++} ++ ++int __cobalt_sem_destroy(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ if (!cobalt_obj_active(sem, COBALT_SEM_MAGIC, typeof(*sem))) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ if (--sem->refs) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ cobalt_mark_deleted(sem); ++ xnregistry_remove(sem->resnode.handle); ++ if (!sem->pathname) ++ cobalt_del_resource(&sem->resnode); ++ if (xnsynch_destroy(&sem->synchbase) == XNSYNCH_RESCHED) { ++ xnsched_run(); ++ ret = 1; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (sem->pathname) ++ putname(sem->pathname); ++ ++ cobalt_umm_free(&cobalt_ppd_get(!!(sem->flags & SEM_PSHARED))->umm, ++ sem->state); ++ ++ xnfree(sem); ++ ++ return ret; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++struct cobalt_sem * ++__cobalt_sem_init(const char *name, struct cobalt_sem_shadow *sm, ++ int flags, unsigned int value) ++{ ++ struct cobalt_sem_state *state; ++ struct cobalt_sem *sem, *osem; ++ struct cobalt_ppd *sys_ppd; ++ int ret, sflags, pshared; ++ struct list_head *semq; ++ spl_t s; ++ ++ if ((flags & SEM_PULSE) != 0 && value > 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ sem = xnmalloc(sizeof(*sem)); ++ if (sem == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ pshared = !!(flags & SEM_PSHARED); ++ sys_ppd = cobalt_ppd_get(pshared); ++ state = cobalt_umm_alloc(&sys_ppd->umm, sizeof(*state)); ++ if (state == NULL) { ++ ret = -EAGAIN; ++ goto err_free_sem; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ semq = &cobalt_current_resources(pshared)->semq; ++ if ((sm->magic == COBALT_SEM_MAGIC && !list_empty(semq)) || ++ sm->magic == COBALT_NAMED_SEM_MAGIC) { ++ osem = xnregistry_lookup(sm->handle, NULL); ++ if (cobalt_obj_active(osem, COBALT_SEM_MAGIC, typeof(*osem))) { ++ ret = -EBUSY; ++ goto err_lock_put; ++ } ++ } ++ ++ if (value > (unsigned)SEM_VALUE_MAX) { ++ ret = -EINVAL; ++ goto err_lock_put; ++ } ++ ++ ret = xnregistry_enter(name ?: "", sem, &sem->resnode.handle, NULL); ++ if (ret < 0) ++ goto err_lock_put; ++ ++ sem->magic = COBALT_SEM_MAGIC; ++ if (!name) ++ cobalt_add_resource(&sem->resnode, sem, pshared); ++ else ++ sem->resnode.scope = NULL; ++ sflags = flags & SEM_FIFO ? 0 : XNSYNCH_PRIO; ++ xnsynch_init(&sem->synchbase, sflags, NULL); ++ ++ sem->state = state; ++ atomic_set(&state->value, value); ++ state->flags = flags; ++ sem->flags = flags; ++ sem->refs = name ? 2 : 1; ++ sem->pathname = NULL; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ __cobalt_sem_shadow_init(sem, ++ name ? 
COBALT_NAMED_SEM_MAGIC : COBALT_SEM_MAGIC, sm); ++ ++ trace_cobalt_psem_init(name ?: "anon", ++ sem->resnode.handle, flags, value); ++ ++ return sem; ++ ++err_lock_put: ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_umm_free(&sys_ppd->umm, state); ++err_free_sem: ++ xnfree(sem); ++out: ++ trace_cobalt_psem_init_failed(name ?: "anon", flags, value, ret); ++ ++ return ERR_PTR(ret); ++} ++ ++void __cobalt_sem_shadow_init(struct cobalt_sem *sem, __u32 magic, ++ struct cobalt_sem_shadow *sm) ++{ ++ __u32 flags = sem->state->flags; ++ struct cobalt_ppd *sys_ppd; ++ ++ sys_ppd = cobalt_ppd_get(!!(flags & SEM_PSHARED)); ++ ++ sm->magic = magic; ++ sm->handle = sem->resnode.handle; ++ sm->state_offset = cobalt_umm_offset(&sys_ppd->umm, sem->state); ++ if (sem->state->flags & SEM_PSHARED) ++ sm->state_offset = -sm->state_offset; ++} ++ ++static int sem_destroy(struct cobalt_sem_shadow *sm) ++{ ++ struct cobalt_sem *sem; ++ int warn, ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (sm->magic != COBALT_SEM_MAGIC) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ sem = xnregistry_lookup(sm->handle, NULL); ++ ret = sem_check(sem); ++ if (ret) ++ goto fail; ++ ++ if ((sem->flags & SEM_NOBUSYDEL) != 0 && ++ xnsynch_pended_p(&sem->synchbase)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ warn = sem->flags & SEM_WARNDEL; ++ cobalt_mark_deleted(sm); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = __cobalt_sem_destroy(sem->resnode.handle); ++ ++ return warn ? ret : 0; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static inline int do_trywait(struct cobalt_sem *sem) ++{ ++ int ret; ++ ++ ret = sem_check(sem); ++ if (ret) ++ return ret; ++ ++ if (atomic_sub_return(1, &sem->state->value) < 0) ++ return -EAGAIN; ++ ++ return 0; ++} ++ ++static int sem_wait(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret, info; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ if (ret != -EAGAIN) ++ goto out; ++ ++ ret = 0; ++ info = xnsynch_sleep_on(&sem->synchbase, XN_INFINITE, XN_RELATIVE); ++ if (info & XNRMID) { ++ ret = -EINVAL; ++ } else if (info & XNBREAK) { ++ atomic_inc(&sem->state->value); /* undo do_trywait() */ ++ ret = -EINTR; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static inline int sem_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? -EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_sem_timedwait(struct cobalt_sem_shadow __user *u_sem, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 }; ++ int pull_ts = 1, ret, info; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ xntmode_t tmode; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_timedwait(handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ if (ret != -EAGAIN) ++ break; ++ ++ /* ++ * POSIX states that the validity of the timeout spec ++ * _need_ not be checked if the semaphore can be ++ * locked immediately, we show this behavior despite ++ * it's actually more complex, to keep some ++ * applications ported to Linux happy. 
++ */ ++ if (pull_ts) { ++ atomic_inc(&sem->state->value); ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ xnlock_get_irqsave(&nklock, s); ++ if (ret) ++ break; ++ if (ts.tv_nsec >= ONE_BILLION) { ++ ret = -EINVAL; ++ break; ++ } ++ pull_ts = 0; ++ continue; ++ } ++ ++ ret = 0; ++ tmode = sem->flags & SEM_RAWCLOCK ? XN_ABSOLUTE : XN_REALTIME; ++ info = xnsynch_sleep_on(&sem->synchbase, ts2ns(&ts) + 1, tmode); ++ if (info & XNRMID) ++ ret = -EINVAL; ++ else if (info & (XNBREAK|XNTIMEO)) { ++ ret = (info & XNBREAK) ? -EINTR : -ETIMEDOUT; ++ atomic_inc(&sem->state->value); ++ } ++ break; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static int sem_post(xnhandle_t handle) ++{ ++ struct cobalt_sem *sem; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret) ++ goto out; ++ ++ if (atomic_read(&sem->state->value) == SEM_VALUE_MAX) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (atomic_inc_return(&sem->state->value) <= 0) { ++ if (xnsynch_wakeup_one_sleeper(&sem->synchbase)) ++ xnsched_run(); ++ } else if (sem->flags & SEM_PULSE) ++ atomic_set(&sem->state->value, 0); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static int sem_getvalue(xnhandle_t handle, int *value) ++{ ++ struct cobalt_sem *sem; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ *value = atomic_read(&sem->state->value); ++ if ((sem->flags & SEM_REPORT) == 0 && *value < 0) ++ *value = 0; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(sem_init, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int flags, unsigned int value)) ++{ ++ struct cobalt_sem_shadow sm; ++ struct cobalt_sem *sem; ++ ++ if (cobalt_copy_from_user(&sm, u_sem, sizeof(sm))) ++ return -EFAULT; ++ ++ if (flags & ~(SEM_FIFO|SEM_PULSE|SEM_PSHARED|SEM_REPORT|\ ++ SEM_WARNDEL|SEM_RAWCLOCK|SEM_NOBUSYDEL)) ++ return -EINVAL; ++ ++ sem = __cobalt_sem_init(NULL, &sm, flags, value); ++ if (IS_ERR(sem)) ++ return PTR_ERR(sem); ++ ++ return cobalt_copy_to_user(u_sem, &sm, sizeof(*u_sem)); ++} ++ ++COBALT_SYSCALL(sem_post, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_post(handle); ++ ++ return sem_post(handle); ++} ++ ++COBALT_SYSCALL(sem_wait, primary, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_wait(handle); ++ ++ return sem_wait(handle); ++} ++ ++COBALT_SYSCALL(sem_timedwait, primary, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct timespec __user *u_ts)) ++{ ++ return __cobalt_sem_timedwait(u_sem, u_ts, sem_fetch_timeout); ++} ++ ++COBALT_SYSCALL(sem_trywait, primary, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ int ret; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_trywait(handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ ret = do_trywait(sem); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sem_getvalue, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int __user *u_sval)) ++{ ++ int ret, sval = -1; ++ 
xnhandle_t handle; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ ++ ret = sem_getvalue(handle, &sval); ++ trace_cobalt_psem_getvalue(handle, sval); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_sval, &sval, sizeof(sval)); ++} ++ ++COBALT_SYSCALL(sem_destroy, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem_shadow sm; ++ int err; ++ ++ if (cobalt_copy_from_user(&sm, u_sem, sizeof(sm))) ++ return -EFAULT; ++ ++ trace_cobalt_psem_destroy(sm.handle); ++ ++ err = sem_destroy(&sm); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_sem, &sm, sizeof(*u_sem)) ?: err; ++} ++ ++COBALT_SYSCALL(sem_broadcast_np, current, ++ (struct cobalt_sem_shadow __user *u_sem)) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ int ret; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_broadcast(u_sem->handle); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sem = xnregistry_lookup(handle, NULL); ++ ret = sem_check(sem); ++ if (ret == 0 && atomic_read(&sem->state->value) < 0) { ++ atomic_set(&sem->state->value, 0); ++ xnsynch_flush(&sem->synchbase, 0); ++ xnsched_run(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(sem_inquire, current, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct cobalt_sem_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)) ++{ ++ int val = 0, nrwait = 0, nrpids, ret = 0; ++ unsigned long pstamp, nstamp = 0; ++ struct cobalt_sem_info info; ++ pid_t *t = NULL, fbuf[16]; ++ struct xnthread *thread; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ ++ handle = cobalt_get_handle_from_user(&u_sem->handle); ++ trace_cobalt_psem_inquire(handle); ++ ++ nrpids = waitsz / sizeof(pid_t); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ for (;;) { ++ pstamp = nstamp; ++ sem = xnregistry_lookup(handle, &nstamp); ++ if (sem == NULL || sem->magic != COBALT_SEM_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * Allocate memory to return the wait list without ++ * holding any lock, then revalidate the handle. ++ */ ++ if (t == NULL) { ++ val = atomic_read(&sem->state->value); ++ if (val >= 0 || u_waitlist == NULL) ++ break; ++ xnlock_put_irqrestore(&nklock, s); ++ if (nrpids > -val) ++ nrpids = -val; ++ if (-val <= ARRAY_SIZE(fbuf)) ++ t = fbuf; /* Use fast buffer. */ ++ else { ++ t = xnmalloc(-val * sizeof(pid_t)); ++ if (t == NULL) ++ return -ENOMEM; ++ } ++ xnlock_get_irqsave(&nklock, s); ++ } else if (pstamp == nstamp) ++ break; ++ else if (val != atomic_read(&sem->state->value)) { ++ xnlock_put_irqrestore(&nklock, s); ++ if (t != fbuf) ++ xnfree(t); ++ t = NULL; ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ ++ info.flags = sem->flags; ++ info.value = (sem->flags & SEM_REPORT) || val >= 0 ? val : 0; ++ info.nrwait = val < 0 ? 
-val : 0; ++ ++ if (xnsynch_pended_p(&sem->synchbase) && u_waitlist != NULL) { ++ xnsynch_for_each_sleeper(thread, &sem->synchbase) { ++ if (nrwait >= nrpids) ++ break; ++ t[nrwait++] = xnthread_host_pid(thread); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = cobalt_copy_to_user(u_info, &info, sizeof(info)); ++ if (ret == 0 && nrwait > 0) ++ ret = cobalt_copy_to_user(u_waitlist, t, nrwait * sizeof(pid_t)); ++ ++ if (t && t != fbuf) ++ xnfree(t); ++ ++ return ret ?: nrwait; ++} ++ ++void cobalt_sem_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ int named, ret; ++ ++ sem = container_of(node, struct cobalt_sem, resnode); ++ named = (sem->flags & SEM_NAMED) != 0; ++ handle = node->handle; ++ xnlock_put_irqrestore(&nklock, s); ++ ret = __cobalt_sem_destroy(handle); ++ if (named && ret == -EBUSY) ++ xnregistry_unlink(xnregistry_key(handle)); ++} +--- linux/kernel/xenomai/posix/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/thread.c 2021-04-07 16:01:26.046635854 +0800 +@@ -0,0 +1,953 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "signal.h" ++#include "timer.h" ++#include "clock.h" ++#include "sem.h" ++#define CREATE_TRACE_POINTS ++#include ++ ++xnticks_t cobalt_time_slice = CONFIG_XENO_OPT_RR_QUANTUM * 1000; ++ ++#define PTHREAD_HSLOTS (1 << 8) /* Must be a power of 2 */ ++ ++/* Process-local index, pthread_t x mm_struct (cobalt_local_hkey). */ ++struct local_thread_hash { ++ pid_t pid; ++ struct cobalt_thread *thread; ++ struct cobalt_local_hkey hkey; ++ struct local_thread_hash *next; ++}; ++ ++/* System-wide index on task_pid_nr(). 
*/ ++struct global_thread_hash { ++ pid_t pid; ++ struct cobalt_thread *thread; ++ struct global_thread_hash *next; ++}; ++ ++static struct local_thread_hash *local_index[PTHREAD_HSLOTS]; ++ ++static struct global_thread_hash *global_index[PTHREAD_HSLOTS]; ++ ++static inline struct local_thread_hash * ++thread_hash(const struct cobalt_local_hkey *hkey, ++ struct cobalt_thread *thread, pid_t pid) ++{ ++ struct global_thread_hash **ghead, *gslot; ++ struct local_thread_hash **lhead, *lslot; ++ u32 hash; ++ void *p; ++ spl_t s; ++ ++ p = xnmalloc(sizeof(*lslot) + sizeof(*gslot)); ++ if (p == NULL) ++ return NULL; ++ ++ lslot = p; ++ lslot->hkey = *hkey; ++ lslot->thread = thread; ++ lslot->pid = pid; ++ hash = jhash2((u32 *)&lslot->hkey, ++ sizeof(lslot->hkey) / sizeof(u32), 0); ++ lhead = &local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ gslot = p + sizeof(*lslot); ++ gslot->pid = pid; ++ gslot->thread = thread; ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ ghead = &global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ lslot->next = *lhead; ++ *lhead = lslot; ++ gslot->next = *ghead; ++ *ghead = gslot; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return lslot; ++} ++ ++static inline void thread_unhash(const struct cobalt_local_hkey *hkey) ++{ ++ struct global_thread_hash **gtail, *gslot; ++ struct local_thread_hash **ltail, *lslot; ++ pid_t pid; ++ u32 hash; ++ spl_t s; ++ ++ hash = jhash2((u32 *) hkey, sizeof(*hkey) / sizeof(u32), 0); ++ ltail = &local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ lslot = *ltail; ++ while (lslot && ++ (lslot->hkey.u_pth != hkey->u_pth || ++ lslot->hkey.mm != hkey->mm)) { ++ ltail = &lslot->next; ++ lslot = *ltail; ++ } ++ ++ if (lslot == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return; ++ } ++ ++ *ltail = lslot->next; ++ pid = lslot->pid; ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ gtail = &global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ gslot = *gtail; ++ while (gslot && gslot->pid != pid) { ++ gtail = &gslot->next; ++ gslot = *gtail; ++ } ++ /* gslot must be found here. */ ++ XENO_BUG_ON(COBALT, !(gslot && gtail)); ++ *gtail = gslot->next; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(lslot); ++} ++ ++static struct cobalt_thread * ++thread_lookup(const struct cobalt_local_hkey *hkey) ++{ ++ struct local_thread_hash *lslot; ++ struct cobalt_thread *thread; ++ u32 hash; ++ spl_t s; ++ ++ hash = jhash2((u32 *)hkey, sizeof(*hkey) / sizeof(u32), 0); ++ lslot = local_index[hash & (PTHREAD_HSLOTS - 1)]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (lslot != NULL && ++ (lslot->hkey.u_pth != hkey->u_pth || lslot->hkey.mm != hkey->mm)) ++ lslot = lslot->next; ++ ++ thread = lslot ? lslot->thread : NULL; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++ ++struct cobalt_thread *cobalt_thread_find(pid_t pid) /* nklocked, IRQs off */ ++{ ++ struct global_thread_hash *gslot; ++ u32 hash; ++ ++ hash = jhash2((u32 *)&pid, sizeof(pid) / sizeof(u32), 0); ++ ++ gslot = global_index[hash & (PTHREAD_HSLOTS - 1)]; ++ while (gslot && gslot->pid != pid) ++ gslot = gslot->next; ++ ++ return gslot ? 
gslot->thread : NULL; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_find); ++ ++struct cobalt_thread *cobalt_thread_find_local(pid_t pid) /* nklocked, IRQs off */ ++{ ++ struct cobalt_thread *thread; ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL || thread->hkey.mm != current->mm) ++ return NULL; ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_find_local); ++ ++struct cobalt_thread *cobalt_thread_lookup(unsigned long pth) /* nklocked, IRQs off */ ++{ ++ struct cobalt_local_hkey hkey; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ return thread_lookup(&hkey); ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_lookup); ++ ++void cobalt_thread_map(struct xnthread *curr) ++{ ++ struct cobalt_thread *thread; ++ ++ thread = container_of(curr, struct cobalt_thread, threadbase); ++ thread->process = cobalt_current_process(); ++ XENO_BUG_ON(COBALT, thread->process == NULL); ++} ++ ++struct xnthread_personality *cobalt_thread_exit(struct xnthread *curr) ++{ ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ thread = container_of(curr, struct cobalt_thread, threadbase); ++ /* ++ * Unhash first, to prevent further access to the TCB from ++ * userland. ++ */ ++ thread_unhash(&thread->hkey); ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_mark_deleted(thread); ++ list_del(&thread->next); ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_signal_flush(thread); ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ ++ return NULL; ++} ++ ++struct xnthread_personality *cobalt_thread_finalize(struct xnthread *zombie) ++{ ++ struct cobalt_thread *thread; ++ ++ thread = container_of(zombie, struct cobalt_thread, threadbase); ++ xnfree(thread); ++ ++ return NULL; ++} ++ ++int __cobalt_thread_setschedparam_ex(struct cobalt_thread *thread, int policy, ++ const struct sched_param_ex *param_ex) ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ xnticks_t tslice; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!cobalt_obj_active(thread, COBALT_THREAD_MAGIC, ++ struct cobalt_thread)) { ++ ret = -ESRCH; ++ goto out; ++ } ++ ++ tslice = thread->threadbase.rrperiod; ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, &tslice); ++ if (sched_class == NULL) { ++ ret = -EINVAL; ++ goto out; ++ } ++ xnthread_set_slice(&thread->threadbase, tslice); ++ if (cobalt_call_extension(thread_setsched, &thread->extref, ret, ++ sched_class, ¶m) && ret) ++ goto out; ++ ret = xnthread_set_schedparam(&thread->threadbase, ++ sched_class, ¶m); ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_thread_getschedparam_ex(struct cobalt_thread *thread, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct xnsched_class *base_class; ++ struct xnthread *base_thread; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!cobalt_obj_active(thread, COBALT_THREAD_MAGIC, ++ struct cobalt_thread)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ ++ base_thread = &thread->threadbase; ++ base_class = base_thread->base_class; ++ *policy_r = base_class->policy; ++ ++ param_ex->sched_priority = xnthread_base_priority(base_thread); ++ if (param_ex->sched_priority == 0) /* SCHED_FIFO/SCHED_WEAK */ ++ *policy_r = SCHED_NORMAL; ++ ++ if (base_class == &xnsched_class_rt) { ++ if (xnthread_test_state(base_thread, XNRRB)) { ++ ns2ts(¶m_ex->sched_rr_quantum, base_thread->rrperiod); ++ *policy_r = SCHED_RR; ++ } ++ goto out; ++ } ++ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ 
if (base_class == &xnsched_class_weak) { ++ if (*policy_r != SCHED_WEAK) ++ param_ex->sched_priority = -param_ex->sched_priority; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ if (base_class == &xnsched_class_sporadic) { ++ param_ex->sched_ss_low_priority = base_thread->pss->param.low_prio; ++ ns2ts(¶m_ex->sched_ss_repl_period, base_thread->pss->param.repl_period); ++ ns2ts(¶m_ex->sched_ss_init_budget, base_thread->pss->param.init_budget); ++ param_ex->sched_ss_max_repl = base_thread->pss->param.max_repl; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ if (base_class == &xnsched_class_tp) { ++ param_ex->sched_tp_partition = ++ base_thread->tps - base_thread->sched->tp.partitions; ++ goto out; ++ } ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ if (base_class == &xnsched_class_quota) { ++ param_ex->sched_quota_group = base_thread->quota->tgid; ++ goto out; ++ } ++#endif ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static int pthread_create(struct cobalt_thread **thread_p, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ struct task_struct *task) ++{ ++ struct cobalt_process *process = cobalt_current_process(); ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ struct xnthread_init_attr iattr; ++ struct cobalt_thread *thread; ++ xnticks_t tslice; ++ int ret, n; ++ spl_t s; ++ ++ thread = xnmalloc(sizeof(*thread)); ++ if (thread == NULL) ++ return -EAGAIN; ++ ++ tslice = cobalt_time_slice; ++ sched_class = cobalt_sched_policy_param(¶m, policy, ++ param_ex, &tslice); ++ if (sched_class == NULL) { ++ xnfree(thread); ++ return -EINVAL; ++ } ++ ++ iattr.name = task->comm; ++ iattr.flags = XNUSER|XNFPU; ++ iattr.personality = &cobalt_personality; ++ iattr.affinity = CPU_MASK_ALL; ++ ret = xnthread_init(&thread->threadbase, &iattr, sched_class, ¶m); ++ if (ret) { ++ xnfree(thread); ++ return ret; ++ } ++ ++ thread->magic = COBALT_THREAD_MAGIC; ++ xnsynch_init(&thread->monitor_synch, XNSYNCH_FIFO, NULL); ++ ++ xnsynch_init(&thread->sigwait, XNSYNCH_FIFO, NULL); ++ sigemptyset(&thread->sigpending); ++ for (n = 0; n < _NSIG; n++) ++ INIT_LIST_HEAD(thread->sigqueues + n); ++ ++ xnthread_set_slice(&thread->threadbase, tslice); ++ cobalt_set_extref(&thread->extref, NULL, NULL); ++ ++ /* ++ * We need an anonymous registry entry to obtain a handle for ++ * fast mutex locking. ++ */ ++ ret = xnthread_register(&thread->threadbase, ""); ++ if (ret) { ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ xnfree(thread); ++ return ret; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&thread->next, process ? &process->thread_list ++ : &cobalt_global_thread_list); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ thread->hkey.u_pth = 0; ++ thread->hkey.mm = NULL; ++ ++ *thread_p = thread; ++ ++ return 0; ++} ++ ++static void pthread_discard(struct cobalt_thread *thread) ++{ ++ spl_t s; ++ ++ xnsynch_destroy(&thread->monitor_synch); ++ xnsynch_destroy(&thread->sigwait); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&thread->next); ++ xnlock_put_irqrestore(&nklock, s); ++ __xnthread_discard(&thread->threadbase); ++ xnfree(thread); ++} ++ ++static inline int pthread_setmode_np(int clrmask, int setmask, int *mode_r) ++{ ++ const int valid_flags = XNLOCK|XNWARN|XNTRAPLB; ++ int old; ++ ++ /* ++ * The conforming mode bit is actually zero, since jumping to ++ * this code entailed switching to primary mode already. 
++ */ ++ if ((clrmask & ~valid_flags) != 0 || (setmask & ~valid_flags) != 0) ++ return -EINVAL; ++ ++ old = xnthread_set_mode(clrmask, setmask); ++ if (mode_r) ++ *mode_r = old; ++ ++ if ((clrmask & ~setmask) & XNLOCK) ++ /* Reschedule if the scheduler has been unlocked. */ ++ xnsched_run(); ++ ++ return 0; ++} ++ ++static struct cobalt_thread *thread_lookup_or_shadow(unsigned long pth, ++ __u32 __user *u_winoff, ++ int *promoted_r) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ ++ *promoted_r = 0; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) { ++ if (u_winoff == NULL) ++ return ERR_PTR(-ESRCH); ++ ++ thread = cobalt_thread_shadow(&hkey, u_winoff); ++ if (!IS_ERR(thread)) ++ *promoted_r = 1; ++ } ++ ++ return thread; ++} ++ ++int cobalt_thread_setschedparam_ex(unsigned long pth, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct cobalt_thread *thread; ++ int ret, promoted; ++ ++ trace_cobalt_pthread_setschedparam(pth, policy, param_ex); ++ ++ thread = thread_lookup_or_shadow(pth, u_winoff, &promoted); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, param_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(thread_setschedparam_ex, conforming, ++ (unsigned long pth, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ struct sched_param_ex param_ex; ++ ++ if (cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex))) ++ return -EFAULT; ++ ++ return cobalt_thread_setschedparam_ex(pth, policy, ¶m_ex, ++ u_winoff, u_promoted); ++} ++ ++int cobalt_thread_getschedparam_ex(unsigned long pth, ++ int *policy_r, ++ struct sched_param_ex *param_ex) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret; ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ return -ESRCH; ++ ++ ret = __cobalt_thread_getschedparam_ex(thread, policy_r, param_ex); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_pthread_getschedparam(pth, *policy_r, param_ex); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_getschedparam_ex, current, ++ (unsigned long pth, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)) ++{ ++ struct sched_param_ex param_ex; ++ int ret, policy; ++ ++ ret = cobalt_thread_getschedparam_ex(pth, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_copy_to_user(u_policy, &policy, sizeof(policy)); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_param, ¶m_ex, sizeof(param_ex)); ++} ++ ++int cobalt_thread_setschedprio(unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted) ++{ ++ struct sched_param_ex param_ex; ++ struct cobalt_thread *thread; ++ int ret, policy, promoted; ++ ++ trace_cobalt_pthread_setschedprio(pth, prio); ++ ++ thread = thread_lookup_or_shadow(pth, u_winoff, &promoted); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ ret = __cobalt_thread_getschedparam_ex(thread, &policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ param_ex.sched_priority = prio; ++ ++ ret = __cobalt_thread_setschedparam_ex(thread, policy, ¶m_ex); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_promoted, &promoted, sizeof(promoted)); ++} ++ ++COBALT_SYSCALL(thread_setschedprio, 
conforming, ++ (unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)) ++{ ++ return cobalt_thread_setschedprio(pth, prio, u_winoff, u_promoted); ++} ++ ++int __cobalt_thread_create(unsigned long pth, int policy, ++ struct sched_param_ex *param_ex, ++ int xid, __u32 __user *u_winoff) ++{ ++ struct cobalt_thread *thread = NULL; ++ struct task_struct *p = current; ++ struct cobalt_local_hkey hkey; ++ int ret; ++ ++ trace_cobalt_pthread_create(pth, policy, param_ex); ++ ++ /* ++ * We have been passed the pthread_t identifier the user-space ++ * Cobalt library has assigned to our caller; we'll index our ++ * internal pthread_t descriptor in kernel space on it. ++ */ ++ hkey.u_pth = pth; ++ hkey.mm = p->mm; ++ ++ ret = pthread_create(&thread, policy, param_ex, p); ++ if (ret) ++ return ret; ++ ++ ret = cobalt_map_user(&thread->threadbase, u_winoff); ++ if (ret) { ++ pthread_discard(thread); ++ return ret; ++ } ++ ++ if (!thread_hash(&hkey, thread, task_pid_vnr(p))) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ ++ thread->hkey = hkey; ++ ++ if (xid > 0 && cobalt_push_personality(xid) == NULL) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ return xnthread_harden(); ++fail: ++ xnthread_cancel(&thread->threadbase); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(thread_create, init, ++ (unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, ++ __u32 __user *u_winoff)) ++{ ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ ret = cobalt_copy_from_user(¶m_ex, u_param, sizeof(param_ex)); ++ if (ret) ++ return ret; ++ ++ return __cobalt_thread_create(pth, policy, ¶m_ex, xid, u_winoff); ++} ++ ++struct cobalt_thread * ++cobalt_thread_shadow(struct cobalt_local_hkey *hkey, ++ __u32 __user *u_winoff) ++{ ++ struct cobalt_thread *thread = NULL; ++ struct sched_param_ex param_ex; ++ int ret; ++ ++ if (xnthread_current()) ++ return ERR_PTR(-EBUSY); ++ ++ param_ex.sched_priority = 0; ++ trace_cobalt_pthread_create(hkey->u_pth, SCHED_NORMAL, ¶m_ex); ++ ret = pthread_create(&thread, SCHED_NORMAL, ¶m_ex, current); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ ret = cobalt_map_user(&thread->threadbase, u_winoff); ++ if (ret) { ++ pthread_discard(thread); ++ return ERR_PTR(ret); ++ } ++ ++ if (!thread_hash(hkey, thread, task_pid_vnr(current))) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ ++ thread->hkey = *hkey; ++ ++ xnthread_harden(); ++ ++ return thread; ++fail: ++ xnthread_cancel(&thread->threadbase); ++ ++ return ERR_PTR(ret); ++} ++ ++COBALT_SYSCALL(thread_setmode, primary, ++ (int clrmask, int setmask, int __user *u_mode_r)) ++{ ++ int ret, old; ++ ++ trace_cobalt_pthread_setmode(clrmask, setmask); ++ ++ ret = pthread_setmode_np(clrmask, setmask, &old); ++ if (ret) ++ return ret; ++ ++ if (u_mode_r && cobalt_copy_to_user(u_mode_r, &old, sizeof(old))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_setname, current, ++ (unsigned long pth, const char __user *u_name)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ char name[XNOBJECT_NAME_LEN]; ++ struct task_struct *p; ++ spl_t s; ++ ++ if (cobalt_strncpy_from_user(name, u_name, ++ sizeof(name) - 1) < 0) ++ return -EFAULT; ++ ++ name[sizeof(name) - 1] = '\0'; ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ ++ trace_cobalt_pthread_setname(pth, name); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ ++ ksformat(thread->threadbase.name, ++ XNOBJECT_NAME_LEN - 
1, "%s", name); ++ p = xnthread_host_task(&thread->threadbase); ++ get_task_struct(p); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ knamecpy(p->comm, name); ++ put_task_struct(p); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(thread_kill, conforming, ++ (unsigned long pth, int sig)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ int ret; ++ spl_t s; ++ ++ trace_cobalt_pthread_kill(pth, sig); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ ret = -ESRCH; ++ else ++ ret = __cobalt_kill(thread, sig, 0); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(thread_join, primary, (unsigned long pth)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ spl_t s; ++ ++ trace_cobalt_pthread_join(pth); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (thread == NULL) ++ return -ESRCH; ++ ++ return xnthread_join(&thread->threadbase, false); ++} ++ ++COBALT_SYSCALL(thread_getpid, current, (unsigned long pth)) ++{ ++ struct cobalt_local_hkey hkey; ++ struct cobalt_thread *thread; ++ pid_t pid; ++ spl_t s; ++ ++ trace_cobalt_pthread_pid(pth); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ hkey.u_pth = pth; ++ hkey.mm = current->mm; ++ thread = thread_lookup(&hkey); ++ if (thread == NULL) ++ pid = -ESRCH; ++ else ++ pid = xnthread_host_pid(&thread->threadbase); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return pid; ++} ++ ++COBALT_SYSCALL(thread_getstat, current, ++ (pid_t pid, struct cobalt_threadstat __user *u_stat)) ++{ ++ struct cobalt_threadstat stat; ++ struct cobalt_thread *p; ++ struct xnthread *thread; ++ xnticks_t xtime; ++ spl_t s; ++ ++ trace_cobalt_pthread_stat(pid); ++ ++ if (pid == 0) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ xnlock_get_irqsave(&nklock, s); ++ } else { ++ xnlock_get_irqsave(&nklock, s); ++ p = cobalt_thread_find(pid); ++ if (p == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ESRCH; ++ } ++ thread = &p->threadbase; ++ } ++ ++ /* We have to hold the nklock to keep most values consistent. 
*/ ++ stat.cpu = xnsched_cpu(thread->sched); ++ stat.cprio = xnthread_current_priority(thread); ++ xtime = xnstat_exectime_get_total(&thread->stat.account); ++ if (thread->sched->curr == thread) ++ xtime += xnstat_exectime_now() - ++ xnstat_exectime_get_last_switch(thread->sched); ++ stat.xtime = xnclock_ticks_to_ns(&nkclock, xtime); ++ stat.msw = xnstat_counter_get(&thread->stat.ssw); ++ stat.csw = xnstat_counter_get(&thread->stat.csw); ++ stat.xsc = xnstat_counter_get(&thread->stat.xsc); ++ stat.pf = xnstat_counter_get(&thread->stat.pf); ++ stat.status = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ stat.status |= XNLOCK; ++ stat.timeout = xnthread_get_timeout(thread, ++ xnclock_read_monotonic(&nkclock)); ++ strcpy(stat.name, thread->name); ++ strcpy(stat.personality, thread->personality->name); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return cobalt_copy_to_user(u_stat, &stat, sizeof(stat)); ++} ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++int cobalt_thread_extend(struct cobalt_extension *ext, ++ void *priv) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ struct xnthread_personality *prev; ++ ++ trace_cobalt_pthread_extend(thread->hkey.u_pth, ext->core.name); ++ ++ prev = cobalt_push_personality(ext->core.xid); ++ if (prev == NULL) ++ return -EINVAL; ++ ++ cobalt_set_extref(&thread->extref, ext, priv); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_extend); ++ ++void cobalt_thread_restrict(void) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ ++ trace_cobalt_pthread_restrict(thread->hkey.u_pth, ++ thread->threadbase.personality->name); ++ cobalt_pop_personality(&cobalt_personality); ++ cobalt_set_extref(&thread->extref, NULL, NULL); ++} ++EXPORT_SYMBOL_GPL(cobalt_thread_restrict); ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++const char *cobalt_trace_parse_sched_params(struct trace_seq *p, int policy, ++ struct sched_param_ex *params) ++{ ++ const char *ret = trace_seq_buffer_ptr(p); ++ ++ switch (policy) { ++ case SCHED_QUOTA: ++ trace_seq_printf(p, "priority=%d, group=%d", ++ params->sched_priority, ++ params->sched_quota_group); ++ break; ++ case SCHED_TP: ++ trace_seq_printf(p, "priority=%d, partition=%d", ++ params->sched_priority, ++ params->sched_tp_partition); ++ break; ++ case SCHED_NORMAL: ++ break; ++ case SCHED_SPORADIC: ++ trace_seq_printf(p, "priority=%d, low_priority=%d, " ++ "budget=(%ld.%09ld), period=(%ld.%09ld), " ++ "maxrepl=%d", ++ params->sched_priority, ++ params->sched_ss_low_priority, ++ params->sched_ss_init_budget.tv_sec, ++ params->sched_ss_init_budget.tv_nsec, ++ params->sched_ss_repl_period.tv_sec, ++ params->sched_ss_repl_period.tv_nsec, ++ params->sched_ss_max_repl); ++ break; ++ case SCHED_RR: ++ case SCHED_FIFO: ++ case SCHED_COBALT: ++ case SCHED_WEAK: ++ default: ++ trace_seq_printf(p, "priority=%d", params->sched_priority); ++ break; ++ } ++ trace_seq_putc(p, '\0'); ++ ++ return ret; ++} +--- linux/kernel/xenomai/posix/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timer.c 2021-04-07 16:01:26.041635861 +0800 +@@ -0,0 +1,588 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "timer.h" ++#include "clock.h" ++#include "signal.h" ++ ++void cobalt_timer_handler(struct xntimer *xntimer) ++{ ++ struct cobalt_timer *timer; ++ /* ++ * Deliver the timer notification via a signal (unless ++ * SIGEV_NONE was given). If we can't do this because the ++ * target thread disappeared, then stop the timer. It will go ++ * away when timer_delete() is called, or the owner's process ++ * exits, whichever comes first. ++ */ ++ timer = container_of(xntimer, struct cobalt_timer, timerbase); ++ if (timer->sigp.si.si_signo && ++ cobalt_signal_send_pid(timer->target, &timer->sigp) == -ESRCH) ++ xntimer_stop(&timer->timerbase); ++} ++EXPORT_SYMBOL_GPL(cobalt_timer_handler); ++ ++static inline struct cobalt_thread * ++timer_init(struct cobalt_timer *timer, ++ const struct sigevent *__restrict__ evp) /* nklocked, IRQs off. */ ++{ ++ struct cobalt_thread *owner = cobalt_current_thread(), *target = NULL; ++ struct xnclock *clock; ++ ++ /* ++ * First, try to offload this operation to the extended ++ * personality the current thread might originate from. ++ */ ++ if (cobalt_initcall_extension(timer_init, &timer->extref, ++ owner, target, evp) && target) ++ return target; ++ ++ /* ++ * Ok, we have no extension available, or we do but it does ++ * not want to overload the standard behavior: handle this ++ * timer the pure Cobalt way then. ++ */ ++ if (evp == NULL || evp->sigev_notify == SIGEV_NONE) { ++ target = owner; /* Assume SIGEV_THREAD_ID. */ ++ goto init; ++ } ++ ++ if (evp->sigev_notify != SIGEV_THREAD_ID) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Recipient thread must be a Xenomai shadow in user-space, ++ * living in the same process than our caller. 
++ */ ++ target = cobalt_thread_find_local(evp->sigev_notify_thread_id); ++ if (target == NULL) ++ return ERR_PTR(-EINVAL); ++init: ++ clock = cobalt_clock_find(timer->clockid); ++ if (IS_ERR(clock)) ++ return ERR_PTR(PTR_ERR(clock)); ++ ++ xntimer_init(&timer->timerbase, clock, cobalt_timer_handler, ++ target->threadbase.sched, XNTIMER_UGRAVITY); ++ ++ return target; ++} ++ ++static inline int timer_alloc_id(struct cobalt_process *cc) ++{ ++ int id; ++ ++ id = find_first_bit(cc->timers_map, CONFIG_XENO_OPT_NRTIMERS); ++ if (id == CONFIG_XENO_OPT_NRTIMERS) ++ return -EAGAIN; ++ ++ __clear_bit(id, cc->timers_map); ++ ++ return id; ++} ++ ++static inline void timer_free_id(struct cobalt_process *cc, int id) ++{ ++ __set_bit(id, cc->timers_map); ++} ++ ++struct cobalt_timer * ++cobalt_timer_by_id(struct cobalt_process *cc, timer_t timer_id) ++{ ++ if (timer_id < 0 || timer_id >= CONFIG_XENO_OPT_NRTIMERS) ++ return NULL; ++ ++ if (test_bit(timer_id, cc->timers_map)) ++ return NULL; ++ ++ return cc->timers[timer_id]; ++} ++ ++static inline int timer_create(clockid_t clockid, ++ const struct sigevent *__restrict__ evp, ++ timer_t * __restrict__ timerid) ++{ ++ struct cobalt_process *cc; ++ struct cobalt_thread *target; ++ struct cobalt_timer *timer; ++ int signo, ret = -EINVAL; ++ timer_t timer_id; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ timer = xnmalloc(sizeof(*timer)); ++ if (timer == NULL) ++ return -ENOMEM; ++ ++ timer->sigp.si.si_errno = 0; ++ timer->sigp.si.si_code = SI_TIMER; ++ timer->sigp.si.si_overrun = 0; ++ INIT_LIST_HEAD(&timer->sigp.next); ++ timer->clockid = clockid; ++ timer->overruns = 0; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ ret = timer_alloc_id(cc); ++ if (ret < 0) ++ goto out; ++ ++ timer_id = ret; ++ ++ if (evp == NULL) { ++ timer->sigp.si.si_int = timer_id; ++ signo = SIGALRM; ++ } else { ++ if (evp->sigev_notify == SIGEV_NONE) ++ signo = 0; /* Don't notify. 
*/ ++ else { ++ signo = evp->sigev_signo; ++ if (signo < 1 || signo > _NSIG) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ timer->sigp.si.si_value = evp->sigev_value; ++ } ++ } ++ ++ timer->sigp.si.si_signo = signo; ++ timer->sigp.si.si_tid = timer_id; ++ timer->id = timer_id; ++ ++ target = timer_init(timer, evp); ++ if (target == NULL) { ++ ret = -EPERM; ++ goto fail; ++ } ++ ++ if (IS_ERR(target)) { ++ ret = PTR_ERR(target); ++ goto fail; ++ } ++ ++ timer->target = xnthread_host_pid(&target->threadbase); ++ cc->timers[timer_id] = timer; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ *timerid = timer_id; ++ ++ return 0; ++fail: ++ timer_free_id(cc, timer_id); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(timer); ++ ++ return ret; ++} ++ ++static void timer_cleanup(struct cobalt_process *p, struct cobalt_timer *timer) ++{ ++ xntimer_destroy(&timer->timerbase); ++ ++ if (!list_empty(&timer->sigp.next)) ++ list_del(&timer->sigp.next); ++ ++ timer_free_id(p, cobalt_timer_id(timer)); ++ p->timers[cobalt_timer_id(timer)] = NULL; ++} ++ ++static inline int ++timer_delete(timer_t timerid) ++{ ++ struct cobalt_process *cc; ++ struct cobalt_timer *timer; ++ int ret = 0; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ /* ++ * If an extension runs and actually handles the deletion, we ++ * should not call the timer_cleanup extension handler for ++ * this timer, but we shall destroy the core timer. If the ++ * handler returns on error, the whole deletion process is ++ * aborted, leaving the timer untouched. In all other cases, ++ * we do the core timer cleanup work, firing the timer_cleanup ++ * extension handler if defined. ++ */ ++ if (cobalt_call_extension(timer_delete, &timer->extref, ret) && ret < 0) ++ goto out; ++ ++ if (ret == 0) ++ cobalt_call_extension(timer_cleanup, &timer->extref, ret); ++ else ++ ret = 0; ++ ++ timer_cleanup(cc, timer); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(timer); ++ ++ return ret; ++ ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++void __cobalt_timer_getval(struct xntimer *__restrict__ timer, ++ struct itimerspec *__restrict__ value) ++{ ++ ns2ts(&value->it_interval, xntimer_interval(timer)); ++ ++ if (!xntimer_running_p(timer)) { ++ value->it_value.tv_sec = 0; ++ value->it_value.tv_nsec = 0; ++ } else { ++ ns2ts(&value->it_value, xntimer_get_timeout(timer)); ++ } ++} ++ ++static inline void ++timer_gettimeout(struct cobalt_timer *__restrict__ timer, ++ struct itimerspec *__restrict__ value) ++{ ++ int ret = 0; ++ ++ if (cobalt_call_extension(timer_gettime, &timer->extref, ++ ret, value) && ret != 0) ++ return; ++ ++ __cobalt_timer_getval(&timer->timerbase, value); ++} ++ ++int __cobalt_timer_setval(struct xntimer *__restrict__ timer, int clock_flag, ++ const struct itimerspec *__restrict__ value) ++{ ++ xnticks_t start, period; ++ ++ if (value->it_value.tv_nsec == 0 && value->it_value.tv_sec == 0) { ++ xntimer_stop(timer); ++ return 0; ++ } ++ ++ if ((unsigned long)value->it_value.tv_nsec >= ONE_BILLION || ++ ((unsigned long)value->it_interval.tv_nsec >= ONE_BILLION && ++ (value->it_value.tv_sec != 0 || value->it_value.tv_nsec != 0))) ++ return -EINVAL; ++ ++ start = ts2ns(&value->it_value) + 1; ++ period = ts2ns(&value->it_interval); ++ ++ /* ++ * Now start the timer. 
If the timeout data has already ++ * passed, the caller will handle the case. ++ */ ++ return xntimer_start(timer, start, period, clock_flag); ++} ++ ++static inline int timer_set(struct cobalt_timer *timer, int flags, ++ const struct itimerspec *__restrict__ value) ++{ /* nklocked, IRQs off. */ ++ struct cobalt_thread *thread; ++ int ret = 0; ++ ++ /* First, try offloading the work to an extension. */ ++ ++ if (cobalt_call_extension(timer_settime, &timer->extref, ++ ret, value, flags) && ret != 0) ++ return ret < 0 ? ret : 0; ++ ++ /* ++ * No extension, or operation not handled. Default to plain ++ * POSIX behavior. ++ * ++ * If the target thread vanished, just don't start the timer. ++ */ ++ thread = cobalt_thread_find(timer->target); ++ if (thread == NULL) ++ return 0; ++ ++ /* ++ * Make the timer affine to the CPU running the thread to be ++ * signaled if possible. ++ */ ++ xntimer_set_affinity(&timer->timerbase, thread->threadbase.sched); ++ ++ return __cobalt_timer_setval(&timer->timerbase, ++ clock_flag(flags, timer->clockid), value); ++} ++ ++static inline void ++timer_deliver_late(struct cobalt_process *cc, timer_t timerid) ++{ ++ struct cobalt_timer *timer; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ /* ++ * We dropped the lock shortly, revalidate the timer handle in ++ * case a deletion slipped in. ++ */ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer) ++ cobalt_timer_handler(&timer->timerbase); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++int __cobalt_timer_settime(timer_t timerid, int flags, ++ const struct itimerspec *__restrict__ value, ++ struct itimerspec *__restrict__ ovalue) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ int ret; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ XENO_BUG_ON(COBALT, cc == NULL); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (ovalue) ++ timer_gettimeout(timer, ovalue); ++ ++ ret = timer_set(timer, flags, value); ++ if (ret == -ETIMEDOUT) { ++ /* ++ * Time has already passed, deliver a notification ++ * immediately. Since we are about to dive into the ++ * signal machinery for this, let's drop the nklock to ++ * break the atomic section temporarily. 
++ */ ++ xnlock_put_irqrestore(&nklock, s); ++ timer_deliver_late(cc, timerid); ++ return 0; ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_timer_gettime(timer_t timerid, struct itimerspec *value) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) ++ goto fail; ++ ++ timer_gettimeout(timer, value); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINVAL; ++} ++ ++COBALT_SYSCALL(timer_delete, current, (timer_t timerid)) ++{ ++ return timer_delete(timerid); ++} ++ ++int __cobalt_timer_create(clockid_t clock, ++ const struct sigevent *sev, ++ timer_t __user *u_tm) ++{ ++ timer_t timerid = 0; ++ int ret; ++ ++ ret = timer_create(clock, sev, &timerid); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_tm, &timerid, sizeof(timerid))) { ++ timer_delete(timerid); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timer_create, current, ++ (clockid_t clock, ++ const struct sigevent __user *u_sev, ++ timer_t __user *u_tm)) ++{ ++ struct sigevent sev, *evp = NULL; ++ ++ if (u_sev) { ++ evp = &sev; ++ if (cobalt_copy_from_user(&sev, u_sev, sizeof(sev))) ++ return -EFAULT; ++ } ++ ++ return __cobalt_timer_create(clock, evp, u_tm); ++} ++ ++COBALT_SYSCALL(timer_settime, primary, ++ (timer_t tm, int flags, ++ const struct itimerspec __user *u_newval, ++ struct itimerspec __user *u_oldval)) ++{ ++ struct itimerspec newv, oldv, *oldvp = &oldv; ++ int ret; ++ ++ if (u_oldval == NULL) ++ oldvp = NULL; ++ ++ if (cobalt_copy_from_user(&newv, u_newval, sizeof(newv))) ++ return -EFAULT; ++ ++ ret = __cobalt_timer_settime(tm, flags, &newv, oldvp); ++ if (ret) ++ return ret; ++ ++ if (oldvp && cobalt_copy_to_user(u_oldval, oldvp, sizeof(oldv))) { ++ __cobalt_timer_settime(tm, flags, oldvp, NULL); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(timer_gettime, current, ++ (timer_t tm, struct itimerspec __user *u_val)) ++{ ++ struct itimerspec val; ++ int ret; ++ ++ ret = __cobalt_timer_gettime(tm, &val); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_val, &val, sizeof(val)); ++} ++ ++COBALT_SYSCALL(timer_getoverrun, current, (timer_t timerid)) ++{ ++ struct cobalt_timer *timer; ++ struct cobalt_process *cc; ++ int overruns; ++ spl_t s; ++ ++ cc = cobalt_current_process(); ++ if (cc == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ timer = cobalt_timer_by_id(cc, timerid); ++ if (timer == NULL) ++ goto fail; ++ ++ overruns = timer->overruns; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return overruns; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINVAL; ++} ++ ++int cobalt_timer_deliver(struct cobalt_thread *waiter, timer_t timerid) /* nklocked, IRQs off. */ ++{ ++ struct cobalt_timer *timer; ++ xnticks_t now; ++ ++ timer = cobalt_timer_by_id(cobalt_current_process(), timerid); ++ if (timer == NULL) ++ /* Killed before ultimate delivery, who cares then? 
*/ ++ return 0; ++ ++ if (!xntimer_periodic_p(&timer->timerbase)) ++ timer->overruns = 0; ++ else { ++ now = xnclock_read_raw(xntimer_clock(&timer->timerbase)); ++ timer->overruns = xntimer_get_overruns(&timer->timerbase, ++ &waiter->threadbase, now); ++ if ((unsigned int)timer->overruns > COBALT_DELAYMAX) ++ timer->overruns = COBALT_DELAYMAX; ++ } ++ ++ return timer->overruns; ++} ++ ++void cobalt_timer_reclaim(struct cobalt_process *p) ++{ ++ struct cobalt_timer *timer; ++ unsigned id; ++ spl_t s; ++ int ret; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (find_first_zero_bit(p->timers_map, CONFIG_XENO_OPT_NRTIMERS) == ++ CONFIG_XENO_OPT_NRTIMERS) ++ goto out; ++ ++ for (id = 0; id < ARRAY_SIZE(p->timers); id++) { ++ timer = cobalt_timer_by_id(p, id); ++ if (timer == NULL) ++ continue; ++ ++ cobalt_call_extension(timer_cleanup, &timer->extref, ret); ++ timer_cleanup(p, timer); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(timer); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} +--- linux/kernel/xenomai/posix/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/timer.h 2021-04-07 16:01:26.036635868 +0800 +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_TIMER_H ++#define _COBALT_POSIX_TIMER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_timer { ++ struct xntimer timerbase; ++ timer_t id; ++ int overruns; ++ clockid_t clockid; ++ pid_t target; ++ struct cobalt_sigpending sigp; ++ struct cobalt_extref extref; ++}; ++ ++int cobalt_timer_deliver(struct cobalt_thread *waiter, ++ timer_t timerid); ++ ++void cobalt_timer_reclaim(struct cobalt_process *p); ++ ++static inline timer_t cobalt_timer_id(const struct cobalt_timer *timer) ++{ ++ return timer->id; ++} ++ ++struct cobalt_timer * ++cobalt_timer_by_id(struct cobalt_process *p, timer_t timer_id); ++ ++void cobalt_timer_handler(struct xntimer *xntimer); ++ ++void __cobalt_timer_getval(struct xntimer *__restrict__ timer, ++ struct itimerspec *__restrict__ value); ++ ++int __cobalt_timer_setval(struct xntimer *__restrict__ timer, int clock_flag, ++ const struct itimerspec *__restrict__ value); ++ ++int __cobalt_timer_create(clockid_t clock, ++ const struct sigevent *sev, ++ timer_t __user *u_tm); ++ ++int __cobalt_timer_settime(timer_t timerid, int flags, ++ const struct itimerspec *__restrict__ value, ++ struct itimerspec *__restrict__ ovalue); ++ ++int __cobalt_timer_gettime(timer_t timerid, struct itimerspec *value); ++ ++COBALT_SYSCALL_DECL(timer_create, ++ (clockid_t clock, ++ const struct sigevent __user *u_sev, ++ timer_t __user *u_tm)); ++ ++COBALT_SYSCALL_DECL(timer_delete, (timer_t tm)); ++ ++COBALT_SYSCALL_DECL(timer_settime, ++ (timer_t tm, int flags, ++ const struct itimerspec __user *u_newval, ++ struct itimerspec __user *u_oldval)); ++ ++COBALT_SYSCALL_DECL(timer_gettime, ++ (timer_t tm, struct itimerspec __user *u_val)); ++ ++COBALT_SYSCALL_DECL(timer_getoverrun, (timer_t tm)); ++ ++#endif /* !_COBALT_POSIX_TIMER_H */ +--- linux/kernel/xenomai/posix/gen-syscall-entries.sh 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/gen-syscall-entries.sh 2021-04-07 16:01:26.032635874 +0800 +@@ -0,0 +1,32 @@ ++#! /bin/sh ++ ++set -e ++ ++shift ++ ++awk ' ++match($0, /COBALT_SYSCALL\([^,]*,[ \t]*[^,]*/) { ++ str=substr($0, RSTART + 15, RLENGTH - 15) ++ match(str, /[^, \t]*/) ++ syscall=substr(str, RSTART, RLENGTH) ++ ++ if (syscall == "") { ++ print "Failed to find syscall name in line " $0 > "/dev/stderr" ++ exit 1 ++ } ++ ++ calls = calls " __COBALT_CALL_ENTRY(" syscall ") \\\n" ++ modes = modes " __COBALT_MODE(" str ") \\\n" ++ next ++} ++ ++/COBALT_SYSCALL\(/ { ++ print "Failed to parse line " $0 > "/dev/stderr" ++ exit 1 ++} ++ ++END { ++ print "#define __COBALT_CALL_ENTRIES \\\n" calls " /* end */" ++ print "#define __COBALT_CALL_MODES \\\n" modes " /* end */" ++} ++' $* +--- linux/kernel/xenomai/posix/mqueue.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mqueue.c 2021-04-07 16:01:26.027635881 +0800 +@@ -0,0 +1,1010 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "signal.h" ++#include "timer.h" ++#include "mqueue.h" ++#include "clock.h" ++#include ++ ++#define COBALT_MSGMAX 65536 ++#define COBALT_MSGSIZEMAX (16*1024*1024) ++#define COBALT_MSGPRIOMAX 32768 ++ ++struct cobalt_mq { ++ unsigned magic; ++ ++ struct list_head link; ++ ++ struct xnsynch receivers; ++ struct xnsynch senders; ++ size_t memsize; ++ char *mem; ++ struct list_head queued; ++ struct list_head avail; ++ int nrqueued; ++ ++ /* mq_notify */ ++ struct siginfo si; ++ mqd_t target_qd; ++ struct cobalt_thread *target; ++ ++ struct mq_attr attr; ++ ++ unsigned refs; ++ char name[COBALT_MAXNAME]; ++ xnhandle_t handle; ++ ++ DECLARE_XNSELECT(read_select); ++ DECLARE_XNSELECT(write_select); ++}; ++ ++struct cobalt_mqd { ++ struct cobalt_mq *mq; ++ struct rtdm_fd fd; ++}; ++ ++struct cobalt_msg { ++ struct list_head link; ++ unsigned int prio; ++ size_t len; ++ char data[0]; ++}; ++ ++struct cobalt_mqwait_context { ++ struct xnthread_wait_context wc; ++ struct cobalt_msg *msg; ++}; ++ ++static struct mq_attr default_attr = { ++ .mq_maxmsg = 10, ++ .mq_msgsize = 8192, ++}; ++ ++static LIST_HEAD(cobalt_mqq); ++ ++static inline struct cobalt_msg *mq_msg_alloc(struct cobalt_mq *mq) ++{ ++ if (list_empty(&mq->avail)) ++ return NULL; ++ ++ return list_get_entry(&mq->avail, struct cobalt_msg, link); ++} ++ ++static inline void mq_msg_free(struct cobalt_mq *mq, struct cobalt_msg * msg) ++{ ++ list_add(&msg->link, &mq->avail); /* For earliest re-use of the block. */ ++} ++ ++static inline int mq_init(struct cobalt_mq *mq, const struct mq_attr *attr) ++{ ++ unsigned i, msgsize, memsize; ++ char *mem; ++ ++ if (attr == NULL) ++ attr = &default_attr; ++ else { ++ if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) ++ return -EINVAL; ++ if (attr->mq_maxmsg > COBALT_MSGMAX) ++ return -EINVAL; ++ if (attr->mq_msgsize > COBALT_MSGSIZEMAX) ++ return -EINVAL; ++ } ++ ++ msgsize = attr->mq_msgsize + sizeof(struct cobalt_msg); ++ ++ /* Align msgsize on natural boundary. */ ++ if ((msgsize % sizeof(unsigned long))) ++ msgsize += ++ sizeof(unsigned long) - (msgsize % sizeof(unsigned long)); ++ ++ memsize = msgsize * attr->mq_maxmsg; ++ memsize = PAGE_ALIGN(memsize); ++ if (get_order(memsize) > MAX_ORDER) ++ return -ENOSPC; ++ ++ mem = xnheap_vmalloc(memsize); ++ if (mem == NULL) ++ return -ENOSPC; ++ ++ mq->memsize = memsize; ++ INIT_LIST_HEAD(&mq->queued); ++ mq->nrqueued = 0; ++ xnsynch_init(&mq->receivers, XNSYNCH_PRIO, NULL); ++ xnsynch_init(&mq->senders, XNSYNCH_PRIO, NULL); ++ mq->mem = mem; ++ ++ /* Fill the pool. 
*/ ++ INIT_LIST_HEAD(&mq->avail); ++ for (i = 0; i < attr->mq_maxmsg; i++) { ++ struct cobalt_msg *msg = (struct cobalt_msg *) (mem + i * msgsize); ++ mq_msg_free(mq, msg); ++ } ++ ++ mq->attr = *attr; ++ mq->target = NULL; ++ xnselect_init(&mq->read_select); ++ xnselect_init(&mq->write_select); ++ mq->magic = COBALT_MQ_MAGIC; ++ mq->refs = 2; ++ INIT_LIST_HEAD(&mq->link); ++ ++ return 0; ++} ++ ++static inline void mq_destroy(struct cobalt_mq *mq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xnsynch_destroy(&mq->receivers); ++ xnsynch_destroy(&mq->senders); ++ list_del(&mq->link); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ xnselect_destroy(&mq->read_select); /* Reschedules. */ ++ xnselect_destroy(&mq->write_select); /* Ditto. */ ++ xnregistry_remove(mq->handle); ++ xnheap_vfree(mq->mem); ++ kfree(mq); ++} ++ ++static int mq_unref_inner(struct cobalt_mq *mq, spl_t s) ++{ ++ int destroy; ++ ++ destroy = --mq->refs == 0; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (destroy) ++ mq_destroy(mq); ++ ++ return destroy; ++} ++ ++static int mq_unref(struct cobalt_mq *mq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ return mq_unref_inner(mq, s); ++} ++ ++static void mqd_close(struct rtdm_fd *fd) ++{ ++ struct cobalt_mqd *mqd = container_of(fd, struct cobalt_mqd, fd); ++ struct cobalt_mq *mq = mqd->mq; ++ ++ kfree(mqd); ++ mq_unref(mq); ++} ++ ++int ++mqd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned type, unsigned index) ++{ ++ struct cobalt_mqd *mqd = container_of(fd, struct cobalt_mqd, fd); ++ struct xnselect_binding *binding; ++ struct cobalt_mq *mq; ++ int err; ++ spl_t s; ++ ++ if (type == XNSELECT_READ || type == XNSELECT_WRITE) { ++ binding = xnmalloc(sizeof(*binding)); ++ if (!binding) ++ return -ENOMEM; ++ } else ++ return -EBADF; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = mqd->mq; ++ ++ switch(type) { ++ case XNSELECT_READ: ++ err = -EBADF; ++ if ((rtdm_fd_flags(fd) & COBALT_PERMS_MASK) == O_WRONLY) ++ goto unlock_and_error; ++ ++ err = xnselect_bind(&mq->read_select, binding, ++ selector, type, index, ++ !list_empty(&mq->queued)); ++ if (err) ++ goto unlock_and_error; ++ break; ++ ++ case XNSELECT_WRITE: ++ err = -EBADF; ++ if ((rtdm_fd_flags(fd) & COBALT_PERMS_MASK) == O_RDONLY) ++ goto unlock_and_error; ++ ++ err = xnselect_bind(&mq->write_select, binding, ++ selector, type, index, ++ !list_empty(&mq->avail)); ++ if (err) ++ goto unlock_and_error; ++ break; ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ ++ unlock_and_error: ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ return err; ++} ++ ++static struct rtdm_fd_ops mqd_ops = { ++ .close = mqd_close, ++ .select = mqd_select, ++}; ++ ++static inline int mqd_create(struct cobalt_mq *mq, unsigned long flags, int ufd) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ if (cobalt_ppd_get(0) == &cobalt_kernel_ppd) ++ return -EPERM; ++ ++ mqd = kmalloc(sizeof(*mqd), GFP_KERNEL); ++ if (mqd == NULL) ++ return -ENOSPC; ++ ++ mqd->fd.oflags = flags; ++ mqd->mq = mq; ++ ++ ret = rtdm_fd_enter(&mqd->fd, ufd, COBALT_MQD_MAGIC, &mqd_ops); ++ if (ret < 0) ++ return ret; ++ ++ return rtdm_fd_register(&mqd->fd, ufd); ++} ++ ++static int mq_open(int uqd, const char *name, int oflags, ++ int mode, struct mq_attr *attr) ++{ ++ struct cobalt_mq *mq; ++ xnhandle_t handle; ++ spl_t s; ++ int err; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return -EINVAL; ++ ++ retry_bind: ++ err = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ switch (err) { ++ 
case 0: ++ /* Found */ ++ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ return -EEXIST; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = xnregistry_lookup(handle, NULL); ++ if (mq && mq->magic != COBALT_MQ_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (mq) { ++ ++mq->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ } else { ++ xnlock_put_irqrestore(&nklock, s); ++ goto retry_bind; ++ } ++ ++ err = mqd_create(mq, oflags & (O_NONBLOCK | COBALT_PERMS_MASK), ++ uqd); ++ if (err < 0) { ++ mq_unref(mq); ++ return err; ++ } ++ break; ++ ++ case -EWOULDBLOCK: ++ /* Not found */ ++ if ((oflags & O_CREAT) == 0) ++ return (mqd_t)-ENOENT; ++ ++ mq = kmalloc(sizeof(*mq), GFP_KERNEL); ++ if (mq == NULL) ++ return -ENOSPC; ++ ++ err = mq_init(mq, attr); ++ if (err) { ++ kfree(mq); ++ return err; ++ } ++ ++ snprintf(mq->name, sizeof(mq->name), "%s", &name[1]); ++ ++ err = mqd_create(mq, oflags & (O_NONBLOCK | COBALT_PERMS_MASK), ++ uqd); ++ if (err < 0) { ++ mq_destroy(mq); ++ return err; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ err = xnregistry_enter(mq->name, mq, &mq->handle, NULL); ++ if (err < 0) ++ --mq->refs; ++ else ++ list_add_tail(&mq->link, &cobalt_mqq); ++ xnlock_put_irqrestore(&nklock, s); ++ if (err < 0) { ++ rtdm_fd_close(uqd, COBALT_MQD_MAGIC); ++ if (err == -EEXIST) ++ goto retry_bind; ++ return err; ++ } ++ break; ++ ++ default: ++ return err; ++ } ++ ++ return 0; ++} ++ ++static inline int mq_close(mqd_t fd) ++{ ++ return rtdm_fd_close(fd, COBALT_MQD_MAGIC); ++} ++ ++static inline int mq_unlink(const char *name) ++{ ++ struct cobalt_mq *mq; ++ xnhandle_t handle; ++ spl_t s; ++ int err; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return -EINVAL; ++ ++ err = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (err == -EWOULDBLOCK) ++ return -ENOENT; ++ if (err) ++ return err; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = xnregistry_lookup(handle, NULL); ++ if (!mq) { ++ err = -ENOENT; ++ goto err_unlock; ++ } ++ if (mq->magic != COBALT_MQ_MAGIC) { ++ err = -EINVAL; ++ err_unlock: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++ } ++ if (mq_unref_inner(mq, s) == 0) ++ xnregistry_unlink(&name[1]); ++ return 0; ++} ++ ++static inline struct cobalt_msg * ++mq_trysend(struct cobalt_mqd *mqd, size_t len) ++{ ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ unsigned flags; ++ ++ mq = mqd->mq; ++ flags = rtdm_fd_flags(&mqd->fd) & COBALT_PERMS_MASK; ++ ++ if (flags != O_WRONLY && flags != O_RDWR) ++ return ERR_PTR(-EBADF); ++ ++ if (len > mq->attr.mq_msgsize) ++ return ERR_PTR(-EMSGSIZE); ++ ++ msg = mq_msg_alloc(mq); ++ if (msg == NULL) ++ return ERR_PTR(-EAGAIN); ++ ++ if (list_empty(&mq->avail)) ++ xnselect_signal(&mq->write_select, 0); ++ ++ return msg; ++} ++ ++static inline struct cobalt_msg * ++mq_tryrcv(struct cobalt_mqd *mqd, size_t len) ++{ ++ struct cobalt_msg *msg; ++ unsigned int flags; ++ struct cobalt_mq *mq; ++ ++ mq = mqd->mq; ++ flags = rtdm_fd_flags(&mqd->fd) & COBALT_PERMS_MASK; ++ ++ if (flags != O_RDONLY && flags != O_RDWR) ++ return ERR_PTR(-EBADF); ++ ++ if (len < mq->attr.mq_msgsize) ++ return ERR_PTR(-EMSGSIZE); ++ ++ if (list_empty(&mq->queued)) ++ return ERR_PTR(-EAGAIN); ++ ++ msg = list_get_entry(&mq->queued, struct cobalt_msg, link); ++ mq->nrqueued--; ++ ++ if (list_empty(&mq->queued)) ++ xnselect_signal(&mq->read_select, 0); ++ ++ return msg; ++} ++ ++static struct cobalt_msg * ++mq_timedsend_inner(struct cobalt_mqd *mqd, ++ size_t len, const void __user *u_ts, ++ int 
(*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqwait_context mwc; ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ struct timespec ts; ++ xntmode_t tmode; ++ xnticks_t to; ++ spl_t s; ++ int ret; ++ ++ to = XN_INFINITE; ++ tmode = XN_RELATIVE; ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ msg = mq_trysend(mqd, len); ++ if (msg != ERR_PTR(-EAGAIN)) ++ goto out; ++ ++ if (rtdm_fd_flags(&mqd->fd) & O_NONBLOCK) ++ goto out; ++ ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ERR_PTR(ret); ++ if ((unsigned long)ts.tv_nsec >= ONE_BILLION) ++ return ERR_PTR(-EINVAL); ++ to = ts2ns(&ts) + 1; ++ tmode = XN_REALTIME; ++ fetch_timeout = NULL; ++ goto redo; ++ } ++ ++ mq = mqd->mq; ++ xnthread_prepare_wait(&mwc.wc); ++ ret = xnsynch_sleep_on(&mq->senders, to, tmode); ++ if (ret) { ++ if (ret & XNBREAK) ++ msg = ERR_PTR(-EINTR); ++ else if (ret & XNTIMEO) ++ msg = ERR_PTR(-ETIMEDOUT); ++ else if (ret & XNRMID) ++ msg = ERR_PTR(-EBADF); ++ } else ++ msg = mwc.msg; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return msg; ++} ++ ++static void mq_release_msg(struct cobalt_mq *mq, struct cobalt_msg *msg) ++{ ++ struct cobalt_mqwait_context *mwc; ++ struct xnthread_wait_context *wc; ++ struct xnthread *thread; ++ ++ /* ++ * Try passing the free message slot to a waiting sender, link ++ * it to the free queue otherwise. ++ */ ++ if (xnsynch_pended_p(&mq->senders)) { ++ thread = xnsynch_wakeup_one_sleeper(&mq->senders); ++ wc = xnthread_get_wait_context(thread); ++ mwc = container_of(wc, struct cobalt_mqwait_context, wc); ++ mwc->msg = msg; ++ xnthread_complete_wait(wc); ++ } else { ++ mq_msg_free(mq, msg); ++ if (list_is_singular(&mq->avail)) ++ xnselect_signal(&mq->write_select, 1); ++ } ++} ++ ++static int ++mq_finish_send(struct cobalt_mqd *mqd, struct cobalt_msg *msg) ++{ ++ struct cobalt_mqwait_context *mwc; ++ struct xnthread_wait_context *wc; ++ struct cobalt_sigpending *sigp; ++ struct xnthread *thread; ++ struct cobalt_mq *mq; ++ spl_t s; ++ ++ mq = mqd->mq; ++ ++ xnlock_get_irqsave(&nklock, s); ++ /* Can we do pipelined sending? */ ++ if (xnsynch_pended_p(&mq->receivers)) { ++ thread = xnsynch_wakeup_one_sleeper(&mq->receivers); ++ wc = xnthread_get_wait_context(thread); ++ mwc = container_of(wc, struct cobalt_mqwait_context, wc); ++ mwc->msg = msg; ++ xnthread_complete_wait(wc); ++ } else { ++ /* Nope, have to go through the queue. */ ++ list_add_priff(msg, &mq->queued, prio, link); ++ mq->nrqueued++; ++ ++ /* ++ * If first message and no pending reader, send a ++ * signal if notification was enabled via mq_notify(). 
++ */ ++ if (list_is_singular(&mq->queued)) { ++ xnselect_signal(&mq->read_select, 1); ++ if (mq->target) { ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ cobalt_copy_siginfo(SI_MESGQ, &sigp->si, &mq->si); ++ if (cobalt_signal_send(mq->target, sigp, 0) <= 0) ++ cobalt_signal_free(sigp); ++ } ++ mq->target = NULL; ++ } ++ } ++ } ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static struct cobalt_msg * ++mq_timedrcv_inner(struct cobalt_mqd *mqd, ++ size_t len, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqwait_context mwc; ++ struct cobalt_msg *msg; ++ struct cobalt_mq *mq; ++ struct timespec ts; ++ xntmode_t tmode; ++ xnticks_t to; ++ spl_t s; ++ int ret; ++ ++ to = XN_INFINITE; ++ tmode = XN_RELATIVE; ++redo: ++ xnlock_get_irqsave(&nklock, s); ++ msg = mq_tryrcv(mqd, len); ++ if (msg != ERR_PTR(-EAGAIN)) ++ goto out; ++ ++ if (rtdm_fd_flags(&mqd->fd) & O_NONBLOCK) ++ goto out; ++ ++ if (fetch_timeout) { ++ xnlock_put_irqrestore(&nklock, s); ++ ret = fetch_timeout(&ts, u_ts); ++ if (ret) ++ return ERR_PTR(ret); ++ if (ts.tv_nsec >= ONE_BILLION) ++ return ERR_PTR(-EINVAL); ++ to = ts2ns(&ts) + 1; ++ tmode = XN_REALTIME; ++ fetch_timeout = NULL; ++ goto redo; ++ } ++ ++ mq = mqd->mq; ++ xnthread_prepare_wait(&mwc.wc); ++ ret = xnsynch_sleep_on(&mq->receivers, to, tmode); ++ if (ret == 0) ++ msg = mwc.msg; ++ else if (ret & XNRMID) ++ msg = ERR_PTR(-EBADF); ++ else if (ret & XNTIMEO) ++ msg = ERR_PTR(-ETIMEDOUT); ++ else ++ msg = ERR_PTR(-EINTR); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return msg; ++} ++ ++static int ++mq_finish_rcv(struct cobalt_mqd *mqd, struct cobalt_msg *msg) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq_release_msg(mqd->mq, msg); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static inline int mq_getattr(struct cobalt_mqd *mqd, struct mq_attr *attr) ++{ ++ struct cobalt_mq *mq; ++ spl_t s; ++ ++ mq = mqd->mq; ++ *attr = mq->attr; ++ xnlock_get_irqsave(&nklock, s); ++ attr->mq_flags = rtdm_fd_flags(&mqd->fd); ++ attr->mq_curmsgs = mq->nrqueued; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static inline int ++mq_notify(struct cobalt_mqd *mqd, unsigned index, const struct sigevent *evp) ++{ ++ struct cobalt_thread *thread = cobalt_current_thread(); ++ struct cobalt_mq *mq; ++ int err; ++ spl_t s; ++ ++ if (evp && ((evp->sigev_notify != SIGEV_SIGNAL && ++ evp->sigev_notify != SIGEV_NONE) || ++ (unsigned int)(evp->sigev_signo - 1) > SIGRTMAX - 1)) ++ return -EINVAL; ++ ++ if (xnsched_interrupt_p() || thread == NULL) ++ return -EPERM; ++ ++ xnlock_get_irqsave(&nklock, s); ++ mq = mqd->mq; ++ if (mq->target && mq->target != thread) { ++ err = -EBUSY; ++ goto unlock_and_error; ++ } ++ ++ if (evp == NULL || evp->sigev_notify == SIGEV_NONE) ++ /* Here, mq->target == cobalt_current_thread() or NULL. */ ++ mq->target = NULL; ++ else { ++ mq->target = thread; ++ mq->target_qd = index; ++ mq->si.si_signo = evp->sigev_signo; ++ mq->si.si_errno = 0; ++ mq->si.si_code = SI_MESGQ; ++ mq->si.si_value = evp->sigev_value; ++ /* ++ * XXX: we differ from the regular kernel here, which ++ * passes the sender's pid/uid data into the ++ * receiver's namespaces. We pass the receiver's creds ++ * into the init namespace instead. 
++ */ ++ mq->si.si_pid = task_pid_nr(current); ++ mq->si.si_uid = get_current_uuid(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ return 0; ++ ++ unlock_and_error: ++ xnlock_put_irqrestore(&nklock, s); ++ return err; ++} ++ ++static inline struct cobalt_mqd *cobalt_mqd_get(mqd_t ufd) ++{ ++ struct rtdm_fd *fd; ++ ++ fd = rtdm_fd_get(ufd, COBALT_MQD_MAGIC); ++ if (IS_ERR(fd)) { ++ int err = PTR_ERR(fd); ++ if (err == -EBADF && cobalt_current_process() == NULL) ++ err = -EPERM; ++ return ERR_PTR(err); ++ } ++ ++ return container_of(fd, struct cobalt_mqd, fd); ++} ++ ++static inline void cobalt_mqd_put(struct cobalt_mqd *mqd) ++{ ++ rtdm_fd_put(&mqd->fd); ++} ++ ++int __cobalt_mq_notify(mqd_t fd, const struct sigevent *evp) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(fd); ++ if (IS_ERR(mqd)) ++ ret = PTR_ERR(mqd); ++ else { ++ trace_cobalt_mq_notify(fd, evp); ++ ret = mq_notify(mqd, fd, evp); ++ cobalt_mqd_put(mqd); ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_notify, primary, ++ (mqd_t fd, const struct sigevent *__user evp)) ++{ ++ struct sigevent sev; ++ ++ if (evp && cobalt_copy_from_user(&sev, evp, sizeof(sev))) ++ return -EFAULT; ++ ++ return __cobalt_mq_notify(fd, evp ? &sev : NULL); ++} ++ ++int __cobalt_mq_open(const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr *attr) ++{ ++ char name[COBALT_MAXNAME]; ++ unsigned int len; ++ mqd_t uqd; ++ int ret; ++ ++ len = cobalt_strncpy_from_user(name, u_name, sizeof(name)); ++ if (len < 0) ++ return -EFAULT; ++ ++ if (len >= sizeof(name)) ++ return -ENAMETOOLONG; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ trace_cobalt_mq_open(name, oflags, mode); ++ ++ uqd = __rtdm_anon_getfd("[cobalt-mq]", oflags); ++ if (uqd < 0) ++ return uqd; ++ ++ ret = mq_open(uqd, name, oflags, mode, attr); ++ if (ret < 0) { ++ __rtdm_anon_putfd(uqd); ++ return ret; ++ } ++ ++ return uqd; ++} ++ ++COBALT_SYSCALL(mq_open, lostage, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr __user *u_attr)) ++{ ++ struct mq_attr _attr, *attr = &_attr; ++ ++ if ((oflags & O_CREAT) && u_attr) { ++ if (cobalt_copy_from_user(&_attr, u_attr, sizeof(_attr))) ++ return -EFAULT; ++ } else ++ attr = NULL; ++ ++ return __cobalt_mq_open(u_name, oflags, mode, attr); ++} ++ ++COBALT_SYSCALL(mq_close, lostage, (mqd_t uqd)) ++{ ++ trace_cobalt_mq_close(uqd); ++ ++ return mq_close(uqd); ++} ++ ++COBALT_SYSCALL(mq_unlink, lostage, (const char __user *u_name)) ++{ ++ char name[COBALT_MAXNAME]; ++ unsigned len; ++ ++ len = cobalt_strncpy_from_user(name, u_name, sizeof(name)); ++ if (len < 0) ++ return -EFAULT; ++ if (len >= sizeof(name)) ++ return -ENAMETOOLONG; ++ ++ trace_cobalt_mq_unlink(name); ++ ++ return mq_unlink(name); ++} ++ ++int __cobalt_mq_getattr(mqd_t uqd, struct mq_attr *attr) ++{ ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ ret = mq_getattr(mqd, attr); ++ cobalt_mqd_put(mqd); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_mq_getattr(uqd, attr); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(mq_getattr, current, ++ (mqd_t uqd, struct mq_attr __user *u_attr)) ++{ ++ struct mq_attr attr; ++ int ret; ++ ++ ret = __cobalt_mq_getattr(uqd, &attr); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_attr, &attr, sizeof(attr)); ++} ++ ++static inline int mq_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_mq_timedsend(mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_msg *msg; ++ struct cobalt_mqd *mqd; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ if (prio >= COBALT_MSGPRIOMAX) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (len > 0 && !access_rok(u_buf, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ trace_cobalt_mq_send(uqd, u_buf, len, prio); ++ msg = mq_timedsend_inner(mqd, len, u_ts, fetch_timeout); ++ if (IS_ERR(msg)) { ++ ret = PTR_ERR(msg); ++ goto out; ++ } ++ ++ ret = cobalt_copy_from_user(msg->data, u_buf, len); ++ if (ret) { ++ mq_finish_rcv(mqd, msg); ++ goto out; ++ } ++ msg->len = len; ++ msg->prio = prio; ++ ret = mq_finish_send(mqd, msg); ++out: ++ cobalt_mqd_put(mqd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_timedsend, primary, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const struct timespec __user *u_ts)) ++{ ++ return __cobalt_mq_timedsend(uqd, u_buf, len, prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++} ++ ++int __cobalt_mq_timedreceive(mqd_t uqd, void __user *u_buf, ++ ssize_t *lenp, ++ unsigned int __user *u_prio, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct cobalt_mqd *mqd; ++ struct cobalt_msg *msg; ++ unsigned int prio; ++ int ret; ++ ++ mqd = cobalt_mqd_get(uqd); ++ if (IS_ERR(mqd)) ++ return PTR_ERR(mqd); ++ ++ if (*lenp > 0 && !access_wok(u_buf, *lenp)) { ++ ret = -EFAULT; ++ goto fail; ++ } ++ ++ msg = mq_timedrcv_inner(mqd, *lenp, u_ts, fetch_timeout); ++ if (IS_ERR(msg)) { ++ ret = PTR_ERR(msg); ++ goto fail; ++ } ++ ++ ret = cobalt_copy_to_user(u_buf, msg->data, msg->len); ++ if (ret) { ++ mq_finish_rcv(mqd, msg); ++ goto fail; ++ } ++ ++ *lenp = msg->len; ++ prio = msg->prio; ++ ret = mq_finish_rcv(mqd, msg); ++ if (ret) ++ goto fail; ++ ++ cobalt_mqd_put(mqd); ++ ++ if (u_prio && __xn_put_user(prio, u_prio)) ++ return -EFAULT; ++ ++ return 0; ++fail: ++ cobalt_mqd_put(mqd); ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(mq_timedreceive, primary, ++ (mqd_t uqd, void __user *u_buf, ++ ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)) ++{ ++ ssize_t len; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&len, u_len, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ ret = __cobalt_mq_timedreceive(uqd, u_buf, &len, u_prio, ++ u_ts, u_ts ? mq_fetch_timeout : NULL); ++ ++ return ret ?: cobalt_copy_to_user(u_len, &len, sizeof(*u_len)); ++} +--- linux/kernel/xenomai/posix/mqueue.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mqueue.h 2021-04-07 16:01:26.022635888 +0800 +@@ -0,0 +1,74 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_POSIX_MQUEUE_H ++#define _COBALT_POSIX_MQUEUE_H ++ ++#include ++#include ++#include ++ ++struct mq_attr { ++ long mq_flags; ++ long mq_maxmsg; ++ long mq_msgsize; ++ long mq_curmsgs; ++}; ++ ++int __cobalt_mq_open(const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr *attr); ++ ++int __cobalt_mq_getattr(mqd_t uqd, struct mq_attr *attr); ++ ++int __cobalt_mq_timedsend(mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mq_timedreceive(mqd_t uqd, void __user *u_buf, ++ ssize_t *lenp, ++ unsigned int __user *u_prio, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mq_notify(mqd_t fd, const struct sigevent *evp); ++ ++COBALT_SYSCALL_DECL(mq_open, ++ (const char __user *u_name, int oflags, ++ mode_t mode, struct mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mq_close, (mqd_t uqd)); ++ ++COBALT_SYSCALL_DECL(mq_unlink, (const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(mq_getattr, (mqd_t uqd, struct mq_attr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mq_timedsend, ++ (mqd_t uqd, const void __user *u_buf, size_t len, ++ unsigned int prio, const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mq_timedreceive, ++ (mqd_t uqd, void __user *u_buf, ssize_t __user *u_len, ++ unsigned int __user *u_prio, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mq_notify, ++ (mqd_t fd, const struct sigevent *__user evp)); ++ ++#endif /* !_COBALT_POSIX_MQUEUE_H */ +--- linux/kernel/xenomai/posix/mutex.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/mutex.h 2021-04-07 16:01:26.018635894 +0800 +@@ -0,0 +1,76 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _COBALT_POSIX_MUTEX_H ++#define _COBALT_POSIX_MUTEX_H ++ ++#include "thread.h" ++#include ++#include ++#include ++ ++struct cobalt_process; ++ ++struct cobalt_mutex { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ /** cobalt_mutexq */ ++ struct list_head conds; ++ struct cobalt_mutexattr attr; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_mutex_timedlock_break(struct cobalt_mutex_shadow __user *u_mx, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_mutex_acquire_unchecked(struct xnthread *cur, ++ struct cobalt_mutex *mutex, ++ const struct timespec *ts); ++ ++COBALT_SYSCALL_DECL(mutex_check_init, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_init, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct cobalt_mutexattr __user *u_attr)); ++ ++COBALT_SYSCALL_DECL(mutex_destroy, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_trylock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_lock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++COBALT_SYSCALL_DECL(mutex_timedlock, ++ (struct cobalt_mutex_shadow __user *u_mx, ++ const struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(mutex_unlock, ++ (struct cobalt_mutex_shadow __user *u_mx)); ++ ++int cobalt_mutex_release(struct xnthread *cur, ++ struct cobalt_mutex *mutex); ++ ++void cobalt_mutex_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_MUTEX_H */ +--- linux/kernel/xenomai/posix/clock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/clock.c 2021-04-07 16:01:26.013635901 +0800 +@@ -0,0 +1,443 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "clock.h" ++#include ++ ++static struct xnclock *external_clocks[COBALT_MAX_EXTCLOCKS]; ++ ++DECLARE_BITMAP(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ ++static int do_clock_host_realtime(struct timespec *tp) ++{ ++#ifdef CONFIG_XENO_OPT_HOSTRT ++ struct xnvdso_hostrt_data *hostrt_data; ++ u64 now, base, mask, cycle_delta; ++ __u32 mult, shift; ++ unsigned long rem; ++ urwstate_t tmp; ++ __u64 nsec; ++ ++ hostrt_data = get_hostrt_data(); ++ BUG_ON(!hostrt_data); ++ ++ if (unlikely(!hostrt_data->live)) ++ return -1; ++ ++ /* ++ * Note: Disabling HW interrupts around writes to hostrt_data ++ * ensures that a reader (on the Xenomai side) cannot ++ * interrupt a writer (on the Linux kernel side) on the same ++ * CPU. The urw block is required when a reader is ++ * interleaved by a writer on a different CPU. This follows ++ * the approach from userland, where taking the spinlock is ++ * not possible. 
++ */ ++ unsynced_read_block(&tmp, &hostrt_data->lock) { ++ now = xnclock_read_raw(&nkclock); ++ base = hostrt_data->cycle_last; ++ mask = hostrt_data->mask; ++ mult = hostrt_data->mult; ++ shift = hostrt_data->shift; ++ tp->tv_sec = hostrt_data->wall_sec; ++ nsec = hostrt_data->wall_nsec; ++ } ++ ++ /* ++ * At this point, we have a consistent copy of the fundamental ++ * data structure - calculate the interval between the current ++ * and base time stamp cycles, and convert the difference ++ * to nanoseconds. ++ */ ++ cycle_delta = (now - base) & mask; ++ nsec += (cycle_delta * mult) >> shift; ++ ++ /* Convert to the desired sec, usec representation */ ++ tp->tv_sec += xnclock_divrem_billion(nsec, &rem); ++ tp->tv_nsec = rem; ++ ++ return 0; ++#else /* CONFIG_XENO_OPT_HOSTRT */ ++ return -EINVAL; ++#endif ++} ++ ++#define do_ext_clock(__clock_id, __handler, __ret, __args...) \ ++({ \ ++ struct xnclock *__clock; \ ++ int __val = 0, __nr; \ ++ spl_t __s; \ ++ \ ++ if (!__COBALT_CLOCK_EXT_P(__clock_id)) \ ++ __val = -EINVAL; \ ++ else { \ ++ __nr = __COBALT_CLOCK_EXT_INDEX(__clock_id); \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ if (!test_bit(__nr, cobalt_clock_extids)) { \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ __val = -EINVAL; \ ++ } else { \ ++ __clock = external_clocks[__nr]; \ ++ (__ret) = xnclock_ ## __handler(__clock, ##__args); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ } \ ++ } \ ++ __val; \ ++}) ++ ++int __cobalt_clock_getres(clockid_t clock_id, struct timespec *ts) ++{ ++ xnticks_t ns; ++ int ret; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ ns2ts(ts, 1); ++ break; ++ default: ++ ret = do_ext_clock(clock_id, get_resolution, ns); ++ if (ret) ++ return ret; ++ ns2ts(ts, ns); ++ } ++ ++ trace_cobalt_clock_getres(clock_id, ts); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_getres, current, ++ (clockid_t clock_id, struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_getres(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ if (u_ts && cobalt_copy_to_user(u_ts, &ts, sizeof(ts))) ++ return -EFAULT; ++ ++ trace_cobalt_clock_getres(clock_id, &ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_gettime(clockid_t clock_id, struct timespec *ts) ++{ ++ xnticks_t ns; ++ int ret; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ ns2ts(ts, xnclock_read_realtime(&nkclock)); ++ break; ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ ns2ts(ts, xnclock_read_monotonic(&nkclock)); ++ break; ++ case CLOCK_HOST_REALTIME: ++ if (do_clock_host_realtime(ts) != 0) ++ return -EINVAL; ++ break; ++ default: ++ ret = do_ext_clock(clock_id, read_monotonic, ns); ++ if (ret) ++ return ret; ++ ns2ts(ts, ns); ++ } ++ ++ trace_cobalt_clock_gettime(clock_id, ts); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_gettime, current, ++ (clockid_t clock_id, struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ int ret; ++ ++ ret = __cobalt_clock_gettime(clock_id, &ts); ++ if (ret) ++ return ret; ++ ++ if (cobalt_copy_to_user(u_ts, &ts, sizeof(*u_ts))) ++ return -EFAULT; ++ ++ trace_cobalt_clock_gettime(clock_id, &ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_settime(clockid_t clock_id, const struct timespec *ts) ++{ ++ int _ret, ret = 0; ++ xnticks_t now; ++ spl_t s; ++ ++ if ((unsigned long)ts->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ xnlock_get_irqsave(&nklock, s); ++ now = xnclock_read_realtime(&nkclock); ++ xnclock_adjust(&nkclock, 
(xnsticks_t) (ts2ns(ts) - now)); ++ xnlock_put_irqrestore(&nklock, s); ++ break; ++ default: ++ _ret = do_ext_clock(clock_id, set_time, ret, ts); ++ if (_ret || ret) ++ return _ret ?: ret; ++ } ++ ++ trace_cobalt_clock_settime(clock_id, ts); ++ ++ return 0; ++} ++ ++int __cobalt_clock_adjtime(clockid_t clock_id, struct timex *tx) ++{ ++ int _ret, ret = 0; ++ ++ switch (clock_id) { ++ case CLOCK_REALTIME: ++ case CLOCK_MONOTONIC: ++ case CLOCK_MONOTONIC_RAW: ++ case CLOCK_HOST_REALTIME: ++ return -EOPNOTSUPP; ++ default: ++ _ret = do_ext_clock(clock_id, adjust_time, ret, tx); ++ if (_ret || ret) ++ return _ret ?: ret; ++ } ++ ++ trace_cobalt_clock_adjtime(clock_id, tx); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_settime, current, ++ (clockid_t clock_id, const struct timespec __user *u_ts)) ++{ ++ struct timespec ts; ++ ++ if (cobalt_copy_from_user(&ts, u_ts, sizeof(ts))) ++ return -EFAULT; ++ ++ return __cobalt_clock_settime(clock_id, &ts); ++} ++ ++COBALT_SYSCALL(clock_adjtime, current, ++ (clockid_t clock_id, struct timex __user *u_tx)) ++{ ++ struct timex tx; ++ int ret; ++ ++ if (cobalt_copy_from_user(&tx, u_tx, sizeof(tx))) ++ return -EFAULT; ++ ++ ret = __cobalt_clock_adjtime(clock_id, &tx); ++ if (ret) ++ return ret; ++ ++ return cobalt_copy_to_user(u_tx, &tx, sizeof(tx)); ++} ++ ++int __cobalt_clock_nanosleep(clockid_t clock_id, int flags, ++ const struct timespec *rqt, ++ struct timespec *rmt) ++{ ++ struct restart_block *restart; ++ struct xnthread *cur; ++ xnsticks_t timeout, rem; ++ spl_t s; ++ ++ trace_cobalt_clock_nanosleep(clock_id, flags, rqt); ++ ++ if (clock_id != CLOCK_MONOTONIC && ++ clock_id != CLOCK_MONOTONIC_RAW && ++ clock_id != CLOCK_REALTIME) ++ return -EOPNOTSUPP; ++ ++ if (rqt->tv_sec < 0) ++ return -EINVAL; ++ ++ if ((unsigned long)rqt->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ if (flags & ~TIMER_ABSTIME) ++ return -EINVAL; ++ ++ cur = xnthread_current(); ++ ++ if (xnthread_test_localinfo(cur, XNSYSRST)) { ++ xnthread_clear_localinfo(cur, XNSYSRST); ++ ++ restart = cobalt_get_restart_block(current); ++ ++ if (restart->fn != cobalt_restart_syscall_placeholder) { ++ if (rmt) { ++ xnlock_get_irqsave(&nklock, s); ++ rem = xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ns2ts(rmt, rem > 1 ? rem : 0); ++ } ++ return -EINTR; ++ } ++ ++ timeout = restart->nanosleep.expires; ++ } else ++ timeout = ts2ns(rqt); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_suspend(cur, XNDELAY, timeout + 1, ++ clock_flag(flags, clock_id), NULL); ++ ++ if (xnthread_test_info(cur, XNBREAK)) { ++ if (signal_pending(current)) { ++ restart = cobalt_get_restart_block(current); ++ restart->nanosleep.expires = ++ (flags & TIMER_ABSTIME) ? timeout : ++ xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ restart->fn = cobalt_restart_syscall_placeholder; ++ ++ xnthread_set_localinfo(cur, XNSYSRST); ++ ++ return -ERESTARTSYS; ++ } ++ ++ if (flags == 0 && rmt) { ++ rem = xntimer_get_timeout_stopped(&cur->rtimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ns2ts(rmt, rem > 1 ? 
rem : 0); ++ } else ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return -EINTR; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(clock_nanosleep, primary, ++ (clockid_t clock_id, int flags, ++ const struct timespec __user *u_rqt, ++ struct timespec __user *u_rmt)) ++{ ++ struct timespec rqt, rmt, *rmtp = NULL; ++ int ret; ++ ++ if (u_rmt) ++ rmtp = &rmt; ++ ++ if (cobalt_copy_from_user(&rqt, u_rqt, sizeof(rqt))) ++ return -EFAULT; ++ ++ ret = __cobalt_clock_nanosleep(clock_id, flags, &rqt, rmtp); ++ if (ret == -EINTR && flags == 0 && rmtp) { ++ if (cobalt_copy_to_user(u_rmt, rmtp, sizeof(*u_rmt))) ++ return -EFAULT; ++ } ++ ++ return ret; ++} ++ ++int cobalt_clock_register(struct xnclock *clock, const cpumask_t *affinity, ++ clockid_t *clk_id) ++{ ++ int ret, nr; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ nr = find_first_zero_bit(cobalt_clock_extids, COBALT_MAX_EXTCLOCKS); ++ if (nr >= COBALT_MAX_EXTCLOCKS) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EAGAIN; ++ } ++ ++ /* ++ * CAUTION: a bit raised in cobalt_clock_extids means that the ++ * corresponding entry in external_clocks[] is valid. The ++ * converse assumption is NOT true. ++ */ ++ __set_bit(nr, cobalt_clock_extids); ++ external_clocks[nr] = clock; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ ret = xnclock_register(clock, affinity); ++ if (ret) ++ return ret; ++ ++ clock->id = nr; ++ *clk_id = __COBALT_CLOCK_EXT(clock->id); ++ ++ trace_cobalt_clock_register(clock->name, *clk_id); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_register); ++ ++void cobalt_clock_deregister(struct xnclock *clock) ++{ ++ trace_cobalt_clock_deregister(clock->name, clock->id); ++ clear_bit(clock->id, cobalt_clock_extids); ++ smp_mb__after_atomic(); ++ external_clocks[clock->id] = NULL; ++ xnclock_deregister(clock); ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_deregister); ++ ++struct xnclock *cobalt_clock_find(clockid_t clock_id) ++{ ++ struct xnclock *clock = ERR_PTR(-EINVAL); ++ spl_t s; ++ int nr; ++ ++ if (clock_id == CLOCK_MONOTONIC || ++ clock_id == CLOCK_MONOTONIC_RAW || ++ clock_id == CLOCK_REALTIME) ++ return &nkclock; ++ ++ if (__COBALT_CLOCK_EXT_P(clock_id)) { ++ nr = __COBALT_CLOCK_EXT_INDEX(clock_id); ++ xnlock_get_irqsave(&nklock, s); ++ if (test_bit(nr, cobalt_clock_extids)) ++ clock = external_clocks[nr]; ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return clock; ++} ++EXPORT_SYMBOL_GPL(cobalt_clock_find); +--- linux/kernel/xenomai/posix/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/internal.h 2021-04-07 16:01:26.008635908 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_INTERNAL_H ++#define _COBALT_POSIX_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "process.h" ++#include "extension.h" ++#include "syscall.h" ++#include "memory.h" ++ ++#define COBALT_MAXNAME 64 ++#define COBALT_PERMS_MASK (O_RDONLY | O_WRONLY | O_RDWR) ++ ++#define COBALT_MAGIC(n) (0x8686##n##n) ++#define COBALT_ANY_MAGIC COBALT_MAGIC(00) ++#define COBALT_THREAD_MAGIC COBALT_MAGIC(01) ++#define COBALT_MQ_MAGIC COBALT_MAGIC(0A) ++#define COBALT_MQD_MAGIC COBALT_MAGIC(0B) ++#define COBALT_EVENT_MAGIC COBALT_MAGIC(0F) ++#define COBALT_MONITOR_MAGIC COBALT_MAGIC(10) ++#define COBALT_TIMERFD_MAGIC COBALT_MAGIC(11) ++ ++#define cobalt_obj_active(h,m,t) \ ++ ((h) && ((t *)(h))->magic == (m)) ++ ++#define cobalt_mark_deleted(t) ((t)->magic = ~(t)->magic) ++ ++static inline xnhandle_t cobalt_get_handle_from_user(xnhandle_t *u_h) ++{ ++ xnhandle_t handle; ++ return __xn_get_user(handle, u_h) ? 0 : handle; ++} ++ ++int cobalt_init(void); ++ ++long cobalt_restart_syscall_placeholder(struct restart_block *param); ++ ++#endif /* !_COBALT_POSIX_INTERNAL_H */ +--- linux/kernel/xenomai/posix/syscall.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/syscall.c 2021-04-07 16:01:26.003635915 +0800 +@@ -0,0 +1,801 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum ++ * Copyright (C) 2005 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "thread.h" ++#include "sched.h" ++#include "mutex.h" ++#include "cond.h" ++#include "mqueue.h" ++#include "sem.h" ++#include "signal.h" ++#include "timer.h" ++#include "monitor.h" ++#include "clock.h" ++#include "event.h" ++#include "timerfd.h" ++#include "io.h" ++#include "corectl.h" ++#include "../debug.h" ++#include ++ ++/* Syscall must run into the Linux domain. */ ++#define __xn_exec_lostage 0x1 ++/* Syscall must run into the Xenomai domain. */ ++#define __xn_exec_histage 0x2 ++/* Shadow syscall: caller must be mapped. */ ++#define __xn_exec_shadow 0x4 ++/* Switch back toggle; caller must return to its original mode. */ ++#define __xn_exec_switchback 0x8 ++/* Exec in current domain. */ ++#define __xn_exec_current 0x10 ++/* Exec in conforming domain, Xenomai or Linux. */ ++#define __xn_exec_conforming 0x20 ++/* Attempt syscall restart in the opposite domain upon -ENOSYS. */ ++#define __xn_exec_adaptive 0x40 ++/* Do not restart syscall upon signal receipt. */ ++#define __xn_exec_norestart 0x80 ++/* Shorthand for shadow init syscall. */ ++#define __xn_exec_init __xn_exec_lostage ++/* Shorthand for shadow syscall in Xenomai space. 
*/ ++#define __xn_exec_primary (__xn_exec_shadow|__xn_exec_histage) ++/* Shorthand for shadow syscall in Linux space. */ ++#define __xn_exec_secondary (__xn_exec_shadow|__xn_exec_lostage) ++/* Shorthand for syscall in Linux space with switchback if shadow. */ ++#define __xn_exec_downup (__xn_exec_lostage|__xn_exec_switchback) ++/* Shorthand for non-restartable primary syscall. */ ++#define __xn_exec_nonrestartable (__xn_exec_primary|__xn_exec_norestart) ++/* Domain probing syscall starting in conforming mode. */ ++#define __xn_exec_probing (__xn_exec_conforming|__xn_exec_adaptive) ++/* Hand over mode selection to syscall. */ ++#define __xn_exec_handover (__xn_exec_current|__xn_exec_adaptive) ++ ++typedef long (*cobalt_syshand)(unsigned long arg1, unsigned long arg2, ++ unsigned long arg3, unsigned long arg4, ++ unsigned long arg5); ++ ++static void prepare_for_signal(struct task_struct *p, ++ struct xnthread *thread, ++ struct pt_regs *regs, ++ int sysflags) ++{ ++ int notify = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_info(thread, XNKICKED)) { ++ if (signal_pending(p)) { ++ __xn_error_return(regs, ++ (sysflags & __xn_exec_norestart) ? ++ -EINTR : -ERESTARTSYS); ++ notify = !xnthread_test_state(thread, XNSSTEP); ++ xnthread_clear_info(thread, XNBREAK); ++ } ++ xnthread_clear_info(thread, XNKICKED); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_test_cancel(); ++ ++ xnthread_relax(notify, SIGDEBUG_MIGRATE_SIGNAL); ++} ++ ++static COBALT_SYSCALL(migrate, current, (int domain)) ++{ ++ struct xnthread *thread = xnthread_current(); ++ ++ if (ipipe_root_p) { ++ if (domain == COBALT_PRIMARY) { ++ if (thread == NULL) ++ return -EPERM; ++ /* ++ * Paranoid: a corner case where userland ++ * fiddles with SIGSHADOW while the target ++ * thread is still waiting to be started. ++ */ ++ if (xnthread_test_state(thread, XNDORMANT)) ++ return 0; ++ ++ return xnthread_harden() ? 
: 1; ++ } ++ return 0; ++ } ++ ++ /* ipipe_current_domain != ipipe_root_domain */ ++ if (domain == COBALT_SECONDARY) { ++ xnthread_relax(0, 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(trace, current, ++ (int op, unsigned long a1, ++ unsigned long a2, unsigned long a3)) ++{ ++ int ret = -EINVAL; ++ ++ switch (op) { ++ case __xntrace_op_max_begin: ++ ret = xntrace_max_begin(a1); ++ break; ++ ++ case __xntrace_op_max_end: ++ ret = xntrace_max_end(a1); ++ break; ++ ++ case __xntrace_op_max_reset: ++ ret = xntrace_max_reset(); ++ break; ++ ++ case __xntrace_op_user_start: ++ ret = xntrace_user_start(); ++ break; ++ ++ case __xntrace_op_user_stop: ++ ret = xntrace_user_stop(a1); ++ break; ++ ++ case __xntrace_op_user_freeze: ++ ret = xntrace_user_freeze(a1, a2); ++ break; ++ ++ case __xntrace_op_special: ++ ret = xntrace_special(a1 & 0xFF, a2); ++ break; ++ ++ case __xntrace_op_special_u64: ++ ret = xntrace_special_u64(a1 & 0xFF, ++ (((u64) a2) << 32) | a3); ++ break; ++ } ++ return ret; ++} ++ ++static COBALT_SYSCALL(ftrace_puts, current, ++ (const char __user *str)) ++{ ++ char buf[256]; ++ unsigned len; ++ ++ len = cobalt_strncpy_from_user(buf, str, sizeof(buf)); ++ if (len < 0) ++ return -EFAULT; ++ ++#ifdef CONFIG_TRACING ++ __trace_puts(_THIS_IP_, buf, len); ++#endif ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(archcall, current, ++ (unsigned long a1, unsigned long a2, ++ unsigned long a3, unsigned long a4, ++ unsigned long a5)) ++{ ++ return xnarch_local_syscall(a1, a2, a3, a4, a5); ++} ++ ++static COBALT_SYSCALL(get_current, current, ++ (xnhandle_t __user *u_handle)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ ++ if (cur == NULL) ++ return -EPERM; ++ ++ return cobalt_copy_to_user(u_handle, &cur->handle, ++ sizeof(*u_handle)); ++} ++ ++static COBALT_SYSCALL(backtrace, lostage, ++ (int nr, unsigned long __user *u_backtrace, int reason)) ++{ ++ unsigned long backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ int ret; ++ ++ /* ++ * In case backtrace() in userland is broken or fails. We may ++ * want to know about this in kernel space however, for future ++ * use. ++ */ ++ if (nr <= 0) ++ return 0; ++ /* ++ * We may omit the older frames if we can't store the full ++ * backtrace. ++ */ ++ if (nr > SIGSHADOW_BACKTRACE_DEPTH) ++ nr = SIGSHADOW_BACKTRACE_DEPTH; ++ /* ++ * Fetch the backtrace array, filled with PC values as seen ++ * from the relaxing thread in user-space. This can't fail ++ */ ++ ret = cobalt_copy_from_user(backtrace, u_backtrace, nr * sizeof(long)); ++ if (ret) ++ return ret; ++ ++ xndebug_trace_relax(nr, backtrace, reason); ++ ++ return 0; ++} ++ ++static COBALT_SYSCALL(serialdbg, current, ++ (const char __user *u_msg, int len)) ++{ ++ char buf[128]; ++ int n; ++ ++ while (len > 0) { ++ n = len; ++ if (n > sizeof(buf)) ++ n = sizeof(buf); ++ if (cobalt_copy_from_user(buf, u_msg, n)) ++ return -EFAULT; ++ __ipipe_serial_debug("%.*s", n, buf); ++ u_msg += n; ++ len -= n; ++ } ++ ++ return 0; ++} ++ ++static void stringify_feature_set(unsigned long fset, char *buf, int size) ++{ ++ unsigned long feature; ++ int nc, nfeat; ++ ++ *buf = '\0'; ++ ++ for (feature = 1, nc = nfeat = 0; fset != 0 && size > 0; feature <<= 1) { ++ if (fset & feature) { ++ nc = ksformat(buf, size, "%s%s", ++ nfeat > 0 ? 
" " : "", ++ get_feature_label(feature)); ++ nfeat++; ++ size -= nc; ++ buf += nc; ++ fset &= ~feature; ++ } ++ } ++} ++ ++static COBALT_SYSCALL(bind, lostage, ++ (struct cobalt_bindreq __user *u_breq)) ++{ ++ unsigned long featreq, featmis; ++ struct cobalt_bindreq breq; ++ struct cobalt_featinfo *f; ++ int abirev; ++ ++ if (cobalt_copy_from_user(&breq, u_breq, sizeof(breq))) ++ return -EFAULT; ++ ++ f = &breq.feat_ret; ++ featreq = breq.feat_req; ++ if (!realtime_core_running() && (featreq & __xn_feat_control) == 0) ++ return -EAGAIN; ++ ++ /* ++ * Calculate the missing feature set: ++ * kernel_unavailable_set & user_mandatory_set. ++ */ ++ featmis = (~XENOMAI_FEAT_DEP & (featreq & XENOMAI_FEAT_MAN)); ++ abirev = breq.abi_rev; ++ ++ /* ++ * Pass back the supported feature set and the ABI revision ++ * level to user-space. ++ */ ++ f->feat_all = XENOMAI_FEAT_DEP; ++ stringify_feature_set(XENOMAI_FEAT_DEP, f->feat_all_s, ++ sizeof(f->feat_all_s)); ++ f->feat_man = featreq & XENOMAI_FEAT_MAN; ++ stringify_feature_set(f->feat_man, f->feat_man_s, ++ sizeof(f->feat_man_s)); ++ f->feat_mis = featmis; ++ stringify_feature_set(featmis, f->feat_mis_s, ++ sizeof(f->feat_mis_s)); ++ f->feat_req = featreq; ++ stringify_feature_set(featreq, f->feat_req_s, ++ sizeof(f->feat_req_s)); ++ f->feat_abirev = XENOMAI_ABI_REV; ++ collect_arch_features(f); ++ ++ f->clock_freq = cobalt_pipeline.clock_freq; ++ f->vdso_offset = cobalt_umm_offset(&cobalt_ppd_get(1)->umm, nkvdso); ++ ++ if (cobalt_copy_to_user(u_breq, &breq, sizeof(breq))) ++ return -EFAULT; ++ ++ /* ++ * If some mandatory features the user-space code relies on ++ * are missing at kernel level, we cannot go further. ++ */ ++ if (featmis) ++ return -EINVAL; ++ ++ if (!check_abi_revision(abirev)) ++ return -ENOEXEC; ++ ++ return cobalt_bind_core(featreq); ++} ++ ++static COBALT_SYSCALL(extend, lostage, (unsigned int magic)) ++{ ++ return cobalt_bind_personality(magic); ++} ++ ++static int CoBaLt_ni(void) ++{ ++ return -ENOSYS; ++} ++ ++/* ++ * We have a single syscall table for all ABI models, i.e. 64bit ++ * native + 32bit) or plain 32bit. In the former case, we may want to ++ * support several models with a single build (e.g. ia32 and x32 for ++ * x86_64). ++ * ++ * The syscall table is set up in a single step, based on three ++ * subsequent sources of initializers: ++ * ++ * - first, all syscall entries are defaulted to a placeholder ++ * returning -ENOSYS, as the table may be sparse. ++ * ++ * - then __COBALT_CALL_ENTRY() produces a native call entry ++ * (e.g. pure 64bit call handler for a 64bit architecture), optionally ++ * followed by a set of 32bit syscall entries offset by an ++ * arch-specific base index, which default to the native calls. These ++ * nitty-gritty details are defined by ++ * . 32bit architectures - or 64bit ones for ++ * which we don't support any 32bit ABI model - will simply define ++ * __COBALT_CALL32_ENTRY() as an empty macro. ++ * ++ * - finally, 32bit thunk entries are generated per-architecture, by ++ * including , overriding the default ++ * handlers installed during the previous step. ++ * ++ * For instance, with CONFIG_X86_X32 support enabled in an x86_64 ++ * kernel, sc_cobalt_mq_timedreceive would appear twice in the table, ++ * as: ++ * ++ * [sc_cobalt_mq_timedreceive] = cobalt_mq_timedreceive, ++ * ... ++ * [sc_cobalt_mq_timedreceive + __COBALT_X32_BASE] = cobalt32x_mq_timedreceive, ++ * ++ * cobalt32x_mq_timedreceive() would do the required thunking for ++ * dealing with the 32<->64bit conversion of arguments. 
On the other ++ * hand, sc_cobalt_sched_yield - which do not require any thunk - ++ * would also appear twice, but both entries would point at the native ++ * syscall implementation: ++ * ++ * [sc_cobalt_sched_yield] = cobalt_sched_yield, ++ * ... ++ * [sc_cobalt_sched_yield + __COBALT_X32_BASE] = cobalt_sched_yield, ++ * ++ * Accordingly, applications targeting the x32 model (-mx32) issue ++ * syscalls in the range [__COBALT_X32_BASE..__COBALT_X32_BASE + ++ * __NR_COBALT_SYSCALLS-1], whilst native (32/64bit) ones issue ++ * syscalls in the range [0..__NR_COBALT_SYSCALLS-1]. ++ * ++ * In short, this is an incremental process where the arch-specific ++ * code can override the 32bit syscall entries, pointing at the thunk ++ * routines it may need for handing 32bit calls over their respective ++ * 64bit implementation. ++ * ++ * By convention, there is NO pure 32bit syscall, which means that ++ * each 32bit syscall defined by a compat ABI interface MUST match a ++ * native (64bit) syscall. This is important as we share the call ++ * modes (i.e. __xn_exec_ bits) between all ABI models. ++ * ++ * --rpm ++ */ ++#define __syshand__(__name) ((cobalt_syshand)(CoBaLt_ ## __name)) ++ ++#define __COBALT_NI __syshand__(ni) ++ ++#define __COBALT_CALL_NI \ ++ [0 ... __NR_COBALT_SYSCALLS-1] = __COBALT_NI, \ ++ __COBALT_CALL32_INITHAND(__COBALT_NI) ++ ++#define __COBALT_CALL_NFLAGS \ ++ [0 ... __NR_COBALT_SYSCALLS-1] = 0, \ ++ __COBALT_CALL32_INITMODE(0) ++ ++#define __COBALT_CALL_ENTRY(__name) \ ++ [sc_cobalt_ ## __name] = __syshand__(__name), \ ++ __COBALT_CALL32_ENTRY(__name, __syshand__(__name)) ++ ++#define __COBALT_MODE(__name, __mode) \ ++ [sc_cobalt_ ## __name] = __xn_exec_##__mode, ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#include "syscall32.h" ++#endif ++ ++#include "syscall_entries.h" ++ ++static const cobalt_syshand cobalt_syscalls[] = { ++ __COBALT_CALL_NI ++ __COBALT_CALL_ENTRIES ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#include ++#endif ++}; ++ ++static const int cobalt_sysmodes[] = { ++ __COBALT_CALL_NFLAGS ++ __COBALT_CALL_MODES ++}; ++ ++static inline int allowed_syscall(struct cobalt_process *process, ++ struct xnthread *thread, ++ int sysflags, int nr) ++{ ++ if (nr == sc_cobalt_bind) ++ return 1; ++ ++ if (process == NULL) ++ return 0; ++ ++ if (thread == NULL && (sysflags & __xn_exec_shadow)) ++ return 0; ++ ++ return cap_raised(current_cap(), CAP_SYS_NICE); ++} ++ ++static int handle_head_syscall(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ struct cobalt_process *process; ++ int switched, sigs, sysflags; ++ struct xnthread *thread; ++ cobalt_syshand handler; ++ struct task_struct *p; ++ unsigned int nr, code; ++ long ret; ++ ++ if (!__xn_syscall_p(regs)) ++ goto linux_syscall; ++ ++ thread = xnthread_current(); ++ code = __xn_syscall(regs); ++ if (code >= ARRAY_SIZE(cobalt_syscalls)) ++ goto bad_syscall; ++ ++ nr = code & (__NR_COBALT_SYSCALLS - 1); ++ ++ trace_cobalt_head_sysentry(code); ++ ++ process = cobalt_current_process(); ++ if (process == NULL) { ++ process = cobalt_search_process(current->mm); ++ cobalt_set_process(process); ++ } ++ ++ handler = cobalt_syscalls[code]; ++ sysflags = cobalt_sysmodes[nr]; ++ ++ /* ++ * Executing Cobalt services requires CAP_SYS_NICE, except for ++ * sc_cobalt_bind which does its own checks. ++ */ ++ if (unlikely(!allowed_syscall(process, thread, sysflags, nr))) { ++ /* ++ * Exclude get_current from reporting, it is used to probe the ++ * execution context. 
++ */ ++ if (XENO_DEBUG(COBALT) && nr != sc_cobalt_get_current) ++ printk(XENO_WARNING ++ "syscall <%d> denied to %s[%d]\n", ++ nr, current->comm, task_pid_nr(current)); ++ __xn_error_return(regs, -EPERM); ++ goto ret_handled; ++ } ++ ++ if (sysflags & __xn_exec_conforming) ++ /* ++ * If the conforming exec bit is set, turn the exec ++ * bitmask for the syscall into the most appropriate ++ * setup for the caller, i.e. Xenomai domain for ++ * shadow threads, Linux otherwise. ++ */ ++ sysflags |= (thread ? __xn_exec_histage : __xn_exec_lostage); ++ ++ /* ++ * Here we have to dispatch the syscall execution properly, ++ * depending on: ++ * ++ * o Whether the syscall must be run into the Linux or Xenomai ++ * domain, or indifferently in the current Xenomai domain. ++ * ++ * o Whether the caller currently runs in the Linux or Xenomai ++ * domain. ++ */ ++restart: ++ /* ++ * Process adaptive syscalls by restarting them in the ++ * opposite domain upon receiving -ENOSYS from the syscall ++ * handler. ++ */ ++ switched = 0; ++ if (sysflags & __xn_exec_lostage) { ++ /* ++ * The syscall must run from the Linux domain. ++ */ ++ if (ipd == &xnsched_realtime_domain) { ++ /* ++ * Request originates from the Xenomai domain: ++ * relax the caller then invoke the syscall ++ * handler right after. ++ */ ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ switched = 1; ++ } else ++ /* ++ * Request originates from the Linux domain: ++ * propagate the event to our Linux-based ++ * handler, so that the syscall is executed ++ * from there. ++ */ ++ return KEVENT_PROPAGATE; ++ } else if (sysflags & (__xn_exec_histage | __xn_exec_current)) { ++ /* ++ * Syscall must run either from the Xenomai domain, or ++ * from the calling domain. ++ * ++ * If the request originates from the Linux domain, ++ * hand it over to our secondary-mode dispatcher. ++ * Otherwise, invoke the syscall handler immediately. ++ */ ++ if (ipd != &xnsched_realtime_domain) ++ return KEVENT_PROPAGATE; ++ } ++ ++ /* ++ * 'thread' has to be valid from that point: all syscalls ++ * regular threads may call have been pipelined to the root ++ * handler (lostage ones), or rejected by allowed_syscall(). ++ */ ++ ++ ret = handler(__xn_reg_arglist(regs)); ++ if (ret == -ENOSYS && (sysflags & __xn_exec_adaptive)) { ++ if (switched) { ++ ret = xnthread_harden(); ++ if (ret) { ++ switched = 0; ++ goto done; ++ } ++ } else /* Mark the primary -> secondary transition. */ ++ xnthread_set_localinfo(thread, XNDESCENT); ++ sysflags ^= ++ (__xn_exec_lostage | __xn_exec_histage | ++ __xn_exec_adaptive); ++ goto restart; ++ } ++done: ++ __xn_status_return(regs, ret); ++ sigs = 0; ++ if (!xnsched_root_p()) { ++ p = current; ++ if (signal_pending(p) || ++ xnthread_test_info(thread, XNKICKED)) { ++ sigs = 1; ++ prepare_for_signal(p, thread, regs, sysflags); ++ } else if (xnthread_test_state(thread, XNWEAK) && ++ thread->res_count == 0) { ++ if (switched) ++ switched = 0; ++ else ++ xnthread_relax(0, 0); ++ } ++ } ++ if (!sigs && (sysflags & __xn_exec_switchback) && switched) ++ /* -EPERM will be trapped later if needed. */ ++ xnthread_harden(); ++ ++ret_handled: ++ /* Update the stats and userland-visible state. 
*/ ++ if (thread) { ++ xnthread_clear_localinfo(thread, XNDESCENT); ++ xnstat_counter_inc(&thread->stat.xsc); ++ xnthread_sync_window(thread); ++ } ++ ++ trace_cobalt_head_sysexit(__xn_reg_rval(regs)); ++ ++ return KEVENT_STOP; ++ ++linux_syscall: ++ if (xnsched_root_p()) ++ /* ++ * The call originates from the Linux domain, either ++ * from a relaxed shadow or from a regular Linux task; ++ * just propagate the event so that we will fall back ++ * to handle_root_syscall(). ++ */ ++ return KEVENT_PROPAGATE; ++ ++ if (!__xn_rootcall_p(regs, &code)) ++ goto bad_syscall; ++ ++ /* ++ * We know this is a Cobalt thread since it runs over the head ++ * domain, however the current syscall should be handled by ++ * the host kernel instead. Before this happens, we have to ++ * re-enter the root domain. ++ */ ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ ++ return KEVENT_PROPAGATE; ++ ++bad_syscall: ++ printk(XENO_WARNING "bad syscall <%#x>\n", code); ++ ++ __xn_error_return(regs, -ENOSYS); ++ ++ return KEVENT_STOP; ++} ++ ++static int handle_root_syscall(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ int sysflags, switched, sigs; ++ struct xnthread *thread; ++ cobalt_syshand handler; ++ struct task_struct *p; ++ unsigned int nr, code; ++ long ret; ++ ++ /* ++ * Catch cancellation requests pending for user shadows ++ * running mostly in secondary mode, i.e. XNWEAK. In that ++ * case, we won't run prepare_for_signal() that frequently, so ++ * check for cancellation here. ++ */ ++ xnthread_test_cancel(); ++ ++ if (!__xn_syscall_p(regs)) ++ /* Fall back to Linux syscall handling. */ ++ return KEVENT_PROPAGATE; ++ ++ thread = xnthread_current(); ++ /* code has already been checked in the head domain handler. */ ++ code = __xn_syscall(regs); ++ nr = code & (__NR_COBALT_SYSCALLS - 1); ++ ++ trace_cobalt_root_sysentry(code); ++ ++ /* Processing a Xenomai syscall. */ ++ ++ handler = cobalt_syscalls[code]; ++ sysflags = cobalt_sysmodes[nr]; ++ ++ if (thread && (sysflags & __xn_exec_conforming)) ++ sysflags |= __xn_exec_histage; ++restart: ++ /* ++ * Process adaptive syscalls by restarting them in the ++ * opposite domain upon receiving -ENOSYS from the syscall ++ * handler. ++ */ ++ switched = 0; ++ if (sysflags & __xn_exec_histage) { ++ /* ++ * This request originates from the Linux domain but ++ * should run into the Xenomai domain: harden the ++ * caller before invoking the syscall handler. ++ */ ++ ret = xnthread_harden(); ++ if (ret) { ++ __xn_error_return(regs, ret); ++ goto ret_handled; ++ } ++ switched = 1; ++ } else { ++ /* ++ * We want to run the syscall in the current Linux ++ * domain. This is a slow path, so proceed with any ++ * pending schedparam update on the fly. ++ */ ++ if (thread) ++ xnthread_propagate_schedparam(thread); ++ } ++ ++ ret = handler(__xn_reg_arglist(regs)); ++ if (ret == -ENOSYS && (sysflags & __xn_exec_adaptive)) { ++ sysflags ^= __xn_exec_histage; ++ if (switched) { ++ xnthread_relax(1, SIGDEBUG_MIGRATE_SYSCALL); ++ sysflags &= ~__xn_exec_adaptive; ++ /* Mark the primary -> secondary transition. */ ++ xnthread_set_localinfo(thread, XNDESCENT); ++ } ++ goto restart; ++ } ++ ++ __xn_status_return(regs, ret); ++ ++ sigs = 0; ++ if (!xnsched_root_p()) { ++ /* ++ * We may have gained a shadow TCB from the syscall we ++ * just invoked, so make sure to fetch it. 
++ */ ++ thread = xnthread_current(); ++ p = current; ++ if (signal_pending(p)) { ++ sigs = 1; ++ prepare_for_signal(p, thread, regs, sysflags); ++ } else if (xnthread_test_state(thread, XNWEAK) && ++ thread->res_count == 0) ++ sysflags |= __xn_exec_switchback; ++ } ++ if (!sigs && (sysflags & __xn_exec_switchback) ++ && (switched || xnsched_primary_p())) ++ xnthread_relax(0, 0); ++ ++ret_handled: ++ /* Update the stats and userland-visible state. */ ++ if (thread) { ++ xnthread_clear_localinfo(thread, XNDESCENT|XNHICCUP); ++ xnstat_counter_inc(&thread->stat.xsc); ++ xnthread_sync_window(thread); ++ } ++ ++ trace_cobalt_root_sysexit(__xn_reg_rval(regs)); ++ ++ return KEVENT_STOP; ++} ++ ++int ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ if (unlikely(ipipe_root_p)) ++ return handle_root_syscall(ipd, regs); ++ ++ return handle_head_syscall(ipd, regs); ++} ++ ++int ipipe_fastcall_hook(struct pt_regs *regs) ++{ ++ int ret; ++ ++ ret = handle_head_syscall(&xnsched_realtime_domain, regs); ++ XENO_BUG_ON(COBALT, ret == KEVENT_PROPAGATE); ++ ++ return ret; ++} ++ ++long cobalt_restart_syscall_placeholder(struct restart_block *param) ++{ ++ return -EINVAL; ++} +--- linux/kernel/xenomai/posix/sem.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sem.h 2021-04-07 16:01:25.999635921 +0800 +@@ -0,0 +1,128 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SEM_H ++#define _COBALT_POSIX_SEM_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_process; ++struct filename; ++ ++struct cobalt_sem { ++ unsigned int magic; ++ struct xnsynch synchbase; ++ struct cobalt_sem_state *state; ++ int flags; ++ unsigned int refs; ++ struct filename *pathname; ++ struct cobalt_resnode resnode; ++}; ++ ++/* Copied from Linuxthreads semaphore.h. 
*/ ++struct _sem_fastlock ++{ ++ long int __status; ++ int __spinlock; ++}; ++ ++typedef struct ++{ ++ struct _sem_fastlock __sem_lock; ++ int __sem_value; ++ long __sem_waiting; ++} sem_t; ++ ++#include ++ ++#define SEM_VALUE_MAX (INT_MAX) ++#define SEM_FAILED NULL ++#define SEM_NAMED 0x80000000 ++ ++struct cobalt_sem_shadow __user * ++__cobalt_sem_open(struct cobalt_sem_shadow __user *usm, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value); ++ ++int __cobalt_sem_timedwait(struct cobalt_sem_shadow __user *u_sem, ++ const void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)); ++ ++int __cobalt_sem_destroy(xnhandle_t handle); ++ ++void cobalt_nsem_reclaim(struct cobalt_process *process); ++ ++struct cobalt_sem * ++__cobalt_sem_init(const char *name, struct cobalt_sem_shadow *sem, ++ int flags, unsigned value); ++ ++void __cobalt_sem_shadow_init(struct cobalt_sem *sem, __u32 magic, ++ struct cobalt_sem_shadow *sm); ++ ++COBALT_SYSCALL_DECL(sem_init, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int flags, unsigned value)); ++ ++COBALT_SYSCALL_DECL(sem_post, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_wait, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_timedwait, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct timespec __user *u_ts)); ++ ++COBALT_SYSCALL_DECL(sem_trywait, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_getvalue, ++ (struct cobalt_sem_shadow __user *u_sem, ++ int __user *u_sval)); ++ ++COBALT_SYSCALL_DECL(sem_destroy, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_open, ++ (struct cobalt_sem_shadow __user *__user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)); ++ ++COBALT_SYSCALL_DECL(sem_close, ++ (struct cobalt_sem_shadow __user *usm)); ++ ++COBALT_SYSCALL_DECL(sem_unlink, (const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(sem_broadcast_np, ++ (struct cobalt_sem_shadow __user *u_sem)); ++ ++COBALT_SYSCALL_DECL(sem_inquire, ++ (struct cobalt_sem_shadow __user *u_sem, ++ struct cobalt_sem_info __user *u_info, ++ pid_t __user *u_waitlist, ++ size_t waitsz)); ++ ++void cobalt_sem_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_SEM_H */ +--- linux/kernel/xenomai/posix/extension.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/extension.h 2021-04-07 16:01:25.994635928 +0800 +@@ -0,0 +1,135 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_POSIX_EXTENSION_H ++#define _COBALT_POSIX_EXTENSION_H ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++#include ++ ++struct cobalt_timer; ++struct cobalt_sigpending; ++struct cobalt_extref; ++struct siginfo; ++struct xnsched_class; ++union xnsched_policy_param; ++ ++struct cobalt_extension { ++ struct xnthread_personality core; ++ struct { ++ struct cobalt_thread * ++ (*timer_init)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ const struct sigevent *__restrict__ evp); ++ int (*timer_settime)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ const struct itimerspec *__restrict__ value, ++ int flags); ++ int (*timer_gettime)(struct cobalt_extref *reftimer, /* nklocked, IRQs off. */ ++ struct itimerspec *__restrict__ value); ++ int (*timer_delete)(struct cobalt_extref *reftimer); /* nklocked, IRQs off. */ ++ int (*timer_cleanup)(struct cobalt_extref *reftimer); /* nklocked, IRQs off. */ ++ int (*signal_deliver)(struct cobalt_extref *refthread, ++ struct siginfo *si, ++ struct cobalt_sigpending *sigp); ++ int (*signal_queue)(struct cobalt_extref *refthread, ++ struct cobalt_sigpending *sigp); ++ int (*signal_copyinfo)(struct cobalt_extref *refthread, ++ void __user *u_si, ++ const struct siginfo *si, ++ int overrun); ++ int (*signal_copyinfo_compat)(struct cobalt_extref *refthread, ++ void __user *u_si, ++ const struct siginfo *si, ++ int overrun); ++ int (*sched_yield)(struct cobalt_extref *curref); ++ int (*thread_setsched)(struct cobalt_extref *refthread, /* nklocked, IRQs off. */ ++ struct xnsched_class *sched_class, ++ union xnsched_policy_param *param); ++ } ops; ++}; ++ ++struct cobalt_extref { ++ struct cobalt_extension *extension; ++ struct list_head next; ++ void *private; ++}; ++ ++static inline void cobalt_set_extref(struct cobalt_extref *ref, ++ struct cobalt_extension *ext, ++ void *priv) ++{ ++ ref->extension = ext; ++ ref->private = priv; ++} ++ ++/** ++ * All macros return non-zero if some thread-level extension code was ++ * called, leaving the output value into __ret. Otherwise, the __ret ++ * value is undefined. ++ */ ++#define cobalt_initcall_extension(__extfn, __extref, __owner, __ret, __args...) \ ++ ({ \ ++ int __val = 0; \ ++ if ((__owner) && (__owner)->extref.extension) { \ ++ (__extref)->extension = (__owner)->extref.extension; \ ++ if ((__extref)->extension->ops.__extfn) { \ ++ (__ret) = (__extref)->extension->ops. \ ++ __extfn(__extref, ##__args ); \ ++ __val = 1; \ ++ } \ ++ } else \ ++ (__extref)->extension = NULL; \ ++ __val; \ ++ }) ++ ++#define cobalt_call_extension(__extfn, __extref, __ret, __args...) \ ++ ({ \ ++ int __val = 0; \ ++ if ((__extref)->extension && \ ++ (__extref)->extension->ops.__extfn) { \ ++ (__ret) = (__extref)->extension->ops. \ ++ __extfn(__extref, ##__args ); \ ++ __val = 1; \ ++ } \ ++ __val; \ ++ }) ++ ++#else /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++struct cobalt_extension; ++ ++struct cobalt_extref { ++}; ++ ++static inline void cobalt_set_extref(struct cobalt_extref *ref, ++ struct cobalt_extension *ext, ++ void *priv) ++{ ++} ++ ++#define cobalt_initcall_extension(__extfn, __extref, __owner, __ret, __args...) \ ++ ({ (void)(__owner); (void)(__ret); 0; }) ++ ++#define cobalt_call_extension(__extfn, __extref, __ret, __args...) 
\ ++ ({ (void)(__ret); 0; }) ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++#endif /* !_COBALT_POSIX_EXTENSION_H */ +--- linux/kernel/xenomai/posix/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/thread.h 2021-04-07 16:01:25.989635935 +0800 +@@ -0,0 +1,228 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_THREAD_H ++#define _COBALT_POSIX_THREAD_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/* CAUTION: rtdm/cobalt.h reads this header. */ ++#include ++#include ++ ++#define PTHREAD_PROCESS_PRIVATE 0 ++#define PTHREAD_PROCESS_SHARED 1 ++ ++#define PTHREAD_CREATE_JOINABLE 0 ++#define PTHREAD_CREATE_DETACHED 1 ++ ++#define PTHREAD_INHERIT_SCHED 0 ++#define PTHREAD_EXPLICIT_SCHED 1 ++ ++#define PTHREAD_MUTEX_NORMAL 0 ++#define PTHREAD_MUTEX_RECURSIVE 1 ++#define PTHREAD_MUTEX_ERRORCHECK 2 ++#define PTHREAD_MUTEX_DEFAULT 0 ++ ++struct cobalt_thread; ++struct cobalt_threadstat; ++ ++/* ++ * pthread_mutexattr_t and pthread_condattr_t fit on 32 bits, for ++ * compatibility with libc. ++ */ ++ ++/* The following definitions are copied from linuxthread pthreadtypes.h. */ ++struct _pthread_fastlock { ++ long int __status; ++ int __spinlock; ++}; ++ ++typedef struct { ++ struct _pthread_fastlock __c_lock; ++ long __c_waiting; ++ char __padding[48 - sizeof (struct _pthread_fastlock) ++ - sizeof (long) - sizeof (long long)]; ++ long long __align; ++} pthread_cond_t; ++ ++enum { ++ PTHREAD_PRIO_NONE, ++ PTHREAD_PRIO_INHERIT, ++ PTHREAD_PRIO_PROTECT ++}; ++ ++typedef struct { ++ int __m_reserved; ++ int __m_count; ++ long __m_owner; ++ int __m_kind; ++ struct _pthread_fastlock __m_lock; ++} pthread_mutex_t; ++ ++struct cobalt_local_hkey { ++ /** pthread_t from userland. */ ++ unsigned long u_pth; ++ /** kernel mm context. */ ++ struct mm_struct *mm; ++}; ++ ++struct cobalt_thread { ++ unsigned int magic; ++ struct xnthread threadbase; ++ struct cobalt_extref extref; ++ struct cobalt_process *process; ++ struct list_head next; /* in global/process thread_list */ ++ ++ /** Signal management. */ ++ sigset_t sigpending; ++ struct list_head sigqueues[_NSIG]; /* in cobalt_sigpending */ ++ struct xnsynch sigwait; ++ struct list_head signext; ++ ++ /** Monitor wait object and link holder. */ ++ struct xnsynch monitor_synch; ++ struct list_head monitor_link; ++ ++ struct cobalt_local_hkey hkey; ++}; ++ ++struct cobalt_sigwait_context { ++ struct xnthread_wait_context wc; ++ sigset_t *set; ++ struct siginfo *si; ++}; ++ ++static inline struct cobalt_thread *cobalt_current_thread(void) ++{ ++ struct xnthread *curr = xnthread_current(); ++ return curr ? 
container_of(curr, struct cobalt_thread, threadbase) : NULL; ++} ++ ++int __cobalt_thread_create(unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, __u32 __user *u_winoff); ++ ++int __cobalt_thread_setschedparam_ex(struct cobalt_thread *thread, int policy, ++ const struct sched_param_ex *param_ex); ++ ++int cobalt_thread_setschedparam_ex(unsigned long pth, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++int cobalt_thread_getschedparam_ex(unsigned long pth, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++int __cobalt_thread_getschedparam_ex(struct cobalt_thread *thread, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++int cobalt_thread_setschedprio(unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++struct cobalt_thread *cobalt_thread_find(pid_t pid); ++ ++struct cobalt_thread *cobalt_thread_find_local(pid_t pid); ++ ++struct cobalt_thread *cobalt_thread_lookup(unsigned long pth); ++ ++COBALT_SYSCALL_DECL(thread_create, ++ (unsigned long pth, int policy, ++ struct sched_param_ex __user *u_param, ++ int xid, __u32 __user *u_winoff)); ++ ++struct cobalt_thread * ++cobalt_thread_shadow(struct cobalt_local_hkey *lhkey, ++ __u32 __user *u_winoff); ++ ++COBALT_SYSCALL_DECL(thread_setmode, ++ (int clrmask, int setmask, int __user *u_mode_r)); ++ ++COBALT_SYSCALL_DECL(thread_setname, ++ (unsigned long pth, const char __user *u_name)); ++ ++COBALT_SYSCALL_DECL(thread_kill, (unsigned long pth, int sig)); ++ ++COBALT_SYSCALL_DECL(thread_join, (unsigned long pth)); ++ ++COBALT_SYSCALL_DECL(thread_getpid, (unsigned long pth)); ++ ++COBALT_SYSCALL_DECL(thread_getstat, ++ (pid_t pid, struct cobalt_threadstat __user *u_stat)); ++ ++COBALT_SYSCALL_DECL(thread_setschedparam_ex, ++ (unsigned long pth, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL_DECL(thread_getschedparam_ex, ++ (unsigned long pth, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL_DECL(thread_setschedprio, ++ (unsigned long pth, ++ int prio, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++void cobalt_thread_map(struct xnthread *curr); ++ ++struct xnthread_personality *cobalt_thread_exit(struct xnthread *curr); ++ ++struct xnthread_personality *cobalt_thread_finalize(struct xnthread *zombie); ++ ++#ifdef CONFIG_XENO_OPT_COBALT_EXTENSION ++ ++int cobalt_thread_extend(struct cobalt_extension *ext, ++ void *priv); ++ ++void cobalt_thread_restrict(void); ++ ++static inline ++int cobalt_thread_extended_p(const struct cobalt_thread *thread, ++ const struct cobalt_extension *ext) ++{ ++ return thread->extref.extension == ext; ++} ++ ++#else /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++static inline ++int cobalt_thread_extended_p(const struct cobalt_thread *thread, ++ const struct cobalt_extension *ext) ++{ ++ return 0; ++} ++ ++#endif /* !CONFIG_XENO_OPT_COBALT_EXTENSION */ ++ ++extern xnticks_t cobalt_time_slice; ++ ++#endif /* !_COBALT_POSIX_THREAD_H */ +--- linux/kernel/xenomai/posix/monitor.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/monitor.h 2021-04-07 16:01:25.985635941 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (C) 2011 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free 
Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef _COBALT_POSIX_MONITOR_H ++#define _COBALT_POSIX_MONITOR_H ++ ++#include ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_monitor { ++ unsigned int magic; ++ struct xnsynch gate; ++ struct xnsynch drain; ++ struct cobalt_monitor_state *state; ++ struct list_head waiters; ++ int flags; ++ xntmode_t tmode; ++ struct cobalt_resnode resnode; ++}; ++ ++int __cobalt_monitor_wait(struct cobalt_monitor_shadow __user *u_mon, ++ int event, const struct timespec *ts, ++ int __user *u_ret); ++ ++COBALT_SYSCALL_DECL(monitor_init, ++ (struct cobalt_monitor_shadow __user *u_monsh, ++ clockid_t clk_id, ++ int flags)); ++ ++COBALT_SYSCALL_DECL(monitor_enter, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_sync, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_exit, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++COBALT_SYSCALL_DECL(monitor_wait, ++ (struct cobalt_monitor_shadow __user *u_monsh, ++ int event, const struct timespec __user *u_ts, ++ int __user *u_ret)); ++ ++COBALT_SYSCALL_DECL(monitor_destroy, ++ (struct cobalt_monitor_shadow __user *u_monsh)); ++ ++void cobalt_monitor_reclaim(struct cobalt_resnode *node, ++ spl_t s); ++ ++#endif /* !_COBALT_POSIX_MONITOR_H */ +--- linux/kernel/xenomai/posix/nsem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/nsem.c 2021-04-07 16:01:25.980635948 +0800 +@@ -0,0 +1,299 @@ ++/* ++ * Copyright (C) 2013 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++#include "sem.h" ++#include "thread.h" ++#include ++ ++DEFINE_PRIVATE_XNLOCK(named_sem_lock); ++ ++struct cobalt_named_sem { ++ struct cobalt_sem *sem; ++ struct cobalt_sem_shadow __user *usem; ++ unsigned int refs; ++ struct xnid id; ++}; ++ ++static struct cobalt_named_sem * ++sem_search(struct cobalt_process *process, xnhandle_t handle) ++{ ++ struct xnid *i; ++ ++ i = xnid_fetch(&process->usems, handle); ++ if (i == NULL) ++ return NULL; ++ ++ return container_of(i, struct cobalt_named_sem, id); ++} ++ ++static struct cobalt_sem_shadow __user * ++sem_open(struct cobalt_process *process, ++ struct cobalt_sem_shadow __user *ushadow, ++ struct filename *filename, int oflags, mode_t mode, ++ unsigned int value) ++{ ++ const char *name = filename->name; ++ struct cobalt_sem_shadow shadow; ++ struct cobalt_named_sem *u, *v; ++ struct cobalt_sem *sem; ++ xnhandle_t handle; ++ spl_t s; ++ int rc; ++ ++ if (name[0] != '/' || name[1] == '\0') ++ return ERR_PTR(-EINVAL); ++ ++ retry_bind: ++ rc = xnregistry_bind(&name[1], XN_NONBLOCK, XN_RELATIVE, &handle); ++ switch (rc) { ++ case 0: ++ /* Found */ ++ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ return ERR_PTR(-EEXIST); ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ u = sem_search(process, handle); ++ if (u) { ++ ++u->refs; ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ return u->usem; ++ } ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ ++ xnlock_get_irqsave(&nklock, s); ++ sem = xnregistry_lookup(handle, NULL); ++ if (sem && sem->magic != COBALT_SEM_MAGIC) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ if (sem) { ++ ++sem->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ } else { ++ xnlock_put_irqrestore(&nklock, s); ++ goto retry_bind; ++ } ++ ++ __cobalt_sem_shadow_init(sem, COBALT_NAMED_SEM_MAGIC, &shadow); ++ break; ++ ++ case -EWOULDBLOCK: ++ /* Not found */ ++ if ((oflags & O_CREAT) == 0) ++ return ERR_PTR(-ENOENT); ++ ++ shadow.magic = 0; ++ sem = __cobalt_sem_init(&name[1], &shadow, ++ SEM_PSHARED | SEM_NAMED, value); ++ if (IS_ERR(sem)) { ++ rc = PTR_ERR(sem); ++ if (rc == -EEXIST) ++ goto retry_bind; ++ return ERR_PTR(rc); ++ } ++ ++ sem->pathname = filename; ++ handle = shadow.handle; ++ break; ++ ++ default: ++ return ERR_PTR(rc); ++ } ++ ++ if (cobalt_copy_to_user(ushadow, &shadow, sizeof(shadow))) { ++ __cobalt_sem_destroy(handle); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ u = xnmalloc(sizeof(*u)); ++ if (u == NULL) { ++ __cobalt_sem_destroy(handle); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ u->sem = sem; ++ u->usem = ushadow; ++ u->refs = 1; ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ v = sem_search(process, handle); ++ if (v) { ++ ++v->refs; ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ xnlock_get_irqsave(&nklock, s); ++ --sem->refs; ++ xnlock_put_irqrestore(&nklock, s); ++ putname(filename); ++ xnfree(u); ++ u = v; ++ } else { ++ xnid_enter(&process->usems, &u->id, handle); ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ } ++ ++ trace_cobalt_psem_open(name, handle, oflags, mode, value); ++ ++ return u->usem; ++} ++ ++static int sem_close(struct cobalt_process *process, xnhandle_t handle) ++{ ++ struct cobalt_named_sem *u; ++ spl_t s; ++ int err; ++ ++ xnlock_get_irqsave(&named_sem_lock, s); ++ u = sem_search(process, handle); ++ if (u == NULL) { ++ err = -ENOENT; ++ goto err_unlock; ++ } ++ ++ if (--u->refs) { ++ err = 0; ++ goto err_unlock; ++ } ++ ++ xnid_remove(&process->usems, 
&u->id); ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ ++ __cobalt_sem_destroy(handle); ++ ++ xnfree(u); ++ return 1; ++ ++ err_unlock: ++ xnlock_put_irqrestore(&named_sem_lock, s); ++ return err; ++} ++ ++struct cobalt_sem_shadow __user * ++__cobalt_sem_open(struct cobalt_sem_shadow __user *usm, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value) ++{ ++ struct cobalt_process *process; ++ struct filename *filename; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return ERR_PTR(-EPERM); ++ ++ filename = getname(u_name); ++ if (IS_ERR(filename)) ++ return ERR_CAST(filename); ++ ++ usm = sem_open(process, usm, filename, oflags, mode, value); ++ if (IS_ERR(usm)) { ++ trace_cobalt_psem_open_failed(filename->name, oflags, mode, ++ value, PTR_ERR(usm)); ++ putname(filename); ++ } ++ ++ return usm; ++} ++ ++COBALT_SYSCALL(sem_open, lostage, ++ (struct cobalt_sem_shadow __user *__user *u_addrp, ++ const char __user *u_name, ++ int oflags, mode_t mode, unsigned int value)) ++{ ++ struct cobalt_sem_shadow __user *usm; ++ ++ if (__xn_get_user(usm, u_addrp)) ++ return -EFAULT; ++ ++ usm = __cobalt_sem_open(usm, u_name, oflags, mode, value); ++ if (IS_ERR(usm)) ++ return PTR_ERR(usm); ++ ++ return __xn_put_user(usm, u_addrp) ? -EFAULT : 0; ++} ++ ++COBALT_SYSCALL(sem_close, lostage, ++ (struct cobalt_sem_shadow __user *usm)) ++{ ++ struct cobalt_process *process; ++ xnhandle_t handle; ++ ++ process = cobalt_current_process(); ++ if (process == NULL) ++ return -EPERM; ++ ++ handle = cobalt_get_handle_from_user(&usm->handle); ++ trace_cobalt_psem_close(handle); ++ ++ return sem_close(process, handle); ++} ++ ++static inline int sem_unlink(const char *name) ++{ ++ xnhandle_t handle; ++ int ret; ++ ++ if (name[0] != '/') ++ return -EINVAL; ++ ++ ret = xnregistry_bind(name + 1, XN_NONBLOCK, XN_RELATIVE, &handle); ++ if (ret == -EWOULDBLOCK) ++ return -ENOENT; ++ ++ if (__cobalt_sem_destroy(handle) == -EBUSY) ++ xnregistry_unlink(xnregistry_key(handle)); ++ ++ return 0; ++} ++ ++COBALT_SYSCALL(sem_unlink, lostage, ++ (const char __user *u_name)) ++{ ++ struct filename *filename; ++ int ret; ++ ++ filename = getname(u_name); ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ trace_cobalt_psem_unlink(filename->name); ++ ret = sem_unlink(filename->name); ++ putname(filename); ++ ++ return ret; ++} ++ ++static void reclaim_named_sem(void *arg, struct xnid *i) ++{ ++ struct cobalt_process *process = arg; ++ struct cobalt_named_sem *u; ++ ++ u = container_of(i, struct cobalt_named_sem, id); ++ u->refs = 1; ++ sem_close(process, xnid_key(i)); ++} ++ ++void cobalt_nsem_reclaim(struct cobalt_process *process) ++{ ++ xntree_cleanup(&process->usems, process, reclaim_named_sem); ++} +--- linux/kernel/xenomai/posix/sched.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/sched.h 2021-04-07 16:01:25.975635955 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SCHED_H ++#define _COBALT_POSIX_SCHED_H ++ ++#include ++#include ++#include ++ ++struct cobalt_resources; ++struct cobalt_process; ++ ++struct cobalt_sched_group { ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ struct xnsched_quota_group quota; ++#endif ++ struct cobalt_resources *scope; ++ int pshared; ++ struct list_head next; ++}; ++ ++int __cobalt_sched_weightprio(int policy, ++ const struct sched_param_ex *param_ex); ++ ++int __cobalt_sched_setconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ int (*ack_config)(int policy, ++ const union sched_config *config, ++ void __user *u_config)); ++ ++ssize_t __cobalt_sched_getconfig_np(int cpu, int policy, ++ void __user *u_config, ++ size_t len, ++ union sched_config *(*fetch_config) ++ (int policy, const void __user *u_config, ++ size_t *len), ++ ssize_t (*put_config)(int policy, ++ void __user *u_config, size_t u_len, ++ const union sched_config *config, ++ size_t len)); ++int cobalt_sched_setscheduler_ex(pid_t pid, ++ int policy, ++ const struct sched_param_ex *param_ex, ++ __u32 __user *u_winoff, ++ int __user *u_promoted); ++ ++int cobalt_sched_getscheduler_ex(pid_t pid, ++ int *policy_r, ++ struct sched_param_ex *param_ex); ++ ++struct xnsched_class * ++cobalt_sched_policy_param(union xnsched_policy_param *param, ++ int u_policy, const struct sched_param_ex *param_ex, ++ xnticks_t *tslice_r); ++ ++COBALT_SYSCALL_DECL(sched_yield, (void)); ++ ++COBALT_SYSCALL_DECL(sched_weightprio, ++ (int policy, const struct sched_param_ex __user *u_param)); ++ ++COBALT_SYSCALL_DECL(sched_minprio, (int policy)); ++ ++COBALT_SYSCALL_DECL(sched_maxprio, (int policy)); ++ ++COBALT_SYSCALL_DECL(sched_setconfig_np, ++ (int cpu, ++ int policy, ++ union sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL_DECL(sched_getconfig_np, ++ (int cpu, int policy, ++ union sched_config __user *u_config, ++ size_t len)); ++ ++COBALT_SYSCALL_DECL(sched_setscheduler_ex, ++ (pid_t pid, ++ int policy, ++ const struct sched_param_ex __user *u_param, ++ __u32 __user *u_winoff, ++ int __user *u_promoted)); ++ ++COBALT_SYSCALL_DECL(sched_getscheduler_ex, ++ (pid_t pid, ++ int __user *u_policy, ++ struct sched_param_ex __user *u_param)); ++ ++void cobalt_sched_reclaim(struct cobalt_process *process); ++ ++#endif /* !_COBALT_POSIX_SCHED_H */ +--- linux/kernel/xenomai/posix/cond.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/cond.c 2021-04-07 16:01:25.971635961 +0800 +@@ -0,0 +1,425 @@ ++/* ++ * Written by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include "internal.h" ++#include "thread.h" ++#include "mutex.h" ++#include "cond.h" ++#include "clock.h" ++#include ++ ++static inline int ++pthread_cond_init(struct cobalt_cond_shadow *cnd, const struct cobalt_condattr *attr) ++{ ++ int synch_flags = XNSYNCH_PRIO, ret; ++ struct cobalt_cond *cond, *old_cond; ++ struct cobalt_cond_state *state; ++ struct cobalt_ppd *sys_ppd; ++ struct list_head *condq; ++ spl_t s; ++ ++ cond = xnmalloc(sizeof(*cond)); ++ if (cond == NULL) ++ return -ENOMEM; ++ ++ sys_ppd = cobalt_ppd_get(attr->pshared); ++ state = cobalt_umm_alloc(&sys_ppd->umm, sizeof(*state)); ++ if (state == NULL) { ++ ret = -EAGAIN; ++ goto fail_umm; ++ } ++ cond->state = state; ++ state->pending_signals = 0; ++ state->mutex_state_offset = ~0U; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ condq = &cobalt_current_resources(attr->pshared)->condq; ++ if (cnd->magic == COBALT_COND_MAGIC && !list_empty(condq)) { ++ old_cond = xnregistry_lookup(cnd->handle, NULL); ++ if (cobalt_obj_active(old_cond, COBALT_COND_MAGIC, ++ typeof(*old_cond))) { ++ ret = -EBUSY; ++ goto fail_register; ++ } ++ } ++ ++ ret = xnregistry_enter_anon(cond, &cond->resnode.handle); ++ if (ret < 0) ++ goto fail_register; ++ if (attr->pshared) ++ cond->resnode.handle |= XNSYNCH_PSHARED; ++ cond->magic = COBALT_COND_MAGIC; ++ xnsynch_init(&cond->synchbase, synch_flags, NULL); ++ cond->attr = *attr; ++ cond->mutex = NULL; ++ cobalt_add_resource(&cond->resnode, cond, attr->pshared); ++ ++ cnd->handle = cond->resnode.handle; ++ cnd->state_offset = cobalt_umm_offset(&sys_ppd->umm, state); ++ cnd->magic = COBALT_COND_MAGIC; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++fail_register: ++ xnlock_put_irqrestore(&nklock, s); ++ cobalt_umm_free(&sys_ppd->umm, state); ++fail_umm: ++ xnfree(cond); ++ ++ return ret; ++} ++ ++static inline int pthread_cond_destroy(struct cobalt_cond_shadow *cnd) ++{ ++ struct cobalt_cond *cond; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ cond = xnregistry_lookup(cnd->handle, NULL); ++ if (cond == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (!cobalt_obj_active(cnd, COBALT_COND_MAGIC, struct cobalt_cond_shadow) ++ || !cobalt_obj_active(cond, COBALT_COND_MAGIC, struct cobalt_cond)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ ++ if (cond->resnode.scope != ++ cobalt_current_resources(cond->attr.pshared)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPERM; ++ } ++ ++ if (xnsynch_pended_p(&cond->synchbase) || cond->mutex) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ cobalt_cond_reclaim(&cond->resnode, s); /* drops lock */ ++ ++ cobalt_mark_deleted(cnd); ++ ++ return 0; ++} ++ ++static inline int cobalt_cond_timedwait_prologue(struct xnthread *cur, ++ struct cobalt_cond *cond, ++ struct cobalt_mutex *mutex, ++ xnticks_t abs_to) ++{ ++ int err, ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* If another thread waiting for cond does not use the same mutex */ ++ if (!cobalt_obj_active(cond, COBALT_COND_MAGIC, struct cobalt_cond) ++ || (cond->mutex && cond->mutex != mutex)) { ++ err = -EINVAL; ++ goto unlock_and_return; ++ } ++ ++ if (cond->resnode.scope != ++ cobalt_current_resources(cond->attr.pshared)) { ++ err = -EPERM; ++ goto unlock_and_return; ++ } ++ ++ if 
(mutex->attr.pshared != cond->attr.pshared) { ++ err = -EINVAL; ++ goto unlock_and_return; ++ } ++ ++ /* Unlock mutex. */ ++ err = cobalt_mutex_release(cur, mutex); ++ if (err < 0) ++ goto unlock_and_return; ++ ++ /* err == 1 means a reschedule is needed, but do not ++ reschedule here, releasing the mutex and suspension must be ++ done atomically in pthread_cond_*wait. */ ++ ++ /* Bind mutex to cond. */ ++ if (cond->mutex == NULL) { ++ cond->mutex = mutex; ++ list_add_tail(&cond->mutex_link, &mutex->conds); ++ } ++ ++ /* Wait for another thread to signal the condition. */ ++ if (abs_to != XN_INFINITE) ++ ret = xnsynch_sleep_on(&cond->synchbase, abs_to, ++ clock_flag(TIMER_ABSTIME, cond->attr.clock)); ++ else ++ ret = xnsynch_sleep_on(&cond->synchbase, XN_INFINITE, XN_RELATIVE); ++ ++ /* There are three possible wakeup conditions : ++ - cond_signal / cond_broadcast, no status bit is set, and the function ++ should return 0 ; ++ - timeout, the status XNTIMEO is set, and the function should return ++ ETIMEDOUT ; ++ - pthread_kill, the status bit XNBREAK is set, but ignored, the ++ function simply returns EINTR (used only by the user-space ++ interface, replaced by 0 anywhere else), causing a wakeup, spurious ++ or not whether pthread_cond_signal was called between pthread_kill ++ and the moment when xnsynch_sleep_on returned ; ++ */ ++ ++ err = 0; ++ ++ if (ret & XNBREAK) ++ err = -EINTR; ++ else if (ret & XNTIMEO) ++ err = -ETIMEDOUT; ++ ++unlock_and_return: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static inline int cobalt_cond_timedwait_epilogue(struct xnthread *cur, ++ struct cobalt_cond *cond, ++ struct cobalt_mutex *mutex) ++{ ++ int err; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ err = __cobalt_mutex_acquire_unchecked(cur, mutex, NULL); ++ if (err == -EINTR) ++ goto unlock_and_return; ++ ++ /* ++ * Unbind mutex and cond, if no other thread is waiting, if ++ * the job was not already done. ++ */ ++ if (!xnsynch_pended_p(&cond->synchbase) && cond->mutex == mutex) { ++ cond->mutex = NULL; ++ list_del(&cond->mutex_link); ++ } ++ ++unlock_and_return: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++COBALT_SYSCALL(cond_init, current, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ const struct cobalt_condattr __user *u_attr)) ++{ ++ struct cobalt_cond_shadow cnd; ++ struct cobalt_condattr attr; ++ int err; ++ ++ if (cobalt_copy_from_user(&cnd, u_cnd, sizeof(cnd))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&attr, u_attr, sizeof(attr))) ++ return -EFAULT; ++ ++ trace_cobalt_cond_init(u_cnd, &attr); ++ ++ err = pthread_cond_init(&cnd, &attr); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_cnd, &cnd, sizeof(*u_cnd)); ++} ++ ++COBALT_SYSCALL(cond_destroy, current, ++ (struct cobalt_cond_shadow __user *u_cnd)) ++{ ++ struct cobalt_cond_shadow cnd; ++ int err; ++ ++ if (cobalt_copy_from_user(&cnd, u_cnd, sizeof(cnd))) ++ return -EFAULT; ++ ++ trace_cobalt_cond_destroy(u_cnd); ++ ++ err = pthread_cond_destroy(&cnd); ++ if (err < 0) ++ return err; ++ ++ return cobalt_copy_to_user(u_cnd, &cnd, sizeof(*u_cnd)); ++} ++ ++struct us_cond_data { ++ int err; ++}; ++ ++static inline int cond_fetch_timeout(struct timespec *ts, ++ const void __user *u_ts) ++{ ++ return u_ts == NULL ? 
-EFAULT : ++ cobalt_copy_from_user(ts, u_ts, sizeof(*ts)); ++} ++ ++int __cobalt_cond_wait_prologue(struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ void __user *u_ts, ++ int (*fetch_timeout)(struct timespec *ts, ++ const void __user *u_ts)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ struct cobalt_cond *cond; ++ struct cobalt_mutex *mx; ++ struct us_cond_data d; ++ struct timespec ts; ++ xnhandle_t handle; ++ int err, perr = 0; ++ __u32 offset; ++ ++ handle = cobalt_get_handle_from_user(&u_cnd->handle); ++ cond = xnregistry_lookup(handle, NULL); ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ mx = xnregistry_lookup(handle, NULL); ++ ++ if (cond->mutex == NULL) { ++ __xn_get_user(offset, &u_mx->state_offset); ++ cond->state->mutex_state_offset = offset; ++ } ++ ++ if (fetch_timeout) { ++ err = fetch_timeout(&ts, u_ts); ++ if (err == 0) { ++ trace_cobalt_cond_timedwait(u_cnd, u_mx, &ts); ++ err = cobalt_cond_timedwait_prologue(cur, cond, mx, ++ ts2ns(&ts) + 1); ++ } ++ } else { ++ trace_cobalt_cond_wait(u_cnd, u_mx); ++ err = cobalt_cond_timedwait_prologue(cur, cond, mx, XN_INFINITE); ++ } ++ ++ switch(err) { ++ case 0: ++ case -ETIMEDOUT: ++ perr = d.err = err; ++ err = cobalt_cond_timedwait_epilogue(cur, cond, mx); ++ break; ++ ++ case -EINTR: ++ perr = err; ++ d.err = 0; /* epilogue should return 0. */ ++ break; ++ ++ default: ++ /* Please gcc and handle the case which will never ++ happen */ ++ d.err = EINVAL; ++ } ++ ++ if (cond->mutex == NULL) ++ cond->state->mutex_state_offset = ~0U; ++ ++ if (err == -EINTR) ++ __xn_put_user(d.err, u_err); ++ ++ return err == 0 ? perr : err; ++} ++ ++/* pthread_cond_wait_prologue(cond, mutex, count_ptr, timed, timeout) */ ++COBALT_SYSCALL(cond_wait_prologue, nonrestartable, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx, ++ int *u_err, ++ unsigned int timed, ++ struct timespec __user *u_ts)) ++{ ++ return __cobalt_cond_wait_prologue(u_cnd, u_mx, u_err, u_ts, ++ timed ? 
cond_fetch_timeout : NULL); ++} ++ ++COBALT_SYSCALL(cond_wait_epilogue, primary, ++ (struct cobalt_cond_shadow __user *u_cnd, ++ struct cobalt_mutex_shadow __user *u_mx)) ++{ ++ struct xnthread *cur = xnthread_current(); ++ struct cobalt_cond *cond; ++ struct cobalt_mutex *mx; ++ xnhandle_t handle; ++ int err; ++ ++ handle = cobalt_get_handle_from_user(&u_cnd->handle); ++ cond = xnregistry_lookup(handle, NULL); ++ ++ handle = cobalt_get_handle_from_user(&u_mx->handle); ++ mx = xnregistry_lookup(handle, NULL); ++ err = cobalt_cond_timedwait_epilogue(cur, cond, mx); ++ ++ if (cond->mutex == NULL) ++ cond->state->mutex_state_offset = ~0U; ++ ++ return err; ++} ++ ++int cobalt_cond_deferred_signals(struct cobalt_cond *cond) ++{ ++ struct cobalt_cond_state *state; ++ __u32 pending_signals; ++ int need_resched; ++ ++ state = cond->state; ++ pending_signals = state->pending_signals; ++ ++ switch(pending_signals) { ++ default: ++ state->pending_signals = 0; ++ need_resched = xnsynch_wakeup_many_sleepers(&cond->synchbase, ++ pending_signals); ++ break; ++ ++ case ~0U: ++ need_resched = ++ xnsynch_flush(&cond->synchbase, 0) == XNSYNCH_RESCHED; ++ state->pending_signals = 0; ++ break; ++ ++ case 0: ++ need_resched = 0; ++ break; ++ } ++ ++ return need_resched; ++} ++ ++void cobalt_cond_reclaim(struct cobalt_resnode *node, spl_t s) ++{ ++ struct cobalt_cond *cond; ++ ++ cond = container_of(node, struct cobalt_cond, resnode); ++ xnregistry_remove(node->handle); ++ cobalt_del_resource(node); ++ xnsynch_destroy(&cond->synchbase); ++ cobalt_mark_deleted(cond); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ cobalt_umm_free(&cobalt_ppd_get(cond->attr.pshared)->umm, ++ cond->state); ++ xnfree(cond); ++} +--- linux/kernel/xenomai/posix/corectl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/corectl.c 2021-04-07 16:01:25.966635968 +0800 +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (C) 2016 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "corectl.h" ++ ++static BLOCKING_NOTIFIER_HEAD(config_notifier_list); ++ ++static int do_conf_option(int option, void __user *u_buf, size_t u_bufsz) ++{ ++ struct cobalt_config_vector vec; ++ int ret, val = 0; ++ ++ if (option <= _CC_COBALT_GET_CORE_STATUS && u_bufsz < sizeof(val)) ++ return -EINVAL; ++ ++ switch (option) { ++ case _CC_COBALT_GET_VERSION: ++ val = XENO_VERSION_CODE; ++ break; ++ case _CC_COBALT_GET_NR_PIPES: ++#ifdef CONFIG_XENO_OPT_PIPE ++ val = CONFIG_XENO_OPT_PIPE_NRDEV; ++#endif ++ break; ++ case _CC_COBALT_GET_NR_TIMERS: ++ val = CONFIG_XENO_OPT_NRTIMERS; ++ break; ++ case _CC_COBALT_GET_POLICIES: ++ val = _CC_COBALT_SCHED_FIFO|_CC_COBALT_SCHED_RR; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_WEAK)) ++ val |= _CC_COBALT_SCHED_WEAK; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_SPORADIC)) ++ val |= _CC_COBALT_SCHED_SPORADIC; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_QUOTA)) ++ val |= _CC_COBALT_SCHED_QUOTA; ++ if (IS_ENABLED(CONFIG_XENO_OPT_SCHED_TP)) ++ val |= _CC_COBALT_SCHED_TP; ++ break; ++ case _CC_COBALT_GET_DEBUG: ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_COBALT)) ++ val |= _CC_COBALT_DEBUG_ASSERT; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_CONTEXT)) ++ val |= _CC_COBALT_DEBUG_CONTEXT; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LOCKING)) ++ val |= _CC_COBALT_DEBUG_LOCKING; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_USER)) ++ val |= _CC_COBALT_DEBUG_USER; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED)) ++ val |= _CC_COBALT_DEBUG_MUTEX_RELAXED; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) ++ val |= _CC_COBALT_DEBUG_MUTEX_SLEEP; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_LEGACY)) ++ val |= _CC_COBALT_DEBUG_LEGACY; ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_TRACE_RELAX)) ++ val |= _CC_COBALT_DEBUG_TRACE_RELAX; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_RTNET_CHECKED)) ++ val |= _CC_COBALT_DEBUG_NET; ++ break; ++ case _CC_COBALT_GET_WATCHDOG: ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ val = CONFIG_XENO_OPT_WATCHDOG_TIMEOUT; ++#endif ++ break; ++ case _CC_COBALT_GET_CORE_STATUS: ++ val = realtime_core_state(); ++ break; ++ default: ++ if (!ipipe_root_p) ++ /* Switch to secondary mode first. */ ++ return -ENOSYS; ++ vec.u_buf = u_buf; ++ vec.u_bufsz = u_bufsz; ++ ret = blocking_notifier_call_chain(&config_notifier_list, ++ option, &vec); ++ if (ret == NOTIFY_DONE) ++ return -EINVAL; /* Nobody cared. */ ++ return notifier_to_errno(ret); ++ } ++ ++ ret = cobalt_copy_to_user(u_buf, &val, sizeof(val)); ++ ++ return ret ? -EFAULT : 0; ++} ++ ++static int stop_services(const void __user *u_buf, size_t u_bufsz) ++{ ++ const u32 final_grace_period = 3; /* seconds */ ++ enum cobalt_run_states state; ++ __u32 grace_period; ++ int ret; ++ ++ /* ++ * XXX: we don't have any syscall for unbinding a thread from ++ * the Cobalt core, so we deny real-time threads from stopping ++ * Cobalt services. i.e. _CC_COBALT_STOP_CORE must be issued ++ * from a plain regular linux thread. ++ */ ++ if (xnthread_current()) ++ return -EPERM; ++ ++ if (u_bufsz != sizeof(__u32)) ++ return -EINVAL; ++ ++ ret = cobalt_copy_from_user(&grace_period, ++ u_buf, sizeof(grace_period)); ++ if (ret) ++ return ret; ++ ++ state = atomic_cmpxchg(&cobalt_runstate, ++ COBALT_STATE_RUNNING, ++ COBALT_STATE_TEARDOWN); ++ switch (state) { ++ case COBALT_STATE_STOPPED: ++ break; ++ case COBALT_STATE_RUNNING: ++ /* Kill user threads. 
*/ ++ ret = xnthread_killall(grace_period, XNUSER); ++ if (ret) { ++ set_realtime_core_state(state); ++ return ret; ++ } ++ cobalt_call_state_chain(COBALT_STATE_TEARDOWN); ++ /* Kill lingering RTDM tasks. */ ++ ret = xnthread_killall(final_grace_period, 0); ++ if (ret == -EAGAIN) ++ printk(XENO_WARNING "some RTDM tasks won't stop"); ++ xntimer_release_hardware(); ++ set_realtime_core_state(COBALT_STATE_STOPPED); ++ printk(XENO_INFO "services stopped\n"); ++ break; ++ default: ++ ret = -EINPROGRESS; ++ } ++ ++ return ret; ++} ++ ++static int start_services(void) ++{ ++ enum cobalt_run_states state; ++ int ret = 0; ++ ++ state = atomic_cmpxchg(&cobalt_runstate, ++ COBALT_STATE_STOPPED, ++ COBALT_STATE_WARMUP); ++ switch (state) { ++ case COBALT_STATE_RUNNING: ++ break; ++ case COBALT_STATE_STOPPED: ++ xntimer_grab_hardware(); ++ cobalt_call_state_chain(COBALT_STATE_WARMUP); ++ set_realtime_core_state(COBALT_STATE_RUNNING); ++ printk(XENO_INFO "services started\n"); ++ break; ++ default: ++ ret = -EINPROGRESS; ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(corectl, probing, ++ (int request, void __user *u_buf, size_t u_bufsz)) ++{ ++ int ret; ++ ++ switch (request) { ++ case _CC_COBALT_STOP_CORE: ++ ret = stop_services(u_buf, u_bufsz); ++ break; ++ case _CC_COBALT_START_CORE: ++ ret = start_services(); ++ break; ++ default: ++ ret = do_conf_option(request, u_buf, u_bufsz); ++ } ++ ++ return ret; ++} ++ ++void cobalt_add_config_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_register(&config_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_add_config_chain); ++ ++void cobalt_remove_config_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_unregister(&config_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_remove_config_chain); +--- linux/kernel/xenomai/posix/signal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/signal.h 2021-04-07 16:01:25.961635975 +0800 +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_POSIX_SIGNAL_H ++#define _COBALT_POSIX_SIGNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct cobalt_thread; ++ ++struct cobalt_sigpending { ++ struct siginfo si; ++ struct list_head next; ++}; ++ ++static inline ++void cobalt_copy_siginfo(int code, ++ struct siginfo *__restrict__ dst, ++ const struct siginfo *__restrict__ src) ++{ ++ dst->si_signo = src->si_signo; ++ dst->si_errno = src->si_errno; ++ dst->si_code = code; ++ ++ switch (code) { ++ case SI_TIMER: ++ dst->si_tid = src->si_tid; ++ dst->si_overrun = src->si_overrun; ++ dst->si_value = src->si_value; ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ dst->si_value = src->si_value; ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ dst->si_pid = src->si_pid; ++ dst->si_uid = src->si_uid; ++ } ++} ++ ++int __cobalt_sigwait(sigset_t *set); ++ ++int __cobalt_sigtimedwait(sigset_t *set, ++ const struct timespec *timeout, ++ void __user *u_si, ++ bool compat); ++ ++int __cobalt_sigwaitinfo(sigset_t *set, ++ void __user *u_si, ++ bool compat); ++ ++int __cobalt_sigqueue(pid_t pid, int sig, const union sigval *value); ++ ++int cobalt_signal_send(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group); ++ ++int cobalt_signal_send_pid(pid_t pid, ++ struct cobalt_sigpending *sigp); ++ ++struct cobalt_sigpending *cobalt_signal_alloc(void); ++ ++void cobalt_signal_free(struct cobalt_sigpending *sigp); ++ ++void cobalt_signal_flush(struct cobalt_thread *thread); ++ ++int cobalt_signal_wait(sigset_t *set, struct siginfo *si, ++ xnticks_t timeout, xntmode_t tmode); ++ ++int __cobalt_kill(struct cobalt_thread *thread, ++ int sig, int group); ++ ++COBALT_SYSCALL_DECL(sigwait, ++ (const sigset_t __user *u_set, int __user *u_sig)); ++ ++COBALT_SYSCALL_DECL(sigtimedwait, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si, ++ const struct timespec __user *u_timeout)); ++ ++COBALT_SYSCALL_DECL(sigwaitinfo, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si)); ++ ++COBALT_SYSCALL_DECL(sigpending, ++ (old_sigset_t __user *u_set)); ++ ++COBALT_SYSCALL_DECL(kill, (pid_t pid, int sig)); ++ ++COBALT_SYSCALL_DECL(sigqueue, ++ (pid_t pid, int sig, const union sigval __user *u_value)); ++ ++int cobalt_signal_init(void); ++ ++#endif /* !_COBALT_POSIX_SIGNAL_H */ +--- linux/kernel/xenomai/posix/signal.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/posix/signal.c 2021-04-07 16:01:25.956635982 +0800 +@@ -0,0 +1,616 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include "internal.h" ++#include "signal.h" ++#include "thread.h" ++#include "timer.h" ++#include "clock.h" ++ ++static void *sigpending_mem; ++ ++static LIST_HEAD(sigpending_pool); ++ ++/* ++ * How many signal notifications which may be pending at any given ++ * time, except timers. Cobalt signals are always thread directed, ++ * and we assume that in practice, each signal number is processed by ++ * a dedicated thread. We provide for up to three real-time signal ++ * events to pile up, and a single notification pending for other ++ * signals. Timers use a fast queuing logic maintaining a count of ++ * overruns, and therefore do not consume any memory from this pool. 
++ */ ++#define __SIGPOOL_SIZE (sizeof(struct cobalt_sigpending) * \ ++ (_NSIG + (SIGRTMAX - SIGRTMIN) * 2)) ++ ++static int cobalt_signal_deliver(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group) ++{ /* nklocked, IRQs off */ ++ struct cobalt_sigwait_context *swc; ++ struct xnthread_wait_context *wc; ++ struct list_head *sigwaiters; ++ int sig, ret; ++ ++ sig = sigp->si.si_signo; ++ XENO_BUG_ON(COBALT, sig < 1 || sig > _NSIG); ++ ++ /* ++ * Attempt to deliver the signal immediately to the initial ++ * target that waits for it. ++ */ ++ if (xnsynch_pended_p(&thread->sigwait)) { ++ wc = xnthread_get_wait_context(&thread->threadbase); ++ swc = container_of(wc, struct cobalt_sigwait_context, wc); ++ if (sigismember(swc->set, sig)) ++ goto deliver; ++ } ++ ++ /* ++ * If that does not work out and we are sending to a thread ++ * group, try to deliver to any thread from the same process ++ * waiting for that signal. ++ */ ++ sigwaiters = &thread->process->sigwaiters; ++ if (!group || list_empty(sigwaiters)) ++ return 0; ++ ++ list_for_each_entry(thread, sigwaiters, signext) { ++ wc = xnthread_get_wait_context(&thread->threadbase); ++ swc = container_of(wc, struct cobalt_sigwait_context, wc); ++ if (sigismember(swc->set, sig)) ++ goto deliver; ++ } ++ ++ return 0; ++deliver: ++ cobalt_copy_siginfo(sigp->si.si_code, swc->si, &sigp->si); ++ cobalt_call_extension(signal_deliver, &thread->extref, ++ ret, swc->si, sigp); ++ xnthread_complete_wait(&swc->wc); ++ xnsynch_wakeup_one_sleeper(&thread->sigwait); ++ list_del(&thread->signext); ++ ++ /* ++ * This is an immediate delivery bypassing any queuing, so we ++ * have to release the sigpending data right away before ++ * leaving. ++ */ ++ cobalt_signal_free(sigp); ++ ++ return 1; ++} ++ ++int cobalt_signal_send(struct cobalt_thread *thread, ++ struct cobalt_sigpending *sigp, ++ int group) ++{ /* nklocked, IRQs off */ ++ struct list_head *sigq; ++ int sig, ret; ++ ++ /* Can we deliver this signal immediately? */ ++ ret = cobalt_signal_deliver(thread, sigp, group); ++ if (ret) ++ return ret; /* Yep, done. */ ++ ++ /* ++ * Nope, attempt to queue it. We start by calling any Cobalt ++ * extension for queuing the signal first. ++ */ ++ if (cobalt_call_extension(signal_queue, &thread->extref, ret, sigp)) { ++ if (ret) ++ /* Queuing done remotely or error. */ ++ return ret; ++ } ++ ++ sig = sigp->si.si_signo; ++ sigq = thread->sigqueues + sig - 1; ++ if (!list_empty(sigq)) { ++ /* Queue non-rt signals only once. */ ++ if (sig < SIGRTMIN) ++ return 0; ++ /* Queue rt signal source only once (SI_TIMER). 
*/ ++ if (!list_empty(&sigp->next)) ++ return 0; ++ } ++ ++ sigaddset(&thread->sigpending, sig); ++ list_add_tail(&sigp->next, sigq); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_send); ++ ++int cobalt_signal_send_pid(pid_t pid, struct cobalt_sigpending *sigp) ++{ /* nklocked, IRQs off */ ++ struct cobalt_thread *thread; ++ ++ thread = cobalt_thread_find(pid); ++ if (thread) ++ return cobalt_signal_send(thread, sigp, 0); ++ ++ return -ESRCH; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_send_pid); ++ ++struct cobalt_sigpending *cobalt_signal_alloc(void) ++{ /* nklocked, IRQs off */ ++ struct cobalt_sigpending *sigp; ++ ++ if (list_empty(&sigpending_pool)) { ++ if (xnclock_ratelimit()) ++ printk(XENO_WARNING "signal bucket pool underflows\n"); ++ return NULL; ++ } ++ ++ sigp = list_get_entry(&sigpending_pool, struct cobalt_sigpending, next); ++ INIT_LIST_HEAD(&sigp->next); ++ ++ return sigp; ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_alloc); ++ ++void cobalt_signal_free(struct cobalt_sigpending *sigp) ++{ /* nklocked, IRQs off */ ++ if ((void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) ++ list_add_tail(&sigp->next, &sigpending_pool); ++} ++EXPORT_SYMBOL_GPL(cobalt_signal_free); ++ ++void cobalt_signal_flush(struct cobalt_thread *thread) ++{ ++ struct cobalt_sigpending *sigp, *tmp; ++ struct list_head *sigq; ++ spl_t s; ++ int n; ++ ++ /* ++ * TCB is not accessible from userland anymore, no locking ++ * required. ++ */ ++ if (sigisemptyset(&thread->sigpending)) ++ return; ++ ++ for (n = 0; n < _NSIG; n++) { ++ sigq = thread->sigqueues + n; ++ if (list_empty(sigq)) ++ continue; ++ /* ++ * sigpending blocks must be unlinked so that we ++ * detect this fact when deleting their respective ++ * owners. ++ */ ++ list_for_each_entry_safe(sigp, tmp, sigq, next) { ++ list_del_init(&sigp->next); ++ if ((void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&sigp->next, &sigpending_pool); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } ++ } ++ ++ sigemptyset(&thread->sigpending); ++} ++ ++static int signal_put_siginfo(void __user *u_si, const struct siginfo *si, ++ int overrun) ++{ ++ struct siginfo __user *u_p = u_si; ++ int ret; ++ ++ ret = __xn_put_user(si->si_signo, &u_p->si_signo); ++ ret |= __xn_put_user(si->si_errno, &u_p->si_errno); ++ ret |= __xn_put_user(si->si_code, &u_p->si_code); ++ ++ /* ++ * Copy the generic/standard siginfo bits to userland. ++ */ ++ switch (si->si_code) { ++ case SI_TIMER: ++ ret |= __xn_put_user(si->si_tid, &u_p->si_tid); ++ ret |= __xn_put_user(si->si_ptr, &u_p->si_ptr); ++ ret |= __xn_put_user(overrun, &u_p->si_overrun); ++ break; ++ case SI_QUEUE: ++ case SI_MESGQ: ++ ret |= __xn_put_user(si->si_ptr, &u_p->si_ptr); ++ /* falldown wanted. 
*/ ++ case SI_USER: ++ ret |= __xn_put_user(si->si_pid, &u_p->si_pid); ++ ret |= __xn_put_user(si->si_uid, &u_p->si_uid); ++ } ++ ++ return ret; ++} ++ ++static int signal_wait(sigset_t *set, xnticks_t timeout, ++ void __user *u_si, bool compat) ++{ ++ struct cobalt_sigpending *sigp = NULL; ++ struct cobalt_sigwait_context swc; ++ struct cobalt_thread *curr; ++ int ret, sig, n, overrun; ++ unsigned long *p, *t, m; ++ struct siginfo si, *sip; ++ struct list_head *sigq; ++ spl_t s; ++ ++ curr = cobalt_current_thread(); ++ XENO_BUG_ON(COBALT, curr == NULL); ++ ++ if (u_si && !access_wok(u_si, sizeof(*u_si))) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++check: ++ if (sigisemptyset(&curr->sigpending)) ++ /* Most common/fast path. */ ++ goto wait; ++ ++ p = curr->sigpending.sig; /* pending */ ++ t = set->sig; /* tested */ ++ ++ for (n = 0, sig = 0; n < _NSIG_WORDS; ++n) { ++ m = *p++ & *t++; ++ if (m == 0) ++ continue; ++ sig = ffz(~m) + n *_NSIG_BPW + 1; ++ break; ++ } ++ ++ if (sig) { ++ sigq = curr->sigqueues + sig - 1; ++ if (list_empty(sigq)) { ++ sigdelset(&curr->sigpending, sig); ++ goto check; ++ } ++ sigp = list_get_entry(sigq, struct cobalt_sigpending, next); ++ INIT_LIST_HEAD(&sigp->next); /* Mark sigp as unlinked. */ ++ if (list_empty(sigq)) ++ sigdelset(&curr->sigpending, sig); ++ sip = &sigp->si; ++ ret = 0; ++ goto done; ++ } ++ ++wait: ++ if (timeout == XN_NONBLOCK) { ++ ret = -EAGAIN; ++ goto fail; ++ } ++ swc.set = set; ++ swc.si = &si; ++ xnthread_prepare_wait(&swc.wc); ++ list_add_tail(&curr->signext, &curr->process->sigwaiters); ++ ret = xnsynch_sleep_on(&curr->sigwait, timeout, XN_RELATIVE); ++ if (ret) { ++ list_del(&curr->signext); ++ ret = ret & XNBREAK ? -EINTR : -EAGAIN; ++ goto fail; ++ } ++ sig = si.si_signo; ++ sip = &si; ++done: ++ /* ++ * si_overrun raises a nasty issue since we have to ++ * collect+clear it atomically before we drop the lock, ++ * although we don't know in advance if any extension would ++ * use it along with the additional si_codes it may provide, ++ * but we must drop the lock before running the ++ * signal_copyinfo handler. ++ * ++ * Observing that si_overrun is likely the only "unstable" ++ * data from the signal information which might change under ++ * our feet while we copy the bits to userland, we collect it ++ * here from the atomic section for all unknown si_codes, ++ * then pass its value to the signal_copyinfo handler. ++ */ ++ switch (sip->si_code) { ++ case SI_TIMER: ++ overrun = cobalt_timer_deliver(curr, sip->si_tid); ++ break; ++ case SI_USER: ++ case SI_MESGQ: ++ case SI_QUEUE: ++ overrun = 0; ++ break; ++ default: ++ overrun = sip->si_overrun; ++ if (overrun) ++ sip->si_overrun = 0; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (u_si == NULL) ++ goto out; /* Return signo only. */ ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (compat) { ++ ret = sys32_put_siginfo(u_si, sip, overrun); ++ if (!ret) ++ /* Allow an extended target to receive more data. */ ++ cobalt_call_extension(signal_copyinfo_compat, ++ &curr->extref, ret, u_si, sip, ++ overrun); ++ } else ++#endif ++ { ++ ret = signal_put_siginfo(u_si, sip, overrun); ++ if (!ret) ++ /* Allow an extended target to receive more data. */ ++ cobalt_call_extension(signal_copyinfo, &curr->extref, ++ ret, u_si, sip, overrun); ++ } ++ ++out: ++ /* ++ * If we pulled the signal information from a sigpending ++ * block, release it to the free pool if applicable. 
++ */ ++ if (sigp && ++ (void *)sigp >= sigpending_mem && ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&sigp->next, &sigpending_pool); ++ xnlock_put_irqrestore(&nklock, s); ++ /* no more ref. to sigp beyond this point. */ ++ } ++ ++ return ret ? -EFAULT : sig; ++fail: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_sigwait(sigset_t *set) ++{ ++ return signal_wait(set, XN_INFINITE, NULL, false); ++} ++ ++COBALT_SYSCALL(sigwait, primary, ++ (const sigset_t __user *u_set, int __user *u_sig)) ++{ ++ sigset_t set; ++ int sig; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ sig = signal_wait(&set, XN_INFINITE, NULL, false); ++ if (sig < 0) ++ return sig; ++ ++ return cobalt_copy_to_user(u_sig, &sig, sizeof(*u_sig)); ++} ++ ++int __cobalt_sigtimedwait(sigset_t *set, ++ const struct timespec *timeout, ++ void __user *u_si, ++ bool compat) ++{ ++ xnticks_t ticks; ++ ++ if ((unsigned long)timeout->tv_nsec >= ONE_BILLION) ++ return -EINVAL; ++ ++ ticks = ts2ns(timeout); ++ if (ticks++ == 0) ++ ticks = XN_NONBLOCK; ++ ++ return signal_wait(set, ticks, u_si, compat); ++} ++ ++COBALT_SYSCALL(sigtimedwait, nonrestartable, ++ (const sigset_t __user *u_set, ++ struct siginfo __user *u_si, ++ const struct timespec __user *u_timeout)) ++{ ++ struct timespec timeout; ++ sigset_t set; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ if (cobalt_copy_from_user(&timeout, u_timeout, sizeof(timeout))) ++ return -EFAULT; ++ ++ return __cobalt_sigtimedwait(&set, &timeout, u_si, false); ++} ++ ++int __cobalt_sigwaitinfo(sigset_t *set, ++ void __user *u_si, ++ bool compat) ++{ ++ return signal_wait(set, XN_INFINITE, u_si, compat); ++} ++ ++COBALT_SYSCALL(sigwaitinfo, nonrestartable, ++ (const sigset_t __user *u_set, struct siginfo __user *u_si)) ++{ ++ sigset_t set; ++ ++ if (cobalt_copy_from_user(&set, u_set, sizeof(set))) ++ return -EFAULT; ++ ++ return __cobalt_sigwaitinfo(&set, u_si, false); ++} ++ ++COBALT_SYSCALL(sigpending, primary, (old_sigset_t __user *u_set)) ++{ ++ struct cobalt_thread *curr = cobalt_current_thread(); ++ ++ return cobalt_copy_to_user(u_set, &curr->sigpending, sizeof(*u_set)); ++} ++ ++int __cobalt_kill(struct cobalt_thread *thread, int sig, int group) /* nklocked, IRQs off */ ++{ ++ struct cobalt_sigpending *sigp; ++ int ret = 0; ++ ++ /* ++ * We have undocumented pseudo-signals to suspend/resume/unblock ++ * threads, force them out of primary mode or even demote them ++ * to the weak scheduling class/priority. Process them early, ++ * before anyone can notice... ++ */ ++ switch(sig) { ++ case 0: ++ /* Check for existence only. */ ++ break; ++ case SIGSUSP: ++ /* ++ * All callers shall be tagged as conforming calls, so ++ * self-directed suspension can only happen from ++ * primary mode. Yummie. ++ */ ++ xnthread_suspend(&thread->threadbase, XNSUSP, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ if (&thread->threadbase == xnthread_current() && ++ xnthread_test_info(&thread->threadbase, XNBREAK)) ++ ret = -EINTR; ++ break; ++ case SIGRESM: ++ xnthread_resume(&thread->threadbase, XNSUSP); ++ goto resched; ++ case SIGRELS: ++ xnthread_unblock(&thread->threadbase); ++ goto resched; ++ case SIGKICK: ++ xnthread_kick(&thread->threadbase); ++ goto resched; ++ case SIGDEMT: ++ xnthread_demote(&thread->threadbase); ++ goto resched; ++ case 1 ... 
_NSIG: ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ sigp->si.si_signo = sig; ++ sigp->si.si_errno = 0; ++ sigp->si.si_code = SI_USER; ++ sigp->si.si_pid = task_pid_nr(current); ++ sigp->si.si_uid = get_current_uuid(); ++ if (cobalt_signal_send(thread, sigp, group) <= 0) ++ cobalt_signal_free(sigp); ++ } ++ resched: ++ xnsched_run(); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++COBALT_SYSCALL(kill, conforming, (pid_t pid, int sig)) ++{ ++ struct cobalt_thread *thread; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL) ++ ret = -ESRCH; ++ else ++ ret = __cobalt_kill(thread, sig, 1); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++int __cobalt_sigqueue(pid_t pid, int sig, const union sigval *value) ++{ ++ struct cobalt_sigpending *sigp; ++ struct cobalt_thread *thread; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ thread = cobalt_thread_find(pid); ++ if (thread == NULL) { ++ ret = -ESRCH; ++ goto out; ++ } ++ ++ switch(sig) { ++ case 0: ++ /* Check for existence only. */ ++ break; ++ case 1 ... _NSIG: ++ sigp = cobalt_signal_alloc(); ++ if (sigp) { ++ sigp->si.si_signo = sig; ++ sigp->si.si_errno = 0; ++ sigp->si.si_code = SI_QUEUE; ++ sigp->si.si_pid = task_pid_nr(current); ++ sigp->si.si_uid = get_current_uuid(); ++ sigp->si.si_value = *value; ++ if (cobalt_signal_send(thread, sigp, 1) <= 0) ++ cobalt_signal_free(sigp); ++ else ++ xnsched_run(); ++ } ++ break; ++ default: ++ /* Cobalt pseudo-signals are never process-directed. */ ++ ret = __cobalt_kill(thread, sig, 0); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__cobalt_sigqueue); ++ ++COBALT_SYSCALL(sigqueue, conforming, ++ (pid_t pid, int sig, const union sigval __user *u_value)) ++{ ++ union sigval val; ++ int ret; ++ ++ ret = cobalt_copy_from_user(&val, u_value, sizeof(val)); ++ ++ return ret ?: __cobalt_sigqueue(pid, sig, &val); ++} ++ ++__init int cobalt_signal_init(void) ++{ ++ struct cobalt_sigpending *sigp; ++ ++ sigpending_mem = xnheap_vmalloc(__SIGPOOL_SIZE); ++ if (sigpending_mem == NULL) ++ return -ENOMEM; ++ ++ for (sigp = sigpending_mem; ++ (void *)sigp < sigpending_mem + __SIGPOOL_SIZE; sigp++) ++ list_add_tail(&sigp->next, &sigpending_pool); ++ ++ return 0; ++} +--- linux/kernel/xenomai/debug.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/debug.h 2021-04-07 16:01:25.884636085 +0800 +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#ifndef _KERNEL_COBALT_DEBUG_H ++#define _KERNEL_COBALT_DEBUG_H ++ ++#include ++ ++struct xnthread; ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++ ++int xndebug_init(void); ++ ++void xndebug_cleanup(void); ++ ++void xndebug_shadow_init(struct xnthread *thread); ++ ++extern struct xnvfile_directory cobalt_debug_vfroot; ++ ++#else /* !XENO_OPT_DEBUG */ ++ ++static inline int xndebug_init(void) ++{ ++ return 0; ++} ++ ++static inline void xndebug_cleanup(void) ++{ ++} ++ ++static inline void xndebug_shadow_init(struct xnthread *thread) ++{ ++} ++ ++#endif /* !XENO_OPT_DEBUG */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_TRACE_RELAX ++void xndebug_notify_relax(struct xnthread *thread, ++ int reason); ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason); ++#else ++static inline ++void xndebug_notify_relax(struct xnthread *thread, int reason) ++{ ++} ++static inline ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason) ++{ ++ /* Simply ignore. */ ++} ++#endif ++ ++#endif /* !_KERNEL_COBALT_DEBUG_H */ +--- linux/kernel/xenomai/intr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/intr.c 2021-04-07 16:01:25.879636092 +0800 +@@ -0,0 +1,1204 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * Copyright (C) 2005,2006 Dmitry Adamushko . ++ * Copyright (C) 2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_irq Interrupt management ++ * @{ ++ */ ++#define XNINTR_MAX_UNHANDLED 1000 ++ ++static DEFINE_MUTEX(intrlock); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++struct xnintr nktimer; /* Only for statistics */ ++static int xnintr_count = 1; /* Number of attached xnintr objects + nktimer */ ++static int xnintr_list_rev; /* Modification counter of xnintr list */ ++ ++/* Both functions update xnintr_list_rev at the very end. ++ * This guarantees that module.c::stat_seq_open() won't get ++ * an up-to-date xnintr_list_rev and old xnintr_count. */ ++ ++static inline void stat_counter_inc(void) ++{ ++ xnintr_count++; ++ smp_mb(); ++ xnintr_list_rev++; ++} ++ ++static inline void stat_counter_dec(void) ++{ ++ xnintr_count--; ++ smp_mb(); ++ xnintr_list_rev++; ++} ++ ++static inline void sync_stat_references(struct xnintr *intr) ++{ ++ struct xnirqstat *statp; ++ struct xnsched *sched; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ statp = per_cpu_ptr(intr->stats, cpu); ++ /* Synchronize on all dangling references to go away. 
*/ ++ while (sched->current_account == &statp->account) ++ cpu_relax(); ++ } ++} ++ ++static void clear_irqstats(struct xnintr *intr) ++{ ++ struct xnirqstat *p; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ p = per_cpu_ptr(intr->stats, cpu); ++ memset(p, 0, sizeof(*p)); ++ } ++} ++ ++static inline void alloc_irqstats(struct xnintr *intr) ++{ ++ intr->stats = alloc_percpu(struct xnirqstat); ++ clear_irqstats(intr); ++} ++ ++static inline void free_irqstats(struct xnintr *intr) ++{ ++ free_percpu(intr->stats); ++} ++ ++static inline void query_irqstats(struct xnintr *intr, int cpu, ++ struct xnintr_iterator *iterator) ++{ ++ struct xnirqstat *statp; ++ xnticks_t last_switch; ++ ++ statp = per_cpu_ptr(intr->stats, cpu); ++ iterator->hits = xnstat_counter_get(&statp->hits); ++ last_switch = xnsched_struct(cpu)->last_account_switch; ++ iterator->exectime_period = statp->account.total; ++ iterator->account_period = last_switch - statp->account.start; ++ statp->sum.total += iterator->exectime_period; ++ iterator->exectime_total = statp->sum.total; ++ statp->account.total = 0; ++ statp->account.start = last_switch; ++} ++ ++static void inc_irqstats(struct xnintr *intr, struct xnsched *sched, xnticks_t start) ++{ ++ struct xnirqstat *statp; ++ ++ statp = raw_cpu_ptr(intr->stats); ++ xnstat_counter_inc(&statp->hits); ++ xnstat_exectime_lazy_switch(sched, &statp->account, start); ++} ++ ++static inline void switch_to_irqstats(struct xnintr *intr, ++ struct xnsched *sched) ++{ ++ struct xnirqstat *statp; ++ ++ statp = raw_cpu_ptr(intr->stats); ++ xnstat_exectime_switch(sched, &statp->account); ++} ++ ++static inline void switch_from_irqstats(struct xnsched *sched, ++ xnstat_exectime_t *prev) ++{ ++ xnstat_exectime_switch(sched, prev); ++} ++ ++static inline xnstat_exectime_t *switch_core_irqstats(struct xnsched *sched) ++{ ++ struct xnirqstat *statp; ++ xnstat_exectime_t *prev; ++ ++ statp = xnstat_percpu_data; ++ prev = xnstat_exectime_switch(sched, &statp->account); ++ xnstat_counter_inc(&statp->hits); ++ ++ return prev; ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static inline void stat_counter_inc(void) {} ++ ++static inline void stat_counter_dec(void) {} ++ ++static inline void sync_stat_references(struct xnintr *intr) {} ++ ++static inline void alloc_irqstats(struct xnintr *intr) {} ++ ++static inline void free_irqstats(struct xnintr *intr) {} ++ ++static inline void clear_irqstats(struct xnintr *intr) {} ++ ++static inline void query_irqstats(struct xnintr *intr, int cpu, ++ struct xnintr_iterator *iterator) {} ++ ++static inline void inc_irqstats(struct xnintr *intr, struct xnsched *sched, xnticks_t start) {} ++ ++static inline void switch_to_irqstats(struct xnintr *intr, ++ struct xnsched *sched) {} ++ ++static inline void switch_from_irqstats(struct xnsched *sched, ++ xnstat_exectime_t *prev) {} ++ ++static inline xnstat_exectime_t *switch_core_irqstats(struct xnsched *sched) ++{ ++ return NULL; ++} ++ ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ ++static void xnintr_irq_handler(unsigned int irq, void *cookie); ++ ++void xnintr_host_tick(struct xnsched *sched) /* Interrupts off. */ ++{ ++ sched->lflags &= ~XNHTICK; ++#ifdef XNARCH_HOST_TICK_IRQ ++ ipipe_post_irq_root(XNARCH_HOST_TICK_IRQ); ++#endif ++} ++ ++/* ++ * Low-level core clock irq handler. This one forwards ticks from the ++ * Xenomai platform timer to nkclock exclusively. 
++ */ ++void xnintr_core_clock_handler(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ int cpu __maybe_unused = xnsched_cpu(sched); ++ xnstat_exectime_t *prev; ++ ++ if (!xnsched_supported_cpu(cpu)) { ++#ifdef XNARCH_HOST_TICK_IRQ ++ ipipe_post_irq_root(XNARCH_HOST_TICK_IRQ); ++#endif ++ return; ++ } ++ ++ prev = switch_core_irqstats(sched); ++ ++ trace_cobalt_clock_entry(per_cpu(ipipe_percpu.hrtimer_irq, cpu)); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&nklock); ++ xnclock_tick(&nkclock); ++ xnlock_put(&nklock); ++ ++ trace_cobalt_clock_exit(per_cpu(ipipe_percpu.hrtimer_irq, cpu)); ++ switch_from_irqstats(sched, prev); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ sched = xnsched_current(); ++ } ++ /* ++ * If the core clock interrupt preempted a real-time thread, ++ * any transition to the root thread has already triggered a ++ * host tick propagation from xnsched_run(), so at this point, ++ * we only need to propagate the host tick in case the ++ * interrupt preempted the root thread. ++ */ ++ if ((sched->lflags & XNHTICK) && ++ xnthread_test_state(sched->curr, XNROOT)) ++ xnintr_host_tick(sched); ++} ++ ++struct irqdisable_work { ++ struct ipipe_work_header work; /* Must be first. */ ++ int irq; ++}; ++ ++static void lostage_irqdisable_line(struct ipipe_work_header *work) ++{ ++ struct irqdisable_work *rq; ++ ++ rq = container_of(work, struct irqdisable_work, work); ++ ipipe_disable_irq(rq->irq); ++} ++ ++static void disable_irq_line(int irq) ++{ ++ struct irqdisable_work diswork = { ++ .work = { ++ .size = sizeof(diswork), ++ .handler = lostage_irqdisable_line, ++ }, ++ .irq = irq, ++ }; ++ ++ ipipe_post_work_root(&diswork, work); ++} ++ ++/* Optional support for shared interrupts. */ ++ ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ ++struct xnintr_vector { ++ DECLARE_XNLOCK(lock); ++ struct xnintr *handlers; ++ int unhandled; ++} ____cacheline_aligned_in_smp; ++ ++static struct xnintr_vector vectors[IPIPE_NR_IRQS]; ++ ++static inline struct xnintr *xnintr_vec_first(unsigned int irq) ++{ ++ return vectors[irq].handlers; ++} ++ ++static inline struct xnintr *xnintr_vec_next(struct xnintr *prev) ++{ ++ return prev->next; ++} ++ ++static void disable_shared_irq_line(struct xnintr_vector *vec) ++{ ++ int irq = vec - vectors; ++ struct xnintr *intr; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ while (intr) { ++ set_bit(XN_IRQSTAT_DISABLED, &intr->status); ++ intr = intr->next; ++ } ++ xnlock_put(&vec->lock); ++ disable_irq_line(irq); ++} ++ ++/* ++ * Low-level interrupt handler dispatching the user-defined ISRs for ++ * shared interrupts -- Called with interrupts off. ++ */ ++static void xnintr_vec_handler(unsigned int irq, void *cookie) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnintr_vector *vec = vectors + irq; ++ xnstat_exectime_t *prev; ++ struct xnintr *intr; ++ xnticks_t start; ++ int s = 0, ret; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ while (intr) { ++ /* ++ * NOTE: We assume that no CPU migration can occur ++ * while running the interrupt service routine. 
++ */ ++ ret = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (ret & XN_IRQ_STATMASK) == 0); ++ s |= ret; ++ if (ret & XN_IRQ_HANDLED) { ++ inc_irqstats(intr, sched, start); ++ start = xnstat_exectime_now(); ++ } ++ intr = intr->next; ++ } ++ ++ xnlock_put(&vec->lock); ++ ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++vec->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else ++ vec->unhandled = 0; ++ ++ if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else if (s & XN_IRQ_DISABLE) ++ disable_shared_irq_line(vec); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++/* ++ * Low-level interrupt handler dispatching the user-defined ISRs for ++ * shared edge-triggered interrupts -- Called with interrupts off. ++ */ ++static void xnintr_edge_vec_handler(unsigned int irq, void *cookie) ++{ ++ const int MAX_EDGEIRQ_COUNTER = 128; ++ struct xnsched *sched = xnsched_current(); ++ struct xnintr_vector *vec = vectors + irq; ++ struct xnintr *intr, *end = NULL; ++ int s = 0, counter = 0, ret; ++ xnstat_exectime_t *prev; ++ xnticks_t start; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ intr = vec->handlers; ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ while (intr != end) { ++ switch_to_irqstats(intr, sched); ++ /* ++ * NOTE: We assume that no CPU migration will occur ++ * while running the interrupt service routine. ++ */ ++ ret = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (ret & XN_IRQ_STATMASK) == 0); ++ s |= ret; ++ ++ if (ret & XN_IRQ_HANDLED) { ++ end = NULL; ++ inc_irqstats(intr, sched, start); ++ start = xnstat_exectime_now(); ++ } else if (end == NULL) ++ end = intr; ++ ++ if (counter++ > MAX_EDGEIRQ_COUNTER) ++ break; ++ ++ intr = intr->next; ++ if (intr == NULL) ++ intr = vec->handlers; ++ } ++ ++ xnlock_put(&vec->lock); ++ ++ if (counter > MAX_EDGEIRQ_COUNTER) ++ printk(XENO_ERR "%s: failed to get the IRQ%d line free\n", ++ __FUNCTION__, irq); ++ ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++vec->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else ++ vec->unhandled = 0; ++ ++ if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else if (s & XN_IRQ_DISABLE) ++ disable_shared_irq_line(vec); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++static inline bool cobalt_owns_irq(int irq) ++{ ++ ipipe_irq_handler_t h; ++ ++ h = __ipipe_irq_handler(&xnsched_realtime_domain, irq); ++ ++ return h == xnintr_vec_handler || ++ h == xnintr_edge_vec_handler || ++ h == xnintr_irq_handler; ++} ++ ++static inline int xnintr_irq_attach(struct xnintr *intr) ++{ ++ struct xnintr_vector *vec = vectors + intr->irq; ++ struct xnintr *prev, **p = &vec->handlers; ++ int ret; ++ ++ prev = *p; ++ if (prev) { ++ /* Check on whether the shared mode is allowed. 
*/ ++ if ((prev->flags & intr->flags & XN_IRQTYPE_SHARED) == 0 || ++ (prev->iack != intr->iack) ++ || ((prev->flags & XN_IRQTYPE_EDGE) != ++ (intr->flags & XN_IRQTYPE_EDGE))) ++ return -EBUSY; ++ ++ /* ++ * Get a position at the end of the list to insert the ++ * new element. ++ */ ++ while (prev) { ++ p = &prev->next; ++ prev = *p; ++ } ++ } else { ++ /* Initialize the corresponding interrupt channel */ ++ void (*handler) (unsigned, void *) = xnintr_irq_handler; ++ ++ if (intr->flags & XN_IRQTYPE_SHARED) { ++ if (intr->flags & XN_IRQTYPE_EDGE) ++ handler = xnintr_edge_vec_handler; ++ else ++ handler = xnintr_vec_handler; ++ ++ } ++ vec->unhandled = 0; ++ ++ ret = ipipe_request_irq(&xnsched_realtime_domain, ++ intr->irq, handler, intr, ++ (ipipe_irq_ackfn_t)intr->iack); ++ if (ret) ++ return ret; ++ } ++ ++ intr->next = NULL; ++ /* ++ * Add the given interrupt object. No need to synchronise with ++ * the IRQ handler, we are only extending the chain. ++ */ ++ *p = intr; ++ ++ return 0; ++} ++ ++static inline void xnintr_irq_detach(struct xnintr *intr) ++{ ++ struct xnintr_vector *vec = vectors + intr->irq; ++ struct xnintr *e, **p = &vec->handlers; ++ ++ while ((e = *p) != NULL) { ++ if (e == intr) { ++ /* Remove the given interrupt object from the list. */ ++ xnlock_get(&vec->lock); ++ *p = e->next; ++ xnlock_put(&vec->lock); ++ ++ sync_stat_references(intr); ++ ++ /* Release the IRQ line if this was the last user */ ++ if (vec->handlers == NULL) ++ ipipe_free_irq(&xnsched_realtime_domain, intr->irq); ++ ++ return; ++ } ++ p = &e->next; ++ } ++ ++ printk(XENO_ERR "attempted to detach an unregistered interrupt descriptor\n"); ++} ++ ++#else /* !CONFIG_XENO_OPT_SHIRQ */ ++ ++struct xnintr_vector { ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++ DECLARE_XNLOCK(lock); ++#endif /* CONFIG_SMP || XENO_DEBUG(LOCKING) */ ++} ____cacheline_aligned_in_smp; ++ ++static struct xnintr_vector vectors[IPIPE_NR_IRQS]; ++ ++static inline bool cobalt_owns_irq(int irq) ++{ ++ ipipe_irq_handler_t h; ++ ++ h = __ipipe_irq_handler(&xnsched_realtime_domain, irq); ++ ++ return h == xnintr_irq_handler; ++} ++ ++static inline struct xnintr *xnintr_vec_first(unsigned int irq) ++{ ++ return cobalt_owns_irq(irq) ? ++ __ipipe_irq_cookie(&xnsched_realtime_domain, irq) : NULL; ++} ++ ++static inline struct xnintr *xnintr_vec_next(struct xnintr *prev) ++{ ++ return NULL; ++} ++ ++static inline int xnintr_irq_attach(struct xnintr *intr) ++{ ++ return ipipe_request_irq(&xnsched_realtime_domain, ++ intr->irq, xnintr_irq_handler, intr, ++ (ipipe_irq_ackfn_t)intr->iack); ++} ++ ++static inline void xnintr_irq_detach(struct xnintr *intr) ++{ ++ int irq = intr->irq; ++ ++ xnlock_get(&vectors[irq].lock); ++ ipipe_free_irq(&xnsched_realtime_domain, irq); ++ xnlock_put(&vectors[irq].lock); ++ ++ sync_stat_references(intr); ++} ++ ++#endif /* !CONFIG_XENO_OPT_SHIRQ */ ++ ++/* ++ * Low-level interrupt handler dispatching non-shared ISRs -- Called ++ * with interrupts off. 
++ */ ++static void xnintr_irq_handler(unsigned int irq, void *cookie) ++{ ++ struct xnintr_vector __maybe_unused *vec = vectors + irq; ++ struct xnsched *sched = xnsched_current(); ++ xnstat_exectime_t *prev; ++ struct xnintr *intr; ++ xnticks_t start; ++ int s = 0; ++ ++ prev = xnstat_exectime_get_current(sched); ++ start = xnstat_exectime_now(); ++ trace_cobalt_irq_entry(irq); ++ ++ ++sched->inesting; ++ sched->lflags |= XNINIRQ; ++ ++ xnlock_get(&vec->lock); ++ ++#ifdef CONFIG_SMP ++ /* ++ * In SMP case, we have to reload the cookie under the per-IRQ ++ * lock to avoid racing with xnintr_detach. However, we ++ * assume that no CPU migration will occur while running the ++ * interrupt service routine, so the scheduler pointer will ++ * remain valid throughout this function. ++ */ ++ intr = __ipipe_irq_cookie(&xnsched_realtime_domain, irq); ++ if (unlikely(intr == NULL)) ++ goto done; ++#else ++ intr = cookie; ++#endif ++ if (unlikely(test_bit(XN_IRQSTAT_DISABLED, &intr->status))) { ++ /* irqdisable_work is on its way, ignore. */ ++ xnlock_put(&vec->lock); ++ goto out; ++ } ++ ++ s = intr->isr(intr); ++ XENO_WARN_ON_ONCE(USER, (s & XN_IRQ_STATMASK) == 0); ++ if (unlikely(!(s & XN_IRQ_HANDLED))) { ++ if (++intr->unhandled == XNINTR_MAX_UNHANDLED) { ++ printk(XENO_ERR "%s: IRQ%d not handled. Disabling IRQ line\n", ++ __FUNCTION__, irq); ++ s |= XN_IRQ_DISABLE; ++ } ++ } else { ++ inc_irqstats(intr, sched, start); ++ intr->unhandled = 0; ++ } ++ ++ if (s & XN_IRQ_DISABLE) ++ set_bit(XN_IRQSTAT_DISABLED, &intr->status); ++#ifdef CONFIG_SMP ++done: ++#endif ++ xnlock_put(&vec->lock); ++ ++ if (s & XN_IRQ_DISABLE) ++ disable_irq_line(irq); ++ else if (s & XN_IRQ_PROPAGATE) ++ ipipe_post_irq_root(irq); ++ else ++ ipipe_end_irq(irq); ++out: ++ switch_from_irqstats(sched, prev); ++ ++ trace_cobalt_irq_exit(irq); ++ ++ if (--sched->inesting == 0) { ++ sched->lflags &= ~XNINIRQ; ++ xnsched_run(); ++ } ++} ++ ++int __init xnintr_mount(void) ++{ ++ int i; ++ for (i = 0; i < IPIPE_NR_IRQS; ++i) ++ xnlock_init(&vectors[i].lock); ++ return 0; ++} ++ ++/** ++ * @fn int xnintr_init(struct xnintr *intr,const char *name,unsigned int irq,xnisr_t isr,xniack_t iack,int flags) ++ * @brief Initialize an interrupt descriptor. ++ * ++ * When an interrupt occurs on the given @a irq line, the interrupt ++ * service routine @a isr is fired in order to deal with the hardware ++ * event. The interrupt handler may call any non-blocking service from ++ * the Cobalt core. ++ * ++ * Upon receipt of an IRQ, the interrupt handler @a isr is immediately ++ * called on behalf of the interrupted stack context, the rescheduling ++ * procedure is locked, and the interrupt line is masked in the system ++ * interrupt controller chip. Upon return, the status of the ++ * interrupt handler is checked for the following bits: ++ * ++ * - XN_IRQ_HANDLED indicates that the interrupt request was ++ * successfully handled. ++ * ++ * - XN_IRQ_NONE indicates the opposite to XN_IRQ_HANDLED, meaning ++ * that no interrupt source could be identified for the ongoing ++ * request by the handler. ++ * ++ * In addition, one of the following bits may be present in the ++ * status: ++ * ++ * - XN_IRQ_DISABLE tells the Cobalt core to disable the interrupt ++ * line before returning from the interrupt context. ++ * ++ * - XN_IRQ_PROPAGATE propagates the IRQ event down the interrupt ++ * pipeline to Linux. Using this flag is strongly discouraged, unless ++ * you fully understand the implications of such propagation. 
++ * ++ * @warning The handler should not use these bits if it shares the ++ * interrupt line with other handlers in the real-time domain. When ++ * any of these bits is detected, the interrupt line is left masked. ++ * ++ * A count of interrupt receipts is tracked into the interrupt ++ * descriptor, and reset to zero each time such descriptor is ++ * attached. Since this count could wrap around, it should be used as ++ * an indication of interrupt activity only. ++ * ++ * @param intr The address of a descriptor the Cobalt core will use to ++ * store the interrupt-specific data. ++ * ++ * @param name An ASCII string standing for the symbolic name of the ++ * interrupt or NULL. ++ * ++ * @param irq The IRQ line number associated with the interrupt ++ * descriptor. This value is architecture-dependent. An interrupt ++ * descriptor must be attached to the system by a call to ++ * xnintr_attach() before @a irq events can be received. ++ * ++ * @param isr The address of an interrupt handler, which is passed the ++ * address of the interrupt descriptor receiving the IRQ. ++ * ++ * @param iack The address of an optional interrupt acknowledge ++ * routine, aimed at replacing the default one. Only very specific ++ * situations actually require to override the default setting for ++ * this parameter, like having to acknowledge non-standard PIC ++ * hardware. @a iack should return a non-zero value to indicate that ++ * the interrupt has been properly acknowledged. If @a iack is NULL, ++ * the default routine will be used instead. ++ * ++ * @param flags A set of creation flags affecting the operation. The ++ * valid flags are: ++ * ++ * - XN_IRQTYPE_SHARED enables IRQ-sharing with other interrupt ++ * objects. ++ * ++ * - XN_IRQTYPE_EDGE is an additional flag need to be set together ++ * with XN_IRQTYPE_SHARED to enable IRQ-sharing of edge-triggered ++ * interrupts. ++ * ++ * @return 0 is returned on success. Otherwise, -EINVAL is returned if ++ * @a irq is not a valid interrupt number. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnintr_init(struct xnintr *intr, const char *name, ++ unsigned int irq, xnisr_t isr, xniack_t iack, ++ int flags) ++{ ++ secondary_mode_only(); ++ ++ if (irq >= IPIPE_NR_IRQS) ++ return -EINVAL; ++ ++ intr->irq = irq; ++ intr->isr = isr; ++ intr->iack = iack; ++ intr->cookie = NULL; ++ intr->name = name ? : ""; ++ intr->flags = flags; ++ intr->status = _XN_IRQSTAT_DISABLED; ++ intr->unhandled = 0; ++ raw_spin_lock_init(&intr->lock); ++#ifdef CONFIG_XENO_OPT_SHIRQ ++ intr->next = NULL; ++#endif ++ alloc_irqstats(intr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnintr_init); ++ ++/** ++ * @fn void xnintr_destroy(struct xnintr *intr) ++ * @brief Destroy an interrupt descriptor. ++ * ++ * Destroys an interrupt descriptor previously initialized by ++ * xnintr_init(). The descriptor is automatically detached by a call ++ * to xnintr_detach(). No more IRQs will be received through this ++ * descriptor after this service has returned. ++ * ++ * @param intr The address of the interrupt descriptor to destroy. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_destroy(struct xnintr *intr) ++{ ++ secondary_mode_only(); ++ xnintr_detach(intr); ++ free_irqstats(intr); ++} ++EXPORT_SYMBOL_GPL(xnintr_destroy); ++ ++/** ++ * @fn int xnintr_attach(struct xnintr *intr, void *cookie) ++ * @brief Attach an interrupt descriptor. ++ * ++ * Attach an interrupt descriptor previously initialized by ++ * xnintr_init(). 
This operation registers the descriptor at the ++ * interrupt pipeline, but does not enable the interrupt line yet. A ++ * call to xnintr_enable() is required to start receiving IRQs from ++ * the interrupt line associated to the descriptor. ++ * ++ * @param intr The address of the interrupt descriptor to attach. ++ * ++ * @param cookie A user-defined opaque value which is stored into the ++ * descriptor for further retrieval by the interrupt handler. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -EINVAL is returned if an error occurred while attaching the ++ * descriptor. ++ * ++ * - -EBUSY is returned if the descriptor was already attached. ++ * ++ * @note The caller must not hold nklock when invoking this service, ++ * this would cause deadlocks. ++ * ++ * @coretags{secondary-only} ++ * ++ * @note Attaching an interrupt descriptor resets the tracked number ++ * of IRQ receipts to zero. ++ */ ++int xnintr_attach(struct xnintr *intr, void *cookie) ++{ ++ int ret; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_attach(intr->irq); ++ ++ intr->cookie = cookie; ++ clear_irqstats(intr); ++ ++#ifdef CONFIG_SMP ++ ipipe_set_irq_affinity(intr->irq, xnsched_realtime_cpus); ++#endif /* CONFIG_SMP */ ++ ++ raw_spin_lock(&intr->lock); ++ ++ if (test_and_set_bit(XN_IRQSTAT_ATTACHED, &intr->status)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ ret = xnintr_irq_attach(intr); ++ if (ret) { ++ clear_bit(XN_IRQSTAT_ATTACHED, &intr->status); ++ goto out; ++ } ++ ++ stat_counter_inc(); ++out: ++ raw_spin_unlock(&intr->lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnintr_attach); ++ ++/** ++ * @fn int xnintr_detach(struct xnintr *intr) ++ * @brief Detach an interrupt descriptor. ++ * ++ * This call unregisters an interrupt descriptor previously attached ++ * by xnintr_attach() from the interrupt pipeline. Once detached, the ++ * associated interrupt line is disabled, but the descriptor remains ++ * valid. The descriptor can be attached anew by a call to ++ * xnintr_attach(). ++ * ++ * @param intr The address of the interrupt descriptor to detach. ++ * ++ * @note The caller must not hold nklock when invoking this ++ * service, this would cause deadlocks. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_detach(struct xnintr *intr) ++{ ++ secondary_mode_only(); ++ trace_cobalt_irq_detach(intr->irq); ++ ++ raw_spin_lock(&intr->lock); ++ ++ if (test_and_clear_bit(XN_IRQSTAT_ATTACHED, &intr->status)) { ++ xnintr_irq_detach(intr); ++ stat_counter_dec(); ++ } ++ ++ raw_spin_unlock(&intr->lock); ++} ++EXPORT_SYMBOL_GPL(xnintr_detach); ++ ++/** ++ * @fn void xnintr_enable(struct xnintr *intr) ++ * @brief Enable an interrupt line. ++ * ++ * Enables the interrupt line associated with an interrupt descriptor. ++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_enable(struct xnintr *intr) ++{ ++ unsigned long flags; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_enable(intr->irq); ++ ++ raw_spin_lock_irqsave(&intr->lock, flags); ++ ++ /* ++ * If disabled on entry, there is no way we could race with ++ * disable_irq_line(). ++ */ ++ if (test_and_clear_bit(XN_IRQSTAT_DISABLED, &intr->status)) ++ ipipe_enable_irq(intr->irq); ++ ++ raw_spin_unlock_irqrestore(&intr->lock, flags); ++} ++EXPORT_SYMBOL_GPL(xnintr_enable); ++ ++/** ++ * @fn void xnintr_disable(struct xnintr *intr) ++ * @brief Disable an interrupt line. ++ * ++ * Disables the interrupt line associated with an interrupt ++ * descriptor. 
++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_disable(struct xnintr *intr) ++{ ++ unsigned long flags; ++ ++ secondary_mode_only(); ++ trace_cobalt_irq_disable(intr->irq); ++ ++ /* We only need a virtual masking. */ ++ raw_spin_lock_irqsave(&intr->lock, flags); ++ ++ /* ++ * Racing with disable_irq_line() is innocuous, the pipeline ++ * would serialize calls to ipipe_disable_irq() across CPUs, ++ * and the descriptor status would still properly match the ++ * line status in the end. ++ */ ++ if (!test_and_set_bit(XN_IRQSTAT_DISABLED, &intr->status)) ++ ipipe_disable_irq(intr->irq); ++ ++ raw_spin_unlock_irqrestore(&intr->lock, flags); ++} ++EXPORT_SYMBOL_GPL(xnintr_disable); ++ ++/** ++ * @fn void xnintr_affinity(struct xnintr *intr, cpumask_t cpumask) ++ * @brief Set processor affinity of interrupt. ++ * ++ * Restricts the IRQ line associated with the interrupt descriptor @a ++ * intr to be received only on processors which bits are set in @a ++ * cpumask. ++ * ++ * @param intr The address of the interrupt descriptor. ++ * ++ * @param cpumask The new processor affinity. ++ * ++ * @note Depending on architectures, setting more than one bit in @a ++ * cpumask could be meaningless. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnintr_affinity(struct xnintr *intr, cpumask_t cpumask) ++{ ++ secondary_mode_only(); ++#ifdef CONFIG_SMP ++ ipipe_set_irq_affinity(intr->irq, cpumask); ++#endif ++} ++EXPORT_SYMBOL_GPL(xnintr_affinity); ++ ++static inline int xnintr_is_timer_irq(int irq) ++{ ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) ++ if (irq == per_cpu(ipipe_percpu.hrtimer_irq, cpu)) ++ return 1; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ ++int xnintr_get_query_lock(void) ++{ ++ return mutex_lock_interruptible(&intrlock) ? -ERESTARTSYS : 0; ++} ++ ++void xnintr_put_query_lock(void) ++{ ++ mutex_unlock(&intrlock); ++} ++ ++int xnintr_query_init(struct xnintr_iterator *iterator) ++{ ++ iterator->prev = NULL; ++ ++ /* The order is important here: first xnintr_list_rev then ++ * xnintr_count. On the other hand, xnintr_attach/detach() ++ * update xnintr_count first and then xnintr_list_rev. This ++ * should guarantee that we can't get an up-to-date ++ * xnintr_list_rev and old xnintr_count here. The other way ++ * around is not a problem as xnintr_query() will notice this ++ * fact later. Should xnintr_list_rev change later, ++ * xnintr_query() will trigger an appropriate error below. 
++ */ ++ iterator->list_rev = xnintr_list_rev; ++ smp_mb(); ++ ++ return xnintr_count; ++} ++ ++int xnintr_query_next(int irq, struct xnintr_iterator *iterator, ++ char *name_buf) ++{ ++ int cpu, nr_cpus = num_present_cpus(); ++ struct xnintr *intr; ++ ++ if (iterator->list_rev != xnintr_list_rev) ++ return -EAGAIN; ++ ++ intr = iterator->prev; ++ if (intr == NULL) { ++ if (xnintr_is_timer_irq(irq)) ++ intr = &nktimer; ++ else ++ intr = xnintr_vec_first(irq); ++ if (intr == NULL) ++ return -ENODEV; ++ iterator->prev = intr; ++ iterator->cpu = -1; ++ } ++ ++ for (;;) { ++ for (cpu = iterator->cpu + 1; cpu < nr_cpus; ++cpu) { ++ if (cpu_online(cpu)) { ++ ksformat(name_buf, XNOBJECT_NAME_LEN, "IRQ%d: %s", ++ irq, intr->name); ++ query_irqstats(intr, cpu, iterator); ++ iterator->cpu = cpu; ++ return 0; ++ } ++ } ++ ++ iterator->prev = xnintr_vec_next(intr); ++ if (iterator->prev == NULL) ++ return -ENODEV; ++ ++ iterator->cpu = -1; ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#include ++ ++static inline int format_irq_proc(unsigned int irq, ++ struct xnvfile_regular_iterator *it) ++{ ++ struct xnintr *intr; ++ struct irq_desc *d; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) ++ if (xnintr_is_timer_irq(irq)) { ++ xnvfile_printf(it, " [timer/%d]", cpu); ++ return 0; ++ } ++ ++#ifdef CONFIG_SMP ++ /* ++ * IPI numbers on ARM are not compile time constants, so do ++ * not use switch/case here. ++ */ ++ if (irq == IPIPE_HRTIMER_IPI) { ++ xnvfile_puts(it, " [timer-ipi]"); ++ return 0; ++ } ++ if (irq == IPIPE_RESCHEDULE_IPI) { ++ xnvfile_puts(it, " [reschedule]"); ++ return 0; ++ } ++ if (irq == IPIPE_CRITICAL_IPI) { ++ xnvfile_puts(it, " [sync]"); ++ return 0; ++ } ++#endif /* CONFIG_SMP */ ++ if (ipipe_virtual_irq_p(irq)) { ++ xnvfile_puts(it, " [virtual]"); ++ return 0; ++ } ++ ++ mutex_lock(&intrlock); ++ ++ if (!cobalt_owns_irq(irq)) { ++ xnvfile_puts(it, " "); ++ d = irq_to_desc(irq); ++ xnvfile_puts(it, d && d->name ? d->name : "-"); ++ } else { ++ intr = xnintr_vec_first(irq); ++ if (intr) { ++ xnvfile_puts(it, " "); ++ ++ do { ++ xnvfile_putc(it, ' '); ++ xnvfile_puts(it, intr->name); ++ intr = xnintr_vec_next(intr); ++ } while (intr); ++ } ++ } ++ ++ mutex_unlock(&intrlock); ++ ++ return 0; ++} ++ ++static int irq_vfile_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ int cpu, irq; ++ ++ /* FIXME: We assume the entire output fits in a single page. 
*/ ++ ++ xnvfile_puts(it, " IRQ "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ if (__ipipe_irq_handler(&xnsched_realtime_domain, irq) == NULL) ++ continue; ++ ++ xnvfile_printf(it, "\n%5d:", irq); ++ ++ for_each_realtime_cpu(cpu) { ++ xnvfile_printf(it, "%12lu", ++ __ipipe_cpudata_irq_hits(&xnsched_realtime_domain, cpu, ++ irq)); ++ } ++ ++ format_irq_proc(irq, it); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops irq_vfile_ops = { ++ .show = irq_vfile_show, ++}; ++ ++static struct xnvfile_regular irq_vfile = { ++ .ops = &irq_vfile_ops, ++}; ++ ++void xnintr_init_proc(void) ++{ ++ xnvfile_init_regular("irq", &irq_vfile, &cobalt_vfroot); ++} ++ ++void xnintr_cleanup_proc(void) ++{ ++ xnvfile_destroy_regular(&irq_vfile); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ +--- linux/kernel/xenomai/apc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/apc.c 2021-04-07 16:01:25.873636101 +0800 +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (C) 2007,2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_apc Asynchronous Procedure Calls ++ * ++ * Services for scheduling function calls in the Linux domain ++ * ++ * APC is the acronym for Asynchronous Procedure Call, a mean by which ++ * activities from the Xenomai domain can schedule deferred ++ * invocations of handlers to be run into the Linux domain, as soon as ++ * possible when the Linux kernel gets back in control. ++ * ++ * Up to BITS_PER_LONG APC slots can be active at any point in time. ++ * ++ * APC support is built upon the interrupt pipeline's virtual ++ * interrupt support. ++ * ++ * @{ ++ */ ++static IPIPE_DEFINE_SPINLOCK(apc_lock); ++ ++void apc_dispatch(unsigned int virq, void *arg) ++{ ++ void (*handler)(void *), *cookie; ++ unsigned long *p; ++ int apc; ++ ++ /* ++ * CAUTION: The APC dispatch loop is not protected against a ++ * handler becoming unavailable while processing the pending ++ * queue; the software must make sure to uninstall all APCs ++ * before eventually unloading any module that may contain APC ++ * handlers. We keep the handler affinity with the poster's ++ * CPU, so that the handler is invoked on the same CPU than ++ * the code which called xnapc_schedule(). ++ */ ++ raw_spin_lock(&apc_lock); ++ ++ /* This is atomic linux context (non-threaded IRQ). 
*/ ++ p = &raw_cpu_ptr(&cobalt_machine_cpudata)->apc_pending; ++ while (*p) { ++ apc = ffnz(*p); ++ clear_bit(apc, p); ++ handler = cobalt_pipeline.apc_table[apc].handler; ++ cookie = cobalt_pipeline.apc_table[apc].cookie; ++ raw_cpu_ptr(&cobalt_machine_cpudata)->apc_shots[apc]++; ++ raw_spin_unlock(&apc_lock); ++ handler(cookie); ++ raw_spin_lock(&apc_lock); ++ } ++ ++ raw_spin_unlock(&apc_lock); ++} ++ ++/** ++ * @fn int xnapc_alloc(const char *name,void (*handler)(void *cookie),void *cookie) ++ * ++ * @brief Allocate an APC slot. ++ * ++ * APC is the acronym for Asynchronous Procedure Call, a mean by which ++ * activities from the Xenomai domain can schedule deferred ++ * invocations of handlers to be run into the Linux domain, as soon as ++ * possible when the Linux kernel gets back in control. Up to ++ * BITS_PER_LONG APC slots can be active at any point in time. APC ++ * support is built upon the interrupt pipeline's virtual interrupt ++ * support. ++ * ++ * Any Linux kernel service which is callable from a regular Linux ++ * interrupt handler is in essence available to APC handlers. ++ * ++ * @param name is a symbolic name identifying the APC which will get ++ * reported through the /proc/xenomai/apc interface. Passing NULL to ++ * create an anonymous APC is allowed. ++ * ++ * @param handler The address of the fault handler to call upon ++ * exception condition. The handle will be passed the @a cookie value ++ * unmodified. ++ * ++ * @param cookie A user-defined opaque pointer the APC handler ++ * receives as its sole argument. ++ * ++ * @return a valid APC identifier is returned upon success, or a ++ * negative error code otherwise: ++ * ++ * - -EINVAL is returned if @a handler is invalid. ++ * ++ * - -EBUSY is returned if no more APC slots are available. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnapc_alloc(const char *name, ++ void (*handler)(void *cookie), void *cookie) ++{ ++ unsigned long flags; ++ int apc; ++ ++ if (handler == NULL) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&apc_lock, flags); ++ ++ if (cobalt_pipeline.apc_map == ~0) { ++ apc = -EBUSY; ++ goto out; ++ } ++ ++ apc = ffz(cobalt_pipeline.apc_map); ++ __set_bit(apc, &cobalt_pipeline.apc_map); ++ cobalt_pipeline.apc_table[apc].handler = handler; ++ cobalt_pipeline.apc_table[apc].cookie = cookie; ++ cobalt_pipeline.apc_table[apc].name = name; ++out: ++ raw_spin_unlock_irqrestore(&apc_lock, flags); ++ ++ return apc; ++} ++EXPORT_SYMBOL_GPL(xnapc_alloc); ++ ++/** ++ * @fn int xnapc_free(int apc) ++ * ++ * @brief Releases an APC slot. ++ * ++ * This service deallocates an APC slot obtained by xnapc_alloc(). ++ * ++ * @param apc The APC id. to release, as returned by a successful call ++ * to the xnapc_alloc() service. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnapc_free(int apc) ++{ ++ BUG_ON(apc < 0 || apc >= BITS_PER_LONG); ++ clear_bit(apc, &cobalt_pipeline.apc_map); ++ smp_mb__after_atomic(); ++} ++EXPORT_SYMBOL_GPL(xnapc_free); ++ ++/** @} */ +--- linux/kernel/xenomai/sched.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched.c 2021-04-07 16:01:25.869636107 +0800 +@@ -0,0 +1,1587 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#define CREATE_TRACE_POINTS ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_sched Thread scheduling control ++ * @{ ++ */ ++ ++DEFINE_PER_CPU(struct xnsched, nksched); ++EXPORT_PER_CPU_SYMBOL_GPL(nksched); ++ ++cpumask_t cobalt_cpu_affinity = CPU_MASK_ALL; ++EXPORT_SYMBOL_GPL(cobalt_cpu_affinity); ++ ++LIST_HEAD(nkthreadq); ++ ++int cobalt_nrthreads; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_rev_tag nkthreadlist_tag; ++#endif ++ ++static struct xnsched_class *xnsched_class_highest; ++ ++#define for_each_xnsched_class(p) \ ++ for (p = xnsched_class_highest; p; p = p->next) ++ ++static void xnsched_register_class(struct xnsched_class *sched_class) ++{ ++ sched_class->next = xnsched_class_highest; ++ xnsched_class_highest = sched_class; ++ ++ /* ++ * Classes shall be registered by increasing priority order, ++ * idle first and up. ++ */ ++ XENO_BUG_ON(COBALT, sched_class->next && ++ sched_class->next->weight > sched_class->weight); ++ ++ printk(XENO_INFO "scheduling class %s registered.\n", sched_class->name); ++} ++ ++void xnsched_register_classes(void) ++{ ++ xnsched_register_class(&xnsched_class_idle); ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ xnsched_register_class(&xnsched_class_weak); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_TP ++ xnsched_register_class(&xnsched_class_tp); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_SPORADIC ++ xnsched_register_class(&xnsched_class_sporadic); ++#endif ++#ifdef CONFIG_XENO_OPT_SCHED_QUOTA ++ xnsched_register_class(&xnsched_class_quota); ++#endif ++ xnsched_register_class(&xnsched_class_rt); ++} ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ ++static unsigned long wd_timeout_arg = CONFIG_XENO_OPT_WATCHDOG_TIMEOUT; ++module_param_named(watchdog_timeout, wd_timeout_arg, ulong, 0644); ++ ++static inline xnticks_t get_watchdog_timeout(void) ++{ ++ return wd_timeout_arg * 1000000000ULL; ++} ++ ++/** ++ * @internal ++ * @fn void watchdog_handler(struct xntimer *timer) ++ * @brief Process watchdog ticks. ++ * ++ * This internal routine handles incoming watchdog triggers to detect ++ * software lockups. It forces the offending thread to stop ++ * monopolizing the CPU, either by kicking it out of primary mode if ++ * running in user space, or cancelling it if kernel-based. ++ * ++ * @coretags{coreirq-only, atomic-entry} ++ */ ++static void watchdog_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnthread *curr = sched->curr; ++ ++ /* ++ * CAUTION: The watchdog tick might have been delayed while we ++ * were busy switching the CPU to secondary mode at the ++ * trigger date eventually. Make sure that we are not about to ++ * kick the incoming root thread. 
++ */ ++ if (xnthread_test_state(curr, XNROOT)) ++ return; ++ ++ trace_cobalt_watchdog_signal(curr); ++ ++ if (xnthread_test_state(curr, XNUSER)) { ++ printk(XENO_WARNING "watchdog triggered on CPU #%d -- runaway thread " ++ "'%s' signaled\n", xnsched_cpu(sched), curr->name); ++ xnthread_call_mayday(curr, SIGDEBUG_WATCHDOG); ++ } else { ++ printk(XENO_WARNING "watchdog triggered on CPU #%d -- runaway thread " ++ "'%s' canceled\n", xnsched_cpu(sched), curr->name); ++ /* ++ * On behalf on an IRQ handler, xnthread_cancel() ++ * would go half way cancelling the preempted ++ * thread. Therefore we manually raise XNKICKED to ++ * cause the next call to xnthread_suspend() to return ++ * early in XNBREAK condition, and XNCANCELD so that ++ * @thread exits next time it invokes ++ * xnthread_test_cancel(). ++ */ ++ xnthread_set_info(curr, XNKICKED|XNCANCELD); ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++ ++static void roundrobin_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched = container_of(timer, struct xnsched, rrbtimer); ++ xnsched_tick(sched); ++} ++ ++static void xnsched_init(struct xnsched *sched, int cpu) ++{ ++ char rrbtimer_name[XNOBJECT_NAME_LEN]; ++ char htimer_name[XNOBJECT_NAME_LEN]; ++ char root_name[XNOBJECT_NAME_LEN]; ++ union xnsched_policy_param param; ++ struct xnthread_init_attr attr; ++ struct xnsched_class *p; ++ ++#ifdef CONFIG_SMP ++ sched->cpu = cpu; ++ ksformat(htimer_name, sizeof(htimer_name), "[host-timer/%u]", cpu); ++ ksformat(rrbtimer_name, sizeof(rrbtimer_name), "[rrb-timer/%u]", cpu); ++ ksformat(root_name, sizeof(root_name), "ROOT/%u", cpu); ++ cpumask_clear(&sched->resched); ++#else ++ strcpy(htimer_name, "[host-timer]"); ++ strcpy(rrbtimer_name, "[rrb-timer]"); ++ strcpy(root_name, "ROOT"); ++#endif ++ for_each_xnsched_class(p) { ++ if (p->sched_init) ++ p->sched_init(sched); ++ } ++ ++ sched->status = 0; ++ sched->lflags = XNIDLE; ++ sched->inesting = 0; ++ sched->curr = &sched->rootcb; ++ ++ attr.flags = XNROOT | XNFPU; ++ attr.name = root_name; ++ attr.personality = &xenomai_personality; ++ attr.affinity = *cpumask_of(cpu); ++ param.idle.prio = XNSCHED_IDLE_PRIO; ++ ++ __xnthread_init(&sched->rootcb, &attr, ++ sched, &xnsched_class_idle, ¶m); ++ ++ /* ++ * No direct handler here since the host timer processing is ++ * postponed to xnintr_irq_handler(), as part of the interrupt ++ * exit code. 
++ */ ++ xntimer_init(&sched->htimer, &nkclock, NULL, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_priority(&sched->htimer, XNTIMER_LOPRIO); ++ xntimer_set_name(&sched->htimer, htimer_name); ++ xntimer_init(&sched->rrbtimer, &nkclock, roundrobin_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&sched->rrbtimer, rrbtimer_name); ++ xntimer_set_priority(&sched->rrbtimer, XNTIMER_LOPRIO); ++ ++ xnstat_exectime_set_current(sched, &sched->rootcb.stat.account); ++#ifdef CONFIG_XENO_ARCH_FPU ++ sched->fpuholder = &sched->rootcb; ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ ++ xnthread_init_root_tcb(&sched->rootcb); ++ list_add_tail(&sched->rootcb.glink, &nkthreadq); ++ cobalt_nrthreads++; ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_init(&sched->wdtimer, &nkclock, watchdog_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&sched->wdtimer, "[watchdog]"); ++ xntimer_set_priority(&sched->wdtimer, XNTIMER_LOPRIO); ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++} ++ ++void xnsched_init_all(void) ++{ ++ struct xnsched *sched; ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ sched = &per_cpu(nksched, cpu); ++ xnsched_init(sched, cpu); ++ } ++ ++#ifdef CONFIG_SMP ++ ipipe_request_irq(&xnsched_realtime_domain, ++ IPIPE_RESCHEDULE_IPI, ++ (ipipe_irq_handler_t)__xnsched_run_handler, ++ NULL, NULL); ++#endif ++} ++ ++static void xnsched_destroy(struct xnsched *sched) ++{ ++ xntimer_destroy(&sched->htimer); ++ xntimer_destroy(&sched->rrbtimer); ++ xntimer_destroy(&sched->rootcb.ptimer); ++ xntimer_destroy(&sched->rootcb.rtimer); ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_destroy(&sched->wdtimer); ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++} ++ ++void xnsched_destroy_all(void) ++{ ++ struct xnthread *thread, *tmp; ++ struct xnsched *sched; ++ int cpu; ++ spl_t s; ++ ++#ifdef CONFIG_SMP ++ ipipe_free_irq(&xnsched_realtime_domain, IPIPE_RESCHEDULE_IPI); ++#endif ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* NOTE: &nkthreadq can't be empty (root thread(s)). */ ++ list_for_each_entry_safe(thread, tmp, &nkthreadq, glink) { ++ if (!xnthread_test_state(thread, XNROOT)) ++ xnthread_cancel(thread); ++ } ++ ++ xnsched_run(); ++ ++ for_each_online_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ xnsched_destroy(sched); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static inline void set_thread_running(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++ xnthread_clear_state(thread, XNREADY); ++ if (xnthread_test_state(thread, XNRRB)) ++ xntimer_start(&sched->rrbtimer, ++ thread->rrperiod, XN_INFINITE, XN_RELATIVE); ++ else ++ xntimer_stop(&sched->rrbtimer); ++} ++ ++/* Must be called with nklock locked, interrupts off. */ ++struct xnthread *xnsched_pick_next(struct xnsched *sched) ++{ ++ struct xnsched_class *p __maybe_unused; ++ struct xnthread *curr = sched->curr; ++ struct xnthread *thread; ++ ++ if (!xnthread_test_state(curr, XNTHREAD_BLOCK_BITS | XNZOMBIE)) { ++ /* ++ * Do not preempt the current thread if it holds the ++ * scheduler lock. ++ */ ++ if (curr->lock_count > 0) { ++ xnsched_set_self_resched(sched); ++ return curr; ++ } ++ /* ++ * Push the current thread back to the run queue of ++ * the scheduling class it belongs to, if not yet ++ * linked to it (XNREADY tells us if it is). ++ */ ++ if (!xnthread_test_state(curr, XNREADY)) { ++ xnsched_requeue(curr); ++ xnthread_set_state(curr, XNREADY); ++ } ++ } ++ ++ /* ++ * Find the runnable thread having the highest priority among ++ * all scheduling classes, scanned by decreasing priority. 
++ */ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ for_each_xnsched_class(p) { ++ thread = p->sched_pick(sched); ++ if (thread) { ++ set_thread_running(sched, thread); ++ return thread; ++ } ++ } ++ ++ return NULL; /* Never executed because of the idle class. */ ++#else /* !CONFIG_XENO_OPT_SCHED_CLASSES */ ++ thread = xnsched_rt_pick(sched); ++ if (unlikely(thread == NULL)) ++ thread = &sched->rootcb; ++ ++ set_thread_running(sched, thread); ++ ++ return thread; ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++} ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++struct xnsched *xnsched_finish_unlocked_switch(struct xnsched *sched) ++{ ++ struct xnthread *last; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++#ifdef CONFIG_SMP ++ /* If current thread migrated while suspended */ ++ sched = xnsched_current(); ++#endif /* CONFIG_SMP */ ++ ++ last = sched->last; ++ sched->status &= ~XNINSW; ++ ++ /* Detect a thread which has migrated. */ ++ if (last->sched != sched) { ++ xnsched_putback(last); ++ xnthread_clear_state(last, XNMIGRATE); ++ } ++ ++ return sched; ++} ++ ++#endif /* CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++void xnsched_lock(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ /* See comments in xnsched_run(), ___xnsched_run(). */ ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ if (sched->lflags & XNINIRQ) ++ return; ++ ++ /* ++ * CAUTION: The fast xnthread_current() accessor carries the ++ * relevant lock nesting count only if current runs in primary ++ * mode. Otherwise, if the caller is unknown or relaxed ++ * Xenomai-wise, then we fall back to the root thread on the ++ * current scheduler, which must be done with IRQs off. ++ * Either way, we don't need to grab the super lock. ++ */ ++ XENO_WARN_ON_ONCE(COBALT, (curr->state & XNROOT) && ++ !hard_irqs_disabled()); ++ ++ curr->lock_count++; ++} ++EXPORT_SYMBOL_GPL(xnsched_lock); ++ ++void xnsched_unlock(void) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xnthread *curr = READ_ONCE(sched->curr); ++ ++ XENO_WARN_ON_ONCE(COBALT, (curr->state & XNROOT) && ++ !hard_irqs_disabled()); ++ ++ if (sched->lflags & XNINIRQ) ++ return; ++ ++ if (!XENO_ASSERT(COBALT, curr->lock_count > 0)) ++ return; ++ ++ if (--curr->lock_count == 0) { ++ xnthread_clear_localinfo(curr, XNLBALERT); ++ xnsched_run(); ++ } ++} ++EXPORT_SYMBOL_GPL(xnsched_unlock); ++ ++/* nklock locked, interrupts off. */ ++void xnsched_putback(struct xnthread *thread) ++{ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ else ++ xnthread_set_state(thread, XNREADY); ++ ++ xnsched_enqueue(thread); ++ xnsched_set_resched(thread->sched); ++} ++ ++/* nklock locked, interrupts off. */ ++int xnsched_set_policy(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_class *orig_effective_class __maybe_unused; ++ bool effective; ++ int ret; ++ ++ ret = xnsched_chkparam(sched_class, thread, p); ++ if (ret) ++ return ret; ++ ++ /* ++ * Declaring a thread to a new scheduling class may fail, so ++ * we do that early, while the thread is still a member of the ++ * previous class. However, this also means that the ++ * declaration callback shall not do anything that might ++ * affect the previous class (such as touching thread->rlink ++ * for instance). 
++ */ ++ if (sched_class != thread->base_class) { ++ ret = xnsched_declare(sched_class, thread, p); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * As a special case, we may be called from __xnthread_init() ++ * with no previous scheduling class at all. ++ */ ++ if (likely(thread->base_class != NULL)) { ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ ++ if (sched_class != thread->base_class) ++ xnsched_forget(thread); ++ } ++ ++ /* ++ * Set the base and effective scheduling parameters. However, ++ * xnsched_setparam() will deny lowering the effective ++ * priority if a boost is undergoing, only recording the ++ * change into the base priority field in such situation. ++ */ ++ thread->base_class = sched_class; ++ /* ++ * Referring to the effective class from a setparam() handler ++ * is wrong: make sure to break if so. ++ */ ++ if (XENO_DEBUG(COBALT)) { ++ orig_effective_class = thread->sched_class; ++ thread->sched_class = NULL; ++ } ++ ++ /* ++ * This is the ONLY place where calling xnsched_setparam() is ++ * legit, sane and safe. ++ */ ++ effective = xnsched_setparam(thread, p); ++ if (effective) { ++ thread->sched_class = sched_class; ++ thread->wprio = xnsched_calc_wprio(sched_class, thread->cprio); ++ } else if (XENO_DEBUG(COBALT)) ++ thread->sched_class = orig_effective_class; ++ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ ++ if (!xnthread_test_state(thread, XNDORMANT)) ++ xnsched_set_resched(thread->sched); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_set_policy); ++ ++/* nklock locked, interrupts off. */ ++bool xnsched_set_effective_priority(struct xnthread *thread, int prio) ++{ ++ int wprio = xnsched_calc_wprio(thread->base_class, prio); ++ ++ thread->bprio = prio; ++ if (wprio == thread->wprio) ++ return true; ++ ++ /* ++ * We may not lower the effective/current priority of a ++ * boosted thread when changing the base scheduling ++ * parameters. Only xnsched_track_policy() and ++ * xnsched_protect_priority() may do so when dealing with PI ++ * and PP synchs resp. ++ */ ++ if (wprio < thread->wprio && xnthread_test_state(thread, XNBOOST)) ++ return false; ++ ++ thread->cprio = prio; ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ return true; ++} ++ ++/* nklock locked, interrupts off. */ ++void xnsched_track_policy(struct xnthread *thread, ++ struct xnthread *target) ++{ ++ union xnsched_policy_param param; ++ ++ /* ++ * Inherit (or reset) the effective scheduling class and ++ * priority of a thread. Unlike xnsched_set_policy(), this ++ * routine is allowed to lower the weighted priority with no ++ * restriction, even if a boost is undergoing. ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ /* ++ * Self-targeting means to reset the scheduling policy and ++ * parameters to the base settings. Otherwise, make thread ++ * inherit the scheduling parameters from target. ++ */ ++ if (target == thread) { ++ thread->sched_class = thread->base_class; ++ xnsched_trackprio(thread, NULL); ++ /* ++ * Per SuSv2, resetting the base scheduling parameters ++ * should not move the thread to the tail of its ++ * priority group. 
++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_requeue(thread); ++ ++ } else { ++ xnsched_getparam(target, ¶m); ++ thread->sched_class = target->sched_class; ++ xnsched_trackprio(thread, ¶m); ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ } ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++/* nklock locked, interrupts off. */ ++void xnsched_protect_priority(struct xnthread *thread, int prio) ++{ ++ /* ++ * Apply a PP boost by changing the effective priority of a ++ * thread, forcing it to the RT class. Like ++ * xnsched_track_policy(), this routine is allowed to lower ++ * the weighted priority with no restriction, even if a boost ++ * is undergoing. ++ * ++ * This routine only deals with active boosts, resetting the ++ * base priority when leaving a PP boost is obtained by a call ++ * to xnsched_track_policy(). ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_dequeue(thread); ++ ++ thread->sched_class = &xnsched_class_rt; ++ xnsched_protectprio(thread, prio); ++ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_enqueue(thread); ++ ++ trace_cobalt_thread_set_current_prio(thread); ++ ++ xnsched_set_resched(thread->sched); ++} ++ ++static void migrate_thread(struct xnthread *thread, struct xnsched *sched) ++{ ++ struct xnsched_class *sched_class = thread->sched_class; ++ ++ if (xnthread_test_state(thread, XNREADY)) { ++ xnsched_dequeue(thread); ++ xnthread_clear_state(thread, XNREADY); ++ } ++ ++ if (sched_class->sched_migrate) ++ sched_class->sched_migrate(thread, sched); ++ /* ++ * WARNING: the scheduling class may have just changed as a ++ * result of calling the per-class migration hook. ++ */ ++ thread->sched = sched; ++} ++ ++/* ++ * nklock locked, interrupts off. thread must be runnable. ++ */ ++void xnsched_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ xnsched_set_resched(thread->sched); ++ migrate_thread(thread, sched); ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ /* ++ * Mark the thread in flight, xnsched_finish_unlocked_switch() ++ * will put the thread on the remote runqueue. ++ */ ++ xnthread_set_state(thread, XNMIGRATE); ++#else ++ /* Move thread to the remote run queue. */ ++ xnsched_putback(thread); ++#endif ++} ++ ++/* ++ * nklock locked, interrupts off. Thread may be blocked. ++ */ ++void xnsched_migrate_passive(struct xnthread *thread, struct xnsched *sched) ++{ ++ struct xnsched *last_sched = thread->sched; ++ ++ migrate_thread(thread, sched); ++ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) { ++ xnsched_requeue(thread); ++ xnthread_set_state(thread, XNREADY); ++ xnsched_set_resched(last_sched); ++ } ++} ++ ++#ifdef CONFIG_XENO_OPT_SCALABLE_SCHED ++ ++void xnsched_initq(struct xnsched_mlq *q) ++{ ++ int prio; ++ ++ q->elems = 0; ++ bitmap_zero(q->prio_map, XNSCHED_MLQ_LEVELS); ++ ++ for (prio = 0; prio < XNSCHED_MLQ_LEVELS; prio++) ++ INIT_LIST_HEAD(q->heads + prio); ++} ++ ++static inline int get_qindex(struct xnsched_mlq *q, int prio) ++{ ++ XENO_BUG_ON(COBALT, prio < 0 || prio >= XNSCHED_MLQ_LEVELS); ++ /* ++ * BIG FAT WARNING: We need to rescale the priority level to a ++ * 0-based range. We use find_first_bit() to scan the bitmap ++ * which is a bit scan forward operation. Therefore, the lower ++ * the index value, the higher the priority (since least ++ * significant bits will be found first when scanning the ++ * bitmap). 
++ */ ++ return XNSCHED_MLQ_LEVELS - prio - 1; ++} ++ ++static struct list_head *add_q(struct xnsched_mlq *q, int prio) ++{ ++ struct list_head *head; ++ int idx; ++ ++ idx = get_qindex(q, prio); ++ head = q->heads + idx; ++ q->elems++; ++ ++ /* New item is not linked yet. */ ++ if (list_empty(head)) ++ __set_bit(idx, q->prio_map); ++ ++ return head; ++} ++ ++void xnsched_addq(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ struct list_head *head = add_q(q, thread->cprio); ++ list_add(&thread->rlink, head); ++} ++ ++void xnsched_addq_tail(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ struct list_head *head = add_q(q, thread->cprio); ++ list_add_tail(&thread->rlink, head); ++} ++ ++static void del_q(struct xnsched_mlq *q, ++ struct list_head *entry, int idx) ++{ ++ struct list_head *head = q->heads + idx; ++ ++ list_del(entry); ++ q->elems--; ++ ++ if (list_empty(head)) ++ __clear_bit(idx, q->prio_map); ++} ++ ++void xnsched_delq(struct xnsched_mlq *q, struct xnthread *thread) ++{ ++ del_q(q, &thread->rlink, get_qindex(q, thread->cprio)); ++} ++ ++struct xnthread *xnsched_getq(struct xnsched_mlq *q) ++{ ++ struct xnthread *thread; ++ struct list_head *head; ++ int idx; ++ ++ if (q->elems == 0) ++ return NULL; ++ ++ idx = xnsched_weightq(q); ++ head = q->heads + idx; ++ XENO_BUG_ON(COBALT, list_empty(head)); ++ thread = list_first_entry(head, struct xnthread, rlink); ++ del_q(q, &thread->rlink, idx); ++ ++ return thread; ++} ++ ++struct xnthread *xnsched_findq(struct xnsched_mlq *q, int prio) ++{ ++ struct list_head *head; ++ int idx; ++ ++ idx = get_qindex(q, prio); ++ head = q->heads + idx; ++ if (list_empty(head)) ++ return NULL; ++ ++ return list_first_entry(head, struct xnthread, rlink); ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ struct xnsched_mlq *q = &sched->rt.runnable; ++ struct xnthread *thread; ++ struct list_head *head; ++ int idx; ++ ++ if (q->elems == 0) ++ return NULL; ++ ++ /* ++ * Some scheduling policies may be implemented as variants of ++ * the core SCHED_FIFO class, sharing its runqueue ++ * (e.g. SCHED_SPORADIC, SCHED_QUOTA). This means that we have ++ * to do some cascading to call the right pick handler ++ * eventually. ++ */ ++ idx = xnsched_weightq(q); ++ head = q->heads + idx; ++ XENO_BUG_ON(COBALT, list_empty(head)); ++ ++ /* ++ * The active class (i.e. ->sched_class) is the one currently ++ * queuing the thread, reflecting any priority boost due to ++ * PI. ++ */ ++ thread = list_first_entry(head, struct xnthread, rlink); ++ if (unlikely(thread->sched_class != &xnsched_class_rt)) ++ return thread->sched_class->sched_pick(sched); ++ ++ del_q(q, &thread->rlink, idx); ++ ++ return thread; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++#else /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++struct xnthread *xnsched_findq(struct list_head *q, int prio) ++{ ++ struct xnthread *thread; ++ ++ if (list_empty(q)) ++ return NULL; ++ ++ /* Find thread leading a priority group. 
*/ ++ list_for_each_entry(thread, q, rlink) { ++ if (prio == thread->cprio) ++ return thread; ++ } ++ ++ return NULL; ++} ++ ++#ifdef CONFIG_XENO_OPT_SCHED_CLASSES ++ ++struct xnthread *xnsched_rt_pick(struct xnsched *sched) ++{ ++ struct list_head *q = &sched->rt.runnable; ++ struct xnthread *thread; ++ ++ if (list_empty(q)) ++ return NULL; ++ ++ thread = list_first_entry(q, struct xnthread, rlink); ++ if (unlikely(thread->sched_class != &xnsched_class_rt)) ++ return thread->sched_class->sched_pick(sched); ++ ++ list_del(&thread->rlink); ++ ++ return thread; ++} ++ ++#endif /* CONFIG_XENO_OPT_SCHED_CLASSES */ ++ ++#endif /* !CONFIG_XENO_OPT_SCALABLE_SCHED */ ++ ++static inline void switch_context(struct xnsched *sched, ++ struct xnthread *prev, struct xnthread *next) ++{ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ sched->last = prev; ++ sched->status |= XNINSW; ++ xnlock_clear_irqon(&nklock); ++#endif ++ ++ xnarch_switch_to(prev, next); ++} ++ ++/** ++ * @fn int xnsched_run(void) ++ * @brief The rescheduling procedure. ++ * ++ * This is the central rescheduling routine which should be called to ++ * validate and apply changes which have previously been made to the ++ * nucleus scheduling state, such as suspending, resuming or changing ++ * the priority of threads. This call performs context switches as ++ * needed. xnsched_run() schedules out the current thread if: ++ * ++ * - the current thread is about to block. ++ * - a runnable thread from a higher priority scheduling class is ++ * waiting for the CPU. ++ * - the current thread does not lead the runnable threads from its ++ * own scheduling class (i.e. round-robin). ++ * ++ * The Cobalt core implements a lazy rescheduling scheme so that most ++ * of the services affecting the threads state MUST be followed by a ++ * call to the rescheduling procedure for the new scheduling state to ++ * be applied. ++ * ++ * In other words, multiple changes on the scheduler state can be done ++ * in a row, waking threads up, blocking others, without being ++ * immediately translated into the corresponding context switches. ++ * When all changes have been applied, xnsched_run() should be called ++ * for considering those changes, and possibly switching context. ++ * ++ * As a notable exception to the previous principle however, every ++ * action which ends up suspending the current thread begets an ++ * implicit call to the rescheduling procedure on behalf of the ++ * blocking service. ++ * ++ * Typically, self-suspension or sleeping on a synchronization object ++ * automatically leads to a call to the rescheduling procedure, ++ * therefore the caller does not need to explicitly issue ++ * xnsched_run() after such operations. ++ * ++ * The rescheduling procedure always leads to a null-effect if it is ++ * called on behalf of an interrupt service routine. Any outstanding ++ * scheduler lock held by the outgoing thread will be restored when ++ * the thread is scheduled back in. ++ * ++ * Calling this procedure with no applicable context switch pending is ++ * harmless and simply leads to a null-effect. ++ * ++ * @return Non-zero is returned if a context switch actually happened, ++ * otherwise zero if the current thread was left running. ++ * ++ * @coretags{unrestricted} ++ */ ++static inline int test_resched(struct xnsched *sched) ++{ ++ int resched = xnsched_resched_p(sched); ++#ifdef CONFIG_SMP ++ /* Send resched IPI to remote CPU(s). 
*/ ++ if (unlikely(!cpumask_empty(&sched->resched))) { ++ smp_mb(); ++ ipipe_send_ipi(IPIPE_RESCHEDULE_IPI, sched->resched); ++ cpumask_clear(&sched->resched); ++ } ++#endif ++ sched->status &= ~XNRESCHED; ++ ++ return resched; ++} ++ ++static inline void enter_root(struct xnthread *root) ++{ ++ struct xnarchtcb *rootcb __maybe_unused = xnthread_archtcb(root); ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_stop(&root->sched->wdtimer); ++#endif ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ if (rootcb->core.mm == NULL) ++ set_ti_thread_flag(rootcb->core.tip, TIF_MMSWITCH_INT); ++#endif ++} ++ ++static inline void leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *rootcb = xnthread_archtcb(root); ++ struct task_struct *p = current; ++ ++ ipipe_notify_root_preemption(); ++ /* Remember the preempted Linux task pointer. */ ++ rootcb->core.host_task = p; ++ rootcb->core.tsp = &p->thread; ++ rootcb->core.mm = rootcb->core.active_mm = ipipe_get_active_mm(); ++ rootcb->core.tip = task_thread_info(p); ++ xnarch_leave_root(root); ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ xntimer_start(&root->sched->wdtimer, get_watchdog_timeout(), ++ XN_INFINITE, XN_RELATIVE); ++#endif ++} ++ ++void __xnsched_run_handler(void) /* hw interrupts off. */ ++{ ++ trace_cobalt_schedule_remote(xnsched_current()); ++ xnsched_run(); ++} ++ ++static inline void do_lazy_user_work(struct xnthread *curr) ++{ ++ xnthread_commit_ceiling(curr); ++} ++ ++int ___xnsched_run(struct xnsched *sched) ++{ ++ struct xnthread *prev, *next, *curr; ++ int switched, shadow; ++ spl_t s; ++ ++ XENO_WARN_ON_ONCE(COBALT, !hard_irqs_disabled() && ipipe_root_p); ++ ++ if (xnarch_escalate()) ++ return 0; ++ ++ trace_cobalt_schedule(sched); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ curr = sched->curr; ++ /* ++ * CAUTION: xnthread_host_task(curr) may be unsynced and even ++ * stale if curr = &rootcb, since the task logged by ++ * leave_root() may not still be the current one. Use ++ * "current" for disambiguating. ++ */ ++ xntrace_pid(task_pid_nr(current), xnthread_current_priority(curr)); ++reschedule: ++ if (xnthread_test_state(curr, XNUSER)) ++ do_lazy_user_work(curr); ++ ++ switched = 0; ++ if (!test_resched(sched)) ++ goto out; ++ ++ next = xnsched_pick_next(sched); ++ if (next == curr) { ++ if (unlikely(xnthread_test_state(next, XNROOT))) { ++ if (sched->lflags & XNHTICK) ++ xnintr_host_tick(sched); ++ if (sched->lflags & XNHDEFER) ++ xnclock_program_shot(&nkclock, sched); ++ } ++ goto out; ++ } ++ ++ prev = curr; ++ ++ trace_cobalt_switch_context(prev, next); ++ ++ /* ++ * sched->curr is shared locklessly with xnsched_run() and ++ * xnsched_lock(). WRITE_ONCE() makes sure sched->curr is ++ * written atomically so that these routines always observe ++ * consistent values by preventing the compiler from using ++ * store tearing. ++ */ ++ WRITE_ONCE(sched->curr, next); ++ shadow = 1; ++ ++ if (xnthread_test_state(prev, XNROOT)) { ++ leave_root(prev); ++ shadow = 0; ++ } else if (xnthread_test_state(next, XNROOT)) { ++ if (sched->lflags & XNHTICK) ++ xnintr_host_tick(sched); ++ if (sched->lflags & XNHDEFER) ++ xnclock_program_shot(&nkclock, sched); ++ enter_root(next); ++ } ++ ++ xnstat_exectime_switch(sched, &next->stat.account); ++ xnstat_counter_inc(&next->stat.csw); ++ ++ switch_context(sched, prev, next); ++ ++ /* ++ * Test whether we transitioned from primary mode to secondary ++ * over a shadow thread, caused by a call to xnthread_relax(). 
++ * In such a case, we are running over the regular schedule() ++ * tail code, so we have to skip our tail code. ++ */ ++ if (shadow && ipipe_root_p) ++ goto shadow_epilogue; ++ ++ switched = 1; ++ sched = xnsched_finish_unlocked_switch(sched); ++ /* ++ * Re-read the currently running thread, this is needed ++ * because of relaxed/hardened transitions. ++ */ ++ curr = sched->curr; ++ xnthread_switch_fpu(sched); ++ xntrace_pid(task_pid_nr(current), xnthread_current_priority(curr)); ++out: ++ if (switched && ++ xnsched_maybe_resched_after_unlocked_switch(sched)) ++ goto reschedule; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return switched; ++ ++shadow_epilogue: ++ __ipipe_complete_domain_migration(); ++ ++ XENO_BUG_ON(COBALT, xnthread_current() == NULL); ++ ++ /* ++ * Interrupts must be disabled here (has to be done on entry ++ * of the Linux [__]switch_to function), but it is what ++ * callers expect, specifically the reschedule of an IRQ ++ * handler that hit before we call xnsched_run in ++ * xnthread_suspend() when relaxing a thread. ++ */ ++ XENO_BUG_ON(COBALT, !hard_irqs_disabled()); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(___xnsched_run); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_directory sched_vfroot; ++ ++struct vfile_schedlist_priv { ++ struct xnthread *curr; ++ xnticks_t start_time; ++}; ++ ++struct vfile_schedlist_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ char sched_class[XNOBJECT_NAME_LEN]; ++ char personality[XNOBJECT_NAME_LEN]; ++ int cprio; ++ xnticks_t timeout; ++ int state; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedlist_ops; ++ ++static struct xnvfile_snapshot schedlist_vfile = { ++ .privsz = sizeof(struct vfile_schedlist_priv), ++ .datasz = sizeof(struct vfile_schedlist_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedlist_ops, ++}; ++ ++static int vfile_schedlist_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_schedlist_priv *priv = xnvfile_iterator_priv(it); ++ ++ /* &nkthreadq cannot be empty (root thread(s)). */ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ priv->start_time = xnclock_read_monotonic(&nkclock); ++ ++ return cobalt_nrthreads; ++} ++ ++static int vfile_schedlist_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedlist_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_schedlist_data *p = data; ++ xnticks_t timeout, period; ++ struct xnthread *thread; ++ xnticks_t base_time; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ p->state = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ p->state |= XNLOCK; ++ knamecpy(p->sched_class, thread->sched_class->name); ++ knamecpy(p->personality, thread->personality->name); ++ period = xnthread_get_period(thread); ++ base_time = priv->start_time; ++ if (xntimer_clock(&thread->ptimer) != &nkclock) ++ base_time = xnclock_read_monotonic(xntimer_clock(&thread->ptimer)); ++ timeout = xnthread_get_timeout(thread, base_time); ++ /* ++ * Here we cheat: thread is periodic and the sampling rate may ++ * be high, so it is indeed possible that the next tick date ++ * from the ptimer progresses fast enough while we are busy ++ * collecting output data in this loop, so that next_date - ++ * start_time > period. In such a case, we simply ceil the ++ * value to period to keep the result meaningful, even if not ++ * necessarily accurate. But what does accuracy mean when the ++ * sampling frequency is high, and the way to read it has to ++ * go through the vfile interface anyway? ++ */ ++ if (period > 0 && period < timeout && ++ !xntimer_running_p(&thread->rtimer)) ++ timeout = period; ++ ++ p->timeout = timeout; ++ ++ return 1; ++} ++ ++static int vfile_schedlist_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedlist_data *p = data; ++ char sbuf[64], pbuf[16], tbuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-5s %-8s %-5s %-12s %-10s %s\n", ++ "CPU", "PID", "CLASS", "TYPE", "PRI", "TIMEOUT", ++ "STAT", "NAME"); ++ else { ++ ksformat(pbuf, sizeof(pbuf), "%3d", p->cprio); ++ xntimer_format_time(p->timeout, tbuf, sizeof(tbuf)); ++ xnthread_format_status(p->state, sbuf, sizeof(sbuf)); ++ ++ xnvfile_printf(it, ++ "%3u %-6d %-5s %-8s %-5s %-12s %-10s %s%s%s\n", ++ p->cpu, ++ p->pid, ++ p->sched_class, ++ p->personality, ++ pbuf, ++ tbuf, ++ sbuf, ++ (p->state & XNUSER) ? "" : "[", ++ p->name, ++ (p->state & XNUSER) ? 
"" : "]"); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_schedlist_ops = { ++ .rewind = vfile_schedlist_rewind, ++ .next = vfile_schedlist_next, ++ .show = vfile_schedlist_show, ++}; ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static spl_t vfile_schedstat_lock_s; ++ ++static int vfile_schedstat_get_lock(struct xnvfile *vfile) ++{ ++ int ret; ++ ++ ret = xnintr_get_query_lock(); ++ if (ret < 0) ++ return ret; ++ xnlock_get_irqsave(&nklock, vfile_schedstat_lock_s); ++ return 0; ++} ++ ++static void vfile_schedstat_put_lock(struct xnvfile *vfile) ++{ ++ xnlock_put_irqrestore(&nklock, vfile_schedstat_lock_s); ++ xnintr_put_query_lock(); ++} ++ ++static struct xnvfile_lock_ops vfile_schedstat_lockops = { ++ .get = vfile_schedstat_get_lock, ++ .put = vfile_schedstat_put_lock, ++}; ++ ++struct vfile_schedstat_priv { ++ int irq; ++ struct xnthread *curr; ++ struct xnintr_iterator intr_it; ++}; ++ ++struct vfile_schedstat_data { ++ int cpu; ++ pid_t pid; ++ int state; ++ char name[XNOBJECT_NAME_LEN]; ++ unsigned long ssw; ++ unsigned long csw; ++ unsigned long xsc; ++ unsigned long pf; ++ xnticks_t exectime_period; ++ xnticks_t account_period; ++ xnticks_t exectime_total; ++ struct xnsched_class *sched_class; ++ xnticks_t period; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedstat_ops; ++ ++static struct xnvfile_snapshot schedstat_vfile = { ++ .privsz = sizeof(struct vfile_schedstat_priv), ++ .datasz = sizeof(struct vfile_schedstat_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedstat_ops, ++ .entry = { .lockops = &vfile_schedstat_lockops }, ++}; ++ ++static int vfile_schedstat_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_schedstat_priv *priv = xnvfile_iterator_priv(it); ++ int irqnr; ++ ++ /* ++ * The activity numbers on each valid interrupt descriptor are ++ * grouped under a pseudo-thread. ++ */ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ priv->irq = 0; ++ irqnr = xnintr_query_init(&priv->intr_it) * num_online_cpus(); ++ ++ return irqnr + cobalt_nrthreads; ++} ++ ++static int vfile_schedstat_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_schedstat_data *p = data; ++ struct xnthread *thread; ++ struct xnsched *sched; ++ xnticks_t period; ++ int __maybe_unused ret; ++ ++ if (priv->curr == NULL) ++ /* ++ * We are done with actual threads, scan interrupt ++ * descriptors. 
++ */ ++ goto scan_irqs; ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ sched = thread->sched; ++ p->cpu = xnsched_cpu(sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->state = xnthread_get_state(thread); ++ if (thread->lock_count > 0) ++ p->state |= XNLOCK; ++ p->ssw = xnstat_counter_get(&thread->stat.ssw); ++ p->csw = xnstat_counter_get(&thread->stat.csw); ++ p->xsc = xnstat_counter_get(&thread->stat.xsc); ++ p->pf = xnstat_counter_get(&thread->stat.pf); ++ p->sched_class = thread->sched_class; ++ p->cprio = thread->cprio; ++ p->period = xnthread_get_period(thread); ++ ++ period = sched->last_account_switch - thread->stat.lastperiod.start; ++ if (period == 0 && thread == sched->curr) { ++ p->exectime_period = 1; ++ p->account_period = 1; ++ } else { ++ p->exectime_period = thread->stat.account.total - ++ thread->stat.lastperiod.total; ++ p->account_period = period; ++ } ++ p->exectime_total = thread->stat.account.total; ++ thread->stat.lastperiod.total = thread->stat.account.total; ++ thread->stat.lastperiod.start = sched->last_account_switch; ++ ++ return 1; ++ ++scan_irqs: ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ if (priv->irq >= IPIPE_NR_IRQS) ++ return 0; /* All done. */ ++ ++ ret = xnintr_query_next(priv->irq, &priv->intr_it, p->name); ++ if (ret) { ++ if (ret == -EAGAIN) ++ xnvfile_touch(it->vfile); /* force rewind. */ ++ priv->irq++; ++ return VFILE_SEQ_SKIP; ++ } ++ ++ if (!xnsched_supported_cpu(priv->intr_it.cpu)) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = priv->intr_it.cpu; ++ p->csw = priv->intr_it.hits; ++ p->exectime_period = priv->intr_it.exectime_period; ++ p->account_period = priv->intr_it.account_period; ++ p->exectime_total = priv->intr_it.exectime_total; ++ p->pid = 0; ++ p->state = 0; ++ p->ssw = 0; ++ p->xsc = 0; ++ p->pf = 0; ++ p->sched_class = &xnsched_class_idle; ++ p->cprio = 0; ++ p->period = 0; ++ ++ return 1; ++#else /* !CONFIG_XENO_OPT_STATS_IRQS */ ++ return 0; ++#endif /* !CONFIG_XENO_OPT_STATS_IRQS */ ++} ++ ++static int vfile_schedstat_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_data *p = data; ++ int usage = 0; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-10s %-10s %-10s %-4s %-8s %5s" ++ " %s\n", ++ "CPU", "PID", "MSW", "CSW", "XSC", "PF", "STAT", "%CPU", ++ "NAME"); ++ else { ++ if (p->account_period) { ++ while (p->account_period > 0xffffffffUL) { ++ p->exectime_period >>= 16; ++ p->account_period >>= 16; ++ } ++ usage = xnarch_ulldiv(p->exectime_period * 1000LL + ++ (p->account_period >> 1), ++ p->account_period, NULL); ++ } ++ xnvfile_printf(it, ++ "%3u %-6d %-10lu %-10lu %-10lu %-4lu %.8x %3u.%u" ++ " %s%s%s\n", ++ p->cpu, p->pid, p->ssw, p->csw, p->xsc, p->pf, p->state, ++ usage / 10, usage % 10, ++ (p->state & XNUSER) ? "" : "[", ++ p->name, ++ (p->state & XNUSER) ? 
"" : "]"); ++ } ++ ++ return 0; ++} ++ ++static int vfile_schedacct_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_schedstat_data *p = data; ++ ++ if (p == NULL) ++ return 0; ++ ++ xnvfile_printf(it, "%u %d %lu %lu %lu %lu %.8x %Lu %Lu %Lu %s %s %d %Lu\n", ++ p->cpu, p->pid, p->ssw, p->csw, p->xsc, p->pf, p->state, ++ xnclock_ticks_to_ns(&nkclock, p->account_period), ++ xnclock_ticks_to_ns(&nkclock, p->exectime_period), ++ xnclock_ticks_to_ns(&nkclock, p->exectime_total), ++ p->name, ++ p->sched_class->name, ++ p->cprio, ++ p->period); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_schedstat_ops = { ++ .rewind = vfile_schedstat_rewind, ++ .next = vfile_schedstat_next, ++ .show = vfile_schedstat_show, ++}; ++ ++/* ++ * An accounting vfile is a thread statistics vfile in disguise with a ++ * different output format, which is parser-friendly. ++ */ ++static struct xnvfile_snapshot_ops vfile_schedacct_ops; ++ ++static struct xnvfile_snapshot schedacct_vfile = { ++ .privsz = sizeof(struct vfile_schedstat_priv), ++ .datasz = sizeof(struct vfile_schedstat_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_schedacct_ops, ++}; ++ ++static struct xnvfile_snapshot_ops vfile_schedacct_ops = { ++ .rewind = vfile_schedstat_rewind, ++ .next = vfile_schedstat_next, ++ .show = vfile_schedacct_show, ++}; ++ ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_SMP ++ ++static int affinity_vfile_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ unsigned long val = 0; ++ int cpu; ++ ++ for (cpu = 0; cpu < BITS_PER_LONG; cpu++) ++ if (cpumask_test_cpu(cpu, &cobalt_cpu_affinity)) ++ val |= (1UL << cpu); ++ ++ xnvfile_printf(it, "%08lx\n", val); ++ ++ return 0; ++} ++ ++static ssize_t affinity_vfile_store(struct xnvfile_input *input) ++{ ++ cpumask_t affinity, set; ++ ssize_t ret; ++ long val; ++ int cpu; ++ spl_t s; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ if (val == 0) ++ affinity = xnsched_realtime_cpus; /* Reset to default. */ ++ else { ++ cpumask_clear(&affinity); ++ for (cpu = 0; cpu < BITS_PER_LONG; cpu++, val >>= 1) { ++ if (val & 1) ++ cpumask_set_cpu(cpu, &affinity); ++ } ++ } ++ ++ cpumask_and(&set, &affinity, cpu_online_mask); ++ if (cpumask_empty(&set)) ++ return -EINVAL; ++ ++ /* ++ * The new dynamic affinity must be a strict subset of the ++ * static set of supported CPUs. 
++ */ ++ cpumask_or(&set, &affinity, &xnsched_realtime_cpus); ++ if (!cpumask_equal(&set, &xnsched_realtime_cpus)) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ cobalt_cpu_affinity = affinity; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops affinity_vfile_ops = { ++ .show = affinity_vfile_show, ++ .store = affinity_vfile_store, ++}; ++ ++static struct xnvfile_regular affinity_vfile = { ++ .ops = &affinity_vfile_ops, ++}; ++ ++#endif /* CONFIG_SMP */ ++ ++int xnsched_init_proc(void) ++{ ++ struct xnsched_class *p; ++ int ret; ++ ++ ret = xnvfile_init_dir("sched", &sched_vfroot, &cobalt_vfroot); ++ if (ret) ++ return ret; ++ ++ ret = xnvfile_init_snapshot("threads", &schedlist_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++ ++ for_each_xnsched_class(p) { ++ if (p->sched_init_vfile) { ++ ret = p->sched_init_vfile(p, &sched_vfroot); ++ if (ret) ++ return ret; ++ } ++ } ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ret = xnvfile_init_snapshot("stat", &schedstat_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++ ret = xnvfile_init_snapshot("acct", &schedacct_vfile, &sched_vfroot); ++ if (ret) ++ return ret; ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_SMP ++ xnvfile_init_regular("affinity", &affinity_vfile, &cobalt_vfroot); ++#endif /* CONFIG_SMP */ ++ ++ return 0; ++} ++ ++void xnsched_cleanup_proc(void) ++{ ++ struct xnsched_class *p; ++ ++ for_each_xnsched_class(p) { ++ if (p->sched_cleanup_vfile) ++ p->sched_cleanup_vfile(p); ++ } ++ ++#ifdef CONFIG_SMP ++ xnvfile_destroy_regular(&affinity_vfile); ++#endif /* CONFIG_SMP */ ++#ifdef CONFIG_XENO_OPT_STATS ++ xnvfile_destroy_snapshot(&schedacct_vfile); ++ xnvfile_destroy_snapshot(&schedstat_vfile); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ xnvfile_destroy_snapshot(&schedlist_vfile); ++ xnvfile_destroy_dir(&sched_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** @} */ +--- linux/kernel/xenomai/select.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/select.c 2021-04-07 16:01:25.864636114 +0800 +@@ -0,0 +1,460 @@ ++/* ++ * Gilles Chanteperdrix ++ * Copyright (C) 2008 Efixo ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include /* For hweight_long */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_select Synchronous I/O multiplexing ++ * ++ * This module implements the services needed for implementing the ++ * POSIX select() service, or any other event multiplexing services. 
++ * ++ * Following the implementation of the posix select service, this module defines ++ * three types of events: ++ * - \a XNSELECT_READ meaning that a file descriptor is ready for reading; ++ * - \a XNSELECT_WRITE meaning that a file descriptor is ready for writing; ++ * - \a XNSELECT_EXCEPT meaning that a file descriptor received an exceptional ++ * event. ++ * ++ * It works by defining two structures: ++ * - a @a struct @a xnselect structure, which should be added to every file ++ * descriptor for every event type (read, write, or except); ++ * - a @a struct @a xnselector structure, the selection structure, passed by ++ * the thread calling the xnselect service, where this service does all its ++ * housekeeping. ++ * @{ ++ */ ++ ++static LIST_HEAD(selector_list); ++static int deletion_apc; ++ ++/** ++ * Initialize a @a struct @a xnselect structure. ++ * ++ * This service must be called to initialize a @a struct @a xnselect structure ++ * before it is bound to a selector by the means of xnselect_bind(). ++ * ++ * @param select_block pointer to the xnselect structure to be initialized ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnselect_init(struct xnselect *select_block) ++{ ++ INIT_LIST_HEAD(&select_block->bindings); ++} ++EXPORT_SYMBOL_GPL(xnselect_init); ++ ++static inline int xnselect_wakeup(struct xnselector *selector) ++{ ++ return xnsynch_flush(&selector->synchbase, 0) == XNSYNCH_RESCHED; ++} ++ ++/** ++ * Bind a file descriptor (represented by its @a xnselect structure) to a ++ * selector block. ++ * ++ * @param select_block pointer to the @a struct @a xnselect to be bound; ++ * ++ * @param binding pointer to a newly allocated (using xnmalloc) @a struct ++ * @a xnselect_binding; ++ * ++ * @param selector pointer to the selector structure; ++ * ++ * @param type type of events (@a XNSELECT_READ, @a XNSELECT_WRITE, or @a ++ * XNSELECT_EXCEPT); ++ * ++ * @param index index of the file descriptor (represented by @a ++ * select_block) in the bit fields used by the @a selector structure; ++ * ++ * @param state current state of the file descriptor. ++ * ++ * @a select_block must have been initialized with xnselect_init(), ++ * the @a xnselector structure must have been initialized with ++ * xnselector_init(), @a binding may be uninitialized. ++ * ++ * This service must be called with nklock locked, irqs off. For this reason, ++ * the @a binding parameter must have been allocated by the caller outside the ++ * locking section. ++ * ++ * @retval -EINVAL if @a type or @a index is invalid; ++ * @retval 0 otherwise. 
++ * ++ * @coretags{task-unrestricted, might-switch, atomic-entry} ++ */ ++int xnselect_bind(struct xnselect *select_block, ++ struct xnselect_binding *binding, ++ struct xnselector *selector, ++ unsigned type, ++ unsigned index, ++ unsigned state) ++{ ++ atomic_only(); ++ ++ if (type >= XNSELECT_MAX_TYPES || index > __FD_SETSIZE) ++ return -EINVAL; ++ ++ binding->selector = selector; ++ binding->fd = select_block; ++ binding->type = type; ++ binding->bit_index = index; ++ ++ list_add_tail(&binding->slink, &selector->bindings); ++ list_add_tail(&binding->link, &select_block->bindings); ++ __FD_SET__(index, &selector->fds[type].expected); ++ if (state) { ++ __FD_SET__(index, &selector->fds[type].pending); ++ if (xnselect_wakeup(selector)) ++ xnsched_run(); ++ } else ++ __FD_CLR__(index, &selector->fds[type].pending); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnselect_bind); ++ ++/* Must be called with nklock locked irqs off */ ++int __xnselect_signal(struct xnselect *select_block, unsigned state) ++{ ++ struct xnselect_binding *binding; ++ struct xnselector *selector; ++ int resched = 0; ++ ++ list_for_each_entry(binding, &select_block->bindings, link) { ++ selector = binding->selector; ++ if (state) { ++ if (!__FD_ISSET__(binding->bit_index, ++ &selector->fds[binding->type].pending)) { ++ __FD_SET__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ if (xnselect_wakeup(selector)) ++ resched = 1; ++ } ++ } else ++ __FD_CLR__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ } ++ ++ return resched; ++} ++EXPORT_SYMBOL_GPL(__xnselect_signal); ++ ++/** ++ * Destroy the @a xnselect structure associated with a file descriptor. ++ * ++ * Any binding with a @a xnselector block is destroyed. ++ * ++ * @param select_block pointer to the @a xnselect structure associated ++ * with a file descriptor ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++void xnselect_destroy(struct xnselect *select_block) ++{ ++ struct xnselect_binding *binding, *tmp; ++ struct xnselector *selector; ++ int resched = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&select_block->bindings)) ++ goto out; ++ ++ list_for_each_entry_safe(binding, tmp, &select_block->bindings, link) { ++ list_del(&binding->link); ++ selector = binding->selector; ++ __FD_CLR__(binding->bit_index, ++ &selector->fds[binding->type].expected); ++ if (!__FD_ISSET__(binding->bit_index, ++ &selector->fds[binding->type].pending)) { ++ __FD_SET__(binding->bit_index, ++ &selector->fds[binding->type].pending); ++ if (xnselect_wakeup(selector)) ++ resched = 1; ++ } ++ list_del(&binding->slink); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ if (resched) ++ xnsched_run(); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnselect_destroy); ++ ++static unsigned ++fd_set_andnot(fd_set *result, fd_set *first, fd_set *second, unsigned n) ++{ ++ unsigned i, not_empty = 0; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if((result->fds_bits[i] = ++ first->fds_bits[i] & ~(second->fds_bits[i]))) ++ not_empty = 1; ++ ++ if (i < __FDSET_LONGS__ ++ && (result->fds_bits[i] = ++ first->fds_bits[i] & ~(second->fds_bits[i]) & (__FDMASK__(n) - 1))) ++ not_empty = 1; ++ ++ return not_empty; ++} ++ ++static unsigned ++fd_set_and(fd_set *result, fd_set *first, fd_set *second, unsigned n) ++{ ++ unsigned i, not_empty = 0; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if((result->fds_bits[i] = ++ first->fds_bits[i] & second->fds_bits[i])) ++ not_empty 
= 1; ++ ++ if (i < __FDSET_LONGS__ ++ && (result->fds_bits[i] = ++ first->fds_bits[i] & second->fds_bits[i] & (__FDMASK__(n) - 1))) ++ not_empty = 1; ++ ++ return not_empty; ++} ++ ++static void fd_set_zeropad(fd_set *set, unsigned n) ++{ ++ unsigned i; ++ ++ i = __FDELT__(n); ++ ++ if (i < __FDSET_LONGS__) ++ set->fds_bits[i] &= (__FDMASK__(n) - 1); ++ ++ for(i++; i < __FDSET_LONGS__; i++) ++ set->fds_bits[i] = 0; ++} ++ ++static unsigned fd_set_popcount(fd_set *set, unsigned n) ++{ ++ unsigned count = 0, i; ++ ++ for (i = 0; i < __FDELT__(n); i++) ++ if (set->fds_bits[i]) ++ count += hweight_long(set->fds_bits[i]); ++ ++ if (i < __FDSET_LONGS__ && (set->fds_bits[i] & (__FDMASK__(n) - 1))) ++ count += hweight_long(set->fds_bits[i] & (__FDMASK__(n) - 1)); ++ ++ return count; ++} ++ ++/** ++ * Initialize a selector structure. ++ * ++ * @param selector The selector structure to be initialized. ++ * ++ * @retval 0 ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnselector_init(struct xnselector *selector) ++{ ++ unsigned int i; ++ ++ xnsynch_init(&selector->synchbase, XNSYNCH_FIFO, NULL); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) { ++ __FD_ZERO__(&selector->fds[i].expected); ++ __FD_ZERO__(&selector->fds[i].pending); ++ } ++ INIT_LIST_HEAD(&selector->bindings); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnselector_init); ++ ++/** ++ * Check the state of a number of file descriptors, wait for a state change if ++ * no descriptor is ready. ++ * ++ * @param selector structure to check for pending events ++ * @param out_fds The set of descriptors with pending events if a strictly positive number is returned, or the set of descriptors not yet bound if -ECHRNG is returned; ++ * @param in_fds the set of descriptors which events should be checked ++ * @param nfds the highest-numbered descriptor in any of the @a in_fds sets, plus 1; ++ * @param timeout the timeout, whose meaning depends on @a timeout_mode, note ++ * that xnselect() pass @a timeout and @a timeout_mode unchanged to ++ * xnsynch_sleep_on, so passing a relative value different from XN_INFINITE as a ++ * timeout with @a timeout_mode set to XN_RELATIVE, will cause a longer sleep ++ * than expected if the sleep is interrupted. ++ * @param timeout_mode the mode of @a timeout. ++ * ++ * @retval -EINVAL if @a nfds is negative; ++ * @retval -ECHRNG if some of the descriptors passed in @a in_fds have not yet ++ * been registered with xnselect_bind(), @a out_fds contains the set of such ++ * descriptors; ++ * @retval -EINTR if @a xnselect was interrupted while waiting; ++ * @retval 0 in case of timeout. ++ * @retval the number of file descriptors having received an event. 
++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnselect(struct xnselector *selector, ++ fd_set *out_fds[XNSELECT_MAX_TYPES], ++ fd_set *in_fds[XNSELECT_MAX_TYPES], ++ int nfds, ++ xnticks_t timeout, xntmode_t timeout_mode) ++{ ++ unsigned int i, not_empty = 0, count; ++ int info = 0; ++ spl_t s; ++ ++ if ((unsigned) nfds > __FD_SETSIZE) ++ return -EINVAL; ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i]) ++ fd_set_zeropad(out_fds[i], nfds); ++ ++ xnlock_get_irqsave(&nklock, s); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_andnot(out_fds[i], in_fds[i], ++ &selector->fds[i].expected, nfds)) ++ not_empty = 1; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (not_empty) ++ return -ECHRNG; ++ ++ xnlock_get_irqsave(&nklock, s); ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_and(out_fds[i], in_fds[i], ++ &selector->fds[i].pending, nfds)) ++ not_empty = 1; ++ ++ while (!not_empty) { ++ info = xnsynch_sleep_on(&selector->synchbase, ++ timeout, timeout_mode); ++ ++ for (i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i] ++ && fd_set_and(out_fds[i], in_fds[i], ++ &selector->fds[i].pending, nfds)) ++ not_empty = 1; ++ ++ if (info & (XNBREAK | XNTIMEO)) ++ break; ++ } ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (not_empty) { ++ for (count = 0, i = 0; i < XNSELECT_MAX_TYPES; i++) ++ if (out_fds[i]) ++ count += fd_set_popcount(out_fds[i], nfds); ++ ++ return count; ++ } ++ ++ if (info & XNBREAK) ++ return -EINTR; ++ ++ return 0; /* Timeout */ ++} ++EXPORT_SYMBOL_GPL(xnselect); ++ ++/** ++ * Destroy a selector block. ++ * ++ * All bindings with file descriptor are destroyed. ++ * ++ * @param selector the selector block to be destroyed ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnselector_destroy(struct xnselector *selector) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&selector->destroy_link, &selector_list); ++ __xnapc_schedule(deletion_apc); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnselector_destroy); ++ ++static void xnselector_destroy_loop(void *cookie) ++{ ++ struct xnselect_binding *binding, *tmpb; ++ struct xnselector *selector, *tmps; ++ struct xnselect *fd; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&selector_list)) ++ goto out; ++ ++ list_for_each_entry_safe(selector, tmps, &selector_list, destroy_link) { ++ list_del(&selector->destroy_link); ++ if (list_empty(&selector->bindings)) ++ goto release; ++ list_for_each_entry_safe(binding, tmpb, &selector->bindings, slink) { ++ list_del(&binding->slink); ++ fd = binding->fd; ++ list_del(&binding->link); ++ xnlock_put_irqrestore(&nklock, s); ++ xnfree(binding); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ release: ++ xnsynch_destroy(&selector->synchbase); ++ xnsched_run(); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnfree(selector); ++ ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++int xnselect_mount(void) ++{ ++ deletion_apc = xnapc_alloc("selector_list_destroy", ++ xnselector_destroy_loop, NULL); ++ if (deletion_apc < 0) ++ return deletion_apc; ++ ++ return 0; ++} ++ ++int xnselect_umount(void) ++{ ++ xnapc_free(deletion_apc); ++ return 0; ++} ++ ++/** @} */ +--- linux/kernel/xenomai/bufd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/bufd.c 2021-04-07 16:01:25.859636121 +0800 +@@ -0,0 +1,653 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_bufd Buffer descriptor ++ * ++ * Abstraction for copying data to/from different address spaces ++ * ++ * A buffer descriptor is a simple abstraction dealing with copy ++ * operations to/from memory buffers which may belong to different ++ * address spaces. ++ * ++ * To this end, the buffer descriptor library provides a small set of ++ * copy routines which are aware of address space restrictions when ++ * moving data, and a generic container type which can hold a ++ * reference to - or cover - a particular memory area, either present ++ * in kernel space, or in any of the existing user memory contexts. ++ * ++ * The goal of the buffer descriptor abstraction is to hide address ++ * space specifics from Xenomai services dealing with memory areas, ++ * allowing them to operate on multiple address spaces seamlessly. ++ * ++ * The common usage patterns are as follows: ++ * ++ * - Implementing a Xenomai syscall returning a bulk of data to the ++ * caller, which may have to be copied back to either kernel or user ++ * space: ++ * ++ * @code ++ * [Syscall implementation] ++ * ssize_t rt_bulk_read_inner(struct xnbufd *bufd) ++ * { ++ * ssize_t ret; ++ * size_t len; ++ * void *bulk; ++ * ++ * bulk = get_next_readable_bulk(&len); ++ * ret = xnbufd_copy_from_kmem(bufd, bulk, min(bufd->b_len, len)); ++ * free_bulk(bulk); ++ * ++ * ret = this_may_fail(); ++ * if (ret) ++ * xnbufd_invalidate(bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Kernel wrapper for in-kernel calls] ++ * int rt_bulk_read(void *ptr, size_t len) ++ * { ++ * struct xnbufd bufd; ++ * ssize_t ret; ++ * ++ * xnbufd_map_kwrite(&bufd, ptr, len); ++ * ret = rt_bulk_read_inner(&bufd); ++ * xnbufd_unmap_kwrite(&bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Userland trampoline for user syscalls] ++ * int __rt_bulk_read(struct pt_regs *regs) ++ * { ++ * struct xnbufd bufd; ++ * void __user *ptr; ++ * ssize_t ret; ++ * size_t len; ++ * ++ * ptr = (void __user *)__xn_reg_arg1(regs); ++ * len = __xn_reg_arg2(regs); ++ * ++ * xnbufd_map_uwrite(&bufd, ptr, len); ++ * ret = rt_bulk_read_inner(&bufd); ++ * xnbufd_unmap_uwrite(&bufd); ++ * ++ * return ret; ++ * } ++ * @endcode ++ * ++ * - Implementing a Xenomai syscall receiving a bulk of data from the ++ * caller, which may have to be read from either kernel or user ++ * space: ++ * ++ * @code ++ * [Syscall implementation] ++ * ssize_t rt_bulk_write_inner(struct xnbufd *bufd) ++ * { ++ * void *bulk = get_free_bulk(bufd->b_len); ++ * return xnbufd_copy_to_kmem(bulk, bufd, bufd->b_len); ++ * } ++ * ++ * [Kernel wrapper for in-kernel calls] ++ * int rt_bulk_write(const void *ptr, size_t len) ++ * { ++ * struct xnbufd bufd; ++ * ssize_t ret; ++ * ++ * 
xnbufd_map_kread(&bufd, ptr, len); ++ * ret = rt_bulk_write_inner(&bufd); ++ * xnbufd_unmap_kread(&bufd); ++ * ++ * return ret; ++ * } ++ * ++ * [Userland trampoline for user syscalls] ++ * int __rt_bulk_write(struct pt_regs *regs) ++ * { ++ * struct xnbufd bufd; ++ * void __user *ptr; ++ * ssize_t ret; ++ * size_t len; ++ * ++ * ptr = (void __user *)__xn_reg_arg1(regs); ++ * len = __xn_reg_arg2(regs); ++ * ++ * xnbufd_map_uread(&bufd, ptr, len); ++ * ret = rt_bulk_write_inner(&bufd); ++ * xnbufd_unmap_uread(&bufd); ++ * ++ * return ret; ++ * } ++ * @endcode ++ * ++ *@{*/ ++ ++/** ++ * @fn void xnbufd_map_kread(struct xnbufd *bufd, const void *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for reading from kernel memory. ++ * ++ * The new buffer descriptor may be used to copy data from kernel ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_kread(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes kernel memory area, starting from @a ptr. ++ * ++ * @param ptr The start of the kernel buffer to map. ++ * ++ * @param len The length of the kernel buffer starting at @a ptr. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_map_kwrite(struct xnbufd *bufd, void *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for writing to kernel memory. ++ * ++ * The new buffer descriptor may be used to copy data to kernel ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_kwrite(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes kernel memory area, starting from @a ptr. ++ * ++ * @param ptr The start of the kernel buffer to map. ++ * ++ * @param len The length of the kernel buffer starting at @a ptr. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnbufd_map_kmem(struct xnbufd *bufd, void *ptr, size_t len) ++{ ++ bufd->b_ptr = ptr; ++ bufd->b_len = len; ++ bufd->b_mm = NULL; ++ bufd->b_off = 0; ++ bufd->b_carry = NULL; ++} ++EXPORT_SYMBOL_GPL(xnbufd_map_kmem); ++ ++/** ++ * @fn void xnbufd_map_uread(struct xnbufd *bufd, const void __user *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for reading from user memory. ++ * ++ * The new buffer descriptor may be used to copy data from user ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_uread(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes user memory area, starting from @a ptr. @a ptr is ++ * never dereferenced directly, since it may refer to a buffer that ++ * lives in another address space. ++ * ++ * @param ptr The start of the user buffer to map. ++ * ++ * @param len The length of the user buffer starting at @a ptr. ++ * ++ * @coretags{task-unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_map_uwrite(struct xnbufd *bufd, void __user *ptr, size_t len) ++ * @brief Initialize a buffer descriptor for writing to user memory. ++ * ++ * The new buffer descriptor may be used to copy data to user ++ * memory. This routine should be used in pair with ++ * xnbufd_unmap_uwrite(). ++ * ++ * @param bufd The address of the buffer descriptor which will map a ++ * @a len bytes user memory area, starting from @a ptr. @a ptr is ++ * never dereferenced directly, since it may refer to a buffer that ++ * lives in another address space. ++ * ++ * @param ptr The start of the user buffer to map. ++ * ++ * @param len The length of the user buffer starting at @a ptr. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++ ++void xnbufd_map_umem(struct xnbufd *bufd, void __user *ptr, size_t len) ++{ ++ bufd->b_ptr = ptr; ++ bufd->b_len = len; ++ bufd->b_mm = current->mm; ++ bufd->b_off = 0; ++ bufd->b_carry = NULL; ++} ++EXPORT_SYMBOL_GPL(xnbufd_map_umem); ++ ++/** ++ * @fn ssize_t xnbufd_copy_to_kmem(void *to, struct xnbufd *bufd, size_t len) ++ * @brief Copy memory covered by a buffer descriptor to kernel memory. ++ * ++ * This routine copies @a len bytes from the area referred to by the ++ * buffer descriptor @a bufd to the kernel memory area @a to. ++ * xnbufd_copy_to_kmem() tracks the read offset within the source ++ * memory internally, so that it may be called several times in a ++ * loop, until the entire memory area is loaded. ++ * ++ * The source address space is dealt with, according to the following ++ * rules: ++ * ++ * - if @a bufd refers to readable kernel area (i.e. see ++ * xnbufd_map_kread()), the copy is immediately and fully performed ++ * with no restriction. ++ * ++ * - if @a bufd refers to a readable user area (i.e. see ++ * xnbufd_map_uread()), the copy is performed only if that area ++ * lives in the currently active address space, and only if the ++ * caller may sleep Linux-wise to process any potential page fault ++ * which may arise while reading from that memory. ++ * ++ * - any attempt to read from @a bufd from a non-suitable context is ++ * considered as a bug, and will raise a panic assertion when the ++ * nucleus is compiled in debug mode. ++ * ++ * @param to The start address of the kernel memory to copy to. ++ * ++ * @param bufd The address of the buffer descriptor covering the user ++ * memory to copy data from. ++ * ++ * @param len The length of the user memory to copy from @a bufd. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. Otherwise: ++ * ++ * - -EINVAL is returned upon attempt to read from the user area from ++ * an invalid context. This error is only returned when the debug ++ * mode is disabled; otherwise a panic assertion is raised. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ * ++ * This routine may switch the caller to secondary mode if a page ++ * fault occurs while reading from the user area. For that reason, ++ * xnbufd_copy_to_kmem() may only be called from a preemptible section ++ * (Linux-wise). ++ */ ++ssize_t xnbufd_copy_to_kmem(void *to, struct xnbufd *bufd, size_t len) ++{ ++ caddr_t from; ++ ++ thread_only(); ++ ++ if (len == 0) ++ goto out; ++ ++ from = bufd->b_ptr + bufd->b_off; ++ ++ /* ++ * If the descriptor covers a source buffer living in the ++ * kernel address space, we may read from it directly. ++ */ ++ if (bufd->b_mm == NULL) { ++ memcpy(to, from, len); ++ goto advance_offset; ++ } ++ ++ /* ++ * We want to read data from user-space, check whether: ++ * 1) the source buffer lies in the current address space, ++ * 2) we may fault while reading from the buffer directly. ++ * ++ * If we can't reach the buffer, or the current context may ++ * not fault while reading data from it, copy_from_user() is ++ * not an option and we have a bug somewhere, since there is ++ * no way we could fetch the data to kernel space immediately. ++ * ++ * Note that we don't check for non-preemptible Linux context ++ * here, since the source buffer would live in kernel space in ++ * such a case. 
++ */ ++ if (current->mm == bufd->b_mm) { ++ preemptible_only(); ++ if (cobalt_copy_from_user(to, (void __user *)from, len)) ++ return -EFAULT; ++ goto advance_offset; ++ } ++ ++ XENO_BUG(COBALT); ++ ++ return -EINVAL; ++ ++advance_offset: ++ bufd->b_off += len; ++out: ++ return (ssize_t)bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_copy_to_kmem); ++ ++/** ++ * @fn ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, void *from, size_t len) ++ * @brief Copy kernel memory to the area covered by a buffer descriptor. ++ * ++ * This routine copies @a len bytes from the kernel memory starting at ++ * @a from to the area referred to by the buffer descriptor @a ++ * bufd. xnbufd_copy_from_kmem() tracks the write offset within the ++ * destination memory internally, so that it may be called several ++ * times in a loop, until the entire memory area is stored. ++ * ++ * The destination address space is dealt with, according to the ++ * following rules: ++ * ++ * - if @a bufd refers to a writable kernel area (i.e. see ++ * xnbufd_map_kwrite()), the copy is immediatly and fully performed ++ * with no restriction. ++ * ++ * - if @a bufd refers to a writable user area (i.e. see ++ * xnbufd_map_uwrite()), the copy is performed only if that area ++ * lives in the currently active address space, and only if the ++ * caller may sleep Linux-wise to process any potential page fault ++ * which may arise while writing to that memory. ++ * ++ * - if @a bufd refers to a user area which may not be immediately ++ * written to from the current context, the copy is postponed until ++ * xnbufd_unmap_uwrite() is invoked for @a ubufd, at which point the ++ * copy will take place. In such a case, the source memory is ++ * transferred to a carry over buffer allocated internally; this ++ * operation may lead to request dynamic memory from the nucleus ++ * heap if @a len is greater than 64 bytes. ++ * ++ * @param bufd The address of the buffer descriptor covering the user ++ * memory to copy data to. ++ * ++ * @param from The start address of the kernel memory to copy from. ++ * ++ * @param len The length of the kernel memory to copy to @a bufd. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. Otherwise, ++ * ++ * - -ENOMEM is returned when no memory is available from the nucleus ++ * heap to allocate the carry over buffer. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ * ++ * This routine may switch the caller to secondary mode if a page ++ * fault occurs while reading from the user area. For that reason, ++ * xnbufd_copy_to_kmem() may only be called from a preemptible section ++ * (Linux-wise). ++ */ ++ssize_t xnbufd_copy_from_kmem(struct xnbufd *bufd, void *from, size_t len) ++{ ++ caddr_t to; ++ ++ thread_only(); ++ ++ if (len == 0) ++ goto out; ++ ++ to = bufd->b_ptr + bufd->b_off; ++ ++ /* ++ * If the descriptor covers a destination buffer living in the ++ * kernel address space, we may copy to it directly. ++ */ ++ if (bufd->b_mm == NULL) ++ goto direct_copy; ++ ++ /* ++ * We want to pass data to user-space, check whether: ++ * 1) the destination buffer lies in the current address space, ++ * 2) we may fault while writing to the buffer directly. 
++ * ++ * If we can't reach the buffer, or the current context may ++ * not fault while copying data to it, copy_to_user() is not ++ * an option and we have to convey the data from kernel memory ++ * through the carry over buffer. ++ * ++ * Note that we don't check for non-preemptible Linux context ++ * here: feeding a RT activity with data from a non-RT context ++ * is wrong in the first place, so never mind. ++ */ ++ if (current->mm == bufd->b_mm) { ++ preemptible_only(); ++ if (cobalt_copy_to_user((void __user *)to, from, len)) ++ return -EFAULT; ++ goto advance_offset; ++ } ++ ++ /* ++ * We need a carry over buffer to convey the data to ++ * user-space. xnbufd_unmap_uwrite() should be called on the ++ * way back to user-space to update the destination buffer ++ * from the carry over area. ++ */ ++ if (bufd->b_carry == NULL) { ++ /* ++ * Try to use the fast carry over area available ++ * directly from the descriptor for short messages, to ++ * save a dynamic allocation request. ++ */ ++ if (bufd->b_len <= sizeof(bufd->b_buf)) ++ bufd->b_carry = bufd->b_buf; ++ else { ++ bufd->b_carry = xnmalloc(bufd->b_len); ++ if (bufd->b_carry == NULL) ++ return -ENOMEM; ++ } ++ to = bufd->b_carry; ++ } else ++ to = bufd->b_carry + bufd->b_off; ++ ++direct_copy: ++ memcpy(to, from, len); ++ ++advance_offset: ++ bufd->b_off += len; ++out: ++ return (ssize_t)bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_copy_from_kmem); ++ ++/** ++ * @fn void xnbufd_unmap_uread(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_uread(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_uread(), to read data from a user area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. ++ */ ++ssize_t xnbufd_unmap_uread(struct xnbufd *bufd) ++{ ++ preemptible_only(); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_uread); ++ ++/** ++ * @fn void xnbufd_unmap_uwrite(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_uwrite(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_uwrite(), to write data to a user area. ++ * ++ * The main action taken is to write the contents of the kernel memory ++ * area passed to xnbufd_copy_from_kmem() whenever the copy operation ++ * was postponed at that time; the carry over buffer is eventually ++ * released as needed. If xnbufd_copy_from_kmem() was allowed to copy ++ * to the destination user memory at once, then xnbufd_unmap_uwrite() ++ * leads to a no-op. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ * ++ * @note Calling this routine while holding the nklock and/or running ++ * with interrupts disabled is invalid, and doing so will trigger a ++ * debug assertion. 
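++ *
++ * Purely as an illustration, and assuming xnbufd_map_uwrite() takes
++ * the same (bufd, ptr, len) arguments as xnbufd_map_umem() shown
++ * above, a kernel-side producer would typically pair this routine
++ * with xnbufd_copy_from_kmem() as sketched below; the @c u_buf and
++ * @c kdata names are placeholders only.
++ *
++ * @code
++ * struct xnbufd bufd;
++ * ssize_t ret;
++ *
++ * xnbufd_map_uwrite(&bufd, u_buf, len);
++ * ret = xnbufd_copy_from_kmem(&bufd, kdata, len);
++ * if (ret < 0)
++ *     xnbufd_invalidate(&bufd); /* drop any carry over data on error */
++ * ret = xnbufd_unmap_uwrite(&bufd); /* flush carry over buffer, if any */
++ * @endcode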
++ */ ++ssize_t xnbufd_unmap_uwrite(struct xnbufd *bufd) ++{ ++ ssize_t ret = 0; ++ void __user *to; ++ void *from; ++ size_t len; ++ ++ preemptible_only(); ++ ++ len = bufd->b_off; ++ ++ if (bufd->b_carry == NULL) ++ /* Copy took place directly. Fine. */ ++ goto done; ++ ++ /* ++ * Something was written to the carry over area, copy the ++ * contents to user-space, then release the area if needed. ++ */ ++ to = (void __user *)bufd->b_ptr; ++ from = bufd->b_carry; ++ ret = cobalt_copy_to_user(to, from, len); ++ ++ if (bufd->b_len > sizeof(bufd->b_buf)) ++ xnfree(bufd->b_carry); ++done: ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return ret ?: (ssize_t)len; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_uwrite); ++ ++/** ++ * @fn void xnbufd_reset(struct xnbufd *bufd) ++ * @brief Reset a buffer descriptor. ++ * ++ * The buffer descriptor is reset, so that all data already copied is ++ * forgotten. Any carry over buffer allocated is kept, though. ++ * ++ * @param bufd The address of the buffer descriptor to reset. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnbufd_invalidate(struct xnbufd *bufd) ++ * @brief Invalidate a buffer descriptor. ++ * ++ * The buffer descriptor is invalidated, making it unusable for ++ * further copy operations. If an outstanding carry over buffer was ++ * allocated by a previous call to xnbufd_copy_from_kmem(), it is ++ * immediately freed so that no data transfer will happen when the ++ * descriptor is finalized. ++ * ++ * The only action that may subsequently be performed on an ++ * invalidated descriptor is calling the relevant unmapping routine ++ * for it. For that reason, xnbufd_invalidate() should be invoked on ++ * the error path when data may have been transferred to the carry ++ * over buffer. ++ * ++ * @param bufd The address of the buffer descriptor to invalidate. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnbufd_invalidate(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ if (bufd->b_carry) { ++ if (bufd->b_len > sizeof(bufd->b_buf)) ++ xnfree(bufd->b_carry); ++ bufd->b_carry = NULL; ++ } ++ bufd->b_off = 0; ++} ++EXPORT_SYMBOL_GPL(xnbufd_invalidate); ++ ++/** ++ * @fn void xnbufd_unmap_kread(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_kread(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_kread(), to read data from a kernel area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes read so far from the memory area ++ * covered by @a ubufd. ++ * ++ * @coretags{task-unrestricted} ++ */ ++ssize_t xnbufd_unmap_kread(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_kread); ++ ++/** ++ * @fn void xnbufd_unmap_kwrite(struct xnbufd *bufd) ++ * @brief Finalize a buffer descriptor obtained from xnbufd_map_kwrite(). ++ * ++ * This routine finalizes a buffer descriptor previously initialized ++ * by a call to xnbufd_map_kwrite(), to write data to a kernel area. ++ * ++ * @param bufd The address of the buffer descriptor to finalize. ++ * ++ * @return The number of bytes written so far to the memory area ++ * covered by @a ubufd. 
++ * ++ * @coretags{task-unrestricted} ++ */ ++ssize_t xnbufd_unmap_kwrite(struct xnbufd *bufd) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ bufd->b_ptr = (caddr_t)-1; ++#endif ++ return bufd->b_off; ++} ++EXPORT_SYMBOL_GPL(xnbufd_unmap_kwrite); ++ ++/** @} */ +--- linux/kernel/xenomai/sched-quota.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-quota.c 2021-04-07 16:01:25.854636128 +0800 +@@ -0,0 +1,835 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * With this policy, each per-CPU scheduler slot maintains a list of ++ * active thread groups, picking from the sched_rt runqueue. ++ * ++ * Each time a thread is picked from the runqueue, we check whether we ++ * still have budget for running it, looking at the group it belongs ++ * to. If so, a timer is armed to elapse when that group has no more ++ * budget, would the incoming thread run unpreempted until then ++ * (i.e. xnsched_quota->limit_timer). ++ * ++ * Otherwise, if no budget remains in the group for running the ++ * candidate thread, we move the latter to a local expiry queue ++ * maintained by the group. This process is done on the fly as we pull ++ * from the runqueue. ++ * ++ * Updating the remaining budget is done each time the Cobalt core ++ * asks for replacing the current thread with the next runnable one, ++ * i.e. xnsched_quota_pick(). There we charge the elapsed run time of ++ * the outgoing thread to the relevant group, and conversely, we check ++ * whether the incoming thread has budget. ++ * ++ * Finally, a per-CPU timer (xnsched_quota->refill_timer) periodically ++ * ticks in the background, in accordance to the defined quota ++ * interval. Thread group budgets get replenished by its handler in ++ * accordance to their respective share, pushing all expired threads ++ * back to the run queue in the same move. ++ * ++ * NOTE: since the core logic enforcing the budget entirely happens in ++ * xnsched_quota_pick(), applying a budget change can be done as ++ * simply as forcing the rescheduling procedure to be invoked asap. As ++ * a result of this, the Cobalt core will ask for the next thread to ++ * run, which means calling xnsched_quota_pick() eventually. ++ * ++ * CAUTION: xnsched_quota_group->nr_active does count both the threads ++ * from that group linked to the sched_rt runqueue, _and_ the threads ++ * moved to the local expiry queue. As a matter of fact, the expired ++ * threads - those for which we consumed all the per-group budget - ++ * are still seen as runnable (i.e. not blocked/suspended) by the ++ * Cobalt core. This only means that the SCHED_QUOTA policy won't pick ++ * them until the corresponding budget is replenished. 
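++ *
++ * As a purely illustrative example of the replenishment arithmetic
++ * implemented by replenish_budget() below (the figures are made up,
++ * not defaults): assume a 10 ms quota interval, quota = 30%
++ * (quota_ns = 3 ms) and peak = 50% (quota_peak_ns = 5 ms). If 2 ms
++ * were left unconsumed when the refill timer fires, the new budget
++ * is 2 + 3 = 5 ms, which does not exceed the peak, so no credit
++ * accumulates. If the full 3 ms were left while the group still had
++ * runnable threads (e.g. it was preempted by other groups for the
++ * whole interval), 3 + 3 = 6 ms exceeds the 5 ms peak: the budget is
++ * clamped to 5 ms and the extra 1 ms is accumulated as credit, to be
++ * consumed over subsequent intervals, or dropped as soon as the
++ * group has no runnable thread left.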
++ */ ++static DECLARE_BITMAP(group_map, CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS); ++ ++static inline int group_is_active(struct xnsched_quota_group *tg) ++{ ++ struct xnthread *curr = tg->sched->curr; ++ ++ if (tg->nr_active) ++ return 1; ++ ++ /* ++ * Check whether the current thread belongs to the group, and ++ * is still in running state (XNREADY denotes a thread linked ++ * to the runqueue, in which case tg->nr_active already ++ * accounts for it). ++ */ ++ if (curr->quota == tg && ++ xnthread_test_state(curr, XNREADY|XNTHREAD_BLOCK_BITS) == 0) ++ return 1; ++ ++ return 0; ++} ++ ++static inline void replenish_budget(struct xnsched_quota *qs, ++ struct xnsched_quota_group *tg) ++{ ++ xnticks_t budget_ns, credit_ns; ++ ++ if (tg->quota_ns == tg->quota_peak_ns) { ++ /* ++ * Fast path: we don't accumulate runtime credit. ++ * This includes groups with no runtime limit ++ * (i.e. quota off: quota >= period && quota == peak). ++ */ ++ tg->run_budget_ns = tg->quota_ns; ++ return; ++ } ++ ++ /* ++ * We have to deal with runtime credit accumulation, as the ++ * group may consume more than its base quota during a single ++ * interval, up to a peak duration though (not to monopolize ++ * the CPU). ++ * ++ * - In the simplest case, a group is allotted a new full ++ * budget plus the unconsumed portion of the previous budget, ++ * provided the sum does not exceed the peak quota. ++ * ++ * - When there is too much budget for a single interval ++ * (i.e. above peak quota), we spread the extra time over ++ * multiple intervals through a credit accumulation mechanism. ++ * ++ * - The accumulated credit is dropped whenever a group has no ++ * runnable threads. ++ */ ++ if (!group_is_active(tg)) { ++ /* Drop accumulated credit. */ ++ tg->run_credit_ns = 0; ++ tg->run_budget_ns = tg->quota_ns; ++ return; ++ } ++ ++ budget_ns = tg->run_budget_ns + tg->quota_ns; ++ if (budget_ns > tg->quota_peak_ns) { ++ /* Too much budget, spread it over intervals. */ ++ tg->run_credit_ns += budget_ns - tg->quota_peak_ns; ++ tg->run_budget_ns = tg->quota_peak_ns; ++ } else if (tg->run_credit_ns) { ++ credit_ns = tg->quota_peak_ns - budget_ns; ++ /* Consume the accumulated credit. */ ++ if (tg->run_credit_ns >= credit_ns) ++ tg->run_credit_ns -= credit_ns; ++ else { ++ credit_ns = tg->run_credit_ns; ++ tg->run_credit_ns = 0; ++ } ++ /* Allot extended budget, limited to peak quota. */ ++ tg->run_budget_ns = budget_ns + credit_ns; ++ } else ++ /* No credit, budget was below peak quota. */ ++ tg->run_budget_ns = budget_ns; ++} ++ ++static void quota_refill_handler(struct xntimer *timer) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnthread *thread, *tmp; ++ struct xnsched_quota *qs; ++ struct xnsched *sched; ++ ++ qs = container_of(timer, struct xnsched_quota, refill_timer); ++ XENO_BUG_ON(COBALT, list_empty(&qs->groups)); ++ sched = container_of(qs, struct xnsched, quota); ++ ++ trace_cobalt_schedquota_refill(0); ++ ++ list_for_each_entry(tg, &qs->groups, next) { ++ /* Allot a new runtime budget for the group. */ ++ replenish_budget(qs, tg); ++ ++ if (tg->run_budget_ns == 0 || list_empty(&tg->expired)) ++ continue; ++ /* ++ * For each group living on this CPU, move all expired ++ * threads back to the runqueue. Since those threads ++ * were moved out of the runqueue as we were ++ * considering them for execution, we push them back ++ * in LIFO order to their respective priority group. ++ * The expiry queue is FIFO to keep ordering right ++ * among expired threads. 
++ */ ++ list_for_each_entry_safe_reverse(thread, tmp, &tg->expired, quota_expired) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq(&sched->rt.runnable, thread); ++ } ++ } ++ ++ xnsched_set_self_resched(timer->sched); ++} ++ ++static void quota_limit_handler(struct xntimer *timer) ++{ ++ struct xnsched *sched; ++ ++ sched = container_of(timer, struct xnsched, quota.limit_timer); ++ /* ++ * Force a rescheduling on the return path of the current ++ * interrupt, so that the budget is re-evaluated for the ++ * current group in xnsched_quota_pick(). ++ */ ++ xnsched_set_self_resched(sched); ++} ++ ++static int quota_sum_all(struct xnsched_quota *qs) ++{ ++ struct xnsched_quota_group *tg; ++ int sum; ++ ++ if (list_empty(&qs->groups)) ++ return 0; ++ ++ sum = 0; ++ list_for_each_entry(tg, &qs->groups, next) ++ sum += tg->quota_percent; ++ ++ return sum; ++} ++ ++static void xnsched_quota_init(struct xnsched *sched) ++{ ++ char limiter_name[XNOBJECT_NAME_LEN], refiller_name[XNOBJECT_NAME_LEN]; ++ struct xnsched_quota *qs = &sched->quota; ++ ++ qs->period_ns = CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD * 1000ULL; ++ INIT_LIST_HEAD(&qs->groups); ++ ++#ifdef CONFIG_SMP ++ ksformat(refiller_name, sizeof(refiller_name), ++ "[quota-refill/%u]", sched->cpu); ++ ksformat(limiter_name, sizeof(limiter_name), ++ "[quota-limit/%u]", sched->cpu); ++#else ++ strcpy(refiller_name, "[quota-refill]"); ++ strcpy(limiter_name, "[quota-limit]"); ++#endif ++ xntimer_init(&qs->refill_timer, ++ &nkclock, quota_refill_handler, sched, ++ XNTIMER_IGRAVITY); ++ xntimer_set_name(&qs->refill_timer, refiller_name); ++ ++ xntimer_init(&qs->limit_timer, ++ &nkclock, quota_limit_handler, sched, ++ XNTIMER_IGRAVITY); ++ xntimer_set_name(&qs->limit_timer, limiter_name); ++} ++ ++static bool xnsched_quota_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnsched_quota *qs; ++ bool effective; ++ ++ xnthread_clear_state(thread, XNWEAK); ++ effective = xnsched_set_effective_priority(thread, p->quota.prio); ++ ++ qs = &thread->sched->quota; ++ list_for_each_entry(tg, &qs->groups, next) { ++ if (tg->tgid != p->quota.tgid) ++ continue; ++ if (thread->quota) { ++ /* Dequeued earlier by our caller. */ ++ list_del(&thread->quota_next); ++ thread->quota->nr_threads--; ++ } ++ ++ trace_cobalt_schedquota_add_thread(tg, thread); ++ ++ thread->quota = tg; ++ list_add(&thread->quota_next, &tg->members); ++ tg->nr_threads++; ++ return effective; ++ } ++ ++ XENO_BUG(COBALT); ++ ++ return false; ++} ++ ++static void xnsched_quota_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->quota.prio = thread->cprio; ++ p->quota.tgid = thread->quota->tgid; ++} ++ ++static void xnsched_quota_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) { ++ /* We should not cross groups during PI boost. 
*/ ++ XENO_WARN_ON(COBALT, ++ thread->base_class == &xnsched_class_quota && ++ thread->quota->tgid != p->quota.tgid); ++ thread->cprio = p->quota.prio; ++ } else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_quota_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_QUOTA_MAX_PRIO) ++ prio = XNSCHED_QUOTA_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_quota_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_quota_group *tg; ++ struct xnsched_quota *qs; ++ int tgid; ++ ++ if (p->quota.prio < XNSCHED_QUOTA_MIN_PRIO || ++ p->quota.prio > XNSCHED_QUOTA_MAX_PRIO) ++ return -EINVAL; ++ ++ tgid = p->quota.tgid; ++ if (tgid < 0 || tgid >= CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS) ++ return -EINVAL; ++ ++ /* ++ * The group must be managed on the same CPU the thread ++ * currently runs on. ++ */ ++ qs = &thread->sched->quota; ++ list_for_each_entry(tg, &qs->groups, next) { ++ if (tg->tgid == tgid) ++ return 0; ++ } ++ ++ /* ++ * If that group exists nevertheless, we give userland a ++ * specific error code. ++ */ ++ if (test_bit(tgid, group_map)) ++ return -EPERM; ++ ++ return -EINVAL; ++} ++ ++static void xnsched_quota_forget(struct xnthread *thread) ++{ ++ trace_cobalt_schedquota_remove_thread(thread->quota, thread); ++ ++ thread->quota->nr_threads--; ++ XENO_BUG_ON(COBALT, thread->quota->nr_threads < 0); ++ list_del(&thread->quota_next); ++ thread->quota = NULL; ++} ++ ++static void xnsched_quota_kick(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ /* ++ * Allow a kicked thread to be elected for running until it ++ * relaxes, even if the group it belongs to lacks runtime ++ * budget. 
++ */ ++ if (tg->run_budget_ns == 0 && !list_empty(&thread->quota_expired)) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq_tail(&sched->rt.runnable, thread); ++ } ++} ++ ++static inline int thread_is_runnable(struct xnthread *thread) ++{ ++ return thread->quota->run_budget_ns > 0 || ++ xnthread_test_info(thread, XNKICKED); ++} ++ ++static void xnsched_quota_enqueue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!thread_is_runnable(thread)) ++ list_add_tail(&thread->quota_expired, &tg->expired); ++ else ++ xnsched_addq_tail(&sched->rt.runnable, thread); ++ ++ tg->nr_active++; ++} ++ ++static void xnsched_quota_dequeue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!list_empty(&thread->quota_expired)) ++ list_del_init(&thread->quota_expired); ++ else ++ xnsched_delq(&sched->rt.runnable, thread); ++ ++ tg->nr_active--; ++} ++ ++static void xnsched_quota_requeue(struct xnthread *thread) ++{ ++ struct xnsched_quota_group *tg = thread->quota; ++ struct xnsched *sched = thread->sched; ++ ++ if (!thread_is_runnable(thread)) ++ list_add(&thread->quota_expired, &tg->expired); ++ else ++ xnsched_addq(&sched->rt.runnable, thread); ++ ++ tg->nr_active++; ++} ++ ++static struct xnthread *xnsched_quota_pick(struct xnsched *sched) ++{ ++ struct xnthread *next, *curr = sched->curr; ++ struct xnsched_quota *qs = &sched->quota; ++ struct xnsched_quota_group *otg, *tg; ++ xnticks_t now, elapsed; ++ int ret; ++ ++ now = xnclock_read_monotonic(&nkclock); ++ otg = curr->quota; ++ if (otg == NULL) ++ goto pick; ++ /* ++ * Charge the time consumed by the outgoing thread to the ++ * group it belongs to. ++ */ ++ elapsed = now - otg->run_start_ns; ++ if (elapsed < otg->run_budget_ns) ++ otg->run_budget_ns -= elapsed; ++ else ++ otg->run_budget_ns = 0; ++pick: ++ next = xnsched_getq(&sched->rt.runnable); ++ if (next == NULL) { ++ xntimer_stop(&qs->limit_timer); ++ return NULL; ++ } ++ ++ /* ++ * As we basically piggyback on the SCHED_FIFO runqueue, make ++ * sure to detect non-quota threads. ++ */ ++ tg = next->quota; ++ if (tg == NULL) ++ return next; ++ ++ tg->run_start_ns = now; ++ ++ /* ++ * Don't consider budget if kicked, we have to allow this ++ * thread to run until it eventually relaxes. ++ */ ++ if (xnthread_test_info(next, XNKICKED)) { ++ xntimer_stop(&qs->limit_timer); ++ goto out; ++ } ++ ++ if (tg->run_budget_ns == 0) { ++ /* Flush expired group members as we go. */ ++ list_add_tail(&next->quota_expired, &tg->expired); ++ goto pick; ++ } ++ ++ if (otg == tg && xntimer_running_p(&qs->limit_timer)) ++ /* Same group, leave the running timer untouched. */ ++ goto out; ++ ++ /* Arm limit timer for the new running group. */ ++ ret = xntimer_start(&qs->limit_timer, now + tg->run_budget_ns, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret) { ++ /* Budget exhausted: deactivate this group. */ ++ tg->run_budget_ns = 0; ++ list_add_tail(&next->quota_expired, &tg->expired); ++ goto pick; ++ } ++out: ++ tg->nr_active--; ++ ++ return next; ++} ++ ++static void xnsched_quota_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ union xnsched_policy_param param; ++ /* ++ * Runtime quota groups are defined per-CPU, so leaving the ++ * current CPU means exiting the group. We do this by moving ++ * the target thread to the plain RT class. 
++ */ ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++} ++ ++/** ++ * @ingroup cobalt_core_sched ++ * @defgroup sched_quota SCHED_QUOTA scheduling policy ++ * ++ * The SCHED_QUOTA policy enforces a limitation on the CPU consumption ++ * of threads over a globally defined period, known as the quota ++ * interval. This is done by pooling threads with common requirements ++ * in groups, and giving each group a share of the global period ++ * (CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD). ++ * ++ * When threads have entirely consumed the quota allotted to the group ++ * they belong to, the latter is suspended as a whole, until the next ++ * quota interval starts. At this point, a new runtime budget is ++ * given to each group, in accordance with its share. ++ * ++ *@{ ++ */ ++int xnsched_quota_create_group(struct xnsched_quota_group *tg, ++ struct xnsched *sched, ++ int *quota_sum_r) ++{ ++ int tgid, nr_groups = CONFIG_XENO_OPT_SCHED_QUOTA_NR_GROUPS; ++ struct xnsched_quota *qs = &sched->quota; ++ ++ atomic_only(); ++ ++ tgid = find_first_zero_bit(group_map, nr_groups); ++ if (tgid >= nr_groups) ++ return -ENOSPC; ++ ++ __set_bit(tgid, group_map); ++ tg->tgid = tgid; ++ tg->sched = sched; ++ tg->run_budget_ns = qs->period_ns; ++ tg->run_credit_ns = 0; ++ tg->quota_percent = 100; ++ tg->quota_peak_percent = 100; ++ tg->quota_ns = qs->period_ns; ++ tg->quota_peak_ns = qs->period_ns; ++ tg->nr_active = 0; ++ tg->nr_threads = 0; ++ INIT_LIST_HEAD(&tg->members); ++ INIT_LIST_HEAD(&tg->expired); ++ ++ trace_cobalt_schedquota_create_group(tg); ++ ++ if (list_empty(&qs->groups)) ++ xntimer_start(&qs->refill_timer, ++ qs->period_ns, qs->period_ns, XN_RELATIVE); ++ ++ list_add(&tg->next, &qs->groups); ++ *quota_sum_r = quota_sum_all(qs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_create_group); ++ ++int xnsched_quota_destroy_group(struct xnsched_quota_group *tg, ++ int force, int *quota_sum_r) ++{ ++ struct xnsched_quota *qs = &tg->sched->quota; ++ union xnsched_policy_param param; ++ struct xnthread *thread, *tmp; ++ ++ atomic_only(); ++ ++ if (!list_empty(&tg->members)) { ++ if (!force) ++ return -EBUSY; ++ /* Move group members to the rt class. */ ++ list_for_each_entry_safe(thread, tmp, &tg->members, quota_next) { ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++ } ++ } ++ ++ trace_cobalt_schedquota_destroy_group(tg); ++ ++ list_del(&tg->next); ++ __clear_bit(tg->tgid, group_map); ++ ++ if (list_empty(&qs->groups)) ++ xntimer_stop(&qs->refill_timer); ++ ++ if (quota_sum_r) ++ *quota_sum_r = quota_sum_all(qs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_destroy_group); ++ ++void xnsched_quota_set_limit(struct xnsched_quota_group *tg, ++ int quota_percent, int quota_peak_percent, ++ int *quota_sum_r) ++{ ++ struct xnsched *sched = tg->sched; ++ struct xnsched_quota *qs = &sched->quota; ++ xnticks_t old_quota_ns = tg->quota_ns; ++ struct xnthread *thread, *tmp, *curr; ++ xnticks_t now, elapsed, consumed; ++ ++ atomic_only(); ++ ++ trace_cobalt_schedquota_set_limit(tg, quota_percent, ++ quota_peak_percent); ++ ++ if (quota_percent < 0 || quota_percent > 100) { /* Quota off. 
*/ ++ quota_percent = 100; ++ tg->quota_ns = qs->period_ns; ++ } else ++ tg->quota_ns = xnarch_div64(qs->period_ns * quota_percent, 100); ++ ++ if (quota_peak_percent < quota_percent) ++ quota_peak_percent = quota_percent; ++ ++ if (quota_peak_percent < 0 || quota_peak_percent > 100) { ++ quota_peak_percent = 100; ++ tg->quota_peak_ns = qs->period_ns; ++ } else ++ tg->quota_peak_ns = xnarch_div64(qs->period_ns * quota_peak_percent, 100); ++ ++ tg->quota_percent = quota_percent; ++ tg->quota_peak_percent = quota_peak_percent; ++ ++ curr = sched->curr; ++ if (curr->quota == tg && ++ xnthread_test_state(curr, XNREADY|XNTHREAD_BLOCK_BITS) == 0) { ++ now = xnclock_read_monotonic(&nkclock); ++ ++ elapsed = now - tg->run_start_ns; ++ if (elapsed < tg->run_budget_ns) ++ tg->run_budget_ns -= elapsed; ++ else ++ tg->run_budget_ns = 0; ++ ++ tg->run_start_ns = now; ++ ++ xntimer_stop(&qs->limit_timer); ++ } ++ ++ if (tg->run_budget_ns <= old_quota_ns) ++ consumed = old_quota_ns - tg->run_budget_ns; ++ else ++ consumed = 0; ++ if (tg->quota_ns >= consumed) ++ tg->run_budget_ns = tg->quota_ns - consumed; ++ else ++ tg->run_budget_ns = 0; ++ ++ tg->run_credit_ns = 0; /* Drop accumulated credit. */ ++ ++ *quota_sum_r = quota_sum_all(qs); ++ ++ if (tg->run_budget_ns > 0) { ++ list_for_each_entry_safe_reverse(thread, tmp, &tg->expired, ++ quota_expired) { ++ list_del_init(&thread->quota_expired); ++ xnsched_addq(&sched->rt.runnable, thread); ++ } ++ } ++ ++ /* ++ * Apply the new budget immediately, in case a member of this ++ * group is currently running. ++ */ ++ xnsched_set_resched(sched); ++ xnsched_run(); ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_set_limit); ++ ++struct xnsched_quota_group * ++xnsched_quota_find_group(struct xnsched *sched, int tgid) ++{ ++ struct xnsched_quota_group *tg; ++ ++ atomic_only(); ++ ++ if (list_empty(&sched->quota.groups)) ++ return NULL; ++ ++ list_for_each_entry(tg, &sched->quota.groups, next) { ++ if (tg->tgid == tgid) ++ return tg; ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_find_group); ++ ++int xnsched_quota_sum_all(struct xnsched *sched) ++{ ++ struct xnsched_quota *qs = &sched->quota; ++ ++ atomic_only(); ++ ++ return quota_sum_all(qs); ++} ++EXPORT_SYMBOL_GPL(xnsched_quota_sum_all); ++ ++/** @} */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_quota_vfroot; ++ ++struct vfile_sched_quota_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_quota_data { ++ int cpu; ++ pid_t pid; ++ int prio; ++ int tgid; ++ xnticks_t budget; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_quota_ops; ++ ++static struct xnvfile_snapshot vfile_sched_quota = { ++ .privsz = sizeof(struct vfile_sched_quota_priv), ++ .datasz = sizeof(struct vfile_sched_quota_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_quota_ops, ++}; ++ ++static int vfile_sched_quota_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_quota_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_quota.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_quota_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_quota_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_quota_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_quota) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->tgid = thread->quota->tgid; ++ p->prio = thread->cprio; ++ p->budget = thread->quota->run_budget_ns; ++ ++ return 1; ++} ++ ++static int vfile_sched_quota_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_quota_data *p = data; ++ char buf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %-4s %-10s %s\n", ++ "CPU", "PID", "TGID", "PRI", "BUDGET", "NAME"); ++ else { ++ xntimer_format_time(p->budget, buf, sizeof(buf)); ++ xnvfile_printf(it, "%3u %-6d %-4d %-4d %-10s %s\n", ++ p->cpu, ++ p->pid, ++ p->tgid, ++ p->prio, ++ buf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_quota_ops = { ++ .rewind = vfile_sched_quota_rewind, ++ .next = vfile_sched_quota_next, ++ .show = vfile_sched_quota_show, ++}; ++ ++static int xnsched_quota_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_quota_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_quota, ++ &sched_quota_vfroot); ++} ++ ++static void xnsched_quota_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_quota); ++ xnvfile_destroy_dir(&sched_quota_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_quota = { ++ .sched_init = xnsched_quota_init, ++ .sched_enqueue = xnsched_quota_enqueue, ++ .sched_dequeue = xnsched_quota_dequeue, ++ .sched_requeue = xnsched_quota_requeue, ++ .sched_pick = xnsched_quota_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = xnsched_quota_migrate, ++ .sched_chkparam = xnsched_quota_chkparam, ++ .sched_setparam = xnsched_quota_setparam, ++ .sched_getparam = xnsched_quota_getparam, ++ .sched_trackprio = xnsched_quota_trackprio, ++ .sched_protectprio = xnsched_quota_protectprio, ++ .sched_forget = xnsched_quota_forget, ++ .sched_kick = xnsched_quota_kick, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_quota_init_vfile, ++ .sched_cleanup_vfile = xnsched_quota_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(3), ++ .policy = SCHED_QUOTA, ++ .name = "quota" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_quota); +--- linux/kernel/xenomai/registry.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/registry.c 2021-04-07 16:01:25.848636137 +0800 +@@ -0,0 +1,947 @@ ++/* ++ * Copyright (C) 2004 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_registry Registry services ++ * ++ * The registry provides a mean to index object descriptors on unique ++ * alphanumeric keys. When labeled this way, an object is globally ++ * exported; it can be searched for, and its descriptor returned to ++ * the caller for further use; the latter operation is called a ++ * "binding". When no object has been registered under the given name ++ * yet, the registry can be asked to set up a rendez-vous, blocking ++ * the caller until the object is eventually registered. ++ * ++ *@{ ++ */ ++ ++struct xnobject *registry_obj_slots; ++EXPORT_SYMBOL_GPL(registry_obj_slots); ++ ++static LIST_HEAD(free_object_list); /* Free objects. */ ++ ++static LIST_HEAD(busy_object_list); /* Active and exported objects. */ ++ ++static unsigned int nr_active_objects; ++ ++static unsigned long next_object_stamp; ++ ++static struct hlist_head *object_index; ++ ++static int nr_object_entries; ++ ++static struct xnsynch register_synch; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++#include ++ ++static void proc_callback(struct work_struct *work); ++ ++static void registry_proc_schedule(void *cookie); ++ ++static LIST_HEAD(proc_object_list); /* Objects waiting for /proc handling. */ ++ ++static DECLARE_WORK(registry_proc_work, proc_callback); ++ ++static int proc_apc; ++ ++static struct xnvfile_directory registry_vfroot; ++ ++static int usage_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%u/%u\n", ++ nr_active_objects, ++ CONFIG_XENO_OPT_REGISTRY_NRSLOTS); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops usage_vfile_ops = { ++ .show = usage_vfile_show, ++}; ++ ++static struct xnvfile_regular usage_vfile = { ++ .ops = &usage_vfile_ops, ++}; ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++unsigned xnregistry_hash_size(void) ++{ ++ static const int primes[] = { ++ 101, 211, 307, 401, 503, 601, ++ 701, 809, 907, 1009, 1103 ++ }; ++ ++#define obj_hash_max(n) \ ++((n) < sizeof(primes) / sizeof(int) ? \ ++ (n) : sizeof(primes) / sizeof(int) - 1) ++ ++ return primes[obj_hash_max(CONFIG_XENO_OPT_REGISTRY_NRSLOTS / 100)]; ++} ++ ++int xnregistry_init(void) ++{ ++ int n, ret __maybe_unused; ++ ++ registry_obj_slots = kmalloc(CONFIG_XENO_OPT_REGISTRY_NRSLOTS * ++ sizeof(struct xnobject), GFP_KERNEL); ++ if (registry_obj_slots == NULL) ++ return -ENOMEM; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = xnvfile_init_dir("registry", ®istry_vfroot, &cobalt_vfroot); ++ if (ret) ++ return ret; ++ ++ ret = xnvfile_init_regular("usage", &usage_vfile, ®istry_vfroot); ++ if (ret) { ++ xnvfile_destroy_dir(®istry_vfroot); ++ return ret; ++ } ++ ++ proc_apc = ++ xnapc_alloc("registry_export", ®istry_proc_schedule, NULL); ++ ++ if (proc_apc < 0) { ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++ return proc_apc; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ next_object_stamp = 0; ++ ++ for (n = 0; n < CONFIG_XENO_OPT_REGISTRY_NRSLOTS; n++) { ++ registry_obj_slots[n].objaddr = NULL; ++ list_add_tail(®istry_obj_slots[n].link, &free_object_list); ++ } ++ ++ /* Slot #0 is reserved/invalid. 
*/ ++ list_get_entry(&free_object_list, struct xnobject, link); ++ nr_active_objects = 1; ++ ++ nr_object_entries = xnregistry_hash_size(); ++ object_index = kmalloc(sizeof(*object_index) * ++ nr_object_entries, GFP_KERNEL); ++ ++ if (object_index == NULL) { ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++ xnapc_free(proc_apc); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ return -ENOMEM; ++ } ++ ++ for (n = 0; n < nr_object_entries; n++) ++ INIT_HLIST_HEAD(&object_index[n]); ++ ++ xnsynch_init(®ister_synch, XNSYNCH_FIFO, NULL); ++ ++ return 0; ++} ++ ++void xnregistry_cleanup(void) ++{ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct hlist_node *enext; ++ struct xnobject *ecurr; ++ struct xnpnode *pnode; ++ int n; ++ ++ flush_scheduled_work(); ++ ++ for (n = 0; n < nr_object_entries; n++) ++ hlist_for_each_entry_safe(ecurr, enext, ++ &object_index[n], hlink) { ++ pnode = ecurr->pnode; ++ if (pnode == NULL) ++ continue; ++ ++ pnode->ops->unexport(ecurr, pnode); ++ ++ if (--pnode->entries > 0) ++ continue; ++ ++ xnvfile_destroy_dir(&pnode->vdir); ++ ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(&pnode->root->vdir); ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ kfree(object_index); ++ xnsynch_destroy(®ister_synch); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnapc_free(proc_apc); ++ flush_scheduled_work(); ++ xnvfile_destroy_regular(&usage_vfile); ++ xnvfile_destroy_dir(®istry_vfroot); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ kfree(registry_obj_slots); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static DEFINE_SEMAPHORE(export_mutex); ++ ++/* ++ * The following stuff implements the mechanism for delegating ++ * export/unexport requests to/from the /proc interface from the ++ * Xenomai domain to the Linux kernel (i.e. the "lower stage"). This ++ * ends up being a bit complex due to the fact that such requests ++ * might lag enough before being processed by the Linux kernel so that ++ * subsequent requests might just contradict former ones before they ++ * even had a chance to be applied (e.g. export -> unexport in the ++ * Xenomai domain for short-lived objects). This situation and the ++ * like are hopefully properly handled due to a careful ++ * synchronization of operations across domains. ++ */ ++static void proc_callback(struct work_struct *work) ++{ ++ struct xnvfile_directory *rdir, *dir; ++ const char *rname, *type; ++ struct xnobject *object; ++ struct xnpnode *pnode; ++ int ret; ++ spl_t s; ++ ++ down(&export_mutex); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ while (!list_empty(&proc_object_list)) { ++ object = list_get_entry(&proc_object_list, ++ struct xnobject, link); ++ pnode = object->pnode; ++ type = pnode->dirname; ++ dir = &pnode->vdir; ++ rdir = &pnode->root->vdir; ++ rname = pnode->root->dirname; ++ ++ if (object->vfilp != XNOBJECT_EXPORT_SCHEDULED) ++ goto unexport; ++ ++ object->vfilp = XNOBJECT_EXPORT_INPROGRESS; ++ list_add_tail(&object->link, &busy_object_list); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (pnode->entries++ == 0) { ++ if (pnode->root->entries++ == 0) { ++ /* Create the root directory on the fly. */ ++ ret = xnvfile_init_dir(rname, rdir, ®istry_vfroot); ++ if (ret) { ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ pnode->root->entries = 0; ++ pnode->entries = 0; ++ continue; ++ } ++ } ++ /* Create the class directory on the fly. 
*/ ++ ret = xnvfile_init_dir(type, dir, rdir); ++ if (ret) { ++ if (pnode->root->entries == 1) { ++ pnode->root->entries = 0; ++ xnvfile_destroy_dir(rdir); ++ } ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ pnode->entries = 0; ++ continue; ++ } ++ } ++ ++ ret = pnode->ops->export(object, pnode); ++ if (ret && --pnode->entries == 0) { ++ xnvfile_destroy_dir(dir); ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(rdir); ++ xnlock_get_irqsave(&nklock, s); ++ object->pnode = NULL; ++ } else ++ xnlock_get_irqsave(&nklock, s); ++ ++ continue; ++ ++ unexport: ++ object->vfilp = NULL; ++ object->pnode = NULL; ++ ++ if (object->vfilp == XNOBJECT_EXPORT_ABORTED) ++ object->objaddr = NULL; ++ ++ if (object->objaddr) ++ list_add_tail(&object->link, &busy_object_list); ++ else { ++ /* ++ * Trap the case where we are unexporting an ++ * already unregistered object. ++ */ ++ list_add_tail(&object->link, &free_object_list); ++ nr_active_objects--; ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ pnode->ops->unexport(object, pnode); ++ ++ if (--pnode->entries == 0) { ++ xnvfile_destroy_dir(dir); ++ if (--pnode->root->entries == 0) ++ xnvfile_destroy_dir(rdir); ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ up(&export_mutex); ++} ++ ++static void registry_proc_schedule(void *cookie) ++{ ++ /* ++ * schedule_work() will check for us if the work has already ++ * been scheduled, so just be lazy and submit blindly. ++ */ ++ schedule_work(®istry_proc_work); ++} ++ ++static int registry_export_vfsnap(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_snapshot *p; ++ int ret; ++ ++ /* ++ * Make sure to initialize _all_ mandatory vfile fields; most ++ * of the time we are using sane NULL defaults based on static ++ * storage for the vfile struct, but here we are building up a ++ * vfile object explicitly. ++ */ ++ p = container_of(pnode, struct xnpnode_snapshot, node); ++ object->vfile_u.vfsnap.file.datasz = p->vfile.datasz; ++ object->vfile_u.vfsnap.file.privsz = p->vfile.privsz; ++ /* ++ * Make the vfile refer to the provided tag struct if any, ++ * otherwise use our default tag space. In the latter case, ++ * each object family has its own private revision tag. ++ */ ++ object->vfile_u.vfsnap.file.tag = p->vfile.tag ?: ++ &object->vfile_u.vfsnap.tag; ++ object->vfile_u.vfsnap.file.ops = p->vfile.ops; ++ object->vfile_u.vfsnap.file.entry.lockops = p->vfile.lockops; ++ ++ ret = xnvfile_init_snapshot(object->key, &object->vfile_u.vfsnap.file, ++ &pnode->vdir); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.vfsnap.file.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vfsnap(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_snapshot(&object->vfile_u.vfsnap.file); ++} ++ ++static void registry_touch_vfsnap(struct xnobject *object) ++{ ++ xnvfile_touch(&object->vfile_u.vfsnap.file); ++} ++ ++struct xnpnode_ops xnregistry_vfsnap_ops = { ++ .export = registry_export_vfsnap, ++ .unexport = registry_unexport_vfsnap, ++ .touch = registry_touch_vfsnap, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vfsnap_ops); ++ ++static int registry_export_vfreg(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_regular *p; ++ int ret; ++ ++ /* See registry_export_vfsnap() for hints. 
*/ ++ p = container_of(pnode, struct xnpnode_regular, node); ++ object->vfile_u.vfreg.privsz = p->vfile.privsz; ++ object->vfile_u.vfreg.ops = p->vfile.ops; ++ object->vfile_u.vfreg.entry.lockops = p->vfile.lockops; ++ ++ ret = xnvfile_init_regular(object->key, &object->vfile_u.vfreg, ++ &pnode->vdir); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.vfreg.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vfreg(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_regular(&object->vfile_u.vfreg); ++} ++ ++struct xnpnode_ops xnregistry_vfreg_ops = { ++ .export = registry_export_vfreg, ++ .unexport = registry_unexport_vfreg, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vfreg_ops); ++ ++static int registry_export_vlink(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ struct xnpnode_link *link_desc; ++ char *link_target; ++ int ret; ++ ++ link_desc = container_of(pnode, struct xnpnode_link, node); ++ link_target = link_desc->target(object->objaddr); ++ if (link_target == NULL) ++ return -ENOMEM; ++ ++ ret = xnvfile_init_link(object->key, link_target, ++ &object->vfile_u.link, &pnode->vdir); ++ kfree(link_target); ++ if (ret) ++ return ret; ++ ++ object->vfilp = &object->vfile_u.link.entry; ++ object->vfilp->private = object->objaddr; ++ ++ return 0; ++} ++ ++static void registry_unexport_vlink(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ xnvfile_destroy_link(&object->vfile_u.link); ++} ++ ++struct xnpnode_ops xnregistry_vlink_ops = { ++ .export = registry_export_vlink, ++ .unexport = registry_unexport_vlink, ++}; ++EXPORT_SYMBOL_GPL(xnregistry_vlink_ops); ++ ++static inline void registry_export_pnode(struct xnobject *object, ++ struct xnpnode *pnode) ++{ ++ object->vfilp = XNOBJECT_EXPORT_SCHEDULED; ++ object->pnode = pnode; ++ list_del(&object->link); ++ list_add_tail(&object->link, &proc_object_list); ++ __xnapc_schedule(proc_apc); ++} ++ ++static inline void registry_unexport_pnode(struct xnobject *object) ++{ ++ if (object->vfilp != XNOBJECT_EXPORT_SCHEDULED) { ++ /* ++ * We might have preempted a v-file read op, so bump ++ * the object's revtag to make sure the data ++ * collection is aborted next, if we end up deleting ++ * the object being read. ++ */ ++ if (object->pnode->ops->touch) ++ object->pnode->ops->touch(object); ++ list_del(&object->link); ++ list_add_tail(&object->link, &proc_object_list); ++ __xnapc_schedule(proc_apc); ++ } else { ++ /* ++ * Unexporting before the lower stage has had a chance ++ * to export. Move back the object to the busyq just ++ * like if no export had been requested. 
++ */ ++ list_del(&object->link); ++ list_add_tail(&object->link, &busy_object_list); ++ object->pnode = NULL; ++ object->vfilp = NULL; ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++static unsigned registry_hash_crunch(const char *key) ++{ ++ unsigned int h = 0, g; ++ ++#define HQON 24 /* Higher byte position */ ++#define HBYTE 0xf0000000 /* Higher nibble on */ ++ ++ while (*key) { ++ h = (h << 4) + *key++; ++ if ((g = (h & HBYTE)) != 0) ++ h = (h ^ (g >> HQON)) ^ g; ++ } ++ ++ return h % nr_object_entries; ++} ++ ++static inline int registry_hash_enter(const char *key, struct xnobject *object) ++{ ++ struct xnobject *ecurr; ++ unsigned s; ++ ++ object->key = key; ++ s = registry_hash_crunch(key); ++ ++ hlist_for_each_entry(ecurr, &object_index[s], hlink) ++ if (ecurr == object || strcmp(key, ecurr->key) == 0) ++ return -EEXIST; ++ ++ hlist_add_head(&object->hlink, &object_index[s]); ++ ++ return 0; ++} ++ ++static inline int registry_hash_remove(struct xnobject *object) ++{ ++ unsigned int s = registry_hash_crunch(object->key); ++ struct xnobject *ecurr; ++ ++ hlist_for_each_entry(ecurr, &object_index[s], hlink) ++ if (ecurr == object) { ++ hlist_del(&ecurr->hlink); ++ return 0; ++ } ++ ++ return -ESRCH; ++} ++ ++static struct xnobject *registry_hash_find(const char *key) ++{ ++ struct xnobject *ecurr; ++ ++ hlist_for_each_entry(ecurr, ++ &object_index[registry_hash_crunch(key)], hlink) ++ if (strcmp(key, ecurr->key) == 0) ++ return ecurr; ++ ++ return NULL; ++} ++ ++struct registry_wait_context { ++ struct xnthread_wait_context wc; ++ const char *key; ++}; ++ ++static inline int registry_wakeup_sleepers(const char *key) ++{ ++ struct registry_wait_context *rwc; ++ struct xnthread_wait_context *wc; ++ struct xnthread *sleeper, *tmp; ++ int cnt = 0; ++ ++ xnsynch_for_each_sleeper_safe(sleeper, tmp, ®ister_synch) { ++ wc = xnthread_get_wait_context(sleeper); ++ rwc = container_of(wc, struct registry_wait_context, wc); ++ if (*key == *rwc->key && strcmp(key, rwc->key) == 0) { ++ xnsynch_wakeup_this_sleeper(®ister_synch, sleeper); ++ ++cnt; ++ } ++ } ++ ++ return cnt; ++} ++ ++/** ++ * @fn int xnregistry_enter(const char *key,void *objaddr,xnhandle_t *phandle,struct xnpnode *pnode) ++ * @brief Register a real-time object. ++ * ++ * This service allocates a new registry slot for an associated ++ * object, and indexes it by an alphanumeric key for later retrieval. ++ * ++ * @param key A valid NULL-terminated string by which the object will ++ * be indexed and later retrieved in the registry. Since it is assumed ++ * that such key is stored into the registered object, it will *not* ++ * be copied but only kept by reference in the registry. Pass an empty ++ * or NULL string if the object shall only occupy a registry slot for ++ * handle-based lookups. The slash character is not accepted in @a key ++ * if @a pnode is non-NULL. ++ * ++ * @param objaddr An opaque pointer to the object to index by @a ++ * key. ++ * ++ * @param phandle A pointer to a generic handle defined by the ++ * registry which will uniquely identify the indexed object, until the ++ * latter is unregistered using the xnregistry_remove() service. ++ * ++ * @param pnode A pointer to an optional /proc node class ++ * descriptor. This structure provides the information needed to ++ * export all objects from the given class through the /proc ++ * filesystem, under the /proc/xenomai/registry entry. Passing NULL ++ * indicates that no /proc support is available for the newly ++ * registered object. 
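++ *
++ * For illustration only, with a hypothetical object and key name,
++ * registering an object without /proc export could look like:
++ *
++ * @code
++ * xnhandle_t handle;
++ * int ret;
++ *
++ * ret = xnregistry_enter("my-object", &myobj, &handle, NULL);
++ * if (ret)
++ *     return ret;
++ * @endcode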
++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -EINVAL is returned if @a objaddr is NULL. ++ * ++ * - -EINVAL if @a pnode is non-NULL, and @a key points to a valid ++ * string containing a '/' character. ++ * ++ * - -ENOMEM is returned if the system fails to get enough dynamic ++ * memory from the global real-time heap in order to register the ++ * object. ++ * ++ * - -EEXIST is returned if the @a key is already in use. ++ * ++ * @coretags{unrestricted, might-switch, atomic-entry} ++ */ ++int xnregistry_enter(const char *key, void *objaddr, ++ xnhandle_t *phandle, struct xnpnode *pnode) ++{ ++ struct xnobject *object; ++ spl_t s; ++ int ret; ++ ++ if (objaddr == NULL || ++ (pnode != NULL && key != NULL && strchr(key, '/'))) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&free_object_list)) { ++ ret = -EAGAIN; ++ goto unlock_and_exit; ++ } ++ ++ object = list_get_entry(&free_object_list, struct xnobject, link); ++ nr_active_objects++; ++ object->objaddr = objaddr; ++ object->cstamp = ++next_object_stamp; ++#ifdef CONFIG_XENO_OPT_VFILE ++ object->pnode = NULL; ++#endif ++ if (key == NULL || *key == '\0') { ++ object->key = NULL; ++ *phandle = object - registry_obj_slots; ++ ret = 0; ++ goto unlock_and_exit; ++ } ++ ++ ret = registry_hash_enter(key, object); ++ if (ret) { ++ nr_active_objects--; ++ list_add_tail(&object->link, &free_object_list); ++ goto unlock_and_exit; ++ } ++ ++ list_add_tail(&object->link, &busy_object_list); ++ ++ /* ++ * Make sure the handle is written back before the ++ * rescheduling takes place. ++ */ ++ *phandle = object - registry_obj_slots; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (pnode) ++ registry_export_pnode(object, pnode); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ if (registry_wakeup_sleepers(key)) ++ xnsched_run(); ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_enter); ++ ++/** ++ * @fn int xnregistry_bind(const char *key,xnticks_t timeout,int timeout_mode,xnhandle_t *phandle) ++ * @brief Bind to a real-time object. ++ * ++ * This service retrieves the registry handle of a given object ++ * identified by its key. Unless otherwise specified, this service ++ * will block the caller if the object is not registered yet, waiting ++ * for such registration to occur. ++ * ++ * @param key A valid NULL-terminated string which identifies the ++ * object to bind to. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread wait for the object to be registered. This value is a wait ++ * time given as a count of nanoseconds. It can either be relative, ++ * absolute monotonic (XN_ABSOLUTE), or absolute adjustable ++ * (XN_REALTIME) depending on @a timeout_mode. Passing XN_INFINITE @b ++ * and setting @a timeout_mode to XN_RELATIVE specifies an unbounded ++ * wait. Passing XN_NONBLOCK causes the service to return immediately ++ * without waiting if the object is not registered on entry. All other ++ * values are used as a wait limit. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @param phandle A pointer to a memory location which will be written ++ * upon success with the generic handle defined by the registry for ++ * the retrieved object. Contents of this memory is undefined upon ++ * failure. ++ * ++ * @return 0 is returned upon success. 
Otherwise: ++ * ++ * - -EINVAL is returned if @a key is NULL. ++ * ++ * - -EINTR is returned if xnthread_unblock() has been called for the ++ * waiting thread before the retrieval has completed. ++ * ++ * - -EWOULDBLOCK is returned if @a timeout is equal to XN_NONBLOCK ++ * and the searched object is not registered on entry. As a special ++ * exception, this error is also returned if this service should ++ * block, but was called from a context which cannot sleep ++ * (e.g. interrupt, non-realtime or scheduler locked). ++ * ++ * - -ETIMEDOUT is returned if the object cannot be retrieved within ++ * the specified amount of time. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note xnregistry_bind() only returns the index portion of a handle, ++ * which might include other fixed bits to be complete ++ * (e.g. XNSYNCH_PSHARED). The caller is responsible for completing ++ * the handle returned with those bits if applicable, depending on the ++ * context. ++ */ ++int xnregistry_bind(const char *key, xnticks_t timeout, int timeout_mode, ++ xnhandle_t *phandle) ++{ ++ struct registry_wait_context rwc; ++ struct xnobject *object; ++ int ret = 0, info; ++ spl_t s; ++ ++ if (key == NULL) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (timeout_mode == XN_RELATIVE && ++ timeout != XN_INFINITE && timeout != XN_NONBLOCK) { ++ timeout_mode = XN_ABSOLUTE; ++ timeout += xnclock_read_monotonic(&nkclock); ++ } ++ ++ for (;;) { ++ object = registry_hash_find(key); ++ if (object) { ++ *phandle = object - registry_obj_slots; ++ goto unlock_and_exit; ++ } ++ ++ if ((timeout_mode == XN_RELATIVE && timeout == XN_NONBLOCK) || ++ xnsched_unblockable_p()) { ++ ret = -EWOULDBLOCK; ++ goto unlock_and_exit; ++ } ++ ++ rwc.key = key; ++ xnthread_prepare_wait(&rwc.wc); ++ info = xnsynch_sleep_on(®ister_synch, timeout, timeout_mode); ++ if (info & XNTIMEO) { ++ ret = -ETIMEDOUT; ++ goto unlock_and_exit; ++ } ++ if (info & XNBREAK) { ++ ret = -EINTR; ++ goto unlock_and_exit; ++ } ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_bind); ++ ++/** ++ * @fn int xnregistry_remove(xnhandle_t handle) ++ * @brief Forcibly unregister a real-time object. ++ * ++ * This service forcibly removes an object from the registry. The ++ * removal is performed regardless of the current object's locking ++ * status. ++ * ++ * @param handle The generic handle of the object to remove. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -ESRCH is returned if @a handle does not reference a registered ++ * object. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnregistry_remove(xnhandle_t handle) ++{ ++ struct xnobject *object; ++ void *objaddr; ++ int ret = 0; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ object = xnregistry_validate(handle); ++ if (object == NULL) { ++ ret = -ESRCH; ++ goto unlock_and_exit; ++ } ++ ++ objaddr = object->objaddr; ++ object->objaddr = NULL; ++ object->cstamp = 0; ++ ++ if (object->key) { ++ registry_hash_remove(object); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (object->pnode) { ++ if (object->vfilp == XNOBJECT_EXPORT_INPROGRESS) { ++ object->vfilp = XNOBJECT_EXPORT_ABORTED; ++ object->objaddr = objaddr; ++ } ++ ++ registry_unexport_pnode(object); ++ /* ++ * Leave the update of the object queues to ++ * the work callback if it has been kicked. 
++ */ ++ if (object->pnode) { ++ xnlock_put_irqrestore(&nklock, s); ++ if (ipipe_root_p) ++ flush_work(®istry_proc_work); ++ return 0; ++ } ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ list_del(&object->link); ++ } ++ ++ if (!IS_ENABLED(CONFIG_XENO_OPT_VFILE) || !object->objaddr) { ++ list_add_tail(&object->link, &free_object_list); ++ nr_active_objects--; ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnregistry_remove); ++ ++/** ++ * Turn a named object into an anonymous object ++ * ++ * @coretags{unrestricted} ++ */ ++int xnregistry_unlink(const char *key) ++{ ++ struct xnobject *object; ++ int ret = 0; ++ spl_t s; ++ ++ if (key == NULL) ++ return -EINVAL; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ object = registry_hash_find(key); ++ if (object == NULL) { ++ ret = -ESRCH; ++ goto unlock_and_exit; ++ } ++ ++ ret = registry_hash_remove(object); ++ if (ret < 0) ++ goto unlock_and_exit; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if (object->pnode) { ++ registry_unexport_pnode(object); ++ /* ++ * Leave the update of the object queues to ++ * the work callback if it has been kicked. ++ */ ++ if (object->pnode) ++ goto unlock_and_exit; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ list_del(&object->link); ++ ++ object->key = NULL; ++ ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++ ++/** ++ * @fn void *xnregistry_lookup(xnhandle_t handle, unsigned long *cstamp_r) ++ * @brief Find a real-time object into the registry. ++ * ++ * This service retrieves an object from its handle into the registry ++ * and returns the memory address of its descriptor. Optionally, it ++ * also copies back the object's creation stamp which is unique across ++ * object registration calls. ++ * ++ * @param handle The generic handle of the object to fetch. ++ * ++ * @param cstamp_r If not-NULL, the object's creation stamp will be ++ * copied to this memory area. ++ * ++ * @return The memory address of the object's descriptor is returned ++ * on success. Otherwise, NULL is returned if @a handle does not ++ * reference a registered object. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** @} */ +--- linux/kernel/xenomai/map.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/map.c 2021-04-07 16:01:25.843636144 +0800 +@@ -0,0 +1,265 @@ ++/* ++ * Copyright (C) 2007 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_map Lightweight key-to-object mapping service ++ * ++ * A map is a simple indexing structure which associates unique ++ * integer keys with pointers to objects. The current implementation ++ * supports reservation, for naming/indexing objects, either on a ++ * fixed, user-provided integer (i.e. 
a reserved key value), or by ++ * drawing the next available key internally if the caller did not ++ * specify any fixed key. For instance, in some given map, the key ++ * space ranging from 0 to 255 could be reserved for fixed keys, ++ * whilst the range from 256 to 511 could be available for drawing ++ * free keys dynamically. ++ * ++ * A maximum of 1024 unique keys per map is supported on 32bit ++ * machines. ++ * ++ * (This implementation should not be confused with C++ STL maps, ++ * which are dynamically expandable and allow arbitrary key types; ++ * Xenomai maps don't). ++ * ++ * @{ ++ */ ++ ++/** ++ * @fn void xnmap_create(int nkeys, int reserve, int offset) ++ * @brief Create a map. ++ * ++ * Allocates a new map with the specified addressing capabilities. The ++ * memory is obtained from the Xenomai system heap. ++ * ++ * @param nkeys The maximum number of unique keys the map will be able ++ * to hold. This value cannot exceed the static limit represented by ++ * XNMAP_MAX_KEYS, and must be a power of two. ++ * ++ * @param reserve The number of keys which should be kept for ++ * reservation within the index space. Reserving a key means to ++ * specify a valid key to the xnmap_enter() service, which will then ++ * attempt to register this exact key, instead of drawing the next ++ * available key from the unreserved index space. When reservation is ++ * in effect, the unreserved index space will hold key values greater ++ * than @a reserve, keeping the low key values for the reserved space. ++ * For instance, passing @a reserve = 32 would cause the index range [ ++ * 0 .. 31 ] to be kept for reserved keys. When non-zero, @a reserve ++ * is rounded to the next multiple of BITS_PER_LONG. If @a reserve is ++ * zero no reservation will be available from the map. ++ * ++ * @param offset The lowest key value xnmap_enter() will return to the ++ * caller. Key values will be in the range [ 0 + offset .. @a nkeys + ++ * offset - 1 ]. Negative offsets are valid. ++ * ++ * @return the address of the new map is returned on success; ++ * otherwise, NULL is returned if @a nkeys is invalid. ++ * ++ * @coretags{task-unrestricted} ++ */ ++struct xnmap *xnmap_create(int nkeys, int reserve, int offset) ++{ ++ struct xnmap *map; ++ int mapsize; ++ ++ if (nkeys <= 0 || (nkeys & (nkeys - 1)) != 0) ++ return NULL; ++ ++ mapsize = sizeof(*map) + (nkeys - 1) * sizeof(map->objarray[0]); ++ map = xnmalloc(mapsize); ++ ++ if (!map) ++ return NULL; ++ ++ map->ukeys = 0; ++ map->nkeys = nkeys; ++ map->offset = offset; ++ map->himask = (1 << ((reserve + BITS_PER_LONG - 1) / BITS_PER_LONG)) - 1; ++ map->himap = ~0; ++ memset(map->lomap, ~0, sizeof(map->lomap)); ++ memset(map->objarray, 0, sizeof(map->objarray[0]) * nkeys); ++ ++ return map; ++} ++EXPORT_SYMBOL_GPL(xnmap_create); ++ ++/** ++ * @fn void xnmap_delete(struct xnmap *map) ++ * @brief Delete a map. ++ * ++ * Deletes a map, freeing any associated memory back to the Xenomai ++ * system heap. ++ * ++ * @param map The address of the map to delete. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnmap_delete(struct xnmap *map) ++{ ++ xnfree(map); ++} ++EXPORT_SYMBOL_GPL(xnmap_delete); ++ ++/** ++ * @fn void xnmap_enter(struct xnmap *map, int key, void *objaddr) ++ * @brief Index an object into a map. ++ * ++ * Insert a new object into the given map. ++ * ++ * @param map The address of the map to insert into. ++ * ++ * @param key The key to index the object on. If this key is within ++ * the valid index range [ 0 - offset .. 
nkeys - offset - 1 ], then an ++ * attempt to reserve this exact key is made. If @a key has an ++ * out-of-range value lower or equal to 0 - offset - 1, then an ++ * attempt is made to draw a free key from the unreserved index space. ++ * ++ * @param objaddr The address of the object to index on the key. This ++ * value will be returned by a successful call to xnmap_fetch() with ++ * the same key. ++ * ++ * @return a valid key is returned on success, either @a key if ++ * reserved, or the next free key. Otherwise: ++ * ++ * - -EEXIST is returned upon attempt to reserve a busy key. ++ * ++ * - -ENOSPC when no more free key is available. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnmap_enter(struct xnmap *map, int key, void *objaddr) ++{ ++ int hi, lo, ofkey = key - map->offset; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (ofkey >= 0 && ofkey < map->nkeys) { ++ if (map->objarray[ofkey] != NULL) { ++ key = -EEXIST; ++ goto unlock_and_exit; ++ } ++ } else if (map->ukeys >= map->nkeys) { ++ key = -ENOSPC; ++ goto unlock_and_exit; ++ } ++ else { ++ /* The himask implements a namespace reservation of ++ half of the bitmap space which cannot be used to ++ draw keys. */ ++ ++ hi = ffnz(map->himap & ~map->himask); ++ lo = ffnz(map->lomap[hi]); ++ ofkey = hi * BITS_PER_LONG + lo; ++ ++map->ukeys; ++ ++ map->lomap[hi] &= ~(1UL << lo); ++ if (map->lomap[hi] == 0) ++ map->himap &= ~(1UL << hi); ++ } ++ ++ map->objarray[ofkey] = objaddr; ++ ++ unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ofkey + map->offset; ++} ++EXPORT_SYMBOL_GPL(xnmap_enter); ++ ++/** ++ * @fn void xnmap_remove(struct xnmap *map, int key) ++ * @brief Remove an object reference from a map. ++ * ++ * Removes an object reference from the given map, releasing the ++ * associated key. ++ * ++ * @param map The address of the map to remove from. ++ * ++ * @param key The key the object reference to be removed is indexed ++ * on. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ESRCH is returned if @a key is invalid. ++ * ++ * @coretags{unrestricted} ++ */ ++int xnmap_remove(struct xnmap *map, int key) ++{ ++ int ofkey = key - map->offset, hi, lo; ++ spl_t s; ++ ++ if (ofkey < 0 || ofkey >= map->nkeys) ++ return -ESRCH; ++ ++ hi = ofkey / BITS_PER_LONG; ++ lo = ofkey % BITS_PER_LONG; ++ xnlock_get_irqsave(&nklock, s); ++ map->objarray[ofkey] = NULL; ++ map->himap |= (1UL << hi); ++ map->lomap[hi] |= (1UL << lo); ++ --map->ukeys; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnmap_remove); ++ ++/** ++ * @fn void xnmap_fetch(struct xnmap *map, int key) ++ * @brief Search an object into a map. ++ * ++ * Retrieve an object reference from the given map by its index key. ++ * ++ * @param map The address of the map to retrieve from. ++ * ++ * @param key The key to be searched for in the map index. ++ * ++ * @return The indexed object address is returned on success, ++ * otherwise NULL is returned when @a key is invalid or no object is ++ * currently indexed on it. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** ++ * @fn void xnmap_fetch_nocheck(struct xnmap *map, int key) ++ * @brief Search an object into a map - unchecked form. ++ * ++ * Retrieve an object reference from the given map by its index key, ++ * but does not perform any sanity check on the provided key. ++ * ++ * @param map The address of the map to retrieve from. ++ * ++ * @param key The key to be searched for in the map index. 
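++ *
++ * (Editorial sketch, not part of the upstream sources.) A minimal usage
++ * sequence for this map API, assuming @a key was obtained from a prior
++ * successful xnmap_enter() call so that no range check is needed, and
++ * with @a obj standing for any object pointer to index:
++ *
++ *   struct xnmap *map = xnmap_create(256, 32, 0);
++ *   int key = xnmap_enter(map, -1, obj);      // draws a free key
++ *   void *p = xnmap_fetch_nocheck(map, key);  // p == obj
++ *   xnmap_remove(map, key);
++ *   xnmap_delete(map);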
++ * ++ * @return The indexed object address is returned on success, ++ * otherwise NULL is returned when no object is currently indexed on ++ * @a key. ++ * ++ * @coretags{unrestricted} ++ */ ++ ++/** @} */ +--- linux/kernel/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/Makefile 2021-04-07 16:01:25.839636149 +0800 +@@ -0,0 +1,28 @@ ++obj-$(CONFIG_XENOMAI) += xenomai.o rtdm/ posix/ ++ ++xenomai-y := apc.o \ ++ arith.o \ ++ bufd.o \ ++ clock.o \ ++ heap.o \ ++ init.o \ ++ intr.o \ ++ lock.o \ ++ registry.o \ ++ sched-idle.o \ ++ sched-rt.o \ ++ sched.o \ ++ select.o \ ++ synch.o \ ++ thread.o \ ++ timer.o \ ++ tree.o ++ ++xenomai-$(CONFIG_XENO_OPT_SCHED_QUOTA) += sched-quota.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_WEAK) += sched-weak.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_SPORADIC) += sched-sporadic.o ++xenomai-$(CONFIG_XENO_OPT_SCHED_TP) += sched-tp.o ++xenomai-$(CONFIG_XENO_OPT_DEBUG) += debug.o ++xenomai-$(CONFIG_XENO_OPT_PIPE) += pipe.o ++xenomai-$(CONFIG_XENO_OPT_MAP) += map.o ++xenomai-$(CONFIG_PROC_FS) += vfile.o procfs.o +--- linux/kernel/xenomai/pipe.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/pipe.c 2021-04-07 16:01:25.834636157 +0800 +@@ -0,0 +1,1178 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004 Philippe Gerum . ++ * Copyright (C) 2005 Dmitry Adamushko ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA ++ * 02139, USA; either version 2 of the License, or (at your option) ++ * any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int xnpipe_asyncsig = SIGIO; ++ ++struct xnpipe_state xnpipe_states[XNPIPE_NDEVS]; ++EXPORT_SYMBOL_GPL(xnpipe_states); ++ ++#define XNPIPE_BITMAP_SIZE ((XNPIPE_NDEVS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++ ++static unsigned long xnpipe_bitmap[XNPIPE_BITMAP_SIZE]; ++ ++static LIST_HEAD(xnpipe_sleepq); ++ ++static LIST_HEAD(xnpipe_asyncq); ++ ++int xnpipe_wakeup_apc; ++ ++static struct class *xnpipe_class; ++ ++/* Allocation of minor values */ ++ ++static inline int xnpipe_minor_alloc(int minor) ++{ ++ spl_t s; ++ ++ if ((minor < 0 && minor != XNPIPE_MINOR_AUTO) || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (minor == XNPIPE_MINOR_AUTO) ++ minor = find_first_zero_bit(xnpipe_bitmap, XNPIPE_NDEVS); ++ ++ if (minor == XNPIPE_NDEVS || ++ (xnpipe_bitmap[minor / BITS_PER_LONG] & ++ (1UL << (minor % BITS_PER_LONG)))) ++ minor = -EBUSY; ++ else ++ xnpipe_bitmap[minor / BITS_PER_LONG] |= ++ (1UL << (minor % BITS_PER_LONG)); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return minor; ++} ++ ++static inline void xnpipe_minor_free(int minor) ++{ ++ xnpipe_bitmap[minor / BITS_PER_LONG] &= ++ ~(1UL << (minor % BITS_PER_LONG)); ++} ++ ++static inline void xnpipe_enqueue_wait(struct xnpipe_state *state, int mask) ++{ ++ if (state->wcount != 0x7fffffff && state->wcount++ == 0) ++ list_add_tail(&state->slink, &xnpipe_sleepq); ++ ++ state->status |= mask; ++} ++ ++static inline void xnpipe_dequeue_wait(struct xnpipe_state *state, int mask) ++{ ++ if (state->status & mask) ++ if (--state->wcount == 0) { ++ list_del(&state->slink); ++ state->status &= ~mask; ++ } ++} ++ ++static inline void xnpipe_dequeue_all(struct xnpipe_state *state, int mask) ++{ ++ if (state->status & mask) { ++ if (state->wcount) { ++ state->wcount = 0; ++ list_del(&state->slink); ++ state->status &= ~mask; ++ } ++ } ++} ++ ++/* Must be entered with nklock held, interrupts off. */ ++#define xnpipe_wait(__state, __mask, __s, __cond) \ ++({ \ ++ wait_queue_head_t *__waitq; \ ++ DEFINE_WAIT(__wait); \ ++ int __sigpending; \ ++ \ ++ if ((__mask) & XNPIPE_USER_WREAD) \ ++ __waitq = &(__state)->readq; \ ++ else \ ++ __waitq = &(__state)->syncq; \ ++ \ ++ xnpipe_enqueue_wait(__state, __mask); \ ++ xnlock_put_irqrestore(&nklock, __s); \ ++ \ ++ for (;;) { \ ++ __sigpending = signal_pending(current); \ ++ if (__sigpending) \ ++ break; \ ++ prepare_to_wait_exclusive(__waitq, &__wait, TASK_INTERRUPTIBLE); \ ++ if (__cond || (__state)->status & XNPIPE_KERN_LCLOSE) \ ++ break; \ ++ schedule(); \ ++ } \ ++ \ ++ finish_wait(__waitq, &__wait); \ ++ \ ++ /* Restore the interrupt state initially set by the caller. */ \ ++ xnlock_get_irqsave(&nklock, __s); \ ++ xnpipe_dequeue_wait(__state, __mask); \ ++ \ ++ __sigpending; \ ++}) ++ ++static void xnpipe_wakeup_proc(void *cookie) ++{ ++ struct xnpipe_state *state; ++ unsigned long rbits; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * NOTE: sleepers might enter/leave the queue while we don't ++ * hold the nklock in these wakeup loops. So we iterate over ++ * each sleeper list until we find no more candidate for ++ * wakeup after an entire scan, redoing the scan from the list ++ * head otherwise. 
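++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * This is why each pass below restarts from list_first_entry():
++	 * the lock is dropped around wake_up_interruptible(), so the
++	 * sleep queue may have changed by the time it is re-acquired.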
++ */ ++ for (;;) { ++ if (list_empty(&xnpipe_sleepq)) ++ goto check_async; ++ ++ state = list_first_entry(&xnpipe_sleepq, struct xnpipe_state, slink); ++ ++ for (;;) { ++ rbits = state->status & XNPIPE_USER_ALL_READY; ++ if (rbits) ++ break; ++ if (list_is_last(&state->slink, &xnpipe_sleepq)) ++ goto check_async; ++ state = list_next_entry(state, slink); ++ } ++ ++ state->status &= ~rbits; ++ ++ if ((rbits & XNPIPE_USER_WREAD_READY) != 0) { ++ if (waitqueue_active(&state->readq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ wake_up_interruptible(&state->readq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ if ((rbits & XNPIPE_USER_WSYNC_READY) != 0) { ++ if (waitqueue_active(&state->syncq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ wake_up_interruptible(&state->syncq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ } ++ } ++ ++check_async: ++ /* ++ * Scan the async queue, sending the proper signal to ++ * subscribers. ++ */ ++ for (;;) { ++ if (list_empty(&xnpipe_asyncq)) ++ goto out; ++ ++ state = list_first_entry(&xnpipe_asyncq, struct xnpipe_state, alink); ++ ++ for (;;) { ++ if (state->status & XNPIPE_USER_SIGIO) ++ break; ++ if (list_is_last(&state->alink, &xnpipe_asyncq)) ++ goto out; ++ state = list_next_entry(state, alink); ++ } ++ ++ state->status &= ~XNPIPE_USER_SIGIO; ++ xnlock_put_irqrestore(&nklock, s); ++ kill_fasync(&state->asyncq, xnpipe_asyncsig, POLL_IN); ++ xnlock_get_irqsave(&nklock, s); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static inline void xnpipe_schedule_request(void) /* hw IRQs off */ ++{ ++ __xnapc_schedule(xnpipe_wakeup_apc); ++} ++ ++static inline ssize_t xnpipe_flush_bufq(void (*fn)(void *buf, void *xstate), ++ struct list_head *q, ++ void *xstate) ++{ ++ struct xnpipe_mh *mh, *tmp; ++ ssize_t n = 0; ++ ++ if (list_empty(q)) ++ return 0; ++ ++ /* Queue is private, no locking is required. */ ++ list_for_each_entry_safe(mh, tmp, q, link) { ++ list_del(&mh->link); ++ n += xnpipe_m_size(mh); ++ fn(mh, xstate); ++ } ++ ++ /* Return the overall count of bytes flushed. */ ++ return n; ++} ++ ++/* ++ * Move the specified queue contents to a private queue, then call the ++ * flush handler to purge it. The latter runs without locking. ++ * Returns the number of bytes flushed. Must be entered with nklock ++ * held, interrupts off. ++ */ ++#define xnpipe_flushq(__state, __q, __f, __s) \ ++({ \ ++ LIST_HEAD(__privq); \ ++ ssize_t __n; \ ++ \ ++ list_splice_init(&(state)->__q, &__privq); \ ++ (__state)->nr ## __q = 0; \ ++ xnlock_put_irqrestore(&nklock, (__s)); \ ++ __n = xnpipe_flush_bufq((__state)->ops.__f, &__privq, (__state)->xstate); \ ++ xnlock_get_irqsave(&nklock, (__s)); \ ++ \ ++ __n; \ ++}) ++ ++static void *xnpipe_default_alloc_ibuf(size_t size, void *xstate) ++{ ++ void *buf; ++ ++ buf = xnmalloc(size); ++ if (likely(buf != NULL)) ++ return buf; ++ ++ if (size > xnheap_get_size(&cobalt_heap)) ++ /* Request will never succeed. */ ++ return (struct xnpipe_mh *)-1; ++ ++ return NULL; ++} ++ ++static void xnpipe_default_free_ibuf(void *buf, void *xstate) ++{ ++ xnfree(buf); ++} ++ ++static void xnpipe_default_release(void *xstate) ++{ ++} ++ ++static inline int xnpipe_set_ops(struct xnpipe_state *state, ++ struct xnpipe_operations *ops) ++{ ++ state->ops = *ops; ++ ++ if (ops->free_obuf == NULL) ++ /* ++ * Caller must provide a way to free unread outgoing ++ * buffers. ++ */ ++ return -EINVAL; ++ ++ /* Set some default handlers for common usage. 
*/ ++ if (ops->alloc_ibuf == NULL) ++ state->ops.alloc_ibuf = xnpipe_default_alloc_ibuf; ++ if (ops->free_ibuf == NULL) ++ state->ops.free_ibuf = xnpipe_default_free_ibuf; ++ if (ops->release == NULL) ++ state->ops.release = xnpipe_default_release; ++ ++ return 0; ++} ++ ++int xnpipe_connect(int minor, struct xnpipe_operations *ops, void *xstate) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0, ret; ++ spl_t s; ++ ++ minor = xnpipe_minor_alloc(minor); ++ if (minor < 0) ++ return minor; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ ret = xnpipe_set_ops(state, ops); ++ if (ret) { ++ xnlock_put_irqrestore(&nklock, s); ++ return ret; ++ } ++ ++ state->status |= XNPIPE_KERN_CONN; ++ xnsynch_init(&state->synchbase, XNSYNCH_FIFO, NULL); ++ state->xstate = xstate; ++ state->ionrd = 0; ++ ++ if (state->status & XNPIPE_USER_CONN) { ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for ++ * the kernel side to connect (xnpipe_open). ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return minor; ++} ++EXPORT_SYMBOL_GPL(xnpipe_connect); ++ ++int xnpipe_disconnect(int minor) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ state->status &= ~XNPIPE_KERN_CONN; ++ ++ state->ionrd -= xnpipe_flushq(state, outq, free_obuf, s); ++ ++ if ((state->status & XNPIPE_USER_CONN) == 0) ++ goto cleanup; ++ ++ xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ if (xnsynch_destroy(&state->synchbase) == XNSYNCH_RESCHED) ++ xnsched_run(); ++ ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for some ++ * operation from the Xenomai side (read/write or ++ * poll). ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ ++cleanup: ++ /* ++ * If xnpipe_release() has not fully run, enter lingering ++ * close. This will prevent the extra state from being wiped ++ * out until then. 
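++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * In the lingering case, the ->release() handler runs and the
++	 * minor is freed later, from xnpipe_cleanup_user_conn(), once
++	 * the user side finally closes the device.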
++ */ ++ if (state->status & XNPIPE_USER_CONN) ++ state->status |= XNPIPE_KERN_LCLOSE; ++ else { ++ xnlock_put_irqrestore(&nklock, s); ++ state->ops.release(state->xstate); ++ xnlock_get_irqsave(&nklock, s); ++ xnpipe_minor_free(minor); ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnpipe_disconnect); ++ ++ssize_t xnpipe_send(int minor, struct xnpipe_mh *mh, size_t size, int flags) ++{ ++ struct xnpipe_state *state; ++ int need_sched = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (size <= sizeof(*mh)) ++ return -EINVAL; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ xnpipe_m_size(mh) = size - sizeof(*mh); ++ xnpipe_m_rdoff(mh) = 0; ++ state->ionrd += xnpipe_m_size(mh); ++ ++ if (flags & XNPIPE_URGENT) ++ list_add(&mh->link, &state->outq); ++ else ++ list_add_tail(&mh->link, &state->outq); ++ ++ state->nroutq++; ++ ++ if ((state->status & XNPIPE_USER_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return (ssize_t) size; ++ } ++ ++ if (state->status & XNPIPE_USER_WREAD) { ++ /* ++ * Wake up the regular Linux task waiting for input ++ * from the Xenomai side. ++ */ ++ state->status |= XNPIPE_USER_WREAD_READY; ++ need_sched = 1; ++ } ++ ++ if (state->asyncq) { /* Schedule asynch sig. */ ++ state->status |= XNPIPE_USER_SIGIO; ++ need_sched = 1; ++ } ++ ++ if (need_sched) ++ xnpipe_schedule_request(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t) size; ++} ++EXPORT_SYMBOL_GPL(xnpipe_send); ++ ++ssize_t xnpipe_mfixup(int minor, struct xnpipe_mh *mh, ssize_t size) ++{ ++ struct xnpipe_state *state; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (size < 0) ++ return -EINVAL; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ xnpipe_m_size(mh) += size; ++ state->ionrd += size; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t) size; ++} ++EXPORT_SYMBOL_GPL(xnpipe_mfixup); ++ ++ssize_t xnpipe_recv(int minor, struct xnpipe_mh **pmh, xnticks_t timeout) ++{ ++ struct xnpipe_state *state; ++ struct xnpipe_mh *mh; ++ xntmode_t mode; ++ ssize_t ret; ++ int info; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ if (xnsched_interrupt_p()) ++ return -EPERM; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ ret = -EBADF; ++ goto unlock_and_exit; ++ } ++ ++ /* ++ * If we received a relative timespec, rescale it to an ++ * absolute time value based on the monotonic clock. 
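++	 * (Editorial note, added for clarity -- not upstream text.)
++	 * Converting once to an absolute deadline keeps the total wait
++	 * bounded even if the loop below has to sleep more than once
++	 * before a message arrives.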
++ */ ++ mode = XN_RELATIVE; ++ if (timeout != XN_NONBLOCK && timeout != XN_INFINITE) { ++ mode = XN_ABSOLUTE; ++ timeout += xnclock_read_monotonic(&nkclock); ++ } ++ ++ for (;;) { ++ if (!list_empty(&state->inq)) ++ break; ++ ++ if (timeout == XN_NONBLOCK) { ++ ret = -EWOULDBLOCK; ++ goto unlock_and_exit; ++ } ++ ++ info = xnsynch_sleep_on(&state->synchbase, timeout, mode); ++ if (info & XNTIMEO) { ++ ret = -ETIMEDOUT; ++ goto unlock_and_exit; ++ } ++ if (info & XNBREAK) { ++ ret = -EINTR; ++ goto unlock_and_exit; ++ } ++ if (info & XNRMID) { ++ ret = -EIDRM; ++ goto unlock_and_exit; ++ } ++ } ++ ++ mh = list_get_entry(&state->inq, struct xnpipe_mh, link); ++ *pmh = mh; ++ state->nrinq--; ++ ret = (ssize_t)xnpipe_m_size(mh); ++ ++ if (state->status & XNPIPE_USER_WSYNC) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++unlock_and_exit: ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnpipe_recv); ++ ++int xnpipe_flush(int minor, int mode) ++{ ++ struct xnpipe_state *state; ++ int msgcount; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBADF; ++ } ++ ++ msgcount = state->nroutq + state->nrinq; ++ ++ if (mode & XNPIPE_OFLUSH) ++ state->ionrd -= xnpipe_flushq(state, outq, free_obuf, s); ++ ++ if (mode & XNPIPE_IFLUSH) ++ xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ if ((state->status & XNPIPE_USER_WSYNC) && ++ msgcount > state->nroutq + state->nrinq) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnpipe_flush); ++ ++int xnpipe_pollstate(int minor, unsigned int *mask_r) ++{ ++ struct xnpipe_state *state; ++ int ret = 0; ++ spl_t s; ++ ++ if (minor < 0 || minor >= XNPIPE_NDEVS) ++ return -ENODEV; ++ ++ state = xnpipe_states + minor; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (state->status & XNPIPE_KERN_CONN) { ++ *mask_r = POLLOUT; ++ if (!list_empty(&state->inq)) ++ *mask_r |= POLLIN; ++ } else ++ ret = -EIO; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnpipe_pollstate); ++ ++/* Must be entered with nklock held, interrupts off. */ ++#define xnpipe_cleanup_user_conn(__state, __s) \ ++ do { \ ++ xnpipe_flushq((__state), outq, free_obuf, (__s)); \ ++ xnpipe_flushq((__state), inq, free_ibuf, (__s)); \ ++ (__state)->status &= ~XNPIPE_USER_CONN; \ ++ if ((__state)->status & XNPIPE_KERN_LCLOSE) { \ ++ (__state)->status &= ~XNPIPE_KERN_LCLOSE; \ ++ xnlock_put_irqrestore(&nklock, (__s)); \ ++ (__state)->ops.release((__state)->xstate); \ ++ xnlock_get_irqsave(&nklock, (__s)); \ ++ xnpipe_minor_free(xnminor_from_state(__state)); \ ++ } \ ++ } while(0) ++ ++/* ++ * Open the pipe from user-space. ++ */ ++ ++static int xnpipe_open(struct inode *inode, struct file *file) ++{ ++ int minor, err = 0, sigpending; ++ struct xnpipe_state *state; ++ spl_t s; ++ ++ minor = MINOR(inode->i_rdev); ++ ++ if (minor >= XNPIPE_NDEVS) ++ return -ENXIO; /* TssTss... stop playing with mknod() ;o) */ ++ ++ state = &xnpipe_states[minor]; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* Enforce exclusive open for the message queues. 
*/ ++ if (state->status & (XNPIPE_USER_CONN | XNPIPE_USER_LCONN)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ state->status |= XNPIPE_USER_LCONN; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ file->private_data = state; ++ init_waitqueue_head(&state->readq); ++ init_waitqueue_head(&state->syncq); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ state->status |= XNPIPE_USER_CONN; ++ state->status &= ~XNPIPE_USER_LCONN; ++ state->wcount = 0; ++ ++ state->status &= ++ ~(XNPIPE_USER_ALL_WAIT | XNPIPE_USER_ALL_READY | ++ XNPIPE_USER_SIGIO); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ if (file->f_flags & O_NONBLOCK) { ++ xnpipe_cleanup_user_conn(state, s); ++ xnlock_put_irqrestore(&nklock, s); ++ return -EWOULDBLOCK; ++ } ++ ++ sigpending = xnpipe_wait(state, XNPIPE_USER_WREAD, s, ++ state->status & XNPIPE_KERN_CONN); ++ if (sigpending) { ++ xnpipe_cleanup_user_conn(state, s); ++ xnlock_put_irqrestore(&nklock, s); ++ return -ERESTARTSYS; ++ } ++ } ++ ++ if (err) ++ xnpipe_cleanup_user_conn(state, s); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err; ++} ++ ++static int xnpipe_release(struct inode *inode, struct file *file) ++{ ++ struct xnpipe_state *state = file->private_data; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnpipe_dequeue_all(state, XNPIPE_USER_WREAD); ++ xnpipe_dequeue_all(state, XNPIPE_USER_WSYNC); ++ ++ if (state->status & XNPIPE_KERN_CONN) { ++ /* Unblock waiters. */ ++ if (xnsynch_pended_p(&state->synchbase)) { ++ xnsynch_flush(&state->synchbase, XNRMID); ++ xnsched_run(); ++ } ++ } ++ ++ if (state->ops.input) ++ state->ops.input(NULL, -EPIPE, state->xstate); ++ ++ if (state->asyncq) { /* Clear the async queue */ ++ list_del(&state->alink); ++ state->status &= ~XNPIPE_USER_SIGIO; ++ xnlock_put_irqrestore(&nklock, s); ++ fasync_helper(-1, file, 0, &state->asyncq); ++ xnlock_get_irqsave(&nklock, s); ++ } ++ ++ xnpipe_cleanup_user_conn(state, s); ++ /* ++ * The extra state may not be available from now on, if ++ * xnpipe_disconnect() entered lingering close before we got ++ * there; so calling xnpipe_cleanup_user_conn() should be the ++ * last thing we do. ++ */ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++ ++static ssize_t xnpipe_read(struct file *file, ++ char *buf, size_t count, loff_t *ppos) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int sigpending, err = 0; ++ size_t nbytes, inbytes; ++ struct xnpipe_mh *mh; ++ ssize_t ret; ++ spl_t s; ++ ++ if (!access_wok(buf, count)) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ /* ++ * Queue probe and proc enqueuing must be seen atomically, ++ * including from the Xenomai side. ++ */ ++ if (list_empty(&state->outq)) { ++ if (file->f_flags & O_NONBLOCK) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EWOULDBLOCK; ++ } ++ ++ sigpending = xnpipe_wait(state, XNPIPE_USER_WREAD, s, ++ !list_empty(&state->outq)); ++ ++ if (list_empty(&state->outq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return sigpending ? -ERESTARTSYS : 0; ++ } ++ } ++ ++ mh = list_get_entry(&state->outq, struct xnpipe_mh, link); ++ state->nroutq--; ++ ++ /* ++ * We allow more data to be appended to the current message ++ * bucket while its contents is being copied to the user ++ * buffer, therefore, we need to loop until: 1) all the data ++ * has been copied, 2) we consumed the user buffer space ++ * entirely. 
++ */ ++ ++ inbytes = 0; ++ ++ for (;;) { ++ nbytes = xnpipe_m_size(mh) - xnpipe_m_rdoff(mh); ++ ++ if (nbytes + inbytes > count) ++ nbytes = count - inbytes; ++ ++ if (nbytes == 0) ++ break; ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* More data could be appended while doing this: */ ++ err = __copy_to_user(buf + inbytes, ++ xnpipe_m_data(mh) + xnpipe_m_rdoff(mh), ++ nbytes); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ ++ inbytes += nbytes; ++ xnpipe_m_rdoff(mh) += nbytes; ++ } ++ ++ state->ionrd -= inbytes; ++ ret = inbytes; ++ ++ if (xnpipe_m_size(mh) > xnpipe_m_rdoff(mh)) { ++ list_add(&mh->link, &state->outq); ++ state->nroutq++; ++ } else { ++ /* ++ * We always want to fire the output handler because ++ * whatever the error state is for userland (e.g ++ * -EFAULT), we did pull a message from our output ++ * queue. ++ */ ++ if (state->ops.output) ++ state->ops.output(mh, state->xstate); ++ xnlock_put_irqrestore(&nklock, s); ++ state->ops.free_obuf(mh, state->xstate); ++ xnlock_get_irqsave(&nklock, s); ++ if (state->status & XNPIPE_USER_WSYNC) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return err ? : ret; ++} ++ ++static ssize_t xnpipe_write(struct file *file, ++ const char *buf, size_t count, loff_t *ppos) ++{ ++ struct xnpipe_state *state = file->private_data; ++ struct xnpipe_mh *mh; ++ int pollnum, ret; ++ spl_t s; ++ ++ if (count == 0) ++ return 0; ++ ++ if (!access_rok(buf, count)) ++ return -EFAULT; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++retry: ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ pollnum = state->nrinq + state->nroutq; ++ xnlock_put_irqrestore(&nklock, s); ++ ++ mh = state->ops.alloc_ibuf(count + sizeof(*mh), state->xstate); ++ if (mh == (struct xnpipe_mh *)-1) ++ return -ENOMEM; ++ ++ if (mh == NULL) { ++ if (file->f_flags & O_NONBLOCK) ++ return -EWOULDBLOCK; ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (xnpipe_wait(state, XNPIPE_USER_WSYNC, s, ++ pollnum > state->nrinq + state->nroutq)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -ERESTARTSYS; ++ } ++ goto retry; ++ } ++ ++ xnpipe_m_size(mh) = count; ++ xnpipe_m_rdoff(mh) = 0; ++ ++ if (copy_from_user(xnpipe_m_data(mh), buf, count)) { ++ state->ops.free_ibuf(mh, state->xstate); ++ return -EFAULT; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ list_add_tail(&mh->link, &state->inq); ++ state->nrinq++; ++ ++ /* Wake up a Xenomai sleeper if any. 
*/ ++ if (xnsynch_wakeup_one_sleeper(&state->synchbase)) ++ xnsched_run(); ++ ++ if (state->ops.input) { ++ ret = state->ops.input(mh, 0, state->xstate); ++ if (ret) ++ count = (size_t)ret; ++ } ++ ++ if (file->f_flags & O_SYNC) { ++ if (!list_empty(&state->inq)) { ++ if (xnpipe_wait(state, XNPIPE_USER_WSYNC, s, ++ list_empty(&state->inq))) ++ count = -ERESTARTSYS; ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return (ssize_t)count; ++} ++ ++static long xnpipe_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int ret = 0; ++ ssize_t n; ++ spl_t s; ++ ++ switch (cmd) { ++ case XNPIPEIOC_GET_NRDEV: ++ ++ if (put_user(XNPIPE_NDEVS, (int *)arg)) ++ return -EFAULT; ++ ++ break; ++ ++ case XNPIPEIOC_OFLUSH: ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ n = xnpipe_flushq(state, outq, free_obuf, s); ++ state->ionrd -= n; ++ goto kick_wsync; ++ ++ case XNPIPEIOC_IFLUSH: ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if ((state->status & XNPIPE_KERN_CONN) == 0) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EPIPE; ++ } ++ ++ n = xnpipe_flushq(state, inq, free_ibuf, s); ++ ++ kick_wsync: ++ ++ if (n > 0 && (state->status & XNPIPE_USER_WSYNC)) { ++ state->status |= XNPIPE_USER_WSYNC_READY; ++ xnpipe_schedule_request(); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ret = n; ++ break; ++ ++ case XNPIPEIOC_SETSIG: ++ ++ if (arg < 1 || arg >= _NSIG) ++ return -EINVAL; ++ ++ xnpipe_asyncsig = arg; ++ break; ++ ++ case FIONREAD: ++ ++ n = (state->status & XNPIPE_KERN_CONN) ? state->ionrd : 0; ++ ++ if (put_user(n, (int *)arg)) ++ return -EFAULT; ++ ++ break; ++ ++ case TCGETS: ++ /* For isatty() probing. */ ++ return -ENOTTY; ++ ++ default: ++ ++ return -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int xnpipe_fasync(int fd, struct file *file, int on) ++{ ++ struct xnpipe_state *state = file->private_data; ++ int ret, queued; ++ spl_t s; ++ ++ queued = (state->asyncq != NULL); ++ ret = fasync_helper(fd, file, on, &state->asyncq); ++ ++ if (state->asyncq) { ++ if (!queued) { ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&state->alink, &xnpipe_asyncq); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ } else if (queued) { ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&state->alink); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return ret; ++} ++ ++static unsigned xnpipe_poll(struct file *file, poll_table *pt) ++{ ++ struct xnpipe_state *state = file->private_data; ++ unsigned r_mask = 0, w_mask = 0; ++ spl_t s; ++ ++ poll_wait(file, &state->readq, pt); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (state->status & XNPIPE_KERN_CONN) ++ w_mask |= (POLLOUT | POLLWRNORM); ++ else ++ r_mask |= POLLHUP; ++ ++ if (!list_empty(&state->outq)) ++ r_mask |= (POLLIN | POLLRDNORM); ++ else ++ /* ++ * Procs which have issued a timed out poll req will ++ * remain linked to the sleepers queue, and will be ++ * silently unlinked the next time the Xenomai side ++ * kicks xnpipe_wakeup_proc(). 
++ */ ++ xnpipe_enqueue_wait(state, XNPIPE_USER_WREAD); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return r_mask | w_mask; ++} ++ ++static struct file_operations xnpipe_fops = { ++ .read = xnpipe_read, ++ .write = xnpipe_write, ++ .poll = xnpipe_poll, ++ .unlocked_ioctl = xnpipe_ioctl, ++ .open = xnpipe_open, ++ .release = xnpipe_release, ++ .fasync = xnpipe_fasync ++}; ++ ++int xnpipe_mount(void) ++{ ++ struct xnpipe_state *state; ++ struct device *cldev; ++ int i; ++ ++ for (state = &xnpipe_states[0]; ++ state < &xnpipe_states[XNPIPE_NDEVS]; state++) { ++ state->status = 0; ++ state->asyncq = NULL; ++ INIT_LIST_HEAD(&state->inq); ++ state->nrinq = 0; ++ INIT_LIST_HEAD(&state->outq); ++ state->nroutq = 0; ++ } ++ ++ xnpipe_class = class_create(THIS_MODULE, "rtpipe"); ++ if (IS_ERR(xnpipe_class)) { ++ printk(XENO_ERR "error creating rtpipe class, err=%ld\n", ++ PTR_ERR(xnpipe_class)); ++ return -EBUSY; ++ } ++ ++ for (i = 0; i < XNPIPE_NDEVS; i++) { ++ cldev = device_create(xnpipe_class, NULL, ++ MKDEV(XNPIPE_DEV_MAJOR, i), ++ NULL, "rtp%d", i); ++ if (IS_ERR(cldev)) { ++ printk(XENO_ERR ++ "can't add device class, major=%d, minor=%d, err=%ld\n", ++ XNPIPE_DEV_MAJOR, i, PTR_ERR(cldev)); ++ class_destroy(xnpipe_class); ++ return -EBUSY; ++ } ++ } ++ ++ if (register_chrdev(XNPIPE_DEV_MAJOR, "rtpipe", &xnpipe_fops)) { ++ printk(XENO_ERR ++ "unable to reserve major #%d for message pipe support\n", ++ XNPIPE_DEV_MAJOR); ++ return -EBUSY; ++ } ++ ++ xnpipe_wakeup_apc = ++ xnapc_alloc("pipe_wakeup", &xnpipe_wakeup_proc, NULL); ++ ++ return 0; ++} ++ ++void xnpipe_umount(void) ++{ ++ int i; ++ ++ xnapc_free(xnpipe_wakeup_apc); ++ unregister_chrdev(XNPIPE_DEV_MAJOR, "rtpipe"); ++ ++ for (i = 0; i < XNPIPE_NDEVS; i++) ++ device_destroy(xnpipe_class, MKDEV(XNPIPE_DEV_MAJOR, i)); ++ ++ class_destroy(xnpipe_class); ++} +--- linux/kernel/xenomai/arith.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/arith.c 2021-04-07 16:01:25.829636164 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_arith In-kernel arithmetics ++ * ++ * A collection of helpers performing arithmetics not implicitly ++ * available from kernel context via GCC helpers. Many of these ++ * routines enable 64bit arithmetics on 32bit systems. Xenomai ++ * architecture ports normally implement the performance critical ones ++ * in hand-crafted assembly code (see ++ * kernel/cobalt/arch/\/include/asm/xenomai/uapi/arith.h). ++ * @{ ++ */ ++ ++/** ++ * Architecture-independent div64 operation with remainder. ++ * ++ * @param a dividend ++ * ++ * @param b divisor ++ * ++ * @param rem if non-NULL, a pointer to a 64bit variable for ++ * collecting the remainder from the division. 
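++ *
++ * (Editorial sketch, not part of the upstream sources.) For instance,
++ * splitting a nanosecond count into whole seconds plus a remainder:
++ *
++ *   unsigned long long rem;
++ *   unsigned long long s = xnarch_generic_full_divmod64(2500000000ULL,
++ *                                                       1000000000ULL,
++ *                                                       &rem);
++ *   // s == 2, rem == 500000000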
++ */ ++unsigned long long xnarch_generic_full_divmod64(unsigned long long a, ++ unsigned long long b, ++ unsigned long long *rem) ++{ ++ unsigned long long q = 0, r = a; ++ int i; ++ ++ for (i = fls(a >> 32) - fls(b >> 32), b <<= i; i >= 0; i--, b >>= 1) { ++ q <<= 1; ++ if (b <= r) { ++ r -= b; ++ q++; ++ } ++ } ++ ++ if (rem) ++ *rem = r; ++ return q; ++} ++EXPORT_SYMBOL_GPL(xnarch_generic_full_divmod64); ++ ++/** @} */ +--- linux/kernel/xenomai/sched-weak.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-weak.c 2021-04-07 16:01:25.824636171 +0800 +@@ -0,0 +1,224 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++ ++static void xnsched_weak_init(struct xnsched *sched) ++{ ++ xnsched_initq(&sched->weak.runnable); ++} ++ ++static void xnsched_weak_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->sched->weak.runnable, thread); ++} ++ ++static void xnsched_weak_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->sched->weak.runnable, thread); ++} ++ ++static void xnsched_weak_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->sched->weak.runnable, thread); ++} ++ ++static struct xnthread *xnsched_weak_pick(struct xnsched *sched) ++{ ++ return xnsched_getq(&sched->weak.runnable); ++} ++ ++static bool xnsched_weak_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (!xnthread_test_state(thread, XNBOOST)) ++ xnthread_set_state(thread, XNWEAK); ++ ++ return xnsched_set_effective_priority(thread, p->weak.prio); ++} ++ ++static void xnsched_weak_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->weak.prio = thread->cprio; ++} ++ ++static void xnsched_weak_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->weak.prio; ++ else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_weak_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_WEAK_MAX_PRIO) ++ prio = XNSCHED_WEAK_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_weak_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p->weak.prio < XNSCHED_WEAK_MIN_PRIO || ++ p->weak.prio > XNSCHED_WEAK_MAX_PRIO) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_weak_vfroot; ++ ++struct vfile_sched_weak_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_weak_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_weak_ops; ++ ++static struct xnvfile_snapshot vfile_sched_weak = { ++ .privsz = sizeof(struct vfile_sched_weak_priv), ++ .datasz = sizeof(struct vfile_sched_weak_data), ++ .tag = 
&nkthreadlist_tag, ++ .ops = &vfile_sched_weak_ops, ++}; ++ ++static int vfile_sched_weak_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_weak_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_weak.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_weak_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_weak_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_weak_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. */ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_weak) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ ++ return 1; ++} ++ ++static int vfile_sched_weak_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_weak_data *p = data; ++ char pribuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %s\n", ++ "CPU", "PID", "PRI", "NAME"); ++ else { ++ ksformat(pribuf, sizeof(pribuf), "%3d", p->cprio); ++ xnvfile_printf(it, "%3u %-6d %-4s %s\n", ++ p->cpu, ++ p->pid, ++ pribuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_weak_ops = { ++ .rewind = vfile_sched_weak_rewind, ++ .next = vfile_sched_weak_next, ++ .show = vfile_sched_weak_show, ++}; ++ ++static int xnsched_weak_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_weak_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_weak, ++ &sched_weak_vfroot); ++} ++ ++static void xnsched_weak_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_weak); ++ xnvfile_destroy_dir(&sched_weak_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_weak = { ++ .sched_init = xnsched_weak_init, ++ .sched_enqueue = xnsched_weak_enqueue, ++ .sched_dequeue = xnsched_weak_dequeue, ++ .sched_requeue = xnsched_weak_requeue, ++ .sched_pick = xnsched_weak_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_chkparam = xnsched_weak_chkparam, ++ .sched_setparam = xnsched_weak_setparam, ++ .sched_trackprio = xnsched_weak_trackprio, ++ .sched_protectprio = xnsched_weak_protectprio, ++ .sched_getparam = xnsched_weak_getparam, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_weak_init_vfile, ++ .sched_cleanup_vfile = xnsched_weak_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(1), ++ .policy = SCHED_WEAK, ++ .name = "weak" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_weak); +--- linux/kernel/xenomai/synch.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/synch.c 2021-04-07 16:01:25.819636178 +0800 +@@ -0,0 +1,1185 @@ ++/* ++ * Copyright (C) 2001-2008 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PP_CEILING_MASK 0xff ++ ++static inline int get_ceiling_value(struct xnsynch *synch) ++{ ++ /* ++ * The ceiling priority value is stored in user-writable ++ * memory, make sure to constrain it within valid bounds for ++ * xnsched_class_rt before using it. ++ */ ++ return *synch->ceiling_ref & PP_CEILING_MASK ?: 1; ++} ++ ++struct xnsynch *lookup_lazy_pp(xnhandle_t handle); ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_synch Thread synchronization services ++ * @{ ++ */ ++ ++/** ++ * @brief Initialize a synchronization object. ++ * ++ * Initializes a synchronization object. Xenomai threads can wait on ++ * and signal such objects for serializing access to resources. ++ * This object has built-in support for priority inheritance. ++ * ++ * @param synch The address of a synchronization object descriptor ++ * Cobalt will use to store the object-specific data. This descriptor ++ * must always be valid while the object is active therefore it must ++ * be allocated in permanent memory. ++ * ++ * @param flags A set of creation flags affecting the operation. The ++ * valid flags are: ++ * ++ * - XNSYNCH_PRIO causes the threads waiting for the resource to pend ++ * in priority order. Otherwise, FIFO ordering is used (XNSYNCH_FIFO). ++ * ++ * - XNSYNCH_OWNER indicates that the synchronization object shall ++ * track the resource ownership, allowing a single owner at most at ++ * any point in time. Note that setting this flag implies the use of ++ * xnsynch_acquire() and xnsynch_release() instead of ++ * xnsynch_sleep_on() and xnsynch_wakeup_*(). ++ * ++ * - XNSYNCH_PI enables priority inheritance when a priority inversion ++ * is detected among threads using this object. XNSYNCH_PI implies ++ * XNSYNCH_OWNER and XNSYNCH_PRIO. ++ * ++ * - XNSYNCH_PP enables priority protect to prevent priority inversion. ++ * XNSYNCH_PP implies XNSYNCH_OWNER and XNSYNCH_PRIO. ++ * ++ * - XNSYNCH_DREORD (Disable REORDering) tells Cobalt not to reorder ++ * the wait list upon priority change of a waiter. Reordering is the ++ * default. Only applies when XNSYNCH_PRIO is present. ++ * ++ * @param fastlock Address of the fast lock word to be associated with ++ * a synchronization object with ownership tracking. Therefore, a ++ * valid fast-lock address is required if XNSYNCH_OWNER is set in @a ++ * flags. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnsynch_init(struct xnsynch *synch, int flags, atomic_t *fastlock) ++{ ++ if (flags & (XNSYNCH_PI|XNSYNCH_PP)) ++ flags |= XNSYNCH_PRIO | XNSYNCH_OWNER; /* Obviously... */ ++ ++ synch->status = flags & ~XNSYNCH_CLAIMED; ++ synch->owner = NULL; ++ synch->cleanup = NULL; /* for PI/PP only. 
*/ ++ synch->wprio = -1; ++ synch->ceiling_ref = NULL; ++ INIT_LIST_HEAD(&synch->pendq); ++ ++ if (flags & XNSYNCH_OWNER) { ++ BUG_ON(fastlock == NULL); ++ synch->fastlock = fastlock; ++ atomic_set(fastlock, XN_NO_HANDLE); ++ } else ++ synch->fastlock = NULL; ++} ++EXPORT_SYMBOL_GPL(xnsynch_init); ++ ++/** ++ * @brief Initialize a synchronization object enforcing PP. ++ * ++ * This call is a variant of xnsynch_init() for initializing ++ * synchronization objects enabling the priority protect protocol. ++ * ++ * @param synch The address of a synchronization object descriptor ++ * Cobalt will use to store the object-specific data. See ++ * xnsynch_init(). ++ * ++ * @param flags A set of creation flags affecting the operation. See ++ * xnsynch_init(). XNSYNCH_PI is mutually exclusive with XNSYNCH_PP, ++ * and won't be considered. ++ * ++ * @param fastlock Address of the fast lock word to be associated with ++ * a synchronization object with ownership tracking. See xnsynch_init(). ++ * ++ * @param ceiling_ref The address of the variable holding the current ++ * priority ceiling value for this object. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnsynch_init_protect(struct xnsynch *synch, int flags, ++ atomic_t *fastlock, u32 *ceiling_ref) ++{ ++ xnsynch_init(synch, (flags & ~XNSYNCH_PI) | XNSYNCH_PP, fastlock); ++ synch->ceiling_ref = ceiling_ref; ++} ++ ++/** ++ * @fn void xnsynch_destroy(struct xnsynch *synch) ++ * @brief Destroy a synchronization object. ++ * ++ * Destroys the synchronization object @a synch, unblocking all ++ * waiters with the XNRMID status. ++ * ++ * @return XNSYNCH_RESCHED is returned if at least one thread is ++ * unblocked, which means the caller should invoke xnsched_run() for ++ * applying the new scheduling state. Otherwise, XNSYNCH_DONE is ++ * returned. ++ ++ * @sideeffect Same as xnsynch_flush(). ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnsynch_destroy(struct xnsynch *synch) ++{ ++ int ret; ++ ++ ret = xnsynch_flush(synch, XNRMID); ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_CLAIMED); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnsynch_destroy); ++ ++/** ++ * @fn int xnsynch_sleep_on(struct xnsynch *synch, xnticks_t timeout, xntmode_t timeout_mode); ++ * @brief Sleep on an ownerless synchronization object. ++ * ++ * Makes the calling thread sleep on the specified synchronization ++ * object, waiting for it to be signaled. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to pend on the given resource. It must not be used ++ * with synchronization objects that are supposed to track ownership ++ * (XNSYNCH_OWNER). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to sleep on. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on the resource. This value is a wait time given as a ++ * count of nanoseconds. It can either be relative, absolute ++ * monotonic, or absolute adjustable depending on @a ++ * timeout_mode. Passing XN_INFINITE @b and setting @a mode to ++ * XN_RELATIVE specifies an unbounded wait. All other values are used ++ * to initialize a watchdog timer. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). 
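++ *
++ * (Editorial sketch, not part of the upstream sources.) A typical
++ * pairing on an ownerless object, with @a sem naming a hypothetical
++ * wrapper structure embedding the xnsynch descriptor:
++ *
++ *   // waiting side, primary mode
++ *   int info = xnsynch_sleep_on(&sem->synch, XN_INFINITE, XN_RELATIVE);
++ *   if (info & XNRMID)
++ *           return -EIDRM;  // object deleted while sleeping
++ *
++ *   // signaling side
++ *   if (xnsynch_wakeup_one_sleeper(&sem->synch))
++ *           xnsched_run();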
++ * ++ * @return A bitmask which may include zero or one information bit ++ * among XNRMID, XNTIMEO and XNBREAK, which should be tested by the ++ * caller, for detecting respectively: object deletion, timeout or ++ * signal/unblock conditions which might have happened while waiting. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnsynch_sleep_on(struct xnsynch *synch, xnticks_t timeout, ++ xntmode_t timeout_mode) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ thread = xnthread_current(); ++ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP) && ++ thread->res_count > 0 && ++ xnthread_test_state(thread, XNWARN)) ++ xnthread_signal(thread, SIGDEBUG, SIGDEBUG_MUTEX_SLEEP); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_sleepon(synch); ++ ++ if ((synch->status & XNSYNCH_PRIO) == 0) /* i.e. FIFO */ ++ list_add_tail(&thread->plink, &synch->pendq); ++ else /* i.e. priority-sorted */ ++ list_add_priff(thread, &synch->pendq, wprio, plink); ++ ++ xnthread_suspend(thread, XNPEND, timeout, timeout_mode, synch); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return xnthread_test_info(thread, XNRMID|XNTIMEO|XNBREAK); ++} ++EXPORT_SYMBOL_GPL(xnsynch_sleep_on); ++ ++/** ++ * @fn struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch); ++ * @brief Unblock the heading thread from wait. ++ * ++ * This service wakes up the thread which is currently leading the ++ * synchronization object's pending list. The sleeping thread is ++ * unblocked from its pending state, but no reschedule is performed. ++ * ++ * This service should be called by upper interfaces wanting to signal ++ * the given resource so that a single waiter is resumed. It must not ++ * be used with synchronization objects that are supposed to track ++ * ownership (XNSYNCH_OWNER not set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @return The descriptor address of the unblocked thread. ++ * ++ * @coretags{unrestricted} ++ */ ++struct xnthread *xnsynch_wakeup_one_sleeper(struct xnsynch *synch) ++{ ++ struct xnthread *thread; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&synch->pendq)) { ++ thread = NULL; ++ goto out; ++ } ++ ++ trace_cobalt_synch_wakeup(synch); ++ thread = list_first_entry(&synch->pendq, struct xnthread, plink); ++ list_del(&thread->plink); ++ thread->wchan = NULL; ++ xnthread_resume(thread, XNPEND); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_one_sleeper); ++ ++int xnsynch_wakeup_many_sleepers(struct xnsynch *synch, int nr) ++{ ++ struct xnthread *thread, *tmp; ++ int nwakeups = 0; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (list_empty(&synch->pendq)) ++ goto out; ++ ++ trace_cobalt_synch_wakeup_many(synch); ++ ++ list_for_each_entry_safe(thread, tmp, &synch->pendq, plink) { ++ if (nwakeups++ >= nr) ++ break; ++ list_del(&thread->plink); ++ thread->wchan = NULL; ++ xnthread_resume(thread, XNPEND); ++ } ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return nwakeups; ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_many_sleepers); ++ ++/** ++ * @fn void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, struct xnthread *sleeper); ++ * @brief Unblock a particular thread from wait. 
++ * ++ * This service wakes up a specific thread which is currently pending on ++ * the given synchronization object. The sleeping thread is unblocked ++ * from its pending state, but no reschedule is performed. ++ * ++ * This service should be called by upper interfaces wanting to signal ++ * the given resource so that a specific waiter is resumed. It must not ++ * be used with synchronization objects that are supposed to track ++ * ownership (XNSYNCH_OWNER not set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @param sleeper The thread to unblock which MUST be currently linked ++ * to the synchronization object's pending queue (i.e. synch->pendq). ++ * ++ * @coretags{unrestricted} ++ */ ++void xnsynch_wakeup_this_sleeper(struct xnsynch *synch, struct xnthread *sleeper) ++{ ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_OWNER); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_wakeup(synch); ++ list_del(&sleeper->plink); ++ sleeper->wchan = NULL; ++ xnthread_resume(sleeper, XNPEND); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnsynch_wakeup_this_sleeper); ++ ++static inline void raise_boost_flag(struct xnthread *owner) ++{ ++ /* Backup the base priority at first boost only. */ ++ if (!xnthread_test_state(owner, XNBOOST)) { ++ owner->bprio = owner->cprio; ++ xnthread_set_state(owner, XNBOOST); ++ } ++} ++ ++static void inherit_thread_priority(struct xnthread *owner, ++ struct xnthread *target) ++{ ++ if (xnthread_test_state(owner, XNZOMBIE)) ++ return; ++ ++ /* Apply the scheduling policy of "target" to "thread" */ ++ xnsched_track_policy(owner, target); ++ ++ /* ++ * Owner may be sleeping, propagate priority update through ++ * the PI chain if needed. ++ */ ++ if (owner->wchan) ++ xnsynch_requeue_sleeper(owner); ++} ++ ++static void __ceil_owner_priority(struct xnthread *owner, int prio) ++{ ++ if (xnthread_test_state(owner, XNZOMBIE)) ++ return; ++ /* ++ * Raise owner priority to the ceiling value, this implicitly ++ * selects SCHED_FIFO for the owner. ++ */ ++ xnsched_protect_priority(owner, prio); ++ ++ if (owner->wchan) ++ xnsynch_requeue_sleeper(owner); ++} ++ ++static void adjust_boost(struct xnthread *owner, struct xnthread *target) ++{ ++ struct xnsynch *synch; ++ ++ /* ++ * CAUTION: we may have PI and PP-enabled objects among the ++ * boosters, considering the leader of synch->pendq is ++ * therefore NOT enough for determining the next boost ++ * priority, since PP is tracked on acquisition, not on ++ * contention. Check the head of the booster list instead. ++ */ ++ synch = list_first_entry(&owner->boosters, struct xnsynch, next); ++ if (synch->wprio == owner->wprio) ++ return; ++ ++ if (synch->status & XNSYNCH_PP) ++ __ceil_owner_priority(owner, get_ceiling_value(synch)); ++ else { ++ XENO_BUG_ON(COBALT, list_empty(&synch->pendq)); ++ if (target == NULL) ++ target = list_first_entry(&synch->pendq, ++ struct xnthread, plink); ++ inherit_thread_priority(owner, target); ++ } ++} ++ ++static void ceil_owner_priority(struct xnsynch *synch) ++{ ++ struct xnthread *owner = synch->owner; ++ int wprio; ++ ++ /* PP ceiling values are implicitly based on the RT class. 
*/ ++ wprio = xnsched_calc_wprio(&xnsched_class_rt, ++ get_ceiling_value(synch)); ++ synch->wprio = wprio; ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ raise_boost_flag(owner); ++ synch->status |= XNSYNCH_CEILING; ++ ++ /* ++ * If the ceiling value is lower than the current effective ++ * priority, we must not adjust the latter. BEWARE: not only ++ * this restriction is required to keep the PP logic right, ++ * but this is also a basic assumption made by all ++ * xnthread_commit_ceiling() callers which won't check for any ++ * rescheduling opportunity upon return. ++ * ++ * However we do want the object to be linked to the booster ++ * list, and XNBOOST must appear in the current thread status. ++ * ++ * This way, setparam() won't be allowed to decrease the ++ * current weighted priority below the ceiling value, until we ++ * eventually release this object. ++ */ ++ if (wprio > owner->wprio) ++ adjust_boost(owner, NULL); ++} ++ ++static inline ++void track_owner(struct xnsynch *synch, struct xnthread *owner) ++{ ++ synch->owner = owner; ++} ++ ++static inline /* nklock held, irqs off */ ++void set_current_owner_locked(struct xnsynch *synch, struct xnthread *owner) ++{ ++ /* ++ * Update the owner information, and apply priority protection ++ * for PP objects. We may only get there if owner is current, ++ * or blocked. ++ */ ++ track_owner(synch, owner); ++ if (synch->status & XNSYNCH_PP) ++ ceil_owner_priority(synch); ++} ++ ++static inline ++void set_current_owner(struct xnsynch *synch, struct xnthread *owner) ++{ ++ spl_t s; ++ ++ track_owner(synch, owner); ++ if (synch->status & XNSYNCH_PP) { ++ xnlock_get_irqsave(&nklock, s); ++ ceil_owner_priority(synch); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++} ++ ++static inline ++xnhandle_t get_owner_handle(xnhandle_t ownerh, struct xnsynch *synch) ++{ ++ /* ++ * On acquisition from kernel space, the fast lock handle ++ * should bear the FLCEIL bit for PP objects, so that userland ++ * takes the slow path on release, jumping to the kernel for ++ * dropping the ceiling priority boost. ++ */ ++ if (synch->status & XNSYNCH_PP) ++ ownerh = xnsynch_fast_ceiling(ownerh); ++ ++ return ownerh; ++} ++ ++static void commit_ceiling(struct xnsynch *synch, struct xnthread *curr) ++{ ++ xnhandle_t oldh, h; ++ atomic_t *lockp; ++ ++ track_owner(synch, curr); ++ ceil_owner_priority(synch); ++ /* ++ * Raise FLCEIL, which indicates a kernel entry will be ++ * required for releasing this resource. ++ */ ++ lockp = xnsynch_fastlock(synch); ++ do { ++ h = atomic_read(lockp); ++ oldh = atomic_cmpxchg(lockp, h, xnsynch_fast_ceiling(h)); ++ } while (oldh != h); ++} ++ ++void xnsynch_commit_ceiling(struct xnthread *curr) /* nklock held, irqs off */ ++{ ++ struct xnsynch *synch; ++ atomic_t *lockp; ++ ++ /* curr->u_window has to be valid, curr bears XNUSER. */ ++ synch = lookup_lazy_pp(curr->u_window->pp_pending); ++ if (synch == NULL) { ++ /* ++ * If pp_pending is a bad handle, don't panic but ++ * rather ignore: we don't want a misbehaving userland ++ * to crash the kernel. ++ */ ++ XENO_WARN_ON_ONCE(USER, 1); ++ goto out; ++ } ++ ++ /* ++ * For PP locks, userland does, in that order: ++ * ++ * -- LOCK ++ * 1. curr->u_window->pp_pending = lock_handle ++ * barrier(); ++ * 2. atomic_cmpxchg(lockp, XN_NO_HANDLE, curr->handle); ++ * ++ * -- UNLOCK ++ * 1. atomic_cmpxchg(lockp, curr->handle, XN_NO_HANDLE); [unclaimed] ++ * barrier(); ++ * 2. 
curr->u_window->pp_pending = XN_NO_HANDLE ++ * ++ * Make sure we have not been caught in a rescheduling in ++ * between those steps. If we did, then we won't be holding ++ * the lock as we schedule away, therefore no priority update ++ * must take place. ++ */ ++ lockp = xnsynch_fastlock(synch); ++ if (xnsynch_fast_owner_check(lockp, curr->handle)) ++ return; ++ ++ /* ++ * In rare cases, we could be called multiple times for ++ * committing a lazy ceiling for the same object, e.g. if ++ * userland is preempted in the middle of a recursive locking ++ * sequence. ++ * ++ * This stems from the fact that userland has to update ++ * ->pp_pending prior to trying to grab the lock atomically, ++ * at which point it can figure out whether a recursive ++ * locking happened. We get out of this trap by testing the ++ * XNSYNCH_CEILING flag. ++ */ ++ if ((synch->status & XNSYNCH_CEILING) == 0) ++ commit_ceiling(synch, curr); ++out: ++ curr->u_window->pp_pending = XN_NO_HANDLE; ++} ++ ++/** ++ * @fn int xnsynch_try_acquire(struct xnsynch *synch); ++ * @brief Try acquiring the ownership of a synchronization object. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to acquire the ownership of the given resource. If ++ * the resource is already assigned to another thread, the call ++ * returns with an error code. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to acquire. ++ * ++ * @return Zero is returned if @a synch has been successfully ++ * acquired. Otherwise: ++ * ++ * - -EDEADLK is returned if @a synch is currently held by the calling ++ * thread. ++ * ++ * - -EBUSY is returned if @a synch is currently held by another ++ * thread. ++ * ++ * @coretags{primary-only} ++ */ ++int xnsynch_try_acquire(struct xnsynch *synch) ++{ ++ struct xnthread *curr; ++ atomic_t *lockp; ++ xnhandle_t h; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ curr = xnthread_current(); ++ lockp = xnsynch_fastlock(synch); ++ trace_cobalt_synch_try_acquire(synch); ++ ++ h = atomic_cmpxchg(lockp, XN_NO_HANDLE, ++ get_owner_handle(curr->handle, synch)); ++ if (h != XN_NO_HANDLE) ++ return xnhandle_get_id(h) == curr->handle ? ++ -EDEADLK : -EBUSY; ++ ++ set_current_owner(synch, curr); ++ xnthread_get_resource(curr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnsynch_try_acquire); ++ ++/** ++ * @fn int xnsynch_acquire(struct xnsynch *synch, xnticks_t timeout, xntmode_t timeout_mode); ++ * @brief Acquire the ownership of a synchronization object. ++ * ++ * This service should be called by upper interfaces wanting the ++ * current thread to acquire the ownership of the given resource. If ++ * the resource is already assigned to another thread, the caller is ++ * suspended. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to acquire. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on the resource. This value is a wait time given as a ++ * count of nanoseconds. It can either be relative, absolute ++ * monotonic, or absolute adjustable depending on @a ++ * timeout_mode. Passing XN_INFINITE @b and setting @a mode to ++ * XN_RELATIVE specifies an unbounded wait. 
All other values are used ++ * to initialize a watchdog timer. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @return A bitmask which may include zero or one information bit ++ * among XNRMID, XNTIMEO and XNBREAK, which should be tested by the ++ * caller, for detecting respectively: object deletion, timeout or ++ * signal/unblock conditions which might have happened while waiting. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note Unlike xnsynch_try_acquire(), this call does NOT check for ++ * invalid recursive locking request, which means that such request ++ * will always cause a deadlock for the caller. ++ */ ++int xnsynch_acquire(struct xnsynch *synch, xnticks_t timeout, ++ xntmode_t timeout_mode) ++{ ++ struct xnthread *curr, *owner; ++ xnhandle_t currh, h, oldh; ++ atomic_t *lockp; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ curr = xnthread_current(); ++ currh = curr->handle; ++ lockp = xnsynch_fastlock(synch); ++ trace_cobalt_synch_acquire(synch); ++redo: ++ /* Basic form of xnsynch_try_acquire(). */ ++ h = atomic_cmpxchg(lockp, XN_NO_HANDLE, ++ get_owner_handle(currh, synch)); ++ if (likely(h == XN_NO_HANDLE)) { ++ set_current_owner(synch, curr); ++ xnthread_get_resource(curr); ++ return 0; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * Set claimed bit. In case it appears to be set already, ++ * re-read its state under nklock so that we don't miss any ++ * change between the lock-less read and here. But also try to ++ * avoid cmpxchg where possible. Only if it appears not to be ++ * set, start with cmpxchg directly. ++ */ ++ if (xnsynch_fast_is_claimed(h)) { ++ oldh = atomic_read(lockp); ++ goto test_no_owner; ++ } ++ ++ do { ++ oldh = atomic_cmpxchg(lockp, h, xnsynch_fast_claimed(h)); ++ if (likely(oldh == h)) ++ break; ++ test_no_owner: ++ if (oldh == XN_NO_HANDLE) { ++ /* Mutex released from another cpu. */ ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ h = oldh; ++ } while (!xnsynch_fast_is_claimed(h)); ++ ++ owner = xnthread_lookup(h); ++ if (owner == NULL) { ++ /* ++ * The handle is broken, therefore pretend that the ++ * synch object was deleted to signal an error. ++ */ ++ xnthread_set_info(curr, XNRMID); ++ goto out; ++ } ++ ++ /* ++ * This is the contended path. We just detected an earlier ++ * syscall-less fast locking from userland, fix up the ++ * in-kernel state information accordingly. ++ * ++ * The consistency of the state information is guaranteed, ++ * because we just raised the claim bit atomically for this ++ * contended lock, therefore userland will have to jump to the ++ * kernel when releasing it, instead of doing a fast ++ * unlock. Since we currently own the superlock, consistency ++ * wrt transfer_ownership() is guaranteed through ++ * serialization. ++ * ++ * CAUTION: in this particular case, the only assumptions we ++ * can safely make is that *owner is valid but not current on ++ * this CPU. ++ */ ++ track_owner(synch, owner); ++ xnsynch_detect_relaxed_owner(synch, curr); ++ ++ if ((synch->status & XNSYNCH_PRIO) == 0) { /* i.e. FIFO */ ++ list_add_tail(&curr->plink, &synch->pendq); ++ goto block; ++ } ++ ++ if (curr->wprio > owner->wprio) { ++ if (xnthread_test_info(owner, XNWAKEN) && owner->wwake == synch) { ++ /* Ownership is still pending, steal the resource. 
*/ ++ set_current_owner_locked(synch, curr); ++ xnthread_clear_info(curr, XNRMID | XNTIMEO | XNBREAK); ++ xnthread_set_info(owner, XNROBBED); ++ goto grab; ++ } ++ ++ list_add_priff(curr, &synch->pendq, wprio, plink); ++ ++ if (synch->status & XNSYNCH_PI) { ++ raise_boost_flag(owner); ++ ++ if (synch->status & XNSYNCH_CLAIMED) ++ list_del(&synch->next); /* owner->boosters */ ++ else ++ synch->status |= XNSYNCH_CLAIMED; ++ ++ synch->wprio = curr->wprio; ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ /* ++ * curr->wprio > owner->wprio implies that ++ * synch must be leading the booster list ++ * after insertion, so we may call ++ * inherit_thread_priority() for tracking ++ * current's priority directly without going ++ * through adjust_boost(). ++ */ ++ inherit_thread_priority(owner, curr); ++ } ++ } else ++ list_add_priff(curr, &synch->pendq, wprio, plink); ++block: ++ xnthread_suspend(curr, XNPEND, timeout, timeout_mode, synch); ++ curr->wwake = NULL; ++ xnthread_clear_info(curr, XNWAKEN); ++ ++ if (xnthread_test_info(curr, XNRMID | XNTIMEO | XNBREAK)) ++ goto out; ++ ++ if (xnthread_test_info(curr, XNROBBED)) { ++ /* ++ * Somebody stole us the ownership while we were ready ++ * to run, waiting for the CPU: we need to wait again ++ * for the resource. ++ */ ++ if (timeout_mode != XN_RELATIVE || timeout == XN_INFINITE) { ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ timeout = xntimer_get_timeout_stopped(&curr->rtimer); ++ if (timeout > 1) { /* Otherwise, it's too late. */ ++ xnlock_put_irqrestore(&nklock, s); ++ goto redo; ++ } ++ xnthread_set_info(curr, XNTIMEO); ++ goto out; ++ } ++grab: ++ xnthread_get_resource(curr); ++ ++ if (xnsynch_pended_p(synch)) ++ currh = xnsynch_fast_claimed(currh); ++ ++ /* Set new ownership for this object. */ ++ atomic_set(lockp, get_owner_handle(currh, synch)); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return xnthread_test_info(curr, XNRMID|XNTIMEO|XNBREAK); ++} ++EXPORT_SYMBOL_GPL(xnsynch_acquire); ++ ++static void drop_booster(struct xnsynch *synch, struct xnthread *owner) ++{ ++ list_del(&synch->next); /* owner->boosters */ ++ ++ if (list_empty(&owner->boosters)) { ++ xnthread_clear_state(owner, XNBOOST); ++ inherit_thread_priority(owner, owner); ++ } else ++ adjust_boost(owner, NULL); ++} ++ ++static inline void clear_pi_boost(struct xnsynch *synch, ++ struct xnthread *owner) ++{ /* nklock held, irqs off */ ++ synch->status &= ~XNSYNCH_CLAIMED; ++ drop_booster(synch, owner); ++} ++ ++static inline void clear_pp_boost(struct xnsynch *synch, ++ struct xnthread *owner) ++{ /* nklock held, irqs off */ ++ synch->status &= ~XNSYNCH_CEILING; ++ drop_booster(synch, owner); ++} ++ ++static bool transfer_ownership(struct xnsynch *synch, ++ struct xnthread *lastowner) ++{ /* nklock held, irqs off */ ++ struct xnthread *nextowner; ++ xnhandle_t nextownerh; ++ atomic_t *lockp; ++ ++ lockp = xnsynch_fastlock(synch); ++ ++ /* ++ * Our caller checked for contention locklessly, so we do have ++ * to check again under lock in a different way. 
++ */ ++ if (list_empty(&synch->pendq)) { ++ synch->owner = NULL; ++ atomic_set(lockp, XN_NO_HANDLE); ++ return false; ++ } ++ ++ nextowner = list_first_entry(&synch->pendq, struct xnthread, plink); ++ list_del(&nextowner->plink); ++ nextowner->wchan = NULL; ++ nextowner->wwake = synch; ++ set_current_owner_locked(synch, nextowner); ++ xnthread_set_info(nextowner, XNWAKEN); ++ xnthread_resume(nextowner, XNPEND); ++ ++ if (synch->status & XNSYNCH_CLAIMED) ++ clear_pi_boost(synch, lastowner); ++ ++ nextownerh = get_owner_handle(nextowner->handle, synch); ++ if (xnsynch_pended_p(synch)) ++ nextownerh = xnsynch_fast_claimed(nextownerh); ++ ++ atomic_set(lockp, nextownerh); ++ ++ return true; ++} ++ ++/** ++ * @fn bool xnsynch_release(struct xnsynch *synch, struct xnthread *curr) ++ * @brief Release a resource and pass it to the next waiting thread. ++ * ++ * This service releases the ownership of the given synchronization ++ * object. The thread which is currently leading the object's pending ++ * list, if any, is unblocked from its pending state. However, no ++ * reschedule is performed. ++ * ++ * This service must be used only with synchronization objects that ++ * track ownership (XNSYNCH_OWNER set). ++ * ++ * @param synch The descriptor address of the synchronization object ++ * whose ownership is changed. ++ * ++ * @param curr The descriptor address of the current thread, which ++ * must own the object at the time of calling. ++ * ++ * @return True if a reschedule is required. ++ * ++ * @sideeffect ++ * ++ * - The effective priority of the previous resource owner might be ++ * lowered to its base priority value as a consequence of the priority ++ * boost being cleared. ++ * ++ * - The synchronization object ownership is transfered to the ++ * unblocked thread. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++bool xnsynch_release(struct xnsynch *synch, struct xnthread *curr) ++{ ++ bool need_resched = false; ++ xnhandle_t currh, h; ++ atomic_t *lockp; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, (synch->status & XNSYNCH_OWNER) == 0); ++ ++ trace_cobalt_synch_release(synch); ++ ++ if (xnthread_put_resource(curr)) ++ return false; ++ ++ lockp = xnsynch_fastlock(synch); ++ currh = curr->handle; ++ /* ++ * FLCEIL may only be raised by the owner, or when the owner ++ * is blocked waiting for the synch (ownership transfer). In ++ * addition, only the current owner of a synch may release it, ++ * therefore we can't race while testing FLCEIL locklessly. ++ * All updates to FLCLAIM are covered by the superlock. ++ * ++ * Therefore, clearing the fastlock racelessly in this routine ++ * without leaking FLCEIL/FLCLAIM updates can be achieved by ++ * holding the superlock. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (synch->status & XNSYNCH_CEILING) { ++ clear_pp_boost(synch, curr); ++ need_resched = true; ++ } ++ ++ h = atomic_cmpxchg(lockp, currh, XN_NO_HANDLE); ++ if ((h & ~XNSYNCH_FLCEIL) != currh) ++ /* FLCLAIM set, synch is contended. */ ++ need_resched = transfer_ownership(synch, curr); ++ else if (h != currh) /* FLCEIL set, FLCLAIM clear. */ ++ atomic_set(lockp, XN_NO_HANDLE); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return need_resched; ++} ++EXPORT_SYMBOL_GPL(xnsynch_release); ++ ++void xnsynch_requeue_sleeper(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ struct xnsynch *synch = thread->wchan; ++ struct xnthread *owner; ++ ++ XENO_BUG_ON(COBALT, !(synch->status & XNSYNCH_PRIO)); ++ ++ /* ++ * Update the position in the pend queue of a thread waiting ++ * for a lock. 
This routine propagates the change throughout ++ * the PI chain if required. ++ */ ++ list_del(&thread->plink); ++ list_add_priff(thread, &synch->pendq, wprio, plink); ++ owner = synch->owner; ++ ++ /* Only PI-enabled objects are of interest here. */ ++ if ((synch->status & XNSYNCH_PI) == 0) ++ return; ++ ++ synch->wprio = thread->wprio; ++ if (synch->status & XNSYNCH_CLAIMED) ++ list_del(&synch->next); ++ else { ++ synch->status |= XNSYNCH_CLAIMED; ++ raise_boost_flag(owner); ++ } ++ ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ adjust_boost(owner, thread); ++} ++EXPORT_SYMBOL_GPL(xnsynch_requeue_sleeper); ++ ++/** ++ * @fn struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch); ++ * @brief Access the thread leading a synch object wait queue. ++ * ++ * This services returns the descriptor address of to the thread leading a ++ * synchronization object wait queue. ++ * ++ * @param synch The descriptor address of the target synchronization object. ++ * ++ * @return The descriptor address of the unblocked thread. ++ * ++ * @coretags{unrestricted} ++ */ ++struct xnthread *xnsynch_peek_pendq(struct xnsynch *synch) ++{ ++ struct xnthread *thread = NULL; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!list_empty(&synch->pendq)) ++ thread = list_first_entry(&synch->pendq, ++ struct xnthread, plink); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return thread; ++} ++EXPORT_SYMBOL_GPL(xnsynch_peek_pendq); ++ ++/** ++ * @fn int xnsynch_flush(struct xnsynch *synch, int reason); ++ * @brief Unblock all waiters pending on a resource. ++ * ++ * This service atomically releases all threads which currently sleep ++ * on a given resource. This service should be called by upper ++ * interfaces under circumstances requiring that the pending queue of ++ * a given resource is cleared, such as before the resource is ++ * deleted. ++ * ++ * @param synch The descriptor address of the synchronization object ++ * to be flushed. ++ * ++ * @param reason Some flags to set in the information mask of every ++ * unblocked thread. Zero is an acceptable value. The following bits ++ * are pre-defined by Cobalt: ++ * ++ * - XNRMID should be set to indicate that the synchronization object ++ * is about to be destroyed (see xnthread_resume()). ++ * ++ * - XNBREAK should be set to indicate that the wait has been forcibly ++ * interrupted (see xnthread_unblock()). ++ * ++ * @return XNSYNCH_RESCHED is returned if at least one thread is ++ * unblocked, which means the caller should invoke xnsched_run() for ++ * applying the new scheduling state. Otherwise, XNSYNCH_DONE is ++ * returned. ++ * ++ * @sideeffect ++ * ++ * - The effective priority of the current resource owner might be ++ * lowered to its base priority value as a consequence of the priority ++ * inheritance boost being cleared. 
++ * ++ * @coretags{unrestricted} ++ */ ++int xnsynch_flush(struct xnsynch *synch, int reason) ++{ ++ struct xnthread *sleeper, *tmp; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_synch_flush(synch); ++ ++ if (list_empty(&synch->pendq)) { ++ XENO_BUG_ON(COBALT, synch->status & XNSYNCH_CLAIMED); ++ ret = XNSYNCH_DONE; ++ } else { ++ ret = XNSYNCH_RESCHED; ++ list_for_each_entry_safe(sleeper, tmp, &synch->pendq, plink) { ++ list_del(&sleeper->plink); ++ xnthread_set_info(sleeper, reason); ++ sleeper->wchan = NULL; ++ xnthread_resume(sleeper, XNPEND); ++ } ++ if (synch->status & XNSYNCH_CLAIMED) ++ clear_pi_boost(synch, synch->owner); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnsynch_flush); ++ ++void xnsynch_forget_sleeper(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ struct xnsynch *synch = thread->wchan; ++ struct xnthread *owner, *target; ++ ++ /* ++ * Do all the necessary housekeeping chores to stop a thread ++ * from waiting on a given synchronization object. Doing so ++ * may require to update a PI chain. ++ */ ++ trace_cobalt_synch_forget(synch); ++ ++ xnthread_clear_state(thread, XNPEND); ++ thread->wchan = NULL; ++ list_del(&thread->plink); /* synch->pendq */ ++ ++ /* ++ * Only a sleeper leaving a PI chain triggers an update. ++ * NOTE: PP objects never bear the CLAIMED bit. ++ */ ++ if ((synch->status & XNSYNCH_CLAIMED) == 0) ++ return; ++ ++ owner = synch->owner; ++ ++ if (list_empty(&synch->pendq)) { ++ /* No more sleepers: clear the PI boost. */ ++ clear_pi_boost(synch, owner); ++ return; ++ } ++ ++ /* ++ * Reorder the booster queue of the current owner after we ++ * left the wait list, then set its priority to the new ++ * required minimum required to prevent priority inversion. ++ */ ++ target = list_first_entry(&synch->pendq, struct xnthread, plink); ++ synch->wprio = target->wprio; ++ list_del(&synch->next); /* owner->boosters */ ++ list_add_priff(synch, &owner->boosters, wprio, next); ++ adjust_boost(owner, target); ++} ++EXPORT_SYMBOL_GPL(xnsynch_forget_sleeper); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED ++ ++/* ++ * Detect when a thread is about to sleep on a synchronization ++ * object currently owned by someone running in secondary mode. ++ */ ++void xnsynch_detect_relaxed_owner(struct xnsynch *synch, ++ struct xnthread *sleeper) ++{ ++ if (xnthread_test_state(sleeper, XNWARN) && ++ !xnthread_test_info(sleeper, XNPIALERT) && ++ xnthread_test_state(synch->owner, XNRELAX)) { ++ xnthread_set_info(sleeper, XNPIALERT); ++ xnthread_signal(sleeper, SIGDEBUG, ++ SIGDEBUG_MIGRATE_PRIOINV); ++ } else ++ xnthread_clear_info(sleeper, XNPIALERT); ++} ++ ++/* ++ * Detect when a thread is about to relax while holding booster(s) ++ * (claimed PI or active PP object), which denotes a potential for ++ * priority inversion. In such an event, any sleeper bearing the ++ * XNWARN bit will receive a SIGDEBUG notification. 
++ */ ++void xnsynch_detect_boosted_relax(struct xnthread *owner) ++{ ++ struct xnthread *sleeper; ++ struct xnsynch *synch; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ xnthread_for_each_booster(synch, owner) { ++ xnsynch_for_each_sleeper(sleeper, synch) { ++ if (xnthread_test_state(sleeper, XNWARN)) { ++ xnthread_set_info(sleeper, XNPIALERT); ++ xnthread_signal(sleeper, SIGDEBUG, ++ SIGDEBUG_MIGRATE_PRIOINV); ++ } ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED */ ++ ++/** @} */ +--- linux/kernel/xenomai/sched-tp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-tp.c 2021-04-07 16:01:25.814636185 +0800 +@@ -0,0 +1,464 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++static void tp_schedule_next(struct xnsched_tp *tp) ++{ ++ struct xnsched_tp_window *w; ++ struct xnsched *sched; ++ int p_next, ret; ++ xnticks_t t; ++ ++ for (;;) { ++ /* ++ * Switch to the next partition. Time holes in a ++ * global time frame are defined as partition windows ++ * assigned to part# -1, in which case the (always ++ * empty) idle queue will be polled for runnable ++ * threads. Therefore, we may assume that a window ++ * begins immediately after the previous one ends, ++ * which simplifies the implementation a lot. ++ */ ++ w = &tp->gps->pwins[tp->wnext]; ++ p_next = w->w_part; ++ tp->tps = p_next < 0 ? &tp->idle : &tp->partitions[p_next]; ++ ++ /* Schedule tick to advance to the next window. */ ++ tp->wnext = (tp->wnext + 1) % tp->gps->pwin_nr; ++ w = &tp->gps->pwins[tp->wnext]; ++ t = tp->tf_start + w->w_offset; ++ ++ ret = xntimer_start(&tp->tf_timer, t, XN_INFINITE, XN_ABSOLUTE); ++ if (ret != -ETIMEDOUT) ++ break; ++ /* ++ * We are late, make sure to remain within the bounds ++ * of a valid time frame before advancing to the next ++ * window. Otherwise, fix up by advancing to the next ++ * time frame immediately. ++ */ ++ for (;;) { ++ t = tp->tf_start + tp->gps->tf_duration; ++ if (xnclock_read_monotonic(&nkclock) > t) { ++ tp->tf_start = t; ++ tp->wnext = 0; ++ } else ++ break; ++ } ++ } ++ ++ sched = container_of(tp, struct xnsched, tp); ++ xnsched_set_resched(sched); ++} ++ ++static void tp_tick_handler(struct xntimer *timer) ++{ ++ struct xnsched_tp *tp = container_of(timer, struct xnsched_tp, tf_timer); ++ /* ++ * Advance beginning date of time frame by a full period if we ++ * are processing the last window. 
++ */ ++ if (tp->wnext + 1 == tp->gps->pwin_nr) ++ tp->tf_start += tp->gps->tf_duration; ++ ++ tp_schedule_next(tp); ++} ++ ++static void xnsched_tp_init(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ char timer_name[XNOBJECT_NAME_LEN]; ++ int n; ++ ++ for (n = 0; n < CONFIG_XENO_OPT_SCHED_TP_NRPART; n++) ++ xnsched_initq(&tp->partitions[n].runnable); ++ ++ xnsched_initq(&tp->idle.runnable); ++ ++#ifdef CONFIG_SMP ++ ksformat(timer_name, sizeof(timer_name), "[tp-tick/%u]", sched->cpu); ++#else ++ strcpy(timer_name, "[tp-tick]"); ++#endif ++ tp->tps = NULL; ++ tp->gps = NULL; ++ INIT_LIST_HEAD(&tp->threads); ++ xntimer_init(&tp->tf_timer, &nkclock, tp_tick_handler, ++ sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&tp->tf_timer, timer_name); ++} ++ ++static bool xnsched_tp_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched *sched = thread->sched; ++ ++ thread->tps = &sched->tp.partitions[p->tp.ptid]; ++ xnthread_clear_state(thread, XNWEAK); ++ ++ return xnsched_set_effective_priority(thread, p->tp.prio); ++} ++ ++static void xnsched_tp_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->tp.prio = thread->cprio; ++ p->tp.ptid = thread->tps - thread->sched->tp.partitions; ++} ++ ++static void xnsched_tp_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ /* ++ * The assigned partition never changes internally due to PI ++ * (see xnsched_track_policy), since this would be pretty ++ * wrong with respect to TP scheduling: i.e. we may not allow ++ * a thread from another partition to consume CPU time from ++ * the current one, despite this would help enforcing PI (see ++ * note). In any case, introducing resource contention between ++ * threads that belong to different partitions is utterly ++ * wrong in the first place. Only an explicit call to ++ * xnsched_set_policy() may change the partition assigned to a ++ * thread. For that reason, a policy reset action only boils ++ * down to reinstating the base priority. ++ * ++ * NOTE: we do allow threads from lower scheduling classes to ++ * consume CPU time from the current window as a result of a ++ * PI boost, since this is aimed at speeding up the release of ++ * a synchronization object a TP thread needs. ++ */ ++ if (p) { ++ /* We should never cross partition boundaries. 
*/ ++ XENO_WARN_ON(COBALT, ++ thread->base_class == &xnsched_class_tp && ++ thread->tps - thread->sched->tp.partitions != p->tp.ptid); ++ thread->cprio = p->tp.prio; ++ } else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_tp_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_TP_MAX_PRIO) ++ prio = XNSCHED_TP_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_tp_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_tp *tp = &thread->sched->tp; ++ ++ if (p->tp.ptid < 0 || ++ p->tp.ptid >= CONFIG_XENO_OPT_SCHED_TP_NRPART) ++ return -EINVAL; ++ ++ if (tp->gps == NULL || ++ p->tp.prio < XNSCHED_TP_MIN_PRIO || ++ p->tp.prio > XNSCHED_TP_MAX_PRIO) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int xnsched_tp_declare(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched *sched = thread->sched; ++ ++ list_add_tail(&thread->tp_link, &sched->tp.threads); ++ ++ return 0; ++} ++ ++static void xnsched_tp_forget(struct xnthread *thread) ++{ ++ list_del(&thread->tp_link); ++ thread->tps = NULL; ++} ++ ++static void xnsched_tp_enqueue(struct xnthread *thread) ++{ ++ xnsched_addq_tail(&thread->tps->runnable, thread); ++} ++ ++static void xnsched_tp_dequeue(struct xnthread *thread) ++{ ++ xnsched_delq(&thread->tps->runnable, thread); ++} ++ ++static void xnsched_tp_requeue(struct xnthread *thread) ++{ ++ xnsched_addq(&thread->tps->runnable, thread); ++} ++ ++static struct xnthread *xnsched_tp_pick(struct xnsched *sched) ++{ ++ /* Never pick a thread if we don't schedule partitions. */ ++ if (!xntimer_running_p(&sched->tp.tf_timer)) ++ return NULL; ++ ++ return xnsched_getq(&sched->tp.tps->runnable); ++} ++ ++static void xnsched_tp_migrate(struct xnthread *thread, struct xnsched *sched) ++{ ++ union xnsched_policy_param param; ++ /* ++ * Since our partition schedule is a per-scheduler property, ++ * it cannot apply to a thread that moves to another CPU ++ * anymore. So we upgrade that thread to the RT class when a ++ * CPU migration occurs. A subsequent call to ++ * __xnthread_set_schedparam() may move it back to TP ++ * scheduling, with a partition assignment that fits the ++ * remote CPU's partition schedule. ++ */ ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++} ++ ++void xnsched_tp_start_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->gps == NULL) ++ return; ++ ++ tp->wnext = 0; ++ tp->tf_start = xnclock_read_monotonic(&nkclock); ++ tp_schedule_next(tp); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_start_schedule); ++ ++void xnsched_tp_stop_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->gps) ++ xntimer_stop(&tp->tf_timer); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_stop_schedule); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_set_schedule(struct xnsched *sched, ++ struct xnsched_tp_schedule *gps) ++{ ++ struct xnsched_tp_schedule *old_gps; ++ struct xnsched_tp *tp = &sched->tp; ++ union xnsched_policy_param param; ++ struct xnthread *thread, *tmp; ++ ++ XENO_BUG_ON(COBALT, gps != NULL && ++ (gps->pwin_nr <= 0 || gps->pwins[0].w_offset != 0)); ++ ++ xnsched_tp_stop_schedule(sched); ++ ++ /* ++ * Move all TP threads on this scheduler to the RT class, ++ * until we call __xnthread_set_schedparam() for them again. 
++ */ ++ if (list_empty(&tp->threads)) ++ goto done; ++ ++ list_for_each_entry_safe(thread, tmp, &tp->threads, tp_link) { ++ param.rt.prio = thread->cprio; ++ __xnthread_set_schedparam(thread, &xnsched_class_rt, ¶m); ++ } ++done: ++ old_gps = tp->gps; ++ tp->gps = gps; ++ ++ return old_gps; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_set_schedule); ++ ++struct xnsched_tp_schedule * ++xnsched_tp_get_schedule(struct xnsched *sched) ++{ ++ struct xnsched_tp_schedule *gps; ++ ++ gps = sched->tp.gps; ++ if (gps == NULL) ++ return NULL; ++ ++ atomic_inc(&gps->refcount); ++ ++ return gps; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_get_schedule); ++ ++void xnsched_tp_put_schedule(struct xnsched_tp_schedule *gps) ++{ ++ if (atomic_dec_and_test(&gps->refcount)) ++ xnfree(gps); ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_put_schedule); ++ ++int xnsched_tp_get_partition(struct xnsched *sched) ++{ ++ struct xnsched_tp *tp = &sched->tp; ++ ++ if (tp->tps == NULL || tp->tps == &tp->idle) ++ return -1; ++ ++ return tp->tps - tp->partitions; ++} ++EXPORT_SYMBOL_GPL(xnsched_tp_get_partition); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_tp_vfroot; ++ ++struct vfile_sched_tp_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_tp_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int prio; ++ int ptid; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_tp_ops; ++ ++static struct xnvfile_snapshot vfile_sched_tp = { ++ .privsz = sizeof(struct vfile_sched_tp_priv), ++ .datasz = sizeof(struct vfile_sched_tp_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_tp_ops, ++}; ++ ++static int vfile_sched_tp_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_tp_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_tp.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_tp_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_tp_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_tp_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_tp) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->ptid = thread->tps - thread->sched->tp.partitions; ++ p->prio = thread->cprio; ++ ++ return 1; ++} ++ ++static int vfile_sched_tp_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_tp_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-4s %-4s %s\n", ++ "CPU", "PID", "PTID", "PRI", "NAME"); ++ else ++ xnvfile_printf(it, "%3u %-6d %-4d %-4d %s\n", ++ p->cpu, ++ p->pid, ++ p->ptid, ++ p->prio, ++ p->name); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_tp_ops = { ++ .rewind = vfile_sched_tp_rewind, ++ .next = vfile_sched_tp_next, ++ .show = vfile_sched_tp_show, ++}; ++ ++static int xnsched_tp_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_tp_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_tp, ++ &sched_tp_vfroot); ++} ++ ++static void xnsched_tp_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_tp); ++ xnvfile_destroy_dir(&sched_tp_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_tp = { ++ .sched_init = xnsched_tp_init, ++ .sched_enqueue = xnsched_tp_enqueue, ++ .sched_dequeue = xnsched_tp_dequeue, ++ .sched_requeue = xnsched_tp_requeue, ++ .sched_pick = xnsched_tp_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = xnsched_tp_migrate, ++ .sched_chkparam = xnsched_tp_chkparam, ++ .sched_setparam = xnsched_tp_setparam, ++ .sched_getparam = xnsched_tp_getparam, ++ .sched_trackprio = xnsched_tp_trackprio, ++ .sched_protectprio = xnsched_tp_protectprio, ++ .sched_declare = xnsched_tp_declare, ++ .sched_forget = xnsched_tp_forget, ++ .sched_kick = NULL, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_tp_init_vfile, ++ .sched_cleanup_vfile = xnsched_tp_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(2), ++ .policy = SCHED_TP, ++ .name = "tp" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_tp); +--- linux/kernel/xenomai/procfs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/procfs.h 2021-04-07 16:01:25.809636192 +0800 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _KERNEL_COBALT_PROCFS_H ++#define _KERNEL_COBALT_PROCFS_H ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int xnprocfs_init_tree(void); ++void xnprocfs_cleanup_tree(void); ++#else ++static inline int xnprocfs_init_tree(void) { return 0; } ++static inline void xnprocfs_cleanup_tree(void) { } ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* !_KERNEL_COBALT_PROCFS_H */ +--- linux/kernel/xenomai/sched-sporadic.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-sporadic.c 2021-04-07 16:01:25.804636199 +0800 +@@ -0,0 +1,560 @@ ++/* ++ * Copyright (C) 2009 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++#define MAX_REPLENISH CONFIG_XENO_OPT_SCHED_SPORADIC_MAXREPL ++ ++static void sporadic_post_recharge(struct xnthread *thread, xnticks_t budget); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ ++static inline void sporadic_note_late_drop(struct xnsched *sched) ++{ ++ /* ++ * This code should pull the break when a misconfigured ++ * sporadic thread is late on its drop date for more than a ++ * hundred times in a row. This normally reveals a time budget ++ * which is too tight. ++ */ ++ XENO_BUG_ON(COBALT, ++sched->pss.drop_retries > 100); ++} ++ ++static inline void sporadic_note_valid_drop(struct xnsched *sched) ++{ ++ sched->pss.drop_retries = 0; ++} ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_COBALT */ ++ ++static inline void sporadic_note_late_drop(struct xnsched *sched) ++{ ++} ++ ++static inline void sporadic_note_valid_drop(struct xnsched *sched) ++{ ++} ++ ++#endif /* !CONFIG_XENO_OPT_DEBUG_COBALT */ ++ ++static inline xnticks_t sporadic_diff_time(xnticks_t start, xnticks_t end) ++{ ++ xnsticks_t d = (xnsticks_t)(end - start); ++ return unlikely(d < 0) ? -d : d; ++} ++ ++static void sporadic_drop_handler(struct xntimer *timer) ++{ ++ struct xnsched_sporadic_data *pss; ++ union xnsched_policy_param p; ++ struct xnthread *thread; ++ ++ /* ++ * XXX: this code will work properly regardless of ++ * primary/secondary mode issues. ++ */ ++ pss = container_of(timer, struct xnsched_sporadic_data, drop_timer); ++ thread = pss->thread; ++ ++ sporadic_post_recharge(thread, pss->budget); ++ ++ if (pss->budget == 0 && thread->cprio > pss->param.low_prio) { ++ if (pss->param.low_prio < 0) ++ /* ++ * Special case: low_prio == -1, we want the ++ * thread to suspend until a replenishment ++ * happens. ++ */ ++ xnthread_suspend(thread, XNHELD, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ else { ++ p.pss.init_budget = 0; ++ p.pss.current_prio = pss->param.low_prio; ++ /* Move sporadic thread to the background. 
*/ ++ __xnthread_set_schedparam(thread, &xnsched_class_sporadic, &p); ++ } ++ } ++} ++ ++static void sporadic_schedule_drop(struct xnthread *thread) ++{ ++ xnticks_t now = xnclock_read_monotonic(&nkclock); ++ struct xnsched_sporadic_data *pss = thread->pss; ++ int ret; ++ ++ pss->resume_date = now; ++ /* ++ * Assuming this timer should not fire that often unless the ++ * monitored thread behaves badly, we don't pin it on the CPU ++ * the thread is running, trading cycles at firing time ++ * against cycles when arming the timer. ++ */ ++ xntimer_set_affinity(&pss->drop_timer, thread->sched); ++ ret = xntimer_start(&pss->drop_timer, now + pss->budget, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret == -ETIMEDOUT) { ++ sporadic_note_late_drop(thread->sched); ++ sporadic_drop_handler(&pss->drop_timer); ++ } else ++ sporadic_note_valid_drop(thread->sched); ++} ++ ++static void sporadic_replenish_handler(struct xntimer *timer) ++{ ++ struct xnsched_sporadic_data *pss; ++ union xnsched_policy_param p; ++ struct xnthread *thread; ++ xnticks_t now; ++ int r, ret; ++ ++ pss = container_of(timer, struct xnsched_sporadic_data, repl_timer); ++ thread = pss->thread; ++ XENO_BUG_ON(COBALT, pss->repl_pending <= 0); ++ ++retry: ++ now = xnclock_read_monotonic(&nkclock); ++ ++ do { ++ r = pss->repl_out; ++ if ((xnsticks_t)(now - pss->repl_data[r].date) <= 0) ++ break; ++ pss->budget += pss->repl_data[r].amount; ++ if (pss->budget > pss->param.init_budget) ++ pss->budget = pss->param.init_budget; ++ pss->repl_out = (r + 1) % MAX_REPLENISH; ++ } while(--pss->repl_pending > 0); ++ ++ if (pss->repl_pending > 0) { ++ xntimer_set_affinity(&pss->repl_timer, thread->sched); ++ ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date, ++ XN_INFINITE, XN_ABSOLUTE); ++ if (ret == -ETIMEDOUT) ++ goto retry; /* This plugs a tiny race. */ ++ } ++ ++ if (pss->budget == 0) ++ return; ++ ++ if (xnthread_test_state(thread, XNHELD)) ++ xnthread_resume(thread, XNHELD); ++ else if (thread->cprio < pss->param.normal_prio) { ++ p.pss.init_budget = 0; ++ p.pss.current_prio = pss->param.normal_prio; ++ /* Move sporadic thread to the foreground. */ ++ __xnthread_set_schedparam(thread, &xnsched_class_sporadic, &p); ++ } ++ ++ /* ++ * XXX: we have to reset the drop timer in case we preempted ++ * the thread which just got a budget increase. ++ */ ++ if (thread->sched->curr == thread) ++ sporadic_schedule_drop(thread); ++} ++ ++static void sporadic_post_recharge(struct xnthread *thread, xnticks_t budget) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ int r, ret; ++ ++ if (pss->repl_pending >= pss->param.max_repl) ++ return; ++ ++ if (budget > pss->budget) { ++ budget = pss->budget; ++ pss->budget = 0; ++ } else ++ pss->budget -= budget; ++ ++ r = pss->repl_in; ++ pss->repl_data[r].date = pss->resume_date + pss->param.repl_period; ++ pss->repl_data[r].amount = budget; ++ pss->repl_in = (r + 1) % MAX_REPLENISH; ++ ++ if (pss->repl_pending++ == 0) { ++ xntimer_set_affinity(&pss->repl_timer, thread->sched); ++ ret = xntimer_start(&pss->repl_timer, pss->repl_data[r].date, ++ XN_INFINITE, XN_ABSOLUTE); ++ /* ++ * The following case should not happen unless the ++ * initial budget value is inappropriate, but let's ++ * handle it anyway. 
++ */ ++ if (ret == -ETIMEDOUT) ++ sporadic_replenish_handler(&pss->repl_timer); ++ } ++} ++ ++static void sporadic_suspend_activity(struct xnthread *thread) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ xnticks_t budget, now; ++ ++ if (pss->budget > 0) { ++ xntimer_stop(&pss->drop_timer); ++ now = xnclock_read_monotonic(&nkclock); ++ budget = sporadic_diff_time(now, pss->resume_date); ++ sporadic_post_recharge(thread, budget); ++ } ++} ++ ++static inline void sporadic_resume_activity(struct xnthread *thread) ++{ ++ if (thread->pss->budget > 0) ++ sporadic_schedule_drop(thread); ++} ++ ++static void xnsched_sporadic_init(struct xnsched *sched) ++{ ++ /* ++ * We litterally stack the sporadic scheduler on top of the RT ++ * one, reusing its run queue directly. This way, RT and ++ * sporadic threads are merged into the same runqueue and thus ++ * share the same priority scale, with the addition of budget ++ * management for the sporadic ones. ++ */ ++#ifdef CONFIG_XENO_OPT_DEBUG_COBALT ++ sched->pss.drop_retries = 0; ++#endif ++} ++ ++static bool xnsched_sporadic_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ bool effective; ++ ++ xnthread_clear_state(thread, XNWEAK); ++ effective = xnsched_set_effective_priority(thread, p->pss.current_prio); ++ ++ /* ++ * We use the budget information to determine whether we got ++ * here from one of our internal calls to ++ * xnthread_set_schedparam(), in which case we don't want to ++ * update the scheduling parameters, but only set the ++ * effective priority. ++ */ ++ if (p->pss.init_budget > 0) { ++ pss->param = p->pss; ++ pss->budget = p->pss.init_budget; ++ pss->repl_in = 0; ++ pss->repl_out = 0; ++ pss->repl_pending = 0; ++ if (effective && thread == thread->sched->curr) { ++ xntimer_stop(&pss->drop_timer); ++ sporadic_schedule_drop(thread); ++ } ++ } ++ ++ return effective; ++} ++ ++static void xnsched_sporadic_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ p->pss = thread->pss->param; ++ p->pss.current_prio = thread->cprio; ++} ++ ++static void xnsched_sporadic_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p) ++ thread->cprio = p->pss.current_prio; ++ else ++ thread->cprio = thread->bprio; ++} ++ ++static void xnsched_sporadic_protectprio(struct xnthread *thread, int prio) ++{ ++ if (prio > XNSCHED_SPORADIC_MAX_PRIO) ++ prio = XNSCHED_SPORADIC_MAX_PRIO; ++ ++ thread->cprio = prio; ++} ++ ++static int xnsched_sporadic_chkparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ if (p->pss.low_prio != -1 && ++ (p->pss.low_prio < XNSCHED_SPORADIC_MIN_PRIO || ++ p->pss.low_prio > XNSCHED_SPORADIC_MAX_PRIO)) ++ return -EINVAL; ++ ++ if (p->pss.normal_prio < XNSCHED_SPORADIC_MIN_PRIO || ++ p->pss.normal_prio > XNSCHED_SPORADIC_MAX_PRIO) ++ return -EINVAL; ++ ++ if (p->pss.init_budget == 0) ++ return -EINVAL; ++ ++ if (p->pss.current_prio != p->pss.normal_prio) ++ return -EINVAL; ++ ++ if (p->pss.repl_period < p->pss.init_budget) ++ return -EINVAL; ++ ++ if (p->pss.normal_prio <= p->pss.low_prio) ++ return -EINVAL; ++ ++ if (p->pss.max_repl < 1 || p->pss.max_repl > MAX_REPLENISH) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int xnsched_sporadic_declare(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ struct xnsched_sporadic_data *pss; ++ ++ pss = xnmalloc(sizeof(*pss)); ++ if (pss == NULL) ++ return -ENOMEM; ++ ++ 
xntimer_init(&pss->repl_timer, &nkclock, sporadic_replenish_handler, ++ thread->sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&pss->repl_timer, "pss-replenish"); ++ xntimer_init(&pss->drop_timer, &nkclock, sporadic_drop_handler, ++ thread->sched, XNTIMER_IGRAVITY); ++ xntimer_set_name(&pss->drop_timer, "pss-drop"); ++ ++ thread->pss = pss; ++ pss->thread = thread; ++ ++ return 0; ++} ++ ++static void xnsched_sporadic_forget(struct xnthread *thread) ++{ ++ struct xnsched_sporadic_data *pss = thread->pss; ++ ++ xntimer_destroy(&pss->repl_timer); ++ xntimer_destroy(&pss->drop_timer); ++ xnfree(pss); ++ thread->pss = NULL; ++} ++ ++static void xnsched_sporadic_enqueue(struct xnthread *thread) ++{ ++ __xnsched_rt_enqueue(thread); ++} ++ ++static void xnsched_sporadic_dequeue(struct xnthread *thread) ++{ ++ __xnsched_rt_dequeue(thread); ++} ++ ++static void xnsched_sporadic_requeue(struct xnthread *thread) ++{ ++ __xnsched_rt_requeue(thread); ++} ++ ++static struct xnthread *xnsched_sporadic_pick(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr, *next; ++ ++ next = xnsched_getq(&sched->rt.runnable); ++ if (next == NULL) ++ goto swap; ++ ++ if (curr == next) ++ return next; ++ ++ /* Arm the drop timer for an incoming sporadic thread. */ ++ if (next->pss) ++ sporadic_resume_activity(next); ++swap: ++ /* ++ * A non-sporadic outgoing thread is having a priority ++ * inheritance boost, so apply an infinite time budget as we ++ * want it to release the claimed resource asap. Otherwise, ++ * clear the drop timer, then schedule a replenishment ++ * operation. ++ */ ++ if (curr->pss) ++ sporadic_suspend_activity(curr); ++ ++ return next; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_sporadic_vfroot; ++ ++struct vfile_sched_sporadic_priv { ++ int nrthreads; ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_sporadic_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ int current_prio; ++ int low_prio; ++ int normal_prio; ++ xnticks_t period; ++ xnticks_t timeout; ++ xnticks_t budget; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_sporadic_ops; ++ ++static struct xnvfile_snapshot vfile_sched_sporadic = { ++ .privsz = sizeof(struct vfile_sched_sporadic_priv), ++ .datasz = sizeof(struct vfile_sched_sporadic_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_sporadic_ops, ++}; ++ ++static int vfile_sched_sporadic_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_sporadic_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_sporadic.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_sporadic_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_sporadic_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_sporadic_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_sporadic) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->current_prio = thread->cprio; ++ p->low_prio = thread->pss->param.low_prio; ++ p->normal_prio = thread->pss->param.normal_prio; ++ p->period = xnthread_get_period(thread); ++ p->budget = thread->pss->param.init_budget; ++ ++ return 1; ++} ++ ++static int vfile_sched_sporadic_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ char lpbuf[16], npbuf[16], ptbuf[16], btbuf[16]; ++ struct vfile_sched_sporadic_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-6s %-4s %-4s %-10s %-10s %s\n", ++ "CPU", "PID", "LPRI", "NPRI", "BUDGET", ++ "PERIOD", "NAME"); ++ else { ++ ksformat(lpbuf, sizeof(lpbuf), "%3d%c", ++ p->low_prio, p->current_prio == p->low_prio ? '*' : ' '); ++ ++ ksformat(npbuf, sizeof(npbuf), "%3d%c", ++ p->normal_prio, p->current_prio == p->normal_prio ? '*' : ' '); ++ ++ xntimer_format_time(p->period, ptbuf, sizeof(ptbuf)); ++ xntimer_format_time(p->budget, btbuf, sizeof(btbuf)); ++ ++ xnvfile_printf(it, ++ "%3u %-6d %-4s %-4s %-10s %-10s %s\n", ++ p->cpu, ++ p->pid, ++ lpbuf, ++ npbuf, ++ btbuf, ++ ptbuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_sporadic_ops = { ++ .rewind = vfile_sched_sporadic_rewind, ++ .next = vfile_sched_sporadic_next, ++ .show = vfile_sched_sporadic_show, ++}; ++ ++static int xnsched_sporadic_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, ++ &sched_sporadic_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_sporadic, ++ &sched_sporadic_vfroot); ++} ++ ++static void xnsched_sporadic_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_sporadic); ++ xnvfile_destroy_dir(&sched_sporadic_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_sporadic = { ++ .sched_init = xnsched_sporadic_init, ++ .sched_enqueue = xnsched_sporadic_enqueue, ++ .sched_dequeue = xnsched_sporadic_dequeue, ++ .sched_requeue = xnsched_sporadic_requeue, ++ .sched_pick = xnsched_sporadic_pick, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_migrate = NULL, ++ .sched_chkparam = xnsched_sporadic_chkparam, ++ .sched_setparam = xnsched_sporadic_setparam, ++ .sched_getparam = xnsched_sporadic_getparam, ++ .sched_trackprio = xnsched_sporadic_trackprio, ++ .sched_protectprio = xnsched_sporadic_protectprio, ++ .sched_declare = xnsched_sporadic_declare, ++ .sched_forget = xnsched_sporadic_forget, ++ .sched_kick = NULL, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_sporadic_init_vfile, ++ .sched_cleanup_vfile = xnsched_sporadic_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(3), ++ .policy = SCHED_SPORADIC, ++ .name = "pss" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_sporadic); +--- linux/kernel/xenomai/init.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/init.c 2021-04-07 16:01:25.799636207 +0800 +@@ -0,0 +1,430 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . 
++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtdm/internal.h" ++#include "posix/internal.h" ++#include "procfs.h" ++ ++/** ++ * @defgroup cobalt Cobalt ++ * ++ * Cobalt supplements the native Linux kernel in dual kernel ++ * configurations. It deals with all time-critical activities, such as ++ * handling interrupts, and scheduling real-time threads. The Cobalt ++ * kernel has higher priority over all the native kernel activities. ++ * ++ * Cobalt provides an implementation of the POSIX and RTDM interfaces ++ * based on a set of generic RTOS building blocks. ++ */ ++ ++static unsigned long timerfreq_arg; ++module_param_named(timerfreq, timerfreq_arg, ulong, 0444); ++ ++static unsigned long clockfreq_arg; ++module_param_named(clockfreq, clockfreq_arg, ulong, 0444); ++ ++#ifdef CONFIG_SMP ++static unsigned long supported_cpus_arg = -1; ++module_param_named(supported_cpus, supported_cpus_arg, ulong, 0444); ++#endif /* CONFIG_SMP */ ++ ++static unsigned long sysheap_size_arg; ++module_param_named(sysheap_size, sysheap_size_arg, ulong, 0444); ++ ++static char init_state_arg[16] = "enabled"; ++module_param_string(state, init_state_arg, sizeof(init_state_arg), 0444); ++ ++static BLOCKING_NOTIFIER_HEAD(state_notifier_list); ++ ++struct cobalt_pipeline cobalt_pipeline; ++EXPORT_SYMBOL_GPL(cobalt_pipeline); ++ ++DEFINE_PER_CPU(struct cobalt_machine_cpudata, cobalt_machine_cpudata); ++EXPORT_PER_CPU_SYMBOL_GPL(cobalt_machine_cpudata); ++ ++atomic_t cobalt_runstate = ATOMIC_INIT(COBALT_STATE_WARMUP); ++EXPORT_SYMBOL_GPL(cobalt_runstate); ++ ++struct cobalt_ppd cobalt_kernel_ppd = { ++ .exe_path = "vmlinux", ++}; ++EXPORT_SYMBOL_GPL(cobalt_kernel_ppd); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG ++#define boot_debug_notice "[DEBUG]" ++#else ++#define boot_debug_notice "" ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE ++#define boot_lat_trace_notice "[LTRACE]" ++#else ++#define boot_lat_trace_notice "" ++#endif ++ ++#ifdef CONFIG_ENABLE_DEFAULT_TRACERS ++#define boot_evt_trace_notice "[ETRACE]" ++#else ++#define boot_evt_trace_notice "" ++#endif ++ ++#define boot_state_notice \ ++ ({ \ ++ realtime_core_state() == COBALT_STATE_STOPPED ? 
\ ++ "[STOPPED]" : ""; \ ++ }) ++ ++void cobalt_add_state_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_register(&state_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_add_state_chain); ++ ++void cobalt_remove_state_chain(struct notifier_block *nb) ++{ ++ blocking_notifier_chain_unregister(&state_notifier_list, nb); ++} ++EXPORT_SYMBOL_GPL(cobalt_remove_state_chain); ++ ++void cobalt_call_state_chain(enum cobalt_run_states newstate) ++{ ++ blocking_notifier_call_chain(&state_notifier_list, newstate, NULL); ++} ++EXPORT_SYMBOL_GPL(cobalt_call_state_chain); ++ ++static void sys_shutdown(void) ++{ ++ void *membase; ++ ++ xntimer_release_hardware(); ++ xnsched_destroy_all(); ++ xnregistry_cleanup(); ++ membase = xnheap_get_membase(&cobalt_heap); ++ xnheap_destroy(&cobalt_heap); ++ xnheap_vfree(membase); ++} ++ ++static int __init mach_setup(void) ++{ ++ struct ipipe_sysinfo sysinfo; ++ int ret, virq; ++ ++ ret = ipipe_select_timers(&xnsched_realtime_cpus); ++ if (ret < 0) ++ return ret; ++ ++ ipipe_get_sysinfo(&sysinfo); ++ ++ if (timerfreq_arg == 0) ++ timerfreq_arg = sysinfo.sys_hrtimer_freq; ++ ++ if (clockfreq_arg == 0) ++ clockfreq_arg = sysinfo.sys_hrclock_freq; ++ ++ if (clockfreq_arg == 0) { ++ printk(XENO_ERR "null clock frequency? Aborting.\n"); ++ return -ENODEV; ++ } ++ ++ cobalt_pipeline.timer_freq = timerfreq_arg; ++ cobalt_pipeline.clock_freq = clockfreq_arg; ++ ++ if (cobalt_machine.init) { ++ ret = cobalt_machine.init(); ++ if (ret) ++ return ret; ++ } ++ ++ ipipe_register_head(&xnsched_realtime_domain, "Xenomai"); ++ ++ ret = -EBUSY; ++ virq = ipipe_alloc_virq(); ++ if (virq == 0) ++ goto fail_apc; ++ ++ cobalt_pipeline.apc_virq = virq; ++ ++ ipipe_request_irq(ipipe_root_domain, ++ cobalt_pipeline.apc_virq, ++ apc_dispatch, ++ NULL, NULL); ++ ++ virq = ipipe_alloc_virq(); ++ if (virq == 0) ++ goto fail_escalate; ++ ++ cobalt_pipeline.escalate_virq = virq; ++ ++ ipipe_request_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq, ++ (ipipe_irq_handler_t)__xnsched_run_handler, ++ NULL, NULL); ++ ++ ret = xnclock_init(cobalt_pipeline.clock_freq); ++ if (ret) ++ goto fail_clock; ++ ++ return 0; ++ ++fail_clock: ++ ipipe_free_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq); ++ ipipe_free_virq(cobalt_pipeline.escalate_virq); ++fail_escalate: ++ ipipe_free_irq(ipipe_root_domain, ++ cobalt_pipeline.apc_virq); ++ ipipe_free_virq(cobalt_pipeline.apc_virq); ++fail_apc: ++ ipipe_unregister_head(&xnsched_realtime_domain); ++ ++ if (cobalt_machine.cleanup) ++ cobalt_machine.cleanup(); ++ ++ return ret; ++} ++ ++static inline int __init mach_late_setup(void) ++{ ++ if (cobalt_machine.late_init) ++ return cobalt_machine.late_init(); ++ ++ return 0; ++} ++ ++static __init void mach_cleanup(void) ++{ ++ ipipe_unregister_head(&xnsched_realtime_domain); ++ ipipe_free_irq(&xnsched_realtime_domain, ++ cobalt_pipeline.escalate_virq); ++ ipipe_free_virq(cobalt_pipeline.escalate_virq); ++ ipipe_timers_release(); ++ xnclock_cleanup(); ++} ++ ++static struct { ++ const char *label; ++ enum cobalt_run_states state; ++} init_states[] __initdata = { ++ { "disabled", COBALT_STATE_DISABLED }, ++ { "stopped", COBALT_STATE_STOPPED }, ++ { "enabled", COBALT_STATE_WARMUP }, ++}; ++ ++static void __init setup_init_state(void) ++{ ++ static char warn_bad_state[] __initdata = ++ XENO_WARNING "invalid init state '%s'\n"; ++ int n; ++ ++ for (n = 0; n < ARRAY_SIZE(init_states); n++) ++ if (strcmp(init_states[n].label, init_state_arg) == 0) { ++ 
set_realtime_core_state(init_states[n].state); ++ return; ++ } ++ ++ printk(warn_bad_state, init_state_arg); ++} ++ ++static __init int sys_init(void) ++{ ++ void *heapaddr; ++ int ret; ++ ++ if (sysheap_size_arg == 0) ++ sysheap_size_arg = CONFIG_XENO_OPT_SYS_HEAPSZ; ++ ++ heapaddr = xnheap_vmalloc(sysheap_size_arg * 1024); ++ if (heapaddr == NULL || ++ xnheap_init(&cobalt_heap, heapaddr, sysheap_size_arg * 1024)) { ++ return -ENOMEM; ++ } ++ xnheap_set_name(&cobalt_heap, "system heap"); ++ ++ xnsched_init_all(); ++ ++ xnregistry_init(); ++ ++ /* ++ * If starting in stopped mode, do all initializations, but do ++ * not enable the core timer. ++ */ ++ if (realtime_core_state() == COBALT_STATE_WARMUP) { ++ ret = xntimer_grab_hardware(); ++ if (ret) { ++ sys_shutdown(); ++ return ret; ++ } ++ set_realtime_core_state(COBALT_STATE_RUNNING); ++ } ++ ++ return 0; ++} ++ ++static int __init xenomai_init(void) ++{ ++ int ret, __maybe_unused cpu; ++ ++ setup_init_state(); ++ ++ if (!realtime_core_enabled()) { ++ printk(XENO_WARNING "disabled on kernel command line\n"); ++ return 0; ++ } ++ ++#ifdef CONFIG_SMP ++ cpumask_clear(&xnsched_realtime_cpus); ++ for_each_online_cpu(cpu) { ++ if (supported_cpus_arg & (1UL << cpu)) ++ cpumask_set_cpu(cpu, &xnsched_realtime_cpus); ++ } ++ if (cpumask_empty(&xnsched_realtime_cpus)) { ++ printk(XENO_WARNING "disabled via empty real-time CPU mask\n"); ++ set_realtime_core_state(COBALT_STATE_DISABLED); ++ return 0; ++ } ++ cobalt_cpu_affinity = xnsched_realtime_cpus; ++#endif /* CONFIG_SMP */ ++ ++ xnsched_register_classes(); ++ ++ ret = xnprocfs_init_tree(); ++ if (ret) ++ goto fail; ++ ++ ret = mach_setup(); ++ if (ret) ++ goto cleanup_proc; ++ ++ xnintr_mount(); ++ ++ ret = xnpipe_mount(); ++ if (ret) ++ goto cleanup_mach; ++ ++ ret = xnselect_mount(); ++ if (ret) ++ goto cleanup_pipe; ++ ++ ret = sys_init(); ++ if (ret) ++ goto cleanup_select; ++ ++ ret = mach_late_setup(); ++ if (ret) ++ goto cleanup_sys; ++ ++ ret = rtdm_init(); ++ if (ret) ++ goto cleanup_sys; ++ ++ ret = cobalt_init(); ++ if (ret) ++ goto cleanup_rtdm; ++ ++ rtdm_fd_init(); ++ ++ printk(XENO_INFO "Cobalt v%s %s%s%s%s\n", ++ XENO_VERSION_STRING, ++ boot_debug_notice, ++ boot_lat_trace_notice, ++ boot_evt_trace_notice, ++ boot_state_notice); ++ ++ return 0; ++ ++cleanup_rtdm: ++ rtdm_cleanup(); ++cleanup_sys: ++ sys_shutdown(); ++cleanup_select: ++ xnselect_umount(); ++cleanup_pipe: ++ xnpipe_umount(); ++cleanup_mach: ++ mach_cleanup(); ++cleanup_proc: ++ xnprocfs_cleanup_tree(); ++fail: ++ set_realtime_core_state(COBALT_STATE_DISABLED); ++ printk(XENO_ERR "init failed, code %d\n", ret); ++ ++ return ret; ++} ++device_initcall(xenomai_init); ++ ++/** ++ * @ingroup cobalt ++ * @defgroup cobalt_core Cobalt kernel ++ * ++ * The Cobalt core is a co-kernel which supplements the Linux kernel ++ * for delivering real-time services with very low latency. It ++ * implements a set of generic RTOS building blocks, which the ++ * Cobalt/POSIX and Cobalt/RTDM APIs are based on. Cobalt has higher ++ * priority over the Linux kernel activities. ++ * ++ * @{ ++ * ++ * @page cobalt-core-tags Dual kernel service tags ++ * ++ * The Cobalt kernel services may be restricted to particular calling ++ * contexts, or entail specific side-effects. To describe this ++ * information, each service documented by this section bears a set of ++ * tags when applicable. ++ * ++ * The table below matches the tags used throughout the documentation ++ * with the description of their meaning for the caller. 
++ *
++ * @par
++ * Context tags
++ * <TABLE>
++ * <TR><TH>Tag</TH> <TH>Context on entry</TH></TR>
++ * <TR><TD>primary-only</TD> <TD>Must be called from a Cobalt task in primary mode</TD></TR>
++ * <TR><TD>primary-timed</TD> <TD>Requires a Cobalt task in primary mode if timed</TD></TR>
++ * <TR><TD>coreirq-only</TD> <TD>Must be called from a Cobalt IRQ handler</TD></TR>
++ * <TR><TD>secondary-only</TD> <TD>Must be called from a Cobalt task in secondary mode or regular Linux task</TD></TR>
++ * <TR><TD>rtdm-task</TD> <TD>Must be called from a RTDM driver task</TD></TR>
++ * <TR><TD>mode-unrestricted</TD> <TD>May be called from a Cobalt task in either primary or secondary mode</TD></TR>
++ * <TR><TD>task-unrestricted</TD> <TD>May be called from a Cobalt or regular Linux task indifferently</TD></TR>
++ * <TR><TD>unrestricted</TD> <TD>May be called from any context previously described</TD></TR>
++ * <TR><TD>atomic-entry</TD> <TD>Caller must currently hold the big Cobalt kernel lock (nklock)</TD></TR>
++ * </TABLE>
++ *
++ * @par
++ * Possible side-effects
++ * <TABLE>
++ * <TR><TH>Tag</TH> <TH>Description</TH></TR>
++ * <TR><TD>might-switch</TD> <TD>The Cobalt kernel may switch context</TD></TR>
++ * </TABLE>
++ * ++ * @} ++ */ +--- linux/kernel/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/Kconfig 2021-04-07 16:01:25.794636214 +0800 +@@ -0,0 +1,491 @@ ++menu "Core features" ++ ++config XENO_OPT_SCHED_CLASSES ++ bool "Extra scheduling classes" ++ default n ++ help ++ The Cobalt kernel implements a set of scheduling classes. ++ Each scheduling class defines its own set of rules for ++ determining when and how to select a new thread to run. ++ ++ Cobalt has a built-in real-time class, which supports both ++ preemptive fixed-priority FIFO, and round-robin scheduling. ++ ++ Enabling CONFIG_XENO_OPT_SCHED_CLASSES allows you to select ++ additional scheduling classes to enable in the Cobalt kernel. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_WEAK ++ bool "Weak scheduling class" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option creates a Cobalt scheduling class for mapping ++ members of the regular POSIX SCHED_FIFO/RR policies to a low ++ priority class of the Cobalt kernel, providing no real-time ++ guarantee. Therefore, up to a hundred non real-time priority ++ levels are available from the SCHED_WEAK policy. ++ ++ When CONFIG_XENO_OPT_SCHED_WEAK is disabled, Cobalt still ++ supports a single non real-time priority level (i.e. zero ++ priority), assigned to members of the SCHED_OTHER class. ++ ++ SCHED_WEAK/SCHED_OTHER threads can access Cobalt resources, ++ wait on Cobalt synchronization objects, but cannot compete for ++ the CPU with members of the real-time Cobalt classes. ++ ++ Since Cobalt assumes no real-time requirement for ++ SCHED_WEAK/SCHED_OTHER threads, they are automatically moved ++ back to secondary mode upon return from any Cobalt syscall if ++ necessary, unless they hold a Cobalt mutex, which would defer ++ the transition until such mutex is released. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_TP ++ bool "Temporal partitioning" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables support for temporal partitioning. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_TP_NRPART ++ int "Number of partitions" ++ default 4 ++ range 1 1024 ++ depends on XENO_OPT_SCHED_TP ++ help ++ Define here the maximum number of temporal partitions the TP ++ scheduler may have to handle. ++ ++config XENO_OPT_SCHED_SPORADIC ++ bool "Sporadic scheduling" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables support for the sporadic scheduling policy ++ in the Cobalt kernel (SCHED_SPORADIC), also known as POSIX ++ sporadic server. ++ ++ It can be used to enforce a capped limit on the execution time ++ of a thread within a given period of time. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_SPORADIC_MAXREPL ++ int "Maximum number of pending replenishments" ++ default 8 ++ range 4 16 ++ depends on XENO_OPT_SCHED_SPORADIC ++ help ++ For performance reason, the budget replenishment information ++ is statically stored on a per-thread basis. This parameter ++ defines the maximum number of replenishment requests that can ++ be pending concurrently for any given thread that undergoes ++ sporadic scheduling (system minimum is 4). ++ ++config XENO_OPT_SCHED_QUOTA ++ bool "Thread groups with runtime quota" ++ default n ++ depends on XENO_OPT_SCHED_CLASSES ++ help ++ This option enables the SCHED_QUOTA scheduling policy in the ++ Cobalt kernel. 
++ ++ This policy enforces a limitation on the CPU consumption of ++ threads over a globally defined period, known as the quota ++ interval. This is done by pooling threads with common ++ requirements in groups, and giving each group a share of the ++ global period (see CONFIG_XENO_OPT_SCHED_QUOTA_PERIOD). ++ ++ When threads have entirely consumed the quota allotted to the ++ group they belong to, the latter is suspended as a whole, ++ until the next quota interval starts. At this point, a new ++ runtime budget is given to each group, in accordance with its ++ share. ++ ++ If in doubt, say N. ++ ++config XENO_OPT_SCHED_QUOTA_PERIOD ++ int "Quota interval (us)" ++ default 10000 ++ range 100 1000000000 ++ depends on XENO_OPT_SCHED_QUOTA ++ help ++ The global period thread groups can get a share of. ++ ++config XENO_OPT_SCHED_QUOTA_NR_GROUPS ++ int "Number of thread groups" ++ default 32 ++ range 1 1024 ++ depends on XENO_OPT_SCHED_QUOTA ++ help ++ The overall number of thread groups which may be defined ++ across all CPUs. ++ ++config XENO_OPT_STATS ++ bool "Runtime statistics" ++ depends on XENO_OPT_VFILE ++ default y ++ help ++ This option causes the Cobalt kernel to collect various ++ per-thread runtime statistics, which are accessible through ++ the /proc/xenomai/sched/stat interface. ++ ++config XENO_OPT_STATS_IRQS ++ bool "Account IRQ handlers separatly" ++ depends on XENO_OPT_STATS ++ default y ++ help ++ When enabled, the runtime of interrupt handlers is accounted ++ separately from the threads they interrupt. Also, the ++ occurrence of shared interrupts is accounted on a per-handler ++ basis. ++ ++config XENO_OPT_SHIRQ ++ bool "Shared interrupts" ++ help ++ Enables support for both level- and edge-triggered shared ++ interrupts, so that multiple real-time interrupt handlers ++ are allowed to control dedicated hardware devices which are ++ configured to share the same interrupt line. ++ ++config XENO_OPT_RR_QUANTUM ++ int "Round-robin quantum (us)" ++ default 1000 ++ help ++ This parameter defines the duration of the default round-robin ++ time quantum expressed as a count of micro-seconds. This value ++ may be overriden internally by Cobalt services which do ++ provide a round-robin interval. ++ ++config XENO_OPT_AUTOTUNE ++ tristate "Auto-tuning" ++ default y ++ select XENO_DRIVERS_AUTOTUNE ++ help ++ Enable auto-tuning capabilities. Auto-tuning is used for ++ adjusting the core timing services to the intrinsic latency of ++ the platform. ++ ++config XENO_OPT_SCALABLE_SCHED ++ bool "O(1) scheduler" ++ help ++ This option causes a multi-level priority queue to be used in ++ the real-time scheduler, so that it operates in constant-time ++ regardless of the number of _concurrently runnable_ threads ++ (which might be much lower than the total number of active ++ threads). ++ ++ Its use is recommended for large multi-threaded systems ++ involving more than 10 of such threads; otherwise, the default ++ linear method usually performs better with lower memory ++ footprints. ++ ++choice ++ prompt "Timer indexing method" ++ default XENO_OPT_TIMER_LIST if !X86_64 ++ default XENO_OPT_TIMER_RBTREE if X86_64 ++ help ++ This option allows to select the underlying data structure ++ which is going to be used for ordering the outstanding ++ software timers managed by the Cobalt kernel. ++ ++config XENO_OPT_TIMER_LIST ++ bool "Linear" ++ help ++ Use a linked list. 
Albeit O(N), this simple data structure is ++ particularly efficient when only a few timers (< 10) may be ++ concurrently outstanding at any point in time. ++ ++config XENO_OPT_TIMER_RBTREE ++ bool "Tree" ++ help ++ Use a red-black tree. This data structure is efficient when a ++ high number of software timers may be concurrently ++ outstanding at any point in time. ++ ++endchoice ++ ++config XENO_OPT_HOSTRT ++ depends on IPIPE_HAVE_HOSTRT ++ def_bool y ++ ++config XENO_OPT_PIPE ++ bool ++ ++config XENO_OPT_MAP ++ bool ++ ++config XENO_OPT_EXTCLOCK ++ bool ++ ++config XENO_OPT_COBALT_EXTENSION ++ bool ++ ++config XENO_OPT_VFILE ++ bool ++ depends on PROC_FS ++ default y ++ ++endmenu ++ ++menu "Sizes and static limits" ++ ++config XENO_OPT_PIPE_NRDEV ++ int "Number of pipe devices" ++ depends on XENO_OPT_PIPE ++ default 32 ++ help ++ Message pipes are bi-directional FIFO communication channels ++ allowing data exchange between Cobalt threads and regular ++ POSIX threads. Pipes natively preserve message boundaries, but ++ can also be used in byte streaming mode from kernel to ++ user-space. ++ ++ This option sets the maximum number of pipe devices supported ++ in the system. Pipe devices are named /dev/rtpN where N is a ++ device minor number ranging from 0 to XENO_OPT_PIPE_NRDEV - 1. ++ ++config XENO_OPT_REGISTRY_NRSLOTS ++ int "Number of registry slots" ++ default 512 ++ help ++ The registry is used by the Cobalt kernel to export named ++ resources to user-space programs via the /proc interface. ++ Each named resource occupies a registry slot. This option sets ++ the maximum number of resources the registry can handle. ++ ++config XENO_OPT_SYS_HEAPSZ ++ int "Size of system heap (Kb)" ++ default 4096 ++ help ++ The system heap is used for various internal allocations by ++ the Cobalt kernel. The size is expressed in Kilobytes. ++ ++config XENO_OPT_PRIVATE_HEAPSZ ++ int "Size of private heap (Kb)" ++ default 256 ++ help ++ The Cobalt kernel implements fast IPC mechanisms within the ++ scope of a process which require a private kernel memory heap ++ to be mapped in the address space of each Xenomai application ++ process. This option can be used to set the size of this ++ per-process heap. ++ ++ 64k is considered a large enough size for common use cases. ++ ++config XENO_OPT_SHARED_HEAPSZ ++ int "Size of shared heap (Kb)" ++ default 256 ++ help ++ The Cobalt kernel implements fast IPC mechanisms between ++ processes which require a shared kernel memory heap to be ++ mapped in the address space of all Xenomai application ++ processes. This option can be used to set the size of this ++ system-wide heap. ++ ++ 64k is considered a large enough size for common use cases. ++ ++config XENO_OPT_NRTIMERS ++ int "Maximum number of POSIX timers per process" ++ default 256 ++ help ++ This tunable controls how many POSIX timers can exist at any ++ given time for each Cobalt process (a timer is created by a ++ call to the timer_create() service of the Cobalt/POSIX API). ++ ++config XENO_OPT_DEBUG_TRACE_LOGSZ ++ int "Trace log size" ++ depends on XENO_OPT_DEBUG_TRACE_RELAX ++ default 16 ++ help ++ The size (kilobytes) of the trace log of relax requests. Once ++ this limit is reached, subsequent traces will be silently ++ discarded. ++ ++ Writing to /proc/xenomai/debug/relax empties the trace log. 
++ ++endmenu ++ ++menu "Latency settings" ++ ++config XENO_OPT_TIMING_SCHEDLAT ++ int "User scheduling latency (ns)" ++ default 0 ++ help ++ The user scheduling latency is the time between the ++ termination of an interrupt handler and the execution of the ++ first instruction of the real-time application thread this ++ handler resumes. A default value of 0 (recommended) will cause ++ a pre-calibrated value to be used. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++config XENO_OPT_TIMING_KSCHEDLAT ++ int "Intra-kernel scheduling latency (ns)" ++ default 0 ++ help ++ The intra-kernel scheduling latency is the time between the ++ termination of an interrupt handler and the execution of the ++ first instruction of the RTDM kernel thread this handler ++ resumes. A default value of 0 (recommended) will cause a ++ pre-calibrated value to be used. ++ ++ Intra-kernel latency is usually significantly lower than user ++ scheduling latency on MMU-enabled platforms, due to CPU cache ++ latency. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++config XENO_OPT_TIMING_IRQLAT ++ int "Interrupt latency (ns)" ++ default 0 ++ help ++ The interrupt latency is the time between the occurrence of an ++ IRQ and the first instruction of the interrupt handler which ++ will service it. A default value of 0 (recommended) will cause ++ a pre-calibrated value to be used. ++ ++ If the auto-tuner is enabled, this value will be used as the ++ factory default when running "autotune --reset". ++ ++endmenu ++ ++menuconfig XENO_OPT_DEBUG ++ depends on XENO_OPT_VFILE ++ bool "Debug support" ++ help ++ When enabled, various debugging features can be switched ++ on. They can help to find problems in applications, drivers, ++ and the Cobalt kernel. XENO_OPT_DEBUG by itself does not have ++ any impact on the generated code. ++ ++if XENO_OPT_DEBUG ++ ++config XENO_OPT_DEBUG_COBALT ++ bool "Cobalt runtime assertions" ++ help ++ This option activates various assertions inside the Cobalt ++ kernel. This option has limited overhead. ++ ++config XENO_OPT_DEBUG_MEMORY ++ bool "Cobalt memory checks" ++ help ++ This option enables memory debug checks inside the Cobalt ++ kernel. This option may induce significant overhead with large ++ heaps. ++ ++config XENO_OPT_DEBUG_CONTEXT ++ bool "Check for calling context" ++ help ++ This option enables checks for the calling context in the ++ Cobalt kernel, aimed at detecting when regular Linux routines ++ are entered from a real-time context, and conversely. ++ ++config XENO_OPT_DEBUG_LOCKING ++ bool "Spinlock debugging support" ++ default y if SMP ++ help ++ This option activates runtime assertions, and measurements ++ of spinlocks spinning time and duration in the Cobalt ++ kernel. It helps finding latency spots due to interrupt ++ masked sections. Statistics about the longest masked section ++ can be found in /proc/xenomai/debug/lock. ++ ++ This option may induce a measurable overhead on low end ++ machines. ++ ++config XENO_OPT_DEBUG_USER ++ bool "User consistency checks" ++ help ++ This option enables a set of consistency checks for ++ detecting wrong runtime behavior in user applications. ++ ++ With some of the debug categories, threads can ask for ++ notification when a problem is detected, by turning on the ++ PTHREAD_WARNSW mode bit with pthread_setmode_np(). 
Cobalt ++ sends the Linux-originated SIGDEBUG signal for notifying ++ threads, along with a reason code passed into the associated ++ siginfo data (see pthread_setmode_np()). ++ ++ Some of these runtime checks may induce overhead, enable ++ them for debugging purposes only. ++ ++if XENO_OPT_DEBUG_USER ++ ++config XENO_OPT_DEBUG_MUTEX_RELAXED ++ bool "Detect relaxed mutex owner" ++ default y ++ help ++ A thread which attempts to acquire a mutex currently owned by ++ another thread running in secondary/relaxed mode thread will ++ suffer unwanted latencies, due to a priority inversion. ++ debug notifications are enabled for such thread, it receives ++ a SIGDEBUG signal. ++ ++ This option has some overhead in real-time mode over ++ contented mutexes. ++ ++config XENO_OPT_DEBUG_MUTEX_SLEEP ++ bool "Detect sleeping with mutex" ++ default y ++ help ++ A thread which goes sleeping while holding a mutex is prone ++ to cause unwanted latencies to other threads serialized by ++ the same lock. If debug notifications are enabled for such ++ thread, it receives a SIGDEBUG signal right before entering ++ sleep. ++ ++ This option has noticeable overhead in real-time mode as it ++ disables the normal fast mutex operations from user-space, ++ causing a system call for each mutex acquisition/release. ++ ++config XENO_OPT_DEBUG_LEGACY ++ bool "Detect usage of legacy constructs/features" ++ default n ++ help ++ Turns on detection of legacy API usage. ++ ++endif # XENO_OPT_DEBUG_USER ++ ++config XENO_OPT_DEBUG_TRACE_RELAX ++ bool "Trace relax requests" ++ default n ++ help ++ This option enables recording of unwanted relax requests from ++ user-space applications leaving the real-time domain, logging ++ the thread information and code location involved. All records ++ are readable from /proc/xenomai/debug/relax, and can be ++ decoded using the "slackspot" utility. ++ ++config XENO_OPT_WATCHDOG ++ bool "Watchdog support" ++ default y ++ help ++ This option activates a watchdog aimed at detecting runaway ++ Cobalt threads. If enabled, the watchdog triggers after a ++ given period of uninterrupted real-time activity has elapsed ++ without Linux interaction in the meantime. ++ ++ In such an event, the current thread is moved out the ++ real-time domain, receiving a SIGDEBUG signal from the Linux ++ kernel immediately after. ++ ++ The timeout value of the watchdog can be set using the ++ XENO_OPT_WATCHDOG_TIMEOUT parameter. ++ ++config XENO_OPT_WATCHDOG_TIMEOUT ++ depends on XENO_OPT_WATCHDOG ++ int "Watchdog timeout" ++ default 4 ++ range 1 60 ++ help ++ Watchdog timeout value (in seconds). ++ ++endif # XENO_OPT_DEBUG +--- linux/kernel/xenomai/sched-idle.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-idle.c 2021-04-07 16:01:25.790636220 +0800 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++static struct xnthread *xnsched_idle_pick(struct xnsched *sched) ++{ ++ return &sched->rootcb; ++} ++ ++static bool xnsched_idle_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return __xnsched_idle_setparam(thread, p); ++} ++ ++static void xnsched_idle_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ __xnsched_idle_getparam(thread, p); ++} ++ ++static void xnsched_idle_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ __xnsched_idle_trackprio(thread, p); ++} ++ ++static void xnsched_idle_protectprio(struct xnthread *thread, int prio) ++{ ++ __xnsched_idle_protectprio(thread, prio); ++} ++ ++struct xnsched_class xnsched_class_idle = { ++ .sched_init = NULL, ++ .sched_enqueue = NULL, ++ .sched_dequeue = NULL, ++ .sched_requeue = NULL, ++ .sched_tick = NULL, ++ .sched_rotate = NULL, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_declare = NULL, ++ .sched_pick = xnsched_idle_pick, ++ .sched_setparam = xnsched_idle_setparam, ++ .sched_getparam = xnsched_idle_getparam, ++ .sched_trackprio = xnsched_idle_trackprio, ++ .sched_protectprio = xnsched_idle_protectprio, ++ .weight = XNSCHED_CLASS_WEIGHT(0), ++ .policy = SCHED_IDLE, ++ .name = "idle" ++}; +--- linux/kernel/xenomai/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/thread.c 2021-04-07 16:01:25.785636227 +0800 +@@ -0,0 +1,2672 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2006-2010 Gilles Chanteperdrix ++ * Copyright (C) 2001-2013 The Xenomai project ++ * ++ * SMP support Copyright (C) 2004 The HYADES project ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "debug.h" ++ ++static DECLARE_WAIT_QUEUE_HEAD(join_all); ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_thread Thread services ++ * @{ ++ */ ++ ++static void timeout_handler(struct xntimer *timer) ++{ ++ struct xnthread *thread = container_of(timer, struct xnthread, rtimer); ++ ++ xnthread_set_info(thread, XNTIMEO); /* Interrupts are off. */ ++ xnthread_resume(thread, XNDELAY); ++} ++ ++static void periodic_handler(struct xntimer *timer) ++{ ++ struct xnthread *thread = container_of(timer, struct xnthread, ptimer); ++ /* ++ * Prevent unwanted round-robin, and do not wake up threads ++ * blocked on a resource. 
++ */ ++ if (xnthread_test_state(thread, XNDELAY|XNPEND) == XNDELAY) ++ xnthread_resume(thread, XNDELAY); ++ ++ /* ++ * The periodic thread might have migrated to another CPU ++ * while passive, fix the timer affinity if need be. ++ */ ++ xntimer_set_affinity(&thread->ptimer, thread->sched); ++} ++ ++static inline void enlist_new_thread(struct xnthread *thread) ++{ /* nklock held, irqs off */ ++ list_add_tail(&thread->glink, &nkthreadq); ++ cobalt_nrthreads++; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++} ++ ++struct kthread_arg { ++ struct xnthread *thread; ++ struct completion *done; ++}; ++ ++static int kthread_trampoline(void *arg) ++{ ++ struct kthread_arg *ka = arg; ++ struct xnthread *thread = ka->thread; ++ struct sched_param param; ++ int ret, policy, prio; ++ ++ /* ++ * It only makes sense to create Xenomai kthreads with the ++ * SCHED_FIFO, SCHED_NORMAL or SCHED_WEAK policies. So ++ * anything that is not from Xenomai's RT class is assumed to ++ * belong to SCHED_NORMAL linux-wise. ++ */ ++ if (thread->sched_class != &xnsched_class_rt) { ++ policy = SCHED_NORMAL; ++ prio = 0; ++ } else { ++ policy = SCHED_FIFO; ++ prio = normalize_priority(thread->cprio); ++ } ++ ++ param.sched_priority = prio; ++ sched_setscheduler(current, policy, ¶m); ++ ++ ret = xnthread_map(thread, ka->done); ++ if (ret) { ++ printk(XENO_WARNING "failed to create kernel shadow %s\n", ++ thread->name); ++ return ret; ++ } ++ ++ trace_cobalt_shadow_entry(thread); ++ ++ thread->entry(thread->cookie); ++ ++ xnthread_cancel(thread); ++ ++ return 0; ++} ++ ++static inline int spawn_kthread(struct xnthread *thread) ++{ ++ DECLARE_COMPLETION_ONSTACK(done); ++ struct kthread_arg ka = { ++ .thread = thread, ++ .done = &done ++ }; ++ struct task_struct *p; ++ ++ p = kthread_run(kthread_trampoline, &ka, "%s", thread->name); ++ if (IS_ERR(p)) ++ return PTR_ERR(p); ++ ++ wait_for_completion(&done); ++ ++ return 0; ++} ++ ++int __xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched *sched, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ int flags = attr->flags, ret, gravity; ++ ++ flags &= ~(XNSUSP|XNBOOST); ++#ifndef CONFIG_XENO_ARCH_FPU ++ flags &= ~XNFPU; ++#endif ++ if ((flags & XNROOT) == 0) ++ flags |= XNDORMANT; ++ ++ if (attr->name) ++ ksformat(thread->name, ++ sizeof(thread->name), "%s", attr->name); ++ else ++ ksformat(thread->name, ++ sizeof(thread->name), "@%p", thread); ++ ++ /* ++ * We mirror the global user debug state into the per-thread ++ * state, to speed up branch taking in lib/cobalt wherever ++ * this needs to be tested. 
++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP)) ++ flags |= XNDEBUG; ++ ++ thread->personality = attr->personality; ++ cpumask_and(&thread->affinity, &attr->affinity, &cobalt_cpu_affinity); ++ thread->sched = sched; ++ thread->state = flags; ++ thread->info = 0; ++ thread->local_info = 0; ++ thread->wprio = XNSCHED_IDLE_PRIO; ++ thread->cprio = XNSCHED_IDLE_PRIO; ++ thread->bprio = XNSCHED_IDLE_PRIO; ++ thread->lock_count = 0; ++ thread->rrperiod = XN_INFINITE; ++ thread->wchan = NULL; ++ thread->wwake = NULL; ++ thread->wcontext = NULL; ++ thread->res_count = 0; ++ thread->handle = XN_NO_HANDLE; ++ memset(&thread->stat, 0, sizeof(thread->stat)); ++ thread->selector = NULL; ++ INIT_LIST_HEAD(&thread->glink); ++ INIT_LIST_HEAD(&thread->boosters); ++ /* These will be filled by xnthread_start() */ ++ thread->entry = NULL; ++ thread->cookie = NULL; ++ init_completion(&thread->exited); ++ memset(xnthread_archtcb(thread), 0, sizeof(struct xnarchtcb)); ++ ++ gravity = flags & XNUSER ? XNTIMER_UGRAVITY : XNTIMER_KGRAVITY; ++ xntimer_init(&thread->rtimer, &nkclock, timeout_handler, ++ sched, gravity); ++ xntimer_set_name(&thread->rtimer, thread->name); ++ xntimer_set_priority(&thread->rtimer, XNTIMER_HIPRIO); ++ xntimer_init(&thread->ptimer, &nkclock, periodic_handler, ++ sched, gravity); ++ xntimer_set_name(&thread->ptimer, thread->name); ++ xntimer_set_priority(&thread->ptimer, XNTIMER_HIPRIO); ++ ++ thread->base_class = NULL; /* xnsched_set_policy() will set it. */ ++ ret = xnsched_init_thread(thread); ++ if (ret) ++ goto err_out; ++ ++ ret = xnsched_set_policy(thread, sched_class, sched_param); ++ if (ret) ++ goto err_out; ++ ++ if ((flags & (XNUSER|XNROOT)) == 0) { ++ ret = spawn_kthread(thread); ++ if (ret) ++ goto err_out; ++ } ++ ++ return 0; ++ ++err_out: ++ xntimer_destroy(&thread->rtimer); ++ xntimer_destroy(&thread->ptimer); ++ ++ return ret; ++} ++ ++void xnthread_init_shadow_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = current; ++ ++ /* ++ * If the current task is a kthread, the pipeline will take ++ * the necessary steps to make the FPU usable in such ++ * context. The kernel already took care of this issue for ++ * userland tasks (e.g. setting up a clean backup area). 
++ */ ++ __ipipe_share_current(0); ++ ++ tcb->core.host_task = p; ++ tcb->core.tsp = &p->thread; ++ tcb->core.mm = p->mm; ++ tcb->core.active_mm = p->mm; ++ tcb->core.tip = task_thread_info(p); ++#ifdef CONFIG_XENO_ARCH_FPU ++ tcb->core.user_fpu_owner = p; ++#endif /* CONFIG_XENO_ARCH_FPU */ ++ xnarch_init_shadow_tcb(thread); ++ ++ trace_cobalt_shadow_map(thread); ++} ++ ++void xnthread_init_root_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = current; ++ ++ tcb->core.host_task = p; ++ tcb->core.tsp = &tcb->core.ts; ++ tcb->core.mm = p->mm; ++ tcb->core.tip = NULL; ++ xnarch_init_root_tcb(thread); ++} ++ ++void xnthread_deregister(struct xnthread *thread) ++{ ++ if (thread->handle != XN_NO_HANDLE) ++ xnregistry_remove(thread->handle); ++ ++ thread->handle = XN_NO_HANDLE; ++} ++ ++char *xnthread_format_status(unsigned long status, char *buf, int size) ++{ ++ static const char labels[] = XNTHREAD_STATE_LABELS; ++ int pos, c, mask; ++ char *wp; ++ ++ for (mask = (int)status, pos = 0, wp = buf; ++ mask != 0 && wp - buf < size - 2; /* 1-letter label + \0 */ ++ mask >>= 1, pos++) { ++ if ((mask & 1) == 0) ++ continue; ++ ++ c = labels[pos]; ++ ++ switch (1 << pos) { ++ case XNROOT: ++ c = 'R'; /* Always mark root as runnable. */ ++ break; ++ case XNREADY: ++ if (status & XNROOT) ++ continue; /* Already reported on XNROOT. */ ++ break; ++ case XNDELAY: ++ /* ++ * Only report genuine delays here, not timed ++ * waits for resources. ++ */ ++ if (status & XNPEND) ++ continue; ++ break; ++ case XNPEND: ++ /* Report timed waits with lowercase symbol. */ ++ if (status & XNDELAY) ++ c |= 0x20; ++ break; ++ default: ++ if (c == '.') ++ continue; ++ } ++ *wp++ = c; ++ } ++ ++ *wp = '\0'; ++ ++ return buf; ++} ++ ++pid_t xnthread_host_pid(struct xnthread *thread) ++{ ++ if (xnthread_test_state(thread, XNROOT)) ++ return 0; ++ if (!xnthread_host_task(thread)) ++ return -1; ++ ++ return task_pid_nr(xnthread_host_task(thread)); ++} ++ ++int xnthread_set_clock(struct xnthread *thread, struct xnclock *newclock) ++{ ++ spl_t s; ++ ++ if (thread == NULL) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ } ++ ++ /* Change the clock the thread's periodic timer is paced by. */ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_set_clock(&thread->ptimer, newclock); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_clock); ++ ++xnticks_t xnthread_get_timeout(struct xnthread *thread, xnticks_t ns) ++{ ++ struct xntimer *timer; ++ xnticks_t timeout; ++ ++ if (!xnthread_test_state(thread,XNDELAY)) ++ return 0LL; ++ ++ if (xntimer_running_p(&thread->rtimer)) ++ timer = &thread->rtimer; ++ else if (xntimer_running_p(&thread->ptimer)) ++ timer = &thread->ptimer; ++ else ++ return 0LL; ++ ++ timeout = xntimer_get_date(timer); ++ if (timeout <= ns) ++ return 1; ++ ++ return timeout - ns; ++} ++EXPORT_SYMBOL_GPL(xnthread_get_timeout); ++ ++xnticks_t xnthread_get_period(struct xnthread *thread) ++{ ++ xnticks_t period = 0; ++ /* ++ * The current thread period might be: ++ * - the value of the timer interval for periodic threads (ns/ticks) ++ * - or, the value of the alloted round-robin quantum (ticks) ++ * - or zero, meaning "no periodic activity". 
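
As a small illustration of the two query helpers defined here, a sketch of a report routine one might call from a vfile handler; `xnclock_read_monotonic()` and the global `nkclock` are assumed from the Cobalt clock API, and the `demo_*` naming and output format are illustrative only.

```c
#include <linux/kernel.h>
#include <cobalt/kernel/clock.h>	/* assumed header location */
#include <cobalt/kernel/thread.h>

/* Print the remaining delay and the current period of a thread. */
static void demo_report(struct xnthread *t)
{
	xnticks_t now = xnclock_read_monotonic(&nkclock);
	xnticks_t left = xnthread_get_timeout(t, now);	/* 0 if not delayed */
	xnticks_t period = xnthread_get_period(t);	/* 0 if not periodic */

	printk(KERN_INFO "%s: remaining timeout %llu ns, period %llu ns\n",
	       t->name, (unsigned long long)left, (unsigned long long)period);
}
```
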
++ */ ++ if (xntimer_running_p(&thread->ptimer)) ++ period = xntimer_interval(&thread->ptimer); ++ else if (xnthread_test_state(thread,XNRRB)) ++ period = thread->rrperiod; ++ ++ return period; ++} ++EXPORT_SYMBOL_GPL(xnthread_get_period); ++ ++void xnthread_prepare_wait(struct xnthread_wait_context *wc) ++{ ++ struct xnthread *curr = xnthread_current(); ++ ++ wc->posted = 0; ++ curr->wcontext = wc; ++} ++EXPORT_SYMBOL_GPL(xnthread_prepare_wait); ++ ++static inline int moving_target(struct xnsched *sched, struct xnthread *thread) ++{ ++ int ret = 0; ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ /* ++ * When deleting a thread in the course of a context switch or ++ * in flight to another CPU with nklock unlocked on a distant ++ * CPU, do nothing, this case will be caught in ++ * xnsched_finish_unlocked_switch. ++ */ ++ ret = (sched->status & XNINSW) || ++ xnthread_test_state(thread, XNMIGRATE); ++#endif ++ return ret; ++} ++ ++#ifdef CONFIG_XENO_ARCH_FPU ++ ++static inline void giveup_fpu(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++ if (thread == sched->fpuholder) ++ sched->fpuholder = NULL; ++} ++ ++void xnthread_switch_fpu(struct xnsched *sched) ++{ ++ struct xnthread *curr = sched->curr; ++ ++ if (!xnthread_test_state(curr, XNFPU)) ++ return; ++ ++ xnarch_switch_fpu(sched->fpuholder, curr); ++ sched->fpuholder = curr; ++} ++ ++#else /* !CONFIG_XENO_ARCH_FPU */ ++ ++static inline void giveup_fpu(struct xnsched *sched, ++ struct xnthread *thread) ++{ ++} ++ ++#endif /* !CONFIG_XENO_ARCH_FPU */ ++ ++static inline void release_all_ownerships(struct xnthread *curr) ++{ ++ struct xnsynch *synch, *tmp; ++ ++ /* ++ * Release all the ownerships obtained by a thread on ++ * synchronization objects. This routine must be entered ++ * interrupts off. ++ */ ++ xnthread_for_each_booster_safe(synch, tmp, curr) { ++ xnsynch_release(synch, curr); ++ if (synch->cleanup) ++ synch->cleanup(synch); ++ } ++} ++ ++static inline void cleanup_tcb(struct xnthread *curr) /* nklock held, irqs off */ ++{ ++ struct xnsched *sched = curr->sched; ++ ++ list_del(&curr->glink); ++ cobalt_nrthreads--; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++ ++ if (xnthread_test_state(curr, XNREADY)) { ++ XENO_BUG_ON(COBALT, xnthread_test_state(curr, XNTHREAD_BLOCK_BITS)); ++ xnsched_dequeue(curr); ++ xnthread_clear_state(curr, XNREADY); ++ } ++ ++ if (xnthread_test_state(curr, XNPEND)) ++ xnsynch_forget_sleeper(curr); ++ ++ xnthread_set_state(curr, XNZOMBIE); ++ /* ++ * NOTE: we must be running over the root thread, or @curr ++ * is dormant, which means that we don't risk sched->curr to ++ * disappear due to voluntary rescheduling while holding the ++ * nklock, despite @curr bears the zombie bit. ++ */ ++ release_all_ownerships(curr); ++ ++ giveup_fpu(sched, curr); ++ ++ if (moving_target(sched, curr)) ++ return; ++ ++ xnsched_forget(curr); ++ xnthread_deregister(curr); ++} ++ ++void __xnthread_cleanup(struct xnthread *curr) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xntimer_destroy(&curr->rtimer); ++ xntimer_destroy(&curr->ptimer); ++ ++ if (curr->selector) { ++ xnselector_destroy(curr->selector); ++ curr->selector = NULL; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ cleanup_tcb(curr); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* Wake up the joiner if any (we can't have more than one). */ ++ complete(&curr->exited); ++ ++ /* Notify our exit to xnthread_killall() if need be. */ ++ if (waitqueue_active(&join_all)) ++ wake_up(&join_all); ++ ++ /* Finalize last since this incurs releasing the TCB. 
*/ ++ xnthread_run_handler_stack(curr, finalize_thread); ++} ++ ++/* ++ * Unwinds xnthread_init() ops for an unmapped thread. Since the ++ * latter must be dormant, it can't be part of any runqueue. ++ */ ++void __xnthread_discard(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xntimer_destroy(&thread->rtimer); ++ xntimer_destroy(&thread->ptimer); ++ ++ xnlock_get_irqsave(&nklock, s); ++ if (!list_empty(&thread->glink)) { ++ list_del(&thread->glink); ++ cobalt_nrthreads--; ++ xnvfile_touch_tag(&nkthreadlist_tag); ++ } ++ xnthread_deregister(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++/** ++ * @fn void xnthread_init(struct xnthread *thread,const struct xnthread_init_attr *attr,struct xnsched_class *sched_class,const union xnsched_policy_param *sched_param) ++ * @brief Initialize a new thread. ++ * ++ * Initializes a new thread. The thread is left dormant until it is ++ * actually started by xnthread_start(). ++ * ++ * @param thread The address of a thread descriptor Cobalt will use to ++ * store the thread-specific data. This descriptor must always be ++ * valid while the thread is active therefore it must be allocated in ++ * permanent memory. @warning Some architectures may require the ++ * descriptor to be properly aligned in memory; this is an additional ++ * reason for descriptors not to be laid in the program stack where ++ * alignement constraints might not always be satisfied. ++ * ++ * @param attr A pointer to an attribute block describing the initial ++ * properties of the new thread. Members of this structure are defined ++ * as follows: ++ * ++ * - name: An ASCII string standing for the symbolic name of the ++ * thread. This name is copied to a safe place into the thread ++ * descriptor. This name might be used in various situations by Cobalt ++ * for issuing human-readable diagnostic messages, so it is usually a ++ * good idea to provide a sensible value here. NULL is fine though ++ * and means "anonymous". ++ * ++ * - flags: A set of creation flags affecting the operation. The ++ * following flags can be part of this bitmask: ++ * ++ * - XNSUSP creates the thread in a suspended state. In such a case, ++ * the thread shall be explicitly resumed using the xnthread_resume() ++ * service for its execution to actually begin, additionally to ++ * issuing xnthread_start() for it. This flag can also be specified ++ * when invoking xnthread_start() as a starting mode. ++ * ++ * - XNUSER shall be set if @a thread will be mapped over an existing ++ * user-space task. Otherwise, a new kernel host task is created, then ++ * paired with the new Xenomai thread. ++ * ++ * - XNFPU (enable FPU) tells Cobalt that the new thread may use the ++ * floating-point unit. XNFPU is implicitly assumed for user-space ++ * threads even if not set in @a flags. ++ * ++ * - affinity: The processor affinity of this thread. Passing ++ * CPU_MASK_ALL means "any cpu" from the allowed core affinity mask ++ * (cobalt_cpu_affinity). Passing an empty set is invalid. ++ * ++ * @param sched_class The initial scheduling class the new thread ++ * should be assigned to. ++ * ++ * @param sched_param The initial scheduling parameters to set for the ++ * new thread; @a sched_param must be valid within the context of @a ++ * sched_class. ++ * ++ * @return 0 is returned on success. Otherwise, the following error ++ * code indicates the cause of the failure: ++ * ++ * - -EINVAL is returned if @a attr->flags has invalid bits set, or @a ++ * attr->affinity is invalid (e.g. empty). 
++ * ++ * @coretags{secondary-only} ++ */ ++int xnthread_init(struct xnthread *thread, ++ const struct xnthread_init_attr *attr, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ struct xnsched *sched; ++ cpumask_t affinity; ++ int ret; ++ ++ if (attr->flags & ~(XNFPU | XNUSER | XNSUSP)) ++ return -EINVAL; ++ ++ /* ++ * Pick an initial CPU for the new thread which is part of its ++ * affinity mask, and therefore also part of the supported ++ * CPUs. This CPU may change in pin_to_initial_cpu(). ++ */ ++ cpumask_and(&affinity, &attr->affinity, &cobalt_cpu_affinity); ++ if (cpumask_empty(&affinity)) ++ return -EINVAL; ++ ++ sched = xnsched_struct(cpumask_first(&affinity)); ++ ++ ret = __xnthread_init(thread, attr, sched, sched_class, sched_param); ++ if (ret) ++ return ret; ++ ++ trace_cobalt_thread_init(thread, attr, sched_class); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_init); ++ ++/** ++ * @fn int xnthread_start(struct xnthread *thread,const struct xnthread_start_attr *attr) ++ * @brief Start a newly created thread. ++ * ++ * Starts a (newly) created thread, scheduling it for the first ++ * time. This call releases the target thread from the XNDORMANT ++ * state. This service also sets the initial mode for the new thread. ++ * ++ * @param thread The descriptor address of the started thread which ++ * must have been previously initialized by a call to xnthread_init(). ++ * ++ * @param attr A pointer to an attribute block describing the ++ * execution properties of the new thread. Members of this structure ++ * are defined as follows: ++ * ++ * - mode: The initial thread mode. The following flags can be part of ++ * this bitmask: ++ * ++ * - XNLOCK causes the thread to lock the scheduler when it starts. ++ * The target thread will have to call the xnsched_unlock() ++ * service to unlock the scheduler. A non-preemptible thread may still ++ * block, in which case, the lock is reasserted when the thread is ++ * scheduled back in. ++ * ++ * - XNSUSP makes the thread start in a suspended state. In such a ++ * case, the thread will have to be explicitly resumed using the ++ * xnthread_resume() service for its execution to actually begin. ++ * ++ * - entry: The address of the thread's body routine. In other words, ++ * it is the thread entry point. ++ * ++ * - cookie: A user-defined opaque cookie Cobalt will pass to the ++ * emerging thread as the sole argument of its entry point. ++ * ++ * @retval 0 if @a thread could be started ; ++ * ++ * @retval -EBUSY if @a thread was not dormant or stopped ; ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int xnthread_start(struct xnthread *thread, ++ const struct xnthread_start_attr *attr) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (!xnthread_test_state(thread, XNDORMANT)) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EBUSY; ++ } ++ ++ xnthread_set_state(thread, attr->mode & (XNTHREAD_MODE_BITS | XNSUSP)); ++ thread->entry = attr->entry; ++ thread->cookie = attr->cookie; ++ if (attr->mode & XNLOCK) ++ thread->lock_count = 1; ++ ++ /* ++ * A user-space thread starts immediately Cobalt-wise since we ++ * already have an underlying Linux context for it, so we can ++ * enlist it now to make it visible from the /proc interface. 
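
Taken together, xnthread_init() and xnthread_start() follow the usual init-then-start pattern. Below is a minimal kernel-side sketch of that pattern, assuming the usual Cobalt kernel headers and the `rt` member of `union xnsched_policy_param`; the `demo_*` names are illustrative, and an RTDM driver would more commonly reach this path through rtdm_task_init().

```c
#include <cobalt/kernel/thread.h>	/* assumed header locations */
#include <cobalt/kernel/sched.h>

static struct xnthread demo_thread;

/* Entry point; Cobalt passes back the cookie given at start time. */
static void demo_entry(void *cookie)
{
	(void)cookie;
	/* real-time work would go here */
}

static int demo_spawn(void)
{
	struct xnthread_init_attr iattr = {
		.name = "demo",
		.flags = XNFPU,		  /* kernel thread: XNUSER not set */
		.affinity = CPU_MASK_ALL, /* any CPU in cobalt_cpu_affinity */
		.personality = NULL,	  /* left NULL for brevity in this sketch */
	};
	struct xnthread_start_attr sattr = {
		.mode = 0,		/* neither XNSUSP nor XNLOCK */
		.entry = demo_entry,
		.cookie = NULL,
	};
	union xnsched_policy_param param = { .rt = { .prio = 50 } };
	int ret;

	ret = xnthread_init(&demo_thread, &iattr, &xnsched_class_rt, &param);
	if (ret)
		return ret;

	/* Releases the thread from XNDORMANT; -EBUSY if already started. */
	return xnthread_start(&demo_thread, &sattr);
}
```
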
++ */ ++ if (xnthread_test_state(thread, XNUSER)) ++ enlist_new_thread(thread); ++ ++ trace_cobalt_thread_start(thread); ++ ++ xnthread_resume(thread, XNDORMANT); ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_start); ++ ++/** ++ * @fn void xnthread_set_mode(int clrmask,int setmask) ++ * @brief Change control mode of the current thread. ++ * ++ * Change the control mode of the current thread. The control mode ++ * affects several behaviours of the Cobalt core regarding this ++ * thread. ++ * ++ * @param clrmask Clears the corresponding bits from the control mode ++ * before setmask is applied. The scheduler lock held by the current ++ * thread can be forcibly released by passing the XNLOCK bit in this ++ * mask. In this case, the lock nesting count is also reset to zero. ++ * ++ * @param setmask The new thread mode. The following flags may be set ++ * in this bitmask: ++ * ++ * - XNLOCK makes the current thread non-preemptible by other threads. ++ * Unless XNTRAPLB is also set for the thread, the latter may still ++ * block, dropping the lock temporarily, in which case, the lock will ++ * be reacquired automatically when the thread resumes execution. ++ * ++ * - XNWARN enables debugging notifications for the current thread. A ++ * SIGDEBUG (Linux-originated) signal is sent when the following ++ * atypical or abnormal behavior is detected: ++ * ++ * - the current thread switches to secondary mode. Such notification ++ * comes in handy for detecting spurious relaxes. ++ * ++ * - CONFIG_XENO_OPT_DEBUG_MUTEX_RELAXED is enabled in the kernel ++ * configuration, and the current thread is sleeping on a Cobalt ++ * mutex currently owned by a thread running in secondary mode, ++ * which reveals a priority inversion. ++ * ++ * - the current thread is about to sleep while holding a Cobalt ++ * mutex, and CONFIG_XENO_OPT_DEBUG_MUTEX_SLEEP is enabled in the ++ * kernel configuration. Blocking for acquiring a mutex does not ++ * trigger such a signal though. ++ * ++ * - the current thread has both XNTRAPLB and XNLOCK set, and ++ * attempts to block on a Cobalt service, which would cause a ++ * lock break. ++ * ++ * - XNTRAPLB disallows breaking the scheduler lock. In the default ++ * case, a thread which holds the scheduler lock is allowed to drop it ++ * temporarily for sleeping. If this mode bit is set, such thread ++ * would return immediately with XNBREAK set from ++ * xnthread_suspend(). If XNWARN is set for the current thread, ++ * SIGDEBUG is sent in addition to raising the break condition. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note Setting @a clrmask and @a setmask to zero leads to a nop, ++ * in which case xnthread_set_mode() returns the current mode. 
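
A short sketch of how a primary-mode driver thread might use xnthread_set_mode() to bracket a non-preemptible section (illustrative only; the returned mode is used to avoid dropping a scheduler lock the caller did not take).

```c
/* Assumes <cobalt/kernel/thread.h>; must run in primary mode. */
static void demo_nonpreemptible_section(void)
{
	int oldmode;

	/* Keep other Cobalt threads from preempting us on this CPU. */
	oldmode = xnthread_set_mode(0, XNLOCK);

	/* ... short, non-blocking work ... */

	/* Drop the scheduler lock only if this call actually added it. */
	if ((oldmode & XNLOCK) == 0)
		xnthread_set_mode(XNLOCK, 0);
}
```
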
++ */ ++int xnthread_set_mode(int clrmask, int setmask) ++{ ++ int oldmode, lock_count; ++ struct xnthread *curr; ++ spl_t s; ++ ++ primary_mode_only(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ curr = xnsched_current_thread(); ++ oldmode = xnthread_get_state(curr) & XNTHREAD_MODE_BITS; ++ lock_count = curr->lock_count; ++ xnthread_clear_state(curr, clrmask & XNTHREAD_MODE_BITS); ++ xnthread_set_state(curr, setmask & XNTHREAD_MODE_BITS); ++ trace_cobalt_thread_set_mode(curr); ++ ++ if (setmask & XNLOCK) { ++ if (lock_count == 0) ++ xnsched_lock(); ++ } else if (clrmask & XNLOCK) { ++ if (lock_count > 0) { ++ curr->lock_count = 0; ++ xnthread_clear_localinfo(curr, XNLBALERT); ++ xnsched_run(); ++ } ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (lock_count > 0) ++ oldmode |= XNLOCK; ++ ++ return oldmode; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_mode); ++ ++/** ++ * @fn void xnthread_suspend(struct xnthread *thread, int mask,xnticks_t timeout, xntmode_t timeout_mode,struct xnsynch *wchan) ++ * @brief Suspend a thread. ++ * ++ * Suspends the execution of a thread according to a given suspensive ++ * condition. This thread will not be eligible for scheduling until it ++ * all the pending suspensive conditions set by this service are ++ * removed by one or more calls to xnthread_resume(). ++ * ++ * @param thread The descriptor address of the suspended thread. ++ * ++ * @param mask The suspension mask specifying the suspensive condition ++ * to add to the thread's wait mask. Possible values usable by the ++ * caller are: ++ * ++ * - XNSUSP. This flag forcibly suspends a thread, regardless of any ++ * resource to wait for. A reverse call to xnthread_resume() ++ * specifying the XNSUSP bit must be issued to remove this condition, ++ * which is cumulative with other suspension bits.@a wchan should be ++ * NULL when using this suspending mode. ++ * ++ * - XNDELAY. This flags denotes a counted delay wait (in ticks) which ++ * duration is defined by the value of the timeout parameter. ++ * ++ * - XNPEND. This flag denotes a wait for a synchronization object to ++ * be signaled. The wchan argument must points to this object. A ++ * timeout value can be passed to bound the wait. This suspending mode ++ * should not be used directly by the client interface, but rather ++ * through the xnsynch_sleep_on() call. ++ * ++ * @param timeout The timeout which may be used to limit the time the ++ * thread pends on a resource. This value is a wait time given in ++ * nanoseconds. It can either be relative, absolute monotonic, or ++ * absolute adjustable depending on @a timeout_mode. ++ * ++ * Passing XN_INFINITE @b and setting @a timeout_mode to XN_RELATIVE ++ * specifies an unbounded wait. All other values are used to ++ * initialize a watchdog timer. If the current operation mode of the ++ * system timer is oneshot and @a timeout elapses before ++ * xnthread_suspend() has completed, then the target thread will not ++ * be suspended, and this routine leads to a null effect. ++ * ++ * @param timeout_mode The mode of the @a timeout parameter. It can ++ * either be set to XN_RELATIVE, XN_ABSOLUTE, or XN_REALTIME (see also ++ * xntimer_start()). ++ * ++ * @param wchan The address of a pended resource. This parameter is ++ * used internally by the synchronization object implementation code ++ * to specify on which object the suspended thread pends. NULL is a ++ * legitimate value when this parameter does not apply to the current ++ * suspending mode (e.g. XNSUSP). 
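
To make the XNSUSP semantics above concrete, here is a hedged sketch of the usual forced-suspend/resume pairing; the helper names are illustrative, and both calls may switch context as noted by the service tags.

```c
/* Assumes <cobalt/kernel/thread.h> and <cobalt/kernel/sched.h>. */
static void demo_hold(struct xnthread *t)
{
	/* Add the XNSUSP condition: no wait channel, unbounded wait. */
	xnthread_suspend(t, XNSUSP, XN_INFINITE, XN_RELATIVE, NULL);
}

static void demo_release(struct xnthread *t)
{
	/* Remove the same condition; the thread becomes ready again
	 * once no other blocking bit remains set. */
	xnthread_resume(t, XNSUSP);
	xnsched_run();	/* let the rescheduling procedure run */
}
```
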
++ * ++ * @note If the target thread has received a Linux-originated signal, ++ * then this service immediately exits without suspending the thread, ++ * but raises the XNBREAK condition in its information mask. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void xnthread_suspend(struct xnthread *thread, int mask, ++ xnticks_t timeout, xntmode_t timeout_mode, ++ struct xnsynch *wchan) ++{ ++ unsigned long oldstate; ++ struct xnsched *sched; ++ spl_t s; ++ ++ /* No, you certainly do not want to suspend the root thread. */ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ /* No built-in support for conjunctive wait. */ ++ XENO_BUG_ON(COBALT, wchan && thread->wchan); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_suspend(thread, mask, timeout, timeout_mode, wchan); ++ ++ sched = thread->sched; ++ oldstate = thread->state; ++ ++ /* ++ * If attempting to suspend a runnable thread which is pending ++ * a forced switch to secondary mode (XNKICKED), just raise ++ * the XNBREAK status and return immediately, except if we ++ * are precisely doing such switch by applying XNRELAX. ++ * ++ * In the latter case, we also make sure to clear XNKICKED, ++ * since we won't go through prepare_for_signal() once ++ * relaxed. ++ */ ++ if (likely((oldstate & XNTHREAD_BLOCK_BITS) == 0)) { ++ if (likely((mask & XNRELAX) == 0)) { ++ if (xnthread_test_info(thread, XNKICKED)) ++ goto abort; ++ if (thread == sched->curr && ++ thread->lock_count > 0 && ++ (oldstate & XNTRAPLB) != 0) ++ goto lock_break; ++ } ++ /* ++ * Do not destroy the info left behind by yet unprocessed ++ * wakeups when suspending a remote thread. ++ */ ++ if (thread == sched->curr) ++ xnthread_clear_info(thread, XNRMID|XNTIMEO|XNBREAK| ++ XNWAKEN|XNROBBED|XNKICKED); ++ } ++ ++ /* ++ * Don't start the timer for a thread delayed indefinitely. ++ */ ++ if (timeout != XN_INFINITE || timeout_mode != XN_RELATIVE) { ++ xntimer_set_affinity(&thread->rtimer, thread->sched); ++ if (xntimer_start(&thread->rtimer, timeout, XN_INFINITE, ++ timeout_mode)) { ++ /* (absolute) timeout value in the past, bail out. */ ++ if (wchan) { ++ thread->wchan = wchan; ++ xnsynch_forget_sleeper(thread); ++ } ++ xnthread_set_info(thread, XNTIMEO); ++ goto out; ++ } ++ xnthread_set_state(thread, XNDELAY); ++ } ++ ++ if (oldstate & XNREADY) { ++ xnsched_dequeue(thread); ++ xnthread_clear_state(thread, XNREADY); ++ } ++ ++ xnthread_set_state(thread, mask); ++ ++ /* ++ * We must make sure that we don't clear the wait channel if a ++ * thread is first blocked (wchan != NULL) then forcibly ++ * suspended (wchan == NULL), since these are conjunctive ++ * conditions. ++ */ ++ if (wchan) ++ thread->wchan = wchan; ++ ++ /* ++ * If the current thread is being relaxed, we must have been ++ * called from xnthread_relax(), in which case we introduce an ++ * opportunity for interrupt delivery right before switching ++ * context, which shortens the uninterruptible code path. ++ * ++ * We have to shut irqs off before calling __xnsched_run() ++ * though: if an interrupt could preempt us right after ++ * xnarch_escalate() is passed but before the nklock is ++ * grabbed, we would enter the critical section in ++ * ___xnsched_run() from the root domain, which would defeat ++ * the purpose of escalating the request. ++ * ++ * NOTE: using __xnsched_run() for rescheduling allows us to ++ * break the scheduler lock temporarily. 
++ */ ++ if (likely(thread == sched->curr)) { ++ xnsched_set_resched(sched); ++ if (unlikely(mask & XNRELAX)) { ++ xnlock_clear_irqon(&nklock); ++ splmax(); ++ __xnsched_run(sched); ++ return; ++ } ++ /* ++ * If the thread is runnning on a remote CPU, ++ * xnsched_run() will trigger the IPI as required. In ++ * this case, sched refers to a remote runqueue, so ++ * make sure to always kick the rescheduling procedure ++ * for the local one. ++ */ ++ __xnsched_run(xnsched_current()); ++ goto out; ++ } ++ ++ /* ++ * Ok, this one is an interesting corner case, which requires ++ * a bit of background first. Here, we handle the case of ++ * suspending a _relaxed_ user shadow which is _not_ the ++ * current thread. ++ * ++ * The net effect is that we are attempting to stop the ++ * shadow thread for Cobalt, whilst this thread is actually ++ * running some code under the control of the Linux scheduler ++ * (i.e. it's relaxed). ++ * ++ * To make this possible, we force the target Linux task to ++ * migrate back to the Xenomai domain by sending it a ++ * SIGSHADOW signal the interface libraries trap for this ++ * specific internal purpose, whose handler is expected to ++ * call back Cobalt's migration service. ++ * ++ * By forcing this migration, we make sure that Cobalt ++ * controls, hence properly stops, the target thread according ++ * to the requested suspension condition. Otherwise, the ++ * shadow thread in secondary mode would just keep running ++ * into the Linux domain, thus breaking the most common ++ * assumptions regarding suspended threads. ++ * ++ * We only care for threads that are not current, and for ++ * XNSUSP, XNDELAY, XNDORMANT and XNHELD conditions, because: ++ * ++ * - There is no point in dealing with a relaxed thread which ++ * is current, since personalities have to ask for primary ++ * mode switch when processing any syscall which may block the ++ * caller (i.e. __xn_exec_primary). ++ * ++ * - among all blocking bits (XNTHREAD_BLOCK_BITS), only ++ * XNSUSP, XNDELAY, XNHELD and XNDBGSTOP may be applied by the ++ * current thread to a non-current thread. XNPEND is always ++ * added by the caller to its own state, XNMIGRATE, XNRELAX ++ * and XNDBGSTOP have special semantics escaping this issue. ++ * ++ * We don't signal threads which are already in a dormant ++ * state, since they are suspended by definition. ++ */ ++ if (((oldstate & (XNTHREAD_BLOCK_BITS|XNUSER)) == (XNRELAX|XNUSER)) && ++ (mask & (XNDELAY | XNSUSP | XNHELD)) != 0) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HARDEN); ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ return; ++ ++lock_break: ++ /* NOTE: thread is current */ ++ if (xnthread_test_state(thread, XNWARN) && ++ !xnthread_test_localinfo(thread, XNLBALERT)) { ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_set_localinfo(thread, XNLBALERT); ++ xnthread_signal(thread, SIGDEBUG, SIGDEBUG_LOCK_BREAK); ++ } ++abort: ++ if (wchan) { ++ thread->wchan = wchan; ++ xnsynch_forget_sleeper(thread); ++ } ++ xnthread_clear_info(thread, XNRMID | XNTIMEO); ++ xnthread_set_info(thread, XNBREAK); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_suspend); ++ ++/** ++ * @fn void xnthread_resume(struct xnthread *thread,int mask) ++ * @brief Resume a thread. ++ * ++ * Resumes the execution of a thread previously suspended by one or ++ * more calls to xnthread_suspend(). This call removes a suspensive ++ * condition affecting the target thread. 
When all suspensive ++ * conditions are gone, the thread is left in a READY state at which ++ * point it becomes eligible anew for scheduling. ++ * ++ * @param thread The descriptor address of the resumed thread. ++ * ++ * @param mask The suspension mask specifying the suspensive condition ++ * to remove from the thread's wait mask. Possible values usable by ++ * the caller are: ++ * ++ * - XNSUSP. This flag removes the explicit suspension condition. This ++ * condition might be additive to the XNPEND condition. ++ * ++ * - XNDELAY. This flag removes the counted delay wait condition. ++ * ++ * - XNPEND. This flag removes the resource wait condition. If a ++ * watchdog is armed, it is automatically disarmed by this ++ * call. Unlike the two previous conditions, only the current thread ++ * can set this condition for itself, i.e. no thread can force another ++ * one to pend on a resource. ++ * ++ * When the thread is eventually resumed by one or more calls to ++ * xnthread_resume(), the caller of xnthread_suspend() in the awakened ++ * thread that suspended itself should check for the following bits in ++ * its own information mask to determine what caused its wake up: ++ * ++ * - XNRMID means that the caller must assume that the pended ++ * synchronization object has been destroyed (see xnsynch_flush()). ++ * ++ * - XNTIMEO means that the delay elapsed, or the watchdog went off ++ * before the corresponding synchronization object was signaled. ++ * ++ * - XNBREAK means that the wait has been forcibly broken by a call to ++ * xnthread_unblock(). ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++void xnthread_resume(struct xnthread *thread, int mask) ++{ ++ unsigned long oldstate; ++ struct xnsched *sched; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_resume(thread, mask); ++ ++ xntrace_pid(xnthread_host_pid(thread), xnthread_current_priority(thread)); ++ ++ sched = thread->sched; ++ oldstate = thread->state; ++ ++ if ((oldstate & XNTHREAD_BLOCK_BITS) == 0) { ++ if (oldstate & XNREADY) ++ xnsched_dequeue(thread); ++ goto enqueue; ++ } ++ ++ /* Clear the specified block bit(s) */ ++ xnthread_clear_state(thread, mask); ++ ++ /* ++ * If XNDELAY was set in the clear mask, xnthread_unblock() ++ * was called for the thread, or a timeout has elapsed. In the ++ * latter case, stopping the timer is a no-op. ++ */ ++ if (mask & XNDELAY) ++ xntimer_stop(&thread->rtimer); ++ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) ++ goto clear_wchan; ++ ++ if (mask & XNDELAY) { ++ mask = xnthread_test_state(thread, XNPEND); ++ if (mask == 0) ++ goto unlock_and_exit; ++ if (thread->wchan) ++ xnsynch_forget_sleeper(thread); ++ goto recheck_state; ++ } ++ ++ if (xnthread_test_state(thread, XNDELAY)) { ++ if (mask & XNPEND) { ++ /* ++ * A resource became available to the thread. ++ * Cancel the watchdog timer. ++ */ ++ xntimer_stop(&thread->rtimer); ++ xnthread_clear_state(thread, XNDELAY); ++ } ++ goto recheck_state; ++ } ++ ++ /* ++ * The thread is still suspended, but is no more pending on a ++ * resource. ++ */ ++ if ((mask & XNPEND) != 0 && thread->wchan) ++ xnsynch_forget_sleeper(thread); ++ ++ goto unlock_and_exit; ++ ++recheck_state: ++ if (xnthread_test_state(thread, XNTHREAD_BLOCK_BITS)) ++ goto unlock_and_exit; ++ ++clear_wchan: ++ if ((mask & ~XNDELAY) != 0 && thread->wchan != NULL) ++ /* ++ * If the thread was actually suspended, clear the ++ * wait channel. -- this allows requests like ++ * xnthread_suspend(thread,XNDELAY,...) 
not to run ++ * the following code when the suspended thread is ++ * woken up while undergoing a simple delay. ++ */ ++ xnsynch_forget_sleeper(thread); ++ ++ if (unlikely((oldstate & mask) & XNHELD)) { ++ xnsched_requeue(thread); ++ goto ready; ++ } ++enqueue: ++ xnsched_enqueue(thread); ++ready: ++ xnthread_set_state(thread, XNREADY); ++ xnsched_set_resched(sched); ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_resume); ++ ++/** ++ * @fn int xnthread_unblock(struct xnthread *thread) ++ * @brief Unblock a thread. ++ * ++ * Breaks the thread out of any wait it is currently in. This call ++ * removes the XNDELAY and XNPEND suspensive conditions previously put ++ * by xnthread_suspend() on the target thread. If all suspensive ++ * conditions are gone, the thread is left in a READY state at which ++ * point it becomes eligible anew for scheduling. ++ * ++ * @param thread The descriptor address of the unblocked thread. ++ * ++ * This call neither releases the thread from the XNSUSP, XNRELAX, ++ * XNDORMANT or XNHELD suspensive conditions. ++ * ++ * When the thread resumes execution, the XNBREAK bit is set in the ++ * unblocked thread's information mask. Unblocking a non-blocked ++ * thread is perfectly harmless. ++ * ++ * @return non-zero is returned if the thread was actually unblocked ++ * from a pending wait state, 0 otherwise. ++ * ++ * @coretags{unrestricted, might-switch} ++ */ ++int xnthread_unblock(struct xnthread *thread) ++{ ++ int ret = 1; ++ spl_t s; ++ ++ /* ++ * Attempt to abort an undergoing wait for the given thread. ++ * If this state is due to an alarm that has been armed to ++ * limit the sleeping thread's waiting time while it pends for ++ * a resource, the corresponding XNPEND state will be cleared ++ * by xnthread_resume() in the same move. Otherwise, this call ++ * may abort an undergoing infinite wait for a resource (if ++ * any). ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ trace_cobalt_thread_unblock(thread); ++ ++ if (xnthread_test_state(thread, XNDELAY)) ++ xnthread_resume(thread, XNDELAY); ++ else if (xnthread_test_state(thread, XNPEND)) ++ xnthread_resume(thread, XNPEND); ++ else ++ ret = 0; ++ ++ /* ++ * We should not clear a previous break state if this service ++ * is called more than once before the target thread actually ++ * resumes, so we only set the bit here and never clear ++ * it. However, we must not raise the XNBREAK bit if the ++ * target thread was already awake at the time of this call, ++ * so that downstream code does not get confused by some ++ * "successful but interrupted syscall" condition. IOW, a ++ * break state raised here must always trigger an error code ++ * downstream, and an already successful syscall cannot be ++ * marked as interrupted. ++ */ ++ if (ret) ++ xnthread_set_info(thread, XNBREAK); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_unblock); ++ ++/** ++ * @fn int xnthread_set_periodic(struct xnthread *thread,xnticks_t idate, xntmode_t timeout_mode, xnticks_t period) ++ * @brief Make a thread periodic. ++ * ++ * Make a thread periodic by programming its first release point and ++ * its period in the processor time line. Subsequent calls to ++ * xnthread_wait_period() will delay the thread until the next ++ * periodic release point in the processor timeline is reached. ++ * ++ * @param thread The core thread to make periodic. If NULL, the ++ * current thread is assumed. 
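The three services above can be combined from another kernel context. The following sketch is illustrative only and not taken from the patch; the demo_* helpers, the 1 ms timeout and the header path are assumptions.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Block @t under the explicit XNSUSP condition for at most ~1 ms
 * (timeout expressed in nanoseconds, relative to now). */
static void demo_pause(struct xnthread *t)
{
	xnthread_suspend(t, XNSUSP, 1000000, XN_RELATIVE, NULL);
}

/* Lift XNSUSP; @t becomes READY again once no block bit remains. */
static void demo_unpause(struct xnthread *t)
{
	xnthread_resume(t, XNSUSP);
}

/* Break a pending XNPEND/XNDELAY wait; the woken thread finds XNBREAK set. */
static int demo_break_wait(struct xnthread *t)
{
	return xnthread_unblock(t);
}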
++ * ++ * @param idate The initial (absolute) date of the first release ++ * point, expressed in nanoseconds. The affected thread will be ++ * delayed by the first call to xnthread_wait_period() until this ++ * point is reached. If @a idate is equal to XN_INFINITE, the first ++ * release point is set to @a period nanoseconds after the current ++ * date. In the latter case, @a timeout_mode is not considered and can ++ * have any valid value. ++ * ++ * @param timeout_mode The mode of the @a idate parameter. It can ++ * either be set to XN_ABSOLUTE or XN_REALTIME with @a idate different ++ * from XN_INFINITE (see also xntimer_start()). ++ * ++ * @param period The period of the thread, expressed in nanoseconds. ++ * As a side-effect, passing XN_INFINITE attempts to stop the thread's ++ * periodic timer; in the latter case, the routine always exits ++ * succesfully, regardless of the previous state of this timer. ++ * ++ * @return 0 is returned upon success. Otherwise: ++ * ++ * - -ETIMEDOUT is returned @a idate is different from XN_INFINITE and ++ * represents a date in the past. ++ * ++ * - -EINVAL is returned if @a period is different from XN_INFINITE ++ * but shorter than the scheduling latency value for the target ++ * system, as available from /proc/xenomai/latency. -EINVAL is also ++ * returned if @a timeout_mode is not compatible with @a idate, such ++ * as XN_RELATIVE with @a idate different from XN_INFINITE. ++ * ++ * - -EPERM is returned if @a thread is NULL, but the caller is not a ++ * Xenomai thread. ++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnthread_set_periodic(struct xnthread *thread, xnticks_t idate, ++ xntmode_t timeout_mode, xnticks_t period) ++{ ++ int ret = 0; ++ spl_t s; ++ ++ if (thread == NULL) { ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ } ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (period == XN_INFINITE) { ++ if (xntimer_running_p(&thread->ptimer)) ++ xntimer_stop(&thread->ptimer); ++ ++ goto unlock_and_exit; ++ } ++ ++ /* ++ * LART: detect periods which are shorter than the core clock ++ * gravity for kernel thread timers. This can't work, caller ++ * must have messed up arguments. ++ */ ++ if (period < xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, kernel))) { ++ ret = -EINVAL; ++ goto unlock_and_exit; ++ } ++ ++ xntimer_set_affinity(&thread->ptimer, thread->sched); ++ ++ if (idate == XN_INFINITE) ++ xntimer_start(&thread->ptimer, period, period, XN_RELATIVE); ++ else { ++ if (timeout_mode == XN_REALTIME) ++ idate -= xnclock_get_offset(xntimer_clock(&thread->ptimer)); ++ else if (timeout_mode != XN_ABSOLUTE) { ++ ret = -EINVAL; ++ goto unlock_and_exit; ++ } ++ ret = xntimer_start(&thread->ptimer, idate, period, ++ XN_ABSOLUTE); ++ } ++ ++unlock_and_exit: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_periodic); ++ ++/** ++ * @fn int xnthread_wait_period(unsigned long *overruns_r) ++ * @brief Wait for the next periodic release point. ++ * ++ * Make the current thread wait for the next periodic release point in ++ * the processor time line. ++ * ++ * @param overruns_r If non-NULL, @a overruns_r must be a pointer to a ++ * memory location which will be written with the count of pending ++ * overruns. This value is copied only when xnthread_wait_period() ++ * returns -ETIMEDOUT or success; the memory location remains ++ * unmodified otherwise. If NULL, this count will never be copied ++ * back. 
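An illustrative sketch only (names and the 100 us value are assumptions): programming and cancelling the periodic timer of the calling thread with the service above.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Make the calling Cobalt thread periodic at 100 us; with idate set to
 * XN_INFINITE the first release point is one period from now and the
 * timeout mode is not considered. */
static int demo_make_periodic(void)
{
	return xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, 100000);
}

/* Passing XN_INFINITE as the period stops the periodic timer. */
static int demo_stop_periodic(void)
{
	return xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, XN_INFINITE);
}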
++ * ++ * @return 0 is returned upon success; if @a overruns_r is valid, zero ++ * is copied to the pointed memory location. Otherwise: ++ * ++ * - -EWOULDBLOCK is returned if xnthread_set_periodic() has not ++ * previously been called for the calling thread. ++ * ++ * - -EINTR is returned if xnthread_unblock() has been called for the ++ * waiting thread before the next periodic release point has been ++ * reached. In this case, the overrun counter is reset too. ++ * ++ * - -ETIMEDOUT is returned if the timer has overrun, which indicates ++ * that one or more previous release points have been missed by the ++ * calling thread. If @a overruns_r is valid, the count of pending ++ * overruns is copied to the pointed memory location. ++ * ++ * @coretags{primary-only, might-switch} ++ */ ++int xnthread_wait_period(unsigned long *overruns_r) ++{ ++ unsigned long overruns = 0; ++ struct xnthread *thread; ++ struct xnclock *clock; ++ xnticks_t now; ++ int ret = 0; ++ spl_t s; ++ ++ thread = xnthread_current(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (unlikely(!xntimer_running_p(&thread->ptimer))) { ++ ret = -EWOULDBLOCK; ++ goto out; ++ } ++ ++ trace_cobalt_thread_wait_period(thread); ++ ++ clock = xntimer_clock(&thread->ptimer); ++ now = xnclock_read_raw(clock); ++ if (likely((xnsticks_t)(now - xntimer_pexpect(&thread->ptimer)) < 0)) { ++ xnthread_suspend(thread, XNDELAY, XN_INFINITE, XN_RELATIVE, NULL); ++ if (unlikely(xnthread_test_info(thread, XNBREAK))) { ++ ret = -EINTR; ++ goto out; ++ } ++ ++ now = xnclock_read_raw(clock); ++ } ++ ++ overruns = xntimer_get_overruns(&thread->ptimer, thread, now); ++ if (overruns) { ++ ret = -ETIMEDOUT; ++ trace_cobalt_thread_missed_period(thread); ++ } ++ ++ if (likely(overruns_r != NULL)) ++ *overruns_r = overruns; ++ out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_wait_period); ++ ++/** ++ * @fn int xnthread_set_slice(struct xnthread *thread, xnticks_t quantum) ++ * @brief Set thread time-slicing information. ++ * ++ * Update the time-slicing information for a given thread. This ++ * service enables or disables round-robin scheduling for the thread, ++ * depending on the value of @a quantum. By default, times-slicing is ++ * disabled for a new thread initialized by a call to xnthread_init(). ++ * ++ * @param thread The descriptor address of the affected thread. ++ * ++ * @param quantum The time quantum assigned to the thread expressed in ++ * nanoseconds. If @a quantum is different from XN_INFINITE, the ++ * time-slice for the thread is set to that value and its current time ++ * credit is refilled (i.e. the thread is given a full time-slice to ++ * run next). Otherwise, if @a quantum equals XN_INFINITE, ++ * time-slicing is stopped for that thread. ++ * ++ * @return 0 is returned upon success. Otherwise, -EINVAL is returned ++ * if @a quantum is not XN_INFINITE and: ++ * ++ * - the base scheduling class of the target thread does not support ++ * time-slicing, ++ * ++ * - @a quantum is smaller than the master clock gravity for a user ++ * thread, which denotes a spurious value. 
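A minimal periodic loop built on xnthread_set_periodic() and xnthread_wait_period(); illustrative only, with a hypothetical demo_* entry point and an arbitrary 1 ms period.

#include <linux/errno.h>
#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Hypothetical body of a Cobalt kernel thread running a 1 ms loop. */
static void demo_periodic_body(void *cookie)
{
	unsigned long overruns, missed = 0;
	int ret;

	if (xnthread_set_periodic(NULL, XN_INFINITE, XN_RELATIVE, 1000000))
		return;

	for (;;) {
		ret = xnthread_wait_period(&overruns);
		if (ret == -ETIMEDOUT)
			missed += overruns;	/* late: count missed release points */
		else if (ret)
			break;			/* -EINTR (unblocked) or -EWOULDBLOCK */
		/* ... one cycle of periodic work ... */
	}
}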
++ * ++ * @coretags{task-unrestricted} ++ */ ++int xnthread_set_slice(struct xnthread *thread, xnticks_t quantum) ++{ ++ struct xnsched *sched; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = thread->sched; ++ thread->rrperiod = quantum; ++ ++ if (quantum != XN_INFINITE) { ++ if (quantum <= xnclock_get_gravity(&nkclock, user) || ++ thread->base_class->sched_tick == NULL) { ++ xnlock_put_irqrestore(&nklock, s); ++ return -EINVAL; ++ } ++ xnthread_set_state(thread, XNRRB); ++ if (sched->curr == thread) ++ xntimer_start(&sched->rrbtimer, ++ quantum, XN_INFINITE, XN_RELATIVE); ++ } else { ++ xnthread_clear_state(thread, XNRRB); ++ if (sched->curr == thread) ++ xntimer_stop(&sched->rrbtimer); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_slice); ++ ++/** ++ * @fn void xnthread_cancel(struct xnthread *thread) ++ * @brief Cancel a thread. ++ * ++ * Request cancellation of a thread. This service forces @a thread to ++ * exit from any blocking call, then to switch to secondary mode. ++ * @a thread will terminate as soon as it reaches a cancellation ++ * point. Cancellation points are defined for the following ++ * situations: ++ * ++ * - @a thread self-cancels by a call to xnthread_cancel(). ++ * - @a thread invokes a Linux syscall (user-space shadow only). ++ * - @a thread receives a Linux signal (user-space shadow only). ++ * - @a thread unblocks from a Xenomai syscall (user-space shadow only). ++ * - @a thread attempts to block on a Xenomai syscall (user-space shadow only). ++ * - @a thread explicitly calls xnthread_test_cancel(). ++ * ++ * @param thread The descriptor address of the thread to terminate. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ * ++ * @note In addition to the common actions taken upon cancellation, a ++ * thread which belongs to the SCHED_WEAK class is sent a regular ++ * SIGTERM signal. ++ */ ++void xnthread_cancel(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ /* Right, so you want to kill the kernel?! */ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_info(thread, XNCANCELD)) ++ goto check_self_cancel; ++ ++ trace_cobalt_thread_cancel(thread); ++ ++ xnthread_set_info(thread, XNCANCELD); ++ ++ /* ++ * If @thread is not started yet, fake a start request, ++ * raising the kicked condition bit to make sure it will reach ++ * xnthread_test_cancel() on its wakeup path. ++ */ ++ if (xnthread_test_state(thread, XNDORMANT)) { ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_resume(thread, XNDORMANT); ++ goto out; ++ } ++ ++check_self_cancel: ++ if (xnthread_current() == thread) { ++ xnlock_put_irqrestore(&nklock, s); ++ xnthread_test_cancel(); ++ /* ++ * May return if on behalf of an IRQ handler which has ++ * preempted @thread. ++ */ ++ return; ++ } ++ ++ /* ++ * Force the non-current thread to exit: ++ * ++ * - unblock a user thread, switch it to weak scheduling, ++ * then send it SIGTERM. ++ * ++ * - just unblock a kernel thread, it is expected to reach a ++ * cancellation point soon after ++ * (i.e. xnthread_test_cancel()). 
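For illustration only (hypothetical names, arbitrary 1 ms quantum): toggling round-robin scheduling on a thread with the service above.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Enable round-robin with a 1 ms quantum; fails with -EINVAL if the base
 * class has no sched_tick handler or the quantum is below the user clock
 * gravity. */
static int demo_enable_rr(struct xnthread *t)
{
	return xnthread_set_slice(t, 1000000);
}

/* XN_INFINITE disables time-slicing again. */
static int demo_disable_rr(struct xnthread *t)
{
	return xnthread_set_slice(t, XN_INFINITE);
}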
++ */ ++ if (xnthread_test_state(thread, XNUSER)) { ++ __xnthread_demote(thread); ++ xnthread_signal(thread, SIGTERM, 0); ++ } else ++ __xnthread_kick(thread); ++out: ++ xnsched_run(); ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_cancel); ++ ++struct wait_grace_struct { ++ struct completion done; ++ struct rcu_head rcu; ++}; ++ ++static void grace_elapsed(struct rcu_head *head) ++{ ++ struct wait_grace_struct *wgs; ++ ++ wgs = container_of(head, struct wait_grace_struct, rcu); ++ complete(&wgs->done); ++} ++ ++static void wait_for_rcu_grace_period(struct pid *pid) ++{ ++ struct wait_grace_struct wait = { ++ .done = COMPLETION_INITIALIZER_ONSTACK(wait.done), ++ }; ++ struct task_struct *p; ++ ++ init_rcu_head_on_stack(&wait.rcu); ++ ++ for (;;) { ++ call_rcu(&wait.rcu, grace_elapsed); ++ wait_for_completion(&wait.done); ++ if (pid == NULL) ++ break; ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ rcu_read_unlock(); ++ if (p == NULL) ++ break; ++ reinit_completion(&wait.done); ++ } ++} ++ ++/** ++ * @fn void xnthread_join(struct xnthread *thread, bool uninterruptible) ++ * @brief Join with a terminated thread. ++ * ++ * This service waits for @a thread to terminate after a call to ++ * xnthread_cancel(). If that thread has already terminated or is ++ * dormant at the time of the call, then xnthread_join() returns ++ * immediately. ++ * ++ * xnthread_join() adapts to the calling context (primary or ++ * secondary), switching to secondary mode if needed for the duration ++ * of the wait. Upon return, the original runtime mode is restored, ++ * unless a Linux signal is pending. ++ * ++ * @param thread The descriptor address of the thread to join with. ++ * ++ * @param uninterruptible Boolean telling whether the service should ++ * wait for completion uninterruptible. ++ * ++ * @return 0 is returned on success. Otherwise, the following error ++ * codes indicate the cause of the failure: ++ * ++ * - -EDEADLK is returned if the current thread attempts to join ++ * itself. ++ * ++ * - -EINTR is returned if the current thread was unblocked while ++ * waiting for @a thread to terminate. ++ * ++ * - -EBUSY indicates that another thread is already waiting for @a ++ * thread to terminate. ++ * ++ * @coretags{task-unrestricted, might-switch} ++ */ ++int xnthread_join(struct xnthread *thread, bool uninterruptible) ++{ ++ struct xnthread *curr = xnthread_current(); ++ int ret = 0, switched = 0; ++ struct pid *pid; ++ pid_t tpid; ++ spl_t s; ++ ++ XENO_BUG_ON(COBALT, xnthread_test_state(thread, XNROOT)); ++ ++ if (thread == curr) ++ return -EDEADLK; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ if (xnthread_test_state(thread, XNJOINED)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (xnthread_test_info(thread, XNDORMANT)) ++ goto out; ++ ++ trace_cobalt_thread_join(thread); ++ ++ xnthread_set_state(thread, XNJOINED); ++ tpid = xnthread_host_pid(thread); ++ ++ if (curr && !xnthread_test_state(curr, XNRELAX)) { ++ xnlock_put_irqrestore(&nklock, s); ++ xnthread_relax(0, 0); ++ switched = 1; ++ } else ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Since in theory, we might be sleeping there for a long ++ * time, we get a reference on the pid struct holding our ++ * target, then we check for its existence upon wake up. 
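The usual termination pattern pairs the two services above; a sketch, illustrative only, with a hypothetical demo_* name.

#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Request cancellation, then wait interruptibly for the thread to exit.
 * Must not be called on the current thread (-EDEADLK). */
static int demo_destroy(struct xnthread *t)
{
	xnthread_cancel(t);
	return xnthread_join(t, false);	/* 0, -EINTR or -EBUSY */
}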
++ */ ++ pid = find_get_pid(tpid); ++ if (pid == NULL) ++ goto done; ++ ++ /* ++ * We have a tricky issue to deal with, which involves code ++ * relying on the assumption that a destroyed thread will have ++ * scheduled away from do_exit() before xnthread_join() ++ * returns. A typical example is illustrated by the following ++ * sequence, with a RTDM kernel task implemented in a ++ * dynamically loaded module: ++ * ++ * CPU0: rtdm_task_destroy(ktask) ++ * xnthread_cancel(ktask) ++ * xnthread_join(ktask) ++ * ..... ++ * rmmod(module) ++ * ++ * CPU1: in ktask() ++ * ... ++ * ... ++ * __xnthread_test_cancel() ++ * do_exit() ++ * schedule() ++ * ++ * In such a sequence, the code on CPU0 would expect the RTDM ++ * task to have scheduled away upon return from ++ * rtdm_task_destroy(), so that unmapping the destroyed task ++ * code and data memory when unloading the module is always ++ * safe. ++ * ++ * To address this, the joiner first waits for the joinee to ++ * signal completion from the Cobalt thread cleanup handler ++ * (__xnthread_cleanup), then waits for a full RCU grace ++ * period to have elapsed. Since the completion signal is sent ++ * on behalf of do_exit(), we may assume that the joinee has ++ * scheduled away before the RCU grace period ends. ++ */ ++ if (uninterruptible) ++ wait_for_completion(&thread->exited); ++ else { ++ ret = wait_for_completion_interruptible(&thread->exited); ++ if (ret < 0) { ++ put_pid(pid); ++ return -EINTR; ++ } ++ } ++ ++ /* Make sure the joinee has scheduled away ultimately. */ ++ wait_for_rcu_grace_period(pid); ++ ++ put_pid(pid); ++done: ++ ret = 0; ++ if (switched) ++ ret = xnthread_harden(); ++ ++ return ret; ++out: ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_join); ++ ++#ifdef CONFIG_SMP ++ ++void xnthread_migrate_passive(struct xnthread *thread, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ if (thread->sched == sched) ++ return; ++ ++ trace_cobalt_thread_migrate_passive(thread, xnsched_cpu(sched)); ++ /* ++ * Timer migration is postponed until the next timeout happens ++ * for the periodic and rrb timers. The resource timer will be ++ * moved to the right CPU next time it is armed in ++ * xnthread_suspend(). ++ */ ++ xnsched_migrate_passive(thread, sched); ++ ++ xnstat_exectime_reset_stats(&thread->stat.lastperiod); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * @fn int xnthread_set_schedparam(struct xnthread *thread,struct xnsched_class *sched_class,const union xnsched_policy_param *sched_param) ++ * @brief Change the base scheduling parameters of a thread. ++ * ++ * Changes the base scheduling policy and paramaters of a thread. If ++ * the thread is currently blocked, waiting in priority-pending mode ++ * (XNSYNCH_PRIO) for a synchronization object to be signaled, Cobalt ++ * will attempt to reorder the object's wait queue so that it reflects ++ * the new sleeper's priority, unless the XNSYNCH_DREORD flag has been ++ * set for the pended object. ++ * ++ * @param thread The descriptor address of the affected thread. See ++ * note. ++ * ++ * @param sched_class The new scheduling class the thread should be ++ * assigned to. ++ * ++ * @param sched_param The scheduling parameters to set for the thread; ++ * @a sched_param must be valid within the context of @a sched_class. ++ * ++ * It is absolutely required to use this service to change a thread ++ * priority, in order to have all the needed housekeeping chores ++ * correctly performed. i.e. 
Do *not* call xnsched_set_policy() ++ * directly or worse, change the thread.cprio field by hand in any ++ * case. ++ * ++ * @return 0 is returned on success. Otherwise, a negative error code ++ * indicates the cause of a failure that happened in the scheduling ++ * class implementation for @a sched_class. Invalid parameters passed ++ * into @a sched_param are common causes of error. ++ * ++ * @sideeffect ++ * ++ * - This service does not call the rescheduling procedure but may ++ * affect the state of the run queue for the previous and new ++ * scheduling classes. ++ * ++ * - Assigning the same scheduling class and parameters to a running ++ * or ready thread moves it to the end of the run queue, thus causing ++ * a manual round-robin, except if a priority boost is undergoing. ++ * ++ * @coretags{task-unregistred} ++ * ++ * @note The changes only apply to the Xenomai scheduling parameters ++ * for @a thread. There is no propagation/translation of such changes ++ * to the Linux scheduler for the task mated to the Xenomai target ++ * thread. ++ */ ++int xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ spl_t s; ++ int ret; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ret = __xnthread_set_schedparam(thread, sched_class, sched_param); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_set_schedparam); ++ ++int __xnthread_set_schedparam(struct xnthread *thread, ++ struct xnsched_class *sched_class, ++ const union xnsched_policy_param *sched_param) ++{ ++ int old_wprio, new_wprio, ret; ++ ++ old_wprio = thread->wprio; ++ ++ ret = xnsched_set_policy(thread, sched_class, sched_param); ++ if (ret) ++ return ret; ++ ++ new_wprio = thread->wprio; ++ ++ /* ++ * If the thread is waiting on a synchronization object, ++ * update its position in the corresponding wait queue, unless ++ * 1) reordering is explicitly disabled, or 2) the (weighted) ++ * priority has not changed (to prevent spurious round-robin ++ * effects). ++ */ ++ if (old_wprio != new_wprio && thread->wchan && ++ (thread->wchan->status & (XNSYNCH_DREORD|XNSYNCH_PRIO)) ++ == XNSYNCH_PRIO) ++ xnsynch_requeue_sleeper(thread); ++ /* ++ * We should not move the thread at the end of its priority ++ * group, if any of these conditions is true: ++ * ++ * - thread is not runnable; ++ * - thread bears the ready bit which means that xnsched_set_policy() ++ * already reordered the run queue; ++ * - thread currently holds the scheduler lock, so we don't want ++ * any round-robin effect to take place; ++ * - a priority boost is undergoing for this thread. ++ */ ++ if (!xnthread_test_state(thread, XNTHREAD_BLOCK_BITS|XNREADY|XNBOOST) && ++ thread->lock_count == 0) ++ xnsched_putback(thread); ++ ++ xnthread_set_info(thread, XNSCHEDP); ++ /* Ask the target thread to call back if relaxed. */ ++ if (xnthread_test_state(thread, XNRELAX)) ++ xnthread_signal(thread, SIGSHADOW, SIGSHADOW_ACTION_HOME); ++ ++ return ret; ++} ++ ++void __xnthread_test_cancel(struct xnthread *curr) ++{ ++ /* ++ * Just in case xnthread_test_cancel() is called from an IRQ ++ * handler, in which case we may not take the exit path. ++ * ++ * NOTE: curr->sched is stable from our POV and can't change ++ * under our feet. ++ */ ++ if (curr->sched->lflags & XNINIRQ) ++ return; ++ ++ if (!xnthread_test_state(curr, XNRELAX)) ++ xnthread_relax(0, 0); ++ ++ do_exit(0); ++ /* ... won't return ... 
*/ ++ XENO_BUG(COBALT); ++} ++EXPORT_SYMBOL_GPL(__xnthread_test_cancel); ++ ++/** ++ * @internal ++ * @fn int xnthread_harden(void); ++ * @brief Migrate a Linux task to the Xenomai domain. ++ * ++ * This service causes the transition of "current" from the Linux ++ * domain to Xenomai. The shadow will resume in the Xenomai domain as ++ * returning from schedule(). ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int xnthread_harden(void) ++{ ++ struct task_struct *p = current; ++ struct xnthread *thread; ++ struct xnsched *sched; ++ int ret; ++ ++ secondary_mode_only(); ++ ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return -EPERM; ++ ++ if (signal_pending(p)) ++ return -ERESTARTSYS; ++ ++ trace_cobalt_shadow_gohard(thread); ++ ++ xnthread_clear_sync_window(thread, XNRELAX); ++ ++ ret = __ipipe_migrate_head(); ++ if (ret) { ++ xnthread_test_cancel(); ++ xnthread_set_sync_window(thread, XNRELAX); ++ return ret; ++ } ++ ++ /* "current" is now running into the Xenomai domain. */ ++ sched = xnsched_finish_unlocked_switch(thread->sched); ++ xnthread_switch_fpu(sched); ++ ++ xnlock_clear_irqon(&nklock); ++ xnsched_resched_after_unlocked_switch(); ++ xnthread_test_cancel(); ++ ++ trace_cobalt_shadow_hardened(thread); ++ ++ /* ++ * Recheck pending signals once again. As we block task ++ * wakeups during the migration and handle_sigwake_event() ++ * ignores signals until XNRELAX is cleared, any signal ++ * between entering TASK_HARDENING and starting the migration ++ * is just silently queued up to here. ++ */ ++ if (signal_pending(p)) { ++ xnthread_relax(!xnthread_test_state(thread, XNSSTEP), ++ SIGDEBUG_MIGRATE_SIGNAL); ++ return -ERESTARTSYS; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_harden); ++ ++struct lostage_wakeup { ++ struct ipipe_work_header work; /* Must be first. */ ++ struct task_struct *task; ++}; ++ ++static void lostage_task_wakeup(struct ipipe_work_header *work) ++{ ++ struct lostage_wakeup *rq; ++ struct task_struct *p; ++ ++ rq = container_of(work, struct lostage_wakeup, work); ++ p = rq->task; ++ ++ trace_cobalt_lostage_wakeup(p); ++ ++ wake_up_process(p); ++} ++ ++static void post_wakeup(struct task_struct *p) ++{ ++ struct lostage_wakeup wakework = { ++ .work = { ++ .size = sizeof(wakework), ++ .handler = lostage_task_wakeup, ++ }, ++ .task = p, ++ }; ++ ++ trace_cobalt_lostage_request("wakeup", wakework.task); ++ ++ ipipe_post_work_root(&wakework, work); ++} ++ ++void __xnthread_propagate_schedparam(struct xnthread *curr) ++{ ++ int kpolicy = SCHED_FIFO, kprio = curr->bprio, ret; ++ struct task_struct *p = current; ++ struct sched_param param; ++ spl_t s; ++ ++ /* ++ * Test-set race for XNSCHEDP is ok, the propagation is meant ++ * to be done asap but not guaranteed to be carried out ++ * immediately, and the request will remain pending until it ++ * is eventually handled. We just have to protect against a ++ * set-clear race. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ xnthread_clear_info(curr, XNSCHEDP); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Map our policies/priorities to the regular kernel's ++ * (approximated). 
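A sketch of a base-priority change via xnthread_set_schedparam(), illustrative only. The xnsched_class_rt class and the rt.prio field are the ones used by the demotion helper later in this file; the priority value of 50 and the header paths are assumptions.

#include <cobalt/kernel/thread.h>	/* assumed header paths */
#include <cobalt/kernel/sched.h>

/* Move @t to the RT class at a hypothetical priority of 50. */
static int demo_set_rt_prio(struct xnthread *t)
{
	union xnsched_policy_param param;

	param.rt.prio = 50;
	return xnthread_set_schedparam(t, &xnsched_class_rt, &param);
}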
++ */ ++ if (xnthread_test_state(curr, XNWEAK) && kprio == 0) ++ kpolicy = SCHED_NORMAL; ++ else if (kprio >= MAX_USER_RT_PRIO) ++ kprio = MAX_USER_RT_PRIO - 1; ++ ++ if (p->policy != kpolicy || (kprio > 0 && p->rt_priority != kprio)) { ++ param.sched_priority = kprio; ++ ret = sched_setscheduler_nocheck(p, kpolicy, ¶m); ++ XENO_WARN_ON(COBALT, ret != 0); ++ } ++} ++ ++/** ++ * @internal ++ * @fn void xnthread_relax(int notify, int reason); ++ * @brief Switch a shadow thread back to the Linux domain. ++ * ++ * This service yields the control of the running shadow back to ++ * Linux. This is obtained by suspending the shadow and scheduling a ++ * wake up call for the mated user task inside the Linux domain. The ++ * Linux task will resume on return from xnthread_suspend() on behalf ++ * of the root thread. ++ * ++ * @param notify A boolean flag indicating whether threads monitored ++ * from secondary mode switches should be sent a SIGDEBUG signal. For ++ * instance, some internal operations like task exit should not ++ * trigger such signal. ++ * ++ * @param reason The reason to report along with the SIGDEBUG signal. ++ * ++ * @coretags{primary-only, might-switch} ++ * ++ * @note "current" is valid here since the shadow runs with the ++ * properties of the Linux task. ++ */ ++void xnthread_relax(int notify, int reason) ++{ ++ struct xnthread *thread = xnthread_current(); ++ struct task_struct *p = current; ++ int suspension = XNRELAX; ++ int cpu __maybe_unused; ++ kernel_siginfo_t si; ++ ++ primary_mode_only(); ++ ++ /* ++ * Enqueue the request to move the running shadow from the Xenomai ++ * domain to the Linux domain. This will cause the Linux task ++ * to resume using the register state of the shadow thread. ++ */ ++ trace_cobalt_shadow_gorelax(reason); ++ ++ /* ++ * If you intend to change the following interrupt-free ++ * sequence, /first/ make sure to check the special handling ++ * of XNRELAX in xnthread_suspend() when switching out the ++ * current thread, not to break basic assumptions we make ++ * there. ++ * ++ * We disable interrupts during the migration sequence, but ++ * xnthread_suspend() has an interrupts-on section built in. ++ */ ++ splmax(); ++ post_wakeup(p); ++ /* ++ * Grab the nklock to synchronize the Linux task state ++ * manipulation with handle_sigwake_event. This lock will be ++ * dropped by xnthread_suspend(). ++ */ ++ xnlock_get(&nklock); ++#ifdef IPIPE_KEVT_USERINTRET ++ /* ++ * If the thread is being debugged, record that it should migrate back ++ * in case it resumes in userspace. If it resumes in kernel space, i.e. ++ * over a restarting syscall, the associated hardening will both clear ++ * XNCONTHI and disable the user return notifier again. ++ */ ++ if (xnthread_test_state(thread, XNSSTEP)) { ++ xnthread_set_info(thread, XNCONTHI); ++ ipipe_enable_user_intret_notifier(); ++ suspension |= XNDBGSTOP; ++ } ++#endif ++ set_current_state(p->state & ~TASK_NOWAKEUP); ++ xnthread_run_handler_stack(thread, relax_thread); ++ xnthread_suspend(thread, suspension, XN_INFINITE, XN_RELATIVE, NULL); ++ splnone(); ++ ++ /* ++ * Basic sanity check after an expected transition to secondary ++ * mode. ++ */ ++ XENO_WARN(COBALT, !ipipe_root_p, ++ "xnthread_relax() failed for thread %s[%d]", ++ thread->name, xnthread_host_pid(thread)); ++ ++ __ipipe_reenter_root(); ++ ++ /* Account for secondary mode switch. 
*/ ++ xnstat_counter_inc(&thread->stat.ssw); ++ ++ /* ++ * When relaxing, we check for propagating to the regular ++ * kernel new Cobalt schedparams that might have been set for ++ * us while we were running in primary mode. ++ * ++ * CAUTION: This obviously won't update the schedparams cached ++ * by the glibc for the caller in user-space, but this is the ++ * deal: we don't relax threads which issue ++ * pthread_setschedparam[_ex]() from primary mode, but then ++ * only the kernel side (Cobalt and the host kernel) will be ++ * aware of the change, and glibc might cache obsolete ++ * information. ++ */ ++ xnthread_propagate_schedparam(thread); ++ ++ if (xnthread_test_state(thread, XNUSER) && notify) { ++ if (xnthread_test_state(thread, XNWARN)) { ++ /* Help debugging spurious relaxes. */ ++ xndebug_notify_relax(thread, reason); ++ memset(&si, 0, sizeof(si)); ++ si.si_signo = SIGDEBUG; ++ si.si_code = SI_QUEUE; ++ si.si_int = reason | sigdebug_marker; ++ send_sig_info(SIGDEBUG, &si, p); ++ } ++ xnsynch_detect_boosted_relax(thread); ++ } ++ ++ /* ++ * "current" is now running into the Linux domain on behalf of ++ * the root thread. ++ */ ++ xnthread_sync_window(thread); ++ ++#ifdef CONFIG_SMP ++ if (xnthread_test_localinfo(thread, XNMOVED)) { ++ xnthread_clear_localinfo(thread, XNMOVED); ++ cpu = xnsched_cpu(thread->sched); ++ set_cpus_allowed_ptr(p, cpumask_of(cpu)); ++ } ++#endif ++ /* ++ * After migration there will be no syscall restart (rather a signal ++ * delivery). ++ */ ++ xnthread_clear_localinfo(thread, XNSYSRST); ++ ++ ipipe_clear_thread_flag(TIP_MAYDAY); ++ ++ trace_cobalt_shadow_relaxed(thread); ++} ++EXPORT_SYMBOL_GPL(xnthread_relax); ++ ++struct lostage_signal { ++ struct ipipe_work_header work; /* Must be first. */ ++ struct task_struct *task; ++ int signo, sigval; ++}; ++ ++static inline void do_kthread_signal(struct task_struct *p, ++ struct xnthread *thread, ++ struct lostage_signal *rq) ++{ ++ printk(XENO_WARNING ++ "kernel shadow %s received unhandled signal %d (action=0x%x)\n", ++ thread->name, rq->signo, rq->sigval); ++} ++ ++static void lostage_task_signal(struct ipipe_work_header *work) ++{ ++ struct lostage_signal *rq; ++ struct xnthread *thread; ++ struct task_struct *p; ++ kernel_siginfo_t si; ++ int signo; ++ ++ rq = container_of(work, struct lostage_signal, work); ++ p = rq->task; ++ ++ thread = xnthread_from_task(p); ++ if (thread && !xnthread_test_state(thread, XNUSER)) { ++ do_kthread_signal(p, thread, rq); ++ return; ++ } ++ ++ signo = rq->signo; ++ ++ trace_cobalt_lostage_signal(p, signo); ++ ++ if (signo == SIGSHADOW || signo == SIGDEBUG) { ++ memset(&si, '\0', sizeof(si)); ++ si.si_signo = signo; ++ si.si_code = SI_QUEUE; ++ si.si_int = rq->sigval; ++ send_sig_info(signo, &si, p); ++ } else ++ send_sig(signo, p, 1); ++} ++ ++static int force_wakeup(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ int ret = 0; ++ ++ if (xnthread_test_info(thread, XNKICKED)) ++ return 1; ++ ++ if (xnthread_unblock(thread)) { ++ xnthread_set_info(thread, XNKICKED); ++ ret = 1; ++ } ++ ++ /* ++ * CAUTION: we must NOT raise XNBREAK when clearing a forcible ++ * block state, such as XNSUSP, XNHELD. The caller of ++ * xnthread_suspend() we unblock shall proceed as for a normal ++ * return, until it traverses a cancellation point if ++ * XNCANCELD was raised earlier, or calls xnthread_suspend() ++ * which will detect XNKICKED and act accordingly. 
++ * ++ * Rationale: callers of xnthread_suspend() may assume that ++ * receiving XNBREAK means that the process that motivated the ++ * blocking did not go to completion. E.g. the wait context ++ * (see. xnthread_prepare_wait()) was NOT posted before ++ * xnsynch_sleep_on() returned, leaving no useful data there. ++ * Therefore, in case only XNSUSP remains set for the thread ++ * on entry to force_wakeup(), after XNPEND was lifted earlier ++ * when the wait went to successful completion (i.e. no ++ * timeout), then we want the kicked thread to know that it ++ * did receive the requested resource, not finding XNBREAK in ++ * its state word. ++ * ++ * Callers of xnthread_suspend() may inquire for XNKICKED to ++ * detect forcible unblocks from XNSUSP, XNHELD, if they ++ * should act upon this case specifically. ++ */ ++ if (xnthread_test_state(thread, XNSUSP|XNHELD)) { ++ xnthread_resume(thread, XNSUSP|XNHELD); ++ xnthread_set_info(thread, XNKICKED); ++ } ++ ++ /* ++ * Tricky cases: ++ * ++ * - a thread which was ready on entry wasn't actually ++ * running, but nevertheless waits for the CPU in primary ++ * mode, so we have to make sure that it will be notified of ++ * the pending break condition as soon as it enters ++ * xnthread_suspend() from a blocking Xenomai syscall. ++ * ++ * - a ready/readied thread on exit may be prevented from ++ * running by the scheduling policy module it belongs ++ * to. Typically, policies enforcing a runtime budget do not ++ * block threads with no budget, but rather keep them out of ++ * their run queue, so that ->sched_pick() won't elect ++ * them. We tell the policy handler about the fact that we do ++ * want such thread to run until it relaxes, whatever this ++ * means internally for the implementation. ++ */ ++ if (xnthread_test_state(thread, XNREADY)) ++ xnsched_kick(thread); ++ ++ return ret; ++} ++ ++void __xnthread_kick(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ struct task_struct *p = xnthread_host_task(thread); ++ ++ /* Thread is already relaxed -- nop. */ ++ if (xnthread_test_state(thread, XNRELAX)) ++ return; ++ ++ /* ++ * First, try to kick the thread out of any blocking syscall ++ * Xenomai-wise. If that succeeds, then the thread will relax ++ * on its return path to user-space. ++ */ ++ if (force_wakeup(thread)) ++ return; ++ ++ /* ++ * If that did not work out because the thread was not blocked ++ * (i.e. XNPEND/XNDELAY) in a syscall, then force a mayday ++ * trap. Note that we don't want to send that thread any linux ++ * signal, we only want to force it to switch to secondary ++ * mode asap. ++ * ++ * It could happen that a thread is relaxed on a syscall ++ * return path after it was resumed from self-suspension ++ * (e.g. XNSUSP) then also forced to run a mayday trap right ++ * after: this is still correct, at worst we would get a ++ * useless mayday syscall leading to a no-op, no big deal. ++ */ ++ xnthread_set_info(thread, XNKICKED); ++ ++ /* ++ * We may send mayday signals to userland threads only. ++ * However, no need to run a mayday trap if the current thread ++ * kicks itself out of primary mode: it will relax on its way ++ * back to userland via the current syscall ++ * epilogue. Otherwise, we want that thread to enter the ++ * mayday trap asap, to call us back for relaxing. 
++ */ ++ if (thread != xnsched_current_thread() && ++ xnthread_test_state(thread, XNUSER)) ++ ipipe_raise_mayday(p); ++} ++ ++void xnthread_kick(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xnthread_kick(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_kick); ++ ++void __xnthread_demote(struct xnthread *thread) /* nklock locked, irqs off */ ++{ ++ struct xnsched_class *sched_class; ++ union xnsched_policy_param param; ++ ++ /* ++ * First we kick the thread out of primary mode, and have it ++ * resume execution immediately over the regular linux ++ * context. ++ */ ++ __xnthread_kick(thread); ++ ++ /* ++ * Then we demote it, turning that thread into a non real-time ++ * Xenomai shadow, which still has access to Xenomai ++ * resources, but won't compete for real-time scheduling ++ * anymore. In effect, moving the thread to a weak scheduling ++ * class/priority will prevent it from sticking back to ++ * primary mode. ++ */ ++#ifdef CONFIG_XENO_OPT_SCHED_WEAK ++ param.weak.prio = 0; ++ sched_class = &xnsched_class_weak; ++#else ++ param.rt.prio = 0; ++ sched_class = &xnsched_class_rt; ++#endif ++ __xnthread_set_schedparam(thread, sched_class, ¶m); ++} ++ ++void xnthread_demote(struct xnthread *thread) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xnthread_demote(thread); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xnthread_demote); ++ ++void xnthread_signal(struct xnthread *thread, int sig, int arg) ++{ ++ struct lostage_signal sigwork = { ++ .work = { ++ .size = sizeof(sigwork), ++ .handler = lostage_task_signal, ++ }, ++ .task = xnthread_host_task(thread), ++ .signo = sig, ++ .sigval = sig == SIGDEBUG ? arg | sigdebug_marker : arg, ++ }; ++ ++ trace_cobalt_lostage_request("signal", sigwork.task); ++ ++ ipipe_post_work_root(&sigwork, work); ++} ++EXPORT_SYMBOL_GPL(xnthread_signal); ++ ++void xnthread_pin_initial(struct xnthread *thread) ++{ ++ struct task_struct *p = current; ++ struct xnsched *sched; ++ int cpu; ++ spl_t s; ++ ++ /* ++ * @thread is the Xenomai extension of the current kernel ++ * task. If the current CPU is part of the affinity mask of ++ * this thread, pin the latter on this CPU. Otherwise pin it ++ * to the first CPU of that mask. ++ */ ++ cpu = task_cpu(p); ++ if (!cpumask_test_cpu(cpu, &thread->affinity)) ++ cpu = cpumask_first(&thread->affinity); ++ ++ set_cpus_allowed_ptr(p, cpumask_of(cpu)); ++ /* ++ * @thread is still unstarted Xenomai-wise, we are precisely ++ * in the process of mapping the current kernel task to ++ * it. Therefore xnthread_migrate_passive() is the right way ++ * to pin it on a real-time CPU. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ xnthread_migrate_passive(thread, sched); ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++struct parent_wakeup_request { ++ struct ipipe_work_header work; /* Must be first. 
*/ ++ struct completion *done; ++}; ++ ++static void do_parent_wakeup(struct ipipe_work_header *work) ++{ ++ struct parent_wakeup_request *rq; ++ ++ rq = container_of(work, struct parent_wakeup_request, work); ++ complete(rq->done); ++} ++ ++static inline void wakeup_parent(struct completion *done) ++{ ++ struct parent_wakeup_request wakework = { ++ .work = { ++ .size = sizeof(wakework), ++ .handler = do_parent_wakeup, ++ }, ++ .done = done, ++ }; ++ ++ trace_cobalt_lostage_request("wakeup", current); ++ ++ ipipe_post_work_root(&wakework, work); ++} ++ ++static inline void init_kthread_info(struct xnthread *thread) ++{ ++ struct ipipe_threadinfo *p; ++ ++ p = ipipe_current_threadinfo(); ++ p->thread = thread; ++ p->process = NULL; ++} ++ ++/** ++ * @fn int xnthread_map(struct xnthread *thread, struct completion *done) ++ * @internal ++ * @brief Create a shadow thread context over a kernel task. ++ * ++ * This call maps a Cobalt core thread to the "current" Linux task ++ * running in kernel space. The priority and scheduling class of the ++ * underlying Linux task are not affected; it is assumed that the ++ * caller did set them appropriately before issuing the shadow mapping ++ * request. ++ * ++ * This call immediately moves the calling kernel thread to the ++ * Xenomai domain. ++ * ++ * @param thread The descriptor address of the new shadow thread to be ++ * mapped to "current". This descriptor must have been previously ++ * initialized by a call to xnthread_init(). ++ * ++ * @param done A completion object to be signaled when @a thread is ++ * fully mapped over the current Linux context, waiting for ++ * xnthread_start(). ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ERESTARTSYS is returned if the current Linux task has received a ++ * signal, thus preventing the final migration to the Xenomai domain ++ * (i.e. in order to process the signal in the Linux domain). This ++ * error should not be considered as fatal. ++ * ++ * - -EPERM is returned if the shadow thread has been killed before ++ * the current task had a chance to return to the caller. In such a ++ * case, the real-time mapping operation has failed globally, and no ++ * Xenomai resource remains attached to it. ++ * ++ * - -EINVAL is returned if the thread control block bears the XNUSER ++ * bit. ++ * ++ * - -EBUSY is returned if either the current Linux task or the ++ * associated shadow thread is already involved in a shadow mapping. ++ * ++ * @coretags{secondary-only, might-switch} ++ */ ++int xnthread_map(struct xnthread *thread, struct completion *done) ++{ ++ struct task_struct *p = current; ++ int ret; ++ spl_t s; ++ ++ if (xnthread_test_state(thread, XNUSER)) ++ return -EINVAL; ++ ++ if (xnthread_current() || xnthread_test_state(thread, XNMAPPED)) ++ return -EBUSY; ++ ++ thread->u_window = NULL; ++ xnthread_pin_initial(thread); ++ ++ xnthread_init_shadow_tcb(thread); ++ xnthread_suspend(thread, XNRELAX, XN_INFINITE, XN_RELATIVE, NULL); ++ init_kthread_info(thread); ++ xnthread_set_state(thread, XNMAPPED); ++ xndebug_shadow_init(thread); ++ xnthread_run_handler(thread, map_thread); ++ ipipe_enable_notifier(p); ++ ++ /* ++ * CAUTION: Soon after xnthread_init() has returned, ++ * xnthread_start() is commonly invoked from the root domain, ++ * therefore the call site may expect the started kernel ++ * shadow to preempt immediately. As a result of such ++ * assumption, start attributes (struct xnthread_start_attr) ++ * are often laid on the caller's stack. 
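A sketch of the mapping step described above; illustrative only. The xnthread_init() call and the spawner side are not shown, and demo_* is hypothetical.

#include <linux/completion.h>
#include <cobalt/kernel/thread.h>	/* assumed header path */

/* Called from the plain kernel task @thread was initialized over;
 * @done belongs to the spawner, which blocks on it until the mapping
 * completes. */
static int demo_map_shadow(struct xnthread *thread, struct completion *done)
{
	int ret = xnthread_map(thread, done);

	if (ret)	/* -ERESTARTSYS, -EPERM, -EINVAL or -EBUSY */
		return ret;

	/* Now running in the Xenomai domain, dormant until xnthread_start(). */
	return 0;
}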
++ * ++ * For this reason, we raise the completion signal to wake up ++ * the xnthread_init() caller only once the emerging thread is ++ * hardened, and __never__ before that point. Since we run ++ * over the Xenomai domain upon return from xnthread_harden(), ++ * we schedule a virtual interrupt handler in the root domain ++ * to signal the completion object. ++ */ ++ xnthread_resume(thread, XNDORMANT); ++ ret = xnthread_harden(); ++ wakeup_parent(done); ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ enlist_new_thread(thread); ++ /* ++ * Make sure xnthread_start() did not slip in from another CPU ++ * while we were back from wakeup_parent(). ++ */ ++ if (thread->entry == NULL) ++ xnthread_suspend(thread, XNDORMANT, ++ XN_INFINITE, XN_RELATIVE, NULL); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ xnthread_test_cancel(); ++ ++ xntrace_pid(xnthread_host_pid(thread), ++ xnthread_current_priority(thread)); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnthread_map); ++ ++/* nklock locked, irqs off */ ++void xnthread_call_mayday(struct xnthread *thread, int reason) ++{ ++ struct task_struct *p = xnthread_host_task(thread); ++ ++ /* Mayday traps are available to userland threads only. */ ++ XENO_BUG_ON(COBALT, !xnthread_test_state(thread, XNUSER)); ++ xnthread_set_info(thread, XNKICKED); ++ xnthread_signal(thread, SIGDEBUG, reason); ++ ipipe_raise_mayday(p); ++} ++EXPORT_SYMBOL_GPL(xnthread_call_mayday); ++ ++int xnthread_killall(int grace, int mask) ++{ ++ struct xnthread *t, *curr = xnthread_current(); ++ int nrkilled = 0, nrthreads, count; ++ long ret; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ /* ++ * We may hold the core lock across calls to xnthread_cancel() ++ * provided that we won't self-cancel. ++ */ ++ xnlock_get_irqsave(&nklock, s); ++ ++ nrthreads = cobalt_nrthreads; ++ ++ xnsched_for_each_thread(t) { ++ if (xnthread_test_state(t, XNROOT) || ++ xnthread_test_state(t, mask) != mask || ++ t == curr) ++ continue; ++ ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "terminating %s[%d]\n", ++ t->name, xnthread_host_pid(t)); ++ nrkilled++; ++ xnthread_cancel(t); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++ ++ /* ++ * Cancel then join all existing threads during the grace ++ * period. It is the caller's responsibility to prevent more ++ * threads to bind to the system if required, we won't make ++ * any provision for this here. ++ */ ++ count = nrthreads - nrkilled; ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "waiting for %d threads to exit\n", ++ nrkilled); ++ ++ if (grace > 0) { ++ ret = wait_event_interruptible_timeout(join_all, ++ cobalt_nrthreads == count, ++ grace * HZ); ++ if (ret == 0) ++ return -EAGAIN; ++ } else ++ ret = wait_event_interruptible(join_all, ++ cobalt_nrthreads == count); ++ ++ /* Wait for a full RCU grace period to expire. */ ++ wait_for_rcu_grace_period(NULL); ++ ++ if (XENO_DEBUG(COBALT)) ++ printk(XENO_INFO "joined %d threads\n", ++ count + nrkilled - cobalt_nrthreads); ++ ++ return ret < 0 ? -EINTR : 0; ++} ++EXPORT_SYMBOL_GPL(xnthread_killall); ++ ++/* Xenomai's generic personality. */ ++struct xnthread_personality xenomai_personality = { ++ .name = "core", ++ .magic = -1 ++}; ++EXPORT_SYMBOL_GPL(xenomai_personality); ++ ++/** @} */ +--- linux/kernel/xenomai/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/timer.c 2021-04-07 16:01:25.780636234 +0800 +@@ -0,0 +1,982 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2007,2012 Philippe Gerum . 
++ * Copyright (C) 2004 Gilles Chanteperdrix ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_timer Timer services ++ * ++ * The Xenomai timer facility depends on a clock source (xnclock) for ++ * scheduling the next activation times. ++ * ++ * The core provides and depends on a monotonic clock source (nkclock) ++ * with nanosecond resolution, driving the platform timer hardware ++ * exposed by the interrupt pipeline. ++ * ++ * @{ ++ */ ++ ++int xntimer_heading_p(struct xntimer *timer) ++{ ++ struct xnsched *sched = timer->sched; ++ xntimerq_t *q; ++ xntimerh_t *h; ++ ++ q = xntimer_percpu_queue(timer); ++ h = xntimerq_head(q); ++ if (h == &timer->aplink) ++ return 1; ++ ++ if (sched->lflags & XNHDEFER) { ++ h = xntimerq_second(q, h); ++ if (h == &timer->aplink) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++void xntimer_enqueue_and_program(struct xntimer *timer, xntimerq_t *q) ++{ ++ xntimer_enqueue(timer, q); ++ if (xntimer_heading_p(timer)) { ++ struct xnsched *sched = xntimer_sched(timer); ++ struct xnclock *clock = xntimer_clock(timer); ++ if (sched != xnsched_current()) ++ xnclock_remote_shot(clock, sched); ++ else ++ xnclock_program_shot(clock, sched); ++ } ++} ++ ++/** ++ * Arm a timer. ++ * ++ * Activates a timer so that the associated timeout handler will be ++ * fired after each expiration time. A timer can be either periodic or ++ * one-shot, depending on the reload value passed to this routine. The ++ * given timer must have been previously initialized. ++ * ++ * A timer is attached to the clock specified in xntimer_init(). ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param value The date of the initial timer shot, expressed in ++ * nanoseconds. ++ * ++ * @param interval The reload value of the timer. It is a periodic ++ * interval value to be used for reprogramming the next timer shot, ++ * expressed in nanoseconds. If @a interval is equal to XN_INFINITE, ++ * the timer will not be reloaded after it has expired. ++ * ++ * @param mode The timer mode. It can be XN_RELATIVE if @a value shall ++ * be interpreted as a relative date, XN_ABSOLUTE for an absolute date ++ * based on the monotonic clock of the related time base (as returned ++ * my xnclock_read_monotonic()), or XN_REALTIME if the absolute date ++ * is based on the adjustable real-time date for the relevant clock ++ * (obtained from xnclock_read_realtime()). ++ * ++ * @return 0 is returned upon success, or -ETIMEDOUT if an absolute ++ * date in the past has been given. In such an event, the timer is ++ * nevertheless armed for the next shot in the timeline if @a interval ++ * is different from XN_INFINITE. 
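A sketch of arming a timer with the service above, illustrative only. Since xntimer_start() is tagged atomic-entry, the caller serializes on nklock here; the timer is assumed to have been set up with xntimer_init() already, and the 1 ms figures and header paths are assumptions.

#include <cobalt/kernel/timer.h>	/* assumed header paths */
#include <cobalt/kernel/lock.h>

/* Arm @t periodically: first shot 1 ms from now, then every 1 ms. */
static int demo_arm(struct xntimer *t)
{
	spl_t s;
	int ret;

	xnlock_get_irqsave(&nklock, s);
	ret = xntimer_start(t, 1000000, 1000000, XN_RELATIVE);
	xnlock_put_irqrestore(&nklock, s);

	return ret;	/* -ETIMEDOUT only for a date already in the past */
}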
++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++int xntimer_start(struct xntimer *timer, ++ xnticks_t value, xnticks_t interval, ++ xntmode_t mode) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xntimerq_t *q = xntimer_percpu_queue(timer); ++ xnticks_t date, now, delay, period; ++ unsigned long gravity; ++ int ret = 0; ++ ++ trace_cobalt_timer_start(timer, value, interval, mode); ++ ++ if ((timer->status & XNTIMER_DEQUEUED) == 0) ++ xntimer_dequeue(timer, q); ++ ++ now = xnclock_read_raw(clock); ++ ++ timer->status &= ~(XNTIMER_REALTIME | XNTIMER_FIRED | XNTIMER_PERIODIC); ++ switch (mode) { ++ case XN_RELATIVE: ++ if ((xnsticks_t)value < 0) ++ return -ETIMEDOUT; ++ date = xnclock_ns_to_ticks(clock, value) + now; ++ break; ++ case XN_REALTIME: ++ timer->status |= XNTIMER_REALTIME; ++ value -= xnclock_get_offset(clock); ++ /* fall through */ ++ default: /* XN_ABSOLUTE || XN_REALTIME */ ++ date = xnclock_ns_to_ticks(clock, value); ++ if ((xnsticks_t)(date - now) <= 0) { ++ if (interval == XN_INFINITE) ++ return -ETIMEDOUT; ++ /* ++ * We are late on arrival for the first ++ * delivery, wait for the next shot on the ++ * periodic time line. ++ */ ++ delay = now - date; ++ period = xnclock_ns_to_ticks(clock, interval); ++ date += period * (xnarch_div64(delay, period) + 1); ++ } ++ break; ++ } ++ ++ /* ++ * To cope with the basic system latency, we apply a clock ++ * gravity value, which is the amount of time expressed in ++ * clock ticks by which we should anticipate the shot for any ++ * outstanding timer. The gravity value varies with the type ++ * of context the timer wakes up, i.e. irq handler, kernel or ++ * user thread. ++ */ ++ gravity = xntimer_gravity(timer); ++ xntimerh_date(&timer->aplink) = date - gravity; ++ if (now >= xntimerh_date(&timer->aplink)) ++ xntimerh_date(&timer->aplink) += gravity / 2; ++ ++ timer->interval_ns = XN_INFINITE; ++ timer->interval = XN_INFINITE; ++ if (interval != XN_INFINITE) { ++ timer->interval_ns = interval; ++ timer->interval = xnclock_ns_to_ticks(clock, interval); ++ timer->periodic_ticks = 0; ++ timer->start_date = date; ++ timer->pexpect_ticks = 0; ++ timer->status |= XNTIMER_PERIODIC; ++ } ++ ++ timer->status |= XNTIMER_RUNNING; ++ xntimer_enqueue_and_program(timer, q); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xntimer_start); ++ ++/** ++ * @fn int xntimer_stop(struct xntimer *timer) ++ * ++ * @brief Disarm a timer. ++ * ++ * This service deactivates a timer previously armed using ++ * xntimer_start(). Once disarmed, the timer can be subsequently ++ * re-armed using the latter service. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void __xntimer_stop(struct xntimer *timer) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xntimerq_t *q = xntimer_percpu_queue(timer); ++ struct xnsched *sched; ++ int heading = 1; ++ ++ trace_cobalt_timer_stop(timer); ++ ++ if ((timer->status & XNTIMER_DEQUEUED) == 0) { ++ heading = xntimer_heading_p(timer); ++ xntimer_dequeue(timer, q); ++ } ++ timer->status &= ~(XNTIMER_FIRED|XNTIMER_RUNNING); ++ sched = xntimer_sched(timer); ++ ++ /* ++ * If we removed the heading timer, reprogram the next shot if ++ * any. If the timer was running on another CPU, let it tick. ++ */ ++ if (heading && sched == xnsched_current()) ++ xnclock_program_shot(clock, sched); ++} ++EXPORT_SYMBOL_GPL(__xntimer_stop); ++ ++/** ++ * @fn xnticks_t xntimer_get_date(struct xntimer *timer) ++ * ++ * @brief Return the absolute expiration date. 
++ * ++ * Return the next expiration date of a timer as an absolute count of ++ * nanoseconds. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The expiration date in nanoseconds. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++xnticks_t xntimer_get_date(struct xntimer *timer) ++{ ++ if (!xntimer_running_p(timer)) ++ return XN_INFINITE; ++ ++ return xnclock_ticks_to_ns(xntimer_clock(timer), xntimer_expiry(timer)); ++} ++EXPORT_SYMBOL_GPL(xntimer_get_date); ++ ++/** ++ * @fn xnticks_t xntimer_get_timeout(struct xntimer *timer) ++ * ++ * @brief Return the relative expiration date. ++ * ++ * This call returns the count of nanoseconds remaining until the ++ * timer expires. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @return The count of nanoseconds until expiry. The special value ++ * XN_INFINITE is returned if @a timer is currently disabled. It ++ * might happen that the timer expires when this service runs (even if ++ * the associated handler has not been fired yet); in such a case, 1 ++ * is returned. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++xnticks_t __xntimer_get_timeout(struct xntimer *timer) ++{ ++ struct xnclock *clock; ++ xnticks_t expiry, now; ++ ++ clock = xntimer_clock(timer); ++ now = xnclock_read_raw(clock); ++ expiry = xntimer_expiry(timer); ++ if (expiry < now) ++ return 1; /* Will elapse shortly. */ ++ ++ return xnclock_ticks_to_ns(clock, expiry - now); ++} ++EXPORT_SYMBOL_GPL(__xntimer_get_timeout); ++ ++/** ++ * @fn void xntimer_init(struct xntimer *timer,struct xnclock *clock,void (*handler)(struct xntimer *timer), struct xnsched *sched, int flags) ++ * @brief Initialize a timer object. ++ * ++ * Creates a timer. When created, a timer is left disarmed; it must be ++ * started using xntimer_start() in order to be activated. ++ * ++ * @param timer The address of a timer descriptor the nucleus will use ++ * to store the object-specific data. This descriptor must always be ++ * valid while the object is active therefore it must be allocated in ++ * permanent memory. ++ * ++ * @param clock The clock the timer relates to. Xenomai defines a ++ * monotonic system clock, with nanosecond resolution, named ++ * nkclock. In addition, external clocks driven by other tick sources ++ * may be created dynamically if CONFIG_XENO_OPT_EXTCLOCK is defined. ++ * ++ * @param handler The routine to call upon expiration of the timer. ++ * ++ * @param sched An optional pointer to the per-CPU scheduler slot the ++ * new timer is affine to. If non-NULL, the timer will fire on the CPU ++ * @a sched is bound to, otherwise it will fire either on the current ++ * CPU if real-time, or on the first real-time CPU. ++ * ++ * @param flags A set of flags describing the timer. A set of clock ++ * gravity hints can be passed via the @a flags argument, used for ++ * optimizing the built-in heuristics aimed at latency reduction: ++ * ++ * - XNTIMER_IGRAVITY, the timer activates a leaf timer handler. ++ * - XNTIMER_KGRAVITY, the timer activates a kernel thread. ++ * - XNTIMER_UGRAVITY, the timer activates a user-space thread. ++ * ++ * There is no limitation on the number of timers which can be ++ * created/active concurrently. 
++ * ++ * @coretags{unrestricted} ++ */ ++#ifdef DOXYGEN_CPP ++void xntimer_init(struct xntimer *timer, struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags); ++#endif ++ ++void __xntimer_init(struct xntimer *timer, ++ struct xnclock *clock, ++ void (*handler)(struct xntimer *timer), ++ struct xnsched *sched, ++ int flags) ++{ ++ spl_t s __maybe_unused; ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ timer->clock = clock; ++#endif ++ xntimerh_init(&timer->aplink); ++ xntimerh_date(&timer->aplink) = XN_INFINITE; ++ xntimer_set_priority(timer, XNTIMER_STDPRIO); ++ timer->status = (XNTIMER_DEQUEUED|(flags & XNTIMER_INIT_MASK)); ++ timer->handler = handler; ++ timer->interval_ns = 0; ++ timer->sched = NULL; ++ ++ /* ++ * Set the timer affinity, preferably to xnsched_cpu(sched) if ++ * sched was given, CPU0 otherwise. ++ */ ++ if (sched == NULL) ++ sched = xnsched_struct(0); ++ ++ xntimer_set_affinity(timer, sched); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ timer->tracker = clock; ++#endif ++ ksformat(timer->name, XNOBJECT_NAME_LEN, "%d/%s", ++ task_pid_nr(current), current->comm); ++ xntimer_reset_stats(timer); ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&timer->next_stat, &clock->timerq); ++ clock->nrtimers++; ++ xnvfile_touch(&clock->timer_vfile); ++ xnlock_put_irqrestore(&nklock, s); ++#endif /* CONFIG_XENO_OPT_STATS */ ++} ++EXPORT_SYMBOL_GPL(__xntimer_init); ++ ++void xntimer_set_gravity(struct xntimer *timer, int gravity) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ timer->status &= ~XNTIMER_GRAVITY_MASK; ++ timer->status |= gravity; ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_set_gravity); ++ ++#ifdef CONFIG_XENO_OPT_EXTCLOCK ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static void __xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ struct xnclock *oldclock = timer->tracker; ++ ++ list_del(&timer->next_stat); ++ oldclock->nrtimers--; ++ xnvfile_touch(&oldclock->timer_vfile); ++ list_add_tail(&timer->next_stat, &newclock->timerq); ++ newclock->nrtimers++; ++ xnvfile_touch(&newclock->timer_vfile); ++ timer->tracker = newclock; ++} ++ ++void xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ __xntimer_switch_tracking(timer, newclock); ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_switch_tracking); ++ ++#else ++ ++static inline ++void __xntimer_switch_tracking(struct xntimer *timer, ++ struct xnclock *newclock) ++{ } ++ ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++/** ++ * @brief Set the reference clock of a timer. ++ * ++ * This service changes the reference clock pacing a timer. If the ++ * clock timers are tracked, the tracking information is updated too. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param newclock The address of a valid clock descriptor. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void xntimer_set_clock(struct xntimer *timer, ++ struct xnclock *newclock) ++{ ++ if (timer->clock != newclock) { ++ xntimer_stop(timer); ++ timer->clock = newclock; ++ /* ++ * Since the timer was stopped, we can wait until it ++ * is restarted for fixing its CPU affinity. ++ */ ++ __xntimer_switch_tracking(timer, newclock); ++ } ++} ++ ++#endif /* CONFIG_XENO_OPT_EXTCLOCK */ ++ ++/** ++ * @fn void xntimer_destroy(struct xntimer *timer) ++ * ++ * @brief Release a timer object. ++ * ++ * Destroys a timer. 
After it has been destroyed, all resources ++ * associated with the timer have been released. The timer is ++ * automatically deactivated before deletion if active on entry. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @coretags{unrestricted} ++ */ ++void xntimer_destroy(struct xntimer *timer) ++{ ++ struct xnclock *clock __maybe_unused = xntimer_clock(timer); ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ xntimer_stop(timer); ++ timer->status |= XNTIMER_KILLED; ++ timer->sched = NULL; ++#ifdef CONFIG_XENO_OPT_STATS ++ list_del(&timer->next_stat); ++ clock->nrtimers--; ++ xnvfile_touch(&clock->timer_vfile); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ xnlock_put_irqrestore(&nklock, s); ++} ++EXPORT_SYMBOL_GPL(xntimer_destroy); ++ ++#ifdef CONFIG_SMP ++ ++/** ++ * Migrate a timer. ++ * ++ * This call migrates a timer to another cpu. In order to avoid ++ * pathological cases, it must be called from the CPU to which @a ++ * timer is currently attached. ++ * ++ * @param timer The address of the timer object to be migrated. ++ * ++ * @param sched The address of the destination per-CPU scheduler ++ * slot. ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++void __xntimer_migrate(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off, sched != timer->sched */ ++ struct xnclock *clock; ++ xntimerq_t *q; ++ ++ trace_cobalt_timer_migrate(timer, xnsched_cpu(sched)); ++ ++ /* ++ * This assertion triggers when the timer is migrated to a CPU ++ * for which we do not expect any clock events/IRQs from the ++ * associated clock device. If so, the timer would never fire ++ * since clock ticks would never happen on that CPU. ++ */ ++ XENO_WARN_ON_SMP(COBALT, ++ !cpumask_empty(&xntimer_clock(timer)->affinity) && ++ !cpumask_test_cpu(xnsched_cpu(sched), ++ &xntimer_clock(timer)->affinity)); ++ ++ if (timer->status & XNTIMER_RUNNING) { ++ xntimer_stop(timer); ++ timer->sched = sched; ++ clock = xntimer_clock(timer); ++ q = xntimer_percpu_queue(timer); ++ xntimer_enqueue(timer, q); ++ if (xntimer_heading_p(timer)) ++ xnclock_remote_shot(clock, sched); ++ } else ++ timer->sched = sched; ++} ++EXPORT_SYMBOL_GPL(__xntimer_migrate); ++ ++static inline int get_clock_cpu(struct xnclock *clock, int cpu) ++{ ++ /* ++ * Check a CPU number against the possible set of CPUs ++ * receiving events from the underlying clock device. If the ++ * suggested CPU does not receive events from this device, ++ * return the first one which does instead. ++ * ++ * A global clock device with no particular IRQ affinity may ++ * tick on any CPU, but timers should always be queued on ++ * CPU0. ++ * ++ * NOTE: we have scheduler slots initialized for all online ++ * CPUs, we can program and receive clock ticks on any of ++ * them. So there is no point in restricting the valid CPU set ++ * to cobalt_cpu_affinity, which specifically refers to the ++ * set of CPUs which may run real-time threads. Although ++ * receiving a clock tick for waking up a thread living on a ++ * remote CPU is not optimal since this involves IPI-signaled ++ * rescheds, this is still a valid case. 
++ */ ++ if (cpumask_empty(&clock->affinity)) ++ return 0; ++ ++ if (cpumask_test_cpu(cpu, &clock->affinity)) ++ return cpu; ++ ++ return cpumask_first(&clock->affinity); ++} ++ ++void __xntimer_set_affinity(struct xntimer *timer, struct xnsched *sched) ++{ /* nklocked, IRQs off */ ++ struct xnclock *clock = xntimer_clock(timer); ++ int cpu; ++ ++ /* ++ * Figure out which CPU is best suited for managing this ++ * timer, preferably picking xnsched_cpu(sched) if the ticking ++ * device moving the timer clock beats on that CPU. Otherwise, ++ * pick the first CPU from the clock affinity mask if set. If ++ * not, the timer is backed by a global device with no ++ * particular IRQ affinity, so it should always be queued to ++ * CPU0. ++ */ ++ cpu = 0; ++ if (!cpumask_empty(&clock->affinity)) ++ cpu = get_clock_cpu(clock, xnsched_cpu(sched)); ++ ++ xntimer_migrate(timer, xnsched_struct(cpu)); ++} ++EXPORT_SYMBOL_GPL(__xntimer_set_affinity); ++ ++int xntimer_setup_ipi(void) ++{ ++ return ipipe_request_irq(&xnsched_realtime_domain, ++ IPIPE_HRTIMER_IPI, ++ (ipipe_irq_handler_t)xnintr_core_clock_handler, ++ NULL, NULL); ++} ++ ++void xntimer_release_ipi(void) ++{ ++ ipipe_free_irq(&xnsched_realtime_domain, IPIPE_HRTIMER_IPI); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * Get the count of overruns for the last tick. ++ * ++ * This service returns the count of pending overruns for the last ++ * tick of a given timer, as measured by the difference between the ++ * expected expiry date of the timer and the date @a now passed as ++ * argument. ++ * ++ * @param timer The address of a valid timer descriptor. ++ * ++ * @param waiter The thread for which the overrun count is being ++ * collected. ++ * ++ * @param now current date (as ++ * xnclock_read_raw(xntimer_clock(timer))) ++ * ++ * @return the number of overruns of @a timer at date @a now ++ * ++ * @coretags{unrestricted, atomic-entry} ++ */ ++unsigned long long xntimer_get_overruns(struct xntimer *timer, ++ struct xnthread *waiter, ++ xnticks_t now) ++{ ++ xnticks_t period = timer->interval; ++ unsigned long long overruns = 0; ++ xnsticks_t delta; ++ xntimerq_t *q; ++ ++ delta = now - xntimer_pexpect(timer); ++ if (unlikely(delta >= (xnsticks_t) period)) { ++ period = timer->interval_ns; ++ delta = xnclock_ticks_to_ns(xntimer_clock(timer), delta); ++ overruns = xnarch_div64(delta, period); ++ timer->pexpect_ticks += overruns; ++ if (xntimer_running_p(timer)) { ++ XENO_BUG_ON(COBALT, (timer->status & ++ (XNTIMER_DEQUEUED|XNTIMER_PERIODIC)) ++ != XNTIMER_PERIODIC); ++ q = xntimer_percpu_queue(timer); ++ xntimer_dequeue(timer, q); ++ while (xntimerh_date(&timer->aplink) < now) { ++ timer->periodic_ticks++; ++ xntimer_update_date(timer); ++ } ++ xntimer_enqueue_and_program(timer, q); ++ } ++ } ++ ++ timer->pexpect_ticks++; ++ ++ /* Hide overruns due to the most recent ptracing session. 
*/ ++ if (xnthread_test_localinfo(waiter, XNHICCUP)) ++ return 0; ++ ++ return overruns; ++} ++EXPORT_SYMBOL_GPL(xntimer_get_overruns); ++ ++char *xntimer_format_time(xnticks_t ns, char *buf, size_t bufsz) ++{ ++ unsigned long ms, us, rem; ++ int len = (int)bufsz; ++ char *p = buf; ++ xnticks_t sec; ++ ++ if (ns == 0 && bufsz > 1) { ++ strcpy(buf, "-"); ++ return buf; ++ } ++ ++ sec = xnclock_divrem_billion(ns, &rem); ++ us = rem / 1000; ++ ms = us / 1000; ++ us %= 1000; ++ ++ if (sec) { ++ p += ksformat(p, bufsz, "%Lus", sec); ++ len = bufsz - (p - buf); ++ } ++ ++ if (len > 0 && (ms || (sec && us))) { ++ p += ksformat(p, bufsz - (p - buf), "%lums", ms); ++ len = bufsz - (p - buf); ++ } ++ ++ if (len > 0 && us) ++ p += ksformat(p, bufsz - (p - buf), "%luus", us); ++ ++ return buf; ++} ++EXPORT_SYMBOL_GPL(xntimer_format_time); ++ ++/** ++ * @internal ++ * @fn static int program_htick_shot(unsigned long delay, struct clock_event_device *cdev) ++ * ++ * @brief Program next host tick as a Xenomai timer event. ++ * ++ * Program the next shot for the host tick on the current CPU. ++ * Emulation is done using a nucleus timer attached to the master ++ * timebase. ++ * ++ * @param delay The time delta from the current date to the next tick, ++ * expressed as a count of nanoseconds. ++ * ++ * @param cdev An pointer to the clock device which notifies us. ++ * ++ * @coretags{unrestricted} ++ */ ++static int program_htick_shot(unsigned long delay, ++ struct clock_event_device *cdev) ++{ ++ struct xnsched *sched; ++ int ret; ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_current(); ++ ret = xntimer_start(&sched->htimer, delay, XN_INFINITE, XN_RELATIVE); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return ret ? -ETIME : 0; ++} ++ ++/** ++ * @internal ++ * @fn void switch_htick_mode(enum clock_event_mode mode, struct clock_event_device *cdev) ++ * ++ * @brief Tick mode switch emulation callback. ++ * ++ * Changes the host tick mode for the tick device of the current CPU. ++ * ++ * @param mode The new mode to switch to. The possible values are: ++ * ++ * - CLOCK_EVT_MODE_ONESHOT, for a switch to oneshot mode. ++ * ++ * - CLOCK_EVT_MODE_PERIODIC, for a switch to periodic mode. The current ++ * implementation for the generic clockevent layer Linux exhibits ++ * should never downgrade from a oneshot to a periodic tick mode, so ++ * this mode should not be encountered. This said, the associated code ++ * is provided, basically for illustration purposes. ++ * ++ * - CLOCK_EVT_MODE_SHUTDOWN, indicates the removal of the current ++ * tick device. Normally, the nucleus only interposes on tick devices ++ * which should never be shut down, so this mode should not be ++ * encountered. ++ * ++ * @param cdev An opaque pointer to the clock device which notifies us. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note GENERIC_CLOCKEVENTS is required from the host kernel. 
++ */ ++static void switch_htick_mode(enum clock_event_mode mode, ++ struct clock_event_device *cdev) ++{ ++ struct xnsched *sched; ++ xnticks_t tickval; ++ spl_t s; ++ ++ if (mode == CLOCK_EVT_MODE_ONESHOT) ++ return; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ sched = xnsched_current(); ++ ++ switch (mode) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ tickval = 1000000000UL / HZ; ++ xntimer_start(&sched->htimer, tickval, tickval, XN_RELATIVE); ++ break; ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ xntimer_stop(&sched->htimer); ++ break; ++ default: ++ XENO_BUG(COBALT); ++ } ++ ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++/** ++ * @fn int xntimer_grab_hardware(void) ++ * @brief Grab the hardware timer on all real-time CPUs. ++ * ++ * xntimer_grab_hardware() grabs and tunes the hardware timer for all ++ * real-time CPUs. ++ * ++ * Host tick emulation is performed for sharing the clock chip between ++ * Linux and Xenomai. ++ * ++ * @return a positive value is returned on success, representing the ++ * duration of a Linux periodic tick expressed as a count of ++ * nanoseconds; zero should be returned when the Linux kernel does not ++ * undergo periodic timing on the given CPU (e.g. oneshot ++ * mode). Otherwise: ++ * ++ * - -EBUSY is returned if the hardware timer has already been ++ * grabbed. xntimer_release_hardware() must be issued before ++ * xntimer_grab_hardware() is called again. ++ * ++ * - -ENODEV is returned if the hardware timer cannot be used. This ++ * situation may occur after the kernel disabled the timer due to ++ * invalid calibration results; in such a case, such hardware is ++ * unusable for any timing duties. ++ * ++ * @coretags{secondary-only} ++ */ ++static int grab_hardware_timer(int cpu) ++{ ++ int tickval, ret; ++ ++ ret = ipipe_timer_start(xnintr_core_clock_handler, ++ switch_htick_mode, program_htick_shot, cpu); ++ switch (ret) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ /* ++ * Oneshot tick emulation callback won't be used, ask ++ * the caller to start an internal timer for emulating ++ * a periodic tick. ++ */ ++ tickval = 1000000000UL / HZ; ++ break; ++ ++ case CLOCK_EVT_MODE_ONESHOT: ++ /* oneshot tick emulation */ ++ tickval = 1; ++ break; ++ ++ case CLOCK_EVT_MODE_UNUSED: ++ /* we don't need to emulate the tick at all. */ ++ tickval = 0; ++ break; ++ ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ return -ENODEV; ++ ++ default: ++ return ret; ++ } ++ ++ return tickval; ++} ++ ++int xntimer_grab_hardware(void) ++{ ++ struct xnsched *sched; ++ int ret, cpu, _cpu; ++ spl_t s; ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ /* ++ * Only for statistical purpose, the timer interrupt is ++ * attached by xntimer_grab_hardware(). ++ */ ++ xnintr_init(&nktimer, "[timer]", ++ per_cpu(ipipe_percpu.hrtimer_irq, 0), NULL, NULL, 0); ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++ ++ nkclock.wallclock_offset = ++ xnclock_get_host_time() - xnclock_read_monotonic(&nkclock); ++ ++ ret = xntimer_setup_ipi(); ++ if (ret) ++ return ret; ++ ++ for_each_realtime_cpu(cpu) { ++ ret = grab_hardware_timer(cpu); ++ if (ret < 0) ++ goto fail; ++ ++ xnlock_get_irqsave(&nklock, s); ++ ++ /* ++ * If the current tick device for the target CPU is ++ * periodic, we won't be called back for host tick ++ * emulation. Therefore, we need to start a periodic ++ * nucleus timer which will emulate the ticking for ++ * that CPU, since we are going to hijack the hw clock ++ * chip for managing our own system timer. 
++ * ++ * CAUTION: ++ * ++ * - nucleus timers may be started only _after_ the hw ++ * timer has been set up for the target CPU through a ++ * call to xntimer_grab_hardware(). ++ * ++ * - we don't compensate for the elapsed portion of ++ * the current host tick, since we cannot get this ++ * information easily for all CPUs except the current ++ * one, and also because of the declining relevance of ++ * the jiffies clocksource anyway. ++ * ++ * - we must not hold the nklock across calls to ++ * xntimer_grab_hardware(). ++ */ ++ ++ sched = xnsched_struct(cpu); ++ /* Set up timer with host tick period if valid. */ ++ if (ret > 1) ++ xntimer_start(&sched->htimer, ret, ret, XN_RELATIVE); ++ else if (ret == 1) ++ xntimer_start(&sched->htimer, 0, 0, XN_RELATIVE); ++ ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return 0; ++fail: ++ for_each_realtime_cpu(_cpu) { ++ if (_cpu == cpu) ++ break; ++ xnlock_get_irqsave(&nklock, s); ++ sched = xnsched_struct(cpu); ++ xntimer_stop(&sched->htimer); ++ xnlock_put_irqrestore(&nklock, s); ++ ipipe_timer_stop(_cpu); ++ } ++ ++ xntimer_release_ipi(); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xntimer_grab_hardware); ++ ++/** ++ * @fn void xntimer_release_hardware(void) ++ * @brief Release hardware timers. ++ * ++ * Releases hardware timers previously grabbed by a call to ++ * xntimer_grab_hardware(). ++ * ++ * @coretags{secondary-only} ++ */ ++void xntimer_release_hardware(void) ++{ ++ int cpu; ++ ++ /* ++ * We must not hold the nklock while stopping the hardware ++ * timer, since this could cause deadlock situations to arise ++ * on SMP systems. ++ */ ++ for_each_realtime_cpu(cpu) ++ ipipe_timer_stop(cpu); ++ ++ xntimer_release_ipi(); ++ ++#ifdef CONFIG_XENO_OPT_STATS_IRQS ++ xnintr_destroy(&nktimer); ++#endif /* CONFIG_XENO_OPT_STATS_IRQS */ ++} ++EXPORT_SYMBOL_GPL(xntimer_release_hardware); ++ ++#if defined(CONFIG_XENO_OPT_TIMER_RBTREE) ++static inline bool xntimerh_is_lt(xntimerh_t *left, xntimerh_t *right) ++{ ++ return left->date < right->date ++ || (left->date == right->date && left->prio > right->prio); ++} ++ ++void xntimerq_insert(xntimerq_t *q, xntimerh_t *holder) ++{ ++ struct rb_node **new = &q->root.rb_node, *parent = NULL; ++ ++ if (!q->head) ++ q->head = holder; ++ else if (xntimerh_is_lt(holder, q->head)) { ++ parent = &q->head->link; ++ new = &parent->rb_left; ++ q->head = holder; ++ } else while (*new) { ++ xntimerh_t *i = container_of(*new, xntimerh_t, link); ++ ++ parent = *new; ++ if (xntimerh_is_lt(holder, i)) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&holder->link, parent, new); ++ rb_insert_color(&holder->link, &q->root); ++} ++#endif ++ ++/** @} */ +--- linux/kernel/xenomai/clock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/clock.c 2021-04-07 16:01:25.775636241 +0800 +@@ -0,0 +1,900 @@ ++/* ++ * Copyright (C) 2006-2011 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_clock Clock services ++ * ++ * @{ ++ */ ++unsigned long nktimerlat; ++ ++static unsigned long long clockfreq; ++ ++#ifdef XNARCH_HAVE_LLMULSHFT ++ ++static unsigned int tsc_scale, tsc_shift; ++ ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ ++static struct xnarch_u32frac tsc_frac; ++static struct xnarch_u32frac bln_frac; ++ ++long long xnclock_core_ns_to_ticks(long long ns) ++{ ++ return xnarch_nodiv_llimd(ns, tsc_frac.frac, tsc_frac.integ); ++} ++ ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem) ++{ ++ unsigned long long q; ++ unsigned r; ++ ++ q = xnarch_nodiv_ullimd(value, bln_frac.frac, bln_frac.integ); ++ r = value - q * 1000000000; ++ if (r >= 1000000000) { ++ ++q; ++ r -= 1000000000; ++ } ++ *rem = r; ++ return q; ++} ++ ++#else /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++long long xnclock_core_ns_to_ticks(long long ns) ++{ ++ return xnarch_llimd(ns, 1 << tsc_shift, tsc_scale); ++} ++ ++#endif /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks) ++{ ++ return xnarch_llmulshft(ticks, tsc_scale, tsc_shift); ++} ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks) ++{ ++ unsigned int shift = tsc_shift - 1; ++ return (xnarch_llmulshft(ticks, tsc_scale, shift) + 1) / 2; ++} ++ ++#else /* !XNARCH_HAVE_LLMULSHFT */ ++ ++xnsticks_t xnclock_core_ticks_to_ns(xnsticks_t ticks) ++{ ++ return xnarch_llimd(ticks, 1000000000, clockfreq); ++} ++ ++xnsticks_t xnclock_core_ticks_to_ns_rounded(xnsticks_t ticks) ++{ ++ return (xnarch_llimd(ticks, 1000000000, clockfreq/2) + 1) / 2; ++} ++ ++xnsticks_t xnclock_core_ns_to_ticks(xnsticks_t ns) ++{ ++ return xnarch_llimd(ns, clockfreq, 1000000000); ++} ++ ++#endif /* !XNARCH_HAVE_LLMULSHFT */ ++ ++#ifndef XNARCH_HAVE_NODIV_LLIMD ++unsigned long long xnclock_divrem_billion(unsigned long long value, ++ unsigned long *rem) ++{ ++ return xnarch_ulldiv(value, 1000000000, rem); ++ ++} ++#endif /* !XNARCH_HAVE_NODIV_LLIMD */ ++ ++EXPORT_SYMBOL_GPL(xnclock_core_ticks_to_ns); ++EXPORT_SYMBOL_GPL(xnclock_core_ticks_to_ns_rounded); ++EXPORT_SYMBOL_GPL(xnclock_core_ns_to_ticks); ++EXPORT_SYMBOL_GPL(xnclock_divrem_billion); ++ ++DEFINE_PRIVATE_XNLOCK(ratelimit_lock); ++ ++int __xnclock_ratelimit(struct xnclock_ratelimit_state *rs, const char *func) ++{ ++ spl_t s; ++ int ret; ++ ++ if (!rs->interval) ++ return 1; ++ ++ xnlock_get_irqsave(&ratelimit_lock, s); ++ ++ if (!rs->begin) ++ rs->begin = xnclock_read_realtime(&nkclock); ++ if (xnclock_read_realtime(&nkclock) >= rs->begin + rs->interval) { ++ if (rs->missed) ++ printk(KERN_WARNING "%s: %d callbacks suppressed\n", ++ func, rs->missed); ++ rs->begin = 0; ++ rs->printed = 0; ++ rs->missed = 0; ++ } ++ if (rs->burst && rs->burst > rs->printed) { ++ rs->printed++; ++ ret = 1; ++ } else { ++ rs->missed++; ++ ret = 0; ++ } ++ xnlock_put_irqrestore(&ratelimit_lock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__xnclock_ratelimit); ++ ++void xnclock_core_local_shot(struct xnsched *sched) ++{ ++ struct xntimerdata *tmd; ++ struct xntimer *timer; ++ xnsticks_t delay; ++ xntimerh_t *h; ++ ++ /* ++ * Do not reprogram locally when inside the tick 
handler - ++ * will be done on exit anyway. Also exit if there is no ++ * pending timer. ++ */ ++ if (sched->status & XNINTCK) ++ return; ++ ++ /* ++ * Assume the core clock device always has percpu semantics in ++ * SMP. ++ */ ++ tmd = xnclock_this_timerdata(&nkclock); ++ h = xntimerq_head(&tmd->q); ++ if (h == NULL) { ++ sched->lflags |= XNIDLE; ++ return; ++ } ++ ++ /* ++ * Here we try to defer the host tick heading the timer queue, ++ * so that it does not preempt a real-time activity uselessly, ++ * in two cases: ++ * ++ * 1) a rescheduling is pending for the current CPU. We may ++ * assume that a real-time thread is about to resume, so we ++ * want to move the host tick out of the way until the host ++ * kernel resumes, unless there is no other outstanding ++ * timers. ++ * ++ * 2) the current thread is running in primary mode, in which ++ * case we may also defer the host tick until the host kernel ++ * resumes. ++ * ++ * The host tick deferral is cleared whenever Xenomai is about ++ * to yield control to the host kernel (see ___xnsched_run()), ++ * or a timer with an earlier timeout date is scheduled, ++ * whichever comes first. ++ */ ++ sched->lflags &= ~(XNHDEFER|XNIDLE); ++ timer = container_of(h, struct xntimer, aplink); ++ if (unlikely(timer == &sched->htimer)) { ++ if (xnsched_resched_p(sched) || ++ !xnthread_test_state(sched->curr, XNROOT)) { ++ h = xntimerq_second(&tmd->q, h); ++ if (h) { ++ sched->lflags |= XNHDEFER; ++ timer = container_of(h, struct xntimer, aplink); ++ } ++ } ++ } ++ ++ delay = xntimerh_date(&timer->aplink) - xnclock_core_read_raw(); ++ if (delay < 0) ++ delay = 0; ++ else if (delay > ULONG_MAX) ++ delay = ULONG_MAX; ++ ++ xntrace_tick((unsigned)delay); ++ ++ ipipe_timer_set(delay); ++} ++ ++#ifdef CONFIG_SMP ++void xnclock_core_remote_shot(struct xnsched *sched) ++{ ++ ipipe_send_ipi(IPIPE_HRTIMER_IPI, *cpumask_of(xnsched_cpu(sched))); ++} ++#endif ++ ++static void adjust_timer(struct xntimer *timer, xntimerq_t *q, ++ xnsticks_t delta) ++{ ++ struct xnclock *clock = xntimer_clock(timer); ++ xnticks_t period, div; ++ xnsticks_t diff; ++ ++ xntimerh_date(&timer->aplink) -= delta; ++ ++ if (xntimer_periodic_p(timer) == 0) ++ goto enqueue; ++ ++ timer->start_date -= delta; ++ period = xntimer_interval(timer); ++ diff = xnclock_ticks_to_ns(clock, ++ xnclock_read_raw(clock) - xntimer_expiry(timer)); ++ ++ if ((xnsticks_t)(diff - period) >= 0) { ++ /* ++ * Timer should tick several times before now, instead ++ * of calling timer->handler several times, we change ++ * the timer date without changing its pexpect, so ++ * that timer will tick only once and the lost ticks ++ * will be counted as overruns. ++ */ ++ div = xnarch_div64(diff, period); ++ timer->periodic_ticks += div; ++ xntimer_update_date(timer); ++ } else if (delta < 0 ++ && (timer->status & XNTIMER_FIRED) ++ && (xnsticks_t) (diff + period) <= 0) { ++ /* ++ * Timer is periodic and NOT waiting for its first ++ * shot, so we make it tick sooner than its original ++ * date in order to avoid the case where by adjusting ++ * time to a sooner date, real-time periodic timers do ++ * not tick until the original date has passed. 
++ */ ++ div = xnarch_div64(-diff, period); ++ timer->periodic_ticks -= div; ++ timer->pexpect_ticks -= div; ++ xntimer_update_date(timer); ++ } ++ ++enqueue: ++ xntimer_enqueue(timer, q); ++} ++ ++static void adjust_clock_timers(struct xnclock *clock, xnsticks_t delta) ++{ ++ struct xntimer *timer, *tmp; ++ struct list_head adjq; ++ struct xnsched *sched; ++ xntimerq_it_t it; ++ unsigned int cpu; ++ xntimerh_t *h; ++ xntimerq_t *q; ++ ++ INIT_LIST_HEAD(&adjq); ++ delta = xnclock_ns_to_ticks(clock, delta); ++ ++ for_each_online_cpu(cpu) { ++ sched = xnsched_struct(cpu); ++ q = &xnclock_percpu_timerdata(clock, cpu)->q; ++ ++ for (h = xntimerq_it_begin(q, &it); h; ++ h = xntimerq_it_next(q, &it, h)) { ++ timer = container_of(h, struct xntimer, aplink); ++ if (timer->status & XNTIMER_REALTIME) ++ list_add_tail(&timer->adjlink, &adjq); ++ } ++ ++ if (list_empty(&adjq)) ++ continue; ++ ++ list_for_each_entry_safe(timer, tmp, &adjq, adjlink) { ++ list_del(&timer->adjlink); ++ xntimer_dequeue(timer, q); ++ adjust_timer(timer, q, delta); ++ } ++ ++ if (sched != xnsched_current()) ++ xnclock_remote_shot(clock, sched); ++ else ++ xnclock_program_shot(clock, sched); ++ } ++} ++ ++/** ++ * @fn void xnclock_adjust(struct xnclock *clock, xnsticks_t delta) ++ * @brief Adjust a clock time. ++ * ++ * This service changes the epoch for the given clock by applying the ++ * specified tick delta on its wallclock offset. ++ * ++ * @param clock The clock to adjust. ++ * ++ * @param delta The adjustment value expressed in nanoseconds. ++ * ++ * @coretags{task-unrestricted, atomic-entry} ++ * ++ * @note Xenomai tracks the system time in @a nkclock, as a ++ * monotonously increasing count of ticks since the epoch. The epoch ++ * is initially the same as the underlying machine time. 
++ */ ++void xnclock_adjust(struct xnclock *clock, xnsticks_t delta) ++{ ++ xnticks_t now; ++ ++ nkclock.wallclock_offset += delta; ++ nkvdso->wallclock_offset = nkclock.wallclock_offset; ++ now = xnclock_read_monotonic(clock) + nkclock.wallclock_offset; ++ adjust_clock_timers(clock, delta); ++} ++EXPORT_SYMBOL_GPL(xnclock_adjust); ++ ++xnticks_t xnclock_get_host_time(void) ++{ ++ return ktime_to_ns(ktime_get_real()); ++} ++EXPORT_SYMBOL_GPL(xnclock_get_host_time); ++ ++xnticks_t xnclock_core_read_monotonic(void) ++{ ++ return xnclock_core_ticks_to_ns(xnclock_core_read_raw()); ++} ++EXPORT_SYMBOL_GPL(xnclock_core_read_monotonic); ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ ++static struct xnvfile_directory timerlist_vfroot; ++ ++static struct xnvfile_snapshot_ops timerlist_ops; ++ ++struct vfile_clock_priv { ++ struct xntimer *curr; ++}; ++ ++struct vfile_clock_data { ++ int cpu; ++ unsigned int scheduled; ++ unsigned int fired; ++ xnticks_t timeout; ++ xnticks_t interval; ++ unsigned long status; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static int timerlist_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_clock_priv *priv = xnvfile_iterator_priv(it); ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ ++ if (list_empty(&clock->timerq)) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&clock->timerq, struct xntimer, next_stat); ++ ++ return clock->nrtimers; ++} ++ ++static int timerlist_next(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_clock_priv *priv = xnvfile_iterator_priv(it); ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ struct vfile_clock_data *p = data; ++ struct xntimer *timer; ++ ++ if (priv->curr == NULL) ++ return 0; ++ ++ timer = priv->curr; ++ if (list_is_last(&timer->next_stat, &clock->timerq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_entry(timer->next_stat.next, ++ struct xntimer, next_stat); ++ ++ if (clock == &nkclock && xnstat_counter_get(&timer->scheduled) == 0) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(xntimer_sched(timer)); ++ p->scheduled = xnstat_counter_get(&timer->scheduled); ++ p->fired = xnstat_counter_get(&timer->fired); ++ p->timeout = xntimer_get_timeout(timer); ++ p->interval = xntimer_interval(timer); ++ p->status = timer->status; ++ knamecpy(p->name, timer->name); ++ ++ return 1; ++} ++ ++static int timerlist_show(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_clock_data *p = data; ++ char timeout_buf[] = "- "; ++ char interval_buf[] = "- "; ++ char hit_buf[32]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, ++ "%-3s %-20s %-10s %-10s %s\n", ++ "CPU", "SCHED/SHOT", "TIMEOUT", ++ "INTERVAL", "NAME"); ++ else { ++ if (p->status & XNTIMER_RUNNING) ++ xntimer_format_time(p->timeout, timeout_buf, ++ sizeof(timeout_buf)); ++ if (p->status & XNTIMER_PERIODIC) ++ xntimer_format_time(p->interval, interval_buf, ++ sizeof(interval_buf)); ++ ksformat(hit_buf, sizeof(hit_buf), "%u/%u", ++ p->scheduled, p->fired); ++ xnvfile_printf(it, ++ "%-3u %-20s %-10s %-10s %s\n", ++ p->cpu, hit_buf, timeout_buf, ++ interval_buf, p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops timerlist_ops = { ++ .rewind = timerlist_rewind, ++ .next = timerlist_next, ++ .show = timerlist_show, ++}; ++ ++static void init_timerlist_proc(struct xnclock *clock) ++{ ++ memset(&clock->timer_vfile, 0, sizeof(clock->timer_vfile)); ++ clock->timer_vfile.privsz = sizeof(struct vfile_clock_priv); ++ clock->timer_vfile.datasz = sizeof(struct vfile_clock_data); ++ clock->timer_vfile.tag = 
&clock->timer_revtag; ++ clock->timer_vfile.ops = &timerlist_ops; ++ ++ xnvfile_init_snapshot(clock->name, &clock->timer_vfile, &timerlist_vfroot); ++ xnvfile_priv(&clock->timer_vfile) = clock; ++} ++ ++static void cleanup_timerlist_proc(struct xnclock *clock) ++{ ++ xnvfile_destroy_snapshot(&clock->timer_vfile); ++} ++ ++void init_timerlist_root(void) ++{ ++ xnvfile_init_dir("timer", &timerlist_vfroot, &cobalt_vfroot); ++} ++ ++void cleanup_timerlist_root(void) ++{ ++ xnvfile_destroy_dir(&timerlist_vfroot); ++} ++ ++#else /* !CONFIG_XENO_OPT_STATS */ ++ ++static inline void init_timerlist_root(void) { } ++ ++static inline void cleanup_timerlist_root(void) { } ++ ++static inline void init_timerlist_proc(struct xnclock *clock) { } ++ ++static inline void cleanup_timerlist_proc(struct xnclock *clock) { } ++ ++#endif /* !CONFIG_XENO_OPT_STATS */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_directory clock_vfroot; ++ ++void print_core_clock_status(struct xnclock *clock, ++ struct xnvfile_regular_iterator *it) ++{ ++ const char *wd_status = "off"; ++ ++#ifdef CONFIG_XENO_OPT_WATCHDOG ++ wd_status = "on"; ++#endif /* CONFIG_XENO_OPT_WATCHDOG */ ++ ++ xnvfile_printf(it, "%8s: timer=%s, clock=%s\n", ++ "devices", ipipe_timer_name(), ipipe_clock_name()); ++ xnvfile_printf(it, "%8s: %s\n", "watchdog", wd_status); ++ xnvfile_printf(it, "%8s: %Lu\n", "setup", ++ xnclock_ticks_to_ns(&nkclock, nktimerlat)); ++} ++ ++static int clock_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct xnclock *clock = xnvfile_priv(it->vfile); ++ xnticks_t now = xnclock_read_raw(clock); ++ ++ if (clock->id >= 0) /* External clock, print id. */ ++ xnvfile_printf(it, "%7s: %d\n", "id", __COBALT_CLOCK_EXT(clock->id)); ++ ++ xnvfile_printf(it, "%7s: irq=%Ld kernel=%Ld user=%Ld\n", "gravity", ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, irq)), ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, kernel)), ++ xnclock_ticks_to_ns(clock, xnclock_get_gravity(clock, user))); ++ ++ xnclock_print_status(clock, it); ++ ++ xnvfile_printf(it, "%7s: %Lu (%.4Lx %.4x)\n", "ticks", ++ now, now >> 32, (u32)(now & -1U)); ++ ++ return 0; ++} ++ ++static ssize_t clock_store(struct xnvfile_input *input) ++{ ++ char buf[128], *args = buf, *p; ++ struct xnclock_gravity gravity; ++ struct xnvfile_regular *vfile; ++ unsigned long ns, ticks; ++ struct xnclock *clock; ++ ssize_t nbytes; ++ int ret; ++ ++ nbytes = xnvfile_get_string(input, buf, sizeof(buf)); ++ if (nbytes < 0) ++ return nbytes; ++ ++ vfile = container_of(input->vfile, struct xnvfile_regular, entry); ++ clock = xnvfile_priv(vfile); ++ gravity = clock->gravity; ++ ++ while ((p = strsep(&args, " \t:/,")) != NULL) { ++ if (*p == '\0') ++ continue; ++ ns = simple_strtol(p, &p, 10); ++ ticks = xnclock_ns_to_ticks(clock, ns); ++ switch (*p) { ++ case 'i': ++ gravity.irq = ticks; ++ break; ++ case 'k': ++ gravity.kernel = ticks; ++ break; ++ case 'u': ++ case '\0': ++ gravity.user = ticks; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ret = xnclock_set_gravity(clock, &gravity); ++ if (ret) ++ return ret; ++ } ++ ++ return nbytes; ++} ++ ++static struct xnvfile_regular_ops clock_ops = { ++ .show = clock_show, ++ .store = clock_store, ++}; ++ ++static void init_clock_proc(struct xnclock *clock) ++{ ++ memset(&clock->vfile, 0, sizeof(clock->vfile)); ++ clock->vfile.ops = &clock_ops; ++ xnvfile_init_regular(clock->name, &clock->vfile, &clock_vfroot); ++ xnvfile_priv(&clock->vfile) = clock; ++ init_timerlist_proc(clock); ++} ++ ++static void 
cleanup_clock_proc(struct xnclock *clock) ++{ ++ cleanup_timerlist_proc(clock); ++ xnvfile_destroy_regular(&clock->vfile); ++} ++ ++void xnclock_init_proc(void) ++{ ++ xnvfile_init_dir("clock", &clock_vfroot, &cobalt_vfroot); ++ init_timerlist_root(); ++} ++ ++void xnclock_cleanup_proc(void) ++{ ++ xnvfile_destroy_dir(&clock_vfroot); ++ cleanup_timerlist_root(); ++} ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static inline void init_clock_proc(struct xnclock *clock) { } ++ ++static inline void cleanup_clock_proc(struct xnclock *clock) { } ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++/** ++ * @brief Register a Xenomai clock. ++ * ++ * This service installs a new clock which may be used to drive ++ * Xenomai timers. ++ * ++ * @param clock The new clock to register. ++ * ++ * @param affinity The set of CPUs we may expect the backing clock ++ * device to tick on. As a special case, passing a NULL affinity mask ++ * means that timer IRQs cannot be seen as percpu events, in which ++ * case all outstanding timers will be maintained into a single global ++ * queue instead of percpu timer queues. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnclock_register(struct xnclock *clock, const cpumask_t *affinity) ++{ ++ struct xntimerdata *tmd; ++ int cpu; ++ ++ secondary_mode_only(); ++ ++#ifdef CONFIG_SMP ++ /* ++ * A CPU affinity set may be defined for each clock, ++ * enumerating the CPUs which can receive ticks from the ++ * backing clock device. When given, this set must be a ++ * subset of the real-time CPU set. ++ */ ++ if (affinity) { ++ cpumask_and(&clock->affinity, affinity, &xnsched_realtime_cpus); ++ if (cpumask_empty(&clock->affinity)) ++ return -EINVAL; ++ } else /* Device is global without particular IRQ affinity. */ ++ cpumask_clear(&clock->affinity); ++#endif ++ ++ /* Allocate the percpu timer queue slot. */ ++ clock->timerdata = alloc_percpu(struct xntimerdata); ++ if (clock->timerdata == NULL) ++ return -ENOMEM; ++ ++ /* ++ * POLA: init all timer slots for the new clock, although some ++ * of them might remain unused depending on the CPU affinity ++ * of the event source(s). If the clock device is global ++ * without any particular IRQ affinity, all timers will be ++ * queued to CPU0. ++ */ ++ for_each_online_cpu(cpu) { ++ tmd = xnclock_percpu_timerdata(clock, cpu); ++ xntimerq_init(&tmd->q); ++ } ++ ++#ifdef CONFIG_XENO_OPT_STATS ++ INIT_LIST_HEAD(&clock->timerq); ++#endif /* CONFIG_XENO_OPT_STATS */ ++ ++ init_clock_proc(clock); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnclock_register); ++ ++/** ++ * @fn void xnclock_deregister(struct xnclock *clock) ++ * @brief Deregister a Xenomai clock. ++ * ++ * This service uninstalls a Xenomai clock previously registered with ++ * xnclock_register(). ++ * ++ * This service may be called once all timers driven by @a clock have ++ * been stopped. ++ * ++ * @param clock The clock to deregister. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnclock_deregister(struct xnclock *clock) ++{ ++ struct xntimerdata *tmd; ++ int cpu; ++ ++ secondary_mode_only(); ++ ++ cleanup_clock_proc(clock); ++ ++ for_each_online_cpu(cpu) { ++ tmd = xnclock_percpu_timerdata(clock, cpu); ++ XENO_BUG_ON(COBALT, !xntimerq_empty(&tmd->q)); ++ xntimerq_destroy(&tmd->q); ++ } ++ ++ free_percpu(clock->timerdata); ++} ++EXPORT_SYMBOL_GPL(xnclock_deregister); ++ ++/** ++ * @fn void xnclock_tick(struct xnclock *clock) ++ * @brief Process a clock tick. ++ * ++ * This routine processes an incoming @a clock event, firing elapsed ++ * timers as appropriate. 
++ * ++ * @param clock The clock for which a new event was received. ++ * ++ * @coretags{coreirq-only, atomic-entry} ++ * ++ * @note The current CPU must be part of the real-time affinity set ++ * unless the clock device has no particular IRQ affinity, otherwise ++ * weird things may happen. ++ */ ++void xnclock_tick(struct xnclock *clock) ++{ ++ struct xnsched *sched = xnsched_current(); ++ struct xntimer *timer; ++ xnsticks_t delta; ++ xntimerq_t *tmq; ++ xnticks_t now; ++ xntimerh_t *h; ++ ++ atomic_only(); ++ ++#ifdef CONFIG_SMP ++ /* ++ * Some external clock devices may be global without any ++ * particular IRQ affinity, in which case the associated ++ * timers will be queued to CPU0. ++ */ ++ if (IS_ENABLED(CONFIG_XENO_OPT_EXTCLOCK) && ++ clock != &nkclock && ++ !cpumask_test_cpu(xnsched_cpu(sched), &clock->affinity)) ++ tmq = &xnclock_percpu_timerdata(clock, 0)->q; ++ else ++#endif ++ tmq = &xnclock_this_timerdata(clock)->q; ++ ++ /* ++ * Optimisation: any local timer reprogramming triggered by ++ * invoked timer handlers can wait until we leave the tick ++ * handler. Use this status flag as hint to xntimer_start(). ++ */ ++ sched->status |= XNINTCK; ++ ++ now = xnclock_read_raw(clock); ++ while ((h = xntimerq_head(tmq)) != NULL) { ++ timer = container_of(h, struct xntimer, aplink); ++ delta = (xnsticks_t)(xntimerh_date(&timer->aplink) - now); ++ if (delta > 0) ++ break; ++ ++ trace_cobalt_timer_expire(timer); ++ ++ xntimer_dequeue(timer, tmq); ++ xntimer_account_fired(timer); ++ ++ /* ++ * By postponing the propagation of the low-priority ++ * host tick to the interrupt epilogue (see ++ * xnintr_irq_handler()), we save some I-cache, which ++ * translates into precious microsecs on low-end hw. ++ */ ++ if (unlikely(timer == &sched->htimer)) { ++ sched->lflags |= XNHTICK; ++ sched->lflags &= ~XNHDEFER; ++ if (timer->status & XNTIMER_PERIODIC) ++ goto advance; ++ continue; ++ } ++ ++ timer->handler(timer); ++ now = xnclock_read_raw(clock); ++ timer->status |= XNTIMER_FIRED; ++ /* ++ * Only requeue periodic timers which have not been ++ * requeued, stopped or killed. ++ */ ++ if ((timer->status & ++ (XNTIMER_PERIODIC|XNTIMER_DEQUEUED|XNTIMER_KILLED|XNTIMER_RUNNING)) != ++ (XNTIMER_PERIODIC|XNTIMER_DEQUEUED|XNTIMER_RUNNING)) ++ continue; ++ advance: ++ do { ++ timer->periodic_ticks++; ++ xntimer_update_date(timer); ++ } while (xntimerh_date(&timer->aplink) < now); ++ ++#ifdef CONFIG_SMP ++ /* ++ * If the timer was migrated over its timeout handler, ++ * xntimer_migrate() re-queued it already. 
++ */ ++ if (unlikely(timer->sched != sched)) ++ continue; ++#endif ++ xntimer_enqueue(timer, tmq); ++ } ++ ++ sched->status &= ~XNINTCK; ++ ++ xnclock_program_shot(clock, sched); ++} ++EXPORT_SYMBOL_GPL(xnclock_tick); ++ ++void xnclock_update_freq(unsigned long long freq) ++{ ++ spl_t s; ++ ++ xnlock_get_irqsave(&nklock, s); ++ clockfreq = freq; ++#ifdef XNARCH_HAVE_LLMULSHFT ++ xnarch_init_llmulshft(1000000000, freq, &tsc_scale, &tsc_shift); ++#ifdef XNARCH_HAVE_NODIV_LLIMD ++ xnarch_init_u32frac(&tsc_frac, 1 << tsc_shift, tsc_scale); ++ xnarch_init_u32frac(&bln_frac, 1, 1000000000); ++#endif ++#endif ++ cobalt_pipeline.clock_freq = freq; ++ xnlock_put_irqrestore(&nklock, s); ++} ++ ++static int set_core_clock_gravity(struct xnclock *clock, ++ const struct xnclock_gravity *p) ++{ ++ nkclock.gravity = *p; ++ ++ return 0; ++} ++ ++static void reset_core_clock_gravity(struct xnclock *clock) ++{ ++ struct xnclock_gravity gravity; ++ ++ xnarch_get_latencies(&gravity); ++ gravity.user += nktimerlat; ++ if (gravity.kernel == 0) ++ gravity.kernel = gravity.user; ++ if (gravity.irq == 0) ++ gravity.irq = nktimerlat; ++ set_core_clock_gravity(clock, &gravity); ++} ++ ++struct xnclock nkclock = { ++ .name = "coreclk", ++ .resolution = 1, /* nanosecond. */ ++ .ops = { ++ .set_gravity = set_core_clock_gravity, ++ .reset_gravity = reset_core_clock_gravity, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .print_status = print_core_clock_status, ++#endif ++ }, ++ .id = -1, ++}; ++EXPORT_SYMBOL_GPL(nkclock); ++ ++void xnclock_cleanup(void) ++{ ++ xnclock_deregister(&nkclock); ++} ++ ++int __init xnclock_init(unsigned long long freq) ++{ ++ xnclock_update_freq(freq); ++ nktimerlat = xnarch_timer_calibrate(); ++ xnclock_reset_gravity(&nkclock); ++ xnclock_register(&nkclock, &xnsched_realtime_cpus); ++ ++ return 0; ++} ++ ++/** @} */ +--- linux/kernel/xenomai/sched-rt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/sched-rt.c 2021-04-07 16:01:25.770636248 +0800 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (C) 2008 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++ ++static void xnsched_rt_init(struct xnsched *sched) ++{ ++ xnsched_initq(&sched->rt.runnable); ++} ++ ++static void xnsched_rt_requeue(struct xnthread *thread) ++{ ++ /* ++ * Put back at same place: i.e. requeue to head of current ++ * priority group (i.e. LIFO, used for preemption handling). ++ */ ++ __xnsched_rt_requeue(thread); ++} ++ ++static void xnsched_rt_enqueue(struct xnthread *thread) ++{ ++ /* ++ * Enqueue for next pick: i.e. move to end of current priority ++ * group (i.e. FIFO). ++ */ ++ __xnsched_rt_enqueue(thread); ++} ++ ++static void xnsched_rt_dequeue(struct xnthread *thread) ++{ ++ /* ++ * Pull from the runnable thread queue. 
++ */ ++ __xnsched_rt_dequeue(thread); ++} ++ ++static void xnsched_rt_rotate(struct xnsched *sched, ++ const union xnsched_policy_param *p) ++{ ++ struct xnthread *thread, *curr; ++ ++ if (xnsched_emptyq_p(&sched->rt.runnable)) ++ return; /* No runnable thread in this class. */ ++ ++ curr = sched->curr; ++ ++ if (p->rt.prio == XNSCHED_RUNPRIO) ++ thread = curr; ++ else { ++ thread = xnsched_findq(&sched->rt.runnable, p->rt.prio); ++ if (thread == NULL) ++ return; ++ } ++ ++ /* ++ * In case we picked the current thread, we have to make sure ++ * not to move it back to the run queue if it was blocked ++ * before we were called. The same goes if the current thread ++ * holds the scheduler lock. ++ */ ++ if (thread != curr || ++ (!xnthread_test_state(curr, XNTHREAD_BLOCK_BITS) && ++ curr->lock_count == 0)) ++ xnsched_putback(thread); ++} ++ ++void xnsched_rt_tick(struct xnsched *sched) ++{ ++ /* ++ * The round-robin time credit is only consumed by a running ++ * thread that neither holds the scheduler lock nor was ++ * blocked before entering this callback. As the time slice is ++ * exhausted for the running thread, move it back to the ++ * run queue at the end of its priority group. ++ */ ++ xnsched_putback(sched->curr); ++} ++ ++static bool xnsched_rt_setparam(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ return __xnsched_rt_setparam(thread, p); ++} ++ ++static void xnsched_rt_getparam(struct xnthread *thread, ++ union xnsched_policy_param *p) ++{ ++ __xnsched_rt_getparam(thread, p); ++} ++ ++static void xnsched_rt_trackprio(struct xnthread *thread, ++ const union xnsched_policy_param *p) ++{ ++ __xnsched_rt_trackprio(thread, p); ++} ++ ++static void xnsched_rt_protectprio(struct xnthread *thread, int prio) ++{ ++ __xnsched_rt_protectprio(thread, prio); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++struct xnvfile_directory sched_rt_vfroot; ++ ++struct vfile_sched_rt_priv { ++ struct xnthread *curr; ++}; ++ ++struct vfile_sched_rt_data { ++ int cpu; ++ pid_t pid; ++ char name[XNOBJECT_NAME_LEN]; ++ xnticks_t period; ++ int cprio; ++}; ++ ++static struct xnvfile_snapshot_ops vfile_sched_rt_ops; ++ ++static struct xnvfile_snapshot vfile_sched_rt = { ++ .privsz = sizeof(struct vfile_sched_rt_priv), ++ .datasz = sizeof(struct vfile_sched_rt_data), ++ .tag = &nkthreadlist_tag, ++ .ops = &vfile_sched_rt_ops, ++}; ++ ++static int vfile_sched_rt_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_sched_rt_priv *priv = xnvfile_iterator_priv(it); ++ int nrthreads = xnsched_class_rt.nthreads; ++ ++ if (nrthreads == 0) ++ return -ESRCH; ++ ++ priv->curr = list_first_entry(&nkthreadq, struct xnthread, glink); ++ ++ return nrthreads; ++} ++ ++static int vfile_sched_rt_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_rt_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_sched_rt_data *p = data; ++ struct xnthread *thread; ++ ++ if (priv->curr == NULL) ++ return 0; /* All done. 
*/ ++ ++ thread = priv->curr; ++ if (list_is_last(&thread->glink, &nkthreadq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_next_entry(thread, glink); ++ ++ if (thread->base_class != &xnsched_class_rt || ++ xnthread_test_state(thread, XNWEAK)) ++ return VFILE_SEQ_SKIP; ++ ++ p->cpu = xnsched_cpu(thread->sched); ++ p->pid = xnthread_host_pid(thread); ++ memcpy(p->name, thread->name, sizeof(p->name)); ++ p->cprio = thread->cprio; ++ p->period = xnthread_get_period(thread); ++ ++ return 1; ++} ++ ++static int vfile_sched_rt_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct vfile_sched_rt_data *p = data; ++ char pribuf[16], ptbuf[16]; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%-3s %-6s %-8s %-10s %s\n", ++ "CPU", "PID", "PRI", "PERIOD", "NAME"); ++ else { ++ ksformat(pribuf, sizeof(pribuf), "%3d", p->cprio); ++ xntimer_format_time(p->period, ptbuf, sizeof(ptbuf)); ++ xnvfile_printf(it, "%3u %-6d %-8s %-10s %s\n", ++ p->cpu, ++ p->pid, ++ pribuf, ++ ptbuf, ++ p->name); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_sched_rt_ops = { ++ .rewind = vfile_sched_rt_rewind, ++ .next = vfile_sched_rt_next, ++ .show = vfile_sched_rt_show, ++}; ++ ++static int xnsched_rt_init_vfile(struct xnsched_class *schedclass, ++ struct xnvfile_directory *vfroot) ++{ ++ int ret; ++ ++ ret = xnvfile_init_dir(schedclass->name, &sched_rt_vfroot, vfroot); ++ if (ret) ++ return ret; ++ ++ return xnvfile_init_snapshot("threads", &vfile_sched_rt, ++ &sched_rt_vfroot); ++} ++ ++static void xnsched_rt_cleanup_vfile(struct xnsched_class *schedclass) ++{ ++ xnvfile_destroy_snapshot(&vfile_sched_rt); ++ xnvfile_destroy_dir(&sched_rt_vfroot); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct xnsched_class xnsched_class_rt = { ++ .sched_init = xnsched_rt_init, ++ .sched_enqueue = xnsched_rt_enqueue, ++ .sched_dequeue = xnsched_rt_dequeue, ++ .sched_requeue = xnsched_rt_requeue, ++ .sched_pick = xnsched_rt_pick, ++ .sched_tick = xnsched_rt_tick, ++ .sched_rotate = xnsched_rt_rotate, ++ .sched_forget = NULL, ++ .sched_kick = NULL, ++ .sched_declare = NULL, ++ .sched_setparam = xnsched_rt_setparam, ++ .sched_trackprio = xnsched_rt_trackprio, ++ .sched_protectprio = xnsched_rt_protectprio, ++ .sched_getparam = xnsched_rt_getparam, ++#ifdef CONFIG_XENO_OPT_VFILE ++ .sched_init_vfile = xnsched_rt_init_vfile, ++ .sched_cleanup_vfile = xnsched_rt_cleanup_vfile, ++#endif ++ .weight = XNSCHED_CLASS_WEIGHT(4), ++ .policy = SCHED_FIFO, ++ .name = "rt" ++}; ++EXPORT_SYMBOL_GPL(xnsched_class_rt); +--- linux/kernel/xenomai/tree.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/tree.c 2021-04-07 16:01:25.765636255 +0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++ ++void xntree_cleanup(struct rb_root *t, void *cookie, ++ void (*destroy)(void *cookie, struct xnid *id)) ++{ ++ struct rb_node *node, *next; ++ ++ node = rb_first(t); ++ while (node) { ++ next = rb_next(node); ++ ++ /* destroy is expected to remove the node from the rbtree */ ++ destroy(cookie, container_of(node, struct xnid, link)); ++ ++ node = next; ++ } ++} ++ ++int xnid_enter(struct rb_root *t, struct xnid *xnid, xnkey_t key) ++{ ++ struct rb_node **new = &t->rb_node, *parent = NULL; ++ ++ while (*new) { ++ struct xnid *i = container_of(*new, struct xnid, link); ++ ++ parent = *new; ++ if (key < i->key) ++ new = &((*new)->rb_left); ++ else if (key > i->key) ++ new = &((*new)->rb_right); ++ else ++ return -EEXIST; ++ } ++ ++ xnid->key = key; ++ rb_link_node(&xnid->link, parent, new); ++ rb_insert_color(&xnid->link, t); ++ ++ return 0; ++} +--- linux/kernel/xenomai/vfile.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/vfile.c 2021-04-07 16:01:25.760636262 +0800 +@@ -0,0 +1,980 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_vfile Virtual file services ++ * ++ * Virtual files provide a mean to export Xenomai object states to ++ * user-space, based on common kernel interfaces. This encapsulation ++ * is aimed at: ++ * ++ * - supporting consistent collection of very large record-based ++ * output, without encurring latency peaks for undergoing real-time ++ * activities. ++ * ++ * - in the future, hiding discrepancies between linux kernel ++ * releases, regarding the proper way to export kernel object states ++ * to userland, either via the /proc interface or by any other mean. ++ * ++ * This virtual file implementation offers record-based read support ++ * based on seq_files, single-buffer write support, directory and link ++ * handling, all visible from the /proc namespace. ++ * ++ * The vfile support exposes four filesystem object types: ++ * ++ * - snapshot-driven file (struct xnvfile_snapshot). This is commonly ++ * used to export real-time object states via the /proc filesystem. To ++ * minimize the latency involved in protecting the vfile routines from ++ * changes applied by real-time code on such objects, a snapshot of ++ * the data to output is first taken under proper locking, before the ++ * collected data is formatted and sent out in a lockless manner. ++ * ++ * Because a large number of records may have to be output, the data ++ * collection phase is not strictly atomic as a whole, but only ++ * protected at record level. 
The vfile implementation can be notified ++ * of updates to the underlying data set, and restart the collection ++ * from scratch until the snapshot is fully consistent. ++ * ++ * - regular sequential file (struct xnvfile_regular). This is ++ * basically an encapsulated sequential file object as available from ++ * the host kernel (i.e. seq_file), with a few additional features to ++ * make it more handy in a Xenomai environment, like implicit locking ++ * support and shortened declaration for simplest, single-record ++ * output. ++ * ++ * - virtual link (struct xnvfile_link). This is a symbolic link ++ * feature integrated with the vfile semantics. The link target is ++ * computed dynamically at creation time from a user-given helper ++ * routine. ++ * ++ * - virtual directory (struct xnvfile_directory). A directory object, ++ * which can be used to create a hierarchy for ordering a set of vfile ++ * objects. ++ * ++ *@{*/ ++ ++/** ++ * @var struct xnvfile_directory cobalt_vfroot ++ * @brief Xenomai vfile root directory ++ * ++ * This vdir maps the /proc/xenomai directory. It can be used to ++ * create a hierarchy of Xenomai-related vfiles under this root. ++ */ ++struct xnvfile_directory cobalt_vfroot; ++EXPORT_SYMBOL_GPL(cobalt_vfroot); ++ ++static struct xnvfile_directory sysroot; ++ ++static void *vfile_snapshot_start(struct seq_file *seq, loff_t *offp) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ loff_t pos = *offp; ++ ++ if (pos > it->nrdata) ++ return NULL; ++ ++ if (pos == 0) ++ return SEQ_START_TOKEN; ++ ++ return it->databuf + (pos - 1) * it->vfile->datasz; ++} ++ ++static void *vfile_snapshot_next(struct seq_file *seq, void *v, loff_t *offp) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ loff_t pos = *offp; ++ ++ if (pos >= it->nrdata) ++ return NULL; ++ ++ ++*offp; ++ ++ return it->databuf + pos * it->vfile->datasz; ++} ++ ++static void vfile_snapshot_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static int vfile_snapshot_show(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_snapshot_iterator *it = seq->private; ++ void *data = v == SEQ_START_TOKEN ? NULL : v; ++ int ret; ++ ++ ret = it->vfile->ops->show(it, data); ++ ++ return ret == VFILE_SEQ_SKIP ? SEQ_SKIP : ret; ++} ++ ++static struct seq_operations vfile_snapshot_ops = { ++ .start = vfile_snapshot_start, ++ .next = vfile_snapshot_next, ++ .stop = vfile_snapshot_stop, ++ .show = vfile_snapshot_show ++}; ++ ++static void vfile_snapshot_free(struct xnvfile_snapshot_iterator *it, void *buf) ++{ ++ kfree(buf); ++} ++ ++static int vfile_snapshot_open(struct inode *inode, struct file *file) ++{ ++ struct xnvfile_snapshot *vfile = PDE_DATA(inode); ++ struct xnvfile_snapshot_ops *ops = vfile->ops; ++ struct xnvfile_snapshot_iterator *it; ++ int revtag, ret, nrdata; ++ struct seq_file *seq; ++ caddr_t data; ++ ++ if ((file->f_mode & FMODE_WRITE) != 0 && ops->store == NULL) ++ return -EACCES; ++ ++ /* ++ * Make sure to create the seq_file backend only when reading ++ * from the v-file is possible. 
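++	 * Write-only opens skip the snapshot machinery entirely: no
++	 * iterator is allocated and file->private_data is left NULL,
++	 * so a later write(2) goes straight to the ->store() handler.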
++ */ ++ if ((file->f_mode & FMODE_READ) == 0) { ++ file->private_data = NULL; ++ return 0; ++ } ++ ++ if ((file->f_flags & O_EXCL) != 0 && xnvfile_nref(vfile) > 0) ++ return -EBUSY; ++ ++ it = kzalloc(sizeof(*it) + vfile->privsz, GFP_KERNEL); ++ if (it == NULL) ++ return -ENOMEM; ++ ++ it->vfile = vfile; ++ xnvfile_file(vfile) = file; ++ ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ goto fail; ++redo: ++ /* ++ * The ->rewind() method is optional; there may be cases where ++ * we don't have to take an atomic snapshot of the v-file ++ * contents before proceeding. In case ->rewind() detects a ++ * stale backend object, it can force us to bail out. ++ * ++ * If present, ->rewind() may return a strictly positive ++ * value, indicating how many records at most may be returned ++ * by ->next(). We use this hint to allocate the snapshot ++ * buffer, in case ->begin() is not provided. The size of this ++ * buffer would then be vfile->datasz * hint value. ++ * ++ * If ->begin() is given, we always expect the latter do the ++ * allocation for us regardless of the hint value. Otherwise, ++ * a NULL return from ->rewind() tells us that the vfile won't ++ * output any snapshot data via ->show(). ++ */ ++ nrdata = 0; ++ if (ops->rewind) { ++ nrdata = ops->rewind(it); ++ if (nrdata < 0) { ++ ret = nrdata; ++ vfile->entry.lockops->put(&vfile->entry); ++ goto fail; ++ } ++ } ++ revtag = vfile->tag->rev; ++ ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ /* Release the data buffer, in case we had to restart. */ ++ if (it->databuf) { ++ it->endfn(it, it->databuf); ++ it->databuf = NULL; ++ } ++ ++ /* ++ * Having no record to output is fine, in which case ->begin() ++ * shall return VFILE_SEQ_EMPTY if present. ->begin() may be ++ * absent, meaning that no allocation is even required to ++ * collect the records to output. NULL is kept for allocation ++ * errors in all other cases. ++ */ ++ if (ops->begin) { ++ XENO_BUG_ON(COBALT, ops->end == NULL); ++ data = ops->begin(it); ++ if (data == NULL) { ++ kfree(it); ++ return -ENOMEM; ++ } ++ if (data != VFILE_SEQ_EMPTY) { ++ it->databuf = data; ++ it->endfn = ops->end; ++ } ++ } else if (nrdata > 0 && vfile->datasz > 0) { ++ /* We have a hint for auto-allocation. */ ++ data = kmalloc(vfile->datasz * nrdata, GFP_KERNEL); ++ if (data == NULL) { ++ kfree(it); ++ return -ENOMEM; ++ } ++ it->databuf = data; ++ it->endfn = vfile_snapshot_free; ++ } ++ ++ ret = seq_open(file, &vfile_snapshot_ops); ++ if (ret) ++ goto fail; ++ ++ it->nrdata = 0; ++ data = it->databuf; ++ if (data == NULL) ++ goto finish; ++ ++ /* ++ * Take a snapshot of the vfile contents, redo if the revision ++ * tag of the scanned data set changed concurrently. 
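++	 * The vfile lock is taken and dropped around each ->next()
++	 * call, so it is never held across the whole dump; overall
++	 * consistency is enforced by the revision tag check, which
++	 * restarts the collection from scratch on a concurrent update.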
++ */ ++ for (;;) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ break; ++ if (vfile->tag->rev != revtag) ++ goto redo; ++ ret = ops->next(it, data); ++ vfile->entry.lockops->put(&vfile->entry); ++ if (ret <= 0) ++ break; ++ if (ret != VFILE_SEQ_SKIP) { ++ data += vfile->datasz; ++ it->nrdata++; ++ } ++ } ++ ++ if (ret < 0) { ++ seq_release(inode, file); ++ fail: ++ if (it->databuf) ++ it->endfn(it, it->databuf); ++ kfree(it); ++ return ret; ++ } ++ ++finish: ++ seq = file->private_data; ++ it->seq = seq; ++ seq->private = it; ++ xnvfile_nref(vfile)++; ++ ++ return 0; ++} ++ ++static int vfile_snapshot_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct xnvfile_snapshot_iterator *it; ++ ++ if (seq) { ++ it = seq->private; ++ if (it) { ++ --xnvfile_nref(it->vfile); ++ XENO_BUG_ON(COBALT, it->vfile->entry.refcnt < 0); ++ if (it->databuf) ++ it->endfn(it, it->databuf); ++ kfree(it); ++ } ++ ++ return seq_release(inode, file); ++ } ++ ++ return 0; ++} ++ ++ssize_t vfile_snapshot_write(struct file *file, const char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct xnvfile_snapshot *vfile = ++ PDE_DATA(file->f_path.dentry->d_inode); ++ struct xnvfile_input input; ++ ssize_t ret; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ret; ++ } ++ ++ input.u_buf = buf; ++ input.size = size; ++ input.vfile = &vfile->entry; ++ ++ ret = vfile->ops->store(&input); ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ return ret; ++} ++ ++static struct file_operations vfile_snapshot_fops = { ++ .open = vfile_snapshot_open, ++ .read = seq_read, ++ .write = vfile_snapshot_write, ++ .llseek = seq_lseek, ++ .release = vfile_snapshot_release, ++}; ++ ++/** ++ * @fn int xnvfile_init_snapshot(const char *name, struct xnvfile_snapshot *vfile, struct xnvfile_directory *parent) ++ * @brief Initialize a snapshot-driven vfile. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vfile entry. ++ * ++ * @param vfile A pointer to a vfile descriptor to initialize ++ * from. The following fields in this structure should be filled in ++ * prior to call this routine: ++ * ++ * - .privsz is the size (in bytes) of the private data area to be ++ * reserved in the @ref snapshot_iterator "vfile iterator". A NULL ++ * value indicates that no private area should be reserved. ++ * ++ * - .datasz is the size (in bytes) of a single record to be collected ++ * by the @ref snapshot_next "next() handler" from the @ref ++ * snapshot_ops "operation descriptor". ++ * ++ * - .tag is a pointer to a mandatory vfile revision tag structure ++ * (struct xnvfile_rev_tag). This tag will be monitored for changes by ++ * the vfile core while collecting data to output, so that any update ++ * detected will cause the current snapshot data to be dropped, and ++ * the collection to restart from the beginning. To this end, any ++ * change to the data which may be part of the collected records, ++ * should also invoke xnvfile_touch() on the associated tag. ++ * ++ * - entry.lockops is a pointer to a @ref vfile_lockops "lock descriptor", ++ * defining the lock and unlock operations for the vfile. This pointer ++ * may be left to NULL, in which case the operations on the nucleus ++ * lock (i.e. nklock) will be used internally around calls to data ++ * collection handlers (see @ref snapshot_ops "operation descriptor"). 
++ * ++ * - .ops is a pointer to an @ref snapshot_ops "operation descriptor". ++ * ++ * @param parent A pointer to a virtual directory descriptor; the ++ * vfile entry will be created into this directory. If NULL, the /proc ++ * root directory will be used. /proc/xenomai is mapped on the ++ * globally available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual file entry cannot be created ++ * in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_snapshot(const char *name, ++ struct xnvfile_snapshot *vfile, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ int mode; ++ ++ XENO_BUG_ON(COBALT, vfile->tag == NULL); ++ ++ if (vfile->entry.lockops == NULL) ++ /* Defaults to nucleus lock */ ++ vfile->entry.lockops = &xnvfile_nucleus_lock.ops; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ mode = vfile->ops->store ? 0644 : 0444; ++ ppde = parent->entry.pde; ++ pde = proc_create_data(name, mode, ppde, &vfile_snapshot_fops, vfile); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vfile->entry.pde = pde; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_snapshot); ++ ++static void *vfile_regular_start(struct seq_file *seq, loff_t *offp) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ int ret; ++ ++ it->pos = *offp; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ERR_PTR(ret); ++ } ++ ++ /* ++ * If we have no begin() op, then we allow a single call only ++ * to ->show(), by returning the start token once. Otherwise, ++ * we are done. ++ */ ++ if (vfile->ops->begin == NULL) ++ return it->pos > 0 ? NULL : SEQ_START_TOKEN; ++ ++ return vfile->ops->begin(it); ++} ++ ++static void *vfile_regular_next(struct seq_file *seq, void *v, loff_t *offp) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ void *data; ++ ++ if (vfile->ops->next == NULL) ++ return NULL; ++ ++ it->pos = *offp + 1; ++ ++ data = vfile->ops->next(it); ++ if (data == NULL) ++ return NULL; ++ ++ *offp = it->pos; ++ ++ return data; ++} ++ ++static void vfile_regular_stop(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ if (vfile->ops->end) ++ vfile->ops->end(it); ++} ++ ++static int vfile_regular_show(struct seq_file *seq, void *v) ++{ ++ struct xnvfile_regular_iterator *it = seq->private; ++ struct xnvfile_regular *vfile = it->vfile; ++ void *data = v == SEQ_START_TOKEN ? NULL : v; ++ int ret; ++ ++ ret = vfile->ops->show(it, data); ++ ++ return ret == VFILE_SEQ_SKIP ? 
SEQ_SKIP : ret; ++} ++ ++static struct seq_operations vfile_regular_ops = { ++ .start = vfile_regular_start, ++ .next = vfile_regular_next, ++ .stop = vfile_regular_stop, ++ .show = vfile_regular_show ++}; ++ ++static int vfile_regular_open(struct inode *inode, struct file *file) ++{ ++ struct xnvfile_regular *vfile = PDE_DATA(inode); ++ struct xnvfile_regular_ops *ops = vfile->ops; ++ struct xnvfile_regular_iterator *it; ++ struct seq_file *seq; ++ int ret; ++ ++ if ((file->f_flags & O_EXCL) != 0 && xnvfile_nref(vfile) > 0) ++ return -EBUSY; ++ ++ if ((file->f_mode & FMODE_WRITE) != 0 && ops->store == NULL) ++ return -EACCES; ++ ++ if ((file->f_mode & FMODE_READ) == 0) { ++ file->private_data = NULL; ++ return 0; ++ } ++ ++ it = kzalloc(sizeof(*it) + vfile->privsz, GFP_KERNEL); ++ if (it == NULL) ++ return -ENOMEM; ++ ++ it->vfile = vfile; ++ it->pos = -1; ++ xnvfile_file(vfile) = file; ++ ++ if (ops->rewind) { ++ ret = ops->rewind(it); ++ if (ret) { ++ fail: ++ kfree(it); ++ return ret; ++ } ++ } ++ ++ ret = seq_open(file, &vfile_regular_ops); ++ if (ret) ++ goto fail; ++ ++ seq = file->private_data; ++ it->seq = seq; ++ seq->private = it; ++ xnvfile_nref(vfile)++; ++ ++ return 0; ++} ++ ++static int vfile_regular_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct xnvfile_regular_iterator *it; ++ ++ if (seq) { ++ it = seq->private; ++ if (it) { ++ --xnvfile_nref(it->vfile); ++ XENO_BUG_ON(COBALT, xnvfile_nref(it->vfile) < 0); ++ kfree(it); ++ } ++ ++ return seq_release(inode, file); ++ } ++ ++ return 0; ++} ++ ++ssize_t vfile_regular_write(struct file *file, const char __user *buf, ++ size_t size, loff_t *ppos) ++{ ++ struct xnvfile_regular *vfile = ++ PDE_DATA(file->f_path.dentry->d_inode); ++ struct xnvfile_input input; ++ ssize_t ret; ++ ++ if (vfile->entry.lockops) { ++ ret = vfile->entry.lockops->get(&vfile->entry); ++ if (ret) ++ return ret; ++ } ++ ++ input.u_buf = buf; ++ input.size = size; ++ input.vfile = &vfile->entry; ++ ++ ret = vfile->ops->store(&input); ++ ++ if (vfile->entry.lockops) ++ vfile->entry.lockops->put(&vfile->entry); ++ ++ return ret; ++} ++ ++static struct file_operations vfile_regular_fops = { ++ .open = vfile_regular_open, ++ .read = seq_read, ++ .write = vfile_regular_write, ++ .llseek = seq_lseek, ++ .release = vfile_regular_release, ++}; ++ ++/** ++ * @fn int xnvfile_init_regular(const char *name, struct xnvfile_regular *vfile, struct xnvfile_directory *parent) ++ * @brief Initialize a regular vfile. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vfile entry. ++ * ++ * @param vfile A pointer to a vfile descriptor to initialize ++ * from. The following fields in this structure should be filled in ++ * prior to call this routine: ++ * ++ * - .privsz is the size (in bytes) of the private data area to be ++ * reserved in the @ref regular_iterator "vfile iterator". A NULL ++ * value indicates that no private area should be reserved. ++ * ++ * - entry.lockops is a pointer to a @ref vfile_lockops "locking ++ * descriptor", defining the lock and unlock operations for the ++ * vfile. This pointer may be left to NULL, in which case no ++ * locking will be applied. ++ * ++ * - .ops is a pointer to an @ref regular_ops "operation descriptor". ++ * ++ * @param parent A pointer to a virtual directory descriptor; the ++ * vfile entry will be created into this directory. If NULL, the /proc ++ * root directory will be used. 
/proc/xenomai is mapped on the ++ * globally available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual file entry cannot be created ++ * in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_regular(const char *name, ++ struct xnvfile_regular *vfile, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ int mode; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ mode = vfile->ops->store ? 0644 : 0444; ++ ppde = parent->entry.pde; ++ pde = proc_create_data(name, mode, ppde, &vfile_regular_fops, vfile); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vfile->entry.pde = pde; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_regular); ++ ++/** ++ * @fn int xnvfile_init_dir(const char *name, struct xnvfile_directory *vdir, struct xnvfile_directory *parent) ++ * @brief Initialize a virtual directory entry. ++ * ++ * @param name The name which should appear in the pseudo-filesystem, ++ * identifying the vdir entry. ++ * ++ * @param vdir A pointer to the virtual directory descriptor to ++ * initialize. ++ * ++ * @param parent A pointer to a virtual directory descriptor standing ++ * for the parent directory of the new vdir. If NULL, the /proc root ++ * directory will be used. /proc/xenomai is mapped on the globally ++ * available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual directory entry cannot be ++ * created in the /proc hierarchy. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_dir(const char *name, ++ struct xnvfile_directory *vdir, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ ppde = parent->entry.pde; ++ pde = proc_mkdir(name, ppde); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vdir->entry.pde = pde; ++ vdir->entry.lockops = NULL; ++ vdir->entry.private = NULL; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_dir); ++ ++/** ++ * @fn int xnvfile_init_link(const char *from, const char *to, struct xnvfile_link *vlink, struct xnvfile_directory *parent) ++ * @brief Initialize a virtual link entry. ++ * ++ * @param from The name which should appear in the pseudo-filesystem, ++ * identifying the vlink entry. ++ * ++ * @param to The target file name which should be referred to ++ * symbolically by @a name. ++ * ++ * @param vlink A pointer to the virtual link descriptor to ++ * initialize. ++ * ++ * @param parent A pointer to a virtual directory descriptor standing ++ * for the parent directory of the new vlink. If NULL, the /proc root ++ * directory will be used. /proc/xenomai is mapped on the globally ++ * available @a cobalt_vfroot vdir. ++ * ++ * @return 0 is returned on success. Otherwise: ++ * ++ * - -ENOMEM is returned if the virtual link entry cannot be created ++ * in the /proc hierarchy. 
++ * ++ * @coretags{secondary-only} ++ */ ++int xnvfile_init_link(const char *from, ++ const char *to, ++ struct xnvfile_link *vlink, ++ struct xnvfile_directory *parent) ++{ ++ struct proc_dir_entry *ppde, *pde; ++ ++ if (parent == NULL) ++ parent = &sysroot; ++ ++ ppde = parent->entry.pde; ++ pde = proc_symlink(from, ppde, to); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vlink->entry.pde = pde; ++ vlink->entry.lockops = NULL; ++ vlink->entry.private = NULL; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnvfile_init_link); ++ ++/** ++ * @fn void xnvfile_destroy(struct xnvfile *vfile) ++ * @brief Removes a virtual file entry. ++ * ++ * @param vfile A pointer to the virtual file descriptor to ++ * remove. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnvfile_destroy(struct xnvfile *vfile) ++{ ++ proc_remove(vfile->pde); ++} ++EXPORT_SYMBOL_GPL(xnvfile_destroy); ++ ++/** ++ * @fn ssize_t xnvfile_get_blob(struct xnvfile_input *input, void *data, size_t size) ++ * @brief Read in a data bulk written to the vfile. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_blob() retrieves this data as an untyped ++ * binary blob, and copies it back to the caller's buffer. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param data The address of the destination buffer to copy the input ++ * data to. ++ * ++ * @param size The maximum number of bytes to copy to the destination ++ * buffer. If @a size is larger than the actual data size, the input ++ * is truncated to @a size. ++ * ++ * @return The number of bytes read and copied to the destination ++ * buffer upon success. Otherwise, a negative error code is returned: ++ * ++ * - -EFAULT indicates an invalid source buffer address. ++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_blob(struct xnvfile_input *input, ++ void *data, size_t size) ++{ ++ ssize_t nbytes = input->size; ++ ++ if (nbytes > size) ++ nbytes = size; ++ ++ if (nbytes > 0 && copy_from_user(data, input->u_buf, nbytes)) ++ return -EFAULT; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_blob); ++ ++/** ++ * @fn ssize_t xnvfile_get_string(struct xnvfile_input *input, char *s, size_t maxlen) ++ * @brief Read in a C-string written to the vfile. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_string() retrieves this data as a ++ * null-terminated character string, and copies it back to the ++ * caller's buffer. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param s The address of the destination string buffer to copy the ++ * input data to. ++ * ++ * @param maxlen The maximum number of bytes to copy to the ++ * destination buffer, including the ending null character. If @a ++ * maxlen is larger than the actual string length, the input is ++ * truncated to @a maxlen. ++ * ++ * @return The number of characters read upon success. Otherwise, a ++ * negative error code is returned: ++ * ++ * - -EFAULT indicates an invalid source buffer address. 
++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_string(struct xnvfile_input *input, ++ char *s, size_t maxlen) ++{ ++ ssize_t nbytes, eol; ++ ++ if (maxlen < 1) ++ return -EINVAL; ++ ++ nbytes = xnvfile_get_blob(input, s, maxlen - 1); ++ if (nbytes < 0) ++ return nbytes; ++ ++ eol = nbytes; ++ if (eol > 0 && s[eol - 1] == '\n') ++ eol--; ++ ++ s[eol] = '\0'; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_string); ++ ++/** ++ * @fn ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp) ++ * @brief Evaluate the string written to the vfile as a long integer. ++ * ++ * When writing to a vfile, the associated store() handler from the ++ * @ref snapshot_store "snapshot-driven vfile" or @ref regular_store ++ * "regular vfile" is called, with a single argument describing the ++ * input data. xnvfile_get_integer() retrieves and interprets this ++ * data as a long integer, and copies the resulting value back to @a ++ * valp. ++ * ++ * The long integer can be expressed in decimal, octal or hexadecimal ++ * bases depending on the prefix found. ++ * ++ * @param input A pointer to the input descriptor passed to the ++ * store() handler. ++ * ++ * @param valp The address of a long integer variable to receive the ++ * value. ++ * ++ * @return The number of characters read while evaluating the input as ++ * a long integer upon success. Otherwise, a negative error code is ++ * returned: ++ * ++ * - -EINVAL indicates a parse error on the input stream; the written ++ * text cannot be evaluated as a long integer. ++ * ++ * - -EFAULT indicates an invalid source buffer address. ++ * ++ * @coretags{secondary-only} ++ */ ++ssize_t xnvfile_get_integer(struct xnvfile_input *input, long *valp) ++{ ++ char *end, buf[32]; ++ ssize_t nbytes; ++ long val; ++ ++ nbytes = xnvfile_get_blob(input, buf, sizeof(buf) - 1); ++ if (nbytes < 0) ++ return nbytes; ++ ++ if (nbytes == 0) ++ return -EINVAL; ++ ++ buf[nbytes] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (*end != '\0' && !isspace(*end)) ++ return -EINVAL; ++ ++ *valp = val; ++ ++ return nbytes; ++} ++EXPORT_SYMBOL_GPL(xnvfile_get_integer); ++ ++int __vfile_hostlock_get(struct xnvfile *vfile) ++{ ++ struct xnvfile_hostlock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_hostlock_class, ops); ++ mutex_lock(&lc->mutex); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__vfile_hostlock_get); ++ ++void __vfile_hostlock_put(struct xnvfile *vfile) ++{ ++ struct xnvfile_hostlock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_hostlock_class, ops); ++ mutex_unlock(&lc->mutex); ++} ++EXPORT_SYMBOL_GPL(__vfile_hostlock_put); ++ ++static int __vfile_nklock_get(struct xnvfile *vfile) ++{ ++ struct xnvfile_nklock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_nklock_class, ops); ++ xnlock_get_irqsave(&nklock, lc->s); ++ ++ return 0; ++} ++ ++static void __vfile_nklock_put(struct xnvfile *vfile) ++{ ++ struct xnvfile_nklock_class *lc; ++ ++ lc = container_of(vfile->lockops, struct xnvfile_nklock_class, ops); ++ xnlock_put_irqrestore(&nklock, lc->s); ++} ++ ++struct xnvfile_nklock_class xnvfile_nucleus_lock = { ++ .ops = { ++ .get = __vfile_nklock_get, ++ .put = __vfile_nklock_put, ++ }, ++}; ++ ++int __init xnvfile_init_root(void) ++{ ++ struct xnvfile_directory *vdir = &cobalt_vfroot; ++ struct proc_dir_entry *pde; ++ ++ pde = proc_mkdir("xenomai", NULL); ++ if (pde == NULL) ++ return -ENOMEM; ++ ++ vdir->entry.pde = pde; ++ vdir->entry.lockops = NULL; ++ vdir->entry.private = NULL; ++ ++ 
return 0; ++} ++ ++void xnvfile_destroy_root(void) ++{ ++ cobalt_vfroot.entry.pde = NULL; ++ remove_proc_entry("xenomai", NULL); ++} ++ ++/** @} */ +--- linux/kernel/xenomai/heap.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/heap.c 2021-04-07 16:01:25.755636270 +0800 +@@ -0,0 +1,862 @@ ++/* ++ * Copyright (C) 2001,2002,2003 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_heap Dynamic memory allocation services ++ * ++ * This code implements a variant of the allocator described in ++ * "Design of a General Purpose Memory Allocator for the 4.3BSD Unix ++ * Kernel" by Marshall K. McKusick and Michael J. Karels (USENIX ++ * 1988), see http://docs.FreeBSD.org/44doc/papers/kernmalloc.pdf. ++ * The free page list is maintained in rbtrees for fast lookups of ++ * multi-page memory ranges, and pages holding bucketed memory have a ++ * fast allocation bitmap to manage their blocks internally. ++ *@{ ++ */ ++struct xnheap cobalt_heap; /* System heap */ ++EXPORT_SYMBOL_GPL(cobalt_heap); ++ ++static LIST_HEAD(heapq); /* Heap list for v-file dump */ ++ ++static int nrheaps; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static struct xnvfile_rev_tag vfile_tag; ++ ++static struct xnvfile_snapshot_ops vfile_ops; ++ ++struct vfile_priv { ++ struct xnheap *curr; ++}; ++ ++struct vfile_data { ++ size_t all_mem; ++ size_t free_mem; ++ char name[XNOBJECT_NAME_LEN]; ++}; ++ ++static struct xnvfile_snapshot vfile = { ++ .privsz = sizeof(struct vfile_priv), ++ .datasz = sizeof(struct vfile_data), ++ .tag = &vfile_tag, ++ .ops = &vfile_ops, ++}; ++ ++static int vfile_rewind(struct xnvfile_snapshot_iterator *it) ++{ ++ struct vfile_priv *priv = xnvfile_iterator_priv(it); ++ ++ if (list_empty(&heapq)) { ++ priv->curr = NULL; ++ return 0; ++ } ++ ++ priv->curr = list_first_entry(&heapq, struct xnheap, next); ++ ++ return nrheaps; ++} ++ ++static int vfile_next(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct vfile_data *p = data; ++ struct xnheap *heap; ++ ++ if (priv->curr == NULL) ++ return 0; /* We are done. 
*/ ++ ++ heap = priv->curr; ++ if (list_is_last(&heap->next, &heapq)) ++ priv->curr = NULL; ++ else ++ priv->curr = list_entry(heap->next.next, ++ struct xnheap, next); ++ ++ p->all_mem = xnheap_get_size(heap); ++ p->free_mem = xnheap_get_free(heap); ++ knamecpy(p->name, heap->name); ++ ++ return 1; ++} ++ ++static int vfile_show(struct xnvfile_snapshot_iterator *it, void *data) ++{ ++ struct vfile_data *p = data; ++ ++ if (p == NULL) ++ xnvfile_printf(it, "%9s %9s %s\n", ++ "TOTAL", "FREE", "NAME"); ++ else ++ xnvfile_printf(it, "%9zu %9zu %s\n", ++ p->all_mem, ++ p->free_mem, ++ p->name); ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops vfile_ops = { ++ .rewind = vfile_rewind, ++ .next = vfile_next, ++ .show = vfile_show, ++}; ++ ++void xnheap_init_proc(void) ++{ ++ xnvfile_init_snapshot("heap", &vfile, &cobalt_vfroot); ++} ++ ++void xnheap_cleanup_proc(void) ++{ ++ xnvfile_destroy_snapshot(&vfile); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++enum xnheap_pgtype { ++ page_free =0, ++ page_cont =1, ++ page_list =2 ++}; ++ ++static inline u32 __always_inline ++gen_block_mask(int log2size) ++{ ++ return -1U >> (32 - (XNHEAP_PAGE_SIZE >> log2size)); ++} ++ ++static inline __always_inline ++int addr_to_pagenr(struct xnheap *heap, void *p) ++{ ++ return ((void *)p - heap->membase) >> XNHEAP_PAGE_SHIFT; ++} ++ ++static inline __always_inline ++void *pagenr_to_addr(struct xnheap *heap, int pg) ++{ ++ return heap->membase + (pg << XNHEAP_PAGE_SHIFT); ++} ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_MEMORY ++/* ++ * Setting page_cont/page_free in the page map is only required for ++ * enabling full checking of the block address in free requests, which ++ * may be extremely time-consuming when deallocating huge blocks ++ * spanning thousands of pages. We only do such marking when running ++ * in memory debug mode. ++ */ ++static inline bool ++page_is_valid(struct xnheap *heap, int pg) ++{ ++ switch (heap->pagemap[pg].type) { ++ case page_free: ++ case page_cont: ++ return false; ++ case page_list: ++ default: ++ return true; ++ } ++} ++ ++static void mark_pages(struct xnheap *heap, ++ int pg, int nrpages, ++ enum xnheap_pgtype type) ++{ ++ while (nrpages-- > 0) ++ heap->pagemap[pg].type = type; ++} ++ ++#else ++ ++static inline bool ++page_is_valid(struct xnheap *heap, int pg) ++{ ++ return true; ++} ++ ++static void mark_pages(struct xnheap *heap, ++ int pg, int nrpages, ++ enum xnheap_pgtype type) ++{ } ++ ++#endif ++ ++static struct xnheap_range * ++search_size_ge(struct rb_root *t, size_t size) ++{ ++ struct rb_node *rb, *deepest = NULL; ++ struct xnheap_range *r; ++ ++ /* ++ * We first try to find an exact match. If that fails, we walk ++ * the tree in logical order by increasing size value from the ++ * deepest node traversed until we find the first successor to ++ * that node, or nothing beyond it, whichever comes first. 
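++	 * The net effect is a best-fit lookup: the range returned is
++	 * the smallest free range which is large enough for the
++	 * request, or NULL if no such range exists.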
++ */ ++ rb = t->rb_node; ++ while (rb) { ++ deepest = rb; ++ r = rb_entry(rb, struct xnheap_range, size_node); ++ if (size < r->size) { ++ rb = rb->rb_left; ++ continue; ++ } ++ if (size > r->size) { ++ rb = rb->rb_right; ++ continue; ++ } ++ return r; ++ } ++ ++ rb = deepest; ++ while (rb) { ++ r = rb_entry(rb, struct xnheap_range, size_node); ++ if (size <= r->size) ++ return r; ++ rb = rb_next(rb); ++ } ++ ++ return NULL; ++} ++ ++static struct xnheap_range * ++search_left_mergeable(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node *node = heap->addr_tree.rb_node; ++ struct xnheap_range *p; ++ ++ while (node) { ++ p = rb_entry(node, struct xnheap_range, addr_node); ++ if ((void *)p + p->size == (void *)r) ++ return p; ++ if (&r->addr_node < node) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return NULL; ++} ++ ++static struct xnheap_range * ++search_right_mergeable(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node *node = heap->addr_tree.rb_node; ++ struct xnheap_range *p; ++ ++ while (node) { ++ p = rb_entry(node, struct xnheap_range, addr_node); ++ if ((void *)r + r->size == (void *)p) ++ return p; ++ if (&r->addr_node < node) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return NULL; ++} ++ ++static void insert_range_bysize(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node **new = &heap->size_tree.rb_node, *parent = NULL; ++ struct xnheap_range *p; ++ ++ while (*new) { ++ p = container_of(*new, struct xnheap_range, size_node); ++ parent = *new; ++ if (r->size <= p->size) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&r->size_node, parent, new); ++ rb_insert_color(&r->size_node, &heap->size_tree); ++} ++ ++static void insert_range_byaddr(struct xnheap *heap, struct xnheap_range *r) ++{ ++ struct rb_node **new = &heap->addr_tree.rb_node, *parent = NULL; ++ struct xnheap_range *p; ++ ++ while (*new) { ++ p = container_of(*new, struct xnheap_range, addr_node); ++ parent = *new; ++ if (r < p) ++ new = &((*new)->rb_left); ++ else ++ new = &((*new)->rb_right); ++ } ++ ++ rb_link_node(&r->addr_node, parent, new); ++ rb_insert_color(&r->addr_node, &heap->addr_tree); ++} ++ ++static int reserve_page_range(struct xnheap *heap, size_t size) ++{ ++ struct xnheap_range *new, *splitr; ++ ++ /* Find a suitable range of pages covering 'size'. */ ++ new = search_size_ge(&heap->size_tree, size); ++ if (new == NULL) ++ return -1; ++ ++ rb_erase(&new->size_node, &heap->size_tree); ++ if (new->size == size) { ++ rb_erase(&new->addr_node, &heap->addr_tree); ++ return addr_to_pagenr(heap, new); ++ } ++ ++ /* ++ * The free range fetched is larger than what we need: split ++ * it in two, the upper part is returned to the caller, the ++ * lower part is sent back to the free list, which makes ++ * reindexing by address pointless. 
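++	 * Only the size index needs updating: the remainder keeps its
++	 * original base address, so its node in the address tree
++	 * remains valid as is.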
++ */ ++ splitr = new; ++ splitr->size -= size; ++ new = (struct xnheap_range *)((void *)new + splitr->size); ++ insert_range_bysize(heap, splitr); ++ ++ return addr_to_pagenr(heap, new); ++} ++ ++static void release_page_range(struct xnheap *heap, ++ void *page, size_t size) ++{ ++ struct xnheap_range *freed = page, *left, *right; ++ bool addr_linked = false; ++ ++ freed->size = size; ++ ++ left = search_left_mergeable(heap, freed); ++ if (left) { ++ rb_erase(&left->size_node, &heap->size_tree); ++ left->size += freed->size; ++ freed = left; ++ addr_linked = true; ++ } ++ ++ right = search_right_mergeable(heap, freed); ++ if (right) { ++ rb_erase(&right->size_node, &heap->size_tree); ++ freed->size += right->size; ++ if (addr_linked) ++ rb_erase(&right->addr_node, &heap->addr_tree); ++ else ++ rb_replace_node(&right->addr_node, &freed->addr_node, ++ &heap->addr_tree); ++ } else if (!addr_linked) ++ insert_range_byaddr(heap, freed); ++ ++ insert_range_bysize(heap, freed); ++ mark_pages(heap, addr_to_pagenr(heap, page), ++ size >> XNHEAP_PAGE_SHIFT, page_free); ++} ++ ++static void add_page_front(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *new, *head, *next; ++ int ilog; ++ ++ /* Insert page at front of the per-bucket page list. */ ++ ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ new = &heap->pagemap[pg]; ++ if (heap->buckets[ilog] == -1U) { ++ heap->buckets[ilog] = pg; ++ new->prev = new->next = pg; ++ } else { ++ head = &heap->pagemap[heap->buckets[ilog]]; ++ new->prev = heap->buckets[ilog]; ++ new->next = head->next; ++ next = &heap->pagemap[new->next]; ++ next->prev = pg; ++ head->next = pg; ++ heap->buckets[ilog] = pg; ++ } ++} ++ ++static void remove_page(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *old, *prev, *next; ++ int ilog = log2size - XNHEAP_MIN_LOG2; ++ ++ /* Remove page from the per-bucket page list. */ ++ ++ old = &heap->pagemap[pg]; ++ if (pg == old->next) ++ heap->buckets[ilog] = -1U; ++ else { ++ if (pg == heap->buckets[ilog]) ++ heap->buckets[ilog] = old->next; ++ prev = &heap->pagemap[old->prev]; ++ prev->next = old->next; ++ next = &heap->pagemap[old->next]; ++ next->prev = old->prev; ++ } ++} ++ ++static void move_page_front(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ int ilog = log2size - XNHEAP_MIN_LOG2; ++ ++ /* Move page at front of the per-bucket page list. */ ++ ++ if (heap->buckets[ilog] == pg) ++ return; /* Already at front, no move. */ ++ ++ remove_page(heap, pg, log2size); ++ add_page_front(heap, pg, log2size); ++} ++ ++static void move_page_back(struct xnheap *heap, ++ int pg, int log2size) ++{ ++ struct xnheap_pgentry *old, *last, *head, *next; ++ int ilog; ++ ++ /* Move page at end of the per-bucket page list. */ ++ ++ old = &heap->pagemap[pg]; ++ if (pg == old->next) /* Singleton, no move. */ ++ return; ++ ++ remove_page(heap, pg, log2size); ++ ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ head = &heap->pagemap[heap->buckets[ilog]]; ++ last = &heap->pagemap[head->prev]; ++ old->prev = head->prev; ++ old->next = last->next; ++ next = &heap->pagemap[old->next]; ++ next->prev = pg; ++ last->next = pg; ++} ++ ++static void *add_free_range(struct xnheap *heap, ++ size_t bsize, int log2size) ++{ ++ int pg; ++ ++ pg = reserve_page_range(heap, ALIGN(bsize, XNHEAP_PAGE_SIZE)); ++ if (pg < 0) ++ return NULL; ++ ++ /* ++ * Update the page entry. If @log2size is non-zero ++ * (i.e. bsize < XNHEAP_PAGE_SIZE), bsize is (1 << log2Size) ++ * between 2^XNHEAP_MIN_LOG2 and 2^(XNHEAP_PAGE_SHIFT - 1). 
++ * Save the log2 power into entry.type, then update the ++ * per-page allocation bitmap to reserve the first block. ++ * ++ * Otherwise, we have a larger block which may span multiple ++ * pages: set entry.type to page_list, indicating the start of ++ * the page range, and entry.bsize to the overall block size. ++ */ ++ if (log2size) { ++ heap->pagemap[pg].type = log2size; ++ /* ++ * Mark the first object slot (#0) as busy, along with ++ * the leftmost bits we won't use for this log2 size. ++ */ ++ heap->pagemap[pg].map = ~gen_block_mask(log2size) | 1; ++ /* ++ * Insert the new page at front of the per-bucket page ++ * list, enforcing the assumption that pages with free ++ * space live close to the head of this list. ++ */ ++ add_page_front(heap, pg, log2size); ++ } else { ++ heap->pagemap[pg].type = page_list; ++ heap->pagemap[pg].bsize = (u32)bsize; ++ mark_pages(heap, pg + 1, ++ (bsize >> XNHEAP_PAGE_SHIFT) - 1, page_cont); ++ } ++ ++ heap->used_size += bsize; ++ ++ return pagenr_to_addr(heap, pg); ++} ++ ++/** ++ * @fn void *xnheap_alloc(struct xnheap *heap, size_t size) ++ * @brief Allocate a memory block from a memory heap. ++ * ++ * Allocates a contiguous region of memory from an active memory heap. ++ * Such allocation is guaranteed to be time-bounded. ++ * ++ * @param heap The descriptor address of the heap to get memory from. ++ * ++ * @param size The size in bytes of the requested block. ++ * ++ * @return The address of the allocated region upon success, or NULL ++ * if no memory is available from the specified heap. ++ * ++ * @coretags{unrestricted} ++ */ ++void *xnheap_alloc(struct xnheap *heap, size_t size) ++{ ++ int log2size, ilog, pg, b = -1; ++ size_t bsize; ++ void *block; ++ spl_t s; ++ ++ if (size == 0) ++ return NULL; ++ ++ if (size < XNHEAP_MIN_ALIGN) { ++ bsize = size = XNHEAP_MIN_ALIGN; ++ log2size = XNHEAP_MIN_LOG2; ++ } else { ++ log2size = ilog2(size); ++ if (log2size < XNHEAP_PAGE_SHIFT) { ++ if (size & (size - 1)) ++ log2size++; ++ bsize = 1 << log2size; ++ } else ++ bsize = ALIGN(size, XNHEAP_PAGE_SIZE); ++ } ++ ++ /* ++ * Allocate entire pages directly from the pool whenever the ++ * block is larger or equal to XNHEAP_PAGE_SIZE. Otherwise, ++ * use bucketed memory. ++ * ++ * NOTE: Fully busy pages from bucketed memory are moved back ++ * at the end of the per-bucket page list, so that we may ++ * always assume that either the heading page has some room ++ * available, or no room is available from any page linked to ++ * this list, in which case we should immediately add a fresh ++ * page. ++ */ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ if (bsize >= XNHEAP_PAGE_SIZE) ++ /* Add a range of contiguous free pages. */ ++ block = add_free_range(heap, bsize, 0); ++ else { ++ ilog = log2size - XNHEAP_MIN_LOG2; ++ XENO_WARN_ON(MEMORY, ilog < 0 || ilog >= XNHEAP_MAX_BUCKETS); ++ pg = heap->buckets[ilog]; ++ /* ++ * Find a block in the heading page if any. If there ++ * is none, there won't be any down the list: add a ++ * new page right away. ++ */ ++ if (pg < 0 || heap->pagemap[pg].map == -1U) ++ block = add_free_range(heap, bsize, log2size); ++ else { ++ b = ffs(~heap->pagemap[pg].map) - 1; ++ /* ++ * Got one block from the heading per-bucket ++ * page, tag it as busy in the per-page ++ * allocation map. 
++ */ ++ heap->pagemap[pg].map |= (1U << b); ++ heap->used_size += bsize; ++ block = heap->membase + ++ (pg << XNHEAP_PAGE_SHIFT) + ++ (b << log2size); ++ if (heap->pagemap[pg].map == -1U) ++ move_page_back(heap, pg, log2size); ++ } ++ } ++ ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return block; ++} ++EXPORT_SYMBOL_GPL(xnheap_alloc); ++ ++/** ++ * @fn void xnheap_free(struct xnheap *heap, void *block) ++ * @brief Release a block to a memory heap. ++ * ++ * Releases a memory block to a heap. ++ * ++ * @param heap The heap descriptor. ++ * ++ * @param block The block to be returned to the heap. ++ * ++ * @coretags{unrestricted} ++ */ ++void xnheap_free(struct xnheap *heap, void *block) ++{ ++ unsigned long pgoff, boff; ++ int log2size, pg, n; ++ size_t bsize; ++ u32 oldmap; ++ spl_t s; ++ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ /* Compute the heading page number in the page map. */ ++ pgoff = block - heap->membase; ++ pg = pgoff >> XNHEAP_PAGE_SHIFT; ++ ++ if (!page_is_valid(heap, pg)) ++ goto bad; ++ ++ switch (heap->pagemap[pg].type) { ++ case page_list: ++ bsize = heap->pagemap[pg].bsize; ++ XENO_WARN_ON(MEMORY, (bsize & (XNHEAP_PAGE_SIZE - 1)) != 0); ++ release_page_range(heap, pagenr_to_addr(heap, pg), bsize); ++ break; ++ ++ default: ++ log2size = heap->pagemap[pg].type; ++ bsize = (1 << log2size); ++ XENO_WARN_ON(MEMORY, bsize >= XNHEAP_PAGE_SIZE); ++ boff = pgoff & ~XNHEAP_PAGE_MASK; ++ if ((boff & (bsize - 1)) != 0) /* Not at block start? */ ++ goto bad; ++ ++ n = boff >> log2size; /* Block position in page. */ ++ oldmap = heap->pagemap[pg].map; ++ heap->pagemap[pg].map &= ~(1U << n); ++ ++ /* ++ * If the page the block was sitting on is fully idle, ++ * return it to the pool. Otherwise, check whether ++ * that page is transitioning from fully busy to ++ * partially busy state, in which case it should move ++ * toward the front of the per-bucket page list. ++ */ ++ if (heap->pagemap[pg].map == ~gen_block_mask(log2size)) { ++ remove_page(heap, pg, log2size); ++ release_page_range(heap, pagenr_to_addr(heap, pg), ++ XNHEAP_PAGE_SIZE); ++ } else if (oldmap == -1U) ++ move_page_front(heap, pg, log2size); ++ } ++ ++ heap->used_size -= bsize; ++ ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return; ++bad: ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ XENO_WARN(MEMORY, 1, "invalid block %p in heap %s", ++ block, heap->name); ++} ++EXPORT_SYMBOL_GPL(xnheap_free); ++ ++ssize_t xnheap_check_block(struct xnheap *heap, void *block) ++{ ++ unsigned long pg, pgoff, boff; ++ ssize_t ret = -EINVAL; ++ size_t bsize; ++ spl_t s; ++ ++ xnlock_get_irqsave(&heap->lock, s); ++ ++ /* Calculate the page number from the block address. */ ++ pgoff = block - heap->membase; ++ pg = pgoff >> XNHEAP_PAGE_SHIFT; ++ if (page_is_valid(heap, pg)) { ++ if (heap->pagemap[pg].type == page_list) ++ bsize = heap->pagemap[pg].bsize; ++ else { ++ bsize = (1 << heap->pagemap[pg].type); ++ boff = pgoff & ~XNHEAP_PAGE_MASK; ++ if ((boff & (bsize - 1)) != 0) /* Not at block start? */ ++ goto out; ++ } ++ ret = (ssize_t)bsize; ++ } ++out: ++ xnlock_put_irqrestore(&heap->lock, s); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(xnheap_check_block); ++ ++/** ++ * @fn xnheap_init(struct xnheap *heap, void *membase, u32 size) ++ * @brief Initialize a memory heap. ++ * ++ * Initializes a memory heap suitable for time-bounded allocation ++ * requests of dynamic memory. ++ * ++ * @param heap The address of a heap descriptor to initialize. ++ * ++ * @param membase The address of the storage area. 
++ * ++ * @param size The size in bytes of the storage area. @a size must be ++ * a multiple of XNHEAP_PAGE_SIZE and smaller than (4Gb - PAGE_SIZE) ++ * in the current implementation. ++ * ++ * @return 0 is returned upon success, or: ++ * ++ * - -EINVAL is returned if @a size is either greater than ++ * XNHEAP_MAX_HEAPSZ, or not aligned on PAGE_SIZE. ++ * ++ * - -ENOMEM is returned upon failure of allocating the meta-data area ++ * used internally to maintain the heap. ++ * ++ * @coretags{secondary-only} ++ */ ++int xnheap_init(struct xnheap *heap, void *membase, size_t size) ++{ ++ int n, nrpages; ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ if (size > XNHEAP_MAX_HEAPSZ || !PAGE_ALIGNED(size)) ++ return -EINVAL; ++ ++ /* Reset bucket page lists, all empty. */ ++ for (n = 0; n < XNHEAP_MAX_BUCKETS; n++) ++ heap->buckets[n] = -1U; ++ ++ xnlock_init(&heap->lock); ++ ++ nrpages = size >> XNHEAP_PAGE_SHIFT; ++ heap->pagemap = kzalloc(sizeof(struct xnheap_pgentry) * nrpages, ++ GFP_KERNEL); ++ if (heap->pagemap == NULL) ++ return -ENOMEM; ++ ++ heap->membase = membase; ++ heap->usable_size = size; ++ heap->used_size = 0; ++ ++ /* ++ * The free page pool is maintained as a set of ranges of ++ * contiguous pages indexed by address and size in rbtrees. ++ * Initially, we have a single range in those trees covering ++ * the whole memory we have been given for the heap. Over ++ * time, that range will be split then possibly re-merged back ++ * as allocations and deallocations take place. ++ */ ++ heap->size_tree = RB_ROOT; ++ heap->addr_tree = RB_ROOT; ++ release_page_range(heap, membase, size); ++ ++ /* Default name, override with xnheap_set_name() */ ++ ksformat(heap->name, sizeof(heap->name), "(%p)", heap); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_add_tail(&heap->next, &heapq); ++ nrheaps++; ++ xnvfile_touch_tag(&vfile_tag); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnheap_init); ++ ++/** ++ * @fn void xnheap_destroy(struct xnheap *heap) ++ * @brief Destroys a memory heap. ++ * ++ * Destroys a memory heap. ++ * ++ * @param heap The heap descriptor. ++ * ++ * @coretags{secondary-only} ++ */ ++void xnheap_destroy(struct xnheap *heap) ++{ ++ spl_t s; ++ ++ secondary_mode_only(); ++ ++ xnlock_get_irqsave(&nklock, s); ++ list_del(&heap->next); ++ nrheaps--; ++ xnvfile_touch_tag(&vfile_tag); ++ xnlock_put_irqrestore(&nklock, s); ++ kfree(heap->pagemap); ++} ++EXPORT_SYMBOL_GPL(xnheap_destroy); ++ ++/** ++ * @fn xnheap_set_name(struct xnheap *heap,const char *name,...) ++ * @brief Set the heap's name string. ++ * ++ * Set the heap name that will be used in statistic outputs. ++ * ++ * @param heap The address of a heap descriptor. ++ * ++ * @param name Name displayed in statistic outputs. This parameter can ++ * be a printk()-like format argument list. ++ * ++ * @coretags{task-unrestricted} ++ */ ++void xnheap_set_name(struct xnheap *heap, const char *name, ...) ++{ ++ va_list args; ++ ++ va_start(args, name); ++ kvsformat(heap->name, sizeof(heap->name), name, args); ++ va_end(args); ++} ++EXPORT_SYMBOL_GPL(xnheap_set_name); ++ ++void *xnheap_vmalloc(size_t size) ++{ ++ /* ++ * We want memory used in real-time context to be pulled from ++ * ZONE_NORMAL, however we don't need it to be physically ++ * contiguous. ++ * ++ * 32bit systems which would need HIGHMEM for running a Cobalt ++ * configuration would also be required to support PTE ++ * pinning, which not all architectures provide. 
Moreover, ++ * pinning PTEs eagerly for a potentially (very) large amount ++ * of memory may quickly degrade performance. ++ * ++ * If using a different kernel/user memory split cannot be the ++ * answer for those configs, it's likely that basing such ++ * software on a 32bit system had to be wrong in the first ++ * place anyway. ++ */ ++ return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); ++} ++EXPORT_SYMBOL_GPL(xnheap_vmalloc); ++ ++void xnheap_vfree(void *p) ++{ ++ vfree(p); ++} ++EXPORT_SYMBOL_GPL(xnheap_vfree); ++ ++/** @} */ +--- linux/kernel/xenomai/debug.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/debug.c 2021-04-07 16:01:25.749636278 +0800 +@@ -0,0 +1,659 @@ ++/* ++ * Copyright (C) 2010 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "posix/process.h" ++#include "debug.h" ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_debug Debugging services ++ * @{ ++ */ ++struct xnvfile_directory cobalt_debug_vfroot; ++EXPORT_SYMBOL_GPL(cobalt_debug_vfroot); ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_TRACE_RELAX ++ ++#define SYMBOL_HSLOTS (1 << 8) ++ ++struct hashed_symbol { ++ struct hashed_symbol *next; ++ char symbol[0]; ++}; ++ ++static struct hashed_symbol *symbol_jhash[SYMBOL_HSLOTS]; ++ ++static struct xnheap memory_pool; ++ ++/* ++ * This is a permanent storage for ASCII strings which comes handy to ++ * get a unique and constant reference to a symbol while preserving ++ * storage space. Hashed symbols have infinite lifetime and are never ++ * flushed. ++ */ ++DEFINE_PRIVATE_XNLOCK(symbol_lock); ++ ++static const char *hash_symbol(const char *symbol) ++{ ++ struct hashed_symbol *p, **h; ++ const char *str; ++ size_t len; ++ u32 hash; ++ spl_t s; ++ ++ len = strlen(symbol); ++ hash = jhash(symbol, len, 0); ++ ++ xnlock_get_irqsave(&symbol_lock, s); ++ ++ h = &symbol_jhash[hash & (SYMBOL_HSLOTS - 1)]; ++ p = *h; ++ while (p && ++ (*p->symbol != *symbol || ++ strcmp(p->symbol + 1, symbol + 1))) ++ p = p->next; ++ ++ if (p) ++ goto done; ++ ++ p = xnheap_alloc(&memory_pool, sizeof(*p) + len + 1); ++ if (p == NULL) { ++ str = NULL; ++ goto out; ++ } ++ ++ strcpy(p->symbol, symbol); ++ p->next = *h; ++ *h = p; ++done: ++ str = p->symbol; ++out: ++ xnlock_put_irqrestore(&symbol_lock, s); ++ ++ return str; ++} ++ ++/* ++ * We define a static limit (RELAX_SPOTNR) for spot records to limit ++ * the memory consumption (we pull record memory from the system ++ * heap). The current value should be reasonable enough unless the ++ * application is extremely unsane, given that we only keep unique ++ * spots. 
Said differently, if the application has more than ++ * RELAX_SPOTNR distinct code locations doing spurious relaxes, then ++ * the first issue to address is likely PEBKAC. ++ */ ++#define RELAX_SPOTNR 128 ++#define RELAX_HSLOTS (1 << 8) ++ ++struct relax_record { ++ /* Number of hits for this location */ ++ u32 hits; ++ struct relax_spot { ++ /* Faulty thread name. */ ++ char thread[XNOBJECT_NAME_LEN]; ++ /* call stack the relax originates from. */ ++ int depth; ++ struct backtrace { ++ unsigned long pc; ++ const char *mapname; ++ } backtrace[SIGSHADOW_BACKTRACE_DEPTH]; ++ /* Program hash value of the caller. */ ++ u32 proghash; ++ /* Pid of the caller. */ ++ pid_t pid; ++ /* Reason for relaxing. */ ++ int reason; ++ } spot; ++ struct relax_record *r_next; ++ struct relax_record *h_next; ++ const char *exe_path; ++}; ++ ++static struct relax_record *relax_jhash[RELAX_HSLOTS]; ++ ++static struct relax_record *relax_record_list; ++ ++static int relax_overall, relax_queued; ++ ++DEFINE_PRIVATE_XNLOCK(relax_lock); ++ ++/* ++ * The motivation to centralize tracing information about relaxes ++ * directly into kernel space is fourfold: ++ * ++ * - this allows to gather all the trace data into a single location ++ * and keep it safe there, with no external log file involved. ++ * ++ * - enabling the tracing does not impose any requirement on the ++ * application (aside of being compiled with debug symbols for best ++ * interpreting that information). We only need a kernel config switch ++ * for this (i.e. CONFIG_XENO_OPT_DEBUG_TRACE_RELAX). ++ * ++ * - the data is collected and can be made available exactly the same ++ * way regardless of the application emitting the relax requests, or ++ * whether it is still alive when the trace data are displayed. ++ * ++ * - the kernel is able to provide accurate and detailed trace ++ * information, such as the relative offset of instructions causing ++ * relax requests within dynamic shared objects, without having to ++ * guess it roughly from /proc/pid/maps, or relying on ldd's ++ * --function-relocs feature, which both require to run on the target ++ * system to get the needed information. Instead, we allow a build ++ * host to use a cross-compilation toolchain later to extract the ++ * source location, from the raw data the kernel has provided on the ++ * target system. ++ * ++ * However, collecting the call frames within the application to ++ * determine the full context of a relax spot is not something we can ++ * do purely from kernel space, notably because it depends on build ++ * options we just don't know about (e.g. frame pointers availability ++ * for the app, or other nitty-gritty details depending on the ++ * toolchain). To solve this, we ask the application to send us a ++ * complete backtrace taken from the context of a specific signal ++ * handler, which we know is stacked over the relax spot. That ++ * information is then stored by the kernel after some ++ * post-processing, along with other data identifying the caller, and ++ * made available through the /proc/xenomai/debug/relax vfile. ++ * ++ * Implementation-wise, xndebug_notify_relax and xndebug_trace_relax ++ * routines are paired: first, xndebug_notify_relax sends a SIGSHADOW ++ * request to userland when a relax spot is detected from ++ * xnthread_relax, which should then trigger a call back to ++ * xndebug_trace_relax with the complete backtrace information, as ++ * seen from userland (via the internal sc_cobalt_backtrace ++ * syscall). 
All this runs on behalf of the relaxing thread, so we can ++ * make a number of convenient assumptions (such as being able to scan ++ * the current vma list to get detailed information about the ++ * executable mappings that could be involved). ++ */ ++ ++void xndebug_notify_relax(struct xnthread *thread, int reason) ++{ ++ xnthread_signal(thread, SIGSHADOW, ++ sigshadow_int(SIGSHADOW_ACTION_BACKTRACE, reason)); ++} ++ ++void xndebug_trace_relax(int nr, unsigned long *backtrace, ++ int reason) ++{ ++ struct relax_record *p, **h; ++ struct vm_area_struct *vma; ++ struct xnthread *thread; ++ struct relax_spot spot; ++ struct mm_struct *mm; ++ struct file *file; ++ unsigned long pc; ++ char *mapname; ++ int n, depth; ++ char *tmp; ++ u32 hash; ++ spl_t s; ++ ++ thread = xnthread_current(); ++ if (thread == NULL) ++ return; /* Can't be, right? What a mess. */ ++ ++ /* ++ * We compute PC values relative to the base of the shared ++ * executable mappings we find in the backtrace, which makes ++ * it possible for the slackspot utility to match the ++ * corresponding source code locations from unrelocated file ++ * offsets. ++ */ ++ ++ tmp = (char *)__get_free_page(GFP_KERNEL); ++ if (tmp == NULL) ++ /* ++ * The situation looks really bad, but we can't do ++ * anything about it. Just bail out. ++ */ ++ return; ++ ++ memset(&spot, 0, sizeof(spot)); ++ mm = get_task_mm(current); ++ down_read(&mm->mmap_sem); ++ ++ for (n = 0, depth = 0; n < nr; n++) { ++ pc = backtrace[n]; ++ ++ vma = find_vma(mm, pc); ++ if (vma == NULL) ++ continue; ++ ++ /* ++ * Hack. Unlike DSOs, executables and interpreters ++ * (e.g. dynamic linkers) are protected against write ++ * attempts. Use this to determine when $pc should be ++ * fixed up by subtracting the mapping base address in ++ * the DSO case. ++ */ ++ if (!(vma->vm_flags & VM_DENYWRITE)) ++ pc -= vma->vm_start; ++ ++ spot.backtrace[depth].pc = pc; ++ ++ /* ++ * Even in case we can't fetch the map name, we still ++ * record the PC value, which may still give some hint ++ * downstream. ++ */ ++ file = vma->vm_file; ++ if (file == NULL) ++ goto next_frame; ++ ++ mapname = d_path(&file->f_path, tmp, PAGE_SIZE); ++ if (IS_ERR(mapname)) ++ goto next_frame; ++ ++ spot.backtrace[depth].mapname = hash_symbol(mapname); ++ next_frame: ++ depth++; ++ } ++ ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ free_page((unsigned long)tmp); ++ ++ /* ++ * Most of the time we will be sent duplicates, since the odds ++ * of seeing the same thread running the same code doing the ++ * same mistake all over again are high. So we probe the hash ++ * table for an identical spot first, before going for a ++ * complete record allocation from the system heap if no match ++ * was found. Otherwise, we just take the fast exit path. ++ */ ++ spot.depth = depth; ++ spot.proghash = thread->proghash; ++ spot.pid = xnthread_host_pid(thread); ++ spot.reason = reason; ++ strcpy(spot.thread, thread->name); ++ hash = jhash2((u32 *)&spot, sizeof(spot) / sizeof(u32), 0); ++ ++ xnlock_get_irqsave(&relax_lock, s); ++ ++ h = &relax_jhash[hash & (RELAX_HSLOTS - 1)]; ++ p = *h; ++ while (p && ++ /* Try quick guesses first, then memcmp */ ++ (p->spot.depth != spot.depth || ++ p->spot.pid != spot.pid || ++ memcmp(&p->spot, &spot, sizeof(spot)))) ++ p = p->h_next; ++ ++ if (p) { ++ p->hits++; ++ goto out; /* Spot already recorded. */ ++ } ++ ++ if (relax_queued >= RELAX_SPOTNR) ++ goto out; /* No more space -- ignore. 
*/ ++ /* ++ * We can only compete with other shadows which have just ++ * switched to secondary mode like us. So holding the ++ * relax_lock a bit more without disabling interrupts is not ++ * an issue. This allows us to postpone the record memory ++ * allocation while probing and updating the hash table in a ++ * single move. ++ */ ++ p = xnheap_alloc(&memory_pool, sizeof(*p)); ++ if (p == NULL) ++ goto out; /* Something is about to go wrong... */ ++ ++ memcpy(&p->spot, &spot, sizeof(p->spot)); ++ p->exe_path = hash_symbol(thread->exe_path); ++ p->hits = 1; ++ p->h_next = *h; ++ *h = p; ++ p->r_next = relax_record_list; ++ relax_record_list = p; ++ relax_queued++; ++out: ++ relax_overall++; ++ ++ xnlock_put_irqrestore(&relax_lock, s); ++} ++ ++static DEFINE_VFILE_HOSTLOCK(relax_mutex); ++ ++struct relax_vfile_priv { ++ int queued; ++ int overall; ++ int ncurr; ++ struct relax_record *head; ++ struct relax_record *curr; ++}; ++ ++static void *relax_vfile_begin(struct xnvfile_regular_iterator *it) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p; ++ spl_t s; ++ int n; ++ ++ /* ++ * Snapshot the counters under lock, to make sure they remain ++ * mutually consistent despite we dump the record list in a ++ * lock-less manner. Additionally, the vfile layer already ++ * holds the relax_mutex lock for us, so that we can't race ++ * with ->store(). ++ */ ++ xnlock_get_irqsave(&relax_lock, s); ++ ++ if (relax_queued == 0 || it->pos > relax_queued) { ++ xnlock_put_irqrestore(&relax_lock, s); ++ return NULL; ++ } ++ priv->overall = relax_overall; ++ priv->queued = relax_queued; ++ priv->head = relax_record_list; ++ ++ xnlock_put_irqrestore(&relax_lock, s); ++ ++ if (it->pos == 0) { ++ priv->curr = NULL; ++ priv->ncurr = -1; ++ return VFILE_SEQ_START; ++ } ++ ++ for (n = 1, p = priv->head; n < it->pos; n++) ++ p = p->r_next; ++ ++ priv->curr = p; ++ priv->ncurr = n; ++ ++ return p; ++} ++ ++static void *relax_vfile_next(struct xnvfile_regular_iterator *it) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p; ++ int n; ++ ++ if (it->pos > priv->queued) ++ return NULL; ++ ++ if (it->pos == priv->ncurr + 1) ++ p = priv->curr->r_next; ++ else { ++ for (n = 1, p = priv->head; n < it->pos; n++) ++ p = p->r_next; ++ } ++ ++ priv->curr = p; ++ priv->ncurr = it->pos; ++ ++ return p; ++} ++ ++static const char *reason_str[] = { ++ [SIGDEBUG_UNDEFINED] = "undefined", ++ [SIGDEBUG_MIGRATE_SIGNAL] = "signal", ++ [SIGDEBUG_MIGRATE_SYSCALL] = "syscall", ++ [SIGDEBUG_MIGRATE_FAULT] = "fault", ++ [SIGDEBUG_MIGRATE_PRIOINV] = "pi-error", ++ [SIGDEBUG_NOMLOCK] = "mlock-check", ++ [SIGDEBUG_WATCHDOG] = "runaway-break", ++ [SIGDEBUG_RESCNT_IMBALANCE] = "resource-count-imbalance", ++ [SIGDEBUG_MUTEX_SLEEP] = "sleep-holding-mutex", ++ [SIGDEBUG_LOCK_BREAK] = "scheduler-lock-break", ++}; ++ ++static int relax_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct relax_vfile_priv *priv = xnvfile_iterator_priv(it); ++ struct relax_record *p = data; ++ int n; ++ ++ /* ++ * No need to grab any lock to read a record from a previously ++ * validated index: the data must be there and won't be ++ * touched anymore. 
++ */ ++ if (p == NULL) { ++ xnvfile_printf(it, "%d\n", priv->overall); ++ return 0; ++ } ++ ++ xnvfile_printf(it, "%s\n", p->exe_path ?: "?"); ++ xnvfile_printf(it, "%d %d %s %s\n", p->spot.pid, p->hits, ++ reason_str[p->spot.reason], p->spot.thread); ++ ++ for (n = 0; n < p->spot.depth; n++) ++ xnvfile_printf(it, "0x%lx %s\n", ++ p->spot.backtrace[n].pc, ++ p->spot.backtrace[n].mapname ?: "?"); ++ ++ xnvfile_printf(it, ".\n"); ++ ++ return 0; ++} ++ ++static ssize_t relax_vfile_store(struct xnvfile_input *input) ++{ ++ struct relax_record *p, *np; ++ spl_t s; ++ ++ /* ++ * Flush out all records. Races with ->show() are prevented ++ * using the relax_mutex lock. The vfile layer takes care of ++ * this internally. ++ */ ++ xnlock_get_irqsave(&relax_lock, s); ++ p = relax_record_list; ++ relax_record_list = NULL; ++ relax_overall = 0; ++ relax_queued = 0; ++ memset(relax_jhash, 0, sizeof(relax_jhash)); ++ xnlock_put_irqrestore(&relax_lock, s); ++ ++ while (p) { ++ np = p->r_next; ++ xnheap_free(&memory_pool, p); ++ p = np; ++ } ++ ++ return input->size; ++} ++ ++static struct xnvfile_regular_ops relax_vfile_ops = { ++ .begin = relax_vfile_begin, ++ .next = relax_vfile_next, ++ .show = relax_vfile_show, ++ .store = relax_vfile_store, ++}; ++ ++static struct xnvfile_regular relax_vfile = { ++ .privsz = sizeof(struct relax_vfile_priv), ++ .ops = &relax_vfile_ops, ++ .entry = { .lockops = &relax_mutex.ops }, ++}; ++ ++static inline int init_trace_relax(void) ++{ ++ u32 size = CONFIG_XENO_OPT_DEBUG_TRACE_LOGSZ * 1024; ++ void *p; ++ int ret; ++ ++ p = vmalloc(size); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&memory_pool, p, size); ++ if (ret) ++ return ret; ++ ++ xnheap_set_name(&memory_pool, "debug log"); ++ ++ ret = xnvfile_init_regular("relax", &relax_vfile, &cobalt_debug_vfroot); ++ if (ret) { ++ xnheap_destroy(&memory_pool); ++ vfree(p); ++ } ++ ++ return ret; ++} ++ ++static inline void cleanup_trace_relax(void) ++{ ++ void *p; ++ ++ xnvfile_destroy_regular(&relax_vfile); ++ p = xnheap_get_membase(&memory_pool); ++ xnheap_destroy(&memory_pool); ++ vfree(p); ++} ++ ++#else /* !CONFIG_XENO_OPT_DEBUG_TRACE_RELAX */ ++ ++static inline int init_trace_relax(void) ++{ ++ return 0; ++} ++ ++static inline void cleanup_trace_relax(void) ++{ ++} ++ ++static inline void init_thread_relax_trace(struct xnthread *thread) ++{ ++} ++ ++#endif /* !XENO_OPT_DEBUG_TRACE_RELAX */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++void xnlock_dbg_prepare_acquire(unsigned long long *start) ++{ ++ *start = xnclock_read_raw(&nkclock); ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_prepare_acquire); ++ ++void xnlock_dbg_acquired(struct xnlock *lock, int cpu, unsigned long long *start, ++ const char *file, int line, const char *function) ++{ ++ lock->lock_date = *start; ++ lock->spin_time = xnclock_read_raw(&nkclock) - *start; ++ lock->file = file; ++ lock->function = function; ++ lock->line = line; ++ lock->cpu = cpu; ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_acquired); ++ ++int xnlock_dbg_release(struct xnlock *lock, ++ const char *file, int line, const char *function) ++{ ++ unsigned long long lock_time; ++ struct xnlockinfo *stats; ++ int cpu; ++ ++ lock_time = xnclock_read_raw(&nkclock) - lock->lock_date; ++ cpu = ipipe_processor_id(); ++ stats = &per_cpu(xnlock_stats, cpu); ++ ++ if (lock->file == NULL) { ++ lock->file = "??"; ++ lock->line = 0; ++ lock->function = "invalid"; ++ } ++ ++ if (unlikely(lock->owner != cpu)) { ++ ipipe_prepare_panic(); ++ printk(XENO_ERR "lock %p already unlocked on CPU #%d\n" ++ " 
last owner = %s:%u (%s(), CPU #%d)\n", ++ lock, cpu, lock->file, lock->line, lock->function, ++ lock->cpu); ++ show_stack(NULL,NULL); ++ return 1; ++ } ++ ++ /* File that we released it. */ ++ lock->cpu = -lock->cpu; ++ lock->file = file; ++ lock->line = line; ++ lock->function = function; ++ ++ if (lock_time > stats->lock_time) { ++ stats->lock_time = lock_time; ++ stats->spin_time = lock->spin_time; ++ stats->file = lock->file; ++ stats->function = lock->function; ++ stats->line = lock->line; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xnlock_dbg_release); ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++void xndebug_shadow_init(struct xnthread *thread) ++{ ++ struct cobalt_ppd *sys_ppd; ++ size_t len; ++ ++ sys_ppd = cobalt_ppd_get(0); ++ /* ++ * The caller is current, so we know for sure that sys_ppd ++ * will still be valid after we dropped the lock. ++ * ++ * NOTE: Kernel shadows all share the system global ppd ++ * descriptor with no refcounting. ++ */ ++ thread->exe_path = sys_ppd->exe_path ?: "(unknown)"; ++ /* ++ * The program hash value is a unique token debug features may ++ * use to identify all threads which belong to a given ++ * executable file. Using this value for quick probes is often ++ * handier and more efficient than testing the whole exe_path. ++ */ ++ len = strlen(thread->exe_path); ++ thread->proghash = jhash(thread->exe_path, len, 0); ++} ++ ++int xndebug_init(void) ++{ ++ int ret; ++ ++ ret = init_trace_relax(); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++void xndebug_cleanup(void) ++{ ++ cleanup_trace_relax(); ++} ++ ++/** @} */ +--- linux/kernel/xenomai/procfs.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/procfs.c 2021-04-07 16:01:25.744636285 +0800 +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "debug.h" ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ ++static int lock_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct xnlockinfo lockinfo; ++ spl_t s; ++ int cpu; ++ ++ for_each_realtime_cpu(cpu) { ++ xnlock_get_irqsave(&nklock, s); ++ lockinfo = per_cpu(xnlock_stats, cpu); ++ xnlock_put_irqrestore(&nklock, s); ++ ++ if (cpu > 0) ++ xnvfile_printf(it, "\n"); ++ ++ xnvfile_printf(it, "CPU%d:\n", cpu); ++ ++ xnvfile_printf(it, ++ " longest locked section: %llu ns\n" ++ " spinning time: %llu ns\n" ++ " section entry: %s:%d (%s)\n", ++ xnclock_ticks_to_ns(&nkclock, lockinfo.lock_time), ++ xnclock_ticks_to_ns(&nkclock, lockinfo.spin_time), ++ lockinfo.file, lockinfo.line, lockinfo.function); ++ } ++ ++ return 0; ++} ++ ++static ssize_t lock_vfile_store(struct xnvfile_input *input) ++{ ++ ssize_t ret; ++ spl_t s; ++ int cpu; ++ ++ long val; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ if (val != 0) ++ return -EINVAL; ++ ++ for_each_realtime_cpu(cpu) { ++ xnlock_get_irqsave(&nklock, s); ++ memset(&per_cpu(xnlock_stats, cpu), '\0', sizeof(struct xnlockinfo)); ++ xnlock_put_irqrestore(&nklock, s); ++ } ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops lock_vfile_ops = { ++ .show = lock_vfile_show, ++ .store = lock_vfile_store, ++}; ++ ++static struct xnvfile_regular lock_vfile = { ++ .ops = &lock_vfile_ops, ++}; ++ ++#endif /* CONFIG_XENO_OPT_DEBUG_LOCKING */ ++ ++static int latency_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%Lu\n", ++ xnclock_ticks_to_ns(&nkclock, nkclock.gravity.user)); ++ ++ return 0; ++} ++ ++static ssize_t latency_vfile_store(struct xnvfile_input *input) ++{ ++ ssize_t ret; ++ long val; ++ ++ ret = xnvfile_get_integer(input, &val); ++ if (ret < 0) ++ return ret; ++ ++ nkclock.gravity.user = xnclock_ns_to_ticks(&nkclock, val); ++ ++ return ret; ++} ++ ++static struct xnvfile_regular_ops latency_vfile_ops = { ++ .show = latency_vfile_show, ++ .store = latency_vfile_store, ++}; ++ ++static struct xnvfile_regular latency_vfile = { ++ .ops = &latency_vfile_ops, ++}; ++ ++static int version_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ xnvfile_printf(it, "%s\n", XENO_VERSION_STRING); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops version_vfile_ops = { ++ .show = version_vfile_show, ++}; ++ ++static struct xnvfile_regular version_vfile = { ++ .ops = &version_vfile_ops, ++}; ++ ++static int faults_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int cpu, trap; ++ ++ xnvfile_puts(it, "TRAP "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (trap = 0; cobalt_machine.fault_labels[trap]; trap++) { ++ if (*cobalt_machine.fault_labels[trap] == '\0') ++ continue; ++ ++ xnvfile_printf(it, "\n%3d: ", trap); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, "%12u", ++ per_cpu(cobalt_machine_cpudata, cpu).faults[trap]); ++ ++ xnvfile_printf(it, " (%s)", ++ cobalt_machine.fault_labels[trap]); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops faults_vfile_ops = { ++ .show = faults_vfile_show, ++}; ++ ++static struct xnvfile_regular faults_vfile = { ++ .ops = &faults_vfile_ops, ++}; ++ ++static int apc_vfile_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int cpu, apc; ++ ++ /* We assume the entire output fits in a 
single page. */ ++ ++ xnvfile_puts(it, "APC "); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, " CPU%d", cpu); ++ ++ for (apc = 0; apc < BITS_PER_LONG; apc++) { ++ if (!test_bit(apc, &cobalt_pipeline.apc_map)) ++ continue; /* Not hooked. */ ++ ++ xnvfile_printf(it, "\n%3d: ", apc); ++ ++ for_each_realtime_cpu(cpu) ++ xnvfile_printf(it, "%12lu", ++ per_cpu(cobalt_machine_cpudata, cpu).apc_shots[apc]); ++ ++ if (cobalt_pipeline.apc_table[apc].name) ++ xnvfile_printf(it, " (%s)", ++ cobalt_pipeline.apc_table[apc].name); ++ } ++ ++ xnvfile_putc(it, '\n'); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops apc_vfile_ops = { ++ .show = apc_vfile_show, ++}; ++ ++static struct xnvfile_regular apc_vfile = { ++ .ops = &apc_vfile_ops, ++}; ++ ++void xnprocfs_cleanup_tree(void) ++{ ++#ifdef CONFIG_XENO_OPT_DEBUG ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ xnvfile_destroy_regular(&lock_vfile); ++#endif ++ xnvfile_destroy_dir(&cobalt_debug_vfroot); ++#endif /* XENO_OPT_DEBUG */ ++ xnvfile_destroy_regular(&apc_vfile); ++ xnvfile_destroy_regular(&faults_vfile); ++ xnvfile_destroy_regular(&version_vfile); ++ xnvfile_destroy_regular(&latency_vfile); ++ xnintr_cleanup_proc(); ++ xnheap_cleanup_proc(); ++ xnclock_cleanup_proc(); ++ xnsched_cleanup_proc(); ++ xnvfile_destroy_root(); ++} ++ ++int __init xnprocfs_init_tree(void) ++{ ++ int ret; ++ ++ ret = xnvfile_init_root(); ++ if (ret) ++ return ret; ++ ++ ret = xnsched_init_proc(); ++ if (ret) ++ return ret; ++ ++ xnclock_init_proc(); ++ xnheap_init_proc(); ++ xnintr_init_proc(); ++ xnvfile_init_regular("latency", &latency_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("version", &version_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("faults", &faults_vfile, &cobalt_vfroot); ++ xnvfile_init_regular("apc", &apc_vfile, &cobalt_vfroot); ++#ifdef CONFIG_XENO_OPT_DEBUG ++ xnvfile_init_dir("debug", &cobalt_debug_vfroot, &cobalt_vfroot); ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++ xnvfile_init_regular("lock", &lock_vfile, &cobalt_debug_vfroot); ++#endif ++#endif ++ ++ return 0; ++} +--- linux/kernel/xenomai/lock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/kernel/xenomai/lock.c 2021-04-07 16:01:25.739636292 +0800 +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (C) 2001-2012 Philippe Gerum . ++ * Copyright (C) 2004,2005 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++ ++/** ++ * @ingroup cobalt_core ++ * @defgroup cobalt_core_lock Locking services ++ * ++ * The Xenomai core deals with concurrent activities from two distinct ++ * kernels running side-by-side. When interrupts are involved, the ++ * services from this section control the @b hard interrupt state ++ * exclusively, for protecting against processor-local or SMP ++ * concurrency. 
++ * ++ * @note In a dual kernel configuration, hard interrupts are ++ * gated by the CPU. When enabled, hard interrupts are immediately ++ * delivered to the Xenomai core if they belong to a real-time source, ++ * or deferred until enabled by a second-stage virtual interrupt mask, ++ * if they belong to regular Linux devices/sources. ++ * ++ * @{ ++ */ ++DEFINE_XNLOCK(nklock); ++#if defined(CONFIG_SMP) || defined(CONFIG_XENO_OPT_DEBUG_LOCKING) ++EXPORT_SYMBOL_GPL(nklock); ++ ++#ifdef CONFIG_XENO_ARCH_OUTOFLINE_XNLOCK ++int ___xnlock_get(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ return ____xnlock_get(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++} ++EXPORT_SYMBOL_GPL(___xnlock_get); ++ ++void ___xnlock_put(struct xnlock *lock /*, */ XNLOCK_DBG_CONTEXT_ARGS) ++{ ++ ____xnlock_put(lock /* , */ XNLOCK_DBG_PASS_CONTEXT); ++} ++EXPORT_SYMBOL_GPL(___xnlock_put); ++#endif /* out of line xnlock */ ++#endif /* CONFIG_SMP || XENO_DEBUG(LOCKING) */ ++ ++#ifdef CONFIG_XENO_OPT_DEBUG_LOCKING ++DEFINE_PER_CPU(struct xnlockinfo, xnlock_stats); ++EXPORT_PER_CPU_SYMBOL_GPL(xnlock_stats); ++#endif ++ ++/** @} */ +--- linux/kernel/Makefile 2021-04-07 16:00:26.635720743 +0800 ++++ linux-patched/kernel/Makefile 2021-04-07 16:01:25.595636498 +0800 +@@ -126,3 +126,5 @@ + targets += config_data.h + $(obj)/config_data.h: $(obj)/config_data.gz FORCE + $(call filechk,ikconfiggz) ++ ++obj-$(CONFIG_XENOMAI) += xenomai/ +--- linux/drivers/xenomai/testing/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/Makefile 2021-04-07 16:01:28.041633003 +0800 +@@ -0,0 +1,13 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_TIMERBENCH) += xeno_timerbench.o ++obj-$(CONFIG_XENO_DRIVERS_SWITCHTEST) += xeno_switchtest.o ++obj-$(CONFIG_XENO_DRIVERS_RTDMTEST) += xeno_rtdmtest.o ++obj-$(CONFIG_XENO_DRIVERS_HEAPCHECK) += xeno_heapcheck.o ++ ++xeno_timerbench-y := timerbench.o ++ ++xeno_switchtest-y := switchtest.o ++ ++xeno_rtdmtest-y := rtdmtest.o ++ ++xeno_heapcheck-y := heapcheck.o +--- linux/drivers/xenomai/testing/heapcheck.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/heapcheck.c 2021-04-07 16:01:28.036633010 +0800 +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (C) 2018 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define complain(__fmt, __args...) 
\ ++ printk(XENO_WARNING "heap check: " __fmt "\n", ##__args) ++ ++static struct xnheap test_heap = { ++ .name = "test_heap" ++}; ++ ++enum pattern { ++ alphabet_series, ++ digit_series, ++ binary_series, ++}; ++ ++struct chunk { ++ void *ptr; ++ enum pattern pattern; ++}; ++ ++struct runstats { ++ struct rttst_heap_stats stats; ++ struct runstats *next; ++}; ++ ++static struct runstats *statistics; ++ ++static int nrstats; ++ ++static inline void breathe(int loops) ++{ ++ if ((loops % 1000) == 0) ++ rtdm_task_sleep(300000ULL); ++} ++ ++static inline void do_swap(void *left, void *right, const size_t size) ++{ ++ char trans[size]; ++ ++ memcpy(trans, left, size); ++ memcpy(left, right, size); ++ memcpy(right, trans, size); ++} ++ ++static void random_shuffle(void *vbase, size_t nmemb, const size_t size) ++{ ++ struct { ++ char x[size]; ++ } __attribute__((packed)) *base = vbase; ++ unsigned int j, k; ++ ++ for (j = nmemb; j > 0; j--) { ++ k = (unsigned int)(prandom_u32() % nmemb) + 1; ++ if (j == k) ++ continue; ++ do_swap(&base[j - 1], &base[k - 1], size); ++ } ++} ++ ++static void fill_pattern(char *p, size_t size, enum pattern pat) ++{ ++ unsigned int val, count; ++ ++ switch (pat) { ++ case alphabet_series: ++ val = 'a'; ++ count = 26; ++ break; ++ case digit_series: ++ val = '0'; ++ count = 10; ++ break; ++ default: ++ val = 0; ++ count = 255; ++ break; ++ } ++ ++ while (size-- > 0) { ++ *p++ = (char)(val % count); ++ val++; ++ } ++} ++ ++static int check_pattern(const char *p, size_t size, enum pattern pat) ++{ ++ unsigned int val, count; ++ ++ switch (pat) { ++ case alphabet_series: ++ val = 'a'; ++ count = 26; ++ break; ++ case digit_series: ++ val = '0'; ++ count = 10; ++ break; ++ default: ++ val = 0; ++ count = 255; ++ break; ++ } ++ ++ while (size-- > 0) { ++ if (*p++ != (char)(val % count)) ++ return 0; ++ val++; ++ } ++ ++ return 1; ++} ++ ++static size_t find_largest_free(size_t free_size, size_t block_size) ++{ ++ void *p; ++ ++ for (;;) { ++ p = xnheap_alloc(&test_heap, free_size); ++ if (p) { ++ xnheap_free(&test_heap, p); ++ break; ++ } ++ if (free_size <= block_size) ++ break; ++ free_size -= block_size; ++ } ++ ++ return free_size; ++} ++ ++static int test_seq(size_t heap_size, size_t block_size, int flags) ++{ ++ long alloc_sum_ns, alloc_avg_ns, free_sum_ns, free_avg_ns, ++ alloc_max_ns, free_max_ns, d; ++ size_t user_size, largest_free, maximum_free, freed; ++ int ret, n, k, maxblocks, nrblocks; ++ nanosecs_rel_t start, end; ++ struct chunk *chunks; ++ struct runstats *st; ++ bool done_frag; ++ void *mem, *p; ++ ++ maxblocks = heap_size / block_size; ++ ++ mem = vmalloc(heap_size); ++ if (mem == NULL) ++ return -ENOMEM; ++ ++ ret = xnheap_init(&test_heap, mem, heap_size); ++ if (ret) { ++ complain("cannot init heap with size %zu", ++ heap_size); ++ goto out; ++ } ++ ++ chunks = vmalloc(sizeof(*chunks) * maxblocks); ++ if (chunks == NULL) { ++ ret = -ENOMEM; ++ goto no_chunks; ++ } ++ memset(chunks, 0, sizeof(*chunks) * maxblocks); ++ ++ ret = xnthread_harden(); ++ if (ret) ++ goto done; ++ ++ if (xnheap_get_size(&test_heap) != heap_size) { ++ complain("memory size inconsistency (%zu / %zu bytes)", ++ heap_size, xnheap_get_size(&test_heap)); ++ goto bad; ++ } ++ ++ user_size = 0; ++ alloc_avg_ns = 0; ++ free_avg_ns = 0; ++ alloc_max_ns = 0; ++ free_max_ns = 0; ++ maximum_free = 0; ++ largest_free = 0; ++ ++ for (n = 0, alloc_sum_ns = 0; ; n++) { ++ start = rtdm_clock_read_monotonic(); ++ p = xnheap_alloc(&test_heap, block_size); ++ end = rtdm_clock_read_monotonic(); 
++ d = end - start; ++ if (d > alloc_max_ns) ++ alloc_max_ns = d; ++ alloc_sum_ns += d; ++ if (p == NULL) ++ break; ++ user_size += block_size; ++ if (n >= maxblocks) { ++ complain("too many blocks fetched" ++ " (heap=%zu, block=%zu, " ++ "got more than %d blocks)", ++ heap_size, block_size, maxblocks); ++ goto bad; ++ } ++ chunks[n].ptr = p; ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ chunks[n].pattern = (enum pattern)(prandom_u32() % 3); ++ fill_pattern(chunks[n].ptr, block_size, chunks[n].pattern); ++ } ++ breathe(n); ++ } ++ ++ nrblocks = n; ++ if (nrblocks == 0) ++ goto do_stats; ++ ++ if ((flags & RTTST_HEAPCHECK_ZEROOVRD) && nrblocks != maxblocks) { ++ complain("too few blocks fetched, unexpected overhead" ++ " (heap=%zu, block=%zu, " ++ "got %d, less than %d blocks)", ++ heap_size, block_size, nrblocks, maxblocks); ++ goto bad; ++ } ++ ++ breathe(0); ++ ++ /* Make sure we did not trash any busy block while allocating. */ ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ for (n = 0; n < nrblocks; n++) { ++ if (!check_pattern(chunks[n].ptr, block_size, ++ chunks[n].pattern)) { ++ complain("corrupted block #%d on alloc" ++ " sequence (pattern %d)", ++ n, chunks[n].pattern); ++ goto bad; ++ } ++ breathe(n); ++ } ++ } ++ ++ if (flags & RTTST_HEAPCHECK_SHUFFLE) ++ random_shuffle(chunks, nrblocks, sizeof(*chunks)); ++ ++ /* ++ * Release all blocks. ++ */ ++ for (n = 0, free_sum_ns = 0, freed = 0, done_frag = false; ++ n < nrblocks; n++) { ++ start = rtdm_clock_read_monotonic(); ++ xnheap_free(&test_heap, chunks[n].ptr); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > free_max_ns) ++ free_max_ns = d; ++ free_sum_ns += d; ++ chunks[n].ptr = NULL; ++ /* Make sure we did not trash busy blocks while freeing. */ ++ if (flags & RTTST_HEAPCHECK_PATTERN) { ++ for (k = 0; k < nrblocks; k++) { ++ if (chunks[k].ptr && ++ !check_pattern(chunks[k].ptr, block_size, ++ chunks[k].pattern)) { ++ complain("corrupted block #%d on release" ++ " sequence (pattern %d)", ++ k, chunks[k].pattern); ++ goto bad; ++ } ++ breathe(k); ++ } ++ } ++ freed += block_size; ++ /* ++ * Get a sense of the fragmentation for the tested ++ * allocation pattern, heap and block sizes when half ++ * of the usable heap size should be available to us. ++ * NOTE: user_size excludes the overhead, this is ++ * actually what we managed to get from the current ++ * heap out of the allocation loop. ++ */ ++ if (!done_frag && freed >= user_size / 2) { ++ /* Calculate the external fragmentation. */ ++ largest_free = find_largest_free(freed, block_size); ++ maximum_free = freed; ++ done_frag = true; ++ } ++ breathe(n); ++ } ++ ++ /* ++ * If the deallocation mechanism is broken, we might not be ++ * able to reproduce the same allocation pattern with the same ++ * outcome, check this. 
++ */ ++ if (flags & RTTST_HEAPCHECK_HOT) { ++ for (n = 0, alloc_max_ns = alloc_sum_ns = 0; ; n++) { ++ start = rtdm_clock_read_monotonic(); ++ p = xnheap_alloc(&test_heap, block_size); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > alloc_max_ns) ++ alloc_max_ns = d; ++ alloc_sum_ns += d; ++ if (p == NULL) ++ break; ++ if (n >= maxblocks) { ++ complain("too many blocks fetched during hot pass" ++ " (heap=%zu, block=%zu, " ++ "got more than %d blocks)", ++ heap_size, block_size, maxblocks); ++ goto bad; ++ } ++ chunks[n].ptr = p; ++ breathe(n); ++ } ++ if (n != nrblocks) { ++ complain("inconsistent block count fetched" ++ " during hot pass (heap=%zu, block=%zu, " ++ "got %d blocks vs %d during alloc)", ++ heap_size, block_size, n, nrblocks); ++ goto bad; ++ } ++ for (n = 0, free_max_ns = free_sum_ns = 0; n < nrblocks; n++) { ++ start = rtdm_clock_read_monotonic(); ++ xnheap_free(&test_heap, chunks[n].ptr); ++ end = rtdm_clock_read_monotonic(); ++ d = end - start; ++ if (d > free_max_ns) ++ free_max_ns = d; ++ free_sum_ns += d; ++ breathe(n); ++ } ++ } ++ ++ alloc_avg_ns = alloc_sum_ns / nrblocks; ++ free_avg_ns = free_sum_ns / nrblocks; ++ ++ if ((flags & RTTST_HEAPCHECK_ZEROOVRD) && heap_size != user_size) { ++ complain("unexpected overhead reported"); ++ goto bad; ++ } ++ ++ if (xnheap_get_used(&test_heap) > 0) { ++ complain("memory leakage reported: %zu bytes missing", ++ xnheap_get_used(&test_heap)); ++ goto bad; ++ } ++ ++do_stats: ++ xnthread_relax(0, 0); ++ ret = 0; ++ /* ++ * Don't report stats when running a pattern check, timings ++ * are affected. ++ */ ++ if (!(flags & RTTST_HEAPCHECK_PATTERN)) { ++ st = kmalloc(sizeof(*st), GFP_KERNEL); ++ if (st == NULL) { ++ complain("failed allocating memory"); ++ ret = -ENOMEM; ++ goto out; ++ } ++ st->stats.heap_size = heap_size; ++ st->stats.user_size = user_size; ++ st->stats.block_size = block_size; ++ st->stats.nrblocks = nrblocks; ++ st->stats.alloc_avg_ns = alloc_avg_ns; ++ st->stats.alloc_max_ns = alloc_max_ns; ++ st->stats.free_avg_ns = free_avg_ns; ++ st->stats.free_max_ns = free_max_ns; ++ st->stats.maximum_free = maximum_free; ++ st->stats.largest_free = largest_free; ++ st->stats.flags = flags; ++ st->next = statistics; ++ statistics = st; ++ nrstats++; ++ } ++ ++done: ++ vfree(chunks); ++no_chunks: ++ xnheap_destroy(&test_heap); ++out: ++ vfree(mem); ++ ++ return ret; ++bad: ++ xnthread_relax(0, 0); ++ ret = -EPROTO; ++ goto done; ++} ++ ++static int collect_stats(struct rtdm_fd *fd, ++ struct rttst_heap_stats __user *buf, int nr) ++{ ++ struct runstats *p, *next; ++ int ret, n; ++ ++ if (nr < 0) ++ return -EINVAL; ++ ++ for (p = statistics, n = nr; p && n > 0 && nrstats > 0; ++ n--, nrstats--, p = next, buf += sizeof(p->stats)) { ++ ret = rtdm_copy_to_user(fd, buf, &p->stats, sizeof(p->stats)); ++ if (ret) ++ return ret; ++ next = p->next; ++ statistics = next; ++ kfree(p); ++ } ++ ++ return nr - n; ++} ++ ++static void heapcheck_close(struct rtdm_fd *fd) ++{ ++ struct runstats *p, *next; ++ ++ for (p = statistics; p; p = next) { ++ next = p->next; ++ kfree(p); ++ } ++ ++ statistics = NULL; ++} ++ ++static int heapcheck_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rttst_heap_stathdr sthdr; ++ struct rttst_heap_parms parms; ++ int ret; ++ ++ switch (request) { ++ case RTTST_RTIOC_HEAP_CHECK: ++ ret = rtdm_copy_from_user(fd, &parms, arg, sizeof(parms)); ++ if (ret) ++ return ret; ++ ret = test_seq(parms.heap_size, ++ parms.block_size, ++ parms.flags); ++ if (ret) ++ 
return ret; ++ parms.nrstats = nrstats; ++ ret = rtdm_copy_to_user(fd, arg, &parms, sizeof(parms)); ++ break; ++ case RTTST_RTIOC_HEAP_STAT_COLLECT: ++ sthdr.buf = NULL; ++ ret = rtdm_copy_from_user(fd, &sthdr, arg, sizeof(sthdr)); ++ if (ret) ++ return ret; ++ ret = collect_stats(fd, sthdr.buf, sthdr.nrstats); ++ if (ret < 0) ++ return ret; ++ sthdr.nrstats = ret; ++ ret = rtdm_copy_to_user(fd, arg, &sthdr, sizeof(sthdr)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static struct rtdm_driver heapcheck_driver = { ++ .profile_info = RTDM_PROFILE_INFO(heap_check, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_HEAPCHECK, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .ops = { ++ .close = heapcheck_close, ++ .ioctl_nrt = heapcheck_ioctl, ++ }, ++}; ++ ++static struct rtdm_device heapcheck_device = { ++ .driver = &heapcheck_driver, ++ .label = "heapcheck", ++}; ++ ++static int __init heapcheck_init(void) ++{ ++ return rtdm_dev_register(&heapcheck_device); ++} ++ ++static void __exit heapcheck_exit(void) ++{ ++ rtdm_dev_unregister(&heapcheck_device); ++} ++ ++module_init(heapcheck_init); ++module_exit(heapcheck_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/testing/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/Kconfig 2021-04-07 16:01:28.031633017 +0800 +@@ -0,0 +1,29 @@ ++menu "Testing drivers" ++ ++config XENO_DRIVERS_TIMERBENCH ++ tristate "Timer benchmark driver" ++ default y ++ help ++ Kernel-based benchmark driver for timer latency evaluation. ++ See testsuite/latency for a possible front-end. ++ ++config XENO_DRIVERS_SWITCHTEST ++ tristate "Context switch unit testing driver" ++ default y ++ help ++ Kernel-based driver for unit testing context switches and ++ FPU switches. ++ ++config XENO_DRIVERS_HEAPCHECK ++ tristate "Memory allocator test driver" ++ default y ++ help ++ Kernel-based driver for testing Cobalt's memory allocator. ++ ++config XENO_DRIVERS_RTDMTEST ++ depends on m ++ tristate "RTDM unit tests driver" ++ help ++ Kernel driver for performing RTDM unit tests. ++ ++endmenu +--- linux/drivers/xenomai/testing/switchtest.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/switchtest.c 2021-04-07 16:01:28.027633023 +0800 +@@ -0,0 +1,752 @@ ++/* ++ * Copyright (C) 2010 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Cobalt context switch test helper"); ++MODULE_AUTHOR("Gilles Chanteperdrix "); ++MODULE_VERSION("0.1.1"); ++MODULE_LICENSE("GPL"); ++ ++#define RTSWITCH_RT 0x10000 ++#define RTSWITCH_NRT 0 ++#define RTSWITCH_KERNEL 0x20000 ++ ++struct rtswitch_task { ++ struct rttst_swtest_task base; ++ rtdm_event_t rt_synch; ++ struct semaphore nrt_synch; ++ struct xnthread ktask; /* For kernel-space real-time tasks. */ ++ unsigned int last_switch; ++}; ++ ++struct rtswitch_context { ++ struct rtswitch_task *tasks; ++ unsigned int tasks_count; ++ unsigned int next_index; ++ struct semaphore lock; ++ unsigned int cpu; ++ unsigned int switches_count; ++ ++ unsigned long pause_us; ++ unsigned int next_task; ++ rtdm_timer_t wake_up_delay; ++ ++ unsigned int failed; ++ struct rttst_swtest_error error; ++ ++ struct rtswitch_task *utask; ++ rtdm_nrtsig_t wake_utask; ++}; ++ ++static int fp_features; ++ ++static int report(const char *fmt, ...) ++{ ++ va_list ap; ++ int ret; ++ ++ va_start(ap, fmt); ++ ret = vprintk(fmt, ap); ++ va_end(ap); ++ ++ return ret; ++} ++ ++static void handle_ktask_error(struct rtswitch_context *ctx, unsigned int fp_val) ++{ ++ struct rtswitch_task *cur = &ctx->tasks[ctx->error.last_switch.to]; ++ unsigned int i; ++ ++ ctx->failed = 1; ++ ctx->error.fp_val = fp_val; ++ ++ if ((cur->base.flags & RTSWITCH_RT) == RTSWITCH_RT) ++ for (i = 0; i < ctx->tasks_count; i++) { ++ struct rtswitch_task *task = &ctx->tasks[i]; ++ ++ /* Find the first non kernel-space task. */ ++ if ((task->base.flags & RTSWITCH_KERNEL)) ++ continue; ++ ++ /* Unblock it. */ ++ switch(task->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = task; ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ break; ++ ++ case RTSWITCH_RT: ++ rtdm_event_signal(&task->rt_synch); ++ break; ++ } ++ ++ xnthread_suspend(&cur->ktask, ++ XNSUSP, XN_INFINITE, XN_RELATIVE, NULL); ++ } ++} ++ ++static int rtswitch_pend_rt(struct rtswitch_context *ctx, ++ unsigned int idx) ++{ ++ struct rtswitch_task *task; ++ int rc; ++ ++ if (idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ task = &ctx->tasks[idx]; ++ task->base.flags |= RTSWITCH_RT; ++ ++ rc = rtdm_event_wait(&task->rt_synch); ++ if (rc < 0) ++ return rc; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static void timed_wake_up(rtdm_timer_t *timer) ++{ ++ struct rtswitch_context *ctx = ++ container_of(timer, struct rtswitch_context, wake_up_delay); ++ struct rtswitch_task *task; ++ ++ task = &ctx->tasks[ctx->next_task]; ++ ++ switch (task->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = task; ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ break; ++ ++ case RTSWITCH_RT: ++ rtdm_event_signal(&task->rt_synch); ++ } ++} ++ ++static int rtswitch_to_rt(struct rtswitch_context *ctx, ++ unsigned int from_idx, ++ unsigned int to_idx) ++{ ++ struct rtswitch_task *from, *to; ++ int rc; ++ ++ if (from_idx > ctx->tasks_count || to_idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ /* to == from is a special case which means ++ "return to the previous task". 
*/ ++ if (to_idx == from_idx) ++ to_idx = ctx->error.last_switch.from; ++ ++ from = &ctx->tasks[from_idx]; ++ to = &ctx->tasks[to_idx]; ++ ++ from->base.flags |= RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ ++ if (ctx->pause_us) { ++ ctx->next_task = to_idx; ++ barrier(); ++ rtdm_timer_start(&ctx->wake_up_delay, ++ ctx->pause_us * 1000, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ xnsched_lock(); ++ } else ++ switch (to->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ ctx->utask = to; ++ barrier(); ++ rtdm_nrtsig_pend(&ctx->wake_utask); ++ xnsched_lock(); ++ break; ++ ++ case RTSWITCH_RT: ++ xnsched_lock(); ++ rtdm_event_signal(&to->rt_synch); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ rc = rtdm_event_wait(&from->rt_synch); ++ xnsched_unlock(); ++ ++ if (rc < 0) ++ return rc; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_pend_nrt(struct rtswitch_context *ctx, ++ unsigned int idx) ++{ ++ struct rtswitch_task *task; ++ ++ if (idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ task = &ctx->tasks[idx]; ++ ++ task->base.flags &= ~RTSWITCH_RT; ++ ++ if (down_interruptible(&task->nrt_synch)) ++ return -EINTR; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_to_nrt(struct rtswitch_context *ctx, ++ unsigned int from_idx, ++ unsigned int to_idx) ++{ ++ struct rtswitch_task *from, *to; ++ unsigned int expected, fp_val; ++ int fp_check; ++ ++ if (from_idx > ctx->tasks_count || to_idx > ctx->tasks_count) ++ return -EINVAL; ++ ++ /* to == from is a special case which means ++ "return to the previous task". */ ++ if (to_idx == from_idx) ++ to_idx = ctx->error.last_switch.from; ++ ++ from = &ctx->tasks[from_idx]; ++ to = &ctx->tasks[to_idx]; ++ ++ fp_check = ctx->switches_count == from->last_switch + 1 ++ && ctx->error.last_switch.from == to_idx ++ && ctx->error.last_switch.to == from_idx; ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ ++ if (ctx->pause_us) { ++ ctx->next_task = to_idx; ++ barrier(); ++ rtdm_timer_start(&ctx->wake_up_delay, ++ ctx->pause_us * 1000, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ } else ++ switch (to->base.flags & RTSWITCH_RT) { ++ case RTSWITCH_NRT: ++ switch_to_nrt: ++ up(&to->nrt_synch); ++ break; ++ ++ case RTSWITCH_RT: ++ ++ if (!fp_check || fp_linux_begin() < 0) { ++ fp_check = 0; ++ goto signal_nofp; ++ } ++ ++ expected = from_idx + 500 + ++ (ctx->switches_count % 4000000) * 1000; ++ ++ fp_regs_set(fp_features, expected); ++ rtdm_event_signal(&to->rt_synch); ++ fp_val = fp_regs_check(fp_features, expected, report); ++ fp_linux_end(); ++ ++ if(down_interruptible(&from->nrt_synch)) ++ return -EINTR; ++ if (ctx->failed) ++ return 1; ++ if (fp_val != expected) { ++ handle_ktask_error(ctx, fp_val); ++ return 1; ++ } ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ if ((to->base.flags & RTSWITCH_RT) == RTSWITCH_NRT) ++ goto switch_to_nrt; ++ expected = from_idx + 500 + ++ (ctx->switches_count % 4000000) * 1000; ++ barrier(); ++ ++ fp_linux_begin(); ++ fp_regs_set(fp_features, expected); ++ rtdm_event_signal(&to->rt_synch); ++ fp_val = fp_regs_check(fp_features, expected, report); ++ fp_linux_end(); ++ ++ if (down_interruptible(&from->nrt_synch)) ++ 
return -EINTR; ++ if (ctx->failed) ++ return 1; ++ if (fp_val != expected) { ++ handle_ktask_error(ctx, fp_val); ++ return 1; ++ } ++ ++ from->base.flags &= ~RTSWITCH_RT; ++ from->last_switch = ++ctx->switches_count; ++ ctx->error.last_switch.from = from_idx; ++ ctx->error.last_switch.to = to_idx; ++ barrier(); ++ if ((to->base.flags & RTSWITCH_RT) == RTSWITCH_NRT) ++ goto switch_to_nrt; ++ ++ signal_nofp: ++ rtdm_event_signal(&to->rt_synch); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ if (down_interruptible(&from->nrt_synch)) ++ return -EINTR; ++ ++ if (ctx->failed) ++ return 1; ++ ++ return 0; ++} ++ ++static int rtswitch_set_tasks_count(struct rtswitch_context *ctx, unsigned int count) ++{ ++ struct rtswitch_task *tasks; ++ ++ if (ctx->tasks_count == count) ++ return 0; ++ ++ tasks = vmalloc(count * sizeof(*tasks)); ++ ++ if (!tasks) ++ return -ENOMEM; ++ ++ down(&ctx->lock); ++ ++ if (ctx->tasks) ++ vfree(ctx->tasks); ++ ++ ctx->tasks = tasks; ++ ctx->tasks_count = count; ++ ctx->next_index = 0; ++ ++ up(&ctx->lock); ++ ++ return 0; ++} ++ ++static int rtswitch_register_task(struct rtswitch_context *ctx, ++ struct rttst_swtest_task *arg) ++{ ++ struct rtswitch_task *t; ++ ++ down(&ctx->lock); ++ ++ if (ctx->next_index == ctx->tasks_count) { ++ up(&ctx->lock); ++ return -EBUSY; ++ } ++ ++ arg->index = ctx->next_index; ++ t = &ctx->tasks[arg->index]; ++ ctx->next_index++; ++ t->base = *arg; ++ t->last_switch = 0; ++ sema_init(&t->nrt_synch, 0); ++ rtdm_event_init(&t->rt_synch, 0); ++ ++ up(&ctx->lock); ++ ++ return 0; ++} ++ ++struct taskarg { ++ struct rtswitch_context *ctx; ++ struct rtswitch_task *task; ++}; ++ ++static void rtswitch_ktask(void *cookie) ++{ ++ struct taskarg *arg = (struct taskarg *) cookie; ++ unsigned int fp_val, expected, to, i = 0; ++ struct rtswitch_context *ctx = arg->ctx; ++ struct rtswitch_task *task = arg->task; ++ ++ to = task->base.index; ++ ++ rtswitch_pend_rt(ctx, task->base.index); ++ ++ while (!rtdm_task_should_stop()) { ++ if (task->base.flags & RTTST_SWTEST_USE_FPU) ++ fp_regs_set(fp_features, task->base.index + i * 1000); ++ ++ switch(i % 3) { ++ case 0: ++ /* to == from means "return to last task" */ ++ rtswitch_to_rt(ctx, task->base.index, task->base.index); ++ break; ++ case 1: ++ if (++to == task->base.index) ++ ++to; ++ if (to > ctx->tasks_count - 1) ++ to = 0; ++ if (to == task->base.index) ++ ++to; ++ ++ /* Fall through. */ ++ case 2: ++ rtswitch_to_rt(ctx, task->base.index, to); ++ } ++ ++ if (task->base.flags & RTTST_SWTEST_USE_FPU) { ++ expected = task->base.index + i * 1000; ++ fp_val = fp_regs_check(fp_features, expected, report); ++ ++ if (fp_val != expected) { ++ if (task->base.flags & RTTST_SWTEST_FREEZE) ++ xntrace_user_freeze(0, 0); ++ handle_ktask_error(ctx, fp_val); ++ } ++ } ++ ++ if (++i == 4000000) ++ i = 0; ++ } ++} ++ ++static int rtswitch_create_ktask(struct rtswitch_context *ctx, ++ struct rttst_swtest_task *ptask) ++{ ++ union xnsched_policy_param param; ++ struct xnthread_start_attr sattr; ++ struct xnthread_init_attr iattr; ++ struct rtswitch_task *task; ++ struct taskarg arg; ++ int init_flags; ++ char name[30]; ++ int err; ++ ++ /* ++ * Silently disable FP tests in kernel if FPU is not supported ++ * there. Typical case is math emulation support: we can use ++ * it from userland as a synthetic FPU, but there is no sane ++ * way to use it from kernel-based threads (Xenomai or Linux). 
++ */ ++ if (!fp_kernel_supported()) ++ ptask->flags &= ~RTTST_SWTEST_USE_FPU; ++ ++ ptask->flags |= RTSWITCH_KERNEL; ++ err = rtswitch_register_task(ctx, ptask); ++ ++ if (err) ++ return err; ++ ++ ksformat(name, sizeof(name), "rtk%d/%u", ptask->index, ctx->cpu); ++ ++ task = &ctx->tasks[ptask->index]; ++ ++ arg.ctx = ctx; ++ arg.task = task; ++ ++ init_flags = (ptask->flags & RTTST_SWTEST_FPU) ? XNFPU : 0; ++ ++ iattr.name = name; ++ iattr.flags = init_flags; ++ iattr.personality = &xenomai_personality; ++ iattr.affinity = *cpumask_of(ctx->cpu); ++ param.rt.prio = 1; ++ ++ set_cpus_allowed_ptr(current, cpumask_of(ctx->cpu)); ++ ++ err = xnthread_init(&task->ktask, ++ &iattr, &xnsched_class_rt, ¶m); ++ if (!err) { ++ sattr.mode = 0; ++ sattr.entry = rtswitch_ktask; ++ sattr.cookie = &arg; ++ err = xnthread_start(&task->ktask, &sattr); ++ } else ++ /* ++ * In order to avoid calling xnthread_cancel() for an ++ * invalid thread. ++ */ ++ task->base.flags = 0; ++ /* ++ * Putting the argument on stack is safe, because the new ++ * thread, thanks to the above call to set_cpus_allowed_ptr(), ++ * will preempt the current thread immediately, and will ++ * suspend only once the arguments on stack are used. ++ */ ++ ++ return err; ++} ++ ++static void rtswitch_utask_waker(rtdm_nrtsig_t *sig, void *arg) ++{ ++ struct rtswitch_context *ctx = (struct rtswitch_context *)arg; ++ up(&ctx->utask->nrt_synch); ++} ++ ++static int rtswitch_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->tasks = NULL; ++ ctx->tasks_count = ctx->next_index = ctx->cpu = ctx->switches_count = 0; ++ sema_init(&ctx->lock, 1); ++ ctx->failed = 0; ++ ctx->error.last_switch.from = ctx->error.last_switch.to = -1; ++ ctx->pause_us = 0; ++ ++ rtdm_nrtsig_init(&ctx->wake_utask, rtswitch_utask_waker, ctx); ++ ++ rtdm_timer_init(&ctx->wake_up_delay, timed_wake_up, "switchtest timer"); ++ ++ return 0; ++} ++ ++static void rtswitch_close(struct rtdm_fd *fd) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ unsigned int i; ++ ++ rtdm_timer_destroy(&ctx->wake_up_delay); ++ rtdm_nrtsig_destroy(&ctx->wake_utask); ++ ++ if (ctx->tasks) { ++ set_cpus_allowed_ptr(current, cpumask_of(ctx->cpu)); ++ ++ for (i = 0; i < ctx->next_index; i++) { ++ struct rtswitch_task *task = &ctx->tasks[i]; ++ ++ if (task->base.flags & RTSWITCH_KERNEL) { ++ rtdm_task_destroy(&task->ktask); ++ rtdm_task_join(&task->ktask); ++ } ++ rtdm_event_destroy(&task->rt_synch); ++ } ++ vfree(ctx->tasks); ++ } ++} ++ ++static int rtswitch_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, ++ void *arg) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ struct rttst_swtest_task task; ++ struct rttst_swtest_dir fromto; ++ __u32 count; ++ int err; ++ ++ switch (request) { ++ case RTTST_RTIOC_SWTEST_SET_TASKS_COUNT: ++ return rtswitch_set_tasks_count(ctx, ++ (unsigned long) arg); ++ ++ case RTTST_RTIOC_SWTEST_SET_CPU: ++ if ((unsigned long) arg > num_online_cpus() - 1) ++ return -EINVAL; ++ ++ ctx->cpu = (unsigned long) arg; ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_SET_PAUSE: ++ ctx->pause_us = (unsigned long) arg; ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_REGISTER_UTASK: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ err = rtswitch_register_task(ctx, &task); ++ ++ if (!err) ++ rtdm_copy_to_user(fd, ++ arg, ++ &task, ++ sizeof(task)); ++ ++ return err; ++ ++ case RTTST_RTIOC_SWTEST_CREATE_KTASK: ++ if 
(!rtdm_rw_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ err = rtswitch_create_ktask(ctx, &task); ++ ++ if (!err) ++ rtdm_copy_to_user(fd, ++ arg, ++ &task, ++ sizeof(task)); ++ ++ return err; ++ ++ case RTTST_RTIOC_SWTEST_PEND: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ return rtswitch_pend_nrt(ctx, task.index); ++ ++ case RTTST_RTIOC_SWTEST_SWITCH_TO: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(fromto))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, ++ &fromto, ++ arg, ++ sizeof(fromto)); ++ ++ return rtswitch_to_nrt(ctx, fromto.from, fromto.to); ++ ++ case RTTST_RTIOC_SWTEST_GET_SWITCHES_COUNT: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(count))) ++ return -EFAULT; ++ ++ count = ctx->switches_count; ++ ++ rtdm_copy_to_user(fd, arg, &count, sizeof(count)); ++ ++ return 0; ++ ++ case RTTST_RTIOC_SWTEST_GET_LAST_ERROR: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(ctx->error))) ++ return -EFAULT; ++ ++ rtdm_copy_to_user(fd, ++ arg, ++ &ctx->error, ++ sizeof(ctx->error)); ++ ++ return 0; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++static int rtswitch_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, ++ void *arg) ++{ ++ struct rtswitch_context *ctx = rtdm_fd_to_private(fd); ++ struct rttst_swtest_task task; ++ struct rttst_swtest_dir fromto; ++ ++ switch (request) { ++ case RTTST_RTIOC_SWTEST_PEND: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(task))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &task, arg, sizeof(task)); ++ ++ return rtswitch_pend_rt(ctx, task.index); ++ ++ case RTTST_RTIOC_SWTEST_SWITCH_TO: ++ if (!rtdm_read_user_ok(fd, arg, sizeof(fromto))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, ++ &fromto, ++ arg, ++ sizeof(fromto)); ++ ++ return rtswitch_to_rt(ctx, fromto.from, fromto.to); ++ ++ case RTTST_RTIOC_SWTEST_GET_LAST_ERROR: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(ctx->error))) ++ return -EFAULT; ++ ++ rtdm_copy_to_user(fd, ++ arg, ++ &ctx->error, ++ sizeof(ctx->error)); ++ ++ return 0; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++static struct rtdm_driver switchtest_driver = { ++ .profile_info = RTDM_PROFILE_INFO(switchtest, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_SWITCHTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtswitch_context), ++ .ops = { ++ .open = rtswitch_open, ++ .close = rtswitch_close, ++ .ioctl_rt = rtswitch_ioctl_rt, ++ .ioctl_nrt = rtswitch_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &switchtest_driver, ++ .label = "switchtest", ++}; ++ ++int __init __switchtest_init(void) ++{ ++ fp_features = fp_detect(); ++ ++ return rtdm_dev_register(&device); ++} ++ ++void __switchtest_exit(void) ++{ ++ rtdm_dev_unregister(&device); ++} ++ ++module_init(__switchtest_init); ++module_exit(__switchtest_exit); +--- linux/drivers/xenomai/testing/rtdmtest.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/rtdmtest.c 2021-04-07 16:01:28.022633030 +0800 +@@ -0,0 +1,293 @@ ++/* ++ * Copyright (C) 2010 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM test helper module"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("0.1.0"); ++MODULE_LICENSE("GPL"); ++ ++struct rtdm_basic_context { ++ rtdm_timer_t close_timer; ++ unsigned long close_counter; ++ unsigned long close_deferral; ++}; ++ ++struct rtdm_actor_context { ++ rtdm_task_t actor_task; ++ unsigned int request; ++ rtdm_event_t run; ++ rtdm_event_t done; ++ union { ++ __u32 cpu; ++ } args; ++}; ++ ++static void close_timer_proc(rtdm_timer_t *timer) ++{ ++ struct rtdm_basic_context *ctx = ++ container_of(timer, struct rtdm_basic_context, close_timer); ++ ++ if (ctx->close_counter != 1) ++ printk(XENO_ERR ++ "rtdmtest: %s: close_counter is %lu, should be 1!\n", ++ __FUNCTION__, ctx->close_counter); ++ ++ ctx->close_deferral = RTTST_RTDM_NORMAL_CLOSE; ++ rtdm_fd_unlock(rtdm_private_to_fd(ctx)); ++} ++ ++static int rtdm_basic_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_timer_init(&ctx->close_timer, close_timer_proc, ++ "rtdm close test"); ++ ctx->close_counter = 0; ++ ctx->close_deferral = RTTST_RTDM_NORMAL_CLOSE; ++ ++ return 0; ++} ++ ++static void rtdm_basic_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->close_counter++; ++ ++ switch (ctx->close_deferral) { ++ case RTTST_RTDM_DEFER_CLOSE_CONTEXT: ++ if (ctx->close_counter != 2) { ++ printk(XENO_ERR ++ "rtdmtest: %s: close_counter is %lu, " ++ "should be 2!\n", ++ __FUNCTION__, ctx->close_counter); ++ return; ++ } ++ rtdm_fd_unlock(fd); ++ break; ++ } ++ ++ rtdm_timer_destroy(&ctx->close_timer); ++} ++ ++static int rtdm_basic_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ int ret, magic = RTTST_RTDM_MAGIC_PRIMARY; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_PING_PRIMARY: ++ ret = rtdm_safe_copy_to_user(fd, arg, &magic, ++ sizeof(magic)); ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ return ret; ++} ++ ++static int rtdm_basic_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rtdm_basic_context *ctx = rtdm_fd_to_private(fd); ++ int ret = 0, magic = RTTST_RTDM_MAGIC_SECONDARY; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_DEFER_CLOSE: ++ ctx->close_deferral = (unsigned long)arg; ++ if (ctx->close_deferral == RTTST_RTDM_DEFER_CLOSE_CONTEXT) { ++ ++ctx->close_counter; ++ rtdm_fd_lock(fd); ++ rtdm_timer_start(&ctx->close_timer, 300000000ULL, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ } ++ break; ++ case RTTST_RTIOC_RTDM_PING_SECONDARY: ++ ret = rtdm_safe_copy_to_user(fd, arg, &magic, ++ sizeof(magic)); ++ break; ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++static void actor_handler(void *arg) ++{ ++ struct rtdm_actor_context *ctx = arg; ++ int ret; ++ ++ for (;;) { ++ if (rtdm_task_should_stop()) ++ return; ++ ++ ret = rtdm_event_wait(&ctx->run); ++ if (ret) ++ break; ++ ++ switch (ctx->request) { ++ case RTTST_RTIOC_RTDM_ACTOR_GET_CPU: ++ ctx->args.cpu = task_cpu(current); ++ break; ++ default: 
++ printk(XENO_ERR "rtdmtest: bad request code %d\n", ++ ctx->request); ++ } ++ ++ rtdm_event_signal(&ctx->done); ++ } ++} ++ ++static int rtdm_actor_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_event_init(&ctx->run, 0); ++ rtdm_event_init(&ctx->done, 0); ++ ++ return rtdm_task_init(&ctx->actor_task, "rtdm_actor", ++ actor_handler, ctx, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++} ++ ++static void rtdm_actor_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_task_destroy(&ctx->actor_task); ++ rtdm_event_destroy(&ctx->run); ++ rtdm_event_destroy(&ctx->done); ++} ++ ++#define ACTION_TIMEOUT 50000000ULL /* 50 ms timeout on action */ ++ ++static int run_action(struct rtdm_actor_context *ctx, unsigned int request) ++{ ++ rtdm_toseq_t toseq; ++ ++ rtdm_toseq_init(&toseq, ACTION_TIMEOUT); ++ ctx->request = request; ++ rtdm_event_signal(&ctx->run); ++ /* ++ * XXX: The handshake mechanism is not bullet-proof against ++ * -EINTR received when waiting for the done event. Hopefully ++ * we won't restart/start a request while the action task has ++ * not yet completed the previous one we stopped waiting for ++ * abruptly. ++ */ ++ return rtdm_event_timedwait(&ctx->done, ACTION_TIMEOUT, &toseq); ++} ++ ++static int rtdm_actor_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rtdm_actor_context *ctx = rtdm_fd_to_private(fd); ++ int ret; ++ ++ switch (request) { ++ case RTTST_RTIOC_RTDM_ACTOR_GET_CPU: ++ ctx->args.cpu = (__u32)-EINVAL; ++ ret = run_action(ctx, request); ++ if (ret) ++ break; ++ ret = rtdm_safe_copy_to_user(fd, arg, &ctx->args.cpu, ++ sizeof(ctx->args.cpu)); ++ break; ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++static struct rtdm_driver rtdm_basic_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtdm_test_basic, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_RTDMTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 2, ++ .context_size = sizeof(struct rtdm_basic_context), ++ .ops = { ++ .open = rtdm_basic_open, ++ .close = rtdm_basic_close, ++ .ioctl_rt = rtdm_basic_ioctl_rt, ++ .ioctl_nrt = rtdm_basic_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_driver rtdm_actor_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtdm_test_actor, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_RTDMTEST, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtdm_actor_context), ++ .ops = { ++ .open = rtdm_actor_open, ++ .close = rtdm_actor_close, ++ .ioctl_rt = rtdm_actor_ioctl, ++ }, ++}; ++ ++static struct rtdm_device device[3] = { ++ [0 ... 
1] = { ++ .driver = &rtdm_basic_driver, ++ .label = "rtdm%d", ++ }, ++ [2] = { ++ .driver = &rtdm_actor_driver, ++ .label = "rtdmx", ++ } ++}; ++ ++static int __init rtdm_test_init(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) { ++ ret = rtdm_dev_register(device + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(device + i); ++ ++ return ret; ++} ++ ++static void __exit rtdm_test_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) ++ rtdm_dev_unregister(device + i); ++} ++ ++module_init(rtdm_test_init); ++module_exit(rtdm_test_exit); +--- linux/drivers/xenomai/testing/timerbench.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/testing/timerbench.c 2021-04-07 16:01:28.017633037 +0800 +@@ -0,0 +1,529 @@ ++/* ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Timer latency test helper"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("0.2.1"); ++MODULE_LICENSE("GPL"); ++ ++struct rt_tmbench_context { ++ int mode; ++ unsigned int period; ++ int freeze_max; ++ int warmup_loops; ++ int samples_per_sec; ++ int32_t *histogram_min; ++ int32_t *histogram_max; ++ int32_t *histogram_avg; ++ int histogram_size; ++ int bucketsize; ++ ++ rtdm_task_t timer_task; ++ ++ rtdm_timer_t timer; ++ int warmup; ++ uint64_t start_time; ++ uint64_t date; ++ struct rttst_bench_res curr; ++ ++ rtdm_event_t result_event; ++ struct rttst_interm_bench_res result; ++ ++ struct semaphore nrt_mutex; ++}; ++ ++static inline void add_histogram(struct rt_tmbench_context *ctx, ++ __s32 *histogram, __s32 addval) ++{ ++ /* bucketsize steps */ ++ int inabs = (addval >= 0 ? addval : -addval) / ctx->bucketsize; ++ histogram[inabs < ctx->histogram_size ? ++ inabs : ctx->histogram_size - 1]++; ++} ++ ++static inline long long slldiv(long long s, unsigned d) ++{ ++ return s >= 0 ? xnarch_ulldiv(s, d, NULL) : -xnarch_ulldiv(-s, d, NULL); ++} ++ ++static void eval_inner_loop(struct rt_tmbench_context *ctx, __s32 dt) ++{ ++ if (dt > ctx->curr.max) ++ ctx->curr.max = dt; ++ if (dt < ctx->curr.min) ++ ctx->curr.min = dt; ++ ctx->curr.avg += dt; ++ ++#ifdef CONFIG_IPIPE_TRACE ++ if (ctx->freeze_max && (dt > ctx->result.overall.max) && !ctx->warmup) { ++ ipipe_trace_frozen_reset(); ++ ipipe_trace_freeze(dt); ++ ctx->result.overall.max = dt; ++ } ++#endif /* CONFIG_IPIPE_TRACE */ ++ ++ ctx->date += ctx->period; ++ ++ if (!ctx->warmup && ctx->histogram_size) ++ add_histogram(ctx, ctx->histogram_avg, dt); ++ ++ /* Evaluate overruns and adjust next release date. ++ Beware of signedness! 
*/ ++ while (dt > 0 && (unsigned long)dt > ctx->period) { ++ ctx->curr.overruns++; ++ ctx->date += ctx->period; ++ dt -= ctx->period; ++ } ++} ++ ++static void eval_outer_loop(struct rt_tmbench_context *ctx) ++{ ++ if (!ctx->warmup) { ++ if (ctx->histogram_size) { ++ add_histogram(ctx, ctx->histogram_max, ctx->curr.max); ++ add_histogram(ctx, ctx->histogram_min, ctx->curr.min); ++ } ++ ++ ctx->result.last.min = ctx->curr.min; ++ if (ctx->curr.min < ctx->result.overall.min) ++ ctx->result.overall.min = ctx->curr.min; ++ ++ ctx->result.last.max = ctx->curr.max; ++ if (ctx->curr.max > ctx->result.overall.max) ++ ctx->result.overall.max = ctx->curr.max; ++ ++ ctx->result.last.avg = ++ slldiv(ctx->curr.avg, ctx->samples_per_sec); ++ ctx->result.overall.avg += ctx->result.last.avg; ++ ctx->result.overall.overruns += ctx->curr.overruns; ++ rtdm_event_pulse(&ctx->result_event); ++ } ++ ++ if (ctx->warmup && ++ (ctx->result.overall.test_loops == ctx->warmup_loops)) { ++ ctx->result.overall.test_loops = 0; ++ ctx->warmup = 0; ++ } ++ ++ ctx->curr.min = 10000000; ++ ctx->curr.max = -10000000; ++ ctx->curr.avg = 0; ++ ctx->curr.overruns = 0; ++ ++ ctx->result.overall.test_loops++; ++} ++ ++static void timer_task_proc(void *arg) ++{ ++ struct rt_tmbench_context *ctx = arg; ++ int count, err; ++ spl_t s; ++ ++ /* first event: one millisecond from now. */ ++ ctx->date = rtdm_clock_read_monotonic() + 1000000; ++ ++ while (1) { ++ for (count = 0; count < ctx->samples_per_sec; count++) { ++ cobalt_atomic_enter(s); ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ err = rtdm_task_sleep_abs(ctx->date, ++ RTDM_TIMERMODE_ABSOLUTE); ++ cobalt_atomic_leave(s); ++ if (err) ++ return; ++ ++ eval_inner_loop(ctx, ++ (__s32)(rtdm_clock_read_monotonic() - ++ ctx->date)); ++ } ++ eval_outer_loop(ctx); ++ } ++} ++ ++static void timer_proc(rtdm_timer_t *timer) ++{ ++ struct rt_tmbench_context *ctx = ++ container_of(timer, struct rt_tmbench_context, timer); ++ int err; ++ ++ do { ++ eval_inner_loop(ctx, (__s32)(rtdm_clock_read_monotonic() - ++ ctx->date)); ++ ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ err = rtdm_timer_start_in_handler(&ctx->timer, ctx->date, 0, ++ RTDM_TIMERMODE_ABSOLUTE); ++ ++ if (++ctx->curr.test_loops >= ctx->samples_per_sec) { ++ ctx->curr.test_loops = 0; ++ eval_outer_loop(ctx); ++ } ++ } while (err); ++} ++ ++static int rt_tmbench_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_tmbench_context *ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ sema_init(&ctx->nrt_mutex, 1); ++ ++ return 0; ++} ++ ++static void rt_tmbench_close(struct rtdm_fd *fd) ++{ ++ struct rt_tmbench_context *ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ down(&ctx->nrt_mutex); ++ ++ if (ctx->mode >= 0) { ++ if (ctx->mode == RTTST_TMBENCH_TASK) ++ rtdm_task_destroy(&ctx->timer_task); ++ else if (ctx->mode == RTTST_TMBENCH_HANDLER) ++ rtdm_timer_destroy(&ctx->timer); ++ ++ rtdm_event_destroy(&ctx->result_event); ++ ++ if (ctx->histogram_size) ++ kfree(ctx->histogram_min); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ctx->histogram_size = 0; ++ } ++ ++ up(&ctx->nrt_mutex); ++} ++ ++static int rt_tmbench_start(struct rtdm_fd *fd, ++ struct rt_tmbench_context *ctx, ++ struct rttst_tmbench_config __user *user_config) ++{ ++ int err = 0; ++ spl_t s; ++ ++ struct rttst_tmbench_config config_buf; ++ struct rttst_tmbench_config *config = ++ (struct rttst_tmbench_config *)user_config; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_safe_copy_from_user ++ (fd, &config_buf,user_config, ++ 
sizeof(struct rttst_tmbench_config)) < 0) ++ return -EFAULT; ++ ++ config = &config_buf; ++ } ++ ++ down(&ctx->nrt_mutex); ++ ++ ctx->period = config->period; ++ ctx->warmup_loops = config->warmup_loops; ++ ctx->samples_per_sec = 1000000000 / ctx->period; ++ ctx->histogram_size = config->histogram_size; ++ ctx->freeze_max = config->freeze_max; ++ ++ if (ctx->histogram_size > 0) { ++ ctx->histogram_min = ++ kmalloc(3 * ctx->histogram_size * sizeof(int32_t), ++ GFP_KERNEL); ++ ctx->histogram_max = ++ ctx->histogram_min + config->histogram_size; ++ ctx->histogram_avg = ++ ctx->histogram_max + config->histogram_size; ++ ++ if (!ctx->histogram_min) { ++ up(&ctx->nrt_mutex); ++ return -ENOMEM; ++ } ++ ++ memset(ctx->histogram_min, 0, ++ 3 * ctx->histogram_size * sizeof(int32_t)); ++ ctx->bucketsize = config->histogram_bucketsize; ++ } ++ ++ ctx->result.overall.min = 10000000; ++ ctx->result.overall.max = -10000000; ++ ctx->result.overall.avg = 0; ++ ctx->result.overall.test_loops = 1; ++ ctx->result.overall.overruns = 0; ++ ++ ctx->warmup = 1; ++ ++ ctx->curr.min = 10000000; ++ ctx->curr.max = -10000000; ++ ctx->curr.avg = 0; ++ ctx->curr.overruns = 0; ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ++ rtdm_event_init(&ctx->result_event, 0); ++ ++ if (config->mode == RTTST_TMBENCH_TASK) { ++ err = rtdm_task_init(&ctx->timer_task, "timerbench", ++ timer_task_proc, ctx, ++ config->priority, 0); ++ if (!err) ++ ctx->mode = RTTST_TMBENCH_TASK; ++ } else { ++ rtdm_timer_init(&ctx->timer, timer_proc, ++ rtdm_fd_device(fd)->name); ++ ++ ctx->curr.test_loops = 0; ++ ++ ctx->mode = RTTST_TMBENCH_HANDLER; ++ ++ cobalt_atomic_enter(s); ++ ctx->start_time = rtdm_clock_read_monotonic(); ++ ++ /* first event: one millisecond from now. */ ++ ctx->date = ctx->start_time + 1000000; ++ ++ err = rtdm_timer_start(&ctx->timer, ctx->date, 0, ++ RTDM_TIMERMODE_ABSOLUTE); ++ cobalt_atomic_leave(s); ++ } ++ ++ up(&ctx->nrt_mutex); ++ ++ return err; ++} ++ ++static int kernel_copy_results(struct rt_tmbench_context *ctx, ++ struct rttst_overall_bench_res *res) ++{ ++ int size; ++ ++ memcpy(&res->result, &ctx->result.overall, sizeof(res->result)); ++ ++ if (ctx->histogram_size > 0) { ++ size = ctx->histogram_size * sizeof(int32_t); ++ memcpy(res->histogram_min, ctx->histogram_min, size); ++ memcpy(res->histogram_max, ctx->histogram_max, size); ++ memcpy(res->histogram_avg, ctx->histogram_avg, size); ++ kfree(ctx->histogram_min); ++ } ++ ++ return 0; ++} ++ ++static int user_copy_results(struct rt_tmbench_context *ctx, ++ struct rttst_overall_bench_res __user *u_res) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); ++ struct rttst_overall_bench_res res_buf; ++ int ret, size; ++ ++ ret = rtdm_safe_copy_to_user(fd, &u_res->result, ++ &ctx->result.overall, ++ sizeof(u_res->result)); ++ if (ret || ctx->histogram_size == 0) ++ return ret; ++ ++ size = ctx->histogram_size * sizeof(int32_t); ++ ++ if (rtdm_safe_copy_from_user(fd, &res_buf, u_res, sizeof(res_buf)) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_min, ++ ctx->histogram_min, size) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_max, ++ ctx->histogram_max, size) < 0 || ++ rtdm_safe_copy_to_user(fd, res_buf.histogram_avg, ++ ctx->histogram_avg, size) < 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ ++static int compat_user_copy_results(struct rt_tmbench_context *ctx, ++ struct compat_rttst_overall_bench_res __user *u_res) ++{ ++ struct compat_rttst_overall_bench_res res_buf; ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); 
++ int ret, size; ++ ++ ret = rtdm_safe_copy_to_user(fd, &u_res->result, ++ &ctx->result.overall, ++ sizeof(u_res->result)); ++ if (ret || ctx->histogram_size == 0) ++ return ret; ++ ++ size = ctx->histogram_size * sizeof(int32_t); ++ ++ if (rtdm_safe_copy_from_user(fd, &res_buf, u_res, sizeof(res_buf)) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_min), ++ ctx->histogram_min, size) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_max), ++ ctx->histogram_max, size) < 0 || ++ rtdm_safe_copy_to_user(fd, compat_ptr(res_buf.histogram_avg), ++ ctx->histogram_avg, size) < 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_ARCH_SYS3264 */ ++ ++static int rt_tmbench_stop(struct rt_tmbench_context *ctx, void *u_res) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(ctx); ++ int ret; ++ ++ down(&ctx->nrt_mutex); ++ ++ if (ctx->mode < 0) { ++ up(&ctx->nrt_mutex); ++ return -EINVAL; ++ } ++ ++ if (ctx->mode == RTTST_TMBENCH_TASK) ++ rtdm_task_destroy(&ctx->timer_task); ++ else if (ctx->mode == RTTST_TMBENCH_HANDLER) ++ rtdm_timer_destroy(&ctx->timer); ++ ++ rtdm_event_destroy(&ctx->result_event); ++ ++ ctx->mode = RTTST_TMBENCH_INVALID; ++ ++ ctx->result.overall.avg = ++ slldiv(ctx->result.overall.avg, ++ ((ctx->result.overall.test_loops) > 1 ? ++ ctx->result.overall.test_loops : 2) - 1); ++ ++ if (rtdm_fd_is_user(fd)) { ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) ++ ret = compat_user_copy_results(ctx, u_res); ++ else ++#endif ++ ret = user_copy_results(ctx, u_res); ++ } else ++ ret = kernel_copy_results(ctx, u_res); ++ ++ if (ctx->histogram_size > 0) ++ kfree(ctx->histogram_min); ++ ++ up(&ctx->nrt_mutex); ++ ++ return ret; ++} ++ ++static int rt_tmbench_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rt_tmbench_context *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTTST_RTIOC_TMBENCH_START: ++ err = rt_tmbench_start(fd, ctx, arg); ++ break; ++ ++ COMPAT_CASE(RTTST_RTIOC_TMBENCH_STOP): ++ err = rt_tmbench_stop(ctx, arg); ++ break; ++ default: ++ err = -ENOSYS; ++ } ++ ++ return err; ++} ++ ++static int rt_tmbench_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct rt_tmbench_context *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTTST_RTIOC_INTERM_BENCH_RES: ++ err = rtdm_event_wait(&ctx->result_event); ++ if (err) ++ return err; ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rttst_interm_bench_res __user *user_res = arg; ++ ++ err = rtdm_safe_copy_to_user(fd, user_res, ++ &ctx->result, ++ sizeof(*user_res)); ++ } else { ++ struct rttst_interm_bench_res *res = (void *)arg; ++ ++ memcpy(res, &ctx->result, sizeof(*res)); ++ } ++ ++ break; ++ ++ default: ++ err = -ENOSYS; ++ } ++ ++ return err; ++} ++ ++static struct rtdm_driver timerbench_driver = { ++ .profile_info = RTDM_PROFILE_INFO(timerbench, ++ RTDM_CLASS_TESTING, ++ RTDM_SUBCLASS_TIMERBENCH, ++ RTTST_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rt_tmbench_context), ++ .ops = { ++ .open = rt_tmbench_open, ++ .close = rt_tmbench_close, ++ .ioctl_rt = rt_tmbench_ioctl_rt, ++ .ioctl_nrt = rt_tmbench_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &timerbench_driver, ++ .label = "timerbench", ++}; ++ ++static int __init __timerbench_init(void) ++{ ++ return rtdm_dev_register(&device); ++} ++ ++static void __timerbench_exit(void) ++{ ++ 
rtdm_dev_unregister(&device); ++} ++ ++module_init(__timerbench_init); ++module_exit(__timerbench_exit); +--- linux/drivers/xenomai/spi/spi-bcm2835.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-bcm2835.c 2021-04-07 16:01:28.013633043 +0800 +@@ -0,0 +1,699 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-bcm2835.c: ++ * Copyright (C) 2012 Chris Boot ++ * Copyright (C) 2013 Stephen Warren ++ * Copyright (C) 2015 Martin Sperl ++ * ++ * RTDM integration by: ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_BCM2835 1 ++ ++/* SPI register offsets */ ++#define BCM2835_SPI_CS 0x00 ++#define BCM2835_SPI_FIFO 0x04 ++#define BCM2835_SPI_CLK 0x08 ++#define BCM2835_SPI_DLEN 0x0c ++#define BCM2835_SPI_LTOH 0x10 ++#define BCM2835_SPI_DC 0x14 ++ ++/* Bitfields in CS */ ++#define BCM2835_SPI_CS_LEN_LONG 0x02000000 ++#define BCM2835_SPI_CS_DMA_LEN 0x01000000 ++#define BCM2835_SPI_CS_CSPOL2 0x00800000 ++#define BCM2835_SPI_CS_CSPOL1 0x00400000 ++#define BCM2835_SPI_CS_CSPOL0 0x00200000 ++#define BCM2835_SPI_CS_RXF 0x00100000 ++#define BCM2835_SPI_CS_RXR 0x00080000 ++#define BCM2835_SPI_CS_TXD 0x00040000 ++#define BCM2835_SPI_CS_RXD 0x00020000 ++#define BCM2835_SPI_CS_DONE 0x00010000 ++#define BCM2835_SPI_CS_LEN 0x00002000 ++#define BCM2835_SPI_CS_REN 0x00001000 ++#define BCM2835_SPI_CS_ADCS 0x00000800 ++#define BCM2835_SPI_CS_INTR 0x00000400 ++#define BCM2835_SPI_CS_INTD 0x00000200 ++#define BCM2835_SPI_CS_DMAEN 0x00000100 ++#define BCM2835_SPI_CS_TA 0x00000080 ++#define BCM2835_SPI_CS_CSPOL 0x00000040 ++#define BCM2835_SPI_CS_CLEAR_RX 0x00000020 ++#define BCM2835_SPI_CS_CLEAR_TX 0x00000010 ++#define BCM2835_SPI_CS_CPOL 0x00000008 ++#define BCM2835_SPI_CS_CPHA 0x00000004 ++#define BCM2835_SPI_CS_CS_10 0x00000002 ++#define BCM2835_SPI_CS_CS_01 0x00000001 ++ ++#define BCM2835_SPI_POLLING_LIMIT_US 30 ++#define BCM2835_SPI_POLLING_JIFFIES 2 ++#define BCM2835_SPI_DMA_MIN_LENGTH 96 ++#define BCM2835_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ ++ | SPI_NO_CS | SPI_3WIRE) ++ ++struct spi_master_bcm2835 { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ struct clk *clk; ++ unsigned long clk_hz; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ rtdm_event_t transfer_done; ++}; ++ ++struct spi_slave_bcm2835 { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_bcm2835 * ++to_slave_bcm2835(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_bcm2835, slave); ++} ++ ++static inline struct spi_master_bcm2835 * 
++to_master_bcm2835(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, struct spi_master_bcm2835, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 bcm2835_rd(struct spi_master_bcm2835 *spim, ++ unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void bcm2835_wr(struct spi_master_bcm2835 *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static inline void bcm2835_rd_fifo(struct spi_master_bcm2835 *spim) ++{ ++ u8 byte; ++ ++ while (spim->rx_len > 0 && ++ (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) { ++ byte = bcm2835_rd(spim, BCM2835_SPI_FIFO); ++ if (spim->rx_buf) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static inline void bcm2835_wr_fifo(struct spi_master_bcm2835 *spim) ++{ ++ u8 byte; ++ ++ while (spim->tx_len > 0 && ++ (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) { ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ bcm2835_wr(spim, BCM2835_SPI_FIFO, byte); ++ spim->tx_len--; ++ } ++} ++ ++static void bcm2835_reset_hw(struct spi_master_bcm2835 *spim) ++{ ++ u32 cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~(BCM2835_SPI_CS_INTR | ++ BCM2835_SPI_CS_INTD | ++ BCM2835_SPI_CS_DMAEN | ++ BCM2835_SPI_CS_TA); ++ cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX; ++ ++ /* Reset the SPI block. */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ bcm2835_wr(spim, BCM2835_SPI_DLEN, 0); ++} ++ ++static int bcm2835_spi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_bcm2835 *spim; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_bcm2835); ++ ++ bcm2835_rd_fifo(spim); ++ bcm2835_wr_fifo(spim); ++ ++ if (bcm2835_rd(spim, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) { ++ bcm2835_reset_hw(spim); ++ rtdm_event_signal(&spim->transfer_done); ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int bcm2835_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ unsigned long spi_hz, cdiv; ++ u32 cs; ++ ++ /* Set clock polarity and phase. */ ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~(BCM2835_SPI_CS_CPOL | BCM2835_SPI_CS_CPHA); ++ if (config->mode & SPI_CPOL) ++ cs |= BCM2835_SPI_CS_CPOL; ++ if (config->mode & SPI_CPHA) ++ cs |= BCM2835_SPI_CS_CPHA; ++ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ ++ /* Set clock frequency. */ ++ ++ spi_hz = config->speed_hz; ++ ++ /* ++ * Fastest clock rate is of the APB clock, which is close to ++ * clk_hz / 2. ++ */ ++ if (spi_hz >= spim->clk_hz / 2) ++ cdiv = 2; ++ else if (spi_hz) { ++ cdiv = DIV_ROUND_UP(spim->clk_hz, spi_hz); /* Multiple of 2. */ ++ cdiv += (cdiv % 2); ++ if (cdiv >= 65536) ++ cdiv = 0; ++ } else ++ cdiv = 0; ++ ++ bcm2835_wr(spim, BCM2835_SPI_CLK, cdiv); ++ ++ return 0; ++} ++ ++static void bcm2835_chip_select(struct rtdm_spi_remote_slave *slave, ++ bool active) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 cs; ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ if (config->mode & SPI_CS_HIGH) { ++ cs |= BCM2835_SPI_CS_CSPOL; ++ cs |= BCM2835_SPI_CS_CSPOL0 << slave->chip_select; ++ } else { ++ cs &= ~BCM2835_SPI_CS_CSPOL; ++ cs &= ~(BCM2835_SPI_CS_CSPOL0 << slave->chip_select); ++ } ++ ++ /* "active" is the logical state, not the impedance level. 
*/ ++ ++ if (active) { ++ if (config->mode & SPI_NO_CS) ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ else { ++ cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01); ++ cs |= slave->chip_select; ++ } ++ } else { ++ /* Put HW-CS into deselected state. */ ++ cs &= ~BCM2835_SPI_CS_CSPOL; ++ /* Use the "undefined" chip-select as precaution. */ ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ } ++ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ int ret; ++ u32 cs; ++ ++ cs = bcm2835_rd(spim, BCM2835_SPI_CS); ++ ++ cs &= ~BCM2835_SPI_CS_REN; ++ if ((slave->config.mode & SPI_3WIRE) && spim->rx_buf) ++ cs |= BCM2835_SPI_CS_REN; ++ ++ cs |= BCM2835_SPI_CS_TA; ++ ++ /* ++ * Fill in fifo if we have gpio-cs note that there have been ++ * rare events where the native-CS flapped for <1us which may ++ * change the behaviour with gpio-cs this does not happen, so ++ * it is implemented only for this case. ++ */ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ /* Set dummy CS, ->chip_select() was not called. */ ++ cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01; ++ /* Enable SPI block, before filling FIFO. */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ bcm2835_wr_fifo(spim); ++ } ++ ++ /* Enable interrupts last, wait for transfer completion. */ ++ cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD; ++ bcm2835_wr(spim, BCM2835_SPI_CS, cs); ++ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ if (ret) { ++ bcm2835_reset_hw(spim); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int bcm2835_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ if (bcm->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = bcm->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = bcm->io_virt + spim->rx_len; ++ spim->rx_buf = bcm->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static int bcm2835_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ if ((bcm->io_len == 0) || ++ (len <= 0) || (len > (bcm->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = bcm->io_virt + bcm->io_len / 2; ++ spim->rx_buf = bcm->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static ssize_t bcm2835_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static ssize_t bcm2835_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_bcm2835 *spim = to_master_bcm2835(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static int set_iobufs(struct spi_slave_bcm2835 *bcm, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == bcm->io_len) ++ return 0; ++ ++ if (bcm->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. 
*/ ++ ++ /* ++ * Since we need the I/O buffers to be set for starting a ++ * transfer, there is no need for serializing this routine and ++ * transfer_iobufs(), provided io_len is set last. ++ * ++ * NOTE: We don't need coherent memory until we actually get ++ * DMA transfers working, this code is a bit ahead of ++ * schedule. ++ * ++ * Revisit: this assumes DMA mask is 4Gb. ++ */ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ bcm->io_dma = dma; ++ bcm->io_virt = p; ++ smp_mb(); ++ /* ++ * May race with transfer_iobufs(), must be assigned after all ++ * the rest is set up, enforcing a membar. ++ */ ++ bcm->io_len = len; ++ ++ return 0; ++} ++ ++static int bcm2835_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ int ret; ++ ++ ret = set_iobufs(bcm, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = bcm->io_len / 2; ++ p->map_len = bcm->io_len; ++ ++ return 0; ++} ++ ++static int bcm2835_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ /* ++ * dma_alloc_coherent() delivers non-cached memory, make sure ++ * to return consistent mapping attributes. Typically, mixing ++ * memory attributes across address spaces referring to the ++ * same physical area is architecturally wrong on ARM. ++ */ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ return rtdm_mmap_kmem(vma, bcm->io_virt); ++} ++ ++static void bcm2835_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ dma_free_coherent(NULL, bcm->io_len, ++ bcm->io_virt, bcm->io_dma); ++ bcm->io_len = 0; ++} ++ ++static int gpio_match_name(struct gpio_chip *chip, void *data) ++{ ++ return !strcmp(chip->label, data); ++} ++ ++static int find_cs_gpio(struct spi_device *spi) ++{ ++ struct spi_master *kmaster = spi->master; ++ u32 pingroup_index, pin, pin_index; ++ struct device_node *pins; ++ struct gpio_chip *chip; ++ int ret; ++ ++ if (gpio_is_valid(spi->cs_gpio)) { ++ dev_info(&spi->dev, "using GPIO%i for CS%d\n", ++ spi->cs_gpio, spi->chip_select); ++ return 0; ++ } ++ ++ /* Translate native CS to GPIO. */ ++ ++ for (pingroup_index = 0; ++ (pins = of_parse_phandle(kmaster->dev.of_node, ++ "pinctrl-0", pingroup_index)) != 0; pingroup_index++) { ++ for (pin_index = 0; ++ of_property_read_u32_index(pins, "brcm,pins", ++ pin_index, &pin) == 0; pin_index++) { ++ if ((spi->chip_select == 0 && ++ (pin == 8 || pin == 36 || pin == 46)) || ++ (spi->chip_select == 1 && ++ (pin == 7 || pin == 35))) { ++ spi->cs_gpio = pin; ++ break; ++ } ++ } ++ of_node_put(pins); ++ } ++ ++ /* If that failed, assume GPIOs 7-11 are used */ ++ if (!gpio_is_valid(spi->cs_gpio) ) { ++ chip = gpiochip_find("pinctrl-bcm2835", gpio_match_name); ++ if (chip == NULL) ++ return 0; ++ ++ spi->cs_gpio = chip->base + 8 - spi->chip_select; ++ } ++ ++ dev_info(&spi->dev, ++ "setting up native-CS%i as GPIO %i\n", ++ spi->chip_select, spi->cs_gpio); ++ ++ ret = gpio_direction_output(spi->cs_gpio, ++ (spi->mode & SPI_CS_HIGH) ? 0 : 1); ++ if (ret) { ++ dev_err(&spi->dev, ++ "could not set CS%i gpio %i as output: %i", ++ spi->chip_select, spi->cs_gpio, ret); ++ return ret; ++ } ++ ++ /* ++ * Force value on GPIO in case the pin controller does not ++ * handle that properly when switching to output mode. 
++ */ ++ gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 0 : 1); ++ ++ return 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++bcm2835_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_slave_bcm2835 *bcm; ++ int ret; ++ ++ if (spi->chip_select > 1) { ++ /* ++ * Error in the case of native CS requested with CS > ++ * 1 officially there is a CS2, but it is not ++ * documented which GPIO is connected with that... ++ */ ++ dev_err(&spi->dev, ++ "%s: only two native chip-selects are supported\n", ++ __func__); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ ret = find_cs_gpio(spi); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ bcm = kzalloc(sizeof(*bcm), GFP_KERNEL); ++ if (bcm == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&bcm->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, ++ "%s: failed to attach slave\n", __func__); ++ kfree(bcm); ++ return ERR_PTR(ret); ++ } ++ ++ return &bcm->slave; ++} ++ ++static void bcm2835_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_bcm2835 *bcm = to_slave_bcm2835(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ kfree(bcm); ++} ++ ++static struct rtdm_spi_master_ops bcm2835_master_ops = { ++ .configure = bcm2835_configure, ++ .chip_select = bcm2835_chip_select, ++ .set_iobufs = bcm2835_set_iobufs, ++ .mmap_iobufs = bcm2835_mmap_iobufs, ++ .mmap_release = bcm2835_mmap_release, ++ .transfer_iobufs = bcm2835_transfer_iobufs, ++ .transfer_iobufs_n = bcm2835_transfer_iobufs_n, ++ .write = bcm2835_write, ++ .read = bcm2835_read, ++ .attach_slave = bcm2835_attach_slave, ++ .detach_slave = bcm2835_detach_slave, ++}; ++ ++static int bcm2835_spi_probe(struct platform_device *pdev) ++{ ++ struct spi_master_bcm2835 *spim; ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_bcm2835, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_BCM2835; ++ master->ops = &bcm2835_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ kmaster->mode_bits = BCM2835_SPI_MODE_BITS; ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ kmaster->num_chipselect = 2; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_bcm2835, master); ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ ++ spim->clk = devm_clk_get(&pdev->dev, NULL); ++ if (IS_ERR(spim->clk)) { ++ ret = PTR_ERR(spim->clk); ++ goto fail; ++ } ++ ++ spim->clk_hz = clk_get_rate(spim->clk); ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ goto fail; ++ } ++ ++ clk_prepare_enable(spim->clk); ++ ++ /* Initialise the hardware with the default polarities */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, ++ BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX); ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ bcm2835_spi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: 
failed to add master\n", ++ __func__); ++ goto fail_unclk; ++ } ++ ++ return 0; ++ ++fail_unclk: ++ clk_disable_unprepare(spim->clk); ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int bcm2835_spi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_bcm2835 *spim; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ spim = container_of(master, struct spi_master_bcm2835, master); ++ ++ /* Clear FIFOs, and disable the HW block */ ++ bcm2835_wr(spim, BCM2835_SPI_CS, ++ BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ clk_disable_unprepare(spim->clk); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static const struct of_device_id bcm2835_spi_match[] = { ++ { ++ .compatible = "brcm,bcm2835-spi", ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, bcm2835_spi_match); ++ ++static struct platform_driver bcm2835_spi_driver = { ++ .driver = { ++ .name = "spi-bcm2835", ++ .of_match_table = bcm2835_spi_match, ++ }, ++ .probe = bcm2835_spi_probe, ++ .remove = bcm2835_spi_remove, ++}; ++module_platform_driver(bcm2835_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/spi-omap2-mcspi-rt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-omap2-mcspi-rt.c 2021-04-07 16:01:28.008633050 +0800 +@@ -0,0 +1,999 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-omap2-mcspi.c: ++ * Copyright (C) 2019 Laurentiu-Cristian Duca ++ * ++ * RTDM integration by: ++ * Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_OMAP2_MCSPI 3 ++ ++#define OMAP4_MCSPI_REG_OFFSET 0x100 ++#define OMAP2_MCSPI_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH) ++ ++#define OMAP2_MCSPI_MAX_FREQ 48000000 ++#define OMAP2_MCSPI_DRIVER_MAX_FREQ 40000000 ++#define OMAP2_MCSPI_MAX_DIVIDER 4096 ++#define OMAP2_MCSPI_MAX_FIFODEPTH 64 ++#define OMAP2_MCSPI_MAX_FIFOWCNT 0xFFFF ++#define SPI_AUTOSUSPEND_TIMEOUT 2000 ++#define PM_NEGATIVE_DELAY -2000 ++ ++#define OMAP2_MCSPI_REVISION 0x00 ++#define OMAP2_MCSPI_SYSCONFIG 0x10 ++#define OMAP2_MCSPI_SYSSTATUS 0x14 ++#define OMAP2_MCSPI_IRQSTATUS 0x18 ++#define OMAP2_MCSPI_IRQENABLE 0x1c ++#define OMAP2_MCSPI_WAKEUPENABLE 0x20 ++#define OMAP2_MCSPI_SYST 0x24 ++#define OMAP2_MCSPI_MODULCTRL 0x28 ++#define OMAP2_MCSPI_XFERLEVEL 0x7c ++ ++/* per-channel (chip select) banks, 0x14 bytes each, first is: */ ++#define OMAP2_MCSPI_CHANNELBANK_SIZE 0x14 ++#define OMAP2_MCSPI_CHCONF0 0x2c ++#define OMAP2_MCSPI_CHSTAT0 0x30 ++#define OMAP2_MCSPI_CHCTRL0 0x34 ++#define OMAP2_MCSPI_TX0 0x38 ++#define OMAP2_MCSPI_RX0 0x3c ++ ++/* per-register bitmasks: */ ++#define OMAP2_MCSPI_IRQSTATUS_EOW BIT(17) ++#define OMAP2_MCSPI_IRQSTATUS_RX1_FULL BIT(6) ++#define OMAP2_MCSPI_IRQSTATUS_TX1_EMPTY BIT(4) ++#define OMAP2_MCSPI_IRQSTATUS_RX0_FULL BIT(2) ++#define OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY BIT(0) ++ ++#define OMAP2_MCSPI_IRQENABLE_EOW BIT(17) ++#define OMAP2_MCSPI_IRQENABLE_RX1_FULL BIT(6) ++#define OMAP2_MCSPI_IRQENABLE_TX1_EMPTY BIT(4) ++#define OMAP2_MCSPI_IRQENABLE_RX0_FULL BIT(2) ++#define OMAP2_MCSPI_IRQENABLE_TX0_EMPTY BIT(0) ++ ++#define OMAP2_MCSPI_MODULCTRL_SINGLE BIT(0) ++#define OMAP2_MCSPI_MODULCTRL_MS BIT(2) ++#define OMAP2_MCSPI_MODULCTRL_STEST BIT(3) ++ ++#define OMAP2_MCSPI_CHCONF_PHA BIT(0) ++#define OMAP2_MCSPI_CHCONF_POL BIT(1) ++#define OMAP2_MCSPI_CHCONF_CLKD_MASK (0x0f << 2) ++#define OMAP2_MCSPI_CHCONF_EPOL BIT(6) ++#define OMAP2_MCSPI_CHCONF_WL_MASK (0x1f << 7) ++#define OMAP2_MCSPI_CHCONF_TRM_RX_ONLY BIT(12) ++#define OMAP2_MCSPI_CHCONF_TRM_TX_ONLY BIT(13) ++#define OMAP2_MCSPI_CHCONF_TRM_MASK (0x03 << 12) ++#define OMAP2_MCSPI_CHCONF_DMAW BIT(14) ++#define OMAP2_MCSPI_CHCONF_DMAR BIT(15) ++#define OMAP2_MCSPI_CHCONF_DPE0 BIT(16) ++#define OMAP2_MCSPI_CHCONF_DPE1 BIT(17) ++#define OMAP2_MCSPI_CHCONF_IS BIT(18) ++#define OMAP2_MCSPI_CHCONF_TURBO BIT(19) ++#define OMAP2_MCSPI_CHCONF_FORCE BIT(20) ++#define OMAP2_MCSPI_CHCONF_FFET BIT(27) ++#define OMAP2_MCSPI_CHCONF_FFER BIT(28) ++#define OMAP2_MCSPI_CHCONF_CLKG BIT(29) ++ ++#define OMAP2_MCSPI_CHSTAT_RXS BIT(0) ++#define OMAP2_MCSPI_CHSTAT_TXS BIT(1) ++#define OMAP2_MCSPI_CHSTAT_EOT BIT(2) ++#define OMAP2_MCSPI_CHSTAT_TXFFE BIT(3) ++ ++#define OMAP2_MCSPI_CHCTRL_EN BIT(0) ++#define OMAP2_MCSPI_CHCTRL_EXTCLK_MASK (0xff << 8) ++ ++#define OMAP2_MCSPI_WAKEUPENABLE_WKEN BIT(0) ++ ++#define OMAP2_MCSPI_SYSCONFIG_CLOCKACTIVITY_MASK (0x3 << 8) ++#define OMAP2_MCSPI_SYSCONFIG_SIDLEMODE_MASK (0x3 << 3) ++#define OMAP2_MCSPI_SYSCONFIG_SOFTRESET BIT(1) ++#define OMAP2_MCSPI_SYSCONFIG_AUTOIDLE BIT(0) ++ ++#define OMAP2_MCSPI_SYSSTATUS_RESETDONE BIT(0) ++ ++/* current version supports max 2 CS per module */ ++#define OMAP2_MCSPI_CS_N 2 ++ ++#define MCSPI_PINDIR_D0_IN_D1_OUT 0 ++#define MCSPI_PINDIR_D0_OUT_D1_IN 1 ++ ++struct omap2_mcspi_platform_config { ++ unsigned short num_cs; ++ unsigned int regs_offset; ++ unsigned int pin_dir:1; 
++}; ++ ++struct omap2_mcspi_cs { ++ /* CS channel */ ++ void __iomem *regs; ++ unsigned long phys; ++ u8 chosen; ++}; ++ ++struct spi_master_omap2_mcspi { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ unsigned long phys; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ int fifo_depth; ++ rtdm_event_t transfer_done; ++ unsigned int pin_dir:1; ++ struct omap2_mcspi_cs cs[OMAP2_MCSPI_CS_N]; ++ /* logging */ ++ int n_rx_full; ++ int n_tx_empty; ++ int n_interrupts; ++}; ++ ++struct spi_slave_omap2_mcspi { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_omap2_mcspi * ++to_slave_omap2_mcspi(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_omap2_mcspi, slave); ++} ++ ++static inline struct spi_master_omap2_mcspi * ++to_master_omap2_mcspi(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, ++ struct spi_master_omap2_mcspi, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 mcspi_rd_reg(struct spi_master_omap2_mcspi *spim, ++ unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void mcspi_wr_reg(struct spi_master_omap2_mcspi *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static inline u32 ++mcspi_rd_cs_reg(struct spi_master_omap2_mcspi *spim, ++ int cs_id, unsigned int reg) ++{ ++ return readl(spim->cs[cs_id].regs + reg); ++} ++ ++static inline void ++mcspi_wr_cs_reg(struct spi_master_omap2_mcspi *spim, int cs_id, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->cs[cs_id].regs + reg); ++} ++ ++static void omap2_mcspi_init_hw(struct spi_master_omap2_mcspi *spim) ++{ ++ u32 l; ++ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSCONFIG); ++ /* CLOCKACTIVITY = 3h: OCP and Functional clocks are maintained */ ++ l |= OMAP2_MCSPI_SYSCONFIG_CLOCKACTIVITY_MASK; ++ /* SIDLEMODE = 1h: ignore idle requests */ ++ l &= ~OMAP2_MCSPI_SYSCONFIG_SIDLEMODE_MASK; ++ l |= 0x1 << 3; ++ /* AUTOIDLE=0: OCP clock is free-running */ ++ l &= ~OMAP2_MCSPI_SYSCONFIG_AUTOIDLE; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_SYSCONFIG, l); ++ ++ /* Initialise the hardware with the default polarities (only omap2) */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_WAKEUPENABLE, ++ OMAP2_MCSPI_WAKEUPENABLE_WKEN); ++ ++ /* Setup single-channel master mode */ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_MODULCTRL); ++ /* MS=0 => spi master */ ++ l &= ~(OMAP2_MCSPI_MODULCTRL_STEST | OMAP2_MCSPI_MODULCTRL_MS); ++ l |= OMAP2_MCSPI_MODULCTRL_SINGLE; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_MODULCTRL, l); ++} ++ ++static void omap2_mcspi_reset_hw(struct spi_master_omap2_mcspi *spim) ++{ ++ u32 l; ++ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSCONFIG); ++ l |= OMAP2_MCSPI_SYSCONFIG_SOFTRESET; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_SYSCONFIG, l); ++ /* wait until reset is done */ ++ do { ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_SYSSTATUS); ++ cpu_relax(); ++ } while (!(l & OMAP2_MCSPI_SYSSTATUS_RESETDONE)); ++} ++ ++static void ++omap2_mcspi_chip_select(struct rtdm_spi_remote_slave *slave, bool active) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 l; ++ ++ /* FORCE: manual SPIEN assertion to keep SPIEN active */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ /* "active" is the logical state, not the impedance level. 
*/ ++ if (active) ++ l |= OMAP2_MCSPI_CHCONF_FORCE; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_FORCE; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, l); ++ /* Flash post-writes */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++} ++ ++static u32 omap2_mcspi_calc_divisor(u32 speed_hz) ++{ ++ u32 div; ++ ++ for (div = 0; div < 15; div++) ++ if (speed_hz >= (OMAP2_MCSPI_MAX_FREQ >> div)) ++ return div; ++ ++ return 15; ++} ++ ++/* channel 0 enable/disable */ ++static void ++omap2_mcspi_channel_enable(struct rtdm_spi_remote_slave *slave, int enable) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 l; ++ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++ if (enable) ++ l |= OMAP2_MCSPI_CHCTRL_EN; ++ else ++ l &= ~OMAP2_MCSPI_CHCTRL_EN; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0, l); ++ /* Flash post-writes */ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++} ++ ++/* called only when no transfer is active to this device */ ++static int omap2_mcspi_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 l = 0, clkd = 0, div = 1, extclk = 0, clkg = 0, word_len; ++ u32 speed_hz = OMAP2_MCSPI_MAX_FREQ; ++ u32 chctrl0; ++ ++ /* The configuration parameters can be loaded in MCSPI_CH(i)CONF ++ * only when the channel is disabled ++ */ ++ omap2_mcspi_channel_enable(slave, 0); ++ ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ ++ /* Set clock frequency. */ ++ speed_hz = (u32) config->speed_hz; ++ if (speed_hz > OMAP2_MCSPI_DRIVER_MAX_FREQ) { ++ dev_warn(slave_to_kdev(slave), ++ "maximum clock frequency is %d", ++ OMAP2_MCSPI_DRIVER_MAX_FREQ); ++ } ++ speed_hz = min_t(u32, speed_hz, OMAP2_MCSPI_DRIVER_MAX_FREQ); ++ if (speed_hz < (OMAP2_MCSPI_MAX_FREQ / OMAP2_MCSPI_MAX_DIVIDER)) { ++ clkd = omap2_mcspi_calc_divisor(speed_hz); ++ speed_hz = OMAP2_MCSPI_MAX_FREQ >> clkd; ++ clkg = 0; ++ } else { ++ div = (OMAP2_MCSPI_MAX_FREQ + speed_hz - 1) / speed_hz; ++ speed_hz = OMAP2_MCSPI_MAX_FREQ / div; ++ clkd = (div - 1) & 0xf; ++ extclk = (div - 1) >> 4; ++ clkg = OMAP2_MCSPI_CHCONF_CLKG; ++ } ++ /* set clock divisor */ ++ l &= ~OMAP2_MCSPI_CHCONF_CLKD_MASK; ++ l |= clkd << 2; ++ /* set clock granularity */ ++ l &= ~OMAP2_MCSPI_CHCONF_CLKG; ++ l |= clkg; ++ if (clkg) { ++ chctrl0 = mcspi_rd_cs_reg(spim, ++ slave->chip_select, OMAP2_MCSPI_CHCTRL0); ++ chctrl0 &= ~OMAP2_MCSPI_CHCTRL_EXTCLK_MASK; ++ chctrl0 |= extclk << 8; ++ mcspi_wr_cs_reg(spim, ++ slave->chip_select, OMAP2_MCSPI_CHCTRL0, chctrl0); ++ } ++ ++ if (spim->pin_dir == MCSPI_PINDIR_D0_IN_D1_OUT) { ++ l &= ~OMAP2_MCSPI_CHCONF_IS; ++ l &= ~OMAP2_MCSPI_CHCONF_DPE1; ++ l |= OMAP2_MCSPI_CHCONF_DPE0; ++ } else { ++ l |= OMAP2_MCSPI_CHCONF_IS; ++ l |= OMAP2_MCSPI_CHCONF_DPE1; ++ l &= ~OMAP2_MCSPI_CHCONF_DPE0; ++ } ++ ++ /* wordlength */ ++ word_len = config->bits_per_word; ++ /* TODO: allow word_len != 8 */ ++ if (word_len != 8) { ++ dev_err(slave_to_kdev(slave), "word_len(%d) != 8.\n", ++ word_len); ++ return -EIO; ++ } ++ l &= ~OMAP2_MCSPI_CHCONF_WL_MASK; ++ l |= (word_len - 1) << 7; ++ ++ /* set chipselect polarity; manage with FORCE */ ++ if (!(config->mode & SPI_CS_HIGH)) ++ /* CS active-low */ ++ l |= OMAP2_MCSPI_CHCONF_EPOL; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_EPOL; ++ ++ /* set SPI mode 0..3 */ ++ if (config->mode & SPI_CPOL) ++ l |= OMAP2_MCSPI_CHCONF_POL; ++ else 
++ l &= ~OMAP2_MCSPI_CHCONF_POL; ++ if (config->mode & SPI_CPHA) ++ l |= OMAP2_MCSPI_CHCONF_PHA; ++ else ++ l &= ~OMAP2_MCSPI_CHCONF_PHA; ++ ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, l); ++ l = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ ++ omap2_mcspi_chip_select(slave, 0); ++ ++ return 0; ++} ++ ++static void mcspi_rd_fifo(struct spi_master_omap2_mcspi *spim, int cs_id) ++{ ++ u8 byte; ++ int i; ++ ++ /* Receiver register must be read to remove source of interrupt */ ++ for (i = 0; i < spim->fifo_depth; i++) { ++ byte = mcspi_rd_cs_reg(spim, cs_id, OMAP2_MCSPI_RX0); ++ if (spim->rx_buf && (spim->rx_len > 0)) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static void mcspi_wr_fifo(struct spi_master_omap2_mcspi *spim, int cs_id) ++{ ++ u8 byte; ++ int i; ++ ++ /* load transmitter register to remove the source of the interrupt */ ++ for (i = 0; i < spim->fifo_depth; i++) { ++ if (spim->tx_len <= 0) ++ byte = 0; ++ else ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ mcspi_wr_cs_reg(spim, cs_id, OMAP2_MCSPI_TX0, byte); ++ spim->tx_len--; ++ } ++} ++ ++static int omap2_mcspi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ u32 l; ++ int i, cs_id = 0; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_omap2_mcspi); ++ for (i = 0; i < OMAP2_MCSPI_CS_N; i++) ++ if (spim->cs[i].chosen) { ++ cs_id = i; ++ break; ++ } ++ ++ spim->n_interrupts++; ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_IRQSTATUS); ++ ++ if ((l & OMAP2_MCSPI_IRQSTATUS_RX0_FULL) || ++ (l & OMAP2_MCSPI_IRQSTATUS_RX1_FULL)) { ++ mcspi_rd_fifo(spim, cs_id); ++ spim->n_rx_full++; ++ } ++ if ((l & OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY) || ++ (l & OMAP2_MCSPI_IRQSTATUS_TX1_EMPTY)) { ++ if (spim->tx_len > 0) ++ mcspi_wr_fifo(spim, cs_id); ++ spim->n_tx_empty++; ++ } ++ ++ /* write 1 to OMAP2_MCSPI_IRQSTATUS field to reset it */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQSTATUS, l); ++ ++ if ((spim->tx_len <= 0) && (spim->rx_len <= 0)) { ++ /* disable interrupts */ ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQENABLE, 0); ++ ++ rtdm_event_signal(&spim->transfer_done); ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int omap2_mcspi_disable_fifo(struct rtdm_spi_remote_slave *slave, ++ int cs_id) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 chconf; ++ ++ chconf = mcspi_rd_cs_reg(spim, cs_id, OMAP2_MCSPI_CHCONF0); ++ chconf &= ~(OMAP2_MCSPI_CHCONF_FFER | OMAP2_MCSPI_CHCONF_FFET); ++ mcspi_wr_cs_reg(spim, cs_id, OMAP2_MCSPI_CHCONF0, chconf); ++ return 0; ++} ++ ++static int omap2_mcspi_set_fifo(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ unsigned int wcnt; ++ int max_fifo_depth, fifo_depth, bytes_per_word; ++ u32 chconf, xferlevel; ++ ++ chconf = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ bytes_per_word = 1; ++ ++ max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH / 2; ++ if (spim->tx_len < max_fifo_depth) { ++ fifo_depth = spim->tx_len; ++ wcnt = spim->tx_len / bytes_per_word; ++ } else { ++ fifo_depth = max_fifo_depth; ++ wcnt = max_fifo_depth * (spim->tx_len / max_fifo_depth) ++ / bytes_per_word; ++ } ++ if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT) { ++ dev_err(slave_to_kdev(slave), ++ "%s: wcnt=%d: too many bytes in a transfer.\n", ++ __func__, wcnt); ++ return -EINVAL; ++ } ++ ++ chconf |= OMAP2_MCSPI_CHCONF_FFER; ++ chconf |= OMAP2_MCSPI_CHCONF_FFET; ++ ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, chconf); ++ spim->fifo_depth = 
fifo_depth; ++ ++ xferlevel = wcnt << 16; ++ xferlevel |= (fifo_depth - 1) << 8; ++ xferlevel |= fifo_depth - 1; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_XFERLEVEL, xferlevel); ++ ++ return 0; ++} ++ ++ ++static int do_transfer_irq_bh(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ u32 chconf, l; ++ int ret; ++ int i; ++ ++ /* configure to send and receive */ ++ chconf = mcspi_rd_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0); ++ chconf &= ~OMAP2_MCSPI_CHCONF_TRM_MASK; ++ chconf &= ~OMAP2_MCSPI_CHCONF_TURBO; ++ mcspi_wr_cs_reg(spim, slave->chip_select, OMAP2_MCSPI_CHCONF0, chconf); ++ ++ /* fifo can be enabled on a single channel */ ++ if (slave->chip_select == 0) { ++ if (spim->cs[1].chosen) ++ omap2_mcspi_disable_fifo(slave, 1); ++ } else { ++ if (spim->cs[0].chosen) ++ omap2_mcspi_disable_fifo(slave, 0); ++ } ++ ret = omap2_mcspi_set_fifo(slave); ++ if (ret) ++ return ret; ++ ++ omap2_mcspi_channel_enable(slave, 1); ++ ++ /* Set slave->chip_select as chosen */ ++ for (i = 0; i < OMAP2_MCSPI_CS_N; i++) ++ if (i == slave->chip_select) ++ spim->cs[i].chosen = 1; ++ else ++ spim->cs[i].chosen = 0; ++ ++ /* The interrupt status bit should always be reset ++ * after the channel is enabled ++ * and before the event is enabled as an interrupt source. ++ */ ++ /* write 1 to OMAP2_MCSPI_IRQSTATUS field to reset it */ ++ l = mcspi_rd_reg(spim, OMAP2_MCSPI_IRQSTATUS); ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQSTATUS, l); ++ ++ spim->n_interrupts = 0; ++ spim->n_rx_full = 0; ++ spim->n_tx_empty = 0; ++ ++ /* Enable interrupts last. */ ++ /* support only two channels */ ++ if (slave->chip_select == 0) ++ l = OMAP2_MCSPI_IRQENABLE_TX0_EMPTY | ++ OMAP2_MCSPI_IRQENABLE_RX0_FULL; ++ else ++ l = OMAP2_MCSPI_IRQENABLE_TX1_EMPTY | ++ OMAP2_MCSPI_IRQENABLE_RX1_FULL; ++ mcspi_wr_reg(spim, OMAP2_MCSPI_IRQENABLE, l); ++ ++ /* TX_EMPTY will be raised only after data is transfered */ ++ mcspi_wr_fifo(spim, slave->chip_select); ++ ++ /* wait for transfer completion */ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ omap2_mcspi_channel_enable(slave, 0); ++ if (ret) ++ return ret; ++ ++ /* spim->tx_len and spim->rx_len should be 0 */ ++ if (spim->tx_len || spim->rx_len) ++ return -EIO; ++ return 0; ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int len, first_size, last_size, ret; ++ ++ len = spim->tx_len; ++ ++ if (len < (OMAP2_MCSPI_MAX_FIFODEPTH / 2)) ++ goto label_last; ++ ++ first_size = (OMAP2_MCSPI_MAX_FIFODEPTH / 2) * ++ (len / (OMAP2_MCSPI_MAX_FIFODEPTH / 2)); ++ spim->tx_len = first_size; ++ spim->rx_len = first_size; ++ ret = do_transfer_irq_bh(slave); ++ if (ret) ++ return ret; ++ ++label_last: ++ last_size = len % (OMAP2_MCSPI_MAX_FIFODEPTH / 2); ++ if (last_size == 0) ++ return ret; ++ spim->tx_len = last_size; ++ spim->rx_len = last_size; ++ ret = do_transfer_irq_bh(slave); ++ return ret; ++} ++ ++static int omap2_mcspi_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ if (mapped_data->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = mapped_data->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = mapped_data->io_virt + spim->rx_len; ++ spim->rx_buf = mapped_data->io_virt; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? 
: 0; ++} ++ ++static int omap2_mcspi_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ if ((mapped_data->io_len == 0) || ++ (len <= 0) || (len > (mapped_data->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = mapped_data->io_virt + mapped_data->io_len / 2; ++ spim->rx_buf = mapped_data->io_virt; ++ ++ ret = do_transfer_irq(slave); ++ ++ ++ return ret ? : 0; ++} ++ ++static ssize_t omap2_mcspi_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int ret; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? : len; ++} ++ ++static ssize_t omap2_mcspi_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_omap2_mcspi *spim = to_master_omap2_mcspi(slave); ++ int ret; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ ret = do_transfer_irq(slave); ++ ++ return ret ? : len; ++} ++ ++static int set_iobufs(struct spi_slave_omap2_mcspi *mapped_data, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == mapped_data->io_len) ++ return 0; ++ ++ if (mapped_data->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. */ ++ ++ /* ++ * Since we need the I/O buffers to be set for starting a ++ * transfer, there is no need for serializing this routine and ++ * transfer_iobufs(), provided io_len is set last. ++ * ++ * NOTE: We don't need coherent memory until we actually get ++ * DMA transfers working, this code is a bit ahead of ++ * schedule. ++ * ++ * Revisit: this assumes DMA mask is 4Gb. ++ */ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ mapped_data->io_dma = dma; ++ mapped_data->io_virt = p; ++ /* ++ * May race with transfer_iobufs(), must be assigned after all ++ * the rest is set up, enforcing a membar. ++ */ ++ smp_mb(); ++ mapped_data->io_len = len; ++ ++ return 0; ++} ++ ++static int omap2_mcspi_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ int ret; ++ ++ ret = set_iobufs(mapped_data, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = mapped_data->io_len / 2; ++ p->map_len = mapped_data->io_len; ++ ++ return 0; ++} ++ ++static int omap2_mcspi_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ /* ++ * dma_alloc_coherent() delivers non-cached memory, make sure ++ * to return consistent mapping attributes. Typically, mixing ++ * memory attributes across address spaces referring to the ++ * same physical area is architecturally wrong on ARM. 
++ */ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ ++ return rtdm_mmap_kmem(vma, mapped_data->io_virt); ++} ++ ++static void omap2_mcspi_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ dma_free_coherent(NULL, mapped_data->io_len, ++ mapped_data->io_virt, mapped_data->io_dma); ++ mapped_data->io_len = 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++omap2_mcspi_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ struct spi_slave_omap2_mcspi *mapped_data; ++ int ret; ++ ++ if ((spi->chip_select >= OMAP2_MCSPI_CS_N) || (OMAP2_MCSPI_CS_N > 2)) { ++ /* Error in the case of native CS requested with CS > 1 */ ++ dev_err(&spi->dev, "%s: only two native CS per spi module are supported\n", ++ __func__); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ mapped_data = kzalloc(sizeof(*mapped_data), GFP_KERNEL); ++ if (mapped_data == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&mapped_data->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, "%s: failed to attach slave\n", __func__); ++ kfree(mapped_data); ++ return ERR_PTR(ret); ++ } ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ spim->cs[spi->chip_select].chosen = 0; ++ spim->cs[spi->chip_select].regs = spim->regs + ++ spi->chip_select * OMAP2_MCSPI_CHANNELBANK_SIZE; ++ spim->cs[spi->chip_select].phys = spim->phys + ++ spi->chip_select * OMAP2_MCSPI_CHANNELBANK_SIZE; ++ ++ return &mapped_data->slave; ++} ++ ++static void omap2_mcspi_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_omap2_mcspi *mapped_data = to_slave_omap2_mcspi(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ ++ kfree(mapped_data); ++} ++ ++static struct rtdm_spi_master_ops omap2_mcspi_master_ops = { ++ .configure = omap2_mcspi_configure, ++ .chip_select = omap2_mcspi_chip_select, ++ .set_iobufs = omap2_mcspi_set_iobufs, ++ .mmap_iobufs = omap2_mcspi_mmap_iobufs, ++ .mmap_release = omap2_mcspi_mmap_release, ++ .transfer_iobufs = omap2_mcspi_transfer_iobufs, ++ .transfer_iobufs_n = omap2_mcspi_transfer_iobufs_n, ++ .write = omap2_mcspi_write, ++ .read = omap2_mcspi_read, ++ .attach_slave = omap2_mcspi_attach_slave, ++ .detach_slave = omap2_mcspi_detach_slave, ++}; ++ ++static struct omap2_mcspi_platform_config omap2_pdata = { ++ .regs_offset = 0, ++}; ++ ++static struct omap2_mcspi_platform_config omap4_pdata = { ++ .regs_offset = OMAP4_MCSPI_REG_OFFSET, ++}; ++ ++static const struct of_device_id omap_mcspi_of_match[] = { ++ { ++ .compatible = "ti,omap2-mcspi", ++ .data = &omap2_pdata, ++ }, ++ { ++ /* beaglebone black */ ++ .compatible = "ti,omap4-mcspi", ++ .data = &omap4_pdata, ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, omap_mcspi_of_match); ++ ++static int omap2_mcspi_probe(struct platform_device *pdev) ++{ ++ struct spi_master_omap2_mcspi *spim; ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ u32 regs_offset = 0; ++ const struct omap2_mcspi_platform_config *pdata; ++ const struct of_device_id *match; ++ u32 num_cs = 1; ++ unsigned int pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT; ++ ++ match = of_match_device(omap_mcspi_of_match, &pdev->dev); ++ if (match) { ++ pdata = match->data; ++ regs_offset = pdata->regs_offset; ++ } else { ++ dev_err(&pdev->dev, "%s: cannot find a match with device tree\n" ++ "of '%s' or '%s'", ++ __func__, ++ 
omap_mcspi_of_match[0].compatible, ++ omap_mcspi_of_match[1].compatible); ++ return -ENOENT; ++ } ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_omap2_mcspi, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_OMAP2_MCSPI; ++ master->ops = &omap2_mcspi_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ /* flags understood by this controller driver */ ++ kmaster->mode_bits = OMAP2_MCSPI_SPI_MODE_BITS; ++ /* TODO: SPI_BPW_RANGE_MASK(4, 32); */ ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ of_property_read_u32(pdev->dev.of_node, "ti,spi-num-cs", &num_cs); ++ kmaster->num_chipselect = num_cs; ++ if (of_get_property(pdev->dev.of_node, ++ "ti,pindir-d0-out-d1-in", NULL)) { ++ pin_dir = MCSPI_PINDIR_D0_OUT_D1_IN; ++ } ++ ++ kmaster->max_speed_hz = OMAP2_MCSPI_MAX_FREQ; ++ kmaster->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ spim->pin_dir = pin_dir; ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ spim->phys = r->start + regs_offset; ++ spim->regs += regs_offset; ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ dev_err(&pdev->dev, "%s: irq_of_parse_and_map: %d\n", ++ __func__, irq); ++ goto fail; ++ } ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ omap2_mcspi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: failed to add master\n", __func__); ++ goto fail_unclk; ++ } ++ ++ pm_runtime_use_autosuspend(&pdev->dev); ++ /* if delay is negative and the use_autosuspend flag is set ++ * then runtime suspends are prevented. 
++ */ ++ pm_runtime_set_autosuspend_delay(&pdev->dev, PM_NEGATIVE_DELAY); ++ pm_runtime_enable(&pdev->dev); ++ ret = pm_runtime_get_sync(&pdev->dev); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "%s: pm_runtime_get_sync error %d\n", ++ __func__, ret); ++ return ret; ++ } ++ ++ omap2_mcspi_reset_hw(spim); ++ omap2_mcspi_init_hw(spim); ++ ++ dev_info(&pdev->dev, "success\n"); ++ return 0; ++ ++fail_unclk: ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int omap2_mcspi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_omap2_mcspi *spim; ++ ++ spim = container_of(master, struct spi_master_omap2_mcspi, master); ++ ++ omap2_mcspi_reset_hw(spim); ++ ++ pm_runtime_dont_use_autosuspend(&pdev->dev); ++ pm_runtime_put_sync(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static struct platform_driver omap2_mcspi_spi_driver = { ++ .driver = { ++ .name = "omap2_mcspi_rt", ++ .of_match_table = omap_mcspi_of_match, ++ }, ++ .probe = omap2_mcspi_probe, ++ .remove = omap2_mcspi_remove, ++}; ++module_platform_driver(omap2_mcspi_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/Makefile 2021-04-07 16:01:28.003633057 +0800 +@@ -0,0 +1,14 @@ ++ ++ccflags-$(CONFIG_XENO_DRIVERS_SPI_DEBUG) := -DDEBUG ++ ++obj-$(CONFIG_XENO_DRIVERS_SPI) += xeno_spi.o ++ ++xeno_spi-y := spi-master.o spi-device.o ++ ++obj-$(CONFIG_XENO_DRIVERS_SPI_BCM2835) += xeno_spi_bcm2835.o ++obj-$(CONFIG_XENO_DRIVERS_SPI_SUN6I) += xeno_spi_sun6i.o ++obj-$(CONFIG_XENO_DRIVERS_SPI_OMAP2_MCSPI_RT) += xeno_spi_omap2_mcspi_rt.o ++ ++xeno_spi_bcm2835-y := spi-bcm2835.o ++xeno_spi_sun6i-y := spi-sun6i.o ++xeno_spi_omap2_mcspi_rt-y := spi-omap2-mcspi-rt.o +--- linux/drivers/xenomai/spi/spi-device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-device.c 2021-04-07 16:01:27.999633063 +0800 +@@ -0,0 +1,181 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++int rtdm_spi_add_remote_slave(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_master *master, ++ struct spi_device *spi) ++{ ++ struct spi_master *kmaster = master->kmaster; ++ struct rtdm_device *dev; ++ rtdm_lockctx_t c; ++ int ret; ++ ++ memset(slave, 0, sizeof(*slave)); ++ slave->chip_select = spi->chip_select; ++ slave->config.bits_per_word = spi->bits_per_word; ++ slave->config.speed_hz = spi->max_speed_hz; ++ slave->config.mode = spi->mode; ++ slave->master = master; ++ ++ dev = &slave->dev; ++ dev->driver = &master->driver; ++ dev->label = kasprintf(GFP_KERNEL, "%s/slave%d.%%d", ++ dev_name(&kmaster->dev), ++ kmaster->bus_num); ++ if (dev->label == NULL) ++ return -ENOMEM; ++ ++ if (gpio_is_valid(spi->cs_gpio)) ++ slave->cs_gpio = spi->cs_gpio; ++ else { ++ slave->cs_gpio = -ENOENT; ++ if (kmaster->cs_gpios) ++ slave->cs_gpio = kmaster->cs_gpios[spi->chip_select]; ++ } ++ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ ret = gpio_request(slave->cs_gpio, dev->label); ++ if (ret) ++ goto fail; ++ slave->cs_gpiod = gpio_to_desc(slave->cs_gpio); ++ if (slave->cs_gpiod == NULL) ++ goto fail; ++ } ++ ++ mutex_init(&slave->ctl_lock); ++ ++ dev->device_data = master; ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto fail; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ list_add_tail(&slave->next, &master->slaves); ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ return 0; ++fail: ++ kfree(dev->label); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_add_remote_slave); ++ ++void rtdm_spi_remove_remote_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_device *dev; ++ rtdm_lockctx_t c; ++ ++ if (gpio_is_valid(slave->cs_gpio)) ++ gpio_free(slave->cs_gpio); ++ ++ mutex_destroy(&slave->ctl_lock); ++ rtdm_lock_get_irqsave(&master->lock, c); ++ list_del(&slave->next); ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ dev = &slave->dev; ++ rtdm_dev_unregister(dev); ++ kfree(dev->label); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_remove_remote_slave); ++ ++static int spi_device_probe(struct spi_device *spi) ++{ ++ struct rtdm_spi_remote_slave *slave; ++ struct rtdm_spi_master *master; ++ int ret; ++ ++ /* ++ * Chicken and egg issue: we want the RTDM device class name ++ * to duplicate the SPI master name, but that information is ++ * only available after spi_register_master() has returned. We ++ * solve this by initializing the RTDM driver descriptor on ++ * the fly when the first SPI device on the bus is advertised ++ * on behalf of spi_register_master(). ++ * ++ * NOTE: the driver core guarantees serialization. 
++ */ ++ master = spi_master_get_devdata(spi->master); ++ if (master->devclass == NULL) { ++ ret = __rtdm_spi_setup_driver(master); ++ if (ret) ++ return ret; ++ } ++ ++ slave = master->ops->attach_slave(master, spi); ++ if (IS_ERR(slave)) ++ return PTR_ERR(slave); ++ ++ spi_set_drvdata(spi, slave); ++ ++ return 0; ++} ++ ++static int spi_device_remove(struct spi_device *spi) ++{ ++ struct rtdm_spi_remote_slave *slave = spi_get_drvdata(spi); ++ ++ slave->master->ops->detach_slave(slave); ++ ++ return 0; ++} ++ ++static const struct of_device_id spi_device_match[] = { ++ { ++ .compatible = "rtdm-spidev", ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, spi_device_match); ++ ++static struct spi_driver spi_device_driver = { ++ .driver = { ++ .name = "rtdm_spi_device", ++ .owner = THIS_MODULE, ++ .of_match_table = spi_device_match, ++ }, ++ .probe = spi_device_probe, ++ .remove = spi_device_remove, ++}; ++ ++static int __init spi_device_init(void) ++{ ++ int ret; ++ ++ ret = spi_register_driver(&spi_device_driver); ++ ++ return ret; ++} ++module_init(spi_device_init); ++ ++static void __exit spi_device_exit(void) ++{ ++ spi_unregister_driver(&spi_device_driver); ++ ++} ++module_exit(spi_device_exit); +--- linux/drivers/xenomai/spi/spi-sun6i.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-sun6i.c 2021-04-07 16:01:27.994633070 +0800 +@@ -0,0 +1,674 @@ ++/** ++ * I/O handling lifted from drivers/spi/spi-sun6i.c: ++ * Copyright (C) 2012 - 2014 Allwinner Tech ++ * Pan Nan ++ * Copyright (C) 2014 Maxime Ripard ++ * Maxime Ripard ++ * ++ * RTDM integration by: ++ * Copyright (C) 2017 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++#define RTDM_SUBCLASS_SUN6I 2 ++ ++#define SUN6I_GBL_CTL_REG 0x04 ++#define SUN6I_GBL_CTL_BUS_ENABLE BIT(0) ++#define SUN6I_GBL_CTL_MASTER BIT(1) ++#define SUN6I_GBL_CTL_TP BIT(7) ++#define SUN6I_GBL_CTL_RST BIT(31) ++ ++#define SUN6I_TFR_CTL_REG 0x08 ++#define SUN6I_TFR_CTL_CPHA BIT(0) ++#define SUN6I_TFR_CTL_CPOL BIT(1) ++#define SUN6I_TFR_CTL_SPOL BIT(2) ++#define SUN6I_TFR_CTL_CS_MASK 0x30 ++#define SUN6I_TFR_CTL_CS(cs) (((cs) << 4) & SUN6I_TFR_CTL_CS_MASK) ++#define SUN6I_TFR_CTL_CS_MANUAL BIT(6) ++#define SUN6I_TFR_CTL_CS_LEVEL BIT(7) ++#define SUN6I_TFR_CTL_DHB BIT(8) ++#define SUN6I_TFR_CTL_FBS BIT(12) ++#define SUN6I_TFR_CTL_XCH BIT(31) ++ ++#define SUN6I_INT_CTL_REG 0x10 ++#define SUN6I_INT_CTL_RX_RDY BIT(0) ++#define SUN6I_INT_CTL_TX_RDY BIT(4) ++#define SUN6I_INT_CTL_RX_OVF BIT(8) ++#define SUN6I_INT_CTL_TC BIT(12) ++ ++#define SUN6I_INT_STA_REG 0x14 ++ ++#define SUN6I_FIFO_CTL_REG 0x18 ++#define SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_MASK 0xff ++#define SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_BITS 0 ++#define SUN6I_FIFO_CTL_RX_RST BIT(15) ++#define SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_MASK 0xff ++#define SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_BITS 16 ++#define SUN6I_FIFO_CTL_TX_RST BIT(31) ++ ++#define SUN6I_FIFO_STA_REG 0x1c ++#define SUN6I_FIFO_STA_RX_CNT(reg) (((reg) >> 0) & 0xff) ++#define SUN6I_FIFO_STA_TX_CNT(reg) (((reg) >> 16) & 0xff) ++ ++#define SUN6I_CLK_CTL_REG 0x24 ++#define SUN6I_CLK_CTL_CDR2_MASK 0xff ++#define SUN6I_CLK_CTL_CDR2(div) (((div) & SUN6I_CLK_CTL_CDR2_MASK) << 0) ++#define SUN6I_CLK_CTL_CDR1_MASK 0xf ++#define SUN6I_CLK_CTL_CDR1(div) (((div) & SUN6I_CLK_CTL_CDR1_MASK) << 8) ++#define SUN6I_CLK_CTL_DRS BIT(12) ++ ++#define SUN6I_MAX_XFER_SIZE 0xffffff ++ ++#define SUN6I_BURST_CNT_REG 0x30 ++#define SUN6I_BURST_CNT(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_XMIT_CNT_REG 0x34 ++#define SUN6I_XMIT_CNT(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_BURST_CTL_CNT_REG 0x38 ++#define SUN6I_BURST_CTL_CNT_STC(cnt) ((cnt) & SUN6I_MAX_XFER_SIZE) ++ ++#define SUN6I_TXDATA_REG 0x200 ++#define SUN6I_RXDATA_REG 0x300 ++ ++#define SUN6I_SPI_MODE_BITS (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \ ++ | SPI_LSB_FIRST) ++ ++ struct spi_setup_data { ++ int fifo_depth; ++ }; ++ ++static struct spi_setup_data sun6i_data = { ++ .fifo_depth = 128, ++}; ++ ++static struct spi_setup_data sun8i_data = { ++ .fifo_depth = 64, ++}; ++ ++struct spi_master_sun6i { ++ struct rtdm_spi_master master; ++ void __iomem *regs; ++ struct reset_control *rstc; ++ struct clk *hclk; ++ struct clk *mclk; ++ unsigned long clk_hz; ++ rtdm_irq_t irqh; ++ const u8 *tx_buf; ++ u8 *rx_buf; ++ int tx_len; ++ int rx_len; ++ rtdm_event_t transfer_done; ++ const struct spi_setup_data *setup; ++}; ++ ++struct spi_slave_sun6i { ++ struct rtdm_spi_remote_slave slave; ++ void *io_virt; ++ dma_addr_t io_dma; ++ size_t io_len; ++}; ++ ++static inline struct spi_slave_sun6i * ++to_slave_sun6i(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave, struct spi_slave_sun6i, slave); ++} ++ ++static inline struct spi_master_sun6i * ++to_master_sun6i(struct rtdm_spi_remote_slave *slave) ++{ ++ return container_of(slave->master, struct spi_master_sun6i, master); ++} ++ ++static inline struct device * ++master_to_kdev(struct rtdm_spi_master *master) ++{ ++ return &master->kmaster->dev; ++} ++ ++static inline u32 sun6i_rd(struct spi_master_sun6i *spim, ++ 
unsigned int reg) ++{ ++ return readl(spim->regs + reg); ++} ++ ++static inline void sun6i_wr(struct spi_master_sun6i *spim, ++ unsigned int reg, u32 val) ++{ ++ writel(val, spim->regs + reg); ++} ++ ++static void sun6i_rd_fifo(struct spi_master_sun6i *spim) ++{ ++ u32 reg; ++ int len; ++ u8 byte; ++ ++ reg = sun6i_rd(spim, SUN6I_FIFO_STA_REG); ++ len = min((int)SUN6I_FIFO_STA_RX_CNT(reg), spim->rx_len); ++ ++ while (len-- > 0) { ++ byte = sun6i_rd(spim, SUN6I_RXDATA_REG); ++ if (spim->rx_buf) ++ *spim->rx_buf++ = byte; ++ spim->rx_len--; ++ } ++} ++ ++static void sun6i_wr_fifo(struct spi_master_sun6i *spim) ++{ ++ u32 reg; ++ int len; ++ u8 byte; ++ ++ reg = sun6i_rd(spim, SUN6I_FIFO_STA_REG); ++ len = min(spim->setup->fifo_depth - (int)SUN6I_FIFO_STA_TX_CNT(reg), ++ spim->tx_len); ++ ++ while (len-- > 0) { ++ byte = spim->tx_buf ? *spim->tx_buf++ : 0; ++ sun6i_wr(spim, SUN6I_TXDATA_REG, byte); ++ spim->tx_len--; ++ } ++} ++ ++static int sun6i_spi_interrupt(rtdm_irq_t *irqh) ++{ ++ struct spi_master_sun6i *spim; ++ u32 status; ++ ++ spim = rtdm_irq_get_arg(irqh, struct spi_master_sun6i); ++ ++ sun6i_rd_fifo(spim); ++ sun6i_wr_fifo(spim); ++ ++ status = sun6i_rd(spim, SUN6I_INT_STA_REG); ++ if ((status & SUN6I_INT_CTL_TC)) { ++ sun6i_wr(spim, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC); ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ rtdm_event_signal(&spim->transfer_done); ++ } else if (status & SUN6I_INT_CTL_TX_RDY) ++ sun6i_wr(spim, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TX_RDY); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int sun6i_configure(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct rtdm_spi_config *config = &slave->config; ++ u32 reg, div; ++ ++ /* Set clock polarity and phase. */ ++ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~(SUN6I_TFR_CTL_CPOL | SUN6I_TFR_CTL_CPHA | ++ SUN6I_TFR_CTL_FBS | SUN6I_TFR_CTL_SPOL); ++ ++ /* Manual CS via ->chip_select(). */ ++ reg |= SUN6I_TFR_CTL_CS_MANUAL; ++ ++ if (config->mode & SPI_CPOL) ++ reg |= SUN6I_TFR_CTL_CPOL; ++ ++ if (config->mode & SPI_CPHA) ++ reg |= SUN6I_TFR_CTL_CPHA; ++ ++ if (config->mode & SPI_LSB_FIRST) ++ reg |= SUN6I_TFR_CTL_FBS; ++ ++ if (!(config->mode & SPI_CS_HIGH)) ++ reg |= SUN6I_TFR_CTL_SPOL; ++ ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++ ++ /* Setup clock divider. */ ++ ++ div = spim->clk_hz / (2 * config->speed_hz); ++ if (div <= SUN6I_CLK_CTL_CDR2_MASK + 1) { ++ if (div > 0) ++ div--; ++ reg = SUN6I_CLK_CTL_CDR2(div) | SUN6I_CLK_CTL_DRS; ++ } else { ++ div = ilog2(spim->clk_hz) - ilog2(config->speed_hz); ++ reg = SUN6I_CLK_CTL_CDR1(div); ++ } ++ ++ sun6i_wr(spim, SUN6I_CLK_CTL_REG, reg); ++ ++ return 0; ++} ++ ++static void sun6i_chip_select(struct rtdm_spi_remote_slave *slave, ++ bool active) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ u32 reg; ++ ++ /* ++ * We have no cs_gpios, so this handler will be called for ++ * each transfer. ++ */ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~(SUN6I_TFR_CTL_CS_MASK | SUN6I_TFR_CTL_CS_LEVEL); ++ reg |= SUN6I_TFR_CTL_CS(slave->chip_select); ++ ++ if (active) ++ reg |= SUN6I_TFR_CTL_CS_LEVEL; ++ ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++} ++ ++static int do_transfer_irq(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ u32 tx_len = 0, reg; ++ int ret; ++ ++ /* Reset FIFO. */ ++ sun6i_wr(spim, SUN6I_FIFO_CTL_REG, ++ SUN6I_FIFO_CTL_RX_RST | SUN6I_FIFO_CTL_TX_RST); ++ ++ /* Set FIFO interrupt trigger level to 3/4 of the fifo depth. 
*/ ++ reg = spim->setup->fifo_depth / 4 * 3; ++ sun6i_wr(spim, SUN6I_FIFO_CTL_REG, ++ (reg << SUN6I_FIFO_CTL_RX_RDY_TRIG_LEVEL_BITS) | ++ (reg << SUN6I_FIFO_CTL_TX_RDY_TRIG_LEVEL_BITS)); ++ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ reg &= ~SUN6I_TFR_CTL_DHB; ++ /* Discard unused SPI bursts if TX only. */ ++ if (spim->rx_buf == NULL) ++ reg |= SUN6I_TFR_CTL_DHB; ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg); ++ ++ if (spim->tx_buf) ++ tx_len = spim->tx_len; ++ ++ /* Setup the counters. */ ++ sun6i_wr(spim, SUN6I_BURST_CNT_REG, SUN6I_BURST_CNT(spim->tx_len)); ++ sun6i_wr(spim, SUN6I_XMIT_CNT_REG, SUN6I_XMIT_CNT(tx_len)); ++ sun6i_wr(spim, SUN6I_BURST_CTL_CNT_REG, ++ SUN6I_BURST_CTL_CNT_STC(tx_len)); ++ ++ /* Fill the TX FIFO */ ++ sun6i_wr_fifo(spim); ++ ++ /* Enable interrupts. */ ++ reg = sun6i_rd(spim, SUN6I_INT_CTL_REG); ++ reg |= SUN6I_INT_CTL_TC | SUN6I_INT_CTL_TX_RDY; ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, reg); ++ ++ /* Start the transfer. */ ++ reg = sun6i_rd(spim, SUN6I_TFR_CTL_REG); ++ sun6i_wr(spim, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH); ++ ++ ret = rtdm_event_wait(&spim->transfer_done); ++ if (ret) { ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int sun6i_transfer_iobufs(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ if (sun6i->io_len == 0) ++ return -EINVAL; /* No I/O buffers set. */ ++ ++ spim->tx_len = sun6i->io_len / 2; ++ spim->rx_len = spim->tx_len; ++ spim->tx_buf = sun6i->io_virt + spim->rx_len; ++ spim->rx_buf = sun6i->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static int sun6i_transfer_iobufs_n(struct rtdm_spi_remote_slave *slave, ++ int len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ if ((sun6i->io_len == 0) || ++ (len <= 0) || (len > (sun6i->io_len / 2))) ++ return -EINVAL; ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = sun6i->io_virt + sun6i->io_len / 2; ++ spim->rx_buf = sun6i->io_virt; ++ ++ return do_transfer_irq(slave); ++} ++ ++static ssize_t sun6i_read(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = NULL; ++ spim->rx_buf = rx; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static ssize_t sun6i_write(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len) ++{ ++ struct spi_master_sun6i *spim = to_master_sun6i(slave); ++ ++ spim->tx_len = len; ++ spim->rx_len = len; ++ spim->tx_buf = tx; ++ spim->rx_buf = NULL; ++ ++ return do_transfer_irq(slave) ?: len; ++} ++ ++static int set_iobufs(struct spi_slave_sun6i *sun6i, size_t len) ++{ ++ dma_addr_t dma; ++ void *p; ++ ++ if (len == 0) ++ return -EINVAL; ++ ++ len = L1_CACHE_ALIGN(len) * 2; ++ if (len == sun6i->io_len) ++ return 0; ++ ++ if (sun6i->io_len) ++ return -EINVAL; /* I/O buffers may not be resized. 
*/ ++ ++ p = dma_alloc_coherent(NULL, len, &dma, GFP_KERNEL); ++ if (p == NULL) ++ return -ENOMEM; ++ ++ sun6i->io_dma = dma; ++ sun6i->io_virt = p; ++ smp_mb(); ++ sun6i->io_len = len; ++ ++ return 0; ++} ++ ++static int sun6i_set_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ int ret; ++ ++ ret = set_iobufs(sun6i, p->io_len); ++ if (ret) ++ return ret; ++ ++ p->i_offset = 0; ++ p->o_offset = sun6i->io_len / 2; ++ p->map_len = sun6i->io_len; ++ ++ return 0; ++} ++ ++static int sun6i_mmap_iobufs(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ++ ++ return rtdm_mmap_kmem(vma, sun6i->io_virt); ++} ++ ++static void sun6i_mmap_release(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ dma_free_coherent(NULL, sun6i->io_len, ++ sun6i->io_virt, sun6i->io_dma); ++ sun6i->io_len = 0; ++} ++ ++static struct rtdm_spi_remote_slave * ++sun6i_attach_slave(struct rtdm_spi_master *master, struct spi_device *spi) ++{ ++ struct spi_slave_sun6i *sun6i; ++ int ret; ++ ++ sun6i = kzalloc(sizeof(*sun6i), GFP_KERNEL); ++ if (sun6i == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_spi_add_remote_slave(&sun6i->slave, master, spi); ++ if (ret) { ++ dev_err(&spi->dev, ++ "%s: failed to attach slave\n", __func__); ++ kfree(sun6i); ++ return ERR_PTR(ret); ++ } ++ ++ return &sun6i->slave; ++} ++ ++static void sun6i_detach_slave(struct rtdm_spi_remote_slave *slave) ++{ ++ struct spi_slave_sun6i *sun6i = to_slave_sun6i(slave); ++ ++ rtdm_spi_remove_remote_slave(slave); ++ kfree(sun6i); ++} ++ ++static struct rtdm_spi_master_ops sun6i_master_ops = { ++ .configure = sun6i_configure, ++ .chip_select = sun6i_chip_select, ++ .set_iobufs = sun6i_set_iobufs, ++ .mmap_iobufs = sun6i_mmap_iobufs, ++ .mmap_release = sun6i_mmap_release, ++ .transfer_iobufs = sun6i_transfer_iobufs, ++ .transfer_iobufs_n = sun6i_transfer_iobufs_n, ++ .write = sun6i_write, ++ .read = sun6i_read, ++ .attach_slave = sun6i_attach_slave, ++ .detach_slave = sun6i_detach_slave, ++}; ++ ++static int sun6i_spi_probe(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master; ++ struct spi_master_sun6i *spim; ++ struct spi_master *kmaster; ++ struct resource *r; ++ int ret, irq; ++ u32 clk_rate; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ master = rtdm_spi_alloc_master(&pdev->dev, ++ struct spi_master_sun6i, master); ++ if (master == NULL) ++ return -ENOMEM; ++ ++ master->subclass = RTDM_SUBCLASS_SUN6I; ++ master->ops = &sun6i_master_ops; ++ platform_set_drvdata(pdev, master); ++ ++ kmaster = master->kmaster; ++ kmaster->max_speed_hz = 100 * 1000 * 1000; ++ kmaster->min_speed_hz = 3 * 1000; ++ kmaster->mode_bits = SUN6I_SPI_MODE_BITS; ++ kmaster->bits_per_word_mask = SPI_BPW_MASK(8); ++ kmaster->num_chipselect = 4; ++ kmaster->dev.of_node = pdev->dev.of_node; ++ ++ spim = container_of(master, struct spi_master_sun6i, master); ++ spim->setup = of_device_get_match_data(&pdev->dev); ++ ++ rtdm_event_init(&spim->transfer_done, 0); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ spim->regs = devm_ioremap_resource(&pdev->dev, r); ++ if (IS_ERR(spim->regs)) { ++ dev_err(&pdev->dev, "%s: cannot map I/O memory\n", __func__); ++ ret = PTR_ERR(spim->regs); ++ goto fail; ++ } ++ ++ spim->hclk = devm_clk_get(&pdev->dev, "ahb"); ++ if 
(IS_ERR(spim->hclk)) { ++ dev_err(&pdev->dev, "Unable to acquire AHB clock\n"); ++ ret = PTR_ERR(spim->hclk); ++ goto fail; ++ } ++ ++ spim->mclk = devm_clk_get(&pdev->dev, "mod"); ++ if (IS_ERR(spim->mclk)) { ++ dev_err(&pdev->dev, "Unable to acquire MOD clock\n"); ++ ret = PTR_ERR(spim->mclk); ++ goto fail; ++ } ++ ++ spim->rstc = devm_reset_control_get(&pdev->dev, NULL); ++ if (IS_ERR(spim->rstc)) { ++ dev_err(&pdev->dev, "Couldn't get reset controller\n"); ++ ret = PTR_ERR(spim->rstc); ++ goto fail; ++ } ++ ++ /* ++ * Ensure that we have a parent clock fast enough to handle ++ * the fastest transfers properly. ++ */ ++ clk_rate = clk_get_rate(spim->mclk); ++ if (clk_rate < 2 * kmaster->max_speed_hz) ++ clk_set_rate(spim->mclk, 2 * kmaster->max_speed_hz); ++ ++ spim->clk_hz = clk_get_rate(spim->mclk); ++ ++ irq = irq_of_parse_and_map(pdev->dev.of_node, 0); ++ if (irq <= 0) { ++ ret = irq ?: -ENODEV; ++ goto fail; ++ } ++ ++ clk_prepare_enable(spim->hclk); ++ clk_prepare_enable(spim->mclk); ++ ++ ret = reset_control_deassert(spim->rstc); ++ if (ret) ++ goto fail_unclk; ++ ++ /* Enable SPI module, in master mode with smart burst. */ ++ ++ sun6i_wr(spim, SUN6I_GBL_CTL_REG, ++ SUN6I_GBL_CTL_BUS_ENABLE | SUN6I_GBL_CTL_MASTER | ++ SUN6I_GBL_CTL_TP); ++ ++ /* Disable and clear all interrupts. */ ++ sun6i_wr(spim, SUN6I_INT_CTL_REG, 0); ++ sun6i_wr(spim, SUN6I_INT_STA_REG, ~0); ++ ++ ret = rtdm_irq_request(&spim->irqh, irq, ++ sun6i_spi_interrupt, 0, ++ dev_name(&pdev->dev), spim); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: cannot request IRQ%d\n", ++ __func__, irq); ++ goto fail_unclk; ++ } ++ ++ ret = rtdm_spi_add_master(&spim->master); ++ if (ret) { ++ dev_err(&pdev->dev, "%s: failed to add master\n", ++ __func__); ++ goto fail_register; ++ } ++ ++ return 0; ++ ++fail_register: ++ rtdm_irq_free(&spim->irqh); ++fail_unclk: ++ clk_disable_unprepare(spim->mclk); ++ clk_disable_unprepare(spim->hclk); ++fail: ++ spi_master_put(kmaster); ++ ++ return ret; ++} ++ ++static int sun6i_spi_remove(struct platform_device *pdev) ++{ ++ struct rtdm_spi_master *master = platform_get_drvdata(pdev); ++ struct spi_master_sun6i *spim; ++ ++ dev_dbg(&pdev->dev, "%s: entered\n", __func__); ++ ++ spim = container_of(master, struct spi_master_sun6i, master); ++ ++ rtdm_irq_free(&spim->irqh); ++ ++ clk_disable_unprepare(spim->mclk); ++ clk_disable_unprepare(spim->hclk); ++ ++ rtdm_spi_remove_master(master); ++ ++ return 0; ++} ++ ++static const struct of_device_id sun6i_spi_match[] = { ++ { ++ .compatible = "allwinner,sun6i-a31-spi", ++ .data = &sun6i_data, ++ }, ++ { ++ .compatible = "allwinner,sun8i-h3-spi", ++ .data = &sun8i_data, ++ }, ++ { /* Sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, sun6i_spi_match); ++ ++static struct platform_driver sun6i_spi_driver = { ++ .driver = { ++ .name = "spi-sun6i", ++ .of_match_table = sun6i_spi_match, ++ }, ++ .probe = sun6i_spi_probe, ++ .remove = sun6i_spi_remove, ++}; ++module_platform_driver(sun6i_spi_driver); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/spi/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/Kconfig 2021-04-07 16:01:27.989633078 +0800 +@@ -0,0 +1,39 @@ ++menu "Real-time SPI master drivers" ++ ++config XENO_DRIVERS_SPI ++ depends on SPI ++ tristate ++ ++config XENO_DRIVERS_SPI_BCM2835 ++ depends on ARCH_BCM2708 || ARCH_BCM2835 ++ select XENO_DRIVERS_SPI ++ tristate "Support for BCM2835 SPI" ++ help ++ ++ Enables support for the SPI0 controller available from ++ Broadcom's BCM2835 SoC. 
++ ++config XENO_DRIVERS_SPI_SUN6I ++ depends on MACH_SUN6I || MACH_SUN8I ++ select XENO_DRIVERS_SPI ++ tristate "Support for A31/H3 SoC SPI" ++ help ++ ++ Enables support for the SPI controller available from ++ Allwinner's A31, H3 SoCs. ++ ++config XENO_DRIVERS_SPI_OMAP2_MCSPI_RT ++ tristate "McSPI rt-driver for OMAP" ++ depends on HAS_DMA ++ depends on ARCH_OMAP2PLUS || COMPILE_TEST ++ select XENO_DRIVERS_SPI ++ help ++ ++ SPI real-time master controller for OMAP24XX and later Multichannel SPI ++ (McSPI) modules. ++ ++config XENO_DRIVERS_SPI_DEBUG ++ depends on XENO_DRIVERS_SPI ++ bool "Enable SPI core debugging features" ++ ++endmenu +--- linux/drivers/xenomai/spi/spi-device.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-device.h 2021-04-07 16:01:27.984633085 +0800 +@@ -0,0 +1,54 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _RTDM_SPI_DEVICE_H ++#define _RTDM_SPI_DEVICE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct class; ++struct rtdm_spi_master; ++ ++struct rtdm_spi_remote_slave { ++ u8 chip_select; ++ int cs_gpio; ++ struct gpio_desc *cs_gpiod; ++ struct rtdm_device dev; ++ struct list_head next; ++ struct rtdm_spi_config config; ++ struct rtdm_spi_master *master; ++ atomic_t mmap_refs; ++ struct mutex ctl_lock; ++}; ++ ++static inline struct device * ++slave_to_kdev(struct rtdm_spi_remote_slave *slave) ++{ ++ return rtdm_dev_to_kdev(&slave->dev); ++} ++ ++int rtdm_spi_add_remote_slave(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_master *spim, ++ struct spi_device *spi); ++ ++void rtdm_spi_remove_remote_slave(struct rtdm_spi_remote_slave *slave); ++ ++#endif /* !_RTDM_SPI_DEVICE_H */ +--- linux/drivers/xenomai/spi/spi-master.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-master.h 2021-04-07 16:01:27.980633090 +0800 +@@ -0,0 +1,82 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _RTDM_SPI_MASTER_H ++#define _RTDM_SPI_MASTER_H ++ ++#include ++#include ++#include "spi-device.h" ++ ++struct class; ++struct device_node; ++struct rtdm_spi_master; ++struct spi_master; ++ ++struct rtdm_spi_master_ops { ++ int (*open)(struct rtdm_spi_remote_slave *slave); ++ void (*close)(struct rtdm_spi_remote_slave *slave); ++ int (*configure)(struct rtdm_spi_remote_slave *slave); ++ void (*chip_select)(struct rtdm_spi_remote_slave *slave, ++ bool active); ++ int (*set_iobufs)(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_iobufs *p); ++ int (*mmap_iobufs)(struct rtdm_spi_remote_slave *slave, ++ struct vm_area_struct *vma); ++ void (*mmap_release)(struct rtdm_spi_remote_slave *slave); ++ int (*transfer_iobufs)(struct rtdm_spi_remote_slave *slave); ++ int (*transfer_iobufs_n)(struct rtdm_spi_remote_slave *slave, int len); ++ ssize_t (*write)(struct rtdm_spi_remote_slave *slave, ++ const void *tx, size_t len); ++ ssize_t (*read)(struct rtdm_spi_remote_slave *slave, ++ void *rx, size_t len); ++ struct rtdm_spi_remote_slave *(*attach_slave) ++ (struct rtdm_spi_master *master, ++ struct spi_device *spi); ++ void (*detach_slave)(struct rtdm_spi_remote_slave *slave); ++}; ++ ++struct rtdm_spi_master { ++ int subclass; ++ const struct rtdm_spi_master_ops *ops; ++ struct spi_master *kmaster; ++ struct { /* Internal */ ++ struct rtdm_driver driver; ++ struct class *devclass; ++ char *classname; ++ struct list_head slaves; ++ struct list_head next; ++ rtdm_lock_t lock; ++ rtdm_mutex_t bus_lock; ++ struct rtdm_spi_remote_slave *cs; ++ }; ++}; ++ ++#define rtdm_spi_alloc_master(__dev, __type, __mptr) \ ++ __rtdm_spi_alloc_master(__dev, sizeof(__type), \ ++ offsetof(__type, __mptr)) \ ++ ++struct rtdm_spi_master * ++__rtdm_spi_alloc_master(struct device *dev, size_t size, int off); ++ ++int __rtdm_spi_setup_driver(struct rtdm_spi_master *master); ++ ++int rtdm_spi_add_master(struct rtdm_spi_master *master); ++ ++void rtdm_spi_remove_master(struct rtdm_spi_master *master); ++ ++#endif /* !_RTDM_SPI_MASTER_H */ +--- linux/drivers/xenomai/spi/spi-master.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/spi/spi-master.c 2021-04-07 16:01:27.975633097 +0800 +@@ -0,0 +1,448 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "spi-master.h" ++ ++static inline ++struct device *to_kdev(struct rtdm_spi_remote_slave *slave) ++{ ++ return rtdm_dev_to_kdev(&slave->dev); ++} ++ ++static inline struct rtdm_spi_remote_slave *fd_to_slave(struct rtdm_fd *fd) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ ++ return container_of(dev, struct rtdm_spi_remote_slave, dev); ++} ++ ++static int update_slave_config(struct rtdm_spi_remote_slave *slave, ++ struct rtdm_spi_config *config) ++{ ++ struct rtdm_spi_config old_config; ++ struct rtdm_spi_master *master = slave->master; ++ int ret; ++ ++ rtdm_mutex_lock(&master->bus_lock); ++ ++ old_config = slave->config; ++ slave->config = *config; ++ ret = slave->master->ops->configure(slave); ++ if (ret) { ++ slave->config = old_config; ++ rtdm_mutex_unlock(&master->bus_lock); ++ return ret; ++ } ++ ++ rtdm_mutex_unlock(&master->bus_lock); ++ ++ dev_info(to_kdev(slave), ++ "configured mode %d, %s%s%s%s%u bits/w, %u Hz max\n", ++ (int) (slave->config.mode & (SPI_CPOL | SPI_CPHA)), ++ (slave->config.mode & SPI_CS_HIGH) ? "cs_high, " : "", ++ (slave->config.mode & SPI_LSB_FIRST) ? "lsb, " : "", ++ (slave->config.mode & SPI_3WIRE) ? "3wire, " : "", ++ (slave->config.mode & SPI_LOOP) ? "loopback, " : "", ++ slave->config.bits_per_word, ++ slave->config.speed_hz); ++ ++ return 0; ++} ++ ++static int spi_master_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ ++ if (master->ops->open) ++ return master->ops->open(slave); ++ ++ return 0; ++} ++ ++static void spi_master_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (master->cs == slave) ++ master->cs = NULL; ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ if (master->ops->close) ++ master->ops->close(slave); ++} ++ ++static int do_chip_select(struct rtdm_spi_remote_slave *slave) ++{ /* master->bus_lock held */ ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ int state; ++ ++ if (slave->config.speed_hz == 0) ++ return -EINVAL; /* Setup is missing. 
*/ ++ ++ /* Serialize with spi_master_close() */ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (master->cs != slave) { ++ if (gpio_is_valid(slave->cs_gpio)) { ++ state = !!(slave->config.mode & SPI_CS_HIGH); ++ gpiod_set_raw_value(slave->cs_gpiod, state); ++ } else ++ master->ops->chip_select(slave, true); ++ master->cs = slave; ++ } ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++ ++ return 0; ++} ++ ++static void do_chip_deselect(struct rtdm_spi_remote_slave *slave) ++{ /* master->bus_lock held */ ++ struct rtdm_spi_master *master = slave->master; ++ rtdm_lockctx_t c; ++ int state; ++ ++ rtdm_lock_get_irqsave(&master->lock, c); ++ ++ if (gpio_is_valid(slave->cs_gpio)) { ++ state = !(slave->config.mode & SPI_CS_HIGH); ++ gpiod_set_raw_value(slave->cs_gpiod, state); ++ } else ++ master->ops->chip_select(slave, false); ++ ++ master->cs = NULL; ++ ++ rtdm_lock_put_irqrestore(&master->lock, c); ++} ++ ++static int spi_master_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_spi_config config; ++ int ret, len; ++ ++ switch (request) { ++ case SPI_RTIOC_SET_CONFIG: ++ ret = rtdm_safe_copy_from_user(fd, &config, ++ arg, sizeof(config)); ++ if (ret == 0) ++ ret = update_slave_config(slave, &config); ++ break; ++ case SPI_RTIOC_GET_CONFIG: ++ rtdm_mutex_lock(&master->bus_lock); ++ config = slave->config; ++ rtdm_mutex_unlock(&master->bus_lock); ++ ret = rtdm_safe_copy_to_user(fd, arg, ++ &config, sizeof(config)); ++ break; ++ case SPI_RTIOC_TRANSFER: ++ ret = -EINVAL; ++ if (master->ops->transfer_iobufs) { ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->transfer_iobufs(slave); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ break; ++ case SPI_RTIOC_TRANSFER_N: ++ ret = -EINVAL; ++ if (master->ops->transfer_iobufs_n) { ++ len = (int)arg; ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->transfer_iobufs_n(slave, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ return ret; ++} ++ ++static int spi_master_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ struct rtdm_spi_iobufs iobufs; ++ int ret; ++ ++ switch (request) { ++ case SPI_RTIOC_SET_IOBUFS: ++ ret = rtdm_safe_copy_from_user(fd, &iobufs, ++ arg, sizeof(iobufs)); ++ if (ret) ++ break; ++ /* ++ * No transfer can happen without I/O buffers being ++ * set, and I/O buffers cannot be reset, therefore we ++ * need no serialization with the transfer code here. 
++ */ ++ mutex_lock(&slave->ctl_lock); ++ ret = master->ops->set_iobufs(slave, &iobufs); ++ mutex_unlock(&slave->ctl_lock); ++ if (ret == 0) ++ ret = rtdm_safe_copy_to_user(fd, arg, ++ &iobufs, sizeof(iobufs)); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t spi_master_read_rt(struct rtdm_fd *fd, ++ void __user *u_buf, size_t len) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ void *rx; ++ int ret; ++ ++ if (len == 0) ++ return 0; ++ ++ rx = xnmalloc(len); ++ if (rx == NULL) ++ return -ENOMEM; ++ ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->read(slave, rx, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ if (ret > 0) ++ ret = rtdm_safe_copy_to_user(fd, u_buf, rx, ret); ++ ++ xnfree(rx); ++ ++ return ret; ++} ++ ++static ssize_t spi_master_write_rt(struct rtdm_fd *fd, ++ const void __user *u_buf, size_t len) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ struct rtdm_spi_master *master = slave->master; ++ void *tx; ++ int ret; ++ ++ if (len == 0) ++ return 0; ++ ++ tx = xnmalloc(len); ++ if (tx == NULL) ++ return -ENOMEM; ++ ++ ret = rtdm_safe_copy_from_user(fd, tx, u_buf, len); ++ if (ret == 0) { ++ rtdm_mutex_lock(&master->bus_lock); ++ ret = do_chip_select(slave); ++ if (ret == 0) { ++ ret = master->ops->write(slave, tx, len); ++ do_chip_deselect(slave); ++ } ++ rtdm_mutex_unlock(&master->bus_lock); ++ } ++ ++ xnfree(tx); ++ ++ return ret; ++} ++ ++static void iobufs_vmopen(struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = vma->vm_private_data; ++ ++ atomic_inc(&slave->mmap_refs); ++ dev_dbg(slave_to_kdev(slave), "mapping added\n"); ++} ++ ++static void iobufs_vmclose(struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = vma->vm_private_data; ++ ++ if (atomic_dec_and_test(&slave->mmap_refs)) { ++ slave->master->ops->mmap_release(slave); ++ dev_dbg(slave_to_kdev(slave), "mapping released\n"); ++ } ++} ++ ++static struct vm_operations_struct iobufs_vmops = { ++ .open = iobufs_vmopen, ++ .close = iobufs_vmclose, ++}; ++ ++static int spi_master_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct rtdm_spi_remote_slave *slave = fd_to_slave(fd); ++ int ret; ++ ++ if (slave->master->ops->mmap_iobufs == NULL) ++ return -EINVAL; ++ ++ ret = slave->master->ops->mmap_iobufs(slave, vma); ++ if (ret) ++ return ret; ++ ++ dev_dbg(slave_to_kdev(slave), "mapping created\n"); ++ atomic_inc(&slave->mmap_refs); ++ ++ if (slave->master->ops->mmap_release) { ++ vma->vm_ops = &iobufs_vmops; ++ vma->vm_private_data = slave; ++ } ++ ++ return 0; ++} ++ ++static char *spi_slave_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s/%s", ++ dev->class->name, ++ dev_name(dev)); ++} ++ ++struct rtdm_spi_master * ++__rtdm_spi_alloc_master(struct device *dev, size_t size, int off) ++{ ++ struct rtdm_spi_master *master; ++ struct spi_master *kmaster; ++ ++ kmaster = spi_alloc_master(dev, size); ++ if (kmaster == NULL) ++ return NULL; ++ ++ master = (void *)(kmaster + 1) + off; ++ master->kmaster = kmaster; ++ spi_master_set_devdata(kmaster, master); ++ ++ return master; ++} ++EXPORT_SYMBOL_GPL(__rtdm_spi_alloc_master); ++ ++int __rtdm_spi_setup_driver(struct rtdm_spi_master *master) ++{ ++ master->classname = kstrdup( ++ dev_name(&master->kmaster->dev), GFP_KERNEL); ++ master->devclass = 
class_create(THIS_MODULE, ++ master->classname); ++ if (IS_ERR(master->devclass)) { ++ kfree(master->classname); ++ printk(XENO_ERR "cannot create sysfs class\n"); ++ return PTR_ERR(master->devclass); ++ } ++ ++ master->devclass->devnode = spi_slave_devnode; ++ master->cs = NULL; ++ ++ master->driver.profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(rtdm_spi_master, ++ RTDM_CLASS_SPI, ++ master->subclass, ++ 0); ++ master->driver.device_flags = RTDM_NAMED_DEVICE; ++ master->driver.base_minor = 0; ++ master->driver.device_count = 256; ++ master->driver.context_size = 0; ++ master->driver.ops = (struct rtdm_fd_ops){ ++ .open = spi_master_open, ++ .close = spi_master_close, ++ .read_rt = spi_master_read_rt, ++ .write_rt = spi_master_write_rt, ++ .ioctl_rt = spi_master_ioctl_rt, ++ .ioctl_nrt = spi_master_ioctl_nrt, ++ .mmap = spi_master_mmap, ++ }; ++ ++ rtdm_drv_set_sysclass(&master->driver, master->devclass); ++ ++ INIT_LIST_HEAD(&master->slaves); ++ rtdm_lock_init(&master->lock); ++ rtdm_mutex_init(&master->bus_lock); ++ ++ return 0; ++} ++ ++static int spi_transfer_one_unimp(struct spi_master *master, ++ struct spi_device *spi, ++ struct spi_transfer *tfr) ++{ ++ return -ENODEV; ++} ++ ++int rtdm_spi_add_master(struct rtdm_spi_master *master) ++{ ++ struct spi_master *kmaster = master->kmaster; ++ ++ /* ++ * Prevent the transfer handler to be called from the regular ++ * SPI stack, just in case. ++ */ ++ kmaster->transfer_one = spi_transfer_one_unimp; ++ master->devclass = NULL; ++ ++ /* ++ * Add the core SPI driver, devices on the bus will be ++ * enumerated, handed to spi_device_probe(). ++ */ ++ return spi_register_master(kmaster); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_add_master); ++ ++void rtdm_spi_remove_master(struct rtdm_spi_master *master) ++{ ++ struct class *class = master->devclass; ++ char *classname = master->classname; ++ ++ rtdm_mutex_destroy(&master->bus_lock); ++ spi_unregister_master(master->kmaster); ++ rtdm_drv_set_sysclass(&master->driver, NULL); ++ class_destroy(class); ++ kfree(classname); ++} ++EXPORT_SYMBOL_GPL(rtdm_spi_remove_master); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/gpiopwm/gpiopwm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/gpiopwm.c 2021-04-07 16:01:27.970633105 +0800 +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (C) 2015 Jorge Ramirez . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_AUTHOR("Jorge Ramirez "); ++MODULE_DESCRIPTION("PWM driver"); ++MODULE_VERSION("0.0.1"); ++MODULE_LICENSE("GPL"); ++ ++#define MAX_DUTY_CYCLE 100 ++#define MAX_SAMPLES (MAX_DUTY_CYCLE + 1) ++ ++struct gpiopwm_base_signal { ++ unsigned long period; ++}; ++ ++struct gpiopwm_duty_signal { ++ unsigned int range_min; ++ unsigned int range_max; ++ unsigned long period; ++ unsigned int cycle; ++}; ++ ++struct gpiopwm_control { ++ struct gpiopwm_duty_signal duty; ++ unsigned int configured; ++ unsigned int update; ++}; ++ ++struct gpiopwm_priv { ++ struct gpiopwm_base_signal base; ++ struct gpiopwm_duty_signal duty; ++ struct gpiopwm_control ctrl; ++ ++ rtdm_timer_t base_timer; ++ rtdm_timer_t duty_timer; ++ ++ int gpio; ++}; ++ ++static inline int div100(long long dividend) ++{ ++ const long long divisor = 0x28f5c29; ++ return ((divisor * dividend) >> 32) & 0xffffffff; ++} ++ ++static inline unsigned long duty_period(struct gpiopwm_duty_signal *p) ++{ ++ unsigned long period; ++ ++ period = p->range_min + div100((p->range_max - p->range_min) * p->cycle); ++ return period * 1000; ++} ++ ++static void gpiopwm_handle_base_timer(rtdm_timer_t *timer) ++{ ++ struct gpiopwm_priv *ctx = container_of(timer, struct gpiopwm_priv, ++ base_timer); ++ gpio_set_value(ctx->gpio, 1); ++ ++ /* one shot timer to avoid carrying over errors */ ++ rtdm_timer_start_in_handler(&ctx->duty_timer, ctx->duty.period, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ if (ctx->ctrl.update) { ++ ctx->duty.period = ctx->ctrl.duty.period; ++ ctx->duty.cycle = ctx->ctrl.duty.cycle; ++ ctx->ctrl.update = 0; ++ } ++} ++ ++static void gpiopwm_handle_duty_timer(rtdm_timer_t *timer) ++{ ++ struct gpiopwm_priv *ctx = container_of(timer, struct gpiopwm_priv, ++ duty_timer); ++ gpio_set_value(ctx->gpio, 0); ++} ++ ++static inline int gpiopwm_config(struct rtdm_fd *fd, struct gpiopwm *conf) ++{ ++ struct rtdm_dev_context *dev_ctx = rtdm_fd_to_context(fd); ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if (ctx->ctrl.configured) ++ return -EINVAL; ++ ++ if (conf->duty_cycle > MAX_DUTY_CYCLE) ++ return -EINVAL; ++ ++ ret = gpio_request(conf->gpio, dev_ctx->device->name); ++ if (ret < 0) { ++ ctx->gpio = -1; ++ return ret; ++ } ++ ++ ret = gpio_direction_output(conf->gpio, 0); ++ if (ret < 0) ++ return ret; ++ ++ gpio_set_value(conf->gpio, 0); ++ ++ ctx->duty.range_min = ctx->ctrl.duty.range_min = conf->range_min; ++ ctx->duty.range_max = ctx->ctrl.duty.range_max = conf->range_max; ++ ctx->duty.cycle = conf->duty_cycle; ++ ctx->base.period = conf->period; ++ ctx->gpio = conf->gpio; ++ ctx->duty.period = duty_period(&ctx->duty); ++ ++ rtdm_timer_init(&ctx->base_timer, gpiopwm_handle_base_timer, "base_timer"); ++ rtdm_timer_init(&ctx->duty_timer, gpiopwm_handle_duty_timer, "duty_timer"); ++ ++ ctx->ctrl.configured = 1; ++ ++ return 0; ++} ++ ++static inline int gpiopwm_change_duty_cycle(struct gpiopwm_priv *ctx, unsigned int cycle) ++{ ++ if (cycle > MAX_DUTY_CYCLE) ++ return -EINVAL; ++ ++ /* prepare the new data on the calling thread */ ++ ctx->ctrl.duty.cycle = cycle; ++ ctx->ctrl.duty.period = duty_period(&ctx->ctrl.duty); ++ ++ /* update data on the next base signal timeout */ ++ ctx->ctrl.update = 1; ++ ++ return 0; ++} ++ ++static inline int gpiopwm_stop(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (!ctx->ctrl.configured) ++ return -EINVAL; ++ ++ gpio_set_value(ctx->gpio, 0); ++ ++ 
rtdm_timer_stop(&ctx->base_timer); ++ rtdm_timer_stop(&ctx->duty_timer); ++ ++ return 0; ++} ++ ++static inline int gpiopwm_start(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (!ctx->ctrl.configured) ++ return -EINVAL; ++ ++ /* update duty cycle on next timeout */ ++ ctx->ctrl.update = 1; ++ ++ /* start the base signal tick */ ++ rtdm_timer_start(&ctx->base_timer, ctx->base.period, ctx->base.period, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ return 0; ++} ++ ++static int gpiopwm_ioctl_rt(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case GPIOPWM_RTIOC_SET_CONFIG: ++ return -ENOSYS; ++ case GPIOPWM_RTIOC_CHANGE_DUTY_CYCLE: ++ return gpiopwm_change_duty_cycle(ctx, (unsigned long) arg); ++ case GPIOPWM_RTIOC_START: ++ return gpiopwm_start(fd); ++ case GPIOPWM_RTIOC_STOP: ++ return gpiopwm_stop(fd); ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int gpiopwm_ioctl_nrt(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct gpiopwm conf; ++ ++ switch (request) { ++ case GPIOPWM_RTIOC_SET_CONFIG: ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(conf))) ++ return -EFAULT; ++ ++ rtdm_copy_from_user(fd, &conf, arg, sizeof(conf)); ++ return gpiopwm_config(fd, &conf); ++ case GPIOPWM_RTIOC_GET_CONFIG: ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int gpiopwm_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->ctrl.configured = 0; ++ ctx->gpio = -1; ++ ++ return 0; ++} ++ ++static void gpiopwm_close(struct rtdm_fd *fd) ++{ ++ struct gpiopwm_priv *ctx = rtdm_fd_to_private(fd); ++ ++ if (ctx->gpio >= 0) ++ gpio_free(ctx->gpio); ++ ++ if (!ctx->ctrl.configured) ++ return; ++ ++ rtdm_timer_destroy(&ctx->base_timer); ++ rtdm_timer_destroy(&ctx->duty_timer); ++} ++ ++static struct rtdm_driver gpiopwm_driver = { ++ .profile_info = RTDM_PROFILE_INFO(gpiopwm, ++ RTDM_CLASS_PWM, ++ RTDM_SUBCLASS_GENERIC, ++ RTPWM_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = 8, ++ .context_size = sizeof(struct gpiopwm_priv), ++ .ops = { ++ .open = gpiopwm_open, ++ .close = gpiopwm_close, ++ .ioctl_rt = gpiopwm_ioctl_rt, ++ .ioctl_nrt = gpiopwm_ioctl_nrt, ++ }, ++}; ++ ++static struct rtdm_device device[8] = { ++ [0 ... 
7] = { ++ .driver = &gpiopwm_driver, ++ .label = "gpiopwm%d", ++ } ++}; ++ ++static int __init __gpiopwm_init(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) { ++ ret = rtdm_dev_register(device + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(device + i); ++ ++ return ret; ++} ++ ++static void __exit __gpiopwm_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(device); i++) ++ rtdm_dev_unregister(device + i); ++} ++ ++module_init(__gpiopwm_init); ++module_exit(__gpiopwm_exit); +--- linux/drivers/xenomai/gpiopwm/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/Makefile 2021-04-07 16:01:27.966633110 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Ikernel -Iinclude/xenomai/ ++ ++obj-$(CONFIG_XENO_DRIVERS_GPIOPWM) += xeno_gpiopwm.o ++ ++xeno_gpiopwm-y := gpiopwm.o +--- linux/drivers/xenomai/gpiopwm/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpiopwm/Kconfig 2021-04-07 16:01:27.961633117 +0800 +@@ -0,0 +1,9 @@ ++menu "GPIOPWM support" ++ ++config XENO_DRIVERS_GPIOPWM ++ tristate "GPIOPWM driver" ++ help ++ ++ An RTDM-based GPIO PWM generator driver ++ ++endmenu +--- linux/drivers/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/Makefile 2021-04-07 16:01:27.956633124 +0800 +@@ -0,0 +1 @@ ++obj-$(CONFIG_XENOMAI) += autotune/ serial/ testing/ can/ net/ analogy/ ipc/ udd/ gpio/ gpiopwm/ spi/ +--- linux/drivers/xenomai/analogy/testing/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/Makefile 2021-04-07 16:01:27.952633130 +0800 +@@ -0,0 +1,8 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_FAKE) += analogy_fake.o ++ ++analogy_fake-y := fake.o ++ ++analogy_loop-y := loop.o +--- linux/drivers/xenomai/analogy/testing/loop.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/loop.c 2021-04-07 16:01:27.947633137 +0800 +@@ -0,0 +1,285 @@ ++#include ++#include ++ ++#define LOOP_TASK_PERIOD 1000000 ++#define LOOP_NB_BITS 16 ++ ++#define LOOP_INPUT_SUBD 0 ++#define LOOP_OUTPUT_SUBD 1 ++ ++/* Channels descriptor */ ++static struct a4l_channels_desc loop_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, LOOP_NB_BITS}, ++ }, ++}; ++ ++/* Ranges tab */ ++static struct a4l_rngtab loop_rngtab = { ++ .length = 2, ++ .rngs = { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++/* Ranges descriptor */ ++struct a4l_rngdesc loop_rngdesc = RNG_GLOBAL(loop_rngtab); ++ ++/* Command options mask */ ++static struct a4l_cmd_desc loop_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT| TRIG_NONE, ++}; ++ ++/* Private data organization */ ++struct loop_priv { ++ ++ /* Task descriptor */ ++ rtdm_task_t loop_task; ++ ++ /* Misc fields */ ++ int loop_running; ++ uint16_t loop_insn_value; ++}; ++typedef struct loop_priv lpprv_t; ++ ++/* Attach arguments contents */ ++struct loop_attach_arg { ++ unsigned long period; ++}; ++typedef struct loop_attach_arg lpattr_t; ++ ++static void loop_task_proc(void *arg); ++ ++/* --- Task part --- */ ++ ++/* Timer task routine */ ++static void loop_task_proc(void *arg) ++{ ++ struct a4l_device *dev = (struct a4l_device*)arg; ++ struct a4l_subdevice *input_subd, *output_subd; ++ lpprv_t *priv = 
(lpprv_t *)dev->priv; ++ ++ input_subd = a4l_get_subd(dev, LOOP_INPUT_SUBD); ++ output_subd = a4l_get_subd(dev, LOOP_OUTPUT_SUBD); ++ ++ if (input_subd == NULL || output_subd == NULL) { ++ a4l_err(dev, "loop_task_proc: subdevices unavailable\n"); ++ return; ++ } ++ ++ while (1) { ++ ++ int running; ++ ++ running = priv->loop_running; ++ ++ if (running) { ++ uint16_t value; ++ int ret=0; ++ ++ while (ret==0) { ++ ++ ret = a4l_buf_get(output_subd, ++ &value, sizeof(uint16_t)); ++ if (ret == 0) { ++ ++ a4l_info(dev, ++ "loop_task_proc: " ++ "data available\n"); ++ ++ a4l_buf_evt(output_subd, 0); ++ ++ ret = a4l_buf_put(input_subd, ++ &value, ++ sizeof(uint16_t)); ++ ++ if (ret==0) ++ a4l_buf_evt(input_subd, 0); ++ } ++ } ++ } ++ ++ rtdm_task_sleep(LOOP_TASK_PERIOD); ++ } ++} ++ ++/* --- Analogy Callbacks --- */ ++ ++/* Command callback */ ++int loop_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ a4l_info(subd->dev, "loop_cmd: (subd=%d)\n", subd->idx); ++ ++ return 0; ++ ++} ++ ++/* Trigger callback */ ++int loop_trigger(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ lpprv_t *priv = (lpprv_t *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "loop_trigger: (subd=%d)\n", subd->idx); ++ ++ priv->loop_running = 1; ++ ++ return 0; ++} ++ ++/* Cancel callback */ ++void loop_cancel(struct a4l_subdevice *subd) ++{ ++ lpprv_t *priv = (lpprv_t *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "loop_cancel: (subd=%d)\n", subd->idx); ++ ++ priv->loop_running = 0; ++} ++ ++/* Read instruction callback */ ++int loop_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ lpprv_t *priv = (lpprv_t*)subd->dev->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Sets the memorized value */ ++ data[0] = priv->loop_insn_value; ++ ++ return 0; ++} ++ ++/* Write instruction callback */ ++int loop_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ lpprv_t *priv = (lpprv_t*)subd->dev->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Retrieves the value to memorize */ ++ priv->loop_insn_value = data[0]; ++ ++ return 0; ++} ++ ++void setup_input_subd(struct a4l_subdevice *subd) ++{ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &loop_rngdesc; ++ subd->chan_desc = &loop_chandesc; ++ subd->do_cmd = loop_cmd; ++ subd->cancel = loop_cancel; ++ subd->cmd_mask = &loop_cmd_mask; ++ subd->insn_read = loop_insn_read; ++ subd->insn_write = loop_insn_write; ++} ++ ++void setup_output_subd(struct a4l_subdevice *subd) ++{ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ subd->flags = A4L_SUBD_AO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &loop_rngdesc; ++ subd->chan_desc = &loop_chandesc; ++ subd->do_cmd = loop_cmd; ++ subd->cancel = loop_cancel; ++ subd->trigger = loop_trigger; ++ subd->cmd_mask = &loop_cmd_mask; ++ subd->insn_read = loop_insn_read; ++ subd->insn_write = loop_insn_write; ++} ++ ++/* Attach callback */ ++int loop_attach(struct a4l_device *dev, ++ a4l_lnkdesc_t *arg) ++{ ++ int ret = 0; ++ struct a4l_subdevice *subd; ++ lpprv_t *priv = (lpprv_t *)dev->priv; ++ ++ /* Add the fake input subdevice */ ++ subd = a4l_alloc_subd(0, setup_input_subd); ++ if (subd == NULL) ++ 
return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != LOOP_INPUT_SUBD) ++ /* Let Analogy free the lately allocated subdevice */ ++ return (ret < 0) ? ret : -EINVAL; ++ ++ /* Add the fake output subdevice */ ++ subd = a4l_alloc_subd(0, setup_output_subd); ++ if (subd == NULL) ++ /* Let Analogy free the lately allocated subdevice */ ++ return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != LOOP_OUTPUT_SUBD) ++ /* Let Analogy free the lately allocated subdevices */ ++ return (ret < 0) ? ret : -EINVAL; ++ ++ priv->loop_running = 0; ++ priv->loop_insn_value = 0; ++ ++ ret = rtmd_task_init(&priv->loop_task, ++ "a4l_loop task", ++ loop_task_proc, ++ dev, RTDM_TASK_HIGHEST_PRIORITY, 0); ++ ++ return ret; ++} ++ ++/* Detach callback */ ++int loop_detach(struct a4l_device *dev) ++{ ++ lpprv_t *priv = (lpprv_t *)dev->priv; ++ ++ rtdm_task_destroy(&priv->loop_task); ++ ++ return 0; ++} ++ ++/* --- Module part --- */ ++ ++static struct a4l_driver loop_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_loop", ++ .attach = loop_attach, ++ .detach = loop_detach, ++ .privdata_size = sizeof(lpprv_t), ++}; ++ ++static int __init a4l_loop_init(void) ++{ ++ return a4l_register_drv(&loop_drv); ++} ++ ++static void __exit a4l_loop_cleanup(void) ++{ ++ a4l_unregister_drv(&loop_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy loop driver"); ++MODULE_LICENSE("GPL"); ++ ++module_init(a4l_loop_init); ++module_exit(a4l_loop_cleanup); +--- linux/drivers/xenomai/analogy/testing/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/Kconfig 2021-04-07 16:01:27.942633145 +0800 +@@ -0,0 +1,13 @@ ++ ++config XENO_DRIVERS_ANALOGY_FAKE ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "Fake driver" ++ default n ++ help ++ ++ The fake driver displays many subdevices: ++ - 0: analog input; ++ - 1: digital input / output; ++ - 2: analog output; ++ - 3: analog input; data written into the subdevice 2 can be ++ read here. 
+--- linux/drivers/xenomai/analogy/testing/fake.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/testing/fake.c 2021-04-07 16:01:27.938633150 +0800 +@@ -0,0 +1,686 @@ ++#include ++#include ++ ++#define TASK_PERIOD 1000000 ++ ++#define AI_SUBD 0 ++#define DIO_SUBD 1 ++#define AO_SUBD 2 ++#define AI2_SUBD 3 ++ ++#define TRANSFER_SIZE 0x1000 ++ ++/* --- Driver related structures --- */ ++struct fake_priv { ++ /* Attach configuration parameters ++ (they should be relocated in ai_priv) */ ++ unsigned long amplitude_div; ++ unsigned long quanta_cnt; ++ ++ /* Task descriptor */ ++ rtdm_task_t task; ++ ++ /* Statuses of the asynchronous subdevices */ ++ int ai_running; ++ int ao_running; ++ int ai2_running; ++}; ++ ++struct ai_priv { ++ ++ /* Specific timing fields */ ++ unsigned long scan_period_ns; ++ unsigned long convert_period_ns; ++ unsigned long current_ns; ++ unsigned long reminder_ns; ++ unsigned long long last_ns; ++ ++ /* Misc fields */ ++ unsigned long amplitude_div; ++ unsigned long quanta_cnt; ++}; ++ ++struct ao_ai2_priv { ++ /* Asynchronous loop stuff */ ++ uint8_t buffer[TRANSFER_SIZE]; ++ int count; ++ /* Synchronous loop stuff */ ++ uint16_t insn_value; ++}; ++ ++struct dio_priv { ++ /* Bits status */ ++ uint16_t bits_values; ++}; ++ ++/* --- Channels / ranges part --- */ ++ ++/* Channels descriptors */ ++ ++static struct a4l_channels_desc analog_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 16}, ++ }, ++}; ++ ++static struct a4l_channels_desc dio_chandesc = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 16, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++/* Ranges tab */ ++static struct a4l_rngtab analog_rngtab = { ++ .length = 2, ++ .rngs = { ++ RANGE_V(-5,5), ++ RANGE_V(-10,10), ++ }, ++}; ++/* Ranges descriptor */ ++static struct a4l_rngdesc analog_rngdesc = RNG_GLOBAL(analog_rngtab); ++ ++/* Command options masks */ ++ ++static struct a4l_cmd_desc ai_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++static struct a4l_cmd_desc ao_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT, ++ .scan_begin_src = TRIG_TIMER, ++ .convert_src = TRIG_NOW | TRIG_TIMER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++/* --- Analog input simulation --- */ ++ ++/* --- Values generation for 1st AI --- */ ++ ++static inline uint16_t ai_value_output(struct ai_priv *priv) ++{ ++ static uint16_t output_tab[8] = { ++ 0x0001, 0x2000, 0x4000, 0x6000, ++ 0x8000, 0xa000, 0xc000, 0xffff ++ }; ++ static unsigned int output_idx; ++ static DEFINE_RTDM_LOCK(output_lock); ++ ++ unsigned long flags; ++ unsigned int idx; ++ ++ rtdm_lock_get_irqsave(&output_lock, flags); ++ ++ output_idx += priv->quanta_cnt; ++ if(output_idx == 8) ++ output_idx = 0; ++ idx = output_idx; ++ ++ rtdm_lock_put_irqrestore(&output_lock, flags); ++ ++ return output_tab[idx] / priv->amplitude_div; ++} ++ ++int ai_push_values(struct a4l_subdevice *subd) ++{ ++ uint64_t now_ns, elapsed_ns = 0; ++ struct a4l_cmd_desc *cmd; ++ struct ai_priv *priv; ++ int i = 0; ++ ++ if (!subd) ++ return -EINVAL; ++ ++ priv = (struct ai_priv *)subd->priv; ++ ++ cmd = a4l_get_cmd(subd); ++ if (!cmd) ++ return -EPIPE; ++ ++ now_ns = a4l_get_time(); ++ elapsed_ns += now_ns - priv->last_ns + priv->reminder_ns; ++ priv->last_ns = now_ns; ++ ++ 
while(elapsed_ns >= priv->scan_period_ns) { ++ int j; ++ ++ for(j = 0; j < cmd->nb_chan; j++) { ++ uint16_t value = ai_value_output(priv); ++ a4l_buf_put(subd, &value, sizeof(uint16_t)); ++ } ++ ++ elapsed_ns -= priv->scan_period_ns; ++ i++; ++ } ++ ++ priv->current_ns += i * priv->scan_period_ns; ++ priv->reminder_ns = elapsed_ns; ++ ++ if (i != 0) ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++/* --- Data retrieval for AO --- */ ++ ++int ao_pull_values(struct a4l_subdevice *subd) ++{ ++ struct ao_ai2_priv *priv = (struct ao_ai2_priv *)subd->priv; ++ int err; ++ ++ /* Let's have a look at how many samples are available */ ++ priv->count = a4l_buf_count(subd) < TRANSFER_SIZE ? ++ a4l_buf_count(subd) : TRANSFER_SIZE; ++ ++ if (!priv->count) ++ return 0; ++ ++ err = a4l_buf_get(subd, priv->buffer, priv->count); ++ if (err < 0) { ++ a4l_err(subd->dev, "ao_get_values: a4l_buf_get failed (err=%d)\n", err); ++ priv->count = 0; ++ return err; ++ ++ } ++ ++ a4l_info(subd->dev, " %d bytes added to private buffer from async p=%p\n", ++ priv->count, subd->buf->buf); ++ ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++/* --- Data redirection for 2nd AI (from AO) --- */ ++ ++int ai2_push_values(struct a4l_subdevice *subd) ++{ ++ struct ao_ai2_priv *priv = *((struct ao_ai2_priv **)subd->priv); ++ int err = 0; ++ ++ if (priv->count) { ++ err = a4l_buf_put(subd, priv->buffer, priv->count); ++ ++ /* If there is no more place in the asynchronous ++ buffer, data are likely to be dropped; it is just a ++ test driver so no need to implement trickier mechanism */ ++ err = (err == -EAGAIN) ? 0 : err; ++ ++ a4l_info(subd->dev, "%d bytes added to async buffer p=%p\n", ++ priv->count, subd->buf->buf); ++ ++ priv->count = 0; ++ if (err < 0) ++ a4l_err(subd->dev, ++ "ai2_push_values: " ++ "a4l_buf_put failed (err=%d)\n", err); ++ else ++ a4l_buf_evt(subd, 0); ++ } ++ ++ return err; ++} ++ ++/* --- Asynchronous AI functions --- */ ++ ++static int ai_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ai_priv *ai_priv = (struct ai_priv *)subd->priv; ++ ++ ai_priv->scan_period_ns = cmd->scan_begin_arg; ++ ai_priv->convert_period_ns = (cmd->convert_src==TRIG_TIMER)? 
++ cmd->convert_arg:0; ++ ++ a4l_dbg(1, drv_dbg, subd->dev, "scan_period=%luns convert_period=%luns\n", ++ ai_priv->scan_period_ns, ai_priv->convert_period_ns); ++ ++ ai_priv->last_ns = a4l_get_time(); ++ ++ ai_priv->current_ns = ((unsigned long)ai_priv->last_ns); ++ ai_priv->reminder_ns = 0; ++ ++ priv->ai_running = 1; ++ ++ return 0; ++ ++} ++ ++static int ai_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ if(cmd->scan_begin_src == TRIG_TIMER) ++ { ++ if (cmd->scan_begin_arg < 1000) ++ return -EINVAL; ++ ++ if (cmd->convert_src == TRIG_TIMER && ++ cmd->scan_begin_arg < (cmd->convert_arg * cmd->nb_chan)) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void ai_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ priv->ai_running = 0; ++} ++ ++static void ai_munge(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ int i; ++ ++ for(i = 0; i < size / sizeof(uint16_t); i++) ++ ((uint16_t *)buf)[i] += 1; ++} ++ ++/* --- Asynchronous A0 functions --- */ ++ ++int ao_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ return 0; ++} ++ ++int ao_trigger(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ao_running = 1; ++ return 0; ++} ++ ++void ao_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ao_ai2_priv *ao_priv = (struct ao_ai2_priv *)subd->priv; ++ int running; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ao_running = 0; ++ ++ running = priv->ai2_running; ++ if (running) { ++ struct a4l_subdevice *ai2_subd = ++ (struct a4l_subdevice *)a4l_get_subd(subd->dev, AI2_SUBD); ++ /* Here, we have not saved the required amount of ++ data; so, we cannot know whether or not, it is the ++ end of the acquisition; that is why we force it */ ++ priv->ai2_running = 0; ++ ao_priv->count = 0; ++ ++ a4l_info(subd->dev, "subd %d cancelling subd %d too \n", ++ subd->idx, AI2_SUBD); ++ ++ a4l_buf_evt(ai2_subd, A4L_BUF_EOA); ++ } ++} ++ ++/* --- Asynchronous 2nd AI functions --- */ ++ ++int ai2_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ai2_running = 1; ++ return 0; ++} ++ ++void ai2_cancel(struct a4l_subdevice *subd) ++{ ++ struct fake_priv *priv = (struct fake_priv *)subd->dev->priv; ++ struct ao_ai2_priv *ai2_priv = *((struct ao_ai2_priv **)subd->priv); ++ ++ int running; ++ ++ a4l_info(subd->dev, "(subd=%d)\n", subd->idx); ++ priv->ai2_running = 0; ++ ++ running = priv->ao_running; ++ if (running) { ++ struct a4l_subdevice *ao_subd = ++ (struct a4l_subdevice *)a4l_get_subd(subd->dev, AO_SUBD); ++ /* Here, we have not saved the required amount of ++ data; so, we cannot know whether or not, it is the ++ end of the acquisition; that is why we force it */ ++ priv->ao_running = 0; ++ ai2_priv->count = 0; ++ ++ a4l_info(subd->dev, "subd %d cancelling subd %d too \n", ++ subd->idx, AO_SUBD); ++ ++ a4l_buf_evt(ao_subd, A4L_BUF_EOA); ++ } ++ ++} ++ ++ ++/* --- Synchronous AI functions --- */ ++ ++static int ai_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ai_priv *priv = (struct ai_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ ++ for(i = 0; 
i < insn->data_size / sizeof(uint16_t); i++) ++ data[i] = ai_value_output(priv); ++ ++ return 0; ++} ++ ++/* --- Synchronous DIO function --- */ ++ ++static int dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct dio_priv *priv = (struct dio_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(uint16_t)) ++ return -EINVAL; ++ ++ if (data[0] != 0) { ++ priv->bits_values &= ~(data[0]); ++ priv->bits_values |= (data[0] & data[1]); ++ } ++ ++ data[1] = priv->bits_values; ++ ++ return 0; ++} ++ ++/* --- Synchronous AO + AI2 functions --- */ ++ ++int ao_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ao_ai2_priv *priv = (struct ao_ai2_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Retrieves the value to memorize */ ++ priv->insn_value = data[0]; ++ ++ return 0; ++} ++ ++int ai2_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ao_ai2_priv *priv = *((struct ao_ai2_priv **)subd->priv); ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ /* Checks the buffer size */ ++ if (insn->data_size != sizeof(uint16_t)) ++ return -EINVAL; ++ ++ /* Sets the memorized value */ ++ data[0] = priv->insn_value; ++ ++ return 0; ++} ++ ++/* --- Global task part --- */ ++ ++/* One task is enough for all the asynchronous subdevices, it is just a fake ++ * driver after all ++ */ ++static void task_proc(void *arg) ++{ ++ struct a4l_subdevice *ai_subd, *ao_subd, *ai2_subd; ++ struct a4l_device *dev; ++ struct fake_priv *priv; ++ int running; ++ ++ dev = arg; ++ ai_subd = a4l_get_subd(dev, AI_SUBD); ++ ao_subd = a4l_get_subd(dev, AO_SUBD); ++ ai2_subd = a4l_get_subd(dev, AI2_SUBD); ++ ++ priv = dev->priv; ++ ++ while(!rtdm_task_should_stop()) { ++ ++ /* copy sample static data from the subd private buffer to the ++ * asynchronous buffer ++ */ ++ running = priv->ai_running; ++ if (running && ai_push_values(ai_subd) < 0) { ++ /* on error, wait for detach to destroy the task */ ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ /* ++ * pull the data from the output subdevice (asynchronous buffer) ++ * into its private buffer ++ */ ++ running = priv->ao_running; ++ if (running && ao_pull_values(ao_subd) < 0) { ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ running = priv->ai2_running; ++ /* ++ * then loop it to the ai2 subd since their private data is shared: so ++ * pull the data from the private buffer back into the device's ++ * asynchronous buffer ++ */ ++ if (running && ai2_push_values(ai2_subd) < 0) { ++ rtdm_task_sleep(RTDM_TIMEOUT_INFINITE); ++ continue; ++ } ++ ++ rtdm_task_sleep(TASK_PERIOD); ++ } ++} ++ ++/* --- Initialization functions --- */ ++ ++void setup_ai_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ai_cmd; ++ subd->do_cmdtest = ai_cmdtest; ++ subd->cancel = ai_cancel; ++ subd->munge = ai_munge; ++ subd->cmd_mask = &ai_cmd_mask; ++ subd->insn_read = ai_insn_read; ++} ++ ++void setup_dio_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_DIO; ++ subd->chan_desc = &dio_chandesc; ++ subd->rng_desc = &range_digital; ++ 
subd->insn_bits = dio_insn_bits; ++} ++ ++void setup_ao_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ao_cmd; ++ subd->cancel = ao_cancel; ++ subd->trigger = ao_trigger; ++ subd->cmd_mask = &ao_cmd_mask; ++ subd->insn_write = ao_insn_write; ++} ++ ++void setup_ai2_subd(struct a4l_subdevice *subd) ++{ ++ /* Fill the subdevice structure */ ++ subd->flags |= A4L_SUBD_AI; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->flags |= A4L_SUBD_MMAP; ++ subd->rng_desc = &analog_rngdesc; ++ subd->chan_desc = &analog_chandesc; ++ subd->do_cmd = ai2_cmd; ++ subd->cancel = ai2_cancel; ++ subd->cmd_mask = &ai_cmd_mask; ++ subd->insn_read = ai2_insn_read; ++} ++ ++/* --- Attach / detach functions --- */ ++ ++int test_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ typedef void (*setup_subd_function) (struct a4l_subdevice *subd); ++ struct fake_priv *priv = (struct fake_priv *) dev->priv; ++ struct a4l_subdevice *subd; ++ unsigned long tmp; ++ struct ai_priv *r; ++ int i, ret = 0; ++ ++ struct initializers { ++ struct a4l_subdevice *subd; ++ setup_subd_function init; ++ int private_len; ++ char *name; ++ int index; ++ } sds[] = { ++ [AI_SUBD] = { ++ .name = "AI", ++ .private_len = sizeof(struct ai_priv), ++ .init = setup_ai_subd, ++ .index = AI_SUBD, ++ .subd = NULL, ++ }, ++ [DIO_SUBD] = { ++ .name = "DIO", ++ .private_len = sizeof(struct dio_priv), ++ .init = setup_dio_subd, ++ .index = DIO_SUBD, ++ .subd = NULL, ++ }, ++ [AO_SUBD] = { ++ .name = "AO", ++ .private_len = sizeof(struct ao_ai2_priv), ++ .init = setup_ao_subd, ++ .index = AO_SUBD, ++ .subd = NULL, ++ }, ++ [AI2_SUBD] = { ++ .name = "AI2", ++ .private_len = sizeof(struct ao_ai2_priv *), ++ .init = setup_ai2_subd, ++ .index = AI2_SUBD, ++ .subd = NULL, ++ }, ++ }; ++ ++ a4l_dbg(1, drv_dbg, dev, "starting attach procedure...\n"); ++ ++ /* Set default values for attach parameters */ ++ priv->amplitude_div = 1; ++ priv->quanta_cnt = 1; ++ if (arg->opts_size) { ++ unsigned long *args = (unsigned long *)arg->opts; ++ priv->amplitude_div = args[0]; ++ if (arg->opts_size == 2 * sizeof(unsigned long)) ++ priv->quanta_cnt = (args[1] > 7 || args[1] == 0) ? ++ 1 : args[1]; ++ } ++ ++ /* create and register the subdevices */ ++ for (i = 0; i < ARRAY_SIZE(sds) ; i++) { ++ ++ subd = a4l_alloc_subd(sds[i].private_len, sds[i].init); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ ret = a4l_add_subd(dev, subd); ++ if (ret != sds[i].index) ++ return (ret < 0) ? 
ret : -EINVAL; ++ ++ sds[i].subd = subd; ++ ++ a4l_dbg(1, drv_dbg, dev, " %s subdev registered \n", sds[i].name); ++ } ++ ++ /* initialize specifics */ ++ r = (void *) sds[AI_SUBD].subd->priv; ++ r->amplitude_div = priv->amplitude_div; ++ r->quanta_cnt = priv->quanta_cnt; ++ ++ /* A0 and AI2 shared their private buffers */ ++ tmp = (unsigned long) sds[AO_SUBD].subd->priv; ++ memcpy(sds[AI2_SUBD].subd->priv, &tmp, sds[AI2_SUBD].private_len) ; ++ ++ /* create the task */ ++ ret = rtdm_task_init(&priv->task, "Fake AI task", task_proc, dev, ++ RTDM_TASK_HIGHEST_PRIORITY, 0); ++ if (ret) ++ a4l_dbg(1, drv_dbg, dev, "Error creating A4L task \n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "attach procedure completed: " ++ "adiv = %lu, qcount = %lu \n" ++ , priv->amplitude_div, priv->quanta_cnt); ++ ++ return ret; ++} ++ ++int test_detach(struct a4l_device *dev) ++{ ++ struct fake_priv *priv = (struct fake_priv *)dev->priv; ++ ++ rtdm_task_destroy(&priv->task); ++ a4l_dbg(1, drv_dbg, dev, "detach procedure complete\n"); ++ ++ return 0; ++} ++ ++/* --- Module stuff --- */ ++ ++static struct a4l_driver test_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_fake", ++ .driver_name = "fake", ++ .attach = test_attach, ++ .detach = test_detach, ++ .privdata_size = sizeof(struct fake_priv), ++}; ++ ++static int __init a4l_fake_init(void) ++{ ++ return a4l_register_drv(&test_drv); ++} ++ ++static void __exit a4l_fake_cleanup(void) ++{ ++ a4l_unregister_drv(&test_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy fake driver"); ++MODULE_LICENSE("GPL"); ++ ++module_init(a4l_fake_init); ++module_exit(a4l_fake_cleanup); +--- linux/drivers/xenomai/analogy/proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/proc.h 2021-04-07 16:01:27.933633157 +0800 +@@ -0,0 +1,33 @@ ++/* ++ * Analogy for Linux, procfs related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __ANALOGY_PROC_H__ ++#define __ANALOGY_PROC_H__ ++ ++#ifdef __KERNEL__ ++ ++#ifdef CONFIG_PROC_FS ++extern struct proc_dir_entry *a4l_proc_root; ++#endif /* CONFIG_PROC_FS */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __ANALOGY_PROC_H__ */ +--- linux/drivers/xenomai/analogy/transfer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/transfer.c 2021-04-07 16:01:27.928633164 +0800 +@@ -0,0 +1,259 @@ ++/* ++ * Analogy for Linux, transfer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++/* --- Initialization / cleanup / cancel functions --- */ ++ ++int a4l_precleanup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev; ++ struct a4l_transfer *tsf; ++ int i, err = 0; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ if (tsf == NULL) { ++ __a4l_err("a4l_precleanup_transfer: " ++ "incoherent status, transfer block not reachable\n"); ++ return -ENODEV; ++ } ++ ++ for (i = 0; i < tsf->nb_subd; i++) { ++ unsigned long *status = &tsf->subds[i]->status; ++ ++ __a4l_dbg(1, core_dbg, "subd[%d]->status=0x%08lx\n", i, *status); ++ ++ if (test_and_set_bit(A4L_SUBD_BUSY, status)) { ++ __a4l_err("a4l_precleanup_transfer: " ++ "device busy, acquisition occuring\n"); ++ err = -EBUSY; ++ goto out_error; ++ } else ++ set_bit(A4L_SUBD_CLEAN, status); ++ } ++ ++ return 0; ++ ++out_error: ++ for (i = 0; i < tsf->nb_subd; i++) { ++ unsigned long *status = &tsf->subds[i]->status; ++ ++ if (test_bit(A4L_SUBD_CLEAN, status)){ ++ clear_bit(A4L_SUBD_BUSY, status); ++ clear_bit(A4L_SUBD_CLEAN, status); ++ } ++ } ++ ++ return err; ++} ++ ++int a4l_cleanup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev; ++ struct a4l_transfer *tsf; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Releases the pointers tab, if need be */ ++ if (tsf->subds != NULL) { ++ rtdm_free(tsf->subds); ++ } ++ ++ memset(tsf, 0, sizeof(struct a4l_transfer)); ++ ++ return 0; ++} ++ ++void a4l_presetup_transfer(struct a4l_device_context *cxt) ++{ ++ struct a4l_device *dev = NULL; ++ struct a4l_transfer *tsf; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Clear the structure */ ++ memset(tsf, 0, sizeof(struct a4l_transfer)); ++ ++ tsf->default_bufsize = A4L_BUF_DEFSIZE; ++ ++ /* 0 is also considered as a valid IRQ, then ++ the IRQ number must be initialized with another value */ ++ tsf->irq_desc.irq = A4L_IRQ_UNUSED; ++} ++ ++int a4l_setup_transfer(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = NULL; ++ struct a4l_transfer *tsf; ++ struct list_head *this; ++ int i = 0, ret = 0; ++ ++ dev = a4l_get_dev(cxt); ++ tsf = &dev->transfer; ++ ++ /* Recovers the subdevices count ++ (as they are registered in a linked list */ ++ list_for_each(this, &dev->subdvsq) { ++ tsf->nb_subd++; ++ } ++ ++ __a4l_dbg(1, core_dbg, "nb_subd=%d\n", tsf->nb_subd); ++ ++ /* Allocates a suitable tab for the subdevices */ ++ tsf->subds = rtdm_malloc(tsf->nb_subd * sizeof(struct a4l_subdevice *)); ++ if (tsf->subds == NULL) { ++ __a4l_err("a4l_setup_transfer: call1(alloc) failed \n"); ++ ret = -ENOMEM; ++ goto out_setup_tsf; ++ } ++ ++ /* Recovers the subdevices pointers */ ++ list_for_each(this, &dev->subdvsq) { ++ tsf->subds[i++] = list_entry(this, struct a4l_subdevice, list); ++ } ++ ++out_setup_tsf: ++ ++ if (ret != 0) ++ a4l_cleanup_transfer(cxt); ++ ++ return ret; ++} ++ ++/* --- IRQ handling section --- */ ++ ++int a4l_request_irq(struct a4l_device * dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned 
long flags, void *cookie) ++{ ++ int ret; ++ ++ if (dev->transfer.irq_desc.irq != A4L_IRQ_UNUSED) ++ return -EBUSY; ++ ++ ret = __a4l_request_irq(&dev->transfer.irq_desc, irq, handler, flags, ++ cookie); ++ if (ret != 0) { ++ __a4l_err("a4l_request_irq: IRQ registration failed\n"); ++ dev->transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ } ++ ++ return ret; ++} ++ ++int a4l_free_irq(struct a4l_device * dev, unsigned int irq) ++{ ++ ++ int ret = 0; ++ ++ if (dev->transfer.irq_desc.irq != irq) ++ return -EINVAL; ++ ++ /* There is less need to use a spinlock ++ than for a4l_request_irq() */ ++ ret = __a4l_free_irq(&dev->transfer.irq_desc); ++ ++ if (ret == 0) ++ dev->transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ ++ return ret; ++} ++ ++unsigned int a4l_get_irq(struct a4l_device * dev) ++{ ++ return dev->transfer.irq_desc.irq; ++} ++ ++/* --- Proc section --- */ ++ ++#ifdef CONFIG_PROC_FS ++ ++int a4l_rdproc_transfer(struct seq_file *seq, void *v) ++{ ++ struct a4l_transfer *transfer = (struct a4l_transfer *) seq->private; ++ int i; ++ ++ if (v != SEQ_START_TOKEN) ++ return -EINVAL; ++ ++ seq_printf(seq, "-- Subdevices --\n\n"); ++ seq_printf(seq, "| idx | type\n"); ++ ++ /* Gives the subdevice type's name */ ++ for (i = 0; i < transfer->nb_subd; i++) { ++ char *type; ++ switch (transfer->subds[i]->flags & A4L_SUBD_TYPES) { ++ case A4L_SUBD_UNUSED: ++ type = "Unused subdevice"; ++ break; ++ case A4L_SUBD_AI: ++ type = "Analog input subdevice"; ++ break; ++ case A4L_SUBD_AO: ++ type = "Analog output subdevice"; ++ break; ++ case A4L_SUBD_DI: ++ type = "Digital input subdevice"; ++ break; ++ case A4L_SUBD_DO: ++ type = "Digital output subdevice"; ++ break; ++ case A4L_SUBD_DIO: ++ type = "Digital input/output subdevice"; ++ break; ++ case A4L_SUBD_COUNTER: ++ type = "Counter subdevice"; ++ break; ++ case A4L_SUBD_TIMER: ++ type = "Timer subdevice"; ++ break; ++ case A4L_SUBD_MEMORY: ++ type = "Memory subdevice"; ++ break; ++ case A4L_SUBD_CALIB: ++ type = "Calibration subdevice"; ++ break; ++ case A4L_SUBD_PROC: ++ type = "Processor subdevice"; ++ break; ++ case A4L_SUBD_SERIAL: ++ type = "Serial subdevice"; ++ break; ++ default: ++ type = "Unknown subdevice"; ++ } ++ ++ seq_printf(seq, "| %02d | %s\n", i, type); ++ } ++ ++ return 0; ++} ++ ++#endif /* CONFIG_PROC_FS */ +--- linux/drivers/xenomai/analogy/buffer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/buffer.c 2021-04-07 16:01:27.923633172 +0800 +@@ -0,0 +1,1145 @@ ++/* ++ * Analogy for Linux, buffer related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Initialization functions (init, alloc, free) --- */ ++ ++/* The buffer charactistic is very close to the Comedi one: it is ++ allocated with vmalloc() and all physical addresses of the pages which ++ compose the virtual buffer are hold in a table */ ++ ++void a4l_free_buffer(struct a4l_buffer * buf_desc) ++{ ++ __a4l_dbg(1, core_dbg, "buf=%p buf->buf=%p\n", buf_desc, buf_desc->buf); ++ ++ if (buf_desc->pg_list != NULL) { ++ rtdm_free(buf_desc->pg_list); ++ buf_desc->pg_list = NULL; ++ } ++ ++ if (buf_desc->buf != NULL) { ++ char *vaddr, *vabase = buf_desc->buf; ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ ClearPageReserved(vmalloc_to_page(vaddr)); ++ vfree(buf_desc->buf); ++ buf_desc->buf = NULL; ++ } ++} ++ ++int a4l_alloc_buffer(struct a4l_buffer *buf_desc, int buf_size) ++{ ++ int ret = 0; ++ char *vaddr, *vabase; ++ ++ buf_desc->size = buf_size; ++ buf_desc->size = PAGE_ALIGN(buf_desc->size); ++ ++ buf_desc->buf = vmalloc_32(buf_desc->size); ++ if (buf_desc->buf == NULL) { ++ ret = -ENOMEM; ++ goto out_virt_contig_alloc; ++ } ++ ++ vabase = buf_desc->buf; ++ ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ SetPageReserved(vmalloc_to_page(vaddr)); ++ ++ buf_desc->pg_list = rtdm_malloc(((buf_desc->size) >> PAGE_SHIFT) * ++ sizeof(unsigned long)); ++ if (buf_desc->pg_list == NULL) { ++ ret = -ENOMEM; ++ goto out_virt_contig_alloc; ++ } ++ ++ for (vaddr = vabase; vaddr < vabase + buf_desc->size; ++ vaddr += PAGE_SIZE) ++ buf_desc->pg_list[(vaddr - vabase) >> PAGE_SHIFT] = ++ (unsigned long) page_to_phys(vmalloc_to_page(vaddr)); ++ ++ __a4l_dbg(1, core_dbg, "buf=%p buf->buf=%p\n", buf_desc, buf_desc->buf); ++ ++out_virt_contig_alloc: ++ if (ret != 0) ++ a4l_free_buffer(buf_desc); ++ ++ return ret; ++} ++ ++static void a4l_reinit_buffer(struct a4l_buffer *buf_desc) ++{ ++ /* No command to process yet */ ++ buf_desc->cur_cmd = NULL; ++ ++ /* No more (or not yet) linked with a subdevice */ ++ buf_desc->subd = NULL; ++ ++ /* Initializes counts and flags */ ++ buf_desc->end_count = 0; ++ buf_desc->prd_count = 0; ++ buf_desc->cns_count = 0; ++ buf_desc->tmp_count = 0; ++ buf_desc->mng_count = 0; ++ ++ /* Flush pending events */ ++ buf_desc->flags = 0; ++ a4l_flush_sync(&buf_desc->sync); ++} ++ ++void a4l_init_buffer(struct a4l_buffer *buf_desc) ++{ ++ memset(buf_desc, 0, sizeof(struct a4l_buffer)); ++ a4l_init_sync(&buf_desc->sync); ++ a4l_reinit_buffer(buf_desc); ++} ++ ++void a4l_cleanup_buffer(struct a4l_buffer *buf_desc) ++{ ++ a4l_cleanup_sync(&buf_desc->sync); ++} ++ ++int a4l_setup_buffer(struct a4l_device_context *cxt, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_buffer *buf_desc = cxt->buffer; ++ int i; ++ ++ /* Retrieve the related subdevice */ ++ buf_desc->subd = a4l_get_subd(cxt->dev, cmd->idx_subd); ++ if (buf_desc->subd == NULL) { ++ __a4l_err("a4l_setup_buffer: subdevice index " ++ "out of range (%d)\n", cmd->idx_subd); ++ return -EINVAL; ++ } ++ ++ if (test_and_set_bit(A4L_SUBD_BUSY_NR, &buf_desc->subd->status)) { ++ __a4l_err("a4l_setup_buffer: subdevice %d already busy\n", ++ cmd->idx_subd); ++ return -EBUSY; ++ } ++ ++ /* Checks if the transfer system has to work in bulk mode */ ++ if (cmd->flags & A4L_CMD_BULK) ++ set_bit(A4L_BUF_BULK_NR, &buf_desc->flags); ++ ++ /* Sets the working command */ ++ buf_desc->cur_cmd = cmd; ++ ++ /* Link the subdevice with the context's buffer */ ++ buf_desc->subd->buf = buf_desc; ++ 
++ /* Computes the count to reach, if need be */ ++ if (cmd->stop_src == TRIG_COUNT) { ++ for (i = 0; i < cmd->nb_chan; i++) { ++ struct a4l_channel *chft; ++ chft = a4l_get_chfeat(buf_desc->subd, ++ CR_CHAN(cmd->chan_descs[i])); ++ buf_desc->end_count += chft->nb_bits / 8; ++ } ++ buf_desc->end_count *= cmd->stop_arg; ++ } ++ ++ __a4l_dbg(1, core_dbg, "end_count=%lu\n", buf_desc->end_count); ++ ++ return 0; ++} ++ ++void a4l_cancel_buffer(struct a4l_device_context *cxt) ++{ ++ struct a4l_buffer *buf_desc = cxt->buffer; ++ struct a4l_subdevice *subd = buf_desc->subd; ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return; ++ ++ /* If a "cancel" function is registered, call it ++ (Note: this function is called before having checked ++ if a command is under progress; we consider that ++ the "cancel" function can be used as as to (re)initialize ++ some component) */ ++ if (subd->cancel != NULL) ++ subd->cancel(subd); ++ ++ if (buf_desc->cur_cmd != NULL) { ++ a4l_free_cmddesc(buf_desc->cur_cmd); ++ rtdm_free(buf_desc->cur_cmd); ++ buf_desc->cur_cmd = NULL; ++ } ++ ++ a4l_reinit_buffer(buf_desc); ++ ++ clear_bit(A4L_SUBD_BUSY_NR, &subd->status); ++ subd->buf = NULL; ++} ++ ++/* --- Munge related function --- */ ++ ++int a4l_get_chan(struct a4l_subdevice *subd) ++{ ++ int i, j, tmp_count, tmp_size = 0; ++ struct a4l_cmd_desc *cmd; ++ ++ cmd = a4l_get_cmd(subd); ++ if (!cmd) ++ return -EINVAL; ++ ++ /* There is no need to check the channel idx, ++ it has already been controlled in command_test */ ++ ++ /* We assume channels can have different sizes; ++ so, we have to compute the global size of the channels ++ in this command... */ ++ for (i = 0; i < cmd->nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? ++ CR_CHAN(cmd->chan_descs[i]) : 0; ++ tmp_size += subd->chan_desc->chans[j].nb_bits; ++ } ++ ++ /* Translation bits -> bytes */ ++ tmp_size /= 8; ++ ++ tmp_count = subd->buf->mng_count % tmp_size; ++ ++ /* Translation bytes -> bits */ ++ tmp_count *= 8; ++ ++ /* ...and find the channel the last munged sample ++ was related with */ ++ for (i = 0; tmp_count > 0 && i < cmd->nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? 
++ CR_CHAN(cmd->chan_descs[i]) : 0; ++ tmp_count -= subd->chan_desc->chans[j].nb_bits; ++ } ++ ++ if (tmp_count == 0) ++ return i; ++ else ++ return -EINVAL; ++} ++ ++/* --- Transfer / copy functions --- */ ++ ++/* The following functions are explained in the Doxygen section ++ "Buffer management services" in driver_facilities.c */ ++ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __pre_abs_put(buf, count); ++} ++ ++ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __abs_put(buf, count); ++} ++ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __pre_put(buf, count); ++} ++ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ return __put(buf, count); ++} ++ ++int a4l_buf_put(struct a4l_subdevice *subd, void *bufdata, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int err; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_input(subd)) ++ return -EINVAL; ++ ++ if (__count_to_put(buf) < count) ++ return -EAGAIN; ++ ++ err = __produce(NULL, buf, bufdata, count); ++ if (err < 0) ++ return err; ++ ++ err = __put(buf, count); ++ ++ return err; ++} ++ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __pre_abs_get(buf, count); ++} ++ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __abs_get(buf, count); ++} ++ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __pre_get(buf, count); ++} ++ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ ++ /* Basic checkings */ ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ return __get(buf, count); ++} ++ ++int a4l_buf_get(struct a4l_subdevice *subd, void *bufdata, unsigned long count) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int err; ++ ++ /* Basic checkings */ ++ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (!a4l_subd_is_output(subd)) ++ return -EINVAL; ++ ++ if 
(__count_to_get(buf) < count) ++ return -EAGAIN; ++ ++ /* Update the counter */ ++ err = __consume(NULL, buf, bufdata, count); ++ if (err < 0) ++ return err; ++ ++ /* Perform the transfer */ ++ err = __get(buf, count); ++ ++ return err; ++} ++ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ int tmp; ++ unsigned long wake = 0, count = ULONG_MAX; ++ ++ /* Warning: here, there may be a condition race : the cancel ++ function is called by the user side and a4l_buf_evt and all ++ the a4l_buf_... functions are called by the kernel ++ side. Nonetheless, the driver should be in charge of such ++ race conditions, not the framework */ ++ ++ /* Basic checking */ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ /* Here we save the data count available for the user side */ ++ if (evts == 0) { ++ count = a4l_subd_is_input(subd) ? ++ __count_to_get(buf) : __count_to_put(buf); ++ wake = __count_to_end(buf) < buf->wake_count ? ++ __count_to_end(buf) : buf->wake_count; ++ } else { ++ /* Even if it is a little more complex, atomic ++ operations are used so as to prevent any kind of ++ corner case */ ++ while ((tmp = ffs(evts) - 1) != -1) { ++ set_bit(tmp, &buf->flags); ++ clear_bit(tmp, &evts); ++ } ++ } ++ ++ if (count >= wake) ++ /* Notify the user-space side */ ++ a4l_signal_sync(&buf->sync); ++ ++ return 0; ++} ++ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned long ret = 0; ++ ++ /* Basic checking */ ++ if (!buf || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) ++ return -ENOENT; ++ ++ if (a4l_subd_is_input(subd)) ++ ret = __count_to_put(buf); ++ else if (a4l_subd_is_output(subd)) ++ ret = __count_to_get(buf); ++ ++ return ret; ++} ++ ++/* --- Mmap functions --- */ ++ ++void a4l_map(struct vm_area_struct *area) ++{ ++ unsigned long *status = (unsigned long *)area->vm_private_data; ++ set_bit(A4L_BUF_MAP_NR, status); ++} ++ ++void a4l_unmap(struct vm_area_struct *area) ++{ ++ unsigned long *status = (unsigned long *)area->vm_private_data; ++ clear_bit(A4L_BUF_MAP_NR, status); ++} ++ ++static struct vm_operations_struct a4l_vm_ops = { ++ .open = a4l_map, ++ .close = a4l_unmap, ++}; ++ ++int a4l_ioctl_mmap(struct a4l_device_context *cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ a4l_mmap_t map_cfg; ++ struct a4l_device *dev; ++ struct a4l_buffer *buf; ++ int ret; ++ ++ /* The mmap operation cannot be performed in a ++ real-time context */ ++ if (rtdm_in_rt_context()) { ++ return -ENOSYS; ++ } ++ ++ dev = a4l_get_dev(cxt); ++ buf = cxt->buffer; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_mmap: cannot mmap on " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (test_bit(A4L_BUF_MAP_NR, &buf->flags)) { ++ __a4l_err("a4l_ioctl_mmap: buffer already mapped\n"); ++ return -EBUSY; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &map_cfg, arg, sizeof(a4l_mmap_t)) != 0) ++ return -EFAULT; ++ ++ /* Check the size to be mapped */ ++ if ((map_cfg.size & ~(PAGE_MASK)) != 0 || map_cfg.size > buf->size) ++ return -EFAULT; ++ ++ /* All the magic is here */ ++ ret = rtdm_mmap_to_user(fd, ++ buf->buf, ++ map_cfg.size, ++ PROT_READ | PROT_WRITE, ++ &map_cfg.ptr, &a4l_vm_ops, &buf->flags); ++ ++ if (ret < 0) { ++ __a4l_err("a4l_ioctl_mmap: internal error, " ++ "rtdm_mmap_to_user failed (err=%d)\n", ret); ++ return ret; ++ } ++ ++ return rtdm_safe_copy_to_user(fd, ++ 
arg, &map_cfg, sizeof(a4l_mmap_t)); ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_cancel(struct a4l_device_context * cxt, void *arg) ++{ ++ unsigned int idx_subd = (unsigned long)arg; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd; ++ ++ /* Basically check the device */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_cancel: operation not supported on " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (cxt->buffer->subd == NULL) { ++ __a4l_err("a4l_ioctl_cancel: " ++ "no acquisition to cancel on this context\n"); ++ return -EINVAL; ++ } ++ ++ if (idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_cancel: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[idx_subd]; ++ ++ if (subd != cxt->buffer->subd) { ++ __a4l_err("a4l_ioctl_cancel: " ++ "current context works on another subdevice " ++ "(%d!=%d)\n", cxt->buffer->subd->idx, subd->idx); ++ return -EINVAL; ++ } ++ ++ a4l_cancel_buffer(cxt); ++ return 0; ++} ++ ++/* The ioctl BUFCFG is only useful for changing the size of the ++ asynchronous buffer. ++ (BUFCFG = free of the current buffer + allocation of a new one) */ ++ ++int a4l_ioctl_bufcfg(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_bufcfg_t buf_cfg; ++ ++ /* As Linux API is used to allocate a virtual buffer, ++ the calling process must not be in primary mode */ ++ if (rtdm_in_rt_context()) { ++ return -ENOSYS; ++ } ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &buf_cfg, ++ arg, sizeof(a4l_bufcfg_t)) != 0) ++ return -EFAULT; ++ ++ if (buf_cfg.buf_size > A4L_BUF_MAXSIZE) { ++ __a4l_err("a4l_ioctl_bufcfg: buffer size too big (<=16MB)\n"); ++ return -EINVAL; ++ } ++ ++ if (buf_cfg.idx_subd == A4L_BUF_DEFMAGIC) { ++ cxt->dev->transfer.default_bufsize = buf_cfg.buf_size; ++ return 0; ++ } ++ ++ if (subd && test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_ioctl_bufcfg: acquisition in progress\n"); ++ return -EBUSY; ++ } ++ ++ if (test_bit(A4L_BUF_MAP, &buf->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg: please unmap before " ++ "configuring buffer\n"); ++ return -EPERM; ++ } ++ ++ /* Free the buffer... */ ++ a4l_free_buffer(buf); ++ ++ /* ...to reallocate it */ ++ return a4l_alloc_buffer(buf, buf_cfg.buf_size); ++} ++ ++/* The ioctl BUFCFG2 allows the user space process to define the ++ minimal amount of data which should trigger a wake-up. If the ABI ++ could be broken, this facility would be handled by the original ++ BUFCFG ioctl. At the next major release, this ioctl will vanish. 
*/ ++ ++int a4l_ioctl_bufcfg2(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ a4l_bufcfg2_t buf_cfg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg2: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &buf_cfg, ++ arg, sizeof(a4l_bufcfg2_t)) != 0) ++ return -EFAULT; ++ ++ if (buf_cfg.wake_count > buf->size) { ++ __a4l_err("a4l_ioctl_bufcfg2: " ++ "wake-up threshold too big (> buffer size: %lu)\n", ++ buf->size); ++ return -EINVAL; ++ } ++ ++ buf->wake_count = buf_cfg.wake_count; ++ ++ return 0; ++} ++ ++/* The BUFINFO ioctl provides two basic roles: ++ - tell the user app the size of the asynchronous buffer ++ - display the read/write counters (how many bytes to read/write) */ ++ ++int a4l_ioctl_bufinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_bufinfo_t info; ++ ++ unsigned long tmp_cnt; ++ int ret; ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufinfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &info, arg, sizeof(a4l_bufinfo_t)) != 0) ++ return -EFAULT; ++ ++ ++ /* If a transfer is not occuring, simply return buffer ++ informations, otherwise make the transfer progress */ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ info.rw_count = 0; ++ goto a4l_ioctl_bufinfo_out; ++ } ++ ++ ret = __handle_event(buf); ++ ++ if (a4l_subd_is_input(subd)) { ++ ++ /* Updates consume count if rw_count is not null */ ++ if (info.rw_count != 0) ++ buf->cns_count += info.rw_count; ++ ++ /* Retrieves the data amount to read */ ++ tmp_cnt = info.rw_count = __count_to_get(buf); ++ ++ __a4l_dbg(1, core_dbg, "count to read=%lu\n", tmp_cnt); ++ ++ if ((ret < 0 && ret != -ENOENT) || ++ (ret == -ENOENT && tmp_cnt == 0)) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ } else if (a4l_subd_is_output(subd)) { ++ ++ if (ret < 0) { ++ a4l_cancel_buffer(cxt); ++ if (info.rw_count != 0) ++ return ret; ++ } ++ ++ /* If rw_count is not null, ++ there is something to write / munge */ ++ if (info.rw_count != 0 && info.rw_count <= __count_to_put(buf)) { ++ ++ /* Updates the production pointer */ ++ buf->prd_count += info.rw_count; ++ ++ /* Sets the munge count */ ++ tmp_cnt = info.rw_count; ++ } else ++ tmp_cnt = 0; ++ ++ /* Retrieves the data amount which is writable */ ++ info.rw_count = __count_to_put(buf); ++ ++ __a4l_dbg(1, core_dbg, " count to write=%lu\n", info.rw_count); ++ ++ } else { ++ __a4l_err("a4l_ioctl_bufinfo: inappropriate subdevice\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ ++ /* Call the munge callback */ ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++a4l_ioctl_bufinfo_out: ++ ++ /* Sets the buffer size */ ++ info.buf_size = buf->size; ++ ++ /* Sends the structure back to user space */ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &info, sizeof(a4l_bufinfo_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/* The ioctl BUFINFO2 tells the user application the 
minimal amount of ++data which should trigger a wake-up. If the ABI could be broken, this ++facility would be handled by the original BUFINFO ioctl. At the next ++major release, this ioctl will vanish. */ ++ ++int a4l_ioctl_bufinfo2(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ a4l_bufcfg2_t buf_cfg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_bufcfg2: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ buf_cfg.wake_count = buf->wake_count; ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &buf_cfg, sizeof(a4l_bufcfg2_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/* The function a4l_read_buffer can be considered as the kernel entry ++ point of the RTDM syscall read. This syscall is supposed to be used ++ only during asynchronous acquisitions */ ++ssize_t a4l_read_buffer(struct a4l_device_context * cxt, void *bufdata, size_t nbytes) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ssize_t count = 0; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_read: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_read: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (!a4l_subd_is_input(subd)) { ++ __a4l_err("a4l_read: operation requires an input subdevice \n"); ++ return -EINVAL; ++ } ++ ++ while (count < nbytes) { ++ ++ unsigned long tmp_cnt; ++ ++ /* Check the events */ ++ int ret = __handle_event(buf); ++ ++ __dump_buffer_counters(buf); ++ ++ /* Compute the data amount to copy */ ++ tmp_cnt = __count_to_get(buf); ++ ++ /* Check tmp_cnt count is not higher than ++ the global count to read */ ++ if (tmp_cnt > nbytes - count) ++ tmp_cnt = nbytes - count; ++ ++ /* We check whether there is an error */ ++ if (ret < 0 && ret != -ENOENT) { ++ __a4l_err("a4l_read: failed to handle event %d \n", ret); ++ a4l_cancel_buffer(cxt); ++ count = ret; ++ goto out_a4l_read; ++ } ++ ++ /* We check whether the acquisition is over */ ++ if (ret == -ENOENT && tmp_cnt == 0) { ++ __a4l_info("a4l_read: acquisition done - all data " ++ "requested by the client was delivered \n"); ++ a4l_cancel_buffer(cxt); ++ count = 0; ++ goto out_a4l_read; ++ } ++ ++ if (tmp_cnt > 0) { ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++ /* Performs the copy */ ++ ret = __consume(cxt, buf, bufdata + count, tmp_cnt); ++ ++ if (ret < 0) { ++ count = ret; ++ goto out_a4l_read; ++ } ++ ++ /* Updates consume count */ ++ buf->cns_count += tmp_cnt; ++ a4l_dbg(1, core_dbg, dev, "buf->cns_cnt=%ld \n", buf->cns_count); ++ ++ /* Updates the return value */ ++ count += tmp_cnt; ++ ++ /* If the driver does not work in bulk mode, ++ we must leave this function */ ++ if (!test_bit(A4L_BUF_BULK, &buf->flags)) ++ goto out_a4l_read; ++ } ++ else { ++ /* If the acquisition is not over, we must not ++ leave the function without having read a least byte */ ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ if (ret < 0) { ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ count = ret; ++ goto out_a4l_read; ++ } ++ } ++ } ++ ++out_a4l_read: ++ ++ return count; ++} ++ ++/* The 
function a4l_write_buffer can be considered as the kernel entry ++ point of the RTDM syscall write. This syscall is supposed to be ++ used only during asynchronous acquisitions */ ++ssize_t a4l_write_buffer(struct a4l_device_context *cxt, const void *bufdata, size_t nbytes) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ssize_t count = 0; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_write: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_write: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (!a4l_subd_is_output(subd)) { ++ __a4l_err("a4l_write: operation requires an output subdevice \n"); ++ return -EINVAL; ++ } ++ ++ while (count < nbytes) { ++ ++ unsigned long tmp_cnt; ++ ++ /* Check the events */ ++ int ret = __handle_event(buf); ++ ++ __dump_buffer_counters(buf); ++ ++ /* Compute the data amount to copy */ ++ tmp_cnt = __count_to_put(buf); ++ ++ /* Check tmp_cnt count is not higher than ++ the global count to write */ ++ if (tmp_cnt > nbytes - count) ++ tmp_cnt = nbytes - count; ++ ++ if (ret < 0) { ++ count = (ret == -ENOENT) ? -EINVAL : ret; ++ __a4l_err("a4l_write: failed to handle event %d \n", ret); ++ a4l_cancel_buffer(cxt); ++ goto out_a4l_write; ++ } ++ ++ if (tmp_cnt > 0) { ++ ++ ++ /* Performs the copy */ ++ ret = __produce(cxt, ++ buf, (void *)bufdata + count, tmp_cnt); ++ if (ret < 0) { ++ count = ret; ++ goto out_a4l_write; ++ } ++ ++ /* Performs the munge if need be */ ++ if (subd->munge != NULL) { ++ __munge(subd, subd->munge, buf, tmp_cnt); ++ ++ /* Updates munge count */ ++ buf->mng_count += tmp_cnt; ++ } ++ ++ /* Updates produce count */ ++ buf->prd_count += tmp_cnt; ++ a4l_dbg(1, core_dbg, dev , "buf->prd_cnt=%ld \n", buf->prd_count); ++ ++ /* Updates the return value */ ++ count += tmp_cnt; ++ ++ /* If the driver does not work in bulk mode, ++ we must leave this function */ ++ if (!test_bit(A4L_BUF_BULK, &buf->flags)) ++ goto out_a4l_write; ++ } else { ++ /* The buffer is full, we have to wait for a slot to free */ ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ if (ret < 0) { ++ __a4l_err("a4l_write: failed to wait for free slot (%d)\n", ret); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ count = ret; ++ goto out_a4l_write; ++ } ++ } ++ } ++ ++out_a4l_write: ++ ++ return count; ++} ++ ++int a4l_select(struct a4l_device_context *cxt, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ ++ /* Basic checkings */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_select: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY, &subd->status)) { ++ __a4l_err("a4l_select: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ /* Check the RTDM select type ++ (RTDM_SELECTTYPE_EXCEPT is not supported) */ ++ ++ if(type != RTDM_SELECTTYPE_READ && ++ type != RTDM_SELECTTYPE_WRITE) { ++ __a4l_err("a4l_select: wrong select argument\n"); ++ return -EINVAL; ++ } ++ ++ if (type == RTDM_SELECTTYPE_READ && !a4l_subd_is_input(subd)) { ++ __a4l_err("a4l_select: current context " ++ "does not work with an input subdevice\n"); ++ return -EINVAL; ++ } ++ ++ if (type == RTDM_SELECTTYPE_WRITE && 
!a4l_subd_is_output(subd)) { ++ __a4l_err("a4l_select: current context " ++ "does not work with an input subdevice\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs a bind on the Analogy synchronization element */ ++ return a4l_select_sync(&(buf->sync), selector, type, fd_index); ++} ++ ++int a4l_ioctl_poll(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ unsigned long tmp_cnt = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_buffer *buf = cxt->buffer; ++ struct a4l_subdevice *subd = buf->subd; ++ a4l_poll_t poll; ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_poll: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (!subd || !test_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ __a4l_err("a4l_poll: idle subdevice on this context\n"); ++ return -ENOENT; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &poll, arg, sizeof(a4l_poll_t)) != 0) ++ return -EFAULT; ++ ++ /* Checks the buffer events */ ++ a4l_flush_sync(&buf->sync); ++ ret = __handle_event(buf); ++ ++ /* Retrieves the data amount to compute ++ according to the subdevice type */ ++ if (a4l_subd_is_input(subd)) { ++ ++ tmp_cnt = __count_to_get(buf); ++ ++ /* Check if some error occured */ ++ if (ret < 0 && ret != -ENOENT) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ ++ /* Check whether the acquisition is over */ ++ if (ret == -ENOENT && tmp_cnt == 0) { ++ a4l_cancel_buffer(cxt); ++ return 0; ++ } ++ } else { ++ ++ /* If some error was detected, cancel the transfer */ ++ if (ret < 0) { ++ a4l_cancel_buffer(cxt); ++ return ret; ++ } ++ ++ tmp_cnt = __count_to_put(buf); ++ } ++ ++ if (poll.arg == A4L_NONBLOCK || tmp_cnt != 0) ++ goto out_poll; ++ ++ if (poll.arg == A4L_INFINITE) ++ ret = a4l_wait_sync(&(buf->sync), rtdm_in_rt_context()); ++ else { ++ unsigned long long ns = ((unsigned long long)poll.arg) * ++ ((unsigned long long)NSEC_PER_MSEC); ++ ret = a4l_timedwait_sync(&(buf->sync), rtdm_in_rt_context(), ns); ++ } ++ ++ if (ret == 0) { ++ /* Retrieves the count once more */ ++ if (a4l_subd_is_input(dev->transfer.subds[poll.idx_subd])) ++ tmp_cnt = __count_to_get(buf); ++ else ++ tmp_cnt = __count_to_put(buf); ++ } ++ else ++ return ret; ++ ++out_poll: ++ ++ poll.arg = tmp_cnt; ++ ++ ret = rtdm_safe_copy_to_user(fd, ++ arg, &poll, sizeof(a4l_poll_t)); ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/intel/8255.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/8255.c 2021-04-07 16:01:27.919633178 +0800 +@@ -0,0 +1,331 @@ ++/* ++ * Analogy subdevice driver for 8255 chip ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++ ++#include "8255.h" ++ ++#define CALLBACK_ARG (((subd_8255_t *)subd->priv)->cb_arg) ++#define CALLBACK_FUNC (((subd_8255_t *)subd->priv)->cb_func) ++ ++/* Channels descriptor */ ++static struct a4l_channels_desc chandesc_8255 = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 24, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, sizeof(sampl_t)}, ++ }, ++}; ++ ++/* Command options mask */ ++static struct a4l_cmd_desc cmd_mask_8255 = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_FOLLOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++void a4l_subdev_8255_interrupt(struct a4l_subdevice *subd) ++{ ++ sampl_t d; ++ ++ /* Retrieve the sample... */ ++ d = CALLBACK_FUNC(0, _8255_DATA, 0, CALLBACK_ARG); ++ d |= (CALLBACK_FUNC(0, _8255_DATA + 1, 0, CALLBACK_ARG) << 8); ++ ++ /* ...and send it */ ++ a4l_buf_put(subd, &d, sizeof(sampl_t)); ++ ++ a4l_buf_evt(subd, 0); ++} ++EXPORT_SYMBOL_GPL(a4l_subdev_8255_interrupt); ++ ++static int subdev_8255_cb(int dir, int port, int data, unsigned long arg) ++{ ++ unsigned long iobase = arg; ++ ++ if (dir) { ++ outb(data, iobase + port); ++ return 0; ++ } else { ++ return inb(iobase + port); ++ } ++} ++ ++static void do_config(struct a4l_subdevice *subd) ++{ ++ int config; ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ ++ config = CR_CW; ++ /* 1 in io_bits indicates output, 1 in config indicates input */ ++ if (!(subd_8255->io_bits & 0x0000ff)) ++ config |= CR_A_IO; ++ if (!(subd_8255->io_bits & 0x00ff00)) ++ config |= CR_B_IO; ++ if (!(subd_8255->io_bits & 0x0f0000)) ++ config |= CR_C_LO_IO; ++ if (!(subd_8255->io_bits & 0xf00000)) ++ config |= CR_C_HI_IO; ++ CALLBACK_FUNC(1, _8255_CR, config, CALLBACK_ARG); ++} ++ ++int subd_8255_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ /* FIXME */ ++ return 0; ++} ++ ++int subd_8255_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg != 0) { ++ cmd->scan_begin_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != 1) { ++ cmd->scan_end_arg = 1; ++ return -EINVAL; ++ } ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void subd_8255_cancel(struct a4l_subdevice *subd) ++{ ++ /* FIXME */ ++} ++ ++int subd_8255_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (data[0]) { ++ ++ subd_8255->status &= ~data[0]; ++ subd_8255->status |= (data[0] & data[1]); ++ ++ if (data[0] & 0xff) ++ CALLBACK_FUNC(1, _8255_DATA, ++ subd_8255->status & 0xff, CALLBACK_ARG); ++ if (data[0] & 0xff00) ++ CALLBACK_FUNC(1, _8255_DATA + 1, ++ (subd_8255->status >> 8) & 0xff, ++ CALLBACK_ARG); ++ if (data[0] & 0xff0000) ++ CALLBACK_FUNC(1, _8255_DATA + 2, ++ (subd_8255->status >> 16) & 0xff, ++ CALLBACK_ARG); ++ } ++ ++ data[1] = CALLBACK_FUNC(0, _8255_DATA, 0, CALLBACK_ARG); ++ data[1] |= (CALLBACK_FUNC(0, _8255_DATA + 1, 0, CALLBACK_ARG) << 8); ++ data[1] |= (CALLBACK_FUNC(0, _8255_DATA + 2, 0, CALLBACK_ARG) << 16); ++ ++ return 0; ++} ++ ++int subd_8255_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int mask; ++ unsigned int bits; ++ subd_8255_t *subd_8255 = (subd_8255_t 
*)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ mask = 1 << CR_CHAN(insn->chan_desc); ++ ++ if (mask & 0x0000ff) { ++ bits = 0x0000ff; ++ } else if (mask & 0x00ff00) { ++ bits = 0x00ff00; ++ } else if (mask & 0x0f0000) { ++ bits = 0x0f0000; ++ } else { ++ bits = 0xf00000; ++ } ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subd_8255->io_bits &= ~bits; ++ break; ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subd_8255->io_bits |= bits; ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (subd_8255->io_bits & bits) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ do_config(subd); ++ ++ return 0; ++} ++ ++void a4l_subdev_8255_init(struct a4l_subdevice *subd) ++{ ++ subd_8255_t *subd_8255 = (subd_8255_t *)subd->priv; ++ /* Initializes the subdevice structure */ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ ++ /* Subdevice filling part */ ++ ++ subd->flags = A4L_SUBD_DIO; ++ subd->flags |= A4L_SUBD_CMD; ++ subd->chan_desc = &chandesc_8255; ++ subd->insn_bits = subd_8255_insn_bits; ++ subd->insn_config = subd_8255_insn_config; ++ ++ if(subd_8255->have_irq) { ++ subd->cmd_mask = &cmd_mask_8255; ++ subd->do_cmdtest = subd_8255_cmdtest; ++ subd->do_cmd = subd_8255_cmd; ++ subd->cancel = subd_8255_cancel; ++ } ++ ++ /* 8255 setting part */ ++ ++ if(CALLBACK_FUNC == NULL) ++ CALLBACK_FUNC = subdev_8255_cb; ++ ++ do_config(subd); ++} ++EXPORT_SYMBOL_GPL(a4l_subdev_8255_init); ++ ++/* ++ ++ Start of the 8255 standalone device ++ ++*/ ++ ++static int dev_8255_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ unsigned long *addrs; ++ int i, err = 0; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) { ++ a4l_err(dev, ++ "dev_8255_attach: unable to detect any 8255 chip, " ++ "chips addresses must be passed as attach arguments\n"); ++ return -EINVAL; ++ } ++ ++ addrs = (unsigned long*) arg->opts; ++ ++ for(i = 0; i < (arg->opts_size / sizeof(unsigned long)); i++) { ++ struct a4l_subdevice * subd; ++ subd_8255_t *subd_8255; ++ ++ subd = a4l_alloc_subd(sizeof(subd_8255_t), NULL); ++ if(subd == NULL) { ++ a4l_err(dev, ++ "dev_8255_attach: " ++ "unable to allocate subdevice\n"); ++ /* There is no need to free previously ++ allocated structure(s), the analogy layer will ++ do it for us */ ++ err = -ENOMEM; ++ goto out_attach; ++ } ++ ++ memset(subd, 0, sizeof(struct a4l_subdevice)); ++ memset(subd->priv, 0, sizeof(subd_8255_t)); ++ ++ subd_8255 = (subd_8255_t *)subd->priv; ++ ++ if(request_region(addrs[i], _8255_SIZE, "Analogy 8255") == 0) { ++ subd->flags = A4L_SUBD_UNUSED; ++ a4l_warn(dev, ++ "dev_8255_attach: " ++ "I/O port conflict at 0x%lx\n", addrs[i]); ++ } ++ else { ++ subd_8255->cb_arg = addrs[i]; ++ a4l_subdev_8255_init(subd); ++ } ++ ++ err = a4l_add_subd(dev, subd); ++ if(err < 0) { ++ a4l_err(dev, ++ "dev_8255_attach: " ++ "a4l_add_subd() failed (err=%d)\n", err); ++ goto out_attach; ++ } ++ } ++ ++out_attach: ++ return err; ++} ++ ++static int dev_8255_detach(struct a4l_device *dev) ++{ ++ struct a4l_subdevice *subd; ++ int i = 0; ++ ++ while((subd = a4l_get_subd(dev, i++)) != NULL) { ++ subd_8255_t *subd_8255 = (subd_8255_t *) subd->priv; ++ if(subd_8255 != NULL && subd_8255->cb_arg != 0) ++ release_region(subd_8255->cb_arg, _8255_SIZE); ++ } ++ ++ return 0; ++} ++ ++static struct a4l_driver drv_8255 = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_8255", ++ .driver_name = "8255", ++ .attach = dev_8255_attach, ++ .detach = dev_8255_detach, ++ .privdata_size = 0, ++}; ++ ++static int __init 
drv_8255_init(void) ++{ ++ return a4l_register_drv(&drv_8255); ++} ++ ++static void __exit drv_8255_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_8255); ++} ++MODULE_DESCRIPTION("Analogy driver for 8255 chip"); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_8255_init); ++module_exit(drv_8255_cleanup); +--- linux/drivers/xenomai/analogy/intel/parport.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/parport.c 2021-04-07 16:01:27.914633185 +0800 +@@ -0,0 +1,457 @@ ++/* ++ * Analogy driver for standard parallel port ++ * Copyright (C) 1998,2001 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ A cheap and easy way to get a few more digital I/O lines. Steal ++ additional parallel ports from old computers or your neighbors' ++ computers. ++ ++ Attach options list: ++ 0: I/O port base for the parallel port. ++ 1: IRQ ++ ++ Parallel Port Lines: ++ ++ pin subdev chan aka ++ --- ------ ---- --- ++ 1 2 0 strobe ++ 2 0 0 data 0 ++ 3 0 1 data 1 ++ 4 0 2 data 2 ++ 5 0 3 data 3 ++ 6 0 4 data 4 ++ 7 0 5 data 5 ++ 8 0 6 data 6 ++ 9 0 7 data 7 ++ 10 1 3 acknowledge ++ 11 1 4 busy ++ 12 1 2 output ++ 13 1 1 printer selected ++ 14 2 1 auto LF ++ 15 1 0 error ++ 16 2 2 init ++ 17 2 3 select printer ++ 18-25 ground ++ ++ Notes: ++ ++ Subdevices 0 is digital I/O, subdevice 1 is digital input, and ++ subdevice 2 is digital output. Unlike other Analogy devices, ++ subdevice 0 defaults to output. ++ ++ Pins 13 and 14 are inverted once by Analogy and once by the ++ hardware, thus cancelling the effect. ++ ++ Pin 1 is a strobe, thus acts like one. There's no way in software ++ to change this, at least on a standard parallel port. ++ ++ Subdevice 3 pretends to be a digital input subdevice, but it always ++ returns 0 when read. However, if you run a command with ++ scan_begin_src=TRIG_EXT, it uses pin 10 as a external triggering ++ pin, which can be used to wake up tasks. ++ ++ see http://www.beyondlogic.org/ for information. 
++ or http://www.linux-magazin.de/ausgabe/1999/10/IO/io.html ++*/ ++ ++#include ++#include ++#include /* For inb/outb */ ++#include ++ ++#define PARPORT_SIZE 3 ++ ++#define PARPORT_A 0 ++#define PARPORT_B 1 ++#define PARPORT_C 2 ++ ++#define DEFAULT_ADDRESS 0x378 ++#define DEFAULT_IRQ 7 ++ ++typedef struct parport_subd_priv { ++ unsigned long io_bits; ++} parport_spriv_t; ++ ++typedef struct parport_priv { ++ unsigned long io_base; ++ unsigned int a_data; ++ unsigned int c_data; ++ int enable_irq; ++} parport_priv_t; ++ ++#define devpriv ((parport_priv_t *)(dev->priv)) ++ ++static int parport_insn_a(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (data[0]) { ++ devpriv->a_data &= ~data[0]; ++ devpriv->a_data |= (data[0] & data[1]); ++ ++ outb(devpriv->a_data, devpriv->io_base + PARPORT_A); ++ } ++ ++ data[1] = inb(devpriv->io_base + PARPORT_A); ++ ++ return 0; ++} ++ ++static int parport_insn_config_a(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ parport_spriv_t *spriv = (parport_spriv_t *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ /* No need to check the channel descriptor; the input / output ++ setting is global for all channels */ ++ ++ switch (data[0]) { ++ ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ spriv->io_bits = 0xff; ++ devpriv->c_data &= ~(1 << 5); ++ break; ++ ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ spriv->io_bits = 0; ++ devpriv->c_data |= (1 << 5); ++ break; ++ ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (spriv->io_bits == 0xff) ? ++ A4L_OUTPUT: A4L_INPUT; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ return 0; ++} ++ ++static int parport_insn_b(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (data[0]) { ++ /* should writes be ignored? 
*/ ++ } ++ ++ data[1] = (inb(devpriv->io_base + PARPORT_B) >> 3); ++ ++ return 0; ++} ++ ++static int parport_insn_c(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] &= 0x0f; ++ if (data[0]) { ++ devpriv->c_data &= ~data[0]; ++ devpriv->c_data |= (data[0] & data[1]); ++ ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ } ++ ++ data[1] = devpriv->c_data & 0xf; ++ ++ return 2; ++} ++ ++static int parport_intr_insn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (insn->data_size < sizeof(uint8_t)) ++ return -EINVAL; ++ ++ data[1] = 0; ++ return 0; ++} ++ ++static struct a4l_cmd_desc parport_intr_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_FOLLOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++static int parport_intr_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc * cmd) ++{ ++ ++ if (cmd->start_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->convert_arg != 0) { ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != 1) { ++ return -EINVAL; ++ } ++ if (cmd->stop_arg != 0) { ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int parport_intr_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ devpriv->c_data |= 0x10; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ devpriv->enable_irq = 1; ++ ++ return 0; ++} ++ ++static void parport_intr_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ a4l_info(dev, "cancel in progress\n"); ++ ++ devpriv->c_data &= ~0x10; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ devpriv->enable_irq = 0; ++} ++ ++static int parport_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ struct a4l_subdevice *subd = a4l_get_subd(dev, 3); ++ ++ if (!devpriv->enable_irq) { ++ a4l_err(dev, "parport_interrupt: bogus irq, ignored\n"); ++ return IRQ_NONE; ++ } ++ ++ a4l_buf_put(subd, 0, sizeof(unsigned int)); ++ a4l_buf_evt(subd, 0); ++ ++ return 0; ++} ++ ++ ++/* --- Channels descriptor --- */ ++ ++static struct a4l_channels_desc parport_chan_desc_a = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_b = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 5, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_c = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 4, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc parport_chan_desc_intr = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 1, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++/* --- Subdevice initialization functions --- */ ++ ++static void setup_subd_a(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DIO; ++ subd->chan_desc = &parport_chan_desc_a; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_a; ++ subd->insn_config = parport_insn_config_a; ++} ++ ++static void setup_subd_b(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DI; ++ subd->chan_desc = &parport_chan_desc_b; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_b; ++} ++ ++static void 
setup_subd_c(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DO; ++ subd->chan_desc = &parport_chan_desc_c; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_insn_c; ++} ++ ++static void setup_subd_intr(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DI; ++ subd->chan_desc = &parport_chan_desc_intr; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = parport_intr_insn; ++ subd->cmd_mask = &parport_intr_cmd_mask; ++ subd->do_cmdtest = parport_intr_cmdtest; ++ subd->do_cmd = parport_intr_cmd; ++ subd->cancel = parport_intr_cancel; ++} ++ ++static void (*setup_subds[3])(struct a4l_subdevice *) = { ++ setup_subd_a, ++ setup_subd_b, ++ setup_subd_c ++}; ++ ++static int dev_parport_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int i, err = 0, irq = A4L_IRQ_UNUSED; ++ unsigned long io_base; ++ ++ if(arg->opts == NULL || arg->opts_size < sizeof(unsigned long)) { ++ ++ a4l_warn(dev, ++ "dev_parport_attach: no attach options specified, " ++ "taking default options (addr=0x%x, irq=%d)\n", ++ DEFAULT_ADDRESS, DEFAULT_IRQ); ++ ++ io_base = DEFAULT_ADDRESS; ++ irq = DEFAULT_IRQ; ++ } else { ++ ++ io_base = ((unsigned long *)arg->opts)[0]; ++ ++ if (arg->opts_size >= 2 * sizeof(unsigned long)) ++ irq = (int) ((unsigned long *)arg->opts)[1]; ++ } ++ ++ if (!request_region(io_base, PARPORT_SIZE, "analogy_parport")) { ++ a4l_err(dev, "dev_parport_attach: I/O port conflict"); ++ return -EIO; ++ } ++ ++ a4l_info(dev, "address = 0x%lx\n", io_base); ++ ++ for (i = 0; i < 3; i++) { ++ ++ struct a4l_subdevice *subd = a4l_alloc_subd(sizeof(parport_spriv_t), ++ setup_subds[i]); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) ++ return err; ++ } ++ ++ if (irq != A4L_IRQ_UNUSED) { ++ ++ struct a4l_subdevice *subd; ++ ++ a4l_info(dev, "irq = %d\n", irq); ++ ++ err = a4l_request_irq(dev, irq, parport_interrupt, 0, dev); ++ if (err < 0) { ++ a4l_err(dev, "dev_parport_attach: irq not available\n"); ++ return err; ++ } ++ ++ subd = a4l_alloc_subd(0, setup_subd_intr); ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err < 0) ++ return err; ++ } ++ ++ devpriv->io_base = io_base; ++ ++ devpriv->a_data = 0; ++ outb(devpriv->a_data, devpriv->io_base + PARPORT_A); ++ ++ devpriv->c_data = 0; ++ outb(devpriv->c_data, devpriv->io_base + PARPORT_C); ++ ++ return 0; ++} ++ ++static int dev_parport_detach(struct a4l_device *dev) ++{ ++ int err = 0; ++ ++ if (devpriv->io_base != 0) ++ release_region(devpriv->io_base, PARPORT_SIZE); ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ a4l_free_irq(dev, a4l_get_irq(dev)); ++ } ++ ++ ++ return err; ++} ++ ++static struct a4l_driver drv_parport = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_parport", ++ .driver_name = "parport", ++ .attach = dev_parport_attach, ++ .detach = dev_parport_detach, ++ .privdata_size = sizeof(parport_priv_t), ++}; ++ ++static int __init drv_parport_init(void) ++{ ++ return a4l_register_drv(&drv_parport); ++} ++ ++static void __exit drv_parport_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_parport); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for standard parallel port"); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_parport_init); ++module_exit(drv_parport_cleanup); +--- linux/drivers/xenomai/analogy/intel/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/Makefile 2021-04-07 16:01:27.909633192 +0800 +@@ -0,0 +1,10 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ 
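++# analogy_8255 builds the 8255 digital I/O chip subdevice driver; analogy_parport builds the standard parallel port driver.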
++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_8255) += analogy_8255.o ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_PARPORT) += analogy_parport.o ++ ++analogy_8255-y := 8255.o ++ ++analogy_parport-y := parport.o +--- linux/drivers/xenomai/analogy/intel/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/Kconfig 2021-04-07 16:01:27.905633197 +0800 +@@ -0,0 +1,10 @@ ++ ++config XENO_DRIVERS_ANALOGY_8255 ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "8255 driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_PARPORT ++ depends on XENO_DRIVERS_ANALOGY && X86 ++ tristate "Standard parallel port driver" ++ default n +--- linux/drivers/xenomai/analogy/intel/8255.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/intel/8255.h 2021-04-07 16:01:27.900633205 +0800 +@@ -0,0 +1,60 @@ ++/* ++ * Hardware driver for 8255 chip ++ * @note Copyright (C) 1999 David A. Schleef ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef __ANALOGY_8255_H__ ++#define __ANALOGY_8255_H__ ++ ++#include ++ ++typedef int (*a4l_8255_cb_t)(int, int, int, unsigned long); ++ ++typedef struct subd_8255_struct { ++ unsigned long cb_arg; ++ a4l_8255_cb_t cb_func; ++ unsigned int status; ++ int have_irq; ++ int io_bits; ++} subd_8255_t; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_8255) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_8255_MODULE)) ++ ++#define _8255_SIZE 4 ++ ++#define _8255_DATA 0 ++#define _8255_CR 3 ++ ++#define CR_C_LO_IO 0x01 ++#define CR_B_IO 0x02 ++#define CR_B_MODE 0x04 ++#define CR_C_HI_IO 0x08 ++#define CR_A_IO 0x10 ++#define CR_A_MODE(a) ((a)<<5) ++#define CR_CW 0x80 ++ ++void a4l_subdev_8255_init(struct a4l_subdevice *subd); ++void a4l_subdev_8255_interrupt(struct a4l_subdevice *subd); ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_8255 */ ++ ++#define a4l_subdev_8255_init(x) do { } while(0) ++#define a4l_subdev_8255_interrupt(x) do { } while(0) ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_8255 */ ++ ++#endif /* !__ANALOGY_8255_H__ */ +--- linux/drivers/xenomai/analogy/rtdm_helpers.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/rtdm_helpers.c 2021-04-07 16:01:27.895633212 +0800 +@@ -0,0 +1,214 @@ ++/* ++ * Analogy for Linux, RTDM helpers ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* --- Time section --- */ ++ ++static nanosecs_abs_t a4l_clkofs; ++ ++void a4l_init_time(void) ++{ ++ nanosecs_abs_t t1, t2; ++ t1 = rtdm_clock_read(); ++ t2 = ktime_to_ns(ktime_get_real()); ++ a4l_clkofs = t2 - t1; ++} ++ ++nanosecs_abs_t a4l_get_time(void) ++{ ++ return a4l_clkofs + rtdm_clock_read(); ++} ++ ++/* --- IRQ section --- */ ++ ++static int a4l_handle_irq(rtdm_irq_t *irq_handle) ++{ ++ struct a4l_irq_descriptor *dsc = ++ rtdm_irq_get_arg(irq_handle, struct a4l_irq_descriptor); ++ ++ if (dsc->handler((unsigned int)irq_handle->irq, dsc->cookie) == 0) ++ return RTDM_IRQ_HANDLED; ++ else ++ return RTDM_IRQ_NONE; ++} ++ ++int __a4l_request_irq(struct a4l_irq_descriptor *dsc, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie) ++{ ++ /* Fills the IRQ descriptor */ ++ dsc->handler = handler; ++ dsc->cookie = cookie; ++ dsc->irq = irq; ++ ++ /* Registers the RT IRQ handler */ ++ return rtdm_irq_request(&dsc->rtdm_desc, ++ (int)irq, ++ a4l_handle_irq, flags, "Analogy device", dsc); ++} ++ ++int __a4l_free_irq(struct a4l_irq_descriptor * dsc) ++{ ++ return rtdm_irq_free(&dsc->rtdm_desc); ++} ++ ++/* --- Synchronization section --- */ ++ ++static void a4l_nrt_sync_handler(rtdm_nrtsig_t *nrt_sig, void *arg) ++{ ++ struct a4l_sync *snc = (struct a4l_sync *) arg; ++ wake_up_interruptible(&snc->wq); ++} ++ ++int a4l_init_sync(struct a4l_sync *snc) ++{ ++ int ret = 0; ++ ++ /* Initializes the flags field */ ++ snc->status = 0; ++ ++ /* If the process is NRT, we need a wait queue structure */ ++ init_waitqueue_head(&snc->wq); ++ ++ /* Initializes the RTDM event */ ++ rtdm_event_init(&snc->rtdm_evt, 0); ++ ++ /* Initializes the gateway to NRT context */ ++ rtdm_nrtsig_init(&snc->nrt_sig, a4l_nrt_sync_handler, snc); ++ ++ return ret; ++} ++ ++void a4l_cleanup_sync(struct a4l_sync *snc) ++{ ++ rtdm_nrtsig_destroy(&snc->nrt_sig); ++ rtdm_event_destroy(&snc->rtdm_evt); ++} ++ ++int a4l_wait_sync(struct a4l_sync *snc, int rt) ++{ ++ int ret = 0; ++ ++ if (test_bit(__EVT_PDING, &snc->status)) ++ goto out_wait; ++ ++ if (rt != 0) { ++ /* If the calling process is in primary mode, ++ we can use RTDM API ... */ ++ set_bit(__RT_WAITER, &snc->status); ++ ret = rtdm_event_wait(&snc->rtdm_evt); ++ } else { ++ /* ... else if the process is NRT, ++ the Linux wait queue system is used */ ++ set_bit(__NRT_WAITER, &snc->status); ++ ret = wait_event_interruptible(snc->wq, ++ test_bit(__EVT_PDING, ++ &snc->status)); ++ } ++ ++out_wait: ++ ++ clear_bit(__EVT_PDING, &snc->status); ++ ++ return ret; ++} ++ ++int a4l_timedwait_sync(struct a4l_sync * snc, ++ int rt, unsigned long long ns_timeout) ++{ ++ int ret = 0; ++ unsigned long timeout; ++ ++ if (test_bit(__EVT_PDING, &snc->status)) ++ goto out_wait; ++ ++ if (rt != 0) { ++ /* If the calling process is in primary mode, ++ we can use RTDM API ... */ ++ set_bit(__RT_WAITER, &snc->status); ++ ret = rtdm_event_timedwait(&snc->rtdm_evt, ns_timeout, NULL); ++ } else { ++ /* ... 
else if the process is NRT, ++ the Linux wait queue system is used */ ++ ++ timeout = do_div(ns_timeout, 1000); ++ ++ /* We consider the Linux kernel cannot tick at a frequency ++ higher than 1 MHz ++ If the timeout value is lower than 1us, we round up to 1us */ ++ timeout = (timeout == 0) ? 1 : usecs_to_jiffies(timeout); ++ ++ set_bit(__NRT_WAITER, &snc->status); ++ ++ ret = wait_event_interruptible_timeout(snc->wq, ++ test_bit(__EVT_PDING, ++ &snc->status), ++ timeout); ++ } ++ ++out_wait: ++ ++ clear_bit(__EVT_PDING, &snc->status); ++ ++ return ret; ++} ++ ++void a4l_flush_sync(struct a4l_sync * snc) ++{ ++ /* Clear the status bitfield */ ++ snc->status = 0; ++ ++ /* Flush the RTDM event */ ++ rtdm_event_clear(&snc->rtdm_evt); ++} ++ ++void a4l_signal_sync(struct a4l_sync * snc) ++{ ++ int hit = 0; ++ ++ set_bit(__EVT_PDING, &snc->status); ++ ++ /* a4l_signal_sync() is bound not to be called upon the right ++ user process context; so, the status flags stores its mode. ++ Thus the proper event signaling function is called */ ++ if (test_and_clear_bit(__RT_WAITER, &snc->status)) { ++ rtdm_event_signal(&snc->rtdm_evt); ++ hit++; ++ } ++ ++ if (test_and_clear_bit(__NRT_WAITER, &snc->status)) { ++ rtdm_nrtsig_pend(&snc->nrt_sig); ++ hit++; ++ } ++ ++ if (hit == 0) { ++ /* At first signaling, we may not know the proper way ++ to send the event */ ++ rtdm_event_signal(&snc->rtdm_evt); ++ rtdm_nrtsig_pend(&snc->nrt_sig); ++ } ++} +--- linux/drivers/xenomai/analogy/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/Makefile 2021-04-07 16:01:27.891633217 +0800 +@@ -0,0 +1,16 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY) += xeno_analogy.o testing/ intel/ national_instruments/ sensoray/ ++ ++xeno_analogy-y := \ ++ buffer.o \ ++ command.o \ ++ device.o \ ++ driver.o \ ++ driver_facilities.o \ ++ instruction.o \ ++ rtdm_helpers.o \ ++ subdevice.o \ ++ transfer.o \ ++ rtdm_interface.o +--- linux/drivers/xenomai/analogy/command.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/command.c 2021-04-07 16:01:27.886633224 +0800 +@@ -0,0 +1,392 @@ ++/* ++ * Analogy for Linux, command related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Command descriptor management functions --- */ ++int a4l_fill_cmddesc(struct a4l_device_context *cxt, struct a4l_cmd_desc *desc, ++ unsigned int **chan_descs, void *arg) ++{ ++ unsigned int *tmpchans = NULL; ++ int ret = 0; ++ ++ ret = rtdm_safe_copy_from_user(rtdm_private_to_fd(cxt), ++ desc, arg, sizeof(struct a4l_cmd_desc)); ++ if (ret != 0) ++ goto out_cmddesc; ++ ++ ++ if (desc->nb_chan == 0) { ++ ret = -EINVAL; ++ goto out_cmddesc; ++ } ++ ++ tmpchans = rtdm_malloc(desc->nb_chan * sizeof(unsigned int)); ++ if (tmpchans == NULL) { ++ ret = -ENOMEM; ++ goto out_cmddesc; ++ } ++ ++ ret = rtdm_safe_copy_from_user(rtdm_private_to_fd(cxt), ++ tmpchans, ++ desc->chan_descs, ++ desc->nb_chan * sizeof(unsigned int)); ++ if (ret != 0) { ++ __a4l_err("%s invalid arguments \n", __FUNCTION__); ++ goto out_cmddesc; ++ } ++ ++ *chan_descs = desc->chan_descs; ++ desc->chan_descs = tmpchans; ++ ++ __a4l_dbg(1, core_dbg, "desc dump: \n"); ++ __a4l_dbg(1, core_dbg, "\t->idx_subd=%u\n", desc->idx_subd); ++ __a4l_dbg(1, core_dbg, "\t->flags=%lu\n", desc->flags); ++ __a4l_dbg(1, core_dbg, "\t->nb_chan=%u\n", desc->nb_chan); ++ __a4l_dbg(1, core_dbg, "\t->chan_descs=0x%x\n", *desc->chan_descs); ++ __a4l_dbg(1, core_dbg, "\t->data_len=%u\n", desc->data_len); ++ __a4l_dbg(1, core_dbg, "\t->pdata=0x%p\n", desc->data); ++ ++ out_cmddesc: ++ ++ if (ret != 0) { ++ __a4l_err("a4l_fill_cmddesc: %d \n", ret); ++ if (tmpchans != NULL) ++ rtdm_free(tmpchans); ++ desc->chan_descs = NULL; ++ } ++ ++ return ret; ++} ++ ++void a4l_free_cmddesc(struct a4l_cmd_desc * desc) ++{ ++ if (desc->chan_descs != NULL) ++ rtdm_free(desc->chan_descs); ++} ++ ++int a4l_check_cmddesc(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd; ++ ++ if (desc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_check_cmddesc: " ++ "subdevice index out of range (idx=%u)\n", ++ desc->idx_subd); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[desc->idx_subd]; ++ ++ if ((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) { ++ __a4l_err("a4l_check_cmddesc: " ++ "subdevice type incoherent\n"); ++ return -EIO; ++ } ++ ++ if (!(subd->flags & A4L_SUBD_CMD)) { ++ __a4l_err("a4l_check_cmddesc: operation not supported, " ++ "synchronous only subdevice\n"); ++ return -EIO; ++ } ++ ++ if (test_bit(A4L_SUBD_BUSY, &subd->status)) { ++ __a4l_err("a4l_check_cmddesc: subdevice busy\n"); ++ return -EBUSY; ++ } ++ ++ return a4l_check_chanlist(dev->transfer.subds[desc->idx_subd], ++ desc->nb_chan, desc->chan_descs); ++} ++ ++/* --- Command checking functions --- */ ++ ++int a4l_check_generic_cmdcnt(struct a4l_cmd_desc * desc) ++{ ++ unsigned int tmp1, tmp2; ++ ++ /* Makes sure trigger sources are trivially valid */ ++ tmp1 = ++ desc->start_src & ~(TRIG_NOW | TRIG_INT | TRIG_EXT | TRIG_FOLLOW); ++ tmp2 = desc->start_src & (TRIG_NOW | TRIG_INT | TRIG_EXT | TRIG_FOLLOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: start_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_begin_src & ~(TRIG_TIMER | TRIG_EXT | TRIG_FOLLOW); ++ tmp2 = desc->scan_begin_src & (TRIG_TIMER | TRIG_EXT | TRIG_FOLLOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, , weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->convert_src & ~(TRIG_TIMER | TRIG_EXT | TRIG_NOW); ++ tmp2 = desc->convert_src & (TRIG_TIMER | TRIG_EXT | 
TRIG_NOW); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: convert_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_end_src & ~(TRIG_COUNT); ++ if (tmp1 != 0) { ++ __a4l_err("a4l_check_cmddesc: scan_end_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->stop_src & ~(TRIG_COUNT | TRIG_NONE); ++ tmp2 = desc->stop_src & (TRIG_COUNT | TRIG_NONE); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: stop_src, weird trigger\n"); ++ return -EINVAL; ++ } ++ ++ /* Makes sure trigger sources are unique */ ++ if (desc->start_src != TRIG_NOW && ++ desc->start_src != TRIG_INT && ++ desc->start_src != TRIG_EXT && desc->start_src != TRIG_FOLLOW) { ++ __a4l_err("a4l_check_cmddesc: start_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->scan_begin_src != TRIG_TIMER && ++ desc->scan_begin_src != TRIG_EXT && ++ desc->scan_begin_src != TRIG_FOLLOW) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->convert_src != TRIG_TIMER && ++ desc->convert_src != TRIG_EXT && desc->convert_src != TRIG_NOW) { ++ __a4l_err("a4l_check_cmddesc: convert_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ if (desc->stop_src != TRIG_COUNT && desc->stop_src != TRIG_NONE) { ++ __a4l_err("a4l_check_cmddesc: stop_src, " ++ "only one trigger should be set\n"); ++ return -EINVAL; ++ } ++ ++ /* Makes sure arguments are trivially compatible */ ++ tmp1 = desc->start_src & (TRIG_NOW | TRIG_FOLLOW | TRIG_INT); ++ tmp2 = desc->start_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no start_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->scan_begin_src & TRIG_FOLLOW; ++ tmp2 = desc->scan_begin_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no scan_begin_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->convert_src & TRIG_NOW; ++ tmp2 = desc->convert_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no convert_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ tmp1 = desc->stop_src & TRIG_NONE; ++ tmp2 = desc->stop_arg; ++ if (tmp1 != 0 && tmp2 != 0) { ++ __a4l_err("a4l_check_cmddesc: no stop_arg expected\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int a4l_check_specific_cmdcnt(struct a4l_device_context * cxt, struct a4l_cmd_desc * desc) ++{ ++ unsigned int tmp1, tmp2; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_cmd_desc *cmd_mask = dev->transfer.subds[desc->idx_subd]->cmd_mask; ++ ++ if (cmd_mask == NULL) ++ return 0; ++ ++ if (cmd_mask->start_src != 0) { ++ tmp1 = desc->start_src & ~(cmd_mask->start_src); ++ tmp2 = desc->start_src & (cmd_mask->start_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: start_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->scan_begin_src != 0) { ++ tmp1 = desc->scan_begin_src & ~(cmd_mask->scan_begin_src); ++ tmp2 = desc->scan_begin_src & (cmd_mask->scan_begin_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: scan_begin_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->convert_src != 0) { ++ tmp1 = desc->convert_src & ~(cmd_mask->convert_src); ++ tmp2 = desc->convert_src & (cmd_mask->convert_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: convert_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->scan_end_src != 0) { ++ 
tmp1 = desc->scan_end_src & ~(cmd_mask->scan_end_src); ++ if (tmp1 != 0) { ++ __a4l_err("a4l_check_cmddesc: scan_end_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd_mask->stop_src != 0) { ++ tmp1 = desc->stop_src & ~(cmd_mask->stop_src); ++ tmp2 = desc->stop_src & (cmd_mask->stop_src); ++ if (tmp1 != 0 || tmp2 == 0) { ++ __a4l_err("a4l_check_cmddesc: stop_src, " ++ "trigger unsupported\n"); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* --- IOCTL / FOPS function --- */ ++ ++int a4l_ioctl_cmd(struct a4l_device_context * ctx, void *arg) ++{ ++ int ret = 0, simul_flag = 0; ++ struct a4l_cmd_desc *cmd_desc = NULL; ++ struct a4l_device *dev = a4l_get_dev(ctx); ++ unsigned int *chan_descs, *tmp; ++ struct a4l_subdevice *subd; ++ ++ /* The command launching cannot be done in real-time because ++ of some possible buffer allocations in the drivers */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Basically check the device */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_cmd: cannot command " ++ "an unattached device\n"); ++ return -EINVAL; ++ } ++ ++ /* Allocates the command */ ++ cmd_desc = (struct a4l_cmd_desc *) rtdm_malloc(sizeof(struct a4l_cmd_desc)); ++ if (cmd_desc == NULL) ++ return -ENOMEM; ++ memset(cmd_desc, 0, sizeof(struct a4l_cmd_desc)); ++ ++ /* Gets the command */ ++ ret = a4l_fill_cmddesc(ctx, cmd_desc, &chan_descs, arg); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ /* Checks the command */ ++ ret = a4l_check_cmddesc(ctx, cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ ret = a4l_check_generic_cmdcnt(cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ ret = a4l_check_specific_cmdcnt(ctx, cmd_desc); ++ if (ret != 0) ++ goto out_ioctl_cmd; ++ ++ __a4l_dbg(1, core_dbg,"1st cmd checks passed\n"); ++ subd = dev->transfer.subds[cmd_desc->idx_subd]; ++ ++ /* Tests the command with the cmdtest function */ ++ if (cmd_desc->flags & A4L_CMD_SIMUL) { ++ simul_flag = 1; ++ ++ if (!subd->do_cmdtest) { ++ __a4l_err("a4l_ioctl_cmd: driver's cmd_test NULL\n"); ++ ret = -EINVAL; ++ goto out_ioctl_cmd; ++ } ++ ++ ret = subd->do_cmdtest(subd, cmd_desc); ++ if (ret != 0) { ++ __a4l_err("a4l_ioctl_cmd: driver's cmd_test failed\n"); ++ goto out_ioctl_cmd; ++ } ++ __a4l_dbg(1, core_dbg, "driver's cmd checks passed\n"); ++ goto out_ioctl_cmd; ++ } ++ ++ ++ /* Gets the transfer system ready */ ++ ret = a4l_setup_buffer(ctx, cmd_desc); ++ if (ret < 0) ++ goto out_ioctl_cmd; ++ ++ /* Eventually launches the command */ ++ ret = subd->do_cmd(subd, cmd_desc); ++ ++ if (ret != 0) { ++ a4l_cancel_buffer(ctx); ++ goto out_ioctl_cmd; ++ } ++ ++ out_ioctl_cmd: ++ ++ if (simul_flag) { ++ /* copy the kernel based descriptor */ ++ tmp = cmd_desc->chan_descs; ++ /* return the user based descriptor */ ++ cmd_desc->chan_descs = chan_descs; ++ rtdm_safe_copy_to_user(rtdm_private_to_fd(ctx), arg, cmd_desc, ++ sizeof(struct a4l_cmd_desc)); ++ /* make sure we release the memory associated to the kernel */ ++ cmd_desc->chan_descs = tmp; ++ ++ } ++ ++ if (ret != 0 || simul_flag == 1) { ++ a4l_free_cmddesc(cmd_desc); ++ rtdm_free(cmd_desc); ++ } ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/subdevice.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/subdevice.c 2021-04-07 16:01:27.881633232 +0800 +@@ -0,0 +1,449 @@ ++/* ++ * Analogy for Linux, subdevice, channel and range related features ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* --- Common ranges declarations --- */ ++ ++struct a4l_rngtab rng_bipolar10 = { 1, { ++ RANGE_V(-10, 10), ++ }}; ++struct a4l_rngdesc a4l_range_bipolar10 = RNG_GLOBAL(rng_bipolar10); ++ ++struct a4l_rngtab rng_bipolar5 = { 1, { ++ RANGE_V(-5, 5), ++ }}; ++struct a4l_rngdesc a4l_range_bipolar5 = RNG_GLOBAL(rng_bipolar5); ++ ++struct a4l_rngtab rng_unipolar10 = { 1, { ++ RANGE_V(0, 10), ++ }}; ++struct a4l_rngdesc a4l_range_unipolar10 = RNG_GLOBAL(rng_unipolar10); ++ ++struct a4l_rngtab rng_unipolar5 = { 1, { ++ RANGE_V(0, 5), ++ }}; ++struct a4l_rngdesc a4l_range_unipolar5 = RNG_GLOBAL(rng_unipolar5); ++ ++struct a4l_rngtab rng_unknown = { 1, { ++ RANGE(0, 1), ++ }}; ++struct a4l_rngdesc a4l_range_unknown = RNG_GLOBAL(rng_unknown); ++ ++struct a4l_rngtab rng_fake = { 0, { ++ RANGE(0, 0), ++ }}; ++struct a4l_rngdesc a4l_range_fake = RNG_GLOBAL(rng_fake); ++ ++/* --- Basic channel / range management functions --- */ ++ ++struct a4l_channel *a4l_get_chfeat(struct a4l_subdevice *sb, int idx) ++{ ++ int i = (sb->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? idx : 0; ++ return &(sb->chan_desc->chans[i]); ++} ++ ++struct a4l_range *a4l_get_rngfeat(struct a4l_subdevice *sb, int chidx, int rngidx) ++{ ++ int i = (sb->rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? chidx : 0; ++ return &(sb->rng_desc->rngtabs[i]->rngs[rngidx]); ++} ++ ++int a4l_check_chanlist(struct a4l_subdevice *subd, ++ unsigned char nb_chan, unsigned int *chans) ++{ ++ int i, j; ++ ++ if (nb_chan > subd->chan_desc->length) ++ return -EINVAL; ++ ++ for (i = 0; i < nb_chan; i++) { ++ j = (subd->chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? i : 0; ++ ++ if (CR_CHAN(chans[i]) >= subd->chan_desc->length) { ++ __a4l_err("a4l_check_chanlist: " ++ "chan idx out_of range (%u>=%lu)\n", ++ CR_CHAN(chans[i]), subd->chan_desc->length); ++ return -EINVAL; ++ } ++ if (CR_AREF(chans[i]) != 0 && ++ (CR_AREF(chans[i]) & subd->chan_desc->chans[j].flags) == 0) ++ { ++ __a4l_err("a4l_check_chanlist: " ++ "bad channel type\n"); ++ return -EINVAL; ++ } ++ } ++ ++ if (subd->rng_desc == NULL) ++ return 0; ++ ++ for (i = 0; i < nb_chan; i++) { ++ j = (subd->rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
i : 0; ++ ++ if (CR_RNG(chans[i]) > subd->rng_desc->rngtabs[j]->length) { ++ __a4l_err("a4l_check_chanlist: " ++ "rng idx out_of range (%u>=%u)\n", ++ CR_RNG(chans[i]), ++ subd->rng_desc->rngtabs[j]->length); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* --- Upper layer functions --- */ ++ ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)) ++{ ++ struct a4l_subdevice *subd; ++ ++ subd = rtdm_malloc(sizeof(struct a4l_subdevice) + sizeof_priv); ++ ++ if(subd != NULL) { ++ memset(subd, 0 , sizeof(struct a4l_subdevice) + sizeof_priv); ++ if(setup != NULL) ++ setup(subd); ++ } ++ ++ return subd; ++} ++ ++int a4l_add_subd(struct a4l_device * dev, struct a4l_subdevice * subd) ++{ ++ struct list_head *this; ++ int i = 0; ++ ++ /* Basic checking */ ++ if (dev == NULL || subd == NULL) ++ return -EINVAL; ++ ++ list_add_tail(&subd->list, &dev->subdvsq); ++ ++ subd->dev = dev; ++ ++ list_for_each(this, &dev->subdvsq) { ++ i++; ++ } ++ ++ subd->idx = --i; ++ ++ return i; ++} ++ ++struct a4l_subdevice *a4l_get_subd(struct a4l_device *dev, int idx) ++{ ++ int i = 0; ++ struct a4l_subdevice *subd = NULL; ++ struct list_head *this; ++ ++ /* This function is not optimized as we do not go through the ++ transfer structure */ ++ ++ list_for_each(this, &dev->subdvsq) { ++ if(idx == i++) ++ subd = list_entry(this, struct a4l_subdevice, list); ++ } ++ ++ return subd; ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_subdinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ int i, ret = 0; ++ a4l_sbinfo_t *subd_info; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_subdinfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ subd_info = rtdm_malloc(dev->transfer.nb_subd * ++ sizeof(a4l_sbinfo_t)); ++ if (subd_info == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < dev->transfer.nb_subd; i++) { ++ subd_info[i].flags = dev->transfer.subds[i]->flags; ++ subd_info[i].status = dev->transfer.subds[i]->status; ++ subd_info[i].nb_chan = ++ (dev->transfer.subds[i]->chan_desc != NULL) ? 
++ dev->transfer.subds[i]->chan_desc->length : 0; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ subd_info, dev->transfer.nb_subd * ++ sizeof(a4l_sbinfo_t)) != 0) ++ ret = -EFAULT; ++ ++ rtdm_free(subd_info); ++ ++ return ret; ++ ++} ++ ++int a4l_ioctl_nbchaninfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_chinfo_arg_t inarg; ++ ++ /* Basic checking */ ++ if (!dev->flags & A4L_DEV_ATTACHED_NR) { ++ __a4l_err("a4l_ioctl_nbchaninfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, arg, ++ sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_nbchaninfo: subdevice index " ++ "out of range\n"); ++ return -EINVAL; ++ } ++ ++ if(dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) ++ inarg.info = (void *)0; ++ else ++ inarg.info = (void *)(unsigned long) ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length; ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ &inarg, sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int a4l_ioctl_chaninfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_chinfo_t *chan_info; ++ a4l_chinfo_arg_t inarg; ++ struct a4l_channels_desc *chan_desc; ++ struct a4l_rngdesc *rng_desc; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_chaninfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, arg, ++ sizeof(a4l_chinfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_chaninfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ chan_desc = dev->transfer.subds[inarg.idx_subd]->chan_desc; ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ ++ if (chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_chaninfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if(rng_desc == NULL) ++ rng_desc = &a4l_range_fake; ++ ++ chan_info = rtdm_malloc(chan_desc->length * sizeof(a4l_chinfo_t)); ++ if (chan_info == NULL) ++ return -ENOMEM; ++ ++ /* If the channel descriptor is global, the fields are filled ++ with the same instance of channel descriptor */ ++ for (i = 0; i < chan_desc->length; i++) { ++ int j = ++ (chan_desc->mode != A4L_CHAN_GLOBAL_CHANDESC) ? i : 0; ++ int k = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
i : 0; ++ ++ chan_info[i].chan_flags = chan_desc->chans[j].flags; ++ chan_info[i].nb_bits = chan_desc->chans[j].nb_bits; ++ chan_info[i].nb_rng = rng_desc->rngtabs[k]->length; ++ ++ if (chan_desc->mode == A4L_CHAN_GLOBAL_CHANDESC) ++ chan_info[i].chan_flags |= A4L_CHAN_GLOBAL; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ inarg.info, ++ chan_info, ++ chan_desc->length * ++ sizeof(a4l_chinfo_t)) != 0) ++ return -EFAULT; ++ ++ rtdm_free(chan_info); ++ ++ return ret; ++} ++ ++int a4l_ioctl_nbrnginfo(struct a4l_device_context * cxt, void *arg) ++{ ++ int i; ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ a4l_rnginfo_arg_t inarg; ++ struct a4l_rngdesc *rng_desc; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_nbrnginfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, ++ arg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_nbrnginfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ if (dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_nbrnginfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if (inarg.idx_chan >= ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length) { ++ __a4l_err("a4l_ioctl_nbrnginfo: bad channel index\n"); ++ return -EINVAL; ++ } ++ ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ if (rng_desc != NULL) { ++ i = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? ++ inarg.idx_chan : 0; ++ inarg.info = (void *)(unsigned long) ++ rng_desc->rngtabs[i]->length; ++ } else ++ inarg.info = (void *)0; ++ ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, ++ &inarg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int a4l_ioctl_rnginfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ unsigned int tmp; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_rngdesc *rng_desc; ++ a4l_rnginfo_t *rng_info; ++ a4l_rnginfo_arg_t inarg; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_rnginfo: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if (rtdm_safe_copy_from_user(fd, ++ &inarg, ++ arg, sizeof(a4l_rnginfo_arg_t)) != 0) ++ return -EFAULT; ++ ++ if (inarg.idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_ioctl_rnginfo: bad subdevice index\n"); ++ return -EINVAL; ++ } ++ ++ if (dev->transfer.subds[inarg.idx_subd]->chan_desc == NULL) { ++ __a4l_err("a4l_ioctl_rnginfo: no channel descriptor " ++ "for subdevice %d\n", inarg.idx_subd); ++ return -EINVAL; ++ } ++ ++ if (inarg.idx_chan >= ++ dev->transfer.subds[inarg.idx_subd]->chan_desc->length) { ++ __a4l_err("a4l_ioctl_rnginfo: bad channel index\n"); ++ return -EINVAL; ++ } ++ ++ rng_desc = dev->transfer.subds[inarg.idx_subd]->rng_desc; ++ if (rng_desc == NULL) { ++ __a4l_err("a4l_ioctl_rnginfo: no range descriptor " ++ "for channel %d\n", inarg.idx_chan); ++ return -EINVAL; ++ } ++ ++ /* If the range descriptor is global, ++ we take the first instance */ ++ tmp = (rng_desc->mode != A4L_RNG_GLOBAL_RNGDESC) ? 
++ inarg.idx_chan : 0; ++ ++ rng_info = rtdm_malloc(rng_desc->rngtabs[tmp]->length * ++ sizeof(a4l_rnginfo_t)); ++ if (rng_info == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < rng_desc->rngtabs[tmp]->length; i++) { ++ rng_info[i].min = rng_desc->rngtabs[tmp]->rngs[i].min; ++ rng_info[i].max = rng_desc->rngtabs[tmp]->rngs[i].max; ++ rng_info[i].flags = rng_desc->rngtabs[tmp]->rngs[i].flags; ++ ++ if (rng_desc->mode == A4L_RNG_GLOBAL_RNGDESC) ++ rng_info[i].flags |= A4L_RNG_GLOBAL; ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ inarg.info, ++ rng_info, ++ rng_desc->rngtabs[tmp]->length * ++ sizeof(a4l_rnginfo_t)) != 0) ++ return -EFAULT; ++ ++ rtdm_free(rng_info); ++ ++ return ret; ++} +--- linux/drivers/xenomai/analogy/sensoray/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/Makefile 2021-04-07 16:01:27.877633237 +0800 +@@ -0,0 +1,6 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_S526) += analogy_s526.o ++ ++analogy_s526-y := s526.o +--- linux/drivers/xenomai/analogy/sensoray/s526.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/s526.c 2021-04-07 16:01:27.872633245 +0800 +@@ -0,0 +1,756 @@ ++/* ++ * Analogy driver for Sensoray Model 526 board ++ * ++ * Copyright (C) 2009 Simon Boulay ++ * ++ * Derived from comedi: ++ * Copyright (C) 2000 David A. Schleef ++ * 2006 Everett Wang ++ * 2009 Ian Abbott ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++/* ++ * Original code comes from comedi linux-next staging driver (2009.12.20) ++ * Board documentation: http://www.sensoray.com/products/526data.htm ++ * Everything should work as in comedi: ++ * - Encoder works ++ * - Analog input works ++ * - Analog output works ++ * - PWM output works ++ * - Commands are not supported yet. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* Board description */ ++#define S526_GPCT_CHANS 4 ++#define S526_GPCT_BITS 24 ++#define S526_AI_CHANS 10 /* 8 regular differential inputs ++ * channel 8 is "reference 0" (+10V) ++ * channel 9 is "reference 1" (0V) */ ++#define S526_AI_BITS 16 ++#define S526_AI_TIMEOUT 100 ++#define S526_AO_CHANS 4 ++#define S526_AO_BITS 16 ++#define S526_DIO_CHANS 8 ++#define S526_DIO_BITS 1 ++ ++/* Ports */ ++#define S526_IOSIZE 0x40 /* 64 bytes */ ++#define S526_DEFAULT_ADDRESS 0x2C0 /* Manufacturing default */ ++ ++/* Registers */ ++#define REG_TCR 0x00 ++#define REG_WDC 0x02 ++#define REG_DAC 0x04 ++#define REG_ADC 0x06 ++#define REG_ADD 0x08 ++#define REG_DIO 0x0A ++#define REG_IER 0x0C ++#define REG_ISR 0x0E ++#define REG_MSC 0x10 ++#define REG_C0L 0x12 ++#define REG_C0H 0x14 ++#define REG_C0M 0x16 ++#define REG_C0C 0x18 ++#define REG_C1L 0x1A ++#define REG_C1H 0x1C ++#define REG_C1M 0x1E ++#define REG_C1C 0x20 ++#define REG_C2L 0x22 ++#define REG_C2H 0x24 ++#define REG_C2M 0x26 ++#define REG_C2C 0x28 ++#define REG_C3L 0x2A ++#define REG_C3H 0x2C ++#define REG_C3M 0x2E ++#define REG_C3C 0x30 ++#define REG_EED 0x32 ++#define REG_EEC 0x34 ++ ++#define ISR_ADC_DONE 0x4 ++ ++struct counter_mode_register_t { ++#if defined (__LITTLE_ENDIAN_BITFIELD) ++ unsigned short coutSource:1; ++ unsigned short coutPolarity:1; ++ unsigned short autoLoadResetRcap:3; ++ unsigned short hwCtEnableSource:2; ++ unsigned short ctEnableCtrl:2; ++ unsigned short clockSource:2; ++ unsigned short countDir:1; ++ unsigned short countDirCtrl:1; ++ unsigned short outputRegLatchCtrl:1; ++ unsigned short preloadRegSel:1; ++ unsigned short reserved:1; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ unsigned short reserved:1; ++ unsigned short preloadRegSel:1; ++ unsigned short outputRegLatchCtrl:1; ++ unsigned short countDirCtrl:1; ++ unsigned short countDir:1; ++ unsigned short clockSource:2; ++ unsigned short ctEnableCtrl:2; ++ unsigned short hwCtEnableSource:2; ++ unsigned short autoLoadResetRcap:3; ++ unsigned short coutPolarity:1; ++ unsigned short coutSource:1; ++#else ++#error Unknown bit field order ++#endif ++}; ++ ++union cmReg { ++ struct counter_mode_register_t reg; ++ unsigned short value; ++}; ++ ++/* Application Classes for GPCT Subdevices */ ++enum S526_GPCT_APP_CLASS { ++ CountingAndTimeMeasurement, ++ SinglePulseGeneration, ++ PulseTrainGeneration, ++ PositionMeasurement, ++ Miscellaneous ++}; ++ ++/* GPCT subdevices configuration */ ++#define MAX_GPCT_CONFIG_DATA 6 ++struct s526GPCTConfig { ++ enum S526_GPCT_APP_CLASS app; ++ int data[MAX_GPCT_CONFIG_DATA]; ++}; ++ ++typedef struct s526_priv { ++ unsigned long io_base; ++} s526_priv_t; ++ ++struct s526_subd_gpct_priv { ++ struct s526GPCTConfig config[4]; ++}; ++ ++struct s526_subd_ai_priv { ++ uint16_t config; ++}; ++ ++struct s526_subd_ao_priv { ++ uint16_t readback[2]; ++}; ++ ++struct s526_subd_dio_priv { ++ int io_bits; ++ unsigned int state; ++}; ++ ++#define devpriv ((s526_priv_t*)(dev->priv)) ++ ++#define ADDR_REG(reg) (devpriv->io_base + (reg)) ++#define ADDR_CHAN_REG(reg, chan) (devpriv->io_base + (reg) + (chan) * 8) ++ ++ ++static int s526_gpct_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_gpct_priv *subdpriv = ++ (struct s526_subd_gpct_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ int subdev_channel = CR_CHAN(insn->chan_desc); ++ int i; ++ short value; ++ union cmReg cmReg; 
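++	/* data[0] selects the application type; the layout of the remaining
++	   data[] entries depends on that type, as documented case by case below. */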
++ ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_insn_config: Configuring Channel %d\n", ++ subdev_channel); ++ ++ for (i = 0; i < MAX_GPCT_CONFIG_DATA; i++) { ++ subdpriv->config[subdev_channel].data[i] = data[i]; ++ a4l_dbg(1, drv_dbg, dev, "data[%d]=%x\n", i, data[i]); ++ } ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_GPCT_QUADRATURE_ENCODER: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register Value ++ * data[3]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring Encoder\n"); ++ subdpriv->config[subdev_channel].app = PositionMeasurement; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = data[1] & 0xFFFF; ++ ++ a4l_dbg(1, drv_dbg, dev, "Counter Mode register=%x\n", cmReg.value); ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Reset the counter if it is software preload */ ++ if (cmReg.reg.autoLoadResetRcap == 0) { ++ outw(0x8000, ADDR_CHAN_REG(REG_C0C, subdev_channel)); /* Reset the counter */ ++ /* outw(0x4000, ADDR_CHAN_REG(REG_C0C, subdev_channel)); /\* Load the counter from PR0 *\/ */ ++ } ++ break; ++ ++ case A4L_INSN_CONFIG_GPCT_SINGLE_PULSE_GENERATOR: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register 0 Value ++ * data[3]: Pre-load Register 1 Value ++ * data[4]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring SPG\n"); ++ subdpriv->config[subdev_channel].app = SinglePulseGeneration; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 0; /* PR0 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 0 high word */ ++ value = (short)((data[2] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 0 low word */ ++ value = (short)(data[2] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 1; /* PR1 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 1 high word */ ++ value = (short)((data[3] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 1 low word */ ++ value = (short)(data[3] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Write the Counter Control Register */ ++ if (data[4] != 0) { ++ value = (short)(data[4] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0C, subdev_channel)); ++ } ++ break; ++ ++ case A4L_INSN_CONFIG_GPCT_PULSE_TRAIN_GENERATOR: ++ /* ++ * data[0]: Application Type ++ * data[1]: Counter Mode Register Value ++ * data[2]: Pre-load Register 0 Value ++ * data[3]: Pre-load Register 1 Value ++ * data[4]: Conter Control Register ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_insn_config: Configuring PTG\n"); ++ subdpriv->config[subdev_channel].app = PulseTrainGeneration; ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 0; /* PR0 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 0 high word */ ++ value = (short)((data[2] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 0 low word */ ++ value = (short)(data[2] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, 
subdev_channel)); ++ ++ /* Set Counter Mode Register */ ++ cmReg.value = (short)(data[1] & 0xFFFF); ++ cmReg.reg.preloadRegSel = 1; /* PR1 */ ++ outw(cmReg.value, ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ ++ /* Load the pre-load register 1 high word */ ++ value = (short)((data[3] >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ ++ /* Load the pre-load register 1 low word */ ++ value = (short)(data[3] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ ++ /* Write the Counter Control Register */ ++ if (data[4] != 0) { ++ value = (short)(data[4] & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0C, subdev_channel)); ++ } ++ break; ++ ++ default: ++ a4l_err(dev, "s526_gpct_insn_config: unsupported GPCT_insn_config\n"); ++ return -EINVAL; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int s526_gpct_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint32_t *data = (uint32_t *)insn->data; ++ int counter_channel = CR_CHAN(insn->chan_desc); ++ unsigned short datalow; ++ unsigned short datahigh; ++ int i; ++ ++ if (insn->data_size <= 0) { ++ a4l_err(dev, "s526_gpct_rinsn: data size should be > 0\n"); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < insn->data_size / sizeof(uint32_t); i++) { ++ datalow = inw(ADDR_CHAN_REG(REG_C0L, counter_channel)); ++ datahigh = inw(ADDR_CHAN_REG(REG_C0H, counter_channel)); ++ data[i] = (int)(datahigh & 0x00FF); ++ data[i] = (data[i] << 16) | (datalow & 0xFFFF); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_rinsn GPCT[%d]: %x(0x%04x, 0x%04x)\n", ++ counter_channel, data[i], datahigh, datalow); ++ } ++ ++ return 0; ++} ++ ++static int s526_gpct_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_gpct_priv *subdpriv = ++ (struct s526_subd_gpct_priv *)subd->priv; ++ uint32_t *data = (uint32_t *)insn->data; ++ int subdev_channel = CR_CHAN(insn->chan_desc); ++ short value; ++ union cmReg cmReg; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_winsn: GPCT_INSN_WRITE on channel %d\n", ++ subdev_channel); ++ ++ cmReg.value = inw(ADDR_CHAN_REG(REG_C0M, subdev_channel)); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_gpct_winsn: Counter Mode Register: %x\n", cmReg.value); ++ ++ /* Check what Application of Counter this channel is configured for */ ++ switch (subdpriv->config[subdev_channel].app) { ++ case PositionMeasurement: ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: PM\n"); ++ outw(0xFFFF & ((*data) >> 16), ADDR_CHAN_REG(REG_C0H, ++ subdev_channel)); ++ outw(0xFFFF & (*data), ++ ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ ++ case SinglePulseGeneration: ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: SPG\n"); ++ outw(0xFFFF & ((*data) >> 16), ADDR_CHAN_REG(REG_C0H, ++ subdev_channel)); ++ outw(0xFFFF & (*data), ++ ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ ++ case PulseTrainGeneration: ++ /* ++ * data[0] contains the PULSE_WIDTH ++ * data[1] contains the PULSE_PERIOD ++ * @pre PULSE_PERIOD > PULSE_WIDTH > 0 ++ * The above periods must be expressed as a multiple of the ++ * pulse frequency on the selected source ++ */ ++ a4l_dbg(1, drv_dbg, dev, "s526_gpct_winsn: INSN_WRITE: PTG\n"); ++ if ((data[1] > data[0]) && (data[0] > 0)) { ++ (subdpriv->config[subdev_channel]).data[0] = data[0]; ++ (subdpriv->config[subdev_channel]).data[1] = data[1]; ++ } else { ++ a4l_err(dev, ++ "s526_gpct_winsn: INSN_WRITE: PTG: Problem with Pulse params -> %du %du\n", ++ data[0], 
data[1]); ++ return -EINVAL; ++ } ++ ++ value = (short)((*data >> 16) & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0H, subdev_channel)); ++ value = (short)(*data & 0xFFFF); ++ outw(value, ADDR_CHAN_REG(REG_C0L, subdev_channel)); ++ break; ++ default: /* Impossible */ ++ a4l_err(dev, ++ "s526_gpct_winsn: INSN_WRITE: Functionality %d not implemented yet\n", ++ subdpriv->config[subdev_channel].app); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int s526_ai_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ai_priv *subdpriv = ++ (struct s526_subd_ai_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ /* data[0] : channels was set in relevant bits. ++ * data[1] : delay ++ */ ++ /* COMMENT: abbotti 2008-07-24: I don't know why you'd want to ++ * enable channels here. The channel should be enabled in the ++ * INSN_READ handler. */ ++ ++ /* Enable ADC interrupt */ ++ outw(ISR_ADC_DONE, ADDR_REG(REG_IER)); ++ a4l_dbg(1, drv_dbg, dev, ++ "s526_ai_insn_config: ADC current value: 0x%04x\n", ++ inw(ADDR_REG(REG_ADC))); ++ ++ subdpriv->config = (data[0] & 0x3FF) << 5; ++ if (data[1] > 0) ++ subdpriv->config |= 0x8000; /* set the delay */ ++ ++ subdpriv->config |= 0x0001; /* ADC start bit. */ ++ ++ return 0; ++} ++ ++static int s526_ai_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ai_priv *subdpriv = ++ (struct s526_subd_ai_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int n, i; ++ int chan = CR_CHAN(insn->chan_desc); ++ uint16_t value; ++ uint16_t d; ++ uint16_t status; ++ ++ /* Set configured delay, enable channel for this channel only, ++ * select "ADC read" channel, set "ADC start" bit. */ ++ value = (subdpriv->config & 0x8000) | ++ ((1 << 5) << chan) | (chan << 1) | 0x0001; ++ ++ /* convert n samples */ ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ /* trigger conversion */ ++ outw(value, ADDR_REG(REG_ADC)); ++ a4l_dbg(1, drv_dbg, dev, "s526_ai_rinsn: Wrote 0x%04x to ADC\n", ++ value); ++ ++ /* wait for conversion to end */ ++ for (i = 0; i < S526_AI_TIMEOUT; i++) { ++ status = inw(ADDR_REG(REG_ISR)); ++ if (status & ISR_ADC_DONE) { ++ outw(ISR_ADC_DONE, ADDR_REG(REG_ISR)); ++ break; ++ } ++ } ++ if (i == S526_AI_TIMEOUT) { ++ a4l_warn(dev, "s526_ai_rinsn: ADC(0x%04x) timeout\n", ++ inw(ADDR_REG(REG_ISR))); ++ return -ETIMEDOUT; ++ } ++ ++ /* read data */ ++ d = inw(ADDR_REG(REG_ADD)); ++ a4l_dbg(1, drv_dbg, dev, "s526_ai_rinsn: AI[%d]=0x%04x\n", ++ n, (uint16_t)(d & 0xFFFF)); ++ ++ /* munge data */ ++ data[n] = d ^ 0x8000; ++ } ++ ++ return 0; ++} ++ ++static int s526_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_ao_priv *subdpriv = ++ (struct s526_subd_ao_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ int chan = CR_CHAN(insn->chan_desc); ++ uint16_t val; ++ ++ val = chan << 1; ++ outw(val, ADDR_REG(REG_DAC)); ++ ++ for (i = 0; i < insn->data_size / sizeof(uint16_t); i++) { ++ outw(data[i], ADDR_REG(REG_ADD)); /* write the data to preload register */ ++ subdpriv->readback[chan] = data[i]; ++ outw(val + 1, ADDR_REG(REG_DAC)); /* starts the D/A conversion. 
*/ ++ } ++ ++ return 0; ++} ++ ++static int s526_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct s526_subd_ao_priv *subdpriv = ++ (struct s526_subd_ao_priv *)subd->priv; ++ uint16_t *data = (uint16_t *)insn->data; ++ int i; ++ int chan = CR_CHAN(insn->chan_desc); ++ ++ for (i = 0; i < insn->data_size / sizeof(uint16_t); i++) ++ data[i] = subdpriv->readback[chan]; ++ ++ return 0; ++} ++ ++static int s526_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_dio_priv *subdpriv = ++ (struct s526_subd_dio_priv *)subd->priv; ++ unsigned int *data = (unsigned int *)insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ int group, mask; ++ ++ group = chan >> 2; ++ mask = 0xF << (group << 2); ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subdpriv->state |= 1 << (group + 10); /* bit 10/11 set the ++ * group 1/2's mode */ ++ subdpriv->io_bits |= mask; ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subdpriv->state &= ~(1 << (group + 10)); /* 1 is output, 0 is ++ * input. */ ++ subdpriv->io_bits &= ~mask; ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = ++ (subdpriv->io_bits & mask) ? A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ default: ++ return -EINVAL; ++ } ++ ++ outw(subdpriv->state, ADDR_REG(REG_DIO)); ++ ++ return 0; ++} ++ ++static int s526_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct s526_subd_dio_priv *subdpriv = ++ (struct s526_subd_dio_priv *)subd->priv; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ subdpriv->state &= ~(data[0]); ++ subdpriv->state |= data[0] & data[1]; ++ ++ outw(subdpriv->state, ADDR_REG(REG_DIO)); ++ } ++ ++ data[1] = inw(ADDR_REG(REG_DIO)) & 0xFF; /* low 8 bits are the data */ ++ ++ return 0; ++} ++ ++/* --- Channels descriptor --- */ ++ ++static struct a4l_channels_desc s526_chan_desc_gpct = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_GPCT_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_GPCT_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_ai = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_AI_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_AI_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_ao = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_AO_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_AO_BITS}, ++ }, ++}; ++ ++static struct a4l_channels_desc s526_chan_desc_dio = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = S526_DIO_CHANS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, S526_DIO_BITS}, ++ }, ++}; ++ ++/* --- Subdevice initialization functions --- */ ++ ++/* General purpose counter/timer (gpct) */ ++static void setup_subd_gpct(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_COUNTER; ++ subd->chan_desc = &s526_chan_desc_gpct; ++ subd->insn_read = s526_gpct_rinsn; ++ subd->insn_config = s526_gpct_insn_config; ++ subd->insn_write = s526_gpct_winsn; ++} ++ ++/* Analog input subdevice */ ++static void setup_subd_ai(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_AI; ++ subd->chan_desc = &s526_chan_desc_ai; ++ subd->rng_desc = &a4l_range_bipolar10; ++ subd->insn_read = s526_ai_rinsn; ++ subd->insn_config = s526_ai_insn_config; ++} ++ ++/* Analog output subdevice */ ++static void setup_subd_ao(struct a4l_subdevice *subd) ++{ ++ subd->flags = 
A4L_SUBD_AO; ++ subd->chan_desc = &s526_chan_desc_ao; ++ subd->rng_desc = &a4l_range_bipolar10; ++ subd->insn_write = s526_ao_winsn; ++ subd->insn_read = s526_ao_rinsn; ++} ++ ++/* Digital i/o subdevice */ ++static void setup_subd_dio(struct a4l_subdevice *subd) ++{ ++ subd->flags = A4L_SUBD_DIO; ++ subd->chan_desc = &s526_chan_desc_dio; ++ subd->rng_desc = &range_digital; ++ subd->insn_bits = s526_dio_insn_bits; ++ subd->insn_config = s526_dio_insn_config; ++} ++ ++struct setup_subd { ++ void (*setup_func) (struct a4l_subdevice *); ++ int sizeof_priv; ++}; ++ ++static struct setup_subd setup_subds[4] = { ++ { ++ .setup_func = setup_subd_gpct, ++ .sizeof_priv = sizeof(struct s526_subd_gpct_priv), ++ }, ++ { ++ .setup_func = setup_subd_ai, ++ .sizeof_priv = sizeof(struct s526_subd_ai_priv), ++ }, ++ { ++ .setup_func = setup_subd_ao, ++ .sizeof_priv = sizeof(struct s526_subd_ao_priv), ++ }, ++ { ++ .setup_func = setup_subd_dio, ++ .sizeof_priv = sizeof(struct s526_subd_dio_priv), ++ }, ++}; ++ ++static int dev_s526_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int io_base; ++ int i; ++ int err = 0; ++ ++ if (arg->opts == NULL || arg->opts_size < sizeof(unsigned long)) { ++ a4l_warn(dev, ++ "dev_s526_attach: no attach options specified; " ++ "using defaults: addr=0x%x\n", ++ S526_DEFAULT_ADDRESS); ++ io_base = S526_DEFAULT_ADDRESS; ++ } else { ++ io_base = ((unsigned long *)arg->opts)[0]; ++ } ++ ++ if (!request_region(io_base, S526_IOSIZE, "s526")) { ++ a4l_err(dev, "dev_s526_attach: I/O port conflict\n"); ++ return -EIO; ++ } ++ ++ /* Allocate the subdevice structures. */ ++ for (i = 0; i < 4; i++) { ++ struct a4l_subdevice *subd = a4l_alloc_subd(setup_subds[i].sizeof_priv, ++ setup_subds[i].setup_func); ++ ++ if (subd == NULL) ++ return -ENOMEM; ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) ++ return err; ++ } ++ ++ devpriv->io_base = io_base; ++ ++ a4l_info(dev, " attached (address = 0x%x)\n", io_base); ++ ++ return 0; ++} ++ ++static int dev_s526_detach(struct a4l_device *dev) ++{ ++ int err = 0; ++ ++ if (devpriv->io_base != 0) ++ release_region(devpriv->io_base, S526_IOSIZE); ++ ++ return err; ++} ++ ++static struct a4l_driver drv_s526 = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_s526", ++ .driver_name = "s526", ++ .attach = dev_s526_attach, ++ .detach = dev_s526_detach, ++ .privdata_size = sizeof(s526_priv_t), ++}; ++ ++static int __init drv_s526_init(void) ++{ ++ return a4l_register_drv(&drv_s526); ++} ++ ++static void __exit drv_s526_cleanup(void) ++{ ++ a4l_unregister_drv(&drv_s526); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for Sensoray Model 526 board."); ++MODULE_LICENSE("GPL"); ++ ++module_init(drv_s526_init); ++module_exit(drv_s526_cleanup); +--- linux/drivers/xenomai/analogy/sensoray/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/sensoray/Kconfig 2021-04-07 16:01:27.867633252 +0800 +@@ -0,0 +1,5 @@ ++ ++config XENO_DRIVERS_ANALOGY_S526 ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "Sensoray Model 526 driver" ++ default n +--- linux/drivers/xenomai/analogy/driver_facilities.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/driver_facilities.c 2021-04-07 16:01:27.863633257 +0800 +@@ -0,0 +1,608 @@ ++/* ++ * Analogy for Linux, driver facilities ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++ ++/** ++ * @ingroup cobalt ++ * @defgroup analogy Analogy framework ++ * A RTDM-based interface for implementing DAQ card drivers ++ */ ++ ++/** ++ * @ingroup analogy ++ * @defgroup analogy_driver_facilities Driver API ++ * Programming interface provided to DAQ card drivers ++ */ ++ ++/* --- Driver section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_driver Driver management services ++ * ++ * Analogy driver registration / unregistration ++ * ++ * In a common Linux char driver, the developer has to register a fops ++ * structure filled with callbacks for read / write / mmap / ioctl ++ * operations. ++ * ++ * Analogy drivers do not have to implement read / write / mmap / ++ * ioctl functions, these procedures are implemented in the Analogy ++ * generic layer. Then, the transfers between user-space and ++ * kernel-space are already managed. Analogy drivers work with commands ++ * and instructions which are some kind of more dedicated read / write ++ * operations. And, instead of registering a fops structure, a Analogy ++ * driver must register some a4l_driver structure. ++ * ++ * @{ ++ */ ++ ++/** ++ * @brief Register an Analogy driver ++ * ++ * After initialising a driver structure, the driver must be made ++ * available so as to be attached. ++ * ++ * @param[in] drv Driver descriptor structure ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_register_drv(struct a4l_driver * drv); ++EXPORT_SYMBOL_GPL(a4l_register_drv); ++ ++/** ++ * @brief Unregister an Analogy driver ++ * ++ * This function removes the driver descriptor from the Analogy driver ++ * list. The driver cannot be attached anymore. ++ * ++ * @param[in] drv Driver descriptor structure ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_unregister_drv(struct a4l_driver * drv); ++EXPORT_SYMBOL_GPL(a4l_unregister_drv); ++ ++/** @} */ ++ ++/* --- Subdevice section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_subdevice Subdevice management services ++ * ++ * Subdevice declaration in a driver ++ * ++ * The subdevice structure is the most complex one in the Analogy ++ * driver layer. It contains some description fields to fill and some ++ * callbacks to declare. 
++ * ++ * The description fields are: ++ * - flags: to define the subdevice type and its capabilities; ++ * - chan_desc: to describe the channels which compose the subdevice; ++ * - rng_desc: to declare the usable ranges; ++ * ++ * The functions callbakcs are: ++ * - do_cmd() and do_cmdtest(): to performe asynchronous acquisitions ++ * thanks to commands; ++ * - cancel(): to abort a working asynchronous acquisition; ++ * - munge(): to apply modifications on the data freshly acquired ++ * during an asynchronous transfer. Warning: using this feature with ++ * can significantly reduce the performances (if the munge operation ++ * is complex, it will trigger high CPU charge and if the ++ * acquisition device is DMA capable, many cache-misses and ++ * cache-replaces will occur (the benefits of the DMA controller ++ * will vanish); ++ * - trigger(): optionnaly to launch an asynchronous acquisition; ++ * - insn_read(), insn_write(), insn_bits(), insn_config(): to perform ++ * synchronous acquisition operations. ++ * ++ * Once the subdevice is filled, it must be inserted into the driver ++ * structure thanks to a4l_add_subd(). ++ * ++ * @{ ++ */ ++ ++EXPORT_SYMBOL_GPL(a4l_range_bipolar10); ++EXPORT_SYMBOL_GPL(a4l_range_bipolar5); ++EXPORT_SYMBOL_GPL(a4l_range_unipolar10); ++EXPORT_SYMBOL_GPL(a4l_range_unipolar5); ++EXPORT_SYMBOL_GPL(a4l_range_unknown); ++EXPORT_SYMBOL_GPL(a4l_range_fake); ++ ++/** ++ * @brief Allocate a subdevice descriptor ++ * ++ * This is a helper function so as to get a suitable subdevice ++ * descriptor ++ * ++ * @param[in] sizeof_priv Size of the subdevice's private data ++ * @param[in] setup Setup function to be called after the allocation ++ * ++ * @return the index with which the subdevice has been registered, in ++ * case of error a negative error code is returned. ++ * ++ */ ++struct a4l_subdevice * a4l_alloc_subd(int sizeof_priv, ++ void (*setup)(struct a4l_subdevice *)); ++EXPORT_SYMBOL_GPL(a4l_alloc_subd); ++ ++/** ++ * @brief Add a subdevice to the driver descriptor ++ * ++ * Once the driver descriptor structure is initialized, the function ++ * a4l_add_subd() must be used so to add some subdevices to the ++ * driver. ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the index with which the subdevice has been registered, in ++ * case of error a negative error code is returned. ++ * ++ */ ++int a4l_add_subd(struct a4l_device *dev, struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_add_subd); ++ ++/** ++ * @brief Get a pointer to the subdevice descriptor referenced by its ++ * registration index ++ * ++ * This function is scarcely useful as all the drivers callbacks get ++ * the related subdevice descriptor as first argument. ++ * This function is not optimized, it goes through a linked list to ++ * get the proper pointer. So it must not be used in real-time context ++ * but at initialization / cleanup time (attach / detach). ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] idx Subdevice index ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++struct a4l_subdevice *a4l_get_subd(struct a4l_device *dev, int idx); ++EXPORT_SYMBOL_GPL(a4l_get_subd); ++ ++/** @} */ ++ ++/* --- Buffer section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_buffer Buffer management services ++ * ++ * Buffer management services ++ * ++ * The buffer is the key component of the Analogy infrastructure. 
It ++ * manages transfers between the user-space and the Analogy drivers ++ * thanks to generic functions which are described hereafter. Thanks ++ * to the buffer subsystem, the driver developer does not have to care ++ * about the way the user program retrieves or sends data. ++ * ++ * To write a classical char driver, the developer has to fill a fops ++ * structure so as to provide transfer operations to the user program ++ * (read, write, ioctl and mmap if need be). ++ * ++ * The Analogy infrastructure manages the whole interface with the ++ * userspace; the common read, write, mmap, etc. callbacks are generic ++ * Analogy functions. These functions manage (and perform, if need be) ++ * tranfers between the user-space and an asynchronous buffer thanks ++ * to lockless mechanisms. ++ * ++ * Consequently, the developer has to use the proper buffer functions ++ * in order to write / read acquired data into / from the asynchronous ++ * buffer. ++ * ++ * Here are listed the functions: ++ * - a4l_buf_prepare_(abs)put() and a4l_buf_commit_(abs)put() ++ * - a4l_buf_prepare_(abs)get() and a4l_buf_commit_(abs)get() ++ * - a4l_buf_put() ++ * - a4l_buf_get() ++ * - a4l_buf_evt(). ++ * ++ * The functions count might seem high; however, the developer needs a ++ * few of them to write a driver. Having so many functions enables to ++ * manage any transfer cases: ++ * - If some DMA controller is available, there is no need to make the ++ * driver copy the acquired data into the asynchronous buffer, the ++ * DMA controller must directly trigger DMA shots into / from the ++ * buffer. In that case, a function a4l_buf_prepare_*() must be used ++ * so as to set up the DMA transfer and a function ++ * a4l_buf_commit_*() has to be called to complete the transfer(). ++ * - For DMA controllers which need to work with global counter (the ++ * transfered data count since the beginning of the acquisition), ++ * the functions a4l_buf_*_abs_*() have been made available. ++ * - If no DMA controller is available, the driver has to perform the ++ * copy between the hardware component and the asynchronous ++ * buffer. In such cases, the functions a4l_buf_get() and ++ * a4l_buf_put() are useful. ++ * ++ * @{ ++ */ ++ ++/** ++ * @brief Update the absolute count of data sent from the device to ++ * the buffer since the start of the acquisition and after the next ++ * DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(absg)et() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, some ++ * pointers still have to be updated so as to monitor the tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred during the next ++ * DMA shot plus the data count which have been copied since the start ++ * of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. 
++ * ++ */ ++int a4l_buf_prepare_absput(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_absput); ++ ++/** ++ * @brief Set the absolute count of data which was sent from the ++ * device to the buffer since the start of the acquisition and until ++ * the last DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count transferred to the buffer during ++ * the last DMA shot plus the data count which have been sent / ++ * retrieved since the beginning of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_absput(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_absput); ++ ++/** ++ * @brief Set the count of data which is to be sent to the buffer at ++ * the next DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_put(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_put); ++ ++/** ++ * @brief Set the count of data sent to the buffer during the last ++ * completed DMA shots ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The amount of data transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_put(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_put); ++ ++/** ++ * @brief Copy some data from the device driver to the buffer ++ * ++ * The function a4l_buf_put() must copy data coming from some ++ * acquisition device to the Analogy buffer. This ring-buffer is an ++ * intermediate area between the device driver and the user-space ++ * program, which is supposed to recover the acquired data. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] bufdata The data buffer to copy into the Analogy buffer ++ * @param[in] count The amount of data to copy ++ * ++ * @return 0 on success, otherwise negative error code. 
++ * ++ */ ++int a4l_buf_put(struct a4l_subdevice *subd, void *bufdata, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_put); ++ ++/** ++ * @brief Update the absolute count of data sent from the buffer to ++ * the device since the start of the acquisition and after the next ++ * DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(absg)et() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred during the next ++ * DMA shot plus the data count which have been copied since the start ++ * of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_absget(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_absget); ++ ++/** ++ * @brief Set the absolute count of data which was sent from the ++ * buffer to the device since the start of the acquisition and until ++ * the last DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count transferred to the device during ++ * the last DMA shot plus the data count which have been sent since ++ * the beginning of the acquisition ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_absget(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_absget); ++ ++/** ++ * @brief Set the count of data which is to be sent from the buffer to ++ * the device at the next DMA shot ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The data count to be transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_prepare_get(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_prepare_get); ++ ++/** ++ * @brief Set the count of data sent from the buffer to the device ++ * during the last completed DMA shots ++ * ++ * The functions a4l_buf_prepare_(abs)put(), ++ * a4l_buf_commit_(abs)put(), a4l_buf_prepare_(abs)get() and ++ * a4l_buf_commit_(abs)get() have been made available for DMA ++ * transfers. 
In such situations, no data copy is needed between the ++ * Analogy buffer and the device as some DMA controller is in charge ++ * of performing data shots from / to the Analogy buffer. However, ++ * some pointers still have to be updated so as to monitor the ++ * tranfers. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] count The amount of data transferred ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_commit_get(struct a4l_subdevice *subd, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_commit_get); ++ ++/** ++ * @brief Copy some data from the buffer to the device driver ++ * ++ * The function a4l_buf_get() must copy data coming from the Analogy ++ * buffer to some acquisition device. This ring-buffer is an ++ * intermediate area between the device driver and the user-space ++ * program, which is supposed to provide the data to send to the ++ * device. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] bufdata The data buffer to copy into the Analogy buffer ++ * @param[in] count The amount of data to copy ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_get(struct a4l_subdevice *subd, void *bufdata, unsigned long count); ++EXPORT_SYMBOL_GPL(a4l_buf_get); ++ ++/** ++ * @brief Signal some event(s) to a user-space program involved in ++ * some read / write operation ++ * ++ * The function a4l_buf_evt() is useful in many cases: ++ * - To wake-up a process waiting for some data to read. ++ * - To wake-up a process waiting for some data to write. ++ * - To notify the user-process an error has occured during the ++ * acquistion. ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * @param[in] evts Some specific event to notify: ++ * - A4L_BUF_ERROR to indicate some error has occured during the ++ * transfer ++ * - A4L_BUF_EOA to indicate the acquisition is complete (this ++ * event is automatically set, it should not be used). ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_buf_evt(struct a4l_subdevice *subd, unsigned long evts); ++EXPORT_SYMBOL_GPL(a4l_buf_evt); ++ ++/** ++ * @brief Get the data amount available in the Analogy buffer ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the amount of data available in the Analogy buffer. ++ * ++ */ ++unsigned long a4l_buf_count(struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_buf_count); ++ ++#ifdef DOXYGEN_CPP /* Only used for doxygen doc generation */ ++ ++/** ++ * @brief Get the current Analogy command descriptor ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the command descriptor. ++ * ++ */ ++struct a4l_cmd_desc *a4l_get_cmd(struct a4l_subdevice * subd); ++ ++#endif /* DOXYGEN_CPP */ ++ ++/** ++ * @brief Get the channel index according to its type ++ * ++ * @param[in] subd Subdevice descriptor structure ++ * ++ * @return the channel index. ++ * ++ */ ++int a4l_get_chan(struct a4l_subdevice *subd); ++EXPORT_SYMBOL_GPL(a4l_get_chan); ++ ++/** @} */ ++ ++/* --- IRQ handling section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_irq Interrupt management services ++ * @{ ++ */ ++ ++/** ++ * @brief Get the interrupt number in use for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * ++ * @return the line number used or A4L_IRQ_UNUSED if no interrupt ++ * is registered. 
++ * ++ */ ++unsigned int a4l_get_irq(struct a4l_device * dev); ++EXPORT_SYMBOL_GPL(a4l_get_irq); ++ ++/** ++ * @brief Register an interrupt handler for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] irq Line number of the addressed IRQ ++ * @param[in] handler Interrupt handler ++ * @param[in] flags Registration flags: ++ * - RTDM_IRQTYPE_SHARED: enable IRQ-sharing with other drivers ++ * (Warning: real-time drivers and non-real-time drivers cannot ++ * share an interrupt line). ++ * - RTDM_IRQTYPE_EDGE: mark IRQ as edge-triggered (Warning: this flag ++ * is meaningless in RTDM-less context). ++ * - A4L_IRQ_DISABLED: keep IRQ disabled when calling the action ++ * handler (Warning: this flag is ignored in RTDM-enabled ++ * configuration). ++ * @param[in] cookie Pointer to be passed to the interrupt handler on ++ * invocation ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_request_irq(struct a4l_device * dev, ++ unsigned int irq, ++ a4l_irq_hdlr_t handler, ++ unsigned long flags, void *cookie); ++EXPORT_SYMBOL_GPL(a4l_request_irq); ++ ++/** ++ * @brief Release an interrupt handler for a specific device ++ * ++ * @param[in] dev Device descriptor structure ++ * @param[in] irq Line number of the addressed IRQ ++ * ++ * @return 0 on success, otherwise negative error code. ++ * ++ */ ++int a4l_free_irq(struct a4l_device * dev, unsigned int irq); ++EXPORT_SYMBOL_GPL(a4l_free_irq); ++ ++/** @} */ ++ ++/* --- Misc section --- */ ++ ++/** ++ * @ingroup analogy_driver_facilities ++ * @defgroup analogy_misc Misc services ++ * @{ ++ */ ++ ++/** ++ * @brief Get the absolute time in nanoseconds ++ * ++ * @return the absolute time expressed in nanoseconds ++ * ++ */ ++unsigned long long a4l_get_time(void); ++EXPORT_SYMBOL_GPL(a4l_get_time); ++ ++/** @} */ +--- linux/drivers/xenomai/analogy/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/Kconfig 2021-04-07 16:01:27.858633265 +0800 +@@ -0,0 +1,56 @@ ++menu "ANALOGY drivers" ++ ++config XENO_DRIVERS_ANALOGY ++ tristate "ANALOGY interface" ++ help ++ ++ ANALOGY is a framework aimed at supporting data acquisition ++ devices. ++ ++config XENO_DRIVERS_ANALOGY_DEBUG ++ depends on XENO_DRIVERS_ANALOGY ++ bool "Analogy debug trace" ++ default n ++ help ++ ++ Enable debugging traces in Analogy so as to monitor Analogy's ++ core and drivers behaviours. ++ ++config XENO_DRIVERS_ANALOGY_DEBUG_FTRACE ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ bool "Analogy debug ftrace" ++ default n ++ help ++ ++ Route the Analogy a4l_dbg and a4l_info statements to /sys/kernel/debug/ ++ ++config XENO_DRIVERS_ANALOGY_DEBUG_LEVEL ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ int "Analogy core debug level threshold" ++ default 0 ++ help ++ ++ Define the level above which the debugging traces will not be ++ displayed. ++ ++ WARNING: this threshold is only applied on the Analogy ++ core. That will not affect the driver. ++ ++config XENO_DRIVERS_ANALOGY_DRIVER_DEBUG_LEVEL ++ depends on XENO_DRIVERS_ANALOGY_DEBUG ++ int "Analogy driver debug level threshold" ++ default 0 ++ help ++ ++ Define the level above which the debugging traces will not be ++ displayed. ++ ++ WARNING: this threshold is only applied on the Analogy ++ driver. That will not affect the core. 
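(Editor's illustrative sketch, not part of the patch: the buffer and IRQ services documented in driver_facilities.c above are meant to be chained together in a driver's interrupt path. The handler prototype below follows the usual a4l_irq_hdlr_t convention, which is assumed here since only the registration call is documented above; my_drain_fifo(), priv->irq and the sample buffer size are hypothetical placeholders.)

```c
/* Sketch only: feed the Analogy ring buffer from an interrupt handler. */
static int my_isr(unsigned int irq, void *d)	/* a4l_irq_hdlr_t convention (assumed) */
{
	struct a4l_subdevice *subd = d;
	uint16_t samples[64];
	unsigned long count;

	/* Drain the acquisition FIFO -- device-specific placeholder helper. */
	count = my_drain_fifo(subd, samples, sizeof(samples));

	/* Copy the freshly acquired data into the asynchronous buffer. */
	if (a4l_buf_put(subd, samples, count) < 0) {
		/* Report the failed transfer to the waiting user process. */
		a4l_buf_evt(subd, A4L_BUF_ERROR);
		return 0;
	}

	/* Wake up the process blocked on a read of this subdevice. */
	a4l_buf_evt(subd, 0);
	return 0;
}

/* Registered once from the attach() handler, e.g.:
 *	a4l_request_irq(dev, priv->irq, my_isr, RTDM_IRQTYPE_SHARED, subd);
 */
```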
++ ++source "drivers/xenomai/analogy/testing/Kconfig" ++source "drivers/xenomai/analogy/intel/Kconfig" ++source "drivers/xenomai/analogy/national_instruments/Kconfig" ++source "drivers/xenomai/analogy/sensoray/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/analogy/device.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/device.c 2021-04-07 16:01:27.853633272 +0800 +@@ -0,0 +1,459 @@ ++/* ++ * Analogy for Linux, device related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++static struct a4l_device a4l_devs[A4L_NB_DEVICES]; ++ ++/* --- Device tab management functions --- */ ++ ++void a4l_init_devs(void) ++{ ++ int i; ++ memset(a4l_devs, 0, A4L_NB_DEVICES * sizeof(struct a4l_device)); ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ rtdm_lock_init(&a4l_devs[i].lock); ++ a4l_devs[i].transfer.irq_desc.irq = A4L_IRQ_UNUSED; ++ } ++} ++ ++int a4l_check_cleanup_devs(void) ++{ ++ int i, ret = 0; ++ ++ for (i = 0; i < A4L_NB_DEVICES && ret == 0; i++) ++ if (test_bit(A4L_DEV_ATTACHED_NR, &a4l_devs[i].flags)) ++ ret = -EBUSY; ++ ++ return ret; ++} ++ ++void a4l_set_dev(struct a4l_device_context *cxt) ++{ ++ /* Retrieve the minor index */ ++ const int minor = a4l_get_minor(cxt); ++ /* Fill the dev fields accordingly */ ++ cxt->dev = &(a4l_devs[minor]); ++} ++ ++/* --- Device tab proc section --- */ ++ ++#ifdef CONFIG_PROC_FS ++ ++int a4l_rdproc_devs(struct seq_file *p, void *data) ++{ ++ int i; ++ ++ seq_printf(p, "-- Analogy devices --\n\n"); ++ seq_printf(p, "| idx | status | driver\n"); ++ ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ char *status, *name; ++ ++ /* Gets the device's state */ ++ if (a4l_devs[i].flags == 0) { ++ status = "Unused"; ++ name = "No driver"; ++ } else if (test_bit(A4L_DEV_ATTACHED_NR, &a4l_devs[i].flags)) { ++ status = "Linked"; ++ name = a4l_devs[i].driver->driver_name; ++ } else { ++ status = "Broken"; ++ name = "Unknown"; ++ } ++ ++ seq_printf(p, "| %02d | %s | %s\n", i, status, name); ++ } ++ return 0; ++} ++ ++static int a4l_proc_transfer_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_transfer, PDE_DATA(inode)); ++} ++ ++static const struct file_operations a4l_proc_transfer_ops = { ++ .open = a4l_proc_transfer_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int a4l_proc_attach(struct a4l_device_context * cxt) ++{ ++ int ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct proc_dir_entry *entry; ++ char *entry_name; ++ ++ /* Allocate the buffer for the file name */ ++ entry_name = rtdm_malloc(A4L_NAMELEN + 4); ++ if (entry_name == NULL) { ++ __a4l_err("a4l_proc_attach: failed to allocate 
buffer\n"); ++ return -ENOMEM; ++ } ++ ++ /* Create the proc file name */ ++ ksformat(entry_name, A4L_NAMELEN + 4, "%02d-%s", ++ a4l_get_minor(cxt), dev->driver->board_name); ++ ++ /* Create the proc entry */ ++ entry = proc_create_data(entry_name, 0444, a4l_proc_root, ++ &a4l_proc_transfer_ops, &dev->transfer); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_attach: " ++ "failed to create /proc/analogy/%s\n", ++ entry_name); ++ ret = -ENOMEM; ++ } ++ ++ rtdm_free(entry_name); ++ ++ return ret; ++} ++ ++void a4l_proc_detach(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ char *entry_name; ++ ++ entry_name = rtdm_malloc(A4L_NAMELEN + 4); ++ if (entry_name == NULL) { ++ __a4l_err("a4l_proc_detach: " ++ "failed to allocate filename buffer\n"); ++ return; ++ } ++ ++ ksformat(entry_name, A4L_NAMELEN + 4, "%02d-%s", ++ a4l_get_minor(cxt), dev->driver->board_name); ++ ++ remove_proc_entry(entry_name, a4l_proc_root); ++ ++ rtdm_free(entry_name); ++} ++ ++#else /* !CONFIG_PROC_FS */ ++ ++int a4l_proc_attach(struct a4l_device_context * cxt) ++{ ++ return 0; ++} ++ ++void a4l_proc_detach(struct a4l_device_context * cxt) ++{ ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++/* --- Attach / detach section --- */ ++ ++int a4l_fill_lnkdesc(struct a4l_device_context * cxt, ++ a4l_lnkdesc_t * link_arg, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret; ++ char *tmpname = NULL; ++ void *tmpopts = NULL; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ link_arg, arg, sizeof(a4l_lnkdesc_t)); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call1(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ ++ if (link_arg->bname_size != 0 && link_arg->bname != NULL) { ++ tmpname = rtdm_malloc(link_arg->bname_size + 1); ++ if (tmpname == NULL) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call1(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_get_lnkdesc; ++ } ++ tmpname[link_arg->bname_size] = 0; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ tmpname, ++ link_arg->bname, ++ link_arg->bname_size); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call2(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ } else { ++ __a4l_err("a4l_fill_lnkdesc: board name missing\n"); ++ ret = -EINVAL; ++ goto out_get_lnkdesc; ++ } ++ ++ if (link_arg->opts_size != 0 && link_arg->opts != NULL) { ++ tmpopts = rtdm_malloc(link_arg->opts_size); ++ ++ if (tmpopts == NULL) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call2(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_get_lnkdesc; ++ } ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ tmpopts, ++ link_arg->opts, ++ link_arg->opts_size); ++ if (ret != 0) { ++ __a4l_err("a4l_fill_lnkdesc: " ++ "call3(copy_from_user) failed\n"); ++ goto out_get_lnkdesc; ++ } ++ } ++ ++ link_arg->bname = tmpname; ++ link_arg->opts = tmpopts; ++ ++ out_get_lnkdesc: ++ ++ if (tmpname == NULL) { ++ link_arg->bname = NULL; ++ link_arg->bname_size = 0; ++ } ++ ++ if (tmpopts == NULL) { ++ link_arg->opts = NULL; ++ link_arg->opts_size = 0; ++ } ++ ++ return ret; ++} ++ ++void a4l_free_lnkdesc(struct a4l_device_context * cxt, a4l_lnkdesc_t * link_arg) ++{ ++ if (link_arg->bname != NULL) ++ rtdm_free(link_arg->bname); ++ ++ if (link_arg->opts != NULL) ++ rtdm_free(link_arg->opts); ++} ++ ++int a4l_assign_driver(struct a4l_device_context * cxt, ++ struct a4l_driver * drv, a4l_lnkdesc_t * link_arg) ++{ ++ int ret = 0; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ dev->driver = drv; ++ INIT_LIST_HEAD(&dev->subdvsq); ++ ++ if (drv->privdata_size == 0) ++ 
__a4l_dbg(1, core_dbg, " warning! " ++ "the field priv will not be usable\n"); ++ else { ++ dev->priv = rtdm_malloc(drv->privdata_size); ++ if (dev->priv == NULL) { ++ __a4l_err("a4l_assign_driver: " ++ "call(alloc) failed\n"); ++ ret = -ENOMEM; ++ goto out_assign_driver; ++ } ++ ++ /* Initialize the private data even if it not our role ++ (the driver should do it), that may prevent hard to ++ find bugs */ ++ memset(dev->priv, 0, drv->privdata_size); ++ } ++ ++ if ((ret = drv->attach(dev, link_arg)) != 0) ++ __a4l_err("a4l_assign_driver: " ++ "call(drv->attach) failed (ret=%d)\n", ++ ret); ++ ++out_assign_driver: ++ ++ /* Increments module's count */ ++ if (ret == 0 && (!try_module_get(drv->owner))) { ++ __a4l_err("a4l_assign_driver: " ++ "driver's owner field wrongly set\n"); ++ ret = -ENODEV; ++ } ++ ++ if (ret != 0 && dev->priv != NULL) { ++ rtdm_free(dev->priv); ++ dev->driver = NULL; ++ } ++ ++ return ret; ++} ++ ++int a4l_release_driver(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ struct a4l_subdevice *subd, *tmp; ++ int ret = 0; ++ ++ if ((ret = dev->driver->detach(dev)) != 0) ++ goto out_release_driver; ++ ++ module_put(dev->driver->owner); ++ ++ /* In case, the driver developer did not free the subdevices */ ++ if (!list_empty(&dev->subdvsq)) ++ list_for_each_entry_safe(subd, tmp, &dev->subdvsq, list) { ++ list_del(&subd->list); ++ rtdm_free(subd); ++ } ++ ++ /* Free the private field */ ++ if (dev->priv) ++ rtdm_free(dev->priv); ++ ++ dev->driver = NULL; ++ ++out_release_driver: ++ return ret; ++} ++ ++int a4l_device_attach(struct a4l_device_context * cxt, void *arg) ++{ ++ int ret = 0; ++ a4l_lnkdesc_t link_arg; ++ struct a4l_driver *drv = NULL; ++ ++ if ((ret = a4l_fill_lnkdesc(cxt, &link_arg, arg)) != 0) ++ goto out_attach; ++ ++ if ((ret = a4l_lct_drv(link_arg.bname, &drv)) != 0) { ++ __a4l_err("a4l_device_attach: " ++ "cannot find board name %s\n", link_arg.bname); ++ goto out_attach; ++ } ++ ++ if ((ret = a4l_assign_driver(cxt, drv, &link_arg)) != 0) ++ goto out_attach; ++ ++ out_attach: ++ a4l_free_lnkdesc(cxt, &link_arg); ++ return ret; ++} ++ ++int a4l_device_detach(struct a4l_device_context * cxt) ++{ ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (dev->driver == NULL) { ++ __a4l_err("a4l_device_detach: " ++ "incoherent state, driver not reachable\n"); ++ return -ENXIO; ++ } ++ ++ return a4l_release_driver(cxt); ++} ++ ++/* --- IOCTL / FOPS functions --- */ ++ ++int a4l_ioctl_devcfg(struct a4l_device_context * cxt, void *arg) ++{ ++ int ret = 0; ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (arg == NULL) { ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &(a4l_get_dev(cxt)->flags))) { ++ __a4l_err("a4l_ioctl_devcfg: " ++ "free device, no driver to detach\n"); ++ return -EINVAL; ++ } ++ /* Pre-cleanup of the transfer structure, we ensure ++ that nothing is busy */ ++ if ((ret = a4l_precleanup_transfer(cxt)) != 0) ++ return ret; ++ /* Remove the related proc file */ ++ a4l_proc_detach(cxt); ++ /* Free the device and the driver from each other */ ++ if ((ret = a4l_device_detach(cxt)) == 0) ++ clear_bit(A4L_DEV_ATTACHED_NR, ++ &(a4l_get_dev(cxt)->flags)); ++ /* Free the transfer structure and its related data */ ++ if ((ret = a4l_cleanup_transfer(cxt)) != 0) ++ return ret; ++ } else { ++ /* Basic checking */ ++ if (test_bit ++ (A4L_DEV_ATTACHED_NR, &(a4l_get_dev(cxt)->flags))) { ++ __a4l_err("a4l_ioctl_devcfg: " ++ "linked device, cannot attach more driver\n"); ++ return -EINVAL; ++ } ++ /* 
Pre-initialization of the transfer structure */ ++ a4l_presetup_transfer(cxt); ++ /* Link the device with the driver */ ++ if ((ret = a4l_device_attach(cxt, arg)) != 0) ++ return ret; ++ /* Create the transfer structure and ++ the related proc file */ ++ if ((ret = a4l_setup_transfer(cxt)) != 0 || ++ (ret = a4l_proc_attach(cxt)) != 0) ++ a4l_device_detach(cxt); ++ else ++ set_bit(A4L_DEV_ATTACHED_NR, ++ &(a4l_get_dev(cxt)->flags)); ++ } ++ ++ return ret; ++} ++ ++int a4l_ioctl_devinfo(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ a4l_dvinfo_t info; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ memset(&info, 0, sizeof(a4l_dvinfo_t)); ++ ++ if (test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ int len = (strlen(dev->driver->board_name) > A4L_NAMELEN) ? ++ A4L_NAMELEN : strlen(dev->driver->board_name); ++ ++ memcpy(info.board_name, dev->driver->board_name, len); ++ ++ len = (strlen(dev->driver->driver_name) > A4L_NAMELEN) ? ++ A4L_NAMELEN : strlen(dev->driver->driver_name); ++ ++ memcpy(info.driver_name, dev->driver->driver_name, len); ++ ++ info.nb_subd = dev->transfer.nb_subd; ++ /* TODO: for API compatibility issue, find the first ++ read subdevice and write subdevice */ ++ } ++ ++ if (rtdm_safe_copy_to_user(fd, ++ arg, &info, sizeof(a4l_dvinfo_t)) != 0) ++ return -EFAULT; ++ ++ return 0; ++} +--- linux/drivers/xenomai/analogy/national_instruments/ni_tio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_tio.h 2021-04-07 16:01:27.849633278 +0800 +@@ -0,0 +1,1192 @@ ++/* ++ * Hardware driver for NI general purpose counter ++ * Copyright (C) 2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef __ANALOGY_NI_TIO_H__ ++#define __ANALOGY_NI_TIO_H__ ++ ++#include ++ ++#ifdef CONFIG_PCI ++#include "mite.h" ++#endif ++ ++enum ni_gpct_register { ++ NITIO_G0_Autoincrement_Reg, ++ NITIO_G1_Autoincrement_Reg, ++ NITIO_G2_Autoincrement_Reg, ++ NITIO_G3_Autoincrement_Reg, ++ NITIO_G0_Command_Reg, ++ NITIO_G1_Command_Reg, ++ NITIO_G2_Command_Reg, ++ NITIO_G3_Command_Reg, ++ NITIO_G0_HW_Save_Reg, ++ NITIO_G1_HW_Save_Reg, ++ NITIO_G2_HW_Save_Reg, ++ NITIO_G3_HW_Save_Reg, ++ NITIO_G0_SW_Save_Reg, ++ NITIO_G1_SW_Save_Reg, ++ NITIO_G2_SW_Save_Reg, ++ NITIO_G3_SW_Save_Reg, ++ NITIO_G0_Mode_Reg, ++ NITIO_G1_Mode_Reg, ++ NITIO_G2_Mode_Reg, ++ NITIO_G3_Mode_Reg, ++ NITIO_G0_LoadA_Reg, ++ NITIO_G1_LoadA_Reg, ++ NITIO_G2_LoadA_Reg, ++ NITIO_G3_LoadA_Reg, ++ NITIO_G0_LoadB_Reg, ++ NITIO_G1_LoadB_Reg, ++ NITIO_G2_LoadB_Reg, ++ NITIO_G3_LoadB_Reg, ++ NITIO_G0_Input_Select_Reg, ++ NITIO_G1_Input_Select_Reg, ++ NITIO_G2_Input_Select_Reg, ++ NITIO_G3_Input_Select_Reg, ++ NITIO_G0_Counting_Mode_Reg, ++ NITIO_G1_Counting_Mode_Reg, ++ NITIO_G2_Counting_Mode_Reg, ++ NITIO_G3_Counting_Mode_Reg, ++ NITIO_G0_Second_Gate_Reg, ++ NITIO_G1_Second_Gate_Reg, ++ NITIO_G2_Second_Gate_Reg, ++ NITIO_G3_Second_Gate_Reg, ++ NITIO_G01_Status_Reg, ++ NITIO_G23_Status_Reg, ++ NITIO_G01_Joint_Reset_Reg, ++ NITIO_G23_Joint_Reset_Reg, ++ NITIO_G01_Joint_Status1_Reg, ++ NITIO_G23_Joint_Status1_Reg, ++ NITIO_G01_Joint_Status2_Reg, ++ NITIO_G23_Joint_Status2_Reg, ++ NITIO_G0_DMA_Config_Reg, ++ NITIO_G1_DMA_Config_Reg, ++ NITIO_G2_DMA_Config_Reg, ++ NITIO_G3_DMA_Config_Reg, ++ NITIO_G0_DMA_Status_Reg, ++ NITIO_G1_DMA_Status_Reg, ++ NITIO_G2_DMA_Status_Reg, ++ NITIO_G3_DMA_Status_Reg, ++ NITIO_G0_ABZ_Reg, ++ NITIO_G1_ABZ_Reg, ++ NITIO_G0_Interrupt_Acknowledge_Reg, ++ NITIO_G1_Interrupt_Acknowledge_Reg, ++ NITIO_G2_Interrupt_Acknowledge_Reg, ++ NITIO_G3_Interrupt_Acknowledge_Reg, ++ NITIO_G0_Status_Reg, ++ NITIO_G1_Status_Reg, ++ NITIO_G2_Status_Reg, ++ NITIO_G3_Status_Reg, ++ NITIO_G0_Interrupt_Enable_Reg, ++ NITIO_G1_Interrupt_Enable_Reg, ++ NITIO_G2_Interrupt_Enable_Reg, ++ NITIO_G3_Interrupt_Enable_Reg, ++ NITIO_Num_Registers, ++}; ++ ++static inline enum ni_gpct_register NITIO_Gi_Autoincrement_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Autoincrement_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Autoincrement_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Autoincrement_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Autoincrement_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Command_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Command_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Command_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Command_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Command_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Counting_Mode_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Counting_Mode_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Counting_Mode_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Counting_Mode_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Counting_Mode_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Input_Select_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Input_Select_Reg; ++ break; ++ case 1: 
++ return NITIO_G1_Input_Select_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Input_Select_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Input_Select_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Reset_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Reset_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Reset_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Status1_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Status1_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Status1_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Joint_Status2_Reg(unsigned ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Joint_Status2_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Joint_Status2_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gxx_Status_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ case 1: ++ return NITIO_G01_Status_Reg; ++ break; ++ case 2: ++ case 3: ++ return NITIO_G23_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_LoadA_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_LoadA_Reg; ++ break; ++ case 1: ++ return NITIO_G1_LoadA_Reg; ++ break; ++ case 2: ++ return NITIO_G2_LoadA_Reg; ++ break; ++ case 3: ++ return NITIO_G3_LoadA_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_LoadB_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_LoadB_Reg; ++ break; ++ case 1: ++ return NITIO_G1_LoadB_Reg; ++ break; ++ case 2: ++ return NITIO_G2_LoadB_Reg; ++ break; ++ case 3: ++ return NITIO_G3_LoadB_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Mode_Reg(unsigned counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Mode_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Mode_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Mode_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Mode_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_SW_Save_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_SW_Save_Reg; ++ break; ++ case 1: ++ return NITIO_G1_SW_Save_Reg; ++ break; ++ case 2: ++ return NITIO_G2_SW_Save_Reg; ++ break; ++ case 3: ++ return NITIO_G3_SW_Save_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Second_Gate_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Second_Gate_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Second_Gate_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Second_Gate_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Second_Gate_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_DMA_Config_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_DMA_Config_Reg; ++ break; ++ 
case 1: ++ return NITIO_G1_DMA_Config_Reg; ++ break; ++ case 2: ++ return NITIO_G2_DMA_Config_Reg; ++ break; ++ case 3: ++ return NITIO_G3_DMA_Config_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_DMA_Status_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_DMA_Status_Reg; ++ break; ++ case 1: ++ return NITIO_G1_DMA_Status_Reg; ++ break; ++ case 2: ++ return NITIO_G2_DMA_Status_Reg; ++ break; ++ case 3: ++ return NITIO_G3_DMA_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_ABZ_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_ABZ_Reg; ++ break; ++ case 1: ++ return NITIO_G1_ABZ_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Interrupt_Acknowledge_Reg(int ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Interrupt_Acknowledge_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Interrupt_Acknowledge_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Interrupt_Acknowledge_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Interrupt_Acknowledge_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Status_Reg(int counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Status_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Status_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Status_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Status_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline enum ni_gpct_register NITIO_Gi_Interrupt_Enable_Reg(int ++ counter_index) ++{ ++ switch (counter_index) { ++ case 0: ++ return NITIO_G0_Interrupt_Enable_Reg; ++ break; ++ case 1: ++ return NITIO_G1_Interrupt_Enable_Reg; ++ break; ++ case 2: ++ return NITIO_G2_Interrupt_Enable_Reg; ++ break; ++ case 3: ++ return NITIO_G3_Interrupt_Enable_Reg; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++enum ni_gpct_variant { ++ ni_gpct_variant_e_series, ++ ni_gpct_variant_m_series, ++ ni_gpct_variant_660x ++}; ++ ++struct ni_gpct { ++ struct ni_gpct_device *counter_dev; ++ unsigned counter_index; ++ unsigned chip_index; ++ uint64_t clock_period_ps; /* clock period in picoseconds */ ++ struct mite_channel *mite_chan; ++ rtdm_lock_t lock; ++}; ++ ++struct ni_gpct_device { ++ struct a4l_device *dev; ++ void (*write_register)(struct ni_gpct * counter, ++ unsigned int bits, enum ni_gpct_register reg); ++ unsigned (*read_register)(struct ni_gpct * counter, ++ enum ni_gpct_register reg); ++ enum ni_gpct_variant variant; ++ struct ni_gpct **counters; ++ unsigned num_counters; ++ unsigned regs[NITIO_Num_Registers]; ++ rtdm_lock_t regs_lock; ++}; ++ ++#define Gi_Auto_Increment_Mask 0xff ++#define Gi_Up_Down_Shift 5 ++ ++#define Gi_Arm_Bit 0x1 ++#define Gi_Save_Trace_Bit 0x2 ++#define Gi_Load_Bit 0x4 ++#define Gi_Disarm_Bit 0x10 ++#define Gi_Up_Down_Mask (0x3 << Gi_Up_Down_Shift) ++#define Gi_Always_Down_Bits (0x0 << Gi_Up_Down_Shift) ++#define Gi_Always_Up_Bits (0x1 << Gi_Up_Down_Shift) ++#define Gi_Up_Down_Hardware_IO_Bits (0x2 << Gi_Up_Down_Shift) ++#define Gi_Up_Down_Hardware_Gate_Bits (0x3 << Gi_Up_Down_Shift) ++#define Gi_Write_Switch_Bit 0x80 ++#define Gi_Synchronize_Gate_Bit 0x100 ++#define Gi_Little_Big_Endian_Bit 0x200 ++#define Gi_Bank_Switch_Start_Bit 0x400 ++#define Gi_Bank_Switch_Mode_Bit 0x800 
++#define Gi_Bank_Switch_Enable_Bit 0x1000 ++#define Gi_Arm_Copy_Bit 0x2000 ++#define Gi_Save_Trace_Copy_Bit 0x4000 ++#define Gi_Disarm_Copy_Bit 0x8000 ++ ++#define Gi_Index_Phase_Bitshift 5 ++#define Gi_HW_Arm_Select_Shift 8 ++ ++#define Gi_Counting_Mode_Mask 0x7 ++#define Gi_Counting_Mode_Normal_Bits 0x0 ++#define Gi_Counting_Mode_QuadratureX1_Bits 0x1 ++#define Gi_Counting_Mode_QuadratureX2_Bits 0x2 ++#define Gi_Counting_Mode_QuadratureX4_Bits 0x3 ++#define Gi_Counting_Mode_Two_Pulse_Bits 0x4 ++#define Gi_Counting_Mode_Sync_Source_Bits 0x6 ++#define Gi_Index_Mode_Bit 0x10 ++#define Gi_Index_Phase_Mask (0x3 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_LowA_LowB (0x0 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_LowA_HighB (0x1 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_HighA_LowB (0x2 << Gi_Index_Phase_Bitshift) ++#define Gi_Index_Phase_HighA_HighB (0x3 << Gi_Index_Phase_Bitshift) ++ ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_HW_Arm_Enable_Bit 0x80 ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_660x_HW_Arm_Select_Mask (0x7 << Gi_HW_Arm_Select_Shift) ++#define Gi_660x_Prescale_X8_Bit 0x1000 ++#define Gi_M_Series_Prescale_X8_Bit 0x2000 ++#define Gi_M_Series_HW_Arm_Select_Mask (0x1f << Gi_HW_Arm_Select_Shift) ++/* Must be set for clocks over 40MHz, ++ which includes synchronous counting and quadrature modes */ ++#define Gi_660x_Alternate_Sync_Bit 0x2000 ++#define Gi_M_Series_Alternate_Sync_Bit 0x4000 ++/* From m-series example code, ++ not documented in 660x register level manual */ ++#define Gi_660x_Prescale_X2_Bit 0x4000 ++#define Gi_M_Series_Prescale_X2_Bit 0x8000 ++ ++static inline unsigned int Gi_Alternate_Sync_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Alternate_Sync_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Alternate_Sync_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_Prescale_X2_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Prescale_X2_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Prescale_X2_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_Prescale_X8_Bit(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_Prescale_X8_Bit; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_Prescale_X8_Bit; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline unsigned int Gi_HW_Arm_Select_Mask(enum ni_gpct_variant variant) ++{ ++ switch (variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ return Gi_M_Series_HW_Arm_Select_Mask; ++ break; ++ case ni_gpct_variant_660x: ++ return Gi_660x_HW_Arm_Select_Mask; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++#define NI_660x_Timebase_1_Clock 0x0 /* 20MHz */ ++#define NI_660x_Source_Pin_i_Clock 0x1 ++#define NI_660x_Next_Gate_Clock 0xa ++#define NI_660x_Timebase_2_Clock 0x12 /* 100KHz */ ++#define NI_660x_Next_TC_Clock 0x13 ++#define NI_660x_Timebase_3_Clock 0x1e /* 80MHz */ ++#define NI_660x_Logic_Low_Clock 
0x1f ++ ++#define ni_660x_max_rtsi_channel 6 ++#define ni_660x_max_source_pin 7 ++ ++static inline unsigned int NI_660x_RTSI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return (0xb + n); ++} ++ ++static inline unsigned int NI_660x_Source_Pin_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_source_pin); ++ return (0x2 + n); ++} ++ ++/* Clock sources for ni e and m series boards, ++ get bits with Gi_Source_Select_Bits() */ ++#define NI_M_Series_Timebase_1_Clock 0x0 /* 20MHz */ ++#define NI_M_Series_Timebase_2_Clock 0x12 /* 100KHz */ ++#define NI_M_Series_Next_TC_Clock 0x13 ++#define NI_M_Series_Next_Gate_Clock 0x14 /* when Gi_Src_SubSelect = 0 */ ++#define NI_M_Series_PXI_Star_Trigger_Clock 0x14 /* when Gi_Src_SubSelect = 1 */ ++#define NI_M_Series_PXI10_Clock 0x1d ++#define NI_M_Series_Timebase_3_Clock 0x1e /* 80MHz, when Gi_Src_SubSelect = 0 */ ++#define NI_M_Series_Analog_Trigger_Out_Clock 0x1e /* when Gi_Src_SubSelect = 1 */ ++#define NI_M_Series_Logic_Low_Clock 0x1f ++ ++#define ni_m_series_max_pfi_channel 15 ++#define ni_m_series_max_rtsi_channel 7 ++ ++static inline unsigned int NI_M_Series_PFI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_pfi_channel); ++ if (n < 10) ++ return 1 + n; ++ else ++ return 0xb + n; ++} ++ ++static inline unsigned int NI_M_Series_RTSI_Clock(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_rtsi_channel); ++ if (n == 7) ++ return 0x1b; ++ else ++ return 0xb + n; ++} ++ ++#define NI_660x_Source_Pin_i_Gate_Select 0x0 ++#define NI_660x_Gate_Pin_i_Gate_Select 0x1 ++#define NI_660x_Next_SRC_Gate_Select 0xa ++#define NI_660x_Next_Out_Gate_Select 0x14 ++#define NI_660x_Logic_Low_Gate_Select 0x1f ++#define ni_660x_max_gate_pin 7 ++ ++static inline unsigned int NI_660x_Gate_Pin_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_gate_pin); ++ return 0x2 + n; ++} ++ ++static inline unsigned int NI_660x_RTSI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return 0xb + n; ++} ++ ++ ++#define NI_M_Series_Timestamp_Mux_Gate_Select 0x0 ++#define NI_M_Series_AI_START2_Gate_Select 0x12 ++#define NI_M_Series_PXI_Star_Trigger_Gate_Select 0x13 ++#define NI_M_Series_Next_Out_Gate_Select 0x14 ++#define NI_M_Series_AI_START1_Gate_Select 0x1c ++#define NI_M_Series_Next_SRC_Gate_Select 0x1d ++#define NI_M_Series_Analog_Trigger_Out_Gate_Select 0x1e ++#define NI_M_Series_Logic_Low_Gate_Select 0x1f ++ ++static inline unsigned int NI_M_Series_RTSI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_rtsi_channel); ++ if (n == 7) ++ return 0x1b; ++ return 0xb + n; ++} ++ ++static inline unsigned int NI_M_Series_PFI_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_m_series_max_pfi_channel); ++ if (n < 10) ++ return 1 + n; ++ return 0xb + n; ++} ++ ++ ++#define Gi_Source_Select_Shift 2 ++#define Gi_Gate_Select_Shift 7 ++ ++#define Gi_Read_Acknowledges_Irq 0x1 /* not present on 660x */ ++#define Gi_Write_Acknowledges_Irq 0x2 /* not present on 660x */ ++#define Gi_Source_Select_Mask 0x7c ++#define Gi_Gate_Select_Mask (0x1f << Gi_Gate_Select_Shift) ++#define Gi_Gate_Select_Load_Source_Bit 0x1000 ++#define Gi_Or_Gate_Bit 0x2000 ++#define Gi_Output_Polarity_Bit 0x4000 /* set to invert */ ++#define Gi_Source_Polarity_Bit 0x8000 /* set to invert */ ++ ++#define Gi_Source_Select_Bits(x) ((x << Gi_Source_Select_Shift) & \ ++ Gi_Source_Select_Mask) ++#define Gi_Gate_Select_Bits(x) ((x << Gi_Gate_Select_Shift) & \ ++ Gi_Gate_Select_Mask) ++ ++#define Gi_Gating_Mode_Mask 0x3 ++#define Gi_Gating_Disabled_Bits 0x0 
++#define Gi_Level_Gating_Bits 0x1 ++#define Gi_Rising_Edge_Gating_Bits 0x2 ++#define Gi_Falling_Edge_Gating_Bits 0x3 ++#define Gi_Gate_On_Both_Edges_Bit 0x4 /* used in conjunction with ++ rising edge gating mode */ ++#define Gi_Trigger_Mode_for_Edge_Gate_Mask 0x18 ++#define Gi_Edge_Gate_Starts_Stops_Bits 0x0 ++#define Gi_Edge_Gate_Stops_Starts_Bits 0x8 ++#define Gi_Edge_Gate_Starts_Bits 0x10 ++#define Gi_Edge_Gate_No_Starts_or_Stops_Bits 0x18 ++#define Gi_Stop_Mode_Mask 0x60 ++#define Gi_Stop_on_Gate_Bits 0x00 ++#define Gi_Stop_on_Gate_or_TC_Bits 0x20 ++#define Gi_Stop_on_Gate_or_Second_TC_Bits 0x40 ++#define Gi_Load_Source_Select_Bit 0x80 ++#define Gi_Output_Mode_Mask 0x300 ++#define Gi_Output_TC_Pulse_Bits 0x100 ++#define Gi_Output_TC_Toggle_Bits 0x200 ++#define Gi_Output_TC_or_Gate_Toggle_Bits 0x300 ++#define Gi_Counting_Once_Mask 0xc00 ++#define Gi_No_Hardware_Disarm_Bits 0x000 ++#define Gi_Disarm_at_TC_Bits 0x400 ++#define Gi_Disarm_at_Gate_Bits 0x800 ++#define Gi_Disarm_at_TC_or_Gate_Bits 0xc00 ++#define Gi_Loading_On_TC_Bit 0x1000 ++#define Gi_Gate_Polarity_Bit 0x2000 ++#define Gi_Loading_On_Gate_Bit 0x4000 ++#define Gi_Reload_Source_Switching_Bit 0x8000 ++ ++#define NI_660x_Source_Pin_i_Second_Gate_Select 0x0 ++#define NI_660x_Up_Down_Pin_i_Second_Gate_Select 0x1 ++#define NI_660x_Next_SRC_Second_Gate_Select 0xa ++#define NI_660x_Next_Out_Second_Gate_Select 0x14 ++#define NI_660x_Selected_Gate_Second_Gate_Select 0x1e ++#define NI_660x_Logic_Low_Second_Gate_Select 0x1f ++ ++#define ni_660x_max_up_down_pin 7 ++ ++static inline ++unsigned int NI_660x_Up_Down_Pin_Second_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_up_down_pin); ++ return 0x2 + n; ++} ++static inline ++unsigned int NI_660x_RTSI_Second_Gate_Select(unsigned int n) ++{ ++ BUG_ON(n > ni_660x_max_rtsi_channel); ++ return 0xb + n; ++} ++ ++#define Gi_Second_Gate_Select_Shift 7 ++ ++/*FIXME: m-series has a second gate subselect bit */ ++/*FIXME: m-series second gate sources are undocumented (by NI)*/ ++#define Gi_Second_Gate_Mode_Bit 0x1 ++#define Gi_Second_Gate_Select_Mask (0x1f << Gi_Second_Gate_Select_Shift) ++#define Gi_Second_Gate_Polarity_Bit 0x2000 ++#define Gi_Second_Gate_Subselect_Bit 0x4000 /* m-series only */ ++#define Gi_Source_Subselect_Bit 0x8000 /* m-series only */ ++ ++static inline ++unsigned int Gi_Second_Gate_Select_Bits(unsigned int second_gate_select) ++{ ++ return (second_gate_select << Gi_Second_Gate_Select_Shift) & ++ Gi_Second_Gate_Select_Mask; ++} ++ ++#define G0_Save_Bit 0x1 ++#define G1_Save_Bit 0x2 ++#define G0_Counting_Bit 0x4 ++#define G1_Counting_Bit 0x8 ++#define G0_Next_Load_Source_Bit 0x10 ++#define G1_Next_Load_Source_Bit 0x20 ++#define G0_Stale_Data_Bit 0x40 ++#define G1_Stale_Data_Bit 0x80 ++#define G0_Armed_Bit 0x100 ++#define G1_Armed_Bit 0x200 ++#define G0_No_Load_Between_Gates_Bit 0x400 ++#define G1_No_Load_Between_Gates_Bit 0x800 ++#define G0_TC_Error_Bit 0x1000 ++#define G1_TC_Error_Bit 0x2000 ++#define G0_Gate_Error_Bit 0x4000 ++#define G1_Gate_Error_Bit 0x8000 ++ ++static inline unsigned int Gi_Counting_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Counting_Bit; ++ return G0_Counting_Bit; ++} ++ ++static inline unsigned int Gi_Armed_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Armed_Bit; ++ return G0_Armed_Bit; ++} ++ ++static inline unsigned int Gi_Next_Load_Source_Bit(unsigned counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Next_Load_Source_Bit; ++ return G0_Next_Load_Source_Bit; ++} ++ ++static 
inline unsigned int Gi_Stale_Data_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Stale_Data_Bit; ++ return G0_Stale_Data_Bit; ++} ++ ++static inline unsigned int Gi_TC_Error_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_TC_Error_Bit; ++ return G0_TC_Error_Bit; ++} ++ ++static inline unsigned int Gi_Gate_Error_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Gate_Error_Bit; ++ return G0_Gate_Error_Bit; ++} ++ ++/* Joint reset register bits */ ++static inline unsigned Gi_Reset_Bit(unsigned int counter_index) ++{ ++ return 0x1 << (2 + (counter_index % 2)); ++} ++ ++#define G0_Output_Bit 0x1 ++#define G1_Output_Bit 0x2 ++#define G0_HW_Save_Bit 0x1000 ++#define G1_HW_Save_Bit 0x2000 ++#define G0_Permanent_Stale_Bit 0x4000 ++#define G1_Permanent_Stale_Bit 0x8000 ++ ++static inline unsigned int Gi_Permanent_Stale_Bit(unsigned ++ counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Permanent_Stale_Bit; ++ return G0_Permanent_Stale_Bit; ++} ++ ++#define Gi_DMA_Enable_Bit 0x1 ++#define Gi_DMA_Write_Bit 0x2 ++#define Gi_DMA_Int_Bit 0x4 ++ ++#define Gi_DMA_Readbank_Bit 0x2000 ++#define Gi_DRQ_Error_Bit 0x4000 ++#define Gi_DRQ_Status_Bit 0x8000 ++ ++#define G0_Gate_Error_Confirm_Bit 0x20 ++#define G0_TC_Error_Confirm_Bit 0x40 ++ ++#define G1_Gate_Error_Confirm_Bit 0x2 ++#define G1_TC_Error_Confirm_Bit 0x4 ++ ++static inline unsigned int Gi_Gate_Error_Confirm_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_Gate_Error_Confirm_Bit; ++ return G0_Gate_Error_Confirm_Bit; ++} ++ ++static inline unsigned int Gi_TC_Error_Confirm_Bit(unsigned int counter_index) ++{ ++ if (counter_index % 2) ++ return G1_TC_Error_Confirm_Bit; ++ return G0_TC_Error_Confirm_Bit; ++} ++ ++/* Bits that are the same in G0/G2 and G1/G3 interrupt acknowledge registers */ ++#define Gi_TC_Interrupt_Ack_Bit 0x4000 ++#define Gi_Gate_Interrupt_Ack_Bit 0x8000 ++ ++#define Gi_Gate_Interrupt_Bit 0x4 ++#define Gi_TC_Bit 0x8 ++#define Gi_Interrupt_Bit 0x8000 ++ ++#define G0_TC_Interrupt_Enable_Bit 0x40 ++#define G0_Gate_Interrupt_Enable_Bit 0x100 ++ ++#define G1_TC_Interrupt_Enable_Bit 0x200 ++#define G1_Gate_Interrupt_Enable_Bit 0x400 ++ ++static inline unsigned int Gi_Gate_Interrupt_Enable_Bit(unsigned int counter_index) ++{ ++ unsigned int bit; ++ ++ if (counter_index % 2) { ++ bit = G1_Gate_Interrupt_Enable_Bit; ++ } else { ++ bit = G0_Gate_Interrupt_Enable_Bit; ++ } ++ return bit; ++} ++ ++#define counter_status_mask (A4L_COUNTER_ARMED | A4L_COUNTER_COUNTING) ++ ++#define NI_USUAL_PFI_SELECT(x) ((x < 10) ? (0x1 + x) : (0xb + x)) ++#define NI_USUAL_RTSI_SELECT(x) ((x < 7 ) ? 
(0xb + x) : (0x1b + x)) ++ ++/* Mode bits for NI general-purpose counters, set with ++ INSN_CONFIG_SET_COUNTER_MODE */ ++#define NI_GPCT_COUNTING_MODE_SHIFT 16 ++#define NI_GPCT_INDEX_PHASE_BITSHIFT 20 ++#define NI_GPCT_COUNTING_DIRECTION_SHIFT 24 ++ ++#define NI_GPCT_GATE_ON_BOTH_EDGES_BIT 0x4 ++#define NI_GPCT_EDGE_GATE_MODE_MASK 0x18 ++#define NI_GPCT_EDGE_GATE_STARTS_STOPS_BITS 0x0 ++#define NI_GPCT_EDGE_GATE_STOPS_STARTS_BITS 0x8 ++#define NI_GPCT_EDGE_GATE_STARTS_BITS 0x10 ++#define NI_GPCT_EDGE_GATE_NO_STARTS_NO_STOPS_BITS 0x18 ++#define NI_GPCT_STOP_MODE_MASK 0x60 ++#define NI_GPCT_STOP_ON_GATE_BITS 0x00 ++#define NI_GPCT_STOP_ON_GATE_OR_TC_BITS 0x20 ++#define NI_GPCT_STOP_ON_GATE_OR_SECOND_TC_BITS 0x40 ++#define NI_GPCT_LOAD_B_SELECT_BIT 0x80 ++#define NI_GPCT_OUTPUT_MODE_MASK 0x300 ++#define NI_GPCT_OUTPUT_TC_PULSE_BITS 0x100 ++#define NI_GPCT_OUTPUT_TC_TOGGLE_BITS 0x200 ++#define NI_GPCT_OUTPUT_TC_OR_GATE_TOGGLE_BITS 0x300 ++#define NI_GPCT_HARDWARE_DISARM_MASK 0xc00 ++#define NI_GPCT_NO_HARDWARE_DISARM_BITS 0x000 ++#define NI_GPCT_DISARM_AT_TC_BITS 0x400 ++#define NI_GPCT_DISARM_AT_GATE_BITS 0x800 ++#define NI_GPCT_DISARM_AT_TC_OR_GATE_BITS 0xc00 ++#define NI_GPCT_LOADING_ON_TC_BIT 0x1000 ++#define NI_GPCT_LOADING_ON_GATE_BIT 0x4000 ++#define NI_GPCT_COUNTING_MODE_MASK 0x7 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_NORMAL_BITS 0x0 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X1_BITS 0x1 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X2_BITS 0x2 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_QUADRATURE_X4_BITS 0x3 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_TWO_PULSE_BITS 0x4 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_COUNTING_MODE_SYNC_SOURCE_BITS 0x6 << NI_GPCT_COUNTING_MODE_SHIFT ++#define NI_GPCT_INDEX_PHASE_MASK 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_LOW_A_LOW_B_BITS 0x0 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_LOW_A_HIGH_B_BITS 0x1 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_HIGH_A_LOW_B_BITS 0x2 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_PHASE_HIGH_A_HIGH_B_BITS 0x3 << NI_GPCT_INDEX_PHASE_BITSHIFT ++#define NI_GPCT_INDEX_ENABLE_BIT 0x400000 ++#define NI_GPCT_COUNTING_DIRECTION_MASK 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_DOWN_BITS 0x00 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_UP_BITS 0x1 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_HW_UP_DOWN_BITS 0x2 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_COUNTING_DIRECTION_HW_GATE_BITS 0x3 << NI_GPCT_COUNTING_DIRECTION_SHIFT ++#define NI_GPCT_RELOAD_SOURCE_MASK 0xc000000 ++#define NI_GPCT_RELOAD_SOURCE_FIXED_BITS 0x0 ++#define NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS 0x4000000 ++#define NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS 0x8000000 ++#define NI_GPCT_OR_GATE_BIT 0x10000000 ++#define NI_GPCT_INVERT_OUTPUT_BIT 0x20000000 ++ ++/* Bits for setting a clock source with INSN_CONFIG_SET_CLOCK_SRC when ++ using NI general-purpose counters. 
*/ ++#define NI_GPCT_CLOCK_SRC_SELECT_MASK 0x3f ++#define NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS 0x0 ++#define NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS 0x1 ++#define NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS 0x2 ++#define NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS 0x3 ++#define NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS 0x4 ++#define NI_GPCT_NEXT_TC_CLOCK_SRC_BITS 0x5 ++#define NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS 0x6 /* NI 660x-specific */ ++#define NI_GPCT_PXI10_CLOCK_SRC_BITS 0x7 ++#define NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS 0x8 ++#define NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS 0x9 ++#define NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK 0x30000000 ++#define NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS 0x0 ++#define NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS 0x10000000 /* divide source by 2 */ ++#define NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS 0x20000000 /* divide source by 8 */ ++#define NI_GPCT_INVERT_CLOCK_SRC_BIT 0x80000000 ++#define NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(x) (0x10 + x) ++#define NI_GPCT_RTSI_CLOCK_SRC_BITS(x) (0x18 + x) ++#define NI_GPCT_PFI_CLOCK_SRC_BITS(x) (0x20 + x) ++ ++/* Possibilities for setting a gate source with ++ INSN_CONFIG_SET_GATE_SRC when using NI general-purpose counters. ++ May be bitwise-or'd with CR_EDGE or CR_INVERT. */ ++/* M-series gates */ ++#define NI_GPCT_TIMESTAMP_MUX_GATE_SELECT 0x0 ++#define NI_GPCT_AI_START2_GATE_SELECT 0x12 ++#define NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT 0x13 ++#define NI_GPCT_NEXT_OUT_GATE_SELECT 0x14 ++#define NI_GPCT_AI_START1_GATE_SELECT 0x1c ++#define NI_GPCT_NEXT_SOURCE_GATE_SELECT 0x1d ++#define NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT 0x1e ++#define NI_GPCT_LOGIC_LOW_GATE_SELECT 0x1f ++/* More gates for 660x */ ++#define NI_GPCT_SOURCE_PIN_i_GATE_SELECT 0x100 ++#define NI_GPCT_GATE_PIN_i_GATE_SELECT 0x101 ++/* More gates for 660x "second gate" */ ++#define NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT 0x201 ++#define NI_GPCT_SELECTED_GATE_GATE_SELECT 0x21e ++/* M-series "second gate" sources are unknown, we should add them here ++ with an offset of 0x300 when known. */ ++#define NI_GPCT_DISABLED_GATE_SELECT 0x8000 ++#define NI_GPCT_GATE_PIN_GATE_SELECT(x) (0x102 + x) ++#define NI_GPCT_RTSI_GATE_SELECT(x) NI_USUAL_RTSI_SELECT(x) ++#define NI_GPCT_PFI_GATE_SELECT(x) NI_USUAL_PFI_SELECT(x) ++#define NI_GPCT_UP_DOWN_PIN_GATE_SELECT(x) (0x202 + x) ++ ++/* Possibilities for setting a source with INSN_CONFIG_SET_OTHER_SRC ++ when using NI general-purpose counters. */ ++#define NI_GPCT_SOURCE_ENCODER_A 0 ++#define NI_GPCT_SOURCE_ENCODER_B 1 ++#define NI_GPCT_SOURCE_ENCODER_Z 2 ++/* M-series gates */ ++/* Still unknown, probably only need NI_GPCT_PFI_OTHER_SELECT */ ++#define NI_GPCT_DISABLED_OTHER_SELECT 0x8000 ++#define NI_GPCT_PFI_OTHER_SELECT(x) NI_USUAL_PFI_SELECT(x) ++ ++/* Start sources for ni general-purpose counters for use with ++ INSN_CONFIG_ARM */ ++#define NI_GPCT_ARM_IMMEDIATE 0x0 ++/* Start both the counter and the adjacent paired counter ++ simultaneously */ ++#define NI_GPCT_ARM_PAIRED_IMMEDIATE 0x1 ++/* NI doesn't document bits for selecting hardware arm triggers. If ++ the NI_GPCT_ARM_UNKNOWN bit is set, we will pass the least significant ++ bits (3 bits for 660x or 5 bits for m-series) through to the ++ hardware. This will at least allow someone to figure out what the bits ++ do later. */ ++#define NI_GPCT_ARM_UNKNOWN 0x1000 ++ ++/* Digital filtering options for ni 660x for use with ++ INSN_CONFIG_FILTER. 
*/ ++#define NI_GPCT_FILTER_OFF 0x0 ++#define NI_GPCT_FILTER_TIMEBASE_3_SYNC 0x1 ++#define NI_GPCT_FILTER_100x_TIMEBASE_1 0x2 ++#define NI_GPCT_FILTER_20x_TIMEBASE_1 0x3 ++#define NI_GPCT_FILTER_10x_TIMEBASE_1 0x4 ++#define NI_GPCT_FILTER_2x_TIMEBASE_1 0x5 ++#define NI_GPCT_FILTER_2x_TIMEBASE_3 0x6 ++ ++/* Master clock sources for ni mio boards and ++ INSN_CONFIG_SET_CLOCK_SRC */ ++#define NI_MIO_INTERNAL_CLOCK 0 ++#define NI_MIO_RTSI_CLOCK 1 ++/* Doesn't work for m-series, use NI_MIO_PLL_RTSI_CLOCK() the ++ NI_MIO_PLL_* sources are m-series only */ ++#define NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK 2 ++#define NI_MIO_PLL_PXI10_CLOCK 3 ++#define NI_MIO_PLL_RTSI0_CLOCK 4 ++ ++#define NI_MIO_PLL_RTSI_CLOCK(x) (NI_MIO_PLL_RTSI0_CLOCK + (x)) ++ ++/* Signals which can be routed to an NI RTSI pin with ++ INSN_CONFIG_SET_ROUTING. The numbers assigned are not arbitrary, they ++ correspond to the bits required to program the board. */ ++#define NI_RTSI_OUTPUT_ADR_START1 0 ++#define NI_RTSI_OUTPUT_ADR_START2 1 ++#define NI_RTSI_OUTPUT_SCLKG 2 ++#define NI_RTSI_OUTPUT_DACUPDN 3 ++#define NI_RTSI_OUTPUT_DA_START1 4 ++#define NI_RTSI_OUTPUT_G_SRC0 5 ++#define NI_RTSI_OUTPUT_G_GATE0 6 ++#define NI_RTSI_OUTPUT_RGOUT0 7 ++#define NI_RTSI_OUTPUT_RTSI_BRD_0 8 ++/* Pre-m-series always have RTSI clock on line 7 */ ++#define NI_RTSI_OUTPUT_RTSI_OSC 12 ++ ++#define NI_RTSI_OUTPUT_RTSI_BRD(x) (NI_RTSI_OUTPUT_RTSI_BRD_0 + (x)) ++ ++ ++int a4l_ni_tio_rinsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++int a4l_ni_tio_winsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++int a4l_ni_tio_insn_config(struct ni_gpct *counter, struct a4l_kernel_instruction *insn); ++void a4l_ni_tio_init_counter(struct ni_gpct *counter); ++ ++struct ni_gpct_device *a4l_ni_gpct_device_construct(struct a4l_device * dev, ++ void (*write_register) (struct ni_gpct * counter, unsigned int bits, ++ enum ni_gpct_register reg), ++ unsigned int (*read_register) (struct ni_gpct * counter, ++ enum ni_gpct_register reg), enum ni_gpct_variant variant, ++ unsigned int num_counters); ++void a4l_ni_gpct_device_destroy(struct ni_gpct_device *counter_dev); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++extern struct a4l_cmd_desc a4l_ni_tio_cmd_mask; ++ ++int a4l_ni_tio_input_inttrig(struct ni_gpct *counter, lsampl_t trignum); ++int a4l_ni_tio_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd); ++int a4l_ni_tio_cmdtest(struct ni_gpct *counter, struct a4l_cmd_desc *cmd); ++int a4l_ni_tio_cancel(struct ni_gpct *counter); ++ ++void a4l_ni_tio_handle_interrupt(struct ni_gpct *counter, struct a4l_device *dev); ++void a4l_ni_tio_set_mite_channel(struct ni_gpct *counter, ++ struct mite_channel *mite_chan); ++void a4l_ni_tio_acknowledge_and_confirm(struct ni_gpct *counter, ++ int *gate_error, ++ int *tc_error, ++ int *perm_stale_data, int *stale_data); ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#endif /* !__ANALOGY_NI_TIO_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/ni_670x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_670x.c 2021-04-07 16:01:27.844633285 +0800 +@@ -0,0 +1,443 @@ ++/* ++ comedi/drivers/ni_670x.c ++ Hardware driver for NI 670x devices ++ ++ COMEDI - Linux Control and Measurement Device Interface ++ Copyright (C) 1997-2001 David A. 
Schleef ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ ++*/ ++/* ++Driver: ni_670x ++Description: National Instruments 670x ++Author: Bart Joris ++Updated: Wed, 11 Dec 2002 18:25:35 -0800 ++Devices: [National Instruments] PCI-6703 (ni_670x), PCI-6704 ++Status: unknown ++ ++Commands are not supported. ++*/ ++ ++/* ++ Bart Joris Last updated on 20/08/2001 ++ ++ Manuals: ++ ++ 322110a.pdf PCI/PXI-6704 User Manual ++ 322110b.pdf PCI/PXI-6703/6704 User Manual ++*/ ++ ++/* ++ * Integration with Xenomai/Analogy layer based on the ++ * comedi driver. Adaptation made by ++ * Julien Delange ++ */ ++ ++#include ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_mio.h" ++#include "mite.h" ++ ++#define PCIMIO_IRQ_POLARITY 1 ++ ++#define AO_VALUE_OFFSET 0x00 ++#define AO_CHAN_OFFSET 0x0c ++#define AO_STATUS_OFFSET 0x10 ++#define AO_CONTROL_OFFSET 0x10 ++#define DIO_PORT0_DIR_OFFSET 0x20 ++#define DIO_PORT0_DATA_OFFSET 0x24 ++#define DIO_PORT1_DIR_OFFSET 0x28 ++#define DIO_PORT1_DATA_OFFSET 0x2c ++#define MISC_STATUS_OFFSET 0x14 ++#define MISC_CONTROL_OFFSET 0x14 ++ ++/* Board description*/ ++ ++struct ni_670x_board { ++ unsigned short device_id; ++ const char *name; ++ unsigned short ao_chans; ++ unsigned short ao_bits; ++}; ++ ++#define thisboard ((struct ni_670x_board *)dev->board_ptr) ++ ++struct ni_670x_private { ++ struct mite_struct *mite; ++ int boardtype; ++ int dio; ++ unsigned int ao_readback[32]; ++ ++ /* ++ * Added when porting to xenomai ++ */ ++ int irq_polarity; ++ int irq_pin; ++ int irq; ++ struct ni_670x_board *board_ptr; ++ /* ++ * END OF ADDED when porting to xenomai ++ */ ++}; ++ ++struct ni_670x_subd_priv { ++ int io_bits; ++ unsigned int state; ++ uint16_t readback[2]; ++ uint16_t config; ++ void* counter; ++}; ++ ++static int ni_670x_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++static int ni_670x_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn); ++ ++static struct a4l_channels_desc ni_670x_desc_dio = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 8, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 1}, ++ }, ++}; ++ ++static struct a4l_channels_desc ni_670x_desc_ao = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = 0, /* initialized later according to the board found */ ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, 16}, ++ }, ++}; ++ ++ ++static struct a4l_rngtab range_0_20mA = { 1, {RANGE_mA(0, 20)} }; ++static struct a4l_rngtab rng_bipolar10 = { 1, {RANGE_V(-10, 10) }}; ++ ++struct a4l_rngtab *range_table_list[32] = { ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, 
&rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, &rng_bipolar10, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA, ++ &range_0_20mA, &range_0_20mA, &range_0_20mA, &range_0_20mA}; ++ ++static A4L_RNGDESC(32) ni670x_ao_desc; ++ ++static void setup_subd_ao(struct a4l_subdevice *subd) ++{ ++ int i; ++ int nchans; ++ ++ nchans = ((struct ni_670x_private*)(subd->dev->priv))->board_ptr->ao_chans; ++ subd->flags = A4L_SUBD_AO; ++ subd->chan_desc = &ni_670x_desc_ao; ++ subd->chan_desc->length = nchans; ++ if (nchans == 32) { ++ ++ subd->rng_desc = (struct a4l_rngdesc*) &ni670x_ao_desc; ++ subd->rng_desc->mode = A4L_RNG_PERCHAN_RNGDESC; ++ for (i = 0 ; i < 16 ; i++) { ++ subd->rng_desc->rngtabs[i] =&rng_bipolar10; ++ subd->rng_desc->rngtabs[16+i] =&range_0_20mA; ++ } ++ } else ++ subd->rng_desc = &a4l_range_bipolar10; ++ ++ subd->insn_write = &ni_670x_ao_winsn; ++ subd->insn_read = &ni_670x_ao_rinsn; ++} ++ ++static void setup_subd_dio(struct a4l_subdevice *s) ++{ ++ /* Digital i/o subdevice */ ++ s->flags = A4L_SUBD_DIO; ++ s->chan_desc = &ni_670x_desc_dio; ++ s->rng_desc = &range_digital; ++ s->insn_bits = ni_670x_dio_insn_bits; ++ s->insn_config = ni_670x_dio_insn_config; ++} ++ ++struct setup_subd { ++ void (*setup_func) (struct a4l_subdevice *); ++ int sizeof_priv; ++}; ++ ++static struct setup_subd setup_subds[2] = { ++ { ++ .setup_func = setup_subd_ao, ++ .sizeof_priv = sizeof(struct ni_670x_subd_priv), ++ }, ++ { ++ .setup_func = setup_subd_dio, ++ .sizeof_priv = sizeof(struct ni_670x_subd_priv), ++ }, ++}; ++ ++static const struct ni_670x_board ni_670x_boards[] = { ++ { ++ .device_id = 0x2c90, ++ .name = "PCI-6703", ++ .ao_chans = 16, ++ .ao_bits = 16, ++ }, ++ { ++ .device_id = 0x1920, ++ .name = "PXI-6704", ++ .ao_chans = 32, ++ .ao_bits = 16, ++ }, ++ { ++ .device_id = 0x1290, ++ .name = "PCI-6704", ++ .ao_chans = 32, ++ .ao_bits = 16, ++ }, ++}; ++ ++#define n_ni_670x_boards ((sizeof(ni_670x_boards)/sizeof(ni_670x_boards[0]))) ++ ++static const struct pci_device_id ni_670x_pci_table[] = { ++ {PCI_DEVICE(PCI_VENDOR_ID_NI, 0x2c90)}, ++ {PCI_DEVICE(PCI_VENDOR_ID_NI, 0x1920)}, ++ {0} ++}; ++ ++MODULE_DEVICE_TABLE(pci, ni_670x_pci_table); ++ ++#define devpriv ((struct ni_670x_private *)dev->priv) ++ ++static inline struct ni_670x_private *private(struct a4l_device *dev) ++{ ++ return (struct ni_670x_private*) dev->priv; ++} ++ ++ ++static int ni_670x_attach (struct a4l_device *dev, a4l_lnkdesc_t *arg); ++static int ni_670x_detach(struct a4l_device *dev); ++ ++static struct a4l_driver ni_670x_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_ni_670x", ++ .driver_name = "ni_670x", ++ .attach = ni_670x_attach, ++ .detach = ni_670x_detach, ++ .privdata_size = sizeof(struct ni_670x_private), ++}; ++ ++static int __init driver_ni_670x_init_module(void) ++{ ++ return a4l_register_drv (&ni_670x_drv); ++} ++ ++static void __exit driver_ni_670x_cleanup_module(void) ++{ ++ a4l_unregister_drv (&ni_670x_drv); ++} ++ ++module_init(driver_ni_670x_init_module); ++module_exit(driver_ni_670x_cleanup_module); ++ ++static int ni_670x_attach (struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int ret, bus, slot, i, irq; ++ struct mite_struct *mite; ++ struct ni_670x_board* board = NULL; ++ int err; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= 
sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? ++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ a4l_info(dev, "ni670x attach procedure started(bus=%d/slot=%d)...\n", ++ bus, slot); ++ ++ mite = NULL; ++ ++ for(i = 0; i < n_ni_670x_boards && mite == NULL; i++) { ++ mite = a4l_mite_find_device(bus, ++ slot, ni_670x_boards[i].device_id); ++ board = (struct ni_670x_board*) &ni_670x_boards[i]; ++ } ++ ++ if(mite == NULL) { ++ a4l_err(dev, "%s: cannot find the MITE device\n", __FUNCTION__); ++ return -ENOENT; ++ } ++ ++ a4l_info(dev, "Found device %d %s\n", i, ni_670x_boards[i].name); ++ ++ devpriv->irq_polarity = PCIMIO_IRQ_POLARITY; ++ devpriv->irq_pin = 0; ++ ++ devpriv->mite = mite; ++ devpriv->board_ptr = board; ++ ++ ret = a4l_mite_setup(devpriv->mite, 0); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite\n", __FUNCTION__); ++ return ret; ++ } ++ ++ irq = mite_irq(devpriv->mite); ++ devpriv->irq = irq; ++ ++ a4l_info(dev, "found %s board\n", board->name); ++ ++ for (i = 0; i < 2; i++) { ++ struct a4l_subdevice *subd = ++ a4l_alloc_subd(setup_subds[i].sizeof_priv, NULL); ++ ++ if (subd == NULL) { ++ a4l_err(dev, ++ "%s: cannot allocate subdevice\n", ++ __FUNCTION__); ++ return -ENOMEM; ++ } ++ ++ err = a4l_add_subd(dev, subd); ++ if (err != i) { ++ a4l_err(dev, ++ "%s: cannot add subdevice\n", ++ __FUNCTION__); ++ return err; ++ } ++ ++ setup_subds[i].setup_func (subd); ++ } ++ ++ /* Config of misc registers */ ++ writel(0x10, devpriv->mite->daq_io_addr + MISC_CONTROL_OFFSET); ++ /* Config of ao registers */ ++ writel(0x00, devpriv->mite->daq_io_addr + AO_CONTROL_OFFSET); ++ ++ a4l_info(dev, "ni670x attached\n"); ++ ++ return 0; ++} ++ ++static int ni_670x_detach(struct a4l_device *dev) ++{ ++ a4l_info(dev, "ni670x detach procedure started...\n"); ++ ++ if(dev->priv != NULL && devpriv->mite != NULL) ++ a4l_mite_unsetup(devpriv->mite); ++ ++ a4l_info(dev, "ni670x detach procedure succeeded...\n"); ++ ++ return 0; ++} ++ ++ ++static int ni_670x_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ struct ni_670x_subd_priv *subdpriv = ++ (struct ni_670x_subd_priv *)subd->priv; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ subdpriv->io_bits |= 1 << chan; ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ subdpriv->io_bits &= ~(1 << chan); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (subdpriv->io_bits & (1 << chan)) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ++ writel(subdpriv->io_bits, ++ devpriv->mite->daq_io_addr + DIO_PORT0_DIR_OFFSET); ++ ++ return 0; ++} ++ ++static int ni_670x_ao_winsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ int i; ++ unsigned int tmp; ++ unsigned int* dtmp; ++ int chan; ++ dtmp = (unsigned int*)insn->data; ++ chan = CR_CHAN(insn->chan_desc); ++ ++ /* Channel number mapping : ++ ++ NI 6703/ NI 6704 | NI 6704 Only ++ ---------------------------------------------------- ++ vch(0) : 0 | ich(16) : 1 ++ vch(1) : 2 | ich(17) : 3 ++ . : . | . . ++ . : . | . . ++ . : . | . . 
++ vch(15) : 30 | ich(31) : 31 */ ++ ++ for (i = 0; i < insn->data_size / sizeof(unsigned int); i++) { ++ ++ tmp = dtmp[i]; ++ ++ /* First write in channel register which channel to use */ ++ writel(((chan & 15) << 1) | ((chan & 16) >> 4), ++ private (subd->dev)->mite->daq_io_addr + AO_CHAN_OFFSET); ++ ++ /* write channel value */ ++ writel(dtmp[i], ++ private(subd->dev)->mite->daq_io_addr + AO_VALUE_OFFSET); ++ private(subd->dev)->ao_readback[chan] = tmp; ++ } ++ ++ return 0; ++} ++ ++static int ni_670x_ao_rinsn(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ int i; ++ unsigned int* dtmp; ++ int chan = CR_CHAN(insn->chan_desc); ++ ++ dtmp = (unsigned int*)insn->data; ++ ++ for (i = 0; i < insn->data_size / sizeof(unsigned int); i++) ++ dtmp[i] = private(subd->dev)->ao_readback[chan]; ++ ++ return 0; ++} ++ ++ ++static int ni_670x_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ return -ENOSYS; ++} ++ ++MODULE_DESCRIPTION("Analogy driver for NI670x series cards"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/analogy/national_instruments/ni_mio.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_mio.h 2021-04-07 16:01:27.839633292 +0800 +@@ -0,0 +1,122 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef __ANALOGY_NI_MIO_H__ ++#define __ANALOGY_NI_MIO_H__ ++ ++/* Debug stuff */ ++ ++#ifdef CONFIG_DEBUG_MIO ++#define MDPRINTK(fmt, args...) rtdm_printk(format, ##args) ++#else /* !CONFIG_DEBUG_MIO */ ++#define MDPRINTK(fmt, args...) ++#endif /* CONFIG_DEBUG_MIO */ ++ ++/* Subdevice related defines */ ++ ++#define AIMODE_NONE 0 ++#define AIMODE_HALF_FULL 1 ++#define AIMODE_SCAN 2 ++#define AIMODE_SAMPLE 3 ++ ++#define NI_AI_SUBDEV 0 ++#define NI_AO_SUBDEV 1 ++#define NI_DIO_SUBDEV 2 ++#define NI_8255_DIO_SUBDEV 3 ++#define NI_UNUSED_SUBDEV 4 ++#define NI_CALIBRATION_SUBDEV 5 ++#define NI_EEPROM_SUBDEV 6 ++#define NI_PFI_DIO_SUBDEV 7 ++#define NI_CS5529_CALIBRATION_SUBDEV 8 ++#define NI_SERIAL_SUBDEV 9 ++#define NI_RTSI_SUBDEV 10 ++#define NI_GPCT0_SUBDEV 11 ++#define NI_GPCT1_SUBDEV 12 ++#define NI_FREQ_OUT_SUBDEV 13 ++#define NI_NUM_SUBDEVICES 14 ++ ++#define NI_GPCT_SUBDEV(x) ((x == 1) ? NI_GPCT1_SUBDEV : NI_GPCT0_SUBDEV) ++ ++#define TIMEBASE_1_NS 50 ++#define TIMEBASE_2_NS 10000 ++ ++#define SERIAL_DISABLED 0 ++#define SERIAL_600NS 600 ++#define SERIAL_1_2US 1200 ++#define SERIAL_10US 10000 ++ ++/* PFI digital filtering options for ni m-series for use with ++ INSN_CONFIG_FILTER. 
*/ ++#define NI_PFI_FILTER_OFF 0x0 ++#define NI_PFI_FILTER_125ns 0x1 ++#define NI_PFI_FILTER_6425ns 0x2 ++#define NI_PFI_FILTER_2550us 0x3 ++ ++/* Signals which can be routed to an NI PFI pin on an m-series board ++ with INSN_CONFIG_SET_ROUTING. These numbers are also returned by ++ INSN_CONFIG_GET_ROUTING on pre-m-series boards, even though their ++ routing cannot be changed. The numbers assigned are not arbitrary, ++ they correspond to the bits required to program the board. */ ++#define NI_PFI_OUTPUT_PFI_DEFAULT 0 ++#define NI_PFI_OUTPUT_AI_START1 1 ++#define NI_PFI_OUTPUT_AI_START2 2 ++#define NI_PFI_OUTPUT_AI_CONVERT 3 ++#define NI_PFI_OUTPUT_G_SRC1 4 ++#define NI_PFI_OUTPUT_G_GATE1 5 ++#define NI_PFI_OUTPUT_AO_UPDATE_N 6 ++#define NI_PFI_OUTPUT_AO_START1 7 ++#define NI_PFI_OUTPUT_AI_START_PULSE 8 ++#define NI_PFI_OUTPUT_G_SRC0 9 ++#define NI_PFI_OUTPUT_G_GATE0 10 ++#define NI_PFI_OUTPUT_EXT_STROBE 11 ++#define NI_PFI_OUTPUT_AI_EXT_MUX_CLK 12 ++#define NI_PFI_OUTPUT_GOUT0 13 ++#define NI_PFI_OUTPUT_GOUT1 14 ++#define NI_PFI_OUTPUT_FREQ_OUT 15 ++#define NI_PFI_OUTPUT_PFI_DO 16 ++#define NI_PFI_OUTPUT_I_ATRIG 17 ++#define NI_PFI_OUTPUT_RTSI0 18 ++#define NI_PFI_OUTPUT_PXI_STAR_TRIGGER_IN 26 ++#define NI_PFI_OUTPUT_SCXI_TRIG1 27 ++#define NI_PFI_OUTPUT_DIO_CHANGE_DETECT_RTSI 28 ++#define NI_PFI_OUTPUT_CDI_SAMPLE 29 ++#define NI_PFI_OUTPUT_CDO_UPDATE 30 ++ ++static inline unsigned int NI_PFI_OUTPUT_RTSI(unsigned rtsi_channel) { ++ return NI_PFI_OUTPUT_RTSI0 + rtsi_channel; ++} ++ ++/* Ranges declarations */ ++ ++extern struct a4l_rngdesc a4l_range_ni_E_ai; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_limited; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_limited14; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_bipolar4; ++extern struct a4l_rngdesc a4l_range_ni_E_ai_611x; ++extern struct a4l_rngdesc range_ni_E_ai_622x; ++extern struct a4l_rngdesc range_ni_E_ai_628x; ++extern struct a4l_rngdesc a4l_range_ni_S_ai_6143; ++extern struct a4l_rngdesc a4l_range_ni_E_ao_ext; ++ ++/* Misc functions declarations */ ++ ++int a4l_ni_E_interrupt(unsigned int irq, void *d); ++int a4l_ni_E_init(struct a4l_device *dev); ++ ++ ++#endif /* !__ANALOGY_NI_MIO_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/Makefile 2021-04-07 16:01:27.834633299 +0800 +@@ -0,0 +1,16 @@ ++ ++ccflags-y += -Idrivers/xenomai/analogy ++ ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) += analogy_ni_mite.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_TIO) += analogy_ni_tio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_MIO) += analogy_ni_mio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_PCIMIO) += analogy_ni_pcimio.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_670x) += analogy_ni_670x.o ++obj-$(CONFIG_XENO_DRIVERS_ANALOGY_NI_660x) += analogy_ni_660x.o ++ ++analogy_ni_mite-y := mite.o ++analogy_ni_tio-y := tio_common.o ++analogy_ni_mio-y := mio_common.o ++analogy_ni_pcimio-y := pcimio.o ++analogy_ni_670x-y := ni_670x.o ++analogy_ni_660x-y := ni_660x.o +--- linux/drivers/xenomai/analogy/national_instruments/ni_660x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_660x.c 2021-04-07 16:01:27.830633305 +0800 +@@ -0,0 +1,1481 @@ ++/* ++ * comedi/drivers/ni_660x.c ++ * Hardware driver for NI 660x devices ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software 
Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Driver: ni_660x ++ * Description: National Instruments 660x counter/timer boards ++ * Devices: ++ * [National Instruments] PCI-6601 (ni_660x), PCI-6602, PXI-6602, ++ * PXI-6608 ++ * Author: J.P. Mellor , ++ * Herman.Bruyninckx@mech.kuleuven.ac.be, ++ * Wim.Meeussen@mech.kuleuven.ac.be, ++ * Klaas.Gadeyne@mech.kuleuven.ac.be, ++ * Frank Mori Hess ++ * Updated: Thu Oct 18 12:56:06 EDT 2007 ++ * Status: experimental ++ ++ * Encoders work. PulseGeneration (both single pulse and pulse train) ++ * works. Buffered commands work for input but not output. ++ ++ * References: ++ * DAQ 660x Register-Level Programmer Manual (NI 370505A-01) ++ * DAQ 6601/6602 User Manual (NI 322137B-01) ++ */ ++ ++/* ++ * Integration with Xenomai/Analogy layer based on the ++ * comedi driver. Adaptation made by ++ * Julien Delange ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++#include "ni_tio.h" ++#include "mite.h" ++ ++enum io_direction { ++ DIRECTION_INPUT = 0, ++ DIRECTION_OUTPUT = 1, ++ DIRECTION_OPENDRAIN = 2 ++}; ++ ++ ++enum ni_660x_constants { ++ min_counter_pfi_chan = 8, ++ max_dio_pfi_chan = 31, ++ counters_per_chip = 4 ++}; ++ ++struct ni_660x_subd_priv { ++ int io_bits; ++ unsigned int state; ++ uint16_t readback[2]; ++ uint16_t config; ++ struct ni_gpct* counter; ++}; ++ ++#define NUM_PFI_CHANNELS 40 ++/* Really there are only up to 3 dma channels, but the register layout ++ allows for 4 */ ++#define MAX_DMA_CHANNEL 4 ++ ++static struct a4l_channels_desc chandesc_ni660x = { ++ .mode = A4L_CHAN_GLOBAL_CHANDESC, ++ .length = NUM_PFI_CHANNELS, ++ .chans = { ++ {A4L_CHAN_AREF_GROUND, sizeof(sampl_t)}, ++ }, ++}; ++ ++#define subdev_priv ((struct ni_660x_subd_priv*)s->priv) ++ ++/* See Register-Level Programmer Manual page 3.1 */ ++enum NI_660x_Register { ++ G0InterruptAcknowledge, ++ G0StatusRegister, ++ G1InterruptAcknowledge, ++ G1StatusRegister, ++ G01StatusRegister, ++ G0CommandRegister, ++ STCDIOParallelInput, ++ G1CommandRegister, ++ G0HWSaveRegister, ++ G1HWSaveRegister, ++ STCDIOOutput, ++ STCDIOControl, ++ G0SWSaveRegister, ++ G1SWSaveRegister, ++ G0ModeRegister, ++ G01JointStatus1Register, ++ G1ModeRegister, ++ STCDIOSerialInput, ++ G0LoadARegister, ++ G01JointStatus2Register, ++ G0LoadBRegister, ++ G1LoadARegister, ++ G1LoadBRegister, ++ G0InputSelectRegister, ++ G1InputSelectRegister, ++ G0AutoincrementRegister, ++ G1AutoincrementRegister, ++ G01JointResetRegister, ++ G0InterruptEnable, ++ G1InterruptEnable, ++ G0CountingModeRegister, ++ G1CountingModeRegister, ++ G0SecondGateRegister, ++ G1SecondGateRegister, ++ G0DMAConfigRegister, ++ G0DMAStatusRegister, ++ G1DMAConfigRegister, ++ G1DMAStatusRegister, ++ G2InterruptAcknowledge, ++ G2StatusRegister, ++ G3InterruptAcknowledge, ++ G3StatusRegister, ++ G23StatusRegister, ++ G2CommandRegister, ++ G3CommandRegister, ++ G2HWSaveRegister, ++ G3HWSaveRegister, ++ G2SWSaveRegister, ++ G3SWSaveRegister, ++ 
G2ModeRegister, ++ G23JointStatus1Register, ++ G3ModeRegister, ++ G2LoadARegister, ++ G23JointStatus2Register, ++ G2LoadBRegister, ++ G3LoadARegister, ++ G3LoadBRegister, ++ G2InputSelectRegister, ++ G3InputSelectRegister, ++ G2AutoincrementRegister, ++ G3AutoincrementRegister, ++ G23JointResetRegister, ++ G2InterruptEnable, ++ G3InterruptEnable, ++ G2CountingModeRegister, ++ G3CountingModeRegister, ++ G3SecondGateRegister, ++ G2SecondGateRegister, ++ G2DMAConfigRegister, ++ G2DMAStatusRegister, ++ G3DMAConfigRegister, ++ G3DMAStatusRegister, ++ DIO32Input, ++ DIO32Output, ++ ClockConfigRegister, ++ GlobalInterruptStatusRegister, ++ DMAConfigRegister, ++ GlobalInterruptConfigRegister, ++ IOConfigReg0_1, ++ IOConfigReg2_3, ++ IOConfigReg4_5, ++ IOConfigReg6_7, ++ IOConfigReg8_9, ++ IOConfigReg10_11, ++ IOConfigReg12_13, ++ IOConfigReg14_15, ++ IOConfigReg16_17, ++ IOConfigReg18_19, ++ IOConfigReg20_21, ++ IOConfigReg22_23, ++ IOConfigReg24_25, ++ IOConfigReg26_27, ++ IOConfigReg28_29, ++ IOConfigReg30_31, ++ IOConfigReg32_33, ++ IOConfigReg34_35, ++ IOConfigReg36_37, ++ IOConfigReg38_39, ++ NumRegisters, ++}; ++ ++static inline unsigned IOConfigReg(unsigned pfi_channel) ++{ ++ unsigned reg = IOConfigReg0_1 + pfi_channel / 2; ++ BUG_ON(reg > IOConfigReg38_39); ++ return reg; ++} ++ ++enum ni_660x_register_width { ++ DATA_1B, ++ DATA_2B, ++ DATA_4B ++}; ++ ++enum ni_660x_register_direction { ++ NI_660x_READ, ++ NI_660x_WRITE, ++ NI_660x_READ_WRITE ++}; ++ ++enum ni_660x_pfi_output_select { ++ pfi_output_select_high_Z = 0, ++ pfi_output_select_counter = 1, ++ pfi_output_select_do = 2, ++ num_pfi_output_selects ++}; ++ ++enum ni_660x_subdevices { ++ NI_660X_DIO_SUBDEV = 1, ++ NI_660X_GPCT_SUBDEV_0 = 2 ++}; ++ ++static inline unsigned NI_660X_GPCT_SUBDEV(unsigned index) ++{ ++ return NI_660X_GPCT_SUBDEV_0 + index; ++} ++ ++struct NI_660xRegisterData { ++ ++ const char *name; /* Register Name */ ++ int offset; /* Offset from base address from GPCT chip */ ++ enum ni_660x_register_direction direction; ++ enum ni_660x_register_width size; /* 1 byte, 2 bytes, or 4 bytes */ ++}; ++ ++static const struct NI_660xRegisterData registerData[NumRegisters] = { ++ {"G0 Interrupt Acknowledge", 0x004, NI_660x_WRITE, DATA_2B}, ++ {"G0 Status Register", 0x004, NI_660x_READ, DATA_2B}, ++ {"G1 Interrupt Acknowledge", 0x006, NI_660x_WRITE, DATA_2B}, ++ {"G1 Status Register", 0x006, NI_660x_READ, DATA_2B}, ++ {"G01 Status Register ", 0x008, NI_660x_READ, DATA_2B}, ++ {"G0 Command Register", 0x00C, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Parallel Input", 0x00E, NI_660x_READ, DATA_2B}, ++ {"G1 Command Register", 0x00E, NI_660x_WRITE, DATA_2B}, ++ {"G0 HW Save Register", 0x010, NI_660x_READ, DATA_4B}, ++ {"G1 HW Save Register", 0x014, NI_660x_READ, DATA_4B}, ++ {"STC DIO Output", 0x014, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Control", 0x016, NI_660x_WRITE, DATA_2B}, ++ {"G0 SW Save Register", 0x018, NI_660x_READ, DATA_4B}, ++ {"G1 SW Save Register", 0x01C, NI_660x_READ, DATA_4B}, ++ {"G0 Mode Register", 0x034, NI_660x_WRITE, DATA_2B}, ++ {"G01 Joint Status 1 Register", 0x036, NI_660x_READ, DATA_2B}, ++ {"G1 Mode Register", 0x036, NI_660x_WRITE, DATA_2B}, ++ {"STC DIO Serial Input", 0x038, NI_660x_READ, DATA_2B}, ++ {"G0 Load A Register", 0x038, NI_660x_WRITE, DATA_4B}, ++ {"G01 Joint Status 2 Register", 0x03A, NI_660x_READ, DATA_2B}, ++ {"G0 Load B Register", 0x03C, NI_660x_WRITE, DATA_4B}, ++ {"G1 Load A Register", 0x040, NI_660x_WRITE, DATA_4B}, ++ {"G1 Load B Register", 0x044, NI_660x_WRITE, DATA_4B}, ++ {"G0 Input 
Select Register", 0x048, NI_660x_WRITE, DATA_2B}, ++ {"G1 Input Select Register", 0x04A, NI_660x_WRITE, DATA_2B}, ++ {"G0 Autoincrement Register", 0x088, NI_660x_WRITE, DATA_2B}, ++ {"G1 Autoincrement Register", 0x08A, NI_660x_WRITE, DATA_2B}, ++ {"G01 Joint Reset Register", 0x090, NI_660x_WRITE, DATA_2B}, ++ {"G0 Interrupt Enable", 0x092, NI_660x_WRITE, DATA_2B}, ++ {"G1 Interrupt Enable", 0x096, NI_660x_WRITE, DATA_2B}, ++ {"G0 Counting Mode Register", 0x0B0, NI_660x_WRITE, DATA_2B}, ++ {"G1 Counting Mode Register", 0x0B2, NI_660x_WRITE, DATA_2B}, ++ {"G0 Second Gate Register", 0x0B4, NI_660x_WRITE, DATA_2B}, ++ {"G1 Second Gate Register", 0x0B6, NI_660x_WRITE, DATA_2B}, ++ {"G0 DMA Config Register", 0x0B8, NI_660x_WRITE, DATA_2B}, ++ {"G0 DMA Status Register", 0x0B8, NI_660x_READ, DATA_2B}, ++ {"G1 DMA Config Register", 0x0BA, NI_660x_WRITE, DATA_2B}, ++ {"G1 DMA Status Register", 0x0BA, NI_660x_READ, DATA_2B}, ++ {"G2 Interrupt Acknowledge", 0x104, NI_660x_WRITE, DATA_2B}, ++ {"G2 Status Register", 0x104, NI_660x_READ, DATA_2B}, ++ {"G3 Interrupt Acknowledge", 0x106, NI_660x_WRITE, DATA_2B}, ++ {"G3 Status Register", 0x106, NI_660x_READ, DATA_2B}, ++ {"G23 Status Register", 0x108, NI_660x_READ, DATA_2B}, ++ {"G2 Command Register", 0x10C, NI_660x_WRITE, DATA_2B}, ++ {"G3 Command Register", 0x10E, NI_660x_WRITE, DATA_2B}, ++ {"G2 HW Save Register", 0x110, NI_660x_READ, DATA_4B}, ++ {"G3 HW Save Register", 0x114, NI_660x_READ, DATA_4B}, ++ {"G2 SW Save Register", 0x118, NI_660x_READ, DATA_4B}, ++ {"G3 SW Save Register", 0x11C, NI_660x_READ, DATA_4B}, ++ {"G2 Mode Register", 0x134, NI_660x_WRITE, DATA_2B}, ++ {"G23 Joint Status 1 Register", 0x136, NI_660x_READ, DATA_2B}, ++ {"G3 Mode Register", 0x136, NI_660x_WRITE, DATA_2B}, ++ {"G2 Load A Register", 0x138, NI_660x_WRITE, DATA_4B}, ++ {"G23 Joint Status 2 Register", 0x13A, NI_660x_READ, DATA_2B}, ++ {"G2 Load B Register", 0x13C, NI_660x_WRITE, DATA_4B}, ++ {"G3 Load A Register", 0x140, NI_660x_WRITE, DATA_4B}, ++ {"G3 Load B Register", 0x144, NI_660x_WRITE, DATA_4B}, ++ {"G2 Input Select Register", 0x148, NI_660x_WRITE, DATA_2B}, ++ {"G3 Input Select Register", 0x14A, NI_660x_WRITE, DATA_2B}, ++ {"G2 Autoincrement Register", 0x188, NI_660x_WRITE, DATA_2B}, ++ {"G3 Autoincrement Register", 0x18A, NI_660x_WRITE, DATA_2B}, ++ {"G23 Joint Reset Register", 0x190, NI_660x_WRITE, DATA_2B}, ++ {"G2 Interrupt Enable", 0x192, NI_660x_WRITE, DATA_2B}, ++ {"G3 Interrupt Enable", 0x196, NI_660x_WRITE, DATA_2B}, ++ {"G2 Counting Mode Register", 0x1B0, NI_660x_WRITE, DATA_2B}, ++ {"G3 Counting Mode Register", 0x1B2, NI_660x_WRITE, DATA_2B}, ++ {"G3 Second Gate Register", 0x1B6, NI_660x_WRITE, DATA_2B}, ++ {"G2 Second Gate Register", 0x1B4, NI_660x_WRITE, DATA_2B}, ++ {"G2 DMA Config Register", 0x1B8, NI_660x_WRITE, DATA_2B}, ++ {"G2 DMA Status Register", 0x1B8, NI_660x_READ, DATA_2B}, ++ {"G3 DMA Config Register", 0x1BA, NI_660x_WRITE, DATA_2B}, ++ {"G3 DMA Status Register", 0x1BA, NI_660x_READ, DATA_2B}, ++ {"32 bit Digital Input", 0x414, NI_660x_READ, DATA_4B}, ++ {"32 bit Digital Output", 0x510, NI_660x_WRITE, DATA_4B}, ++ {"Clock Config Register", 0x73C, NI_660x_WRITE, DATA_4B}, ++ {"Global Interrupt Status Register", 0x754, NI_660x_READ, DATA_4B}, ++ {"DMA Configuration Register", 0x76C, NI_660x_WRITE, DATA_4B}, ++ {"Global Interrupt Config Register", 0x770, NI_660x_WRITE, DATA_4B}, ++ {"IO Config Register 0-1", 0x77C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 2-3", 0x77E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 4-5", 
0x780, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 6-7", 0x782, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 8-9", 0x784, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 10-11", 0x786, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 12-13", 0x788, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 14-15", 0x78A, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 16-17", 0x78C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 18-19", 0x78E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 20-21", 0x790, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 22-23", 0x792, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 24-25", 0x794, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 26-27", 0x796, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 28-29", 0x798, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 30-31", 0x79A, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 32-33", 0x79C, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 34-35", 0x79E, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 36-37", 0x7A0, NI_660x_READ_WRITE, DATA_2B}, ++ {"IO Config Register 38-39", 0x7A2, NI_660x_READ_WRITE, DATA_2B} ++}; ++ ++/* kind of ENABLE for the second counter */ ++enum clock_config_register_bits { ++ CounterSwap = 0x1 << 21 ++}; ++ ++/* ioconfigreg */ ++static inline unsigned ioconfig_bitshift(unsigned pfi_channel) ++{ ++ if (pfi_channel % 2) ++ return 0; ++ else ++ return 8; ++} ++ ++static inline unsigned pfi_output_select_mask(unsigned pfi_channel) ++{ ++ return 0x3 << ioconfig_bitshift(pfi_channel); ++} ++ ++static inline unsigned pfi_output_select_bits(unsigned pfi_channel, ++ unsigned output_select) ++{ ++ return (output_select & 0x3) << ioconfig_bitshift(pfi_channel); ++} ++ ++static inline unsigned pfi_input_select_mask(unsigned pfi_channel) ++{ ++ return 0x7 << (4 + ioconfig_bitshift(pfi_channel)); ++} ++ ++static inline unsigned pfi_input_select_bits(unsigned pfi_channel, ++ unsigned input_select) ++{ ++ return (input_select & 0x7) << (4 + ioconfig_bitshift(pfi_channel)); ++} ++ ++/* Dma configuration register bits */ ++static inline unsigned dma_select_mask(unsigned dma_channel) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return 0x1f << (8 * dma_channel); ++} ++ ++enum dma_selection { ++ dma_selection_none = 0x1f, ++}; ++ ++static inline unsigned dma_selection_counter(unsigned counter_index) ++{ ++ BUG_ON(counter_index >= counters_per_chip); ++ return counter_index; ++} ++ ++static inline unsigned dma_select_bits(unsigned dma_channel, unsigned selection) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return (selection << (8 * dma_channel)) & dma_select_mask(dma_channel); ++} ++ ++static inline unsigned dma_reset_bit(unsigned dma_channel) ++{ ++ BUG_ON(dma_channel >= MAX_DMA_CHANNEL); ++ return 0x80 << (8 * dma_channel); ++} ++ ++enum global_interrupt_status_register_bits { ++ Counter_0_Int_Bit = 0x100, ++ Counter_1_Int_Bit = 0x200, ++ Counter_2_Int_Bit = 0x400, ++ Counter_3_Int_Bit = 0x800, ++ Cascade_Int_Bit = 0x20000000, ++ Global_Int_Bit = 0x80000000 ++}; ++ ++enum global_interrupt_config_register_bits { ++ Cascade_Int_Enable_Bit = 0x20000000, ++ Global_Int_Polarity_Bit = 0x40000000, ++ Global_Int_Enable_Bit = 0x80000000 ++}; ++ ++/* Offset of the GPCT chips from the base-adress of the card: ++ First chip is at base-address +0x00, etc. 
*/ ++static const unsigned GPCT_OFFSET[2] = { 0x0, 0x800 }; ++ ++/* Board description */ ++struct ni_660x_board { ++ unsigned short dev_id; /* `lspci` will show you this */ ++ const char *name; ++ unsigned n_chips; /* total number of TIO chips */ ++}; ++ ++static const struct ni_660x_board ni_660x_boards[] = { ++ { ++ .dev_id = 0x2c60, ++ .name = "PCI-6601", ++ .n_chips = 1, ++ }, ++ { ++ .dev_id = 0x1310, ++ .name = "PCI-6602", ++ .n_chips = 2, ++ }, ++ { ++ .dev_id = 0x1360, ++ .name = "PXI-6602", ++ .n_chips = 2, ++ }, ++ { ++ .dev_id = 0x2cc0, ++ .name = "PXI-6608", ++ .n_chips = 2, ++ }, ++}; ++ ++#define NI_660X_MAX_NUM_CHIPS 2 ++#define NI_660X_MAX_NUM_COUNTERS (NI_660X_MAX_NUM_CHIPS * counters_per_chip) ++ ++static const struct pci_device_id ni_660x_pci_table[] = { ++ { ++ PCI_VENDOR_ID_NATINST, 0x2c60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ PCI_VENDOR_ID_NATINST, 0x2cc0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { ++ 0} ++}; ++ ++MODULE_DEVICE_TABLE(pci, ni_660x_pci_table); ++ ++struct ni_660x_private { ++ struct mite_struct *mite; ++ struct ni_gpct_device *counter_dev; ++ uint64_t pfi_direction_bits; ++ ++ struct mite_dma_descriptor_ring ++ *mite_rings[NI_660X_MAX_NUM_CHIPS][counters_per_chip]; ++ ++ rtdm_lock_t mite_channel_lock; ++ /* Interrupt_lock prevents races between interrupt and ++ comedi_poll */ ++ rtdm_lock_t interrupt_lock; ++ unsigned int dma_configuration_soft_copies[NI_660X_MAX_NUM_CHIPS]; ++ rtdm_lock_t soft_reg_copy_lock; ++ unsigned short pfi_output_selects[NUM_PFI_CHANNELS]; ++ ++ struct ni_660x_board *board_ptr; ++}; ++ ++#undef devpriv ++#define devpriv ((struct ni_660x_private *)dev->priv) ++ ++static inline struct ni_660x_private *private(struct a4l_device *dev) ++{ ++ return (struct ni_660x_private*) dev->priv; ++} ++ ++/* Initialized in ni_660x_find_device() */ ++static inline const struct ni_660x_board *board(struct a4l_device *dev) ++{ ++ return ((struct ni_660x_private*)dev->priv)->board_ptr; ++} ++ ++#define n_ni_660x_boards ARRAY_SIZE(ni_660x_boards) ++ ++static int ni_660x_attach(struct a4l_device *dev, ++ a4l_lnkdesc_t *arg); ++static int ni_660x_detach(struct a4l_device *dev); ++static void init_tio_chip(struct a4l_device *dev, int chipset); ++static void ni_660x_select_pfi_output(struct a4l_device *dev, ++ unsigned pfi_channel, ++ unsigned output_select); ++ ++static struct a4l_driver ni_660x_drv = { ++ .board_name = "analogy_ni_660x", ++ .driver_name = "ni_660x", ++ .owner = THIS_MODULE, ++ .attach = ni_660x_attach, ++ .detach = ni_660x_detach, ++ .privdata_size = sizeof(struct ni_660x_private), ++}; ++ ++static int ni_660x_set_pfi_routing(struct a4l_device *dev, unsigned chan, ++ unsigned source); ++ ++/* Possible instructions for a GPCT */ ++static int ni_660x_GPCT_rinsn( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_GPCT_insn_config( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_GPCT_winsn( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++ ++/* Possible instructions for Digital IO */ ++static int ni_660x_dio_insn_config( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++static int ni_660x_dio_insn_bits( ++ struct a4l_subdevice *s, ++ struct a4l_kernel_instruction *insn); ++ ++static inline unsigned ni_660x_num_counters(struct a4l_device *dev) ++{ ++ return board(dev)->n_chips * 
counters_per_chip; ++} ++ ++static enum NI_660x_Register ni_gpct_to_660x_register(enum ni_gpct_register reg) ++{ ++ ++ enum NI_660x_Register ni_660x_register; ++ switch (reg) { ++ case NITIO_G0_Autoincrement_Reg: ++ ni_660x_register = G0AutoincrementRegister; ++ break; ++ case NITIO_G1_Autoincrement_Reg: ++ ni_660x_register = G1AutoincrementRegister; ++ break; ++ case NITIO_G2_Autoincrement_Reg: ++ ni_660x_register = G2AutoincrementRegister; ++ break; ++ case NITIO_G3_Autoincrement_Reg: ++ ni_660x_register = G3AutoincrementRegister; ++ break; ++ case NITIO_G0_Command_Reg: ++ ni_660x_register = G0CommandRegister; ++ break; ++ case NITIO_G1_Command_Reg: ++ ni_660x_register = G1CommandRegister; ++ break; ++ case NITIO_G2_Command_Reg: ++ ni_660x_register = G2CommandRegister; ++ break; ++ case NITIO_G3_Command_Reg: ++ ni_660x_register = G3CommandRegister; ++ break; ++ case NITIO_G0_HW_Save_Reg: ++ ni_660x_register = G0HWSaveRegister; ++ break; ++ case NITIO_G1_HW_Save_Reg: ++ ni_660x_register = G1HWSaveRegister; ++ break; ++ case NITIO_G2_HW_Save_Reg: ++ ni_660x_register = G2HWSaveRegister; ++ break; ++ case NITIO_G3_HW_Save_Reg: ++ ni_660x_register = G3HWSaveRegister; ++ break; ++ case NITIO_G0_SW_Save_Reg: ++ ni_660x_register = G0SWSaveRegister; ++ break; ++ case NITIO_G1_SW_Save_Reg: ++ ni_660x_register = G1SWSaveRegister; ++ break; ++ case NITIO_G2_SW_Save_Reg: ++ ni_660x_register = G2SWSaveRegister; ++ break; ++ case NITIO_G3_SW_Save_Reg: ++ ni_660x_register = G3SWSaveRegister; ++ break; ++ case NITIO_G0_Mode_Reg: ++ ni_660x_register = G0ModeRegister; ++ break; ++ case NITIO_G1_Mode_Reg: ++ ni_660x_register = G1ModeRegister; ++ break; ++ case NITIO_G2_Mode_Reg: ++ ni_660x_register = G2ModeRegister; ++ break; ++ case NITIO_G3_Mode_Reg: ++ ni_660x_register = G3ModeRegister; ++ break; ++ case NITIO_G0_LoadA_Reg: ++ ni_660x_register = G0LoadARegister; ++ break; ++ case NITIO_G1_LoadA_Reg: ++ ni_660x_register = G1LoadARegister; ++ break; ++ case NITIO_G2_LoadA_Reg: ++ ni_660x_register = G2LoadARegister; ++ break; ++ case NITIO_G3_LoadA_Reg: ++ ni_660x_register = G3LoadARegister; ++ break; ++ case NITIO_G0_LoadB_Reg: ++ ni_660x_register = G0LoadBRegister; ++ break; ++ case NITIO_G1_LoadB_Reg: ++ ni_660x_register = G1LoadBRegister; ++ break; ++ case NITIO_G2_LoadB_Reg: ++ ni_660x_register = G2LoadBRegister; ++ break; ++ case NITIO_G3_LoadB_Reg: ++ ni_660x_register = G3LoadBRegister; ++ break; ++ case NITIO_G0_Input_Select_Reg: ++ ni_660x_register = G0InputSelectRegister; ++ break; ++ case NITIO_G1_Input_Select_Reg: ++ ni_660x_register = G1InputSelectRegister; ++ break; ++ case NITIO_G2_Input_Select_Reg: ++ ni_660x_register = G2InputSelectRegister; ++ break; ++ case NITIO_G3_Input_Select_Reg: ++ ni_660x_register = G3InputSelectRegister; ++ break; ++ case NITIO_G01_Status_Reg: ++ ni_660x_register = G01StatusRegister; ++ break; ++ case NITIO_G23_Status_Reg: ++ ni_660x_register = G23StatusRegister; ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ ni_660x_register = G01JointResetRegister; ++ break; ++ case NITIO_G23_Joint_Reset_Reg: ++ ni_660x_register = G23JointResetRegister; ++ break; ++ case NITIO_G01_Joint_Status1_Reg: ++ ni_660x_register = G01JointStatus1Register; ++ break; ++ case NITIO_G23_Joint_Status1_Reg: ++ ni_660x_register = G23JointStatus1Register; ++ break; ++ case NITIO_G01_Joint_Status2_Reg: ++ ni_660x_register = G01JointStatus2Register; ++ break; ++ case NITIO_G23_Joint_Status2_Reg: ++ ni_660x_register = G23JointStatus2Register; ++ break; ++ case NITIO_G0_Counting_Mode_Reg: ++ 
ni_660x_register = G0CountingModeRegister; ++ break; ++ case NITIO_G1_Counting_Mode_Reg: ++ ni_660x_register = G1CountingModeRegister; ++ break; ++ case NITIO_G2_Counting_Mode_Reg: ++ ni_660x_register = G2CountingModeRegister; ++ break; ++ case NITIO_G3_Counting_Mode_Reg: ++ ni_660x_register = G3CountingModeRegister; ++ break; ++ case NITIO_G0_Second_Gate_Reg: ++ ni_660x_register = G0SecondGateRegister; ++ break; ++ case NITIO_G1_Second_Gate_Reg: ++ ni_660x_register = G1SecondGateRegister; ++ break; ++ case NITIO_G2_Second_Gate_Reg: ++ ni_660x_register = G2SecondGateRegister; ++ break; ++ case NITIO_G3_Second_Gate_Reg: ++ ni_660x_register = G3SecondGateRegister; ++ break; ++ case NITIO_G0_DMA_Config_Reg: ++ ni_660x_register = G0DMAConfigRegister; ++ break; ++ case NITIO_G0_DMA_Status_Reg: ++ ni_660x_register = G0DMAStatusRegister; ++ break; ++ case NITIO_G1_DMA_Config_Reg: ++ ni_660x_register = G1DMAConfigRegister; ++ break; ++ case NITIO_G1_DMA_Status_Reg: ++ ni_660x_register = G1DMAStatusRegister; ++ break; ++ case NITIO_G2_DMA_Config_Reg: ++ ni_660x_register = G2DMAConfigRegister; ++ break; ++ case NITIO_G2_DMA_Status_Reg: ++ ni_660x_register = G2DMAStatusRegister; ++ break; ++ case NITIO_G3_DMA_Config_Reg: ++ ni_660x_register = G3DMAConfigRegister; ++ break; ++ case NITIO_G3_DMA_Status_Reg: ++ ni_660x_register = G3DMAStatusRegister; ++ break; ++ case NITIO_G0_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G0InterruptAcknowledge; ++ break; ++ case NITIO_G1_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G1InterruptAcknowledge; ++ break; ++ case NITIO_G2_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G2InterruptAcknowledge; ++ break; ++ case NITIO_G3_Interrupt_Acknowledge_Reg: ++ ni_660x_register = G3InterruptAcknowledge; ++ break; ++ case NITIO_G0_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G1_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G2_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G3_Status_Reg: ++ ni_660x_register = G0StatusRegister; ++ break; ++ case NITIO_G0_Interrupt_Enable_Reg: ++ ni_660x_register = G0InterruptEnable; ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ ni_660x_register = G1InterruptEnable; ++ break; ++ case NITIO_G2_Interrupt_Enable_Reg: ++ ni_660x_register = G2InterruptEnable; ++ break; ++ case NITIO_G3_Interrupt_Enable_Reg: ++ ni_660x_register = G3InterruptEnable; ++ break; ++ default: ++ __a4l_err("%s: unhandled register 0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return ni_660x_register; ++} ++ ++static inline void ni_660x_write_register(struct a4l_device *dev, ++ unsigned chip_index, unsigned bits, ++ enum NI_660x_Register reg) ++{ ++ void *const write_address = ++ private(dev)->mite->daq_io_addr + GPCT_OFFSET[chip_index] + ++ registerData[reg].offset; ++ ++ switch (registerData[reg].size) { ++ case DATA_2B: ++ writew(bits, write_address); ++ break; ++ case DATA_4B: ++ writel(bits, write_address); ++ break; ++ default: ++ __a4l_err("%s: %s: bug! 
unhandled case (reg=0x%x) in switch.\n", ++ __FILE__, __FUNCTION__, reg); ++ BUG(); ++ break; ++ } ++} ++ ++static inline unsigned ni_660x_read_register(struct a4l_device *dev, ++ unsigned chip_index, ++ enum NI_660x_Register reg) ++{ ++ void *const read_address = ++ private(dev)->mite->daq_io_addr + GPCT_OFFSET[chip_index] + ++ registerData[reg].offset; ++ ++ switch (registerData[reg].size) { ++ case DATA_2B: ++ return readw(read_address); ++ break; ++ case DATA_4B: ++ return readl(read_address); ++ break; ++ default: ++ __a4l_err("%s: %s: bug! unhandled case (reg=0x%x) in switch.\n", ++ __FILE__, __FUNCTION__, reg); ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static void ni_gpct_write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ enum NI_660x_Register ni_660x_register = ni_gpct_to_660x_register(reg); ++ ++ ni_660x_write_register(dev, counter->chip_index, bits, ++ ni_660x_register); ++} ++ ++static unsigned ni_gpct_read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ enum NI_660x_Register ni_660x_register = ni_gpct_to_660x_register(reg); ++ ++ return ni_660x_read_register(dev, counter->chip_index, ++ ni_660x_register); ++} ++ ++static inline ++struct mite_dma_descriptor_ring *mite_ring(struct ni_660x_private *priv, ++ struct ni_gpct *counter) ++{ ++ ++ return priv->mite_rings[counter->chip_index][counter->counter_index]; ++} ++ ++static inline ++void ni_660x_set_dma_channel(struct a4l_device *dev, ++ unsigned int mite_channel, struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&private(dev)->soft_reg_copy_lock, flags); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] &= ++ ~dma_select_mask(mite_channel); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] |= ++ dma_select_bits(mite_channel, ++ dma_selection_counter(counter->counter_index)); ++ ni_660x_write_register(dev, counter->chip_index, ++ private(dev)-> ++ dma_configuration_soft_copies ++ [counter->chip_index] | ++ dma_reset_bit(mite_channel), DMAConfigRegister); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&private(dev)->soft_reg_copy_lock, flags); ++} ++ ++static inline ++void ni_660x_unset_dma_channel(struct a4l_device *dev, ++ unsigned int mite_channel, ++ struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ rtdm_lock_get_irqsave(&private(dev)->soft_reg_copy_lock, flags); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] &= ++ ~dma_select_mask(mite_channel); ++ private(dev)->dma_configuration_soft_copies[counter->chip_index] |= ++ dma_select_bits(mite_channel, dma_selection_none); ++ ni_660x_write_register(dev, counter->chip_index, ++ private(dev)-> ++ dma_configuration_soft_copies ++ [counter->chip_index], DMAConfigRegister); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&private(dev)->soft_reg_copy_lock, flags); ++} ++ ++static int ni_660x_request_mite_channel(struct a4l_device *dev, ++ struct ni_gpct *counter, ++ enum io_direction direction) ++{ ++ unsigned long flags; ++ struct mite_channel *mite_chan; ++ ++ rtdm_lock_get_irqsave(&private(dev)->mite_channel_lock, flags); ++ BUG_ON(counter->mite_chan); ++ mite_chan = mite_request_channel(private(dev)->mite, ++ mite_ring(private(dev), counter)); ++ if (mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++ a4l_err(dev, ++ "%s: failed to reserve mite dma channel for counter.\n", ++ 
__FUNCTION__); ++ return -EBUSY; ++ } ++ mite_chan->dir = direction; ++ a4l_ni_tio_set_mite_channel(counter, mite_chan); ++ ni_660x_set_dma_channel(dev, mite_chan->channel, counter); ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++ return 0; ++} ++ ++void ni_660x_release_mite_channel(struct a4l_device *dev, ++ struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&private(dev)->mite_channel_lock, flags); ++ if (counter->mite_chan) { ++ struct mite_channel *mite_chan = counter->mite_chan; ++ ++ ni_660x_unset_dma_channel(dev, mite_chan->channel, counter); ++ a4l_ni_tio_set_mite_channel(counter, NULL); ++ a4l_mite_release_channel(mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&private(dev)->mite_channel_lock, flags); ++} ++ ++static int ni_660x_cmd(struct a4l_subdevice *s, struct a4l_cmd_desc* cmd) ++{ ++ int retval; ++ ++ struct ni_gpct *counter = subdev_priv->counter; ++ ++ retval = ni_660x_request_mite_channel(s->dev, counter, A4L_INPUT); ++ if (retval) { ++ a4l_err(s->dev, ++ "%s: no dma channel available for use by counter", ++ __FUNCTION__); ++ return retval; ++ } ++ ++ a4l_ni_tio_acknowledge_and_confirm (counter, NULL, NULL, NULL, NULL); ++ retval = a4l_ni_tio_cmd(counter, cmd); ++ ++ return retval; ++} ++ ++static int ni_660x_cmdtest(struct a4l_subdevice *s, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct *counter = subdev_priv->counter; ++ return a4l_ni_tio_cmdtest(counter, cmd); ++} ++ ++static int ni_660x_cancel(struct a4l_subdevice *s) ++{ ++ struct ni_gpct *counter = subdev_priv->counter; ++ int retval; ++ ++ retval = a4l_ni_tio_cancel(counter); ++ ni_660x_release_mite_channel(s->dev, counter); ++ return retval; ++} ++ ++static void set_tio_counterswap(struct a4l_device *dev, int chipset) ++{ ++ /* See P. 3.5 of the Register-Level Programming manual. The ++ CounterSwap bit has to be set on the second chip, otherwise ++ it will try to use the same pins as the first chip. 
++ */ ++ ++ if (chipset) ++ ni_660x_write_register(dev, ++ chipset, ++ CounterSwap, ClockConfigRegister); ++ else ++ ni_660x_write_register(dev, ++ chipset, 0, ClockConfigRegister); ++} ++ ++static void ni_660x_handle_gpct_interrupt(struct a4l_device *dev, ++ struct a4l_subdevice *s) ++{ ++ struct a4l_buffer *buf = s->buf; ++ ++ a4l_ni_tio_handle_interrupt(subdev_priv->counter, dev); ++ if ( test_bit(A4L_BUF_EOA_NR, &buf->flags) && ++ test_bit(A4L_BUF_ERROR_NR, &buf->flags) && ++ test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ ni_660x_cancel(s); ++ else ++ a4l_buf_evt(s, 0); ++} ++ ++static int ni_660x_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ unsigned long flags; ++ ++ if (test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) ++ return -ENOENT; ++ ++ /* Lock to avoid race with comedi_poll */ ++ rtdm_lock_get_irqsave(&private(dev)->interrupt_lock, flags); ++ smp_mb(); ++ ++ while (&dev->subdvsq != dev->subdvsq.next) { ++ struct list_head *this = dev->subdvsq.next; ++ struct a4l_subdevice *tmp = list_entry(this, struct a4l_subdevice, list); ++ ni_660x_handle_gpct_interrupt(dev, tmp); ++ } ++ ++ rtdm_lock_put_irqrestore(&private(dev)->interrupt_lock, flags); ++ return 0; ++} ++ ++static int ni_660x_alloc_mite_rings(struct a4l_device *dev) ++{ ++ unsigned int i; ++ unsigned int j; ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) { ++ for (j = 0; j < counters_per_chip; ++j) { ++ private(dev)->mite_rings[i][j] = ++ mite_alloc_ring(private(dev)->mite); ++ if (private(dev)->mite_rings[i][j] == NULL) ++ return -ENOMEM; ++ } ++ } ++ ++ return 0; ++} ++ ++static void ni_660x_free_mite_rings(struct a4l_device *dev) ++{ ++ unsigned int i; ++ unsigned int j; ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ for (j = 0; j < counters_per_chip; ++j) ++ mite_free_ring(private(dev)->mite_rings[i][j]); ++} ++ ++ ++static int __init driver_ni_660x_init_module(void) ++{ ++ return a4l_register_drv (&ni_660x_drv); ++} ++ ++static void __exit driver_ni_660x_cleanup_module(void) ++{ ++ a4l_unregister_drv (&ni_660x_drv); ++} ++ ++module_init(driver_ni_660x_init_module); ++module_exit(driver_ni_660x_cleanup_module); ++ ++static int ni_660x_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ struct a4l_subdevice *s; ++ int ret; ++ int err; ++ int bus, slot; ++ unsigned i; ++ int nsubdev = 0; ++ unsigned global_interrupt_config_bits; ++ struct mite_struct *mitedev; ++ struct ni_660x_board* boardptr = NULL; ++ ++ ret = 0; ++ bus = slot = 0; ++ mitedev = NULL; ++ nsubdev = 0; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? 
++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ for (i = 0; ( i < n_ni_660x_boards ) && ( mitedev == NULL ); i++) { ++ mitedev = a4l_mite_find_device(bus, slot, ++ ni_660x_boards[i].dev_id); ++ boardptr = (struct ni_660x_board*) &ni_660x_boards[i]; ++ } ++ ++ ++ if(mitedev == NULL) { ++ a4l_info(dev, "mite device not found\n"); ++ return -ENOENT; ++ } ++ ++ a4l_info(dev, "Board found (name=%s), continue initialization ...", ++ boardptr->name); ++ ++ private(dev)->mite = mitedev; ++ private(dev)->board_ptr = boardptr; ++ ++ rtdm_lock_init(&private(dev)->mite_channel_lock); ++ rtdm_lock_init(&private(dev)->interrupt_lock); ++ rtdm_lock_init(&private(dev)->soft_reg_copy_lock); ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) { ++ private(dev)->pfi_output_selects[i] = pfi_output_select_counter; ++ } ++ ++ ret = a4l_mite_setup(private(dev)->mite, 1); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite\n", __FUNCTION__); ++ return ret; ++ } ++ ++ ret = ni_660x_alloc_mite_rings(dev); ++ if (ret < 0) { ++ a4l_err(dev, "%s: error setting up mite rings\n", __FUNCTION__); ++ return ret; ++ } ++ ++ /* Setup first subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) ++ return -ENOMEM; ++ ++ s->flags = A4L_SUBD_UNUSED; ++ ++ err = a4l_add_subd(dev, s); ++ if (err != nsubdev) { ++ a4l_info(dev, "cannot add first subdevice, returns %d, expect %d\n", err, i); ++ return err; ++ } ++ ++ nsubdev++; ++ ++ /* Setup second subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) { ++ a4l_info(dev, "cannot allocate second subdevice\n"); ++ return -ENOMEM; ++ } ++ ++ s->flags = A4L_SUBD_DIO; ++ s->flags |= A4L_SUBD_CMD; ++ s->chan_desc = &chandesc_ni660x; ++ s->rng_desc = &range_digital; ++ s->insn_bits = ni_660x_dio_insn_bits; ++ s->insn_config = ni_660x_dio_insn_config; ++ s->dev = dev; ++ subdev_priv->io_bits = 0; ++ ni_660x_write_register(dev, 0, 0, STCDIOControl); ++ ++ err = a4l_add_subd(dev, s); ++ if (err != nsubdev) ++ return err; ++ ++ nsubdev++; ++ ++ private(dev)->counter_dev = ++ a4l_ni_gpct_device_construct(dev, ++ &ni_gpct_write_register, ++ &ni_gpct_read_register, ++ ni_gpct_variant_660x, ++ ni_660x_num_counters (dev)); ++ if (private(dev)->counter_dev == NULL) ++ return -ENOMEM; ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) { ++ /* TODO: check why there are kmalloc here... 
and in pcimio */ ++ private(dev)->counter_dev->counters[i] = ++ kmalloc(sizeof(struct ni_gpct), GFP_KERNEL); ++ private(dev)->counter_dev->counters[i]->counter_dev = ++ private(dev)->counter_dev; ++ rtdm_lock_init(&(private(dev)->counter_dev->counters[i]->lock)); ++ } ++ ++ for (i = 0; i < NI_660X_MAX_NUM_COUNTERS; ++i) { ++ if (i < ni_660x_num_counters(dev)) { ++ /* Setup other subdevice */ ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ ++ if (s == NULL) ++ return -ENOMEM; ++ ++ s->flags = A4L_SUBD_COUNTER; ++ s->chan_desc = rtdm_malloc (sizeof (struct a4l_channels_desc)); ++ s->chan_desc->length = 3; ++ s->insn_read = ni_660x_GPCT_rinsn; ++ s->insn_write = ni_660x_GPCT_winsn; ++ s->insn_config = ni_660x_GPCT_insn_config; ++ s->do_cmd = &ni_660x_cmd; ++ s->do_cmdtest = &ni_660x_cmdtest; ++ s->cancel = &ni_660x_cancel; ++ ++ subdev_priv->counter = private(dev)->counter_dev->counters[i]; ++ ++ private(dev)->counter_dev->counters[i]->chip_index = ++ i / counters_per_chip; ++ private(dev)->counter_dev->counters[i]->counter_index = ++ i % counters_per_chip; ++ } else { ++ s = a4l_alloc_subd(sizeof(struct ni_660x_subd_priv), NULL); ++ if (s == NULL) ++ return -ENOMEM; ++ s->flags = A4L_SUBD_UNUSED; ++ } ++ ++ err = a4l_add_subd(dev, s); ++ ++ if (err != nsubdev) ++ return err; ++ ++ nsubdev++; ++ } ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ init_tio_chip(dev, i); ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) ++ a4l_ni_tio_init_counter(private(dev)->counter_dev->counters[i]); ++ ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) { ++ if (i < min_counter_pfi_chan) ++ ni_660x_set_pfi_routing(dev, i, pfi_output_select_do); ++ else ++ ni_660x_set_pfi_routing(dev, i, ++ pfi_output_select_counter); ++ ni_660x_select_pfi_output(dev, i, pfi_output_select_high_Z); ++ } ++ ++ ++ /* To be safe, set counterswap bits on tio chips after all the ++ counter outputs have been set to high impedance mode */ ++ ++ for (i = 0; i < board(dev)->n_chips; ++i) ++ set_tio_counterswap(dev, i); ++ ++ ret = a4l_request_irq(dev, ++ mite_irq(private(dev)->mite), ++ ni_660x_interrupt, RTDM_IRQTYPE_SHARED, dev); ++ ++ if (ret < 0) { ++ a4l_err(dev, "%s: IRQ not available\n", __FUNCTION__); ++ return ret; ++ } ++ ++ global_interrupt_config_bits = Global_Int_Enable_Bit; ++ if (board(dev)->n_chips > 1) ++ global_interrupt_config_bits |= Cascade_Int_Enable_Bit; ++ ++ ni_660x_write_register(dev, 0, global_interrupt_config_bits, ++ GlobalInterruptConfigRegister); ++ ++ a4l_info(dev, "attach succeed, ready to be used\n"); ++ ++ return 0; ++} ++ ++static int ni_660x_detach(struct a4l_device *dev) ++{ ++ int i; ++ ++ a4l_info(dev, "begin to detach the driver ..."); ++ ++ /* Free irq */ ++ if(a4l_get_irq(dev)!=A4L_IRQ_UNUSED) ++ a4l_free_irq(dev,a4l_get_irq(dev)); ++ ++ if (dev->priv) { ++ ++ if (private(dev)->counter_dev) { ++ ++ for (i = 0; i < ni_660x_num_counters(dev); ++i) ++ if ((private(dev)->counter_dev->counters[i]) != NULL) ++ kfree (private(dev)->counter_dev->counters[i]); ++ ++ a4l_ni_gpct_device_destroy(private(dev)->counter_dev); ++ } ++ ++ if (private(dev)->mite) { ++ ni_660x_free_mite_rings(dev); ++ a4l_mite_unsetup(private(dev)->mite); ++ } ++ } ++ ++ a4l_info(dev, "driver detached !\n"); ++ ++ return 0; ++} ++ ++static int ni_660x_GPCT_rinsn(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_rinsn(subdev_priv->counter, insn); ++} ++ ++static void init_tio_chip(struct a4l_device *dev, int chipset) ++{ ++ unsigned int i; ++ ++ /* Init dma configuration register 
*/ ++ private(dev)->dma_configuration_soft_copies[chipset] = 0; ++ for (i = 0; i < MAX_DMA_CHANNEL; ++i) { ++ private(dev)->dma_configuration_soft_copies[chipset] |= ++ dma_select_bits(i, dma_selection_none) & dma_select_mask(i); ++ } ++ ++ ni_660x_write_register(dev, chipset, ++ private(dev)-> ++ dma_configuration_soft_copies[chipset], ++ DMAConfigRegister); ++ ++ for (i = 0; i < NUM_PFI_CHANNELS; ++i) ++ ni_660x_write_register(dev, chipset, 0, IOConfigReg(i)); ++} ++ ++static int ni_660x_GPCT_insn_config(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_insn_config (subdev_priv->counter, insn); ++} ++ ++static int ni_660x_GPCT_winsn(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ return a4l_ni_tio_winsn(subdev_priv->counter, insn); ++} ++ ++static int ni_660x_dio_insn_bits(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int* data = (unsigned int*) insn->data; ++ unsigned int base_bitfield_channel = CR_CHAN(insn->chan_desc); ++ ++ /* Check if we have to write some bits */ ++ if (data[0]) { ++ subdev_priv->state &= ~(data[0] << base_bitfield_channel); ++ subdev_priv->state |= (data[0] & data[1]) << base_bitfield_channel; ++ /* Write out the new digital output lines */ ++ ni_660x_write_register(s->dev, 0, subdev_priv->state, DIO32Output); ++ } ++ ++ /* On return, data[1] contains the value of the digital input ++ and output lines. */ ++ data[1] = ni_660x_read_register(s->dev, 0,DIO32Input) >> ++ base_bitfield_channel; ++ ++ return 0; ++} ++ ++static void ni_660x_select_pfi_output(struct a4l_device *dev, ++ unsigned pfi_channel, ++ unsigned output_select) ++{ ++ static const unsigned counter_4_7_first_pfi = 8; ++ static const unsigned counter_4_7_last_pfi = 23; ++ unsigned active_chipset = 0; ++ unsigned idle_chipset = 0; ++ unsigned active_bits; ++ unsigned idle_bits; ++ ++ if (board(dev)->n_chips > 1) { ++ if (output_select == pfi_output_select_counter && ++ pfi_channel >= counter_4_7_first_pfi && ++ pfi_channel <= counter_4_7_last_pfi) { ++ active_chipset = 1; ++ idle_chipset = 0; ++ } else { ++ active_chipset = 0; ++ idle_chipset = 1; ++ } ++ } ++ ++ if (idle_chipset != active_chipset) { ++ ++ idle_bits =ni_660x_read_register(dev, idle_chipset, ++ IOConfigReg(pfi_channel)); ++ idle_bits &= ~pfi_output_select_mask(pfi_channel); ++ idle_bits |= ++ pfi_output_select_bits(pfi_channel, ++ pfi_output_select_high_Z); ++ ni_660x_write_register(dev, idle_chipset, idle_bits, ++ IOConfigReg(pfi_channel)); ++ } ++ ++ active_bits = ++ ni_660x_read_register(dev, active_chipset, ++ IOConfigReg(pfi_channel)); ++ active_bits &= ~pfi_output_select_mask(pfi_channel); ++ active_bits |= pfi_output_select_bits(pfi_channel, output_select); ++ ni_660x_write_register(dev, active_chipset, active_bits, ++ IOConfigReg(pfi_channel)); ++} ++ ++static int ni_660x_set_pfi_routing(struct a4l_device *dev, unsigned chan, ++ unsigned source) ++{ ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ ++ if (source > num_pfi_output_selects) ++ return -EINVAL; ++ if (source == pfi_output_select_high_Z) ++ return -EINVAL; ++ if (chan < min_counter_pfi_chan) { ++ if (source == pfi_output_select_counter) ++ return -EINVAL; ++ } else if (chan > max_dio_pfi_chan) { ++ if (source == pfi_output_select_do) ++ return -EINVAL; ++ } ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ ++ private(dev)->pfi_output_selects[chan] = source; ++ if (private(dev)->pfi_direction_bits & (((uint64_t) 1) << chan)) ++ ni_660x_select_pfi_output(dev, chan, ++ private(dev)-> ++ 
pfi_output_selects[chan]); ++ return 0; ++} ++ ++static unsigned ni_660x_get_pfi_routing(struct a4l_device *dev, ++ unsigned chan) ++{ ++ BUG_ON(chan >= NUM_PFI_CHANNELS); ++ return private(dev)->pfi_output_selects[chan]; ++} ++ ++static void ni660x_config_filter(struct a4l_device *dev, ++ unsigned pfi_channel, ++ int filter) ++{ ++ unsigned int bits; ++ ++ bits = ni_660x_read_register(dev, 0, IOConfigReg(pfi_channel)); ++ bits &= ~pfi_input_select_mask(pfi_channel); ++ bits |= pfi_input_select_bits(pfi_channel, filter); ++ ni_660x_write_register(dev, 0, bits, IOConfigReg(pfi_channel)); ++} ++ ++static int ni_660x_dio_insn_config(struct a4l_subdevice *s, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int* data = insn->data; ++ int chan = CR_CHAN(insn->chan_desc); ++ struct a4l_device* dev = s->dev; ++ ++ if (data == NULL) ++ return -EINVAL; ++ ++ /* The input or output configuration of each digital line is ++ * configured by a special insn_config instruction. chanspec ++ * contains the channel to be changed, and data[0] contains the ++ * value COMEDI_INPUT or COMEDI_OUTPUT. */ ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ private(dev)->pfi_direction_bits |= ((uint64_t) 1) << chan; ++ ni_660x_select_pfi_output(dev, chan, ++ private(dev)-> ++ pfi_output_selects[chan]); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ private(dev)->pfi_direction_bits &= ~(((uint64_t) 1) << chan); ++ ni_660x_select_pfi_output(dev, chan, pfi_output_select_high_Z); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = ++ (private(dev)->pfi_direction_bits & ++ (((uint64_t) 1) << chan)) ? A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ case A4L_INSN_CONFIG_SET_ROUTING: ++ return ni_660x_set_pfi_routing(dev, chan, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_ROUTING: ++ data[1] = ni_660x_get_pfi_routing(dev, chan); ++ break; ++ case A4L_INSN_CONFIG_FILTER: ++ ni660x_config_filter(dev, chan, data[1]); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ }; ++ ++ return 0; ++} ++ ++ ++MODULE_DESCRIPTION("Analogy driver for NI660x series cards"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/analogy/national_instruments/tio_common.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/tio_common.c 2021-04-07 16:01:27.825633312 +0800 +@@ -0,0 +1,1998 @@ ++/* ++ * Hardware driver for NI general purpose counter ++ * Copyright (C) 2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Description: National Instruments general purpose counters ++ * This module is not used directly by end-users. Rather, it is used ++ * by other drivers (for example ni_660x and ni_pcimio) to provide ++ * support for NI's general purpose counters. It was originally based ++ * on the counter code from ni_660x.c and ni_mio_common.c. ++ * ++ * Author: ++ * J.P. 
Mellor ++ * Herman.Bruyninckx@mech.kuleuven.ac.be ++ * Wim.Meeussen@mech.kuleuven.ac.be, ++ * Klaas.Gadeyne@mech.kuleuven.ac.be, ++ * Frank Mori Hess ++ * ++ * References: ++ * DAQ 660x Register-Level Programmer Manual (NI 370505A-01) ++ * DAQ 6601/6602 User Manual (NI 322137B-01) ++ * 340934b.pdf DAQ-STC reference manual ++ * ++ * TODO: ++ * - Support use of both banks X and Y ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "ni_tio.h" ++#include "ni_mio.h" ++ ++static inline void write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ BUG_ON(reg >= NITIO_Num_Registers); ++ counter->counter_dev->write_register(counter, bits, reg); ++} ++ ++static inline unsigned int read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ BUG_ON(reg >= NITIO_Num_Registers); ++ return counter->counter_dev->read_register(counter, reg); ++} ++ ++struct ni_gpct_device *a4l_ni_gpct_device_construct(struct a4l_device * dev, ++ void (*write_register) (struct ni_gpct * counter, unsigned int bits, ++ enum ni_gpct_register reg), ++ unsigned int (*read_register) (struct ni_gpct * counter, ++ enum ni_gpct_register reg), enum ni_gpct_variant variant, ++ unsigned int num_counters) ++{ ++ struct ni_gpct_device *counter_dev = ++ kmalloc(sizeof(struct ni_gpct_device), GFP_KERNEL); ++ if (counter_dev == NULL) ++ return NULL; ++ ++ memset(counter_dev, 0, sizeof(struct ni_gpct_device)); ++ ++ counter_dev->dev = dev; ++ counter_dev->write_register = write_register; ++ counter_dev->read_register = read_register; ++ counter_dev->variant = variant; ++ rtdm_lock_init(&counter_dev->regs_lock); ++ BUG_ON(num_counters == 0); ++ ++ counter_dev->counters = ++ kmalloc(sizeof(struct ni_gpct *) * num_counters, GFP_KERNEL); ++ ++ if (counter_dev->counters == NULL) { ++ kfree(counter_dev); ++ return NULL; ++ } ++ ++ memset(counter_dev->counters, 0, sizeof(struct ni_gpct *) * num_counters); ++ ++ counter_dev->num_counters = num_counters; ++ return counter_dev; ++} ++ ++void a4l_ni_gpct_device_destroy(struct ni_gpct_device *counter_dev) ++{ ++ if (counter_dev->counters == NULL) ++ return; ++ kfree(counter_dev->counters); ++ kfree(counter_dev); ++} ++ ++static ++int ni_tio_counting_mode_registers_present(const struct ni_gpct_device *counter_dev) ++{ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ return 1; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static ++int ni_tio_second_gate_registers_present(const struct ni_gpct_device *counter_dev) ++{ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ return 0; ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ return 1; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static inline ++void ni_tio_set_bits_transient(struct ni_gpct *counter, ++ enum ni_gpct_register register_index, ++ unsigned int bit_mask, ++ unsigned int bit_values, ++ unsigned transient_bit_values) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned long flags; ++ ++ BUG_ON(register_index >= NITIO_Num_Registers); ++ rtdm_lock_get_irqsave(&counter_dev->regs_lock, flags); ++ counter_dev->regs[register_index] &= ~bit_mask; ++ counter_dev->regs[register_index] |= (bit_values & bit_mask); ++ write_register(counter, ++ counter_dev->regs[register_index] | transient_bit_values, ++ register_index); ++ mmiowb(); ++ 
rtdm_lock_put_irqrestore(&counter_dev->regs_lock, flags); ++} ++ ++/* ni_tio_set_bits( ) is for safely writing to registers whose bits ++ may be twiddled in interrupt context, or whose software copy may be ++ read in interrupt context. */ ++static inline void ni_tio_set_bits(struct ni_gpct *counter, ++ enum ni_gpct_register register_index, ++ unsigned int bit_mask, ++ unsigned int bit_values) ++{ ++ ni_tio_set_bits_transient(counter, ++ register_index, ++ bit_mask, bit_values, 0x0); ++} ++ ++/* ni_tio_get_soft_copy( ) is for safely reading the software copy of ++ a register whose bits might be modified in interrupt context, or whose ++ software copy might need to be read in interrupt context. */ ++static inline ++unsigned int ni_tio_get_soft_copy(const struct ni_gpct *counter, ++ enum ni_gpct_register register_index) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned long flags; ++ unsigned value; ++ ++ BUG_ON(register_index >= NITIO_Num_Registers); ++ rtdm_lock_get_irqsave(&counter_dev->regs_lock, flags); ++ value = counter_dev->regs[register_index]; ++ rtdm_lock_put_irqrestore(&counter_dev->regs_lock, flags); ++ return value; ++} ++ ++static void ni_tio_reset_count_and_disarm(struct ni_gpct *counter) ++{ ++ write_register(counter, Gi_Reset_Bit(counter->counter_index), ++ NITIO_Gxx_Joint_Reset_Reg(counter->counter_index)); ++} ++ ++void a4l_ni_tio_init_counter(struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ ni_tio_reset_count_and_disarm(counter); ++ /* Initialize counter registers */ ++ counter_dev->regs[NITIO_Gi_Autoincrement_Reg(counter->counter_index)] = ++ 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_Autoincrement_Reg(counter-> ++ counter_index)], ++ NITIO_Gi_Autoincrement_Reg(counter->counter_index)); ++ ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ ~0, Gi_Synchronize_Gate_Bit); ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ~0, ++ 0); ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)], ++ NITIO_Gi_LoadA_Reg(counter->counter_index)); ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)], ++ NITIO_Gi_LoadB_Reg(counter->counter_index)); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ~0, 0); ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ~0, ++ 0); ++ } ++ if (ni_tio_second_gate_registers_present(counter_dev)) { ++ counter_dev->regs[NITIO_Gi_Second_Gate_Reg(counter-> ++ counter_index)] = 0x0; ++ write_register(counter, ++ counter_dev->regs[NITIO_Gi_Second_Gate_Reg(counter-> ++ counter_index)], ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index)); ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_DMA_Config_Reg(counter->counter_index), ~0, 0x0); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ~0, 0x0); ++} ++ ++static lsampl_t ni_tio_counter_status(struct ni_gpct *counter) ++{ ++ lsampl_t status = 0; ++ unsigned int bits; ++ ++ bits = read_register(counter,NITIO_Gxx_Status_Reg(counter->counter_index)); ++ if (bits & Gi_Armed_Bit(counter->counter_index)) { ++ status |= A4L_COUNTER_ARMED; ++ if (bits & Gi_Counting_Bit(counter->counter_index)) ++ status |= 
A4L_COUNTER_COUNTING; ++ } ++ return status; ++} ++ ++static ++uint64_t ni_tio_clock_period_ps(const struct ni_gpct *counter, ++ unsigned int generic_clock_source); ++static ++unsigned int ni_tio_generic_clock_src_select(const struct ni_gpct *counter); ++ ++static void ni_tio_set_sync_mode(struct ni_gpct *counter, int force_alt_sync) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned counting_mode_reg = ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index); ++ static const uint64_t min_normal_sync_period_ps = 25000; ++ const uint64_t clock_period_ps = ni_tio_clock_period_ps(counter, ++ ni_tio_generic_clock_src_select(counter)); ++ ++ if (ni_tio_counting_mode_registers_present(counter_dev) == 0) ++ return; ++ ++ switch (ni_tio_get_soft_copy(counter, ++ counting_mode_reg) & Gi_Counting_Mode_Mask) { ++ case Gi_Counting_Mode_QuadratureX1_Bits: ++ case Gi_Counting_Mode_QuadratureX2_Bits: ++ case Gi_Counting_Mode_QuadratureX4_Bits: ++ case Gi_Counting_Mode_Sync_Source_Bits: ++ force_alt_sync = 1; ++ break; ++ default: ++ break; ++ } ++ ++ /* It's not clear what we should do if clock_period is ++ unknown, so we are not using the alt sync bit in that case, ++ but allow the caller to decide by using the force_alt_sync ++ parameter. */ ++ if (force_alt_sync || ++ (clock_period_ps ++ && clock_period_ps < min_normal_sync_period_ps)) { ++ ni_tio_set_bits(counter, counting_mode_reg, ++ Gi_Alternate_Sync_Bit(counter_dev->variant), ++ Gi_Alternate_Sync_Bit(counter_dev->variant)); ++ } else { ++ ni_tio_set_bits(counter, counting_mode_reg, ++ Gi_Alternate_Sync_Bit(counter_dev->variant), 0x0); ++ } ++} ++ ++static int ni_tio_set_counter_mode(struct ni_gpct *counter, unsigned int mode) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned mode_reg_mask; ++ unsigned mode_reg_values; ++ unsigned input_select_bits = 0; ++ ++ /* these bits map directly on to the mode register */ ++ static const unsigned mode_reg_direct_mask = ++ NI_GPCT_GATE_ON_BOTH_EDGES_BIT | NI_GPCT_EDGE_GATE_MODE_MASK | ++ NI_GPCT_STOP_MODE_MASK | NI_GPCT_OUTPUT_MODE_MASK | ++ NI_GPCT_HARDWARE_DISARM_MASK | NI_GPCT_LOADING_ON_TC_BIT | ++ NI_GPCT_LOADING_ON_GATE_BIT | NI_GPCT_LOAD_B_SELECT_BIT; ++ ++ mode_reg_mask = mode_reg_direct_mask | Gi_Reload_Source_Switching_Bit; ++ mode_reg_values = mode & mode_reg_direct_mask; ++ switch (mode & NI_GPCT_RELOAD_SOURCE_MASK) { ++ case NI_GPCT_RELOAD_SOURCE_FIXED_BITS: ++ break; ++ case NI_GPCT_RELOAD_SOURCE_SWITCHING_BITS: ++ mode_reg_values |= Gi_Reload_Source_Switching_Bit; ++ break; ++ case NI_GPCT_RELOAD_SOURCE_GATE_SELECT_BITS: ++ input_select_bits |= Gi_Gate_Select_Load_Source_Bit; ++ mode_reg_mask |= Gi_Gating_Mode_Mask; ++ mode_reg_values |= Gi_Level_Gating_Bits; ++ break; ++ default: ++ break; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ++ mode_reg_mask, mode_reg_values); ++ ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ unsigned counting_mode_bits = 0; ++ counting_mode_bits |= ++ (mode >> NI_GPCT_COUNTING_MODE_SHIFT) & ++ Gi_Counting_Mode_Mask; ++ counting_mode_bits |= ++ ((mode >> NI_GPCT_INDEX_PHASE_BITSHIFT) << ++ Gi_Index_Phase_Bitshift) & Gi_Index_Phase_Mask; ++ if (mode & NI_GPCT_INDEX_ENABLE_BIT) { ++ counting_mode_bits |= Gi_Index_Mode_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ++ Gi_Counting_Mode_Mask | Gi_Index_Phase_Mask | ++ Gi_Index_Mode_Bit, counting_mode_bits); ++ ni_tio_set_sync_mode(counter, 0); ++ } ++ ++ 
ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Up_Down_Mask, ++ (mode >> NI_GPCT_COUNTING_DIRECTION_SHIFT) << Gi_Up_Down_Shift); ++ ++ if (mode & NI_GPCT_OR_GATE_BIT) { ++ input_select_bits |= Gi_Or_Gate_Bit; ++ } ++ if (mode & NI_GPCT_INVERT_OUTPUT_BIT) { ++ input_select_bits |= Gi_Output_Polarity_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Load_Source_Bit | Gi_Or_Gate_Bit | ++ Gi_Output_Polarity_Bit, input_select_bits); ++ ++ return 0; ++} ++ ++static int ni_tio_arm(struct ni_gpct *counter, int arm, unsigned int start_trigger) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ unsigned int command_transient_bits = 0; ++ ++ if (arm) { ++ switch (start_trigger) { ++ case NI_GPCT_ARM_IMMEDIATE: ++ command_transient_bits |= Gi_Arm_Bit; ++ break; ++ case NI_GPCT_ARM_PAIRED_IMMEDIATE: ++ command_transient_bits |= Gi_Arm_Bit | Gi_Arm_Copy_Bit; ++ break; ++ default: ++ break; ++ } ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ unsigned counting_mode_bits = 0; ++ ++ switch (start_trigger) { ++ case NI_GPCT_ARM_IMMEDIATE: ++ case NI_GPCT_ARM_PAIRED_IMMEDIATE: ++ break; ++ default: ++ if (start_trigger & NI_GPCT_ARM_UNKNOWN) { ++ /* Pass-through the least ++ significant bits so we can ++ figure out what select later ++ */ ++ unsigned hw_arm_select_bits = ++ (start_trigger << ++ Gi_HW_Arm_Select_Shift) & ++ Gi_HW_Arm_Select_Mask ++ (counter_dev->variant); ++ ++ counting_mode_bits |= ++ Gi_HW_Arm_Enable_Bit | ++ hw_arm_select_bits; ++ } else { ++ return -EINVAL; ++ } ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter-> ++ counter_index), ++ Gi_HW_Arm_Select_Mask(counter_dev-> ++ variant) | Gi_HW_Arm_Enable_Bit, ++ counting_mode_bits); ++ } ++ } else { ++ command_transient_bits |= Gi_Disarm_Bit; ++ } ++ ni_tio_set_bits_transient(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), 0, 0, ++ command_transient_bits); ++ return 0; ++} ++ ++static unsigned int ni_660x_source_select_bits(lsampl_t clock_source) ++{ ++ unsigned int ni_660x_clock; ++ unsigned int i; ++ const unsigned int clock_select_bits = ++ clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK; ++ ++ switch (clock_select_bits) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_1_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_2_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Timebase_3_Clock; ++ break; ++ case NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Logic_Low_Clock; ++ break; ++ case NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Source_Pin_i_Clock; ++ break; ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Next_Gate_Clock; ++ break; ++ case NI_GPCT_NEXT_TC_CLOCK_SRC_BITS: ++ ni_660x_clock = NI_660x_Next_TC_Clock; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_RTSI_CLOCK_SRC_BITS(i)) { ++ ni_660x_clock = NI_660x_RTSI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_source_pin; ++i) { ++ if (clock_select_bits == ++ NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(i)) { ++ ni_660x_clock = NI_660x_Source_Pin_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_source_pin) ++ break; ++ ni_660x_clock = 0; ++ BUG(); ++ break; ++ } ++ return Gi_Source_Select_Bits(ni_660x_clock); ++} ++ ++static unsigned int 
ni_m_series_source_select_bits(lsampl_t clock_source) ++{ ++ unsigned int ni_m_series_clock; ++ unsigned int i; ++ const unsigned int clock_select_bits = ++ clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK; ++ switch (clock_select_bits) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_1_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_2_Clock; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Timebase_3_Clock; ++ break; ++ case NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Logic_Low_Clock; ++ break; ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Next_Gate_Clock; ++ break; ++ case NI_GPCT_NEXT_TC_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Next_TC_Clock; ++ break; ++ case NI_GPCT_PXI10_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_PXI10_Clock; ++ break; ++ case NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_PXI_Star_Trigger_Clock; ++ break; ++ case NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS: ++ ni_m_series_clock = NI_M_Series_Analog_Trigger_Out_Clock; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_RTSI_CLOCK_SRC_BITS(i)) { ++ ni_m_series_clock = NI_M_Series_RTSI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (clock_select_bits == NI_GPCT_PFI_CLOCK_SRC_BITS(i)) { ++ ni_m_series_clock = NI_M_Series_PFI_Clock(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ __a4l_err("invalid clock source 0x%lx\n", ++ (unsigned long)clock_source); ++ BUG(); ++ ni_m_series_clock = 0; ++ break; ++ } ++ return Gi_Source_Select_Bits(ni_m_series_clock); ++} ++ ++static void ni_tio_set_source_subselect(struct ni_gpct *counter, ++ lsampl_t clock_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ ++ if (counter_dev->variant != ni_gpct_variant_m_series) ++ return; ++ switch (clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK) { ++ /* Gi_Source_Subselect is zero */ ++ case NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS: ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ counter_dev->regs[second_gate_reg] &= ~Gi_Source_Subselect_Bit; ++ break; ++ /* Gi_Source_Subselect is one */ ++ case NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS: ++ case NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS: ++ counter_dev->regs[second_gate_reg] |= Gi_Source_Subselect_Bit; ++ break; ++ /* Gi_Source_Subselect doesn't matter */ ++ default: ++ return; ++ break; ++ } ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++} ++ ++static int ni_tio_set_clock_src(struct ni_gpct *counter, ++ lsampl_t clock_source, lsampl_t period_ns) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned input_select_bits = 0; ++ static const uint64_t pico_per_nano = 1000; ++ ++ /* FIXME: validate clock source */ ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_660x: ++ input_select_bits |= ni_660x_source_select_bits(clock_source); ++ break; ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ input_select_bits |= ++ ni_m_series_source_select_bits(clock_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (clock_source & NI_GPCT_INVERT_CLOCK_SRC_BIT) ++ input_select_bits |= 
Gi_Source_Polarity_Bit; ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Source_Select_Mask | Gi_Source_Polarity_Bit, ++ input_select_bits); ++ ni_tio_set_source_subselect(counter, clock_source); ++ if (ni_tio_counting_mode_registers_present(counter_dev)) { ++ const unsigned prescaling_mode = ++ clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK; ++ unsigned counting_mode_bits = 0; ++ ++ switch (prescaling_mode) { ++ case NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS: ++ break; ++ case NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS: ++ counting_mode_bits |= ++ Gi_Prescale_X2_Bit(counter_dev->variant); ++ break; ++ case NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS: ++ counting_mode_bits |= ++ Gi_Prescale_X8_Bit(counter_dev->variant); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index), ++ Gi_Prescale_X2_Bit(counter_dev-> ++ variant) | Gi_Prescale_X8_Bit(counter_dev-> ++ variant), counting_mode_bits); ++ } ++ counter->clock_period_ps = pico_per_nano * period_ns; ++ ni_tio_set_sync_mode(counter, 0); ++ return 0; ++} ++ ++static unsigned int ni_tio_clock_src_modifiers(const struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned counting_mode_bits = ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Counting_Mode_Reg(counter->counter_index)); ++ unsigned int bits = 0; ++ ++ if (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Polarity_Bit) ++ bits |= NI_GPCT_INVERT_CLOCK_SRC_BIT; ++ if (counting_mode_bits & Gi_Prescale_X2_Bit(counter_dev->variant)) ++ bits |= NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS; ++ if (counting_mode_bits & Gi_Prescale_X8_Bit(counter_dev->variant)) ++ bits |= NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS; ++ return bits; ++} ++ ++static unsigned int ni_m_series_clock_src_select(const struct ni_gpct *counter) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ unsigned int i, clock_source = 0; ++ ++ const unsigned int input_select = (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Select_Mask) >> ++ Gi_Source_Select_Shift; ++ ++ switch (input_select) { ++ case NI_M_Series_Timebase_1_Clock: ++ clock_source = NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Timebase_2_Clock: ++ clock_source = NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Timebase_3_Clock: ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Source_Subselect_Bit) ++ clock_source = ++ NI_GPCT_ANALOG_TRIGGER_OUT_CLOCK_SRC_BITS; ++ else ++ clock_source = NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Logic_Low_Clock: ++ clock_source = NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Next_Gate_Clock: ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Source_Subselect_Bit) ++ clock_source = NI_GPCT_PXI_STAR_TRIGGER_CLOCK_SRC_BITS; ++ else ++ clock_source = NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_PXI10_Clock: ++ clock_source = NI_GPCT_PXI10_CLOCK_SRC_BITS; ++ break; ++ case NI_M_Series_Next_TC_Clock: ++ clock_source = NI_GPCT_NEXT_TC_CLOCK_SRC_BITS; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (input_select == NI_M_Series_RTSI_Clock(i)) { ++ clock_source = NI_GPCT_RTSI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ 
for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (input_select == NI_M_Series_PFI_Clock(i)) { ++ clock_source = NI_GPCT_PFI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ BUG(); ++ break; ++ } ++ clock_source |= ni_tio_clock_src_modifiers(counter); ++ return clock_source; ++} ++ ++static unsigned int ni_660x_clock_src_select(const struct ni_gpct *counter) ++{ ++ unsigned int i, clock_source = 0; ++ const unsigned input_select = (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & Gi_Source_Select_Mask) >> ++ Gi_Source_Select_Shift; ++ ++ switch (input_select) { ++ case NI_660x_Timebase_1_Clock: ++ clock_source = NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Timebase_2_Clock: ++ clock_source = NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Timebase_3_Clock: ++ clock_source = NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Logic_Low_Clock: ++ clock_source = NI_GPCT_LOGIC_LOW_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Source_Pin_i_Clock: ++ clock_source = NI_GPCT_SOURCE_PIN_i_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Next_Gate_Clock: ++ clock_source = NI_GPCT_NEXT_GATE_CLOCK_SRC_BITS; ++ break; ++ case NI_660x_Next_TC_Clock: ++ clock_source = NI_GPCT_NEXT_TC_CLOCK_SRC_BITS; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (input_select == NI_660x_RTSI_Clock(i)) { ++ clock_source = NI_GPCT_RTSI_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_source_pin; ++i) { ++ if (input_select == NI_660x_Source_Pin_Clock(i)) { ++ clock_source = ++ NI_GPCT_SOURCE_PIN_CLOCK_SRC_BITS(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_source_pin) ++ break; ++ BUG(); ++ break; ++ } ++ clock_source |= ni_tio_clock_src_modifiers(counter); ++ return clock_source; ++} ++ ++static unsigned int ni_tio_generic_clock_src_select(const struct ni_gpct *counter) ++{ ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ return ni_m_series_clock_src_select(counter); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_clock_src_select(counter); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static uint64_t ni_tio_clock_period_ps(const struct ni_gpct *counter, ++ unsigned int generic_clock_source) ++{ ++ uint64_t clock_period_ps; ++ ++ switch (generic_clock_source & NI_GPCT_CLOCK_SRC_SELECT_MASK) { ++ case NI_GPCT_TIMEBASE_1_CLOCK_SRC_BITS: ++ clock_period_ps = 50000; ++ break; ++ case NI_GPCT_TIMEBASE_2_CLOCK_SRC_BITS: ++ clock_period_ps = 10000000; ++ break; ++ case NI_GPCT_TIMEBASE_3_CLOCK_SRC_BITS: ++ clock_period_ps = 12500; ++ break; ++ case NI_GPCT_PXI10_CLOCK_SRC_BITS: ++ clock_period_ps = 100000; ++ break; ++ default: ++ /* Clock period is specified by user with prescaling ++ already taken into account. 
*/ ++ return counter->clock_period_ps; ++ break; ++ } ++ ++ switch (generic_clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK) { ++ case NI_GPCT_NO_PRESCALE_CLOCK_SRC_BITS: ++ break; ++ case NI_GPCT_PRESCALE_X2_CLOCK_SRC_BITS: ++ clock_period_ps *= 2; ++ break; ++ case NI_GPCT_PRESCALE_X8_CLOCK_SRC_BITS: ++ clock_period_ps *= 8; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return clock_period_ps; ++} ++ ++static void ni_tio_get_clock_src(struct ni_gpct *counter, ++ unsigned int * clock_source, ++ unsigned int * period_ns) ++{ ++ static const unsigned int pico_per_nano = 1000; ++ uint64_t temp64; ++ ++ *clock_source = ni_tio_generic_clock_src_select(counter); ++ temp64 = ni_tio_clock_period_ps(counter, *clock_source); ++ do_div(temp64, pico_per_nano); ++ *period_ns = temp64; ++} ++ ++static void ni_tio_set_first_gate_modifiers(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ const unsigned int mode_mask = Gi_Gate_Polarity_Bit | Gi_Gating_Mode_Mask; ++ unsigned int mode_values = 0; ++ ++ if (gate_source & CR_INVERT) { ++ mode_values |= Gi_Gate_Polarity_Bit; ++ } ++ if (gate_source & CR_EDGE) { ++ mode_values |= Gi_Rising_Edge_Gating_Bits; ++ } else { ++ mode_values |= Gi_Level_Gating_Bits; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Mode_Reg(counter->counter_index), ++ mode_mask, mode_values); ++} ++ ++static int ni_660x_set_first_gate(struct ni_gpct *counter, lsampl_t gate_source) ++{ ++ const unsigned int selected_gate = CR_CHAN(gate_source); ++ /* Bits of selected_gate that may be meaningful to ++ input select register */ ++ const unsigned int selected_gate_mask = 0x1f; ++ unsigned ni_660x_gate_select; ++ unsigned i; ++ ++ switch (selected_gate) { ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ ni_660x_gate_select = NI_660x_Next_SRC_Gate_Select; ++ break; ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ case NI_GPCT_SOURCE_PIN_i_GATE_SELECT: ++ case NI_GPCT_GATE_PIN_i_GATE_SELECT: ++ ni_660x_gate_select = selected_gate & selected_gate_mask; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (selected_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_660x_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_gate_pin; ++i) { ++ if (selected_gate == NI_GPCT_GATE_PIN_GATE_SELECT(i)) { ++ ni_660x_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_gate_pin) ++ break; ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Mask, Gi_Gate_Select_Bits(ni_660x_gate_select)); ++ return 0; ++} ++ ++static int ni_m_series_set_first_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ const unsigned int selected_gate = CR_CHAN(gate_source); ++ /* bits of selected_gate that may be meaningful to input select register */ ++ const unsigned int selected_gate_mask = 0x1f; ++ unsigned int i, ni_m_series_gate_select; ++ ++ switch (selected_gate) { ++ case NI_GPCT_TIMESTAMP_MUX_GATE_SELECT: ++ case NI_GPCT_AI_START2_GATE_SELECT: ++ case NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT: ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_AI_START1_GATE_SELECT: ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ case NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ ni_m_series_gate_select = selected_gate & selected_gate_mask; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ 
if (selected_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_m_series_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (selected_gate == NI_GPCT_PFI_GATE_SELECT(i)) { ++ ni_m_series_gate_select = ++ selected_gate & selected_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ return -EINVAL; ++ break; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Gate_Select_Mask, ++ Gi_Gate_Select_Bits(ni_m_series_gate_select)); ++ return 0; ++} ++ ++static int ni_660x_set_second_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ const unsigned int selected_second_gate = CR_CHAN(gate_source); ++ /* bits of second_gate that may be meaningful to second gate register */ ++ static const unsigned int selected_second_gate_mask = 0x1f; ++ unsigned int i, ni_660x_second_gate_select; ++ ++ switch (selected_second_gate) { ++ case NI_GPCT_SOURCE_PIN_i_GATE_SELECT: ++ case NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT: ++ case NI_GPCT_SELECTED_GATE_GATE_SELECT: ++ case NI_GPCT_NEXT_OUT_GATE_SELECT: ++ case NI_GPCT_LOGIC_LOW_GATE_SELECT: ++ ni_660x_second_gate_select = ++ selected_second_gate & selected_second_gate_mask; ++ break; ++ case NI_GPCT_NEXT_SOURCE_GATE_SELECT: ++ ni_660x_second_gate_select = ++ NI_660x_Next_SRC_Second_Gate_Select; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (selected_second_gate == NI_GPCT_RTSI_GATE_SELECT(i)) { ++ ni_660x_second_gate_select = ++ selected_second_gate & ++ selected_second_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_up_down_pin; ++i) { ++ if (selected_second_gate == ++ NI_GPCT_UP_DOWN_PIN_GATE_SELECT(i)) { ++ ni_660x_second_gate_select = ++ selected_second_gate & ++ selected_second_gate_mask; ++ break; ++ } ++ } ++ if (i <= ni_660x_max_up_down_pin) ++ break; ++ return -EINVAL; ++ break; ++ }; ++ counter_dev->regs[second_gate_reg] |= Gi_Second_Gate_Mode_Bit; ++ counter_dev->regs[second_gate_reg] &= ~Gi_Second_Gate_Select_Mask; ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Select_Bits(ni_660x_second_gate_select); ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++} ++ ++static int ni_m_series_set_second_gate(struct ni_gpct *counter, ++ lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ const unsigned int selected_second_gate = CR_CHAN(gate_source); ++ /* Bits of second_gate that may be meaningful to second gate register */ ++ static const unsigned int selected_second_gate_mask = 0x1f; ++ unsigned int ni_m_series_second_gate_select; ++ ++ /* FIXME: We don't know what the m-series second gate codes ++ are, so we'll just pass the bits through for now. 
*/ ++ switch (selected_second_gate) { ++ default: ++ ni_m_series_second_gate_select = ++ selected_second_gate & selected_second_gate_mask; ++ break; ++ }; ++ counter_dev->regs[second_gate_reg] |= Gi_Second_Gate_Mode_Bit; ++ counter_dev->regs[second_gate_reg] &= ~Gi_Second_Gate_Select_Mask; ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Select_Bits(ni_m_series_second_gate_select); ++ write_register(counter, counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++} ++ ++static int ni_tio_set_gate_src(struct ni_gpct *counter, ++ unsigned int gate_index, lsampl_t gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ ++ switch (gate_index) { ++ case 0: ++ if (CR_CHAN(gate_source) == NI_GPCT_DISABLED_GATE_SELECT) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Mode_Reg(counter->counter_index), ++ Gi_Gating_Mode_Mask, Gi_Gating_Disabled_Bits); ++ return 0; ++ } ++ ni_tio_set_first_gate_modifiers(counter, gate_source); ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ return ni_m_series_set_first_gate(counter, gate_source); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_set_first_gate(counter, gate_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ break; ++ case 1: ++ if (ni_tio_second_gate_registers_present(counter_dev) == 0) ++ return -EINVAL; ++ if (CR_CHAN(gate_source) == NI_GPCT_DISABLED_GATE_SELECT) { ++ counter_dev->regs[second_gate_reg] &= ++ ~Gi_Second_Gate_Mode_Bit; ++ write_register(counter, ++ counter_dev->regs[second_gate_reg], ++ second_gate_reg); ++ return 0; ++ } ++ if (gate_source & CR_INVERT) { ++ counter_dev->regs[second_gate_reg] |= ++ Gi_Second_Gate_Polarity_Bit; ++ } else { ++ counter_dev->regs[second_gate_reg] &= ++ ~Gi_Second_Gate_Polarity_Bit; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ return ni_m_series_set_second_gate(counter, ++ gate_source); ++ break; ++ case ni_gpct_variant_660x: ++ return ni_660x_set_second_gate(counter, gate_source); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int ni_tio_set_other_src(struct ni_gpct *counter, ++ unsigned int index, unsigned int source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ ++ if (counter_dev->variant == ni_gpct_variant_m_series) { ++ unsigned int abz_reg, shift, mask; ++ ++ abz_reg = NITIO_Gi_ABZ_Reg(counter->counter_index); ++ switch (index) { ++ case NI_GPCT_SOURCE_ENCODER_A: ++ shift = 10; ++ break; ++ case NI_GPCT_SOURCE_ENCODER_B: ++ shift = 5; ++ break; ++ case NI_GPCT_SOURCE_ENCODER_Z: ++ shift = 0; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ mask = 0x1f << shift; ++ if (source > 0x1f) { ++ /* Disable gate */ ++ source = 0x1f; ++ } ++ counter_dev->regs[abz_reg] &= ~mask; ++ counter_dev->regs[abz_reg] |= (source << shift) & mask; ++ write_register(counter, counter_dev->regs[abz_reg], abz_reg); ++ return 0; ++ } ++ return -EINVAL; ++} ++ ++static unsigned int ni_660x_first_gate_to_generic_gate_source(unsigned int ni_660x_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_660x_gate_select) { ++ case NI_660x_Source_Pin_i_Gate_Select: ++ return NI_GPCT_SOURCE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Gate_Pin_i_Gate_Select: ++ return NI_GPCT_GATE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Next_SRC_Gate_Select: ++ return 
NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_660x_Next_Out_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_660x_Logic_Low_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (ni_660x_gate_select == NI_660x_RTSI_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_gate_pin; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_Gate_Pin_Gate_Select(i)) { ++ return NI_GPCT_GATE_PIN_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_gate_pin) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int ni_m_series_first_gate_to_generic_gate_source(unsigned int ++ ni_m_series_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_m_series_gate_select) { ++ case NI_M_Series_Timestamp_Mux_Gate_Select: ++ return NI_GPCT_TIMESTAMP_MUX_GATE_SELECT; ++ break; ++ case NI_M_Series_AI_START2_Gate_Select: ++ return NI_GPCT_AI_START2_GATE_SELECT; ++ break; ++ case NI_M_Series_PXI_Star_Trigger_Gate_Select: ++ return NI_GPCT_PXI_STAR_TRIGGER_GATE_SELECT; ++ break; ++ case NI_M_Series_Next_Out_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_M_Series_AI_START1_Gate_Select: ++ return NI_GPCT_AI_START1_GATE_SELECT; ++ break; ++ case NI_M_Series_Next_SRC_Gate_Select: ++ return NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_M_Series_Analog_Trigger_Out_Gate_Select: ++ return NI_GPCT_ANALOG_TRIGGER_OUT_GATE_SELECT; ++ break; ++ case NI_M_Series_Logic_Low_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_m_series_max_rtsi_channel; ++i) { ++ if (ni_m_series_gate_select == ++ NI_M_Series_RTSI_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_m_series_max_pfi_channel; ++i) { ++ if (ni_m_series_gate_select == ++ NI_M_Series_PFI_Gate_Select(i)) { ++ return NI_GPCT_PFI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_m_series_max_pfi_channel) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int ni_660x_second_gate_to_generic_gate_source(unsigned int ++ ni_660x_gate_select) ++{ ++ unsigned int i; ++ ++ switch (ni_660x_gate_select) { ++ case NI_660x_Source_Pin_i_Second_Gate_Select: ++ return NI_GPCT_SOURCE_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Up_Down_Pin_i_Second_Gate_Select: ++ return NI_GPCT_UP_DOWN_PIN_i_GATE_SELECT; ++ break; ++ case NI_660x_Next_SRC_Second_Gate_Select: ++ return NI_GPCT_NEXT_SOURCE_GATE_SELECT; ++ break; ++ case NI_660x_Next_Out_Second_Gate_Select: ++ return NI_GPCT_NEXT_OUT_GATE_SELECT; ++ break; ++ case NI_660x_Selected_Gate_Second_Gate_Select: ++ return NI_GPCT_SELECTED_GATE_GATE_SELECT; ++ break; ++ case NI_660x_Logic_Low_Second_Gate_Select: ++ return NI_GPCT_LOGIC_LOW_GATE_SELECT; ++ break; ++ default: ++ for (i = 0; i <= ni_660x_max_rtsi_channel; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_RTSI_Second_Gate_Select(i)) { ++ return NI_GPCT_RTSI_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_rtsi_channel) ++ break; ++ for (i = 0; i <= ni_660x_max_up_down_pin; ++i) { ++ if (ni_660x_gate_select == ++ NI_660x_Up_Down_Pin_Second_Gate_Select(i)) { ++ return NI_GPCT_UP_DOWN_PIN_GATE_SELECT(i); ++ break; ++ } ++ } ++ if (i <= ni_660x_max_up_down_pin) ++ break; ++ BUG(); ++ break; ++ } ++ return 0; ++} ++ ++static unsigned int 
ni_m_series_second_gate_to_generic_gate_source(unsigned int ++ ni_m_series_gate_select) ++{ ++ /* FIXME: the second gate sources for the m series are ++ undocumented, so we just return the raw bits for now. */ ++ switch (ni_m_series_gate_select) { ++ default: ++ return ni_m_series_gate_select; ++ break; ++ } ++ return 0; ++}; ++ ++static int ni_tio_get_gate_src(struct ni_gpct *counter, ++ unsigned int gate_index, ++ unsigned int * gate_source) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int mode_bits = ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Mode_Reg(counter->counter_index)); ++ const unsigned int second_gate_reg = ++ NITIO_Gi_Second_Gate_Reg(counter->counter_index); ++ unsigned int gate_select_bits; ++ ++ switch (gate_index) { ++ case 0: ++ if ((mode_bits & Gi_Gating_Mode_Mask) == ++ Gi_Gating_Disabled_Bits) { ++ *gate_source = NI_GPCT_DISABLED_GATE_SELECT; ++ return 0; ++ } else { ++ gate_select_bits = ++ (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Input_Select_Reg(counter-> ++ counter_index)) & ++ Gi_Gate_Select_Mask) >> Gi_Gate_Select_Shift; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ *gate_source = ++ ni_m_series_first_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ case ni_gpct_variant_660x: ++ *gate_source = ++ ni_660x_first_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (mode_bits & Gi_Gate_Polarity_Bit) { ++ *gate_source |= CR_INVERT; ++ } ++ if ((mode_bits & Gi_Gating_Mode_Mask) != Gi_Level_Gating_Bits) { ++ *gate_source |= CR_EDGE; ++ } ++ break; ++ case 1: ++ if ((mode_bits & Gi_Gating_Mode_Mask) == Gi_Gating_Disabled_Bits ++ || (counter_dev-> ++ regs[second_gate_reg] & Gi_Second_Gate_Mode_Bit) ++ == 0) { ++ *gate_source = NI_GPCT_DISABLED_GATE_SELECT; ++ return 0; ++ } else { ++ gate_select_bits = ++ (counter_dev-> ++ regs[second_gate_reg] & ++ Gi_Second_Gate_Select_Mask) >> ++ Gi_Second_Gate_Select_Shift; ++ } ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ case ni_gpct_variant_m_series: ++ *gate_source = ++ ni_m_series_second_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ case ni_gpct_variant_660x: ++ *gate_source = ++ ni_660x_second_gate_to_generic_gate_source ++ (gate_select_bits); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (counter_dev-> ++ regs[second_gate_reg] & Gi_Second_Gate_Polarity_Bit) { ++ *gate_source |= CR_INVERT; ++ } ++ /* Second gate can't have edge/level mode set independently */ ++ if ((mode_bits & Gi_Gating_Mode_Mask) != Gi_Level_Gating_Bits) { ++ *gate_source |= CR_EDGE; ++ } ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++int a4l_ni_tio_insn_config(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SET_COUNTER_MODE: ++ return ni_tio_set_counter_mode(counter, data[1]); ++ break; ++ case A4L_INSN_CONFIG_ARM: ++ return ni_tio_arm(counter, 1, data[1]); ++ break; ++ case A4L_INSN_CONFIG_DISARM: ++ ni_tio_arm(counter, 0, 0); ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_COUNTER_STATUS: ++ data[1] = ni_tio_counter_status(counter); ++ data[2] = counter_status_mask; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_CLOCK_SRC: ++ return ni_tio_set_clock_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_GET_CLOCK_SRC: ++ ni_tio_get_clock_src(counter, &data[1], &data[2]); 
++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_GATE_SRC: ++ return ni_tio_set_gate_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_GET_GATE_SRC: ++ return ni_tio_get_gate_src(counter, data[1], &data[2]); ++ break; ++ case A4L_INSN_CONFIG_SET_OTHER_SRC: ++ return ni_tio_set_other_src(counter, data[1], data[2]); ++ break; ++ case A4L_INSN_CONFIG_RESET: ++ ni_tio_reset_count_and_disarm(counter); ++ return 0; ++ break; ++ default: ++ break; ++ } ++ return -EINVAL; ++} ++ ++int a4l_ni_tio_rinsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int channel = CR_CHAN(insn->chan_desc); ++ unsigned int first_read; ++ unsigned int second_read; ++ unsigned int correct_read; ++ ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (insn->data_size != sizeof(uint32_t)) ++ return -EINVAL; ++ ++ switch (channel) { ++ case 0: ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, 0); ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, Gi_Save_Trace_Bit); ++ /* The count doesn't get latched until the next clock ++ edge, so it is possible the count may change (once) ++ while we are reading. Since the read of the ++ SW_Save_Reg isn't atomic (apparently even when it's a ++ 32 bit register according to 660x docs), we need to ++ read twice and make sure the reading hasn't changed. ++ If it has, a third read will be correct since the ++ count value will definitely have latched by then. */ ++ first_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ second_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ if (first_read != second_read) ++ correct_read = ++ read_register(counter, ++ NITIO_Gi_SW_Save_Reg(counter->counter_index)); ++ else ++ correct_read = first_read; ++ data[0] = correct_read; ++ return 0; ++ break; ++ case 1: ++ data[0] = counter_dev->regs ++ [NITIO_Gi_LoadA_Reg(counter->counter_index)]; ++ break; ++ case 2: ++ data[0] = counter_dev->regs ++ [NITIO_Gi_LoadB_Reg(counter->counter_index)]; ++ break; ++ }; ++ ++ return 0; ++} ++ ++static unsigned int ni_tio_next_load_register(struct ni_gpct *counter) ++{ ++ const unsigned int bits = read_register(counter, ++ NITIO_Gxx_Status_Reg(counter->counter_index)); ++ ++ if (bits & Gi_Next_Load_Source_Bit(counter->counter_index)) { ++ return NITIO_Gi_LoadB_Reg(counter->counter_index); ++ } else { ++ return NITIO_Gi_LoadA_Reg(counter->counter_index); ++ } ++} ++ ++int a4l_ni_tio_winsn(struct ni_gpct *counter, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ const unsigned int channel = CR_CHAN(insn->chan_desc); ++ unsigned int load_reg; ++ ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ if (insn->data_size != sizeof(uint32_t)) ++ return -EINVAL; ++ ++ switch (channel) { ++ case 0: ++ /* Unsafe if counter is armed. Should probably check ++ status and return -EBUSY if armed. */ ++ /* Don't disturb load source select, just use ++ whichever load register is already selected. 
*/ ++ load_reg = ni_tio_next_load_register(counter); ++ write_register(counter, data[0], load_reg); ++ ni_tio_set_bits_transient(counter, ++ NITIO_Gi_Command_Reg(counter->counter_index), 0, 0, ++ Gi_Load_Bit); ++ /* Restore state of load reg to whatever the user set ++ last set it to */ ++ write_register(counter, counter_dev->regs[load_reg], load_reg); ++ break; ++ case 1: ++ counter_dev->regs[NITIO_Gi_LoadA_Reg(counter->counter_index)] = ++ data[0]; ++ write_register(counter, data[0], ++ NITIO_Gi_LoadA_Reg(counter->counter_index)); ++ break; ++ case 2: ++ counter_dev->regs[NITIO_Gi_LoadB_Reg(counter->counter_index)] = ++ data[0]; ++ write_register(counter, data[0], ++ NITIO_Gi_LoadB_Reg(counter->counter_index)); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ ++ return 0; ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static void ni_tio_configure_dma(struct ni_gpct *counter, ++ short enable, short read_not_write) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ unsigned int input_select_bits = 0; ++ ++ if (enable) { ++ if (read_not_write) { ++ input_select_bits |= Gi_Read_Acknowledges_Irq; ++ } else { ++ input_select_bits |= Gi_Write_Acknowledges_Irq; ++ } ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Input_Select_Reg(counter->counter_index), ++ Gi_Read_Acknowledges_Irq | Gi_Write_Acknowledges_Irq, ++ input_select_bits); ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_e_series: ++ break; ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ { ++ unsigned gi_dma_config_bits = 0; ++ ++ if (enable) { ++ gi_dma_config_bits |= Gi_DMA_Enable_Bit; ++ gi_dma_config_bits |= Gi_DMA_Int_Bit; ++ } ++ if (read_not_write == 0) { ++ gi_dma_config_bits |= Gi_DMA_Write_Bit; ++ } ++ ni_tio_set_bits(counter, ++ NITIO_Gi_DMA_Config_Reg(counter->counter_index), ++ Gi_DMA_Enable_Bit | Gi_DMA_Int_Bit | ++ Gi_DMA_Write_Bit, gi_dma_config_bits); ++ } ++ break; ++ } ++} ++ ++/* TODO: a4l_ni_tio_input_inttrig is left unused because the trigger ++ callback cannot be changed at run time */ ++int a4l_ni_tio_input_inttrig(struct ni_gpct *counter, lsampl_t trignum) ++{ ++ unsigned long flags; ++ int retval = 0; ++ ++ BUG_ON(counter == NULL); ++ if (trignum != 0) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan) ++ a4l_mite_dma_arm(counter->mite_chan); ++ else ++ retval = -EIO; ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ if (retval < 0) ++ return retval; ++ retval = ni_tio_arm(counter, 1, NI_GPCT_ARM_IMMEDIATE); ++ ++ /* TODO: disable trigger until a command is recorded. 
++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ return retval; ++} ++ ++static int ni_tio_input_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct_device *counter_dev = counter->counter_dev; ++ int retval = 0; ++ ++ counter->mite_chan->dir = A4L_INPUT; ++ switch (counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ a4l_mite_prep_dma(counter->mite_chan, 32, 32); ++ break; ++ case ni_gpct_variant_e_series: ++ a4l_mite_prep_dma(counter->mite_chan, 16, 32); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ni_tio_set_bits(counter, NITIO_Gi_Command_Reg(counter->counter_index), ++ Gi_Save_Trace_Bit, 0); ++ ni_tio_configure_dma(counter, 1, 1); ++ switch (cmd->start_src) { ++ case TRIG_NOW: ++ a4l_mite_dma_arm(counter->mite_chan); ++ retval = ni_tio_arm(counter, 1, NI_GPCT_ARM_IMMEDIATE); ++ break; ++ case TRIG_INT: ++ break; ++ case TRIG_EXT: ++ a4l_mite_dma_arm(counter->mite_chan); ++ retval = ni_tio_arm(counter, 1, cmd->start_arg); ++ case TRIG_OTHER: ++ a4l_mite_dma_arm(counter->mite_chan); ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ return retval; ++} ++ ++static int ni_tio_output_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ __a4l_err("ni_tio: output commands not yet implemented.\n"); ++ return -ENOTSUPP; ++} ++ ++static int ni_tio_cmd_setup(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ int retval = 0, set_gate_source = 0; ++ unsigned int gate_source; ++ ++ if (cmd->scan_begin_src == TRIG_EXT) { ++ set_gate_source = 1; ++ gate_source = cmd->scan_begin_arg; ++ } else if (cmd->convert_src == TRIG_EXT) { ++ set_gate_source = 1; ++ gate_source = cmd->convert_arg; ++ } ++ if (set_gate_source) { ++ retval = ni_tio_set_gate_src(counter, 0, gate_source); ++ } ++ if (cmd->flags & TRIG_WAKE_EOS) { ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index)); ++ } ++ return retval; ++} ++ ++int a4l_ni_tio_cmd(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ int retval = 0; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan == NULL) { ++ __a4l_err("a4l_ni_tio_cmd: commands only supported with DMA." 
++ " Interrupt-driven commands not yet implemented.\n"); ++ retval = -EIO; ++ } else { ++ retval = ni_tio_cmd_setup(counter, cmd); ++ if (retval == 0) { ++ if (cmd->flags & A4L_CMD_WRITE) { ++ retval = ni_tio_output_cmd(counter, cmd); ++ } else { ++ retval = ni_tio_input_cmd(counter, cmd); ++ } ++ } ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ return retval; ++} ++ ++struct a4l_cmd_desc a4l_ni_tio_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT | TRIG_OTHER | TRIG_EXT, ++ .scan_begin_src = TRIG_FOLLOW | TRIG_EXT | TRIG_OTHER, ++ .convert_src = TRIG_NOW | TRIG_EXT | TRIG_OTHER, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++int a4l_ni_tio_cmdtest(struct ni_gpct *counter, struct a4l_cmd_desc *cmd) ++{ ++ /* Make sure trigger sources are trivially valid */ ++ ++ if ((cmd->start_src & TRIG_EXT) != 0 && ++ ni_tio_counting_mode_registers_present(counter->counter_dev) == 0) ++ return -EINVAL; ++ ++ /* Make sure trigger sources are mutually compatible */ ++ ++ if (cmd->convert_src != TRIG_NOW && cmd->scan_begin_src != TRIG_FOLLOW) ++ return -EINVAL; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_src != TRIG_EXT) { ++ if (cmd->start_arg != 0) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->scan_begin_src != TRIG_EXT) { ++ if (cmd->scan_begin_arg) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->convert_src != TRIG_EXT) { ++ if (cmd->convert_arg) { ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ return -EINVAL; ++ } ++ ++ if (cmd->stop_src == TRIG_NONE) { ++ if (cmd->stop_arg != 0) { ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++int a4l_ni_tio_cancel(struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ ++ ni_tio_arm(counter, 0, 0); ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan) { ++ a4l_mite_dma_disarm(counter->mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ ni_tio_configure_dma(counter, 0, 0); ++ ++ ni_tio_set_bits(counter, ++ NITIO_Gi_Interrupt_Enable_Reg(counter->counter_index), ++ Gi_Gate_Interrupt_Enable_Bit(counter->counter_index), 0x0); ++ return 0; ++} ++ ++/* During buffered input counter operation for e-series, the gate ++ interrupt is acked automatically by the dma controller, due to the ++ Gi_Read/Write_Acknowledges_IRQ bits in the input select ++ register. 
*/ ++static int should_ack_gate(struct ni_gpct *counter) ++{ ++ unsigned long flags; ++ int retval = 0; ++ ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ /* Not sure if 660x really supports gate interrupts ++ (the bits are not listed in register-level manual) */ ++ return 1; ++ break; ++ case ni_gpct_variant_e_series: ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ { ++ if (counter->mite_chan == NULL || ++ counter->mite_chan->dir != A4L_INPUT || ++ (a4l_mite_done(counter->mite_chan))) { ++ retval = 1; ++ } ++ } ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ break; ++ } ++ return retval; ++} ++ ++void a4l_ni_tio_acknowledge_and_confirm(struct ni_gpct *counter, ++ int *gate_error, ++ int *tc_error, ++ int *perm_stale_data, int *stale_data) ++{ ++ const unsigned short gxx_status = read_register(counter, ++ NITIO_Gxx_Status_Reg(counter->counter_index)); ++ const unsigned short gi_status = read_register(counter, ++ NITIO_Gi_Status_Reg(counter->counter_index)); ++ unsigned ack = 0; ++ ++ if (gate_error) ++ *gate_error = 0; ++ if (tc_error) ++ *tc_error = 0; ++ if (perm_stale_data) ++ *perm_stale_data = 0; ++ if (stale_data) ++ *stale_data = 0; ++ ++ if (gxx_status & Gi_Gate_Error_Bit(counter->counter_index)) { ++ ack |= Gi_Gate_Error_Confirm_Bit(counter->counter_index); ++ if (gate_error) { ++ /* 660x don't support automatic ++ acknowledgement of gate interrupt via dma ++ read/write and report bogus gate errors */ ++ if (counter->counter_dev->variant != ++ ni_gpct_variant_660x) { ++ *gate_error = 1; ++ } ++ } ++ } ++ if (gxx_status & Gi_TC_Error_Bit(counter->counter_index)) { ++ ack |= Gi_TC_Error_Confirm_Bit(counter->counter_index); ++ if (tc_error) ++ *tc_error = 1; ++ } ++ if (gi_status & Gi_TC_Bit) { ++ ack |= Gi_TC_Interrupt_Ack_Bit; ++ } ++ if (gi_status & Gi_Gate_Interrupt_Bit) { ++ if (should_ack_gate(counter)) ++ ack |= Gi_Gate_Interrupt_Ack_Bit; ++ } ++ if (ack) ++ write_register(counter, ack, ++ NITIO_Gi_Interrupt_Acknowledge_Reg(counter-> ++ counter_index)); ++ if (ni_tio_get_soft_copy(counter, ++ NITIO_Gi_Mode_Reg(counter-> ++ counter_index)) & Gi_Loading_On_Gate_Bit) { ++ if (gxx_status & Gi_Stale_Data_Bit(counter->counter_index)) { ++ if (stale_data) ++ *stale_data = 1; ++ } ++ if (read_register(counter, ++ NITIO_Gxx_Joint_Status2_Reg(counter-> ++ counter_index)) & ++ Gi_Permanent_Stale_Bit(counter->counter_index)) { ++ __a4l_err("%s: Gi_Permanent_Stale_Data detected.\n", ++ __FUNCTION__); ++ if (perm_stale_data) ++ *perm_stale_data = 1; ++ } ++ } ++} ++ ++/* TODO: to be adapted after a4l_buf_evt review */ ++void a4l_ni_tio_handle_interrupt(struct ni_gpct *counter, struct a4l_device *dev) ++{ ++ unsigned gpct_mite_status; ++ unsigned long flags; ++ int gate_error; ++ int tc_error; ++ int perm_stale_data; ++ struct a4l_subdevice *subd = ++ a4l_get_subd(dev, NI_GPCT_SUBDEV(counter->counter_index)); ++ ++ a4l_ni_tio_acknowledge_and_confirm(counter, &gate_error, &tc_error, ++ &perm_stale_data, NULL); ++ if (gate_error) { ++ __a4l_err("%s: Gi_Gate_Error detected.\n", __FUNCTION__); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ if (perm_stale_data) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ switch (counter->counter_dev->variant) { ++ case ni_gpct_variant_m_series: ++ case ni_gpct_variant_660x: ++ if (read_register(counter, ++ NITIO_Gi_DMA_Status_Reg(counter->counter_index)) ++ & Gi_DRQ_Error_Bit) { ++ __a4l_err("%s: Gi_DRQ_Error detected.\n", __FUNCTION__); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ break; 
++ case ni_gpct_variant_e_series: ++ break; ++ } ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ if (counter->mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++ return; ++ } ++ gpct_mite_status = a4l_mite_get_status(counter->mite_chan); ++ if (gpct_mite_status & CHSR_LINKC) { ++ writel(CHOR_CLRLC, ++ counter->mite_chan->mite->mite_io_addr + ++ MITE_CHOR(counter->mite_chan->channel)); ++ } ++ a4l_mite_sync_input_dma(counter->mite_chan, subd); ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++} ++ ++void a4l_ni_tio_set_mite_channel(struct ni_gpct *counter, ++ struct mite_channel *mite_chan) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&counter->lock, flags); ++ counter->mite_chan = mite_chan; ++ rtdm_lock_put_irqrestore(&counter->lock, flags); ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static int __init ni_tio_init_module(void) ++{ ++ return 0; ++} ++ ++static void __exit ni_tio_cleanup_module(void) ++{ ++} ++ ++MODULE_DESCRIPTION("Analogy support for NI general-purpose counters"); ++MODULE_LICENSE("GPL"); ++ ++module_init(ni_tio_init_module); ++module_exit(ni_tio_cleanup_module); ++ ++EXPORT_SYMBOL_GPL(a4l_ni_tio_rinsn); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_winsn); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_insn_config); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_init_counter); ++EXPORT_SYMBOL_GPL(a4l_ni_gpct_device_construct); ++EXPORT_SYMBOL_GPL(a4l_ni_gpct_device_destroy); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++EXPORT_SYMBOL_GPL(a4l_ni_tio_input_inttrig); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmd); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmd_mask); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cmdtest); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_cancel); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_handle_interrupt); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_set_mite_channel); ++EXPORT_SYMBOL_GPL(a4l_ni_tio_acknowledge_and_confirm); ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ +--- linux/drivers/xenomai/analogy/national_instruments/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/Kconfig 2021-04-07 16:01:27.820633319 +0800 +@@ -0,0 +1,42 @@ ++ ++config XENO_DRIVERS_ANALOGY_NI_MITE ++ depends on XENO_DRIVERS_ANALOGY && PCI ++ tristate "NI MITE driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_TIO ++ depends on XENO_DRIVERS_ANALOGY ++ tristate "NI TIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_MIO ++ depends on XENO_DRIVERS_ANALOGY && XENO_DRIVERS_ANALOGY_NI_TIO && PCI ++ tristate "NI MIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_PCIMIO ++ depends on XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI PCIMIO driver" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_670x ++ depends on EXPERIMENTAL && XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI 670X driver (EXPERIMENTAL)" ++ default n ++ ++config XENO_DRIVERS_ANALOGY_NI_660x ++ depends on EXPERIMENTAL && XENO_DRIVERS_ANALOGY && PCI ++ select XENO_DRIVERS_ANALOGY_NI_MITE ++ select XENO_DRIVERS_ANALOGY_NI_TIO ++ select XENO_DRIVERS_ANALOGY_NI_MIO ++ select XENO_DRIVERS_ANALOGY_8255 ++ tristate "NI 660X driver (EXPERIMENTAL)" ++ default n +--- linux/drivers/xenomai/analogy/national_instruments/mite.c 
1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mite.c 2021-04-07 16:01:27.815633326 +0800 +@@ -0,0 +1,839 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * ++ * Copyright (C) 1999 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * The NI Mite driver was originally written by Tomasz Motylewski ++ * <...>, and ported to comedi by ds. ++ * ++ * References for specifications: ++ * ++ * 321747b.pdf Register Level Programmer Manual (obsolete) ++ * 321747c.pdf Register Level Programmer Manual (new) ++ * DAQ-STC reference manual ++ * ++ * Other possibly relevant info: ++ * ++ * 320517c.pdf User manual (obsolete) ++ * 320517f.pdf User manual (new) ++ * 320889a.pdf delete ++ * 320906c.pdf maximum signal ratings ++ * 321066a.pdf about 16x ++ * 321791a.pdf discontinuation of at-mio-16e-10 rev. c ++ * 321808a.pdf about at-mio-16e-10 rev P ++ * 321837a.pdf discontinuation of at-mio-16de-10 rev d ++ * 321838a.pdf about at-mio-16de-10 rev N ++ * ++ * ISSUES: ++ */ ++ ++#include ++#include "mite.h" ++ ++#ifdef CONFIG_DEBUG_MITE ++#define MDPRINTK(fmt, args...) rtdm_printk(fmt, ##args) ++#else /* !CONFIG_DEBUG_MITE */ ++#define MDPRINTK(fmt, args...) 
++#endif /* CONFIG_DEBUG_MITE */ ++ ++static LIST_HEAD(mite_devices); ++ ++static struct pci_device_id mite_id[] = { ++ {PCI_DEVICE(PCI_VENDOR_ID_NATINST, PCI_ANY_ID), }, ++ {0, } ++}; ++ ++static int mite_probe(struct pci_dev *dev, const struct pci_device_id *id) ++{ ++ int i, err = 0; ++ struct mite_struct *mite; ++ ++ mite = kmalloc(sizeof(struct mite_struct), GFP_KERNEL); ++ if(mite == NULL) ++ return -ENOMEM; ++ ++ memset(mite, 0, sizeof(struct mite_struct)); ++ ++ rtdm_lock_init(&mite->lock); ++ ++ mite->pcidev = dev; ++ if (pci_enable_device(dev) < 0) { ++ __a4l_err("error enabling mite\n"); ++ err = -EIO; ++ goto out; ++ } ++ ++ for(i = 0; i < MAX_MITE_DMA_CHANNELS; i++) { ++ mite->channels[i].mite = mite; ++ mite->channels[i].channel = i; ++ mite->channels[i].done = 1; ++ } ++ ++ list_add(&mite->list, &mite_devices); ++ ++out: ++ if (err < 0) ++ kfree(mite); ++ ++ return err; ++} ++ ++static void mite_remove(struct pci_dev *dev) ++{ ++ struct list_head *this; ++ ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ if(mite->pcidev == dev) { ++ list_del(this); ++ kfree(mite); ++ break; ++ } ++ } ++} ++ ++static struct pci_driver mite_driver = { ++ .name = "analogy_mite", ++ .id_table = mite_id, ++ .probe = mite_probe, ++ .remove = mite_remove, ++}; ++ ++int a4l_mite_setup(struct mite_struct *mite, int use_iodwbsr_1) ++{ ++ unsigned long length; ++ resource_size_t addr; ++ int i; ++ u32 csigr_bits; ++ unsigned unknown_dma_burst_bits; ++ ++ __a4l_dbg(1, drv_dbg, "starting setup...\n"); ++ ++ pci_set_master(mite->pcidev); ++ ++ if (pci_request_regions(mite->pcidev, "mite")) { ++ __a4l_err("failed to request mite io regions\n"); ++ return -EIO; ++ }; ++ ++ /* The PCI BAR0 is the Mite */ ++ addr = pci_resource_start(mite->pcidev, 0); ++ length = pci_resource_len(mite->pcidev, 0); ++ mite->mite_phys_addr = addr; ++ mite->mite_io_addr = ioremap(addr, length); ++ if (!mite->mite_io_addr) { ++ __a4l_err("failed to remap mite io memory address\n"); ++ pci_release_regions(mite->pcidev); ++ return -ENOMEM; ++ } ++ ++ __a4l_dbg(1, drv_dbg, "bar0(mite) 0x%08llx mapped to %p\n", ++ (unsigned long long)mite->mite_phys_addr, ++ mite->mite_io_addr); ++ ++ ++ /* The PCI BAR1 is the DAQ */ ++ addr = pci_resource_start(mite->pcidev, 1); ++ length = pci_resource_len(mite->pcidev, 1); ++ mite->daq_phys_addr = addr; ++ mite->daq_io_addr = ioremap(mite->daq_phys_addr, length); ++ if (!mite->daq_io_addr) { ++ __a4l_err("failed to remap daq io memory address\n"); ++ pci_release_regions(mite->pcidev); ++ return -ENOMEM; ++ } ++ ++ __a4l_dbg(1, drv_dbg, "bar0(daq) 0x%08llx mapped to %p\n", ++ (unsigned long long)mite->daq_phys_addr, ++ mite->daq_io_addr); ++ ++ if (use_iodwbsr_1) { ++ __a4l_dbg(1, drv_dbg, "using I/O Window Base Size register 1\n"); ++ writel(0, mite->mite_io_addr + MITE_IODWBSR); ++ writel(mite-> ++ daq_phys_addr | WENAB | ++ MITE_IODWBSR_1_WSIZE_bits(length), ++ mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(0, mite->mite_io_addr + MITE_IODWCR_1); ++ } else { ++ writel(mite->daq_phys_addr | WENAB, ++ mite->mite_io_addr + MITE_IODWBSR); ++ } ++ ++ /* Make sure dma bursts work. I got this from running a bus analyzer ++ on a pxi-6281 and a pxi-6713. 6713 powered up with register value ++ of 0x61f and bursts worked. 6281 powered up with register value of ++ 0x1f and bursts didn't work. The NI windows driver reads the register, ++ then does a bitwise-or of 0x600 with it and writes it back. 
++ */ ++ unknown_dma_burst_bits = ++ readl(mite->mite_io_addr + MITE_UNKNOWN_DMA_BURST_REG); ++ unknown_dma_burst_bits |= UNKNOWN_DMA_BURST_ENABLE_BITS; ++ writel(unknown_dma_burst_bits, ++ mite->mite_io_addr + MITE_UNKNOWN_DMA_BURST_REG); ++ ++ csigr_bits = readl(mite->mite_io_addr + MITE_CSIGR); ++ mite->num_channels = mite_csigr_dmac(csigr_bits); ++ if (mite->num_channels > MAX_MITE_DMA_CHANNELS) { ++ __a4l_err("MITE: bug? chip claims to have %i dma channels. " ++ "Setting to %i.\n", ++ mite->num_channels, MAX_MITE_DMA_CHANNELS); ++ mite->num_channels = MAX_MITE_DMA_CHANNELS; ++ } ++ ++ __a4l_dbg(1, drv_dbg, " version = %i, type = %i, mite mode = %i, " ++ "interface mode = %i\n", ++ mite_csigr_version(csigr_bits), ++ mite_csigr_type(csigr_bits), ++ mite_csigr_mmode(csigr_bits), ++ mite_csigr_imode(csigr_bits)); ++ __a4l_dbg(1, drv_dbg, " num channels = %i, write post fifo depth = %i, " ++ "wins = %i, iowins = %i\n", ++ mite_csigr_dmac(csigr_bits), ++ mite_csigr_wpdep(csigr_bits), ++ mite_csigr_wins(csigr_bits), ++ mite_csigr_iowins(csigr_bits)); ++ ++ for (i = 0; i < mite->num_channels; i++) { ++ /* Registers the channel as a free one */ ++ mite->channel_allocated[i] = 0; ++ /* Reset the channel */ ++ writel(CHOR_DMARESET, mite->mite_io_addr + MITE_CHOR(i)); ++ /* Disable interrupts */ ++ writel(CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | CHCR_CLR_SAR_IE | ++ CHCR_CLR_DONE_IE | CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE, ++ mite->mite_io_addr + MITE_CHCR(i)); ++ ++ __a4l_dbg(1, drv_dbg, "channel[%d] initialized\n", i); ++ } ++ ++ mite->used = 1; ++ ++ return 0; ++} ++ ++void a4l_mite_unsetup(struct mite_struct *mite) ++{ ++ if (!mite) ++ return; ++ ++ if (mite->mite_io_addr) { ++ iounmap(mite->mite_io_addr); ++ mite->mite_io_addr = NULL; ++ } ++ ++ if (mite->daq_io_addr) { ++ iounmap(mite->daq_io_addr); ++ mite->daq_io_addr = NULL; ++ } ++ ++ if(mite->used) ++ pci_release_regions( mite->pcidev ); ++ ++ mite->used = 0; ++} ++ ++void a4l_mite_list_devices(void) ++{ ++ struct list_head *this; ++ ++ printk("Analogy: MITE: Available NI device IDs:"); ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ printk(" 0x%04x", mite_device_id(mite)); ++ if(mite->used) ++ printk("(used)"); ++ } ++ ++ printk("\n"); ++} ++ ++ ++ ++struct mite_struct * a4l_mite_find_device(int bus, ++ int slot, unsigned short device_id) ++{ ++ struct list_head *this; ++ ++ list_for_each(this, &mite_devices) { ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ if(mite->pcidev->device != device_id) ++ continue; ++ ++ if((bus <= 0 && slot <= 0) || ++ (bus == mite->pcidev->bus->number && ++ slot == PCI_SLOT(mite->pcidev->devfn))) ++ return mite; ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(a4l_mite_find_device); ++ ++struct mite_channel * ++a4l_mite_request_channel_in_range(struct mite_struct *mite, ++ struct mite_dma_descriptor_ring *ring, ++ unsigned min_channel, unsigned max_channel) ++{ ++ int i; ++ unsigned long flags; ++ struct mite_channel *channel = NULL; ++ ++ __a4l_dbg(1, drv_dbg, " min_channel = %u, max_channel = %u\n", ++ min_channel, max_channel); ++ ++ /* spin lock so a4l_mite_release_channel can be called safely ++ from interrupts */ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ for (i = min_channel; i <= max_channel; ++i) { ++ ++ __a4l_dbg(1, drv_dbg, " channel[%d] allocated = %d\n", ++ i, mite->channel_allocated[i]); ++ ++ if (mite->channel_allocated[i] == 0) { ++ 
mite->channel_allocated[i] = 1; ++ channel = &mite->channels[i]; ++ channel->ring = ring; ++ break; ++ } ++ } ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return channel; ++} ++ ++void a4l_mite_release_channel(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned long flags; ++ ++ /* Spin lock to prevent races with mite_request_channel */ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ if (mite->channel_allocated[mite_chan->channel]) { ++ /* disable all channel's interrupts */ ++ writel(CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | ++ CHCR_CLR_SAR_IE | CHCR_CLR_DONE_IE | ++ CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE, ++ mite->mite_io_addr + MITE_CHCR(mite_chan->channel)); ++ a4l_mite_dma_disarm(mite_chan); ++ mite_dma_reset(mite_chan); ++ mite->channel_allocated[mite_chan->channel] = 0; ++ mite_chan->ring = NULL; ++ mmiowb(); ++ } ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++} ++ ++void a4l_mite_dma_arm(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ int chor; ++ unsigned long flags; ++ ++ MDPRINTK("a4l_mite_dma_arm ch%i\n", mite_chan->channel); ++ /* Memory barrier is intended to insure any twiddling with the buffer ++ is done before writing to the mite to arm dma transfer */ ++ smp_mb(); ++ /* arm */ ++ chor = CHOR_START; ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ mite_chan->done = 0; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++} ++ ++void a4l_mite_dma_disarm(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned chor; ++ ++ /* disarm */ ++ chor = CHOR_ABORT; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++} ++ ++int a4l_mite_buf_change(struct mite_dma_descriptor_ring *ring, struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned int n_links; ++ int i; ++ ++ if (ring->descriptors) { ++ pci_free_consistent(ring->pcidev, ++ ring->n_links * sizeof(struct mite_dma_descriptor), ++ ring->descriptors, ring->descriptors_dma_addr); ++ } ++ ring->descriptors = NULL; ++ ring->descriptors_dma_addr = 0; ++ ring->n_links = 0; ++ ++ if (buf->size == 0) { ++ return 0; ++ } ++ n_links = buf->size >> PAGE_SHIFT; ++ ++ MDPRINTK("ring->pcidev=%p, n_links=0x%04x\n", ring->pcidev, n_links); ++ ++ ring->descriptors = ++ pci_alloc_consistent(ring->pcidev, ++ n_links * sizeof(struct mite_dma_descriptor), ++ &ring->descriptors_dma_addr); ++ if (!ring->descriptors) { ++ printk("MITE: ring buffer allocation failed\n"); ++ return -ENOMEM; ++ } ++ ring->n_links = n_links; ++ ++ for (i = 0; i < n_links; i++) { ++ ring->descriptors[i].count = cpu_to_le32(PAGE_SIZE); ++ ring->descriptors[i].addr = cpu_to_le32(buf->pg_list[i]); ++ ring->descriptors[i].next = ++ cpu_to_le32(ring->descriptors_dma_addr + ++ (i + 1) * sizeof(struct mite_dma_descriptor)); ++ } ++ ++ ring->descriptors[n_links - 1].next = ++ cpu_to_le32(ring->descriptors_dma_addr); ++ ++ /* Barrier is meant to insure that all the writes to the dma descriptors ++ have completed before the dma controller is commanded to read them */ ++ smp_wmb(); ++ ++ return 0; ++} ++ ++void a4l_mite_prep_dma(struct mite_channel *mite_chan, ++ unsigned int num_device_bits, unsigned int num_memory_bits) ++{ ++ unsigned int chor, chcr, mcr, dcr, lkcr; ++ struct mite_struct *mite = mite_chan->mite; ++ ++ MDPRINTK("a4l_mite_prep_dma ch%i\n", mite_chan->channel); ++ ++ /* reset DMA 
and FIFO */ ++ chor = CHOR_DMARESET | CHOR_FRESET; ++ writel(chor, mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ ++ /* short link chaining mode */ ++ chcr = CHCR_SET_DMA_IE | CHCR_LINKSHORT | CHCR_SET_DONE_IE | ++ CHCR_BURSTEN; ++ /* ++ * Link Complete Interrupt: interrupt every time a link ++ * in MITE_RING is completed. This can generate a lot of ++ * extra interrupts, but right now we update the values ++ * of buf_int_ptr and buf_int_count at each interrupt. A ++ * better method is to poll the MITE before each user ++ * "read()" to calculate the number of bytes available. ++ */ ++ chcr |= CHCR_SET_LC_IE; ++ if (num_memory_bits == 32 && num_device_bits == 16) { ++ /* Doing a combined 32 and 16 bit byteswap gets the 16 ++ bit samples into the fifo in the right order. ++ Tested doing 32 bit memory to 16 bit device ++ transfers to the analog out of a pxi-6281, which ++ has mite version = 1, type = 4. This also works ++ for dma reads from the counters on e-series boards. ++ */ ++ chcr |= CHCR_BYTE_SWAP_DEVICE | CHCR_BYTE_SWAP_MEMORY; ++ } ++ ++ if (mite_chan->dir == A4L_INPUT) { ++ chcr |= CHCR_DEV_TO_MEM; ++ } ++ writel(chcr, mite->mite_io_addr + MITE_CHCR(mite_chan->channel)); ++ ++ /* to/from memory */ ++ mcr = CR_RL(64) | CR_ASEQUP; ++ switch (num_memory_bits) { ++ case 8: ++ mcr |= CR_PSIZE8; ++ break; ++ case 16: ++ mcr |= CR_PSIZE16; ++ break; ++ case 32: ++ mcr |= CR_PSIZE32; ++ break; ++ default: ++ __a4l_err("MITE: bug! " ++ "invalid mem bit width for dma transfer\n"); ++ break; ++ } ++ writel(mcr, mite->mite_io_addr + MITE_MCR(mite_chan->channel)); ++ ++ /* from/to device */ ++ dcr = CR_RL(64) | CR_ASEQUP; ++ dcr |= CR_PORTIO | CR_AMDEVICE | CR_REQSDRQ(mite_chan->channel); ++ switch (num_device_bits) { ++ case 8: ++ dcr |= CR_PSIZE8; ++ break; ++ case 16: ++ dcr |= CR_PSIZE16; ++ break; ++ case 32: ++ dcr |= CR_PSIZE32; ++ break; ++ default: ++ __a4l_info("MITE: bug! 
" ++ "invalid dev bit width for dma transfer\n"); ++ break; ++ } ++ writel(dcr, mite->mite_io_addr + MITE_DCR(mite_chan->channel)); ++ ++ /* reset the DAR */ ++ writel(0, mite->mite_io_addr + MITE_DAR(mite_chan->channel)); ++ ++ /* the link is 32bits */ ++ lkcr = CR_RL(64) | CR_ASEQUP | CR_PSIZE32; ++ writel(lkcr, mite->mite_io_addr + MITE_LKCR(mite_chan->channel)); ++ ++ /* starting address for link chaining */ ++ writel(mite_chan->ring->descriptors_dma_addr, ++ mite->mite_io_addr + MITE_LKAR(mite_chan->channel)); ++ ++ MDPRINTK("exit a4l_mite_prep_dma\n"); ++} ++ ++u32 mite_device_bytes_transferred(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ return readl(mite->mite_io_addr + MITE_DAR(mite_chan->channel)); ++} ++ ++u32 a4l_mite_bytes_in_transit(struct mite_channel * mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ return readl(mite->mite_io_addr + ++ MITE_FCR(mite_chan->channel)) & 0x000000FF; ++} ++ ++/* Returns lower bound for number of bytes transferred from device to memory */ ++u32 a4l_mite_bytes_written_to_memory_lb(struct mite_channel * mite_chan) ++{ ++ u32 device_byte_count; ++ ++ device_byte_count = mite_device_bytes_transferred(mite_chan); ++ return device_byte_count - a4l_mite_bytes_in_transit(mite_chan); ++} ++ ++/* Returns upper bound for number of bytes transferred from device to memory */ ++u32 a4l_mite_bytes_written_to_memory_ub(struct mite_channel * mite_chan) ++{ ++ u32 in_transit_count; ++ ++ in_transit_count = a4l_mite_bytes_in_transit(mite_chan); ++ return mite_device_bytes_transferred(mite_chan) - in_transit_count; ++} ++ ++/* Returns lower bound for number of bytes read from memory for transfer to device */ ++u32 a4l_mite_bytes_read_from_memory_lb(struct mite_channel * mite_chan) ++{ ++ u32 device_byte_count; ++ ++ device_byte_count = mite_device_bytes_transferred(mite_chan); ++ return device_byte_count + a4l_mite_bytes_in_transit(mite_chan); ++} ++ ++/* Returns upper bound for number of bytes read from memory for transfer to device */ ++u32 a4l_mite_bytes_read_from_memory_ub(struct mite_channel * mite_chan) ++{ ++ u32 in_transit_count; ++ ++ in_transit_count = a4l_mite_bytes_in_transit(mite_chan); ++ return mite_device_bytes_transferred(mite_chan) + in_transit_count; ++} ++ ++int a4l_mite_sync_input_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd) ++{ ++ unsigned int nbytes_lb, nbytes_ub; ++ ++ nbytes_lb = a4l_mite_bytes_written_to_memory_lb(mite_chan); ++ nbytes_ub = a4l_mite_bytes_written_to_memory_ub(mite_chan); ++ ++ if(a4l_buf_prepare_absput(subd, nbytes_ub) != 0) { ++ __a4l_err("MITE: DMA overwrite of free area\n"); ++ return -EPIPE; ++ } ++ ++ return a4l_buf_commit_absput(subd, nbytes_lb); ++} ++ ++int a4l_mite_sync_output_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd) ++{ ++ struct a4l_buffer *buf = subd->buf; ++ unsigned int nbytes_ub, nbytes_lb; ++ int err; ++ ++ nbytes_lb = a4l_mite_bytes_read_from_memory_lb(mite_chan); ++ nbytes_ub = a4l_mite_bytes_read_from_memory_ub(mite_chan); ++ ++ err = a4l_buf_prepare_absget(subd, nbytes_ub); ++ if(err < 0) { ++ __a4l_info("MITE: DMA underrun\n"); ++ return -EPIPE; ++ } ++ ++ err = a4l_buf_commit_absget(subd, nbytes_lb); ++ ++ /* If the MITE has already transfered more than required, we ++ can disable it */ ++ if (test_bit(A4L_BUF_EOA_NR, &buf->flags)) ++ writel(CHOR_STOP, ++ mite_chan->mite->mite_io_addr + ++ MITE_CHOR(mite_chan->channel)); ++ ++ return err; ++} ++ ++u32 a4l_mite_get_status(struct mite_channel *mite_chan) 
++{ ++ struct mite_struct *mite = mite_chan->mite; ++ u32 status; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ status = readl(mite->mite_io_addr + MITE_CHSR(mite_chan->channel)); ++ if (status & CHSR_DONE) { ++ mite_chan->done = 1; ++ writel(CHOR_CLRDONE, ++ mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++ } ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return status; ++} ++ ++int a4l_mite_done(struct mite_channel *mite_chan) ++{ ++ struct mite_struct *mite = mite_chan->mite; ++ unsigned long flags; ++ int done; ++ ++ a4l_mite_get_status(mite_chan); ++ rtdm_lock_get_irqsave(&mite->lock, flags); ++ done = mite_chan->done; ++ rtdm_lock_put_irqrestore(&mite->lock, flags); ++ return done; ++} ++ ++#ifdef CONFIG_DEBUG_MITE ++ ++static void a4l_mite_decode(const char *const bit_str[], unsigned int bits); ++ ++/* names of bits in mite registers */ ++ ++static const char *const mite_CHOR_strings[] = { ++ "start", "cont", "stop", "abort", ++ "freset", "clrlc", "clrrb", "clrdone", ++ "clr_lpause", "set_lpause", "clr_send_tc", ++ "set_send_tc", "12", "13", "14", ++ "15", "16", "17", "18", ++ "19", "20", "21", "22", ++ "23", "24", "25", "26", ++ "27", "28", "29", "30", ++ "dmareset", ++}; ++ ++static const char *const mite_CHCR_strings[] = { ++ "continue", "ringbuff", "2", "3", ++ "4", "5", "6", "7", ++ "8", "9", "10", "11", ++ "12", "13", "bursten", "fifodis", ++ "clr_cont_rb_ie", "set_cont_rb_ie", "clr_lc_ie", "set_lc_ie", ++ "clr_drdy_ie", "set_drdy_ie", "clr_mrdy_ie", "set_mrdy_ie", ++ "clr_done_ie", "set_done_ie", "clr_sar_ie", "set_sar_ie", ++ "clr_linkp_ie", "set_linkp_ie", "clr_dma_ie", "set_dma_ie", ++}; ++ ++static const char *const mite_MCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "aseqxp1", "11", ++ "12", "13", "blocken", "berhand", ++ "reqsintlim/reqs0", "reqs1", "reqs2", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "24", "25", "26", "27", ++ "28", "29", "30", "stopen", ++}; ++ ++static const char *const mite_DCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "aseqxp1", "aseqxp2", ++ "aseqxp8", "13", "blocken", "berhand", ++ "reqsintlim", "reqs1", "reqs2", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "23", "24", "25", "27", ++ "28", "wsdevc", "wsdevs", "rwdevpack", ++}; ++ ++static const char *const mite_LKCR_strings[] = { ++ "amdevice", "1", "2", "3", ++ "4", "5", "portio", "portvxi", ++ "psizebyte", "psizehalf (byte & half = word)", "asequp", "aseqdown", ++ "12", "13", "14", "berhand", ++ "16", "17", "18", "rd32", ++ "rd512", "rl1", "rl2", "rl8", ++ "24", "25", "26", "27", ++ "28", "29", "30", "chngend", ++}; ++ ++static const char *const mite_CHSR_strings[] = { ++ "d.err0", "d.err1", "m.err0", "m.err1", ++ "l.err0", "l.err1", "drq0", "drq1", ++ "end", "xferr", "operr0", "operr1", ++ "stops", "habort", "sabort", "error", ++ "16", "conts_rb", "18", "linkc", ++ "20", "drdy", "22", "mrdy", ++ "24", "done", "26", "sars", ++ "28", "lpauses", "30", "int", ++}; ++ ++void a4l_mite_dump_regs(struct mite_channel *mite_chan) ++{ ++ unsigned long mite_io_addr = ++ (unsigned long)mite_chan->mite->mite_io_addr; ++ unsigned long addr = 0; ++ unsigned long temp = 0; ++ ++ printk("a4l_mite_dump_regs ch%i\n", mite_chan->channel); ++ printk("mite address is =0x%08lx\n", mite_io_addr); ++ ++ addr = mite_io_addr + MITE_CHOR(mite_chan->channel); ++ printk("mite status[CHOR]at 0x%08lx 
=0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHOR_strings, temp); ++ addr = mite_io_addr + MITE_CHCR(mite_chan->channel); ++ printk("mite status[CHCR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHCR_strings, temp); ++ addr = mite_io_addr + MITE_TCR(mite_chan->channel); ++ printk("mite status[TCR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_MCR(mite_chan->channel); ++ printk("mite status[MCR] at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_MCR_strings, temp); ++ ++ addr = mite_io_addr + MITE_MAR(mite_chan->channel); ++ printk("mite status[MAR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_DCR(mite_chan->channel); ++ printk("mite status[DCR] at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_DCR_strings, temp); ++ addr = mite_io_addr + MITE_DAR(mite_chan->channel); ++ printk("mite status[DAR] at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ addr = mite_io_addr + MITE_LKCR(mite_chan->channel); ++ printk("mite status[LKCR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_LKCR_strings, temp); ++ addr = mite_io_addr + MITE_LKAR(mite_chan->channel); ++ printk("mite status[LKAR]at 0x%08lx =0x%08x\n", addr, ++ readl((void *)addr)); ++ ++ addr = mite_io_addr + MITE_CHSR(mite_chan->channel); ++ printk("mite status[CHSR]at 0x%08lx =0x%08lx\n", addr, temp = ++ readl((void *)addr)); ++ a4l_mite_decode(mite_CHSR_strings, temp); ++ addr = mite_io_addr + MITE_FCR(mite_chan->channel); ++ printk("mite status[FCR] at 0x%08lx =0x%08x\n\n", addr, ++ readl((void *)addr)); ++} ++ ++ ++static void a4l_mite_decode(const char *const bit_str[], unsigned int bits) ++{ ++ int i; ++ ++ for (i = 31; i >= 0; i--) { ++ if (bits & (1 << i)) { ++ printk(" %s", bit_str[i]); ++ } ++ } ++ printk("\n"); ++} ++ ++#endif /* CONFIG_DEBUG_MITE */ ++ ++ ++static int __init mite_init(void) ++{ ++ int err; ++ ++ /* Register the mite's PCI driver */ ++ err = pci_register_driver(&mite_driver); ++ ++ if(err == 0) ++ a4l_mite_list_devices(); ++ ++ return err; ++} ++ ++static void __exit mite_cleanup(void) ++{ ++ ++ /* Unregister the PCI structure driver */ ++ pci_unregister_driver(&mite_driver); ++ ++ /* Just paranoia... 
*/ ++ while(&mite_devices != mite_devices.next) { ++ struct list_head *this = mite_devices.next; ++ struct mite_struct *mite = ++ list_entry(this, struct mite_struct, list); ++ ++ list_del(this); ++ kfree(mite); ++ } ++} ++ ++MODULE_LICENSE("GPL"); ++module_init(mite_init); ++module_exit(mite_cleanup); ++ ++EXPORT_SYMBOL_GPL(a4l_mite_dma_arm); ++EXPORT_SYMBOL_GPL(a4l_mite_dma_disarm); ++EXPORT_SYMBOL_GPL(a4l_mite_sync_input_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_sync_output_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_setup); ++EXPORT_SYMBOL_GPL(a4l_mite_unsetup); ++EXPORT_SYMBOL_GPL(a4l_mite_list_devices); ++EXPORT_SYMBOL_GPL(a4l_mite_request_channel_in_range); ++EXPORT_SYMBOL_GPL(a4l_mite_release_channel); ++EXPORT_SYMBOL_GPL(a4l_mite_prep_dma); ++EXPORT_SYMBOL_GPL(a4l_mite_buf_change); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_written_to_memory_lb); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_written_to_memory_ub); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_read_from_memory_lb); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_read_from_memory_ub); ++EXPORT_SYMBOL_GPL(a4l_mite_bytes_in_transit); ++EXPORT_SYMBOL_GPL(a4l_mite_get_status); ++EXPORT_SYMBOL_GPL(a4l_mite_done); ++#ifdef CONFIG_DEBUG_MITE ++EXPORT_SYMBOL_GPL(a4l_mite_decode); ++EXPORT_SYMBOL_GPL(a4l_mite_dump_regs); ++#endif /* CONFIG_DEBUG_MITE */ +--- linux/drivers/xenomai/analogy/national_instruments/mite.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mite.h 2021-04-07 16:01:27.810633333 +0800 +@@ -0,0 +1,435 @@ ++/* ++ * Hardware driver for NI Mite PCI interface chip ++ * @note Copyright (C) 1999 David A. Schleef ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef __ANALOGY_NI_MITE_H__ ++#define __ANALOGY_NI_MITE_H__ ++ ++#include ++#include ++#include ++ ++#define PCI_VENDOR_ID_NATINST 0x1093 ++#define PCI_MITE_SIZE 4096 ++#define PCI_DAQ_SIZE 4096 ++#define PCI_DAQ_SIZE_660X 8192 ++#define PCIMIO_COMPAT ++#define MAX_MITE_DMA_CHANNELS 8 ++ ++#define TOP_OF_PAGE(x) ((x)|(~(PAGE_MASK))) ++ ++struct mite_dma_descriptor { ++ u32 count; ++ u32 addr; ++ u32 next; ++ u32 dar; ++}; ++ ++struct mite_dma_descriptor_ring { ++ struct pci_dev *pcidev; ++ u32 n_links; ++ struct mite_dma_descriptor *descriptors; ++ dma_addr_t descriptors_dma_addr; ++}; ++ ++struct mite_channel { ++ struct mite_struct *mite; ++ u32 channel; ++ u32 dir; ++ u32 done; ++ struct mite_dma_descriptor_ring *ring; ++}; ++ ++struct mite_struct { ++ struct list_head list; ++ rtdm_lock_t lock; ++ u32 used; ++ u32 num_channels; ++ ++ struct mite_channel channels[MAX_MITE_DMA_CHANNELS]; ++ u32 channel_allocated[MAX_MITE_DMA_CHANNELS]; ++ ++ struct pci_dev *pcidev; ++ resource_size_t mite_phys_addr; ++ void *mite_io_addr; ++ resource_size_t daq_phys_addr; ++ void *daq_io_addr; ++}; ++ ++static inline ++struct mite_dma_descriptor_ring *mite_alloc_ring(struct mite_struct *mite) ++{ ++ struct mite_dma_descriptor_ring *ring = ++ kmalloc(sizeof(struct mite_dma_descriptor_ring), GFP_DMA); ++ ++ if (ring == NULL) ++ return ring; ++ ++ memset(ring, 0, sizeof(struct mite_dma_descriptor_ring)); ++ ++ ring->pcidev = mite->pcidev; ++ if (ring->pcidev == NULL) { ++ kfree(ring); ++ return NULL; ++ } ++ ++ return ring; ++}; ++ ++static inline void mite_free_ring(struct mite_dma_descriptor_ring *ring) ++{ ++ if (ring) { ++ if (ring->descriptors) { ++ pci_free_consistent( ++ ring->pcidev, ++ ring->n_links * ++ sizeof(struct mite_dma_descriptor), ++ ring->descriptors, ring->descriptors_dma_addr); ++ } ++ kfree(ring); ++ } ++}; ++ ++static inline unsigned int mite_irq(struct mite_struct *mite) ++{ ++ return mite->pcidev->irq; ++}; ++static inline unsigned int mite_device_id(struct mite_struct *mite) ++{ ++ return mite->pcidev->device; ++}; ++ ++int a4l_mite_setup(struct mite_struct *mite, int use_iodwbsr_1); ++void a4l_mite_unsetup(struct mite_struct *mite); ++void a4l_mite_list_devices(void); ++struct mite_struct * a4l_mite_find_device(int bus, ++ int slot, unsigned short device_id); ++struct mite_channel * ++a4l_mite_request_channel_in_range(struct mite_struct *mite, ++ struct mite_dma_descriptor_ring *ring, ++ unsigned min_channel, unsigned max_channel); ++static inline struct mite_channel *mite_request_channel(struct mite_struct ++ *mite, struct mite_dma_descriptor_ring *ring) ++{ ++ return a4l_mite_request_channel_in_range(mite, ring, 0, ++ mite->num_channels - 1); ++} ++void a4l_mite_release_channel(struct mite_channel *mite_chan); ++ ++void a4l_mite_dma_arm(struct mite_channel *mite_chan); ++void a4l_mite_dma_disarm(struct mite_channel *mite_chan); ++int a4l_mite_sync_input_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd); ++int a4l_mite_sync_output_dma(struct mite_channel *mite_chan, struct a4l_subdevice *subd); ++u32 a4l_mite_bytes_written_to_memory_lb(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_written_to_memory_ub(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_read_from_memory_lb(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_read_from_memory_ub(struct mite_channel *mite_chan); ++u32 a4l_mite_bytes_in_transit(struct mite_channel *mite_chan); ++u32 a4l_mite_get_status(struct mite_channel *mite_chan); ++int a4l_mite_done(struct mite_channel 
*mite_chan); ++void a4l_mite_prep_dma(struct mite_channel *mite_chan, ++ unsigned int num_device_bits, unsigned int num_memory_bits); ++int a4l_mite_buf_change(struct mite_dma_descriptor_ring *ring, struct a4l_subdevice *subd); ++ ++#ifdef CONFIG_DEBUG_MITE ++void mite_print_chsr(unsigned int chsr); ++void a4l_mite_dump_regs(struct mite_channel *mite_chan); ++#endif ++ ++static inline int CHAN_OFFSET(int channel) ++{ ++ return 0x500 + 0x100 * channel; ++}; ++ ++enum mite_registers { ++ /* The bits 0x90180700 in MITE_UNKNOWN_DMA_BURST_REG can be ++ written and read back. The bits 0x1f always read as 1. ++ The rest always read as zero. */ ++ MITE_UNKNOWN_DMA_BURST_REG = 0x28, ++ MITE_IODWBSR = 0xc0, //IO Device Window Base Size Register ++ MITE_IODWBSR_1 = 0xc4, // IO Device Window Base Size Register 1 ++ MITE_IODWCR_1 = 0xf4, ++ MITE_PCI_CONFIG_OFFSET = 0x300, ++ MITE_CSIGR = 0x460 //chip signature ++}; ++static inline int MITE_CHOR(int channel) // channel operation ++{ ++ return CHAN_OFFSET(channel) + 0x0; ++}; ++static inline int MITE_CHCR(int channel) // channel control ++{ ++ return CHAN_OFFSET(channel) + 0x4; ++}; ++static inline int MITE_TCR(int channel) // transfer count ++{ ++ return CHAN_OFFSET(channel) + 0x8; ++}; ++static inline int MITE_MCR(int channel) // memory configuration ++{ ++ return CHAN_OFFSET(channel) + 0xc; ++}; ++static inline int MITE_MAR(int channel) // memory address ++{ ++ return CHAN_OFFSET(channel) + 0x10; ++}; ++static inline int MITE_DCR(int channel) // device configuration ++{ ++ return CHAN_OFFSET(channel) + 0x14; ++}; ++static inline int MITE_DAR(int channel) // device address ++{ ++ return CHAN_OFFSET(channel) + 0x18; ++}; ++static inline int MITE_LKCR(int channel) // link configuration ++{ ++ return CHAN_OFFSET(channel) + 0x1c; ++}; ++static inline int MITE_LKAR(int channel) // link address ++{ ++ return CHAN_OFFSET(channel) + 0x20; ++}; ++static inline int MITE_LLKAR(int channel) // see mite section of tnt5002 manual ++{ ++ return CHAN_OFFSET(channel) + 0x24; ++}; ++static inline int MITE_BAR(int channel) // base address ++{ ++ return CHAN_OFFSET(channel) + 0x28; ++}; ++static inline int MITE_BCR(int channel) // base count ++{ ++ return CHAN_OFFSET(channel) + 0x2c; ++}; ++static inline int MITE_SAR(int channel) // ? address ++{ ++ return CHAN_OFFSET(channel) + 0x30; ++}; ++static inline int MITE_WSCR(int channel) // ? ++{ ++ return CHAN_OFFSET(channel) + 0x34; ++}; ++static inline int MITE_WSER(int channel) // ? 
++{ ++ return CHAN_OFFSET(channel) + 0x38; ++}; ++static inline int MITE_CHSR(int channel) // channel status ++{ ++ return CHAN_OFFSET(channel) + 0x3c; ++}; ++static inline int MITE_FCR(int channel) // fifo count ++{ ++ return CHAN_OFFSET(channel) + 0x40; ++}; ++ ++enum MITE_IODWBSR_bits { ++ WENAB = 0x80, // window enable ++}; ++ ++static inline unsigned MITE_IODWBSR_1_WSIZE_bits(unsigned size) ++{ ++ unsigned order = 0; ++ while (size >>= 1) ++ ++order; ++ BUG_ON(order < 1); ++ return (order - 1) & 0x1f; ++} ++ ++enum MITE_UNKNOWN_DMA_BURST_bits { ++ UNKNOWN_DMA_BURST_ENABLE_BITS = 0x600 ++}; ++ ++static inline int mite_csigr_version(u32 csigr_bits) ++{ ++ return csigr_bits & 0xf; ++}; ++static inline int mite_csigr_type(u32 csigr_bits) ++{ // original mite = 0, minimite = 1 ++ return (csigr_bits >> 4) & 0xf; ++}; ++static inline int mite_csigr_mmode(u32 csigr_bits) ++{ // mite mode, minimite = 1 ++ return (csigr_bits >> 8) & 0x3; ++}; ++static inline int mite_csigr_imode(u32 csigr_bits) ++{ // cpu port interface mode, pci = 0x3 ++ return (csigr_bits >> 12) & 0x3; ++}; ++static inline int mite_csigr_dmac(u32 csigr_bits) ++{ // number of dma channels ++ return (csigr_bits >> 16) & 0xf; ++}; ++static inline int mite_csigr_wpdep(u32 csigr_bits) ++{ // write post fifo depth ++ unsigned int wpdep_bits = (csigr_bits >> 20) & 0x7; ++ if (wpdep_bits == 0) ++ return 0; ++ else ++ return 1 << (wpdep_bits - 1); ++}; ++static inline int mite_csigr_wins(u32 csigr_bits) ++{ ++ return (csigr_bits >> 24) & 0x1f; ++}; ++static inline int mite_csigr_iowins(u32 csigr_bits) ++{ // number of io windows ++ return (csigr_bits >> 29) & 0x7; ++}; ++ ++enum MITE_MCR_bits { ++ MCRPON = 0, ++}; ++ ++enum MITE_DCR_bits { ++ DCR_NORMAL = (1 << 29), ++ DCRPON = 0, ++}; ++ ++enum MITE_CHOR_bits { ++ CHOR_DMARESET = (1 << 31), ++ CHOR_SET_SEND_TC = (1 << 11), ++ CHOR_CLR_SEND_TC = (1 << 10), ++ CHOR_SET_LPAUSE = (1 << 9), ++ CHOR_CLR_LPAUSE = (1 << 8), ++ CHOR_CLRDONE = (1 << 7), ++ CHOR_CLRRB = (1 << 6), ++ CHOR_CLRLC = (1 << 5), ++ CHOR_FRESET = (1 << 4), ++ CHOR_ABORT = (1 << 3), /* stop without emptying fifo */ ++ CHOR_STOP = (1 << 2), /* stop after emptying fifo */ ++ CHOR_CONT = (1 << 1), ++ CHOR_START = (1 << 0), ++ CHOR_PON = (CHOR_CLR_SEND_TC | CHOR_CLR_LPAUSE), ++}; ++ ++enum MITE_CHCR_bits { ++ CHCR_SET_DMA_IE = (1 << 31), ++ CHCR_CLR_DMA_IE = (1 << 30), ++ CHCR_SET_LINKP_IE = (1 << 29), ++ CHCR_CLR_LINKP_IE = (1 << 28), ++ CHCR_SET_SAR_IE = (1 << 27), ++ CHCR_CLR_SAR_IE = (1 << 26), ++ CHCR_SET_DONE_IE = (1 << 25), ++ CHCR_CLR_DONE_IE = (1 << 24), ++ CHCR_SET_MRDY_IE = (1 << 23), ++ CHCR_CLR_MRDY_IE = (1 << 22), ++ CHCR_SET_DRDY_IE = (1 << 21), ++ CHCR_CLR_DRDY_IE = (1 << 20), ++ CHCR_SET_LC_IE = (1 << 19), ++ CHCR_CLR_LC_IE = (1 << 18), ++ CHCR_SET_CONT_RB_IE = (1 << 17), ++ CHCR_CLR_CONT_RB_IE = (1 << 16), ++ CHCR_FIFODIS = (1 << 15), ++ CHCR_FIFO_ON = 0, ++ CHCR_BURSTEN = (1 << 14), ++ CHCR_NO_BURSTEN = 0, ++ CHCR_BYTE_SWAP_DEVICE = (1 << 6), ++ CHCR_BYTE_SWAP_MEMORY = (1 << 4), ++ CHCR_DIR = (1 << 3), ++ CHCR_DEV_TO_MEM = CHCR_DIR, ++ CHCR_MEM_TO_DEV = 0, ++ CHCR_NORMAL = (0 << 0), ++ CHCR_CONTINUE = (1 << 0), ++ CHCR_RINGBUFF = (2 << 0), ++ CHCR_LINKSHORT = (4 << 0), ++ CHCR_LINKLONG = (5 << 0), ++ CHCRPON = ++ (CHCR_CLR_DMA_IE | CHCR_CLR_LINKP_IE | CHCR_CLR_SAR_IE | ++ CHCR_CLR_DONE_IE | CHCR_CLR_MRDY_IE | CHCR_CLR_DRDY_IE | ++ CHCR_CLR_LC_IE | CHCR_CLR_CONT_RB_IE), ++}; ++ ++enum ConfigRegister_bits { ++ CR_REQS_MASK = 0x7 << 16, ++ CR_ASEQDONT = 0x0 << 10, ++ CR_ASEQUP = 0x1 << 10, ++ CR_ASEQDOWN = 
0x2 << 10, ++ CR_ASEQ_MASK = 0x3 << 10, ++ CR_PSIZE8 = (1 << 8), ++ CR_PSIZE16 = (2 << 8), ++ CR_PSIZE32 = (3 << 8), ++ CR_PORTCPU = (0 << 6), ++ CR_PORTIO = (1 << 6), ++ CR_PORTVXI = (2 << 6), ++ CR_PORTMXI = (3 << 6), ++ CR_AMDEVICE = (1 << 0), ++}; ++static inline int CR_REQS(int source) ++{ ++ return (source & 0x7) << 16; ++}; ++static inline int CR_REQSDRQ(unsigned drq_line) ++{ ++ /* This also works on m-series when ++ using channels (drq_line) 4 or 5. */ ++ return CR_REQS((drq_line & 0x3) | 0x4); ++} ++static inline int CR_RL(unsigned int retry_limit) ++{ ++ int value = 0; ++ ++ while (retry_limit) { ++ retry_limit >>= 1; ++ value++; ++ } ++ if (value > 0x7) ++ __a4l_err("bug! retry_limit too large\n"); ++ ++ return (value & 0x7) << 21; ++} ++ ++enum CHSR_bits { ++ CHSR_INT = (1 << 31), ++ CHSR_LPAUSES = (1 << 29), ++ CHSR_SARS = (1 << 27), ++ CHSR_DONE = (1 << 25), ++ CHSR_MRDY = (1 << 23), ++ CHSR_DRDY = (1 << 21), ++ CHSR_LINKC = (1 << 19), ++ CHSR_CONTS_RB = (1 << 17), ++ CHSR_ERROR = (1 << 15), ++ CHSR_SABORT = (1 << 14), ++ CHSR_HABORT = (1 << 13), ++ CHSR_STOPS = (1 << 12), ++ CHSR_OPERR_mask = (3 << 10), ++ CHSR_OPERR_NOERROR = (0 << 10), ++ CHSR_OPERR_FIFOERROR = (1 << 10), ++ CHSR_OPERR_LINKERROR = (1 << 10), /* ??? */ ++ CHSR_XFERR = (1 << 9), ++ CHSR_END = (1 << 8), ++ CHSR_DRQ1 = (1 << 7), ++ CHSR_DRQ0 = (1 << 6), ++ CHSR_LxERR_mask = (3 << 4), ++ CHSR_LBERR = (1 << 4), ++ CHSR_LRERR = (2 << 4), ++ CHSR_LOERR = (3 << 4), ++ CHSR_MxERR_mask = (3 << 2), ++ CHSR_MBERR = (1 << 2), ++ CHSR_MRERR = (2 << 2), ++ CHSR_MOERR = (3 << 2), ++ CHSR_DxERR_mask = (3 << 0), ++ CHSR_DBERR = (1 << 0), ++ CHSR_DRERR = (2 << 0), ++ CHSR_DOERR = (3 << 0), ++}; ++ ++static inline void mite_dma_reset(struct mite_channel *mite_chan) ++{ ++ writel(CHOR_DMARESET | CHOR_FRESET, ++ mite_chan->mite->mite_io_addr + MITE_CHOR(mite_chan->channel)); ++}; ++ ++#endif /* !__ANALOGY_NI_MITE_H__ */ +--- linux/drivers/xenomai/analogy/national_instruments/mio_common.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/mio_common.c 2021-04-07 16:01:27.806633339 +0800 +@@ -0,0 +1,5590 @@ ++/* ++ * Hardware driver for DAQ-STC based boards ++ * ++ * Copyright (C) 1997-2001 David A. Schleef ++ * Copyright (C) 2002-2006 Frank Mori Hess ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Description: DAQ-STC systems ++ * ++ * References: ++ * 340747b.pdf AT-MIO E series Register-Level Programmer Manual ++ * 341079b.pdf PCI E Series Register-Level Programmer Manual ++ * 340934b.pdf DAQ-STC reference manual ++ * 322080b.pdf 6711/6713/6715 User Manual ++ * 320945c.pdf PCI E Series User Manual ++ * 322138a.pdf PCI-6052E and DAQPad-6052E User Manual ++ * 320517c.pdf AT E Series User manual (obsolete) ++ * 320517f.pdf AT E Series User manual ++ * 320906c.pdf Maximum signal ratings ++ * 321066a.pdf About 16x ++ * 321791a.pdf Discontinuation of at-mio-16e-10 rev. c ++ * 321808a.pdf About at-mio-16e-10 rev P ++ * 321837a.pdf Discontinuation of at-mio-16de-10 rev d ++ * 321838a.pdf About at-mio-16de-10 rev N ++ * ++ * ISSUES: ++ * - The interrupt routine needs to be cleaned up ++ * - S-Series PCI-6143 support has been added but is not fully tested ++ * as yet. Terry Barnaby, BEAM Ltd. ++ * ++ */ ++#include ++#include ++#include "../intel/8255.h" ++#include "mite.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++ ++#define NI_TIMEOUT 1000 ++ ++/* Note: this table must match the ai_gain_* definitions */ ++static const short ni_gainlkup[][16] = { ++ /* ai_gain_16 */ ++ {0, 1, 2, 3, 4, 5, 6, 7, 0x100, 0x101, 0x102, 0x103, 0x104, 0x105, ++ 0x106, 0x107}, ++ /* ai_gain_8 */ ++ {1, 2, 4, 7, 0x101, 0x102, 0x104, 0x107}, ++ /* ai_gain_14 */ ++ {1, 2, 3, 4, 5, 6, 7, 0x101, 0x102, 0x103, 0x104, 0x105, 0x106, ++ 0x107}, ++ /* ai_gain_4 */ ++ {0, 1, 4, 7}, ++ /* ai_gain_611x */ ++ {0x00a, 0x00b, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006}, ++ /* ai_gain_622x */ ++ {0, 1, 4, 5}, ++ /* ai_gain_628x */ ++ {1, 2, 3, 4, 5, 6, 7}, ++ /* ai_gain_6143 */ ++ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, ++}; ++ ++struct a4l_rngtab rng_ni_E_ai = {16, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2.5, 2.5), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.25, 0.25), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(-0.05, 0.05), ++ RANGE_V(0, 20), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 2), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.5), ++ RANGE_V(0, 0.2), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai = ++ RNG_GLOBAL(rng_ni_E_ai); ++ ++struct a4l_rngtab rng_ni_E_ai_limited = {8, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_limited = ++ RNG_GLOBAL(rng_ni_E_ai_limited); ++ ++struct a4l_rngtab rng_ni_E_ai_limited14 = {14, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++ RANGE_V(-0.1, 0.1), ++ RANGE_V(0, 10), ++ RANGE_V(0, 5), ++ RANGE_V(0, 2), ++ RANGE_V(0, 1), ++ RANGE_V(0, 0.5), ++ RANGE_V(0, 0.2), ++ RANGE_V(0, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_limited14 = ++ RNG_GLOBAL(rng_ni_E_ai_limited14); ++ ++struct a4l_rngtab rng_ni_E_ai_bipolar4 = {4, { ++ RANGE_V(-10,10), ++ RANGE_V(-5, 5), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.05, 0.05), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_bipolar4 = ++ RNG_GLOBAL(rng_ni_E_ai_bipolar4); ++ ++struct a4l_rngtab rng_ni_E_ai_611x = {8, { ++ RANGE_V(-50, 50), ++ RANGE_V(-20, 20), ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ai_611x = ++ RNG_GLOBAL(rng_ni_E_ai_611x); ++ ++struct a4l_rngtab rng_ni_M_ai_622x = {4, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-1, 1), ++ 
RANGE_V(-0.2, 0.2), ++}}; ++struct a4l_rngdesc a4l_range_ni_M_ai_622x = ++ RNG_GLOBAL(rng_ni_M_ai_622x); ++ ++struct a4l_rngtab rng_ni_M_ai_628x = {7, { ++ RANGE_V(-10, 10), ++ RANGE_V(-5, 5), ++ RANGE_V(-2, 2), ++ RANGE_V(-1, 1), ++ RANGE_V(-0.5, 0.5), ++ RANGE_V(-0.2, 0.2), ++ RANGE_V(-0.1, 0.1), ++}}; ++struct a4l_rngdesc a4l_range_ni_M_ai_628x = ++ RNG_GLOBAL(rng_ni_M_ai_628x); ++ ++struct a4l_rngtab rng_ni_S_ai_6143 = {1, { ++ RANGE_V(-5, 5), ++}}; ++struct a4l_rngdesc a4l_range_ni_S_ai_6143 = ++ RNG_GLOBAL(rng_ni_S_ai_6143); ++ ++ ++struct a4l_rngtab rng_ni_E_ao_ext = {4, { ++ RANGE_V(-10, 10), ++ RANGE_V(0, 10), ++ RANGE_ext(-1, 1), ++ RANGE_ext(0, 1), ++}}; ++struct a4l_rngdesc a4l_range_ni_E_ao_ext = ++ RNG_GLOBAL(rng_ni_E_ao_ext); ++ ++struct a4l_rngdesc *ni_range_lkup[] = { ++ &a4l_range_ni_E_ai, ++ &a4l_range_ni_E_ai_limited, ++ &a4l_range_ni_E_ai_limited14, ++ &a4l_range_ni_E_ai_bipolar4, ++ &a4l_range_ni_E_ai_611x, ++ &a4l_range_ni_M_ai_622x, ++ &a4l_range_ni_M_ai_628x, ++ &a4l_range_ni_S_ai_6143 ++}; ++ ++static const int num_adc_stages_611x = 3; ++ ++static void ni_handle_fifo_dregs(struct a4l_subdevice *subd); ++static void get_last_sample_611x(struct a4l_subdevice *subd); ++static void get_last_sample_6143(struct a4l_subdevice *subd); ++static void handle_cdio_interrupt(struct a4l_device *dev); ++static void ni_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, unsigned int *list); ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++static void ni_handle_fifo_half_full(struct a4l_subdevice *subd); ++static int ni_ao_fifo_half_empty(struct a4l_subdevice *subd); ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static inline void ni_set_bitfield(struct a4l_device *dev, ++ int reg, ++ unsigned int bit_mask, ++ unsigned int bit_values) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->soft_reg_copy_lock, flags); ++ switch (reg) { ++ case Interrupt_A_Enable_Register: ++ devpriv->int_a_enable_reg &= ~bit_mask; ++ devpriv->int_a_enable_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->int_a_enable_reg, ++ Interrupt_A_Enable_Register); ++ break; ++ case Interrupt_B_Enable_Register: ++ devpriv->int_b_enable_reg &= ~bit_mask; ++ devpriv->int_b_enable_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->int_b_enable_reg, ++ Interrupt_B_Enable_Register); ++ break; ++ case IO_Bidirection_Pin_Register: ++ devpriv->io_bidirection_pin_reg &= ~bit_mask; ++ devpriv->io_bidirection_pin_reg |= bit_values & bit_mask; ++ devpriv->stc_writew(dev, devpriv->io_bidirection_pin_reg, ++ IO_Bidirection_Pin_Register); ++ break; ++ case AI_AO_Select: ++ devpriv->ai_ao_select_reg &= ~bit_mask; ++ devpriv->ai_ao_select_reg |= bit_values & bit_mask; ++ ni_writeb(devpriv->ai_ao_select_reg, AI_AO_Select); ++ break; ++ case G0_G1_Select: ++ devpriv->g0_g1_select_reg &= ~bit_mask; ++ devpriv->g0_g1_select_reg |= bit_values & bit_mask; ++ ni_writeb(devpriv->g0_g1_select_reg, G0_G1_Select); ++ break; ++ default: ++ a4l_err(dev, ++ "Warning %s() called with invalid register\n", ++ __FUNCTION__); ++ a4l_err(dev,"reg is %d\n", reg); ++ break; ++ } ++ ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&devpriv->soft_reg_copy_lock, flags); ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_drain_dma(struct a4l_subdevice *subd); ++ ++static inline void ni_set_ai_dma_channel(struct a4l_device * dev, 
int channel) ++{ ++ unsigned bitfield; ++ ++ if (channel >= 0) { ++ bitfield = ++ (ni_stc_dma_channel_select_bitfield(channel) << ++ AI_DMA_Select_Shift) & AI_DMA_Select_Mask; ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, AI_AO_Select, AI_DMA_Select_Mask, bitfield); ++} ++ ++static inline void ni_set_ao_dma_channel(struct a4l_device * dev, int channel) ++{ ++ unsigned bitfield; ++ ++ if (channel >= 0) { ++ bitfield = ++ (ni_stc_dma_channel_select_bitfield(channel) << ++ AO_DMA_Select_Shift) & AO_DMA_Select_Mask; ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, AI_AO_Select, AO_DMA_Select_Mask, bitfield); ++} ++ ++static inline void ni_set_gpct_dma_channel(struct a4l_device * dev, ++ unsigned gpct_index, int mite_channel) ++{ ++ unsigned bitfield; ++ ++ if (mite_channel >= 0) { ++ bitfield = GPCT_DMA_Select_Bits(gpct_index, mite_channel); ++ } else { ++ bitfield = 0; ++ } ++ ni_set_bitfield(dev, G0_G1_Select, GPCT_DMA_Select_Mask(gpct_index), ++ bitfield); ++} ++ ++static inline void ni_set_cdo_dma_channel(struct a4l_device * dev, int mite_channel) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->soft_reg_copy_lock, flags); ++ devpriv->cdio_dma_select_reg &= ~CDO_DMA_Select_Mask; ++ if (mite_channel >= 0) { ++ /*XXX just guessing ++ ni_stc_dma_channel_select_bitfield() returns the right ++ bits, under the assumption the cdio dma selection ++ works just like ai/ao/gpct. Definitely works for dma ++ channels 0 and 1. */ ++ devpriv->cdio_dma_select_reg |= ++ (ni_stc_dma_channel_select_bitfield(mite_channel) << ++ CDO_DMA_Select_Shift) & CDO_DMA_Select_Mask; ++ } ++ ni_writeb(devpriv->cdio_dma_select_reg, M_Offset_CDIO_DMA_Select); ++ mmiowb(); ++ rtdm_lock_put_irqrestore(&devpriv->soft_reg_copy_lock, flags); ++} ++ ++static int ni_request_ai_mite_channel(struct a4l_device * dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->ai_mite_chan); ++ devpriv->ai_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->ai_mite_ring); ++ if (devpriv->ai_mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_ai_mite_channel: " ++ "failed to reserve mite dma channel for analog input."); ++ return -EBUSY; ++ } ++ devpriv->ai_mite_chan->dir = A4L_INPUT; ++ ni_set_ai_dma_channel(dev, devpriv->ai_mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_ao_mite_channel(struct a4l_device * dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->ao_mite_chan); ++ devpriv->ao_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->ao_mite_ring); ++ if (devpriv->ao_mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_ao_mite_channel: " ++ "failed to reserve mite dma channel for analog outut."); ++ return -EBUSY; ++ } ++ devpriv->ao_mite_chan->dir = A4L_OUTPUT; ++ ni_set_ao_dma_channel(dev, devpriv->ao_mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_gpct_mite_channel(struct a4l_device * dev, ++ unsigned gpct_index, int direction) ++{ ++ unsigned long flags; ++ struct mite_channel *mite_chan; ++ ++ BUG_ON(gpct_index >= NUM_GPCT); ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ BUG_ON(devpriv->counter_dev->counters[gpct_index]->mite_chan); ++ mite_chan 
= mite_request_channel(devpriv->mite, ++ devpriv->gpct_mite_ring[gpct_index]); ++ if (mite_chan == NULL) { ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, ++ flags); ++ a4l_err(dev, ++ "ni_request_gpct_mite_channel: " ++ "failed to reserve mite dma channel for counter."); ++ return -EBUSY; ++ } ++ mite_chan->dir = direction; ++ a4l_ni_tio_set_mite_channel(devpriv->counter_dev->counters[gpct_index], ++ mite_chan); ++ ni_set_gpct_dma_channel(dev, gpct_index, mite_chan->channel); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ return 0; ++} ++ ++static int ni_request_cdo_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ int err = 0; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ /* No channel should be allocated... */ ++ BUG_ON(devpriv->cdo_mite_chan); ++ /* ...until now */ ++ devpriv->cdo_mite_chan = ++ mite_request_channel(devpriv->mite, devpriv->cdo_mite_ring); ++ ++ if (devpriv->cdo_mite_chan) { ++ devpriv->cdo_mite_chan->dir = A4L_OUTPUT; ++ ni_set_cdo_dma_channel(dev, devpriv->cdo_mite_chan->channel); ++ } else { ++ err = -EBUSY; ++ a4l_err(dev, ++ "ni_request_cdo_mite_channel: " ++ "failed to reserve mite dma channel " ++ "for correlated digital outut."); ++ } ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++void ni_release_ai_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) { ++ ni_set_ai_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->ai_mite_chan); ++ devpriv->ai_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_ao_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ao_mite_chan) { ++ ni_set_ao_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->ao_mite_chan); ++ devpriv->ao_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_gpct_mite_channel(struct a4l_device *dev, unsigned gpct_index) ++{ ++ unsigned long flags; ++ ++ BUG_ON(gpct_index >= NUM_GPCT); ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->counter_dev->counters[gpct_index]->mite_chan) { ++ struct mite_channel *mite_chan = ++ devpriv->counter_dev->counters[gpct_index]->mite_chan; ++ ++ ni_set_gpct_dma_channel(dev, gpct_index, -1); ++ a4l_ni_tio_set_mite_channel(devpriv->counter_dev-> ++ counters[gpct_index], NULL); ++ a4l_mite_release_channel(mite_chan); ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_release_cdo_mite_channel(struct a4l_device *dev) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->cdo_mite_chan) { ++ ni_set_cdo_dma_channel(dev, -1); ++ a4l_mite_release_channel(devpriv->cdo_mite_chan); ++ devpriv->cdo_mite_chan = NULL; ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++} ++ ++void ni_sync_ai_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) ++ a4l_mite_sync_input_dma(devpriv->ai_mite_chan, subd); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++} ++ ++void mite_handle_b_linkc(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = 
subd->dev; ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ao_mite_chan) ++ a4l_mite_sync_output_dma(devpriv->ao_mite_chan, subd); ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++} ++ ++static int ni_ao_wait_for_dma_load(struct a4l_subdevice *subd) ++{ ++ static const int timeout = 10000; ++ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_buffer *buf = subd->buf; ++ ++ int i; ++ ++ for (i = 0; i < timeout; i++) { ++ ++ int buffer_filled; ++ unsigned short b_status; ++ ++ b_status = devpriv->stc_readw(dev, AO_Status_1_Register); ++ ++ buffer_filled = test_bit(A4L_BUF_EOA_NR, &buf->flags); ++ buffer_filled |= (b_status & AO_FIFO_Half_Full_St); ++ ++ if (buffer_filled) ++ break; ++ ++ /* If we poll too often, the pci bus activity seems ++ to slow the dma transfer down */ ++ a4l_udelay(10); ++ } ++ ++ if (i == timeout) { ++ a4l_err(dev, ++ "ni_ao_wait_for_dma_load: " ++ "timed out waiting for dma load"); ++ return -EPIPE; ++ } ++ ++ return 0; ++} ++ ++ ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static inline int ni_ai_drain_dma(struct a4l_subdevice *subd) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_ai_mite_channel(struct a4l_device * dev) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_ao_mite_channel(struct a4l_device * dev) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline ++int ni_request_gpct_mite_channel(struct a4l_device * dev, ++ unsigned gpct_index, int direction) ++{ ++ return -ENOTSUPP; ++} ++ ++static inline int ni_request_cdo_mite_channel(struct a4l_device *dev) ++{ ++ return -ENOTSUPP; ++} ++ ++#define ni_release_ai_mite_channel(x) do { } while (0) ++#define ni_release_ao_mite_channel(x) do { } while (0) ++#define ni_release_gpct_mite_channel(x) do { } while (0) ++#define ni_release_cdo_mite_channel(x) do { } while (0) ++#define ni_sync_ai_dma(x) do { } while (0) ++#define mite_handle_b_linkc(x) do { } while (0) ++ ++static inline int ni_ao_wait_for_dma_load(struct a4l_subdevice *subd) ++{ ++ return -ENOTSUPP; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++/* E-series boards use the second irq signals to generate dma requests ++ for their counters */ ++void ni_e_series_enable_second_irq(struct a4l_device *dev, ++ unsigned gpct_index, short enable) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return; ++ switch (gpct_index) { ++ case 0: ++ if (enable) { ++ devpriv->stc_writew(dev, G0_Gate_Second_Irq_Enable, ++ Second_IRQ_A_Enable_Register); ++ } else { ++ devpriv->stc_writew(dev, 0, ++ Second_IRQ_A_Enable_Register); ++ } ++ break; ++ case 1: ++ if (enable) { ++ devpriv->stc_writew(dev, G1_Gate_Second_Irq_Enable, ++ Second_IRQ_B_Enable_Register); ++ } else { ++ devpriv->stc_writew(dev, 0, ++ Second_IRQ_B_Enable_Register); ++ } ++ break; ++ default: ++ BUG(); ++ break; ++ } ++} ++ ++void ni_clear_ai_fifo(struct a4l_device *dev) ++{ ++ if (boardtype.reg_type == ni_reg_6143) { ++ /* Flush the 6143 data FIFO */ ++ ni_writel(0x10, AIFIFO_Control_6143); /* Flush fifo */ ++ ni_writel(0x00, AIFIFO_Control_6143); /* Flush fifo */ ++ while (ni_readl(AIFIFO_Status_6143) & 0x10); /* Wait for complete */ ++ } else { ++ devpriv->stc_writew(dev, 1, ADC_FIFO_Clear); ++ if (boardtype.reg_type == ni_reg_625x) { ++ ni_writeb(0, M_Offset_Static_AI_Control(0)); ++ ni_writeb(1, M_Offset_Static_AI_Control(0)); ++ } ++ } ++} ++ ++#define ao_win_out(data, addr) ni_ao_win_outw(dev, data, addr) ++static inline void ni_ao_win_outw(struct a4l_device *dev, 
uint16_t data, int addr) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ ni_writew(data, AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static inline void ni_ao_win_outl(struct a4l_device *dev, uint32_t data, int addr) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ ni_writel(data, AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static inline unsigned short ni_ao_win_inw(struct a4l_device *dev, int addr) ++{ ++ unsigned long flags; ++ unsigned short data; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(addr, AO_Window_Address_611x); ++ data = ni_readw(AO_Window_Data_611x); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++ return data; ++} ++ ++/* ++ * ni_set_bits( ) allows different parts of the ni_mio_common driver ++ * to share registers (such as Interrupt_A_Register) without interfering ++ * with each other. ++ * ++ * NOTE: the switch/case statements are optimized out for a constant ++ * argument so this is actually quite fast--- If you must wrap another ++ * function around this make it inline to avoid a large speed penalty. ++ * ++ * value should only be 1 or 0. ++ */ ++ ++static inline void ni_set_bits(struct a4l_device *dev, ++ int reg, unsigned bits, unsigned value) ++{ ++ unsigned bit_values; ++ ++ if (value) ++ bit_values = bits; ++ else ++ bit_values = 0; ++ ++ ni_set_bitfield(dev, reg, bits, bit_values); ++} ++ ++static void shutdown_ai_command(struct a4l_subdevice *subd) ++{ ++ ni_ai_drain_dma(subd); ++ ni_handle_fifo_dregs(subd); ++ get_last_sample_611x(subd); ++ get_last_sample_6143(subd); ++ ++ /* TODO: stop the acquisiton */ ++} ++ ++static void ni_handle_eos(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (devpriv->aimode == AIMODE_SCAN) { ++ static const int timeout = 10; ++ int i; ++ ++ for (i = 0; i < timeout; i++) { ++ ni_sync_ai_dma(subd); ++ /* TODO: stop when the transfer is really over */ ++ a4l_udelay(1); ++ } ++ } ++ ++ /* Handle special case of single scan using AI_End_On_End_Of_Scan */ ++ if ((devpriv->ai_cmd2 & AI_End_On_End_Of_Scan)) { ++ shutdown_ai_command(subd); ++ } ++} ++ ++static void ni_event(struct a4l_subdevice * subd) ++{ ++ /* Temporary hack */ ++ struct a4l_buffer *buf = subd->buf; ++ ++ if(test_bit(A4L_BUF_ERROR_NR, &buf->flags)) { ++ if (subd->cancel != NULL) ++ subd->cancel(subd); ++ } ++ ++ a4l_buf_evt(subd, 0); ++ ++} ++ ++static void handle_gpct_interrupt(struct a4l_device *dev, unsigned short counter_index) ++{ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ struct ni_gpct *counter = devpriv->counter_dev->counters[counter_index]; ++ a4l_ni_tio_handle_interrupt(counter, dev); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++} ++ ++#ifdef CONFIG_DEBUG_MIO_COMMON ++static const char *const status_a_strings[] = { ++ "passthru0", "fifo", "G0_gate", "G0_TC", ++ "stop", "start", "sc_tc", "start1", ++ "start2", "sc_tc_error", "overflow", "overrun", ++ "fifo_empty", "fifo_half_full", "fifo_full", "interrupt_a" ++}; ++ ++static void ni_mio_print_status_a(int status) ++{ ++ int i; ++ ++ __a4l_info("A status:"); ++ for (i = 15; i >= 0; i--) { ++ if (status & (1 << i)) { ++ __a4l_info(" %s", status_a_strings[i]); ++ } ++ } ++ __a4l_info("\n"); ++} ++ ++static 
const char *const status_b_strings[] = { ++ "passthru1", "fifo", "G1_gate", "G1_TC", ++ "UI2_TC", "UPDATE", "UC_TC", "BC_TC", ++ "start1", "overrun", "start", "bc_tc_error", ++ "fifo_empty", "fifo_half_full", "fifo_full", "interrupt_b" ++}; ++ ++static void ni_mio_print_status_b(int status) ++{ ++ int i; ++ ++ __a4l_info("B status:"); ++ for (i = 15; i >= 0; i--) { ++ if (status & (1 << i)) { ++ __a4l_info(" %s", status_b_strings[i]); ++ } ++ } ++ __a4l_info("\n"); ++} ++ ++#else /* !CONFIG_DEBUG_MIO_COMMON */ ++ ++#define ni_mio_print_status_a(x) ++#define ni_mio_print_status_b(x) ++ ++#endif /* CONFIG_DEBUG_MIO_COMMON */ ++ ++static void ack_a_interrupt(struct a4l_device *dev, unsigned short a_status) ++{ ++ unsigned short ack = 0; ++ ++ if (a_status & AI_SC_TC_St) { ++ ack |= AI_SC_TC_Interrupt_Ack; ++ } ++ if (a_status & AI_START1_St) { ++ ack |= AI_START1_Interrupt_Ack; ++ } ++ if (a_status & AI_START_St) { ++ ack |= AI_START_Interrupt_Ack; ++ } ++ if (a_status & AI_STOP_St) { ++ /* not sure why we used to ack the START here also, ++ instead of doing it independently. Frank Hess ++ 2007-07-06 */ ++ ack |= AI_STOP_Interrupt_Ack; ++ } ++ if (ack) ++ devpriv->stc_writew(dev, ack, Interrupt_A_Ack_Register); ++} ++ ++static void handle_a_interrupt(struct a4l_device *dev, ++ unsigned short status,unsigned int ai_mite_status) ++{ ++ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AI_SUBDEV); ++ ++ /* 67xx boards don't have ai subdevice, but their gpct0 ++ might generate an a interrupt. */ ++ ++ if((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) ++ return; ++ ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "a_status=%04x ai_mite_status=%08x\n",status, ai_mite_status); ++ ni_mio_print_status_a(status); ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ if (ai_mite_status & CHSR_LINKC) ++ ni_sync_ai_dma(subd); ++ ++ if (ai_mite_status & ~(CHSR_INT | CHSR_LINKC | CHSR_DONE | CHSR_MRDY | ++ CHSR_DRDY | CHSR_DRQ1 | CHSR_DRQ0 | CHSR_ERROR | ++ CHSR_SABORT | CHSR_XFERR | CHSR_LxERR_mask)) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "unknown mite interrupt, ack! (ai_mite_status=%08x)\n", ++ ai_mite_status); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ /* Test for all uncommon interrupt events at the same time */ ++ if (status & (AI_Overrun_St | AI_Overflow_St | AI_SC_TC_Error_St | ++ AI_SC_TC_St | AI_START1_St)) { ++ if (status == 0xffff) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "a_status=0xffff. Card removed?\n"); ++ /* TODO: we probably aren't even running a command now, ++ so it's a good idea to be careful. 
++ we should check the transfer status */ ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ ni_event(subd); ++ return; ++ } ++ if (status & (AI_Overrun_St | AI_Overflow_St | ++ AI_SC_TC_Error_St)) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ "ai error a_status=%04x\n", status); ++ ni_mio_print_status_a(status); ++ ++ shutdown_ai_command(subd); ++ ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ ni_event(subd); ++ ++ return; ++ } ++ if (status & AI_SC_TC_St) { ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: SC_TC interrupt\n"); ++ if (!devpriv->ai_continuous) { ++ shutdown_ai_command(subd); ++ } ++ } ++ } ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (status & AI_FIFO_Half_Full_St) { ++ int i; ++ static const int timeout = 10; ++ /* PCMCIA cards (at least 6036) seem to stop producing ++ interrupts if we fail to get the fifo less than half ++ full, so loop to be sure. */ ++ for (i = 0; i < timeout; ++i) { ++ ni_handle_fifo_half_full(subd); ++ if ((devpriv->stc_readw(dev, AI_Status_1_Register) & ++ AI_FIFO_Half_Full_St) == 0) ++ break; ++ } ++ } ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ if ((status & AI_STOP_St)) { ++ ni_handle_eos(subd); ++ } ++ ++ ni_event(subd); ++ ++ status = devpriv->stc_readw(dev, AI_Status_1_Register); ++ if (status & Interrupt_A_St) ++ a4l_dbg(1, drv_dbg, dev, "ni_mio_common: interrupt: " ++ " didn't clear interrupt? status=0x%x\n", status); ++} ++ ++static void ack_b_interrupt(struct a4l_device *dev, unsigned short b_status) ++{ ++ unsigned short ack = 0; ++ if (b_status & AO_BC_TC_St) { ++ ack |= AO_BC_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_Overrun_St) { ++ ack |= AO_Error_Interrupt_Ack; ++ } ++ if (b_status & AO_START_St) { ++ ack |= AO_START_Interrupt_Ack; ++ } ++ if (b_status & AO_START1_St) { ++ ack |= AO_START1_Interrupt_Ack; ++ } ++ if (b_status & AO_UC_TC_St) { ++ ack |= AO_UC_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_UI2_TC_St) { ++ ack |= AO_UI2_TC_Interrupt_Ack; ++ } ++ if (b_status & AO_UPDATE_St) { ++ ack |= AO_UPDATE_Interrupt_Ack; ++ } ++ if (ack) ++ devpriv->stc_writew(dev, ack, Interrupt_B_Ack_Register); ++} ++ ++static void handle_b_interrupt(struct a4l_device * dev, ++ unsigned short b_status, unsigned int ao_mite_status) ++{ ++ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AO_SUBDEV); ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: b_status=%04x m1_status=%08x\n", ++ b_status, ao_mite_status); ++ ++ ni_mio_print_status_b(b_status); ++ ++ if (b_status == 0xffff) ++ return; ++ ++ if (b_status & AO_Overrun_St) { ++ a4l_err(dev, ++ "ni_mio_common: interrupt: " ++ "AO FIFO underrun status=0x%04x status2=0x%04x\n", ++ b_status, ++ devpriv->stc_readw(dev, AO_Status_2_Register)); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ ++ if (b_status & AO_BC_TC_St) { ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: " ++ "AO BC_TC status=0x%04x status2=0x%04x\n", ++ b_status, devpriv->stc_readw(dev, AO_Status_2_Register)); ++ a4l_buf_evt(subd, A4L_BUF_EOA); ++ } ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (ao_mite_status & CHSR_STOPS) { ++ a4l_dbg(1, drv_dbg, dev, ++ "ni_mio_common: interrupt: MITE transfer stopped\n"); ++ } else if (ao_mite_status & CHSR_LINKC) { ++ /* Currently, mite.c requires us to handle LINKC */ ++ mite_handle_b_linkc(subd); ++ } ++ ++ if (ao_mite_status & ++ ~(CHSR_INT | CHSR_LINKC | CHSR_DONE | CHSR_MRDY | ++ CHSR_DRDY | CHSR_DRQ1 | 
CHSR_DRQ0 | CHSR_ERROR | ++ CHSR_SABORT | CHSR_STOPS | CHSR_XFERR | CHSR_LxERR_mask)) { ++ a4l_err(dev, ++ "unknown mite interrupt, ack! (ao_mite_status=%08x)\n", ++ ao_mite_status); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ if (b_status & AO_FIFO_Request_St) { ++ int ret; ++ ++ ret = ni_ao_fifo_half_empty(subd); ++ if (!ret) { ++ a4l_err(dev, ++ "ni_mio_common: " ++ "interrupt: AO buffer underrun\n"); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_FIFO_Interrupt_Enable | ++ AO_Error_Interrupt_Enable, 0); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ni_event(subd); ++} ++ ++int a4l_ni_E_interrupt(unsigned int irq, void *d) ++{ ++ struct a4l_device *dev = d; ++ unsigned short a_status; ++ unsigned short b_status; ++ unsigned int ai_mite_status = 0; ++ unsigned int ao_mite_status = 0; ++ unsigned long flags; ++ struct mite_struct *mite = devpriv->mite; ++ ++ /* Make sure dev->attached is checked before handler does ++ anything else. */ ++ smp_mb(); ++ ++ /* lock to avoid race with a4l_poll */ ++ rtdm_lock_get_irqsave(&dev->lock, flags); ++ a_status = devpriv->stc_readw(dev, AI_Status_1_Register); ++ b_status = devpriv->stc_readw(dev, AO_Status_1_Register); ++ if (mite) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ rtdm_lock_get(&devpriv->mite_channel_lock); ++ if (devpriv->ai_mite_chan) { ++ ai_mite_status = a4l_mite_get_status(devpriv->ai_mite_chan); ++ if (ai_mite_status & CHSR_LINKC) ++ writel(CHOR_CLRLC, ++ devpriv->mite->mite_io_addr + ++ MITE_CHOR(devpriv->ai_mite_chan->channel)); ++ } ++ if (devpriv->ao_mite_chan) { ++ ao_mite_status = a4l_mite_get_status(devpriv->ao_mite_chan); ++ if (ao_mite_status & CHSR_LINKC) ++ writel(CHOR_CLRLC, ++ mite->mite_io_addr + ++ MITE_CHOR(devpriv->ao_mite_chan->channel)); ++ } ++ rtdm_lock_put(&devpriv->mite_channel_lock); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ } ++ ack_a_interrupt(dev, a_status); ++ ack_b_interrupt(dev, b_status); ++ if ((a_status & Interrupt_A_St) || (ai_mite_status & CHSR_INT)) ++ handle_a_interrupt(dev, a_status, ai_mite_status); ++ if ((b_status & Interrupt_B_St) || (ao_mite_status & CHSR_INT)) ++ handle_b_interrupt(dev, b_status, ao_mite_status); ++ handle_gpct_interrupt(dev, 0); ++ handle_gpct_interrupt(dev, 1); ++ handle_cdio_interrupt(dev); ++ ++ rtdm_lock_put_irqrestore(&dev->lock, flags); ++ return 0; ++} ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static void ni_ao_fifo_load(struct a4l_subdevice *subd, int n) ++{ ++ struct a4l_device *dev = subd->dev; ++ sampl_t d; ++ u32 packed_data; ++ int i, err = 1; ++ ++ for (i = 0; i < n; i++) { ++ err = a4l_buf_get(subd, &d, sizeof(sampl_t)); ++ if (err != 0) ++ break; ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ packed_data = d & 0xffff; ++ /* 6711 only has 16 bit wide ao fifo */ ++ if (boardtype.reg_type != ni_reg_6711) { ++ err = a4l_buf_get(subd, &d, sizeof(sampl_t)); ++ if (err != 0) ++ break; ++ i++; ++ packed_data |= (d << 16) & 0xffff0000; ++ } ++ ni_writel(packed_data, DAC_FIFO_Data_611x); ++ } else { ++ ni_writew(d, DAC_FIFO_Data); ++ } ++ } ++ if (err != 0) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++} ++ ++/* ++ * There's a small problem if the FIFO gets 
really low and we ++ * don't have the data to fill it. Basically, if after we fill ++ * the FIFO with all the data available, the FIFO is _still_ ++ * less than half full, we never clear the interrupt. If the ++ * IRQ is in edge mode, we never get another interrupt, because ++ * this one wasn't cleared. If in level mode, we get flooded ++ * with interrupts that we can't fulfill, because nothing ever ++ * gets put into the buffer. ++ * ++ * This kind of situation is recoverable, but it is easier to ++ * just pretend we had a FIFO underrun, since there is a good ++ * chance it will happen anyway. This is _not_ the case for ++ * RT code, as RT code might purposely be running close to the ++ * metal. Needs to be fixed eventually. ++ */ ++static int ni_ao_fifo_half_empty(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int n; ++ ++ n = a4l_buf_count(subd); ++ if (n == 0) { ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ return 0; ++ } ++ ++ n /= sizeof(sampl_t); ++ if (n > boardtype.ao_fifo_depth / 2) ++ n = boardtype.ao_fifo_depth / 2; ++ ++ ni_ao_fifo_load(subd, n); ++ ++ return 1; ++} ++ ++static int ni_ao_prep_fifo(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int n; ++ ++ /* Reset fifo */ ++ devpriv->stc_writew(dev, 1, DAC_FIFO_Clear); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ ni_ao_win_outl(dev, 0x6, AO_FIFO_Offset_Load_611x); ++ ++ /* Load some data */ ++ n = a4l_buf_count(subd); ++ if (n == 0) ++ return 0; ++ ++ n /= sizeof(sampl_t); ++ if (n > boardtype.ao_fifo_depth) ++ n = boardtype.ao_fifo_depth; ++ ++ ni_ao_fifo_load(subd, n); ++ ++ return n; ++} ++ ++static void ni_ai_fifo_read(struct a4l_subdevice *subd, int n) ++{ ++ struct a4l_device *dev = subd->dev; ++ int i; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ sampl_t data[2]; ++ u32 dl; ++ ++ for (i = 0; i < n / 2; i++) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16) & 0xffff; ++ data[1] = dl & 0xffff; ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ /* Check if there's a single sample stuck in the FIFO */ ++ if (n % 2) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ data[0] = dl & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ sampl_t data[2]; ++ u32 dl; ++ ++ /* This just reads the FIFO assuming the data is ++ present, no checks on the FIFO status are performed */ ++ for (i = 0; i < n / 2; i++) { ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ data[0] = (dl >> 16) & 0xffff; ++ data[1] = dl & 0xffff; ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ if (n % 2) { ++ /* Assume there is a single sample stuck in the FIFO. ++ Get stranded sample into FIFO */ ++ ni_writel(0x01, AIFIFO_Control_6143); ++ dl = ni_readl(AIFIFO_Data_6143); ++ data[0] = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ } else { ++ if (n > sizeof(devpriv->ai_fifo_buffer) / ++ sizeof(devpriv->ai_fifo_buffer[0])) { ++ a4l_err(dev, ++ "ni_ai_fifo_read: " ++ "bug! 
ai_fifo_buffer too small"); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ return; ++ } ++ for (i = 0; i < n; i++) { ++ devpriv->ai_fifo_buffer[i] = ++ ni_readw(ADC_FIFO_Data_Register); ++ } ++ a4l_buf_put(subd, ++ devpriv->ai_fifo_buffer, ++ n * sizeof(devpriv->ai_fifo_buffer[0])); ++ } ++} ++ ++static void ni_handle_fifo_half_full(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ni_ai_fifo_read(subd, boardtype.ai_fifo_depth / 2); ++} ++ ++#endif /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_drain_dma(struct a4l_subdevice *subd) ++{ ++ int i; ++ static const int timeout = 10000; ++ unsigned long flags; ++ int retval = 0; ++ struct a4l_device *dev = subd->dev; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->ai_mite_chan) { ++ for (i = 0; i < timeout; i++) { ++ if ((devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St) ++ && a4l_mite_bytes_in_transit(devpriv-> ++ ai_mite_chan) == 0) ++ break; ++ a4l_udelay(5); ++ } ++ if (i == timeout) { ++ a4l_info(dev, "wait for dma drain timed out\n"); ++ ++ a4l_info(dev, "a4l_mite_bytes_in_transit=%i, " ++ "AI_Status1_Register=0x%x\n", ++ a4l_mite_bytes_in_transit(devpriv->ai_mite_chan), ++ devpriv->stc_readw(dev, AI_Status_1_Register)); ++ retval = -1; ++ } ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ ni_sync_ai_dma(subd); ++ ++ return retval; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++/* Empties the AI fifo */ ++static void ni_handle_fifo_dregs(struct a4l_subdevice *subd) ++{ ++ sampl_t data[2]; ++ u32 dl; ++ short fifo_empty; ++ int i; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ while ((devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St) == 0) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16); ++ data[1] = (dl & 0xffff); ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ i = 0; ++ while (ni_readl(AIFIFO_Status_6143) & 0x04) { ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data[0] = (dl >> 16); ++ data[1] = (dl & 0xffff); ++ a4l_buf_put(subd, data, sizeof(sampl_t) * 2); ++ i += 2; ++ } ++ // Check if stranded sample is present ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ ni_writel(0x01, AIFIFO_Control_6143); // Get stranded sample into FIFO ++ dl = ni_readl(AIFIFO_Data_6143); ++ data[0] = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data[0], sizeof(sampl_t)); ++ } ++ ++ } else { ++ fifo_empty = ++ devpriv->stc_readw(dev, ++ AI_Status_1_Register) & AI_FIFO_Empty_St; ++ while (fifo_empty == 0) { ++ for (i = 0; ++ i < ++ sizeof(devpriv->ai_fifo_buffer) / ++ sizeof(devpriv->ai_fifo_buffer[0]); i++) { ++ fifo_empty = ++ devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St; ++ if (fifo_empty) ++ break; ++ devpriv->ai_fifo_buffer[i] = ++ ni_readw(ADC_FIFO_Data_Register); ++ } ++ a4l_buf_put(subd, ++ devpriv->ai_fifo_buffer, ++ i * sizeof(devpriv->ai_fifo_buffer[0])); ++ } ++ } ++} ++ ++static void get_last_sample_611x(struct a4l_subdevice *subd) ++{ ++ sampl_t data; ++ u32 dl; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type != ni_reg_611x) ++ return; ++ ++ /* Check if there's a single sample stuck in the FIFO */ ++ if 
(ni_readb(XXX_Status) & 0x80) { ++ dl = ni_readl(ADC_FIFO_Data_611x); ++ data = (dl & 0xffff); ++ a4l_buf_put(subd, &data, sizeof(sampl_t)); ++ } ++} ++ ++static void get_last_sample_6143(struct a4l_subdevice *subd) ++{ ++ sampl_t data; ++ u32 dl; ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type != ni_reg_6143) ++ return; ++ ++ /* Check if there's a single sample stuck in the FIFO */ ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ /* Get stranded sample into FIFO */ ++ ni_writel(0x01, AIFIFO_Control_6143); ++ dl = ni_readl(AIFIFO_Data_6143); ++ ++ /* This may get the hi/lo data in the wrong order */ ++ data = (dl >> 16) & 0xffff; ++ a4l_buf_put(subd, &data, sizeof(sampl_t)); ++ } ++} ++ ++static void ni_ai_munge16(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ unsigned int i; ++ sampl_t *array = buf; ++ ++ for (i = 0; i < size / sizeof(sampl_t); i++) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ array[i] = le16_to_cpu(array[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ array[i] += devpriv->ai_offset[chan_idx]; ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++static void ni_ai_munge32(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ unsigned int i; ++ lsampl_t *larray = buf; ++ ++ for (i = 0; i < size / sizeof(lsampl_t); i++) { ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ larray[i] = le32_to_cpu(larray[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ larray[i] += devpriv->ai_offset[chan_idx]; ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_ai_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_ai_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ err = a4l_mite_buf_change(devpriv->ai_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ switch (boardtype.reg_type) { ++ case ni_reg_611x: ++ case ni_reg_6143: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 32, 16); ++ break; ++ case ni_reg_628x: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 32, 32); ++ break; ++ default: ++ a4l_mite_prep_dma(devpriv->ai_mite_chan, 16, 16); ++ break; ++ }; ++ ++ /* start the MITE */ ++ a4l_mite_dma_arm(devpriv->ai_mite_chan); ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return 0; ++} ++ ++static int ni_ao_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_ao_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ err = a4l_mite_buf_change(devpriv->ao_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ if (devpriv->ao_mite_chan) { ++ ++ if (boardtype.reg_type & (ni_reg_611x | ni_reg_6713)) { ++ a4l_mite_prep_dma(devpriv->ao_mite_chan, 32, 32); ++ } else { ++ /* Doing 32 instead of 16 bit wide transfers ++ from memory makes the mite do 32 bit pci 
++ transfers, doubling pci bandwidth. */ ++ a4l_mite_prep_dma(devpriv->ao_mite_chan, 16, 32); ++ } ++ a4l_mite_dma_arm(devpriv->ao_mite_chan); ++ } else ++ err = -EIO; ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++static int ni_cdo_setup_MITE_dma(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned long flags; ++ int err; ++ ++ err = ni_request_cdo_mite_channel(dev); ++ if (err < 0) ++ return err; ++ ++ /* No need to get a lock to setup the ring buffer */ ++ err = a4l_mite_buf_change(devpriv->cdo_mite_chan->ring, subd); ++ if (err < 0) ++ return err; ++ ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ ++ /* This test should be useless but one never knows */ ++ if (devpriv->cdo_mite_chan) { ++ /* Configure the DMA transfer */ ++ a4l_mite_prep_dma(devpriv->cdo_mite_chan, 32, 32); ++ a4l_mite_dma_arm(devpriv->cdo_mite_chan); ++ } else ++ err = -EIO; ++ ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ return err; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static void ni_ai_reset(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ ni_release_ai_mite_channel(dev); ++ ++ /* ai configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_Start | AI_Reset, ++ Joint_Reset_Register); ++ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ++ AI_SC_TC_Interrupt_Enable | AI_START1_Interrupt_Enable | ++ AI_START2_Interrupt_Enable | AI_START_Interrupt_Enable | ++ AI_STOP_Interrupt_Enable | AI_Error_Interrupt_Enable | ++ AI_FIFO_Interrupt_Enable, 0); ++ ++ ni_clear_ai_fifo(dev); ++ ++ if (boardtype.reg_type != ni_reg_6143) ++ ni_writeb(0, Misc_Command); ++ ++ devpriv->stc_writew(dev, AI_Disarm, AI_Command_1_Register); /* reset pulses */ ++ devpriv->stc_writew(dev, ++ AI_Start_Stop | AI_Mode_1_Reserved /*| AI_Trigger_Once */ , ++ AI_Mode_1_Register); ++ devpriv->stc_writew(dev, 0x0000, AI_Mode_2_Register); ++ /* generate FIFO interrupts on non-empty */ ++ devpriv->stc_writew(dev, (0 << 6) | 0x0000, AI_Mode_3_Register); ++ if (boardtype.reg_type == ni_reg_611x) { ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ devpriv->stc_writew(dev, AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3) | ++ AI_CONVERT_Output_Select(AI_CONVERT_Output_Enable_High), ++ AI_Output_Control_Register); ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ devpriv->stc_writew(dev, AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3) | ++ AI_CONVERT_Output_Select(AI_CONVERT_Output_Enable_Low), ++ AI_Output_Control_Register); ++ } else { ++ unsigned int ai_output_control_bits; ++ devpriv->stc_writew(dev, AI_SHIFTIN_Pulse_Width | ++ AI_SOC_Polarity | ++ AI_CONVERT_Pulse_Width | ++ AI_LOCALMUX_CLK_Pulse_Width, AI_Personal_Register); ++ ai_output_control_bits = AI_SCAN_IN_PROG_Output_Select(3) | ++ AI_EXTMUX_CLK_Output_Select(0) | ++ AI_LOCALMUX_CLK_Output_Select(2) | ++ AI_SC_TC_Output_Select(3); ++ if (boardtype.reg_type == ni_reg_622x) ++ ai_output_control_bits |= ++ AI_CONVERT_Output_Select ++ (AI_CONVERT_Output_Enable_High); ++ else ++ ai_output_control_bits |= ++ AI_CONVERT_Output_Select ++ 
(AI_CONVERT_Output_Enable_Low); ++ devpriv->stc_writew(dev, ai_output_control_bits, ++ AI_Output_Control_Register); ++ } ++ ++ /* the following registers should not be changed, because there ++ * are no backup registers in devpriv. If you want to change ++ * any of these, add a backup register and other appropriate code: ++ * AI_Mode_1_Register ++ * AI_Mode_3_Register ++ * AI_Personal_Register ++ * AI_Output_Control_Register ++ */ ++ ++ /* clear interrupts */ ++ devpriv->stc_writew(dev, AI_SC_TC_Error_Confirm | AI_START_Interrupt_Ack | ++ AI_START2_Interrupt_Ack | AI_START1_Interrupt_Ack | ++ AI_SC_TC_Interrupt_Ack | AI_Error_Interrupt_Ack | ++ AI_STOP_Interrupt_Ack, Interrupt_A_Ack_Register); ++ ++ devpriv->stc_writew(dev, AI_Configuration_End, Joint_Reset_Register); ++} ++ ++static int ni_ai_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ const unsigned int mask = (1 << boardtype.adbits) - 1; ++ int i, n; ++ unsigned int signbits; ++ unsigned short d; ++ unsigned long dl; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ ni_load_channelgain_list(dev, 1, &insn->chan_desc); ++ ++ ni_clear_ai_fifo(dev); ++ ++ signbits = devpriv->ai_offset[0]; ++ if (boardtype.reg_type == ni_reg_611x) { ++ for (n = 0; n < num_adc_stages_611x; n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ a4l_udelay(1); ++ } ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ /* The 611x has screwy 32-bit FIFOs. */ ++ d = 0; ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (ni_readb(XXX_Status) & 0x80) { ++ d = (ni_readl(ADC_FIFO_Data_611x) >> 16) ++ & 0xffff; ++ break; ++ } ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) { ++ d = ni_readl(ADC_FIFO_Data_611x) & ++ 0xffff; ++ break; ++ } ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in 611x ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ d += signbits; ++ data[n] = d; ++ } ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ ++ /* The 6143 has 32-bit FIFOs. 
++ You need to strobe a bit to move a single ++ 16bit stranded sample into the FIFO */ ++ dl = 0; ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (ni_readl(AIFIFO_Status_6143) & 0x01) { ++ ni_writel(0x01, AIFIFO_Control_6143); // Get stranded sample into FIFO ++ dl = ni_readl(AIFIFO_Data_6143); ++ break; ++ } ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in 6143 ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ data[n] = (((dl >> 16) & 0xFFFF) + signbits) & 0xFFFF; ++ } ++ } else { ++ for (n = 0; n < insn->data_size / sizeof(uint16_t); n++) { ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, ++ AI_Command_1_Register); ++ for (i = 0; i < NI_TIMEOUT; i++) { ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) ++ break; ++ } ++ if (i == NI_TIMEOUT) { ++ a4l_warn(dev, ++ "ni_mio_common: " ++ "timeout in ni_ai_insn_read\n"); ++ return -ETIME; ++ } ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ data[n] = ni_readl(M_Offset_AI_FIFO_Data) & mask; ++ } else { ++ d = ni_readw(ADC_FIFO_Data_Register); ++ /* subtle: needs to be short addition */ ++ d += signbits; ++ data[n] = d; ++ } ++ } ++ } ++ return 0; ++} ++ ++void ni_prime_channelgain_list(struct a4l_device *dev) ++{ ++ int i; ++ devpriv->stc_writew(dev, AI_CONVERT_Pulse, AI_Command_1_Register); ++ for (i = 0; i < NI_TIMEOUT; ++i) { ++ if (!(devpriv->stc_readw(dev, ++ AI_Status_1_Register) & ++ AI_FIFO_Empty_St)) { ++ devpriv->stc_writew(dev, 1, ADC_FIFO_Clear); ++ return; ++ } ++ a4l_udelay(1); ++ } ++ a4l_warn(dev, "ni_mio_common: timeout loading channel/gain list\n"); ++} ++ ++static void ni_m_series_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, ++ unsigned int *list) ++{ ++ unsigned int chan, range, aref; ++ unsigned int i; ++ unsigned offset; ++ unsigned int dither; ++ unsigned range_code; ++ ++ devpriv->stc_writew(dev, 1, Configuration_Memory_Clear); ++ ++ if ((list[0] & CR_ALT_SOURCE)) { ++ unsigned bypass_bits; ++ chan = CR_CHAN(list[0]); ++ range = CR_RNG(list[0]); ++ range_code = ni_gainlkup[boardtype.gainlkup][range]; ++ dither = ((list[0] & CR_ALT_FILTER) != 0); ++ bypass_bits = MSeries_AI_Bypass_Config_FIFO_Bit; ++ bypass_bits |= chan; ++ bypass_bits |= ++ (devpriv-> ++ ai_calib_source) & (MSeries_AI_Bypass_Cal_Sel_Pos_Mask | ++ MSeries_AI_Bypass_Cal_Sel_Neg_Mask | ++ MSeries_AI_Bypass_Mode_Mux_Mask | ++ MSeries_AO_Bypass_AO_Cal_Sel_Mask); ++ bypass_bits |= MSeries_AI_Bypass_Gain_Bits(range_code); ++ if (dither) ++ bypass_bits |= MSeries_AI_Bypass_Dither_Bit; ++ // don't use 2's complement encoding ++ bypass_bits |= MSeries_AI_Bypass_Polarity_Bit; ++ ni_writel(bypass_bits, M_Offset_AI_Config_FIFO_Bypass); ++ } else { ++ ni_writel(0, M_Offset_AI_Config_FIFO_Bypass); ++ } ++ offset = 0; ++ for (i = 0; i < n_chan; i++) { ++ unsigned config_bits = 0; ++ chan = CR_CHAN(list[i]); ++ aref = CR_AREF(list[i]); ++ range = CR_RNG(list[i]); ++ dither = ((list[i] & CR_ALT_FILTER) != 0); ++ ++ range_code = ni_gainlkup[boardtype.gainlkup][range]; ++ devpriv->ai_offset[i] = offset; ++ switch (aref) { ++ case AREF_DIFF: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Differential_Bits; ++ break; ++ case AREF_COMMON: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Common_Ref_Bits; ++ break; ++ case AREF_GROUND: ++ config_bits |= ++ MSeries_AI_Config_Channel_Type_Ground_Ref_Bits; ++ break; ++ case AREF_OTHER: ++ break; ++ } ++ config_bits |= MSeries_AI_Config_Channel_Bits(chan); ++ config_bits |= ++ MSeries_AI_Config_Bank_Bits(boardtype.reg_type, chan); ++ 
config_bits |= MSeries_AI_Config_Gain_Bits(range_code); ++ if (i == n_chan - 1) ++ config_bits |= MSeries_AI_Config_Last_Channel_Bit; ++ if (dither) ++ config_bits |= MSeries_AI_Config_Dither_Bit; ++ // don't use 2's complement encoding ++ config_bits |= MSeries_AI_Config_Polarity_Bit; ++ ni_writew(config_bits, M_Offset_AI_Config_FIFO_Data); ++ } ++ ni_prime_channelgain_list(dev); ++} ++ ++/* ++ * Notes on the 6110 and 6111: ++ * These boards a slightly different than the rest of the series, since ++ * they have multiple A/D converters. ++ * From the driver side, the configuration memory is a ++ * little different. ++ * Configuration Memory Low: ++ * bits 15-9: same ++ * bit 8: unipolar/bipolar (should be 0 for bipolar) ++ * bits 0-3: gain. This is 4 bits instead of 3 for the other boards ++ * 1001 gain=0.1 (+/- 50) ++ * 1010 0.2 ++ * 1011 0.1 ++ * 0001 1 ++ * 0010 2 ++ * 0011 5 ++ * 0100 10 ++ * 0101 20 ++ * 0110 50 ++ * Configuration Memory High: ++ * bits 12-14: Channel Type ++ * 001 for differential ++ * 000 for calibration ++ * bit 11: coupling (this is not currently handled) ++ * 1 AC coupling ++ * 0 DC coupling ++ * bits 0-2: channel ++ * valid channels are 0-3 ++ */ ++static void ni_load_channelgain_list(struct a4l_device *dev, ++ unsigned int n_chan, unsigned int *list) ++{ ++ unsigned int chan, range, aref; ++ unsigned int i; ++ unsigned int hi, lo; ++ unsigned offset; ++ unsigned int dither; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ni_m_series_load_channelgain_list(dev, n_chan, list); ++ return; ++ } ++ if (n_chan == 1 && (boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ if (devpriv->changain_state ++ && devpriv->changain_spec == list[0]) { ++ /* ready to go. */ ++ return; ++ } ++ devpriv->changain_state = 1; ++ devpriv->changain_spec = list[0]; ++ } else { ++ devpriv->changain_state = 0; ++ } ++ ++ devpriv->stc_writew(dev, 1, Configuration_Memory_Clear); ++ ++ /* Set up Calibration mode if required */ ++ if (boardtype.reg_type == ni_reg_6143) { ++ if ((list[0] & CR_ALT_SOURCE) ++ && !devpriv->ai_calib_source_enabled) { ++ /* Strobe Relay enable bit */ ++ ni_writew(devpriv-> ++ ai_calib_source | ++ Calibration_Channel_6143_RelayOn, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, ++ Calibration_Channel_6143); ++ devpriv->ai_calib_source_enabled = 1; ++ /* Allow relays to change */ ++ if(rtdm_in_rt_context()) ++ rtdm_task_sleep(100*1000000); ++ else ++ msleep_interruptible(100); ++ } else if (!(list[0] & CR_ALT_SOURCE) ++ && devpriv->ai_calib_source_enabled) { ++ /* Strobe Relay disable bit */ ++ ni_writew(devpriv-> ++ ai_calib_source | ++ Calibration_Channel_6143_RelayOff, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, ++ Calibration_Channel_6143); ++ devpriv->ai_calib_source_enabled = 0; ++ /* Allow relays to change */ ++ if(rtdm_in_rt_context()) ++ rtdm_task_sleep(100*1000000); ++ else ++ msleep_interruptible(100); ++ } ++ } ++ ++ offset = 1 << (boardtype.adbits - 1); ++ for (i = 0; i < n_chan; i++) { ++ if ((boardtype.reg_type != ni_reg_6143) ++ && (list[i] & CR_ALT_SOURCE)) { ++ chan = devpriv->ai_calib_source; ++ } else { ++ chan = CR_CHAN(list[i]); ++ } ++ aref = CR_AREF(list[i]); ++ range = CR_RNG(list[i]); ++ dither = ((list[i] & CR_ALT_FILTER) != 0); ++ ++ /* fix the external/internal range differences */ ++ range = ni_gainlkup[boardtype.gainlkup][range]; ++ if (boardtype.reg_type == ni_reg_611x) ++ devpriv->ai_offset[i] = offset; ++ else ++ devpriv->ai_offset[i] = (range & 
0x100) ? 0 : offset; ++ ++ hi = 0; ++ if ((list[i] & CR_ALT_SOURCE)) { ++ if (boardtype.reg_type == ni_reg_611x) ++ ni_writew(CR_CHAN(list[i]) & 0x0003, ++ Calibration_Channel_Select_611x); ++ } else { ++ if (boardtype.reg_type == ni_reg_611x) ++ aref = AREF_DIFF; ++ else if (boardtype.reg_type == ni_reg_6143) ++ aref = AREF_OTHER; ++ switch (aref) { ++ case AREF_DIFF: ++ hi |= AI_DIFFERENTIAL; ++ break; ++ case AREF_COMMON: ++ hi |= AI_COMMON; ++ break; ++ case AREF_GROUND: ++ hi |= AI_GROUND; ++ break; ++ case AREF_OTHER: ++ break; ++ } ++ } ++ hi |= AI_CONFIG_CHANNEL(chan); ++ ++ ni_writew(hi, Configuration_Memory_High); ++ ++ if (boardtype.reg_type != ni_reg_6143) { ++ lo = range; ++ if (i == n_chan - 1) ++ lo |= AI_LAST_CHANNEL; ++ if (dither) ++ lo |= AI_DITHER; ++ ++ ni_writew(lo, Configuration_Memory_Low); ++ } ++ } ++ ++ /* prime the channel/gain list */ ++ if ((boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ ni_prime_channelgain_list(dev); ++ } ++} ++ ++static int ni_ns_to_timer(const struct a4l_device *dev, ++ unsigned int nanosec, int round_mode) ++{ ++ int divider; ++ switch (round_mode) { ++ case TRIG_ROUND_NEAREST: ++ default: ++ divider = (nanosec + devpriv->clock_ns / 2) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ divider = (nanosec) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ divider = (nanosec + devpriv->clock_ns - 1) / devpriv->clock_ns; ++ break; ++ } ++ return divider - 1; ++} ++ ++static unsigned int ni_timer_to_ns(const struct a4l_device *dev, int timer) ++{ ++ return devpriv->clock_ns * (timer + 1); ++} ++ ++static unsigned int ni_min_ai_scan_period_ns(struct a4l_device *dev, ++ unsigned int num_channels) ++{ ++ switch (boardtype.reg_type) { ++ case ni_reg_611x: ++ case ni_reg_6143: ++ /* simultaneously-sampled inputs */ ++ return boardtype.ai_speed; ++ break; ++ default: ++ /* multiplexed inputs */ ++ break; ++ }; ++ return boardtype.ai_speed * num_channels; ++} ++ ++static struct a4l_cmd_desc mio_ai_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_NOW | TRIG_INT | TRIG_EXT, ++ .scan_begin_src = TRIG_TIMER | TRIG_EXT, ++ .convert_src = TRIG_TIMER | TRIG_EXT | TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++int ni_ai_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (trignum != 0) ++ return -EINVAL; ++ ++ devpriv->stc_writew(dev, AI_START1_Pulse | devpriv->ai_cmd2, ++ AI_Command_2_Register); ++ ++ return 1; ++} ++ ++#define cfc_check_trigger_arg_is(a,b) __cfc_check_trigger_arg_is(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_is(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg != val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) != val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_is_unique(a) __cfc_check_trigger_is_unique(a, dev, __LINE__) ++static inline int __cfc_check_trigger_is_unique(unsigned int src, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ /* this test is true if more than one _src bit is set */ ++ if ((src & (src - 1)) != 0) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: src (%d) \n", line, src); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_src(a,b) __cfc_check_trigger_src(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_src(unsigned int *src, ++ unsigned int flags, ++ struct a4l_device *dev, ++ 
unsigned int line) ++{ ++ unsigned int orig_src = *src; ++ ++ *src = orig_src & flags; ++ if (*src == 0 || *src != orig_src){ ++ a4l_dbg(1, drv_dbg, dev, "line %d: *src (%d) orig_src (%d) flags(%d) \n", ++ line, *src, orig_src, flags); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++#define cfc_check_trigger_arg_min(a,b) __cfc_check_trigger_arg_min(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_min(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg < val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) < val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#define cfc_check_trigger_arg_max(a,b) __cfc_check_trigger_arg_max(a,b, dev, __LINE__) ++static inline int __cfc_check_trigger_arg_max(unsigned int *arg, ++ unsigned int val, ++ struct a4l_device *dev, ++ unsigned int line) ++{ ++ if (*arg > val) { ++ a4l_dbg(1, drv_dbg, dev, "line %d: *arg (%d) > val (%d) \n", ++ line, *arg, val); ++ *arg = val; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int ni_ai_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int sources; ++ int tmp, err = 0; ++ ++ /* Step 1 : check if triggers are trivially valid */ ++ err |= cfc_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT | TRIG_EXT); ++ err |= cfc_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER | TRIG_EXT); ++ ++ sources = TRIG_TIMER | TRIG_EXT; ++ if (boardtype.reg_type == ni_reg_611x || boardtype.reg_type == ni_reg_6143) ++ sources |= TRIG_NOW; ++ ++ err |= cfc_check_trigger_src(&cmd->convert_src, sources); ++ err |= cfc_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); ++ err |= cfc_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(1)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 1 \n"); ++ return -EINVAL; ++ } ++ ++ /* Step 2a : make sure trigger sources are unique */ ++ err |= cfc_check_trigger_is_unique(cmd->start_src); ++ err |= cfc_check_trigger_is_unique(cmd->scan_begin_src); ++ err |= cfc_check_trigger_is_unique(cmd->convert_src); ++ err |= cfc_check_trigger_is_unique(cmd->stop_src); ++ ++ /* Step 2b : and mutually compatible */ ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(2)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 2 \n"); ++ return -EINVAL; ++ } ++ ++ /* Step 3: check if arguments are trivially valid */ ++ ++ if (cmd->start_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->start_arg); ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->start_arg & (CR_INVERT | CR_EDGE)); ++ err |= cfc_check_trigger_arg_is(&cmd->start_arg, tmp); ++ ++ } else { ++ /* true for both TRIG_NOW and TRIG_INT */ ++ err |= cfc_check_trigger_arg_is(&cmd->start_arg, 0); ++ } ++ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ err |= cfc_check_trigger_arg_min(&cmd->scan_begin_arg, ++ ni_min_ai_scan_period_ns(dev, cmd->nb_chan)); ++ ++ err |= cfc_check_trigger_arg_max(&cmd->scan_begin_arg, ++ devpriv->clock_ns * 0xffffff); ++ } else if (cmd->scan_begin_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->scan_begin_arg); ++ ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->scan_begin_arg & (CR_INVERT | CR_EDGE)); ++ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); ++ ++ } else { /* TRIG_OTHER */ ++ err |= cfc_check_trigger_arg_is(&cmd->scan_begin_arg, 0); ++ ++ } ++ ++ if (cmd->convert_src == TRIG_TIMER) { ++ if 
((boardtype.reg_type == ni_reg_611x) ++ || (boardtype.reg_type == ni_reg_6143)) { ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, 0); ++ ++ } else { ++ err |= cfc_check_trigger_arg_min(&cmd->convert_arg, ++ boardtype.ai_speed); ++ err |= cfc_check_trigger_arg_max(&cmd->convert_arg, ++ devpriv->clock_ns * 0xffff); ++ } ++ } else if (cmd->convert_src == TRIG_EXT) { ++ /* external trigger */ ++ unsigned int tmp = CR_CHAN(cmd->convert_arg); ++ ++ if (tmp > 16) ++ tmp = 16; ++ tmp |= (cmd->convert_arg & (CR_ALT_FILTER | CR_INVERT)); ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, tmp); ++ } else if (cmd->convert_src == TRIG_NOW) { ++ err |= cfc_check_trigger_arg_is(&cmd->convert_arg, 0); ++ } ++ ++ err |= cfc_check_trigger_arg_is(&cmd->scan_end_arg, cmd->nb_chan); ++ ++ if (cmd->stop_src == TRIG_COUNT) { ++ unsigned int max_count = 0x01000000; ++ ++ if (boardtype.reg_type == ni_reg_611x) ++ max_count -= num_adc_stages_611x; ++ err |= cfc_check_trigger_arg_max(&cmd->stop_arg, max_count); ++ err |= cfc_check_trigger_arg_min(&cmd->stop_arg, 1); ++ ++ } else { ++ /* TRIG_NONE */ ++ err |= cfc_check_trigger_arg_is(&cmd->stop_arg, 0); ++ } ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(3)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 3 \n"); ++ return 3; ++ } ++ ++ /* step 4: fix up any arguments */ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ tmp = cmd->scan_begin_arg; ++ cmd->scan_begin_arg = ++ ni_timer_to_ns(dev, ni_ns_to_timer(dev, ++ cmd->scan_begin_arg, ++ cmd->flags & ++ TRIG_ROUND_MASK)); ++ if (tmp != cmd->scan_begin_arg) ++ err++; ++ } ++ if (cmd->convert_src == TRIG_TIMER) { ++ if ((boardtype.reg_type != ni_reg_611x) ++ && (boardtype.reg_type != ni_reg_6143)) { ++ tmp = cmd->convert_arg; ++ cmd->convert_arg = ++ ni_timer_to_ns(dev, ni_ns_to_timer(dev, ++ cmd->convert_arg, ++ cmd-> ++ flags & ++ TRIG_ROUND_MASK)); ++ if (tmp != cmd->convert_arg) ++ err++; ++ if (cmd->scan_begin_src == TRIG_TIMER && ++ cmd->scan_begin_arg < ++ cmd->convert_arg * cmd->scan_end_arg) { ++ cmd->scan_begin_arg = ++ cmd->convert_arg * cmd->scan_end_arg; ++ err++; ++ } ++ } ++ } ++ ++ if (err) { ++ if (cmd->valid_simul_stages & BIT(4)) ++ return 0; ++ ++ a4l_dbg(1, drv_dbg, dev, "ai_cmdtest ERR 4 \n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++ ++ ++} ++ ++static int ni_ai_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ int timer; ++ int mode1 = 0; /* mode1 is needed for both stop and convert */ ++ int mode2 = 0; ++ int start_stop_select = 0; ++ unsigned int stop_count; ++ int interrupt_a_enable = 0; ++ ++ a4l_info(dev, "start\n"); ++ ++ if (a4l_get_irq(dev) == A4L_IRQ_UNUSED) { ++ a4l_err(dev, "ni_ai_cmd: cannot run command without an irq"); ++ return -EIO; ++ } ++ ni_clear_ai_fifo(dev); ++ ++ ni_load_channelgain_list(dev, cmd->nb_chan, cmd->chan_descs); ++ ++ /* start configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_Start, Joint_Reset_Register); ++ ++ /* disable analog triggering for now, since it ++ * interferes with the use of pfi0 */ ++ devpriv->an_trig_etc_reg &= ~Analog_Trigger_Enable; ++ devpriv->stc_writew(dev, devpriv->an_trig_etc_reg, ++ Analog_Trigger_Etc_Register); ++ ++ switch (cmd->start_src) { ++ case TRIG_INT: ++ case TRIG_NOW: ++ devpriv->stc_writew(dev, AI_START2_Select(0) | ++ AI_START1_Sync | AI_START1_Edge | AI_START1_Select(0), ++ AI_Trigger_Select_Register); ++ break; ++ case TRIG_EXT: ++ { ++ int chan = CR_CHAN(cmd->start_arg); ++ unsigned int bits = AI_START2_Select(0) | ++ 
AI_START1_Sync | AI_START1_Select(chan + 1); ++ ++ if (cmd->start_arg & CR_INVERT) ++ bits |= AI_START1_Polarity; ++ if (cmd->start_arg & CR_EDGE) ++ bits |= AI_START1_Edge; ++ devpriv->stc_writew(dev, bits, ++ AI_Trigger_Select_Register); ++ break; ++ } ++ } ++ ++ mode2 &= ~AI_Pre_Trigger; ++ mode2 &= ~AI_SC_Initial_Load_Source; ++ mode2 &= ~AI_SC_Reload_Mode; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ if (cmd->nb_chan == 1 || (boardtype.reg_type == ni_reg_611x) ++ || (boardtype.reg_type == ni_reg_6143)) { ++ start_stop_select |= AI_STOP_Polarity; ++ start_stop_select |= AI_STOP_Select(31);/* logic low */ ++ start_stop_select |= AI_STOP_Sync; ++ } else { ++ start_stop_select |= AI_STOP_Select(19);/* ai configuration memory */ ++ } ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ ++ devpriv->ai_cmd2 = 0; ++ switch (cmd->stop_src) { ++ case TRIG_COUNT: ++ stop_count = cmd->stop_arg - 1; ++ ++ if (boardtype.reg_type == ni_reg_611x) { ++ /* have to take 3 stage adc pipeline into account */ ++ stop_count += num_adc_stages_611x; ++ } ++ /* stage number of scans */ ++ devpriv->stc_writel(dev, stop_count, AI_SC_Load_A_Registers); ++ ++ mode1 |= AI_Start_Stop | AI_Mode_1_Reserved | AI_Trigger_Once; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ /* load SC (Scan Count) */ ++ devpriv->stc_writew(dev, AI_SC_Load, AI_Command_1_Register); ++ ++ devpriv->ai_continuous = 0; ++ if (stop_count == 0) { ++ devpriv->ai_cmd2 |= AI_End_On_End_Of_Scan; ++ interrupt_a_enable |= AI_STOP_Interrupt_Enable; ++ /* this is required to get the last sample ++ for nb_chan > 1, not sure why */ ++ if (cmd->nb_chan > 1) ++ start_stop_select |= ++ AI_STOP_Polarity | AI_STOP_Edge; ++ } ++ break; ++ case TRIG_NONE: ++ /* stage number of scans */ ++ devpriv->stc_writel(dev, 0, AI_SC_Load_A_Registers); ++ ++ mode1 |= AI_Start_Stop | AI_Mode_1_Reserved | AI_Continuous; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ ++ /* load SC (Scan Count) */ ++ devpriv->stc_writew(dev, AI_SC_Load, AI_Command_1_Register); ++ ++ devpriv->ai_continuous = 1; ++ ++ break; ++ } ++ ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ /* ++ stop bits for non 611x boards ++ AI_SI_Special_Trigger_Delay=0 ++ AI_Pre_Trigger=0 ++ AI_START_STOP_Select_Register: ++ AI_START_Polarity=0 (?) rising edge ++ AI_START_Edge=1 edge triggered ++ AI_START_Sync=1 (?) 
++ AI_START_Select=0 SI_TC ++ AI_STOP_Polarity=0 rising edge ++ AI_STOP_Edge=0 level ++ AI_STOP_Sync=1 ++ AI_STOP_Select=19 external pin (configuration mem) ++ */ ++ start_stop_select |= AI_START_Edge | AI_START_Sync; ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ ++ mode2 |= AI_SI_Reload_Mode(0); ++ /* AI_SI_Initial_Load_Source=A */ ++ mode2 &= ~AI_SI_Initial_Load_Source; ++ ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ /* load SI */ ++ timer = ni_ns_to_timer(dev, cmd->scan_begin_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writel(dev, timer, AI_SI_Load_A_Registers); ++ devpriv->stc_writew(dev, AI_SI_Load, AI_Command_1_Register); ++ break; ++ case TRIG_EXT: ++ if (cmd->scan_begin_arg & CR_EDGE) ++ start_stop_select |= AI_START_Edge; ++ /* AI_START_Polarity==1 is falling edge */ ++ if (cmd->scan_begin_arg & CR_INVERT) ++ start_stop_select |= AI_START_Polarity; ++ if (cmd->scan_begin_src != cmd->convert_src || ++ (cmd->scan_begin_arg & ~CR_EDGE) != ++ (cmd->convert_arg & ~CR_EDGE)) ++ start_stop_select |= AI_START_Sync; ++ start_stop_select |= ++ AI_START_Select(1 + CR_CHAN(cmd->scan_begin_arg)); ++ devpriv->stc_writew(dev, start_stop_select, ++ AI_START_STOP_Select_Register); ++ break; ++ } ++ ++ switch (cmd->convert_src) { ++ case TRIG_TIMER: ++ case TRIG_NOW: ++ if (cmd->convert_arg == 0 || cmd->convert_src == TRIG_NOW) ++ timer = 1; ++ else ++ timer = ni_ns_to_timer(dev, cmd->convert_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writew(dev, 1, AI_SI2_Load_A_Register); /* 0,0 does not work. */ ++ devpriv->stc_writew(dev, timer, AI_SI2_Load_B_Register); ++ ++ /* AI_SI2_Reload_Mode = alternate */ ++ /* AI_SI2_Initial_Load_Source = A */ ++ mode2 &= ~AI_SI2_Initial_Load_Source; ++ mode2 |= AI_SI2_Reload_Mode; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ /* AI_SI2_Load */ ++ devpriv->stc_writew(dev, AI_SI2_Load, AI_Command_1_Register); ++ ++ mode2 |= AI_SI2_Reload_Mode; /* alternate */ ++ mode2 |= AI_SI2_Initial_Load_Source; /* B */ ++ ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ break; ++ case TRIG_EXT: ++ mode1 |= AI_CONVERT_Source_Select(1 + cmd->convert_arg); ++ if ((cmd->convert_arg & CR_INVERT) == 0) ++ mode1 |= AI_CONVERT_Source_Polarity; ++ devpriv->stc_writew(dev, mode1, AI_Mode_1_Register); ++ ++ mode2 |= AI_Start_Stop_Gate_Enable | AI_SC_Gate_Enable; ++ devpriv->stc_writew(dev, mode2, AI_Mode_2_Register); ++ ++ break; ++ } ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ ++ /* interrupt on FIFO, errors, SC_TC */ ++ interrupt_a_enable |= AI_Error_Interrupt_Enable | ++ AI_SC_TC_Interrupt_Enable; ++ ++#if (!defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) && \ ++ !defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ interrupt_a_enable |= AI_FIFO_Interrupt_Enable; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ if (cmd->flags & TRIG_WAKE_EOS ++ || (devpriv->ai_cmd2 & AI_End_On_End_Of_Scan)) { ++ /* wake on end-of-scan */ ++ devpriv->aimode = AIMODE_SCAN; ++ } else { ++ devpriv->aimode = AIMODE_HALF_FULL; ++ } ++ ++ switch (devpriv->aimode) { ++ case AIMODE_HALF_FULL: ++ /* generate FIFO interrupts and DMA requests on half-full */ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF_to_E, ++ AI_Mode_3_Register); ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF, ++ AI_Mode_3_Register); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ break; ++ 
case AIMODE_SAMPLE: ++ /* generate FIFO interrupts on non-empty */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_NE, ++ AI_Mode_3_Register); ++ break; ++ case AIMODE_SCAN: ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, AI_FIFO_Mode_NE, ++ AI_Mode_3_Register); ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->stc_writew(dev, AI_FIFO_Mode_HF, ++ AI_Mode_3_Register); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ interrupt_a_enable |= AI_STOP_Interrupt_Enable; ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear interrupts */ ++ devpriv->stc_writew(dev, ++ AI_Error_Interrupt_Ack | AI_STOP_Interrupt_Ack | ++ AI_START_Interrupt_Ack | AI_START2_Interrupt_Ack | ++ AI_START1_Interrupt_Ack | AI_SC_TC_Interrupt_Ack | ++ AI_SC_TC_Error_Confirm, Interrupt_A_Ack_Register); /* clear interrupts */ ++ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ++ interrupt_a_enable, 1); ++ ++ a4l_info(dev, "Interrupt_A_Enable_Register = 0x%04x\n", ++ devpriv->int_a_enable_reg); ++ } else { ++ /* interrupt on nothing */ ++ ni_set_bits(dev, Interrupt_A_Enable_Register, ~0, 0); ++ ++ /* XXX start polling if necessary */ ++ a4l_warn(dev, "ni_ai_cmd: interrupting on nothing\n"); ++ } ++ ++ /* end configuration */ ++ devpriv->stc_writew(dev, AI_Configuration_End, Joint_Reset_Register); ++ ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ devpriv->stc_writew(dev, ++ AI_SI2_Arm | AI_SI_Arm | AI_DIV_Arm | AI_SC_Arm, ++ AI_Command_1_Register); ++ break; ++ case TRIG_EXT: ++ /* XXX AI_SI_Arm? */ ++ devpriv->stc_writew(dev, ++ AI_SI2_Arm | AI_SI_Arm | AI_DIV_Arm | AI_SC_Arm, ++ AI_Command_1_Register); ++ break; ++ } ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ { ++ int retval = ni_ai_setup_MITE_dma(subd); ++ if (retval) ++ return retval; ++ } ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ switch (cmd->start_src) { ++ case TRIG_NOW: ++ /* AI_START1_Pulse */ ++ devpriv->stc_writew(dev, AI_START1_Pulse | devpriv->ai_cmd2, ++ AI_Command_2_Register); ++ break; ++ case TRIG_EXT: ++ /* TODO: set trigger callback field to NULL */ ++ break; ++ case TRIG_INT: ++ /* TODO: set trigger callback field to ni_ai_inttrig */ ++ break; ++ } ++ ++ a4l_info(dev, "exit\n"); ++ ++ return 0; ++} ++ ++int ni_ai_config_analog_trig(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int a, b, modebits; ++ int err = 0; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++ /* data[1] is flags ++ * data[2] is analog line ++ * data[3] is set level ++ * data[4] is reset level */ ++ if (!boardtype.has_analog_trig) ++ return -EINVAL; ++ ++ if ((data[1] & 0xffff0000) != A4L_EV_SCAN_BEGIN) { ++ data[1] &= (A4L_EV_SCAN_BEGIN | 0xffff); ++ err++; ++ } ++ if (data[2] >= boardtype.n_adchan) { ++ data[2] = boardtype.n_adchan - 1; ++ err++; ++ } ++ if (data[3] > 255) { /* a */ ++ data[3] = 255; ++ err++; ++ } ++ if (data[4] > 255) { /* b */ ++ data[4] = 255; ++ err++; ++ } ++ /* ++ * 00 ignore ++ * 01 set ++ * 10 reset ++ * ++ * modes: ++ * 1 level: +b- +a- ++ * high mode 00 00 01 10 ++ * low mode 00 00 10 01 ++ * 2 level: (a> 4); ++ } ++ devpriv->atrig_low = a; ++ devpriv->atrig_high = b; ++ switch (modebits) { ++ case 0x81: /* low hysteresis mode */ ++ devpriv->atrig_mode = 6; ++ break; ++ case 0x42: /* high hysteresis mode */ ++ devpriv->atrig_mode = 3; ++ break; ++ case 0x96: /* middle window mode */ ++ 
devpriv->atrig_mode = 2; ++ break; ++ default: ++ data[1] &= ~0xff; ++ err++; ++ } ++ } else { ++ /* one level mode */ ++ if (b != 0) { ++ data[4] = 0; ++ err++; ++ } ++ switch (modebits) { ++ case 0x06: /* high window mode */ ++ devpriv->atrig_high = a; ++ devpriv->atrig_mode = 0; ++ break; ++ case 0x09: /* low window mode */ ++ devpriv->atrig_low = a; ++ devpriv->atrig_mode = 1; ++ break; ++ default: ++ data[1] &= ~0xff; ++ err++; ++ } ++ } ++ ++ if (err) ++ return -EAGAIN; ++ ++ return 0; ++} ++ ++int ni_ai_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_ANALOG_TRIG: ++ return ni_ai_config_analog_trig(subd, insn); ++ case A4L_INSN_CONFIG_ALT_SOURCE: ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ if (data[1] & ~(MSeries_AI_Bypass_Cal_Sel_Pos_Mask | ++ MSeries_AI_Bypass_Cal_Sel_Neg_Mask | ++ MSeries_AI_Bypass_Mode_Mux_Mask | ++ MSeries_AO_Bypass_AO_Cal_Sel_Mask)) { ++ return -EINVAL; ++ } ++ devpriv->ai_calib_source = data[1]; ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ unsigned int calib_source; ++ ++ calib_source = data[1] & 0xf; ++ ++ if (calib_source > 0xF) ++ return -EINVAL; ++ ++ devpriv->ai_calib_source = calib_source; ++ ni_writew(calib_source, Calibration_Channel_6143); ++ } else { ++ unsigned int calib_source; ++ unsigned int calib_source_adjust; ++ ++ calib_source = data[1] & 0xf; ++ calib_source_adjust = (data[1] >> 4) & 0xff; ++ ++ if (calib_source >= 8) ++ return -EINVAL; ++ devpriv->ai_calib_source = calib_source; ++ if (boardtype.reg_type == ni_reg_611x) { ++ ni_writeb(calib_source_adjust, ++ Cal_Gain_Select_611x); ++ } ++ } ++ return 0; ++ default: ++ break; ++ } ++ ++ return -EINVAL; ++} ++ ++/* munge data from unsigned to 2's complement for analog output bipolar modes */ ++static void ni_ao_munge(struct a4l_subdevice *subd, void *buf, unsigned long size) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct a4l_cmd_desc *cmd = a4l_get_cmd(subd); ++ int chan_idx = a4l_get_chan(subd); ++ uint16_t *array = buf; ++ unsigned int i, range, offset; ++ ++ offset = 1 << (boardtype.aobits - 1); ++ for (i = 0; i < size / sizeof(uint16_t); i++) { ++ ++ range = CR_RNG(cmd->chan_descs[chan_idx]); ++ if (boardtype.ao_unipolar == 0 || (range & 1) == 0) ++ array[i] -= offset; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ array[i] = cpu_to_le16(array[i]); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ chan_idx++; ++ chan_idx %= cmd->nb_chan; ++ } ++} ++ ++static int ni_m_series_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans, int timed) ++{ ++ unsigned int range; ++ unsigned int chan; ++ unsigned int conf; ++ int i, invert = 0; ++ struct a4l_device *dev = subd->dev; ++ ++ for (i = 0; i < boardtype.n_aochan; ++i) { ++ ni_writeb(0xf, M_Offset_AO_Waveform_Order(i)); ++ } ++ for (i = 0; i < n_chans; i++) { ++ struct a4l_range *rng; ++ int idx; ++ chan = CR_CHAN(chanspec[i]); ++ range = CR_RNG(chanspec[i]); ++ ++ /* TODO: this a huge hack! ++ Something is missing in the kernel API. We must ++ allow access on the proper range descriptor */ ++ idx = (subd->rng_desc->mode != ++ A4L_RNG_GLOBAL_RNGDESC) ? 
chan : 0; ++ rng = &(subd->rng_desc->rngtabs[idx]->rngs[range]); ++ ++ invert = 0; ++ conf = 0; ++ switch (rng->max - rng->min) { ++ case 20000000: ++ conf |= MSeries_AO_DAC_Reference_10V_Internal_Bits; ++ ni_writeb(0, M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 10000000: ++ conf |= MSeries_AO_DAC_Reference_5V_Internal_Bits; ++ ni_writeb(0, M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 4000000: ++ conf |= MSeries_AO_DAC_Reference_10V_Internal_Bits; ++ ni_writeb(MSeries_Attenuate_x5_Bit, ++ M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ case 2000000: ++ conf |= MSeries_AO_DAC_Reference_5V_Internal_Bits; ++ ni_writeb(MSeries_Attenuate_x5_Bit, ++ M_Offset_AO_Reference_Attenuation(chan)); ++ break; ++ default: ++ a4l_err(subd->dev, ++ "%s: bug! unhandled ao reference voltage\n", ++ __FUNCTION__); ++ break; ++ } ++ switch (rng->max + rng->min) { ++ case 0: ++ conf |= MSeries_AO_DAC_Offset_0V_Bits; ++ break; ++ case 10000000: ++ conf |= MSeries_AO_DAC_Offset_5V_Bits; ++ break; ++ default: ++ a4l_err(subd->dev, ++ "%s: bug! unhandled ao offset voltage\n", ++ __FUNCTION__); ++ break; ++ } ++ if (timed) ++ conf |= MSeries_AO_Update_Timed_Bit; ++ ni_writeb(conf, M_Offset_AO_Config_Bank(chan)); ++ devpriv->ao_conf[chan] = conf; ++ ni_writeb(i, M_Offset_AO_Waveform_Order(chan)); ++ } ++ return invert; ++} ++ ++static int ni_old_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int range; ++ unsigned int chan; ++ unsigned int conf; ++ int i, invert = 0; ++ ++ for (i = 0; i < n_chans; i++) { ++ chan = CR_CHAN(chanspec[i]); ++ range = CR_RNG(chanspec[i]); ++ conf = AO_Channel(chan); ++ ++ if (boardtype.ao_unipolar) { ++ if ((range & 1) == 0) { ++ conf |= AO_Bipolar; ++ invert = (1 << (boardtype.aobits - 1)); ++ } else { ++ invert = 0; ++ } ++ if (range & 2) ++ conf |= AO_Ext_Ref; ++ } else { ++ conf |= AO_Bipolar; ++ invert = (1 << (boardtype.aobits - 1)); ++ } ++ ++ /* not all boards can deglitch, but this shouldn't hurt */ ++ if (chanspec[i] & CR_DEGLITCH) ++ conf |= AO_Deglitch; ++ ++ /* analog reference */ ++ /* AREF_OTHER connects AO ground to AI ground, i think */ ++ conf |= (CR_AREF(chanspec[i]) == ++ AREF_OTHER) ? 
AO_Ground_Ref : 0; ++ ++ ni_writew(conf, AO_Configuration); ++ devpriv->ao_conf[chan] = conf; ++ } ++ return invert; ++} ++ ++static int ni_ao_config_chan_descs(struct a4l_subdevice *subd, ++ unsigned int chanspec[], ++ unsigned int n_chans, int timed) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_ao_config_chan_descs(subd, ++ chanspec, ++ n_chans, timed); ++ else ++ return ni_old_ao_config_chan_descs(subd, chanspec, n_chans); ++} ++ ++int ni_ao_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ data[0] = devpriv->ao[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++int ni_ao_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan = CR_CHAN(insn->chan_desc); ++ uint16_t *data = (uint16_t *)insn->data; ++ unsigned int invert; ++ ++ invert = ni_ao_config_chan_descs(subd, ++ &insn->chan_desc, 1, 0); ++ ++ devpriv->ao[chan] = data[0]; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ni_writew(data[0], M_Offset_DAC_Direct_Data(chan)); ++ } else ++ ni_writew(data[0] ^ invert, ++ (chan) ? DAC1_Direct_Data : DAC0_Direct_Data); ++ ++ return 0; ++} ++ ++int ni_ao_insn_write_671x(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan = CR_CHAN(insn->chan_desc); ++ uint16_t *data = (uint16_t *)insn->data; ++ unsigned int invert; ++ ++ ao_win_out(1 << chan, AO_Immediate_671x); ++ invert = 1 << (boardtype.aobits - 1); ++ ++ ni_ao_config_chan_descs(subd, &insn->chan_desc, 1, 0); ++ ++ devpriv->ao[chan] = data[0]; ++ ao_win_out(data[0] ^ invert, DACx_Direct_Data_671x(chan)); ++ ++ return 0; ++} ++ ++int ni_ao_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ int ret, interrupt_b_bits, i; ++ static const int timeout = 1000; ++ ++ if (trignum != 0) ++ return -EINVAL; ++ ++ /* TODO: disable trigger until a command is recorded. 
++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_FIFO_Interrupt_Enable | AO_Error_Interrupt_Enable, 0); ++ interrupt_b_bits = AO_Error_Interrupt_Enable; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->stc_writew(dev, 1, DAC_FIFO_Clear); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ ni_ao_win_outl(dev, 0x6, AO_FIFO_Offset_Load_611x); ++ ret = ni_ao_setup_MITE_dma(subd); ++ if (ret) ++ return ret; ++ ret = ni_ao_wait_for_dma_load(subd); ++ if (ret < 0) ++ return ret; ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ret = ni_ao_prep_fifo(subd); ++ if (ret == 0) ++ return -EPIPE; ++ ++ interrupt_b_bits |= AO_FIFO_Interrupt_Enable; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ devpriv->stc_writew(dev, devpriv->ao_mode3 | AO_Not_An_UPDATE, ++ AO_Mode_3_Register); ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ /* wait for DACs to be loaded */ ++ for (i = 0; i < timeout; i++) { ++ a4l_udelay(1); ++ if ((devpriv->stc_readw(dev,Joint_Status_2_Register) & ++ AO_TMRDACWRs_In_Progress_St) == 0) ++ break; ++ } ++ if (i == timeout) { ++ a4l_err(dev, ++ "ni_ao_inttrig: timed out " ++ "waiting for AO_TMRDACWRs_In_Progress_St to clear"); ++ return -EIO; ++ } ++ /* stc manual says we are need to clear error interrupt after ++ AO_TMRDACWRs_In_Progress_St clears */ ++ devpriv->stc_writew(dev, AO_Error_Interrupt_Ack, ++ Interrupt_B_Ack_Register); ++ ++ ni_set_bits(dev, Interrupt_B_Enable_Register, interrupt_b_bits, 1); ++ ++ devpriv->stc_writew(dev, ++ devpriv->ao_cmd1 | ++ AO_UI_Arm | AO_UC_Arm | ++ AO_BC_Arm | AO_DAC1_Update_Mode | ++ AO_DAC0_Update_Mode, ++ AO_Command_1_Register); ++ ++ devpriv->stc_writew(dev, ++ devpriv->ao_cmd2 | AO_START1_Pulse, ++ AO_Command_2_Register); ++ ++ return 0; ++} ++ ++int ni_ao_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ int bits; ++ int i; ++ unsigned trigvar; ++ ++ if (a4l_get_irq(dev) == A4L_IRQ_UNUSED) { ++ a4l_err(dev, "ni_ao_cmd: cannot run command without an irq"); ++ return -EIO; ++ } ++ ++ devpriv->stc_writew(dev, AO_Configuration_Start, Joint_Reset_Register); ++ ++ devpriv->stc_writew(dev, AO_Disarm, AO_Command_1_Register); ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ao_win_out(CLEAR_WG, AO_Misc_611x); ++ ++ bits = 0; ++ for (i = 0; i < cmd->nb_chan; i++) { ++ int chan; ++ ++ chan = CR_CHAN(cmd->chan_descs[i]); ++ bits |= 1 << chan; ++ ao_win_out(chan, AO_Waveform_Generation_611x); ++ } ++ ao_win_out(bits, AO_Timed_611x); ++ } ++ ++ ni_ao_config_chan_descs(subd, cmd->chan_descs, cmd->nb_chan, 1); ++ ++ if (cmd->stop_src == TRIG_NONE) { ++ devpriv->ao_mode1 |= AO_Continuous; ++ devpriv->ao_mode1 &= ~AO_Trigger_Once; ++ } else { ++ devpriv->ao_mode1 &= ~AO_Continuous; ++ devpriv->ao_mode1 |= AO_Trigger_Once; ++ } ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_trigger_select &= ++ ~(AO_START1_Polarity | AO_START1_Select(-1)); ++ devpriv->ao_trigger_select |= AO_START1_Edge | AO_START1_Sync; ++ devpriv->stc_writew(dev, devpriv->ao_trigger_select, ++ AO_Trigger_Select_Register); ++ devpriv->ao_mode3 &= ~AO_Trigger_Length; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); 
++ devpriv->ao_mode2 &= ~AO_BC_Initial_Load_Source; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ if (cmd->stop_src == TRIG_NONE) { ++ devpriv->stc_writel(dev, 0xffffff, AO_BC_Load_A_Register); ++ } else { ++ devpriv->stc_writel(dev, 0, AO_BC_Load_A_Register); ++ } ++ devpriv->stc_writew(dev, AO_BC_Load, AO_Command_1_Register); ++ devpriv->ao_mode2 &= ~AO_UC_Initial_Load_Source; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ switch (cmd->stop_src) { ++ case TRIG_COUNT: ++ devpriv->stc_writel(dev, cmd->stop_arg, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, cmd->stop_arg - 1, ++ AO_UC_Load_A_Register); ++ break; ++ case TRIG_NONE: ++ devpriv->stc_writel(dev, 0xffffff, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, 0xffffff, AO_UC_Load_A_Register); ++ break; ++ default: ++ devpriv->stc_writel(dev, 0, AO_UC_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UC_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, cmd->stop_arg, AO_UC_Load_A_Register); ++ } ++ ++ devpriv->ao_mode1 &= ++ ~(AO_UI_Source_Select(0x1f) | AO_UI_Source_Polarity | ++ AO_UPDATE_Source_Select(0x1f) | AO_UPDATE_Source_Polarity); ++ switch (cmd->scan_begin_src) { ++ case TRIG_TIMER: ++ devpriv->ao_cmd2 &= ~AO_BC_Gate_Enable; ++ trigvar = ++ ni_ns_to_timer(dev, cmd->scan_begin_arg, ++ TRIG_ROUND_NEAREST); ++ devpriv->stc_writel(dev, 1, AO_UI_Load_A_Register); ++ devpriv->stc_writew(dev, AO_UI_Load, AO_Command_1_Register); ++ devpriv->stc_writel(dev, trigvar, AO_UI_Load_A_Register); ++ break; ++ case TRIG_EXT: ++ devpriv->ao_mode1 |= ++ AO_UPDATE_Source_Select(cmd->scan_begin_arg); ++ if (cmd->scan_begin_arg & CR_INVERT) ++ devpriv->ao_mode1 |= AO_UPDATE_Source_Polarity; ++ devpriv->ao_cmd2 |= AO_BC_Gate_Enable; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ devpriv->stc_writew(dev, devpriv->ao_cmd2, AO_Command_2_Register); ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_mode2 &= ++ ~(AO_UI_Reload_Mode(3) | AO_UI_Initial_Load_Source); ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ ++ if ((boardtype.reg_type & ni_reg_6xxx_mask) == 0) { ++ if (cmd->scan_end_arg > 1) { ++ devpriv->ao_mode1 |= AO_Multiple_Channels; ++ devpriv->stc_writew(dev, ++ AO_Number_Of_Channels(cmd->scan_end_arg - 1) | ++ AO_UPDATE_Output_Select ++ (AO_Update_Output_High_Z), ++ AO_Output_Control_Register); ++ } else { ++ unsigned int bits; ++ devpriv->ao_mode1 &= ~AO_Multiple_Channels; ++ bits = AO_UPDATE_Output_Select(AO_Update_Output_High_Z); ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ bits |= AO_Number_Of_Channels(0); ++ } else { ++ bits |= AO_Number_Of_Channels(CR_CHAN(cmd-> ++ chan_descs[0])); ++ } ++ devpriv->stc_writew(dev, bits, ++ AO_Output_Control_Register); ++ } ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ } ++ ++ devpriv->stc_writew(dev, AO_DAC0_Update_Mode | AO_DAC1_Update_Mode, ++ AO_Command_1_Register); ++ ++ devpriv->ao_mode3 |= AO_Stop_On_Overrun_Error; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ ++ devpriv->ao_mode2 &= ~AO_FIFO_Mode_Mask; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ devpriv->ao_mode2 |= AO_FIFO_Mode_HF_to_F; ++#else /* !CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->ao_mode2 |= AO_FIFO_Mode_HF; ++#endif /* 
CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ devpriv->ao_mode2 &= ~AO_FIFO_Retransmit_Enable; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ ++ bits = AO_BC_Source_Select | AO_UPDATE_Pulse_Width | ++ AO_TMRDACWR_Pulse_Width; ++ if (boardtype.ao_fifo_depth) ++ bits |= AO_FIFO_Enable; ++ else ++ bits |= AO_DMA_PIO_Control; ++#if 0 ++ /* F Hess: windows driver does not set AO_Number_Of_DAC_Packages bit for 6281, ++ verified with bus analyzer. */ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ bits |= AO_Number_Of_DAC_Packages; ++#endif ++ devpriv->stc_writew(dev, bits, AO_Personal_Register); ++ /* enable sending of ao dma requests */ ++ devpriv->stc_writew(dev, AO_AOFREQ_Enable, AO_Start_Select_Register); ++ ++ devpriv->stc_writew(dev, AO_Configuration_End, Joint_Reset_Register); ++ ++ if (cmd->stop_src == TRIG_COUNT) { ++ devpriv->stc_writew(dev, AO_BC_TC_Interrupt_Ack, ++ Interrupt_B_Ack_Register); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ++ AO_BC_TC_Interrupt_Enable, 1); ++ } ++ ++ return 0; ++} ++ ++struct a4l_cmd_desc mio_ao_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_INT, ++ .scan_begin_src = TRIG_TIMER | TRIG_EXT, ++ .convert_src = TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_COUNT | TRIG_NONE, ++}; ++ ++int ni_ao_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ /* Make sure trigger sources are unique and mutually compatible */ ++ ++ if (cmd->stop_src != TRIG_COUNT && cmd->stop_src != TRIG_NONE) ++ return -EINVAL; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ if (cmd->scan_begin_arg < boardtype.ao_speed) { ++ cmd->scan_begin_arg = boardtype.ao_speed; ++ return -EINVAL; ++ } ++ if (cmd->scan_begin_arg > devpriv->clock_ns * 0xffffff) { ++ /* XXX check */ ++ cmd->scan_begin_arg = devpriv->clock_ns * 0xffffff; ++ return -EINVAL; ++ } ++ } ++ ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ cmd->scan_end_arg = cmd->nb_chan; ++ return -EINVAL; ++ } ++ if (cmd->stop_src == TRIG_COUNT) { ++ /* XXX check */ ++ if (cmd->stop_arg > 0x00ffffff) { ++ cmd->stop_arg = 0x00ffffff; ++ return -EINVAL; ++ } ++ } else { ++ /* TRIG_NONE */ ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ } ++ ++ /* step 4: fix up any arguments */ ++ if (cmd->scan_begin_src == TRIG_TIMER) { ++ ++ if(cmd->scan_begin_arg != ++ ni_timer_to_ns(dev, ++ ni_ns_to_timer(dev, ++ cmd->scan_begin_arg, ++ cmd->flags & TRIG_ROUND_MASK))) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++void ni_ao_reset(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ ++ ni_release_ao_mite_channel(dev); ++ ++ devpriv->stc_writew(dev, AO_Configuration_Start, Joint_Reset_Register); ++ devpriv->stc_writew(dev, AO_Disarm, AO_Command_1_Register); ++ ni_set_bits(dev, Interrupt_B_Enable_Register, ~0, 0); ++ devpriv->stc_writew(dev, AO_BC_Source_Select, AO_Personal_Register); ++ devpriv->stc_writew(dev, 0x3f98, Interrupt_B_Ack_Register); ++ devpriv->stc_writew(dev, AO_BC_Source_Select | AO_UPDATE_Pulse_Width | ++ AO_TMRDACWR_Pulse_Width, AO_Personal_Register); ++ devpriv->stc_writew(dev, 0, AO_Output_Control_Register); ++ devpriv->stc_writew(dev, 0, AO_Start_Select_Register); ++ devpriv->ao_cmd1 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_cmd1, AO_Command_1_Register); ++ 
devpriv->ao_cmd2 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_cmd2, AO_Command_2_Register); ++ devpriv->ao_mode1 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode1, AO_Mode_1_Register); ++ devpriv->ao_mode2 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode2, AO_Mode_2_Register); ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ devpriv->ao_mode3 = AO_Last_Gate_Disable; ++ else ++ devpriv->ao_mode3 = 0; ++ devpriv->stc_writew(dev, devpriv->ao_mode3, AO_Mode_3_Register); ++ devpriv->ao_trigger_select = 0; ++ devpriv->stc_writew(dev, devpriv->ao_trigger_select, ++ AO_Trigger_Select_Register); ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ao_win_out(0x3, AO_Immediate_671x); ++ ao_win_out(CLEAR_WG, AO_Misc_611x); ++ } ++ devpriv->stc_writew(dev, AO_Configuration_End, Joint_Reset_Register); ++} ++ ++/* digital io */ ++ ++int ni_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "chan=%d io=%d\n", CR_CHAN(insn->chan_desc), data[0]); ++#endif /* CONFIG_DEBUG_DIO */ ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ devpriv->io_bits |= 1 << CR_CHAN(insn->chan_desc); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ devpriv->io_bits &= ~(1 << CR_CHAN(insn->chan_desc)); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bits & ++ (1 << CR_CHAN(insn->chan_desc))) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ devpriv->dio_control &= ~DIO_Pins_Dir_Mask; ++ devpriv->dio_control |= DIO_Pins_Dir(devpriv->io_bits); ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ ++ return 1; ++} ++ ++int ni_dio_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ /* Perform check to make sure we're not using the ++ serial part of the dio */ ++ if ((data[0] & (DIO_SDIN | DIO_SDOUT)) ++ && devpriv->serial_interval_ns) ++ return -EBUSY; ++ ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ devpriv->dio_output &= ~DIO_Parallel_Data_Mask; ++ devpriv->dio_output |= ++ DIO_Parallel_Data_Out(devpriv->dio_state); ++ devpriv->stc_writew(dev, devpriv->dio_output, ++ DIO_Output_Register); ++ } ++ ++ data[1] = (uint8_t) ++ devpriv->stc_readw(dev, DIO_Parallel_Input_Register); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "chan=%d io=%d\n", CR_CHAN(insn->chan_desc), data[0]); ++#endif ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_DIO_OUTPUT: ++ devpriv->io_bits |= 1 << CR_CHAN(insn->chan_desc); ++ break; ++ case A4L_INSN_CONFIG_DIO_INPUT: ++ devpriv->io_bits &= ~(1 << CR_CHAN(insn->chan_desc)); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bits & ++ (1 << CR_CHAN(insn->chan_desc))) ? 
++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ ni_writel(devpriv->io_bits, M_Offset_DIO_Direction); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_bits_8(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint8_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ ni_writel(devpriv->dio_state, M_Offset_Static_Digital_Output); ++ } ++ ++ data[1] = (uint8_t) ni_readl(M_Offset_Static_Digital_Input); ++ ++ return 0; ++} ++ ++int ni_m_series_dio_insn_bits_32(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint32_t *data = (uint32_t *)insn->data; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "mask=0x%x bits=0x%x\n", data[0], data[1]); ++#endif ++ ++ if (insn->data_size != 2 * sizeof(uint32_t)) ++ return -EINVAL; ++ ++ if (data[0]) { ++ devpriv->dio_state &= ~data[0]; ++ devpriv->dio_state |= (data[0] & data[1]); ++ ni_writel(devpriv->dio_state, M_Offset_Static_Digital_Output); ++ } ++ ++ data[1] = ni_readl(M_Offset_Static_Digital_Input); ++ ++ return 0; ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++struct a4l_cmd_desc mio_dio_cmd_mask = { ++ .idx_subd = 0, ++ .start_src = TRIG_INT, ++ .scan_begin_src = TRIG_EXT, ++ .convert_src = TRIG_NOW, ++ .scan_end_src = TRIG_COUNT, ++ .stop_src = TRIG_NONE, ++}; ++ ++int ni_cdio_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ unsigned int i; ++ ++ /* Make sure arguments are trivially compatible */ ++ ++ if (cmd->start_arg != 0) { ++ cmd->start_arg = 0; ++ return -EINVAL; ++ } ++ ++ if ((cmd->scan_begin_arg & ++ PACK_FLAGS(CDO_Sample_Source_Select_Mask, 0, 0, CR_INVERT)) != ++ cmd->scan_begin_arg) ++ return -EINVAL; ++ ++ if (cmd->convert_arg != 0) { ++ cmd->convert_arg = 0; ++ return -EINVAL; ++ } ++ ++ if (cmd->scan_end_arg != cmd->nb_chan) { ++ cmd->scan_end_arg = cmd->nb_chan; ++ return -EINVAL; ++ } ++ ++ if (cmd->stop_arg != 0) { ++ cmd->stop_arg = 0; ++ return -EINVAL; ++ } ++ ++ /* Check chan_descs */ ++ ++ for (i = 0; i < cmd->nb_chan; ++i) { ++ if (cmd->chan_descs[i] != i) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int ni_cdio_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned cdo_mode_bits = CDO_FIFO_Mode_Bit | CDO_Halt_On_Error_Bit; ++ ++ ni_writel(CDO_Reset_Bit, M_Offset_CDIO_Command); ++ switch (cmd->scan_begin_src) { ++ case TRIG_EXT: ++ cdo_mode_bits |= ++ CR_CHAN(cmd->scan_begin_arg) & ++ CDO_Sample_Source_Select_Mask; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ if (cmd->scan_begin_arg & CR_INVERT) ++ cdo_mode_bits |= CDO_Polarity_Bit; ++ ni_writel(cdo_mode_bits, M_Offset_CDO_Mode); ++ ++ if (devpriv->io_bits) { ++ ni_writel(devpriv->dio_state, M_Offset_CDO_FIFO_Data); ++ ni_writel(CDO_SW_Update_Bit, M_Offset_CDIO_Command); ++ ni_writel(devpriv->io_bits, M_Offset_CDO_Mask_Enable); ++ } else { ++ a4l_err(dev, ++ "ni_cdio_cmd: attempted to run digital " ++ "output command with no lines configured as outputs"); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++void ni_cdio_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ 
ni_writel(CDO_Disarm_Bit | CDO_Error_Interrupt_Enable_Clear_Bit | ++ CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit | ++ CDO_FIFO_Request_Interrupt_Enable_Clear_Bit, ++ M_Offset_CDIO_Command); ++ ++ ni_writel(0, M_Offset_CDO_Mask_Enable); ++ ni_release_cdo_mite_channel(dev); ++} ++ ++int ni_cdo_inttrig(struct a4l_subdevice *subd, lsampl_t trignum) ++{ ++ struct a4l_device *dev = subd->dev; ++ int err; ++ unsigned i; ++ const unsigned timeout = 1000; ++ ++ /* TODO: disable trigger until a command is recorded. ++ Null trig at beginning prevent ao start trigger from executing ++ more than once per command (and doing things like trying to ++ allocate the ao dma channel multiple times) */ ++ ++ err = ni_cdo_setup_MITE_dma(subd); ++ if (err < 0) ++ return err; ++ ++ /* wait for dma to fill output fifo */ ++ for (i = 0; i < timeout; ++i) { ++ if (ni_readl(M_Offset_CDIO_Status) & CDO_FIFO_Full_Bit) ++ break; ++ a4l_udelay(10); ++ } ++ ++ if (i == timeout) { ++ a4l_err(dev, "ni_cdo_inttrig: dma failed to fill cdo fifo!"); ++ ni_cdio_cancel(subd); ++ return -EIO; ++ } ++ ++ ni_writel(CDO_Arm_Bit | ++ CDO_Error_Interrupt_Enable_Set_Bit | ++ CDO_Empty_FIFO_Interrupt_Enable_Set_Bit, ++ M_Offset_CDIO_Command); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++static void handle_cdio_interrupt(struct a4l_device *dev) ++{ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ unsigned cdio_status; ++ unsigned long flags; ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_DIO_SUBDEV); ++ ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) { ++ return; ++ } ++ rtdm_lock_get_irqsave(&devpriv->mite_channel_lock, flags); ++ if (devpriv->cdo_mite_chan) { ++ unsigned cdo_mite_status = ++ a4l_mite_get_status(devpriv->cdo_mite_chan); ++ if (cdo_mite_status & CHSR_LINKC) { ++ writel(CHOR_CLRLC, ++ devpriv->mite->mite_io_addr + ++ MITE_CHOR(devpriv->cdo_mite_chan->channel)); ++ } ++ a4l_mite_sync_output_dma(devpriv->cdo_mite_chan, subd); ++ } ++ rtdm_lock_put_irqrestore(&devpriv->mite_channel_lock, flags); ++ ++ cdio_status = ni_readl(M_Offset_CDIO_Status); ++ if (cdio_status & (CDO_Overrun_Bit | CDO_Underflow_Bit)) { ++ /* XXX just guessing this is needed and does something useful */ ++ ni_writel(CDO_Error_Interrupt_Confirm_Bit, M_Offset_CDIO_Command); ++ a4l_buf_evt(subd, A4L_BUF_ERROR); ++ } ++ if (cdio_status & CDO_FIFO_Empty_Bit) { ++ ni_writel(CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit, ++ M_Offset_CDIO_Command); ++ } ++ a4l_buf_evt(subd, 0); ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++} ++ ++static int ni_serial_hw_readwrite8(struct a4l_device * dev, ++ unsigned char data_out, unsigned char *data_in) ++{ ++ unsigned int status1; ++ int err = 0, count = 20; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "outputting 0x%x\n", data_out); ++#endif ++ ++ devpriv->dio_output &= ~DIO_Serial_Data_Mask; ++ devpriv->dio_output |= DIO_Serial_Data_Out(data_out); ++ devpriv->stc_writew(dev, devpriv->dio_output, DIO_Output_Register); ++ ++ status1 = devpriv->stc_readw(dev, Joint_Status_1_Register); ++ if (status1 & DIO_Serial_IO_In_Progress_St) { ++ err = -EBUSY; ++ goto Error; ++ } ++ ++ devpriv->dio_control |= DIO_HW_Serial_Start; ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ devpriv->dio_control &= ~DIO_HW_Serial_Start; ++ ++ /* Wait until STC says we're done, but don't loop infinitely. 
*/ ++ while ((status1 = ++ devpriv->stc_readw(dev, ++ Joint_Status_1_Register)) & ++ DIO_Serial_IO_In_Progress_St) { ++ /* Delay one bit per loop */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ if (--count < 0) { ++ a4l_err(dev, ++ "ni_serial_hw_readwrite8: " ++ "SPI serial I/O didn't finish in time!\n"); ++ err = -ETIME; ++ goto Error; ++ } ++ } ++ ++ /* Delay for last bit. This delay is absolutely necessary, because ++ DIO_Serial_IO_In_Progress_St goes high one bit too early. */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ ++ if (data_in != NULL) { ++ *data_in = devpriv->stc_readw(dev, DIO_Serial_Input_Register); ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "inputted 0x%x\n", *data_in); ++#endif ++ } ++ ++Error: ++ devpriv->stc_writew(dev, devpriv->dio_control, DIO_Control_Register); ++ ++ return err; ++} ++ ++static int ni_serial_sw_readwrite8(struct a4l_device * dev, ++ unsigned char data_out, unsigned char *data_in) ++{ ++ unsigned char mask, input = 0; ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "outputting 0x%x\n", data_out); ++#endif ++ ++ /* Wait for one bit before transfer */ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 1000); ++ ++ for (mask = 0x80; mask; mask >>= 1) { ++ /* Output current bit; note that we cannot touch devpriv->dio_state ++ because it is a per-subdevice field, and serial is ++ a separate subdevice from DIO. */ ++ devpriv->dio_output &= ~DIO_SDOUT; ++ if (data_out & mask) { ++ devpriv->dio_output |= DIO_SDOUT; ++ } ++ devpriv->stc_writew(dev, devpriv->dio_output, ++ DIO_Output_Register); ++ ++ /* Assert SDCLK (active low, inverted), wait for half of ++ the delay, deassert SDCLK, and wait for the other half. */ ++ devpriv->dio_control |= DIO_Software_Serial_Control; ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 2000); ++ ++ devpriv->dio_control &= ~DIO_Software_Serial_Control; ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ ++ a4l_udelay((devpriv->serial_interval_ns + 999) / 2000); ++ ++ /* Input current bit */ ++ if (devpriv->stc_readw(dev, ++ DIO_Parallel_Input_Register) & DIO_SDIN) { ++ input |= mask; ++ } ++ } ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "inputted 0x%x\n", input); ++#endif ++ if (data_in) ++ *data_in = input; ++ ++ return 0; ++} ++ ++int ni_serial_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ int err = 0; ++ unsigned char byte_out, byte_in = 0; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size != 2 * sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SERIAL_CLOCK: ++ ++#ifdef CONFIG_DEBUG_DIO ++ a4l_info(dev, "SPI serial clock Config %d\n", data[1]); ++#endif ++ ++ devpriv->serial_hw_mode = 1; ++ devpriv->dio_control |= DIO_HW_Serial_Enable; ++ ++ if (data[1] == SERIAL_DISABLED) { ++ devpriv->serial_hw_mode = 0; ++ devpriv->dio_control &= ~(DIO_HW_Serial_Enable | ++ DIO_Software_Serial_Control); ++ data[1] = SERIAL_DISABLED; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_600NS) { ++ /* Warning: this clock speed is too fast to reliably ++ control SCXI. 
*/ ++ devpriv->dio_control &= ~DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase; ++ devpriv->clock_and_fout &= ~DIO_Serial_Out_Divide_By_2; ++ data[1] = SERIAL_600NS; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_1_2US) { ++ devpriv->dio_control &= ~DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase | ++ DIO_Serial_Out_Divide_By_2; ++ data[1] = SERIAL_1_2US; ++ devpriv->serial_interval_ns = data[1]; ++ } else if (data[1] <= SERIAL_10US) { ++ devpriv->dio_control |= DIO_HW_Serial_Timebase; ++ devpriv->clock_and_fout |= Slow_Internal_Timebase | ++ DIO_Serial_Out_Divide_By_2; ++ /* Note: DIO_Serial_Out_Divide_By_2 only affects ++ 600ns/1.2us. If you turn divide_by_2 off with the ++ slow clock, you will still get 10us, except then ++ all your delays are wrong. */ ++ data[1] = SERIAL_10US; ++ devpriv->serial_interval_ns = data[1]; ++ } else { ++ devpriv->dio_control &= ~(DIO_HW_Serial_Enable | ++ DIO_Software_Serial_Control); ++ devpriv->serial_hw_mode = 0; ++ data[1] = (data[1] / 1000) * 1000; ++ devpriv->serial_interval_ns = data[1]; ++ } ++ ++ devpriv->stc_writew(dev, devpriv->dio_control, ++ DIO_Control_Register); ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ return 0; ++ ++ break; ++ ++ case A4L_INSN_CONFIG_BIDIRECTIONAL_DATA: ++ ++ if (devpriv->serial_interval_ns == 0) { ++ return -EINVAL; ++ } ++ ++ byte_out = data[1] & 0xFF; ++ ++ if (devpriv->serial_hw_mode) { ++ err = ni_serial_hw_readwrite8(dev, byte_out, &byte_in); ++ } else if (devpriv->serial_interval_ns > 0) { ++ err = ni_serial_sw_readwrite8(dev, byte_out, &byte_in); ++ } else { ++ a4l_err(dev, ++ "ni_serial_insn_config: serial disabled!\n"); ++ return -EINVAL; ++ } ++ if (err < 0) ++ return err; ++ data[1] = byte_in & 0xFF; ++ return 0; ++ ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return -EINVAL; ++} ++ ++void mio_common_detach(struct a4l_device * dev) ++{ ++ if (dev->priv) { ++ if (devpriv->counter_dev) { ++ a4l_ni_gpct_device_destroy(devpriv->counter_dev); ++ } ++ } ++} ++ ++static void init_ao_67xx(struct a4l_device * dev) ++{ ++ struct a4l_subdevice *subd = a4l_get_subd(dev, NI_AO_SUBDEV); ++ int i; ++ ++ if (subd == NULL) { ++ a4l_err(dev, "%s: unable to find AO subdevice\n", __FUNCTION__); ++ return; ++ } ++ ++ for (i = 0; i < subd->chan_desc->length; i++) ++ ni_ao_win_outw(dev, AO_Channel(i) | 0x0, ++ AO_Configuration_2_67xx); ++} ++ ++static unsigned int ni_gpct_to_stc_register(enum ni_gpct_register reg) ++{ ++ unsigned stc_register; ++ switch (reg) { ++ case NITIO_G0_Autoincrement_Reg: ++ stc_register = G_Autoincrement_Register(0); ++ break; ++ case NITIO_G1_Autoincrement_Reg: ++ stc_register = G_Autoincrement_Register(1); ++ break; ++ case NITIO_G0_Command_Reg: ++ stc_register = G_Command_Register(0); ++ break; ++ case NITIO_G1_Command_Reg: ++ stc_register = G_Command_Register(1); ++ break; ++ case NITIO_G0_HW_Save_Reg: ++ stc_register = G_HW_Save_Register(0); ++ break; ++ case NITIO_G1_HW_Save_Reg: ++ stc_register = G_HW_Save_Register(1); ++ break; ++ case NITIO_G0_SW_Save_Reg: ++ stc_register = G_Save_Register(0); ++ break; ++ case NITIO_G1_SW_Save_Reg: ++ stc_register = G_Save_Register(1); ++ break; ++ case NITIO_G0_Mode_Reg: ++ stc_register = G_Mode_Register(0); ++ break; ++ case NITIO_G1_Mode_Reg: ++ stc_register = G_Mode_Register(1); ++ break; ++ case NITIO_G0_LoadA_Reg: ++ stc_register = G_Load_A_Register(0); ++ break; ++ case NITIO_G1_LoadA_Reg: ++ stc_register = 
G_Load_A_Register(1); ++ break; ++ case NITIO_G0_LoadB_Reg: ++ stc_register = G_Load_B_Register(0); ++ break; ++ case NITIO_G1_LoadB_Reg: ++ stc_register = G_Load_B_Register(1); ++ break; ++ case NITIO_G0_Input_Select_Reg: ++ stc_register = G_Input_Select_Register(0); ++ break; ++ case NITIO_G1_Input_Select_Reg: ++ stc_register = G_Input_Select_Register(1); ++ break; ++ case NITIO_G01_Status_Reg: ++ stc_register = G_Status_Register; ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ stc_register = Joint_Reset_Register; ++ break; ++ case NITIO_G01_Joint_Status1_Reg: ++ stc_register = Joint_Status_1_Register; ++ break; ++ case NITIO_G01_Joint_Status2_Reg: ++ stc_register = Joint_Status_2_Register; ++ break; ++ case NITIO_G0_Interrupt_Acknowledge_Reg: ++ stc_register = Interrupt_A_Ack_Register; ++ break; ++ case NITIO_G1_Interrupt_Acknowledge_Reg: ++ stc_register = Interrupt_B_Ack_Register; ++ break; ++ case NITIO_G0_Status_Reg: ++ stc_register = AI_Status_1_Register; ++ break; ++ case NITIO_G1_Status_Reg: ++ stc_register = AO_Status_1_Register; ++ break; ++ case NITIO_G0_Interrupt_Enable_Reg: ++ stc_register = Interrupt_A_Enable_Register; ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ stc_register = Interrupt_B_Enable_Register; ++ break; ++ default: ++ __a4l_err("%s: unhandled register 0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return stc_register; ++} ++ ++static void ni_gpct_write_register(struct ni_gpct *counter, ++ unsigned int bits, enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ unsigned stc_register; ++ /* bits in the join reset register which are relevant to counters */ ++ static const unsigned gpct_joint_reset_mask = G0_Reset | G1_Reset; ++ static const unsigned gpct_interrupt_a_enable_mask = ++ G0_Gate_Interrupt_Enable | G0_TC_Interrupt_Enable; ++ static const unsigned gpct_interrupt_b_enable_mask = ++ G1_Gate_Interrupt_Enable | G1_TC_Interrupt_Enable; ++ ++ switch (reg) { ++ /* m-series-only registers */ ++ case NITIO_G0_Counting_Mode_Reg: ++ ni_writew(bits, M_Offset_G0_Counting_Mode); ++ break; ++ case NITIO_G1_Counting_Mode_Reg: ++ ni_writew(bits, M_Offset_G1_Counting_Mode); ++ break; ++ case NITIO_G0_Second_Gate_Reg: ++ ni_writew(bits, M_Offset_G0_Second_Gate); ++ break; ++ case NITIO_G1_Second_Gate_Reg: ++ ni_writew(bits, M_Offset_G1_Second_Gate); ++ break; ++ case NITIO_G0_DMA_Config_Reg: ++ ni_writew(bits, M_Offset_G0_DMA_Config); ++ break; ++ case NITIO_G1_DMA_Config_Reg: ++ ni_writew(bits, M_Offset_G1_DMA_Config); ++ break; ++ case NITIO_G0_ABZ_Reg: ++ ni_writew(bits, M_Offset_G0_MSeries_ABZ); ++ break; ++ case NITIO_G1_ABZ_Reg: ++ ni_writew(bits, M_Offset_G1_MSeries_ABZ); ++ break; ++ ++ /* 32 bit registers */ ++ case NITIO_G0_LoadA_Reg: ++ case NITIO_G1_LoadA_Reg: ++ case NITIO_G0_LoadB_Reg: ++ case NITIO_G1_LoadB_Reg: ++ stc_register = ni_gpct_to_stc_register(reg); ++ devpriv->stc_writel(dev, bits, stc_register); ++ break; ++ ++ /* 16 bit registers */ ++ case NITIO_G0_Interrupt_Enable_Reg: ++ BUG_ON(bits & ~gpct_interrupt_a_enable_mask); ++ ni_set_bitfield(dev, Interrupt_A_Enable_Register, ++ gpct_interrupt_a_enable_mask, bits); ++ break; ++ case NITIO_G1_Interrupt_Enable_Reg: ++ BUG_ON(bits & ~gpct_interrupt_b_enable_mask); ++ ni_set_bitfield(dev, Interrupt_B_Enable_Register, ++ gpct_interrupt_b_enable_mask, bits); ++ break; ++ case NITIO_G01_Joint_Reset_Reg: ++ BUG_ON(bits & ~gpct_joint_reset_mask); ++ /* fall-through */ ++ default: ++ stc_register = ni_gpct_to_stc_register(reg); ++ 
devpriv->stc_writew(dev, bits, stc_register); ++ } ++} ++ ++static unsigned int ni_gpct_read_register(struct ni_gpct *counter, ++ enum ni_gpct_register reg) ++{ ++ struct a4l_device *dev = counter->counter_dev->dev; ++ unsigned int stc_register; ++ switch (reg) { ++ /* m-series only registers */ ++ case NITIO_G0_DMA_Status_Reg: ++ return ni_readw(M_Offset_G0_DMA_Status); ++ break; ++ case NITIO_G1_DMA_Status_Reg: ++ return ni_readw(M_Offset_G1_DMA_Status); ++ break; ++ ++ /* 32 bit registers */ ++ case NITIO_G0_HW_Save_Reg: ++ case NITIO_G1_HW_Save_Reg: ++ case NITIO_G0_SW_Save_Reg: ++ case NITIO_G1_SW_Save_Reg: ++ stc_register = ni_gpct_to_stc_register(reg); ++ return devpriv->stc_readl(dev, stc_register); ++ break; ++ ++ /* 16 bit registers */ ++ default: ++ stc_register = ni_gpct_to_stc_register(reg); ++ return devpriv->stc_readw(dev, stc_register); ++ break; ++ } ++ return 0; ++} ++ ++int ni_freq_out_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = FOUT_Divider(devpriv->clock_and_fout); ++ ++ return 0; ++} ++ ++int ni_freq_out_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ devpriv->clock_and_fout &= ~FOUT_Enable; ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ devpriv->clock_and_fout &= ~FOUT_Divider_mask; ++ devpriv->clock_and_fout |= FOUT_Divider(data[0]); ++ devpriv->clock_and_fout |= FOUT_Enable; ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ return 0; ++} ++ ++static int ni_set_freq_out_clock(struct a4l_device * dev, lsampl_t clock_source) ++{ ++ switch (clock_source) { ++ case NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC: ++ devpriv->clock_and_fout &= ~FOUT_Timebase_Select; ++ break; ++ case NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC: ++ devpriv->clock_and_fout |= FOUT_Timebase_Select; ++ break; ++ default: ++ return -EINVAL; ++ } ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ return 0; ++} ++ ++static void ni_get_freq_out_clock(struct a4l_device * dev, ++ unsigned int * clock_source, ++ unsigned int * clock_period_ns) ++{ ++ if (devpriv->clock_and_fout & FOUT_Timebase_Select) { ++ *clock_source = NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC; ++ *clock_period_ns = TIMEBASE_2_NS; ++ } else { ++ *clock_source = NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC; ++ *clock_period_ns = TIMEBASE_1_NS * 2; ++ } ++} ++ ++int ni_freq_out_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int *)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_SET_CLOCK_SRC: ++ return ni_set_freq_out_clock(dev, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_CLOCK_SRC: ++ ni_get_freq_out_clock(dev, &data[1], &data[2]); ++ return 0; ++ default: ++ break; ++ } ++ ++ return -EINVAL; ++} ++ ++static int ni_8255_callback(int dir, int port, int data, unsigned long arg) ++{ ++ struct a4l_device *dev = (struct a4l_device *) arg; ++ ++ if (dir) { ++ ni_writeb(data, Port_A + 2 * port); ++ return 0; ++ } else { ++ return ni_readb(Port_A + 2 * port); ++ } ++} ++ ++/* ++ reads bytes out of eeprom ++*/ ++ ++static int ni_read_eeprom(struct a4l_device *dev, int addr) ++{ ++ int bit; ++ int bitstring; ++ ++ bitstring = 0x0300 | ((addr & 0x100) << 3) | (addr & 0xff); ++ ni_writeb(0x04, 
Serial_Command); ++ for (bit = 0x8000; bit; bit >>= 1) { ++ ni_writeb(0x04 | ((bit & bitstring) ? 0x02 : 0), ++ Serial_Command); ++ ni_writeb(0x05 | ((bit & bitstring) ? 0x02 : 0), ++ Serial_Command); ++ } ++ bitstring = 0; ++ for (bit = 0x80; bit; bit >>= 1) { ++ ni_writeb(0x04, Serial_Command); ++ ni_writeb(0x05, Serial_Command); ++ bitstring |= ((ni_readb(XXX_Status) & PROMOUT) ? bit : 0); ++ } ++ ni_writeb(0x00, Serial_Command); ++ ++ return bitstring; ++} ++ ++/* ++ presents the EEPROM as a subdevice ++*/ ++ ++static int ni_eeprom_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = ni_read_eeprom(dev, CR_CHAN(insn->chan_desc)); ++ ++ return 0; ++} ++ ++ ++static int ni_m_series_eeprom_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint8_t *data = (uint8_t *)insn->data; ++ ++ data[0] = devpriv->eeprom_buffer[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++static int ni_get_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ data[1] = devpriv->pwm_up_count * devpriv->clock_ns; ++ data[2] = devpriv->pwm_down_count * devpriv->clock_ns; ++ ++ return 0; ++} ++ ++static int ni_m_series_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int up_count, down_count; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_PWM_OUTPUT: ++ switch (data[1]) { ++ case TRIG_ROUND_NEAREST: ++ up_count = ++ (data[2] + ++ devpriv->clock_ns / 2) / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ up_count = data[2] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ up_count =(data[2] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ switch (data[3]) { ++ case TRIG_ROUND_NEAREST: ++ down_count = (data[4] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ down_count = data[4] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ down_count = ++ (data[4] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ if (up_count * devpriv->clock_ns != data[2] || ++ down_count * devpriv->clock_ns != data[4]) { ++ data[2] = up_count * devpriv->clock_ns; ++ data[4] = down_count * devpriv->clock_ns; ++ return -EAGAIN; ++ } ++ ni_writel(MSeries_Cal_PWM_High_Time_Bits(up_count) | ++ MSeries_Cal_PWM_Low_Time_Bits(down_count), ++ M_Offset_Cal_PWM); ++ devpriv->pwm_up_count = up_count; ++ devpriv->pwm_down_count = down_count; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_PWM_OUTPUT: ++ return ni_get_pwm_config(subd, insn); ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int ni_6143_pwm_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int *data = (unsigned int*)insn->data; ++ ++ unsigned up_count, down_count; ++ switch (data[0]) { ++ case A4L_INSN_CONFIG_PWM_OUTPUT: ++ switch (data[1]) { ++ case TRIG_ROUND_NEAREST: ++ up_count = ++ (data[2] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ up_count = data[2] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ up_count 
= (data[2] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ switch (data[3]) { ++ case TRIG_ROUND_NEAREST: ++ down_count = (data[4] + devpriv->clock_ns / 2) / ++ devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_DOWN: ++ down_count = data[4] / devpriv->clock_ns; ++ break; ++ case TRIG_ROUND_UP: ++ down_count = (data[4] + devpriv->clock_ns - 1) / ++ devpriv->clock_ns; ++ break; ++ default: ++ return -EINVAL; ++ break; ++ } ++ if (up_count * devpriv->clock_ns != data[2] || ++ down_count * devpriv->clock_ns != data[4]) { ++ data[2] = up_count * devpriv->clock_ns; ++ data[4] = down_count * devpriv->clock_ns; ++ return -EAGAIN; ++ } ++ ni_writel(up_count, Calibration_HighTime_6143); ++ devpriv->pwm_up_count = up_count; ++ ni_writel(down_count, Calibration_LowTime_6143); ++ devpriv->pwm_down_count = down_count; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_GET_PWM_OUTPUT: ++ return ni_get_pwm_config(subd, insn); ++ default: ++ return -EINVAL; ++ break; ++ } ++ return 0; ++} ++ ++static int pack_mb88341(int addr, int val, int *bitstring) ++{ ++ /* ++ Fujitsu MB 88341 ++ Note that address bits are reversed. Thanks to ++ Ingo Keen for noticing this. ++ ++ Note also that the 88341 expects address values from ++ 1-12, whereas we use channel numbers 0-11. The NI ++ docs use 1-12, also, so be careful here. ++ */ ++ addr++; ++ *bitstring = ((addr & 0x1) << 11) | ++ ((addr & 0x2) << 9) | ++ ((addr & 0x4) << 7) | ((addr & 0x8) << 5) | (val & 0xff); ++ return 12; ++} ++ ++static int pack_dac8800(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr & 0x7) << 8) | (val & 0xff); ++ return 11; ++} ++ ++static int pack_dac8043(int addr, int val, int *bitstring) ++{ ++ *bitstring = val & 0xfff; ++ return 12; ++} ++ ++static int pack_ad8522(int addr, int val, int *bitstring) ++{ ++ *bitstring = (val & 0xfff) | (addr ? 0xc000 : 0xa000); ++ return 16; ++} ++ ++static int pack_ad8804(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr & 0xf) << 8) | (val & 0xff); ++ return 12; ++} ++ ++static int pack_ad8842(int addr, int val, int *bitstring) ++{ ++ *bitstring = ((addr + 1) << 8) | (val & 0xff); ++ return 12; ++} ++ ++struct caldac_struct { ++ int n_chans; ++ int n_bits; ++ int (*packbits) (int, int, int *); ++}; ++ ++static struct caldac_struct caldacs[] = { ++ [mb88341] = {12, 8, pack_mb88341}, ++ [dac8800] = {8, 8, pack_dac8800}, ++ [dac8043] = {1, 12, pack_dac8043}, ++ [ad8522] = {2, 12, pack_ad8522}, ++ [ad8804] = {12, 8, pack_ad8804}, ++ [ad8842] = {8, 8, pack_ad8842}, ++ [ad8804_debug] = {16, 8, pack_ad8804}, ++}; ++ ++static void ni_write_caldac(struct a4l_device * dev, int addr, int val) ++{ ++ unsigned int loadbit = 0, bits = 0, bit, bitstring = 0; ++ int i; ++ int type; ++ ++ if (devpriv->caldacs[addr] == val) ++ return; ++ devpriv->caldacs[addr] = val; ++ ++ for (i = 0; i < 3; i++) { ++ type = boardtype.caldac[i]; ++ if (type == caldac_none) ++ break; ++ if (addr < caldacs[type].n_chans) { ++ bits = caldacs[type].packbits(addr, val, &bitstring); ++ loadbit = SerDacLd(i); ++ break; ++ } ++ addr -= caldacs[type].n_chans; ++ } ++ ++ for (bit = 1 << (bits - 1); bit; bit >>= 1) { ++ ni_writeb(((bit & bitstring) ? 0x02 : 0), Serial_Command); ++ a4l_udelay(1); ++ ni_writeb(1 | ((bit & bitstring) ? 
0x02 : 0), Serial_Command); ++ a4l_udelay(1); ++ } ++ ni_writeb(loadbit, Serial_Command); ++ a4l_udelay(1); ++ ni_writeb(0, Serial_Command); ++} ++ ++static void caldac_setup(struct a4l_device *dev, struct a4l_subdevice *subd) ++{ ++ int i, j; ++ int n_dacs; ++ int n_chans = 0; ++ int n_bits; ++ int diffbits = 0; ++ int type; ++ int chan; ++ ++ type = boardtype.caldac[0]; ++ if (type == caldac_none) ++ return; ++ n_bits = caldacs[type].n_bits; ++ for (i = 0; i < 3; i++) { ++ type = boardtype.caldac[i]; ++ if (type == caldac_none) ++ break; ++ if (caldacs[type].n_bits != n_bits) ++ diffbits = 1; ++ n_chans += caldacs[type].n_chans; ++ } ++ n_dacs = i; ++ ++ if (diffbits) { ++ ++ if (n_chans > MAX_N_CALDACS) { ++ a4l_err(dev, "BUG! MAX_N_CALDACS too small\n"); ++ } ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ n_chans * sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ memset(subd->chan_desc, ++ 0, ++ sizeof(struct a4l_channels_desc) + n_chans * sizeof(struct a4l_channel)); ++ ++ subd->chan_desc->length = n_chans; ++ subd->chan_desc->mode = A4L_CHAN_PERCHAN_CHANDESC; ++ ++ chan = 0; ++ for (i = 0; i < n_dacs; i++) { ++ type = boardtype.caldac[i]; ++ for (j = 0; j < caldacs[type].n_chans; j++) { ++ ++ subd->chan_desc->chans[chan].nb_bits = ++ caldacs[type].n_bits; ++ ++ chan++; ++ } ++ } ++ ++ for (chan = 0; chan < n_chans; chan++) { ++ unsigned long tmp = ++ (1 << subd->chan_desc->chans[chan].nb_bits) / 2; ++ ni_write_caldac(dev, chan, tmp); ++ } ++ } else { ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ memset(subd->chan_desc, ++ 0, sizeof(struct a4l_channels_desc) + sizeof(struct a4l_channel)); ++ ++ subd->chan_desc->length = n_chans; ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ ++ type = boardtype.caldac[0]; ++ ++ subd->chan_desc->chans[0].nb_bits = caldacs[type].n_bits; ++ ++ for (chan = 0; chan < n_chans; chan++) ++ ni_write_caldac(dev, ++ chan, ++ (1 << subd->chan_desc->chans[0].nb_bits) / 2); ++ } ++} ++ ++static int ni_calib_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ ni_write_caldac(dev, CR_CHAN(insn->chan_desc), data[0]); ++ return 0; ++} ++ ++static int ni_calib_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ data[0] = devpriv->caldacs[CR_CHAN(insn->chan_desc)]; ++ ++ return 0; ++} ++ ++static int ni_gpct_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_insn_config(counter, insn); ++} ++ ++static int ni_gpct_insn_read(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_rinsn(counter, insn); ++} ++ ++static int ni_gpct_insn_write(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_winsn(counter, insn); ++} ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++static int ni_gpct_cmd(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ int retval; ++ struct a4l_device *dev = subd->dev; ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ struct 
mite_dma_descriptor_ring *ring; ++ ++ retval = ni_request_gpct_mite_channel(dev, ++ counter->counter_index, ++ A4L_INPUT); ++ if (retval) { ++ a4l_err(dev, ++ "ni_gpct_cmd: " ++ "no dma channel available for use by counter\n"); ++ return retval; ++ } ++ ++ ring = devpriv->gpct_mite_ring[counter->counter_index]; ++ retval = a4l_mite_buf_change(ring, subd); ++ if (retval) { ++ a4l_err(dev, ++ "ni_gpct_cmd: " ++ "dma ring configuration failed\n"); ++ return retval; ++ ++ } ++ ++ a4l_ni_tio_acknowledge_and_confirm(counter, NULL, NULL, NULL, NULL); ++ ni_e_series_enable_second_irq(dev, counter->counter_index, 1); ++ retval = a4l_ni_tio_cmd(counter, cmd); ++ ++ return retval; ++} ++ ++static int ni_gpct_cmdtest(struct a4l_subdevice *subd, struct a4l_cmd_desc *cmd) ++{ ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ return a4l_ni_tio_cmdtest(counter, cmd); ++} ++ ++static void ni_gpct_cancel(struct a4l_subdevice *subd) ++{ ++ struct a4l_device *dev = subd->dev; ++ struct ni_gpct *counter = (struct ni_gpct *)subd->priv; ++ ++ a4l_ni_tio_cancel(counter); ++ ni_e_series_enable_second_irq(dev, counter->counter_index, 0); ++ ni_release_gpct_mite_channel(dev, counter->counter_index); ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ++/* ++ * ++ * Programmable Function Inputs ++ * ++ */ ++ ++static int ni_m_series_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ unsigned int pfi_reg_index; ++ unsigned int array_offset; ++ ++ if ((source & 0x1f) != source) ++ return -EINVAL; ++ pfi_reg_index = 1 + chan / 3; ++ array_offset = pfi_reg_index - 1; ++ devpriv->pfi_output_select_reg[array_offset] &= ++ ~MSeries_PFI_Output_Select_Mask(chan); ++ devpriv->pfi_output_select_reg[array_offset] |= ++ MSeries_PFI_Output_Select_Bits(chan, source); ++ ni_writew(devpriv->pfi_output_select_reg[array_offset], ++ M_Offset_PFI_Output_Select(pfi_reg_index)); ++ return 2; ++} ++ ++static unsigned int ni_old_get_pfi_routing(struct a4l_device *dev, ++ unsigned int chan) ++{ ++ /* pre-m-series boards have fixed signals on pfi pins */ ++ ++ switch (chan) { ++ case 0: ++ return NI_PFI_OUTPUT_AI_START1; ++ break; ++ case 1: ++ return NI_PFI_OUTPUT_AI_START2; ++ break; ++ case 2: ++ return NI_PFI_OUTPUT_AI_CONVERT; ++ break; ++ case 3: ++ return NI_PFI_OUTPUT_G_SRC1; ++ break; ++ case 4: ++ return NI_PFI_OUTPUT_G_GATE1; ++ break; ++ case 5: ++ return NI_PFI_OUTPUT_AO_UPDATE_N; ++ break; ++ case 6: ++ return NI_PFI_OUTPUT_AO_START1; ++ break; ++ case 7: ++ return NI_PFI_OUTPUT_AI_START_PULSE; ++ break; ++ case 8: ++ return NI_PFI_OUTPUT_G_SRC0; ++ break; ++ case 9: ++ return NI_PFI_OUTPUT_G_GATE0; ++ break; ++ default: ++ __a4l_err("%s: bug, unhandled case in switch.\n", ++ __FUNCTION__); ++ break; ++ } ++ return 0; ++} ++ ++static int ni_old_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ /* pre-m-series boards have fixed signals on pfi pins */ ++ if (source != ni_old_get_pfi_routing(dev, chan)) ++ return -EINVAL; ++ ++ return 2; ++} ++ ++static int ni_set_pfi_routing(struct a4l_device *dev, ++ unsigned int chan, unsigned int source) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_set_pfi_routing(dev, chan, source); ++ else ++ return ni_old_set_pfi_routing(dev, chan, source); ++} ++ ++static unsigned int ni_m_series_get_pfi_routing(struct a4l_device *dev, ++ unsigned int chan) ++{ ++ const unsigned int array_offset = chan / 3; ++ return MSeries_PFI_Output_Select_Source(chan, ++ 
devpriv->pfi_output_select_reg[array_offset]); ++} ++ ++static unsigned int ni_get_pfi_routing(struct a4l_device *dev, unsigned int chan) ++{ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ return ni_m_series_get_pfi_routing(dev, chan); ++ else ++ return ni_old_get_pfi_routing(dev, chan); ++} ++ ++static int ni_config_filter(struct a4l_device *dev, ++ unsigned int pfi_channel, int filter) ++{ ++ unsigned int bits; ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) { ++ return -ENOTSUPP; ++ } ++ bits = ni_readl(M_Offset_PFI_Filter); ++ bits &= ~MSeries_PFI_Filter_Select_Mask(pfi_channel); ++ bits |= MSeries_PFI_Filter_Select_Bits(pfi_channel, filter); ++ ni_writel(bits, M_Offset_PFI_Filter); ++ return 0; ++} ++ ++static int ni_pfi_insn_bits(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ uint16_t *data = (uint16_t *)insn->data; ++ ++ if (data[0]) { ++ devpriv->pfi_state &= ~data[0]; ++ devpriv->pfi_state |= (data[0] & data[1]); ++ ni_writew(devpriv->pfi_state, M_Offset_PFI_DO); ++ } ++ ++ data[1] = ni_readw(M_Offset_PFI_DI); ++ ++ return 0; ++} ++ ++static int ni_pfi_insn_config(struct a4l_subdevice *subd, struct a4l_kernel_instruction *insn) ++{ ++ struct a4l_device *dev = subd->dev; ++ unsigned int chan, *data = (unsigned int *)insn->data; ++ ++ if (insn->data_size < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ chan = CR_CHAN(insn->chan_desc); ++ ++ switch (data[0]) { ++ case A4L_OUTPUT: ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, 1 << chan, 1); ++ break; ++ case A4L_INPUT: ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, 1 << chan, 0); ++ break; ++ case A4L_INSN_CONFIG_DIO_QUERY: ++ data[1] = (devpriv->io_bidirection_pin_reg & (1 << chan)) ? ++ A4L_OUTPUT : A4L_INPUT; ++ return 0; ++ break; ++ case A4L_INSN_CONFIG_SET_ROUTING: ++ return ni_set_pfi_routing(dev, chan, data[1]); ++ break; ++ case A4L_INSN_CONFIG_GET_ROUTING: ++ data[1] = ni_get_pfi_routing(dev, chan); ++ break; ++ case A4L_INSN_CONFIG_FILTER: ++ return ni_config_filter(dev, chan, data[1]); ++ break; ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/* ++ * ++ * RTSI Bus Functions ++ * ++ */ ++ ++/* Find best multiplier/divider to try and get the PLL running at 80 MHz ++ * given an arbitrary frequency input clock */ ++static int ni_mseries_get_pll_parameters(unsigned int reference_period_ns, ++ unsigned int *freq_divider, ++ unsigned int *freq_multiplier, ++ unsigned int *actual_period_ns) ++{ ++ unsigned div; ++ unsigned best_div = 1; ++ static const unsigned max_div = 0x10; ++ unsigned mult; ++ unsigned best_mult = 1; ++ static const unsigned max_mult = 0x100; ++ static const unsigned pico_per_nano = 1000; ++ ++ const unsigned reference_picosec = reference_period_ns * pico_per_nano; ++ /* m-series wants the phased-locked loop to output 80MHz, which is divided by 4 to ++ * 20 MHz for most timing clocks */ ++ static const unsigned target_picosec = 12500; ++ static const unsigned fudge_factor_80_to_20Mhz = 4; ++ int best_period_picosec = 0; ++ for (div = 1; div <= max_div; ++div) { ++ for (mult = 1; mult <= max_mult; ++mult) { ++ unsigned new_period_ps = ++ (reference_picosec * div) / mult; ++ if (abs(new_period_ps - target_picosec) < ++ abs(best_period_picosec - target_picosec)) { ++ best_period_picosec = new_period_ps; ++ best_div = div; ++ best_mult = mult; ++ } ++ } ++ } ++ if (best_period_picosec == 0) { ++ __a4l_err("%s: bug, failed to find pll parameters\n", ++ __FUNCTION__); ++ return -EIO; ++ } ++ *freq_divider = best_div; ++ 
*freq_multiplier = best_mult; ++ *actual_period_ns = ++ (best_period_picosec * fudge_factor_80_to_20Mhz + ++ (pico_per_nano / 2)) / pico_per_nano; ++ return 0; ++} ++ ++static int ni_mseries_set_pll_master_clock(struct a4l_device * dev, ++ unsigned int source, ++ unsigned int period_ns) ++{ ++ static const unsigned min_period_ns = 50; ++ static const unsigned max_period_ns = 1000; ++ static const unsigned timeout = 1000; ++ unsigned pll_control_bits; ++ unsigned freq_divider; ++ unsigned freq_multiplier; ++ unsigned i; ++ int retval; ++ if (source == NI_MIO_PLL_PXI10_CLOCK) ++ period_ns = 100; ++ /* These limits are somewhat arbitrary, but NI advertises 1 to ++ 20MHz range so we'll use that */ ++ if (period_ns < min_period_ns || period_ns > max_period_ns) { ++ a4l_err(dev, ++ "%s: you must specify an input clock frequency " ++ "between %i and %i nanosec " ++ "for the phased-lock loop.\n", ++ __FUNCTION__, min_period_ns, max_period_ns); ++ return -EINVAL; ++ } ++ devpriv->rtsi_trig_direction_reg &= ~Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_direction_reg, ++ RTSI_Trig_Direction_Register); ++ pll_control_bits = ++ MSeries_PLL_Enable_Bit | MSeries_PLL_VCO_Mode_75_150MHz_Bits; ++ devpriv->clock_and_fout2 |= ++ MSeries_Timebase1_Select_Bit | MSeries_Timebase3_Select_Bit; ++ devpriv->clock_and_fout2 &= ~MSeries_PLL_In_Source_Select_Mask; ++ switch (source) { ++ case NI_MIO_PLL_PXI_STAR_TRIGGER_CLOCK: ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_Star_Trigger_Bits; ++ retval = ni_mseries_get_pll_parameters(period_ns, &freq_divider, ++ &freq_multiplier, &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ break; ++ case NI_MIO_PLL_PXI10_CLOCK: ++ /* pxi clock is 10MHz */ ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_PXI_Clock10; ++ retval = ni_mseries_get_pll_parameters(period_ns, &freq_divider, ++ &freq_multiplier, &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ break; ++ default: ++ { ++ unsigned rtsi_channel; ++ static const unsigned max_rtsi_channel = 7; ++ for (rtsi_channel = 0; rtsi_channel <= max_rtsi_channel; ++ ++rtsi_channel) { ++ if (source == ++ NI_MIO_PLL_RTSI_CLOCK(rtsi_channel)) { ++ devpriv->clock_and_fout2 |= ++ MSeries_PLL_In_Source_Select_RTSI_Bits ++ (rtsi_channel); ++ break; ++ } ++ } ++ if (rtsi_channel > max_rtsi_channel) ++ return -EINVAL; ++ retval = ni_mseries_get_pll_parameters(period_ns, ++ &freq_divider, &freq_multiplier, ++ &devpriv->clock_ns); ++ if (retval < 0) ++ return retval; ++ } ++ break; ++ } ++ ni_writew(devpriv->clock_and_fout2, M_Offset_Clock_and_Fout2); ++ pll_control_bits |= ++ MSeries_PLL_Divisor_Bits(freq_divider) | ++ MSeries_PLL_Multiplier_Bits(freq_multiplier); ++ ni_writew(pll_control_bits, M_Offset_PLL_Control); ++ devpriv->clock_source = source; ++ /* It seems to typically take a few hundred microseconds for PLL to lock */ ++ for (i = 0; i < timeout; ++i) { ++ if (ni_readw(M_Offset_PLL_Status) & MSeries_PLL_Locked_Bit) { ++ break; ++ } ++ udelay(1); ++ } ++ if (i == timeout) { ++ a4l_err(dev, ++ "%s: timed out waiting for PLL to lock " ++ "to reference clock source %i with period %i ns.\n", ++ __FUNCTION__, source, period_ns); ++ return -ETIMEDOUT; ++ } ++ return 3; ++} ++ ++static int ni_set_master_clock(struct a4l_device *dev, ++ unsigned int source, unsigned int period_ns) ++{ ++ if (source == NI_MIO_INTERNAL_CLOCK) { ++ devpriv->rtsi_trig_direction_reg &= ~Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_direction_reg, ++ 
RTSI_Trig_Direction_Register); ++ devpriv->clock_ns = TIMEBASE_1_NS; ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ devpriv->clock_and_fout2 &= ++ ~(MSeries_Timebase1_Select_Bit | ++ MSeries_Timebase3_Select_Bit); ++ ni_writew(devpriv->clock_and_fout2, ++ M_Offset_Clock_and_Fout2); ++ ni_writew(0, M_Offset_PLL_Control); ++ } ++ devpriv->clock_source = source; ++ } else { ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ return ni_mseries_set_pll_master_clock(dev, source, ++ period_ns); ++ } else { ++ if (source == NI_MIO_RTSI_CLOCK) { ++ devpriv->rtsi_trig_direction_reg |= ++ Use_RTSI_Clock_Bit; ++ devpriv->stc_writew(dev, ++ devpriv->rtsi_trig_direction_reg, ++ RTSI_Trig_Direction_Register); ++ if (devpriv->clock_ns == 0) { ++ a4l_err(dev, ++ "%s: we don't handle an " ++ "unspecified clock period " ++ "correctly yet, returning error.\n", ++ __FUNCTION__); ++ return -EINVAL; ++ } else { ++ devpriv->clock_ns = period_ns; ++ } ++ devpriv->clock_source = source; ++ } else ++ return -EINVAL; ++ } ++ } ++ return 3; ++} ++ ++static void ni_rtsi_init(struct a4l_device * dev) ++{ ++ /* Initialise the RTSI bus signal switch to a default state */ ++ ++ /* Set clock mode to internal */ ++ devpriv->clock_and_fout2 = MSeries_RTSI_10MHz_Bit; ++ if (ni_set_master_clock(dev, NI_MIO_INTERNAL_CLOCK, 0) < 0) { ++ a4l_err(dev, "ni_set_master_clock failed, bug?"); ++ } ++ ++ /* Default internal lines routing to RTSI bus lines */ ++ devpriv->rtsi_trig_a_output_reg = ++ RTSI_Trig_Output_Bits(0, NI_RTSI_OUTPUT_ADR_START1) | ++ RTSI_Trig_Output_Bits(1, NI_RTSI_OUTPUT_ADR_START2) | ++ RTSI_Trig_Output_Bits(2, NI_RTSI_OUTPUT_SCLKG) | ++ RTSI_Trig_Output_Bits(3, NI_RTSI_OUTPUT_DACUPDN); ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_a_output_reg, ++ RTSI_Trig_A_Output_Register); ++ devpriv->rtsi_trig_b_output_reg = ++ RTSI_Trig_Output_Bits(4, NI_RTSI_OUTPUT_DA_START1) | ++ RTSI_Trig_Output_Bits(5, NI_RTSI_OUTPUT_G_SRC0) | ++ RTSI_Trig_Output_Bits(6, NI_RTSI_OUTPUT_G_GATE0); ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ devpriv->rtsi_trig_b_output_reg |= ++ RTSI_Trig_Output_Bits(7, NI_RTSI_OUTPUT_RTSI_OSC); ++ devpriv->stc_writew(dev, devpriv->rtsi_trig_b_output_reg, ++ RTSI_Trig_B_Output_Register); ++} ++ ++int a4l_ni_E_init(struct a4l_device *dev) ++{ ++ int ret; ++ unsigned int j, counter_variant; ++ struct a4l_subdevice *subd; ++ ++ if (boardtype.n_aochan > MAX_N_AO_CHAN) { ++ a4l_err(dev, "bug! 
boardtype.n_aochan > MAX_N_AO_CHAN\n"); ++ return -EINVAL; ++ } ++ ++ /* analog input subdevice */ ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: starting attach procedure...\n"); ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering AI subdevice...\n"); ++ ++ if (boardtype.n_adchan) { ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AI: %d channels\n", boardtype.n_adchan); ++ ++ subd->flags = A4L_SUBD_AI | A4L_SUBD_CMD | A4L_SUBD_MMAP; ++ subd->rng_desc = ni_range_lkup[boardtype.gainlkup]; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_adchan; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_DIFF; ++ if (boardtype.reg_type != ni_reg_611x) ++ subd->chan_desc->chans[0].flags |= A4L_CHAN_AREF_GROUND | ++ A4L_CHAN_AREF_COMMON | A4L_CHAN_AREF_OTHER; ++ subd->chan_desc->chans[0].nb_bits = boardtype.adbits; ++ ++ subd->insn_read = ni_ai_insn_read; ++ subd->insn_config = ni_ai_insn_config; ++ subd->do_cmdtest = ni_ai_cmdtest; ++ subd->do_cmd = ni_ai_cmd; ++ subd->cancel = ni_ai_reset; ++ subd->trigger = ni_ai_inttrig; ++ ++ subd->munge = (boardtype.adbits > 16) ? ++ ni_ai_munge32 : ni_ai_munge16; ++ ++ subd->cmd_mask = &mio_ai_cmd_mask; ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AI subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_AI_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AI subdevice registered\n"); ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering AO subdevice...\n"); ++ ++ /* analog output subdevice */ ++ if (boardtype.n_aochan) { ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AO: %d channels\n", boardtype.n_aochan); ++ ++ subd->flags = A4L_SUBD_AO; ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_aochan; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_GROUND; ++ subd->chan_desc->chans[0].nb_bits = boardtype.aobits; ++ ++ subd->rng_desc = boardtype.ao_range_table; ++ ++ subd->insn_read = ni_ao_insn_read; ++ if (boardtype.reg_type & ni_reg_6xxx_mask) ++ subd->insn_write = &ni_ao_insn_write_671x; ++ else ++ subd->insn_write = &ni_ao_insn_write; ++ ++ ++ if (boardtype.ao_fifo_depth) { ++ subd->flags |= A4L_SUBD_CMD | A4L_SUBD_MMAP; ++ subd->do_cmd = &ni_ao_cmd; ++ subd->cmd_mask = &mio_ao_cmd_mask; ++ subd->do_cmdtest = &ni_ao_cmdtest; ++ subd->trigger = ni_ao_inttrig; ++ if ((boardtype.reg_type & ni_reg_m_series_mask) == 0) ++ subd->munge = &ni_ao_munge; ++ } ++ ++ subd->cancel = &ni_ao_reset; ++ ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: AO subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_AO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AO subdevice registered\n"); ++ ++ if ((boardtype.reg_type & ni_reg_67xx_mask)) ++ init_ao_67xx(dev); ++ ++ /* digital i/o subdevice */ ++ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering DIO subdevice...\n"); ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: DIO: %d channels\n", ++ 
boardtype.num_p0_dio_channels); ++ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.num_p0_dio_channels; ++ subd->chan_desc->chans[0].flags = A4L_CHAN_AREF_GROUND; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ devpriv->io_bits = 0; /* all bits input */ ++ ++ subd->rng_desc = &range_digital; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ ++ if (subd->chan_desc->length == 8) ++ subd->insn_bits = ni_m_series_dio_insn_bits_8; ++ else ++ subd->insn_bits = ni_m_series_dio_insn_bits_32; ++ ++ subd->insn_config = ni_m_series_dio_insn_config; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: DIO: command feature available\n"); ++ ++ subd->flags |= A4L_SUBD_CMD; ++ subd->do_cmd = ni_cdio_cmd; ++ subd->do_cmdtest = ni_cdio_cmdtest; ++ subd->cmd_mask = &mio_dio_cmd_mask; ++ subd->cancel = ni_cdio_cancel; ++ subd->trigger = ni_cdo_inttrig; ++ ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ ni_writel(CDO_Reset_Bit | CDI_Reset_Bit, M_Offset_CDIO_Command); ++ ni_writel(devpriv->io_bits, M_Offset_DIO_Direction); ++ } else { ++ ++ subd->insn_bits = ni_dio_insn_bits; ++ subd->insn_config = ni_dio_insn_config; ++ devpriv->dio_control = DIO_Pins_Dir(devpriv->io_bits); ++ ni_writew(devpriv->dio_control, DIO_Control_Register); ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: DIO subdevice registered\n"); ++ ++ /* 8255 device */ ++ subd = a4l_alloc_subd(sizeof(subd_8255_t), NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering 8255 subdevice...\n"); ++ ++ if (boardtype.has_8255) { ++ devpriv->subd_8255.cb_arg = (unsigned long)dev; ++ devpriv->subd_8255.cb_func = ni_8255_callback; ++ a4l_subdev_8255_init(subd); ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: 8255 subdevice not present\n"); ++ subd->flags = A4L_SUBD_UNUSED; ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_8255_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: 8255 subdevice registered\n"); ++ ++ /* formerly general purpose counter/timer device, but no longer used */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ subd->flags = A4L_SUBD_UNUSED; ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_UNUSED_SUBDEV) ++ return ret; ++ ++ /* calibration subdevice -- ai and ao */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: registering calib subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_CALIB; ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ /* internal PWM analog output ++ used for AI nonlinearity calibration */ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: M series calibration"); ++ subd->insn_config = ni_m_series_pwm_config; ++ ni_writel(0x0, M_Offset_Cal_PWM); ++ } else if (boardtype.reg_type == ni_reg_6143) { ++ /* internal PWM analog output ++ used for AI nonlinearity calibration */ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: 6143 calibration"); ++ subd->insn_config = ni_6143_pwm_config; ++ } else { ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: calib: common calibration"); ++ subd->insn_read = ni_calib_insn_read; ++ subd->insn_write = ni_calib_insn_write; ++ 
caldac_setup(dev, subd); ++ } ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_CALIBRATION_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: calib subdevice registered\n"); ++ ++ /* EEPROM */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering EEPROM subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_MEMORY; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 8; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ subd->chan_desc->length = M_SERIES_EEPROM_SIZE; ++ subd->insn_read = ni_m_series_eeprom_insn_read; ++ } else { ++ subd->chan_desc->length = 512; ++ subd->insn_read = ni_eeprom_insn_read; ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: EEPROM: size = %lu\n", subd->chan_desc->length); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_EEPROM_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: EEPROM subdevice registered\n"); ++ ++ /* PFI */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering PFI(DIO) subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ unsigned int i; ++ subd->chan_desc->length = 16; ++ ni_writew(devpriv->dio_state, M_Offset_PFI_DO); ++ for (i = 0; i < NUM_PFI_OUTPUT_SELECT_REGS; ++i) { ++ ni_writew(devpriv->pfi_output_select_reg[i], ++ M_Offset_PFI_Output_Select(i + 1)); ++ } ++ } else ++ subd->chan_desc->length = 10; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: PFI: %lu bits...\n", subd->chan_desc->length); ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ subd->insn_bits = ni_pfi_insn_bits; ++ } ++ ++ subd->insn_config = ni_pfi_insn_config; ++ ni_set_bits(dev, IO_Bidirection_Pin_Register, ~0, 0); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_PFI_DIO_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: PFI subdevice registered\n"); ++ ++ /* cs5529 calibration adc */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++#if 0 /* TODO: add subdevices callbacks */ ++ subd->flags = A4L_SUBD_AI; ++ ++ if (boardtype.reg_type & ni_reg_67xx_mask) { ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = boardtype.n_aochan; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 16; ++ ++ /* one channel for each analog output channel */ ++ subd->rng_desc = &a4l_range_unknown; /* XXX */ ++ s->insn_read = cs5529_ai_insn_read; ++ init_cs5529(dev); ++ } else ++#endif /* TODO: add subdevices callbacks */ ++ subd->flags = A4L_SUBD_UNUSED; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_CS5529_CALIBRATION_SUBDEV) ++ return ret; ++ ++ /* Serial */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering serial subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_SERIAL; ++ ++ subd->chan_desc = 
kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 1; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 8; ++ ++ subd->insn_config = ni_serial_insn_config; ++ ++ devpriv->serial_interval_ns = 0; ++ devpriv->serial_hw_mode = 0; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_SERIAL_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: serial subdevice registered\n"); ++ ++ /* RTSI */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++#if 1 /* TODO: add RTSI subdevice */ ++ subd->flags = A4L_SUBD_UNUSED; ++ ni_rtsi_init(dev); ++ ++#else /* TODO: add RTSI subdevice */ ++ subd->flags = A4L_SUBD_DIO; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 8; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 1; ++ ++ subd->insn_bits = ni_rtsi_insn_bits; ++ subd->insn_config = ni_rtsi_insn_config; ++ ni_rtsi_init(dev); ++ ++#endif /* TODO: add RTSI subdevice */ ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_RTSI_SUBDEV) ++ return ret; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) { ++ counter_variant = ni_gpct_variant_m_series; ++ } else { ++ counter_variant = ni_gpct_variant_e_series; ++ } ++ devpriv->counter_dev = ++ a4l_ni_gpct_device_construct(dev, ++ &ni_gpct_write_register, ++ &ni_gpct_read_register, ++ counter_variant, NUM_GPCT); ++ ++ /* General purpose counters */ ++ for (j = 0; j < NUM_GPCT; ++j) { ++ struct ni_gpct *counter; ++ ++ subd = a4l_alloc_subd(sizeof(struct ni_gpct), NULL); ++ if(subd == NULL) ++ return -ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering GPCT[%d] subdevice...\n", j); ++ ++ subd->flags = A4L_SUBD_COUNTER; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 3; ++ subd->chan_desc->chans[0].flags = 0; ++ ++ if (boardtype.reg_type & ni_reg_m_series_mask) ++ subd->chan_desc->chans[0].nb_bits = 32; ++ else ++ subd->chan_desc->chans[0].nb_bits = 24; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GPCT[%d]: %lu bits\n", ++ j, subd->chan_desc->chans[0].nb_bits); ++ ++ subd->insn_read = ni_gpct_insn_read; ++ subd->insn_write = ni_gpct_insn_write; ++ subd->insn_config = ni_gpct_insn_config; ++ ++#if (defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE) || \ ++ defined(CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE_MODULE)) ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GPCT[%d]: command feature available\n", j); ++ subd->flags |= A4L_SUBD_CMD; ++ subd->cmd_mask = &a4l_ni_tio_cmd_mask; ++ subd->do_cmd = ni_gpct_cmd; ++ subd->do_cmdtest = ni_gpct_cmdtest; ++ subd->cancel = ni_gpct_cancel; ++#endif /* CONFIG_XENO_DRIVERS_ANALOGY_NI_MITE */ ++ ++ counter = (struct ni_gpct *)subd->priv; ++ rtdm_lock_init(&counter->lock); ++ counter->chip_index = 0; ++ counter->counter_index = j; ++ counter->counter_dev = devpriv->counter_dev; ++ devpriv->counter_dev->counters[j] = counter; ++ ++ a4l_ni_tio_init_counter(counter); ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_GPCT_SUBDEV(j)) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: GCPT[%d] subdevice registered\n", j); ++ } ++ ++ /* Frequency output */ ++ subd = a4l_alloc_subd(0, NULL); ++ if(subd == NULL) ++ return 
-ENOMEM; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: registering counter subdevice...\n"); ++ ++ subd->flags = A4L_SUBD_COUNTER; ++ ++ subd->chan_desc = kmalloc(sizeof(struct a4l_channels_desc) + ++ sizeof(struct a4l_channel), GFP_KERNEL); ++ subd->chan_desc->mode = A4L_CHAN_GLOBAL_CHANDESC; ++ subd->chan_desc->length = 1; ++ subd->chan_desc->chans[0].flags = 0; ++ subd->chan_desc->chans[0].nb_bits = 4; ++ ++ subd->insn_read = ni_freq_out_insn_read; ++ subd->insn_write = ni_freq_out_insn_write; ++ subd->insn_config = ni_freq_out_insn_config; ++ ++ ret = a4l_add_subd(dev, subd); ++ if(ret != NI_FREQ_OUT_SUBDEV) ++ return ret; ++ ++ a4l_dbg(1, drv_dbg, dev, ++ "mio_common: counter subdevice registered\n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: initializing AI...\n"); ++ ++ /* ai configuration */ ++ ni_ai_reset(a4l_get_subd(dev, NI_AI_SUBDEV)); ++ if ((boardtype.reg_type & ni_reg_6xxx_mask) == 0) { ++ // BEAM is this needed for PCI-6143 ?? ++ devpriv->clock_and_fout = ++ Slow_Internal_Time_Divide_By_2 | ++ Slow_Internal_Timebase | ++ Clock_To_Board_Divide_By_2 | ++ Clock_To_Board | ++ AI_Output_Divide_By_2 | AO_Output_Divide_By_2; ++ } else { ++ devpriv->clock_and_fout = ++ Slow_Internal_Time_Divide_By_2 | ++ Slow_Internal_Timebase | ++ Clock_To_Board_Divide_By_2 | Clock_To_Board; ++ } ++ devpriv->stc_writew(dev, devpriv->clock_and_fout, ++ Clock_and_FOUT_Register); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: AI initialization OK\n"); ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: initializing A0...\n"); ++ ++ /* analog output configuration */ ++ ni_ao_reset(a4l_get_subd(dev, NI_AO_SUBDEV)); ++ ++ if (a4l_get_irq(dev) != A4L_IRQ_UNUSED) { ++ devpriv->stc_writew(dev, ++ (devpriv->irq_polarity ? Interrupt_Output_Polarity : 0) | ++ (Interrupt_Output_On_3_Pins & 0) | Interrupt_A_Enable | ++ Interrupt_B_Enable | ++ Interrupt_A_Output_Select(devpriv->irq_pin) | ++ Interrupt_B_Output_Select(devpriv->irq_pin), ++ Interrupt_Control_Register); ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: A0 initialization OK\n"); ++ ++ /* DMA setup */ ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: DMA setup\n"); ++ ++ ni_writeb(devpriv->ai_ao_select_reg, AI_AO_Select); ++ ni_writeb(devpriv->g0_g1_select_reg, G0_G1_Select); ++ ++ if (boardtype.reg_type & ni_reg_6xxx_mask) { ++ ni_writeb(0, Magic_611x); ++ } else if (boardtype.reg_type & ni_reg_m_series_mask) { ++ int channel; ++ for (channel = 0; channel < boardtype.n_aochan; ++channel) { ++ ni_writeb(0xf, M_Offset_AO_Waveform_Order(channel)); ++ ni_writeb(0x0, ++ M_Offset_AO_Reference_Attenuation(channel)); ++ } ++ ni_writeb(0x0, M_Offset_AO_Calibration); ++ } ++ ++ a4l_dbg(1, drv_dbg, dev, "mio_common: attach procedure complete\n"); ++ ++ return 0; ++} ++ ++MODULE_DESCRIPTION("Analogy support for NI DAQ-STC based boards"); ++MODULE_LICENSE("GPL"); ++ ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_limited); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_limited14); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_bipolar4); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ai_611x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_M_ai_622x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_M_ai_628x); ++EXPORT_SYMBOL_GPL(a4l_range_ni_S_ai_6143); ++EXPORT_SYMBOL_GPL(a4l_range_ni_E_ao_ext); ++EXPORT_SYMBOL_GPL(a4l_ni_E_interrupt); ++EXPORT_SYMBOL_GPL(a4l_ni_E_init); +--- linux/drivers/xenomai/analogy/national_instruments/pcimio.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/pcimio.c 2021-04-07 16:01:27.801633346 +0800 +@@ -0,0 +1,1603 @@ ++/* 
++ * Hardware driver for NI PCI-MIO E series cards ++ * ++ * Copyright (C) 1997-8 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Description: National Instruments PCI-MIO-E series and M series ++ * (all boards) ++ * ++ * Author: ds, John Hallen, Frank Mori Hess, Rolf Mueller, Herbert Peremans, ++ * Herman Bruyninckx, Terry Barnaby ++ * Status: works ++ * Devices: [National Instruments] PCI-MIO-16XE-50 (ni_pcimio), ++ * PCI-MIO-16XE-10, PXI-6030E, PCI-MIO-16E-1, PCI-MIO-16E-4, PCI-6014, ++ * PCI-6040E,PXI-6040E, PCI-6030E, PCI-6031E, PCI-6032E, PCI-6033E, ++ * PCI-6071E, PCI-6023E, PCI-6024E, PCI-6025E, PXI-6025E, PCI-6034E, ++ * PCI-6035E, PCI-6052E, PCI-6110, PCI-6111, PCI-6220, PCI-6221, ++ * PCI-6224, PCI-6225, PCI-6229, PCI-6250, PCI-6251, PCIe-6251, ++ * PCI-6254, PCI-6259, PCIe-6259, PCI-6280, PCI-6281, PXI-6281, ++ * PCI-6284, PCI-6289, PCI-6711, PXI-6711, PCI-6713, PXI-6713, ++ * PXI-6071E, PCI-6070E, PXI-6070E, PXI-6052E, PCI-6036E, PCI-6731, ++ * PCI-6733, PXI-6733, PCI-6143, PXI-6143 ++ * ++ * These boards are almost identical to the AT-MIO E series, except that ++ * they use the PCI bus instead of ISA (i.e., AT). See the notes for ++ * the ni_atmio.o driver for additional information about these boards. ++ * ++ * By default, the driver uses DMA to transfer analog input data to ++ * memory. When DMA is enabled, not all triggering features are ++ * supported. ++ * ++ * Note that the PCI-6143 is a simultaneous sampling device with 8 ++ * convertors. With this board all of the convertors perform one ++ * simultaneous sample during a scan interval. The period for a scan ++ * is used for the convert time in an Analgoy cmd. The convert trigger ++ * source is normally set to TRIG_NOW by default. ++ * ++ * The RTSI trigger bus is supported on these cards on subdevice ++ * 10. See the Analogy library documentation for details. ++ * ++ * References: ++ * 341079b.pdf PCI E Series Register-Level Programmer Manual ++ * 340934b.pdf DAQ-STC reference manual ++ * 322080b.pdf 6711/6713/6715 User Manual ++ * 320945c.pdf PCI E Series User Manual ++ * 322138a.pdf PCI-6052E and DAQPad-6052E User Manual ++ * ++ * ISSUES: ++ * - When DMA is enabled, XXX_EV_CONVERT does not work correctly. ++ * - Calibration is not fully implemented ++ * - SCXI is probably broken for m-series boards ++ * - Digital I/O may not work on 673x. ++ * - Information (number of channels, bits, etc.) for some devices may ++ * be incorrect. Please check this and submit a bug if there are ++ * problems for your device. ++ * - Need to deal with external reference for DAC, and other DAC ++ * properties in board properties ++ * - Deal with at-mio-16de-10 revision D to N changes, etc. ++ * - Need to add other CALDAC type ++ * - Need to slow down DAC loading. I don't trust NI's claim that two ++ * writes to the PCI bus slows IO enough. 
I would prefer to use ++ * a4l_udelay(). Timing specs: (clock) ++ * AD8522 30ns ++ * DAC8043 120ns ++ * DAC8800 60ns ++ * MB88341 ? ++ * ++ */ ++ ++#include ++#include ++ ++#include "../intel/8255.h" ++#include "ni_stc.h" ++#include "ni_mio.h" ++#include "mite.h" ++ ++#define PCIMIO_IRQ_POLARITY 1 ++ ++/* The following two tables must be in the same order */ ++static struct pci_device_id ni_pci_table[] __maybe_unused = { ++ { PCI_VENDOR_ID_NATINST, 0x0162, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1170, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1180, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1190, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x11d0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1270, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1330, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1350, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x14e0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x14f0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1580, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x15b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x1870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x18b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x18c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2890, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x28c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a70, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2a80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2ab0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2b80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2b90, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2c80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x2ca0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70aa, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70ab, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70ac, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70af, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bc, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70bf, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { 
PCI_VENDOR_ID_NATINST, 0x70c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x70f2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x710d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x716c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x717f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x71bc, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_NATINST, 0x717d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, ++ { 0 } ++}; ++MODULE_DEVICE_TABLE(pci, ni_pci_table); ++ ++/* These are not all the possible ao ranges for 628x boards. ++ They can do OFFSET +- REFERENCE where OFFSET can be ++ 0V, 5V, APFI<0,1>, or AO<0...3> and RANGE can ++ be 10V, 5V, 2V, 1V, APFI<0,1>, AO<0...3>. That's ++ 63 different possibilities. An AO channel ++ can not act as it's own OFFSET or REFERENCE. ++*/ ++ ++#if 0 ++static struct a4l_rngtab rng_ni_M_628x_ao = { 8, { ++ RANGE(-10, 10), ++ RANGE(-5, 5), ++ RANGE(-2, 2), ++ RANGE(-1, 1), ++ RANGE(-5, 15), ++ RANGE(0, 10), ++ RANGE(3, 7), ++ RANGE(4, 6), ++ RANGE_ext(-1, 1) ++}}; ++static struct a4l_rngdesc range_ni_M_628x_ao = ++ RNG_GLOBAL(rng_ni_M_628x_ao); ++#endif ++ ++static struct a4l_rngtab rng_ni_M_625x_ao = { 3, { ++ RANGE(-10, 10), ++ RANGE(-5, 5), ++ RANGE_ext(-1, 1) ++}}; ++static struct a4l_rngdesc range_ni_M_625x_ao = ++ RNG_GLOBAL(rng_ni_M_625x_ao); ++ ++static struct a4l_rngtab rng_ni_M_622x_ao = { 1, { ++ RANGE(-10, 10), ++}}; ++static struct a4l_rngdesc range_ni_M_622x_ao = ++ RNG_GLOBAL(rng_ni_M_622x_ao); ++ ++static ni_board ni_boards[]={ ++ { device_id: 0x0162, // NI also says 0x1620. typo? ++ name: "pci-mio-16xe-50", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 2048, ++ alwaysdither: 1, ++ gainlkup: ai_gain_8, ++ ai_speed: 50000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 50000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1170, ++ name: "pci-mio-16xe-10", // aka pci-6030E ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x28c0, ++ name: "pci-6014", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x11d0, ++ name: "pxi-6030e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ ++ { device_id: 0x1180, ++ name: "pci-mio-16e-1", /* aka pci-6070e */ ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: 
{mb88341}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1190, ++ name: "pci-mio-16e-4", /* aka pci-6040e */ ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ /* Note: there have been reported problems with full speed ++ * on this board */ ++ ai_speed: 2000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 512, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, // doc says mb88341 ++ has_8255: 0, ++ }, ++ { device_id: 0x11c0, ++ name: "pxi-6040e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_16, ++ ai_speed: 2000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 512, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {mb88341}, ++ has_8255: 0, ++ }, ++ ++ { device_id: 0x1330, ++ name: "pci-6031e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1270, ++ name: "pci-6032e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1340, ++ name: "pci-6033e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ has_8255: 0, ++ }, ++ { device_id: 0x1350, ++ name: "pci-6071e", ++ n_adchan: 64, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x2a60, ++ name: "pci-6023e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 0, ++ }, ++ { device_id: 0x2a70, ++ name: "pci-6024e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 0, ++ }, ++ { device_id: 0x2a80, ++ name: "pci-6025e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 1, ++ }, ++ { device_id: 0x2ab0, ++ name: "pxi-6025e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ 
alwaysdither: 0, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, /* manual is wrong */ ++ has_8255: 1, ++ }, ++ ++ { device_id: 0x2ca0, ++ name: "pci-6034e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x2c80, ++ name: "pci-6035e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x18b0, ++ name: "pci-6052e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 3000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_unipolar: 1, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_speed: 3000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug,ad8804_debug,ad8522}, /* manual is wrong */ ++ }, ++ { device_id: 0x14e0, ++ name: "pci-6110", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 200, ++ n_aochan: 2, ++ aobits: 16, ++ reg_type: ni_reg_611x, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804,ad8804}, ++ }, ++ { device_id: 0x14f0, ++ name: "pci-6111", ++ n_adchan: 2, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 200, ++ n_aochan: 2, ++ aobits: 16, ++ reg_type: ni_reg_611x, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804,ad8804}, ++ }, ++#if 0 /* Need device IDs */ ++ /* The 6115 boards probably need their own driver */ ++ { device_id: 0x2ed0, ++ name: "pci-6115", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 100, ++ n_aochan: 2, ++ aobits: 16, ++ ao_671x: 1, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ .num_p0_dio_channels = 8, ++ reg_611x: 1, ++ caldac: {ad8804_debug,ad8804_debug,ad8804_debug},/* XXX */ ++ }, ++#endif ++#if 0 /* Need device IDs */ ++ { device_id: 0x0000, ++ name: "pxi-6115", ++ n_adchan: 4, ++ adbits: 12, ++ ai_fifo_depth: 8192, ++ alwaysdither: 0, ++ gainlkup: ai_gain_611x, ++ ai_speed: 100, ++ n_aochan: 2, ++ aobits: 16, ++ ao_671x: 1, ++ ao_unipolar: 0, ++ ao_fifo_depth: 2048, ++ ao_speed: 250, ++ reg_611x: 1, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug,ad8804_debug,ad8804_debug},/* XXX */ ++ }, ++#endif ++ { device_id: 0x1880, ++ name: "pci-6711", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, /* data sheet says 8192, but fifo really holds 16384 samples */ ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++ { device_id: 0x2b90, ++ name: "pxi-6711", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ 
aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++ { device_id: 0x1870, ++ name: "pci-6713", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2b80, ++ name: "pxi-6713", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 12, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2430, ++ name: "pci-6731", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 8192, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++#if 0 /* Need device IDs */ ++ { device_id: 0x0, ++ name: "pxi-6731", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 4, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 8192, ++ .ao_range_table = &a4l_range_bipolar10, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6711, ++ caldac: {ad8804_debug}, ++ }, ++#endif ++ { device_id: 0x2410, ++ name: "pci-6733", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x2420, ++ name: "pxi-6733", ++ n_adchan: 0, /* no analog input */ ++ n_aochan: 8, ++ aobits: 16, ++ ao_unipolar: 0, ++ ao_fifo_depth: 16384, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_6713, ++ caldac: {ad8804_debug,ad8804_debug}, ++ }, ++ { device_id: 0x15b0, ++ name: "pxi-6071e", ++ n_adchan: 64, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x11b0, ++ name: "pxi-6070e", ++ n_adchan: 16, ++ adbits: 12, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 12, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 1000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x18c0, ++ name: "pxi-6052e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_16, ++ ai_speed: 3000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_unipolar: 1, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_speed: 3000, ++ .num_p0_dio_channels = 8, ++ caldac: {mb88341,mb88341,ad8522}, ++ }, ++ { device_id: 0x1580, ++ name: "pxi-6031e", ++ n_adchan: 64, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_14, ++ ai_speed: 10000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 2048, ++ .ao_range_table = &a4l_range_ni_E_ao_ext, ++ ao_unipolar: 1, ++ ao_speed: 
10000, ++ .num_p0_dio_channels = 8, ++ caldac: {dac8800,dac8043,ad8522}, ++ }, ++ { device_id: 0x2890, ++ name: "pci-6036e", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, ++ alwaysdither: 1, ++ gainlkup: ai_gain_4, ++ ai_speed: 5000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 0, ++ .ao_range_table = &a4l_range_bipolar10, ++ ao_unipolar: 0, ++ ao_speed: 100000, ++ .num_p0_dio_channels = 8, ++ caldac: {ad8804_debug}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b0, ++ name: "pci-6220", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 512, //FIXME: guess ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70af, ++ name: "pci-6221", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &a4l_range_bipolar10, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x71bc, ++ name: "pci-6221_37pin", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &a4l_range_bipolar10, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70f2, ++ name: "pci-6224", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x716c, ++ name: "pci-6225", ++ n_adchan: 80, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_622x_ao, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70aa, ++ name: "pci-6229", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ gainlkup: ai_gain_622x, ++ ai_speed: 4000, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_622x_ao, ++ reg_type: ni_reg_622x, ++ ao_unipolar: 0, ++ ao_speed: 1200, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b4, ++ name: "pci-6250", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b8, ++ name: "pci-6251", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x717d, ++ name: "pcie-6251", ++ n_adchan: 16, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 2, ++ 
aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70b7, ++ name: "pci-6254", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70ab, ++ name: "pci-6259", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x717f, ++ name: "pcie-6259", ++ n_adchan: 32, ++ adbits: 16, ++ ai_fifo_depth: 4095, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 800, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_625x_ao, ++ reg_type: ni_reg_625x, ++ ao_unipolar: 0, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++#if 0 /* TODO: fix data size */ ++ { device_id: 0x70b6, ++ name: "pci-6280", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 8191, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bd, ++ name: "pci-6281", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bf, ++ name: "pxi-6281", ++ n_adchan: 16, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 2, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 8, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70bc, ++ name: "pci-6284", ++ n_adchan: 32, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 0, ++ aobits: 0, ++ ao_fifo_depth: 0, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 0, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++ { device_id: 0x70ac, ++ name: "pci-6289", ++ n_adchan: 32, ++ adbits: 18, ++ ai_fifo_depth: 2047, ++ .gainlkup = ai_gain_628x, ++ ai_speed: 1600, ++ n_aochan: 4, ++ aobits: 16, ++ ao_fifo_depth: 8191, ++ .ao_range_table = &range_ni_M_628x_ao, ++ reg_type: ni_reg_628x, ++ ao_unipolar: 1, ++ ao_speed: 357, ++ .num_p0_dio_channels = 32, ++ .caldac = {caldac_none}, ++ has_8255: 0, ++ }, ++#endif /* TODO: fix data size */ ++ { device_id: 0x70C0, ++ name: "pci-6143", ++ n_adchan: 8, ++ adbits: 16, ++ ai_fifo_depth: 1024, ++ alwaysdither: 0, ++ gainlkup: ai_gain_6143, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ reg_type: ni_reg_6143, ++ ao_unipolar: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {ad8804_debug,ad8804_debug}, ++ }, ++ { 
device_id: 0x710D, ++ name: "pxi-6143", ++ n_adchan: 8, ++ adbits: 16, ++ ai_fifo_depth: 1024, ++ alwaysdither: 0, ++ gainlkup: ai_gain_6143, ++ ai_speed: 4000, ++ n_aochan: 0, ++ aobits: 0, ++ reg_type: ni_reg_6143, ++ ao_unipolar: 0, ++ ao_fifo_depth: 0, ++ .num_p0_dio_channels = 8, ++ .caldac = {ad8804_debug,ad8804_debug}, ++ }, ++}; ++#define n_pcimio_boards ((sizeof(ni_boards)/sizeof(ni_boards[0]))) ++ ++/* How we access STC registers */ ++ ++/* We automatically take advantage of STC registers that can be ++ * read/written directly in the I/O space of the board. Most ++ * PCIMIO devices map the low 8 STC registers to iobase+addr*2. ++ * The 611x devices map the write registers to iobase+addr*2, and ++ * the read registers to iobase+(addr-1)*2. */ ++/* However, the 611x boards still aren't working, so I'm disabling ++ * non-windowed STC access temporarily */ ++ ++static void e_series_win_out(struct a4l_device *dev, uint16_t data, int reg) ++{ ++ unsigned long flags; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(reg, Window_Address); ++ ni_writew(data, Window_Data); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock, flags); ++} ++ ++static uint16_t e_series_win_in(struct a4l_device *dev, int reg) ++{ ++ unsigned long flags; ++ uint16_t ret; ++ ++ rtdm_lock_get_irqsave(&devpriv->window_lock, flags); ++ ni_writew(reg, Window_Address); ++ ret = ni_readw(Window_Data); ++ rtdm_lock_put_irqrestore(&devpriv->window_lock,flags); ++ ++ return ret; ++} ++ ++static void m_series_stc_writew(struct a4l_device *dev, uint16_t data, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case ADC_FIFO_Clear: ++ offset = M_Offset_AI_FIFO_Clear; ++ break; ++ case AI_Command_1_Register: ++ offset = M_Offset_AI_Command_1; ++ break; ++ case AI_Command_2_Register: ++ offset = M_Offset_AI_Command_2; ++ break; ++ case AI_Mode_1_Register: ++ offset = M_Offset_AI_Mode_1; ++ break; ++ case AI_Mode_2_Register: ++ offset = M_Offset_AI_Mode_2; ++ break; ++ case AI_Mode_3_Register: ++ offset = M_Offset_AI_Mode_3; ++ break; ++ case AI_Output_Control_Register: ++ offset = M_Offset_AI_Output_Control; ++ break; ++ case AI_Personal_Register: ++ offset = M_Offset_AI_Personal; ++ break; ++ case AI_SI2_Load_A_Register: ++ /* This is actually a 32 bit register on m series boards */ ++ ni_writel(data, M_Offset_AI_SI2_Load_A); ++ return; ++ break; ++ case AI_SI2_Load_B_Register: ++ /* This is actually a 32 bit register on m series boards */ ++ ni_writel(data, M_Offset_AI_SI2_Load_B); ++ return; ++ break; ++ case AI_START_STOP_Select_Register: ++ offset = M_Offset_AI_START_STOP_Select; ++ break; ++ case AI_Trigger_Select_Register: ++ offset = M_Offset_AI_Trigger_Select; ++ break; ++ case Analog_Trigger_Etc_Register: ++ offset = M_Offset_Analog_Trigger_Etc; ++ break; ++ case AO_Command_1_Register: ++ offset = M_Offset_AO_Command_1; ++ break; ++ case AO_Command_2_Register: ++ offset = M_Offset_AO_Command_2; ++ break; ++ case AO_Mode_1_Register: ++ offset = M_Offset_AO_Mode_1; ++ break; ++ case AO_Mode_2_Register: ++ offset = M_Offset_AO_Mode_2; ++ break; ++ case AO_Mode_3_Register: ++ offset = M_Offset_AO_Mode_3; ++ break; ++ case AO_Output_Control_Register: ++ offset = M_Offset_AO_Output_Control; ++ break; ++ case AO_Personal_Register: ++ offset = M_Offset_AO_Personal; ++ break; ++ case AO_Start_Select_Register: ++ offset = M_Offset_AO_Start_Select; ++ break; ++ case AO_Trigger_Select_Register: ++ offset = M_Offset_AO_Trigger_Select; ++ break; ++ case Clock_and_FOUT_Register: ++ offset = 
M_Offset_Clock_and_FOUT; ++ break; ++ case Configuration_Memory_Clear: ++ offset = M_Offset_Configuration_Memory_Clear; ++ break; ++ case DAC_FIFO_Clear: ++ offset = M_Offset_AO_FIFO_Clear; ++ break; ++ case DIO_Control_Register: ++ rtdm_printk("%s: FIXME: register 0x%x does not map cleanly on to m-series boards.\n", __FUNCTION__, reg); ++ return; ++ break; ++ case G_Autoincrement_Register(0): ++ offset = M_Offset_G0_Autoincrement; ++ break; ++ case G_Autoincrement_Register(1): ++ offset = M_Offset_G1_Autoincrement; ++ break; ++ case G_Command_Register(0): ++ offset = M_Offset_G0_Command; ++ break; ++ case G_Command_Register(1): ++ offset = M_Offset_G1_Command; ++ break; ++ case G_Input_Select_Register(0): ++ offset = M_Offset_G0_Input_Select; ++ break; ++ case G_Input_Select_Register(1): ++ offset = M_Offset_G1_Input_Select; ++ break; ++ case G_Mode_Register(0): ++ offset = M_Offset_G0_Mode; ++ break; ++ case G_Mode_Register(1): ++ offset = M_Offset_G1_Mode; ++ break; ++ case Interrupt_A_Ack_Register: ++ offset = M_Offset_Interrupt_A_Ack; ++ break; ++ case Interrupt_A_Enable_Register: ++ offset = M_Offset_Interrupt_A_Enable; ++ break; ++ case Interrupt_B_Ack_Register: ++ offset = M_Offset_Interrupt_B_Ack; ++ break; ++ case Interrupt_B_Enable_Register: ++ offset = M_Offset_Interrupt_B_Enable; ++ break; ++ case Interrupt_Control_Register: ++ offset = M_Offset_Interrupt_Control; ++ break; ++ case IO_Bidirection_Pin_Register: ++ offset = M_Offset_IO_Bidirection_Pin; ++ break; ++ case Joint_Reset_Register: ++ offset = M_Offset_Joint_Reset; ++ break; ++ case RTSI_Trig_A_Output_Register: ++ offset = M_Offset_RTSI_Trig_A_Output; ++ break; ++ case RTSI_Trig_B_Output_Register: ++ offset = M_Offset_RTSI_Trig_B_Output; ++ break; ++ case RTSI_Trig_Direction_Register: ++ offset = M_Offset_RTSI_Trig_Direction; ++ break; ++ /* FIXME: DIO_Output_Register (16 bit reg) is replaced ++ by M_Offset_Static_Digital_Output (32 bit) and ++ M_Offset_SCXI_Serial_Data_Out (8 bit) */ ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return; ++ } ++ ni_writew(data, offset); ++} ++ ++static uint16_t m_series_stc_readw(struct a4l_device *dev, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case AI_Status_1_Register: ++ offset = M_Offset_AI_Status_1; ++ break; ++ case AO_Status_1_Register: ++ offset = M_Offset_AO_Status_1; ++ break; ++ case AO_Status_2_Register: ++ offset = M_Offset_AO_Status_2; ++ break; ++ case DIO_Serial_Input_Register: ++ return ni_readb(M_Offset_SCXI_Serial_Data_In); ++ break; ++ case Joint_Status_1_Register: ++ offset = M_Offset_Joint_Status_1; ++ break; ++ case Joint_Status_2_Register: ++ offset = M_Offset_Joint_Status_2; ++ break; ++ case G_Status_Register: ++ offset = M_Offset_G01_Status; ++ break; ++ default: ++ rtdm_printk("%s: bug! 
" ++ "unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ break; ++ } ++ return ni_readw(offset); ++} ++ ++static void m_series_stc_writel(struct a4l_device *dev, uint32_t data, int reg) ++{ ++ unsigned offset; ++ ++ switch(reg) ++ { ++ case AI_SC_Load_A_Registers: ++ offset = M_Offset_AI_SC_Load_A; ++ break; ++ case AI_SI_Load_A_Registers: ++ offset = M_Offset_AI_SI_Load_A; ++ break; ++ case AO_BC_Load_A_Register: ++ offset = M_Offset_AO_BC_Load_A; ++ break; ++ case AO_UC_Load_A_Register: ++ offset = M_Offset_AO_UC_Load_A; ++ break; ++ case AO_UI_Load_A_Register: ++ offset = M_Offset_AO_UI_Load_A; ++ break; ++ case G_Load_A_Register(0): ++ offset = M_Offset_G0_Load_A; ++ break; ++ case G_Load_A_Register(1): ++ offset = M_Offset_G1_Load_A; ++ break; ++ case G_Load_B_Register(0): ++ offset = M_Offset_G0_Load_B; ++ break; ++ case G_Load_B_Register(1): ++ offset = M_Offset_G1_Load_B; ++ break; ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return; ++ } ++ ni_writel(data, offset); ++} ++ ++static uint32_t m_series_stc_readl(struct a4l_device *dev, int reg) ++{ ++ unsigned offset; ++ switch(reg) ++ { ++ case G_HW_Save_Register(0): ++ offset = M_Offset_G0_HW_Save; ++ break; ++ case G_HW_Save_Register(1): ++ offset = M_Offset_G1_HW_Save; ++ break; ++ case G_Save_Register(0): ++ offset = M_Offset_G0_Save; ++ break; ++ case G_Save_Register(1): ++ offset = M_Offset_G1_Save; ++ break; ++ default: ++ rtdm_printk("%s: bug! unhandled register=0x%x in switch.\n", ++ __FUNCTION__, reg); ++ BUG(); ++ return 0; ++ } ++ return ni_readl(offset); ++} ++ ++static void win_out2(struct a4l_device *dev, uint32_t data, int reg) ++{ ++ devpriv->stc_writew(dev, data >> 16, reg); ++ devpriv->stc_writew(dev, data & 0xffff, reg + 1); ++} ++ ++static uint32_t win_in2(struct a4l_device *dev, int reg) ++{ ++ uint32_t bits; ++ bits = devpriv->stc_readw(dev, reg) << 16; ++ bits |= devpriv->stc_readw(dev, reg + 1); ++ return bits; ++} ++ ++static void m_series_init_eeprom_buffer(struct a4l_device *dev) ++{ ++ static const int Start_Cal_EEPROM = 0x400; ++ static const unsigned window_size = 10; ++ unsigned old_iodwbsr_bits; ++ unsigned old_iodwbsr1_bits; ++ unsigned old_iodwcr1_bits; ++ int i; ++ ++ old_iodwbsr_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ old_iodwbsr1_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ old_iodwcr1_bits = readl(devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ writel(((0x80 | window_size) | devpriv->mite->daq_phys_addr), ++ devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0xf, devpriv->mite->mite_io_addr + 0x30); ++ ++ for(i = 0; i < M_SERIES_EEPROM_SIZE; ++i) ++ { ++ devpriv->eeprom_buffer[i] = ni_readb(Start_Cal_EEPROM + i); ++ } ++ ++ writel(old_iodwbsr1_bits, devpriv->mite->mite_io_addr + MITE_IODWBSR_1); ++ writel(old_iodwbsr_bits, devpriv->mite->mite_io_addr + MITE_IODWBSR); ++ writel(old_iodwcr1_bits, devpriv->mite->mite_io_addr + MITE_IODWCR_1); ++ writel(0x0, devpriv->mite->mite_io_addr + 0x30); ++} ++ ++static void init_6143(struct a4l_device *dev) ++{ ++ /* Disable interrupts */ ++ devpriv->stc_writew(dev, 0, Interrupt_Control_Register); ++ ++ /* Initialise 6143 AI specific bits */ ++ ++ /* Set G0,G1 DMA mode to E series version */ ++ ni_writeb(0x00, Magic_6143); ++ /* Set EOCMode, ADCMode and pipelinedelay */ ++ ni_writeb(0x80, 
PipelineDelay_6143); ++ /* Set EOC Delay */ ++ ni_writeb(0x00, EOC_Set_6143); ++ ++ /* Set the FIFO half full level */ ++ ni_writel(boardtype.ai_fifo_depth / 2, AIFIFO_Flag_6143); ++ ++ /* Strobe Relay disable bit */ ++ devpriv->ai_calib_source_enabled = 0; ++ ni_writew(devpriv->ai_calib_source | Calibration_Channel_6143_RelayOff, ++ Calibration_Channel_6143); ++ ni_writew(devpriv->ai_calib_source, Calibration_Channel_6143); ++} ++ ++static int pcimio_attach(struct a4l_device *dev, a4l_lnkdesc_t *arg) ++{ ++ int ret, bus, slot, i, irq; ++ struct mite_struct *mite = NULL; ++ struct ni_board_struct *board = NULL; ++ ++ if(arg->opts == NULL || arg->opts_size == 0) ++ bus = slot = 0; ++ else { ++ bus = arg->opts_size >= sizeof(unsigned long) ? ++ ((unsigned long *)arg->opts)[0] : 0; ++ slot = arg->opts_size >= sizeof(unsigned long) * 2 ? ++ ((unsigned long *)arg->opts)[1] : 0; ++ } ++ ++ for(i = 0; i < n_pcimio_boards && mite == NULL; i++) { ++ mite = a4l_mite_find_device(bus, slot, ni_boards[i].device_id); ++ board = &ni_boards[i]; ++ } ++ ++ if(mite == 0) ++ return -ENOENT; ++ ++ devpriv->irq_polarity = PCIMIO_IRQ_POLARITY; ++ devpriv->irq_pin = 0; ++ ++ devpriv->mite = mite; ++ devpriv->board_ptr = board; ++ ++ devpriv->ai_mite_ring = mite_alloc_ring(mite); ++ devpriv->ao_mite_ring = mite_alloc_ring(mite); ++ devpriv->cdo_mite_ring = mite_alloc_ring(mite); ++ devpriv->gpct_mite_ring[0] = mite_alloc_ring(mite); ++ devpriv->gpct_mite_ring[1] = mite_alloc_ring(mite); ++ ++ if(devpriv->ai_mite_ring == NULL || ++ devpriv->ao_mite_ring == NULL || ++ devpriv->cdo_mite_ring == NULL || ++ devpriv->gpct_mite_ring[0] == NULL || ++ devpriv->gpct_mite_ring[1] == NULL) ++ return -ENOMEM; ++ ++ a4l_info(dev, "found %s board\n", boardtype.name); ++ ++ if(boardtype.reg_type & ni_reg_m_series_mask) ++ { ++ devpriv->stc_writew = &m_series_stc_writew; ++ devpriv->stc_readw = &m_series_stc_readw; ++ devpriv->stc_writel = &m_series_stc_writel; ++ devpriv->stc_readl = &m_series_stc_readl; ++ }else ++ { ++ devpriv->stc_writew = &e_series_win_out; ++ devpriv->stc_readw = &e_series_win_in; ++ devpriv->stc_writel = &win_out2; ++ devpriv->stc_readl = &win_in2; ++ } ++ ++ ret = a4l_mite_setup(devpriv->mite, 0); ++ if(ret < 0) ++ { ++ a4l_err(dev, "pcmio_attach: error setting up mite\n"); ++ return ret; ++ } ++ ++ if(boardtype.reg_type & ni_reg_m_series_mask) ++ m_series_init_eeprom_buffer(dev); ++ if(boardtype.reg_type == ni_reg_6143) ++ init_6143(dev); ++ ++ irq = mite_irq(devpriv->mite); ++ ++ if(irq == 0){ ++ a4l_warn(dev, "pcimio_attach: unknown irq (bad)\n\n"); ++ }else{ ++ a4l_info(dev, "found irq %u\n", irq); ++ ret = a4l_request_irq(dev, ++ irq, ++ a4l_ni_E_interrupt, RTDM_IRQTYPE_SHARED, dev); ++ if(ret < 0) ++ a4l_err(dev, "pcimio_attach: irq not available\n"); ++ } ++ ++ ret = a4l_ni_E_init(dev); ++ if(ret < 0) ++ return ret; ++ ++ dev->driver->driver_name = devpriv->board_ptr->name; ++ ++ return ret; ++} ++ ++static int pcimio_detach(struct a4l_device *dev) ++{ ++ if(a4l_get_irq(dev)!=A4L_IRQ_UNUSED){ ++ a4l_free_irq(dev,a4l_get_irq(dev)); ++ } ++ ++ if(dev->priv != NULL && devpriv->mite != NULL) ++ { ++ mite_free_ring(devpriv->ai_mite_ring); ++ mite_free_ring(devpriv->ao_mite_ring); ++ mite_free_ring(devpriv->gpct_mite_ring[0]); ++ mite_free_ring(devpriv->gpct_mite_ring[1]); ++ a4l_mite_unsetup(devpriv->mite); ++ } ++ ++ dev->driver->driver_name = NULL; ++ ++ return 0; ++} ++ ++static struct a4l_driver pcimio_drv = { ++ .owner = THIS_MODULE, ++ .board_name = "analogy_ni_pcimio", ++ .driver_name = NULL, ++ 
.attach = pcimio_attach, ++ .detach = pcimio_detach, ++ .privdata_size = sizeof(ni_private), ++}; ++ ++static int __init pcimio_init(void) ++{ ++ return a4l_register_drv(&pcimio_drv); ++} ++ ++static void __exit pcimio_cleanup(void) ++{ ++ a4l_unregister_drv(&pcimio_drv); ++} ++ ++MODULE_DESCRIPTION("Analogy driver for NI PCI-MIO series cards"); ++MODULE_LICENSE("GPL"); ++ ++module_init(pcimio_init); ++module_exit(pcimio_cleanup); +--- linux/drivers/xenomai/analogy/national_instruments/ni_stc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/national_instruments/ni_stc.h 2021-04-07 16:01:27.796633353 +0800 +@@ -0,0 +1,1417 @@ ++/* ++ * Register descriptions for NI DAQ-STC chip ++ * ++ * Copyright (C) 1998-9 David A. Schleef ++ * ++ * This code is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * This code is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this code; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * References: ++ * 340934b.pdf DAQ-STC reference manual ++ * ++ */ ++#ifndef __ANALOGY_NI_STC_H__ ++#define __ANALOGY_NI_STC_H__ ++ ++#include "ni_tio.h" ++ ++#define _bit15 0x8000 ++#define _bit14 0x4000 ++#define _bit13 0x2000 ++#define _bit12 0x1000 ++#define _bit11 0x0800 ++#define _bit10 0x0400 ++#define _bit9 0x0200 ++#define _bit8 0x0100 ++#define _bit7 0x0080 ++#define _bit6 0x0040 ++#define _bit5 0x0020 ++#define _bit4 0x0010 ++#define _bit3 0x0008 ++#define _bit2 0x0004 ++#define _bit1 0x0002 ++#define _bit0 0x0001 ++ ++#define NUM_PFI_OUTPUT_SELECT_REGS 6 ++ ++/* Registers in the National Instruments DAQ-STC chip */ ++ ++#define Interrupt_A_Ack_Register 2 ++#define G0_Gate_Interrupt_Ack _bit15 ++#define G0_TC_Interrupt_Ack _bit14 ++#define AI_Error_Interrupt_Ack _bit13 ++#define AI_STOP_Interrupt_Ack _bit12 ++#define AI_START_Interrupt_Ack _bit11 ++#define AI_START2_Interrupt_Ack _bit10 ++#define AI_START1_Interrupt_Ack _bit9 ++#define AI_SC_TC_Interrupt_Ack _bit8 ++#define AI_SC_TC_Error_Confirm _bit7 ++#define G0_TC_Error_Confirm _bit6 ++#define G0_Gate_Error_Confirm _bit5 ++ ++#define AI_Status_1_Register 2 ++#define Interrupt_A_St _bit15 ++#define AI_FIFO_Full_St _bit14 ++#define AI_FIFO_Half_Full_St _bit13 ++#define AI_FIFO_Empty_St _bit12 ++#define AI_Overrun_St _bit11 ++#define AI_Overflow_St _bit10 ++#define AI_SC_TC_Error_St _bit9 ++#define AI_START2_St _bit8 ++#define AI_START1_St _bit7 ++#define AI_SC_TC_St _bit6 ++#define AI_START_St _bit5 ++#define AI_STOP_St _bit4 ++#define G0_TC_St _bit3 ++#define G0_Gate_Interrupt_St _bit2 ++#define AI_FIFO_Request_St _bit1 ++#define Pass_Thru_0_Interrupt_St _bit0 ++ ++#define AI_Status_2_Register 5 ++ ++#define Interrupt_B_Ack_Register 3 ++#define G1_Gate_Error_Confirm _bit1 ++#define G1_TC_Error_Confirm _bit2 ++#define AO_BC_TC_Trigger_Error_Confirm _bit3 ++#define AO_BC_TC_Error_Confirm _bit4 ++#define AO_UI2_TC_Error_Confrim _bit5 ++#define AO_UI2_TC_Interrupt_Ack _bit6 ++#define AO_UC_TC_Interrupt_Ack _bit7 ++#define AO_BC_TC_Interrupt_Ack _bit8 
++#define AO_START1_Interrupt_Ack _bit9 ++#define AO_UPDATE_Interrupt_Ack _bit10 ++#define AO_START_Interrupt_Ack _bit11 ++#define AO_STOP_Interrupt_Ack _bit12 ++#define AO_Error_Interrupt_Ack _bit13 ++#define G1_TC_Interrupt_Ack _bit14 ++#define G1_Gate_Interrupt_Ack _bit15 ++ ++#define AO_Status_1_Register 3 ++#define Interrupt_B_St _bit15 ++#define AO_FIFO_Full_St _bit14 ++#define AO_FIFO_Half_Full_St _bit13 ++#define AO_FIFO_Empty_St _bit12 ++#define AO_BC_TC_Error_St _bit11 ++#define AO_START_St _bit10 ++#define AO_Overrun_St _bit9 ++#define AO_START1_St _bit8 ++#define AO_BC_TC_St _bit7 ++#define AO_UC_TC_St _bit6 ++#define AO_UPDATE_St _bit5 ++#define AO_UI2_TC_St _bit4 ++#define G1_TC_St _bit3 ++#define G1_Gate_Interrupt_St _bit2 ++#define AO_FIFO_Request_St _bit1 ++#define Pass_Thru_1_Interrupt_St _bit0 ++ ++ ++#define AI_Command_2_Register 4 ++#define AI_End_On_SC_TC _bit15 ++#define AI_End_On_End_Of_Scan _bit14 ++#define AI_START1_Disable _bit11 ++#define AI_SC_Save_Trace _bit10 ++#define AI_SI_Switch_Load_On_SC_TC _bit9 ++#define AI_SI_Switch_Load_On_STOP _bit8 ++#define AI_SI_Switch_Load_On_TC _bit7 ++#define AI_SC_Switch_Load_On_TC _bit4 ++#define AI_STOP_Pulse _bit3 ++#define AI_START_Pulse _bit2 ++#define AI_START2_Pulse _bit1 ++#define AI_START1_Pulse _bit0 ++ ++#define AO_Command_2_Register 5 ++#define AO_End_On_BC_TC(x) (((x) & 0x3) << 14) ++#define AO_Start_Stop_Gate_Enable _bit13 ++#define AO_UC_Save_Trace _bit12 ++#define AO_BC_Gate_Enable _bit11 ++#define AO_BC_Save_Trace _bit10 ++#define AO_UI_Switch_Load_On_BC_TC _bit9 ++#define AO_UI_Switch_Load_On_Stop _bit8 ++#define AO_UI_Switch_Load_On_TC _bit7 ++#define AO_UC_Switch_Load_On_BC_TC _bit6 ++#define AO_UC_Switch_Load_On_TC _bit5 ++#define AO_BC_Switch_Load_On_TC _bit4 ++#define AO_Mute_B _bit3 ++#define AO_Mute_A _bit2 ++#define AO_UPDATE2_Pulse _bit1 ++#define AO_START1_Pulse _bit0 ++ ++#define AO_Status_2_Register 6 ++ ++#define DIO_Parallel_Input_Register 7 ++ ++#define AI_Command_1_Register 8 ++#define AI_Analog_Trigger_Reset _bit14 ++#define AI_Disarm _bit13 ++#define AI_SI2_Arm _bit12 ++#define AI_SI2_Load _bit11 ++#define AI_SI_Arm _bit10 ++#define AI_SI_Load _bit9 ++#define AI_DIV_Arm _bit8 ++#define AI_DIV_Load _bit7 ++#define AI_SC_Arm _bit6 ++#define AI_SC_Load _bit5 ++#define AI_SCAN_IN_PROG_Pulse _bit4 ++#define AI_EXTMUX_CLK_Pulse _bit3 ++#define AI_LOCALMUX_CLK_Pulse _bit2 ++#define AI_SC_TC_Pulse _bit1 ++#define AI_CONVERT_Pulse _bit0 ++ ++#define AO_Command_1_Register 9 ++#define AO_Analog_Trigger_Reset _bit15 ++#define AO_START_Pulse _bit14 ++#define AO_Disarm _bit13 ++#define AO_UI2_Arm_Disarm _bit12 ++#define AO_UI2_Load _bit11 ++#define AO_UI_Arm _bit10 ++#define AO_UI_Load _bit9 ++#define AO_UC_Arm _bit8 ++#define AO_UC_Load _bit7 ++#define AO_BC_Arm _bit6 ++#define AO_BC_Load _bit5 ++#define AO_DAC1_Update_Mode _bit4 ++#define AO_LDAC1_Source_Select _bit3 ++#define AO_DAC0_Update_Mode _bit2 ++#define AO_LDAC0_Source_Select _bit1 ++#define AO_UPDATE_Pulse _bit0 ++ ++ ++#define DIO_Output_Register 10 ++#define DIO_Parallel_Data_Out(a) ((a)&0xff) ++#define DIO_Parallel_Data_Mask 0xff ++#define DIO_SDOUT _bit0 ++#define DIO_SDIN _bit4 ++#define DIO_Serial_Data_Out(a) (((a)&0xff)<<8) ++#define DIO_Serial_Data_Mask 0xff00 ++ ++#define DIO_Control_Register 11 ++#define DIO_Software_Serial_Control _bit11 ++#define DIO_HW_Serial_Timebase _bit10 ++#define DIO_HW_Serial_Enable _bit9 ++#define DIO_HW_Serial_Start _bit8 ++#define DIO_Pins_Dir(a) ((a)&0xff) ++#define DIO_Pins_Dir_Mask 0xff ++ ++#define 
AI_Mode_1_Register 12 ++#define AI_CONVERT_Source_Select(a) (((a) & 0x1f) << 11) ++#define AI_SI_Source_select(a) (((a) & 0x1f) << 6) ++#define AI_CONVERT_Source_Polarity _bit5 ++#define AI_SI_Source_Polarity _bit4 ++#define AI_Start_Stop _bit3 ++#define AI_Mode_1_Reserved _bit2 ++#define AI_Continuous _bit1 ++#define AI_Trigger_Once _bit0 ++ ++#define AI_Mode_2_Register 13 ++#define AI_SC_Gate_Enable _bit15 ++#define AI_Start_Stop_Gate_Enable _bit14 ++#define AI_Pre_Trigger _bit13 ++#define AI_External_MUX_Present _bit12 ++#define AI_SI2_Initial_Load_Source _bit9 ++#define AI_SI2_Reload_Mode _bit8 ++#define AI_SI_Initial_Load_Source _bit7 ++#define AI_SI_Reload_Mode(a) (((a) & 0x7)<<4) ++#define AI_SI_Write_Switch _bit3 ++#define AI_SC_Initial_Load_Source _bit2 ++#define AI_SC_Reload_Mode _bit1 ++#define AI_SC_Write_Switch _bit0 ++ ++#define AI_SI_Load_A_Registers 14 ++#define AI_SI_Load_B_Registers 16 ++#define AI_SC_Load_A_Registers 18 ++#define AI_SC_Load_B_Registers 20 ++#define AI_SI_Save_Registers 64 ++#define AI_SC_Save_Registers 66 ++ ++#define AI_SI2_Load_A_Register 23 ++#define AI_SI2_Load_B_Register 25 ++ ++#define Joint_Status_1_Register 27 ++#define DIO_Serial_IO_In_Progress_St _bit12 ++ ++#define DIO_Serial_Input_Register 28 ++#define Joint_Status_2_Register 29 ++#define AO_TMRDACWRs_In_Progress_St _bit5 ++ ++#define AO_Mode_1_Register 38 ++#define AO_UPDATE_Source_Select(x) (((x)&0x1f)<<11) ++#define AO_UI_Source_Select(x) (((x)&0x1f)<<6) ++#define AO_Multiple_Channels _bit5 ++#define AO_UPDATE_Source_Polarity _bit4 ++#define AO_UI_Source_Polarity _bit3 ++#define AO_UC_Switch_Load_Every_TC _bit2 ++#define AO_Continuous _bit1 ++#define AO_Trigger_Once _bit0 ++ ++#define AO_Mode_2_Register 39 ++#define AO_FIFO_Mode_Mask ( 0x3 << 14 ) ++#define AO_FIFO_Mode_HF_to_F (3<<14) ++#define AO_FIFO_Mode_F (2<<14) ++#define AO_FIFO_Mode_HF (1<<14) ++#define AO_FIFO_Mode_E (0<<14) ++#define AO_FIFO_Retransmit_Enable _bit13 ++#define AO_START1_Disable _bit12 ++#define AO_UC_Initial_Load_Source _bit11 ++#define AO_UC_Write_Switch _bit10 ++#define AO_UI2_Initial_Load_Source _bit9 ++#define AO_UI2_Reload_Mode _bit8 ++#define AO_UI_Initial_Load_Source _bit7 ++#define AO_UI_Reload_Mode(x) (((x) & 0x7) << 4) ++#define AO_UI_Write_Switch _bit3 ++#define AO_BC_Initial_Load_Source _bit2 ++#define AO_BC_Reload_Mode _bit1 ++#define AO_BC_Write_Switch _bit0 ++ ++#define AO_UI_Load_A_Register 40 ++#define AO_UI_Load_A_Register_High 40 ++#define AO_UI_Load_A_Register_Low 41 ++#define AO_UI_Load_B_Register 42 ++#define AO_UI_Save_Registers 16 ++#define AO_BC_Load_A_Register 44 ++#define AO_BC_Load_A_Register_High 44 ++#define AO_BC_Load_A_Register_Low 45 ++#define AO_BC_Load_B_Register 46 ++#define AO_BC_Load_B_Register_High 46 ++#define AO_BC_Load_B_Register_Low 47 ++#define AO_BC_Save_Registers 18 ++#define AO_UC_Load_A_Register 48 ++#define AO_UC_Load_A_Register_High 48 ++#define AO_UC_Load_A_Register_Low 49 ++#define AO_UC_Load_B_Register 50 ++#define AO_UC_Save_Registers 20 ++ ++#define Clock_and_FOUT_Register 56 ++#define FOUT_Enable _bit15 ++#define FOUT_Timebase_Select _bit14 ++#define DIO_Serial_Out_Divide_By_2 _bit13 ++#define Slow_Internal_Time_Divide_By_2 _bit12 ++#define Slow_Internal_Timebase _bit11 ++#define G_Source_Divide_By_2 _bit10 ++#define Clock_To_Board_Divide_By_2 _bit9 ++#define Clock_To_Board _bit8 ++#define AI_Output_Divide_By_2 _bit7 ++#define AI_Source_Divide_By_2 _bit6 ++#define AO_Output_Divide_By_2 _bit5 ++#define AO_Source_Divide_By_2 _bit4 ++#define FOUT_Divider_mask 
0xf ++#define FOUT_Divider(x) (((x) & 0xf) << 0) ++ ++#define IO_Bidirection_Pin_Register 57 ++#define RTSI_Trig_Direction_Register 58 ++#define Drive_RTSI_Clock_Bit 0x1 ++#define Use_RTSI_Clock_Bit 0x2 ++ ++static inline unsigned int RTSI_Output_Bit(unsigned channel, int is_mseries) ++{ ++ unsigned max_channel; ++ unsigned base_bit_shift; ++ if(is_mseries) ++ { ++ base_bit_shift = 8; ++ max_channel = 7; ++ }else ++ { ++ base_bit_shift = 9; ++ max_channel = 6; ++ } ++ if(channel > max_channel) ++ { ++ rtdm_printk("%s: bug, invalid RTSI_channel=%i\n", ++ __FUNCTION__, channel); ++ return 0; ++ } ++ return 1 << (base_bit_shift + channel); ++} ++ ++#define Interrupt_Control_Register 59 ++#define Interrupt_B_Enable _bit15 ++#define Interrupt_B_Output_Select(x) ((x)<<12) ++#define Interrupt_A_Enable _bit11 ++#define Interrupt_A_Output_Select(x) ((x)<<8) ++#define Pass_Thru_0_Interrupt_Polarity _bit3 ++#define Pass_Thru_1_Interrupt_Polarity _bit2 ++#define Interrupt_Output_On_3_Pins _bit1 ++#define Interrupt_Output_Polarity _bit0 ++ ++#define AI_Output_Control_Register 60 ++#define AI_START_Output_Select _bit10 ++#define AI_SCAN_IN_PROG_Output_Select(x) (((x) & 0x3) << 8) ++#define AI_EXTMUX_CLK_Output_Select(x) (((x) & 0x3) << 6) ++#define AI_LOCALMUX_CLK_Output_Select(x) ((x)<<4) ++#define AI_SC_TC_Output_Select(x) ((x)<<2) ++#define AI_CONVERT_Output_High_Z 0 ++#define AI_CONVERT_Output_Ground 1 ++#define AI_CONVERT_Output_Enable_Low 2 ++#define AI_CONVERT_Output_Enable_High 3 ++#define AI_CONVERT_Output_Select(x) ((x) & 0x3) ++ ++#define AI_START_STOP_Select_Register 62 ++#define AI_START_Polarity _bit15 ++#define AI_STOP_Polarity _bit14 ++#define AI_STOP_Sync _bit13 ++#define AI_STOP_Edge _bit12 ++#define AI_STOP_Select(a) (((a) & 0x1f)<<7) ++#define AI_START_Sync _bit6 ++#define AI_START_Edge _bit5 ++#define AI_START_Select(a) ((a) & 0x1f) ++ ++#define AI_Trigger_Select_Register 63 ++#define AI_START1_Polarity _bit15 ++#define AI_START2_Polarity _bit14 ++#define AI_START2_Sync _bit13 ++#define AI_START2_Edge _bit12 ++#define AI_START2_Select(a) (((a) & 0x1f) << 7) ++#define AI_START1_Sync _bit6 ++#define AI_START1_Edge _bit5 ++#define AI_START1_Select(a) ((a) & 0x1f) ++ ++#define AI_DIV_Load_A_Register 64 ++ ++#define AO_Start_Select_Register 66 ++#define AO_UI2_Software_Gate _bit15 ++#define AO_UI2_External_Gate_Polarity _bit14 ++#define AO_START_Polarity _bit13 ++#define AO_AOFREQ_Enable _bit12 ++#define AO_UI2_External_Gate_Select(a) (((a) & 0x1f) << 7) ++#define AO_START_Sync _bit6 ++#define AO_START_Edge _bit5 ++#define AO_START_Select(a) ((a) & 0x1f) ++ ++#define AO_Trigger_Select_Register 67 ++#define AO_UI2_External_Gate_Enable _bit15 ++#define AO_Delayed_START1 _bit14 ++#define AO_START1_Polarity _bit13 ++#define AO_UI2_Source_Polarity _bit12 ++#define AO_UI2_Source_Select(x) (((x)&0x1f)<<7) ++#define AO_START1_Sync _bit6 ++#define AO_START1_Edge _bit5 ++#define AO_START1_Select(x) (((x)&0x1f)<<0) ++ ++#define AO_Mode_3_Register 70 ++#define AO_UI2_Switch_Load_Next_TC _bit13 ++#define AO_UC_Switch_Load_Every_BC_TC _bit12 ++#define AO_Trigger_Length _bit11 ++#define AO_Stop_On_Overrun_Error _bit5 ++#define AO_Stop_On_BC_TC_Trigger_Error _bit4 ++#define AO_Stop_On_BC_TC_Error _bit3 ++#define AO_Not_An_UPDATE _bit2 ++#define AO_Software_Gate _bit1 ++#define AO_Last_Gate_Disable _bit0 /* M Series only */ ++ ++#define Joint_Reset_Register 72 ++#define Software_Reset _bit11 ++#define AO_Configuration_End _bit9 ++#define AI_Configuration_End _bit8 ++#define AO_Configuration_Start _bit5 
++#define AI_Configuration_Start _bit4 ++#define G1_Reset _bit3 ++#define G0_Reset _bit2 ++#define AO_Reset _bit1 ++#define AI_Reset _bit0 ++ ++#define Interrupt_A_Enable_Register 73 ++#define Pass_Thru_0_Interrupt_Enable _bit9 ++#define G0_Gate_Interrupt_Enable _bit8 ++#define AI_FIFO_Interrupt_Enable _bit7 ++#define G0_TC_Interrupt_Enable _bit6 ++#define AI_Error_Interrupt_Enable _bit5 ++#define AI_STOP_Interrupt_Enable _bit4 ++#define AI_START_Interrupt_Enable _bit3 ++#define AI_START2_Interrupt_Enable _bit2 ++#define AI_START1_Interrupt_Enable _bit1 ++#define AI_SC_TC_Interrupt_Enable _bit0 ++ ++#define Interrupt_B_Enable_Register 75 ++#define Pass_Thru_1_Interrupt_Enable _bit11 ++#define G1_Gate_Interrupt_Enable _bit10 ++#define G1_TC_Interrupt_Enable _bit9 ++#define AO_FIFO_Interrupt_Enable _bit8 ++#define AO_UI2_TC_Interrupt_Enable _bit7 ++#define AO_UC_TC_Interrupt_Enable _bit6 ++#define AO_Error_Interrupt_Enable _bit5 ++#define AO_STOP_Interrupt_Enable _bit4 ++#define AO_START_Interrupt_Enable _bit3 ++#define AO_UPDATE_Interrupt_Enable _bit2 ++#define AO_START1_Interrupt_Enable _bit1 ++#define AO_BC_TC_Interrupt_Enable _bit0 ++ ++#define Second_IRQ_A_Enable_Register 74 ++#define AI_SC_TC_Second_Irq_Enable _bit0 ++#define AI_START1_Second_Irq_Enable _bit1 ++#define AI_START2_Second_Irq_Enable _bit2 ++#define AI_START_Second_Irq_Enable _bit3 ++#define AI_STOP_Second_Irq_Enable _bit4 ++#define AI_Error_Second_Irq_Enable _bit5 ++#define G0_TC_Second_Irq_Enable _bit6 ++#define AI_FIFO_Second_Irq_Enable _bit7 ++#define G0_Gate_Second_Irq_Enable _bit8 ++#define Pass_Thru_0_Second_Irq_Enable _bit9 ++ ++#define Second_IRQ_B_Enable_Register 76 ++#define AO_BC_TC_Second_Irq_Enable _bit0 ++#define AO_START1_Second_Irq_Enable _bit1 ++#define AO_UPDATE_Second_Irq_Enable _bit2 ++#define AO_START_Second_Irq_Enable _bit3 ++#define AO_STOP_Second_Irq_Enable _bit4 ++#define AO_Error_Second_Irq_Enable _bit5 ++#define AO_UC_TC_Second_Irq_Enable _bit6 ++#define AO_UI2_TC_Second_Irq_Enable _bit7 ++#define AO_FIFO_Second_Irq_Enable _bit8 ++#define G1_TC_Second_Irq_Enable _bit9 ++#define G1_Gate_Second_Irq_Enable _bit10 ++#define Pass_Thru_1_Second_Irq_Enable _bit11 ++ ++#define AI_Personal_Register 77 ++#define AI_SHIFTIN_Pulse_Width _bit15 ++#define AI_EOC_Polarity _bit14 ++#define AI_SOC_Polarity _bit13 ++#define AI_SHIFTIN_Polarity _bit12 ++#define AI_CONVERT_Pulse_Timebase _bit11 ++#define AI_CONVERT_Pulse_Width _bit10 ++#define AI_CONVERT_Original_Pulse _bit9 ++#define AI_FIFO_Flags_Polarity _bit8 ++#define AI_Overrun_Mode _bit7 ++#define AI_EXTMUX_CLK_Pulse_Width _bit6 ++#define AI_LOCALMUX_CLK_Pulse_Width _bit5 ++#define AI_AIFREQ_Polarity _bit4 ++ ++#define AO_Personal_Register 78 ++#define AO_Interval_Buffer_Mode _bit3 ++#define AO_BC_Source_Select _bit4 ++#define AO_UPDATE_Pulse_Width _bit5 ++#define AO_UPDATE_Pulse_Timebase _bit6 ++#define AO_UPDATE_Original_Pulse _bit7 ++#define AO_DMA_PIO_Control _bit8 /* M Series: reserved */ ++#define AO_AOFREQ_Polarity _bit9 /* M Series: reserved */ ++#define AO_FIFO_Enable _bit10 ++#define AO_FIFO_Flags_Polarity _bit11 /* M Series: reserved */ ++#define AO_TMRDACWR_Pulse_Width _bit12 ++#define AO_Fast_CPU _bit13 /* M Series: reserved */ ++#define AO_Number_Of_DAC_Packages _bit14 /* 1 for "single" mode, ++ 0 for "dual" */ ++#define AO_Multiple_DACS_Per_Package _bit15 /* M Series only */ ++ ++#define RTSI_Trig_A_Output_Register 79 ++ ++#define RTSI_Trig_B_Output_Register 80 ++#define RTSI_Sub_Selection_1_Bit _bit15 /* not for M Series */ ++#define 
RTSI_Trig_Output_Bits(x, y) ((y & 0xf) << ((x % 4) * 4)) ++#define RTSI_Trig_Output_Mask(x) (0xf << ((x % 4) * 4)) ++#define RTSI_Trig_Output_Source(x, y) ((y >> ((x % 4) * 4)) & 0xf) ++ ++#define RTSI_Board_Register 81 ++#define Write_Strobe_0_Register 82 ++#define Write_Strobe_1_Register 83 ++#define Write_Strobe_2_Register 84 ++#define Write_Strobe_3_Register 85 ++ ++#define AO_Output_Control_Register 86 ++#define AO_External_Gate_Enable _bit15 ++#define AO_External_Gate_Select(x) (((x)&0x1f)<<10) ++#define AO_Number_Of_Channels(x) (((x)&0xf)<<6) ++#define AO_UPDATE2_Output_Select(x) (((x)&0x3)<<4) ++#define AO_External_Gate_Polarity _bit3 ++#define AO_UPDATE2_Output_Toggle _bit2 ++#define AO_Update_Output_High_Z 0 ++#define AO_Update_Output_Ground 1 ++#define AO_Update_Output_Enable_Low 2 ++#define AO_Update_Output_Enable_High 3 ++#define AO_UPDATE_Output_Select(x) (x&0x3) ++ ++#define AI_Mode_3_Register 87 ++#define AI_Trigger_Length _bit15 ++#define AI_Delay_START _bit14 ++#define AI_Software_Gate _bit13 ++#define AI_SI_Special_Trigger_Delay _bit12 ++#define AI_SI2_Source_Select _bit11 ++#define AI_Delayed_START2 _bit10 ++#define AI_Delayed_START1 _bit9 ++#define AI_External_Gate_Mode _bit8 ++#define AI_FIFO_Mode_HF_to_E (3<<6) ++#define AI_FIFO_Mode_F (2<<6) ++#define AI_FIFO_Mode_HF (1<<6) ++#define AI_FIFO_Mode_NE (0<<6) ++#define AI_External_Gate_Polarity _bit5 ++#define AI_External_Gate_Select(a) ((a) & 0x1f) ++ ++#define G_Autoincrement_Register(a) (68+(a)) ++#define G_Command_Register(a) (6+(a)) ++#define G_HW_Save_Register(a) (8+(a)*2) ++#define G_HW_Save_Register_High(a) (8+(a)*2) ++#define G_HW_Save_Register_Low(a) (9+(a)*2) ++#define G_Input_Select_Register(a) (36+(a)) ++#define G_Load_A_Register(a) (28+(a)*4) ++#define G_Load_A_Register_High(a) (28+(a)*4) ++#define G_Load_A_Register_Low(a) (29+(a)*4) ++#define G_Load_B_Register(a) (30+(a)*4) ++#define G_Load_B_Register_High(a) (30+(a)*4) ++#define G_Load_B_Register_Low(a) (31+(a)*4) ++#define G_Mode_Register(a) (26+(a)) ++#define G_Save_Register(a) (12+(a)*2) ++#define G_Save_Register_High(a) (12+(a)*2) ++#define G_Save_Register_Low(a) (13+(a)*2) ++#define G_Status_Register 4 ++#define Analog_Trigger_Etc_Register 61 ++ ++/* command register */ ++#define G_Disarm_Copy _bit15 /* strobe */ ++#define G_Save_Trace_Copy _bit14 ++#define G_Arm_Copy _bit13 /* strobe */ ++#define G_Bank_Switch_Start _bit10 /* strobe */ ++#define G_Little_Big_Endian _bit9 ++#define G_Synchronized_Gate _bit8 ++#define G_Write_Switch _bit7 ++#define G_Up_Down(a) (((a)&0x03)<<5) ++#define G_Disarm _bit4 /* strobe */ ++#define G_Analog_Trigger_Reset _bit3 /* strobe */ ++#define G_Save_Trace _bit1 ++#define G_Arm _bit0 /* strobe */ ++ ++/* channel agnostic names for the command register #defines */ ++#define G_Bank_Switch_Enable _bit12 ++#define G_Bank_Switch_Mode _bit11 ++#define G_Load _bit2 /* strobe */ ++ ++/* input select register */ ++#define G_Gate_Select(a) (((a)&0x1f)<<7) ++#define G_Source_Select(a) (((a)&0x1f)<<2) ++#define G_Write_Acknowledges_Irq _bit1 ++#define G_Read_Acknowledges_Irq _bit0 ++ ++/* same input select register, but with channel agnostic names */ ++#define G_Source_Polarity _bit15 ++#define G_Output_Polarity _bit14 ++#define G_OR_Gate _bit13 ++#define G_Gate_Select_Load_Source _bit12 ++ ++/* mode register */ ++#define G_Loading_On_TC _bit12 ++#define G_Output_Mode(a) (((a)&0x03)<<8) ++#define G_Trigger_Mode_For_Edge_Gate(a) (((a)&0x03)<<3) ++#define G_Gating_Mode(a) (((a)&0x03)<<0) ++ ++/* same input mode register, but with 
channel agnostic names */ ++#define G_Load_Source_Select _bit7 ++#define G_Reload_Source_Switching _bit15 ++#define G_Loading_On_Gate _bit14 ++#define G_Gate_Polarity _bit13 ++ ++#define G_Counting_Once(a) (((a)&0x03)<<10) ++#define G_Stop_Mode(a) (((a)&0x03)<<5) ++#define G_Gate_On_Both_Edges _bit2 ++ ++/* G_Status_Register */ ++#define G1_Gate_Error_St _bit15 ++#define G0_Gate_Error_St _bit14 ++#define G1_TC_Error_St _bit13 ++#define G0_TC_Error_St _bit12 ++#define G1_No_Load_Between_Gates_St _bit11 ++#define G0_No_Load_Between_Gates_St _bit10 ++#define G1_Armed_St _bit9 ++#define G0_Armed_St _bit8 ++#define G1_Stale_Data_St _bit7 ++#define G0_Stale_Data_St _bit6 ++#define G1_Next_Load_Source_St _bit5 ++#define G0_Next_Load_Source_St _bit4 ++#define G1_Counting_St _bit3 ++#define G0_Counting_St _bit2 ++#define G1_Save_St _bit1 ++#define G0_Save_St _bit0 ++ ++/* general purpose counter timer */ ++#define G_Autoincrement(a) ((a)<<0) ++ ++/*Analog_Trigger_Etc_Register*/ ++#define Analog_Trigger_Mode(x) ((x) & 0x7) ++#define Analog_Trigger_Enable _bit3 ++#define Analog_Trigger_Drive _bit4 ++#define GPFO_1_Output_Select _bit7 ++#define GPFO_0_Output_Select(a) ((a)<<11) ++#define GPFO_0_Output_Enable _bit14 ++#define GPFO_1_Output_Enable _bit15 ++ ++/* Additional windowed registers unique to E series */ ++ ++/* 16 bit registers shadowed from DAQ-STC */ ++#define Window_Address 0x00 ++#define Window_Data 0x02 ++ ++#define Configuration_Memory_Clear 82 ++#define ADC_FIFO_Clear 83 ++#define DAC_FIFO_Clear 84 ++ ++/* i/o port offsets */ ++ ++/* 8 bit registers */ ++#define XXX_Status 0x01 ++#define PROMOUT _bit0 ++#define AI_FIFO_LOWER_NOT_EMPTY _bit3 ++ ++#define Serial_Command 0x0d ++#define Misc_Command 0x0f ++#define Port_A 0x19 ++#define Port_B 0x1b ++#define Port_C 0x1d ++#define Configuration 0x1f ++#define Strobes 0x01 ++#define Channel_A_Mode 0x03 ++#define Channel_B_Mode 0x05 ++#define Channel_C_Mode 0x07 ++#define AI_AO_Select 0x09 ++#define AI_DMA_Select_Shift 0 ++#define AI_DMA_Select_Mask 0xf ++#define AO_DMA_Select_Shift 4 ++#define AO_DMA_Select_Mask (0xf << AO_DMA_Select_Shift) ++ ++#define G0_G1_Select 0x0b ++ ++static inline unsigned ni_stc_dma_channel_select_bitfield(unsigned channel) ++{ ++ if(channel < 4) return 1 << channel; ++ if(channel == 4) return 0x3; ++ if(channel == 5) return 0x5; ++ BUG(); ++ return 0; ++} ++static inline unsigned GPCT_DMA_Select_Bits(unsigned gpct_index, unsigned mite_channel) ++{ ++ BUG_ON(gpct_index > 1); ++ return ni_stc_dma_channel_select_bitfield(mite_channel) << (4 * gpct_index); ++} ++static inline unsigned GPCT_DMA_Select_Mask(unsigned gpct_index) ++{ ++ BUG_ON(gpct_index > 1); ++ return 0xf << (4 * gpct_index); ++} ++ ++/* 16 bit registers */ ++ ++#define Configuration_Memory_Low 0x10 ++#define AI_DITHER _bit9 ++#define AI_LAST_CHANNEL _bit15 ++ ++#define Configuration_Memory_High 0x12 ++#define AI_AC_COUPLE _bit11 ++#define AI_DIFFERENTIAL _bit12 ++#define AI_COMMON _bit13 ++#define AI_GROUND (_bit12|_bit13) ++#define AI_CONFIG_CHANNEL(x) (x&0x3f) ++ ++#define ADC_FIFO_Data_Register 0x1c ++ ++#define AO_Configuration 0x16 ++#define AO_Bipolar _bit0 ++#define AO_Deglitch _bit1 ++#define AO_Ext_Ref _bit2 ++#define AO_Ground_Ref _bit3 ++#define AO_Channel(x) ((x) << 8) ++ ++#define DAC_FIFO_Data 0x1e ++#define DAC0_Direct_Data 0x18 ++#define DAC1_Direct_Data 0x1a ++ ++/* 611x registers (these boards differ from the e-series) */ ++ ++#define Magic_611x 0x19 /* w8 (new) */ ++#define Calibration_Channel_Select_611x 0x1a /* w16 (new) */ ++#define 
ADC_FIFO_Data_611x 0x1c /* r32 (incompatible) */ ++#define AI_FIFO_Offset_Load_611x 0x05 /* r8 (new) */ ++#define DAC_FIFO_Data_611x 0x14 /* w32 (incompatible) */ ++#define Cal_Gain_Select_611x 0x05 /* w8 (new) */ ++ ++#define AO_Window_Address_611x 0x18 ++#define AO_Window_Data_611x 0x1e ++ ++/* 6143 registers */ ++#define Magic_6143 0x19 /* w8 */ ++#define G0G1_DMA_Select_6143 0x0B /* w8 */ ++#define PipelineDelay_6143 0x1f /* w8 */ ++#define EOC_Set_6143 0x1D /* w8 */ ++#define AIDMA_Select_6143 0x09 /* w8 */ ++#define AIFIFO_Data_6143 0x8C /* w32 */ ++#define AIFIFO_Flag_6143 0x84 /* w32 */ ++#define AIFIFO_Control_6143 0x88 /* w32 */ ++#define AIFIFO_Status_6143 0x88 /* w32 */ ++#define AIFIFO_DMAThreshold_6143 0x90 /* w32 */ ++#define AIFIFO_Words_Available_6143 0x94 /* w32 */ ++ ++#define Calibration_Channel_6143 0x42 /* w16 */ ++#define Calibration_LowTime_6143 0x20 /* w16 */ ++#define Calibration_HighTime_6143 0x22 /* w16 */ ++#define Relay_Counter_Load_Val__6143 0x4C /* w32 */ ++#define Signature_6143 0x50 /* w32 */ ++#define Release_Date_6143 0x54 /* w32 */ ++#define Release_Oldest_Date_6143 0x58 /* w32 */ ++ ++#define Calibration_Channel_6143_RelayOn 0x8000 /* Calibration relay switch On */ ++#define Calibration_Channel_6143_RelayOff 0x4000 /* Calibration relay switch Off */ ++#define Calibration_Channel_Gnd_Gnd 0x00 /* Offset Calibration */ ++#define Calibration_Channel_2v5_Gnd 0x02 /* 2.5V Reference */ ++#define Calibration_Channel_Pwm_Gnd 0x05 /* +/- 5V Self Cal */ ++#define Calibration_Channel_2v5_Pwm 0x0a /* PWM Calibration */ ++#define Calibration_Channel_Pwm_Pwm 0x0d /* CMRR */ ++#define Calibration_Channel_Gnd_Pwm 0x0e /* PWM Calibration */ ++ ++/* 671x, 611x registers */ ++ ++/* 671xi 611x windowed ao registers */ ++#define AO_Immediate_671x 0x11 /* W 16 */ ++#define AO_Timed_611x 0x10 /* W 16 */ ++#define AO_FIFO_Offset_Load_611x 0x13 /* W32 */ ++#define AO_Later_Single_Point_Updates 0x14 /* W 16 */ ++#define AO_Waveform_Generation_611x 0x15 /* W 16 */ ++#define AO_Misc_611x 0x16 /* W 16 */ ++#define AO_Calibration_Channel_Select_67xx 0x17 /* W 16 */ ++#define AO_Configuration_2_67xx 0x18 /* W 16 */ ++#define CAL_ADC_Command_67xx 0x19 /* W 8 */ ++#define CAL_ADC_Status_67xx 0x1a /* R 8 */ ++#define CAL_ADC_Data_67xx 0x1b /* R 16 */ ++#define CAL_ADC_Config_Data_High_Word_67xx 0x1c /* RW 16 */ ++#define CAL_ADC_Config_Data_Low_Word_67xx 0x1d /* RW 16 */ ++ ++static inline unsigned int DACx_Direct_Data_671x(int channel) ++{ ++ return channel; ++} ++ ++#define CLEAR_WG _bit0 ++ ++#define CSCFG_CAL_CONTROL_MASK 0x7 ++#define CSCFG_SELF_CAL_OFFSET 0x1 ++#define CSCFG_SELF_CAL_GAIN 0x2 ++#define CSCFG_SELF_CAL_OFFSET_GAIN 0x3 ++#define CSCFG_SYSTEM_CAL_OFFSET 0x5 ++#define CSCFG_SYSTEM_CAL_GAIN 0x6 ++#define CSCFG_DONE (1 << 3) ++#define CSCFG_POWER_SAVE_SELECT (1 << 4) ++#define CSCFG_PORT_MODE (1 << 5) ++#define CSCFG_RESET_VALID (1 << 6) ++#define CSCFG_RESET (1 << 7) ++#define CSCFG_UNIPOLAR (1 << 12) ++#define CSCFG_WORD_RATE_2180_CYCLES (0x0 << 13) ++#define CSCFG_WORD_RATE_1092_CYCLES (0x1 << 13) ++#define CSCFG_WORD_RATE_532_CYCLES (0x2 << 13) ++#define CSCFG_WORD_RATE_388_CYCLES (0x3 << 13) ++#define CSCFG_WORD_RATE_324_CYCLES (0x4 << 13) ++#define CSCFG_WORD_RATE_17444_CYCLES (0x5 << 13) ++#define CSCFG_WORD_RATE_8724_CYCLES (0x6 << 13) ++#define CSCFG_WORD_RATE_4364_CYCLES (0x7 << 13) ++#define CSCFG_WORD_RATE_MASK (0x7 << 13) ++#define CSCFG_LOW_POWER (1 << 16) ++ ++#define CS5529_CONFIG_DOUT(x) (1 << (18 + x)) ++#define CS5529_CONFIG_AOUT(x) (1 << (22 + x)) ++ 
++/* cs5529 command bits */ ++#define CSCMD_POWER_SAVE _bit0 ++#define CSCMD_REGISTER_SELECT_MASK 0xe ++#define CSCMD_OFFSET_REGISTER 0x0 ++#define CSCMD_GAIN_REGISTER _bit1 ++#define CSCMD_CONFIG_REGISTER _bit2 ++#define CSCMD_READ _bit4 ++#define CSCMD_CONTINUOUS_CONVERSIONS _bit5 ++#define CSCMD_SINGLE_CONVERSION _bit6 ++#define CSCMD_COMMAND _bit7 ++ ++/* cs5529 status bits */ ++#define CSS_ADC_BUSY _bit0 ++#define CSS_OSC_DETECT _bit1 /* indicates adc error */ ++#define CSS_OVERRANGE _bit3 ++ ++#define SerDacLd(x) (0x08<<(x)) ++ ++/* ++ This is stuff unique to the NI E series drivers, ++ but I thought I'd put it here anyway. ++*/ ++ ++enum ++{ ++ ai_gain_16 = 0, ++ ai_gain_8, ++ ai_gain_14, ++ ai_gain_4, ++ ai_gain_611x, ++ ai_gain_622x, ++ ai_gain_628x, ++ ai_gain_6143 ++}; ++enum caldac_enum ++{ ++ caldac_none=0, ++ mb88341, ++ dac8800, ++ dac8043, ++ ad8522, ++ ad8804, ++ ad8842, ++ ad8804_debug ++}; ++enum ni_reg_type ++{ ++ ni_reg_normal = 0x0, ++ ni_reg_611x = 0x1, ++ ni_reg_6711 = 0x2, ++ ni_reg_6713 = 0x4, ++ ni_reg_67xx_mask = 0x6, ++ ni_reg_6xxx_mask = 0x7, ++ ni_reg_622x = 0x8, ++ ni_reg_625x = 0x10, ++ ni_reg_628x = 0x18, ++ ni_reg_m_series_mask = 0x18, ++ ni_reg_6143 = 0x20 ++}; ++ ++/* M Series registers offsets */ ++#define M_Offset_CDIO_DMA_Select 0x7 /* write */ ++#define M_Offset_SCXI_Status 0x7 /* read */ ++#define M_Offset_AI_AO_Select 0x9 /* write, same offset as e-series */ ++#define M_Offset_SCXI_Serial_Data_In 0x9 /* read */ ++#define M_Offset_G0_G1_Select 0xb /* write, same offset as e-series */ ++#define M_Offset_Misc_Command 0xf ++#define M_Offset_SCXI_Serial_Data_Out 0x11 ++#define M_Offset_SCXI_Control 0x13 ++#define M_Offset_SCXI_Output_Enable 0x15 ++#define M_Offset_AI_FIFO_Data 0x1c ++#define M_Offset_Static_Digital_Output 0x24 /* write */ ++#define M_Offset_Static_Digital_Input 0x24 /* read */ ++#define M_Offset_DIO_Direction 0x28 ++#define M_Offset_Cal_PWM 0x40 ++#define M_Offset_AI_Config_FIFO_Data 0x5e ++#define M_Offset_Interrupt_C_Enable 0x88 /* write */ ++#define M_Offset_Interrupt_C_Status 0x88 /* read */ ++#define M_Offset_Analog_Trigger_Control 0x8c ++#define M_Offset_AO_Serial_Interrupt_Enable 0xa0 ++#define M_Offset_AO_Serial_Interrupt_Ack 0xa1 /* write */ ++#define M_Offset_AO_Serial_Interrupt_Status 0xa1 /* read */ ++#define M_Offset_AO_Calibration 0xa3 ++#define M_Offset_AO_FIFO_Data 0xa4 ++#define M_Offset_PFI_Filter 0xb0 ++#define M_Offset_RTSI_Filter 0xb4 ++#define M_Offset_SCXI_Legacy_Compatibility 0xbc ++#define M_Offset_Interrupt_A_Ack 0x104 /* write */ ++#define M_Offset_AI_Status_1 0x104 /* read */ ++#define M_Offset_Interrupt_B_Ack 0x106 /* write */ ++#define M_Offset_AO_Status_1 0x106 /* read */ ++#define M_Offset_AI_Command_2 0x108 /* write */ ++#define M_Offset_G01_Status 0x108 /* read */ ++#define M_Offset_AO_Command_2 0x10a ++#define M_Offset_AO_Status_2 0x10c /* read */ ++#define M_Offset_G0_Command 0x10c /* write */ ++#define M_Offset_G1_Command 0x10e /* write */ ++#define M_Offset_G0_HW_Save 0x110 ++#define M_Offset_G0_HW_Save_High 0x110 ++#define M_Offset_AI_Command_1 0x110 ++#define M_Offset_G0_HW_Save_Low 0x112 ++#define M_Offset_AO_Command_1 0x112 ++#define M_Offset_G1_HW_Save 0x114 ++#define M_Offset_G1_HW_Save_High 0x114 ++#define M_Offset_G1_HW_Save_Low 0x116 ++#define M_Offset_AI_Mode_1 0x118 ++#define M_Offset_G0_Save 0x118 ++#define M_Offset_G0_Save_High 0x118 ++#define M_Offset_AI_Mode_2 0x11a ++#define M_Offset_G0_Save_Low 0x11a ++#define M_Offset_AI_SI_Load_A 0x11c ++#define M_Offset_G1_Save 0x11c ++#define 
M_Offset_G1_Save_High 0x11c ++#define M_Offset_G1_Save_Low 0x11e ++#define M_Offset_AI_SI_Load_B 0x120 /* write */ ++#define M_Offset_AO_UI_Save 0x120 /* read */ ++#define M_Offset_AI_SC_Load_A 0x124 /* write */ ++#define M_Offset_AO_BC_Save 0x124 /* read */ ++#define M_Offset_AI_SC_Load_B 0x128 /* write */ ++#define M_Offset_AO_UC_Save 0x128 /* read */ ++#define M_Offset_AI_SI2_Load_A 0x12c ++#define M_Offset_AI_SI2_Load_B 0x130 ++#define M_Offset_G0_Mode 0x134 ++#define M_Offset_G1_Mode 0x136 /* write */ ++#define M_Offset_Joint_Status_1 0x136 /* read */ ++#define M_Offset_G0_Load_A 0x138 ++#define M_Offset_Joint_Status_2 0x13a ++#define M_Offset_G0_Load_B 0x13c ++#define M_Offset_G1_Load_A 0x140 ++#define M_Offset_G1_Load_B 0x144 ++#define M_Offset_G0_Input_Select 0x148 ++#define M_Offset_G1_Input_Select 0x14a ++#define M_Offset_AO_Mode_1 0x14c ++#define M_Offset_AO_Mode_2 0x14e ++#define M_Offset_AO_UI_Load_A 0x150 ++#define M_Offset_AO_UI_Load_B 0x154 ++#define M_Offset_AO_BC_Load_A 0x158 ++#define M_Offset_AO_BC_Load_B 0x15c ++#define M_Offset_AO_UC_Load_A 0x160 ++#define M_Offset_AO_UC_Load_B 0x164 ++#define M_Offset_Clock_and_FOUT 0x170 ++#define M_Offset_IO_Bidirection_Pin 0x172 ++#define M_Offset_RTSI_Trig_Direction 0x174 ++#define M_Offset_Interrupt_Control 0x176 ++#define M_Offset_AI_Output_Control 0x178 ++#define M_Offset_Analog_Trigger_Etc 0x17a ++#define M_Offset_AI_START_STOP_Select 0x17c ++#define M_Offset_AI_Trigger_Select 0x17e ++#define M_Offset_AI_SI_Save 0x180 /* read */ ++#define M_Offset_AI_DIV_Load_A 0x180 /* write */ ++#define M_Offset_AI_SC_Save 0x184 /* read */ ++#define M_Offset_AO_Start_Select 0x184 /* write */ ++#define M_Offset_AO_Trigger_Select 0x186 ++#define M_Offset_AO_Mode_3 0x18c ++#define M_Offset_G0_Autoincrement 0x188 ++#define M_Offset_G1_Autoincrement 0x18a ++#define M_Offset_Joint_Reset 0x190 ++#define M_Offset_Interrupt_A_Enable 0x192 ++#define M_Offset_Interrupt_B_Enable 0x196 ++#define M_Offset_AI_Personal 0x19a ++#define M_Offset_AO_Personal 0x19c ++#define M_Offset_RTSI_Trig_A_Output 0x19e ++#define M_Offset_RTSI_Trig_B_Output 0x1a0 ++#define M_Offset_RTSI_Shared_MUX 0x1a2 ++#define M_Offset_AO_Output_Control 0x1ac ++#define M_Offset_AI_Mode_3 0x1ae ++#define M_Offset_Configuration_Memory_Clear 0x1a4 ++#define M_Offset_AI_FIFO_Clear 0x1a6 ++#define M_Offset_AO_FIFO_Clear 0x1a8 ++#define M_Offset_G0_Counting_Mode 0x1b0 ++#define M_Offset_G1_Counting_Mode 0x1b2 ++#define M_Offset_G0_Second_Gate 0x1b4 ++#define M_Offset_G1_Second_Gate 0x1b6 ++#define M_Offset_G0_DMA_Config 0x1b8 /* write */ ++#define M_Offset_G0_DMA_Status 0x1b8 /* read */ ++#define M_Offset_G1_DMA_Config 0x1ba /* write */ ++#define M_Offset_G1_DMA_Status 0x1ba /* read */ ++#define M_Offset_G0_MSeries_ABZ 0x1c0 ++#define M_Offset_G1_MSeries_ABZ 0x1c2 ++#define M_Offset_Clock_and_Fout2 0x1c4 ++#define M_Offset_PLL_Control 0x1c6 ++#define M_Offset_PLL_Status 0x1c8 ++#define M_Offset_PFI_Output_Select_1 0x1d0 ++#define M_Offset_PFI_Output_Select_2 0x1d2 ++#define M_Offset_PFI_Output_Select_3 0x1d4 ++#define M_Offset_PFI_Output_Select_4 0x1d6 ++#define M_Offset_PFI_Output_Select_5 0x1d8 ++#define M_Offset_PFI_Output_Select_6 0x1da ++#define M_Offset_PFI_DI 0x1dc ++#define M_Offset_PFI_DO 0x1de ++#define M_Offset_AI_Config_FIFO_Bypass 0x218 ++#define M_Offset_SCXI_DIO_Enable 0x21c ++#define M_Offset_CDI_FIFO_Data 0x220 /* read */ ++#define M_Offset_CDO_FIFO_Data 0x220 /* write */ ++#define M_Offset_CDIO_Status 0x224 /* read */ ++#define M_Offset_CDIO_Command 0x224 /* write */ 
++#define M_Offset_CDI_Mode 0x228 ++#define M_Offset_CDO_Mode 0x22c ++#define M_Offset_CDI_Mask_Enable 0x230 ++#define M_Offset_CDO_Mask_Enable 0x234 ++#define M_Offset_AO_Waveform_Order(x) (0xc2 + 0x4 * x) ++#define M_Offset_AO_Config_Bank(x) (0xc3 + 0x4 * x) ++#define M_Offset_DAC_Direct_Data(x) (0xc0 + 0x4 * x) ++#define M_Offset_Gen_PWM(x) (0x44 + 0x2 * x) ++ ++static inline int M_Offset_Static_AI_Control(int i) ++{ ++ int offset[] = ++ { ++ 0x64, ++ 0x261, ++ 0x262, ++ 0x263, ++ }; ++ if(((unsigned)i) >= sizeof(offset) / sizeof(offset[0])) ++ { ++ rtdm_printk("%s: invalid channel=%i\n", __FUNCTION__, i); ++ return offset[0]; ++ } ++ return offset[i]; ++}; ++static inline int M_Offset_AO_Reference_Attenuation(int channel) ++{ ++ int offset[] = ++ { ++ 0x264, ++ 0x265, ++ 0x266, ++ 0x267 ++ }; ++ if(((unsigned)channel) >= sizeof(offset) / sizeof(offset[0])) ++ { ++ rtdm_printk("%s: invalid channel=%i\n", __FUNCTION__, channel); ++ return offset[0]; ++ } ++ return offset[channel]; ++}; ++static inline unsigned M_Offset_PFI_Output_Select(unsigned n) ++{ ++ if(n < 1 || n > NUM_PFI_OUTPUT_SELECT_REGS) ++ { ++ rtdm_printk("%s: invalid pfi output select register=%i\n", __FUNCTION__, n); ++ return M_Offset_PFI_Output_Select_1; ++ } ++ return M_Offset_PFI_Output_Select_1 + (n - 1) * 2; ++} ++ ++#define MSeries_AI_Config_Channel_Type_Mask (0x7 << 6) ++#define MSeries_AI_Config_Channel_Type_Calibration_Bits 0x0 ++#define MSeries_AI_Config_Channel_Type_Differential_Bits (0x1 << 6) ++#define MSeries_AI_Config_Channel_Type_Common_Ref_Bits (0x2 << 6) ++#define MSeries_AI_Config_Channel_Type_Ground_Ref_Bits (0x3 << 6) ++#define MSeries_AI_Config_Channel_Type_Aux_Bits (0x5 << 6) ++#define MSeries_AI_Config_Channel_Type_Ghost_Bits (0x7 << 6) ++#define MSeries_AI_Config_Polarity_Bit 0x1000 /* 0 for 2's complement encoding */ ++#define MSeries_AI_Config_Dither_Bit 0x2000 ++#define MSeries_AI_Config_Last_Channel_Bit 0x4000 ++#define MSeries_AI_Config_Channel_Bits(x) (x & 0xf) ++#define MSeries_AI_Config_Gain_Bits(x) ((x & 0x7) << 9) ++ ++static inline ++unsigned int MSeries_AI_Config_Bank_Bits(unsigned int reg_type, ++ unsigned int channel) ++{ ++ unsigned int bits = channel & 0x30; ++ if (reg_type == ni_reg_622x) { ++ if (channel & 0x40) ++ bits |= 0x400; ++ } ++ return bits; ++} ++ ++#define MSeries_PLL_In_Source_Select_RTSI0_Bits 0xb ++#define MSeries_PLL_In_Source_Select_Star_Trigger_Bits 0x14 ++#define MSeries_PLL_In_Source_Select_RTSI7_Bits 0x1b ++#define MSeries_PLL_In_Source_Select_PXI_Clock10 0x1d ++#define MSeries_PLL_In_Source_Select_Mask 0x1f ++#define MSeries_Timebase1_Select_Bit 0x20 /* use PLL for timebase 1 */ ++#define MSeries_Timebase3_Select_Bit 0x40 /* use PLL for timebase 3 */ ++/* Use 10MHz instead of 20MHz for RTSI clock frequency. 
Appears ++ to have no effect, at least on pxi-6281, which always uses ++ 20MHz rtsi clock frequency */ ++#define MSeries_RTSI_10MHz_Bit 0x80 ++ ++static inline ++unsigned int MSeries_PLL_In_Source_Select_RTSI_Bits(unsigned int RTSI_channel) ++{ ++ if(RTSI_channel > 7) ++ { ++ rtdm_printk("%s: bug, invalid RTSI_channel=%i\n", __FUNCTION__, RTSI_channel); ++ return 0; ++ } ++ if(RTSI_channel == 7) return MSeries_PLL_In_Source_Select_RTSI7_Bits; ++ else return MSeries_PLL_In_Source_Select_RTSI0_Bits + RTSI_channel; ++} ++ ++#define MSeries_PLL_Enable_Bit 0x1000 ++#define MSeries_PLL_VCO_Mode_200_325MHz_Bits 0x0 ++#define MSeries_PLL_VCO_Mode_175_225MHz_Bits 0x2000 ++#define MSeries_PLL_VCO_Mode_100_225MHz_Bits 0x4000 ++#define MSeries_PLL_VCO_Mode_75_150MHz_Bits 0x6000 ++ ++static inline ++unsigned int MSeries_PLL_Divisor_Bits(unsigned int divisor) ++{ ++ static const unsigned int max_divisor = 0x10; ++ if(divisor < 1 || divisor > max_divisor) ++ { ++ rtdm_printk("%s: bug, invalid divisor=%i\n", __FUNCTION__, divisor); ++ return 0; ++ } ++ return (divisor & 0xf) << 8; ++} ++static inline ++unsigned int MSeries_PLL_Multiplier_Bits(unsigned int multiplier) ++{ ++ static const unsigned int max_multiplier = 0x100; ++ if(multiplier < 1 || multiplier > max_multiplier) ++ { ++ rtdm_printk("%s: bug, invalid multiplier=%i\n", __FUNCTION__, multiplier); ++ return 0; ++ } ++ return multiplier & 0xff; ++} ++ ++#define MSeries_PLL_Locked_Bit 0x1 ++ ++#define MSeries_AI_Bypass_Channel_Mask 0x7 ++#define MSeries_AI_Bypass_Bank_Mask 0x78 ++#define MSeries_AI_Bypass_Cal_Sel_Pos_Mask 0x380 ++#define MSeries_AI_Bypass_Cal_Sel_Neg_Mask 0x1c00 ++#define MSeries_AI_Bypass_Mode_Mux_Mask 0x6000 ++#define MSeries_AO_Bypass_AO_Cal_Sel_Mask 0x38000 ++#define MSeries_AI_Bypass_Gain_Mask 0x1c0000 ++#define MSeries_AI_Bypass_Dither_Bit 0x200000 ++#define MSeries_AI_Bypass_Polarity_Bit 0x400000 /* 0 for 2's complement encoding */ ++#define MSeries_AI_Bypass_Config_FIFO_Bit 0x80000000 ++#define MSeries_AI_Bypass_Cal_Sel_Pos_Bits(x) ((x << 7) & \ ++ MSeries_AI_Bypass_Cal_Sel_Pos_Mask) ++#define MSeries_AI_Bypass_Cal_Sel_Neg_Bits(x) ((x << 10) & \ ++ MSeries_AI_Bypass_Cal_Sel_Pos_Mask) ++#define MSeries_AI_Bypass_Gain_Bits(x) ((x << 18) & \ ++ MSeries_AI_Bypass_Gain_Mask) ++ ++#define MSeries_AO_DAC_Offset_Select_Mask 0x7 ++#define MSeries_AO_DAC_Offset_0V_Bits 0x0 ++#define MSeries_AO_DAC_Offset_5V_Bits 0x1 ++#define MSeries_AO_DAC_Reference_Mask 0x38 ++#define MSeries_AO_DAC_Reference_10V_Internal_Bits 0x0 ++#define MSeries_AO_DAC_Reference_5V_Internal_Bits 0x8 ++#define MSeries_AO_Update_Timed_Bit 0x40 ++#define MSeries_AO_Bipolar_Bit 0x80 /* turns on 2's complement encoding */ ++ ++#define MSeries_Attenuate_x5_Bit 0x1 ++ ++#define MSeries_Cal_PWM_High_Time_Bits(x) ((x << 16) & 0xffff0000) ++#define MSeries_Cal_PWM_Low_Time_Bits(x) (x & 0xffff) ++ ++#define MSeries_PFI_Output_Select_Mask(x) (0x1f << (x % 3) * 5) ++#define MSeries_PFI_Output_Select_Bits(x, y) ((y & 0x1f) << ((x % 3) * 5)) ++// inverse to MSeries_PFI_Output_Select_Bits ++#define MSeries_PFI_Output_Select_Source(x, y) ((y >> ((x % 3) * 5)) & 0x1f) ++ ++#define Gi_DMA_BankSW_Error_Bit 0x10 ++#define Gi_DMA_Reset_Bit 0x8 ++#define Gi_DMA_Int_Enable_Bit 0x4 ++#define Gi_DMA_Write_Bit 0x2 ++#define Gi_DMA_Enable_Bit 0x1 ++ ++#define MSeries_PFI_Filter_Select_Mask(x) (0x3 << (x * 2)) ++#define MSeries_PFI_Filter_Select_Bits(x, y) ((y << (x * 2)) & \ ++ MSeries_PFI_Filter_Select_Mask(x)) ++ ++/* CDIO DMA select bits */ ++#define CDI_DMA_Select_Shift 0 ++#define 
CDI_DMA_Select_Mask 0xf ++#define CDO_DMA_Select_Shift 4 ++#define CDO_DMA_Select_Mask 0xf << CDO_DMA_Select_Shift ++ ++/* CDIO status bits */ ++#define CDO_FIFO_Empty_Bit 0x1 ++#define CDO_FIFO_Full_Bit 0x2 ++#define CDO_FIFO_Request_Bit 0x4 ++#define CDO_Overrun_Bit 0x8 ++#define CDO_Underflow_Bit 0x10 ++#define CDI_FIFO_Empty_Bit 0x10000 ++#define CDI_FIFO_Full_Bit 0x20000 ++#define CDI_FIFO_Request_Bit 0x40000 ++#define CDI_Overrun_Bit 0x80000 ++#define CDI_Overflow_Bit 0x100000 ++ ++/* CDIO command bits */ ++#define CDO_Disarm_Bit 0x1 ++#define CDO_Arm_Bit 0x2 ++#define CDI_Disarm_Bit 0x4 ++#define CDI_Arm_Bit 0x8 ++#define CDO_Reset_Bit 0x10 ++#define CDI_Reset_Bit 0x20 ++#define CDO_Error_Interrupt_Enable_Set_Bit 0x40 ++#define CDO_Error_Interrupt_Enable_Clear_Bit 0x80 ++#define CDI_Error_Interrupt_Enable_Set_Bit 0x100 ++#define CDI_Error_Interrupt_Enable_Clear_Bit 0x200 ++#define CDO_FIFO_Request_Interrupt_Enable_Set_Bit 0x400 ++#define CDO_FIFO_Request_Interrupt_Enable_Clear_Bit 0x800 ++#define CDI_FIFO_Request_Interrupt_Enable_Set_Bit 0x1000 ++#define CDI_FIFO_Request_Interrupt_Enable_Clear_Bit 0x2000 ++#define CDO_Error_Interrupt_Confirm_Bit 0x4000 ++#define CDI_Error_Interrupt_Confirm_Bit 0x8000 ++#define CDO_Empty_FIFO_Interrupt_Enable_Set_Bit 0x10000 ++#define CDO_Empty_FIFO_Interrupt_Enable_Clear_Bit 0x20000 ++#define CDO_SW_Update_Bit 0x80000 ++#define CDI_SW_Update_Bit 0x100000 ++ ++/* CDIO mode bits */ ++#define CDI_Sample_Source_Select_Mask 0x3f ++#define CDI_Halt_On_Error_Bit 0x200 ++/* sample clock on falling edge */ ++#define CDI_Polarity_Bit 0x400 ++/* set for half full mode, clear for not empty mode */ ++#define CDI_FIFO_Mode_Bit 0x800 ++/* data lanes specify which dio channels map to byte or word accesses ++ to the dio fifos */ ++#define CDI_Data_Lane_Mask 0x3000 ++#define CDI_Data_Lane_0_15_Bits 0x0 ++#define CDI_Data_Lane_16_31_Bits 0x1000 ++#define CDI_Data_Lane_0_7_Bits 0x0 ++#define CDI_Data_Lane_8_15_Bits 0x1000 ++#define CDI_Data_Lane_16_23_Bits 0x2000 ++#define CDI_Data_Lane_24_31_Bits 0x3000 ++ ++/* CDO mode bits */ ++#define CDO_Sample_Source_Select_Mask 0x3f ++#define CDO_Retransmit_Bit 0x100 ++#define CDO_Halt_On_Error_Bit 0x200 ++/* sample clock on falling edge */ ++#define CDO_Polarity_Bit 0x400 ++/* set for half full mode, clear for not full mode */ ++#define CDO_FIFO_Mode_Bit 0x800 ++/* data lanes specify which dio channels map to byte or word accesses ++ to the dio fifos */ ++#define CDO_Data_Lane_Mask 0x3000 ++#define CDO_Data_Lane_0_15_Bits 0x0 ++#define CDO_Data_Lane_16_31_Bits 0x1000 ++#define CDO_Data_Lane_0_7_Bits 0x0 ++#define CDO_Data_Lane_8_15_Bits 0x1000 ++#define CDO_Data_Lane_16_23_Bits 0x2000 ++#define CDO_Data_Lane_24_31_Bits 0x3000 ++ ++/* Interrupt C bits */ ++#define Interrupt_Group_C_Enable_Bit 0x1 ++#define Interrupt_Group_C_Status_Bit 0x1 ++ ++#define M_SERIES_EEPROM_SIZE 1024 ++ ++typedef struct ni_board_struct{ ++ unsigned short device_id; ++ int isapnp_id; ++ char *name; ++ ++ int n_adchan; ++ int adbits; ++ ++ int ai_fifo_depth; ++ unsigned int alwaysdither : 1; ++ int gainlkup; ++ int ai_speed; ++ ++ int n_aochan; ++ int aobits; ++ struct a4l_rngdesc *ao_range_table; ++ int ao_fifo_depth; ++ ++ unsigned ao_speed; ++ ++ unsigned num_p0_dio_channels; ++ ++ int reg_type; ++ unsigned int ao_unipolar : 1; ++ unsigned int has_8255 : 1; ++ unsigned int has_analog_trig : 1; ++ ++ enum caldac_enum caldac[3]; ++} ni_board; ++ ++#define n_ni_boards (sizeof(ni_boards)/sizeof(ni_board)) ++ ++#define MAX_N_CALDACS 34 ++#define 
MAX_N_AO_CHAN 8 ++#define NUM_GPCT 2 ++ ++#define NI_PRIVATE_COMMON \ ++ uint16_t (*stc_readw)(struct a4l_device *dev, int register); \ ++ uint32_t (*stc_readl)(struct a4l_device *dev, int register); \ ++ void (*stc_writew)(struct a4l_device *dev, uint16_t value, int register); \ ++ void (*stc_writel)(struct a4l_device *dev, uint32_t value, int register); \ ++ \ ++ int dio_state; \ ++ int pfi_state; \ ++ int io_bits; \ ++ unsigned short dio_output; \ ++ unsigned short dio_control; \ ++ int ao0p,ao1p; \ ++ int lastchan; \ ++ int last_do; \ ++ int rt_irq; \ ++ int irq_polarity; \ ++ int irq_pin; \ ++ int aimode; \ ++ int ai_continuous; \ ++ int blocksize; \ ++ int n_left; \ ++ unsigned int ai_calib_source; \ ++ unsigned int ai_calib_source_enabled; \ ++ rtdm_lock_t window_lock; \ ++ rtdm_lock_t soft_reg_copy_lock; \ ++ rtdm_lock_t mite_channel_lock; \ ++ \ ++ int changain_state; \ ++ unsigned int changain_spec; \ ++ \ ++ unsigned int caldac_maxdata_list[MAX_N_CALDACS]; \ ++ unsigned short ao[MAX_N_AO_CHAN]; \ ++ unsigned short caldacs[MAX_N_CALDACS]; \ ++ \ ++ unsigned short ai_cmd2; \ ++ \ ++ unsigned short ao_conf[MAX_N_AO_CHAN]; \ ++ unsigned short ao_mode1; \ ++ unsigned short ao_mode2; \ ++ unsigned short ao_mode3; \ ++ unsigned short ao_cmd1; \ ++ unsigned short ao_cmd2; \ ++ unsigned short ao_cmd3; \ ++ unsigned short ao_trigger_select; \ ++ \ ++ struct ni_gpct_device *counter_dev; \ ++ unsigned short an_trig_etc_reg; \ ++ \ ++ unsigned ai_offset[512]; \ ++ \ ++ unsigned long serial_interval_ns; \ ++ unsigned char serial_hw_mode; \ ++ unsigned short clock_and_fout; \ ++ unsigned short clock_and_fout2; \ ++ \ ++ unsigned short int_a_enable_reg; \ ++ unsigned short int_b_enable_reg; \ ++ unsigned short io_bidirection_pin_reg; \ ++ unsigned short rtsi_trig_direction_reg; \ ++ unsigned short rtsi_trig_a_output_reg; \ ++ unsigned short rtsi_trig_b_output_reg; \ ++ unsigned short pfi_output_select_reg[NUM_PFI_OUTPUT_SELECT_REGS]; \ ++ unsigned short ai_ao_select_reg; \ ++ unsigned short g0_g1_select_reg; \ ++ unsigned short cdio_dma_select_reg; \ ++ \ ++ unsigned clock_ns; \ ++ unsigned clock_source; \ ++ \ ++ unsigned short atrig_mode; \ ++ unsigned short atrig_high; \ ++ unsigned short atrig_low; \ ++ \ ++ unsigned short pwm_up_count; \ ++ unsigned short pwm_down_count; \ ++ \ ++ sampl_t ai_fifo_buffer[0x2000]; \ ++ uint8_t eeprom_buffer[M_SERIES_EEPROM_SIZE]; \ ++ \ ++ struct mite_struct *mite; \ ++ struct mite_channel *ai_mite_chan; \ ++ struct mite_channel *ao_mite_chan;\ ++ struct mite_channel *cdo_mite_chan;\ ++ struct mite_dma_descriptor_ring *ai_mite_ring; \ ++ struct mite_dma_descriptor_ring *ao_mite_ring; \ ++ struct mite_dma_descriptor_ring *cdo_mite_ring; \ ++ struct mite_dma_descriptor_ring *gpct_mite_ring[NUM_GPCT]; \ ++ subd_8255_t subd_8255 ++ ++ ++typedef struct { ++ ni_board *board_ptr; ++ NI_PRIVATE_COMMON; ++} ni_private; ++ ++#define devpriv ((ni_private *)dev->priv) ++#define boardtype (*(ni_board *)devpriv->board_ptr) ++ ++/* How we access registers */ ++ ++#define ni_writel(a,b) (writel((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readl(a) (readl(devpriv->mite->daq_io_addr + (a))) ++#define ni_writew(a,b) (writew((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readw(a) (readw(devpriv->mite->daq_io_addr + (a))) ++#define ni_writeb(a,b) (writeb((a), devpriv->mite->daq_io_addr + (b))) ++#define ni_readb(a) (readb(devpriv->mite->daq_io_addr + (a))) ++ ++/* INSN_CONFIG_SET_CLOCK_SRC argument for NI cards */ ++#define NI_FREQ_OUT_TIMEBASE_1_DIV_2_CLOCK_SRC 0 
/* 10 MHz */ ++#define NI_FREQ_OUT_TIMEBASE_2_CLOCK_SRC 1 /* 100 KHz */ ++ ++#endif /* _ANALOGY_NI_STC_H */ +--- linux/drivers/xenomai/analogy/rtdm_interface.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/rtdm_interface.c 2021-04-07 16:01:27.791633360 +0800 +@@ -0,0 +1,310 @@ ++/* ++ * Analogy for Linux, user interface (open, read, write, ioctl, proc) ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_AUTHOR("Alexis Berlemont"); ++MODULE_DESCRIPTION("Analogy core driver"); ++MODULE_LICENSE("GPL"); ++ ++int (* const a4l_ioctl_functions[]) (struct a4l_device_context *, void *) = { ++ [_IOC_NR(A4L_DEVCFG)] = a4l_ioctl_devcfg, ++ [_IOC_NR(A4L_DEVINFO)] = a4l_ioctl_devinfo, ++ [_IOC_NR(A4L_SUBDINFO)] = a4l_ioctl_subdinfo, ++ [_IOC_NR(A4L_CHANINFO)] = a4l_ioctl_chaninfo, ++ [_IOC_NR(A4L_RNGINFO)] = a4l_ioctl_rnginfo, ++ [_IOC_NR(A4L_CMD)] = a4l_ioctl_cmd, ++ [_IOC_NR(A4L_CANCEL)] = a4l_ioctl_cancel, ++ [_IOC_NR(A4L_INSNLIST)] = a4l_ioctl_insnlist, ++ [_IOC_NR(A4L_INSN)] = a4l_ioctl_insn, ++ [_IOC_NR(A4L_BUFCFG)] = a4l_ioctl_bufcfg, ++ [_IOC_NR(A4L_BUFINFO)] = a4l_ioctl_bufinfo, ++ [_IOC_NR(A4L_POLL)] = a4l_ioctl_poll, ++ [_IOC_NR(A4L_MMAP)] = a4l_ioctl_mmap, ++ [_IOC_NR(A4L_NBCHANINFO)] = a4l_ioctl_nbchaninfo, ++ [_IOC_NR(A4L_NBRNGINFO)] = a4l_ioctl_nbrnginfo, ++ [_IOC_NR(A4L_BUFCFG2)] = a4l_ioctl_bufcfg2, ++ [_IOC_NR(A4L_BUFINFO2)] = a4l_ioctl_bufinfo2 ++}; ++ ++#ifdef CONFIG_PROC_FS ++struct proc_dir_entry *a4l_proc_root; ++ ++static int a4l_proc_devs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_devs, NULL); ++} ++ ++static const struct file_operations a4l_proc_devs_ops = { ++ .open = a4l_proc_devs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int a4l_proc_drvs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, a4l_rdproc_drvs, NULL); ++} ++ ++static const struct file_operations a4l_proc_drvs_ops = { ++ .open = a4l_proc_drvs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int a4l_init_proc(void) ++{ ++ int ret = 0; ++ struct proc_dir_entry *entry; ++ ++ /* Creates the global directory */ ++ a4l_proc_root = proc_mkdir("analogy", NULL); ++ if (a4l_proc_root == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy\n"); ++ return -ENOMEM; ++ } ++ ++ /* Creates the devices related file */ ++ entry = proc_create("devices", 0444, a4l_proc_root, ++ &a4l_proc_devs_ops); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy/devices\n"); ++ ret = -ENOMEM; ++ goto err_proc_init; ++ } ++ ++ /* Creates the 
drivers related file */ ++ entry = proc_create("drivers", 0444, a4l_proc_root, ++ &a4l_proc_drvs_ops); ++ if (entry == NULL) { ++ __a4l_err("a4l_proc_init: " ++ "failed to create /proc/analogy/drivers\n"); ++ ret = -ENOMEM; ++ goto err_proc_init; ++ } ++ ++ return 0; ++ ++err_proc_init: ++ remove_proc_entry("devices", a4l_proc_root); ++ remove_proc_entry("analogy", NULL); ++ return ret; ++} ++ ++void a4l_cleanup_proc(void) ++{ ++ remove_proc_entry("drivers", a4l_proc_root); ++ remove_proc_entry("devices", a4l_proc_root); ++ remove_proc_entry("analogy", NULL); ++} ++ ++#else /* !CONFIG_PROC_FS */ ++ ++#define a4l_init_proc() 0 ++#define a4l_cleanup_proc() ++ ++#endif /* CONFIG_PROC_FS */ ++ ++int a4l_open(struct rtdm_fd *fd, int flags) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Get a pointer on the selected device (thanks to minor index) */ ++ a4l_set_dev(cxt); ++ ++ /* Initialize the buffer structure */ ++ cxt->buffer = rtdm_malloc(sizeof(struct a4l_buffer)); ++ ++ a4l_init_buffer(cxt->buffer); ++ /* Allocate the asynchronous buffer ++ NOTE: it should be interesting to allocate the buffer only ++ on demand especially if the system is short of memory */ ++ if (cxt->dev->transfer.default_bufsize) ++ a4l_alloc_buffer(cxt->buffer, ++ cxt->dev->transfer.default_bufsize); ++ ++ __a4l_dbg(1, core_dbg, "cxt=%p cxt->buf=%p, cxt->buf->buf=%p\n", ++ cxt, cxt->buffer, cxt->buffer->buf); ++ ++ return 0; ++} ++ ++void a4l_close(struct rtdm_fd *fd) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Cancel the maybe occuring asynchronous transfer */ ++ a4l_cancel_buffer(cxt); ++ ++ /* Free the buffer which was linked with this context and... */ ++ a4l_free_buffer(cxt->buffer); ++ ++ /* ...free the other buffer resources (sync) and... 
*/ ++ a4l_cleanup_buffer(cxt->buffer); ++ ++ /* ...free the structure */ ++ rtdm_free(cxt->buffer); ++} ++ ++ssize_t a4l_read(struct rtdm_fd *fd, void *buf, size_t nbytes) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Jump into the RT domain if possible */ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ if (nbytes == 0) ++ return 0; ++ ++ return a4l_read_buffer(cxt, buf, nbytes); ++} ++ ++ssize_t a4l_write(struct rtdm_fd *fd, const void *buf, size_t nbytes) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ /* Jump into the RT domain if possible */ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ if (nbytes == 0) ++ return 0; ++ ++ return a4l_write_buffer(cxt, buf, nbytes); ++} ++ ++int a4l_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ return a4l_ioctl_functions[_IOC_NR(request)] (cxt, arg); ++} ++ ++int a4l_rt_select(struct rtdm_fd *fd, ++ rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct a4l_device_context *cxt = (struct a4l_device_context *)rtdm_fd_to_private(fd); ++ ++ return a4l_select(cxt, selector, type, fd_index); ++} ++ ++static struct rtdm_driver analogy_driver = { ++ .profile_info = RTDM_PROFILE_INFO(analogy, ++ RTDM_CLASS_EXPERIMENTAL, ++ RTDM_SUBCLASS_ANALOGY, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = A4L_NB_DEVICES, ++ .context_size = sizeof(struct a4l_device_context), ++ .ops = { ++ .open = a4l_open, ++ .close = a4l_close, ++ .ioctl_rt = a4l_ioctl, ++ .read_rt = a4l_read, ++ .write_rt = a4l_write, ++ .ioctl_nrt = a4l_ioctl, ++ .read_nrt = a4l_read, ++ .write_nrt = a4l_write, ++ .select = a4l_rt_select, ++ }, ++}; ++ ++static struct rtdm_device rtdm_devs[A4L_NB_DEVICES] = { ++ [0 ... A4L_NB_DEVICES - 1] = { ++ .driver = &analogy_driver, ++ .label = "analogy%d", ++ } ++}; ++ ++int a4l_register(void) ++{ ++ int i, ret; ++ ++ for (i = 0; i < A4L_NB_DEVICES; i++) { ++ ret = rtdm_dev_register(rtdm_devs + i); ++ if (ret) ++ goto fail; ++ } ++ ++ return 0; ++fail: ++ while (i-- > 0) ++ rtdm_dev_unregister(rtdm_devs + i); ++ ++ return ret; ++} ++ ++void a4l_unregister(void) ++{ ++ int i; ++ for (i = 0; i < A4L_NB_DEVICES; i++) ++ rtdm_dev_unregister(&(rtdm_devs[i])); ++} ++ ++static int __init a4l_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ /* Initializes the devices */ ++ a4l_init_devs(); ++ ++ /* Initializes Analogy time management */ ++ a4l_init_time(); ++ ++ /* Registers RTDM / fops interface */ ++ ret = a4l_register(); ++ if (ret != 0) { ++ a4l_unregister(); ++ goto out_a4l_init; ++ } ++ ++ /* Initializes Analogy proc layer */ ++ ret = a4l_init_proc(); ++ ++out_a4l_init: ++ return ret; ++} ++ ++static void __exit a4l_cleanup(void) ++{ ++ /* Removes Analogy proc files */ ++ a4l_cleanup_proc(); ++ ++ /* Unregisters RTDM / fops interface */ ++ a4l_unregister(); ++} ++ ++module_init(a4l_init); ++module_exit(a4l_cleanup); +--- linux/drivers/xenomai/analogy/instruction.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/instruction.c 2021-04-07 16:01:27.786633367 +0800 +@@ -0,0 +1,427 @@ ++/* ++ * Analogy for Linux, instruction related features ++ * ++ * Copyright (C) 1997-2000 David A. 
Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int a4l_do_insn_gettime(struct a4l_kernel_instruction * dsc) ++{ ++ nanosecs_abs_t ns; ++ uint32_t ns2; ++ ++ unsigned int *data = (unsigned int *)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size != 2 * sizeof(unsigned int)) { ++ __a4l_err("a4l_do_insn_gettime: data size should be 2\n"); ++ return -EINVAL; ++ } ++ ++ /* Get a timestamp */ ++ ns = a4l_get_time(); ++ ++ /* Perform the conversion */ ++ ns2 = do_div(ns, 1000000000); ++ data[0] = (unsigned int) ns; ++ data[1] = (unsigned int) ns2 / 1000; ++ ++ return 0; ++} ++ ++int a4l_do_insn_wait(struct a4l_kernel_instruction * dsc) ++{ ++ unsigned int us; ++ unsigned int *data = (unsigned int *)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size != sizeof(unsigned int)) { ++ __a4l_err("a4l_do_insn_wait: data size should be 1\n"); ++ return -EINVAL; ++ } ++ ++ if (data[0] > A4L_INSN_WAIT_MAX) { ++ __a4l_err("a4l_do_insn_wait: wait duration is out of range\n"); ++ return -EINVAL; ++ } ++ ++ /* As we use (a4l_)udelay, we have to convert the delay into ++ microseconds */ ++ us = data[0] / 1000; ++ ++ /* At least, the delay is rounded up to 1 microsecond */ ++ if (us == 0) ++ us = 1; ++ ++ /* Performs the busy waiting */ ++ a4l_udelay(us); ++ ++ return 0; ++} ++ ++int a4l_do_insn_trig(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ struct a4l_subdevice *subd; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ unsigned int trignum; ++ unsigned int *data = (unsigned int*)dsc->data; ++ ++ /* Basic checkings */ ++ if (dsc->data_size > 1) { ++ __a4l_err("a4l_do_insn_trig: data size should not be > 1\n"); ++ return -EINVAL; ++ } ++ ++ trignum = (dsc->data_size == sizeof(unsigned int)) ? 
data[0] : 0; ++ ++ if (dsc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_do_insn_trig: " ++ "subdevice index is out of range\n"); ++ return -EINVAL; ++ } ++ ++ subd = dev->transfer.subds[dsc->idx_subd]; ++ ++ /* Checks that the concerned subdevice is trigger-compliant */ ++ if ((subd->flags & A4L_SUBD_CMD) == 0 || subd->trigger == NULL) { ++ __a4l_err("a4l_do_insn_trig: subdevice does not support " ++ "triggering or asynchronous acquisition\n"); ++ return -EINVAL; ++ } ++ ++ /* Performs the trigger */ ++ return subd->trigger(subd, trignum); ++} ++ ++int a4l_fill_insndsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ void *tmp_data = NULL; ++ ++ ret = rtdm_safe_copy_from_user(fd, ++ dsc, arg, sizeof(a4l_insn_t)); ++ if (ret != 0) ++ goto out_insndsc; ++ ++ if (dsc->data_size != 0 && dsc->data == NULL) { ++ __a4l_err("a4l_fill_insndsc: no data pointer specified\n"); ++ ret = -EINVAL; ++ goto out_insndsc; ++ } ++ ++ if (dsc->data_size != 0 && dsc->data != NULL) { ++ tmp_data = rtdm_malloc(dsc->data_size); ++ if (tmp_data == NULL) { ++ ret = -ENOMEM; ++ goto out_insndsc; ++ } ++ ++ if ((dsc->type & A4L_INSN_MASK_WRITE) != 0) { ++ ret = rtdm_safe_copy_from_user(fd, ++ tmp_data, dsc->data, ++ dsc->data_size); ++ if (ret < 0) ++ goto out_insndsc; ++ } ++ } ++ ++ dsc->__udata = dsc->data; ++ dsc->data = tmp_data; ++ ++out_insndsc: ++ ++ if (ret != 0 && tmp_data != NULL) ++ rtdm_free(tmp_data); ++ ++ return ret; ++} ++ ++int a4l_free_insndsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ ++ if ((dsc->type & A4L_INSN_MASK_READ) != 0) ++ ret = rtdm_safe_copy_to_user(fd, ++ dsc->__udata, ++ dsc->data, dsc->data_size); ++ ++ if (dsc->data != NULL) ++ rtdm_free(dsc->data); ++ ++ return ret; ++} ++ ++int a4l_do_special_insn(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ int ret = 0; ++ ++ switch (dsc->type) { ++ case A4L_INSN_GTOD: ++ ret = a4l_do_insn_gettime(dsc); ++ break; ++ case A4L_INSN_WAIT: ++ ret = a4l_do_insn_wait(dsc); ++ break; ++ case A4L_INSN_INTTRIG: ++ ret = a4l_do_insn_trig(cxt, dsc); ++ break; ++ default: ++ __a4l_err("a4l_do_special_insn: " ++ "incoherent instruction code\n"); ++ return -EINVAL; ++ } ++ ++ if (ret < 0) ++ __a4l_err("a4l_do_special_insn: " ++ "execution of the instruction failed (err=%d)\n", ++ ret); ++ ++ return ret; ++} ++ ++int a4l_do_insn(struct a4l_device_context * cxt, struct a4l_kernel_instruction * dsc) ++{ ++ int ret = 0; ++ struct a4l_subdevice *subd; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ int (*hdlr) (struct a4l_subdevice *, struct a4l_kernel_instruction *) = NULL; ++ ++ /* Checks the subdevice index */ ++ if (dsc->idx_subd >= dev->transfer.nb_subd) { ++ __a4l_err("a4l_do_insn: " ++ "subdevice index out of range (idx=%d)\n", ++ dsc->idx_subd); ++ return -EINVAL; ++ } ++ ++ /* Recovers pointers on the proper subdevice */ ++ subd = dev->transfer.subds[dsc->idx_subd]; ++ ++ /* Checks the subdevice's characteristics */ ++ if ((subd->flags & A4L_SUBD_TYPES) == A4L_SUBD_UNUSED) { ++ __a4l_err("a4l_do_insn: wrong subdevice selected\n"); ++ return -EINVAL; ++ } ++ ++ /* Checks the channel descriptor */ ++ if ((subd->flags & A4L_SUBD_TYPES) != A4L_SUBD_CALIB) { ++ ret = a4l_check_chanlist(dev->transfer.subds[dsc->idx_subd], ++ 1, &dsc->chan_desc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ /* Choose the proper handler, we 
can check the pointer because ++ the subdevice was memset to 0 at allocation time */ ++ switch (dsc->type) { ++ case A4L_INSN_READ: ++ hdlr = subd->insn_read; ++ break; ++ case A4L_INSN_WRITE: ++ hdlr = subd->insn_write; ++ break; ++ case A4L_INSN_BITS: ++ hdlr = subd->insn_bits; ++ break; ++ case A4L_INSN_CONFIG: ++ hdlr = subd->insn_config; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ /* We check the instruction type */ ++ if (ret < 0) ++ return ret; ++ ++ /* We check whether a handler is available */ ++ if (hdlr == NULL) ++ return -ENOSYS; ++ ++ /* Prevents the subdevice from being used during ++ the following operations */ ++ if (test_and_set_bit(A4L_SUBD_BUSY_NR, &subd->status)) { ++ ret = -EBUSY; ++ goto out_do_insn; ++ } ++ ++ /* Let's the driver-specific code perform the instruction */ ++ ret = hdlr(subd, dsc); ++ ++ if (ret < 0) ++ __a4l_err("a4l_do_insn: " ++ "execution of the instruction failed (err=%d)\n", ++ ret); ++ ++out_do_insn: ++ ++ /* Releases the subdevice from its reserved state */ ++ clear_bit(A4L_SUBD_BUSY_NR, &subd->status); ++ ++ return ret; ++} ++ ++int a4l_ioctl_insn(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int ret = 0; ++ struct a4l_kernel_instruction insn; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_insn: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ /* Recovers the instruction descriptor */ ++ ret = a4l_fill_insndsc(cxt, &insn, arg); ++ if (ret != 0) ++ goto err_ioctl_insn; ++ ++ /* Performs the instruction */ ++ if ((insn.type & A4L_INSN_MASK_SPECIAL) != 0) ++ ret = a4l_do_special_insn(cxt, &insn); ++ else ++ ret = a4l_do_insn(cxt, &insn); ++ ++ if (ret < 0) ++ goto err_ioctl_insn; ++ ++ /* Frees the used memory and sends back some ++ data, if need be */ ++ ret = a4l_free_insndsc(cxt, &insn); ++ ++ return ret; ++ ++err_ioctl_insn: ++ a4l_free_insndsc(cxt, &insn); ++ return ret; ++} ++ ++int a4l_fill_ilstdsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction_list * dsc, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ ++ dsc->insns = NULL; ++ ++ /* Recovers the structure from user space */ ++ ret = rtdm_safe_copy_from_user(fd, ++ dsc, arg, sizeof(a4l_insnlst_t)); ++ if (ret < 0) ++ return ret; ++ ++ /* Some basic checking */ ++ if (dsc->count == 0) { ++ __a4l_err("a4l_fill_ilstdsc: instruction list's count is 0\n"); ++ return -EINVAL; ++ } ++ ++ /* Keeps the user pointer in an opaque field */ ++ dsc->__uinsns = (a4l_insn_t *)dsc->insns; ++ ++ dsc->insns = rtdm_malloc(dsc->count * sizeof(struct a4l_kernel_instruction)); ++ if (dsc->insns == NULL) ++ return -ENOMEM; ++ ++ /* Recovers the instructions, one by one. 
This part is not ++ optimized */ ++ for (i = 0; i < dsc->count && ret == 0; i++) ++ ret = a4l_fill_insndsc(cxt, ++ &(dsc->insns[i]), ++ &(dsc->__uinsns[i])); ++ ++ /* In case of error, frees the allocated memory */ ++ if (ret < 0 && dsc->insns != NULL) ++ rtdm_free(dsc->insns); ++ ++ return ret; ++} ++ ++int a4l_free_ilstdsc(struct a4l_device_context * cxt, struct a4l_kernel_instruction_list * dsc) ++{ ++ int i, ret = 0; ++ ++ if (dsc->insns != NULL) { ++ ++ for (i = 0; i < dsc->count && ret == 0; i++) ++ ret = a4l_free_insndsc(cxt, &(dsc->insns[i])); ++ ++ while (i < dsc->count) { ++ a4l_free_insndsc(cxt, &(dsc->insns[i])); ++ i++; ++ } ++ ++ rtdm_free(dsc->insns); ++ } ++ ++ return ret; ++} ++ ++/* This function is not optimized in terms of memory footprint and ++ CPU charge; however, the whole analogy instruction system was not ++ designed for performance issues */ ++int a4l_ioctl_insnlist(struct a4l_device_context * cxt, void *arg) ++{ ++ struct rtdm_fd *fd = rtdm_private_to_fd(cxt); ++ int i, ret = 0; ++ struct a4l_kernel_instruction_list ilst; ++ struct a4l_device *dev = a4l_get_dev(cxt); ++ ++ if (!rtdm_in_rt_context() && rtdm_rt_capable(fd)) ++ return -ENOSYS; ++ ++ /* Basic checking */ ++ if (!test_bit(A4L_DEV_ATTACHED_NR, &dev->flags)) { ++ __a4l_err("a4l_ioctl_insnlist: unattached device\n"); ++ return -EINVAL; ++ } ++ ++ if ((ret = a4l_fill_ilstdsc(cxt, &ilst, arg)) < 0) ++ return ret; ++ ++ /* Performs the instructions */ ++ for (i = 0; i < ilst.count && ret == 0; i++) { ++ if ((ilst.insns[i].type & A4L_INSN_MASK_SPECIAL) != 0) ++ ret = a4l_do_special_insn(cxt, &ilst.insns[i]); ++ else ++ ret = a4l_do_insn(cxt, &ilst.insns[i]); ++ } ++ ++ if (ret < 0) ++ goto err_ioctl_ilst; ++ ++ return a4l_free_ilstdsc(cxt, &ilst); ++ ++err_ioctl_ilst: ++ a4l_free_ilstdsc(cxt, &ilst); ++ return ret; ++} +--- linux/drivers/xenomai/analogy/driver.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/analogy/driver.c 2021-04-07 16:01:27.782633373 +0800 +@@ -0,0 +1,104 @@ ++/* ++ * Analogy for Linux, driver related features ++ * ++ * Copyright (C) 1997-2000 David A. Schleef ++ * Copyright (C) 2008 Alexis Berlemont ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++ ++#include "proc.h" ++ ++static LIST_HEAD(a4l_drvs); ++ ++/* --- Driver list management functions --- */ ++ ++int a4l_lct_drv(char *pin, struct a4l_driver ** pio) ++{ ++ struct list_head *this; ++ int ret = -EINVAL; ++ ++ __a4l_dbg(1, core_dbg, "name=%s\n", pin); ++ ++ /* Goes through the linked list so as to find ++ a driver instance with the same name */ ++ list_for_each(this, &a4l_drvs) { ++ struct a4l_driver *drv = list_entry(this, struct a4l_driver, list); ++ ++ if (strcmp(drv->board_name, pin) == 0) { ++ /* The argument pio can be NULL ++ if there is no need to retrieve the pointer */ ++ if (pio != NULL) ++ *pio = drv; ++ ret = 0; ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++int a4l_register_drv(struct a4l_driver * drv) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ __a4l_dbg(1, core_dbg, "board name=%s\n", drv->board_name); ++ ++ if (a4l_lct_drv(drv->board_name, NULL) != 0) { ++ list_add(&drv->list, &a4l_drvs); ++ return 0; ++ } else ++ return -EINVAL; ++} ++ ++int a4l_unregister_drv(struct a4l_driver * drv) ++{ ++ __a4l_dbg(1, core_dbg, "board name=%s\n", drv->board_name); ++ ++ if (a4l_lct_drv(drv->board_name, NULL) == 0) { ++ /* Here, we consider the argument is pointing ++ to a real driver struct (not a blank structure ++ with only the name field properly set */ ++ list_del(&drv->list); ++ return 0; ++ } else ++ return -EINVAL; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++/* --- Driver list proc section --- */ ++ ++int a4l_rdproc_drvs(struct seq_file *p, void *data) ++{ ++ int i = 0; ++ struct list_head *this; ++ ++ seq_printf(p, "-- Analogy drivers --\n\n"); ++ ++ seq_printf(p, "| idx | board name \n"); ++ ++ list_for_each(this, &a4l_drvs) { ++ struct a4l_driver *drv = list_entry(this, struct a4l_driver, list); ++ seq_printf(p, "| %02d | %s \n", i++, drv->board_name); ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_PROC_FS */ +--- linux/drivers/xenomai/net/drivers/rt_macb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_macb.h 2021-04-07 16:01:27.777633380 +0800 +@@ -0,0 +1,624 @@ ++/* ++ * Atmel MACB Ethernet Controller driver ++ * ++ * Copyright (C) 2004-2006 Atmel Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#ifndef _MACB_H ++#define _MACB_H ++ ++#define MACB_GREGS_NBR 16 ++#define MACB_GREGS_VERSION 1 ++ ++/* MACB register offsets */ ++#define MACB_NCR 0x0000 ++#define MACB_NCFGR 0x0004 ++#define MACB_NSR 0x0008 ++#define MACB_TAR 0x000c /* AT91RM9200 only */ ++#define MACB_TCR 0x0010 /* AT91RM9200 only */ ++#define MACB_TSR 0x0014 ++#define MACB_RBQP 0x0018 ++#define MACB_TBQP 0x001c ++#define MACB_RSR 0x0020 ++#define MACB_ISR 0x0024 ++#define MACB_IER 0x0028 ++#define MACB_IDR 0x002c ++#define MACB_IMR 0x0030 ++#define MACB_MAN 0x0034 ++#define MACB_PTR 0x0038 ++#define MACB_PFR 0x003c ++#define MACB_FTO 0x0040 ++#define MACB_SCF 0x0044 ++#define MACB_MCF 0x0048 ++#define MACB_FRO 0x004c ++#define MACB_FCSE 0x0050 ++#define MACB_ALE 0x0054 ++#define MACB_DTF 0x0058 ++#define MACB_LCOL 0x005c ++#define MACB_EXCOL 0x0060 ++#define MACB_TUND 0x0064 ++#define MACB_CSE 0x0068 ++#define MACB_RRE 0x006c ++#define MACB_ROVR 0x0070 ++#define MACB_RSE 0x0074 ++#define MACB_ELE 0x0078 ++#define MACB_RJA 0x007c ++#define MACB_USF 0x0080 ++#define MACB_STE 0x0084 ++#define MACB_RLE 0x0088 ++#define MACB_TPF 0x008c ++#define MACB_HRB 0x0090 ++#define MACB_HRT 0x0094 ++#define MACB_SA1B 0x0098 ++#define MACB_SA1T 0x009c ++#define MACB_SA2B 0x00a0 ++#define MACB_SA2T 0x00a4 ++#define MACB_SA3B 0x00a8 ++#define MACB_SA3T 0x00ac ++#define MACB_SA4B 0x00b0 ++#define MACB_SA4T 0x00b4 ++#define MACB_TID 0x00b8 ++#define MACB_TPQ 0x00bc ++#define MACB_USRIO 0x00c0 ++#define MACB_WOL 0x00c4 ++#define MACB_MID 0x00fc ++ ++/* GEM register offsets. */ ++#define GEM_NCFGR 0x0004 ++#define GEM_USRIO 0x000c ++#define GEM_DMACFG 0x0010 ++#define GEM_HRB 0x0080 ++#define GEM_HRT 0x0084 ++#define GEM_SA1B 0x0088 ++#define GEM_SA1T 0x008C ++#define GEM_SA2B 0x0090 ++#define GEM_SA2T 0x0094 ++#define GEM_SA3B 0x0098 ++#define GEM_SA3T 0x009C ++#define GEM_SA4B 0x00A0 ++#define GEM_SA4T 0x00A4 ++#define GEM_OTX 0x0100 ++#define GEM_DCFG1 0x0280 ++#define GEM_DCFG2 0x0284 ++#define GEM_DCFG3 0x0288 ++#define GEM_DCFG4 0x028c ++#define GEM_DCFG5 0x0290 ++#define GEM_DCFG6 0x0294 ++#define GEM_DCFG7 0x0298 ++ ++/* Bitfields in NCR */ ++#define MACB_LB_OFFSET 0 ++#define MACB_LB_SIZE 1 ++#define MACB_LLB_OFFSET 1 ++#define MACB_LLB_SIZE 1 ++#define MACB_RE_OFFSET 2 ++#define MACB_RE_SIZE 1 ++#define MACB_TE_OFFSET 3 ++#define MACB_TE_SIZE 1 ++#define MACB_MPE_OFFSET 4 ++#define MACB_MPE_SIZE 1 ++#define MACB_CLRSTAT_OFFSET 5 ++#define MACB_CLRSTAT_SIZE 1 ++#define MACB_INCSTAT_OFFSET 6 ++#define MACB_INCSTAT_SIZE 1 ++#define MACB_WESTAT_OFFSET 7 ++#define MACB_WESTAT_SIZE 1 ++#define MACB_BP_OFFSET 8 ++#define MACB_BP_SIZE 1 ++#define MACB_TSTART_OFFSET 9 ++#define MACB_TSTART_SIZE 1 ++#define MACB_THALT_OFFSET 10 ++#define MACB_THALT_SIZE 1 ++#define MACB_NCR_TPF_OFFSET 11 ++#define MACB_NCR_TPF_SIZE 1 ++#define MACB_TZQ_OFFSET 12 ++#define MACB_TZQ_SIZE 1 ++ ++/* Bitfields in NCFGR */ ++#define MACB_SPD_OFFSET 0 ++#define MACB_SPD_SIZE 1 ++#define MACB_FD_OFFSET 1 ++#define MACB_FD_SIZE 1 ++#define MACB_BIT_RATE_OFFSET 2 ++#define MACB_BIT_RATE_SIZE 1 ++#define MACB_JFRAME_OFFSET 3 ++#define MACB_JFRAME_SIZE 1 ++#define MACB_CAF_OFFSET 4 ++#define MACB_CAF_SIZE 1 ++#define MACB_NBC_OFFSET 5 ++#define MACB_NBC_SIZE 1 ++#define MACB_NCFGR_MTI_OFFSET 6 ++#define MACB_NCFGR_MTI_SIZE 1 ++#define MACB_UNI_OFFSET 7 ++#define MACB_UNI_SIZE 1 ++#define MACB_BIG_OFFSET 8 ++#define MACB_BIG_SIZE 1 ++#define MACB_EAE_OFFSET 9 ++#define MACB_EAE_SIZE 1 ++#define MACB_CLK_OFFSET 10 ++#define MACB_CLK_SIZE 2 ++#define 
MACB_RTY_OFFSET 12 ++#define MACB_RTY_SIZE 1 ++#define MACB_PAE_OFFSET 13 ++#define MACB_PAE_SIZE 1 ++#define MACB_RM9200_RMII_OFFSET 13 /* AT91RM9200 only */ ++#define MACB_RM9200_RMII_SIZE 1 /* AT91RM9200 only */ ++#define MACB_RBOF_OFFSET 14 ++#define MACB_RBOF_SIZE 2 ++#define MACB_RLCE_OFFSET 16 ++#define MACB_RLCE_SIZE 1 ++#define MACB_DRFCS_OFFSET 17 ++#define MACB_DRFCS_SIZE 1 ++#define MACB_EFRHD_OFFSET 18 ++#define MACB_EFRHD_SIZE 1 ++#define MACB_IRXFCS_OFFSET 19 ++#define MACB_IRXFCS_SIZE 1 ++ ++/* GEM specific NCFGR bitfields. */ ++#define GEM_GBE_OFFSET 10 ++#define GEM_GBE_SIZE 1 ++#define GEM_CLK_OFFSET 18 ++#define GEM_CLK_SIZE 3 ++#define GEM_DBW_OFFSET 21 ++#define GEM_DBW_SIZE 2 ++ ++/* Constants for data bus width. */ ++#define GEM_DBW32 0 ++#define GEM_DBW64 1 ++#define GEM_DBW128 2 ++ ++/* Bitfields in DMACFG. */ ++#define GEM_FBLDO_OFFSET 0 ++#define GEM_FBLDO_SIZE 5 ++#define GEM_ENDIA_OFFSET 7 ++#define GEM_ENDIA_SIZE 1 ++#define GEM_RXBMS_OFFSET 8 ++#define GEM_RXBMS_SIZE 2 ++#define GEM_TXPBMS_OFFSET 10 ++#define GEM_TXPBMS_SIZE 1 ++#define GEM_TXCOEN_OFFSET 11 ++#define GEM_TXCOEN_SIZE 1 ++#define GEM_RXBS_OFFSET 16 ++#define GEM_RXBS_SIZE 8 ++#define GEM_DDRP_OFFSET 24 ++#define GEM_DDRP_SIZE 1 ++ ++ ++/* Bitfields in NSR */ ++#define MACB_NSR_LINK_OFFSET 0 ++#define MACB_NSR_LINK_SIZE 1 ++#define MACB_MDIO_OFFSET 1 ++#define MACB_MDIO_SIZE 1 ++#define MACB_IDLE_OFFSET 2 ++#define MACB_IDLE_SIZE 1 ++ ++/* Bitfields in TSR */ ++#define MACB_UBR_OFFSET 0 ++#define MACB_UBR_SIZE 1 ++#define MACB_COL_OFFSET 1 ++#define MACB_COL_SIZE 1 ++#define MACB_TSR_RLE_OFFSET 2 ++#define MACB_TSR_RLE_SIZE 1 ++#define MACB_TGO_OFFSET 3 ++#define MACB_TGO_SIZE 1 ++#define MACB_BEX_OFFSET 4 ++#define MACB_BEX_SIZE 1 ++#define MACB_RM9200_BNQ_OFFSET 4 /* AT91RM9200 only */ ++#define MACB_RM9200_BNQ_SIZE 1 /* AT91RM9200 only */ ++#define MACB_COMP_OFFSET 5 ++#define MACB_COMP_SIZE 1 ++#define MACB_UND_OFFSET 6 ++#define MACB_UND_SIZE 1 ++ ++/* Bitfields in RSR */ ++#define MACB_BNA_OFFSET 0 ++#define MACB_BNA_SIZE 1 ++#define MACB_REC_OFFSET 1 ++#define MACB_REC_SIZE 1 ++#define MACB_OVR_OFFSET 2 ++#define MACB_OVR_SIZE 1 ++ ++/* Bitfields in ISR/IER/IDR/IMR */ ++#define MACB_MFD_OFFSET 0 ++#define MACB_MFD_SIZE 1 ++#define MACB_RCOMP_OFFSET 1 ++#define MACB_RCOMP_SIZE 1 ++#define MACB_RXUBR_OFFSET 2 ++#define MACB_RXUBR_SIZE 1 ++#define MACB_TXUBR_OFFSET 3 ++#define MACB_TXUBR_SIZE 1 ++#define MACB_ISR_TUND_OFFSET 4 ++#define MACB_ISR_TUND_SIZE 1 ++#define MACB_ISR_RLE_OFFSET 5 ++#define MACB_ISR_RLE_SIZE 1 ++#define MACB_TXERR_OFFSET 6 ++#define MACB_TXERR_SIZE 1 ++#define MACB_TCOMP_OFFSET 7 ++#define MACB_TCOMP_SIZE 1 ++#define MACB_ISR_LINK_OFFSET 9 ++#define MACB_ISR_LINK_SIZE 1 ++#define MACB_ISR_ROVR_OFFSET 10 ++#define MACB_ISR_ROVR_SIZE 1 ++#define MACB_HRESP_OFFSET 11 ++#define MACB_HRESP_SIZE 1 ++#define MACB_PFR_OFFSET 12 ++#define MACB_PFR_SIZE 1 ++#define MACB_PTZ_OFFSET 13 ++#define MACB_PTZ_SIZE 1 ++ ++/* Bitfields in MAN */ ++#define MACB_DATA_OFFSET 0 ++#define MACB_DATA_SIZE 16 ++#define MACB_CODE_OFFSET 16 ++#define MACB_CODE_SIZE 2 ++#define MACB_REGA_OFFSET 18 ++#define MACB_REGA_SIZE 5 ++#define MACB_PHYA_OFFSET 23 ++#define MACB_PHYA_SIZE 5 ++#define MACB_RW_OFFSET 28 ++#define MACB_RW_SIZE 2 ++#define MACB_SOF_OFFSET 30 ++#define MACB_SOF_SIZE 2 ++ ++/* Bitfields in USRIO (AVR32) */ ++#define MACB_MII_OFFSET 0 ++#define MACB_MII_SIZE 1 ++#define MACB_EAM_OFFSET 1 ++#define MACB_EAM_SIZE 1 ++#define MACB_TX_PAUSE_OFFSET 2 ++#define MACB_TX_PAUSE_SIZE 1 
++#define MACB_TX_PAUSE_ZERO_OFFSET 3 ++#define MACB_TX_PAUSE_ZERO_SIZE 1 ++ ++/* Bitfields in USRIO (AT91) */ ++#define MACB_RMII_OFFSET 0 ++#define MACB_RMII_SIZE 1 ++#define GEM_RGMII_OFFSET 0 /* GEM gigabit mode */ ++#define GEM_RGMII_SIZE 1 ++#define MACB_CLKEN_OFFSET 1 ++#define MACB_CLKEN_SIZE 1 ++ ++/* Bitfields in WOL */ ++#define MACB_IP_OFFSET 0 ++#define MACB_IP_SIZE 16 ++#define MACB_MAG_OFFSET 16 ++#define MACB_MAG_SIZE 1 ++#define MACB_ARP_OFFSET 17 ++#define MACB_ARP_SIZE 1 ++#define MACB_SA1_OFFSET 18 ++#define MACB_SA1_SIZE 1 ++#define MACB_WOL_MTI_OFFSET 19 ++#define MACB_WOL_MTI_SIZE 1 ++ ++/* Bitfields in MID */ ++#define MACB_IDNUM_OFFSET 16 ++#define MACB_IDNUM_SIZE 16 ++#define MACB_REV_OFFSET 0 ++#define MACB_REV_SIZE 16 ++ ++/* Bitfields in DCFG1. */ ++#define GEM_IRQCOR_OFFSET 23 ++#define GEM_IRQCOR_SIZE 1 ++#define GEM_DBWDEF_OFFSET 25 ++#define GEM_DBWDEF_SIZE 3 ++ ++/* Constants for CLK */ ++#define MACB_CLK_DIV8 0 ++#define MACB_CLK_DIV16 1 ++#define MACB_CLK_DIV32 2 ++#define MACB_CLK_DIV64 3 ++ ++/* GEM specific constants for CLK. */ ++#define GEM_CLK_DIV8 0 ++#define GEM_CLK_DIV16 1 ++#define GEM_CLK_DIV32 2 ++#define GEM_CLK_DIV48 3 ++#define GEM_CLK_DIV64 4 ++#define GEM_CLK_DIV96 5 ++ ++/* Constants for MAN register */ ++#define MACB_MAN_SOF 1 ++#define MACB_MAN_WRITE 1 ++#define MACB_MAN_READ 2 ++#define MACB_MAN_CODE 2 ++ ++/* Capability mask bits */ ++#define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x1 ++ ++/* Bit manipulation macros */ ++#define MACB_BIT(name) \ ++ (1 << MACB_##name##_OFFSET) ++#define MACB_BF(name,value) \ ++ (((value) & ((1 << MACB_##name##_SIZE) - 1)) \ ++ << MACB_##name##_OFFSET) ++#define MACB_BFEXT(name,value)\ ++ (((value) >> MACB_##name##_OFFSET) \ ++ & ((1 << MACB_##name##_SIZE) - 1)) ++#define MACB_BFINS(name,value,old) \ ++ (((old) & ~(((1 << MACB_##name##_SIZE) - 1) \ ++ << MACB_##name##_OFFSET)) \ ++ | MACB_BF(name,value)) ++ ++#define GEM_BIT(name) \ ++ (1 << GEM_##name##_OFFSET) ++#define GEM_BF(name, value) \ ++ (((value) & ((1 << GEM_##name##_SIZE) - 1)) \ ++ << GEM_##name##_OFFSET) ++#define GEM_BFEXT(name, value)\ ++ (((value) >> GEM_##name##_OFFSET) \ ++ & ((1 << GEM_##name##_SIZE) - 1)) ++#define GEM_BFINS(name, value, old) \ ++ (((old) & ~(((1 << GEM_##name##_SIZE) - 1) \ ++ << GEM_##name##_OFFSET)) \ ++ | GEM_BF(name, value)) ++ ++/* Register access macros */ ++#define macb_readl(port,reg) \ ++ __raw_readl((port)->regs + MACB_##reg) ++#define macb_writel(port,reg,value) \ ++ __raw_writel((value), (port)->regs + MACB_##reg) ++#define gem_readl(port, reg) \ ++ __raw_readl((port)->regs + GEM_##reg) ++#define gem_writel(port, reg, value) \ ++ __raw_writel((value), (port)->regs + GEM_##reg) ++ ++/* ++ * Conditional GEM/MACB macros. These perform the operation to the correct ++ * register dependent on whether the device is a GEM or a MACB. For registers ++ * and bitfields that are common across both devices, use macb_{read,write}l ++ * to avoid the cost of the conditional. 
++ */ ++#define macb_or_gem_writel(__bp, __reg, __value) \ ++ ({ \ ++ if (macb_is_gem((__bp))) \ ++ gem_writel((__bp), __reg, __value); \ ++ else \ ++ macb_writel((__bp), __reg, __value); \ ++ }) ++ ++#define macb_or_gem_readl(__bp, __reg) \ ++ ({ \ ++ u32 __v; \ ++ if (macb_is_gem((__bp))) \ ++ __v = gem_readl((__bp), __reg); \ ++ else \ ++ __v = macb_readl((__bp), __reg); \ ++ __v; \ ++ }) ++ ++/** ++ * @brief Hardware DMA descriptor ++ * @anchor macb_dma_desc ++ */ ++struct macb_dma_desc { ++ /** DMA address of data buffer */ ++ u32 addr; ++ /** Control and status bits */ ++ u32 ctrl; ++}; ++ ++/* DMA descriptor bitfields */ ++#define MACB_RX_USED_OFFSET 0 ++#define MACB_RX_USED_SIZE 1 ++#define MACB_RX_WRAP_OFFSET 1 ++#define MACB_RX_WRAP_SIZE 1 ++#define MACB_RX_WADDR_OFFSET 2 ++#define MACB_RX_WADDR_SIZE 30 ++ ++#define MACB_RX_FRMLEN_OFFSET 0 ++#define MACB_RX_FRMLEN_SIZE 12 ++#define MACB_RX_OFFSET_OFFSET 12 ++#define MACB_RX_OFFSET_SIZE 2 ++#define MACB_RX_SOF_OFFSET 14 ++#define MACB_RX_SOF_SIZE 1 ++#define MACB_RX_EOF_OFFSET 15 ++#define MACB_RX_EOF_SIZE 1 ++#define MACB_RX_CFI_OFFSET 16 ++#define MACB_RX_CFI_SIZE 1 ++#define MACB_RX_VLAN_PRI_OFFSET 17 ++#define MACB_RX_VLAN_PRI_SIZE 3 ++#define MACB_RX_PRI_TAG_OFFSET 20 ++#define MACB_RX_PRI_TAG_SIZE 1 ++#define MACB_RX_VLAN_TAG_OFFSET 21 ++#define MACB_RX_VLAN_TAG_SIZE 1 ++#define MACB_RX_TYPEID_MATCH_OFFSET 22 ++#define MACB_RX_TYPEID_MATCH_SIZE 1 ++#define MACB_RX_SA4_MATCH_OFFSET 23 ++#define MACB_RX_SA4_MATCH_SIZE 1 ++#define MACB_RX_SA3_MATCH_OFFSET 24 ++#define MACB_RX_SA3_MATCH_SIZE 1 ++#define MACB_RX_SA2_MATCH_OFFSET 25 ++#define MACB_RX_SA2_MATCH_SIZE 1 ++#define MACB_RX_SA1_MATCH_OFFSET 26 ++#define MACB_RX_SA1_MATCH_SIZE 1 ++#define MACB_RX_EXT_MATCH_OFFSET 28 ++#define MACB_RX_EXT_MATCH_SIZE 1 ++#define MACB_RX_UHASH_MATCH_OFFSET 29 ++#define MACB_RX_UHASH_MATCH_SIZE 1 ++#define MACB_RX_MHASH_MATCH_OFFSET 30 ++#define MACB_RX_MHASH_MATCH_SIZE 1 ++#define MACB_RX_BROADCAST_OFFSET 31 ++#define MACB_RX_BROADCAST_SIZE 1 ++ ++#define MACB_TX_FRMLEN_OFFSET 0 ++#define MACB_TX_FRMLEN_SIZE 11 ++#define MACB_TX_LAST_OFFSET 15 ++#define MACB_TX_LAST_SIZE 1 ++#define MACB_TX_NOCRC_OFFSET 16 ++#define MACB_TX_NOCRC_SIZE 1 ++#define MACB_TX_BUF_EXHAUSTED_OFFSET 27 ++#define MACB_TX_BUF_EXHAUSTED_SIZE 1 ++#define MACB_TX_UNDERRUN_OFFSET 28 ++#define MACB_TX_UNDERRUN_SIZE 1 ++#define MACB_TX_ERROR_OFFSET 29 ++#define MACB_TX_ERROR_SIZE 1 ++#define MACB_TX_WRAP_OFFSET 30 ++#define MACB_TX_WRAP_SIZE 1 ++#define MACB_TX_USED_OFFSET 31 ++#define MACB_TX_USED_SIZE 1 ++ ++/** ++ * @brief Data about an skb which is being transmitted ++ * @anchor macb_tx_skb ++ */ ++struct macb_tx_skb { ++ /** skb currently being transmitted */ ++ struct rtskb *skb; ++ /** DMA address of the skb's data buffer */ ++ dma_addr_t mapping; ++}; ++ ++/* ++ * Hardware-collected statistics. Used when updating the network ++ * device stats by a periodic timer. 
++ */ ++struct macb_stats { ++ u32 rx_pause_frames; ++ u32 tx_ok; ++ u32 tx_single_cols; ++ u32 tx_multiple_cols; ++ u32 rx_ok; ++ u32 rx_fcs_errors; ++ u32 rx_align_errors; ++ u32 tx_deferred; ++ u32 tx_late_cols; ++ u32 tx_excessive_cols; ++ u32 tx_underruns; ++ u32 tx_carrier_errors; ++ u32 rx_resource_errors; ++ u32 rx_overruns; ++ u32 rx_symbol_errors; ++ u32 rx_oversize_pkts; ++ u32 rx_jabbers; ++ u32 rx_undersize_pkts; ++ u32 sqe_test_errors; ++ u32 rx_length_mismatch; ++ u32 tx_pause_frames; ++}; ++ ++struct gem_stats { ++ u32 tx_octets_31_0; ++ u32 tx_octets_47_32; ++ u32 tx_frames; ++ u32 tx_broadcast_frames; ++ u32 tx_multicast_frames; ++ u32 tx_pause_frames; ++ u32 tx_64_byte_frames; ++ u32 tx_65_127_byte_frames; ++ u32 tx_128_255_byte_frames; ++ u32 tx_256_511_byte_frames; ++ u32 tx_512_1023_byte_frames; ++ u32 tx_1024_1518_byte_frames; ++ u32 tx_greater_than_1518_byte_frames; ++ u32 tx_underrun; ++ u32 tx_single_collision_frames; ++ u32 tx_multiple_collision_frames; ++ u32 tx_excessive_collisions; ++ u32 tx_late_collisions; ++ u32 tx_deferred_frames; ++ u32 tx_carrier_sense_errors; ++ u32 rx_octets_31_0; ++ u32 rx_octets_47_32; ++ u32 rx_frames; ++ u32 rx_broadcast_frames; ++ u32 rx_multicast_frames; ++ u32 rx_pause_frames; ++ u32 rx_64_byte_frames; ++ u32 rx_65_127_byte_frames; ++ u32 rx_128_255_byte_frames; ++ u32 rx_256_511_byte_frames; ++ u32 rx_512_1023_byte_frames; ++ u32 rx_1024_1518_byte_frames; ++ u32 rx_greater_than_1518_byte_frames; ++ u32 rx_undersized_frames; ++ u32 rx_oversize_frames; ++ u32 rx_jabbers; ++ u32 rx_frame_check_sequence_errors; ++ u32 rx_length_field_frame_errors; ++ u32 rx_symbol_errors; ++ u32 rx_alignment_errors; ++ u32 rx_resource_errors; ++ u32 rx_overruns; ++ u32 rx_ip_header_checksum_errors; ++ u32 rx_tcp_checksum_errors; ++ u32 rx_udp_checksum_errors; ++}; ++ ++struct macb; ++ ++struct macb_or_gem_ops { ++ int (*mog_alloc_rx_buffers)(struct macb *bp); ++ void (*mog_free_rx_buffers)(struct macb *bp); ++ void (*mog_init_rings)(struct macb *bp); ++ int (*mog_rx)(struct macb *bp, int budget, nanosecs_abs_t *ts); ++}; ++ ++struct macb { ++ void __iomem *regs; ++ ++ unsigned int rx_tail; ++ unsigned int rx_prepared_head; ++ struct macb_dma_desc *rx_ring; ++ struct rtskb **rx_skbuff; ++ void *rx_buffers; ++ size_t rx_buffer_size; ++ ++ unsigned int tx_head, tx_tail; ++ struct macb_dma_desc *tx_ring; ++ struct macb_tx_skb *tx_skb; ++ ++ rtdm_lock_t lock; ++ struct platform_device *pdev; ++ struct clk *pclk; ++ struct clk *hclk; ++ struct clk *tx_clk; ++ struct rtnet_device *dev; ++ struct work_struct tx_error_task; ++ struct net_device_stats stats; ++ union { ++ struct macb_stats macb; ++ struct gem_stats gem; ++ } hw_stats; ++ ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ dma_addr_t rx_buffers_dma; ++ ++ struct macb_or_gem_ops macbgem_ops; ++ ++ struct mii_bus *mii_bus; ++ struct phy_device *phy_dev; ++ unsigned int link; ++ unsigned int speed; ++ unsigned int duplex; ++ ++ u32 caps; ++ ++ phy_interface_t phy_interface; ++ ++ struct net_device *phy_phony_net_device; ++ rtdm_irq_t irq_handle; ++ ++ /* AT91RM9200 transmit */ ++ struct rtskb *skb; /* holds skb until xmit interrupt completes */ ++ dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ ++ int skb_length; /* saved skb length for pci_unmap_single */ ++}; ++ ++extern const struct ethtool_ops macb_ethtool_ops; ++ ++int rtmacb_mii_init(struct macb *bp); ++int rtmacb_ioctl(struct rtnet_device *dev, unsigned cmd, void *arg); ++struct net_device_stats *rtmacb_get_stats(struct 
rtnet_device *dev); ++void rtmacb_set_hwaddr(struct macb *bp); ++void rtmacb_get_hwaddr(struct macb *bp); ++ ++static inline bool macb_is_gem(struct macb *bp) ++{ ++ return MACB_BFEXT(IDNUM, macb_readl(bp, MID)) == 0x2; ++} ++ ++#endif /* _MACB_H */ +--- linux/drivers/xenomai/net/drivers/rt_fec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_fec.h 2021-04-07 16:01:27.772633387 +0800 +@@ -0,0 +1,153 @@ ++/****************************************************************************/ ++ ++/* ++ * fec.h -- Fast Ethernet Controller for Motorola ColdFire SoC ++ * processors. ++ * ++ * (C) Copyright 2000-2005, Greg Ungerer (gerg@snapgear.com) ++ * (C) Copyright 2000-2001, Lineo (www.lineo.com) ++ */ ++ ++/****************************************************************************/ ++#ifndef RT_FEC_H ++#define RT_FEC_H ++/****************************************************************************/ ++ ++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ ++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ ++ defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) ++/* ++ * Just figures, Motorola would have to change the offsets for ++ * registers in the same peripheral device on different models ++ * of the ColdFire! ++ */ ++#define FEC_IEVENT 0x004 /* Interrupt event reg */ ++#define FEC_IMASK 0x008 /* Interrupt mask reg */ ++#define FEC_R_DES_ACTIVE 0x010 /* Receive descriptor reg */ ++#define FEC_X_DES_ACTIVE 0x014 /* Transmit descriptor reg */ ++#define FEC_ECNTRL 0x024 /* Ethernet control reg */ ++#define FEC_MII_DATA 0x040 /* MII manage frame reg */ ++#define FEC_MII_SPEED 0x044 /* MII speed control reg */ ++#define FEC_MIB_CTRLSTAT 0x064 /* MIB control/status reg */ ++#define FEC_R_CNTRL 0x084 /* Receive control reg */ ++#define FEC_X_CNTRL 0x0c4 /* Transmit Control reg */ ++#define FEC_ADDR_LOW 0x0e4 /* Low 32bits MAC address */ ++#define FEC_ADDR_HIGH 0x0e8 /* High 16bits MAC address */ ++#define FEC_OPD 0x0ec /* Opcode + Pause duration */ ++#define FEC_HASH_TABLE_HIGH 0x118 /* High 32bits hash table */ ++#define FEC_HASH_TABLE_LOW 0x11c /* Low 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_HIGH 0x120 /* High 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_LOW 0x124 /* Low 32bits hash table */ ++#define FEC_X_WMRK 0x144 /* FIFO transmit water mark */ ++#define FEC_R_BOUND 0x14c /* FIFO receive bound reg */ ++#define FEC_R_FSTART 0x150 /* FIFO receive start reg */ ++#define FEC_R_DES_START 0x180 /* Receive descriptor ring */ ++#define FEC_X_DES_START 0x184 /* Transmit descriptor ring */ ++#define FEC_R_BUFF_SIZE 0x188 /* Maximum receive buff size */ ++#define FEC_TACC 0x1c0 /* Transmit accelerator reg */ ++#define FEC_MIIGSK_CFGR 0x300 /* MIIGSK Configuration reg */ ++#define FEC_MIIGSK_ENR 0x308 /* MIIGSK Enable reg */ ++ ++#define BM_MIIGSK_CFGR_MII 0x00 ++#define BM_MIIGSK_CFGR_RMII 0x01 ++#define BM_MIIGSK_CFGR_FRCONT_10M 0x40 ++ ++#else ++ ++#define FEC_ECNTRL 0x000 /* Ethernet control reg */ ++#define FEC_IEVENT 0x004 /* Interrupt even reg */ ++#define FEC_IMASK 0x008 /* Interrupt mask reg */ ++#define FEC_IVEC 0x00c /* Interrupt vec status reg */ ++#define FEC_R_DES_ACTIVE 0x010 /* Receive descriptor reg */ ++#define FEC_X_DES_ACTIVE 0x014 /* Transmit descriptor reg */ ++#define FEC_MII_DATA 0x040 /* MII manage frame reg */ ++#define FEC_MII_SPEED 0x044 /* MII speed control reg */ ++#define FEC_R_BOUND 0x08c /* FIFO receive bound reg */ ++#define FEC_R_FSTART 0x090 /* FIFO receive start reg */ ++#define 
FEC_X_WMRK 0x0a4 /* FIFO transmit water mark */ ++#define FEC_X_FSTART 0x0ac /* FIFO transmit start reg */ ++#define FEC_R_CNTRL 0x104 /* Receive control reg */ ++#define FEC_MAX_FRM_LEN 0x108 /* Maximum frame length reg */ ++#define FEC_X_CNTRL 0x144 /* Transmit Control reg */ ++#define FEC_ADDR_LOW 0x3c0 /* Low 32bits MAC address */ ++#define FEC_ADDR_HIGH 0x3c4 /* High 16bits MAC address */ ++#define FEC_GRP_HASH_TABLE_HIGH 0x3c8 /* High 32bits hash table */ ++#define FEC_GRP_HASH_TABLE_LOW 0x3cc /* Low 32bits hash table */ ++#define FEC_R_DES_START 0x3d0 /* Receive descriptor ring */ ++#define FEC_X_DES_START 0x3d4 /* Transmit descriptor ring */ ++#define FEC_R_BUFF_SIZE 0x3d8 /* Maximum receive buff size */ ++#define FEC_FIFO_RAM 0x400 /* FIFO RAM buffer */ ++ ++#endif /* CONFIG_M5272 */ ++ ++ ++/* ++ * Define the buffer descriptor structure. ++ */ ++#if defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) ++struct bufdesc { ++ unsigned short cbd_datlen; /* Data length */ ++ unsigned short cbd_sc; /* Control and status info */ ++ unsigned long cbd_bufaddr; /* Buffer address */ ++}; ++#else ++struct bufdesc { ++ unsigned short cbd_sc; /* Control and status info */ ++ unsigned short cbd_datlen; /* Data length */ ++ unsigned long cbd_bufaddr; /* Buffer address */ ++}; ++#endif ++ ++/* ++ * The following definitions courtesy of commproc.h, which where ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net). ++ */ ++#define BD_SC_EMPTY ((ushort)0x8000) /* Receive is empty */ ++#define BD_SC_READY ((ushort)0x8000) /* Transmit is ready */ ++#define BD_SC_WRAP ((ushort)0x2000) /* Last buffer descriptor */ ++#define BD_SC_INTRPT ((ushort)0x1000) /* Interrupt on change */ ++#define BD_SC_CM ((ushort)0x0200) /* Continuous mode */ ++#define BD_SC_ID ((ushort)0x0100) /* Rec'd too many idles */ ++#define BD_SC_P ((ushort)0x0100) /* xmt preamble */ ++#define BD_SC_BR ((ushort)0x0020) /* Break received */ ++#define BD_SC_FR ((ushort)0x0010) /* Framing error */ ++#define BD_SC_PR ((ushort)0x0008) /* Parity error */ ++#define BD_SC_OV ((ushort)0x0002) /* Overrun */ ++#define BD_SC_CD ((ushort)0x0001) /* ?? */ ++ ++/* Buffer descriptor control/status used by Ethernet receive. ++*/ ++#define BD_ENET_RX_EMPTY ((ushort)0x8000) ++#define BD_ENET_RX_WRAP ((ushort)0x2000) ++#define BD_ENET_RX_INTR ((ushort)0x1000) ++#define BD_ENET_RX_LAST ((ushort)0x0800) ++#define BD_ENET_RX_FIRST ((ushort)0x0400) ++#define BD_ENET_RX_MISS ((ushort)0x0100) ++#define BD_ENET_RX_LG ((ushort)0x0020) ++#define BD_ENET_RX_NO ((ushort)0x0010) ++#define BD_ENET_RX_SH ((ushort)0x0008) ++#define BD_ENET_RX_CR ((ushort)0x0004) ++#define BD_ENET_RX_OV ((ushort)0x0002) ++#define BD_ENET_RX_CL ((ushort)0x0001) ++#define BD_ENET_RX_STATS ((ushort)0x013f) /* All status bits */ ++ ++/* Buffer descriptor control/status used by Ethernet transmit. 
++*/ ++#define BD_ENET_TX_READY ((ushort)0x8000) ++#define BD_ENET_TX_PAD ((ushort)0x4000) ++#define BD_ENET_TX_WRAP ((ushort)0x2000) ++#define BD_ENET_TX_INTR ((ushort)0x1000) ++#define BD_ENET_TX_LAST ((ushort)0x0800) ++#define BD_ENET_TX_TC ((ushort)0x0400) ++#define BD_ENET_TX_DEF ((ushort)0x0200) ++#define BD_ENET_TX_HB ((ushort)0x0100) ++#define BD_ENET_TX_LC ((ushort)0x0080) ++#define BD_ENET_TX_RL ((ushort)0x0040) ++#define BD_ENET_TX_RCMASK ((ushort)0x003c) ++#define BD_ENET_TX_UN ((ushort)0x0002) ++#define BD_ENET_TX_CSL ((ushort)0x0001) ++#define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */ ++ ++ ++/****************************************************************************/ ++#endif /* RT_FEC_H */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.h 2021-04-07 16:01:27.768633393 +0800 +@@ -0,0 +1,81 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_MANAGE_H_ ++#define _E1000_MANAGE_H_ ++ ++bool e1000_check_mng_mode_generic(struct e1000_hw *hw); ++bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw); ++s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw); ++s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum); ++s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr); ++s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, ++ u8 *buffer, u16 length); ++ ++typedef enum { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++} e1000_mng_mode; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ ++#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ ++#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */ ++ ++#define E1000_HICR_EN 0x01 /* Enable bit - RO */ ++/* Driver sets this bit when done to put command in RAM */ ++#define E1000_HICR_C 0x02 ++#define E1000_HICR_SV 0x04 /* Status Validity */ ++#define E1000_HICR_FW_RESET_ENABLE 0x40 ++#define E1000_HICR_FW_RESET 0x80 ++ ++/* Intel(R) Active Management Technology signature */ ++#define E1000_IAMT_SIGNATURE 0x544D4149 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_regs.h 2021-04-07 16:01:27.763633400 +0800 +@@ -0,0 +1,307 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_REGS_H_ ++#define _E1000_REGS_H_ ++ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_FLA 0x0001C /* Flash Access - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FEXTNVM 0x00028 /* Future Extended NVM - RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* Rx Control - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ ++#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ ++#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ ++#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) ++#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ ++#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ ++#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ ++#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ ++#define E1000_EIAM 0x01530 /* Ext. 
Interrupt Ack Auto Clear Mask - RW */ ++#define E1000_TCTL 0x00400 /* Tx Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ ++#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ ++#define E1000_TBT 0x00448 /* Tx Burst Timer - RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ ++#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ ++#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ ++#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_FLSWCTL 0x01030 /* FLASH control register */ ++#define E1000_FLSWDATA 0x01034 /* FLASH data register */ ++#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ ++#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ ++#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ ++#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ ++#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ ++#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ ++#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ ++#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ ++#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ ++#define E1000_RDFPCQ(_n) (0x02430 + (0x4 * (_n))) ++#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ ++#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ ++/* Split and Replication Rx Control - RW */ ++#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */ ++#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ ++#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ ++#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ ++#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ ++#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ ++#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ ++/* ++ * Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. ++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ */ ++#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : (0x0C000 + ((_n) * 0x40))) ++#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : (0x0C004 + ((_n) * 0x40))) ++#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : (0x0C008 + ((_n) * 0x40))) ++#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : (0x0C00C + ((_n) * 0x40))) ++#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : (0x0C010 + ((_n) * 0x40))) ++#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : (0x0C018 + ((_n) * 0x40))) ++#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : (0x0C028 + ((_n) * 0x40))) ++#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : (0x0E000 + ((_n) * 0x40))) ++#define E1000_TDBAH(_n) ((_n) < 4 ? 
(0x03804 + ((_n) * 0x100)) : (0x0E004 + ((_n) * 0x40))) ++#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : (0x0E008 + ((_n) * 0x40))) ++#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : (0x0E010 + ((_n) * 0x40))) ++#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : (0x0E018 + ((_n) * 0x40))) ++#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : (0x0E028 + ((_n) * 0x40))) ++#define E1000_TARC(_n) (0x03840 + (_n << 8)) ++#define E1000_DCA_TXCTRL(_n) (0x03814 + (_n << 8)) ++#define E1000_DCA_RXCTRL(_n) (0x02814 + (_n << 8)) ++#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : (0x0E038 + ((_n) * 0x40))) ++#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : (0x0E03C + ((_n) * 0x40))) ++#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ ++#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ ++#define E1000_TXDMAC 0x03000 /* Tx DMA Control - RW */ ++#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ ++#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) ++#define E1000_RAL(_i) (0x05400 + ((_i) * 8)) ++#define E1000_RAH(_i) (0x05404 + ((_i) * 8)) ++#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) ++#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) ++#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) ++#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) ++#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) ++#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) ++#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ ++#define E1000_TDPUMB 0x0357C /* DMA Tx Descriptor uC Mail Box - RW */ ++#define E1000_TDPUAD 0x03580 /* DMA Tx Descriptor uC Addr Command - RW */ ++#define E1000_TDPUWD 0x03584 /* DMA Tx Descriptor uC Data Write - RW */ ++#define E1000_TDPURD 0x03588 /* DMA Tx Descriptor uC Data Read - RW */ ++#define E1000_TDPUCTL 0x0358C /* DMA Tx Descriptor uC Control - RW */ ++#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ ++#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ ++#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ ++#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 
/* XOFF Rx Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++ ++#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW 
*/ ++#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ ++#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ ++#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ ++#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ ++#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ ++#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ ++#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ ++#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ ++#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ ++#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ ++#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ ++#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ ++#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ ++#define E1000_LENERRS 0x04138 /* Length Errors Count */ ++#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ ++#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ ++#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ ++#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ ++#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ ++#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Page - RW */ ++#define E1000_1GSTAT_RCV 0x04228 /* 1GSTAT Code Violation Packet Count - RW */ ++#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ ++#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_VMD_CTL 0x0581C /* VMDq Control - RW */ ++#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ ++#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ ++#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ ++#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ ++#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ ++#define E1000_HOST_IF 0x08800 /* Host Interface */ ++#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ ++#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ ++ ++#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ ++#define E1000_MDPHYA 0x0003C /* PHY address - RW */ ++#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ ++#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ ++#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ ++#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic 
Control #2 */ ++#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ ++#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ ++#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ ++#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ ++#define E1000_HICR 0x08F00 /* Host Inteface Control */ ++ ++/* RSS registers */ ++#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ ++#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate Interrupt Ext*/ ++#define E1000_IMIRVP 0x05AC0 /* Immediate Interrupt Rx VLAN Priority - RW */ ++#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Allocation Register (_i) - RW */ ++#define E1000_MSIXTADD(_i) (0x0C000 + ((_i) * 0x10)) /* MSI-X Table entry addr low reg 0 - RW */ ++#define E1000_MSIXTUADD(_i) (0x0C004 + ((_i) * 0x10)) /* MSI-X Table entry addr upper reg 0 - RW */ ++#define E1000_MSIXTMSG(_i) (0x0C008 + ((_i) * 0x10)) /* MSI-X Table entry message reg 0 - RW */ ++#define E1000_MSIXVCTRL(_i) (0x0C00C + ((_i) * 0x10)) /* MSI-X Table entry vector ctrl reg 0 - RW */ ++#define E1000_MSIXPBA 0x0E000 /* MSI-X Pending bit array */ ++#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW Array */ ++#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW Array */ ++#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ ++#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.h 2021-04-07 16:01:27.758633407 +0800 +@@ -0,0 +1,84 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82541_H_ ++#define _E1000_82541_H_ ++ ++#define NVM_WORD_SIZE_BASE_SHIFT_82541 (NVM_WORD_SIZE_BASE_SHIFT + 1) ++ ++#define IGP01E1000_PHY_CHANNEL_NUM 4 ++ ++#define IGP01E1000_PHY_AGC_A 0x1172 ++#define IGP01E1000_PHY_AGC_B 0x1272 ++#define IGP01E1000_PHY_AGC_C 0x1472 ++#define IGP01E1000_PHY_AGC_D 0x1872 ++ ++#define IGP01E1000_PHY_AGC_PARAM_A 0x1171 ++#define IGP01E1000_PHY_AGC_PARAM_B 0x1271 ++#define IGP01E1000_PHY_AGC_PARAM_C 0x1471 ++#define IGP01E1000_PHY_AGC_PARAM_D 0x1871 ++ ++#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000 ++#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000 ++ ++#define IGP01E1000_PHY_DSP_RESET 0x1F33 ++ ++#define IGP01E1000_PHY_DSP_FFE 0x1F35 ++#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069 ++#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A ++ ++#define IGP01E1000_IEEE_FORCE_GIG 0x0140 ++#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300 ++ ++#define IGP01E1000_AGC_LENGTH_SHIFT 7 ++#define IGP01E1000_AGC_RANGE 10 ++ ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100 ++ ++#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0 ++#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1 ++#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC ++#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE ++ ++#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100 ++#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80 ++#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070 ++#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040 ++#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010 ++#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080 ++#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500 ++#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000 ++#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL 0x0002 ++ ++#define IGP01E1000_MSE_CHANNEL_D 0x000F ++#define IGP01E1000_MSE_CHANNEL_C 0x00F0 ++#define IGP01E1000_MSE_CHANNEL_B 0x0F00 ++#define IGP01E1000_MSE_CHANNEL_A 0xF000 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.h 2021-04-07 16:01:27.753633415 +0800 +@@ -0,0 +1,95 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_80003ES2LAN_H_ ++#define _E1000_80003ES2LAN_H_ ++ ++#define E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL 0x00 ++#define E1000_KMRNCTRLSTA_OFFSET_INB_CTRL 0x02 ++#define E1000_KMRNCTRLSTA_OFFSET_HD_CTRL 0x10 ++#define E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE 0x1F ++ ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS 0x0008 ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS 0x0800 ++#define E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING 0x0010 ++ ++#define E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT 0x0004 ++#define E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT 0x0000 ++#define E1000_KMRNCTRLSTA_OPMODE_E_IDLE 0x2000 ++ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++#define DEFAULT_TCTL_EXT_GCEX_80003ES2LAN 0x00010000 ++ ++#define DEFAULT_TIPG_IPGT_1000_80003ES2LAN 0x8 ++#define DEFAULT_TIPG_IPGT_10_100_80003ES2LAN 0x9 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Reversal Disabled */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Auto crossover */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 ++ /* 1=Reverse Auto-Negotiation */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_2_5 0x0006 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++/* ++ * 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-100M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Max number of times Kumeran read/write should be validated */ ++#define GG82563_MAX_KMRN_RETRY 0x5 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 ++ /* 1=Enable SERDES Electrical Idle */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding */ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_main.c 2021-04-07 16:01:27.749633420 +0800 +@@ -0,0 +1,5983 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++// RTNET defines... ++#ifdef NETIF_F_TSO ++#undef NETIF_F_TSO ++#endif ++ ++#ifdef NETIF_F_TSO6 ++#undef NETIF_F_TSO6 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#endif ++ ++#ifdef CONFIG_E1000_NAPI ++#undef CONFIG_E1000_NAPI ++#endif ++ ++#ifdef MAX_SKB_FRAGS ++#undef MAX_SKB_FRAGS ++#endif ++ ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT ++#endif ++ ++#ifdef CONFIG_E1000_MQ ++#undef CONFIG_E1000_MQ ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifdef CONFIG_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef HAVE_PCI_ERS ++#error "STOP it here" ++#undef HAVE_PCI_ERS ++#endif ++ ++#ifdef USE_REBOOT_NOTIFIER ++#undef USE_REBOOT_NOTIFIER ++#endif ++ ++#ifdef HAVE_TX_TIMEOUT ++#undef HAVE_TX_TIMEOUT ++#endif ++ ++ ++#ifdef NETIF_F_TSO ++#include ++#ifdef NETIF_F_TSO6 ++#include ++#endif ++#endif ++#ifdef SIOCGMIIPHY ++#include ++#endif ++#ifdef SIOCETHTOOL ++#include ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++#ifdef CONFIG_E1000_MQ ++#include ++#include ++#endif ++ ++#include "e1000.h" ++ ++#ifdef HAVE_PCI_ERS ++#error "STOP it here" ++#endif ++ ++ ++ ++char e1000_driver_name[MODULE_NAME_LEN] = "rt_e1000"; ++static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; ++ ++#ifdef CONFIG_E1000_NAPI ++#define DRV_NAPI "-NAPI" ++#else ++#define DRV_NAPI ++#endif ++ ++ ++#define DRV_DEBUG ++ ++#define DRV_HW_PERF ++ ++/* ++ * Port to rtnet based on e1000 driver version 7.6.15.5 (22-Sep-2008 Mathias Koehrer) ++ * ++ * */ ++ ++#define DRV_VERSION "7.6.15.5" DRV_NAPI DRV_DEBUG DRV_HW_PERF " ported to RTnet" ++const char e1000_driver_version[] = DRV_VERSION; ++static const char e1000_copyright[] = "Copyright (c) 1999-2008 Intel Corporation."; ++ ++// RTNET wrappers ++#define kmalloc(a,b) rtdm_malloc(a) ++#define vmalloc(a) rtdm_malloc(a) ++#define kfree(a) rtdm_free(a) ++#define vfree(a) rtdm_free(a) ++#define skb_reserve(a,b) rtskb_reserve(a,b) ++#define net_device rtnet_device ++#define sk_buff rtskb ++#define netdev_priv(a) a->priv ++// ---------------------- ++ ++ ++ ++/* e1000_pci_tbl - PCI Device ID Table ++ * ++ * Last entry must be all 0s ++ * ++ * Macro expands to... 
++ * {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ */ ++ ++#define PCI_ID_LIST_PCI \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82542), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82543GC_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82543GC_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544EI_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544EI_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544GC_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82544GC_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545EM_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545EM_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541ER_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EM_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP_LOM), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541EI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547EI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546EB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82540EP_LP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82545GM_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82547GI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI_MOBILE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541ER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82541GI_LF), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_PCIE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) ++ ++#define PCI_ID_LIST_PCIE \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_M_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_C), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IGP_M), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI_SERDES), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573E), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573E_IAMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_COPPER_DPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_SERDES_DPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82573L), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_FIBER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES_DUAL), \ ++ 
INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_SERDES_QUAD), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82572EI), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_COPPER_SPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_80003ES2LAN_SERDES_SPT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571EB_QUAD_COPPER_LP), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_82571PT_QUAD_COPPER), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE_GT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH8_IFE_G), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IGP_AMT), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IGP_C), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE_G), \ ++ INTEL_E1000_ETHERNET_DEVICE(E1000_DEV_ID_ICH9_IFE_GT) ++ ++ ++ ++ ++static struct pci_device_id e1000_pci_tbl[] = { ++ PCI_ID_LIST_PCI, ++ PCI_ID_LIST_PCIE, ++ /* required last entry */ ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++static struct pci_device_id e1000_pcipure_tbl[] = { ++ PCI_ID_LIST_PCI, ++ /* required last entry */ ++ {0,} ++}; ++ ++static struct pci_device_id e1000_pcie_tbl[] = { ++ PCI_ID_LIST_PCIE, ++ /* required last entry */ ++ {0,} ++}; ++ ++ ++ ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++ ++static int e1000_init_module(void); ++static void e1000_exit_module(void); ++static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); ++static void e1000_remove(struct pci_dev *pdev); ++static int e1000_alloc_queues(struct e1000_adapter *adapter); ++#ifdef CONFIG_E1000_MQ ++static void e1000_setup_queue_mapping(struct e1000_adapter *adapter); ++#endif ++static int e1000_sw_init(struct e1000_adapter *adapter); ++static int e1000_open(struct net_device *netdev); ++static int e1000_close(struct net_device *netdev); ++static void e1000_configure(struct e1000_adapter *adapter); ++static void e1000_configure_tx(struct e1000_adapter *adapter); ++static void e1000_configure_rx(struct e1000_adapter *adapter); ++static void e1000_setup_rctl(struct e1000_adapter *adapter); ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_set_multi(struct net_device *netdev); ++static void e1000_update_phy_info_task(struct work_struct *work); ++static void e1000_watchdog_task(struct work_struct *work); ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); ++static int e1000_xmit_frame_ring(struct sk_buff *skb, struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring); ++static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev); ++#ifdef CONFIG_E1000_MQ ++static int e1000_subqueue_xmit_frame(struct sk_buff *skb, ++ struct net_device *netdev, int queue); ++#endif ++static void e1000_phy_read_status(struct e1000_adapter *adapter); ++#if 0 ++static struct net_device_stats * e1000_get_stats(struct net_device *netdev); 
++static int e1000_change_mtu(struct net_device *netdev, int new_mtu); ++static int e1000_set_mac(struct net_device *netdev, void *p); ++#endif ++static int e1000_intr(rtdm_irq_t *irq_handle); ++static int e1000_intr_msi(rtdm_irq_t *irq_handle); ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++#ifdef CONFIG_E1000_NAPI ++static int e1000_poll(struct napi_struct *napi, int budget); ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#else ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++#endif ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#if 0 ++static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); ++#ifdef SIOCGMIIPHY ++static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, ++ int cmd); ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter); ++static void e1000_leave_82542_rst(struct e1000_adapter *adapter); ++static void e1000_tx_timeout(struct net_device *dev); ++#endif ++#endif ++static void e1000_reset_task(struct work_struct *work); ++static void e1000_smartspeed(struct e1000_adapter *adapter); ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct sk_buff *skb); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++static void e1000_vlan_rx_register(struct net_device *netdev, ++ struct vlan_group *grp); ++static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid); ++static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid); ++static void e1000_restore_vlan(struct e1000_adapter *adapter); ++#endif ++ ++// static int e1000_suspend(struct pci_dev *pdev, pm_message_t state); ++#ifdef CONFIG_PM ++static int e1000_resume(struct pci_dev *pdev); ++#endif ++#ifndef USE_REBOOT_NOTIFIER ++// static void e1000_shutdown(struct pci_dev *pdev); ++#else ++static int e1000_notify_reboot(struct notifier_block *, unsigned long event, ++ void *ptr); ++static struct notifier_block e1000_notifier_reboot = { ++ .notifier_call = e1000_notify_reboot, ++ .next = NULL, ++ .priority = 0 ++}; ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* for netdump / net console */ ++static void e1000_netpoll (struct net_device *netdev); ++#endif ++ ++#define COPYBREAK_DEFAULT 256 ++static unsigned int copybreak __read_mostly = COPYBREAK_DEFAULT; ++module_param(copybreak, uint, 0644); ++MODULE_PARM_DESC(copybreak, ++ "Maximum size of packet that is copied to a new buffer on receive"); ++ ++ ++#ifdef HAVE_PCI_ERS ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state); ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev); 
++static void e1000_io_resume(struct pci_dev *pdev); ++ ++static struct pci_error_handlers e1000_err_handler = { ++ .error_detected = e1000_io_error_detected, ++ .slot_reset = e1000_io_slot_reset, ++ .resume = e1000_io_resume, ++}; ++#endif ++ ++static struct pci_driver e1000_driver = { ++ .name = e1000_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++#ifdef HAVE_PCI_ERS ++ .err_handler = &e1000_err_handler ++#endif ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++#define MAX_UNITS 8 ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (eg. 1,0,1)"); ++ ++ ++static int local_debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; ++module_param(local_debug, int, 0); ++MODULE_PARM_DESC(local_debug, "Debug level (0=none,...,16=all)"); ++ ++/* The parameter 'pciif' might be used to use this driver for ++ * PCI or PCIe only NICs. ++ * This allows to reflect the situation that newer Linux kernels ++ * have two different (non real time) drivers for the e1000: ++ * e1000 for PCI only ++ * e1000e for PCIe only ++ * ++ * Using the 'pciif' parameter allows to load the driver ++ * modprobe rt_e1000 pciif=pci ++ * to use it as PCI only ++ * and a ++ * modprobe rt_e1000 -o rt_e1000e pciif=pcie ++ * allows to load a second instance of this driver named 'rt_e1000e' ++ * ++ * If the 'pciif' paramter is not specified, all (PCI and PCIe) e1000 ++ * NICs will be used. ++ * */ ++static char *pciif = "all"; ++module_param(pciif, charp, 0); ++MODULE_PARM_DESC(pciif, "PCI Interface: 'all' (default), 'pci', 'pcie'"); ++ ++ ++//#define register_netdev(a) rt_register_rtnetdev(a) ++//#define unregister_netdev(a) rt_unregister_rtnetdev(a) ++//#define free_netdev(a) rtdev_free(a) ++//#define netif_stop_queue(a) rtnetif_stop_queue(a) ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++static int __init e1000_init_module(void) ++{ ++ int ret; ++ strcpy(e1000_driver_name, THIS_MODULE->name); ++ printk(KERN_INFO "%s - %s version %s (pciif: %s)\n", ++ e1000_driver_string, e1000_driver_name, e1000_driver_version, pciif); ++ ++ printk(KERN_INFO "%s\n", e1000_copyright); ++ ++ ++ if (0 == strcmp(pciif, "pcie")) ++ { ++ // PCIe only ++ e1000_driver.id_table = e1000_pcie_tbl; ++ } ++ else if (0 == strcmp(pciif, "pci")) ++ { ++ // PCI only ++ e1000_driver.id_table = e1000_pcipure_tbl; ++ } ++ ++ ret = pci_register_driver(&e1000_driver); ++#ifdef USE_REBOOT_NOTIFIER ++ if (ret >= 0) { ++ register_reboot_notifier(&e1000_notifier_reboot); ++ } ++#endif ++ if (copybreak != COPYBREAK_DEFAULT) { ++ if (copybreak == 0) ++ printk(KERN_INFO "e1000: copybreak disabled\n"); ++ else ++ printk(KERN_INFO "e1000: copybreak enabled for " ++ "packets <= %u bytes\n", copybreak); ++ } ++ return ret; ++} ++ ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. 
++ **/ ++static void __exit e1000_exit_module(void) ++{ ++#ifdef USE_REBOOT_NOTIFIER ++ unregister_reboot_notifier(&e1000_notifier_reboot); ++#endif ++ pci_unregister_driver(&e1000_driver); ++} ++ ++module_exit(e1000_exit_module); ++ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int err = 0; ++ ++ if (adapter->flags & E1000_FLAG_HAS_MSI) { ++ err = pci_enable_msi(adapter->pdev); ++ if (!err) ++ adapter->flags |= E1000_FLAG_MSI_ENABLED; ++ } ++ rt_stack_connect(netdev, &STACK_manager); ++ if (adapter->flags & E1000_FLAG_MSI_ENABLED) { ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, e1000_intr_msi, ++ 0, netdev->name, netdev); ++ if (!err) { ++ return err; ++ } else { ++ adapter->flags &= ~E1000_FLAG_MSI_ENABLED; ++ pci_disable_msi(adapter->pdev); ++ } ++ } ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, RTDM_IRQTYPE_SHARED, netdev->name, ++ netdev); ++ if (err) ++ DPRINTK(PROBE, ERR, "Unable to allocate interrupt Error: %d\n", ++ err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ // struct net_device *netdev = adapter->netdev; ++ ++ rtdm_irq_free(&adapter->irq_handle); ++ ++ if (adapter->flags & E1000_FLAG_MSI_ENABLED) { ++ pci_disable_msi(adapter->pdev); ++ adapter->flags &= ~E1000_FLAG_MSI_ENABLED; ++ } ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++static void e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ synchronize_irq(adapter->pdev->irq); ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++ ++static void e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ if (likely(atomic_dec_and_test(&adapter->irq_sem))) { ++ E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ } ++} ++#ifdef NETIF_F_HW_VLAN_TX ++ ++static void e1000_update_mng_vlan(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ if (adapter->vlgrp) { ++ if (!vlan_group_get_device(adapter->vlgrp, vid)) { ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ e1000_vlan_rx_add_vid(netdev, vid); ++ adapter->mng_vlan_id = vid; ++ } else { ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ ++ if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && ++ (vid != old_vid) && ++ !vlan_group_get_device(adapter->vlgrp, old_vid)) ++ e1000_vlan_rx_kill_vid(netdev, old_vid); ++ } else { ++ adapter->mng_vlan_id = vid; ++ } ++ } ++} ++#endif ++ ++/** ++ * e1000_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the network i/f is closed. 
++ * ++ **/ ++static void e1000_release_hw_control(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware taken over control of h/w */ ++ switch (adapter->hw.mac.type) { ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); ++ E1000_WRITE_REG(&adapter->hw, E1000_SWSM, ++ swsm & ~E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the network i/f is open. ++ * ++ **/ ++static void e1000_get_hw_control(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware know the driver has taken over */ ++ switch (adapter->hw.mac.type) { ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); ++ E1000_WRITE_REG(&adapter->hw, E1000_SWSM, ++ swsm | E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void e1000_init_manageability(struct e1000_adapter *adapter) ++{ ++} ++ ++static void e1000_release_manageability(struct e1000_adapter *adapter) ++{ ++} ++ ++/** ++ * e1000_configure - configure the hardware for RX and TX ++ * @adapter: private board structure ++ **/ ++static void e1000_configure(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int i; ++ ++ e1000_set_multi(netdev); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ e1000_restore_vlan(adapter); ++#endif ++ e1000_init_manageability(adapter); ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ /* call E1000_DESC_UNUSED which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *ring = &adapter->rx_ring[i]; ++ adapter->alloc_rx_buf(adapter, ring, ++ E1000_DESC_UNUSED(ring)); ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ e1000_setup_queue_mapping(adapter); ++#endif ++ ++ // adapter->tx_queue_len = netdev->tx_queue_len; ++} ++ ++static void e1000_napi_enable_all(struct e1000_adapter *adapter) ++{ ++#ifdef CONFIG_E1000_NAPI ++ int i; ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ napi_enable(&adapter->rx_ring[i].napi); ++#endif ++} ++ ++static void e1000_napi_disable_all(struct e1000_adapter *adapter) ++{ ++#ifdef CONFIG_E1000_NAPI ++ int i; ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ napi_disable(&adapter->rx_ring[i].napi); ++#endif ++} ++ ++int e1000_up(struct e1000_adapter *adapter) ++{ ++ /* hardware has been reset, we need to reload some things */ ++ e1000_configure(adapter); ++ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_napi_enable_all(adapter); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire a link change interrupt to start the watchdog */ ++ // 
E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); ++ return 0; ++} ++ ++static void e1000_down_and_stop(struct e1000_adapter *adapter) ++{ ++ /* signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer */ ++ set_bit(__E1000_DOWN, &adapter->state); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_delayed_work_sync(&adapter->watchdog_task); ++ cancel_delayed_work_sync(&adapter->phy_info_task); ++ cancel_delayed_work_sync(&adapter->fifo_stall_task); ++} ++ ++void e1000_down(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 tctl, rctl; ++ ++ e1000_down_and_stop(adapter); ++ ++ /* disable receives in the hardware */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++#ifdef NETIF_F_LLTX ++ rtnetif_stop_queue(netdev); ++#else ++ rtnetif_tx_disable(netdev); ++#endif ++ ++ /* disable transmits in the hardware */ ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ /* flush both disables and wait for them to finish */ ++ E1000_WRITE_FLUSH(&adapter->hw); ++ msleep(10); ++ ++ e1000_napi_disable_all(adapter); ++ ++ e1000_irq_disable(adapter); ++ ++ // netdev->tx_queue_len = adapter->tx_queue_len; ++ rtnetif_carrier_off(netdev); ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ e1000_reset(adapter); ++ e1000_clean_all_tx_rings(adapter); ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++void e1000_reinit_locked(struct e1000_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ e1000_down(adapter); ++ e1000_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->state); ++} ++ ++void e1000_reset(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_fc_info *fc = &adapter->hw.fc; ++ u32 pba = 0, tx_space, min_tx_space, min_rx_space; ++ bool legacy_pba_adjust = FALSE; ++ u16 hwm; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. 
++ */ ++ ++ switch (mac->type) { ++ case e1000_82542: ++ case e1000_82543: ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ legacy_pba_adjust = TRUE; ++ pba = E1000_PBA_48K; ++ break; ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ pba = E1000_PBA_48K; ++ break; ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ legacy_pba_adjust = TRUE; ++ pba = E1000_PBA_30K; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ pba = E1000_PBA_38K; ++ break; ++ case e1000_82573: ++ pba = E1000_PBA_20K; ++ break; ++ case e1000_ich8lan: ++ pba = E1000_PBA_8K; ++ break; ++ case e1000_ich9lan: ++#define E1000_PBA_10K 0x000A ++ pba = E1000_PBA_10K; ++ break; ++ case e1000_undefined: ++ case e1000_num_macs: ++ break; ++ } ++ ++ if (legacy_pba_adjust == TRUE) { ++ if (adapter->max_frame_size > E1000_RXBUFFER_8192) ++ pba -= 8; /* allocate more FIFO for Tx */ ++ ++ if (mac->type == e1000_82547) { ++ adapter->tx_fifo_head = 0; ++ adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; ++ adapter->tx_fifo_size = ++ (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ } ++ } else if (adapter->max_frame_size > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ /* adjust PBA for jumbo frames */ ++ E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); ++ ++ /* To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. */ ++ pba = E1000_READ_REG(&adapter->hw, E1000_PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* the tx fifo also stores 16 bytes of information about the tx ++ * but don't include ethernet FCS because hardware appends it */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(struct e1000_tx_desc) - ++ ETHERNET_FCS_SIZE) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation */ ++ if (tx_space < min_tx_space && ++ ((min_tx_space - tx_space) < pba)) { ++ pba = pba - (min_tx_space - tx_space); ++ ++ /* PCI/PCIx hardware has PBA alignment constraints */ ++ switch (mac->type) { ++ case e1000_82545 ... e1000_82546_rev_3: ++ pba &= ~(E1000_PBA_8K - 1); ++ break; ++ default: ++ break; ++ } ++ ++ /* if short on rx space, rx wins and must trump tx ++ * adjustment or use Early Receive if available */ ++ if (pba < min_rx_space) { ++ switch (mac->type) { ++ case e1000_82573: ++ case e1000_ich9lan: ++ /* ERT enabled in e1000_configure_rx */ ++ break; ++ default: ++ pba = min_rx_space; ++ break; ++ } ++ } ++ } ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); ++ ++ /* flow control settings */ ++ /* The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. 
++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, and ++ * - the full Rx FIFO size minus the early receive size (for parts ++ * with ERT support assuming ERT set to E1000_ERT_2048), or ++ * - the full Rx FIFO size minus one full frame */ ++ hwm = min(((pba << 10) * 9 / 10), ++ ((mac->type == e1000_82573 || mac->type == e1000_ich9lan) ? ++ (u16)((pba << 10) - (E1000_ERT_2048 << 3)) : ++ ((pba << 10) - adapter->max_frame_size))); ++ ++ fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ ++ fc->low_water = fc->high_water - 8; ++ ++ if (mac->type == e1000_80003es2lan) ++ fc->pause_time = 0xFFFF; ++ else ++ fc->pause_time = E1000_FC_PAUSE_TIME; ++ fc->send_xon = 1; ++ fc->type = fc->original_type; ++ ++ /* Allow time for pending master requests to run */ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is in control */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ if (mac->type >= e1000_82544) ++ E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); ++ ++ if (e1000_init_hw(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "Hardware Error\n"); ++#ifdef NETIF_F_HW_VLAN_TX ++ e1000_update_mng_vlan(adapter); ++#endif ++ /* if (adapter->hwflags & HWFLAGS_PHY_PWR_BIT) { */ ++ if (mac->type >= e1000_82544 && ++ mac->type <= e1000_82547_rev_2 && ++ mac->autoneg == 1 && ++ adapter->hw.phy.autoneg_advertised == ADVERTISE_1000_FULL) { ++ u32 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ /* clear phy power management bit if we are in gig only mode, ++ * which if enabled will attempt negotiation to 100Mb, which ++ * can cause a loss of link at power off or driver unload */ ++ ctrl &= ~E1000_CTRL_SWDPIN3; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ } ++ ++#if defined(CONFIG_PPC64) || defined(CONFIG_PPC) ++#define E1000_GCR_DISABLE_TIMEOUT_MECHANISM 0x80000000 ++ if (adapter->hw.mac.type == e1000_82571) { ++ /* work around pSeries hardware by disabling timeouts */ ++ u32 gcr = E1000_READ_REG(&adapter->hw, E1000_GCR); ++ gcr |= E1000_GCR_DISABLE_TIMEOUT_MECHANISM; ++ E1000_WRITE_REG(&adapter->hw, E1000_GCR, gcr); ++ } ++#endif ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ e1000_reset_adaptive(&adapter->hw); ++ e1000_get_phy_info(&adapter->hw); ++ ++ if (!(adapter->flags & E1000_FLAG_SMART_POWER_DOWN) && ++ (mac->type == e1000_82571 || mac->type == e1000_82572)) { ++ u16 phy_data = 0; ++ /* speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed */ ++ e1000_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1000_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ } ++ ++ e1000_release_manageability(adapter); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. 
++ **/ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct net_device *netdev; ++ struct e1000_adapter *adapter; ++ ++ static int cards_found = 0; ++ static int global_quad_port_a = 0; /* global ksp3 port a indication */ ++ int i, err, pci_using_dac; ++ u16 eeprom_data = 0; ++ u16 eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (cards[cards_found++] == 0) ++ { ++ return -ENODEV; ++ } ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) && ++ !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) { ++ pci_using_dac = 1; ++ } else { ++ if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) && ++ (err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK))) { ++ E1000_ERR("No usable DMA configuration, aborting\n"); ++ goto err_dma; ++ } ++ pci_using_dac = 0; ++ } ++ ++ if ((err = pci_request_regions(pdev, e1000_driver_name))) ++ goto err_pci_reg; ++ ++ pci_set_master(pdev); ++ ++ err = -ENOMEM; ++#ifdef CONFIG_E1000_MQ ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter) + ++ (sizeof(struct net_device_subqueue) * ++ E1000_MAX_TX_QUEUES), 16); ++#else ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter), ++ 2 * E1000_DEFAULT_RXD + E1000_DEFAULT_TXD); ++#endif ++ if (!netdev) ++ goto err_alloc_etherdev; ++ ++ memset(netdev->priv, 0, sizeof(struct e1000_adapter)); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ // SET_NETDEV_DEV(netdev, &pdev->dev); ++ netdev->vers = RTDEV_VERS_2_0; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->hw.back = adapter; ++ adapter->msg_enable = (1 << local_debug) - 1; ++ ++ err = -EIO; ++ adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, BAR_0), ++ pci_resource_len(pdev, BAR_0)); ++ if (!adapter->hw.hw_addr) ++ goto err_ioremap; ++ ++ for (i = BAR_1; i <= BAR_5; i++) { ++ if (pci_resource_len(pdev, i) == 0) ++ continue; ++ if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { ++ adapter->hw.io_base = pci_resource_start(pdev, i); ++ break; ++ } ++ } ++ ++ netdev->open = &e1000_open; ++ netdev->stop = &e1000_close; ++ netdev->hard_start_xmit = &e1000_xmit_frame; ++#ifdef CONFIG_E1000_MQ ++ netdev->hard_start_subqueue_xmit = &e1000_subqueue_xmit_frame; ++#endif ++#ifdef HAVE_TX_TIMEOUT ++ netdev->tx_timeout = &e1000_tx_timeout; ++ netdev->watchdog_timeo = 5 * HZ; ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ netdev->vlan_rx_register = e1000_vlan_rx_register; ++ netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; ++ netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid; ++#endif ++#ifdef CONFIG_NET_POLL_CONTROLLER ++ netdev->poll_controller = e1000_netpoll; ++#endif ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ adapter->bd_number = cards_found; ++ ++ /* setup the private structure */ ++ if ((err = e1000_sw_init(adapter))) ++ goto err_sw_init; ++ ++ err = -EIO; ++ /* Flash BAR mapping must happen after e1000_sw_init ++ * because it depends on mac.type */ ++ if (((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ adapter->hw.flash_address = ioremap(pci_resource_start(pdev, 1), ++ pci_resource_len(pdev, 1)); ++ if (!adapter->hw.flash_address) ++ goto err_flashmap; ++ } ++ ++ if ((err = e1000_init_mac_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ if ((err = e1000_init_nvm_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ if ((err = 
e1000_init_phy_params(&adapter->hw))) ++ goto err_hw_init; ++ ++ e1000_get_bus_info(&adapter->hw); ++ ++ e1000_init_script_state_82541(&adapter->hw, TRUE); ++ e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); ++ ++ adapter->hw.phy.autoneg_wait_to_complete = FALSE; ++ adapter->hw.mac.adaptive_ifs = FALSE; ++ ++ /* Copper options */ ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ adapter->hw.phy.mdix = AUTO_ALL_MODES; ++ adapter->hw.phy.disable_polarity_correction = FALSE; ++ adapter->hw.phy.ms_type = E1000_MASTER_SLAVE; ++ } ++ ++ if (e1000_check_reset_block(&adapter->hw)) ++ DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++#ifdef MAX_SKB_FRAGS ++ if (adapter->hw.mac.type >= e1000_82543) { ++#ifdef NETIF_F_HW_VLAN_TX ++ netdev->features = NETIF_F_SG | ++ NETIF_F_HW_CSUM | ++ NETIF_F_HW_VLAN_TX | ++ NETIF_F_HW_VLAN_RX | ++ NETIF_F_HW_VLAN_FILTER; ++ if ((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) ++ netdev->features &= ~NETIF_F_HW_VLAN_FILTER; ++#else ++ netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM; ++#endif ++ } ++ ++#ifdef NETIF_F_TSO ++ if ((adapter->hw.mac.type >= e1000_82544) && ++ (adapter->hw.mac.type != e1000_82547)) { ++ adapter->flags |= E1000_FLAG_HAS_TSO; ++ netdev->features |= NETIF_F_TSO; ++ } ++ ++#ifdef NETIF_F_TSO6 ++ if (adapter->hw.mac.type > e1000_82547_rev_2) { ++ adapter->flags |= E1000_FLAG_HAS_TSO6; ++ netdev->features |= NETIF_F_TSO6; ++ } ++#endif ++#endif ++ if (pci_using_dac) ++ netdev->features |= NETIF_F_HIGHDMA; ++ ++#endif ++#ifdef NETIF_F_LLTX ++ netdev->features |= NETIF_F_LLTX; ++#endif ++ ++ /* Hardware features, flags and workarounds */ ++ if (adapter->hw.mac.type >= e1000_82571) { ++ adapter->flags |= E1000_FLAG_INT_ASSERT_AUTO_MASK; ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ adapter->flags |= E1000_FLAG_HAS_MANC2H; ++ } ++ ++ if (adapter->hw.mac.type >= e1000_82540) { ++ adapter->flags |= E1000_FLAG_HAS_SMBUS; ++ adapter->flags |= E1000_FLAG_HAS_INTR_MODERATION; ++ } ++ ++ if (adapter->hw.mac.type == e1000_82543) ++ adapter->flags |= E1000_FLAG_BAD_TX_CARRIER_STATS_FD; ++ ++ /* In rare occasions, ESB2 systems would end up started without ++ * the RX unit being turned on. 
*/ ++ if (adapter->hw.mac.type == e1000_80003es2lan) ++ adapter->flags |= E1000_FLAG_RX_NEEDS_RESTART; ++ ++ adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); ++ ++ /* before reading the NVM, reset the controller to ++ * put the device in a known good starting state */ ++ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* make sure we don't intercept ARP packets until we're up */ ++ e1000_release_manageability(adapter); ++ ++ /* make sure the NVM is good */ ++ ++ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { ++ DPRINTK(PROBE, ERR, "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* copy the MAC address out of the NVM */ ++ ++ if (e1000_read_mac_addr(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "NVM Read Error\n"); ++ memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); ++#ifdef ETHTOOL_GPERMADDR ++ memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->perm_addr)) { ++#else ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++#endif ++ DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task); ++ INIT_DELAYED_WORK(&adapter->fifo_stall_task, ++ e1000_82547_tx_fifo_stall_task); ++ INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); ++ INIT_WORK(&adapter->reset_task, e1000_reset_task); ++ ++ e1000_check_options(adapter); ++ ++ /* Initial Wake on LAN setting ++ * If APM wake is enabled in the EEPROM, ++ * enable the ACPI Magic Packet filter ++ */ ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82542: ++ case e1000_82543: ++ break; ++ case e1000_82544: ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL2_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_82544_APM; ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ /* APME bit in EEPROM is mapped to WUC.APME */ ++ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); ++ eeprom_apme_mask = E1000_WUC_APME; ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (adapter->hw.bus.func == 1) { ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ break; ++ } ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->eeprom_wol |= E1000_WUFC_MAG; ++ ++ /* now that we have the eeprom settings, apply the special cases ++ * where the eeprom may be wrong or the board simply won't support ++ * wake on lan on a particular port */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ adapter->eeprom_wol = 0; ++ break; ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ /* Wake events only supported on port A for dual fiber ++ * regardless of eeprom setting */ ++ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & ++ E1000_STATUS_FUNC_1) ++ adapter->eeprom_wol = 0; ++ break; ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ /* if quad port adapter, disable WoL on all but port A */ ++ if (global_quad_port_a != 0) ++ adapter->eeprom_wol = 0; ++ else ++ adapter->flags |= E1000_FLAG_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ if 
(++global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ } ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ adapter->wol = adapter->eeprom_wol; ++ ++ /* print bus type/speed/width info */ ++ { ++ struct e1000_hw *hw = &adapter->hw; ++ DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", ++ ((hw->bus.type == e1000_bus_type_pcix) ? "-X" : ++ (hw->bus.type == e1000_bus_type_pci_express ? " Express":"")), ++ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus.speed == e1000_bus_speed_133) ? "133MHz" : ++ (hw->bus.speed == e1000_bus_speed_120) ? "120MHz" : ++ (hw->bus.speed == e1000_bus_speed_100) ? "100MHz" : ++ (hw->bus.speed == e1000_bus_speed_66) ? "66MHz" : "33MHz"), ++ ((hw->bus.width == e1000_bus_width_64) ? "64-bit" : ++ (hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : ++ (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : ++ "32-bit")); ++ } ++ ++ for (i = 0; i < 6; i++) ++ printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); ++ ++ /* reset the hardware with the new settings */ ++ e1000_reset(adapter); ++ ++ /* If the controller is 82573 or ICH and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* tell the stack to leave us alone until e1000_open() is called */ ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ strcpy(netdev->name, "rteth%d"); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_register; ++ ++ DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); ++ ++ cards_found++; ++ return 0; ++ ++err_register: ++err_hw_init: ++ e1000_release_hw_control(adapter); ++err_eeprom: ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ ++ e1000_remove_device(&adapter->hw); ++err_flashmap: ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++err_sw_init: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_regions(pdev); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ e1000_down_and_stop(adapter); ++ ++ e1000_release_manageability(adapter); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. 
*/ ++ e1000_release_hw_control(adapter); ++ ++ rt_unregister_rtnetdev(netdev); ++ ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ e1000_remove_device(&adapter->hw); ++ ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_regions(pdev); ++ ++ rtdev_free(netdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). ++ **/ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++#ifdef CONFIG_E1000_NAPI ++ int i; ++#endif ++ ++ /* PCI config space info */ ++ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_device_id = pdev->subsystem_device; ++ ++ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); ++ ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; ++ adapter->rx_ps_bsize0 = E1000_RXBUFFER_128; ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETHERNET_FCS_SIZE; ++ adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; ++ ++ /* Initialize the hardware-specific values */ ++ if (e1000_setup_init_funcs(hw, FALSE)) { ++ DPRINTK(PROBE, ERR, "Hardware Initialization Failure\n"); ++ return -EIO; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ /* Number of supported queues. ++ * TODO: It's assumed num_rx_queues >= num_tx_queues, since multi-rx ++ * queues are much more interesting. Is it worth coding for the ++ * possibility (however improbable) of num_tx_queues > num_rx_queues? ++ */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ adapter->num_tx_queues = 2; ++ adapter->num_rx_queues = 2; ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ if ((adapter->hw.device_id == E1000_DEV_ID_ICH8_IGP_AMT) || ++ (adapter->hw.device_id == E1000_DEV_ID_ICH8_IGP_M_AMT) || ++ (adapter->hw.device_id == E1000_DEV_ID_ICH9_IGP_AMT)) { ++ adapter->num_tx_queues = 2; ++ adapter->num_rx_queues = 2; ++ break; ++ } ++ /* Fall through - remaining ICH SKUs do not support MQ */ ++ default: ++ /* All hardware before 82571 only have 1 queue each for Rx/Tx. ++ * However, the 82571 family does not have MSI-X, so multi- ++ * queue isn't enabled. ++ * It'd be wise not to mess with this default case. 
:) */ ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++ netdev->egress_subqueue_count = 0; ++ break; ++ } ++ adapter->num_rx_queues = min(adapter->num_rx_queues, num_online_cpus()); ++ adapter->num_tx_queues = min(adapter->num_tx_queues, num_online_cpus()); ++ ++ if ((adapter->num_tx_queues > 1) || (adapter->num_rx_queues > 1)) { ++ netdev->egress_subqueue = (struct net_device_subqueue *) ++ ((void *)adapter + ++ sizeof(struct e1000_adapter)); ++ netdev->egress_subqueue_count = adapter->num_tx_queues; ++ DPRINTK(DRV, INFO, "Multiqueue Enabled: RX queues = %u, " ++ "TX queues = %u\n", adapter->num_rx_queues, ++ adapter->num_tx_queues); ++ } ++#else ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++#endif ++ ++ if (e1000_alloc_queues(adapter)) { ++ DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *rx_ring = &adapter->rx_ring[i]; ++ netif_napi_add(adapter->netdev, &rx_ring->napi, e1000_poll, 64); ++ } ++ rtdm_lock_init(&adapter->tx_queue_lock); ++#ifdef CONFIG_E1000_MQ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ rtdm_lock_init(&adapter->tx_ring[i].tx_queue_lock); ++#endif ++#endif ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ atomic_set(&adapter->irq_sem, 0); ++ e1000_irq_disable(adapter); ++ ++ rtdm_lock_init(&adapter->stats_lock); ++ ++ set_bit(__E1000_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ **/ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ adapter->tx_ring = kcalloc(adapter->num_tx_queues, ++ sizeof(struct e1000_tx_ring), GFP_KERNEL); ++ if (!adapter->tx_ring) ++ return -ENOMEM; ++ ++ adapter->rx_ring = kcalloc(adapter->num_rx_queues, ++ sizeof(struct e1000_rx_ring), GFP_KERNEL); ++ if (!adapter->rx_ring) { ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ adapter->cpu_tx_ring = alloc_percpu(struct e1000_tx_ring *); ++#endif ++ ++ return E1000_SUCCESS; ++} ++ ++#ifdef CONFIG_E1000_MQ ++static void e1000_setup_queue_mapping(struct e1000_adapter *adapter) ++{ ++ int i, cpu; ++ ++ lock_cpu_hotplug(); ++ i = 0; ++ for_each_online_cpu(cpu) { ++ *per_cpu_ptr(adapter->cpu_tx_ring, cpu) = ++ &adapter->tx_ring[i % adapter->num_tx_queues]; ++ i++; ++ } ++ unlock_cpu_hotplug(); ++} ++#endif ++ ++/** ++ * e1000_intr_msi_test - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static irqreturn_t e1000_intr_msi_test(int irq, void *data) ++{ ++ struct net_device *netdev = data; ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ u32 icr = E1000_READ_REG(&adapter->hw, E1000_ICR); ++ DPRINTK(HW,INFO, "icr is %08X\n", icr); ++ if (icr & E1000_ICR_RXSEQ) { ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ wmb(); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++/** ++ * e1000_test_msi_interrupt - Returns 0 for successful test ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c ++ **/ ++static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ int err; ++ ++ /* poll_enable hasn't been called yet, so don't need disable */ ++ /* clear any pending events */ ++ E1000_READ_REG(&adapter->hw, E1000_ICR); ++ ++ /* free the real vector and request a test handler */ ++ e1000_free_irq(adapter); ++ ++ 
err = pci_enable_msi(adapter->pdev); ++ err = request_irq(adapter->pdev->irq, &e1000_intr_msi_test, 0, ++ netdev->name, netdev); ++ if (err) { ++ pci_disable_msi(adapter->pdev); ++ goto msi_test_failed; ++ } ++ ++ /* our temporary test variable */ ++ adapter->flags &= ~E1000_FLAG_HAS_MSI; ++ wmb(); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire an unusual interrupt on the test handler */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_RXSEQ); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ msleep(50); ++ ++ e1000_irq_disable(adapter); ++ ++ rmb(); ++ if (!(adapter->flags & E1000_FLAG_HAS_MSI)) { ++ adapter->flags |= E1000_FLAG_HAS_MSI; ++ err = -EIO; ++ DPRINTK(HW, INFO, "MSI interrupt test failed!\n"); ++ } ++ ++ free_irq(adapter->pdev->irq, netdev); ++ pci_disable_msi(adapter->pdev); ++ ++ if (err == -EIO) ++ goto msi_test_failed; ++ ++ /* okay so the test worked, restore settings */ ++ DPRINTK(HW, INFO, "MSI interrupt test succeeded!\n"); ++msi_test_failed: ++ /* restore the original vector, even if it failed */ ++ e1000_request_irq(adapter); ++ return err; ++} ++ ++/** ++ * e1000_test_msi - Returns 0 if MSI test succeeds and INTx mode is restored ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c, called with e1000 interrupts disabled. ++ **/ ++static int e1000_test_msi(struct e1000_adapter *adapter) ++{ ++ int err; ++ u16 pci_cmd; ++ ++ if (!(adapter->flags & E1000_FLAG_MSI_ENABLED) || ++ !(adapter->flags & E1000_FLAG_HAS_MSI)) ++ return 0; ++ ++ /* disable SERR in case the MSI write causes a master abort */ ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ pci_cmd & ~PCI_COMMAND_SERR); ++ ++ err = e1000_test_msi_interrupt(adapter); ++ ++ /* restore previous setting of command word */ ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd); ++ ++ /* success ! */ ++ if (!err) ++ return 0; ++ ++ /* EIO means MSI test failed */ ++ if (err != -EIO) ++ return err; ++ ++ /* back to INTx mode */ ++ DPRINTK(PROBE, WARNING, "MSI interrupt test failed, using legacy " ++ "interrupt.\n"); ++ ++ e1000_free_irq(adapter); ++ adapter->flags &= ~E1000_FLAG_HAS_MSI; ++ ++ err = e1000_request_irq(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. 
++ **/ ++static int e1000_open(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int err; ++ /* disallow open during test */ ++ if (test_bit(__E1000_TESTING, &adapter->state)) ++ return -EBUSY; ++ ++ /* allocate transmit descriptors */ ++ err = e1000_setup_all_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = e1000_setup_all_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) { ++ e1000_update_mng_vlan(adapter); ++ } ++#endif ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is now open */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. */ ++ e1000_configure(adapter); ++ ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* work around PCIe errata with MSI interrupts causing some chipsets to ++ * ignore e1000 MSI messages, which means we need to test our MSI ++ * interrupt now */ ++ err = e1000_test_msi(adapter); ++ if (err) { ++ DPRINTK(PROBE, ERR, "Interrupt allocation failed\n"); ++ goto err_req_irq; ++ } ++ ++ /* From here on the code is the same as e1000_up() */ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_napi_enable_all(adapter); ++ ++ schedule_delayed_work(&adapter->watchdog_task, 1); ++ e1000_irq_enable(adapter); ++ ++ /* fire a link status change interrupt to start the watchdog */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); ++ ++ return E1000_SUCCESS; ++ ++err_req_irq: ++ e1000_release_hw_control(adapter); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac.type >= e1000_82540 && ++ adapter->hw.phy.media_type == e1000_media_type_copper) ++ e1000_power_down_phy(&adapter->hw); ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ e1000_free_all_tx_resources(adapter); ++err_setup_tx: ++ e1000_reset(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. 
++ **/ ++static int e1000_close(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ e1000_down(adapter); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac.type >= e1000_82540 && ++ adapter->hw.phy.media_type == e1000_media_type_copper) ++ e1000_power_down_phy(&adapter->hw); ++ e1000_free_irq(adapter); ++ ++ e1000_free_all_tx_resources(adapter); ++ e1000_free_all_rx_resources(adapter); ++ ++#ifdef NETIF_F_HW_VLAN_TX ++ /* kill manageability vlan ID if supported, but not if a vlan with ++ * the same ID is registered on the host OS (let 8021q kill it) */ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ !(adapter->vlgrp && ++ vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id))) { ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ } ++#endif ++ ++ /* For 82573 and ICHx if AMT is enabled, let the firmware know ++ * that the network interface is now closed */ ++ if (((adapter->hw.mac.type == e1000_82573) || ++ (adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_release_hw_control(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary ++ * @adapter: address of board private structure ++ * @start: address of beginning of memory ++ * @len: length of memory ++ **/ ++static bool e1000_check_64k_bound(struct e1000_adapter *adapter, ++ void *start, unsigned long len) ++{ ++ unsigned long begin = (unsigned long) start; ++ unsigned long end = begin + len; ++ ++ /* First rev 82545 and 82546 need to not allow any memory ++ * write location to cross 64k boundary due to errata 23 */ ++ if (adapter->hw.mac.type == e1000_82545 || ++ adapter->hw.mac.type == e1000_82546) { ++ return ((begin ^ (end - 1)) >> 16) != 0 ? 
FALSE : TRUE; ++ } ++ ++ return TRUE; ++} ++ ++/** ++ * e1000_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * @tx_ring: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size; ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ tx_ring->buffer_info = vmalloc(size); ++ if (!tx_ring->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* round up to nearest 4K */ ++ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma); ++ if (!tx_ring->desc) { ++setup_tx_desc_die: ++ vfree(tx_ring->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, tx_ring->desc, tx_ring->size)) { ++ void *olddesc = tx_ring->desc; ++ dma_addr_t olddma = tx_ring->dma; ++ DPRINTK(TX_ERR, ERR, "tx_ring align check failed: %u bytes " ++ "at %p\n", tx_ring->size, tx_ring->desc); ++ /* Try again, without freeing the previous */ ++ tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma); ++ /* Failed allocation, critical failure */ ++ if (!tx_ring->desc) { ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ goto setup_tx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, tx_ring->desc, ++ tx_ring->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, ++ tx_ring->dma); ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the transmit descriptor ring\n"); ++ vfree(tx_ring->buffer_info); ++ return -ENOMEM; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, tx_ring->size, olddesc, ++ olddma); ++ } ++ } ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ rtdm_lock_init(&tx_ring->tx_lock); ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources ++ * @adapter: board private structure ++ * ++ * this allocates tx resources for all queues, return 0 on success, negative ++ * on failure ++ **/ ++int e1000_setup_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Tx Queue %u failed\n", i); ++ for (i-- ; i >= 0; i--) ++ e1000_free_tx_resources(adapter, ++ &adapter->tx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. 
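++ * It programs the per-queue descriptor ring registers (TDBAL/TDBAH/
++ * TDLEN/TDH/TDT), the inter-packet gap and Tx interrupt delay timers,
++ * and finally TCTL.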
++ **/ ++static void e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ u64 tdba; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tdlen, tctl, tipg, tarc; ++ u32 ipgr1, ipgr2; ++ int i; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ tdba = adapter->tx_ring[i].dma; ++ tdlen = adapter->tx_ring[i].count * sizeof(struct e1000_tx_desc); ++ E1000_WRITE_REG(hw, E1000_TDBAL(i), (tdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, E1000_TDBAH(i), (tdba >> 32)); ++ E1000_WRITE_REG(hw, E1000_TDLEN(i), tdlen); ++ E1000_WRITE_REG(hw, E1000_TDH(i), 0); ++ E1000_WRITE_REG(hw, E1000_TDT(i), 0); ++ adapter->tx_ring[i].tdh = E1000_REGISTER(hw, E1000_TDH(i)); ++ adapter->tx_ring[i].tdt = E1000_REGISTER(hw, E1000_TDT(i)); ++ } ++ ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2 && ++ (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes)) ++ tipg = DEFAULT_82543_TIPG_IPGT_FIBER; ++ else ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; ++ ++ switch (hw->mac.type) { ++ case e1000_82542: ++ tipg = DEFAULT_82542_TIPG_IPGT; ++ ipgr1 = DEFAULT_82542_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82542_TIPG_IPGR2; ++ break; ++ case e1000_80003es2lan: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; ++ break; ++ default: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; ++ break; ++ } ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ++ E1000_WRITE_REG(hw, E1000_TIDV, adapter->tx_int_delay); ++ if (adapter->flags & E1000_FLAG_HAS_INTR_MODERATION) ++ E1000_WRITE_REG(hw, E1000_TADV, adapter->tx_abs_int_delay); ++ ++ /* Program the Transmit Control Register */ ++ ++ tctl = E1000_READ_REG(hw, E1000_TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ if (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572) { ++ tarc = E1000_READ_REG(hw, E1000_TARC(0)); ++ /* set the speed mode bit, we'll clear it if we're not at ++ * gigabit link later */ ++#define SPEED_MODE_BIT (1 << 21) ++ tarc |= SPEED_MODE_BIT; ++ E1000_WRITE_REG(hw, E1000_TARC(0), tarc); ++ } else if (hw->mac.type == e1000_80003es2lan) { ++ tarc = E1000_READ_REG(hw, E1000_TARC(0)); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, E1000_TARC(0), tarc); ++ tarc = E1000_READ_REG(hw, E1000_TARC(1)); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, E1000_TARC(1), tarc); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS; ++ ++ /* only set IDE if we are delaying interrupts using the timers */ ++ if (adapter->tx_int_delay) ++ adapter->txd_cmd |= E1000_TXD_CMD_IDE; ++ ++ if (hw->mac.type < e1000_82543) ++ adapter->txd_cmd |= E1000_TXD_CMD_RPS; ++ else ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ /* Cache if we're 82544 running in PCI-X because we'll ++ * need this to apply a workaround later in the send path. 
*/ ++ if (hw->mac.type == e1000_82544 && ++ hw->bus.type == e1000_bus_type_pcix) ++ adapter->pcix_82544 = 1; ++ ++ E1000_WRITE_REG(hw, E1000_TCTL, tctl); ++ ++} ++ ++/** ++ * e1000_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * @rx_ring: rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size, desc_len; ++ ++ size = sizeof(struct e1000_rx_buffer) * rx_ring->count; ++ rx_ring->buffer_info = vmalloc(size); ++ if (!rx_ring->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rx_ring->buffer_info, 0, size); ++ ++ rx_ring->ps_page = kcalloc(rx_ring->count, sizeof(struct e1000_ps_page), ++ GFP_KERNEL); ++ if (!rx_ring->ps_page) { ++ vfree(rx_ring->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ rx_ring->ps_page_dma = kcalloc(rx_ring->count, ++ sizeof(struct e1000_ps_page_dma), ++ GFP_KERNEL); ++ if (!rx_ring->ps_page_dma) { ++ vfree(rx_ring->buffer_info); ++ kfree(rx_ring->ps_page); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2) ++ desc_len = sizeof(struct e1000_rx_desc); ++ else ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ ++ rx_ring->size = rx_ring->count * desc_len; ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma); ++ ++ if (!rx_ring->desc) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++setup_rx_desc_die: ++ vfree(rx_ring->buffer_info); ++ kfree(rx_ring->ps_page); ++ kfree(rx_ring->ps_page_dma); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, rx_ring->desc, rx_ring->size)) { ++ void *olddesc = rx_ring->desc; ++ dma_addr_t olddma = rx_ring->dma; ++ DPRINTK(RX_ERR, ERR, "rx_ring align check failed: %u bytes " ++ "at %p\n", rx_ring->size, rx_ring->desc); ++ /* Try again, without freeing the previous */ ++ rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma); ++ /* Failed allocation, critical failure */ ++ if (!rx_ring->desc) { ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, rx_ring->desc, ++ rx_ring->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, ++ rx_ring->dma); ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, rx_ring->size, olddesc, ++ olddma); ++ } ++ } ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ /* set up ring defaults */ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ rx_ring->rx_skb_top = NULL; ++ rx_ring->adapter = adapter; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_rx_resources - 
wrapper to allocate Rx resources ++ * @adapter: board private structure ++ * ++ * this allocates rx resources for all queues, return 0 on success, negative ++ * on failure ++ **/ ++int e1000_setup_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Rx Queue %u failed\n", i); ++ for (i-- ; i >= 0; i--) ++ e1000_free_rx_resources(adapter, ++ &adapter->rx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++static void e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ u32 rctl, rfctl; ++ u32 psrctl = 0; ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ u32 pages = 0; ++#endif ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS ++ if (adapter->hw.mac.type > e1000_82543) ++ rctl |= E1000_RCTL_SECRC; ++ */ ++ ++ if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) ++ rctl |= E1000_RCTL_SBP; ++ else ++ rctl &= ~E1000_RCTL_SBP; ++ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case E1000_RXBUFFER_256: ++ rctl |= E1000_RCTL_SZ_256; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_512: ++ rctl |= E1000_RCTL_SZ_512; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_1024: ++ rctl |= E1000_RCTL_SZ_1024; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case E1000_RXBUFFER_8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case E1000_RXBUFFER_16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ /* 82571 and greater support packet-split where the protocol ++ * header is placed in skb->data and the packet data is ++ * placed in pages hanging off of skb_shinfo(skb)->nr_frags. ++ * In the case of a non-split, skb->data is linearly filled, ++ * followed by the page buffers. Therefore, skb->data is ++ * sized to hold the largest protocol header. 
++ */ ++ /* allocations using alloc_page take too long for regular MTU ++ * so only enable packet split for jumbo frames */ ++ pages = PAGE_USE_COUNT(adapter->netdev->mtu); ++ if ((adapter->hw.mac.type >= e1000_82571) && (pages <= 3) && ++ PAGE_SIZE <= 16384 && (rctl & E1000_RCTL_LPE)) ++ adapter->rx_ps_pages = pages; ++ else ++ adapter->rx_ps_pages = 0; ++#endif ++ ++ if (adapter->rx_ps_pages) { ++ /* Configure extra packet-split registers */ ++ rfctl = E1000_READ_REG(&adapter->hw, E1000_RFCTL); ++ rfctl |= E1000_RFCTL_EXTEN; ++ /* disable packet split support for IPv6 extension headers, ++ * because some malformed IPv6 headers can hang the RX */ ++ rfctl |= (E1000_RFCTL_IPV6_EX_DIS | ++ E1000_RFCTL_NEW_IPV6_EXT_DIS); ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RFCTL, rfctl); ++ ++ /* disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS */ ++ rctl |= E1000_RCTL_DTYP_PS /* | E1000_RCTL_SECRC */; ++ ++ psrctl |= adapter->rx_ps_bsize0 >> ++ E1000_PSRCTL_BSIZE0_SHIFT; ++ ++ switch (adapter->rx_ps_pages) { ++ case 3: ++ psrctl |= PAGE_SIZE << ++ E1000_PSRCTL_BSIZE3_SHIFT; ++ case 2: ++ psrctl |= PAGE_SIZE << ++ E1000_PSRCTL_BSIZE2_SHIFT; ++ case 1: ++ psrctl |= PAGE_SIZE >> ++ E1000_PSRCTL_BSIZE1_SHIFT; ++ break; ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_PSRCTL, psrctl); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ adapter->flags &= ~E1000_FLAG_RX_RESTART_NOW; ++} ++ ++/** ++ * e1000_configure_rx - Configure 8254x Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. ++ **/ ++static void e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ u64 rdba; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rdlen, rctl, rxcsum, ctrl_ext; ++ int i; ++ ++ if (adapter->rx_ps_pages) { ++ /* this is a 32 byte descriptor */ ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(union e1000_rx_desc_packet_split); ++ adapter->clean_rx = e1000_clean_rx_irq_ps; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps; ++#ifdef CONFIG_E1000_NAPI ++ } else if (adapter->netdev->mtu > MAXIMUM_ETHERNET_VLAN_SIZE) { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = e1000_clean_jumbo_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers; ++#endif ++ } else { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = e1000_clean_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ } ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ E1000_WRITE_FLUSH(hw); ++ mdelay(10); ++ ++ /* set the Receive Delay Timer Register */ ++ E1000_WRITE_REG(hw, E1000_RDTR, adapter->rx_int_delay); ++ ++ if (adapter->flags & E1000_FLAG_HAS_INTR_MODERATION) { ++ E1000_WRITE_REG(hw, E1000_RADV, adapter->rx_abs_int_delay); ++ if (adapter->itr_setting != 0) ++ E1000_WRITE_REG(hw, E1000_ITR, ++ 1000000000 / (adapter->itr * 256)); ++ } ++ ++ if (hw->mac.type >= e1000_82571) { ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ /* Reset delay timers after every interrupt */ ++ ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; ++#ifdef CONFIG_E1000_NAPI ++ /* Auto-Mask interrupts upon ICR access */ ++ ctrl_ext |= E1000_CTRL_EXT_IAME; ++ E1000_WRITE_REG(hw, E1000_IAM, 0xffffffff); ++#endif ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * 
the Base and Length of the Rx Descriptor Ring */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ rdba = adapter->rx_ring[i].dma; ++ E1000_WRITE_REG(hw, E1000_RDBAL(i), (rdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, E1000_RDBAH(i), (rdba >> 32)); ++ E1000_WRITE_REG(hw, E1000_RDLEN(i), rdlen); ++ E1000_WRITE_REG(hw, E1000_RDH(i), 0); ++ E1000_WRITE_REG(hw, E1000_RDT(i), 0); ++ adapter->rx_ring[i].rdh = E1000_REGISTER(hw, E1000_RDH(i)); ++ adapter->rx_ring[i].rdt = E1000_REGISTER(hw, E1000_RDT(i)); ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_rx_queues > 1) { ++ u32 random[10]; ++ u32 reta, mrqc; ++ int i; ++ ++ get_random_bytes(&random[0], 40); ++ ++ switch (adapter->num_rx_queues) { ++ default: ++ reta = 0x00800080; ++ mrqc = E1000_MRQC_ENABLE_RSS_2Q; ++ break; ++ } ++ ++ /* Fill out redirection table */ ++ for (i = 0; i < 32; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_RETA, i, reta); ++ /* Fill out hash function seeds */ ++ for (i = 0; i < 10; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK, i, random[i]); ++ ++ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | ++ E1000_MRQC_RSS_FIELD_IPV4_TCP); ++ ++ E1000_WRITE_REG(hw, E1000_MRQC, mrqc); ++ ++ /* Multiqueue and packet checksumming are mutually exclusive. */ ++ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); ++ rxcsum |= E1000_RXCSUM_PCSD; ++ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); ++ } else if (hw->mac.type >= e1000_82543) { ++#else ++ if (hw->mac.type >= e1000_82543) { ++#endif /* CONFIG_E1000_MQ */ ++ /* Enable 82543 Receive Checksum Offload for TCP and UDP */ ++ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); ++ if (adapter->rx_csum == TRUE) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ ++ /* Enable 82571 IPv4 payload checksum for UDP fragments ++ * Must be used in conjunction with packet-split. */ ++ if ((hw->mac.type >= e1000_82571) && ++ (adapter->rx_ps_pages)) { ++ rxcsum |= E1000_RXCSUM_IPPCSE; ++ } ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* don't need to clear IPPCSE as it defaults to 0 */ ++ } ++ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); ++ } ++ ++ /* Enable early receives on supported devices, only takes effect when ++ * packet size is equal or larger than the specified value (in 8 byte ++ * units), e.g. 
using jumbo frames when setting to E1000_ERT_2048 */ ++ if ((hw->mac.type == e1000_82573 || hw->mac.type == e1000_ich9lan) && ++ (adapter->netdev->mtu > ETH_DATA_LEN)) ++ E1000_WRITE_REG(hw, E1000_ERT, E1000_ERT_2048); ++ ++ /* Enable Receives */ ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++} ++ ++/** ++ * e1000_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_tx_ring(adapter, tx_ring); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++void e1000_free_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); ++} ++ ++static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ if (buffer_info->dma) { ++ pci_unmap_page(adapter->pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_TODEVICE); ++ buffer_info->dma = 0; ++ } ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ /* buffer_info must be completely set up in the transmit path */ ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ * @tx_ring: ring to be cleaned ++ **/ ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ tx_ring->last_tx_tso = 0; ++ ++ writel(0, adapter->hw.hw_addr + tx_ring->tdh); ++ writel(0, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]); ++} ++ ++/** ++ * e1000_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_rx_ring(adapter, rx_ring); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ kfree(rx_ring->ps_page); ++ rx_ring->ps_page = NULL; ++ kfree(rx_ring->ps_page_dma); ++ rx_ring->ps_page_dma = NULL; ++ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ 
++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++void e1000_free_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_free_rx_resources(adapter, &adapter->rx_ring[i]); ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ * @rx_ring: ring to free buffers from ++ **/ ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct e1000_rx_buffer *buffer_info; ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct pci_dev *pdev = adapter->pdev; ++ unsigned long size; ++ unsigned int i, j; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_rx_irq) { ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++#ifdef CONFIG_E1000_NAPI ++ } else if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_jumbo_rx_irq) { ++ pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++#endif ++ } else if (buffer_info->dma && ++ adapter->clean_rx == e1000_clean_rx_irq_ps) { ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_ps_bsize0, ++ PCI_DMA_FROMDEVICE); ++ } ++ buffer_info->dma = 0; ++ if (buffer_info->page) { ++ put_page(buffer_info->page); ++ buffer_info->page = NULL; ++ } ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ ps_page = &rx_ring->ps_page[i]; ++ ps_page_dma = &rx_ring->ps_page_dma[i]; ++ for (j = 0; j < adapter->rx_ps_pages; j++) { ++ if (!ps_page->ps_page[j]) break; ++ pci_unmap_page(pdev, ++ ps_page_dma->ps_page_dma[j], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ ps_page_dma->ps_page_dma[j] = 0; ++ put_page(ps_page->ps_page[j]); ++ ps_page->ps_page[j] = NULL; ++ } ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ /* there also may be some cached data from a chained receive */ ++ if (rx_ring->rx_skb_top) { ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ } ++#endif ++ ++ size = sizeof(struct e1000_rx_buffer) * rx_ring->count; ++ memset(rx_ring->buffer_info, 0, size); ++ size = sizeof(struct e1000_ps_page) * rx_ring->count; ++ memset(rx_ring->ps_page, 0, size); ++ size = sizeof(struct e1000_ps_page_dma) * rx_ring->count; ++ memset(rx_ring->ps_page_dma, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->rdh); ++ writel(0, adapter->hw.hw_addr + rx_ring->rdt); ++} ++ ++/** ++ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]); ++} ++ ++/* The 82542 2.0 (revision 2) needs to have the receive unit in reset ++ * and memory write and invalidate disabled for certain operations ++ */ ++#if 0 ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 rctl; ++ ++ if (adapter->hw.mac.type != e1000_82542) ++ return; ++ if 
(adapter->hw.revision_id != E1000_REVISION_2) ++ return; ++ ++ e1000_pci_clear_mwi(&adapter->hw); ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl |= E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (rtnetif_running(netdev)) ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++static void e1000_leave_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 rctl; ++ ++ if (adapter->hw.mac.type != e1000_82542) ++ return; ++ if (adapter->hw.revision_id != E1000_REVISION_2) ++ return; ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (adapter->hw.bus.pci_cmd_word & PCI_COMMAND_INVALIDATE) ++ e1000_pci_set_mwi(&adapter->hw); ++ ++ if (rtnetif_running(netdev)) { ++ /* No need to loop, because 82542 supports only 1 queue */ ++ struct e1000_rx_ring *ring = &adapter->rx_ring[0]; ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); ++ } ++} ++ ++/** ++ * e1000_set_mac - Change the Ethernet Address of the NIC ++ * @netdev: network interface device structure ++ * @p: pointer to an address structure ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_set_mac(struct net_device *netdev, void *p) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct sockaddr *addr = p; ++ ++ if (!is_valid_ether_addr(addr->sa_data)) ++ return -EADDRNOTAVAIL; ++ ++ /* 82542 2.0 needs to be in reset to write receive address registers */ ++ ++ if (adapter->hw.mac.type == e1000_82542) ++ e1000_enter_82542_rst(adapter); ++ ++ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); ++ memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); ++ ++ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); ++ ++ /* With 82571 controllers, LAA may be overwritten (with the default) ++ * due to controller reset from the other port. */ ++ if (adapter->hw.mac.type == e1000_82571) { ++ /* activate the work around */ ++ e1000_set_laa_state_82571(&adapter->hw, TRUE); ++ ++ /* Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed (in e1000_watchdog), the actual LAA is in one ++ * of the RARs and no incoming packets directed to this port ++ * are dropped. Eventually the LAA will be in RAR[0] and ++ * RAR[14] */ ++ e1000_rar_set(&adapter->hw, ++ adapter->hw.mac.addr, ++ adapter->hw.mac.rar_entry_count - 1); ++ } ++ ++ if (adapter->hw.mac.type == e1000_82542) ++ e1000_leave_82542_rst(adapter); ++ ++ return 0; ++} ++#endif ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. 
++ **/ ++static void e1000_set_multi(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } else if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++} ++ ++/* Need to wait a few seconds after link up to get diagnostic information from ++ * the phy */ ++static void e1000_update_phy_info_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ phy_info_task.work); ++ e1000_get_phy_info(&adapter->hw); ++} ++ ++/** ++ * e1000_82547_tx_fifo_stall_task - task to complete work ++ * @work: work struct contained inside adapter struct ++ **/ ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ fifo_stall_task.work); ++ struct net_device *netdev = adapter->netdev; ++ u32 tctl; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) { ++ if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == ++ E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && ++ (E1000_READ_REG(&adapter->hw, E1000_TDFT) == ++ E1000_READ_REG(&adapter->hw, E1000_TDFH)) && ++ (E1000_READ_REG(&adapter->hw, E1000_TDFTS) == ++ E1000_READ_REG(&adapter->hw, E1000_TDFHS))) { ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, ++ tctl & ~E1000_TCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFT, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFH, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ ++ adapter->tx_fifo_head = 0; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ rtnetif_wake_queue(netdev); ++ } else if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->fifo_stall_task, 1); ++ } ++} ++ ++static bool e1000_has_link(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = FALSE; ++ s32 ret_val = 0; ++ ++ /* get_link_status is set on LSC (link status) interrupt or ++ * rx sequence error interrupt. 
get_link_status will stay ++ * false until the e1000_check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (hw->mac.get_link_status) { ++ ret_val = e1000_check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ } else { ++ link_active = TRUE; ++ } ++ break; ++ case e1000_media_type_fiber: ++ ret_val = e1000_check_for_link(hw); ++ link_active = !!(E1000_READ_REG(hw, E1000_STATUS) & ++ E1000_STATUS_LU); ++ break; ++ case e1000_media_type_internal_serdes: ++ ret_val = e1000_check_for_link(hw); ++ link_active = adapter->hw.mac.serdes_has_link; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && ++ (E1000_READ_REG(&adapter->hw, E1000_CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ ++ DPRINTK(LINK, INFO, ++ "Gigabit has been disabled, downgrading speed\n"); ++ } ++ ++ return link_active; ++} ++ ++static void e1000_enable_receives(struct e1000_adapter *adapter) ++{ ++ /* make sure the receive unit is started */ ++ if ((adapter->flags & E1000_FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & E1000_FLAG_RX_RESTART_NOW)) { ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); ++ adapter->flags &= ~E1000_FLAG_RX_RESTART_NOW; ++ } ++} ++ ++static void e1000_watchdog_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ watchdog_task.work); ++ ++ struct net_device *netdev = adapter->netdev; ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_tx_ring *tx_ring; ++ u32 link, tctl; ++ int i, tx_pending = 0; ++ ++ link = e1000_has_link(adapter); ++ if ((rtnetif_carrier_ok(netdev)) && link) { ++ e1000_enable_receives(adapter); ++ goto link_up; ++ } ++ ++ if (mac->type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(&adapter->hw); ++#ifdef NETIF_F_HW_VLAN_TX ++ if (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id) ++ e1000_update_mng_vlan(adapter); ++#endif ++ } ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ u32 ctrl; ++ bool txb2b = 1; ++#ifdef SIOCGMIIPHY ++ /* update snapshot of PHY registers on LSC */ ++ e1000_phy_read_status(adapter); ++#endif ++ e1000_get_speed_and_duplex(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ DPRINTK(LINK, INFO, "NIC Link is Up %d Mbps %s, " ++ "Flow Control: %s\n", ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full Duplex" : "Half Duplex", ++ ((ctrl & E1000_CTRL_TFCE) && (ctrl & ++ E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl & ++ E1000_CTRL_RFCE) ? "RX" : ((ctrl & ++ E1000_CTRL_TFCE) ? "TX" : "None" ))); ++ ++ /* tweak tx_queue_len according to speed/duplex ++ * and adjust the timeout factor */ ++ //netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ //netdev->tx_queue_len = 10; ++ adapter->tx_timeout_factor = 16; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ //netdev->tx_queue_len = 100; ++ /* maybe add some timeout factor ? 
*/ ++ break; ++ } ++ ++ if ((mac->type == e1000_82571 || ++ mac->type == e1000_82572) && ++ txb2b == 0) { ++ u32 tarc0; ++ tarc0 = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); ++ tarc0 &= ~SPEED_MODE_BIT; ++ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc0); ++ } ++ ++#ifdef NETIF_F_TSO ++ /* disable TSO for pcie and 10/100 speeds, to avoid ++ * some hardware issues */ ++ if (!(adapter->flags & E1000_FLAG_TSO_FORCE) && ++ adapter->hw.bus.type == e1000_bus_type_pci_express){ ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ case SPEED_100: ++ DPRINTK(PROBE,INFO, ++ "10/100 speed: disabling TSO\n"); ++ netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ netdev->features &= ~NETIF_F_TSO6; ++#endif ++ break; ++ case SPEED_1000: ++ netdev->features |= NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ netdev->features |= NETIF_F_TSO6; ++#endif ++ break; ++ default: ++ /* oops */ ++ break; ++ } ++ } ++#endif ++ ++ /* enable transmits in the hardware, need to do this ++ * after setting TARC0 */ ++ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); ++ tctl |= E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); ++ ++ rtnetif_carrier_on(netdev); ++ rtnetif_wake_queue(netdev); ++#ifdef CONFIG_E1000_MQ ++ if (netif_is_multiqueue(netdev)) ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ netif_wake_subqueue(netdev, i); ++#endif ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->phy_info_task, ++ 2 * HZ); ++ adapter->smartspeed = 0; ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ DPRINTK(LINK, INFO, "NIC Link is Down\n"); ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->phy_info_task, ++ 2 * HZ); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives in the ISR and ++ * reset device here in the watchdog ++ */ ++ if (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART) ++ /* reset device */ ++ schedule_work(&adapter->reset_task); ++ } ++ ++ e1000_smartspeed(adapter); ++ } ++ ++link_up: ++ e1000_update_stats(adapter); ++ ++ mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ mac->collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorc = adapter->stats.gorc - adapter->gorc_old; ++ adapter->gorc_old = adapter->stats.gorc; ++ adapter->gotc = adapter->stats.gotc - adapter->gotc_old; ++ adapter->gotc_old = adapter->stats.gotc; ++ ++ e1000_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ for (i = 0 ; i < adapter->num_tx_queues ; i++) { ++ tx_ring = &adapter->tx_ring[i]; ++ tx_pending |= (E1000_DESC_UNUSED(tx_ring) + 1 < ++ tx_ring->count); ++ } ++ if (tx_pending) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ } ++ } ++ ++ /* Cause software interrupt to ensure rx ring is cleaned */ ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_RXDMT0); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = TRUE; ++ ++ /* With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. 
Set the appropriate LAA in RAR[0] */ ++ if (e1000_get_laa_state_82571(&adapter->hw) == TRUE) ++ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); ++ ++ /* Reschedule the task */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); ++} ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++/** ++ * e1000_update_itr - update the dynamic ITR value based on statistics ++ * @adapter: pointer to adapter ++ * @itr_setting: current adapter->itr ++ * @packets: the number of packets during this measurement interval ++ * @bytes: the number of bytes during this measurement interval ++ * ++ * Stores a new ITR value based on packets and byte ++ * counts during the last interrupt. The advantage of per interrupt ++ * computation is faster updates and more accurate ITR for the current ++ * traffic pattern. Constants in this function were computed ++ * based on theoretical maximum wire speed and thresholds were set based ++ * on testing data as well as attempting to minimize response time ++ * while increasing bulk throughput. ++ * this functionality is controlled by the InterruptThrottleRate module ++ * parameter (see e1000_param.c) ++ **/ ++#if 0 ++static unsigned int e1000_update_itr(struct e1000_adapter *adapter, ++ u16 itr_setting, int packets, ++ int bytes) ++{ ++ unsigned int retval = itr_setting; ++ ++ if (unlikely(!(adapter->flags & E1000_FLAG_HAS_INTR_MODERATION))) ++ goto update_itr_done; ++ ++ if (packets == 0) ++ goto update_itr_done; ++ ++ switch (itr_setting) { ++ case lowest_latency: ++ /* handle TSO and jumbo frames */ ++ if (bytes/packets > 8000) ++ retval = bulk_latency; ++ else if ((packets < 5) && (bytes > 512)) { ++ retval = low_latency; ++ } ++ break; ++ case low_latency: /* 50 usec aka 20000 ints/s */ ++ if (bytes > 10000) { ++ /* this if handles the TSO accounting */ ++ if (bytes/packets > 8000) { ++ retval = bulk_latency; ++ } else if ((packets < 10) || ((bytes/packets) > 1200)) { ++ retval = bulk_latency; ++ } else if ((packets > 35)) { ++ retval = lowest_latency; ++ } ++ } else if (bytes/packets > 2000) { ++ retval = bulk_latency; ++ } else if (packets <= 2 && bytes < 512) { ++ retval = lowest_latency; ++ } ++ break; ++ case bulk_latency: /* 250 usec aka 4000 ints/s */ ++ if (bytes > 25000) { ++ if (packets > 35) { ++ retval = low_latency; ++ } ++ } else if (bytes < 6000) { ++ retval = low_latency; ++ } ++ break; ++ } ++ ++update_itr_done: ++ return retval; ++} ++#endif ++ ++static void e1000_set_itr(struct e1000_adapter *adapter) ++{ ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++static int e1000_tso(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, struct sk_buff *skb) ++{ ++#ifdef NETIF_F_TSO ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ u32 cmd_length = 0; ++ u16 ipcse = 0, tucse, mss; ++ u8 ipcss, ipcso, tucss, tucso, hdr_len; ++ int err; ++ ++ if (skb_is_gso(skb)) { ++ if (skb_header_cloned(skb)) { ++ err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); ++ if (err) ++ return err; ++ } ++ ++ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); ++ mss = skb_shinfo(skb)->gso_size; ++ if (skb->protocol == htons(ETH_P_IP)) { ++ struct iphdr *iph = ip_hdr(skb); ++ iph->tot_len 
= 0; ++ iph->check = 0; ++ tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, ++ iph->daddr, 0, ++ IPPROTO_TCP, ++ 0); ++ cmd_length = E1000_TXD_CMD_IP; ++ ipcse = skb_transport_offset(skb) - 1; ++#ifdef NETIF_F_TSO6 ++ } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) { ++ ipv6_hdr(skb)->payload_len = 0; ++ tcp_hdr(skb)->check = ++ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, ++ &ipv6_hdr(skb)->daddr, ++ 0, IPPROTO_TCP, 0); ++ ipcse = 0; ++#endif ++ } ++ ipcss = skb_network_offset(skb); ++ ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; ++ tucss = skb_transport_offset(skb); ++ tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; ++ tucse = 0; ++ ++ cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | ++ E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); ++ ++ i = tx_ring->next_to_use; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ ++ context_desc->lower_setup.ip_fields.ipcss = ipcss; ++ context_desc->lower_setup.ip_fields.ipcso = ipcso; ++ context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); ++ context_desc->upper_setup.tcp_fields.tucss = tucss; ++ context_desc->upper_setup.tcp_fields.tucso = tucso; ++ context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); ++ context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); ++ context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; ++ context_desc->cmd_and_length = cpu_to_le32(cmd_length); ++ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ ++ if (++i == tx_ring->count) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++ } ++#endif ++ ++ return FALSE; ++} ++ ++static bool e1000_tx_csum(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, ++ struct sk_buff *skb) ++{ ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ // u8 css; ++ u32 cmd_len = E1000_TXD_CMD_DEXT; ++ ++ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) ++ return FALSE; ++ ++ switch (skb->protocol) { ++ case __constant_htons(ETH_P_IP): ++ break; ++ default: ++ if (unlikely(net_ratelimit())) { ++ DPRINTK(PROBE, WARNING, "checksum_partial proto=%x!\n", ++ skb->protocol); ++ } ++ break; ++ } ++ ++ // css = skb_transport_offset(skb); ++ ++ i = tx_ring->next_to_use; ++ buffer_info = &tx_ring->buffer_info[i]; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ ++ context_desc->lower_setup.ip_config = 0; ++ context_desc->cmd_and_length = cpu_to_le32(cmd_len); ++ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++} ++ ++#define E1000_MAX_TXD_PWR 12 ++#define E1000_MAX_DATA_PER_TXD (1<len; ++ unsigned int offset = 0, size, count = 0, i; ++#ifdef MAX_SKB_FRAGS ++ unsigned int f; ++ len -= skb->data_len; ++#endif ++ ++ i = tx_ring->next_to_use; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++#ifdef NETIF_F_TSO ++ /* Workaround for Controller erratum -- ++ * descriptor for non-tso packet in a linear SKB that follows a ++ * tso gets written back prematurely before the data is fully ++ * DMA'd to the controller */ ++ if (tx_ring->last_tx_tso && !skb_is_gso(skb)) { ++ tx_ring->last_tx_tso = 0; ++ if (!skb->data_len) ++ size -= 4; ++ } ++ ++ /* Workaround for premature desc write-backs ++ * in TSO mode. 
Append 4-byte sentinel desc */ ++ if (unlikely(mss && !nr_frags && size == len && size > 8)) ++ size -= 4; ++#endif ++ /* work-around for errata 10 and it applies ++ * to all controllers in PCI-X mode ++ * The fix is to make sure that the first descriptor of a ++ * packet is smaller than 2048 - 16 - 16 (or 2016) bytes ++ */ ++ if (unlikely((adapter->hw.bus.type == e1000_bus_type_pcix) && ++ (size > 2015) && count == 0)) ++ size = 2015; ++ ++ /* Workaround for potential 82544 hang in PCI-X. Avoid ++ * terminating buffers within evenly-aligned dwords. */ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(skb->data + offset + size - 1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ /* set time_stamp *before* dma to help avoid a possible race */ ++ buffer_info->time_stamp = jiffies; ++ buffer_info->dma = ++ pci_map_single(adapter->pdev, ++ skb->data + offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->next_to_watch = i; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++#ifdef MAX_SKB_FRAGS ++ for (f = 0; f < nr_frags; f++) { ++ struct skb_frag_struct *frag; ++ ++ frag = &skb_shinfo(skb)->frags[f]; ++ len = frag->size; ++ offset = frag->page_offset; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++#ifdef NETIF_F_TSO ++ /* Workaround for premature desc write-backs ++ * in TSO mode. Append 4-byte sentinel desc */ ++ if (unlikely(mss && f == (nr_frags-1) && size == len && size > 8)) ++ size -= 4; ++#endif ++ /* Workaround for potential 82544 hang in PCI-X. ++ * Avoid terminating buffers within evenly-aligned ++ * dwords. */ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(frag->page+offset+size-1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ buffer_info->time_stamp = jiffies; ++ buffer_info->dma = ++ pci_map_page(adapter->pdev, ++ frag->page, ++ offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->next_to_watch = i; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ } ++#endif ++ ++ i = (i == 0) ? 
tx_ring->count - 1 : i - 1; ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return count; ++} ++ ++static void e1000_tx_queue(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring, ++ int tx_flags, int count, nanosecs_abs_t *xmit_stamp) ++{ ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ rtdm_lockctx_t context; ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_TSO)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | ++ E1000_TXD_CMD_TSE; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_IPV4)) ++ txd_upper |= E1000_TXD_POPTS_IXSM << 8; ++ } ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ if (unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) { ++ txd_lower |= E1000_TXD_CMD_VLE; ++ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ while (count--) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ rtdm_lock_irqsave(context); ++ ++ if (xmit_stamp) ++ *xmit_stamp = cpu_to_be64(rtdm_clock_read() + *xmit_stamp); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ writel(i, adapter->hw.hw_addr + tx_ring->tdt); ++ ++ rtdm_lock_irqrestore(context); ++ /* we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems */ ++ mmiowb(); ++} ++ ++#define E1000_FIFO_HDR 0x10 ++#define E1000_82547_PAD_LEN 0x3E0 ++ ++/** ++ * 82547 workaround to avoid controller hang in half-duplex environment. ++ * The workaround is to avoid queuing a large packet that would span ++ * the internal Tx FIFO ring boundary by notifying the stack to resend ++ * the packet at a later time. This gives the Tx FIFO an opportunity to ++ * flush all packets. When that occurs, we reset the Tx FIFO pointers ++ * to the beginning of the Tx FIFO. 
++ **/ ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct sk_buff *skb) ++{ ++ u32 fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; ++ u32 skb_fifo_len = skb->len + E1000_FIFO_HDR; ++ ++ skb_fifo_len = ALIGN(skb_fifo_len, E1000_FIFO_HDR); ++ ++ if (adapter->link_duplex != HALF_DUPLEX) ++ goto no_fifo_stall_required; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) ++ return 1; ++ ++ if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { ++ atomic_set(&adapter->tx_fifo_stall, 1); ++ return 1; ++ } ++ ++no_fifo_stall_required: ++ adapter->tx_fifo_head += skb_fifo_len; ++ if (adapter->tx_fifo_head >= adapter->tx_fifo_size) ++ adapter->tx_fifo_head -= adapter->tx_fifo_size; ++ return 0; ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, ++ struct sk_buff *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 length, offset; ++#ifdef NETIF_F_HW_VLAN_TX ++ if (vlan_tx_tag_present(skb)) { ++ if (!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) ++ && (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN))) ++ return 0; ++ } ++#endif ++ if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { ++ struct ethhdr *eth = (struct ethhdr *) skb->data; ++ if ((htons(ETH_P_IP) == eth->h_proto)) { ++ const struct iphdr *ip = ++ (struct iphdr *)((u8 *)skb->data+14); ++ if (IPPROTO_UDP == ip->protocol) { ++ struct udphdr *udp = ++ (struct udphdr *)((u8 *)ip + ++ (ip->ihl << 2)); ++ if (ntohs(udp->dest) == 67) { ++ offset = (u8 *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ ++ return e1000_mng_write_dhcp_info(hw, ++ (u8 *)udp + 8, ++ length); ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++static int __e1000_maybe_stop_tx(struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring, int size) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ rtnetif_stop_queue(netdev); ++ /* Herbert's original patch had: ++ * smp_mb__after_netif_stop_queue(); ++ * but since that doesn't exist yet, just open code it. */ ++ smp_mb(); ++ ++ /* We need to check again in a case another CPU has just ++ * made room available. */ ++ if (likely(E1000_DESC_UNUSED(tx_ring) < size)) ++ return -EBUSY; ++ ++ /* A reprieve! 
*/ ++ rtnetif_start_queue(netdev); ++ ++adapter->restart_queue; ++ return 0; ++} ++ ++static int e1000_maybe_stop_tx(struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring, int size) ++{ ++ if (likely(E1000_DESC_UNUSED(tx_ring) >= size)) ++ return 0; ++ return __e1000_maybe_stop_tx(netdev, tx_ring, size); ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int e1000_xmit_frame_ring(struct sk_buff *skb, ++ struct net_device *netdev, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; ++ unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; ++ unsigned int tx_flags = 0; ++ unsigned int len = skb->len; ++ unsigned long irq_flags; ++ unsigned int nr_frags = 0; ++ unsigned int mss = 0; ++ int count = 0; ++ int tso; ++#ifdef MAX_SKB_FRAGS ++ unsigned int f; ++ len -= skb->data_len; ++#endif ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (unlikely(skb->len <= 0)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ ++ /* 82571 and newer doesn't need the workaround that limited descriptor ++ * length to 4kB */ ++ if (adapter->hw.mac.type >= e1000_82571) ++ max_per_txd = 8192; ++ ++#ifdef NETIF_F_TSO ++ mss = skb_shinfo(skb)->gso_size; ++ /* The controller does a simple calculation to ++ * make sure there is enough room in the FIFO before ++ * initiating the DMA for each buffer. The calc is: ++ * 4 = ceil(buffer len/mss). To make sure we don't ++ * overrun the FIFO, adjust the max buffer len if mss ++ * drops. */ ++ if (mss) { ++ u8 hdr_len; ++ max_per_txd = min(mss << 2, max_per_txd); ++ max_txd_pwr = fls(max_per_txd) - 1; ++ ++ /* TSO Workaround for 82571/2/3 Controllers -- if skb->data ++ * points to just header, pull a few bytes of payload from ++ * frags into skb->data */ ++ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); ++ if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { ++ switch (adapter->hw.mac.type) { ++ unsigned int pull_size; ++ case e1000_82544: ++ /* Make sure we have room to chop off 4 bytes, ++ * and that the end alignment will work out to ++ * this hardware's requirements ++ * NOTE: this is a TSO only workaround ++ * if end byte alignment not correct move us ++ * into the next dword */ ++ if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4) ++ break; ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ pull_size = min((unsigned int)4, skb->data_len); ++ if (!__pskb_pull_tail(skb, pull_size)) { ++ DPRINTK(DRV, ERR, ++ "__pskb_pull_tail failed.\n"); ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ len = skb->len - skb->data_len; ++ break; ++ default: ++ /* do nothing */ ++ break; ++ } ++ } ++ } ++ ++ /* reserve a descriptor for the offload context */ ++ if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) ++ count++; ++ count++; ++#else ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ count++; ++#endif ++ ++#ifdef NETIF_F_TSO ++ /* Controller Erratum workaround */ ++ if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb)) ++ count++; ++#endif ++ ++ count += TXD_USE_COUNT(len, max_txd_pwr); ++ ++ if (adapter->pcix_82544) ++ count++; ++ ++ /* work-around for errata 10 and it applies to all controllers ++ * in PCI-X mode, so add one more descriptor to the count ++ */ ++ if (unlikely((adapter->hw.bus.type == e1000_bus_type_pcix) && ++ (len > 2015))) ++ count++; ++ ++#ifdef MAX_SKB_FRAGS ++ nr_frags 
= skb_shinfo(skb)->nr_frags; ++ for (f = 0; f < nr_frags; f++) ++ count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, ++ max_txd_pwr); ++ if (adapter->pcix_82544) ++ count += nr_frags; ++ ++#endif ++ ++ if (adapter->hw.mac.tx_pkt_filtering && ++ (adapter->hw.mac.type == e1000_82573)) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->tx_lock, irq_flags); ++ ++ /* need: count + 2 desc gap to keep tail from touching ++ * head, otherwise try next time */ ++ if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) { ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ rtdm_printk("FATAL: rt_e1000 ran into tail close to head situation!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (unlikely(adapter->hw.mac.type == e1000_82547)) { ++ if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ schedule_delayed_work(&adapter->fifo_stall_task, ++ 1); ++ rtdm_printk("FATAL: rt_e1000 ran into tail 82547 controller bug!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ } ++ ++#ifndef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++ ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && vlan_tx_tag_present(skb))) { ++ tx_flags |= E1000_TX_FLAGS_VLAN; ++ tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); ++ } ++#endif ++ ++ first = tx_ring->next_to_use; ++ ++ tso = e1000_tso(adapter, tx_ring, skb); ++ if (tso < 0) { ++ kfree_rtskb(skb); ++#ifdef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++#endif ++ return NETDEV_TX_OK; ++ } ++ ++ if (likely(tso)) { ++ tx_ring->last_tx_tso = 1; ++ tx_flags |= E1000_TX_FLAGS_TSO; ++ } else if (likely(e1000_tx_csum(adapter, tx_ring, skb))) ++ tx_flags |= E1000_TX_FLAGS_CSUM; ++ ++ /* Old method was to assume IPv4 packet by default if TSO was enabled. ++ * 82571 hardware supports TSO capabilities for IPv6 as well... ++ * no longer assume, we must. */ ++ if (likely(skb->protocol == htons(ETH_P_IP))) ++ tx_flags |= E1000_TX_FLAGS_IPV4; ++ ++ e1000_tx_queue(adapter, tx_ring, tx_flags, ++ e1000_tx_map(adapter, tx_ring, skb, first, ++ max_per_txd, nr_frags, mss), ++ skb->xmit_stamp); ++ ++ // netdev->trans_start = jiffies; ++ ++ /* Make sure there is space in the ring for the next send. */ ++ // e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); ++ ++#ifdef NETIF_F_LLTX ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, irq_flags); ++#endif ++ return NETDEV_TX_OK; ++} ++ ++static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ ++ /* This goes back to the question of how to logically map a tx queue ++ * to a flow. Right now, performance is impacted slightly negatively ++ * if using multiple tx queues. If the stack breaks away from a ++ * single qdisc implementation, we can look at this again. 
*/ ++ return (e1000_xmit_frame_ring(skb, netdev, tx_ring)); ++} ++ ++#ifdef CONFIG_E1000_MQ ++static int e1000_subqueue_xmit_frame(struct sk_buff *skb, ++ struct net_device *netdev, int queue) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_tx_ring *tx_ring = &adapter->tx_ring[queue]; ++ ++ return (e1000_xmit_frame_ring(skb, netdev, tx_ring)); ++} ++#endif ++ ++ ++/** ++ * e1000_tx_timeout - Respond to a Tx Hang ++ * @netdev: network interface device structure ++ **/ ++#if 0 ++static void e1000_tx_timeout(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ /* Do the reset outside of interrupt context */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++} ++#endif ++ ++static void e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter; ++ adapter = container_of(work, struct e1000_adapter, reset_task); ++ ++ e1000_reinit_locked(adapter); ++} ++ ++#if 0 ++/** ++ * e1000_get_stats - Get System Network Statistics ++ * @netdev: network interface device structure ++ * ++ * Returns the address of the device statistics structure. ++ * The statistics are actually updated from the timer callback. ++ **/ ++static struct net_device_stats * e1000_get_stats(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ /* only return the current stats */ ++ return &adapter->net_stats; ++} ++ ++/** ++ * e1000_change_mtu - Change the Maximum Transfer Unit ++ * @netdev: network interface device structure ++ * @new_mtu: new value for maximum frame size ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++static int e1000_change_mtu(struct net_device *netdev, int new_mtu) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int max_frame = new_mtu + ETH_HLEN + ETHERNET_FCS_SIZE; ++ u16 eeprom_data = 0; ++ ++ if ((max_frame < ETH_ZLEN + ETHERNET_FCS_SIZE) || ++ (max_frame > MAX_JUMBO_FRAME_SIZE)) { ++ DPRINTK(PROBE, ERR, "Invalid MTU setting\n"); ++ return -EINVAL; ++ } ++ ++ /* Adapter-specific max frame size limits. */ ++ switch (adapter->hw.mac.type) { ++ case e1000_undefined: ++ case e1000_82542: ++ case e1000_ich8lan: ++ if (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ case e1000_82573: ++ /* Jumbo Frames not supported if: ++ * - this is not an 82573L device ++ * - ASPM is enabled in any way (0x1A bits 3:2) */ ++ e1000_read_nvm(&adapter->hw, NVM_INIT_3GIO_3, 1, &eeprom_data); ++ if ((adapter->hw.device_id != E1000_DEV_ID_82573L) || ++ (eeprom_data & NVM_WORD1A_ASPM_MASK)) { ++ if (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE) { ++ DPRINTK(PROBE, ERR, ++ "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ } ++ /* ERT will be enabled later to enable wire speed receives */ ++ ++ /* fall through to get support */ ++ case e1000_ich9lan: ++ if ((adapter->hw.phy.type == e1000_phy_ife) && ++ (max_frame > ETH_FRAME_LEN + ETHERNET_FCS_SIZE)) { ++ DPRINTK(PROBE, ERR, "Jumbo Frames not supported.\n"); ++ return -EINVAL; ++ } ++ /* fall through to get support */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++#define MAX_STD_JUMBO_FRAME_SIZE 9234 ++ if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { ++ DPRINTK(PROBE, ERR, "MTU > 9216 not supported.\n"); ++ return -EINVAL; ++ } ++ break; ++ default: ++ /* Capable of supporting up to MAX_JUMBO_FRAME_SIZE limit. 
*/ ++ break; ++ } ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ /* e1000_down has a dependency on max_frame_size */ ++ adapter->max_frame_size = max_frame; ++ if (rtnetif_running(netdev)) ++ e1000_down(adapter); ++ ++ /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN ++ * means we reserve 2 more, this pushes us to allocate from the next ++ * larger slab size. ++ * i.e. RXBUFFER_2048 --> size-4096 slab ++ * however with the new *_jumbo_rx* routines, jumbo receives will use ++ * fragmented skbs */ ++ ++ if (max_frame <= E1000_RXBUFFER_256) ++ adapter->rx_buffer_len = E1000_RXBUFFER_256; ++ else if (max_frame <= E1000_RXBUFFER_512) ++ adapter->rx_buffer_len = E1000_RXBUFFER_512; ++ else if (max_frame <= E1000_RXBUFFER_1024) ++ adapter->rx_buffer_len = E1000_RXBUFFER_1024; ++ else if (max_frame <= E1000_RXBUFFER_2048) ++ adapter->rx_buffer_len = E1000_RXBUFFER_2048; ++#ifdef CONFIG_E1000_NAPI ++ else ++ adapter->rx_buffer_len = E1000_RXBUFFER_4096; ++#else ++ else if (max_frame <= E1000_RXBUFFER_4096) ++ adapter->rx_buffer_len = E1000_RXBUFFER_4096; ++ else if (max_frame <= E1000_RXBUFFER_8192) ++ adapter->rx_buffer_len = E1000_RXBUFFER_8192; ++ else if (max_frame <= E1000_RXBUFFER_16384) ++ adapter->rx_buffer_len = E1000_RXBUFFER_16384; ++#endif ++ ++ /* adjust allocation if LPE protects us, and we aren't using SBP */ ++ if (!e1000_tbi_sbp_enabled_82543(&adapter->hw) && ++ ((max_frame == ETH_FRAME_LEN + ETHERNET_FCS_SIZE) || ++ (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE))) ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE; ++ ++ DPRINTK(PROBE, INFO, "changing MTU from %d to %d\n", ++ netdev->mtu, new_mtu); ++ netdev->mtu = new_mtu; ++ ++ if (rtnetif_running(netdev)) ++ e1000_up(adapter); ++ else ++ e1000_reset(adapter); ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ ++ return 0; ++} ++#endif ++ ++/** ++ * e1000_update_stats - Update the board statistics counters ++ * @adapter: board private structure ++ **/ ++void e1000_update_stats(struct e1000_adapter *adapter) ++{ ++} ++#ifdef SIOCGMIIPHY ++ ++/** ++ * e1000_phy_read_status - Update the PHY register status snapshot ++ * @adapter: board private structure ++ **/ ++static void e1000_phy_read_status(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_regs *phy = &adapter->phy_regs; ++ int ret_val = E1000_SUCCESS; ++ unsigned long irq_flags; ++ ++ ++ rtdm_lock_get_irqsave(&adapter->stats_lock, irq_flags); ++ ++ if (E1000_READ_REG(hw, E1000_STATUS)& E1000_STATUS_LU) { ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy->bmcr); ++ ret_val |= e1000_read_phy_reg(hw, PHY_STATUS, &phy->bmsr); ++ ret_val |= e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &phy->advertise); ++ ret_val |= e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy->lpa); ++ ret_val |= e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, ++ &phy->expansion); ++ ret_val |= e1000_read_phy_reg(hw, PHY_1000T_CTRL, ++ &phy->ctrl1000); ++ ret_val |= e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy->stat1000); ++ ret_val |= e1000_read_phy_reg(hw, PHY_EXT_STATUS, ++ &phy->estatus); ++ if (ret_val) ++ DPRINTK(DRV, WARNING, "Error reading PHY register\n"); ++ } else { ++ /* Do not read PHY registers if link is not up ++ * Set values to typical power-on defaults */ ++ phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX); ++ phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL | ++ BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE | ++ BMSR_ERCAP); ++ phy->advertise = (ADVERTISE_PAUSE_ASYM | 
ADVERTISE_PAUSE_CAP | ++ ADVERTISE_ALL | ADVERTISE_CSMA); ++ phy->lpa = 0; ++ phy->expansion = EXPANSION_ENABLENPAGE; ++ phy->ctrl1000 = ADVERTISE_1000FULL; ++ phy->stat1000 = 0; ++ phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF); ++ } ++ ++ rtdm_lock_put_irqrestore(&adapter->stats_lock, irq_flags); ++} ++#endif ++ ++ ++/** ++ * e1000_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr_msi(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++#ifndef CONFIG_E1000_NAPI ++ int i, j; ++ int rx_cleaned, tx_cleaned; ++#endif ++ u32 icr = E1000_READ_REG(hw, E1000_ICR); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ ++#ifdef CONFIG_E1000_NAPI ++ /* read ICR disables interrupts using IAM, so keep up with our ++ * enable/disable accounting */ ++ atomic_inc(&adapter->irq_sem); ++#endif ++ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { ++ hw->mac.get_link_status = 1; ++ /* ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU))) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* 80003ES2LAN workaround-- For packet buffer work-around on ++ * link down event; disable receives here in the ISR and reset ++ * adapter in watchdog */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ u32 rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= E1000_FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ //if (!test_bit(__E1000_DOWN, &adapter->state)) ++ // mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ /* XXX only using ring 0 for napi */ ++ if (likely(netif_rx_schedule_prep(netdev, &adapter->rx_ring[0].napi))) { ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_rx_packets = 0; ++ __netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); ++ } else { ++ atomic_dec(&adapter->irq_sem); ++ } ++#else ++ adapter->total_tx_bytes = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_packets = 0; ++ adapter->data_received = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) { ++ rx_cleaned = 0; ++ for (j = 0; j < adapter->num_rx_queues; j++) ++ rx_cleaned |= adapter->clean_rx(adapter, ++ &adapter->rx_ring[j], &time_stamp); ++ ++ tx_cleaned = 0; ++ for (j = 0 ; j < adapter->num_tx_queues ; j++) ++ tx_cleaned |= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[j]); ++ ++ if (!rx_cleaned && !tx_cleaned) ++ break; ++ } ++ ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++#endif ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; 
++ u32 rctl, icr = E1000_READ_REG(hw, E1000_ICR); ++#ifndef CONFIG_E1000_NAPI ++ int i, j; ++ int rx_cleaned, tx_cleaned; ++#endif ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ if (unlikely(!icr)) ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ ++#ifdef CONFIG_E1000_NAPI ++ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt */ ++ if ((adapter->flags & E1000_FLAG_INT_ASSERT_AUTO_MASK) && ++ !(icr & E1000_ICR_INT_ASSERTED)) ++ return IRQ_NONE; ++ ++ /* Interrupt Auto-Mask...upon reading ICR, ++ * interrupts are masked. No need for the ++ * IMC write, but it does mean we should ++ * account for it ASAP. */ ++ if (likely(hw->mac.type >= e1000_82571)) ++ atomic_inc(&adapter->irq_sem); ++#endif ++ ++ if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { ++ hw->mac.get_link_status = 1; ++ /* ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU))) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->flags & E1000_FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= E1000_FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ //if (!test_bit(__E1000_DOWN, &adapter->state)) ++ // mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++#ifdef CONFIG_E1000_NAPI ++ if (hw->mac.type < e1000_82571) { ++ /* disable interrupts, without the synchronize_irq bit */ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, E1000_IMC, ~0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ /* XXX only using ring 0 for napi */ ++ if (likely(netif_rx_schedule_prep(netdev, &adapter->rx_ring[0].napi))) { ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_rx_packets = 0; ++ __netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); ++ } else { ++ atomic_dec(&adapter->irq_sem); ++ } ++#else ++ /* Writing IMC and IMS is needed for 82547. ++ * Due to Hub Link bus being occupied, an interrupt ++ * de-assertion message is not able to be sent. ++ * When an interrupt assertion message is generated later, ++ * two messages are re-ordered and sent out. ++ * That causes APIC to think 82547 is in de-assertion ++ * state, while 82547 is in assertion state, resulting ++ * in dead lock. Writing IMC forces 82547 into ++ * de-assertion state. 
++ */ ++ if (hw->mac.type == e1000_82547 || hw->mac.type == e1000_82547_rev_2) { ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, E1000_IMC, ~0); ++ } ++ ++ adapter->data_received = 0; ++ adapter->total_tx_bytes = 0; ++ adapter->total_rx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ adapter->total_rx_packets = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) { ++ rx_cleaned = 0; ++ for (j = 0; j < adapter->num_rx_queues; j++) ++ rx_cleaned |= adapter->clean_rx(adapter, ++ &adapter->rx_ring[j], &time_stamp); ++ ++ tx_cleaned = 0; ++ for (j = 0 ; j < adapter->num_tx_queues ; j++) ++ tx_cleaned |= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[j]); ++ ++ if (!rx_cleaned && !tx_cleaned) ++ break; ++ } ++ ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++ ++ if (hw->mac.type == e1000_82547 || hw->mac.type == e1000_82547_rev_2) ++ e1000_irq_enable(adapter); ++ ++#endif ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/** ++ * e1000_poll - NAPI Rx polling callback ++ * @napi: struct associated with this polling callback ++ * @budget: amount of packets driver is allowed to process this poll ++ **/ ++static int e1000_poll(struct napi_struct *napi, int budget) ++{ ++ struct e1000_rx_ring *rx_ring = container_of(napi, struct e1000_rx_ring, ++ napi); ++ struct e1000_adapter *adapter = rx_ring->adapter; ++ struct net_device *netdev = adapter->netdev; ++ int tx_clean_complete = 1, work_done = 0; ++ int i; ++ ++ /* FIXME: i think this code is un-necessary when using base netdev */ ++ /* Keep link state information with original netdev */ ++ if (!rtnetif_carrier_ok(netdev)) ++ goto quit_polling; ++ ++ /* e1000_poll is called per-cpu. This lock protects ++ * tx_ring[i] from being cleaned by multiple cpus ++ * simultaneously. A failure obtaining the lock means ++ * tx_ring[i] is currently being cleaned anyway. 
*/ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++#ifdef CONFIG_E1000_MQ ++ if (spin_trylock(&adapter->tx_ring[i].tx_queue_lock)) { ++ tx_clean_complete &= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[i]); ++ spin_unlock(&adapter->tx_ring[i].tx_queue_lock); ++ } ++#else ++ if (spin_trylock(&adapter->tx_queue_lock)) { ++ tx_clean_complete &= e1000_clean_tx_irq(adapter, ++ &adapter->tx_ring[i]); ++ spin_unlock(&adapter->tx_queue_lock); ++ } ++#endif ++ } ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ adapter->clean_rx(adapter, &adapter->rx_ring[i], ++ &work_done, budget); ++ } ++ ++ /* If no Tx and not enough Rx work done, exit the polling mode */ ++ if ((tx_clean_complete && (work_done == 0)) || ++ !rtnetif_running(netdev)) { ++quit_polling: ++ if (likely(adapter->itr_setting & 3)) ++ e1000_set_itr(adapter); ++ netif_rx_complete(netdev, napi); ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ atomic_dec(&adapter->irq_sem); ++ else ++ e1000_irq_enable(adapter); ++ return 0; ++ } ++ ++ /* need to make sure the stack is aware of a tx-only poll loop */ ++ if (!tx_clean_complete) ++ work_done = budget; ++ ++ return work_done; ++} ++ ++#endif ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++#ifdef CONFIG_E1000_NAPI ++ unsigned int count = 0; ++#endif ++ bool cleaned = FALSE; ++ bool retval = TRUE; ++ unsigned int total_tx_bytes=0, total_tx_packets=0; ++ ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { ++ for (cleaned = FALSE; !cleaned; ) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++#ifdef CONFIG_E1000_MQ ++ tx_ring->tx_stats.bytes += buffer_info->length; ++#endif ++ if (cleaned) { ++ struct sk_buff *skb = buffer_info->skb; ++#ifdef NETIF_F_TSO ++ unsigned int segs, bytecount; ++ segs = skb_shinfo(skb)->gso_segs ?: 1; ++ /* multiply data chunks by size of headers */ ++ bytecount = ((segs - 1) * skb_headlen(skb)) + ++ skb->len; ++ total_tx_packets += segs; ++ total_tx_bytes += bytecount; ++#else ++ total_tx_packets++; ++ total_tx_bytes += skb->len; ++#endif ++ } ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ tx_desc->upper.data = 0; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++#ifdef CONFIG_E1000_MQ ++ tx_ring->tx_stats.packets++; ++#endif ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++#ifdef CONFIG_E1000_NAPI ++#define E1000_TX_WEIGHT 64 ++ /* weight of a sort for tx, to avoid endless transmit cleanup */ ++ if (count++ == E1000_TX_WEIGHT) { ++ retval = FALSE; ++ break; ++ } ++#endif ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (unlikely(cleaned && rtnetif_carrier_ok(netdev) && ++ E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. 
++ */ ++ smp_mb(); ++ ++ if (rtnetif_queue_stopped(netdev) && ++ !(test_bit(__E1000_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(netdev); ++ ++adapter->restart_queue; ++ } ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i */ ++ adapter->detect_tx_hung = FALSE; ++ if (tx_ring->buffer_info[eop].dma && ++ time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + ++ (adapter->tx_timeout_factor * HZ)) ++ && !(E1000_READ_REG(&adapter->hw, E1000_STATUS) & ++ E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n" ++ " Tx Queue <%lu>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%x>\n" ++ " jiffies <%lx>\n" ++ " next_to_watch.status <%x>\n", ++ (unsigned long)((tx_ring - adapter->tx_ring) / ++ sizeof(struct e1000_tx_ring)), ++ readl(adapter->hw.hw_addr + tx_ring->tdh), ++ readl(adapter->hw.hw_addr + tx_ring->tdt), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_ring->buffer_info[eop].time_stamp, ++ eop, ++ jiffies, ++ eop_desc->upper.fields.status); ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ adapter->total_tx_bytes += total_tx_bytes; ++ adapter->total_tx_packets += total_tx_packets; ++ adapter->net_stats.tx_bytes += total_tx_bytes; ++ adapter->net_stats.tx_packets += total_tx_packets; ++ return retval; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload for 82543 ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, ++ u32 csum, struct sk_buff *skb) ++{ ++ u16 status = (u16)status_err; ++ u8 errors = (u8)(status_err >> 24); ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* 82543 or newer only */ ++ if (unlikely(adapter->hw.mac.type < e1000_82543)) return; ++ /* Ignore Checksum bit is set */ ++ if (unlikely(status & E1000_RXD_STAT_IXSM)) return; ++ /* TCP/UDP checksum error bit is set */ ++ if (unlikely(errors & E1000_RXD_ERR_TCPE)) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ /* TCP/UDP Checksum has not been calculated */ ++ if (adapter->hw.mac.type <= e1000_82547_rev_2) { ++ if (!(status & E1000_RXD_STAT_TCPCS)) ++ return; ++ } else { ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (likely(status & E1000_RXD_STAT_TCPCS)) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if (adapter->hw.mac.type > e1000_82547_rev_2) { ++ /* IP fragment with UDP payload */ ++ /* Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. 
++ */ ++ csum = ntohl(csum ^ 0xFFFF); ++ skb->csum = csum; ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000_receive_skb - helper function to handle rx indications ++ * @adapter: board private structure ++ * @status: descriptor status field as written by hardware ++ * @vlan: descriptor vlan field as written by hardware (no le/be conversion) ++ * @skb: pointer to sk_buff to be indicated to stack ++ **/ ++static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status, ++ u16 vlan, struct sk_buff *skb) ++{ ++#ifdef CONFIG_E1000_NAPI ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) { ++ vlan_hwaccel_receive_skb(skb, adapter->vlgrp, ++ le16_to_cpu(vlan) & ++ E1000_RXD_SPC_VLAN_MASK); ++ } else { ++ netif_receive_skb(skb); ++ } ++#else ++ netif_receive_skb(skb); ++#endif ++#else /* CONFIG_E1000_NAPI */ ++#ifdef NETIF_F_HW_VLAN_TX ++ if (unlikely(adapter->vlgrp && (status & E1000_RXD_STAT_VP))) { ++ vlan_hwaccel_rx(skb, adapter->vlgrp, ++ le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK); ++ } else { ++ netif_rx(skb); ++ } ++#else ++ rtnetif_rx(skb); ++#endif ++#endif /* CONFIG_E1000_NAPI */ ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/* NOTE: these new jumbo frame routines rely on NAPI because of the ++ * pskb_may_pull call, which eventually must call kmap_atomic which you cannot ++ * call from hard irq context */ ++ ++/** ++ * e1000_consume_page - helper function ++ **/ ++static void e1000_consume_page(struct e1000_rx_buffer *bi, struct sk_buff *skb, ++ u16 length) ++{ ++ bi->page = NULL; ++ skb->len += length; ++ skb->data_len += length; ++ skb->truesize += length; ++} ++ ++/** ++ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ unsigned long irq_flags; ++ u32 length; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct sk_buff *skb; ++ u8 status; ++ ++ if (*work_done >= work_to_do) ++ break; ++ (*work_done)++; ++ ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_page(pdev, ++ buffer_info->dma, ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ /* errors is only valid for DD + EOP descriptors */ ++ if (unlikely((status & E1000_RXD_STAT_EOP) && ++ (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) { ++ u8 last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte, ++ adapter->min_frame_size, ++ adapter->max_frame_size)) { ++ rtdm_lock_get_irqsave(&adapter->stats_lock, ++ irq_flags); ++ 
e1000_tbi_adjust_stats_82543(&adapter->hw, ++ &adapter->stats, ++ length, skb->data, ++ adapter->max_frame_size); ++ rtdm_lock_put_irqrestore(&adapter->stats_lock, ++ irq_flags); ++ length--; ++ } else { ++ /* recycle both page and skb */ ++ buffer_info->skb = skb; ++ /* an error means any chain goes out the window ++ * too */ ++ if (rx_ring->rx_skb_top) ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ goto next_desc; ++ } ++ } ++ ++#define rxtop rx_ring->rx_skb_top ++ if (!(status & E1000_RXD_STAT_EOP)) { ++ /* this descriptor is only the beginning (or middle) */ ++ if (!rxtop) { ++ /* this is the beginning of a chain */ ++ rxtop = skb; ++ skb_fill_page_desc(rxtop, 0, buffer_info->page, ++ 0, length); ++ } else { ++ /* this is the middle of a chain */ ++ skb_fill_page_desc(rxtop, ++ skb_shinfo(rxtop)->nr_frags, ++ buffer_info->page, 0, length); ++ /* re-use the skb, only consumed the page */ ++ buffer_info->skb = skb; ++ } ++ e1000_consume_page(buffer_info, rxtop, length); ++ goto next_desc; ++ } else { ++ if (rxtop) { ++ /* end of the chain */ ++ skb_fill_page_desc(rxtop, ++ skb_shinfo(rxtop)->nr_frags, ++ buffer_info->page, 0, length); ++ /* re-use the current skb, we only consumed the ++ * page */ ++ buffer_info->skb = skb; ++ skb = rxtop; ++ rxtop = NULL; ++ e1000_consume_page(buffer_info, skb, length); ++ } else { ++ /* no chain, got EOP, this buf is the packet ++ * copybreak to save the put_page/alloc_page */ ++ if (length <= copybreak && ++ skb_tailroom(skb) >= length) { ++ u8 *vaddr; ++ vaddr = kmap_atomic(buffer_info->page, ++ KM_SKB_DATA_SOFTIRQ); ++ memcpy(skb_tail_pointer(skb), vaddr, length); ++ kunmap_atomic(vaddr, ++ KM_SKB_DATA_SOFTIRQ); ++ /* re-use the page, so don't erase ++ * buffer_info->page */ ++ rtskb_put(skb, length); ++ } else { ++ skb_fill_page_desc(skb, 0, ++ buffer_info->page, 0, ++ length); ++ e1000_consume_page(buffer_info, skb, ++ length); ++ } ++ } ++ } ++ ++ /* Receive Checksum Offload XXX recompute due to CRC strip? 
*/ ++ e1000_rx_checksum(adapter, ++ (u32)(status) | ++ ((u32)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ pskb_trim(skb, skb->len - 4); ++ ++ /* probably a little skewed due to removing CRC */ ++ total_rx_bytes += skb->len; ++ total_rx_packets++; ++ ++ /* eth type trans needs skb->data to point to something */ ++ if (!pskb_may_pull(skb, ETH_HLEN)) { ++ DPRINTK(DRV, ERR, "__pskb_pull_tail failed.\n"); ++ kfree_rtskb(skb); ++ goto next_desc; ++ } ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ ++ e1000_receive_skb(adapter, status, rx_desc->special, skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++ netdev->last_rx = jiffies; ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += length; ++#endif ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++} ++#endif /* NAPI */ ++ ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++#ifdef CONFIG_E1000_NAPI ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++#else ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++#endif ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ u32 length; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ // rtdm_printk("<2> e1000_clean_rx_irq %i\n", __LINE__); ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct sk_buff *skb; ++ u8 status; ++ ++#ifdef CONFIG_E1000_NAPI ++ if (*work_done >= work_to_do) ++ break; ++ (*work_done)++; ++#endif ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ /* !EOP means multiple descriptors were used to store a single ++ * packet, also make sure the frame isn't just CRC only */ ++ if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) { ++ /* All receives must fit into a single 
buffer */ ++ E1000_DBG("%s: Receive packet consumed multiple" ++ " buffers\n", netdev->name); ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { ++ u8 last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte, ++ adapter->min_frame_size, ++ adapter->max_frame_size)) { ++ length--; ++ } else { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ } ++ ++ /* adjust length to remove Ethernet CRC, this must be ++ * done after the TBI_ACCEPT workaround above */ ++ length -= 4; ++ ++ /* probably a little skewed due to removing CRC */ ++ total_rx_bytes += length; ++ total_rx_packets++; ++ ++ rtskb_put(skb, length); ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, ++ (u32)(status) | ++ ((u32)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ ++ e1000_receive_skb(adapter, status, rx_desc->special, skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++ // netdev->last_rx = jiffies; ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += length; ++#endif ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++} ++ ++/** ++ * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++#ifdef CONFIG_E1000_NAPI ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do) ++#else ++static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++#endif ++{ ++#ifdef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ return true; ++ ++#else ++ ++ union e1000_rx_desc_packet_split *rx_desc, *next_rxd; ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_buffer *buffer_info, *next_buffer; ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct sk_buff *skb; ++ unsigned int i, j; ++ u32 length, staterr; ++ int cleaned_count = 0; ++ bool cleaned = FALSE; ++ unsigned int total_rx_bytes=0, total_rx_packets=0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC_PS(*rx_ring, i); ++ staterr = le32_to_cpu(rx_desc->wb.middle.status_error); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (staterr & E1000_RXD_STAT_DD) { ++ ps_page = &rx_ring->ps_page[i]; ++ ps_page_dma = &rx_ring->ps_page_dma[i]; ++#ifdef CONFIG_E1000_NAPI ++ if (unlikely(*work_done >= work_to_do)) ++ break; ++ (*work_done)++; ++#endif 
++ skb = buffer_info->skb; ++ ++ /* in the packet split case this is header only */ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC_PS(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_ps_bsize0, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) { ++ E1000_DBG("%s: Packet Split buffers didn't pick up" ++ " the full packet\n", netdev->name); ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ length = le16_to_cpu(rx_desc->wb.middle.length0); ++ ++ if (unlikely(!length)) { ++ E1000_DBG("%s: Last part of the packet spanning" ++ " multiple descriptors\n", netdev->name); ++ dev_kfree_skb_irq(skb); ++ goto next_desc; ++ } ++ ++ /* Good Receive */ ++ rtskb_put(skb, length); ++#ifdef CONFIG_E1000_MQ ++ rx_ring->rx_stats.packets++; ++ rx_ring->rx_stats.bytes += skb->len; ++#endif ++ ++#ifdef CONFIG_E1000_NAPI ++ { ++ /* this looks ugly, but it seems compiler issues make it ++ more efficient than reusing j */ ++ int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]); ++ ++ /* page alloc/put takes too long and effects small packet ++ * throughput, so unsplit small packets and save the alloc/put ++ * only valid in softirq (napi) context to call kmap_* */ ++ if (l1 && (l1 <= copybreak) && ++ ((length + l1) <= adapter->rx_ps_bsize0)) { ++ u8 *vaddr; ++ /* there is no documentation about how to call ++ * kmap_atomic, so we can't hold the mapping ++ * very long */ ++ pci_dma_sync_single_for_cpu(pdev, ++ ps_page_dma->ps_page_dma[0], ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ vaddr = kmap_atomic(ps_page->ps_page[0], ++ KM_SKB_DATA_SOFTIRQ); ++ memcpy(skb_tail_pointer(skb), vaddr, l1); ++ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); ++ pci_dma_sync_single_for_device(pdev, ++ ps_page_dma->ps_page_dma[0], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ /* remove the CRC */ ++ l1 -= 4; ++ rtskb_put(skb, l1); ++ goto copydone; ++ } /* if */ ++ } ++#endif ++ ++ for (j = 0; j < adapter->rx_ps_pages; j++) { ++ if (!(length= le16_to_cpu(rx_desc->wb.upper.length[j]))) ++ break; ++ pci_unmap_page(pdev, ps_page_dma->ps_page_dma[j], ++ PAGE_SIZE, PCI_DMA_FROMDEVICE); ++ ps_page_dma->ps_page_dma[j] = 0; ++ skb_fill_page_desc(skb, j, ps_page->ps_page[j], 0, ++ length); ++ ps_page->ps_page[j] = NULL; ++ skb->len += length; ++ skb->data_len += length; ++ skb->truesize += length; ++ } ++ ++ /* strip the ethernet crc, problem is we're using pages now so ++ * this whole operation can get a little cpu intensive */ ++ pskb_trim(skb, skb->len - 4); ++ ++#ifdef CONFIG_E1000_NAPI ++copydone: ++#endif ++ total_rx_bytes += skb->len; ++ total_rx_packets++; ++ ++ e1000_rx_checksum(adapter, staterr, ++ le16_to_cpu(rx_desc->wb.lower.hi_dword.csum_ip.csum), skb); ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ ++ if (likely(rx_desc->wb.upper.header_status & ++ cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))) ++ adapter->rx_hdr_split++; ++ ++ e1000_receive_skb(adapter, staterr, rx_desc->wb.middle.vlan, ++ skb); ++ netdev->last_rx = jiffies; ++ ++next_desc: ++ rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF); ++ buffer_info->skb = NULL; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, 
rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ ++ staterr = le32_to_cpu(rx_desc->wb.middle.status_error); ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ adapter->total_rx_packets += total_rx_packets; ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_bytes += total_rx_bytes; ++ adapter->net_stats.rx_packets += total_rx_packets; ++ return cleaned; ++#endif ++} ++ ++#ifdef CONFIG_E1000_NAPI ++/** ++ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers ++ * @adapter: address of board private structure ++ * @rx_ring: pointer to receive ring structure ++ * @cleaned_count: number of buffers to allocate this pass ++ **/ ++static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_rx_buffer *buffer_info; ++ struct sk_buff *skb; ++ unsigned int i; ++ unsigned int bufsz = 256 - ++ 16 /*for skb_reserve */ - ++ NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ skb_trim(skb, 0); ++ goto check_page; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct sk_buff *oldskb = skb; ++ DPRINTK(PROBE, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ skb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++check_page: ++ /* allocate a new page if necessary */ ++ if (!buffer_info->page) { ++ buffer_info->page = alloc_page(GFP_ATOMIC); ++ if (unlikely(!buffer_info->page)) { ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ } ++ ++ if (!buffer_info->dma) ++ buffer_info->dma = pci_map_page(pdev, ++ buffer_info->page, 0, ++ PAGE_SIZE, ++ PCI_DMA_FROMDEVICE); ++ ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
*/ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++#endif /* NAPI */ ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct net_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_rx_buffer *buffer_info; ++ struct sk_buff *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct sk_buff *oldskb = skb; ++ DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ skb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++map_skb: ++ buffer_info->dma = pci_map_single(pdev, ++ skb->data, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, ++ (void *)(unsigned long)buffer_info->dma, ++ adapter->rx_buffer_len)) { ++ DPRINTK(RX_ERR, ERR, ++ "dma align check failed: %u bytes at %p\n", ++ adapter->rx_buffer_len, ++ (void *)(unsigned long)buffer_info->dma); ++ kfree_rtskb(skb); ++ buffer_info->skb = NULL; ++ ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ buffer_info->dma = 0; ++ ++ adapter->alloc_rx_buff_failed++; ++ break; /* while !buffer_info->skb */ ++ } ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
*/ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++ ++/** ++ * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++} ++ ++/** ++ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers. ++ * @adapter: ++ **/ ++static void e1000_smartspeed(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_phy_info *phy = &adapter->hw.phy; ++ u16 phy_status; ++ u16 phy_ctrl; ++ ++ if ((phy->type != e1000_phy_igp) || !mac->autoneg || ++ !(phy->autoneg_advertised & ADVERTISE_1000_FULL)) ++ return; ++ ++ if (adapter->smartspeed == 0) { ++ /* If Master/Slave config fault is asserted twice, ++ * we assume back-to-back */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ if (phy_ctrl & CR_1000T_MS_ENABLE) { ++ phy_ctrl &= ~CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, ++ phy_ctrl); ++ adapter->smartspeed++; ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, ++ &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, ++ phy_ctrl); ++ } ++ } ++ return; ++ } else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { ++ /* If still no link, perhaps using 2/3 pair cable */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ phy_ctrl |= CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_ctrl); ++ } ++ } ++ /* Restart process after E1000_SMARTSPEED_MAX iterations */ ++ if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX) ++ adapter->smartspeed = 0; ++} ++ ++/** ++ * e1000_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++#if 0 ++static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ switch (cmd) { ++#ifdef SIOCGMIIPHY ++ case SIOCGMIIPHY: ++ case SIOCGMIIREG: ++ case SIOCSMIIREG: ++ return e1000_mii_ioctl(netdev, ifr, cmd); ++#endif ++#ifdef ETHTOOL_OPS_COMPAT ++ case SIOCETHTOOL: ++ return ethtool_ioctl(ifr); ++#endif ++ default: ++ return -EOPNOTSUPP; ++ } ++} ++ ++#ifdef SIOCGMIIPHY ++/** ++ * e1000_mii_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, ++ int cmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct mii_ioctl_data *data = if_mii(ifr); ++ ++ if (adapter->hw.phy.media_type != e1000_media_type_copper) ++ return -EOPNOTSUPP; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ data->phy_id = adapter->hw.phy.addr; ++ break; ++ case SIOCGMIIREG: ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ switch (data->reg_num & 0x1F) { ++ case MII_BMCR: ++ data->val_out = adapter->phy_regs.bmcr; ++ break; ++ case MII_BMSR: ++ data->val_out = adapter->phy_regs.bmsr; ++ break; ++ case MII_PHYSID1: ++ data->val_out 
= (adapter->hw.phy.id >> 16); ++ break; ++ case MII_PHYSID2: ++ data->val_out = (adapter->hw.phy.id & 0xFFFF); ++ break; ++ case MII_ADVERTISE: ++ data->val_out = adapter->phy_regs.advertise; ++ break; ++ case MII_LPA: ++ data->val_out = adapter->phy_regs.lpa; ++ break; ++ case MII_EXPANSION: ++ data->val_out = adapter->phy_regs.expansion; ++ break; ++ case MII_CTRL1000: ++ data->val_out = adapter->phy_regs.ctrl1000; ++ break; ++ case MII_STAT1000: ++ data->val_out = adapter->phy_regs.stat1000; ++ break; ++ case MII_ESTATUS: ++ data->val_out = adapter->phy_regs.estatus; ++ break; ++ default: ++ return -EIO; ++ } ++ break; ++ case SIOCSMIIREG: ++ default: ++ return -EOPNOTSUPP; ++ } ++ return E1000_SUCCESS; ++} ++#endif ++#endif ++ ++void e1000_pci_set_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ int ret_val = pci_set_mwi(adapter->pdev); ++ ++ if (ret_val) ++ DPRINTK(PROBE, ERR, "Error in setting MWI\n"); ++} ++ ++void e1000_pci_clear_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_clear_mwi(adapter->pdev); ++} ++ ++void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ u16 cap_offset; ++ ++ cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); ++ if (!cap_offset) ++ return -E1000_ERR_CONFIG; ++ ++ pci_read_config_word(adapter->pdev, cap_offset + reg, value); ++ ++ return E1000_SUCCESS; ++} ++ ++#ifdef NETIF_F_HW_VLAN_TX ++static void e1000_vlan_rx_register(struct net_device *netdev, ++ struct vlan_group *grp) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 ctrl, rctl; ++ ++ e1000_irq_disable(adapter); ++ adapter->vlgrp = grp; ++ ++ if (grp) { ++ /* enable VLAN tag insert/strip */ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_VME; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ ++ if ((adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) { ++ /* enable VLAN receive filtering */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl |= E1000_RCTL_VFE; ++ rctl &= ~E1000_RCTL_CFIEN; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ e1000_update_mng_vlan(adapter); ++ } ++ } else { ++ /* disable VLAN tag insert/strip */ ++ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); ++ ++ if ((adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) { ++ /* disable VLAN filtering */ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_VFE; ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ if (adapter->mng_vlan_id != ++ (u16)E1000_MNG_VLAN_NONE) { ++ e1000_vlan_rx_kill_vid(netdev, ++ adapter->mng_vlan_id); ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ } ++ } ++ ++ e1000_irq_enable(adapter); ++} ++ ++static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 vfta, index; ++ struct net_device *v_netdev; ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == 
adapter->mng_vlan_id)) ++ return; ++ /* add VID to filter table */ ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index); ++ vfta |= (1 << (vid & 0x1F)); ++ e1000_write_vfta(&adapter->hw, index, vfta); ++ /* Copy feature flags from netdev to the vlan netdev for this vid. ++ * This allows things like TSO to bubble down to our vlan device. ++ */ ++ v_netdev = vlan_group_get_device(adapter->vlgrp, vid); ++ v_netdev->features |= adapter->netdev->features; ++ vlan_group_set_device(adapter->vlgrp, vid, v_netdev); ++} ++ ++static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 vfta, index; ++ ++ e1000_irq_disable(adapter); ++ vlan_group_set_device(adapter->vlgrp, vid, NULL); ++ e1000_irq_enable(adapter); ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) { ++ /* release control to f/w */ ++ e1000_release_hw_control(adapter); ++ return; ++ } ++ ++ /* remove VID from filter table */ ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index); ++ vfta &= ~(1 << (vid & 0x1F)); ++ e1000_write_vfta(&adapter->hw, index, vfta); ++} ++ ++static void e1000_restore_vlan(struct e1000_adapter *adapter) ++{ ++ e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp); ++ ++ if (adapter->vlgrp) { ++ u16 vid; ++ for (vid = 0; vid < VLAN_N_VID; vid++) { ++ if (!vlan_group_get_device(adapter->vlgrp, vid)) ++ continue; ++ e1000_vlan_rx_add_vid(adapter->netdev, vid); ++ } ++ } ++} ++#endif ++ ++int e1000_set_spd_dplx(struct e1000_adapter *adapter, u16 spddplx) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ ++ mac->autoneg = 0; ++ ++ /* Fiber NICs only allow 1000 gbps Full duplex */ ++ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) && ++ spddplx != (SPEED_1000 + DUPLEX_FULL)) { ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ ++ switch (spddplx) { ++ case SPEED_10 + DUPLEX_HALF: ++ mac->forced_speed_duplex = ADVERTISE_10_HALF; ++ break; ++ case SPEED_10 + DUPLEX_FULL: ++ mac->forced_speed_duplex = ADVERTISE_10_FULL; ++ break; ++ case SPEED_100 + DUPLEX_HALF: ++ mac->forced_speed_duplex = ADVERTISE_100_HALF; ++ break; ++ case SPEED_100 + DUPLEX_FULL: ++ mac->forced_speed_duplex = ADVERTISE_100_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_FULL: ++ mac->autoneg = 1; ++ adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_HALF: /* not supported */ ++ default: ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++#ifdef USE_REBOOT_NOTIFIER ++/* only want to do this for 2.4 kernels? 
*/ ++static int e1000_notify_reboot(struct notifier_block *nb, ++ unsigned long event, void *p) ++{ ++ struct pci_dev *pdev = NULL; ++ ++ switch (event) { ++ case SYS_DOWN: ++ case SYS_HALT: ++ case SYS_POWER_OFF: ++ while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { ++ if (pci_dev_driver(pdev) == &e1000_driver) ++ e1000_suspend(pdev, PMSG_SUSPEND); ++ } ++ } ++ return NOTIFY_DONE; ++} ++#endif ++ ++#ifdef CONFIG_PM ++static int e1000_resume(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u32 err; ++ ++ pci_set_power_state(pdev, PCI_D0); ++ pci_restore_state(pdev); ++ if ((err = pci_enable_device(pdev))) { ++ printk(KERN_ERR "e1000: Cannot enable PCI device from suspend\n"); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ if (rtnetif_running(netdev) && (err = e1000_request_irq(adapter))) ++ return err; ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ e1000_reset(adapter); ++ E1000_WRITE_REG(&adapter->hw, E1000_WUS, ~0); ++ ++ e1000_init_manageability(adapter); ++ ++ if (rtnetif_running(netdev)) ++ e1000_up(adapter); ++ ++ netif_device_attach(netdev); ++ ++ /* If the controller is 82573 or ICHx and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* ++ * Polling 'interrupt' - used by things like netconsole to send skbs ++ * without having to re-enable interrupts. It's not called while ++ * the interrupt routine is executing. ++ */ ++static void e1000_netpoll(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int i; ++ ++ disable_irq(adapter->pdev->irq); ++ e1000_intr(adapter->pdev->irq, netdev); ++ ++ for (i = 0; i < adapter->num_tx_queues ; i++ ) ++ e1000_clean_tx_irq(adapter, &adapter->tx_ring[i]); ++#ifndef CONFIG_E1000_NAPI ++ for (i = 0; i < adapter->num_rx_queues ; i++ ) ++ adapter->clean_rx(adapter, &adapter->rx_ring[i], NULL); ++#endif ++ enable_irq(adapter->pdev->irq); ++} ++#endif ++ ++#ifdef HAVE_PCI_ERS ++/** ++ * e1000_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ */ ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ netif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) ++ e1000_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * e1000_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the e1000_resume routine. 
++ */ ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ if (pci_enable_device(pdev)) { ++ printk(KERN_ERR "e1000: Cannot re-enable PCI device after reset.\n"); ++ return PCI_ERS_RESULT_DISCONNECT; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ e1000_reset(adapter); ++ E1000_WRITE_REG(&adapter->hw, E1000_WUS, ~0); ++ ++ return PCI_ERS_RESULT_RECOVERED; ++} ++ ++/** ++ * e1000_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. Implementation resembles the ++ * second-half of the e1000_resume routine. ++ */ ++static void e1000_io_resume(struct pci_dev *pdev) ++{ ++ struct net_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ e1000_init_manageability(adapter); ++ ++ if (rtnetif_running(netdev)) { ++ if (e1000_up(adapter)) { ++ printk("e1000: can't bring device back up after reset\n"); ++ return; ++ } ++ } ++ ++ netif_device_attach(netdev); ++ ++ /* If the controller is 82573 or ICHx and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (((adapter->hw.mac.type != e1000_82573) && ++ (adapter->hw.mac.type != e1000_ich8lan) && ++ (adapter->hw.mac.type != e1000_ich9lan)) || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++} ++#endif /* HAVE_PCI_ERS */ ++ ++s32 e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, u32 size) ++{ ++ hw->dev_spec = kmalloc(size, GFP_KERNEL); ++ ++ if (!hw->dev_spec) ++ return -ENOMEM; ++ ++ memset(hw->dev_spec, 0, size); ++ ++ return E1000_SUCCESS; ++} ++ ++void e1000_free_dev_spec_struct(struct e1000_hw *hw) ++{ ++ if (!hw->dev_spec) ++ return; ++ ++ kfree(hw->dev_spec); ++} ++ ++/* vim: set ts=4: */ ++/* e1000_main.c */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.c 2021-04-07 16:01:27.743633429 +0800 +@@ -0,0 +1,2582 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_ich8lan ++ * e1000_ich9lan ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_ich8lan.h" ++ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw); ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); ++static s32 e1000_check_polarity_ife_ich8lan(struct e1000_hw *hw); ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex_ich8lan(struct e1000_hw *hw); ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_phy_info_ich8lan(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, ++ u16 *data); ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw); ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw); ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw); ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex); ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout); ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_phy_info_ife_ich8lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16* data); ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, ++ u32 offset, u16 *data); ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte); ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 data); ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data); ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); ++ ++/* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++ u16 flcdone :1; /* bit 0 Flash Cycle Done */ ++ u16 flcerr :1; /* bit 1 Flash Cycle Error */ ++ u16 dael :1; /* bit 2 Direct Access error Log */ ++ u16 berasesz :2; /* bit 4:3 
Sector Erase Size */ ++ u16 flcinprog :1; /* bit 5 flash cycle in Progress */ ++ u16 reserved1 :2; /* bit 13:6 Reserved */ ++ u16 reserved2 :6; /* bit 13:6 Reserved */ ++ u16 fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ u16 flockdn :1; /* bit 15 Flash Config Lock-Down */ ++ } hsf_status; ++ u16 regval; ++}; ++ ++/* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++ u16 flcgo :1; /* 0 Flash Cycle Go */ ++ u16 flcycle :2; /* 2:1 Flash Cycle */ ++ u16 reserved :5; /* 7:3 Reserved */ ++ u16 fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ u16 flockdn :6; /* 15:10 Reserved */ ++ } hsf_ctrl; ++ u16 regval; ++}; ++ ++/* ICH Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++ u32 grra :8; /* 0:7 GbE region Read Access */ ++ u32 grwa :8; /* 8:15 GbE region Write Access */ ++ u32 gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ u32 gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++ } hsf_flregacc; ++ u16 regval; ++}; ++ ++struct e1000_shadow_ram { ++ u16 value; ++ bool modified; ++}; ++ ++struct e1000_dev_spec_ich8lan { ++ bool kmrn_lock_loss_workaround_enabled; ++ struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; ++}; ++ ++/** ++ * e1000_init_phy_params_ich8lan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i = 0; ++ ++ DEBUGFUNC("e1000_init_phy_params_ich8lan"); ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ func->acquire_phy = e1000_acquire_swflag_ich8lan; ++ func->check_polarity = e1000_check_polarity_ife_ich8lan; ++ func->check_reset_block = e1000_check_reset_block_ich8lan; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_ich8lan; ++ func->get_cable_length = e1000_get_cable_length_igp_2; ++ func->get_cfg_done = e1000_get_cfg_done_ich8lan; ++ func->get_phy_info = e1000_get_phy_info_ich8lan; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->release_phy = e1000_release_swflag_ich8lan; ++ func->reset_phy = e1000_phy_hw_reset_ich8lan; ++ func->set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_ich8lan; ++ ++ ++ phy->id = 0; ++ while ((e1000_phy_unknown == e1000_get_phy_type_from_id(phy->id)) && ++ (i++ < 100)) { ++ msec_delay(1); ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Verify phy id */ ++ switch (phy->id) { ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy->type = e1000_phy_ife; ++ phy->autoneg_mask = E1000_ALL_NOT_GIG; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific NVM parameters and function ++ * pointers. 
++ **/ ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 gfpreg, sector_base_addr, sector_end_addr; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_nvm_params_ich8lan"); ++ ++ /* Can't read flash registers if the register set isn't mapped. */ ++ if (!hw->flash_address) { ++ DEBUGOUT("ERROR: Flash registers not mapped\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ nvm->type = e1000_nvm_flash_sw; ++ ++ gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); ++ ++ /* ++ * sector_X_addr is a "sector"-aligned address (4096 bytes) ++ * Add 1 to sector_end_addr since this sector is included in ++ * the overall size. ++ */ ++ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; ++ sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; ++ ++ /* flash_base_addr is byte-aligned */ ++ nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; ++ ++ /* ++ * find total size of the NVM, then cut in half since the total ++ * size represents two separate NVM banks. ++ */ ++ nvm->flash_bank_size = (sector_end_addr - sector_base_addr) ++ << FLASH_SECTOR_ADDR_SHIFT; ++ nvm->flash_bank_size /= 2; ++ /* Adjust to word count */ ++ nvm->flash_bank_size /= sizeof(u16); ++ ++ nvm->word_size = E1000_SHADOW_RAM_WORDS; ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Clear shadow ram */ ++ for (i = 0; i < nvm->word_size; i++) { ++ dev_spec->shadow_ram[i].modified = FALSE; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_swflag_ich8lan; ++ func->read_nvm = e1000_read_nvm_ich8lan; ++ func->release_nvm = e1000_release_swflag_ich8lan; ++ func->update_nvm = e1000_update_nvm_checksum_ich8lan; ++ func->valid_led_default = e1000_valid_led_default_ich8lan; ++ func->validate_nvm = e1000_validate_nvm_checksum_ich8lan; ++ func->write_nvm = e1000_write_nvm_ich8lan; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mac_params_ich8lan - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific MAC parameters and function ++ * pointers. ++ **/ ++static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_ich8lan"); ++ ++ /* Set media type function pointer */ ++ hw->phy.media_type = e1000_media_type_copper; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 32; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; ++ if (mac->type == e1000_ich8lan) ++ mac->rar_entry_count--; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. 
*/ ++ mac->arc_subsystem_valid = TRUE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_ich8lan; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_ich8lan; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_ich8lan; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_ich8lan; ++ /* physical interface setup */ ++ func->setup_physical_interface = e1000_setup_copper_link_ich8lan; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_ich8lan; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_ich8lan; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_ich8lan; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_ich8lan; ++ func->led_off = e1000_led_off_ich8lan; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_ich8lan); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ if (ret_val) ++ goto out; ++ ++ /* Enable PCS Lock-loss workaround for ICH8 */ ++ if (mac->type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, TRUE); ++ ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_ich8lan - Initialize ICH8 function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific function pointers for PHY, MAC, and NVM. ++ **/ ++void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_ich8lan"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_ich8lan; ++ hw->func.init_nvm_params = e1000_init_nvm_params_ich8lan; ++ hw->func.init_phy_params = e1000_init_phy_params_ich8lan; ++} ++ ++/** ++ * e1000_acquire_swflag_ich8lan - Acquire software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the software control flag for performing NVM and PHY ++ * operations. This is a function pointer entry point only called by ++ * read/write routines for the PHY and NVM parts. 
++ **/ ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_acquire_swflag_ich8lan"); ++ ++ while (timeout) { ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("FW or HW has locked the resource for too long.\n"); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swflag_ich8lan - Release software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Releases the software control flag for performing NVM and PHY operations. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_release_swflag_ich8lan"); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ ++ return; ++} ++ ++/** ++ * e1000_check_mng_mode_ich8lan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has manageability enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_mng_mode_ich8lan"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Checks if firmware is blocking the reset of the PHY. ++ * This is a function pointer entry point only called by ++ * reset routines. ++ **/ ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_reset_block_ich8lan"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? E1000_SUCCESS ++ : E1000_BLK_PHY_RESET; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_ich8lan - Force PHY speed & duplex ++ * @hw: pointer to the HW structure ++ * ++ * Forces the speed and duplex settings of the PHY. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 e1000_phy_force_speed_duplex_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_ich8lan"); ++ ++ if (phy->type != e1000_phy_ife) { ++ ret_val = e1000_phy_force_speed_duplex_igp(hw); ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &data); ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable MDI-X support for 10/100 */ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IFE_PMC_AUTO_MDIX; ++ data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("IFE PMC: %X\n", data); ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Link taking longer than expected.\n"); ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_ich8lan - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; ++ s32 ret_val; ++ u16 loop = E1000_ICH8_LAN_INIT_TIMEOUT; ++ u16 word_addr, reg_data, reg_addr, phy_page = 0; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_ich8lan"); ++ ++ ret_val = e1000_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the PHY from the NVM on ICH platforms. This ++ * is needed due to an issue where the NVM configuration is ++ * not properly autoloaded after power transitions. ++ * Therefore, after each PHY reset, we will load the ++ * configuration data out of the NVM manually. ++ */ ++ if (hw->mac.type == e1000_ich8lan && phy->type == e1000_phy_igp_3) { ++ /* Check if SW needs configure the PHY */ ++ if ((hw->device_id == E1000_DEV_ID_ICH8_IGP_M_AMT) || ++ (hw->device_id == E1000_DEV_ID_ICH8_IGP_M)) ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; ++ else ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; ++ ++ data = E1000_READ_REG(hw, E1000_FEXTNVM); ++ if (!(data & sw_cfg_mask)) ++ goto out; ++ ++ /* Wait for basic configuration completes before proceeding*/ ++ do { ++ data = E1000_READ_REG(hw, E1000_STATUS); ++ data &= E1000_STATUS_LAN_INIT_DONE; ++ usec_delay(100); ++ } while ((!data) && --loop); ++ ++ /* ++ * If basic configuration is incomplete before the above loop ++ * count reaches 0, loading the configuration from NVM will ++ * leave the PHY in a bad state possibly resulting in no link. 
++ */ ++ if (loop == 0) { ++ DEBUGOUT("LAN_INIT_DONE not set, increase timeout\n"); ++ } ++ ++ /* Clear the Init Done bit for the next init event */ ++ data = E1000_READ_REG(hw, E1000_STATUS); ++ data &= ~E1000_STATUS_LAN_INIT_DONE; ++ E1000_WRITE_REG(hw, E1000_STATUS, data); ++ ++ /* ++ * Make sure HW does not configure LCD from PHY ++ * extended configuration before SW configuration ++ */ ++ data = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ if (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE) ++ goto out; ++ ++ cnf_size = E1000_READ_REG(hw, E1000_EXTCNF_SIZE); ++ cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; ++ cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; ++ if (!cnf_size) ++ goto out; ++ ++ cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; ++ cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; ++ ++ /* ++ * Configure LCD from extended configuration ++ * region. ++ */ ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (u16)(cnf_base_addr << 1); ++ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_nvm(hw, ++ (word_addr + i * 2), ++ 1, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, ++ (word_addr + i * 2 + 1), ++ 1, ++ ®_addr); ++ if (ret_val) ++ goto out; ++ ++ /* Save off the PHY page for future writes. */ ++ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { ++ phy_page = reg_data; ++ continue; ++ } ++ ++ reg_addr |= phy_page; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ (u32)reg_addr, ++ reg_data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ich8lan - Calls appropriate PHY type get_phy_info ++ * @hw: pointer to the HW structure ++ * ++ * Wrapper for calling the get_phy_info routines for the appropriate phy type. ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_get_phy_info_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_PHY_TYPE; ++ ++ DEBUGFUNC("e1000_get_phy_info_ich8lan"); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_ife: ++ ret_val = e1000_get_phy_info_ife_ich8lan(hw); ++ break; ++ case e1000_phy_igp_3: ++ ret_val = e1000_get_phy_info_igp(hw); ++ break; ++ default: ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ife_ich8lan - Retrieves various IFE PHY states ++ * @hw: pointer to the HW structure ++ * ++ * Populates "phy" structure with various feature states. ++ * This function is only called by other family-specific ++ * routines. ++ **/ ++static s32 e1000_get_phy_info_ife_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_ife_ich8lan"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ phy->polarity_correction = (data & IFE_PSC_AUTO_POLARITY_DISABLE) ++ ? FALSE : TRUE; ++ ++ if (phy->polarity_correction) { ++ ret_val = e1000_check_polarity_ife_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Polarity is forced */ ++ phy->cable_polarity = (data & IFE_PSC_FORCE_POLARITY) ++ ? 
e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IFE_PMC_MDIX_STATUS) ? TRUE : FALSE; ++ ++ /* The following parameters are undefined for 10/100 operation. */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_ife_ich8lan - Check cable polarity for IFE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Polarity is determined on the polarity reveral feature being enabled. ++ * This function is only called by other family-specific ++ * routines. ++ **/ ++static s32 e1000_check_polarity_ife_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_polarity_ife_ich8lan"); ++ ++ /* ++ * Polarity is determined based on the reversal feature ++ * being enabled. ++ */ ++ if (phy->polarity_correction) { ++ offset = IFE_PHY_EXTENDED_STATUS_CONTROL; ++ mask = IFE_PESC_POLARITY_REVERSED; ++ } else { ++ offset = IFE_PHY_SPECIAL_CONTROL; ++ mask = IFE_PSC_FORCE_POLARITY; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (phy_data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d0_lplu_state_ich8lan"); ++ ++ if (phy->type == e1000_phy_ife) ++ goto out; ++ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ ++ if (active) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D3 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, ++ bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_ich8lan"); ++ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ ++ if (!active) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 ++ * @hw: pointer to the HW structure ++ * @bank: pointer to the variable that returns the active bank ++ * ++ * Reads signature byte from the NVM using the flash access registers. ++ **/ ++static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_SEC1VAL) ++ *bank = 1; ++ else ++ *bank = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_ich8lan - Read word(s) from the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to read. ++ * @words: Size of data to read in words ++ * @data: Pointer to the word(s) to read at offset. ++ * ++ * Reads a word(s) from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 act_offset; ++ s32 ret_val = E1000_SUCCESS; ++ u32 bank = 0; ++ u16 i, word; ++ ++ DEBUGFUNC("e1000_read_nvm_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val != E1000_SUCCESS) ++ goto out; ++ ++ act_offset = (bank) ? 
nvm->flash_bank_size : 0; ++ act_offset += offset; ++ ++ for (i = 0; i < words; i++) { ++ if ((dev_spec->shadow_ram) && ++ (dev_spec->shadow_ram[offset+i].modified)) { ++ data[i] = dev_spec->shadow_ram[offset+i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, ++ act_offset + i, ++ &word); ++ if (ret_val) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_init_ich8lan - Initialize flash ++ * @hw: pointer to the HW structure ++ * ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ **/ ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_flash_cycle_init_ich8lan"); ++ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ ++ /* Check if the flash descriptor is valid */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ DEBUGOUT("Flash descriptor invalid. " ++ "SW Sequencing must be used."); ++ goto out; ++ } ++ ++ /* Clear FCERR and DAEL in hw status by writing 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* ++ * Either we should have a hardware SPI cycle in progress ++ * bit to check against, in order to start a new cycle or ++ * FDONE bit should be changed in the hardware so that it ++ * is 1 after harware reset, which can then be used as an ++ * indication whether a cycle is in progress or has been ++ * completed. ++ */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* ++ * There is no cycle running at present, ++ * so we can start a cycle. ++ * Begin by setting Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ++ ret_val = E1000_SUCCESS; ++ } else { ++ /* ++ * Otherwise poll for sometime so the current ++ * cycle has a chance to end before giving up. ++ */ ++ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ ret_val = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(1); ++ } ++ if (ret_val == E1000_SUCCESS) { ++ /* ++ * Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS, ++ hsfsts.regval); ++ } else { ++ DEBUGOUT("Flash controller busy, cannot get access"); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) ++ * @hw: pointer to the HW structure ++ * @timeout: maximum time to wait for completion ++ * ++ * This function starts a flash cycle and waits for its completion. 
++ **/ ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i = 0; ++ ++ DEBUGFUNC("e1000_flash_cycle_ich8lan"); ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ usec_delay(1); ++ } while (i++ < timeout); ++ ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) ++ ret_val = E1000_SUCCESS; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_word_ich8lan - Read word from flash ++ * @hw: pointer to the HW structure ++ * @offset: offset to data location ++ * @data: pointer to the location for storing the data ++ * ++ * Reads the flash word at offset into data. Offset is converted ++ * to bytes before read. ++ **/ ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_flash_word_ich8lan"); ++ ++ if (!data) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Must convert offset into bytes. */ ++ offset <<= 1; ++ ++ ret_val = e1000_read_flash_data_ich8lan(hw, offset, 2, data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_data_ich8lan - Read byte or word from NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte or word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: Pointer to the word to store the value read. ++ * ++ * Reads a byte or word from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16* data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ DEBUGFUNC("e1000_read_flash_data_ich8lan"); ++ ++ if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ goto out; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_READ_COMMAND_TIMEOUT); ++ ++ /* ++ * Check if FCERR is set to 1, if set to 1, clear it ++ * and try the whole sequence a few more times, else ++ * read in (shift in) the Flash Data0, the order is ++ * least significant byte first msb to lsb ++ */ ++ if (ret_val == E1000_SUCCESS) { ++ flash_data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); ++ if (size == 1) { ++ *data = (u8)(flash_data & 0x000000FF); ++ } else if (size == 2) { ++ *data = (u16)(flash_data & 0x0000FFFF); ++ } ++ break; ++ } else { ++ /* ++ * If we've gotten here, then things are probably ++ * completely hosed, but if the error condition is ++ * detected, it won't hurt to give it another try... ++ * ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_ich8lan - Write word(s) to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to write. ++ * @words: Size of data to write in words ++ * @data: Pointer to the word(s) to write at offset. ++ * ++ * Writes a byte or word to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_write_nvm_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ for (i = 0; i < words; i++) { ++ dev_spec->shadow_ram[offset+i].modified = TRUE; ++ dev_spec->shadow_ram[offset+i].value = data[i]; ++ } ++ ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM ++ * @hw: pointer to the HW structure ++ * ++ * The NVM checksum is updated by calling the generic update_nvm_checksum, ++ * which writes the checksum to the shadow ram. The changes in the shadow ++ * ram are then committed to the EEPROM by processing each bank at a time ++ * checking for the modified bit and writing only the pending changes. ++ * After a succesful commit, the shadow ram is cleared and is ready for ++ * future writes. 
++ **/ ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 i, act_offset, new_bank_offset, old_bank_offset, bank; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ ret_val = e1000_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ if (nvm->type != e1000_nvm_flash_sw) ++ goto out; ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written ++ */ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val != E1000_SUCCESS) ++ goto out; ++ ++ if (bank == 0) { ++ new_bank_offset = nvm->flash_bank_size; ++ old_bank_offset = 0; ++ e1000_erase_flash_bank_ich8lan(hw, 1); ++ } else { ++ old_bank_offset = nvm->flash_bank_size; ++ new_bank_offset = 0; ++ e1000_erase_flash_bank_ich8lan(hw, 0); ++ } ++ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ /* ++ * Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM ++ */ ++ if (dev_spec->shadow_ram[i].modified) { ++ data = dev_spec->shadow_ram[i].value; ++ } else { ++ e1000_read_flash_word_ich8lan(hw, ++ i + old_bank_offset, ++ &data); ++ } ++ ++ /* ++ * If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress ++ */ ++ if (i == E1000_ICH_NVM_SIG_WORD) ++ data |= E1000_ICH_NVM_SIG_MASK; ++ ++ /* Convert offset to bytes. */ ++ act_offset = (i + new_bank_offset) << 1; ++ ++ usec_delay(100); ++ /* Write the bytes to the new bank. */ ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset, ++ (u8)data); ++ if (ret_val) ++ break; ++ ++ usec_delay(100); ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * Don't bother writing the segment valid bits if sector ++ * programming failed. ++ */ ++ if (ret_val) { ++ DEBUGOUT("Flash commit failed.\n"); ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* ++ * Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b ++ */ ++ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ++ e1000_read_flash_word_ich8lan(hw, act_offset, &data); ++ data &= 0xBFFF; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset * 2 + 1, ++ (u8)(data >> 8)); ++ if (ret_val) { ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* ++ * And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase ++ */ ++ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); ++ if (ret_val) { ++ e1000_release_nvm(hw); ++ goto out; ++ } ++ ++ /* Great! Everything worked, we can now clear the cached entries. 
*/ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ dev_spec->shadow_ram[i].modified = FALSE; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ e1000_release_nvm(hw); ++ ++ /* ++ * Reload the EEPROM, or else modifications will not appear ++ * until after the next adapter reset. ++ */ ++ e1000_reload_nvm(hw); ++ msec_delay(10); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. ++ * If the bit is 0, that the EEPROM had been modified, but the checksum was ++ * not calculated, in which case we need to calculate the checksum and set ++ * bit 6. ++ **/ ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_validate_nvm_checksum_ich8lan"); ++ ++ /* ++ * Read 0x19 and check bit 6. If this bit is 0, the checksum ++ * needs to be fixed. This bit is an indication that the NVM ++ * was prepared by OEM software and did not calculate the ++ * checksum...a likely scenario. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & 0x40) == 0) { ++ data |= 0x40; ++ ret_val = e1000_write_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_update_nvm_checksum(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_validate_nvm_checksum_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_data_ich8lan - Writes bytes to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte/word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: The byte(s) to write to the NVM. ++ * ++ * Writes one/two bytes to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ DEBUGFUNC("e1000_write_ich8_data"); ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ goto out; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; ++ E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ++ ++ if (size == 1) ++ flash_data = (u32)data & 0x00FF; ++ else ++ flash_data = (u32)data; ++ ++ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, flash_data); ++ ++ /* ++ * check if FCERR is set to 1 , if set to 1, clear it ++ * and try the whole sequence a few more times else done ++ */ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_WRITE_COMMAND_TIMEOUT); ++ if (ret_val == E1000_SUCCESS) { ++ break; ++ } else { ++ /* ++ * If we're here, then things are most likely ++ * completely hosed, but if the error condition ++ * is detected, it won't hurt to give it another ++ * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. 
++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_byte_ich8lan - Write a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The index of the byte to read. ++ * @data: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 data) ++{ ++ u16 word = (u16)data; ++ ++ DEBUGFUNC("e1000_write_flash_byte_ich8lan"); ++ ++ return e1000_write_flash_data_ich8lan(hw, offset, 1, word); ++} ++ ++/** ++ * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to write. ++ * @byte: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ * Goes through a retry algorithm before giving up. ++ **/ ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte) ++{ ++ s32 ret_val; ++ u16 program_retries; ++ ++ DEBUGFUNC("e1000_retry_write_flash_byte_ich8lan"); ++ ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (ret_val == E1000_SUCCESS) ++ goto out; ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ DEBUGOUT2("Retrying Byte %2.2X at offset %u\n", byte, offset); ++ usec_delay(100); ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (ret_val == E1000_SUCCESS) ++ break; ++ } ++ if (program_retries == 100) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM ++ * @hw: pointer to the HW structure ++ * @bank: 0 for first bank, 1 for second bank, etc. ++ * ++ * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. ++ * bank N is 4096 * N + flash_reg_addr. ++ **/ ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ /* bank size is in 16bit words - adjust to bytes */ ++ u32 flash_bank_size = nvm->flash_bank_size * 2; ++ s32 ret_val = E1000_SUCCESS; ++ s32 count = 0; ++ s32 j, iteration, sector_size; ++ ++ DEBUGFUNC("e1000_erase_flash_bank_ich8lan"); ++ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); ++ ++ /* ++ * Determine HW Sector size: Read BERASE bits of hw flash status ++ * register ++ * 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector ++ * can be calculated as = bank * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. 
++ * The start index for the nth Hw sector can be calculated ++ * as = bank * 4096 ++ * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 ++ * (ich9 only, otherwise error condition) ++ * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 ++ */ ++ switch (hsfsts.hsf_status.berasesz) { ++ case 0: ++ /* Hw sector size 256 */ ++ sector_size = ICH_FLASH_SEG_SIZE_256; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; ++ break; ++ case 1: ++ sector_size = ICH_FLASH_SEG_SIZE_4K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_4K; ++ break; ++ case 2: ++ if (hw->mac.type == e1000_ich9lan) { ++ sector_size = ICH_FLASH_SEG_SIZE_8K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_8K; ++ } else { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ break; ++ case 3: ++ sector_size = ICH_FLASH_SEG_SIZE_64K; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_64K; ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Start with the base address, then add the sector offset. */ ++ flash_linear_addr = hw->nvm.flash_base_addr; ++ flash_linear_addr += (bank) ? (sector_size * iteration) : 0; ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Write a value 11 (block Erase) in Flash ++ * Cycle field in hw flash control ++ */ ++ hsflctl.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; ++ E1000_WRITE_FLASH_REG16(hw, ++ ICH_FLASH_HSFCTL, ++ hsflctl.regval); ++ ++ /* ++ * Write the last 24 bits of an index within the ++ * block into Flash Linear address field in Flash ++ * Address. ++ */ ++ flash_linear_addr += (j * sector_size); ++ E1000_WRITE_FLASH_REG(hw, ++ ICH_FLASH_FADDR, ++ flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_ERASE_COMMAND_TIMEOUT); ++ if (ret_val == E1000_SUCCESS) { ++ break; ++ } else { ++ /* ++ * Check if FCERR is set to 1. If 1, ++ * clear it and try the whole sequence ++ * a few more times else Done ++ */ ++ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ++ ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* ++ * repeat for some time before ++ * giving up ++ */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) ++ goto out; ++ } ++ } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_ich8lan - Set the default LED settings ++ * @hw: pointer to the HW structure ++ * @data: Pointer to the LED settings ++ * ++ * Reads the LED default settings from the NVM to data. If the NVM LED ++ * settings is all 0's or F's, set the LED default to a valid LED default ++ * setting. ++ **/ ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_ich8lan"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT_ICH8LAN; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_ich8lan - Get/Set the bus type and width ++ * @hw: pointer to the HW structure ++ * ++ * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability ++ * register, so the the bus width is hard coded. 
++ **/ ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_bus_info_ich8lan"); ++ ++ ret_val = e1000_get_bus_info_pcie_generic(hw); ++ ++ /* ++ * ICH devices are "PCI Express"-ish. They have ++ * a configuration space, but do not contain ++ * PCI Express Capability registers, so bus width ++ * must be hardcoded. ++ */ ++ if (bus->width == e1000_bus_width_unknown) ++ bus->width = e1000_bus_width_pcie_x1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_ich8lan - Reset the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Does a full reset of the hardware which includes a reset of the PHY and ++ * MAC. ++ **/ ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr, kab; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw_ich8lan"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ /* ++ * Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC ++ * with the global reset. ++ */ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac.type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ E1000_WRITE_REG(hw, E1000_PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. */ ++ E1000_WRITE_REG(hw, E1000_PBS, E1000_PBS_16K); ++ } ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ if (!e1000_check_reset_block(hw) && !hw->phy.reset_disable) { ++ /* ++ * PHY HW reset requires MAC CORE reset at the same ++ * time to make sure the interface between MAC and the ++ * external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ } ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ DEBUGOUT("Issuing a global reset to ich8lan"); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_RST)); ++ msec_delay(20); ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) { ++ /* ++ * When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. 
++ */ ++ DEBUGOUT("Auto Read Done did not complete\n"); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ kab = E1000_READ_REG(hw, E1000_KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ E1000_WRITE_REG(hw, E1000_KABGTXD, kab); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_ich8lan - Initialize the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Prepares the hardware for transmit and receive by doing the following: ++ * - initialize hardware bits ++ * - initialize LED identification ++ * - setup receive address registers ++ * - setup flow control ++ * - setup transmit discriptors ++ * - clear statistics ++ **/ ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl_ext, txdctl, snoop; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_ich8lan"); ++ ++ e1000_initialize_hw_bits_ich8lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy for both queues */ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), txdctl); ++ ++ /* ++ * ICH8 has opposite polarity of no_snoop bits. ++ * By default, we should use snoop behavior. ++ */ ++ if (mac->type == e1000_ich8lan) ++ snoop = PCIE_ICH8_SNOOP_ALL; ++ else ++ snoop = (u32)~(PCIE_NO_SNOOP_ALL); ++ e1000_set_pcie_no_snoop_generic(hw, snoop); ++ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_ich8lan(hw); ++ ++ return ret_val; ++} ++/** ++ * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits ++ * @hw: pointer to the HW structure ++ * ++ * Sets/Clears required hardware bits necessary for correctly setting up the ++ * hardware for transmit and receive. 
++ **/ ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_ich8lan"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Extended Device Control */ ++ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ if (hw->mac.type == e1000_ich8lan) ++ reg |= (1 << 28) | (1 << 29); ++ reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ reg |= (1 << 24) | (1 << 26) | (1 << 30); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ ++ /* Device Status */ ++ if (hw->mac.type == e1000_ich8lan) { ++ reg = E1000_READ_REG(hw, E1000_STATUS); ++ reg &= ~(1 << 31); ++ E1000_WRITE_REG(hw, E1000_STATUS, reg); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_setup_link_ich8lan - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_ich8lan"); ++ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* ++ * ICH parts do not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->fc.type == e1000_fc_default) ++ hw->fc.type = e1000_fc_full; ++ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Continue to configure the copper link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface ++ * @hw: pointer to the HW structure ++ * ++ * Configures the kumeran interface to the PHY to wait the appropriate time ++ * when polling the PHY, then call the generic setup_copper_link to finish ++ * configuring the copper link. ++ **/ ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_ich8lan"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each iteration ++ * and increase the max iterations when polling the phy; ++ * this fixes erroneous timeouts at 10Mbps. 
++ */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ goto out; ++ ++ if (hw->phy.type == e1000_phy_igp_3) { ++ ret_val = e1000_copper_link_setup_igp(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (hw->phy.type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, ®_data); ++ if (ret_val) ++ goto out; ++ ++ reg_data &= ~IFE_PMC_AUTO_MDIX; ++ ++ switch (hw->phy.mdix) { ++ case 1: ++ reg_data &= ~IFE_PMC_FORCE_MDIX; ++ break; ++ case 2: ++ reg_data |= IFE_PMC_FORCE_MDIX; ++ break; ++ case 0: ++ default: ++ reg_data |= IFE_PMC_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, reg_data); ++ if (ret_val) ++ goto out; ++ } ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_ich8lan - Get current link speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to store current link speed ++ * @duplex: pointer to store the current link duplex ++ * ++ * Calls the generic get_speed_and_duplex to retreive the current link ++ * information and then calls the Kumeran lock loss workaround for links at ++ * gigabit speeds. ++ **/ ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_link_up_info_ich8lan"); ++ ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); ++ if (ret_val) ++ goto out; ++ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (*speed == SPEED_1000)) { ++ ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround ++ * @hw: pointer to the HW structure ++ * ++ * Work-around for 82566 Kumeran PCS lock loss: ++ * On link status change (i.e. PCI reset, speed change) and link is up and ++ * speed is gigabit- ++ * 0) if workaround is optionally disabled do nothing ++ * 1) wait 1ms for Kumeran link to come up ++ * 2) check Kumeran Diagnostic register PCS lock loss bit ++ * 3) if not set the link is locked (all is good), otherwise... ++ * 4) reset the PHY ++ * 5) repeat up to 10 times ++ * Note: this is only called for IGP3 copper when speed is 1gb. ++ **/ ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ u32 phy_ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, data; ++ bool link; ++ ++ DEBUGFUNC("e1000_kmrn_lock_loss_workaround_ich8lan"); ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if (!(dev_spec->kmrn_lock_loss_workaround_enabled)) ++ goto out; ++ ++ /* ++ * Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouled up link ++ * stability ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (!link) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ for (i = 0; i < 10; i++) { ++ /* read once to clear */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ goto out; ++ /* and again to get new status */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ goto out; ++ ++ /* check for PCS lock */ ++ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ msec_delay_irq(5); ++ } ++ /* Disable GigE link negotiation */ ++ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on Giga disable before accessing ++ * any PHY registers ++ */ ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* unable to acquire PCS lock */ ++ ret_val = -E1000_ERR_PHY; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_kmrn_lock_loss_workaound_ich8lan - Set Kumeran workaround state ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to set the current Kumaran workaround state ++ * ++ * If ICH8, set the current Kumeran workaround state (enabled - TRUE ++ * /disabled - FALSE). ++ **/ ++void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec; ++ ++ DEBUGFUNC("e1000_set_kmrn_lock_loss_workaround_ich8lan"); ++ ++ if (hw->mac.type != e1000_ich8lan) { ++ DEBUGOUT("Workaround applies to ICH8 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_ich8lan *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ dev_spec->kmrn_lock_loss_workaround_enabled = state; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 ++ * @hw: pointer to the HW structure ++ * ++ * Workaround for 82566 power-down on D3 entry: ++ * 1) disable gigabit link ++ * 2) write VR power-down enable ++ * 3) read it back ++ * Continue if successful, else issue LCD reset and repeat ++ **/ ++void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ u16 data; ++ u8 retry = 0; ++ ++ DEBUGFUNC("e1000_igp3_phy_powerdown_workaround_ich8lan"); ++ ++ if (hw->phy.type != e1000_phy_igp_3) ++ goto out; ++ ++ /* Try the workaround twice (if needed) */ ++ do { ++ /* Disable link */ ++ reg = E1000_READ_REG(hw, E1000_PHY_CTRL); ++ reg |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ E1000_WRITE_REG(hw, E1000_PHY_CTRL, reg); ++ ++ /* ++ * Call gig speed drop workaround on Giga disable before ++ * accessing any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_gig_downshift_workaround_ich8lan(hw); ++ ++ /* Write VR power-down enable */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &data); ++ data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ e1000_write_phy_reg(hw, ++ IGP3_VR_CTRL, ++ data | IGP3_VR_CTRL_MODE_SHUTDOWN); ++ ++ /* Read it back and test */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &data); ++ data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = E1000_READ_REG(hw, E1000_CTRL); 
++ E1000_WRITE_REG(hw, E1000_CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_gig_downshift_workaround_ich8lan - WoL from S5 stops working ++ * @hw: pointer to the HW structure ++ * ++ * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), ++ * LPLU, Giga disable, MDIC PHY reset): ++ * 1) Set Kumeran Near-end loopback ++ * 2) Clear Kumeran Near-end loopback ++ * Should only be called for ICH8[m] devices with IGP_3 Phy. ++ **/ ++void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_gig_downshift_workaround_ich8lan"); ++ ++ if ((hw->mac.type != e1000_ich8lan) || ++ (hw->phy.type != e1000_phy_igp_3)) ++ goto out; ++ ++ ret_val = e1000_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++ if (ret_val) ++ goto out; ++ reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++out: ++ return; ++} ++ ++/** ++ * e1000_cleanup_led_ich8lan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. ++ **/ ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ 0); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_led_on_ich8lan - Turn LED's on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LED's. ++ **/ ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_led_on_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_led_off_ich8lan - Turn LED's off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LED's. ++ **/ ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_led_off_ich8lan"); ++ ++ if (hw->phy.type == e1000_phy_ife) ++ ret_val = e1000_write_phy_reg(hw, ++ IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); ++ else ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_ich8lan - Read config done bit ++ * @hw: pointer to the HW structure ++ * ++ * Read the management control register for the config done bit for ++ * completion status. NOTE: silicon which is EEPROM-less will fail trying ++ * to read the config done bit, so an error is *ONLY* logged and returns ++ * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon ++ * would not be able to be reset or change link. 
++ **/ ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ e1000_get_cfg_done_generic(hw); ++ ++ /* If EEPROM is not marked present, init the IGP 3 PHY manually */ ++ if (((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) { ++ e1000_phy_init_script_igp3(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears hardware counters specific to the silicon family and calls ++ * clear_hw_cntrs_generic to clear all general purpose counters. ++ **/ ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_ich8lan"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.h 2021-04-07 16:01:27.738633436 +0800 +@@ -0,0 +1,86 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_MAC_H_ ++#define _E1000_MAC_H_ ++ ++/* ++ * Functions that should not be called directly from drivers but can be used ++ * by other files in this 'shared code' ++ */ ++s32 e1000_blink_led_generic(struct e1000_hw *hw); ++s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); ++s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); ++s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_cleanup_led_generic(struct e1000_hw *hw); ++s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw); ++s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); ++s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); ++s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); ++s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); ++s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); ++s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); ++s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); ++s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex); ++s32 e1000_id_led_init_generic(struct e1000_hw *hw); ++s32 e1000_led_on_generic(struct e1000_hw *hw); ++s32 e1000_led_off_generic(struct e1000_hw *hw); ++void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_set_default_fc_generic(struct e1000_hw *hw); ++s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); ++s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); ++s32 e1000_setup_led_generic(struct e1000_hw *hw); ++s32 e1000_setup_link_generic(struct e1000_hw *hw); ++s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); ++s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++ ++u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); ++ ++void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); ++void e1000_clear_vfta_generic(struct e1000_hw *hw); ++void e1000_config_collision_dist_generic(struct e1000_hw *hw); ++void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); ++void e1000_mta_set_generic(struct e1000_hw *hw, u32 hash_value); ++void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); ++void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); ++void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); ++void e1000_remove_device_generic(struct e1000_hw *hw); ++void e1000_reset_adaptive_generic(struct e1000_hw *hw); ++void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); ++void e1000_update_adaptive_generic(struct e1000_hw *hw); ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000.h 2021-04-07 16:01:27.734633442 +0800 +@@ -0,0 +1,425 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include "kcompat.h" ++ ++#include "e1000_api.h" ++ ++#define BAR_0 0 ++#define BAR_1 1 ++#define BAR_5 5 ++ ++#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ ++struct e1000_adapter; ++ ++#define E1000_DBG(args...) ++ ++#define E1000_ERR(args...) printk(KERN_ERR "e1000: " args) ++ ++#define PFX "e1000: " ++#define DPRINTK(nlevel, klevel, fmt, args...) \ ++ (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ ++ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ ++ __FUNCTION__ , ## args)) ++ ++#define E1000_MAX_INTR 10 ++ ++/* TX/RX descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 256 ++#define E1000_MIN_TXD 80 ++#define E1000_MAX_82544_TXD 4096 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 256 ++ ++#define E1000_MIN_RXD 80 ++#define E1000_MAX_82544_RXD 4096 ++ ++#define E1000_MIN_ITR_USECS 10 /* 100000 irq/sec */ ++#define E1000_MAX_ITR_USECS 10000 /* 100 irq/sec */ ++ ++#ifdef CONFIG_E1000_MQ ++#define E1000_MAX_TX_QUEUES 4 ++#endif ++ ++/* this is the size past which hardware will drop packets when setting LPE=0 */ ++#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 ++ ++/* Supported Rx Buffer Sizes */ ++#define E1000_RXBUFFER_128 128 /* Used for packet split */ ++#define E1000_RXBUFFER_256 256 /* Used for packet split */ ++#define E1000_RXBUFFER_512 512 ++#define E1000_RXBUFFER_1024 1024 ++#define E1000_RXBUFFER_2048 2048 ++#define E1000_RXBUFFER_4096 4096 ++#define E1000_RXBUFFER_8192 8192 ++#define E1000_RXBUFFER_16384 16384 ++ ++/* SmartSpeed delimiters */ ++#define E1000_SMARTSPEED_DOWNSHIFT 3 ++#define E1000_SMARTSPEED_MAX 15 ++ ++/* Packet Buffer allocations */ ++#define E1000_PBA_BYTES_SHIFT 0xA ++#define E1000_TX_HEAD_ADDR_SHIFT 7 ++#define E1000_PBA_TX_MASK 0xFFFF0000 ++ ++/* Early Receive defines */ ++#define E1000_ERT_2048 0x100 ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++#define E1000_TX_QUEUE_WAKE 16 ++/* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_82544_APM 0x0004 ++#define E1000_EEPROM_APME 0x0400 ++ ++#ifndef E1000_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define E1000_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#define E1000_MNG_VLAN_NONE -1 ++#endif ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS MAX_PS_BUFFERS-1 ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer */ ++struct e1000_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ unsigned long time_stamp; ++ u16 length; ++ u16 next_to_watch; ++}; ++ ++struct e1000_rx_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ struct page *page; ++}; ++ ++#ifdef CONFIG_E1000_MQ ++struct e1000_queue_stats { ++ u64 packets; ++ u64 bytes; ++}; ++#endif ++ ++struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; }; ++struct e1000_ps_page_dma { u64 ps_page_dma[PS_PAGE_BUFFERS]; }; ++ ++struct e1000_tx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++#ifdef CONFIG_E1000_MQ ++ /* for tx ring cleanup - needed for multiqueue */ ++ spinlock_t tx_queue_lock; ++#endif ++ rtdm_lock_t tx_lock; ++ u16 tdh; ++ u16 tdt; ++#ifdef CONFIG_E1000_MQ ++ struct e1000_queue_stats tx_stats; ++#endif ++ bool last_tx_tso; ++}; ++ ++struct e1000_rx_ring { ++ struct e1000_adapter *adapter; /* back link */ ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++#ifdef CONFIG_E1000_NAPI ++ struct napi_struct napi; ++#endif ++ /* array of buffer information structs */ ++ struct e1000_rx_buffer *buffer_info; ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ struct sk_buff *rx_skb_top; ++ ++ /* cpu for rx queue */ ++ int cpu; ++ ++ u16 rdh; ++ u16 rdt; ++#ifdef CONFIG_E1000_MQ ++ struct e1000_queue_stats rx_stats; ++#endif ++}; ++ ++#define E1000_DESC_UNUSED(R) \ ++ ((((R)->next_to_clean > (R)->next_to_use) ? 
0 : (R)->count) + \ ++ (R)->next_to_clean - (R)->next_to_use - 1) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++#ifdef SIOCGMIIPHY ++/* PHY register snapshot values */ ++struct e1000_phy_regs { ++ u16 bmcr; /* basic mode control register */ ++ u16 bmsr; /* basic mode status register */ ++ u16 advertise; /* auto-negotiation advertisement */ ++ u16 lpa; /* link partner ability register */ ++ u16 expansion; /* auto-negotiation expansion reg */ ++ u16 ctrl1000; /* 1000BASE-T control register */ ++ u16 stat1000; /* 1000BASE-T status register */ ++ u16 estatus; /* extended status register */ ++}; ++#endif ++ ++/* board specific private data structure */ ++ ++struct e1000_adapter { ++#ifdef NETIF_F_HW_VLAN_TX ++ struct vlan_group *vlgrp; ++ u16 mng_vlan_id; ++#endif ++ u32 bd_number; ++ u32 rx_buffer_len; ++ u32 wol; ++ u32 smartspeed; ++ u32 en_mng_pt; ++ u16 link_speed; ++ u16 link_duplex; ++ rtdm_lock_t stats_lock; ++#ifdef CONFIG_E1000_NAPI ++ spinlock_t tx_queue_lock; ++#endif ++ atomic_t irq_sem; ++ unsigned int total_tx_bytes; ++ unsigned int total_tx_packets; ++ unsigned int total_rx_bytes; ++ unsigned int total_rx_packets; ++ /* Interrupt Throttle Rate */ ++ u32 itr; ++ u32 itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ bool fc_autoneg; ++ ++#ifdef ETHTOOL_PHYS_ID ++ struct timer_list blink_timer; ++ unsigned long led_status; ++#endif ++ ++ /* TX */ ++ struct e1000_tx_ring *tx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_MQ ++ struct e1000_tx_ring **cpu_tx_ring; /* per-cpu */ ++#endif ++ unsigned int restart_queue; ++ unsigned long tx_queue_len; ++ u32 txd_cmd; ++ u32 tx_int_delay; ++ u32 tx_abs_int_delay; ++ u32 gotc; ++ u64 gotc_old; ++ u64 tpt_old; ++ u64 colc_old; ++ u32 tx_timeout_count; ++ u32 tx_fifo_head; ++ u32 tx_head_addr; ++ u32 tx_fifo_size; ++ u8 tx_timeout_factor; ++ atomic_t tx_fifo_stall; ++ bool pcix_82544; ++ bool detect_tx_hung; ++ ++ /* RX */ ++#ifdef CONFIG_E1000_NAPI ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++#else ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++#endif ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++ struct e1000_rx_ring *rx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_NAPI ++ //struct napi_struct napi; ++#endif ++ int num_tx_queues; ++ int num_rx_queues; ++ ++ u64 hw_csum_err; ++ u64 hw_csum_good; ++ u64 rx_hdr_split; ++ u32 alloc_rx_buff_failed; ++ u32 rx_int_delay; ++ u32 rx_abs_int_delay; ++ bool rx_csum; ++ unsigned int rx_ps_pages; ++ u32 gorc; ++ u64 gorc_old; ++ u16 rx_ps_bsize0; ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ struct net_device_stats net_stats; ++ ++ rtdm_irq_t irq_handle; ++ char data_received; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ ++#ifdef 
SIOCGMIIPHY ++ /* Snapshot of PHY registers */ ++ struct e1000_phy_regs phy_regs; ++#endif ++ ++#ifdef ETHTOOL_TEST ++ u32 test_icr; ++ struct e1000_tx_ring test_tx_ring; ++ struct e1000_rx_ring test_rx_ring; ++#endif ++ ++ ++ int msg_enable; ++ /* to not mess up cache alignment, always add to the bottom */ ++ unsigned long state; ++ u32 eeprom_wol; ++ ++ u32 *config_space; ++ ++ /* hardware capability, feature, and workaround flags */ ++ unsigned int flags; ++ ++ struct work_struct reset_task; ++ struct delayed_work watchdog_task; ++ struct delayed_work fifo_stall_task; ++ struct delayed_work phy_info_task; ++}; ++ ++#define E1000_FLAG_HAS_SMBUS (1 << 0) ++#define E1000_FLAG_HAS_MANC2H (1 << 1) ++#define E1000_FLAG_HAS_MSI (1 << 2) ++#define E1000_FLAG_MSI_ENABLED (1 << 3) ++#define E1000_FLAG_HAS_INTR_MODERATION (1 << 4) ++#define E1000_FLAG_RX_NEEDS_RESTART (1 << 5) ++#define E1000_FLAG_BAD_TX_CARRIER_STATS_FD (1 << 6) ++#define E1000_FLAG_INT_ASSERT_AUTO_MASK (1 << 7) ++#define E1000_FLAG_QUAD_PORT_A (1 << 8) ++#define E1000_FLAG_SMART_POWER_DOWN (1 << 9) ++#ifdef NETIF_F_TSO ++#define E1000_FLAG_HAS_TSO (1 << 10) ++#ifdef NETIF_F_TSO6 ++#define E1000_FLAG_HAS_TSO6 (1 << 11) ++#endif ++#define E1000_FLAG_TSO_FORCE (1 << 12) ++#endif ++#define E1000_FLAG_RX_RESTART_NOW (1 << 13) ++ ++enum e1000_state_t { ++ __E1000_TESTING, ++ __E1000_RESETTING, ++ __E1000_DOWN ++}; ++ ++extern char e1000_driver_name[]; ++extern const char e1000_driver_version[]; ++ ++extern void e1000_power_up_phy(struct e1000_hw *hw); ++ ++extern void e1000_set_ethtool_ops(struct net_device *netdev); ++extern void e1000_check_options(struct e1000_adapter *adapter); ++ ++extern int e1000_up(struct e1000_adapter *adapter); ++extern void e1000_down(struct e1000_adapter *adapter); ++extern void e1000_reinit_locked(struct e1000_adapter *adapter); ++extern void e1000_reset(struct e1000_adapter *adapter); ++extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, u16 spddplx); ++extern int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); ++extern int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); ++extern void e1000_free_all_rx_resources(struct e1000_adapter *adapter); ++extern void e1000_free_all_tx_resources(struct e1000_adapter *adapter); ++extern void e1000_update_stats(struct e1000_adapter *adapter); ++#ifdef ETHTOOL_OPS_COMPAT ++extern int ethtool_ioctl(struct ifreq *ifr); ++#endif ++ ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_hw.h 2021-04-07 16:01:27.729633449 +0800 +@@ -0,0 +1,711 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include "e1000_osdep.h" ++#include "e1000_regs.h" ++#include "e1000_defines.h" ++ ++struct e1000_hw; ++ ++#define E1000_DEV_ID_82542 0x1000 ++#define E1000_DEV_ID_82543GC_FIBER 0x1001 ++#define E1000_DEV_ID_82543GC_COPPER 0x1004 ++#define E1000_DEV_ID_82544EI_COPPER 0x1008 ++#define E1000_DEV_ID_82544EI_FIBER 0x1009 ++#define E1000_DEV_ID_82544GC_COPPER 0x100C ++#define E1000_DEV_ID_82544GC_LOM 0x100D ++#define E1000_DEV_ID_82540EM 0x100E ++#define E1000_DEV_ID_82540EM_LOM 0x1015 ++#define E1000_DEV_ID_82540EP_LOM 0x1016 ++#define E1000_DEV_ID_82540EP 0x1017 ++#define E1000_DEV_ID_82540EP_LP 0x101E ++#define E1000_DEV_ID_82545EM_COPPER 0x100F ++#define E1000_DEV_ID_82545EM_FIBER 0x1011 ++#define E1000_DEV_ID_82545GM_COPPER 0x1026 ++#define E1000_DEV_ID_82545GM_FIBER 0x1027 ++#define E1000_DEV_ID_82545GM_SERDES 0x1028 ++#define E1000_DEV_ID_82546EB_COPPER 0x1010 ++#define E1000_DEV_ID_82546EB_FIBER 0x1012 ++#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D ++#define E1000_DEV_ID_82546GB_COPPER 0x1079 ++#define E1000_DEV_ID_82546GB_FIBER 0x107A ++#define E1000_DEV_ID_82546GB_SERDES 0x107B ++#define E1000_DEV_ID_82546GB_PCIE 0x108A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 ++#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 ++#define E1000_DEV_ID_82541EI 0x1013 ++#define E1000_DEV_ID_82541EI_MOBILE 0x1018 ++#define E1000_DEV_ID_82541ER_LOM 0x1014 ++#define E1000_DEV_ID_82541ER 0x1078 ++#define E1000_DEV_ID_82541GI 0x1076 ++#define E1000_DEV_ID_82541GI_LF 0x107C ++#define E1000_DEV_ID_82541GI_MOBILE 0x1077 ++#define E1000_DEV_ID_82547EI 0x1019 ++#define E1000_DEV_ID_82547EI_MOBILE 0x101A ++#define E1000_DEV_ID_82547GI 0x1075 ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 ++#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 ++#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD ++#define E1000_DEV_ID_ICH9_IGP_C 0x294C ++#define E1000_DEV_ID_ICH9_IFE 0x10C0 ++#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 ++#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 ++ ++#define E1000_REVISION_0 0 ++#define E1000_REVISION_1 1 
++#define E1000_REVISION_2 2 ++#define E1000_REVISION_3 3 ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_0 0 ++#define E1000_FUNC_1 1 ++ ++typedef enum { ++ e1000_undefined = 0, ++ e1000_82542, ++ e1000_82543, ++ e1000_82544, ++ e1000_82540, ++ e1000_82545, ++ e1000_82545_rev_3, ++ e1000_82546, ++ e1000_82546_rev_3, ++ e1000_82541, ++ e1000_82541_rev_2, ++ e1000_82547, ++ e1000_82547_rev_2, ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_ich9lan, ++ e1000_num_macs /* List is 1-based, so subtract 1 for true count. */ ++} e1000_mac_type; ++ ++typedef enum { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++} e1000_media_type; ++ ++typedef enum { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_eeprom_microwire, ++ e1000_nvm_flash_hw, ++ e1000_nvm_flash_sw ++} e1000_nvm_type; ++ ++typedef enum { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large, ++ e1000_nvm_override_microwire_small, ++ e1000_nvm_override_microwire_large ++} e1000_nvm_override; ++ ++typedef enum { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++} e1000_phy_type; ++ ++typedef enum { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++} e1000_bus_type; ++ ++typedef enum { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_5000, ++ e1000_bus_speed_reserved ++} e1000_bus_speed; ++ ++typedef enum { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_pcie_x8 = 8, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++} e1000_bus_width; ++ ++typedef enum { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++} e1000_1000t_rx_status; ++ ++typedef enum { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++} e1000_rev_polarity; ++ ++typedef enum { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++} e1000_fc_type; ++ ++typedef enum { ++ e1000_ffe_config_enabled = 0, ++ e1000_ffe_config_active, ++ e1000_ffe_config_blocked ++} e1000_ffe_config; ++ ++typedef enum { ++ e1000_dsp_config_disabled = 0, ++ e1000_dsp_config_enabled, ++ e1000_dsp_config_activated, ++ e1000_dsp_config_undefined = 0xFF ++} e1000_dsp_config; ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ u64 buffer_addr; /* Address of the descriptor's data buffer */ ++ u16 length; /* Length of data DMAed into data buffer */ ++ u16 csum; /* Packet checksum */ ++ u8 status; /* Descriptor status */ ++ u8 errors; /* Descriptor Errors */ ++ u16 special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ u64 buffer_addr; ++ u64 reserved; ++ } read; ++ struct { ++ struct { ++ u32 mrq; /* Multiple Rx Queues */ ++ union { ++ u32 rss; /* RSS Hash */ ++ struct { ++ u16 ip_id; /* IP id */ ++ u16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ u32 
status_error; /* ext status/error */ ++ u16 length; ++ u16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ u64 buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ u32 mrq; /* Multiple Rx Queues */ ++ union { ++ u32 rss; /* RSS Hash */ ++ struct { ++ u16 ip_id; /* IP id */ ++ u16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ u32 status_error; /* ext status/error */ ++ u16 length0; /* length of buffer 0 */ ++ u16 vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ u16 header_status; ++ u16 length[3]; /* length of buffers 1-3 */ ++ } upper; ++ u64 reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ u64 buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ u32 data; ++ struct { ++ u16 length; /* Data buffer length */ ++ u8 cso; /* Checksum offset */ ++ u8 cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 css; /* Checksum start */ ++ u16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ u32 ip_config; ++ struct { ++ u8 ipcss; /* IP checksum start */ ++ u8 ipcso; /* IP checksum offset */ ++ u16 ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ u32 tcp_config; ++ struct { ++ u8 tucss; /* TCP checksum start */ ++ u8 tucso; /* TCP checksum offset */ ++ u16 tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ u32 cmd_and_length; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 hdr_len; /* Header length */ ++ u16 mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ u64 buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ u32 data; ++ struct { ++ u16 length; /* Data buffer length */ ++ u8 typ_len_ext; ++ u8 cmd; ++ } flags; ++ } lower; ++ union { ++ u32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 popts; /* Packet Options */ ++ u16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++ u64 cbtmpc; ++ u64 htdpmc; ++ u64 cbrdpc; ++ u64 cbrmpc; ++ u64 rpthc; ++ u64 hgptc; ++ u64 htcbdpc; ++ u64 hgorc; ++ u64 hgotc; ++ u64 lenerrs; ++ u64 scvpc; ++ u64 hrmpc; ++}; ++ ++struct e1000_phy_stats { ++ u32 idle_errors; 
++ u32 receive_errors; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++#include "e1000_mac.h" ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_manage.h" ++ ++struct e1000_functions { ++ /* Function pointers for the MAC. */ ++ s32 (*init_mac_params)(struct e1000_hw *); ++ s32 (*blink_led)(struct e1000_hw *); ++ s32 (*check_for_link)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *hw); ++ s32 (*cleanup_led)(struct e1000_hw *); ++ void (*clear_hw_cntrs)(struct e1000_hw *); ++ void (*clear_vfta)(struct e1000_hw *); ++ s32 (*get_bus_info)(struct e1000_hw *); ++ s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*led_on)(struct e1000_hw *); ++ s32 (*led_off)(struct e1000_hw *); ++ void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32, u32, ++ u32); ++ void (*remove_device)(struct e1000_hw *); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ s32 (*setup_link)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ s32 (*setup_led)(struct e1000_hw *); ++ void (*write_vfta)(struct e1000_hw *, u32, u32); ++ void (*mta_set)(struct e1000_hw *, u32); ++ void (*config_collision_dist)(struct e1000_hw*); ++ void (*rar_set)(struct e1000_hw*, u8*, u32); ++ s32 (*read_mac_addr)(struct e1000_hw*); ++ s32 (*validate_mdi_setting)(struct e1000_hw*); ++ s32 (*mng_host_if_write)(struct e1000_hw*, u8*, u16, u16, u8*); ++ s32 (*mng_write_cmd_header)(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header*); ++ s32 (*mng_enable_host_if)(struct e1000_hw*); ++ s32 (*wait_autoneg)(struct e1000_hw*); ++ ++ /* Function pointers for the PHY. */ ++ s32 (*init_phy_params)(struct e1000_hw *); ++ s32 (*acquire_phy)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*commit_phy)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_phy_info)(struct e1000_hw *); ++ s32 (*read_phy_reg)(struct e1000_hw *, u32, u16 *); ++ void (*release_phy)(struct e1000_hw *); ++ s32 (*reset_phy)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_phy_reg)(struct e1000_hw *, u32, u16); ++ void (*power_up_phy)(struct e1000_hw *); ++ void (*power_down_phy)(struct e1000_hw *); ++ ++ /* Function pointers for the NVM. 
*/ ++ s32 (*init_nvm_params)(struct e1000_hw *); ++ s32 (*acquire_nvm)(struct e1000_hw *); ++ s32 (*read_nvm)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release_nvm)(struct e1000_hw *); ++ void (*reload_nvm)(struct e1000_hw *); ++ s32 (*update_nvm)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++ s32 (*validate_nvm)(struct e1000_hw *); ++ s32 (*write_nvm)(struct e1000_hw *, u16, u16, u16 *); ++}; ++ ++struct e1000_mac_info { ++ u8 addr[6]; ++ u8 perm_addr[6]; ++ ++ e1000_mac_type type; ++ ++ u32 collision_delta; ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 tx_packet_delta; ++ u32 txcw; ++ ++ u16 current_ifs_val; ++ u16 ifs_max_val; ++ u16 ifs_min_val; ++ u16 ifs_ratio; ++ u16 ifs_step_size; ++ u16 mta_reg_count; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool arc_subsystem_valid; ++ bool asf_firmware_present; ++ bool autoneg; ++ bool autoneg_failed; ++ bool disable_av; ++ bool disable_hw_init_bits; ++ bool get_link_status; ++ bool ifs_params_forced; ++ bool in_ifs_mode; ++ bool report_tx_early; ++ bool serdes_has_link; ++ bool tx_pkt_filtering; ++}; ++ ++struct e1000_phy_info { ++ e1000_phy_type type; ++ ++ e1000_1000t_rx_status local_rx; ++ e1000_1000t_rx_status remote_rx; ++ e1000_ms_type ms_type; ++ e1000_ms_type original_ms_type; ++ e1000_rev_polarity cable_polarity; ++ e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool reset_disable; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ e1000_nvm_type type; ++ e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ e1000_bus_type type; ++ e1000_bus_speed speed; ++ e1000_bus_width width; ++ ++ u32 snoop; ++ ++ u16 func; ++ u16 pci_cmd_word; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ e1000_fc_type type; /* Type of flow control */ ++ e1000_fc_type original_type; ++}; ++ ++struct e1000_hw { ++ void *back; ++ void *dev_spec; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ unsigned long io_base; ++ ++ struct e1000_functions func; ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct e1000_bus_info bus; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ u32 dev_spec_size; ++ ++ u16 device_id; ++ u16 subsystem_vendor_id; ++ u16 subsystem_device_id; ++ u16 vendor_id; ++ ++ u8 revision_id; ++}; ++ ++/* These functions must be implemented by drivers */ ++void e1000_pci_clear_mwi(struct e1000_hw *hw); ++void e1000_pci_set_mwi(struct e1000_hw *hw); ++s32 e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, u32 size); ++s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++void e1000_free_dev_spec_struct(struct e1000_hw *hw); ++void e1000_read_pci_cfg(struct 
e1000_hw *hw, u32 reg, u16 *value); ++void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ich8lan.h 2021-04-07 16:01:27.724633456 +0800 +@@ -0,0 +1,110 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_ICH8LAN_H_ ++#define _E1000_ICH8LAN_H_ ++ ++#define ICH_FLASH_GFPREG 0x0000 ++#define ICH_FLASH_HSFSTS 0x0004 ++#define ICH_FLASH_HSFCTL 0x0006 ++#define ICH_FLASH_FADDR 0x0008 ++#define ICH_FLASH_FDATA0 0x0010 ++ ++#define ICH_FLASH_READ_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_WRITE_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_ERASE_COMMAND_TIMEOUT 3000000 ++#define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++#define ICH_FLASH_CYCLE_REPEAT_COUNT 10 ++ ++#define ICH_CYCLE_READ 0 ++#define ICH_CYCLE_WRITE 2 ++#define ICH_CYCLE_ERASE 3 ++ ++#define FLASH_GFPREG_BASE_MASK 0x1FFF ++#define FLASH_SECTOR_ADDR_SHIFT 12 ++ ++#define E1000_SHADOW_RAM_WORDS 2048 ++ ++#define ICH_FLASH_SEG_SIZE_256 256 ++#define ICH_FLASH_SEG_SIZE_4K 4096 ++#define ICH_FLASH_SEG_SIZE_8K 8192 ++#define ICH_FLASH_SEG_SIZE_64K 65536 ++#define ICH_FLASH_SECTOR_SIZE 4096 ++ ++#define ICH_FLASH_REG_MAPSIZE 0x00A0 ++ ++#define E1000_ICH_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI Reset */ ++#define E1000_ICH_FWSM_DISSW 0x10000000 /* FW Disables SW Writes */ ++/* FW established a valid mode */ ++#define E1000_ICH_FWSM_FW_VALID 0x00008000 ++ ++#define E1000_ICH_MNG_IAMT_MODE 0x2 ++ ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_ICH_NVM_SIG_WORD 0x13 ++#define E1000_ICH_NVM_SIG_MASK 0xC000 ++ ++#define E1000_ICH8_LAN_INIT_TIMEOUT 1500 ++ ++#define E1000_FEXTNVM_SW_CONFIG 1 ++#define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M */ ++ ++#define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL ++ ++#define E1000_ICH_RAR_ENTRIES 7 ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++#define IGP3_KMRN_DIAG PHY_REG(770, 19) /* KMRN Diagnostic */ ++#define IGP3_VR_CTRL PHY_REG(776, 18) /* Voltage Regulator Control */ ++#define IGP3_CAPABILITY PHY_REG(776, 19) /* Capability */ ++#define IGP3_PM_CTRL PHY_REG(769, 20) /* 
Power Management Control */ ++ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 ++#define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK 0x0300 ++#define IGP3_VR_CTRL_MODE_SHUTDOWN 0x0200 ++#define IGP3_PM_CTRL_FORCE_PWR_DOWN 0x0020 ++ ++/* ++ * Additional interrupts need to be handled for ICH family: ++ * DSW = The FW changed the status of the DISSW bit in FWSM ++ * PHYINT = The LAN connected device generates an interrupt ++ * EPRST = Manageability reset event ++ */ ++#define IMS_ICH_ENABLE_MASK (\ ++ E1000_IMS_DSW | \ ++ E1000_IMS_PHYINT | \ ++ E1000_IMS_EPRST) ++ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/Makefile 2021-04-07 16:01:27.719633463 +0800 +@@ -0,0 +1,19 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000_NEW) += rt_e1000_new.o ++ ++rt_e1000_new-y := \ ++ e1000_80003es2lan.o \ ++ e1000_82540.o \ ++ e1000_82541.o \ ++ e1000_82542.o \ ++ e1000_82543.o \ ++ e1000_82571.o \ ++ e1000_api.o \ ++ e1000_ich8lan.o \ ++ e1000_mac.o \ ++ e1000_main.o \ ++ e1000_manage.o \ ++ e1000_nvm.o \ ++ e1000_param.o \ ++ e1000_phy.o +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.c 2021-04-07 16:01:27.715633469 +0800 +@@ -0,0 +1,1164 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_mac.h" ++#include "e1000_nvm.h" ++#include "e1000_phy.h" ++ ++/** ++ * e1000_init_mac_params - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the MAC ++ * set of functions. Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_mac_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_mac_params) { ++ ret_val = hw->func.init_mac_params(hw); ++ if (ret_val) { ++ DEBUGOUT("MAC Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("mac.init_mac_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the NVM ++ * set of functions. 
Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_nvm_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_nvm_params) { ++ ret_val = hw->func.init_nvm_params(hw); ++ if (ret_val) { ++ DEBUGOUT("NVM Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("nvm.init_nvm_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_phy_params - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * This function initializes the function pointers for the PHY ++ * set of functions. Called by drivers or by e1000_setup_init_funcs. ++ **/ ++s32 e1000_init_phy_params(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ if (hw->func.init_phy_params) { ++ ret_val = hw->func.init_phy_params(hw); ++ if (ret_val) { ++ DEBUGOUT("PHY Initialization Error\n"); ++ goto out; ++ } ++ } else { ++ DEBUGOUT("phy.init_phy_params was NULL\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_mac_type - Sets MAC type ++ * @hw: pointer to the HW structure ++ * ++ * This function sets the mac type of the adapter based on the ++ * device ID stored in the hw structure. ++ * MUST BE FIRST FUNCTION CALLED (explicitly or through ++ * e1000_setup_init_funcs()). ++ **/ ++s32 e1000_set_mac_type(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_set_mac_type"); ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ mac->type = e1000_82542; ++ break; ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ mac->type = e1000_82543; ++ break; ++ case E1000_DEV_ID_82544EI_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82544GC_COPPER: ++ case E1000_DEV_ID_82544GC_LOM: ++ mac->type = e1000_82544; ++ break; ++ case E1000_DEV_ID_82540EM: ++ case E1000_DEV_ID_82540EM_LOM: ++ case E1000_DEV_ID_82540EP: ++ case E1000_DEV_ID_82540EP_LOM: ++ case E1000_DEV_ID_82540EP_LP: ++ mac->type = e1000_82540; ++ break; ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ mac->type = e1000_82545; ++ break; ++ case E1000_DEV_ID_82545GM_COPPER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82545GM_SERDES: ++ mac->type = e1000_82545_rev_3; ++ break; ++ case E1000_DEV_ID_82546EB_COPPER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ mac->type = e1000_82546; ++ break; ++ case E1000_DEV_ID_82546GB_COPPER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ mac->type = e1000_82546_rev_3; ++ break; ++ case E1000_DEV_ID_82541EI: ++ case E1000_DEV_ID_82541EI_MOBILE: ++ case E1000_DEV_ID_82541ER_LOM: ++ mac->type = e1000_82541; ++ break; ++ case E1000_DEV_ID_82541ER: ++ case E1000_DEV_ID_82541GI: ++ case E1000_DEV_ID_82541GI_LF: ++ case E1000_DEV_ID_82541GI_MOBILE: ++ mac->type = e1000_82541_rev_2; ++ break; ++ case E1000_DEV_ID_82547EI: ++ case E1000_DEV_ID_82547EI_MOBILE: ++ mac->type = e1000_82547; ++ break; ++ case E1000_DEV_ID_82547GI: ++ mac->type = e1000_82547_rev_2; ++ break; ++ case E1000_DEV_ID_82571EB_COPPER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case 
E1000_DEV_ID_82571PT_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ mac->type = e1000_82571; ++ break; ++ case E1000_DEV_ID_82572EI: ++ case E1000_DEV_ID_82572EI_COPPER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82572EI_SERDES: ++ mac->type = e1000_82572; ++ break; ++ case E1000_DEV_ID_82573E: ++ case E1000_DEV_ID_82573E_IAMT: ++ case E1000_DEV_ID_82573L: ++ mac->type = e1000_82573; ++ break; ++ case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: ++ mac->type = e1000_80003es2lan; ++ break; ++ case E1000_DEV_ID_ICH8_IFE: ++ case E1000_DEV_ID_ICH8_IFE_GT: ++ case E1000_DEV_ID_ICH8_IFE_G: ++ case E1000_DEV_ID_ICH8_IGP_M: ++ case E1000_DEV_ID_ICH8_IGP_M_AMT: ++ case E1000_DEV_ID_ICH8_IGP_AMT: ++ case E1000_DEV_ID_ICH8_IGP_C: ++ mac->type = e1000_ich8lan; ++ break; ++ case E1000_DEV_ID_ICH9_IFE: ++ case E1000_DEV_ID_ICH9_IFE_GT: ++ case E1000_DEV_ID_ICH9_IFE_G: ++ case E1000_DEV_ID_ICH9_IGP_AMT: ++ case E1000_DEV_ID_ICH9_IGP_C: ++ mac->type = e1000_ich9lan; ++ break; ++ default: ++ /* Should never have loaded on this device */ ++ ret_val = -E1000_ERR_MAC_INIT; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_init_funcs - Initializes function pointers ++ * @hw: pointer to the HW structure ++ * @init_device: TRUE will initialize the rest of the function pointers ++ * getting the device ready for use. FALSE will only set ++ * MAC type and the function pointers for the other init ++ * functions. Passing FALSE will not generate any hardware ++ * reads or writes. ++ * ++ * This function must be called by a driver in order to use the rest ++ * of the 'shared' code files. Called by drivers only. ++ **/ ++s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) ++{ ++ s32 ret_val; ++ ++ /* Can't do much good without knowing the MAC type. */ ++ ret_val = e1000_set_mac_type(hw); ++ if (ret_val) { ++ DEBUGOUT("ERROR: MAC type could not be set properly.\n"); ++ goto out; ++ } ++ ++ if (!hw->hw_addr) { ++ DEBUGOUT("ERROR: Registers not mapped\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Init some generic function pointers that are currently all pointing ++ * to generic implementations. We do this first allowing a driver ++ * module to override it afterwards. ++ */ ++ hw->func.config_collision_dist = e1000_config_collision_dist_generic; ++ hw->func.rar_set = e1000_rar_set_generic; ++ hw->func.validate_mdi_setting = e1000_validate_mdi_setting_generic; ++ hw->func.mng_host_if_write = e1000_mng_host_if_write_generic; ++ hw->func.mng_write_cmd_header = e1000_mng_write_cmd_header_generic; ++ hw->func.mng_enable_host_if = e1000_mng_enable_host_if_generic; ++ hw->func.wait_autoneg = e1000_wait_autoneg_generic; ++ hw->func.reload_nvm = e1000_reload_nvm_generic; ++ ++ /* ++ * Set up the init function pointers. These are functions within the ++ * adapter family file that sets up function pointers for the rest of ++ * the functions in that family. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82542: ++ e1000_init_function_pointers_82542(hw); ++ break; ++ case e1000_82543: ++ case e1000_82544: ++ e1000_init_function_pointers_82543(hw); ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ e1000_init_function_pointers_82540(hw); ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ e1000_init_function_pointers_82541(hw); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ e1000_init_function_pointers_82571(hw); ++ break; ++ case e1000_80003es2lan: ++ e1000_init_function_pointers_80003es2lan(hw); ++ break; ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ e1000_init_function_pointers_ich8lan(hw); ++ break; ++ default: ++ DEBUGOUT("Hardware not supported\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* ++ * Initialize the rest of the function pointers. These require some ++ * register reads/writes in some cases. ++ */ ++ if (!(ret_val) && init_device) { ++ ret_val = e1000_init_mac_params(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_init_nvm_params(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_init_phy_params(hw); ++ if (ret_val) ++ goto out; ++ ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_remove_device - Free device specific structure ++ * @hw: pointer to the HW structure ++ * ++ * If a device specific structure was allocated, this function will ++ * free it. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_remove_device(struct e1000_hw *hw) ++{ ++ if (hw->func.remove_device) ++ hw->func.remove_device(hw); ++} ++ ++/** ++ * e1000_get_bus_info - Obtain bus information for adapter ++ * @hw: pointer to the HW structure ++ * ++ * This will obtain information about the HW bus for which the ++ * adaper is attached and stores it in the hw structure. This is a ++ * function pointer entry point called by drivers. ++ **/ ++s32 e1000_get_bus_info(struct e1000_hw *hw) ++{ ++ if (hw->func.get_bus_info) ++ return hw->func.get_bus_info(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_vfta - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * This clears the VLAN filter table on the adapter. This is a function ++ * pointer entry point called by drivers. ++ **/ ++void e1000_clear_vfta(struct e1000_hw *hw) ++{ ++ if (hw->func.clear_vfta) ++ hw->func.clear_vfta (hw); ++} ++ ++/** ++ * e1000_write_vfta - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: the 32-bit offset in which to write the value to. ++ * @value: the 32-bit value to write at location offset. ++ * ++ * This writes a 32-bit value to a 32-bit offset in the VLAN filter ++ * table. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ if (hw->func.write_vfta) ++ hw->func.write_vfta(hw, offset, value); ++} ++ ++/** ++ * e1000_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. Currently no func pointer ++ * exists and all implementations are handled in the generic version of this ++ * function. ++ **/ ++void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, ++ u32 mc_addr_count, u32 rar_used_count, ++ u32 rar_count) ++{ ++ if (hw->func.update_mc_addr_list) ++ hw->func.update_mc_addr_list(hw, ++ mc_addr_list, ++ mc_addr_count, ++ rar_used_count, ++ rar_count); ++} ++ ++/** ++ * e1000_force_mac_fc - Force MAC flow control ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Currently no func pointer exists ++ * and all implementations are handled in the generic version of this ++ * function. ++ **/ ++s32 e1000_force_mac_fc(struct e1000_hw *hw) ++{ ++ return e1000_force_mac_fc_generic(hw); ++} ++ ++/** ++ * e1000_check_for_link - Check/Store link connection ++ * @hw: pointer to the HW structure ++ * ++ * This checks the link condition of the adapter and stores the ++ * results in the hw->mac structure. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_check_for_link(struct e1000_hw *hw) ++{ ++ if (hw->func.check_for_link) ++ return hw->func.check_for_link(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_check_mng_mode - Check management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has manageability enabled. ++ * This is a function pointer entry point called by drivers. ++ **/ ++bool e1000_check_mng_mode(struct e1000_hw *hw) ++{ ++ if (hw->func.check_mng_mode) ++ return hw->func.check_mng_mode(hw); ++ ++ return FALSE; ++} ++ ++/** ++ * e1000_mng_write_dhcp_info - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) ++{ ++ return e1000_mng_write_dhcp_info_generic(hw, buffer, length); ++} ++ ++/** ++ * e1000_reset_hw - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_reset_hw(struct e1000_hw *hw) ++{ ++ if (hw->func.reset_hw) ++ return hw->func.reset_hw(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_init_hw - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a function ++ * pointer entry point called by drivers. ++ **/ ++s32 e1000_init_hw(struct e1000_hw *hw) ++{ ++ if (hw->func.init_hw) ++ return hw->func.init_hw(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_setup_link - Configures link and flow control ++ * @hw: pointer to the HW structure ++ * ++ * This configures link and flow control settings for the adapter. This ++ * is a function pointer entry point called by drivers. While modules can ++ * also call this, they probably call their own version of this function. 
++ **/ ++s32 e1000_setup_link(struct e1000_hw *hw) ++{ ++ if (hw->func.setup_link) ++ return hw->func.setup_link(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_get_speed_and_duplex - Returns current speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to a 16-bit value to store the speed ++ * @duplex: pointer to a 16-bit value to store the duplex. ++ * ++ * This returns the speed and duplex of the adapter in the two 'out' ++ * variables passed in. This is a function pointer entry point called ++ * by drivers. ++ **/ ++s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ if (hw->func.get_link_up_info) ++ return hw->func.get_link_up_info(hw, speed, duplex); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_setup_led - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_setup_led(struct e1000_hw *hw) ++{ ++ if (hw->func.setup_led) ++ return hw->func.setup_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_cleanup_led - Restores SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This restores the SW controllable LED to the value saved off by ++ * e1000_setup_led. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_cleanup_led(struct e1000_hw *hw) ++{ ++ if (hw->func.cleanup_led) ++ return hw->func.cleanup_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_blink_led - Blink SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This starts the adapter LED blinking. Request the LED to be setup first ++ * and cleaned up after. This is a function pointer entry point called by ++ * drivers. ++ **/ ++s32 e1000_blink_led(struct e1000_hw *hw) ++{ ++ if (hw->func.blink_led) ++ return hw->func.blink_led(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_on - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by drivers. ++ **/ ++s32 e1000_led_on(struct e1000_hw *hw) ++{ ++ if (hw->func.led_on) ++ return hw->func.led_on(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by drivers. ++ **/ ++s32 e1000_led_off(struct e1000_hw *hw) ++{ ++ if (hw->func.led_off) ++ return hw->func.led_off(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_reset_adaptive - Reset adaptive IFS ++ * @hw: pointer to the HW structure ++ * ++ * Resets the adaptive IFS. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void e1000_reset_adaptive(struct e1000_hw *hw) ++{ ++ e1000_reset_adaptive_generic(hw); ++} ++ ++/** ++ * e1000_update_adaptive - Update adaptive IFS ++ * @hw: pointer to the HW structure ++ * ++ * Updates adapter IFS. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. 
++ **/ ++void e1000_update_adaptive(struct e1000_hw *hw) ++{ ++ return; // TODO ++ e1000_update_adaptive_generic(hw); ++} ++ ++/** ++ * e1000_disable_pcie_master - Disable PCI-Express master access ++ * @hw: pointer to the HW structure ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. Currently no func pointer exists and all implementations are ++ * handled in the generic version of this function. ++ **/ ++s32 e1000_disable_pcie_master(struct e1000_hw *hw) ++{ ++ return e1000_disable_pcie_master_generic(hw); ++} ++ ++/** ++ * e1000_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. ++ **/ ++void e1000_config_collision_dist(struct e1000_hw *hw) ++{ ++ if (hw->func.config_collision_dist) ++ hw->func.config_collision_dist(hw); ++} ++ ++/** ++ * e1000_rar_set - Sets a receive address register ++ * @hw: pointer to the HW structure ++ * @addr: address to set the RAR to ++ * @index: the RAR to set ++ * ++ * Sets a Receive Address Register (RAR) to the specified address. ++ **/ ++void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ if (hw->func.rar_set) ++ hw->func.rar_set(hw, addr, index); ++} ++ ++/** ++ * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state ++ * @hw: pointer to the HW structure ++ * ++ * Ensures that the MDI/MDIX SW state is valid. ++ **/ ++s32 e1000_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ if (hw->func.validate_mdi_setting) ++ return hw->func.validate_mdi_setting(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_mta_set - Sets multicast table bit ++ * @hw: pointer to the HW structure ++ * @hash_value: Multicast hash value. ++ * ++ * This sets the bit in the multicast table corresponding to the ++ * hash value. This is a function pointer entry point called by drivers. ++ **/ ++void e1000_mta_set(struct e1000_hw *hw, u32 hash_value) ++{ ++ if (hw->func.mta_set) ++ hw->func.mta_set(hw, hash_value); ++} ++ ++/** ++ * e1000_hash_mc_addr - Determines address location in multicast table ++ * @hw: pointer to the HW structure ++ * @mc_addr: Multicast address to hash. ++ * ++ * This hashes an address to determine its location in the multicast ++ * table. Currently no func pointer exists and all implementations ++ * are handled in the generic version of this function. ++ **/ ++u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ return e1000_hash_mc_addr_generic(hw, mc_addr); ++} ++ ++/** ++ * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ return e1000_enable_tx_pkt_filtering_generic(hw); ++} ++ ++/** ++ * e1000_mng_host_if_write - Writes to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. 
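/*
 * Illustrative sketch only -- not part of the upstream patch. The doc
 * comment above describes pushing an arbitrary byte buffer through a 32-bit
 * register window at a byte offset, which forces the copy to be split into a
 * leading partial dword, a run of whole dwords, and a trailing partial
 * dword, while keeping a running byte sum for the command header. The
 * "register file" below is a plain array standing in for
 * E1000_WRITE_REG_ARRAY_DWORD(); all sizes and names are invented, and the
 * real generic implementation later in the patch differs in detail.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_HOST_IF_DWORDS 16

static uint32_t demo_host_if[DEMO_HOST_IF_DWORDS];	/* stand-in window */

static void demo_host_if_write(const uint8_t *buf, uint16_t len,
			       uint16_t byte_offset, uint8_t *sum)
{
	uint16_t dw = byte_offset >> 2;		/* dword index */
	uint16_t head = byte_offset & 0x3;	/* unaligned lead-in bytes */
	uint32_t dword;
	uint16_t i;

	while (len) {
		uint16_t chunk = 4 - head;

		if (chunk > len)
			chunk = len;

		/* read-modify-write so untouched bytes in the dword survive */
		dword = demo_host_if[dw];
		for (i = 0; i < chunk; i++) {
			((uint8_t *)&dword)[head + i] = *buf;
			*sum += *buf++;
		}
		demo_host_if[dw++] = dword;

		len -= chunk;
		head = 0;
	}
}

int main(void)
{
	uint8_t payload[] = "host interface payload";
	uint8_t sum = 0;

	demo_host_if_write(payload, sizeof(payload), 3, &sum);
	printf("byte sum = 0x%02x\n", sum);
	return 0;
}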
Also fills up the sum of the buffer in *buffer parameter. ++ **/ ++s32 e1000_mng_host_if_write(struct e1000_hw * hw, u8 *buffer, u16 length, ++ u16 offset, u8 *sum) ++{ ++ if (hw->func.mng_host_if_write) ++ return hw->func.mng_host_if_write(hw, buffer, length, offset, ++ sum); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_mng_write_cmd_header - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr) ++{ ++ if (hw->func.mng_write_cmd_header) ++ return hw->func.mng_write_cmd_header(hw, hdr); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_mng_enable_host_if - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++s32 e1000_mng_enable_host_if(struct e1000_hw * hw) ++{ ++ if (hw->func.mng_enable_host_if) ++ return hw->func.mng_enable_host_if(hw); ++ ++ return E1000_NOT_IMPLEMENTED; ++} ++ ++/** ++ * e1000_wait_autoneg - Waits for autonegotiation completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for autoneg to complete. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++s32 e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ if (hw->func.wait_autoneg) ++ return hw->func.wait_autoneg(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_check_reset_block - Verifies PHY can be reset ++ * @hw: pointer to the HW structure ++ * ++ * Checks if the PHY is in a state that can be reset or if manageability ++ * has it tied up. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_check_reset_block(struct e1000_hw *hw) ++{ ++ if (hw->func.check_reset_block) ++ return hw->func.check_reset_block(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_phy_reg - Reads PHY register ++ * @hw: pointer to the HW structure ++ * @offset: the register to read ++ * @data: the buffer to store the 16-bit read. ++ * ++ * Reads the PHY register and returns the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ if (hw->func.read_phy_reg) ++ return hw->func.read_phy_reg(hw, offset, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_write_phy_reg - Writes PHY register ++ * @hw: pointer to the HW structure ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes the PHY register at offset with the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ if (hw->func.write_phy_reg) ++ return hw->func.write_phy_reg(hw, offset, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_kmrn_reg - Reads register using Kumeran interface ++ * @hw: pointer to the HW structure ++ * @offset: the register to read ++ * @data: the location to store the 16-bit value read. ++ * ++ * Reads a register out of the Kumeran interface. 
Currently no func pointer ++ * exists and all implementations are handled in the generic version of ++ * this function. ++ **/ ++s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return e1000_read_kmrn_reg_generic(hw, offset, data); ++} ++ ++/** ++ * e1000_write_kmrn_reg - Writes register using Kumeran interface ++ * @hw: pointer to the HW structure ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes a register to the Kumeran interface. Currently no func pointer ++ * exists and all implementations are handled in the generic version of ++ * this function. ++ **/ ++s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return e1000_write_kmrn_reg_generic(hw, offset, data); ++} ++ ++/** ++ * e1000_get_cable_length - Retrieves cable length estimation ++ * @hw: pointer to the HW structure ++ * ++ * This function estimates the cable length and stores them in ++ * hw->phy.min_length and hw->phy.max_length. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_get_cable_length(struct e1000_hw *hw) ++{ ++ if (hw->func.get_cable_length) ++ return hw->func.get_cable_length(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_info - Retrieves PHY information from registers ++ * @hw: pointer to the HW structure ++ * ++ * This function gets some information from various PHY registers and ++ * populates hw->phy values with it. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_get_phy_info(struct e1000_hw *hw) ++{ ++ if (hw->func.get_phy_info) ++ return hw->func.get_phy_info(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_hw_reset - Hard PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a hard PHY reset. This is a function pointer entry point called ++ * by drivers. ++ **/ ++s32 e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ if (hw->func.reset_phy) ++ return hw->func.reset_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_commit - Soft PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a soft PHY reset on those that apply. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000_phy_commit(struct e1000_hw *hw) ++{ ++ if (hw->func.commit_phy) ++ return hw->func.commit_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_d3_lplu_state - Sets low power link up state for D0 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D0 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D0 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. 
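/*
 * Illustrative usage only -- not part of the upstream patch. The comment
 * above describes LPLU as a power-saving mode for idle/Dx states that should
 * stay off while the link is actively used, so that SmartSpeed can keep
 * performance up. A caller could express that policy roughly as below;
 * demo_set_power_profile() and its flags are hypothetical, the policy split
 * is an assumption, and the snippet relies on the driver's own headers.
 */
#include "e1000_api.h"

static void demo_set_power_profile(struct e1000_hw *hw,
				   bool entering_suspend, bool runtime_idle)
{
	if (entering_suspend) {
		/* Dx: power conservation matters most, keep LPLU on. */
		(void)e1000_set_d3_lplu_state(hw, TRUE);
		return;
	}

	/*
	 * D0: enable LPLU only while idle; while traffic is flowing, leave
	 * it off so SmartSpeed stays available.
	 */
	(void)e1000_set_d0_lplu_state(hw, runtime_idle);
}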
++ **/ ++s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->func.set_d0_lplu_state) ++ return hw->func.set_d0_lplu_state(hw, active); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->func.set_d3_lplu_state) ++ return hw->func.set_d3_lplu_state(hw, active); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_read_mac_addr - Reads MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MAC address out of the adapter and stores it in the HW structure. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++s32 e1000_read_mac_addr(struct e1000_hw *hw) ++{ ++ if (hw->func.read_mac_addr) ++ return hw->func.read_mac_addr(hw); ++ ++ return e1000_read_mac_addr_generic(hw); ++} ++ ++/** ++ * e1000_read_pba_num - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ * Currently no func pointer exists and all implementations are handled in the ++ * generic version of this function. ++ **/ ++s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *pba_num) ++{ ++ return e1000_read_pba_num_generic(hw, pba_num); ++} ++ ++/** ++ * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum ++ * @hw: pointer to the HW structure ++ * ++ * Validates the NVM checksum is correct. This is a function pointer entry ++ * point called by drivers. ++ **/ ++s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ if (hw->func.validate_nvm) ++ return hw->func.validate_nvm(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the NVM checksum. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++s32 e1000_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ if (hw->func.update_nvm) ++ return hw->func.update_nvm(hw); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_reload_nvm - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. ++ **/ ++void e1000_reload_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.reload_nvm) ++ hw->func.reload_nvm(hw); ++} ++ ++/** ++ * e1000_read_nvm - Reads NVM (EEPROM) ++ * @hw: pointer to the HW structure ++ * @offset: the word offset to read ++ * @words: number of 16-bit words to read ++ * @data: pointer to the properly sized buffer for the data. ++ * ++ * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function ++ * pointer entry point called by drivers. 
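/*
 * Illustrative sketch only -- not part of the upstream patch.
 * e1000_validate_nvm_checksum() above just defers to a per-device hook; the
 * usual scheme for this kind of word-based EEPROM checksum is "the 16-bit
 * sum of every word in the checksummed region, including the stored checksum
 * word, equals a fixed signature". The region size (DEMO_NVM_WORDS) and the
 * signature (DEMO_NVM_SUM) below are placeholders for the sketch, not values
 * taken from the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NVM_WORDS 64
#define DEMO_NVM_SUM   0xBABA	/* placeholder signature */

static int demo_validate_nvm_checksum(const uint16_t *nvm)
{
	uint16_t sum = 0;
	int i;

	for (i = 0; i < DEMO_NVM_WORDS; i++)
		sum += nvm[i];		/* last word is the stored checksum */

	return (sum == DEMO_NVM_SUM) ? 0 : -1;
}

static void demo_update_nvm_checksum(uint16_t *nvm)
{
	uint16_t sum = 0;
	int i;

	for (i = 0; i < DEMO_NVM_WORDS - 1; i++)
		sum += nvm[i];

	/* store whatever word makes the full sum hit the signature */
	nvm[DEMO_NVM_WORDS - 1] = (uint16_t)(DEMO_NVM_SUM - sum);
}

int main(void)
{
	uint16_t nvm[DEMO_NVM_WORDS] = { 0x1234, 0x5678, 0x9abc };

	demo_update_nvm_checksum(nvm);
	printf("checksum ok: %s\n",
	       demo_validate_nvm_checksum(nvm) == 0 ? "yes" : "no");
	return 0;
}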
++ **/ ++s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ if (hw->func.read_nvm) ++ return hw->func.read_nvm(hw, offset, words, data); ++ ++ return -E1000_ERR_CONFIG; ++} ++ ++/** ++ * e1000_write_nvm - Writes to NVM (EEPROM) ++ * @hw: pointer to the HW structure ++ * @offset: the word offset to read ++ * @words: number of 16-bit words to write ++ * @data: pointer to the properly sized buffer for the data. ++ * ++ * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function ++ * pointer entry point called by drivers. ++ **/ ++s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ if (hw->func.write_nvm) ++ return hw->func.write_nvm(hw, offset, words, data); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_write_8bit_ctrl_reg - Writes 8bit Control register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset ++ * @offset: the register to write ++ * @data: the value to write. ++ * ++ * Writes the PHY register at offset with the value in data. ++ * This is a function pointer entry point called by drivers. ++ **/ ++s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, ++ u8 data) ++{ ++ return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); ++} ++ ++/** ++ * e1000_power_up_phy - Restores link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * The phy may be powered down to save power, to turn off link when the ++ * driver is unloaded, or wake on lan is not enabled (among others). ++ **/ ++void e1000_power_up_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.power_up_phy) ++ hw->func.power_up_phy(hw); ++ ++ e1000_setup_link(hw); ++} ++ ++/** ++ * e1000_power_down_phy - Power down PHY ++ * @hw: pointer to the HW structure ++ * ++ * The phy may be powered down to save power, to turn off link when the ++ * driver is unloaded, or wake on lan is not enabled (among others). ++ **/ ++void e1000_power_down_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.power_down_phy) ++ hw->func.power_down_phy(hw); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_manage.c 2021-04-07 16:01:27.710633476 +0800 +@@ -0,0 +1,384 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_manage.h" ++ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length); ++ ++/** ++ * e1000_calculate_checksum - Calculate checksum for buffer ++ * @buffer: pointer to EEPROM ++ * @length: size of EEPROM to calculate a checksum for ++ * ++ * Calculates the checksum for some buffer on a specified length. The ++ * checksum calculated is returned. ++ **/ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length) ++{ ++ u32 i; ++ u8 sum = 0; ++ ++ DEBUGFUNC("e1000_calculate_checksum"); ++ ++ if (!buffer) ++ return 0; ++ ++ for (i = 0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (u8) (0 - sum); ++} ++ ++/** ++ * e1000_mng_enable_host_if_generic - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++s32 e1000_mng_enable_host_if_generic(struct e1000_hw * hw) ++{ ++ u32 hicr; ++ s32 ret_val = E1000_SUCCESS; ++ u8 i; ++ ++ DEBUGFUNC("e1000_mng_enable_host_if_generic"); ++ ++ /* Check that the host interface is enabled. */ ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ DEBUGOUT("E1000_HOST_EN bit disabled.\n"); ++ ret_val = -E1000_ERR_HOST_INTERFACE_COMMAND; ++ goto out; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ msec_delay_irq(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ DEBUGOUT("Previous command timeout failed .\n"); ++ ret_val = -E1000_ERR_HOST_INTERFACE_COMMAND; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_mng_mode_generic - Generic check managament mode ++ * @hw: pointer to the HW structure ++ * ++ * Reads the firmware semaphore register and returns true (>0) if ++ * manageability is enabled, else false (0). ++ **/ ++bool e1000_check_mng_mode_generic(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ DEBUGFUNC("e1000_check_mng_mode_generic"); ++ ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ ++ return ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on TX ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ **/ ++bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw) ++{ ++ struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; ++ u32 *buffer = (u32 *)&hw->mng_cookie; ++ u32 offset; ++ s32 ret_val, hdr_csum, csum; ++ u8 i, len; ++ bool tx_filter = TRUE; ++ ++ DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic"); ++ ++ /* No manageability, no filtering */ ++ if (!e1000_check_mng_mode(hw)) { ++ tx_filter = FALSE; ++ goto out; ++ } ++ ++ /* ++ * If we can't read from the host interface for whatever ++ * reason, disable filtering. ++ */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val != E1000_SUCCESS) { ++ tx_filter = FALSE; ++ goto out; ++ } ++ ++ /* Read in the header. Length and offset are in dwords. 
*/ ++ len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; ++ offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; ++ for (i = 0; i < len; i++) { ++ *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, ++ E1000_HOST_IF, ++ offset + i); ++ } ++ hdr_csum = hdr->checksum; ++ hdr->checksum = 0; ++ csum = e1000_calculate_checksum((u8 *)hdr, ++ E1000_MNG_DHCP_COOKIE_LENGTH); ++ /* ++ * If either the checksums or signature don't match, then ++ * the cookie area isn't considered valid, in which case we ++ * take the safe route of assuming Tx filtering is enabled. ++ */ ++ if (hdr_csum != csum) ++ goto out; ++ if (hdr->signature != E1000_IAMT_SIGNATURE) ++ goto out; ++ ++ /* Cookie area is valid, make the final check for filtering. */ ++ if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) ++ tx_filter = FALSE; ++ ++out: ++ hw->mac.tx_pkt_filtering = tx_filter; ++ return tx_filter; ++} ++ ++/** ++ * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw * hw, u8 *buffer, ++ u16 length) ++{ ++ struct e1000_host_mng_command_header hdr; ++ s32 ret_val; ++ u32 hicr; ++ ++ DEBUGFUNC("e1000_mng_write_dhcp_info_generic"); ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ /* Enable the host interface */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Populate the host interface with the contents of "buffer". */ ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, ++ sizeof(hdr), &(hdr.checksum)); ++ if (ret_val) ++ goto out; ++ ++ /* Write the manageability command header */ ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val) ++ goto out; ++ ++ /* Tell the ARC a new command is pending. */ ++ hicr = E1000_READ_REG(hw, E1000_HICR); ++ E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_mng_write_cmd_header_generic - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++s32 e1000_mng_write_cmd_header_generic(struct e1000_hw * hw, ++ struct e1000_host_mng_command_header * hdr) ++{ ++ u16 i, length = sizeof(struct e1000_host_mng_command_header); ++ ++ DEBUGFUNC("e1000_mng_write_cmd_header_generic"); ++ ++ /* Write the whole command header structure with new checksum. */ ++ ++ hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); ++ ++ length >>= 2; ++ /* Write the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, ++ *((u32 *) hdr + i)); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_mng_host_if_write_generic - Write to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. Also fills up the sum of the buffer in *buffer parameter. 
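/*
 * Illustrative sketch only -- not part of the upstream patch.
 * e1000_calculate_checksum() earlier in this file returns (u8)(0 - sum),
 * i.e. the two's complement of the byte sum, so a block whose checksum field
 * was produced that way sums back to the signature value. The DHCP-cookie
 * check above relies on exactly that: save the stored checksum, clear the
 * field, recompute, compare. Standalone demo with an invented cookie layout.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_cookie {
	uint8_t signature;
	uint8_t status;
	uint8_t reserved;
	uint8_t checksum;
};

static uint8_t demo_calculate_checksum(const uint8_t *buf, uint32_t len)
{
	uint8_t sum = 0;
	uint32_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];

	return (uint8_t)(0 - sum);	/* same complement trick as the driver */
}

int main(void)
{
	struct demo_cookie cookie = { .signature = 0x5A, .status = 0x01 };
	uint8_t stored, recomputed;

	/* Producer side: compute the checksum over the zeroed field. */
	cookie.checksum = 0;
	cookie.checksum = demo_calculate_checksum((uint8_t *)&cookie,
						  sizeof(cookie));

	/* Consumer side: save, clear, recompute, compare. */
	stored = cookie.checksum;
	cookie.checksum = 0;
	recomputed = demo_calculate_checksum((uint8_t *)&cookie,
					     sizeof(cookie));

	printf("cookie %s\n", stored == recomputed ? "valid" : "corrupt");
	return 0;
}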
++ **/ ++s32 e1000_mng_host_if_write_generic(struct e1000_hw * hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum) ++{ ++ u8 *tmp; ++ u8 *bufptr = buffer; ++ u32 data = 0; ++ s32 ret_val = E1000_SUCCESS; ++ u16 remaining, i, j, prev_bytes; ++ ++ DEBUGFUNC("e1000_mng_host_if_write_generic"); ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) { ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ tmp = (u8 *)&data; ++ prev_bytes = offset & 0x3; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* ++ * The device driver writes the relevant command block into the ++ * ram area. ++ */ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(u32); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_enable_mng_pass_thru - Enable processing of ARP's ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to allow ARPs to be processed by the host. ++ **/ ++bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = FALSE; ++ ++ DEBUGFUNC("e1000_enable_mng_pass_thru"); ++ ++ if (!hw->mac.asf_firmware_present) ++ goto out; ++ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN) || ++ !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) ++ goto out; ++ ++ if (hw->mac.arc_subsystem_valid) { ++ fwsm = E1000_READ_REG(hw, E1000_FWSM); ++ factps = E1000_READ_REG(hw, E1000_FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = TRUE; ++ goto out; ++ } ++ } else { ++ if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = TRUE; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.h 2021-04-07 16:01:27.705633483 +0800 +@@ -0,0 +1,44 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82543_H_ ++#define _E1000_82543_H_ ++ ++#define PHY_PREAMBLE 0xFFFFFFFF ++#define PHY_PREAMBLE_SIZE 32 ++#define PHY_SOF 0x1 ++#define PHY_OP_READ 0x2 ++#define PHY_OP_WRITE 0x1 ++#define PHY_TURNAROUND 0x2 ++ ++#define TBI_COMPAT_ENABLED 0x1 /* Global "knob" for the workaround */ ++/* If TBI_COMPAT_ENABLED, then this is the current state (on/off) */ ++#define TBI_SBP_ENABLED 0x2 ++ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.h 2021-04-07 16:01:27.700633490 +0800 +@@ -0,0 +1,168 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_PHY_H_ ++#define _E1000_PHY_H_ ++ ++typedef enum { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++} e1000_ms_type; ++ ++typedef enum { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++} e1000_smart_speed; ++ ++s32 e1000_check_downshift_generic(struct e1000_hw *hw); ++s32 e1000_check_polarity_m88(struct e1000_hw *hw); ++s32 e1000_check_polarity_igp(struct e1000_hw *hw); ++s32 e1000_check_reset_block_generic(struct e1000_hw *hw); ++s32 e1000_copper_link_autoneg(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); ++s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++s32 e1000_get_cable_length_m88(struct e1000_hw *hw); ++s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); ++s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); ++s32 e1000_get_phy_id(struct e1000_hw *hw); ++s32 e1000_get_phy_info_igp(struct e1000_hw *hw); ++s32 e1000_get_phy_info_m88(struct e1000_hw *hw); ++s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw); ++void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); ++s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw); ++s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw); ++s32 e1000_phy_setup_autoneg(struct e1000_hw *hw); ++s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active); ++s32 e1000_setup_copper_link_generic(struct e1000_hw *hw); ++s32 e1000_wait_autoneg_generic(struct e1000_hw *hw); ++s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_phy_reset_dsp(struct e1000_hw *hw); ++s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); ++e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); ++void e1000_power_up_phy_copper(struct e1000_hw *hw); ++void e1000_power_down_phy_copper(struct e1000_hw *hw); ++s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++ ++#define E1000_MAX_PHY_ADDR 4 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP01E1000_GMII_FIFO 0x14 /* GMII FIFO */ ++#define IGP01E1000_PHY_CHANNEL_QUALITY 0x15 /* PHY Channel Quality */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ ++#define IGP_PAGE_SHIFT 5 ++#define PHY_REG_MASK 0x1F ++ ++ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define 
IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++/* Enable flexible speed on link-up */ ++#define IGP01E1000_GMII_FLEX_SPD 0x0010 ++#define IGP01E1000_GMII_SPD 0x0020 /* Enable SPD */ ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0008 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++ ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++#define IGP03E1000_PHY_MISC_CTRL 0x1B ++#define IGP03E1000_PHY_MISC_DUPLEX_MANUAL_SET 0x1000 /* Manually Set Duplex */ ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KMRNCTRLSTA_REN 0x00200000 ++#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ ++#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Control */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Control */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ ++ ++/* IFE PHY Extended Status Control */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 ++ ++/* IFE PHY Special Control */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 ++#define IFE_PSC_FORCE_POLARITY 0x0020 ++#define IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN 0x0100 ++ ++/* IFE PHY Special Control and LED Control */ ++#define IFE_PSCL_PROBE_MODE 0x0020 ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++/* IFE PHY MDIX Control */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto MDI/MDI-X, 0=disable */ ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.h 2021-04-07 16:01:27.696633496 +0800 +@@ -0,0 +1,61 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_NVM_H_ ++#define _E1000_NVM_H_ ++ ++s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); ++ ++s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); ++s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num); ++s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); ++s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); ++s32 e1000_write_nvm_eewr(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); ++void e1000_stop_nvm(struct e1000_hw *hw); ++void e1000_release_nvm_generic(struct e1000_hw *hw); ++void e1000_reload_nvm_generic(struct e1000_hw *hw); ++ ++/* Function pointers */ ++s32 e1000_acquire_nvm(struct e1000_hw *hw); ++void e1000_release_nvm(struct e1000_hw *hw); ++ ++#define E1000_STM_OPCODE 0xDB00 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.h 2021-04-07 16:01:27.691633503 +0800 +@@ -0,0 +1,40 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_82571_H_ ++#define _E1000_82571_H_ ++ ++#define ID_LED_RESERVED_F746 0xF746 ++#define ID_LED_DEFAULT_82573 ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_80003es2lan.c 2021-04-07 16:01:27.686633510 +0800 +@@ -0,0 +1,1401 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_80003es2lan ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_80003es2lan.h" ++ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, ++ u16 *data); ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, ++ u16 data); ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); ++static s32 
e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); ++ ++/* ++ * A table for the GG82563 cable length where the range is defined ++ * with a lower bound at "index" and the upper bound at ++ * "index + 5". ++ */ ++static const u16 e1000_gg82563_cable_length_table[] = ++ { 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; ++#define GG82563_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_gg82563_cable_length_table) / \ ++ sizeof(e1000_gg82563_cable_length_table[0])) ++ ++/** ++ * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_80003es2lan"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } else { ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_80003es2lan; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ phy->type = e1000_phy_gg82563; ++ ++ func->acquire_phy = e1000_acquire_phy_80003es2lan; ++ func->check_polarity = e1000_check_polarity_m88; ++ func->check_reset_block = e1000_check_reset_block_generic; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->get_cfg_done = e1000_get_cfg_done_80003es2lan; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->release_phy = e1000_release_phy_80003es2lan; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_generic; ++ ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan; ++ func->get_cable_length = e1000_get_cable_length_80003es2lan; ++ func->read_phy_reg = e1000_read_phy_reg_gg82563_80003es2lan; ++ func->write_phy_reg = e1000_write_phy_reg_gg82563_80003es2lan; ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000_get_phy_id(hw); ++ ++ /* Verify phy id */ ++ if (phy->id != GG82563_E_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_80003es2lan"); ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 
16 : 8; ++ break; ++ } ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_80003es2lan; ++ func->read_nvm = e1000_read_nvm_eerd; ++ func->release_nvm = e1000_release_nvm_80003es2lan; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_80003es2lan; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_80003es2lan"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? TRUE : FALSE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pcie_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_80003es2lan; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_80003es2lan; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface link setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_setup_copper_link_80003es2lan ++ : e1000_setup_fiber_serdes_link_generic; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* read mac address */ ++ func->read_mac_addr = e1000_read_mac_addr_80003es2lan; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_80003es2lan; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_80003es2lan - Init ESB2 func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_80003es2lan"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_80003es2lan; ++ hw->func.init_nvm_params = e1000_init_nvm_params_80003es2lan; ++ hw->func.init_phy_params = e1000_init_phy_params_80003es2lan; ++} ++ ++/** ++ * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to acquire access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ DEBUGFUNC("e1000_acquire_phy_80003es2lan"); ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ mask |= E1000_SWFW_CSR_SM; ++ ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_phy_80003es2lan - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ DEBUGFUNC("e1000_release_phy_80003es2lan"); ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ mask |= E1000_SWFW_CSR_SM; ++ ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the EEPROM. This is a function ++ * pointer entry point called by the api module. 
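/*
 * Illustrative usage only -- not part of the upstream patch.
 * e1000_init_function_pointers_80003es2lan() above only installs the three
 * init_*_params hooks; something in the api layer must then invoke them so
 * each can fill in its slice of hw->func. The explicit sequence below is a
 * simplified assumption about that flow, demo_setup_80003es2lan() is a
 * hypothetical helper, and the snippet relies on the driver's own headers.
 */
#include "e1000_api.h"

static s32 demo_setup_80003es2lan(struct e1000_hw *hw)
{
	s32 ret_val;

	/* Stage 1: family-specific code installs the param initializers. */
	e1000_init_function_pointers_80003es2lan(hw);

	/* Stage 2: run them; each populates further hw->func entries. */
	ret_val = hw->func.init_mac_params(hw);
	if (ret_val)
		return ret_val;

	ret_val = hw->func.init_nvm_params(hw);
	if (ret_val)
		return ret_val;

	return hw->func.init_phy_params(hw);
}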
++ **/ ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); ++ ++ ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_acquire_nvm_generic(hw); ++ ++ if (ret_val) ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the EEPROM. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_release_nvm_80003es2lan"); ++ ++ e1000_release_nvm_generic(hw); ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = E1000_SUCCESS; ++ s32 i = 0, timeout = 200; ++ ++ DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); ++ ++ while (i < timeout) { ++ if (e1000_get_hw_semaphore_generic(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* ++ * Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ e1000_put_hw_semaphore_generic(hw); ++ msec_delay_irq(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_semaphore_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); ++ ++ while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS); ++ /* Empty */ ++ ++ swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_semaphore_generic(hw); ++} ++ ++/** ++ * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: pointer to the data returned from the operation ++ * ++ * Read the GG82563 PHY register. This is a function pointer entry ++ * point called by the api module. 
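/*
 * Illustrative sketch only -- not part of the upstream patch. The helpers
 * above follow a strict bracket: acquire the shared SW/FW resource, do the
 * register work, and release the resource on every exit path, including the
 * error paths. Below is a compact standalone rendering of that discipline
 * using the same goto-out idiom as the driver; demo_acquire(),
 * demo_release() and the register stubs are invented stand-ins.
 */
#include <stdio.h>

#define DEMO_SUCCESS   0
#define DEMO_ERR_SWFW  13

static int demo_acquire(void)
{
	/* the real code polls SW_FW_SYNC and fails with -DEMO_ERR_SWFW on timeout */
	return DEMO_SUCCESS;
}

static void demo_release(void) { }

static int demo_page_select(unsigned page)
{
	(void)page;
	return DEMO_SUCCESS;
}

static int demo_read_reg(unsigned reg, unsigned short *val)
{
	(void)reg;
	*val = 0x1234;
	return DEMO_SUCCESS;
}

static int demo_read_paged_reg(unsigned page, unsigned reg, unsigned short *val)
{
	int ret_val;

	ret_val = demo_acquire();
	if (ret_val)
		goto out;		/* nothing held yet, just bail */

	ret_val = demo_page_select(page);
	if (ret_val)
		goto release;		/* resource held: must release */

	ret_val = demo_read_reg(reg, val);

release:
	demo_release();
out:
	return ret_val;
}

int main(void)
{
	unsigned short val = 0;

	if (demo_read_paged_reg(0, 21, &val) == DEMO_SUCCESS)
		printf("reg = 0x%04x\n", val);
	return 0;
}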
++ **/ ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ DEBUGFUNC("e1000_read_phy_reg_gg82563_80003es2lan"); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ usec_delay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ usec_delay(200); ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ usec_delay(200); ++ e1000_release_phy_80003es2lan(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: value to write to the register ++ * ++ * Write to the GG82563 PHY register. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ DEBUGFUNC("e1000_write_phy_reg_gg82563_80003es2lan"); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ usec_delay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ goto out; ++ } ++ ++ usec_delay(200); ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ usec_delay(200); ++ e1000_release_phy_80003es2lan(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_80003es2lan - Write to ESB2 NVM ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @words: number of words to write ++ * @data: buffer of data to write to the NVM ++ * ++ * Write "words" of data to the ESB2 NVM. 
This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ DEBUGFUNC("e1000_write_nvm_80003es2lan"); ++ ++ return e1000_write_nvm_spi(hw, offset, words, data); ++} ++ ++/** ++ * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete ++ * @hw: pointer to the HW structure ++ * ++ * Wait a specific amount of time for manageability processes to complete. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ DEBUGFUNC("e1000_get_cfg_done_80003es2lan"); ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ ++ while (timeout) { ++ if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) ++ break; ++ msec_delay(1); ++ timeout--; ++ } ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex ++ * @hw: pointer to the HW structure ++ * ++ * Force the speed and duplex settings onto the PHY. This is a ++ * function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_80003es2lan"); ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("GG82563 PSCR: %X\n", phy_data); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. */ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++ if (hw->phy.autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link " ++ "on GG82563 phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1000_phy_reset_dsp_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Resetting the phy means we need to verify the TX_CLK corresponds ++ * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. ++ */ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. 
++ */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_80003es2lan - Set approximate cable length ++ * @hw: pointer to the HW structure ++ * ++ * Find the approximate cable length as measured by the GG82563 PHY. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ DEBUGFUNC("e1000_get_cable_length_80003es2lan"); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ phy->min_cable_length = e1000_gg82563_cable_length_table[index]; ++ phy->max_cable_length = e1000_gg82563_cable_length_table[index+5]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_80003es2lan - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_get_link_up_info_80003es2lan"); ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, ++ speed, ++ duplex); ++ if (ret_val) ++ goto out; ++ if (*speed == SPEED_1000) ++ ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); ++ else ++ ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, ++ *duplex); ++ } else { ++ ret_val = e1000_get_speed_and_duplex_fiber_serdes_generic(hw, ++ speed, ++ duplex); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_80003es2lan - Reset the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Perform a global reset to the ESB2 controller. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw_80003es2lan"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ goto out; ++ ++ /* Clear any pending interrupt events. 
*/ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_80003es2lan - Initialize the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_80003es2lan"); ++ ++ e1000_initialize_hw_bits_80003es2lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); ++ ++ /* Enable retransmit on late collisions */ ++ reg_data = E1000_READ_REG(hw, E1000_TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ E1000_WRITE_REG(hw, E1000_TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = E1000_READ_REG(hw, E1000_TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = E1000_READ_REG(hw, E1000_TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_80003es2lan"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ reg &= ~(1 << 20); ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link ++ * @hw: pointer to the HW structure ++ * ++ * Setup some GG82563 PHY registers for obtaining link ++ **/ ++static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl_ext; ++ u16 data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_gg82563_80003es2lan"); ++ ++ if (!phy->reset_disable) { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ ++ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (phy->mdix) { ++ case 1: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (phy->disable_polarity_correction) ++ data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_commit(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ goto out; ++ } ++ ++ } ++ ++ /* Bypass Rx and Tx FIFO's */ ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ &data); ++ if (ret_val) ++ goto out; ++ data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = 
e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, data); ++ if (ret_val) ++ goto out; ++ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (!(e1000_check_mng_mode(hw))) { ++ /* Enable Electrical Idle on the PHY */ ++ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1000_write_phy_reg(hw, ++ GG82563_PHY_PWR_MGMT_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ data); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_INBAND_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_INBAND_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Essentially a wrapper for setting up all things "copper" related. ++ * This is a function pointer entry point called by the mac module. ++ **/ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_80003es2lan"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. ++ */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. 
++ **/ ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u32 tipg; ++ u32 i = 0; ++ u16 reg_data, reg_data2; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_10_100"); ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, E1000_TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ ++ do { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data2); ++ if (ret_val) ++ goto out; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation ++ * @hw: pointer to the HW structure ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * gigabit operation. ++ **/ ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 reg_data, reg_data2; ++ u32 tipg; ++ u32 i = 0; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_1000"); ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ goto out; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, E1000_TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ E1000_WRITE_REG(hw, E1000_TIPG, tipg); ++ ++ ++ do { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ ®_data2); ++ if (ret_val) ++ goto out; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_80003es2lan - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_mac_addr_80003es2lan"); ++ if (e1000_check_alt_mac_addr_generic(hw)) ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. 
++ **/ ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_80003es2lan"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++ ++ temp = E1000_READ_REG(hw, E1000_ICRXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICTXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQEC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQMTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXDMTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_nvm.c 2021-04-07 16:01:27.682633516 +0800 +@@ -0,0 +1,893 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_nvm.h" ++ ++/** ++ * e1000_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ E1000_WRITE_REG(hw, E1000_EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, E1000_EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u32 mask; ++ ++ DEBUGFUNC("e1000_shift_out_eec_bits"); ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_microwire) ++ eecd &= ~E1000_EECD_DO; ++ else if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++} ++ ++/** ++ * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ DEBUGFUNC("e1000_shift_in_eec_bits"); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. 
++ **/ ++s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ DEBUGFUNC("e1000_poll_eerd_eewr_done"); ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = E1000_READ_REG(hw, E1000_EERD); ++ else ++ reg = E1000_READ_REG(hw, E1000_EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) { ++ ret_val = E1000_SUCCESS; ++ break; ++ } ++ ++ usec_delay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_acquire_nvm_generic - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_acquire_nvm_generic"); ++ ++ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ usec_delay(5); ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ DEBUGOUT("Could not acquire NVM grant\n"); ++ ret_val = -E1000_ERR_NVM; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void e1000_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ DEBUGFUNC("e1000_standby_nvm"); ++ ++ if (nvm->type == e1000_nvm_eeprom_microwire) { ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ /* Select EEPROM */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } else if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ DEBUGFUNC("e1000_stop_nvm"); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ e1000_lower_eec_clk(hw, &eecd); ++ } else if (hw->nvm.type == e1000_nvm_eeprom_microwire) { ++ /* CS on Microcwire is active-high */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * e1000_release_nvm_generic - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. 
++ **/ ++void e1000_release_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ DEBUGFUNC("e1000_release_nvm_generic"); ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++} ++ ++/** ++ * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ s32 ret_val = E1000_SUCCESS; ++ u16 timeout = 0; ++ u8 spi_stat_reg; ++ ++ DEBUGFUNC("e1000_ready_nvm_eeprom"); ++ ++ if (nvm->type == e1000_nvm_eeprom_microwire) { ++ /* Clear SK and DI */ ++ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ /* Set CS */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ } else if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ usec_delay(1); ++ timeout = NVM_MAX_RETRY_SPI; ++ ++ /* ++ * Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. ++ */ ++ while (timeout) { ++ e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ usec_delay(5); ++ e1000_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("SPI NVM Status error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_spi - Read EEPROM's using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u16 word_in; ++ u8 read_opcode = NVM_READ_OPCODE_SPI; ++ ++ DEBUGFUNC("e1000_read_nvm_spi"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_standby_nvm(hw); ++ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ read_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); ++ ++ /* ++ * Read the data. SPI NVMs increment the address with each byte ++ * read and will roll over if reading beyond the end. 
This allows ++ * us to read the whole NVM from any offset ++ */ ++ for (i = 0; i < words; i++) { ++ word_in = e1000_shift_in_eec_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_microwire - Reads EEPROM's using microwire ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 e1000_read_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u8 read_opcode = NVM_READ_OPCODE_MICROWIRE; ++ ++ DEBUGFUNC("e1000_read_nvm_microwire"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ for (i = 0; i < words; i++) { ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)(offset + i), ++ nvm->address_bits); ++ ++ /* ++ * Read the data. For microwire, each word requires the ++ * overhead of setup and tear-down. ++ */ ++ data[i] = e1000_shift_in_eec_bits(hw, 16); ++ e1000_standby_nvm(hw); ++ } ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_nvm_eerd"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ E1000_WRITE_REG(hw, E1000_EERD, eerd); ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (E1000_READ_REG(hw, E1000_EERD) >> ++ E1000_NVM_RW_REG_DATA); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 widx = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_spi"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ msec_delay(10); ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ e1000_standby_nvm(hw); ++ ++ /* ++ * Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ e1000_standby_nvm(hw); ++ break; ++ } ++ } ++ } ++ ++ msec_delay(10); ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_microwire - Writes EEPROM using microwire ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using microwire interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. ++ **/ ++s32 e1000_write_nvm_microwire(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u32 eecd; ++ u16 words_written = 0; ++ u16 widx = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_microwire"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. 
++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_nvm(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ e1000_shift_out_eec_bits(hw, NVM_EWEN_OPCODE_MICROWIRE, ++ (u16)(nvm->opcode_bits + 2)); ++ ++ e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); ++ ++ e1000_standby_nvm(hw); ++ ++ while (words_written < words) { ++ e1000_shift_out_eec_bits(hw, NVM_WRITE_OPCODE_MICROWIRE, ++ nvm->opcode_bits); ++ ++ e1000_shift_out_eec_bits(hw, (u16)(offset + words_written), ++ nvm->address_bits); ++ ++ e1000_shift_out_eec_bits(hw, data[words_written], 16); ++ ++ e1000_standby_nvm(hw); ++ ++ for (widx = 0; widx < 200; widx++) { ++ eecd = E1000_READ_REG(hw, E1000_EECD); ++ if (eecd & E1000_EECD_DO) ++ break; ++ usec_delay(50); ++ } ++ ++ if (widx == 200) { ++ DEBUGOUT("NVM Write did not complete\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto release; ++ } ++ ++ e1000_standby_nvm(hw); ++ ++ words_written++; ++ } ++ ++ e1000_shift_out_eec_bits(hw, NVM_EWDS_OPCODE_MICROWIRE, ++ (u16)(nvm->opcode_bits + 2)); ++ ++ e1000_shift_out_eec_bits(hw, 0, (u16)(nvm->address_bits - 2)); ++ ++release: ++ e1000_release_nvm(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_pba_num_generic - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ **/ ++s32 e1000_read_pba_num_generic(struct e1000_hw *hw, u32 *pba_num) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_read_pba_num_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ *pba_num = (u32)(nvm_data << 16); ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ *pba_num |= nvm_data; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_generic - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. ++ **/ ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 offset, nvm_data, i; ++ ++ DEBUGFUNC("e1000_read_mac_addr"); ++ ++ for (i = 0; i < ETH_ADDR_LEN; i += 2) { ++ offset = i >> 1; ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); ++ hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* Flip last bit of mac address if we're on second port */ ++ if (hw->bus.func == E1000_FUNC_1) ++ hw->mac.perm_addr[5] ^= 1; ++ ++ for (i = 0; i < ETH_ADDR_LEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
++ **/ ++s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ DEBUGFUNC("e1000_validate_nvm_checksum_generic"); ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ DEBUGOUT("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_generic - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum"); ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) { ++ DEBUGOUT("NVM Write Error while updating checksum.\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reload_nvm_generic - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. ++ **/ ++void e1000_reload_nvm_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ ++ DEBUGFUNC("e1000_reload_nvm_generic"); ++ ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/* Function pointers local to this file and not intended for public use */ ++ ++/** ++ * e1000_acquire_nvm - Acquire exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * For those silicon families which have implemented a NVM acquire function, ++ * run the defined function else return success. ++ **/ ++s32 e1000_acquire_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.acquire_nvm) ++ return hw->func.acquire_nvm(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * For those silicon families which have implemented a NVM release function, ++ * run the defined fucntion else return success. ++ **/ ++void e1000_release_nvm(struct e1000_hw *hw) ++{ ++ if (hw->func.release_nvm) ++ hw->func.release_nvm(hw); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82541.c 2021-04-07 16:01:27.677633523 +0800 +@@ -0,0 +1,1328 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. 
++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82541 ++ * e1000_82547 ++ * e1000_82541_rev_2 ++ * e1000_82547_rev_2 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82541.h" ++ ++static s32 e1000_init_phy_params_82541(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82541(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82541(struct e1000_hw *hw); ++static s32 e1000_init_hw_82541(struct e1000_hw *hw); ++static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw); ++static s32 e1000_check_for_link_82541(struct e1000_hw *hw); ++static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw); ++static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_setup_led_82541(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_82541(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw); ++static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, ++ bool link_up); ++static s32 e1000_phy_init_script_82541(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw); ++ ++static const u16 e1000_igp_cable_length_table[] = ++ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, ++ 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120}; ++#define IGP01E1000_AGC_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_igp_cable_length_table) / \ ++ sizeof(e1000_igp_cable_length_table[0])) ++ ++struct e1000_dev_spec_82541 { ++ e1000_dsp_config dsp_config; ++ e1000_ffe_config ffe_config; ++ u16 spd_default; ++ bool phy_init_script; ++}; ++ ++/** ++ * e1000_init_phy_params_82541 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_phy_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82541"); ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_igp; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_igp; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_igp; ++ func->get_cable_length = e1000_get_cable_length_igp_82541; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->get_phy_info = e1000_get_phy_info_igp; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->reset_phy = e1000_phy_hw_reset_82541; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_82541; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82541; ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ if (phy->id != IGP01E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82541 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82541"); ++ ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->type = e1000_nvm_eeprom_spi; ++ eecd |= E1000_EECD_ADDR_BITS; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->type = e1000_nvm_eeprom_spi; ++ eecd &= ~E1000_EECD_ADDR_BITS; ++ break; ++ case e1000_nvm_override_microwire_large: ++ nvm->type = e1000_nvm_eeprom_microwire; ++ eecd |= E1000_EECD_SIZE; ++ break; ++ case e1000_nvm_override_microwire_small: ++ nvm->type = e1000_nvm_eeprom_microwire; ++ eecd &= ~E1000_EECD_SIZE; ++ break; ++ default: ++ nvm->type = eecd & E1000_EECD_TYPE ++ ? e1000_nvm_eeprom_spi ++ : e1000_nvm_eeprom_microwire; ++ break; ++ } ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ++ ? 16 : 8; ++ nvm->delay_usec = 1; ++ nvm->opcode_bits = 8; ++ nvm->page_size = (eecd & E1000_EECD_ADDR_BITS) ++ ? 32 : 8; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_spi; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_spi; ++ ++ /* ++ * nvm->word_size must be discovered after the pointers ++ * are set so we can verify the size from the nvm image ++ * itself. Temporarily set it to a dummy value so the ++ * read will work. ++ */ ++ nvm->word_size = 64; ++ ret_val = e1000_read_nvm(hw, NVM_CFG, 1, &size); ++ if (ret_val) ++ goto out; ++ size = (size & NVM_SIZE_MASK) >> NVM_SIZE_SHIFT; ++ /* ++ * if size != 0, it can be added to a constant and become ++ * the left-shift value to set the word_size. Otherwise, ++ * word_size stays at 64. 
++ */ ++ if (size) { ++ size += NVM_WORD_SIZE_BASE_SHIFT_82541; ++ nvm->word_size = 1 << size; ++ } ++ } else { ++ nvm->address_bits = (eecd & E1000_EECD_ADDR_BITS) ++ ? 8 : 6; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ nvm->word_size = (eecd & E1000_EECD_ADDR_BITS) ++ ? 256 : 64; ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mac_params_82541 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_mac_params_82541"); ++ ++ /* Set media type */ ++ hw->phy.media_type = e1000_media_type_copper; ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ ++ /* Function Pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82541; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82541; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface link setup */ ++ func->setup_physical_interface = e1000_setup_copper_link_82541; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_link_82541; ++ /* link info */ ++ func->get_link_up_info = e1000_get_link_up_info_82541; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_82541; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_82541; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82541; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82541); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82541 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. 
++ **/ ++void e1000_init_function_pointers_82541(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82541"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82541; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82541; ++ hw->func.init_phy_params = e1000_init_phy_params_82541; ++} ++ ++/** ++ * e1000_reset_hw_82541 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82541(struct e1000_hw *hw) ++{ ++ u32 ledctl, ctrl, icr, manc; ++ ++ DEBUGFUNC("e1000_reset_hw_82541"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete ++ * before resetting the device. ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Must reset the Phy before resetting the MAC */ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_PHY_RST)); ++ msec_delay(5); ++ } ++ ++ DEBUGOUT("Issuing a global reset to 82541/82547 MAC\n"); ++ switch (hw->mac.type) { ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ /* ++ * These controllers can't ack the 64-bit write when ++ * issuing the reset, so we use IO-mapping as a ++ * workaround to issue the reset. ++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ default: ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ } ++ ++ /* Wait for NVM reload */ ++ msec_delay(20); ++ ++ /* Disable HW ARPs on ASF enabled adapters */ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ manc &= ~E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(hw, E1000_MANC, manc); ++ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ e1000_phy_init_script_82541(hw); ++ ++ /* Configure activity LED after Phy reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } ++ ++ /* Once again, mask the interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ /* Clear any pending interrupt events. */ ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_hw_82541 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, txdctl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_hw_82541"); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. 
*/ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ /* ++ * Avoid back to back register writes by adding the register ++ * read (flush). This is to protect against some strange ++ * bridge configurations that may issue Memory Write Block ++ * (MWB) to our register space. ++ */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82541(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_82541 - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_link_up_info_82541(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_get_link_up_info_82541"); ++ ++ ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); ++ if (ret_val) ++ goto out; ++ ++ if (!phy->speed_downgraded) ++ goto out; ++ ++ /* ++ * IGP01 PHY may advertise full duplex operation after speed ++ * downgrade even if it is operating at half duplex. ++ * Here we set the duplex settings to match the duplex in the ++ * link partner's capabilities. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & NWAY_ER_LP_NWAY_CAPS)) { ++ *duplex = HALF_DUPLEX; ++ } else { ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &data); ++ if (ret_val) ++ goto out; ++ ++ if (*speed == SPEED_100) { ++ if (!(data & NWAY_LPAR_100TX_FD_CAPS)) ++ *duplex = HALF_DUPLEX; ++ } else if (*speed == SPEED_10) { ++ if (!(data & NWAY_LPAR_10T_FD_CAPS)) ++ *duplex = HALF_DUPLEX; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_82541 - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and relase the semaphore (if necessary). ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_phy_hw_reset_82541(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u32 ledctl; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_82541"); ++ ++ ret_val = e1000_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_init_script_82541(hw); ++ ++ if ((hw->mac.type == e1000_82541) || (hw->mac.type == e1000_82547)) { ++ /* Configure activity LED after PHY reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82541 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_setup_copper_link_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ u32 ctrl, ledctl; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82541"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ hw->phy.reset_disable = FALSE; ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ /* Earlier revs of the IGP phy require us to force MDI. */ ++ if (hw->mac.type == e1000_82541 || hw->mac.type == e1000_82547) { ++ dev_spec->dsp_config = e1000_dsp_config_disabled; ++ phy->mdix = 1; ++ } else { ++ dev_spec->dsp_config = e1000_dsp_config_enabled; ++ } ++ ++ ret_val = e1000_copper_link_setup_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.autoneg) { ++ if (dev_spec->ffe_config == e1000_ffe_config_active) ++ dev_spec->ffe_config = e1000_ffe_config_enabled; ++ } ++ ++ /* Configure activity LED after Phy reset */ ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ ledctl &= IGP_ACTIVITY_LED_MASK; ++ ledctl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_link_82541 - Check/Store link connection ++ * @hw: pointer to the HW structure ++ * ++ * This checks the link condition of the adapter and stores the ++ * results in the hw->mac structure. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_check_for_link_82541(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_link_82541"); ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. 
++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ ret_val = e1000_config_dsp_after_link_change_82541(hw, FALSE); ++ goto out; /* No link detected */ ++ } ++ ++ mac->get_link_status = FALSE; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_config_dsp_after_link_change_82541(hw, TRUE); ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_dsp_after_link_change_82541 - Config DSP after link ++ * @hw: pointer to the HW structure ++ * @link_up: boolean flag for link up status ++ * ++ * Return E1000_ERR_PHY when failing to read/write the PHY, else E1000_SUCCESS ++ * at any other case. ++ * ++ * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a ++ * gigabit link is achieved to improve link quality. ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_config_dsp_after_link_change_82541(struct e1000_hw *hw, ++ bool link_up) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ u32 idle_errs = 0; ++ u16 phy_data, phy_saved_data, speed, duplex, i; ++ u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; ++ u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_PARAM_A, ++ IGP01E1000_PHY_AGC_PARAM_B, ++ IGP01E1000_PHY_AGC_PARAM_C, ++ IGP01E1000_PHY_AGC_PARAM_D}; ++ ++ DEBUGFUNC("e1000_config_dsp_after_link_change_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (link_up) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (speed != SPEED_1000) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ if ((dev_spec->dsp_config == e1000_dsp_config_enabled) && ++ phy->min_cable_length >= 50) { ++ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, ++ dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ dev_spec->dsp_config = e1000_dsp_config_activated; ++ } ++ ++ if ((dev_spec->ffe_config != e1000_ffe_config_enabled) || ++ (phy->min_cable_length >= 50)) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* clear previous idle error counts */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ for (i = 0; i < ffe_idle_err_timeout; i++) { ++ usec_delay(1000); ++ 
ret_val = e1000_read_phy_reg(hw, ++ PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); ++ if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { ++ dev_spec->ffe_config = e1000_ffe_config_active; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_CM_CP); ++ if (ret_val) ++ goto out; ++ break; ++ } ++ ++ if (idle_errs) ++ ffe_idle_err_timeout = ++ FFE_IDLE_ERR_COUNT_TIMEOUT_100; ++ } ++ } else { ++ if (dev_spec->dsp_config == e1000_dsp_config_activated) { ++ /* ++ * Save off the current value of register 0x2F5B ++ * to be restored at the end of the routines. ++ */ ++ ret_val = e1000_read_phy_reg(hw, ++ 0x2F5B, ++ &phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_FORCE_GIG); ++ if (ret_val) ++ goto out; ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, ++ dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x2F5B, ++ phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ dev_spec->dsp_config = e1000_dsp_config_enabled; ++ } ++ ++ if (dev_spec->ffe_config != e1000_ffe_config_active) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * Save off the current value of register 0x2F5B ++ * to be restored at the end of the routines. ++ */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_FORCE_GIG); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_DEFAULT); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ goto out; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ goto out; ++ ++ dev_spec->ffe_config = e1000_ffe_config_enabled; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_igp_82541 - Determine cable length for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which reperesent the ++ * cobination of course and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. This is a function pointer entry point called by the ++ * api module. 
++ **/ ++static s32 e1000_get_cable_length_igp_82541(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, data; ++ u16 cur_agc_value, agc_value = 0; ++ u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; ++ u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_A, ++ IGP01E1000_PHY_AGC_B, ++ IGP01E1000_PHY_AGC_C, ++ IGP01E1000_PHY_AGC_D}; ++ ++ DEBUGFUNC("e1000_get_cable_length_igp_82541"); ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &data); ++ if (ret_val) ++ goto out; ++ ++ cur_agc_value = data >> IGP01E1000_AGC_LENGTH_SHIFT; ++ ++ /* Bounds checking */ ++ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || ++ (cur_agc_value == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ agc_value += cur_agc_value; ++ ++ if (min_agc_value > cur_agc_value) ++ min_agc_value = cur_agc_value; ++ } ++ ++ /* Remove the minimal AGC result for length < 50m */ ++ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * 50) { ++ agc_value -= min_agc_value; ++ /* Average the three remaining channels for the length. */ ++ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); ++ } else { ++ /* Average the channels for the length. */ ++ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; ++ } ++ ++ phy->min_cable_length = (e1000_igp_cable_length_table[agc_value] > ++ IGP01E1000_AGC_RANGE) ++ ? (e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) ++ : 0; ++ phy->max_cable_length = e1000_igp_cable_length_table[agc_value] + ++ IGP01E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_82541 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by the ++ * api module. ++ **/ ++static s32 e1000_set_d3_lplu_state_82541(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_82541"); ++ ++ switch (hw->mac.type) { ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ break; ++ default: ++ ret_val = e1000_set_d3_lplu_state_generic(hw, active); ++ goto out; ++ break; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_led_82541 - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. This is a function pointer entry ++ * point called by the api module. ++ **/ ++static s32 e1000_setup_led_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_led_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ &dev_spec->spd_default); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ (u16)(dev_spec->spd_default & ++ ~IGP01E1000_GMII_SPD)); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cleanup_led_82541 - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. This is a function pointer ++ * entry point called by the api module. ++ **/ ++static s32 e1000_cleanup_led_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_cleanup_led_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_GMII_FIFO, ++ dev_spec->spd_default); ++ if (ret_val) ++ goto out; ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_init_script_82541 - Initialize GbE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes the IGP PHY. 
++ **/ ++static s32 e1000_phy_init_script_82541(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ u32 ret_val; ++ u16 phy_saved_data; ++ ++ DEBUGFUNC("e1000_phy_init_script_82541"); ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (!dev_spec->phy_init_script) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Delay after phy reset to enable NVM configuration to load */ ++ msec_delay(20); ++ ++ /* ++ * Save off the current value of register 0x2F5B to be restored at ++ * the end of this routine. ++ */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ /* Disabled the PHY transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ msec_delay(20); ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x0140); ++ ++ msec_delay(5); ++ ++ switch (hw->mac.type) { ++ case e1000_82541: ++ case e1000_82547: ++ e1000_write_phy_reg(hw, 0x1F95, 0x0001); ++ ++ e1000_write_phy_reg(hw, 0x1F71, 0xBD21); ++ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0018); ++ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1600); ++ ++ e1000_write_phy_reg(hw, 0x1F31, 0x0014); ++ ++ e1000_write_phy_reg(hw, 0x1F32, 0x161C); ++ ++ e1000_write_phy_reg(hw, 0x1F94, 0x0003); ++ ++ e1000_write_phy_reg(hw, 0x1F96, 0x003F); ++ ++ e1000_write_phy_reg(hw, 0x2010, 0x0008); ++ break; ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ e1000_write_phy_reg(hw, 0x1F73, 0x0099); ++ break; ++ default: ++ break; ++ } ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x3300); ++ ++ msec_delay(20); ++ ++ /* Now enable the transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (hw->mac.type == e1000_82547) { ++ u16 fused, fine, coarse; ++ ++ /* Move to analog registers page */ ++ e1000_read_phy_reg(hw, ++ IGP01E1000_ANALOG_SPARE_FUSE_STATUS, ++ &fused); ++ ++ if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { ++ e1000_read_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_STATUS, ++ &fused); ++ ++ fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; ++ coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; ++ ++ if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { ++ coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_1; ++ } else if (coarse == ++ IGP01E1000_ANALOG_FUSE_COARSE_THRESH) ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_10; ++ ++ fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | ++ (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | ++ (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); ++ ++ e1000_write_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_CONTROL, ++ fused); ++ e1000_write_phy_reg(hw, ++ IGP01E1000_ANALOG_FUSE_BYPASS, ++ IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_script_state_82541 - Enable/Disable PHY init script ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to enable/disable PHY init script ++ * ++ * Allows the driver to enable/disable the PHY init script, if the PHY is an ++ * IGP PHY. This is a function pointer entry point called by the api module. 
++ **/ ++void e1000_init_script_state_82541(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82541 *dev_spec; ++ ++ DEBUGFUNC("e1000_init_script_state_82541"); ++ ++ if (hw->phy.type != e1000_phy_igp) { ++ DEBUGOUT("Initialization script not necessary.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82541 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ dev_spec->phy_init_script = state; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82541 - Remove link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82541(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82541 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_82541(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82541"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_phy.c 2021-04-07 16:01:27.672633530 +0800 +@@ -0,0 +1,2106 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". 
++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_phy.h" ++ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); ++static void e1000_release_phy(struct e1000_hw *hw); ++static s32 e1000_acquire_phy(struct e1000_hw *hw); ++ ++/* Cable length tables */ ++static const u16 e1000_m88_cable_length_table[] = ++ { 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; ++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_m88_cable_length_table) / \ ++ sizeof(e1000_m88_cable_length_table[0])) ++ ++static const u16 e1000_igp_2_cable_length_table[] = ++ { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, ++ 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, ++ 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, ++ 21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, ++ 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, ++ 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, ++ 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, ++ 104, 109, 114, 118, 121, 124}; ++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ ++ (sizeof(e1000_igp_2_cable_length_table) / \ ++ sizeof(e1000_igp_2_cable_length_table[0])) ++ ++/** ++ * e1000_check_reset_block_generic - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Read the PHY management control register and check whether a PHY reset ++ * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise ++ * return E1000_BLK_PHY_RESET (12). ++ **/ ++s32 e1000_check_reset_block_generic(struct e1000_hw *hw) ++{ ++ u32 manc; ++ ++ DEBUGFUNC("e1000_check_reset_block"); ++ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_id - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++s32 e1000_get_phy_id(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 phy_id; ++ ++ DEBUGFUNC("e1000_get_phy_id"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id = (u32)(phy_id << 16); ++ usec_delay(20); ++ ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id |= (u32)(phy_id & PHY_REVISION_MASK); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_reset_dsp_generic - Reset PHY DSP ++ * @hw: pointer to the HW structure ++ * ++ * Reset the digital signal processor. ++ **/ ++s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_reset_dsp_generic"); ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_mdic - Read MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control regsiter in the PHY at offset and stores the ++ * information read to data. 
++ **/ ++s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_phy_reg_mdic"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ E1000_WRITE_REG(hw, E1000_MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Read did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ *data = (u16) mdic; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_phy_reg_mdic"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ E1000_WRITE_REG(hw, E1000_MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Write did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_m88 - Read m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. 
++ **/ ++s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_phy_reg_m88"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_m88 - Write m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_phy_reg_m88"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_phy_reg_igp"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ e1000_release_phy(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = e1000_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_phy_reg_igp"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ e1000_release_phy(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_kmrn_reg_generic - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary. Then reads the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_read_kmrn_reg_generic"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); ++ ++ usec_delay(2); ++ ++ kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_kmrn_reg_generic - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary. Then write the data to PHY register ++ * at the offset using the kumeran interface. Release any acquired semaphores ++ * before exiting. ++ **/ ++s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_write_kmrn_reg_generic"); ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); ++ ++ usec_delay(2); ++ e1000_release_phy(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_m88"); ++ ++ if (phy->reset_disable) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (phy->revision < E1000_REVISION_4) { ++ /* ++ * Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == E1000_REVISION_2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Commit the changes. */ ++ ret_val = e1000_phy_commit(hw); ++ if (ret_val) { ++ DEBUGOUT("Error committing the PHY changes\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. ++ **/ ++s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_copper_link_setup_igp"); ++ ++ if (phy->reset_disable) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error resetting the PHY.\n"); ++ goto out; ++ } ++ ++ /* Wait 15ms for MAC to configure PHY from NVM settings. */ ++ msec_delay(15); ++ ++ /* ++ * The NVM settings will configure LPLU in D3 for ++ * non-IGP1 PHYs. ++ */ ++ if (phy->type == e1000_phy_igp) { ++ /* disable lplu d3 during driver init */ ++ ret_val = e1000_set_d3_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D3\n"); ++ goto out; ++ } ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D0\n"); ++ goto out; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* ++ * when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. 
++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++s32 e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ DEBUGFUNC("e1000_copper_link_autoneg"); ++ ++ /* ++ * Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* ++ * If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Setting up Auto-Negotiation\n"); ++ goto out; ++ } ++ DEBUGOUT("Restarting Auto-Neg\n"); ++ ++ /* ++ * Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). 
++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while waiting for " ++ "autoneg to complete\n"); ++ goto out; ++ } ++ } ++ ++ hw->mac.get_link_status = TRUE; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ DEBUGFUNC("e1000_phy_setup_autoneg"); ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = e1000_read_phy_reg(hw, ++ PHY_1000T_CTRL, ++ &mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* ++ * First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ DEBUGOUT("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ DEBUGOUT("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ DEBUGOUT("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ DEBUGOUT("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) { ++ DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); ++ } ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ DEBUGOUT("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. 
If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ /* ++ * Flow control (Rx & Tx) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled, and Tx Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of Rx Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric Rx PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ ret_val = e1000_write_phy_reg(hw, ++ PHY_1000T_CTRL, ++ mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_generic - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_setup_copper_link_generic"); ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. ++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ DEBUGOUT("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. 
++ */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (link) { ++ DEBUGOUT("Valid link established!!!\n"); ++ e1000_config_collision_dist_generic(hw); ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ } else { ++ DEBUGOUT("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("IGP PSCR: %X\n", phy_data); ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Link taking longer than expected.\n"); ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on Tx must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. 
*/ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_phy_reset_dsp_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. ++ **/ ++void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.type = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? 
*/ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ DEBUGOUT("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ DEBUGOUT("Forcing 10mb\n"); ++ } ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++} ++ ++/** ++ * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d3_lplu_state_generic"); ++ ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_downshift_generic - Checks whether a downshift in speed occured ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. 
++ **/ ++s32 e1000_check_downshift_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_downshift_generic"); ++ ++ switch (phy->type) { ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = FALSE; ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask) ? TRUE : FALSE; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_check_polarity_m88"); ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++s32 e1000_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ DEBUGFUNC("e1000_check_polarity_igp"); ++ ++ /* ++ * Polarity is determined based on the speed of ++ * our connection. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* ++ * This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_wait_autoneg_generic - Wait for auto-neg compeletion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++s32 e1000_wait_autoneg_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, phy_status; ++ ++ DEBUGFUNC("e1000_wait_autoneg_generic"); ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. 
*/ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msec_delay(100); ++ } ++ ++ /* ++ * PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_has_link_generic - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. ++ **/ ++s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 i, phy_status; ++ ++ DEBUGFUNC("e1000_phy_has_link_generic"); ++ ++ for (i = 0; i < iterations; i++) { ++ /* ++ * Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ msec_delay_irq(usec_interval/1000); ++ else ++ usec_delay(usec_interval); ++ } ++ ++ *success = (i < iterations) ? TRUE : FALSE; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 e1000_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ DEBUGFUNC("e1000_get_cable_length_m88"); ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index+1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which reperesent the ++ * cobination of course and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. 
++ **/ ++s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = ++ {IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D}; ++ ++ DEBUGFUNC("e1000_get_cable_length_igp_2"); ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Getting bits 15:9, which represent the combination of ++ * course and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. ++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_m88"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ DEBUGOUT("Phy info is only valid for copper media\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->polarity_correction = (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) ++ ? 
TRUE ++ : FALSE; ++ ++ ret_val = e1000_check_polarity_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX) ? TRUE : FALSE; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ DEBUGFUNC("e1000_get_phy_info_igp"); ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ DEBUGOUT("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = TRUE; ++ ++ ret_val = e1000_check_polarity_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX) ? TRUE : FALSE; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_sw_reset_generic - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. 
++ **/ ++s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ DEBUGFUNC("e1000_phy_sw_reset_generic"); ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ usec_delay(1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_generic - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and relase the semaphore (if necessary). ++ **/ ++s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_generic"); ++ ++ ret_val = e1000_check_reset_block(hw); ++ if (ret_val) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_acquire_phy(hw); ++ if (ret_val) ++ goto out; ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(phy->reset_delay_us); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(150); ++ ++ e1000_release_phy(hw); ++ ++ ret_val = e1000_get_phy_cfg_done(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_generic - Generic configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Generic function to wait 10 milli-seconds for configuration to complete ++ * and return success. ++ **/ ++s32 e1000_get_cfg_done_generic(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_get_cfg_done_generic"); ++ ++ msec_delay_irq(10); ++ ++ return E1000_SUCCESS; ++} ++ ++/* Internal function pointers */ ++ ++/** ++ * e1000_get_phy_cfg_done - Generic PHY configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family did not implement a family specific ++ * get_cfg_done function. ++ **/ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ if (hw->func.get_cfg_done) ++ return hw->func.get_cfg_done(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_release_phy - Generic release PHY ++ * @hw: pointer to the HW structure ++ * ++ * Return if silicon family does not require a semaphore when accessing the ++ * PHY. ++ **/ ++static void e1000_release_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.release_phy) ++ hw->func.release_phy(hw); ++} ++ ++/** ++ * e1000_acquire_phy - Generic acquire PHY ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family does not require a semaphore when ++ * accessing the PHY. ++ **/ ++static s32 e1000_acquire_phy(struct e1000_hw *hw) ++{ ++ if (hw->func.acquire_phy) ++ return hw->func.acquire_phy(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex - Generic force PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * ++ * When the silicon family has not implemented a forced speed/duplex ++ * function for the PHY, simply return E1000_SUCCESS. 
++ **/ ++s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ if (hw->func.force_speed_duplex) ++ return hw->func.force_speed_duplex(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. ++ **/ ++s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ DEBUGOUT("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ e1000_write_phy_reg(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ e1000_write_phy_reg(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ e1000_write_phy_reg(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to Tx amplitude in Giga mode */ ++ e1000_write_phy_reg(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ e1000_write_phy_reg(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ e1000_write_phy_reg(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ e1000_write_phy_reg(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ e1000_write_phy_reg(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ e1000_write_phy_reg(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ e1000_write_phy_reg(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ e1000_write_phy_reg(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ e1000_write_phy_reg(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ e1000_write_phy_reg(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ e1000_write_phy_reg(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ e1000_write_phy_reg(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ e1000_write_phy_reg(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ e1000_write_phy_reg(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ e1000_write_phy_reg(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ e1000_write_phy_reg(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ e1000_write_phy_reg(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ e1000_write_phy_reg(hw, 0x1798, 0xD008); ++ /* ++ * Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ e1000_write_phy_reg(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ e1000_write_phy_reg(hw, 0x187A, 0x0800); ++ /* ++ * Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ e1000_write_phy_reg(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ e1000_write_phy_reg(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ e1000_write_phy_reg(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ e1000_write_phy_reg(hw, 0x0000, 0x1340); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_phy_type_from_id - Get PHY type from id ++ * @phy_id: phy_id read from the phy ++ * ++ * Returns the phy type 
from the id. ++ **/ ++e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) ++{ ++ e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ switch (phy_id) { ++ case M88E1000_I_PHY_ID: ++ case M88E1000_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ ++ phy_type = e1000_phy_igp_2; ++ break; ++ case GG82563_E_PHY_ID: ++ phy_type = e1000_phy_gg82563; ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy_type = e1000_phy_ife; ++ break; ++ default: ++ phy_type = e1000_phy_unknown; ++ break; ++ } ++ return phy_type; ++} ++ ++/** ++ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1000_read_phy_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1000_read_phy_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(hw, PHY_CONTROL, mii_reg); ++ msec_delay(1); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_osdep.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_osdep.h 2021-04-07 16:01:27.667633538 +0800 +@@ -0,0 +1,124 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* glue for the OS-dependent part of e1000 ++ * includes register access macros ++ */ ++ ++#ifndef _E1000_OSDEP_H_ ++#define _E1000_OSDEP_H_ ++ ++#include ++#include ++#include ++#include ++ ++#include "kcompat.h" ++ ++#define usec_delay(x) udelay(x) ++#ifndef msec_delay ++#define msec_delay(x) do { if(in_interrupt()) { \ ++ /* Don't sleep in interrupt context! */ \ ++ BUG(); \ ++ } else { \ ++ msleep(x); \ ++ } } while (0) ++ ++/* Some workarounds require millisecond delays and are run during interrupt ++ * context. Most notably, when establishing link, the phy may need tweaking ++ * but cannot process phy register reads/writes faster than millisecond ++ * intervals...and we establish link due to a "link status change" interrupt. ++ */ ++#define msec_delay_irq(x) mdelay(x) ++#endif ++ ++#define PCI_COMMAND_REGISTER PCI_COMMAND ++#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE ++#define ETH_ADDR_LEN ETH_ALEN ++ ++#ifdef __BIG_ENDIAN ++#define E1000_BIG_ENDIAN __BIG_ENDIAN ++#endif ++ ++ ++#define DEBUGOUT(S) ++#define DEBUGOUT1(S, A...) ++ ++#define DEBUGFUNC(F) DEBUGOUT(F "\n") ++#define DEBUGOUT2 DEBUGOUT1 ++#define DEBUGOUT3 DEBUGOUT2 ++#define DEBUGOUT7 DEBUGOUT3 ++ ++#define E1000_REGISTER(a, reg) (((a)->mac.type >= e1000_82543) \ ++ ? reg \ ++ : e1000_translate_register_82542(reg)) ++ ++#define E1000_WRITE_REG(a, reg, value) ( \ ++ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg)))) ++ ++#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg))) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ ++ writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ ++ readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))) ++ ++#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY ++#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY ++ ++#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ ++ writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))) ++ ++#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ ++ readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))) ++ ++#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ ++ writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))) ++ ++#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ ++ readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))) ++ ++#define E1000_WRITE_REG_IO(a, reg, offset) do { \ ++ outl(reg, ((a)->io_base)); \ ++ outl(offset, ((a)->io_base + 4)); } while(0) ++ ++#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) ++ ++#define E1000_WRITE_FLASH_REG(a, reg, value) ( \ ++ writel((value), ((a)->flash_address + reg))) ++ ++#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \ ++ writew((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg)) ++ ++#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg)) ++ ++#endif /* _E1000_OSDEP_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_mac.c 2021-04-07 16:01:27.663633543 +0800 +@@ -0,0 +1,2039 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel 
Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000_api.h" ++#include "e1000_mac.h" ++ ++/** ++ * e1000_remove_device_generic - Free device specific structure ++ * @hw: pointer to the HW structure ++ * ++ * If a device specific structure was allocated, this function will ++ * free it. ++ **/ ++void e1000_remove_device_generic(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_remove_device_generic"); ++ ++ /* Freeing the dev_spec member of e1000_hw structure */ ++ e1000_free_dev_spec_struct(hw); ++} ++ ++/** ++ * e1000_get_bus_info_pci_generic - Get PCI(x) bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function. ++ **/ ++s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ u32 status = E1000_READ_REG(hw, E1000_STATUS); ++ s32 ret_val = E1000_SUCCESS; ++ u16 pci_header_type; ++ ++ DEBUGFUNC("e1000_get_bus_info_pci_generic"); ++ ++ /* PCI or PCI-X? */ ++ bus->type = (status & E1000_STATUS_PCIX_MODE) ++ ? e1000_bus_type_pcix ++ : e1000_bus_type_pci; ++ ++ /* Bus speed */ ++ if (bus->type == e1000_bus_type_pci) { ++ bus->speed = (status & E1000_STATUS_PCI66) ++ ? e1000_bus_speed_66 ++ : e1000_bus_speed_33; ++ } else { ++ switch (status & E1000_STATUS_PCIX_SPEED) { ++ case E1000_STATUS_PCIX_SPEED_66: ++ bus->speed = e1000_bus_speed_66; ++ break; ++ case E1000_STATUS_PCIX_SPEED_100: ++ bus->speed = e1000_bus_speed_100; ++ break; ++ case E1000_STATUS_PCIX_SPEED_133: ++ bus->speed = e1000_bus_speed_133; ++ break; ++ default: ++ bus->speed = e1000_bus_speed_reserved; ++ break; ++ } ++ } ++ ++ /* Bus width */ ++ bus->width = (status & E1000_STATUS_BUS64) ++ ? e1000_bus_width_64 ++ : e1000_bus_width_32; ++ ++ /* Which PCI(-X) function? */ ++ e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); ++ if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) ++ bus->func = (status & E1000_STATUS_FUNC_MASK) ++ >> E1000_STATUS_FUNC_SHIFT; ++ else ++ bus->func = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_pcie_generic - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. 
++ **/ ++s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ u32 status; ++ u16 pcie_link_status, pci_header_type; ++ ++ DEBUGFUNC("e1000_get_bus_info_pcie_generic"); ++ ++ bus->type = e1000_bus_type_pci_express; ++ bus->speed = e1000_bus_speed_2500; ++ ++ ret_val = e1000_read_pcie_cap_reg(hw, ++ PCIE_LINK_STATUS, ++ &pcie_link_status); ++ if (ret_val) ++ bus->width = e1000_bus_width_unknown; ++ else ++ bus->width = (e1000_bus_width)((pcie_link_status & ++ PCIE_LINK_WIDTH_MASK) >> ++ PCIE_LINK_WIDTH_SHIFT); ++ ++ e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); ++ if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) { ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ bus->func = (status & E1000_STATUS_FUNC_MASK) ++ >> E1000_STATUS_FUNC_SHIFT; ++ } else { ++ bus->func = 0; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_vfta_generic - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void e1000_clear_vfta_generic(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ DEBUGFUNC("e1000_clear_vfta_generic"); ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_write_vfta_generic - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ DEBUGFUNC("e1000_write_vfta_generic"); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_init_rx_addrs_generic - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setups the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. ++ **/ ++void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ ++ DEBUGFUNC("e1000_init_rx_addrs_generic"); ++ ++ /* Setup the receive address */ ++ DEBUGOUT("Programming MAC Address into RAR[0]\n"); ++ ++ e1000_rar_set_generic(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is saved in the hw struct and ++ * programmed into RAR0 and the function returns success, otherwise the ++ * function returns an error. 
++ **/ ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = E1000_SUCCESS; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ADDR_LEN]; ++ ++ DEBUGFUNC("e1000_check_alt_mac_addr_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (nvm_alt_mac_addr_offset == 0xFFFF) { ++ ret_val = -(E1000_NOT_IMPLEMENTED); ++ goto out; ++ } ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += ETH_ADDR_LEN/sizeof(u16); ++ ++ for (i = 0; i < ETH_ADDR_LEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (alt_mac_addr[0] & 0x01) { ++ ret_val = -(E1000_NOT_IMPLEMENTED); ++ goto out; ++ } ++ ++ for (i = 0; i < ETH_ADDR_LEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i] = alt_mac_addr[i]; ++ ++ e1000_rar_set(hw, hw->mac.perm_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_rar_set_generic - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ DEBUGFUNC("e1000_rar_set_generic"); ++ ++ /* ++ * HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) { ++ if (!hw->mac.disable_av) ++ rar_high |= E1000_RAH_AV; ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (index << 1), rar_low); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((index << 1) + 1), rar_high); ++} ++ ++/** ++ * e1000_mta_set_generic - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. ++ **/ ++void e1000_mta_set_generic(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta; ++ ++ DEBUGFUNC("e1000_mta_set_generic"); ++ /* ++ * The MTA is a register array of 32-bit registers. It is ++ * treated like an array of (32*mta_reg_count) bits. We want to ++ * set bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The (hw->mac.mta_reg_count - 1) serves as a ++ * mask to bits 31:5 of the hash value which gives us the ++ * register we're modifying. The hash bit within that register ++ * is determined by the lower 5 bits of the hash value. 
++ */ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_update_mc_addr_list_generic - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. ++ **/ ++void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count) ++{ ++ u32 hash_value; ++ u32 i; ++ ++ DEBUGFUNC("e1000_update_mc_addr_list_generic"); ++ ++ /* ++ * Load the first set of multicast addresses into the exact ++ * filters (RAR). If there are not enough to fill the RAR ++ * array, clear the filters. ++ */ ++ for (i = rar_used_count; i < rar_count; i++) { ++ if (mc_addr_count) { ++ e1000_rar_set(hw, mc_addr_list, i); ++ mc_addr_count--; ++ mc_addr_list += ETH_ADDR_LEN; ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, i << 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1) + 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ } ++ ++ /* Clear the old settings from the MTA */ ++ DEBUGOUT("Clearing MTA\n"); ++ for (i = 0; i < hw->mac.mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Load any remaining multicast addresses into the hash table. */ ++ for (; mc_addr_count > 0; mc_addr_count--) { ++ hash_value = e1000_hash_mc_addr(hw, mc_addr_list); ++ DEBUGOUT1("Hash value = 0x%03X\n", hash_value); ++ e1000_mta_set(hw, hash_value); ++ mc_addr_list += ETH_ADDR_LEN; ++ } ++} ++ ++/** ++ * e1000_hash_mc_addr_generic - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * e1000_mta_set_generic() ++ **/ ++u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ DEBUGFUNC("e1000_hash_mc_addr_generic"); ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* ++ * For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* ++ * The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. ++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. 
The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value ++ * @hw: pointer to the HW structure ++ * ++ * In certain situations, a system BIOS may report that the PCIx maximum ++ * memory read byte count (MMRBC) value is higher than than the actual ++ * value. We check the PCIx command regsiter with the current PCIx status ++ * regsiter. ++ **/ ++void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw) ++{ ++ u16 cmd_mmrbc; ++ u16 pcix_cmd; ++ u16 pcix_stat_hi_word; ++ u16 stat_mmrbc; ++ ++ DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic"); ++ ++ /* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */ ++ if (hw->bus.type != e1000_bus_type_pcix) ++ return; ++ ++ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); ++ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); ++ cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >> ++ PCIX_COMMAND_MMRBC_SHIFT; ++ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> ++ PCIX_STATUS_HI_MMRBC_SHIFT; ++ if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) ++ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; ++ if (cmd_mmrbc > stat_mmrbc) { ++ pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK; ++ pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; ++ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); ++ } ++} ++ ++/** ++ * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. 
++ **/ ++void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); ++ ++ temp = E1000_READ_REG(hw, E1000_CRCERRS); ++ temp = E1000_READ_REG(hw, E1000_SYMERRS); ++ temp = E1000_READ_REG(hw, E1000_MPC); ++ temp = E1000_READ_REG(hw, E1000_SCC); ++ temp = E1000_READ_REG(hw, E1000_ECOL); ++ temp = E1000_READ_REG(hw, E1000_MCC); ++ temp = E1000_READ_REG(hw, E1000_LATECOL); ++ temp = E1000_READ_REG(hw, E1000_COLC); ++ temp = E1000_READ_REG(hw, E1000_DC); ++ temp = E1000_READ_REG(hw, E1000_SEC); ++ temp = E1000_READ_REG(hw, E1000_RLEC); ++ temp = E1000_READ_REG(hw, E1000_XONRXC); ++ temp = E1000_READ_REG(hw, E1000_XONTXC); ++ temp = E1000_READ_REG(hw, E1000_XOFFRXC); ++ temp = E1000_READ_REG(hw, E1000_XOFFTXC); ++ temp = E1000_READ_REG(hw, E1000_FCRUC); ++ temp = E1000_READ_REG(hw, E1000_GPRC); ++ temp = E1000_READ_REG(hw, E1000_BPRC); ++ temp = E1000_READ_REG(hw, E1000_MPRC); ++ temp = E1000_READ_REG(hw, E1000_GPTC); ++ temp = E1000_READ_REG(hw, E1000_GORCL); ++ temp = E1000_READ_REG(hw, E1000_GORCH); ++ temp = E1000_READ_REG(hw, E1000_GOTCL); ++ temp = E1000_READ_REG(hw, E1000_GOTCH); ++ temp = E1000_READ_REG(hw, E1000_RNBC); ++ temp = E1000_READ_REG(hw, E1000_RUC); ++ temp = E1000_READ_REG(hw, E1000_RFC); ++ temp = E1000_READ_REG(hw, E1000_ROC); ++ temp = E1000_READ_REG(hw, E1000_RJC); ++ temp = E1000_READ_REG(hw, E1000_TORL); ++ temp = E1000_READ_REG(hw, E1000_TORH); ++ temp = E1000_READ_REG(hw, E1000_TOTL); ++ temp = E1000_READ_REG(hw, E1000_TOTH); ++ temp = E1000_READ_REG(hw, E1000_TPR); ++ temp = E1000_READ_REG(hw, E1000_TPT); ++ temp = E1000_READ_REG(hw, E1000_MPTC); ++ temp = E1000_READ_REG(hw, E1000_BPTC); ++} ++ ++/** ++ * e1000_check_for_copper_link_generic - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_copper_link"); ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = FALSE; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. 
++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_fiber_link_generic - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_fiber_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((ctrl & E1000_CTRL_SWDPIN1) && (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_serdes_link_generic - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. 
++ **/ ++s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_serdes_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), and our link partner is not trying to ++ * auto-negotiate with us (we are receiving idles or data), ++ * we need to force link up. We also need to give auto-negotiation ++ * time to complete. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { ++ /* ++ * If we force link for non-auto-negotiation switch, check ++ * link status based on MAC synchronization for internal ++ * serdes media type. ++ */ ++ /* SYNCH bit and IV bit are sticky. */ ++ usec_delay(10); ++ if (E1000_RXCW_SYNCH & E1000_READ_REG(hw, E1000_RXCW)) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = TRUE; ++ DEBUGOUT("SERDES: Link is up.\n"); ++ } ++ } else { ++ mac->serdes_has_link = FALSE; ++ DEBUGOUT("SERDES: Link is down.\n"); ++ } ++ } ++ ++ if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ mac->serdes_has_link = (status & E1000_STATUS_LU) ++ ? TRUE ++ : FALSE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_generic - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 e1000_setup_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_generic"); ++ ++ /* ++ * In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. 
++ */ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* ++ * If flow control is set to default, set flow control based on ++ * the EEPROM flow control settings. ++ */ ++ if (hw->fc.type == e1000_fc_default) { ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. ++ */ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); ++ E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); ++ E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes ++ * links. Upon successful setup, poll for link. ++ **/ ++s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Since auto-negotiation is enabled, take the link out of reset (the ++ * link will be in reset, because we previously reset the chip). This ++ * will restart auto-negotiation. If auto-negotiation is successful ++ * then the link-up status bit will be set and the flow control enable ++ * bits (RFCE and TFCE) will be set according to their negotiated value. ++ */ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ ++ /* ++ * For these adapters, the SW defineable pin 1 is set when the optics ++ * detect a signal. If we have a signal, then poll for a "Link-Up" ++ * indication. ++ */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ DEBUGOUT("No signal detected\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_collision_dist_generic - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. 
++ **/ ++void e1000_config_collision_dist_generic(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ DEBUGFUNC("e1000_config_collision_dist_generic"); ++ ++ tctl = E1000_READ_REG(hw, E1000_TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ E1000_WRITE_REG(hw, E1000_TCTL, tctl); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/** ++ * e1000_poll_fiber_serdes_link_generic - Poll for link up ++ * @hw: pointer to the HW structure ++ * ++ * Polls for link up by reading the status register, if link fails to come ++ * up with auto-negotiation, then the link is forced if a signal is detected. ++ **/ ++s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); ++ ++ /* ++ * If we have a signal (the cable is plugged in, or assumed true for ++ * serdes media) then poll for a "Link-Up" indication in the Device ++ * Status Register. Time-out if a link isn't seen in 500 milliseconds ++ * seconds (Auto-negotiation should complete in less than 500 ++ * milliseconds even if the other end is doing it in SW). ++ */ ++ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { ++ msec_delay(10); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ if (status & E1000_STATUS_LU) ++ break; ++ } ++ if (i == FIBER_LINK_UP_LIMIT) { ++ DEBUGOUT("Never got a valid link from auto-neg!!!\n"); ++ mac->autoneg_failed = 1; ++ /* ++ * AutoNeg failed to achieve a link, so we'll call ++ * mac->check_for_link. This routine will force the ++ * link up if we detect a signal. This will allow us to ++ * communicate with non-autonegotiating link partners. ++ */ ++ ret_val = e1000_check_for_link(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while checking for link\n"); ++ goto out; ++ } ++ mac->autoneg_failed = 0; ++ } else { ++ mac->autoneg_failed = 0; ++ DEBUGOUT("Valid Link Found\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_commit_fc_settings_generic - Configure flow control ++ * @hw: pointer to the HW structure ++ * ++ * Write the flow control settings to the Transmit Config Word Register (TXCW) ++ * base on the flow control settings in e1000_mac_info. ++ **/ ++s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txcw; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_commit_fc_settings_generic"); ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the device accordingly. If auto-negotiation is enabled, then ++ * software will have to set the "PAUSE" bits to the correct value in ++ * the Transmit Config Word Register (TXCW) and re-start auto- ++ * negotiation. However, if auto-negotiation is disabled, then ++ * software will have to manually configure the two flow control enable ++ * bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we ++ * do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ /* Flow control completely disabled by a software over-ride. 
*/ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled and Tx Flow control is disabled ++ * by a software over-ride. Since there really isn't a way to ++ * advertise that we are capable of Rx Pause ONLY, we will ++ * advertise that we support both symmetric and asymmetric RX ++ * PAUSE. Later, we will disable the adapter's ability to send ++ * PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is disabled, ++ * by a software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_TXCW, txcw); ++ mac->txcw = txcw; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * tansmission as well. ++ **/ ++s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u32 fcrtl = 0, fcrth = 0; ++ ++ DEBUGFUNC("e1000_set_fc_watermarks_generic"); ++ ++ /* ++ * Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.type & e1000_fc_tx_pause) { ++ /* ++ * We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ if (hw->fc.send_xon) ++ fcrtl |= E1000_FCRTL_XONE; ++ ++ fcrth = hw->fc.high_water; ++ } ++ E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); ++ E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_set_default_fc_generic - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++s32 e1000_set_default_fc_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_set_default_fc_generic"); ++ ++ /* ++ * Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. 
++ */ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); ++ ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.type = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.type = e1000_fc_tx_pause; ++ else ++ hw->fc.type = e1000_fc_full; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_force_mac_fc_generic - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_force_mac_fc_generic"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* ++ * Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.type" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ DEBUGOUT1("hw->fc.type = %u\n", hw->fc.type); ++ ++ switch (hw->fc.type) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_fc_after_link_up_generic - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. ++ **/ ++s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ DEBUGFUNC("e1000_config_fc_after_link_up_generic"); ++ ++ /* ++ * Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. 
++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = e1000_force_mac_fc_generic(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = e1000_force_mac_fc_generic(hw); ++ } ++ ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ goto out; ++ } ++ ++ /* ++ * Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* ++ * Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ DEBUGOUT("Copper PHY and Auto Neg " ++ "has not completed.\n"); ++ goto out; ++ } ++ ++ /* ++ * The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* ++ * Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. 
++ */ ++ if (hw->fc.original_type == e1000_fc_full) { ++ hw->fc.type = e1000_fc_full; ++ DEBUGOUT("Flow Control = FULL.\r\n"); ++ } else { ++ hw->fc.type = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = " ++ "RX PAUSE frames only.\r\n"); ++ } ++ } ++ /* ++ * For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.type = e1000_fc_tx_pause; ++ DEBUGOUT("Flow Control = TX PAUSE frames only.\r\n"); ++ } ++ /* ++ * For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.type = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\r\n"); ++ } else { ++ /* ++ * Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.type = e1000_fc_none; ++ DEBUGOUT("Flow Control = NONE.\r\n"); ++ } ++ ++ /* ++ * Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.type = e1000_fc_none; ++ ++ /* ++ * Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000_force_mac_fc_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_speed_and_duplex_copper_generic - Retreive current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. ++ **/ ++s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ u32 status; ++ ++ DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); ++ ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ DEBUGOUT("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ DEBUGOUT("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ DEBUGOUT("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_speed_and_duplex_fiber_generic - Retreive current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Sets the speed and duplex to gigabit full duplex (the only possible option) ++ * for fiber/serdes links. 
++ **/ ++s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, ++ u16 *speed, u16 *duplex) ++{ ++ DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); ++ ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_get_hw_semaphore_generic - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = E1000_SUCCESS; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_get_hw_semaphore_generic"); ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ usec_delay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_generic(hw); ++ DEBUGOUT("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_generic - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ DEBUGFUNC("e1000_put_hw_semaphore_generic"); ++ ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm); ++} ++ ++/** ++ * e1000_get_auto_rd_done_generic - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_auto_rd_done_generic"); ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) ++ break; ++ msec_delay(1); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ DEBUGOUT("Auto read by HW from NVM has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_generic - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_generic"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_id_led_init_generic - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 e1000_id_led_init_generic(struct e1000_hw * hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ DEBUGFUNC("e1000_id_led_init_generic"); ++ ++ ret_val = hw->func.valid_led_default(hw, &data); ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_led_generic - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. ++ **/ ++s32 e1000_setup_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_led_generic"); ++ ++ if (hw->func.setup_led != e1000_setup_led_generic) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ ledctl = E1000_READ_REG(hw, E1000_LEDCTL); ++ hw->mac.ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); ++ } else if (hw->phy.media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_cleanup_led_generic - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. 
++ **/ ++s32 e1000_cleanup_led_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led_generic"); ++ ++ if (hw->func.cleanup_led != e1000_cleanup_led_generic) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_blink_led_generic - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the led's which are set to be on. ++ **/ ++s32 e1000_blink_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ DEBUGFUNC("e1000_blink_led_generic"); ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* ++ * set the blink bit for each LED that's "on" (0x0E) ++ * in ledctl_mode2 ++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << ++ (i * 8)); ++ } ++ ++ E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_on_generic - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++s32 e1000_led_on_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_led_on_generic"); ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_generic - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 e1000_led_off_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ DEBUGFUNC("e1000_led_off_generic"); ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities ++ * @hw: pointer to the HW structure ++ * @no_snoop: bitmap of snoop events ++ * ++ * Set the PCI-express register to snoop for events enabled in 'no_snoop'. ++ **/ ++void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) ++{ ++ u32 gcr; ++ ++ DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ if (no_snoop) { ++ gcr = E1000_READ_REG(hw, E1000_GCR); ++ gcr &= ~(PCIE_NO_SNOOP_ALL); ++ gcr |= no_snoop; ++ E1000_WRITE_REG(hw, E1000_GCR, gcr); ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_disable_pcie_master_generic - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 (E1000_SUCCESS) if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not casued ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. 
++ **/ ++s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_disable_pcie_master_generic"); ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(E1000_READ_REG(hw, E1000_STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ usec_delay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Master requests are pending.\n"); ++ ret_val = -E1000_ERR_MASTER_REQUESTS_PENDING; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Reset the Adaptive Interframe Spacing throttle to default values. ++ **/ ++void e1000_reset_adaptive_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ DEBUGFUNC("e1000_reset_adaptive_generic"); ++ ++ if (!mac->adaptive_ifs) { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if (!mac->ifs_params_forced) { ++ mac->current_ifs_val = 0; ++ mac->ifs_min_val = IFS_MIN; ++ mac->ifs_max_val = IFS_MAX; ++ mac->ifs_step_size = IFS_STEP; ++ mac->ifs_ratio = IFS_RATIO; ++ } ++ ++ mac->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, E1000_AIT, 0); ++out: ++ return; ++} ++ ++/** ++ * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Update the Adaptive Interframe Spacing Throttle value based on the ++ * time between transmitted packets and time between collisions. ++ **/ ++void e1000_update_adaptive_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ DEBUGFUNC("e1000_update_adaptive_generic"); ++ ++ if (!mac->adaptive_ifs) { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { ++ if (mac->tx_packet_delta > MIN_NUM_XMITS) { ++ mac->in_ifs_mode = TRUE; ++ if (mac->current_ifs_val < mac->ifs_max_val) { ++ if (!mac->current_ifs_val) ++ mac->current_ifs_val = mac->ifs_min_val; ++ else ++ mac->current_ifs_val += ++ mac->ifs_step_size; ++ E1000_WRITE_REG(hw, E1000_AIT, mac->current_ifs_val); ++ } ++ } ++ } else { ++ if (mac->in_ifs_mode && ++ (mac->tx_packet_delta <= MIN_NUM_XMITS)) { ++ mac->current_ifs_val = 0; ++ mac->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, E1000_AIT, 0); ++ } ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings ++ * @hw: pointer to the HW structure ++ * ++ * Verify that when not using auto-negotitation that MDI/MDIx is correctly ++ * set, which is forced to MDI mode only. ++ **/ ++s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_validate_mdi_setting_generic"); ++ ++ if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { ++ DEBUGOUT("Invalid MDI setting detected\n"); ++ hw->phy.mdix = 1; ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset such as E1000_SCTL ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes an address/data control type register. 
There are several of these ++ * and they all have the format address << 8 | data and bit 31 is polled for ++ * completion. ++ **/ ++s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data) ++{ ++ u32 i, regvalue = 0; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); ++ ++ /* Set up the address and data */ ++ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); ++ E1000_WRITE_REG(hw, reg, regvalue); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { ++ usec_delay(5); ++ regvalue = E1000_READ_REG(hw, reg); ++ if (regvalue & E1000_GEN_CTL_READY) ++ break; ++ } ++ if (!(regvalue & E1000_GEN_CTL_READY)) { ++ DEBUGOUT1("Reg %08x did not indicate ready\n", reg); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_api.h 2021-04-07 16:01:27.658633550 +0800 +@@ -0,0 +1,166 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_API_H_ ++#define _E1000_API_H_ ++ ++#include "e1000_hw.h" ++ ++extern void e1000_init_function_pointers_82542(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82543(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82540(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82571(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_82541(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw); ++extern void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw); ++ ++s32 e1000_set_mac_type(struct e1000_hw *hw); ++s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); ++s32 e1000_init_mac_params(struct e1000_hw *hw); ++s32 e1000_init_nvm_params(struct e1000_hw *hw); ++s32 e1000_init_phy_params(struct e1000_hw *hw); ++void e1000_remove_device(struct e1000_hw *hw); ++s32 e1000_get_bus_info(struct e1000_hw *hw); ++void e1000_clear_vfta(struct e1000_hw *hw); ++void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); ++s32 e1000_force_mac_fc(struct e1000_hw *hw); ++s32 e1000_check_for_link(struct e1000_hw *hw); ++s32 e1000_reset_hw(struct e1000_hw *hw); ++s32 e1000_init_hw(struct e1000_hw *hw); ++s32 e1000_setup_link(struct e1000_hw *hw); ++s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 e1000_disable_pcie_master(struct e1000_hw *hw); ++void e1000_config_collision_dist(struct e1000_hw *hw); ++void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++void e1000_mta_set(struct e1000_hw *hw, u32 hash_value); ++u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); ++void e1000_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++s32 e1000_setup_led(struct e1000_hw *hw); ++s32 e1000_cleanup_led(struct e1000_hw *hw); ++s32 e1000_check_reset_block(struct e1000_hw *hw); ++s32 e1000_blink_led(struct e1000_hw *hw); ++s32 e1000_led_on(struct e1000_hw *hw); ++s32 e1000_led_off(struct e1000_hw *hw); ++void e1000_reset_adaptive(struct e1000_hw *hw); ++void e1000_update_adaptive(struct e1000_hw *hw); ++s32 e1000_get_cable_length(struct e1000_hw *hw); ++s32 e1000_validate_mdi_setting(struct e1000_hw *hw); ++s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++s32 e1000_get_phy_info(struct e1000_hw *hw); ++s32 e1000_phy_hw_reset(struct e1000_hw *hw); ++s32 e1000_phy_commit(struct e1000_hw *hw); ++void e1000_power_up_phy(struct e1000_hw *hw); ++void e1000_power_down_phy(struct e1000_hw *hw); ++s32 e1000_read_mac_addr(struct e1000_hw *hw); ++s32 e1000_read_pba_num(struct e1000_hw *hw, u32 *part_num); ++void e1000_reload_nvm(struct e1000_hw *hw); ++s32 e1000_update_nvm_checksum(struct e1000_hw *hw); ++s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); ++s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); ++s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data); ++s32 e1000_wait_autoneg(struct e1000_hw *hw); ++s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); 
++s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); ++bool e1000_check_mng_mode(struct e1000_hw *hw); ++bool e1000_enable_mng_pass_thru(struct e1000_hw *hw); ++bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); ++s32 e1000_mng_enable_host_if(struct e1000_hw *hw); ++s32 e1000_mng_host_if_write(struct e1000_hw *hw, ++ u8 *buffer, u16 length, u16 offset, u8 *sum); ++s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr); ++s32 e1000_mng_write_dhcp_info(struct e1000_hw * hw, ++ u8 *buffer, u16 length); ++void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, ++ u32 frame_len, u8 *mac_addr, ++ u32 max_frame_size); ++void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, ++ bool state); ++bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw); ++u32 e1000_translate_register_82542(u32 reg); ++void e1000_init_script_state_82541(struct e1000_hw *hw, bool state); ++bool e1000_get_laa_state_82571(struct e1000_hw *hw); ++void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state); ++void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state); ++void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); ++void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw); ++ ++ ++/* ++ * TBI_ACCEPT macro definition: ++ * ++ * This macro requires: ++ * adapter = a pointer to struct e1000_hw ++ * status = the 8 bit status field of the Rx descriptor with EOP set ++ * error = the 8 bit error field of the Rx descriptor with EOP set ++ * length = the sum of all the length fields of the Rx descriptors that ++ * make up the current frame ++ * last_byte = the last byte of the frame DMAed by the hardware ++ * max_frame_length = the maximum frame length we want to accept. ++ * min_frame_length = the minimum frame length we want to accept. ++ * ++ * This macro is a conditional that should be used in the interrupt ++ * handler's Rx processing routine when RxErrors have been detected. ++ * ++ * Typical use: ++ * ... ++ * if (TBI_ACCEPT) { ++ * accept_frame = TRUE; ++ * e1000_tbi_adjust_stats(adapter, MacAddress); ++ * frame_length--; ++ * } else { ++ * accept_frame = FALSE; ++ * } ++ * ... ++ */ ++ ++/* The carrier extension symbol, as received by the NIC. */ ++#define CARRIER_EXTENSION 0x0F ++ ++#define TBI_ACCEPT(a, status, errors, length, last_byte, min_frame_size, max_frame_size) \ ++ (e1000_tbi_sbp_enabled_82543(a) && \ ++ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ ++ ((last_byte) == CARRIER_EXTENSION) && \ ++ (((status) & E1000_RXD_STAT_VP) ? \ ++ (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \ ++ ((length) <= (max_frame_size + 1))) : \ ++ (((length) > min_frame_size) && \ ++ ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1))))) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_ethtool.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_ethtool.c 2021-04-07 16:01:27.653633557 +0800 +@@ -0,0 +1,2205 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. 
++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ethtool support for e1000 */ ++ ++#include ++ ++#ifdef SIOCETHTOOL ++#include ++ ++#include "e1000.h" ++#include "e1000_82541.h" ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++ ++#ifdef ETHTOOL_OPS_COMPAT ++#include "kcompat_ethtool.c" ++#endif ++ ++#ifdef ETHTOOL_GSTATS ++struct e1000_stats { ++ char stat_string[ETH_GSTRING_LEN]; ++ int sizeof_stat; ++ int stat_offset; ++}; ++ ++#define E1000_STAT(m) sizeof(((struct e1000_adapter *)0)->m), \ ++ offsetof(struct e1000_adapter, m) ++static const struct e1000_stats e1000_gstrings_stats[] = { ++ { "rx_packets", E1000_STAT(stats.gprc) }, ++ { "tx_packets", E1000_STAT(stats.gptc) }, ++ { "rx_bytes", E1000_STAT(stats.gorc) }, ++ { "tx_bytes", E1000_STAT(stats.gotc) }, ++ { "rx_broadcast", E1000_STAT(stats.bprc) }, ++ { "tx_broadcast", E1000_STAT(stats.bptc) }, ++ { "rx_multicast", E1000_STAT(stats.mprc) }, ++ { "tx_multicast", E1000_STAT(stats.mptc) }, ++ { "rx_errors", E1000_STAT(net_stats.rx_errors) }, ++ { "tx_errors", E1000_STAT(net_stats.tx_errors) }, ++ { "tx_dropped", E1000_STAT(net_stats.tx_dropped) }, ++ { "multicast", E1000_STAT(stats.mprc) }, ++ { "collisions", E1000_STAT(stats.colc) }, ++ { "rx_length_errors", E1000_STAT(net_stats.rx_length_errors) }, ++ { "rx_over_errors", E1000_STAT(net_stats.rx_over_errors) }, ++ { "rx_crc_errors", E1000_STAT(stats.crcerrs) }, ++ { "rx_frame_errors", E1000_STAT(net_stats.rx_frame_errors) }, ++ { "rx_no_buffer_count", E1000_STAT(stats.rnbc) }, ++ { "rx_missed_errors", E1000_STAT(stats.mpc) }, ++ { "tx_aborted_errors", E1000_STAT(stats.ecol) }, ++ { "tx_carrier_errors", E1000_STAT(stats.tncrs) }, ++ { "tx_fifo_errors", E1000_STAT(net_stats.tx_fifo_errors) }, ++ { "tx_heartbeat_errors", E1000_STAT(net_stats.tx_heartbeat_errors) }, ++ { "tx_window_errors", E1000_STAT(stats.latecol) }, ++ { "tx_abort_late_coll", E1000_STAT(stats.latecol) }, ++ { "tx_deferred_ok", E1000_STAT(stats.dc) }, ++ { "tx_single_coll_ok", E1000_STAT(stats.scc) }, ++ { "tx_multi_coll_ok", E1000_STAT(stats.mcc) }, ++ { "tx_timeout_count", E1000_STAT(tx_timeout_count) }, ++ { "tx_restart_queue", E1000_STAT(restart_queue) }, ++ { "rx_long_length_errors", E1000_STAT(stats.roc) }, ++ { "rx_short_length_errors", E1000_STAT(stats.ruc) }, ++ { "rx_align_errors", E1000_STAT(stats.algnerrc) }, ++ { "tx_tcp_seg_good", E1000_STAT(stats.tsctc) }, ++ { "tx_tcp_seg_failed", E1000_STAT(stats.tsctfc) }, ++ { "rx_flow_control_xon", E1000_STAT(stats.xonrxc) }, ++ { "rx_flow_control_xoff", E1000_STAT(stats.xoffrxc) }, ++ { "tx_flow_control_xon", E1000_STAT(stats.xontxc) }, ++ { "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) }, ++ { "rx_long_byte_count", E1000_STAT(stats.gorc) }, ++ { "rx_csum_offload_good", E1000_STAT(hw_csum_good) }, ++ { 
"rx_csum_offload_errors", E1000_STAT(hw_csum_err) }, ++ { "rx_header_split", E1000_STAT(rx_hdr_split) }, ++ { "alloc_rx_buff_failed", E1000_STAT(alloc_rx_buff_failed) }, ++ { "tx_smbus", E1000_STAT(stats.mgptc) }, ++ { "rx_smbus", E1000_STAT(stats.mgprc) }, ++ { "dropped_smbus", E1000_STAT(stats.mgpdc) }, ++}; ++ ++#ifdef CONFIG_E1000_MQ ++#define E1000_QUEUE_STATS_LEN \ ++ ((((((struct e1000_adapter *)netdev->priv)->num_rx_queues > 1) ? \ ++ ((struct e1000_adapter *)netdev->priv)->num_rx_queues : 0 ) + \ ++ (((((struct e1000_adapter *)netdev->priv)->num_tx_queues > 1) ? \ ++ ((struct e1000_adapter *)netdev->priv)->num_tx_queues : 0 ))) * \ ++ (sizeof(struct e1000_queue_stats) / sizeof(u64))) ++#else ++#define E1000_QUEUE_STATS_LEN 0 ++#endif ++#define E1000_GLOBAL_STATS_LEN \ ++ sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats) ++#define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN + E1000_QUEUE_STATS_LEN) ++#endif /* ETHTOOL_GSTATS */ ++#ifdef ETHTOOL_TEST ++static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = { ++ "Register test (offline)", "Eeprom test (offline)", ++ "Interrupt test (offline)", "Loopback test (offline)", ++ "Link test (on/offline)" ++}; ++#define E1000_TEST_LEN sizeof(e1000_gstrings_test) / ETH_GSTRING_LEN ++#endif /* ETHTOOL_TEST */ ++ ++static int e1000_get_settings(struct net_device *netdev, ++ struct ethtool_cmd *ecmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 status; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ++ ecmd->supported = (SUPPORTED_10baseT_Half | ++ SUPPORTED_10baseT_Full | ++ SUPPORTED_100baseT_Half | ++ SUPPORTED_100baseT_Full | ++ SUPPORTED_1000baseT_Full| ++ SUPPORTED_Autoneg | ++ SUPPORTED_TP); ++ if (hw->phy.type == e1000_phy_ife) ++ ecmd->supported &= ~SUPPORTED_1000baseT_Full; ++ ecmd->advertising = ADVERTISED_TP; ++ ++ if (hw->mac.autoneg == 1) { ++ ecmd->advertising |= ADVERTISED_Autoneg; ++ /* the e1000 autoneg seems to match ethtool nicely */ ++ ecmd->advertising |= hw->phy.autoneg_advertised; ++ } ++ ++ ecmd->port = PORT_TP; ++ ecmd->phy_address = hw->phy.addr; ++ ++ if (hw->mac.type == e1000_82543) ++ ecmd->transceiver = XCVR_EXTERNAL; ++ else ++ ecmd->transceiver = XCVR_INTERNAL; ++ ++ } else { ++ ecmd->supported = (SUPPORTED_1000baseT_Full | ++ SUPPORTED_FIBRE | ++ SUPPORTED_Autoneg); ++ ++ ecmd->advertising = (ADVERTISED_1000baseT_Full | ++ ADVERTISED_FIBRE | ++ ADVERTISED_Autoneg); ++ ++ ecmd->port = PORT_FIBRE; ++ ++ if (hw->mac.type >= e1000_82545) ++ ecmd->transceiver = XCVR_INTERNAL; ++ else ++ ecmd->transceiver = XCVR_EXTERNAL; ++ } ++ ++ status = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ ++ if (status & E1000_STATUS_LU) { ++ ++ if ((status & E1000_STATUS_SPEED_1000) || ++ hw->phy.media_type != e1000_media_type_copper) ++ ecmd->speed = SPEED_1000; ++ else if (status & E1000_STATUS_SPEED_100) ++ ecmd->speed = SPEED_100; ++ else ++ ecmd->speed = SPEED_10; ++ ++ if ((status & E1000_STATUS_FD) || ++ hw->phy.media_type != e1000_media_type_copper) ++ ecmd->duplex = DUPLEX_FULL; ++ else ++ ecmd->duplex = DUPLEX_HALF; ++ } else { ++ ecmd->speed = -1; ++ ecmd->duplex = -1; ++ } ++ ++ ecmd->autoneg = ((hw->phy.media_type == e1000_media_type_fiber) || ++ hw->mac.autoneg) ? 
AUTONEG_ENABLE : AUTONEG_DISABLE; ++ return 0; ++} ++ ++static int e1000_set_settings(struct net_device *netdev, ++ struct ethtool_cmd *ecmd) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* When SoL/IDER sessions are active, autoneg/speed/duplex ++ * cannot be changed */ ++ if (e1000_check_reset_block(hw)) { ++ DPRINTK(DRV, ERR, "Cannot change link characteristics " ++ "when SoL/IDER is active.\n"); ++ return -EINVAL; ++ } ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (ecmd->autoneg == AUTONEG_ENABLE) { ++ hw->mac.autoneg = 1; ++ if (hw->phy.media_type == e1000_media_type_fiber) ++ hw->phy.autoneg_advertised = ADVERTISED_1000baseT_Full | ++ ADVERTISED_FIBRE | ++ ADVERTISED_Autoneg; ++ else ++ hw->phy.autoneg_advertised = ecmd->advertising | ++ ADVERTISED_TP | ++ ADVERTISED_Autoneg; ++ ecmd->advertising = hw->phy.autoneg_advertised; ++ if (adapter->fc_autoneg) ++ hw->fc.original_type = e1000_fc_default; ++ } else { ++ if (e1000_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) { ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return -EINVAL; ++ } ++ } ++ ++ /* reset the link */ ++ ++ if (netif_running(adapter->netdev)) { ++ e1000_down(adapter); ++ e1000_up(adapter); ++ } else { ++ e1000_reset(adapter); ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return 0; ++} ++ ++static void e1000_get_pauseparam(struct net_device *netdev, ++ struct ethtool_pauseparam *pause) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ pause->autoneg = ++ (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); ++ ++ if (hw->fc.type == e1000_fc_rx_pause) ++ pause->rx_pause = 1; ++ else if (hw->fc.type == e1000_fc_tx_pause) ++ pause->tx_pause = 1; ++ else if (hw->fc.type == e1000_fc_full) { ++ pause->rx_pause = 1; ++ pause->tx_pause = 1; ++ } ++} ++ ++static int e1000_set_pauseparam(struct net_device *netdev, ++ struct ethtool_pauseparam *pause) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ int retval = 0; ++ ++ adapter->fc_autoneg = pause->autoneg; ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (pause->rx_pause && pause->tx_pause) ++ hw->fc.type = e1000_fc_full; ++ else if (pause->rx_pause && !pause->tx_pause) ++ hw->fc.type = e1000_fc_rx_pause; ++ else if (!pause->rx_pause && pause->tx_pause) ++ hw->fc.type = e1000_fc_tx_pause; ++ else if (!pause->rx_pause && !pause->tx_pause) ++ hw->fc.type = e1000_fc_none; ++ ++ hw->fc.original_type = hw->fc.type; ++ ++ if (adapter->fc_autoneg == AUTONEG_ENABLE) { ++ hw->fc.type = e1000_fc_default; ++ if (netif_running(adapter->netdev)) { ++ e1000_down(adapter); ++ e1000_up(adapter); ++ } else { ++ e1000_reset(adapter); ++ } ++ } else { ++ retval = ((hw->phy.media_type == e1000_media_type_fiber) ? 
++ e1000_setup_link(hw) : e1000_force_mac_fc(hw)); ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return retval; ++} ++ ++static u32 e1000_get_rx_csum(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->rx_csum; ++} ++ ++static int e1000_set_rx_csum(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ adapter->rx_csum = data; ++ ++ if (netif_running(netdev)) ++ e1000_reinit_locked(adapter); ++ else ++ e1000_reset(adapter); ++ return 0; ++} ++ ++static u32 e1000_get_tx_csum(struct net_device *netdev) ++{ ++ return (netdev->features & NETIF_F_HW_CSUM) != 0; ++} ++ ++static int e1000_set_tx_csum(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (adapter->hw.mac.type < e1000_82543) { ++ if (!data) ++ return -EINVAL; ++ return 0; ++ } ++ ++ if (data) ++ netdev->features |= NETIF_F_HW_CSUM; ++ else ++ netdev->features &= ~NETIF_F_HW_CSUM; ++ ++ return 0; ++} ++ ++#ifdef NETIF_F_TSO ++static int e1000_set_tso(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ int i; ++ struct net_device *v_netdev; ++ if (!(adapter->flags & E1000_FLAG_HAS_TSO)) ++ return data ? -EINVAL : 0; ++ ++ if (data) { ++ netdev->features |= NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ netdev->features |= NETIF_F_TSO6; ++#endif ++ } else { ++ netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ netdev->features &= ~NETIF_F_TSO6; ++#endif ++#ifdef NETIF_F_HW_VLAN_TX ++ /* disable TSO on all VLANs if they're present */ ++ if (!adapter->vlgrp) ++ goto tso_out; ++ for (i = 0; i < VLAN_N_VID; i++) { ++ v_netdev = vlan_group_get_device(adapter->vlgrp, i); ++ if (!v_netdev) ++ continue; ++ ++ v_netdev->features &= ~NETIF_F_TSO; ++#ifdef NETIF_F_TSO6 ++ if (adapter->flags & E1000_FLAG_HAS_TSO6) ++ v_netdev->features &= ~NETIF_F_TSO6; ++#endif ++ vlan_group_set_device(adapter->vlgrp, i, v_netdev); ++ } ++#endif ++ } ++ ++tso_out: ++ DPRINTK(PROBE, INFO, "TSO is %s\n", data ? 
"Enabled" : "Disabled"); ++ adapter->flags |= E1000_FLAG_TSO_FORCE; ++ return 0; ++} ++#endif /* NETIF_F_TSO */ ++ ++static u32 e1000_get_msglevel(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->msg_enable; ++} ++ ++static void e1000_set_msglevel(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ adapter->msg_enable = data; ++} ++ ++static int e1000_get_regs_len(struct net_device *netdev) ++{ ++#define E1000_REGS_LEN 32 ++ return E1000_REGS_LEN * sizeof(u32); ++} ++ ++static void e1000_get_regs(struct net_device *netdev, ++ struct ethtool_regs *regs, void *p) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 *regs_buff = p; ++ u16 phy_data; ++ ++ memset(p, 0, E1000_REGS_LEN * sizeof(u32)); ++ ++ regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; ++ ++ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); ++ regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); ++ ++ regs_buff[2] = E1000_READ_REG(hw, E1000_RCTL); ++ regs_buff[3] = E1000_READ_REG(hw, E1000_RDLEN(0)); ++ regs_buff[4] = E1000_READ_REG(hw, E1000_RDH(0)); ++ regs_buff[5] = E1000_READ_REG(hw, E1000_RDT(0)); ++ regs_buff[6] = E1000_READ_REG(hw, E1000_RDTR); ++ ++ regs_buff[7] = E1000_READ_REG(hw, E1000_TCTL); ++ regs_buff[8] = E1000_READ_REG(hw, E1000_TDLEN(0)); ++ regs_buff[9] = E1000_READ_REG(hw, E1000_TDH(0)); ++ regs_buff[10] = E1000_READ_REG(hw, E1000_TDT(0)); ++ regs_buff[11] = E1000_READ_REG(hw, E1000_TIDV); ++ ++ regs_buff[12] = adapter->hw.phy.type; /* PHY type (IGP=1, M88=0) */ ++ if (hw->phy.type == e1000_phy_igp) { ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_A); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_A & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[13] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_B); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_B & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[14] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_C); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_C & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[15] = (u32)phy_data; /* cable length */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_AGC_D); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_D & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[16] = (u32)phy_data; /* cable length */ ++ regs_buff[17] = 0; /* extended 10bt distance (not needed) */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[18] = (u32)phy_data; /* cable polarity */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, ++ IGP01E1000_PHY_PCS_INIT_REG); ++ e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG & ++ IGP01E1000_PHY_PAGE_SELECT, &phy_data); ++ regs_buff[19] = (u32)phy_data; /* cable polarity */ ++ regs_buff[20] = 0; /* polarity correction enabled (always) */ ++ regs_buff[22] = 0; /* phy receive errors (unavailable) */ ++ regs_buff[23] = regs_buff[18]; /* mdix mode */ ++ e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0); ++ } else { ++ e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ regs_buff[13] = (u32)phy_data; /* cable length */ ++ regs_buff[14] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[15] = 
0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[16] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ regs_buff[17] = (u32)phy_data; /* extended 10bt distance */ ++ regs_buff[18] = regs_buff[13]; /* cable polarity */ ++ regs_buff[19] = 0; /* Dummy (to align w/ IGP phy reg dump) */ ++ regs_buff[20] = regs_buff[17]; /* polarity correction */ ++ /* phy receive errors */ ++ regs_buff[22] = adapter->phy_stats.receive_errors; ++ regs_buff[23] = regs_buff[13]; /* mdix mode */ ++ } ++ regs_buff[21] = adapter->phy_stats.idle_errors; /* phy idle errors */ ++ e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ regs_buff[24] = (u32)phy_data; /* phy local receiver status */ ++ regs_buff[25] = regs_buff[24]; /* phy remote receiver status */ ++ if (hw->mac.type >= e1000_82540 && ++ hw->mac.type < e1000_82571 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ regs_buff[26] = E1000_READ_REG(hw, E1000_MANC); ++ } ++} ++ ++static int e1000_get_eeprom_len(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ return adapter->hw.nvm.word_size * 2; ++} ++ ++static int e1000_get_eeprom(struct net_device *netdev, ++ struct ethtool_eeprom *eeprom, u8 *bytes) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u16 *eeprom_buff; ++ int first_word, last_word; ++ int ret_val = 0; ++ u16 i; ++ ++ if (eeprom->len == 0) ++ return -EINVAL; ++ ++ eeprom->magic = hw->vendor_id | (hw->device_id << 16); ++ ++ first_word = eeprom->offset >> 1; ++ last_word = (eeprom->offset + eeprom->len - 1) >> 1; ++ ++ eeprom_buff = kmalloc(sizeof(u16) * ++ (last_word - first_word + 1), GFP_KERNEL); ++ if (!eeprom_buff) ++ return -ENOMEM; ++ ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) ++ ret_val = e1000_read_nvm(hw, first_word, ++ last_word - first_word + 1, ++ eeprom_buff); ++ else { ++ for (i = 0; i < last_word - first_word + 1; i++) ++ if ((ret_val = e1000_read_nvm(hw, first_word + i, 1, ++ &eeprom_buff[i]))) ++ break; ++ } ++ ++ /* Device's eeprom is always little-endian, word addressable */ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ le16_to_cpus(&eeprom_buff[i]); ++ ++ memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), ++ eeprom->len); ++ kfree(eeprom_buff); ++ ++ return ret_val; ++} ++ ++static int e1000_set_eeprom(struct net_device *netdev, ++ struct ethtool_eeprom *eeprom, u8 *bytes) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u16 *eeprom_buff; ++ void *ptr; ++ int max_len, first_word, last_word, ret_val = 0; ++ u16 i; ++ ++ if (eeprom->len == 0) ++ return -EOPNOTSUPP; ++ ++ if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) ++ return -EFAULT; ++ ++ max_len = hw->nvm.word_size * 2; ++ ++ first_word = eeprom->offset >> 1; ++ last_word = (eeprom->offset + eeprom->len - 1) >> 1; ++ eeprom_buff = kmalloc(max_len, GFP_KERNEL); ++ if (!eeprom_buff) ++ return -ENOMEM; ++ ++ ptr = (void *)eeprom_buff; ++ ++ if (eeprom->offset & 1) { ++ /* need read/modify/write of first changed EEPROM word */ ++ /* only the second byte of the word is being modified */ ++ ret_val = e1000_read_nvm(hw, first_word, 1, ++ &eeprom_buff[0]); ++ ptr++; ++ } ++ if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { ++ /* need read/modify/write of last changed EEPROM word */ ++ /* only the first byte of the word is being modified */ ++ ret_val = e1000_read_nvm(hw, last_word, 1, ++ &eeprom_buff[last_word - 
first_word]); ++ } ++ ++ /* Device's eeprom is always little-endian, word addressable */ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ le16_to_cpus(&eeprom_buff[i]); ++ ++ memcpy(ptr, bytes, eeprom->len); ++ ++ for (i = 0; i < last_word - first_word + 1; i++) ++ eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]); ++ ++ ret_val = e1000_write_nvm(hw, first_word, ++ last_word - first_word + 1, eeprom_buff); ++ ++ /* Update the checksum over the first part of the EEPROM if needed ++ * and flush shadow RAM for 82573 controllers */ ++ if ((ret_val == 0) && ((first_word <= NVM_CHECKSUM_REG) || ++ (hw->mac.type == e1000_82573))) ++ e1000_update_nvm_checksum(hw); ++ ++ kfree(eeprom_buff); ++ return ret_val; ++} ++ ++static void e1000_get_drvinfo(struct net_device *netdev, ++ struct ethtool_drvinfo *drvinfo) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ char firmware_version[32]; ++ u16 eeprom_data; ++ ++ strncpy(drvinfo->driver, e1000_driver_name, 32); ++ strncpy(drvinfo->version, e1000_driver_version, 32); ++ ++ /* EEPROM image version # is reported as firmware version # for ++ * 8257{1|2|3} controllers */ ++ e1000_read_nvm(&adapter->hw, 5, 1, &eeprom_data); ++ switch (adapter->hw.mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ sprintf(firmware_version, "%d.%d-%d", ++ (eeprom_data & 0xF000) >> 12, ++ (eeprom_data & 0x0FF0) >> 4, ++ eeprom_data & 0x000F); ++ break; ++ default: ++ sprintf(firmware_version, "N/A"); ++ } ++ ++ strncpy(drvinfo->fw_version, firmware_version, 32); ++ strncpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); ++ drvinfo->n_stats = E1000_STATS_LEN; ++ drvinfo->testinfo_len = E1000_TEST_LEN; ++ drvinfo->regdump_len = e1000_get_regs_len(netdev); ++ drvinfo->eedump_len = e1000_get_eeprom_len(netdev); ++} ++ ++static void e1000_get_ringparam(struct net_device *netdev, ++ struct ethtool_ringparam *ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ e1000_mac_type mac_type = adapter->hw.mac.type; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ ++ ring->rx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ring->tx_max_pending = (mac_type < e1000_82544) ? 
E1000_MAX_TXD : ++ E1000_MAX_82544_TXD; ++ ring->rx_mini_max_pending = 0; ++ ring->rx_jumbo_max_pending = 0; ++ ring->rx_pending = rx_ring->count; ++ ring->tx_pending = tx_ring->count; ++ ring->rx_mini_pending = 0; ++ ring->rx_jumbo_pending = 0; ++} ++ ++static int e1000_set_ringparam(struct net_device *netdev, ++ struct ethtool_ringparam *ring) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ e1000_mac_type mac_type = adapter->hw.mac.type; ++ struct e1000_tx_ring *tx_ring, *tx_old; ++ struct e1000_rx_ring *rx_ring, *rx_old; ++ int i, err, tx_ring_size, rx_ring_size; ++ ++ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) ++ return -EINVAL; ++ ++ tx_ring_size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; ++ rx_ring_size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; ++ ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ msleep(1); ++ ++ if (netif_running(adapter->netdev)) ++ e1000_down(adapter); ++ ++ tx_old = adapter->tx_ring; ++ rx_old = adapter->rx_ring; ++ ++ err = -ENOMEM; ++ tx_ring = kzalloc(tx_ring_size, GFP_KERNEL); ++ if (!tx_ring) ++ goto err_alloc_tx; ++ /* use a memcpy to save any previously configured ++ * items like napi structs from having to be ++ * reinitialized */ ++ memcpy(tx_ring, tx_old, tx_ring_size); ++ ++ rx_ring = kzalloc(rx_ring_size, GFP_KERNEL); ++ if (!rx_ring) ++ goto err_alloc_rx; ++ memcpy(rx_ring, rx_old, rx_ring_size); ++ ++ adapter->tx_ring = tx_ring; ++ adapter->rx_ring = rx_ring; ++ ++ rx_ring->count = max(ring->rx_pending,(u32)E1000_MIN_RXD); ++ rx_ring->count = min(rx_ring->count,(u32)(mac_type < e1000_82544 ? ++ E1000_MAX_RXD : E1000_MAX_82544_RXD)); ++ rx_ring->count = ALIGN(rx_ring->count, REQ_RX_DESCRIPTOR_MULTIPLE); ++ ++ tx_ring->count = max(ring->tx_pending,(u32)E1000_MIN_TXD); ++ tx_ring->count = min(tx_ring->count,(u32)(mac_type < e1000_82544 ? 
++ E1000_MAX_TXD : E1000_MAX_82544_TXD)); ++ tx_ring->count = ALIGN(tx_ring->count, REQ_TX_DESCRIPTOR_MULTIPLE); ++ ++ /* overwrite the counts with the new values */ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ ++ if (netif_running(adapter->netdev)) { ++ /* Try to get new resources before deleting old */ ++ if ((err = e1000_setup_all_rx_resources(adapter))) ++ goto err_setup_rx; ++ if ((err = e1000_setup_all_tx_resources(adapter))) ++ goto err_setup_tx; ++ ++ /* restore the old in order to free it, ++ * then add in the new */ ++ adapter->rx_ring = rx_old; ++ adapter->tx_ring = tx_old; ++ e1000_free_all_rx_resources(adapter); ++ e1000_free_all_tx_resources(adapter); ++ kfree(tx_old); ++ kfree(rx_old); ++ adapter->rx_ring = rx_ring; ++ adapter->tx_ring = tx_ring; ++ if ((err = e1000_up(adapter))) ++ goto err_setup; ++ } ++ ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return 0; ++err_setup_tx: ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ adapter->rx_ring = rx_old; ++ adapter->tx_ring = tx_old; ++ kfree(rx_ring); ++err_alloc_rx: ++ kfree(tx_ring); ++err_alloc_tx: ++ e1000_up(adapter); ++err_setup: ++ clear_bit(__E1000_RESETTING, &adapter->state); ++ return err; ++} ++ ++static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, ++ int reg, int offset, u32 mask, u32 write) ++{ \ ++ u32 pat, val; ++ static const u32 test[] = ++ {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; ++ for (pat = 0; pat < ARRAY_SIZE(test); pat++) { ++ E1000_WRITE_REG_ARRAY(&adapter->hw, reg, offset, ++ (test[pat] & write)); ++ val = E1000_READ_REG_ARRAY(&adapter->hw, reg, offset); ++ if (val != (test[pat] & write & mask)) { ++ DPRINTK(DRV, ERR, "pattern test reg %04X failed: got " ++ "0x%08X expected 0x%08X\n", ++ E1000_REGISTER(&adapter->hw, reg) + offset, ++ val, (test[pat] & write & mask)); ++ *data = E1000_REGISTER(&adapter->hw, reg); ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, ++ int reg, u32 mask, u32 write) ++{ ++ u32 val; ++ E1000_WRITE_REG(&adapter->hw, reg, write & mask); ++ val = E1000_READ_REG(&adapter->hw, reg); ++ if ((write & mask) != (val & mask)) { ++ DPRINTK(DRV, ERR, "set/check reg %04X test failed: got 0x%08X" ++ "expected 0x%08X\n", reg, (val & mask), (write & mask)); ++ *data = E1000_REGISTER(&adapter->hw, reg); ++ return 1; ++ } ++ return 0; ++} ++#define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write) \ ++ do { \ ++ if (reg_pattern_test(adapter, data, reg, offset, mask, write)) \ ++ return 1; \ ++ } while (0) ++#define REG_PATTERN_TEST(reg, mask, write) \ ++ REG_PATTERN_TEST_ARRAY(reg, 0, mask, write) ++ ++#define REG_SET_AND_CHECK(reg, mask, write) \ ++ do { \ ++ if (reg_set_and_check(adapter, data, reg, mask, write)) \ ++ return 1; \ ++ } while (0) ++ ++static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ u32 value, before, after; ++ u32 i, toggle; ++ ++ /* The status register is Read Only, so a write should fail. ++ * Some bits that get toggled are ignored. 
++ */ ++ switch (mac->type) { ++ /* there are several bits on newer hardware that are r/w */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ toggle = 0x7FFFF3FF; ++ break; ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ toggle = 0x7FFFF033; ++ break; ++ default: ++ toggle = 0xFFFFF833; ++ break; ++ } ++ ++ before = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ value = (E1000_READ_REG(&adapter->hw, E1000_STATUS) & toggle); ++ E1000_WRITE_REG(&adapter->hw, E1000_STATUS, toggle); ++ after = E1000_READ_REG(&adapter->hw, E1000_STATUS) & toggle; ++ if (value != after) { ++ DPRINTK(DRV, ERR, "failed STATUS register test got: " ++ "0x%08X expected: 0x%08X\n", after, value); ++ *data = 1; ++ return 1; ++ } ++ /* restore previous status */ ++ E1000_WRITE_REG(&adapter->hw, E1000_STATUS, before); ++ ++ if ((mac->type != e1000_ich8lan) && ++ (mac->type != e1000_ich9lan)) { ++ REG_PATTERN_TEST(E1000_FCAL, 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_FCAH, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_FCT, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_VET, 0x0000FFFF, 0xFFFFFFFF); ++ } ++ ++ REG_PATTERN_TEST(E1000_RDTR, 0x0000FFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDLEN(0), 0x000FFF80, 0x000FFFFF); ++ REG_PATTERN_TEST(E1000_RDH(0), 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_RDT(0), 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_FCRTH, 0x0000FFF8, 0x0000FFF8); ++ REG_PATTERN_TEST(E1000_FCTTV, 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TIPG, 0x3FFFFFFF, 0x3FFFFFFF); ++ REG_PATTERN_TEST(E1000_TDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TDLEN(0), 0x000FFF80, 0x000FFFFF); ++ ++ REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x00000000); ++ ++ before = (((mac->type == e1000_ich8lan) || ++ (mac->type == e1000_ich9lan)) ? 
0x06C3B33E : 0x06DFB3FE); ++ REG_SET_AND_CHECK(E1000_RCTL, before, 0x003FFFFB); ++ REG_SET_AND_CHECK(E1000_TCTL, 0xFFFFFFFF, 0x00000000); ++ ++ if (mac->type >= e1000_82543) { ++ ++ REG_SET_AND_CHECK(E1000_RCTL, before, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF); ++ if ((mac->type != e1000_ich8lan) && ++ (mac->type != e1000_ich9lan)) ++ REG_PATTERN_TEST(E1000_TXCW, 0xC000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TIDV, 0x0000FFFF, 0x0000FFFF); ++ for (i = 0; i < mac->rar_entry_count; i++) { ++ REG_PATTERN_TEST_ARRAY(E1000_RA, ((i << 1) + 1), ++ 0x8003FFFF, 0xFFFFFFFF); ++ } ++ ++ } else { ++ ++ REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x01FFFFFF); ++ REG_PATTERN_TEST(E1000_RDBAL(0), 0xFFFFF000, 0xFFFFFFFF); ++ REG_PATTERN_TEST(E1000_TXCW, 0x0000FFFF, 0x0000FFFF); ++ REG_PATTERN_TEST(E1000_TDBAL(0), 0xFFFFF000, 0xFFFFFFFF); ++ ++ } ++ ++ for (i = 0; i < mac->mta_reg_count; i++) ++ REG_PATTERN_TEST_ARRAY(E1000_MTA, i, 0xFFFFFFFF, 0xFFFFFFFF); ++ ++ *data = 0; ++ return 0; ++} ++ ++static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ u16 temp; ++ u16 checksum = 0; ++ u16 i; ++ ++ *data = 0; ++ /* Read and add up the contents of the EEPROM */ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ if ((e1000_read_nvm(&adapter->hw, i, 1, &temp)) < 0) { ++ *data = 1; ++ break; ++ } ++ checksum += temp; ++ } ++ ++ /* If Checksum is not Correct return error else test passed */ ++ if ((checksum != (u16) NVM_SUM) && !(*data)) ++ *data = 2; ++ ++ return *data; ++} ++ ++static irqreturn_t e1000_test_intr(int irq, void *data) ++{ ++ struct net_device *netdev = (struct net_device *) data; ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ adapter->test_icr |= E1000_READ_REG(&adapter->hw, E1000_ICR); ++ ++ return IRQ_HANDLED; ++} ++ ++static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ struct net_device *netdev = adapter->netdev; ++ u32 mask, i=0, shared_int = TRUE; ++ u32 irq = adapter->pdev->irq; ++ ++ *data = 0; ++ ++ /* NOTE: we don't test MSI interrupts here, yet */ ++ /* Hook up test interrupt handler just for this test */ ++ if (!request_irq(irq, &e1000_test_intr, IRQF_PROBE_SHARED, netdev->name, ++ netdev)) ++ shared_int = FALSE; ++ else if (request_irq(irq, &e1000_test_intr, IRQF_SHARED, ++ netdev->name, netdev)) { ++ *data = 1; ++ return -1; ++ } ++ DPRINTK(HW, INFO, "testing %s interrupt\n", ++ (shared_int ? "shared" : "unshared")); ++ ++ /* Disable all the interrupts */ ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xFFFFFFFF); ++ msleep(10); ++ ++ /* Test each interrupt */ ++ for (; i < 10; i++) { ++ ++ if (((adapter->hw.mac.type == e1000_ich8lan) || ++ (adapter->hw.mac.type == e1000_ich9lan)) && i == 8) ++ continue; ++ ++ /* Interrupt to test */ ++ mask = 1 << i; ++ ++ if (!shared_int) { ++ /* Disable the interrupt to be reported in ++ * the cause register and then force the same ++ * interrupt and see if one gets posted. If ++ * an interrupt was posted to the bus, the ++ * test failed. ++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, mask); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, mask); ++ msleep(10); ++ ++ if (adapter->test_icr & mask) { ++ *data = 3; ++ break; ++ } ++ } ++ ++ /* Enable the interrupt to be reported in ++ * the cause register and then force the same ++ * interrupt and see if one gets posted. If ++ * an interrupt was not posted to the bus, the ++ * test failed. 
++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMS, mask); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, mask); ++ msleep(10); ++ ++ if (!(adapter->test_icr & mask)) { ++ *data = 4; ++ break; ++ } ++ ++ if (!shared_int) { ++ /* Disable the other interrupts to be reported in ++ * the cause register and then force the other ++ * interrupts and see if any get posted. If ++ * an interrupt was posted to the bus, the ++ * test failed. ++ */ ++ adapter->test_icr = 0; ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ++ ~mask & 0x00007FFF); ++ E1000_WRITE_REG(&adapter->hw, E1000_ICS, ++ ~mask & 0x00007FFF); ++ msleep(10); ++ ++ if (adapter->test_icr) { ++ *data = 5; ++ break; ++ } ++ } ++ } ++ ++ /* Disable all the interrupts */ ++ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xFFFFFFFF); ++ msleep(10); ++ ++ /* Unhook test interrupt handler */ ++ free_irq(irq, netdev); ++ ++ return *data; ++} ++ ++static void e1000_free_desc_rings(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ int i; ++ ++ if (tx_ring->desc && tx_ring->buffer_info) { ++ for (i = 0; i < tx_ring->count; i++) { ++ if (tx_ring->buffer_info[i].dma) ++ pci_unmap_single(pdev, tx_ring->buffer_info[i].dma, ++ tx_ring->buffer_info[i].length, ++ PCI_DMA_TODEVICE); ++ if (tx_ring->buffer_info[i].skb) ++ dev_kfree_skb(tx_ring->buffer_info[i].skb); ++ } ++ } ++ ++ if (rx_ring->desc && rx_ring->buffer_info) { ++ for (i = 0; i < rx_ring->count; i++) { ++ if (rx_ring->buffer_info[i].dma) ++ pci_unmap_single(pdev, rx_ring->buffer_info[i].dma, ++ E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ if (rx_ring->buffer_info[i].skb) ++ dev_kfree_skb(rx_ring->buffer_info[i].skb); ++ } ++ } ++ ++ if (tx_ring->desc) { ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ tx_ring->desc = NULL; ++ } ++ if (rx_ring->desc) { ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ rx_ring->desc = NULL; ++ } ++ ++ kfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ kfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ ++ return; ++} ++ ++static int e1000_setup_desc_rings(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ u32 rctl; ++ int i, ret_val; ++ ++ /* Setup Tx descriptor ring and Tx buffers */ ++ ++ if (!tx_ring->count) ++ tx_ring->count = E1000_DEFAULT_TXD; ++ ++ if (!(tx_ring->buffer_info = kcalloc(tx_ring->count, ++ sizeof(struct e1000_buffer), ++ GFP_KERNEL))) { ++ ret_val = 1; ++ goto err_nomem; ++ } ++ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ if (!(tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size, ++ &tx_ring->dma))) { ++ ret_val = 2; ++ goto err_nomem; ++ } ++ tx_ring->next_to_use = tx_ring->next_to_clean = 0; ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), ++ ((u64) tx_ring->dma & 0x00000000FFFFFFFF)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), ((u64) tx_ring->dma >> 32)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), ++ tx_ring->count * sizeof(struct e1000_tx_desc)); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, ++ E1000_TCTL_MULR | ++ E1000_TCTL_PSP | E1000_TCTL_EN | ++ 
E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT | ++ E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT); ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*tx_ring, i); ++ struct sk_buff *skb; ++ unsigned int size = 1024; ++ ++ if (!(skb = alloc_skb(size, GFP_KERNEL))) { ++ ret_val = 3; ++ goto err_nomem; ++ } ++ skb_put(skb, size); ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[i].length = skb->len; ++ tx_ring->buffer_info[i].dma = ++ pci_map_single(pdev, skb->data, skb->len, ++ PCI_DMA_TODEVICE); ++ tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma); ++ tx_desc->lower.data = cpu_to_le32(skb->len); ++ tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP | ++ E1000_TXD_CMD_IFCS); ++ if (adapter->hw.mac.type < e1000_82543) ++ tx_desc->lower.data |= E1000_TXD_CMD_RPS; ++ else ++ tx_desc->lower.data |= E1000_TXD_CMD_RS; ++ ++ tx_desc->upper.data = 0; ++ } ++ ++ /* Setup Rx descriptor ring and Rx buffers */ ++ ++ if (!rx_ring->count) ++ rx_ring->count = E1000_DEFAULT_RXD; ++ ++ if (!(rx_ring->buffer_info = kcalloc(rx_ring->count, ++ sizeof(struct e1000_rx_buffer), ++ GFP_KERNEL))) { ++ ret_val = 4; ++ goto err_nomem; ++ } ++ ++ rx_ring->size = rx_ring->count * sizeof(struct e1000_rx_desc); ++ if (!(rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size, ++ &rx_ring->dma))) { ++ ret_val = 5; ++ goto err_nomem; ++ } ++ rx_ring->next_to_use = rx_ring->next_to_clean = 0; ++ ++ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), ++ ((u64) rx_ring->dma & 0xFFFFFFFF)); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), ((u64) rx_ring->dma >> 32)); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), rx_ring->size); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); ++ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), 0); ++ rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rx_ring, i); ++ struct sk_buff *skb; ++ ++ if (!(skb = alloc_skb(E1000_RXBUFFER_2048 + NET_IP_ALIGN, ++ GFP_KERNEL))) { ++ ret_val = 6; ++ goto err_nomem; ++ } ++ skb_reserve(skb, NET_IP_ALIGN); ++ rx_ring->buffer_info[i].skb = skb; ++ rx_ring->buffer_info[i].dma = ++ pci_map_single(pdev, skb->data, E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ rx_desc->buffer_addr = cpu_to_le64(rx_ring->buffer_info[i].dma); ++ memset(skb->data, 0x00, skb->len); ++ } ++ ++ return 0; ++ ++err_nomem: ++ e1000_free_desc_rings(adapter); ++ return ret_val; ++} ++ ++static void e1000_phy_disable_receiver(struct e1000_adapter *adapter) ++{ ++ /* Write out to PHY registers 29 and 30 to disable the Receiver. */ ++ e1000_write_phy_reg(&adapter->hw, 29, 0x001F); ++ e1000_write_phy_reg(&adapter->hw, 30, 0x8FFC); ++ e1000_write_phy_reg(&adapter->hw, 29, 0x001A); ++ e1000_write_phy_reg(&adapter->hw, 30, 0x8FF0); ++} ++ ++static void e1000_phy_reset_clk_and_crs(struct e1000_adapter *adapter) ++{ ++ u16 phy_reg; ++ ++ /* Because we reset the PHY above, we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock. This ++ * value defaults back to a 2.5MHz clock when the PHY is reset. 
++ */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); ++ phy_reg |= M88E1000_EPSCR_TX_CLK_25; ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_reg); ++ ++ /* In addition, because of the s/w reset above, we need to enable ++ * CRS on TX. This must be set for both full and half duplex ++ * operation. ++ */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); ++ phy_reg |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_PHY_SPEC_CTRL, phy_reg); ++} ++ ++static int e1000_nonintegrated_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_reg; ++ u16 phy_reg; ++ ++ /* Setup the Device Control Register for PHY loopback test. */ ++ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl_reg |= (E1000_CTRL_ILOS | /* Invert Loss-Of-Signal */ ++ E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_1000 | /* Force Speed to 1000 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl_reg); ++ ++ /* Read the PHY Specific Control Register (0x10) */ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, &phy_reg); ++ ++ /* Clear Auto-Crossover bits in PHY Specific Control Register ++ * (bits 6:5). ++ */ ++ phy_reg &= ~M88E1000_PSCR_AUTO_X_MODE; ++ e1000_write_phy_reg(&adapter->hw, M88E1000_PHY_SPEC_CTRL, phy_reg); ++ ++ /* Perform software reset on the PHY */ ++ e1000_phy_commit(&adapter->hw); ++ ++ /* Have to setup TX_CLK and TX_CRS after software reset */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x8100); ++ ++ /* Wait for reset to complete. */ ++ udelay(500); ++ ++ /* Have to setup TX_CLK and TX_CRS after software reset */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ /* Write out to PHY registers 29 and 30 to disable the Receiver. */ ++ e1000_phy_disable_receiver(adapter); ++ ++ /* Set the loopback bit in the PHY control register. */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ phy_reg |= MII_CR_LOOPBACK; ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_reg); ++ ++ /* Setup TX_CLK and TX_CRS one more time. */ ++ e1000_phy_reset_clk_and_crs(adapter); ++ ++ /* Check Phy Configuration */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ if (phy_reg != 0x4100) ++ return 9; ++ ++ e1000_read_phy_reg(&adapter->hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg); ++ if (phy_reg != 0x0070) ++ return 10; ++ ++ e1000_read_phy_reg(&adapter->hw, 29, &phy_reg); ++ if (phy_reg != 0x001A) ++ return 11; ++ ++ return 0; ++} ++ ++static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u32 ctrl_reg = 0; ++ u32 stat_reg = 0; ++ ++ adapter->hw.mac.autoneg = FALSE; ++ ++ if (adapter->hw.phy.type == e1000_phy_m88) { ++ /* Auto-MDI/MDIX Off */ ++ e1000_write_phy_reg(&adapter->hw, ++ M88E1000_PHY_SPEC_CTRL, 0x0808); ++ /* reset to update Auto-MDI/MDIX */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x9140); ++ /* autoneg off */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x8140); ++ } else if (adapter->hw.phy.type == e1000_phy_gg82563) ++ e1000_write_phy_reg(&adapter->hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ 0x1CC); ++ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ++ if (adapter->hw.phy.type == e1000_phy_ife) { ++ /* force 100, set loopback */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x6100); ++ ++ /* Now set up the MAC to the same speed/duplex as the PHY. 
*/ ++ ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ ++ ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_100 |/* Force Speed to 100 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ } else { ++ /* force 1000, set loopback */ ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, 0x4140); ++ ++ /* Now set up the MAC to the same speed/duplex as the PHY. */ ++ ctrl_reg = E1000_READ_REG(&adapter->hw, E1000_CTRL); ++ ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ ++ ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ ++ E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ ++ E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ ++ E1000_CTRL_FD); /* Force Duplex to FULL */ ++ } ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper && ++ adapter->hw.phy.type == e1000_phy_m88) { ++ ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ ++ } else { ++ /* Set the ILOS bit on the fiber Nic if half duplex link is ++ * detected. */ ++ stat_reg = E1000_READ_REG(&adapter->hw, E1000_STATUS); ++ if ((stat_reg & E1000_STATUS_FD) == 0) ++ ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl_reg); ++ ++ /* Disable the receiver on the PHY so when a cable is plugged in, the ++ * PHY does not begin to autoneg when a cable is reconnected to the NIC. ++ */ ++ if (adapter->hw.phy.type == e1000_phy_m88) ++ e1000_phy_disable_receiver(adapter); ++ ++ udelay(500); ++ ++ return 0; ++} ++ ++static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ int link = 0; ++ ++ /* special requirements for 82571/82572 fiber adapters */ ++ ++ /* jump through hoops to make sure link is up because serdes ++ * link is hardwired up */ ++ ctrl |= E1000_CTRL_SLU; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* disable autoneg */ ++ ctrl = E1000_READ_REG(hw, E1000_TXCW); ++ ctrl &= ~(1 << 31); ++ E1000_WRITE_REG(hw, E1000_TXCW, ctrl); ++ ++ link = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); ++ ++ if (!link) { ++ /* set invert loss of signal */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_ILOS; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ } ++ ++ /* special write to serdes control register to enable SerDes analog ++ * loopback */ ++#define E1000_SERDES_LB_ON 0x410 ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SERDES_LB_ON); ++ msleep(10); ++ ++ return 0; ++} ++ ++static int e1000_set_phy_loopback(struct e1000_adapter *adapter) ++{ ++ u16 phy_reg = 0; ++ u16 count = 0; ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82543: ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ /* Attempt to setup Loopback mode on Non-integrated PHY. ++ * Some PHY registers get corrupted at random, so ++ * attempt this 10 times. 
++ */ ++ while (e1000_nonintegrated_phy_loopback(adapter) && ++ count++ < 10); ++ if (count < 11) ++ return 0; ++ } ++ break; ++ ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ return e1000_integrated_phy_loopback(adapter); ++ break; ++ ++ default: ++ /* Default PHY loopback work is to read the MII ++ * control register and assert bit 14 (loopback mode). ++ */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_reg); ++ phy_reg |= MII_CR_LOOPBACK; ++ e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_reg); ++ return 0; ++ break; ++ } ++ ++ return 8; ++} ++ ++/* only call this for fiber/serdes connections to es2lan */ ++static int e1000_set_es2lan_mac_loopback(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrlext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* save CTRL_EXT to restore later, reuse an empty variable (unused ++ on mac_type 80003es2lan) */ ++ adapter->tx_fifo_head = ctrlext; ++ ++ /* clear the serdes mode bits, putting the device into mac loopback */ ++ ctrlext &= ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrlext); ++ ++ /* force speed to 1000/FD, link up */ ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | ++ E1000_CTRL_SPD_1000 | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* set mac loopback */ ++ ctrl = E1000_READ_REG(hw, E1000_RCTL); ++ ctrl |= E1000_RCTL_LBM_MAC; ++ E1000_WRITE_REG(hw, E1000_RCTL, ctrl); ++ ++ /* set testing mode parameters (no need to reset later) */ ++#define KMRNCTRLSTA_OPMODE (0x1F << 16) ++#define KMRNCTRLSTA_OPMODE_1GB_FD_GMII 0x0582 ++ E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, ++ (KMRNCTRLSTA_OPMODE | KMRNCTRLSTA_OPMODE_1GB_FD_GMII)); ++ ++ return 0; ++} ++ ++static int e1000_setup_loopback_test(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++ switch (hw->mac.type) { ++ case e1000_80003es2lan: ++ return e1000_set_es2lan_mac_loopback(adapter); ++ break; ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ return e1000_set_phy_loopback(adapter); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ return e1000_set_82571_fiber_loopback(adapter); ++ break; ++ default: ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl |= E1000_RCTL_LBM_TCVR; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ return 0; ++ } ++ } else if (hw->phy.media_type == e1000_media_type_copper) ++ return e1000_set_phy_loopback(adapter); ++ ++ return 7; ++} ++ ++static void e1000_loopback_cleanup(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ u16 phy_reg; ++ ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ ++ switch (hw->mac.type) { ++ case e1000_80003es2lan: ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++ /* restore CTRL_EXT, stealing space from 
tx_fifo_head */ ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, adapter->tx_fifo_head); ++ adapter->tx_fifo_head = 0; ++ } ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) { ++#define E1000_SERDES_LB_OFF 0x400 ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SERDES_LB_OFF); ++ msleep(10); ++ break; ++ } ++ /* Fall Through */ ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ default: ++ hw->mac.autoneg = TRUE; ++ if (hw->phy.type == e1000_phy_gg82563) ++ e1000_write_phy_reg(hw, ++ GG82563_PHY_KMRN_MODE_CTRL, ++ 0x180); ++ e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg); ++ if (phy_reg & MII_CR_LOOPBACK) { ++ phy_reg &= ~MII_CR_LOOPBACK; ++ e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg); ++ e1000_phy_commit(hw); ++ } ++ break; ++ } ++} ++ ++static void e1000_create_lbtest_frame(struct sk_buff *skb, ++ unsigned int frame_size) ++{ ++ memset(skb->data, 0xFF, frame_size); ++ frame_size &= ~1; ++ memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1); ++ memset(&skb->data[frame_size / 2 + 10], 0xBE, 1); ++ memset(&skb->data[frame_size / 2 + 12], 0xAF, 1); ++} ++ ++static int e1000_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size) ++{ ++ frame_size &= ~1; ++ if (*(skb->data + 3) == 0xFF) { ++ if ((*(skb->data + frame_size / 2 + 10) == 0xBE) && ++ (*(skb->data + frame_size / 2 + 12) == 0xAF)) { ++ return 0; ++ } ++ } ++ return 13; ++} ++ ++static int e1000_run_loopback_test(struct e1000_adapter *adapter) ++{ ++ struct e1000_tx_ring *tx_ring = &adapter->test_tx_ring; ++ struct e1000_rx_ring *rx_ring = &adapter->test_rx_ring; ++ struct pci_dev *pdev = adapter->pdev; ++ int i, j, k, l, lc, good_cnt, ret_val=0; ++ unsigned long time; ++ ++ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rx_ring->count - 1); ++ ++ /* Calculate the loop count based on the largest descriptor ring ++ * The idea is to wrap the largest ring a number of times using 64 ++ * send/receive pairs during each loop ++ */ ++ ++ if (rx_ring->count <= tx_ring->count) ++ lc = ((tx_ring->count / 64) * 2) + 1; ++ else ++ lc = ((rx_ring->count / 64) * 2) + 1; ++ ++ k = l = 0; ++ for (j = 0; j <= lc; j++) { /* loop count loop */ ++ for (i = 0; i < 64; i++) { /* send the packets */ ++ e1000_create_lbtest_frame(tx_ring->buffer_info[k].skb, ++ 1024); ++ pci_dma_sync_single_for_device(pdev, ++ tx_ring->buffer_info[k].dma, ++ tx_ring->buffer_info[k].length, ++ PCI_DMA_TODEVICE); ++ if (unlikely(++k == tx_ring->count)) k = 0; ++ } ++ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), k); ++ msleep(200); ++ time = jiffies; /* set the start time for the receive */ ++ good_cnt = 0; ++ do { /* receive the sent packets */ ++ pci_dma_sync_single_for_cpu(pdev, ++ rx_ring->buffer_info[l].dma, ++ E1000_RXBUFFER_2048, ++ PCI_DMA_FROMDEVICE); ++ ++ ret_val = e1000_check_lbtest_frame( ++ rx_ring->buffer_info[l].skb, ++ 1024); ++ if (!ret_val) ++ good_cnt++; ++ if (unlikely(++l == rx_ring->count)) l = 0; ++ /* time + 20 msecs (200 msecs on 2.4) is more than ++ * enough time to complete the receives, if it's ++ * exceeded, break and error off ++ */ ++ } while (good_cnt < 64 && jiffies < (time + 20)); ++ if (good_cnt != 64) { ++ ret_val = 13; /* ret_val is the same as mis-compare */ ++ break; ++ } ++ if (jiffies >= (time + 20)) { ++ ret_val = 14; /* error code for time out error */ ++ break; ++ } ++ } /* end loop count loop */ ++ return ret_val; ++} ++ ++static int e1000_loopback_test(struct 
e1000_adapter *adapter, u64 *data) ++{ ++ /* PHY loopback cannot be performed if SoL/IDER ++ * sessions are active */ ++ if (e1000_check_reset_block(&adapter->hw)) { ++ DPRINTK(DRV, ERR, "Cannot do PHY loopback test " ++ "when SoL/IDER is active.\n"); ++ *data = 0; ++ goto out; ++ } ++ ++ if ((*data = e1000_setup_desc_rings(adapter))) ++ goto out; ++ if ((*data = e1000_setup_loopback_test(adapter))) ++ goto err_loopback; ++ *data = e1000_run_loopback_test(adapter); ++ e1000_loopback_cleanup(adapter); ++ ++err_loopback: ++ e1000_free_desc_rings(adapter); ++out: ++ return *data; ++} ++ ++static int e1000_link_test(struct e1000_adapter *adapter, u64 *data) ++{ ++ *data = 0; ++ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ++ int i = 0; ++ adapter->hw.mac.serdes_has_link = FALSE; ++ ++ /* On some blade server designs, link establishment ++ * could take as long as 2-3 minutes */ ++ do { ++ e1000_check_for_link(&adapter->hw); ++ if (adapter->hw.mac.serdes_has_link == TRUE) ++ return *data; ++ msleep(20); ++ } while (i++ < 3750); ++ ++ *data = 1; ++ } else { ++ e1000_check_for_link(&adapter->hw); ++ if (adapter->hw.mac.autoneg) ++ msleep(4000); ++ ++ if (!(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { ++ *data = 1; ++ } ++ } ++ return *data; ++} ++ ++static int e1000_diag_test_count(struct net_device *netdev) ++{ ++ return E1000_TEST_LEN; ++} ++ ++static void e1000_diag_test(struct net_device *netdev, ++ struct ethtool_test *eth_test, u64 *data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ u16 autoneg_advertised; ++ u8 forced_speed_duplex, autoneg; ++ bool if_running = netif_running(netdev); ++ ++ set_bit(__E1000_TESTING, &adapter->state); ++ if (eth_test->flags == ETH_TEST_FL_OFFLINE) { ++ /* Offline tests */ ++ ++ /* save speed, duplex, autoneg settings */ ++ autoneg_advertised = adapter->hw.phy.autoneg_advertised; ++ forced_speed_duplex = adapter->hw.mac.forced_speed_duplex; ++ autoneg = adapter->hw.mac.autoneg; ++ ++ DPRINTK(HW, INFO, "offline testing starting\n"); ++ ++ /* Link test performed before hardware reset so autoneg doesn't ++ * interfere with test result */ ++ if (e1000_link_test(adapter, &data[4])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ if (if_running) ++ /* indicate we're in test mode */ ++ dev_close(netdev); ++ else ++ e1000_reset(adapter); ++ ++ if (e1000_reg_test(adapter, &data[0])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ if (e1000_eeprom_test(adapter, &data[1])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ if (e1000_intr_test(adapter, &data[2])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ e1000_reset(adapter); ++ /* make sure the phy is powered up */ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ e1000_power_up_phy(&adapter->hw); ++ e1000_setup_link(&adapter->hw); ++ } ++ if (e1000_loopback_test(adapter, &data[3])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ /* restore speed, duplex, autoneg settings */ ++ adapter->hw.phy.autoneg_advertised = autoneg_advertised; ++ adapter->hw.mac.forced_speed_duplex = forced_speed_duplex; ++ adapter->hw.mac.autoneg = autoneg; ++ ++ /* force this routine to wait until autoneg complete/timeout */ ++ adapter->hw.phy.autoneg_wait_to_complete = TRUE; ++ e1000_reset(adapter); ++ adapter->hw.phy.autoneg_wait_to_complete = FALSE; ++ ++ clear_bit(__E1000_TESTING, &adapter->state); ++ if (if_running) ++ dev_open(netdev); ++ } else { ++ DPRINTK(HW, INFO, "online testing starting\n"); ++ /* Online 
tests */ ++ if (e1000_link_test(adapter, &data[4])) ++ eth_test->flags |= ETH_TEST_FL_FAILED; ++ ++ /* Online tests aren't run; pass by default */ ++ data[0] = 0; ++ data[1] = 0; ++ data[2] = 0; ++ data[3] = 0; ++ ++ clear_bit(__E1000_TESTING, &adapter->state); ++ } ++ msleep_interruptible(4 * 1000); ++} ++ ++static int e1000_wol_exclusion(struct e1000_adapter *adapter, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int retval = 1; /* fail by default */ ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ /* these don't support WoL at all */ ++ wol->supported = 0; ++ break; ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_COPPER: ++ /* Wake events not supported on port B */ ++ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) { ++ wol->supported = 0; ++ break; ++ } ++ /* return success for non excluded adapter ports */ ++ retval = 0; ++ break; ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ /* quad port adapters only support WoL on port A */ ++ if (!(adapter->flags & E1000_FLAG_QUAD_PORT_A)) { ++ wol->supported = 0; ++ break; ++ } ++ /* return success for non excluded adapter ports */ ++ retval = 0; ++ break; ++ default: ++ /* dual port cards only support WoL on port A from now on ++ * unless it was enabled in the eeprom for port B ++ * so exclude FUNC_1 ports from having WoL enabled */ ++ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1 && ++ !adapter->eeprom_wol) { ++ wol->supported = 0; ++ break; ++ } ++ ++ retval = 0; ++ } ++ ++ return retval; ++} ++ ++static void e1000_get_wol(struct net_device *netdev, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ wol->supported = WAKE_UCAST | WAKE_MCAST | ++ WAKE_BCAST | WAKE_MAGIC; ++ wol->wolopts = 0; ++ ++ /* this function will set ->supported = 0 and return 1 if wol is not ++ * supported by this hardware */ ++ if (e1000_wol_exclusion(adapter, wol)) ++ return; ++ ++ /* apply any specific unsupported masks here */ ++ switch (adapter->hw.device_id) { ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ /* KSP3 does not support UCAST wake-ups */ ++ wol->supported &= ~WAKE_UCAST; ++ ++ if (adapter->wol & E1000_WUFC_EX) ++ DPRINTK(DRV, ERR, "Interface does not support " ++ "directed (unicast) frame wake-up packets\n"); ++ break; ++ default: ++ break; ++ } ++ ++ if (adapter->wol & E1000_WUFC_EX) ++ wol->wolopts |= WAKE_UCAST; ++ if (adapter->wol & E1000_WUFC_MC) ++ wol->wolopts |= WAKE_MCAST; ++ if (adapter->wol & E1000_WUFC_BC) ++ wol->wolopts |= WAKE_BCAST; ++ if (adapter->wol & E1000_WUFC_MAG) ++ wol->wolopts |= WAKE_MAGIC; ++ ++ return; ++} ++ ++static int e1000_set_wol(struct net_device *netdev, ++ struct ethtool_wolinfo *wol) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE)) ++ return -EOPNOTSUPP; 
++ ++ if (e1000_wol_exclusion(adapter, wol)) ++ return wol->wolopts ? -EOPNOTSUPP : 0; ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ if (wol->wolopts & WAKE_UCAST) { ++ DPRINTK(DRV, ERR, "Interface does not support " ++ "directed (unicast) frame wake-up packets\n"); ++ return -EOPNOTSUPP; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* these settings will always override what we currently have */ ++ adapter->wol = 0; ++ ++ if (wol->wolopts & WAKE_UCAST) ++ adapter->wol |= E1000_WUFC_EX; ++ if (wol->wolopts & WAKE_MCAST) ++ adapter->wol |= E1000_WUFC_MC; ++ if (wol->wolopts & WAKE_BCAST) ++ adapter->wol |= E1000_WUFC_BC; ++ if (wol->wolopts & WAKE_MAGIC) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ return 0; ++} ++ ++/* toggle LED 4 times per second = 2 "blinks" per second */ ++#define E1000_ID_INTERVAL (HZ/4) ++ ++/* bit defines for adapter->led_status */ ++#define E1000_LED_ON 0 ++ ++static void e1000_led_blink_callback(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++ ++ if (test_and_change_bit(E1000_LED_ON, &adapter->led_status)) ++ e1000_led_off(&adapter->hw); ++ else ++ e1000_led_on(&adapter->hw); ++ ++ mod_timer(&adapter->blink_timer, jiffies + E1000_ID_INTERVAL); ++} ++ ++static int e1000_phys_id(struct net_device *netdev, u32 data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (!data) ++ data = INT_MAX; ++ ++ if (adapter->hw.mac.type < e1000_82571) { ++ if (!adapter->blink_timer.function) { ++ init_timer(&adapter->blink_timer); ++ adapter->blink_timer.function = e1000_led_blink_callback; ++ adapter->blink_timer.data = (unsigned long) adapter; ++ } ++ e1000_setup_led(&adapter->hw); ++ mod_timer(&adapter->blink_timer, jiffies); ++ msleep_interruptible(data * 1000); ++ del_timer_sync(&adapter->blink_timer); ++ } else if (adapter->hw.phy.type == e1000_phy_ife) { ++ if (!adapter->blink_timer.function) { ++ init_timer(&adapter->blink_timer); ++ adapter->blink_timer.function = e1000_led_blink_callback; ++ adapter->blink_timer.data = (unsigned long) adapter; ++ } ++ mod_timer(&adapter->blink_timer, jiffies); ++ msleep_interruptible(data * 1000); ++ del_timer_sync(&adapter->blink_timer); ++ e1000_write_phy_reg(&(adapter->hw), IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ } else { ++ e1000_blink_led(&adapter->hw); ++ msleep_interruptible(data * 1000); ++ } ++ ++ e1000_led_off(&adapter->hw); ++ clear_bit(E1000_LED_ON, &adapter->led_status); ++ e1000_cleanup_led(&adapter->hw); ++ ++ return 0; ++} ++ ++static int e1000_get_coalesce(struct net_device *netdev, ++ struct ethtool_coalesce *ec) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if (adapter->itr_setting <= 3) ++ ec->rx_coalesce_usecs = adapter->itr_setting; ++ else ++ ec->rx_coalesce_usecs = 1000000 / adapter->itr_setting; ++ ++ return 0; ++} ++ ++static int e1000_set_coalesce(struct net_device *netdev, ++ struct ethtool_coalesce *ec) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ ++ if ((ec->rx_coalesce_usecs > E1000_MAX_ITR_USECS) || ++ ((ec->rx_coalesce_usecs > 3) && ++ (ec->rx_coalesce_usecs < E1000_MIN_ITR_USECS)) || ++ (ec->rx_coalesce_usecs == 2)) ++ return -EINVAL; ++ ++ if (!(adapter->flags & E1000_FLAG_HAS_INTR_MODERATION)) ++ return -ENOTSUPP; ++ ++ if (ec->rx_coalesce_usecs <= 3) { ++ adapter->itr = 20000; ++ adapter->itr_setting = ec->rx_coalesce_usecs; ++ } else { ++ adapter->itr = (1000000 / ec->rx_coalesce_usecs); ++ adapter->itr_setting = adapter->itr & ~3; ++ } ++ ++ if (adapter->itr_setting != 0) 
++ E1000_WRITE_REG(&adapter->hw, E1000_ITR, ++ 1000000000 / (adapter->itr * 256)); ++ else ++ E1000_WRITE_REG(&adapter->hw, E1000_ITR, 0); ++ ++ return 0; ++} ++ ++static int e1000_nway_reset(struct net_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++ if (netif_running(netdev)) ++ e1000_reinit_locked(adapter); ++ return 0; ++} ++ ++static int e1000_get_stats_count(struct net_device *netdev) ++{ ++ return E1000_STATS_LEN; ++} ++ ++static void e1000_get_ethtool_stats(struct net_device *netdev, ++ struct ethtool_stats *stats, u64 *data) ++{ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++#ifdef CONFIG_E1000_MQ ++ u64 *queue_stat; ++ int stat_count = sizeof(struct e1000_queue_stats) / sizeof(u64); ++ int j, k; ++#endif ++ int i; ++ ++ e1000_update_stats(adapter); ++ for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { ++ char *p = (char *)adapter+e1000_gstrings_stats[i].stat_offset; ++ data[i] = (e1000_gstrings_stats[i].sizeof_stat == ++ sizeof(u64)) ? *(u64 *)p : *(u32 *)p; ++ } ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_tx_queues > 1) { ++ for (j = 0; j < adapter->num_tx_queues; j++) { ++ queue_stat = (u64 *)&adapter->tx_ring[j].tx_stats; ++ for (k = 0; k < stat_count; k++) ++ data[i + k] = queue_stat[k]; ++ i += k; ++ } ++ } ++ if (adapter->num_rx_queues > 1) { ++ for (j = 0; j < adapter->num_rx_queues; j++) { ++ queue_stat = (u64 *)&adapter->rx_ring[j].rx_stats; ++ for (k = 0; k < stat_count; k++) ++ data[i + k] = queue_stat[k]; ++ i += k; ++ } ++ } ++#endif ++/* BUG_ON(i != E1000_STATS_LEN); */ ++} ++ ++static void e1000_get_strings(struct net_device *netdev, u32 stringset, ++ u8 *data) ++{ ++#ifdef CONFIG_E1000_MQ ++ struct e1000_adapter *adapter = netdev_priv(netdev); ++#endif ++ u8 *p = data; ++ int i; ++ ++ switch (stringset) { ++ case ETH_SS_TEST: ++ memcpy(data, *e1000_gstrings_test, ++ E1000_TEST_LEN*ETH_GSTRING_LEN); ++ break; ++ case ETH_SS_STATS: ++ for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { ++ memcpy(p, e1000_gstrings_stats[i].stat_string, ++ ETH_GSTRING_LEN); ++ p += ETH_GSTRING_LEN; ++ } ++#ifdef CONFIG_E1000_MQ ++ if (adapter->num_tx_queues > 1) { ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ sprintf(p, "tx_queue_%u_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_queue_%u_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++ } ++ if (adapter->num_rx_queues > 1) { ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ sprintf(p, "rx_queue_%u_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "rx_queue_%u_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++ } ++#endif ++/* BUG_ON(p - data != E1000_STATS_LEN * ETH_GSTRING_LEN); */ ++ break; ++ } ++} ++ ++static struct ethtool_ops e1000_ethtool_ops = { ++ .get_settings = e1000_get_settings, ++ .set_settings = e1000_set_settings, ++ .get_drvinfo = e1000_get_drvinfo, ++ .get_regs_len = e1000_get_regs_len, ++ .get_regs = e1000_get_regs, ++ .get_wol = e1000_get_wol, ++ .set_wol = e1000_set_wol, ++ .get_msglevel = e1000_get_msglevel, ++ .set_msglevel = e1000_set_msglevel, ++ .nway_reset = e1000_nway_reset, ++ .get_link = ethtool_op_get_link, ++ .get_eeprom_len = e1000_get_eeprom_len, ++ .get_eeprom = e1000_get_eeprom, ++ .set_eeprom = e1000_set_eeprom, ++ .get_ringparam = e1000_get_ringparam, ++ .set_ringparam = e1000_set_ringparam, ++ .get_pauseparam = e1000_get_pauseparam, ++ .set_pauseparam = e1000_set_pauseparam, ++ .get_rx_csum = e1000_get_rx_csum, ++ .set_rx_csum = e1000_set_rx_csum, ++ .get_tx_csum = e1000_get_tx_csum, ++ .set_tx_csum = e1000_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, 
++ .set_sg = ethtool_op_set_sg, ++#ifdef NETIF_F_TSO ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = e1000_set_tso, ++#endif ++ .self_test_count = e1000_diag_test_count, ++ .self_test = e1000_diag_test, ++ .get_strings = e1000_get_strings, ++ .phys_id = e1000_phys_id, ++ .get_stats_count = e1000_get_stats_count, ++ .get_ethtool_stats = e1000_get_ethtool_stats, ++#ifdef ETHTOOL_GPERMADDR ++ .get_perm_addr = ethtool_op_get_perm_addr, ++#endif ++ .get_coalesce = e1000_get_coalesce, ++ .set_coalesce = e1000_set_coalesce, ++}; ++ ++void e1000_set_ethtool_ops(struct net_device *netdev) ++{ ++ SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops); ++} ++#endif /* SIOCETHTOOL */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_defines.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_defines.h 2021-04-07 16:01:27.648633565 +0800 +@@ -0,0 +1,1397 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ ++#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ ++#define E1000_WUC_LSCWE 0x00000010 /* Link Status wake up enable */ ++#define E1000_WUC_LSCWO 0x00000020 /* Link Status wake up override */ ++#define E1000_WUC_SPM 0x80000000 /* Enable SPM */ ++#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ ++#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ ++#define E1000_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ ++#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ ++#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ ++#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ ++#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ ++#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */ ++#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ ++#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC E1000_WUFC_LNKC ++#define E1000_WUS_MAG E1000_WUFC_MAG ++#define E1000_WUS_EX E1000_WUFC_EX ++#define E1000_WUS_MC E1000_WUFC_MC ++#define E1000_WUS_BC E1000_WUFC_BC ++#define E1000_WUS_ARP E1000_WUFC_ARP ++#define E1000_WUS_IPV4 E1000_WUFC_IPV4 ++#define E1000_WUS_IPV6 E1000_WUFC_IPV6 ++#define E1000_WUS_FLX0 E1000_WUFC_FLX0 ++#define E1000_WUS_FLX1 E1000_WUFC_FLX1 ++#define E1000_WUS_FLX2 E1000_WUFC_FLX2 ++#define E1000_WUS_FLX3 E1000_WUFC_FLX3 ++#define E1000_WUS_FLX_FILTERS E1000_WUFC_FLX_FILTERS ++ ++/* Wake Up Packet Length */ ++#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */ ++ ++/* Four Flexible Filters are supported */ ++#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4 ++ ++/* Each Flexible Filter is at most 128 (0x80) bytes in length */ ++#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128 ++ ++#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX ++#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */ ++#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */ ++#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN ++#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */ ++#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */ 
++/* Reserved (bits 4,5) in >= 82575 */ ++#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */ ++#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */ ++#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA ++#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */ ++#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */ ++/* SDP 4/5 (bits 8,9) are reserved in >= 82575 */ ++#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */ ++#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIX_SERDES 0x00800000 ++#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_IRCA 0x00000001 ++#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 ++#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 ++#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 ++#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000 ++#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000 ++#define E1000_CTRL_EXT_CANC 0x04000000 /* Interrupt delay cancellation */ ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++/* IAME enable bit (27) was removed in >= 82575 */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CRTL_EXT_PB_PAREN 0x01000000 /* packet buffer parity error detection enabled */ ++#define E1000_CTRL_EXT_DF_PAREN 0x02000000 /* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_GHOST_PAREN 0x40000000 ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_I2CCMD_REG_ADDR_SHIFT 16 ++#define E1000_I2CCMD_REG_ADDR 0x00FF0000 ++#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 ++#define E1000_I2CCMD_PHY_ADDR 0x07000000 ++#define E1000_I2CCMD_OPCODE_READ 0x08000000 ++#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 ++#define E1000_I2CCMD_RESET 0x10000000 ++#define E1000_I2CCMD_READY 0x20000000 ++#define E1000_I2CCMD_INTERRUPT_ENA 0x40000000 ++#define E1000_I2CCMD_ERROR 0x80000000 ++#define E1000_MAX_SGMII_PHY_REG_ADDR 255 ++#define E1000_I2CCMD_PHY_TIMEOUT 200 ++ ++/* Receive Decriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ ++#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter 
*/ ++#define E1000_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */ ++#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ ++#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ ++#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ ++#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ ++#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ ++#define E1000_RXD_SPC_PRI_SHIFT 13 ++#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ ++#define E1000_RXD_SPC_CFI_SHIFT 12 ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_MRQC_ENABLE_MASK 0x00000007 ++#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001 ++#define E1000_MRQC_ENABLE_RSS_INT 0x00000004 ++#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ ++#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ ++#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ ++#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ ++#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ ++#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++/* Enable Neighbor Discovery Filtering */ ++#define E1000_MANC_NEIGHBOR_EN 0x00004000 ++#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ ++#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ ++#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 
0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++/* Enable MNG packets to host memory */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 ++/* Enable IP address filtering */ ++#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 ++#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ ++#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ ++#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ ++#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ ++#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ ++#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ ++#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ ++#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ ++ ++#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ ++#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ ++ ++/* Receive Control */ ++#define E1000_RCTL_RST 0x00000001 /* Software reset */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ ++#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ ++#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ ++#define 
E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ ++ ++/* ++ * Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_CSR_SM 0x8 ++ ++/* FACTPS Definitions */ ++#define E1000_FACTPS_LFS 0x40000000 /* LAN Function Select */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ ++#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_TME 0x00000010 /* Test mode. 
0=normal,1=test */ ++#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ ++#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ ++#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ ++#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ ++#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ ++#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ ++#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ ++#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. 
++ */ ++#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0 ++#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0 ++#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 ++#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 ++#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 ++#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 ++#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR ++#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA ++ ++#define E1000_CONNSW_ENRGSRC 0x4 ++#define E1000_PCS_LCTL_FLV_LINK_UP 1 ++#define E1000_PCS_LCTL_FSV_10 0 ++#define E1000_PCS_LCTL_FSV_100 2 ++#define E1000_PCS_LCTL_FSV_1000 4 ++#define E1000_PCS_LCTL_FDV_FULL 8 ++#define E1000_PCS_LCTL_FSD 0x10 ++#define E1000_PCS_LCTL_FORCE_LINK 0x20 ++#define E1000_PCS_LCTL_LOW_LINK_LATCH 0x40 ++#define E1000_PCS_LCTL_AN_ENABLE 0x10000 ++#define E1000_PCS_LCTL_AN_RESTART 0x20000 ++#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 ++#define E1000_PCS_LCTL_AN_SGMII_BYPASS 0x80000 ++#define E1000_PCS_LCTL_AN_SGMII_TRIGGER 0x100000 ++#define E1000_PCS_LCTL_FAST_LINK_TIMER 0x1000000 ++#define E1000_PCS_LCTL_LINK_OK_FIX 0x2000000 ++#define E1000_PCS_LCTL_CRS_ON_NI 0x4000000 ++#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 ++ ++#define E1000_PCS_LSTS_LINK_OK 1 ++#define E1000_PCS_LSTS_SPEED_10 0 ++#define E1000_PCS_LSTS_SPEED_100 2 ++#define E1000_PCS_LSTS_SPEED_1000 4 ++#define E1000_PCS_LSTS_DUPLEX_FULL 8 ++#define E1000_PCS_LSTS_SYNK_OK 0x10 ++#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 ++#define E1000_PCS_LSTS_AN_PAGE_RX 0x20000 ++#define E1000_PCS_LSTS_AN_TIMED_OUT 0x40000 ++#define E1000_PCS_LSTS_AN_REMOTE_FAULT 0x80000 ++#define E1000_PCS_LSTS_AN_ERROR_RWS 0x100000 ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ ++#define E1000_STATUS_SPEED_MASK 0x000000C0 ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ ++#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ ++#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. 
*/ ++#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ ++#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ ++#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ ++#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ ++#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ ++#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ ++#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ ++#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ ++#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ ++#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ ++#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ ++#define E1000_STATUS_FUSE_8 0x04000000 ++#define E1000_STATUS_FUSE_9 0x08000000 ++#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ ++#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ ++ ++/* Constants used to intrepret the masked PCI-X bus speed. */ ++#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ ++#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */ ++#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */ ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++#define PHY_FORCE_TIME 20 ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. */ ++#define E1000_ALL_SPEED_DUPLEX ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_FULL_DUPLEX (ADVERTISE_10_FULL | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK_RATE 0x00000020 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00 ++#define E1000_LEDCTL_LED1_MODE_SHIFT 8 ++#define E1000_LEDCTL_LED1_BLINK_RATE 0x00002000 ++#define E1000_LEDCTL_LED1_IVRT 0x00004000 ++#define E1000_LEDCTL_LED1_BLINK 0x00008000 ++#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000 ++#define E1000_LEDCTL_LED2_MODE_SHIFT 16 ++#define E1000_LEDCTL_LED2_BLINK_RATE 0x00200000 ++#define E1000_LEDCTL_LED2_IVRT 0x00400000 ++#define E1000_LEDCTL_LED2_BLINK 0x00800000 ++#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000 ++#define E1000_LEDCTL_LED3_MODE_SHIFT 24 ++#define E1000_LEDCTL_LED3_BLINK_RATE 0x20000000 ++#define E1000_LEDCTL_LED3_IVRT 0x40000000 ++#define E1000_LEDCTL_LED3_BLINK 0x80000000 ++ ++#define E1000_LEDCTL_MODE_LINK_10_1000 0x0 ++#define E1000_LEDCTL_MODE_LINK_100_1000 0x1 ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_ACTIVITY 0x3 ++#define 
E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4 ++#define E1000_LEDCTL_MODE_LINK_10 0x5 ++#define E1000_LEDCTL_MODE_LINK_100 0x6 ++#define E1000_LEDCTL_MODE_LINK_1000 0x7 ++#define E1000_LEDCTL_MODE_PCIX_MODE 0x8 ++#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9 ++#define E1000_LEDCTL_MODE_COLLISION 0xA ++#define E1000_LEDCTL_MODE_BUS_SPEED 0xB ++#define E1000_LEDCTL_MODE_BUS_SIZE 0xC ++#define E1000_LEDCTL_MODE_PAUSED 0xD ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ ++#define E1000_TXD_POPTS_SHIFT 8 /* POPTS shift */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++/* Extended desc bits for Linksec and timesync */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_RST 0x00000001 /* software reset */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ ++#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++ ++/* Transmit Arbitration Count */ ++#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */ ++#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_ISCSI_DIS 0x00000001 ++#define E1000_RFCTL_ISCSI_DWC_MASK 0x0000003E ++#define E1000_RFCTL_ISCSI_DWC_SHIFT 1 ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 
0x00000080 ++#define E1000_RFCTL_NFS_VER_MASK 0x00000300 ++#define E1000_RFCTL_NFS_VER_SHIFT 8 ++#define E1000_RFCTL_IPV6_DIS 0x00000400 ++#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_ACKD_DIS 0x00002000 ++#define E1000_RFCTL_IPFRSP_DIS 0x00004000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82542_TIPG_IPGT 10 ++#define DEFAULT_82543_TIPG_IPGT_FIBER 9 ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF ++#define E1000_TIPG_IPGR1_MASK 0x000FFC00 ++#define E1000_TIPG_IPGR2_MASK 0x3FF00000 ++ ++#define DEFAULT_82542_TIPG_IPGR1 2 ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82542_TIPG_IPGR2 10 ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++ ++#define ETHERNET_FCS_SIZE 4 ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 ++ ++#define E1000_PHY_CTRL_SPD_EN 0x00000001 ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB */ ++#define E1000_PBA_12K 0x000C /* 12KB */ ++#define E1000_PBA_16K 0x0010 /* 16KB */ ++#define E1000_PBA_20K 0x0014 ++#define E1000_PBA_22K 0x0016 ++#define E1000_PBA_24K 0x0018 ++#define E1000_PBA_30K 0x001E ++#define E1000_PBA_32K 0x0020 ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_38K 0x0026 ++#define E1000_PBA_40K 0x0028 ++#define E1000_PBA_48K 0x0030 /* 48KB */ ++#define E1000_PBA_64K 0x0040 /* 64KB */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++#define E1000_PBS_24K E1000_PBA_24K ++ ++#define IFS_MAX 80 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++#define IFS_STEP 10 ++#define MIN_NUM_XMITS 1000 ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. 
threshold (0) */ ++#define E1000_ICR_RXO 0x00000040 /* rx overrun */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ ++#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ ++#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ ++#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ ++#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ ++#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ ++#define E1000_ICR_TXD_LOW 0x00008000 ++#define E1000_ICR_SRPD 0x00010000 ++#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ ++#define E1000_ICR_MNG 0x00040000 /* Manageability event */ ++#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ ++#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ ++#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ ++#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ ++#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ ++#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ ++ ++/* Extended Interrupt Cause Read */ ++#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ ++#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ ++#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ ++#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ ++#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ ++#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ ++#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ ++#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ ++#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ ++#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ ++/* TCP Timer */ ++#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ ++#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ ++#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ ++#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ */ ++#define POLL_IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ) ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ ++#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* Rx /c/ ordered set */ ++#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMS_SRPD E1000_ICR_SRPD ++#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMS_DSW E1000_ICR_DSW ++#define E1000_IMS_PHYINT E1000_ICR_PHYINT ++#define E1000_IMS_EPRST E1000_ICR_EPRST ++ ++/* Extended Interrupt Mask Set */ ++#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ ++#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ ++#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ ++#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ ++#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ ++#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ ++#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ ++#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ ++#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ ++#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* Rx /c/ ordered set */ ++#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_ICS_SRPD E1000_ICR_SRPD ++#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICS_DSW E1000_ICR_DSW ++#define E1000_ICS_PHYINT E1000_ICR_PHYINT ++#define E1000_ICS_EPRST E1000_ICR_EPRST ++ ++/* Extended Interrupt Cause Set */ ++#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ ++#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ ++#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ ++#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ ++#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ ++#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ ++#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ ++#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ ++#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ ++#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ ++/* Enable the counting of descriptors still to be processed. */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* 802.1q VLAN Packet Size */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* ++ * Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. 
However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. ++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++ ++/* Error Codes */ ++#define E1000_SUCCESS 0 ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define FIBER_LINK_UP_LIMIT 50 ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++#define MDIO_OWNERSHIP_TIMEOUT 10 ++/* Number of milliseconds for NVM auto read done after MAC reset. */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */ ++#define E1000_TXCW_NP 0x00008000 /* TXCW next page */ ++#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */ ++#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_CC 0x10000000 /* Receive config change */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */ ++ ++/* PCI Express Control */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++/* PHY Control Register */ ++#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX 
=1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ ++#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ ++#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ ++#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ ++#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ ++#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ ++#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ ++#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ ++#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ ++#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ ++#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ ++#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ ++#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */ ++#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */ ++#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */ ++#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */ ++#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */ ++#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++#define NWAY_ER_PAGE_RXD 0x0002 /* LP is 10T Half Duplex Capable */ ++#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP is 10T Full Duplex Capable */ ++#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_ER_PAR_DETECT_FAULT 0x0010 
/* LP is 100TX Full Duplex Capable */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ ++#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ ++#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */ ++#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */ ++#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ ++#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx is Master, 0=Slave */ ++#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ ++ ++#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Regiser */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ ++#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_FWE_MASK 0x00000030 ++#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ ++#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ ++#define E1000_EECD_FWE_SHIFT 4 ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ ++/* NVM Addressing bits based on type 0=small, 1=large */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_EECD_TYPE 0x00002000 /* NVM Type (1-SPI, 0-Microwire) */ ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ 
++#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ ++#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SECVAL_SHIFT 22 ++ ++#define E1000_NVM_SWDPIN0 0x0001 /* SWDPIN 0 NVM Value */ ++#define E1000_NVM_LED_LOGIC 0x0020 /* Led Logic Word */ ++#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++#define E1000_FLASH_UPDATES 2000 ++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 ++#define NVM_VERSION 0x0005 ++#define NVM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ ++#define NVM_PHY_CLASS_WORD 0x0007 ++#define NVM_INIT_CONTROL1_REG 0x000A ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_SWDEF_PINS_CTRL_PORT_1 0x0010 ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_3GIO_3 0x001A ++#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_CFG 0x0012 ++#define NVM_FLASH_VERSION 0x0032 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x40000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x80000 /* ...for second port */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_PAUSE 0x1000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++#define NVM_WORD0F_ANE 0x0800 ++#define NVM_WORD0F_SWPDIO_EXT_MASK 0x00F0 ++#define NVM_WORD0F_LPLU 0x0001 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++#define NVM_WORD1A_ASPM_MASK 0x000C ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. 
*/ ++#define NVM_SUM 0xBABA ++ ++#define NVM_MAC_ADDR_OFFSET 0 ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_RESERVED_WORD 0xFFFF ++#define NVM_PHY_CLASS_A 0x8000 ++#define NVM_SERDES_AMPLITUDE_MASK 0x000F ++#define NVM_SIZE_MASK 0x1C00 ++#define NVM_SIZE_SHIFT 10 ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++#define NVM_SWDPIO_EXT_SHIFT 4 ++ ++/* NVM Commands - Microwire */ ++#define NVM_READ_OPCODE_MICROWIRE 0x6 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_MICROWIRE 0x5 /* NVM write opcode */ ++#define NVM_ERASE_OPCODE_MICROWIRE 0x7 /* NVM erase opcode */ ++#define NVM_EWEN_OPCODE_MICROWIRE 0x13 /* NVM erase/write enable */ ++#define NVM_EWDS_OPCODE_MICROWIRE 0x10 /* NVM erast/write disable */ ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_WRDI_OPCODE_SPI 0x04 /* NVM reset Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++#define NVM_WRSR_OPCODE_SPI 0x01 /* NVM write Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++#define NVM_STATUS_WEN_SPI 0x02 ++#define NVM_STATUS_BP0_SPI 0x04 ++#define NVM_STATUS_BP1_SPI 0x08 ++#define NVM_STATUS_WPEN_SPI 0x80 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCIX_COMMAND_REGISTER 0xE6 ++#define PCIX_STATUS_REGISTER_LO 0xE8 ++#define PCIX_STATUS_REGISTER_HI 0xEA ++#define PCI_HEADER_TYPE_REGISTER 0x0E ++#define PCIE_LINK_STATUS 0x12 ++ ++#define PCIX_COMMAND_MMRBC_MASK 0x000C ++#define PCIX_COMMAND_MMRBC_SHIFT 0x2 ++#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 ++#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 ++#define PCIX_STATUS_HI_MMRBC_4K 0x3 ++#define PCIX_STATUS_HI_MMRBC_2K 0x2 ++#define PCIX_STATUS_LO_FUNC_MASK 0x7 ++#define PCI_HEADER_TYPE_MULTIFUNC 0x80 ++#define PCIE_LINK_WIDTH_MASK 0x3F0 ++#define PCIE_LINK_WIDTH_SHIFT 4 ++ ++#ifndef ETH_ADDR_LEN ++#define ETH_ADDR_LEN 6 ++#endif ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* ++ * I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1011_I_REV_4 0x04 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++#define M88_VENDOR 0x0141 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ ++#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ ++ ++#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ ++#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */ ++/* 1=CLK125 low, 0=CLK125 toggling */ ++#define M88E1000_PSCR_CLK125_DISABLE 0x0010 ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* ++ * 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++#define M88E1000_PSCR_EN_10BT_EXT_DIST 0x0080 ++/* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100 ++#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */ ++#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* ++ * 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-110M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ ++#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ ++#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */ ++#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */ ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */ ++#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define 
M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */ ++/* ++ * 1 = Lost lock detect enabled. ++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300 ++#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X 0x0000 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_2X 0x0200 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_3X 0x0400 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_4X 0x0600 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_6X 0x0A00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_7X 0x0C00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_8X 0x0E00 ++ ++/* ++ * Bits... ++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_SPEC_STATUS \ ++ GG82563_REG(0, 17) /* PHY Specific Status */ ++#define GG82563_PHY_INT_ENABLE \ ++ GG82563_REG(0, 18) /* Interrupt Enable */ ++#define GG82563_PHY_SPEC_STATUS_2 \ ++ GG82563_REG(0, 19) /* PHY Specific Status 2 */ ++#define GG82563_PHY_RX_ERR_CNTR \ ++ GG82563_REG(0, 21) /* Receive Error Counter */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++#define GG82563_PHY_TEST_CLK_CTRL \ ++ GG82563_REG(0, 30) /* Test Clock Control (use reg. 
29 to select) */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++#define GG82563_PHY_MAC_SPEC_CTRL_2 \ ++ GG82563_REG(2, 26) /* MAC Specific Control 2 */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PORT_RESET \ ++ GG82563_REG(193, 17) /* Port Reset */ ++#define GG82563_PHY_REVISION_ID \ ++ GG82563_REG(193, 18) /* Revision ID */ ++#define GG82563_PHY_DEVICE_ID \ ++ GG82563_REG(193, 19) /* Device ID */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++#define GG82563_PHY_RATE_ADAPT_CTRL \ ++ GG82563_REG(193, 25) /* Rate Adaptation Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_KMRN_FIFO_CTRL_STAT \ ++ GG82563_REG(194, 16) /* FIFO's Control/Status */ ++#define GG82563_PHY_KMRN_CTRL \ ++ GG82563_REG(194, 17) /* Control */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++#define GG82563_PHY_KMRN_DIAGNOSTIC \ ++ GG82563_REG(194, 19) /* Diagnostic */ ++#define GG82563_PHY_ACK_TIMEOUTS \ ++ GG82563_REG(194, 20) /* Acknowledge Timeouts */ ++#define GG82563_PHY_ADV_ABILITY \ ++ GG82563_REG(194, 21) /* Advertised Ability */ ++#define GG82563_PHY_LINK_PARTNER_ADV_ABILITY \ ++ GG82563_REG(194, 23) /* Link Partner Advertised Ability */ ++#define GG82563_PHY_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 24) /* Advertised Next Page */ ++#define GG82563_PHY_LINK_PARTNER_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 25) /* Link Partner Advertised Next page */ ++#define GG82563_PHY_KMRN_MISC \ ++ GG82563_REG(194, 26) /* Misc. */ ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++/* SerDes Control */ ++#define E1000_GEN_CTL_READY 0x80000000 ++#define E1000_GEN_CTL_ADDRESS_SHIFT 8 ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82542.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82542.c 2021-04-07 16:01:27.643633572 +0800 +@@ -0,0 +1,543 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". 
++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82542 (rev 1 & 2) ++ */ ++ ++#include "e1000_api.h" ++ ++static s32 e1000_init_phy_params_82542(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82542(struct e1000_hw *hw); ++static s32 e1000_get_bus_info_82542(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82542(struct e1000_hw *hw); ++static s32 e1000_init_hw_82542(struct e1000_hw *hw); ++static s32 e1000_setup_link_82542(struct e1000_hw *hw); ++static s32 e1000_led_on_82542(struct e1000_hw *hw); ++static s32 e1000_led_off_82542(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw); ++ ++struct e1000_dev_spec_82542 { ++ bool dma_fairness; ++}; ++ ++/** ++ * e1000_init_phy_params_82542 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82542"); ++ ++ phy->type = e1000_phy_none; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82542 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82542"); ++ ++ nvm->address_bits = 6; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->word_size = 64; ++ ++ /* Function Pointers */ ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_stop_nvm; ++ func->write_nvm = e1000_write_nvm_microwire; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82542 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_mac_params_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82542"); ++ ++ /* Set media type */ ++ hw->phy.media_type = e1000_media_type_fiber; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_82542; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82542; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82542; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82542; ++ /* phy/fiber/serdes setup */ ++ func->setup_physical_interface = e1000_setup_fiber_serdes_link_generic; ++ /* check for link */ ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_82542; ++ func->led_off = e1000_led_off_82542; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82542; ++ /* link info */ ++ func->get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82542); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82542 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82542(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82542"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82542; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82542; ++ hw->func.init_phy_params = e1000_init_phy_params_82542; ++} ++ ++/** ++ * e1000_get_bus_info_82542 - Obtain bus information for adapter ++ * @hw: pointer to the HW structure ++ * ++ * This will obtain information about the HW bus for which the ++ * adaper is attached and stores it in the hw structure. This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_get_bus_info_82542(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_get_bus_info_82542"); ++ ++ hw->bus.type = e1000_bus_type_pci; ++ hw->bus.speed = e1000_bus_speed_unknown; ++ hw->bus.width = e1000_bus_width_unknown; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_reset_hw_82542 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_reset_hw_82542(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val = E1000_SUCCESS; ++ u32 ctrl, icr; ++ ++ DEBUGFUNC("e1000_reset_hw_82542"); ++ ++ if (hw->revision_id == E1000_REVISION_2) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2\n"); ++ e1000_pci_clear_mwi(hw); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82542/82543 MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ e1000_reload_nvm(hw); ++ msec_delay(2); ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ if (hw->revision_id == E1000_REVISION_2) { ++ if (bus->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82542 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_hw_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82542 *dev_spec; ++ s32 ret_val = E1000_SUCCESS; ++ u32 ctrl; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82542"); ++ ++ dev_spec = (struct e1000_dev_spec_82542 *)hw->dev_spec; ++ ++ /* Disabling VLAN filtering */ ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ e1000_clear_vfta(hw); ++ ++ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ ++ if (hw->revision_id == E1000_REVISION_2) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ E1000_WRITE_REG(hw, E1000_RCTL, E1000_RCTL_RST); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(5); ++ } ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ ++ if (hw->revision_id == E1000_REVISION_2) { ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ if (hw->bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* ++ * Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. ++ */ ++ if (dev_spec->dma_fairness) { ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_82542(hw); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82542(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_82542 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. 
Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. This is a function ++ * pointer entry point called by the api module. ++ **/ ++static s32 e1000_setup_link_82542(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_link_82542"); ++ ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ hw->fc.type &= ~e1000_fc_tx_pause; ++ ++ if (mac->report_tx_early == 1) ++ hw->fc.type &= ~e1000_fc_rx_pause; ++ ++ /* ++ * We want to save off the original Flow Control configuration just in ++ * case we get disconnected and then reconnected into a different hub ++ * or switch with different Flow Control capabilities. ++ */ ++ hw->fc.original_type = hw->fc.type; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.type); ++ ++ /* Call the necessary subroutine to configure the link. */ ++ ret_val = func->setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing Flow Control address, type and timer regs\n"); ++ ++ E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); ++ ++ E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = e1000_set_fc_watermarks_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_led_on_82542 - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_on_82542(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_on_82542"); ++ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_82542 - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_off_82542(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_off_82542"); ++ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_translate_register_82542 - Translate the proper regiser offset ++ * @reg: e1000 register to be read ++ * ++ * Registers in 82542 are located in different offsets than other adapters ++ * even though they function in the same manner. This function takes in ++ * the name of the register to read and returns the correct offset for ++ * 82542 silicon. ++ **/ ++u32 e1000_translate_register_82542(u32 reg) ++{ ++ /* ++ * Some of the 82542 registers are located at different ++ * offsets than they are in newer adapters. ++ * Despite the difference in location, the registers ++ * function in the same manner. 
++ */ ++ switch (reg) { ++ case E1000_RA: ++ reg = 0x00040; ++ break; ++ case E1000_RDTR: ++ reg = 0x00108; ++ break; ++ case E1000_RDBAL(0): ++ reg = 0x00110; ++ break; ++ case E1000_RDBAH(0): ++ reg = 0x00114; ++ break; ++ case E1000_RDLEN(0): ++ reg = 0x00118; ++ break; ++ case E1000_RDH(0): ++ reg = 0x00120; ++ break; ++ case E1000_RDT(0): ++ reg = 0x00128; ++ break; ++ case E1000_RDBAL(1): ++ reg = 0x00138; ++ break; ++ case E1000_RDBAH(1): ++ reg = 0x0013C; ++ break; ++ case E1000_RDLEN(1): ++ reg = 0x00140; ++ break; ++ case E1000_RDH(1): ++ reg = 0x00148; ++ break; ++ case E1000_RDT(1): ++ reg = 0x00150; ++ break; ++ case E1000_FCRTH: ++ reg = 0x00160; ++ break; ++ case E1000_FCRTL: ++ reg = 0x00168; ++ break; ++ case E1000_MTA: ++ reg = 0x00200; ++ break; ++ case E1000_TDBAL(0): ++ reg = 0x00420; ++ break; ++ case E1000_TDBAH(0): ++ reg = 0x00424; ++ break; ++ case E1000_TDLEN(0): ++ reg = 0x00428; ++ break; ++ case E1000_TDH(0): ++ reg = 0x00430; ++ break; ++ case E1000_TDT(0): ++ reg = 0x00438; ++ break; ++ case E1000_TIDV: ++ reg = 0x00440; ++ break; ++ case E1000_VFTA: ++ reg = 0x00600; ++ break; ++ case E1000_TDFH: ++ reg = 0x08010; ++ break; ++ case E1000_TDFT: ++ reg = 0x08018; ++ break; ++ default: ++ break; ++ } ++ ++ return reg; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82542 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void e1000_clear_hw_cntrs_82542(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82542"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82543.c 2021-04-07 16:01:27.638633579 +0800 +@@ -0,0 +1,1654 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82543 ++ * e1000_82544 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82543.h" ++ ++static s32 e1000_init_phy_params_82543(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82543(struct e1000_hw *hw); ++static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, ++ u16 data); ++static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw); ++static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82543(struct e1000_hw *hw); ++static s32 e1000_init_hw_82543(struct e1000_hw *hw); ++static s32 e1000_setup_link_82543(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw); ++static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw); ++static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw); ++static s32 e1000_led_on_82543(struct e1000_hw *hw); ++static s32 e1000_led_off_82543(struct e1000_hw *hw); ++static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, ++ u32 value); ++static void e1000_mta_set_82543(struct e1000_hw *hw, u32 hash_value); ++static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw); ++static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw); ++static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw); ++static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); ++static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw); ++static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl); ++static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw); ++static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, ++ u16 count); ++static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw); ++static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state); ++ ++struct e1000_dev_spec_82543 { ++ u32 tbi_compatibility; ++ bool dma_fairness; ++ bool init_phy_disabled; ++}; ++ ++/** ++ * e1000_init_phy_params_82543 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82543"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } else { ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_m88; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_82543; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->read_phy_reg = (hw->mac.type == e1000_82543) ++ ? e1000_read_phy_reg_82543 ++ : e1000_read_phy_reg_m88; ++ func->reset_phy = (hw->mac.type == e1000_82543) ++ ? 
e1000_phy_hw_reset_82543 ++ : e1000_phy_hw_reset_generic; ++ func->write_phy_reg = (hw->mac.type == e1000_82543) ++ ? e1000_write_phy_reg_82543 ++ : e1000_write_phy_reg_m88; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ ++ /* ++ * The external PHY of the 82543 can be in a funky state. ++ * Resetting helps us read the PHY registers for acquiring ++ * the PHY ID. ++ */ ++ if (!e1000_init_phy_disabled_82543(hw)) { ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Resetting PHY during init failed.\n"); ++ goto out; ++ } ++ msec_delay(20); ++ } ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82543: ++ if (phy->id != M88E1000_E_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ case e1000_82544: ++ if (phy->id != M88E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82543 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82543"); ++ ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->word_size = 64; ++ nvm->delay_usec = 50; ++ nvm->address_bits = 6; ++ nvm->opcode_bits = 3; ++ ++ /* Function Pointers */ ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82543 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_init_mac_params_82543"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82543; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82543; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82543; ++ /* physical interface setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_setup_copper_link_82543 ++ : e1000_setup_fiber_link_82543; ++ /* check for link */ ++ func->check_for_link = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_check_for_copper_link_82543 ++ : e1000_check_for_fiber_link_82543; ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_82543; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_82543; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_82543; ++ func->led_off = e1000_led_off_82543; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82543; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82543); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ if (ret_val) ++ goto out; ++ ++ /* Set tbi compatibility */ ++ if ((hw->mac.type != e1000_82543) || ++ (hw->phy.media_type == e1000_media_type_fiber)) ++ e1000_set_tbi_compatibility_82543(hw, FALSE); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82543 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82543(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82543"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82543; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82543; ++ hw->func.init_phy_params = e1000_init_phy_params_82543; ++} ++ ++/** ++ * e1000_tbi_compatibility_enabled_82543 - Returns TBI compat status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the curent status of 10-bit Interface (TBI) compatibility ++ * (enabled/disabled). ++ **/ ++static bool e1000_tbi_compatibility_enabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_tbi_compatibility_enabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ state = (dev_spec->tbi_compatibility & TBI_COMPAT_ENABLED) ++ ? TRUE : FALSE; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_tbi_compatibility_82543 - Set TBI compatibility ++ * @hw: pointer to the HW structure ++ * @state: enable/disable TBI compatibility ++ * ++ * Enables or disabled 10-bit Interface (TBI) compatibility. ++ **/ ++void e1000_set_tbi_compatibility_82543(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_tbi_compatibility_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ if (state) ++ dev_spec->tbi_compatibility |= TBI_COMPAT_ENABLED; ++ else ++ dev_spec->tbi_compatibility &= ~TBI_COMPAT_ENABLED; ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_tbi_sbp_enabled_82543 - Returns TBI SBP status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the curent status of 10-bit Interface (TBI) store bad packet (SBP) ++ * (enabled/disabled). 
++ **/ ++bool e1000_tbi_sbp_enabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_tbi_sbp_enabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ DEBUGOUT("TBI compatibility workaround for 82543 only.\n"); ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ goto out; ++ } ++ ++ state = (dev_spec->tbi_compatibility & TBI_SBP_ENABLED) ++ ? TRUE : FALSE; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_tbi_sbp_82543 - Set TBI SBP ++ * @hw: pointer to the HW structure ++ * @state: enable/disable TBI store bad packet ++ * ++ * Enables or disabled 10-bit Interface (TBI) store bad packet (SBP). ++ **/ ++static void e1000_set_tbi_sbp_82543(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_tbi_sbp_82543"); ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (state && e1000_tbi_compatibility_enabled_82543(hw)) ++ dev_spec->tbi_compatibility |= TBI_SBP_ENABLED; ++ else ++ dev_spec->tbi_compatibility &= ~TBI_SBP_ENABLED; ++ ++ return; ++} ++ ++/** ++ * e1000_init_phy_disabled_82543 - Returns init PHY status ++ * @hw: pointer to the HW structure ++ * ++ * Returns the current status of whether PHY initialization is disabled. ++ * True if PHY initialization is disabled else false. ++ **/ ++static bool e1000_init_phy_disabled_82543(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82543 *dev_spec; ++ bool ret_val; ++ ++ DEBUGFUNC("e1000_init_phy_disabled_82543"); ++ ++ if (hw->mac.type != e1000_82543) { ++ ret_val = FALSE; ++ goto out; ++ } ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = FALSE; ++ goto out; ++ } ++ ++ ret_val = dev_spec->init_phy_disabled; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_tbi_adjust_stats_82543 - Adjust stats when TBI enabled ++ * @hw: pointer to the HW structure ++ * @stats: Struct containing statistic register values ++ * @frame_len: The length of the frame in question ++ * @mac_addr: The Ethernet destination address of the frame in question ++ * @max_frame_size: The maximum frame size ++ * ++ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT ++ **/ ++void e1000_tbi_adjust_stats_82543(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, u32 frame_len, ++ u8 *mac_addr, u32 max_frame_size) ++{ ++ if (!(e1000_tbi_sbp_enabled_82543(hw))) ++ goto out; ++ ++ /* First adjust the frame length. */ ++ frame_len--; ++ /* ++ * We need to adjust the statistics counters, since the hardware ++ * counters overcount this packet as a CRC error and undercount ++ * the packet as a good packet ++ */ ++ /* This packet should not be counted as a CRC error. */ ++ stats->crcerrs--; ++ /* This packet does count as a Good Packet Received. */ ++ stats->gprc++; ++ ++ /* Adjust the Good Octets received counters */ ++ stats->gorc += frame_len; ++ ++ /* ++ * Is this a broadcast or multicast? Check broadcast first, ++ * since the test for a multicast frame will test positive on ++ * a broadcast frame. ++ */ ++ if ((mac_addr[0] == 0xff) && (mac_addr[1] == 0xff)) ++ /* Broadcast packet */ ++ stats->bprc++; ++ else if (*mac_addr & 0x01) ++ /* Multicast packet */ ++ stats->mprc++; ++ ++ /* ++ * In this case, the hardware has overcounted the number of ++ * oversize frames. 
++ */ ++ if ((frame_len == max_frame_size) && (stats->roc > 0)) ++ stats->roc--; ++ ++ /* ++ * Adjust the bin counters when the extra byte put the frame in the ++ * wrong bin. Remember that the frame_len was adjusted above. ++ */ ++ if (frame_len == 64) { ++ stats->prc64++; ++ stats->prc127--; ++ } else if (frame_len == 127) { ++ stats->prc127++; ++ stats->prc255--; ++ } else if (frame_len == 255) { ++ stats->prc255++; ++ stats->prc511--; ++ } else if (frame_len == 511) { ++ stats->prc511++; ++ stats->prc1023--; ++ } else if (frame_len == 1023) { ++ stats->prc1023++; ++ stats->prc1522--; ++ } else if (frame_len == 1522) { ++ stats->prc1522++; ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_read_phy_reg_82543 - Read PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY at offset and stores the information read to data. ++ **/ ++static s32 e1000_read_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ u32 mdic; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_phy_reg_82543"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * We must first send a preamble through the MDIO pin to signal the ++ * beginning of an MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* ++ * Now combine the next few fields that are required for a read ++ * operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine five different times. The format ++ * of an MII read instruction consists of a shift out of 14 bits and ++ * is defined as follows: ++ * ++ * followed by a shift in of 18 bits. This first two bits shifted in ++ * are TurnAround bits used to avoid contention on the MDIO pin when a ++ * READ operation is performed. These two bits are thrown away ++ * followed by a shift in of 16 bits which contains the desired data. ++ */ ++ mdic = (offset | (hw->phy.addr << 5) | ++ (PHY_OP_READ << 10) | (PHY_SOF << 12)); ++ ++ e1000_shift_out_mdi_bits_82543(hw, mdic, 14); ++ ++ /* ++ * Now that we've shifted out the read command to the MII, we need to ++ * "shift in" the 16-bit value (18 total bits) of the requested PHY ++ * register address. ++ */ ++ *data = e1000_shift_in_mdi_bits_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_82543 - Write PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be written ++ * @data: pointer to the data to be written at offset ++ * ++ * Writes data to the PHY at offset. ++ **/ ++static s32 e1000_write_phy_reg_82543(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ u32 mdic; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_phy_reg_82543"); ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* ++ * We'll need to use the SW defined pins to shift the write command ++ * out to the PHY. We first send a preamble to the PHY to signal the ++ * beginning of the MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits_82543(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* ++ * Now combine the remaining required fields that will indicate a ++ * write operation. 
We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine for each field in the command. The ++ * format of a MII write instruction is as follows: ++ * . ++ */ ++ mdic = ((PHY_TURNAROUND) | (offset << 2) | (hw->phy.addr << 7) | ++ (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); ++ mdic <<= 16; ++ mdic |= (u32) data; ++ ++ e1000_shift_out_mdi_bits_82543(hw, mdic, 32); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_raise_mdi_clk_82543 - Raise Management Data Input clock ++ * @hw: pointer to the HW structure ++ * @ctrl: pointer to the control register ++ * ++ * Raise the management data input clock by setting the MDC bit in the control ++ * register. ++ **/ ++static void e1000_raise_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) ++{ ++ /* ++ * Raise the clock input to the Management Data Clock (by setting the ++ * MDC bit), and then delay a sufficient amount of time. ++ */ ++ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl | E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/** ++ * e1000_lower_mdi_clk_82543 - Lower Management Data Input clock ++ * @hw: pointer to the HW structure ++ * @ctrl: pointer to the control register ++ * ++ * Lower the management data input clock by clearing the MDC bit in the ++ * control register. ++ **/ ++static void e1000_lower_mdi_clk_82543(struct e1000_hw *hw, u32 *ctrl) ++{ ++ /* ++ * Lower the clock input to the Management Data Clock (by clearing the ++ * MDC bit), and then delay a sufficient amount of time. ++ */ ++ E1000_WRITE_REG(hw, E1000_CTRL, (*ctrl & ~E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/** ++ * e1000_shift_out_mdi_bits_82543 - Shift data bits our to the PHY ++ * @hw: pointer to the HW structure ++ * @data: data to send to the PHY ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the PHY. So, the value in the ++ * "data" parameter will be shifted out to the PHY one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_mdi_bits_82543(struct e1000_hw *hw, u32 data, ++ u16 count) ++{ ++ u32 ctrl, mask; ++ ++ /* ++ * We need to shift "count" number of bits out to the PHY. So, the ++ * value in the "data" parameter will be shifted out to the PHY one ++ * bit at a time. In order to do this, "data" must be broken down ++ * into bits. ++ */ ++ mask = 0x01; ++ mask <<= (count -1); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ++ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); ++ ++ while (mask) { ++ /* ++ * A "1" is shifted out to the PHY by setting the MDIO bit to ++ * "1" and then raising and lowering the Management Data Clock. ++ * A "0" is shifted out to the PHY by setting the MDIO bit to ++ * "0" and then raising and lowering the clock. ++ */ ++ if (data & mask) ctrl |= E1000_CTRL_MDIO; ++ else ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(10); ++ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ mask >>= 1; ++ } ++} ++ ++/** ++ * e1000_shift_in_mdi_bits_82543 - Shift data bits in from the PHY ++ * @hw: pointer to the HW structure ++ * ++ * In order to read a register from the PHY, we need to shift 18 bits ++ * in from the PHY. Bits are "shifted in" by raising the clock input to ++ * the PHY (setting the MDC bit), and then reading the value of the data out ++ * MDIO bit. 
++ **/ ++static u16 e1000_shift_in_mdi_bits_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ u16 data = 0; ++ u8 i; ++ ++ /* ++ * In order to read a register from the PHY, we need to shift in a ++ * total of 18 bits from the PHY. The first two bit (turnaround) ++ * times are used to avoid contention on the MDIO pin when a read ++ * operation is performed. These two bits are ignored by us and ++ * thrown away. Bits are "shifted in" by raising the input to the ++ * Management Data Clock (setting the MDC bit) and then reading the ++ * value of the MDIO bit. ++ */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* ++ * Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as ++ * input. ++ */ ++ ctrl &= ~E1000_CTRL_MDIO_DIR; ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Raise and lower the clock before reading in the data. This accounts ++ * for the turnaround bits. The first clock occurred when we clocked ++ * out the last bit of the Register Address. ++ */ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ for (data = 0, i = 0; i < 16; i++) { ++ data <<= 1; ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ /* Check to see if we shifted in a "1". */ ++ if (ctrl & E1000_CTRL_MDIO) ++ data |= 1; ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ } ++ ++ e1000_raise_mdi_clk_82543(hw, &ctrl); ++ e1000_lower_mdi_clk_82543(hw, &ctrl); ++ ++ return data; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_82543 - Force speed/duplex for PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the function to force speed and duplex for the m88 PHY, and ++ * if the PHY is not auto-negotiating and the speed is forced to 10Mbit, ++ * then call the function for polarity reversal workaround. ++ **/ ++static s32 e1000_phy_force_speed_duplex_82543(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex_82543"); ++ ++ ret_val = e1000_phy_force_speed_duplex_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ if (!hw->mac.autoneg && ++ (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED)) ++ ret_val = e1000_polarity_reversal_workaround_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_polarity_reversal_workaround_82543 - Workaround polarity reversal ++ * @hw: pointer to the HW structure ++ * ++ * When forcing link to 10 Full or 10 Half, the PHY can reverse the polarity ++ * inadvertantly. To workaround the issue, we disable the transmitter on ++ * the PHY until we have established the link partner's link parameters. ++ **/ ++static s32 e1000_polarity_reversal_workaround_82543(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 mii_status_reg; ++ u16 i; ++ bool link; ++ ++ /* Polarity reversal workaround for forced 10F/10H links. */ ++ ++ /* Disable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * This loop will early-out if the NO link condition has been met. ++ * In other words, DO NOT use e1000_phy_has_link_generic() here. ++ */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* ++ * Read the MII Status Register and wait for Link Status bit ++ * to be clear. 
++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) ++ break; ++ msec_delay_irq(100); ++ } ++ ++ /* Recommended delay time after link has been lost */ ++ msec_delay_irq(1000); ++ ++ /* Now we will re-enable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); ++ if (ret_val) ++ goto out; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Read the MII Status Register and wait for Link Status bit ++ * to be set. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_TIME, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_82543 - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Sets the PHY_RESET_DIR bit in the extended device control register ++ * to put the PHY into a reset and waits for completion. Once the reset ++ * has been accomplished, clear the PHY_RESET_DIR bit to take the PHY out ++ * of reset. This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_phy_hw_reset_82543(struct e1000_hw *hw) ++{ ++ struct e1000_functions *func = &hw->func; ++ u32 ctrl_ext; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_phy_hw_reset_82543"); ++ ++ /* ++ * Read the Extended Device Control Register, assert the PHY_RESET_DIR ++ * bit to put the PHY into reset... ++ */ ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* ...then take it out of reset. */ ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(150); ++ ++ ret_val = func->get_cfg_done(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_82543 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_reset_hw_82543"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ e1000_set_tbi_sbp_82543(hw, FALSE); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82543/82544 MAC\n"); ++ if (hw->mac.type == e1000_82543) { ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ } else { ++ /* ++ * The 82544 can't ACK the 64-bit write when issuing the ++ * reset, so use IO-mapping as a workaround. 
++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ } ++ ++ /* ++ * After MAC reset, force reload of NVM to restore power-on ++ * settings to device. ++ */ ++ e1000_reload_nvm(hw); ++ msec_delay(2); ++ ++ /* Masking off and clearing any pending interrupts */ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82543 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 e1000_init_hw_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82543 *dev_spec; ++ u32 ctrl; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82543"); ++ ++ dev_spec = (struct e1000_dev_spec_82543 *)hw->dev_spec; ++ ++ if (!dev_spec) { ++ DEBUGOUT("dev_spec pointer is set to NULL.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Disabling VLAN filtering */ ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* ++ * Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. ++ */ ++ if (hw->mac.type == e1000_82543 && dev_spec->dma_fairness) { ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ e1000_pcix_mmrbc_workaround_generic(hw); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82543(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_link_82543 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM to determine the initial polarity value and write the ++ * extended device control register with the information before calling ++ * the generic setup link function, which does the following: ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_setup_link_82543"); ++ ++ /* ++ * Take the 4 bits from NVM word 0xF that determine the initial ++ * polarity value for the SW controlled pins, and setup the ++ * Extended Device Control reg with that info. ++ * This is needed because one of the SW controlled pins is used for ++ * signal detection. So this should be done before phy setup. 
++ */ ++ if (hw->mac.type == e1000_82543) { ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ctrl_ext = ((data & NVM_WORD0F_SWPDIO_EXT_MASK) << ++ NVM_SWDPIO_EXT_SHIFT); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ } ++ ++ ret_val = e1000_setup_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82543 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ bool link; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL) | E1000_CTRL_SLU; ++ /* ++ * With 82543, we need to force speed and duplex on the MAC ++ * equal to what the PHY speed and duplex configuration is. ++ * In addition, we need to perform a hardware reset on the ++ * PHY to take it out of reset. ++ */ ++ if (hw->mac.type == e1000_82543) { ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ goto out; ++ hw->phy.reset_disable = FALSE; ++ } else { ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ } ++ ++ /* Set MDI/MDI-X, Polarity Reversal, and downshift settings */ ++ ret_val = e1000_copper_link_setup_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. ++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ DEBUGOUT("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex_82543(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = e1000_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ ++ if (link) { ++ DEBUGOUT("Valid link established!!!\n"); ++ /* Config the MAC and PHY after link is up */ ++ if (hw->mac.type == e1000_82544) { ++ e1000_config_collision_dist_generic(hw); ++ } else { ++ ret_val = e1000_config_mac_to_phy_82543(hw); ++ if (ret_val) ++ goto out; ++ } ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ } else { ++ DEBUGOUT("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_link_82543 - Setup link for fiber ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber links. Upon ++ * successful setup, poll for link. 
++ **/ ++static s32 e1000_setup_fiber_link_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_fiber_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ ++ /* ++ * For these adapters, the SW defineable pin 1 is cleared when the ++ * optics detect a signal. If we have a signal, then poll for a ++ * "Link-Up" indication. ++ */ ++ if (!(E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ DEBUGOUT("No signal detected\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_copper_link_82543 - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks the phy for link, if link exists, do the following: ++ * - check for downshift ++ * - do polarity workaround (if necessary) ++ * - configure collision distance ++ * - configure flow control after link up ++ * - configure tbi compatibility ++ **/ ++static s32 e1000_check_for_copper_link_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 icr, rctl; ++ s32 ret_val; ++ u16 speed, duplex; ++ bool link; ++ ++ DEBUGFUNC("e1000_check_for_copper_link_82543"); ++ ++ if (!mac->get_link_status) { ++ ret_val = E1000_SUCCESS; ++ goto out; ++ } ++ ++ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = FALSE; ++ ++ e1000_check_downshift_generic(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we can return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ /* ++ * If speed and duplex are forced to 10H or 10F, then we will ++ * implement the polarity reversal workaround. We disable ++ * interrupts first, and upon returning, place the devices ++ * interrupt state to its previous value except for the link ++ * status change interrupt which will happened due to the ++ * execution of this workaround. ++ */ ++ if (mac->forced_speed_duplex & E1000_ALL_10_SPEED) { ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ret_val = e1000_polarity_reversal_workaround_82543(hw); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ E1000_WRITE_REG(hw, E1000_ICS, (icr & ~E1000_ICS_LSC)); ++ E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); ++ } ++ ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * We have a M88E1000 PHY and Auto-Neg is enabled. If we ++ * have Si on board that is 82544 or newer, Auto ++ * Speed Detection takes care of MAC speed/duplex ++ * configuration. So we only need to configure Collision ++ * Distance in the MAC. Otherwise, we need to force ++ * speed/duplex on the MAC to the current PHY speed/duplex ++ * settings. ++ */ ++ if (mac->type == e1000_82544) ++ e1000_config_collision_dist_generic(hw); ++ else { ++ ret_val = e1000_config_mac_to_phy_82543(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ goto out; ++ } ++ } ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. 
++ */ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ } ++ ++ /* ++ * At this point we know that we are on copper and we have ++ * auto-negotiated link. These are conditions for checking the link ++ * partner capability register. We use the link speed to determine if ++ * TBI compatibility needs to be turned on or off. If the link is not ++ * at gigabit speed, then TBI compatibility is not needed. If we are ++ * at gigabit speed, we turn on TBI compatibility. ++ */ ++ if (e1000_tbi_compatibility_enabled_82543(hw)) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ if (speed != SPEED_1000) { ++ /* ++ * If link speed is not set to gigabit speed, ++ * we do not need to enable TBI compatibility. ++ */ ++ if (e1000_tbi_sbp_enabled_82543(hw)) { ++ /* ++ * If we previously were in the mode, ++ * turn it off. ++ */ ++ e1000_set_tbi_sbp_82543(hw, FALSE); ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl &= ~E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ } ++ } else { ++ /* ++ * If TBI compatibility is was previously off, ++ * turn it on. For compatibility with a TBI link ++ * partner, we will store bad packets. Some ++ * frames have an additional byte on the end and ++ * will look like CRC errors to to the hardware. ++ */ ++ if (!e1000_tbi_sbp_enabled_82543(hw)) { ++ e1000_set_tbi_sbp_82543(hw, TRUE); ++ rctl = E1000_READ_REG(hw, E1000_RCTL); ++ rctl |= E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, E1000_RCTL, rctl); ++ } ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_for_fiber_link_82543 - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++static s32 e1000_check_for_fiber_link_82543(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw, ctrl, status; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_check_for_fiber_link_82543"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ status = E1000_READ_REG(hw, E1000_STATUS); ++ rxcw = E1000_READ_REG(hw, E1000_RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 0 == have signal */ ++ if ((!(ctrl & E1000_CTRL_SWDPIN1)) && ++ (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ ret_val = 0; ++ goto out; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. 
*/ ++ ret_val = e1000_config_fc_after_link_up_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ goto out; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); ++ E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = TRUE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_config_mac_to_phy_82543 - Configure MAC to PHY settings ++ * @hw: pointer to the HW structure ++ * ++ * For the 82543 silicon, we need to set the MAC to match the settings ++ * of the PHY, even if the PHY is auto-negotiating. ++ **/ ++static s32 e1000_config_mac_to_phy_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_config_mac_to_phy_82543"); ++ ++ /* Set the bits to force speed and duplex */ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); ++ ++ /* ++ * Set up duplex in the Device Control and Transmit Control ++ * registers depending on negotiated values. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ ctrl &= ~E1000_CTRL_FD; ++ if (phy_data & M88E1000_PSSR_DPLX) ++ ctrl |= E1000_CTRL_FD; ++ ++ e1000_config_collision_dist_generic(hw); ++ ++ /* ++ * Set up speed in the Device Control register depending on ++ * negotiated values. ++ */ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ++ ctrl |= E1000_CTRL_SPD_1000; ++ else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ++ ctrl |= E1000_CTRL_SPD_100; ++ ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_write_vfta_82543 - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: the 32-bit offset in which to write the value to. ++ * @value: the 32-bit value to write at location offset. ++ * ++ * This writes a 32-bit value to a 32-bit offset in the VLAN filter ++ * table. ++ **/ ++static void e1000_write_vfta_82543(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ u32 temp; ++ ++ DEBUGFUNC("e1000_write_vfta_82543"); ++ ++ if ((hw->mac.type == e1000_82544) && (offset & 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, E1000_VFTA, offset - 1); ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset - 1, temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ e1000_write_vfta_generic(hw, offset, value); ++ } ++} ++ ++/** ++ * e1000_mta_set_82543 - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. 
++ **/ ++static void e1000_mta_set_82543(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta, temp; ++ ++ DEBUGFUNC("e1000_mta_set_82543"); ++ ++ hash_reg = (hash_value >> 5); ++ ++ /* ++ * If we are on an 82544 and we are trying to write an odd offset ++ * in the MTA, save off the previous entry before writing and ++ * restore the old value after writing. ++ */ ++ if ((hw->mac.type == e1000_82544) && (hash_reg & 1)) { ++ hash_reg &= (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); ++ mta |= (1 << hash_bit); ++ temp = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg - 1); ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg - 1, temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ e1000_mta_set_generic(hw, hash_value); ++ } ++} ++ ++/** ++ * e1000_led_on_82543 - Turn on SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED on. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_on_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_on_82543"); ++ ++ if (hw->mac.type == e1000_82544 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ /* Clear SW-defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Fiber 82544 and all 82543 use this method */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_led_off_82543 - Turn off SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * Turns the SW defined LED off. This is a function pointer entry point ++ * called by the api module. ++ **/ ++static s32 e1000_led_off_82543(struct e1000_hw *hw) ++{ ++ u32 ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGFUNC("e1000_led_off_82543"); ++ ++ if (hw->mac.type == e1000_82544 && ++ hw->phy.media_type == e1000_media_type_copper) { ++ /* Set SW-defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82543 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82543(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82543"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/kcompat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/kcompat.h 2021-04-07 16:01:27.633633586 +0800 +@@ -0,0 +1,603 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _KCOMPAT_H_ ++#define _KCOMPAT_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++/* NAPI enable/disable flags here */ ++ ++ ++#ifdef _E1000_H_ ++#ifdef CONFIG_E1000_NAPI ++#define NAPI ++#endif ++#ifdef E1000_NAPI ++#undef NAPI ++#define NAPI ++#endif ++#ifdef E1000_NO_NAPI ++#undef NAPI ++#endif ++#endif ++ ++#ifdef _IGB_H_ ++#define NAPI ++#endif ++ ++#ifdef _IXGB_H_ ++#ifdef CONFIG_IXGB_NAPI ++#define NAPI ++#endif ++#ifdef IXGB_NAPI ++#undef NAPI ++#define NAPI ++#endif ++#ifdef IXGB_NO_NAPI ++#undef NAPI ++#endif ++#endif ++ ++ ++#ifdef DRIVER_E1000 ++#define adapter_struct e1000_adapter ++#endif ++ ++ ++// RTNET settings ++#ifdef NAPI ++#undef NAPI ++#endif ++ ++#undef NETIF_F_TSO ++#undef NETIF_F_HW_VLAN_TX ++#undef CONFIG_NET_POLL_CONTROLLER ++#ifdef ETHTOOL_GPERMADDR ++#undef ETHTOOL_GPERMADDR ++#endif ++ ++ ++/* and finally set defines so that the code sees the changes */ ++#ifdef NAPI ++#ifndef CONFIG_E1000_NAPI ++#define CONFIG_E1000_NAPI ++#endif ++#ifndef CONFIG_IXGB_NAPI ++#define CONFIG_IXGB_NAPI ++#endif ++#else ++#undef CONFIG_E1000_NAPI ++#undef CONFIG_IXGB_NAPI ++#endif ++ ++/* packet split disable/enable */ ++#ifdef DISABLE_PACKET_SPLIT ++#undef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT ++#endif ++ ++/* MSI compatibility code for all kernels and drivers */ ++#ifdef DISABLE_PCI_MSI ++#undef CONFIG_PCI_MSI ++#endif ++ ++#ifdef DISABLE_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef DISABLE_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifndef PMSG_SUSPEND ++#define PMSG_SUSPEND 3 ++#endif ++ ++/* generic boolean compatibility */ ++#undef TRUE ++#undef FALSE ++#define TRUE true ++#define FALSE false ++#ifdef GCC_VERSION ++#if ( GCC_VERSION < 3000 ) ++#define _Bool char ++#endif ++#endif ++#ifndef bool ++#define bool _Bool ++#define true 1 ++#define false 0 ++#endif ++ ++ ++#ifndef module_param ++#define module_param(v,t,p) MODULE_PARM(v, "i"); ++#endif ++ ++#ifndef DMA_64BIT_MASK ++#define DMA_64BIT_MASK 0xffffffffffffffffULL ++#endif ++ ++#ifndef DMA_32BIT_MASK ++#define DMA_32BIT_MASK 0x00000000ffffffffULL ++#endif ++ ++#ifndef PCI_CAP_ID_EXP ++#define PCI_CAP_ID_EXP 0x10 ++#endif ++ ++#ifndef mmiowb ++#ifdef CONFIG_IA64 ++#define mmiowb() asm volatile ("mf.a" ::: "memory") ++#else ++#define mmiowb() ++#endif ++#endif ++ ++#ifndef SET_NETDEV_DEV ++#define SET_NETDEV_DEV(net, pdev) ++#endif ++ ++#ifndef HAVE_FREE_NETDEV ++#define free_netdev(x) kfree(x) ++#endif ++ ++#ifdef HAVE_POLL_CONTROLLER ++#define CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifndef NETDEV_TX_OK ++#define NETDEV_TX_OK 0 ++#endif ++ ++#ifndef NETDEV_TX_BUSY ++#define NETDEV_TX_BUSY 1 ++#endif ++ ++#ifndef NETDEV_TX_LOCKED ++#define NETDEV_TX_LOCKED -1 ++#endif ++ ++#ifndef SKB_DATAREF_SHIFT ++/* if we do not have the infrastructure to detect if skb_header is cloned ++ just return false in all cases */ ++#define skb_header_cloned(x) 0 ++#endif ++ ++#ifndef NETIF_F_GSO ++#define gso_size tso_size ++#define gso_segs tso_segs ++#endif ++ ++#ifndef CHECKSUM_PARTIAL ++#define CHECKSUM_PARTIAL CHECKSUM_HW ++#define CHECKSUM_COMPLETE CHECKSUM_HW ++#endif ++ ++#ifndef __read_mostly ++#define __read_mostly ++#endif ++ ++#ifndef MII_RESV1 
++#define MII_RESV1 0x17 /* Reserved... */ ++#endif ++ ++#ifndef unlikely ++#define unlikely(_x) _x ++#define likely(_x) _x ++#endif ++ ++#ifndef WARN_ON ++#define WARN_ON(x) ++#endif ++ ++#ifndef PCI_DEVICE ++#define PCI_DEVICE(vend,dev) \ ++ .vendor = (vend), .device = (dev), \ ++ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID ++#endif ++ ++#ifndef num_online_cpus ++#define num_online_cpus() smp_num_cpus ++#endif ++ ++#ifndef _LINUX_RANDOM_H ++#include ++#endif ++ ++#ifndef DECLARE_BITMAP ++#ifndef BITS_TO_LONGS ++#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) ++#endif ++#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] ++#endif ++ ++#ifndef VLAN_HLEN ++#define VLAN_HLEN 4 ++#endif ++ ++#ifndef VLAN_ETH_HLEN ++#define VLAN_ETH_HLEN 18 ++#endif ++ ++#ifndef VLAN_ETH_FRAME_LEN ++#define VLAN_ETH_FRAME_LEN 1518 ++#endif ++ ++ ++/*****************************************************************************/ ++/* Installations with ethtool version without eeprom, adapter id, or statistics ++ * support */ ++ ++#ifndef ETH_GSTRING_LEN ++#define ETH_GSTRING_LEN 32 ++#endif ++ ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x1d ++#undef ethtool_drvinfo ++#define ethtool_drvinfo k_ethtool_drvinfo ++struct k_ethtool_drvinfo { ++ u32 cmd; ++ char driver[32]; ++ char version[32]; ++ char fw_version[32]; ++ char bus_info[32]; ++ char reserved1[32]; ++ char reserved2[16]; ++ u32 n_stats; ++ u32 testinfo_len; ++ u32 eedump_len; ++ u32 regdump_len; ++}; ++ ++struct ethtool_stats { ++ u32 cmd; ++ u32 n_stats; ++ u64 data[0]; ++}; ++#endif /* ETHTOOL_GSTATS */ ++ ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x1c ++#endif /* ETHTOOL_PHYS_ID */ ++ ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x1b ++enum ethtool_stringset { ++ ETH_SS_TEST = 0, ++ ETH_SS_STATS, ++}; ++struct ethtool_gstrings { ++ u32 cmd; /* ETHTOOL_GSTRINGS */ ++ u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ ++ u32 len; /* number of strings in the string set */ ++ u8 data[0]; ++}; ++#endif /* ETHTOOL_GSTRINGS */ ++ ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x1a ++enum ethtool_test_flags { ++ ETH_TEST_FL_OFFLINE = (1 << 0), ++ ETH_TEST_FL_FAILED = (1 << 1), ++}; ++struct ethtool_test { ++ u32 cmd; ++ u32 flags; ++ u32 reserved; ++ u32 len; ++ u64 data[0]; ++}; ++#endif /* ETHTOOL_TEST */ ++ ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0xb ++#undef ETHTOOL_GREGS ++struct ethtool_eeprom { ++ u32 cmd; ++ u32 magic; ++ u32 offset; ++ u32 len; ++ u8 data[0]; ++}; ++ ++struct ethtool_value { ++ u32 cmd; ++ u32 data; ++}; ++#endif /* ETHTOOL_GEEPROM */ ++ ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0xa ++#endif /* ETHTOOL_GLINK */ ++ ++#ifndef ETHTOOL_GREGS ++#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ ++#define ethtool_regs _kc_ethtool_regs ++/* for passing big chunks of data */ ++struct _kc_ethtool_regs { ++ u32 cmd; ++ u32 version; /* driver-specific, indicates different chips/revs */ ++ u32 len; /* bytes */ ++ u8 data[0]; ++}; ++#endif /* ETHTOOL_GREGS */ ++ ++#ifndef ETHTOOL_GMSGLVL ++#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ ++#endif ++#ifndef ETHTOOL_SMSGLVL ++#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. 
*/ ++#endif ++#ifndef ETHTOOL_NWAY_RST ++#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ ++#endif ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0x0000000a /* Get link status */ ++#endif ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ ++#endif ++#ifndef ETHTOOL_SEEPROM ++#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ ++#endif ++#ifndef ETHTOOL_GCOALESCE ++#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ ++/* for configuring coalescing parameters of chip */ ++#define ethtool_coalesce _kc_ethtool_coalesce ++struct _kc_ethtool_coalesce { ++ u32 cmd; /* ETHTOOL_{G,S}COALESCE */ ++ ++ /* How many usecs to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_max_coalesced_frames ++ * is used. ++ */ ++ u32 rx_coalesce_usecs; ++ ++ /* How many packets to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause RX interrupts to never be ++ * generated. ++ */ ++ u32 rx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 rx_coalesce_usecs_irq; ++ u32 rx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_max_coalesced_frames ++ * is used. ++ */ ++ u32 tx_coalesce_usecs; ++ ++ /* How many packets to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause TX interrupts to never be ++ * generated. ++ */ ++ u32 tx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 tx_coalesce_usecs_irq; ++ u32 tx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay in-memory statistics ++ * block updates. Some drivers do not have an in-memory ++ * statistic block, and in such cases this value is ignored. ++ * This value must not be zero. ++ */ ++ u32 stats_block_coalesce_usecs; ++ ++ /* Adaptive RX/TX coalescing is an algorithm implemented by ++ * some drivers to improve latency under low packet rates and ++ * improve throughput under high packet rates. Some drivers ++ * only implement one of RX or TX adaptive coalescing. Anything ++ * not implemented by the driver causes these values to be ++ * silently ignored. ++ */ ++ u32 use_adaptive_rx_coalesce; ++ u32 use_adaptive_tx_coalesce; ++ ++ /* When the packet rate (measured in packets per second) ++ * is below pkt_rate_low, the {rx,tx}_*_low parameters are ++ * used. ++ */ ++ u32 pkt_rate_low; ++ u32 rx_coalesce_usecs_low; ++ u32 rx_max_coalesced_frames_low; ++ u32 tx_coalesce_usecs_low; ++ u32 tx_max_coalesced_frames_low; ++ ++ /* When the packet rate is below pkt_rate_high but above ++ * pkt_rate_low (both measured in packets per second) the ++ * normal {rx,tx}_* coalescing parameters are used. ++ */ ++ ++ /* When the packet rate is (measured in packets per second) ++ * is above pkt_rate_high, the {rx,tx}_*_high parameters are ++ * used. 
++ */ ++ u32 pkt_rate_high; ++ u32 rx_coalesce_usecs_high; ++ u32 rx_max_coalesced_frames_high; ++ u32 tx_coalesce_usecs_high; ++ u32 tx_max_coalesced_frames_high; ++ ++ /* How often to do adaptive coalescing packet rate sampling, ++ * measured in seconds. Must not be zero. ++ */ ++ u32 rate_sample_interval; ++}; ++#endif /* ETHTOOL_GCOALESCE */ ++ ++#ifndef ETHTOOL_SCOALESCE ++#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ ++#endif ++#ifndef ETHTOOL_GRINGPARAM ++#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ ++/* for configuring RX/TX ring parameters */ ++#define ethtool_ringparam _kc_ethtool_ringparam ++struct _kc_ethtool_ringparam { ++ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ ++ ++ /* Read only attributes. These indicate the maximum number ++ * of pending RX/TX ring entries the driver will allow the ++ * user to set. ++ */ ++ u32 rx_max_pending; ++ u32 rx_mini_max_pending; ++ u32 rx_jumbo_max_pending; ++ u32 tx_max_pending; ++ ++ /* Values changeable by the user. The valid values are ++ * in the range 1 to the "*_max_pending" counterpart above. ++ */ ++ u32 rx_pending; ++ u32 rx_mini_pending; ++ u32 rx_jumbo_pending; ++ u32 tx_pending; ++}; ++#endif /* ETHTOOL_GRINGPARAM */ ++ ++#ifndef ETHTOOL_SRINGPARAM ++#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ ++#endif ++#ifndef ETHTOOL_GPAUSEPARAM ++#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ ++/* for configuring link flow control parameters */ ++#define ethtool_pauseparam _kc_ethtool_pauseparam ++struct _kc_ethtool_pauseparam { ++ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ ++ ++ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg ++ * being true) the user may set 'autoneg' here non-zero to have the ++ * pause parameters be auto-negotiated too. In such a case, the ++ * {rx,tx}_pause values below determine what capabilities are ++ * advertised. ++ * ++ * If 'autoneg' is zero or the link is not being auto-negotiated, ++ * then {rx,tx}_pause force the driver to use/not-use pause ++ * flow control. ++ */ ++ u32 autoneg; ++ u32 rx_pause; ++ u32 tx_pause; ++}; ++#endif /* ETHTOOL_GPAUSEPARAM */ ++ ++#ifndef ETHTOOL_SPAUSEPARAM ++#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ ++#endif ++#ifndef ETHTOOL_GRXCSUM ++#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SRXCSUM ++#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GTXCSUM ++#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STXCSUM ++#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GSG ++#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable ++ * (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SSG ++#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable ++ * (ethtool_value). */ ++#endif ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. 
*/ ++#endif ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ ++#endif ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ ++#endif ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ ++#endif ++#ifndef ETHTOOL_GTSO ++#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STSO ++#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ ++#endif ++ ++#ifndef ETHTOOL_BUSINFO_LEN ++#define ETHTOOL_BUSINFO_LEN 32 ++#endif ++ ++#ifndef HAVE_PCI_SET_MWI ++#define pci_set_mwi(X) pci_write_config_word(X, \ ++ PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ ++ PCI_COMMAND_INVALIDATE); ++#define pci_clear_mwi(X) pci_write_config_word(X, \ ++ PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ ++ ~PCI_COMMAND_INVALIDATE); ++#endif ++ ++ ++#undef HAVE_PCI_ERS ++ ++#endif /* _KCOMPAT_H_ */ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82571.c 2021-04-07 16:01:27.629633592 +0800 +@@ -0,0 +1,1430 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82571 ++ * e1000_82572 ++ * e1000_82573 ++ * e1000_82574 ++ */ ++ ++#include "e1000_api.h" ++#include "e1000_82571.h" ++ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82571(struct e1000_hw *hw); ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw); ++static void e1000_release_nvm_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, ++ bool active); ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw); ++static s32 e1000_init_hw_82571(struct e1000_hw *hw); ++static void e1000_clear_vfta_82571(struct e1000_hw *hw); ++static void e1000_update_mc_addr_list_82571(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count); ++static s32 e1000_setup_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw); ++ ++struct e1000_dev_spec_82571 { ++ bool laa_is_present; ++}; ++ ++/** ++ * e1000_init_phy_params_82571 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_phy_params_82571"); ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ func->acquire_phy = e1000_get_hw_semaphore_82571; ++ func->check_polarity = e1000_check_polarity_igp; ++ func->check_reset_block = e1000_check_reset_block_generic; ++ func->release_phy = e1000_put_hw_semaphore_82571; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->set_d0_lplu_state = e1000_set_d0_lplu_state_82571; ++ func->set_d3_lplu_state = e1000_set_d3_lplu_state_generic; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82571; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ phy->type = e1000_phy_igp_2; ++ func->get_cfg_done = e1000_get_cfg_done_82571; ++ func->get_phy_info = e1000_get_phy_info_igp; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_igp; ++ func->get_cable_length = e1000_get_cable_length_igp_2; ++ func->read_phy_reg = e1000_read_phy_reg_igp; ++ func->write_phy_reg = e1000_write_phy_reg_igp; ++ ++ /* This uses above function pointers */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ ++ /* Verify PHY ID */ ++ if (phy->id != IGP01E1000_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ break; ++ case e1000_82573: ++ phy->type = e1000_phy_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_m88; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->read_phy_reg = e1000_read_phy_reg_m88; ++ func->write_phy_reg = e1000_write_phy_reg_m88; ++ ++ /* This uses above function pointers */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ ++ /* Verify PHY ID */ ++ if (phy->id != M88E1111_I_PHY_ID) { ++ ret_val = -E1000_ERR_PHY; ++ DEBUGOUT1("PHY ID unknown: type = 0x%08x\n", phy->id); ++ goto out; ++ } ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82571 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ u16 size; ++ ++ DEBUGFUNC("e1000_init_nvm_params_82571"); ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ if (((eecd >> 15) & 0x3) == 0x3) { ++ nvm->type = e1000_nvm_flash_hw; ++ nvm->word_size = 2048; ++ /* ++ * Autonomous Flash update bit must be cleared due ++ * to Flash update issue. 
++ */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ nvm->type = e1000_nvm_eeprom_spi; ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ break; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_82571; ++ func->read_nvm = (hw->mac.type == e1000_82573) ++ ? e1000_read_nvm_eerd ++ : e1000_read_nvm_spi; ++ func->release_nvm = e1000_release_nvm_82571; ++ func->update_nvm = e1000_update_nvm_checksum_82571; ++ func->validate_nvm = e1000_validate_nvm_checksum_82571; ++ func->valid_led_default = e1000_valid_led_default_82571; ++ func->write_nvm = e1000_write_nvm_82571; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82571 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82571"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ case E1000_DEV_ID_82572EI_SERDES: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = TRUE; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? TRUE : FALSE; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pcie_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82571; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82571; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_82571; ++ /* physical interface link setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_setup_copper_link_82571 ++ : e1000_setup_fiber_serdes_link_82571; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* check management mode */ ++ func->check_mng_mode = e1000_check_mng_mode_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_82571; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_82571; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* read mac address */ ++ func->read_mac_addr = e1000_read_mac_addr_82571; ++ /* blink LED */ ++ func->blink_led = e1000_blink_led_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* remove device */ ++ func->remove_device = e1000_remove_device_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82571; ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ ++ hw->dev_spec_size = sizeof(struct e1000_dev_spec_82571); ++ ++ /* Device-specific structure allocation */ ++ ret_val = e1000_alloc_zeroed_dev_spec_struct(hw, hw->dev_spec_size); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82571 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82571"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82571; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82571; ++ hw->func.init_phy_params = e1000_init_phy_params_82571; ++} ++ ++/** ++ * e1000_get_phy_id_82571 - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_phy_id_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * The 82571 firmware may still be configuring the PHY. ++ * In this case, we cannot access the PHY until the ++ * configuration is done. So we explicitly set the ++ * PHY ID. 
++ */ ++ phy->id = IGP01E1000_I_PHY_ID; ++ break; ++ case e1000_82573: ++ ret_val = e1000_get_phy_id(hw); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = E1000_SUCCESS; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ DEBUGFUNC("e1000_get_hw_semaphore_82571"); ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_generic(hw); ++ DEBUGOUT("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82571 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ DEBUGFUNC("e1000_put_hw_semaphore_82571"); ++ ++ swsm = E1000_READ_REG(hw, E1000_SWSM); ++ ++ swsm &= ~E1000_SWSM_SWESMBI; ++ ++ E1000_WRITE_REG(hw, E1000_SWSM, swsm); ++} ++ ++/** ++ * e1000_acquire_nvm_82571 - Request for access to the EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * To gain access to the EEPROM, first we must obtain a hardware semaphore. ++ * Then for non-82573 hardware, set the EEPROM access request bit and wait ++ * for EEPROM access grant bit. If the access grant bit is not set, release ++ * hardware semaphore. ++ **/ ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_acquire_nvm_82571"); ++ ++ ret_val = e1000_get_hw_semaphore_82571(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type != e1000_82573) ++ ret_val = e1000_acquire_nvm_generic(hw); ++ ++ if (ret_val) ++ e1000_put_hw_semaphore_82571(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_82571 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++static void e1000_release_nvm_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_release_nvm_82571"); ++ ++ e1000_release_nvm_generic(hw); ++ e1000_put_hw_semaphore_82571(hw); ++} ++ ++/** ++ * e1000_write_nvm_82571 - Write to EEPROM using appropriate interface ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * For non-82573 silicon, write data to EEPROM at offset using SPI interface. ++ * ++ * If e1000_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_write_nvm_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ ret_val = e1000_write_nvm_spi(hw, offset, words, data); ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_82571 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ s32 ret_val; ++ u16 i; ++ ++ DEBUGFUNC("e1000_update_nvm_checksum_82571"); ++ ++ ret_val = e1000_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * If our nvm is an EEPROM, then we're done ++ * otherwise, commit the checksum to the flash NVM. ++ */ ++ if (hw->nvm.type != e1000_nvm_flash_hw) ++ goto out; ++ ++ /* Check for pending operations. */ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ msec_delay(1); ++ if ((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Reset the firmware if using STM opcode. */ ++ if ((E1000_READ_REG(hw, E1000_FLOP) & 0xFF00) == E1000_STM_OPCODE) { ++ /* ++ * The enabling of and the actual reset must be done ++ * in two write cycles. ++ */ ++ E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET_ENABLE); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Commit the write to flash */ ++ eecd = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD; ++ E1000_WRITE_REG(hw, E1000_EECD, eecd); ++ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ msec_delay(1); ++ if ((E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) { ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_validate_nvm_checksum_82571"); ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) ++ e1000_fix_nvm_checksum_82571(hw); ++ ++ return e1000_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * After checking for invalid values, poll the EEPROM to ensure the previous ++ * command has completed before trying to write the next word. After write ++ * poll for completion. ++ * ++ * If e1000_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likley contain an invalid checksum. 
++ **/ ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eewr = 0; ++ s32 ret_val = 0; ++ ++ DEBUGFUNC("e1000_write_nvm_eewr_82571"); ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ DEBUGOUT("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = (data[i] << E1000_NVM_RW_REG_DATA) | ++ ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ E1000_NVM_RW_REG_START; ++ ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ ++ E1000_WRITE_REG(hw, E1000_EEWR, eewr); ++ ++ ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_82571 - Poll for configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Reads the management control register for the config done bit to be set. ++ **/ ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_get_cfg_done_82571"); ++ ++ while (timeout) { ++ if (E1000_READ_REG(hw, E1000_EEMNGCTL) & E1000_NVM_CFG_DONE_PORT_0) ++ break; ++ msec_delay(1); ++ timeout--; ++ } ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: TRUE to enable LPLU, FALSE to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When activating LPLU ++ * this function also disables smart speed and vice versa. LPLU will not be ++ * activated unless the device autonegotiation advertisement meets standards ++ * of either 10 or 10/100 or 10/100/1000 at all duplexes. This is a function ++ * pointer entry point only called by PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ DEBUGFUNC("e1000_set_d0_lplu_state_82571"); ++ ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP02E1000_PHY_POWER_MGMT, ++ data); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_82571 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, extcnf_ctrl, ctrl_ext, icr; ++ s32 ret_val; ++ u16 i = 0; ++ ++ DEBUGFUNC("e1000_reset_hw_82571"); ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000_disable_pcie_master_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ msec_delay(10); ++ ++ /* ++ * Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. ++ */ ++ if (hw->mac.type == e1000_82573) { ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ do { ++ E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ msec_delay(2); ++ i++; ++ } while (i < MDIO_OWNERSHIP_TIMEOUT); ++ } ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) { ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ ret_val = e1000_get_auto_rd_done_generic(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ goto out; ++ ++ /* ++ * Phy configuration from NVM just starts after EECD_AUTO_RD is set. ++ * Need to wait for Phy configuration completion before accessing ++ * NVM and Phy. ++ */ ++ if (hw->mac.type == e1000_82573) ++ msec_delay(25); ++ ++ /* Clear any pending interrupt events. */ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ if (!(e1000_check_alt_mac_addr_generic(hw))) ++ e1000_set_laa_state_82571(hw, TRUE); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82571 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. 
++ **/ ++static s32 e1000_init_hw_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ DEBUGFUNC("e1000_init_hw_82571"); ++ ++ e1000_initialize_hw_bits_82571(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ /* ++ * If, however, a locally administered address was assigned to the ++ * 82571, we must reserve a RAR for it to work around an issue where ++ * resetting one port will reload the MAC on the other port. ++ */ ++ if (e1000_get_laa_state_82571(hw)) ++ rar_count--; ++ e1000_init_rx_addrs_generic(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ if (mac->type != e1000_82573) { ++ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); ++ } else { ++ e1000_enable_tx_pkt_filtering(hw); ++ reg_data = E1000_READ_REG(hw, E1000_GCR); ++ reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ E1000_WRITE_REG(hw, E1000_GCR, reg_data); ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ DEBUGFUNC("e1000_initialize_hw_bits_82571"); ++ ++ if (hw->mac.disable_hw_init_bits) ++ goto out; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg |= (1 << 23) | (1 << 24) | (1 << 25) | (1 << 26); ++ break; ++ default: ++ break; ++ } ++ E1000_WRITE_REG(hw, E1000_TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = E1000_READ_REG(hw, E1000_TARC(1)); ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg &= ~((1 << 29) | (1 << 30)); ++ reg |= (1 << 22) | (1 << 24) | (1 << 25) | (1 << 26); ++ if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ E1000_WRITE_REG(hw, E1000_TARC(1), reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Device Control */ ++ if (hw->mac.type == e1000_82573) { ++ reg = E1000_READ_REG(hw, E1000_CTRL); ++ reg &= ~(1 << 29); ++ E1000_WRITE_REG(hw, E1000_CTRL, reg); ++ } ++ ++ /* Extended Device Control */ ++ if (hw->mac.type == e1000_82573) { ++ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ reg &= ~(1 << 23); ++ reg |= (1 << 22); ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_clear_vfta_82571 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++static void e1000_clear_vfta_82571(struct e1000_hw *hw) ++{ ++ u32 offset; ++ u32 vfta_value = 0; ++ u32 vfta_offset = 0; ++ u32 vfta_bit_in_reg = 0; ++ ++ DEBUGFUNC("e1000_clear_vfta_82571"); ++ ++ if (hw->mac.type == e1000_82573) { ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* ++ * The VFTA is a 4096b bit-field, each identifying ++ * a single VLAN ID. The following operations ++ * determine which 32b entry (i.e. offset) into the ++ * array we want to set the VLAN ID (i.e. bit) of ++ * the manageability unit. ++ */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* ++ * If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of ++ * the manageability unit. ++ */ ++ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/** ++ * e1000_update_mc_addr_list_82571 - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * @rar_used_count: the first RAR register free to program ++ * @rar_count: total number of supported Receive Address Registers ++ * ++ * Updates the Receive Address Registers and Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ * The parameter rar_count will usually be hw->mac.rar_entry_count ++ * unless there are workarounds that change this. ++ **/ ++static void e1000_update_mc_addr_list_82571(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count, ++ u32 rar_used_count, u32 rar_count) ++{ ++ DEBUGFUNC("e1000_update_mc_addr_list_82571"); ++ ++ if (e1000_get_laa_state_82571(hw)) ++ rar_count--; ++ ++ e1000_update_mc_addr_list_generic(hw, mc_addr_list, mc_addr_count, ++ rar_used_count, rar_count); ++} ++ ++/** ++ * e1000_setup_link_82571 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_setup_link_82571"); ++ ++ /* ++ * 82573 does not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->mac.type == e1000_82573) ++ hw->fc.type = e1000_fc_full; ++ ++ return e1000_setup_link_generic(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_82571 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, led_ctrl; ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82571"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_m88: ++ ret_val = e1000_copper_link_setup_m88(hw); ++ break; ++ case e1000_phy_igp_2: ++ ret_val = e1000_copper_link_setup_igp(hw); ++ /* Setup activity LED */ ++ led_ctrl = E1000_READ_REG(hw, E1000_LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, E1000_LEDCTL, led_ctrl); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes links. ++ * Upon successful setup, poll for link. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_82571"); ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * If SerDes loopback mode is entered, there is no form ++ * of reset to take the adapter out of that mode. So we ++ * have to explicitly take the adapter out of loopback ++ * mode. This prevents drivers from twidling their thumbs ++ * if another tool failed to take it out of loopback mode. 
++ */ ++ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ break; ++ default: ++ break; ++ } ++ ++ return e1000_setup_fiber_serdes_link_generic(hw); ++} ++ ++/** ++ * e1000_valid_led_default_82571 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. ++ **/ ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ DEBUGFUNC("e1000_valid_led_default_82571"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ DEBUGOUT("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (hw->mac.type == e1000_82573 && ++ *data == ID_LED_RESERVED_F746) ++ *data = ID_LED_DEFAULT_82573; ++ else if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_laa_state_82571 - Get locally administered address state ++ * @hw: pointer to the HW structure ++ * ++ * Retrieve and return the current locally administed address state. ++ **/ ++bool e1000_get_laa_state_82571(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82571 *dev_spec; ++ bool state = FALSE; ++ ++ DEBUGFUNC("e1000_get_laa_state_82571"); ++ ++ if (hw->mac.type != e1000_82571) ++ goto out; ++ ++ dev_spec = (struct e1000_dev_spec_82571 *)hw->dev_spec; ++ ++ state = dev_spec->laa_is_present; ++ ++out: ++ return state; ++} ++ ++/** ++ * e1000_set_laa_state_82571 - Set locally administered address state ++ * @hw: pointer to the HW structure ++ * @state: enable/disable locally administered address ++ * ++ * Enable/Disable the current locally administed address state. ++ **/ ++void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state) ++{ ++ struct e1000_dev_spec_82571 *dev_spec; ++ ++ DEBUGFUNC("e1000_set_laa_state_82571"); ++ ++ if (hw->mac.type != e1000_82571) ++ goto out; ++ ++ dev_spec = (struct e1000_dev_spec_82571 *)hw->dev_spec; ++ ++ dev_spec->laa_is_present = state; ++ ++ /* If workaround is activated... */ ++ if (state) { ++ /* ++ * Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed, the actual LAA is in one of the RARs and no ++ * incoming packets directed to this port are dropped. ++ * Eventually the LAA will be in RAR[0] and RAR[14]. ++ */ ++ e1000_rar_set_generic(hw, hw->mac.addr, ++ hw->mac.rar_entry_count - 1); ++ } ++ ++out: ++ return; ++} ++ ++/** ++ * e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Verifies that the EEPROM has completed the update. After updating the ++ * EEPROM, we need to check bit 15 in work 0x23 for the checksum fix. If ++ * the checksum fix is not implemented, we need to set the bit and update ++ * the checksum. Otherwise, if bit 15 is set and the checksum is incorrect, ++ * we need to return bad checksum. ++ **/ ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_fix_nvm_checksum_82571"); ++ ++ if (nvm->type != e1000_nvm_flash_hw) ++ goto out; ++ ++ /* ++ * Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. 
++ */ ++ ret_val = e1000_read_nvm(hw, 0x10, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & 0x10)) { ++ /* ++ * Read 0x23 and check bit 15. This bit is a 1 ++ * when the checksum has already been fixed. If ++ * the checksum is still wrong and this bit is a ++ * 1, we need to return bad checksum. Otherwise, ++ * we need to set this bit to a 1 and update the ++ * checksum. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!(data & 0x8000)) { ++ data |= 0x8000; ++ ret_val = e1000_write_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000_update_nvm_checksum(hw); ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_82571 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_read_mac_addr_82571"); ++ if (e1000_check_alt_mac_addr_generic(hw)) ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82571 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(e1000_check_mng_mode(hw) || e1000_check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82571"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++ ++ temp = E1000_READ_REG(hw, E1000_IAC); ++ temp = E1000_READ_REG(hw, E1000_ICRXOC); ++ ++ temp = E1000_READ_REG(hw, E1000_ICRXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXPTC); ++ temp = E1000_READ_REG(hw, E1000_ICTXATC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQEC); ++ temp = E1000_READ_REG(hw, E1000_ICTXQMTC); ++ temp = E1000_READ_REG(hw, E1000_ICRXDMTC); ++} +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_param.c 2021-04-07 16:01:27.624633599 +0800 +@@ -0,0 +1,894 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++#include ++ ++#include "e1000.h" ++ ++/* This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++/* All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... 
E1000_MAX_NIC] = OPTION_UNSET } ++#ifndef module_param_array ++/* Module Parameters are always initialized to -1, so that the driver ++ * can tell the difference between no user specified value or the ++ * user asking for the default value. ++ * The true default values are loaded in when e1000_check_options is called. ++ * ++ * This is a GCC extension to ANSI C. ++ * See the item "Labeled Elements in Initializers" in the section ++ * "Extensions to the C Language Family" of the GCC documentation. ++ */ ++ ++#define E1000_PARAM(X, desc) \ ++ static const int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \ ++ MODULE_PARM_DESC(X, desc); ++#else ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ static unsigned int num_##X = 0; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++#endif ++ ++/* Transmit Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++E1000_PARAM(TxDescriptors, "Number of transmit descriptors"); ++ ++/* Receive Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++E1000_PARAM(RxDescriptors, "Number of receive descriptors"); ++ ++/* User Specified Speed Override ++ * ++ * Valid Range: 0, 10, 100, 1000 ++ * - 0 - auto-negotiate at all supported speeds ++ * - 10 - only link at 10 Mbps ++ * - 100 - only link at 100 Mbps ++ * - 1000 - only link at 1000 Mbps ++ * ++ * Default Value: 0 ++ */ ++E1000_PARAM(Speed, "Speed setting"); ++ ++/* User Specified Duplex Override ++ * ++ * Valid Range: 0-2 ++ * - 0 - auto-negotiate for duplex ++ * - 1 - only link at half duplex ++ * - 2 - only link at full duplex ++ * ++ * Default Value: 0 ++ */ ++E1000_PARAM(Duplex, "Duplex setting"); ++ ++/* Auto-negotiation Advertisement Override ++ * ++ * Valid Range: 0x01-0x0F, 0x20-0x2F (copper); 0x20 (fiber) ++ * ++ * The AutoNeg value is a bit mask describing which speed and duplex ++ * combinations should be advertised during auto-negotiation. 
++ * The supported speed and duplex modes are listed below ++ * ++ * Bit 7 6 5 4 3 2 1 0 ++ * Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 ++ * Duplex Full Full Half Full Half ++ * ++ * Default Value: 0x2F (copper); 0x20 (fiber) ++ */ ++E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting"); ++#define AUTONEG_ADV_DEFAULT 0x2F ++#define AUTONEG_ADV_MASK 0x2F ++ ++/* User Specified Flow Control Override ++ * ++ * Valid Range: 0-3 ++ * - 0 - No Flow Control ++ * - 1 - Rx only, respond to PAUSE frames but do not generate them ++ * - 2 - Tx only, generate PAUSE frames but ignore them on receive ++ * - 3 - Full Flow Control Support ++ * ++ * Default Value: Read flow control settings from the EEPROM ++ */ ++E1000_PARAM(FlowControl, "Flow Control setting"); ++#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL ++ ++/* XsumRX - Receive Checksum Offload Enable/Disable ++ * ++ * Valid Range: 0, 1 ++ * - 0 - disables all checksum offload ++ * - 1 - enables receive IP/TCP/UDP checksum offload ++ * on 82543 and newer -based NICs ++ * ++ * Default Value: 1 ++ */ ++E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload"); ++ ++/* Transmit Interrupt Delay in units of 1.024 microseconds ++ * Tx interrupt delay needs to typically be set to something non zero ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++/* Receive Interrupt Delay in units of 1.024 microseconds ++ * hardware will likely hang if you set this to anything but zero. 
++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++#define DEFAULT_RDTR 0 ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++/* Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); ++#define DEFAULT_RADV 0 ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++/* Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) ++ */ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++/* Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ const char *name; ++ const char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(unsigned int *value, ++ const struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { ++ DPRINTK(PROBE, INFO, ++ "%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ DPRINTK(PROBE, INFO, "%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ DPRINTK(PROBE, INFO, "Invalid %s value specified (%i) %s\n", ++ opt->name, *value, opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter); ++static void e1000_check_copper_options(struct e1000_adapter *adapter); ++ ++/** ++ * e1000_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. 
++ **/ ++void e1000_check_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int bd = adapter->bd_number; ++ if (bd >= E1000_MAX_NIC) { ++ DPRINTK(PROBE, NOTICE, ++ "Warning: no configuration for board #%i\n", bd); ++ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); ++#ifndef module_param_array ++ bd = E1000_MAX_NIC; ++#endif ++ } ++ ++ { /* Transmit Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_TXD), ++ .def = E1000_DEFAULT_TXD, ++ .arg = { .r = { .min = E1000_MIN_TXD }} ++ }; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ int i; ++ opt.arg.r.max = hw->mac.type < e1000_82544 ? ++ E1000_MAX_TXD : E1000_MAX_82544_TXD; ++ ++#ifdef module_param_array ++ if (num_TxDescriptors > bd) { ++#endif ++ tx_ring->count = TxDescriptors[bd]; ++ e1000_validate_option(&tx_ring->count, &opt, adapter); ++ tx_ring->count = ALIGN(tx_ring->count, ++ REQ_TX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ tx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ } ++ { /* Receive Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_RXD), ++ .def = E1000_DEFAULT_RXD, ++ .arg = { .r = { .min = E1000_MIN_RXD }} ++ }; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ int i; ++ opt.arg.r.max = hw->mac.type < e1000_82544 ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ++#ifdef module_param_array ++ if (num_RxDescriptors > bd) { ++#endif ++ rx_ring->count = RxDescriptors[bd]; ++ e1000_validate_option(&rx_ring->count, &opt, adapter); ++ rx_ring->count = ALIGN(rx_ring->count, ++ REQ_RX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ rx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ } ++ { /* Checksum Offload Enable/Disable */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Checksum Offload", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_XsumRX > bd) { ++#endif ++ unsigned int rx_csum = XsumRX[bd]; ++ e1000_validate_option(&rx_csum, &opt, adapter); ++ adapter->rx_csum = rx_csum; ++#ifdef module_param_array ++ } else { ++ adapter->rx_csum = opt.def; ++ } ++#endif ++ } ++ { /* Flow Control */ ++ ++ struct e1000_opt_list fc_list[] = ++ {{ e1000_fc_none, "Flow Control Disabled" }, ++ { e1000_fc_rx_pause,"Flow Control Receive Only" }, ++ { e1000_fc_tx_pause,"Flow Control Transmit Only" }, ++ { e1000_fc_full, "Flow Control Enabled" }, ++ { e1000_fc_default, "Flow Control Hardware Default" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Flow Control", ++ .err = "reading default settings from EEPROM", ++ .def = e1000_fc_default, ++ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), ++ .p = fc_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_FlowControl > bd) { ++#endif ++ unsigned int fc = FlowControl[bd]; ++ e1000_validate_option(&fc, &opt, adapter); ++ hw->fc.original_type = fc; ++ hw->fc.type = fc; ++#ifdef module_param_array ++ } else { ++ hw->fc.original_type = opt.def; ++ hw->fc.type = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Interrupt 
Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxIntDelay > bd) { ++#endif ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxAbsIntDelay > bd) { ++#endif ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY }} ++ }; ++ ++ /* modify min and default if 82573 for slow ping w/a, ++ * a value greater than 8 needs to be set for RDTR */ ++ ++#ifdef module_param_array ++ if (num_RxIntDelay > bd) { ++#endif ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxAbsIntDelay > bd) { ++#endif ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Interrupt Throttling Rate */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = MAX_ITR }} ++ }; ++ ++#ifdef module_param_array ++ if (num_InterruptThrottleRate > bd) { ++#endif ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ DPRINTK(PROBE, INFO, "%s turned off\n", ++ opt.name); ++ break; ++ case 1: ++ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 3: ++ DPRINTK(PROBE, INFO, ++ "%s set to dynamic conservative mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ default: ++ e1000_validate_option(&adapter->itr, &opt, ++ adapter); ++ /* save the setting, because the dynamic bits change itr */ ++ /* clear the lower two bits because they are ++ * used as control */ ++ adapter->itr_setting = adapter->itr & ~3; ++ break; ++ } ++#ifdef module_param_array ++ } else { ++ adapter->itr_setting = opt.def; ++ adapter->itr = 20000; ++ } ++#endif ++ } ++ { /* 
Smart Power Down */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_SmartPowerDownEnable > bd) { ++#endif ++ unsigned int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ adapter->flags |= spd ? E1000_FLAG_SMART_POWER_DOWN : 0; ++#ifdef module_param_array ++ } else { ++ adapter->flags &= ~E1000_FLAG_SMART_POWER_DOWN; ++ } ++#endif ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_KumeranLockLoss > bd) { ++#endif ++ unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ kmrn_lock_loss); ++#ifdef module_param_array ++ } else { ++ if (hw->mac.type == e1000_ich8lan) ++ e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ opt.def); ++ } ++#endif ++ } ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ case e1000_media_type_internal_serdes: ++ e1000_check_fiber_options(adapter); ++ break; ++ case e1000_media_type_copper: ++ e1000_check_copper_options(adapter); ++ break; ++ default: ++ BUG(); ++ } ++ ++} ++ ++/** ++ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on fiber adapters ++ **/ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++ if ((Speed[bd] != OPTION_UNSET)) { ++#else ++ if (num_Speed > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Speed not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((Duplex[bd] != OPTION_UNSET)) { ++#else ++ if (num_Duplex > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Duplex not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((AutoNeg[bd] != OPTION_UNSET) && (AutoNeg[bd] != 0x20)) { ++#else ++ if ((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) { ++#endif ++ DPRINTK(PROBE, INFO, "AutoNeg other than 1000/Full is " ++ "not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++} ++ ++/** ++ * e1000_check_copper_options - Range Checking for Link Options, Copper Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on copper adapters ++ **/ ++static void e1000_check_copper_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int speed, dplx, an; ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? 
E1000_MAX_NIC : bd; ++#endif ++ ++ { /* Speed */ ++ struct e1000_opt_list speed_list[] = {{ 0, "" }, ++ { SPEED_10, "" }, ++ { SPEED_100, "" }, ++ { SPEED_1000, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Speed", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), ++ .p = speed_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_Speed > bd) { ++#endif ++ speed = Speed[bd]; ++ e1000_validate_option(&speed, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ speed = opt.def; ++ } ++#endif ++ } ++ { /* Duplex */ ++ struct e1000_opt_list dplx_list[] = {{ 0, "" }, ++ { HALF_DUPLEX, "" }, ++ { FULL_DUPLEX, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Duplex", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), ++ .p = dplx_list }} ++ }; ++ ++ if (e1000_check_reset_block(hw)) { ++ DPRINTK(PROBE, INFO, ++ "Link active due to SoL/IDER Session. " ++ "Speed/Duplex/AutoNeg parameter ignored.\n"); ++ return; ++ } ++#ifdef module_param_array ++ if (num_Duplex > bd) { ++#endif ++ dplx = Duplex[bd]; ++ e1000_validate_option(&dplx, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ dplx = opt.def; ++ } ++#endif ++ } ++ ++#ifdef module_param_array ++ if ((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) { ++#else ++ if (AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) { ++#endif ++ DPRINTK(PROBE, INFO, ++ "AutoNeg specified along with Speed or Duplex, " ++ "parameter ignored\n"); ++ hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; ++ } else { /* Autoneg */ ++ struct e1000_opt_list an_list[] = ++ #define AA "AutoNeg advertising " ++ {{ 0x01, AA "10/HD" }, ++ { 0x02, AA "10/FD" }, ++ { 0x03, AA "10/FD, 10/HD" }, ++ { 0x04, AA "100/HD" }, ++ { 0x05, AA "100/HD, 10/HD" }, ++ { 0x06, AA "100/HD, 10/FD" }, ++ { 0x07, AA "100/HD, 10/FD, 10/HD" }, ++ { 0x08, AA "100/FD" }, ++ { 0x09, AA "100/FD, 10/HD" }, ++ { 0x0a, AA "100/FD, 10/FD" }, ++ { 0x0b, AA "100/FD, 10/FD, 10/HD" }, ++ { 0x0c, AA "100/FD, 100/HD" }, ++ { 0x0d, AA "100/FD, 100/HD, 10/HD" }, ++ { 0x0e, AA "100/FD, 100/HD, 10/FD" }, ++ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x20, AA "1000/FD" }, ++ { 0x21, AA "1000/FD, 10/HD" }, ++ { 0x22, AA "1000/FD, 10/FD" }, ++ { 0x23, AA "1000/FD, 10/FD, 10/HD" }, ++ { 0x24, AA "1000/FD, 100/HD" }, ++ { 0x25, AA "1000/FD, 100/HD, 10/HD" }, ++ { 0x26, AA "1000/FD, 100/HD, 10/FD" }, ++ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x28, AA "1000/FD, 100/FD" }, ++ { 0x29, AA "1000/FD, 100/FD, 10/HD" }, ++ { 0x2a, AA "1000/FD, 100/FD, 10/FD" }, ++ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" }, ++ { 0x2c, AA "1000/FD, 100/FD, 100/HD" }, ++ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" }, ++ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" }, ++ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "AutoNeg", ++ .err = "parameter ignored", ++ .def = AUTONEG_ADV_DEFAULT, ++ .arg = { .l = { .nr = ARRAY_SIZE(an_list), ++ .p = an_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_AutoNeg > bd) { ++#endif ++ an = AutoNeg[bd]; ++ e1000_validate_option(&an, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ an = opt.def; ++ } ++#endif ++ hw->phy.autoneg_advertised = an; ++ } ++ ++ switch (speed + dplx) { ++ case 0: ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++#ifdef module_param_array ++ if ((num_Speed > bd) && (speed != 0 || dplx != 0)) ++#else ++ if (Speed[bd] != 
OPTION_UNSET || Duplex[bd] != OPTION_UNSET) ++#endif ++ DPRINTK(PROBE, INFO, ++ "Speed and duplex autonegotiation enabled\n"); ++ break; ++ case HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Half Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Half Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_100_HALF; ++ break; ++ case FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Full Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Full Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_FULL | ++ ADVERTISE_100_FULL | ++ ADVERTISE_1000_FULL; ++ break; ++ case SPEED_10: ++ DPRINTK(PROBE, INFO, "10 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at 10 Mbps only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_10_FULL; ++ break; ++ case SPEED_10 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Half Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_10_HALF; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_10 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Full Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_10_FULL; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_100: ++ DPRINTK(PROBE, INFO, "100 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "100 Mbps only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_100_HALF | ++ ADVERTISE_100_FULL; ++ break; ++ case SPEED_100 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Half Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_100_HALF; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_100 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Full Duplex\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = FALSE; ++ hw->mac.forced_speed_duplex = ADVERTISE_100_FULL; ++ hw->phy.autoneg_advertised = 0; ++ break; ++ case SPEED_1000: ++ DPRINTK(PROBE, INFO, "1000 Mbps Speed specified without " ++ "Duplex\n"); ++ goto full_duplex_only; ++ case SPEED_1000 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Half Duplex is not supported at 1000 Mbps\n"); ++ /* fall through */ ++ case SPEED_1000 + FULL_DUPLEX: ++full_duplex_only: ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); ++ hw->mac.autoneg = adapter->fc_autoneg = TRUE; ++ hw->phy.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* Speed, AutoNeg and MDI/MDI-X must all play nice */ ++ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) { ++ DPRINTK(PROBE, INFO, ++ "Speed, AutoNeg and MDI-X specifications are " ++ "incompatible. Setting MDI-X to a compatible value.\n"); ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/e1000/e1000_82540.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/e1000/e1000_82540.c 2021-04-07 16:01:27.619633606 +0800 +@@ -0,0 +1,680 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2008 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_82540 ++ * e1000_82545 ++ * e1000_82546 ++ * e1000_82545_rev_3 ++ * e1000_82546_rev_3 ++ */ ++ ++#include "e1000_api.h" ++ ++static s32 e1000_init_phy_params_82540(struct e1000_hw *hw); ++static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw); ++static s32 e1000_init_mac_params_82540(struct e1000_hw *hw); ++static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw); ++static s32 e1000_init_hw_82540(struct e1000_hw *hw); ++static s32 e1000_reset_hw_82540(struct e1000_hw *hw); ++static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw); ++static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw); ++ ++/** ++ * e1000_init_phy_params_82540 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_phy_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 10000; ++ phy->type = e1000_phy_m88; ++ ++ /* Function Pointers */ ++ func->check_polarity = e1000_check_polarity_m88; ++ func->commit_phy = e1000_phy_sw_reset_generic; ++ func->force_speed_duplex = e1000_phy_force_speed_duplex_m88; ++ func->get_cable_length = e1000_get_cable_length_m88; ++ func->get_cfg_done = e1000_get_cfg_done_generic; ++ func->read_phy_reg = e1000_read_phy_reg_m88; ++ func->reset_phy = e1000_phy_hw_reset_generic; ++ func->write_phy_reg = e1000_write_phy_reg_m88; ++ func->get_phy_info = e1000_get_phy_info_m88; ++ func->power_up_phy = e1000_power_up_phy_copper; ++ func->power_down_phy = e1000_power_down_phy_copper_82540; ++ ++ ret_val = e1000_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ if (phy->id == M88E1011_I_PHY_ID) ++ break; ++ /* Fall Through */ ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82540 - Init NVM func ptrs. 
++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_nvm_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_functions *func = &hw->func; ++ u32 eecd = E1000_READ_REG(hw, E1000_EECD); ++ ++ DEBUGFUNC("e1000_init_nvm_params_82540"); ++ ++ nvm->type = e1000_nvm_eeprom_microwire; ++ nvm->delay_usec = 50; ++ nvm->opcode_bits = 3; ++ switch (nvm->override) { ++ case e1000_nvm_override_microwire_large: ++ nvm->address_bits = 8; ++ nvm->word_size = 256; ++ break; ++ case e1000_nvm_override_microwire_small: ++ nvm->address_bits = 6; ++ nvm->word_size = 64; ++ break; ++ default: ++ nvm->address_bits = eecd & E1000_EECD_SIZE ? 8 : 6; ++ nvm->word_size = eecd & E1000_EECD_SIZE ? 256 : 64; ++ break; ++ } ++ ++ /* Function Pointers */ ++ func->acquire_nvm = e1000_acquire_nvm_generic; ++ func->read_nvm = e1000_read_nvm_microwire; ++ func->release_nvm = e1000_release_nvm_generic; ++ func->update_nvm = e1000_update_nvm_checksum_generic; ++ func->valid_led_default = e1000_valid_led_default_generic; ++ func->validate_nvm = e1000_validate_nvm_checksum_generic; ++ func->write_nvm = e1000_write_nvm_microwire; ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_init_mac_params_82540 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * This is a function pointer entry point called by the api module. ++ **/ ++static s32 e1000_init_mac_params_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_functions *func = &hw->func; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_init_mac_params_82540"); ++ ++ /* Set media type */ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82545EM_FIBER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82545GM_SERDES: ++ case E1000_DEV_ID_82546GB_SERDES: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ ++ /* Function pointers */ ++ ++ /* bus type/speed/width */ ++ func->get_bus_info = e1000_get_bus_info_pci_generic; ++ /* reset */ ++ func->reset_hw = e1000_reset_hw_82540; ++ /* hw initialization */ ++ func->init_hw = e1000_init_hw_82540; ++ /* link setup */ ++ func->setup_link = e1000_setup_link_generic; ++ /* physical interface setup */ ++ func->setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? e1000_setup_copper_link_82540 ++ : e1000_setup_fiber_serdes_link_82540; ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->check_for_link = e1000_check_for_copper_link_generic; ++ break; ++ case e1000_media_type_fiber: ++ func->check_for_link = e1000_check_for_fiber_link_generic; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->check_for_link = e1000_check_for_serdes_link_generic; ++ break; ++ default: ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ break; ++ } ++ /* link info */ ++ func->get_link_up_info = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? 
e1000_get_speed_and_duplex_copper_generic ++ : e1000_get_speed_and_duplex_fiber_serdes_generic; ++ /* multicast address update */ ++ func->update_mc_addr_list = e1000_update_mc_addr_list_generic; ++ /* writing VFTA */ ++ func->write_vfta = e1000_write_vfta_generic; ++ /* clearing VFTA */ ++ func->clear_vfta = e1000_clear_vfta_generic; ++ /* setting MTA */ ++ func->mta_set = e1000_mta_set_generic; ++ /* setup LED */ ++ func->setup_led = e1000_setup_led_generic; ++ /* cleanup LED */ ++ func->cleanup_led = e1000_cleanup_led_generic; ++ /* turn on/off LED */ ++ func->led_on = e1000_led_on_generic; ++ func->led_off = e1000_led_off_generic; ++ /* clear hardware counters */ ++ func->clear_hw_cntrs = e1000_clear_hw_cntrs_82540; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_function_pointers_82540 - Init func ptrs. ++ * @hw: pointer to the HW structure ++ * ++ * The only function explicitly called by the api module to initialize ++ * all function pointers and parameters. ++ **/ ++void e1000_init_function_pointers_82540(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_init_function_pointers_82540"); ++ ++ hw->func.init_mac_params = e1000_init_mac_params_82540; ++ hw->func.init_nvm_params = e1000_init_nvm_params_82540; ++ hw->func.init_phy_params = e1000_init_phy_params_82540; ++} ++ ++/** ++ * e1000_reset_hw_82540 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 e1000_reset_hw_82540(struct e1000_hw *hw) ++{ ++ u32 ctrl, icr, manc; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_reset_hw_82540"); ++ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, E1000_IMC, 0xFFFFFFFF); ++ ++ E1000_WRITE_REG(hw, E1000_RCTL, 0); ++ E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* ++ * Delay to allow any outstanding PCI transactions to complete ++ * before resetting the device. ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ++ DEBUGOUT("Issuing a global reset to 82540/82545/82546 MAC\n"); ++ switch (hw->mac.type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ E1000_WRITE_REG(hw, E1000_CTRL_DUP, ctrl | E1000_CTRL_RST); ++ break; ++ default: ++ /* ++ * These controllers can't ack the 64-bit write when ++ * issuing the reset, so we use IO-mapping as a ++ * workaround to issue the reset. ++ */ ++ E1000_WRITE_REG_IO(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ++ break; ++ } ++ ++ /* Wait for EEPROM reload */ ++ msec_delay(5); ++ ++ /* Disable HW ARPs on ASF enabled adapters */ ++ manc = E1000_READ_REG(hw, E1000_MANC); ++ manc &= ~E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(hw, E1000_MANC, manc); ++ ++ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); ++ icr = E1000_READ_REG(hw, E1000_ICR); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_82540 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 e1000_init_hw_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txdctl, ctrl_ext; ++ s32 ret_val = E1000_SUCCESS; ++ u16 i; ++ ++ DEBUGFUNC("e1000_init_hw_82540"); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000_id_led_init_generic(hw); ++ if (ret_val) { ++ DEBUGOUT("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ if (mac->type < e1000_82545_rev_3) ++ E1000_WRITE_REG(hw, E1000_VET, 0); ++ ++ e1000_clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ /* ++ * Avoid back to back register writes by adding the register ++ * read (flush). This is to protect against some strange ++ * bridge configurations that may issue Memory Write Block ++ * (MWB) to our register space. The *_rev_3 hardware at ++ * least doesn't respond correctly to every other dword in an ++ * MWB to our register space. ++ */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ if (mac->type < e1000_82545_rev_3) ++ e1000_pcix_mmrbc_workaround_generic(hw); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link(hw); ++ ++ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82540(hw); ++ ++ if ((hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER) || ++ (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3)) { ++ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ++ /* ++ * Relaxed ordering must be disabled to avoid a parity ++ * error crash in a PCI slot. ++ */ ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_copper_link_82540 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). This is a function ++ * pointer entry point called by the api module. 
++ **/ ++static s32 e1000_setup_copper_link_82540(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = E1000_SUCCESS; ++ u16 data; ++ ++ DEBUGFUNC("e1000_setup_copper_link_82540"); ++ ++ ctrl = E1000_READ_REG(hw, E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ++ ++ ret_val = e1000_set_phy_mode_82540(hw); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type == e1000_82545_rev_3 || ++ hw->mac.type == e1000_82546_rev_3) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ goto out; ++ data |= 0x00000008; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_copper_link_setup_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_setup_copper_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82540 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Set the output amplitude to the value in the EEPROM and adjust the VCO ++ * speed to improve Bit Error Rate (BER) performance. Configures collision ++ * distance and flow control for fiber and serdes links. Upon successful ++ * setup, poll for link. This is a function pointer entry point called by ++ * the api module. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82540(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link_82540"); ++ ++ switch (mac->type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) { ++ /* ++ * If we're on serdes media, adjust the output ++ * amplitude to value set in the EEPROM. ++ */ ++ ret_val = e1000_adjust_serdes_amplitude_82540(hw); ++ if (ret_val) ++ goto out; ++ } ++ /* Adjust VCO speed to improve BER performance */ ++ ret_val = e1000_set_vco_speed_82540(hw); ++ if (ret_val) ++ goto out; ++ default: ++ break; ++ } ++ ++ ret_val = e1000_setup_fiber_serdes_link_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_adjust_serdes_amplitude_82540 - Adjust amplitude based on EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Adjust the SERDES ouput amplitude based on the EEPROM settings. ++ **/ ++static s32 e1000_adjust_serdes_amplitude_82540(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_adjust_serdes_amplitude_82540"); ++ ++ ret_val = e1000_read_nvm(hw, NVM_SERDES_AMPLITUDE, 1, &nvm_data); ++ if (ret_val) ++ goto out; ++ ++ if (nvm_data != NVM_RESERVED_WORD) { ++ /* Adjust serdes output amplitude only. */ ++ nvm_data &= NVM_SERDES_AMPLITUDE_MASK; ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_EXT_CTRL, ++ nvm_data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_vco_speed_82540 - Set VCO speed for better performance ++ * @hw: pointer to the HW structure ++ * ++ * Set the VCO speed to improve Bit Error Rate (BER) performance. 
++ **/ ++static s32 e1000_set_vco_speed_82540(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_SUCCESS; ++ u16 default_page = 0; ++ u16 phy_data; ++ ++ DEBUGFUNC("e1000_set_vco_speed_82540"); ++ ++ /* Set PHY register 30, page 5, bit 8 to 0 */ ++ ++ ret_val = e1000_read_phy_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Set PHY register 30, page 4, bit 11 to 1 */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PHY_VCO_REG_BIT11; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, ++ default_page); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_phy_mode_82540 - Set PHY to class A mode ++ * @hw: pointer to the HW structure ++ * ++ * Sets the PHY to class A mode and assumes the following operations will ++ * follow to enable the new class mode: ++ * 1. Do a PHY soft reset. ++ * 2. Restart auto-negotiation or force link. ++ **/ ++static s32 e1000_set_phy_mode_82540(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = E1000_SUCCESS; ++ u16 nvm_data; ++ ++ DEBUGFUNC("e1000_set_phy_mode_82540"); ++ ++ if (hw->mac.type != e1000_82545_rev_3) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, NVM_PHY_CLASS_WORD, 1, &nvm_data); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ if ((nvm_data != NVM_RESERVED_WORD) && (nvm_data & NVM_PHY_CLASS_A)) { ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, ++ 0x000B); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_PHY_GEN_CONTROL, ++ 0x8104); ++ if (ret_val) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->reset_disable = FALSE; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82540 - Remove link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82540(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_SMBUS_EN)) ++ e1000_power_down_phy_copper(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82540 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82540(struct e1000_hw *hw) ++{ ++ volatile u32 temp; ++ ++ DEBUGFUNC("e1000_clear_hw_cntrs_82540"); ++ ++ e1000_clear_hw_cntrs_base_generic(hw); ++ ++ temp = E1000_READ_REG(hw, E1000_PRC64); ++ temp = E1000_READ_REG(hw, E1000_PRC127); ++ temp = E1000_READ_REG(hw, E1000_PRC255); ++ temp = E1000_READ_REG(hw, E1000_PRC511); ++ temp = E1000_READ_REG(hw, E1000_PRC1023); ++ temp = E1000_READ_REG(hw, E1000_PRC1522); ++ temp = E1000_READ_REG(hw, E1000_PTC64); ++ temp = E1000_READ_REG(hw, E1000_PTC127); ++ temp = E1000_READ_REG(hw, E1000_PTC255); ++ temp = E1000_READ_REG(hw, E1000_PTC511); ++ temp = E1000_READ_REG(hw, E1000_PTC1023); ++ temp = E1000_READ_REG(hw, E1000_PTC1522); ++ ++ temp = E1000_READ_REG(hw, E1000_ALGNERRC); ++ temp = E1000_READ_REG(hw, E1000_RXERRC); ++ temp = E1000_READ_REG(hw, E1000_TNCRS); ++ temp = E1000_READ_REG(hw, E1000_CEXTERR); ++ temp = E1000_READ_REG(hw, E1000_TSCTC); ++ temp = E1000_READ_REG(hw, E1000_TSCTFC); ++ ++ temp = E1000_READ_REG(hw, E1000_MGTPRC); ++ temp = E1000_READ_REG(hw, E1000_MGTPDC); ++ temp = E1000_READ_REG(hw, E1000_MGTPTC); ++} ++ +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00core.c 2021-04-07 16:01:27.614633613 +0800 +@@ -0,0 +1,444 @@ ++/* rt2x00core.c ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt2x00core ++ * Abstract: rt2x00 core routines. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "rt2x00.h" ++ ++#ifdef DRV_NAME ++#undef DRV_NAME ++#define DRV_NAME "rt_rt2x00core" ++#endif /* DRV_NAME */ ++ ++static int rt2x00_radio_on(struct _rt2x00_core *core); ++static int rt2x00_radio_off(struct _rt2x00_core *core); ++ ++static int cards[MAX_UNITS] = { [0 ...(MAX_UNITS - 1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++ ++/* ++ * Writes the pending configuration to the device ++ */ ++static void rt2x00_update_config(struct _rt2x00_core *core) ++{ ++ u16 update_flags = 0x0000; ++ ++ if (!test_bit(DEVICE_ENABLED, &core->flags) && ++ !test_bit(DEVICE_RADIO_ON, &core->flags)) ++ return; ++ ++ if (test_and_set_bit(DEVICE_CONFIG_UPDATE, &core->flags)) ++ return; ++ ++ update_flags = core->config.update_flags; ++ core->config.update_flags = 0; ++ ++ if (likely(update_flags)) ++ core->handler->dev_update_config(core, update_flags); ++ ++ clear_bit(DEVICE_CONFIG_UPDATE, &core->flags); ++} ++ ++/* ++ * Radio control. 
++ */ ++static int rt2x00_radio_on(struct _rt2x00_core *core) ++{ ++ int status = 0x00000000; ++ ++ if (test_bit(DEVICE_RADIO_ON, &core->flags)) { ++ WARNING("Radio already on.\n"); ++ return -ENOTCONN; ++ } ++ ++ status = core->handler->dev_radio_on(core); ++ if (status) ++ return status; ++ ++ set_bit(DEVICE_RADIO_ON, &core->flags); ++ ++ return 0; ++} ++ ++static int rt2x00_radio_off(struct _rt2x00_core *core) ++{ ++ if (!test_and_clear_bit(DEVICE_RADIO_ON, &core->flags)) { ++ WARNING("Radio already off.\n"); ++ return -ENOTCONN; ++ } ++ ++ core->handler->dev_radio_off(core); ++ ++ return 0; ++} ++ ++/* ++ * user space io handler ++ */ ++static int rt2x00_ioctl(struct rtnet_device *rtnet_dev, struct ifreq *ifr, ++ int request) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ struct rtwlan_cmd *cmd; ++ u8 rate, dsss_rate, ofdm_rate; ++ u32 address, value; ++ ++ cmd = (struct rtwlan_cmd *)ifr->ifr_data; ++ ++ switch (request) { ++ case IOC_RTWLAN_IFINFO: ++ cmd->args.info.bitrate = core->config.bitrate; ++ cmd->args.info.channel = core->config.channel; ++ cmd->args.info.retry = core->config.short_retry; ++ cmd->args.info.txpower = core->config.txpower; ++ cmd->args.info.bbpsens = core->config.bbpsens; ++ cmd->args.info.mode = core->rtwlan_dev->mode; ++ cmd->args.info.rx_packets = core->rtwlan_dev->stats.rx_packets; ++ cmd->args.info.tx_packets = core->rtwlan_dev->stats.tx_packets; ++ cmd->args.info.tx_retry = core->rtwlan_dev->stats.tx_retry; ++ cmd->args.info.autoresponder = ++ core->config.config_flags & CONFIG_AUTORESP ? 1 : 0; ++ cmd->args.info.dropbcast = ++ core->config.config_flags & CONFIG_DROP_BCAST ? 1 : 0; ++ cmd->args.info.dropmcast = ++ core->config.config_flags & CONFIG_DROP_MCAST ? 
1 : 0; ++ DEBUG("rtwlan_dev->mode=%d\n", rtwlan_dev->mode); ++ break; ++ case IOC_RTWLAN_BITRATE: ++ rate = cmd->args.set.bitrate; ++ ofdm_rate = ieee80211_is_ofdm_rate(rate); ++ dsss_rate = ieee80211_is_dsss_rate(rate); ++ DEBUG("bitrate=%d\n", rate); ++ if (!(dsss_rate ^ ofdm_rate)) ++ NOTICE("Rate %d is not DSSS and not OFDM.\n", rate); ++ core->config.bitrate = rate; ++ core->config.update_flags |= UPDATE_BITRATE; ++ break; ++ case IOC_RTWLAN_CHANNEL: ++ DEBUG("channel=%d\n", cmd->args.set.channel); ++ core->config.channel = cmd->args.set.channel; ++ core->config.update_flags |= UPDATE_CHANNEL; ++ break; ++ case IOC_RTWLAN_RETRY: ++ core->config.short_retry = cmd->args.set.retry; ++ core->config.update_flags |= UPDATE_RETRY; ++ break; ++ case IOC_RTWLAN_TXPOWER: ++ core->config.txpower = cmd->args.set.txpower; ++ core->config.update_flags |= UPDATE_TXPOWER; ++ break; ++ case IOC_RTWLAN_AUTORESP: ++ if (cmd->args.set.autoresponder) ++ core->config.config_flags |= CONFIG_AUTORESP; ++ else ++ core->config.config_flags &= ~CONFIG_AUTORESP; ++ core->config.update_flags |= UPDATE_AUTORESP; ++ break; ++ case IOC_RTWLAN_DROPBCAST: ++ if (cmd->args.set.dropbcast) ++ core->config.config_flags |= CONFIG_DROP_BCAST; ++ else ++ core->config.config_flags &= ~CONFIG_DROP_BCAST; ++ core->config.update_flags |= UPDATE_PACKET_FILTER; ++ break; ++ case IOC_RTWLAN_DROPMCAST: ++ if (cmd->args.set.dropmcast) ++ core->config.config_flags |= CONFIG_DROP_MCAST; ++ else ++ core->config.config_flags &= ~CONFIG_DROP_MCAST; ++ core->config.update_flags |= UPDATE_PACKET_FILTER; ++ break; ++ case IOC_RTWLAN_TXMODE: ++ core->rtwlan_dev->mode = cmd->args.set.mode; ++ break; ++ case IOC_RTWLAN_BBPSENS: ++ value = cmd->args.set.bbpsens; ++ if (value < 0) ++ value = 0; ++ if (value > 127) ++ value = 127; ++ core->config.bbpsens = value; ++ core->config.update_flags |= UPDATE_BBPSENS; ++ break; ++ case IOC_RTWLAN_REGREAD: ++ case IOC_RTWLAN_BBPREAD: ++ address = cmd->args.reg.address; ++ core->handler->dev_register_access(core, request, address, ++ &value); ++ cmd->args.reg.value = value; ++ break; ++ case IOC_RTWLAN_REGWRITE: ++ case IOC_RTWLAN_BBPWRITE: ++ address = cmd->args.reg.address; ++ value = cmd->args.reg.value; ++ core->handler->dev_register_access(core, request, address, ++ &value); ++ break; ++ default: ++ ERROR("Unknown request!\n"); ++ return -1; ++ } ++ ++ if (request != IOC_RTWLAN_IFINFO) ++ rt2x00_update_config(core); ++ ++ return 0; ++} ++ ++/* ++ * TX/RX related routines. 
++ */ ++static int rt2x00_start_xmit(struct rtskb *rtskb, ++ struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ u16 xmit_flags = 0x0000; ++ u8 rate = 0x00; ++ ++ if (unlikely(rtskb)) { ++ rate = core->config.bitrate; ++ if (ieee80211_is_ofdm_rate(rate)) ++ xmit_flags |= XMIT_OFDM; ++ ++ /* Check if the packet should be acknowledged */ ++ if (core->rtwlan_dev->mode == RTWLAN_TXMODE_ACK) ++ xmit_flags |= XMIT_ACK; ++ ++ if (core->handler->dev_xmit_packet(core, rtskb, rate, ++ xmit_flags)) ++ ERROR("Packet dropped !"); ++ ++ dev_kfree_rtskb(rtskb); ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt2x00_open ++ * @rtdev ++ */ ++static int rt2x00_open(struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ int status = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ if (test_and_set_bit(DEVICE_ENABLED, &core->flags)) { ++ ERROR("device already enabled.\n"); ++ return -EBUSY; ++ } ++ ++ /* ++ * Start rtnet interface. ++ */ ++ rt_stack_connect(rtnet_dev, &STACK_manager); ++ ++ status = rt2x00_radio_on(core); ++ if (status) { ++ clear_bit(DEVICE_ENABLED, &core->flags); ++ ERROR("Couldn't activate radio.\n"); ++ return status; ++ } ++ ++ core->config.led_status = 1; ++ core->config.update_flags |= UPDATE_LED_STATUS; ++ rt2x00_update_config(core); ++ ++ rtnetif_start_queue(rtnet_dev); ++ ++ DEBUG("Exit success.\n"); ++ ++ return 0; ++} ++ ++/*** ++ * rt2x00_close ++ * @rtdev ++ */ ++static int rt2x00_close(struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ ++ DEBUG("Start.\n"); ++ ++ if (!test_and_clear_bit(DEVICE_ENABLED, &core->flags)) { ++ ERROR("device already disabled.\n"); ++ return -EBUSY; ++ } ++ ++ rt2x00_radio_off(core); ++ ++ rtnetif_stop_queue(rtnet_dev); ++ rt_stack_disconnect(rtnet_dev); ++ ++ return 0; ++} ++ ++/* ++ * Initialization handlers. 
++ */ ++static void rt2x00_init_config(struct _rt2x00_core *core) ++{ ++ DEBUG("Start.\n"); ++ ++ memset(&core->config.bssid, '\0', sizeof(core->config.bssid)); ++ ++ core->config.channel = 1; ++ core->config.bitrate = capabilities.bitrate[0]; ++ core->config.bbpsens = 50; ++ core->config.config_flags = 0; ++ core->config.config_flags |= ++ CONFIG_DROP_BCAST | CONFIG_DROP_MCAST | CONFIG_AUTORESP; ++ core->config.short_retry = 4; ++ core->config.long_retry = 7; ++ core->config.txpower = 100; ++ core->config.plcp = 48; ++ core->config.sifs = 10; ++ core->config.slot_time = 20; ++ core->rtwlan_dev->mode = RTWLAN_TXMODE_RAW; ++ core->config.update_flags = UPDATE_ALL_CONFIG; ++} ++ ++struct rtnet_device *rt2x00_core_probe(struct _rt2x00_dev_handler *handler, ++ void *priv, u32 sizeof_dev) ++{ ++ struct rtnet_device *rtnet_dev = NULL; ++ struct _rt2x00_core *core = NULL; ++ struct rtwlan_device *rtwlan_dev = NULL; ++ static int cards_found = -1; ++ int err; ++ ++ DEBUG("Start.\n"); ++ ++ cards_found++; ++ if (cards[cards_found] == 0) ++ goto exit; ++ ++ rtnet_dev = ++ rtwlan_alloc_dev(sizeof_dev + sizeof(*core), RX_ENTRIES * 2); ++ if (!rtnet_dev) ++ goto exit; ++ ++ rt_rtdev_connect(rtnet_dev, &RTDEV_manager); ++ rtnet_dev->vers = RTDEV_VERS_2_0; ++ ++ rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ memset(rtwlan_dev, 0x00, sizeof(*rtwlan_dev)); ++ ++ core = rtwlan_priv(rtwlan_dev); ++ memset(core, 0x00, sizeof(*core)); ++ ++ core->rtwlan_dev = rtwlan_dev; ++ core->handler = handler; ++ core->priv = (void *)core + sizeof(*core); ++ core->rtnet_dev = rtnet_dev; ++ ++ /* Set configuration default values. */ ++ rt2x00_init_config(core); ++ ++ if (core->handler->dev_probe && core->handler->dev_probe(core, priv)) { ++ ERROR("device probe failed.\n"); ++ goto exit; ++ } ++ INFO("Device " MAC_FMT " detected.\n", MAC_ARG(rtnet_dev->dev_addr)); ++ ++ rtwlan_dev->hard_start_xmit = rt2x00_start_xmit; ++ ++ rtnet_dev->open = &rt2x00_open; ++ rtnet_dev->stop = &rt2x00_close; ++ rtnet_dev->do_ioctl = &rt2x00_ioctl; ++ rtnet_dev->hard_header = &rt_eth_header; ++ ++ if ((err = rt_register_rtnetdev(rtnet_dev)) != 0) { ++ rtdev_free(rtnet_dev); ++ ERROR("rtnet_device registration failed.\n"); ++ printk("err=%d\n", err); ++ goto exit_dev_remove; ++ } ++ ++ set_bit(DEVICE_AWAKE, &core->flags); ++ ++ return rtnet_dev; ++ ++exit_dev_remove: ++ if (core->handler->dev_remove) ++ core->handler->dev_remove(core); ++ ++exit: ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(rt2x00_core_probe); ++ ++void rt2x00_core_remove(struct rtnet_device *rtnet_dev) ++{ ++ rt_unregister_rtnetdev(rtnet_dev); ++ rt_rtdev_disconnect(rtnet_dev); ++ ++ rtdev_free(rtnet_dev); ++} ++EXPORT_SYMBOL_GPL(rt2x00_core_remove); ++ ++/* ++ * RT2x00 core module information. 
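++ * The core module itself only logs loading and unloading; devices are bound
++ * by a chipset module (e.g. rt_rt2500pci) through the exported
++ * rt2x00_core_probe()/rt2x00_core_remove().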
++ */ ++static char version[] = DRV_NAME " - " DRV_VERSION; ++ ++MODULE_AUTHOR(DRV_AUTHOR); ++MODULE_DESCRIPTION("RTnet rt2500 PCI WLAN driver (Core Module)"); ++MODULE_LICENSE("GPL"); ++ ++static int __init rt2x00_core_init(void) ++{ ++ printk(KERN_INFO "Loading module: %s\n", version); ++ return 0; ++} ++ ++static void __exit rt2x00_core_exit(void) ++{ ++ printk(KERN_INFO "Unloading module: %s\n", version); ++} ++ ++module_init(rt2x00_core_init); ++module_exit(rt2x00_core_exit); +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.c 2021-04-07 16:01:27.610633619 +0800 +@@ -0,0 +1,1274 @@ ++/* rt2500pci.c ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt_rt2500pci ++ * Abstract: rt2500pci device specific routines. ++ * Supported chipsets: RT2560. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "rt2x00.h" ++#include "rt2500pci.h" ++ ++#include ++ ++#ifdef DRV_NAME ++#undef DRV_NAME ++#define DRV_NAME "rt_rt2500pci" ++#endif /* DRV_NAME */ ++ ++/* handler for direct register access from core module */ ++static int rt2x00_dev_register_access(struct _rt2x00_core *core, int request, ++ u32 address, u32 *value) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u8 u8_value; ++ ++ switch (request) { ++ case IOC_RTWLAN_REGREAD: ++ rt2x00_register_read(rt2x00pci, address, value); ++ break; ++ case IOC_RTWLAN_REGWRITE: ++ rt2x00_register_write(rt2x00pci, address, *value); ++ break; ++ case IOC_RTWLAN_BBPREAD: ++ rt2x00_bbp_regread(rt2x00pci, address, &u8_value); ++ *value = u8_value; ++ break; ++ case IOC_RTWLAN_BBPWRITE: ++ rt2x00_bbp_regwrite(rt2x00pci, address, *value); ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Interrupt routines. ++ * rt2x00_interrupt_txdone processes all transmitted packetss results. ++ * rt2x00_interrupt_rxdone processes all received rx packets. 
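++ * Both handlers are called from rt2x00_interrupt() with rt2x00pci->lock held
++ * and walk their ring until they reach a descriptor that is still owned by
++ * the NIC (or, for TX, no longer marked valid).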
++ */ ++static void rt2x00_interrupt_txdone(struct _data_ring *ring) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(ring->core->rtnet_dev); ++ struct _txd *txd = NULL; ++ u8 tx_result = 0x00; ++ /* u8 retry_count = 0x00; */ ++ ++ do { ++ txd = DESC_ADDR_DONE(ring); ++ ++ if (rt2x00_get_field32(txd->word0, TXD_W0_OWNER_NIC) || ++ !rt2x00_get_field32(txd->word0, TXD_W0_VALID)) ++ break; ++ ++ if (ring->ring_type == RING_TX) { ++ tx_result = ++ rt2x00_get_field32(txd->word0, TXD_W0_RESULT); ++ /* retry_count = rt2x00_get_field32(txd->word0, TXD_W0_RETRY_COUNT); */ ++ ++ switch (tx_result) { ++ case TX_SUCCESS: ++ rtwlan_dev->stats.tx_packets++; ++ break; ++ case TX_SUCCESS_RETRY: ++ rtwlan_dev->stats.tx_retry++; ++ break; ++ case TX_FAIL_RETRY: ++ DEBUG("TX_FAIL_RETRY.\n"); ++ break; ++ case TX_FAIL_INVALID: ++ DEBUG("TX_FAIL_INVALID.\n"); ++ break; ++ case TX_FAIL_OTHER: ++ DEBUG("TX_FAIL_OTHER.\n"); ++ break; ++ default: ++ DEBUG("Unknown tx result.\n"); ++ } ++ } ++ ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 0); ++ ++ rt2x00_ring_index_done_inc(ring); ++ } while (!rt2x00_ring_empty(ring)); ++} ++ ++static void rt2x00_interrupt_rxdone(struct _data_ring *ring, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(ring->core); ++ struct rtnet_device *rtnet_dev = ring->core->rtnet_dev; ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rxd *rxd = NULL; ++ struct rtskb *rtskb; ++ void *data = NULL; ++ u16 size = 0x0000; ++ /* u16 rssi = 0x0000; */ ++ ++ while (1) { ++ rxd = DESC_ADDR(ring); ++ data = DATA_ADDR(ring); ++ ++ if (rt2x00_get_field32(rxd->word0, RXD_W0_OWNER_NIC)) ++ break; ++ ++ size = rt2x00_get_field32(rxd->word0, RXD_W0_DATABYTE_COUNT); ++ /* rssi = rt2x00_get_field32(rxd->word2, RXD_W2_RSSI); */ ++ ++ /* prepare rtskb */ ++ rtskb = rtnetdev_alloc_rtskb(rtnet_dev, size + NET_IP_ALIGN); ++ if (!rtskb) { ++ ERROR("Couldn't allocate rtskb, packet dropped.\n"); ++ break; ++ } ++ rtskb->time_stamp = *time_stamp; ++ rtskb_reserve(rtskb, NET_IP_ALIGN); ++ ++ memcpy(rtskb->data, data, size); ++ rtskb_put(rtskb, size); ++ ++ /* give incoming frame to rtwlan stack */ ++ rtwlan_rx(rtskb, rtnet_dev); ++ ++ rtwlan_dev->stats.rx_packets++; ++ ++ rt2x00_set_field32(&rxd->word0, RXD_W0_OWNER_NIC, 1); ++ rt2x00_ring_index_inc(&rt2x00pci->rx); ++ } ++} ++ ++int rt2x00_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ struct rtnet_device *rtnet_dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct _rt2x00_core *core = rtwlan_priv(rtwlan_dev); ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ unsigned int old_packet_cnt = rtwlan_dev->stats.rx_packets; ++ u32 reg = 0x00000000; ++ ++ rtdm_lock_get(&rt2x00pci->lock); ++ ++ rt2x00_register_read(rt2x00pci, CSR7, ®); ++ rt2x00_register_write(rt2x00pci, CSR7, reg); ++ ++ if (!reg) { ++ rtdm_lock_put(&rt2x00pci->lock); ++ return RTDM_IRQ_NONE; ++ } ++ ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TBCN_EXPIRE)) /* Beacon timer expired interrupt. */ ++ DEBUG("Beacon timer expired.\n"); ++ if (rt2x00_get_field32(reg, CSR7_RXDONE)) /* Rx ring done interrupt. */ ++ rt2x00_interrupt_rxdone(&rt2x00pci->rx, &time_stamp); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_ATIMRING)) /* Atim ring transmit done interrupt. */ ++ DEBUG("AtimTxDone.\n"); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_PRIORING)) /* Priority ring transmit done interrupt. 
*/ ++ DEBUG("PrioTxDone.\n"); ++ if (rt2x00_get_field32( ++ reg, ++ CSR7_TXDONE_TXRING)) /* Tx ring transmit done interrupt. */ ++ rt2x00_interrupt_txdone(&rt2x00pci->tx); ++ ++ rtdm_lock_put(&rt2x00pci->lock); ++ ++ if (old_packet_cnt != rtwlan_dev->stats.rx_packets) ++ rt_mark_stack_mgr(rtnet_dev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++void rt2x00_init_eeprom(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ u16 eeprom = 0x0000; ++ ++ /* ++ * 1 - Detect EEPROM width. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, ®); ++ rt2x00pci->eeprom_width = rt2x00_get_field32(reg, CSR21_TYPE_93C46) ? ++ EEPROM_WIDTH_93c46 : ++ EEPROM_WIDTH_93c66; ++ ++ /* ++ * 2 - Identify rf chipset. ++ */ ++ eeprom = rt2x00_eeprom_read_word(rt2x00pci, EEPROM_ANTENNA); ++ set_chip(&rt2x00pci->chip, RT2560, ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_RF_TYPE)); ++ ++ /* ++ * 3 - Identify default antenna configuration. ++ */ ++ config->antenna_tx = ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_TX_DEFAULT); ++ config->antenna_rx = ++ rt2x00_get_field16(eeprom, EEPROM_ANTENNA_RX_DEFAULT); ++ ++ DEBUG("antenna_tx=%d antenna_rx=%d\n", config->antenna_tx, ++ config->antenna_rx); ++ ++ /* ++ * 4 - Read BBP data from EEPROM and store in private structure. ++ */ ++ memset(&rt2x00pci->eeprom, 0x00, sizeof(rt2x00pci->eeprom)); ++ for (eeprom = 0; eeprom < EEPROM_BBP_SIZE; eeprom++) ++ rt2x00pci->eeprom[eeprom] = rt2x00_eeprom_read_word( ++ rt2x00pci, EEPROM_BBP_START + eeprom); ++} ++ ++void rt2x00_dev_read_mac(struct _rt2x00_pci *rt2x00pci, ++ struct rtnet_device *rtnet_dev) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_register_multiread(rt2x00pci, CSR3, ®[0], sizeof(reg)); ++ ++ rtnet_dev->dev_addr[0] = rt2x00_get_field32(reg[0], CSR3_BYTE0); ++ rtnet_dev->dev_addr[1] = rt2x00_get_field32(reg[0], CSR3_BYTE1); ++ rtnet_dev->dev_addr[2] = rt2x00_get_field32(reg[0], CSR3_BYTE2); ++ rtnet_dev->dev_addr[3] = rt2x00_get_field32(reg[0], CSR3_BYTE3); ++ rtnet_dev->dev_addr[4] = rt2x00_get_field32(reg[1], CSR4_BYTE4); ++ rtnet_dev->dev_addr[5] = rt2x00_get_field32(reg[1], CSR4_BYTE5); ++ ++ rtnet_dev->addr_len = 6; ++} ++ ++int rt2x00_dev_probe(struct _rt2x00_core *core, void *priv) ++{ ++ struct pci_dev *pci_dev = (struct pci_dev *)priv; ++ struct _rt2x00_pci *rt2x00pci = core->priv; ++ ++ memset(rt2x00pci, 0x00, sizeof(*rt2x00pci)); ++ ++ if (unlikely(!pci_dev)) { ++ ERROR("invalid priv pointer.\n"); ++ return -ENODEV; ++ } ++ rt2x00pci->pci_dev = pci_dev; ++ ++ rt2x00pci->rx.data_addr = NULL; ++ rt2x00pci->tx.data_addr = NULL; ++ ++ rt2x00pci->csr_addr = ioremap(pci_resource_start(pci_dev, 0), ++ pci_resource_len(pci_dev, 0)); ++ if (!rt2x00pci->csr_addr) { ++ ERROR("ioremap failed.\n"); ++ return -ENOMEM; ++ } ++ ++ rt2x00_init_eeprom(rt2x00pci, &core->config); ++ rt2x00_dev_read_mac(rt2x00pci, core->rtnet_dev); ++ ++ return 0; ++} ++ ++int rt2x00_dev_remove(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ if (rt2x00pci->csr_addr) { ++ iounmap(rt2x00pci->csr_addr); ++ rt2x00pci->csr_addr = NULL; ++ } ++ ++ return 0; ++} ++ ++/* ++ * rt2x00_clear_ring ++ * During the initialization some of the descriptor variables are filled in. ++ * The default value of the owner variable is different between the types of the descriptor, ++ * DMA ring entries that receive packets are owned by the device untill a packet is received. 
++ * DMA ring entries that are used to transmit a packet are owned by the module untill the device, ++ * for these rings the valid bit is set to 0 to indicate it is ready for use. ++ * should transmit the packet that particular DMA ring entry. ++ * The BUFFER_ADDRESS variable is used to link a descriptor to a packet data block. ++ */ ++static void rt2x00_clear_ring(struct _rt2x00_pci *rt2x00pci, ++ struct _data_ring *ring) ++{ ++ struct _rxd *rxd = NULL; ++ struct _txd *txd = NULL; ++ dma_addr_t data_dma = ++ ring->data_dma + (ring->max_entries * ring->desc_size); ++ u8 counter = 0x00; ++ ++ memset(ring->data_addr, 0x00, ring->mem_size); ++ ++ for (; counter < ring->max_entries; counter++) { ++ if (ring->ring_type == RING_RX) { ++ rxd = (struct _rxd *)__DESC_ADDR(ring, counter); ++ ++ rt2x00_set_field32(&rxd->word1, RXD_W1_BUFFER_ADDRESS, ++ data_dma); ++ rt2x00_set_field32(&rxd->word0, RXD_W0_OWNER_NIC, 1); ++ } else { ++ txd = (struct _txd *)__DESC_ADDR(ring, counter); ++ ++ rt2x00_set_field32(&txd->word1, TXD_W1_BUFFER_ADDRESS, ++ data_dma); ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_OWNER_NIC, 0); ++ } ++ ++ data_dma += ring->entry_size; ++ } ++ ++ rt2x00_ring_clear_index(ring); ++} ++ ++/* ++ * rt2x00_init_ring_register ++ * The registers should be updated with the descriptor size and the ++ * number of entries of each ring. ++ * The address of the first entry of the descriptor ring is written to the register ++ * corresponding to the ring. ++ */ ++static void rt2x00_init_ring_register(struct _rt2x00_pci *rt2x00pci) ++{ ++ u32 reg = 0x00000000; ++ ++ /* Initialize ring register for RX/TX */ ++ ++ rt2x00_set_field32(®, TXCSR2_TXD_SIZE, rt2x00pci->tx.desc_size); ++ rt2x00_set_field32(®, TXCSR2_NUM_TXD, rt2x00pci->tx.max_entries); ++ rt2x00_register_write(rt2x00pci, TXCSR2, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, TXCSR3_TX_RING_REGISTER, ++ rt2x00pci->tx.data_dma); ++ rt2x00_register_write(rt2x00pci, TXCSR3, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, RXCSR1_RXD_SIZE, rt2x00pci->rx.desc_size); ++ rt2x00_set_field32(®, RXCSR1_NUM_RXD, rt2x00pci->rx.max_entries); ++ rt2x00_register_write(rt2x00pci, RXCSR1, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, RXCSR2_RX_RING_REGISTER, ++ rt2x00pci->rx.data_dma); ++ rt2x00_register_write(rt2x00pci, RXCSR2, reg); ++} ++ ++static int rt2x00_init_registers(struct _rt2x00_pci *rt2x00pci) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_write(rt2x00pci, PWRCSR0, cpu_to_le32(0x3f3b3100)); ++ ++ rt2x00_register_write(rt2x00pci, PSCSR0, cpu_to_le32(0x00020002)); ++ rt2x00_register_write(rt2x00pci, PSCSR1, cpu_to_le32(0x00000002)); ++ rt2x00_register_write(rt2x00pci, PSCSR2, cpu_to_le32(0x00020002)); ++ rt2x00_register_write(rt2x00pci, PSCSR3, cpu_to_le32(0x00000002)); ++ ++ rt2x00_register_read(rt2x00pci, TIMECSR, ®); ++ rt2x00_set_field32(®, TIMECSR_US_COUNT, 33); ++ rt2x00_set_field32(®, TIMECSR_US_64_COUNT, 63); ++ rt2x00_set_field32(®, TIMECSR_BEACON_EXPECT, 0); ++ rt2x00_register_write(rt2x00pci, TIMECSR, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR9, ®); ++ rt2x00_set_field32(®, CSR9_MAX_FRAME_UNIT, ++ (rt2x00pci->rx.entry_size / 128)); ++ rt2x00_register_write(rt2x00pci, CSR9, reg); ++ ++ rt2x00_register_write(rt2x00pci, CNT3, cpu_to_le32(0x3f080000)); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ rt2x00_set_field32(®, RXCSR0_DISABLE_RX, 0); ++ rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, 0); ++ rt2x00_register_write(rt2x00pci, 
RXCSR0, reg); ++ ++ rt2x00_register_write(rt2x00pci, MACCSR0, cpu_to_le32(0x00213223)); ++ ++ rt2x00_register_read(rt2x00pci, MACCSR1, ®); ++ rt2x00_set_field32(®, MACCSR1_AUTO_TXBBP, 1); ++ rt2x00_set_field32(®, MACCSR1_AUTO_RXBBP, 1); ++ rt2x00_register_write(rt2x00pci, MACCSR1, reg); ++ ++ rt2x00_register_read(rt2x00pci, MACCSR2, ®); ++ rt2x00_set_field32(®, MACCSR2_DELAY, 64); ++ rt2x00_register_write(rt2x00pci, MACCSR2, reg); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR3, ®); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID0, 47); /* Signal. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID0_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID1, 51); /* Rssi. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID1_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID2, 42); /* OFDM Rate. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID2_VALID, 1); ++ rt2x00_set_field32(®, RXCSR3_BBP_ID3, 51); /* OFDM. */ ++ rt2x00_set_field32(®, RXCSR3_BBP_ID3_VALID, 1); ++ rt2x00_register_write(rt2x00pci, RXCSR3, reg); ++ ++ rt2x00_register_read(rt2x00pci, RALINKCSR, ®); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_DATA0, 17); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_ID0, 26); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_VALID0, 1); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_DATA1, 0); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_ID1, 26); ++ rt2x00_set_field32(®, RALINKCSR_AR_BBP_VALID1, 1); ++ rt2x00_register_write(rt2x00pci, RALINKCSR, reg); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR1, cpu_to_le32(0x82188200)); ++ ++ rt2x00_register_write(rt2x00pci, TXACKCSR0, cpu_to_le32(0x00000020)); ++ ++ rt2x00_register_write(rt2x00pci, ARTCSR0, cpu_to_le32(0x7038140a)); ++ rt2x00_register_write(rt2x00pci, ARTCSR1, cpu_to_le32(0x1d21252d)); ++ rt2x00_register_write(rt2x00pci, ARTCSR2, cpu_to_le32(0x1919191d)); ++ ++ /* disable Beacon timer */ ++ rt2x00_register_write(rt2x00pci, CSR14, 0x0); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, LEDCSR_ON_PERIOD, 30); ++ rt2x00_set_field32(®, LEDCSR_OFF_PERIOD, 70); ++ rt2x00_set_field32(®, LEDCSR_LINK, 0); ++ rt2x00_set_field32(®, LEDCSR_ACTIVITY, 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, CSR1_SOFT_RESET, 1); ++ rt2x00_register_write(rt2x00pci, CSR1, reg); ++ ++ reg = 0x00000000; ++ rt2x00_set_field32(®, CSR1_HOST_READY, 1); ++ rt2x00_register_write(rt2x00pci, CSR1, reg); ++ ++ /* ++ * We must clear the FCS and FIFI error count. ++ * These registers are cleared on read, so we may pass a useless variable to store the value. 
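++ * CNT0 holds the FCS error count and CNT4 the Rx FIFO overflow count
++ * (see rt2500pci.h).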
++ */ ++ rt2x00_register_read(rt2x00pci, CNT0, ®); ++ rt2x00_register_read(rt2x00pci, CNT4, ®); ++ ++ return 0; ++} ++ ++static void rt2x00_init_write_mac(struct _rt2x00_pci *rt2x00pci, ++ struct rtnet_device *rtnet_dev) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_set_field32(®[0], CSR3_BYTE0, rtnet_dev->dev_addr[0]); ++ rt2x00_set_field32(®[0], CSR3_BYTE1, rtnet_dev->dev_addr[1]); ++ rt2x00_set_field32(®[0], CSR3_BYTE2, rtnet_dev->dev_addr[2]); ++ rt2x00_set_field32(®[0], CSR3_BYTE3, rtnet_dev->dev_addr[3]); ++ rt2x00_set_field32(®[1], CSR4_BYTE4, rtnet_dev->dev_addr[4]); ++ rt2x00_set_field32(®[1], CSR4_BYTE5, rtnet_dev->dev_addr[5]); ++ ++ rt2x00_register_multiwrite(rt2x00pci, CSR3, ®[0], sizeof(reg)); ++} ++ ++static int rt2x00_init_bbp(struct _rt2x00_pci *rt2x00pci) ++{ ++ u8 reg_id = 0x00; ++ u8 value = 0x00; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_bbp_regread(rt2x00pci, 0x00, &value); ++ if ((value != 0xff) && (value != 0x00)) ++ goto continue_csr_init; ++ NOTICE("Waiting for BBP register.\n"); ++ } ++ ++ ERROR("hardware problem, BBP register access failed, aborting.\n"); ++ return -EACCES; ++ ++continue_csr_init: ++ rt2x00_bbp_regwrite(rt2x00pci, 3, 0x02); ++ rt2x00_bbp_regwrite(rt2x00pci, 4, 0x19); ++ rt2x00_bbp_regwrite(rt2x00pci, 14, 0x1c); ++ rt2x00_bbp_regwrite(rt2x00pci, 15, 0x30); ++ rt2x00_bbp_regwrite(rt2x00pci, 16, 0xac); ++ rt2x00_bbp_regwrite(rt2x00pci, 17, 0x48); ++ rt2x00_bbp_regwrite(rt2x00pci, 18, 0x18); ++ rt2x00_bbp_regwrite(rt2x00pci, 19, 0xff); ++ rt2x00_bbp_regwrite(rt2x00pci, 20, 0x1e); ++ rt2x00_bbp_regwrite(rt2x00pci, 21, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 22, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 23, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 24, 0x70); ++ rt2x00_bbp_regwrite(rt2x00pci, 25, 0x40); ++ rt2x00_bbp_regwrite(rt2x00pci, 26, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 27, 0x23); ++ rt2x00_bbp_regwrite(rt2x00pci, 30, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 31, 0x2b); ++ rt2x00_bbp_regwrite(rt2x00pci, 32, 0xb9); ++ rt2x00_bbp_regwrite(rt2x00pci, 34, 0x12); ++ rt2x00_bbp_regwrite(rt2x00pci, 35, 0x50); ++ rt2x00_bbp_regwrite(rt2x00pci, 39, 0xc4); ++ rt2x00_bbp_regwrite(rt2x00pci, 40, 0x02); ++ rt2x00_bbp_regwrite(rt2x00pci, 41, 0x60); ++ rt2x00_bbp_regwrite(rt2x00pci, 53, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 54, 0x18); ++ rt2x00_bbp_regwrite(rt2x00pci, 56, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 57, 0x10); ++ rt2x00_bbp_regwrite(rt2x00pci, 58, 0x08); ++ rt2x00_bbp_regwrite(rt2x00pci, 61, 0x6d); ++ rt2x00_bbp_regwrite(rt2x00pci, 62, 0x10); ++ ++ DEBUG("Start reading EEPROM contents...\n"); ++ for (counter = 0; counter < EEPROM_BBP_SIZE; counter++) { ++ if (rt2x00pci->eeprom[counter] != 0xffff && ++ rt2x00pci->eeprom[counter] != 0x0000) { ++ reg_id = rt2x00_get_field16(rt2x00pci->eeprom[counter], ++ EEPROM_BBP_REG_ID); ++ value = rt2x00_get_field16(rt2x00pci->eeprom[counter], ++ EEPROM_BBP_VALUE); ++ DEBUG("BBP reg_id: 0x%02x, value: 0x%02x.\n", reg_id, ++ value); ++ rt2x00_bbp_regwrite(rt2x00pci, reg_id, value); ++ } ++ } ++ DEBUG("...End of EEPROM contents.\n"); ++ ++ return 0; ++} ++ ++/* ++ * Device radio routines. ++ * When the radio is switched on or off, the TX and RX ++ * should always be reset using the TXCSR0 and RXCSR0 registers. ++ * The radio itself is switched on and off using the PWRCSR0 register. 
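++ * rt2x00_dev_radio_on() additionally allocates the DMA rings and requests
++ * the RTDM interrupt; rt2x00_dev_radio_off() frees both again.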
++ */ ++ ++static int rt2x00_dev_radio_on(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u32 reg = 0x00000000; ++ int retval; ++ ++ if (rt2x00_pci_alloc_rings(core)) ++ goto exit_fail; ++ ++ rt2x00_clear_ring(rt2x00pci, &rt2x00pci->rx); ++ rt2x00_clear_ring(rt2x00pci, &rt2x00pci->tx); ++ ++ rt2x00_init_ring_register(rt2x00pci); ++ ++ if (rt2x00_init_registers(rt2x00pci)) ++ goto exit_fail; ++ ++ rt2x00_init_write_mac(rt2x00pci, core->rtnet_dev); ++ ++ if (rt2x00_init_bbp(rt2x00pci)) ++ goto exit_fail; ++ ++ /* ++ * Clear interrupts. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR7, ®); ++ rt2x00_register_write(rt2x00pci, CSR7, reg); ++ ++ /* Register rtdm-irq */ ++ retval = rtdm_irq_request(&rt2x00pci->irq_handle, core->rtnet_dev->irq, ++ rt2x00_interrupt, 0, core->rtnet_dev->name, ++ core->rtnet_dev); ++ ++ /* ++ * Enable interrupts. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR8, ®); ++ rt2x00_set_field32(®, CSR8_TBCN_EXPIRE, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_TXRING, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_ATIMRING, 0); ++ rt2x00_set_field32(®, CSR8_TXDONE_PRIORING, 0); ++ rt2x00_set_field32(®, CSR8_RXDONE, 0); ++ rt2x00_register_write(rt2x00pci, CSR8, reg); ++ ++ return 0; ++ ++exit_fail: ++ rt2x00_pci_free_rings(core); ++ ++ return -ENOMEM; ++} ++ ++static int rt2x00_dev_radio_off(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ u32 reg = 0x00000000; ++ int retval = 0; ++ ++ rt2x00_register_write(rt2x00pci, PWRCSR0, cpu_to_le32(0x00000000)); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR0, ®); ++ rt2x00_set_field32(®, TXCSR0_ABORT, 1); ++ rt2x00_register_write(rt2x00pci, TXCSR0, reg); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ rt2x00_set_field32(®, RXCSR0_DISABLE_RX, 1); ++ rt2x00_register_write(rt2x00pci, RXCSR0, reg); ++ ++ rt2x00_register_read(rt2x00pci, LEDCSR, ®); ++ rt2x00_set_field32(®, LEDCSR_LINK, 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR8, ®); ++ rt2x00_set_field32(®, CSR8_TBCN_EXPIRE, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_TXRING, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_ATIMRING, 1); ++ rt2x00_set_field32(®, CSR8_TXDONE_PRIORING, 1); ++ rt2x00_set_field32(®, CSR8_RXDONE, 1); ++ rt2x00_register_write(rt2x00pci, CSR8, reg); ++ ++ rt2x00_pci_free_rings(core); ++ ++ if ((retval = rtdm_irq_free(&rt2x00pci->irq_handle)) != 0) ++ ERROR("rtdm_irq_free=%d\n", retval); ++ ++ rt_stack_disconnect(core->rtnet_dev); ++ ++ return retval; ++} ++ ++/* ++ * Configuration handlers. 
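++ * Each handler below programs one aspect of the device; they are dispatched
++ * from rt2x00_dev_update_config() according to the UPDATE_* bits in the
++ * update_flags argument.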
++ */ ++ ++static void rt2x00_dev_update_autoresp(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR1, ®); ++ ++ if (config->config_flags & CONFIG_AUTORESP) ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 1); ++ else ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 0); ++ ++ rt2x00_register_write(rt2x00pci, TXCSR1, reg); ++} ++ ++static void rt2x00_dev_update_bbpsens(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ rt2x00_bbp_regwrite(rt2x00pci, 0x11, config->bbpsens); ++} ++ ++static void rt2x00_dev_update_bssid(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg[2]; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ rt2x00_set_field32(®[0], CSR5_BYTE0, config->bssid[0]); ++ rt2x00_set_field32(®[0], CSR5_BYTE1, config->bssid[1]); ++ rt2x00_set_field32(®[0], CSR5_BYTE2, config->bssid[2]); ++ rt2x00_set_field32(®[0], CSR5_BYTE3, config->bssid[3]); ++ rt2x00_set_field32(®[1], CSR6_BYTE4, config->bssid[4]); ++ rt2x00_set_field32(®[1], CSR6_BYTE5, config->bssid[5]); ++ ++ rt2x00_register_multiwrite(rt2x00pci, CSR5, ®[0], sizeof(reg)); ++} ++ ++static void rt2x00_dev_update_packet_filter(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, RXCSR0, ®); ++ ++ rt2x00_set_field32(®, RXCSR0_DROP_TODS, 0); ++ rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_CRC, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_PHYSICAL, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); ++ rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); ++ ++ /* ++ * This looks like a bug, but for an unknown reason the register seems to swap the bits !!! ++ */ ++ if (config->config_flags & CONFIG_DROP_BCAST) ++ rt2x00_set_field32(®, RXCSR0_DROP_MCAST, 1); ++ else ++ rt2x00_set_field32(®, RXCSR0_DROP_MCAST, 0); ++ ++ if (config->config_flags & CONFIG_DROP_MCAST) ++ rt2x00_set_field32(®, RXCSR0_DROP_BCAST, 1); ++ else ++ rt2x00_set_field32(®, RXCSR0_DROP_BCAST, 0); ++ ++ rt2x00_register_write(rt2x00pci, RXCSR0, reg); ++} ++ ++static void rt2x00_dev_update_channel(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u8 txpower = rt2x00_get_txpower(&rt2x00pci->chip, config->txpower); ++ u32 reg = 0x00000000; ++ ++ if (rt2x00_get_rf_value(&rt2x00pci->chip, config->channel, ++ &rt2x00pci->channel)) { ++ ERROR("RF values for chip %04x and channel %d not found.\n", ++ rt2x00_get_rf(&rt2x00pci->chip), config->channel); ++ return; ++ } ++ ++ /* ++ * Set TXpower. ++ */ ++ rt2x00_set_field32(&rt2x00pci->channel.rf3, RF3_TXPOWER, txpower); ++ ++ /* ++ * For RT2525 we should first set the channel to half band higher. ++ */ ++ if (rt2x00_rf(&rt2x00pci->chip, RF2525)) { ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf1); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf2 + ++ cpu_to_le32(0x00000020)); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++ if (rt2x00pci->channel.rf4) ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf4); ++ } ++ ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf1); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf2); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++ if (rt2x00pci->channel.rf4) ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf4); ++ ++ /* ++ * Channel 14 requires the Japan filter bit to be set. 
++ */ ++ rt2x00_bbp_regwrite(rt2x00pci, 70, ++ (config->channel == 14) ? 0x4e : 0x46); ++ ++ msleep(1); ++ ++ /* ++ * Clear false CRC during channel switch. ++ */ ++ rt2x00_register_read(rt2x00pci, CNT0, ®); ++ ++ DEBUG("Switching to channel %d. RF1: 0x%08x, RF2: 0x%08x, RF3: 0x%08x, RF4: 0x%08x.\n", ++ config->channel, rt2x00pci->channel.rf1, rt2x00pci->channel.rf2, ++ rt2x00pci->channel.rf3, rt2x00pci->channel.rf4); ++} ++ ++static void rt2x00_dev_update_rate(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 value = 0x00000000; ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, TXCSR1, ®); ++ ++ value = config->sifs + (2 * config->slot_time) + config->plcp + ++ get_preamble(config) + ++ get_duration(ACK_SIZE, capabilities.bitrate[0]); ++ rt2x00_set_field32(®, TXCSR1_ACK_TIMEOUT, value); ++ ++ value = config->sifs + config->plcp + get_preamble(config) + ++ get_duration(ACK_SIZE, capabilities.bitrate[0]); ++ rt2x00_set_field32(®, TXCSR1_ACK_CONSUME_TIME, value); ++ ++ rt2x00_set_field32(®, TXCSR1_TSF_OFFSET, 0x18); ++ rt2x00_set_field32(®, TXCSR1_AUTORESPONDER, 1); ++ ++ rt2x00_register_write(rt2x00pci, TXCSR1, reg); ++ ++ reg = 0x00000000; ++ for (counter = 0; counter < 12; counter++) { ++ reg |= cpu_to_le32(0x00000001 << counter); ++ if (capabilities.bitrate[counter] == config->bitrate) ++ break; ++ } ++ ++ rt2x00_register_write(rt2x00pci, ARCSR1, reg); ++} ++ ++static void rt2x00_dev_update_txpower(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u8 txpower = rt2x00_get_txpower(&rt2x00pci->chip, config->txpower); ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_set_field32(&rt2x00pci->channel.rf3, RF3_TXPOWER, txpower); ++ rt2x00_rf_regwrite(rt2x00pci, rt2x00pci->channel.rf3); ++} ++ ++static void rt2x00_dev_update_antenna(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg; ++ u8 reg_rx; ++ u8 reg_tx; ++ ++ rt2x00_register_read(rt2x00pci, BBPCSR1, ®); ++ rt2x00_bbp_regread(rt2x00pci, 14, ®_rx); ++ rt2x00_bbp_regread(rt2x00pci, 2, ®_tx); ++ ++ /* TX antenna select */ ++ if (config->antenna_tx == 1) { ++ /* Antenna A */ ++ reg_tx = (reg_tx & 0xfc) | 0x00; ++ reg = (reg & 0xfffcfffc) | 0x00; ++ } else if (config->antenna_tx == 2) { ++ /* Antenna B */ ++ reg_tx = (reg_tx & 0xfc) | 0x02; ++ reg = (reg & 0xfffcfffc) | 0x00020002; ++ } else { ++ /* Diversity */ ++ reg_tx = (reg_tx & 0xfc) | 0x02; ++ reg = (reg & 0xfffcfffc) | 0x00020002; ++ } ++ ++ /* RX antenna select */ ++ if (config->antenna_rx == 1) ++ reg_rx = (reg_rx & 0xfc) | 0x00; ++ else if (config->antenna_rx == 2) ++ reg_rx = (reg_rx & 0xfc) | 0x02; ++ else ++ reg_rx = (reg_rx & 0xfc) | 0x02; ++ ++ /* ++ * RT2525E and RT5222 need to flip I/Q ++ */ ++ if (rt2x00_rf(&rt2x00pci->chip, RF5222)) { ++ reg |= 0x00040004; ++ reg_tx |= 0x04; ++ } else if (rt2x00_rf(&rt2x00pci->chip, RF2525E)) { ++ reg |= 0x00040004; ++ reg_tx |= 0x04; ++ reg_rx |= 0xfb; ++ } ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR1, reg); ++ rt2x00_bbp_regwrite(rt2x00pci, 14, reg_rx); ++ rt2x00_bbp_regwrite(rt2x00pci, 2, reg_tx); ++} ++ ++static void rt2x00_dev_update_duration(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ DEBUG("Start.\n"); ++ ++ rt2x00_register_read(rt2x00pci, CSR11, ®); ++ rt2x00_set_field32(®, CSR11_CWMIN, 5); /* 2^5 = 32. */ ++ rt2x00_set_field32(®, CSR11_CWMAX, 10); /* 2^10 = 1024. 
*/ ++ rt2x00_set_field32(®, CSR11_SLOT_TIME, config->slot_time); ++ rt2x00_set_field32(®, CSR11_CW_SELECT, 1); ++ rt2x00_register_write(rt2x00pci, CSR11, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR18, ®); ++ rt2x00_set_field32(®, CSR18_SIFS, config->sifs); ++ rt2x00_set_field32(®, CSR18_PIFS, config->sifs + config->slot_time); ++ rt2x00_register_write(rt2x00pci, CSR18, reg); ++ ++ rt2x00_register_read(rt2x00pci, CSR19, ®); ++ rt2x00_set_field32(®, CSR19_DIFS, ++ config->sifs + (2 * config->slot_time)); ++ rt2x00_set_field32(®, CSR19_EIFS, ++ config->sifs + ++ get_duration((IEEE80211_HEADER + ACK_SIZE), ++ capabilities.bitrate[0])); ++ rt2x00_register_write(rt2x00pci, CSR19, reg); ++} ++ ++static void rt2x00_dev_update_retry(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ rt2x00_register_read(rt2x00pci, CSR11, ®); ++ rt2x00_set_field32(®, CSR11_LONG_RETRY, config->long_retry); ++ rt2x00_set_field32(®, CSR11_SHORT_RETRY, config->short_retry); ++ rt2x00_register_write(rt2x00pci, CSR11, reg); ++} ++ ++static void rt2x00_dev_update_preamble(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg[4]; ++ u32 preamble = 0x00000000; ++ ++ memset(®, 0x00, sizeof(reg)); ++ ++ reg[0] = cpu_to_le32(0x00700400 | preamble); /* ARCSR2 */ ++ reg[1] = cpu_to_le32(0x00380401 | preamble); /* ARCSR3 */ ++ reg[2] = cpu_to_le32(0x00150402 | preamble); /* ARCSR4 */ ++ reg[3] = cpu_to_le32(0x000b8403 | preamble); /* ARCSR5 */ ++ ++ rt2x00_register_multiwrite(rt2x00pci, ARCSR2, ®[0], sizeof(reg)); ++} ++ ++static void rt2x00_dev_update_led(struct _rt2x00_pci *rt2x00pci, ++ struct _rt2x00_config *config) ++{ ++ u32 reg = 0x00000000; ++ ++ rt2x00_register_read(rt2x00pci, LEDCSR, ®); ++ rt2x00_set_field32(®, LEDCSR_LINK, config->led_status ? 1 : 0); ++ rt2x00_register_write(rt2x00pci, LEDCSR, reg); ++} ++ ++static int rt2x00_dev_update_config(struct _rt2x00_core *core, u16 update_flags) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ DEBUG("Start.\n"); ++ ++ if (update_flags & UPDATE_BSSID) ++ rt2x00_dev_update_bssid(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_PACKET_FILTER) ++ rt2x00_dev_update_packet_filter(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_CHANNEL) ++ rt2x00_dev_update_channel(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_BITRATE) ++ rt2x00_dev_update_rate(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_TXPOWER) ++ rt2x00_dev_update_txpower(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_ANTENNA) ++ rt2x00_dev_update_antenna(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_DURATION) ++ rt2x00_dev_update_duration(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_RETRY) ++ rt2x00_dev_update_retry(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_PREAMBLE) ++ rt2x00_dev_update_preamble(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_LED_STATUS) ++ rt2x00_dev_update_led(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_AUTORESP) ++ rt2x00_dev_update_autoresp(rt2x00pci, &core->config); ++ ++ if (update_flags & UPDATE_BBPSENS) ++ rt2x00_dev_update_bbpsens(rt2x00pci, &core->config); ++ ++ DEBUG("Exit.\n"); ++ ++ return 0; ++} ++ ++/* ++ * Transmission routines. ++ * rt2x00_write_tx_desc will write the txd descriptor. ++ * rt2x00_dev_xmit_packet will copy the packets to the appropriate DMA ring. ++ */ ++ ++/* ++ * PLCP_SIGNAL, PLCP_SERVICE, PLCP_LENGTH_LOW and PLCP_LENGTH_HIGH are BBP registers. 
++ * For RT2460 devices we need, besides the value we want to write, ++ * also set the busy bit (0x8000) and the register number (0x0f00). ++ * The value we want to write is stored in 0x00ff. ++ * For PLCP_SIGNAL we can optionally enable SHORT_PREAMBLE. ++ * For PLCP_SERVICE we can set the length extension bit according to ++ * 802.11b standard 18.2.3.5. ++ */ ++static void rt2x00_write_tx_desc(struct _rt2x00_pci *rt2x00pci, ++ struct _txd *txd, u32 packet_size, u16 rate, ++ u16 xmit_flags) ++{ ++ u32 residual = 0x00000000; ++ u32 duration = 0x00000000; ++ u16 signal = 0x0000; ++ u16 service = 0x0000; ++ u16 length_low = 0x0000; ++ u16 length_high = 0x0000; ++ ++ rt2x00_set_field32(&txd->word0, TXD_W0_VALID, 1); ++ rt2x00_set_field32(&txd->word0, TXD_W0_DATABYTE_COUNT, packet_size); ++ rt2x00_set_field32(&txd->word0, TXD_W0_ACK, ++ (xmit_flags & XMIT_ACK) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_RETRY_MODE, ++ (xmit_flags & XMIT_LONG_RETRY) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_TIMESTAMP, ++ (xmit_flags & XMIT_TIMESTAMP) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_MORE_FRAG, ++ (xmit_flags & XMIT_MORE_FRAGS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_MORE_FRAG, ++ (xmit_flags & XMIT_RTS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word10, TXD_W10_RTS, ++ (xmit_flags & XMIT_RTS) ? 1 : 0); ++ rt2x00_set_field32(&txd->word0, TXD_W0_OFDM, ++ (xmit_flags & XMIT_OFDM) ? 1 : 0); ++ ++ packet_size += 4; ++ ++ if (xmit_flags & XMIT_OFDM) { ++ /* ++ * convert length to microseconds. ++ */ ++ length_high = (packet_size >> 6) & 0x3f; ++ length_low = (packet_size & 0x3f); ++ } else { ++ residual = get_duration_res(packet_size, rate); ++ duration = get_duration(packet_size, rate); ++ ++ if (residual != 0) ++ duration++; ++ ++ length_high = duration >> 8; ++ length_low = duration & 0xff; ++ } ++ ++ signal |= 0x8500 | rt2x00_get_plcp(rate); ++ if (xmit_flags & XMIT_SHORT_PREAMBLE) ++ signal |= 0x0008; ++ ++ service |= 0x0600 | 0x0004; ++ if (residual <= (8 % 11)) ++ service |= 0x0080; ++ ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_SIGNAL, signal); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_SERVICE, service); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_LENGTH_LOW, length_low); ++ rt2x00_set_field32(&txd->word3, TXD_W3_PLCP_LENGTH_HIGH, length_high); ++ ++ /* set XMIT_IFS to XMIT_IFS_NONE */ ++ rt2x00_set_field32(&txd->word0, TXD_W0_IFS, XMIT_IFS_NONE); ++ ++ /* highest priority */ ++ rt2x00_set_field32(&txd->word2, TXD_W2_CWMIN, 1); ++ rt2x00_set_field32(&txd->word2, TXD_W2_CWMAX, 2); ++ rt2x00_set_field32(&txd->word2, TXD_W2_AIFS, 1); ++ ++ /* ++ * set this last, after this the device can start transmitting the packet. 
++ */ ++ rt2x00_set_field32(&txd->word0, TXD_W0_OWNER_NIC, 1); ++} ++ ++static int rt2x00_dev_xmit_packet(struct _rt2x00_core *core, ++ struct rtskb *rtskb, u16 rate, u16 xmit_flags) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ struct _data_ring *ring = NULL; ++ struct _txd *txd = NULL; ++ void *data = NULL; ++ u32 reg = 0x00000000; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rt2x00pci->lock, context); ++ ++ /* load tx-control register */ ++ rt2x00_register_read(rt2x00pci, TXCSR0, ®); ++ ++ /* select tx-descriptor ring and prepare xmit */ ++ ring = &rt2x00pci->tx; ++ rt2x00_set_field32(®, TXCSR0_KICK_TX, 1); ++ ++ txd = DESC_ADDR(ring); ++ data = DATA_ADDR(ring); ++ ++ if (rt2x00_get_field32(txd->word0, TXD_W0_OWNER_NIC) || ++ rt2x00_get_field32(txd->word0, TXD_W0_VALID)) { ++ rtdm_lock_put_irqrestore(&rt2x00pci->lock, context); ++ return -ENOMEM; ++ } ++ ++ /* get and patch time stamp just before the transmission */ ++ if (rtskb->xmit_stamp) ++ *rtskb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *rtskb->xmit_stamp); ++ ++ /* copy rtskb to dma */ ++ memcpy(data, rtskb->data, rtskb->len); ++ ++ rt2x00_write_tx_desc(rt2x00pci, txd, rtskb->len, rate, xmit_flags); ++ rt2x00_ring_index_inc(ring); ++ ++ /* let the device do the rest ... */ ++ rt2x00_register_write(rt2x00pci, TXCSR0, reg); ++ ++ rtdm_lock_put_irqrestore(&rt2x00pci->lock, context); ++ ++ return 0; ++} ++ ++/* ++ * PCI device handlers for usage by core module. ++ */ ++static struct _rt2x00_dev_handler rt2x00_pci_handler = { ++ ++ .dev_module = THIS_MODULE, ++ .dev_probe = rt2x00_dev_probe, ++ .dev_remove = rt2x00_dev_remove, ++ .dev_radio_on = rt2x00_dev_radio_on, ++ .dev_radio_off = rt2x00_dev_radio_off, ++ .dev_update_config = rt2x00_dev_update_config, ++ .dev_register_access = rt2x00_dev_register_access, ++ .dev_xmit_packet = rt2x00_dev_xmit_packet, ++}; ++ ++int rt2x00_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) ++{ ++ struct rtnet_device *rtnet_dev = NULL; ++ int status = 0x00000000; ++ ++ DEBUG("start.\n"); ++ ++ if (id->driver_data != RT2560) { ++ ERROR("detected device not supported.\n"); ++ status = -ENODEV; ++ goto exit; ++ } ++ ++ if (pci_enable_device(pci_dev)) { ++ ERROR("enable device failed.\n"); ++ status = -EIO; ++ goto exit; ++ } ++ ++ pci_set_master(pci_dev); ++ ++ if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) && ++ pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { ++ ERROR("PCI DMA not supported\n"); ++ status = -EIO; ++ goto exit_disable_device; ++ } ++ ++ if (pci_request_regions(pci_dev, pci_name(pci_dev))) { ++ ERROR("PCI request regions failed.\n"); ++ status = -EBUSY; ++ goto exit_disable_device; ++ } ++ INFO("pci_dev->irq=%d\n", pci_dev->irq); ++ ++ rtnet_dev = rt2x00_core_probe(&rt2x00_pci_handler, pci_dev, ++ sizeof(struct _rt2x00_pci)); ++ ++ if (!rtnet_dev) { ++ ERROR("rtnet_device allocation failed.\n"); ++ status = -ENOMEM; ++ goto exit_release_regions; ++ } ++ ++ rtnet_dev->irq = pci_dev->irq; ++ ++ pci_set_drvdata(pci_dev, rtnet_dev); ++ ++ return 0; ++ ++exit_release_regions: ++ pci_release_regions(pci_dev); ++ ++exit_disable_device: ++ if (status != -EBUSY) ++ pci_disable_device(pci_dev); ++ ++exit: ++ return status; ++} ++ ++static void rt2x00_pci_remove(struct pci_dev *pci_dev) ++{ ++ struct rtnet_device *rtnet_dev = pci_get_drvdata(pci_dev); ++ ++ rt2x00_core_remove(rtnet_dev); ++ pci_set_drvdata(pci_dev, NULL); ++ pci_release_regions(pci_dev); ++ pci_disable_device(pci_dev); ++} ++ ++/* ++ * RT2500 PCI module information. 
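++ * The PCI ID table matches only the RT2560 (PCI 0x1814:0x0201);
++ * rt2x00_pci_probe() rejects any other driver_data value.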
++ */ ++char version[] = DRV_NAME " - " DRV_VERSION; ++ ++struct pci_device_id rt2x00_device_pci_tbl[] = { ++ { PCI_DEVICE(0x1814, 0x0201), ++ .driver_data = RT2560 }, /* Ralink 802.11g */ ++ { ++ 0, ++ } ++}; ++ ++MODULE_AUTHOR(DRV_AUTHOR); ++MODULE_DESCRIPTION("RTnet rt2500 PCI WLAN driver (PCI Module)"); ++MODULE_LICENSE("GPL"); ++ ++struct pci_driver rt2x00_pci_driver = { ++ .name = DRV_NAME, ++ .id_table = rt2x00_device_pci_tbl, ++ .probe = rt2x00_pci_probe, ++ .remove = rt2x00_pci_remove, ++}; ++ ++static int __init rt2x00_pci_init(void) ++{ ++ rtdm_printk(KERN_INFO "Loading module: %s\n", version); ++ return pci_register_driver(&rt2x00_pci_driver); ++} ++ ++static void __exit rt2x00_pci_exit(void) ++{ ++ rtdm_printk(KERN_INFO "Unloading module: %s\n", version); ++ pci_unregister_driver(&rt2x00_pci_driver); ++} ++ ++module_init(rt2x00_pci_init); ++module_exit(rt2x00_pci_exit); +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/Makefile 2021-04-07 16:01:27.605633626 +0800 +@@ -0,0 +1,6 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_RT2500) += rt_rt2x00core.o rt_rt2500pci.o ++ ++rt_rt2x00core-y := rt2x00core.o ++rt_rt2500pci-y := rt2500pci.o +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2x00.h 2021-04-07 16:01:27.600633633 +0800 +@@ -0,0 +1,649 @@ ++/* rt2x00.h ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ Module: rt2x00 ++ Abstract: rt2x00 global information. ++ Supported chipsets: RT2560 ++*/ ++ ++#ifndef RT2X00_H ++#define RT2X00_H ++ ++#include ++#include ++ ++#include ++#include ++ ++#define MAX_UNITS 2 ++ ++/* ++ * Module information. ++ */ ++#define DRV_NAME "rt2x00" ++#define DRV_VERSION "0.1" ++#define DRV_AUTHOR "Daniel Gregorek " ++//#define CONFIG_RT2X00_DEBUG ++ ++/* ++ * Debug defines. ++ * The debug variable will be exported by the device specific module. ++ * For this reason this variable must be set to extern to make it accessible ++ * to the core module as well. ++ */ ++#ifdef CONFIG_RT2X00_DEBUG ++extern int rt2x00_debug_level; ++#define DEBUG_PRINTK(__message...) \ ++ do { \ ++ rtdm_printk(__message); \ ++ } while (0) ++#else /* CONFIG_RT2X00_DEBUG */ ++#define DEBUG_PRINTK(__message...) \ ++ do { \ ++ } while (0) ++#endif /* CONFIG_RT2X00_DEBUG */ ++ ++/* ++ * Various debug levels. ++ * PANIC and ERROR indicates serious problems within the module, ++ * these should never be ignored and thus we will always print the message. 
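++ * Only DEBUG() is compiled out when CONFIG_RT2X00_DEBUG is not set; all
++ * other levels always go through rtdm_printk().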
++ */ ++#define PANIC(__message, __args...) \ ++ rtdm_printk(KERN_PANIC DRV_NAME "->%s: Panic - " __message, \ ++ __FUNCTION__, ##__args); ++#define ERROR(__message, __args...) \ ++ rtdm_printk(KERN_ERR DRV_NAME "->%s: Error - " __message, \ ++ __FUNCTION__, ##__args); ++#define WARNING(__message, __args...) \ ++ rtdm_printk(KERN_WARNING DRV_NAME "->%s: Warning - " __message, \ ++ __FUNCTION__, ##__args); ++#define NOTICE(__message, __args...) \ ++ rtdm_printk(KERN_NOTICE DRV_NAME "->%s: Notice - " __message, \ ++ __FUNCTION__, ##__args); ++#define INFO(__message, __args...) \ ++ rtdm_printk(KERN_INFO DRV_NAME "->%s: Info - " __message, \ ++ __FUNCTION__, ##__args); ++#define DEBUG(__message, __args...) \ ++ DEBUG_PRINTK(KERN_DEBUG DRV_NAME "->%s: Debug - " __message, \ ++ __FUNCTION__, ##__args); ++ ++/* ++ * RT2x00 ring types. ++ */ ++ ++/* ++ * Ring names. ++ */ ++#define RING_RX 0x01 /* Ring used for receiving packets. */ ++#define RING_TX 0x02 /* Ring used for transmitting normal packets. */ ++ ++/* ++ * Ring sizes. ++ */ ++#define DATA_FRAME_SIZE 2432 ++#define MGMT_FRAME_SIZE 256 ++ ++/* ++ * RT2x00 xmit flags. ++ */ ++#define XMIT_IFS_SIFS 0x0001 ++#define XMIT_IFS_BACKOFF 0x0002 ++#define XMIT_IFS_NEW_BACKOFF 0x0004 ++#define XMIT_IFS_NONE 0x0008 ++#define XMIT_NEW_SEQUENCE 0x0010 ++#define XMIT_ACK 0x0020 ++#define XMIT_TIMESTAMP 0x0040 ++#define XMIT_RTS 0x0080 ++#define XMIT_OFDM 0x0100 ++#define XMIT_LONG_RETRY 0x0200 ++#define XMIT_MORE_FRAGS 0x0400 ++#define XMIT_SHORT_PREAMBLE 0x0800 ++#define XMIT_START 0x1000 ++ ++/* ++ * RT2x00 Statistics flags. ++ */ ++#define STATS_TX_RESULT 0x01 ++#define STATS_TX_RETRY_COUNT 0x02 ++#define STATS_RX_CRC 0x10 ++#define STATS_RX_PHYSICAL 0x20 ++#define STATS_RX_QUALITY 0x40 ++#define STATS_RX_DROP 0x80 ++ ++/* ++ * TX result flags. ++ */ ++#define TX_SUCCESS 0 ++#define TX_SUCCESS_RETRY 1 ++#define TX_FAIL_RETRY 2 ++#define TX_FAIL_INVALID 3 ++#define TX_FAIL_OTHER 4 ++ ++/* ++ * Channel type defines. ++ */ ++#define CHANNEL_OFDM 0x01 ++#define CHANNEL_UNII_LOW 0x02 ++#define CHANNEL_HIPERLAN2 0x04 ++#define CHANNEL_UNII_HIGH 0x08 ++ ++#define CHANNEL_OFDM_MIN 1 ++#define CHANNEL_OFDM_MAX 14 ++#define CHANNEL_UNII_LOW_MIN 36 ++#define CHANNEL_UNII_LOW_MAX 64 ++#define CHANNEL_HIPERLAN2_MIN 100 ++#define CHANNEL_HIPERLAN2_MAX 140 ++#define CHANNEL_UNII_HIGH_MIN 149 ++#define CHANNEL_UNII_HIGH_MAX 161 ++ ++/* ++ * Device 802.11abg capabilities. ++ */ ++static struct _rt2x00_capabilities { ++ u8 txpower[6]; ++ u8 bitrate[12]; ++} __attribute__ ((packed)) capabilities = { ++ /* ++ * tx-power. ++ */ ++ .txpower = { ++ 3, 12, 25, 50, 75, 100, ++ }, ++ ++ /* ++ * Bitrates ++ */ ++ .bitrate = { ++ 2, 4, 11, 22, /* CCK. */ ++ 12, 18, 24, 36, 48, 72, 96, 108, /* OFDM. */ ++ }, ++}; ++ ++struct _rt2x00_config { ++ u8 config_flags; ++#define CONFIG_DROP_BCAST 0x0001 ++#define CONFIG_DROP_MCAST 0x0002 ++#define CONFIG_AUTORESP 0x0004 ++ ++ u8 antenna_tx; ++ u8 antenna_rx; ++ ++ u8 bssid[ETH_ALEN]; ++ u8 short_retry; ++ u8 long_retry; ++ ++ u8 channel; ++ u8 bitrate; /* 0.5Mbit/sec */ ++ u8 txpower; /* % */ ++ ++ u8 bbpsens; ++ ++ /* ++ * LED status ++ */ ++ u8 led_status; ++ ++ u16 __pad2; /* For alignment only. */ ++ ++ /* ++ * Duration values in us. ++ */ ++ u8 plcp; ++ u8 sifs; ++ u8 slot_time; ++ ++ /* ++ * Configuration values that have to be updated to device. 
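++ * update_flags is a bitmask built from the UPDATE_* values below and is
++ * consumed by the device handler's dev_update_config().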
++ */ ++ u16 update_flags; ++#define UPDATE_ALL_CONFIG 0xffff ++#define UPDATE_BSSID 0x0001 ++#define UPDATE_PACKET_FILTER 0x0002 ++#define UPDATE_CHANNEL 0x0004 ++#define UPDATE_BITRATE 0x0008 ++#define UPDATE_RETRY 0x0010 ++#define UPDATE_TXPOWER 0x0020 ++#define UPDATE_ANTENNA 0x0040 ++#define UPDATE_DURATION 0x0080 ++#define UPDATE_PREAMBLE 0x0100 ++#define UPDATE_AUTORESP 0x0200 ++#define UPDATE_LED_STATUS 0x0400 ++#define UPDATE_BBPSENS 0x0800 ++ ++} __attribute__((packed)); ++ ++struct _rt2x00_core { ++ /* ++ * RT2x00 device status flags (atomic read/write access). ++ */ ++ unsigned long flags; ++ ++#define DEVICE_ENABLED 0 /* Device has been opened. */ ++#define DEVICE_AWAKE 1 /* Device is not suspended. */ ++#define DEVICE_RADIO_ON 2 /* Device antenna is enabled. */ ++#define DEVICE_CONFIG_UPDATE 3 /* Device is updating configuration. */ ++ ++ /* ++ * Device handler. ++ */ ++ struct _rt2x00_dev_handler *handler; ++ ++ /* ++ * RTnet device we belong to. ++ */ ++ struct rtnet_device *rtnet_dev; ++ ++ /* ++ * RTwlan stack structure. ++ */ ++ struct rtwlan_device *rtwlan_dev; ++ ++ /* ++ * Device configuration. ++ */ ++ struct _rt2x00_config config; ++ ++ void *priv; ++ ++} __attribute__((packed)); ++ ++/* ++ * Device specific handlers. ++ */ ++struct _rt2x00_dev_handler { ++ /* ++ * Device specific module. ++ */ ++ struct module *dev_module; ++ ++ /* ++ * Initialization handlers. ++ */ ++ int (*dev_probe)(struct _rt2x00_core *core, void *priv); ++ int (*dev_remove)(struct _rt2x00_core *core); ++ ++ /* ++ * Radio control. ++ */ ++ int (*dev_radio_on)(struct _rt2x00_core *core); ++ int (*dev_radio_off)(struct _rt2x00_core *core); ++ ++ /* ++ * Configuration handlers. ++ */ ++ int (*dev_update_config)(struct _rt2x00_core *core, u16 update_flags); ++ ++ /* ++ * xmit handler. ++ */ ++ int (*dev_xmit_packet)(struct _rt2x00_core *core, struct rtskb *rtskb, ++ u16 rate, u16 xmit_flags); ++ ++ /* ++ * Handler for direct access to register from core. ++ */ ++ int (*dev_register_access)(struct _rt2x00_core *core, int request, ++ u32 address, u32 *value); ++ ++} __attribute__((packed)); ++ ++static inline void *rt2x00_priv(const struct _rt2x00_core *core) ++{ ++ return core->priv; ++} ++ ++/* ++ * Duration calculations ++ * The rate variable passed is: 2 * real_rate (in Mb/s). ++ * Therefore length has to be multiplied with 8 to convert bytes to bits and mulltiply the length ++ * with 2 to compensate for the difference between real_rate and the rate variable. ++ */ ++#define ACK_SIZE 14 ++#define IEEE80211_HEADER 24 ++ ++static inline u16 get_duration(const unsigned int size, const u8 rate) ++{ ++ return ((size * 8 * 2) / rate); ++} ++ ++static inline u16 get_duration_res(const unsigned int size, const u8 rate) ++{ ++ return ((size * 8 * 2) % rate); ++} ++ ++static inline u16 get_preamble(const struct _rt2x00_config *config) ++{ ++ return 144; ++} ++ ++/* ++ * Register handlers. ++ * We store the position of a register field inside a field structure, ++ * This will simplify the process of setting and reading a certain field ++ * inside the register. 
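++ *
++ * For example, FIELD32(8, 0x0000ff00) describes bits 8..15 of a register:
++ * rt2x00_set_field32(&reg, FIELD32(8, 0x0000ff00), 0x12) places 0x12 there,
++ * and rt2x00_get_field32(reg, FIELD32(8, 0x0000ff00)) reads it back.
++ * The *_nb variants do the same without the little-endian conversion.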
++ */ ++struct _rt2x00_field16 { ++ u16 bit_offset; ++ u16 bit_mask; ++} __attribute__((packed)); ++ ++struct _rt2x00_field32 { ++ u32 bit_offset; ++ u32 bit_mask; ++} __attribute__((packed)); ++ ++#define FIELD16(__offset, __mask) \ ++ ((struct _rt2x00_field16){ (__offset), (__mask) }) ++#define FIELD32(__offset, __mask) \ ++ ((struct _rt2x00_field32){ (__offset), (__mask) }) ++ ++static inline void rt2x00_set_field32(u32 *reg, ++ const struct _rt2x00_field32 field, ++ const u32 value) ++{ ++ *reg &= cpu_to_le32(~(field.bit_mask)); ++ *reg |= cpu_to_le32((value << field.bit_offset) & field.bit_mask); ++} ++ ++static inline void rt2x00_set_field32_nb(u32 *reg, ++ const struct _rt2x00_field32 field, ++ const u32 value) ++{ ++ *reg &= ~(field.bit_mask); ++ *reg |= (value << field.bit_offset) & field.bit_mask; ++} ++ ++static inline u32 rt2x00_get_field32(const u32 reg, ++ const struct _rt2x00_field32 field) ++{ ++ return (le32_to_cpu(reg) & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline u32 rt2x00_get_field32_nb(const u32 reg, ++ const struct _rt2x00_field32 field) ++{ ++ return (reg & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline void rt2x00_set_field16(u16 *reg, ++ const struct _rt2x00_field16 field, ++ const u16 value) ++{ ++ *reg &= cpu_to_le16(~(field.bit_mask)); ++ *reg |= cpu_to_le16((value << field.bit_offset) & field.bit_mask); ++} ++ ++static inline void rt2x00_set_field16_nb(u16 *reg, ++ const struct _rt2x00_field16 field, ++ const u16 value) ++{ ++ *reg &= ~(field.bit_mask); ++ *reg |= (value << field.bit_offset) & field.bit_mask; ++} ++ ++static inline u16 rt2x00_get_field16(const u16 reg, ++ const struct _rt2x00_field16 field) ++{ ++ return (le16_to_cpu(reg) & field.bit_mask) >> field.bit_offset; ++} ++ ++static inline u16 rt2x00_get_field16_nb(const u16 reg, ++ const struct _rt2x00_field16 field) ++{ ++ return (reg & field.bit_mask) >> field.bit_offset; ++} ++ ++/* ++ * rf register sructure for channel selection. ++ */ ++struct _rf_channel { ++ u32 rf1; ++ u32 rf2; ++ u32 rf3; ++ u32 rf4; ++} __attribute__((packed)); ++ ++/* ++ * Chipset identification ++ * The chipset on the device is composed of a RT and RF chip. ++ * The chipset combination is important for determining device capabilities. ++ */ ++struct _rt2x00_chip { ++ u16 rt; ++ u16 rf; ++} __attribute__((packed)); ++ ++/* ++ * Set chipset data. ++ * Some rf values for RT2400 devices are equal to rf values for RT2500 devices. ++ * To prevent problems, all rf values will be masked to clearly seperate each chipset. ++ */ ++static inline void set_chip(struct _rt2x00_chip *chipset, const u16 rt, ++ const u16 rf) ++{ ++ INFO("Chipset detected - rt: %04x, rf: %04x.\n", rt, rf); ++ ++ chipset->rt = rt; ++ chipset->rf = rf | (chipset->rt & 0xff00); ++} ++ ++static inline char rt2x00_rt(const struct _rt2x00_chip *chipset, const u16 chip) ++{ ++ return (chipset->rt == chip); ++} ++ ++static inline char rt2x00_rf(const struct _rt2x00_chip *chipset, const u16 chip) ++{ ++ return (chipset->rf == chip); ++} ++ ++static inline u16 rt2x00_get_rf(const struct _rt2x00_chip *chipset) ++{ ++ return chipset->rf; ++} ++ ++/* ++ * _data_ring ++ * Data rings are used by the device to send and receive packets. ++ * The data_addr is the base address of the data memory. ++ * Device specifice information is pointed to by the priv pointer. ++ * The index values may only be changed with the functions ring_index_inc() ++ * and ring_index_done_inc(). ++ */ ++struct _data_ring { ++ /* ++ * Base address of packet ring. 
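++ * data_dma is the bus address handed to the device (via TXCSR3/RXCSR2),
++ * data_addr the CPU mapping of the same memory.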
++ */ ++ dma_addr_t data_dma; ++ void *data_addr; ++ ++ /* ++ * Private device specific data. ++ */ ++ void *priv; ++ struct _rt2x00_core *core; ++ ++ /* ++ * Current index values. ++ */ ++ u8 index; ++ u8 index_done; ++ ++ /* ++ * Ring type set with RING_* define. ++ */ ++ u8 ring_type; ++ ++ /* ++ * Number of entries in this ring. ++ */ ++ u8 max_entries; ++ ++ /* ++ * Size of packet and descriptor in bytes. ++ */ ++ u16 entry_size; ++ u16 desc_size; ++ ++ /* ++ * Total allocated memory size. ++ */ ++ u32 mem_size; ++} __attribute__((packed)); ++ ++/* ++ * Number of entries in a packet ring. ++ */ ++#define RX_ENTRIES 8 ++#define TX_ENTRIES 8 ++#define ATIM_ENTRIES 1 ++#define PRIO_ENTRIES 2 ++#define BEACON_ENTRIES 1 ++ ++/* ++ * Initialization and cleanup routines. ++ */ ++static inline void rt2x00_init_ring(struct _rt2x00_core *core, ++ struct _data_ring *ring, const u8 ring_type, ++ const u16 max_entries, const u16 entry_size, ++ const u16 desc_size) ++{ ++ ring->core = core; ++ ring->index = 0; ++ ring->index_done = 0; ++ ring->ring_type = ring_type; ++ ring->max_entries = max_entries; ++ ring->entry_size = entry_size; ++ ring->desc_size = desc_size; ++ ring->mem_size = ++ ring->max_entries * (ring->desc_size + ring->entry_size); ++} ++ ++static inline void rt2x00_deinit_ring(struct _data_ring *ring) ++{ ++ ring->core = NULL; ++ ring->index = 0; ++ ring->index_done = 0; ++ ring->ring_type = 0; ++ ring->max_entries = 0; ++ ring->entry_size = 0; ++ ring->desc_size = 0; ++ ring->mem_size = 0; ++} ++ ++/* ++ * Ring index manipulation functions. ++ */ ++static inline void rt2x00_ring_index_inc(struct _data_ring *ring) ++{ ++ ring->index = (++ring->index < ring->max_entries) ? ring->index : 0; ++} ++ ++static inline void rt2x00_ring_index_done_inc(struct _data_ring *ring) ++{ ++ ring->index_done = ++ (++ring->index_done < ring->max_entries) ? ring->index_done : 0; ++} ++ ++static inline void rt2x00_ring_clear_index(struct _data_ring *ring) ++{ ++ ring->index = 0; ++ ring->index_done = 0; ++} ++ ++static inline u8 rt2x00_ring_empty(struct _data_ring *ring) ++{ ++ return ring->index_done == ring->index; ++} ++ ++static inline u8 rt2x00_ring_free_entries(struct _data_ring *ring) ++{ ++ if (ring->index >= ring->index_done) ++ return ring->max_entries - (ring->index - ring->index_done); ++ else ++ return ring->index_done - ring->index; ++} ++ ++/* ++ * Return PLCP value matching the rate. ++ * PLCP values according to ieee802.11a-1999 p.14. ++ */ ++static inline u8 rt2x00_get_plcp(const u8 rate) ++{ ++ u8 counter = 0x00; ++ u8 plcp[12] = { ++ 0x00, 0x01, 0x02, 0x03, /* CCK. */ ++ 0x0b, 0x0f, 0x0a, 0x0e, 0x09, 0x0d, 0x08, 0x0c, /* OFDM. */ ++ }; ++ ++ for (; counter < 12; counter++) { ++ if (capabilities.bitrate[counter] == rate) ++ return plcp[counter]; ++ } ++ ++ return 0xff; ++} ++ ++#define OFDM_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_OFDM_MIN && (__channel) <= CHANNEL_OFDM_MAX) ++#define UNII_LOW_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_UNII_LOW_MIN && \ ++ (__channel) <= CHANNEL_UNII_LOW_MAX) ++#define HIPERLAN2_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_HIPERLAN2_MIN && \ ++ (__channel) <= CHANNEL_HIPERLAN2_MAX) ++#define UNII_HIGH_CHANNEL(__channel) \ ++ ((__channel) >= CHANNEL_UNII_HIGH_MIN && \ ++ (__channel) <= CHANNEL_UNII_HIGH_MAX) ++ ++/* ++ * Return the index value of the channel starting from the first channel of the range. ++ * Where range can be OFDM, UNII (low), HiperLAN2 or UNII (high). 
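++ * For example, OFDM channel 5 maps to index 4, UNII (low) channel 40 maps
++ * to index 1, and a 5GHz channel that is not a multiple of 4 yields -EINVAL.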
++ */ ++static inline int rt2x00_get_channel_index(const u8 channel) ++{ ++ if (OFDM_CHANNEL(channel)) ++ return (channel - 1); ++ ++ if (channel % 4) ++ return -EINVAL; ++ ++ if (UNII_LOW_CHANNEL(channel)) ++ return ((channel - CHANNEL_UNII_LOW_MIN) / 4); ++ else if (HIPERLAN2_CHANNEL(channel)) ++ return ((channel - CHANNEL_HIPERLAN2_MIN) / 4); ++ else if (UNII_HIGH_CHANNEL(channel)) ++ return ((channel - CHANNEL_UNII_HIGH_MIN) / 4); ++ return -EINVAL; ++} ++ ++/* ++ * RT2x00 core module functions that can be used in the device specific modules. ++ */ ++extern struct rtnet_device * ++rt2x00_core_probe(struct _rt2x00_dev_handler *handler, void *priv, ++ u32 sizeof_dev); ++extern void rt2x00_core_remove(struct rtnet_device *rtnet_dev); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/Kconfig 2021-04-07 16:01:27.595633640 +0800 +@@ -0,0 +1,4 @@ ++config XENO_DRIVERS_NET_DRV_RT2500 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Ralink 2500 WLAN" ++ select XENO_DRIVERS_NET_RTWLAN +--- linux/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/rt2500/rt2500pci.h 2021-04-07 16:01:27.591633646 +0800 +@@ -0,0 +1,1498 @@ ++/* rt2500pci.h ++ * ++ * Copyright (C) 2004 - 2005 rt2x00-2.0.0-b3 SourceForge Project ++ * ++ * 2006 rtnet adaption by Daniel Gregorek ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the ++ * Free Software Foundation, Inc., ++ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* ++ * Module: rt2500pci ++ * Abstract: Data structures and registers for the rt2500pci module. ++ * Supported chipsets: RT2560. ++ */ ++ ++#ifndef RT2500PCI_H ++#define RT2500PCI_H ++ ++/* ++ * RT chip defines ++ */ ++#define RT2560 0x0201 ++ ++/* ++ * RF chip defines ++ */ ++#define RF2522 0x0200 ++#define RF2523 0x0201 ++#define RF2524 0x0202 ++#define RF2525 0x0203 ++#define RF2525E 0x0204 ++#define RF5222 0x0210 ++ ++/* ++ * Control/Status Registers(CSR). ++ */ ++#define CSR0 0x0000 /* ASIC revision number. */ ++#define CSR1 0x0004 /* System control register. */ ++#define CSR2 0x0008 /* System admin status register (invalid). */ ++#define CSR3 0x000c /* STA MAC address register 0. */ ++#define CSR4 0x0010 /* STA MAC address register 1. */ ++#define CSR5 0x0014 /* BSSID register 0. */ ++#define CSR6 0x0018 /* BSSID register 1. */ ++#define CSR7 0x001c /* Interrupt source register. */ ++#define CSR8 0x0020 /* Interrupt mask register. */ ++#define CSR9 0x0024 /* Maximum frame length register. */ ++#define SECCSR0 0x0028 /* WEP control register. */ ++#define CSR11 0x002c /* Back-off control register. */ ++#define CSR12 0x0030 /* Synchronization configuration register 0. */ ++#define CSR13 0x0034 /* Synchronization configuration register 1. 
*/ ++#define CSR14 0x0038 /* Synchronization control register. */ ++#define CSR15 0x003c /* Synchronization status register. */ ++#define CSR16 0x0040 /* TSF timer register 0. */ ++#define CSR17 0x0044 /* TSF timer register 1. */ ++#define CSR18 0x0048 /* IFS timer register 0. */ ++#define CSR19 0x004c /* IFS timer register 1. */ ++#define CSR20 0x0050 /* WakeUp register. */ ++#define CSR21 0x0054 /* EEPROM control register. */ ++#define CSR22 0x0058 /* CFP Control Register. */ ++ ++/* ++ * Transmit related CSRs. ++ */ ++#define TXCSR0 0x0060 /* TX control register. */ ++#define TXCSR1 0x0064 /* TX configuration register. */ ++#define TXCSR2 0x0068 /* TX descriptor configuratioon register. */ ++#define TXCSR3 0x006c /* TX Ring Base address register. */ ++#define TXCSR4 0x0070 /* TX Atim Ring Base address register. */ ++#define TXCSR5 0x0074 /* TX Prio Ring Base address register. */ ++#define TXCSR6 0x0078 /* Beacon base address. */ ++#define TXCSR7 0x007c /* AutoResponder Control Register. */ ++#define TXCSR8 0x0098 /* CCK TX BBP registers. */ ++#define TXCSR9 0x0094 /* OFDM TX BBP registers. */ ++ ++/* ++ * Receive related CSRs. ++ */ ++#define RXCSR0 0x0080 /* RX control register. */ ++#define RXCSR1 0x0084 /* RX descriptor configuration register. */ ++#define RXCSR2 0x0088 /* RX Ring base address register. */ ++#define RXCSR3 0x0090 /* BBP ID register 0 */ ++#define ARCSR1 0x009c /* Auto Responder PLCP config register 1. */ ++ ++/* ++ * PCI control CSRs. ++ */ ++#define PCICSR 0x008c /* PCI control register. */ ++ ++/* ++ * Statistic Register. ++ */ ++#define CNT0 0x00a0 /* FCS error count. */ ++#define TIMECSR2 0x00a8 ++#define CNT1 0x00ac /* PLCP error count. */ ++#define CNT2 0x00b0 /* long error count. */ ++#define TIMECSR3 0x00b4 ++#define CNT3 0x00b8 /* CCA false alarm count. */ ++#define CNT4 0x00bc /* Rx FIFO overflow count. */ ++#define CNT5 0x00c0 /* Tx FIFO underrun count. */ ++ ++/* ++ * Baseband Control Register. ++ */ ++#define PWRCSR0 0x00c4 /* Power mode configuration. */ ++#define PSCSR0 0x00c8 /* Power state transition time. */ ++#define PSCSR1 0x00cc /* Power state transition time. */ ++#define PSCSR2 0x00d0 /* Power state transition time. */ ++#define PSCSR3 0x00d4 /* Power state transition time. */ ++#define PWRCSR1 0x00d8 /* Manual power control / status. */ ++#define TIMECSR 0x00dc /* Timer control. */ ++#define MACCSR0 0x00e0 /* MAC configuration. */ ++#define MACCSR1 0x00e4 /* MAC configuration. */ ++#define RALINKCSR 0x00e8 /* Ralink Auto-reset register. */ ++#define BCNCSR 0x00ec /* Beacon interval control register. */ ++ ++/* ++ * BBP / RF / IF Control Register. ++ */ ++#define BBPCSR 0x00f0 /* BBP serial control. */ ++#define RFCSR 0x00f4 /* RF serial control. */ ++#define LEDCSR 0x00f8 /* LED control register */ ++ ++#define SECCSR3 0x00fc /* AES control register. */ ++ ++/* ++ * ASIC pointer information. ++ */ ++#define RXPTR 0x0100 /* Current RX ring address. */ ++#define TXPTR 0x0104 /* Current Tx ring address. */ ++#define PRIPTR 0x0108 /* Current Priority ring address. */ ++#define ATIMPTR 0x010c /* Current ATIM ring address. */ ++ ++#define TXACKCSR0 0x0110 /* TX ACK timeout. */ ++#define ACKCNT0 0x0114 /* TX ACK timeout count. */ ++#define ACKCNT1 0x0118 /* RX ACK timeout count. */ ++ ++/* ++ * GPIO and others. ++ */ ++#define GPIOCSR 0x0120 /* GPIO. */ ++#define FIFOCSR0 0x0128 /* TX FIFO pointer. */ ++#define FIFOCSR1 0x012c /* RX FIFO pointer. */ ++#define BCNCSR1 0x0130 /* Tx BEACON offset time, unit: 1 usec. 
*/ ++#define MACCSR2 0x0134 /* TX_PE to RX_PE delay time, unit: 1 PCI clock cycle. */ ++#define TESTCSR 0x0138 /* TEST mode selection register. */ ++#define ARCSR2 0x013c /* 1 Mbps ACK/CTS PLCP. */ ++#define ARCSR3 0x0140 /* 2 Mbps ACK/CTS PLCP. */ ++#define ARCSR4 0x0144 /* 5.5 Mbps ACK/CTS PLCP. */ ++#define ARCSR5 0x0148 /* 11 Mbps ACK/CTS PLCP. */ ++#define ARTCSR0 0x014c /* ACK/CTS payload consumed time for 1/2/5.5/11 mbps. */ ++#define ARTCSR1 \ ++ 0x0150 /* OFDM ACK/CTS payload consumed time for 6/9/12/18 mbps. */ ++#define ARTCSR2 \ ++ 0x0154 /* OFDM ACK/CTS payload consumed time for 24/36/48/54 mbps. */ ++#define SECCSR1 0x0158 /* WEP control register. */ ++#define BBPCSR1 0x015c /* BBP TX configuration. */ ++#define DBANDCSR0 0x0160 /* Dual band configuration register 0. */ ++#define DBANDCSR1 0x0164 /* Dual band configuration register 1. */ ++#define BBPPCSR 0x0168 /* BBP Pin control register. */ ++#define DBGSEL0 0x016c /* MAC special debug mode selection register 0. */ ++#define DBGSEL1 0x0170 /* MAC special debug mode selection register 1. */ ++#define BISTCSR 0x0174 /* BBP BIST register. */ ++#define MCAST0 0x0178 /* multicast filter register 0. */ ++#define MCAST1 0x017c /* multicast filter register 1. */ ++#define UARTCSR0 0x0180 /* UART1 TX register. */ ++#define UARTCSR1 0x0184 /* UART1 RX register. */ ++#define UARTCSR3 0x0188 /* UART1 frame control register. */ ++#define UARTCSR4 0x018c /* UART1 buffer control register. */ ++#define UART2CSR0 0x0190 /* UART2 TX register. */ ++#define UART2CSR1 0x0194 /* UART2 RX register. */ ++#define UART2CSR3 0x0198 /* UART2 frame control register. */ ++#define UART2CSR4 0x019c /* UART2 buffer control register. */ ++ ++/* ++ * EEPROM addresses ++ */ ++#define EEPROM_ANTENNA 0x10 ++#define EEPROM_GEOGRAPHY 0x12 ++#define EEPROM_BBP_START 0x13 ++#define EEPROM_BBP_END 0x22 ++ ++#define EEPROM_BBP_SIZE 16 ++ ++/* ++ * CSR Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * CSR1: System control register. ++ */ ++#define CSR1_SOFT_RESET \ ++ FIELD32(0, 0x00000001) /* Software reset, 1: reset, 0: normal. */ ++#define CSR1_BBP_RESET \ ++ FIELD32(1, 0x00000002) /* Hardware reset, 1: reset, 0, release. */ ++#define CSR1_HOST_READY \ ++ FIELD32(2, 0x00000004) /* Host ready after initialization. */ ++ ++/* ++ * CSR3: STA MAC address register 0. ++ */ ++#define CSR3_BYTE0 FIELD32(0, 0x000000ff) /* MAC address byte 0. */ ++#define CSR3_BYTE1 FIELD32(8, 0x0000ff00) /* MAC address byte 1. */ ++#define CSR3_BYTE2 FIELD32(16, 0x00ff0000) /* MAC address byte 2. */ ++#define CSR3_BYTE3 FIELD32(24, 0xff000000) /* MAC address byte 3. */ ++ ++/* ++ * CSR4: STA MAC address register 1. ++ */ ++#define CSR4_BYTE4 FIELD32(0, 0x000000ff) /* MAC address byte 4. */ ++#define CSR4_BYTE5 FIELD32(8, 0x0000ff00) /* MAC address byte 5. */ ++ ++/* ++ * CSR5: BSSID register 0. ++ */ ++#define CSR5_BYTE0 FIELD32(0, 0x000000ff) /* BSSID address byte 0. */ ++#define CSR5_BYTE1 FIELD32(8, 0x0000ff00) /* BSSID address byte 1. */ ++#define CSR5_BYTE2 FIELD32(16, 0x00ff0000) /* BSSID address byte 2. */ ++#define CSR5_BYTE3 FIELD32(24, 0xff000000) /* BSSID address byte 3. */ ++ ++/* ++ * CSR6: BSSID register 1. ++ */ ++#define CSR6_BYTE4 FIELD32(0, 0x000000ff) /* BSSID address byte 4. */ ++#define CSR6_BYTE5 FIELD32(8, 0x0000ff00) /* BSSID address byte 5. */ ++ ++/* ++ * CSR7: Interrupt source register. ++ * Write 1 to clear. ++ */ ++#define CSR7_TBCN_EXPIRE \ ++ FIELD32(0, 0x00000001) /* beacon timer expired interrupt. 
*/ ++#define CSR7_TWAKE_EXPIRE \ ++ FIELD32(1, 0x00000002) /* wakeup timer expired interrupt. */ ++#define CSR7_TATIMW_EXPIRE \ ++ FIELD32(2, 0x00000004) /* timer of atim window expired interrupt. */ ++#define CSR7_TXDONE_TXRING \ ++ FIELD32(3, 0x00000008) /* tx ring transmit done interrupt. */ ++#define CSR7_TXDONE_ATIMRING \ ++ FIELD32(4, 0x00000010) /* atim ring transmit done interrupt. */ ++#define CSR7_TXDONE_PRIORING \ ++ FIELD32(5, 0x00000020) /* priority ring transmit done interrupt. */ ++#define CSR7_RXDONE FIELD32(6, 0x00000040) /* receive done interrupt. */ ++#define CSR7_DECRYPTION_DONE \ ++ FIELD32(7, 0x00000080) /* Decryption done interrupt. */ ++#define CSR7_ENCRYPTION_DONE \ ++ FIELD32(8, 0x00000100) /* Encryption done interrupt. */ ++#define CSR7_UART1_TX_TRESHOLD \ ++ FIELD32(9, 0x00000200) /* UART1 TX reaches threshold. */ ++#define CSR7_UART1_RX_TRESHOLD \ ++ FIELD32(10, 0x00000400) /* UART1 RX reaches threshold. */ ++#define CSR7_UART1_IDLE_TRESHOLD \ ++ FIELD32(11, 0x00000800) /* UART1 IDLE over threshold. */ ++#define CSR7_UART1_TX_BUFF_ERROR \ ++ FIELD32(12, 0x00001000) /* UART1 TX buffer error. */ ++#define CSR7_UART1_RX_BUFF_ERROR \ ++ FIELD32(13, 0x00002000) /* UART1 RX buffer error. */ ++#define CSR7_UART2_TX_TRESHOLD \ ++ FIELD32(14, 0x00004000) /* UART2 TX reaches threshold. */ ++#define CSR7_UART2_RX_TRESHOLD \ ++ FIELD32(15, 0x00008000) /* UART2 RX reaches threshold. */ ++#define CSR7_UART2_IDLE_TRESHOLD \ ++ FIELD32(16, 0x00010000) /* UART2 IDLE over threshold. */ ++#define CSR7_UART2_TX_BUFF_ERROR \ ++ FIELD32(17, 0x00020000) /* UART2 TX buffer error. */ ++#define CSR7_UART2_RX_BUFF_ERROR \ ++ FIELD32(18, 0x00040000) /* UART2 RX buffer error. */ ++#define CSR7_TIMER_CSR3_EXPIRE \ ++ FIELD32(19, \ ++ 0x00080000) /* TIMECSR3 timer expired (802.1H quiet period). */ ++ ++/* ++ * CSR8: Interrupt mask register. ++ * Write 1 to mask interrupt. ++ */ ++#define CSR8_TBCN_EXPIRE \ ++ FIELD32(0, 0x00000001) /* beacon timer expired interrupt. */ ++#define CSR8_TWAKE_EXPIRE \ ++ FIELD32(1, 0x00000002) /* wakeup timer expired interrupt. */ ++#define CSR8_TATIMW_EXPIRE \ ++ FIELD32(2, 0x00000004) /* timer of atim window expired interrupt. */ ++#define CSR8_TXDONE_TXRING \ ++ FIELD32(3, 0x00000008) /* tx ring transmit done interrupt. */ ++#define CSR8_TXDONE_ATIMRING \ ++ FIELD32(4, 0x00000010) /* atim ring transmit done interrupt. */ ++#define CSR8_TXDONE_PRIORING \ ++ FIELD32(5, 0x00000020) /* priority ring transmit done interrupt. */ ++#define CSR8_RXDONE FIELD32(6, 0x00000040) /* receive done interrupt. */ ++#define CSR8_DECRYPTION_DONE \ ++ FIELD32(7, 0x00000080) /* Decryption done interrupt. */ ++#define CSR8_ENCRYPTION_DONE \ ++ FIELD32(8, 0x00000100) /* Encryption done interrupt. */ ++#define CSR8_UART1_TX_TRESHOLD \ ++ FIELD32(9, 0x00000200) /* UART1 TX reaches threshold. */ ++#define CSR8_UART1_RX_TRESHOLD \ ++ FIELD32(10, 0x00000400) /* UART1 RX reaches threshold. */ ++#define CSR8_UART1_IDLE_TRESHOLD \ ++ FIELD32(11, 0x00000800) /* UART1 IDLE over threshold. */ ++#define CSR8_UART1_TX_BUFF_ERROR \ ++ FIELD32(12, 0x00001000) /* UART1 TX buffer error. */ ++#define CSR8_UART1_RX_BUFF_ERROR \ ++ FIELD32(13, 0x00002000) /* UART1 RX buffer error. */ ++#define CSR8_UART2_TX_TRESHOLD \ ++ FIELD32(14, 0x00004000) /* UART2 TX reaches threshold. */ ++#define CSR8_UART2_RX_TRESHOLD \ ++ FIELD32(15, 0x00008000) /* UART2 RX reaches threshold. */ ++#define CSR8_UART2_IDLE_TRESHOLD \ ++ FIELD32(16, 0x00010000) /* UART2 IDLE over threshold. 
*/ ++#define CSR8_UART2_TX_BUFF_ERROR \ ++ FIELD32(17, 0x00020000) /* UART2 TX buffer error. */ ++#define CSR8_UART2_RX_BUFF_ERROR \ ++ FIELD32(18, 0x00040000) /* UART2 RX buffer error. */ ++#define CSR8_TIMER_CSR3_EXPIRE \ ++ FIELD32(19, \ ++ 0x00080000) /* TIMECSR3 timer expired (802.1H quiet period). */ ++ ++/* ++ * CSR9: Maximum frame length register. ++ */ ++#define CSR9_MAX_FRAME_UNIT \ ++ FIELD32(7, \ ++ 0x00000f80) /* maximum frame length in 128b unit, default: 12. */ ++ ++/* ++ * SECCSR0: WEP control register. ++ */ ++#define SECCSR0_KICK_DECRYPT \ ++ FIELD32(0, 0x00000001) /* Kick decryption engine, self-clear. */ ++#define SECCSR0_ONE_SHOT \ ++ FIELD32(1, 0x00000002) /* 0: ring mode, 1: One shot only mode. */ ++#define SECCSR0_DESC_ADDRESS \ ++ FIELD32(2, 0xfffffffc) /* Descriptor physical address of frame. */ ++ ++/* ++ * CSR11: Back-off control register. ++ */ ++#define CSR11_CWMIN \ ++ FIELD32(0, 0x0000000f) /* CWmin. Default cwmin is 31 (2^5 - 1). */ ++#define CSR11_CWMAX \ ++ FIELD32(4, 0x000000f0) /* CWmax. Default cwmax is 1023 (2^10 - 1). */ ++#define CSR11_SLOT_TIME \ ++ FIELD32(8, 0x00001f00) /* slot time, default is 20us for 802.11b */ ++#define CSR11_CW_SELECT \ ++ FIELD32(13, \ ++ 0x00002000) /* CWmin/CWmax selection, 1: Register, 0: TXD. */ ++#define CSR11_LONG_RETRY FIELD32(16, 0x00ff0000) /* long retry count. */ ++#define CSR11_SHORT_RETRY FIELD32(24, 0xff000000) /* short retry count. */ ++ ++/* ++ * CSR12: Synchronization configuration register 0. ++ * All units in 1/16 TU. ++ */ ++#define CSR12_BEACON_INTERVAL \ ++ FIELD32(0, 0x0000ffff) /* beacon interval, default is 100 TU. */ ++#define CSR12_CFPMAX_DURATION \ ++ FIELD32(16, 0xffff0000) /* cfp maximum duration, default is 100 TU. */ ++ ++/* ++ * CSR13: Synchronization configuration register 1. ++ * All units in 1/16 TU. ++ */ ++#define CSR13_ATIMW_DURATION FIELD32(0, 0x0000ffff) /* atim window duration. */ ++#define CSR13_CFP_PERIOD \ ++ FIELD32(16, 0x00ff0000) /* cfp period, default is 0 TU. */ ++ ++/* ++ * CSR14: Synchronization control register. ++ */ ++#define CSR14_TSF_COUNT FIELD32(0, 0x00000001) /* enable tsf auto counting. */ ++#define CSR14_TSF_SYNC \ ++ FIELD32(1, \ ++ 0x00000006) /* tsf sync, 0: disable, 1: infra, 2: ad-hoc mode. */ ++#define CSR14_TBCN FIELD32(3, 0x00000008) /* enable tbcn with reload value. */ ++#define CSR14_TCFP \ ++ FIELD32(4, 0x00000010) /* enable tcfp & cfp / cp switching. */ ++#define CSR14_TATIMW \ ++ FIELD32(5, 0x00000020) /* enable tatimw & atim window switching. */ ++#define CSR14_BEACON_GEN FIELD32(6, 0x00000040) /* enable beacon generator. */ ++#define CSR14_CFP_COUNT_PRELOAD \ ++ FIELD32(8, 0x0000ff00) /* cfp count preload value. */ ++#define CSR14_TBCM_PRELOAD \ ++ FIELD32(16, 0xffff0000) /* tbcn preload value in units of 64us. */ ++ ++/* ++ * CSR15: Synchronization status register. ++ */ ++#define CSR15_CFP \ ++ FIELD32(0, 0x00000001) /* ASIC is in contention-free period. */ ++#define CSR15_ATIMW FIELD32(1, 0x00000002) /* ASIC is in ATIM window. */ ++#define CSR15_BEACON_SENT FIELD32(2, 0x00000004) /* Beacon is send. */ ++ ++/* ++ * CSR16: TSF timer register 0. ++ */ ++#define CSR16_LOW_TSFTIMER FIELD32(0, 0xffffffff) ++ ++/* ++ * CSR17: TSF timer register 1. ++ */ ++#define CSR17_HIGH_TSFTIMER FIELD32(0, 0xffffffff) ++ ++/* ++ * CSR18: IFS timer register 0. ++ */ ++#define CSR18_SIFS FIELD32(0, 0x000001ff) /* sifs, default is 10 us. */ ++#define CSR18_PIFS FIELD32(16, 0x01f00000) /* pifs, default is 30 us. */ ++ ++/* ++ * CSR19: IFS timer register 1. 
++ */ ++#define CSR19_DIFS FIELD32(0, 0x0000ffff) /* difs, default is 50 us. */ ++#define CSR19_EIFS FIELD32(16, 0xffff0000) /* eifs, default is 364 us. */ ++ ++/* ++ * CSR20: Wakeup timer register. ++ */ ++#define CSR20_DELAY_AFTER_TBCN \ ++ FIELD32(0, \ ++ 0x0000ffff) /* delay after tbcn expired in units of 1/16 TU. */ ++#define CSR20_TBCN_BEFORE_WAKEUP \ ++ FIELD32(16, 0x00ff0000) /* number of beacon before wakeup. */ ++#define CSR20_AUTOWAKE \ ++ FIELD32(24, 0x01000000) /* enable auto wakeup / sleep mechanism. */ ++ ++/* ++ * CSR21: EEPROM control register. ++ */ ++#define CSR21_RELOAD \ ++ FIELD32(0, 0x00000001) /* Write 1 to reload eeprom content. */ ++#define CSR21_EEPROM_DATA_CLOCK FIELD32(1, 0x00000002) ++#define CSR21_EEPROM_CHIP_SELECT FIELD32(2, 0x00000004) ++#define CSR21_EEPROM_DATA_IN FIELD32(3, 0x00000008) ++#define CSR21_EEPROM_DATA_OUT FIELD32(4, 0x00000010) ++#define CSR21_TYPE_93C46 FIELD32(5, 0x00000020) /* 1: 93c46, 0:93c66. */ ++ ++/* ++ * CSR22: CFP control register. ++ */ ++#define CSR22_CFP_DURATION_REMAIN \ ++ FIELD32(0, 0x0000ffff) /* cfp duration remain, in units of TU. */ ++#define CSR22_RELOAD_CFP_DURATION \ ++ FIELD32(16, 0x00010000) /* Write 1 to reload cfp duration remain. */ ++ ++/* ++ * TX / RX Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * TXCSR0: TX Control Register. ++ */ ++#define TXCSR0_KICK_TX FIELD32(0, 0x00000001) /* kick tx ring. */ ++#define TXCSR0_KICK_ATIM FIELD32(1, 0x00000002) /* kick atim ring. */ ++#define TXCSR0_KICK_PRIO FIELD32(2, 0x00000004) /* kick priority ring. */ ++#define TXCSR0_ABORT \ ++ FIELD32(3, 0x00000008) /* abort all transmit related ring operation. */ ++ ++/* ++ * TXCSR1: TX Configuration Register. ++ */ ++#define TXCSR1_ACK_TIMEOUT \ ++ FIELD32(0, \ ++ 0x000001ff) /* ack timeout, default = sifs + 2*slottime + acktime @ 1mbps. */ ++#define TXCSR1_ACK_CONSUME_TIME \ ++ FIELD32(9, \ ++ 0x0003fe00) /* ack consume time, default = sifs + acktime @ 1mbps. */ ++#define TXCSR1_TSF_OFFSET FIELD32(18, 0x00fc0000) /* insert tsf offset. */ ++#define TXCSR1_AUTORESPONDER \ ++ FIELD32(24, \ ++ 0x01000000) /* enable auto responder which include ack & cts. */ ++ ++/* ++ * TXCSR2: Tx descriptor configuration register. ++ */ ++#define TXCSR2_TXD_SIZE \ ++ FIELD32(0, 0x000000ff) /* tx descriptor size, default is 48. */ ++#define TXCSR2_NUM_TXD FIELD32(8, 0x0000ff00) /* number of txd in ring. */ ++#define TXCSR2_NUM_ATIM FIELD32(16, 0x00ff0000) /* number of atim in ring. */ ++#define TXCSR2_NUM_PRIO \ ++ FIELD32(24, 0xff000000) /* number of priority in ring. */ ++ ++/* ++ * TXCSR3: TX Ring Base address register. ++ */ ++#define TXCSR3_TX_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR4: TX Atim Ring Base address register. ++ */ ++#define TXCSR4_ATIM_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR5: TX Prio Ring Base address register. ++ */ ++#define TXCSR5_PRIO_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR6: Beacon Base address register. ++ */ ++#define TXCSR6_BEACON_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * TXCSR7: Auto responder control register. ++ */ ++#define TXCSR7_AR_POWERMANAGEMENT \ ++ FIELD32(0, 0x00000001) /* auto responder power management bit. */ ++ ++/* ++ * TXCSR8: CCK Tx BBP register. ++ */ ++#define TXCSR8_CCK_SIGNAL \ ++ FIELD32(0, 0x000000ff) /* BBP rate field address for CCK. */ ++#define TXCSR8_CCK_SERVICE \ ++ FIELD32(8, 0x0000ff00) /* BBP service field address for CCK. 
*/ ++#define TXCSR8_CCK_LENGTH_LOW \ ++ FIELD32(16, 0x00ff0000) /* BBP length low byte address for CCK. */ ++#define TXCSR8_CCK_LENGTH_HIGH \ ++ FIELD32(24, 0xff000000) /* BBP length high byte address for CCK. */ ++ ++/* ++ * TXCSR9: OFDM TX BBP registers ++ */ ++#define TXCSR9_OFDM_RATE \ ++ FIELD32(0, 0x000000ff) /* BBP rate field address for OFDM. */ ++#define TXCSR9_OFDM_SERVICE \ ++ FIELD32(8, 0x0000ff00) /* BBP service field address for OFDM. */ ++#define TXCSR9_OFDM_LENGTH_LOW \ ++ FIELD32(16, 0x00ff0000) /* BBP length low byte address for OFDM. */ ++#define TXCSR9_OFDM_LENGTH_HIGH \ ++ FIELD32(24, 0xff000000) /* BBP length high byte address for OFDM. */ ++ ++/* ++ * RXCSR0: RX Control Register. ++ */ ++#define RXCSR0_DISABLE_RX FIELD32(0, 0x00000001) /* disable rx engine. */ ++#define RXCSR0_DROP_CRC FIELD32(1, 0x00000002) /* drop crc error. */ ++#define RXCSR0_DROP_PHYSICAL FIELD32(2, 0x00000004) /* drop physical error. */ ++#define RXCSR0_DROP_CONTROL FIELD32(3, 0x00000008) /* drop control frame. */ ++#define RXCSR0_DROP_NOT_TO_ME \ ++ FIELD32(4, 0x00000010) /* drop not to me unicast frame. */ ++#define RXCSR0_DROP_TODS \ ++ FIELD32(5, 0x00000020) /* drop frame tods bit is true. */ ++#define RXCSR0_DROP_VERSION_ERROR \ ++ FIELD32(6, 0x00000040) /* drop version error frame. */ ++#define RXCSR0_PASS_CRC \ ++ FIELD32(7, 0x00000080) /* pass all packets with crc attached. */ ++#define RXCSR0_PASS_PLCP \ ++ FIELD32(8, \ ++ 0x00000100) /* Pass all packets with 4 bytes PLCP attached. */ ++#define RXCSR0_DROP_MCAST FIELD32(9, 0x00000200) /* Drop multicast frames. */ ++#define RXCSR0_DROP_BCAST FIELD32(10, 0x00000400) /* Drop broadcast frames. */ ++#define RXCSR0_ENABLE_QOS \ ++ FIELD32(11, 0x00000800) /* Accept QOS data frame and parse QOS field. */ ++ ++/* ++ * RXCSR1: RX descriptor configuration register. ++ */ ++#define RXCSR1_RXD_SIZE \ ++ FIELD32(0, 0x000000ff) /* rx descriptor size, default is 32b. */ ++#define RXCSR1_NUM_RXD FIELD32(8, 0x0000ff00) /* number of rxd in ring. */ ++ ++/* ++ * RXCSR2: RX Ring base address register. ++ */ ++#define RXCSR2_RX_RING_REGISTER FIELD32(0, 0xffffffff) ++ ++/* ++ * RXCSR3: BBP ID register for Rx operation. ++ */ ++#define RXCSR3_BBP_ID0 FIELD32(0, 0x0000007f) /* bbp register 0 id. */ ++#define RXCSR3_BBP_ID0_VALID \ ++ FIELD32(7, 0x00000080) /* bbp register 0 id is valid or not. */ ++#define RXCSR3_BBP_ID1 FIELD32(8, 0x00007f00) /* bbp register 1 id. */ ++#define RXCSR3_BBP_ID1_VALID \ ++ FIELD32(15, 0x00008000) /* bbp register 1 id is valid or not. */ ++#define RXCSR3_BBP_ID2 FIELD32(16, 0x007f0000) /* bbp register 2 id. */ ++#define RXCSR3_BBP_ID2_VALID \ ++ FIELD32(23, 0x00800000) /* bbp register 2 id is valid or not. */ ++#define RXCSR3_BBP_ID3 FIELD32(24, 0x7f000000) /* bbp register 3 id. */ ++#define RXCSR3_BBP_ID3_VALID \ ++ FIELD32(31, 0x80000000) /* bbp register 3 id is valid or not. */ ++ ++/* ++ * ARCSR1: Auto Responder PLCP config register 1. ++ */ ++#define ARCSR1_AR_BBP_DATA2 \ ++ FIELD32(0, 0x000000ff) /* Auto responder BBP register 2 data. */ ++#define ARCSR1_AR_BBP_ID2 \ ++ FIELD32(8, 0x0000ff00) /* Auto responder BBP register 2 Id. */ ++#define ARCSR1_AR_BBP_DATA3 \ ++ FIELD32(16, 0x00ff0000) /* Auto responder BBP register 3 data. */ ++#define ARCSR1_AR_BBP_ID3 \ ++ FIELD32(24, 0xff000000) /* Auto responder BBP register 3 Id. */ ++ ++/* ++ * Miscellaneous Registers. ++ * Some values are set in TU, whereas 1 TU == 1024 us. ++ */ ++ ++/* ++ * PCISR: PCI control register. 
++ */ ++#define PCICSR_BIG_ENDIAN \ ++ FIELD32(0, 0x00000001) /* 1: big endian, 0: little endian. */ ++#define PCICSR_RX_TRESHOLD \ ++ FIELD32(1, 0x00000006) /* rx threshold in dw to start pci access */ ++/* 0: 16dw (default), 1: 8dw, 2: 4dw, 3: 32dw. */ ++#define PCICSR_TX_TRESHOLD \ ++ FIELD32(3, 0x00000018) /* tx threshold in dw to start pci access */ ++/* 0: 0dw (default), 1: 1dw, 2: 4dw, 3: forward. */ ++#define PCICSR_BURST_LENTH FIELD32(5, 0x00000060) /* pci burst length */ ++/* 0: 4dw (default, 1: 8dw, 2: 16dw, 3:32dw. */ ++#define PCICSR_ENABLE_CLK FIELD32(7, 0x00000080) /* enable clk_run, */ ++/* pci clock can't going down to non-operational. */ ++#define PCICSR_READ_MULTIPLE \ ++ FIELD32(8, 0x00000100) /* Enable memory read multiple. */ ++#define PCICSR_WRITE_INVALID \ ++ FIELD32(9, 0x00000200) /* Enable memory write & invalid. */ ++ ++/* ++ * PWRCSR1: Manual power control / status register. ++ * state: 0 deep_sleep, 1: sleep, 2: standby, 3: awake. ++ */ ++#define PWRCSR1_SET_STATE \ ++ FIELD32(0, \ ++ 0x00000001) /* set state. Write 1 to trigger, self cleared. */ ++#define PWRCSR1_BBP_DESIRE_STATE FIELD32(1, 0x00000006) /* BBP desired state. */ ++#define PWRCSR1_RF_DESIRE_STATE FIELD32(3, 0x00000018) /* RF desired state. */ ++#define PWRCSR1_BBP_CURR_STATE FIELD32(5, 0x00000060) /* BBP current state. */ ++#define PWRCSR1_RF_CURR_STATE FIELD32(7, 0x00000180) /* RF current state. */ ++#define PWRCSR1_PUT_TO_SLEEP \ ++ FIELD32(9, \ ++ 0x00000200) /* put to sleep. Write 1 to trigger, self cleared. */ ++ ++/* ++ * TIMECSR: Timer control register. ++ */ ++#define TIMECSR_US_COUNT \ ++ FIELD32(0, 0x000000ff) /* 1 us timer count in units of clock cycles. */ ++#define TIMECSR_US_64_COUNT \ ++ FIELD32(8, 0x0000ff00) /* 64 us timer count in units of 1 us timer. */ ++#define TIMECSR_BEACON_EXPECT \ ++ FIELD32(16, 0x00070000) /* Beacon expect window. */ ++ ++/* ++ * MACCSR1: MAC configuration register 1. ++ */ ++#define MACCSR1_KICK_RX \ ++ FIELD32(0, 0x00000001) /* kick one-shot rx in one-shot rx mode. */ ++#define MACCSR1_ONESHOT_RXMODE \ ++ FIELD32(1, 0x00000002) /* enable one-shot rx mode for debugging. */ ++#define MACCSR1_BBPRX_RESET_MODE \ ++ FIELD32(2, 0x00000004) /* ralink bbp rx reset mode. */ ++#define MACCSR1_AUTO_TXBBP \ ++ FIELD32(3, 0x00000008) /* auto tx logic access bbp control register. */ ++#define MACCSR1_AUTO_RXBBP \ ++ FIELD32(4, 0x00000010) /* auto rx logic access bbp control register. */ ++#define MACCSR1_LOOPBACK FIELD32(5, 0x00000060) /* loopback mode. */ ++/* 0: normal, 1: internal, 2: external, 3:rsvd. */ ++#define MACCSR1_INTERSIL_IF \ ++ FIELD32(7, 0x00000080) /* intersil if calibration pin. */ ++ ++/* ++ * RALINKCSR: Ralink Rx auto-reset BBCR. ++ */ ++#define RALINKCSR_AR_BBP_DATA0 \ ++ FIELD32(0, 0x000000ff) /* auto reset bbp register 0 data. */ ++#define RALINKCSR_AR_BBP_ID0 \ ++ FIELD32(8, 0x00007f00) /* auto reset bbp register 0 id. */ ++#define RALINKCSR_AR_BBP_VALID0 \ ++ FIELD32(15, 0x00008000) /* auto reset bbp register 0 valid. */ ++#define RALINKCSR_AR_BBP_DATA1 \ ++ FIELD32(16, 0x00ff0000) /* auto reset bbp register 1 data. */ ++#define RALINKCSR_AR_BBP_ID1 \ ++ FIELD32(24, 0x7f000000) /* auto reset bbp register 1 id. */ ++#define RALINKCSR_AR_BBP_VALID1 \ ++ FIELD32(31, 0x80000000) /* auto reset bbp register 1 valid. */ ++ ++/* ++ * BCNCSR: Beacon interval control register. ++ */ ++#define BCNCSR_CHANGE \ ++ FIELD32(0, 0x00000001) /* write one to change beacon interval. */ ++#define BCNCSR_DELTATIME FIELD32(1, 0x0000001e) /* the delta time value. 
*/ ++#define BCNCSR_NUM_BEACON \ ++ FIELD32(5, 0x00001fe0) /* number of beacon according to mode. */ ++#define BCNCSR_MODE FIELD32(13, 0x00006000) /* please refer to asic specs. */ ++#define BCNCSR_PLUS \ ++ FIELD32(15, 0x00008000) /* plus or minus delta time value. */ ++ ++/* ++ * BBPCSR: BBP serial control register. ++ */ ++#define BBPCSR_VALUE \ ++ FIELD32(0, 0x000000ff) /* register value to program into bbp. */ ++#define BBPCSR_REGNUM FIELD32(8, 0x00007f00) /* selected bbp register. */ ++#define BBPCSR_BUSY \ ++ FIELD32(15, 0x00008000) /* 1: asic is busy execute bbp programming. */ ++#define BBPCSR_WRITE_CONTROL \ ++ FIELD32(16, 0x00010000) /* 1: write bbp, 0: read bbp. */ ++ ++/* ++ * RFCSR: RF serial control register. ++ */ ++#define RFCSR_VALUE \ ++ FIELD32(0, 0x00ffffff) /* register value + id to program into rf/if. */ ++#define RFCSR_NUMBER_OF_BITS \ ++ FIELD32(24, \ ++ 0x1f000000) /* number of bits used in value (i:20, rfmd:22). */ ++#define RFCSR_IF_SELECT \ ++ FIELD32(29, 0x20000000) /* chip to program: 0: rf, 1: if. */ ++#define RFCSR_PLL_LD FIELD32(30, 0x40000000) /* rf pll_ld status. */ ++#define RFCSR_BUSY \ ++ FIELD32(31, 0x80000000) /* 1: asic is busy execute rf programming. */ ++ ++/* ++ * LEDCSR: LED control register. ++ */ ++#define LEDCSR_ON_PERIOD FIELD32(0, 0x000000ff) /* on period, default 70ms. */ ++#define LEDCSR_OFF_PERIOD FIELD32(8, 0x0000ff00) /* off period, default 30ms. */ ++#define LEDCSR_LINK FIELD32(16, 0x00010000) /* 0: linkoff, 1: linkup. */ ++#define LEDCSR_ACTIVITY FIELD32(17, 0x00020000) /* 0: idle, 1: active. */ ++#define LEDCSR_LINK_POLARITY \ ++ FIELD32(18, 0x00040000) /* 0: active low, 1: active high. */ ++#define LEDCSR_ACTIVITY_POLARITY \ ++ FIELD32(19, 0x00080000) /* 0: active low, 1: active high. */ ++#define LEDCSR_LED_DEFAULT \ ++ FIELD32(20, 0x00100000) /* LED state for "enable" 0: ON, 1: OFF. */ ++ ++/* ++ * GPIOCSR: GPIO control register. ++ */ ++#define GPIOCSR_BIT0 FIELD32(0, 0x00000001) ++#define GPIOCSR_BIT1 FIELD32(1, 0x00000002) ++#define GPIOCSR_BIT2 FIELD32(2, 0x00000004) ++#define GPIOCSR_BIT3 FIELD32(3, 0x00000008) ++#define GPIOCSR_BIT4 FIELD32(4, 0x00000010) ++#define GPIOCSR_BIT5 FIELD32(5, 0x00000020) ++#define GPIOCSR_BIT6 FIELD32(6, 0x00000040) ++#define GPIOCSR_BIT7 FIELD32(7, 0x00000080) ++#define GPIOCSR_DIR0 FIELD32(8, 0x00000100) ++#define GPIOCSR_DIR1 FIELD32(9, 0x00000200) ++#define GPIOCSR_DIR2 FIELD32(10, 0x00000400) ++#define GPIOCSR_DIR3 FIELD32(11, 0x00000800) ++#define GPIOCSR_DIR4 FIELD32(12, 0x00001000) ++#define GPIOCSR_DIR5 FIELD32(13, 0x00002000) ++#define GPIOCSR_DIR6 FIELD32(14, 0x00004000) ++#define GPIOCSR_DIR7 FIELD32(15, 0x00008000) ++ ++/* ++ * BCNCSR1: Tx BEACON offset time control register. ++ */ ++#define BCNCSR1_PRELOAD \ ++ FIELD32(0, 0x0000ffff) /* beacon timer offset in units of usec. */ ++#define BCNCSR1_BEACON_CWMIN FIELD32(16, 0x000f0000) /* 2^CwMin. */ ++ ++/* ++ * MACCSR2: TX_PE to RX_PE turn-around time control register ++ */ ++#define MACCSR2_DELAY \ ++ FIELD32(0, \ ++ 0x000000ff) /* RX_PE low width, in units of pci clock cycle. */ ++ ++/* ++ * SECCSR1_RT2509: WEP control register ++ */ ++#define SECCSR1_KICK_ENCRYPT \ ++ FIELD32(0, 0x00000001) /* Kick encryption engine, self-clear. */ ++#define SECCSR1_ONE_SHOT \ ++ FIELD32(1, 0x00000002) /* 0: ring mode, 1: One shot only mode. */ ++#define SECCSR1_DESC_ADDRESS \ ++ FIELD32(2, 0xfffffffc) /* Descriptor physical address of frame. 
*/ ++ ++/* ++ * RF registers ++ */ ++#define RF1_TUNER FIELD32(17, 0x00020000) ++#define RF3_TUNER FIELD32(8, 0x00000100) ++#define RF3_TXPOWER FIELD32(9, 0x00003e00) ++ ++/* ++ * EEPROM content format. ++ * The wordsize of the EEPROM is 16 bits. ++ */ ++ ++/* ++ * EEPROM operation defines. ++ */ ++#define EEPROM_WIDTH_93c46 6 ++#define EEPROM_WIDTH_93c66 8 ++#define EEPROM_WRITE_OPCODE 0x05 ++#define EEPROM_READ_OPCODE 0x06 ++ ++/* ++ * EEPROM antenna. ++ */ ++#define EEPROM_ANTENNA_NUM FIELD16(0, 0x0003) /* Number of antenna's. */ ++#define EEPROM_ANTENNA_TX_DEFAULT \ ++ FIELD16(2, 0x000c) /* Default antenna 0: diversity, 1: A, 2: B. */ ++#define EEPROM_ANTENNA_RX_DEFAULT \ ++ FIELD16(4, 0x0030) /* Default antenna 0: diversity, 1: A, 2: B. */ ++#define EEPROM_ANTENNA_LED_MODE \ ++ FIELD16(6, 0x01c0) /* 0: default, 1: TX/RX activity, */ ++/* 2: Single LED (ignore link), 3: reserved. */ ++#define EEPROM_ANTENNA_DYN_TXAGC \ ++ FIELD16(9, 0x0200) /* Dynamic TX AGC control. */ ++#define EEPROM_ANTENNA_HARDWARE_RADIO \ ++ FIELD16(10, 0x0400) /* 1: Hardware controlled radio. Read GPIO0. */ ++#define EEPROM_ANTENNA_RF_TYPE \ ++ FIELD16(11, 0xf800) /* rf_type of this adapter. */ ++ ++/* ++ * EEPROM geography. ++ */ ++#define EEPROM_GEOGRAPHY_GEO \ ++ FIELD16(8, 0x0f00) /* Default geography setting for device. */ ++ ++/* ++ * EEPROM NIC config. ++ */ ++#define EEPROM_NIC_CARDBUS_ACCEL FIELD16(0, 0x0001) /* 0: enable, 1: disable. */ ++#define EEPROM_NIC_DYN_BBP_TUNE FIELD16(1, 0x0002) /* 0: enable, 1: disable. */ ++#define EEPROM_NIC_CCK_TX_POWER \ ++ FIELD16(2, 0x000c) /* CCK TX power compensation. */ ++ ++/* ++ * EEPROM TX power. ++ */ ++#define EEPROM_TX_POWER1 FIELD16(0, 0x00ff) ++#define EEPROM_TX_POWER2 FIELD16(8, 0xff00) ++ ++/* ++ * EEPROM BBP. ++ */ ++#define EEPROM_BBP_VALUE FIELD16(0, 0x00ff) ++#define EEPROM_BBP_REG_ID FIELD16(8, 0xff00) ++ ++/* ++ * EEPROM VERSION. ++ */ ++#define EEPROM_VERSION_FAE FIELD16(0, 0x00ff) /* FAE release number. */ ++#define EEPROM_VERSION FIELD16(8, 0xff00) ++ ++/* ++ * DMA ring defines and data structures. ++ */ ++ ++/* ++ * Size of a single descriptor. ++ */ ++#define SIZE_DESCRIPTOR 48 ++ ++/* ++ * TX descriptor format for TX, PRIO, ATIM and Beacon Ring. ++ */ ++struct _txd { ++ u32 word0; ++#define TXD_W0_OWNER_NIC FIELD32(0, 0x00000001) ++#define TXD_W0_VALID FIELD32(1, 0x00000002) ++#define TXD_W0_RESULT FIELD32(2, 0x0000001c) /* Set by device. */ ++#define TXD_W0_RETRY_COUNT FIELD32(5, 0x000000e0) /* Set by device. */ ++#define TXD_W0_MORE_FRAG FIELD32(8, 0x00000100) /* Set by device. 
*/ ++#define TXD_W0_ACK FIELD32(9, 0x00000200) ++#define TXD_W0_TIMESTAMP FIELD32(10, 0x00000400) ++#define TXD_W0_OFDM FIELD32(11, 0x00000800) ++#define TXD_W0_CIPHER_OWNER FIELD32(12, 0x00001000) ++#define TXD_W0_IFS FIELD32(13, 0x00006000) ++#define TXD_W0_RETRY_MODE FIELD32(15, 0x00008000) ++#define TXD_W0_DATABYTE_COUNT FIELD32(16, 0x0fff0000) ++#define TXD_W0_CIPHER_ALG FIELD32(29, 0xe0000000) ++ ++ u32 word1; ++#define TXD_W1_BUFFER_ADDRESS FIELD32(0, 0xffffffff) ++ ++ u32 word2; ++#define TXD_W2_IV_OFFSET FIELD32(0, 0x0000003f) ++#define TXD_W2_AIFS FIELD32(6, 0x000000c0) ++#define TXD_W2_CWMIN FIELD32(8, 0x00000f00) ++#define TXD_W2_CWMAX FIELD32(12, 0x0000f000) ++ ++ u32 word3; ++#define TXD_W3_PLCP_SIGNAL FIELD32(0, 0x000000ff) ++#define TXD_W3_PLCP_SERVICE FIELD32(8, 0x0000ff00) ++#define TXD_W3_PLCP_LENGTH_LOW FIELD32(16, 0x00ff0000) ++#define TXD_W3_PLCP_LENGTH_HIGH FIELD32(24, 0xff000000) ++ ++ u32 word4; ++#define TXD_W4_IV FIELD32(0, 0xffffffff) ++ ++ u32 word5; ++#define TXD_W5_EIV FIELD32(0, 0xffffffff) ++ ++ u32 word6; ++#define TXD_W6_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word7; ++#define TXD_W7_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word8; ++#define TXD_W8_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word9; ++#define TXD_W9_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word10; ++#define TXD_W10_RTS FIELD32(0, 0x00000001) ++#define TXD_W10_TX_RATE FIELD32(0, 0x000000fe) /* For module only. */ ++} __attribute__((packed)); ++ ++/* ++ * RX descriptor format for RX Ring. ++ */ ++struct _rxd { ++ u32 word0; ++#define RXD_W0_OWNER_NIC FIELD32(0, 0x00000001) ++#define RXD_W0_UNICAST_TO_ME FIELD32(1, 0x00000002) ++#define RXD_W0_MULTICAST FIELD32(2, 0x00000004) ++#define RXD_W0_BROADCAST FIELD32(3, 0x00000008) ++#define RXD_W0_MY_BSS FIELD32(4, 0x00000010) ++#define RXD_W0_CRC FIELD32(5, 0x00000020) ++#define RXD_W0_OFDM FIELD32(6, 0x00000040) ++#define RXD_W0_PHYSICAL_ERROR FIELD32(7, 0x00000080) ++#define RXD_W0_CIPHER_OWNER FIELD32(8, 0x00000100) ++#define RXD_W0_ICV_ERROR FIELD32(9, 0x00000200) ++#define RXD_W0_IV_OFFSET FIELD32(10, 0x0000fc00) ++#define RXD_W0_DATABYTE_COUNT FIELD32(16, 0x0fff0000) ++#define RXD_W0_CIPHER_ALG FIELD32(29, 0xe0000000) ++ ++ u32 word1; ++#define RXD_W1_BUFFER_ADDRESS FIELD32(0, 0xffffffff) ++ ++ u32 word2; ++#define RXD_W2_BBR0 FIELD32(0, 0x000000ff) ++#define RXD_W2_RSSI FIELD32(8, 0x0000ff00) ++#define RXD_W2_TA FIELD32(16, 0xffff0000) ++ ++ u32 word3; ++#define RXD_W3_TA FIELD32(0, 0xffffffff) ++ ++ u32 word4; ++#define RXD_W4_IV FIELD32(0, 0xffffffff) ++ ++ u32 word5; ++#define RXD_W5_EIV FIELD32(0, 0xffffffff) ++ ++ u32 word6; ++#define RXD_W6_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word7; ++#define RXD_W7_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word8; ++#define RXD_W8_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word9; ++#define RXD_W9_KEY FIELD32(0, 0xffffffff) ++ ++ u32 word10; ++#define RXD_W10_DROP FIELD32(0, 0x00000001) ++} __attribute__((packed)); ++ ++/* ++ * _rt2x00_pci ++ * This is the main structure which contains all variables required to communicate with the PCI device. ++ */ ++struct _rt2x00_pci { ++ /* ++ * PCI device structure. ++ */ ++ struct pci_dev *pci_dev; ++ ++ /* ++ * Chipset identification. ++ */ ++ struct _rt2x00_chip chip; ++ ++ /* ++ * csr_addr ++ * Base address of device registers, all exact register addresses are calculated from this address. ++ */ ++ void __iomem *csr_addr; ++ ++ /* ++ * RF register values for current channel. ++ */ ++ struct _rf_channel channel; ++ ++ /* ++ * EEPROM bus width. 
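++	 * Number of address bits shifted out per EEPROM read:
++	 * EEPROM_WIDTH_93c46 (6) or EEPROM_WIDTH_93c66 (8).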
++ */ ++ u8 eeprom_width; ++ ++ u8 __pad; /* For alignment only. */ ++ ++ /* ++ * EEPROM BBP data. ++ */ ++ u16 eeprom[EEPROM_BBP_SIZE]; ++ ++ /* ++ * DMA packet ring. ++ */ ++ struct _data_ring rx; ++ struct _data_ring tx; ++ ++ rtdm_irq_t irq_handle; ++ rtdm_lock_t lock; ++ ++} __attribute__((packed)); ++ ++static int rt2x00_get_rf_value(const struct _rt2x00_chip *chip, ++ const u8 channel, struct _rf_channel *rf_reg) ++{ ++ int index = 0x00; ++ ++ index = rt2x00_get_channel_index(channel); ++ if (index < 0) ++ return -EINVAL; ++ ++ memset(rf_reg, 0x00, sizeof(*rf_reg)); ++ ++ if (rt2x00_rf(chip, RF2522)) { ++ rf_reg->rf1 = 0x00002050; ++ rf_reg->rf3 = 0x00000101; ++ goto update_rf2_1; ++ } ++ if (rt2x00_rf(chip, RF2523)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf3 = 0x000e0111; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2524)) { ++ rf_reg->rf1 = 0x00032020; ++ rf_reg->rf3 = 0x00000101; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2525)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 = 0x00080000; ++ rf_reg->rf3 = 0x00060111; ++ rf_reg->rf4 = 0x00000a1b; ++ goto update_rf2_2; ++ } ++ if (rt2x00_rf(chip, RF2525E)) { ++ rf_reg->rf2 = 0x00080000; ++ rf_reg->rf3 = 0x00060111; ++ goto update_rf2_3; ++ } ++ if (rt2x00_rf(chip, RF5222)) { ++ rf_reg->rf3 = 0x00000101; ++ goto update_rf2_3; ++ } ++ ++ return -EINVAL; ++ ++update_rf2_1: /* RF2522. */ ++ rf_reg->rf2 = 0x000c1fda + (index * 0x14); ++ if (channel == 14) ++ rf_reg->rf2 += 0x0000001c; ++ goto exit; ++ ++update_rf2_2: /* RF2523, RF2524, RF2525. */ ++ rf_reg->rf2 |= 0x00000c9e + (index * 0x04); ++ if (rf_reg->rf2 & 0x00000040) ++ rf_reg->rf2 += 0x00000040; ++ if (channel == 14) { ++ rf_reg->rf2 += 0x08; ++ rf_reg->rf4 &= ~0x00000018; ++ } ++ goto exit; ++ ++update_rf2_3: /* RF2525E, RF5222. */ ++ if (OFDM_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 |= 0x00001136 + (index * 0x04); ++ if (rf_reg->rf2 & 0x00000040) ++ rf_reg->rf2 += 0x00000040; ++ if (channel == 14) { ++ rf_reg->rf2 += 0x04; ++ rf_reg->rf4 = 0x00000a1b; ++ } else { ++ rf_reg->rf4 = 0x00000a0b; ++ } ++ } else if (UNII_LOW_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf2 = 0x00018896 + (index * 0x04); ++ rf_reg->rf4 = 0x00000a1f; ++ } else if (HIPERLAN2_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022010; ++ rf_reg->rf2 = 0x00008802 + (index * 0x04); ++ rf_reg->rf4 = 0x00000a0f; ++ } else if (UNII_HIGH_CHANNEL(channel)) { ++ rf_reg->rf1 = 0x00022020; ++ rf_reg->rf2 = 0x000090a6 + (index * 0x08); ++ rf_reg->rf4 = 0x00000a07; ++ } ++ ++exit: ++ rf_reg->rf1 = cpu_to_le32(rf_reg->rf1); ++ rf_reg->rf2 = cpu_to_le32(rf_reg->rf2); ++ rf_reg->rf3 = cpu_to_le32(rf_reg->rf3); ++ rf_reg->rf4 = cpu_to_le32(rf_reg->rf4); ++ ++ return 0; ++} ++ ++/* ++ * Get txpower value in dBm mathing the requested percentage. ++ */ ++static inline u8 rt2x00_get_txpower(const struct _rt2x00_chip *chip, ++ const u8 tx_power) ++{ ++ return tx_power / 100 * 31; ++ ++ /* ++ if(tx_power <= 3) ++ return 19; ++ else if(tx_power <= 12) ++ return 22; ++ else if(tx_power <= 25) ++ return 25; ++ else if(tx_power <= 50) ++ return 28; ++ else if(tx_power <= 75) ++ return 30; ++ else if(tx_power <= 100) ++ return 31; ++ ++ ERROR("Invalid tx_power.\n"); ++ return 31; ++ */ ++} ++ ++/* ++ * Ring handlers. 
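++ * Each ring is a single coherent DMA allocation holding all descriptors first
++ * and the data buffers behind them (see the DESC_BASE / DATA_BASE macros
++ * below); rt2x00_pci_free_ring() hands the block back and resets the ring.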
++ */ ++static inline int ++rt2x00_pci_alloc_ring(struct _rt2x00_core *core, struct _data_ring *ring, ++ const u8 ring_type, const u16 max_entries, ++ const u16 entry_size, const u16 desc_size) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ rt2x00_init_ring(core, ring, ring_type, max_entries, entry_size, ++ desc_size); ++ ++ ring->data_addr = ++ dma_alloc_coherent(&rt2x00pci->pci_dev->dev, ring->mem_size, ++ &ring->data_dma, GFP_KERNEL); ++ if (!ring->data_addr) ++ return -ENOMEM; ++ ++ memset(ring->data_addr, 0x00, ring->mem_size); ++ ++ return 0; ++} ++ ++static int rt2x00_pci_alloc_rings(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ if (rt2x00_pci_alloc_ring(core, &rt2x00pci->rx, RING_RX, RX_ENTRIES, ++ DATA_FRAME_SIZE, SIZE_DESCRIPTOR) || ++ rt2x00_pci_alloc_ring(core, &rt2x00pci->tx, RING_TX, TX_ENTRIES, ++ DATA_FRAME_SIZE, SIZE_DESCRIPTOR)) { ++ ERROR("DMA allocation failed.\n"); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static inline void rt2x00_pci_free_ring(struct _data_ring *ring) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(ring->core); ++ ++ if (ring->data_addr) ++ dma_free_coherent(&rt2x00pci->pci_dev->dev, ring->mem_size, ++ ring->data_addr, ring->data_dma); ++ ring->data_addr = NULL; ++ ++ rt2x00_deinit_ring(ring); ++} ++ ++static void rt2x00_pci_free_rings(struct _rt2x00_core *core) ++{ ++ struct _rt2x00_pci *rt2x00pci = rt2x00_priv(core); ++ ++ rt2x00_pci_free_ring(&rt2x00pci->rx); ++ rt2x00_pci_free_ring(&rt2x00pci->tx); ++} ++ ++/* ++ * Macro's for calculating exact position in data ring. ++ */ ++#define DESC_BASE(__ring) ((void *)((__ring)->data_addr)) ++#define DATA_BASE(__ring) \ ++ ((void *)(DESC_BASE(__ring) + \ ++ ((__ring)->max_entries * (__ring)->desc_size))) ++ ++#define __DESC_ADDR(__ring, __index) \ ++ ((void *)(DESC_BASE(__ring) + ((__index) * (__ring)->desc_size))) ++#define __DATA_ADDR(__ring, __index) \ ++ ((void *)(DATA_BASE(__ring) + ((__index) * (__ring)->entry_size))) ++ ++#define DESC_ADDR(__ring) (__DESC_ADDR(__ring, (__ring)->index)) ++#define DESC_ADDR_DONE(__ring) (__DESC_ADDR(__ring, (__ring)->index_done)) ++ ++#define DATA_ADDR(__ring) (__DATA_ADDR(__ring, (__ring)->index)) ++#define DATA_ADDR_DONE(__ring) (__DATA_ADDR(__ring, (__ring)->index_done)) ++ ++/* ++ * Register access. ++ * All access to the registers will go through rt2x00_register_read and rt2x00_register_write. ++ * BBP and RF register require indirect register access through the register BBPCSR and RFCSR. ++ * The indirect register access work with busy bits, and a read or write function call can fail. ++ * Specific fields within a register can be accessed using the set and get field routines, ++ * these function will handle the requirement of little_endian and big_endian conversions. ++ */ ++#define REGISTER_BUSY_COUNT \ ++ 10 /* Number of retries before failing access BBP & RF indirect register */ ++#define REGISTER_BUSY_DELAY \ ++ 100 /* Delay between each register access retry. 
(us) */ ++ ++static void rt2x00_register_read(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value) ++{ ++ *value = readl((void *)(rt2x00pci->csr_addr + offset)); ++} ++ ++static void rt2x00_register_multiread(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value, ++ const u16 length) ++{ ++ memcpy_fromio((void *)value, (void *)(rt2x00pci->csr_addr + offset), ++ length); ++} ++ ++static void rt2x00_register_write(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, const u32 value) ++{ ++ writel(value, (void *)(rt2x00pci->csr_addr + offset)); ++} ++ ++static void rt2x00_register_multiwrite(const struct _rt2x00_pci *rt2x00pci, ++ const unsigned long offset, u32 *value, ++ const u16 length) ++{ ++ memcpy_toio((void *)(rt2x00pci->csr_addr + offset), (void *)value, ++ length); ++} ++ ++static void rt2x00_bbp_regwrite(const struct _rt2x00_pci *rt2x00pci, ++ const u8 reg_id, const u8 value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, BBPCSR, ®); ++ if (!rt2x00_get_field32(reg, BBPCSR_BUSY)) ++ goto bbp_write; ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("BBPCSR register busy. Write failed\n"); ++ return; ++ ++bbp_write: ++ reg = 0x00000000; ++ rt2x00_set_field32(®, BBPCSR_VALUE, value); ++ rt2x00_set_field32(®, BBPCSR_REGNUM, reg_id); ++ rt2x00_set_field32(®, BBPCSR_BUSY, 1); ++ rt2x00_set_field32(®, BBPCSR_WRITE_CONTROL, 1); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR, reg); ++} ++ ++static void rt2x00_bbp_regread(const struct _rt2x00_pci *rt2x00pci, ++ const u8 reg_id, u8 *value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ /* ++ * We first have to acquire the requested BBP register, ++ * so we write the register id into the BBP register first. ++ */ ++ rt2x00_set_field32(®, BBPCSR_REGNUM, reg_id); ++ rt2x00_set_field32(®, BBPCSR_BUSY, 1); ++ rt2x00_set_field32(®, BBPCSR_WRITE_CONTROL, 0); ++ ++ rt2x00_register_write(rt2x00pci, BBPCSR, reg); ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, BBPCSR, ®); ++ if (!rt2x00_get_field32(reg, BBPCSR_BUSY)) { ++ *value = rt2x00_get_field32(reg, BBPCSR_VALUE); ++ return; ++ } ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("BBPCSR register busy. Read failed\n"); ++ *value = 0xff; ++} ++ ++static void rt2x00_rf_regwrite(const struct _rt2x00_pci *rt2x00pci, ++ const u32 value) ++{ ++ u32 reg = 0x00000000; ++ u8 counter = 0x00; ++ ++ for (counter = 0x00; counter < REGISTER_BUSY_COUNT; counter++) { ++ rt2x00_register_read(rt2x00pci, RFCSR, ®); ++ if (!rt2x00_get_field32(reg, RFCSR_BUSY)) ++ goto rf_write; ++ udelay(REGISTER_BUSY_DELAY); ++ } ++ ++ ERROR("RFCSR register busy. Write failed\n"); ++ return; ++ ++rf_write: ++ reg = value; ++ rt2x00_set_field32(®, RFCSR_NUMBER_OF_BITS, 20); ++ rt2x00_set_field32(®, RFCSR_IF_SELECT, 0); ++ rt2x00_set_field32(®, RFCSR_BUSY, 1); ++ ++ // printk(KERN_INFO "DEBUG: %s:%d: reg=%x\n", __FILE__, __LINE__, reg); ++ ++ rt2x00_register_write(rt2x00pci, RFCSR, reg); ++} ++ ++/* ++ * EEPROM access. ++ * The EEPROM is being accessed by word index. ++ * rt2x00_eeprom_read_word is the main access function that can be called by ++ * the rest of the module. It will take the index number of the eeprom word ++ * and the bus width. 
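++ * The helpers below bit-bang the serial protocol through CSR21: raise chip
++ * select, clock out the 3-bit read opcode and the word address, then clock
++ * the 16 data bits back in and drop chip select again.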
++ */ ++static inline void rt2x00_eeprom_pulse_high(const struct _rt2x00_pci *rt2x00pci, ++ u32 *flags) ++{ ++ rt2x00_set_field32(flags, CSR21_EEPROM_DATA_CLOCK, 1); ++ rt2x00_register_write(rt2x00pci, CSR21, *flags); ++ udelay(1); ++} ++ ++static inline void rt2x00_eeprom_pulse_low(const struct _rt2x00_pci *rt2x00pci, ++ u32 *flags) ++{ ++ rt2x00_set_field32(flags, CSR21_EEPROM_DATA_CLOCK, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, *flags); ++ udelay(1); ++} ++ ++static void rt2x00_eeprom_shift_out_bits(const struct _rt2x00_pci *rt2x00pci, ++ const u16 data, const u16 count) ++{ ++ u32 flags = 0x00000000; ++ u32 mask = 0x0001 << (count - 1); ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data flags. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ ++ /* ++ * Start writing all bits. ++ */ ++ do { ++ /* ++ * Only set the data_in flag when we are at the correct bit. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, ++ (data & mask) ? 1 : 0); ++ ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ /* ++ * Shift to next bit. ++ */ ++ mask >>= 1; ++ } while (mask); ++ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++} ++ ++static void rt2x00_eeprom_shift_in_bits(const struct _rt2x00_pci *rt2x00pci, ++ u16 *data) ++{ ++ u32 flags = 0x00000000; ++ u8 counter = 0x00; ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data flags. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ ++ /* ++ * Start reading all 16 bits. ++ */ ++ for (counter = 0; counter < 16; counter++) { ++ /* ++ * Shift to the next bit. ++ */ ++ *data <<= 1; ++ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ ++ /* ++ * Clear data_in flag and set the data bit to 1 when the data_out flag is set. ++ */ ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ if (rt2x00_get_field32(flags, CSR21_EEPROM_DATA_OUT)) ++ *data |= 1; ++ ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ } ++} ++ ++static u16 rt2x00_eeprom_read_word(const struct _rt2x00_pci *rt2x00pci, ++ const u8 word) ++{ ++ u32 flags = 0x00000000; ++ u16 data = 0x0000; ++ ++ /* ++ * Clear all flags, and enable chip select. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_OUT, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_CLOCK, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_CHIP_SELECT, 1); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ /* ++ * kick a pulse. ++ */ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ /* ++ * Select the read opcode and bus_width. ++ */ ++ rt2x00_eeprom_shift_out_bits(rt2x00pci, EEPROM_READ_OPCODE, 3); ++ rt2x00_eeprom_shift_out_bits(rt2x00pci, word, rt2x00pci->eeprom_width); ++ ++ rt2x00_eeprom_shift_in_bits(rt2x00pci, &data); ++ ++ /* ++ * Clear chip_select and data_in flags. ++ */ ++ rt2x00_register_read(rt2x00pci, CSR21, &flags); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_DATA_IN, 0); ++ rt2x00_set_field32(&flags, CSR21_EEPROM_CHIP_SELECT, 0); ++ rt2x00_register_write(rt2x00pci, CSR21, flags); ++ ++ /* ++ * kick a pulse. 
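++	 * One extra clock cycle with chip select deasserted terminates the
++	 * read before the word is returned.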
++ */ ++ rt2x00_eeprom_pulse_high(rt2x00pci, &flags); ++ rt2x00_eeprom_pulse_low(rt2x00pci, &flags); ++ ++ return data; ++} ++ ++#endif /* RT2500PCI_H */ +--- linux/drivers/xenomai/net/drivers/experimental/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/Makefile 2021-04-07 16:01:27.586633653 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_RT2500) += rt2500/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000_NEW) += e1000/ ++ ++obj-$(CONFIG_RTNET_DRV_3C59X) += rt_3c59x.o ++ ++rt_3c59x-y := 3c59x.o +--- linux/drivers/xenomai/net/drivers/experimental/3c59x.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/3c59x.c 2021-04-07 16:01:27.581633660 +0800 +@@ -0,0 +1,2749 @@ ++#warning ********************************************************************* ++#warning This driver is probably not real-time safe! Under certain conditions ++#warning it can cause interrupt locks of up to 1 second (issue_and_wait). We ++#warning need a rewrite of critical parts, but we are lacking the knowledge ++#warning about the hardware details (e.g. how long does a normal delay take => ++#warning apply this value and throw an error message on timeouts). ++#warning ********************************************************************* ++ ++/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux / RTnet. */ ++/* ++ RTnet porting 2002 by Mathias Koehrer (mathias_koehrer@yahoo.de) ++ -- Support only for PCI boards, EISA stuff ignored... ++ ++ Originally written 1996-1999 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ This driver is for the 3Com "Vortex" and "Boomerang" series ethercards. ++ Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597 ++ and the EtherLink XL 3c900 and 3c905 cards. ++ ++ Problem reports and questions should be directed to ++ vortex@scyld.com ++ ++ The author may be reached as becker@scyld.com, or C/O ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ Linux Kernel Additions: ++ ++ 0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates ++ 0.99H+lk1.0 - Jeff Garzik ++ Remove compatibility defines for kernel versions < 2.2.x. ++ Update for new 2.3.x module interface ++ LK1.1.2 (March 19, 2000) ++ * New PCI interface (jgarzik) ++ ++ LK1.1.3 25 April 2000, Andrew Morton ++ - Merged with 3c575_cb.c ++ - Don't set RxComplete in boomerang interrupt enable reg ++ - spinlock in vortex_timer to protect mdio functions ++ - disable local interrupts around call to vortex_interrupt in ++ vortex_tx_timeout() (So vortex_interrupt can use spin_lock()) ++ - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl ++ - In vortex_start_xmit(), move the lock to _after_ we've altered ++ vp->cur_tx and vp->tx_full. This defeats the race between ++ vortex_start_xmit() and vortex_interrupt which was identified ++ by Bogdan Costescu. ++ - Merged back support for six new cards from various sources ++ - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus ++ insertion oops) ++ - Tell it that 3c905C has NWAY for 100bT autoneg ++ - Fix handling of SetStatusEnd in 'Too much work..' code, as ++ per 2.3.99's 3c575_cb (Dave Hinds). ++ - Split ISR into two for vortex & boomerang ++ - Fix MOD_INC/DEC races ++ - Handle resource allocation failures. 
++ - Fix 3CCFE575CT LED polarity ++ - Make tx_interrupt_mitigation the default ++ ++ LK1.1.4 25 April 2000, Andrew Morton ++ - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs. ++ - Put vortex_info_tbl into __devinitdata ++ - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well ++ as in the hardware. ++ - Increased the loop counter in issue_and_wait from 2,000 to 4,000. ++ ++ LK1.1.5 28 April 2000, andrewm ++ - Added powerpc defines (John Daniel said these work...) ++ - Some extra diagnostics ++ - In vortex_error(), reset the Tx on maxCollisions. Otherwise most ++ chips usually get a Tx timeout. ++ - Added extra_reset module parm ++ - Replaced some inline timer manip with mod_timer ++ (Franois romieu ) ++ - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway ++ (this came across from 3c575_cb). ++ ++ LK1.1.6 06 Jun 2000, andrewm ++ - Backed out the PPC defines. ++ - Use del_timer_sync(), mod_timer(). ++ - Fix wrapped ulong comparison in boomerang_rx() ++ - Add IS_TORNADO, use it to suppress 3c905C checksum error msg ++ (Donald Becker, I Lee Hetherington ) ++ - Replace union wn3_config with BFINS/BFEXT manipulation for ++ sparc64 (Pete Zaitcev, Peter Jones) ++ - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex): ++ do a netif_wake_queue() to better recover from errors. (Anders Pedersen, ++ Donald Becker) ++ - Print a warning on out-of-memory (rate limited to 1 per 10 secs) ++ - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland) ++ ++ LK1.1.7 2 Jul 2000 andrewm ++ - Better handling of shared IRQs ++ - Reset the transmitter on a Tx reclaim error ++ - Fixed crash under OOM during vortex_open() (Mark Hemment) ++ - Fix Rx cessation problem during OOM (help from Mark Hemment) ++ - The spinlocks around the mdio access were blocking interrupts for 300uS. ++ Fix all this to use spin_lock_bh() within mdio_read/write ++ - Only write to TxFreeThreshold if it's a boomerang - other NICs don't ++ have one. ++ - Added 802.3x MAC-layer flow control support ++ ++ LK1.1.8 13 Aug 2000 andrewm ++ - Ignore request_region() return value - already reserved if Cardbus. ++ - Merged some additional Cardbus flags from Don's 0.99Qk ++ - Some fixes for 3c556 (Fred Maciel) ++ - Fix for EISA initialisation (Jan Rekorajski) ++ - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers ++ - Fixed MII_XCVR_PWR for 3CCFE575CT ++ - Added INVERT_LED_PWR, used it. ++ - Backed out the extra_reset stuff ++ ++ LK1.1.9 12 Sep 2000 andrewm ++ - Backed out the tx_reset_resume flags. It was a no-op. ++ - In vortex_error, don't reset the Tx on txReclaim errors ++ - In vortex_error, don't reset the Tx on maxCollisions errors. ++ Hence backed out all the DownListPtr logic here. ++ - In vortex_error, give Tornado cards a partial TxReset on ++ maxCollisions (David Hinds). Defined MAX_COLLISION_RESET for this. ++ - Redid some driver flags and device names based on pcmcia_cs-3.1.20. ++ - Fixed a bug where, if vp->tx_full is set when the interface ++ is downed, it remains set when the interface is upped. Bad ++ things happen. 
++ ++ LK1.1.10 17 Sep 2000 andrewm ++ - Added EEPROM_8BIT for 3c555 (Fred Maciel) ++ - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg) ++ - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO" ++ ++ LK1.1.11 13 Nov 2000 andrewm ++ - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER ++ ++ LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1) ++ - Call pci_enable_device before we request our IRQ (Tobias Ringstrom) ++ - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra) ++ - Added extended issue_and_wait for the 3c905CX. ++ - Look for an MII on PHY index 24 first (3c905CX oddity). ++ - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger) ++ - Don't free skbs we don't own on oom path in vortex_open(). ++ ++ LK1.1.13 27 Jan 2001 ++ - Added explicit `medialock' flag so we can truly ++ lock the media type down with `options'. ++ - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo ) ++ - Added and used EEPROM_NORESET for 3c556B PM resumes. ++ - Fixed leakage of vp->rx_ring. ++ - Break out separate HAS_HWCKSM device capability flag. ++ - Kill vp->tx_full (ANK) ++ - Merge zerocopy fragment handling (ANK?) ++ ++ LK1.1.14 15 Feb 2001 ++ - Enable WOL. Can be turned on with `enable_wol' module option. ++ - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul) ++ - If a device's internalconfig register reports it has NWAY, ++ use it, even if autoselect is enabled. ++ ++ LK1.1.15 6 June 2001 akpm ++ - Prevent double counting of received bytes (Lars Christensen) ++ - Add ethtool support (jgarzik) ++ - Add module parm descriptions (Andrzej M. Krzysztofowicz) ++ - Implemented alloc_etherdev() API ++ - Special-case the 'Tx error 82' message. ++ ++ LK1.1.16 18 July 2001 akpm ++ - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM ++ - Lessen verbosity of bootup messages ++ - Fix WOL - use new PM API functions. ++ - Use netif_running() instead of vp->open in suspend/resume. ++ - Don't reset the interface logic on open/close/rmmod. It upsets ++ autonegotiation, and hence DHCP (from 0.99T). ++ - Back out EEPROM_NORESET flag because of the above (we do it for all ++ NICs). ++ - Correct 3c982 identification string ++ - Rename wait_for_completion() to issue_and_wait() to avoid completion.h ++ clash. ++ ++ - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details. ++ - Also see Documentation/networking/vortex.txt ++*/ ++ ++/* ++ * FIXME: This driver _could_ support MTU changing, but doesn't. See Don's hamachi.c implementation ++ * as well as other drivers ++ * ++ * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k ++ * due to dead code elimination. There will be some performance benefits from this due to ++ * elimination of all the tests and reduced cache footprint. ++ */ ++ ++ ++#define DRV_NAME "3c59x" ++#define DRV_VERSION "LK1.1.16" ++#define DRV_RELDATE "19 July 2001" ++ ++ ++ ++/* A few values that may be tweaked. */ ++/* Keep the ring sizes a power of two for efficiency. */ ++#define TX_RING_SIZE 16 ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ ++ ++/* "Knobs" that adjust features and parameters. */ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1512 effectively disables this feature. */ ++/*** RTnet ***/ ++/*** RTnet ***/ ++/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */ ++static const int mtu = 1500; ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. 
*/ ++static int max_interrupt_work = 32; ++/* Tx timeout interval (millisecs) */ ++// *** RTnet *** ++//static int watchdog = 5000; ++// *** RTnet *** ++ ++/* Allow aggregation of Tx interrupts. Saves CPU load at the cost ++ * of possible Tx stalls if the system is blocking interrupts ++ * somewhere else. Undefine this to disable. ++ */ ++#define tx_interrupt_mitigation 1 ++ ++/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */ ++#define vortex_debug debug ++#ifdef VORTEX_DEBUG ++static int vortex_debug = VORTEX_DEBUG; ++#else ++static int vortex_debug = 1; ++#endif ++ ++#ifndef __OPTIMIZE__ ++#error You must compile this file with the correct options! ++#error See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* For NR_IRQS only. */ ++#include ++#include ++ ++// *** RTnet *** ++#include ++ ++static int cards = INT_MAX; ++module_param(cards, int, 0444); ++MODULE_PARM_DESC(cards, "number of cards to be supported"); ++// *** RTnet *** ++ ++/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. ++ This is only in the support-all-kernels source code. */ ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++#include ++ ++// *** RTnet - no power management *** ++#undef pci_set_power_state ++#define pci_set_power_state null_set_power_state ++static inline int null_set_power_state(struct pci_dev *dev, int state) ++{ ++ return 0; ++} ++// *** RTnet *** ++ ++ ++static char version[] = ++ DRV_NAME " for RTnet : Donald Becker and others. www.scyld.com/network/vortex.html\n"; ++ ++MODULE_AUTHOR("Donald Becker "); ++MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver for RTnet " ++ DRV_VERSION " " DRV_RELDATE); ++MODULE_LICENSE("GPL"); ++ ++/* Operational parameter that usually are not changed. */ ++ ++/* The Vortex size is twice that of the original EtherLinkIII series: the ++ runtime register window, window 1, is now always mapped in. ++ The Boomerang size is twice as large as the Vortex -- it has additional ++ bus master control registers. */ ++#define VORTEX_TOTAL_SIZE 0x20 ++#define BOOMERANG_TOTAL_SIZE 0x40 ++ ++/* Set iff a MII transceiver on any interface requires mdio preamble. ++ This only set with the original DP83840 on older 3c905 boards, so the extra ++ code size of a per-interface flag is not worthwhile. */ ++static char mii_preamble_required; ++ ++#define PFX DRV_NAME ": " ++ ++ ++ ++/* ++ Theory of Operation ++ ++ I. Board Compatibility ++ ++ This device driver is designed for the 3Com FastEtherLink and FastEtherLink ++ XL, 3Com's PCI to 10/100baseT adapters. It also works with the 10Mbs ++ versions of the FastEtherLink cards. The supported product IDs are ++ 3c590, 3c592, 3c595, 3c597, 3c900, 3c905 ++ ++ The related ISA 3c515 is supported with a separate driver, 3c515.c, included ++ with the kernel source or available from ++ cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html ++ ++ II. Board-specific settings ++ ++ PCI bus devices are configured by the system at boot time, so no jumpers ++ need to be set on the board. The system BIOS should be set to assign the ++ PCI INTA signal to an otherwise unused system IRQ line. ++ ++ The EEPROM settings for media type and forced-full-duplex are observed. 
++ The EEPROM media type should be left at the default "autoselect" unless using ++ 10base2 or AUI connections which cannot be reliably detected. ++ ++ III. Driver operation ++ ++ The 3c59x series use an interface that's very similar to the previous 3c5x9 ++ series. The primary interface is two programmed-I/O FIFOs, with an ++ alternate single-contiguous-region bus-master transfer (see next). ++ ++ The 3c900 "Boomerang" series uses a full-bus-master interface with separate ++ lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet, ++ DEC Tulip and Intel Speedo3. The first chip version retains a compatible ++ programmed-I/O interface that has been removed in 'B' and subsequent board ++ revisions. ++ ++ One extension that is advertised in a very large font is that the adapters ++ are capable of being bus masters. On the Vortex chip this capability was ++ only for a single contiguous region making it far less useful than the full ++ bus master capability. There is a significant performance impact of taking ++ an extra interrupt or polling for the completion of each transfer, as well ++ as difficulty sharing the single transfer engine between the transmit and ++ receive threads. Using DMA transfers is a win only with large blocks or ++ with the flawed versions of the Intel Orion motherboard PCI controller. ++ ++ The Boomerang chip's full-bus-master interface is useful, and has the ++ currently-unused advantages over other similar chips that queued transmit ++ packets may be reordered and receive buffer groups are associated with a ++ single frame. ++ ++ With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme. ++ Rather than a fixed intermediate receive buffer, this scheme allocates ++ full-sized skbuffs as receive buffers. The value RX_COPYBREAK is used as ++ the copying breakpoint: it is chosen to trade-off the memory wasted by ++ passing the full-sized skbuff to the queue layer for all frames vs. the ++ copying cost of copying a frame to a correctly-sized skbuff. ++ ++ IIIC. Synchronization ++ The driver runs as two independent, single-threaded flows of control. One ++ is the send-packet routine, which enforces single-threaded use by the ++ dev->tbusy flag. The other thread is the interrupt handler, which is single ++ threaded by the hardware and other software. ++ ++ IV. Notes ++ ++ Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development ++ 3c590, 3c595, and 3c900 boards. ++ The name "Vortex" is the internal 3Com project name for the PCI ASIC, and ++ the EISA version is called "Demon". According to Terry these names come ++ from rides at the local amusement park. ++ ++ The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes! ++ This driver only supports ethernet packets because of the skbuff allocation ++ limit of 4K. ++*/ ++ ++/* This table drives the PCI probe routines. It's mostly boilerplate in all ++ of the drivers, and will likely be provided by some future kernel. 
++*/ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8, ++ EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */ ++ HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100, ++ INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800, ++ EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 }; ++ ++enum vortex_chips { ++ CH_3C590 = 0, ++ CH_3C592, ++ CH_3C597, ++ CH_3C595_1, ++ CH_3C595_2, ++ ++ CH_3C595_3, ++ CH_3C900_1, ++ CH_3C900_2, ++ CH_3C900_3, ++ CH_3C900_4, ++ ++ CH_3C900_5, ++ CH_3C900B_FL, ++ CH_3C905_1, ++ CH_3C905_2, ++ CH_3C905B_1, ++ ++ CH_3C905B_2, ++ CH_3C905B_FX, ++ CH_3C905C, ++ CH_3C980, ++ CH_3C9805, ++ ++ CH_3CSOHO100_TX, ++ CH_3C555, ++ CH_3C556, ++ CH_3C556B, ++ CH_3C575, ++ ++ CH_3C575_1, ++ CH_3CCFE575, ++ CH_3CCFE575CT, ++ CH_3CCFE656, ++ CH_3CCFEM656, ++ ++ CH_3CCFEM656_1, ++ CH_3C450, ++}; ++ ++ ++/* note: this array directly indexed by above enums, and MUST ++ * be kept in sync with both the enums above, and the PCI device ++ * table below ++ */ ++static struct vortex_chip_info { ++ const char *name; ++ int flags; ++ int drv_flags; ++ int io_size; ++} vortex_info_tbl[] = { ++#define EISA_TBL_OFFSET 0 /* Offset of this entry for vortex_eisa_init */ ++ {"3c590 Vortex 10Mbps", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c592 EISA 10Mbps Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c597 EISA Fast Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c595 Vortex 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c595 Vortex 100baseT4", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ ++ {"3c595 Vortex 100base-MII", ++ PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, ++ {"3c900 Boomerang 10baseT", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, ++ {"3c900 Boomerang 10Mbps Combo", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, ++ {"3c900 Cyclone 10Mbps TPO", /* AKPM: from Don's 0.99M */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c900 Cyclone 10Mbps Combo", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ ++ {"3c900 Cyclone 10Mbps TPC", /* AKPM: from Don's 0.99M */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c900B-FL Cyclone 10base-FL", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c905 Boomerang 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, ++ {"3c905 Boomerang 100baseT4", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, ++ {"3c905B Cyclone 100baseTx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ ++ {"3c905B Cyclone 10/100/BNC", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c905B-FX Cyclone 100baseFx", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c905C Tornado", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c980 Cyclone", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ {"3c982 Dual Port Server Cyclone", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, ++ ++ {"3cSOHO100-TX Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {"3c555 Laptop Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, 
IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, }, ++ {"3c556 Laptop Tornado", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR| ++ HAS_HWCKSM, 128, }, ++ {"3c556B Laptop Hurricane", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR| ++ HAS_HWCKSM, 128, }, ++ {"3c575 [Megahertz] 10/100 LAN CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, ++ ++ {"3c575 Boomerang CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, ++ {"3CCFE575BT Cyclone CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ {"3CCFE575CT Tornado CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, ++ {"3CCFE656 Cyclone CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ {"3CCFEM656B Cyclone+Winmodem CardBus", ++ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ INVERT_LED_PWR|HAS_HWCKSM, 128, }, ++ ++ {"3CXFEM656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| ++ MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, ++ {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */ ++ PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, ++ {0,}, /* 0 terminated list. */ ++}; ++ ++ ++static struct pci_device_id vortex_pci_tbl[] = { ++ { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 }, ++ { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 }, ++ { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 }, ++ { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 }, ++ { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 }, ++ ++ { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 }, ++ { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 }, ++ { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 }, ++ { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 }, ++ { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 }, ++ ++ { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 }, ++ { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL }, ++ { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 }, ++ { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 }, ++ { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 }, ++ ++ { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 }, ++ { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX }, ++ { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C }, ++ { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 }, ++ { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 }, ++ ++ { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX }, ++ { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 }, ++ { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 }, ++ { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B }, ++ { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 }, ++ ++ { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 }, ++ { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 }, ++ { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT }, ++ { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 }, ++ { 0x10B7, 
0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 }, ++ ++ { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 }, ++ { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 }, ++ {0,} /* 0 terminated list. */ ++}; ++MODULE_DEVICE_TABLE(pci, vortex_pci_tbl); ++ ++ ++/* Operational definitions. ++ These are not used by other compilation units and thus are not ++ exported in a ".h" file. ++ ++ First the windows. There are eight register windows, with the command ++ and status registers available in each. ++*/ ++#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) ++#define EL3_CMD 0x0e ++#define EL3_STATUS 0x0e ++ ++/* The top five bits written to EL3_CMD are a command, the lower ++ 11 bits are the parameter, if applicable. ++ Note that 11 parameters bits was fine for ethernet, but the new chip ++ can handle FDDI length frames (~4500 octets) and now parameters count ++ 32-bit 'Dwords' rather than octets. */ ++ ++enum vortex_cmd { ++ TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, ++ RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, ++ UpStall = 6<<11, UpUnstall = (6<<11)+1, ++ DownStall = (6<<11)+2, DownUnstall = (6<<11)+3, ++ RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, ++ FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, ++ SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, ++ SetTxThreshold = 18<<11, SetTxStart = 19<<11, ++ StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11, ++ StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,}; ++ ++/* The SetRxFilter command accepts the following classes: */ ++enum RxFilter { ++ RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; ++ ++/* Bits in the general status register. */ ++enum vortex_status { ++ IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004, ++ TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, ++ IntReq = 0x0040, StatsFull = 0x0080, ++ DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10, ++ DMAInProgress = 1<<11, /* DMA controller is still busy.*/ ++ CmdInProgress = 1<<12, /* EL3_CMD is still busy.*/ ++}; ++ ++/* Register window 1 offsets, the window used in normal operation. ++ On the Vortex this window is always mapped at offsets 0x10-0x1f. */ ++enum Window1 { ++ TX_FIFO = 0x10, RX_FIFO = 0x10, RxErrors = 0x14, ++ RxStatus = 0x18, Timer=0x1A, TxStatus = 0x1B, ++ TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */ ++}; ++enum Window0 { ++ Wn0EepromCmd = 10, /* Window 0: EEPROM command register. */ ++ Wn0EepromData = 12, /* Window 0: EEPROM results register. */ ++ IntrStatus=0x0E, /* Valid in all windows. */ ++}; ++enum Win0_EEPROM_bits { ++ EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0, ++ EEPROM_EWENB = 0x30, /* Enable erasing/writing for 10 msec. */ ++ EEPROM_EWDIS = 0x00, /* Disable EWENB before 10 msec timeout. */ ++}; ++/* EEPROM locations. */ ++enum eeprom_offset { ++ PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3, ++ EtherLink3ID=7, IFXcvrIO=8, IRQLine=9, ++ NodeAddr01=10, NodeAddr23=11, NodeAddr45=12, ++ DriverTune=13, Checksum=15}; ++ ++enum Window2 { /* Window 2. */ ++ Wn2_ResetOptions=12, ++}; ++enum Window3 { /* Window 3: MAC/config bits. 
*/ ++ Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8, ++}; ++ ++#define BFEXT(value, offset, bitcount) \ ++ ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1)) ++ ++#define BFINS(lhs, rhs, offset, bitcount) \ ++ (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) | \ ++ (((rhs) & ((1 << (bitcount)) - 1)) << (offset))) ++ ++#define RAM_SIZE(v) BFEXT(v, 0, 3) ++#define RAM_WIDTH(v) BFEXT(v, 3, 1) ++#define RAM_SPEED(v) BFEXT(v, 4, 2) ++#define ROM_SIZE(v) BFEXT(v, 6, 2) ++#define RAM_SPLIT(v) BFEXT(v, 16, 2) ++#define XCVR(v) BFEXT(v, 20, 4) ++#define AUTOSELECT(v) BFEXT(v, 24, 1) ++ ++enum Window4 { /* Window 4: Xcvr/media bits. */ ++ Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10, ++}; ++enum Win4_Media_bits { ++ Media_SQE = 0x0008, /* Enable SQE error counting for AUI. */ ++ Media_10TP = 0x00C0, /* Enable link beat and jabber for 10baseT. */ ++ Media_Lnk = 0x0080, /* Enable just link beat for 100TX/100FX. */ ++ Media_LnkBeat = 0x0800, ++}; ++enum Window7 { /* Window 7: Bus Master control. */ ++ Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12, ++}; ++/* Boomerang bus master control registers. */ ++enum MasterCtrl { ++ PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c, ++ TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38, ++}; ++ ++/* The Rx and Tx descriptor lists. ++ Caution Alpha hackers: these types are 32 bits! Note also the 8 byte ++ alignment contraint on tx_ring[] and rx_ring[]. */ ++#define LAST_FRAG 0x80000000 /* Last Addr/Len pair in descriptor. */ ++#define DN_COMPLETE 0x00010000 /* This packet has been downloaded */ ++struct boom_rx_desc { ++ u32 next; /* Last entry points to 0. */ ++ s32 status; ++ u32 addr; /* Up to 63 addr/len pairs possible. */ ++ s32 length; /* Set LAST_FRAG to indicate last pair. */ ++}; ++/* Values for the Rx status entry. */ ++enum rx_desc_status { ++ RxDComplete=0x00008000, RxDError=0x4000, ++ /* See boomerang_rx() for actual error bits */ ++ IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27, ++ IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31, ++}; ++ ++// *** RTnet *** ++//#ifdef MAX_SKB_FRAGS ++//#define DO_ZEROCOPY 1 ++//#else ++#define DO_ZEROCOPY 0 ++//#endif ++ ++struct boom_tx_desc { ++ u32 next; /* Last entry points to 0. */ ++ s32 status; /* bits 0:12 length, others see below. */ ++#if DO_ZEROCOPY ++ struct { ++ u32 addr; ++ s32 length; ++ } frag[1+MAX_SKB_FRAGS]; ++#else ++ u32 addr; ++ s32 length; ++#endif ++}; ++ ++/* Values for the Tx status entry. */ ++enum tx_desc_status { ++ CRCDisable=0x2000, TxDComplete=0x8000, ++ AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000, ++ TxIntrUploaded=0x80000000, /* IRQ when in FIFO, but maybe not sent. */ ++}; ++ ++/* Chip features we care about in vp->capabilities, read from the EEPROM. */ ++enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 }; ++ ++struct vortex_private { ++ /* The Rx and Tx rings should be quad-word-aligned. */ ++ struct boom_rx_desc* rx_ring; ++ struct boom_tx_desc* tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ /* The addresses of transmit- and receive-in-place skbuffs. */ ++ ++ // *** RTnet *** ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ // *** RTnet *** ++ ++ struct rtnet_device *next_module; /* NULL if PCI device */ ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. 
*/ ++ struct net_device_stats stats; ++ struct rtskb *tx_skb; /* Packet being eaten by bus master ctrl. */ ++ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ ++ ++ /* PCI configuration space information. */ ++ struct pci_dev *pdev; ++ char *cb_fn_base; /* CardBus function status addr space. */ ++ ++ /* Some values here only for performance evaluation and path-coverage */ ++ int rx_nocopy, rx_copy, queued_packet, rx_csumhits; ++ int card_idx; ++ ++ /* The remainder are related to chip state, mostly media selection. */ ++ struct timer_list timer; /* Media selection timer. */ ++ struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ ++ int options; /* User-settable misc. driver options. */ ++ unsigned int media_override:4, /* Passed-in media type. */ ++ default_media:4, /* Read from the EEPROM/Wn3_Config. */ ++ full_duplex:1, force_fd:1, autoselect:1, ++ bus_master:1, /* Vortex can only do a fragment bus-m. */ ++ full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang */ ++ flow_ctrl:1, /* Use 802.3x flow control (PAUSE only) */ ++ partner_flow_ctrl:1, /* Partner supports flow control */ ++ has_nway:1, ++ enable_wol:1, /* Wake-on-LAN is enabled */ ++ pm_state_valid:1, /* power_state[] has sane contents */ ++ open:1, ++ medialock:1, ++ must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */ ++ int drv_flags; ++ u16 status_enable; ++ u16 intr_enable; ++ u16 available_media; /* From Wn3_Options. */ ++ u16 capabilities, info1, info2; /* Various, from EEPROM. */ ++ u16 advertising; /* NWay media advertisement */ ++ unsigned char phys[2]; /* MII device addresses. */ ++ u16 deferred; /* Resend these interrupts when we ++ * bale from the ISR */ ++ u16 io_size; /* Size of PCI region (for release_region) */ ++ rtdm_lock_t lock; /* Serialise access to device & its vortex_private */ ++ spinlock_t mdio_lock; /* Serialise access to mdio hardware */ ++ u32 power_state[16]; ++ rtdm_irq_t irq_handle; ++}; ++ ++/* The action to take with a media selection timer tick. ++ Note that we deviate from the 3Com order by checking 10base2 before AUI. ++*/ ++enum xcvr_types { ++ XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx, ++ XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10, ++}; ++ ++static struct media_table { ++ char *name; ++ unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */ ++ mask:8, /* The transceiver-present bit in Wn3_Config.*/ ++ next:8; /* The media type to try next. */ ++ int wait; /* Time before we check media status. 
*/ ++} media_tbl[] = { ++ { "10baseT", Media_10TP,0x08, XCVR_10base2, (14*HZ)/10}, ++ { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10}, ++ { "undefined", 0, 0x80, XCVR_10baseT, 10000}, ++ { "10base2", 0, 0x10, XCVR_AUI, (1*HZ)/10}, ++ { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10}, ++ { "100baseFX", Media_Lnk, 0x04, XCVR_MII, (14*HZ)/10}, ++ { "MII", 0, 0x41, XCVR_10baseT, 3*HZ }, ++ { "undefined", 0, 0x01, XCVR_10baseT, 10000}, ++ { "Autonegotiate", 0, 0x41, XCVR_10baseT, 3*HZ}, ++ { "MII-External", 0, 0x41, XCVR_10baseT, 3*HZ }, ++ { "Default", 0, 0xFF, XCVR_10baseT, 10000}, ++}; ++ ++static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq, ++ int chip_idx, int card_idx); ++static void vortex_up(struct rtnet_device *rtdev); ++static void vortex_down(struct rtnet_device *rtdev); ++static int vortex_open(struct rtnet_device *rtdev); ++static void mdio_sync(long ioaddr, int bits); ++static int mdio_read(struct rtnet_device *rtdev, int phy_id, int location); ++static void mdio_write(struct rtnet_device *vp, int phy_id, int location, int value); ++ ++// *** RTnet *** ++//static void vortex_timer(unsigned long arg); ++//static void rx_oom_timer(unsigned long arg); ++// *** RTnet *** ++ ++static int vortex_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int boomerang_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int vortex_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int boomerang_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int vortex_interrupt(rtdm_irq_t *irq_handle); ++static int boomerang_interrupt(rtdm_irq_t *irq_handle); ++static int vortex_close(struct rtnet_device *rtdev); ++static void dump_tx_ring(struct rtnet_device *rtdev); ++ ++static void update_stats(long ioaddr, struct rtnet_device *dev); ++static struct net_device_stats *vortex_get_stats(struct rtnet_device *rtdev); ++ ++static void set_rx_mode(struct rtnet_device *rtdev); ++ ++// *** RTnet *** ++//static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++//static void vortex_tx_timeout(struct net_device *dev); ++// *** RTnet *** ++ ++static void acpi_set_WOL(struct rtnet_device *rtdev); ++ ++/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */ ++/* Option count limit only -- unlimited interfaces are supported. 
*/ ++#define MAX_UNITS 8 ++static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,}; ++static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++module_param(debug, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++module_param_array(hw_checksums, int, NULL, 0444); ++module_param_array(flow_ctrl, int, NULL, 0444); ++module_param_array(enable_wol, int, NULL, 0444); ++/*** RTnet *** ++ MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param(max_interrupt_work, int, 0444); ++/*** RTnet *** ++ MODULE_PARM(compaq_ioaddr, "i"); ++ MODULE_PARM(compaq_irq, "i"); ++ MODULE_PARM(compaq_device_id, "i"); ++ MODULE_PARM(watchdog, "i"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(debug, "3c59x debug level (0-6)"); ++MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex"); ++MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)"); ++MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)"); ++MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)"); ++MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)"); ++/*** RTnet *** ++ MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt"); ++/*** RTnet *** ++ MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)"); ++ MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds"); ++ *** RTnet ***/ ++ ++/* #define dev_alloc_skb dev_alloc_skb_debug */ ++ ++/* A list of all installed Vortex EISA devices, for removing the driver module. */ ++static struct rtnet_device *root_vortex_eisa_dev; ++ ++/* Variables to work-around the Compaq PCI BIOS32 problem. */ ++// *** RTnet *** ++//static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900; ++// *** RTnet *** ++ ++static int vortex_cards_found; ++ ++#ifdef CONFIG_PM ++ ++#endif /* CONFIG_PM */ ++ ++/* returns count found (>= 0), or negative on error */ ++ ++/* returns count (>= 0), or negative on error */ ++static int vortex_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int rc; ++ ++ if( vortex_cards_found >= cards ) ++ return -ENODEV; ++ ++ /* wake up and enable device */ ++ if (pci_enable_device (pdev)) { ++ rc = -EIO; ++ } else { ++ rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq, ++ ent->driver_data, vortex_cards_found); ++ if (rc == 0) ++ vortex_cards_found++; ++ } ++ return rc; ++} ++ ++/* ++ * Start up the PCI device which is described by *pdev. ++ * Return 0 on success. 
++ * ++ * NOTE: pdev can be NULL, for the case of an EISA driver ++ */ ++static int vortex_probe1(struct pci_dev *pdev, ++ long ioaddr, int irq, ++ int chip_idx, int card_idx) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = NULL; ++ // *** RTnet *** ++ ++ struct vortex_private *vp; ++ int option; ++ unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ ++ int i, step; ++ static int printed_version; ++ int retval, print_info; ++ struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx]; ++ const char *print_name; ++ ++ ++ ++ if (!printed_version) { ++ printk (version); ++ printed_version = 1; ++ } ++ ++ print_name = pdev ? pci_name(pdev) : "3c59x"; ++ ++ // *** RTnet *** ++ rtdev = rt_alloc_etherdev(sizeof(*vp), RX_RING_SIZE * 2 + TX_RING_SIZE); ++ retval = -ENOMEM; ++ if (!rtdev) { ++ printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n"); ++ goto out; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ memset(rtdev->priv, 0, sizeof(*vp)); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ // *** RTnet *** ++ ++ vp = rtdev->priv; ++ ++ /* The lower four bits are the media type. */ ++ if (rtdev->mem_start) { ++ /* ++ * The 'options' param is passed in as the third arg to the ++ * LILO 'ether=' argument for non-modular use ++ */ ++ option = rtdev->mem_start; ++ } ++ else if (card_idx < MAX_UNITS) ++ option = options[card_idx]; ++ else ++ option = -1; ++ ++ if (option > 0) { ++ if (option & 0x8000) ++ vortex_debug = 7; ++ if (option & 0x4000) ++ vortex_debug = 2; ++ if (option & 0x0400) ++ vp->enable_wol = 1; ++ } ++ ++ print_info = (vortex_debug > 1); ++ if (print_info) ++ printk (KERN_INFO "See Documentation/networking/vortex.txt\n"); ++ ++ printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n", ++ print_name, ++ pdev ? "PCI" : "EISA", ++ vci->name, ++ ioaddr); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = irq; ++ rtdev->mtu = mtu; ++ vp->drv_flags = vci->drv_flags; ++ vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0; ++ vp->io_size = vci->io_size; ++ vp->card_idx = card_idx; ++ ++ /* module list only for EISA devices */ ++ if (pdev == NULL) { ++ vp->next_module = root_vortex_eisa_dev; ++ root_vortex_eisa_dev = rtdev; ++ } ++ ++ /* PCI-only startup logic */ ++ if (pdev) { ++ /* EISA resources already marked, so only PCI needs to do this here */ ++ /* Ignore return value, because Cardbus drivers already allocate for us */ ++ if (!request_region(ioaddr, vci->io_size, print_name)) ++ printk(KERN_INFO "rt_3c50x: request region failed\n"); ++ else ++ vp->must_free_region = 1; ++ ++ /* enable bus-mastering if necessary */ ++ if (vci->flags & PCI_USES_MASTER) ++ pci_set_master (pdev); ++ ++ if (vci->drv_flags & IS_VORTEX) { ++ u8 pci_latency; ++ u8 new_latency = 248; ++ ++ /* Check the PCI latency value. On the 3c590 series the latency timer ++ must be set to the maximum value to avoid data corruption that occurs ++ when the timer expires during a transfer. This bug exists the Vortex ++ chip only. */ ++ pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency); ++ if (pci_latency < new_latency) { ++ printk(KERN_INFO "%s: Overriding PCI latency" ++ " timer (CFLT) setting of %d, new value is %d.\n", ++ print_name, pci_latency, new_latency); ++ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency); ++ } ++ } ++ } ++ ++ rtdm_lock_init(&vp->lock); ++ spin_lock_init(&vp->mdio_lock); ++ vp->pdev = pdev; ++ ++ /* Makes sure rings are at least 16 byte aligned. 
*/ ++ vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ &vp->rx_ring_dma); ++ retval = -ENOMEM; ++ if (vp->rx_ring == 0) ++ goto free_region; ++ ++ vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE); ++ vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE; ++ ++ /* if we are a PCI driver, we store info in pdev->driver_data ++ * instead of a module list */ ++ if (pdev) ++ pci_set_drvdata(pdev, rtdev); ++ ++ vp->media_override = 7; ++ if (option >= 0) { ++ vp->media_override = ((option & 7) == 2) ? 0 : option & 15; ++ if (vp->media_override != 7) ++ vp->medialock = 1; ++ vp->full_duplex = (option & 0x200) ? 1 : 0; ++ vp->bus_master = (option & 16) ? 1 : 0; ++ } ++ ++ if (card_idx < MAX_UNITS) { ++ if (full_duplex[card_idx] > 0) ++ vp->full_duplex = 1; ++ if (flow_ctrl[card_idx] > 0) ++ vp->flow_ctrl = 1; ++ if (enable_wol[card_idx] > 0) ++ vp->enable_wol = 1; ++ } ++ ++ vp->force_fd = vp->full_duplex; ++ vp->options = option; ++ ++ /* Read the station address from the EEPROM. */ ++ EL3WINDOW(0); ++ { ++ int base; ++ ++ if (vci->drv_flags & EEPROM_8BIT) ++ base = 0x230; ++ else if (vci->drv_flags & EEPROM_OFFSET) ++ base = EEPROM_Read + 0x30; ++ else ++ base = EEPROM_Read; ++ ++ for (i = 0; i < 0x40; i++) { ++ int timer; ++ outw(base + i, ioaddr + Wn0EepromCmd); ++ /* Pause for at least 162 us. for the read to take place. */ ++ for (timer = 10; timer >= 0; timer--) { ++ udelay(162); ++ if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0) ++ break; ++ } ++ eeprom[i] = inw(ioaddr + Wn0EepromData); ++ } ++ } ++ for (i = 0; i < 0x18; i++) ++ checksum ^= eeprom[i]; ++ checksum = (checksum ^ (checksum >> 8)) & 0xff; ++ if (checksum != 0x00) { /* Grrr, needless incompatible change 3Com. */ ++ while (i < 0x21) ++ checksum ^= eeprom[i++]; ++ checksum = (checksum ^ (checksum >> 8)) & 0xff; ++ } ++ if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO)) ++ printk(" ***INVALID CHECKSUM %4.4x*** ", checksum); ++ ++ for (i = 0; i < 3; i++) ++ ((u16 *)rtdev->dev_addr)[i] = htons(eeprom[i + 10]); ++ if (print_info) { ++ for (i = 0; i < 6; i++) ++ printk("%c%2.2x", i ? ':' : ' ', rtdev->dev_addr[i]); ++ } ++ EL3WINDOW(2); ++ for (i = 0; i < 6; i++) ++ outb(rtdev->dev_addr[i], ioaddr + i); ++ ++#ifdef __sparc__ ++ if (print_info) ++ printk(", IRQ %s\n", __irq_itoa(rtdev->irq)); ++#else ++ if (print_info) ++ printk(", IRQ %d\n", rtdev->irq); ++ /* Tell them about an invalid IRQ. */ ++ if (rtdev->irq <= 0 || rtdev->irq >= NR_IRQS) ++ printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! 
***\n", ++ rtdev->irq); ++#endif ++ ++ EL3WINDOW(4); ++ step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1; ++ if (print_info) { ++ printk(KERN_INFO " product code %02x%02x rev %02x.%d date %02d-" ++ "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14], ++ step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9); ++ } ++ ++ ++ if (pdev && vci->drv_flags & HAS_CB_FNS) { ++ unsigned long fn_st_addr; /* Cardbus function status space */ ++ unsigned short n; ++ ++ fn_st_addr = pci_resource_start (pdev, 2); ++ if (fn_st_addr) { ++ vp->cb_fn_base = ioremap(fn_st_addr, 128); ++ retval = -ENOMEM; ++ if (!vp->cb_fn_base) ++ goto free_ring; ++ } ++ if (print_info) { ++ printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n", ++ print_name, fn_st_addr, vp->cb_fn_base); ++ } ++ EL3WINDOW(2); ++ ++ n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; ++ if (vp->drv_flags & INVERT_LED_PWR) ++ n |= 0x10; ++ if (vp->drv_flags & INVERT_MII_PWR) ++ n |= 0x4000; ++ outw(n, ioaddr + Wn2_ResetOptions); ++ } ++ ++ /* Extract our information from the EEPROM data. */ ++ vp->info1 = eeprom[13]; ++ vp->info2 = eeprom[15]; ++ vp->capabilities = eeprom[16]; ++ ++ if (vp->info1 & 0x8000) { ++ vp->full_duplex = 1; ++ if (print_info) ++ printk(KERN_INFO "Full duplex capable\n"); ++ } ++ ++ { ++ static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; ++ unsigned int config; ++ EL3WINDOW(3); ++ vp->available_media = inw(ioaddr + Wn3_Options); ++ if ((vp->available_media & 0xff) == 0) /* Broken 3c916 */ ++ vp->available_media = 0x40; ++ config = inl(ioaddr + Wn3_Config); ++ if (print_info) { ++ printk(KERN_DEBUG " Internal config register is %4.4x, " ++ "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options)); ++ printk(KERN_INFO " %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n", ++ 8 << RAM_SIZE(config), ++ RAM_WIDTH(config) ? "word" : "byte", ++ ram_split[RAM_SPLIT(config)], ++ AUTOSELECT(config) ? "autoselect/" : "", ++ XCVR(config) > XCVR_ExtMII ? "" : ++ media_tbl[XCVR(config)].name); ++ } ++ vp->default_media = XCVR(config); ++ if (vp->default_media == XCVR_NWAY) ++ vp->has_nway = 1; ++ vp->autoselect = AUTOSELECT(config); ++ } ++ ++ if (vp->media_override != 7) { ++ printk(KERN_INFO "%s: Media override to transceiver type %d (%s).\n", ++ print_name, vp->media_override, ++ media_tbl[vp->media_override].name); ++ rtdev->if_port = vp->media_override; ++ } else ++ rtdev->if_port = vp->default_media; ++ ++ if (rtdev->if_port == XCVR_MII || rtdev->if_port == XCVR_NWAY) { ++ int phy, phy_idx = 0; ++ EL3WINDOW(4); ++ mii_preamble_required++; ++ mii_preamble_required++; ++ mdio_read(rtdev, 24, 1); ++ for (phy = 0; phy < 32 && phy_idx < 1; phy++) { ++ int mii_status, phyx; ++ ++ /* ++ * For the 3c905CX we look at index 24 first, because it bogusly ++ * reports an external PHY at all indices ++ */ ++ if (phy == 0) ++ phyx = 24; ++ else if (phy <= 24) ++ phyx = phy - 1; ++ else ++ phyx = phy; ++ mii_status = mdio_read(rtdev, phyx, 1); ++ if (mii_status && mii_status != 0xffff) { ++ vp->phys[phy_idx++] = phyx; ++ if (print_info) { ++ printk(KERN_INFO " MII transceiver found at address %d," ++ " status %4x.\n", phyx, mii_status); ++ } ++ if ((mii_status & 0x0040) == 0) ++ mii_preamble_required++; ++ } ++ } ++ mii_preamble_required--; ++ if (phy_idx == 0) { ++ printk(KERN_WARNING" ***WARNING*** No MII transceivers found!\n"); ++ vp->phys[0] = 24; ++ } else { ++ vp->advertising = mdio_read(rtdev, vp->phys[0], 4); ++ if (vp->full_duplex) { ++ /* Only advertise the FD media types. 
*/ ++ vp->advertising &= ~0x02A0; ++ mdio_write(rtdev, vp->phys[0], 4, vp->advertising); ++ } ++ } ++ } ++ ++ if (vp->capabilities & CapBusMaster) { ++ vp->full_bus_master_tx = 1; ++ if (print_info) { ++ printk(KERN_INFO " Enabling bus-master transmits and %s receives.\n", ++ (vp->info2 & 1) ? "early" : "whole-frame" ); ++ } ++ vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2; ++ vp->bus_master = 0; /* AKPM: vortex only */ ++ } ++ ++ // *** RTnet *** ++ /* The 3c59x-specific entries in the device structure. */ ++ rtdev->open = vortex_open; ++ if (vp->full_bus_master_tx) { ++ rtdev->hard_start_xmit = boomerang_start_xmit; ++ /* Actually, it still should work with iommu. */ ++ rtdev->features |= NETIF_F_SG; ++ if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) || ++ (hw_checksums[card_idx] == 1)) { ++ rtdev->features |= NETIF_F_IP_CSUM; ++ } ++ } else { ++ rtdev->hard_start_xmit = vortex_start_xmit; ++ } ++ rtdev->get_stats = vortex_get_stats; ++ ++ if (print_info) { ++ printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n", ++ print_name, ++ (rtdev->features & NETIF_F_SG) ? "en":"dis", ++ (rtdev->features & NETIF_F_IP_CSUM) ? "en":"dis"); ++ } ++ ++ rtdev->stop = vortex_close; ++ retval = rt_register_rtnetdev(rtdev); ++ if (retval) { ++ printk(KERN_ERR "rt_3c59x: rtnet device registration failed %d\n",retval); ++ goto free_ring; ++ } ++ return 0; ++ ++ // *** RTnet *** ++ ++ free_ring: ++ pci_free_consistent(pdev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, ++ vp->rx_ring_dma); ++ free_region: ++ if (vp->must_free_region) ++ release_region(ioaddr, vci->io_size); ++ rtdev_free (rtdev); ++ printk(KERN_ERR PFX "vortex_probe1 fails. Returns %d\n", retval); ++ out: ++ return retval; ++} ++ ++static void ++issue_and_wait(struct rtnet_device *rtdev, int cmd) ++{ ++ int i; ++ ++ outw(cmd, rtdev->base_addr + EL3_CMD); ++ for (i = 0; i < 2000; i++) { ++ if (!(inw(rtdev->base_addr + EL3_STATUS) & CmdInProgress)) ++ return; ++ } ++ ++ /* OK, that didn't work. Do it the slow way. One second */ ++ for (i = 0; i < 100000; i++) { ++ if (!(inw(rtdev->base_addr + EL3_STATUS) & CmdInProgress)) { ++ if (vortex_debug > 1) ++ rtdm_printk(KERN_INFO "%s: command 0x%04x took %d usecs\n", ++ rtdev->name, cmd, i * 10); ++ return; ++ } ++ udelay(10); ++ } ++ rtdm_printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n", ++ rtdev->name, cmd, inw(rtdev->base_addr + EL3_STATUS)); ++} ++ ++static void ++vortex_up(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ unsigned int config; ++ int i; ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_set_power_state(vp->pdev, 0); /* Go active */ ++ pci_restore_state(vp->pdev, vp->power_state); ++ } ++ ++ /* Before initializing select the active media port. */ ++ EL3WINDOW(3); ++ config = inl(ioaddr + Wn3_Config); ++ ++ if (vp->media_override != 7) { ++ printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n", ++ rtdev->name, vp->media_override, ++ media_tbl[vp->media_override].name); ++ rtdev->if_port = vp->media_override; ++ } else if (vp->autoselect) { ++ if (vp->has_nway) { ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: using NWAY device table, not %d\n", ++ rtdev->name, rtdev->if_port); ++ rtdev->if_port = XCVR_NWAY; ++ } else { ++ /* Find first available media type, starting with 100baseTx. */ ++ rtdev->if_port = XCVR_100baseTx; ++ while (! 
(vp->available_media & media_tbl[rtdev->if_port].mask)) ++ rtdev->if_port = media_tbl[rtdev->if_port].next; ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: first available media type: %s\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ } ++ } else { ++ rtdev->if_port = vp->default_media; ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: using default media %s\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ } ++ ++ init_timer(&vp->timer); ++ vp->timer.expires = RUN_AT(media_tbl[rtdev->if_port].wait); ++ vp->timer.data = (unsigned long)rtdev; ++ // *** RTnet vp->timer.function = vortex_timer; /* timer handler */ ++ // *** RTnet add_timer(&vp->timer); ++ ++ init_timer(&vp->rx_oom_timer); ++ vp->rx_oom_timer.data = (unsigned long)rtdev; ++ // **** RTnet *** vp->rx_oom_timer.function = rx_oom_timer; ++ ++ if (vortex_debug > 1) ++ printk(KERN_DEBUG "%s: Initial media type %s.\n", ++ rtdev->name, media_tbl[rtdev->if_port].name); ++ ++ vp->full_duplex = vp->force_fd; ++ config = BFINS(config, rtdev->if_port, 20, 4); ++ if (vortex_debug > 6) ++ printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config); ++ outl(config, ioaddr + Wn3_Config); ++ ++ if (rtdev->if_port == XCVR_MII || rtdev->if_port == XCVR_NWAY) { ++ int mii_reg1, mii_reg5; ++ EL3WINDOW(4); ++ /* Read BMSR (reg1) only to clear old status. */ ++ mii_reg1 = mdio_read(rtdev, vp->phys[0], 1); ++ mii_reg5 = mdio_read(rtdev, vp->phys[0], 5); ++ if (mii_reg5 == 0xffff || mii_reg5 == 0x0000) ++ ; /* No MII device or no link partner report */ ++ else if ((mii_reg5 & 0x0100) != 0 /* 100baseTx-FD */ ++ || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */ ++ vp->full_duplex = 1; ++ vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0); ++ if (vortex_debug > 1) ++ printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x," ++ " info1 %04x, setting %s-duplex.\n", ++ rtdev->name, vp->phys[0], ++ mii_reg1, mii_reg5, ++ vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? "full" : "half"); ++ EL3WINDOW(3); ++ } ++ ++ /* Set the full-duplex bit. */ ++ outw( ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) | ++ (rtdev->mtu > 1500 ? 0x40 : 0) | ++ ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0), ++ ioaddr + Wn3_MAC_Ctrl); ++ ++ if (vortex_debug > 1) { ++ printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n", ++ rtdev->name, config); ++ } ++ ++ issue_and_wait(rtdev, TxReset); ++ /* ++ * Don't reset the PHY - that upsets autonegotiation during DHCP operations. ++ */ ++ issue_and_wait(rtdev, RxReset|0x04); ++ ++ outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); ++ ++ if (vortex_debug > 1) { ++ EL3WINDOW(4); ++ printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n", ++ rtdev->name, rtdev->irq, inw(ioaddr + Wn4_Media)); ++ } ++ ++ /* Set the station address and mask in window 2 each time opened. */ ++ EL3WINDOW(2); ++ for (i = 0; i < 6; i++) ++ outb(rtdev->dev_addr[i], ioaddr + i); ++ for (; i < 12; i+=2) ++ outw(0, ioaddr + i); ++ ++ if (vp->cb_fn_base) { ++ unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; ++ if (vp->drv_flags & INVERT_LED_PWR) ++ n |= 0x10; ++ if (vp->drv_flags & INVERT_MII_PWR) ++ n |= 0x4000; ++ outw(n, ioaddr + Wn2_ResetOptions); ++ } ++ ++ if (rtdev->if_port == XCVR_10base2) ++ /* Start the thinnet transceiver. 
We should really wait 50ms...*/ ++ outw(StartCoax, ioaddr + EL3_CMD); ++ if (rtdev->if_port != XCVR_NWAY) { ++ EL3WINDOW(4); ++ outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) | ++ media_tbl[rtdev->if_port].media_bits, ioaddr + Wn4_Media); ++ } ++ ++ /* Switch to the stats window, and clear all stats by reading. */ ++ outw(StatsDisable, ioaddr + EL3_CMD); ++ EL3WINDOW(6); ++ for (i = 0; i < 10; i++) ++ inb(ioaddr + i); ++ inw(ioaddr + 10); ++ inw(ioaddr + 12); ++ /* New: On the Vortex we must also clear the BadSSD counter. */ ++ EL3WINDOW(4); ++ inb(ioaddr + 12); ++ /* ..and on the Boomerang we enable the extra statistics bits. */ ++ outw(0x0040, ioaddr + Wn4_NetDiag); ++ ++ /* Switch to register set 7 for normal use. */ ++ EL3WINDOW(7); ++ ++ if (vp->full_bus_master_rx) { /* Boomerang bus master. */ ++ vp->cur_rx = vp->dirty_rx = 0; ++ /* Initialize the RxEarly register as recommended. */ ++ outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ outl(0x0020, ioaddr + PktStatus); ++ outl(vp->rx_ring_dma, ioaddr + UpListPtr); ++ } ++ if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ ++ vp->cur_tx = vp->dirty_tx = 0; ++ if (vp->drv_flags & IS_BOOMERANG) ++ outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */ ++ /* Clear the Rx, Tx rings. */ ++ for (i = 0; i < RX_RING_SIZE; i++) /* AKPM: this is done in vortex_open, too */ ++ vp->rx_ring[i].status = 0; ++ for (i = 0; i < TX_RING_SIZE; i++) ++ vp->tx_skbuff[i] = 0; ++ outl(0, ioaddr + DownListPtr); ++ } ++ /* Set receiver mode: presumably accept b-case and phys addr only. */ ++ set_rx_mode(rtdev); ++ outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ ++ ++// issue_and_wait(dev, SetTxStart|0x07ff); ++ outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ ++ outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ ++ /* Allow status bits to be seen. */ ++ vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete| ++ (vp->full_bus_master_tx ? DownComplete : TxAvailable) | ++ (vp->full_bus_master_rx ? UpComplete : RxComplete) | ++ (vp->bus_master ? DMADone : 0); ++ vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable | ++ (vp->full_bus_master_rx ? 0 : RxComplete) | ++ StatsFull | HostError | TxComplete | IntReq ++ | (vp->bus_master ? DMADone : 0) | UpComplete | DownComplete; ++ outw(vp->status_enable, ioaddr + EL3_CMD); ++ /* Ack all pending events, and set active indicator mask. */ ++ outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, ++ ioaddr + EL3_CMD); ++ outw(vp->intr_enable, ioaddr + EL3_CMD); ++ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ ++ writel(0x8000, vp->cb_fn_base + 4); ++ rtnetif_start_queue (rtdev); ++} ++ ++static int ++vortex_open(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int i; ++ int retval; ++ ++ // *** RTnet *** ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ if ((retval = rtdm_irq_request(&vp->irq_handle, rtdev->irq, ++ (vp->full_bus_master_rx ? boomerang_interrupt : vortex_interrupt), ++ 0, "rt_3c59x", rtdev))) { ++ printk(KERN_ERR "%s: Could not reserve IRQ %d\n", rtdev->name, rtdev->irq); ++ goto out; ++ } ++ // *** RTnet *** ++ ++ if (vp->full_bus_master_rx) { /* Boomerang bus master. 
*/ ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG "%s: Filling in the Rx ring.\n", rtdev->name); ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb; // *** RTnet ++ vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1)); ++ vp->rx_ring[i].status = 0; /* Clear complete bit. */ ++ vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG); ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ vp->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; /* Bad news! */ ++ // *** RTnet *** ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); ++ // *** RTnet *** ++ } ++ if (i != RX_RING_SIZE) { ++ int j; ++ printk(KERN_EMERG "%s: no memory for rx ring\n", rtdev->name); ++ for (j = 0; j < i; j++) { ++ if (vp->rx_skbuff[j]) { ++ dev_kfree_rtskb(vp->rx_skbuff[j]); ++ vp->rx_skbuff[j] = 0; ++ } ++ } ++ retval = -ENOMEM; ++ goto out_free_irq; ++ } ++ /* Wrap the ring. */ ++ vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma); ++ } ++ ++ vortex_up(rtdev); ++ return 0; ++ ++ out_free_irq: ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&vp->irq_handle))<0 ) ++ return i; ++ rt_stack_disconnect(rtdev); ++ // *** RTnet *** ++ out: ++ if (vortex_debug > 1) ++ printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", rtdev->name, retval); ++ return retval; ++} ++ ++/* ++ * Handle uncommon interrupt sources. This is a separate routine to minimize ++ * the cache impact. ++ */ ++static void ++vortex_error(struct rtnet_device *rtdev, int status, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int do_tx_reset = 0, reset_mask = 0; ++ unsigned char tx_status = 0; ++ int packets=0; ++ ++ if (vortex_debug > 2) { ++ rtdm_printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", rtdev->name, status); ++ } ++ ++ if (status & TxComplete) { /* Really "TxError" for us. */ ++ tx_status = inb(ioaddr + TxStatus); ++ /* Presumably a tx-timeout. We must merely re-enable. */ ++ if (vortex_debug > 2 ++ || (tx_status != 0x88 && vortex_debug > 0)) { ++ rtdm_printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n", ++ rtdev->name, tx_status); ++ if (tx_status == 0x82) { ++ rtdm_printk(KERN_ERR "Probably a duplex mismatch. See " ++ "Documentation/networking/vortex.txt\n"); ++ } ++ dump_tx_ring(rtdev); ++ } ++ if (tx_status & 0x14) vp->stats.tx_fifo_errors++; ++ if (tx_status & 0x38) vp->stats.tx_aborted_errors++; ++ outb(0, ioaddr + TxStatus); ++ if (tx_status & 0x30) { /* txJabber or txUnderrun */ ++ do_tx_reset = 1; ++ } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) { /* maxCollisions */ ++ do_tx_reset = 1; ++ reset_mask = 0x0108; /* Reset interface logic, but not download logic */ ++ } else { /* Merely re-enable the transmitter. */ ++ outw(TxEnable, ioaddr + EL3_CMD); ++ } ++ } ++ ++ if (status & RxEarly) { /* Rx early is unused. */ ++ vortex_rx(rtdev, &packets, time_stamp); ++ outw(AckIntr | RxEarly, ioaddr + EL3_CMD); ++ } ++ if (status & StatsFull) { /* Empty statistics. */ ++ static int DoneDidThat; ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: Updating stats.\n", rtdev->name); ++ // *** RTnet *** update_stats(ioaddr, dev); ++ /* HACK: Disable statistics as an interrupt source. */ ++ /* This occurs when we have the wrong media type! 
*/ ++ if (DoneDidThat == 0 && ++ inw(ioaddr + EL3_STATUS) & StatsFull) { ++ rtdm_printk(KERN_WARNING "%s: Updating statistics failed, disabling " ++ "stats as an interrupt source.\n", rtdev->name); ++ EL3WINDOW(5); ++ outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD); ++ vp->intr_enable &= ~StatsFull; ++ EL3WINDOW(7); ++ DoneDidThat++; ++ } ++ } ++ if (status & IntReq) { /* Restore all interrupt sources. */ ++ outw(vp->status_enable, ioaddr + EL3_CMD); ++ outw(vp->intr_enable, ioaddr + EL3_CMD); ++ } ++ if (status & HostError) { ++ u16 fifo_diag; ++ EL3WINDOW(4); ++ fifo_diag = inw(ioaddr + Wn4_FIFODiag); ++ rtdm_printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n", ++ rtdev->name, fifo_diag); ++ /* Adapter failure requires Tx/Rx reset and reinit. */ ++ if (vp->full_bus_master_tx) { ++ int bus_status = inl(ioaddr + PktStatus); ++ /* 0x80000000 PCI master abort. */ ++ /* 0x40000000 PCI target abort. */ ++ if (vortex_debug) ++ rtdm_printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", rtdev->name, bus_status); ++ ++ /* In this case, blow the card away */ ++ vortex_down(rtdev); ++ issue_and_wait(rtdev, TotalReset | 0xff); ++ vortex_up(rtdev); /* AKPM: bug. vortex_up() assumes that the rx ring is full. It may not be. */ ++ } else if (fifo_diag & 0x0400) ++ do_tx_reset = 1; ++ if (fifo_diag & 0x3000) { ++ /* Reset Rx fifo and upload logic */ ++ issue_and_wait(rtdev, RxReset|0x07); ++ /* Set the Rx filter to the current state. */ ++ set_rx_mode(rtdev); ++ outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ ++ outw(AckIntr | HostError, ioaddr + EL3_CMD); ++ } ++ } ++ ++ if (do_tx_reset) { ++ issue_and_wait(rtdev, TxReset|reset_mask); ++ outw(TxEnable, ioaddr + EL3_CMD); ++ if (!vp->full_bus_master_tx) ++ rtnetif_wake_queue(rtdev); ++ } ++} ++ ++static int ++vortex_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ ++ /* Put out the doubleword header... */ ++ outl(skb->len, ioaddr + TX_FIFO); ++ if (vp->bus_master) { ++ /* Set the bus-master controller to transfer the packet. */ ++ int len = (skb->len + 3) & ~3; ++ outl( vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, ++ len, PCI_DMA_TODEVICE), ++ ioaddr + Wn7_MasterAddr); ++ outw(len, ioaddr + Wn7_MasterLen); ++ vp->tx_skb = skb; ++ ++ rtdm_lock_irqsave(context); ++ if (unlikely(skb->xmit_stamp != NULL)) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ outw(StartDMADown, ioaddr + EL3_CMD); ++ rtdm_lock_irqrestore(context); ++ ++ /* rtnetif_wake_queue() will be called at the DMADone interrupt. */ ++ } else { ++ rtdm_printk("rt_3x59x: UNSUPPORTED CODE PATH (device is lacking DMA support)!\n"); ++ /* ... and the packet rounded to a doubleword. */ ++ outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); ++ dev_kfree_rtskb (skb); ++ if (inw(ioaddr + TxFree) > 1536) { ++ rtnetif_start_queue (rtdev); /* AKPM: redundant? */ ++ } else { ++ /* Interrupt us when the FIFO has room for max-sized packet. */ ++ rtnetif_stop_queue(rtdev); ++ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ } ++ } ++ ++ //rtdev->trans_start = jiffies; ++ ++ /* Clear the Tx status stack. */ ++ { ++ int tx_status; ++ int i = 32; ++ ++ while (--i > 0 && (tx_status = inb(ioaddr + TxStatus)) > 0) { ++ if (tx_status & 0x3C) { /* A Tx-disabling error occurred. 
*/ ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n", ++ rtdev->name, tx_status); ++ if (tx_status & 0x04) vp->stats.tx_fifo_errors++; ++ if (tx_status & 0x38) vp->stats.tx_aborted_errors++; ++ if (tx_status & 0x30) { ++ issue_and_wait(rtdev, TxReset); ++ } ++ outw(TxEnable, ioaddr + EL3_CMD); ++ } ++ outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */ ++ } ++ } ++ return 0; ++} ++ ++static int ++boomerang_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ /* Calculate the next Tx descriptor entry. */ ++ int entry = vp->cur_tx % TX_RING_SIZE; ++ struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE]; ++ rtdm_lockctx_t context; ++ ++ if (vortex_debug > 6) { ++ rtdm_printk(KERN_DEBUG "boomerang_start_xmit()\n"); ++ if (vortex_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n", ++ rtdev->name, vp->cur_tx); ++ } ++ ++ if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) { ++ if (vortex_debug > 0) ++ rtdm_printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n", ++ rtdev->name); ++ rtnetif_stop_queue(rtdev); ++ return 1; ++ } ++ ++ vp->tx_skbuff[entry] = skb; ++ ++ vp->tx_ring[entry].next = 0; ++#if DO_ZEROCOPY ++ if (skb->ip_summed != CHECKSUM_HW) ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); ++ else ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum); ++ ++ if (!skb_shinfo(skb)->nr_frags) { ++ { ++// int j; ++// for (j=0; jlen; j++) ++// { ++// rtdm_printk("%02x ", skb->data[j]); ++// } ++ ++ } ++ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG); ++ } else { ++ int i; ++ ++ vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len); ++ ++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { ++ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; ++ ++ vp->tx_ring[entry].frag[i+1].addr = ++ cpu_to_le32(pci_map_single(vp->pdev, // *** RTnet: page mapping correct? Or is this code never used? ++ (void*)page_address(frag->page) + frag->page_offset, ++ frag->size, PCI_DMA_TODEVICE)); ++ ++ if (i == skb_shinfo(skb)->nr_frags-1) ++ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG); ++ else ++ vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size); ++ } ++ } ++#else ++ vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->data, skb->len, PCI_DMA_TODEVICE)); ++ vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); ++ vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); ++#endif ++ ++ // *** RTnet *** ++ rtdm_irq_disable(&vp->irq_handle); ++ rtdm_lock_get(&vp->lock); ++ // *** RTnet *** ++ ++ /* Wait for the stall to complete. 
*/ ++ issue_and_wait(rtdev, DownStall); ++ ++ rtdm_lock_irqsave(context); ++ if (unlikely(skb->xmit_stamp != NULL)) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)); ++ if (inl(ioaddr + DownListPtr) == 0) { ++ outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr); ++ vp->queued_packet++; ++ } ++ ++ vp->cur_tx++; ++ if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) { ++ rtnetif_stop_queue (rtdev); ++ } else { /* Clear previous interrupt enable. */ ++#if defined(tx_interrupt_mitigation) ++ /* Dubious. If in boomeang_interrupt "faster" cyclone ifdef ++ * were selected, this would corrupt DN_COMPLETE. No? ++ */ ++ prev_entry->status &= cpu_to_le32(~TxIntrUploaded); ++#endif ++ } ++ outw(DownUnstall, ioaddr + EL3_CMD); ++ rtdm_lock_put_irqrestore(&vp->lock, context); ++ rtdm_irq_enable(&vp->irq_handle); ++ //rtdev->trans_start = jiffies; ++ return 0; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++ ++/* ++ * This is the ISR for the vortex series chips. ++ * full_bus_master_tx == 0 && full_bus_master_rx == 0 ++ */ ++ ++static int vortex_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ // *** RTnet *** ++ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr; ++ int status; ++ int work_done = max_interrupt_work; ++ ++ ioaddr = rtdev->base_addr; ++ rtdm_lock_get(&vp->lock); ++ ++ status = inw(ioaddr + EL3_STATUS); ++ ++ if (vortex_debug > 6) ++ printk("vortex_interrupt(). status=0x%4x\n", status); ++ ++ if ((status & IntLatch) == 0) ++ goto handler_exit; /* No interrupt: shared IRQs cause this */ ++ ++ if (status & IntReq) { ++ status |= vp->deferred; ++ vp->deferred = 0; ++ } ++ ++ if (status == 0xffff) /* h/w no longer present (hotplug)? */ ++ goto handler_exit; ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", ++ rtdev->name, status, inb(ioaddr + Timer)); ++ ++ do { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", ++ rtdev->name, status); ++ if (status & RxComplete) ++ vortex_rx(rtdev, &packets, &time_stamp); ++ ++ if (status & TxAvailable) { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG " TX room bit was handled.\n"); ++ /* There's room in the FIFO for a full-sized packet. */ ++ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); ++ rtnetif_wake_queue (rtdev); ++ } ++ ++ if (status & DMADone) { ++ if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) { ++ outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ ++ pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(vp->tx_skb); /* Release the transferred buffer */ ++ if (inw(ioaddr + TxFree) > 1536) { ++ /* ++ * AKPM: FIXME: I don't think we need this. If the queue was stopped due to ++ * insufficient FIFO room, the TxAvailable test will succeed and call ++ * rtnetif_wake_queue() ++ */ ++ rtnetif_wake_queue(rtdev); ++ } else { /* Interrupt when FIFO has room for max-sized packet. */ ++ outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); ++ rtnetif_stop_queue(rtdev); ++ } ++ } ++ } ++ /* Check for all uncommon interrupts at once. 
*/ ++ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) { ++ if (status == 0xffff) ++ break; ++ vortex_error(rtdev, status, &time_stamp); ++ } ++ ++ if (--work_done < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work in interrupt, status " ++ "%4.4x.\n", rtdev->name, status); ++ /* Disable all pending interrupts. */ ++ do { ++ vp->deferred |= status; ++ outw(SetStatusEnb | (~vp->deferred & vp->status_enable), ++ ioaddr + EL3_CMD); ++ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); ++ /* The timer will reenable interrupts. */ ++ mod_timer(&vp->timer, jiffies + 1*HZ); ++ break; ++ } ++ /* Acknowledge the IRQ. */ ++ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete)); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", ++ rtdev->name, status); ++ handler_exit: ++ rtdm_lock_put(&vp->lock); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* ++ * This is the ISR for the boomerang series chips. ++ * full_bus_master_tx == 1 && full_bus_master_rx == 1 ++ */ ++ ++static int boomerang_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ // *** RTnet *** ++ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr; ++ int status; ++ int work_done = max_interrupt_work; ++ ++ ioaddr = rtdev->base_addr; ++ ++ /* ++ * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout ++ * and boomerang_start_xmit ++ */ ++ rtdm_lock_get(&vp->lock); ++ ++ status = inw(ioaddr + EL3_STATUS); ++ ++ if (vortex_debug > 6) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status); ++ ++ if ((status & IntLatch) == 0) ++ goto handler_exit; /* No interrupt: shared IRQs can cause this */ ++ ++ if (status == 0xffff) { /* h/w no longer present (hotplug)? */ ++ if (vortex_debug > 1) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n"); ++ goto handler_exit; ++ } ++ ++ if (status & IntReq) { ++ status |= vp->deferred; ++ vp->deferred = 0; ++ } ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", ++ rtdev->name, status, inb(ioaddr + Timer)); ++ do { ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", ++ rtdev->name, status); ++ if (status & UpComplete) { ++ outw(AckIntr | UpComplete, ioaddr + EL3_CMD); ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n"); ++ boomerang_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ if (status & DownComplete) { ++ unsigned int dirty_tx = vp->dirty_tx; ++ ++ outw(AckIntr | DownComplete, ioaddr + EL3_CMD); ++ while (vp->cur_tx - dirty_tx > 0) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ if (inl(ioaddr + DownListPtr) == ++ vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)) ++ break; /* It still hasn't been processed. 
*/ ++ ++ if (vp->tx_skbuff[entry]) { ++ struct rtskb *skb = vp->tx_skbuff[entry]; ++#if DO_ZEROCOPY ++ int i; ++ for (i=0; i<=skb_shinfo(skb)->nr_frags; i++) ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[entry].frag[i].addr), ++ le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, ++ PCI_DMA_TODEVICE); ++#else ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); ++#endif ++ dev_kfree_rtskb(skb); ++ vp->tx_skbuff[entry] = 0; ++ } else { ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt: no skb!\n"); ++ } ++ /* vp->stats.tx_packets++; Counted below. */ ++ dirty_tx++; ++ } ++ vp->dirty_tx = dirty_tx; ++ if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) { ++ if (vortex_debug > 6) ++ rtdm_printk(KERN_DEBUG "boomerang_interrupt: wake queue\n"); ++ rtnetif_wake_queue (rtdev); ++ } ++ } ++ ++ /* Check for all uncommon interrupts at once. */ ++ if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) ++ vortex_error(rtdev, status, &time_stamp); ++ ++ if (--work_done < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work in interrupt, status " ++ "%4.4x.\n", rtdev->name, status); ++ /* Disable all pending interrupts. */ ++ do { ++ vp->deferred |= status; ++ outw(SetStatusEnb | (~vp->deferred & vp->status_enable), ++ ioaddr + EL3_CMD); ++ outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); ++ } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); ++ /* The timer will reenable interrupts. */ ++ mod_timer(&vp->timer, jiffies + 1*HZ); ++ break; ++ } ++ /* Acknowledge the IRQ. */ ++ outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); ++ if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ ++ writel(0x8000, vp->cb_fn_base + 4); ++ ++ } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", ++ rtdev->name, status); ++ handler_exit: ++ rtdm_lock_put(&vp->lock); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int vortex_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ short rx_status; ++ ++ if (vortex_debug > 5) ++ printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n", ++ inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); ++ while ((rx_status = inw(ioaddr + RxStatus)) > 0) { ++ if (rx_status & 0x4000) { /* Error, update stats. */ ++ unsigned char rx_error = inb(ioaddr + RxErrors); ++ if (vortex_debug > 2) ++ printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); ++ vp->stats.rx_errors++; ++ if (rx_error & 0x01) vp->stats.rx_over_errors++; ++ if (rx_error & 0x02) vp->stats.rx_length_errors++; ++ if (rx_error & 0x04) vp->stats.rx_frame_errors++; ++ if (rx_error & 0x08) vp->stats.rx_crc_errors++; ++ if (rx_error & 0x10) vp->stats.rx_length_errors++; ++ } else { ++ /* The packet length: up to 4.5K!. */ ++ int pkt_len = rx_status & 0x1fff; ++ struct rtskb *skb; ++ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len + 5); ++ if (vortex_debug > 4) ++ printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", ++ pkt_len, rx_status); ++ if (skb != NULL) { ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ /* 'skb_put()' points to the start of sk_buff data area. */ ++ if (vp->bus_master && ++ ! 
(inw(ioaddr + Wn7_MasterStatus) & 0x8000)) { ++ dma_addr_t dma = pci_map_single(vp->pdev, ++ rtskb_put(skb, pkt_len), ++ pkt_len, PCI_DMA_FROMDEVICE); ++ outl(dma, ioaddr + Wn7_MasterAddr); ++ outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); ++ outw(StartDMAUp, ioaddr + EL3_CMD); ++ while (inw(ioaddr + Wn7_MasterStatus) & 0x8000) ++ ; ++ pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE); ++ } else { ++ insl(ioaddr + RX_FIFO, rtskb_put(skb, pkt_len), ++ (pkt_len + 3) >> 2); ++ } ++ outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ //rtdev->last_rx = jiffies; ++ vp->stats.rx_packets++; ++ (*packets)++; ++ ++ /* Wait a limited time to go to next packet. */ ++ for (i = 200; i >= 0; i--) ++ if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress)) ++ break; ++ continue; ++ } else if (vortex_debug > 0) ++ printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of " ++ "size %d.\n", rtdev->name, pkt_len); ++ } ++ vp->stats.rx_dropped++; ++ issue_and_wait(rtdev, RxDiscard); ++ } ++ ++ return 0; ++} ++ ++static int ++boomerang_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int entry = vp->cur_rx % RX_RING_SIZE; ++ long ioaddr = rtdev->base_addr; ++ int rx_status; ++ int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; ++ ++ ++ if (vortex_debug > 5) ++ rtdm_printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS)); ++ ++ while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){ ++ if (--rx_work_limit < 0) ++ break; ++ if (rx_status & RxDError) { /* Error, update stats. */ ++ unsigned char rx_error = rx_status >> 16; ++ if (vortex_debug > 2) ++ rtdm_printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); ++ vp->stats.rx_errors++; ++ if (rx_error & 0x01) vp->stats.rx_over_errors++; ++ if (rx_error & 0x02) vp->stats.rx_length_errors++; ++ if (rx_error & 0x04) vp->stats.rx_frame_errors++; ++ if (rx_error & 0x08) vp->stats.rx_crc_errors++; ++ if (rx_error & 0x10) vp->stats.rx_length_errors++; ++ } else { ++ /* The packet length: up to 4.5K!. */ ++ int pkt_len = rx_status & 0x1fff; ++ struct rtskb *skb; ++ dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); ++ ++ if (vortex_debug > 4) ++ rtdm_printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", ++ pkt_len, rx_status); ++ ++ /* Check if the packet is long enough to just accept without ++ copying to a properly sized skbuff. */ ++ { ++/*** RTnet ***/ ++ /* Pass up the skbuff already on the Rx ring. */ ++ skb = vp->rx_skbuff[entry]; ++ vp->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); ++ pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ vp->rx_nocopy++; ++ } ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ { /* Use hardware checksum info. */ ++ int csum_bits = rx_status & 0xee000000; ++ if (csum_bits && ++ (csum_bits == (IPChksumValid | TCPChksumValid) || ++ csum_bits == (IPChksumValid | UDPChksumValid))) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ vp->rx_csumhits++; ++ } ++ } ++ rtnetif_rx(skb); ++ //rtdev->last_rx = jiffies; ++ vp->stats.rx_packets++; ++ (*packets)++; ++ } ++ entry = (++vp->cur_rx) % RX_RING_SIZE; ++ } ++ /* Refill the Rx ring buffers. 
*/ ++ for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { ++ struct rtskb *skb; ++ entry = vp->dirty_rx % RX_RING_SIZE; ++ if (vp->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ if (skb == NULL) { ++ static unsigned long last_jif; ++ if ((jiffies - last_jif) > 10 * HZ) { ++ rtdm_printk(KERN_WARNING "%s: memory shortage\n", rtdev->name); ++ last_jif = jiffies; ++ } ++ if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) ++ { ++ // *** RTnet *** mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); ++ ; ++ } ++ break; /* Bad news! */ ++ } ++ rtskb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ++ vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, ++ skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); ++ vp->rx_skbuff[entry] = skb; ++ } ++ vp->rx_ring[entry].status = 0; /* Clear complete bit. */ ++ outw(UpUnstall, ioaddr + EL3_CMD); ++ } ++ return 0; ++} ++ ++/* ++ * If we've hit a total OOM refilling the Rx ring we poll once a second ++ * for some memory. Otherwise there is no way to restart the rx process. ++ */ ++static void ++vortex_down(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ rtnetif_stop_queue (rtdev); ++ ++ del_timer_sync(&vp->rx_oom_timer); ++ del_timer_sync(&vp->timer); ++ ++ /* Turn off statistics ASAP. We update vp->stats below. */ ++ outw(StatsDisable, ioaddr + EL3_CMD); ++ ++ /* Disable the receiver and transmitter. */ ++ outw(RxDisable, ioaddr + EL3_CMD); ++ outw(TxDisable, ioaddr + EL3_CMD); ++ ++ if (rtdev->if_port == XCVR_10base2) ++ /* Turn off thinnet power. Green! */ ++ outw(StopCoax, ioaddr + EL3_CMD); ++ ++ outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD); ++ ++ // *** RTnet *** update_stats(ioaddr, dev); ++ if (vp->full_bus_master_rx) ++ outl(0, ioaddr + UpListPtr); ++ if (vp->full_bus_master_tx) ++ outl(0, ioaddr + DownListPtr); ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_save_state(vp->pdev, vp->power_state); ++ acpi_set_WOL(rtdev); ++ } ++} ++ ++static int ++vortex_close(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ ++ // rtnet_device is always present after vortex_open was called. ++ //if (netif_device_present(dev)) ++ // vortex_down(dev); ++ vortex_down(rtdev); ++ ++ if (vortex_debug > 1) { ++ printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n", ++ rtdev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus)); ++ printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d" ++ " tx_queued %d Rx pre-checksummed %d.\n", ++ rtdev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits); ++ } ++ ++#if DO_ZEROCOPY ++ if ( vp->rx_csumhits && ++ ((vp->drv_flags & HAS_HWCKSM) == 0) && ++ (hw_checksums[vp->card_idx] == -1)) { ++ printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", rtdev->name); ++ printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n"); ++ } ++#endif ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&vp->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ ++ // *** RTnet *** ++ ++ if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. 
*/ ++ for (i = 0; i < RX_RING_SIZE; i++) ++ if (vp->rx_skbuff[i]) { ++ pci_unmap_single( vp->pdev, le32_to_cpu(vp->rx_ring[i].addr), ++ PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(vp->rx_skbuff[i]); ++ vp->rx_skbuff[i] = 0; ++ } ++ } ++ if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (vp->tx_skbuff[i]) { ++ struct rtskb *skb = vp->tx_skbuff[i]; ++#if DO_ZEROCOPY ++ int k; ++ ++ for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) ++ pci_unmap_single(vp->pdev, ++ le32_to_cpu(vp->tx_ring[i].frag[k].addr), ++ le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, ++ PCI_DMA_TODEVICE); ++#else ++ pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); ++#endif ++ dev_kfree_rtskb(skb); ++ vp->tx_skbuff[i] = 0; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static void ++dump_tx_ring(struct rtnet_device *rtdev) ++{ ++ if (vortex_debug > 0) { ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ if (vp->full_bus_master_tx) { ++ int i; ++ int stalled = inl(ioaddr + PktStatus) & 0x04; /* Possible racy. But it's only debug stuff */ ++ ++ rtdm_printk(KERN_ERR " Flags; bus-master %d, dirty %d(%d) current %d(%d)\n", ++ vp->full_bus_master_tx, ++ vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE, ++ vp->cur_tx, vp->cur_tx % TX_RING_SIZE); ++ rtdm_printk(KERN_ERR " Transmit list %8.8x vs. %p.\n", ++ inl(ioaddr + DownListPtr), ++ &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); ++ issue_and_wait(rtdev, DownStall); ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ rtdm_printk(KERN_ERR " %d: @%p length %8.8x status %8.8x\n", i, ++ &vp->tx_ring[i], ++#if DO_ZEROCOPY ++ le32_to_cpu(vp->tx_ring[i].frag[0].length), ++#else ++ le32_to_cpu(vp->tx_ring[i].length), ++#endif ++ le32_to_cpu(vp->tx_ring[i].status)); ++ } ++ if (!stalled) ++ outw(DownUnstall, ioaddr + EL3_CMD); ++ } ++ } ++} ++ ++static struct net_device_stats *vortex_get_stats(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ rtdm_lockctx_t flags; ++ ++ if (rtnetif_device_present(rtdev)) { /* AKPM: Used to be netif_running */ ++ rtdm_lock_get_irqsave (&vp->lock, flags); ++ update_stats(rtdev->base_addr, rtdev); ++ rtdm_lock_put_irqrestore (&vp->lock, flags); ++ } ++ return &vp->stats; ++} ++ ++/* Update statistics. ++ Unlike with the EL3 we need not worry about interrupts changing ++ the window setting from underneath us, but we must still guard ++ against a race condition with a StatsUpdate interrupt updating the ++ table. This is done by checking that the ASM (!) code generated uses ++ atomic updates with '+='. ++*/ ++static void update_stats(long ioaddr, struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int old_window = inw(ioaddr + EL3_CMD); ++ ++ if (old_window == 0xffff) /* Chip suspended or ejected. */ ++ return; ++ /* Unlike the 3c5x9 we need not turn off stats updates while reading. */ ++ /* Switch to the stats window, and read everything. */ ++ EL3WINDOW(6); ++ vp->stats.tx_carrier_errors += inb(ioaddr + 0); ++ vp->stats.tx_heartbeat_errors += inb(ioaddr + 1); ++ /* Multiple collisions. 
*/ inb(ioaddr + 2); ++ vp->stats.collisions += inb(ioaddr + 3); ++ vp->stats.tx_window_errors += inb(ioaddr + 4); ++ vp->stats.rx_fifo_errors += inb(ioaddr + 5); ++ vp->stats.tx_packets += inb(ioaddr + 6); ++ vp->stats.tx_packets += (inb(ioaddr + 9)&0x30) << 4; ++ /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */ ++ /* Tx deferrals */ inb(ioaddr + 8); ++ /* Don't bother with register 9, an extension of registers 6&7. ++ If we do use the 6&7 values the atomic update assumption above ++ is invalid. */ ++ vp->stats.rx_bytes += inw(ioaddr + 10); ++ vp->stats.tx_bytes += inw(ioaddr + 12); ++ /* New: On the Vortex we must also clear the BadSSD counter. */ ++ EL3WINDOW(4); ++ inb(ioaddr + 12); ++ ++ { ++ u8 up = inb(ioaddr + 13); ++ vp->stats.rx_bytes += (up & 0x0f) << 16; ++ vp->stats.tx_bytes += (up & 0xf0) << 12; ++ } ++ ++ EL3WINDOW(old_window >> 13); ++ return; ++} ++ ++/* Pre-Cyclone chips have no documented multicast filter, so the only ++ multicast setting is to receive all multicast frames. At least ++ the chip has a very clean way to set the mode, unlike many others. */ ++static void set_rx_mode(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ int new_mode; ++ ++ if (rtdev->flags & IFF_PROMISC) { ++ if (vortex_debug > 0) ++ printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", rtdev->name); ++ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast; ++ } else ++ new_mode = SetRxFilter | RxStation | RxBroadcast; ++ ++ outw(new_mode, ioaddr + EL3_CMD); ++} ++ ++/* MII transceiver control section. ++ Read and write the MII registers using software-generated serial ++ MDIO protocol. See the MII specifications or DP83840A data sheet ++ for details. */ ++ ++/* The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues. */ ++#define mdio_delay() inl(mdio_addr) ++ ++#define MDIO_SHIFT_CLK 0x01 ++#define MDIO_DIR_WRITE 0x04 ++#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE) ++#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE) ++#define MDIO_DATA_READ 0x02 ++#define MDIO_ENB_IN 0x00 ++ ++/* Generate the preamble required for initial synchronization and ++ a few older transceivers. */ ++static void mdio_sync(long ioaddr, int bits) ++{ ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ ++ /* Establish sync by sending at least 32 logic ones. */ ++ while (-- bits >= 0) { ++ outw(MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++} ++ ++static int mdio_read(struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ int i; ++ long ioaddr = rtdev->base_addr; ++ int read_cmd = (0xf6 << 10) | (phy_id << 5) | location; ++ unsigned int retval = 0; ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ ++ spin_lock_bh(&vp->mdio_lock); ++ ++ if (mii_preamble_required) ++ mdio_sync(ioaddr, 32); ++ ++ /* Shift the read command bits out. */ ++ for (i = 14; i >= 0; i--) { ++ int dataval = (read_cmd&(1< 0; i--) { ++ outw(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0); ++ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ spin_unlock_bh(&vp->mdio_lock); ++ return retval & 0x20000 ? 
0xffff : retval>>1 & 0xffff; ++} ++ ++static void mdio_write(struct rtnet_device *rtdev, int phy_id, int location, int value) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value; ++ long mdio_addr = ioaddr + Wn4_PhysicalMgmt; ++ int i; ++ ++ spin_lock_bh(&vp->mdio_lock); ++ ++ if (mii_preamble_required) ++ mdio_sync(ioaddr, 32); ++ ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = (write_cmd&(1<= 0; i--) { ++ outw(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ spin_unlock_bh(&vp->mdio_lock); ++ return; ++} ++ ++/* ACPI: Advanced Configuration and Power Interface. */ ++/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */ ++static void acpi_set_WOL(struct rtnet_device *rtdev) ++{ ++ struct vortex_private *vp = (struct vortex_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */ ++ EL3WINDOW(7); ++ outw(2, ioaddr + 0x0c); ++ /* The RxFilter must accept the WOL frames. */ ++ outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD); ++ outw(RxEnable, ioaddr + EL3_CMD); ++ ++ /* Change the power state to D3; RxEnable doesn't take effect. */ ++ pci_enable_wake(vp->pdev, 0, 1); ++ pci_set_power_state(vp->pdev, 3); ++} ++ ++ ++static void vortex_remove_one (struct pci_dev *pdev) ++{ ++ struct vortex_private *vp; ++ // *** RTnet *** ++ struct rtnet_device *rtdev = pci_get_drvdata (pdev); ++ ++ ++ ++ if (!rtdev) { ++ printk("vortex_remove_one called for EISA device!\n"); ++ BUG(); ++ } ++ ++ vp = rtdev->priv; ++ ++ /* AKPM: FIXME: we should have ++ * if (vp->cb_fn_base) iounmap(vp->cb_fn_base); ++ * here ++ */ ++ rt_unregister_rtnetdev(rtdev); ++ /* Should really use issue_and_wait() here */ ++ outw(TotalReset|0x14, rtdev->base_addr + EL3_CMD); ++ ++ if (vp->pdev && vp->enable_wol) { ++ pci_set_power_state(vp->pdev, 0); /* Go active */ ++ if (vp->pm_state_valid) ++ pci_restore_state(vp->pdev, vp->power_state); ++ } ++ ++ pci_free_consistent(pdev, ++ sizeof(struct boom_rx_desc) * RX_RING_SIZE ++ + sizeof(struct boom_tx_desc) * TX_RING_SIZE, ++ vp->rx_ring, ++ vp->rx_ring_dma); ++ if (vp->must_free_region) ++ release_region(rtdev->base_addr, vp->io_size); ++ // *** RTnet *** ++ rtdev_free(rtdev); ++ // *** RTnet *** ++} ++ ++ ++static struct pci_driver vortex_driver = { ++ name: "3c59x_rt", ++ probe: vortex_init_one, ++ remove: vortex_remove_one, ++ id_table: vortex_pci_tbl, ++#ifdef CONFIG_PM ++ suspend: NULL, ++ resume: NULL, ++#endif ++}; ++ ++ ++static int vortex_have_pci; ++ ++ ++static int __init vortex_init (void) ++{ ++ int pci_rc; ++ ++ pci_rc = pci_register_driver(&vortex_driver); ++ ++ if (pci_rc == 0) ++ vortex_have_pci = 1; ++ ++ return (vortex_have_pci) ? 
0 : -ENODEV; ++} ++ ++ ++static void __exit vortex_cleanup (void) ++{ ++ if (vortex_have_pci) ++ pci_unregister_driver (&vortex_driver); ++} ++ ++module_init(vortex_init); ++module_exit(vortex_cleanup); +--- linux/drivers/xenomai/net/drivers/experimental/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/experimental/Kconfig 2021-04-07 16:01:27.576633667 +0800 +@@ -0,0 +1,17 @@ ++config XENO_DRIVERS_NET_EXP_DRIVERS ++ depends on XENO_DRIVERS_NET && PCI ++ bool "Experimental Drivers" ++ ++if XENO_DRIVERS_NET_EXP_DRIVERS ++ ++config XENO_DRIVERS_NET_DRV_3C59X ++ depends on PCI ++ tristate "3Com 59x" ++ ++config XENO_DRIVERS_NET_DRV_E1000_NEW ++ depends on PCI ++ tristate "New Intel(R) PRO/1000 (Gigabit)" ++ ++source "drivers/xenomai/net/drivers/experimental/rt2500/Kconfig" ++ ++endif +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/rt_mpc52xx_fec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/rt_mpc52xx_fec.h 2021-04-07 16:01:27.571633675 +0800 +@@ -0,0 +1,428 @@ ++/* ++ * arch/ppc/5xxx_io/fec.h ++ * ++ * Header file for the MPC5xxx Fast Ethernet Controller driver ++ * ++ * Author: Dale Farnsworth ++ * ++ * Copyright 2003 MontaVista Software ++ * ++ * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms ++ * of the GNU General Public License version 2. This program is licensed ++ * "as is" without any warranty of any kind, whether express or implied. ++ */ ++ ++#ifndef __RT_MPC52XX_FEC_H_ ++#define __RT_MPC52XX_FEC_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Define board specific options */ ++#define CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#define CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++#define CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++#undef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ ++/* Tunable constants */ ++#define MPC5xxx_FEC_RECV_BUFFER_SIZE 1518 /* max receive packet size */ ++#define MPC5xxx_FEC_RECV_BUFFER_SIZE_BC 2048 /* max receive packet size */ ++#define MPC5xxx_FEC_TBD_NUM 256 /* max transmit packets */ ++#define MPC5xxx_FEC_RBD_NUM 256 /* max receive packets */ ++ ++struct mpc5xxx_fec { ++ volatile u32 fec_id; /* FEC + 0x000 */ ++ volatile u32 ievent; /* FEC + 0x004 */ ++ volatile u32 imask; /* FEC + 0x008 */ ++ ++ volatile u32 reserved0[1]; /* FEC + 0x00C */ ++ volatile u32 r_des_active; /* FEC + 0x010 */ ++ volatile u32 x_des_active; /* FEC + 0x014 */ ++ volatile u32 r_des_active_cl; /* FEC + 0x018 */ ++ volatile u32 x_des_active_cl; /* FEC + 0x01C */ ++ volatile u32 ivent_set; /* FEC + 0x020 */ ++ volatile u32 ecntrl; /* FEC + 0x024 */ ++ ++ volatile u32 reserved1[6]; /* FEC + 0x028-03C */ ++ volatile u32 mii_data; /* FEC + 0x040 */ ++ volatile u32 mii_speed; /* FEC + 0x044 */ ++ volatile u32 mii_status; /* FEC + 0x048 */ ++ ++ volatile u32 reserved2[5]; /* FEC + 0x04C-05C */ ++ volatile u32 mib_data; /* FEC + 0x060 */ ++ volatile u32 mib_control; /* FEC + 0x064 */ ++ ++ volatile u32 reserved3[6]; /* FEC + 0x068-7C */ ++ volatile u32 r_activate; /* FEC + 0x080 */ ++ volatile u32 r_cntrl; /* FEC + 0x084 */ ++ volatile u32 r_hash; /* FEC + 0x088 */ ++ volatile u32 r_data; /* FEC + 0x08C */ ++ volatile u32 ar_done; /* FEC + 0x090 */ ++ volatile u32 r_test; /* FEC + 0x094 */ ++ volatile u32 r_mib; /* FEC + 0x098 */ ++ volatile u32 r_da_low; /* FEC + 0x09C */ ++ volatile u32 r_da_high; /* FEC + 0x0A0 */ ++ ++ volatile u32 reserved4[7]; /* FEC + 0x0A4-0BC */ ++ volatile u32 x_activate; /* FEC + 0x0C0 */ ++ volatile u32 x_cntrl; /* 
FEC + 0x0C4 */ ++ volatile u32 backoff; /* FEC + 0x0C8 */ ++ volatile u32 x_data; /* FEC + 0x0CC */ ++ volatile u32 x_status; /* FEC + 0x0D0 */ ++ volatile u32 x_mib; /* FEC + 0x0D4 */ ++ volatile u32 x_test; /* FEC + 0x0D8 */ ++ volatile u32 fdxfc_da1; /* FEC + 0x0DC */ ++ volatile u32 fdxfc_da2; /* FEC + 0x0E0 */ ++ volatile u32 paddr1; /* FEC + 0x0E4 */ ++ volatile u32 paddr2; /* FEC + 0x0E8 */ ++ volatile u32 op_pause; /* FEC + 0x0EC */ ++ ++ volatile u32 reserved5[4]; /* FEC + 0x0F0-0FC */ ++ volatile u32 instr_reg; /* FEC + 0x100 */ ++ volatile u32 context_reg; /* FEC + 0x104 */ ++ volatile u32 test_cntrl; /* FEC + 0x108 */ ++ volatile u32 acc_reg; /* FEC + 0x10C */ ++ volatile u32 ones; /* FEC + 0x110 */ ++ volatile u32 zeros; /* FEC + 0x114 */ ++ volatile u32 iaddr1; /* FEC + 0x118 */ ++ volatile u32 iaddr2; /* FEC + 0x11C */ ++ volatile u32 gaddr1; /* FEC + 0x120 */ ++ volatile u32 gaddr2; /* FEC + 0x124 */ ++ volatile u32 random; /* FEC + 0x128 */ ++ volatile u32 rand1; /* FEC + 0x12C */ ++ volatile u32 tmp; /* FEC + 0x130 */ ++ ++ volatile u32 reserved6[3]; /* FEC + 0x134-13C */ ++ volatile u32 fifo_id; /* FEC + 0x140 */ ++ volatile u32 x_wmrk; /* FEC + 0x144 */ ++ volatile u32 fcntrl; /* FEC + 0x148 */ ++ volatile u32 r_bound; /* FEC + 0x14C */ ++ volatile u32 r_fstart; /* FEC + 0x150 */ ++ volatile u32 r_count; /* FEC + 0x154 */ ++ volatile u32 r_lag; /* FEC + 0x158 */ ++ volatile u32 r_read; /* FEC + 0x15C */ ++ volatile u32 r_write; /* FEC + 0x160 */ ++ volatile u32 x_count; /* FEC + 0x164 */ ++ volatile u32 x_lag; /* FEC + 0x168 */ ++ volatile u32 x_retry; /* FEC + 0x16C */ ++ volatile u32 x_write; /* FEC + 0x170 */ ++ volatile u32 x_read; /* FEC + 0x174 */ ++ ++ volatile u32 reserved7[2]; /* FEC + 0x178-17C */ ++ volatile u32 fm_cntrl; /* FEC + 0x180 */ ++ volatile u32 rfifo_data; /* FEC + 0x184 */ ++ volatile u32 rfifo_status; /* FEC + 0x188 */ ++ volatile u32 rfifo_cntrl; /* FEC + 0x18C */ ++ volatile u32 rfifo_lrf_ptr; /* FEC + 0x190 */ ++ volatile u32 rfifo_lwf_ptr; /* FEC + 0x194 */ ++ volatile u32 rfifo_alarm; /* FEC + 0x198 */ ++ volatile u32 rfifo_rdptr; /* FEC + 0x19C */ ++ volatile u32 rfifo_wrptr; /* FEC + 0x1A0 */ ++ volatile u32 tfifo_data; /* FEC + 0x1A4 */ ++ volatile u32 tfifo_status; /* FEC + 0x1A8 */ ++ volatile u32 tfifo_cntrl; /* FEC + 0x1AC */ ++ volatile u32 tfifo_lrf_ptr; /* FEC + 0x1B0 */ ++ volatile u32 tfifo_lwf_ptr; /* FEC + 0x1B4 */ ++ volatile u32 tfifo_alarm; /* FEC + 0x1B8 */ ++ volatile u32 tfifo_rdptr; /* FEC + 0x1BC */ ++ volatile u32 tfifo_wrptr; /* FEC + 0x1C0 */ ++ ++ volatile u32 reset_cntrl; /* FEC + 0x1C4 */ ++ volatile u32 xmit_fsm; /* FEC + 0x1C8 */ ++ ++ volatile u32 reserved8[3]; /* FEC + 0x1CC-1D4 */ ++ volatile u32 rdes_data0; /* FEC + 0x1D8 */ ++ volatile u32 rdes_data1; /* FEC + 0x1DC */ ++ volatile u32 r_length; /* FEC + 0x1E0 */ ++ volatile u32 x_length; /* FEC + 0x1E4 */ ++ volatile u32 x_addr; /* FEC + 0x1E8 */ ++ volatile u32 cdes_data; /* FEC + 0x1EC */ ++ volatile u32 status; /* FEC + 0x1F0 */ ++ volatile u32 dma_control; /* FEC + 0x1F4 */ ++ volatile u32 des_cmnd; /* FEC + 0x1F8 */ ++ volatile u32 data; /* FEC + 0x1FC */ ++ ++ volatile u32 rmon_t_drop; /* FEC + 0x200 */ ++ volatile u32 rmon_t_packets; /* FEC + 0x204 */ ++ volatile u32 rmon_t_bc_pkt; /* FEC + 0x208 */ ++ volatile u32 rmon_t_mc_pkt; /* FEC + 0x20C */ ++ volatile u32 rmon_t_crc_align; /* FEC + 0x210 */ ++ volatile u32 rmon_t_undersize; /* FEC + 0x214 */ ++ volatile u32 rmon_t_oversize; /* FEC + 0x218 */ ++ volatile u32 rmon_t_frag; /* FEC + 0x21C */ ++ 
volatile u32 rmon_t_jab; /* FEC + 0x220 */ ++ volatile u32 rmon_t_col; /* FEC + 0x224 */ ++ volatile u32 rmon_t_p64; /* FEC + 0x228 */ ++ volatile u32 rmon_t_p65to127; /* FEC + 0x22C */ ++ volatile u32 rmon_t_p128to255; /* FEC + 0x230 */ ++ volatile u32 rmon_t_p256to511; /* FEC + 0x234 */ ++ volatile u32 rmon_t_p512to1023; /* FEC + 0x238 */ ++ volatile u32 rmon_t_p1024to2047; /* FEC + 0x23C */ ++ volatile u32 rmon_t_p_gte2048; /* FEC + 0x240 */ ++ volatile u32 rmon_t_octets; /* FEC + 0x244 */ ++ volatile u32 ieee_t_drop; /* FEC + 0x248 */ ++ volatile u32 ieee_t_frame_ok; /* FEC + 0x24C */ ++ volatile u32 ieee_t_1col; /* FEC + 0x250 */ ++ volatile u32 ieee_t_mcol; /* FEC + 0x254 */ ++ volatile u32 ieee_t_def; /* FEC + 0x258 */ ++ volatile u32 ieee_t_lcol; /* FEC + 0x25C */ ++ volatile u32 ieee_t_excol; /* FEC + 0x260 */ ++ volatile u32 ieee_t_macerr; /* FEC + 0x264 */ ++ volatile u32 ieee_t_cserr; /* FEC + 0x268 */ ++ volatile u32 ieee_t_sqe; /* FEC + 0x26C */ ++ volatile u32 t_fdxfc; /* FEC + 0x270 */ ++ volatile u32 ieee_t_octets_ok; /* FEC + 0x274 */ ++ ++ volatile u32 reserved9[2]; /* FEC + 0x278-27C */ ++ volatile u32 rmon_r_drop; /* FEC + 0x280 */ ++ volatile u32 rmon_r_packets; /* FEC + 0x284 */ ++ volatile u32 rmon_r_bc_pkt; /* FEC + 0x288 */ ++ volatile u32 rmon_r_mc_pkt; /* FEC + 0x28C */ ++ volatile u32 rmon_r_crc_align; /* FEC + 0x290 */ ++ volatile u32 rmon_r_undersize; /* FEC + 0x294 */ ++ volatile u32 rmon_r_oversize; /* FEC + 0x298 */ ++ volatile u32 rmon_r_frag; /* FEC + 0x29C */ ++ volatile u32 rmon_r_jab; /* FEC + 0x2A0 */ ++ ++ volatile u32 rmon_r_resvd_0; /* FEC + 0x2A4 */ ++ ++ volatile u32 rmon_r_p64; /* FEC + 0x2A8 */ ++ volatile u32 rmon_r_p65to127; /* FEC + 0x2AC */ ++ volatile u32 rmon_r_p128to255; /* FEC + 0x2B0 */ ++ volatile u32 rmon_r_p256to511; /* FEC + 0x2B4 */ ++ volatile u32 rmon_r_p512to1023; /* FEC + 0x2B8 */ ++ volatile u32 rmon_r_p1024to2047; /* FEC + 0x2BC */ ++ volatile u32 rmon_r_p_gte2048; /* FEC + 0x2C0 */ ++ volatile u32 rmon_r_octets; /* FEC + 0x2C4 */ ++ volatile u32 ieee_r_drop; /* FEC + 0x2C8 */ ++ volatile u32 ieee_r_frame_ok; /* FEC + 0x2CC */ ++ volatile u32 ieee_r_crc; /* FEC + 0x2D0 */ ++ volatile u32 ieee_r_align; /* FEC + 0x2D4 */ ++ volatile u32 r_macerr; /* FEC + 0x2D8 */ ++ volatile u32 r_fdxfc; /* FEC + 0x2DC */ ++ volatile u32 ieee_r_octets_ok; /* FEC + 0x2E0 */ ++ ++ volatile u32 reserved10[6]; /* FEC + 0x2E4-2FC */ ++ ++ volatile u32 reserved11[64]; /* FEC + 0x300-3FF */ ++}; ++ ++#define MPC5xxx_FEC_MIB_DISABLE 0x80000000 ++ ++#define MPC5xxx_FEC_IEVENT_HBERR 0x80000000 ++#define MPC5xxx_FEC_IEVENT_BABR 0x40000000 ++#define MPC5xxx_FEC_IEVENT_BABT 0x20000000 ++#define MPC5xxx_FEC_IEVENT_GRA 0x10000000 ++#define MPC5xxx_FEC_IEVENT_TFINT 0x08000000 ++#define MPC5xxx_FEC_IEVENT_MII 0x00800000 ++#define MPC5xxx_FEC_IEVENT_LATE_COL 0x00200000 ++#define MPC5xxx_FEC_IEVENT_COL_RETRY_LIM 0x00100000 ++#define MPC5xxx_FEC_IEVENT_XFIFO_UN 0x00080000 ++#define MPC5xxx_FEC_IEVENT_XFIFO_ERROR 0x00040000 ++#define MPC5xxx_FEC_IEVENT_RFIFO_ERROR 0x00020000 ++ ++#define MPC5xxx_FEC_IMASK_HBERR 0x80000000 ++#define MPC5xxx_FEC_IMASK_BABR 0x40000000 ++#define MPC5xxx_FEC_IMASK_BABT 0x20000000 ++#define MPC5xxx_FEC_IMASK_GRA 0x10000000 ++#define MPC5xxx_FEC_IMASK_MII 0x00800000 ++#define MPC5xxx_FEC_IMASK_LATE_COL 0x00200000 ++#define MPC5xxx_FEC_IMASK_COL_RETRY_LIM 0x00100000 ++#define MPC5xxx_FEC_IMASK_XFIFO_UN 0x00080000 ++#define MPC5xxx_FEC_IMASK_XFIFO_ERROR 0x00040000 ++#define MPC5xxx_FEC_IMASK_RFIFO_ERROR 0x00020000 ++ ++#define 
MPC5xxx_FEC_RCNTRL_MAX_FL_SHIFT 16 ++#define MPC5xxx_FEC_RCNTRL_LOOP 0x01 ++#define MPC5xxx_FEC_RCNTRL_DRT 0x02 ++#define MPC5xxx_FEC_RCNTRL_MII_MODE 0x04 ++#define MPC5xxx_FEC_RCNTRL_PROM 0x08 ++#define MPC5xxx_FEC_RCNTRL_BC_REJ 0x10 ++#define MPC5xxx_FEC_RCNTRL_FCE 0x20 ++ ++#define MPC5xxx_FEC_TCNTRL_GTS 0x00000001 ++#define MPC5xxx_FEC_TCNTRL_HBC 0x00000002 ++#define MPC5xxx_FEC_TCNTRL_FDEN 0x00000004 ++#define MPC5xxx_FEC_TCNTRL_TFC_PAUSE 0x00000008 ++#define MPC5xxx_FEC_TCNTRL_RFC_PAUSE 0x00000010 ++ ++#define MPC5xxx_FEC_ECNTRL_RESET 0x00000001 ++#define MPC5xxx_FEC_ECNTRL_ETHER_EN 0x00000002 ++ ++#define MPC5xxx_FEC_RESET_DELAY 50 /* uS */ ++ ++ ++/* Receive & Transmit Buffer Descriptor definitions */ ++struct mpc5xxx_fec_bd { ++ volatile u32 status; ++ volatile u32 data; ++}; ++ ++/* Receive data buffer format */ ++struct mpc5xxx_rbuf { ++ u8 data[MPC5xxx_FEC_RECV_BUFFER_SIZE_BC]; ++}; ++ ++struct fec_queue { ++ volatile struct mpc5xxx_fec_bd *bd_base; ++ struct rtskb **skb_base; ++ u16 last_index; ++ u16 start_index; ++ u16 finish_index; ++}; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#define MII_ADVERTISE_HALF (ADVERTISE_100HALF | ADVERTISE_10HALF | \ ++ ADVERTISE_CSMA) ++ ++#define MII_ADVERTISE_ALL (ADVERTISE_100FULL | ADVERTISE_10FULL | \ ++ MII_ADVERTISE_HALF) ++#ifdef PHY_INTERRUPT ++#define MII_ADVERTISE_DEFAULT MII_ADVERTISE_ALL ++#else ++#define MII_ADVERTISE_DEFAULT MII_ADVERTISE_HALF ++#endif ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct rtnet_device *dev, uint data); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++struct mpc5xxx_fec_priv { ++ int full_duplex; ++ int tx_full; ++ int r_tasknum; ++ int t_tasknum; ++ int r_irq; ++ int t_irq; ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t r_irq_handle; ++ rtdm_irq_t t_irq_handle; ++ u32 last_transmit_time; ++ u32 last_receive_time; ++ struct mpc5xxx_fec *fec; ++ struct mpc5xxx_sram_fec *sram; ++ struct mpc5xxx_gpio *gpio; ++ struct mpc5xxx_sdma *sdma; ++ struct fec_queue r_queue; ++ struct rtskb *rskb[MPC5xxx_FEC_RBD_NUM]; ++ struct fec_queue t_queue; ++ struct rtskb *tskb[MPC5xxx_FEC_TBD_NUM]; ++ rtdm_lock_t lock; ++ unsigned long open_time; ++ struct net_device_stats stats; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ uint phy_speed; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ volatile uint sequence_done; ++ uint link; ++ uint phy_addr; ++ ++ struct tq_struct link_up_task; ++ int duplex_change; ++ int link_up; ++ ++ struct timer_list phy_timer_list; ++ u16 old_status; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++}; ++ ++struct mpc5xxx_sram_fec { ++ volatile struct mpc5xxx_fec_bd tbd[MPC5xxx_FEC_TBD_NUM]; ++ volatile struct mpc5xxx_fec_bd rbd[MPC5xxx_FEC_RBD_NUM]; ++}; ++ ++#define MPC5xxx_FEC_RBD_READY 0x40000000 ++#define MPC5xxx_FEC_RBD_RFD 0x08000000 /* receive frame done */ ++ ++#define MPC5xxx_FEC_RBD_INIT MPC5xxx_FEC_RBD_READY ++ ++#define MPC5xxx_FEC_TBD_READY 0x40000000 ++#define MPC5xxx_FEC_TBD_TFD 0x08000000 /* transmit frame done */ ++#define MPC5xxx_FEC_TBD_INT 0x04000000 /* Interrupt */ ++ ++#define MPC5xxx_FEC_TBD_INIT (MPC5xxx_FEC_TBD_INT | MPC5xxx_FEC_TBD_TFD | \ ++ MPC5xxx_FEC_TBD_READY) ++ ++ ++ ++/* MII-related definitions */ ++#define MPC5xxx_FEC_MII_DATA_ST 0x40000000 /* Start frame */ ++#define 
MPC5xxx_FEC_MII_DATA_OP_RD 0x20000000 /* Perform read */ ++#define MPC5xxx_FEC_MII_DATA_OP_WR 0x10000000 /* Perform write */ ++#define MPC5xxx_FEC_MII_DATA_PA_MSK 0x0f800000 /* PHY Address mask */ ++#define MPC5xxx_FEC_MII_DATA_RA_MSK 0x007c0000 /* PHY Register mask */ ++#define MPC5xxx_FEC_MII_DATA_TA 0x00020000 /* Turnaround */ ++#define MPC5xxx_FEC_MII_DATA_DATAMSK 0x00000fff /* PHY data mask */ ++ ++#define MPC5xxx_FEC_MII_DATA_RA_SHIFT 0x12 /* MII reg addr bits */ ++#define MPC5xxx_FEC_MII_DATA_PA_SHIFT 0x17 /* MII PHY addr bits */ ++ ++#define MPC5xxx_FEC_MII_SPEED (5 * 2) ++ ++const char mpc5xxx_fec_name[] = "eth0"; ++ ++struct mibCounters { ++ unsigned int byteReceived; ++ unsigned int byteSent; ++ unsigned int framesReceived; ++ unsigned int framesSent; ++ unsigned int totalByteReceived; ++ unsigned int totalFramesReceived; ++ unsigned int broadcastFramesReceived; ++ unsigned int multicastFramesReceived; ++ unsigned int cRCError; ++ unsigned int oversizeFrames; ++ unsigned int fragments; ++ unsigned int jabber; ++ unsigned int collision; ++ unsigned int lateCollision; ++ unsigned int frames64; ++ unsigned int frames65_127; ++ unsigned int frames128_255; ++ unsigned int frames256_511; ++ unsigned int frames512_1023; ++ unsigned int frames1024_MaxSize; ++ unsigned int macRxError; ++ unsigned int droppedFrames; ++ unsigned int outMulticastFrames; ++ unsigned int outBroadcastFrames; ++ unsigned int undersizeFrames; ++}; ++ ++#define MPC5xxx_FEC_WATCHDOG_TIMEOUT ((400*HZ)/1000) ++ ++ ++#define MPC5xxx_FEC_FRAME_LAST 0x08000000 /* Last */ ++#define MPC5xxx_FEC_FRAME_M 0x01000000 /* M? */ ++#define MPC5xxx_FEC_FRAME_BC 0x00800000 /* Broadcast */ ++#define MPC5xxx_FEC_FRAME_MC 0x00400000 /* Multicast */ ++#define MPC5xxx_FEC_FRAME_LG 0x00200000 /* Length error */ ++#define MPC5xxx_FEC_FRAME_NO 0x00100000 /* Non-octet aligned frame error */ ++#define MPC5xxx_FEC_FRAME_CR 0x00040000 /* CRC frame error */ ++#define MPC5xxx_FEC_FRAME_OV 0x00020000 /* Overrun error */ ++#define MPC5xxx_FEC_FRAME_TR 0x00010000 /* Truncated error */ ++ ++ ++ ++#endif /* __RT_MPC52XX_FEC_H_ */ +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/Makefile 2021-04-07 16:01:27.566633682 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MPC52XX_FEC) += rt_mpc52xx_fec.o ++ ++rt_mpc52xx_fec-y := mpc52xx_fec.o +--- linux/drivers/xenomai/net/drivers/mpc52xx_fec/mpc52xx_fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc52xx_fec/mpc52xx_fec.c 2021-04-07 16:01:27.562633688 +0800 +@@ -0,0 +1,1985 @@ ++/* ++ * arch/ppc/5xxx_io/fec.c ++ * ++ * Driver for the MPC5200 Fast Ethernet Controller ++ * Support for MPC5100 FEC has been removed, contact the author if you need it ++ * ++ * Author: Dale Farnsworth ++ * ++ * 2003 (c) MontaVista, Software, Inc. This file is licensed under the terms ++ * of the GNU General Public License version 2. This program is licensed ++ * "as is" without any warranty of any kind, whether express or implied. ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/5xxx_io/fec.c". 
++ * Copyright (c) 2008 Wolfgang Grandegger ++ */ ++ ++/* #define PARANOID_CHECKS*/ ++/* #define MUST_ALIGN_TRANSMIT_DATA*/ ++#define MUST_UNALIGN_RECEIVE_DATA ++/* #define EXIT_ISR_AT_MEMORY_SQUEEZE*/ ++/* #define DISPLAY_WARNINGS*/ ++ ++#ifdef ORIGINAL_CODE ++static const char *version = "fec.c v0.2\n"; ++#endif /* ORIGINAL_CODE */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rt_mpc52xx_fec.h" ++#ifdef CONFIG_UBOOT ++#include ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FASTROUTE ++#error "Fast Routing on MPC5200 ethernet not supported" ++#endif ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for MPC52xx FEC"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#define printk(fmt,args...) rtdm_printk (fmt ,##args) ++ ++static struct rtnet_device *mpc5xxx_fec_dev; ++static int mpc5xxx_fec_interrupt(rtdm_irq_t *irq_handle); ++static int mpc5xxx_fec_receive_interrupt(rtdm_irq_t *irq_handle); ++static int mpc5xxx_fec_transmit_interrupt(rtdm_irq_t *irq_handle); ++static struct net_device_stats *mpc5xxx_fec_get_stats(struct rtnet_device *dev); ++#ifdef ORIGINAL_CODE ++static void mpc5xxx_fec_set_multicast_list(struct rtnet_device *dev); ++#endif /* ORIGINAL_CODE */ ++static void mpc5xxx_fec_reinit(struct rtnet_device* dev); ++static int mpc5xxx_fec_setup(struct rtnet_device *dev, int reinit); ++static int mpc5xxx_fec_cleanup(struct rtnet_device *dev, int reinit); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void mpc5xxx_fec_mii(struct rtnet_device *dev); ++#ifdef ORIGINAL_CODE ++static int mpc5xxx_fec_ioctl(struct rtnet_device *, struct ifreq *rq, int cmd); ++static int mpc5xxx_netdev_ethtool_ioctl(struct rtnet_device *dev, void *useraddr); ++#endif /* ORIGINAL_CODE */ ++static void mdio_timer_callback(unsigned long data); ++static void mii_display_status(struct rtnet_device *dev); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET ++static void mpc5xxx_mdio_callback(uint regval, struct rtnet_device *dev, uint data); ++static int mpc5xxx_mdio_read(struct rtnet_device *dev, int phy_id, int location); ++#endif ++ ++static void mpc5xxx_fec_update_stat(struct rtnet_device *); ++ ++/* MII processing. We keep this as simple as possible. Requests are ++ * placed on the list (if there is room). When the request is finished ++ * by the MII, an optional function may be called. ++ */ ++typedef struct mii_list { ++ uint mii_regval; ++ void (*mii_func)(uint val, struct rtnet_device *dev, uint data); ++ struct mii_list *mii_next; ++ uint mii_data; ++} mii_list_t; ++ ++#define NMII 20 ++mii_list_t mii_cmds[NMII]; ++mii_list_t *mii_free; ++mii_list_t *mii_head; ++mii_list_t *mii_tail; ++ ++typedef struct mdio_read_data { ++ u16 regval; ++ struct task_struct *sleeping_task; ++} mdio_read_data_t; ++ ++static int mii_queue(struct rtnet_device *dev, int request, ++ void (*func)(uint, struct rtnet_device *, uint), uint data); ++ ++/* Make MII read/write commands for the FEC. 
++ * */ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++ ++/* Register definitions for the PHY. ++*/ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++u8 mpc5xxx_fec_mac_addr[6]; ++u8 null_mac[6]; ++ ++#ifdef ORIGINAL_CODE ++static void mpc5xxx_fec_tx_timeout(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ priv->stats.tx_errors++; ++ ++ if (!priv->tx_full) ++ rtnetif_wake_queue(dev); ++} ++#endif /* ORIGINAL_CODE */ ++ ++static void ++mpc5xxx_fec_set_paddr(struct rtnet_device *dev, u8 *mac) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ out_be32(&fec->paddr1, (mac[0]<<24) | (mac[1]<<16) ++ | (mac[2]<<8) | (mac[3]<<0)); ++ out_be32(&fec->paddr2, (mac[4]<<24) | (mac[5]<<16) | 0x8808); ++} ++ ++#ifdef ORIGINAL_CODE ++static int ++mpc5xxx_fec_set_mac_address(struct rtnet_device *dev, void *addr) ++{ ++ struct sockaddr *sock = (struct sockaddr *)addr; ++ ++ mpc5xxx_fec_set_paddr(dev, sock->sa_data); ++ return 0; ++} ++#endif /* ORIGINAL_CODE */ ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This happens on fifo errors or when switching between half ++ * and full duplex. ++ */ ++static void ++mpc5xxx_fec_restart(struct rtnet_device *dev, int duplex) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 rcntrl; ++ u32 tcntrl; ++ int i; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_restart\n"); ++#endif ++ out_be32(&fec->rfifo_status, in_be32(&fec->rfifo_status) & 0x700000); ++ out_be32(&fec->tfifo_status, in_be32(&fec->tfifo_status) & 0x700000); ++ out_be32(&fec->reset_cntrl, 0x1000000); ++ ++ /* Whack a reset. We should wait for this. 
*/ ++ out_be32(&fec->ecntrl, MPC5xxx_FEC_ECNTRL_RESET); ++ for (i = 0; i < MPC5xxx_FEC_RESET_DELAY; ++i) { ++ if ((in_be32(&fec->ecntrl) & MPC5xxx_FEC_ECNTRL_RESET) == 0) ++ break; ++ udelay(1); ++ } ++ if (i == MPC5xxx_FEC_RESET_DELAY) ++ printk ("FEC Reset timeout!\n"); ++ ++ /* Set station address. */ ++ out_be32(&fec->paddr1, *(u32 *)&dev->dev_addr[0]); ++ out_be32(&fec->paddr2, ++ ((*(u16 *)&dev->dev_addr[4]) << 16) | 0x8808); ++ ++#ifdef ORIGINAL_CODE ++ mpc5xxx_fec_set_multicast_list(dev); ++#endif /* ORIGINAL_CODE */ ++ ++ rcntrl = MPC5xxx_FEC_RECV_BUFFER_SIZE << 16; /* max frame length */ ++ rcntrl |= MPC5xxx_FEC_RCNTRL_FCE; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ rcntrl |= MPC5xxx_FEC_RCNTRL_MII_MODE; ++#endif ++ if (duplex) ++ tcntrl = MPC5xxx_FEC_TCNTRL_FDEN; /* FD enable */ ++ else { ++ rcntrl |= MPC5xxx_FEC_RCNTRL_DRT; ++ tcntrl = 0; ++ } ++ out_be32(&fec->r_cntrl, rcntrl); ++ out_be32(&fec->x_cntrl, tcntrl); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. */ ++ out_be32(&fec->mii_speed, priv->phy_speed); ++#endif ++ ++ priv->full_duplex = duplex; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ priv->duplex_change = 0; ++#endif ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("%s: duplex set to %d\n", dev->name, priv->full_duplex); ++#endif ++ ++ /* Clear any outstanding interrupt. */ ++ out_be32(&fec->ievent, 0xffffffff); /* clear intr events */ ++ ++ /* Enable interrupts we wish to service. ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ out_be32(&fec->imask, 0xf0fe0000); /* enable all intr but tfint */ ++#else ++ out_be32(&fec->imask, 0xf07e0000); /* enable all intr but tfint */ ++#endif ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ out_be32(&fec->ecntrl, MPC5xxx_FEC_ECNTRL_ETHER_EN); ++ out_be32(&fec->r_des_active, 0x01000000); ++ ++ /* The tx ring is no longer full. */ ++ if (priv->tx_full) ++ { ++ priv->tx_full = 0; ++ rtnetif_wake_queue(dev); ++ } ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void ++mpc5xxx_fec_mii(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ mii_list_t *mip; ++ uint mii_reg; ++ ++ mii_reg = in_be32(&fec->mii_data); ++ ++ if ((mip = mii_head) == NULL) { ++ printk("MII and no head!\n"); ++ return; ++ } ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_mii %08x %08x %08x\n", ++ mii_reg, (u32)mip->mii_func, mip->mii_data); ++#endif ++ ++ if (mip->mii_func != NULL) ++ (*(mip->mii_func))(mii_reg, dev, mip->mii_data); ++ ++ mii_head = mip->mii_next; ++ mip->mii_next = mii_free; ++ mii_free = mip; ++ ++ if ((mip = mii_head) != NULL) ++ out_be32(&fec->mii_data, mip->mii_regval); ++} ++ ++static int ++mii_queue(struct rtnet_device *dev, int regval, void (*func)(uint, struct rtnet_device *, uint), uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ rtdm_lockctx_t context; ++ mii_list_t *mip; ++ int retval; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mii_queue: %08x %08x %08x\n", regval, (u32)func, data); ++#endif ++ ++ /* Add PHY address to register command. 
++ */ ++ regval |= priv->phy_addr << 23; ++ ++ retval = 0; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ++ if ((mip = mii_free) != NULL) { ++ mii_free = mip->mii_next; ++ mip->mii_regval = regval; ++ mip->mii_func = func; ++ mip->mii_next = NULL; ++ mip->mii_data = data; ++ if (mii_head) { ++ mii_tail->mii_next = mip; ++ mii_tail = mip; ++ } else { ++ mii_head = mii_tail = mip; ++ out_be32(&fec->mii_data, regval); ++ } ++ } else ++ retval = 1; ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ return retval; ++} ++ ++static void mii_do_cmd(struct rtnet_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if (!c) ++ return; ++ ++ for (k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct, 0); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ priv->phy_status = s; ++ priv->link = (s & PHY_STAT_LINK) ? 1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ priv->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x0100) ++ s |= PHY_CONF_100FDX; ++ ++ priv->phy_status = s; ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* Generic PHY support. Should work for all PHYs, but does not support link ++ * change interrupts. ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++ ++static phy_info_t phy_info_generic = { ++ 0x00000000, /* 0-->match any PHY */ ++ "GENERIC", ++ ++ (const phy_cmd_t []) { /* config */ ++ /* advertise only half-duplex capabilities */ ++ { mk_mii_write(MII_ADVERTISE, MII_ADVERTISE_HALF), ++ mii_parse_anar }, ++ ++ /* enable auto-negotiation */ ++ { mk_mii_write(MII_BMCR, BMCR_ANENABLE), mii_parse_cr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ /* restart auto-negotiation */ ++ { mk_mii_write(MII_BMCR, (BMCR_ANENABLE | BMCR_ANRESTART)), ++ NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* We don't actually use the ack_int table with a generic ++ * PHY, but putting a reference to mii_parse_sr here keeps ++ * us from getting a compiler warning about unused static ++ * functions in the case where we only compile in generic ++ * PHY support. 
++ */ ++ { mk_mii_read(MII_BMSR), mii_parse_sr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown */ ++ { mk_mii_end, } ++ }, ++}; ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ /* Record the new full_duplex value only if the link is up ++ * (so we don't bother restarting the driver on duplex ++ * changes when the link is down). ++ */ ++ if (priv->link) { ++ int prev_duplex = priv->full_duplex; ++ priv->full_duplex = ((mii_reg & 0x0200) != 0); ++ if (priv->full_duplex != prev_duplex) { ++ /* trigger a restart with changed duplex */ ++ priv->duplex_change = 1; ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("%s: duplex change: %s\n", ++ dev->name, priv->full_duplex ? "full" : "half"); ++#endif ++ } ++ } ++ priv->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++#ifdef MPC5100_FIX10HDX ++ { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++#else ++/* { mk_mii_write(MII_REG_ANAR, 0x0A1), NULL }, *//* 10/100, HD */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* 10/100, FD */ ++#endif ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. 
++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++#if defined(CONFIG_UC101) ++ { mk_mii_write(MII_LXT971_LCR, 0x4122), NULL }, /* LED settings */ ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_LXT971 */ ++ ++/* ----------------------------------------------------------------- */ ++/* The National Semiconductor DP83847 is used on a INKA 4X0 board */ ++/* ----------------------------------------------------------------- */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ ++/* Register definitions */ ++#define MII_DP83847_PHYSTS 0x10 /* PHY Status Register */ ++ ++static void mii_parse_dp83847_physts(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x2) { ++ if (mii_reg & 0x4) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else { ++ if (mii_reg & 0x4) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ if (mii_reg & 0x40) ++ s |= PHY_STAT_FAULT; ++ ++ priv->full_duplex = ((mii_reg & 0x4) != 0); ++ ++ priv->phy_status = s; ++} ++ ++static phy_info_t phy_info_dp83847 = { ++ 0x020005c3, ++ "DP83847", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83847_PHYSTS), mii_parse_dp83847_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83847_PHYSTS), mii_parse_dp83847_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_DP83847 */ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_LXT971 */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_DP83847 ++ &phy_info_dp83847, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_DP83847 */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY ++ /* Generic PHY support. This must be the last PHY in the table. ++ * It will be used to support any PHY that doesn't match a previous ++ * entry in the table. 
++ */ ++ &phy_info_generic, ++#endif /* CONFIG_XENO_DRIVERS_NET_FEC_GENERIC_PHY */ ++ ++ NULL ++}; ++ ++static void mii_display_config(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint s = priv->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ priv->sequence_done = 1; ++} ++ ++static void mii_queue_config(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ priv->phy_task.routine = (void *)mii_display_config; ++ priv->phy_task.data = dev; ++ schedule_task(&priv->phy_task); ++} ++ ++ ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ int i; ++ ++ priv->phy_id |= (mii_reg & 0xffff); ++ ++ for (i = 0; phy_info[i]; i++) { ++ if (phy_info[i]->id == (priv->phy_id >> 4) || !phy_info[i]->id) ++ break; ++ if (phy_info[i]->id == 0) /* check generic entry */ ++ break; ++ } ++ ++ if (!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, priv->phy_id); ++ ++ priv->phy = phy_info[i]; ++ priv->phy_id_done = 1; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, priv->phy_addr, priv->phy->name, priv->phy_id); ++#if defined(CONFIG_UC101) ++ mii_do_cmd(dev, priv->phy->startup); ++#endif ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct rtnet_device *dev, uint data) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ uint phytype; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mii_discover_phy\n"); ++#endif ++ ++ if ((phytype = (mii_reg & 0xffff)) != 0xffff) { ++ /* Got first part of ID, now get remainder. ++ */ ++ priv->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3, 0); ++ } else { ++ priv->phy_addr++; ++ if (priv->phy_addr < 32) ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy, 0); ++ else ++ printk("fec: No PHY device found.\n"); ++ } ++} ++ ++static void ++mpc5xxx_fec_link_up(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)(dev->priv); ++ ++ printk("mpc5xxx_fec_link_up: link_up=%d\n", priv->link_up); ++#ifdef ORIGINAL_CODE ++ priv->link_up = 0; ++#endif /* ORIGINAL_CODE */ ++ mii_display_status(dev); ++ if (priv->duplex_change) { ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("%s: restarting with %s duplex...\n", ++ dev->name, priv->full_duplex ? "full" : "half"); ++#endif ++ mpc5xxx_fec_restart(dev, priv->full_duplex); ++ } ++} ++ ++/* ++ * Execute the ack_int command set and schedules next timer call back. 
++ */ ++static void mdio_timer_callback(unsigned long data) ++{ ++ struct rtnet_device *dev = (struct rtnet_device *)data; ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)(dev->priv); ++ mii_do_cmd(dev, priv->phy->ack_int); ++ ++ if (priv->link_up) { ++#ifdef ORIGINAL_CODE ++ priv->link_up_task.routine = (void *)mpc5xxx_fec_link_up; ++ priv->link_up_task.data = dev; ++ schedule_task(&priv->link_up_task); ++#else ++ mpc5xxx_fec_link_up(dev); ++ return; ++#endif /* ORIGINAL_CODE */ ++ } ++ /* Reschedule in 1 second */ ++ priv->phy_timer_list.expires = jiffies + (1000 * HZ / 1000); ++ add_timer(&priv->phy_timer_list); ++} ++ ++/* ++ * Displays the current status of the PHY. ++ */ ++static void mii_display_status(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = dev->priv; ++ uint s = priv->phy_status; ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!priv->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++#define RFIFO_DATA 0xf0003184 ++#define TFIFO_DATA 0xf00031a4 ++ ++/* ++ * Initialize FEC receive task. ++ * Returns task number of FEC receive task. ++ * Returns -1 on failure ++ */ ++int ++mpc5xxx_fec_rx_task_setup(int num_bufs, int maxbufsize) ++{ ++ static TaskSetupParamSet_t params; ++ int tasknum; ++ ++ params.NumBD = num_bufs; ++ params.Size.MaxBuf = maxbufsize; ++ params.StartAddrSrc = RFIFO_DATA; ++ params.IncrSrc = 0; ++ params.SzSrc = 4; ++ params.IncrDst = 4; ++ params.SzDst = 4; ++ ++ tasknum = TaskSetup(TASK_FEC_RX, ¶ms); ++ ++ /* clear pending interrupt bits */ ++ TaskIntClear(tasknum); ++ ++ return tasknum; ++} ++ ++/* ++ * Initialize FEC transmit task. ++ * Returns task number of FEC transmit task. 
++ * Returns -1 on failure ++ */ ++int ++mpc5xxx_fec_tx_task_setup(int num_bufs) ++{ ++ static TaskSetupParamSet_t params; ++ int tasknum; ++ ++ params.NumBD = num_bufs; ++ params.IncrSrc = 4; ++ params.SzSrc = 4; ++ params.StartAddrDst = TFIFO_DATA; ++ params.IncrDst = 0; ++ params.SzDst = 4; ++ ++ tasknum = TaskSetup(TASK_FEC_TX, ¶ms); ++ ++ /* clear pending interrupt bits */ ++ TaskIntClear(tasknum); ++ ++ return tasknum; ++} ++ ++ ++ ++#ifdef PARANOID_CHECKS ++static volatile int tx_fifo_cnt, tx_fifo_ipos, tx_fifo_opos; ++static volatile int rx_fifo_opos; ++#endif ++ ++static struct rtskb *tx_fifo_skb[MPC5xxx_FEC_TBD_NUM]; ++static struct rtskb *rx_fifo_skb[MPC5xxx_FEC_RBD_NUM]; ++static BDIdx mpc5xxx_bdi_tx = 0; ++ ++ ++static int ++mpc5xxx_fec_setup(struct rtnet_device *dev, int reinit) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_xlb *xlb = (struct mpc5xxx_xlb *)MPC5xxx_XLB; ++ struct rtskb *skb; ++ int i; ++ struct mpc5xxx_rbuf *rbuf; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 u32_value; ++ u16 u16_value; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_setup\n"); ++#endif ++ ++ mpc5xxx_fec_set_paddr(dev, dev->dev_addr); ++ ++ /* ++ * Initialize receive queue ++ */ ++ priv->r_tasknum = mpc5xxx_fec_rx_task_setup(MPC5xxx_FEC_RBD_NUM, ++ MPC5xxx_FEC_RECV_BUFFER_SIZE_BC); ++ TaskBDReset(priv->r_tasknum); ++ for(i=0;idata; ++ } ++ bdi_a = TaskBDAssign(priv->r_tasknum, ++ (void*)virt_to_phys((void *)&rbuf->data), ++ 0, sizeof *rbuf, MPC5xxx_FEC_RBD_INIT); ++ if(bdi_a<0) ++ panic("mpc5xxx_fec_setup: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ } ++#ifdef PARANOID_CHECKS ++ rx_fifo_opos = 0; ++#endif ++ ++ /* ++ * Initialize transmit queue ++ */ ++ if(!reinit) { ++ priv->t_tasknum = mpc5xxx_fec_tx_task_setup(MPC5xxx_FEC_TBD_NUM); ++ TaskBDReset(priv->t_tasknum); ++ mpc5xxx_bdi_tx = 0; ++ for(i=0;isequence_done) { ++ if (!priv->phy) { ++ printk("mpc5xxx_fec_setup: PHY not configured\n"); ++ return -ENODEV; /* No PHY we understand */ ++ } ++ ++ mii_do_cmd(dev, priv->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!priv->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, priv->phy->startup); ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ dev->irq = MPC5xxx_FEC_IRQ; ++ priv->r_irq = MPC5xxx_SDMA_IRQ_BASE + priv->r_tasknum; ++ priv->t_irq = MPC5xxx_SDMA_IRQ_BASE + priv->t_tasknum; ++ ++ if ((i = rtdm_irq_request(&priv->irq_handle, dev->irq, ++ mpc5xxx_fec_interrupt, 0, ++ "rteth_err", dev))) { ++ printk(KERN_ERR "FEC interrupt allocation failed\n"); ++ return i; ++ } ++ ++ if ((i = rtdm_irq_request(&priv->r_irq_handle, priv->r_irq, ++ mpc5xxx_fec_receive_interrupt, 0, ++ "rteth_recv", dev))) { ++ printk(KERN_ERR "FEC receive task interrupt allocation failed\n"); ++ return i; ++ } ++ ++ if ((i = rtdm_irq_request(&priv->t_irq_handle, priv->t_irq, ++ mpc5xxx_fec_transmit_interrupt, 0, ++ "rteth_xmit", dev))) { ++ printk(KERN_ERR "FEC transmit task interrupt allocation failed\n"); ++ return i; ++ } ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ u32_value = in_be32(&priv->gpio->port_config); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ u32_value |= 0x00050000; /* 100MBit with MD */ ++#else ++ u32_value |= 0x00020000; /* 10MBit with 7-wire */ ++#endif ++ out_be32(&priv->gpio->port_config, u32_value); ++ ++ } ++ ++ out_be32(&fec->op_pause, 0x00010020); /* change to 0xffff0020 ??? 
*/ ++ out_be32(&fec->rfifo_cntrl, 0x0f240000); ++ out_be32(&fec->rfifo_alarm, 0x0000030c); ++ out_be32(&fec->tfifo_cntrl, 0x0f240000); ++ out_be32(&fec->tfifo_alarm, 0x00000100); ++ out_be32(&fec->x_wmrk, 0x3); /* xmit fifo watermark = 256 */ ++ out_be32(&fec->xmit_fsm, 0x03000000); /* enable crc generation */ ++ out_be32(&fec->iaddr1, 0x00000000); /* No individual filter */ ++ out_be32(&fec->iaddr2, 0x00000000); /* No individual filter */ ++ ++#ifdef CONFIG_MPC5200 ++ /* Disable COMM Bus Prefetch */ ++ u16_value = in_be16(&priv->sdma->PtdCntrl); ++ u16_value |= 1; ++ out_be16(&priv->sdma->PtdCntrl, u16_value); ++ ++ /* Disable (or enable?) BestComm XLB address snooping */ ++ out_be32(&xlb->config, in_be32(&xlb->config) | MPC5200B_XLB_CONF_BSDIS); ++#endif ++ ++ if(!reinit) { ++#if !defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) ++ mpc5xxx_fec_restart (dev, 0); /* always use half duplex mode only */ ++#else ++#ifdef CONFIG_UBOOT ++ extern unsigned char __res[]; ++ bd_t *bd = (bd_t *)__res; ++#define MPC5xxx_IPBFREQ bd->bi_ipbfreq ++#else ++#define MPC5xxx_IPBFREQ CONFIG_PPC_5xxx_IPBFREQ ++#endif ++ ++ for (i=0; iphy_speed = (((MPC5xxx_IPBFREQ >> 20) / 5) << 1); ++ ++ /*mpc5xxx_fec_restart (dev, 0);*/ /* half duplex, negotiate speed */ ++ mpc5xxx_fec_restart (dev, 1); /* full duplex, negotiate speed */ ++ ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ priv->phy_id_done = 0; ++ priv->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy, 0); ++ ++ priv->old_status = 0; ++ ++ /* ++ * Read MIB counters in order to reset them, ++ * then zero all the stats fields in memory ++ */ ++ mpc5xxx_fec_update_stat(dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ if (reinit) { ++ if (!priv->sequence_done) { ++ if (!priv->phy) { ++ printk("mpc5xxx_fec_open: PHY not configured\n"); ++ return -ENODEV; /* No PHY we understand */ ++ } ++ ++ mii_do_cmd(dev, priv->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!priv->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, priv->phy->startup); ++ ++ /* ++ * Currently, MII link interrupts are not supported, ++ * so start the 100 msec timer to monitor the link up event. ++ */ ++ init_timer(&priv->phy_timer_list); ++ ++ priv->phy_timer_list.expires = jiffies + (100 * HZ / 1000); ++ priv->phy_timer_list.data = (unsigned long)dev; ++ priv->phy_timer_list.function = mdio_timer_callback; ++ add_timer(&priv->phy_timer_list); ++ ++ printk("%s: Waiting for the link to be up...\n", dev->name); ++ while (priv->link == 0) { ++ schedule(); ++ } ++ mii_display_status(dev); ++ if (priv->full_duplex == 0) { /* FD is not negotiated, restart the fec in HD */ ++ mpc5xxx_fec_restart(dev, 0); ++ } ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++#endif ++ } ++ else { ++ mpc5xxx_fec_restart (dev, 0); ++ } ++ ++ rtnetif_start_queue(dev); ++ ++ TaskStart(priv->r_tasknum, TASK_AUTOSTART_ENABLE, ++ priv->r_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if(reinit) { ++ TaskStart(priv->t_tasknum, TASK_AUTOSTART_ENABLE, ++ priv->t_tasknum, TASK_INTERRUPT_ENABLE); ++ } ++ ++ return 0; ++ ++eagain: ++ printk("mpc5xxx_fec_setup: failed\n"); ++ for (i=0; ir_tasknum); ++ ++ return -EAGAIN; ++} ++ ++static int ++mpc5xxx_fec_open(struct rtnet_device *dev) ++{ ++ return mpc5xxx_fec_setup(dev,0); ++} ++ ++/* This will only be invoked if your driver is _not_ in XOFF state. 
++ * What this means is that you need not check it, and that this ++ * invariant will hold if you make sure that the netif_*_queue() ++ * calls are done at the proper times. ++ */ ++static int ++mpc5xxx_fec_hard_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ int pad; ++ short length; ++ BDIdx bdi_a; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_hard_start_xmit:\n"); ++ printk("dev %08x, priv %08x, skb %08x\n", ++ (u32)dev, (u32)priv, (u32)skb); ++#endif ++#if MPC5xxx_FEC_DEBUG > 0 ++ if (fec_start_status(&priv->t_queue) & MPC5xxx_FEC_TBD_TFD) ++ panic("MPC5xxx transmit queue overrun\n"); ++#endif ++ ++ length = skb->len; ++#ifdef MUST_ALIGN_TRANSMIT_DATA ++ pad = (int)skb->data & 3; ++ if (pad) { ++ void *old_data = skb->data; ++ rtskb_push(skb, pad); ++ memcpy(skb->data, old_data, length); ++ rtskb_trim(skb, length); ++ } ++#endif ++ /* Zero out up to the minimum length ethernet packet size, ++ * so we don't inadvertently expose sensitive data ++ */ ++ pad = ETH_ZLEN - skb->len; ++ if (pad > 0) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if (skb == 0) { ++ printk("rtskb_padto failed\n"); ++ return 0; ++ } ++ length += pad; ++ } ++ ++ flush_dcache_range((u32)skb->data, (u32)skb->data + length); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ++ bdi_a = TaskBDAssign(priv->t_tasknum,(void*)virt_to_phys((void *)skb->data), ++ NULL,length,MPC5xxx_FEC_TBD_INIT); ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during assignment*/ ++ if((bdi_a<0)||(bdi_a>=MPC5xxx_FEC_TBD_NUM)) ++ panic("mpc5xxx_fec_hard_start_xmit: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ ++ /* sanity check: bdi must always equal tx_fifo_ipos*/ ++ if(bdi_a!=tx_fifo_ipos) ++ panic("bdi_a!=tx_fifo_ipos: %i, %i\n",(int)bdi_a,tx_fifo_ipos); ++ ++ tx_fifo_cnt++; ++ tx_fifo_ipos++; ++ if(tx_fifo_ipos==MPC5xxx_FEC_TBD_NUM) tx_fifo_ipos=0; ++ ++ /* check number of BDs in use*/ ++ if(TaskBDInUse(priv->t_tasknum)!=tx_fifo_cnt) ++ panic("TaskBDInUse != tx_fifo_cnt: %i %i\n",TaskBDInUse(priv->t_tasknum),tx_fifo_cnt); ++#endif ++ ++ tx_fifo_skb[bdi_a]=skb; ++ ++#ifdef ORIGINAL_CODE ++ dev->trans_start = jiffies; ++#endif /* ORIGINAL_CODE */ ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ TaskStart(priv->t_tasknum, TASK_AUTOSTART_ENABLE, priv->t_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if(TaskBDInUse(priv->t_tasknum)==MPC5xxx_FEC_TBD_NUM) { ++ priv->tx_full = 1; ++ rtnetif_stop_queue(dev); ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ return 0; ++} ++ ++/* This handles SDMA transmit task interrupts ++ */ ++static int ++mpc5xxx_fec_transmit_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ BDIdx bdi_r; ++ ++ rtdm_lock_get(&priv->lock); ++ ++ while(TaskBDInUse(priv->t_tasknum)) { ++ ++ /* relase BD*/ ++ bdi_r = TaskBDRelease(priv->t_tasknum); ++ ++ /* we are done if we can't release any more BDs*/ ++ if(bdi_r==TASK_ERR_BD_BUSY) break; ++ /* if(bdi_r<0) break;*/ ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during release*/ ++ if((bdi_r<0)||(bdi_r>=MPC5xxx_FEC_TBD_NUM)) ++ panic("mpc5xxx_fec_transmit_interrupt: error while TaskBDRelease, err=%i\n",(int)bdi_r); ++ ++ tx_fifo_cnt--; ++ tx_fifo_opos++; ++ 
if(tx_fifo_opos==MPC5xxx_FEC_TBD_NUM) tx_fifo_opos=0; ++ ++ /* sanity check: bdi_r must always equal tx_fifo_opos*/ ++ if(bdi_r!=tx_fifo_opos) { ++ panic("bdi_r!=tx_fifo_opos: %i, %i\n",(int)bdi_r,tx_fifo_opos); ++ } ++ ++ /* check number of BDs in use*/ ++ if(TaskBDInUse(priv->t_tasknum)!=tx_fifo_cnt) ++ panic("TaskBDInUse != tx_fifo_cnt: %i %i\n",TaskBDInUse(priv->t_tasknum),tx_fifo_cnt); ++#endif ++ ++ if((tx_fifo_skb[mpc5xxx_bdi_tx])==0) ++ panic("skb confusion in tx\n"); ++ ++ dev_kfree_rtskb(tx_fifo_skb[mpc5xxx_bdi_tx]); ++ tx_fifo_skb[mpc5xxx_bdi_tx]=0; ++ ++ mpc5xxx_bdi_tx = bdi_r; ++ ++ if(TaskBDInUse(priv->t_tasknum)tx_full = 0; ++ ++ } ++ ++ if (rtnetif_queue_stopped(dev) && !priv->tx_full) ++ rtnetif_wake_queue(dev); ++ ++ rtdm_lock_put(&priv->lock); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static BDIdx mpc5xxx_bdi_rx = 0; ++ ++static int ++mpc5xxx_fec_receive_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct rtskb *skb; ++ struct rtskb *nskb; ++ struct mpc5xxx_rbuf *rbuf; ++ struct mpc5xxx_rbuf *nrbuf; ++ u32 status; ++ int length; ++ BDIdx bdi_a, bdi_r; ++ int discard = 0; ++ int dropped = 0; ++ int packets = 0; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ while(1) { ++ ++ /* release BD*/ ++ bdi_r = TaskBDRelease(priv->r_tasknum); ++ ++ /* we are done if we can't release any more BDs*/ ++ if(bdi_r==TASK_ERR_BD_BUSY) break; ++ ++#ifdef PARANOID_CHECKS ++ /* check for other errors during release*/ ++ if((bdi_r<0)||(bdi_r>=MPC5xxx_FEC_RBD_NUM)) ++ panic("mpc5xxx_fec_receive_interrupt: error while TaskBDRelease, err=%i\n",(int)bdi_r); ++ ++ rx_fifo_opos++; ++ if(rx_fifo_opos==MPC5xxx_FEC_RBD_NUM) rx_fifo_opos=0; ++ ++ if(bdi_r != rx_fifo_opos) ++ panic("bdi_r != rx_fifo_opos: %i, %i\n",bdi_r, rx_fifo_opos); ++#endif ++ ++ /* get BD status in order to determine length*/ ++ status = TaskGetBD(priv->r_tasknum,mpc5xxx_bdi_rx)->Status; ++ ++ /* determine packet length and pointer to socket buffer / actual data*/ ++ skb = rx_fifo_skb[mpc5xxx_bdi_rx]; ++ length = (status & 0xffff) - 4; ++ rbuf = (struct mpc5xxx_rbuf *)skb->data; ++ ++#ifndef EXIT_ISR_AT_MEMORY_SQUEEZE ++ /* in case of a memory squeeze, we just drop all packets, because*/ ++ /* subsequent allocations will also fail.*/ ++ if(discard!=3) { ++#endif ++ ++ /* check for frame errors*/ ++ if(status&0x00370000) { ++ /* frame error, drop */ ++#ifdef DISPLAY_WARNINGS ++ if(status&MPC5xxx_FEC_FRAME_LG) ++ printk("%s: Frame length error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_NO) ++ printk("%s: Non-octet aligned frame error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_CR) ++ printk("%s: Frame CRC error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_OV) ++ printk("%s: FIFO overrun error, dropping packet (status=0x%08x)\n",dev->name,status); ++ if(status&MPC5xxx_FEC_FRAME_TR) ++ printk("%s: Frame truncated error, dropping packet (status=0x%08x)\n",dev->name,status); ++#endif ++ discard=1; ++ } ++ else if (length>(MPC5xxx_FEC_RECV_BUFFER_SIZE-4)) { ++ /* packet too big, drop */ ++#ifdef DISPLAY_WARNINGS ++ printk("%s: Frame too big, dropping packet (length=%i)\n",dev->name,length); ++#endif ++ discard=2; ++ } ++ else { ++ /* allocate replacement skb */ ++ nskb = dev_alloc_rtskb(sizeof *nrbuf, dev); ++ if (nskb == NULL) { ++ /* memory squeeze, drop */ 
++ discard=3; ++ dropped++; ++ } ++ else { ++ discard=0; ++ } ++ } ++ ++#ifndef EXIT_ISR_AT_MEMORY_SQUEEZE ++ } ++ else { ++ dropped++; ++ } ++#endif ++ ++ if (discard) { ++ priv->stats.rx_dropped++; ++ nrbuf = (struct mpc5xxx_rbuf *)skb->data; ++ } ++ else { ++#ifdef MUST_UNALIGN_RECEIVE_DATA ++ rtskb_reserve(nskb,2); ++#endif ++ nrbuf = (struct mpc5xxx_rbuf *)rtskb_put(nskb, sizeof *nrbuf); ++ ++ /* only invalidate the number of bytes in dcache actually received*/ ++#ifdef MUST_UNALIGN_RECEIVE_DATA ++ invalidate_dcache_range((u32)rbuf - 2, (u32)rbuf + length); ++#else ++ invalidate_dcache_range((u32)rbuf, (u32)rbuf + length); ++#endif ++ rtskb_trim(skb, length); ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = time_stamp; ++ rtnetif_rx(skb); ++ packets++; ++#ifdef ORIGINAL_CODE ++ dev->last_rx = jiffies; ++#endif /* ORIGINAL_CODE */ ++ rx_fifo_skb[mpc5xxx_bdi_rx] = nskb; ++ } ++ ++ /* Assign new socket buffer to BD*/ ++ bdi_a = TaskBDAssign(priv->r_tasknum, (void*)virt_to_phys((void *)&nrbuf->data), ++ 0, sizeof *nrbuf, MPC5xxx_FEC_RBD_INIT); ++ ++#ifdef PARANOID_CHECKS ++ /* check for errors during assignment*/ ++ if((bdi_a<0)||(bdi_r>=MPC5xxx_FEC_RBD_NUM)) ++ panic("mpc5xxx_fec_receive_interrupt: error while TaskBDAssign, err=%i\n",(int)bdi_a); ++ ++ /* check if Assign/Release sequence numbers are ok*/ ++ if(((bdi_a+1)%MPC5xxx_FEC_RBD_NUM) != bdi_r) ++ panic("bdi_a+1 != bdi_r: %i %i\n",(int)((bdi_a+1)%MPC5xxx_FEC_RBD_NUM),(int)bdi_r); ++#endif ++ ++ mpc5xxx_bdi_rx = bdi_r; ++ ++#ifdef EXIT_ISR_AT_MEMORY_SQUEEZE ++ /* if we couldn't get memory for a new socket buffer, then it doesn't*/ ++ /* make sense to proceed.*/ ++ if (discard==3) ++ break; ++#endif ++ ++ } ++ ++#ifdef DISPLAY_WARNINGS ++ if(dropped) { ++ printk("%s: Memory squeeze, dropped %i packets\n",dev->name,dropped); ++ } ++#endif ++ TaskStart(priv->r_tasknum, TASK_AUTOSTART_ENABLE, priv->r_tasknum, TASK_INTERRUPT_ENABLE); ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(dev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++static void ++mpc5xxx_fec_reinit(struct rtnet_device *dev) ++{ ++ int retval; ++ printk("mpc5xxx_fec_reinit\n"); ++ mpc5xxx_fec_cleanup(dev,1); ++ retval=mpc5xxx_fec_setup(dev,1); ++ if(retval) panic("reinit failed\n"); ++} ++ ++ ++static int ++mpc5xxx_fec_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ int ievent; ++ ++#if MPC5xxx_FEC_DEBUG > 4 ++ printk("mpc5xxx_fec_interrupt:\n"); ++#endif ++ ++ ievent = in_be32(&fec->ievent); ++ out_be32(&fec->ievent, ievent); /* clear pending events */ ++ ++ if (ievent & (MPC5xxx_FEC_IEVENT_RFIFO_ERROR | ++ MPC5xxx_FEC_IEVENT_XFIFO_ERROR)) { ++ if (ievent & MPC5xxx_FEC_IEVENT_RFIFO_ERROR) ++ printk(KERN_WARNING "MPC5xxx_FEC_IEVENT_RFIFO_ERROR\n"); ++ if (ievent & MPC5xxx_FEC_IEVENT_XFIFO_ERROR) ++ printk(KERN_WARNING "MPC5xxx_FEC_IEVENT_XFIFO_ERROR\n"); ++ mpc5xxx_fec_reinit(dev); ++ } ++ else if (ievent & MPC5xxx_FEC_IEVENT_MII) { ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ mpc5xxx_fec_mii(dev); ++#else ++ printk("%s[%d] %s: unexpected MPC5xxx_FEC_IEVENT_MII\n", ++ __FILE__, __LINE__, __FUNCTION__); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int ++mpc5xxx_fec_cleanup(struct rtnet_device *dev, int reinit) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; 
++ unsigned long timeout; ++ int i; ++ ++ priv->open_time = 0; ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ priv->sequence_done = 0; ++#endif ++ ++ rtnetif_stop_queue(dev); ++ ++ /* Wait for rx queue to drain */ ++ if(!reinit) { ++ timeout = jiffies + 2*HZ; ++ while (TaskBDInUse(priv->t_tasknum) && (jiffies < timeout)) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(HZ/10); ++ } ++ } ++ ++ /* Disable FEC interrupts */ ++ out_be32(&fec->imask, 0x0); ++ ++ /* Stop FEC */ ++ out_be32(&fec->ecntrl, in_be32(&fec->ecntrl) & ~0x2); ++ ++ /* Disable the rx and tx queues. */ ++ TaskStop(priv->r_tasknum); ++ TaskStop(priv->t_tasknum); ++ ++ /* Release irqs */ ++ if(!reinit) { ++ rtdm_irq_disable(&priv->irq_handle); ++ rtdm_irq_disable(&priv->r_irq_handle); ++ rtdm_irq_disable(&priv->t_irq_handle); ++ rtdm_irq_free(&priv->irq_handle); ++ rtdm_irq_free(&priv->r_irq_handle); ++ rtdm_irq_free(&priv->t_irq_handle); ++ rt_stack_disconnect(dev); ++ } ++ ++ /* Free rx Buffers */ ++ if(!reinit) { ++ for (i=0; ipriv; ++ struct net_device_stats *stats = &priv->stats; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ stats->rx_bytes = in_be32(&fec->rmon_r_octets); ++ stats->rx_packets = in_be32(&fec->rmon_r_packets); ++ stats->rx_errors = stats->rx_packets - ( ++ in_be32(&fec->ieee_r_frame_ok) + ++ in_be32(&fec->rmon_r_mc_pkt)); ++ stats->tx_bytes = in_be32(&fec->rmon_t_octets); ++ stats->tx_packets = in_be32(&fec->rmon_t_packets); ++ stats->tx_errors = stats->tx_packets - ( ++ in_be32(&fec->ieee_t_frame_ok) + ++ in_be32(&fec->rmon_t_col) + ++ in_be32(&fec->ieee_t_1col) + ++ in_be32(&fec->ieee_t_mcol) + ++ in_be32(&fec->ieee_t_def)); ++ stats->multicast = in_be32(&fec->rmon_r_mc_pkt); ++ stats->collisions = in_be32(&fec->rmon_t_col); ++ ++ /* detailed rx_errors: */ ++ stats->rx_length_errors = in_be32(&fec->rmon_r_undersize) ++ + in_be32(&fec->rmon_r_oversize) ++ + in_be32(&fec->rmon_r_frag) ++ + in_be32(&fec->rmon_r_jab); ++ stats->rx_over_errors = in_be32(&fec->r_macerr); ++ stats->rx_crc_errors = in_be32(&fec->ieee_r_crc); ++ stats->rx_frame_errors = in_be32(&fec->ieee_r_align); ++ stats->rx_fifo_errors = in_be32(&fec->rmon_r_drop); ++ stats->rx_missed_errors = in_be32(&fec->rmon_r_drop); ++ ++ /* detailed tx_errors: */ ++ stats->tx_aborted_errors = 0; ++ stats->tx_carrier_errors = in_be32(&fec->ieee_t_cserr); ++ stats->tx_fifo_errors = in_be32(&fec->rmon_t_drop) + ++ in_be32(&fec->ieee_t_macerr); ++ stats->tx_heartbeat_errors = in_be32(&fec->ieee_t_sqe); ++ stats->tx_window_errors = in_be32(&fec->ieee_t_lcol); ++ ++ return stats; ++} ++ ++static void ++mpc5xxx_fec_update_stat(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct net_device_stats *stats = &priv->stats; ++ struct mpc5xxx_fec *fec = priv->fec; ++ ++ out_be32(&fec->mib_control, MPC5xxx_FEC_MIB_DISABLE); ++ memset_io(&fec->rmon_t_drop, 0, ++ (u32)&fec->reserved10 - (u32)&fec->rmon_t_drop); ++ out_be32(&fec->mib_control, 0); ++ memset(stats, 0, sizeof *stats); ++ mpc5xxx_fec_get_stats(dev); ++} ++ ++#ifdef ORIGINAL_CODE ++/* ++ * Set or clear the multicast filter for this adaptor. 
++ */ ++static void ++mpc5xxx_fec_set_multicast_list(struct rtnet_device *dev) ++{ ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ struct mpc5xxx_fec *fec = priv->fec; ++ u32 u32_value; ++ ++ if (dev->flags & IFF_PROMISC) { ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ u32_value = in_be32(&fec->r_cntrl); ++ u32_value |= MPC5xxx_FEC_RCNTRL_PROM; ++ out_be32(&fec->r_cntrl, u32_value); ++ } ++ else if (dev->flags & IFF_ALLMULTI) { ++ u32_value = in_be32(&fec->r_cntrl); ++ u32_value &= ~MPC5xxx_FEC_RCNTRL_PROM; ++ out_be32(&fec->r_cntrl, u32_value); ++ out_be32(&fec->gaddr1, 0xffffffff); ++ out_be32(&fec->gaddr2, 0xffffffff); ++ } ++ else { ++ u32 crc; ++ int i; ++ struct dev_mc_list *dmi; ++ u32 gaddr1 = 0x00000000; ++ u32 gaddr2 = 0x00000000; ++ ++ dmi = dev->mc_list; ++ for (i=0; imc_count; i++) { ++ crc = ether_crc_le(6, dmi->dmi_addr) >> 26; ++ if (crc >= 32) ++ gaddr1 |= 1 << (crc-32); ++ else ++ gaddr2 |= 1 << crc; ++ dmi = dmi->next; ++ } ++ out_be32(&fec->gaddr1, gaddr1); ++ out_be32(&fec->gaddr2, gaddr2); ++ } ++} ++#endif /* ORIGINAL_CODE */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET ++static void mpc5xxx_mdio_callback(uint regval, struct rtnet_device *dev, uint data) ++{ ++ mdio_read_data_t* mrd = (mdio_read_data_t *)data; ++ mrd->regval = 0xFFFF & regval; ++ wake_up_process(mrd->sleeping_task); ++} ++ ++static int mpc5xxx_mdio_read(struct rtnet_device *dev, int phy_id, int location) ++{ ++ uint retval; ++ mdio_read_data_t* mrd = (mdio_read_data_t *)kmalloc(sizeof(*mrd), ++ GFP_KERNEL); ++ ++ mrd->sleeping_task = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ mii_queue(dev, mk_mii_read(location), ++ mpc5xxx_mdio_callback, (unsigned int) mrd); ++ schedule(); ++ ++ retval = mrd->regval; ++ ++ kfree(mrd); ++ ++ return retval; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++static void mpc5xxx_mdio_write(struct rtnet_device *dev, int phy_id, int location, int value) ++{ ++ mii_queue(dev, mk_mii_write(location, value), NULL, 0); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef ORIGINAL_CODE ++static int ++mpc5xxx_netdev_ethtool_ioctl(struct rtnet_device *dev, void *useraddr) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ struct mpc5xxx_fec_priv *private = (struct mpc5xxx_fec_priv *)dev->priv; ++#endif ++ u32 ethcmd; ++ ++ if (copy_from_user(ðcmd, useraddr, sizeof ethcmd)) ++ return -EFAULT; ++ ++ switch (ethcmd) { ++ ++ /* Get driver info */ ++ case ETHTOOL_GDRVINFO:{ ++ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; ++ strncpy(info.driver, "gt64260", ++ sizeof info.driver - 1); ++ strncpy(info.version, version, ++ sizeof info.version - 1); ++ if (copy_to_user(useraddr, &info, sizeof info)) ++ return -EFAULT; ++ return 0; ++ } ++ /* get settings */ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ case ETHTOOL_GSET:{ ++ struct ethtool_cmd ecmd = { ETHTOOL_GSET }; ++ spin_lock_irq(&private->lock); ++ mii_ethtool_gset(&private->mii_if, &ecmd); ++ spin_unlock_irq(&private->lock); ++ if (copy_to_user(useraddr, &ecmd, sizeof ecmd)) ++ return -EFAULT; ++ return 0; ++ } ++ /* set settings */ ++ case ETHTOOL_SSET:{ ++ int r; ++ struct ethtool_cmd ecmd; ++ if (copy_from_user(&ecmd, useraddr, sizeof ecmd)) ++ return -EFAULT; ++ spin_lock_irq(&private->lock); ++ r = mii_ethtool_sset(&private->mii_if, &ecmd); ++ 
spin_unlock_irq(&private->lock); ++ return r; ++ } ++ /* restart autonegotiation */ ++ case ETHTOOL_NWAY_RST:{ ++ return mii_nway_restart(&private->mii_if); ++ } ++ /* get link status */ ++ case ETHTOOL_GLINK:{ ++ struct ethtool_value edata = { ETHTOOL_GLINK }; ++ edata.data = mii_link_ok(&private->mii_if); ++ if (copy_to_user(useraddr, &edata, sizeof edata)) ++ return -EFAULT; ++ return 0; ++ } ++#endif ++ /* get message-level */ ++ case ETHTOOL_GMSGLVL:{ ++ struct ethtool_value edata = { ETHTOOL_GMSGLVL }; ++ edata.data = 0; /* XXX */ ++ if (copy_to_user(useraddr, &edata, sizeof edata)) ++ return -EFAULT; ++ return 0; ++ } ++ /* set message-level */ ++ case ETHTOOL_SMSGLVL:{ ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, useraddr, sizeof edata)) ++ return -EFAULT; ++/* debug = edata.data; *//* XXX */ ++ return 0; ++ } ++ } ++ return -EOPNOTSUPP; ++} ++ ++static int ++mpc5xxx_fec_ioctl(struct rtnet_device *dev, struct ifreq *rq, int cmd) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ struct mii_ioctl_data *data = (struct mii_ioctl_data *) &rq->ifr_data; ++ int phy = dev->base_addr & 0x1f; ++#endif ++ int retval; ++ ++ switch (cmd) { ++ case SIOCETHTOOL: ++ retval = mpc5xxx_netdev_ethtool_ioctl( ++ dev, (void *) rq->ifr_data); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO_NOT_YET_XXX ++ case SIOCGMIIPHY: /* Get address of MII PHY in use. */ ++ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ ++ data->phy_id = phy; ++ /* Fall through */ ++ ++ case SIOCGMIIREG: /* Read MII PHY register. */ ++ case SIOCDEVPRIVATE + 1: /* for binary compat, remove in 2.5 */ ++ data->val_out = ++ mpc5xxx_mdio_read(dev, data->phy_id&0x1f, ++ data->reg_num&0x1f); ++ retval = 0; ++ break; ++ ++ case SIOCSMIIREG: /* Write MII PHY register. 
*/ ++ case SIOCDEVPRIVATE + 2: /* for binary compat, remove in 2.5 */ ++ if (!capable(CAP_NET_ADMIN)) { ++ retval = -EPERM; ++ } else { ++ mpc5xxx_mdio_write(dev, data->phy_id & 0x1f, ++ data->reg_num & 0x1f, data->val_in); ++ retval = 0; ++ } ++ break; ++#endif ++ ++ default: ++ retval = -EOPNOTSUPP; ++ break; ++ } ++ return retval; ++} ++ ++static void __init ++mpc5xxx_fec_str2mac(char *str, unsigned char *mac) ++{ ++ int i; ++ u64 val64; ++ ++ val64 = simple_strtoull(str, NULL, 16); ++ ++ for (i = 0; i < 6; i++) ++ mac[5-i] = val64 >> (i*8); ++} ++ ++static int __init ++mpc5xxx_fec_mac_setup(char *mac_address) ++{ ++ mpc5xxx_fec_str2mac(mac_address, mpc5xxx_fec_mac_addr); ++ return 0; ++} ++ ++__setup("mpc5xxx_mac=", mpc5xxx_fec_mac_setup); ++#endif /* ORIGINAL_CODE */ ++ ++static int __init ++mpc5xxx_fec_init(void) ++{ ++ struct mpc5xxx_fec *fec; ++ struct rtnet_device *dev; ++ struct mpc5xxx_fec_priv *priv; ++ int err = 0; ++ ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("mpc5xxx_fec_init\n"); ++#endif ++ ++ if (!rx_pool_size) ++ rx_pool_size = MPC5xxx_FEC_RBD_NUM * 2; ++ ++ dev = rt_alloc_etherdev(sizeof(*priv), rx_pool_size + MPC5xxx_FEC_TBD_NUM); ++ if (!dev) ++ return -EIO; ++ rtdev_alloc_name(dev, "rteth%d"); ++ memset(dev->priv, 0, sizeof(*priv)); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ ++ ++ mpc5xxx_fec_dev = dev; ++ priv = (struct mpc5xxx_fec_priv *)dev->priv; ++#if MPC5xxx_FEC_DEBUG > 1 ++ printk("fec_priv %08x\n", (u32)priv); ++#endif ++ priv->fec = fec = (struct mpc5xxx_fec *)MPC5xxx_FEC; ++ priv->gpio = (struct mpc5xxx_gpio *)MPC5xxx_GPIO; ++ priv->sdma = (struct mpc5xxx_sdma *)MPC5xxx_SDMA; ++ ++ rtdm_lock_init(&priv->lock); ++ dev->open = mpc5xxx_fec_open; ++ dev->stop = mpc5xxx_fec_close; ++ dev->hard_start_xmit = mpc5xxx_fec_hard_start_xmit; ++ //FIXME dev->hard_header = &rt_eth_header; ++ dev->get_stats = mpc5xxx_fec_get_stats; ++#ifdef ORIGINAL_CODE ++ dev->do_ioctl = mpc5xxx_fec_ioctl; ++ dev->set_mac_address = mpc5xxx_fec_set_mac_address; ++ dev->set_multicast_list = mpc5xxx_fec_set_multicast_list; ++ ++ dev->tx_timeout = mpc5xxx_fec_tx_timeout; ++ dev->watchdog_timeo = MPC5xxx_FEC_WATCHDOG_TIMEOUT; ++#endif /* ORIGINAL_CODE */ ++ dev->flags &= ~IFF_RUNNING; ++ ++ if ((err = rt_register_rtnetdev(dev))) ++ goto abort; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_FASTROUTE ++ dev->accept_fastpath = mpc5xxx_fec_accept_fastpath; ++#endif ++ if (memcmp(mpc5xxx_fec_mac_addr, null_mac, 6) != 0) ++ memcpy(dev->dev_addr, mpc5xxx_fec_mac_addr, 6); ++ else { ++ *(u32 *)&dev->dev_addr[0] = in_be32(&fec->paddr1); ++ *(u16 *)&dev->dev_addr[4] = in_be16((u16*)&fec->paddr2); ++ } ++ ++ /* ++ * Read MIB counters in order to reset them, ++ * then zero all the stats fields in memory ++ */ ++ mpc5xxx_fec_update_stat(dev); ++ ++ return 0; ++ ++abort: ++ rtdev_free(dev); ++ ++ return err; ++} ++ ++static void __exit ++mpc5xxx_fec_uninit(void) ++{ ++ struct rtnet_device *dev = mpc5xxx_fec_dev; ++ struct mpc5xxx_fec_priv *priv = (struct mpc5xxx_fec_priv *)dev->priv; ++ ++ rt_stack_disconnect(dev); ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++ printk("%s: unloaded\n", dev->name); ++ rtdev_free(dev); ++ dev->priv = NULL; ++} ++ ++static int __init ++mpc5xxx_fec_module_init(void) ++{ ++ return mpc5xxx_fec_init(); ++} ++ ++static void __exit ++mpc5xxx_fec_module_exit(void) ++{ ++ mpc5xxx_fec_uninit(); ++} ++ ++module_init(mpc5xxx_fec_module_init); ++module_exit(mpc5xxx_fec_module_exit); +--- linux/drivers/xenomai/net/drivers/loopback.c 1970-01-01 
08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/loopback.c 2021-04-07 16:01:27.557633695 +0800 +@@ -0,0 +1,139 @@ ++/* loopback.c ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * extended by Jose Carlos Billalabeitia and Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++MODULE_AUTHOR("Maintainer: Jan Kiszka "); ++MODULE_DESCRIPTION("RTnet loopback driver"); ++MODULE_LICENSE("GPL"); ++ ++static struct rtnet_device *rt_loopback_dev; ++ ++/*** ++ * rt_loopback_open ++ * @rtdev ++ */ ++static int rt_loopback_open(struct rtnet_device *rtdev) ++{ ++ rt_stack_connect(rtdev, &STACK_manager); ++ rtnetif_start_queue(rtdev); ++ ++ return 0; ++} ++ ++/*** ++ * rt_loopback_close ++ * @rtdev ++ */ ++static int rt_loopback_close(struct rtnet_device *rtdev) ++{ ++ rtnetif_stop_queue(rtdev); ++ rt_stack_disconnect(rtdev); ++ ++ return 0; ++} ++ ++/*** ++ * rt_loopback_xmit - begin packet transmission ++ * @skb: packet to be sent ++ * @dev: network device to which packet is sent ++ * ++ */ ++static int rt_loopback_xmit(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ /* write transmission stamp - in case any protocol ever gets the idea to ++ ask the lookback device for this service... 
*/ ++ if (rtskb->xmit_stamp) ++ *rtskb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *rtskb->xmit_stamp); ++ ++ /* make sure that critical fields are re-intialised */ ++ rtskb->chain_end = rtskb; ++ ++ /* parse the Ethernet header as usual */ ++ rtskb->protocol = rt_eth_type_trans(rtskb, rtdev); ++ ++ rt_stack_deliver(rtskb); ++ ++ return 0; ++} ++ ++/*** ++ * loopback_init ++ */ ++static int __init loopback_init(void) ++{ ++ int err; ++ struct rtnet_device *rtdev; ++ ++ pr_info("initializing loopback interface...\n"); ++ ++ if ((rtdev = rt_alloc_etherdev(0, 1)) == NULL) ++ return -ENODEV; ++ ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ ++ strcpy(rtdev->name, "rtlo"); ++ ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->open = &rt_loopback_open; ++ rtdev->stop = &rt_loopback_close; ++ rtdev->hard_start_xmit = &rt_loopback_xmit; ++ rtdev->flags |= IFF_LOOPBACK; ++ rtdev->flags &= ~IFF_BROADCAST; ++ rtdev->features |= NETIF_F_LLTX; ++ ++ if ((err = rt_register_rtnetdev(rtdev)) != 0) { ++ rtdev_free(rtdev); ++ return err; ++ } ++ ++ rt_loopback_dev = rtdev; ++ ++ return 0; ++} ++ ++/*** ++ * loopback_cleanup ++ */ ++static void __exit loopback_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rt_loopback_dev; ++ ++ pr_info("removing loopback interface...\n"); ++ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ rtdev_free(rtdev); ++} ++ ++module_init(loopback_init); ++module_exit(loopback_cleanup); +--- linux/drivers/xenomai/net/drivers/8139too.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/8139too.c 2021-04-07 16:01:27.545633712 +0800 +@@ -0,0 +1,1727 @@ ++/*** ++ * rt_8139too.c - Realtime driver for ++ * for more information, look to end of file or '8139too.c' ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++ /* ++ * This Version was modified by Fabian Koch ++ * It includes a different implementation of the 'cards' module parameter ++ * we are using an array of integers to determine which cards to use ++ * for RTnet (e.g. cards=0,1,0) ++ * ++ * Thanks to Jan Kiszka for this idea ++ */ ++ ++#define DRV_NAME "rt_8139too" ++#define DRV_VERSION "0.9.24-rt0.7" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* *** RTnet *** */ ++#include ++ ++#define MAX_UNITS 8 ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++static int media[MAX_UNITS] = { [0 ... 
(MAX_UNITS-1)] = -1 }; ++static unsigned int rx_pool_size = DEFAULT_RX_POOL_SIZE; ++module_param_array(cards, int, NULL, 0444); ++module_param_array(media, int, NULL, 0444); ++module_param(rx_pool_size, uint, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++MODULE_PARM_DESC(media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps"); ++MODULE_PARM_DESC(rx_pool_size, "number of receive buffers"); ++ ++/* *** RTnet *** */ ++ ++ ++#define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION ++#define PFX DRV_NAME ": " ++ ++/* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */ ++/* *** RTnet *** ++#ifdef CONFIG_8139TOO_PIO ++#define USE_IO_OPS 1 ++#endif ++ *** RTnet *** */ ++ ++/* Size of the in-memory receive ring. */ ++#define RX_BUF_LEN_IDX 2 /* 0==8K, 1==16K, 2==32K, 3==64K */ ++#define RX_BUF_LEN (8192 << RX_BUF_LEN_IDX) ++#define RX_BUF_PAD 16 ++#define RX_BUF_WRAP_PAD 2048 /* spare padding to handle lack of packet wrap */ ++#define RX_BUF_TOT_LEN (RX_BUF_LEN + RX_BUF_PAD + RX_BUF_WRAP_PAD) ++ ++/* Number of Tx descriptor registers. */ ++#define NUM_TX_DESC 4 ++ ++/* max supported ethernet frame size -- must be at least (rtdev->mtu+14+4).*/ ++#define MAX_ETH_FRAME_SIZE 1536 ++ ++/* Size of the Tx bounce buffers -- must be at least (rtdev->mtu+14+4). */ ++#define TX_BUF_SIZE MAX_ETH_FRAME_SIZE ++#define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC) ++ ++/* PCI Tuning Parameters ++ Threshold is bytes transferred to chip before transmission starts. */ ++#define TX_FIFO_THRESH 256 /* In bytes, rounded down to 32 byte units. */ ++ ++/* The following settings are log_2(bytes)-4: 0 == 16 bytes .. 6==1024, 7==end of packet. */ ++#define RX_FIFO_THRESH 7 /* Rx buffer level before first PCI xfer. */ ++#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_RETRY 8 /* 0-15. retries = 16 + (TX_RETRY * 16) */ ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. 
*/ ++#define TX_TIMEOUT (6*HZ) ++ ++ ++enum { ++ HAS_MII_XCVR = 0x010000, ++ HAS_CHIP_XCVR = 0x020000, ++ HAS_LNK_CHNG = 0x040000, ++}; ++ ++#define RTL_MIN_IO_SIZE 0x80 ++#define RTL8139B_IO_SIZE 256 ++ ++#define RTL8129_CAPS HAS_MII_XCVR ++#define RTL8139_CAPS HAS_CHIP_XCVR|HAS_LNK_CHNG ++ ++typedef enum { ++ RTL8139 = 0, ++ RTL8139_CB, ++ SMC1211TX, ++ /*MPX5030,*/ ++ DELTA8139, ++ ADDTRON8139, ++ DFE538TX, ++ DFE690TXD, ++ FE2000VX, ++ ALLIED8139, ++ RTL8129, ++} board_t; ++ ++ ++/* indexed by board_t, above */ ++static struct { ++ const char *name; ++ u32 hw_flags; ++} board_info[] = { ++ { "RealTek RTL8139", RTL8139_CAPS }, ++ { "RealTek RTL8129", RTL8129_CAPS }, ++}; ++ ++ ++static struct pci_device_id rtl8139_pci_tbl[] = { ++ {0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x10ec, 0x8138, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1500, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x4033, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1186, 0x1300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1186, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x13d1, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1259, 0xa117, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1259, 0xa11e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x14ea, 0xab06, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x14ea, 0xab07, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x11db, 0x1234, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1432, 0x9130, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x02ac, 0x1012, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x018a, 0x0106, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x126c, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x1743, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ {0x021b, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++ ++#ifdef CONFIG_SH_SECUREEDGE5410 ++ /* Bogus 8139 silicon reports 8129 without external PROM :-( */ ++ {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, ++#endif ++#ifdef CONFIG_8139TOO_8129 ++ {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8129 }, ++#endif ++ ++ /* some crazy cards report invalid vendor ids like ++ * 0x0001 here. The other ids are valid and constant, ++ * so we simply don't match on the main vendor id. ++ */ ++ {PCI_ANY_ID, 0x8139, 0x10ec, 0x8139, 0, 0, RTL8139 }, ++ {PCI_ANY_ID, 0x8139, 0x1186, 0x1300, 0, 0, RTL8139 }, ++ {PCI_ANY_ID, 0x8139, 0x13d1, 0xab06, 0, 0, RTL8139 }, ++ ++ {0,} ++}; ++MODULE_DEVICE_TABLE (pci, rtl8139_pci_tbl); ++ ++/* The rest of these values should never change. */ ++ ++/* Symbolic offsets to registers. */ ++enum RTL8139_registers { ++ MAC0 = 0, /* Ethernet hardware address. */ ++ MAR0 = 8, /* Multicast filter. */ ++ TxStatus0 = 0x10, /* Transmit status (Four 32bit registers). */ ++ TxAddr0 = 0x20, /* Tx descriptors (also four 32bit). */ ++ RxBuf = 0x30, ++ ChipCmd = 0x37, ++ RxBufPtr = 0x38, ++ RxBufAddr = 0x3A, ++ IntrMask = 0x3C, ++ IntrStatus = 0x3E, ++ TxConfig = 0x40, ++ ChipVersion = 0x43, ++ RxConfig = 0x44, ++ Timer = 0x48, /* A general-purpose counter. */ ++ RxMissed = 0x4C, /* 24 bits valid, write clears. 
*/ ++ Cfg9346 = 0x50, ++ Config0 = 0x51, ++ Config1 = 0x52, ++ FlashReg = 0x54, ++ MediaStatus = 0x58, ++ Config3 = 0x59, ++ Config4 = 0x5A, /* absent on RTL-8139A */ ++ HltClk = 0x5B, ++ MultiIntr = 0x5C, ++ TxSummary = 0x60, ++ BasicModeCtrl = 0x62, ++ BasicModeStatus = 0x64, ++ NWayAdvert = 0x66, ++ NWayLPAR = 0x68, ++ NWayExpansion = 0x6A, ++ /* Undocumented registers, but required for proper operation. */ ++ FIFOTMS = 0x70, /* FIFO Control and test. */ ++ CSCR = 0x74, /* Chip Status and Configuration Register. */ ++ PARA78 = 0x78, ++ PARA7c = 0x7c, /* Magic transceiver parameter register. */ ++ Config5 = 0xD8, /* absent on RTL-8139A */ ++}; ++ ++enum ClearBitMasks { ++ MultiIntrClear = 0xF000, ++ ChipCmdClear = 0xE2, ++ Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1), ++}; ++ ++enum ChipCmdBits { ++ CmdReset = 0x10, ++ CmdRxEnb = 0x08, ++ CmdTxEnb = 0x04, ++ RxBufEmpty = 0x01, ++}; ++ ++/* Interrupt register bits, using my own meaningful names. */ ++enum IntrStatusBits { ++ PCIErr = 0x8000, ++ PCSTimeout = 0x4000, ++ RxFIFOOver = 0x40, ++ RxUnderrun = 0x20, ++ RxOverflow = 0x10, ++ TxErr = 0x08, ++ TxOK = 0x04, ++ RxErr = 0x02, ++ RxOK = 0x01, ++ ++ RxAckBits = RxFIFOOver | RxOverflow | RxOK, ++}; ++ ++enum TxStatusBits { ++ TxHostOwns = 0x2000, ++ TxUnderrun = 0x4000, ++ TxStatOK = 0x8000, ++ TxOutOfWindow = 0x20000000, ++ TxAborted = 0x40000000, ++ TxCarrierLost = 0x80000000, ++}; ++enum RxStatusBits { ++ RxMulticast = 0x8000, ++ RxPhysical = 0x4000, ++ RxBroadcast = 0x2000, ++ RxBadSymbol = 0x0020, ++ RxRunt = 0x0010, ++ RxTooLong = 0x0008, ++ RxCRCErr = 0x0004, ++ RxBadAlign = 0x0002, ++ RxStatusOK = 0x0001, ++}; ++ ++/* Bits in RxConfig. */ ++enum rx_mode_bits { ++ AcceptErr = 0x20, ++ AcceptRunt = 0x10, ++ AcceptBroadcast = 0x08, ++ AcceptMulticast = 0x04, ++ AcceptMyPhys = 0x02, ++ AcceptAllPhys = 0x01, ++}; ++ ++/* Bits in TxConfig. */ ++enum tx_config_bits { ++ ++ /* Interframe Gap Time. 
Only TxIFG96 doesn't violate IEEE 802.3 */ ++ TxIFGShift = 24, ++ TxIFG84 = (0 << TxIFGShift), /* 8.4us / 840ns (10 / 100Mbps) */ ++ TxIFG88 = (1 << TxIFGShift), /* 8.8us / 880ns (10 / 100Mbps) */ ++ TxIFG92 = (2 << TxIFGShift), /* 9.2us / 920ns (10 / 100Mbps) */ ++ TxIFG96 = (3 << TxIFGShift), /* 9.6us / 960ns (10 / 100Mbps) */ ++ ++ TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */ ++ TxCRC = (1 << 16), /* DISABLE appending CRC to end of Tx packets */ ++ TxClearAbt = (1 << 0), /* Clear abort (WO) */ ++ TxDMAShift = 8, /* DMA burst value (0-7) is shifted this many bits */ ++ TxRetryShift = 4, /* TXRR value (0-15) is shifted this many bits */ ++ ++ TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */ ++}; ++ ++/* Bits in Config1 */ ++enum Config1Bits { ++ Cfg1_PM_Enable = 0x01, ++ Cfg1_VPD_Enable = 0x02, ++ Cfg1_PIO = 0x04, ++ Cfg1_MMIO = 0x08, ++ LWAKE = 0x10, /* not on 8139, 8139A */ ++ Cfg1_Driver_Load = 0x20, ++ Cfg1_LED0 = 0x40, ++ Cfg1_LED1 = 0x80, ++ SLEEP = (1 << 1), /* only on 8139, 8139A */ ++ PWRDN = (1 << 0), /* only on 8139, 8139A */ ++}; ++ ++/* Bits in Config3 */ ++enum Config3Bits { ++ Cfg3_FBtBEn = (1 << 0), /* 1 = Fast Back to Back */ ++ Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */ ++ Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */ ++ Cfg3_CardB_En = (1 << 3), /* 1 = enable CardBus registers */ ++ Cfg3_LinkUp = (1 << 4), /* 1 = wake up on link up */ ++ Cfg3_Magic = (1 << 5), /* 1 = wake up on Magic Packet (tm) */ ++ Cfg3_PARM_En = (1 << 6), /* 0 = software can set twister parameters */ ++ Cfg3_GNTSel = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */ ++}; ++ ++/* Bits in Config4 */ ++enum Config4Bits { ++ LWPTN = (1 << 2), /* not on 8139, 8139A */ ++}; ++ ++/* Bits in Config5 */ ++enum Config5Bits { ++ Cfg5_PME_STS = (1 << 0), /* 1 = PCI reset resets PME_Status */ ++ Cfg5_LANWake = (1 << 1), /* 1 = enable LANWake signal */ ++ Cfg5_LDPS = (1 << 2), /* 0 = save power when link is down */ ++ Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */ ++ Cfg5_UWF = (1 << 4), /* 1 = accept unicast wakeup frame */ ++ Cfg5_MWF = (1 << 5), /* 1 = accept multicast wakeup frame */ ++ Cfg5_BWF = (1 << 6), /* 1 = accept broadcast wakeup frame */ ++}; ++ ++enum RxConfigBits { ++ /* rx fifo threshold */ ++ RxCfgFIFOShift = 13, ++ RxCfgFIFONone = (7 << RxCfgFIFOShift), ++ ++ /* Max DMA burst */ ++ RxCfgDMAShift = 8, ++ RxCfgDMAUnlimited = (7 << RxCfgDMAShift), ++ ++ /* rx ring buffer length */ ++ RxCfgRcv8K = 0, ++ RxCfgRcv16K = (1 << 11), ++ RxCfgRcv32K = (1 << 12), ++ RxCfgRcv64K = (1 << 11) | (1 << 12), ++ ++ /* Disable packet wrap at end of Rx buffer */ ++ RxNoWrap = (1 << 7), ++}; ++ ++ ++/* Twister tuning parameters from RealTek. ++ Completely undocumented, but required to tune bad links. 
*/ ++enum CSCRBits { ++ CSCR_LinkOKBit = 0x0400, ++ CSCR_LinkChangeBit = 0x0800, ++ CSCR_LinkStatusBits = 0x0f000, ++ CSCR_LinkDownOffCmd = 0x003c0, ++ CSCR_LinkDownCmd = 0x0f3c0, ++}; ++ ++ ++enum Cfg9346Bits { ++ Cfg9346_Lock = 0x00, ++ Cfg9346_Unlock = 0xC0, ++}; ++ ++ ++#define PARA78_default 0x78fa8388 ++#define PARA7c_default 0xcb38de43 /* param[0][3] */ ++#define PARA7c_xxx 0xcb38de43 ++/*static const unsigned long param[4][4] = { ++ {0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43}, ++ {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, ++ {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, ++ {0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83} ++};*/ ++ ++typedef enum { ++ CH_8139 = 0, ++ CH_8139_K, ++ CH_8139A, ++ CH_8139B, ++ CH_8130, ++ CH_8139C, ++} chip_t; ++ ++enum chip_flags { ++ HasHltClk = (1 << 0), ++ HasLWake = (1 << 1), ++}; ++ ++ ++/* directly indexed by chip_t, above */ ++const static struct { ++ const char *name; ++ u8 version; /* from RTL8139C docs */ ++ u32 flags; ++} rtl_chip_info[] = { ++ { "RTL-8139", ++ 0x40, ++ HasHltClk, ++ }, ++ ++ { "RTL-8139 rev K", ++ 0x60, ++ HasHltClk, ++ }, ++ ++ { "RTL-8139A", ++ 0x70, ++ HasHltClk, /* XXX undocumented? */ ++ }, ++ ++ { "RTL-8139A rev G", ++ 0x72, ++ HasHltClk, /* XXX undocumented? */ ++ }, ++ ++ { "RTL-8139B", ++ 0x78, ++ HasLWake, ++ }, ++ ++ { "RTL-8130", ++ 0x7C, ++ HasLWake, ++ }, ++ ++ { "RTL-8139C", ++ 0x74, ++ HasLWake, ++ }, ++ ++ { "RTL-8100", ++ 0x7A, ++ HasLWake, ++ }, ++ ++ { "RTL-8100B/8139D", ++ 0x75, ++ HasHltClk /* XXX undocumented? */ ++ | HasLWake, ++ }, ++ ++ { "RTL-8101", ++ 0x77, ++ HasLWake, ++ }, ++}; ++ ++struct rtl_extra_stats { ++ unsigned long early_rx; ++ unsigned long tx_buf_mapped; ++ unsigned long tx_timeouts; ++ unsigned long rx_lost_in_ring; ++}; ++ ++struct rtl8139_private { ++ void *mmio_addr; ++ int drv_flags; ++ struct pci_dev *pci_dev; ++ struct net_device_stats stats; ++ unsigned char *rx_ring; ++ unsigned int cur_rx; /* Index into the Rx buffer of next Rx pkt. */ ++ unsigned int tx_flag; ++ unsigned long cur_tx; ++ unsigned long dirty_tx; ++ unsigned char *tx_buf[NUM_TX_DESC]; /* Tx bounce buffers */ ++ unsigned char *tx_bufs; /* Tx bounce buffer region. */ ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_bufs_dma; ++ signed char phys[4]; /* MII device addresses. */ ++ char twistie, twist_row, twist_col; /* Twister tune state. */ ++ unsigned int default_port:4; /* Last rtdev->if_port value. */ ++ unsigned int medialock:1; /* Don't sense media type. 
*/ ++ rtdm_lock_t lock; ++ chip_t chipset; ++ pid_t thr_pid; ++ u32 rx_config; ++ struct rtl_extra_stats xstats; ++ int time_to_die; ++ struct mii_if_info mii; ++ rtdm_irq_t irq_handle; ++}; ++ ++MODULE_AUTHOR ("Jeff Garzik "); ++MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++static int read_eeprom (void *ioaddr, int location, int addr_len); ++static int mdio_read (struct rtnet_device *rtdev, int phy_id, int location); ++static void mdio_write (struct rtnet_device *rtdev, int phy_id, int location, int val); ++ ++ ++static int rtl8139_open (struct rtnet_device *rtdev); ++static int rtl8139_close (struct rtnet_device *rtdev); ++static int rtl8139_interrupt (rtdm_irq_t *irq_handle); ++static int rtl8139_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev); ++ ++static int rtl8139_ioctl(struct rtnet_device *, struct ifreq *rq, int cmd); ++static struct net_device_stats *rtl8139_get_stats(struct rtnet_device*rtdev); ++ ++static void rtl8139_init_ring (struct rtnet_device *rtdev); ++static void rtl8139_set_rx_mode (struct rtnet_device *rtdev); ++static void __set_rx_mode (struct rtnet_device *rtdev); ++static void rtl8139_hw_start (struct rtnet_device *rtdev); ++ ++#ifdef USE_IO_OPS ++ ++#define RTL_R8(reg) inb (((unsigned long)ioaddr) + (reg)) ++#define RTL_R16(reg) inw (((unsigned long)ioaddr) + (reg)) ++#define RTL_R32(reg) inl (((unsigned long)ioaddr) + (reg)) ++#define RTL_W8(reg, val8) outb ((val8), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W16(reg, val16) outw ((val16), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W32(reg, val32) outl ((val32), ((unsigned long)ioaddr) + (reg)) ++#define RTL_W8_F RTL_W8 ++#define RTL_W16_F RTL_W16 ++#define RTL_W32_F RTL_W32 ++#undef readb ++#undef readw ++#undef readl ++#undef writeb ++#undef writew ++#undef writel ++#define readb(addr) inb((unsigned long)(addr)) ++#define readw(addr) inw((unsigned long)(addr)) ++#define readl(addr) inl((unsigned long)(addr)) ++#define writeb(val,addr) outb((val),(unsigned long)(addr)) ++#define writew(val,addr) outw((val),(unsigned long)(addr)) ++#define writel(val,addr) outl((val),(unsigned long)(addr)) ++ ++#else ++ ++/* write MMIO register, with flush */ ++/* Flush avoids rtl8139 bug w/ posted MMIO writes */ ++#define RTL_W8_F(reg, val8) do { writeb ((val8), ioaddr + (reg)); readb (ioaddr + (reg)); } while (0) ++#define RTL_W16_F(reg, val16) do { writew ((val16), ioaddr + (reg)); readw (ioaddr + (reg)); } while (0) ++#define RTL_W32_F(reg, val32) do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0) ++ ++ ++#define MMIO_FLUSH_AUDIT_COMPLETE 1 ++#if MMIO_FLUSH_AUDIT_COMPLETE ++ ++/* write MMIO register */ ++#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg)) ++#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg)) ++#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg)) ++ ++#else ++ ++/* write MMIO register, then flush */ ++#define RTL_W8 RTL_W8_F ++#define RTL_W16 RTL_W16_F ++#define RTL_W32 RTL_W32_F ++ ++#endif /* MMIO_FLUSH_AUDIT_COMPLETE */ ++ ++/* read MMIO register */ ++#define RTL_R8(reg) readb (ioaddr + (reg)) ++#define RTL_R16(reg) readw (ioaddr + (reg)) ++#define RTL_R32(reg) readl (ioaddr + (reg)) ++ ++#endif /* USE_IO_OPS */ ++ ++ ++static const u16 rtl8139_intr_mask = ++ PCIErr | PCSTimeout | RxUnderrun | RxOverflow | RxFIFOOver | ++ TxErr | TxOK | RxErr | RxOK; ++ ++static const unsigned int rtl8139_rx_config = ++ RxCfgRcv32K | RxNoWrap | ++ (RX_FIFO_THRESH << RxCfgFIFOShift) | ++ (RX_DMA_BURST << 
RxCfgDMAShift); ++ ++static const unsigned int rtl8139_tx_config = ++ TxIFG96 | (TX_DMA_BURST << TxDMAShift) | (TX_RETRY << TxRetryShift); ++ ++ ++ ++ ++static void rtl8139_chip_reset (void *ioaddr) ++{ ++ int i; ++ ++ /* Soft reset the chip. */ ++ RTL_W8 (ChipCmd, CmdReset); ++ ++ /* Check that the chip has finished the reset. */ ++ for (i = 1000; i > 0; i--) { ++ barrier(); ++ if ((RTL_R8 (ChipCmd) & CmdReset) == 0) ++ break; ++ udelay (10); ++ } ++} ++ ++ ++static int rtl8139_init_board (struct pci_dev *pdev, ++ struct rtnet_device **dev_out) ++{ ++ void *ioaddr; ++ struct rtnet_device *rtdev; ++ struct rtl8139_private *tp; ++ u8 tmp8; ++ int rc; ++ unsigned int i; ++#ifdef USE_IO_OPS ++ u32 pio_start, pio_end, pio_flags, pio_len; ++#endif ++ unsigned long mmio_start, mmio_flags, mmio_len; ++ u32 tmp; ++ ++ ++ *dev_out = NULL; ++ ++ /* dev and rtdev->priv zeroed in alloc_etherdev */ ++ rtdev=rt_alloc_etherdev(sizeof (struct rtl8139_private), ++ rx_pool_size + NUM_TX_DESC); ++ if (rtdev==NULL) { ++ rtdm_printk (KERN_ERR PFX "%s: Unable to alloc new net device\n", pci_name(pdev)); ++ return -ENOMEM; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ tp = rtdev->priv; ++ tp->pci_dev = pdev; ++ ++ /* enable device (incl. PCI PM wakeup and hotplug setup) */ ++ rc = pci_enable_device (pdev); ++ if (rc) ++ goto err_out; ++ ++ rc = pci_request_regions (pdev, "rtnet8139too"); ++ if (rc) ++ goto err_out; ++ ++ /* enable PCI bus-mastering */ ++ pci_set_master (pdev); ++ ++ mmio_start = pci_resource_start (pdev, 1); ++ mmio_flags = pci_resource_flags (pdev, 1); ++ mmio_len = pci_resource_len (pdev, 1); ++ ++ /* set this immediately, we need to know before ++ * we talk to the chip directly */ ++#ifdef USE_IO_OPS ++ pio_start = pci_resource_start (pdev, 0); ++ pio_end = pci_resource_end (pdev, 0); ++ pio_flags = pci_resource_flags (pdev, 0); ++ pio_len = pci_resource_len (pdev, 0); ++ ++ /* make sure PCI base addr 0 is PIO */ ++ if (!(pio_flags & IORESOURCE_IO)) { ++ rtdm_printk (KERN_ERR PFX "%s: region #0 not a PIO resource, aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ /* check for weird/broken PCI region reporting */ ++ if (pio_len < RTL_MIN_IO_SIZE) { ++ rtdm_printk (KERN_ERR PFX "%s: Invalid PCI I/O region size(s), aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++#else ++ /* make sure PCI base addr 1 is MMIO */ ++ if (!(mmio_flags & IORESOURCE_MEM)) { ++ rtdm_printk(KERN_ERR PFX "%s: region #1 not an MMIO resource, aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ if (mmio_len < RTL_MIN_IO_SIZE) { ++ rtdm_printk(KERN_ERR PFX "%s: Invalid PCI mem region size(s), aborting\n", pci_name(pdev)); ++ rc = -ENODEV; ++ goto err_out; ++ } ++#endif ++ ++#ifdef USE_IO_OPS ++ ioaddr = (void *) pio_start; ++ rtdev->base_addr = pio_start; ++ tp->mmio_addr = ioaddr; ++#else ++ /* ioremap MMIO region */ ++ ioaddr = ioremap (mmio_start, mmio_len); ++ if (ioaddr == NULL) { ++ rtdm_printk(KERN_ERR PFX "%s: cannot remap MMIO, aborting\n", pci_name(pdev)); ++ rc = -EIO; ++ goto err_out; ++ } ++ rtdev->base_addr = (long) ioaddr; ++ tp->mmio_addr = ioaddr; ++#endif /* USE_IO_OPS */ ++ ++ /* Bring old chips out of low-power mode. 
*/ ++ RTL_W8 (HltClk, 'R'); ++ ++ /* check for missing/broken hardware */ ++ if (RTL_R32 (TxConfig) == 0xFFFFFFFF) { ++ rtdm_printk(KERN_ERR PFX "%s: Chip not responding, ignoring board\n", pci_name(pdev)); ++ rc = -EIO; ++ goto err_out; ++ } ++ ++ /* identify chip attached to board */ ++ tmp = RTL_R8 (ChipVersion); ++ for (i = 0; i < ARRAY_SIZE (rtl_chip_info); i++) ++ if (tmp == rtl_chip_info[i].version) { ++ tp->chipset = i; ++ goto match; ++ } ++ ++ rtdm_printk("rt8139too: unknown chip version, assuming RTL-8139\n"); ++ rtdm_printk("rt8139too: TxConfig = 0x%08x\n", RTL_R32 (TxConfig)); ++ ++ tp->chipset = 0; ++ ++match: ++ if (tp->chipset >= CH_8139B) { ++ u8 new_tmp8 = tmp8 = RTL_R8 (Config1); ++ if ((rtl_chip_info[tp->chipset].flags & HasLWake) && ++ (tmp8 & LWAKE)) ++ new_tmp8 &= ~LWAKE; ++ new_tmp8 |= Cfg1_PM_Enable; ++ if (new_tmp8 != tmp8) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W8 (Config1, tmp8); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } ++ if (rtl_chip_info[tp->chipset].flags & HasLWake) { ++ tmp8 = RTL_R8 (Config4); ++ if (tmp8 & LWPTN) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W8 (Config4, tmp8 & ~LWPTN); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } ++ } ++ } else { ++ tmp8 = RTL_R8 (Config1); ++ tmp8 &= ~(SLEEP | PWRDN); ++ RTL_W8 (Config1, tmp8); ++ } ++ ++ rtl8139_chip_reset (ioaddr); ++ ++ *dev_out = rtdev; ++ return 0; ++ ++err_out: ++#ifndef USE_IO_OPS ++ if (tp->mmio_addr) iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ /* it's ok to call this even if we have no regions to free */ ++ pci_release_regions (pdev); ++ rtdev_free(rtdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ return rc; ++} ++ ++ ++ ++ ++static int rtl8139_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct rtl8139_private *tp; ++ int i, addr_len; ++ int option; ++ void *ioaddr; ++ static int board_idx = -1; ++ ++ board_idx++; ++ ++ if( cards[board_idx] == 0) ++ return -ENODEV; ++ ++ /* when we're built into the kernel, the driver version message ++ * is only printed if at least one 8139 board has been found ++ */ ++#ifndef MODULE ++ { ++ static int printed_version; ++ if (!printed_version++) ++ rtdm_printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); ++ } ++#endif ++ ++ if ((i=rtl8139_init_board (pdev, &rtdev)) < 0) ++ return i; ++ ++ ++ tp = rtdev->priv; ++ ioaddr = tp->mmio_addr; ++ ++ addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6; ++ for (i = 0; i < 3; i++) ++ ((u16 *) (rtdev->dev_addr))[i] = ++ le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len)); ++ ++ /* The Rtl8139-specific entries in the device structure. */ ++ rtdev->open = rtl8139_open; ++ rtdev->stop = rtl8139_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->hard_start_xmit = rtl8139_start_xmit; ++ rtdev->do_ioctl = rtl8139_ioctl; ++ rtdev->get_stats = rtl8139_get_stats; ++ ++ /*rtdev->set_multicast_list = rtl8139_set_rx_mode; */ ++ rtdev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; ++ ++ rtdev->irq = pdev->irq; ++ ++ /* rtdev->priv/tp zeroed and aligned in init_etherdev */ ++ tp = rtdev->priv; ++ ++ /* note: tp->chipset set in rtl8139_init_board */ ++ tp->drv_flags = board_info[ent->driver_data].hw_flags; ++ tp->mmio_addr = ioaddr; ++ rtdm_lock_init (&tp->lock); ++ ++ if ( (i=rt_register_rtnetdev(rtdev)) ) ++ goto err_out; ++ ++ pci_set_drvdata (pdev, rtdev); ++ ++ tp->phys[0] = 32; ++ ++ /* The lower four bits are the media type. */ ++ option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx]; ++ if (option > 0) { ++ tp->mii.full_duplex = (option & 0x210) ? 
1 : 0; ++ tp->default_port = option & 0xFF; ++ if (tp->default_port) ++ tp->medialock = 1; ++ } ++ if (tp->default_port) { ++ rtdm_printk(KERN_INFO " Forcing %dMbps %s-duplex operation.\n", ++ (option & 0x20 ? 100 : 10), ++ (option & 0x10 ? "full" : "half")); ++ mdio_write(rtdev, tp->phys[0], 0, ++ ((option & 0x20) ? 0x2000 : 0) | /* 100Mbps? */ ++ ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */ ++ } ++ ++ ++ /* Put the chip into low-power mode. */ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ ++ ++ return 0; ++ ++ ++err_out: ++#ifndef USE_IO_OPS ++ if (tp->mmio_addr) iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ /* it's ok to call this even if we have no regions to free */ ++ pci_release_regions (pdev); ++ rtdev_free(rtdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ return i; ++} ++ ++ ++static void rtl8139_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = pci_get_drvdata(pdev); ++ ++#ifndef USE_IO_OPS ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ if (tp->mmio_addr) ++ iounmap (tp->mmio_addr); ++#endif /* !USE_IO_OPS */ ++ ++ /* it's ok to call this even if we have no regions to free */ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ pci_release_regions(pdev); ++ pci_set_drvdata(pdev, NULL); ++ ++ rtdev_free(rtdev); ++} ++ ++ ++/* Serial EEPROM section. */ ++ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x04 /* EEPROM shift clock. */ ++#define EE_CS 0x08 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x02 /* EEPROM chip data in. */ ++#define EE_WRITE_0 0x00 ++#define EE_WRITE_1 0x02 ++#define EE_DATA_READ 0x01 /* EEPROM chip data out. */ ++#define EE_ENB (0x80 | EE_CS) ++ ++/* Delay between EEPROM clock transitions. ++ No extra delay is needed with 33Mhz PCI, but 66Mhz may change this. ++ */ ++ ++#define eeprom_delay() readl(ee_addr) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++#define EE_WRITE_CMD (5) ++#define EE_READ_CMD (6) ++#define EE_ERASE_CMD (7) ++ ++static int read_eeprom (void *ioaddr, int location, int addr_len) ++{ ++ int i; ++ unsigned retval = 0; ++ void *ee_addr = ioaddr + Cfg9346; ++ int read_cmd = location | (EE_READ_CMD << addr_len); ++ ++ writeb (EE_ENB & ~EE_CS, ee_addr); ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ ++ /* Shift the read command bits out. */ ++ for (i = 4 + addr_len; i >= 0; i--) { ++ int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; ++ writeb (EE_ENB | dataval, ee_addr); ++ eeprom_delay (); ++ writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay (); ++ } ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ ++ for (i = 16; i > 0; i--) { ++ writeb (EE_ENB | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay (); ++ retval = ++ (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 : ++ 0); ++ writeb (EE_ENB, ee_addr); ++ eeprom_delay (); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ writeb (~EE_CS, ee_addr); ++ eeprom_delay (); ++ ++ return retval; ++} ++ ++/* MII serial management: mostly bogus for now. */ ++/* Read and write the MII management registers using software-generated ++ serial MDIO protocol. ++ The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues. 
*/ ++#define MDIO_DIR 0x80 ++#define MDIO_DATA_OUT 0x04 ++#define MDIO_DATA_IN 0x02 ++#define MDIO_CLK 0x01 ++#define MDIO_WRITE0 (MDIO_DIR) ++#define MDIO_WRITE1 (MDIO_DIR | MDIO_DATA_OUT) ++ ++#define mdio_delay(mdio_addr) readb(mdio_addr) ++ ++ ++ ++static char mii_2_8139_map[8] = { ++ BasicModeCtrl, ++ BasicModeStatus, ++ 0, ++ 0, ++ NWayAdvert, ++ NWayLPAR, ++ NWayExpansion, ++ 0 ++}; ++ ++#ifdef CONFIG_8139TOO_8129 ++/* Syncronize the MII management interface by shifting 32 one bits out. */ ++static void mdio_sync (void *mdio_addr) ++{ ++ int i; ++ ++ for (i = 32; i >= 0; i--) { ++ writeb (MDIO_WRITE1, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_WRITE1 | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++} ++#endif ++ ++ ++static int mdio_read (struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int retval = 0; ++#ifdef CONFIG_8139TOO_8129 ++ void *mdio_addr = tp->mmio_addr + Config4; ++ int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; ++ int i; ++#endif ++ ++ if (phy_id > 31) { /* Really a 8139. Use internal registers. */ ++ return location < 8 && mii_2_8139_map[location] ? ++ readw (tp->mmio_addr + mii_2_8139_map[location]) : 0; ++ } ++ ++#ifdef CONFIG_8139TOO_8129 ++ mdio_sync (mdio_addr); ++ /* Shift the read command bits out. */ ++ for (i = 15; i >= 0; i--) { ++ int dataval = (mii_cmd & (1 << i)) ? MDIO_DATA_OUT : 0; ++ ++ writeb (MDIO_DIR | dataval, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_DIR | dataval | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++ ++ /* Read the two transition, 16 data, and wire-idle bits. */ ++ for (i = 19; i > 0; i--) { ++ writeb (0, mdio_addr); ++ mdio_delay (mdio_addr); ++ retval = (retval << 1) | ((readb (mdio_addr) & MDIO_DATA_IN) ? 1 : 0); ++ writeb (MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++#endif ++ ++ return (retval >> 1) & 0xffff; ++} ++ ++ ++static void mdio_write (struct rtnet_device *rtdev, int phy_id, int location, ++ int value) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++#ifdef CONFIG_8139TOO_8129 ++ void *mdio_addr = tp->mmio_addr + Config4; ++ int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value; ++ int i; ++#endif ++ ++ if (phy_id > 31) { /* Really a 8139. Use internal registers. */ ++ void *ioaddr = tp->mmio_addr; ++ if (location == 0) { ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ RTL_W16 (BasicModeCtrl, value); ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ } else if (location < 8 && mii_2_8139_map[location]) ++ RTL_W16 (mii_2_8139_map[location], value); ++ return; ++ } ++ ++#ifdef CONFIG_8139TOO_8129 ++ mdio_sync (mdio_addr); ++ ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = ++ (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; ++ writeb (dataval, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (dataval | MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++ /* Clear out extra bits. 
*/ ++ for (i = 2; i > 0; i--) { ++ writeb (0, mdio_addr); ++ mdio_delay (mdio_addr); ++ writeb (MDIO_CLK, mdio_addr); ++ mdio_delay (mdio_addr); ++ } ++#endif ++} ++ ++static int rtl8139_open (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int retval; ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&tp->irq_handle, rtdev->irq, ++ rtl8139_interrupt, RTDM_IRQTYPE_SHARED, ++ rtdev->name, rtdev); ++ if (retval) ++ return retval; ++ ++ tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN, &tp->tx_bufs_dma); ++ tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN, &tp->rx_ring_dma); ++ ++ if (tp->tx_bufs == NULL || tp->rx_ring == NULL) { ++ rtdm_irq_free(&tp->irq_handle); ++ if (tp->tx_bufs) ++ pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, tp->tx_bufs, tp->tx_bufs_dma); ++ if (tp->rx_ring) ++ pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, tp->rx_ring, tp->rx_ring_dma); ++ ++ return -ENOMEM; ++ } ++ /* FIXME: create wrapper for duplex_lock vs. force_media ++ tp->mii.full_duplex = tp->mii.duplex_lock; */ ++ tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000; ++ tp->twistie = 1; ++ tp->time_to_die = 0; ++ ++ rtl8139_init_ring (rtdev); ++ rtl8139_hw_start (rtdev); ++ ++ return 0; ++} ++ ++ ++static void rtl_check_media (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ u16 mii_lpa; ++ ++ if (tp->phys[0] < 0) ++ return; ++ ++ mii_lpa = mdio_read(rtdev, tp->phys[0], MII_LPA); ++ if (mii_lpa == 0xffff) ++ return; ++ ++ tp->mii.full_duplex = (mii_lpa & LPA_100FULL) == LPA_100FULL || ++ (mii_lpa & 0x00C0) == LPA_10FULL; ++} ++ ++ ++/* Start the hardware at open or resume. */ ++static void rtl8139_hw_start (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ u32 i; ++ u8 tmp; ++ ++ /* Bring old chips out of low-power mode. */ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'R'); ++ ++ rtl8139_chip_reset(ioaddr); ++ ++ /* unlock Config[01234] and BMCR register writes */ ++ RTL_W8_F (Cfg9346, Cfg9346_Unlock); ++ /* Restore our idea of the MAC address. */ ++ RTL_W32_F (MAC0 + 0, cpu_to_le32 (*(u32 *) (rtdev->dev_addr + 0))); ++ RTL_W32_F (MAC0 + 4, cpu_to_le32 (*(u32 *) (rtdev->dev_addr + 4))); ++ ++ tp->cur_rx = 0; ++ ++ /* init Rx ring buffer DMA address */ ++ RTL_W32_F (RxBuf, tp->rx_ring_dma); ++ ++ /* Must enable Tx/Rx before setting transfer thresholds! */ ++ RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); ++ ++ tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys; ++ RTL_W32 (RxConfig, tp->rx_config); ++ ++ /* Check this value: the documentation for IFG contradicts ifself. */ ++ RTL_W32 (TxConfig, rtl8139_tx_config); ++ ++ rtl_check_media (rtdev); ++ ++ if (tp->chipset >= CH_8139B) { ++ /* Disable magic packet scanning, which is enabled ++ * when PM is enabled in Config1. It can be reenabled ++ * via ETHTOOL_SWOL if desired. 
*/ ++ RTL_W8 (Config3, RTL_R8 (Config3) & ~Cfg3_Magic); ++ } ++ ++ /* Lock Config[01234] and BMCR register writes */ ++ RTL_W8 (Cfg9346, Cfg9346_Lock); ++ ++ /* init Tx buffer DMA addresses */ ++ for (i = 0; i < NUM_TX_DESC; i++) ++ RTL_W32_F (TxAddr0 + (i * 4), tp->tx_bufs_dma + (tp->tx_buf[i] - tp->tx_bufs)); ++ ++ RTL_W32 (RxMissed, 0); ++ ++ rtl8139_set_rx_mode (rtdev); ++ ++ /* no early-rx interrupts */ ++ RTL_W16 (MultiIntr, RTL_R16 (MultiIntr) & MultiIntrClear); ++ ++ /* make sure RxTx has started */ ++ tmp = RTL_R8 (ChipCmd); ++ if ((!(tmp & CmdRxEnb)) || (!(tmp & CmdTxEnb))) ++ RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); ++ ++ /* Enable all known interrupts by setting the interrupt mask. */ ++ RTL_W16 (IntrMask, rtl8139_intr_mask); ++ ++ rtnetif_start_queue (rtdev); ++} ++ ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void rtl8139_init_ring (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ int i; ++ ++ tp->cur_rx = 0; ++ tp->cur_tx = 0; ++ tp->dirty_tx = 0; ++ ++ for (i = 0; i < NUM_TX_DESC; i++) ++ tp->tx_buf[i] = &tp->tx_bufs[i * TX_BUF_SIZE]; ++} ++ ++ ++static void rtl8139_tx_clear (struct rtl8139_private *tp) ++{ ++ tp->cur_tx = 0; ++ tp->dirty_tx = 0; ++ ++ /* XXX account for unsent Tx packets in tp->stats.tx_dropped */ ++} ++ ++ ++ ++static int rtl8139_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ void *ioaddr = tp->mmio_addr; ++ unsigned int entry; ++ unsigned int len = skb->len; ++ rtdm_lockctx_t context; ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = tp->cur_tx % NUM_TX_DESC; ++ ++ if (likely(len < TX_BUF_SIZE)) { ++ if (unlikely(skb->xmit_stamp != NULL)) { ++ rtdm_lock_irqsave(context); ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ /* typically, we are only copying a few bytes here */ ++ rtskb_copy_and_csum_dev(skb, tp->tx_buf[entry]); ++ } else { ++ /* copy larger packets outside the lock */ ++ rtskb_copy_and_csum_dev(skb, tp->tx_buf[entry]); ++ rtdm_lock_irqsave(context); ++ } ++ } else { ++ dev_kfree_rtskb(skb); ++ tp->stats.tx_dropped++; ++ return 0; ++ } ++ ++ ++ /* Note: the chip doesn't have auto-pad! 
*/ ++ rtdm_lock_get(&tp->lock); ++ RTL_W32_F (TxStatus0 + (entry * sizeof (u32)), tp->tx_flag | max(len, (unsigned int)ETH_ZLEN)); ++ tp->cur_tx++; ++ wmb(); ++ if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx) ++ rtnetif_stop_queue (rtdev); ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ ++ dev_kfree_rtskb(skb); ++ ++#ifdef DEBUG ++ rtdm_printk ("%s: Queued Tx packet size %u to slot %d.\n", rtdev->name, len, entry); ++#endif ++ return 0; ++} ++ ++static int rtl8139_ioctl(struct rtnet_device *rtdev, struct ifreq *ifr, int cmd) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ int nReturn = 0; ++ struct ethtool_value *value; ++ ++ switch (cmd) { ++ case SIOCETHTOOL: ++ /* TODO: user-safe parameter access, most probably one layer higher */ ++ value = (struct ethtool_value *)ifr->ifr_data; ++ if (value->cmd == ETHTOOL_GLINK) ++ { ++ if (RTL_R16(CSCR) & CSCR_LinkOKBit) ++ value->data = 1; ++ else ++ value->data = 0; ++ } ++ break; ++ ++ default: ++ nReturn = -EOPNOTSUPP; ++ break; ++ } ++ return nReturn; ++} ++ ++static struct net_device_stats *rtl8139_get_stats(struct rtnet_device*rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ return &tp->stats; ++} ++ ++static void rtl8139_tx_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, ++ void *ioaddr) ++{ ++ unsigned long dirty_tx, tx_left; ++ ++ dirty_tx = tp->dirty_tx; ++ tx_left = tp->cur_tx - dirty_tx; ++ ++ while (tx_left > 0) { ++ int entry = dirty_tx % NUM_TX_DESC; ++ int txstatus; ++ ++ txstatus = RTL_R32 (TxStatus0 + (entry * sizeof (u32))); ++ ++ if (!(txstatus & (TxStatOK | TxUnderrun | TxAborted))) ++ break; /* It still hasn't been Txed */ ++ ++ /* Note: TxCarrierLost is always asserted at 100mbps. */ ++ if (txstatus & (TxOutOfWindow | TxAborted)) { ++ /* There was an major error, log it. */ ++ rtdm_printk("%s: Transmit error, Tx status %8.8x.\n", ++ rtdev->name, txstatus); ++ tp->stats.tx_errors++; ++ if (txstatus & TxAborted) { ++ tp->stats.tx_aborted_errors++; ++ RTL_W32 (TxConfig, TxClearAbt); ++ RTL_W16 (IntrStatus, TxErr); ++ wmb(); ++ } ++ if (txstatus & TxCarrierLost) ++ tp->stats.tx_carrier_errors++; ++ if (txstatus & TxOutOfWindow) ++ tp->stats.tx_window_errors++; ++#ifdef ETHER_STATS ++ if ((txstatus & 0x0f000000) == 0x0f000000) ++ tp->stats.collisions16++; ++#endif ++ } else { ++ if (txstatus & TxUnderrun) { ++ /* Add 64 to the Tx FIFO threshold. */ ++ if (tp->tx_flag < 0x00300000) ++ tp->tx_flag += 0x00020000; ++ tp->stats.tx_fifo_errors++; ++ } ++ tp->stats.collisions += (txstatus >> 24) & 15; ++ tp->stats.tx_bytes += txstatus & 0x7ff; ++ tp->stats.tx_packets++; ++ } ++ ++ dirty_tx++; ++ tx_left--; ++ } ++ ++ /* only wake the queue if we did work, and the queue is stopped */ ++ if (tp->dirty_tx != dirty_tx) { ++ tp->dirty_tx = dirty_tx; ++ mb(); ++ if (rtnetif_queue_stopped (rtdev)) ++ rtnetif_wake_queue (rtdev); ++ } ++} ++ ++ ++/* TODO: clean this up! Rx reset need not be this intensive */ ++static void rtl8139_rx_err ++(u32 rx_status, struct rtnet_device *rtdev, struct rtl8139_private *tp, void *ioaddr) ++{ ++/* u8 tmp8; ++#ifndef CONFIG_8139_NEW_RX_RESET ++ int tmp_work; ++#endif */ ++ ++ /* RTnet-TODO: We really need an error manager to handle such issues... 
*/ ++ rtdm_printk("%s: FATAL - Ethernet frame had errors, status %8.8x.\n", ++ rtdev->name, rx_status); ++} ++ ++ ++static void rtl8139_rx_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, void *ioaddr, ++ nanosecs_abs_t *time_stamp) ++{ ++ unsigned char *rx_ring; ++ u16 cur_rx; ++ ++ rx_ring = tp->rx_ring; ++ cur_rx = tp->cur_rx; ++ ++ while ((RTL_R8 (ChipCmd) & RxBufEmpty) == 0) { ++ int ring_offset = cur_rx % RX_BUF_LEN; ++ u32 rx_status; ++ unsigned int rx_size; ++ unsigned int pkt_size; ++ struct rtskb *skb; ++ ++ rmb(); ++ ++ /* read size+status of next frame from DMA ring buffer */ ++ rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); ++ rx_size = rx_status >> 16; ++ pkt_size = rx_size - 4; ++ ++ /* Packet copy from FIFO still in progress. ++ * Theoretically, this should never happen ++ * since EarlyRx is disabled. ++ */ ++ if (rx_size == 0xfff0) { ++ tp->xstats.early_rx++; ++ break; ++ } ++ ++ /* If Rx err or invalid rx_size/rx_status received ++ * (which happens if we get lost in the ring), ++ * Rx process gets reset, so we abort any further ++ * Rx processing. ++ */ ++ if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || ++ (rx_size < 8) || ++ (!(rx_status & RxStatusOK))) { ++ rtl8139_rx_err (rx_status, rtdev, tp, ioaddr); ++ return; ++ } ++ ++ /* Malloc up new buffer, compatible with net-2e. */ ++ /* Omit the four octet CRC from the length. */ ++ ++ /* TODO: consider allocating skb's outside of ++ * interrupt context, both to speed interrupt processing, ++ * and also to reduce the chances of having to ++ * drop packets here under memory pressure. ++ */ ++ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_size + 2); ++ if (skb) { ++ skb->time_stamp = *time_stamp; ++ rtskb_reserve (skb, 2); /* 16 byte align the IP fields. */ ++ ++ ++ /* eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); */ ++ memcpy (skb->data, &rx_ring[ring_offset + 4], pkt_size); ++ rtskb_put (skb, pkt_size); ++ skb->protocol = rt_eth_type_trans (skb, rtdev); ++ rtnetif_rx (skb); ++ tp->stats.rx_bytes += pkt_size; ++ tp->stats.rx_packets++; ++ } else { ++ rtdm_printk (KERN_WARNING"%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ tp->stats.rx_dropped++; ++ } ++ ++ cur_rx = (cur_rx + rx_size + 4 + 3) & ~3; ++ RTL_W16 (RxBufPtr, cur_rx - 16); ++ ++ if (RTL_R16 (IntrStatus) & RxAckBits) ++ RTL_W16_F (IntrStatus, RxAckBits); ++ } ++ ++ tp->cur_rx = cur_rx; ++} ++ ++ ++static void rtl8139_weird_interrupt (struct rtnet_device *rtdev, ++ struct rtl8139_private *tp, ++ void *ioaddr, ++ int status, int link_changed) ++{ ++ rtdm_printk ("%s: Abnormal interrupt, status %8.8x.\n", ++ rtdev->name, status); ++ ++ /* Update the error count. */ ++ tp->stats.rx_missed_errors += RTL_R32 (RxMissed); ++ RTL_W32 (RxMissed, 0); ++ ++ if ((status & RxUnderrun) && link_changed && (tp->drv_flags & HAS_LNK_CHNG)) { ++ /* Really link-change on new chips. */ ++ status &= ~RxUnderrun; ++ } ++ ++ /* XXX along with rtl8139_rx_err, are we double-counting errors? 
*/ ++ if (status & ++ (RxUnderrun | RxOverflow | RxErr | RxFIFOOver)) ++ tp->stats.rx_errors++; ++ ++ if (status & PCSTimeout) ++ tp->stats.rx_length_errors++; ++ ++ if (status & (RxUnderrun | RxFIFOOver)) ++ tp->stats.rx_fifo_errors++; ++ ++ if (status & PCIErr) { ++ u16 pci_cmd_status; ++ pci_read_config_word (tp->pci_dev, PCI_STATUS, &pci_cmd_status); ++ pci_write_config_word (tp->pci_dev, PCI_STATUS, pci_cmd_status); ++ ++ rtdm_printk (KERN_ERR "%s: PCI Bus error %4.4x.\n", rtdev->name, pci_cmd_status); ++ } ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int rtl8139_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ int ackstat; ++ int status; ++ int link_changed = 0; /* avoid bogus "uninit" warning */ ++ int saved_status = 0; ++ int ret = RTDM_IRQ_NONE; ++ ++ rtdm_lock_get(&tp->lock); ++ ++ status = RTL_R16(IntrStatus); ++ ++ /* h/w no longer present (hotplug?) or major error, bail */ ++ if (unlikely(status == 0xFFFF) || unlikely(!(status & rtl8139_intr_mask))) ++ goto out; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* close possible race with dev_close */ ++ if (unlikely(!rtnetif_running(rtdev))) { ++ RTL_W16(IntrMask, 0); ++ goto out; ++ } ++ ++ /* Acknowledge all of the current interrupt sources ASAP, but ++ first get an additional status bit from CSCR. */ ++ if (unlikely(status & RxUnderrun)) ++ link_changed = RTL_R16(CSCR) & CSCR_LinkChangeBit; ++ ++ /* The chip takes special action when we clear RxAckBits, ++ * so we clear them later in rtl8139_rx_interrupt ++ */ ++ ackstat = status & ~(RxAckBits | TxErr); ++ if (ackstat) ++ RTL_W16(IntrStatus, ackstat); ++ ++ if (status & RxAckBits) { ++ saved_status |= RxAckBits; ++ rtl8139_rx_interrupt(rtdev, tp, ioaddr, &time_stamp); ++ } ++ ++ /* Check uncommon events with one test. */ ++ if (unlikely(status & (PCIErr | PCSTimeout | RxUnderrun | RxErr))) ++ rtl8139_weird_interrupt(rtdev, tp, ioaddr, status, link_changed); ++ ++ if (status & (TxOK |TxErr)) { ++ rtl8139_tx_interrupt(rtdev, tp, ioaddr); ++ if (status & TxErr) { ++ RTL_W16(IntrStatus, TxErr); ++ saved_status |= TxErr; ++ } ++ } ++ out: ++ rtdm_lock_put(&tp->lock); ++ ++ if (saved_status & RxAckBits) ++ rt_mark_stack_mgr(rtdev); ++ ++ if (saved_status & TxErr) ++ rtnetif_err_tx(rtdev); ++ ++ return ret; ++} ++ ++ ++static int rtl8139_close (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ rtdm_lockctx_t context; ++ ++ printk ("%s: Shutting down ethercard, status was 0x%4.4x.\n", rtdev->name, RTL_R16 (IntrStatus)); ++ ++ rtnetif_stop_queue (rtdev); ++ ++ rtdm_lock_get_irqsave (&tp->lock, context); ++ /* Stop the chip's Tx and Rx DMA processes. */ ++ RTL_W8 (ChipCmd, 0); ++ /* Disable interrupts by clearing the interrupt mask. */ ++ RTL_W16 (IntrMask, 0); ++ /* Update the error counts. */ ++ tp->stats.rx_missed_errors += RTL_R32 (RxMissed); ++ RTL_W32 (RxMissed, 0); ++ rtdm_lock_put_irqrestore (&tp->lock, context); ++ ++ rtdm_irq_free(&tp->irq_handle); ++ ++ rt_stack_disconnect(rtdev); ++ ++ rtl8139_tx_clear (tp); ++ ++ pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, tp->rx_ring, tp->rx_ring_dma); ++ pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, tp->tx_bufs, tp->tx_bufs_dma); ++ tp->rx_ring = NULL; ++ tp->tx_bufs = NULL; ++ ++ /* Green! 
Put the chip in low-power mode. */ ++ RTL_W8 (Cfg9346, Cfg9346_Unlock); ++ ++ if (rtl_chip_info[tp->chipset].flags & HasHltClk) ++ RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ ++ ++ return 0; ++} ++ ++ ++ ++/* Set or clear the multicast filter for this adaptor. ++ This routine is not state sensitive and need not be SMP locked. */ ++static void __set_rx_mode (struct rtnet_device *rtdev) ++{ ++ struct rtl8139_private *tp = rtdev->priv; ++ void *ioaddr = tp->mmio_addr; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ int rx_mode; ++ u32 tmp; ++ ++#ifdef DEBUG ++ rtdm_printk ("%s: rtl8139_set_rx_mode(%4.4x) done -- Rx config %8.8lx.\n", ++ rtdev->name, rtdev->flags, RTL_R32 (RxConfig)); ++#endif ++ ++ /* Note: do not reorder, GCC is clever about common statements. */ ++ if (rtdev->flags & IFF_PROMISC) { ++ /* Unconditionally log net taps. */ ++ /*printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", rtdev->name);*/ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys | AcceptAllPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ /* Too many to filter perfectly -- accept all multicasts. */ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else { ++ rx_mode = AcceptBroadcast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0; ++ } ++ ++ /* We can safely update without stopping the chip. */ ++ tmp = rtl8139_rx_config | rx_mode; ++ if (tp->rx_config != tmp) { ++ RTL_W32_F (RxConfig, tmp); ++ tp->rx_config = tmp; ++ } ++ RTL_W32_F (MAR0 + 0, mc_filter[0]); ++ RTL_W32_F (MAR0 + 4, mc_filter[1]); ++} ++ ++static void rtl8139_set_rx_mode (struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct rtl8139_private *tp = rtdev->priv; ++ ++ rtdm_lock_get_irqsave (&tp->lock, context); ++ __set_rx_mode(rtdev); ++ rtdm_lock_put_irqrestore (&tp->lock, context); ++} ++ ++static struct pci_driver rtl8139_pci_driver = { ++ name: DRV_NAME, ++ id_table: rtl8139_pci_tbl, ++ probe: rtl8139_init_one, ++ remove: rtl8139_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++ ++static int __init rtl8139_init_module (void) ++{ ++ /* when we're a module, we always print a version message, ++ * even if no 8139 board is found. ++ */ ++ ++#ifdef MODULE ++ printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); ++#endif ++ ++ return pci_register_driver (&rtl8139_pci_driver); ++} ++ ++ ++static void __exit rtl8139_cleanup_module (void) ++{ ++ pci_unregister_driver (&rtl8139_pci_driver); ++} ++ ++ ++module_init(rtl8139_init_module); ++module_exit(rtl8139_cleanup_module); +--- linux/drivers/xenomai/net/drivers/igb/e1000_i210.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_i210.h 2021-04-07 16:01:27.533633729 +0800 +@@ -0,0 +1,93 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . 
++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_I210_H_ ++#define _E1000_I210_H_ ++ ++s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); ++void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); ++s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data); ++s32 igb_read_invm_version(struct e1000_hw *hw, ++ struct e1000_fw_version *invm_ver); ++s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); ++s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); ++s32 igb_init_nvm_params_i210(struct e1000_hw *hw); ++bool igb_get_flash_presence_i210(struct e1000_hw *hw); ++s32 igb_pll_workaround_i210(struct e1000_hw *hw); ++ ++#define E1000_STM_OPCODE 0xDB00 ++#define E1000_EEPROM_FLASH_SIZE_WORD 0x11 ++ ++#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ ++ (u8)((invm_dword) & 0x7) ++#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ ++ (u8)(((invm_dword) & 0x0000FE00) >> 9) ++#define INVM_DWORD_TO_WORD_DATA(invm_dword) \ ++ (u16)(((invm_dword) & 0xFFFF0000) >> 16) ++ ++enum E1000_INVM_STRUCTURE_TYPE { ++ E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, ++ E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, ++ E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, ++ E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, ++ E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, ++ E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, ++}; ++ ++#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 ++#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 ++#define E1000_INVM_ULT_BYTES_SIZE 8 ++#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 ++#define E1000_INVM_VER_FIELD_ONE 0x1FF8 ++#define E1000_INVM_VER_FIELD_TWO 0x7FE000 ++#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 ++ ++#define E1000_INVM_MAJOR_MASK 0x3F0 ++#define E1000_INVM_MINOR_MASK 0xF ++#define E1000_INVM_MAJOR_SHIFT 4 ++ ++#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_OFF2)) ++#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_ON2)) ++ ++/* NVM offset defaults for i211 device */ ++#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 ++#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 ++#define NVM_LED_1_CFG_DEFAULT_I211 0x0184 ++#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C ++ ++/* PLL Defines */ ++#define E1000_PCI_PMCSR 0x44 ++#define E1000_PCI_PMCSR_D3 0x03 ++#define E1000_MAX_PLL_TRIES 5 ++#define E1000_PHY_PLL_UNCONF 0xFF ++#define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 ++#define E1000_PHY_PLL_FREQ_REG 0x000E ++#define E1000_INVM_DEFAULT_AL 0x202F ++#define E1000_INVM_AUTOLOAD 0x0A ++#define E1000_INVM_PLL_WO_VAL 0x0010 ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_regs.h 2021-04-07 16:01:27.528633736 +0800 +@@ -0,0 +1,427 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_REGS_H_ ++#define _E1000_REGS_H_ ++ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* RX Control - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ ++#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ ++#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) ++#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ ++#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ ++#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ ++#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ ++#define E1000_EIAM 0x01530 /* Ext. 
Interrupt Ack Auto Clear Mask - RW */ ++#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ ++#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ ++#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ ++#define E1000_TCTL 0x00400 /* TX Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ ++#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_LEDMUX 0x08130 /* LED MUX Control */ ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ ++#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ ++#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ ++#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ ++#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ ++#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ ++#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ ++#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ ++#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ ++#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ ++#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ ++#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ ++#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ ++#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ ++ ++/* IEEE 1588 TIMESYNCH */ ++#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ ++#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ ++#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ ++#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ ++#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ ++#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ ++#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ ++#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ ++#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ ++#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ ++#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ ++#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ ++#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ ++#define E1000_TRGTTIML0 0x0B644 /* Target Time Register 0 Low - RW */ ++#define E1000_TRGTTIMH0 0x0B648 /* Target Time Register 0 High - RW */ ++#define E1000_TRGTTIML1 0x0B64C /* Target Time Register 1 Low - RW */ ++#define E1000_TRGTTIMH1 0x0B650 /* Target Time Register 1 High - RW */ ++#define E1000_AUXSTMPL0 0x0B65C /* Auxiliary Time Stamp 0 Register Low - RO */ ++#define E1000_AUXSTMPH0 0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */ ++#define E1000_AUXSTMPL1 0x0B664 /* Auxiliary Time Stamp 1 Register Low - RO */ ++#define E1000_AUXSTMPH1 0x0B668 /* Auxiliary Time Stamp 1 
Register High - RO */ ++#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ ++#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ ++#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ ++ ++/* Filtering Registers */ ++#define E1000_SAQF(_n) (0x5980 + 4 * (_n)) ++#define E1000_DAQF(_n) (0x59A0 + 4 * (_n)) ++#define E1000_SPQF(_n) (0x59C0 + 4 * (_n)) ++#define E1000_FTQF(_n) (0x59E0 + 4 * (_n)) ++#define E1000_SAQF0 E1000_SAQF(0) ++#define E1000_DAQF0 E1000_DAQF(0) ++#define E1000_SPQF0 E1000_SPQF(0) ++#define E1000_FTQF0 E1000_FTQF(0) ++#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ ++#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ ++ ++#define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) ++ ++/* DMA Coalescing registers */ ++#define E1000_DMACR 0x02508 /* Control Register */ ++#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ ++#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ ++#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ ++#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ ++#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ ++#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ ++ ++/* TX Rate Limit Registers */ ++#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select - WO */ ++#define E1000_RTTBCNRM 0x3690 /* Tx BCN Rate-scheduler MMW */ ++#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config - WO */ ++ ++/* Split and Replication RX Control - RW */ ++#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ ++ ++/* Thermal sensor configuration and status registers */ ++#define E1000_THMJT 0x08100 /* Junction Temperature */ ++#define E1000_THLOWTC 0x08104 /* Low Threshold Control */ ++#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ ++#define E1000_THHIGHTC 0x0810C /* High Threshold Control */ ++#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ ++ ++/* Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. ++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ */ ++#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) \ ++ : (0x0C000 + ((_n) * 0x40))) ++#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) \ ++ : (0x0C004 + ((_n) * 0x40))) ++#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) \ ++ : (0x0C008 + ((_n) * 0x40))) ++#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) \ ++ : (0x0C00C + ((_n) * 0x40))) ++#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) \ ++ : (0x0C010 + ((_n) * 0x40))) ++#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) \ ++ : (0x0C018 + ((_n) * 0x40))) ++#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) \ ++ : (0x0C028 + ((_n) * 0x40))) ++#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) \ ++ : (0x0E000 + ((_n) * 0x40))) ++#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) \ ++ : (0x0E004 + ((_n) * 0x40))) ++#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) \ ++ : (0x0E008 + ((_n) * 0x40))) ++#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) \ ++ : (0x0E010 + ((_n) * 0x40))) ++#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) \ ++ : (0x0E018 + ((_n) * 0x40))) ++#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) \ ++ : (0x0E028 + ((_n) * 0x40))) ++#define E1000_RXCTL(_n) ((_n) < 4 ? 
(0x02814 + ((_n) * 0x100)) : \ ++ (0x0C014 + ((_n) * 0x40))) ++#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) ++#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ ++ (0x0E014 + ((_n) * 0x40))) ++#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) ++#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) \ ++ : (0x0E038 + ((_n) * 0x40))) ++#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) \ ++ : (0x0E03C + ((_n) * 0x40))) ++ ++#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ ++#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ ++ ++#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ ++#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ ++#define E1000_DTXCTL 0x03590 /* DMA TX Control - RW */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 
/* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++/* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXPTC 0x04104 ++/* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 ++/* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C ++/* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 ++/* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQEC 0x04118 ++/* Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICTXQMTC 0x0411C ++/* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ ++#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ ++#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ ++#define E1000_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */ ++#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ ++#define E1000_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */ ++#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ ++#define E1000_HGPTC 0x04118 /* Host Good Packets TX Count */ ++#define E1000_HTCBDPC 0x04124 /* Host TX Circuit Breaker Dropped Count */ ++#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ ++#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ ++#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ ++#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ ++#define E1000_LENERRS 0x04138 /* Length Errors Count */ ++#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ ++#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ ++#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ ++#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ ++#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Page - RW */ ++#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ ++#define E1000_RLPML 0x05004 /* RX Long Packet Max Length */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ 
++#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ ++#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) ++#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ ++ (0x054E0 + ((_i - 16) * 8))) ++#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ ++ (0x054E4 + ((_i - 16) * 8))) ++#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) ++#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) ++#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) ++#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) ++#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) ++#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ ++#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ ++#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ ++ ++/* RSS registers */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ ++#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate Interrupt Ext*/ ++#define E1000_IMIRVP 0x05AC0 /* Immediate Interrupt RX VLAN Priority - RW */ ++/* MSI-X Allocation Register (_i) - RW */ ++#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) ++/* Redirection Table - RW Array */ ++#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) ++#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW Array */ ++ ++/* VT Registers */ ++#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ ++#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ ++#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ ++#define E1000_VFRE 0x00C8C /* VF Receive Enables */ ++#define E1000_VFTE 0x00C90 /* VF Transmit Enables */ ++#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ ++#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ ++#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ ++#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ ++#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ ++#define E1000_IOVTCL 0x05BBC /* IOV Control Register */ ++#define E1000_TXSWC 0x05ACC /* Tx Switch Control */ ++#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ ++/* These act per VF so an array friendly macro is used */ ++#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) ++#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) ++#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) ++#define E1000_DVMOLR(_n) (0x0C038 + (64 * (_n))) ++#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* VLAN VM Filter */ ++#define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) ++ ++struct e1000_hw; ++ ++u32 igb_rd32(struct e1000_hw *hw, u32 reg); ++ ++/* write operations, indexed using DWORDS */ ++#define 
wr32(reg, val) \ ++do { \ ++ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ ++ if (!E1000_REMOVED(hw_addr)) \ ++ writel((val), &hw_addr[(reg)]); \ ++} while (0) ++ ++#define rd32(reg) (igb_rd32(hw, reg)) ++ ++#define wrfl() ((void)rd32(E1000_STATUS)) ++ ++#define array_wr32(reg, offset, value) \ ++ wr32((reg) + ((offset) << 2), (value)) ++ ++#define array_rd32(reg, offset) \ ++ (readl(hw->hw_addr + reg + ((offset) << 2))) ++ ++/* DMA Coalescing registers */ ++#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ ++ ++/* Energy Efficient Ethernet "EEE" register */ ++#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ ++#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ ++#define E1000_EEE_SU 0X0E34 /* EEE Setup */ ++#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ ++#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ ++#define E1000_MMDAC 13 /* MMD Access Control */ ++#define E1000_MMDAAD 14 /* MMD Access Address/Data */ ++ ++/* Thermal Sensor Register */ ++#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ ++ ++/* OS2BMC Registers */ ++#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ ++#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ ++#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ ++#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ ++ ++#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ ++#define E1000_I210_FLMNGCTL 0x12038 ++#define E1000_I210_FLMNGDATA 0x1203C ++#define E1000_I210_FLMNGCNT 0x12040 ++ ++#define E1000_I210_FLSWCTL 0x12048 ++#define E1000_I210_FLSWDATA 0x1204C ++#define E1000_I210_FLSWCNT 0x12050 ++ ++#define E1000_I210_FLA 0x1201C ++ ++#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) ++#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ ++ ++#define E1000_REMOVED(h) unlikely(!(h)) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_mac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mac.h 2021-04-07 16:01:27.523633743 +0800 +@@ -0,0 +1,88 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_MAC_H_ ++#define _E1000_MAC_H_ ++ ++#include "e1000_hw.h" ++ ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_defines.h" ++#include "e1000_i210.h" ++ ++/* Functions that should not be called directly from drivers but can be used ++ * by other files in this 'shared code' ++ */ ++s32 igb_blink_led(struct e1000_hw *hw); ++s32 igb_check_for_copper_link(struct e1000_hw *hw); ++s32 igb_cleanup_led(struct e1000_hw *hw); ++s32 igb_config_fc_after_link_up(struct e1000_hw *hw); ++s32 igb_disable_pcie_master(struct e1000_hw *hw); ++s32 igb_force_mac_fc(struct e1000_hw *hw); ++s32 igb_get_auto_rd_done(struct e1000_hw *hw); ++s32 igb_get_bus_info_pcie(struct e1000_hw *hw); ++s32 igb_get_hw_semaphore(struct e1000_hw *hw); ++s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex); ++s32 igb_id_led_init(struct e1000_hw *hw); ++s32 igb_led_off(struct e1000_hw *hw); ++void igb_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count); ++s32 igb_setup_link(struct e1000_hw *hw); ++s32 igb_validate_mdi_setting(struct e1000_hw *hw); ++s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data); ++ ++void igb_clear_hw_cntrs_base(struct e1000_hw *hw); ++void igb_clear_vfta(struct e1000_hw *hw); ++void igb_clear_vfta_i350(struct e1000_hw *hw); ++s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add); ++void igb_config_collision_dist(struct e1000_hw *hw); ++void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); ++void igb_mta_set(struct e1000_hw *hw, u32 hash_value); ++void igb_put_hw_semaphore(struct e1000_hw *hw); ++void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++s32 igb_check_alt_mac_addr(struct e1000_hw *hw); ++ ++bool igb_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++enum e1000_mng_mode { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++}; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++void e1000_init_function_pointers_82575(struct e1000_hw *hw); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/igb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb.h 2021-04-07 16:01:27.519633749 +0800 +@@ -0,0 +1,557 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _IGB_H_ ++#define _IGB_H_ ++ ++#include "e1000_mac.h" ++#include "e1000_82575.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++struct igb_adapter; ++ ++#define E1000_PCS_CFG_IGN_SD 1 ++ ++/* Interrupt defines */ ++#define IGB_START_ITR 648 /* ~6000 ints/sec */ ++#define IGB_4K_ITR 980 ++#define IGB_20K_ITR 196 ++#define IGB_70K_ITR 56 ++ ++/* TX/RX descriptor defines */ ++#define IGB_DEFAULT_TXD 256 ++#define IGB_DEFAULT_TX_WORK 128 ++#define IGB_MIN_TXD 80 ++#define IGB_MAX_TXD 4096 ++ ++#define IGB_DEFAULT_RXD 256 ++#define IGB_MIN_RXD 80 ++#define IGB_MAX_RXD 4096 ++ ++#define IGB_DEFAULT_ITR 3 /* dynamic */ ++#define IGB_MAX_ITR_USECS 10000 ++#define IGB_MIN_ITR_USECS 10 ++#define NON_Q_VECTORS 1 ++#define MAX_Q_VECTORS 8 ++#define MAX_MSIX_ENTRIES 10 ++ ++/* Transmit and receive queues */ ++#define IGB_MAX_RX_QUEUES 8 ++#define IGB_MAX_RX_QUEUES_82575 4 ++#define IGB_MAX_RX_QUEUES_I211 2 ++#define IGB_MAX_TX_QUEUES 8 ++#define IGB_MAX_VF_MC_ENTRIES 30 ++#define IGB_MAX_VF_FUNCTIONS 8 ++#define IGB_MAX_VFTA_ENTRIES 128 ++#define IGB_82576_VF_DEV_ID 0x10CA ++#define IGB_I350_VF_DEV_ID 0x1520 ++ ++/* NVM version defines */ ++#define IGB_MAJOR_MASK 0xF000 ++#define IGB_MINOR_MASK 0x0FF0 ++#define IGB_BUILD_MASK 0x000F ++#define IGB_COMB_VER_MASK 0x00FF ++#define IGB_MAJOR_SHIFT 12 ++#define IGB_MINOR_SHIFT 4 ++#define IGB_COMB_VER_SHFT 8 ++#define IGB_NVM_VER_INVALID 0xFFFF ++#define IGB_ETRACK_SHIFT 16 ++#define NVM_ETRACK_WORD 0x0042 ++#define NVM_COMB_VER_OFF 0x0083 ++#define NVM_COMB_VER_PTR 0x003d ++ ++struct vf_data_storage { ++ unsigned char vf_mac_addresses[ETH_ALEN]; ++ u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; ++ u16 num_vf_mc_hashes; ++ u16 vlans_enabled; ++ u32 flags; ++ unsigned long last_nack; ++ u16 pf_vlan; /* When set, guest VLAN config not allowed. */ ++ u16 pf_qos; ++ u16 tx_rate; ++ bool spoofchk_enabled; ++}; ++ ++#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ ++#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */ ++#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */ ++#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */ ++ ++/* RX descriptor control thresholds. ++ * PTHRESH - MAC will consider prefetch if it has fewer than this number of ++ * descriptors available in its onboard memory. ++ * Setting this to 0 disables RX descriptor prefetch. ++ * HTHRESH - MAC will only prefetch if there are at least this many descriptors ++ * available in host memory. ++ * If PTHRESH is 0, this should also be 0. ++ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back ++ * descriptors until either it has this many to write back, or the ++ * ITR timer expires. ++ */ ++#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) ++#define IGB_RX_HTHRESH 8 ++#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) ++#define IGB_TX_HTHRESH 1 ++#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ ++ (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 1 : 4) ++#define IGB_TX_WTHRESH ((hw->mac.type == e1000_82576 && \ ++ (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 
1 : 16) ++ ++/* this is the size past which hardware will drop packets when setting LPE=0 */ ++#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 ++ ++/* Supported Rx Buffer Sizes */ ++#define IGB_RXBUFFER_256 256 ++#define IGB_RXBUFFER_2048 2048 ++#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 ++#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 ++ ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define IGB_EEPROM_APME 0x0400 ++ ++#ifndef IGB_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define IGB_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#define IGB_MNG_VLAN_NONE -1 ++ ++enum igb_tx_flags { ++ /* cmd_type flags */ ++ IGB_TX_FLAGS_VLAN = 0x01, ++ IGB_TX_FLAGS_TSO = 0x02, ++ IGB_TX_FLAGS_TSTAMP = 0x04, ++ ++ /* olinfo flags */ ++ IGB_TX_FLAGS_IPV4 = 0x10, ++ IGB_TX_FLAGS_CSUM = 0x20, ++}; ++ ++/* VLAN info */ ++#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define IGB_TX_FLAGS_VLAN_SHIFT 16 ++ ++/* The largest size we can write to the descriptor is 65535. In order to ++ * maintain a power of two alignment we have to limit ourselves to 32K. ++ */ ++#define IGB_MAX_TXD_PWR 15 ++#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) ++ ++/* Tx Descriptors needed, worst case */ ++#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) ++#define DESC_NEEDED (MAX_SKB_FRAGS + 4) ++ ++/* EEPROM byte offsets */ ++#define IGB_SFF_8472_SWAP 0x5C ++#define IGB_SFF_8472_COMP 0x5E ++ ++/* Bitmasks */ ++#define IGB_SFF_ADDRESSING_MODE 0x4 ++#define IGB_SFF_8472_UNSUP 0x00 ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer ++ */ ++struct igb_tx_buffer { ++ union e1000_adv_tx_desc *next_to_watch; ++ unsigned long time_stamp; ++ struct rtskb *skb; ++ unsigned int bytecount; ++ u16 gso_segs; ++ __be16 protocol; ++ ++ u32 tx_flags; ++}; ++ ++struct igb_rx_buffer { ++ dma_addr_t dma; ++ struct rtskb *skb; ++}; ++ ++struct igb_tx_queue_stats { ++ u64 packets; ++ u64 bytes; ++ u64 restart_queue; ++ u64 restart_queue2; ++}; ++ ++struct igb_rx_queue_stats { ++ u64 packets; ++ u64 bytes; ++ u64 drops; ++ u64 csum_err; ++ u64 alloc_failed; ++}; ++ ++struct igb_ring_container { ++ struct igb_ring *ring; /* pointer to linked list of rings */ ++ unsigned int total_bytes; /* total bytes processed this int */ ++ unsigned int total_packets; /* total packets processed this int */ ++ u16 work_limit; /* total work allowed per interrupt */ ++ u8 count; /* total number of rings in vector */ ++ u8 itr; /* current ITR setting for ring */ ++}; ++ ++struct igb_ring { ++ struct igb_q_vector *q_vector; /* backlink to q_vector */ ++ struct rtnet_device *netdev; /* back pointer to net_device */ ++ struct device *dev; /* device pointer for dma mapping */ ++ union { /* array of buffer info structs */ ++ struct igb_tx_buffer *tx_buffer_info; ++ struct igb_rx_buffer *rx_buffer_info; ++ }; ++ void *desc; /* descriptor ring memory */ ++ unsigned long flags; /* ring specific flags */ ++ void __iomem *tail; /* pointer to ring tail register */ ++ dma_addr_t dma; /* phys address of the ring */ ++ unsigned int size; /* length of desc. ring in bytes */ ++ ++ u16 count; /* number of desc. 
in the ring */ ++ u8 queue_index; /* logical index of the ring*/ ++ u8 reg_idx; /* physical index of the ring */ ++ ++ /* everything past this point are written often */ ++ u16 next_to_clean; ++ u16 next_to_use; ++ u16 next_to_alloc; ++ ++ union { ++ /* TX */ ++ struct { ++ struct igb_tx_queue_stats tx_stats; ++ }; ++ /* RX */ ++ struct { ++ struct igb_rx_queue_stats rx_stats; ++ u16 rx_buffer_len; ++ }; ++ }; ++} ____cacheline_internodealigned_in_smp; ++ ++struct igb_q_vector { ++ struct igb_adapter *adapter; /* backlink */ ++ int cpu; /* CPU for DCA */ ++ u32 eims_value; /* EIMS mask value */ ++ ++ u16 itr_val; ++ u8 set_itr; ++ void __iomem *itr_register; ++ ++ struct igb_ring_container rx, tx; ++ ++ struct rcu_head rcu; /* to avoid race with update stats on free */ ++ char name[IFNAMSIZ + 9]; ++ ++ /* for dynamic allocation of rings associated with this q_vector */ ++ struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; ++}; ++ ++enum e1000_ring_flags_t { ++ IGB_RING_FLAG_RX_SCTP_CSUM, ++ IGB_RING_FLAG_RX_LB_VLAN_BSWAP, ++ IGB_RING_FLAG_TX_CTX_IDX, ++ IGB_RING_FLAG_TX_DETECT_HANG ++}; ++ ++#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) ++ ++#define IGB_RX_DESC(R, i) \ ++ (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) ++#define IGB_TX_DESC(R, i) \ ++ (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) ++#define IGB_TX_CTXTDESC(R, i) \ ++ (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) ++ ++/* igb_test_staterr - tests bits within Rx descriptor status and error fields */ ++static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc, ++ const u32 stat_err_bits) ++{ ++ return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); ++} ++ ++/* igb_desc_unused - calculate if we have unused descriptors */ ++static inline int igb_desc_unused(struct igb_ring *ring) ++{ ++ if (ring->next_to_clean > ring->next_to_use) ++ return ring->next_to_clean - ring->next_to_use - 1; ++ ++ return ring->count + ring->next_to_clean - ring->next_to_use - 1; ++} ++ ++#ifdef CONFIG_IGB_HWMON ++ ++#define IGB_HWMON_TYPE_LOC 0 ++#define IGB_HWMON_TYPE_TEMP 1 ++#define IGB_HWMON_TYPE_CAUTION 2 ++#define IGB_HWMON_TYPE_MAX 3 ++ ++struct hwmon_attr { ++ struct device_attribute dev_attr; ++ struct e1000_hw *hw; ++ struct e1000_thermal_diode_data *sensor; ++ char name[12]; ++ }; ++ ++struct hwmon_buff { ++ struct attribute_group group; ++ const struct attribute_group *groups[2]; ++ struct attribute *attrs[E1000_MAX_SENSORS * 4 + 1]; ++ struct hwmon_attr hwmon_list[E1000_MAX_SENSORS * 4]; ++ unsigned int n_hwmon; ++ }; ++#endif ++ ++#define IGB_N_EXTTS 2 ++#define IGB_N_PEROUT 2 ++#define IGB_N_SDP 4 ++#define IGB_RETA_SIZE 128 ++ ++/* board specific private data structure */ ++struct igb_adapter { ++ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; ++ ++ struct rtnet_device *netdev; ++ ++ unsigned long state; ++ unsigned int flags; ++ ++ unsigned int num_q_vectors; ++ struct msix_entry msix_entries[MAX_MSIX_ENTRIES]; ++ rtdm_irq_t msix_irq_handle[MAX_MSIX_ENTRIES]; ++ rtdm_irq_t irq_handle; ++ rtdm_nrtsig_t watchdog_nrtsig; ++ spinlock_t stats64_lock; ++ ++ /* Interrupt Throttle Rate */ ++ u32 rx_itr_setting; ++ u32 tx_itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ /* TX */ ++ u16 tx_work_limit; ++ u32 tx_timeout_count; ++ int num_tx_queues; ++ struct igb_ring *tx_ring[16]; ++ ++ /* RX */ ++ int num_rx_queues; ++ struct igb_ring *rx_ring[16]; ++ ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ struct timer_list watchdog_timer; ++ struct timer_list 
phy_info_timer; ++ ++ u16 mng_vlan_id; ++ u32 bd_number; ++ u32 wol; ++ u32 en_mng_pt; ++ u16 link_speed; ++ u16 link_duplex; ++ ++ struct work_struct reset_task; ++ struct work_struct watchdog_task; ++ bool fc_autoneg; ++ u8 tx_timeout_factor; ++ struct timer_list blink_timer; ++ unsigned long led_status; ++ ++ /* OS defined structs */ ++ struct pci_dev *pdev; ++ ++ struct net_device_stats net_stats; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ ++ u32 test_icr; ++ struct igb_ring test_tx_ring; ++ struct igb_ring test_rx_ring; ++ ++ struct igb_q_vector *q_vector[MAX_Q_VECTORS]; ++ u32 eims_enable_mask; ++ u32 eims_other; ++ ++ /* to not mess up cache alignment, always add to the bottom */ ++ u16 tx_ring_count; ++ u16 rx_ring_count; ++ int vf_rate_link_speed; ++ u32 rss_queues; ++ u32 wvbr; ++ u32 *shadow_vfta; ++ ++ unsigned long last_rx_timestamp; ++ ++ char fw_version[32]; ++#ifdef CONFIG_IGB_HWMON ++ struct hwmon_buff *igb_hwmon_buff; ++ bool ets; ++#endif ++ struct i2c_algo_bit_data i2c_algo; ++ struct i2c_adapter i2c_adap; ++ struct i2c_client *i2c_client; ++ u32 rss_indir_tbl_init; ++ u8 rss_indir_tbl[IGB_RETA_SIZE]; ++ ++ unsigned long link_check_timeout; ++ int copper_tries; ++ struct e1000_info ei; ++ u16 eee_advert; ++}; ++ ++#define IGB_FLAG_HAS_MSI (1 << 0) ++#define IGB_FLAG_DCA_ENABLED (1 << 1) ++#define IGB_FLAG_QUAD_PORT_A (1 << 2) ++#define IGB_FLAG_QUEUE_PAIRS (1 << 3) ++#define IGB_FLAG_DMAC (1 << 4) ++#define IGB_FLAG_PTP (1 << 5) ++#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 6) ++#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 7) ++#define IGB_FLAG_WOL_SUPPORTED (1 << 8) ++#define IGB_FLAG_NEED_LINK_UPDATE (1 << 9) ++#define IGB_FLAG_MEDIA_RESET (1 << 10) ++#define IGB_FLAG_MAS_CAPABLE (1 << 11) ++#define IGB_FLAG_MAS_ENABLE (1 << 12) ++#define IGB_FLAG_HAS_MSIX (1 << 13) ++#define IGB_FLAG_EEE (1 << 14) ++ ++/* Media Auto Sense */ ++#define IGB_MAS_ENABLE_0 0X0001 ++#define IGB_MAS_ENABLE_1 0X0002 ++#define IGB_MAS_ENABLE_2 0X0004 ++#define IGB_MAS_ENABLE_3 0X0008 ++ ++/* DMA Coalescing defines */ ++#define IGB_MIN_TXPBSIZE 20408 ++#define IGB_TX_BUF_4096 4096 ++#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ ++ ++#define IGB_82576_TSYNC_SHIFT 19 ++#define IGB_TS_HDR_LEN 16 ++enum e1000_state_t { ++ __IGB_TESTING, ++ __IGB_RESETTING, ++ __IGB_DOWN, ++ __IGB_PTP_TX_IN_PROGRESS, ++}; ++ ++enum igb_boards { ++ board_82575, ++}; ++ ++extern char igb_driver_name[]; ++extern char igb_driver_version[]; ++ ++int igb_up(struct igb_adapter *); ++void igb_down(struct igb_adapter *); ++void igb_reinit_locked(struct igb_adapter *); ++void igb_reset(struct igb_adapter *); ++int igb_reinit_queues(struct igb_adapter *); ++void igb_write_rss_indir_tbl(struct igb_adapter *); ++int igb_set_spd_dplx(struct igb_adapter *, u32, u8); ++int igb_setup_tx_resources(struct igb_ring *); ++int igb_setup_rx_resources(struct igb_ring *); ++void igb_free_tx_resources(struct igb_ring *); ++void igb_free_rx_resources(struct igb_ring *); ++void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); ++void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); ++void igb_setup_tctl(struct igb_adapter *); ++void igb_setup_rctl(struct igb_adapter *); ++netdev_tx_t igb_xmit_frame_ring(struct rtskb *, struct igb_ring *); ++void igb_unmap_and_free_tx_resource(struct igb_ring *, struct igb_tx_buffer *); ++void igb_alloc_rx_buffers(struct igb_ring *, u16); ++void igb_update_stats(struct 
igb_adapter *); ++bool igb_has_link(struct igb_adapter *adapter); ++void igb_set_ethtool_ops(struct rtnet_device *); ++void igb_power_up_link(struct igb_adapter *); ++void igb_set_fw_version(struct igb_adapter *); ++void igb_ptp_init(struct igb_adapter *adapter); ++void igb_ptp_stop(struct igb_adapter *adapter); ++void igb_ptp_reset(struct igb_adapter *adapter); ++void igb_ptp_rx_hang(struct igb_adapter *adapter); ++void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct rtskb *skb); ++void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, ++ struct rtskb *skb); ++int igb_ptp_set_ts_config(struct rtnet_device *netdev, struct ifreq *ifr); ++int igb_ptp_get_ts_config(struct rtnet_device *netdev, struct ifreq *ifr); ++#ifdef CONFIG_IGB_HWMON ++void igb_sysfs_exit(struct igb_adapter *adapter); ++int igb_sysfs_init(struct igb_adapter *adapter); ++#endif ++static inline s32 igb_reset_phy(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.reset) ++ return hw->phy.ops.reset(hw); ++ ++ return 0; ++} ++ ++static inline s32 igb_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ if (hw->phy.ops.read_reg) ++ return hw->phy.ops.read_reg(hw, offset, data); ++ ++ return 0; ++} ++ ++static inline s32 igb_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ if (hw->phy.ops.write_reg) ++ return hw->phy.ops.write_reg(hw, offset, data); ++ ++ return 0; ++} ++ ++static inline s32 igb_get_phy_info(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.get_phy_info) ++ return hw->phy.ops.get_phy_info(hw); ++ ++ return 0; ++} ++ ++static inline struct rtnet_device *txring_txq(const struct igb_ring *tx_ring) ++{ ++ return tx_ring->netdev; ++} ++ ++#endif /* _IGB_H_ */ +--- linux/drivers/xenomai/net/drivers/igb/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_hw.h 2021-04-07 16:01:27.514633756 +0800 +@@ -0,0 +1,570 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000_regs.h" ++#include "e1000_defines.h" ++ ++struct e1000_hw; ++ ++#define E1000_DEV_ID_82576 0x10C9 ++#define E1000_DEV_ID_82576_FIBER 0x10E6 ++#define E1000_DEV_ID_82576_SERDES 0x10E7 ++#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 ++#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 ++#define E1000_DEV_ID_82576_NS 0x150A ++#define E1000_DEV_ID_82576_NS_SERDES 0x1518 ++#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D ++#define E1000_DEV_ID_82575EB_COPPER 0x10A7 ++#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 ++#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 ++#define E1000_DEV_ID_82580_COPPER 0x150E ++#define E1000_DEV_ID_82580_FIBER 0x150F ++#define E1000_DEV_ID_82580_SERDES 0x1510 ++#define E1000_DEV_ID_82580_SGMII 0x1511 ++#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 ++#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 ++#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 ++#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A ++#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C ++#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 ++#define E1000_DEV_ID_I350_COPPER 0x1521 ++#define E1000_DEV_ID_I350_FIBER 0x1522 ++#define E1000_DEV_ID_I350_SERDES 0x1523 ++#define E1000_DEV_ID_I350_SGMII 0x1524 ++#define E1000_DEV_ID_I210_COPPER 0x1533 ++#define E1000_DEV_ID_I210_FIBER 0x1536 ++#define E1000_DEV_ID_I210_SERDES 0x1537 ++#define E1000_DEV_ID_I210_SGMII 0x1538 ++#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B ++#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C ++#define E1000_DEV_ID_I211_COPPER 0x1539 ++#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 ++#define E1000_DEV_ID_I354_SGMII 0x1F41 ++#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 ++ ++#define E1000_REVISION_2 2 ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_0 0 ++#define E1000_FUNC_1 1 ++#define E1000_FUNC_2 2 ++#define E1000_FUNC_3 3 ++ ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 ++ ++enum e1000_mac_type { ++ e1000_undefined = 0, ++ e1000_82575, ++ e1000_82576, ++ e1000_82580, ++ e1000_i350, ++ e1000_i354, ++ e1000_i210, ++ e1000_i211, ++ e1000_num_macs /* List is 1-based, so subtract 1 for true count. 
*/ ++}; ++ ++enum e1000_media_type { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++}; ++ ++enum e1000_nvm_type { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_flash_hw, ++ e1000_nvm_invm, ++ e1000_nvm_flash_sw ++}; ++ ++enum e1000_nvm_override { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large, ++}; ++ ++enum e1000_phy_type { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_82580, ++ e1000_phy_i210, ++}; ++ ++enum e1000_bus_type { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++}; ++ ++enum e1000_bus_speed { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_5000, ++ e1000_bus_speed_reserved ++}; ++ ++enum e1000_bus_width { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_pcie_x8 = 8, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++}; ++ ++enum e1000_1000t_rx_status { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++}; ++ ++enum e1000_rev_polarity { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++}; ++ ++enum e1000_fc_mode { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++ u64 cbtmpc; ++ u64 htdpmc; ++ u64 cbrdpc; ++ u64 cbrmpc; ++ u64 rpthc; ++ u64 hgptc; ++ u64 htcbdpc; ++ u64 hgorc; ++ u64 hgotc; ++ u64 lenerrs; ++ u64 scvpc; ++ u64 hrmpc; ++ u64 doosync; ++ u64 o2bgptc; ++ u64 o2bspc; ++ u64 b2ospc; ++ u64 b2ogprc; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 
command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++#include "e1000_mac.h" ++#include "e1000_phy.h" ++#include "e1000_nvm.h" ++#include "e1000_mbx.h" ++ ++struct e1000_mac_operations { ++ s32 (*check_for_link)(struct e1000_hw *); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ void (*rar_set)(struct e1000_hw *, u8 *, u32); ++ s32 (*read_mac_addr)(struct e1000_hw *); ++ s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); ++ void (*release_swfw_sync)(struct e1000_hw *, u16); ++#ifdef CONFIG_IGB_HWMON ++ s32 (*get_thermal_sensor_data)(struct e1000_hw *); ++ s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); ++#endif ++ ++}; ++ ++struct e1000_phy_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_phy_info)(struct e1000_hw *); ++ s32 (*read_reg)(struct e1000_hw *, u32, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*reset)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_reg)(struct e1000_hw *, u32, u16); ++ s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); ++ s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); ++}; ++ ++struct e1000_nvm_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*read)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*write)(struct e1000_hw *, u16, u16, u16 *); ++ s32 (*update)(struct e1000_hw *); ++ s32 (*validate)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++}; ++ ++#define E1000_MAX_SENSORS 3 ++ ++struct e1000_thermal_diode_data { ++ u8 location; ++ u8 temp; ++ u8 caution_thresh; ++ u8 max_op_thresh; ++}; ++ ++struct e1000_thermal_sensor_data { ++ struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS]; ++}; ++ ++struct e1000_info { ++ s32 (*get_invariants)(struct e1000_hw *); ++ struct e1000_mac_operations *mac_ops; ++ struct e1000_phy_operations *phy_ops; ++ struct e1000_nvm_operations *nvm_ops; ++}; ++ ++extern const struct e1000_info e1000_82575_info; ++ ++struct e1000_mac_info { ++ struct e1000_mac_operations ops; ++ ++ u8 addr[6]; ++ u8 perm_addr[6]; ++ ++ enum e1000_mac_type type; ++ ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 txcw; ++ ++ u16 mta_reg_count; ++ u16 uta_reg_count; ++ ++ /* Maximum size of the MTA register table in all supported adapters */ ++ #define MAX_MTA_REG 128 ++ u32 mta_shadow[MAX_MTA_REG]; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool arc_subsystem_valid; ++ bool asf_firmware_present; ++ bool autoneg; ++ bool autoneg_failed; ++ bool disable_hw_init_bits; ++ bool get_link_status; ++ bool ifs_params_forced; ++ bool in_ifs_mode; ++ bool report_tx_early; ++ bool 
serdes_has_link; ++ bool tx_pkt_filtering; ++ struct e1000_thermal_sensor_data thermal_sensor_data; ++}; ++ ++struct e1000_phy_info { ++ struct e1000_phy_operations ops; ++ ++ enum e1000_phy_type type; ++ ++ enum e1000_1000t_rx_status local_rx; ++ enum e1000_1000t_rx_status remote_rx; ++ enum e1000_ms_type ms_type; ++ enum e1000_ms_type original_ms_type; ++ enum e1000_rev_polarity cable_polarity; ++ enum e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ enum e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool reset_disable; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ struct e1000_nvm_operations ops; ++ enum e1000_nvm_type type; ++ enum e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ enum e1000_bus_type type; ++ enum e1000_bus_speed speed; ++ enum e1000_bus_width width; ++ ++ u32 snoop; ++ ++ u16 func; ++ u16 pci_cmd_word; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ enum e1000_fc_mode current_mode; /* Type of flow control */ ++ enum e1000_fc_mode requested_mode; ++}; ++ ++struct e1000_mbx_operations { ++ s32 (*init_params)(struct e1000_hw *hw); ++ s32 (*read)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*write)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); ++ s32 (*check_for_msg)(struct e1000_hw *, u16); ++ s32 (*check_for_ack)(struct e1000_hw *, u16); ++ s32 (*check_for_rst)(struct e1000_hw *, u16); ++}; ++ ++struct e1000_mbx_stats { ++ u32 msgs_tx; ++ u32 msgs_rx; ++ ++ u32 acks; ++ u32 reqs; ++ u32 rsts; ++}; ++ ++struct e1000_mbx_info { ++ struct e1000_mbx_operations ops; ++ struct e1000_mbx_stats stats; ++ u32 timeout; ++ u32 usec_delay; ++ u16 size; ++}; ++ ++struct e1000_dev_spec_82575 { ++ bool sgmii_active; ++ bool global_device_reset; ++ bool eee_disable; ++ bool clear_semaphore_once; ++ struct e1000_sfp_flags eth_flags; ++ bool module_plugged; ++ u8 media_port; ++ bool media_changed; ++ bool mas_capable; ++}; ++ ++struct e1000_hw { ++ void *back; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ unsigned long io_base; ++ ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct e1000_bus_info bus; ++ struct e1000_mbx_info mbx; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ union { ++ struct e1000_dev_spec_82575 _82575; ++ } dev_spec; ++ ++ u16 device_id; ++ u16 subsystem_vendor_id; ++ u16 subsystem_device_id; ++ u16 vendor_id; ++ ++ u8 revision_id; ++}; ++ ++struct rtnet_device *igb_get_hw_dev(struct e1000_hw *hw); ++#define hw_dbg(format, arg...) 
\ ++ rtdev_dbg(igb_get_hw_dev(hw), format, ##arg) ++ ++/* These functions must be implemented by drivers */ ++s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); ++ ++void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); ++#endif /* _E1000_HW_H_ */ +--- linux/drivers/xenomai/net/drivers/igb/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/Makefile 2021-04-07 16:01:27.509633763 +0800 +@@ -0,0 +1,13 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_IGB) += rt_igb.o ++ ++rt_igb-y := \ ++ e1000_82575.o \ ++ e1000_i210.o \ ++ e1000_mac.o \ ++ e1000_mbx.o \ ++ e1000_nvm.o \ ++ e1000_phy.o \ ++ igb_hwmon.o \ ++ igb_main.o +--- linux/drivers/xenomai/net/drivers/igb/igb_hwmon.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb_hwmon.c 2021-04-07 16:01:27.505633769 +0800 +@@ -0,0 +1,249 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include "igb.h" ++#include "e1000_82575.h" ++#include "e1000_hw.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IGB_HWMON ++static struct i2c_board_info i350_sensor_info = { ++ I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), ++}; ++ ++/* hwmon callback functions */ ++static ssize_t igb_hwmon_show_location(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ return sprintf(buf, "loc%u\n", ++ igb_attr->sensor->location); ++} ++ ++static ssize_t igb_hwmon_show_temp(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value; ++ ++ /* reset the temp field */ ++ igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); ++ ++ value = igb_attr->sensor->temp; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value = igb_attr->sensor->caution_thresh; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, ++ dev_attr); ++ unsigned int value = igb_attr->sensor->max_op_thresh; ++ ++ /* display millidegree */ ++ value *= 1000; ++ ++ return sprintf(buf, "%u\n", value); ++} ++ ++/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. ++ * @ adapter: pointer to the adapter structure ++ * @ offset: offset in the eeprom sensor data table ++ * @ type: type of sensor data to display ++ * ++ * For each file we want in hwmon's sysfs interface we need a device_attribute ++ * This is included in our hwmon_attr struct that contains the references to ++ * the data structures we need to get the data to display. 
++ */ ++static int igb_add_hwmon_attr(struct igb_adapter *adapter, ++ unsigned int offset, int type) ++{ ++ int rc; ++ unsigned int n_attr; ++ struct hwmon_attr *igb_attr; ++ ++ n_attr = adapter->igb_hwmon_buff->n_hwmon; ++ igb_attr = &adapter->igb_hwmon_buff->hwmon_list[n_attr]; ++ ++ switch (type) { ++ case IGB_HWMON_TYPE_LOC: ++ igb_attr->dev_attr.show = igb_hwmon_show_location; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_label", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_TEMP: ++ igb_attr->dev_attr.show = igb_hwmon_show_temp; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_input", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_CAUTION: ++ igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_max", offset + 1); ++ break; ++ case IGB_HWMON_TYPE_MAX: ++ igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; ++ snprintf(igb_attr->name, sizeof(igb_attr->name), ++ "temp%u_crit", offset + 1); ++ break; ++ default: ++ rc = -EPERM; ++ return rc; ++ } ++ ++ /* These always the same regardless of type */ ++ igb_attr->sensor = ++ &adapter->hw.mac.thermal_sensor_data.sensor[offset]; ++ igb_attr->hw = &adapter->hw; ++ igb_attr->dev_attr.store = NULL; ++ igb_attr->dev_attr.attr.mode = S_IRUGO; ++ igb_attr->dev_attr.attr.name = igb_attr->name; ++ sysfs_attr_init(&igb_attr->dev_attr.attr); ++ ++ adapter->igb_hwmon_buff->attrs[n_attr] = &igb_attr->dev_attr.attr; ++ ++ ++adapter->igb_hwmon_buff->n_hwmon; ++ ++ return 0; ++} ++ ++static void igb_sysfs_del_adapter(struct igb_adapter *adapter) ++{ ++} ++ ++/* called from igb_main.c */ ++void igb_sysfs_exit(struct igb_adapter *adapter) ++{ ++ igb_sysfs_del_adapter(adapter); ++} ++ ++/* called from igb_main.c */ ++int igb_sysfs_init(struct igb_adapter *adapter) ++{ ++ struct hwmon_buff *igb_hwmon; ++ struct i2c_client *client; ++ struct device *hwmon_dev; ++ unsigned int i; ++ int rc = 0; ++ ++ /* If this method isn't defined we don't support thermals */ ++ if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) ++ goto exit; ++ ++ /* Don't create thermal hwmon interface if no sensors present */ ++ rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); ++ if (rc) ++ goto exit; ++ ++ igb_hwmon = devm_kzalloc(&adapter->pdev->dev, sizeof(*igb_hwmon), ++ GFP_KERNEL); ++ if (!igb_hwmon) { ++ rc = -ENOMEM; ++ goto exit; ++ } ++ adapter->igb_hwmon_buff = igb_hwmon; ++ ++ for (i = 0; i < E1000_MAX_SENSORS; i++) { ++ ++ /* Only create hwmon sysfs entries for sensors that have ++ * meaningful data. 
++ */ ++ if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) ++ continue; ++ ++ /* Bail if any hwmon attr struct fails to initialize */ ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); ++ if (rc) ++ goto exit; ++ rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); ++ if (rc) ++ goto exit; ++ } ++ ++ /* init i2c_client */ ++ client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); ++ if (client == NULL) { ++ dev_info(&adapter->pdev->dev, ++ "Failed to create new i2c device.\n"); ++ rc = -ENODEV; ++ goto exit; ++ } ++ adapter->i2c_client = client; ++ ++ igb_hwmon->groups[0] = &igb_hwmon->group; ++ igb_hwmon->group.attrs = igb_hwmon->attrs; ++ ++ hwmon_dev = devm_hwmon_device_register_with_groups(&adapter->pdev->dev, ++ client->name, ++ igb_hwmon, ++ igb_hwmon->groups); ++ if (IS_ERR(hwmon_dev)) { ++ rc = PTR_ERR(hwmon_dev); ++ goto err; ++ } ++ ++ goto exit; ++ ++err: ++ igb_sysfs_del_adapter(adapter); ++exit: ++ return rc; ++} ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_82575.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_82575.h 2021-04-07 16:01:27.500633776 +0800 +@@ -0,0 +1,280 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_82575_H_ ++#define _E1000_82575_H_ ++ ++void igb_shutdown_serdes_link_82575(struct e1000_hw *hw); ++void igb_power_up_serdes_link_82575(struct e1000_hw *hw); ++void igb_power_down_phy_copper_82575(struct e1000_hw *hw); ++void igb_rx_fifo_flush_82575(struct e1000_hw *hw); ++s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, ++ u8 *data); ++s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, ++ u8 data); ++ ++#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_DEF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_OFF1_ON2)) ++ ++#define E1000_RAR_ENTRIES_82575 16 ++#define E1000_RAR_ENTRIES_82576 24 ++#define E1000_RAR_ENTRIES_82580 24 ++#define E1000_RAR_ENTRIES_I350 32 ++ ++#define E1000_SW_SYNCH_MB 0x00000100 ++#define E1000_STAT_DEV_RST_SET 0x00100000 ++#define E1000_CTRL_DEV_RST 0x20000000 ++ ++/* SRRCTL bit definitions */ ++#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ ++#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ ++#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 ++#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 ++#define E1000_SRRCTL_DROP_EN 0x80000000 ++#define E1000_SRRCTL_TIMESTAMP 0x40000000 ++ ++ ++#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 ++#define E1000_MRQC_ENABLE_VMDQ 0x00000003 ++#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 ++#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 ++#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 ++#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 ++ ++#define E1000_EICR_TX_QUEUE ( \ ++ E1000_EICR_TX_QUEUE0 | \ ++ E1000_EICR_TX_QUEUE1 | \ ++ E1000_EICR_TX_QUEUE2 | \ ++ E1000_EICR_TX_QUEUE3) ++ ++#define E1000_EICR_RX_QUEUE ( \ ++ E1000_EICR_RX_QUEUE0 | \ ++ E1000_EICR_RX_QUEUE1 | \ ++ E1000_EICR_RX_QUEUE2 | \ ++ E1000_EICR_RX_QUEUE3) ++ ++/* Immediate Interrupt Rx (A.K.A. 
Low Latency Interrupt) */ ++#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ ++#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ ++ ++/* Receive Descriptor - Advanced */ ++union e1000_adv_rx_desc { ++ struct { ++ __le64 pkt_addr; /* Packet buffer address */ ++ __le64 hdr_addr; /* Header buffer address */ ++ } read; ++ struct { ++ struct { ++ struct { ++ __le16 pkt_info; /* RSS type, Packet type */ ++ __le16 hdr_info; /* Split Head, buf len */ ++ } lo_dword; ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length; /* Packet length */ ++ __le16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 ++#define E1000_RXDADV_HDRBUFLEN_SHIFT 5 ++#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ ++#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ ++ ++/* Transmit Descriptor - Advanced */ ++union e1000_adv_tx_desc { ++ struct { ++ __le64 buffer_addr; /* Address of descriptor's data buf */ ++ __le32 cmd_type_len; ++ __le32 olinfo_status; ++ } read; ++ struct { ++ __le64 rsvd; /* Reserved */ ++ __le32 nxtseq_seed; ++ __le32 status; ++ } wb; ++}; ++ ++/* Adv Transmit Descriptor Config Masks */ ++#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ ++#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ ++#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ ++#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ ++#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ ++#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ ++#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ ++#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ ++ ++/* Context descriptors */ ++struct e1000_adv_tx_context_desc { ++ __le32 vlan_macip_lens; ++ __le32 seqnum_seed; ++ __le32 type_tucmd_mlhl; ++ __le32 mss_l4len_idx; ++}; ++ ++#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ ++#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ ++#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ ++#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ ++/* IPSec Encrypt Enable for ESP */ ++#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ ++#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ ++/* Adv ctxt IPSec SA IDX mask */ ++/* Adv ctxt IPSec ESP len mask */ ++ ++/* Additional Transmit Descriptor Control definitions */ ++#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */ ++/* Tx Queue Arbitration Priority 0=low, 1=high */ ++ ++/* Additional Receive Descriptor Control definitions */ ++#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */ ++ ++/* Direct Cache Access (DCA) definitions */ ++#define E1000_DCA_CTRL_DCA_MODE_DISABLE 0x01 /* DCA Disable */ ++#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ ++ ++#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ ++#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ ++#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header enable */ 
++#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload enable */ ++#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ ++ ++#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ ++#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ ++#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ ++#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ ++#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ ++ ++/* Additional DCA related definitions, note change in position of CPUID */ ++#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ ++#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ ++#define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */ ++#define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */ ++ ++/* ETQF register bit definitions */ ++#define E1000_ETQF_FILTER_ENABLE (1 << 26) ++#define E1000_ETQF_1588 (1 << 30) ++ ++/* FTQF register bit definitions */ ++#define E1000_FTQF_VF_BP 0x00008000 ++#define E1000_FTQF_1588_TIME_STAMP 0x08000000 ++#define E1000_FTQF_MASK 0xF0000000 ++#define E1000_FTQF_MASK_PROTO_BP 0x10000000 ++#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 ++ ++#define E1000_NVM_APME_82575 0x0400 ++#define MAX_NUM_VFS 8 ++ ++#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof control */ ++#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof control */ ++#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ ++#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 ++#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ ++ ++/* Easy defines for setting default pool, would normally be left a zero */ ++#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 ++#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) ++ ++/* Other useful VMD_CTL register defines */ ++#define E1000_VT_CTL_IGNORE_MAC (1 << 28) ++#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) ++#define E1000_VT_CTL_VM_REPL_EN (1 << 30) ++ ++/* Per VM Offload register setup */ ++#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ ++#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ ++#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ ++#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ ++#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ ++#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ ++#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ ++#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ ++#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ ++#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ ++ ++#define E1000_DVMOLR_HIDEVLAN 0x20000000 /* Hide vlan enable */ ++#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ ++#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ ++ ++#define E1000_VLVF_ARRAY_SIZE 32 ++#define E1000_VLVF_VLANID_MASK 0x00000FFF ++#define E1000_VLVF_POOLSEL_SHIFT 12 ++#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) ++#define E1000_VLVF_LVLAN 0x00100000 ++#define E1000_VLVF_VLANID_ENABLE 0x80000000 ++ ++#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ ++#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ ++ ++#define E1000_IOVCTL 0x05BBC ++#define E1000_IOVCTL_REUSE_VFQ 
0x00000001 ++ ++#define E1000_RPLOLR_STRVLAN 0x40000000 ++#define E1000_RPLOLR_STRCRC 0x80000000 ++ ++#define E1000_DTXCTL_8023LL 0x0004 ++#define E1000_DTXCTL_VLAN_ADDED 0x0008 ++#define E1000_DTXCTL_OOS_ENABLE 0x0010 ++#define E1000_DTXCTL_MDP_EN 0x0020 ++#define E1000_DTXCTL_SPOOF_INT 0x0040 ++ ++#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) ++ ++#define ALL_QUEUES 0xFFFF ++ ++/* RX packet buffer size defines */ ++#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F ++void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *, bool, int); ++void igb_vmdq_set_loopback_pf(struct e1000_hw *, bool); ++void igb_vmdq_set_replication_pf(struct e1000_hw *, bool); ++u16 igb_rxpbs_adjust_82580(u32 data); ++s32 igb_read_emi_reg(struct e1000_hw *, u16 addr, u16 *data); ++s32 igb_set_eee_i350(struct e1000_hw *, bool adv1G, bool adv100M); ++s32 igb_set_eee_i354(struct e1000_hw *, bool adv1G, bool adv100M); ++s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status); ++ ++#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 ++#define E1000_EMC_INTERNAL_DATA 0x00 ++#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20 ++#define E1000_EMC_DIODE1_DATA 0x01 ++#define E1000_EMC_DIODE1_THERM_LIMIT 0x19 ++#define E1000_EMC_DIODE2_DATA 0x23 ++#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A ++#define E1000_EMC_DIODE3_DATA 0x2A ++#define E1000_EMC_DIODE3_THERM_LIMIT 0x30 ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_phy.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_phy.h 2021-04-07 16:01:27.496633782 +0800 +@@ -0,0 +1,175 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_PHY_H_ ++#define _E1000_PHY_H_ ++ ++enum e1000_ms_type { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++}; ++ ++enum e1000_smart_speed { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++}; ++ ++s32 igb_check_downshift(struct e1000_hw *hw); ++s32 igb_check_reset_block(struct e1000_hw *hw); ++s32 igb_copper_link_setup_igp(struct e1000_hw *hw); ++s32 igb_copper_link_setup_m88(struct e1000_hw *hw); ++s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++s32 igb_get_cable_length_m88(struct e1000_hw *hw); ++s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw); ++s32 igb_get_cable_length_igp_2(struct e1000_hw *hw); ++s32 igb_get_phy_id(struct e1000_hw *hw); ++s32 igb_get_phy_info_igp(struct e1000_hw *hw); ++s32 igb_get_phy_info_m88(struct e1000_hw *hw); ++s32 igb_phy_sw_reset(struct e1000_hw *hw); ++s32 igb_phy_hw_reset(struct e1000_hw *hw); ++s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active); ++s32 igb_setup_copper_link(struct e1000_hw *hw); ++s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++void igb_power_up_phy_copper(struct e1000_hw *hw); ++void igb_power_down_phy_copper(struct e1000_hw *hw); ++s32 igb_phy_init_script_igp3(struct e1000_hw *hw); ++s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); ++s32 igb_copper_link_setup_82580(struct e1000_hw *hw); ++s32 igb_get_phy_info_82580(struct e1000_hw *hw); ++s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw); ++s32 igb_get_cable_length_82580(struct e1000_hw *hw); ++s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); ++s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); ++s32 igb_check_polarity_m88(struct e1000_hw *hw); ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++#define I82580_ADDR_REG 16 ++#define I82580_CFG_REG 22 ++#define I82580_CFG_ASSERT_CRS_ON_TX (1 << 15) ++#define I82580_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift 100/10 */ ++#define I82580_CTRL_REG 23 ++#define I82580_CTRL_DOWNSHIFT_MASK (7 << 10) ++ ++/* 82580 specific PHY registers */ ++#define I82580_PHY_CTRL_2 18 ++#define I82580_PHY_LBK_CTRL 19 ++#define I82580_PHY_STATUS_2 26 ++#define I82580_PHY_DIAG_STATUS 31 ++ ++/* I82580 PHY Status 2 */ ++#define 
I82580_PHY_STATUS2_REV_POLARITY 0x0400 ++#define I82580_PHY_STATUS2_MDIX 0x0800 ++#define I82580_PHY_STATUS2_SPEED_MASK 0x0300 ++#define I82580_PHY_STATUS2_SPEED_1000MBPS 0x0200 ++#define I82580_PHY_STATUS2_SPEED_100MBPS 0x0100 ++ ++/* I82580 PHY Control 2 */ ++#define I82580_PHY_CTRL2_MANUAL_MDIX 0x0200 ++#define I82580_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 ++#define I82580_PHY_CTRL2_MDIX_CFG_MASK 0x0600 ++ ++/* I82580 PHY Diagnostics Status */ ++#define I82580_DSTATUS_CABLE_LENGTH 0x03FC ++#define I82580_DSTATUS_CABLE_LENGTH_SHIFT 2 ++ ++/* 82580 PHY Power Management */ ++#define E1000_82580_PHY_POWER_MGMT 0xE14 ++#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */ ++#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */ ++#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */ ++ ++/* Enable flexible speed on link-up */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++/* GS40G - I210 PHY defines */ ++#define GS40G_PAGE_SELECT 0x16 ++#define GS40G_PAGE_SHIFT 16 ++#define GS40G_OFFSET_MASK 0xFFFF ++#define GS40G_PAGE_2 0x20000 ++#define GS40G_MAC_REG2 0x15 ++#define GS40G_MAC_LB 0x4140 ++#define GS40G_MAC_SPEED_1G 0X0006 ++#define GS40G_COPPER_SPEC 0x0010 ++#define GS40G_LINE_LB 0x4000 ++ ++/* SFP modules ID memory locations */ ++#define E1000_SFF_IDENTIFIER_OFFSET 0x00 ++#define E1000_SFF_IDENTIFIER_SFF 0x02 ++#define E1000_SFF_IDENTIFIER_SFP 0x03 ++ ++#define E1000_SFF_ETH_FLAGS_OFFSET 0x06 ++/* Flags for SFP modules compatible with ETH up to 1Gb */ ++struct e1000_sfp_flags { ++ u8 e1000_base_sx:1; ++ u8 e1000_base_lx:1; ++ u8 e1000_base_cx:1; ++ u8 e1000_base_t:1; ++ u8 e100_base_lx:1; ++ u8 e100_base_fx:1; ++ u8 e10_base_bx10:1; ++ u8 e10_base_px:1; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_nvm.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_nvm.h 2021-04-07 16:01:27.491633789 +0800 +@@ -0,0 +1,57 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". 
++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_NVM_H_ ++#define _E1000_NVM_H_ ++ ++s32 igb_acquire_nvm(struct e1000_hw *hw); ++void igb_release_nvm(struct e1000_hw *hw); ++s32 igb_read_mac_addr(struct e1000_hw *hw); ++s32 igb_read_part_num(struct e1000_hw *hw, u32 *part_num); ++s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, ++ u32 part_num_size); ++s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++s32 igb_validate_nvm_checksum(struct e1000_hw *hw); ++s32 igb_update_nvm_checksum(struct e1000_hw *hw); ++ ++struct e1000_fw_version { ++ u32 etrack_id; ++ u16 eep_major; ++ u16 eep_minor; ++ u16 eep_build; ++ ++ u8 invm_major; ++ u8 invm_minor; ++ u8 invm_img_type; ++ ++ bool or_valid; ++ u16 or_major; ++ u16 or_build; ++ u16 or_patch; ++}; ++void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers); ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/e1000_82575.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_82575.c 2021-04-07 16:01:27.486633796 +0800 +@@ -0,0 +1,2889 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2015 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* e1000_82575 ++ * e1000_82576 ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++ ++#include "e1000_mac.h" ++#include "e1000_82575.h" ++#include "e1000_i210.h" ++ ++static s32 igb_get_invariants_82575(struct e1000_hw *); ++static s32 igb_acquire_phy_82575(struct e1000_hw *); ++static void igb_release_phy_82575(struct e1000_hw *); ++static s32 igb_acquire_nvm_82575(struct e1000_hw *); ++static void igb_release_nvm_82575(struct e1000_hw *); ++static s32 igb_check_for_link_82575(struct e1000_hw *); ++static s32 igb_get_cfg_done_82575(struct e1000_hw *); ++static s32 igb_init_hw_82575(struct e1000_hw *); ++static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *); ++static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *); ++static s32 igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *); ++static s32 igb_write_phy_reg_82580(struct e1000_hw *, u32, u16); ++static s32 igb_reset_hw_82575(struct e1000_hw *); ++static s32 igb_reset_hw_82580(struct e1000_hw *); ++static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *, bool); ++static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *, bool); ++static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *, bool); ++static s32 igb_setup_copper_link_82575(struct e1000_hw *); ++static s32 igb_setup_serdes_link_82575(struct e1000_hw *); ++static s32 igb_write_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16); ++static void igb_clear_hw_cntrs_82575(struct e1000_hw *); ++static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *, u16); ++static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *, u16 *, ++ u16 *); ++static s32 igb_get_phy_id_82575(struct e1000_hw *); ++static void igb_release_swfw_sync_82575(struct e1000_hw *, u16); ++static bool igb_sgmii_active_82575(struct e1000_hw *); ++static s32 igb_reset_init_script_82575(struct e1000_hw *); ++static s32 igb_read_mac_addr_82575(struct e1000_hw *); ++static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw); ++static s32 igb_reset_mdicnfg_82580(struct e1000_hw *hw); ++static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw); ++static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw); ++static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw); ++static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); ++static const u16 e1000_82580_rxpbs_table[] = { ++ 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; ++ ++/** ++ * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO ++ * @hw: pointer to the HW structure ++ * ++ * Called to determine if the I2C pins are being used for I2C or as an ++ * external MDIO interface since the two options are mutually exclusive. ++ **/ ++static bool igb_sgmii_uses_mdio_82575(struct e1000_hw *hw) ++{ ++ u32 reg = 0; ++ bool ext_mdio = false; ++ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_82576: ++ reg = rd32(E1000_MDIC); ++ ext_mdio = !!(reg & E1000_MDIC_DEST); ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ reg = rd32(E1000_MDICNFG); ++ ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); ++ break; ++ default: ++ break; ++ } ++ return ext_mdio; ++} ++ ++/** ++ * igb_check_for_link_media_swap - Check which M88E1112 interface linked ++ * @hw: pointer to the HW structure ++ * ++ * Poll the M88E1112 interfaces to see which interface achieved link. 
++ */ ++static s32 igb_check_for_link_media_swap(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ u8 port = 0; ++ ++ /* Check the copper medium. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (data & E1000_M88E1112_STATUS_LINK) ++ port = E1000_MEDIA_PORT_COPPER; ++ ++ /* Check the other medium. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* reset page to 0 */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); ++ if (ret_val) ++ return ret_val; ++ ++ if (data & E1000_M88E1112_STATUS_LINK) ++ port = E1000_MEDIA_PORT_OTHER; ++ ++ /* Determine if a swap needs to happen. */ ++ if (port && (hw->dev_spec._82575.media_port != port)) { ++ hw->dev_spec._82575.media_port = port; ++ hw->dev_spec._82575.media_changed = true; ++ } else { ++ ret_val = igb_check_for_link_82575(hw); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_init_phy_params_82575 - Init PHY func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_phy_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u32 ctrl_ext; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ goto out; ++ } ++ ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ if (igb_sgmii_active_82575(hw)) { ++ phy->ops.reset = igb_phy_hw_reset_sgmii_82575; ++ ctrl_ext |= E1000_CTRL_I2C_ENA; ++ } else { ++ phy->ops.reset = igb_phy_hw_reset; ++ ctrl_ext &= ~E1000_CTRL_I2C_ENA; ++ } ++ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ igb_reset_mdicnfg_82580(hw); ++ ++ if (igb_sgmii_active_82575(hw) && !igb_sgmii_uses_mdio_82575(hw)) { ++ phy->ops.read_reg = igb_read_phy_reg_sgmii_82575; ++ phy->ops.write_reg = igb_write_phy_reg_sgmii_82575; ++ } else { ++ switch (hw->mac.type) { ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ phy->ops.read_reg = igb_read_phy_reg_82580; ++ phy->ops.write_reg = igb_write_phy_reg_82580; ++ break; ++ case e1000_i210: ++ case e1000_i211: ++ phy->ops.read_reg = igb_read_phy_reg_gs40g; ++ phy->ops.write_reg = igb_write_phy_reg_gs40g; ++ break; ++ default: ++ phy->ops.read_reg = igb_read_phy_reg_igp; ++ phy->ops.write_reg = igb_write_phy_reg_igp; ++ } ++ } ++ ++ /* set lan id */ ++ hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> ++ E1000_STATUS_FUNC_SHIFT; ++ ++ /* Set phy->phy_addr and phy->id. */ ++ ret_val = igb_get_phy_id_82575(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Verify phy id and set remaining function pointers */ ++ switch (phy->id) { ++ case M88E1543_E_PHY_ID: ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ phy->type = e1000_phy_m88; ++ phy->ops.check_polarity = igb_check_polarity_m88; ++ phy->ops.get_phy_info = igb_get_phy_info_m88; ++ if (phy->id != M88E1111_I_PHY_ID) ++ phy->ops.get_cable_length = ++ igb_get_cable_length_m88_gen2; ++ else ++ phy->ops.get_cable_length = igb_get_cable_length_m88; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; ++ /* Check if this PHY is confgured for media swap. 
*/ ++ if (phy->id == M88E1112_E_PHY_ID) { ++ u16 data; ++ ++ ret_val = phy->ops.write_reg(hw, ++ E1000_M88E1112_PAGE_ADDR, ++ 2); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, ++ E1000_M88E1112_MAC_CTRL_1, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> ++ E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; ++ if (data == E1000_M88E1112_AUTO_COPPER_SGMII || ++ data == E1000_M88E1112_AUTO_COPPER_BASEX) ++ hw->mac.ops.check_for_link = ++ igb_check_for_link_media_swap; ++ } ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->ops.get_phy_info = igb_get_phy_info_igp; ++ phy->ops.get_cable_length = igb_get_cable_length_igp_2; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_igp; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82575; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state; ++ break; ++ case I82580_I_PHY_ID: ++ case I350_I_PHY_ID: ++ phy->type = e1000_phy_82580; ++ phy->ops.force_speed_duplex = ++ igb_phy_force_speed_duplex_82580; ++ phy->ops.get_cable_length = igb_get_cable_length_82580; ++ phy->ops.get_phy_info = igb_get_phy_info_82580; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; ++ break; ++ case I210_I_PHY_ID: ++ phy->type = e1000_phy_i210; ++ phy->ops.check_polarity = igb_check_polarity_m88; ++ phy->ops.get_phy_info = igb_get_phy_info_m88; ++ phy->ops.get_cable_length = igb_get_cable_length_m88_gen2; ++ phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580; ++ phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580; ++ phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_init_nvm_params_82575 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_nvm_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ u16 size; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* Just in case size is out of range, cap it to the largest ++ * EEPROM size supported ++ */ ++ if (size > 15) ++ size = 15; ++ ++ nvm->word_size = 1 << size; ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 
++ 16 : 8; ++ break; ++ } ++ if (nvm->word_size == (1 << 15)) ++ nvm->page_size = 128; ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ /* NVM Function Pointers */ ++ nvm->ops.acquire = igb_acquire_nvm_82575; ++ nvm->ops.release = igb_release_nvm_82575; ++ nvm->ops.write = igb_write_nvm_spi; ++ nvm->ops.validate = igb_validate_nvm_checksum; ++ nvm->ops.update = igb_update_nvm_checksum; ++ if (nvm->word_size < (1 << 15)) ++ nvm->ops.read = igb_read_nvm_eerd; ++ else ++ nvm->ops.read = igb_read_nvm_spi; ++ ++ /* override generic family function pointers for specific descendants */ ++ switch (hw->mac.type) { ++ case e1000_82580: ++ nvm->ops.validate = igb_validate_nvm_checksum_82580; ++ nvm->ops.update = igb_update_nvm_checksum_82580; ++ break; ++ case e1000_i354: ++ case e1000_i350: ++ nvm->ops.validate = igb_validate_nvm_checksum_i350; ++ nvm->ops.update = igb_update_nvm_checksum_i350; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_init_mac_params_82575 - Init MAC func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_init_mac_params_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ switch (mac->type) { ++ case e1000_82576: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82576; ++ break; ++ case e1000_82580: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82580; ++ break; ++ case e1000_i350: ++ case e1000_i354: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_I350; ++ break; ++ default: ++ mac->rar_entry_count = E1000_RAR_ENTRIES_82575; ++ break; ++ } ++ /* reset */ ++ if (mac->type >= e1000_82580) ++ mac->ops.reset_hw = igb_reset_hw_82580; ++ else ++ mac->ops.reset_hw = igb_reset_hw_82575; ++ ++ if (mac->type >= e1000_i210) { ++ mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_i210; ++ mac->ops.release_swfw_sync = igb_release_swfw_sync_i210; ++ ++ } else { ++ mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_82575; ++ mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; ++ } ++ ++ /* Set if part includes ASF firmware */ ++ mac->asf_firmware_present = true; ++ /* Set if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (rd32(E1000_FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ /* enable EEE on i350 parts and later parts */ ++ if (mac->type >= e1000_i350) ++ dev_spec->eee_disable = false; ++ else ++ dev_spec->eee_disable = true; ++ /* Allow a single clear of the SW semaphore on I210 and newer */ ++ if (mac->type >= e1000_i210) ++ dev_spec->clear_semaphore_once = true; ++ /* physical interface link setup */ ++ mac->ops.setup_physical_interface = ++ (hw->phy.media_type == e1000_media_type_copper) ++ ? igb_setup_copper_link_82575 ++ : igb_setup_serdes_link_82575; ++ ++ if (mac->type == e1000_82580) { ++ switch (hw->device_id) { ++ /* feature not supported on these id's */ ++ case E1000_DEV_ID_DH89XXCC_SGMII: ++ case E1000_DEV_ID_DH89XXCC_SERDES: ++ case E1000_DEV_ID_DH89XXCC_BACKPLANE: ++ case E1000_DEV_ID_DH89XXCC_SFP: ++ break; ++ default: ++ hw->dev_spec._82575.mas_capable = true; ++ break; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * igb_set_sfp_media_type_82575 - derives SFP module media type. ++ * @hw: pointer to the HW structure ++ * ++ * The media type is chosen based on SFP module. ++ * compatibility flags retrieved from SFP ID EEPROM. 
++ **/ ++static s32 igb_set_sfp_media_type_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val = E1000_ERR_CONFIG; ++ u32 ctrl_ext = 0; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags; ++ u8 tranceiver_type = 0; ++ s32 timeout = 3; ++ ++ /* Turn I2C interface ON and power on sfp cage */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); ++ ++ wrfl(); ++ ++ /* Read SFP module data */ ++ while (timeout) { ++ ret_val = igb_read_sfp_data_byte(hw, ++ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), ++ &tranceiver_type); ++ if (ret_val == 0) ++ break; ++ msleep(100); ++ timeout--; ++ } ++ if (ret_val != 0) ++ goto out; ++ ++ ret_val = igb_read_sfp_data_byte(hw, ++ E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), ++ (u8 *)eth_flags); ++ if (ret_val != 0) ++ goto out; ++ ++ /* Check if there is some SFP module plugged and powered */ ++ if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || ++ (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { ++ dev_spec->module_plugged = true; ++ if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ } else if (eth_flags->e100_base_fx) { ++ dev_spec->sgmii_active = true; ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ } else if (eth_flags->e1000_base_t) { ++ dev_spec->sgmii_active = true; ++ hw->phy.media_type = e1000_media_type_copper; ++ } else { ++ hw->phy.media_type = e1000_media_type_unknown; ++ hw_dbg("PHY module has not been recognized\n"); ++ goto out; ++ } ++ } else { ++ hw->phy.media_type = e1000_media_type_unknown; ++ } ++ ret_val = 0; ++out: ++ /* Restore I2C interface setting */ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ return ret_val; ++} ++ ++static s32 igb_get_invariants_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ s32 ret_val; ++ u32 ctrl_ext = 0; ++ u32 link_mode = 0; ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82575EB_COPPER: ++ case E1000_DEV_ID_82575EB_FIBER_SERDES: ++ case E1000_DEV_ID_82575GB_QUAD_COPPER: ++ mac->type = e1000_82575; ++ break; ++ case E1000_DEV_ID_82576: ++ case E1000_DEV_ID_82576_NS: ++ case E1000_DEV_ID_82576_NS_SERDES: ++ case E1000_DEV_ID_82576_FIBER: ++ case E1000_DEV_ID_82576_SERDES: ++ case E1000_DEV_ID_82576_QUAD_COPPER: ++ case E1000_DEV_ID_82576_QUAD_COPPER_ET2: ++ case E1000_DEV_ID_82576_SERDES_QUAD: ++ mac->type = e1000_82576; ++ break; ++ case E1000_DEV_ID_82580_COPPER: ++ case E1000_DEV_ID_82580_FIBER: ++ case E1000_DEV_ID_82580_QUAD_FIBER: ++ case E1000_DEV_ID_82580_SERDES: ++ case E1000_DEV_ID_82580_SGMII: ++ case E1000_DEV_ID_82580_COPPER_DUAL: ++ case E1000_DEV_ID_DH89XXCC_SGMII: ++ case E1000_DEV_ID_DH89XXCC_SERDES: ++ case E1000_DEV_ID_DH89XXCC_BACKPLANE: ++ case E1000_DEV_ID_DH89XXCC_SFP: ++ mac->type = e1000_82580; ++ break; ++ case E1000_DEV_ID_I350_COPPER: ++ case E1000_DEV_ID_I350_FIBER: ++ case E1000_DEV_ID_I350_SERDES: ++ case E1000_DEV_ID_I350_SGMII: ++ mac->type = e1000_i350; ++ break; ++ case E1000_DEV_ID_I210_COPPER: ++ case E1000_DEV_ID_I210_FIBER: ++ case E1000_DEV_ID_I210_SERDES: ++ case E1000_DEV_ID_I210_SGMII: ++ case E1000_DEV_ID_I210_COPPER_FLASHLESS: ++ case E1000_DEV_ID_I210_SERDES_FLASHLESS: ++ mac->type = e1000_i210; ++ break; ++ case E1000_DEV_ID_I211_COPPER: ++ mac->type = e1000_i211; ++ break; ++ case E1000_DEV_ID_I354_BACKPLANE_1GBPS: ++ case 
E1000_DEV_ID_I354_SGMII: ++ case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: ++ mac->type = e1000_i354; ++ break; ++ default: ++ return -E1000_ERR_MAC_INIT; ++ } ++ ++ /* Set media type */ ++ /* The 82575 uses bits 22:23 for link mode. The mode can be changed ++ * based on the EEPROM. We cannot rely upon device ID. There ++ * is no distinguishable difference between fiber and internal ++ * SerDes mode on the 82575. There can be an external PHY attached ++ * on the SGMII interface. For this, we'll set sgmii_active to true. ++ */ ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = false; ++ dev_spec->module_plugged = false; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; ++ switch (link_mode) { ++ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ case E1000_CTRL_EXT_LINK_MODE_SGMII: ++ /* Get phy control interface type set (MDIO vs. I2C)*/ ++ if (igb_sgmii_uses_mdio_82575(hw)) { ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = true; ++ break; ++ } ++ /* fall through for I2C based SGMII */ ++ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: ++ /* read media type from SFP EEPROM */ ++ ret_val = igb_set_sfp_media_type_82575(hw); ++ if ((ret_val != 0) || ++ (hw->phy.media_type == e1000_media_type_unknown)) { ++ /* If media type was not identified then return media ++ * type defined by the CTRL_EXT settings. ++ */ ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ ++ if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { ++ hw->phy.media_type = e1000_media_type_copper; ++ dev_spec->sgmii_active = true; ++ } ++ ++ break; ++ } ++ ++ /* do not change link mode for 100BaseFX */ ++ if (dev_spec->eth_flags.e100_base_fx) ++ break; ++ ++ /* change current link mode setting */ ++ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; ++ else ++ ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ break; ++ default: ++ break; ++ } ++ ++ /* mac initialization and operations */ ++ ret_val = igb_init_mac_params_82575(hw); ++ if (ret_val) ++ goto out; ++ ++ /* NVM initialization */ ++ ret_val = igb_init_nvm_params_82575(hw); ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ ret_val = igb_init_nvm_params_i210(hw); ++ break; ++ default: ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ /* if part supports SR-IOV then initialize mailbox parameters */ ++ switch (mac->type) { ++ case e1000_82576: ++ case e1000_i350: ++ igb_init_mbx_params_pf(hw); ++ break; ++ default: ++ break; ++ } ++ ++ /* setup PHY parameters */ ++ ret_val = igb_init_phy_params_82575(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_acquire_phy_82575 - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * Acquire access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. 
++ **/ ++static s32 igb_acquire_phy_82575(struct e1000_hw *hw) ++{ ++ u16 mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ mask = E1000_SWFW_PHY1_SM; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_SWFW_PHY2_SM; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_SWFW_PHY3_SM; ++ ++ return hw->mac.ops.acquire_swfw_sync(hw, mask); ++} ++ ++/** ++ * igb_release_phy_82575 - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static void igb_release_phy_82575(struct e1000_hw *hw) ++{ ++ u16 mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ mask = E1000_SWFW_PHY1_SM; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_SWFW_PHY2_SM; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_SWFW_PHY3_SM; ++ ++ hw->mac.ops.release_swfw_sync(hw, mask); ++} ++ ++/** ++ * igb_read_phy_reg_sgmii_82575 - Read PHY register using sgmii ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the serial gigabit media independent ++ * interface and stores the retrieved information in data. ++ **/ ++static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ s32 ret_val = -E1000_ERR_PARAM; ++ ++ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { ++ hw_dbg("PHY Address %u is out of range\n", offset); ++ goto out; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_phy_reg_i2c(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_sgmii_82575 - Write PHY register using sgmii ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset using the serial gigabit ++ * media independent interface. ++ **/ ++static s32 igb_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, ++ u16 data) ++{ ++ s32 ret_val = -E1000_ERR_PARAM; ++ ++ ++ if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ goto out; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_write_phy_reg_i2c(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_id_82575 - Retrieve PHY addr and id ++ * @hw: pointer to the HW structure ++ * ++ * Retrieves the PHY address and ID for both PHY's which do and do not use ++ * sgmi interface. ++ **/ ++static s32 igb_get_phy_id_82575(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_id; ++ u32 ctrl_ext; ++ u32 mdic; ++ ++ /* Extra read required for some PHY's on i354 */ ++ if (hw->mac.type == e1000_i354) ++ igb_get_phy_id(hw); ++ ++ /* For SGMII PHYs, we try the list of possible addresses until ++ * we find one that works. For non-SGMII PHYs ++ * (e.g. integrated copper PHYs), an address of 1 should ++ * work. The result of this function should mean phy->phy_addr ++ * and phy->id are set correctly. 
++ */ ++ if (!(igb_sgmii_active_82575(hw))) { ++ phy->addr = 1; ++ ret_val = igb_get_phy_id(hw); ++ goto out; ++ } ++ ++ if (igb_sgmii_uses_mdio_82575(hw)) { ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_82576: ++ mdic = rd32(E1000_MDIC); ++ mdic &= E1000_MDIC_PHY_MASK; ++ phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ mdic = rd32(E1000_MDICNFG); ++ mdic &= E1000_MDICNFG_PHY_MASK; ++ phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ret_val = igb_get_phy_id(hw); ++ goto out; ++ } ++ ++ /* Power on sgmii phy if it is disabled */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); ++ wrfl(); ++ msleep(300); ++ ++ /* The address field in the I2CCMD register is 3 bits and 0 is invalid. ++ * Therefore, we need to test 1-7 ++ */ ++ for (phy->addr = 1; phy->addr < 8; phy->addr++) { ++ ret_val = igb_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); ++ if (ret_val == 0) { ++ hw_dbg("Vendor ID 0x%08X read at address %u\n", ++ phy_id, phy->addr); ++ /* At the time of this writing, The M88 part is ++ * the only supported SGMII PHY product. ++ */ ++ if (phy_id == M88_VENDOR) ++ break; ++ } else { ++ hw_dbg("PHY address %u was unreadable\n", phy->addr); ++ } ++ } ++ ++ /* A valid PHY type couldn't be found. */ ++ if (phy->addr == 8) { ++ phy->addr = 0; ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } else { ++ ret_val = igb_get_phy_id(hw); ++ } ++ ++ /* restore previous sfp cage power state */ ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_hw_reset_sgmii_82575 - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY using the serial gigabit media independent interface. ++ **/ ++static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ /* This isn't a true "hard" reset, but is the only reset ++ * available to us at this time. ++ */ ++ ++ hw_dbg("Soft resetting SGMII attached PHY...\n"); ++ ++ /* SFP documentation requires the following to configure the SPF module ++ * to work on SGMII. No further documentation is given. ++ */ ++ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_phy_sw_reset(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u16 data; ++ ++ data = rd32(E1000_82580_PHY_POWER_MGMT); ++ ++ if (active) { ++ data |= E1000_82580_PM_D0_LPLU; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ data &= ~E1000_82580_PM_SPD; ++ } else { ++ data &= ~E1000_82580_PM_D0_LPLU; ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) ++ data |= E1000_82580_PM_SPD; ++ else if (phy->smart_speed == e1000_smart_speed_off) ++ data &= ~E1000_82580_PM_SPD; } ++ ++ wr32(E1000_82580_PHY_POWER_MGMT, data); ++ return 0; ++} ++ ++/** ++ * igb_set_d3_lplu_state_82580 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. 
LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u16 data; ++ ++ data = rd32(E1000_82580_PHY_POWER_MGMT); ++ ++ if (!active) { ++ data &= ~E1000_82580_PM_D3_LPLU; ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) ++ data |= E1000_82580_PM_SPD; ++ else if (phy->smart_speed == e1000_smart_speed_off) ++ data &= ~E1000_82580_PM_SPD; ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= E1000_82580_PM_D3_LPLU; ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ data &= ~E1000_82580_PM_SPD; ++ } ++ ++ wr32(E1000_82580_PHY_POWER_MGMT, data); ++ return 0; ++} ++ ++/** ++ * igb_acquire_nvm_82575 - Request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the necessary semaphores for exclusive access to the EEPROM. ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++static s32 igb_acquire_nvm_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->mac.ops.acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_acquire_nvm(hw); ++ ++ if (ret_val) ++ hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_nvm_82575 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit, ++ * then release the semaphores acquired. ++ **/ ++static void igb_release_nvm_82575(struct e1000_hw *hw) ++{ ++ igb_release_nvm(hw); ++ hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_acquire_swfw_sync_82575 - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. 
++ **/ ++static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = 0; ++ s32 i = 0, timeout = 200; ++ ++ while (i < timeout) { ++ if (igb_get_hw_semaphore(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ igb_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_swfw_sync_82575 - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void igb_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (igb_get_hw_semaphore(hw) != 0) ++ ; /* Empty */ ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++} ++ ++/** ++ * igb_get_cfg_done_82575 - Read config done bit ++ * @hw: pointer to the HW structure ++ * ++ * Read the management control register for the config done bit for ++ * completion status. NOTE: silicon which is EEPROM-less will fail trying ++ * to read the config done bit, so an error is *ONLY* logged and returns ++ * 0. If we were to return with error, EEPROM-less silicon ++ * would not be able to be reset or change link. ++ **/ ++static s32 igb_get_cfg_done_82575(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ else if (hw->bus.func == E1000_FUNC_2) ++ mask = E1000_NVM_CFG_DONE_PORT_2; ++ else if (hw->bus.func == E1000_FUNC_3) ++ mask = E1000_NVM_CFG_DONE_PORT_3; ++ ++ while (timeout) { ++ if (rd32(E1000_EEMNGCTL) & mask) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) ++ hw_dbg("MNG configuration cycle has not completed.\n"); ++ ++ /* If EEPROM is not marked present, init the PHY manually */ ++ if (((rd32(E1000_EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) ++ igb_phy_init_script_igp3(hw); ++ ++ return 0; ++} ++ ++/** ++ * igb_get_link_up_info_82575 - Get link speed/duplex info ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * This is a wrapper function, if using the serial gigabit media independent ++ * interface, use PCS to retrieve the link speed and duplex information. ++ * Otherwise, use the generic function to get the link speed and duplex info. 
++ **/ ++static s32 igb_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ ret_val = igb_get_pcs_speed_and_duplex_82575(hw, speed, ++ duplex); ++ else ++ ret_val = igb_get_speed_and_duplex_copper(hw, speed, ++ duplex); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_link_82575 - Check for link ++ * @hw: pointer to the HW structure ++ * ++ * If sgmii is enabled, then use the pcs register to determine link, otherwise ++ * use the generic interface for determining link. ++ **/ ++static s32 igb_check_for_link_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 speed, duplex; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed, ++ &duplex); ++ /* Use this flag to determine if link needs to be checked or ++ * not. If we have link clear the flag so that we do not ++ * continue to check for link. ++ */ ++ hw->mac.get_link_status = !hw->mac.serdes_has_link; ++ ++ /* Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = igb_config_fc_after_link_up(hw); ++ if (ret_val) ++ hw_dbg("Error configuring flow control\n"); ++ } else { ++ ret_val = igb_check_for_copper_link(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_power_up_serdes_link_82575 - Power up the serdes link after shutdown ++ * @hw: pointer to the HW structure ++ **/ ++void igb_power_up_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ ++ if ((hw->phy.media_type != e1000_media_type_internal_serdes) && ++ !igb_sgmii_active_82575(hw)) ++ return; ++ ++ /* Enable PCS to turn on link */ ++ reg = rd32(E1000_PCS_CFG0); ++ reg |= E1000_PCS_CFG_PCS_EN; ++ wr32(E1000_PCS_CFG0, reg); ++ ++ /* Power up the laser */ ++ reg = rd32(E1000_CTRL_EXT); ++ reg &= ~E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, reg); ++ ++ /* flush the write to verify completion */ ++ wrfl(); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * igb_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Using the physical coding sub-layer (PCS), retrieve the current speed and ++ * duplex, then store the values in the pointers provided. ++ **/ ++static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 pcs, status; ++ ++ /* Set up defaults for the return values of this function */ ++ mac->serdes_has_link = false; ++ *speed = 0; ++ *duplex = 0; ++ ++ /* Read the PCS Status register for link state. For non-copper mode, ++ * the status register is not accurate. The PCS status register is ++ * used instead. ++ */ ++ pcs = rd32(E1000_PCS_LSTAT); ++ ++ /* The link up bit determines when link is up on autoneg. The sync ok ++ * gets set once both sides sync up and agree upon link. 
Stable link ++ * can be determined by checking for both link up and link sync ok ++ */ ++ if ((pcs & E1000_PCS_LSTS_LINK_OK) && (pcs & E1000_PCS_LSTS_SYNK_OK)) { ++ mac->serdes_has_link = true; ++ ++ /* Detect and store PCS speed */ ++ if (pcs & E1000_PCS_LSTS_SPEED_1000) ++ *speed = SPEED_1000; ++ else if (pcs & E1000_PCS_LSTS_SPEED_100) ++ *speed = SPEED_100; ++ else ++ *speed = SPEED_10; ++ ++ /* Detect and store PCS duplex */ ++ if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) ++ *duplex = FULL_DUPLEX; ++ else ++ *duplex = HALF_DUPLEX; ++ ++ /* Check if it is an I354 2.5Gb backplane connection. */ ++ if (mac->type == e1000_i354) { ++ status = rd32(E1000_STATUS); ++ if ((status & E1000_STATUS_2P5_SKU) && ++ !(status & E1000_STATUS_2P5_SKU_OVER)) { ++ *speed = SPEED_2500; ++ *duplex = FULL_DUPLEX; ++ hw_dbg("2500 Mbs, "); ++ hw_dbg("Full Duplex\n"); ++ } ++ } ++ ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_shutdown_serdes_link_82575 - Remove link during power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of fiber serdes, shut down optics and PCS on driver unload ++ * when management pass thru is not enabled. ++ **/ ++void igb_shutdown_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ if (hw->phy.media_type != e1000_media_type_internal_serdes && ++ igb_sgmii_active_82575(hw)) ++ return; ++ ++ if (!igb_enable_mng_pass_thru(hw)) { ++ /* Disable PCS to turn off link */ ++ reg = rd32(E1000_PCS_CFG0); ++ reg &= ~E1000_PCS_CFG_PCS_EN; ++ wr32(E1000_PCS_CFG0, reg); ++ ++ /* shutdown the laser */ ++ reg = rd32(E1000_CTRL_EXT); ++ reg |= E1000_CTRL_EXT_SDP3_DATA; ++ wr32(E1000_CTRL_EXT, reg); ++ ++ /* flush the write to verify completion */ ++ wrfl(); ++ usleep_range(1000, 2000); ++ } ++} ++ ++/** ++ * igb_reset_hw_82575 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. This is a ++ * function pointer entry point called by the api module. ++ **/ ++static s32 igb_reset_hw_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = igb_disable_pcie_master(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ /* set the completion timeout for interface */ ++ ret_val = igb_set_pcie_completion_timeout(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Set completion timeout has failed.\n"); ++ ++ hw_dbg("Masking off all interrupts\n"); ++ wr32(E1000_IMC, 0xffffffff); ++ ++ wr32(E1000_RCTL, 0); ++ wr32(E1000_TCTL, E1000_TCTL_PSP); ++ wrfl(); ++ ++ usleep_range(10000, 20000); ++ ++ ctrl = rd32(E1000_CTRL); ++ ++ hw_dbg("Issuing a global reset to MAC\n"); ++ wr32(E1000_CTRL, ctrl | E1000_CTRL_RST); ++ ++ ret_val = igb_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ hw_dbg("Auto Read Done did not complete\n"); ++ } ++ ++ /* If EEPROM is not present, run manual init scripts */ ++ if ((rd32(E1000_EECD) & E1000_EECD_PRES) == 0) ++ igb_reset_init_script_82575(hw); ++ ++ /* Clear any pending interrupt events. 
*/ ++ wr32(E1000_IMC, 0xffffffff); ++ rd32(E1000_ICR); ++ ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_init_hw_82575 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 igb_init_hw_82575(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ if ((hw->mac.type >= e1000_i210) && ++ !(igb_get_flash_presence_i210(hw))) { ++ ret_val = igb_pll_workaround_i210(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Initialize identification LED */ ++ ret_val = igb_id_led_init(hw); ++ if (ret_val) { ++ hw_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ } ++ ++ /* Disabling VLAN filtering */ ++ hw_dbg("Initializing the IEEE VLAN\n"); ++ if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) ++ igb_clear_vfta_i350(hw); ++ else ++ igb_clear_vfta(hw); ++ ++ /* Setup the receive address */ ++ igb_init_rx_addrs(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ hw_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ array_wr32(E1000_MTA, i, 0); ++ ++ /* Zero out the Unicast HASH table */ ++ hw_dbg("Zeroing the UTA\n"); ++ for (i = 0; i < mac->uta_reg_count; i++) ++ array_wr32(E1000_UTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = igb_setup_link(hw); ++ ++ /* Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ igb_clear_hw_cntrs_82575(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_setup_copper_link_82575 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. 
++ **/ ++static s32 igb_setup_copper_link_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u32 phpm_reg; ++ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ wr32(E1000_CTRL, ctrl); ++ ++ /* Clear Go Link Disconnect bit on supported devices */ ++ switch (hw->mac.type) { ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ phpm_reg = rd32(E1000_82580_PHY_POWER_MGMT); ++ phpm_reg &= ~E1000_82580_PM_GO_LINKD; ++ wr32(E1000_82580_PHY_POWER_MGMT, phpm_reg); ++ break; ++ default: ++ break; ++ } ++ ++ ret_val = igb_setup_serdes_link_82575(hw); ++ if (ret_val) ++ goto out; ++ ++ if (igb_sgmii_active_82575(hw) && !hw->phy.reset_disable) { ++ /* allow time for SFP cage time to power up phy */ ++ msleep(300); ++ ++ ret_val = hw->phy.ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ } ++ switch (hw->phy.type) { ++ case e1000_phy_i210: ++ case e1000_phy_m88: ++ switch (hw->phy.id) { ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case M88E1543_E_PHY_ID: ++ case I210_I_PHY_ID: ++ ret_val = igb_copper_link_setup_m88_gen2(hw); ++ break; ++ default: ++ ret_val = igb_copper_link_setup_m88(hw); ++ break; ++ } ++ break; ++ case e1000_phy_igp_3: ++ ret_val = igb_copper_link_setup_igp(hw); ++ break; ++ case e1000_phy_82580: ++ ret_val = igb_copper_link_setup_82580(hw); ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_setup_copper_link(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_serdes_link_82575 - Setup link for serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configure the physical coding sub-layer (PCS) link. The PCS link is ++ * used on copper connections where the serialized gigabit media independent ++ * interface (sgmii), or serdes fiber is being used. Configures the link ++ * for auto-negotiation or forces speed/duplex. ++ **/ ++static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext, ctrl_reg, reg, anadv_reg; ++ bool pcs_autoneg; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if ((hw->phy.media_type != e1000_media_type_internal_serdes) && ++ !igb_sgmii_active_82575(hw)) ++ return ret_val; ++ ++ ++ /* On the 82575, SerDes loopback mode persists until it is ++ * explicitly turned off or a power cycle is performed. A read to ++ * the register does not indicate its status. Therefore, we ensure ++ * loopback mode is disabled during initialization. 
++ */ ++ wr32(E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ ++ /* power on the sfp cage if present and turn on I2C */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; ++ ctrl_ext |= E1000_CTRL_I2C_ENA; ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ ctrl_reg = rd32(E1000_CTRL); ++ ctrl_reg |= E1000_CTRL_SLU; ++ ++ if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) { ++ /* set both sw defined pins */ ++ ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; ++ ++ /* Set switch control to serdes energy detect */ ++ reg = rd32(E1000_CONNSW); ++ reg |= E1000_CONNSW_ENRGSRC; ++ wr32(E1000_CONNSW, reg); ++ } ++ ++ reg = rd32(E1000_PCS_LCTL); ++ ++ /* default pcs_autoneg to the same setting as mac autoneg */ ++ pcs_autoneg = hw->mac.autoneg; ++ ++ switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { ++ case E1000_CTRL_EXT_LINK_MODE_SGMII: ++ /* sgmii mode lets the phy handle forcing speed/duplex */ ++ pcs_autoneg = true; ++ /* autoneg time out should be disabled for SGMII mode */ ++ reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); ++ break; ++ case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: ++ /* disable PCS autoneg and support parallel detect only */ ++ pcs_autoneg = false; ++ default: ++ if (hw->mac.type == e1000_82575 || ++ hw->mac.type == e1000_82576) { ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); ++ if (ret_val) { ++ hw_dbg(KERN_DEBUG "NVM Read Error\n\n"); ++ return ret_val; ++ } ++ ++ if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) ++ pcs_autoneg = false; ++ } ++ ++ /* non-SGMII modes only supports a speed of 1000/Full for the ++ * link so it is best to just force the MAC and let the pcs ++ * link either autoneg or be forced to 1000/Full ++ */ ++ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | ++ E1000_CTRL_FD | E1000_CTRL_FRCDPX; ++ ++ /* set speed of 1000/Full if speed/duplex is forced */ ++ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; ++ break; ++ } ++ ++ wr32(E1000_CTRL, ctrl_reg); ++ ++ /* New SerDes mode allows for forcing speed or autonegotiating speed ++ * at 1gb. Autoneg should be default set by most drivers. This is the ++ * mode that will be compatible with older link partners and switches. ++ * However, both are supported by the hardware and some drivers/tools. 
++ */ ++ reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | ++ E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); ++ ++ if (pcs_autoneg) { ++ /* Set PCS register for autoneg */ ++ reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ ++ E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ ++ ++ /* Disable force flow control for autoneg */ ++ reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; ++ ++ /* Configure flow control advertisement for autoneg */ ++ anadv_reg = rd32(E1000_PCS_ANADV); ++ anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); ++ switch (hw->fc.requested_mode) { ++ case e1000_fc_full: ++ case e1000_fc_rx_pause: ++ anadv_reg |= E1000_TXCW_ASM_DIR; ++ anadv_reg |= E1000_TXCW_PAUSE; ++ break; ++ case e1000_fc_tx_pause: ++ anadv_reg |= E1000_TXCW_ASM_DIR; ++ break; ++ default: ++ break; ++ } ++ wr32(E1000_PCS_ANADV, anadv_reg); ++ ++ hw_dbg("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); ++ } else { ++ /* Set PCS register for forced link */ ++ reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ ++ ++ /* Force flow control for forced link */ ++ reg |= E1000_PCS_LCTL_FORCE_FCTRL; ++ ++ hw_dbg("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); ++ } ++ ++ wr32(E1000_PCS_LCTL, reg); ++ ++ if (!pcs_autoneg && !igb_sgmii_active_82575(hw)) ++ igb_force_mac_fc(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_sgmii_active_82575 - Return sgmii state ++ * @hw: pointer to the HW structure ++ * ++ * 82575 silicon has a serialized gigabit media independent interface (sgmii) ++ * which can be enabled for use in the embedded applications. Simply ++ * return the current state of the sgmii interface. ++ **/ ++static bool igb_sgmii_active_82575(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; ++ return dev_spec->sgmii_active; ++} ++ ++/** ++ * igb_reset_init_script_82575 - Inits HW defaults after reset ++ * @hw: pointer to the HW structure ++ * ++ * Inits recommended HW defaults after a reset when there is no EEPROM ++ * detected. This is only for the 82575. ++ **/ ++static s32 igb_reset_init_script_82575(struct e1000_hw *hw) ++{ ++ if (hw->mac.type == e1000_82575) { ++ hw_dbg("Running reset init script for 82575\n"); ++ /* SerDes configuration via SERDESCTRL */ ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x00, 0x0C); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x01, 0x78); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x1B, 0x23); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x23, 0x15); ++ ++ /* CCM configuration via CCMCTL register */ ++ igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x14, 0x00); ++ igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x10, 0x00); ++ ++ /* PCIe lanes configuration */ ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x00, 0xEC); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x61, 0xDF); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x34, 0x05); ++ igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x2F, 0x81); ++ ++ /* PCIe PLL Configuration */ ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x02, 0x47); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x14, 0x00); ++ igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x10, 0x00); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_read_mac_addr_82575 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 igb_read_mac_addr_82575(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. 
++ */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_mac_addr(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_power_down_phy_copper_82575 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++void igb_power_down_phy_copper_82575(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(igb_enable_mng_pass_thru(hw) || igb_check_reset_block(hw))) ++ igb_power_down_phy_copper(hw); ++} ++ ++/** ++ * igb_clear_hw_cntrs_82575 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. ++ **/ ++static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw) ++{ ++ igb_clear_hw_cntrs_base(hw); ++ ++ rd32(E1000_PRC64); ++ rd32(E1000_PRC127); ++ rd32(E1000_PRC255); ++ rd32(E1000_PRC511); ++ rd32(E1000_PRC1023); ++ rd32(E1000_PRC1522); ++ rd32(E1000_PTC64); ++ rd32(E1000_PTC127); ++ rd32(E1000_PTC255); ++ rd32(E1000_PTC511); ++ rd32(E1000_PTC1023); ++ rd32(E1000_PTC1522); ++ ++ rd32(E1000_ALGNERRC); ++ rd32(E1000_RXERRC); ++ rd32(E1000_TNCRS); ++ rd32(E1000_CEXTERR); ++ rd32(E1000_TSCTC); ++ rd32(E1000_TSCTFC); ++ ++ rd32(E1000_MGTPRC); ++ rd32(E1000_MGTPDC); ++ rd32(E1000_MGTPTC); ++ ++ rd32(E1000_IAC); ++ rd32(E1000_ICRXOC); ++ ++ rd32(E1000_ICRXPTC); ++ rd32(E1000_ICRXATC); ++ rd32(E1000_ICTXPTC); ++ rd32(E1000_ICTXATC); ++ rd32(E1000_ICTXQEC); ++ rd32(E1000_ICTXQMTC); ++ rd32(E1000_ICRXDMTC); ++ ++ rd32(E1000_CBTMPC); ++ rd32(E1000_HTDPMC); ++ rd32(E1000_CBRMPC); ++ rd32(E1000_RPTHC); ++ rd32(E1000_HGPTC); ++ rd32(E1000_HTCBDPC); ++ rd32(E1000_HGORCL); ++ rd32(E1000_HGORCH); ++ rd32(E1000_HGOTCL); ++ rd32(E1000_HGOTCH); ++ rd32(E1000_LENERRS); ++ ++ /* This register should not be read in copper configurations */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ igb_sgmii_active_82575(hw)) ++ rd32(E1000_SCVPC); ++} ++ ++/** ++ * igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable ++ * @hw: pointer to the HW structure ++ * ++ * After rx enable if manageability is enabled then there is likely some ++ * bad data at the start of the fifo and possibly in the DMA fifo. This ++ * function clears the fifos and flushes any packets that came in as rx was ++ * being enabled. ++ **/ ++void igb_rx_fifo_flush_82575(struct e1000_hw *hw) ++{ ++ u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; ++ int i, ms_wait; ++ ++ /* disable IPv6 options as per hardware errata */ ++ rfctl = rd32(E1000_RFCTL); ++ rfctl |= E1000_RFCTL_IPV6_EX_DIS; ++ wr32(E1000_RFCTL, rfctl); ++ ++ if (hw->mac.type != e1000_82575 || ++ !(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN)) ++ return; ++ ++ /* Disable all RX queues */ ++ for (i = 0; i < 4; i++) { ++ rxdctl[i] = rd32(E1000_RXDCTL(i)); ++ wr32(E1000_RXDCTL(i), ++ rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); ++ } ++ /* Poll all queues to verify they have shut down */ ++ for (ms_wait = 0; ms_wait < 10; ms_wait++) { ++ usleep_range(1000, 2000); ++ rx_enabled = 0; ++ for (i = 0; i < 4; i++) ++ rx_enabled |= rd32(E1000_RXDCTL(i)); ++ if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) ++ break; ++ } ++ ++ if (ms_wait == 10) ++ hw_dbg("Queue disable timed out after 10ms\n"); ++ ++ /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all ++ * incoming packets are rejected. 
Set enable and wait 2ms so that ++ * any packet that was coming in as RCTL.EN was set is flushed ++ */ ++ wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); ++ ++ rlpml = rd32(E1000_RLPML); ++ wr32(E1000_RLPML, 0); ++ ++ rctl = rd32(E1000_RCTL); ++ temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); ++ temp_rctl |= E1000_RCTL_LPE; ++ ++ wr32(E1000_RCTL, temp_rctl); ++ wr32(E1000_RCTL, temp_rctl | E1000_RCTL_EN); ++ wrfl(); ++ usleep_range(2000, 3000); ++ ++ /* Enable RX queues that were previously enabled and restore our ++ * previous state ++ */ ++ for (i = 0; i < 4; i++) ++ wr32(E1000_RXDCTL(i), rxdctl[i]); ++ wr32(E1000_RCTL, rctl); ++ wrfl(); ++ ++ wr32(E1000_RLPML, rlpml); ++ wr32(E1000_RFCTL, rfctl); ++ ++ /* Flush receive errors generated by workaround */ ++ rd32(E1000_ROC); ++ rd32(E1000_RNBC); ++ rd32(E1000_MPC); ++} ++ ++/** ++ * igb_set_pcie_completion_timeout - set pci-e completion timeout ++ * @hw: pointer to the HW structure ++ * ++ * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, ++ * however the hardware default for these parts is 500us to 1ms which is less ++ * than the 10ms recommended by the pci-e spec. To address this we need to ++ * increase the value to either 10ms to 200ms for capability version 1 config, ++ * or 16ms to 55ms for version 2. ++ **/ ++static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw) ++{ ++ u32 gcr = rd32(E1000_GCR); ++ s32 ret_val = 0; ++ u16 pcie_devctl2; ++ ++ /* only take action if timeout value is defaulted to 0 */ ++ if (gcr & E1000_GCR_CMPL_TMOUT_MASK) ++ goto out; ++ ++ /* if capabilities version is type 1 we can write the ++ * timeout of 10ms to 200ms through the GCR register ++ */ ++ if (!(gcr & E1000_GCR_CAP_VER2)) { ++ gcr |= E1000_GCR_CMPL_TMOUT_10ms; ++ goto out; ++ } ++ ++ /* for version 2 capabilities we need to write the config space ++ * directly in order to set the completion timeout value for ++ * 16ms to 55ms ++ */ ++ ret_val = igb_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, ++ &pcie_devctl2); ++ if (ret_val) ++ goto out; ++ ++ pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; ++ ++ ret_val = igb_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, ++ &pcie_devctl2); ++out: ++ /* disable completion timeout resend */ ++ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; ++ ++ wr32(E1000_GCR, gcr); ++ return ret_val; ++} ++ ++/** ++ * igb_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * @pf: Physical Function pool - do not set anti-spoofing for the PF ++ * ++ * enables/disables L2 switch anti-spoofing functionality. 
++ **/ ++void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) ++{ ++ u32 reg_val, reg_offset; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ reg_offset = E1000_DTXSWC; ++ break; ++ case e1000_i350: ++ case e1000_i354: ++ reg_offset = E1000_TXSWC; ++ break; ++ default: ++ return; ++ } ++ ++ reg_val = rd32(reg_offset); ++ if (enable) { ++ reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | ++ E1000_DTXSWC_VLAN_SPOOF_MASK); ++ /* The PF can spoof - it has to in order to ++ * support emulation mode NICs ++ */ ++ reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); ++ } else { ++ reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | ++ E1000_DTXSWC_VLAN_SPOOF_MASK); ++ } ++ wr32(reg_offset, reg_val); ++} ++ ++/** ++ * igb_vmdq_set_loopback_pf - enable or disable vmdq loopback ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * ++ * enables/disables L2 switch loopback functionality. ++ **/ ++void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) ++{ ++ u32 dtxswc; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ dtxswc = rd32(E1000_DTXSWC); ++ if (enable) ++ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ else ++ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ wr32(E1000_DTXSWC, dtxswc); ++ break; ++ case e1000_i354: ++ case e1000_i350: ++ dtxswc = rd32(E1000_TXSWC); ++ if (enable) ++ dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ else ++ dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; ++ wr32(E1000_TXSWC, dtxswc); ++ break; ++ default: ++ /* Currently no other hardware supports loopback */ ++ break; ++ } ++ ++} ++ ++/** ++ * igb_vmdq_set_replication_pf - enable or disable vmdq replication ++ * @hw: pointer to the hardware struct ++ * @enable: state to enter, either enabled or disabled ++ * ++ * enables/disables replication of packets across multiple pools. ++ **/ ++void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) ++{ ++ u32 vt_ctl = rd32(E1000_VT_CTL); ++ ++ if (enable) ++ vt_ctl |= E1000_VT_CTL_VM_REPL_EN; ++ else ++ vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; ++ ++ wr32(E1000_VT_CTL, vt_ctl); ++} ++ ++/** ++ * igb_read_phy_reg_82580 - Read 82580 MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control register in the PHY at offset and stores the ++ * information read to data. ++ **/ ++static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_read_phy_reg_mdic(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_82580 - Write 82580 MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, offset, data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits ++ * @hw: pointer to the HW structure ++ * ++ * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on ++ * the values found in the EEPROM. 
This addresses an issue in which these ++ * bits are not restored from EEPROM after reset. ++ **/ ++static s32 igb_reset_mdicnfg_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 mdicnfg; ++ u16 nvm_data = 0; ++ ++ if (hw->mac.type != e1000_82580) ++ goto out; ++ if (!igb_sgmii_active_82575(hw)) ++ goto out; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + ++ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, ++ &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ mdicnfg = rd32(E1000_MDICNFG); ++ if (nvm_data & NVM_WORD24_EXT_MDIO) ++ mdicnfg |= E1000_MDICNFG_EXT_MDIO; ++ if (nvm_data & NVM_WORD24_COM_MDIO) ++ mdicnfg |= E1000_MDICNFG_COM_MDIO; ++ wr32(E1000_MDICNFG, mdicnfg); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_reset_hw_82580 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets function or entire device (all ports, etc.) ++ * to a known state. ++ **/ ++static s32 igb_reset_hw_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ /* BH SW mailbox bit in SW_FW_SYNC */ ++ u16 swmbsw_mask = E1000_SW_SYNCH_MB; ++ u32 ctrl; ++ bool global_device_reset = hw->dev_spec._82575.global_device_reset; ++ ++ hw->dev_spec._82575.global_device_reset = false; ++ ++ /* due to hw errata, global device reset doesn't always ++ * work on 82580 ++ */ ++ if (hw->mac.type == e1000_82580) ++ global_device_reset = false; ++ ++ /* Get current control state. */ ++ ctrl = rd32(E1000_CTRL); ++ ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = igb_disable_pcie_master(hw); ++ if (ret_val) ++ hw_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ hw_dbg("Masking off all interrupts\n"); ++ wr32(E1000_IMC, 0xffffffff); ++ wr32(E1000_RCTL, 0); ++ wr32(E1000_TCTL, E1000_TCTL_PSP); ++ wrfl(); ++ ++ usleep_range(10000, 11000); ++ ++ /* Determine whether or not a global dev reset is requested */ ++ if (global_device_reset && ++ hw->mac.ops.acquire_swfw_sync(hw, swmbsw_mask)) ++ global_device_reset = false; ++ ++ if (global_device_reset && ++ !(rd32(E1000_STATUS) & E1000_STAT_DEV_RST_SET)) ++ ctrl |= E1000_CTRL_DEV_RST; ++ else ++ ctrl |= E1000_CTRL_RST; ++ ++ wr32(E1000_CTRL, ctrl); ++ wrfl(); ++ ++ /* Add delay to insure DEV_RST has time to complete */ ++ if (global_device_reset) ++ usleep_range(5000, 6000); ++ ++ ret_val = igb_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ hw_dbg("Auto Read Done did not complete\n"); ++ } ++ ++ /* clear global device reset status bit */ ++ wr32(E1000_STATUS, E1000_STAT_DEV_RST_SET); ++ ++ /* Clear any pending interrupt events. */ ++ wr32(E1000_IMC, 0xffffffff); ++ rd32(E1000_ICR); ++ ++ ret_val = igb_reset_mdicnfg_82580(hw); ++ if (ret_val) ++ hw_dbg("Could not reset MDICNFG based on EEPROM\n"); ++ ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = igb_check_alt_mac_addr(hw); ++ ++ /* Release semaphore */ ++ if (global_device_reset) ++ hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual RX PBA size ++ * @data: data received by reading RXPBS register ++ * ++ * The 82580 uses a table based approach for packet buffer allocation sizes. 
++ * This function converts the retrieved value into the correct table value ++ * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 ++ * 0x0 36 72 144 1 2 4 8 16 ++ * 0x8 35 70 140 rsv rsv rsv rsv rsv ++ */ ++u16 igb_rxpbs_adjust_82580(u32 data) ++{ ++ u16 ret_val = 0; ++ ++ if (data < ARRAY_SIZE(e1000_82580_rxpbs_table)) ++ ret_val = e1000_82580_rxpbs_table[data]; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_with_offset - Validate EEPROM ++ * checksum ++ * @hw: pointer to the HW structure ++ * @offset: offset in words of the checksum protected region ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw, ++ u16 offset) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ hw_dbg("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_with_offset - Update EEPROM ++ * checksum ++ * @hw: pointer to the HW structure ++ * @offset: offset in words of the checksum protected region ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, ++ &checksum); ++ if (ret_val) ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_82580 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM section checksum by reading/adding each word of ++ * the EEPROM and then verifies that the sum of the EEPROM is ++ * equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 eeprom_regions_count = 1; ++ u16 j, nvm_data; ++ u16 nvm_offset; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { ++ /* if checksums compatibility bit is set validate checksums ++ * for all 4 ports. ++ */ ++ eeprom_regions_count = 4; ++ } ++ ++ for (j = 0; j < eeprom_regions_count; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_validate_nvm_checksum_with_offset(hw, ++ nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_82580 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM section checksums for all 4 ports by reading/adding ++ * each word of the EEPROM up to the checksum. Then calculates the EEPROM ++ * checksum and writes the value to the EEPROM. 
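++ *
++ * As a worked example of the scheme described above: if the 16-bit sum of
++ * all words covered by one region's checksum comes to 0x1234, the checksum
++ * word written for that region is 0xBABA - 0x1234 = 0xA886, so that the
++ * complete sum reads back as NVM_SUM (0xBABA).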
++ **/ ++static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 j, nvm_data; ++ u16 nvm_offset; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum compatibility bit.\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_COMPATIBILITY_BIT_MASK) == 0) { ++ /* set compatibility bit to validate checksums appropriately */ ++ nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; ++ ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, ++ &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Write Error while updating checksum compatibility bit.\n"); ++ goto out; ++ } ++ } ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_nvm_checksum_i350 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM section checksum by reading/adding each word of ++ * the EEPROM and then verifies that the sum of the EEPROM is ++ * equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 j; ++ u16 nvm_offset; ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_validate_nvm_checksum_with_offset(hw, ++ nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum_i350 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM section checksums for all 4 ports by reading/adding ++ * each word of the EEPROM up to the checksum. Then calculates the EEPROM ++ * checksum and writes the value to the EEPROM. ++ **/ ++static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 j; ++ u16 nvm_offset; ++ ++ for (j = 0; j < 4; j++) { ++ nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ++ ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset); ++ if (ret_val != 0) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * __igb_access_emi_reg - Read/write EMI register ++ * @hw: pointer to the HW structure ++ * @addr: EMI address to program ++ * @data: pointer to value to read/write from/to the EMI address ++ * @read: boolean flag to indicate read or write ++ **/ ++static s32 __igb_access_emi_reg(struct e1000_hw *hw, u16 address, ++ u16 *data, bool read) ++{ ++ s32 ret_val = 0; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); ++ if (ret_val) ++ return ret_val; ++ ++ if (read) ++ ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); ++ else ++ ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_emi_reg - Read Extended Management Interface register ++ * @hw: pointer to the HW structure ++ * @addr: EMI address to program ++ * @data: value to be read from the EMI address ++ **/ ++s32 igb_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) ++{ ++ return __igb_access_emi_reg(hw, addr, data, true); ++} ++ ++/** ++ * igb_set_eee_i350 - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * @adv1G: boolean flag enabling 1G EEE advertisement ++ * @adv100m: boolean flag enabling 100M EEE advertisement ++ * ++ * Enable/disable EEE based on setting in dev_spec structure. 
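++ *
++ * EEE here is IEEE 802.3az Energy Efficient Ethernet: when it is not
++ * disabled in dev_spec._82575, the 100M and/or 1G low-power-idle abilities
++ * selected by @adv100M and @adv1G are advertised via IPCNFG and the EEER
++ * Tx/Rx LPI enables are set; otherwise all of those bits are cleared.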
++ * ++ **/ ++s32 igb_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M) ++{ ++ u32 ipcnfg, eeer; ++ ++ if ((hw->mac.type < e1000_i350) || ++ (hw->phy.media_type != e1000_media_type_copper)) ++ goto out; ++ ipcnfg = rd32(E1000_IPCNFG); ++ eeer = rd32(E1000_EEER); ++ ++ /* enable or disable per user setting */ ++ if (!(hw->dev_spec._82575.eee_disable)) { ++ u32 eee_su = rd32(E1000_EEE_SU); ++ ++ if (adv100M) ++ ipcnfg |= E1000_IPCNFG_EEE_100M_AN; ++ else ++ ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN; ++ ++ if (adv1G) ++ ipcnfg |= E1000_IPCNFG_EEE_1G_AN; ++ else ++ ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN; ++ ++ eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | ++ E1000_EEER_LPI_FC); ++ ++ /* This bit should not be set in normal operation. */ ++ if (eee_su & E1000_EEE_SU_LPI_CLK_STP) ++ hw_dbg("LPI Clock Stop Bit should not be set!\n"); ++ ++ } else { ++ ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | ++ E1000_IPCNFG_EEE_100M_AN); ++ eeer &= ~(E1000_EEER_TX_LPI_EN | ++ E1000_EEER_RX_LPI_EN | ++ E1000_EEER_LPI_FC); ++ } ++ wr32(E1000_IPCNFG, ipcnfg); ++ wr32(E1000_EEER, eeer); ++ rd32(E1000_IPCNFG); ++ rd32(E1000_EEER); ++out: ++ ++ return 0; ++} ++ ++/** ++ * igb_set_eee_i354 - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * @adv1G: boolean flag enabling 1G EEE advertisement ++ * @adv100m: boolean flag enabling 100M EEE advertisement ++ * ++ * Enable/disable EEE legacy mode based on setting in dev_spec structure. ++ * ++ **/ ++s32 igb_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ if ((hw->phy.media_type != e1000_media_type_copper) || ++ (phy->id != M88E1543_E_PHY_ID)) ++ goto out; ++ ++ if (!hw->dev_spec._82575.eee_disable) { ++ /* Switch to PHY page 18. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, ++ phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Return the PHY to page 0. */ ++ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); ++ if (ret_val) ++ goto out; ++ ++ /* Turn on EEE advertisement. */ ++ ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (adv100M) ++ phy_data |= E1000_EEE_ADV_100_SUPPORTED; ++ else ++ phy_data &= ~E1000_EEE_ADV_100_SUPPORTED; ++ ++ if (adv1G) ++ phy_data |= E1000_EEE_ADV_1000_SUPPORTED; ++ else ++ phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED; ++ ++ ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ phy_data); ++ } else { ++ /* Turn off EEE advertisement. */ ++ ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | ++ E1000_EEE_ADV_1000_SUPPORTED); ++ ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, ++ E1000_EEE_ADV_DEV_I354, ++ phy_data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_eee_status_i354 - Get EEE status ++ * @hw: pointer to the HW structure ++ * @status: EEE status ++ * ++ * Get EEE status by guessing based on whether Tx or Rx LPI indications have ++ * been received. 
++ **/ ++s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ /* Check if EEE is supported on this device. */ ++ if ((hw->phy.media_type != e1000_media_type_copper) || ++ (phy->id != M88E1543_E_PHY_ID)) ++ goto out; ++ ++ ret_val = igb_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, ++ E1000_PCS_STATUS_DEV_I354, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | ++ E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false; ++ ++out: ++ return ret_val; ++} ++ ++static const u8 e1000_emc_temp_data[4] = { ++ E1000_EMC_INTERNAL_DATA, ++ E1000_EMC_DIODE1_DATA, ++ E1000_EMC_DIODE2_DATA, ++ E1000_EMC_DIODE3_DATA ++}; ++static const u8 e1000_emc_therm_limit[4] = { ++ E1000_EMC_INTERNAL_THERM_LIMIT, ++ E1000_EMC_DIODE1_THERM_LIMIT, ++ E1000_EMC_DIODE2_THERM_LIMIT, ++ E1000_EMC_DIODE3_THERM_LIMIT ++}; ++ ++#ifdef CONFIG_IGB_HWMON ++/** ++ * igb_get_thermal_sensor_data_generic - Gathers thermal sensor data ++ * @hw: pointer to hardware structure ++ * ++ * Updates the temperatures in mac.thermal_sensor_data ++ **/ ++static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw) ++{ ++ u16 ets_offset; ++ u16 ets_cfg; ++ u16 ets_sensor; ++ u8 num_sensors; ++ u8 sensor_index; ++ u8 sensor_location; ++ u8 i; ++ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; ++ ++ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) ++ return E1000_NOT_IMPLEMENTED; ++ ++ data->sensor[0].temp = (rd32(E1000_THMJT) & 0xFF); ++ ++ /* Return the internal sensor only if ETS is unsupported */ ++ hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset); ++ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) ++ return 0; ++ ++ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); ++ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ != NVM_ETS_TYPE_EMC) ++ return E1000_NOT_IMPLEMENTED; ++ ++ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); ++ if (num_sensors > E1000_MAX_SENSORS) ++ num_sensors = E1000_MAX_SENSORS; ++ ++ for (i = 1; i < num_sensors; i++) { ++ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); ++ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> ++ NVM_ETS_DATA_INDEX_SHIFT); ++ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> ++ NVM_ETS_DATA_LOC_SHIFT); ++ ++ if (sensor_location != 0) ++ hw->phy.ops.read_i2c_byte(hw, ++ e1000_emc_temp_data[sensor_index], ++ E1000_I2C_THERMAL_SENSOR_ADDR, ++ &data->sensor[i].temp); ++ } ++ return 0; ++} ++ ++/** ++ * igb_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds ++ * @hw: pointer to hardware structure ++ * ++ * Sets the thermal sensor thresholds according to the NVM map ++ * and save off the threshold and location values into mac.thermal_sensor_data ++ **/ ++static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) ++{ ++ u16 ets_offset; ++ u16 ets_cfg; ++ u16 ets_sensor; ++ u8 low_thresh_delta; ++ u8 num_sensors; ++ u8 sensor_index; ++ u8 sensor_location; ++ u8 therm_limit; ++ u8 i; ++ struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; ++ ++ if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) ++ return E1000_NOT_IMPLEMENTED; ++ ++ memset(data, 0, sizeof(struct e1000_thermal_sensor_data)); ++ ++ data->sensor[0].location = 0x1; ++ data->sensor[0].caution_thresh = ++ (rd32(E1000_THHIGHTC) & 0xFF); ++ data->sensor[0].max_op_thresh = ++ (rd32(E1000_THLOWTC) & 0xFF); ++ ++ /* Return the internal sensor only if ETS is unsupported */ ++ 
hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset); ++ if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) ++ return 0; ++ ++ hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg); ++ if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) ++ != NVM_ETS_TYPE_EMC) ++ return E1000_NOT_IMPLEMENTED; ++ ++ low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> ++ NVM_ETS_LTHRES_DELTA_SHIFT); ++ num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); ++ ++ for (i = 1; i <= num_sensors; i++) { ++ hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor); ++ sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> ++ NVM_ETS_DATA_INDEX_SHIFT); ++ sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> ++ NVM_ETS_DATA_LOC_SHIFT); ++ therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; ++ ++ hw->phy.ops.write_i2c_byte(hw, ++ e1000_emc_therm_limit[sensor_index], ++ E1000_I2C_THERMAL_SENSOR_ADDR, ++ therm_limit); ++ ++ if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) { ++ data->sensor[i].location = sensor_location; ++ data->sensor[i].caution_thresh = therm_limit; ++ data->sensor[i].max_op_thresh = therm_limit - ++ low_thresh_delta; ++ } ++ } ++ return 0; ++} ++ ++#endif ++static struct e1000_mac_operations e1000_mac_ops_82575 = { ++ .init_hw = igb_init_hw_82575, ++ .check_for_link = igb_check_for_link_82575, ++ .rar_set = igb_rar_set, ++ .read_mac_addr = igb_read_mac_addr_82575, ++ .get_speed_and_duplex = igb_get_link_up_info_82575, ++#ifdef CONFIG_IGB_HWMON ++ .get_thermal_sensor_data = igb_get_thermal_sensor_data_generic, ++ .init_thermal_sensor_thresh = igb_init_thermal_sensor_thresh_generic, ++#endif ++}; ++ ++static struct e1000_phy_operations e1000_phy_ops_82575 = { ++ .acquire = igb_acquire_phy_82575, ++ .get_cfg_done = igb_get_cfg_done_82575, ++ .release = igb_release_phy_82575, ++ .write_i2c_byte = igb_write_i2c_byte, ++ .read_i2c_byte = igb_read_i2c_byte, ++}; ++ ++static struct e1000_nvm_operations e1000_nvm_ops_82575 = { ++ .acquire = igb_acquire_nvm_82575, ++ .read = igb_read_nvm_eerd, ++ .release = igb_release_nvm_82575, ++ .write = igb_write_nvm_spi, ++}; ++ ++const struct e1000_info e1000_82575_info = { ++ .get_invariants = igb_get_invariants_82575, ++ .mac_ops = &e1000_mac_ops_82575, ++ .phy_ops = &e1000_phy_ops_82575, ++ .nvm_ops = &e1000_nvm_ops_82575, ++}; ++ +--- linux/drivers/xenomai/net/drivers/igb/e1000_nvm.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_nvm.c 2021-04-07 16:01:27.481633803 +0800 +@@ -0,0 +1,803 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include ++#include ++ ++#include "e1000_mac.h" ++#include "e1000_nvm.h" ++ ++/** ++ * igb_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void igb_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ wr32(E1000_EECD, *eecd); ++ wrfl(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * igb_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void igb_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ wr32(E1000_EECD, *eecd); ++ wrfl(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * igb_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void igb_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ u32 mask; ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ ++ udelay(nvm->delay_usec); ++ ++ igb_raise_eec_clk(hw, &eecd); ++ igb_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ wr32(E1000_EECD, eecd); ++} ++ ++/** ++ * igb_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 igb_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ eecd = rd32(E1000_EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ igb_raise_eec_clk(hw, &eecd); ++ ++ eecd = rd32(E1000_EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ igb_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * igb_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. 
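++ *
++ * With the values used below (100000 attempts spaced 5 usec apart) the
++ * poll gives up after roughly half a second if the DONE bit never sets,
++ * and -E1000_ERR_NVM is returned in that case.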
++ **/ ++static s32 igb_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = rd32(E1000_EERD); ++ else ++ reg = rd32(E1000_EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) { ++ ret_val = 0; ++ break; ++ } ++ ++ udelay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_acquire_nvm - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 igb_acquire_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd = rd32(E1000_EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ s32 ret_val = 0; ++ ++ ++ wr32(E1000_EECD, eecd | E1000_EECD_REQ); ++ eecd = rd32(E1000_EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ udelay(5); ++ eecd = rd32(E1000_EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ wr32(E1000_EECD, eecd); ++ hw_dbg("Could not acquire NVM grant\n"); ++ ret_val = -E1000_ERR_NVM; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void igb_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++static void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ eecd = rd32(E1000_EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ igb_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * igb_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++void igb_release_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = rd32(E1000_EECD); ++ eecd &= ~E1000_EECD_REQ; ++ wr32(E1000_EECD, eecd); ++} ++ ++/** ++ * igb_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = rd32(E1000_EECD); ++ s32 ret_val = 0; ++ u16 timeout = 0; ++ u8 spi_stat_reg; ++ ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ wr32(E1000_EECD, eecd); ++ wrfl(); ++ udelay(1); ++ timeout = NVM_MAX_RETRY_SPI; ++ ++ /* Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. 
++ */ ++ while (timeout) { ++ igb_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)igb_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ udelay(5); ++ igb_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ hw_dbg("SPI NVM Status error\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_nvm_spi - Read EEPROM's using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM. ++ **/ ++s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i = 0; ++ s32 ret_val; ++ u16 word_in; ++ u8 read_opcode = NVM_READ_OPCODE_SPI; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = igb_ready_nvm_eeprom(hw); ++ if (ret_val) ++ goto release; ++ ++ igb_standby_nvm(hw); ++ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ read_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ igb_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); ++ igb_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); ++ ++ /* Read the data. SPI NVMs increment the address with each byte ++ * read and will roll over if reading beyond the end. This allows ++ * us to read the whole NVM from any offset ++ */ ++ for (i = 0; i < words; i++) { ++ word_in = igb_shift_in_eec_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ ++release: ++ nvm->ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ wr32(E1000_EERD, eerd); ++ ret_val = igb_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (rd32(E1000_EERD) >> ++ E1000_NVM_RW_REG_DATA); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. 
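++ *
++ * Each word offset is shifted out as a byte address (offset * 2), a WREN
++ * opcode is issued before every chunk, and the inner loop breaks on page
++ * boundaries so at most one EEPROM page is written per pass.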
++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likley contain an invalid checksum. ++ **/ ++s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val = -E1000_ERR_NVM; ++ u16 widx = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ return ret_val; ++ } ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_ready_nvm_eeprom(hw); ++ if (ret_val) { ++ nvm->ops.release(hw); ++ return ret_val; ++ } ++ ++ igb_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ igb_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ igb_standby_nvm(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ igb_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ igb_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ ++ word_out = (word_out >> 8) | (word_out << 8); ++ igb_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ igb_standby_nvm(hw); ++ break; ++ } ++ } ++ usleep_range(1000, 2000); ++ nvm->ops.release(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_part_string - Read device part number ++ * @hw: pointer to the HW structure ++ * @part_num: pointer to device part number ++ * @part_num_size: size of part number buffer ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in part_num. 
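++ *
++ * Two layouts are handled: the legacy layout, where two EEPROM words are
++ * decoded into a hex string of the form "XXXXXX-0XX" (11 bytes including
++ * the terminator), and the string layout, where the first word holds the
++ * NVM_PBA_PTR_GUARD marker and the second word points to a length-prefixed
++ * string stored in the EEPROM.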
++ **/ ++s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, u32 part_num_size) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ u16 pointer; ++ u16 offset; ++ u16 length; ++ ++ if (part_num == NULL) { ++ hw_dbg("PBA string buffer was null\n"); ++ ret_val = E1000_ERR_INVALID_ARGUMENT; ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pointer); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ /* if nvm_data is not ptr guard the PBA must be in legacy format which ++ * means pointer is actually our second data word for the PBA number ++ * and we can decode it into an ascii string ++ */ ++ if (nvm_data != NVM_PBA_PTR_GUARD) { ++ hw_dbg("NVM PBA number is not stored as string\n"); ++ ++ /* we will need 11 characters to store the PBA */ ++ if (part_num_size < 11) { ++ hw_dbg("PBA string buffer too small\n"); ++ return E1000_ERR_NO_SPACE; ++ } ++ ++ /* extract hex string from data and pointer */ ++ part_num[0] = (nvm_data >> 12) & 0xF; ++ part_num[1] = (nvm_data >> 8) & 0xF; ++ part_num[2] = (nvm_data >> 4) & 0xF; ++ part_num[3] = nvm_data & 0xF; ++ part_num[4] = (pointer >> 12) & 0xF; ++ part_num[5] = (pointer >> 8) & 0xF; ++ part_num[6] = '-'; ++ part_num[7] = 0; ++ part_num[8] = (pointer >> 4) & 0xF; ++ part_num[9] = pointer & 0xF; ++ ++ /* put a null character on the end of our string */ ++ part_num[10] = '\0'; ++ ++ /* switch all the data but the '-' to hex char */ ++ for (offset = 0; offset < 10; offset++) { ++ if (part_num[offset] < 0xA) ++ part_num[offset] += '0'; ++ else if (part_num[offset] < 0x10) ++ part_num[offset] += 'A' - 0xA; ++ } ++ ++ goto out; ++ } ++ ++ ret_val = hw->nvm.ops.read(hw, pointer, 1, &length); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (length == 0xFFFF || length == 0) { ++ hw_dbg("NVM PBA number section invalid length\n"); ++ ret_val = E1000_ERR_NVM_PBA_SECTION; ++ goto out; ++ } ++ /* check if part_num buffer is big enough */ ++ if (part_num_size < (((u32)length * 2) - 1)) { ++ hw_dbg("PBA string buffer too small\n"); ++ ret_val = E1000_ERR_NO_SPACE; ++ goto out; ++ } ++ ++ /* trim pba length from start of string */ ++ pointer++; ++ length--; ++ ++ for (offset = 0; offset < length; offset++) { ++ ret_val = hw->nvm.ops.read(hw, pointer + offset, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ part_num[offset * 2] = (u8)(nvm_data >> 8); ++ part_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); ++ } ++ part_num[offset * 2] = '\0'; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_mac_addr - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. 
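++ *
++ * Note that the address is taken from the RAL(0)/RAH(0) receive address
++ * registers rather than from the EEPROM words directly: the low four bytes
++ * come from RAL(0) and the remaining two from RAH(0).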
++ **/ ++s32 igb_read_mac_addr(struct e1000_hw *hw) ++{ ++ u32 rar_high; ++ u32 rar_low; ++ u16 i; ++ ++ rar_high = rd32(E1000_RAH(0)); ++ rar_low = rd32(E1000_RAL(0)); ++ ++ for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); ++ ++ for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); ++ ++ for (i = 0; i < ETH_ALEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++ return 0; ++} ++ ++/** ++ * igb_validate_nvm_checksum - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++s32 igb_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ hw_dbg("NVM Checksum Invalid\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_update_nvm_checksum - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 igb_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_fw_version - Get firmware version information ++ * @hw: pointer to the HW structure ++ * @fw_vers: pointer to output structure ++ * ++ * unsupported MAC types will return all 0 version structure ++ **/ ++void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) ++{ ++ u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; ++ u8 q, hval, rem, result; ++ u16 comb_verh, comb_verl, comb_offset; ++ ++ memset(fw_vers, 0, sizeof(struct e1000_fw_version)); ++ ++ /* basic eeprom version numbers and bits used vary by part and by tool ++ * used to create the nvm images. Check which data format we have. 
++ */ ++ hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); ++ switch (hw->mac.type) { ++ case e1000_i211: ++ igb_read_invm_version(hw, fw_vers); ++ return; ++ case e1000_82575: ++ case e1000_82576: ++ case e1000_82580: ++ /* Use this format, unless EETRACK ID exists, ++ * then use alternate format ++ */ ++ if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { ++ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); ++ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) ++ >> NVM_MAJOR_SHIFT; ++ fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) ++ >> NVM_MINOR_SHIFT; ++ fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); ++ goto etrack_id; ++ } ++ break; ++ case e1000_i210: ++ if (!(igb_get_flash_presence_i210(hw))) { ++ igb_read_invm_version(hw, fw_vers); ++ return; ++ } ++ /* fall through */ ++ case e1000_i350: ++ /* find combo image version */ ++ hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); ++ if ((comb_offset != 0x0) && ++ (comb_offset != NVM_VER_INVALID)) { ++ ++ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset ++ + 1), 1, &comb_verh); ++ hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), ++ 1, &comb_verl); ++ ++ /* get Option Rom version if it exists and is valid */ ++ if ((comb_verh && comb_verl) && ++ ((comb_verh != NVM_VER_INVALID) && ++ (comb_verl != NVM_VER_INVALID))) { ++ ++ fw_vers->or_valid = true; ++ fw_vers->or_major = ++ comb_verl >> NVM_COMB_VER_SHFT; ++ fw_vers->or_build = ++ (comb_verl << NVM_COMB_VER_SHFT) ++ | (comb_verh >> NVM_COMB_VER_SHFT); ++ fw_vers->or_patch = ++ comb_verh & NVM_COMB_VER_MASK; ++ } ++ } ++ break; ++ default: ++ return; ++ } ++ hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); ++ fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) ++ >> NVM_MAJOR_SHIFT; ++ ++ /* check for old style version format in newer images*/ ++ if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { ++ eeprom_verl = (fw_version & NVM_COMB_VER_MASK); ++ } else { ++ eeprom_verl = (fw_version & NVM_MINOR_MASK) ++ >> NVM_MINOR_SHIFT; ++ } ++ /* Convert minor value to hex before assigning to output struct ++ * Val to be converted will not be higher than 99, per tool output ++ */ ++ q = eeprom_verl / NVM_HEX_CONV; ++ hval = q * NVM_HEX_TENS; ++ rem = eeprom_verl % NVM_HEX_CONV; ++ result = hval + rem; ++ fw_vers->eep_minor = result; ++ ++etrack_id: ++ if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { ++ hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); ++ hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); ++ fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) ++ | eeprom_verl; ++ } ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mbx.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mbx.h 2021-04-07 16:01:27.477633809 +0800 +@@ -0,0 +1,73 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . 
++ *
++ * The full GNU General Public License is included in this distribution in
++ * the file called "COPYING".
++ *
++ * Contact Information:
++ * e1000-devel Mailing List
++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
++ */
++
++#ifndef _E1000_MBX_H_
++#define _E1000_MBX_H_
++
++#include "e1000_hw.h"
++
++#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */
++#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */
++#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */
++#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */
++#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */
++
++#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */
++#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */
++#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */
++#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */
++
++#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */
++
++/* If it's an E1000_VF_* msg then it originates in the VF and is sent to the
++ * PF. The reverse is true if it is E1000_PF_*.
++ * Message ACK's are the value or'd with 0xF0000000
++ */
++/* Messages below or'd with this are the ACK */
++#define E1000_VT_MSGTYPE_ACK 0x80000000
++/* Messages below or'd with this are the NACK */
++#define E1000_VT_MSGTYPE_NACK 0x40000000
++/* Indicates that VF is still clear to send requests */
++#define E1000_VT_MSGTYPE_CTS 0x20000000
++#define E1000_VT_MSGINFO_SHIFT 16
++/* bits 23:16 are used for extra info for certain messages */
++#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
++
++#define E1000_VF_RESET 0x01 /* VF requests reset */
++#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */
++#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */
++#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */
++#define E1000_VF_SET_LPE 0x05 /* VF requests to set VMOLR.LPE */
++#define E1000_VF_SET_PROMISC 0x06 /* VF requests to clear VMOLR.ROPE/MPME */
++#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT)
++
++#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */
++
++s32 igb_read_mbx(struct e1000_hw *, u32 *, u16, u16);
++s32 igb_write_mbx(struct e1000_hw *, u32 *, u16, u16);
++s32 igb_check_for_msg(struct e1000_hw *, u16);
++s32 igb_check_for_ack(struct e1000_hw *, u16);
++s32 igb_check_for_rst(struct e1000_hw *, u16);
++s32 igb_init_mbx_params_pf(struct e1000_hw *);
++
++#endif /* _E1000_MBX_H_ */
+--- linux/drivers/xenomai/net/drivers/igb/e1000_phy.c	1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_phy.c	2021-04-07 16:01:27.472633816 +0800
+@@ -0,0 +1,2512 @@
++/* Intel(R) Gigabit Ethernet Linux driver
++ * Copyright(c) 2007-2014 Intel Corporation.
++ * RTnet port 2009 Vladimir Zapolskiy
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, see <http://www.gnu.org/licenses/>.
++ *
++ * The full GNU General Public License is included in this distribution in
++ * the file called "COPYING".
++ *
++ * Contact Information:
++ * e1000-devel Mailing List
++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
++ */
++
++#include <linux/if_ether.h>
++#include <linux/delay.h>
++
++#include "e1000_mac.h"
++#include "e1000_phy.h"
++
++static s32 igb_phy_setup_autoneg(struct e1000_hw *hw);
++static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw,
++					     u16 *phy_ctrl);
++static s32 igb_wait_autoneg(struct e1000_hw *hw);
++static s32 igb_set_master_slave_mode(struct e1000_hw *hw);
++
++/* Cable length tables */
++static const u16 e1000_m88_cable_length_table[] = {
++	0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
++	(sizeof(e1000_m88_cable_length_table) / \
++	 sizeof(e1000_m88_cable_length_table[0]))
++
++static const u16 e1000_igp_2_cable_length_table[] = {
++	0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21,
++	0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41,
++	6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61,
++	21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82,
++	40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104,
++	60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121,
++	83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124,
++	104, 109, 114, 118, 121, 124};
++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
++	(sizeof(e1000_igp_2_cable_length_table) / \
++	 sizeof(e1000_igp_2_cable_length_table[0]))
++
++/**
++ * igb_check_reset_block - Check if PHY reset is blocked
++ * @hw: pointer to the HW structure
++ *
++ * Read the PHY management control register and check whether a PHY reset
++ * is blocked. If a reset is not blocked return 0, otherwise
++ * return E1000_BLK_PHY_RESET (12).
++ **/
++s32 igb_check_reset_block(struct e1000_hw *hw)
++{
++	u32 manc;
++
++	manc = rd32(E1000_MANC);
++
++	return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? E1000_BLK_PHY_RESET : 0;
++}
++
++/**
++ * igb_get_phy_id - Retrieve the PHY ID and revision
++ * @hw: pointer to the HW structure
++ *
++ * Reads the PHY registers and stores the PHY ID and possibly the PHY
++ * revision in the hardware structure.
++ **/
++s32 igb_get_phy_id(struct e1000_hw *hw)
++{
++	struct e1000_phy_info *phy = &hw->phy;
++	s32 ret_val = 0;
++	u16 phy_id;
++
++	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
++	if (ret_val)
++		goto out;
++
++	phy->id = (u32)(phy_id << 16);
++	udelay(20);
++	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
++	if (ret_val)
++		goto out;
++
++	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
++	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
++
++out:
++	return ret_val;
++}
++
++/**
++ * igb_phy_reset_dsp - Reset PHY DSP
++ * @hw: pointer to the HW structure
++ *
++ * Reset the digital signal processor.
++ **/
++static s32 igb_phy_reset_dsp(struct e1000_hw *hw)
++{
++	s32 ret_val = 0;
++
++	if (!(hw->phy.ops.write_reg))
++		goto out;
++
++	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
++	if (ret_val)
++		goto out;
++
++	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
++
++out:
++	return ret_val;
++}
++
++/**
++ * igb_read_phy_reg_mdic - Read MDI control register
++ * @hw: pointer to the HW structure
++ * @offset: register offset to be read
++ * @data: pointer to the read data
++ *
++ * Reads the MDI control register in the PHY at offset and stores the
++ * information read to data.
++ **/ ++s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ wr32(E1000_MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = rd32(E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ hw_dbg("MDI Read did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ hw_dbg("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ *data = (u16) mdic; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ s32 ret_val = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ hw_dbg("PHY Address %d is out of range\n", offset); ++ ret_val = -E1000_ERR_PARAM; ++ goto out; ++ } ++ ++ /* Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ wr32(E1000_MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = rd32(E1000_MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ hw_dbg("MDI Write did not complete\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ hw_dbg("MDI Error\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_read_phy_reg_i2c - Read PHY register using i2c ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the i2c interface and stores the ++ * retrieved information in data. ++ **/ ++s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, i2ccmd = 0; ++ ++ /* Set up Op-code, Phy Address, and register address in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. 
++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | ++ (E1000_I2CCMD_OPCODE_READ)); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ i2ccmd = rd32(E1000_I2CCMD); ++ if (i2ccmd & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(i2ccmd & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (i2ccmd & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* Need to byte-swap the 16-bit value. */ ++ *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); ++ ++ return 0; ++} ++ ++/** ++ * igb_write_phy_reg_i2c - Write PHY register using i2c ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset using the i2c interface. ++ **/ ++s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, i2ccmd = 0; ++ u16 phy_data_swapped; ++ ++ /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ ++ if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { ++ hw_dbg("PHY I2C Address %d is out of range.\n", ++ hw->phy.addr); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ /* Swap the data bytes for the I2C interface */ ++ phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); ++ ++ /* Set up Op-code, Phy Address, and register address in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | ++ E1000_I2CCMD_OPCODE_WRITE | ++ phy_data_swapped); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ i2ccmd = rd32(E1000_I2CCMD); ++ if (i2ccmd & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(i2ccmd & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (i2ccmd & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_read_sfp_data_byte - Reads SFP module data. ++ * @hw: pointer to the HW structure ++ * @offset: byte location offset to be read ++ * @data: read data buffer pointer ++ * ++ * Reads one byte from SFP module data stored ++ * in SFP resided EEPROM memory or SFP diagnostic area. ++ * Function should be called with ++ * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access ++ * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters ++ * access ++ **/ ++s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) ++{ ++ u32 i = 0; ++ u32 i2ccmd = 0; ++ u32 data_local = 0; ++ ++ if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { ++ hw_dbg("I2CCMD command address exceeds upper limit\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* Set up Op-code, EEPROM Address,in the I2CCMD ++ * register. The MAC will take care of interfacing with the ++ * EEPROM to retrieve the desired data. 
++ */ ++ i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | ++ E1000_I2CCMD_OPCODE_READ); ++ ++ wr32(E1000_I2CCMD, i2ccmd); ++ ++ /* Poll the ready bit to see if the I2C read completed */ ++ for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { ++ udelay(50); ++ data_local = rd32(E1000_I2CCMD); ++ if (data_local & E1000_I2CCMD_READY) ++ break; ++ } ++ if (!(data_local & E1000_I2CCMD_READY)) { ++ hw_dbg("I2CCMD Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (data_local & E1000_I2CCMD_ERROR) { ++ hw_dbg("I2CCMD Error bit set\n"); ++ return -E1000_ERR_PHY; ++ } ++ *data = (u8) data_local & 0xFF; ++ ++ return 0; ++} ++ ++/** ++ * igb_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val = 0; ++ ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = igb_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ hw->phy.ops.release(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = igb_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val = 0; ++ ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = igb_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) { ++ hw->phy.ops.release(hw); ++ goto out; ++ } ++ } ++ ++ ret_val = igb_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_82580 - Setup 82580 PHY for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Carrier-sense on Transmit and downshift values. ++ **/ ++s32 igb_copper_link_setup_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ if (phy->type == e1000_phy_82580) { ++ ret_val = hw->phy.ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. 
*/ ++ ret_val = phy->ops.read_reg(hw, I82580_CFG_REG, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= I82580_CFG_ASSERT_CRS_ON_TX; ++ ++ /* Enable downshift */ ++ phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ++ ++ ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Set MDI/MDIX mode */ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); ++ if (ret_val) ++ goto out; ++ phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ++ /* Options: ++ * 0 - Auto (default) ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ */ ++ switch (hw->phy.mdix) { ++ case 1: ++ break; ++ case 2: ++ phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX; ++ break; ++ } ++ ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 igb_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ if (phy->revision < E1000_REVISION_4) { ++ /* Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == E1000_REVISION_2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Commit the changes. 
*/ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. ++ * Also enables and sets the downshift parameters. ++ **/ ++s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ if (phy->reset_disable) ++ return 0; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ /* M88E1112 does not support this mode) */ ++ if (phy->id != M88E1112_E_PHY_ID) { ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ } ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ /* Enable downshift and setting it to X6 */ ++ if (phy->id == M88E1543_E_PHY_ID) { ++ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; ++ ret_val = ++ phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ } ++ ++ phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; ++ phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; ++ phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; ++ ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Commit the changes. */ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) { ++ hw_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ ret_val = igb_set_master_slave_mode(hw); ++ if (ret_val) ++ return ret_val; ++ ++ return 0; ++} ++ ++/** ++ * igb_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. ++ **/ ++s32 igb_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ if (phy->reset_disable) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.reset(hw); ++ if (ret_val) { ++ hw_dbg("Error resetting the PHY.\n"); ++ goto out; ++ } ++ ++ /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid ++ * timeout issues when LFS is enabled. ++ */ ++ msleep(100); ++ ++ /* The NVM settings will configure LPLU in D3 for ++ * non-IGP1 PHYs. 
++ */ ++ if (phy->type == e1000_phy_igp) { ++ /* disable lplu d3 during driver init */ ++ if (phy->ops.set_d3_lplu_state) ++ ret_val = phy->ops.set_d3_lplu_state(hw, false); ++ if (ret_val) { ++ hw_dbg("Error Disabling LPLU D3\n"); ++ goto out; ++ } ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = phy->ops.set_d0_lplu_state(hw, false); ++ if (ret_val) { ++ hw_dbg("Error Disabling LPLU D0\n"); ++ goto out; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ goto out; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. ++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ goto out; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++static s32 igb_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ /* Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. 
++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ hw_dbg("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = igb_phy_setup_autoneg(hw); ++ if (ret_val) { ++ hw_dbg("Error Setting up Auto-Negotiation\n"); ++ goto out; ++ } ++ hw_dbg("Restarting Auto-Neg\n"); ++ ++ /* Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ /* Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = igb_wait_autoneg(hw); ++ if (ret_val) { ++ hw_dbg("Error while waiting for autoneg to complete\n"); ++ goto out; ++ } ++ } ++ ++ hw->mac.get_link_status = true; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++static s32 igb_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, ++ &mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ hw_dbg("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ hw_dbg("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ hw_dbg("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? 
*/ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ hw_dbg("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) ++ hw_dbg("Advertise 1000mb Half duplex request denied!\n"); ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ hw_dbg("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* Flow control (RX & TX) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* RX Flow control is enabled, and TX Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* TX Flow control is enabled, and RX Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* Flow control (both RX and TX) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ hw_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ ret_val = phy->ops.write_reg(hw, ++ PHY_1000T_CTRL, ++ mii_1000t_ctrl_reg); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_copper_link - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_setup_copper_link(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ if (hw->mac.autoneg) { ++ /* Setup autoneg and flow control advertisement and perform ++ * autonegotiation. 
++ */ ++ ret_val = igb_copper_link_autoneg(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ hw_dbg("Forcing Speed and Duplex\n"); ++ ret_val = hw->phy.ops.force_speed_duplex(hw); ++ if (ret_val) { ++ hw_dbg("Error Forcing Speed and Duplex\n"); ++ goto out; ++ } ++ } ++ ++ /* Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = igb_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link); ++ if (ret_val) ++ goto out; ++ ++ if (link) { ++ hw_dbg("Valid link established!!!\n"); ++ igb_config_collision_dist(hw); ++ ret_val = igb_config_fc_after_link_up(hw); ++ } else { ++ hw_dbg("Unable to establish link!!!\n"); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("IGP PSCR: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ hw_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on TX must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* I210 and I211 devices support Auto-Crossover in forced operation. */ ++ if (phy->type != e1000_phy_i210) { ++ /* Clear Auto-Crossover to force MDI manually. M88E1000 ++ * requires MDI forced whenever speed and duplex are forced. 
++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("M88E1000 PSCR: %X\n", phy_data); ++ } ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Reset the phy to commit changes. */ ++ ret_val = igb_phy_sw_reset(hw); ++ if (ret_val) ++ goto out; ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ bool reset_dsp = true; ++ ++ switch (hw->phy.id) { ++ case I347AT4_E_PHY_ID: ++ case M88E1112_E_PHY_ID: ++ case I210_I_PHY_ID: ++ reset_dsp = false; ++ break; ++ default: ++ if (hw->phy.type != e1000_phy_m88) ++ reset_dsp = false; ++ break; ++ } ++ if (!reset_dsp) ++ hw_dbg("Link taking longer than expected.\n"); ++ else { ++ /* We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = phy->ops.write_reg(hw, ++ M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ goto out; ++ ret_val = igb_phy_reset_dsp(hw); ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (hw->phy.type != e1000_phy_m88 || ++ hw->phy.id == I347AT4_E_PHY_ID || ++ hw->phy.id == M88E1112_E_PHY_ID || ++ hw->phy.id == I210_I_PHY_ID) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. 
++ **/ ++static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw, ++ u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ hw_dbg("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ hw_dbg("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? */ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ hw_dbg("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ hw_dbg("Forcing 10mb\n"); ++ } ++ ++ igb_config_collision_dist(hw); ++ ++ wr32(E1000_CTRL, ctrl); ++} ++ ++/** ++ * igb_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (!(hw->phy.ops.read_reg)) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ goto out; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. 
++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = phy->ops.read_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, ++ IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ goto out; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_check_downshift - Checks whether a downshift in speed occurred ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. ++ **/ ++s32 igb_check_downshift(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ switch (phy->type) { ++ case e1000_phy_i210: ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = false; ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask) ? true : false; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 igb_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++static s32 igb_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ /* Polarity is determined based on the speed of ++ * our connection. 
++ */ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_wait_autoneg - Wait for auto-neg completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++static s32 igb_wait_autoneg(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msleep(100); ++ } ++ ++ /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * igb_phy_has_link - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. ++ **/ ++s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ for (i = 0; i < iterations; i++) { ++ /* Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val && usec_interval > 0) { ++ /* If the first read fails, another entity may have ++ * ownership of the resources, wait and try again to ++ * see if they have relinquished the resources yet. ++ */ ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ++ *success = (i < iterations) ? true : false; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. 
The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 igb_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, phy_data2, index, default_page, is_cm; ++ ++ switch (hw->phy.id) { ++ case I210_I_PHY_ID: ++ /* Get cable length from PHY Cable Diagnostics Control Reg */ ++ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + ++ (I347AT4_PCDL + phy->addr), ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Check if the unit of cable length is meters or cm */ ++ ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + ++ I347AT4_PCDC, &phy_data2); ++ if (ret_val) ++ return ret_val; ++ ++ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); ++ ++ /* Populate the phy structure with cable length in meters */ ++ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->max_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->cable_length = phy_data / (is_cm ? 100 : 1); ++ break; ++ case M88E1543_E_PHY_ID: ++ case I347AT4_E_PHY_ID: ++ /* Remember the original page select and set it to 7 */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); ++ if (ret_val) ++ goto out; ++ ++ /* Get cable length from PHY Cable Diagnostics Control Reg */ ++ ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Check if the unit of cable length is meters or cm */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); ++ if (ret_val) ++ goto out; ++ ++ is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); ++ ++ /* Populate the phy structure with cable length in meters */ ++ phy->min_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->max_cable_length = phy_data / (is_cm ? 100 : 1); ++ phy->cable_length = phy_data / (is_cm ? 
100 : 1); ++ ++ /* Reset the page selec to its original value */ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, ++ default_page); ++ if (ret_val) ++ goto out; ++ break; ++ case M88E1112_E_PHY_ID: ++ /* Remember the original page select and set it to 5 */ ++ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, ++ &default_page); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + ++ phy->max_cable_length) / 2; ++ ++ /* Reset the page select to its original value */ ++ ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, ++ default_page); ++ if (ret_val) ++ goto out; ++ ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which represent the ++ * combination of coarse and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. ++ **/ ++s32 igb_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { ++ IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D ++ }; ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Getting bits 15:9, which represent the combination of ++ * coarse and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. ++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. 
*/ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 igb_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ if (phy->media_type != e1000_media_type_copper) { ++ hw_dbg("Phy info is only valid for copper media\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->polarity_correction = (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) ++ ? true : false; ++ ++ ret_val = igb_check_polarity_m88(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX) ? true : false; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = phy->ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 igb_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = igb_check_polarity_igp(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX) ? 
true : false; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = phy->ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_sw_reset - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. ++ **/ ++s32 igb_phy_sw_reset(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 phy_ctrl; ++ ++ if (!(hw->phy.ops.read_reg)) ++ goto out; ++ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ goto out; ++ ++ udelay(1); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_hw_reset - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and release the semaphore (if necessary). ++ **/ ++s32 igb_phy_hw_reset(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ ret_val = igb_check_reset_block(hw); ++ if (ret_val) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ ret_val = phy->ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ ctrl = rd32(E1000_CTRL); ++ wr32(E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); ++ wrfl(); ++ ++ udelay(phy->reset_delay_us); ++ ++ wr32(E1000_CTRL, ctrl); ++ wrfl(); ++ ++ udelay(150); ++ ++ phy->ops.release(hw); ++ ++ ret_val = phy->ops.get_cfg_done(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. 
++ **/ ++s32 igb_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ hw_dbg("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to TX amplitude in Giga mode */ ++ hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ hw->phy.ops.write_reg(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ hw->phy.ops.write_reg(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ hw->phy.ops.write_reg(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ hw->phy.ops.write_reg(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ hw->phy.ops.write_reg(hw, 0x1798, 0xD008); ++ /* Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ hw->phy.ops.write_reg(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ hw->phy.ops.write_reg(hw, 0x187A, 0x0800); ++ /* Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ hw->phy.ops.write_reg(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ hw->phy.ops.write_reg(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ hw->phy.ops.write_reg(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ hw->phy.ops.write_reg(hw, 0x0000, 0x1340); ++ ++ return 0; ++} ++ ++/** ++ * igb_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, restore the link to previous settings. 
++ **/ ++void igb_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * igb_power_down_phy_copper - Power down copper PHY ++ * @hw: pointer to the HW structure ++ * ++ * Power down PHY to save power when interface is down and wake on lan ++ * is not enabled. ++ **/ ++void igb_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * igb_check_polarity_82580 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++static s32 igb_check_polarity_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & I82580_PHY_STATUS2_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_phy_force_speed_duplex_82580 - Force speed/duplex for I82580 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ igb_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ /* Clear Auto-Crossover to force MDI manually. 82580 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ++ ++ ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); ++ if (ret_val) ++ goto out; ++ ++ hw_dbg("I82580_PHY_CTRL_2: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ hw_dbg("Waiting for forced speed/duplex link on 82580 phy\n"); ++ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ hw_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_phy_info_82580 - Retrieve I82580 PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. 
++ **/ ++s32 igb_get_phy_info_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ hw_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = igb_check_polarity_82580(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & I82580_PHY_STATUS2_MDIX) ? true : false; ++ ++ if ((data & I82580_PHY_STATUS2_SPEED_MASK) == ++ I82580_PHY_STATUS2_SPEED_1000MBPS) { ++ ret_val = hw->phy.ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_cable_length_82580 - Determine cable length for 82580 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the diagnostic status register and verifies result is valid before ++ * placing it in the phy_cable_length field. ++ **/ ++s32 igb_get_cable_length_82580(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, length; ++ ++ ret_val = phy->ops.read_reg(hw, I82580_PHY_DIAG_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >> ++ I82580_DSTATUS_CABLE_LENGTH_SHIFT; ++ ++ if (length == E1000_CABLE_LENGTH_UNDEFINED) ++ ret_val = -E1000_ERR_PHY; ++ ++ phy->cable_length = length; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_phy_reg_gs40g - Write GS40G PHY register ++ * @hw: pointer to the HW structure ++ * @offset: lower half is register offset to write to ++ * upper half is page to use. ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 igb_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u16 page = offset >> GS40G_PAGE_SHIFT; ++ ++ offset = offset & GS40G_OFFSET_MASK; ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); ++ if (ret_val) ++ goto release; ++ ret_val = igb_write_phy_reg_mdic(hw, offset, data); ++ ++release: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_read_phy_reg_gs40g - Read GS40G PHY register ++ * @hw: pointer to the HW structure ++ * @offset: lower half is register offset to read to ++ * upper half is page to use. ++ * @data: data to read at register offset ++ * ++ * Acquires semaphore, if necessary, then reads the data in the PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 igb_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u16 page = offset >> GS40G_PAGE_SHIFT; ++ ++ offset = offset & GS40G_OFFSET_MASK; ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = igb_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); ++ if (ret_val) ++ goto release; ++ ret_val = igb_read_phy_reg_mdic(hw, offset, data); ++ ++release: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * igb_set_master_slave_mode - Setup PHY for Master/slave mode ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Master/slave mode ++ **/ ++static s32 igb_set_master_slave_mode(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Resolve Master/Slave mode */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? ++ ((phy_data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : e1000_ms_auto; ++ ++ switch (hw->phy.ms_type) { ++ case e1000_ms_force_master: ++ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ phy_data |= CR_1000T_MS_ENABLE; ++ phy_data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ /* fall-through */ ++ default: ++ break; ++ } ++ ++ return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mac.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mac.c 2021-04-07 16:01:27.467633823 +0800 +@@ -0,0 +1,1607 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000_mac.h" ++ ++#include "igb.h" ++ ++static s32 igb_set_default_fc(struct e1000_hw *hw); ++static s32 igb_set_fc_watermarks(struct e1000_hw *hw); ++ ++/** ++ * igb_get_bus_info_pcie - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. 
++ **/ ++s32 igb_get_bus_info_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ u32 reg; ++ u16 pcie_link_status; ++ ++ bus->type = e1000_bus_type_pci_express; ++ ++ ret_val = igb_read_pcie_cap_reg(hw, ++ PCI_EXP_LNKSTA, ++ &pcie_link_status); ++ if (ret_val) { ++ bus->width = e1000_bus_width_unknown; ++ bus->speed = e1000_bus_speed_unknown; ++ } else { ++ switch (pcie_link_status & PCI_EXP_LNKSTA_CLS) { ++ case PCI_EXP_LNKSTA_CLS_2_5GB: ++ bus->speed = e1000_bus_speed_2500; ++ break; ++ case PCI_EXP_LNKSTA_CLS_5_0GB: ++ bus->speed = e1000_bus_speed_5000; ++ break; ++ default: ++ bus->speed = e1000_bus_speed_unknown; ++ break; ++ } ++ ++ bus->width = (enum e1000_bus_width)((pcie_link_status & ++ PCI_EXP_LNKSTA_NLW) >> ++ PCI_EXP_LNKSTA_NLW_SHIFT); ++ } ++ ++ reg = rd32(E1000_STATUS); ++ bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++ ++ return 0; ++} ++ ++/** ++ * igb_clear_vfta - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void igb_clear_vfta(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ array_wr32(E1000_VFTA, offset, 0); ++ wrfl(); ++ } ++} ++ ++/** ++ * igb_write_vfta - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ array_wr32(E1000_VFTA, offset, value); ++ wrfl(); ++} ++ ++/* Due to a hw errata, if the host tries to configure the VFTA register ++ * while performing queries from the BMC or DMA, then the VFTA in some ++ * cases won't be written. ++ */ ++ ++/** ++ * igb_clear_vfta_i350 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void igb_clear_vfta_i350(struct e1000_hw *hw) ++{ ++ u32 offset; ++ int i; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ for (i = 0; i < 10; i++) ++ array_wr32(E1000_VFTA, offset, 0); ++ ++ wrfl(); ++ } ++} ++ ++/** ++ * igb_write_vfta_i350 - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ int i; ++ ++ for (i = 0; i < 10; i++) ++ array_wr32(E1000_VFTA, offset, value); ++ ++ wrfl(); ++} ++ ++/** ++ * igb_init_rx_addrs - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setups the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. 
++ **/ ++void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ u8 mac_addr[ETH_ALEN] = {0}; ++ ++ /* Setup the receive address */ ++ hw_dbg("Programming MAC Address into RAR[0]\n"); ++ ++ hw->mac.ops.rar_set(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ hw_dbg("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) ++ hw->mac.ops.rar_set(hw, mac_addr, i); ++} ++ ++/** ++ * igb_vfta_set - enable or disable vlan in VLAN filter table ++ * @hw: pointer to the HW structure ++ * @vid: VLAN id to add or remove ++ * @add: if true add filter, if false remove ++ * ++ * Sets or clears a bit in the VLAN filter table array based on VLAN id ++ * and if we are adding or removing the filter ++ **/ ++s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) ++{ ++ u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; ++ u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ u32 vfta; ++ struct igb_adapter *adapter = hw->back; ++ s32 ret_val = 0; ++ ++ vfta = adapter->shadow_vfta[index]; ++ ++ /* bit was set/cleared before we started */ ++ if ((!!(vfta & mask)) == add) { ++ ret_val = -E1000_ERR_CONFIG; ++ } else { ++ if (add) ++ vfta |= mask; ++ else ++ vfta &= ~mask; ++ } ++ if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) ++ igb_write_vfta_i350(hw, index, vfta); ++ else ++ igb_write_vfta(hw, index, vfta); ++ adapter->shadow_vfta[index] = vfta; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_alt_mac_addr - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is saved in the hw struct and ++ * programmed into RAR0 and the function returns success, otherwise the ++ * function returns an error. ++ **/ ++s32 igb_check_alt_mac_addr(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = 0; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ALEN]; ++ ++ /* Alternate MAC address is handled by the option ROM for 82580 ++ * and newer. SW support not required. 
++ */ ++ if (hw->mac.type >= e1000_82580) ++ goto out; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_alt_mac_addr_offset == 0xFFFF) || ++ (nvm_alt_mac_addr_offset == 0x0000)) ++ /* There is no Alternate MAC Address */ ++ goto out; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; ++ if (hw->bus.func == E1000_FUNC_2) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; ++ ++ if (hw->bus.func == E1000_FUNC_3) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; ++ for (i = 0; i < ETH_ALEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (is_multicast_ether_addr(alt_mac_addr)) { ++ hw_dbg("Ignoring Alternate Mac Address with MC bit set\n"); ++ goto out; ++ } ++ ++ /* We have a valid alternate MAC address, and we want to treat it the ++ * same as the normal permanent MAC address stored by the HW into the ++ * RAR. Do this by mapping this address into RAR0. ++ */ ++ hw->mac.ops.rar_set(hw, alt_mac_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_rar_set - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ /* Some bridges will combine consecutive 32-bit writes into ++ * a single burst write, which will malfunction on some parts. ++ * The flushes avoid this. ++ */ ++ wr32(E1000_RAL(index), rar_low); ++ wrfl(); ++ wr32(E1000_RAH(index), rar_high); ++ wrfl(); ++} ++ ++/** ++ * igb_mta_set - Set multicast filter table address ++ * @hw: pointer to the HW structure ++ * @hash_value: determines the MTA register and bit to set ++ * ++ * The multicast table address is a register array of 32-bit registers. ++ * The hash_value is used to determine what register the bit is in, the ++ * current value is read, the new bit is OR'd in and the new value is ++ * written back into the register. ++ **/ ++void igb_mta_set(struct e1000_hw *hw, u32 hash_value) ++{ ++ u32 hash_bit, hash_reg, mta; ++ ++ /* The MTA is a register array of 32-bit registers. It is ++ * treated like an array of (32*mta_reg_count) bits. We want to ++ * set bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The (hw->mac.mta_reg_count - 1) serves as a ++ * mask to bits 31:5 of the hash value which gives us the ++ * register we're modifying. The hash bit within that register ++ * is determined by the lower 5 bits of the hash value. 
++ */ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ mta = array_rd32(E1000_MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ array_wr32(E1000_MTA, hash_reg, mta); ++ wrfl(); ++} ++ ++/** ++ * igb_hash_mc_addr - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * igb_mta_set() ++ **/ ++static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. ++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * igb_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates entire Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. 
++ **/ ++void igb_update_mc_addr_list(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count) ++{ ++ u32 hash_value, hash_bit, hash_reg; ++ int i; ++ ++ /* clear mta_shadow */ ++ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); ++ ++ /* update mta_shadow from mc_addr_list */ ++ for (i = 0; (u32) i < mc_addr_count; i++) { ++ hash_value = igb_hash_mc_addr(hw, mc_addr_list); ++ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); ++ mc_addr_list += (ETH_ALEN); ++ } ++ ++ /* replace the entire MTA table */ ++ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) ++ array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); ++ wrfl(); ++} ++ ++/** ++ * igb_clear_hw_cntrs_base - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. ++ **/ ++void igb_clear_hw_cntrs_base(struct e1000_hw *hw) ++{ ++ rd32(E1000_CRCERRS); ++ rd32(E1000_SYMERRS); ++ rd32(E1000_MPC); ++ rd32(E1000_SCC); ++ rd32(E1000_ECOL); ++ rd32(E1000_MCC); ++ rd32(E1000_LATECOL); ++ rd32(E1000_COLC); ++ rd32(E1000_DC); ++ rd32(E1000_SEC); ++ rd32(E1000_RLEC); ++ rd32(E1000_XONRXC); ++ rd32(E1000_XONTXC); ++ rd32(E1000_XOFFRXC); ++ rd32(E1000_XOFFTXC); ++ rd32(E1000_FCRUC); ++ rd32(E1000_GPRC); ++ rd32(E1000_BPRC); ++ rd32(E1000_MPRC); ++ rd32(E1000_GPTC); ++ rd32(E1000_GORCL); ++ rd32(E1000_GORCH); ++ rd32(E1000_GOTCL); ++ rd32(E1000_GOTCH); ++ rd32(E1000_RNBC); ++ rd32(E1000_RUC); ++ rd32(E1000_RFC); ++ rd32(E1000_ROC); ++ rd32(E1000_RJC); ++ rd32(E1000_TORL); ++ rd32(E1000_TORH); ++ rd32(E1000_TOTL); ++ rd32(E1000_TOTH); ++ rd32(E1000_TPR); ++ rd32(E1000_TPT); ++ rd32(E1000_MPTC); ++ rd32(E1000_BPTC); ++} ++ ++/** ++ * igb_check_for_copper_link - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 igb_check_for_copper_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ /* We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = igb_phy_has_link(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ /* Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ igb_check_downshift(hw); ++ ++ /* If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ igb_config_collision_dist(hw); ++ ++ /* Configure Flow Control now that Auto-Neg has completed. 
++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = igb_config_fc_after_link_up(hw); ++ if (ret_val) ++ hw_dbg("Error configuring flow control\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_setup_link - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 igb_setup_link(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. ++ */ ++ if (igb_check_reset_block(hw)) ++ goto out; ++ ++ /* If requested flow control is set to default, set flow control ++ * based on the EEPROM flow control settings. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ ret_val = igb_set_default_fc(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. ++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = hw->mac.ops.setup_physical_interface(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ hw_dbg("Initializing the Flow Control address, type and timer regs\n"); ++ wr32(E1000_FCT, FLOW_CONTROL_TYPE); ++ wr32(E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ wr32(E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ wr32(E1000_FCTTV, hw->fc.pause_time); ++ ++ ret_val = igb_set_fc_watermarks(hw); ++ ++out: ++ ++ return ret_val; ++} ++ ++/** ++ * igb_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void igb_config_collision_dist(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ tctl = rd32(E1000_TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ wr32(E1000_TCTL, tctl); ++ wrfl(); ++} ++ ++/** ++ * igb_set_fc_watermarks - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * tansmission as well. ++ **/ ++static s32 igb_set_fc_watermarks(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 fcrtl = 0, fcrth = 0; ++ ++ /* Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. 
However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.current_mode & e1000_fc_tx_pause) { ++ /* We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ if (hw->fc.send_xon) ++ fcrtl |= E1000_FCRTL_XONE; ++ ++ fcrth = hw->fc.high_water; ++ } ++ wr32(E1000_FCRTL, fcrtl); ++ wr32(E1000_FCRTH, fcrth); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_set_default_fc - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++static s32 igb_set_default_fc(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 lan_offset; ++ u16 nvm_data; ++ ++ /* Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ if (hw->mac.type == e1000_i350) { ++ lan_offset = NVM_82580_LAN_FUNC_OFFSET(hw->bus.func); ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG ++ + lan_offset, 1, &nvm_data); ++ } else { ++ ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, ++ 1, &nvm_data); ++ } ++ ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.requested_mode = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.requested_mode = e1000_fc_tx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_force_mac_fc - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 igb_force_mac_fc(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val = 0; ++ ++ ctrl = rd32(E1000_CTRL); ++ ++ /* Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.current_mode" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and TX flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. 
++ */ ++ hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode); ++ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ hw_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ wr32(E1000_CTRL, ctrl); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_config_fc_after_link_up - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. ++ **/ ++s32 igb_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = 0; ++ u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ /* Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = igb_force_mac_fc(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = igb_force_mac_fc(hw); ++ } ++ ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ goto out; ++ } ++ ++ /* Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, ++ &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, ++ &mii_status_reg); ++ if (ret_val) ++ goto out; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ hw_dbg("Copper PHY and Auto Neg has not completed.\n"); ++ goto out; ++ } ++ ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ goto out; ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. 
The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* Now we need to check if the user selected RX ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ hw_dbg("Flow Control = FULL.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ hw_dbg("Flow Control = TX PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ /* Per the IEEE spec, at this point flow control should be ++ * disabled. However, we want to consider that we could ++ * be connected to a legacy switch that doesn't advertise ++ * desired flow control, but can be forced on the link ++ * partner. So if we advertised no flow control, that is ++ * what we will resolve to. If we advertised some kind of ++ * receive capability (Rx Pause Only or Full Flow Control) ++ * and the link partner advertised none, we will configure ++ * ourselves to enable Rx Flow Control only. We can do ++ * this safely for two reasons: If the link partner really ++ * didn't want flow control enabled, and we enable Rx, no ++ * harm done since we won't be receiving any PAUSE frames ++ * anyway. 
If the intent on the link partner was to have ++ * flow control enabled, then by us enabling RX only, we ++ * can at least receive pause frames and process them. ++ * This is a good idea because in most cases, since we are ++ * predominantly a server NIC, more times than not we will ++ * be asked to delay transmission of packets than asking ++ * our link partner to pause transmission of frames. ++ */ ++ else if ((hw->fc.requested_mode == e1000_fc_none) || ++ (hw->fc.requested_mode == e1000_fc_tx_pause) || ++ (hw->fc.strict_ieee)) { ++ hw->fc.current_mode = e1000_fc_none; ++ hw_dbg("Flow Control = NONE.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = RX PAUSE frames only.\n"); ++ } ++ ++ /* Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ hw_dbg("Error getting link speed and duplex\n"); ++ goto out; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = igb_force_mac_fc(hw); ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ goto out; ++ } ++ } ++ /* Check for the case where we have SerDes media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_internal_serdes) ++ && mac->autoneg) { ++ /* Read the PCS_LSTS and check to see if AutoNeg ++ * has completed. ++ */ ++ pcs_status_reg = rd32(E1000_PCS_LSTAT); ++ ++ if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { ++ hw_dbg("PCS Auto Neg has not completed.\n"); ++ return ret_val; ++ } ++ ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (PCS_ANADV) and the Auto_Negotiation Base ++ * Page Ability Register (PCS_LPAB) to determine how ++ * flow control was negotiated. ++ */ ++ pcs_adv_reg = rd32(E1000_PCS_ANADV); ++ pcs_lp_ability_reg = rd32(E1000_PCS_LPAB); ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (PCS_ANADV) and two bits in the Auto Negotiation Base ++ * Page Ability Register (PCS_LPAB) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. 
++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * ++ */ ++ if ((pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { ++ /* Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise Rx ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ hw_dbg("Flow Control = FULL.\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = Rx PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_adv_reg & E1000_TXCW_ASM_DIR) && ++ (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ hw_dbg("Flow Control = Tx PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && ++ (pcs_adv_reg & E1000_TXCW_ASM_DIR) && ++ !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && ++ (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ hw_dbg("Flow Control = Rx PAUSE frames only.\n"); ++ } else { ++ /* Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.current_mode = e1000_fc_none; ++ hw_dbg("Flow Control = NONE.\n"); ++ } ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ pcs_ctrl_reg = rd32(E1000_PCS_LCTL); ++ pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; ++ wr32(E1000_PCS_LCTL, pcs_ctrl_reg); ++ ++ ret_val = igb_force_mac_fc(hw); ++ if (ret_val) { ++ hw_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_get_speed_and_duplex_copper - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. 
++ **/ ++s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ u32 status; ++ ++ status = rd32(E1000_STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ hw_dbg("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ hw_dbg("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ hw_dbg("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ hw_dbg("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ hw_dbg("Half Duplex\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_get_hw_semaphore - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 igb_get_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 ret_val = 0; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access device - SMBI bit is set.\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ igb_put_hw_semaphore(hw); ++ hw_dbg("Driver can't access the NVM\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_put_hw_semaphore - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void igb_put_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = rd32(E1000_SWSM); ++ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ++ wr32(E1000_SWSM, swsm); ++} ++ ++/** ++ * igb_get_auto_rd_done - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 igb_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ s32 ret_val = 0; ++ ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (rd32(E1000_EECD) & E1000_EECD_AUTO_RD) ++ break; ++ usleep_range(1000, 2000); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ hw_dbg("Auto read by HW from NVM has not completed.\n"); ++ ret_val = -E1000_ERR_RESET; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_valid_led_default - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++static s32 igb_valid_led_default(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { ++ switch (hw->phy.media_type) { ++ case e1000_media_type_internal_serdes: ++ *data = ID_LED_DEFAULT_82575_SERDES; ++ break; ++ case e1000_media_type_copper: ++ default: ++ *data = ID_LED_DEFAULT; ++ break; ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_id_led_init - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 igb_id_led_init(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ /* i210 and i211 devices have different LED mechanism */ ++ if ((hw->mac.type == e1000_i210) || ++ (hw->mac.type == e1000_i211)) ++ ret_val = igb_valid_led_default_i210(hw, &data); ++ else ++ ret_val = igb_valid_led_default(hw, &data); ++ ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = rd32(E1000_LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_cleanup_led - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. ++ **/ ++s32 igb_cleanup_led(struct e1000_hw *hw) ++{ ++ wr32(E1000_LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * igb_blink_led - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the led's which are set to be on. ++ **/ ++s32 igb_blink_led(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* Set the blink bit for each LED that's "on" (0x0E) ++ * (or "off" if inverted) in ledctl_mode2. The blink ++ * logic in hardware only works when mode is set to "on" ++ * so it must be changed accordingly when the mode is ++ * "off" and inverted. 
++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 32; i += 8) { ++ u32 mode = (hw->mac.ledctl_mode2 >> i) & ++ E1000_LEDCTL_LED0_MODE_MASK; ++ u32 led_default = hw->mac.ledctl_default >> i; ++ ++ if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && ++ (mode == E1000_LEDCTL_MODE_LED_ON)) || ++ ((led_default & E1000_LEDCTL_LED0_IVRT) && ++ (mode == E1000_LEDCTL_MODE_LED_OFF))) { ++ ledctl_blink &= ++ ~(E1000_LEDCTL_LED0_MODE_MASK << i); ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_MODE_LED_ON) << i; ++ } ++ } ++ } ++ ++ wr32(E1000_LEDCTL, ledctl_blink); ++ ++ return 0; ++} ++ ++/** ++ * igb_led_off - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 igb_led_off(struct e1000_hw *hw) ++{ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ wr32(E1000_LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_disable_pcie_master - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 (0) if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. ++ **/ ++s32 igb_disable_pcie_master(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ s32 ret_val = 0; ++ ++ if (hw->bus.type != e1000_bus_type_pci_express) ++ goto out; ++ ++ ctrl = rd32(E1000_CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ wr32(E1000_CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(rd32(E1000_STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ udelay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ hw_dbg("Master requests are pending.\n"); ++ ret_val = -E1000_ERR_MASTER_REQUESTS_PENDING; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_validate_mdi_setting - Verify MDI/MDIx settings ++ * @hw: pointer to the HW structure ++ * ++ * Verify that when not using auto-negotitation that MDI/MDIx is correctly ++ * set, which is forced to MDI mode only. ++ **/ ++s32 igb_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* All MDI settings are supported on 82580 and newer. */ ++ if (hw->mac.type >= e1000_82580) ++ goto out; ++ ++ if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { ++ hw_dbg("Invalid MDI setting detected\n"); ++ hw->phy.mdix = 1; ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_8bit_ctrl_reg - Write a 8bit CTRL register ++ * @hw: pointer to the HW structure ++ * @reg: 32bit register offset such as E1000_SCTL ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes an address/data control type register. There are several of these ++ * and they all have the format address << 8 | data and bit 31 is polled for ++ * completion. 
++ **/ ++s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, ++ u32 offset, u8 data) ++{ ++ u32 i, regvalue = 0; ++ s32 ret_val = 0; ++ ++ /* Set up the address and data */ ++ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); ++ wr32(reg, regvalue); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { ++ udelay(5); ++ regvalue = rd32(reg); ++ if (regvalue & E1000_GEN_CTL_READY) ++ break; ++ } ++ if (!(regvalue & E1000_GEN_CTL_READY)) { ++ hw_dbg("Reg %08x did not indicate ready\n", reg); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_enable_mng_pass_thru - Enable processing of ARP's ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to leave interface enabled so that frames can ++ * be directed to and from the management interface. ++ **/ ++bool igb_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = false; ++ ++ if (!hw->mac.asf_firmware_present) ++ goto out; ++ ++ manc = rd32(E1000_MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN)) ++ goto out; ++ ++ if (hw->mac.arc_subsystem_valid) { ++ fwsm = rd32(E1000_FWSM); ++ factps = rd32(E1000_FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = true; ++ goto out; ++ } ++ } else { ++ if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = true; ++ goto out; ++ } ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_mbx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_mbx.c 2021-04-07 16:01:27.462633830 +0800 +@@ -0,0 +1,443 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#include "e1000_mbx.h" ++ ++/** ++ * igb_read_mbx - Reads a message from the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to read ++ * ++ * returns SUCCESS if it successfully read message from buffer ++ **/ ++s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ /* limit read to size of mailbox */ ++ if (size > mbx->size) ++ size = mbx->size; ++ ++ if (mbx->ops.read) ++ ret_val = mbx->ops.read(hw, msg, size, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_write_mbx - Write a message to the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully copied message into the buffer ++ **/ ++s32 igb_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = 0; ++ ++ if (size > mbx->size) ++ ret_val = -E1000_ERR_MBX; ++ ++ else if (mbx->ops.write) ++ ret_val = mbx->ops.write(hw, msg, size, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_msg - checks to see if someone sent us mail ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_msg(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_msg) ++ ret_val = mbx->ops.check_for_msg(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_ack - checks to see if someone sent us ACK ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_ack(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_ack) ++ ret_val = mbx->ops.check_for_ack(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_rst - checks to see if other side has reset ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to check ++ * ++ * returns SUCCESS if the Status bit was found or else ERR_MBX ++ **/ ++s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbx->ops.check_for_rst) ++ ret_val = mbx->ops.check_for_rst(hw, mbx_id); ++ ++ return ret_val; ++} ++ ++/** ++ * igb_poll_for_msg - Wait for message notification ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message notification ++ **/ ++static s32 igb_poll_for_msg(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ int countdown = mbx->timeout; ++ ++ if (!countdown || !mbx->ops.check_for_msg) ++ goto out; ++ ++ while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) { ++ countdown--; ++ if (!countdown) ++ break; ++ udelay(mbx->usec_delay); ++ } ++ ++ /* if we failed, all future posted messages fail until reset */ ++ if (!countdown) ++ mbx->timeout = 0; ++out: ++ return countdown ? 
0 : -E1000_ERR_MBX; ++} ++ ++/** ++ * igb_poll_for_ack - Wait for message acknowledgement ++ * @hw: pointer to the HW structure ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message acknowledgement ++ **/ ++static s32 igb_poll_for_ack(struct e1000_hw *hw, u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ int countdown = mbx->timeout; ++ ++ if (!countdown || !mbx->ops.check_for_ack) ++ goto out; ++ ++ while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) { ++ countdown--; ++ if (!countdown) ++ break; ++ udelay(mbx->usec_delay); ++ } ++ ++ /* if we failed, all future posted messages fail until reset */ ++ if (!countdown) ++ mbx->timeout = 0; ++out: ++ return countdown ? 0 : -E1000_ERR_MBX; ++} ++ ++/** ++ * igb_read_posted_mbx - Wait for message notification and receive message ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully received a message notification and ++ * copied it into the receive buffer. ++ **/ ++static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!mbx->ops.read) ++ goto out; ++ ++ ret_val = igb_poll_for_msg(hw, mbx_id); ++ ++ if (!ret_val) ++ ret_val = mbx->ops.read(hw, msg, size, mbx_id); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_posted_mbx - Write a message to the mailbox, wait for ack ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @mbx_id: id of mailbox to write ++ * ++ * returns SUCCESS if it successfully copied message into the buffer and ++ * received an ack to that message within delay * timeout period ++ **/ ++static s32 igb_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 mbx_id) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ /* exit if either we can't write or there isn't a defined timeout */ ++ if (!mbx->ops.write || !mbx->timeout) ++ goto out; ++ ++ /* send msg */ ++ ret_val = mbx->ops.write(hw, msg, size, mbx_id); ++ ++ /* if msg sent wait until we receive an ack */ ++ if (!ret_val) ++ ret_val = igb_poll_for_ack(hw, mbx_id); ++out: ++ return ret_val; ++} ++ ++static s32 igb_check_for_bit_pf(struct e1000_hw *hw, u32 mask) ++{ ++ u32 mbvficr = rd32(E1000_MBVFICR); ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (mbvficr & mask) { ++ ret_val = 0; ++ wr32(E1000_MBVFICR, mask); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_msg_pf - checks to see if the VF has sent mail ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) { ++ ret_val = 0; ++ hw->mbx.stats.reqs++; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_check_for_ack_pf - checks to see if the VF has ACKed ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) { ++ ret_val = 0; ++ hw->mbx.stats.acks++; ++ } ++ ++ return ret_val; ++} 
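For orientation: the generic igb_read_mbx()/igb_write_mbx() wrappers above only do real work after igb_init_mbx_params_pf(), defined later in this same file, has populated hw->mbx.ops with the *_pf implementations. The following is a minimal, hypothetical PF-side sketch of that flow; it is illustrative only, not part of the patch, and the function name and the elided request handling are assumptions:

/* Illustrative sketch only -- not part of the patch above.  It shows how
 * the PF-side helpers are meant to be driven once igb_init_mbx_params_pf()
 * has filled in hw->mbx.ops.  The request handling is elided and the
 * function name is hypothetical.
 */
static void example_pf_service_vf_mailbox(struct e1000_hw *hw, u16 vf)
{
	u32 msg[E1000_VFMAILBOX_SIZE];

	/* 0 from igb_check_for_msg() means the VF has posted a request */
	if (igb_check_for_msg(hw, vf))
		return;

	/* fetch the request; igb_read_mbx() caps the copy at mbx->size */
	if (igb_read_mbx(hw, msg, E1000_VFMAILBOX_SIZE, vf))
		return;

	/* ... decode msg[0], build a reply in msg[] ... */

	/* post the reply; the VF is notified via the P2VMAILBOX STS bit */
	igb_write_mbx(hw, msg, 1, vf);
}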
++ ++/** ++ * igb_check_for_rst_pf - checks to see if the VF has reset ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if the VF has set the Status bit or else ERR_MBX ++ **/ ++static s32 igb_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ u32 vflre = rd32(E1000_VFLRE); ++ s32 ret_val = -E1000_ERR_MBX; ++ ++ if (vflre & (1 << vf_number)) { ++ ret_val = 0; ++ wr32(E1000_VFLRE, (1 << vf_number)); ++ hw->mbx.stats.rsts++; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_obtain_mbx_lock_pf - obtain mailbox lock ++ * @hw: pointer to the HW structure ++ * @vf_number: the VF index ++ * ++ * return SUCCESS if we obtained the mailbox lock ++ **/ ++static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) ++{ ++ s32 ret_val = -E1000_ERR_MBX; ++ u32 p2v_mailbox; ++ ++ /* Take ownership of the buffer */ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); ++ ++ /* reserve mailbox for vf use */ ++ p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); ++ if (p2v_mailbox & E1000_P2VMAILBOX_PFU) ++ ret_val = 0; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_write_mbx_pf - Places a message in the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @vf_number: the VF index ++ * ++ * returns SUCCESS if it successfully copied message into the buffer ++ **/ ++static s32 igb_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 vf_number) ++{ ++ s32 ret_val; ++ u16 i; ++ ++ /* lock the mailbox to prevent pf/vf race condition */ ++ ret_val = igb_obtain_mbx_lock_pf(hw, vf_number); ++ if (ret_val) ++ goto out_no_write; ++ ++ /* flush msg and acks as we are overwriting the message buffer */ ++ igb_check_for_msg_pf(hw, vf_number); ++ igb_check_for_ack_pf(hw, vf_number); ++ ++ /* copy the caller specified message to the mailbox memory buffer */ ++ for (i = 0; i < size; i++) ++ array_wr32(E1000_VMBMEM(vf_number), i, msg[i]); ++ ++ /* Interrupt VF to tell it a message has been sent and release buffer*/ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS); ++ ++ /* update stats */ ++ hw->mbx.stats.msgs_tx++; ++ ++out_no_write: ++ return ret_val; ++ ++} ++ ++/** ++ * igb_read_mbx_pf - Read a message from the mailbox ++ * @hw: pointer to the HW structure ++ * @msg: The message buffer ++ * @size: Length of buffer ++ * @vf_number: the VF index ++ * ++ * This function copies a message from the mailbox buffer to the caller's ++ * memory buffer. The presumption is that the caller knows that there was ++ * a message due to a VF request so no polling for message is needed. 
++ **/ ++static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, ++ u16 vf_number) ++{ ++ s32 ret_val; ++ u16 i; ++ ++ /* lock the mailbox to prevent pf/vf race condition */ ++ ret_val = igb_obtain_mbx_lock_pf(hw, vf_number); ++ if (ret_val) ++ goto out_no_read; ++ ++ /* copy the message to the mailbox memory buffer */ ++ for (i = 0; i < size; i++) ++ msg[i] = array_rd32(E1000_VMBMEM(vf_number), i); ++ ++ /* Acknowledge the message and release buffer */ ++ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK); ++ ++ /* update stats */ ++ hw->mbx.stats.msgs_rx++; ++ ++out_no_read: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_mbx_params_pf - set initial values for pf mailbox ++ * @hw: pointer to the HW structure ++ * ++ * Initializes the hw->mbx struct to correct values for pf mailbox ++ */ ++s32 igb_init_mbx_params_pf(struct e1000_hw *hw) ++{ ++ struct e1000_mbx_info *mbx = &hw->mbx; ++ ++ mbx->timeout = 0; ++ mbx->usec_delay = 0; ++ ++ mbx->size = E1000_VFMAILBOX_SIZE; ++ ++ mbx->ops.read = igb_read_mbx_pf; ++ mbx->ops.write = igb_write_mbx_pf; ++ mbx->ops.read_posted = igb_read_posted_mbx; ++ mbx->ops.write_posted = igb_write_posted_mbx; ++ mbx->ops.check_for_msg = igb_check_for_msg_pf; ++ mbx->ops.check_for_ack = igb_check_for_ack_pf; ++ mbx->ops.check_for_rst = igb_check_for_rst_pf; ++ ++ mbx->stats.msgs_tx = 0; ++ mbx->stats.msgs_rx = 0; ++ mbx->stats.reqs = 0; ++ mbx->stats.acks = 0; ++ mbx->stats.rsts = 0; ++ ++ return 0; ++} ++ +--- linux/drivers/xenomai/net/drivers/igb/e1000_i210.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_i210.c 2021-04-07 16:01:27.457633838 +0800 +@@ -0,0 +1,902 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++/* e1000_i210 ++ * e1000_i211 ++ */ ++ ++#include ++#include ++ ++#include "e1000_hw.h" ++#include "e1000_i210.h" ++ ++static s32 igb_update_flash_i210(struct e1000_hw *hw); ++ ++/** ++ * igb_get_hw_semaphore_i210 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ */ ++static s32 igb_get_hw_semaphore_i210(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ /* In rare circumstances, the SW semaphore may already be held ++ * unintentionally. Clear the semaphore once before giving up. 
++ */ ++ if (hw->dev_spec._82575.clear_semaphore_once) { ++ hw->dev_spec._82575.clear_semaphore_once = false; ++ igb_put_hw_semaphore(hw); ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ } ++ } ++ ++ /* If we do not have the semaphore here, we have to give up. */ ++ if (i == timeout) { ++ hw_dbg("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_NVM; ++ } ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = rd32(E1000_SWSM); ++ wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ igb_put_hw_semaphore(hw); ++ hw_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * igb_acquire_nvm_i210 - Request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the necessary semaphores for exclusive access to the EEPROM. ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++static s32 igb_acquire_nvm_i210(struct e1000_hw *hw) ++{ ++ return igb_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_release_nvm_i210 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit, ++ * then release the semaphores acquired. ++ **/ ++static void igb_release_nvm_i210(struct e1000_hw *hw) ++{ ++ igb_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * igb_acquire_swfw_sync_i210 - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 ret_val = 0; ++ s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ ++ ++ while (i < timeout) { ++ if (igb_get_hw_semaphore_i210(hw)) { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* Firmware currently using resource (fwmask) */ ++ igb_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ goto out; ++ } ++ ++ swfw_sync |= swmask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_release_swfw_sync_i210 - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. 
++ **/ ++void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (igb_get_hw_semaphore_i210(hw)) ++ ; /* Empty */ ++ ++ swfw_sync = rd32(E1000_SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ wr32(E1000_SW_FW_SYNC, swfw_sync); ++ ++ igb_put_hw_semaphore(hw); ++} ++ ++/** ++ * igb_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the Shadow Ram to read ++ * @words: number of words to read ++ * @data: word read from the Shadow Ram ++ * ++ * Reads a 16 bit word from the Shadow Ram using the EERD register. ++ * Uses necessary synchronization semaphores. ++ **/ ++static s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 status = 0; ++ u16 i, count; ++ ++ /* We cannot hold synchronization semaphores for too long, ++ * because of forceful takeover procedure. However it is more efficient ++ * to read in bursts than synchronizing access for each word. ++ */ ++ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { ++ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? ++ E1000_EERD_EEWR_MAX_COUNT : (words - i); ++ if (!(hw->nvm.ops.acquire(hw))) { ++ status = igb_read_nvm_eerd(hw, offset, count, ++ data + i); ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ if (status) ++ break; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_write_nvm_srwr - Write to Shadow Ram using EEWR ++ * @hw: pointer to the HW structure ++ * @offset: offset within the Shadow Ram to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the Shadow Ram ++ * ++ * Writes data to Shadow Ram at offset using EEWR register. ++ * ++ * If igb_update_nvm_checksum is not called after this function , the ++ * Shadow Ram will most likely contain an invalid checksum. ++ **/ ++static s32 igb_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, k, eewr = 0; ++ u32 attempts = 100000; ++ s32 ret_val = 0; ++ ++ /* A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ hw_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ (data[i] << E1000_NVM_RW_REG_DATA) | ++ E1000_NVM_RW_REG_START; ++ ++ wr32(E1000_SRWR, eewr); ++ ++ for (k = 0; k < attempts; k++) { ++ if (E1000_NVM_RW_REG_DONE & ++ rd32(E1000_SRWR)) { ++ ret_val = 0; ++ break; ++ } ++ udelay(5); ++ } ++ ++ if (ret_val) { ++ hw_dbg("Shadow RAM write EEWR timed out\n"); ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR ++ * @hw: pointer to the HW structure ++ * @offset: offset within the Shadow RAM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the Shadow RAM ++ * ++ * Writes data to Shadow RAM at offset using EEWR register. ++ * ++ * If e1000_update_nvm_checksum is not called after this function , the ++ * data will not be committed to FLASH and also Shadow RAM will most likely ++ * contain an invalid checksum. ++ * ++ * If error code is returned, data and Shadow RAM may be inconsistent - buffer ++ * partially written. 
++ **/ ++static s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 status = 0; ++ u16 i, count; ++ ++ /* We cannot hold synchronization semaphores for too long, ++ * because of forceful takeover procedure. However it is more efficient ++ * to write in bursts than synchronizing access for each word. ++ */ ++ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { ++ count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? ++ E1000_EERD_EEWR_MAX_COUNT : (words - i); ++ if (!(hw->nvm.ops.acquire(hw))) { ++ status = igb_write_nvm_srwr(hw, offset, count, ++ data + i); ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ if (status) ++ break; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_read_invm_word_i210 - Reads OTP ++ * @hw: pointer to the HW structure ++ * @address: the word address (aka eeprom offset) to read ++ * @data: pointer to the data read ++ * ++ * Reads 16-bit words from the OTP. Return error when the word is not ++ * stored in OTP. ++ **/ ++static s32 igb_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) ++{ ++ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ u32 invm_dword; ++ u16 i; ++ u8 record_type, word_address; ++ ++ for (i = 0; i < E1000_INVM_SIZE; i++) { ++ invm_dword = rd32(E1000_INVM_DATA_REG(i)); ++ /* Get record type */ ++ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); ++ if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) ++ break; ++ if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) ++ i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; ++ if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) ++ i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; ++ if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { ++ word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); ++ if (word_address == address) { ++ *data = INVM_DWORD_TO_WORD_DATA(invm_dword); ++ hw_dbg("Read INVM Word 0x%02x = %x\n", ++ address, *data); ++ status = 0; ++ break; ++ } ++ } ++ } ++ if (status) ++ hw_dbg("Requested word 0x%02x not found in OTP\n", address); ++ return status; ++} ++ ++/** ++ * igb_read_invm_i210 - Read invm wrapper function for I210/I211 ++ * @hw: pointer to the HW structure ++ * @words: number of words to read ++ * @data: pointer to the data read ++ * ++ * Wrapper function to return data formerly found in the NVM. 
++ **/ ++static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset, ++ u16 words __always_unused, u16 *data) ++{ ++ s32 ret_val = 0; ++ ++ /* Only the MAC addr is required to be present in the iNVM */ ++ switch (offset) { ++ case NVM_MAC_ADDR: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, &data[0]); ++ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+1, ++ &data[1]); ++ ret_val |= igb_read_invm_word_i210(hw, (u8)offset+2, ++ &data[2]); ++ if (ret_val) ++ hw_dbg("MAC Addr not found in iNVM\n"); ++ break; ++ case NVM_INIT_CTRL_2: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_INIT_CTRL_2_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_INIT_CTRL_4: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_INIT_CTRL_4_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_LED_1_CFG: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_LED_1_CFG_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_LED_0_2_CFG: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = NVM_LED_0_2_CFG_DEFAULT_I211; ++ ret_val = 0; ++ } ++ break; ++ case NVM_ID_LED_SETTINGS: ++ ret_val = igb_read_invm_word_i210(hw, (u8)offset, data); ++ if (ret_val) { ++ *data = ID_LED_RESERVED_FFFF; ++ ret_val = 0; ++ } ++ break; ++ case NVM_SUB_DEV_ID: ++ *data = hw->subsystem_device_id; ++ break; ++ case NVM_SUB_VEN_ID: ++ *data = hw->subsystem_vendor_id; ++ break; ++ case NVM_DEV_ID: ++ *data = hw->device_id; ++ break; ++ case NVM_VEN_ID: ++ *data = hw->vendor_id; ++ break; ++ default: ++ hw_dbg("NVM word 0x%02x is not mapped.\n", offset); ++ *data = NVM_RESERVED_WORD; ++ break; ++ } ++ return ret_val; ++} ++ ++/** ++ * igb_read_invm_version - Reads iNVM version and image type ++ * @hw: pointer to the HW structure ++ * @invm_ver: version structure for the version read ++ * ++ * Reads iNVM version and image type. 
++ **/ ++s32 igb_read_invm_version(struct e1000_hw *hw, ++ struct e1000_fw_version *invm_ver) { ++ u32 *record = NULL; ++ u32 *next_record = NULL; ++ u32 i = 0; ++ u32 invm_dword = 0; ++ u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / ++ E1000_INVM_RECORD_SIZE_IN_BYTES); ++ u32 buffer[E1000_INVM_SIZE]; ++ s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ u16 version = 0; ++ ++ /* Read iNVM memory */ ++ for (i = 0; i < E1000_INVM_SIZE; i++) { ++ invm_dword = rd32(E1000_INVM_DATA_REG(i)); ++ buffer[i] = invm_dword; ++ } ++ ++ /* Read version number */ ++ for (i = 1; i < invm_blocks; i++) { ++ record = &buffer[invm_blocks - i]; ++ next_record = &buffer[invm_blocks - i + 1]; ++ ++ /* Check if we have first version location used */ ++ if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { ++ version = 0; ++ status = 0; ++ break; ++ } ++ /* Check if we have second version location used */ ++ else if ((i == 1) && ++ ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { ++ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ status = 0; ++ break; ++ } ++ /* Check if we have odd version location ++ * used and it is the last one used ++ */ ++ else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && ++ ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && ++ (i != 1))) { ++ version = (*next_record & E1000_INVM_VER_FIELD_TWO) ++ >> 13; ++ status = 0; ++ break; ++ } ++ /* Check if we have even version location ++ * used and it is the last one used ++ */ ++ else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && ++ ((*record & 0x3) == 0)) { ++ version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; ++ status = 0; ++ break; ++ } ++ } ++ ++ if (!status) { ++ invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) ++ >> E1000_INVM_MAJOR_SHIFT; ++ invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; ++ } ++ /* Read Image Type */ ++ for (i = 1; i < invm_blocks; i++) { ++ record = &buffer[invm_blocks - i]; ++ next_record = &buffer[invm_blocks - i + 1]; ++ ++ /* Check if we have image type in first location used */ ++ if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { ++ invm_ver->invm_img_type = 0; ++ status = 0; ++ break; ++ } ++ /* Check if we have image type in first location used */ ++ else if ((((*record & 0x3) == 0) && ++ ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || ++ ((((*record & 0x3) != 0) && (i != 1)))) { ++ invm_ver->invm_img_type = ++ (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; ++ status = 0; ++ break; ++ } ++ } ++ return status; ++} ++ ++/** ++ * igb_validate_nvm_checksum_i210 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++static s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw) ++{ ++ s32 status = 0; ++ s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); ++ ++ if (!(hw->nvm.ops.acquire(hw))) { ++ ++ /* Replace the read function with semaphore grabbing with ++ * the one that skips this for a while. ++ * We have semaphore taken already here. ++ */ ++ read_op_ptr = hw->nvm.ops.read; ++ hw->nvm.ops.read = igb_read_nvm_eerd; ++ ++ status = igb_validate_nvm_checksum(hw); ++ ++ /* Revert original read operation. 
*/ ++ hw->nvm.ops.read = read_op_ptr; ++ ++ hw->nvm.ops.release(hw); ++ } else { ++ status = E1000_ERR_SWFW_SYNC; ++ } ++ ++ return status; ++} ++ ++/** ++ * igb_update_nvm_checksum_i210 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. Next commit EEPROM data onto the Flash. ++ **/ ++static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ /* Read the first word from the EEPROM. If this times out or fails, do ++ * not continue or we could be in for a very long wait while every ++ * EEPROM read fails ++ */ ++ ret_val = igb_read_nvm_eerd(hw, 0, 1, &nvm_data); ++ if (ret_val) { ++ hw_dbg("EEPROM read failed\n"); ++ goto out; ++ } ++ ++ if (!(hw->nvm.ops.acquire(hw))) { ++ /* Do not use hw->nvm.ops.write, hw->nvm.ops.read ++ * because we do not want to take the synchronization ++ * semaphores twice here. ++ */ ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = igb_read_nvm_eerd(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ hw->nvm.ops.release(hw); ++ hw_dbg("NVM Read Error while updating checksum.\n"); ++ goto out; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = igb_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, ++ &checksum); ++ if (ret_val) { ++ hw->nvm.ops.release(hw); ++ hw_dbg("NVM Write Error while updating checksum.\n"); ++ goto out; ++ } ++ ++ hw->nvm.ops.release(hw); ++ ++ ret_val = igb_update_flash_i210(hw); ++ } else { ++ ret_val = -E1000_ERR_SWFW_SYNC; ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_pool_flash_update_done_i210 - Pool FLUDONE status. ++ * @hw: pointer to the HW structure ++ * ++ **/ ++static s32 igb_pool_flash_update_done_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i, reg; ++ ++ for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { ++ reg = rd32(E1000_EECD); ++ if (reg & E1000_EECD_FLUDONE_I210) { ++ ret_val = 0; ++ break; ++ } ++ udelay(5); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * igb_get_flash_presence_i210 - Check if flash device is detected. ++ * @hw: pointer to the HW structure ++ * ++ **/ ++bool igb_get_flash_presence_i210(struct e1000_hw *hw) ++{ ++ u32 eec = 0; ++ bool ret_val = false; ++ ++ eec = rd32(E1000_EECD); ++ if (eec & E1000_EECD_FLASH_DETECTED_I210) ++ ret_val = true; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_update_flash_i210 - Commit EEPROM to the flash ++ * @hw: pointer to the HW structure ++ * ++ **/ ++static s32 igb_update_flash_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 flup; ++ ++ ret_val = igb_pool_flash_update_done_i210(hw); ++ if (ret_val == -E1000_ERR_NVM) { ++ hw_dbg("Flash update time out\n"); ++ goto out; ++ } ++ ++ flup = rd32(E1000_EECD) | E1000_EECD_FLUPD_I210; ++ wr32(E1000_EECD, flup); ++ ++ ret_val = igb_pool_flash_update_done_i210(hw); ++ if (ret_val) ++ hw_dbg("Flash update complete\n"); ++ else ++ hw_dbg("Flash update time out\n"); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * igb_valid_led_default_i210 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ hw_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { ++ switch (hw->phy.media_type) { ++ case e1000_media_type_internal_serdes: ++ *data = ID_LED_DEFAULT_I210_SERDES; ++ break; ++ case e1000_media_type_copper: ++ default: ++ *data = ID_LED_DEFAULT_I210; ++ break; ++ } ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * __igb_access_xmdio_reg - Read/write XMDIO register ++ * @hw: pointer to the HW structure ++ * @address: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: pointer to value to read/write from/to the XMDIO address ++ * @read: boolean flag to indicate read or write ++ **/ ++static s32 __igb_access_xmdio_reg(struct e1000_hw *hw, u16 address, ++ u8 dev_addr, u16 *data, bool read) ++{ ++ s32 ret_val = 0; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | ++ dev_addr); ++ if (ret_val) ++ return ret_val; ++ ++ if (read) ++ ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); ++ else ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Recalibrate the device back to 0 */ ++ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); ++ if (ret_val) ++ return ret_val; ++ ++ return ret_val; ++} ++ ++/** ++ * igb_read_xmdio_reg - Read XMDIO register ++ * @hw: pointer to the HW structure ++ * @addr: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: value to be read from the EMI address ++ **/ ++s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) ++{ ++ return __igb_access_xmdio_reg(hw, addr, dev_addr, data, true); ++} ++ ++/** ++ * igb_write_xmdio_reg - Write XMDIO register ++ * @hw: pointer to the HW structure ++ * @addr: XMDIO address to program ++ * @dev_addr: device address to program ++ * @data: value to be written to the XMDIO address ++ **/ ++s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) ++{ ++ return __igb_access_xmdio_reg(hw, addr, dev_addr, &data, false); ++} ++ ++/** ++ * igb_init_nvm_params_i210 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++s32 igb_init_nvm_params_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ ++ nvm->ops.acquire = igb_acquire_nvm_i210; ++ nvm->ops.release = igb_release_nvm_i210; ++ nvm->ops.valid_led_default = igb_valid_led_default_i210; ++ ++ /* NVM Function Pointers */ ++ if (igb_get_flash_presence_i210(hw)) { ++ hw->nvm.type = e1000_nvm_flash_hw; ++ nvm->ops.read = igb_read_nvm_srrd_i210; ++ nvm->ops.write = igb_write_nvm_srwr_i210; ++ nvm->ops.validate = igb_validate_nvm_checksum_i210; ++ nvm->ops.update = igb_update_nvm_checksum_i210; ++ } else { ++ hw->nvm.type = e1000_nvm_invm; ++ nvm->ops.read = igb_read_invm_i210; ++ nvm->ops.write = NULL; ++ nvm->ops.validate = NULL; ++ nvm->ops.update = NULL; ++ } ++ return ret_val; ++} ++ ++/** ++ * igb_pll_workaround_i210 ++ * @hw: pointer to the HW structure ++ * ++ * Works around an errata in the PLL circuit where it occasionally ++ * provides the wrong clock frequency after power up. 
++ **/ ++s32 igb_pll_workaround_i210(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; ++ u16 nvm_word, phy_word, pci_word, tmp_nvm; ++ int i; ++ ++ /* Get and set needed register values */ ++ wuc = rd32(E1000_WUC); ++ mdicnfg = rd32(E1000_MDICNFG); ++ reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; ++ wr32(E1000_MDICNFG, reg_val); ++ ++ /* Get data from NVM, or set default */ ++ ret_val = igb_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, ++ &nvm_word); ++ if (ret_val) ++ nvm_word = E1000_INVM_DEFAULT_AL; ++ tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; ++ for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { ++ /* check current state directly from internal PHY */ ++ igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | ++ E1000_PHY_PLL_FREQ_REG), &phy_word); ++ if ((phy_word & E1000_PHY_PLL_UNCONF) ++ != E1000_PHY_PLL_UNCONF) { ++ ret_val = 0; ++ break; ++ } else { ++ ret_val = -E1000_ERR_PHY; ++ } ++ /* directly reset the internal PHY */ ++ ctrl = rd32(E1000_CTRL); ++ wr32(E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++ ++ wr32(E1000_WUC, 0); ++ reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); ++ wr32(E1000_EEARBC_I210, reg_val); ++ ++ igb_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ pci_word |= E1000_PCI_PMCSR_D3; ++ igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ usleep_range(1000, 2000); ++ pci_word &= ~E1000_PCI_PMCSR_D3; ++ igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); ++ reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); ++ wr32(E1000_EEARBC_I210, reg_val); ++ ++ /* restore WUC register */ ++ wr32(E1000_WUC, wuc); ++ } ++ /* restore MDICNFG setting */ ++ wr32(E1000_MDICNFG, mdicnfg); ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/igb/e1000_defines.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/e1000_defines.h 2021-04-07 16:01:27.452633845 +0800 +@@ -0,0 +1,1018 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2014 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_SDP2_DATA 0x00000040 /* Value of SW Defineable Pin 2 */ ++#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Defineable Pin 3 */ ++#define E1000_CTRL_EXT_SDP2_DIR 0x00000400 /* SDP2 Data direction */ ++#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* SDP3 Data direction */ ++ ++/* Physical Func Reset Done Indication */ ++#define E1000_CTRL_EXT_PFRSTD 0x00004000 ++#define E1000_CTRL_EXT_SDLPE 0X00040000 /* SerDes Low Power Enable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 ++#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_IRCA 0x00000001 ++/* Interrupt delay cancellation */ ++/* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 ++/* Interrupt acknowledge Auto-mask */ ++/* Clear Interrupt timers after IMS clear */ ++/* packet buffer parity error detection enabled */ ++/* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_CTRL_EXT_PHYPDEN 0x00100000 ++#define E1000_I2CCMD_REG_ADDR_SHIFT 16 ++#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 ++#define E1000_I2CCMD_OPCODE_READ 0x08000000 ++#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 ++#define E1000_I2CCMD_READY 0x20000000 ++#define E1000_I2CCMD_ERROR 0x80000000 ++#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) ++#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) ++#define E1000_MAX_SGMII_PHY_REG_ADDR 255 ++#define E1000_I2CCMD_PHY_TIMEOUT 200 ++#define E1000_IVAR_VALID 0x80 ++#define E1000_GPIE_NSICR 0x00000001 ++#define E1000_GPIE_MSIX_MODE 0x00000010 ++#define E1000_GPIE_EIAME 0x40000000 ++#define E1000_GPIE_PBA 0x80000000 ++ ++/* Receive Descriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_TS 0x10000 /* Pkt was time stamped */ ++ ++#define E1000_RXDEXT_STATERR_LB 0x00040000 ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* Same mask, but for 
extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_EN_BMC2OS 0x10000000 /* OSBMC is Enabled or not */ ++/* Enable Neighbor Discovery Filtering */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++ ++/* Receive Control */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_DPF 0x00400000 /* Discard Pause Frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++ ++/* Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_PHY2_SM 0x20 ++#define E1000_SWFW_PHY3_SM 0x40 ++ ++/* FACTPS Definitions */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks 
new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++/* Defined polarity of Dock/Undock indication in SDP[0] */ ++/* Reset both PHY ports, through PHYRST_N pin */ ++/* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ ++#define E1000_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++/* Initiate an interrupt to manageability engine */ ++#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++ ++#define E1000_CONNSW_ENRGSRC 0x4 ++#define E1000_CONNSW_PHYSD 0x400 ++#define E1000_CONNSW_PHY_PDN 0x800 ++#define E1000_CONNSW_SERDESD 0x200 ++#define E1000_CONNSW_AUTOSENSE_CONF 0x2 ++#define E1000_CONNSW_AUTOSENSE_EN 0x1 ++#define E1000_PCS_CFG_PCS_EN 8 ++#define E1000_PCS_LCTL_FLV_LINK_UP 1 ++#define E1000_PCS_LCTL_FSV_100 2 ++#define E1000_PCS_LCTL_FSV_1000 4 ++#define E1000_PCS_LCTL_FDV_FULL 8 ++#define E1000_PCS_LCTL_FSD 0x10 ++#define E1000_PCS_LCTL_FORCE_LINK 0x20 ++#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 ++#define E1000_PCS_LCTL_AN_ENABLE 0x10000 ++#define E1000_PCS_LCTL_AN_RESTART 0x20000 ++#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 ++#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 ++ ++#define E1000_PCS_LSTS_LINK_OK 1 ++#define E1000_PCS_LSTS_SPEED_100 2 ++#define E1000_PCS_LSTS_SPEED_1000 4 ++#define E1000_PCS_LSTS_DUPLEX_FULL 8 ++#define E1000_PCS_LSTS_SYNK_OK 0x10 ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++/* Change in Dock/Undock state. Clear on write '0'. */ ++/* Status of Master requests. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 ++/* BMC external code execution disabled */ ++ ++#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ ++#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ ++/* Constants used to intrepret the masked PCI-X bus speed. 
*/ ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define SPEED_2500 2500 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. */ ++#define E1000_ALL_SPEED_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_FULL_DUPLEX (ADVERTISE_10_FULL | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++ ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++/* Extended desc bits for Linksec and timesync */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++ ++/* DMA Coalescing register fields */ ++#define E1000_DMACR_DMACWT_MASK 0x00003FFF /* DMA Coal Watchdog Timer */ ++#define E1000_DMACR_DMACTHR_MASK 0x00FF0000 /* DMA Coal Rx Threshold */ ++#define E1000_DMACR_DMACTHR_SHIFT 16 ++#define E1000_DMACR_DMAC_LX_MASK 0x30000000 /* Lx when no PCIe trans */ ++#define E1000_DMACR_DMAC_LX_SHIFT 28 ++#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ ++/* DMA Coalescing BMC-to-OS Watchdog Enable */ ++#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 ++ ++#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF /* DMA Coal Tx Threshold */ ++ ++#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ ++ ++#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF /* Rx Traffic Rate Thresh */ ++#define E1000_DMCRTRH_LRPRCW 0x80000000 /* Rx pkt rate curr window */ ++ ++#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF /* DMA Coal Rx Current Cnt */ ++ ++#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 /* FC Rx Thresh High val */ ++#define E1000_FCRTC_RTH_COAL_SHIFT 4 ++#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power decision */ ++ ++/* Timestamp in Rx buffer */ ++#define E1000_RXPBS_CFG_TS_EN 0x80000000 ++ ++#define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ ++#define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE 
default */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_LEF 0x00040000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++ ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* PBA constants */ ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_64K 0x0040 /* 64KB */ ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_VMMB 0x00000100 /* VM MB event */ ++#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ ++#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ ++/* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 ++/* LAN connected device generates an interrupt */ ++#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ ++ ++/* Extended Interrupt Cause Read */ ++#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ ++#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ ++#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ ++#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ ++#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ ++#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ ++#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ ++#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ ++#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ ++/* TCP Timer */ ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC | \ ++ E1000_IMS_DOUTSYNC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ ++#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ ++#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ ++ ++/* Extended Interrupt Mask Set */ ++#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ ++#define E1000_ICS_DRSTA E1000_ICR_DRSTA /* Device Reset Aserted */ ++ ++/* Extended Interrupt Cause Set */ ++/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ ++#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ ++ ++ ++/* Transmit Descriptor Control */ ++/* Enable the counting of descriptors still to be processed. */ ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* Transmit Config Word */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++ ++/* 802.1q VLAN Packet Size */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. ++ */ ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++#define E1000_RAL_MAC_ADDR_LEN 4 ++#define E1000_RAH_MAC_ADDR_LEN 2 ++#define E1000_RAH_POOL_MASK 0x03FC0000 ++#define E1000_RAH_POOL_1 0x00040000 ++ ++/* Error Codes */ ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++#define E1000_ERR_MBX 15 ++#define E1000_ERR_INVALID_ARGUMENT 16 ++#define E1000_ERR_NO_SPACE 17 ++#define E1000_ERR_NVM_PBA_SECTION 18 ++#define E1000_ERR_INVM_VALUE_NOT_FOUND 19 ++#define E1000_ERR_I2C 20 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++/* Number of milliseconds for NVM auto read done after MAC reset. 
*/ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++#define E1000_TSYNCTXCTL_VALID 0x00000001 /* tx timestamp valid */ ++#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable tx timestampping */ ++ ++#define E1000_TSYNCRXCTL_VALID 0x00000001 /* rx timestamp valid */ ++#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* rx type mask */ ++#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 ++#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 ++#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 ++#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 ++#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A ++#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable rx timestampping */ ++ ++#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF ++#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 ++#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 ++#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 ++#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 ++#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 ++ ++#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 ++#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 ++#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 ++#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 ++#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 ++#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 ++#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 ++#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 ++#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 ++ ++#define E1000_TIMINCA_16NS_SHIFT 24 ++ ++/* Time Sync Interrupt Cause/Mask Register Bits */ ++ ++#define TSINTR_SYS_WRAP (1 << 0) /* SYSTIM Wrap around. */ ++#define TSINTR_TXTS (1 << 1) /* Transmit Timestamp. */ ++#define TSINTR_RXTS (1 << 2) /* Receive Timestamp. */ ++#define TSINTR_TT0 (1 << 3) /* Target Time 0 Trigger. */ ++#define TSINTR_TT1 (1 << 4) /* Target Time 1 Trigger. */ ++#define TSINTR_AUTT0 (1 << 5) /* Auxiliary Timestamp 0 Taken. */ ++#define TSINTR_AUTT1 (1 << 6) /* Auxiliary Timestamp 1 Taken. */ ++#define TSINTR_TADJ (1 << 7) /* Time Adjust Done. */ ++ ++#define TSYNC_INTERRUPTS TSINTR_TXTS ++#define E1000_TSICR_TXTS TSINTR_TXTS ++ ++/* TSAUXC Configuration Bits */ ++#define TSAUXC_EN_TT0 (1 << 0) /* Enable target time 0. */ ++#define TSAUXC_EN_TT1 (1 << 1) /* Enable target time 1. */ ++#define TSAUXC_EN_CLK0 (1 << 2) /* Enable Configurable Frequency Clock 0. */ ++#define TSAUXC_SAMP_AUT0 (1 << 3) /* Latch SYSTIML/H into AUXSTMPL/0. */ ++#define TSAUXC_ST0 (1 << 4) /* Start Clock 0 Toggle on Target Time 0. */ ++#define TSAUXC_EN_CLK1 (1 << 5) /* Enable Configurable Frequency Clock 1. */ ++#define TSAUXC_SAMP_AUT1 (1 << 6) /* Latch SYSTIML/H into AUXSTMPL/1. */ ++#define TSAUXC_ST1 (1 << 7) /* Start Clock 1 Toggle on Target Time 1. */ ++#define TSAUXC_EN_TS0 (1 << 8) /* Enable hardware timestamp 0. */ ++#define TSAUXC_AUTT0 (1 << 9) /* Auxiliary Timestamp Taken. */ ++#define TSAUXC_EN_TS1 (1 << 10) /* Enable hardware timestamp 0. */ ++#define TSAUXC_AUTT1 (1 << 11) /* Auxiliary Timestamp Taken. */ ++#define TSAUXC_PLSG (1 << 17) /* Generate a pulse. */ ++#define TSAUXC_DISABLE (1 << 31) /* Disable SYSTIM Count Operation. */ ++ ++/* SDP Configuration Bits */ ++#define AUX0_SEL_SDP0 (0 << 0) /* Assign SDP0 to auxiliary time stamp 0. 
*/ ++#define AUX0_SEL_SDP1 (1 << 0) /* Assign SDP1 to auxiliary time stamp 0. */ ++#define AUX0_SEL_SDP2 (2 << 0) /* Assign SDP2 to auxiliary time stamp 0. */ ++#define AUX0_SEL_SDP3 (3 << 0) /* Assign SDP3 to auxiliary time stamp 0. */ ++#define AUX0_TS_SDP_EN (1 << 2) /* Enable auxiliary time stamp trigger 0. */ ++#define AUX1_SEL_SDP0 (0 << 3) /* Assign SDP0 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP1 (1 << 3) /* Assign SDP1 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP2 (2 << 3) /* Assign SDP2 to auxiliary time stamp 1. */ ++#define AUX1_SEL_SDP3 (3 << 3) /* Assign SDP3 to auxiliary time stamp 1. */ ++#define AUX1_TS_SDP_EN (1 << 5) /* Enable auxiliary time stamp trigger 1. */ ++#define TS_SDP0_SEL_TT0 (0 << 6) /* Target time 0 is output on SDP0. */ ++#define TS_SDP0_SEL_TT1 (1 << 6) /* Target time 1 is output on SDP0. */ ++#define TS_SDP0_SEL_FC0 (2 << 6) /* Freq clock 0 is output on SDP0. */ ++#define TS_SDP0_SEL_FC1 (3 << 6) /* Freq clock 1 is output on SDP0. */ ++#define TS_SDP0_EN (1 << 8) /* SDP0 is assigned to Tsync. */ ++#define TS_SDP1_SEL_TT0 (0 << 9) /* Target time 0 is output on SDP1. */ ++#define TS_SDP1_SEL_TT1 (1 << 9) /* Target time 1 is output on SDP1. */ ++#define TS_SDP1_SEL_FC0 (2 << 9) /* Freq clock 0 is output on SDP1. */ ++#define TS_SDP1_SEL_FC1 (3 << 9) /* Freq clock 1 is output on SDP1. */ ++#define TS_SDP1_EN (1 << 11) /* SDP1 is assigned to Tsync. */ ++#define TS_SDP2_SEL_TT0 (0 << 12) /* Target time 0 is output on SDP2. */ ++#define TS_SDP2_SEL_TT1 (1 << 12) /* Target time 1 is output on SDP2. */ ++#define TS_SDP2_SEL_FC0 (2 << 12) /* Freq clock 0 is output on SDP2. */ ++#define TS_SDP2_SEL_FC1 (3 << 12) /* Freq clock 1 is output on SDP2. */ ++#define TS_SDP2_EN (1 << 14) /* SDP2 is assigned to Tsync. */ ++#define TS_SDP3_SEL_TT0 (0 << 15) /* Target time 0 is output on SDP3. */ ++#define TS_SDP3_SEL_TT1 (1 << 15) /* Target time 1 is output on SDP3. */ ++#define TS_SDP3_SEL_FC0 (2 << 15) /* Freq clock 0 is output on SDP3. */ ++#define TS_SDP3_SEL_FC1 (3 << 15) /* Freq clock 1 is output on SDP3. */ ++#define TS_SDP3_EN (1 << 17) /* SDP3 is assigned to Tsync. 
*/ ++ ++#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ ++#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ ++#define E1000_MDICNFG_PHY_MASK 0x03E00000 ++#define E1000_MDICNFG_PHY_SHIFT 21 ++ ++#define E1000_MEDIA_PORT_COPPER 1 ++#define E1000_MEDIA_PORT_OTHER 2 ++#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 ++#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 ++#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ ++#define E1000_M88E1112_MAC_CTRL_1 0x10 ++#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ ++#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 ++#define E1000_M88E1112_PAGE_ADDR 0x16 ++#define E1000_M88E1112_STATUS 0x01 ++ ++/* PCI Express Control */ ++#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 ++#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 ++#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 ++#define E1000_GCR_CAP_VER2 0x00040000 ++ ++/* mPHY Address Control and Data Registers */ ++#define E1000_MPHY_ADDR_CTL 0x0024 /* mPHY Address Control Register */ ++#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 ++#define E1000_MPHY_DATA 0x0E10 /* mPHY Data Register */ ++ ++/* mPHY PCS CLK Register */ ++#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 /* mPHY PCS CLK AFE CSR Offset */ ++/* mPHY Near End Digital Loopback Override Bit */ ++#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 ++ ++#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 ++#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 ++ ++/* PHY Control Register */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++ ++/* Autoneg Expansion Register */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++ ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status 
Register */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++/* NVM Addressing bits based on type 0=small, 1=large */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ ++#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/ ++#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ ++#define E1000_FLUDONE_ATTEMPTS 20000 ++#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ ++#define E1000_I210_FIFO_SEL_RX 0x00 ++#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) ++#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) ++#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 ++#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 ++#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ ++/* Secure FLASH mode requires removing MSb */ ++#define E1000_I210_FW_PTR_MASK 0x7FFF ++/* Firmware code revision field word offset*/ ++#define E1000_I210_FW_VER_OFFSET 328 ++#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ ++#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done*/ ++#define E1000_FLUDONE_ATTEMPTS 20000 ++#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ ++#define E1000_I210_FIFO_SEL_RX 0x00 ++#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) ++#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) ++#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 ++#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 ++ ++ ++/* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DATA 16 ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 /* SERDES output amplitude */ ++#define NVM_VERSION 0x0005 ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++#define NVM_COMPATIBILITY_REG_3 0x0003 ++#define NVM_COMPATIBILITY_BIT_MASK 0x8000 ++#define NVM_MAC_ADDR 0x0000 ++#define NVM_SUB_DEV_ID 0x000B ++#define NVM_SUB_VEN_ID 0x000C ++#define NVM_DEV_ID 0x000D ++#define NVM_VEN_ID 0x000E ++#define NVM_INIT_CTRL_2 0x000F ++#define NVM_INIT_CTRL_4 0x0013 ++#define NVM_LED_1_CFG 0x001C ++#define NVM_LED_0_2_CFG 0x001F ++#define NVM_ETRACK_WORD 0x0042 ++#define NVM_ETRACK_HIWORD 
0x0043 ++#define NVM_COMB_VER_OFF 0x0083 ++#define NVM_COMB_VER_PTR 0x003d ++ ++/* NVM version defines */ ++#define NVM_MAJOR_MASK 0xF000 ++#define NVM_MINOR_MASK 0x0FF0 ++#define NVM_IMAGE_ID_MASK 0x000F ++#define NVM_COMB_VER_MASK 0x00FF ++#define NVM_MAJOR_SHIFT 12 ++#define NVM_MINOR_SHIFT 4 ++#define NVM_COMB_VER_SHFT 8 ++#define NVM_VER_INVALID 0xFFFF ++#define NVM_ETRACK_SHIFT 16 ++#define NVM_ETRACK_VALID 0x8000 ++#define NVM_NEW_DEC_MASK 0x0F00 ++#define NVM_HEX_CONV 16 ++#define NVM_HEX_TENS 10 ++ ++#define NVM_ETS_CFG 0x003E ++#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0 ++#define NVM_ETS_LTHRES_DELTA_SHIFT 6 ++#define NVM_ETS_TYPE_MASK 0x0038 ++#define NVM_ETS_TYPE_SHIFT 3 ++#define NVM_ETS_TYPE_EMC 0x000 ++#define NVM_ETS_NUM_SENSORS_MASK 0x0007 ++#define NVM_ETS_DATA_LOC_MASK 0x3C00 ++#define NVM_ETS_DATA_LOC_SHIFT 10 ++#define NVM_ETS_DATA_INDEX_MASK 0x0300 ++#define NVM_ETS_DATA_INDEX_SHIFT 8 ++#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ ++#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ ++#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ ++ ++#define NVM_82580_LAN_FUNC_OFFSET(a) (a ? (0x40 + (0x40 * a)) : 0) ++ ++/* Mask bits for fields in Word 0x24 of the NVM */ ++#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ ++#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed external */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++ ++/* length of string needed to store part num */ ++#define E1000_PBANUM_LENGTH 11 ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ ++#define NVM_SUM 0xBABA ++ ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_RESERVED_WORD 0xFFFF ++#define NVM_PBA_PTR_GUARD 0xFAFA ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++ ++/* NVM Commands - Microwire */ ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCIE_DEVICE_CONTROL2 0x28 ++#define PCIE_DEVICE_CONTROL2_16ms 0x0005 ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* I = Integrated ++ * E = External ++ */ ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define M88E1112_E_PHY_ID 0x01410C90 ++#define I347AT4_E_PHY_ID 0x01410DC0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define I82580_I_PHY_ID 0x015403A0 ++#define I350_I_PHY_ID 0x015403B0 ++#define M88_VENDOR 0x0141 ++#define I210_I_PHY_ID 0x01410C00 ++#define M88E1543_E_PHY_ID 0x01410EA0 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++/* 1=CLK125 low, 0=CLK125 toggling */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++/* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* 0 = <50M ++ * 1 = 50-80M ++ * 2 = 80-110M ++ * 3 = 110-140M ++ * 4 = >140M ++ */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++/* 1 = Lost lock detect enabled. 
++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++ ++/* Intel i347-AT4 Registers */ ++ ++#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ ++#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ ++#define I347AT4_PAGE_SELECT 0x16 ++ ++/* i347-AT4 Extended PHY Specific Control Register */ ++ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 ++#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 ++#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 ++#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 ++#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 ++#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 ++#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000 ++#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 ++#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 ++#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 ++ ++/* i347-AT4 PHY Cable Diagnostics Control */ ++#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ ++ ++/* Marvell 1112 only registers */ ++#define M88E1112_VCT_DSP_DISTANCE 0x001A ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++#define E1000_MDIC_DEST 0x80000000 ++ ++/* Thermal Sensor */ ++#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ ++#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Speed Throttle Event */ ++ ++/* Energy Efficient Ethernet */ ++#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* EEE Enable 1G AN */ ++#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* EEE Enable 100M AN */ ++#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEE Tx LPI Enable */ ++#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEE Rx LPI Enable */ ++#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ ++#define E1000_EEER_LPI_FC 0x00040000 /* EEE Enable on FC */ ++#define E1000_EEE_SU_LPI_CLK_STP 0X00800000 /* EEE LPI Clock Stop */ ++#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ ++#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ ++#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ ++#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ ++#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ ++#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ ++#define E1000_M88E1543_EEE_CTRL_1 0x0 ++#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ ++#define E1000_EEE_ADV_DEV_I354 7 ++#define E1000_EEE_ADV_ADDR_I354 60 ++#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ ++#define 
E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ ++#define E1000_PCS_STATUS_DEV_I354 3 ++#define E1000_PCS_STATUS_ADDR_I354 1 ++#define E1000_PCS_STATUS_TX_LPI_IND 0x0200 /* Tx in LPI state */ ++#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 ++#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 ++ ++/* SerDes Control */ ++#define E1000_GEN_CTL_READY 0x80000000 ++#define E1000_GEN_CTL_ADDRESS_SHIFT 8 ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++/* DMA Coalescing register fields */ ++#define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power on DMA coal */ ++ ++/* Tx Rate-Scheduler Config fields */ ++#define E1000_RTTBCNRC_RS_ENA 0x80000000 ++#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF ++#define E1000_RTTBCNRC_RF_INT_SHIFT 14 ++#define E1000_RTTBCNRC_RF_INT_MASK \ ++ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) ++ ++#endif +--- linux/drivers/xenomai/net/drivers/igb/igb_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/igb/igb_main.c 2021-04-07 16:01:27.448633850 +0800 +@@ -0,0 +1,5676 @@ ++/* Intel(R) Gigabit Ethernet Linux driver ++ * Copyright(c) 2007-2015 Intel Corporation. ++ * RTnet port 2009 Vladimir Zapolskiy ++ * Copyright(c) 2015 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, see . ++ * ++ * The full GNU General Public License is included in this distribution in ++ * the file called "COPYING". ++ * ++ * Contact Information: ++ * e1000-devel Mailing List ++ * Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "igb.h" ++ ++#include ++ ++// RTNET redefines ++#ifdef NETIF_F_TSO ++#undef NETIF_F_TSO ++#define NETIF_F_TSO 0 ++#endif ++ ++#ifdef NETIF_F_TSO6 ++#undef NETIF_F_TSO6 ++#define NETIF_F_TSO6 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#define NETIF_F_HW_VLAN_TX 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_RX ++#undef NETIF_F_HW_VLAN_RX ++#define NETIF_F_HW_VLAN_RX 0 ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_FILTER ++#undef NETIF_F_HW_VLAN_FILTER ++#define NETIF_F_HW_VLAN_FILTER 0 ++#endif ++ ++#ifdef IGB_MAX_TX_QUEUES ++#undef IGB_MAX_TX_QUEUES ++#define IGB_MAX_TX_QUEUES 1 ++#endif ++ ++#ifdef IGB_MAX_RX_QUEUES ++#undef IGB_MAX_RX_QUEUES ++#define IGB_MAX_RX_QUEUES 1 ++#endif ++ ++#ifdef CONFIG_IGB_NAPI ++#undef CONFIG_IGB_NAPI ++#endif ++ ++#ifdef IGB_HAVE_TX_TIMEOUT ++#undef IGB_HAVE_TX_TIMEOUT ++#endif ++ ++#ifdef ETHTOOL_GPERMADDR ++#undef ETHTOOL_GPERMADDR ++#endif ++ ++#ifdef CONFIG_PM ++#undef CONFIG_PM ++#endif ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++#undef CONFIG_NET_POLL_CONTROLLER ++#endif ++ ++#ifdef MAX_SKB_FRAGS ++#undef MAX_SKB_FRAGS ++#define MAX_SKB_FRAGS 1 ++#endif ++ ++#ifdef IGB_FRAMES_SUPPORT ++#undef IGB_FRAMES_SUPPORT ++#endif ++ ++#define MAJ 5 ++#define MIN 2 ++#define BUILD 18 ++#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ ++__stringify(BUILD) "-k" ++char igb_driver_name[] = "rt_igb"; ++char igb_driver_version[] = DRV_VERSION; ++static const char igb_driver_string[] = ++ "Intel(R) Gigabit Ethernet Network Driver"; ++static const char igb_copyright[] = ++ "Copyright (c) 2007-2014 Intel Corporation."; ++ ++static const struct e1000_info *igb_info_tbl[] = { ++ [board_82575] = &e1000_82575_info, ++}; ++ ++#define MAX_UNITS 8 ++static int InterruptThrottle = 0; ++module_param(InterruptThrottle, uint, 0); ++MODULE_PARM_DESC(InterruptThrottle, "Throttle interrupts (boolean, false by default)"); ++ ++static const struct pci_device_id igb_pci_tbl[] = { ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, 
++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, ++ /* required last entry */ ++ {0, } ++}; ++ ++MODULE_DEVICE_TABLE(pci, igb_pci_tbl); ++ ++static int igb_setup_all_tx_resources(struct igb_adapter *); ++static int igb_setup_all_rx_resources(struct igb_adapter *); ++static void igb_free_all_tx_resources(struct igb_adapter *); ++static void igb_free_all_rx_resources(struct igb_adapter *); ++static void igb_setup_mrqc(struct igb_adapter *); ++static int igb_probe(struct pci_dev *, const struct pci_device_id *); ++static void igb_remove(struct pci_dev *pdev); ++static int igb_sw_init(struct igb_adapter *); ++static int igb_open(struct rtnet_device *); ++static int igb_close(struct rtnet_device *); ++static void igb_configure(struct igb_adapter *); ++static void igb_configure_tx(struct igb_adapter *); ++static void igb_configure_rx(struct igb_adapter *); ++static void igb_clean_all_tx_rings(struct igb_adapter *); ++static void igb_clean_all_rx_rings(struct igb_adapter *); ++static void igb_clean_tx_ring(struct igb_ring *); ++static void igb_clean_rx_ring(struct igb_ring *); ++static void igb_set_rx_mode(struct rtnet_device *); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_update_phy_info(struct timer_list *); ++static void igb_watchdog(struct timer_list *); ++#else ++static void igb_update_phy_info(unsigned long); ++static void igb_watchdog(unsigned long); ++#endif ++static void igb_watchdog_task(struct work_struct *); ++static netdev_tx_t igb_xmit_frame(struct rtskb *skb, struct rtnet_device *); ++static struct net_device_stats *igb_get_stats(struct rtnet_device *); ++static int igb_intr(rtdm_irq_t *irq_handle); ++static int igb_intr_msi(rtdm_irq_t *irq_handle); ++static void igb_nrtsig_watchdog(rtdm_nrtsig_t *sig, void *data); ++static irqreturn_t igb_msix_other(int irq, void *); ++static int igb_msix_ring(rtdm_irq_t *irq_handle); ++static void igb_poll(struct igb_q_vector *); ++static bool igb_clean_tx_irq(struct igb_q_vector *); ++static bool igb_clean_rx_irq(struct igb_q_vector *, int); ++static int igb_ioctl(struct rtnet_device *, struct ifreq *ifr, int cmd); ++static void igb_reset_task(struct work_struct *); ++static void igb_vlan_mode(struct rtnet_device *netdev, ++ netdev_features_t features); ++static int igb_vlan_rx_add_vid(struct rtnet_device *, __be16, u16); ++static void igb_restore_vlan(struct igb_adapter *); ++static void 
igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); ++ ++#ifdef CONFIG_PM ++#ifdef CONFIG_PM_SLEEP ++static int igb_suspend(struct device *); ++#endif ++static int igb_resume(struct device *); ++static int igb_runtime_suspend(struct device *dev); ++static int igb_runtime_resume(struct device *dev); ++static int igb_runtime_idle(struct device *dev); ++static const struct dev_pm_ops igb_pm_ops = { ++ SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) ++ SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, ++ igb_runtime_idle) ++}; ++#endif ++static void igb_shutdown(struct pci_dev *); ++static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* for netdump / net console */ ++static void igb_netpoll(struct rtnet_device *); ++#endif ++ ++static pci_ers_result_t igb_io_error_detected(struct pci_dev *, ++ pci_channel_state_t); ++static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); ++static void igb_io_resume(struct pci_dev *); ++ ++static const struct pci_error_handlers igb_err_handler = { ++ .error_detected = igb_io_error_detected, ++ .slot_reset = igb_io_slot_reset, ++ .resume = igb_io_resume, ++}; ++ ++static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); ++ ++static struct pci_driver igb_driver = { ++ .name = igb_driver_name, ++ .id_table = igb_pci_tbl, ++ .probe = igb_probe, ++ .remove = igb_remove, ++#ifdef CONFIG_PM ++ .driver.pm = &igb_pm_ops, ++#endif ++ .shutdown = igb_shutdown, ++ .sriov_configure = igb_pci_sriov_configure, ++ .err_handler = &igb_err_handler ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) ++static int local_debug = -1; ++module_param_named(debug, local_debug, int, 0); ++MODULE_PARM_DESC(debug, "debug level (0=none,...,16=all)"); ++ ++struct igb_reg_info { ++ u32 ofs; ++ char *name; ++}; ++ ++static const struct igb_reg_info igb_reg_info_tbl[] = { ++ ++ /* General Registers */ ++ {E1000_CTRL, "CTRL"}, ++ {E1000_STATUS, "STATUS"}, ++ {E1000_CTRL_EXT, "CTRL_EXT"}, ++ ++ /* Interrupt Registers */ ++ {E1000_ICR, "ICR"}, ++ ++ /* RX Registers */ ++ {E1000_RCTL, "RCTL"}, ++ {E1000_RDLEN(0), "RDLEN"}, ++ {E1000_RDH(0), "RDH"}, ++ {E1000_RDT(0), "RDT"}, ++ {E1000_RXDCTL(0), "RXDCTL"}, ++ {E1000_RDBAL(0), "RDBAL"}, ++ {E1000_RDBAH(0), "RDBAH"}, ++ ++ /* TX Registers */ ++ {E1000_TCTL, "TCTL"}, ++ {E1000_TDBAL(0), "TDBAL"}, ++ {E1000_TDBAH(0), "TDBAH"}, ++ {E1000_TDLEN(0), "TDLEN"}, ++ {E1000_TDH(0), "TDH"}, ++ {E1000_TDT(0), "TDT"}, ++ {E1000_TXDCTL(0), "TXDCTL"}, ++ {E1000_TDFH, "TDFH"}, ++ {E1000_TDFT, "TDFT"}, ++ {E1000_TDFHS, "TDFHS"}, ++ {E1000_TDFPC, "TDFPC"}, ++ ++ /* List Terminator */ ++ {} ++}; ++ ++/* igb_regdump - register printout routine */ ++static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) ++{ ++ int n = 0; ++ char rname[16]; ++ u32 regs[8]; ++ ++ switch (reginfo->ofs) { ++ case E1000_RDLEN(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDLEN(n)); ++ break; ++ case E1000_RDH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDH(n)); ++ break; ++ case E1000_RDT(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDT(n)); ++ break; ++ case E1000_RXDCTL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RXDCTL(n)); ++ break; ++ case E1000_RDBAL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAL(n)); ++ break; ++ case E1000_RDBAH(0): ++ 
for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAH(n)); ++ break; ++ case E1000_TDBAL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_RDBAL(n)); ++ break; ++ case E1000_TDBAH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDBAH(n)); ++ break; ++ case E1000_TDLEN(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDLEN(n)); ++ break; ++ case E1000_TDH(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDH(n)); ++ break; ++ case E1000_TDT(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TDT(n)); ++ break; ++ case E1000_TXDCTL(0): ++ for (n = 0; n < 4; n++) ++ regs[n] = rd32(E1000_TXDCTL(n)); ++ break; ++ default: ++ pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); ++ return; ++ } ++ ++ snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); ++ pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], ++ regs[2], regs[3]); ++} ++ ++/* igb_dump - Print registers, Tx-rings and Rx-rings */ ++static void igb_dump(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct igb_reg_info *reginfo; ++ struct igb_ring *tx_ring; ++ union e1000_adv_tx_desc *tx_desc; ++ struct my_u0 { u64 a; u64 b; } *u0; ++ struct igb_ring *rx_ring; ++ union e1000_adv_rx_desc *rx_desc; ++ u32 staterr; ++ u16 i, n; ++ ++ /* Print netdevice Info */ ++ if (netdev) { ++ dev_info(&adapter->pdev->dev, "Net device Info\n"); ++ pr_info("Device Name\n"); ++ pr_info("%s\n", netdev->name); ++ } ++ ++ /* Print Registers */ ++ dev_info(&adapter->pdev->dev, "Register Dump\n"); ++ pr_info(" Register Name Value\n"); ++ for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; ++ reginfo->name; reginfo++) { ++ igb_regdump(hw, reginfo); ++ } ++ ++ /* Print TX Ring Summary */ ++ if (!netdev || !rtnetif_running(netdev)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); ++ pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); ++ for (n = 0; n < adapter->num_tx_queues; n++) { ++ struct igb_tx_buffer *buffer_info; ++ tx_ring = adapter->tx_ring[n]; ++ buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; ++ pr_info(" %5d %5X %5X %p %016llX\n", ++ n, tx_ring->next_to_use, tx_ring->next_to_clean, ++ buffer_info->next_to_watch, ++ (u64)buffer_info->time_stamp); ++ } ++ ++ dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); ++ ++ /* Transmit Descriptor Formats ++ * ++ * Advanced Transmit Descriptor ++ * +--------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +--------------------------------------------------------------+ ++ * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | ++ * +--------------------------------------------------------------+ ++ * 63 46 45 40 39 38 36 35 32 31 24 15 0 ++ */ ++ ++ for (n = 0; n < adapter->num_tx_queues; n++) { ++ tx_ring = adapter->tx_ring[n]; ++ pr_info("------------------------------------\n"); ++ pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); ++ pr_info("------------------------------------\n"); ++ pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " ++ "[bi->dma ] leng ntw timestamp " ++ "bi->skb\n"); ++ ++ for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { ++ const char *next_desc; ++ struct igb_tx_buffer *buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ buffer_info = &tx_ring->tx_buffer_info[i]; ++ u0 = (struct my_u0 *)tx_desc; ++ if (i == tx_ring->next_to_use && ++ i == tx_ring->next_to_clean) ++ next_desc = " NTC/U"; ++ else if (i == tx_ring->next_to_use) ++ next_desc 
= " NTU"; ++ else if (i == tx_ring->next_to_clean) ++ next_desc = " NTC"; ++ else ++ next_desc = ""; ++ ++ pr_info("T [0x%03X] %016llX %016llX" ++ " %p %016llX %p%s\n", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ buffer_info->next_to_watch, ++ (u64)buffer_info->time_stamp, ++ buffer_info->skb, next_desc); ++ ++ if (buffer_info->skb) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, ++ 16, 1, buffer_info->skb->data, ++ 14, ++ true); ++ } ++ } ++ ++ /* Print RX Rings Summary */ ++ dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); ++ pr_info("Queue [NTU] [NTC]\n"); ++ for (n = 0; n < adapter->num_rx_queues; n++) { ++ rx_ring = adapter->rx_ring[n]; ++ pr_info(" %5d %5X %5X\n", ++ n, rx_ring->next_to_use, rx_ring->next_to_clean); ++ } ++ ++ /* Print RX Rings */ ++ dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); ++ ++ /* Advanced Receive Descriptor (Read) Format ++ * 63 1 0 ++ * +-----------------------------------------------------+ ++ * 0 | Packet Buffer Address [63:1] |A0/NSE| ++ * +----------------------------------------------+------+ ++ * 8 | Header Buffer Address [63:1] | DD | ++ * +-----------------------------------------------------+ ++ * ++ * ++ * Advanced Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 30 21 20 17 16 4 3 0 ++ * +------------------------------------------------------+ ++ * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | ++ * | Checksum Ident | | | | Type | Type | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ ++ for (n = 0; n < adapter->num_rx_queues; n++) { ++ rx_ring = adapter->rx_ring[n]; ++ pr_info("------------------------------------\n"); ++ pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); ++ pr_info("------------------------------------\n"); ++ pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " ++ "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); ++ pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" ++ "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ const char *next_desc; ++ struct igb_rx_buffer *buffer_info; ++ buffer_info = &rx_ring->rx_buffer_info[i]; ++ rx_desc = IGB_RX_DESC(rx_ring, i); ++ u0 = (struct my_u0 *)rx_desc; ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ ++ if (i == rx_ring->next_to_use) ++ next_desc = " NTU"; ++ else if (i == rx_ring->next_to_clean) ++ next_desc = " NTC"; ++ else ++ next_desc = ""; ++ ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", ++ "RWB", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ next_desc); ++ } else { ++ pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", ++ "R ", i, ++ le64_to_cpu(u0->a), ++ le64_to_cpu(u0->b), ++ (u64)buffer_info->dma, ++ next_desc); ++ ++ } ++ } ++ } ++ ++exit: ++ return; ++} ++ ++/** ++ * igb_get_hw_dev - return device ++ * @hw: pointer to hardware structure ++ * ++ * used by hardware layer to print debugging information ++ **/ ++struct rtnet_device *igb_get_hw_dev(struct e1000_hw *hw) ++{ ++ struct igb_adapter *adapter = hw->back; ++ return adapter->netdev; ++} ++ ++/** ++ * igb_init_module - Driver Registration Routine ++ * ++ * igb_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. 
++ **/ ++static int __init igb_init_module(void) ++{ ++ int ret; ++ ++ pr_info("%s - version %s\n", ++ igb_driver_string, igb_driver_version); ++ pr_info("%s\n", igb_copyright); ++ ++ ret = pci_register_driver(&igb_driver); ++ return ret; ++} ++ ++module_init(igb_init_module); ++ ++/** ++ * igb_exit_module - Driver Exit Cleanup Routine ++ * ++ * igb_exit_module is called just before the driver is removed ++ * from memory. ++ **/ ++static void __exit igb_exit_module(void) ++{ ++ pci_unregister_driver(&igb_driver); ++} ++ ++module_exit(igb_exit_module); ++ ++#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) ++/** ++ * igb_cache_ring_register - Descriptor ring to register mapping ++ * @adapter: board private structure to initialize ++ * ++ * Once we know the feature-set enabled for the device, we'll cache ++ * the register offset the descriptor ring is assigned to. ++ **/ ++static void igb_cache_ring_register(struct igb_adapter *adapter) ++{ ++ int i = 0, j = 0; ++ u32 rbase_offset = 0; ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82576: ++ /* The queues are allocated for virtualization such that VF 0 ++ * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. ++ * In order to avoid collision we start at the first free queue ++ * and continue consuming queues in the same sequence ++ */ ++ /* Fall through */ ++ case e1000_82575: ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* Fall through */ ++ default: ++ for (; i < adapter->num_rx_queues; i++) ++ adapter->rx_ring[i]->reg_idx = rbase_offset + i; ++ for (; j < adapter->num_tx_queues; j++) ++ adapter->tx_ring[j]->reg_idx = rbase_offset + j; ++ break; ++ } ++} ++ ++u32 igb_rd32(struct e1000_hw *hw, u32 reg) ++{ ++ struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); ++ u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); ++ u32 value = 0; ++ ++ if (E1000_REMOVED(hw_addr)) ++ return ~value; ++ ++ value = readl(&hw_addr[reg]); ++ ++ /* reads should not return all F's */ ++ if (!(~value) && (!reg || !(~readl(hw_addr)))) { ++ struct rtnet_device *netdev = igb->netdev; ++ hw->hw_addr = NULL; ++ rtnetif_device_detach(netdev); ++ rtdev_err(netdev, "PCIe link lost, device now detached\n"); ++ } ++ ++ return value; ++} ++ ++/** ++ * igb_write_ivar - configure ivar for given MSI-X vector ++ * @hw: pointer to the HW structure ++ * @msix_vector: vector number we are allocating to a given ring ++ * @index: row index of IVAR register to write within IVAR table ++ * @offset: column offset of in IVAR, should be multiple of 8 ++ * ++ * This function is intended to handle the writing of the IVAR register ++ * for adapters 82576 and newer. The IVAR table consists of 2 columns, ++ * each containing an cause allocation for an Rx and Tx ring, and a ++ * variable number of rows depending on the number of queues supported. 
++ **/ ++static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, ++ int index, int offset) ++{ ++ u32 ivar = array_rd32(E1000_IVAR0, index); ++ ++ /* clear any bits that are currently set */ ++ ivar &= ~((u32)0xFF << offset); ++ ++ /* write vector and valid bit */ ++ ivar |= (msix_vector | E1000_IVAR_VALID) << offset; ++ ++ array_wr32(E1000_IVAR0, index, ivar); ++} ++ ++#define IGB_N0_QUEUE -1 ++static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; ++ int rx_queue = IGB_N0_QUEUE; ++ int tx_queue = IGB_N0_QUEUE; ++ u32 msixbm = 0; ++ ++ if (q_vector->rx.ring) ++ rx_queue = q_vector->rx.ring->reg_idx; ++ if (q_vector->tx.ring) ++ tx_queue = q_vector->tx.ring->reg_idx; ++ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ /* The 82575 assigns vectors using a bitmask, which matches the ++ * bitmask for the EICR/EIMS/EIMC registers. To assign one ++ * or more queues to a vector, we write the appropriate bits ++ * into the MSIXBM register for that vector. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; ++ if (tx_queue > IGB_N0_QUEUE) ++ msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; ++ if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0) ++ msixbm |= E1000_EIMS_OTHER; ++ array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); ++ q_vector->eims_value = msixbm; ++ break; ++ case e1000_82576: ++ /* 82576 uses a table that essentially consists of 2 columns ++ * with 8 rows. The ordering is column-major so we use the ++ * lower 3 bits as the row index, and the 4th bit as the ++ * column offset. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ rx_queue & 0x7, ++ (rx_queue & 0x8) << 1); ++ if (tx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ tx_queue & 0x7, ++ ((tx_queue & 0x8) << 1) + 8); ++ q_vector->eims_value = 1 << msix_vector; ++ break; ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* On 82580 and newer adapters the scheme is similar to 82576 ++ * however instead of ordering column-major we have things ++ * ordered row-major. So we traverse the table by using ++ * bit 0 as the column offset, and the remaining bits as the ++ * row index. ++ */ ++ if (rx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ rx_queue >> 1, ++ (rx_queue & 0x1) << 4); ++ if (tx_queue > IGB_N0_QUEUE) ++ igb_write_ivar(hw, msix_vector, ++ tx_queue >> 1, ++ ((tx_queue & 0x1) << 4) + 8); ++ q_vector->eims_value = 1 << msix_vector; ++ break; ++ default: ++ BUG(); ++ break; ++ } ++ ++ /* add q_vector eims value to global eims_enable_mask */ ++ adapter->eims_enable_mask |= q_vector->eims_value; ++ ++ /* configure q_vector to set itr on first interrupt */ ++ q_vector->set_itr = 1; ++} ++ ++/** ++ * igb_configure_msix - Configure MSI-X hardware ++ * @adapter: board private structure to initialize ++ * ++ * igb_configure_msix sets up the hardware to properly ++ * generate MSI-X interrupts. ++ **/ ++static void igb_configure_msix(struct igb_adapter *adapter) ++{ ++ u32 tmp; ++ int i, vector = 0; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ adapter->eims_enable_mask = 0; ++ ++ /* set vector for other causes, i.e. link changes */ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ tmp = rd32(E1000_CTRL_EXT); ++ /* enable MSI-X PBA support*/ ++ tmp |= E1000_CTRL_EXT_PBA_CLR; ++ ++ /* Auto-Mask interrupts upon ICR read. 
*/ ++ tmp |= E1000_CTRL_EXT_EIAME; ++ tmp |= E1000_CTRL_EXT_IRCA; ++ ++ wr32(E1000_CTRL_EXT, tmp); ++ ++ /* enable msix_other interrupt */ ++ array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); ++ adapter->eims_other = E1000_EIMS_OTHER; ++ ++ break; ++ ++ case e1000_82576: ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ case e1000_i211: ++ /* Turn on MSI-X capability first, or our settings ++ * won't stick. And it will take days to debug. ++ */ ++ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | ++ E1000_GPIE_PBA | E1000_GPIE_EIAME | ++ E1000_GPIE_NSICR); ++ ++ /* enable msix_other interrupt */ ++ adapter->eims_other = 1 << vector; ++ tmp = (vector++ | E1000_IVAR_VALID) << 8; ++ ++ wr32(E1000_IVAR_MISC, tmp); ++ break; ++ default: ++ /* do nothing, since nothing else supports MSI-X */ ++ break; ++ } /* switch (hw->mac.type) */ ++ ++ adapter->eims_enable_mask |= adapter->eims_other; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ igb_assign_vector(adapter->q_vector[i], vector++); ++ ++ wrfl(); ++} ++ ++/** ++ * igb_request_msix - Initialize MSI-X interrupts ++ * @adapter: board private structure to initialize ++ * ++ * igb_request_msix allocates MSI-X vectors and requests interrupts from the ++ * kernel. ++ **/ ++static int igb_request_msix(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ int i, err = 0, vector = 0, free_vector = 0; ++ ++ err = request_irq(adapter->msix_entries[vector].vector, ++ igb_msix_other, 0, netdev->name, adapter); ++ if (err) ++ goto err_out; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) { ++ struct igb_q_vector *q_vector = adapter->q_vector[i]; ++ ++ vector++; ++ ++ q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); ++ ++ if (q_vector->rx.ring && q_vector->tx.ring) ++ sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, ++ q_vector->rx.ring->queue_index); ++ else if (q_vector->tx.ring) ++ sprintf(q_vector->name, "%s-tx-%u", netdev->name, ++ q_vector->tx.ring->queue_index); ++ else if (q_vector->rx.ring) ++ sprintf(q_vector->name, "%s-rx-%u", netdev->name, ++ q_vector->rx.ring->queue_index); ++ else ++ sprintf(q_vector->name, "%s-unused", netdev->name); ++ ++ err = rtdm_irq_request(&adapter->msix_irq_handle[vector], ++ adapter->msix_entries[vector].vector, ++ igb_msix_ring, 0, q_vector->name, q_vector); ++ if (err) ++ goto err_free; ++ } ++ ++ igb_configure_msix(adapter); ++ return 0; ++ ++err_free: ++ /* free already assigned IRQs */ ++ free_irq(adapter->msix_entries[free_vector++].vector, adapter); ++ ++ vector--; ++ for (i = 0; i < vector; i++) ++ rtdm_irq_free(&adapter->msix_irq_handle[free_vector++]); ++err_out: ++ return err; ++} ++ ++/** ++ * igb_free_q_vector - Free memory allocated for specific interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_idx: Index of vector to be freed ++ * ++ * This function frees the memory allocated to the q_vector. ++ **/ ++static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) ++{ ++ struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; ++ ++ adapter->q_vector[v_idx] = NULL; ++ ++ /* igb_get_stats64() might access the rings on this vector, ++ * we must wait a grace period before freeing it. 
++ */ ++ if (q_vector) ++ kfree_rcu(q_vector, rcu); ++} ++ ++/** ++ * igb_reset_q_vector - Reset config for interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_idx: Index of vector to be reset ++ * ++ * If NAPI is enabled it will delete any references to the ++ * NAPI struct. This is preparation for igb_free_q_vector. ++ **/ ++static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx) ++{ ++ struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; ++ ++ /* Coming from igb_set_interrupt_capability, the vectors are not yet ++ * allocated. So, q_vector is NULL so we should stop here. ++ */ ++ if (!q_vector) ++ return; ++ ++ if (q_vector->tx.ring) ++ adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; ++ ++ if (q_vector->rx.ring) ++ adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; ++} ++ ++static void igb_reset_interrupt_capability(struct igb_adapter *adapter) ++{ ++ int v_idx = adapter->num_q_vectors; ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ pci_disable_msix(adapter->pdev); ++ else if (adapter->flags & IGB_FLAG_HAS_MSI) ++ pci_disable_msi(adapter->pdev); ++ ++ while (v_idx--) ++ igb_reset_q_vector(adapter, v_idx); ++} ++ ++/** ++ * igb_free_q_vectors - Free memory allocated for interrupt vectors ++ * @adapter: board private structure to initialize ++ * ++ * This function frees the memory allocated to the q_vectors. In addition if ++ * NAPI is enabled it will delete any references to the NAPI struct prior ++ * to freeing the q_vector. ++ **/ ++static void igb_free_q_vectors(struct igb_adapter *adapter) ++{ ++ int v_idx = adapter->num_q_vectors; ++ ++ adapter->num_tx_queues = 0; ++ adapter->num_rx_queues = 0; ++ adapter->num_q_vectors = 0; ++ ++ while (v_idx--) { ++ igb_reset_q_vector(adapter, v_idx); ++ igb_free_q_vector(adapter, v_idx); ++ } ++} ++ ++/** ++ * igb_clear_interrupt_scheme - reset the device to a state of no interrupts ++ * @adapter: board private structure to initialize ++ * ++ * This function resets the device so that it has 0 Rx queues, Tx queues, and ++ * MSI-X interrupts allocated. ++ */ ++static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) ++{ ++ igb_free_q_vectors(adapter); ++ igb_reset_interrupt_capability(adapter); ++} ++ ++/** ++ * igb_set_interrupt_capability - set MSI or MSI-X if supported ++ * @adapter: board private structure to initialize ++ * @msix: boolean value of MSIX capability ++ * ++ * Attempt to configure interrupts using the best available ++ * capabilities of the hardware and kernel. ++ **/ ++static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) ++{ ++ int err; ++ int numvecs, i; ++ ++ if (!msix) ++ goto msi_only; ++ adapter->flags |= IGB_FLAG_HAS_MSIX; ++ ++ /* Number of supported queues. 
*/ ++ adapter->num_rx_queues = adapter->rss_queues; ++ adapter->num_tx_queues = adapter->rss_queues; ++ ++ /* start with one vector for every Rx queue */ ++ numvecs = adapter->num_rx_queues; ++ ++ /* if Tx handler is separate add 1 for every Tx queue */ ++ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) ++ numvecs += adapter->num_tx_queues; ++ ++ /* store the number of vectors reserved for queues */ ++ adapter->num_q_vectors = numvecs; ++ ++ /* add 1 vector for link status interrupts */ ++ numvecs++; ++ for (i = 0; i < numvecs; i++) ++ adapter->msix_entries[i].entry = i; ++ ++ err = pci_enable_msix_range(adapter->pdev, ++ adapter->msix_entries, ++ numvecs, ++ numvecs); ++ if (err > 0) ++ return; ++ ++ igb_reset_interrupt_capability(adapter); ++ ++ /* If we can't do MSI-X, try MSI */ ++msi_only: ++ adapter->flags &= ~IGB_FLAG_HAS_MSIX; ++ adapter->rss_queues = 1; ++ adapter->flags |= IGB_FLAG_QUEUE_PAIRS; ++ adapter->num_rx_queues = 1; ++ adapter->num_tx_queues = 1; ++ adapter->num_q_vectors = 1; ++ if (!pci_enable_msi(adapter->pdev)) ++ adapter->flags |= IGB_FLAG_HAS_MSI; ++} ++ ++static void igb_add_ring(struct igb_ring *ring, ++ struct igb_ring_container *head) ++{ ++ head->ring = ring; ++ head->count++; ++} ++ ++/** ++ * igb_alloc_q_vector - Allocate memory for a single interrupt vector ++ * @adapter: board private structure to initialize ++ * @v_count: q_vectors allocated on adapter, used for ring interleaving ++ * @v_idx: index of vector in adapter struct ++ * @txr_count: total number of Tx rings to allocate ++ * @txr_idx: index of first Tx ring to allocate ++ * @rxr_count: total number of Rx rings to allocate ++ * @rxr_idx: index of first Rx ring to allocate ++ * ++ * We allocate one q_vector. If allocation fails we return -ENOMEM. ++ **/ ++static int igb_alloc_q_vector(struct igb_adapter *adapter, ++ int v_count, int v_idx, ++ int txr_count, int txr_idx, ++ int rxr_count, int rxr_idx) ++{ ++ struct igb_q_vector *q_vector; ++ struct igb_ring *ring; ++ int ring_count, size; ++ ++ /* igb only supports 1 Tx and/or 1 Rx queue per vector */ ++ if (txr_count > 1 || rxr_count > 1) ++ return -ENOMEM; ++ ++ ring_count = txr_count + rxr_count; ++ size = sizeof(struct igb_q_vector) + ++ (sizeof(struct igb_ring) * ring_count); ++ ++ /* allocate q_vector and rings */ ++ q_vector = adapter->q_vector[v_idx]; ++ if (!q_vector) ++ q_vector = kzalloc(size, GFP_KERNEL); ++ else ++ memset(q_vector, 0, size); ++ if (!q_vector) ++ return -ENOMEM; ++ ++ /* tie q_vector and adapter together */ ++ adapter->q_vector[v_idx] = q_vector; ++ q_vector->adapter = adapter; ++ ++ /* initialize work limits */ ++ q_vector->tx.work_limit = adapter->tx_work_limit; ++ ++ /* initialize ITR configuration */ ++ q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); ++ q_vector->itr_val = IGB_START_ITR; ++ ++ /* initialize pointer to rings */ ++ ring = q_vector->ring; ++ ++ /* intialize ITR */ ++ if (rxr_count) { ++ /* rx or rx/tx vector */ ++ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) ++ q_vector->itr_val = adapter->rx_itr_setting; ++ } else { ++ /* tx only vector */ ++ if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) ++ q_vector->itr_val = adapter->tx_itr_setting; ++ } ++ ++ if (txr_count) { ++ /* assign generic ring traits */ ++ ring->dev = &adapter->pdev->dev; ++ ring->netdev = adapter->netdev; ++ ++ /* configure backlink on ring */ ++ ring->q_vector = q_vector; ++ ++ /* update q_vector Tx values */ ++ igb_add_ring(ring, &q_vector->tx); ++ ++ /* For 82575, context index must be unique 
per ring. */ ++ if (adapter->hw.mac.type == e1000_82575) ++ set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); ++ ++ /* apply Tx specific ring traits */ ++ ring->count = adapter->tx_ring_count; ++ ring->queue_index = txr_idx; ++ ++ /* assign ring to adapter */ ++ adapter->tx_ring[txr_idx] = ring; ++ ++ /* push pointer to next ring */ ++ ring++; ++ } ++ ++ if (rxr_count) { ++ /* assign generic ring traits */ ++ ring->dev = &adapter->pdev->dev; ++ ring->netdev = adapter->netdev; ++ ++ /* configure backlink on ring */ ++ ring->q_vector = q_vector; ++ ++ /* update q_vector Rx values */ ++ igb_add_ring(ring, &q_vector->rx); ++ ++ /* set flag indicating ring supports SCTP checksum offload */ ++ if (adapter->hw.mac.type >= e1000_82576) ++ set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); ++ ++ /* On i350, i354, i210, and i211, loopback VLAN packets ++ * have the tag byte-swapped. ++ */ ++ if (adapter->hw.mac.type >= e1000_i350) ++ set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); ++ ++ /* apply Rx specific ring traits */ ++ ring->count = adapter->rx_ring_count; ++ ring->queue_index = rxr_idx; ++ ++ /* assign ring to adapter */ ++ adapter->rx_ring[rxr_idx] = ring; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * igb_alloc_q_vectors - Allocate memory for interrupt vectors ++ * @adapter: board private structure to initialize ++ * ++ * We allocate one q_vector per queue interrupt. If allocation fails we ++ * return -ENOMEM. ++ **/ ++static int igb_alloc_q_vectors(struct igb_adapter *adapter) ++{ ++ int q_vectors = adapter->num_q_vectors; ++ int rxr_remaining = adapter->num_rx_queues; ++ int txr_remaining = adapter->num_tx_queues; ++ int rxr_idx = 0, txr_idx = 0, v_idx = 0; ++ int err; ++ ++ if (q_vectors >= (rxr_remaining + txr_remaining)) { ++ for (; rxr_remaining; v_idx++) { ++ err = igb_alloc_q_vector(adapter, q_vectors, v_idx, ++ 0, 0, 1, rxr_idx); ++ ++ if (err) ++ goto err_out; ++ ++ /* update counts and index */ ++ rxr_remaining--; ++ rxr_idx++; ++ } ++ } ++ ++ for (; v_idx < q_vectors; v_idx++) { ++ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); ++ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); ++ ++ err = igb_alloc_q_vector(adapter, q_vectors, v_idx, ++ tqpv, txr_idx, rqpv, rxr_idx); ++ ++ if (err) ++ goto err_out; ++ ++ /* update counts and index */ ++ rxr_remaining -= rqpv; ++ txr_remaining -= tqpv; ++ rxr_idx++; ++ txr_idx++; ++ } ++ ++ return 0; ++ ++err_out: ++ adapter->num_tx_queues = 0; ++ adapter->num_rx_queues = 0; ++ adapter->num_q_vectors = 0; ++ ++ while (v_idx--) ++ igb_free_q_vector(adapter, v_idx); ++ ++ return -ENOMEM; ++} ++ ++/** ++ * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors ++ * @adapter: board private structure to initialize ++ * @msix: boolean value of MSIX capability ++ * ++ * This function initializes the interrupts and allocates all of the queues. 
++ **/ ++static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ igb_set_interrupt_capability(adapter, msix); ++ ++ err = igb_alloc_q_vectors(adapter); ++ if (err) { ++ dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); ++ goto err_alloc_q_vectors; ++ } ++ ++ igb_cache_ring_register(adapter); ++ ++ return 0; ++ ++err_alloc_q_vectors: ++ igb_reset_interrupt_capability(adapter); ++ return err; ++} ++ ++/** ++ * igb_request_irq - initialize interrupts ++ * @adapter: board private structure to initialize ++ * ++ * Attempts to configure interrupts using the best available ++ * capabilities of the hardware and kernel. ++ **/ ++static int igb_request_irq(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ int err = 0; ++ ++ rt_stack_connect(netdev, &STACK_manager); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ err = igb_request_msix(adapter); ++ if (!err) ++ goto request_done; ++ /* fall back to MSI */ ++ igb_free_all_tx_resources(adapter); ++ igb_free_all_rx_resources(adapter); ++ ++ igb_clear_interrupt_scheme(adapter); ++ err = igb_init_interrupt_scheme(adapter, false); ++ if (err) ++ goto request_done; ++ ++ igb_setup_all_tx_resources(adapter); ++ igb_setup_all_rx_resources(adapter); ++ igb_configure(adapter); ++ } ++ ++ igb_assign_vector(adapter->q_vector[0], 0); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSI) { ++ err = rtdm_irq_request(&adapter->irq_handle, ++ pdev->irq, igb_intr_msi, 0, ++ netdev->name, adapter); ++ if (!err) ++ goto request_done; ++ ++ /* fall back to legacy interrupts */ ++ igb_reset_interrupt_capability(adapter); ++ adapter->flags &= ~IGB_FLAG_HAS_MSI; ++ } ++ ++ err = rtdm_irq_request(&adapter->irq_handle, ++ pdev->irq, igb_intr, IRQF_SHARED, ++ netdev->name, adapter); ++ ++ if (err) ++ dev_err(&pdev->dev, "Error %d getting interrupt\n", ++ err); ++ ++request_done: ++ return err; ++} ++ ++static void igb_free_irq(struct igb_adapter *adapter) ++{ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ int vector = 0, i; ++ ++ free_irq(adapter->msix_entries[vector++].vector, adapter); ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ rtdm_irq_free(&adapter->msix_irq_handle[vector++]); ++ } else { ++ rtdm_irq_free(&adapter->irq_handle); ++ } ++} ++ ++/** ++ * igb_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++static void igb_irq_disable(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* we need to be careful when disabling interrupts. 
The VFs are also ++ * mapped into these registers and so clearing the bits can cause ++ * issues on the VF drivers so we only need to clear what we set ++ */ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 regval = rd32(E1000_EIAM); ++ ++ wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); ++ wr32(E1000_EIMC, adapter->eims_enable_mask); ++ regval = rd32(E1000_EIAC); ++ wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask); ++ } ++ ++ wr32(E1000_IAM, 0); ++ wr32(E1000_IMC, ~0); ++ wrfl(); ++ ++ msleep(10); ++} ++ ++/** ++ * igb_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++static void igb_irq_enable(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; ++ u32 regval = rd32(E1000_EIAC); ++ ++ wr32(E1000_EIAC, regval | adapter->eims_enable_mask); ++ regval = rd32(E1000_EIAM); ++ wr32(E1000_EIAM, regval | adapter->eims_enable_mask); ++ wr32(E1000_EIMS, adapter->eims_enable_mask); ++ wr32(E1000_IMS, ims); ++ } else { ++ wr32(E1000_IMS, IMS_ENABLE_MASK | ++ E1000_IMS_DRSTA); ++ wr32(E1000_IAM, IMS_ENABLE_MASK | ++ E1000_IMS_DRSTA); ++ } ++} ++ ++static void igb_update_mng_vlan(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ ++ if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ /* add VID to filter table */ ++ igb_vfta_set(hw, vid, true); ++ adapter->mng_vlan_id = vid; ++ } else { ++ adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; ++ } ++ ++ if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && ++ (vid != old_vid) && ++ !test_bit(old_vid, adapter->active_vlans)) { ++ /* remove VID from filter table */ ++ igb_vfta_set(hw, old_vid, false); ++ } ++} ++ ++/** ++ * igb_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. ++ **/ ++static void igb_release_hw_control(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ ++ /* Let firmware take over control of h/w */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++} ++ ++/** ++ * igb_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. 
++ **/ ++static void igb_get_hw_control(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ ++ /* Let firmware know the driver has taken over */ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ wr32(E1000_CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++} ++ ++/** ++ * igb_configure - configure the hardware for RX and TX ++ * @adapter: private board structure ++ **/ ++static void igb_configure(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int i; ++ ++ igb_get_hw_control(adapter); ++ igb_set_rx_mode(netdev); ++ ++ igb_restore_vlan(adapter); ++ ++ igb_setup_tctl(adapter); ++ igb_setup_mrqc(adapter); ++ igb_setup_rctl(adapter); ++ ++ igb_configure_tx(adapter); ++ igb_configure_rx(adapter); ++ ++ igb_rx_fifo_flush_82575(&adapter->hw); ++ ++ /* call igb_desc_unused which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean ++ */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct igb_ring *ring = adapter->rx_ring[i]; ++ igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); ++ } ++} ++ ++/** ++ * igb_power_up_link - Power up the phy/serdes link ++ * @adapter: address of board private structure ++ **/ ++void igb_power_up_link(struct igb_adapter *adapter) ++{ ++ igb_reset_phy(&adapter->hw); ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) ++ igb_power_up_phy_copper(&adapter->hw); ++ else ++ igb_power_up_serdes_link_82575(&adapter->hw); ++ ++ igb_setup_link(&adapter->hw); ++} ++ ++/** ++ * igb_power_down_link - Power down the phy/serdes link ++ * @adapter: address of board private structure ++ */ ++static void igb_power_down_link(struct igb_adapter *adapter) ++{ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) ++ igb_power_down_phy_copper_82575(&adapter->hw); ++ else ++ igb_shutdown_serdes_link_82575(&adapter->hw); ++} ++ ++/** ++ * Detect and switch function for Media Auto Sense ++ * @adapter: address of the board private structure ++ **/ ++static void igb_check_swap_media(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext, connsw; ++ bool swap_now = false; ++ ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ connsw = rd32(E1000_CONNSW); ++ ++ /* need to live swap if current media is copper and we have fiber/serdes ++ * to go to. 
++ */ ++ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { ++ swap_now = true; ++ } else if (!(connsw & E1000_CONNSW_SERDESD)) { ++ /* copper signal takes time to appear */ ++ if (adapter->copper_tries < 4) { ++ adapter->copper_tries++; ++ connsw |= E1000_CONNSW_AUTOSENSE_CONF; ++ wr32(E1000_CONNSW, connsw); ++ return; ++ } else { ++ adapter->copper_tries = 0; ++ if ((connsw & E1000_CONNSW_PHYSD) && ++ (!(connsw & E1000_CONNSW_PHY_PDN))) { ++ swap_now = true; ++ connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; ++ wr32(E1000_CONNSW, connsw); ++ } ++ } ++ } ++ ++ if (!swap_now) ++ return; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ rtdev_info(adapter->netdev, ++ "MAS: changing media to fiber/serdes\n"); ++ ctrl_ext |= ++ E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ adapter->copper_tries = 0; ++ break; ++ case e1000_media_type_internal_serdes: ++ case e1000_media_type_fiber: ++ rtdev_info(adapter->netdev, ++ "MAS: changing media to copper\n"); ++ ctrl_ext &= ++ ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ break; ++ default: ++ /* shouldn't get here during regular operation */ ++ rtdev_err(adapter->netdev, ++ "AMS: Invalid media type found, returning\n"); ++ break; ++ } ++ wr32(E1000_CTRL_EXT, ctrl_ext); ++} ++ ++/** ++ * igb_up - Open the interface and prepare it to handle traffic ++ * @adapter: board private structure ++ **/ ++int igb_up(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* hardware has been reset, we need to reload some things */ ++ igb_configure(adapter); ++ ++ clear_bit(__IGB_DOWN, &adapter->state); ++ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ igb_configure_msix(adapter); ++ else ++ igb_assign_vector(adapter->q_vector[0], 0); ++ ++ /* Clear any pending interrupts. */ ++ rd32(E1000_ICR); ++ igb_irq_enable(adapter); ++ ++ rtnetif_start_queue(adapter->netdev); ++ ++ /* start the watchdog. 
*/ ++ hw->mac.get_link_status = 1; ++ schedule_work(&adapter->watchdog_task); ++ ++ if ((adapter->flags & IGB_FLAG_EEE) && ++ (!hw->dev_spec._82575.eee_disable)) ++ adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; ++ ++ return 0; ++} ++ ++void igb_down(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl, rctl; ++ ++ /* signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer ++ */ ++ set_bit(__IGB_DOWN, &adapter->state); ++ ++ /* disable receives in the hardware */ ++ rctl = rd32(E1000_RCTL); ++ wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* disable transmits in the hardware */ ++ tctl = rd32(E1000_TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ wr32(E1000_TCTL, tctl); ++ /* flush both disables and wait for them to finish */ ++ wrfl(); ++ usleep_range(10000, 11000); ++ ++ igb_irq_disable(adapter); ++ ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ /* record the stats before reset*/ ++ spin_lock(&adapter->stats64_lock); ++ igb_update_stats(adapter); ++ spin_unlock(&adapter->stats64_lock); ++ ++ rtnetif_carrier_off(netdev); ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ if (!pci_channel_offline(adapter->pdev)) ++ igb_reset(adapter); ++ igb_clean_all_tx_rings(adapter); ++ igb_clean_all_rx_rings(adapter); ++} ++ ++void igb_reinit_locked(struct igb_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) ++ usleep_range(1000, 2000); ++ igb_down(adapter); ++ igb_up(adapter); ++ clear_bit(__IGB_RESETTING, &adapter->state); ++} ++ ++/** igb_enable_mas - Media Autosense re-enable after swap ++ * ++ * @adapter: adapter struct ++ **/ ++static void igb_enable_mas(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 connsw = rd32(E1000_CONNSW); ++ ++ /* configure for SerDes media detect */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ (!(connsw & E1000_CONNSW_SERDESD))) { ++ connsw |= E1000_CONNSW_ENRGSRC; ++ connsw |= E1000_CONNSW_AUTOSENSE_EN; ++ wr32(E1000_CONNSW, connsw); ++ wrfl(); ++ } ++} ++ ++void igb_reset(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_fc_info *fc = &hw->fc; ++ u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. ++ */ ++ switch (mac->type) { ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_82580: ++ pba = rd32(E1000_RXPBS); ++ pba = igb_rxpbs_adjust_82580(pba); ++ break; ++ case e1000_82576: ++ pba = rd32(E1000_RXPBS); ++ pba &= E1000_RXPBS_SIZE_MASK_82576; ++ break; ++ case e1000_82575: ++ case e1000_i210: ++ case e1000_i211: ++ default: ++ pba = E1000_PBA_34K; ++ break; ++ } ++ ++ if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && ++ (mac->type < e1000_82576)) { ++ /* adjust PBA for jumbo frames */ ++ wr32(E1000_PBA, pba); ++ ++ /* To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. 
++ */ ++ pba = rd32(E1000_PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* the Tx fifo also stores 16 bytes of information about the Tx ++ * but don't include ethernet FCS because hardware appends it ++ */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(union e1000_adv_tx_desc) - ++ ETH_FCS_LEN) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation ++ */ ++ if (tx_space < min_tx_space && ++ ((min_tx_space - tx_space) < pba)) { ++ pba = pba - (min_tx_space - tx_space); ++ ++ /* if short on Rx space, Rx wins and must trump Tx ++ * adjustment ++ */ ++ if (pba < min_rx_space) ++ pba = min_rx_space; ++ } ++ wr32(E1000_PBA, pba); ++ } ++ ++ /* flow control settings */ ++ /* The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. ++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, or ++ * - the full Rx FIFO size minus one full frame ++ */ ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - 2 * adapter->max_frame_size)); ++ ++ fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ ++ fc->low_water = fc->high_water - 16; ++ fc->pause_time = 0xFFFF; ++ fc->send_xon = 1; ++ fc->current_mode = fc->requested_mode; ++ ++ /* Allow time for pending master requests to run */ ++ hw->mac.ops.reset_hw(hw); ++ wr32(E1000_WUC, 0); ++ ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ /* need to resetup here after media swap */ ++ adapter->ei.get_invariants(hw); ++ adapter->flags &= ~IGB_FLAG_MEDIA_RESET; ++ } ++ if ((mac->type == e1000_82575) && ++ (adapter->flags & IGB_FLAG_MAS_ENABLE)) { ++ igb_enable_mas(adapter); ++ } ++ if (hw->mac.ops.init_hw(hw)) ++ dev_err(&pdev->dev, "Hardware Error\n"); ++ ++ /* Flow control settings reset on hardware reset, so guarantee flow ++ * control is off when forcing speed. ++ */ ++ if (!hw->mac.autoneg) ++ igb_force_mac_fc(hw); ++ ++ igb_init_dmac(adapter, pba); ++#ifdef CONFIG_IGB_HWMON ++ /* Re-initialize the thermal sensor on i350 devices. */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (mac->type == e1000_i350 && hw->bus.func == 0) { ++ /* If present, re-initialize the external thermal sensor ++ * interface. 
++ */ ++ if (adapter->ets) ++ mac->ops.init_thermal_sensor_thresh(hw); ++ } ++ } ++#endif ++ /* Re-establish EEE setting */ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ switch (mac->type) { ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ igb_set_eee_i350(hw, true, true); ++ break; ++ case e1000_i354: ++ igb_set_eee_i354(hw, true, true); ++ break; ++ default: ++ break; ++ } ++ } ++ if (!rtnetif_running(adapter->netdev)) ++ igb_power_down_link(adapter); ++ ++ igb_update_mng_vlan(adapter); ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ igb_get_phy_info(hw); ++} ++ ++ ++/** ++ * igb_set_fw_version - Configure version string for ethtool ++ * @adapter: adapter struct ++ **/ ++void igb_set_fw_version(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_fw_version fw; ++ ++ igb_get_fw_version(hw, &fw); ++ ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ if (!(igb_get_flash_presence_i210(hw))) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%2d.%2d-%d", ++ fw.invm_major, fw.invm_minor, ++ fw.invm_img_type); ++ break; ++ } ++ /* fall through */ ++ default: ++ /* if option is rom valid, display its version too */ ++ if (fw.or_valid) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d, 0x%08x, %d.%d.%d", ++ fw.eep_major, fw.eep_minor, fw.etrack_id, ++ fw.or_major, fw.or_build, fw.or_patch); ++ /* no option rom */ ++ } else if (fw.etrack_id != 0X0000) { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d, 0x%08x", ++ fw.eep_major, fw.eep_minor, fw.etrack_id); ++ } else { ++ snprintf(adapter->fw_version, ++ sizeof(adapter->fw_version), ++ "%d.%d.%d", ++ fw.eep_major, fw.eep_minor, fw.eep_build); ++ } ++ break; ++ } ++} ++ ++/** ++ * igb_init_mas - init Media Autosense feature if enabled in the NVM ++ * ++ * @adapter: adapter struct ++ **/ ++static void igb_init_mas(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 eeprom_data; ++ ++ hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data); ++ switch (hw->bus.func) { ++ case E1000_FUNC_0: ++ if (eeprom_data & IGB_MAS_ENABLE_0) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_1: ++ if (eeprom_data & IGB_MAS_ENABLE_1) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_2: ++ if (eeprom_data & IGB_MAS_ENABLE_2) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ case E1000_FUNC_3: ++ if (eeprom_data & IGB_MAS_ENABLE_3) { ++ adapter->flags |= IGB_FLAG_MAS_ENABLE; ++ rtdev_info(adapter->netdev, ++ "MAS: Enabling Media Autosense for port %d\n", ++ hw->bus.func); ++ } ++ break; ++ default: ++ /* Shouldn't get here */ ++ rtdev_err(adapter->netdev, ++ "MAS: Invalid port configuration, returning\n"); ++ break; ++ } ++} ++ ++static dma_addr_t igb_map_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ dma_addr_t addr; ++ ++ addr = dma_map_single(dev, skb->buf_start, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, addr)) { ++ dev_err(dev, "DMA map 
failed\n"); ++ return RTSKB_UNMAPPED; ++ } ++ return addr; ++} ++ ++static void igb_unmap_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ ++ dma_unmap_single(dev, skb->buf_dma_addr, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++} ++ ++/** ++ * igb_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in igb_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * igb_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. ++ **/ ++static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct igb_adapter *adapter; ++ struct e1000_hw *hw; ++ u16 eeprom_data = 0; ++ s32 ret_val; ++ static int global_quad_port_a; /* global quad port a indication */ ++ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; ++ int err, pci_using_dac; ++ u8 part_str[E1000_PBANUM_LENGTH]; ++ ++ /* Catch broken hardware that put the wrong VF device ID in ++ * the PCIe SR-IOV capability. ++ */ ++ if (pdev->is_virtfn) { ++ WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", ++ pci_name(pdev), pdev->vendor, pdev->device); ++ return -EINVAL; ++ } ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) ++ return err; ++ ++ pci_using_dac = 0; ++ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) { ++ pci_using_dac = 1; ++ } else { ++ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); ++ if (err) { ++ dev_err(&pdev->dev, ++ "No usable DMA configuration, aborting\n"); ++ goto err_dma; ++ } ++ } ++ ++ err = pci_request_selected_regions(pdev, pci_select_bars(pdev, ++ IORESOURCE_MEM), ++ igb_driver_name); ++ if (err) ++ goto err_pci_reg; ++ ++ pci_enable_pcie_error_reporting(pdev); ++ ++ pci_set_master(pdev); ++ pci_save_state(pdev); ++ ++ err = -ENOMEM; ++ netdev = rt_alloc_etherdev(sizeof(*adapter), ++ 2 * IGB_DEFAULT_RXD + IGB_DEFAULT_TXD); ++ if (!netdev) ++ goto err_alloc_etherdev; ++ ++ rtdev_alloc_name(netdev, "rteth%d"); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = rtnetdev_priv(netdev); ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ hw = &adapter->hw; ++ hw->back = adapter; ++ ++ err = -EIO; ++ hw->hw_addr = pci_iomap(pdev, 0, 0); ++ if (!hw->hw_addr) ++ goto err_ioremap; ++ ++ netdev->open = igb_open; ++ netdev->stop = igb_close; ++ netdev->hard_start_xmit = igb_xmit_frame; ++ netdev->get_stats = igb_get_stats; ++ netdev->map_rtskb = igb_map_rtskb; ++ netdev->unmap_rtskb = igb_unmap_rtskb; ++ netdev->do_ioctl = igb_ioctl; ++#if 0 ++ netdev->set_multicast_list = igb_set_multi; ++ netdev->set_mac_address = igb_set_mac; ++ netdev->change_mtu = igb_change_mtu; ++ ++ // No ethtool support for now ++ igb_set_ethtool_ops(netdev); ++ netdev->watchdog_timeo = 5 * HZ; ++#endif ++ ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ netdev->mem_start = pci_resource_start(pdev, 0); ++ netdev->mem_end = pci_resource_end(pdev, 0); ++ ++ /* PCI config space info */ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->revision_id = pdev->revision; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_device_id = pdev->subsystem_device; ++ ++ /* Copy the default MAC, PHY and NVM 
function pointers */ ++ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); ++ memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); ++ memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); ++ /* Initialize skew-specific constants */ ++ err = ei->get_invariants(hw); ++ if (err) ++ goto err_sw_init; ++ ++ /* setup the private structure */ ++ err = igb_sw_init(adapter); ++ if (err) ++ goto err_sw_init; ++ ++ igb_get_bus_info_pcie(hw); ++ ++ hw->phy.autoneg_wait_to_complete = false; ++ ++ /* Copper options */ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ hw->phy.mdix = AUTO_ALL_MODES; ++ hw->phy.disable_polarity_correction = false; ++ hw->phy.ms_type = e1000_ms_hw_default; ++ } ++ ++ if (igb_check_reset_block(hw)) ++ dev_info(&pdev->dev, ++ "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* features is initialized to 0 in allocation, it might have bits ++ * set by igb_sw_init so we should use an or instead of an ++ * assignment. ++ */ ++ netdev->features |= NETIF_F_SG | ++ NETIF_F_IP_CSUM | ++ NETIF_F_IPV6_CSUM | ++ NETIF_F_TSO | ++ NETIF_F_TSO6 | ++ NETIF_F_RXHASH | ++ NETIF_F_RXCSUM | ++ NETIF_F_HW_VLAN_CTAG_RX | ++ NETIF_F_HW_VLAN_CTAG_TX; ++ ++#if 0 ++ /* set this bit last since it cannot be part of hw_features */ ++ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ++#endif ++ ++ netdev->priv_flags |= IFF_SUPP_NOFCS; ++ ++ if (pci_using_dac) ++ netdev->features |= NETIF_F_HIGHDMA; ++ ++ netdev->priv_flags |= IFF_UNICAST_FLT; ++ ++ adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); ++ ++ /* before reading the NVM, reset the controller to put the device in a ++ * known good starting state ++ */ ++ hw->mac.ops.reset_hw(hw); ++ ++ /* make sure the NVM is good , i211/i210 parts can have special NVM ++ * that doesn't contain a checksum ++ */ ++ switch (hw->mac.type) { ++ case e1000_i210: ++ case e1000_i211: ++ if (igb_get_flash_presence_i210(hw)) { ++ if (hw->nvm.ops.validate(hw) < 0) { ++ dev_err(&pdev->dev, ++ "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ } ++ break; ++ default: ++ if (hw->nvm.ops.validate(hw) < 0) { ++ dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ break; ++ } ++ ++ /* copy the MAC address out of the NVM */ ++ if (hw->mac.ops.read_mac_addr(hw)) ++ dev_err(&pdev->dev, "NVM Read Error\n"); ++ ++ memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++ dev_err(&pdev->dev, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* get firmware version for ethtool -i */ ++ igb_set_fw_version(adapter); ++ ++ /* configure RXPBSIZE and TXPBSIZE */ ++ if (hw->mac.type == e1000_i210) { ++ wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); ++ wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); ++ } ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); ++ timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); ++#else /* < 4.14 */ ++ setup_timer(&adapter->watchdog_timer, igb_watchdog, ++ (unsigned long) adapter); ++ setup_timer(&adapter->phy_info_timer, igb_update_phy_info, ++ (unsigned long) adapter); ++#endif /* < 4.14 */ ++ ++ INIT_WORK(&adapter->reset_task, igb_reset_task); ++ INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); ++ rtdm_nrtsig_init(&adapter->watchdog_nrtsig, ++ igb_nrtsig_watchdog, adapter); ++ ++ /* Initialize link properties that are user-changeable */ ++ adapter->fc_autoneg = true; ++ hw->mac.autoneg = true; ++ 
hw->phy.autoneg_advertised = 0x2f; ++ ++ hw->fc.requested_mode = e1000_fc_default; ++ hw->fc.current_mode = e1000_fc_default; ++ ++ igb_validate_mdi_setting(hw); ++ ++ /* By default, support wake on port A */ ++ if (hw->bus.func == 0) ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ ++ /* Check the NVM for wake support on non-port A ports */ ++ if (hw->mac.type >= e1000_82580) ++ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + ++ NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, ++ &eeprom_data); ++ else if (hw->bus.func == 1) ++ hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ ++ if (eeprom_data & IGB_EEPROM_APME) ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ ++ /* now that we have the eeprom settings, apply the special cases where ++ * the eeprom may be wrong or the board simply won't support wake on ++ * lan on a particular port ++ */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82575GB_QUAD_COPPER: ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ break; ++ case E1000_DEV_ID_82575EB_FIBER_SERDES: ++ case E1000_DEV_ID_82576_FIBER: ++ case E1000_DEV_ID_82576_SERDES: ++ /* Wake events only supported on port A for dual fiber ++ * regardless of eeprom setting ++ */ ++ if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ break; ++ case E1000_DEV_ID_82576_QUAD_COPPER: ++ case E1000_DEV_ID_82576_QUAD_COPPER_ET2: ++ /* if quad port adapter, disable WoL on all but port A */ ++ if (global_quad_port_a != 0) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ else ++ adapter->flags |= IGB_FLAG_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ if (++global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ default: ++ /* If the device can't wake, don't set software support */ ++ if (!device_can_wakeup(&adapter->pdev->dev)) ++ adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; ++ } ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ /* Some vendors want WoL disabled by default, but still supported */ ++ if ((hw->mac.type == e1000_i350) && ++ (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { ++ adapter->flags |= IGB_FLAG_WOL_SUPPORTED; ++ adapter->wol = 0; ++ } ++ ++ device_set_wakeup_enable(&adapter->pdev->dev, ++ adapter->flags & IGB_FLAG_WOL_SUPPORTED); ++ ++ /* reset the hardware with the new settings */ ++ igb_reset(adapter); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. ++ */ ++ igb_get_hw_control(adapter); ++ ++ strcpy(netdev->name, "rteth%d"); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_release_hw_control; ++ ++ /* carrier off reporting is important to ethtool even BEFORE open */ ++ rtnetif_carrier_off(netdev); ++ ++#ifdef CONFIG_IGB_HWMON ++ /* Initialize the thermal sensor on i350 devices. */ ++ if (hw->mac.type == e1000_i350 && hw->bus.func == 0) { ++ u16 ets_word; ++ ++ /* Read the NVM to determine if this i350 device supports an ++ * external thermal sensor. 
++ */ ++ hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word); ++ if (ets_word != 0x0000 && ets_word != 0xFFFF) ++ adapter->ets = true; ++ else ++ adapter->ets = false; ++ if (igb_sysfs_init(adapter)) ++ dev_err(&pdev->dev, ++ "failed to allocate sysfs resources\n"); ++ } else { ++ adapter->ets = false; ++ } ++#endif ++ /* Check if Media Autosense is enabled */ ++ adapter->ei = *ei; ++ if (hw->dev_spec._82575.mas_capable) ++ igb_init_mas(adapter); ++ ++ dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); ++ /* print bus type/speed/width info, not applicable to i354 */ ++ if (hw->mac.type != e1000_i354) { ++ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", ++ netdev->name, ++ ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : ++ "unknown"), ++ ((hw->bus.width == e1000_bus_width_pcie_x4) ? ++ "Width x4" : ++ (hw->bus.width == e1000_bus_width_pcie_x2) ? ++ "Width x2" : ++ (hw->bus.width == e1000_bus_width_pcie_x1) ? ++ "Width x1" : "unknown"), netdev->dev_addr); ++ } ++ ++ if ((hw->mac.type >= e1000_i210 || ++ igb_get_flash_presence_i210(hw))) { ++ ret_val = igb_read_part_string(hw, part_str, ++ E1000_PBANUM_LENGTH); ++ } else { ++ ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND; ++ } ++ ++ if (ret_val) ++ strcpy(part_str, "Unknown"); ++ dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); ++ dev_info(&pdev->dev, ++ "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", ++ (adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" : ++ (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", ++ adapter->num_rx_queues, adapter->num_tx_queues); ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ switch (hw->mac.type) { ++ case e1000_i350: ++ case e1000_i210: ++ case e1000_i211: ++ /* Enable EEE for internal copper PHY devices */ ++ err = igb_set_eee_i350(hw, true, true); ++ if ((!err) && ++ (!hw->dev_spec._82575.eee_disable)) { ++ adapter->eee_advert = ++ MDIO_EEE_100TX | MDIO_EEE_1000T; ++ adapter->flags |= IGB_FLAG_EEE; ++ } ++ break; ++ case e1000_i354: ++ if ((rd32(E1000_CTRL_EXT) & ++ E1000_CTRL_EXT_LINK_MODE_SGMII)) { ++ err = igb_set_eee_i354(hw, true, true); ++ if ((!err) && ++ (!hw->dev_spec._82575.eee_disable)) { ++ adapter->eee_advert = ++ MDIO_EEE_100TX | MDIO_EEE_1000T; ++ adapter->flags |= IGB_FLAG_EEE; ++ } ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ pm_runtime_put_noidle(&pdev->dev); ++ return 0; ++ ++err_release_hw_control: ++ igb_release_hw_control(adapter); ++ memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); ++err_eeprom: ++ if (!igb_check_reset_block(hw)) ++ igb_reset_phy(hw); ++ ++ if (hw->flash_address) ++ iounmap(hw->flash_address); ++err_sw_init: ++ igb_clear_interrupt_scheme(adapter); ++ pci_iounmap(pdev, hw->hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * igb_remove_i2c - Cleanup I2C interface ++ * @adapter: pointer to adapter structure ++ **/ ++static void igb_remove_i2c(struct igb_adapter *adapter) ++{ ++ /* free the adapter bus structure */ ++ i2c_del_adapter(&adapter->i2c_adap); ++} ++ ++/** ++ * igb_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * igb_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. 
The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void igb_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ rtdev_down(netdev); ++ igb_down(adapter); ++ ++ pm_runtime_get_noresume(&pdev->dev); ++#ifdef CONFIG_IGB_HWMON ++ igb_sysfs_exit(adapter); ++#endif ++ igb_remove_i2c(adapter); ++ /* The watchdog timer may be rescheduled, so explicitly ++ * disable watchdog from being rescheduled. ++ */ ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_work_sync(&adapter->watchdog_task); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ igb_release_hw_control(adapter); ++ ++ rt_rtdev_disconnect(netdev); ++ rt_unregister_rtnetdev(netdev); ++ ++ igb_clear_interrupt_scheme(adapter); ++ ++ pci_iounmap(pdev, hw->hw_addr); ++ if (hw->flash_address) ++ iounmap(hw->flash_address); ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++ ++ kfree(adapter->shadow_vfta); ++ rtdev_free(netdev); ++ ++ pci_disable_pcie_error_reporting(pdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space ++ * @adapter: board private structure to initialize ++ * ++ * This function initializes the vf specific data storage and then attempts to ++ * allocate the VFs. The reason for ordering it this way is because it is much ++ * mor expensive time wise to disable SR-IOV than it is to allocate and free ++ * the memory for the VFs. ++ **/ ++static void igb_probe_vfs(struct igb_adapter *adapter) ++{ ++} ++ ++static void igb_init_queue_configuration(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 max_rss_queues; ++ ++ max_rss_queues = 1; ++ adapter->rss_queues = max_rss_queues; ++ ++ /* Determine if we need to pair queues. */ ++ switch (hw->mac.type) { ++ case e1000_82575: ++ case e1000_i211: ++ /* Device supports enough interrupts without queue pairing. */ ++ break; ++ case e1000_82576: ++ /* If VFs are going to be allocated with RSS queues then we ++ * should pair the queues in order to conserve interrupts due ++ * to limited supply. ++ */ ++ /* fall through */ ++ case e1000_82580: ++ case e1000_i350: ++ case e1000_i354: ++ case e1000_i210: ++ default: ++ /* If rss_queues > half of max_rss_queues, pair the queues in ++ * order to conserve interrupts due to limited supply. ++ */ ++ if (adapter->rss_queues > (max_rss_queues / 2)) ++ adapter->flags |= IGB_FLAG_QUEUE_PAIRS; ++ break; ++ } ++} ++ ++/** ++ * igb_sw_init - Initialize general software structures (struct igb_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * igb_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). 
++ **/ ++static int igb_sw_init(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); ++ ++ /* set default ring sizes */ ++ adapter->tx_ring_count = IGB_DEFAULT_TXD; ++ adapter->rx_ring_count = IGB_DEFAULT_RXD; ++ ++ /* set default ITR values */ ++ if (InterruptThrottle) { ++ adapter->rx_itr_setting = IGB_DEFAULT_ITR; ++ adapter->tx_itr_setting = IGB_DEFAULT_ITR; ++ } else { ++ adapter->rx_itr_setting = IGB_MIN_ITR_USECS; ++ adapter->tx_itr_setting = IGB_MIN_ITR_USECS; ++ } ++ ++ /* set default work limits */ ++ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; ++ ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + ++ VLAN_HLEN; ++ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; ++ ++ spin_lock_init(&adapter->stats64_lock); ++ ++ igb_init_queue_configuration(adapter); ++ ++ /* Setup and initialize a copy of the hw vlan table array */ ++ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), ++ GFP_ATOMIC); ++ ++ /* This call may decrease the number of queues */ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ igb_probe_vfs(adapter); ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ igb_irq_disable(adapter); ++ ++ if (hw->mac.type >= e1000_i350) ++ adapter->flags &= ~IGB_FLAG_DMAC; ++ ++ set_bit(__IGB_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * igb_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. ++ **/ ++static int __igb_open(struct rtnet_device *netdev, bool resuming) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__IGB_TESTING, &adapter->state)) { ++ WARN_ON(resuming); ++ return -EBUSY; ++ } ++ ++ if (!resuming) ++ pm_runtime_get_sync(&pdev->dev); ++ ++ rtnetif_carrier_off(netdev); ++ ++ /* allocate transmit descriptors */ ++ err = igb_setup_all_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = igb_setup_all_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ igb_power_up_link(adapter); ++ ++ /* before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. ++ */ ++ igb_configure(adapter); ++ ++ err = igb_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* From here on the code is the same as igb_up() */ ++ clear_bit(__IGB_DOWN, &adapter->state); ++ ++ /* Clear any pending interrupts. */ ++ rd32(E1000_ICR); ++ ++ igb_irq_enable(adapter); ++ ++ rtnetif_start_queue(netdev); ++ ++ if (!resuming) ++ pm_runtime_put(&pdev->dev); ++ ++ /* start the watchdog. 
*/ ++ hw->mac.get_link_status = 1; ++ schedule_work(&adapter->watchdog_task); ++ ++ return 0; ++ ++err_req_irq: ++ igb_release_hw_control(adapter); ++ igb_power_down_link(adapter); ++ igb_free_all_rx_resources(adapter); ++err_setup_rx: ++ igb_free_all_tx_resources(adapter); ++err_setup_tx: ++ igb_reset(adapter); ++ if (!resuming) ++ pm_runtime_put(&pdev->dev); ++ ++ return err; ++} ++ ++static int igb_open(struct rtnet_device *netdev) ++{ ++ return __igb_open(netdev, false); ++} ++ ++/** ++ * igb_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the driver's control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++static int __igb_close(struct rtnet_device *netdev, bool suspending) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct pci_dev *pdev = adapter->pdev; ++ ++ WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); ++ ++ if (!suspending) ++ pm_runtime_get_sync(&pdev->dev); ++ ++ igb_down(adapter); ++ igb_free_irq(adapter); ++ ++ rt_stack_disconnect(netdev); ++ ++ igb_free_all_tx_resources(adapter); ++ igb_free_all_rx_resources(adapter); ++ ++ if (!suspending) ++ pm_runtime_put_sync(&pdev->dev); ++ return 0; ++} ++ ++static int igb_close(struct rtnet_device *netdev) ++{ ++ return __igb_close(netdev, false); ++} ++ ++/** ++ * igb_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @tx_ring: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++int igb_setup_tx_resources(struct igb_ring *tx_ring) ++{ ++ struct device *dev = tx_ring->dev; ++ int size; ++ ++ size = sizeof(struct igb_tx_buffer) * tx_ring->count; ++ ++ tx_ring->tx_buffer_info = vzalloc(size); ++ if (!tx_ring->tx_buffer_info) ++ goto err; ++ ++ /* round up to nearest 4K */ ++ tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, ++ &tx_ring->dma, GFP_KERNEL); ++ if (!tx_ring->desc) ++ goto err; ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ ++ return 0; ++ ++err: ++ vfree(tx_ring->tx_buffer_info); ++ tx_ring->tx_buffer_info = NULL; ++ dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); ++ return -ENOMEM; ++} ++ ++/** ++ * igb_setup_all_tx_resources - wrapper to allocate Tx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int igb_setup_all_tx_resources(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = igb_setup_tx_resources(adapter->tx_ring[i]); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Allocation for Tx Queue %u failed\n", i); ++ for (i--; i >= 0; i--) ++ igb_free_tx_resources(adapter->tx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * igb_setup_tctl - configure the transmit control registers ++ * @adapter: Board private structure ++ **/ ++void igb_setup_tctl(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl; ++ ++ /* disable queue 0 which is enabled by default on 82575 and 82576 */ ++ wr32(E1000_TXDCTL(0), 0); ++ ++ /* Program the 
Transmit Control Register */ ++ tctl = rd32(E1000_TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ igb_config_collision_dist(hw); ++ ++ /* Enable transmits */ ++ tctl |= E1000_TCTL_EN; ++ ++ wr32(E1000_TCTL, tctl); ++} ++ ++/** ++ * igb_configure_tx_ring - Configure transmit ring after Reset ++ * @adapter: board private structure ++ * @ring: tx ring to configure ++ * ++ * Configure a transmit ring after a reset. ++ **/ ++void igb_configure_tx_ring(struct igb_adapter *adapter, ++ struct igb_ring *ring) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 txdctl = 0; ++ u64 tdba = ring->dma; ++ int reg_idx = ring->reg_idx; ++ ++ /* disable the queue */ ++ wr32(E1000_TXDCTL(reg_idx), 0); ++ wrfl(); ++ mdelay(10); ++ ++ wr32(E1000_TDLEN(reg_idx), ++ ring->count * sizeof(union e1000_adv_tx_desc)); ++ wr32(E1000_TDBAL(reg_idx), ++ tdba & 0x00000000ffffffffULL); ++ wr32(E1000_TDBAH(reg_idx), tdba >> 32); ++ ++ ring->tail = hw->hw_addr + E1000_TDT(reg_idx); ++ wr32(E1000_TDH(reg_idx), 0); ++ writel(0, ring->tail); ++ ++ txdctl |= IGB_TX_PTHRESH; ++ txdctl |= IGB_TX_HTHRESH << 8; ++ txdctl |= IGB_TX_WTHRESH << 16; ++ ++ txdctl |= E1000_TXDCTL_QUEUE_ENABLE; ++ wr32(E1000_TXDCTL(reg_idx), txdctl); ++} ++ ++/** ++ * igb_configure_tx - Configure transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. ++ **/ ++static void igb_configure_tx(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ igb_configure_tx_ring(adapter, adapter->tx_ring[i]); ++} ++ ++/** ++ * igb_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @rx_ring: Rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++int igb_setup_rx_resources(struct igb_ring *rx_ring) ++{ ++ struct device *dev = rx_ring->dev; ++ int size; ++ ++ size = sizeof(struct igb_rx_buffer) * rx_ring->count; ++ ++ rx_ring->rx_buffer_info = vzalloc(size); ++ if (!rx_ring->rx_buffer_info) ++ goto err; ++ ++ /* Round up to nearest 4K */ ++ rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, ++ &rx_ring->dma, GFP_KERNEL); ++ if (!rx_ring->desc) ++ goto err; ++ ++ rx_ring->next_to_alloc = 0; ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ return 0; ++ ++err: ++ vfree(rx_ring->rx_buffer_info); ++ rx_ring->rx_buffer_info = NULL; ++ dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); ++ return -ENOMEM; ++} ++ ++/** ++ * igb_setup_all_rx_resources - wrapper to allocate Rx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++static int igb_setup_all_rx_resources(struct igb_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = igb_setup_rx_resources(adapter->rx_ring[i]); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Allocation for Rx Queue %u failed\n", i); ++ for (i--; i >= 0; i--) ++ igb_free_rx_resources(adapter->rx_ring[i]); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * igb_setup_mrqc - configure the multiple receive queue control registers ++ * @adapter: Board private structure ++ **/ ++static void igb_setup_mrqc(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = 
&adapter->hw; ++ u32 mrqc, rxcsum; ++ u32 j, num_rx_queues; ++ u32 rss_key[10]; ++ ++ get_random_bytes(rss_key, sizeof(rss_key)); ++ for (j = 0; j < 10; j++) ++ wr32(E1000_RSSRK(j), rss_key[j]); ++ ++ num_rx_queues = adapter->rss_queues; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ /* 82576 supports 2 RSS queues for SR-IOV */ ++ break; ++ default: ++ break; ++ } ++ ++ if (adapter->rss_indir_tbl_init != num_rx_queues) { ++ for (j = 0; j < IGB_RETA_SIZE; j++) ++ adapter->rss_indir_tbl[j] = ++ (j * num_rx_queues) / IGB_RETA_SIZE; ++ adapter->rss_indir_tbl_init = num_rx_queues; ++ } ++ ++ /* Disable raw packet checksumming so that RSS hash is placed in ++ * descriptor on writeback. No need to enable TCP/UDP/IP checksum ++ * offloads as they are enabled by default ++ */ ++ rxcsum = rd32(E1000_RXCSUM); ++ rxcsum |= E1000_RXCSUM_PCSD; ++ ++ if (adapter->hw.mac.type >= e1000_82576) ++ /* Enable Receive Checksum Offload for SCTP */ ++ rxcsum |= E1000_RXCSUM_CRCOFL; ++ ++ /* Don't need to set TUOFL or IPOFL, they default to 1 */ ++ wr32(E1000_RXCSUM, rxcsum); ++ ++ /* Generate RSS hash based on packet types, TCP/UDP ++ * port numbers and/or IPv4/v6 src and dst addresses ++ */ ++ mrqc = E1000_MRQC_RSS_FIELD_IPV4 | ++ E1000_MRQC_RSS_FIELD_IPV4_TCP | ++ E1000_MRQC_RSS_FIELD_IPV6 | ++ E1000_MRQC_RSS_FIELD_IPV6_TCP | ++ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; ++ ++ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) ++ mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; ++ if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) ++ mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; ++ ++ /* If VMDq is enabled then we set the appropriate mode for that, else ++ * we default to RSS so that an RSS hash is calculated per packet even ++ * if we are only using one queue ++ */ ++ if (hw->mac.type != e1000_i211) ++ mrqc |= E1000_MRQC_ENABLE_RSS_4Q; ++ ++ wr32(E1000_MRQC, mrqc); ++} ++ ++/** ++ * igb_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++void igb_setup_rctl(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ rctl = rd32(E1000_RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | ++ (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* enable stripping of CRC. It's unlikely this will break BMC ++ * redirection as it did with e1000. Newer features require ++ * that the HW strips the CRC. ++ */ ++ rctl |= E1000_RCTL_SECRC; ++ ++ /* disable store bad packets and clear size bits. */ ++ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); ++ ++ /* enable LPE to prevent packets larger than max_frame_size */ ++ rctl |= E1000_RCTL_LPE; ++ ++ /* disable queue 0 to prevent tail write w/o re-config */ ++ wr32(E1000_RXDCTL(0), 0); ++ ++ /* This is useful for sniffing bad packets. */ ++ if (adapter->netdev->features & NETIF_F_RXALL) { ++ /* UPE and MPE will be handled by normal PROMISC logic ++ * in e1000e_set_rx_mode ++ */ ++ rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ ++ E1000_RCTL_BAM | /* RX All Bcast Pkts */ ++ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ ++ ++ rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ ++ E1000_RCTL_DPF | /* Allow filtered pause */ ++ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ ++ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, ++ * and that breaks VLANs. 
++ */ ++ } ++ ++ wr32(E1000_RCTL, rctl); ++} ++ ++/** ++ * igb_rlpml_set - set maximum receive packet size ++ * @adapter: board private structure ++ * ++ * Configure maximum receivable packet size. ++ **/ ++static void igb_rlpml_set(struct igb_adapter *adapter) ++{ ++ u32 max_frame_size = adapter->max_frame_size; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ wr32(E1000_RLPML, max_frame_size); ++} ++ ++static inline void igb_set_vmolr(struct igb_adapter *adapter, ++ int vfn, bool aupe) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vmolr; ++ ++ /* This register exists only on 82576 and newer so if we are older then ++ * we should exit and do nothing ++ */ ++ if (hw->mac.type < e1000_82576) ++ return; ++ ++ vmolr = rd32(E1000_VMOLR(vfn)); ++ vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ ++ if (hw->mac.type == e1000_i350) { ++ u32 dvmolr; ++ ++ dvmolr = rd32(E1000_DVMOLR(vfn)); ++ dvmolr |= E1000_DVMOLR_STRVLAN; ++ wr32(E1000_DVMOLR(vfn), dvmolr); ++ } ++ if (aupe) ++ vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ ++ else ++ vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ ++ ++ /* clear all bits that might not be set */ ++ vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); ++ ++ if (adapter->rss_queues > 1) ++ vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ ++ /* for VMDq only allow the VFs and pool 0 to accept broadcast and ++ * multicast packets ++ */ ++ vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ ++ ++ wr32(E1000_VMOLR(vfn), vmolr); ++} ++ ++/** ++ * igb_configure_rx_ring - Configure a receive ring after Reset ++ * @adapter: board private structure ++ * @ring: receive ring to be configured ++ * ++ * Configure the Rx unit of the MAC after a reset. ++ **/ ++void igb_configure_rx_ring(struct igb_adapter *adapter, ++ struct igb_ring *ring) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u64 rdba = ring->dma; ++ int reg_idx = ring->reg_idx; ++ u32 srrctl = 0, rxdctl = 0; ++ ++ ring->rx_buffer_len = max_t(u32, adapter->max_frame_size, ++ MAXIMUM_ETHERNET_VLAN_SIZE); ++ ++ /* disable the queue */ ++ wr32(E1000_RXDCTL(reg_idx), 0); ++ ++ /* Set DMA base address registers */ ++ wr32(E1000_RDBAL(reg_idx), ++ rdba & 0x00000000ffffffffULL); ++ wr32(E1000_RDBAH(reg_idx), rdba >> 32); ++ wr32(E1000_RDLEN(reg_idx), ++ ring->count * sizeof(union e1000_adv_rx_desc)); ++ ++ /* initialize head and tail */ ++ ring->tail = hw->hw_addr + E1000_RDT(reg_idx); ++ wr32(E1000_RDH(reg_idx), 0); ++ writel(0, ring->tail); ++ ++ /* set descriptor configuration */ ++ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; ++ srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; ++ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; ++ if (hw->mac.type >= e1000_82580) ++ srrctl |= E1000_SRRCTL_TIMESTAMP; ++ /* Only set Drop Enable if we are supporting multiple queues */ ++ if (adapter->num_rx_queues > 1) ++ srrctl |= E1000_SRRCTL_DROP_EN; ++ ++ wr32(E1000_SRRCTL(reg_idx), srrctl); ++ ++ /* set filtering for VMDQ pools */ ++ igb_set_vmolr(adapter, reg_idx & 0x7, true); ++ ++ rxdctl |= IGB_RX_PTHRESH; ++ rxdctl |= IGB_RX_HTHRESH << 8; ++ rxdctl |= IGB_RX_WTHRESH << 16; ++ ++ /* enable receive descriptor fetching */ ++ rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; ++ wr32(E1000_RXDCTL(reg_idx), rxdctl); ++} ++ ++/** ++ * igb_configure_rx - Configure receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
++ **/ ++static void igb_configure_rx(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ /* set the correct pool for the PF default MAC address in entry 0 */ ++ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, 0); ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring ++ */ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ igb_configure_rx_ring(adapter, adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_free_tx_resources - Free Tx Resources per Queue ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++void igb_free_tx_resources(struct igb_ring *tx_ring) ++{ ++ igb_clean_tx_ring(tx_ring); ++ ++ vfree(tx_ring->tx_buffer_info); ++ tx_ring->tx_buffer_info = NULL; ++ ++ /* if not set, then don't free */ ++ if (!tx_ring->desc) ++ return; ++ ++ dma_free_coherent(tx_ring->dev, tx_ring->size, ++ tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * igb_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++static void igb_free_all_tx_resources(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ if (adapter->tx_ring[i]) ++ igb_free_tx_resources(adapter->tx_ring[i]); ++} ++ ++void igb_unmap_and_free_tx_resource(struct igb_ring *ring, ++ struct igb_tx_buffer *tx_buffer) ++{ ++ if (tx_buffer->skb) { ++ kfree_rtskb(tx_buffer->skb); ++ tx_buffer->skb = NULL; ++ } ++ tx_buffer->next_to_watch = NULL; ++ /* buffer_info must be completely set up in the transmit path */ ++} ++ ++/** ++ * igb_clean_tx_ring - Free Tx Buffers ++ * @tx_ring: ring to be cleaned ++ **/ ++static void igb_clean_tx_ring(struct igb_ring *tx_ring) ++{ ++ struct igb_tx_buffer *buffer_info; ++ unsigned long size; ++ u16 i; ++ ++ if (!tx_ring->tx_buffer_info) ++ return; ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->tx_buffer_info[i]; ++ igb_unmap_and_free_tx_resource(tx_ring, buffer_info); ++ } ++ ++ size = sizeof(struct igb_tx_buffer) * tx_ring->count; ++ memset(tx_ring->tx_buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++} ++ ++/** ++ * igb_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void igb_clean_all_tx_rings(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ if (adapter->tx_ring[i]) ++ igb_clean_tx_ring(adapter->tx_ring[i]); ++} ++ ++/** ++ * igb_free_rx_resources - Free Rx Resources ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++void igb_free_rx_resources(struct igb_ring *rx_ring) ++{ ++ igb_clean_rx_ring(rx_ring); ++ ++ vfree(rx_ring->rx_buffer_info); ++ rx_ring->rx_buffer_info = NULL; ++ ++ /* if not set, then don't free */ ++ if (!rx_ring->desc) ++ return; ++ ++ dma_free_coherent(rx_ring->dev, rx_ring->size, ++ rx_ring->desc, rx_ring->dma); ++ ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * igb_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++static void igb_free_all_rx_resources(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ if (adapter->rx_ring[i]) ++ 
igb_free_rx_resources(adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_clean_rx_ring - Free Rx Buffers per Queue ++ * @rx_ring: ring to free buffers from ++ **/ ++static void igb_clean_rx_ring(struct igb_ring *rx_ring) ++{ ++ unsigned long size; ++ u16 i; ++ ++ if (!rx_ring->rx_buffer_info) ++ return; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; ++ ++ if (buffer_info->dma) ++ buffer_info->dma = 0; ++ ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ size = sizeof(struct igb_rx_buffer) * rx_ring->count; ++ memset(rx_ring->rx_buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++} ++ ++/** ++ * igb_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++static void igb_clean_all_rx_rings(struct igb_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ if (adapter->rx_ring[i]) ++ igb_clean_rx_ring(adapter->rx_ring[i]); ++} ++ ++/** ++ * igb_write_mc_addr_list - write multicast addresses to MTA ++ * @netdev: network interface device structure ++ * ++ * Writes multicast address list to the MTA hash table. ++ * Returns: -ENOMEM on failure ++ * 0 on no addresses written ++ * X on writing X addresses to MTA ++ **/ ++static int igb_write_mc_addr_list(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++#if 0 ++ struct netdev_hw_addr *ha; ++ u8 *mta_list; ++ int i; ++ if (netdev_mc_empty(netdev)) { ++ /* nothing to program, so clear mc list */ ++ igb_update_mc_addr_list(hw, NULL, 0); ++ igb_restore_vf_multicasts(adapter); ++ return 0; ++ } ++ ++ mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); ++ if (!mta_list) ++ return -ENOMEM; ++ ++ /* The shared function expects a packed array of only addresses. */ ++ i = 0; ++ netdev_for_each_mc_addr(ha, netdev) ++ memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); ++ ++ igb_update_mc_addr_list(hw, mta_list, i); ++ kfree(mta_list); ++ ++ return netdev_mc_count(netdev); ++#else ++ igb_update_mc_addr_list(hw, NULL, 0); ++ return 0; ++#endif ++} ++ ++/** ++ * igb_write_uc_addr_list - write unicast addresses to RAR table ++ * @netdev: network interface device structure ++ * ++ * Writes unicast address list to the RAR table. ++ * Returns: -ENOMEM on failure/insufficient address space ++ * 0 on no addresses written ++ * X on writing X addresses to the RAR table ++ **/ ++static int igb_write_uc_addr_list(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int vfn = 0; ++ unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); ++ int count = 0; ++ ++ /* write the addresses in reverse order to avoid write combining */ ++ for (; rar_entries > 0 ; rar_entries--) { ++ wr32(E1000_RAH(rar_entries), 0); ++ wr32(E1000_RAL(rar_entries), 0); ++ } ++ wrfl(); ++ ++ return count; ++} ++ ++/** ++ * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_rx_mode entry point is called whenever the unicast or multicast ++ * address lists or the network interface flags are updated. 
This routine is ++ * responsible for configuring the hardware for proper unicast, multicast, ++ * promiscuous mode, and all-multi behavior. ++ **/ ++static void igb_set_rx_mode(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ unsigned int vfn = 0; ++ u32 rctl, vmolr = 0; ++ int count; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ rctl = rd32(E1000_RCTL); ++ ++ /* clear the effected bits */ ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); ++ } else { ++ if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ vmolr |= E1000_VMOLR_MPME; ++ } else { ++ /* Write addresses to the MTA, if the attempt fails ++ * then we should just turn on promiscuous mode so ++ * that we can at least receive multicast traffic ++ */ ++ count = igb_write_mc_addr_list(netdev); ++ if (count < 0) { ++ rctl |= E1000_RCTL_MPE; ++ vmolr |= E1000_VMOLR_MPME; ++ } else if (count) { ++ vmolr |= E1000_VMOLR_ROMPE; ++ } ++ } ++ /* Write addresses to available RAR registers, if there is not ++ * sufficient space to store all the addresses then enable ++ * unicast promiscuous mode ++ */ ++ count = igb_write_uc_addr_list(netdev); ++ if (count < 0) { ++ rctl |= E1000_RCTL_UPE; ++ vmolr |= E1000_VMOLR_ROPE; ++ } ++ rctl |= E1000_RCTL_VFE; ++ } ++ wr32(E1000_RCTL, rctl); ++ ++ /* In order to support SR-IOV and eventually VMDq it is necessary to set ++ * the VMOLR to enable the appropriate modes. Without this workaround ++ * we will have issues with VLAN tag stripping not being done for frames ++ * that are only arriving because we are the default pool ++ */ ++ if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) ++ return; ++ ++ vmolr |= rd32(E1000_VMOLR(vfn)) & ++ ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); ++ wr32(E1000_VMOLR(vfn), vmolr); ++} ++ ++static void igb_check_wvbr(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 wvbr = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82576: ++ case e1000_i350: ++ wvbr = rd32(E1000_WVBR); ++ if (!wvbr) ++ return; ++ break; ++ default: ++ break; ++ } ++ ++ adapter->wvbr |= wvbr; ++} ++ ++#define IGB_STAGGERED_QUEUE_OFFSET 8 ++ ++/* Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_update_phy_info(struct timer_list *t) ++{ ++ struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer); ++#else /* < 4.14 */ ++static void igb_update_phy_info(unsigned long data) ++{ ++ struct igb_adapter *adapter = (struct igb_adapter *) data; ++#endif /* < 4.14 */ ++ igb_get_phy_info(&adapter->hw); ++} ++ ++/** ++ * igb_has_link - check shared code for link and determine up/down ++ * @adapter: pointer to driver private info ++ **/ ++bool igb_has_link(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = false; ++ ++ /* get_link_status is set on LSC (link status) interrupt or ++ * rx sequence error interrupt. 
get_link_status will stay ++ * false until the e1000_check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (!hw->mac.get_link_status) ++ return true; ++ case e1000_media_type_internal_serdes: ++ hw->mac.ops.check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if (((hw->mac.type == e1000_i210) || ++ (hw->mac.type == e1000_i211)) && ++ (hw->phy.id == I210_I_PHY_ID)) { ++ if (!rtnetif_carrier_ok(adapter->netdev)) { ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { ++ adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; ++ adapter->link_check_timeout = jiffies; ++ } ++ } ++ ++ return link_active; ++} ++ ++static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) ++{ ++ bool ret = false; ++ u32 ctrl_ext, thstat; ++ ++ /* check for thermal sensor event on i350 copper only */ ++ if (hw->mac.type == e1000_i350) { ++ thstat = rd32(E1000_THSTAT); ++ ctrl_ext = rd32(E1000_CTRL_EXT); ++ ++ if ((hw->phy.media_type == e1000_media_type_copper) && ++ !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) ++ ret = !!(thstat & event); ++ } ++ ++ return ret; ++} ++ ++/** ++ * igb_check_lvmmc - check for malformed packets received ++ * and indicated in LVMMC register ++ * @adapter: pointer to adapter ++ **/ ++static void igb_check_lvmmc(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 lvmmc; ++ ++ lvmmc = rd32(E1000_LVMMC); ++ if (lvmmc) { ++ if (unlikely(net_ratelimit())) { ++ rtdev_warn(adapter->netdev, ++ "malformed Tx packet detected and dropped, LVMMC:0x%08x\n", ++ lvmmc); ++ } ++ } ++} ++ ++/** ++ * igb_watchdog - Timer Call-back ++ * @data: pointer to adapter cast into an unsigned long ++ **/ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void igb_watchdog(struct timer_list *t) ++{ ++ struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer); ++#else /* < 4.14 */ ++static void igb_watchdog(unsigned long data) ++{ ++ struct igb_adapter *adapter = (struct igb_adapter *)data; ++#endif /* < 4.14 */ ++ /* Do the rest outside of interrupt context */ ++ schedule_work(&adapter->watchdog_task); ++} ++ ++static void igb_watchdog_task(struct work_struct *work) ++{ ++ struct igb_adapter *adapter = container_of(work, ++ struct igb_adapter, ++ watchdog_task); ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_info *phy = &hw->phy; ++ struct rtnet_device *netdev = adapter->netdev; ++ u32 link; ++ int i; ++ u32 connsw; ++ ++ link = igb_has_link(adapter); ++ ++ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { ++ if (time_after(jiffies, (adapter->link_check_timeout + HZ))) ++ adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; ++ else ++ link = false; ++ } ++ ++ /* Force link down if we have fiber to swap to */ ++ if (adapter->flags & IGB_FLAG_MAS_ENABLE) { ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ connsw = rd32(E1000_CONNSW); ++ if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) ++ link = 0; ++ } ++ } ++ if (link) { ++ /* Perform a reset if the media type changed. */ ++ if (hw->dev_spec._82575.media_changed) { ++ hw->dev_spec._82575.media_changed = false; ++ adapter->flags |= IGB_FLAG_MEDIA_RESET; ++ igb_reset(adapter); ++ } ++ /* Cancel scheduled suspend requests. 
*/ ++ pm_runtime_resume(adapter->pdev->dev.parent); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ u32 ctrl; ++ ++ hw->mac.ops.get_speed_and_duplex(hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ ctrl = rd32(E1000_CTRL); ++ /* Links status message must follow this format */ ++ rtdev_info(netdev, ++ "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", ++ netdev->name, ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full" : "Half", ++ (ctrl & E1000_CTRL_TFCE) && ++ (ctrl & E1000_CTRL_RFCE) ? "RX/TX" : ++ (ctrl & E1000_CTRL_RFCE) ? "RX" : ++ (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); ++ ++ /* disable EEE if enabled */ ++ if ((adapter->flags & IGB_FLAG_EEE) && ++ (adapter->link_duplex == HALF_DUPLEX)) { ++ dev_info(&adapter->pdev->dev, ++ "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n"); ++ adapter->hw.dev_spec._82575.eee_disable = true; ++ adapter->flags &= ~IGB_FLAG_EEE; ++ } ++ ++ /* check if SmartSpeed worked */ ++ igb_check_downshift(hw); ++ if (phy->speed_downgraded) ++ rtdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); ++ ++ /* check for thermal sensor event */ ++ if (igb_thermal_sensor_event(hw, ++ E1000_THSTAT_LINK_THROTTLE)) ++ rtdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n"); ++ ++ /* adjust timeout factor according to speed/duplex */ ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ adapter->tx_timeout_factor = 14; ++ break; ++ case SPEED_100: ++ /* maybe add some timeout factor ? */ ++ break; ++ } ++ ++ rtnetif_carrier_on(netdev); ++ ++ /* link state has changed, schedule phy info update */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ /* check for thermal sensor event */ ++ if (igb_thermal_sensor_event(hw, ++ E1000_THSTAT_PWR_DOWN)) { ++ rtdev_err(netdev, "The network adapter was stopped because it overheated\n"); ++ } ++ ++ /* Links status message must follow this format */ ++ rtdev_info(netdev, "igb: %s NIC Link is Down\n", ++ netdev->name); ++ rtnetif_carrier_off(netdev); ++ ++ /* link state has changed, schedule phy info update */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ ++ /* link is down, time to check for alternate media */ ++ if (adapter->flags & IGB_FLAG_MAS_ENABLE) { ++ igb_check_swap_media(adapter); ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ schedule_work(&adapter->reset_task); ++ /* return immediately */ ++ return; ++ } ++ } ++ pm_schedule_suspend(adapter->pdev->dev.parent, ++ MSEC_PER_SEC * 5); ++ ++ /* also check for alternate media here */ ++ } else if (!rtnetif_carrier_ok(netdev) && ++ (adapter->flags & IGB_FLAG_MAS_ENABLE)) { ++ igb_check_swap_media(adapter); ++ if (adapter->flags & IGB_FLAG_MEDIA_RESET) { ++ schedule_work(&adapter->reset_task); ++ /* return immediately */ ++ return; ++ } ++ } ++ } ++ ++ spin_lock(&adapter->stats64_lock); ++ igb_update_stats(adapter); ++ spin_unlock(&adapter->stats64_lock); ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igb_ring *tx_ring = adapter->tx_ring[i]; ++ if (!rtnetif_carrier_ok(netdev)) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. 
++ * (Do the reset outside of interrupt context). ++ */ ++ if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ /* return immediately since reset is imminent */ ++ return; ++ } ++ } ++ ++ /* Force detection of hung controller every watchdog period */ ++ set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); ++ } ++ ++ /* Cause software interrupt to ensure Rx ring is cleaned */ ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) { ++ u32 eics = 0; ++ ++ for (i = 0; i < adapter->num_q_vectors; i++) ++ eics |= adapter->q_vector[i]->eims_value; ++ wr32(E1000_EICS, eics); ++ } else { ++ wr32(E1000_ICS, E1000_ICS_RXDMT0); ++ } ++ ++ /* Check LVMMC register on i350/i354 only */ ++ if ((adapter->hw.mac.type == e1000_i350) || ++ (adapter->hw.mac.type == e1000_i354)) ++ igb_check_lvmmc(adapter); ++ ++ /* Reset the timer */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + HZ)); ++ else ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++} ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++/** ++ * igb_update_ring_itr - update the dynamic ITR value based on packet size ++ * @q_vector: pointer to q_vector ++ * ++ * Stores a new ITR value based on strictly on packet size. This ++ * algorithm is less sophisticated than that used in igb_update_itr, ++ * due to the difficulty of synchronizing statistics across multiple ++ * receive rings. The divisors and thresholds used by this function ++ * were determined based on theoretical maximum wire speed and testing ++ * data, in order to minimize response time while increasing bulk ++ * throughput. ++ * This functionality is controlled by ethtool's coalescing settings. ++ * NOTE: This function is called only when operating in a multiqueue ++ * receive environment. ++ **/ ++static void igb_update_ring_itr(struct igb_q_vector *q_vector) ++{ ++ int new_val = q_vector->itr_val; ++ int avg_wire_size = 0; ++ struct igb_adapter *adapter = q_vector->adapter; ++ unsigned int packets; ++ ++ if (!InterruptThrottle) ++ return; ++ ++ /* For non-gigabit speeds, just fix the interrupt rate at 4000 ++ * ints/sec - ITR timer value of 120 ticks. 
++ */ ++ if (adapter->link_speed != SPEED_1000) { ++ new_val = IGB_4K_ITR; ++ goto set_itr_val; ++ } ++ ++ packets = q_vector->rx.total_packets; ++ if (packets) ++ avg_wire_size = q_vector->rx.total_bytes / packets; ++ ++ packets = q_vector->tx.total_packets; ++ if (packets) ++ avg_wire_size = max_t(u32, avg_wire_size, ++ q_vector->tx.total_bytes / packets); ++ ++ /* if avg_wire_size isn't set no work was done */ ++ if (!avg_wire_size) ++ goto clear_counts; ++ ++ /* Add 24 bytes to size to account for CRC, preamble, and gap */ ++ avg_wire_size += 24; ++ ++ /* Don't starve jumbo frames */ ++ avg_wire_size = min(avg_wire_size, 3000); ++ ++ /* Give a little boost to mid-size frames */ ++ if ((avg_wire_size > 300) && (avg_wire_size < 1200)) ++ new_val = avg_wire_size / 3; ++ else ++ new_val = avg_wire_size / 2; ++ ++ /* conservative mode (itr 3) eliminates the lowest_latency setting */ ++ if (new_val < IGB_20K_ITR && ++ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || ++ (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) ++ new_val = IGB_20K_ITR; ++ ++set_itr_val: ++ if (new_val != q_vector->itr_val) { ++ q_vector->itr_val = new_val; ++ q_vector->set_itr = 1; ++ } ++clear_counts: ++ q_vector->rx.total_bytes = 0; ++ q_vector->rx.total_packets = 0; ++ q_vector->tx.total_bytes = 0; ++ q_vector->tx.total_packets = 0; ++} ++ ++/** ++ * igb_update_itr - update the dynamic ITR value based on statistics ++ * @q_vector: pointer to q_vector ++ * @ring_container: ring info to update the itr for ++ * ++ * Stores a new ITR value based on packets and byte ++ * counts during the last interrupt. The advantage of per interrupt ++ * computation is faster updates and more accurate ITR for the current ++ * traffic pattern. Constants in this function were computed ++ * based on theoretical maximum wire speed and thresholds were set based ++ * on testing data as well as attempting to minimize response time ++ * while increasing bulk throughput. ++ * This functionality is controlled by ethtool's coalescing settings. ++ * NOTE: These calculations are only valid when operating in a single- ++ * queue environment. 
++ **/ ++static void igb_update_itr(struct igb_q_vector *q_vector, ++ struct igb_ring_container *ring_container) ++{ ++ unsigned int packets = ring_container->total_packets; ++ unsigned int bytes = ring_container->total_bytes; ++ u8 itrval = ring_container->itr; ++ ++ /* no packets, exit with status unchanged */ ++ if (packets == 0) ++ return; ++ ++ switch (itrval) { ++ case lowest_latency: ++ /* handle TSO and jumbo frames */ ++ if (bytes/packets > 8000) ++ itrval = bulk_latency; ++ else if ((packets < 5) && (bytes > 512)) ++ itrval = low_latency; ++ break; ++ case low_latency: /* 50 usec aka 20000 ints/s */ ++ if (bytes > 10000) { ++ /* this if handles the TSO accounting */ ++ if (bytes/packets > 8000) ++ itrval = bulk_latency; ++ else if ((packets < 10) || ((bytes/packets) > 1200)) ++ itrval = bulk_latency; ++ else if ((packets > 35)) ++ itrval = lowest_latency; ++ } else if (bytes/packets > 2000) { ++ itrval = bulk_latency; ++ } else if (packets <= 2 && bytes < 512) { ++ itrval = lowest_latency; ++ } ++ break; ++ case bulk_latency: /* 250 usec aka 4000 ints/s */ ++ if (bytes > 25000) { ++ if (packets > 35) ++ itrval = low_latency; ++ } else if (bytes < 1500) { ++ itrval = low_latency; ++ } ++ break; ++ } ++ ++ /* clear work counters since we have the values we need */ ++ ring_container->total_bytes = 0; ++ ring_container->total_packets = 0; ++ ++ /* write updated itr to ring container */ ++ ring_container->itr = itrval; ++} ++ ++static void igb_set_itr(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ u32 new_itr = q_vector->itr_val; ++ u8 current_itr = 0; ++ ++ if (!InterruptThrottle) ++ return; ++ ++ /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ ++ if (adapter->link_speed != SPEED_1000) { ++ current_itr = 0; ++ new_itr = IGB_4K_ITR; ++ goto set_itr_now; ++ } ++ ++ igb_update_itr(q_vector, &q_vector->tx); ++ igb_update_itr(q_vector, &q_vector->rx); ++ ++ current_itr = max(q_vector->rx.itr, q_vector->tx.itr); ++ ++ /* conservative mode (itr 3) eliminates the lowest_latency setting */ ++ if (current_itr == lowest_latency && ++ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || ++ (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) ++ current_itr = low_latency; ++ ++ switch (current_itr) { ++ /* counts and packets in update_itr are dependent on these numbers */ ++ case lowest_latency: ++ new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ ++ break; ++ case low_latency: ++ new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ ++ break; ++ case bulk_latency: ++ new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ ++ break; ++ default: ++ break; ++ } ++ ++set_itr_now: ++ if (new_itr != q_vector->itr_val) { ++ /* this attempts to bias the interrupt rate towards Bulk ++ * by adding intermediate steps when interrupt rate is ++ * increasing ++ */ ++ new_itr = new_itr > q_vector->itr_val ? ++ max((new_itr * q_vector->itr_val) / ++ (new_itr + (q_vector->itr_val >> 2)), ++ new_itr) : new_itr; ++ /* Don't write the value here; it resets the adapter's ++ * internal timer, and causes us to delay far longer than ++ * we should between interrupts. Instead, we write the ITR ++ * value at the beginning of the next interrupt so the timing ++ * ends up being correct. ++ */ ++ q_vector->itr_val = new_itr; ++ q_vector->set_itr = 1; ++ } ++} ++ ++ ++#define IGB_SET_FLAG(_input, _flag, _result) \ ++ ((_flag <= _result) ? 
\ ++ ((u32)(_input & _flag) * (_result / _flag)) : \ ++ ((u32)(_input & _flag) / (_flag / _result))) ++ ++static u32 igb_tx_cmd_type(struct rtskb *skb, u32 tx_flags) ++{ ++ /* set type for advanced descriptor with frame checksum insertion */ ++ u32 cmd_type = E1000_ADVTXD_DTYP_DATA | ++ E1000_ADVTXD_DCMD_DEXT | ++ E1000_ADVTXD_DCMD_IFCS; ++ ++ return cmd_type; ++} ++ ++static void igb_tx_olinfo_status(struct igb_ring *tx_ring, ++ union e1000_adv_tx_desc *tx_desc, ++ u32 tx_flags, unsigned int paylen) ++{ ++ u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++ ++ /* 82575 requires a unique index per ring */ ++ if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) ++ olinfo_status |= tx_ring->reg_idx << 4; ++ ++ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); ++} ++ ++static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) ++{ ++ struct rtnet_device *netdev = tx_ring->netdev; ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* Herbert's original patch had: ++ * smp_mb__after_netif_stop_queue(); ++ * but since that doesn't exist yet, just open code it. ++ */ ++ smp_mb(); ++ ++ /* We need to check again in a case another CPU has just ++ * made room available. ++ */ ++ if (igb_desc_unused(tx_ring) < size) ++ return -EBUSY; ++ ++ /* A reprieve! */ ++ rtnetif_wake_queue(netdev); ++ ++ tx_ring->tx_stats.restart_queue2++; ++ ++ return 0; ++} ++ ++static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) ++{ ++ if (igb_desc_unused(tx_ring) >= size) ++ return 0; ++ return __igb_maybe_stop_tx(tx_ring, size); ++} ++ ++static void igb_tx_map(struct igb_ring *tx_ring, ++ struct igb_tx_buffer *first, ++ const u8 hdr_len) ++{ ++ struct rtskb *skb = first->skb; ++ struct igb_tx_buffer *tx_buffer; ++ union e1000_adv_tx_desc *tx_desc; ++ dma_addr_t dma; ++ unsigned int size; ++ u32 tx_flags = first->tx_flags; ++ u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); ++ u16 i = tx_ring->next_to_use; ++ ++ /* first descriptor is also last, set RS and EOP bits */ ++ cmd_type |= IGB_TXD_DCMD; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ ++ igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); ++ ++ size = skb->len; ++ ++ dma = rtskb_data_dma_addr(skb, 0); ++ ++ tx_buffer = first; ++ ++ tx_desc->read.buffer_addr = cpu_to_le64(dma); ++ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); ++ ++ /* set the timestamp */ ++ first->time_stamp = jiffies; ++ first->next_to_watch = tx_desc; ++ ++ i++; ++ tx_desc++; ++ if (i == tx_ring->count) { ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ i = 0; ++ } ++ ++ /* Force memory writes to complete before letting h/w know there ++ * are new descriptors to fetch. (Only applicable for weak-ordered ++ * memory model archs, such as IA-64). ++ * ++ * We also need this memory barrier to make certain all of the ++ * status bits have been updated before next_to_watch is written. ++ */ ++ wmb(); ++ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /* set next_to_watch value indicating a packet is present */ ++ tx_ring->next_to_use = i; ++ ++ /* Make sure there is space in the ring for the next send. 
*/ ++ igb_maybe_stop_tx(tx_ring, DESC_NEEDED); ++ ++ writel(i, tx_ring->tail); ++ ++ /* we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems ++ */ ++ mmiowb(); ++ ++ return; ++} ++ ++netdev_tx_t igb_xmit_frame_ring(struct rtskb *skb, ++ struct igb_ring *tx_ring) ++{ ++ struct igb_tx_buffer *first; ++ u32 tx_flags = 0; ++ u16 count = 2; ++ u8 hdr_len = 0; ++ ++ /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, ++ * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, ++ * + 2 desc gap to keep tail from touching head, ++ * + 1 desc for context descriptor, ++ * otherwise try next time ++ */ ++ if (igb_maybe_stop_tx(tx_ring, count + 3)) { ++ /* this is a hard error */ ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (skb->protocol == htons(ETH_P_IP)) ++ tx_flags |= IGB_TX_FLAGS_IPV4; ++ ++ /* record the location of the first descriptor for this packet */ ++ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; ++ first->skb = skb; ++ first->bytecount = skb->len; ++ first->gso_segs = 1; ++ ++ /* record initial flags and protocol */ ++ first->tx_flags = tx_flags; ++ first->protocol = skb->protocol; ++ ++ igb_tx_map(tx_ring, first, hdr_len); ++ ++ return NETDEV_TX_OK; ++} ++ ++static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, ++ struct rtskb *skb) ++{ ++ return adapter->tx_ring[0]; ++} ++ ++static netdev_tx_t igb_xmit_frame(struct rtskb *skb, ++ struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (test_bit(__IGB_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->len <= 0) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ /* The minimum packet size with TCTL.PSP set is 17 so pad the skb ++ * in order to meet this minimum size requirement. ++ */ ++ if (skb->len < 17) { ++ skb = rtskb_padto(skb, 17); ++ if (!skb) ++ return NETDEV_TX_OK; ++ } ++ ++ return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); ++} ++ ++static void igb_reset_task(struct work_struct *work) ++{ ++ struct igb_adapter *adapter; ++ adapter = container_of(work, struct igb_adapter, reset_task); ++ ++ igb_dump(adapter); ++ rtdev_err(adapter->netdev, "Reset adapter\n"); ++ igb_reinit_locked(adapter); ++} ++ ++/** ++ * igb_get_stats - Get System Network Statistics ++ * @netdev: network interface device structure ++ * ++ * Returns the address of the device statistics structure. ++ * The statistics are actually updated from the timer callback. ++ **/ ++static struct net_device_stats * ++igb_get_stats(struct rtnet_device *netdev) ++{ ++ struct igb_adapter *adapter = netdev->priv; ++ ++ /* only return the current stats */ ++ return &adapter->net_stats; ++} ++ ++/** ++ * igb_update_stats - Update the board statistics counters ++ * @adapter: board private structure ++ **/ ++void igb_update_stats(struct igb_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ struct net_device_stats *net_stats; ++ u32 reg, mpc; ++ int i; ++ u64 bytes, packets; ++ ++ /* Prevent stats update while adapter is being reset, or if the pci ++ * connection is down. 
++ */ ++ if (adapter->link_speed == 0) ++ return; ++ if (pci_channel_offline(pdev)) ++ return; ++ ++ net_stats = &adapter->net_stats; ++ bytes = 0; ++ packets = 0; ++ ++ rcu_read_lock(); ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct igb_ring *ring = adapter->rx_ring[i]; ++ u32 rqdpc = rd32(E1000_RQDPC(i)); ++ if (hw->mac.type >= e1000_i210) ++ wr32(E1000_RQDPC(i), 0); ++ ++ if (rqdpc) { ++ ring->rx_stats.drops += rqdpc; ++ net_stats->rx_fifo_errors += rqdpc; ++ } ++ ++ bytes += ring->rx_stats.bytes; ++ packets += ring->rx_stats.packets; ++ } ++ ++ net_stats->rx_bytes = bytes; ++ net_stats->rx_packets = packets; ++ ++ bytes = 0; ++ packets = 0; ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ struct igb_ring *ring = adapter->tx_ring[i]; ++ bytes += ring->tx_stats.bytes; ++ packets += ring->tx_stats.packets; ++ } ++ net_stats->tx_bytes = bytes; ++ net_stats->tx_packets = packets; ++ rcu_read_unlock(); ++ ++ /* read stats registers */ ++ adapter->stats.crcerrs += rd32(E1000_CRCERRS); ++ adapter->stats.gprc += rd32(E1000_GPRC); ++ adapter->stats.gorc += rd32(E1000_GORCL); ++ rd32(E1000_GORCH); /* clear GORCL */ ++ adapter->stats.bprc += rd32(E1000_BPRC); ++ adapter->stats.mprc += rd32(E1000_MPRC); ++ adapter->stats.roc += rd32(E1000_ROC); ++ ++ adapter->stats.prc64 += rd32(E1000_PRC64); ++ adapter->stats.prc127 += rd32(E1000_PRC127); ++ adapter->stats.prc255 += rd32(E1000_PRC255); ++ adapter->stats.prc511 += rd32(E1000_PRC511); ++ adapter->stats.prc1023 += rd32(E1000_PRC1023); ++ adapter->stats.prc1522 += rd32(E1000_PRC1522); ++ adapter->stats.symerrs += rd32(E1000_SYMERRS); ++ adapter->stats.sec += rd32(E1000_SEC); ++ ++ mpc = rd32(E1000_MPC); ++ adapter->stats.mpc += mpc; ++ net_stats->rx_fifo_errors += mpc; ++ adapter->stats.scc += rd32(E1000_SCC); ++ adapter->stats.ecol += rd32(E1000_ECOL); ++ adapter->stats.mcc += rd32(E1000_MCC); ++ adapter->stats.latecol += rd32(E1000_LATECOL); ++ adapter->stats.dc += rd32(E1000_DC); ++ adapter->stats.rlec += rd32(E1000_RLEC); ++ adapter->stats.xonrxc += rd32(E1000_XONRXC); ++ adapter->stats.xontxc += rd32(E1000_XONTXC); ++ adapter->stats.xoffrxc += rd32(E1000_XOFFRXC); ++ adapter->stats.xofftxc += rd32(E1000_XOFFTXC); ++ adapter->stats.fcruc += rd32(E1000_FCRUC); ++ adapter->stats.gptc += rd32(E1000_GPTC); ++ adapter->stats.gotc += rd32(E1000_GOTCL); ++ rd32(E1000_GOTCH); /* clear GOTCL */ ++ adapter->stats.rnbc += rd32(E1000_RNBC); ++ adapter->stats.ruc += rd32(E1000_RUC); ++ adapter->stats.rfc += rd32(E1000_RFC); ++ adapter->stats.rjc += rd32(E1000_RJC); ++ adapter->stats.tor += rd32(E1000_TORH); ++ adapter->stats.tot += rd32(E1000_TOTH); ++ adapter->stats.tpr += rd32(E1000_TPR); ++ ++ adapter->stats.ptc64 += rd32(E1000_PTC64); ++ adapter->stats.ptc127 += rd32(E1000_PTC127); ++ adapter->stats.ptc255 += rd32(E1000_PTC255); ++ adapter->stats.ptc511 += rd32(E1000_PTC511); ++ adapter->stats.ptc1023 += rd32(E1000_PTC1023); ++ adapter->stats.ptc1522 += rd32(E1000_PTC1522); ++ ++ adapter->stats.mptc += rd32(E1000_MPTC); ++ adapter->stats.bptc += rd32(E1000_BPTC); ++ ++ adapter->stats.tpt += rd32(E1000_TPT); ++ adapter->stats.colc += rd32(E1000_COLC); ++ ++ adapter->stats.algnerrc += rd32(E1000_ALGNERRC); ++ /* read internal phy specific stats */ ++ reg = rd32(E1000_CTRL_EXT); ++ if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { ++ adapter->stats.rxerrc += rd32(E1000_RXERRC); ++ ++ /* this stat has invalid values on i210/i211 */ ++ if ((hw->mac.type != e1000_i210) && ++ (hw->mac.type != e1000_i211)) ++ adapter->stats.tncrs += rd32(E1000_TNCRS); 
++ } ++ ++ adapter->stats.tsctc += rd32(E1000_TSCTC); ++ adapter->stats.tsctfc += rd32(E1000_TSCTFC); ++ ++ adapter->stats.iac += rd32(E1000_IAC); ++ adapter->stats.icrxoc += rd32(E1000_ICRXOC); ++ adapter->stats.icrxptc += rd32(E1000_ICRXPTC); ++ adapter->stats.icrxatc += rd32(E1000_ICRXATC); ++ adapter->stats.ictxptc += rd32(E1000_ICTXPTC); ++ adapter->stats.ictxatc += rd32(E1000_ICTXATC); ++ adapter->stats.ictxqec += rd32(E1000_ICTXQEC); ++ adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC); ++ adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC); ++ ++ /* Fill out the OS statistics structure */ ++ net_stats->multicast = adapter->stats.mprc; ++ net_stats->collisions = adapter->stats.colc; ++ ++ /* Rx Errors */ ++ ++ /* RLEC on some newer hardware can be incorrect so build ++ * our own version based on RUC and ROC ++ */ ++ net_stats->rx_errors = adapter->stats.rxerrc + ++ adapter->stats.crcerrs + adapter->stats.algnerrc + ++ adapter->stats.ruc + adapter->stats.roc + ++ adapter->stats.cexterr; ++ net_stats->rx_length_errors = adapter->stats.ruc + ++ adapter->stats.roc; ++ net_stats->rx_crc_errors = adapter->stats.crcerrs; ++ net_stats->rx_frame_errors = adapter->stats.algnerrc; ++ net_stats->rx_missed_errors = adapter->stats.mpc; ++ ++ /* Tx Errors */ ++ net_stats->tx_errors = adapter->stats.ecol + ++ adapter->stats.latecol; ++ net_stats->tx_aborted_errors = adapter->stats.ecol; ++ net_stats->tx_window_errors = adapter->stats.latecol; ++ net_stats->tx_carrier_errors = adapter->stats.tncrs; ++ ++ /* Tx Dropped needs to be maintained elsewhere */ ++ ++ /* Management Stats */ ++ adapter->stats.mgptc += rd32(E1000_MGTPTC); ++ adapter->stats.mgprc += rd32(E1000_MGTPRC); ++ adapter->stats.mgpdc += rd32(E1000_MGTPDC); ++ ++ /* OS2BMC Stats */ ++ reg = rd32(E1000_MANC); ++ if (reg & E1000_MANC_EN_BMC2OS) { ++ adapter->stats.o2bgptc += rd32(E1000_O2BGPTC); ++ adapter->stats.o2bspc += rd32(E1000_O2BSPC); ++ adapter->stats.b2ospc += rd32(E1000_B2OSPC); ++ adapter->stats.b2ogprc += rd32(E1000_B2OGPRC); ++ } ++} ++ ++static void igb_nrtsig_watchdog(rtdm_nrtsig_t *sig, void *data) ++{ ++ struct igb_adapter *adapter = data; ++ mod_timer(&adapter->watchdog_timer, jiffies + 1); ++} ++ ++static void igb_other_handler(struct igb_adapter *adapter, u32 icr, bool root) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (icr & E1000_ICR_DRSTA) ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ ++ if (icr & E1000_ICR_DOUTSYNC) { ++ /* HW is reporting DMA is out of sync */ ++ adapter->stats.doosync++; ++ /* The DMA Out of Sync is also indication of a spoof event ++ * in IOV mode. Check the Wrong VM Behavior register to ++ * see if it is really a spoof event. 
++ */ ++ igb_check_wvbr(adapter); ++ } ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (root) ++ mod_timer(&adapter->watchdog_timer, ++ jiffies + 1); ++ else ++ rtdm_nrtsig_pend(&adapter->watchdog_nrtsig); ++ } ++ } ++} ++ ++static irqreturn_t igb_msix_other(int irq, void *data) ++{ ++ struct igb_adapter *adapter = data; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = rd32(E1000_ICR); ++ /* reading ICR causes bit 31 of EICR to be cleared */ ++ ++ igb_other_handler(adapter, icr, true); ++ ++ wr32(E1000_EIMS, adapter->eims_other); ++ ++ return IRQ_HANDLED; ++} ++ ++static void igb_write_itr(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ u32 itr_val = (q_vector->itr_val + 0x3) & 0x7FFC; ++ ++ if (!q_vector->set_itr) ++ return; ++ ++ if (!itr_val) ++ itr_val = 0x4; ++ ++ if (adapter->hw.mac.type == e1000_82575) ++ itr_val |= itr_val << 16; ++ else ++ itr_val |= E1000_EITR_CNT_IGNR; ++ ++ writel(itr_val, q_vector->itr_register); ++ q_vector->set_itr = 0; ++} ++ ++static int igb_msix_ring(rtdm_irq_t *ih) ++{ ++ struct igb_q_vector *q_vector = ++ rtdm_irq_get_arg(ih, struct igb_q_vector); ++ ++ /* Write the ITR value calculated from the previous interrupt. */ ++ igb_write_itr(q_vector); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++/** ++ * igb_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int igb_intr_msi(rtdm_irq_t *ih) ++{ ++ struct igb_adapter *adapter = ++ rtdm_irq_get_arg(ih, struct igb_adapter); ++ struct igb_q_vector *q_vector = adapter->q_vector[0]; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = rd32(E1000_ICR); ++ ++ igb_write_itr(q_vector); ++ ++ igb_other_handler(adapter, icr, false); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * igb_intr - Legacy Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int igb_intr(rtdm_irq_t *ih) ++{ ++ struct igb_adapter *adapter = ++ rtdm_irq_get_arg(ih, struct igb_adapter); ++ struct igb_q_vector *q_vector = adapter->q_vector[0]; ++ struct e1000_hw *hw = &adapter->hw; ++ /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. 
No ++ * need for the IMC write ++ */ ++ u32 icr = rd32(E1000_ICR); ++ ++ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt ++ */ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) ++ return IRQ_NONE; ++ ++ igb_write_itr(q_vector); ++ ++ igb_other_handler(adapter, icr, false); ++ ++ igb_poll(q_vector); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static void igb_ring_irq_enable(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || ++ (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { ++ if (adapter->num_q_vectors == 1) ++ igb_set_itr(q_vector); ++ else ++ igb_update_ring_itr(q_vector); ++ } ++ ++ if (!test_bit(__IGB_DOWN, &adapter->state)) { ++ if (adapter->flags & IGB_FLAG_HAS_MSIX) ++ wr32(E1000_EIMS, q_vector->eims_value); ++ else ++ igb_irq_enable(adapter); ++ } ++} ++ ++/** ++ * igb_poll - NAPI Rx polling callback ++ * @napi: napi polling structure ++ * @budget: count of how many packets we should handle ++ **/ ++static void igb_poll(struct igb_q_vector *q_vector) ++{ ++ if (q_vector->tx.ring) ++ igb_clean_tx_irq(q_vector); ++ ++ if (q_vector->rx.ring) ++ igb_clean_rx_irq(q_vector, 64); ++ ++ igb_ring_irq_enable(q_vector); ++} ++ ++/** ++ * igb_clean_tx_irq - Reclaim resources after transmit completes ++ * @q_vector: pointer to q_vector containing needed info ++ * ++ * returns true if ring is completely cleaned ++ **/ ++static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) ++{ ++ struct igb_adapter *adapter = q_vector->adapter; ++ struct igb_ring *tx_ring = q_vector->tx.ring; ++ struct igb_tx_buffer *tx_buffer; ++ union e1000_adv_tx_desc *tx_desc; ++ unsigned int total_bytes = 0, total_packets = 0; ++ unsigned int budget = q_vector->tx.work_limit; ++ unsigned int i = tx_ring->next_to_clean; ++ ++ if (test_bit(__IGB_DOWN, &adapter->state)) ++ return true; ++ ++ tx_buffer = &tx_ring->tx_buffer_info[i]; ++ tx_desc = IGB_TX_DESC(tx_ring, i); ++ i -= tx_ring->count; ++ ++ do { ++ union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; ++ ++ /* if next_to_watch is not set then there is no work pending */ ++ if (!eop_desc) ++ break; ++ ++ /* prevent any other reads prior to eop_desc */ ++ read_barrier_depends(); ++ ++ /* if DD is not set pending work has not been completed */ ++ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) ++ break; ++ ++ /* clear next_to_watch to prevent false hangs */ ++ tx_buffer->next_to_watch = NULL; ++ ++ /* update the statistics for this packet */ ++ total_bytes += tx_buffer->bytecount; ++ total_packets += tx_buffer->gso_segs; ++ ++ /* free the skb */ ++ kfree_rtskb(tx_buffer->skb); ++ ++ /* clear tx_buffer data */ ++ tx_buffer->skb = NULL; ++ ++ /* clear last DMA location and unmap remaining buffers */ ++ while (tx_desc != eop_desc) { ++ tx_buffer++; ++ tx_desc++; ++ i++; ++ if (unlikely(!i)) { ++ i -= tx_ring->count; ++ tx_buffer = tx_ring->tx_buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ } ++ } ++ ++ /* move us one more past the eop_desc for start of next pkt */ ++ tx_buffer++; ++ tx_desc++; ++ i++; ++ if (unlikely(!i)) { ++ i -= tx_ring->count; ++ tx_buffer = tx_ring->tx_buffer_info; ++ tx_desc = IGB_TX_DESC(tx_ring, 0); ++ } ++ ++ /* issue prefetch for next Tx descriptor */ ++ prefetch(tx_desc); ++ ++ /* update budget accounting */ ++ budget--; ++ } while (likely(budget)); ++ ++ i += tx_ring->count; ++ tx_ring->next_to_clean = i; 
++ tx_ring->tx_stats.bytes += total_bytes; ++ tx_ring->tx_stats.packets += total_packets; ++ q_vector->tx.total_bytes += total_bytes; ++ q_vector->tx.total_packets += total_packets; ++ ++ if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i ++ */ ++ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); ++ if (tx_buffer->next_to_watch && ++ time_after(jiffies, tx_buffer->time_stamp + ++ (adapter->tx_timeout_factor * HZ)) && ++ !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ dev_err(tx_ring->dev, ++ "Detected Tx Unit Hang\n" ++ " Tx Queue <%d>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%p>\n" ++ " jiffies <%lx>\n" ++ " desc.status <%x>\n", ++ tx_ring->queue_index, ++ rd32(E1000_TDH(tx_ring->reg_idx)), ++ readl(tx_ring->tail), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_buffer->time_stamp, ++ tx_buffer->next_to_watch, ++ jiffies, ++ tx_buffer->next_to_watch->wb.status); ++ rtnetif_stop_queue(tx_ring->netdev); ++ ++ /* we are about to reset, no point in enabling stuff */ ++ return true; ++ } ++ } ++ ++#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) ++ if (unlikely(total_packets && ++ rtnetif_carrier_ok(tx_ring->netdev) && ++ igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. ++ */ ++ smp_mb(); ++ if (rtnetif_queue_stopped(tx_ring->netdev) && ++ !(test_bit(__IGB_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(tx_ring->netdev); ++ ++ tx_ring->tx_stats.restart_queue++; ++ } ++ } ++ ++ return !!budget; ++} ++ ++static struct rtskb *igb_fetch_rx_buffer(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc) ++{ ++ struct igb_rx_buffer *rx_buffer; ++ struct rtskb *skb; ++ ++ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; ++ skb = rx_buffer->skb; ++ prefetchw(skb->data); ++ ++ /* pull the header of the skb in */ ++ rtskb_put(skb, le16_to_cpu(rx_desc->wb.upper.length)); ++ rx_buffer->skb = NULL; ++ rx_buffer->dma = 0; ++ ++ return skb; ++} ++ ++static inline void igb_rx_checksum(struct igb_ring *ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* Ignore Checksum bit is set */ ++ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) ++ return; ++ ++ /* Rx checksum disabled via ethtool */ ++ if (!(ring->netdev->features & NETIF_F_RXCSUM)) ++ return; ++ ++ /* TCP/UDP checksum error bit is set */ ++ if (igb_test_staterr(rx_desc, ++ E1000_RXDEXT_STATERR_TCPE | ++ E1000_RXDEXT_STATERR_IPE)) { ++ /* work around errata with sctp packets where the TCPE aka ++ * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) ++ * packets, (aka let the stack check the crc32c) ++ */ ++ if (!((skb->len == 60) && ++ test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { ++ ring->rx_stats.csum_err++; ++ } ++ /* let the stack verify checksum errors */ ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | ++ E1000_RXD_STAT_UDPCS)) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ dev_dbg(ring->dev, "cksum success: bits %08X\n", ++ le32_to_cpu(rx_desc->wb.upper.status_error)); ++} ++ ++/** ++ * igb_is_non_eop - process handling of non-EOP 
buffers ++ * @rx_ring: Rx ring being processed ++ * @rx_desc: Rx descriptor for current buffer ++ * @skb: current socket buffer containing buffer in progress ++ * ++ * This function updates next to clean. If the buffer is an EOP buffer ++ * this function exits returning false, otherwise it will place the ++ * sk_buff in the next buffer to be chained and return true indicating ++ * that this is in fact a non-EOP buffer. ++ **/ ++static bool igb_is_non_eop(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc) ++{ ++ u32 ntc = rx_ring->next_to_clean + 1; ++ ++ /* fetch, update, and store next to clean */ ++ ntc = (ntc < rx_ring->count) ? ntc : 0; ++ rx_ring->next_to_clean = ntc; ++ ++ prefetch(IGB_RX_DESC(rx_ring, ntc)); ++ ++ if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) ++ return false; ++ ++ return true; ++} ++ ++/** ++ * igb_cleanup_headers - Correct corrupted or empty headers ++ * @rx_ring: rx descriptor ring packet is being transacted on ++ * @rx_desc: pointer to the EOP Rx descriptor ++ * @skb: pointer to current skb being fixed ++ * ++ * Address the case where we are pulling data in on pages only ++ * and as such no data is present in the skb header. ++ * ++ * In addition if skb is not at least 60 bytes we need to pad it so that ++ * it is large enough to qualify as a valid Ethernet frame. ++ * ++ * Returns true if an error was encountered and skb was freed. ++ **/ ++static bool igb_cleanup_headers(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ if (unlikely((igb_test_staterr(rx_desc, ++ E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { ++ struct rtnet_device *netdev = rx_ring->netdev; ++ if (!(netdev->features & NETIF_F_RXALL)) { ++ kfree_rtskb(skb); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * igb_process_skb_fields - Populate skb header fields from Rx descriptor ++ * @rx_ring: rx descriptor ring packet is being transacted on ++ * @rx_desc: pointer to the EOP Rx descriptor ++ * @skb: pointer to current skb being populated ++ * ++ * This function checks the ring, descriptor, and packet information in ++ * order to populate the hash, checksum, VLAN, timestamp, protocol, and ++ * other fields within the skb. 
++ **/ ++static void igb_process_skb_fields(struct igb_ring *rx_ring, ++ union e1000_adv_rx_desc *rx_desc, ++ struct rtskb *skb) ++{ ++ igb_rx_checksum(rx_ring, rx_desc, skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, rx_ring->netdev); ++} ++ ++static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) ++{ ++ struct igb_ring *rx_ring = q_vector->rx.ring; ++ unsigned int total_bytes = 0, total_packets = 0; ++ u16 cleaned_count = igb_desc_unused(rx_ring); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtskb *skb; ++ ++ while (likely(total_packets < budget)) { ++ union e1000_adv_rx_desc *rx_desc; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (cleaned_count >= IGB_RX_BUFFER_WRITE) { ++ igb_alloc_rx_buffers(rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); ++ ++ if (!rx_desc->wb.upper.status_error) ++ break; ++ ++ /* This memory barrier is needed to keep us from reading ++ * any other fields out of the rx_desc until we know the ++ * descriptor has been written back ++ */ ++ rmb(); ++ ++ /* retrieve a buffer from the ring */ ++ skb = igb_fetch_rx_buffer(rx_ring, rx_desc); ++ skb->time_stamp = time_stamp; ++ ++ cleaned_count++; ++ ++ /* fetch next buffer in frame if non-eop */ ++ if (igb_is_non_eop(rx_ring, rx_desc)) { ++ kfree_rtskb(skb); ++ continue; ++ } ++ ++ /* verify the packet layout is correct */ ++ if (igb_cleanup_headers(rx_ring, rx_desc, skb)) ++ continue; ++ ++ /* probably a little skewed due to removing CRC */ ++ total_bytes += skb->len; ++ ++ /* populate checksum, timestamp, VLAN, and protocol */ ++ igb_process_skb_fields(rx_ring, rx_desc, skb); ++ ++ rtnetif_rx(skb); ++ ++ /* reset skb pointer */ ++ skb = NULL; ++ ++ /* update budget accounting */ ++ total_packets++; ++ } ++ ++ rx_ring->rx_stats.packets += total_packets; ++ rx_ring->rx_stats.bytes += total_bytes; ++ q_vector->rx.total_packets += total_packets; ++ q_vector->rx.total_bytes += total_bytes; ++ ++ if (cleaned_count) ++ igb_alloc_rx_buffers(rx_ring, cleaned_count); ++ ++ if (total_packets) ++ rt_mark_stack_mgr(q_vector->adapter->netdev); ++ ++ return total_packets < budget; ++} ++ ++static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, ++ struct igb_rx_buffer *bi) ++{ ++ struct igb_adapter *adapter = rx_ring->q_vector->adapter; ++ struct rtskb *skb = bi->skb; ++ dma_addr_t dma = bi->dma; ++ ++ if (dma) ++ return true; ++ ++ if (likely(!skb)) { ++ skb = rtnetdev_alloc_rtskb(adapter->netdev, ++ rx_ring->rx_buffer_len + NET_IP_ALIGN); ++ if (!skb) { ++ rx_ring->rx_stats.alloc_failed++; ++ return false; ++ } ++ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ skb->rtdev = adapter->netdev; ++ ++ bi->skb = skb; ++ bi->dma = rtskb_data_dma_addr(skb, 0); ++ } ++ ++ return true; ++} ++ ++/** ++ * igb_alloc_rx_buffers - Replace used receive buffers; packet split ++ * @adapter: address of board private structure ++ **/ ++void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) ++{ ++ union e1000_adv_rx_desc *rx_desc; ++ struct igb_rx_buffer *bi; ++ u16 i = rx_ring->next_to_use; ++ ++ /* nothing to do */ ++ if (!cleaned_count) ++ return; ++ ++ rx_desc = IGB_RX_DESC(rx_ring, i); ++ bi = &rx_ring->rx_buffer_info[i]; ++ i -= rx_ring->count; ++ ++ do { ++ if (!igb_alloc_mapped_skb(rx_ring, bi)) ++ break; ++ ++ /* Refresh the desc even if buffer_addrs didn't change ++ * because each write-back erases this info. 
++ */ ++ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); ++ ++ rx_desc++; ++ bi++; ++ i++; ++ if (unlikely(!i)) { ++ rx_desc = IGB_RX_DESC(rx_ring, 0); ++ bi = rx_ring->rx_buffer_info; ++ i -= rx_ring->count; ++ } ++ ++ /* clear the status bits for the next_to_use descriptor */ ++ rx_desc->wb.upper.status_error = 0; ++ ++ cleaned_count--; ++ } while (cleaned_count); ++ ++ i += rx_ring->count; ++ ++ if (rx_ring->next_to_use != i) { ++ /* record the next descriptor to use */ ++ rx_ring->next_to_use = i; ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). ++ */ ++ wmb(); ++ writel(i, rx_ring->tail); ++ } ++} ++ ++/** ++ * igb_mii_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int igb_mii_ioctl(struct rtnet_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct mii_ioctl_data *data = if_mii(ifr); ++ ++ if (adapter->hw.phy.media_type != e1000_media_type_copper) ++ return -EOPNOTSUPP; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ data->phy_id = adapter->hw.phy.addr; ++ break; ++ case SIOCGMIIREG: ++ if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, ++ &data->val_out)) ++ return -EIO; ++ break; ++ case SIOCSMIIREG: ++ default: ++ return -EOPNOTSUPP; ++ } ++ return 0; ++} ++ ++/** ++ * igb_ioctl - ++ * @netdev: ++ * @ifreq: ++ * @cmd: ++ **/ ++static int igb_ioctl(struct rtnet_device *netdev, struct ifreq *ifr, int cmd) ++{ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ switch (cmd) { ++ case SIOCGMIIPHY: ++ case SIOCGMIIREG: ++ case SIOCSMIIREG: ++ return igb_mii_ioctl(netdev, ifr, cmd); ++ ++ default: ++ return -EOPNOTSUPP; ++ } ++} ++ ++void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ if (pcie_capability_read_word(adapter->pdev, reg, value)) ++ return -E1000_ERR_CONFIG; ++ ++ return 0; ++} ++ ++s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) ++{ ++ struct igb_adapter *adapter = hw->back; ++ ++ if (pcie_capability_write_word(adapter->pdev, reg, *value)) ++ return -E1000_ERR_CONFIG; ++ ++ return 0; ++} ++ ++static void igb_vlan_mode(struct rtnet_device *netdev, netdev_features_t features) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* disable VLAN tag insert/strip */ ++ ctrl = rd32(E1000_CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ wr32(E1000_CTRL, ctrl); ++ ++ igb_rlpml_set(adapter); ++} ++ ++static int igb_vlan_rx_add_vid(struct rtnet_device *netdev, ++ __be16 proto, u16 vid) ++{ ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* add the filter since PF can receive vlans w/o entry in vlvf */ ++ igb_vfta_set(hw, vid, true); ++ ++ set_bit(vid, adapter->active_vlans); ++ ++ return 0; ++} ++ ++static void igb_restore_vlan(struct igb_adapter *adapter) ++{ ++ u16 vid; ++ ++ igb_vlan_mode(adapter->netdev, adapter->netdev->features); ++ ++ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) ++ 
igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); ++} ++ ++static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, ++ bool runtime) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl, rctl, status; ++ u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; ++#ifdef CONFIG_PM ++ int retval = 0; ++#endif ++ ++ rtnetif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) ++ __igb_close(netdev, true); ++ ++ igb_clear_interrupt_scheme(adapter); ++ ++#ifdef CONFIG_PM ++ retval = pci_save_state(pdev); ++ if (retval) ++ return retval; ++#endif ++ ++ status = rd32(E1000_STATUS); ++ if (status & E1000_STATUS_LU) ++ wufc &= ~E1000_WUFC_LNKC; ++ ++ if (wufc) { ++ igb_setup_rctl(adapter); ++ igb_set_rx_mode(netdev); ++ ++ /* turn on all-multi mode if wake on multicast is enabled */ ++ if (wufc & E1000_WUFC_MC) { ++ rctl = rd32(E1000_RCTL); ++ rctl |= E1000_RCTL_MPE; ++ wr32(E1000_RCTL, rctl); ++ } ++ ++ ctrl = rd32(E1000_CTRL); ++ /* advertise wake from D3Cold */ ++ #define E1000_CTRL_ADVD3WUC 0x00100000 ++ /* phy power management enable */ ++ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ++ ctrl |= E1000_CTRL_ADVD3WUC; ++ wr32(E1000_CTRL, ctrl); ++ ++ /* Allow time for pending master requests to run */ ++ igb_disable_pcie_master(hw); ++ ++ wr32(E1000_WUC, E1000_WUC_PME_EN); ++ wr32(E1000_WUFC, wufc); ++ } else { ++ wr32(E1000_WUC, 0); ++ wr32(E1000_WUFC, 0); ++ } ++ ++ *enable_wake = wufc || adapter->en_mng_pt; ++ if (!*enable_wake) ++ igb_power_down_link(adapter); ++ else ++ igb_power_up_link(adapter); ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ igb_release_hw_control(adapter); ++ ++ pci_disable_device(pdev); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++#ifdef CONFIG_PM_SLEEP ++static int igb_suspend(struct device *dev) ++{ ++ int retval; ++ bool wake; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ ++ retval = __igb_shutdown(pdev, &wake, 0); ++ if (retval) ++ return retval; ++ ++ if (wake) { ++ pci_prepare_to_sleep(pdev); ++ } else { ++ pci_wake_from_d3(pdev, false); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_PM_SLEEP */ ++ ++static int igb_resume(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ u32 err; ++ ++ pci_set_power_state(pdev, PCI_D0); ++ pci_restore_state(pdev); ++ pci_save_state(pdev); ++ ++ if (!pci_device_is_present(pdev)) ++ return -ENODEV; ++ err = pci_enable_device_mem(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "igb: Cannot enable PCI device from suspend\n"); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ igb_reset(adapter); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. 
++ */ ++ igb_get_hw_control(adapter); ++ ++ wr32(E1000_WUS, ~0); ++ ++ if (netdev->flags & IFF_UP) { ++ rtnl_lock(); ++ err = __igb_open(netdev, true); ++ rtnl_unlock(); ++ if (err) ++ return err; ++ } ++ ++ rtnetif_device_attach(netdev); ++ return 0; ++} ++ ++static int igb_runtime_idle(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (!igb_has_link(adapter)) ++ pm_schedule_suspend(dev, MSEC_PER_SEC * 5); ++ ++ return -EBUSY; ++} ++ ++static int igb_runtime_suspend(struct device *dev) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ int retval; ++ bool wake; ++ ++ retval = __igb_shutdown(pdev, &wake, 1); ++ if (retval) ++ return retval; ++ ++ if (wake) { ++ pci_prepare_to_sleep(pdev); ++ } else { ++ pci_wake_from_d3(pdev, false); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++ ++ return 0; ++} ++ ++static int igb_runtime_resume(struct device *dev) ++{ ++ return igb_resume(dev); ++} ++#endif /* CONFIG_PM */ ++ ++static void igb_shutdown(struct pci_dev *pdev) ++{ ++ bool wake; ++ ++ __igb_shutdown(pdev, &wake, 0); ++ ++ if (system_state == SYSTEM_POWER_OFF) { ++ pci_wake_from_d3(pdev, wake); ++ pci_set_power_state(pdev, PCI_D3hot); ++ } ++} ++ ++static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) ++{ ++ return 0; ++} ++ ++/** ++ * igb_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ **/ ++static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ rtnetif_device_detach(netdev); ++ ++ if (state == pci_channel_io_perm_failure) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ if (rtnetif_running(netdev)) ++ igb_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * igb_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the igb_resume routine. ++ **/ ++static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ struct e1000_hw *hw = &adapter->hw; ++ pci_ers_result_t result; ++ int err; ++ ++ if (pci_enable_device_mem(pdev)) { ++ dev_err(&pdev->dev, ++ "Cannot re-enable PCI device after reset.\n"); ++ result = PCI_ERS_RESULT_DISCONNECT; ++ } else { ++ pci_set_master(pdev); ++ pci_restore_state(pdev); ++ pci_save_state(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ igb_reset(adapter); ++ wr32(E1000_WUS, ~0); ++ result = PCI_ERS_RESULT_RECOVERED; ++ } ++ ++ err = pci_cleanup_aer_uncorrect_error_status(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", ++ err); ++ /* non-fatal, continue */ ++ } ++ ++ return result; ++} ++ ++/** ++ * igb_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. 
Implementation resembles the ++ * second-half of the igb_resume routine. ++ */ ++static void igb_io_resume(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct igb_adapter *adapter = rtnetdev_priv(netdev); ++ ++ if (rtnetif_running(netdev)) { ++ if (igb_up(adapter)) { ++ dev_err(&pdev->dev, "igb_up failed after reset\n"); ++ return; ++ } ++ } ++ ++ rtnetif_device_attach(netdev); ++ ++ /* let the f/w know that the h/w is now under the control of the ++ * driver. ++ */ ++ igb_get_hw_control(adapter); ++} ++ ++static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, ++ u8 qsel) ++{ ++ u32 rar_low, rar_high; ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* Indicate to hardware the Address is Valid. */ ++ rar_high |= E1000_RAH_AV; ++ ++ if (hw->mac.type == e1000_82575) ++ rar_high |= E1000_RAH_POOL_1 * qsel; ++ else ++ rar_high |= E1000_RAH_POOL_1 << qsel; ++ ++ wr32(E1000_RAL(index), rar_low); ++ wrfl(); ++ wr32(E1000_RAH(index), rar_high); ++ wrfl(); ++} ++ ++static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 dmac_thr; ++ u16 hwm; ++ ++ if (hw->mac.type > e1000_82580) { ++ if (adapter->flags & IGB_FLAG_DMAC) { ++ u32 reg; ++ ++ /* force threshold to 0. */ ++ wr32(E1000_DMCTXTH, 0); ++ ++ /* DMA Coalescing high water mark needs to be greater ++ * than the Rx threshold. Set hwm to PBA - max frame ++ * size in 16B units, capping it at PBA - 6KB. ++ */ ++ hwm = 64 * pba - adapter->max_frame_size / 16; ++ if (hwm < 64 * (pba - 6)) ++ hwm = 64 * (pba - 6); ++ reg = rd32(E1000_FCRTC); ++ reg &= ~E1000_FCRTC_RTH_COAL_MASK; ++ reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) ++ & E1000_FCRTC_RTH_COAL_MASK); ++ wr32(E1000_FCRTC, reg); ++ ++ /* Set the DMA Coalescing Rx threshold to PBA - 2 * max ++ * frame size, capping it at PBA - 10KB. 
++ */ ++ dmac_thr = pba - adapter->max_frame_size / 512; ++ if (dmac_thr < pba - 10) ++ dmac_thr = pba - 10; ++ reg = rd32(E1000_DMACR); ++ reg &= ~E1000_DMACR_DMACTHR_MASK; ++ reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) ++ & E1000_DMACR_DMACTHR_MASK); ++ ++ /* transition to L0x or L1 if available..*/ ++ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); ++ ++ /* watchdog timer= +-1000 usec in 32usec intervals */ ++ reg |= (1000 >> 5); ++ ++ /* Disable BMC-to-OS Watchdog Enable */ ++ if (hw->mac.type != e1000_i354) ++ reg &= ~E1000_DMACR_DC_BMC2OSW_EN; ++ ++ wr32(E1000_DMACR, reg); ++ ++ /* no lower threshold to disable ++ * coalescing(smart fifb)-UTRESH=0 ++ */ ++ wr32(E1000_DMCRTRH, 0); ++ ++ reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4); ++ ++ wr32(E1000_DMCTLX, reg); ++ ++ /* free space in tx packet buffer to wake from ++ * DMA coal ++ */ ++ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - ++ (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); ++ ++ /* make low power state decision controlled ++ * by DMA coal ++ */ ++ reg = rd32(E1000_PCIEMISC); ++ reg &= ~E1000_PCIEMISC_LX_DECISION; ++ wr32(E1000_PCIEMISC, reg); ++ } /* endif adapter->dmac is not disabled */ ++ } else if (hw->mac.type == e1000_82580) { ++ u32 reg = rd32(E1000_PCIEMISC); ++ ++ wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); ++ wr32(E1000_DMACR, 0); ++ } ++} ++ ++/** ++ * igb_read_i2c_byte - Reads 8 bit word over I2C ++ * @hw: pointer to hardware structure ++ * @byte_offset: byte offset to read ++ * @dev_addr: device address ++ * @data: value read ++ * ++ * Performs byte read operation over I2C interface at ++ * a specified device address. ++ **/ ++s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, ++ u8 dev_addr, u8 *data) ++{ ++ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); ++ struct i2c_client *this_client = adapter->i2c_client; ++ s32 status; ++ u16 swfw_mask = 0; ++ ++ if (!this_client) ++ return E1000_ERR_I2C; ++ ++ swfw_mask = E1000_SWFW_PHY0_SM; ++ ++ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) ++ return E1000_ERR_SWFW_SYNC; ++ ++ status = i2c_smbus_read_byte_data(this_client, byte_offset); ++ hw->mac.ops.release_swfw_sync(hw, swfw_mask); ++ ++ if (status < 0) ++ return E1000_ERR_I2C; ++ else { ++ *data = status; ++ return 0; ++ } ++} ++ ++/** ++ * igb_write_i2c_byte - Writes 8 bit word over I2C ++ * @hw: pointer to hardware structure ++ * @byte_offset: byte offset to write ++ * @dev_addr: device address ++ * @data: value to write ++ * ++ * Performs byte write operation over I2C interface at ++ * a specified device address. 
++ **/ ++s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, ++ u8 dev_addr, u8 data) ++{ ++ struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); ++ struct i2c_client *this_client = adapter->i2c_client; ++ s32 status; ++ u16 swfw_mask = E1000_SWFW_PHY0_SM; ++ ++ if (!this_client) ++ return E1000_ERR_I2C; ++ ++ if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) ++ return E1000_ERR_SWFW_SYNC; ++ status = i2c_smbus_write_byte_data(this_client, byte_offset, data); ++ hw->mac.ops.release_swfw_sync(hw, swfw_mask); ++ ++ if (status) ++ return E1000_ERR_I2C; ++ else ++ return 0; ++ ++} ++ ++int igb_reinit_queues(struct igb_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ int err = 0; ++ ++ if (rtnetif_running(netdev)) ++ igb_close(netdev); ++ ++ igb_reset_interrupt_capability(adapter); ++ ++ if (igb_init_interrupt_scheme(adapter, true)) { ++ dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ if (rtnetif_running(netdev)) ++ err = igb_open(netdev); ++ ++ return err; ++} ++/* igb_main.c */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_main.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_main.c 2021-04-07 16:01:27.442633859 +0800 +@@ -0,0 +1,3171 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++/* Change Log ++ * ++ * Port to rtnet (0.9.3) by Mathias Koehrer. Base version: e1000-7.1.9 ++ * 8-Aug-2006 ++ * ++ * 7.0.36 10-Mar-2006 ++ * o fixups for compilation issues on older kernels ++ * 7.0.35 3-Mar-2006 ++ * 7.0.34 ++ * o Major performance fixes by understanding relationship of rx_buffer_len ++ * to window size growth. _ps and legacy receive paths changed ++ * o merge with kernel changes ++ * o legacy receive path went back to single descriptor model for jumbos ++ * 7.0.33 3-Feb-2006 ++ * o Added another fix for the pass false carrier bit ++ * 7.0.32 24-Jan-2006 ++ * o Need to rebuild with noew version number for the pass false carrier ++ * fix in e1000_hw.c ++ * 7.0.30 18-Jan-2006 ++ * o fixup for tso workaround to disable it for pci-x ++ * o fix mem leak on 82542 ++ * o fixes for 10 Mb/s connections and incorrect stats ++ * 7.0.28 01/06/2006 ++ * o hardware workaround to only set "speed mode" bit for 1G link. 
++ * 7.0.26 12/23/2005 ++ * o wake on lan support modified for device ID 10B5 ++ * o fix dhcp + vlan issue not making it to the iAMT firmware ++ * 7.0.24 12/9/2005 ++ * o New hardware support for the Gigabit NIC embedded in the south bridge ++ * o Fixes to the recycling logic (skb->tail) from IBM LTC ++ * 6.3.7 11/18/2005 ++ * o Honor eeprom setting for enabling/disabling Wake On Lan ++ * 6.3.5 11/17/2005 ++ * o Fix memory leak in rx ring handling for PCI Express adapters ++ * 6.3.4 11/8/05 ++ * o Patch from Jesper Juhl to remove redundant NULL checks for kfree ++ * 6.3.2 9/20/05 ++ * o Render logic that sets/resets DRV_LOAD as inline functions to ++ * avoid code replication. If f/w is AMT then set DRV_LOAD only when ++ * network interface is open. ++ * o Handle DRV_LOAD set/reset in cases where AMT uses VLANs. ++ * o Adjust PBA partioning for Jumbo frames using MTU size and not ++ * rx_buffer_len ++ * 6.3.1 9/19/05 ++ * o Use adapter->tx_timeout_factor in Tx Hung Detect logic ++ * (e1000_clean_tx_irq) ++ * o Support for 8086:10B5 device (Quad Port) ++ */ ++ ++char e1000_driver_name[] = "rt_e1000"; ++static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; ++#ifndef CONFIG_E1000_NAPI ++#define DRIVERNAPI ++#else ++#define DRIVERNAPI "-NAPI" ++#endif ++#define DRV_VERSION "7.1.9"DRIVERNAPI ++char e1000_driver_version[] = DRV_VERSION; ++static char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; ++ ++/* e1000_pci_tbl - PCI Device ID Table ++ * ++ * Last entry must be all 0s ++ * ++ * Macro expands to... ++ * {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ */ ++static struct pci_device_id e1000_pci_tbl[] = { ++ INTEL_E1000_ETHERNET_DEVICE(0x1000), ++ INTEL_E1000_ETHERNET_DEVICE(0x1001), ++ INTEL_E1000_ETHERNET_DEVICE(0x1004), ++ INTEL_E1000_ETHERNET_DEVICE(0x1008), ++ INTEL_E1000_ETHERNET_DEVICE(0x1009), ++ INTEL_E1000_ETHERNET_DEVICE(0x100C), ++ INTEL_E1000_ETHERNET_DEVICE(0x100D), ++ INTEL_E1000_ETHERNET_DEVICE(0x100E), ++ INTEL_E1000_ETHERNET_DEVICE(0x100F), ++ INTEL_E1000_ETHERNET_DEVICE(0x1010), ++ INTEL_E1000_ETHERNET_DEVICE(0x1011), ++ INTEL_E1000_ETHERNET_DEVICE(0x1012), ++ INTEL_E1000_ETHERNET_DEVICE(0x1013), ++ INTEL_E1000_ETHERNET_DEVICE(0x1014), ++ INTEL_E1000_ETHERNET_DEVICE(0x1015), ++ INTEL_E1000_ETHERNET_DEVICE(0x1016), ++ INTEL_E1000_ETHERNET_DEVICE(0x1017), ++ INTEL_E1000_ETHERNET_DEVICE(0x1018), ++ INTEL_E1000_ETHERNET_DEVICE(0x1019), ++ INTEL_E1000_ETHERNET_DEVICE(0x101A), ++ INTEL_E1000_ETHERNET_DEVICE(0x101D), ++ INTEL_E1000_ETHERNET_DEVICE(0x101E), ++ INTEL_E1000_ETHERNET_DEVICE(0x1026), ++ INTEL_E1000_ETHERNET_DEVICE(0x1027), ++ INTEL_E1000_ETHERNET_DEVICE(0x1028), ++ INTEL_E1000_ETHERNET_DEVICE(0x1049), ++ INTEL_E1000_ETHERNET_DEVICE(0x104A), ++ INTEL_E1000_ETHERNET_DEVICE(0x104B), ++ INTEL_E1000_ETHERNET_DEVICE(0x104C), ++ INTEL_E1000_ETHERNET_DEVICE(0x104D), ++ INTEL_E1000_ETHERNET_DEVICE(0x105E), ++ INTEL_E1000_ETHERNET_DEVICE(0x105F), ++ INTEL_E1000_ETHERNET_DEVICE(0x1060), ++ INTEL_E1000_ETHERNET_DEVICE(0x1075), ++ INTEL_E1000_ETHERNET_DEVICE(0x1076), ++ INTEL_E1000_ETHERNET_DEVICE(0x1077), ++ INTEL_E1000_ETHERNET_DEVICE(0x1078), ++ INTEL_E1000_ETHERNET_DEVICE(0x1079), ++ INTEL_E1000_ETHERNET_DEVICE(0x107A), ++ INTEL_E1000_ETHERNET_DEVICE(0x107B), ++ INTEL_E1000_ETHERNET_DEVICE(0x107C), ++ INTEL_E1000_ETHERNET_DEVICE(0x107D), ++ INTEL_E1000_ETHERNET_DEVICE(0x107E), ++ INTEL_E1000_ETHERNET_DEVICE(0x107F), ++ INTEL_E1000_ETHERNET_DEVICE(0x108A), ++ INTEL_E1000_ETHERNET_DEVICE(0x108B), ++ INTEL_E1000_ETHERNET_DEVICE(0x108C), ++ 
INTEL_E1000_ETHERNET_DEVICE(0x1096), ++ INTEL_E1000_ETHERNET_DEVICE(0x1098), ++ INTEL_E1000_ETHERNET_DEVICE(0x1099), ++ INTEL_E1000_ETHERNET_DEVICE(0x109A), ++ INTEL_E1000_ETHERNET_DEVICE(0x10A4), ++ INTEL_E1000_ETHERNET_DEVICE(0x10B5), ++ INTEL_E1000_ETHERNET_DEVICE(0x10B9), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BA), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BB), ++ INTEL_E1000_ETHERNET_DEVICE(0x10BC), ++ INTEL_E1000_ETHERNET_DEVICE(0x10C4), ++ INTEL_E1000_ETHERNET_DEVICE(0x10C5), ++ /* required last entry */ ++ {0,} ++}; ++ ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++int e1000_up(struct e1000_adapter *adapter); ++void e1000_down(struct e1000_adapter *adapter); ++void e1000_reinit_locked(struct e1000_adapter *adapter); ++void e1000_reset(struct e1000_adapter *adapter); ++int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); ++int e1000_setup_all_tx_resources(struct e1000_adapter *adapter); ++int e1000_setup_all_rx_resources(struct e1000_adapter *adapter); ++void e1000_free_all_tx_resources(struct e1000_adapter *adapter); ++void e1000_free_all_rx_resources(struct e1000_adapter *adapter); ++static int e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *txdr); ++static int e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rxdr); ++static void e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++ ++static int e1000_init_module(void); ++static void e1000_exit_module(void); ++static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent); ++static void e1000_remove(struct pci_dev *pdev); ++static int e1000_alloc_queues(struct e1000_adapter *adapter); ++static int e1000_sw_init(struct e1000_adapter *adapter); ++static int e1000_open(struct rtnet_device *netdev); ++static int e1000_close(struct rtnet_device *netdev); ++static void e1000_configure_tx(struct e1000_adapter *adapter); ++static void e1000_configure_rx(struct e1000_adapter *adapter); ++static void e1000_setup_rctl(struct e1000_adapter *adapter); ++static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter); ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++static void e1000_set_multi(struct rtnet_device *netdev); ++static void e1000_update_phy_info_task(struct work_struct *work); ++static void e1000_watchdog(struct work_struct *work); ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); ++static int e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev); ++static int e1000_intr(rtdm_irq_t *irq_handle); ++static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring); ++static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp); ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++#ifdef SIOCGMIIPHY ++#endif ++void e1000_set_ethtool_ops(struct rtnet_device *netdev); ++#ifdef ETHTOOL_OPS_COMPAT ++extern int ethtool_ioctl(struct ifreq *ifr); ++#endif ++static void e1000_enter_82542_rst(struct e1000_adapter *adapter); ++static void e1000_leave_82542_rst(struct e1000_adapter 
*adapter); ++static void e1000_smartspeed(struct e1000_adapter *adapter); ++static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, ++ struct rtskb *skb); ++ ++ ++ ++ ++ ++/* Exported from other modules */ ++ ++extern void e1000_check_options(struct e1000_adapter *adapter); ++ ++static struct pci_driver e1000_driver = { ++ .name = e1000_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++}; ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver for rtnet"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++static int local_debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; ++module_param_named(debug, local_debug, int, 0); ++MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); ++ ++ ++#define MAX_UNITS 8 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (eg. 1,0,1)"); ++ ++ ++#define kmalloc(a,b) rtdm_malloc(a) ++#define vmalloc(a) rtdm_malloc(a) ++#define kfree(a) rtdm_free(a) ++#define vfree(a) rtdm_free(a) ++ ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++ ++static int __init ++e1000_init_module(void) ++{ ++ int ret; ++ printk(KERN_INFO "%s - version %s\n", ++ e1000_driver_string, e1000_driver_version); ++ ++ printk(KERN_INFO "%s\n", e1000_copyright); ++ ++ ret = pci_register_driver(&e1000_driver); ++ return ret; ++} ++ ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. 
++ **/ ++ ++static void __exit ++e1000_exit_module(void) ++{ ++ pci_unregister_driver(&e1000_driver); ++} ++ ++module_exit(e1000_exit_module); ++ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int flags, err = 0; ++ ++ flags = RTDM_IRQTYPE_SHARED; ++#ifdef CONFIG_PCI_MSI ++ if (adapter->hw.mac_type > e1000_82547_rev_2) { ++ adapter->have_msi = TRUE; ++ if ((err = pci_enable_msi(adapter->pdev))) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate MSI interrupt Error: %d\n", err); ++ adapter->have_msi = FALSE; ++ } ++ } ++ if (adapter->have_msi) ++ flags = 0; ++#endif ++ rt_stack_connect(netdev, &STACK_manager); ++ if ((err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, flags, netdev->name, netdev))) ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate interrupt Error: %d\n", err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ // struct rtnet_device *netdev = adapter->netdev; ++ ++ rtdm_irq_free(&adapter->irq_handle); ++ ++#ifdef CONFIG_PCI_MSI ++ if (adapter->have_msi) ++ pci_disable_msi(adapter->pdev); ++#endif ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(&adapter->hw, IMC, ~0); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ synchronize_irq(adapter->pdev->irq); ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ if (likely(atomic_dec_and_test(&adapter->irq_sem))) { ++ E1000_WRITE_REG(&adapter->hw, IMS, IMS_ENABLE_MASK); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ } ++} ++ ++/** ++ * e1000_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the netowrk i/f is closed. ++ * ++ **/ ++ ++static void ++e1000_release_hw_control(struct e1000_adapter *adapter) ++{ ++ uint32_t ctrl_ext; ++ uint32_t swsm; ++ uint32_t extcnf; ++ ++ /* Let firmware taken over control of h/w */ ++ switch (adapter->hw.mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, SWSM); ++ E1000_WRITE_REG(&adapter->hw, SWSM, ++ swsm & ~E1000_SWSM_DRV_LOAD); ++ case e1000_ich8lan: ++ extcnf = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ extcnf & ~E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000_get_hw_control sets {CTRL_EXT|FWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the netowrk i/f is open. 
++ * ++ **/ ++ ++static void ++e1000_get_hw_control(struct e1000_adapter *adapter) ++{ ++ uint32_t ctrl_ext; ++ uint32_t swsm; ++ uint32_t extcnf; ++ /* Let firmware know the driver has taken over */ ++ switch (adapter->hw.mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); ++ E1000_WRITE_REG(&adapter->hw, CTRL_EXT, ++ ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ break; ++ case e1000_82573: ++ swsm = E1000_READ_REG(&adapter->hw, SWSM); ++ E1000_WRITE_REG(&adapter->hw, SWSM, ++ swsm | E1000_SWSM_DRV_LOAD); ++ break; ++ case e1000_ich8lan: ++ extcnf = E1000_READ_REG(&adapter->hw, EXTCNF_CTRL); ++ E1000_WRITE_REG(&adapter->hw, EXTCNF_CTRL, ++ extcnf | E1000_EXTCNF_CTRL_SWFLAG); ++ break; ++ default: ++ break; ++ } ++} ++ ++int ++e1000_up(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int i; ++ ++ /* hardware has been reset, we need to reload some things */ ++ ++ e1000_set_multi(netdev); ++ ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ /* call E1000_DESC_UNUSED which always leaves ++ * at least 1 descriptor unused to make sure ++ * next_to_use != next_to_clean */ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ struct e1000_rx_ring *ring = &adapter->rx_ring[i]; ++ adapter->alloc_rx_buf(adapter, ring, ++ E1000_DESC_UNUSED(ring)); ++ } ++ ++ // TODO makoehre adapter->tx_queue_len = netdev->tx_queue_len; ++ ++ schedule_delayed_work(&adapter->watchdog_task, 1); ++ ++ e1000_irq_enable(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_power_up_phy - restore link in case the phy was powered down ++ * @adapter: address of board private structure ++ * ++ * The phy may be powered down to save power and turn off link when the ++ * driver is unloaded and wake on lan is not enabled (among others) ++ * *** this routine MUST be followed by a call to e1000_reset *** ++ * ++ **/ ++ ++static void e1000_power_up_phy(struct e1000_adapter *adapter) ++{ ++ uint16_t mii_reg = 0; ++ ++ /* Just clear the power down bit to wake the phy back up */ ++ if (adapter->hw.media_type == e1000_media_type_copper) { ++ /* according to the manual, the phy will retain its ++ * settings across a power-down/up cycle */ ++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); ++ } ++} ++ ++static void e1000_power_down_phy(struct e1000_adapter *adapter) ++{ ++ boolean_t mng_mode_enabled = (adapter->hw.mac_type >= e1000_82571) && ++ e1000_check_mng_mode(&adapter->hw); ++ /* Power down the PHY so no link is implied when interface is down * ++ * The PHY cannot be powered down if any of the following is TRUE * ++ * (a) WoL is enabled ++ * (b) AMT is active ++ * (c) SoL/IDER session is active */ ++ if (!adapter->wol && adapter->hw.mac_type >= e1000_82540 && ++ adapter->hw.mac_type != e1000_ich8lan && ++ adapter->hw.media_type == e1000_media_type_copper && ++ !(E1000_READ_REG(&adapter->hw, MANC) & E1000_MANC_SMBUS_EN) && ++ !mng_mode_enabled && ++ !e1000_check_phy_reset_block(&adapter->hw)) { ++ uint16_t mii_reg = 0; ++ e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, mii_reg); ++ mdelay(1); ++ } ++} ++ ++static void e1000_down_and_stop(struct e1000_adapter *adapter) ++{ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_delayed_work_sync(&adapter->watchdog_task); ++ 
cancel_delayed_work_sync(&adapter->phy_info_task); ++ cancel_delayed_work_sync(&adapter->fifo_stall_task); ++} ++ ++void ++e1000_down(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ e1000_irq_disable(adapter); ++ ++ e1000_down_and_stop(adapter); ++ ++ // TODO makoehre netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ e1000_reset(adapter); ++ e1000_clean_all_tx_rings(adapter); ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++void ++e1000_reinit_locked(struct e1000_adapter *adapter) ++{ ++ WARN_ON(in_interrupt()); ++ if (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) ++ msleep(1); ++ e1000_down(adapter); ++ e1000_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->flags); ++} ++ ++void ++e1000_reset(struct e1000_adapter *adapter) ++{ ++ uint32_t pba; ++ uint16_t fc_high_water_mark = E1000_FC_HIGH_DIFF; ++ ++ /* Repartition Pba for greater than 9k mtu ++ * To take effect CTRL.RST is required. ++ */ ++ ++ switch (adapter->hw.mac_type) { ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ pba = E1000_PBA_30K; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ pba = E1000_PBA_38K; ++ break; ++ case e1000_82573: ++ pba = E1000_PBA_12K; ++ break; ++ case e1000_ich8lan: ++ pba = E1000_PBA_8K; ++ break; ++ default: ++ pba = E1000_PBA_48K; ++ break; ++ } ++ ++ if ((adapter->hw.mac_type != e1000_82573) && ++ (adapter->netdev->mtu > E1000_RXBUFFER_8192)) ++ pba -= 8; /* allocate more FIFO for Tx */ ++ ++ ++ if (adapter->hw.mac_type == e1000_82547) { ++ adapter->tx_fifo_head = 0; ++ adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT; ++ adapter->tx_fifo_size = ++ (E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ } ++ ++ E1000_WRITE_REG(&adapter->hw, PBA, pba); ++ ++ /* flow control settings */ ++ /* Set the FC high water mark to 90% of the FIFO size. ++ * Required to clear last 3 LSB */ ++ fc_high_water_mark = ((pba * 9216)/10) & 0xFFF8; ++ /* We can't use 90% on small FIFOs because the remainder ++ * would be less than 1 full frame. In this case, we size ++ * it to allow at least a full frame above the high water ++ * mark. 
*/ ++ if (pba < E1000_PBA_16K) ++ fc_high_water_mark = (pba * 1024) - 1600; ++ ++ adapter->hw.fc_high_water = fc_high_water_mark; ++ adapter->hw.fc_low_water = fc_high_water_mark - 8; ++ if (adapter->hw.mac_type == e1000_80003es2lan) ++ adapter->hw.fc_pause_time = 0xFFFF; ++ else ++ adapter->hw.fc_pause_time = E1000_FC_PAUSE_TIME; ++ adapter->hw.fc_send_xon = 1; ++ adapter->hw.fc = adapter->hw.original_fc; ++ ++ /* Allow time for pending master requests to run */ ++ e1000_reset_hw(&adapter->hw); ++ if (adapter->hw.mac_type >= e1000_82544) ++ E1000_WRITE_REG(&adapter->hw, WUC, 0); ++ if (e1000_init_hw(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "Hardware Error\n"); ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ E1000_WRITE_REG(&adapter->hw, VET, ETHERNET_IEEE_VLAN_TYPE); ++ ++ E1000_WRITE_REG(&adapter->hw, AIT, 0); // Set adaptive interframe spacing to zero ++ ++ // e1000_reset_adaptive(&adapter->hw); ++ e1000_phy_get_info(&adapter->hw, &adapter->phy_info); ++ ++ if (!adapter->smart_power_down && ++ (adapter->hw.mac_type == e1000_82571 || ++ adapter->hw.mac_type == e1000_82572)) { ++ uint16_t phy_data = 0; ++ /* speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed */ ++ e1000_read_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1000_write_phy_reg(&adapter->hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ } ++ ++} ++ ++static void ++e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = ++ container_of(work, struct e1000_adapter, reset_task); ++ ++ e1000_reinit_locked(adapter); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. 
++ **/ ++ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct e1000_adapter *adapter; ++ unsigned long mmio_start, mmio_len; ++ unsigned long flash_start, flash_len; ++ ++ static int cards_found = 0; ++ static int e1000_ksp3_port_a = 0; /* global ksp3 port a indication */ ++ int i, err; ++ uint16_t eeprom_data; ++ uint16_t eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (cards[cards_found++] == 0) ++ { ++ return -ENODEV; ++ } ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) || ++ (err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))) { ++ if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) && ++ (err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))) { ++ E1000_ERR("No usable DMA configuration, aborting\n"); ++ return err; ++ } ++ } ++ ++ if ((err = pci_request_regions(pdev, e1000_driver_name))) ++ return err; ++ ++ pci_set_master(pdev); ++ ++ netdev = rt_alloc_etherdev(sizeof(struct e1000_adapter), 48); ++ if (!netdev) { ++ err = -ENOMEM; ++ goto err_alloc_etherdev; ++ } ++ memset(netdev->priv, 0, sizeof(struct e1000_adapter)); ++ ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ ++ ++ // SET_NETDEV_DEV(netdev, &pdev->dev); ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->hw.back = adapter; ++ adapter->msg_enable = (1 << local_debug) - 1; ++ ++ mmio_start = pci_resource_start(pdev, BAR_0); ++ mmio_len = pci_resource_len(pdev, BAR_0); ++ ++ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); ++ if (!adapter->hw.hw_addr) { ++ err = -EIO; ++ goto err_ioremap; ++ } ++ ++ for (i = BAR_1; i <= BAR_5; i++) { ++ if (pci_resource_len(pdev, i) == 0) ++ continue; ++ if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { ++ adapter->hw.io_base = pci_resource_start(pdev, i); ++ break; ++ } ++ } ++ ++ netdev->open = &e1000_open; ++ netdev->stop = &e1000_close; ++ netdev->hard_start_xmit = &e1000_xmit_frame; ++ // netdev->get_stats = &e1000_get_stats; ++ // netdev->set_multicast_list = &e1000_set_multi; ++ // netdev->set_mac_address = &e1000_set_mac; ++ // netdev->change_mtu = &e1000_change_mtu; ++ // netdev->do_ioctl = &e1000_ioctl; ++ // e1000_set_ethtool_ops(netdev); ++ strcpy(netdev->name, pci_name(pdev)); ++ ++ netdev->mem_start = mmio_start; ++ netdev->mem_end = mmio_start + mmio_len; ++ netdev->base_addr = adapter->hw.io_base; ++ ++ adapter->bd_number = cards_found - 1; ++ ++ /* setup the private structure */ ++ ++ if ((err = e1000_sw_init(adapter))) ++ goto err_sw_init; ++ ++ /* Flash BAR mapping must happen after e1000_sw_init ++ * because it depends on mac_type */ ++ if ((adapter->hw.mac_type == e1000_ich8lan) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ flash_start = pci_resource_start(pdev, 1); ++ flash_len = pci_resource_len(pdev, 1); ++ adapter->hw.flash_address = ioremap(flash_start, flash_len); ++ if (!adapter->hw.flash_address) { ++ err = -EIO; ++ goto err_flashmap; ++ } ++ } ++ ++ if ((err = e1000_check_phy_reset_block(&adapter->hw))) ++ DPRINTK(PROBE, INFO, "PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* if ksp3, indicate if it's port a being setup */ ++ if (pdev->device == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 && ++ e1000_ksp3_port_a == 0) ++ adapter->ksp3_port_a = 1; ++ e1000_ksp3_port_a++; ++ /* Reset for multiple KP3 adapters */ ++ if (e1000_ksp3_port_a == 4) 
++ e1000_ksp3_port_a = 0; ++ ++ netdev->features |= NETIF_F_LLTX; ++ ++ adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); ++ ++ /* initialize eeprom parameters */ ++ ++ if (e1000_init_eeprom_params(&adapter->hw)) { ++ E1000_ERR("EEPROM initialization failed\n"); ++ return -EIO; ++ } ++ ++ /* before reading the EEPROM, reset the controller to ++ * put the device in a known good starting state */ ++ ++ e1000_reset_hw(&adapter->hw); ++ ++ /* make sure the EEPROM is good */ ++ ++ if (e1000_validate_eeprom_checksum(&adapter->hw) < 0) { ++ DPRINTK(PROBE, ERR, "The EEPROM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ /* copy the MAC address out of the EEPROM */ ++ ++ if (e1000_read_mac_addr(&adapter->hw)) ++ DPRINTK(PROBE, ERR, "EEPROM Read Error\n"); ++ memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len); ++#ifdef ETHTOOL_GPERMADDR ++ memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len); ++ ++ if (!is_valid_ether_addr(netdev->perm_addr)) { ++#else ++ if (!is_valid_ether_addr(netdev->dev_addr)) { ++#endif ++ DPRINTK(PROBE, ERR, "Invalid MAC Address\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ ++ e1000_read_part_num(&adapter->hw, &(adapter->part_num)); ++ ++ e1000_get_bus_info(&adapter->hw); ++ ++ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog); ++ INIT_DELAYED_WORK(&adapter->fifo_stall_task, ++ e1000_82547_tx_fifo_stall_task); ++ INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); ++ INIT_WORK(&adapter->reset_task, ++ (void (*)(struct work_struct *))e1000_reset_task); ++ ++ /* we're going to reset, so assume we have no link for now */ ++ ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ ++ e1000_check_options(adapter); ++ ++ /* Initial Wake on LAN setting ++ * If APM wake is enabled in the EEPROM, ++ * enable the ACPI Magic Packet filter ++ */ ++ ++ switch (adapter->hw.mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ break; ++ case e1000_82544: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_82544_APM; ++ break; ++ case e1000_ich8lan: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL1_REG, 1, &eeprom_data); ++ eeprom_apme_mask = E1000_EEPROM_ICH8_APME; ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_FUNC_1){ ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ e1000_read_eeprom(&adapter->hw, ++ EEPROM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ break; ++ } ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->wol |= E1000_WUFC_MAG; ++ ++ /* print bus type/speed/width info */ ++ { ++ struct e1000_hw *hw = &adapter->hw; ++ DPRINTK(PROBE, INFO, "(PCI%s:%s:%s) ", ++ ((hw->bus_type == e1000_bus_type_pcix) ? "-X" : ++ (hw->bus_type == e1000_bus_type_pci_express ? " Express":"")), ++ ((hw->bus_speed == e1000_bus_speed_2500) ? "2.5Gb/s" : ++ (hw->bus_speed == e1000_bus_speed_133) ? "133MHz" : ++ (hw->bus_speed == e1000_bus_speed_120) ? "120MHz" : ++ (hw->bus_speed == e1000_bus_speed_100) ? "100MHz" : ++ (hw->bus_speed == e1000_bus_speed_66) ? "66MHz" : "33MHz"), ++ ((hw->bus_width == e1000_bus_width_64) ? "64-bit" : ++ (hw->bus_width == e1000_bus_width_pciex_4) ? "Width x4" : ++ (hw->bus_width == e1000_bus_width_pciex_1) ? 
"Width x1" : ++ "32-bit")); ++ } ++ ++ printk(KERN_INFO "e1000: hw "); ++ for (i = 0; i < 6; i++) ++ printk(KERN_CONT "%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); ++ ++ /* reset the hardware with the new settings */ ++ e1000_reset(adapter); ++ ++ /* If the controller is 82573 and f/w is AMT, do not set ++ * DRV_LOAD until the interface is up. For all other cases, ++ * let the f/w know that the h/w is now under the control ++ * of the driver. */ ++ if (adapter->hw.mac_type != e1000_82573 || ++ !e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ strcpy(netdev->name, "rteth%d"); ++ if ((err = rt_register_rtnetdev(netdev))) ++ goto err_register; ++ ++ DPRINTK(PROBE, INFO, "Intel(R) PRO/1000 Network Connection\n"); ++ ++ return 0; ++ ++err_register: ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++err_flashmap: ++err_sw_init: ++err_eeprom: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_regions(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ uint32_t manc; ++ ++ e1000_down_and_stop(adapter); ++ ++ if (adapter->hw.mac_type >= e1000_82540 && ++ adapter->hw.mac_type != e1000_ich8lan && ++ adapter->hw.media_type == e1000_media_type_copper) { ++ manc = E1000_READ_REG(&adapter->hw, MANC); ++ if (manc & E1000_MANC_SMBUS_EN) { ++ manc |= E1000_MANC_ARP_EN; ++ E1000_WRITE_REG(&adapter->hw, MANC, manc); ++ } ++ } ++ ++ /* Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. */ ++ e1000_release_hw_control(adapter); ++ ++ rt_unregister_rtnetdev(netdev); ++ ++ if (!e1000_check_phy_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++ ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_regions(pdev); ++ ++ rtdev_free(netdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). 
++ **/ ++ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ /* PCI config space info */ ++ ++ hw->vendor_id = pdev->vendor; ++ hw->device_id = pdev->device; ++ hw->subsystem_vendor_id = pdev->subsystem_vendor; ++ hw->subsystem_id = pdev->subsystem_device; ++ ++ pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); ++ ++ pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word); ++ ++ adapter->rx_buffer_len = MAXIMUM_ETHERNET_FRAME_SIZE; ++ adapter->rx_ps_bsize0 = E1000_RXBUFFER_128; ++ hw->max_frame_size = netdev->mtu + ++ ENET_HEADER_SIZE + ETHERNET_FCS_SIZE; ++ hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE; ++ ++ /* identify the MAC */ ++ ++ if (e1000_set_mac_type(hw)) { ++ DPRINTK(PROBE, ERR, "Unknown MAC Type\n"); ++ return -EIO; ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ hw->phy_init_script = 1; ++ break; ++ } ++ ++ e1000_set_media_type(hw); ++ ++ hw->wait_autoneg_complete = FALSE; ++ hw->tbi_compatibility_en = TRUE; ++ hw->adaptive_ifs = FALSE; ++ ++ /* Copper options */ ++ ++ if (hw->media_type == e1000_media_type_copper) { ++ hw->mdix = AUTO_ALL_MODES; ++ hw->disable_polarity_correction = FALSE; ++ hw->master_slave = E1000_MASTER_SLAVE; ++ } ++ ++ adapter->num_tx_queues = 1; ++ adapter->num_rx_queues = 1; ++ ++ ++ if (e1000_alloc_queues(adapter)) { ++ DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); ++ return -ENOMEM; ++ } ++ ++ atomic_set(&adapter->irq_sem, 1); ++ ++ return 0; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ * ++ * We allocate one ring per queue at run-time since we don't know the ++ * number of queues at compile-time. The polling_netdev array is ++ * intended for Multiqueue, but should work fine with a single queue. ++ **/ ++ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ int size; ++ ++ size = sizeof(struct e1000_tx_ring) * adapter->num_tx_queues; ++ adapter->tx_ring = kmalloc(size, GFP_KERNEL); ++ if (!adapter->tx_ring) ++ return -ENOMEM; ++ memset(adapter->tx_ring, 0, size); ++ ++ size = sizeof(struct e1000_rx_ring) * adapter->num_rx_queues; ++ adapter->rx_ring = kmalloc(size, GFP_KERNEL); ++ if (!adapter->rx_ring) { ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++ } ++ memset(adapter->rx_ring, 0, size); ++ ++ ++ return E1000_SUCCESS; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. 
++ **/ ++ ++static int ++e1000_open(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__E1000_DRIVER_TESTING, &adapter->flags)) ++ return -EBUSY; ++ ++ /* allocate transmit descriptors */ ++ ++ if ((err = e1000_setup_all_tx_resources(adapter))) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ ++ if ((err = e1000_setup_all_rx_resources(adapter))) ++ goto err_setup_rx; ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_up; ++ ++ e1000_power_up_phy(adapter); ++ ++ if ((err = e1000_up(adapter))) ++ goto err_up; ++ ++ /* If AMT is enabled, let the firmware know that the network ++ * interface is now open */ ++ if (adapter->hw.mac_type == e1000_82573 && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_get_hw_control(adapter); ++ ++ /* Wait for the hardware to come up */ ++ msleep(3000); ++ ++ return E1000_SUCCESS; ++ ++err_up: ++ e1000_free_all_rx_resources(adapter); ++err_setup_rx: ++ e1000_free_all_tx_resources(adapter); ++err_setup_tx: ++ e1000_reset(adapter); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++ ++static int ++e1000_close(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); ++ e1000_down(adapter); ++ e1000_power_down_phy(adapter); ++ e1000_free_irq(adapter); ++ ++ e1000_free_all_tx_resources(adapter); ++ e1000_free_all_rx_resources(adapter); ++ ++ ++ /* If AMT is enabled, let the firmware know that the network ++ * interface is now closed */ ++ if (adapter->hw.mac_type == e1000_82573 && ++ e1000_check_mng_mode(&adapter->hw)) ++ e1000_release_hw_control(adapter); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary ++ * @adapter: address of board private structure ++ * @start: address of beginning of memory ++ * @len: length of memory ++ **/ ++static boolean_t ++e1000_check_64k_bound(struct e1000_adapter *adapter, ++ void *start, unsigned long len) ++{ ++ unsigned long begin = (unsigned long) start; ++ unsigned long end = begin + len; ++ ++ /* First rev 82545 and 82546 need to not allow any memory ++ * write location to cross 64k boundary due to errata 23 */ ++ if (adapter->hw.mac_type == e1000_82545 || ++ adapter->hw.mac_type == e1000_82546) { ++ return ((begin ^ (end - 1)) >> 16) != 0 ? 
FALSE : TRUE; ++ } ++ ++ return TRUE; ++} ++ ++/** ++ * e1000_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * @txdr: tx descriptor ring (for a specific queue) to setup ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++static int ++e1000_setup_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *txdr) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size; ++ ++ size = sizeof(struct e1000_buffer) * txdr->count; ++ txdr->buffer_info = vmalloc(size); ++ if (!txdr->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(txdr->buffer_info, 0, size); ++ ++ /* round up to nearest 4K */ ++ ++ txdr->size = txdr->count * sizeof(struct e1000_tx_desc); ++ E1000_ROUNDUP(txdr->size, 4096); ++ ++ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); ++ if (!txdr->desc) { ++setup_tx_desc_die: ++ vfree(txdr->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the transmit descriptor ring\n"); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) { ++ void *olddesc = txdr->desc; ++ dma_addr_t olddma = txdr->dma; ++ DPRINTK(TX_ERR, ERR, "txdr align check failed: %u bytes " ++ "at %p\n", txdr->size, txdr->desc); ++ /* Try again, without freeing the previous */ ++ txdr->desc = pci_alloc_consistent(pdev, txdr->size, &txdr->dma); ++ /* Failed allocation, critical failure */ ++ if (!txdr->desc) { ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ goto setup_tx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, txdr->size, txdr->desc, ++ txdr->dma); ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the transmit descriptor ring\n"); ++ vfree(txdr->buffer_info); ++ return -ENOMEM; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, txdr->size, olddesc, olddma); ++ } ++ } ++ memset(txdr->desc, 0, txdr->size); ++ ++ txdr->next_to_use = 0; ++ txdr->next_to_clean = 0; ++ rtdm_lock_init(&txdr->tx_lock); ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * If this function returns with an error, then it's possible one or ++ * more of the rings is populated (while the rest are not). It is the ++ * callers duty to clean those orphaned rings. ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++int ++e1000_setup_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Tx Queue %u failed\n", i); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. 
++ **/ ++ ++static void ++e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ uint64_t tdba; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t tdlen, tctl, tipg, tarc; ++ uint32_t ipgr1, ipgr2; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ ++ switch (adapter->num_tx_queues) { ++ case 1: ++ default: ++ tdba = adapter->tx_ring[0].dma; ++ tdlen = adapter->tx_ring[0].count * ++ sizeof(struct e1000_tx_desc); ++ E1000_WRITE_REG(hw, TDLEN, tdlen); ++ E1000_WRITE_REG(hw, TDBAH, (tdba >> 32)); ++ E1000_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, TDT, 0); ++ E1000_WRITE_REG(hw, TDH, 0); ++ adapter->tx_ring[0].tdh = E1000_TDH; ++ adapter->tx_ring[0].tdt = E1000_TDT; ++ break; ++ } ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ ++ if (hw->media_type == e1000_media_type_fiber || ++ hw->media_type == e1000_media_type_internal_serdes) ++ tipg = DEFAULT_82543_TIPG_IPGT_FIBER; ++ else ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ tipg = DEFAULT_82542_TIPG_IPGT; ++ ipgr1 = DEFAULT_82542_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82542_TIPG_IPGR2; ++ break; ++ case e1000_80003es2lan: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; ++ break; ++ default: ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; ++ break; ++ } ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ++ E1000_WRITE_REG(hw, TIDV, adapter->tx_int_delay); ++ if (hw->mac_type >= e1000_82540) ++ E1000_WRITE_REG(hw, TADV, adapter->tx_abs_int_delay); ++ ++ /* Program the Transmit Control Register */ ++ ++ tctl = E1000_READ_REG(hw, TCTL); ++ ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++#ifdef DISABLE_MULR ++ /* disable Multiple Reads for debugging */ ++ tctl &= ~E1000_TCTL_MULR; ++#endif ++ ++ if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) { ++ tarc = E1000_READ_REG(hw, TARC0); ++ tarc |= ((1 << 25) | (1 << 21)); ++ E1000_WRITE_REG(hw, TARC0, tarc); ++ tarc = E1000_READ_REG(hw, TARC1); ++ tarc |= (1 << 25); ++ if (tctl & E1000_TCTL_MULR) ++ tarc &= ~(1 << 28); ++ else ++ tarc |= (1 << 28); ++ E1000_WRITE_REG(hw, TARC1, tarc); ++ } else if (hw->mac_type == e1000_80003es2lan) { ++ tarc = E1000_READ_REG(hw, TARC0); ++ tarc |= 1; ++ if (hw->media_type == e1000_media_type_internal_serdes) ++ tarc |= (1 << 20); ++ E1000_WRITE_REG(hw, TARC0, tarc); ++ tarc = E1000_READ_REG(hw, TARC1); ++ tarc |= 1; ++ E1000_WRITE_REG(hw, TARC1, tarc); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_IDE | E1000_TXD_CMD_EOP | ++ E1000_TXD_CMD_IFCS; ++ ++ if (hw->mac_type < e1000_82543) ++ adapter->txd_cmd |= E1000_TXD_CMD_RPS; ++ else ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ /* Cache if we're 82544 running in PCI-X because we'll ++ * need this to apply a workaround later in the send path. 
*/ ++ if (hw->mac_type == e1000_82544 && ++ hw->bus_type == e1000_bus_type_pcix) ++ adapter->pcix_82544 = 1; ++ ++ E1000_WRITE_REG(hw, TCTL, tctl); ++ ++} ++ ++/** ++ * e1000_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * @rxdr: rx descriptor ring (for a specific queue) to setup ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++ ++static int ++e1000_setup_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rxdr) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ int size, desc_len; ++ ++ size = sizeof(struct e1000_buffer) * rxdr->count; ++ rxdr->buffer_info = vmalloc(size); ++ if (!rxdr->buffer_info) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->buffer_info, 0, size); ++ ++ size = sizeof(struct e1000_ps_page) * rxdr->count; ++ rxdr->ps_page = kmalloc(size, GFP_KERNEL); ++ if (!rxdr->ps_page) { ++ vfree(rxdr->buffer_info); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->ps_page, 0, size); ++ ++ size = sizeof(struct e1000_ps_page_dma) * rxdr->count; ++ rxdr->ps_page_dma = kmalloc(size, GFP_KERNEL); ++ if (!rxdr->ps_page_dma) { ++ vfree(rxdr->buffer_info); ++ kfree(rxdr->ps_page); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++ return -ENOMEM; ++ } ++ memset(rxdr->ps_page_dma, 0, size); ++ ++ if (adapter->hw.mac_type <= e1000_82547_rev_2) ++ desc_len = sizeof(struct e1000_rx_desc); ++ else ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ ++ rxdr->size = rxdr->count * desc_len; ++ E1000_ROUNDUP(rxdr->size, 4096); ++ ++ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma); ++ ++ if (!rxdr->desc) { ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory for the receive descriptor ring\n"); ++setup_rx_desc_die: ++ vfree(rxdr->buffer_info); ++ kfree(rxdr->ps_page); ++ kfree(rxdr->ps_page_dma); ++ return -ENOMEM; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) { ++ void *olddesc = rxdr->desc; ++ dma_addr_t olddma = rxdr->dma; ++ DPRINTK(RX_ERR, ERR, "rxdr align check failed: %u bytes " ++ "at %p\n", rxdr->size, rxdr->desc); ++ /* Try again, without freeing the previous */ ++ rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma); ++ /* Failed allocation, critical failure */ ++ if (!rxdr->desc) { ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) { ++ /* give up */ ++ pci_free_consistent(pdev, rxdr->size, rxdr->desc, ++ rxdr->dma); ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ DPRINTK(PROBE, ERR, ++ "Unable to allocate aligned memory " ++ "for the receive descriptor ring\n"); ++ goto setup_rx_desc_die; ++ } else { ++ /* Free old allocation, new allocation was successful */ ++ pci_free_consistent(pdev, rxdr->size, olddesc, olddma); ++ } ++ } ++ memset(rxdr->desc, 0, rxdr->size); ++ ++ rxdr->next_to_clean = 0; ++ rxdr->next_to_use = 0; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_all_rx_resources - wrapper to allocate Rx resources ++ * (Descriptors) for all queues ++ * @adapter: board private structure ++ * ++ * If this function returns with an error, then it's 
possible one or ++ * more of the rings is populated (while the rest are not). It is the ++ * callers duty to clean those orphaned rings. ++ * ++ * Return 0 on success, negative on failure ++ **/ ++ ++int ++e1000_setup_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) { ++ err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]); ++ if (err) { ++ DPRINTK(PROBE, ERR, ++ "Allocation for Rx Queue %u failed\n", i); ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++static void ++e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ uint32_t rctl; ++#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT ++ uint32_t pages = 0; ++#endif ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* FIXME: disable the stripping of CRC because it breaks ++ * BMC firmware connected over SMBUS ++ if (adapter->hw.mac_type > e1000_82543) ++ rctl |= E1000_RCTL_SECRC; ++ */ ++ ++ if (adapter->hw.tbi_compatibility_on == 1) ++ rctl |= E1000_RCTL_SBP; ++ else ++ rctl &= ~E1000_RCTL_SBP; ++ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case E1000_RXBUFFER_256: ++ rctl |= E1000_RCTL_SZ_256; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_512: ++ rctl |= E1000_RCTL_SZ_512; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_1024: ++ rctl |= E1000_RCTL_SZ_1024; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case E1000_RXBUFFER_4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case E1000_RXBUFFER_8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case E1000_RXBUFFER_16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++ adapter->rx_ps_pages = 0; ++ ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++} ++ ++/** ++ * e1000_configure_rx - Configure 8254x Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
++ **/ ++ ++static void ++e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ uint64_t rdba; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rdlen, rctl, rxcsum, ctrl_ext; ++ ++ { ++ rdlen = adapter->rx_ring[0].count * ++ sizeof(struct e1000_rx_desc); ++ adapter->clean_rx = NULL; /* unused */ ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ } ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); ++ ++ /* set the Receive Delay Timer Register */ ++ E1000_WRITE_REG(hw, RDTR, adapter->rx_int_delay); ++ ++ if (hw->mac_type >= e1000_82540) { ++ E1000_WRITE_REG(hw, RADV, adapter->rx_abs_int_delay); ++ if (adapter->itr > 1) ++ E1000_WRITE_REG(hw, ITR, ++ 1000000000 / (adapter->itr * 256)); ++ } ++ ++ if (hw->mac_type >= e1000_82571) { ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ /* Reset delay timers after every interrupt */ ++ ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_REG(hw, IAM, ~0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring */ ++ switch (adapter->num_rx_queues) { ++ case 1: ++ default: ++ rdba = adapter->rx_ring[0].dma; ++ E1000_WRITE_REG(hw, RDLEN, rdlen); ++ E1000_WRITE_REG(hw, RDBAH, (rdba >> 32)); ++ E1000_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL)); ++ E1000_WRITE_REG(hw, RDT, 0); ++ E1000_WRITE_REG(hw, RDH, 0); ++ adapter->rx_ring[0].rdh = E1000_RDH; ++ adapter->rx_ring[0].rdt = E1000_RDT; ++ break; ++ } ++ ++ /* Enable 82543 Receive Checksum Offload for TCP and UDP */ ++ if (hw->mac_type >= e1000_82543) { ++ rxcsum = E1000_READ_REG(hw, RXCSUM); ++ if (adapter->rx_csum == TRUE) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* don't need to clear IPPCSE as it defaults to 0 */ ++ } ++ E1000_WRITE_REG(hw, RXCSUM, rxcsum); ++ } ++ ++ ++ /* Enable Receives */ ++ E1000_WRITE_REG(hw, RCTL, rctl); ++} ++ ++/** ++ * e1000_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * @tx_ring: Tx descriptor ring for a specific queue ++ * ++ * Free all transmit software resources ++ **/ ++ ++static void ++e1000_free_tx_resources(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_tx_ring(adapter, tx_ring); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ pci_free_consistent(pdev, tx_ring->size, tx_ring->desc, tx_ring->dma); ++ ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_tx_resources - Free Tx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++ ++void ++e1000_free_all_tx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_free_tx_resources(adapter, &adapter->tx_ring[i]); ++} ++ ++static void ++e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ if (buffer_info->dma) { ++ pci_unmap_page(adapter->pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_TODEVICE); ++ } ++ if (buffer_info->skb) ++ kfree_rtskb(buffer_info->skb); ++ memset(buffer_info, 0, sizeof(struct e1000_buffer)); ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ * @tx_ring: ring to be cleaned ++ **/ ++ ++static void 
++e1000_clean_tx_ring(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Tx ring sk_buffs */ ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ tx_ring->last_tx_tso = 0; ++ ++ writel(0, adapter->hw.hw_addr + tx_ring->tdh); ++ writel(0, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++ ++static void ++e1000_clean_all_tx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]); ++} ++ ++/** ++ * e1000_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * @rx_ring: ring to clean the resources from ++ * ++ * Free all receive software resources ++ **/ ++ ++static void ++e1000_free_rx_resources(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ e1000_clean_rx_ring(adapter, rx_ring); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ kfree(rx_ring->ps_page); ++ rx_ring->ps_page = NULL; ++ kfree(rx_ring->ps_page_dma); ++ rx_ring->ps_page_dma = NULL; ++ ++ pci_free_consistent(pdev, rx_ring->size, rx_ring->desc, rx_ring->dma); ++ ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_free_all_rx_resources - Free Rx Resources for All Queues ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++ ++void ++e1000_free_all_rx_resources(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_free_rx_resources(adapter, &adapter->rx_ring[i]); ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ * @rx_ring: ring to free buffers from ++ **/ ++ ++static void ++e1000_clean_rx_ring(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring) ++{ ++ struct e1000_buffer *buffer_info; ++ struct pci_dev *pdev = adapter->pdev; ++ unsigned long size; ++ unsigned int i; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ if (buffer_info->skb) { ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_FROMDEVICE); ++ ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ size = sizeof(struct e1000_buffer) * rx_ring->count; ++ memset(rx_ring->buffer_info, 0, size); ++ size = sizeof(struct e1000_ps_page) * rx_ring->count; ++ memset(rx_ring->ps_page, 0, size); ++ size = sizeof(struct e1000_ps_page_dma) * rx_ring->count; ++ memset(rx_ring->ps_page_dma, 0, size); ++ ++ /* Zero out the descriptor ring */ ++ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->rdh); ++ writel(0, adapter->hw.hw_addr + rx_ring->rdt); ++} ++ ++/** ++ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues ++ * @adapter: board private structure ++ **/ ++ ++static void 
++e1000_clean_all_rx_rings(struct e1000_adapter *adapter) ++{ ++ int i; ++ ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]); ++} ++ ++/* The 82542 2.0 (revision 2) needs to have the receive unit in reset ++ * and memory write and invalidate disabled for certain operations ++ */ ++static void ++e1000_enter_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t rctl; ++ ++ e1000_pci_clear_mwi(&adapter->hw); ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ rctl |= E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (rtnetif_running(netdev)) ++ e1000_clean_all_rx_rings(adapter); ++} ++ ++static void ++e1000_leave_82542_rst(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t rctl; ++ ++ rctl = E1000_READ_REG(&adapter->hw, RCTL); ++ rctl &= ~E1000_RCTL_RST; ++ E1000_WRITE_REG(&adapter->hw, RCTL, rctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ mdelay(5); ++ ++ if (adapter->hw.pci_cmd_word & PCI_COMMAND_INVALIDATE) ++ e1000_pci_set_mwi(&adapter->hw); ++ ++ if (rtnetif_running(netdev)) { ++ /* No need to loop, because 82542 supports only 1 queue */ ++ struct e1000_rx_ring *ring = &adapter->rx_ring[0]; ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring)); ++ } ++} ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. ++ **/ ++ ++static void ++e1000_set_multi(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rctl; ++ int i, rar_entries = E1000_RAR_ENTRIES; ++ int mta_reg_count = (hw->mac_type == e1000_ich8lan) ? 
++ E1000_NUM_MTA_REGISTERS_ICH8LAN : ++ E1000_NUM_MTA_REGISTERS; ++ ++ if (adapter->hw.mac_type == e1000_ich8lan) ++ rar_entries = E1000_RAR_ENTRIES_ICH8LAN; ++ ++ /* reserve RAR[14] for LAA over-write work-around */ ++ if (adapter->hw.mac_type == e1000_82571) ++ rar_entries--; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = E1000_READ_REG(hw, RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } else if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ ++ /* 82542 2.0 needs to be in reset to write receive address registers */ ++ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ e1000_enter_82542_rst(adapter); ++ ++ /* load the first 14 multicast address into the exact filters 1-14 ++ * RAR 0 is used for the station MAC adddress ++ * if there are not 14 addresses, go ahead and clear the filters ++ * -- with 82571 controllers only 0-13 entries are filled here ++ */ ++ ++ for (i = 1; i < rar_entries; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* clear the old settings from the multicast hash table */ ++ ++ for (i = 0; i < mta_reg_count; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ e1000_leave_82542_rst(adapter); ++} ++ ++/** ++ * e1000_update_phy_info_task - get phy info ++ * @work: work struct contained inside adapter struct ++ * ++ * Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++static void e1000_update_phy_info_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ phy_info_task.work); ++ e1000_phy_get_info(&adapter->hw, &adapter->phy_info); ++} ++ ++/** ++ * e1000_82547_tx_fifo_stall_task - task to complete work ++ * @work: work struct contained inside adapter struct ++ **/ ++ ++static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, ++ fifo_stall_task.work); ++ struct rtnet_device *netdev = adapter->netdev; ++ uint32_t tctl; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) { ++ if ((E1000_READ_REG(&adapter->hw, TDT) == ++ E1000_READ_REG(&adapter->hw, TDH)) && ++ (E1000_READ_REG(&adapter->hw, TDFT) == ++ E1000_READ_REG(&adapter->hw, TDFH)) && ++ (E1000_READ_REG(&adapter->hw, TDFTS) == ++ E1000_READ_REG(&adapter->hw, TDFHS))) { ++ tctl = E1000_READ_REG(&adapter->hw, TCTL); ++ E1000_WRITE_REG(&adapter->hw, TCTL, ++ tctl & ~E1000_TCTL_EN); ++ E1000_WRITE_REG(&adapter->hw, TDFT, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFH, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFTS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TDFHS, ++ adapter->tx_head_addr); ++ E1000_WRITE_REG(&adapter->hw, TCTL, tctl); ++ E1000_WRITE_FLUSH(&adapter->hw); ++ ++ adapter->tx_fifo_head = 0; ++ atomic_set(&adapter->tx_fifo_stall, 0); ++ rtnetif_wake_queue(netdev); ++ } else { ++ schedule_delayed_work(&adapter->fifo_stall_task, 1); ++ } ++ } ++} ++ ++/** ++ * e1000_watchdog - work function ++ * @work: work struct contained inside adapter struct ++ **/ ++static void e1000_watchdog(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = 
container_of(work, ++ struct e1000_adapter, ++ watchdog_task.work); ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_tx_ring *txdr = adapter->tx_ring; ++ uint32_t link, tctl; ++ int32_t ret_val; ++ ++ ret_val = e1000_check_for_link(&adapter->hw); ++ if ((ret_val == E1000_ERR_PHY) && ++ (adapter->hw.phy_type == e1000_phy_igp_3) && ++ (E1000_READ_REG(&adapter->hw, CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kumeran_lock_loss_workaround() */ ++ DPRINTK(LINK, INFO, ++ "Gigabit has been disabled, downgrading speed\n"); ++ } ++ if (adapter->hw.mac_type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(&adapter->hw); ++ } ++ ++ if ((adapter->hw.media_type == e1000_media_type_internal_serdes) && ++ !(E1000_READ_REG(&adapter->hw, TXCW) & E1000_TXCW_ANE)) ++ link = !adapter->hw.serdes_link_down; ++ else ++ link = E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_LU; ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ boolean_t txb2b = 1; ++ e1000_get_speed_and_duplex(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ ++ DPRINTK(LINK, INFO, "NIC Link is Up %d Mbps %s\n", ++ adapter->link_speed, ++ adapter->link_duplex == FULL_DUPLEX ? ++ "Full Duplex" : "Half Duplex"); ++ ++ /* tweak tx_queue_len according to speed/duplex ++ * and adjust the timeout factor */ ++ // TODO makoehre netdev->tx_queue_len = adapter->tx_queue_len; ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ // TODO makoehre netdev->tx_queue_len = 10; ++ adapter->tx_timeout_factor = 8; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ // TODO makoehre netdev->tx_queue_len = 100; ++ /* maybe add some timeout factor ? */ ++ break; ++ } ++ ++ if ((adapter->hw.mac_type == e1000_82571 || ++ adapter->hw.mac_type == e1000_82572) && ++ txb2b == 0) { ++#define SPEED_MODE_BIT (1 << 21) ++ uint32_t tarc0; ++ tarc0 = E1000_READ_REG(&adapter->hw, TARC0); ++ tarc0 &= ~SPEED_MODE_BIT; ++ E1000_WRITE_REG(&adapter->hw, TARC0, tarc0); ++ } ++ ++ ++ /* enable transmits in the hardware, need to do this ++ * after setting TARC0 */ ++ tctl = E1000_READ_REG(&adapter->hw, TCTL); ++ tctl |= E1000_TCTL_EN; ++ E1000_WRITE_REG(&adapter->hw, TCTL, tctl); ++ ++ rtnetif_carrier_on(netdev); ++ rtnetif_wake_queue(netdev); ++ schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); ++ adapter->smartspeed = 0; ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ DPRINTK(LINK, INFO, "NIC Link is Down\n"); ++ rtnetif_carrier_off(netdev); ++ rtnetif_stop_queue(netdev); ++ schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); ++ ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives in the ISR and ++ * reset device here in the watchdog ++ */ ++ if (adapter->hw.mac_type == e1000_80003es2lan) ++ /* reset device */ ++ schedule_work(&adapter->reset_task); ++ } ++ ++ e1000_smartspeed(adapter); ++ } ++ ++ ++ adapter->hw.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ adapter->hw.collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old; ++ adapter->gorcl_old = adapter->stats.gorcl; ++ adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old; ++ adapter->gotcl_old = adapter->stats.gotcl; ++ ++ // e1000_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev)) { ++ if 
(E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { ++ /* We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ } ++ } ++ ++ /* Dynamic mode for Interrupt Throttle Rate (ITR) */ ++ if (adapter->hw.mac_type >= e1000_82540 && adapter->itr == 1) { ++ /* Symmetric Tx/Rx gets a reduced ITR=2000; Total ++ * asymmetrical Tx or Rx gets ITR=8000; everyone ++ * else is between 2000-8000. */ ++ uint32_t goc = (adapter->gotcl + adapter->gorcl) / 10000; ++ uint32_t dif = (adapter->gotcl > adapter->gorcl ? ++ adapter->gotcl - adapter->gorcl : ++ adapter->gorcl - adapter->gotcl) / 10000; ++ uint32_t itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000; ++ E1000_WRITE_REG(&adapter->hw, ITR, 1000000000 / (itr * 256)); ++ } ++ ++ /* Cause software interrupt to ensure rx ring is cleaned */ ++ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_RXDMT0); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = TRUE; ++ ++ /* With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. Set the appropriate LAA in RAR[0] */ ++ if (adapter->hw.mac_type == e1000_82571 && adapter->hw.laa_is_present) ++ e1000_rar_set(&adapter->hw, adapter->hw.mac_addr, 0); ++ ++ /* Reschedule the task */ ++ schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++ ++static boolean_t ++e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, ++ struct rtskb *skb) ++{ ++ struct e1000_context_desc *context_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ uint8_t css; ++ ++ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { ++ css = skb->h.raw - skb->data; ++ ++ i = tx_ring->next_to_use; ++ buffer_info = &tx_ring->buffer_info[i]; ++ context_desc = E1000_CONTEXT_DESC(*tx_ring, i); ++ ++ context_desc->upper_setup.tcp_fields.tucss = css; ++ context_desc->upper_setup.tcp_fields.tucso = css + skb->csum; ++ context_desc->upper_setup.tcp_fields.tucse = 0; ++ context_desc->tcp_seg_setup.data = 0; ++ context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); ++ ++ buffer_info->time_stamp = jiffies; ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ tx_ring->next_to_use = i; ++ ++ return TRUE; ++ } ++ ++ return FALSE; ++} ++ ++#define E1000_MAX_TXD_PWR 12 ++#define E1000_MAX_DATA_PER_TXD (1<len; ++ unsigned int offset = 0, size, count = 0, i; ++ ++ i = tx_ring->next_to_use; ++ ++ while (len) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = min(len, max_per_txd); ++ /* work-around for errata 10 and it applies ++ * to all controllers in PCI-X mode ++ * The fix is to make sure that the first descriptor of a ++ * packet is smaller than 2048 - 16 - 16 (or 2016) bytes ++ */ ++ if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && ++ (size > 2015) && count == 0)) ++ size = 2015; ++ ++ /* Workaround for potential 82544 hang in PCI-X. Avoid ++ * terminating buffers within evenly-aligned dwords. 
*/ ++ if (unlikely(adapter->pcix_82544 && ++ !((unsigned long)(skb->data + offset + size - 1) & 4) && ++ size > 4)) ++ size -= 4; ++ ++ buffer_info->length = size; ++ buffer_info->dma = ++ pci_map_single(adapter->pdev, ++ skb->data + offset, ++ size, ++ PCI_DMA_TODEVICE); ++ buffer_info->time_stamp = jiffies; ++ ++ len -= size; ++ offset += size; ++ count++; ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ ++ i = (i == 0) ? tx_ring->count - 1 : i - 1; ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return count; ++} ++ ++static void ++e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, ++ int tx_flags, int count, nanosecs_abs_t *xmit_stamp) ++{ ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ uint32_t txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ ++ ++ if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ while (count--) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ if (xmit_stamp) ++ *xmit_stamp = cpu_to_be64(rtdm_clock_read() + *xmit_stamp); ++ ++ /* Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ writel(i, adapter->hw.hw_addr + tx_ring->tdt); ++} ++ ++/** ++ * 82547 workaround to avoid controller hang in half-duplex environment. ++ * The workaround is to avoid queuing a large packet that would span ++ * the internal Tx FIFO ring boundary by notifying the stack to resend ++ * the packet at a later time. This gives the Tx FIFO an opportunity to ++ * flush all packets. When that occurs, we reset the Tx FIFO pointers ++ * to the beginning of the Tx FIFO. 
++ **/ ++ ++#define E1000_FIFO_HDR 0x10 ++#define E1000_82547_PAD_LEN 0x3E0 ++ ++static int ++e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct rtskb *skb) ++{ ++ uint32_t fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; ++ uint32_t skb_fifo_len = skb->len + E1000_FIFO_HDR; ++ ++ E1000_ROUNDUP(skb_fifo_len, E1000_FIFO_HDR); ++ ++ if (adapter->link_duplex != HALF_DUPLEX) ++ goto no_fifo_stall_required; ++ ++ if (atomic_read(&adapter->tx_fifo_stall)) ++ return 1; ++ ++ if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) { ++ atomic_set(&adapter->tx_fifo_stall, 1); ++ return 1; ++ } ++ ++no_fifo_stall_required: ++ adapter->tx_fifo_head += skb_fifo_len; ++ if (adapter->tx_fifo_head >= adapter->tx_fifo_size) ++ adapter->tx_fifo_head -= adapter->tx_fifo_size; ++ return 0; ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int ++e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct rtskb *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ uint16_t length, offset; ++ if (skb->len > MINIMUM_DHCP_PACKET_SIZE) { ++ struct ethhdr *eth = (struct ethhdr *) skb->data; ++ if ((htons(ETH_P_IP) == eth->h_proto)) { ++ const struct iphdr *ip = ++ (struct iphdr *)((uint8_t *)skb->data+14); ++ if (IPPROTO_UDP == ip->protocol) { ++ struct udphdr *udp = ++ (struct udphdr *)((uint8_t *)ip + ++ (ip->ihl << 2)); ++ if (ntohs(udp->dest) == 67) { ++ offset = (uint8_t *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ ++ return e1000_mng_write_dhcp_info(hw, ++ (uint8_t *)udp + 8, ++ length); ++ } ++ } ++ } ++ } ++ return 0; ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int ++e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_tx_ring *tx_ring; ++ unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; ++ unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; ++ unsigned int tx_flags = 0; ++ unsigned int len = skb->len; ++ rtdm_lockctx_t context; ++ unsigned int nr_frags = 0; ++ unsigned int mss = 0; ++ int count = 0; ++ ++ /* This goes back to the question of how to logically map a tx queue ++ * to a flow. Right now, performance is impacted slightly negatively ++ * if using multiple tx queues. If the stack breaks away from a ++ * single qdisc implementation, we can look at this again. 
*/ ++ tx_ring = adapter->tx_ring; ++ ++ if (unlikely(skb->len <= 0)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ count++; ++ ++ ++ count += TXD_USE_COUNT(len, max_txd_pwr); ++ ++ if (adapter->pcix_82544) ++ count++; ++ ++ /* work-around for errata 10 and it applies to all controllers ++ * in PCI-X mode, so add one more descriptor to the count ++ */ ++ if (unlikely((adapter->hw.bus_type == e1000_bus_type_pcix) && ++ (len > 2015))) ++ count++; ++ ++ ++ if (adapter->hw.tx_pkt_filtering && ++ (adapter->hw.mac_type == e1000_82573)) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->tx_lock, context); ++ ++ /* need: count + 2 desc gap to keep tail from touching ++ * head, otherwise try next time */ ++ if (unlikely(E1000_DESC_UNUSED(tx_ring) < count + 2)) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ rtdm_printk("FATAL: rt_e1000 ran into tail close to head situation!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ ++ if (unlikely(adapter->hw.mac_type == e1000_82547)) { ++ if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { ++ rtnetif_stop_queue(netdev); ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ ++ /* FIXME: warn the user earlier, i.e. on startup if ++ half-duplex is detected! */ ++ rtdm_printk("FATAL: rt_e1000 ran into 82547 " ++ "controller bug!\n"); ++ return NETDEV_TX_BUSY; ++ } ++ } ++ ++ first = tx_ring->next_to_use; ++ ++ if (likely(e1000_tx_csum(adapter, tx_ring, skb))) ++ tx_flags |= E1000_TX_FLAGS_CSUM; ++ ++ e1000_tx_queue(adapter, tx_ring, tx_flags, ++ e1000_tx_map(adapter, tx_ring, skb, first, ++ max_per_txd, nr_frags, mss), ++ skb->xmit_stamp); ++ ++ rtdm_lock_put_irqrestore(&tx_ring->tx_lock, context); ++ ++ return NETDEV_TX_OK; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ * @pt_regs: CPU registers structure ++ **/ ++ ++static int ++e1000_intr(rtdm_irq_t *irq_handle) ++ /* int irq, void *data, struct pt_regs *regs) */ ++{ ++ ++ struct rtnet_device *netdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ uint32_t rctl, icr = E1000_READ_REG(hw, ICR); ++ int i; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ if (unlikely(!icr)) { ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ } ++ if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { ++ hw->get_link_status = 1; ++ /* 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(netdev) && ++ (adapter->hw.mac_type == e1000_80003es2lan)) { ++ /* disable receives */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ E1000_WRITE_REG(hw, RCTL, rctl & ~E1000_RCTL_EN); ++ } ++ /* FIXME: we need to handle this via some yet-to-be-invented ++ error manager (Linux botton-half and/or kthread) ++ mod_timer(&adapter->watchdog_timer, jiffies);*/ ++ } ++ ++ /* Writing IMC and IMS is needed for 82547. ++ * Due to Hub Link bus being occupied, an interrupt ++ * de-assertion message is not able to be sent. ++ * When an interrupt assertion message is generated later, ++ * two messages are re-ordered and sent out. ++ * That causes APIC to think 82547 is in de-assertion ++ * state, while 82547 is in assertion state, resulting ++ * in dead lock. Writing IMC forces 82547 into ++ * de-assertion state. 
++ */ ++ if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) { ++ atomic_inc(&adapter->irq_sem); ++ E1000_WRITE_REG(hw, IMC, ~0); ++ } ++ ++ adapter->data_received = 0; ++ ++ for (i = 0; i < E1000_MAX_INTR; i++) ++ if (unlikely(!e1000_clean_rx_irq(adapter, adapter->rx_ring, ++ &time_stamp) & ++ !e1000_clean_tx_irq(adapter, adapter->tx_ring))) ++ break; ++ ++ if (hw->mac_type == e1000_82547 || hw->mac_type == e1000_82547_rev_2) ++ e1000_irq_enable(adapter); ++ ++ ++ if (adapter->data_received) ++ rt_mark_stack_mgr(netdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ **/ ++ ++static boolean_t ++e1000_clean_tx_irq(struct e1000_adapter *adapter, ++ struct e1000_tx_ring *tx_ring) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++ boolean_t cleaned = FALSE; ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) { ++ for (cleaned = FALSE; !cleaned; ) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++ e1000_unmap_and_free_tx_resource(adapter, buffer_info); ++ memset(tx_desc, 0, sizeof(struct e1000_tx_desc)); ++ ++ if (unlikely(++i == tx_ring->count)) i = 0; ++ } ++ ++ ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (unlikely(cleaned && rtnetif_queue_stopped(netdev) && ++ rtnetif_carrier_ok(netdev))) { ++ rtdm_lock_get(&tx_ring->tx_lock); ++ if (rtnetif_queue_stopped(netdev) && ++ (E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) ++ rtnetif_wake_queue(netdev); ++ rtdm_lock_put(&tx_ring->tx_lock); ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i */ ++ adapter->detect_tx_hung = FALSE; ++ if (tx_ring->buffer_info[eop].dma && ++ time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + ++ (adapter->tx_timeout_factor * HZ)) ++ && !(E1000_READ_REG(&adapter->hw, STATUS) & ++ E1000_STATUS_TXOFF)) { ++ ++ /* detected Tx unit hang */ ++ DPRINTK(DRV, ERR, "Detected Tx Unit Hang\n" ++ " Tx Queue <%lu>\n" ++ " TDH <%x>\n" ++ " TDT <%x>\n" ++ " next_to_use <%x>\n" ++ " next_to_clean <%x>\n" ++ "buffer_info[next_to_clean]\n" ++ " time_stamp <%lx>\n" ++ " next_to_watch <%x>\n" ++ " jiffies <%lx>\n" ++ " next_to_watch.status <%x>\n", ++ (unsigned long)((tx_ring - adapter->tx_ring) / ++ sizeof(struct e1000_tx_ring)), ++ readl(adapter->hw.hw_addr + tx_ring->tdh), ++ readl(adapter->hw.hw_addr + tx_ring->tdt), ++ tx_ring->next_to_use, ++ tx_ring->next_to_clean, ++ tx_ring->buffer_info[eop].time_stamp, ++ eop, ++ jiffies, ++ eop_desc->upper.fields.status); ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ return cleaned; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload for 82543 ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++ ++static void ++e1000_rx_checksum(struct e1000_adapter *adapter, ++ uint32_t status_err, uint32_t csum, ++ struct rtskb *skb) ++{ ++ uint16_t status = 
(uint16_t)status_err; ++ uint8_t errors = (uint8_t)(status_err >> 24); ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* 82543 or newer only */ ++ if (unlikely(adapter->hw.mac_type < e1000_82543)) return; ++ /* Ignore Checksum bit is set */ ++ if (unlikely(status & E1000_RXD_STAT_IXSM)) return; ++ /* TCP/UDP checksum error bit is set */ ++ if (unlikely(errors & E1000_RXD_ERR_TCPE)) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ /* TCP/UDP Checksum has not been calculated */ ++ if (adapter->hw.mac_type <= e1000_82547_rev_2) { ++ if (!(status & E1000_RXD_STAT_TCPCS)) ++ return; ++ } else { ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ } ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (likely(status & E1000_RXD_STAT_TCPCS)) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else if (adapter->hw.mac_type > e1000_82547_rev_2) { ++ /* IP fragment with UDP payload */ ++ /* Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. ++ */ ++ csum = ntohl(csum ^ 0xFFFF); ++ skb->csum = csum; ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ **/ ++ ++static boolean_t ++e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc, *next_rxd; ++ struct e1000_buffer *buffer_info, *next_buffer; ++ uint32_t length; ++ uint8_t last_byte; ++ unsigned int i; ++ int cleaned_count = 0; ++ boolean_t cleaned = FALSE; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (rx_desc->status & E1000_RXD_STAT_DD) { ++ struct rtskb *skb, *next_skb; ++ u8 status; ++ ++ status = rx_desc->status; ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ if (++i == rx_ring->count) i = 0; ++ next_rxd = E1000_RX_DESC(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ next_skb = next_buffer->skb; ++ prefetch(next_skb->data - NET_IP_ALIGN); ++ ++ cleaned = TRUE; ++ cleaned_count++; ++ pci_unmap_single(pdev, ++ buffer_info->dma, ++ buffer_info->length, ++ PCI_DMA_FROMDEVICE); ++ ++ length = le16_to_cpu(rx_desc->length); ++ ++ if (unlikely(!(status & E1000_RXD_STAT_EOP))) { ++ /* All receives must fit into a single buffer */ ++ E1000_DBG("%s: Receive packet consumed multiple" ++ " buffers\n", netdev->name); ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { ++ last_byte = *(skb->data + length - 1); ++ if (TBI_ACCEPT(&adapter->hw, status, ++ rx_desc->errors, length, last_byte)) { ++ length--; ++ } else { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ } ++ ++ /* code added for copybreak, this should improve ++ * performance for small packets with large amounts ++ * of reassembly being done in the stack */ ++ rtskb_put(skb, length); ++ ++ /* end copybreak code */ ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, ++ (uint32_t)(status) | ++ ((uint32_t)(rx_desc->errors) << 24), ++ le16_to_cpu(rx_desc->csum), skb); ++ ++ skb->protocol = 
rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ adapter->data_received = 1; // Set flag for the main interrupt routine ++ ++next_desc: ++ rx_desc->status = 0; ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) { ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = E1000_DESC_UNUSED(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count); ++ ++ return cleaned; ++} ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended ++ * @adapter: address of board private structure ++ **/ ++ ++static void ++e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_rx_desc *rx_desc; ++ struct e1000_buffer *buffer_info; ++ struct rtskb *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ if (!(skb = buffer_info->skb)) ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ else { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ if (unlikely(!skb)) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ struct rtskb *oldskb = skb; ++ DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " ++ "at %p\n", bufsz, skb->data); ++ /* Try again, without freeing the previous */ ++ skb = rtnetdev_alloc_rtskb(netdev, bufsz); ++ /* Failed allocation, critical failure */ ++ if (!skb) { ++ kfree_rtskb(oldskb); ++ break; ++ } ++ ++ if (!e1000_check_64k_bound(adapter, skb->data, bufsz)) { ++ /* give up */ ++ kfree_rtskb(skb); ++ kfree_rtskb(oldskb); ++ break; /* while !buffer_info->skb */ ++ } else { ++ /* Use new allocation */ ++ kfree_rtskb(oldskb); ++ } ++ } ++ /* Make buffer alignment 2 beyond a 16 byte boundary ++ * this will result in a 16 byte aligned IP header after ++ * the 14 byte MAC header is removed ++ */ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++ buffer_info->length = adapter->rx_buffer_len; ++map_skb: ++ buffer_info->dma = pci_map_single(pdev, ++ skb->data, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ /* Fix for errata 23, can't cross 64kB boundary */ ++ if (!e1000_check_64k_bound(adapter, ++ (void *)(unsigned long)buffer_info->dma, ++ adapter->rx_buffer_len)) { ++ DPRINTK(RX_ERR, ERR, ++ "dma align check failed: %u bytes at %p\n", ++ adapter->rx_buffer_len, ++ (void *)(unsigned long)buffer_info->dma); ++ kfree_rtskb(skb); ++ buffer_info->skb = NULL; ++ ++ pci_unmap_single(pdev, buffer_info->dma, ++ adapter->rx_buffer_len, ++ PCI_DMA_FROMDEVICE); ++ ++ break; /* while !buffer_info->skb */ ++ } ++ rx_desc = E1000_RX_DESC(*rx_ring, i); ++ rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(++i == rx_ring->count)) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ if (likely(rx_ring->next_to_use != i)) { ++ rx_ring->next_to_use = i; ++ if (unlikely(i-- == 0)) ++ i = (rx_ring->count - 1); ++ ++ /* Force memory writes to complete before 
letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). */ ++ wmb(); ++ writel(i, adapter->hw.hw_addr + rx_ring->rdt); ++ } ++} ++ ++ ++/** ++ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers. ++ * @adapter: ++ **/ ++ ++static void ++e1000_smartspeed(struct e1000_adapter *adapter) ++{ ++ uint16_t phy_status; ++ uint16_t phy_ctrl; ++ ++ if ((adapter->hw.phy_type != e1000_phy_igp) || !adapter->hw.autoneg || ++ !(adapter->hw.autoneg_advertised & ADVERTISE_1000_FULL)) ++ return; ++ ++ if (adapter->smartspeed == 0) { ++ /* If Master/Slave config fault is asserted twice, ++ * we assume back-to-back */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_status); ++ if (!(phy_status & SR_1000T_MS_CONFIG_FAULT)) return; ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ if (phy_ctrl & CR_1000T_MS_ENABLE) { ++ phy_ctrl &= ~CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, ++ phy_ctrl); ++ adapter->smartspeed++; ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, ++ &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, ++ phy_ctrl); ++ } ++ } ++ return; ++ } else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) { ++ /* If still no link, perhaps using 2/3 pair cable */ ++ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_ctrl); ++ phy_ctrl |= CR_1000T_MS_ENABLE; ++ e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_ctrl); ++ if (!e1000_phy_setup_autoneg(&adapter->hw) && ++ !e1000_read_phy_reg(&adapter->hw, PHY_CTRL, &phy_ctrl)) { ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | ++ MII_CR_RESTART_AUTO_NEG); ++ e1000_write_phy_reg(&adapter->hw, PHY_CTRL, phy_ctrl); ++ } ++ } ++ /* Restart process after E1000_SMARTSPEED_MAX iterations */ ++ if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX) ++ adapter->smartspeed = 0; ++} ++ ++ ++ ++void ++e1000_pci_set_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++#ifdef HAVE_PCI_SET_MWI ++ int ret_val = pci_set_mwi(adapter->pdev); ++ ++ if (ret_val) ++ DPRINTK(PROBE, ERR, "Error in setting MWI\n"); ++#else ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ adapter->hw.pci_cmd_word | ++ PCI_COMMAND_INVALIDATE); ++#endif ++} ++ ++void ++e1000_pci_clear_mwi(struct e1000_hw *hw) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++#ifdef HAVE_PCI_SET_MWI ++ pci_clear_mwi(adapter->pdev); ++#else ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ adapter->hw.pci_cmd_word & ++ ~PCI_COMMAND_INVALIDATE); ++#endif ++} ++ ++void ++e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_read_config_word(adapter->pdev, reg, value); ++} ++ ++void ++e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) ++{ ++ struct e1000_adapter *adapter = hw->back; ++ ++ pci_write_config_word(adapter->pdev, reg, *value); ++} ++ ++uint32_t ++e1000_io_read(struct e1000_hw *hw, unsigned long port) ++{ ++ return inl(port); ++} ++ ++void ++e1000_io_write(struct e1000_hw *hw, unsigned long port, uint32_t value) ++{ ++ outl(value, port); ++} ++ ++ ++int ++e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx) ++{ ++ adapter->hw.autoneg = 0; ++ ++ /* Fiber NICs only 
allow 1000 gbps Full duplex */ ++ if ((adapter->hw.media_type == e1000_media_type_fiber) && ++ spddplx != (SPEED_1000 + DUPLEX_FULL)) { ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ ++ switch (spddplx) { ++ case SPEED_10 + DUPLEX_HALF: ++ adapter->hw.forced_speed_duplex = e1000_10_half; ++ break; ++ case SPEED_10 + DUPLEX_FULL: ++ adapter->hw.forced_speed_duplex = e1000_10_full; ++ break; ++ case SPEED_100 + DUPLEX_HALF: ++ adapter->hw.forced_speed_duplex = e1000_100_half; ++ break; ++ case SPEED_100 + DUPLEX_FULL: ++ adapter->hw.forced_speed_duplex = e1000_100_full; ++ break; ++ case SPEED_1000 + DUPLEX_FULL: ++ adapter->hw.autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + DUPLEX_HALF: /* not supported */ ++ default: ++ DPRINTK(PROBE, ERR, "Unsupported Speed/Duplex configuration\n"); ++ return -EINVAL; ++ } ++ return 0; ++} +--- linux/drivers/xenomai/net/drivers/e1000/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000.h 2021-04-07 16:01:27.437633866 +0800 +@@ -0,0 +1,391 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef NETIF_F_ISO ++#undef NETIF_F_ISO ++#endif ++ ++#ifdef NETIF_F_TSO ++#include ++#endif ++#ifdef SIOCGMIIPHY ++#include ++#endif ++#ifdef SIOCETHTOOL ++#include ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#undef NETIF_F_HW_VLAN_TX ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#include ++#endif ++ ++// RTNET ++#include ++ ++ ++#define BAR_0 0 ++#define BAR_1 1 ++#define BAR_5 5 ++ ++#include "kcompat.h" ++#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\ ++ PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)} ++ ++struct e1000_adapter; ++ ++#include "e1000_hw.h" ++ ++#ifdef DBG ++#define E1000_DBG(args...) printk(KERN_DEBUG "e1000: " args) ++#else ++#define E1000_DBG(args...) ++#endif ++ ++#define E1000_ERR(args...) 
printk(KERN_ERR "e1000: " args) ++ ++#define PFX "e1000: " ++#define DPRINTK(nlevel, klevel, fmt, args...) \ ++ (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ ++ printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ ++ __FUNCTION__ , ## args)) ++ ++#define E1000_MAX_INTR 10 ++ ++/* TX/RX descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 256 ++#define E1000_MIN_TXD 80 ++#define E1000_MAX_82544_TXD 4096 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 256 ++#define E1000_MIN_RXD 80 ++#define E1000_MAX_82544_RXD 4096 ++ ++/* Supported Rx Buffer Sizes */ ++#define E1000_RXBUFFER_128 128 /* Used for packet split */ ++#define E1000_RXBUFFER_256 256 /* Used for packet split */ ++#define E1000_RXBUFFER_512 512 ++#define E1000_RXBUFFER_1024 1024 ++#define E1000_RXBUFFER_2048 2048 ++#define E1000_RXBUFFER_4096 4096 ++#define E1000_RXBUFFER_8192 8192 ++#define E1000_RXBUFFER_16384 16384 ++ ++/* SmartSpeed delimiters */ ++#define E1000_SMARTSPEED_DOWNSHIFT 3 ++#define E1000_SMARTSPEED_MAX 15 ++ ++/* Packet Buffer allocations */ ++#define E1000_PBA_BYTES_SHIFT 0xA ++#define E1000_TX_HEAD_ADDR_SHIFT 7 ++#define E1000_PBA_TX_MASK 0xFFFF0000 ++ ++/* Flow Control Watermarks */ ++#define E1000_FC_HIGH_DIFF 0x1638 /* High: 5688 bytes below Rx FIFO size */ ++#define E1000_FC_LOW_DIFF 0x1640 /* Low: 5696 bytes below Rx FIFO size */ ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++#define E1000_TX_QUEUE_WAKE 16 ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_82544_APM 0x0004 ++#define E1000_EEPROM_ICH8_APME 0x0004 ++#define E1000_EEPROM_APME 0x0400 ++ ++#ifndef E1000_MASTER_SLAVE ++/* Switch to override PHY master/slave setting */ ++#define E1000_MASTER_SLAVE e1000_ms_hw_default ++#endif ++ ++#ifdef NETIF_F_HW_VLAN_TX ++#define E1000_MNG_VLAN_NONE -1 ++#endif ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS MAX_PS_BUFFERS-1 ++ ++/* only works for sizes that are powers of 2 */ ++#define E1000_ROUNDUP(i, size) ((i) = (((i) + (size) - 1) & ~((size) - 1))) ++ ++/* wrapper around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer */ ++struct e1000_buffer { ++ struct rtskb *skb; ++ dma_addr_t dma; ++ unsigned long time_stamp; ++ uint16_t length; ++ uint16_t next_to_watch; ++}; ++ ++ ++struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; }; ++struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; }; ++ ++struct e1000_tx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; ++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++ rtdm_lock_t tx_lock; ++ uint16_t tdh; ++ uint16_t tdt; ++ boolean_t last_tx_tso; ++}; ++ ++struct e1000_rx_ring { ++ /* pointer to the descriptor ring memory */ ++ void *desc; ++ /* physical address of the descriptor ring */ ++ dma_addr_t dma; ++ /* length of descriptor ring in bytes */ ++ unsigned int size; 
++ /* number of descriptors in the ring */ ++ unsigned int count; ++ /* next descriptor to associate a buffer with */ ++ unsigned int next_to_use; ++ /* next descriptor to check for DD status bit */ ++ unsigned int next_to_clean; ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_page; ++ struct e1000_ps_page_dma *ps_page_dma; ++ ++ /* cpu for rx queue */ ++ int cpu; ++ ++ uint16_t rdh; ++ uint16_t rdt; ++}; ++ ++#define E1000_DESC_UNUSED(R) \ ++ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ ++ (R)->next_to_clean - (R)->next_to_use - 1) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_RX_DESC(R, i) E1000_GET_DESC(R, i, e1000_rx_desc) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++/* board specific private data structure */ ++ ++struct e1000_adapter { ++#ifdef NETIF_F_HW_VLAN_TX ++ struct vlan_group *vlgrp; ++ uint16_t mng_vlan_id; ++#endif ++ uint32_t bd_number; ++ uint32_t rx_buffer_len; ++ uint32_t part_num; ++ uint32_t wol; ++ uint32_t ksp3_port_a; ++ uint32_t smartspeed; ++ uint32_t en_mng_pt; ++ uint16_t link_speed; ++ uint16_t link_duplex; ++#ifdef CONFIG_E1000_NAPI ++ spinlock_t tx_queue_lock; ++#endif ++ atomic_t irq_sem; ++ struct work_struct reset_task; ++ uint8_t fc_autoneg; ++ ++#ifdef ETHTOOL_PHYS_ID ++ struct timer_list blink_timer; ++ unsigned long led_status; ++#endif ++ ++ /* TX */ ++ struct e1000_tx_ring *tx_ring; /* One per active queue */ ++ unsigned long tx_queue_len; ++ uint32_t txd_cmd; ++ uint32_t tx_int_delay; ++ uint32_t tx_abs_int_delay; ++ uint32_t gotcl; ++ uint64_t gotcl_old; ++ uint64_t tpt_old; ++ uint64_t colc_old; ++ uint32_t tx_timeout_count; ++ uint32_t tx_fifo_head; ++ uint32_t tx_head_addr; ++ uint32_t tx_fifo_size; ++ uint8_t tx_timeout_factor; ++ atomic_t tx_fifo_stall; ++ boolean_t pcix_82544; ++ boolean_t detect_tx_hung; ++ ++ /* RX */ ++#ifdef CONFIG_E1000_NAPI ++ boolean_t (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int *work_done, int work_to_do); ++#else ++ boolean_t (*clean_rx) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring); ++#endif ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ struct e1000_rx_ring *rx_ring, ++ int cleaned_count); ++ struct e1000_rx_ring *rx_ring; /* One per active queue */ ++#ifdef CONFIG_E1000_NAPI ++ struct net_device *polling_netdev; /* One per active queue */ ++#endif ++ int num_tx_queues; ++ int num_rx_queues; ++ ++ uint64_t hw_csum_err; ++ uint64_t hw_csum_good; ++ uint64_t rx_hdr_split; ++ uint32_t alloc_rx_buff_failed; ++ uint32_t rx_int_delay; ++ uint32_t rx_abs_int_delay; ++ boolean_t rx_csum; ++ unsigned int rx_ps_pages; ++ uint32_t gorcl; ++ uint64_t gorcl_old; ++ uint16_t rx_ps_bsize0; ++ ++ /* Interrupt Throttle Rate */ ++ uint32_t itr; ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ struct net_device_stats net_stats; ++ ++ rtdm_irq_t irq_handle; ++ boolean_t data_received; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ 
++#ifdef ETHTOOL_TEST ++ uint32_t test_icr; ++ struct e1000_tx_ring test_tx_ring; ++ struct e1000_rx_ring test_rx_ring; ++#endif ++ ++#ifdef E1000_COUNT_ICR ++ uint64_t icr_txdw; ++ uint64_t icr_txqe; ++ uint64_t icr_lsc; ++ uint64_t icr_rxseq; ++ uint64_t icr_rxdmt; ++ uint64_t icr_rxo; ++ uint64_t icr_rxt; ++ uint64_t icr_mdac; ++ uint64_t icr_rxcfg; ++ uint64_t icr_gpi; ++#endif ++ ++ uint32_t *config_space; ++ int msg_enable; ++#ifdef CONFIG_PCI_MSI ++ boolean_t have_msi; ++#endif ++ /* to not mess up cache alignment, always add to the bottom */ ++#ifdef NETIF_F_TSO ++ boolean_t tso_force; ++#endif ++ boolean_t smart_power_down; /* phy smart power down */ ++ unsigned long flags; ++ ++ struct delayed_work watchdog_task; ++ struct delayed_work fifo_stall_task; ++ struct delayed_work phy_info_task; ++}; ++ ++enum e1000_state_t { ++ __E1000_DRIVER_TESTING, ++ __E1000_RESETTING, ++}; ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_hw.h 2021-04-07 16:01:27.433633872 +0800 +@@ -0,0 +1,3454 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_hw.h ++ * Structures, enums, and macros for the MAC ++ */ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include "e1000_osdep.h" ++ ++ ++/* Forward declarations of structures used by the shared code */ ++struct e1000_hw; ++struct e1000_hw_stats; ++ ++/* Enumerated types specific to the e1000 hardware */ ++/* Media Access Controlers */ ++typedef enum { ++ e1000_undefined = 0, ++ e1000_82542_rev2_0, ++ e1000_82542_rev2_1, ++ e1000_82543, ++ e1000_82544, ++ e1000_82540, ++ e1000_82545, ++ e1000_82545_rev_3, ++ e1000_82546, ++ e1000_82546_rev_3, ++ e1000_82541, ++ e1000_82541_rev_2, ++ e1000_82547, ++ e1000_82547_rev_2, ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_num_macs ++} e1000_mac_type; ++ ++typedef enum { ++ e1000_eeprom_uninitialized = 0, ++ e1000_eeprom_spi, ++ e1000_eeprom_microwire, ++ e1000_eeprom_flash, ++ e1000_eeprom_ich8, ++ e1000_eeprom_none, /* No NVM support */ ++ e1000_num_eeprom_types ++} e1000_eeprom_type; ++ ++/* Media Types */ ++typedef enum { ++ e1000_media_type_copper = 0, ++ e1000_media_type_fiber = 1, ++ e1000_media_type_internal_serdes = 2, ++ e1000_num_media_types ++} e1000_media_type; ++ ++typedef enum { ++ e1000_10_half = 0, ++ e1000_10_full = 1, ++ e1000_100_half = 2, ++ e1000_100_full = 3 ++} e1000_speed_duplex_type; ++ ++/* Flow Control Settings */ ++typedef enum { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause = 1, ++ e1000_fc_tx_pause = 2, ++ e1000_fc_full = 3, ++ e1000_fc_default = 0xFF ++} e1000_fc_type; ++ ++struct e1000_shadow_ram { ++ uint16_t eeprom_word; ++ boolean_t modified; ++}; ++ ++/* PCI bus types */ ++typedef enum { ++ e1000_bus_type_unknown = 0, ++ e1000_bus_type_pci, ++ e1000_bus_type_pcix, ++ e1000_bus_type_pci_express, ++ e1000_bus_type_reserved ++} e1000_bus_type; ++ ++/* PCI bus speeds */ ++typedef enum { ++ e1000_bus_speed_unknown = 0, ++ e1000_bus_speed_33, ++ e1000_bus_speed_66, ++ e1000_bus_speed_100, ++ e1000_bus_speed_120, ++ e1000_bus_speed_133, ++ e1000_bus_speed_2500, ++ e1000_bus_speed_reserved ++} e1000_bus_speed; ++ ++/* PCI bus widths */ ++typedef enum { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_pciex_1, ++ e1000_bus_width_pciex_2, ++ e1000_bus_width_pciex_4, ++ e1000_bus_width_reserved ++} e1000_bus_width; ++ ++/* PHY status info structure and supporting enums */ ++typedef enum { ++ e1000_cable_length_50 = 0, ++ e1000_cable_length_50_80, ++ e1000_cable_length_80_110, ++ e1000_cable_length_110_140, ++ e1000_cable_length_140, ++ e1000_cable_length_undefined = 0xFF ++} e1000_cable_length; ++ ++typedef enum { ++ e1000_gg_cable_length_60 = 0, ++ e1000_gg_cable_length_60_115 = 1, ++ e1000_gg_cable_length_115_150 = 2, ++ e1000_gg_cable_length_150 = 4 ++} e1000_gg_cable_length; ++ ++typedef enum { ++ e1000_igp_cable_length_10 = 10, ++ e1000_igp_cable_length_20 = 20, ++ e1000_igp_cable_length_30 = 30, ++ e1000_igp_cable_length_40 = 40, ++ e1000_igp_cable_length_50 = 50, ++ e1000_igp_cable_length_60 = 60, ++ e1000_igp_cable_length_70 = 70, ++ e1000_igp_cable_length_80 = 80, ++ e1000_igp_cable_length_90 = 90, ++ e1000_igp_cable_length_100 = 100, ++ e1000_igp_cable_length_110 = 110, ++ e1000_igp_cable_length_115 = 115, ++ e1000_igp_cable_length_120 = 120, ++ e1000_igp_cable_length_130 = 130, ++ e1000_igp_cable_length_140 = 140, ++ e1000_igp_cable_length_150 = 150, ++ 
e1000_igp_cable_length_160 = 160, ++ e1000_igp_cable_length_170 = 170, ++ e1000_igp_cable_length_180 = 180 ++} e1000_igp_cable_length; ++ ++typedef enum { ++ e1000_10bt_ext_dist_enable_normal = 0, ++ e1000_10bt_ext_dist_enable_lower, ++ e1000_10bt_ext_dist_enable_undefined = 0xFF ++} e1000_10bt_ext_dist_enable; ++ ++typedef enum { ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++} e1000_rev_polarity; ++ ++typedef enum { ++ e1000_downshift_normal = 0, ++ e1000_downshift_activated, ++ e1000_downshift_undefined = 0xFF ++} e1000_downshift; ++ ++typedef enum { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++} e1000_smart_speed; ++ ++typedef enum { ++ e1000_polarity_reversal_enabled = 0, ++ e1000_polarity_reversal_disabled, ++ e1000_polarity_reversal_undefined = 0xFF ++} e1000_polarity_reversal; ++ ++typedef enum { ++ e1000_auto_x_mode_manual_mdi = 0, ++ e1000_auto_x_mode_manual_mdix, ++ e1000_auto_x_mode_auto1, ++ e1000_auto_x_mode_auto2, ++ e1000_auto_x_mode_undefined = 0xFF ++} e1000_auto_x_mode; ++ ++typedef enum { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++} e1000_1000t_rx_status; ++ ++typedef enum { ++ e1000_phy_m88 = 0, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_undefined = 0xFF ++} e1000_phy_type; ++ ++typedef enum { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++} e1000_ms_type; ++ ++typedef enum { ++ e1000_ffe_config_enabled = 0, ++ e1000_ffe_config_active, ++ e1000_ffe_config_blocked ++} e1000_ffe_config; ++ ++typedef enum { ++ e1000_dsp_config_disabled = 0, ++ e1000_dsp_config_enabled, ++ e1000_dsp_config_activated, ++ e1000_dsp_config_undefined = 0xFF ++} e1000_dsp_config; ++ ++struct e1000_phy_info { ++ e1000_cable_length cable_length; ++ e1000_10bt_ext_dist_enable extended_10bt_distance; ++ e1000_rev_polarity cable_polarity; ++ e1000_downshift downshift; ++ e1000_polarity_reversal polarity_correction; ++ e1000_auto_x_mode mdix_mode; ++ e1000_1000t_rx_status local_rx; ++ e1000_1000t_rx_status remote_rx; ++}; ++ ++struct e1000_phy_stats { ++ uint32_t idle_errors; ++ uint32_t receive_errors; ++}; ++ ++struct e1000_eeprom_info { ++ e1000_eeprom_type type; ++ uint16_t word_size; ++ uint16_t opcode_bits; ++ uint16_t address_bits; ++ uint16_t delay_usec; ++ uint16_t page_size; ++ boolean_t use_eerd; ++ boolean_t use_eewr; ++}; ++ ++/* Flex ASF Information */ ++#define E1000_HOST_IF_MAX_SIZE 2048 ++ ++typedef enum { ++ e1000_byte_align = 0, ++ e1000_word_align = 1, ++ e1000_dword_align = 2 ++} e1000_align_type; ++ ++ ++ ++/* Error Codes */ ++#define E1000_SUCCESS 0 ++#define E1000_ERR_EEPROM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_TYPE 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++ ++/* Function prototypes */ ++/* Initialization */ ++int32_t e1000_reset_hw(struct e1000_hw *hw); ++int32_t e1000_init_hw(struct e1000_hw *hw); ++int32_t e1000_id_led_init(struct e1000_hw * hw); ++int32_t e1000_set_mac_type(struct e1000_hw *hw); ++void e1000_set_media_type(struct e1000_hw *hw); ++ ++/* Link Configuration */ ++int32_t e1000_setup_link(struct e1000_hw *hw); ++int32_t 
e1000_phy_setup_autoneg(struct e1000_hw *hw); ++void e1000_config_collision_dist(struct e1000_hw *hw); ++int32_t e1000_config_fc_after_link_up(struct e1000_hw *hw); ++int32_t e1000_check_for_link(struct e1000_hw *hw); ++int32_t e1000_get_speed_and_duplex(struct e1000_hw *hw, uint16_t * speed, uint16_t * duplex); ++int32_t e1000_wait_autoneg(struct e1000_hw *hw); ++int32_t e1000_force_mac_fc(struct e1000_hw *hw); ++ ++/* PHY */ ++int32_t e1000_read_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *phy_data); ++int32_t e1000_write_phy_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); ++int32_t e1000_phy_hw_reset(struct e1000_hw *hw); ++int32_t e1000_phy_reset(struct e1000_hw *hw); ++void e1000_phy_powerdown_workaround(struct e1000_hw *hw); ++int32_t e1000_kumeran_lock_loss_workaround(struct e1000_hw *hw); ++int32_t e1000_duplex_reversal(struct e1000_hw *hw); ++int32_t e1000_init_lcd_from_nvm_config_region(struct e1000_hw *hw, uint32_t cnf_base_addr, uint32_t cnf_size); ++int32_t e1000_init_lcd_from_nvm(struct e1000_hw *hw); ++int32_t e1000_detect_gig_phy(struct e1000_hw *hw); ++int32_t e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_phy_m88_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_phy_igp_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info); ++int32_t e1000_get_cable_length(struct e1000_hw *hw, uint16_t *min_length, uint16_t *max_length); ++int32_t e1000_check_polarity(struct e1000_hw *hw, uint16_t *polarity); ++int32_t e1000_check_downshift(struct e1000_hw *hw); ++int32_t e1000_validate_mdi_setting(struct e1000_hw *hw); ++int32_t e1000_read_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t *data); ++int32_t e1000_write_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, uint16_t data); ++ ++/* EEPROM Functions */ ++int32_t e1000_init_eeprom_params(struct e1000_hw *hw); ++boolean_t e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw); ++int32_t e1000_read_eeprom_eerd(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data); ++int32_t e1000_write_eeprom_eewr(struct e1000_hw *hw, uint16_t offset, uint16_t words, uint16_t *data); ++int32_t e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd); ++ ++/* MNG HOST IF functions */ ++uint32_t e1000_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 /* Host Interface data length */ ++ ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 /* Time in ms to process MNG command */ ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 /* Cookie offset */ ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 /* Cookie length */ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_ICH_IAMT_MODE 0x2 ++#define E1000_IAMT_SIGNATURE 0x544D4149 /* Intel(R) Active Management Technology signature */ ++ ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT 0x1 /* DHCP parsing enabled */ ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT 0x2 /* DHCP parsing enabled */ ++#define E1000_VFTA_ENTRY_SHIFT 0x5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++struct e1000_host_mng_command_header { ++ uint8_t command_id; ++ uint8_t checksum; ++ uint16_t reserved1; ++ uint16_t reserved2; ++ uint16_t command_length; ++}; ++ ++struct e1000_host_mng_command_info { ++ struct e1000_host_mng_command_header command_header; /* Command Head/Command Result Head has 4 bytes */ ++ uint8_t command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; /* Command data can length 0..0x658*/ 
++}; ++#ifdef E1000_BIG_ENDIAN ++struct e1000_host_mng_dhcp_cookie{ ++ uint32_t signature; ++ uint16_t vlan_id; ++ uint8_t reserved0; ++ uint8_t status; ++ uint32_t reserved1; ++ uint8_t checksum; ++ uint8_t reserved3; ++ uint16_t reserved2; ++}; ++#else ++struct e1000_host_mng_dhcp_cookie{ ++ uint32_t signature; ++ uint8_t status; ++ uint8_t reserved0; ++ uint16_t vlan_id; ++ uint32_t reserved1; ++ uint16_t reserved2; ++ uint8_t reserved3; ++ uint8_t checksum; ++}; ++#endif ++ ++int32_t e1000_mng_write_dhcp_info(struct e1000_hw *hw, uint8_t *buffer, ++ uint16_t length); ++boolean_t e1000_check_mng_mode(struct e1000_hw *hw); ++boolean_t e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); ++int32_t e1000_mng_enable_host_if(struct e1000_hw *hw); ++int32_t e1000_mng_host_if_write(struct e1000_hw *hw, uint8_t *buffer, ++ uint16_t length, uint16_t offset, uint8_t *sum); ++int32_t e1000_mng_write_cmd_header(struct e1000_hw* hw, ++ struct e1000_host_mng_command_header* hdr); ++ ++int32_t e1000_mng_write_commit(struct e1000_hw *hw); ++ ++int32_t e1000_read_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data); ++int32_t e1000_validate_eeprom_checksum(struct e1000_hw *hw); ++int32_t e1000_update_eeprom_checksum(struct e1000_hw *hw); ++int32_t e1000_write_eeprom(struct e1000_hw *hw, uint16_t reg, uint16_t words, uint16_t *data); ++int32_t e1000_read_part_num(struct e1000_hw *hw, uint32_t * part_num); ++int32_t e1000_read_mac_addr(struct e1000_hw * hw); ++int32_t e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask); ++void e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask); ++void e1000_release_software_flag(struct e1000_hw *hw); ++int32_t e1000_get_software_flag(struct e1000_hw *hw); ++ ++/* Filters (multicast, vlan, receive) */ ++void e1000_init_rx_addrs(struct e1000_hw *hw); ++void e1000_mc_addr_list_update(struct e1000_hw *hw, uint8_t * mc_addr_list, uint32_t mc_addr_count, uint32_t pad, uint32_t rar_used_count); ++uint32_t e1000_hash_mc_addr(struct e1000_hw *hw, uint8_t * mc_addr); ++void e1000_mta_set(struct e1000_hw *hw, uint32_t hash_value); ++void e1000_rar_set(struct e1000_hw *hw, uint8_t * mc_addr, uint32_t rar_index); ++void e1000_write_vfta(struct e1000_hw *hw, uint32_t offset, uint32_t value); ++void e1000_clear_vfta(struct e1000_hw *hw); ++ ++/* LED functions */ ++int32_t e1000_setup_led(struct e1000_hw *hw); ++int32_t e1000_cleanup_led(struct e1000_hw *hw); ++int32_t e1000_led_on(struct e1000_hw *hw); ++int32_t e1000_led_off(struct e1000_hw *hw); ++int32_t e1000_blink_led_start(struct e1000_hw *hw); ++ ++/* Adaptive IFS Functions */ ++ ++/* Everything else */ ++void e1000_clear_hw_cntrs(struct e1000_hw *hw); ++void e1000_reset_adaptive(struct e1000_hw *hw); ++void e1000_update_adaptive(struct e1000_hw *hw); ++void e1000_tbi_adjust_stats(struct e1000_hw *hw, struct e1000_hw_stats *stats, uint32_t frame_len, uint8_t * mac_addr); ++void e1000_get_bus_info(struct e1000_hw *hw); ++void e1000_pci_set_mwi(struct e1000_hw *hw); ++void e1000_pci_clear_mwi(struct e1000_hw *hw); ++void e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); ++void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t * value); ++/* Port I/O is only supported on 82544 and newer */ ++uint32_t e1000_io_read(struct e1000_hw *hw, unsigned long port); ++uint32_t e1000_read_reg_io(struct e1000_hw *hw, uint32_t offset); ++void e1000_io_write(struct e1000_hw *hw, unsigned long port, uint32_t value); ++void e1000_write_reg_io(struct e1000_hw *hw, uint32_t 
offset, uint32_t value); ++int32_t e1000_config_dsp_after_link_change(struct e1000_hw *hw, boolean_t link_up); ++int32_t e1000_set_d3_lplu_state(struct e1000_hw *hw, boolean_t active); ++int32_t e1000_set_d0_lplu_state(struct e1000_hw *hw, boolean_t active); ++void e1000_set_pci_express_master_disable(struct e1000_hw *hw); ++void e1000_enable_pciex_master(struct e1000_hw *hw); ++int32_t e1000_disable_pciex_master(struct e1000_hw *hw); ++int32_t e1000_get_auto_rd_done(struct e1000_hw *hw); ++int32_t e1000_get_phy_cfg_done(struct e1000_hw *hw); ++int32_t e1000_get_software_semaphore(struct e1000_hw *hw); ++void e1000_release_software_semaphore(struct e1000_hw *hw); ++int32_t e1000_check_phy_reset_block(struct e1000_hw *hw); ++int32_t e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw); ++void e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw); ++int32_t e1000_commit_shadow_ram(struct e1000_hw *hw); ++uint8_t e1000_arc_subsystem_valid(struct e1000_hw *hw); ++int32_t e1000_set_pci_ex_no_snoop(struct e1000_hw *hw, uint32_t no_snoop); ++ ++int32_t e1000_read_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t *data); ++int32_t e1000_verify_write_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t byte); ++int32_t e1000_write_ich8_byte(struct e1000_hw *hw, uint32_t index, ++ uint8_t byte); ++int32_t e1000_read_ich8_word(struct e1000_hw *hw, uint32_t index, ++ uint16_t *data); ++int32_t e1000_write_ich8_word(struct e1000_hw *hw, uint32_t index, ++ uint16_t word); ++int32_t e1000_read_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t *data); ++int32_t e1000_write_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t data); ++int32_t e1000_read_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++int32_t e1000_write_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++int32_t e1000_erase_ich8_4k_segment(struct e1000_hw *hw, uint32_t segment); ++int32_t e1000_ich8_cycle_init(struct e1000_hw *hw); ++int32_t e1000_ich8_flash_cycle(struct e1000_hw *hw, uint32_t timeout); ++int32_t e1000_phy_ife_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info); ++int32_t e1000_ife_disable_dynamic_power_down(struct e1000_hw *hw); ++int32_t e1000_ife_enable_dynamic_power_down(struct e1000_hw *hw); ++ ++#define E1000_READ_REG_IO(a, reg) \ ++ e1000_read_reg_io((a), E1000_##reg) ++#define E1000_WRITE_REG_IO(a, reg, val) \ ++ e1000_write_reg_io((a), E1000_##reg, val) ++ ++/* PCI Device IDs */ ++#define E1000_DEV_ID_82542 0x1000 ++#define E1000_DEV_ID_82543GC_FIBER 0x1001 ++#define E1000_DEV_ID_82543GC_COPPER 0x1004 ++#define E1000_DEV_ID_82544EI_COPPER 0x1008 ++#define E1000_DEV_ID_82544EI_FIBER 0x1009 ++#define E1000_DEV_ID_82544GC_COPPER 0x100C ++#define E1000_DEV_ID_82544GC_LOM 0x100D ++#define E1000_DEV_ID_82540EM 0x100E ++#define E1000_DEV_ID_82540EM_LOM 0x1015 ++#define E1000_DEV_ID_82540EP_LOM 0x1016 ++#define E1000_DEV_ID_82540EP 0x1017 ++#define E1000_DEV_ID_82540EP_LP 0x101E ++#define E1000_DEV_ID_82545EM_COPPER 0x100F ++#define E1000_DEV_ID_82545EM_FIBER 0x1011 ++#define E1000_DEV_ID_82545GM_COPPER 0x1026 ++#define E1000_DEV_ID_82545GM_FIBER 0x1027 ++#define E1000_DEV_ID_82545GM_SERDES 0x1028 ++#define E1000_DEV_ID_82546EB_COPPER 0x1010 ++#define E1000_DEV_ID_82546EB_FIBER 0x1012 ++#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D ++#define E1000_DEV_ID_82541EI 0x1013 ++#define E1000_DEV_ID_82541EI_MOBILE 0x1018 ++#define E1000_DEV_ID_82541ER_LOM 0x1014 ++#define 
E1000_DEV_ID_82541ER 0x1078 ++#define E1000_DEV_ID_82547GI 0x1075 ++#define E1000_DEV_ID_82541GI 0x1076 ++#define E1000_DEV_ID_82541GI_MOBILE 0x1077 ++#define E1000_DEV_ID_82541GI_LF 0x107C ++#define E1000_DEV_ID_82546GB_COPPER 0x1079 ++#define E1000_DEV_ID_82546GB_FIBER 0x107A ++#define E1000_DEV_ID_82546GB_SERDES 0x107B ++#define E1000_DEV_ID_82546GB_PCIE 0x108A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 ++#define E1000_DEV_ID_82547EI 0x1019 ++#define E1000_DEV_ID_82547EI_MOBILE 0x101A ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE 0x10BC ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++ ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++ ++ ++#define NODE_ADDRESS_SIZE 6 ++#define ETH_LENGTH_OF_ADDRESS 6 ++ ++/* MAC decode size is 128K - This is the size of BAR0 */ ++#define MAC_DECODE_SIZE (128 * 1024) ++ ++#define E1000_82542_2_0_REV_ID 2 ++#define E1000_82542_2_1_REV_ID 3 ++#define E1000_REVISION_0 0 ++#define E1000_REVISION_1 1 ++#define E1000_REVISION_2 2 ++#define E1000_REVISION_3 3 ++ ++#define SPEED_10 10 ++#define SPEED_100 100 ++#define SPEED_1000 1000 ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++/* The sizes (in bytes) of a ethernet packet */ ++#define ENET_HEADER_SIZE 14 ++#define MAXIMUM_ETHERNET_FRAME_SIZE 1518 /* With FCS */ ++#define MINIMUM_ETHERNET_FRAME_SIZE 64 /* With FCS */ ++#define ETHERNET_FCS_SIZE 4 ++#define MAXIMUM_ETHERNET_PACKET_SIZE \ ++ (MAXIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) ++#define MINIMUM_ETHERNET_PACKET_SIZE \ ++ (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE) ++#define CRC_LENGTH ETHERNET_FCS_SIZE ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++ ++/* 802.1q VLAN Packet Sizes */ ++#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMAed) */ ++ ++/* Ethertype field values */ ++#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ ++#define ETHERNET_IP_TYPE 0x0800 /* IP packets */ ++#define ETHERNET_ARP_TYPE 0x0806 /* Address Resolution Protocol (ARP) */ ++ ++/* Packet Header defines */ ++#define IP_PROTOCOL_TCP 6 ++#define IP_PROTOCOL_UDP 0x11 ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. Each bit is documented below: ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ */ ++#define POLL_IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ) ++ ++/* This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Additional interrupts need to be handled for e1000_ich8lan: ++ DSW = The FW changed the status of the DISSW bit in FWSM ++ PHYINT = The LAN connected device generates an interrupt ++ EPRST = Manageability reset event */ ++#define IMS_ICH8LAN_ENABLE_MASK (\ ++ E1000_IMS_DSW | \ ++ E1000_IMS_PHYINT | \ ++ E1000_IMS_EPRST) ++ ++/* Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. We ++ * reserve one of these spots for our directed address, allowing us room for ++ * E1000_RAR_ENTRIES - 1 multicast addresses. ++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAR_ENTRIES_ICH8LAN 7 ++ ++#define MIN_NUMBER_OF_DESCRIPTORS 8 ++#define MAX_NUMBER_OF_DESCRIPTORS 0xFFF8 ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ uint16_t length; /* Length of data DMAed into data buffer */ ++ uint16_t csum; /* Packet checksum */ ++ uint8_t status; /* Descriptor status */ ++ uint8_t errors; /* Descriptor Errors */ ++ uint16_t special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ uint64_t buffer_addr; ++ uint64_t reserved; ++ } read; ++ struct { ++ struct { ++ uint32_t mrq; /* Multiple Rx Queues */ ++ union { ++ uint32_t rss; /* RSS Hash */ ++ struct { ++ uint16_t ip_id; /* IP id */ ++ uint16_t csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ uint32_t status_error; /* ext status/error */ ++ uint16_t length; ++ uint16_t vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ uint64_t buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ uint32_t mrq; /* Multiple Rx Queues */ ++ union { ++ uint32_t rss; /* RSS Hash */ ++ struct { ++ uint16_t ip_id; /* IP id */ ++ uint16_t csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ uint32_t status_error; /* ext status/error */ ++ uint16_t length0; /* length of buffer 0 */ ++ uint16_t vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ uint16_t header_status; ++ uint16_t length[3]; /* length of buffers 1-3 */ ++ } upper; ++ uint64_t reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Receive Decriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ ++#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ ++#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ ++#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ ++#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ ++#define 
E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ ++#define E1000_RXD_SPC_PRI_SHIFT 13 ++#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ ++#define E1000_RXD_SPC_CFI_SHIFT 12 ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_TCPE 0x20000000 ++#define E1000_RXDEXT_STATERR_IPE 0x40000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ uint32_t data; ++ struct { ++ uint16_t length; /* Data buffer length */ ++ uint8_t cso; /* Checksum offset */ ++ uint8_t cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t css; /* Checksum start */ ++ uint16_t special; ++ } fields; ++ } upper; ++}; ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ uint32_t ip_config; ++ struct { ++ uint8_t ipcss; /* IP checksum start */ ++ uint8_t ipcso; 
/* IP checksum offset */ ++ uint16_t ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ uint32_t tcp_config; ++ struct { ++ uint8_t tucss; /* TCP checksum start */ ++ uint8_t tucso; /* TCP checksum offset */ ++ uint16_t tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ uint32_t cmd_and_length; /* */ ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t hdr_len; /* Header length */ ++ uint16_t mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ uint64_t buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ uint32_t data; ++ struct { ++ uint16_t length; /* Data buffer length */ ++ uint8_t typ_len_ext; /* */ ++ uint8_t cmd; /* */ ++ } flags; ++ } lower; ++ union { ++ uint32_t data; ++ struct { ++ uint8_t status; /* Descriptor status */ ++ uint8_t popts; /* Packet Options */ ++ uint16_t special; /* */ ++ } fields; ++ } upper; ++}; ++ ++/* Filters */ ++#define E1000_NUM_UNICAST 16 /* Unicast filter entries */ ++#define E1000_MC_TBL_SIZE 128 /* Multicast Filter Table (4096 bits) */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++#define E1000_NUM_UNICAST_ICH8LAN 7 ++#define E1000_MC_TBL_SIZE_ICH8LAN 32 ++ ++ ++/* Receive Address Register */ ++struct e1000_rar { ++ volatile uint32_t low; /* receive address low */ ++ volatile uint32_t high; /* receive address high */ ++}; ++ ++/* Number of entries in the Multicast Table Array (MTA). */ ++#define E1000_NUM_MTA_REGISTERS 128 ++#define E1000_NUM_MTA_REGISTERS_ICH8LAN 32 ++ ++/* IPv4 Address Table Entry */ ++struct e1000_ipv4_at_entry { ++ volatile uint32_t ipv4_addr; /* IP Address (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Four wakeup IP addresses are supported */ ++#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4 ++#define E1000_IP4AT_SIZE E1000_WAKEUP_IP_ADDRESS_COUNT_MAX ++#define E1000_IP4AT_SIZE_ICH8LAN 3 ++#define E1000_IP6AT_SIZE 1 ++ ++/* IPv6 Address Table Entry */ ++struct e1000_ipv6_at_entry { ++ volatile uint8_t ipv6_addr[16]; ++}; ++ ++/* Flexible Filter Length Table Entry */ ++struct e1000_fflt_entry { ++ volatile uint32_t length; /* Flexible Filter Length (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Flexible Filter Mask Table Entry */ ++struct e1000_ffmt_entry { ++ volatile uint32_t mask; /* Flexible Filter Mask (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Flexible Filter Value Table Entry */ ++struct e1000_ffvt_entry { ++ volatile uint32_t value; /* Flexible Filter Value (RW) */ ++ volatile uint32_t reserved; ++}; ++ ++/* Four Flexible Filters are supported */ ++#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4 ++ ++/* Each Flexible Filter is at most 128 (0x80) bytes in length */ ++#define E1000_FLEXIBLE_FILTER_SIZE_MAX 128 ++ ++#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX ++#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX ++ ++#define E1000_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Register Set. (82543, 82544) ++ * ++ * Registers are defined to be 32 bits and should be accessed as 32 bit values. ++ * These registers are physically located on the NIC, but are mapped into the ++ * host memory address space. 
++ * ++ * RW - register is both readable and writable ++ * RO - register is read only ++ * WO - register is write only ++ * R/clr - register is read only and is cleared when read ++ * A - register array ++ */ ++#define E1000_CTRL 0x00000 /* Device Control - RW */ ++#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ ++#define E1000_STATUS 0x00008 /* Device Status - RO */ ++#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ ++#define E1000_EERD 0x00014 /* EEPROM Read - RW */ ++#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ ++#define E1000_FLA 0x0001C /* Flash Access - RW */ ++#define E1000_MDIC 0x00020 /* MDI Control - RW */ ++#define E1000_SCTL 0x00024 /* SerDes Control - RW */ ++#define E1000_FEXTNVM 0x00028 /* Future Extended NVM register */ ++#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ ++#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ ++#define E1000_FCT 0x00030 /* Flow Control Type - RW */ ++#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ ++#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ ++#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ ++#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ ++#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ ++#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ ++#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ ++#define E1000_RCTL 0x00100 /* RX Control - RW */ ++#define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ ++#define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ ++#define E1000_RDBAH1 0x02904 /* RX Descriptor Base Address High (1) - RW */ ++#define E1000_RDLEN1 0x02908 /* RX Descriptor Length (1) - RW */ ++#define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ ++#define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ ++#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ ++#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ ++#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ ++#define E1000_TCTL 0x00400 /* TX Control - RW */ ++#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ ++#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ ++#define E1000_TBT 0x00448 /* TX Burst Timer - RW */ ++#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ ++#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ ++#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ ++#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ ++#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ ++#define FEXTNVM_SW_CONFIG 0x0001 ++#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ ++#define E1000_PBS 0x01008 /* Packet Buffer Size */ ++#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ ++#define E1000_FLASH_UPDATES 1000 ++#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ ++#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ ++#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ ++#define E1000_FLSWCTL 0x01030 /* FLASH control register */ ++#define E1000_FLSWDATA 0x01034 /* FLASH data register */ ++#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ ++#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ ++#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ ++#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ ++#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ ++#define E1000_PSRCTL 0x02170 
/* Packet Split Receive Control - RW */ ++#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ ++#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ ++#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */ ++#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ ++#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ ++#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ ++#define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ ++#define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ ++#define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ ++#define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ ++#define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ ++#define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ ++#define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ ++#define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ ++#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */ ++#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */ ++#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ ++#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ ++#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ ++#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ ++#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ ++#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ ++#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ ++#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ ++#define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ ++#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ ++#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ ++#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ ++#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ ++#define E1000_TARC0 0x03840 /* TX Arbitration Count (0) */ ++#define E1000_TDBAL1 0x03900 /* TX Desc Base Address Low (1) - RW */ ++#define E1000_TDBAH1 0x03904 /* TX Desc Base Address High (1) - RW */ ++#define E1000_TDLEN1 0x03908 /* TX Desc Length (1) - RW */ ++#define E1000_TDH1 0x03910 /* TX Desc Head (1) - RW */ ++#define E1000_TDT1 0x03918 /* TX Desc Tail (1) - RW */ ++#define E1000_TXDCTL1 0x03928 /* TX Descriptor Control (1) - RW */ ++#define E1000_TARC1 0x03940 /* TX Arbitration Count (1) */ ++#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ ++#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ ++#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ ++#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ ++#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ ++#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ ++#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ ++#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ ++#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ ++#define E1000_COLC 0x04028 /* Collision Count - R/clr */ ++#define E1000_DC 0x04030 /* Defer Count - R/clr */ ++#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ ++#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ ++#define 
E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ ++#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ ++#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ ++#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ ++#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ ++#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ ++#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ ++#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ ++#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ ++#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ ++#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ ++#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ ++#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ ++#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ ++#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ ++#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ ++#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ ++#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ ++#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ ++#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ ++#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ ++#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ ++#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ ++#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ ++#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ ++#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ ++#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ ++#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ ++#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ ++#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ ++#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ ++#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ ++#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ ++#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ ++#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ ++#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ ++#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ ++#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ ++#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ ++#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ ++#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ ++#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ ++#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ ++#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ ++#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ ++#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ ++#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ ++#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ ++#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ ++#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ ++#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ ++#define E1000_ICTXQMTC 0x0411C /* 
Interrupt Cause Tx Queue Minimum Threshold Count */ ++#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ ++#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ ++#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ ++#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ ++#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ ++#define E1000_RA 0x05400 /* Receive Address - RW Array */ ++#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ ++#define E1000_WUC 0x05800 /* Wakeup Control - RW */ ++#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ ++#define E1000_WUS 0x05810 /* Wakeup Status - RO */ ++#define E1000_MANC 0x05820 /* Management Control - RW */ ++#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ ++#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ ++#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ ++#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ ++#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ ++#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ ++#define E1000_HOST_IF 0x08800 /* Host Interface */ ++#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ ++#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ ++ ++#define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ ++#define E1000_MDPHYA 0x0003C /* PHY address - RW */ ++#define E1000_MANC2H 0x05860 /* Managment Control To Host - RW */ ++#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ ++ ++#define E1000_GCR 0x05B00 /* PCI-Ex Control */ ++#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ ++#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ ++#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ ++#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ ++#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ ++#define E1000_SWSM 0x05B50 /* SW Semaphore */ ++#define E1000_FWSM 0x05B54 /* FW Semaphore */ ++#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ ++#define E1000_HICR 0x08F00 /* Host Inteface Control */ ++ ++/* RSS registers */ ++#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ ++#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ ++#define E1000_RETA 0x05C00 /* Redirection Table - RW Array */ ++#define E1000_RSSRK 0x05C80 /* RSS Random Key - RW Array */ ++#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ ++#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ ++/* Register Set (82542) ++ * ++ * Some of the 82542 registers are located at different offsets than they are ++ * in more current versions of the 8254x. Despite the difference in location, ++ * the registers function in the same manner. 
++ */ ++#define E1000_82542_CTRL E1000_CTRL ++#define E1000_82542_CTRL_DUP E1000_CTRL_DUP ++#define E1000_82542_STATUS E1000_STATUS ++#define E1000_82542_EECD E1000_EECD ++#define E1000_82542_EERD E1000_EERD ++#define E1000_82542_CTRL_EXT E1000_CTRL_EXT ++#define E1000_82542_FLA E1000_FLA ++#define E1000_82542_MDIC E1000_MDIC ++#define E1000_82542_SCTL E1000_SCTL ++#define E1000_82542_FEXTNVM E1000_FEXTNVM ++#define E1000_82542_FCAL E1000_FCAL ++#define E1000_82542_FCAH E1000_FCAH ++#define E1000_82542_FCT E1000_FCT ++#define E1000_82542_VET E1000_VET ++#define E1000_82542_RA 0x00040 ++#define E1000_82542_ICR E1000_ICR ++#define E1000_82542_ITR E1000_ITR ++#define E1000_82542_ICS E1000_ICS ++#define E1000_82542_IMS E1000_IMS ++#define E1000_82542_IMC E1000_IMC ++#define E1000_82542_RCTL E1000_RCTL ++#define E1000_82542_RDTR 0x00108 ++#define E1000_82542_RDBAL 0x00110 ++#define E1000_82542_RDBAH 0x00114 ++#define E1000_82542_RDLEN 0x00118 ++#define E1000_82542_RDH 0x00120 ++#define E1000_82542_RDT 0x00128 ++#define E1000_82542_RDTR0 E1000_82542_RDTR ++#define E1000_82542_RDBAL0 E1000_82542_RDBAL ++#define E1000_82542_RDBAH0 E1000_82542_RDBAH ++#define E1000_82542_RDLEN0 E1000_82542_RDLEN ++#define E1000_82542_RDH0 E1000_82542_RDH ++#define E1000_82542_RDT0 E1000_82542_RDT ++#define E1000_82542_SRRCTL(_n) (0x280C + ((_n) << 8)) /* Split and Replication ++ * RX Control - RW */ ++#define E1000_82542_DCA_RXCTRL(_n) (0x02814 + ((_n) << 8)) ++#define E1000_82542_RDBAH3 0x02B04 /* RX Desc Base High Queue 3 - RW */ ++#define E1000_82542_RDBAL3 0x02B00 /* RX Desc Low Queue 3 - RW */ ++#define E1000_82542_RDLEN3 0x02B08 /* RX Desc Length Queue 3 - RW */ ++#define E1000_82542_RDH3 0x02B10 /* RX Desc Head Queue 3 - RW */ ++#define E1000_82542_RDT3 0x02B18 /* RX Desc Tail Queue 3 - RW */ ++#define E1000_82542_RDBAL2 0x02A00 /* RX Desc Base Low Queue 2 - RW */ ++#define E1000_82542_RDBAH2 0x02A04 /* RX Desc Base High Queue 2 - RW */ ++#define E1000_82542_RDLEN2 0x02A08 /* RX Desc Length Queue 2 - RW */ ++#define E1000_82542_RDH2 0x02A10 /* RX Desc Head Queue 2 - RW */ ++#define E1000_82542_RDT2 0x02A18 /* RX Desc Tail Queue 2 - RW */ ++#define E1000_82542_RDTR1 0x00130 ++#define E1000_82542_RDBAL1 0x00138 ++#define E1000_82542_RDBAH1 0x0013C ++#define E1000_82542_RDLEN1 0x00140 ++#define E1000_82542_RDH1 0x00148 ++#define E1000_82542_RDT1 0x00150 ++#define E1000_82542_FCRTH 0x00160 ++#define E1000_82542_FCRTL 0x00168 ++#define E1000_82542_FCTTV E1000_FCTTV ++#define E1000_82542_TXCW E1000_TXCW ++#define E1000_82542_RXCW E1000_RXCW ++#define E1000_82542_MTA 0x00200 ++#define E1000_82542_TCTL E1000_TCTL ++#define E1000_82542_TCTL_EXT E1000_TCTL_EXT ++#define E1000_82542_TIPG E1000_TIPG ++#define E1000_82542_TDBAL 0x00420 ++#define E1000_82542_TDBAH 0x00424 ++#define E1000_82542_TDLEN 0x00428 ++#define E1000_82542_TDH 0x00430 ++#define E1000_82542_TDT 0x00438 ++#define E1000_82542_TIDV 0x00440 ++#define E1000_82542_TBT E1000_TBT ++#define E1000_82542_AIT E1000_AIT ++#define E1000_82542_VFTA 0x00600 ++#define E1000_82542_LEDCTL E1000_LEDCTL ++#define E1000_82542_PBA E1000_PBA ++#define E1000_82542_PBS E1000_PBS ++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL ++#define E1000_82542_EEARBC E1000_EEARBC ++#define E1000_82542_FLASHT E1000_FLASHT ++#define E1000_82542_EEWR E1000_EEWR ++#define E1000_82542_FLSWCTL E1000_FLSWCTL ++#define E1000_82542_FLSWDATA E1000_FLSWDATA ++#define E1000_82542_FLSWCNT E1000_FLSWCNT ++#define E1000_82542_FLOP E1000_FLOP ++#define E1000_82542_EXTCNF_CTRL E1000_EXTCNF_CTRL ++#define 
E1000_82542_EXTCNF_SIZE E1000_EXTCNF_SIZE ++#define E1000_82542_PHY_CTRL E1000_PHY_CTRL ++#define E1000_82542_ERT E1000_ERT ++#define E1000_82542_RXDCTL E1000_RXDCTL ++#define E1000_82542_RXDCTL1 E1000_RXDCTL1 ++#define E1000_82542_RADV E1000_RADV ++#define E1000_82542_RSRPD E1000_RSRPD ++#define E1000_82542_TXDMAC E1000_TXDMAC ++#define E1000_82542_KABGTXD E1000_KABGTXD ++#define E1000_82542_TDFHS E1000_TDFHS ++#define E1000_82542_TDFTS E1000_TDFTS ++#define E1000_82542_TDFPC E1000_TDFPC ++#define E1000_82542_TXDCTL E1000_TXDCTL ++#define E1000_82542_TADV E1000_TADV ++#define E1000_82542_TSPMT E1000_TSPMT ++#define E1000_82542_CRCERRS E1000_CRCERRS ++#define E1000_82542_ALGNERRC E1000_ALGNERRC ++#define E1000_82542_SYMERRS E1000_SYMERRS ++#define E1000_82542_RXERRC E1000_RXERRC ++#define E1000_82542_MPC E1000_MPC ++#define E1000_82542_SCC E1000_SCC ++#define E1000_82542_ECOL E1000_ECOL ++#define E1000_82542_MCC E1000_MCC ++#define E1000_82542_LATECOL E1000_LATECOL ++#define E1000_82542_COLC E1000_COLC ++#define E1000_82542_DC E1000_DC ++#define E1000_82542_TNCRS E1000_TNCRS ++#define E1000_82542_SEC E1000_SEC ++#define E1000_82542_CEXTERR E1000_CEXTERR ++#define E1000_82542_RLEC E1000_RLEC ++#define E1000_82542_XONRXC E1000_XONRXC ++#define E1000_82542_XONTXC E1000_XONTXC ++#define E1000_82542_XOFFRXC E1000_XOFFRXC ++#define E1000_82542_XOFFTXC E1000_XOFFTXC ++#define E1000_82542_FCRUC E1000_FCRUC ++#define E1000_82542_PRC64 E1000_PRC64 ++#define E1000_82542_PRC127 E1000_PRC127 ++#define E1000_82542_PRC255 E1000_PRC255 ++#define E1000_82542_PRC511 E1000_PRC511 ++#define E1000_82542_PRC1023 E1000_PRC1023 ++#define E1000_82542_PRC1522 E1000_PRC1522 ++#define E1000_82542_GPRC E1000_GPRC ++#define E1000_82542_BPRC E1000_BPRC ++#define E1000_82542_MPRC E1000_MPRC ++#define E1000_82542_GPTC E1000_GPTC ++#define E1000_82542_GORCL E1000_GORCL ++#define E1000_82542_GORCH E1000_GORCH ++#define E1000_82542_GOTCL E1000_GOTCL ++#define E1000_82542_GOTCH E1000_GOTCH ++#define E1000_82542_RNBC E1000_RNBC ++#define E1000_82542_RUC E1000_RUC ++#define E1000_82542_RFC E1000_RFC ++#define E1000_82542_ROC E1000_ROC ++#define E1000_82542_RJC E1000_RJC ++#define E1000_82542_MGTPRC E1000_MGTPRC ++#define E1000_82542_MGTPDC E1000_MGTPDC ++#define E1000_82542_MGTPTC E1000_MGTPTC ++#define E1000_82542_TORL E1000_TORL ++#define E1000_82542_TORH E1000_TORH ++#define E1000_82542_TOTL E1000_TOTL ++#define E1000_82542_TOTH E1000_TOTH ++#define E1000_82542_TPR E1000_TPR ++#define E1000_82542_TPT E1000_TPT ++#define E1000_82542_PTC64 E1000_PTC64 ++#define E1000_82542_PTC127 E1000_PTC127 ++#define E1000_82542_PTC255 E1000_PTC255 ++#define E1000_82542_PTC511 E1000_PTC511 ++#define E1000_82542_PTC1023 E1000_PTC1023 ++#define E1000_82542_PTC1522 E1000_PTC1522 ++#define E1000_82542_MPTC E1000_MPTC ++#define E1000_82542_BPTC E1000_BPTC ++#define E1000_82542_TSCTC E1000_TSCTC ++#define E1000_82542_TSCTFC E1000_TSCTFC ++#define E1000_82542_RXCSUM E1000_RXCSUM ++#define E1000_82542_WUC E1000_WUC ++#define E1000_82542_WUFC E1000_WUFC ++#define E1000_82542_WUS E1000_WUS ++#define E1000_82542_MANC E1000_MANC ++#define E1000_82542_IPAV E1000_IPAV ++#define E1000_82542_IP4AT E1000_IP4AT ++#define E1000_82542_IP6AT E1000_IP6AT ++#define E1000_82542_WUPL E1000_WUPL ++#define E1000_82542_WUPM E1000_WUPM ++#define E1000_82542_FFLT E1000_FFLT ++#define E1000_82542_TDFH 0x08010 ++#define E1000_82542_TDFT 0x08018 ++#define E1000_82542_FFMT E1000_FFMT ++#define E1000_82542_FFVT E1000_FFVT ++#define E1000_82542_HOST_IF E1000_HOST_IF ++#define 
E1000_82542_IAM E1000_IAM ++#define E1000_82542_EEMNGCTL E1000_EEMNGCTL ++#define E1000_82542_PSRCTL E1000_PSRCTL ++#define E1000_82542_RAID E1000_RAID ++#define E1000_82542_TARC0 E1000_TARC0 ++#define E1000_82542_TDBAL1 E1000_TDBAL1 ++#define E1000_82542_TDBAH1 E1000_TDBAH1 ++#define E1000_82542_TDLEN1 E1000_TDLEN1 ++#define E1000_82542_TDH1 E1000_TDH1 ++#define E1000_82542_TDT1 E1000_TDT1 ++#define E1000_82542_TXDCTL1 E1000_TXDCTL1 ++#define E1000_82542_TARC1 E1000_TARC1 ++#define E1000_82542_RFCTL E1000_RFCTL ++#define E1000_82542_GCR E1000_GCR ++#define E1000_82542_GSCL_1 E1000_GSCL_1 ++#define E1000_82542_GSCL_2 E1000_GSCL_2 ++#define E1000_82542_GSCL_3 E1000_GSCL_3 ++#define E1000_82542_GSCL_4 E1000_GSCL_4 ++#define E1000_82542_FACTPS E1000_FACTPS ++#define E1000_82542_SWSM E1000_SWSM ++#define E1000_82542_FWSM E1000_FWSM ++#define E1000_82542_FFLT_DBG E1000_FFLT_DBG ++#define E1000_82542_IAC E1000_IAC ++#define E1000_82542_ICRXPTC E1000_ICRXPTC ++#define E1000_82542_ICRXATC E1000_ICRXATC ++#define E1000_82542_ICTXPTC E1000_ICTXPTC ++#define E1000_82542_ICTXATC E1000_ICTXATC ++#define E1000_82542_ICTXQEC E1000_ICTXQEC ++#define E1000_82542_ICTXQMTC E1000_ICTXQMTC ++#define E1000_82542_ICRXDMTC E1000_ICRXDMTC ++#define E1000_82542_ICRXOC E1000_ICRXOC ++#define E1000_82542_HICR E1000_HICR ++ ++#define E1000_82542_CPUVEC E1000_CPUVEC ++#define E1000_82542_MRQC E1000_MRQC ++#define E1000_82542_RETA E1000_RETA ++#define E1000_82542_RSSRK E1000_RSSRK ++#define E1000_82542_RSSIM E1000_RSSIM ++#define E1000_82542_RSSIR E1000_RSSIR ++#define E1000_82542_KUMCTRLSTA E1000_KUMCTRLSTA ++#define E1000_82542_SW_FW_SYNC E1000_SW_FW_SYNC ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ uint64_t crcerrs; ++ uint64_t algnerrc; ++ uint64_t symerrs; ++ uint64_t rxerrc; ++ uint64_t mpc; ++ uint64_t scc; ++ uint64_t ecol; ++ uint64_t mcc; ++ uint64_t latecol; ++ uint64_t colc; ++ uint64_t dc; ++ uint64_t tncrs; ++ uint64_t sec; ++ uint64_t cexterr; ++ uint64_t rlec; ++ uint64_t xonrxc; ++ uint64_t xontxc; ++ uint64_t xoffrxc; ++ uint64_t xofftxc; ++ uint64_t fcruc; ++ uint64_t prc64; ++ uint64_t prc127; ++ uint64_t prc255; ++ uint64_t prc511; ++ uint64_t prc1023; ++ uint64_t prc1522; ++ uint64_t gprc; ++ uint64_t bprc; ++ uint64_t mprc; ++ uint64_t gptc; ++ uint64_t gorcl; ++ uint64_t gorch; ++ uint64_t gotcl; ++ uint64_t gotch; ++ uint64_t rnbc; ++ uint64_t ruc; ++ uint64_t rfc; ++ uint64_t roc; ++ uint64_t rjc; ++ uint64_t mgprc; ++ uint64_t mgpdc; ++ uint64_t mgptc; ++ uint64_t torl; ++ uint64_t torh; ++ uint64_t totl; ++ uint64_t toth; ++ uint64_t tpr; ++ uint64_t tpt; ++ uint64_t ptc64; ++ uint64_t ptc127; ++ uint64_t ptc255; ++ uint64_t ptc511; ++ uint64_t ptc1023; ++ uint64_t ptc1522; ++ uint64_t mptc; ++ uint64_t bptc; ++ uint64_t tsctc; ++ uint64_t tsctfc; ++ uint64_t iac; ++ uint64_t icrxptc; ++ uint64_t icrxatc; ++ uint64_t ictxptc; ++ uint64_t ictxatc; ++ uint64_t ictxqec; ++ uint64_t ictxqmtc; ++ uint64_t icrxdmtc; ++ uint64_t icrxoc; ++}; ++ ++/* Structure containing variables used by the shared code (e1000_hw.c) */ ++struct e1000_hw { ++ uint8_t *hw_addr; ++ uint8_t *flash_address; ++ e1000_mac_type mac_type; ++ e1000_phy_type phy_type; ++ uint32_t phy_init_script; ++ e1000_media_type media_type; ++ void *back; ++ struct e1000_shadow_ram *eeprom_shadow_ram; ++ uint32_t flash_bank_size; ++ uint32_t flash_base_addr; ++ e1000_fc_type fc; ++ e1000_bus_speed bus_speed; ++ e1000_bus_width bus_width; ++ e1000_bus_type bus_type; ++ struct e1000_eeprom_info eeprom; ++ 
e1000_ms_type master_slave; ++ e1000_ms_type original_master_slave; ++ e1000_ffe_config ffe_config_state; ++ uint32_t asf_firmware_present; ++ uint32_t eeprom_semaphore_present; ++ uint32_t swfw_sync_present; ++ uint32_t swfwhw_semaphore_present; ++ unsigned long io_base; ++ uint32_t phy_id; ++ uint32_t phy_revision; ++ uint32_t phy_addr; ++ uint32_t original_fc; ++ uint32_t txcw; ++ uint32_t autoneg_failed; ++ uint32_t max_frame_size; ++ uint32_t min_frame_size; ++ uint32_t mc_filter_type; ++ uint32_t num_mc_addrs; ++ uint32_t collision_delta; ++ uint32_t tx_packet_delta; ++ uint32_t ledctl_default; ++ uint32_t ledctl_mode1; ++ uint32_t ledctl_mode2; ++ boolean_t tx_pkt_filtering; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ uint16_t phy_spd_default; ++ uint16_t autoneg_advertised; ++ uint16_t pci_cmd_word; ++ uint16_t fc_high_water; ++ uint16_t fc_low_water; ++ uint16_t fc_pause_time; ++ uint16_t current_ifs_val; ++ uint16_t ifs_min_val; ++ uint16_t ifs_max_val; ++ uint16_t ifs_step_size; ++ uint16_t ifs_ratio; ++ uint16_t device_id; ++ uint16_t vendor_id; ++ uint16_t subsystem_id; ++ uint16_t subsystem_vendor_id; ++ uint8_t revision_id; ++ uint8_t autoneg; ++ uint8_t mdix; ++ uint8_t forced_speed_duplex; ++ uint8_t wait_autoneg_complete; ++ uint8_t dma_fairness; ++ uint8_t mac_addr[NODE_ADDRESS_SIZE]; ++ uint8_t perm_mac_addr[NODE_ADDRESS_SIZE]; ++ boolean_t disable_polarity_correction; ++ boolean_t speed_downgraded; ++ e1000_smart_speed smart_speed; ++ e1000_dsp_config dsp_config_state; ++ boolean_t get_link_status; ++ boolean_t serdes_link_down; ++ boolean_t tbi_compatibility_en; ++ boolean_t tbi_compatibility_on; ++ boolean_t laa_is_present; ++ boolean_t phy_reset_disable; ++ boolean_t fc_send_xon; ++ boolean_t fc_strict_ieee; ++ boolean_t report_tx_early; ++ boolean_t adaptive_ifs; ++ boolean_t ifs_params_forced; ++ boolean_t in_ifs_mode; ++ boolean_t mng_reg_access_disabled; ++ boolean_t leave_av_bit_off; ++ boolean_t kmrn_lock_loss_workaround_disabled; ++}; ++ ++ ++#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */ ++#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */ ++#define E1000_EEPROM_RW_REG_DATA 16 /* Offset to data in EEPROM read/write registers */ ++#define E1000_EEPROM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_EEPROM_RW_REG_START 1 /* First bit for telling part to start operation */ ++#define E1000_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ ++/* Register Bit Masks */ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ ++#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ ++#define E1000_CTRL_TME 0x00000010 /* Test mode. 
0=normal,1=test */ ++#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ ++#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ ++#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ ++#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ ++#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ ++#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ ++#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ ++#define E1000_STATUS_SPEED_MASK 0x000000C0 ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion ++ by EEPROM/Flash */ ++#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ ++#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. 
*/ ++#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ ++#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ ++#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ ++#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ ++#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ ++#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ ++#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ ++#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ ++#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ ++#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ ++#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ ++#define E1000_STATUS_FUSE_8 0x04000000 ++#define E1000_STATUS_FUSE_9 0x08000000 ++#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ ++#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ ++ ++/* Constants used to intrepret the masked PCI-X bus speed. */ ++#define E1000_STATUS_PCIX_SPEED_66 0x00000000 /* PCI-X bus speed 50-66 MHz */ ++#define E1000_STATUS_PCIX_SPEED_100 0x00004000 /* PCI-X bus speed 66-100 MHz */ ++#define E1000_STATUS_PCIX_SPEED_133 0x00008000 /* PCI-X bus speed 100-133 MHz */ ++ ++/* EEPROM/Flash Control */ ++#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ ++#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */ ++#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */ ++#define E1000_EECD_FWE_MASK 0x00000030 ++#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ ++#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ ++#define E1000_EECD_FWE_SHIFT 4 ++#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */ ++#define E1000_EECD_ADDR_BITS 0x00000400 /* EEPROM Addressing bits based on type ++ * (0-small, 1-large) */ ++#define E1000_EECD_TYPE 0x00002000 /* EEPROM Type (1-SPI, 0-Microwire) */ ++#ifndef E1000_EEPROM_GRANT_ATTEMPTS ++#define E1000_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */ ++#endif ++#define E1000_EECD_AUTO_RD 0x00000200 /* EEPROM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* EEprom Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ ++#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ ++#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SECVAL_SHIFT 22 ++#define E1000_STM_OPCODE 0xDB00 ++#define E1000_HICR_FW_RESET 0xC0 ++ ++#define E1000_SHADOW_RAM_WORDS 2048 ++#define E1000_ICH8_NVM_SIG_WORD 0x13 ++#define E1000_ICH8_NVM_SIG_MASK 0xC0 ++ ++/* EEPROM Read */ ++#define E1000_EERD_START 0x00000001 /* Start Read */ ++#define E1000_EERD_DONE 0x00000010 /* Read Done */ ++#define E1000_EERD_ADDR_SHIFT 8 ++#define E1000_EERD_ADDR_MASK 0x0000FF00 /* Read Address */ ++#define E1000_EERD_DATA_SHIFT 16 ++#define E1000_EERD_DATA_MASK 0xFFFF0000 /* Read Data */ ++ ++/* SPI EEPROM Status Register 
*/ ++#define EEPROM_STATUS_RDY_SPI 0x01 ++#define EEPROM_STATUS_WEN_SPI 0x02 ++#define EEPROM_STATUS_BP0_SPI 0x04 ++#define EEPROM_STATUS_BP1_SPI 0x08 ++#define EEPROM_STATUS_WPEN_SPI 0x80 ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_GPI0_EN 0x00000001 /* Maps SDP4 to GPI0 */ ++#define E1000_CTRL_EXT_GPI1_EN 0x00000002 /* Maps SDP5 to GPI1 */ ++#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN ++#define E1000_CTRL_EXT_GPI2_EN 0x00000004 /* Maps SDP6 to GPI2 */ ++#define E1000_CTRL_EXT_GPI3_EN 0x00000008 /* Maps SDP7 to GPI3 */ ++#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* Value of SW Defineable Pin 4 */ ++#define E1000_CTRL_EXT_SDP5_DATA 0x00000020 /* Value of SW Defineable Pin 5 */ ++#define E1000_CTRL_EXT_PHY_INT E1000_CTRL_EXT_SDP5_DATA ++#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* Value of SW Defineable Pin 6 */ ++#define E1000_CTRL_EXT_SDP7_DATA 0x00000080 /* Value of SW Defineable Pin 7 */ ++#define E1000_CTRL_EXT_SDP4_DIR 0x00000100 /* Direction of SDP4 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP5_DIR 0x00000200 /* Direction of SDP5 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ ++#define E1000_CTRL_EXT_SDP7_DIR 0x00000800 /* Direction of SDP7 0=in 1=out */ ++#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* Initiate an ASD sequence */ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_IPS 0x00004000 /* Invert Power State */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000 ++#define E1000_CTRL_EXT_LINK_MODE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_WR_WMARK_MASK 0x03000000 ++#define E1000_CTRL_EXT_WR_WMARK_256 0x00000000 ++#define E1000_CTRL_EXT_WR_WMARK_320 0x01000000 ++#define E1000_CTRL_EXT_WR_WMARK_384 0x02000000 ++#define E1000_CTRL_EXT_WR_WMARK_448 0x03000000 ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CRTL_EXT_PB_PAREN 0x01000000 /* packet buffer parity error detection enabled */ ++#define E1000_CTRL_EXT_DF_PAREN 0x02000000 /* descriptor FIFO parity error detection enable */ ++#define E1000_CTRL_EXT_GHOST_PAREN 0x40000000 ++ ++/* MDI Control */ ++#define E1000_MDIC_DATA_MASK 0x0000FFFF ++#define E1000_MDIC_REG_MASK 0x001F0000 ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_MASK 0x03E00000 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_INT_EN 0x20000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++#define E1000_KUMCTRLSTA_MASK 0x0000FFFF ++#define E1000_KUMCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KUMCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KUMCTRLSTA_REN 0x00200000 ++ ++#define E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL 0x00000000 ++#define E1000_KUMCTRLSTA_OFFSET_CTRL 0x00000001 ++#define E1000_KUMCTRLSTA_OFFSET_INB_CTRL 0x00000002 ++#define E1000_KUMCTRLSTA_OFFSET_DIAG 0x00000003 ++#define E1000_KUMCTRLSTA_OFFSET_TIMEOUTS 0x00000004 ++#define E1000_KUMCTRLSTA_OFFSET_INB_PARAM 0x00000009 ++#define 
E1000_KUMCTRLSTA_OFFSET_HD_CTRL 0x00000010 ++#define E1000_KUMCTRLSTA_OFFSET_M2P_SERDES 0x0000001E ++#define E1000_KUMCTRLSTA_OFFSET_M2P_MODES 0x0000001F ++ ++/* FIFO Control */ ++#define E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS 0x00000008 ++#define E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS 0x00000800 ++ ++/* In-Band Control */ ++#define E1000_KUMCTRLSTA_INB_CTRL_LINK_STATUS_TX_TIMEOUT_DEFAULT 0x00000500 ++#define E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING 0x00000010 ++ ++/* Half-Duplex Control */ ++#define E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT 0x00000004 ++#define E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT 0x00000000 ++ ++#define E1000_KUMCTRLSTA_OFFSET_K0S_CTRL 0x0000001E ++ ++#define E1000_KUMCTRLSTA_DIAG_FELPBK 0x2000 ++#define E1000_KUMCTRLSTA_DIAG_NELPBK 0x1000 ++ ++#define E1000_KUMCTRLSTA_K0S_100_EN 0x2000 ++#define E1000_KUMCTRLSTA_K0S_GBE_EN 0x1000 ++#define E1000_KUMCTRLSTA_K0S_ENTRY_LATENCY_MASK 0x0003 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++#define E1000_PHY_CTRL_SPD_EN 0x00000001 ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++#define E1000_PHY_CTRL_B2B_EN 0x00000080 ++ ++/* LED Control */ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_BLINK_RATE 0x0000020 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++#define E1000_LEDCTL_LED1_MODE_MASK 0x00000F00 ++#define E1000_LEDCTL_LED1_MODE_SHIFT 8 ++#define E1000_LEDCTL_LED1_BLINK_RATE 0x0002000 ++#define E1000_LEDCTL_LED1_IVRT 0x00004000 ++#define E1000_LEDCTL_LED1_BLINK 0x00008000 ++#define E1000_LEDCTL_LED2_MODE_MASK 0x000F0000 ++#define E1000_LEDCTL_LED2_MODE_SHIFT 16 ++#define E1000_LEDCTL_LED2_BLINK_RATE 0x00200000 ++#define E1000_LEDCTL_LED2_IVRT 0x00400000 ++#define E1000_LEDCTL_LED2_BLINK 0x00800000 ++#define E1000_LEDCTL_LED3_MODE_MASK 0x0F000000 ++#define E1000_LEDCTL_LED3_MODE_SHIFT 24 ++#define E1000_LEDCTL_LED3_BLINK_RATE 0x20000000 ++#define E1000_LEDCTL_LED3_IVRT 0x40000000 ++#define E1000_LEDCTL_LED3_BLINK 0x80000000 ++ ++#define E1000_LEDCTL_MODE_LINK_10_1000 0x0 ++#define E1000_LEDCTL_MODE_LINK_100_1000 0x1 ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_ACTIVITY 0x3 ++#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4 ++#define E1000_LEDCTL_MODE_LINK_10 0x5 ++#define E1000_LEDCTL_MODE_LINK_100 0x6 ++#define E1000_LEDCTL_MODE_LINK_1000 0x7 ++#define E1000_LEDCTL_MODE_PCIX_MODE 0x8 ++#define E1000_LEDCTL_MODE_FULL_DUPLEX 0x9 ++#define E1000_LEDCTL_MODE_COLLISION 0xA ++#define E1000_LEDCTL_MODE_BUS_SPEED 0xB ++#define E1000_LEDCTL_MODE_BUS_SIZE 0xC ++#define E1000_LEDCTL_MODE_PAUSED 0xD ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Receive Address */ ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. 
threshold (0) */ ++#define E1000_ICR_RXO 0x00000040 /* rx overrun */ ++#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ ++#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ ++#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ ++#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ ++#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ ++#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ ++#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ ++#define E1000_ICR_TXD_LOW 0x00008000 ++#define E1000_ICR_SRPD 0x00010000 ++#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ ++#define E1000_ICR_MNG 0x00040000 /* Manageability event */ ++#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ ++#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ ++#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ ++#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ ++#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ ++#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_ICS_SRPD E1000_ICR_SRPD ++#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_ICS_DSW E1000_ICR_DSW ++#define E1000_ICS_PHYINT E1000_ICR_PHYINT ++#define E1000_ICS_EPRST E1000_ICR_EPRST ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMS_SRPD E1000_ICR_SRPD ++#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMS_DSW E1000_ICR_DSW ++#define E1000_IMS_PHYINT E1000_ICR_PHYINT ++#define E1000_IMS_EPRST E1000_ICR_EPRST ++ ++/* Interrupt Mask Clear */ ++#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ ++#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ ++#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. 
threshold */ ++#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ ++#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ ++#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ ++#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ ++#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ ++#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ ++#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ ++#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ ++#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW ++#define E1000_IMC_SRPD E1000_ICR_SRPD ++#define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */ ++#define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */ ++#define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */ ++#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ ++#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ ++#define E1000_IMC_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ ++#define E1000_IMC_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ ++#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ ++#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ ++#define E1000_IMC_DSW E1000_ICR_DSW ++#define E1000_IMC_PHYINT E1000_ICR_PHYINT ++#define E1000_IMC_EPRST E1000_ICR_EPRST ++ ++/* Receive Control */ ++#define E1000_RCTL_RST 0x00000001 /* Software reset */ ++#define E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ ++#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ ++#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ ++#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ ++#define 
E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ ++#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ ++ ++/* Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SW_W_SYNC definitions */ ++#define E1000_SWFW_EEP_SM 0x0001 ++#define E1000_SWFW_PHY0_SM 0x0002 ++#define E1000_SWFW_PHY1_SM 0x0004 ++#define E1000_SWFW_MAC_CSR_SM 0x0008 ++ ++/* Receive Descriptor */ ++#define E1000_RDT_DELAY 0x0000ffff /* Delay timer (1=1024us) */ ++#define E1000_RDT_FPDB 0x80000000 /* Flush descriptor block */ ++#define E1000_RDLEN_LEN 0x0007ff80 /* descriptor length */ ++#define E1000_RDH_RDH 0x0000ffff /* receive descriptor head */ ++#define E1000_RDT_RDT 0x0000ffff /* receive descriptor tail */ ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTH_XFCE 0x80000000 /* External Flow Control Enable */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_ISCSI_DIS 0x00000001 ++#define E1000_RFCTL_ISCSI_DWC_MASK 0x0000003E ++#define E1000_RFCTL_ISCSI_DWC_SHIFT 1 ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 0x00000080 ++#define E1000_RFCTL_NFS_VER_MASK 0x00000300 ++#define E1000_RFCTL_NFS_VER_SHIFT 8 ++#define E1000_RFCTL_IPV6_DIS 0x00000400 ++#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_ACKD_DIS 0x00002000 ++#define E1000_RFCTL_IPFRSP_DIS 0x00004000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Receive Descriptor Control */ ++#define E1000_RXDCTL_PTHRESH 0x0000003F /* RXDCTL Prefetch Threshold */ ++#define E1000_RXDCTL_HTHRESH 0x00003F00 /* RXDCTL Host Threshold */ ++#define E1000_RXDCTL_WTHRESH 0x003F0000 /* RXDCTL Writeback Threshold */ ++#define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x000000FF 
/* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x00FF0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_LWTHRESH 0xFE000000 /* TXDCTL Low Threshold */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 /* Enable the counting of desc. ++ still to be processed. */ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_HD 0x00000040 /* TXCW half duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_RF 0x00003000 /* TXCW remote fault */ ++#define E1000_TXCW_NP 0x00008000 /* TXCW next page */ ++#define E1000_TXCW_CW 0x0000ffff /* TxConfigWord mask */ ++#define E1000_TXCW_TXC 0x40000000 /* Transmit Config control */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_NC 0x04000000 /* Receive config no carrier */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_CC 0x10000000 /* Receive config change */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++#define E1000_RXCW_ANC 0x80000000 /* Auto-neg complete */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_RST 0x00000001 /* software reset */ ++#define E1000_TCTL_EN 0x00000002 /* enable tx */ ++#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ ++#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++/* Extended Transmit Control */ ++#define E1000_TCTL_EXT_BST_MASK 0x000003FF /* Backoff Slot Time */ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++ ++#define DEFAULT_80003ES2LAN_TCTL_EXT_GCEX 0x00010000 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_PCSS_MASK 0x000000FF /* Packet Checksum Start */ ++#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPV6OFL 0x00000400 /* IPv6 checksum offload */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ ++ ++/* Multiple Receive Queue Control */ ++#define E1000_MRQC_ENABLE_MASK 0x00000003 ++#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001 ++#define E1000_MRQC_ENABLE_RSS_INT 0x00000004 ++#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 ++#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 ++#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 ++#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 ++#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 ++#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 ++#define 
E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ ++#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ ++#define E1000_WUC_SPM 0x80000000 /* Enable SPM */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ ++#define E1000_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ ++#define E1000_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ ++#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ ++#define E1000_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ ++#define E1000_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ ++#define E1000_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ ++#define E1000_WUFC_ALL_FILTERS 0x000F00FF /* Mask for all wakeup filters */ ++#define E1000_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ ++#define E1000_WUFC_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC 0x00000001 /* Link Status Changed */ ++#define E1000_WUS_MAG 0x00000002 /* Magic Packet Received */ ++#define E1000_WUS_EX 0x00000004 /* Directed Exact Received */ ++#define E1000_WUS_MC 0x00000008 /* Directed Multicast Received */ ++#define E1000_WUS_BC 0x00000010 /* Broadcast Received */ ++#define E1000_WUS_ARP 0x00000020 /* ARP Request Packet Received */ ++#define E1000_WUS_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Received */ ++#define E1000_WUS_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Received */ ++#define E1000_WUS_FLX0 0x00010000 /* Flexible Filter 0 Match */ ++#define E1000_WUS_FLX1 0x00020000 /* Flexible Filter 1 Match */ ++#define E1000_WUS_FLX2 0x00040000 /* Flexible Filter 2 Match */ ++#define E1000_WUS_FLX3 0x00080000 /* Flexible Filter 3 Match */ ++#define E1000_WUS_FLX_FILTERS 0x000F0000 /* Mask for the 4 flexible filters */ ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ ++#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ ++#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ ++#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ ++#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ ++#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery ++ * Filtering */ ++#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ ++#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled 
*/ ++#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address ++ * filtering */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 /* Enable MNG packets to host ++ * memory */ ++#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 /* Enable IP address ++ * filtering */ ++#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ ++#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ ++#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ ++#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ ++#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ ++#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ ++#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ ++#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ ++ ++#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ ++#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++/* FW Semaphore Register */ ++#define E1000_FWSM_MODE_MASK 0x0000000E /* FW mode */ ++#define E1000_FWSM_MODE_SHIFT 1 ++#define E1000_FWSM_FW_VALID 0x00008000 /* FW established a valid mode */ ++ ++#define E1000_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI reset */ ++#define E1000_FWSM_DISSW 0x10000000 /* FW disable SW Write Access */ ++#define E1000_FWSM_SKUSEL_MASK 0x60000000 /* LAN SKU select */ ++#define E1000_FWSM_SKUEL_SHIFT 29 ++#define E1000_FWSM_SKUSEL_EMB 0x0 /* Embedded SKU */ ++#define E1000_FWSM_SKUSEL_CONS 0x1 /* Consumer SKU */ ++#define E1000_FWSM_SKUSEL_PERF_100 0x2 /* Perf & Corp 10/100 SKU */ ++#define E1000_FWSM_SKUSEL_PERF_GBE 0x3 /* Perf & Copr GbE SKU */ ++ ++/* FFLT Debug Register */ ++#define E1000_FFLT_DBG_INVC 0x00100000 /* Invalid /C/ code handling */ ++ ++typedef enum { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_interface_only ++} e1000_mng_mode; ++ ++/* Host Inteface Control Register */ ++#define E1000_HICR_EN 0x00000001 /* Enable Bit - RO */ ++#define E1000_HICR_C 0x00000002 /* Driver sets this bit when done ++ * to put command in RAM */ ++#define E1000_HICR_SV 0x00000004 /* Status Validity */ ++#define E1000_HICR_FWR 0x00000080 /* FW reset. 
Set by the Host */ ++ ++/* Host Interface Command Interface - Address range 0x8800-0x8EFF */ ++#define E1000_HI_MAX_DATA_LENGTH 252 /* Host Interface data length */ ++#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Number of bytes in range */ ++#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Number of dwords in range */ ++#define E1000_HI_COMMAND_TIMEOUT 500 /* Time in ms to process HI command */ ++ ++struct e1000_host_command_header { ++ uint8_t command_id; ++ uint8_t command_length; ++ uint8_t command_options; /* I/F bits for command, status for return */ ++ uint8_t checksum; ++}; ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; /* Command Head/Command Result Head has 4 bytes */ ++ uint8_t command_data[E1000_HI_MAX_DATA_LENGTH]; /* Command data can length 0..252 */ ++}; ++ ++/* Host SMB register #0 */ ++#define E1000_HSMC0R_CLKIN 0x00000001 /* SMB Clock in */ ++#define E1000_HSMC0R_DATAIN 0x00000002 /* SMB Data in */ ++#define E1000_HSMC0R_DATAOUT 0x00000004 /* SMB Data out */ ++#define E1000_HSMC0R_CLKOUT 0x00000008 /* SMB Clock out */ ++ ++/* Host SMB register #1 */ ++#define E1000_HSMC1R_CLKIN E1000_HSMC0R_CLKIN ++#define E1000_HSMC1R_DATAIN E1000_HSMC0R_DATAIN ++#define E1000_HSMC1R_DATAOUT E1000_HSMC0R_DATAOUT ++#define E1000_HSMC1R_CLKOUT E1000_HSMC0R_CLKOUT ++ ++/* FW Status Register */ ++#define E1000_FWSTS_FWS_MASK 0x000000FF /* FW Status */ ++ ++/* Wake Up Packet Length */ ++#define E1000_WUPL_LENGTH_MASK 0x0FFF /* Only the lower 12 bits are valid */ ++ ++#define E1000_MDALIGN 4096 ++ ++/* PCI-Ex registers*/ ++ ++/* PCI-Ex Control Register */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCI_EX_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++#define PCI_EX_82566_SNOOP_ALL PCI_EX_NO_SNOOP_ALL ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++/* Function Active and Power State to MNG */ ++#define E1000_FACTPS_FUNC0_POWER_STATE_MASK 0x00000003 ++#define E1000_FACTPS_LAN0_VALID 0x00000004 ++#define E1000_FACTPS_FUNC0_AUX_EN 0x00000008 ++#define E1000_FACTPS_FUNC1_POWER_STATE_MASK 0x000000C0 ++#define E1000_FACTPS_FUNC1_POWER_STATE_SHIFT 6 ++#define E1000_FACTPS_LAN1_VALID 0x00000100 ++#define E1000_FACTPS_FUNC1_AUX_EN 0x00000200 ++#define E1000_FACTPS_FUNC2_POWER_STATE_MASK 0x00003000 ++#define E1000_FACTPS_FUNC2_POWER_STATE_SHIFT 12 ++#define E1000_FACTPS_IDE_ENABLE 0x00004000 ++#define E1000_FACTPS_FUNC2_AUX_EN 0x00008000 ++#define E1000_FACTPS_FUNC3_POWER_STATE_MASK 0x000C0000 ++#define E1000_FACTPS_FUNC3_POWER_STATE_SHIFT 18 ++#define E1000_FACTPS_SP_ENABLE 0x00100000 ++#define E1000_FACTPS_FUNC3_AUX_EN 0x00200000 ++#define E1000_FACTPS_FUNC4_POWER_STATE_MASK 0x03000000 ++#define E1000_FACTPS_FUNC4_POWER_STATE_SHIFT 24 ++#define E1000_FACTPS_IPMI_ENABLE 0x04000000 ++#define E1000_FACTPS_FUNC4_AUX_EN 0x08000000 ++#define E1000_FACTPS_MNGCG 0x20000000 ++#define E1000_FACTPS_LAN_FUNC_SEL 0x40000000 ++#define E1000_FACTPS_PM_STATE_CHANGED 0x80000000 ++ ++/* EEPROM Commands - Microwire */ ++#define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ ++#define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ ++#define 
EEPROM_ERASE_OPCODE_MICROWIRE 0x7 /* EEPROM erase opcode */ ++#define EEPROM_EWEN_OPCODE_MICROWIRE 0x13 /* EEPROM erase/write enable */ ++#define EEPROM_EWDS_OPCODE_MICROWIRE 0x10 /* EEPROM erast/write disable */ ++ ++/* EEPROM Commands - SPI */ ++#define EEPROM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */ ++#define EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */ ++#define EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Enable latch */ ++#define EEPROM_WRDI_OPCODE_SPI 0x04 /* EEPROM reset Write Enable latch */ ++#define EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status register */ ++#define EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status register */ ++#define EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */ ++#define EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */ ++#define EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */ ++ ++/* EEPROM Size definitions */ ++#define EEPROM_WORD_SIZE_SHIFT 6 ++#define EEPROM_SIZE_SHIFT 10 ++#define EEPROM_SIZE_MASK 0x1C00 ++ ++/* EEPROM Word Offsets */ ++#define EEPROM_COMPAT 0x0003 ++#define EEPROM_ID_LED_SETTINGS 0x0004 ++#define EEPROM_VERSION 0x0005 ++#define EEPROM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ ++#define EEPROM_PHY_CLASS_WORD 0x0007 ++#define EEPROM_INIT_CONTROL1_REG 0x000A ++#define EEPROM_INIT_CONTROL2_REG 0x000F ++#define EEPROM_SWDEF_PINS_CTRL_PORT_1 0x0010 ++#define EEPROM_INIT_CONTROL3_PORT_B 0x0014 ++#define EEPROM_INIT_3GIO_3 0x001A ++#define EEPROM_SWDEF_PINS_CTRL_PORT_0 0x0020 ++#define EEPROM_INIT_CONTROL3_PORT_A 0x0024 ++#define EEPROM_CFG 0x0012 ++#define EEPROM_FLASH_VERSION 0x0032 ++#define EEPROM_CHECKSUM_REG 0x003F ++ ++#define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ ++#define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_RESERVED_82573 0xF746 ++#define ID_LED_DEFAULT_82573 0x1811 ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++ ++/* Mask bits for SERDES amplitude adjustment in Word 6 of the EEPROM */ ++#define EEPROM_SERDES_AMPLITUDE_MASK 0x000F ++ ++/* Mask bit for PHY class in Word 7 of the EEPROM */ ++#define EEPROM_PHY_CLASS_A 0x8000 ++ ++/* Mask bits for fields in Word 0x0a of the EEPROM */ ++#define EEPROM_WORD0A_ILOS 0x0010 ++#define EEPROM_WORD0A_SWDPIO 0x01E0 ++#define EEPROM_WORD0A_LRST 0x0200 ++#define EEPROM_WORD0A_FD 0x0400 ++#define EEPROM_WORD0A_66MHZ 0x0800 ++ ++/* Mask bits for fields in Word 0x0f of the EEPROM */ ++#define EEPROM_WORD0F_PAUSE_MASK 0x3000 ++#define EEPROM_WORD0F_PAUSE 0x1000 ++#define EEPROM_WORD0F_ASM_DIR 0x2000 ++#define EEPROM_WORD0F_ANE 0x0800 ++#define EEPROM_WORD0F_SWPDIO_EXT 
0x00F0 ++#define EEPROM_WORD0F_LPLU 0x0001 ++ ++/* Mask bits for fields in Word 0x10/0x20 of the EEPROM */ ++#define EEPROM_WORD1020_GIGA_DISABLE 0x0010 ++#define EEPROM_WORD1020_GIGA_DISABLE_NON_D0A 0x0008 ++ ++/* Mask bits for fields in Word 0x1a of the EEPROM */ ++#define EEPROM_WORD1A_ASPM_MASK 0x000C ++ ++/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ ++#define EEPROM_SUM 0xBABA ++ ++/* EEPROM Map defines (WORD OFFSETS)*/ ++#define EEPROM_NODE_ADDRESS_BYTE_0 0 ++#define EEPROM_PBA_BYTE_1 8 ++ ++#define EEPROM_RESERVED_WORD 0xFFFF ++ ++/* EEPROM Map Sizes (Byte Counts) */ ++#define PBA_SIZE 4 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++/* Collision distance is a 0-based value that applies to ++ * half-duplex-capable hardware only. */ ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLLISION_DISTANCE_82542 64 ++#define E1000_FDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE ++#define E1000_HDX_COLLISION_DISTANCE E1000_COLLISION_DISTANCE ++#define E1000_COLD_SHIFT 12 ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82542_TIPG_IPGT 10 ++#define DEFAULT_82543_TIPG_IPGT_FIBER 9 ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF ++#define E1000_TIPG_IPGR1_MASK 0x000FFC00 ++#define E1000_TIPG_IPGR2_MASK 0x3FF00000 ++ ++#define DEFAULT_82542_TIPG_IPGR1 2 ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82542_TIPG_IPGR2 10 ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++#define DEFAULT_80003ES2LAN_TIPG_IPGT_10_100 0x00000009 ++#define DEFAULT_80003ES2LAN_TIPG_IPGT_1000 0x00000008 ++#define E1000_TXDMAC_DPP 0x00000001 ++ ++/* Adaptive IFS defines */ ++#define TX_THRESHOLD_START 8 ++#define TX_THRESHOLD_INCREMENT 10 ++#define TX_THRESHOLD_DECREMENT 1 ++#define TX_THRESHOLD_STOP 190 ++#define TX_THRESHOLD_DISABLE 0 ++#define TX_THRESHOLD_TIMER_MS 10000 ++#define MIN_NUM_XMITS 1000 ++#define IFS_MAX 80 ++#define IFS_STEP 10 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_PCIE_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_PHY_WRITE_ENABLE 0x00000002 ++#define E1000_EXTCNF_CTRL_D_UD_ENABLE 0x00000004 ++#define E1000_EXTCNF_CTRL_D_UD_LATENCY 0x00000008 ++#define E1000_EXTCNF_CTRL_D_UD_OWNER 0x00000010 ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_MDIO_HW_OWNERSHIP 0x00000040 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER 0x0FFF0000 ++ ++#define E1000_EXTCNF_SIZE_EXT_PHY_LENGTH 0x000000FF ++#define E1000_EXTCNF_SIZE_EXT_DOCK_LENGTH 0x0000FF00 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH 0x00FF0000 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB, default Rx allocation */ ++#define E1000_PBA_12K 0x000C /* 12KB, default Rx allocation */ ++#define E1000_PBA_16K 0x0010 /* 16KB, default TX allocation */ ++#define E1000_PBA_22K 0x0016 ++#define E1000_PBA_24K 0x0018 ++#define E1000_PBA_30K 0x001E ++#define E1000_PBA_32K 0x0020 ++#define E1000_PBA_34K 0x0022 ++#define E1000_PBA_38K 0x0026 ++#define E1000_PBA_40K 0x0028 ++#define E1000_PBA_48K 
0x0030 /* 48KB, default RX allocation */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* The historical defaults for the flow control values are given below. */ ++#define FC_DEFAULT_HI_THRESH (0x8000) /* 32KB */ ++#define FC_DEFAULT_LO_THRESH (0x4000) /* 16KB */ ++#define FC_DEFAULT_TX_TIMER (0x100) /* ~130 us */ ++ ++/* PCIX Config space */ ++#define PCIX_COMMAND_REGISTER 0xE6 ++#define PCIX_STATUS_REGISTER_LO 0xE8 ++#define PCIX_STATUS_REGISTER_HI 0xEA ++ ++#define PCIX_COMMAND_MMRBC_MASK 0x000C ++#define PCIX_COMMAND_MMRBC_SHIFT 0x2 ++#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 ++#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 ++#define PCIX_STATUS_HI_MMRBC_4K 0x3 ++#define PCIX_STATUS_HI_MMRBC_2K 0x2 ++ ++ ++/* Number of bits required to shift right the "pause" bits from the ++ * EEPROM (bits 13:12) to the "pause" (bits 8:7) field in the TXCW register. ++ */ ++#define PAUSE_SHIFT 5 ++ ++/* Number of bits required to shift left the "SWDPIO" bits from the ++ * EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field in the CTRL register. ++ */ ++#define SWDPIO_SHIFT 17 ++ ++/* Number of bits required to shift left the "SWDPIO_EXT" bits from the ++ * EEPROM word F (bits 7:4) to the bits 11:8 of The Extended CTRL register. ++ */ ++#define SWDPIO__EXT_SHIFT 4 ++ ++/* Number of bits required to shift left the "ILOS" bit from the EEPROM ++ * (bit 4) to the "ILOS" (bit 7) field in the CTRL register. ++ */ ++#define ILOS_SHIFT 3 ++ ++ ++#define RECEIVE_BUFFER_ALIGN_SIZE (256) ++ ++/* Number of milliseconds we wait for auto-negotiation to complete */ ++#define LINK_UP_TIMEOUT 500 ++ ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for Eeprom auto read bit done after MAC reset */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++ ++#define E1000_TX_BUFFER_SIZE ((uint32_t)1514) ++ ++/* The carrier extension symbol, as received by the NIC. */ ++#define CARRIER_EXTENSION 0x0F ++ ++/* TBI_ACCEPT macro definition: ++ * ++ * This macro requires: ++ * adapter = a pointer to struct e1000_hw ++ * status = the 8 bit status field of the RX descriptor with EOP set ++ * error = the 8 bit error field of the RX descriptor with EOP set ++ * length = the sum of all the length fields of the RX descriptors that ++ * make up the current frame ++ * last_byte = the last byte of the frame DMAed by the hardware ++ * max_frame_length = the maximum frame length we want to accept. ++ * min_frame_length = the minimum frame length we want to accept. ++ * ++ * This macro is a conditional that should be used in the interrupt ++ * handler's Rx processing routine when RxErrors have been detected. ++ * ++ * Typical use: ++ * ... ++ * if (TBI_ACCEPT) { ++ * accept_frame = TRUE; ++ * e1000_tbi_adjust_stats(adapter, MacAddress); ++ * frame_length--; ++ * } else { ++ * accept_frame = FALSE; ++ * } ++ * ... ++ */ ++ ++#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \ ++ ((adapter)->tbi_compatibility_on && \ ++ (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ ++ ((last_byte) == CARRIER_EXTENSION) && \ ++ (((status) & E1000_RXD_STAT_VP) ? 
\ ++ (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \ ++ ((length) <= ((adapter)->max_frame_size + 1))) : \ ++ (((length) > (adapter)->min_frame_size) && \ ++ ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1))))) ++ ++ ++/* Structures, enums, and macros for the PHY */ ++ ++/* Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++#define E1000_CTRL_PHY_RESET_DIR E1000_CTRL_SWDPIO0 ++#define E1000_CTRL_PHY_RESET E1000_CTRL_SWDPIN0 ++#define E1000_CTRL_MDIO_DIR E1000_CTRL_SWDPIO2 ++#define E1000_CTRL_MDIO E1000_CTRL_SWDPIN2 ++#define E1000_CTRL_MDC_DIR E1000_CTRL_SWDPIO3 ++#define E1000_CTRL_MDC E1000_CTRL_SWDPIN3 ++#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR ++#define E1000_CTRL_PHY_RESET4 E1000_CTRL_EXT_SDP4_DATA ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CTRL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Regiser */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */ ++#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ ++#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ ++ ++#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ ++#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ ++ ++#define IGP01E1000_IEEE_REGS_PAGE 0x0000 ++#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300 ++#define IGP01E1000_IEEE_FORCE_GIGA 0x0140 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* PHY Specific Port Config Register */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* PHY Specific Status Register */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* PHY Specific Control Register */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health Register */ ++#define IGP01E1000_GMII_FIFO 0x14 /* GMII FIFO Register */ ++#define IGP01E1000_PHY_CHANNEL_QUALITY 0x15 /* PHY Channel Quality Register */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* PHY Page Select Core Register */ ++ ++/* IGP01E1000 AGC Registers - stores the cable length values*/ ++#define IGP01E1000_PHY_AGC_A 0x1172 ++#define IGP01E1000_PHY_AGC_B 0x1272 ++#define IGP01E1000_PHY_AGC_C 0x1472 ++#define IGP01E1000_PHY_AGC_D 0x1872 ++ ++/* 
IGP02E1000 AGC Registers for cable length values */ ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++/* IGP01E1000 DSP Reset Register */ ++#define IGP01E1000_PHY_DSP_RESET 0x1F33 ++#define IGP01E1000_PHY_DSP_SET 0x1F71 ++#define IGP01E1000_PHY_DSP_FFE 0x1F35 ++ ++#define IGP01E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++ ++#define IGP01E1000_PHY_AGC_PARAM_A 0x1171 ++#define IGP01E1000_PHY_AGC_PARAM_B 0x1271 ++#define IGP01E1000_PHY_AGC_PARAM_C 0x1471 ++#define IGP01E1000_PHY_AGC_PARAM_D 0x1871 ++ ++#define IGP01E1000_PHY_EDAC_MU_INDEX 0xC000 ++#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000 ++ ++#define IGP01E1000_PHY_ANALOG_TX_STATE 0x2890 ++#define IGP01E1000_PHY_ANALOG_CLASS_A 0x2000 ++#define IGP01E1000_PHY_FORCE_ANALOG_ENABLE 0x0004 ++#define IGP01E1000_PHY_DSP_FFE_CM_CP 0x0069 ++ ++#define IGP01E1000_PHY_DSP_FFE_DEFAULT 0x002A ++/* IGP01E1000 PCS Initialization register - stores the polarity status when ++ * speed = 1000 Mbps. */ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_PCS_CTRL_REG 0x00B5 ++ ++#define IGP01E1000_ANALOG_REGS_PAGE 0x20C0 ++ ++/* Bits... ++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_SPEC_STATUS \ ++ GG82563_REG(0, 17) /* PHY Specific Status */ ++#define GG82563_PHY_INT_ENABLE \ ++ GG82563_REG(0, 18) /* Interrupt Enable */ ++#define GG82563_PHY_SPEC_STATUS_2 \ ++ GG82563_REG(0, 19) /* PHY Specific Status 2 */ ++#define GG82563_PHY_RX_ERR_CNTR \ ++ GG82563_REG(0, 21) /* Receive Error Counter */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++#define GG82563_PHY_TEST_CLK_CTRL \ ++ GG82563_REG(0, 30) /* Test Clock Control (use reg. 
29 to select) */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++#define GG82563_PHY_MAC_SPEC_CTRL_2 \ ++ GG82563_REG(2, 26) /* MAC Specific Control 2 */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PORT_RESET \ ++ GG82563_REG(193, 17) /* Port Reset */ ++#define GG82563_PHY_REVISION_ID \ ++ GG82563_REG(193, 18) /* Revision ID */ ++#define GG82563_PHY_DEVICE_ID \ ++ GG82563_REG(193, 19) /* Device ID */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++#define GG82563_PHY_RATE_ADAPT_CTRL \ ++ GG82563_REG(193, 25) /* Rate Adaptation Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_KMRN_FIFO_CTRL_STAT \ ++ GG82563_REG(194, 16) /* FIFO's Control/Status */ ++#define GG82563_PHY_KMRN_CTRL \ ++ GG82563_REG(194, 17) /* Control */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++#define GG82563_PHY_KMRN_DIAGNOSTIC \ ++ GG82563_REG(194, 19) /* Diagnostic */ ++#define GG82563_PHY_ACK_TIMEOUTS \ ++ GG82563_REG(194, 20) /* Acknowledge Timeouts */ ++#define GG82563_PHY_ADV_ABILITY \ ++ GG82563_REG(194, 21) /* Advertised Ability */ ++#define GG82563_PHY_LINK_PARTNER_ADV_ABILITY \ ++ GG82563_REG(194, 23) /* Link Partner Advertised Ability */ ++#define GG82563_PHY_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 24) /* Advertised Next Page */ ++#define GG82563_PHY_LINK_PARTNER_ADV_NEXT_PAGE \ ++ GG82563_REG(194, 25) /* Link Partner Advertised Next page */ ++#define GG82563_PHY_KMRN_MISC \ ++ GG82563_REG(194, 26) /* Misc. */ ++ ++/* PHY Control Register */ ++#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++ ++/* PHY Status Register */ ++#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ ++#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ ++#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ ++#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. 
status info in Reg 0x0F */ ++#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ ++#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ ++#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ ++#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ ++#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ ++#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ ++#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ ++#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ ++#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP is 10T Half Duplex Capable */ ++#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP is 10T Full Duplex Capable */ ++#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP is 100TX Full Duplex Capable */ ++#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP has detected Remote Fault */ ++#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP has rx'd link code word */ ++#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++#define NWAY_ER_PAGE_RXD 0x0002 /* LP is 10T Half Duplex Capable */ ++#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP is 10T Full Duplex Capable */ ++#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP is 100TX Half Duplex Capable */ ++#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP is 100TX Full Duplex Capable */ ++ ++/* Next Page TX Register */ ++#define NPTX_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ ++#define NPTX_TOGGLE 0x0800 /* Toggles between exchanges ++ * of different NP ++ */ ++#define NPTX_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg ++ * 0 = cannot comply with msg ++ */ ++#define NPTX_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ ++#define NPTX_NEXT_PAGE 0x8000 /* 1 = addition NP will follow ++ * 0 = sending last NP ++ */ ++ ++/* Link Partner Next Page Register */ ++#define LP_RNPR_MSG_CODE_FIELD 0x0001 /* NP msg code or unformatted data */ ++#define LP_RNPR_TOGGLE 0x0800 /* Toggles between exchanges ++ * of different NP ++ */ ++#define LP_RNPR_ACKNOWLDGE2 0x1000 /* 1 = will comply with msg ++ * 0 = cannot comply with msg ++ */ ++#define LP_RNPR_MSG_PAGE 0x2000 /* formatted(1)/unformatted(0) pg */ ++#define LP_RNPR_ACKNOWLDGE 0x4000 /* 1 = ACK / 0 = NO ACK */ ++#define LP_RNPR_NEXT_PAGE 0x8000 /* 1 = addition NP will follow ++ * 0 = sending last NP ++ */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_ASYM_PAUSE 0x0080 
/* Advertise asymmetric pause bit */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++#define CR_1000T_REPEATER_DTE 0x0400 /* 1=Repeater/switch device port */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ ++#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ ++#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ ++#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle errors since last read */ ++#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asymmetric pause direction bit */ ++#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ ++#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local TX is Master, 0=Slave */ ++#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ ++#define SR_1000T_REMOTE_RX_STATUS_SHIFT 12 ++#define SR_1000T_LOCAL_RX_STATUS_SHIFT 13 ++#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_20 20 ++#define FFE_IDLE_ERR_COUNT_TIMEOUT_100 100 ++ ++/* Extended Status Register */ ++#define IEEE_ESR_1000T_HD_CAPS 0x1000 /* 1000T HD capable */ ++#define IEEE_ESR_1000T_FD_CAPS 0x2000 /* 1000T FD capable */ ++#define IEEE_ESR_1000X_HD_CAPS 0x4000 /* 1000X HD capable */ ++#define IEEE_ESR_1000X_FD_CAPS 0x8000 /* 1000X FD capable */ ++ ++#define PHY_TX_POLARITY_MASK 0x0100 /* register 10h bit 8 (polarity bit) */ ++#define PHY_TX_NORMAL_POLARITY 0 /* register 10h bit 8 (normal polarity) */ ++ ++#define AUTO_POLARITY_DISABLE 0x0010 /* register 11h bit 4 */ ++ /* (0=enable, 1=disable) */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_JABBER_DISABLE 0x0001 /* 1=Jabber Function disabled */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_SQE_TEST 0x0004 /* 1=SQE Test enabled */ ++#define M88E1000_PSCR_CLK125_DISABLE 0x0010 /* 1=CLK125 low, ++ * 0=CLK125 toggling ++ */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 /* 1000BASE-T: Auto crossover, ++ * 100BASE-TX/10BASE-T: ++ * MDI Mode ++ */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 /* Auto crossover enabled ++ * all speeds. 
++ */ ++#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080 ++ /* 1=Enable Extended 10BASE-T distance ++ * (Lower 10BASE-T RX Threshold) ++ * 0=Normal 10BASE-T RX Threshold */ ++#define M88E1000_PSCR_MII_5BIT_ENABLE 0x0100 ++ /* 1=5-Bit interface in 100BASE-TX ++ * 0=MII interface in 100BASE-TX */ ++#define M88E1000_PSCR_SCRAMBLER_DISABLE 0x0200 /* 1=Scrambler disable */ ++#define M88E1000_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force link good */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT 1 ++#define M88E1000_PSCR_AUTO_X_MODE_SHIFT 5 ++#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7 ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 /* 0=<50M;1=50-80M;2=80-110M; ++ * 3=110-140M;4=>140M */ ++#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ ++#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ ++#define M88E1000_PSSR_PAGE_RCVD 0x1000 /* 1=Page received */ ++#define M88E1000_PSSR_DPLX 0x2000 /* 1=Duplex 0=Half Duplex */ ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_10MBS 0x0000 /* 00=10Mbs */ ++#define M88E1000_PSSR_100MBS 0x4000 /* 01=100Mbs */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_REV_POLARITY_SHIFT 1 ++#define M88E1000_PSSR_DOWNSHIFT_SHIFT 5 ++#define M88E1000_PSSR_MDIX_SHIFT 6 ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* M88E1000 Extended PHY Specific Control Register */ ++#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000 /* 1=Fiber loopback */ ++#define M88E1000_EPSCR_DOWN_NO_IDLE 0x8000 /* 1=Lost lock detect enabled. 
++ * Will assert lost lock and bring ++ * link down if idle not seen ++ * within 1ms in 1000BASE-T ++ */ ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X 0x0400 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X 0x0800 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X 0x0C00 ++/* Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS 0x0000 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X 0x0200 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X 0x0300 ++#define M88E1000_EPSCR_TX_CLK_2_5 0x0060 /* 2.5 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++#define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X 0x0000 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_2X 0x0200 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_3X 0x0400 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_4X 0x0600 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_6X 0x0A00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_7X 0x0C00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_8X 0x0E00 ++ ++/* IGP01E1000 Specific Port Config Register - R/W */ ++#define IGP01E1000_PSCFR_AUTO_MDIX_PAR_DETECT 0x0010 ++#define IGP01E1000_PSCFR_PRE_EN 0x0020 ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++#define IGP01E1000_PSCFR_DISABLE_TPLOOPBACK 0x0100 ++#define IGP01E1000_PSCFR_DISABLE_JABBER 0x0400 ++#define IGP01E1000_PSCFR_DISABLE_TRANSMIT 0x2000 ++ ++/* IGP01E1000 Specific Port Status Register - R/O */ ++#define IGP01E1000_PSSR_AUTONEG_FAILED 0x0001 /* RO LH SC */ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_CABLE_LENGTH 0x007C ++#define IGP01E1000_PSSR_FULL_DUPLEX 0x0200 ++#define IGP01E1000_PSSR_LINK_UP 0x0400 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 /* speed bits mask */ ++#define IGP01E1000_PSSR_SPEED_10MBPS 0x4000 ++#define IGP01E1000_PSSR_SPEED_100MBPS 0x8000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++#define IGP01E1000_PSSR_CABLE_LENGTH_SHIFT 0x0002 /* shift right 2 */ ++#define IGP01E1000_PSSR_MDIX_SHIFT 0x000B /* shift right 11 */ ++ ++/* IGP01E1000 Specific Port Control Register - R/W */ ++#define IGP01E1000_PSCR_TP_LOOPBACK 0x0010 ++#define IGP01E1000_PSCR_CORRECT_NC_SCMBLR 0x0200 ++#define IGP01E1000_PSCR_TEN_CRS_SELECT 0x0400 ++#define IGP01E1000_PSCR_FLIP_CHIP 0x0800 ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0-MDI, 1-MDIX */ ++ ++/* IGP01E1000 Specific Port Link Health Register */ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++#define IGP01E1000_PLHR_GIG_SCRAMBLER_ERROR 0x4000 ++#define IGP01E1000_PLHR_MASTER_FAULT 0x2000 ++#define IGP01E1000_PLHR_MASTER_RESOLUTION 0x1000 ++#define IGP01E1000_PLHR_GIG_REM_RCVR_NOK 0x0800 /* LH */ ++#define IGP01E1000_PLHR_IDLE_ERROR_CNT_OFLOW 0x0400 /* LH */ ++#define IGP01E1000_PLHR_DATA_ERR_1 0x0200 /* LH */ ++#define IGP01E1000_PLHR_DATA_ERR_0 0x0100 ++#define IGP01E1000_PLHR_AUTONEG_FAULT 0x0040 ++#define IGP01E1000_PLHR_AUTONEG_ACTIVE 0x0010 ++#define IGP01E1000_PLHR_VALID_CHANNEL_D 
0x0008 ++#define IGP01E1000_PLHR_VALID_CHANNEL_C 0x0004 ++#define IGP01E1000_PLHR_VALID_CHANNEL_B 0x0002 ++#define IGP01E1000_PLHR_VALID_CHANNEL_A 0x0001 ++ ++/* IGP01E1000 Channel Quality Register */ ++#define IGP01E1000_MSE_CHANNEL_D 0x000F ++#define IGP01E1000_MSE_CHANNEL_C 0x00F0 ++#define IGP01E1000_MSE_CHANNEL_B 0x0F00 ++#define IGP01E1000_MSE_CHANNEL_A 0xF000 ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* Enable LPLU in non-D0a modes */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* Enable LPLU in D0a mode */ ++ ++/* IGP01E1000 DSP reset macros */ ++#define DSP_RESET_ENABLE 0x0 ++#define DSP_RESET_DISABLE 0x2 ++#define E1000_MAX_DSP_RESETS 10 ++ ++/* IGP01E1000 & IGP02E1000 AGC Registers */ ++ ++#define IGP01E1000_AGC_LENGTH_SHIFT 7 /* Coarse - 13:11, Fine - 10:7 */ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Coarse - 15:13, Fine - 12:9 */ ++ ++/* IGP02E1000 AGC Register Length 9-bit mask */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++ ++/* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */ ++#define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128 ++#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 113 ++ ++/* The precision error of the cable length is +/- 10 meters */ ++#define IGP01E1000_AGC_RANGE 10 ++#define IGP02E1000_AGC_RANGE 15 ++ ++/* IGP01E1000 PCS Initialization register */ ++/* bits 3:6 in the PCS registers stores the channels polarity */ ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++/* IGP01E1000 GMII FIFO Register */ ++#define IGP01E1000_GMII_FLEX_SPD 0x10 /* Enable flexible speed ++ * on Link-Up */ ++#define IGP01E1000_GMII_SPD 0x20 /* Enable SPD */ ++ ++/* IGP01E1000 Analog Register */ ++#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS 0x20D1 ++#define IGP01E1000_ANALOG_FUSE_STATUS 0x20D0 ++#define IGP01E1000_ANALOG_FUSE_CONTROL 0x20DC ++#define IGP01E1000_ANALOG_FUSE_BYPASS 0x20DE ++ ++#define IGP01E1000_ANALOG_FUSE_POLY_MASK 0xF000 ++#define IGP01E1000_ANALOG_FUSE_FINE_MASK 0x0F80 ++#define IGP01E1000_ANALOG_FUSE_COARSE_MASK 0x0070 ++#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED 0x0100 ++#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL 0x0002 ++ ++#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH 0x0040 ++#define IGP01E1000_ANALOG_FUSE_COARSE_10 0x0010 ++#define IGP01E1000_ANALOG_FUSE_FINE_1 0x0080 ++#define IGP01E1000_ANALOG_FUSE_FINE_10 0x0500 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_DISABLE_JABBER 0x0001 /* 1=Disable Jabber */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Polarity Reversal Disabled */ ++#define GG82563_PSCR_POWER_DOWN 0x0004 /* 1=Power Down */ ++#define GG82563_PSCR_COPPER_TRANSMITER_DISABLE 0x0008 /* 1=Transmitter Disabled */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI configuration */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX configuration */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Automatic crossover */ ++#define GG82563_PSCR_ENALBE_EXTENDED_DISTANCE 0x0080 /* 1=Enable Extended Distance */ ++#define GG82563_PSCR_ENERGY_DETECT_MASK 0x0300 ++#define GG82563_PSCR_ENERGY_DETECT_OFF 0x0000 /* 00,01=Off */ ++#define GG82563_PSCR_ENERGY_DETECT_RX 0x0200 /* 10=Sense on Rx only (Energy Detect) */ ++#define GG82563_PSCR_ENERGY_DETECT_RX_TM 0x0300 /* 11=Sense and Tx NLP */ ++#define GG82563_PSCR_FORCE_LINK_GOOD 0x0400 /* 1=Force Link Good */ ++#define GG82563_PSCR_DOWNSHIFT_ENABLE 0x0800 /* 1=Enable Downshift */ ++#define 
GG82563_PSCR_DOWNSHIFT_COUNTER_MASK 0x7000 ++#define GG82563_PSCR_DOWNSHIFT_COUNTER_SHIFT 12 ++ ++/* PHY Specific Status Register (Page 0, Register 17) */ ++#define GG82563_PSSR_JABBER 0x0001 /* 1=Jabber */ ++#define GG82563_PSSR_POLARITY 0x0002 /* 1=Polarity Reversed */ ++#define GG82563_PSSR_LINK 0x0008 /* 1=Link is Up */ ++#define GG82563_PSSR_ENERGY_DETECT 0x0010 /* 1=Sleep, 0=Active */ ++#define GG82563_PSSR_DOWNSHIFT 0x0020 /* 1=Downshift */ ++#define GG82563_PSSR_CROSSOVER_STATUS 0x0040 /* 1=MDIX, 0=MDI */ ++#define GG82563_PSSR_RX_PAUSE_ENABLED 0x0100 /* 1=Receive Pause Enabled */ ++#define GG82563_PSSR_TX_PAUSE_ENABLED 0x0200 /* 1=Transmit Pause Enabled */ ++#define GG82563_PSSR_LINK_UP 0x0400 /* 1=Link Up */ ++#define GG82563_PSSR_SPEED_DUPLEX_RESOLVED 0x0800 /* 1=Resolved */ ++#define GG82563_PSSR_PAGE_RECEIVED 0x1000 /* 1=Page Received */ ++#define GG82563_PSSR_DUPLEX 0x2000 /* 1-Full-Duplex */ ++#define GG82563_PSSR_SPEED_MASK 0xC000 ++#define GG82563_PSSR_SPEED_10MBPS 0x0000 /* 00=10Mbps */ ++#define GG82563_PSSR_SPEED_100MBPS 0x4000 /* 01=100Mbps */ ++#define GG82563_PSSR_SPEED_1000MBPS 0x8000 /* 10=1000Mbps */ ++ ++/* PHY Specific Status Register 2 (Page 0, Register 19) */ ++#define GG82563_PSSR2_JABBER 0x0001 /* 1=Jabber */ ++#define GG82563_PSSR2_POLARITY_CHANGED 0x0002 /* 1=Polarity Changed */ ++#define GG82563_PSSR2_ENERGY_DETECT_CHANGED 0x0010 /* 1=Energy Detect Changed */ ++#define GG82563_PSSR2_DOWNSHIFT_INTERRUPT 0x0020 /* 1=Downshift Detected */ ++#define GG82563_PSSR2_MDI_CROSSOVER_CHANGE 0x0040 /* 1=Crossover Changed */ ++#define GG82563_PSSR2_FALSE_CARRIER 0x0100 /* 1=False Carrier */ ++#define GG82563_PSSR2_SYMBOL_ERROR 0x0200 /* 1=Symbol Error */ ++#define GG82563_PSSR2_LINK_STATUS_CHANGED 0x0400 /* 1=Link Status Changed */ ++#define GG82563_PSSR2_AUTO_NEG_COMPLETED 0x0800 /* 1=Auto-Neg Completed */ ++#define GG82563_PSSR2_PAGE_RECEIVED 0x1000 /* 1=Page Received */ ++#define GG82563_PSSR2_DUPLEX_CHANGED 0x2000 /* 1=Duplex Changed */ ++#define GG82563_PSSR2_SPEED_CHANGED 0x4000 /* 1=Speed Changed */ ++#define GG82563_PSSR2_AUTO_NEG_ERROR 0x8000 /* 1=Auto-Neg Error */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_10BT_POLARITY_FORCE 0x0002 /* 1=Force Negative Polarity */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_MASK 0x000C ++#define GG82563_PSCR2_1000MB_TEST_SELECT_NORMAL 0x0000 /* 00,01=Normal Operation */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_112NS 0x0008 /* 10=Select 112ns Sequence */ ++#define GG82563_PSCR2_1000MB_TEST_SELECT_16NS 0x000C /* 11=Select 16ns Sequence */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 /* 1=Reverse Auto-Negotiation */ ++#define GG82563_PSCR2_1000BT_DISABLE 0x4000 /* 1=Disable 1000BASE-T */ ++#define GG82563_PSCR2_TRANSMITER_TYPE_MASK 0x8000 ++#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_B 0x0000 /* 0=Class B */ ++#define GG82563_PSCR2_TRANSMITTER_TYPE_CLASS_A 0x8000 /* 1=Class A */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25MHZ 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_2_5MHZ 0x0006 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25MHZ 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 /* 0 = <50M; ++ 1 = 50-80M; ++ 2 = 80-110M; ++ 3 = 
110-140M; ++ 4 = >140M */ ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PHY_LEDS_EN 0x0020 /* 1=PHY LEDs, 0=Kumeran Inband LEDs */ ++#define GG82563_KMCR_FORCE_LINK_UP 0x0040 /* 1=Force Link Up */ ++#define GG82563_KMCR_SUPPRESS_SGMII_EPD_EXT 0x0080 ++#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT_MASK 0x0400 ++#define GG82563_KMCR_MDIO_BUS_SPEED_SELECT 0x0400 /* 1=6.25MHz, 0=0.8MHz */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 /* 1=Enalbe SERDES Electrical Idle */ ++#define GG82563_PMCR_DISABLE_PORT 0x0002 /* 1=Disable Port */ ++#define GG82563_PMCR_DISABLE_SERDES 0x0004 /* 1=Disable SERDES */ ++#define GG82563_PMCR_REVERSE_AUTO_NEG 0x0008 /* 1=Enable Reverse Auto-Negotiation */ ++#define GG82563_PMCR_DISABLE_1000_NON_D0 0x0010 /* 1=Disable 1000Mbps Auto-Neg in non D0 */ ++#define GG82563_PMCR_DISABLE_1000 0x0020 /* 1=Disable 1000Mbps Auto-Neg Always */ ++#define GG82563_PMCR_REVERSE_AUTO_NEG_D0A 0x0040 /* 1=Enable D0a Reverse Auto-Negotiation */ ++#define GG82563_PMCR_FORCE_POWER_STATE 0x0080 /* 1=Force Power State */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_MASK 0x0300 ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_DR 0x0000 /* 00=Dr */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0U 0x0100 /* 01=D0u */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D0A 0x0200 /* 10=D0a */ ++#define GG82563_PMCR_PROGRAMMED_POWER_STATE_D3 0x0300 /* 11=D3 */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding Use */ ++ ++ ++/* Bit definitions for valid PHY IDs. */ ++/* I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID ++#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID ++#define M88E1011_I_REV_4 0x04 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define L1LXT971A_PHY_ID 0x001378E0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++ ++ ++/* Bits... 
++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) \ ++ (((page) << PHY_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++ ++#define IGP3_PHY_PORT_CTRL \ ++ PHY_REG(769, 17) /* Port General Configuration */ ++#define IGP3_PHY_RATE_ADAPT_CTRL \ ++ PHY_REG(769, 25) /* Rate Adapter Control Register */ ++ ++#define IGP3_KMRN_FIFO_CTRL_STATS \ ++ PHY_REG(770, 16) /* KMRN FIFO's control/status register */ ++#define IGP3_KMRN_POWER_MNG_CTRL \ ++ PHY_REG(770, 17) /* KMRN Power Management Control Register */ ++#define IGP3_KMRN_INBAND_CTRL \ ++ PHY_REG(770, 18) /* KMRN Inband Control Register */ ++#define IGP3_KMRN_DIAG \ ++ PHY_REG(770, 19) /* KMRN Diagnostic register */ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 /* RX PCS is not synced */ ++#define IGP3_KMRN_ACK_TIMEOUT \ ++ PHY_REG(770, 20) /* KMRN Acknowledge Timeouts register */ ++ ++#define IGP3_VR_CTRL \ ++ PHY_REG(776, 18) /* Voltage regulator control register */ ++#define IGP3_VR_CTRL_MODE_SHUT 0x0200 /* Enter powerdown, shutdown VRs */ ++ ++#define IGP3_CAPABILITY \ ++ PHY_REG(776, 19) /* IGP3 Capability Register */ ++ ++/* Capabilities for SKU Control */ ++#define IGP3_CAP_INITIATE_TEAM 0x0001 /* Able to initiate a team */ ++#define IGP3_CAP_WFM 0x0002 /* Support WoL and PXE */ ++#define IGP3_CAP_ASF 0x0004 /* Support ASF */ ++#define IGP3_CAP_LPLU 0x0008 /* Support Low Power Link Up */ ++#define IGP3_CAP_DC_AUTO_SPEED 0x0010 /* Support AC/DC Auto Link Speed */ ++#define IGP3_CAP_SPD 0x0020 /* Support Smart Power Down */ ++#define IGP3_CAP_MULT_QUEUE 0x0040 /* Support 2 tx & 2 rx queues */ ++#define IGP3_CAP_RSS 0x0080 /* Support RSS */ ++#define IGP3_CAP_8021PQ 0x0100 /* Support 802.1Q & 802.1p */ ++#define IGP3_CAP_AMT_CB 0x0200 /* Support active manageability and circuit breaker */ ++ ++#define IGP3_PPC_JORDAN_EN 0x0001 ++#define IGP3_PPC_JORDAN_GIGA_SPEED 0x0002 ++ ++#define IGP3_KMRN_PMC_EE_IDLE_LINK_DIS 0x0001 ++#define IGP3_KMRN_PMC_K0S_ENTRY_LATENCY_MASK 0x001E ++#define IGP3_KMRN_PMC_K0S_MODE1_EN_GIGA 0x0020 ++#define IGP3_KMRN_PMC_K0S_MODE1_EN_100 0x0040 ++ ++#define IGP3E1000_PHY_MISC_CTRL 0x1B /* Misc. 
Ctrl register */ ++#define IGP3_PHY_MISC_DUPLEX_MANUAL_SET 0x1000 /* Duplex Manual Set */ ++ ++#define IGP3_KMRN_EXT_CTRL PHY_REG(770, 18) ++#define IGP3_KMRN_EC_DIS_INBAND 0x0080 ++ ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 /* 10/100 PHY */ ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 /* 100BaseTx Extended Status, Control and Address */ ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY special control register */ ++#define IFE_PHY_RCV_FALSE_CARRIER 0x13 /* 100BaseTx Receive False Carrier Counter */ ++#define IFE_PHY_RCV_DISCONNECT 0x14 /* 100BaseTx Receive Disconnet Counter */ ++#define IFE_PHY_RCV_ERROT_FRAME 0x15 /* 100BaseTx Receive Error Frame Counter */ ++#define IFE_PHY_RCV_SYMBOL_ERR 0x16 /* Receive Symbol Error Counter */ ++#define IFE_PHY_PREM_EOF_ERR 0x17 /* 100BaseTx Receive Premature End Of Frame Error Counter */ ++#define IFE_PHY_RCV_EOF_ERR 0x18 /* 10BaseT Receive End Of Frame Error Counter */ ++#define IFE_PHY_TX_JABBER_DETECT 0x19 /* 10BaseT Transmit Jabber Detect Counter */ ++#define IFE_PHY_EQUALIZER 0x1A /* PHY Equalizer Control and Status */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY special control and LED configuration */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control register */ ++#define IFE_PHY_HWI_CONTROL 0x1D /* Hardware Integrity Control (HWI) */ ++ ++#define IFE_PESC_REDUCED_POWER_DOWN_DISABLE 0x2000 /* Defaut 1 = Disable auto reduced power down */ ++#define IFE_PESC_100BTX_POWER_DOWN 0x0400 /* Indicates the power state of 100BASE-TX */ ++#define IFE_PESC_10BTX_POWER_DOWN 0x0200 /* Indicates the power state of 10BASE-T */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 /* Indicates 10BASE-T polarity */ ++#define IFE_PESC_PHY_ADDR_MASK 0x007C /* Bit 6:2 for sampled PHY address */ ++#define IFE_PESC_SPEED 0x0002 /* Auto-negotiation speed result 1=100Mbs, 0=10Mbs */ ++#define IFE_PESC_DUPLEX 0x0001 /* Auto-negotiation duplex result 1=Full, 0=Half */ ++#define IFE_PESC_POLARITY_REVERSED_SHIFT 8 ++ ++#define IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN 0x0100 /* 1 = Dyanmic Power Down disabled */ ++#define IFE_PSC_FORCE_POLARITY 0x0020 /* 1=Reversed Polarity, 0=Normal */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 /* 1=Auto Polarity Disabled, 0=Enabled */ ++#define IFE_PSC_JABBER_FUNC_DISABLE 0x0001 /* 1=Jabber Disabled, 0=Normal Jabber Operation */ ++#define IFE_PSC_FORCE_POLARITY_SHIFT 5 ++#define IFE_PSC_AUTO_POLARITY_DISABLE_SHIFT 4 ++ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable MDI/MDI-X feature, default 0=disabled */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDIX-X, 0=force MDI */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_AUTO_MDIX_COMPLETE 0x0010 /* Resolution algorthm is completed */ ++#define IFE_PMC_MDIX_MODE_SHIFT 6 ++#define IFE_PHC_MDIX_RESET_ALL_MASK 0x0000 /* Disable auto MDI-X */ ++ ++#define IFE_PHC_HWI_ENABLE 0x8000 /* Enable the HWI feature */ ++#define IFE_PHC_ABILITY_CHECK 0x4000 /* 1= Test Passed, 0=failed */ ++#define IFE_PHC_TEST_EXEC 0x2000 /* PHY launch test pulses on the wire */ ++#define IFE_PHC_HIGHZ 0x0200 /* 1 = Open Circuit */ ++#define IFE_PHC_LOWZ 0x0400 /* 1 = Short Circuit */ ++#define IFE_PHC_LOW_HIGH_Z_MASK 0x0600 /* Mask for indication type of problem on the line */ ++#define IFE_PHC_DISTANCE_MASK 0x01FF /* Mask for distance to the cable problem, in 80cm granularity */ ++#define IFE_PHC_RESET_ALL_MASK 0x0000 /* Disable HWI */ ++#define IFE_PSCL_PROBE_MODE 0x0020 
/* LED Probe mode */ ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++#define ICH8_FLASH_COMMAND_TIMEOUT 500 /* 500 ms , should be adjusted */ ++#define ICH8_FLASH_CYCLE_REPEAT_COUNT 10 /* 10 cycles , should be adjusted */ ++#define ICH8_FLASH_SEG_SIZE_256 256 ++#define ICH8_FLASH_SEG_SIZE_4K 4096 ++#define ICH8_FLASH_SEG_SIZE_64K 65536 ++ ++#define ICH8_CYCLE_READ 0x0 ++#define ICH8_CYCLE_RESERVED 0x1 ++#define ICH8_CYCLE_WRITE 0x2 ++#define ICH8_CYCLE_ERASE 0x3 ++ ++#define ICH8_FLASH_GFPREG 0x0000 ++#define ICH8_FLASH_HSFSTS 0x0004 ++#define ICH8_FLASH_HSFCTL 0x0006 ++#define ICH8_FLASH_FADDR 0x0008 ++#define ICH8_FLASH_FDATA0 0x0010 ++#define ICH8_FLASH_FRACC 0x0050 ++#define ICH8_FLASH_FREG0 0x0054 ++#define ICH8_FLASH_FREG1 0x0058 ++#define ICH8_FLASH_FREG2 0x005C ++#define ICH8_FLASH_FREG3 0x0060 ++#define ICH8_FLASH_FPR0 0x0074 ++#define ICH8_FLASH_FPR1 0x0078 ++#define ICH8_FLASH_SSFSTS 0x0090 ++#define ICH8_FLASH_SSFCTL 0x0092 ++#define ICH8_FLASH_PREOP 0x0094 ++#define ICH8_FLASH_OPTYPE 0x0096 ++#define ICH8_FLASH_OPMENU 0x0098 ++ ++#define ICH8_FLASH_REG_MAPSIZE 0x00A0 ++#define ICH8_FLASH_SECTOR_SIZE 4096 ++#define ICH8_GFPREG_BASE_MASK 0x1FFF ++#define ICH8_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++ ++/* ICH8 GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++#ifdef E1000_BIG_ENDIAN ++ uint16_t reserved2 :6; ++ uint16_t fldesvalid :1; ++ uint16_t flockdn :1; ++ uint16_t flcdone :1; ++ uint16_t flcerr :1; ++ uint16_t dael :1; ++ uint16_t berasesz :2; ++ uint16_t flcinprog :1; ++ uint16_t reserved1 :2; ++#else ++ uint16_t flcdone :1; /* bit 0 Flash Cycle Done */ ++ uint16_t flcerr :1; /* bit 1 Flash Cycle Error */ ++ uint16_t dael :1; /* bit 2 Direct Access error Log */ ++ uint16_t berasesz :2; /* bit 4:3 Block/Sector Erase Size */ ++ uint16_t flcinprog :1; /* bit 5 flash SPI cycle in Progress */ ++ uint16_t reserved1 :2; /* bit 13:6 Reserved */ ++ uint16_t reserved2 :6; /* bit 13:6 Reserved */ ++ uint16_t fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ uint16_t flockdn :1; /* bit 15 Flash Configuration Lock-Down */ ++#endif ++ } hsf_status; ++ uint16_t regval; ++}; ++ ++/* ICH8 GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++#ifdef E1000_BIG_ENDIAN ++ uint16_t fldbcount :2; ++ uint16_t flockdn :6; ++ uint16_t flcgo :1; ++ uint16_t flcycle :2; ++ uint16_t reserved :5; ++#else ++ uint16_t flcgo :1; /* 0 Flash Cycle Go */ ++ uint16_t flcycle :2; /* 2:1 Flash Cycle */ ++ uint16_t reserved :5; /* 7:3 Reserved */ ++ uint16_t fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ uint16_t flockdn :6; /* 15:10 Reserved */ ++#endif ++ } hsf_ctrl; ++ uint16_t regval; ++}; ++ ++/* ICH8 Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++#ifdef E1000_BIG_ENDIAN ++ uint32_t gmwag :8; ++ uint32_t gmrag :8; ++ uint32_t grwa :8; ++ uint32_t grra :8; ++#else ++ uint32_t grra :8; /* 0:7 GbE region Read Access */ ++ uint32_t grwa :8; /* 8:15 GbE region Write Access */ ++ uint32_t gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ uint32_t gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++#endif ++ } hsf_flregacc; ++ uint16_t regval; ++}; ++ ++/* Miscellaneous PHY bit definitions. 
*/ ++#define PHY_PREAMBLE 0xFFFFFFFF ++#define PHY_SOF 0x01 ++#define PHY_OP_READ 0x02 ++#define PHY_OP_WRITE 0x01 ++#define PHY_TURNAROUND 0x02 ++#define PHY_PREAMBLE_SIZE 32 ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++#define E1000_PHY_ADDRESS 0x01 ++#define PHY_AUTO_NEG_TIME 45 /* 4.5 Seconds */ ++#define PHY_FORCE_TIME 20 /* 2.0 Seconds */ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define DEVICE_SPEED_MASK 0x00000300 /* Device Ctrl Reg Speed Mask */ ++#define REG4_SPEED_MASK 0x01E0 ++#define REG9_SPEED_MASK 0x0300 ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 ++#define ADVERTISE_1000_FULL 0x0020 ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F /* Everything but 1000-Half */ ++#define AUTONEG_ADVERTISE_10_100_ALL 0x000F /* All 10/100 speeds*/ ++#define AUTONEG_ADVERTISE_10_ALL 0x0003 /* 10Mbps Full & Half speeds*/ ++ ++#endif /* _E1000_HW_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/Makefile 2021-04-07 16:01:27.428633879 +0800 +@@ -0,0 +1,8 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000) += rt_e1000.o ++ ++rt_e1000-y := \ ++ e1000_hw.o \ ++ e1000_main.o \ ++ e1000_param.o +--- linux/drivers/xenomai/net/drivers/e1000/e1000_osdep.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_osdep.h 2021-04-07 16:01:27.423633886 +0800 +@@ -0,0 +1,148 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++ ++/* glue for the OS independent part of e1000 ++ * includes register access macros ++ */ ++ ++#ifndef _E1000_OSDEP_H_ ++#define _E1000_OSDEP_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "kcompat.h" ++ ++#define usec_delay(x) udelay(x) ++#ifndef msec_delay ++#define msec_delay(x) do { if(in_interrupt()) { \ ++ /* Don't mdelay in interrupt context! */ \ ++ BUG(); \ ++ } else { \ ++ msleep(x); \ ++ } } while (0) ++ ++/* Some workarounds require millisecond delays and are run during interrupt ++ * context. 
Most notably, when establishing link, the phy may need tweaking ++ * but cannot process phy register reads/writes faster than millisecond ++ * intervals...and we establish link due to a "link status change" interrupt. ++ */ ++#define msec_delay_irq(x) mdelay(x) ++#endif ++ ++#define PCI_COMMAND_REGISTER PCI_COMMAND ++#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE ++ ++typedef enum { ++#undef FALSE ++ FALSE = 0, ++#undef TRUE ++ TRUE = 1 ++} boolean_t; ++ ++#define MSGOUT(S, A, B) printk(KERN_DEBUG S "\n", A, B) ++ ++#ifdef DBG ++#define DEBUGOUT(S) printk(KERN_DEBUG S "\n") ++#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S "\n", A) ++#else ++#define DEBUGOUT(S) ++#define DEBUGOUT1(S, A...) ++#endif ++ ++#define DEBUGFUNC(F) DEBUGOUT(F) ++#define DEBUGOUT2 DEBUGOUT1 ++#define DEBUGOUT3 DEBUGOUT2 ++#define DEBUGOUT7 DEBUGOUT3 ++ ++#ifdef __BIG_ENDIAN ++#define E1000_BIG_ENDIAN __BIG_ENDIAN ++#endif ++ ++#define E1000_WRITE_REG(a, reg, value) ( \ ++ writel((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg)))) ++ ++#define E1000_READ_REG(a, reg) ( \ ++ readl((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg))) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ ++ writel((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ ++ readl((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 2))) ++ ++#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY ++#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY ++ ++#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ ++ writew((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 1)))) ++ ++#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ ++ readw((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ ((offset) << 1))) ++ ++#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ ++ writeb((value), ((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ (offset)))) ++ ++#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ ++ readb((a)->hw_addr + \ ++ (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \ ++ (offset))) ++ ++#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, STATUS) ++ ++#define E1000_WRITE_ICH8_REG(a, reg, value) ( \ ++ writel((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_ICH8_REG(a, reg) ( \ ++ readl((a)->flash_address + reg)) ++ ++#define E1000_WRITE_ICH8_REG16(a, reg, value) ( \ ++ writew((value), ((a)->flash_address + reg))) ++ ++#define E1000_READ_ICH8_REG16(a, reg) ( \ ++ readw((a)->flash_address + reg)) ++ ++ ++#endif /* _E1000_OSDEP_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/kcompat.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/kcompat.h 2021-04-07 16:01:27.418633893 +0800 +@@ -0,0 +1,446 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. 
++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _KCOMPAT_H_ ++#define _KCOMPAT_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef IRQ_HANDLED ++#define irqreturn_t void ++#define IRQ_HANDLED ++#define IRQ_NONE ++#endif ++ ++#ifndef SET_NETDEV_DEV ++#define SET_NETDEV_DEV(net, pdev) ++#endif ++ ++/* Useful settings for rtnet */ ++#undef MAX_SKB_FRAGS ++#undef NETIF_F_TSO ++#undef E1000_COUNT_ICR ++#undef NETIF_F_HW_VLAN_TX ++#undef CONFIG_NET_POLL_CONTROLLER ++#undef ETHTOOL_OPS_COMPAT ++#undef ETHTOOL_GPERMADDR ++ ++#ifndef HAVE_FREE_NETDEV ++#define free_netdev(x) kfree(x) ++#endif ++ ++#undef E1000_NAPI ++#undef CONFIG_E1000_NAPI ++ ++#undef CONFIG_E1000_DISABLE_PACKET_SPLIT ++#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 ++ ++ ++#ifdef DISABLE_PCI_MSI ++#undef CONFIG_PCI_MSI ++#endif ++ ++#ifdef DISABLE_PM ++#undef CONFIG_PM ++#endif ++#undef CONFIG_PM ++ ++#ifndef module_param ++#define module_param(v,t,p) MODULE_PARM(v, "i"); ++#endif ++ ++#ifndef DMA_64BIT_MASK ++#define DMA_64BIT_MASK 0xffffffffffffffffULL ++#endif ++ ++#ifndef DMA_32BIT_MASK ++#define DMA_32BIT_MASK 0x00000000ffffffffULL ++#endif ++ ++/*****************************************************************************/ ++#ifndef unlikely ++#define unlikely(_x) _x ++#define likely(_x) _x ++#endif ++/*****************************************************************************/ ++ ++#ifndef PCI_DEVICE ++#define PCI_DEVICE(vend,dev) \ ++ .vendor = (vend), .device = (dev), \ ++ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID ++#endif ++ ++/*****************************************************************************/ ++/* Installations with ethtool version without eeprom, adapter id, or statistics ++ * support */ ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x1d ++#undef ethtool_drvinfo ++#define ethtool_drvinfo k_ethtool_drvinfo ++struct k_ethtool_drvinfo { ++ uint32_t cmd; ++ char driver[32]; ++ char version[32]; ++ char fw_version[32]; ++ char bus_info[32]; ++ char reserved1[32]; ++ char reserved2[16]; ++ uint32_t n_stats; ++ uint32_t testinfo_len; ++ uint32_t eedump_len; ++ uint32_t regdump_len; ++}; ++ ++struct ethtool_stats { ++ uint32_t cmd; ++ uint32_t n_stats; ++ uint64_t data[0]; ++}; ++ ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x1c ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x1b ++enum ethtool_stringset { ++ ETH_SS_TEST = 0, ++ ETH_SS_STATS, ++}; ++struct ethtool_gstrings { ++ u32 cmd; /* ETHTOOL_GSTRINGS */ ++ u32 string_set; /* string set id e.c. 
ETH_SS_TEST, etc*/ ++ u32 len; /* number of strings in the string set */ ++ u8 data[0]; ++}; ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x1a ++enum ethtool_test_flags { ++ ETH_TEST_FL_OFFLINE = (1 << 0), ++ ETH_TEST_FL_FAILED = (1 << 1), ++}; ++struct ethtool_test { ++ uint32_t cmd; ++ uint32_t flags; ++ uint32_t reserved; ++ uint32_t len; ++ uint64_t data[0]; ++}; ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0xb ++#undef ETHTOOL_GREGS ++struct ethtool_eeprom { ++ uint32_t cmd; ++ uint32_t magic; ++ uint32_t offset; ++ uint32_t len; ++ uint8_t data[0]; ++}; ++ ++struct ethtool_value { ++ uint32_t cmd; ++ uint32_t data; ++}; ++ ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0xa ++#endif /* Ethtool version without link support */ ++#endif /* Ethtool version without eeprom support */ ++#endif /* Ethtool version without test support */ ++#endif /* Ethtool version without strings support */ ++#endif /* Ethtool version wihtout adapter id support */ ++#endif /* Ethtool version without statistics support */ ++ ++#ifndef ETHTOOL_GREGS ++#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ ++#define ethtool_regs _kc_ethtool_regs ++/* for passing big chunks of data */ ++struct _kc_ethtool_regs { ++ u32 cmd; ++ u32 version; /* driver-specific, indicates different chips/revs */ ++ u32 len; /* bytes */ ++ u8 data[0]; ++}; ++#endif ++#ifndef ETHTOOL_GMSGLVL ++#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ ++#endif ++#ifndef ETHTOOL_SMSGLVL ++#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ ++#endif ++#ifndef ETHTOOL_NWAY_RST ++#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ ++#endif ++#ifndef ETHTOOL_GLINK ++#define ETHTOOL_GLINK 0x0000000a /* Get link status */ ++#endif ++#ifndef ETHTOOL_GEEPROM ++#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ ++#endif ++#ifndef ETHTOOL_SEEPROM ++#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ ++#endif ++#ifndef ETHTOOL_GCOALESCE ++#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ ++/* for configuring coalescing parameters of chip */ ++#define ethtool_coalesce _kc_ethtool_coalesce ++struct _kc_ethtool_coalesce { ++ u32 cmd; /* ETHTOOL_{G,S}COALESCE */ ++ ++ /* How many usecs to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_max_coalesced_frames ++ * is used. ++ */ ++ u32 rx_coalesce_usecs; ++ ++ /* How many packets to delay an RX interrupt after ++ * a packet arrives. If 0, only rx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause RX interrupts to never be ++ * generated. ++ */ ++ u32 rx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 rx_coalesce_usecs_irq; ++ u32 rx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_max_coalesced_frames ++ * is used. ++ */ ++ u32 tx_coalesce_usecs; ++ ++ /* How many packets to delay a TX interrupt after ++ * a packet is sent. If 0, only tx_coalesce_usecs is ++ * used. It is illegal to set both usecs and max frames ++ * to zero as this would cause TX interrupts to never be ++ * generated. ++ */ ++ u32 tx_max_coalesced_frames; ++ ++ /* Same as above two parameters, except that these values ++ * apply while an IRQ is being serviced by the host. 
Not ++ * all cards support this feature and the values are ignored ++ * in that case. ++ */ ++ u32 tx_coalesce_usecs_irq; ++ u32 tx_max_coalesced_frames_irq; ++ ++ /* How many usecs to delay in-memory statistics ++ * block updates. Some drivers do not have an in-memory ++ * statistic block, and in such cases this value is ignored. ++ * This value must not be zero. ++ */ ++ u32 stats_block_coalesce_usecs; ++ ++ /* Adaptive RX/TX coalescing is an algorithm implemented by ++ * some drivers to improve latency under low packet rates and ++ * improve throughput under high packet rates. Some drivers ++ * only implement one of RX or TX adaptive coalescing. Anything ++ * not implemented by the driver causes these values to be ++ * silently ignored. ++ */ ++ u32 use_adaptive_rx_coalesce; ++ u32 use_adaptive_tx_coalesce; ++ ++ /* When the packet rate (measured in packets per second) ++ * is below pkt_rate_low, the {rx,tx}_*_low parameters are ++ * used. ++ */ ++ u32 pkt_rate_low; ++ u32 rx_coalesce_usecs_low; ++ u32 rx_max_coalesced_frames_low; ++ u32 tx_coalesce_usecs_low; ++ u32 tx_max_coalesced_frames_low; ++ ++ /* When the packet rate is below pkt_rate_high but above ++ * pkt_rate_low (both measured in packets per second) the ++ * normal {rx,tx}_* coalescing parameters are used. ++ */ ++ ++ /* When the packet rate is (measured in packets per second) ++ * is above pkt_rate_high, the {rx,tx}_*_high parameters are ++ * used. ++ */ ++ u32 pkt_rate_high; ++ u32 rx_coalesce_usecs_high; ++ u32 rx_max_coalesced_frames_high; ++ u32 tx_coalesce_usecs_high; ++ u32 tx_max_coalesced_frames_high; ++ ++ /* How often to do adaptive coalescing packet rate sampling, ++ * measured in seconds. Must not be zero. ++ */ ++ u32 rate_sample_interval; ++}; ++#endif ++#ifndef ETHTOOL_SCOALESCE ++#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ ++#endif ++#ifndef ETHTOOL_GRINGPARAM ++#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ ++/* for configuring RX/TX ring parameters */ ++#define ethtool_ringparam _kc_ethtool_ringparam ++struct _kc_ethtool_ringparam { ++ u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ ++ ++ /* Read only attributes. These indicate the maximum number ++ * of pending RX/TX ring entries the driver will allow the ++ * user to set. ++ */ ++ u32 rx_max_pending; ++ u32 rx_mini_max_pending; ++ u32 rx_jumbo_max_pending; ++ u32 tx_max_pending; ++ ++ /* Values changeable by the user. The valid values are ++ * in the range 1 to the "*_max_pending" counterpart above. ++ */ ++ u32 rx_pending; ++ u32 rx_mini_pending; ++ u32 rx_jumbo_pending; ++ u32 tx_pending; ++}; ++#endif ++#ifndef ETHTOOL_SRINGPARAM ++#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ ++#endif ++#ifndef ETHTOOL_GPAUSEPARAM ++#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ ++/* for configuring link flow control parameters */ ++#define ethtool_pauseparam _kc_ethtool_pauseparam ++struct _kc_ethtool_pauseparam { ++ u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ ++ ++ /* If the link is being auto-negotiated (via ethtool_cmd.autoneg ++ * being true) the user may set 'autonet' here non-zero to have the ++ * pause parameters be auto-negotiated too. In such a case, the ++ * {rx,tx}_pause values below determine what capabilities are ++ * advertised. ++ * ++ * If 'autoneg' is zero or the link is not being auto-negotiated, ++ * then {rx,tx}_pause force the driver to use/not-use pause ++ * flow control. 
++ */ ++ u32 autoneg; ++ u32 rx_pause; ++ u32 tx_pause; ++}; ++#endif ++#ifndef ETHTOOL_SPAUSEPARAM ++#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ ++#endif ++#ifndef ETHTOOL_GRXCSUM ++#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SRXCSUM ++#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GTXCSUM ++#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STXCSUM ++#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_GSG ++#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable ++ * (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_SSG ++#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable ++ * (ethtool_value). */ ++#endif ++#ifndef ETHTOOL_TEST ++#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ ++#endif ++#ifndef ETHTOOL_GSTRINGS ++#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ ++#endif ++#ifndef ETHTOOL_PHYS_ID ++#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ ++#endif ++#ifndef ETHTOOL_GSTATS ++#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ ++#endif ++#ifndef ETHTOOL_GTSO ++#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ ++#endif ++#ifndef ETHTOOL_STSO ++#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ ++#endif ++ ++#ifndef NET_IP_ALIGN ++#define NET_IP_ALIGN 2 ++#endif ++ ++#ifndef NETDEV_TX_OK ++#define NETDEV_TX_OK 0 /* driver took care of the packet */ ++#endif ++ ++#ifndef NETDEV_TX_BUSY ++#define NETDEV_TX_BUSY 1 /* driver tx path was busy */ ++#endif ++ ++#ifndef NETDEV_TX_LOCKED ++#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ ++#endif ++ ++/* if we do not have the infrastructure to detect if skb_header is cloned * ++ * just return false in all cases */ ++#ifndef SKB_DATAREF_SHIFT ++#define skb_header_cloned(x) 0 ++#endif /* SKB_DATAREF_SHIFT not defined */ ++ ++#ifndef WARN_ON ++#define WARN_ON(x) ++#endif ++ ++#define USE_DRIVER_SHUTDOWN_HANDLER ++ ++#ifndef SA_PROBEIRQ ++#define SA_PROBEIRQ 0 ++#endif ++ ++#endif /* _KCOMPAT_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_hw.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_hw.c 2021-04-07 16:01:27.414633899 +0800 +@@ -0,0 +1,9092 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* e1000_hw.c ++ * Shared functions for accessing and configuring the MAC ++ */ ++ ++ ++#include "e1000_hw.h" ++ ++static int32_t e1000_set_phy_type(struct e1000_hw *hw); ++static void e1000_phy_init_script(struct e1000_hw *hw); ++static int32_t e1000_setup_copper_link(struct e1000_hw *hw); ++static int32_t e1000_setup_fiber_serdes_link(struct e1000_hw *hw); ++static int32_t e1000_adjust_serdes_amplitude(struct e1000_hw *hw); ++static int32_t e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++static int32_t e1000_config_mac_to_phy(struct e1000_hw *hw); ++static void e1000_raise_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); ++static void e1000_lower_mdi_clk(struct e1000_hw *hw, uint32_t *ctrl); ++static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, uint32_t data, ++ uint16_t count); ++static uint16_t e1000_shift_in_mdi_bits(struct e1000_hw *hw); ++static int32_t e1000_phy_reset_dsp(struct e1000_hw *hw); ++static int32_t e1000_write_eeprom_spi(struct e1000_hw *hw, uint16_t offset, ++ uint16_t words, uint16_t *data); ++static int32_t e1000_write_eeprom_microwire(struct e1000_hw *hw, ++ uint16_t offset, uint16_t words, ++ uint16_t *data); ++static int32_t e1000_spi_eeprom_ready(struct e1000_hw *hw); ++static void e1000_raise_ee_clk(struct e1000_hw *hw, uint32_t *eecd); ++static void e1000_lower_ee_clk(struct e1000_hw *hw, uint32_t *eecd); ++static void e1000_shift_out_ee_bits(struct e1000_hw *hw, uint16_t data, ++ uint16_t count); ++static int32_t e1000_write_phy_reg_ex(struct e1000_hw *hw, uint32_t reg_addr, ++ uint16_t phy_data); ++static int32_t e1000_read_phy_reg_ex(struct e1000_hw *hw,uint32_t reg_addr, ++ uint16_t *phy_data); ++static uint16_t e1000_shift_in_ee_bits(struct e1000_hw *hw, uint16_t count); ++static int32_t e1000_acquire_eeprom(struct e1000_hw *hw); ++static void e1000_release_eeprom(struct e1000_hw *hw); ++static void e1000_standby_eeprom(struct e1000_hw *hw); ++static int32_t e1000_set_vco_speed(struct e1000_hw *hw); ++static int32_t e1000_polarity_reversal_workaround(struct e1000_hw *hw); ++static int32_t e1000_set_phy_mode(struct e1000_hw *hw); ++static int32_t e1000_host_if_read_cookie(struct e1000_hw *hw, uint8_t *buffer); ++static uint8_t e1000_calculate_mng_checksum(char *buffer, uint32_t length); ++static int32_t e1000_configure_kmrn_for_10_100(struct e1000_hw *hw, ++ uint16_t duplex); ++static int32_t e1000_configure_kmrn_for_1000(struct e1000_hw *hw); ++ ++/* IGP cable length table */ ++static const ++uint16_t e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = ++ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ++ 5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25, ++ 25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40, ++ 40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60, ++ 60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90, ++ 90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, ++ 100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, ++ 110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120}; ++ ++static const ++uint16_t e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] = ++ { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, ++ 0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, ++ 6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, ++ 21, 26, 31, 35, 
40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, ++ 40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, ++ 60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, ++ 83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124, ++ 104, 109, 114, 118, 121, 124}; ++ ++ ++/****************************************************************************** ++ * Set the phy type member in the hw struct. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_set_phy_type(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_set_phy_type"); ++ ++ if (hw->mac_type == e1000_undefined) ++ return -E1000_ERR_PHY_TYPE; ++ ++ switch (hw->phy_id) { ++ case M88E1000_E_PHY_ID: ++ case M88E1000_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ hw->phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: ++ if (hw->mac_type == e1000_82541 || ++ hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547 || ++ hw->mac_type == e1000_82547_rev_2) { ++ hw->phy_type = e1000_phy_igp; ++ break; ++ } ++ case IGP03E1000_E_PHY_ID: ++ hw->phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ hw->phy_type = e1000_phy_ife; ++ break; ++ case GG82563_E_PHY_ID: ++ if (hw->mac_type == e1000_80003es2lan) { ++ hw->phy_type = e1000_phy_gg82563; ++ break; ++ } ++ /* Fall Through */ ++ default: ++ /* Should never have loaded on this device */ ++ hw->phy_type = e1000_phy_undefined; ++ return -E1000_ERR_PHY_TYPE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * IGP phy init script - initializes the GbE PHY ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_phy_init_script(struct e1000_hw *hw) ++{ ++ uint32_t ret_val; ++ uint16_t phy_saved_data; ++ ++ DEBUGFUNC("e1000_phy_init_script"); ++ ++ if (hw->phy_init_script) { ++ msec_delay(20); ++ ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of this routine. 
*/ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ /* Disabled the PHY transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ msec_delay(20); ++ ++ e1000_write_phy_reg(hw,0x0000,0x0140); ++ ++ msec_delay(5); ++ ++ switch (hw->mac_type) { ++ case e1000_82541: ++ case e1000_82547: ++ e1000_write_phy_reg(hw, 0x1F95, 0x0001); ++ ++ e1000_write_phy_reg(hw, 0x1F71, 0xBD21); ++ ++ e1000_write_phy_reg(hw, 0x1F79, 0x0018); ++ ++ e1000_write_phy_reg(hw, 0x1F30, 0x1600); ++ ++ e1000_write_phy_reg(hw, 0x1F31, 0x0014); ++ ++ e1000_write_phy_reg(hw, 0x1F32, 0x161C); ++ ++ e1000_write_phy_reg(hw, 0x1F94, 0x0003); ++ ++ e1000_write_phy_reg(hw, 0x1F96, 0x003F); ++ ++ e1000_write_phy_reg(hw, 0x2010, 0x0008); ++ break; ++ ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ e1000_write_phy_reg(hw, 0x1F73, 0x0099); ++ break; ++ default: ++ break; ++ } ++ ++ e1000_write_phy_reg(hw, 0x0000, 0x3300); ++ ++ msec_delay(20); ++ ++ /* Now enable the transmitter */ ++ e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (hw->mac_type == e1000_82547) { ++ uint16_t fused, fine, coarse; ++ ++ /* Move to analog registers page */ ++ e1000_read_phy_reg(hw, IGP01E1000_ANALOG_SPARE_FUSE_STATUS, &fused); ++ ++ if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) { ++ e1000_read_phy_reg(hw, IGP01E1000_ANALOG_FUSE_STATUS, &fused); ++ ++ fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK; ++ coarse = fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK; ++ ++ if (coarse > IGP01E1000_ANALOG_FUSE_COARSE_THRESH) { ++ coarse -= IGP01E1000_ANALOG_FUSE_COARSE_10; ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_1; ++ } else if (coarse == IGP01E1000_ANALOG_FUSE_COARSE_THRESH) ++ fine -= IGP01E1000_ANALOG_FUSE_FINE_10; ++ ++ fused = (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) | ++ (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) | ++ (coarse & IGP01E1000_ANALOG_FUSE_COARSE_MASK); ++ ++ e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_CONTROL, fused); ++ e1000_write_phy_reg(hw, IGP01E1000_ANALOG_FUSE_BYPASS, ++ IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL); ++ } ++ } ++ } ++} ++ ++/****************************************************************************** ++ * Set the mac type member in the hw struct. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_set_mac_type(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_set_mac_type"); ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82542: ++ switch (hw->revision_id) { ++ case E1000_82542_2_0_REV_ID: ++ hw->mac_type = e1000_82542_rev2_0; ++ break; ++ case E1000_82542_2_1_REV_ID: ++ hw->mac_type = e1000_82542_rev2_1; ++ break; ++ default: ++ /* Invalid 82542 revision ID */ ++ return -E1000_ERR_MAC_TYPE; ++ } ++ break; ++ case E1000_DEV_ID_82543GC_FIBER: ++ case E1000_DEV_ID_82543GC_COPPER: ++ hw->mac_type = e1000_82543; ++ break; ++ case E1000_DEV_ID_82544EI_COPPER: ++ case E1000_DEV_ID_82544EI_FIBER: ++ case E1000_DEV_ID_82544GC_COPPER: ++ case E1000_DEV_ID_82544GC_LOM: ++ hw->mac_type = e1000_82544; ++ break; ++ case E1000_DEV_ID_82540EM: ++ case E1000_DEV_ID_82540EM_LOM: ++ case E1000_DEV_ID_82540EP: ++ case E1000_DEV_ID_82540EP_LOM: ++ case E1000_DEV_ID_82540EP_LP: ++ hw->mac_type = e1000_82540; ++ break; ++ case E1000_DEV_ID_82545EM_COPPER: ++ case E1000_DEV_ID_82545EM_FIBER: ++ hw->mac_type = e1000_82545; ++ break; ++ case E1000_DEV_ID_82545GM_COPPER: ++ case E1000_DEV_ID_82545GM_FIBER: ++ case E1000_DEV_ID_82545GM_SERDES: ++ hw->mac_type = e1000_82545_rev_3; ++ break; ++ case E1000_DEV_ID_82546EB_COPPER: ++ case E1000_DEV_ID_82546EB_FIBER: ++ case E1000_DEV_ID_82546EB_QUAD_COPPER: ++ hw->mac_type = e1000_82546; ++ break; ++ case E1000_DEV_ID_82546GB_COPPER: ++ case E1000_DEV_ID_82546GB_FIBER: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82546GB_PCIE: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER: ++ case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: ++ hw->mac_type = e1000_82546_rev_3; ++ break; ++ case E1000_DEV_ID_82541EI: ++ case E1000_DEV_ID_82541EI_MOBILE: ++ case E1000_DEV_ID_82541ER_LOM: ++ hw->mac_type = e1000_82541; ++ break; ++ case E1000_DEV_ID_82541ER: ++ case E1000_DEV_ID_82541GI: ++ case E1000_DEV_ID_82541GI_LF: ++ case E1000_DEV_ID_82541GI_MOBILE: ++ hw->mac_type = e1000_82541_rev_2; ++ break; ++ case E1000_DEV_ID_82547EI: ++ case E1000_DEV_ID_82547EI_MOBILE: ++ hw->mac_type = e1000_82547; ++ break; ++ case E1000_DEV_ID_82547GI: ++ hw->mac_type = e1000_82547_rev_2; ++ break; ++ case E1000_DEV_ID_82571EB_COPPER: ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE: ++ hw->mac_type = e1000_82571; ++ break; ++ case E1000_DEV_ID_82572EI_COPPER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_82572EI: ++ hw->mac_type = e1000_82572; ++ break; ++ case E1000_DEV_ID_82573E: ++ case E1000_DEV_ID_82573E_IAMT: ++ case E1000_DEV_ID_82573L: ++ hw->mac_type = e1000_82573; ++ break; ++ case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: ++ case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->mac_type = e1000_80003es2lan; ++ break; ++ case E1000_DEV_ID_ICH8_IGP_M_AMT: ++ case E1000_DEV_ID_ICH8_IGP_AMT: ++ case E1000_DEV_ID_ICH8_IGP_C: ++ case E1000_DEV_ID_ICH8_IFE: ++ case E1000_DEV_ID_ICH8_IFE_GT: ++ case E1000_DEV_ID_ICH8_IFE_G: ++ case E1000_DEV_ID_ICH8_IGP_M: ++ hw->mac_type = e1000_ich8lan; ++ break; ++ default: ++ /* Should never have loaded on this device */ ++ return -E1000_ERR_MAC_TYPE; ++ } ++ ++ switch (hw->mac_type) { ++ case e1000_ich8lan: ++ hw->swfwhw_semaphore_present = TRUE; ++ 
hw->asf_firmware_present = TRUE; ++ break; ++ case e1000_80003es2lan: ++ hw->swfw_sync_present = TRUE; ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ hw->eeprom_semaphore_present = TRUE; ++ /* fall through */ ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ hw->asf_firmware_present = TRUE; ++ break; ++ default: ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * Set media type and TBI compatibility. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * **************************************************************************/ ++void ++e1000_set_media_type(struct e1000_hw *hw) ++{ ++ uint32_t status; ++ ++ DEBUGFUNC("e1000_set_media_type"); ++ ++ if (hw->mac_type != e1000_82543) { ++ /* tbi_compatibility is only valid on 82543 */ ++ hw->tbi_compatibility_en = FALSE; ++ } ++ ++ switch (hw->device_id) { ++ case E1000_DEV_ID_82545GM_SERDES: ++ case E1000_DEV_ID_82546GB_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ hw->media_type = e1000_media_type_fiber; ++ break; ++ case e1000_ich8lan: ++ case e1000_82573: ++ /* The STATUS_TBIMODE bit is reserved or reused for the this ++ * device. ++ */ ++ hw->media_type = e1000_media_type_copper; ++ break; ++ default: ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_TBIMODE) { ++ hw->media_type = e1000_media_type_fiber; ++ /* tbi_compatibility not valid on fiber */ ++ hw->tbi_compatibility_en = FALSE; ++ } else { ++ hw->media_type = e1000_media_type_copper; ++ } ++ break; ++ } ++ } ++} ++ ++/****************************************************************************** ++ * Reset the transmit and receive units; mask and clear all interrupts. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_reset_hw(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t ctrl_ext; ++ uint32_t icr; ++ uint32_t manc; ++ uint32_t led_ctrl; ++ uint32_t timeout; ++ uint32_t extcnf_ctrl; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_reset_hw"); ++ ++ /* For 82542 (rev 2.0), disable MWI before issuing a device reset */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ } ++ ++ if (hw->bus_type == e1000_bus_type_pci_express) { ++ /* Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ if (e1000_disable_pciex_master(hw) != E1000_SUCCESS) { ++ DEBUGOUT("PCI-E Master disable polling has failed.\n"); ++ } ++ } ++ ++ /* Clear interrupt mask to stop board from generating interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ++ /* Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC with ++ * the global reset. ++ */ ++ E1000_WRITE_REG(hw, RCTL, 0); ++ E1000_WRITE_REG(hw, TCTL, E1000_TCTL_PSP); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* The tbi_compatibility_on Flag must be cleared when Rctl is cleared. 
*/ ++ hw->tbi_compatibility_on = FALSE; ++ ++ /* Delay to allow any outstanding PCI transactions to complete before ++ * resetting the device ++ */ ++ msec_delay(10); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Must reset the PHY before resetting the MAC */ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_PHY_RST)); ++ msec_delay(5); ++ } ++ ++ /* Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. */ ++ if (hw->mac_type == e1000_82573) { ++ timeout = 10; ++ ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ do { ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ else ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ msec_delay(2); ++ timeout--; ++ } while (timeout); ++ } ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac_type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ E1000_WRITE_REG(hw, PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. */ ++ E1000_WRITE_REG(hw, PBS, E1000_PBS_16K); ++ } ++ ++ /* Issue a global reset to the MAC. This will reset the chip's ++ * transmit, receive, DMA, and link units. It will not effect ++ * the current PCI configuration. The global reset bit is self- ++ * clearing, and should clear within a microsecond. ++ */ ++ DEBUGOUT("Issuing a global reset to MAC\n"); ++ ++ switch (hw->mac_type) { ++ case e1000_82544: ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82546: ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ /* These controllers can't ack the 64-bit write when issuing the ++ * reset, so use IO-mapping as a workaround to issue the reset */ ++ E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ break; ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ /* Reset is performed on a shadow of the control register */ ++ E1000_WRITE_REG(hw, CTRL_DUP, (ctrl | E1000_CTRL_RST)); ++ break; ++ case e1000_ich8lan: ++ if (!hw->phy_reset_disable && ++ e1000_check_phy_reset_block(hw) == E1000_SUCCESS) { ++ /* e1000_ich8lan PHY HW reset requires MAC CORE reset ++ * at the same time to make sure the interface between ++ * MAC and the external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ } ++ ++ e1000_get_software_flag(hw); ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ msec_delay(5); ++ break; ++ default: ++ E1000_WRITE_REG(hw, CTRL, (ctrl | E1000_CTRL_RST)); ++ break; ++ } ++ ++ /* After MAC reset, force reload of EEPROM to restore power-on settings to ++ * device. Later controllers reload the EEPROM automatically, so just wait ++ * for reload to complete. 
++ */ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* Wait for reset to complete */ ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ /* Wait for EEPROM reload */ ++ msec_delay(2); ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ /* Wait for EEPROM reload */ ++ msec_delay(20); ++ break; ++ case e1000_82573: ++ if (e1000_is_onboard_nvm_eeprom(hw) == FALSE) { ++ usec_delay(10); ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ /* fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ ret_val = e1000_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ return ret_val; ++ break; ++ default: ++ /* Wait for EEPROM reload (it happens automatically) */ ++ msec_delay(5); ++ break; ++ } ++ ++ /* Disable HW ARPs */ ++ manc = E1000_READ_REG(hw, MANC); ++ manc &= ~(E1000_MANC_ARP_EN | E1000_MANC_ARP_RES_EN); ++ E1000_WRITE_REG(hw, MANC, manc); ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ e1000_phy_init_script(hw); ++ ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* Clear interrupt mask to stop board from generating interrupts */ ++ DEBUGOUT("Masking off all interrupts\n"); ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ++ /* Clear any pending interrupt events. */ ++ icr = E1000_READ_REG(hw, ICR); ++ ++ /* If MWI was previously enabled, reenable it. */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ if (hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ uint32_t kab = E1000_READ_REG(hw, KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ E1000_WRITE_REG(hw, KABGTXD, kab); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Performs basic configuration of the adapter. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Assumes that the controller has previously been reset and is in a ++ * post-reset uninitialized state. Initializes the receive address registers, ++ * multicast table, and VLAN filter table. Calls routines to setup link ++ * configuration and flow control settings. Clears all on-chip counters. Leaves ++ * the transmit and receive units disabled and uninitialized. ++ *****************************************************************************/ ++int32_t ++e1000_init_hw(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t i; ++ int32_t ret_val; ++ uint16_t pcix_cmd_word; ++ uint16_t pcix_stat_hi_word; ++ uint16_t cmd_mmrbc; ++ uint16_t stat_mmrbc; ++ uint32_t mta_size; ++ uint32_t reg_data; ++ uint32_t ctrl_ext; ++ ++ DEBUGFUNC("e1000_init_hw"); ++ ++ /* Initialize Identification LED */ ++ ret_val = e1000_id_led_init(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Initializing Identification LED\n"); ++ return ret_val; ++ } ++ ++ /* Set the media type and TBI compatibility */ ++ e1000_set_media_type(hw); ++ ++ /* Disabling VLAN filtering. 
*/ ++ DEBUGOUT("Initializing the IEEE VLAN\n"); ++ /* VET hardcoded to standard value and VFTA removed in ICH8 LAN */ ++ if (hw->mac_type != e1000_ich8lan) { ++ if (hw->mac_type < e1000_82545_rev_3) ++ E1000_WRITE_REG(hw, VET, 0); ++ e1000_clear_vfta(hw); ++ } ++ ++ /* For 82542 (rev 2.0), disable MWI and put the receiver into reset */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ DEBUGOUT("Disabling MWI on 82542 rev 2.0\n"); ++ e1000_pci_clear_mwi(hw); ++ E1000_WRITE_REG(hw, RCTL, E1000_RCTL_RST); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(5); ++ } ++ ++ /* Setup the receive address. This involves initializing all of the Receive ++ * Address Registers (RARs 0 - 15). ++ */ ++ e1000_init_rx_addrs(hw); ++ ++ /* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */ ++ if (hw->mac_type == e1000_82542_rev2_0) { ++ E1000_WRITE_REG(hw, RCTL, 0); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(1); ++ if (hw->pci_cmd_word & CMD_MEM_WRT_INVALIDATE) ++ e1000_pci_set_mwi(hw); ++ } ++ ++ /* Zero out the Multicast HASH table */ ++ DEBUGOUT("Zeroing the MTA\n"); ++ mta_size = E1000_MC_TBL_SIZE; ++ if (hw->mac_type == e1000_ich8lan) ++ mta_size = E1000_MC_TBL_SIZE_ICH8LAN; ++ for (i = 0; i < mta_size; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ /* use write flush to prevent Memory Write Block (MWB) from ++ * occuring when accessing our register space */ ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Set the PCI priority bit correctly in the CTRL register. This ++ * determines if the adapter gives priority to receives, or if it ++ * gives equal priority to transmits and receives. Valid only on ++ * 82542 and 82543 silicon. ++ */ ++ if (hw->dma_fairness && hw->mac_type <= e1000_82543) { ++ ctrl = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PRIOR); ++ } ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ /* Workaround for PCI-X problem when BIOS sets MMRBC incorrectly. */ ++ if (hw->bus_type == e1000_bus_type_pcix) { ++ e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd_word); ++ e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, ++ &pcix_stat_hi_word); ++ cmd_mmrbc = (pcix_cmd_word & PCIX_COMMAND_MMRBC_MASK) >> ++ PCIX_COMMAND_MMRBC_SHIFT; ++ stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> ++ PCIX_STATUS_HI_MMRBC_SHIFT; ++ if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) ++ stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; ++ if (cmd_mmrbc > stat_mmrbc) { ++ pcix_cmd_word &= ~PCIX_COMMAND_MMRBC_MASK; ++ pcix_cmd_word |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; ++ e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, ++ &pcix_cmd_word); ++ } ++ } ++ break; ++ } ++ ++ /* More time needed for PHY to initialize */ ++ if (hw->mac_type == e1000_ich8lan) ++ msec_delay(15); ++ ++ /* Call a subroutine to configure the link and setup flow control. 
*/ ++ ret_val = e1000_setup_link(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ if (hw->mac_type > e1000_82544) { ++ ctrl = E1000_READ_REG(hw, TXDCTL); ++ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ ctrl |= E1000_TXDCTL_COUNT_DESC; ++ break; ++ } ++ E1000_WRITE_REG(hw, TXDCTL, ctrl); ++ } ++ ++ if (hw->mac_type == e1000_82573) { ++ e1000_enable_tx_pkt_filtering(hw); ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_80003es2lan: ++ /* Enable retransmit on late collisions */ ++ reg_data = E1000_READ_REG(hw, TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ E1000_WRITE_REG(hw, TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = E1000_READ_REG(hw, TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_80003ES2LAN_TCTL_EXT_GCEX; ++ E1000_WRITE_REG(hw, TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = E1000_READ_REG(hw, TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; ++ E1000_WRITE_REG(hw, TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, FFLT, 0x0001, reg_data); ++ /* Fall through */ ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ ctrl = E1000_READ_REG(hw, TXDCTL1); ++ ctrl = (ctrl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB; ++ if (hw->mac_type >= e1000_82571) ++ ctrl |= E1000_TXDCTL_COUNT_DESC; ++ E1000_WRITE_REG(hw, TXDCTL1, ctrl); ++ break; ++ } ++ ++ ++ ++ if (hw->mac_type == e1000_82573) { ++ uint32_t gcr = E1000_READ_REG(hw, GCR); ++ gcr |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ E1000_WRITE_REG(hw, GCR, gcr); ++ } ++ ++ /* Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs(hw); ++ ++ /* ICH8/Nahum No-snoop bits are opposite polarity. ++ * Set to snoop by default after reset. */ ++ if (hw->mac_type == e1000_ich8lan) ++ e1000_set_pci_ex_no_snoop(hw, PCI_EX_82566_SNOOP_ALL); ++ ++ if (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER || ++ hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) { ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ /* Relaxed ordering must be disabled to avoid a parity ++ * error crash in a PCI slot. */ ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Adjust SERDES output amplitude based on EEPROM setting. ++ * ++ * hw - Struct containing variables accessed by shared code. 
++ *****************************************************************************/ ++static int32_t ++e1000_adjust_serdes_amplitude(struct e1000_hw *hw) ++{ ++ uint16_t eeprom_data; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_adjust_serdes_amplitude"); ++ ++ if (hw->media_type != e1000_media_type_internal_serdes) ++ return E1000_SUCCESS; ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ return E1000_SUCCESS; ++ } ++ ++ ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1, &eeprom_data); ++ if (ret_val) { ++ return ret_val; ++ } ++ ++ if (eeprom_data != EEPROM_RESERVED_WORD) { ++ /* Adjust SERDES output amplitude only. */ ++ eeprom_data &= EEPROM_SERDES_AMPLITUDE_MASK; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_EXT_CTRL, eeprom_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Configures flow control and link settings. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Determines which flow control settings to use. Calls the apropriate media- ++ * specific link configuration function. Configures the flow control settings. ++ * Assuming the adapter has a valid link partner, a valid link should be ++ * established. Assumes the hardware has previously been reset and the ++ * transmitter and receiver are not enabled. ++ *****************************************************************************/ ++int32_t ++e1000_setup_link(struct e1000_hw *hw) ++{ ++ uint32_t ctrl_ext; ++ int32_t ret_val; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_setup_link"); ++ ++ /* In the case of the phy reset being blocked, we already have a link. ++ * We do not have to set it up again. */ ++ if (e1000_check_phy_reset_block(hw)) ++ return E1000_SUCCESS; ++ ++ /* Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ if (hw->fc == e1000_fc_default) { ++ switch (hw->mac_type) { ++ case e1000_ich8lan: ++ case e1000_82573: ++ hw->fc = e1000_fc_full; ++ break; ++ default: ++ ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, ++ 1, &eeprom_data); ++ if (ret_val) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc = e1000_fc_none; ++ else if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == ++ EEPROM_WORD0F_ASM_DIR) ++ hw->fc = e1000_fc_tx_pause; ++ else ++ hw->fc = e1000_fc_full; ++ break; ++ } ++ } ++ ++ /* We want to save off the original Flow Control configuration just ++ * in case we get disconnected and then reconnected into a different ++ * hub or switch with different Flow Control capabilities. 
++ */ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ hw->fc &= (~e1000_fc_tx_pause); ++ ++ if ((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1)) ++ hw->fc &= (~e1000_fc_rx_pause); ++ ++ hw->original_fc = hw->fc; ++ ++ DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc); ++ ++ /* Take the 4 bits from EEPROM word 0x0F that determine the initial ++ * polarity value for the SW controlled pins, and setup the ++ * Extended Device Control reg with that info. ++ * This is needed because one of the SW controlled pins is used for ++ * signal detection. So this should be done before e1000_setup_pcs_link() ++ * or e1000_phy_setup() is called. ++ */ ++ if (hw->mac_type == e1000_82543) { ++ ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG, ++ 1, &eeprom_data); ++ if (ret_val) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) << ++ SWDPIO__EXT_SHIFT); ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ /* Call the necessary subroutine to configure the link. */ ++ ret_val = (hw->media_type == e1000_media_type_copper) ? ++ e1000_setup_copper_link(hw) : ++ e1000_setup_fiber_serdes_link(hw); ++ ++ /* Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); ++ ++ /* FCAL/H and FCT are hardcoded to standard values in e1000_ich8lan. */ ++ if (hw->mac_type != e1000_ich8lan) { ++ E1000_WRITE_REG(hw, FCT, FLOW_CONTROL_TYPE); ++ E1000_WRITE_REG(hw, FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ E1000_WRITE_REG(hw, FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ } ++ ++ E1000_WRITE_REG(hw, FCTTV, hw->fc_pause_time); ++ ++ /* Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames in not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (!(hw->fc & e1000_fc_tx_pause)) { ++ E1000_WRITE_REG(hw, FCRTL, 0); ++ E1000_WRITE_REG(hw, FCRTH, 0); ++ } else { ++ /* We need to set up the Receive Threshold high and low water marks ++ * as well as (optionally) enabling the transmission of XON frames. ++ */ ++ if (hw->fc_send_xon) { ++ E1000_WRITE_REG(hw, FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE)); ++ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); ++ } else { ++ E1000_WRITE_REG(hw, FCRTL, hw->fc_low_water); ++ E1000_WRITE_REG(hw, FCRTH, hw->fc_high_water); ++ } ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Sets up link for a fiber based or serdes based adapter ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Manipulates Physical Coding Sublayer functions in order to configure ++ * link. Assumes the hardware has been previously reset and the transmitter ++ * and receiver are not enabled. 
++ *****************************************************************************/ ++static int32_t ++e1000_setup_fiber_serdes_link(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint32_t status; ++ uint32_t txcw = 0; ++ uint32_t i; ++ uint32_t signal = 0; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_setup_fiber_serdes_link"); ++ ++ /* On 82571 and 82572 Fiber connections, SerDes loopback mode persists ++ * until explicitly turned off or a power cycle is performed. A read to ++ * the register does not indicate its status. Therefore, we ensure ++ * loopback mode is disabled during initialization. ++ */ ++ if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572) ++ E1000_WRITE_REG(hw, SCTL, E1000_DISABLE_SERDES_LOOPBACK); ++ ++ /* On adapters with a MAC newer than 82544, SW Defineable pin 1 will be ++ * set when the optics detect a signal. On older adapters, it will be ++ * cleared when there is a signal. This applies to fiber media only. ++ * If we're on serdes media, adjust the output amplitude to value set in ++ * the EEPROM. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ if (hw->media_type == e1000_media_type_fiber) ++ signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; ++ ++ ret_val = e1000_adjust_serdes_amplitude(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Take the link out of reset */ ++ ctrl &= ~(E1000_CTRL_LRST); ++ ++ /* Adjust VCO speed to improve BER performance */ ++ ret_val = e1000_set_vco_speed(hw); ++ if (ret_val) ++ return ret_val; ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Check for a software override of the flow control settings, and setup ++ * the device accordingly. If auto-negotiation is enabled, then software ++ * will have to set the "PAUSE" bits to the correct value in the Tranmsit ++ * Config Word Register (TXCW) and re-start auto-negotiation. However, if ++ * auto-negotiation is disabled, then software will have to manually ++ * configure the two flow control enable bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, but ++ * not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we do ++ * not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc) { ++ case e1000_fc_none: ++ /* Flow control is completely disabled by a software over-ride. */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* RX Flow control is enabled and TX Flow control is disabled by a ++ * software over-ride. Since there really isn't a way to advertise ++ * that we are capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later, we will ++ * disable the adapter's ability to send PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* TX Flow control is enabled, and RX Flow control is disabled, by a ++ * software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* Flow control (both RX and TX) is enabled by a software over-ride. 
*/ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* Since auto-negotiation is enabled, take the link out of reset (the link ++ * will be in reset, because we previously reset the chip). This will ++ * restart auto-negotiation. If auto-neogtiation is successful then the ++ * link-up status bit will be set and the flow control enable bits (RFCE ++ * and TFCE) will be set according to their negotiated value. ++ */ ++ DEBUGOUT("Auto-negotiation enabled\n"); ++ ++ E1000_WRITE_REG(hw, TXCW, txcw); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ hw->txcw = txcw; ++ msec_delay(1); ++ ++ /* If we have a signal (the cable is plugged in) then poll for a "Link-Up" ++ * indication in the Device Status Register. Time-out if a link isn't ++ * seen in 500 milliseconds seconds (Auto-negotiation should complete in ++ * less than 500 milliseconds even if the other end is doing it in SW). ++ * For internal serdes, we just assume a signal is present, then poll. ++ */ ++ if (hw->media_type == e1000_media_type_internal_serdes || ++ (E1000_READ_REG(hw, CTRL) & E1000_CTRL_SWDPIN1) == signal) { ++ DEBUGOUT("Looking for Link\n"); ++ for (i = 0; i < (LINK_UP_TIMEOUT / 10); i++) { ++ msec_delay(10); ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_LU) break; ++ } ++ if (i == (LINK_UP_TIMEOUT / 10)) { ++ DEBUGOUT("Never got a valid link from auto-neg!!!\n"); ++ hw->autoneg_failed = 1; ++ /* AutoNeg failed to achieve a link, so we'll call ++ * e1000_check_for_link. This routine will force the link up if ++ * we detect a signal. This will allow us to communicate with ++ * non-autonegotiating link partners. ++ */ ++ ret_val = e1000_check_for_link(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while checking for link\n"); ++ return ret_val; ++ } ++ hw->autoneg_failed = 0; ++ } else { ++ hw->autoneg_failed = 0; ++ DEBUGOUT("Valid Link Found\n"); ++ } ++ } else { ++ DEBUGOUT("No Signal Detected\n"); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Make sure we have a valid PHY and change PHY mode before link setup. ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_copper_link_preconfig(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_preconfig"); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ /* With 82543, we need to force speed and duplex on the MAC equal to what ++ * the PHY speed and duplex configuration is. In addition, we need to ++ * perform a hardware reset on the PHY to take it out of reset. 
++ */ ++ if (hw->mac_type > e1000_82543) { ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ } else { ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Make sure we have a valid PHY */ ++ ret_val = e1000_detect_gig_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error, did not detect valid phy.\n"); ++ return ret_val; ++ } ++ DEBUGOUT1("Phy ID = %x \n", hw->phy_id); ++ ++ /* Set PHY to class A mode (if necessary) */ ++ ret_val = e1000_set_phy_mode(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82545_rev_3) || ++ (hw->mac_type == e1000_82546_rev_3)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ phy_data |= 0x00000008; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ } ++ ++ if (hw->mac_type <= e1000_82543 || ++ hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 || ++ hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) ++ hw->phy_reset_disable = FALSE; ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_igp series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_igp_setup(struct e1000_hw *hw) ++{ ++ uint32_t led_ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_igp_setup"); ++ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ /* Wait 15ms for MAC to configure PHY from eeprom settings */ ++ msec_delay(15); ++ if (hw->mac_type != e1000_ich8lan) { ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* disable lplu d3 during driver init */ ++ ret_val = e1000_set_d3_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D3\n"); ++ return ret_val; ++ } ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, FALSE); ++ if (ret_val) { ++ DEBUGOUT("Error Disabling LPLU D0\n"); ++ return ret_val; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ hw->dsp_config_state = e1000_dsp_config_disabled; ++ /* Force MDI for earlier revs of the IGP PHY */ ++ phy_data &= ~(IGP01E1000_PSCR_AUTO_MDIX | IGP01E1000_PSCR_FORCE_MDI_MDIX); ++ hw->mdix = 1; ++ ++ } else { ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ } ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->autoneg) { ++ e1000_ms_type phy_ms_setting = 
hw->master_slave; ++ ++ if (hw->ffe_config_state == e1000_ffe_config_active) ++ hw->ffe_config_state = e1000_ffe_config_enabled; ++ ++ if (hw->dsp_config_state == e1000_dsp_config_activated) ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ ++ /* when autonegotiation advertisment is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. */ ++ if (hw->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ? ++ ((phy_data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy_ms_setting) { ++ case e1000_ms_force_master: ++ phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ phy_data |= CR_1000T_MS_ENABLE; ++ phy_data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ phy_data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_gg82563 series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_ggp_setup(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ uint32_t reg_data; ++ ++ DEBUGFUNC("e1000_copper_link_ggp_setup"); ++ ++ if (!hw->phy_reset_disable) { ++ ++ /* Enable CRS on TX for half-duplex operation. 
*/ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000BASE-T for Tx clock */ ++ phy_data |= GG82563_MSCR_TX_CLK_1000MBPS_25MHZ; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ phy_data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (hw->disable_polarity_correction == 1) ++ phy_data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ } /* phy_reset_disable */ ++ ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Bypass RX and TX FIFO's */ ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_SPEC_CTRL_2, phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ reg_data = E1000_READ_REG(hw, CTRL_EXT); ++ reg_data &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ E1000_WRITE_REG(hw, CTRL_EXT, reg_data); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (e1000_check_mng_mode(hw) == FALSE) { ++ /* Enable Electrical Idle on the PHY */ ++ phy_data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ++ phy_data); ++ ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_INBAND_CTRL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_INBAND_CTRL, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_m88 series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_mgp_setup(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_mgp_setup"); ++ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ ++ /* Enable CRS on TX. This must be set for half-duplex operation. */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (hw->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (hw->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->phy_revision < M88E1011_I_REV_4) { ++ /* Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((hw->phy_revision == E1000_REVISION_2) && ++ (hw->phy_id == M88E1111_I_PHY_ID)) { ++ /* Vidalia Phy, set the downshift counter to 5x */ ++ phy_data &= ~(M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK); ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ ret_val = e1000_write_phy_reg(hw, ++ M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000_phy_reset(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Setup auto-negotiation and flow control advertisements, ++* and then perform auto-negotiation. 
++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_copper_link_autoneg"); ++ ++ /* Perform some bounds checking on the hw->autoneg_advertised ++ * parameter. If this variable is zero, then set it to the default. ++ */ ++ hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (hw->autoneg_advertised == 0) ++ hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* IFE phy only supports 10/100 */ ++ if (hw->phy_type == e1000_phy_ife) ++ hw->autoneg_advertised &= AUTONEG_ADVERTISE_10_100_ALL; ++ ++ DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Setting up Auto-Negotiation\n"); ++ return ret_val; ++ } ++ DEBUGOUT("Restarting Auto-Neg\n"); ++ ++ /* Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (hw->wait_autoneg_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ DEBUGOUT("Error while waiting for autoneg to complete\n"); ++ return ret_val; ++ } ++ } ++ ++ hw->get_link_status = TRUE; ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************** ++* Copper link setup for e1000_phy_ife (Fast Ethernet PHY) series. ++* ++* hw - Struct containing variables accessed by shared code ++*********************************************************************/ ++static int32_t ++e1000_copper_link_ife_setup(struct e1000_hw *hw) ++{ ++ if (hw->phy_reset_disable) ++ return E1000_SUCCESS; ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Config the MAC and the PHY after link is up. ++* 1) Set up the MAC to the current PHY speed/duplex ++* if we are on 82543. If we ++* are on newer silicon, we only need to configure ++* collision distance in the Transmit Control Register. ++* 2) Set up flow control on the MAC to that established with ++* the link partner. ++* 3) Config DSP to improve Gigabit link quality for some PHY revisions. 
++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_copper_link_postconfig(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ DEBUGFUNC("e1000_copper_link_postconfig"); ++ ++ if (hw->mac_type >= e1000_82544) { ++ e1000_config_collision_dist(hw); ++ } else { ++ ret_val = e1000_config_mac_to_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ return ret_val; ++ } ++ } ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Configuring Flow Control\n"); ++ return ret_val; ++ } ++ ++ /* Config DSP to improve Giga link quality */ ++ if (hw->phy_type == e1000_phy_igp) { ++ ret_val = e1000_config_dsp_after_link_change(hw, TRUE); ++ if (ret_val) { ++ DEBUGOUT("Error Configuring DSP after link up\n"); ++ return ret_val; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Detects which PHY is present and setup the speed and duplex ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_setup_copper_link(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t i; ++ uint16_t phy_data; ++ uint16_t reg_data; ++ ++ DEBUGFUNC("e1000_setup_copper_link"); ++ ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ /* Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. */ ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 4), 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg(hw, GG82563_REG(0x34, 9), ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg(hw, GG82563_REG(0x34, 9), reg_data); ++ if (ret_val) ++ return ret_val; ++ default: ++ break; ++ } ++ ++ /* Check if it is a valid PHY and set PHY mode if necessary. */ ++ ret_val = e1000_copper_link_preconfig(hw); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ /* Kumeran registers are written-only */ ++ reg_data = E1000_KUMCTRLSTA_INB_CTRL_LINK_STATUS_TX_TIMEOUT_DEFAULT; ++ reg_data |= E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ ret_val = e1000_copper_link_igp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_m88) { ++ ret_val = e1000_copper_link_mgp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ ret_val = e1000_copper_link_ggp_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_copper_link_ife_setup(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->autoneg) { ++ /* Setup autoneg and flow control advertisement ++ * and perform autonegotiation */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* PHY will be set to 10H, 10F, 100H,or 100F ++ * depending on value from forced_speed_duplex. 
*/ ++ DEBUGOUT("Forcing speed and duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Forcing Speed and Duplex\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ for (i = 0; i < 10; i++) { ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ /* Config the MAC and PHY after link is up */ ++ ret_val = e1000_copper_link_postconfig(hw); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT("Valid link established!!!\n"); ++ return E1000_SUCCESS; ++ } ++ usec_delay(10); ++ } ++ ++ DEBUGOUT("Unable to establish link!!!\n"); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Configure the MAC-to-PHY interface for 10/100Mbps ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_configure_kmrn_for_10_100(struct e1000_hw *hw, uint16_t duplex) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ uint32_t tipg; ++ uint16_t reg_data; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_10_100"); ++ ++ reg_data = E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_10_100; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++static int32_t ++e1000_configure_kmrn_for_1000(struct e1000_hw *hw) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ uint16_t reg_data; ++ uint32_t tipg; ++ ++ DEBUGFUNC("e1000_configure_kmrn_for_1000"); ++ ++ reg_data = E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg(hw, E1000_KUMCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = E1000_READ_REG(hw, TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_80003ES2LAN_TIPG_IPGT_1000; ++ E1000_WRITE_REG(hw, TIPG, tipg); ++ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Configures PHY autoneg and flow control advertisement settings ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_autoneg_adv_reg; ++ uint16_t mii_1000t_ctrl_reg; ++ ++ DEBUGFUNC("e1000_phy_setup_autoneg"); ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). 
*/ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->phy_type != e1000_phy_ife) { ++ /* Read the MII 1000Base-T Control Register (Address 9). */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } else ++ mii_1000t_ctrl_reg=0; ++ ++ /* Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~REG4_SPEED_MASK; ++ mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK; ++ ++ DEBUGOUT1("autoneg_advertised %x\n", hw->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_10_HALF) { ++ DEBUGOUT("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_10_FULL) { ++ DEBUGOUT("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_100_HALF) { ++ DEBUGOUT("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_100_FULL) { ++ DEBUGOUT("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (hw->autoneg_advertised & ADVERTISE_1000_HALF) { ++ DEBUGOUT("Advertise 1000mb Half duplex requested, request denied!\n"); ++ } ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (hw->autoneg_advertised & ADVERTISE_1000_FULL) { ++ DEBUGOUT("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ if (hw->phy_type == e1000_phy_ife) { ++ DEBUGOUT("e1000_phy_ife is a 10/100 PHY. Gigabit speed is not supported.\n"); ++ } ++ } ++ ++ /* Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and TX flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc) { ++ case e1000_fc_none: /* 0 */ ++ /* Flow control (RX & TX) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: /* 1 */ ++ /* RX Flow control is enabled, and TX Flow control is ++ * disabled, by a software over-ride. 
++ */ ++ /* Since there really isn't a way to advertise that we are ++ * capable of RX Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric RX PAUSE. Later ++ * (in e1000_config_fc_after_link_up) we will disable the ++ *hw's ability to send PAUSE frames. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: /* 2 */ ++ /* TX Flow control is enabled, and RX Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: /* 3 */ ++ /* Flow control (both RX and TX) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (hw->phy_type != e1000_phy_ife) { ++ ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Force PHY speed and duplex settings to hw->forced_speed_duplex ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t mii_ctrl_reg; ++ uint16_t mii_status_reg; ++ uint16_t phy_data; ++ uint16_t i; ++ ++ DEBUGFUNC("e1000_phy_force_speed_duplex"); ++ ++ /* Turn off Flow control if we are forcing speed and duplex. */ ++ hw->fc = e1000_fc_none; ++ ++ DEBUGOUT1("hw->fc = %d\n", hw->fc); ++ ++ /* Read the Device Control Register. */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */ ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(DEVICE_SPEED_MASK); ++ ++ /* Clear the Auto Speed Detect Enable bit. */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Read the MII Control Register. */ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* We need to disable autoneg in order to force link and duplex. */ ++ ++ mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Are we forcing Full or Half Duplex? */ ++ if (hw->forced_speed_duplex == e1000_100_full || ++ hw->forced_speed_duplex == e1000_10_full) { ++ /* We want to force full duplex so we SET the full duplex bits in the ++ * Device and MII Control Registers. ++ */ ++ ctrl |= E1000_CTRL_FD; ++ mii_ctrl_reg |= MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ /* We want to force half duplex so we CLEAR the full duplex bits in ++ * the Device and MII Control Registers. ++ */ ++ ctrl &= ~E1000_CTRL_FD; ++ mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX; ++ DEBUGOUT("Half Duplex\n"); ++ } ++ ++ /* Are we forcing 100Mbps??? */ ++ if (hw->forced_speed_duplex == e1000_100_full || ++ hw->forced_speed_duplex == e1000_100_half) { ++ /* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */ ++ ctrl |= E1000_CTRL_SPD_100; ++ mii_ctrl_reg |= MII_CR_SPEED_100; ++ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ DEBUGOUT("Forcing 100mb "); ++ } else { ++ /* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. 
*/ ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ mii_ctrl_reg |= MII_CR_SPEED_10; ++ mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ DEBUGOUT("Forcing 10mb "); ++ } ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Write the configured values back to the Device Control Reg. */ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed are duplex are forced. ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ DEBUGOUT1("M88E1000 PSCR: %x \n", phy_data); ++ ++ /* Need to reset the PHY or these changes will be ignored */ ++ mii_ctrl_reg |= MII_CR_RESET; ++ /* Disable MDI-X support for 10/100 */ ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IFE_PMC_AUTO_MDIX; ++ phy_data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_MDIX_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed or duplex are forced. ++ */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Write back the modified PHY MII control register. */ ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ ++ usec_delay(1); ++ ++ /* The wait_autoneg_complete flag may be a little misleading here. ++ * Since we are forcing speed and duplex, Auto-Neg is not enabled. ++ * But we do want to delay for a period while forcing only so we ++ * don't generate false No Link messages. So we will wait here ++ * only if the user has set wait_autoneg_complete to 1, which is ++ * the default. ++ */ ++ if (hw->wait_autoneg_complete) { ++ /* We will wait for autoneg to complete. */ ++ DEBUGOUT("Waiting for forced speed/duplex link.\n"); ++ mii_status_reg = 0; ++ ++ /* We will wait for autoneg to complete or 4.5 seconds to expire. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Auto-Neg Complete bit ++ * to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay(100); ++ } ++ if ((i == 0) && ++ ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563))) { ++ /* We didn't get link. Reset the DSP and wait again for link. */ ++ ret_val = e1000_phy_reset_dsp(hw); ++ if (ret_val) { ++ DEBUGOUT("Error Resetting PHY DSP\n"); ++ return ret_val; ++ } ++ } ++ /* This loop will early-out if the link condition has been met. 
*/ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay(100); ++ /* Read the MII Status Register and wait for Auto-Neg Complete bit ++ * to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ if (hw->phy_type == e1000_phy_m88) { ++ /* Because we reset the PHY above, we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock. This value ++ * defaults back to a 2.5MHz clock when the PHY is reset. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* In addition, because of the s/w reset above, we need to enable CRS on ++ * TX. This must be set for both full and half duplex operation. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && ++ (!hw->autoneg) && (hw->forced_speed_duplex == e1000_10_full || ++ hw->forced_speed_duplex == e1000_10_half)) { ++ ret_val = e1000_polarity_reversal_workaround(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ /* The TX_CLK of the Extended PHY Specific Control Register defaults ++ * to 2.5MHz on a reset. We need to re-force it back to 25MHz, if ++ * we're not in a forced 10/duplex configuration. */ ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if ((hw->forced_speed_duplex == e1000_10_full) || ++ (hw->forced_speed_duplex == e1000_10_half)) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5MHZ; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25MHZ; ++ ++ /* Also due to the reset, we need to enable CRS on Tx. */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ++ ret_val = e1000_write_phy_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Sets the collision distance in the Transmit Control register ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Link should have been established previously. Reads the speed and duplex ++* information from the Device Status register. 
++******************************************************************************/ ++void ++e1000_config_collision_dist(struct e1000_hw *hw) ++{ ++ uint32_t tctl, coll_dist; ++ ++ DEBUGFUNC("e1000_config_collision_dist"); ++ ++ if (hw->mac_type < e1000_82543) ++ coll_dist = E1000_COLLISION_DISTANCE_82542; ++ else ++ coll_dist = E1000_COLLISION_DISTANCE; ++ ++ tctl = E1000_READ_REG(hw, TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= coll_dist << E1000_COLD_SHIFT; ++ ++ E1000_WRITE_REG(hw, TCTL, tctl); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/****************************************************************************** ++* Sets MAC speed and duplex settings to reflect the those in the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* mii_reg - data to write to the MII control register ++* ++* The contents of the PHY register containing the needed information need to ++* be passed in. ++******************************************************************************/ ++static int32_t ++e1000_config_mac_to_phy(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_config_mac_to_phy"); ++ ++ /* 82544 or newer MAC, Auto Speed Detection takes care of ++ * MAC speed/duplex configuration.*/ ++ if (hw->mac_type >= e1000_82544) ++ return E1000_SUCCESS; ++ ++ /* Read the Device Control Register and set the bits to Force Speed ++ * and Duplex. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS); ++ ++ /* Set up duplex in the Device Control and Transmit Control ++ * registers depending on negotiated values. ++ */ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & M88E1000_PSSR_DPLX) ++ ctrl |= E1000_CTRL_FD; ++ else ++ ctrl &= ~E1000_CTRL_FD; ++ ++ e1000_config_collision_dist(hw); ++ ++ /* Set up speed in the Device Control register depending on ++ * negotiated values. ++ */ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) ++ ctrl |= E1000_CTRL_SPD_1000; ++ else if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_100MBS) ++ ctrl |= E1000_CTRL_SPD_100; ++ ++ /* Write the configured values back to the Device Control Reg. */ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Forces the MAC's flow control settings. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Sets the TFCE and RFCE bits in the device control register to reflect ++ * the adapter settings. TFCE and RFCE need to be explicitly set by ++ * software when a Copper PHY is used because autonegotiation is managed ++ * by the PHY rather than the MAC. Software must also configure these ++ * bits when link is forced on a fiber connection. ++ *****************************************************************************/ ++int32_t ++e1000_force_mac_fc(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_force_mac_fc"); ++ ++ /* Get the current configuration of the Device Control Register */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc" parameter. 
++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and TX flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ ++ switch (hw->fc) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ DEBUGOUT("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ /* Disable TX Flow Control for 82542 (rev 2.0) */ ++ if (hw->mac_type == e1000_82542_rev2_0) ++ ctrl &= (~E1000_CTRL_TFCE); ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Configures flow control settings after link is established ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Should be called immediately after a valid link has been established. ++ * Forces MAC flow control settings if link was forced. When in MII/GMII mode ++ * and autonegotiation is enabled, the MAC flow control settings will be set ++ * based on the flow control negotiated by the PHY. In TBI mode, the TFCE ++ * and RFCE bits will be automaticaly set to the negotiated flow control mode. ++ *****************************************************************************/ ++int32_t ++e1000_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_status_reg; ++ uint16_t mii_nway_adv_reg; ++ uint16_t mii_nway_lp_ability_reg; ++ uint16_t speed; ++ uint16_t duplex; ++ ++ DEBUGFUNC("e1000_config_fc_after_link_up"); ++ ++ /* Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (((hw->media_type == e1000_media_type_fiber) && (hw->autoneg_failed)) || ++ ((hw->media_type == e1000_media_type_internal_serdes) && ++ (hw->autoneg_failed)) || ++ ((hw->media_type == e1000_media_type_copper) && (!hw->autoneg))) { ++ ret_val = e1000_force_mac_fc(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->media_type == e1000_media_type_copper) && hw->autoneg) { ++ /* Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. 
++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_AUTONEG_COMPLETE) { ++ /* The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement Register ++ * (Address 4) and the Auto_Negotiation Base Page Ability ++ * Register (Address 5) to determine how flow control was ++ * negotiated. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, ++ &mii_nway_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, ++ &mii_nway_lp_ability_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ */ ++ /* Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. ++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* Now we need to check if the user selected RX ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise RX ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->original_fc == e1000_fc_full) { ++ hw->fc = e1000_fc_full; ++ DEBUGOUT("Flow Control = FULL.\n"); ++ } else { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ } ++ /* For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc = e1000_fc_tx_pause; ++ DEBUGOUT("Flow Control = TX PAUSE frames only.\n"); ++ } ++ /* For transmitting PAUSE frames ONLY. 
++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ /* Per the IEEE spec, at this point flow control should be ++ * disabled. However, we want to consider that we could ++ * be connected to a legacy switch that doesn't advertise ++ * desired flow control, but can be forced on the link ++ * partner. So if we advertised no flow control, that is ++ * what we will resolve to. If we advertised some kind of ++ * receive capability (Rx Pause Only or Full Flow Control) ++ * and the link partner advertised none, we will configure ++ * ourselves to enable Rx Flow Control only. We can do ++ * this safely for two reasons: If the link partner really ++ * didn't want flow control enabled, and we enable Rx, no ++ * harm done since we won't be receiving any PAUSE frames ++ * anyway. If the intent on the link partner was to have ++ * flow control enabled, then by us enabling RX only, we ++ * can at least receive pause frames and process them. ++ * This is a good idea because in most cases, since we are ++ * predominantly a server NIC, more times than not we will ++ * be asked to delay transmission of packets than asking ++ * our link partner to pause transmission of frames. ++ */ ++ else if ((hw->original_fc == e1000_fc_none || ++ hw->original_fc == e1000_fc_tx_pause) || ++ hw->fc_strict_ieee) { ++ hw->fc = e1000_fc_none; ++ DEBUGOUT("Flow Control = NONE.\n"); ++ } else { ++ hw->fc = e1000_fc_rx_pause; ++ DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ } ++ ++ /* Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc = e1000_fc_none; ++ ++ /* Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000_force_mac_fc(hw); ++ if (ret_val) { ++ DEBUGOUT("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } else { ++ DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Checks to see if the link status of the hardware has changed. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Called by any function that needs to check the link status of the adapter. ++ *****************************************************************************/ ++int32_t ++e1000_check_for_link(struct e1000_hw *hw) ++{ ++ uint32_t rxcw = 0; ++ uint32_t ctrl; ++ uint32_t status; ++ uint32_t rctl; ++ uint32_t icr; ++ uint32_t signal = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_for_link"); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ status = E1000_READ_REG(hw, STATUS); ++ ++ /* On adapters with a MAC newer than 82544, SW Defineable pin 1 will be ++ * set when the optics detect a signal. 
On older adapters, it will be ++ * cleared when there is a signal. This applies to fiber media only. ++ */ ++ if ((hw->media_type == e1000_media_type_fiber) || ++ (hw->media_type == e1000_media_type_internal_serdes)) { ++ rxcw = E1000_READ_REG(hw, RXCW); ++ ++ if (hw->media_type == e1000_media_type_fiber) { ++ signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0; ++ if (status & E1000_STATUS_LU) ++ hw->get_link_status = FALSE; ++ } ++ } ++ ++ /* If we have a copper PHY then we only want to go out to the PHY ++ * registers to see if Auto-Neg has completed and/or if our link ++ * status has changed. The get_link_status flag will be set if we ++ * receive a Link Status Change interrupt or we have Rx Sequence ++ * Errors. ++ */ ++ if ((hw->media_type == e1000_media_type_copper) && hw->get_link_status) { ++ /* First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ * Read the register twice since the link bit is sticky. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ hw->get_link_status = FALSE; ++ /* Check if there was DownShift, must be checked immediately after ++ * link-up */ ++ e1000_check_downshift(hw); ++ ++ /* If we are on 82544 or 82543 silicon and speed/duplex ++ * are forced to 10H or 10F, then we will implement the polarity ++ * reversal workaround. We disable interrupts first, and upon ++ * returning, place the devices interrupt state to its previous ++ * value except for the link status change interrupt which will ++ * happen due to the execution of this workaround. ++ */ ++ ++ if ((hw->mac_type == e1000_82544 || hw->mac_type == e1000_82543) && ++ (!hw->autoneg) && ++ (hw->forced_speed_duplex == e1000_10_full || ++ hw->forced_speed_duplex == e1000_10_half)) { ++ E1000_WRITE_REG(hw, IMC, 0xffffffff); ++ ret_val = e1000_polarity_reversal_workaround(hw); ++ icr = E1000_READ_REG(hw, ICR); ++ E1000_WRITE_REG(hw, ICS, (icr & ~E1000_ICS_LSC)); ++ E1000_WRITE_REG(hw, IMS, IMS_ENABLE_MASK); ++ } ++ ++ } else { ++ /* No link detected */ ++ e1000_config_dsp_after_link_change(hw, FALSE); ++ return 0; ++ } ++ ++ /* If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!hw->autoneg) return -E1000_ERR_CONFIG; ++ ++ /* optimize the dsp settings for the igp phy */ ++ e1000_config_dsp_after_link_change(hw, TRUE); ++ ++ /* We have a M88E1000 PHY and Auto-Neg is enabled. If we ++ * have Si on board that is 82544 or newer, Auto ++ * Speed Detection takes care of MAC speed/duplex ++ * configuration. So we only need to configure Collision ++ * Distance in the MAC. Otherwise, we need to force ++ * speed/duplex on the MAC to the current PHY speed/duplex ++ * settings. ++ */ ++ if (hw->mac_type >= e1000_82544) ++ e1000_config_collision_dist(hw); ++ else { ++ ret_val = e1000_config_mac_to_phy(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring MAC to PHY settings\n"); ++ return ret_val; ++ } ++ } ++ ++ /* Configure Flow Control now that Auto-Neg has completed. First, we ++ * need to restore the desired flow control settings because we may ++ * have had to re-autoneg with a different link partner. 
++ */ ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ return ret_val; ++ } ++ ++ /* At this point we know that we are on copper and we have ++ * auto-negotiated link. These are conditions for checking the link ++ * partner capability register. We use the link speed to determine if ++ * TBI compatibility needs to be turned on or off. If the link is not ++ * at gigabit speed, then TBI compatibility is not needed. If we are ++ * at gigabit speed, we turn on TBI compatibility. ++ */ ++ if (hw->tbi_compatibility_en) { ++ uint16_t speed, duplex; ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ if (speed != SPEED_1000) { ++ /* If link speed is not set to gigabit speed, we do not need ++ * to enable TBI compatibility. ++ */ ++ if (hw->tbi_compatibility_on) { ++ /* If we previously were in the mode, turn it off. */ ++ rctl = E1000_READ_REG(hw, RCTL); ++ rctl &= ~E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ hw->tbi_compatibility_on = FALSE; ++ } ++ } else { ++ /* If TBI compatibility is was previously off, turn it on. For ++ * compatibility with a TBI link partner, we will store bad ++ * packets. Some frames have an additional byte on the end and ++ * will look like CRC errors to to the hardware. ++ */ ++ if (!hw->tbi_compatibility_on) { ++ hw->tbi_compatibility_on = TRUE; ++ rctl = E1000_READ_REG(hw, RCTL); ++ rctl |= E1000_RCTL_SBP; ++ E1000_WRITE_REG(hw, RCTL, rctl); ++ } ++ } ++ } ++ } ++ /* If we don't have link (auto-negotiation failed or link partner cannot ++ * auto-negotiate), the cable is plugged in (we have signal), and our ++ * link partner is not trying to auto-negotiate with us (we are receiving ++ * idles or data), we need to force link up. We also need to give ++ * auto-negotiation time to complete, in case the cable was just plugged ++ * in. The autoneg_failed flag does this. ++ */ ++ else if ((((hw->media_type == e1000_media_type_fiber) && ++ ((ctrl & E1000_CTRL_SWDPIN1) == signal)) || ++ (hw->media_type == e1000_media_type_internal_serdes)) && ++ (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (hw->autoneg_failed == 0) { ++ hw->autoneg_failed = 1; ++ return 0; ++ } ++ DEBUGOUT("NOT RXing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ E1000_WRITE_REG(hw, TXCW, (hw->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000_config_fc_after_link_up(hw); ++ if (ret_val) { ++ DEBUGOUT("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } ++ /* If we are forcing link and we are receiving /C/ ordered sets, re-enable ++ * auto-negotiation in the TXCW register and disable forced link in the ++ * Device Control register in an attempt to auto-negotiate with our link ++ * partner. 
++ */ ++ else if (((hw->media_type == e1000_media_type_fiber) || ++ (hw->media_type == e1000_media_type_internal_serdes)) && ++ (ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ DEBUGOUT("RXing /C/, enable AutoNeg and stop forcing link.\n"); ++ E1000_WRITE_REG(hw, TXCW, hw->txcw); ++ E1000_WRITE_REG(hw, CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ hw->serdes_link_down = FALSE; ++ } ++ /* If we force link for non-auto-negotiation switch, check link status ++ * based on MAC synchronization for internal serdes media type. ++ */ ++ else if ((hw->media_type == e1000_media_type_internal_serdes) && ++ !(E1000_TXCW_ANE & E1000_READ_REG(hw, TXCW))) { ++ /* SYNCH bit and IV bit are sticky. */ ++ usec_delay(10); ++ if (E1000_RXCW_SYNCH & E1000_READ_REG(hw, RXCW)) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ hw->serdes_link_down = FALSE; ++ DEBUGOUT("SERDES: Link is up.\n"); ++ } ++ } else { ++ hw->serdes_link_down = TRUE; ++ DEBUGOUT("SERDES: Link is down.\n"); ++ } ++ } ++ if ((hw->media_type == e1000_media_type_internal_serdes) && ++ (E1000_TXCW_ANE & E1000_READ_REG(hw, TXCW))) { ++ hw->serdes_link_down = !(E1000_STATUS_LU & E1000_READ_REG(hw, STATUS)); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Detects the current speed and duplex settings of the hardware. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * speed - Speed of the connection ++ * duplex - Duplex setting of the connection ++ *****************************************************************************/ ++int32_t ++e1000_get_speed_and_duplex(struct e1000_hw *hw, ++ uint16_t *speed, ++ uint16_t *duplex) ++{ ++ uint32_t status; ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_get_speed_and_duplex"); ++ ++ if (hw->mac_type >= e1000_82543) { ++ status = E1000_READ_REG(hw, STATUS); ++ if (status & E1000_STATUS_SPEED_1000) { ++ *speed = SPEED_1000; ++ DEBUGOUT("1000 Mbs, "); ++ } else if (status & E1000_STATUS_SPEED_100) { ++ *speed = SPEED_100; ++ DEBUGOUT("100 Mbs, "); ++ } else { ++ *speed = SPEED_10; ++ DEBUGOUT("10 Mbs, "); ++ } ++ ++ if (status & E1000_STATUS_FD) { ++ *duplex = FULL_DUPLEX; ++ DEBUGOUT("Full Duplex\n"); ++ } else { ++ *duplex = HALF_DUPLEX; ++ DEBUGOUT(" Half Duplex\n"); ++ } ++ } else { ++ DEBUGOUT("1000 Mbs, Full Duplex\n"); ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ } ++ ++ /* IGP01 PHY may advertise full duplex operation after speed downgrade even ++ * if it is operating at half duplex. Here we set the duplex settings to ++ * match the duplex in the link partner's capabilities. 
++ */ ++ if (hw->phy_type == e1000_phy_igp && hw->speed_downgraded) { ++ ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(phy_data & NWAY_ER_LP_NWAY_CAPS)) ++ *duplex = HALF_DUPLEX; ++ else { ++ ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data); ++ if (ret_val) ++ return ret_val; ++ if ((*speed == SPEED_100 && !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) || ++ (*speed == SPEED_10 && !(phy_data & NWAY_LPAR_10T_FD_CAPS))) ++ *duplex = HALF_DUPLEX; ++ } ++ } ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (hw->media_type == e1000_media_type_copper)) { ++ if (*speed == SPEED_1000) ++ ret_val = e1000_configure_kmrn_for_1000(hw); ++ else ++ ret_val = e1000_configure_kmrn_for_10_100(hw, *duplex); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if ((hw->phy_type == e1000_phy_igp_3) && (*speed == SPEED_1000)) { ++ ret_val = e1000_kumeran_lock_loss_workaround(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Blocks until autoneg completes or times out (~4.5 seconds) ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t i; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_wait_autoneg"); ++ DEBUGOUT("Waiting for Auto-Neg to complete.\n"); ++ ++ /* We will wait for autoneg to complete or 4.5 seconds to expire. */ ++ for (i = PHY_AUTO_NEG_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Auto-Neg ++ * Complete bit to be set. ++ */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ if (phy_data & MII_SR_AUTONEG_COMPLETE) { ++ return E1000_SUCCESS; ++ } ++ msec_delay(100); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Raises the Management Data Clock ++* ++* hw - Struct containing variables accessed by shared code ++* ctrl - Device control register's current value ++******************************************************************************/ ++static void ++e1000_raise_mdi_clk(struct e1000_hw *hw, ++ uint32_t *ctrl) ++{ ++ /* Raise the clock input to the Management Data Clock (by setting the MDC ++ * bit), and then delay 10 microseconds. ++ */ ++ E1000_WRITE_REG(hw, CTRL, (*ctrl | E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/****************************************************************************** ++* Lowers the Management Data Clock ++* ++* hw - Struct containing variables accessed by shared code ++* ctrl - Device control register's current value ++******************************************************************************/ ++static void ++e1000_lower_mdi_clk(struct e1000_hw *hw, ++ uint32_t *ctrl) ++{ ++ /* Lower the clock input to the Management Data Clock (by clearing the MDC ++ * bit), and then delay 10 microseconds. 
++ */ ++ E1000_WRITE_REG(hw, CTRL, (*ctrl & ~E1000_CTRL_MDC)); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(10); ++} ++ ++/****************************************************************************** ++* Shifts data bits out to the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* data - Data to send out to the PHY ++* count - Number of bits to shift out ++* ++* Bits are shifted out in MSB to LSB order. ++******************************************************************************/ ++static void ++e1000_shift_out_mdi_bits(struct e1000_hw *hw, ++ uint32_t data, ++ uint16_t count) ++{ ++ uint32_t ctrl; ++ uint32_t mask; ++ ++ /* We need to shift "count" number of bits out to the PHY. So, the value ++ * in the "data" parameter will be shifted out to the PHY one bit at a ++ * time. In order to do this, "data" must be broken down into bits. ++ */ ++ mask = 0x01; ++ mask <<= (count - 1); ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */ ++ ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR); ++ ++ while (mask) { ++ /* A "1" is shifted out to the PHY by setting the MDIO bit to "1" and ++ * then raising and lowering the Management Data Clock. A "0" is ++ * shifted out to the PHY by setting the MDIO bit to "0" and then ++ * raising and lowering the clock. ++ */ ++ if (data & mask) ++ ctrl |= E1000_CTRL_MDIO; ++ else ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(10); ++ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ mask = mask >> 1; ++ } ++} ++ ++/****************************************************************************** ++* Shifts data bits in from the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Bits are shifted in in MSB to LSB order. ++******************************************************************************/ ++static uint16_t ++e1000_shift_in_mdi_bits(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ uint16_t data = 0; ++ uint8_t i; ++ ++ /* In order to read a register from the PHY, we need to shift in a total ++ * of 18 bits from the PHY. The first two bit (turnaround) times are used ++ * to avoid contention on the MDIO pin when a read operation is performed. ++ * These two bits are ignored by us and thrown away. Bits are "shifted in" ++ * by raising the input to the Management Data Clock (setting the MDC bit), ++ * and then reading the value of the MDIO bit. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ++ /* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as input. */ ++ ctrl &= ~E1000_CTRL_MDIO_DIR; ++ ctrl &= ~E1000_CTRL_MDIO; ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ /* Raise and Lower the clock before reading in the data. This accounts for ++ * the turnaround bits. The first clock occurred when we clocked out the ++ * last bit of the Register Address. ++ */ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ for (data = 0, i = 0; i < 16; i++) { ++ data = data << 1; ++ e1000_raise_mdi_clk(hw, &ctrl); ++ ctrl = E1000_READ_REG(hw, CTRL); ++ /* Check to see if we shifted in a "1". 
*/ ++ if (ctrl & E1000_CTRL_MDIO) ++ data |= 1; ++ e1000_lower_mdi_clk(hw, &ctrl); ++ } ++ ++ e1000_raise_mdi_clk(hw, &ctrl); ++ e1000_lower_mdi_clk(hw, &ctrl); ++ ++ return data; ++} ++ ++int32_t ++e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask) ++{ ++ uint32_t swfw_sync = 0; ++ uint32_t swmask = mask; ++ uint32_t fwmask = mask << 16; ++ int32_t timeout = 200; ++ ++ DEBUGFUNC("e1000_swfw_sync_acquire"); ++ ++ if (hw->swfwhw_semaphore_present) ++ return e1000_get_software_flag(hw); ++ ++ if (!hw->swfw_sync_present) ++ return e1000_get_hw_eeprom_semaphore(hw); ++ ++ while (timeout) { ++ if (e1000_get_hw_eeprom_semaphore(hw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) { ++ break; ++ } ++ ++ /* firmware currently using resource (fwmask) */ ++ /* or other software thread currently using resource (swmask) */ ++ e1000_put_hw_eeprom_semaphore(hw); ++ msec_delay_irq(5); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ ++ swfw_sync |= swmask; ++ E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_eeprom_semaphore(hw); ++ return E1000_SUCCESS; ++} ++ ++void ++e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask) ++{ ++ uint32_t swfw_sync; ++ uint32_t swmask = mask; ++ ++ DEBUGFUNC("e1000_swfw_sync_release"); ++ ++ if (hw->swfwhw_semaphore_present) { ++ e1000_release_software_flag(hw); ++ return; ++ } ++ ++ if (!hw->swfw_sync_present) { ++ e1000_put_hw_eeprom_semaphore(hw); ++ return; ++ } ++ ++ /* if (e1000_get_hw_eeprom_semaphore(hw)) ++ * return -E1000_ERR_SWFW_SYNC; */ ++ while (e1000_get_hw_eeprom_semaphore(hw) != E1000_SUCCESS); ++ /* empty */ ++ ++ swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC); ++ swfw_sync &= ~swmask; ++ E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync); ++ ++ e1000_put_hw_eeprom_semaphore(hw); ++} ++ ++/***************************************************************************** ++* Reads the value from a PHY register, if the value is on a specific non zero ++* page, sets the page first. 
++* hw - Struct containing variables accessed by shared code ++* reg_addr - address of the PHY register to read ++******************************************************************************/ ++int32_t ++e1000_read_phy_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *phy_data) ++{ ++ uint32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_read_phy_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ if ((hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) && ++ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ++ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, ++ (uint16_t)reg_addr); ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || ++ (hw->mac_type == e1000_80003es2lan)) { ++ /* Select Configuration Page */ ++ if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } else { ++ /* Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ ret_val = e1000_write_phy_reg_ex(hw, ++ GG82563_PHY_PAGE_SELECT_ALT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } ++ ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } ++ } ++ ++ ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, ++ phy_data); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++} ++ ++int32_t ++e1000_read_phy_reg_ex(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *phy_data) ++{ ++ uint32_t i; ++ uint32_t mdic = 0; ++ const uint32_t phy_addr = 1; ++ ++ DEBUGFUNC("e1000_read_phy_reg_ex"); ++ ++ if (reg_addr > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); ++ return -E1000_ERR_PARAM; ++ } ++ ++ if (hw->mac_type > e1000_82543) { ++ /* Set up Op-code, Phy Address, and register address in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) | ++ (phy_addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ E1000_WRITE_REG(hw, MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < 64; i++) { ++ usec_delay(50); ++ mdic = E1000_READ_REG(hw, MDIC); ++ if (mdic & E1000_MDIC_READY) break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ DEBUGOUT("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ *phy_data = (uint16_t) mdic; ++ } else { ++ /* We must first send a preamble through the MDIO pin to signal the ++ * beginning of an MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* Now combine the next few fields that are required for a read ++ * operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine five different times. 
The format of ++ * a MII read instruction consists of a shift out of 14 bits and is ++ * defined as follows: ++ * ++ * followed by a shift in of 18 bits. This first two bits shifted in ++ * are TurnAround bits used to avoid contention on the MDIO pin when a ++ * READ operation is performed. These two bits are thrown away ++ * followed by a shift in of 16 bits which contains the desired data. ++ */ ++ mdic = ((reg_addr) | (phy_addr << 5) | ++ (PHY_OP_READ << 10) | (PHY_SOF << 12)); ++ ++ e1000_shift_out_mdi_bits(hw, mdic, 14); ++ ++ /* Now that we've shifted out the read command to the MII, we need to ++ * "shift in" the 16-bit value (18 total bits) of the requested PHY ++ * register address. ++ */ ++ *phy_data = e1000_shift_in_mdi_bits(hw); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Writes a value to a PHY register ++* ++* hw - Struct containing variables accessed by shared code ++* reg_addr - address of the PHY register to write ++* data - data to write to the PHY ++******************************************************************************/ ++int32_t ++e1000_write_phy_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t phy_data) ++{ ++ uint32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_write_phy_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ if ((hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) && ++ (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ++ ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, ++ (uint16_t)reg_addr); ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ if (((reg_addr & MAX_PHY_REG_ADDRESS) > MAX_PHY_MULTI_PAGE_REG) || ++ (hw->mac_type == e1000_80003es2lan)) { ++ /* Select Configuration Page */ ++ if ((reg_addr & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ ret_val = e1000_write_phy_reg_ex(hw, GG82563_PHY_PAGE_SELECT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } else { ++ /* Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ ret_val = e1000_write_phy_reg_ex(hw, ++ GG82563_PHY_PAGE_SELECT_ALT, ++ (uint16_t)((uint16_t)reg_addr >> GG82563_PAGE_SHIFT)); ++ } ++ ++ if (ret_val) { ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++ } ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, ++ phy_data); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return ret_val; ++} ++ ++int32_t ++e1000_write_phy_reg_ex(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t phy_data) ++{ ++ uint32_t i; ++ uint32_t mdic = 0; ++ const uint32_t phy_addr = 1; ++ ++ DEBUGFUNC("e1000_write_phy_reg_ex"); ++ ++ if (reg_addr > MAX_PHY_REG_ADDRESS) { ++ DEBUGOUT1("PHY Address %d is out of range\n", reg_addr); ++ return -E1000_ERR_PARAM; ++ } ++ ++ if (hw->mac_type > e1000_82543) { ++ /* Set up Op-code, Phy Address, register address, and data intended ++ * for the PHY register in the MDI Control register. The MAC will take ++ * care of interfacing with the PHY to send the desired data. 
++ */ ++ mdic = (((uint32_t) phy_data) | ++ (reg_addr << E1000_MDIC_REG_SHIFT) | ++ (phy_addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ E1000_WRITE_REG(hw, MDIC, mdic); ++ ++ /* Poll the ready bit to see if the MDI read completed */ ++ for (i = 0; i < 641; i++) { ++ usec_delay(5); ++ mdic = E1000_READ_REG(hw, MDIC); ++ if (mdic & E1000_MDIC_READY) break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ DEBUGOUT("MDI Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ } else { ++ /* We'll need to use the SW defined pins to shift the write command ++ * out to the PHY. We first send a preamble to the PHY to signal the ++ * beginning of the MII instruction. This is done by sending 32 ++ * consecutive "1" bits. ++ */ ++ e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE); ++ ++ /* Now combine the remaining required fields that will indicate a ++ * write operation. We use this method instead of calling the ++ * e1000_shift_out_mdi_bits routine for each field in the command. The ++ * format of a MII write instruction is as follows: ++ * . ++ */ ++ mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) | ++ (PHY_OP_WRITE << 12) | (PHY_SOF << 14)); ++ mdic <<= 16; ++ mdic |= (uint32_t) phy_data; ++ ++ e1000_shift_out_mdi_bits(hw, mdic, 32); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++int32_t ++e1000_read_kmrn_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t *data) ++{ ++ uint32_t reg_val; ++ uint16_t swfw; ++ DEBUGFUNC("e1000_read_kmrn_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ /* Write register address */ ++ reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & ++ E1000_KUMCTRLSTA_OFFSET) | ++ E1000_KUMCTRLSTA_REN; ++ E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); ++ usec_delay(2); ++ ++ /* Read the data returned */ ++ reg_val = E1000_READ_REG(hw, KUMCTRLSTA); ++ *data = (uint16_t)reg_val; ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return E1000_SUCCESS; ++} ++ ++int32_t ++e1000_write_kmrn_reg(struct e1000_hw *hw, ++ uint32_t reg_addr, ++ uint16_t data) ++{ ++ uint32_t reg_val; ++ uint16_t swfw; ++ DEBUGFUNC("e1000_write_kmrn_reg"); ++ ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ reg_val = ((reg_addr << E1000_KUMCTRLSTA_OFFSET_SHIFT) & ++ E1000_KUMCTRLSTA_OFFSET) | data; ++ E1000_WRITE_REG(hw, KUMCTRLSTA, reg_val); ++ usec_delay(2); ++ ++ e1000_swfw_sync_release(hw, swfw); ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Returns the PHY to the power-on reset state ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ uint32_t ctrl, ctrl_ext; ++ uint32_t led_ctrl; ++ int32_t ret_val; ++ uint16_t swfw; ++ ++ DEBUGFUNC("e1000_phy_hw_reset"); ++ ++ /* In the case of the phy reset being blocked, it's not an error, we ++ * simply return success without performing the reset. 
*/ ++ ret_val = e1000_check_phy_reset_block(hw); ++ if (ret_val) ++ return E1000_SUCCESS; ++ ++ DEBUGOUT("Resetting Phy...\n"); ++ ++ if (hw->mac_type > e1000_82543) { ++ if ((hw->mac_type == e1000_80003es2lan) && ++ (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)) { ++ swfw = E1000_SWFW_PHY1_SM; ++ } else { ++ swfw = E1000_SWFW_PHY0_SM; ++ } ++ if (e1000_swfw_sync_acquire(hw, swfw)) { ++ e1000_release_software_semaphore(hw); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ /* Read the device control register and assert the E1000_CTRL_PHY_RST ++ * bit. Then, take it out of reset. ++ * For pre-e1000_82571 hardware, we delay for 10ms between the assert ++ * and deassert. For e1000_82571 hardware and later, we instead delay ++ * for 50us between and 10ms after the deassertion. ++ */ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, ctrl | E1000_CTRL_PHY_RST); ++ E1000_WRITE_FLUSH(hw); ++ ++ if (hw->mac_type < e1000_82571) ++ msec_delay(10); ++ else ++ usec_delay(100); ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ E1000_WRITE_FLUSH(hw); ++ ++ if (hw->mac_type >= e1000_82571) ++ msec_delay_irq(10); ++ e1000_swfw_sync_release(hw, swfw); ++ } else { ++ /* Read the Extended Device Control Register, assert the PHY_RESET_DIR ++ * bit to put the PHY into reset. Then, take it out of reset. ++ */ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR; ++ ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ msec_delay(10); ++ ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ E1000_WRITE_FLUSH(hw); ++ } ++ usec_delay(150); ++ ++ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ++ /* Configure activity LED after PHY reset */ ++ led_ctrl = E1000_READ_REG(hw, LEDCTL); ++ led_ctrl &= IGP_ACTIVITY_LED_MASK; ++ led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); ++ E1000_WRITE_REG(hw, LEDCTL, led_ctrl); ++ } ++ ++ /* Wait for FW to finish PHY configuration. */ ++ ret_val = e1000_get_phy_cfg_done(hw); ++ e1000_release_software_semaphore(hw); ++ ++ if ((hw->mac_type == e1000_ich8lan) && ++ (hw->phy_type == e1000_phy_igp_3)) { ++ ret_val = e1000_init_lcd_from_nvm(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Resets the PHY ++* ++* hw - Struct containing variables accessed by shared code ++* ++* Sets bit 15 of the MII Control regiser ++******************************************************************************/ ++int32_t ++e1000_phy_reset(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_phy_reset"); ++ ++ /* In the case of the phy reset being blocked, it's not an error, we ++ * simply return success without performing the reset. 
*/ ++ ret_val = e1000_check_phy_reset_block(hw); ++ if (ret_val) ++ return E1000_SUCCESS; ++ ++ switch (hw->mac_type) { ++ case e1000_82541_rev_2: ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_ich8lan: ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) ++ return ret_val; ++ ++ break; ++ default: ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= MII_CR_RESET; ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ usec_delay(1); ++ break; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || hw->phy_type == e1000_phy_igp_2) ++ e1000_phy_init_script(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Work-around for 82566 power-down: on D3 entry- ++* 1) disable gigabit link ++* 2) write VR power-down enable ++* 3) read it back ++* if successful continue, else issue LCD reset and repeat ++* ++* hw - struct containing variables accessed by shared code ++******************************************************************************/ ++void ++e1000_phy_powerdown_workaround(struct e1000_hw *hw) ++{ ++ int32_t reg; ++ uint16_t phy_data; ++ int32_t retry = 0; ++ ++ DEBUGFUNC("e1000_phy_powerdown_workaround"); ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return; ++ ++ do { ++ /* Disable link */ ++ reg = E1000_READ_REG(hw, PHY_CTRL); ++ E1000_WRITE_REG(hw, PHY_CTRL, reg | E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ++ /* Write VR power-down enable */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &phy_data); ++ e1000_write_phy_reg(hw, IGP3_VR_CTRL, phy_data | ++ IGP3_VR_CTRL_MODE_SHUT); ++ ++ /* Read it back and test */ ++ e1000_read_phy_reg(hw, IGP3_VR_CTRL, &phy_data); ++ if ((phy_data & IGP3_VR_CTRL_MODE_SHUT) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = E1000_READ_REG(hw, CTRL); ++ E1000_WRITE_REG(hw, CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++ ++ return; ++ ++} ++ ++/****************************************************************************** ++* Work-around for 82566 Kumeran PCS lock loss: ++* On link status change (i.e. PCI reset, speed change) and link is up and ++* speed is gigabit- ++* 0) if workaround is optionally disabled do nothing ++* 1) wait 1ms for Kumeran link to come up ++* 2) check Kumeran Diagnostic register PCS lock loss bit ++* 3) if not set the link is locked (all is good), otherwise... ++* 4) reset the PHY ++* 5) repeat up to 10 times ++* Note: this is only called for IGP3 copper when speed is 1gb. ++* ++* hw - struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_kumeran_lock_loss_workaround(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ int32_t reg; ++ int32_t cnt; ++ uint16_t phy_data; ++ ++ if (hw->kmrn_lock_loss_workaround_disabled) ++ return E1000_SUCCESS; ++ ++ /* Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouls up link ++ * stability */ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ ++ if (phy_data & MII_SR_LINK_STATUS) { ++ for (cnt = 0; cnt < 10; cnt++) { ++ /* read once to clear */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ /* and again to get new status */ ++ ret_val = e1000_read_phy_reg(hw, IGP3_KMRN_DIAG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* check for PCS lock */ ++ if (!(phy_data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) ++ return E1000_SUCCESS; ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ msec_delay_irq(5); ++ } ++ /* Disable GigE link negotiation */ ++ reg = E1000_READ_REG(hw, PHY_CTRL); ++ E1000_WRITE_REG(hw, PHY_CTRL, reg | E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ++ /* unable to acquire PCS lock */ ++ return E1000_ERR_PHY; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Probes the expected PHY address for known PHY IDs ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++int32_t ++e1000_detect_gig_phy(struct e1000_hw *hw) ++{ ++ int32_t phy_init_status, ret_val; ++ uint16_t phy_id_high, phy_id_low; ++ boolean_t match = FALSE; ++ ++ DEBUGFUNC("e1000_detect_gig_phy"); ++ ++ /* The 82571 firmware may still be configuring the PHY. In this ++ * case, we cannot access the PHY until the configuration is done. So ++ * we explicitly set the PHY values. */ ++ if (hw->mac_type == e1000_82571 || ++ hw->mac_type == e1000_82572) { ++ hw->phy_id = IGP01E1000_I_PHY_ID; ++ hw->phy_type = e1000_phy_igp_2; ++ return E1000_SUCCESS; ++ } ++ ++ /* ESB-2 PHY reads require e1000_phy_gg82563 to be set because of a work- ++ * around that forces PHY page 0 to be set or the reads fail. The rest of ++ * the code in this routine uses e1000_read_phy_reg to read the PHY ID. ++ * So for ESB-2 we need to have this set so our reads won't fail. If the ++ * attached PHY is not a e1000_phy_gg82563, the routines below will figure ++ * this out as well. */ ++ if (hw->mac_type == e1000_80003es2lan) ++ hw->phy_type = e1000_phy_gg82563; ++ ++ /* Read the PHY ID Registers to identify which PHY is onboard. 
*/ ++ ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_id = (uint32_t) (phy_id_high << 16); ++ usec_delay(20); ++ ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_id |= (uint32_t) (phy_id_low & PHY_REVISION_MASK); ++ hw->phy_revision = (uint32_t) phy_id_low & ~PHY_REVISION_MASK; ++ ++ switch (hw->mac_type) { ++ case e1000_82543: ++ if (hw->phy_id == M88E1000_E_PHY_ID) match = TRUE; ++ break; ++ case e1000_82544: ++ if (hw->phy_id == M88E1000_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ if (hw->phy_id == M88E1011_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ if (hw->phy_id == IGP01E1000_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_82573: ++ if (hw->phy_id == M88E1111_I_PHY_ID) match = TRUE; ++ break; ++ case e1000_80003es2lan: ++ if (hw->phy_id == GG82563_E_PHY_ID) match = TRUE; ++ break; ++ case e1000_ich8lan: ++ if (hw->phy_id == IGP03E1000_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_PLUS_E_PHY_ID) match = TRUE; ++ if (hw->phy_id == IFE_C_E_PHY_ID) match = TRUE; ++ break; ++ default: ++ DEBUGOUT1("Invalid MAC type %d\n", hw->mac_type); ++ return -E1000_ERR_CONFIG; ++ } ++ phy_init_status = e1000_set_phy_type(hw); ++ ++ if ((match) && (phy_init_status == E1000_SUCCESS)) { ++ DEBUGOUT1("PHY ID 0x%X detected\n", hw->phy_id); ++ return E1000_SUCCESS; ++ } ++ DEBUGOUT1("Invalid PHY ID 0x%X\n", hw->phy_id); ++ return -E1000_ERR_PHY; ++} ++ ++/****************************************************************************** ++* Resets the PHY's DSP ++* ++* hw - Struct containing variables accessed by shared code ++******************************************************************************/ ++static int32_t ++e1000_phy_reset_dsp(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ DEBUGFUNC("e1000_phy_reset_dsp"); ++ ++ do { ++ if (hw->phy_type != e1000_phy_gg82563) { ++ ret_val = e1000_write_phy_reg(hw, 29, 0x001d); ++ if (ret_val) break; ++ } ++ ret_val = e1000_write_phy_reg(hw, 30, 0x00c1); ++ if (ret_val) break; ++ ret_val = e1000_write_phy_reg(hw, 30, 0x0000); ++ if (ret_val) break; ++ ret_val = E1000_SUCCESS; ++ } while (0); ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers for igp PHY only. ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_igp_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity, min_length, max_length, average; ++ ++ DEBUGFUNC("e1000_phy_igp_get_info"); ++ ++ /* The downshift status is checked only once, after link is established, ++ * and it stored in the hw->speed_downgraded parameter. */ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ ++ /* IGP01E1000 does not need to support it. 
*/ ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal; ++ ++ /* IGP01E1000 always correct polarity reversal */ ++ phy_info->polarity_correction = e1000_polarity_reversal_enabled; ++ ++ /* Check polarity status */ ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = (phy_data & IGP01E1000_PSSR_MDIX) >> ++ IGP01E1000_PSSR_MDIX_SHIFT; ++ ++ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ /* Local/Remote Receiver Information are only valid at 1000 Mbps */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >> ++ SR_1000T_LOCAL_RX_STATUS_SHIFT; ++ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >> ++ SR_1000T_REMOTE_RX_STATUS_SHIFT; ++ ++ /* Get cable length */ ++ ret_val = e1000_get_cable_length(hw, &min_length, &max_length); ++ if (ret_val) ++ return ret_val; ++ ++ /* Translate to old method */ ++ average = (max_length + min_length) / 2; ++ ++ if (average <= e1000_igp_cable_length_50) ++ phy_info->cable_length = e1000_cable_length_50; ++ else if (average <= e1000_igp_cable_length_80) ++ phy_info->cable_length = e1000_cable_length_50_80; ++ else if (average <= e1000_igp_cable_length_110) ++ phy_info->cable_length = e1000_cable_length_80_110; ++ else if (average <= e1000_igp_cable_length_140) ++ phy_info->cable_length = e1000_cable_length_110_140; ++ else ++ phy_info->cable_length = e1000_cable_length_140; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers for ife PHY only. ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_ife_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity; ++ ++ DEBUGFUNC("e1000_phy_ife_get_info"); ++ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal; ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ phy_info->polarity_correction = ++ (phy_data & IFE_PSC_AUTO_POLARITY_DISABLE) >> ++ IFE_PSC_AUTO_POLARITY_DISABLE_SHIFT; ++ ++ if (phy_info->polarity_correction == e1000_polarity_reversal_enabled) { ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* Polarity is forced. */ ++ polarity = (phy_data & IFE_PSC_FORCE_POLARITY) >> ++ IFE_PSC_FORCE_POLARITY_SHIFT; ++ } ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_MDIX_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = ++ (phy_data & (IFE_PMC_AUTO_MDIX | IFE_PMC_FORCE_MDIX)) >> ++ IFE_PMC_MDIX_MODE_SHIFT; ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers fot m88 PHY only. 
++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_m88_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, polarity; ++ ++ DEBUGFUNC("e1000_phy_m88_get_info"); ++ ++ /* The downshift status is checked only once, after link is established, ++ * and it stored in the hw->speed_downgraded parameter. */ ++ phy_info->downshift = (e1000_downshift)hw->speed_downgraded; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->extended_10bt_distance = ++ (phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >> ++ M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT; ++ phy_info->polarity_correction = ++ (phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >> ++ M88E1000_PSCR_POLARITY_REVERSAL_SHIFT; ++ ++ /* Check polarity status */ ++ ret_val = e1000_check_polarity(hw, &polarity); ++ if (ret_val) ++ return ret_val; ++ phy_info->cable_polarity = polarity; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->mdix_mode = (phy_data & M88E1000_PSSR_MDIX) >> ++ M88E1000_PSSR_MDIX_SHIFT; ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ /* Cable Length Estimation and Local/Remote Receiver Information ++ * are only valid at 1000 Mbps. ++ */ ++ if (hw->phy_type != e1000_phy_gg82563) { ++ phy_info->cable_length = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_info->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) >> ++ SR_1000T_LOCAL_RX_STATUS_SHIFT; ++ ++ phy_info->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) >> ++ SR_1000T_REMOTE_RX_STATUS_SHIFT; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++* Get PHY information from various PHY registers ++* ++* hw - Struct containing variables accessed by shared code ++* phy_info - PHY information structure ++******************************************************************************/ ++int32_t ++e1000_phy_get_info(struct e1000_hw *hw, ++ struct e1000_phy_info *phy_info) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_phy_get_info"); ++ ++ phy_info->cable_length = e1000_cable_length_undefined; ++ phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined; ++ phy_info->cable_polarity = e1000_rev_polarity_undefined; ++ phy_info->downshift = e1000_downshift_undefined; ++ phy_info->polarity_correction = e1000_polarity_reversal_undefined; ++ phy_info->mdix_mode = e1000_auto_x_mode_undefined; ++ phy_info->local_rx = e1000_1000t_rx_status_undefined; ++ phy_info->remote_rx = e1000_1000t_rx_status_undefined; ++ ++ if (hw->media_type != e1000_media_type_copper) { ++ DEBUGOUT("PHY info is only valid for copper media\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((phy_data & 
MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) { ++ DEBUGOUT("PHY info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) ++ return e1000_phy_igp_get_info(hw, phy_info); ++ else if (hw->phy_type == e1000_phy_ife) ++ return e1000_phy_ife_get_info(hw, phy_info); ++ else ++ return e1000_phy_m88_get_info(hw, phy_info); ++} ++ ++int32_t ++e1000_validate_mdi_setting(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_validate_mdi_settings"); ++ ++ if (!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) { ++ DEBUGOUT("Invalid MDI setting detected\n"); ++ hw->mdix = 1; ++ return -E1000_ERR_CONFIG; ++ } ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * Sets up eeprom variables in the hw struct. Must be called after mac_type ++ * is configured. Additionally, if this is ICH8, the flash controller GbE ++ * registers must be mapped, or this will crash. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_init_eeprom_params(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd = E1000_READ_REG(hw, EECD); ++ int32_t ret_val = E1000_SUCCESS; ++ uint16_t eeprom_size; ++ ++ DEBUGFUNC("e1000_init_eeprom_params"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->word_size = 64; ++ eeprom->opcode_bits = 3; ++ eeprom->address_bits = 6; ++ eeprom->delay_usec = 50; ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82540: ++ case e1000_82545: ++ case e1000_82545_rev_3: ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->opcode_bits = 3; ++ eeprom->delay_usec = 50; ++ if (eecd & E1000_EECD_SIZE) { ++ eeprom->word_size = 256; ++ eeprom->address_bits = 8; ++ } else { ++ eeprom->word_size = 64; ++ eeprom->address_bits = 6; ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82541: ++ case e1000_82541_rev_2: ++ case e1000_82547: ++ case e1000_82547_rev_2: ++ if (eecd & E1000_EECD_TYPE) { ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ } else { ++ eeprom->type = e1000_eeprom_microwire; ++ eeprom->opcode_bits = 3; ++ eeprom->delay_usec = 50; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->word_size = 256; ++ eeprom->address_bits = 8; ++ } else { ++ eeprom->word_size = 64; ++ eeprom->address_bits = 6; ++ } ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_82573: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ 
eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = TRUE; ++ eeprom->use_eewr = TRUE; ++ if (e1000_is_onboard_nvm_eeprom(hw) == FALSE) { ++ eeprom->type = e1000_eeprom_flash; ++ eeprom->word_size = 2048; ++ ++ /* Ensure that the Autonomous FLASH update bit is cleared due to ++ * Flash update issue on parts which use a FLASH for NVM. */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } ++ break; ++ case e1000_80003es2lan: ++ eeprom->type = e1000_eeprom_spi; ++ eeprom->opcode_bits = 8; ++ eeprom->delay_usec = 1; ++ if (eecd & E1000_EECD_ADDR_BITS) { ++ eeprom->page_size = 32; ++ eeprom->address_bits = 16; ++ } else { ++ eeprom->page_size = 8; ++ eeprom->address_bits = 8; ++ } ++ eeprom->use_eerd = TRUE; ++ eeprom->use_eewr = FALSE; ++ break; ++ case e1000_ich8lan: ++ { ++ int32_t i = 0; ++ uint32_t flash_size = E1000_READ_ICH8_REG(hw, ICH8_FLASH_GFPREG); ++ ++ eeprom->type = e1000_eeprom_ich8; ++ eeprom->use_eerd = FALSE; ++ eeprom->use_eewr = FALSE; ++ eeprom->word_size = E1000_SHADOW_RAM_WORDS; ++ ++ /* Zero the shadow RAM structure. But don't load it from NVM ++ * so as to save time for driver init */ ++ if (hw->eeprom_shadow_ram != NULL) { ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ hw->eeprom_shadow_ram[i].modified = FALSE; ++ hw->eeprom_shadow_ram[i].eeprom_word = 0xFFFF; ++ } ++ } ++ ++ hw->flash_base_addr = (flash_size & ICH8_GFPREG_BASE_MASK) * ++ ICH8_FLASH_SECTOR_SIZE; ++ ++ hw->flash_bank_size = ((flash_size >> 16) & ICH8_GFPREG_BASE_MASK) + 1; ++ hw->flash_bank_size -= (flash_size & ICH8_GFPREG_BASE_MASK); ++ hw->flash_bank_size *= ICH8_FLASH_SECTOR_SIZE; ++ hw->flash_bank_size /= 2 * sizeof(uint16_t); ++ ++ break; ++ } ++ default: ++ break; ++ } ++ ++ if (eeprom->type == e1000_eeprom_spi) { ++ /* eeprom_size will be an enum [0..8] that maps to eeprom sizes 128B to ++ * 32KB (incremented by powers of 2). ++ */ ++ if (hw->mac_type <= e1000_82547_rev_2) { ++ /* Set to default value for initial eeprom read. */ ++ eeprom->word_size = 64; ++ ret_val = e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size); ++ if (ret_val) ++ return ret_val; ++ eeprom_size = (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT; ++ /* 256B eeprom size was not supported in earlier hardware, so we ++ * bump eeprom_size up one to ensure that "1" (which maps to 256B) ++ * is never the result used in the shifting logic below. */ ++ if (eeprom_size) ++ eeprom_size++; ++ } else { ++ eeprom_size = (uint16_t)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ } ++ ++ eeprom->word_size = 1 << (eeprom_size + EEPROM_WORD_SIZE_SHIFT); ++ } ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Raises the EEPROM's clock input. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * eecd - EECD's current value ++ *****************************************************************************/ ++static void ++e1000_raise_ee_clk(struct e1000_hw *hw, ++ uint32_t *eecd) ++{ ++ /* Raise the clock input to the EEPROM (by setting the SK bit), and then ++ * wait microseconds. ++ */ ++ *eecd = *eecd | E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++} ++ ++/****************************************************************************** ++ * Lowers the EEPROM's clock input. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * eecd - EECD's current value ++ *****************************************************************************/ ++static void ++e1000_lower_ee_clk(struct e1000_hw *hw, ++ uint32_t *eecd) ++{ ++ /* Lower the clock input to the EEPROM (by clearing the SK bit), and then ++ * wait 50 microseconds. ++ */ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, *eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++} ++ ++/****************************************************************************** ++ * Shift data bits out to the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * data - data to send to the EEPROM ++ * count - number of bits to shift out ++ *****************************************************************************/ ++static void ++e1000_shift_out_ee_bits(struct e1000_hw *hw, ++ uint16_t data, ++ uint16_t count) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ uint32_t mask; ++ ++ /* We need to shift "count" bits out to the EEPROM. So, value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ */ ++ mask = 0x01 << (count - 1); ++ eecd = E1000_READ_REG(hw, EECD); ++ if (eeprom->type == e1000_eeprom_microwire) { ++ eecd &= ~E1000_EECD_DO; ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ eecd |= E1000_EECD_DO; ++ } ++ do { ++ /* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1", ++ * and then raising and then lowering the clock (the SK bit controls ++ * the clock input to the EEPROM). A "0" is shifted out to the EEPROM ++ * by setting "DI" to "0" and then raising and then lowering the clock. ++ */ ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ ++ usec_delay(eeprom->delay_usec); ++ ++ e1000_raise_ee_clk(hw, &eecd); ++ e1000_lower_ee_clk(hw, &eecd); ++ ++ mask = mask >> 1; ++ ++ } while (mask); ++ ++ /* We leave the "DI" bit set to "0" when we leave this routine. */ ++ eecd &= ~E1000_EECD_DI; ++ E1000_WRITE_REG(hw, EECD, eecd); ++} ++ ++/****************************************************************************** ++ * Shift data bits in from the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static uint16_t ++e1000_shift_in_ee_bits(struct e1000_hw *hw, ++ uint16_t count) ++{ ++ uint32_t eecd; ++ uint32_t i; ++ uint16_t data; ++ ++ /* In order to read a register from the EEPROM, we need to shift 'count' ++ * bits in from the EEPROM. Bits are "shifted in" by raising the clock ++ * input to the EEPROM (setting the SK bit), and then reading the value of ++ * the "DO" bit. During this "shifting in" process the "DI" bit should ++ * always be clear. 
++ */ ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data = data << 1; ++ e1000_raise_ee_clk(hw, &eecd); ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ eecd &= ~(E1000_EECD_DI); ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_ee_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/****************************************************************************** ++ * Prepares EEPROM for access ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This ++ * function should be called before issuing a command to the EEPROM. ++ *****************************************************************************/ ++static int32_t ++e1000_acquire_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd, i=0; ++ ++ DEBUGFUNC("e1000_acquire_eeprom"); ++ ++ if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) ++ return -E1000_ERR_SWFW_SYNC; ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (hw->mac_type != e1000_82573) { ++ /* Request EEPROM Access */ ++ if (hw->mac_type > e1000_82544) { ++ eecd |= E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ eecd = E1000_READ_REG(hw, EECD); ++ while ((!(eecd & E1000_EECD_GNT)) && ++ (i < E1000_EEPROM_GRANT_ATTEMPTS)) { ++ i++; ++ usec_delay(5); ++ eecd = E1000_READ_REG(hw, EECD); ++ } ++ if (!(eecd & E1000_EECD_GNT)) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ DEBUGOUT("Could not acquire EEPROM grant\n"); ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++ return -E1000_ERR_EEPROM; ++ } ++ } ++ } ++ ++ /* Setup EEPROM for Read/Write */ ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ /* Clear SK and DI */ ++ eecd &= ~(E1000_EECD_DI | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ /* Set CS */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ usec_delay(1); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Returns EEPROM to a "standby" state ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_standby_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Clock high */ ++ eecd |= E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Select EEPROM */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ ++ /* Clock low */ ++ eecd &= ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ } else if (eeprom->type == e1000_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ 
E1000_WRITE_FLUSH(hw); ++ usec_delay(eeprom->delay_usec); ++ } ++} ++ ++/****************************************************************************** ++ * Terminates a command by inverting the EEPROM's chip select pin ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static void ++e1000_release_eeprom(struct e1000_hw *hw) ++{ ++ uint32_t eecd; ++ ++ DEBUGFUNC("e1000_release_eeprom"); ++ ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ if (hw->eeprom.type == e1000_eeprom_spi) { ++ eecd |= E1000_EECD_CS; /* Pull CS high */ ++ eecd &= ~E1000_EECD_SK; /* Lower SCK */ ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ usec_delay(hw->eeprom.delay_usec); ++ } else if (hw->eeprom.type == e1000_eeprom_microwire) { ++ /* cleanup eeprom */ ++ ++ /* CS on Microwire is active-high */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_DI); ++ ++ E1000_WRITE_REG(hw, EECD, eecd); ++ ++ /* Rising edge of clock */ ++ eecd |= E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++ ++ /* Falling edge of clock */ ++ eecd &= ~E1000_EECD_SK; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ E1000_WRITE_FLUSH(hw); ++ usec_delay(hw->eeprom.delay_usec); ++ } ++ ++ /* Stop requesting EEPROM access */ ++ if (hw->mac_type > e1000_82544) { ++ eecd &= ~E1000_EECD_REQ; ++ E1000_WRITE_REG(hw, EECD, eecd); ++ } ++ ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_spi_eeprom_ready(struct e1000_hw *hw) ++{ ++ uint16_t retry_count = 0; ++ uint8_t spi_stat_reg; ++ ++ DEBUGFUNC("e1000_spi_eeprom_ready"); ++ ++ /* Read "Status Register" repeatedly until the LSB is cleared. The ++ * EEPROM will signal that the command has been completed by clearing ++ * bit 0 of the internal status register. If it's not cleared within ++ * 5 milliseconds, then error out. ++ */ ++ retry_count = 0; ++ do { ++ e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI, ++ hw->eeprom.opcode_bits); ++ spi_stat_reg = (uint8_t)e1000_shift_in_ee_bits(hw, 8); ++ if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI)) ++ break; ++ ++ usec_delay(5); ++ retry_count += 5; ++ ++ e1000_standby_eeprom(hw); ++ } while (retry_count < EEPROM_MAX_RETRY_SPI); ++ ++ /* ATMEL SPI write time could vary from 0-20mSec on 3.3V devices (and ++ * only 0-5mSec on 5V devices) ++ */ ++ if (retry_count >= EEPROM_MAX_RETRY_SPI) { ++ DEBUGOUT("SPI EEPROM Status error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t i = 0; ++ int32_t ret_val; ++ ++ DEBUGFUNC("e1000_read_eeprom"); ++ ++ /* A check for invalid values: offset too large, too many words, and not ++ * enough words. 
++ */ ++ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("\"words\" parameter out of bounds\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* FLASH reads without acquiring the semaphore are safe */ ++ if (e1000_is_onboard_nvm_eeprom(hw) == TRUE && ++ hw->eeprom.use_eerd == FALSE) { ++ switch (hw->mac_type) { ++ case e1000_80003es2lan: ++ break; ++ default: ++ /* Prepare the EEPROM for reading */ ++ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ break; ++ } ++ } ++ ++ if (eeprom->use_eerd == TRUE) { ++ ret_val = e1000_read_eeprom_eerd(hw, offset, words, data); ++ if ((e1000_is_onboard_nvm_eeprom(hw) == TRUE) || ++ (hw->mac_type != e1000_82573)) ++ e1000_release_eeprom(hw); ++ return ret_val; ++ } ++ ++ if (eeprom->type == e1000_eeprom_ich8) ++ return e1000_read_eeprom_ich8(hw, offset, words, data); ++ ++ if (eeprom->type == e1000_eeprom_spi) { ++ uint16_t word_in; ++ uint8_t read_opcode = EEPROM_READ_OPCODE_SPI; ++ ++ if (e1000_spi_eeprom_ready(hw)) { ++ e1000_release_eeprom(hw); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the opcode */ ++ if ((eeprom->address_bits == 8) && (offset >= 128)) ++ read_opcode |= EEPROM_A8_OPCODE_SPI; ++ ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits); ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset*2), eeprom->address_bits); ++ ++ /* Read the data. The address of the eeprom internally increments with ++ * each byte (spi) being read, saving on the overhead of eeprom setup ++ * and tear-down. The address counter will roll over if reading beyond ++ * the size of the eeprom, thus allowing the entire memory to be read ++ * starting from any offset. */ ++ for (i = 0; i < words; i++) { ++ word_in = e1000_shift_in_ee_bits(hw, 16); ++ data[i] = (word_in >> 8) | (word_in << 8); ++ } ++ } else if (eeprom->type == e1000_eeprom_microwire) { ++ for (i = 0; i < words; i++) { ++ /* Send the READ command (opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_READ_OPCODE_MICROWIRE, ++ eeprom->opcode_bits); ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset + i), ++ eeprom->address_bits); ++ ++ /* Read the data. For microwire, each word requires the overhead ++ * of eeprom setup and tear-down. */ ++ data[i] = e1000_shift_in_ee_bits(hw, 16); ++ e1000_standby_eeprom(hw); ++ } ++ } ++ ++ /* End this read operation */ ++ e1000_release_eeprom(hw); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word from the EEPROM using the EERD register. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom_eerd(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t i, eerd = 0; ++ int32_t error = 0; ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) + ++ E1000_EEPROM_RW_REG_START; ++ ++ E1000_WRITE_REG(hw, EERD, eerd); ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_READ); ++ ++ if (error) { ++ break; ++ } ++ data[i] = (E1000_READ_REG(hw, EERD) >> E1000_EEPROM_RW_REG_DATA); ++ ++ } ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word from the EEPROM using the EEWR register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_eewr(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t register_value = 0; ++ uint32_t i = 0; ++ int32_t error = 0; ++ ++ if (e1000_swfw_sync_acquire(hw, E1000_SWFW_EEP_SM)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ for (i = 0; i < words; i++) { ++ register_value = (data[i] << E1000_EEPROM_RW_REG_DATA) | ++ ((offset+i) << E1000_EEPROM_RW_ADDR_SHIFT) | ++ E1000_EEPROM_RW_REG_START; ++ ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE); ++ if (error) { ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, EEWR, register_value); ++ ++ error = e1000_poll_eerd_eewr_done(hw, E1000_EEPROM_POLL_WRITE); ++ ++ if (error) { ++ break; ++ } ++ } ++ ++ e1000_swfw_sync_release(hw, E1000_SWFW_EEP_SM); ++ return error; ++} ++ ++/****************************************************************************** ++ * Polls the status bit (bit 1) of the EERD to determine when the read is done. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int eerd) ++{ ++ uint32_t attempts = 100000; ++ uint32_t i, reg = 0; ++ int32_t done = E1000_ERR_EEPROM; ++ ++ for (i = 0; i < attempts; i++) { ++ if (eerd == E1000_EEPROM_POLL_READ) ++ reg = E1000_READ_REG(hw, EERD); ++ else ++ reg = E1000_READ_REG(hw, EEWR); ++ ++ if (reg & E1000_EEPROM_RW_REG_DONE) { ++ done = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ return done; ++} ++ ++/*************************************************************************** ++* Description: Determines if the onboard NVM is FLASH or EEPROM. 
++* ++* hw - Struct containing variables accessed by shared code ++****************************************************************************/ ++boolean_t ++e1000_is_onboard_nvm_eeprom(struct e1000_hw *hw) ++{ ++ uint32_t eecd = 0; ++ ++ DEBUGFUNC("e1000_is_onboard_nvm_eeprom"); ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return FALSE; ++ ++ if (hw->mac_type == e1000_82573) { ++ eecd = E1000_READ_REG(hw, EECD); ++ ++ /* Isolate bits 15 & 16 */ ++ eecd = ((eecd >> 15) & 0x03); ++ ++ /* If both bits are set, device is Flash type */ ++ if (eecd == 0x03) { ++ return FALSE; ++ } ++ } ++ return TRUE; ++} ++ ++/****************************************************************************** ++ * Verifies that the EEPROM has a valid checksum ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Reads the first 64 16 bit words of the EEPROM and sums the values read. ++ * If the the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is ++ * valid. ++ *****************************************************************************/ ++int32_t ++e1000_validate_eeprom_checksum(struct e1000_hw *hw) ++{ ++ uint16_t checksum = 0; ++ uint16_t i, eeprom_data; ++ ++ DEBUGFUNC("e1000_validate_eeprom_checksum"); ++ ++ if ((hw->mac_type == e1000_82573) && ++ (e1000_is_onboard_nvm_eeprom(hw) == FALSE)) { ++ /* Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. */ ++ e1000_read_eeprom(hw, 0x10, 1, &eeprom_data); ++ if ((eeprom_data & 0x10) == 0) { ++ /* Read 0x23 and check bit 15. This bit is a 1 when the checksum ++ * has already been fixed. If the checksum is still wrong and this ++ * bit is a 1, we need to return bad checksum. Otherwise, we need ++ * to set this bit to a 1 and update the checksum. */ ++ e1000_read_eeprom(hw, 0x23, 1, &eeprom_data); ++ if ((eeprom_data & 0x8000) == 0) { ++ eeprom_data |= 0x8000; ++ e1000_write_eeprom(hw, 0x23, 1, &eeprom_data); ++ e1000_update_eeprom_checksum(hw); ++ } ++ } ++ } ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ /* Drivers must allocate the shadow ram structure for the ++ * EEPROM checksum to be updated. Otherwise, this bit as well ++ * as the checksum must both be set correctly for this ++ * validation to pass. ++ */ ++ e1000_read_eeprom(hw, 0x19, 1, &eeprom_data); ++ if ((eeprom_data & 0x40) == 0) { ++ eeprom_data |= 0x40; ++ e1000_write_eeprom(hw, 0x19, 1, &eeprom_data); ++ e1000_update_eeprom_checksum(hw); ++ } ++ } ++ ++ for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) { ++ if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ checksum += eeprom_data; ++ } ++ ++ if (checksum == (uint16_t) EEPROM_SUM) ++ return E1000_SUCCESS; ++ else { ++ DEBUGOUT("EEPROM Checksum Invalid\n"); ++ return -E1000_ERR_EEPROM; ++ } ++} ++ ++/****************************************************************************** ++ * Calculates the EEPROM checksum and writes it to the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA. ++ * Writes the difference to word offset 63 of the EEPROM. 
++ *****************************************************************************/ ++int32_t ++e1000_update_eeprom_checksum(struct e1000_hw *hw) ++{ ++ uint32_t ctrl_ext; ++ uint16_t checksum = 0; ++ uint16_t i, eeprom_data; ++ ++ DEBUGFUNC("e1000_update_eeprom_checksum"); ++ ++ for (i = 0; i < EEPROM_CHECKSUM_REG; i++) { ++ if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ checksum += eeprom_data; ++ } ++ checksum = (uint16_t) EEPROM_SUM - checksum; ++ if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) { ++ DEBUGOUT("EEPROM Write Error\n"); ++ return -E1000_ERR_EEPROM; ++ } else if (hw->eeprom.type == e1000_eeprom_flash) { ++ e1000_commit_shadow_ram(hw); ++ } else if (hw->eeprom.type == e1000_eeprom_ich8) { ++ e1000_commit_shadow_ram(hw); ++ /* Reload the EEPROM, or else modifications will not appear ++ * until after next adapter reset. */ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ msec_delay(10); ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Parent function for writing words to the different EEPROM types. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - 16 bit word to be written to the EEPROM ++ * ++ * If e1000_update_eeprom_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ int32_t status = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom"); ++ ++ /* A check for invalid values: offset too large, too many words, and not ++ * enough words. ++ */ ++ if ((offset >= eeprom->word_size) || (words > eeprom->word_size - offset) || ++ (words == 0)) { ++ DEBUGOUT("\"words\" parameter out of bounds\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* 82573 writes only through eewr */ ++ if (eeprom->use_eewr == TRUE) ++ return e1000_write_eeprom_eewr(hw, offset, words, data); ++ ++ if (eeprom->type == e1000_eeprom_ich8) ++ return e1000_write_eeprom_ich8(hw, offset, words, data); ++ ++ /* Prepare the EEPROM for writing */ ++ if (e1000_acquire_eeprom(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ ++ if (eeprom->type == e1000_eeprom_microwire) { ++ status = e1000_write_eeprom_microwire(hw, offset, words, data); ++ } else { ++ status = e1000_write_eeprom_spi(hw, offset, words, data); ++ msec_delay(10); ++ } ++ ++ /* Done with writing */ ++ e1000_release_eeprom(hw); ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word to a given offset in an SPI EEPROM. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - pointer to array of 8 bit words to be written to the EEPROM ++ * ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_spi(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint16_t widx = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom_spi"); ++ ++ while (widx < words) { ++ uint8_t write_opcode = EEPROM_WRITE_OPCODE_SPI; ++ ++ if (e1000_spi_eeprom_ready(hw)) return -E1000_ERR_EEPROM; ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode ) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI, ++ eeprom->opcode_bits); ++ ++ e1000_standby_eeprom(hw); ++ ++ /* Some SPI eeproms use the 8th address bit embedded in the opcode */ ++ if ((eeprom->address_bits == 8) && (offset >= 128)) ++ write_opcode |= EEPROM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits); ++ ++ e1000_shift_out_ee_bits(hw, (uint16_t)((offset + widx)*2), ++ eeprom->address_bits); ++ ++ /* Send the data */ ++ ++ /* Loop to allow for up to whole page write (32 bytes) of eeprom */ ++ while (widx < words) { ++ uint16_t word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_ee_bits(hw, word_out, 16); ++ widx++; ++ ++ /* Some larger eeprom sizes are capable of a 32-byte PAGE WRITE ++ * operation, while the smaller eeproms are capable of an 8-byte ++ * PAGE WRITE operation. Break the inner loop to pass new address ++ */ ++ if ((((offset + widx)*2) % eeprom->page_size) == 0) { ++ e1000_standby_eeprom(hw); ++ break; ++ } ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word to a given offset in a Microwire EEPROM. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset within the EEPROM to be written to ++ * words - number of words to write ++ * data - pointer to array of 16 bit words to be written to the EEPROM ++ * ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_microwire(struct e1000_hw *hw, ++ uint16_t offset, ++ uint16_t words, ++ uint16_t *data) ++{ ++ struct e1000_eeprom_info *eeprom = &hw->eeprom; ++ uint32_t eecd; ++ uint16_t words_written = 0; ++ uint16_t i = 0; ++ ++ DEBUGFUNC("e1000_write_eeprom_microwire"); ++ ++ /* Send the write enable command to the EEPROM (3-bit opcode plus ++ * 6/8-bit dummy address beginning with 11). It's less work to include ++ * the 11 of the dummy address as part of the opcode than it is to shift ++ * it over the correct number of bits for the address. This puts the ++ * EEPROM into write/erase mode. 
++ */ ++ e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE, ++ (uint16_t)(eeprom->opcode_bits + 2)); ++ ++ e1000_shift_out_ee_bits(hw, 0, (uint16_t)(eeprom->address_bits - 2)); ++ ++ /* Prepare the EEPROM */ ++ e1000_standby_eeprom(hw); ++ ++ while (words_written < words) { ++ /* Send the Write command (3-bit opcode + addr) */ ++ e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE, ++ eeprom->opcode_bits); ++ ++ e1000_shift_out_ee_bits(hw, (uint16_t)(offset + words_written), ++ eeprom->address_bits); ++ ++ /* Send the data */ ++ e1000_shift_out_ee_bits(hw, data[words_written], 16); ++ ++ /* Toggle the CS line. This in effect tells the EEPROM to execute ++ * the previous command. ++ */ ++ e1000_standby_eeprom(hw); ++ ++ /* Read DO repeatedly until it is high (equal to '1'). The EEPROM will ++ * signal that the command has been completed by raising the DO signal. ++ * If DO does not go high in 10 milliseconds, then error out. ++ */ ++ for (i = 0; i < 200; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if (eecd & E1000_EECD_DO) break; ++ usec_delay(50); ++ } ++ if (i == 200) { ++ DEBUGOUT("EEPROM Write did not complete\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* Recover from write */ ++ e1000_standby_eeprom(hw); ++ ++ words_written++; ++ } ++ ++ /* Send the write disable command to the EEPROM (3-bit opcode plus ++ * 6/8-bit dummy address beginning with 10). It's less work to include ++ * the 10 of the dummy address as part of the opcode than it is to shift ++ * it over the correct number of bits for the address. This takes the ++ * EEPROM out of write/erase mode. ++ */ ++ e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE, ++ (uint16_t)(eeprom->opcode_bits + 2)); ++ ++ e1000_shift_out_ee_bits(hw, 0, (uint16_t)(eeprom->address_bits - 2)); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Flushes the cached eeprom to NVM. This is done by saving the modified values ++ * in the eeprom cache and the non modified values in the currently active bank ++ * to the new bank. 
++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_commit_shadow_ram(struct e1000_hw *hw) ++{ ++ uint32_t attempts = 100000; ++ uint32_t eecd = 0; ++ uint32_t flop = 0; ++ uint32_t i = 0; ++ int32_t error = E1000_SUCCESS; ++ uint32_t old_bank_offset = 0; ++ uint32_t new_bank_offset = 0; ++ uint32_t sector_retries = 0; ++ uint8_t low_byte = 0; ++ uint8_t high_byte = 0; ++ uint8_t temp_byte = 0; ++ boolean_t sector_write_failed = FALSE; ++ ++ if (hw->mac_type == e1000_82573) { ++ /* The flop register will be used to determine if flash type is STM */ ++ flop = E1000_READ_REG(hw, FLOP); ++ for (i=0; i < attempts; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if ((eecd & E1000_EECD_FLUPD) == 0) { ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ if (i == attempts) { ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* If STM opcode located in bits 15:8 of flop, reset firmware */ ++ if ((flop & 0xFF00) == E1000_STM_OPCODE) { ++ E1000_WRITE_REG(hw, HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Perform the flash update */ ++ E1000_WRITE_REG(hw, EECD, eecd | E1000_EECD_FLUPD); ++ ++ for (i=0; i < attempts; i++) { ++ eecd = E1000_READ_REG(hw, EECD); ++ if ((eecd & E1000_EECD_FLUPD) == 0) { ++ break; ++ } ++ usec_delay(5); ++ } ++ ++ if (i == attempts) { ++ return -E1000_ERR_EEPROM; ++ } ++ } ++ ++ if (hw->mac_type == e1000_ich8lan && hw->eeprom_shadow_ram != NULL) { ++ /* We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written */ ++ if (!(E1000_READ_REG(hw, EECD) & E1000_EECD_SEC1VAL)) { ++ new_bank_offset = hw->flash_bank_size * 2; ++ old_bank_offset = 0; ++ e1000_erase_ich8_4k_segment(hw, 1); ++ } else { ++ old_bank_offset = hw->flash_bank_size * 2; ++ new_bank_offset = 0; ++ e1000_erase_ich8_4k_segment(hw, 0); ++ } ++ ++ do { ++ sector_write_failed = FALSE; ++ /* Loop for every byte in the shadow RAM, ++ * which is in units of words. */ ++ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { ++ /* Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM */ ++ if (hw->eeprom_shadow_ram[i].modified == TRUE) { ++ low_byte = (uint8_t)hw->eeprom_shadow_ram[i].eeprom_word; ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset, ++ &temp_byte); ++ usec_delay(100); ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset, ++ low_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ high_byte = ++ (uint8_t)(hw->eeprom_shadow_ram[i].eeprom_word >> 8); ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset + 1, ++ &temp_byte); ++ usec_delay(100); ++ } else { ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset, ++ &low_byte); ++ usec_delay(100); ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset, low_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ e1000_read_ich8_byte(hw, (i << 1) + old_bank_offset + 1, ++ &high_byte); ++ } ++ ++ /* If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. 
We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress */ ++ if (i == E1000_ICH8_NVM_SIG_WORD) ++ high_byte = E1000_ICH8_NVM_SIG_MASK | high_byte; ++ ++ error = e1000_verify_write_ich8_byte(hw, ++ (i << 1) + new_bank_offset + 1, high_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ ++ if (sector_write_failed == FALSE) { ++ /* Clear the now not used entry in the cache */ ++ hw->eeprom_shadow_ram[i].modified = FALSE; ++ hw->eeprom_shadow_ram[i].eeprom_word = 0xFFFF; ++ } ++ } ++ ++ /* Don't bother writing the segment valid bits if sector ++ * programming failed. */ ++ if (sector_write_failed == FALSE) { ++ /* Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b */ ++ e1000_read_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + new_bank_offset, ++ &high_byte); ++ high_byte &= 0xBF; ++ error = e1000_verify_write_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + new_bank_offset, ++ high_byte); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ ++ /* And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase */ ++ error = e1000_verify_write_ich8_byte(hw, ++ E1000_ICH8_NVM_SIG_WORD * 2 + 1 + old_bank_offset, ++ 0); ++ if (error != E1000_SUCCESS) ++ sector_write_failed = TRUE; ++ } ++ } while (++sector_retries < 10 && sector_write_failed == TRUE); ++ } ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads the adapter's part number from the EEPROM ++ * ++ * hw - Struct containing variables accessed by shared code ++ * part_num - Adapter's part number ++ *****************************************************************************/ ++int32_t ++e1000_read_part_num(struct e1000_hw *hw, ++ uint32_t *part_num) ++{ ++ uint16_t offset = EEPROM_PBA_BYTE_1; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_read_part_num"); ++ ++ /* Get word 0 from EEPROM */ ++ if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ /* Save word 0 in upper half of part_num */ ++ *part_num = (uint32_t) (eeprom_data << 16); ++ ++ /* Get word 1 from EEPROM */ ++ if (e1000_read_eeprom(hw, ++offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ /* Save word 1 in lower half of part_num */ ++ *part_num |= eeprom_data; ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the ++ * second function of dual function devices ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_read_mac_addr(struct e1000_hw * hw) ++{ ++ uint16_t offset; ++ uint16_t eeprom_data, i; ++ ++ DEBUGFUNC("e1000_read_mac_addr"); ++ ++ for (i = 0; i < NODE_ADDRESS_SIZE; i += 2) { ++ offset = i >> 1; ++ if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF); ++ 
hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8); ++ } ++ ++ switch (hw->mac_type) { ++ default: ++ break; ++ case e1000_82546: ++ case e1000_82546_rev_3: ++ case e1000_82571: ++ case e1000_80003es2lan: ++ if (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) ++ hw->perm_mac_addr[5] ^= 0x01; ++ break; ++ } ++ ++ for (i = 0; i < NODE_ADDRESS_SIZE; i++) ++ hw->mac_addr[i] = hw->perm_mac_addr[i]; ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Initializes receive address filters. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Places the MAC address in receive address register 0 and clears the rest ++ * of the receive addresss registers. Clears the multicast table. Assumes ++ * the receiver is in reset when the routine is called. ++ *****************************************************************************/ ++void ++e1000_init_rx_addrs(struct e1000_hw *hw) ++{ ++ uint32_t i; ++ uint32_t rar_num; ++ ++ DEBUGFUNC("e1000_init_rx_addrs"); ++ ++ /* Setup the receive address. */ ++ DEBUGOUT("Programming MAC Address into RAR[0]\n"); ++ ++ e1000_rar_set(hw, hw->mac_addr, 0); ++ ++ rar_num = E1000_RAR_ENTRIES; ++ ++ /* Reserve a spot for the Locally Administered Address to work around ++ * an 82571 issue in which a reset on one port will reload the MAC on ++ * the other port. */ ++ if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE)) ++ rar_num -= 1; ++ if (hw->mac_type == e1000_ich8lan) ++ rar_num = E1000_RAR_ENTRIES_ICH8LAN; ++ ++ /* Zero out the other 15 receive addresses. */ ++ DEBUGOUT("Clearing RAR[1-15]\n"); ++ for (i = 1; i < rar_num; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Updates the MAC's list of multicast addresses. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * mc_addr_list - the list of new multicast addresses ++ * mc_addr_count - number of addresses ++ * pad - number of bytes between addresses in the list ++ * rar_used_count - offset where to start adding mc addresses into the RAR's ++ * ++ * The given list replaces any existing list. Clears the last 15 receive ++ * address registers and the multicast table. Uses receive address registers ++ * for the first 15 multicast addresses, and hashes the rest into the ++ * multicast table. ++ *****************************************************************************/ ++void ++e1000_mc_addr_list_update(struct e1000_hw *hw, ++ uint8_t *mc_addr_list, ++ uint32_t mc_addr_count, ++ uint32_t pad, ++ uint32_t rar_used_count) ++{ ++ uint32_t hash_value; ++ uint32_t i; ++ uint32_t num_rar_entry; ++ uint32_t num_mta_entry; ++ ++ DEBUGFUNC("e1000_mc_addr_list_update"); ++ ++ /* Set the new number of MC addresses that we are being requested to use. */ ++ hw->num_mc_addrs = mc_addr_count; ++ ++ /* Clear RAR[1-15] */ ++ DEBUGOUT(" Clearing RAR[1-15]\n"); ++ num_rar_entry = E1000_RAR_ENTRIES; ++ if (hw->mac_type == e1000_ich8lan) ++ num_rar_entry = E1000_RAR_ENTRIES_ICH8LAN; ++ /* Reserve a spot for the Locally Administered Address to work around ++ * an 82571 issue in which a reset on one port will reload the MAC on ++ * the other port. 
*/ ++ if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE)) ++ num_rar_entry -= 1; ++ ++ for (i = rar_used_count; i < num_rar_entry; i++) { ++ E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Clear the MTA */ ++ DEBUGOUT(" Clearing MTA\n"); ++ num_mta_entry = E1000_NUM_MTA_REGISTERS; ++ if (hw->mac_type == e1000_ich8lan) ++ num_mta_entry = E1000_NUM_MTA_REGISTERS_ICH8LAN; ++ for (i = 0; i < num_mta_entry; i++) { ++ E1000_WRITE_REG_ARRAY(hw, MTA, i, 0); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ /* Add the new addresses */ ++ for (i = 0; i < mc_addr_count; i++) { ++ DEBUGOUT(" Adding the multicast addresses:\n"); ++ DEBUGOUT7(" MC Addr #%d =%.2X %.2X %.2X %.2X %.2X %.2X\n", i, ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad)], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 1], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 2], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 3], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 4], ++ mc_addr_list[i * (ETH_LENGTH_OF_ADDRESS + pad) + 5]); ++ ++ hash_value = e1000_hash_mc_addr(hw, ++ mc_addr_list + ++ (i * (ETH_LENGTH_OF_ADDRESS + pad))); ++ ++ DEBUGOUT1(" Hash value = 0x%03X\n", hash_value); ++ ++ /* Place this multicast address in the RAR if there is room, * ++ * else put it in the MTA ++ */ ++ if (rar_used_count < num_rar_entry) { ++ e1000_rar_set(hw, ++ mc_addr_list + (i * (ETH_LENGTH_OF_ADDRESS + pad)), ++ rar_used_count); ++ rar_used_count++; ++ } else { ++ e1000_mta_set(hw, hash_value); ++ } ++ } ++ DEBUGOUT("MC Update Complete\n"); ++} ++ ++/****************************************************************************** ++ * Hashes an address to determine its location in the multicast table ++ * ++ * hw - Struct containing variables accessed by shared code ++ * mc_addr - the multicast address to hash ++ *****************************************************************************/ ++uint32_t ++e1000_hash_mc_addr(struct e1000_hw *hw, ++ uint8_t *mc_addr) ++{ ++ uint32_t hash_value = 0; ++ ++ /* The portion of the address that is used for the hash table is ++ * determined by the mc_filter_type setting. ++ */ ++ switch (hw->mc_filter_type) { ++ /* [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ */ ++ case 0: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [47:38] i.e. 0x158 for above example address */ ++ hash_value = ((mc_addr[4] >> 6) | (((uint16_t) mc_addr[5]) << 2)); ++ } else { ++ /* [47:36] i.e. 0x563 for above example address */ ++ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4)); ++ } ++ break; ++ case 1: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [46:37] i.e. 0x2B1 for above example address */ ++ hash_value = ((mc_addr[4] >> 5) | (((uint16_t) mc_addr[5]) << 3)); ++ } else { ++ /* [46:35] i.e. 0xAC6 for above example address */ ++ hash_value = ((mc_addr[4] >> 3) | (((uint16_t) mc_addr[5]) << 5)); ++ } ++ break; ++ case 2: ++ if (hw->mac_type == e1000_ich8lan) { ++ /*[45:36] i.e. 0x163 for above example address */ ++ hash_value = ((mc_addr[4] >> 4) | (((uint16_t) mc_addr[5]) << 4)); ++ } else { ++ /* [45:34] i.e. 0x5D8 for above example address */ ++ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6)); ++ } ++ break; ++ case 3: ++ if (hw->mac_type == e1000_ich8lan) { ++ /* [43:34] i.e. 0x18D for above example address */ ++ hash_value = ((mc_addr[4] >> 2) | (((uint16_t) mc_addr[5]) << 6)); ++ } else { ++ /* [43:32] i.e. 
0x634 for above example address */ ++ hash_value = ((mc_addr[4]) | (((uint16_t) mc_addr[5]) << 8)); ++ } ++ break; ++ } ++ ++ hash_value &= 0xFFF; ++ if (hw->mac_type == e1000_ich8lan) ++ hash_value &= 0x3FF; ++ ++ return hash_value; ++} ++ ++/****************************************************************************** ++ * Sets the bit in the multicast table corresponding to the hash value. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * hash_value - Multicast address hash value ++ *****************************************************************************/ ++void ++e1000_mta_set(struct e1000_hw *hw, ++ uint32_t hash_value) ++{ ++ uint32_t hash_bit, hash_reg; ++ uint32_t mta; ++ uint32_t temp; ++ ++ /* The MTA is a register array of 128 32-bit registers. ++ * It is treated like an array of 4096 bits. We want to set ++ * bit BitArray[hash_value]. So we figure out what register ++ * the bit is in, read it, OR in the new bit, then write ++ * back the new value. The register is determined by the ++ * upper 7 bits of the hash value and the bit within that ++ * register are determined by the lower 5 bits of the value. ++ */ ++ hash_reg = (hash_value >> 5) & 0x7F; ++ if (hw->mac_type == e1000_ich8lan) ++ hash_reg &= 0x1F; ++ hash_bit = hash_value & 0x1F; ++ ++ mta = E1000_READ_REG_ARRAY(hw, MTA, hash_reg); ++ ++ mta |= (1 << hash_bit); ++ ++ /* If we are on an 82544 and we are trying to write an odd offset ++ * in the MTA, save off the previous entry before writing and ++ * restore the old value after writing. ++ */ ++ if ((hw->mac_type == e1000_82544) && ((hash_reg & 0x1) == 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, MTA, (hash_reg - 1)); ++ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, MTA, (hash_reg - 1), temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Puts an ethernet address into a receive address register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * addr - Address to put into receive address register ++ * index - Receive address register to write ++ *****************************************************************************/ ++void ++e1000_rar_set(struct e1000_hw *hw, ++ uint8_t *addr, ++ uint32_t index) ++{ ++ uint32_t rar_low, rar_high; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((uint32_t) addr[0] | ++ ((uint32_t) addr[1] << 8) | ++ ((uint32_t) addr[2] << 16) | ((uint32_t) addr[3] << 24)); ++ rar_high = ((uint32_t) addr[4] | ((uint32_t) addr[5] << 8)); ++ ++ /* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx ++ * unit hang. ++ * ++ * Description: ++ * If there are any Rx frames queued up or otherwise present in the HW ++ * before RSS is enabled, and then we enable RSS, the HW Rx unit will ++ * hang. To work around this issue, we have to disable receives and ++ * flush out all Rx frames before we enable RSS. To do so, we modify we ++ * redirect all Rx traffic to manageability and then reset the HW. ++ * This flushes away Rx frames, and (since the redirections to ++ * manageability persists across resets) keeps new ones from coming in ++ * while we work. Then, we clear the Address Valid AV bit for all MAC ++ * addresses and undo the re-direction to manageability. 
++ * Now, frames are coming in again, but the MAC won't accept them, so ++ * far so good. We now proceed to initialize RSS (if necessary) and ++ * configure the Rx unit. Last, we re-enable the AV bits and continue ++ * on our merry way. ++ */ ++ switch (hw->mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_80003es2lan: ++ if (hw->leave_av_bit_off == TRUE) ++ break; ++ default: ++ /* Indicate to hardware the Address is Valid. */ ++ rar_high |= E1000_RAH_AV; ++ break; ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high); ++ E1000_WRITE_FLUSH(hw); ++} ++ ++/****************************************************************************** ++ * Writes a value to the specified offset in the VLAN filter table. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - Offset in VLAN filer table to write ++ * value - Value to write into VLAN filter table ++ *****************************************************************************/ ++void ++e1000_write_vfta(struct e1000_hw *hw, ++ uint32_t offset, ++ uint32_t value) ++{ ++ uint32_t temp; ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return; ++ ++ if ((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) { ++ temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1)); ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp); ++ E1000_WRITE_FLUSH(hw); ++ } else { ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++/****************************************************************************** ++ * Clears the VLAN filer table ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_clear_vfta(struct e1000_hw *hw) ++{ ++ uint32_t offset; ++ uint32_t vfta_value = 0; ++ uint32_t vfta_offset = 0; ++ uint32_t vfta_bit_in_reg = 0; ++ ++ if (hw->mac_type == e1000_ich8lan) ++ return; ++ ++ if (hw->mac_type == e1000_82573) { ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* The VFTA is a 4096b bit-field, each identifying a single VLAN ++ * ID. The following operations determine which 32b entry ++ * (i.e. offset) into the array we want to set the VLAN ID ++ * (i.e. bit) of the manageability unit. */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of the ++ * manageability unit */ ++ vfta_value = (offset == vfta_offset) ? 
vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, VFTA, offset, vfta_value); ++ E1000_WRITE_FLUSH(hw); ++ } ++} ++ ++int32_t ++e1000_id_led_init(struct e1000_hw * hw) ++{ ++ uint32_t ledctl; ++ const uint32_t ledctl_mask = 0x000000FF; ++ const uint32_t ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const uint32_t ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ uint16_t eeprom_data, i, temp; ++ const uint16_t led_mask = 0x0F; ++ ++ DEBUGFUNC("e1000_id_led_init"); ++ ++ if (hw->mac_type < e1000_82540) { ++ /* Nothing to do */ ++ return E1000_SUCCESS; ++ } ++ ++ ledctl = E1000_READ_REG(hw, LEDCTL); ++ hw->ledctl_default = ledctl; ++ hw->ledctl_mode1 = hw->ledctl_default; ++ hw->ledctl_mode2 = hw->ledctl_default; ++ ++ if (e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, 1, &eeprom_data) < 0) { ++ DEBUGOUT("EEPROM Read Error\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ if ((hw->mac_type == e1000_82573) && ++ (eeprom_data == ID_LED_RESERVED_82573)) ++ eeprom_data = ID_LED_DEFAULT_82573; ++ else if ((eeprom_data == ID_LED_RESERVED_0000) || ++ (eeprom_data == ID_LED_RESERVED_FFFF)) { ++ if (hw->mac_type == e1000_ich8lan) ++ eeprom_data = ID_LED_DEFAULT_ICH8LAN; ++ else ++ eeprom_data = ID_LED_DEFAULT; ++ } ++ for (i = 0; i < 4; i++) { ++ temp = (eeprom_data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ hw->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Prepares SW controlable LED for use and saves the current state of the LED. 
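++ * On 82541/82547 family parts this also turns off PHY Smart Power Down and
++ * saves the previous IGP01E1000_GMII_FIFO value in hw->phy_spd_default so
++ * that e1000_cleanup_led() can restore it later.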
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_setup_led(struct e1000_hw *hw) ++{ ++ uint32_t ledctl; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_setup_led"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* No setup necessary */ ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ /* Turn off PHY Smart Power Down (if enabled) */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ &hw->phy_spd_default); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ (uint16_t)(hw->phy_spd_default & ++ ~IGP01E1000_GMII_SPD)); ++ if (ret_val) ++ return ret_val; ++ /* Fall Through */ ++ default: ++ if (hw->media_type == e1000_media_type_fiber) { ++ ledctl = E1000_READ_REG(hw, LEDCTL); ++ /* Save current LEDCTL settings */ ++ hw->ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ E1000_WRITE_REG(hw, LEDCTL, ledctl); ++ } else if (hw->media_type == e1000_media_type_copper) ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/****************************************************************************** ++ * Used on 82571 and later Si that has LED blink bits. ++ * Callers must use their own timer and should have already called ++ * e1000_id_led_init() ++ * Call e1000_cleanup led() to stop blinking ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_blink_led_start(struct e1000_hw *hw) ++{ ++ int16_t i; ++ uint32_t ledctl_blink = 0; ++ ++ DEBUGFUNC("e1000_id_led_blink_on"); ++ ++ if (hw->mac_type < e1000_82571) { ++ /* Nothing to do */ ++ return E1000_SUCCESS; ++ } ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* set the blink bit for each LED that's "on" (0x0E) in ledctl_mode2 */ ++ ledctl_blink = hw->ledctl_mode2; ++ for (i=0; i < 4; i++) ++ if (((hw->ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << (i * 8)); ++ } ++ ++ E1000_WRITE_REG(hw, LEDCTL, ledctl_blink); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Restores the saved state of the SW controlable LED. 
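++ * 82541/82547 family parts get their saved IGP01E1000_GMII_FIFO value written
++ * back first; IFE PHYs clear IFE_PHY_SPECIAL_CONTROL_LED instead of restoring
++ * LEDCTL from hw->ledctl_default.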
++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_cleanup_led(struct e1000_hw *hw) ++{ ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_cleanup_led"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ case e1000_82544: ++ /* No cleanup necessary */ ++ break; ++ case e1000_82541: ++ case e1000_82547: ++ case e1000_82541_rev_2: ++ case e1000_82547_rev_2: ++ /* Turn on PHY Smart Power Down (if previously enabled) */ ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, ++ hw->phy_spd_default); ++ if (ret_val) ++ return ret_val; ++ /* Fall Through */ ++ default: ++ if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ break; ++ } ++ /* Restore LEDCTL settings */ ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_default); ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Turns on the software controllable LED ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_led_on(struct e1000_hw *hw) ++{ ++ uint32_t ctrl = E1000_READ_REG(hw, CTRL); ++ ++ DEBUGFUNC("e1000_led_on"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ /* Set SW Defineable Pin 0 to turn on the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ break; ++ case e1000_82544: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Set SW Defineable Pin 0 to turn on the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Clear SW Defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ break; ++ default: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Clear SW Defineable Pin 0 to turn on the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ } else if (hw->media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode2); ++ return E1000_SUCCESS; ++ } ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Turns off the software controllable LED ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++int32_t ++e1000_led_off(struct e1000_hw *hw) ++{ ++ uint32_t ctrl = E1000_READ_REG(hw, CTRL); ++ ++ DEBUGFUNC("e1000_led_off"); ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ case e1000_82543: ++ /* Clear SW Defineable Pin 0 to turn off the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ break; ++ case e1000_82544: ++ if (hw->media_type == e1000_media_type_fiber) { ++ /* Clear SW Defineable Pin 0 to turn off the LED */ ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else { ++ /* Set SW Defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } ++ break; ++ default: ++ if (hw->media_type == 
e1000_media_type_fiber) { ++ /* Set SW Defineable Pin 0 to turn off the LED */ ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); ++ } else if (hw->media_type == e1000_media_type_copper) { ++ E1000_WRITE_REG(hw, LEDCTL, hw->ledctl_mode1); ++ return E1000_SUCCESS; ++ } ++ break; ++ } ++ ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Clears all hardware statistics counters. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_clear_hw_cntrs(struct e1000_hw *hw) ++{ ++ volatile uint32_t temp; ++ ++ temp = E1000_READ_REG(hw, CRCERRS); ++ temp = E1000_READ_REG(hw, SYMERRS); ++ temp = E1000_READ_REG(hw, MPC); ++ temp = E1000_READ_REG(hw, SCC); ++ temp = E1000_READ_REG(hw, ECOL); ++ temp = E1000_READ_REG(hw, MCC); ++ temp = E1000_READ_REG(hw, LATECOL); ++ temp = E1000_READ_REG(hw, COLC); ++ temp = E1000_READ_REG(hw, DC); ++ temp = E1000_READ_REG(hw, SEC); ++ temp = E1000_READ_REG(hw, RLEC); ++ temp = E1000_READ_REG(hw, XONRXC); ++ temp = E1000_READ_REG(hw, XONTXC); ++ temp = E1000_READ_REG(hw, XOFFRXC); ++ temp = E1000_READ_REG(hw, XOFFTXC); ++ temp = E1000_READ_REG(hw, FCRUC); ++ ++ if (hw->mac_type != e1000_ich8lan) { ++ temp = E1000_READ_REG(hw, PRC64); ++ temp = E1000_READ_REG(hw, PRC127); ++ temp = E1000_READ_REG(hw, PRC255); ++ temp = E1000_READ_REG(hw, PRC511); ++ temp = E1000_READ_REG(hw, PRC1023); ++ temp = E1000_READ_REG(hw, PRC1522); ++ } ++ ++ temp = E1000_READ_REG(hw, GPRC); ++ temp = E1000_READ_REG(hw, BPRC); ++ temp = E1000_READ_REG(hw, MPRC); ++ temp = E1000_READ_REG(hw, GPTC); ++ temp = E1000_READ_REG(hw, GORCL); ++ temp = E1000_READ_REG(hw, GORCH); ++ temp = E1000_READ_REG(hw, GOTCL); ++ temp = E1000_READ_REG(hw, GOTCH); ++ temp = E1000_READ_REG(hw, RNBC); ++ temp = E1000_READ_REG(hw, RUC); ++ temp = E1000_READ_REG(hw, RFC); ++ temp = E1000_READ_REG(hw, ROC); ++ temp = E1000_READ_REG(hw, RJC); ++ temp = E1000_READ_REG(hw, TORL); ++ temp = E1000_READ_REG(hw, TORH); ++ temp = E1000_READ_REG(hw, TOTL); ++ temp = E1000_READ_REG(hw, TOTH); ++ temp = E1000_READ_REG(hw, TPR); ++ temp = E1000_READ_REG(hw, TPT); ++ ++ if (hw->mac_type != e1000_ich8lan) { ++ temp = E1000_READ_REG(hw, PTC64); ++ temp = E1000_READ_REG(hw, PTC127); ++ temp = E1000_READ_REG(hw, PTC255); ++ temp = E1000_READ_REG(hw, PTC511); ++ temp = E1000_READ_REG(hw, PTC1023); ++ temp = E1000_READ_REG(hw, PTC1522); ++ } ++ ++ temp = E1000_READ_REG(hw, MPTC); ++ temp = E1000_READ_REG(hw, BPTC); ++ ++ if (hw->mac_type < e1000_82543) return; ++ ++ temp = E1000_READ_REG(hw, ALGNERRC); ++ temp = E1000_READ_REG(hw, RXERRC); ++ temp = E1000_READ_REG(hw, TNCRS); ++ temp = E1000_READ_REG(hw, CEXTERR); ++ temp = E1000_READ_REG(hw, TSCTC); ++ temp = E1000_READ_REG(hw, TSCTFC); ++ ++ if (hw->mac_type <= e1000_82544) return; ++ ++ temp = E1000_READ_REG(hw, MGTPRC); ++ temp = E1000_READ_REG(hw, MGTPDC); ++ temp = E1000_READ_REG(hw, MGTPTC); ++ ++ if (hw->mac_type <= e1000_82547_rev_2) return; ++ ++ temp = E1000_READ_REG(hw, IAC); ++ temp = E1000_READ_REG(hw, ICRXOC); ++ ++ if (hw->mac_type == e1000_ich8lan) return; ++ ++ temp = E1000_READ_REG(hw, ICRXPTC); ++ temp = E1000_READ_REG(hw, ICRXATC); ++ temp = E1000_READ_REG(hw, ICTXPTC); ++ temp = 
E1000_READ_REG(hw, ICTXATC); ++ temp = E1000_READ_REG(hw, ICTXQEC); ++ temp = E1000_READ_REG(hw, ICTXQMTC); ++ temp = E1000_READ_REG(hw, ICRXDMTC); ++} ++ ++/****************************************************************************** ++ * Resets Adaptive IFS to its default state. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * Call this after e1000_init_hw. You may override the IFS defaults by setting ++ * hw->ifs_params_forced to TRUE. However, you must initialize hw-> ++ * current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and ifs_ratio ++ * before calling this function. ++ *****************************************************************************/ ++void ++e1000_reset_adaptive(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_reset_adaptive"); ++ ++ if (hw->adaptive_ifs) { ++ if (!hw->ifs_params_forced) { ++ hw->current_ifs_val = 0; ++ hw->ifs_min_val = IFS_MIN; ++ hw->ifs_max_val = IFS_MAX; ++ hw->ifs_step_size = IFS_STEP; ++ hw->ifs_ratio = IFS_RATIO; ++ } ++ hw->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, AIT, 0); ++ } else { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ } ++} ++ ++/****************************************************************************** ++ * Called during the callback/watchdog routine to update IFS value based on ++ * the ratio of transmits to collisions. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * tx_packets - Number of transmits since last callback ++ * total_collisions - Number of collisions since last callback ++ *****************************************************************************/ ++void ++e1000_update_adaptive(struct e1000_hw *hw) ++{ ++ DEBUGFUNC("e1000_update_adaptive"); ++ ++ if (hw->adaptive_ifs) { ++ if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) { ++ if (hw->tx_packet_delta > MIN_NUM_XMITS) { ++ hw->in_ifs_mode = TRUE; ++ if (hw->current_ifs_val < hw->ifs_max_val) { ++ if (hw->current_ifs_val == 0) ++ hw->current_ifs_val = hw->ifs_min_val; ++ else ++ hw->current_ifs_val += hw->ifs_step_size; ++ E1000_WRITE_REG(hw, AIT, hw->current_ifs_val); ++ } ++ } ++ } else { ++ if (hw->in_ifs_mode && (hw->tx_packet_delta <= MIN_NUM_XMITS)) { ++ hw->current_ifs_val = 0; ++ hw->in_ifs_mode = FALSE; ++ E1000_WRITE_REG(hw, AIT, 0); ++ } ++ } ++ } else { ++ DEBUGOUT("Not in Adaptive IFS mode!\n"); ++ } ++} ++ ++/****************************************************************************** ++ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT ++ * ++ * hw - Struct containing variables accessed by shared code ++ * frame_len - The length of the frame in question ++ * mac_addr - The Ethernet destination address of the frame in question ++ *****************************************************************************/ ++void ++e1000_tbi_adjust_stats(struct e1000_hw *hw, ++ struct e1000_hw_stats *stats, ++ uint32_t frame_len, ++ uint8_t *mac_addr) ++{ ++ uint64_t carry_bit; ++ ++ /* First adjust the frame length. */ ++ frame_len--; ++ /* We need to adjust the statistics counters, since the hardware ++ * counters overcount this packet as a CRC error and undercount ++ * the packet as a good packet ++ */ ++ /* This packet should not be counted as a CRC error. */ ++ stats->crcerrs--; ++ /* This packet does count as a Good Packet Received. 
*/ ++ stats->gprc++; ++ ++ /* Adjust the Good Octets received counters */ ++ carry_bit = 0x80000000 & stats->gorcl; ++ stats->gorcl += frame_len; ++ /* If the high bit of Gorcl (the low 32 bits of the Good Octets ++ * Received Count) was one before the addition, ++ * AND it is zero after, then we lost the carry out, ++ * need to add one to Gorch (Good Octets Received Count High). ++ * This could be simplified if all environments supported ++ * 64-bit integers. ++ */ ++ if (carry_bit && ((stats->gorcl & 0x80000000) == 0)) ++ stats->gorch++; ++ /* Is this a broadcast or multicast? Check broadcast first, ++ * since the test for a multicast frame will test positive on ++ * a broadcast frame. ++ */ ++ if ((mac_addr[0] == (uint8_t) 0xff) && (mac_addr[1] == (uint8_t) 0xff)) ++ /* Broadcast packet */ ++ stats->bprc++; ++ else if (*mac_addr & 0x01) ++ /* Multicast packet */ ++ stats->mprc++; ++ ++ if (frame_len == hw->max_frame_size) { ++ /* In this case, the hardware has overcounted the number of ++ * oversize frames. ++ */ ++ if (stats->roc > 0) ++ stats->roc--; ++ } ++ ++ /* Adjust the bin counters when the extra byte put the frame in the ++ * wrong bin. Remember that the frame_len was adjusted above. ++ */ ++ if (frame_len == 64) { ++ stats->prc64++; ++ stats->prc127--; ++ } else if (frame_len == 127) { ++ stats->prc127++; ++ stats->prc255--; ++ } else if (frame_len == 255) { ++ stats->prc255++; ++ stats->prc511--; ++ } else if (frame_len == 511) { ++ stats->prc511++; ++ stats->prc1023--; ++ } else if (frame_len == 1023) { ++ stats->prc1023++; ++ stats->prc1522--; ++ } else if (frame_len == 1522) { ++ stats->prc1522++; ++ } ++} ++ ++/****************************************************************************** ++ * Gets the current PCI bus type, speed, and width of the hardware ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++void ++e1000_get_bus_info(struct e1000_hw *hw) ++{ ++ uint32_t status; ++ ++ switch (hw->mac_type) { ++ case e1000_82542_rev2_0: ++ case e1000_82542_rev2_1: ++ hw->bus_type = e1000_bus_type_unknown; ++ hw->bus_speed = e1000_bus_speed_unknown; ++ hw->bus_width = e1000_bus_width_unknown; ++ break; ++ case e1000_82572: ++ case e1000_82573: ++ hw->bus_type = e1000_bus_type_pci_express; ++ hw->bus_speed = e1000_bus_speed_2500; ++ hw->bus_width = e1000_bus_width_pciex_1; ++ break; ++ case e1000_82571: ++ case e1000_ich8lan: ++ case e1000_80003es2lan: ++ hw->bus_type = e1000_bus_type_pci_express; ++ hw->bus_speed = e1000_bus_speed_2500; ++ hw->bus_width = e1000_bus_width_pciex_4; ++ break; ++ default: ++ status = E1000_READ_REG(hw, STATUS); ++ hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ? ++ e1000_bus_type_pcix : e1000_bus_type_pci; ++ ++ if (hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) { ++ hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ? ++ e1000_bus_speed_66 : e1000_bus_speed_120; ++ } else if (hw->bus_type == e1000_bus_type_pci) { ++ hw->bus_speed = (status & E1000_STATUS_PCI66) ? 
++ e1000_bus_speed_66 : e1000_bus_speed_33; ++ } else { ++ switch (status & E1000_STATUS_PCIX_SPEED) { ++ case E1000_STATUS_PCIX_SPEED_66: ++ hw->bus_speed = e1000_bus_speed_66; ++ break; ++ case E1000_STATUS_PCIX_SPEED_100: ++ hw->bus_speed = e1000_bus_speed_100; ++ break; ++ case E1000_STATUS_PCIX_SPEED_133: ++ hw->bus_speed = e1000_bus_speed_133; ++ break; ++ default: ++ hw->bus_speed = e1000_bus_speed_reserved; ++ break; ++ } ++ } ++ hw->bus_width = (status & E1000_STATUS_BUS64) ? ++ e1000_bus_width_64 : e1000_bus_width_32; ++ break; ++ } ++} ++/****************************************************************************** ++ * Reads a value from one of the devices registers using port I/O (as opposed ++ * memory mapped I/O). Only 82544 and newer devices support port I/O. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset to read from ++ *****************************************************************************/ ++uint32_t ++e1000_read_reg_io(struct e1000_hw *hw, ++ uint32_t offset) ++{ ++ unsigned long io_addr = hw->io_base; ++ unsigned long io_data = hw->io_base + 4; ++ ++ e1000_io_write(hw, io_addr, offset); ++ return e1000_io_read(hw, io_data); ++} ++ ++/****************************************************************************** ++ * Writes a value to one of the devices registers using port I/O (as opposed to ++ * memory mapped I/O). Only 82544 and newer devices support port I/O. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset to write to ++ * value - value to write ++ *****************************************************************************/ ++void ++e1000_write_reg_io(struct e1000_hw *hw, ++ uint32_t offset, ++ uint32_t value) ++{ ++ unsigned long io_addr = hw->io_base; ++ unsigned long io_data = hw->io_base + 4; ++ ++ e1000_io_write(hw, io_addr, offset); ++ e1000_io_write(hw, io_data, value); ++} ++ ++ ++/****************************************************************************** ++ * Estimates the cable length. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * min_length - The estimated minimum length ++ * max_length - The estimated maximum length ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * This function always returns a ranged length (minimum & maximum). ++ * So for M88 phy's, this function interprets the one value returned from the ++ * register to the minimum and maximum range. ++ * For IGP phy's, the function calculates the range by the AGC registers. 
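++ * For IGP01 the four per-channel AGC readings are averaged (the smallest
++ * channel is dropped for cables shorter than 50m) and the looked-up length is
++ * widened by +/- IGP01E1000_AGC_RANGE; IGP2/IGP3 averages the table values
++ * with the minimum and maximum channels excluded and uses IGP02E1000_AGC_RANGE.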
++ *****************************************************************************/ ++int32_t ++e1000_get_cable_length(struct e1000_hw *hw, ++ uint16_t *min_length, ++ uint16_t *max_length) ++{ ++ int32_t ret_val; ++ uint16_t agc_value = 0; ++ uint16_t i, phy_data; ++ uint16_t cable_length; ++ ++ DEBUGFUNC("e1000_get_cable_length"); ++ ++ *min_length = *max_length = 0; ++ ++ /* Use old method for Phy older than IGP */ ++ if (hw->phy_type == e1000_phy_m88) { ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ ++ /* Convert the enum value to ranged values */ ++ switch (cable_length) { ++ case e1000_cable_length_50: ++ *min_length = 0; ++ *max_length = e1000_igp_cable_length_50; ++ break; ++ case e1000_cable_length_50_80: ++ *min_length = e1000_igp_cable_length_50; ++ *max_length = e1000_igp_cable_length_80; ++ break; ++ case e1000_cable_length_80_110: ++ *min_length = e1000_igp_cable_length_80; ++ *max_length = e1000_igp_cable_length_110; ++ break; ++ case e1000_cable_length_110_140: ++ *min_length = e1000_igp_cable_length_110; ++ *max_length = e1000_igp_cable_length_140; ++ break; ++ case e1000_cable_length_140: ++ *min_length = e1000_igp_cable_length_140; ++ *max_length = e1000_igp_cable_length_170; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ } else if (hw->phy_type == e1000_phy_gg82563) { ++ ret_val = e1000_read_phy_reg(hw, GG82563_PHY_DSP_DISTANCE, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ cable_length = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ ++ switch (cable_length) { ++ case e1000_gg_cable_length_60: ++ *min_length = 0; ++ *max_length = e1000_igp_cable_length_60; ++ break; ++ case e1000_gg_cable_length_60_115: ++ *min_length = e1000_igp_cable_length_60; ++ *max_length = e1000_igp_cable_length_115; ++ break; ++ case e1000_gg_cable_length_115_150: ++ *min_length = e1000_igp_cable_length_115; ++ *max_length = e1000_igp_cable_length_150; ++ break; ++ case e1000_gg_cable_length_150: ++ *min_length = e1000_igp_cable_length_150; ++ *max_length = e1000_igp_cable_length_180; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ } else if (hw->phy_type == e1000_phy_igp) { /* For IGP PHY */ ++ uint16_t cur_agc_value; ++ uint16_t min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE; ++ uint16_t agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_A, ++ IGP01E1000_PHY_AGC_B, ++ IGP01E1000_PHY_AGC_C, ++ IGP01E1000_PHY_AGC_D}; ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ cur_agc_value = phy_data >> IGP01E1000_AGC_LENGTH_SHIFT; ++ ++ /* Value bound check. */ ++ if ((cur_agc_value >= IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) || ++ (cur_agc_value == 0)) ++ return -E1000_ERR_PHY; ++ ++ agc_value += cur_agc_value; ++ ++ /* Update minimal AGC value. */ ++ if (min_agc_value > cur_agc_value) ++ min_agc_value = cur_agc_value; ++ } ++ ++ /* Remove the minimal AGC result for length < 50m */ ++ if (agc_value < IGP01E1000_PHY_CHANNEL_NUM * e1000_igp_cable_length_50) { ++ agc_value -= min_agc_value; ++ ++ /* Get the average length of the remaining 3 channels */ ++ agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1); ++ } else { ++ /* Get the average length of all the 4 channels. 
*/ ++ agc_value /= IGP01E1000_PHY_CHANNEL_NUM; ++ } ++ ++ /* Set the range of the calculated length. */ ++ *min_length = ((e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) > 0) ? ++ (e1000_igp_cable_length_table[agc_value] - ++ IGP01E1000_AGC_RANGE) : 0; ++ *max_length = e1000_igp_cable_length_table[agc_value] + ++ IGP01E1000_AGC_RANGE; ++ } else if (hw->phy_type == e1000_phy_igp_2 || ++ hw->phy_type == e1000_phy_igp_3) { ++ uint16_t cur_agc_index, max_agc_index = 0; ++ uint16_t min_agc_index = IGP02E1000_AGC_LENGTH_TABLE_SIZE - 1; ++ uint16_t agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = ++ {IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D}; ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Getting bits 15:9, which represent the combination of course and ++ * fine gain values. The result is a number that can be put into ++ * the lookup table to obtain the approximate cable length. */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_AGC_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) ++ return -E1000_ERR_PHY; ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ *min_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ *max_length = agc_value + IGP02E1000_AGC_RANGE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Check the cable polarity ++ * ++ * hw - Struct containing variables accessed by shared code ++ * polarity - output parameter : 0 - Polarity is not reversed ++ * 1 - Polarity is reversed. ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * For phy's older then IGP, this function simply reads the polarity bit in the ++ * Phy Status register. For IGP phy's, this bit is valid only if link speed is ++ * 10 Mbps. If the link speed is 100 Mbps there is no polarity so this bit will ++ * return 0. If the link speed is 1000 Mbps the polarity status is in the ++ * IGP01E1000_PHY_PCS_INIT_REG. ++ *****************************************************************************/ ++int32_t ++e1000_check_polarity(struct e1000_hw *hw, ++ uint16_t *polarity) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_polarity"); ++ ++ if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ /* return the Polarity bit in the Status register. 
*/ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ *polarity = (phy_data & M88E1000_PSSR_REV_POLARITY) >> ++ M88E1000_PSSR_REV_POLARITY_SHIFT; ++ } else if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ /* Read the Status register to check the speed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* If speed is 1000 Mbps, must read the IGP01E1000_PHY_PCS_INIT_REG to ++ * find the polarity status */ ++ if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ++ /* Read the GIG initialization PCS register (0x00B4) */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Check the polarity bits */ ++ *polarity = (phy_data & IGP01E1000_PHY_POLARITY_MASK) ? 1 : 0; ++ } else { ++ /* For 10 Mbps, read the polarity bit in the status register. (for ++ * 100 Mbps this bit is always 0) */ ++ *polarity = phy_data & IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ } else if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_EXTENDED_STATUS_CONTROL, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ *polarity = (phy_data & IFE_PESC_POLARITY_REVERSED) >> ++ IFE_PESC_POLARITY_REVERSED_SHIFT; ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Check if Downshift occured ++ * ++ * hw - Struct containing variables accessed by shared code ++ * downshift - output parameter : 0 - No Downshift ocured. ++ * 1 - Downshift ocured. ++ * ++ * returns: - E1000_ERR_XXX ++ * E1000_SUCCESS ++ * ++ * For phy's older then IGP, this function reads the Downshift bit in the Phy ++ * Specific Status register. For IGP phy's, it reads the Downgrade bit in the ++ * Link Health register. In IGP this bit is latched high, so the driver must ++ * read it immediately after link is established. ++ *****************************************************************************/ ++int32_t ++e1000_check_downshift(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_check_downshift"); ++ ++ if (hw->phy_type == e1000_phy_igp || ++ hw->phy_type == e1000_phy_igp_3 || ++ hw->phy_type == e1000_phy_igp_2) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_LINK_HEALTH, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ hw->speed_downgraded = (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 1 : 0; ++ } else if ((hw->phy_type == e1000_phy_m88) || ++ (hw->phy_type == e1000_phy_gg82563)) { ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >> ++ M88E1000_PSSR_DOWNSHIFT_SHIFT; ++ } else if (hw->phy_type == e1000_phy_ife) { ++ /* e1000_phy_ife supports 10/100 speed only */ ++ hw->speed_downgraded = FALSE; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a ++ * gigabit link is achieved to improve link quality. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. 
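++ * Only IGP01 PHYs are handled: on a 1000 Mbps link with a cable of 50m or
++ * longer the EDAC MU index bits are cleared in the four AGC PARAM registers,
++ * and once the link drops the defaults are restored around a save/restore of
++ * PHY register 0x2F5B with the transmitter temporarily disabled.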
++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_config_dsp_after_link_change(struct e1000_hw *hw, ++ boolean_t link_up) ++{ ++ int32_t ret_val; ++ uint16_t phy_data, phy_saved_data, speed, duplex, i; ++ uint16_t dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = ++ {IGP01E1000_PHY_AGC_PARAM_A, ++ IGP01E1000_PHY_AGC_PARAM_B, ++ IGP01E1000_PHY_AGC_PARAM_C, ++ IGP01E1000_PHY_AGC_PARAM_D}; ++ uint16_t min_length, max_length; ++ ++ DEBUGFUNC("e1000_config_dsp_after_link_change"); ++ ++ if (hw->phy_type != e1000_phy_igp) ++ return E1000_SUCCESS; ++ ++ if (link_up) { ++ ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex); ++ if (ret_val) { ++ DEBUGOUT("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (speed == SPEED_1000) { ++ ++ ret_val = e1000_get_cable_length(hw, &min_length, &max_length); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->dsp_config_state == e1000_dsp_config_enabled) && ++ min_length >= e1000_igp_cable_length_50) { ++ ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ ++ ret_val = e1000_write_phy_reg(hw, dsp_reg_array[i], ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ hw->dsp_config_state = e1000_dsp_config_activated; ++ } ++ ++ if ((hw->ffe_config_state == e1000_ffe_config_enabled) && ++ (min_length < e1000_igp_cable_length_50)) { ++ ++ uint16_t ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20; ++ uint32_t idle_errs = 0; ++ ++ /* clear previous idle error counts */ ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ for (i = 0; i < ffe_idle_err_timeout; i++) { ++ usec_delay(1000); ++ ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT); ++ if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) { ++ hw->ffe_config_state = e1000_ffe_config_active; ++ ++ ret_val = e1000_write_phy_reg(hw, ++ IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_CM_CP); ++ if (ret_val) ++ return ret_val; ++ break; ++ } ++ ++ if (idle_errs) ++ ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_100; ++ } ++ } ++ } ++ } else { ++ if (hw->dsp_config_state == e1000_dsp_config_activated) { ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of the routines. 
*/ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_FORCE_GIGA); ++ if (ret_val) ++ return ret_val; ++ for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX; ++ phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS; ++ ++ ret_val = e1000_write_phy_reg(hw,dsp_reg_array[i], phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ hw->dsp_config_state = e1000_dsp_config_enabled; ++ } ++ ++ if (hw->ffe_config_state == e1000_ffe_config_active) { ++ /* Save off the current value of register 0x2F5B to be restored at ++ * the end of the routines. */ ++ ret_val = e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ /* Disable the PHY transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003); ++ ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_FORCE_GIGA); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_DSP_FFE, ++ IGP01E1000_PHY_DSP_FFE_DEFAULT); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, 0x0000, ++ IGP01E1000_IEEE_RESTART_AUTONEG); ++ if (ret_val) ++ return ret_val; ++ ++ msec_delay_irq(20); ++ ++ /* Now enable the transmitter */ ++ ret_val = e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data); ++ ++ if (ret_val) ++ return ret_val; ++ ++ hw->ffe_config_state = e1000_ffe_config_enabled; ++ } ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * Set PHY to class A mode ++ * Assumes the following operations will follow to enable the new class mode. ++ * 1. Do a PHY soft reset ++ * 2. Restart auto-negotiation or force link. ++ * ++ * hw - Struct containing variables accessed by shared code ++ ****************************************************************************/ ++static int32_t ++e1000_set_phy_mode(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t eeprom_data; ++ ++ DEBUGFUNC("e1000_set_phy_mode"); ++ ++ if ((hw->mac_type == e1000_82545_rev_3) && ++ (hw->media_type == e1000_media_type_copper)) { ++ ret_val = e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1, &eeprom_data); ++ if (ret_val) { ++ return ret_val; ++ } ++ ++ if ((eeprom_data != EEPROM_RESERVED_WORD) && ++ (eeprom_data & EEPROM_PHY_CLASS_A)) { ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x000B); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x8104); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy_reset_disable = FALSE; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * This function sets the lplu state according to the active flag. When ++ * activating lplu this function also disables smart speed and vise versa. 
++ * lplu will not be activated unless the device autonegotiation advertisment ++ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * hw: Struct containing variables accessed by shared code ++ * active - true to enable lplu false to disable lplu. ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. ++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_set_d3_lplu_state(struct e1000_hw *hw, ++ boolean_t active) ++{ ++ uint32_t phy_ctrl = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ DEBUGFUNC("e1000_set_d3_lplu_state"); ++ ++ if (hw->phy_type != e1000_phy_igp && hw->phy_type != e1000_phy_igp_2 ++ && hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ /* During driver activity LPLU should not be used or it will attain link ++ * from the lowest speeds starting from 10Mbps. The capability is used for ++ * Dx transitions and states */ ++ if (hw->mac_type == e1000_82541_rev_2 || hw->mac_type == e1000_82547_rev_2) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->mac_type == e1000_ich8lan) { ++ /* MAC writes into PHY register based on the state transition ++ * and start auto-negotiation. SW driver can overwrite the settings ++ * in CSR PHY power control E1000_PHY_CTRL register. */ ++ phy_ctrl = E1000_READ_REG(hw, PHY_CTRL); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (!active) { ++ if (hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547_rev_2) { ++ phy_data &= ~IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during ++ * Dx states where the power conservation is most important. During ++ * driver activity we should enable SmartSpeed, so performance is ++ * maintained. 
*/ ++ if (hw->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ } else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) || ++ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL ) || ++ (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) { ++ ++ if (hw->mac_type == e1000_82541_rev_2 || ++ hw->mac_type == e1000_82547_rev_2) { ++ phy_data |= IGP01E1000_GMII_FLEX_SPD; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO, phy_data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* When LPLU is enabled we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * ++ * This function sets the lplu d0 state according to the active flag. When ++ * activating lplu this function also disables smart speed and vise versa. ++ * lplu will not be activated unless the device autonegotiation advertisment ++ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * hw: Struct containing variables accessed by shared code ++ * active - true to enable lplu false to disable lplu. ++ * ++ * returns: - E1000_ERR_PHY if fail to read/write the PHY ++ * E1000_SUCCESS at any other case. ++ * ++ ****************************************************************************/ ++ ++int32_t ++e1000_set_d0_lplu_state(struct e1000_hw *hw, ++ boolean_t active) ++{ ++ uint32_t phy_ctrl = 0; ++ int32_t ret_val; ++ uint16_t phy_data; ++ DEBUGFUNC("e1000_set_d0_lplu_state"); ++ ++ if (hw->mac_type <= e1000_82547_rev_2) ++ return E1000_SUCCESS; ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl = E1000_READ_REG(hw, PHY_CTRL); ++ } else { ++ ret_val = e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (!active) { ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* LPLU and SmartSpeed are mutually exclusive. LPLU is used during ++ * Dx states where the power conservation is most important. During ++ * driver activity we should enable SmartSpeed, so performance is ++ * maintained. 
*/ ++ if (hw->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } else if (hw->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, ++ phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ++ } else { ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ E1000_WRITE_REG(hw, PHY_CTRL, phy_ctrl); ++ } else { ++ phy_data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* When LPLU is enabled we should disable SmartSpeed */ ++ ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ } ++ return E1000_SUCCESS; ++} ++ ++/****************************************************************************** ++ * Change VCO speed register to improve Bit Error Rate performance of SERDES. ++ * ++ * hw - Struct containing variables accessed by shared code ++ *****************************************************************************/ ++static int32_t ++e1000_set_vco_speed(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t default_page = 0; ++ uint16_t phy_data; ++ ++ DEBUGFUNC("e1000_set_vco_speed"); ++ ++ switch (hw->mac_type) { ++ case e1000_82545_rev_3: ++ case e1000_82546_rev_3: ++ break; ++ default: ++ return E1000_SUCCESS; ++ } ++ ++ /* Set PHY register 30, page 5, bit 8 to 0 */ ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, &default_page); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~M88E1000_PHY_VCO_REG_BIT8; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set PHY register 30, page 4, bit 11 to 1 */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PHY_VCO_REG_BIT11; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, default_page); ++ if (ret_val) ++ return ret_val; ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function reads the cookie from ARC ram. ++ * ++ * returns: - E1000_SUCCESS . 
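++ * The byte-based offset and length constants are converted to DWORD units
++ * (shifted right by 2) and the cookie is copied out of the HOST_IF register
++ * array one 32-bit word at a time.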
++ ****************************************************************************/ ++int32_t ++e1000_host_if_read_cookie(struct e1000_hw * hw, uint8_t *buffer) ++{ ++ uint8_t i; ++ uint32_t offset = E1000_MNG_DHCP_COOKIE_OFFSET; ++ uint8_t length = E1000_MNG_DHCP_COOKIE_LENGTH; ++ ++ length = (length >> 2); ++ offset = (offset >> 2); ++ ++ for (i = 0; i < length; i++) { ++ *((uint32_t *) buffer + i) = ++ E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset + i); ++ } ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function checks whether the HOST IF is enabled for command operaton ++ * and also checks whether the previous command is completed. ++ * It busy waits in case of previous command is not completed. ++ * ++ * returns: - E1000_ERR_HOST_INTERFACE_COMMAND in case if is not ready or ++ * timeout ++ * - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_enable_host_if(struct e1000_hw * hw) ++{ ++ uint32_t hicr; ++ uint8_t i; ++ ++ /* Check that the host interface is enabled. */ ++ hicr = E1000_READ_REG(hw, HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ DEBUGOUT("E1000_HOST_EN bit disabled.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = E1000_READ_REG(hw, HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ msec_delay_irq(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ DEBUGOUT("Previous command timeout failed .\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ return E1000_SUCCESS; ++} ++ ++/***************************************************************************** ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient way. ++ * Also fills up the sum of the buffer in *buffer parameter. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_host_if_write(struct e1000_hw * hw, uint8_t *buffer, ++ uint16_t length, uint16_t offset, uint8_t *sum) ++{ ++ uint8_t *tmp; ++ uint8_t *bufptr = buffer; ++ uint32_t data = 0; ++ uint16_t remaining, i, j, prev_bytes; ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) { ++ return -E1000_ERR_PARAM; ++ } ++ ++ tmp = (uint8_t *)&data; ++ prev_bytes = offset & 0x3; ++ offset &= 0xFFFC; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY_DWORD(hw, HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(uint32_t); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* The device driver writes the relevant command block into the ++ * ram area. 
*/ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(uint32_t); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(uint32_t); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, offset + i, data); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function writes the command header after does the checksum calculation. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_write_cmd_header(struct e1000_hw * hw, ++ struct e1000_host_mng_command_header * hdr) ++{ ++ uint16_t i; ++ uint8_t sum; ++ uint8_t *buffer; ++ ++ /* Write the whole command header structure which includes sum of ++ * the buffer */ ++ ++ uint16_t length = sizeof(struct e1000_host_mng_command_header); ++ ++ sum = hdr->checksum; ++ hdr->checksum = 0; ++ ++ buffer = (uint8_t *) hdr; ++ i = length; ++ while (i--) ++ sum += buffer[i]; ++ ++ hdr->checksum = 0 - sum; ++ ++ length >>= 2; ++ /* The device driver writes the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY_DWORD(hw, HOST_IF, i, *((uint32_t *) hdr + i)); ++ E1000_WRITE_FLUSH(hw); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function indicates to ARC that a new command is pending which completes ++ * one write operation by the driver. ++ * ++ * returns - E1000_SUCCESS for success. ++ ****************************************************************************/ ++int32_t ++e1000_mng_write_commit(struct e1000_hw * hw) ++{ ++ uint32_t hicr; ++ ++ hicr = E1000_READ_REG(hw, HICR); ++ /* Setting this bit tells the ARC that a new command is pending. */ ++ E1000_WRITE_REG(hw, HICR, hicr | E1000_HICR_C); ++ ++ return E1000_SUCCESS; ++} ++ ++ ++/***************************************************************************** ++ * This function checks the mode of the firmware. ++ * ++ * returns - TRUE when the mode is IAMT or FALSE. ++ ****************************************************************************/ ++boolean_t ++e1000_check_mng_mode(struct e1000_hw *hw) ++{ ++ uint32_t fwsm; ++ ++ fwsm = E1000_READ_REG(hw, FWSM); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ if ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_ICH_IAMT_MODE << E1000_FWSM_MODE_SHIFT)) ++ return TRUE; ++ } else if ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)) ++ return TRUE; ++ ++ return FALSE; ++} ++ ++ ++/***************************************************************************** ++ * This function writes the dhcp info . 
++ ****************************************************************************/ ++int32_t ++e1000_mng_write_dhcp_info(struct e1000_hw * hw, uint8_t *buffer, ++ uint16_t length) ++{ ++ int32_t ret_val; ++ struct e1000_host_mng_command_header hdr; ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, sizeof(hdr), ++ &(hdr.checksum)); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val == E1000_SUCCESS) ++ ret_val = e1000_mng_write_commit(hw); ++ } ++ } ++ return ret_val; ++} ++ ++ ++/***************************************************************************** ++ * This function calculates the checksum. ++ * ++ * returns - checksum of buffer contents. ++ ****************************************************************************/ ++uint8_t ++e1000_calculate_mng_checksum(char *buffer, uint32_t length) ++{ ++ uint8_t sum = 0; ++ uint32_t i; ++ ++ if (!buffer) ++ return 0; ++ ++ for (i=0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (uint8_t) (0 - sum); ++} ++ ++/***************************************************************************** ++ * This function checks whether tx pkt filtering needs to be enabled or not. ++ * ++ * returns - TRUE for packet filtering or FALSE. ++ ****************************************************************************/ ++boolean_t ++e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ /* called in init as well as watchdog timer functions */ ++ ++ int32_t ret_val, checksum; ++ boolean_t tx_filter = FALSE; ++ struct e1000_host_mng_dhcp_cookie *hdr = &(hw->mng_cookie); ++ uint8_t *buffer = (uint8_t *) &(hw->mng_cookie); ++ ++ if (e1000_check_mng_mode(hw)) { ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val == E1000_SUCCESS) { ++ ret_val = e1000_host_if_read_cookie(hw, buffer); ++ if (ret_val == E1000_SUCCESS) { ++ checksum = hdr->checksum; ++ hdr->checksum = 0; ++ if ((hdr->signature == E1000_IAMT_SIGNATURE) && ++ checksum == e1000_calculate_mng_checksum((char *)buffer, ++ E1000_MNG_DHCP_COOKIE_LENGTH)) { ++ if (hdr->status & ++ E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT) ++ tx_filter = TRUE; ++ } else ++ tx_filter = TRUE; ++ } else ++ tx_filter = TRUE; ++ } ++ } ++ ++ hw->tx_pkt_filtering = tx_filter; ++ return tx_filter; ++} ++ ++/****************************************************************************** ++ * Verifies the hardware needs to allow ARPs to be processed by the host ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * returns: - TRUE/FALSE ++ * ++ *****************************************************************************/ ++uint32_t ++e1000_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ uint32_t manc; ++ uint32_t fwsm, factps; ++ ++ if (hw->asf_firmware_present) { ++ manc = E1000_READ_REG(hw, MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN) || ++ !(manc & E1000_MANC_EN_MAC_ADDR_FILTER)) ++ return FALSE; ++ if (e1000_arc_subsystem_valid(hw) == TRUE) { ++ fwsm = E1000_READ_REG(hw, FWSM); ++ factps = E1000_READ_REG(hw, FACTPS); ++ ++ if (((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)) && ++ (factps & E1000_FACTPS_MNGCG)) ++ return TRUE; ++ } else ++ if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN)) ++ return TRUE; ++ } ++ return FALSE; ++} ++ ++static int32_t 
++e1000_polarity_reversal_workaround(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t mii_status_reg; ++ uint16_t i; ++ ++ /* Polarity reversal workaround for forced 10F/10H links. */ ++ ++ /* Disable the transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ /* This loop will early-out if the NO link condition has been met. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Link Status bit ++ * to be clear. ++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) break; ++ msec_delay_irq(100); ++ } ++ ++ /* Recommended delay time after link has been lost */ ++ msec_delay_irq(1000); ++ ++ /* Now we will re-enable th transmitter on the PHY */ ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); ++ if (ret_val) ++ return ret_val; ++ msec_delay_irq(50); ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000); ++ if (ret_val) ++ return ret_val; ++ ++ /* This loop will early-out if the link condition has been met. */ ++ for (i = PHY_FORCE_TIME; i > 0; i--) { ++ /* Read the MII Status Register and wait for Link Status bit ++ * to be set. ++ */ ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (mii_status_reg & MII_SR_LINK_STATUS) break; ++ msec_delay_irq(100); ++ } ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Disables PCI-Express master access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - none. ++ * ++ ***************************************************************************/ ++void ++e1000_set_pci_express_master_disable(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_set_pci_express_master_disable"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return; ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++} ++ ++/*************************************************************************** ++ * ++ * Enables PCI-Express master access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - none. 
++ * ++ ***************************************************************************/ ++void ++e1000_enable_pciex_master(struct e1000_hw *hw) ++{ ++ uint32_t ctrl; ++ ++ DEBUGFUNC("e1000_enable_pciex_master"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return; ++ ++ ctrl = E1000_READ_REG(hw, CTRL); ++ ctrl &= ~E1000_CTRL_GIO_MASTER_DISABLE; ++ E1000_WRITE_REG(hw, CTRL, ctrl); ++} ++ ++/******************************************************************************* ++ * ++ * Disables PCI-Express master access and verifies there are no pending requests ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_MASTER_REQUESTS_PENDING if master disable bit hasn't ++ * caused the master requests to be disabled. ++ * E1000_SUCCESS master requests disabled. ++ * ++ ******************************************************************************/ ++int32_t ++e1000_disable_pciex_master(struct e1000_hw *hw) ++{ ++ int32_t timeout = MASTER_DISABLE_TIMEOUT; /* 80ms */ ++ ++ DEBUGFUNC("e1000_disable_pciex_master"); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return E1000_SUCCESS; ++ ++ e1000_set_pci_express_master_disable(hw); ++ ++ while (timeout) { ++ if (!(E1000_READ_REG(hw, STATUS) & E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ else ++ usec_delay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Master requests are pending.\n"); ++ return -E1000_ERR_MASTER_REQUESTS_PENDING; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/******************************************************************************* ++ * ++ * Check for EEPROM Auto Read bit done. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to reset MAC ++ * E1000_SUCCESS at any other case. ++ * ++ ******************************************************************************/ ++int32_t ++e1000_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ int32_t timeout = AUTO_READ_DONE_TIMEOUT; ++ ++ DEBUGFUNC("e1000_get_auto_rd_done"); ++ ++ switch (hw->mac_type) { ++ default: ++ msec_delay(5); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ case e1000_ich8lan: ++ while (timeout) { ++ if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) ++ break; ++ else msec_delay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Auto read by HW from EEPROM has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ break; ++ } ++ ++ /* PHY configuration from NVM just starts after EECD_AUTO_RD sets to high. ++ * Need to wait for PHY configuration completion before accessing NVM ++ * and PHY. */ ++ if (hw->mac_type == e1000_82573) ++ msec_delay(25); ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * Checks if the PHY configuration is done ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to reset MAC ++ * E1000_SUCCESS at any other case. 
++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ int32_t timeout = PHY_CFG_TIMEOUT; ++ uint32_t cfg_mask = E1000_EEPROM_CFG_DONE; ++ ++ DEBUGFUNC("e1000_get_phy_cfg_done"); ++ ++ switch (hw->mac_type) { ++ default: ++ msec_delay_irq(10); ++ break; ++ case e1000_80003es2lan: ++ /* Separate *_CFG_DONE_* bit for each port */ ++ if (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1) ++ cfg_mask = E1000_EEPROM_CFG_DONE_PORT_1; ++ /* Fall Through */ ++ case e1000_82571: ++ case e1000_82572: ++ while (timeout) { ++ if (E1000_READ_REG(hw, EEMNGCTL) & cfg_mask) ++ break; ++ else ++ msec_delay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ break; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Using the combination of SMBI and SWESMBI semaphore bits when resetting ++ * adapter or Eeprom access. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_EEPROM if fail to access EEPROM. ++ * E1000_SUCCESS at any other case. ++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_hw_eeprom_semaphore(struct e1000_hw *hw) ++{ ++ int32_t timeout; ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_get_hw_eeprom_semaphore"); ++ ++ if (!hw->eeprom_semaphore_present) ++ return E1000_SUCCESS; ++ ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Get the SW semaphore. */ ++ if (e1000_get_software_semaphore(hw) != E1000_SUCCESS) ++ return -E1000_ERR_EEPROM; ++ } ++ ++ /* Get the FW semaphore. */ ++ timeout = hw->eeprom.word_size + 1; ++ while (timeout) { ++ swsm = E1000_READ_REG(hw, SWSM); ++ swsm |= E1000_SWSM_SWESMBI; ++ E1000_WRITE_REG(hw, SWSM, swsm); ++ /* if we managed to set the bit we got the semaphore. */ ++ swsm = E1000_READ_REG(hw, SWSM); ++ if (swsm & E1000_SWSM_SWESMBI) ++ break; ++ ++ usec_delay(50); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_eeprom_semaphore(hw); ++ DEBUGOUT("Driver can't access the Eeprom - SWESMBI bit is set.\n"); ++ return -E1000_ERR_EEPROM; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * This function clears HW semaphore bits. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - None. ++ * ++ ***************************************************************************/ ++void ++e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw) ++{ ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_put_hw_eeprom_semaphore"); ++ ++ if (!hw->eeprom_semaphore_present) ++ return; ++ ++ swsm = E1000_READ_REG(hw, SWSM); ++ if (hw->mac_type == e1000_80003es2lan) { ++ /* Release both semaphores. */ ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ } else ++ swsm &= ~(E1000_SWSM_SWESMBI); ++ E1000_WRITE_REG(hw, SWSM, swsm); ++} ++ ++/*************************************************************************** ++ * ++ * Obtaining software semaphore bit (SMBI) before resetting PHY. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_ERR_RESET if fail to obtain semaphore. ++ * E1000_SUCCESS at any other case. 
++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_software_semaphore(struct e1000_hw *hw) ++{ ++ int32_t timeout = hw->eeprom.word_size + 1; ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_get_software_semaphore"); ++ ++ if (hw->mac_type != e1000_80003es2lan) ++ return E1000_SUCCESS; ++ ++ while (timeout) { ++ swsm = E1000_READ_REG(hw, SWSM); ++ /* If SMBI bit cleared, it is now set and we hold the semaphore */ ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Release semaphore bit (SMBI). ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++void ++e1000_release_software_semaphore(struct e1000_hw *hw) ++{ ++ uint32_t swsm; ++ ++ DEBUGFUNC("e1000_release_software_semaphore"); ++ ++ if (hw->mac_type != e1000_80003es2lan) ++ return; ++ ++ swsm = E1000_READ_REG(hw, SWSM); ++ /* Release the SW semaphores.*/ ++ swsm &= ~E1000_SWSM_SMBI; ++ E1000_WRITE_REG(hw, SWSM, swsm); ++} ++ ++/****************************************************************************** ++ * Checks if PHY reset is blocked due to SOL/IDER session, for example. ++ * Returning E1000_BLK_PHY_RESET isn't necessarily an error. But it's up to ++ * the caller to figure out how to deal with it. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * ++ * returns: - E1000_BLK_PHY_RESET ++ * E1000_SUCCESS ++ * ++ *****************************************************************************/ ++int32_t ++e1000_check_phy_reset_block(struct e1000_hw *hw) ++{ ++ uint32_t manc = 0; ++ uint32_t fwsm = 0; ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ fwsm = E1000_READ_REG(hw, FWSM); ++ return (fwsm & E1000_FWSM_RSPCIPHY) ? E1000_SUCCESS ++ : E1000_BLK_PHY_RESET; ++ } ++ ++ if (hw->mac_type > e1000_82547_rev_2) ++ manc = E1000_READ_REG(hw, MANC); ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : E1000_SUCCESS; ++} ++ ++uint8_t ++e1000_arc_subsystem_valid(struct e1000_hw *hw) ++{ ++ uint32_t fwsm; ++ ++ /* On 8257x silicon, registers in the range of 0x8800 - 0x8FFC ++ * may not be provided a DMA clock when no manageability features are ++ * enabled. We do not want to perform any reads/writes to these registers ++ * if this is the case. We read FWSM to determine the manageability mode. ++ */ ++ switch (hw->mac_type) { ++ case e1000_82571: ++ case e1000_82572: ++ case e1000_82573: ++ case e1000_80003es2lan: ++ fwsm = E1000_READ_REG(hw, FWSM); ++ if ((fwsm & E1000_FWSM_MODE_MASK) != 0) ++ return TRUE; ++ break; ++ case e1000_ich8lan: ++ return TRUE; ++ default: ++ break; ++ } ++ return FALSE; ++} ++ ++ ++/****************************************************************************** ++ * Configure PCI-Ex no-snoop ++ * ++ * hw - Struct containing variables accessed by shared code. ++ * no_snoop - Bitmap of no-snoop events. 
++ * ++ * returns: E1000_SUCCESS ++ * ++ *****************************************************************************/ ++int32_t ++e1000_set_pci_ex_no_snoop(struct e1000_hw *hw, uint32_t no_snoop) ++{ ++ uint32_t gcr_reg = 0; ++ ++ DEBUGFUNC("e1000_set_pci_ex_no_snoop"); ++ ++ if (hw->bus_type == e1000_bus_type_unknown) ++ e1000_get_bus_info(hw); ++ ++ if (hw->bus_type != e1000_bus_type_pci_express) ++ return E1000_SUCCESS; ++ ++ if (no_snoop) { ++ gcr_reg = E1000_READ_REG(hw, GCR); ++ gcr_reg &= ~(PCI_EX_NO_SNOOP_ALL); ++ gcr_reg |= no_snoop; ++ E1000_WRITE_REG(hw, GCR, gcr_reg); ++ } ++ if (hw->mac_type == e1000_ich8lan) { ++ uint32_t ctrl_ext; ++ ++ E1000_WRITE_REG(hw, GCR, PCI_EX_82566_SNOOP_ALL); ++ ++ ctrl_ext = E1000_READ_REG(hw, CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext); ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Get software semaphore FLAG bit (SWFLAG). ++ * SWFLAG is used to synchronize the access to all shared resource between ++ * SW, FW and HW. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_get_software_flag(struct e1000_hw *hw) ++{ ++ int32_t timeout = PHY_CFG_TIMEOUT; ++ uint32_t extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_get_software_flag"); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ while (timeout) { ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ ++ extcnf_ctrl = E1000_READ_REG(hw, EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ msec_delay_irq(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ DEBUGOUT("FW or HW locks the resource too long.\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++/*************************************************************************** ++ * ++ * Release software semaphore FLAG bit (SWFLAG). ++ * SWFLAG is used to synchronize the access to all shared resource between ++ * SW, FW and HW. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++void ++e1000_release_software_flag(struct e1000_hw *hw) ++{ ++ uint32_t extcnf_ctrl; ++ ++ DEBUGFUNC("e1000_release_software_flag"); ++ ++ if (hw->mac_type == e1000_ich8lan) { ++ extcnf_ctrl= E1000_READ_REG(hw, EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ E1000_WRITE_REG(hw, EXTCNF_CTRL, extcnf_ctrl); ++ } ++ ++ return; ++} ++ ++/*************************************************************************** ++ * ++ * Disable dynamic power down mode in ife PHY. ++ * It can be used to workaround band-gap problem. 
++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_ife_disable_dynamic_power_down(struct e1000_hw *hw) ++{ ++ uint16_t phy_data; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_ife_disable_dynamic_power_down"); ++ ++ if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN; ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, phy_data); ++ } ++ ++ return ret_val; ++} ++ ++/*************************************************************************** ++ * ++ * Enable dynamic power down mode in ife PHY. ++ * It can be used to workaround band-gap problem. ++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ ***************************************************************************/ ++int32_t ++e1000_ife_enable_dynamic_power_down(struct e1000_hw *hw) ++{ ++ uint16_t phy_data; ++ int32_t ret_val = E1000_SUCCESS; ++ ++ DEBUGFUNC("e1000_ife_enable_dynamic_power_down"); ++ ++ if (hw->phy_type == e1000_phy_ife) { ++ ret_val = e1000_read_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN; ++ ret_val = e1000_write_phy_reg(hw, IFE_PHY_SPECIAL_CONTROL, phy_data); ++ } ++ ++ return ret_val; ++} ++ ++/****************************************************************************** ++ * Reads a 16 bit word or words from the EEPROM using the ICH8's flash access ++ * register. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to read ++ * data - word read from the EEPROM ++ * words - number of words to read ++ *****************************************************************************/ ++int32_t ++e1000_read_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, uint16_t words, ++ uint16_t *data) ++{ ++ int32_t error = E1000_SUCCESS; ++ uint32_t flash_bank = 0; ++ uint32_t act_offset = 0; ++ uint32_t bank_offset = 0; ++ uint16_t word = 0; ++ uint16_t i = 0; ++ ++ /* We need to know which is the valid flash bank. In the event ++ * that we didn't allocate eeprom_shadow_ram, we may not be ++ * managing flash_bank. So it cannot be trusted and needs ++ * to be updated with each read. ++ */ ++ /* Value of bit 22 corresponds to the flash bank we're on. */ ++ flash_bank = (E1000_READ_REG(hw, EECD) & E1000_EECD_SEC1VAL) ? 1 : 0; ++ ++ /* Adjust offset appropriately if we're on bank 1 - adjust for word size */ ++ bank_offset = flash_bank * (hw->flash_bank_size * 2); ++ ++ error = e1000_get_software_flag(hw); ++ if (error != E1000_SUCCESS) ++ return error; ++ ++ for (i = 0; i < words; i++) { ++ if (hw->eeprom_shadow_ram != NULL && ++ hw->eeprom_shadow_ram[offset+i].modified == TRUE) { ++ data[i] = hw->eeprom_shadow_ram[offset+i].eeprom_word; ++ } else { ++ /* The NVM part needs a byte offset, hence * 2 */ ++ act_offset = bank_offset + ((offset + i) * 2); ++ error = e1000_read_ich8_word(hw, act_offset, &word); ++ if (error != E1000_SUCCESS) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ e1000_release_software_flag(hw); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a 16 bit word or words to the EEPROM using the ICH8's flash access ++ * register. 
Actually, writes are written to the shadow ram cache in the hw ++ * structure hw->e1000_shadow_ram. e1000_commit_shadow_ram flushes this to ++ * the NVM, which occurs when the NVM checksum is updated. ++ * ++ * hw - Struct containing variables accessed by shared code ++ * offset - offset of word in the EEPROM to write ++ * words - number of words to write ++ * data - words to write to the EEPROM ++ *****************************************************************************/ ++int32_t ++e1000_write_eeprom_ich8(struct e1000_hw *hw, uint16_t offset, uint16_t words, ++ uint16_t *data) ++{ ++ uint32_t i = 0; ++ int32_t error = E1000_SUCCESS; ++ ++ error = e1000_get_software_flag(hw); ++ if (error != E1000_SUCCESS) ++ return error; ++ ++ /* A driver can write to the NVM only if it has eeprom_shadow_ram ++ * allocated. Subsequent reads to the modified words are read from ++ * this cached structure as well. Writes will only go into this ++ * cached structure unless it's followed by a call to ++ * e1000_update_eeprom_checksum() where it will commit the changes ++ * and clear the "modified" field. ++ */ ++ if (hw->eeprom_shadow_ram != NULL) { ++ for (i = 0; i < words; i++) { ++ if ((offset + i) < E1000_SHADOW_RAM_WORDS) { ++ hw->eeprom_shadow_ram[offset+i].modified = TRUE; ++ hw->eeprom_shadow_ram[offset+i].eeprom_word = data[i]; ++ } else { ++ error = -E1000_ERR_EEPROM; ++ break; ++ } ++ } ++ } else { ++ /* Drivers have the option to not allocate eeprom_shadow_ram as long ++ * as they don't perform any NVM writes. An attempt in doing so ++ * will result in this error. ++ */ ++ error = -E1000_ERR_EEPROM; ++ } ++ ++ e1000_release_software_flag(hw); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ * ++ * hw - The pointer to the hw structure ++ ****************************************************************************/ ++int32_t ++e1000_ich8_cycle_init(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ int32_t error = E1000_ERR_EEPROM; ++ int32_t i = 0; ++ ++ DEBUGFUNC("e1000_ich8_cycle_init"); ++ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ ++ /* May be check the Flash Des Valid bit in Hw status */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ DEBUGOUT("Flash descriptor invalid. SW Sequencing must be used."); ++ return error; ++ } ++ ++ /* Clear FCERR in Hw status by writing 1 */ ++ /* Clear DAEL in Hw status by writing a 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* Either we should have a hardware SPI cycle in progress bit to check ++ * against, in order to start a new cycle or FDONE bit should be changed ++ * in the hardware so that it is 1 after harware reset, which can then be ++ * used as an indication whether a cycle is in progress or has been ++ * completed .. we should also have some software semaphore mechanism to ++ * guard FDONE or the cycle in progress bit so that two threads access to ++ * those bits can be sequentiallized or a way so that 2 threads dont ++ * start the cycle at the same time */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* There is no cycle running at present, so we can start a cycle */ ++ /* Begin by setting Flash Cycle Done. 
*/ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ error = E1000_SUCCESS; ++ } else { ++ /* otherwise poll for sometime so the current cycle has a chance ++ * to end before giving up. */ ++ for (i = 0; i < ICH8_FLASH_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ error = E1000_SUCCESS; ++ break; ++ } ++ usec_delay(1); ++ } ++ if (error == E1000_SUCCESS) { ++ /* Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. */ ++ hsfsts.hsf_status.flcdone = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFSTS, hsfsts.regval); ++ } else { ++ DEBUGOUT("Flash controller busy, cannot get access"); ++ } ++ } ++ return error; ++} ++ ++/****************************************************************************** ++ * This function starts a flash cycle and waits for its completion ++ * ++ * hw - The pointer to the hw structure ++ ****************************************************************************/ ++int32_t ++e1000_ich8_flash_cycle(struct e1000_hw *hw, uint32_t timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ int32_t error = E1000_ERR_EEPROM; ++ uint32_t i = 0; ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ usec_delay(1); ++ i++; ++ } while (i < timeout); ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) { ++ error = E1000_SUCCESS; ++ } ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads a byte or word from the NVM using the ICH8 flash access registers. ++ * ++ * hw - The pointer to the hw structure ++ * index - The index of the byte or word to read. ++ * size - Size of data to read, 1=byte 2=word ++ * data - Pointer to the word to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_data(struct e1000_hw *hw, uint32_t index, ++ uint32_t size, uint16_t* data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ uint32_t flash_data = 0; ++ int32_t error = -E1000_ERR_EEPROM; ++ int32_t count = 0; ++ ++ DEBUGFUNC("e1000_read_ich8_data"); ++ ++ if (size < 1 || size > 2 || data == 0x0 || ++ index > ICH8_FLASH_LINEAR_ADDR_MASK) ++ return error; ++ ++ flash_linear_address = (ICH8_FLASH_LINEAR_ADDR_MASK & index) + ++ hw->flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_READ; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of index into Flash Linear address field in ++ * Flash Address */ ++ /* TODO: TBD maybe check the index against the size of flash */ ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ error = e1000_ich8_flash_cycle(hw, ICH8_FLASH_COMMAND_TIMEOUT); ++ ++ /* Check if FCERR is set to 1, if set to 1, clear it and try the whole ++ * sequence a few more times, else read in (shift in) the Flash Data0, ++ * the order is least significant byte first msb to lsb */ ++ if (error == E1000_SUCCESS) { ++ flash_data = E1000_READ_ICH8_REG(hw, ICH8_FLASH_FDATA0); ++ if (size == 1) { ++ *data = (uint8_t)(flash_data & 0x000000FF); ++ } else if (size == 2) { ++ *data = (uint16_t)(flash_data & 0x0000FFFF); ++ } ++ break; ++ } else { ++ /* If we've gotten here, then things are probably completely hosed, ++ * but if the error condition is detected, it won't hurt to give ++ * it another try...ICH8_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH8_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes One /two bytes to the NVM using the ICH8 flash access registers. ++ * ++ * hw - The pointer to the hw structure ++ * index - The index of the byte/word to read. ++ * size - Size of data to read, 1=byte 2=word ++ * data - The byte(s) to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_data(struct e1000_hw *hw, uint32_t index, uint32_t size, ++ uint16_t data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ uint32_t flash_data = 0; ++ int32_t error = -E1000_ERR_EEPROM; ++ int32_t count = 0; ++ ++ DEBUGFUNC("e1000_write_ich8_data"); ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ index > ICH8_FLASH_LINEAR_ADDR_MASK) ++ return error; ++ ++ flash_linear_address = (ICH8_FLASH_LINEAR_ADDR_MASK & index) + ++ hw->flash_base_addr; ++ ++ do { ++ usec_delay(1); ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) ++ break; ++ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. 
*/ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_WRITE; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of index into Flash Linear address field in ++ * Flash Address */ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ if (size == 1) ++ flash_data = (uint32_t)data & 0x00FF; ++ else ++ flash_data = (uint32_t)data; ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FDATA0, flash_data); ++ ++ /* check if FCERR is set to 1 , if set to 1, clear it and try the whole ++ * sequence a few more times else done */ ++ error = e1000_ich8_flash_cycle(hw, ICH8_FLASH_COMMAND_TIMEOUT); ++ if (error == E1000_SUCCESS) { ++ break; ++ } else { ++ /* If we're here, then things are most likely completely hosed, ++ * but if the error condition is detected, it won't hurt to give ++ * it another try...ICH8_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ DEBUGOUT("Timeout error - flash cycle did not complete."); ++ break; ++ } ++ } ++ } while (count++ < ICH8_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Reads a single byte from the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to read. ++ * data - Pointer to a byte to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t* data) ++{ ++ int32_t status = E1000_SUCCESS; ++ uint16_t word = 0; ++ ++ status = e1000_read_ich8_data(hw, index, 1, &word); ++ if (status == E1000_SUCCESS) { ++ *data = (uint8_t)word; ++ } ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a single byte to the NVM using the ICH8 flash access registers. ++ * Performs verification by reading back the value and then going through ++ * a retry algorithm before giving up. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to write. ++ * byte - The byte to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_verify_write_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t byte) ++{ ++ int32_t error = E1000_SUCCESS; ++ int32_t program_retries; ++ uint8_t temp_byte = 0; ++ ++ e1000_write_ich8_byte(hw, index, byte); ++ usec_delay(100); ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ e1000_read_ich8_byte(hw, index, &temp_byte); ++ if (temp_byte == byte) ++ break; ++ usec_delay(10); ++ e1000_write_ich8_byte(hw, index, byte); ++ usec_delay(100); ++ } ++ if (program_retries == 100) ++ error = E1000_ERR_EEPROM; ++ ++ return error; ++} ++ ++/****************************************************************************** ++ * Writes a single byte to the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The index of the byte to read. ++ * data - The byte to write to the NVM. 
++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_byte(struct e1000_hw *hw, uint32_t index, uint8_t data) ++{ ++ int32_t status = E1000_SUCCESS; ++ uint16_t word = (uint16_t)data; ++ ++ status = e1000_write_ich8_data(hw, index, 1, word); ++ ++ return status; ++} ++ ++/****************************************************************************** ++ * Reads a word from the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The starting byte index of the word to read. ++ * data - Pointer to a word to store the value read. ++ *****************************************************************************/ ++int32_t ++e1000_read_ich8_word(struct e1000_hw *hw, uint32_t index, uint16_t *data) ++{ ++ int32_t status = E1000_SUCCESS; ++ status = e1000_read_ich8_data(hw, index, 2, data); ++ return status; ++} ++ ++/****************************************************************************** ++ * Writes a word to the NVM using the ICH8 flash access registers. ++ * ++ * hw - pointer to e1000_hw structure ++ * index - The starting byte index of the word to read. ++ * data - The word to write to the NVM. ++ *****************************************************************************/ ++int32_t ++e1000_write_ich8_word(struct e1000_hw *hw, uint32_t index, uint16_t data) ++{ ++ int32_t status = E1000_SUCCESS; ++ status = e1000_write_ich8_data(hw, index, 2, data); ++ return status; ++} ++ ++/****************************************************************************** ++ * Erases the bank specified. Each bank is a 4k block. Segments are 0 based. ++ * segment N is 4096 * N + flash_reg_addr. ++ * ++ * hw - pointer to e1000_hw structure ++ * segment - 0 for first segment, 1 for second segment, etc. ++ *****************************************************************************/ ++int32_t ++e1000_erase_ich8_4k_segment(struct e1000_hw *hw, uint32_t segment) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ uint32_t flash_linear_address; ++ int32_t count = 0; ++ int32_t error = E1000_ERR_EEPROM; ++ int32_t iteration, seg_size; ++ int32_t sector_size; ++ int32_t j = 0; ++ int32_t error_flag = 0; ++ ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ ++ /* Determine HW Sector size: Read BERASE bits of Hw flash Status register */ ++ /* 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector can be ++ * calculated as = segment * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. 
++ * The start index for the nth Hw sector can be calculated ++ * as = segment * 4096 ++ * 10: Error condition ++ * 11: The Hw sector size is much bigger than the size asked to ++ * erase...error condition */ ++ if (hsfsts.hsf_status.berasesz == 0x0) { ++ /* Hw sector size 256 */ ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_256; ++ iteration = ICH8_FLASH_SECTOR_SIZE / ICH8_FLASH_SEG_SIZE_256; ++ } else if (hsfsts.hsf_status.berasesz == 0x1) { ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_4K; ++ iteration = 1; ++ } else if (hsfsts.hsf_status.berasesz == 0x3) { ++ sector_size = seg_size = ICH8_FLASH_SEG_SIZE_64K; ++ iteration = 1; ++ } else { ++ return error; ++ } ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ count++; ++ /* Steps */ ++ error = e1000_ich8_cycle_init(hw); ++ if (error != E1000_SUCCESS) { ++ error_flag = 1; ++ break; ++ } ++ ++ /* Write a value 11 (block Erase) in Flash Cycle field in Hw flash ++ * Control */ ++ hsflctl.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH8_CYCLE_ERASE; ++ E1000_WRITE_ICH8_REG16(hw, ICH8_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* Write the last 24 bits of an index within the block into Flash ++ * Linear address field in Flash Address. This probably needs to ++ * be calculated here based off the on-chip segment size and the ++ * software segment size assumed (4K) */ ++ /* TBD */ ++ flash_linear_address = segment * sector_size + j * seg_size; ++ flash_linear_address &= ICH8_FLASH_LINEAR_ADDR_MASK; ++ flash_linear_address += hw->flash_base_addr; ++ ++ E1000_WRITE_ICH8_REG(hw, ICH8_FLASH_FADDR, flash_linear_address); ++ ++ error = e1000_ich8_flash_cycle(hw, 1000000); ++ /* Check if FCERR is set to 1. If 1, clear it and try the whole ++ * sequence a few more times else Done */ ++ if (error == E1000_SUCCESS) { ++ break; ++ } else { ++ hsfsts.regval = E1000_READ_ICH8_REG16(hw, ICH8_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* repeat for some time before giving up */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ error_flag = 1; ++ break; ++ } ++ } ++ } while ((count < ICH8_FLASH_CYCLE_REPEAT_COUNT) && !error_flag); ++ if (error_flag == 1) ++ break; ++ } ++ if (error_flag != 1) ++ error = E1000_SUCCESS; ++ return error; ++} ++ ++/****************************************************************************** ++ * ++ * Reverse duplex setting without breaking the link. 
++ * ++ * hw: Struct containing variables accessed by shared code ++ * ++ *****************************************************************************/ ++int32_t ++e1000_duplex_reversal(struct e1000_hw *hw) ++{ ++ int32_t ret_val; ++ uint16_t phy_data; ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data ^= MII_CR_FULL_DUPLEX; ++ ++ ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_phy_reg(hw, IGP3E1000_PHY_MISC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= IGP3_PHY_MISC_DUPLEX_MANUAL_SET; ++ ret_val = e1000_write_phy_reg(hw, IGP3E1000_PHY_MISC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++int32_t ++e1000_init_lcd_from_nvm_config_region(struct e1000_hw *hw, ++ uint32_t cnf_base_addr, uint32_t cnf_size) ++{ ++ uint32_t ret_val = E1000_SUCCESS; ++ uint16_t word_addr, reg_data, reg_addr; ++ uint16_t i; ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (uint16_t)(cnf_base_addr << 1); ++ ++ /* cnf_size is returned in size of dwords */ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_eeprom(hw, (word_addr + i*2), 1, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_eeprom(hw, (word_addr + i*2 + 1), 1, ®_addr); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_get_software_flag(hw); ++ if (ret_val != E1000_SUCCESS) ++ return ret_val; ++ ++ ret_val = e1000_write_phy_reg_ex(hw, (uint32_t)reg_addr, reg_data); ++ ++ e1000_release_software_flag(hw); ++ } ++ ++ return ret_val; ++} ++ ++ ++int32_t ++e1000_init_lcd_from_nvm(struct e1000_hw *hw) ++{ ++ uint32_t reg_data, cnf_base_addr, cnf_size, ret_val, loop; ++ ++ if (hw->phy_type != e1000_phy_igp_3) ++ return E1000_SUCCESS; ++ ++ /* Check if SW needs configure the PHY */ ++ reg_data = E1000_READ_REG(hw, FEXTNVM); ++ if (!(reg_data & FEXTNVM_SW_CONFIG)) ++ return E1000_SUCCESS; ++ ++ /* Wait for basic configuration completes before proceeding*/ ++ loop = 0; ++ do { ++ reg_data = E1000_READ_REG(hw, STATUS) & E1000_STATUS_LAN_INIT_DONE; ++ usec_delay(100); ++ loop++; ++ } while ((!reg_data) && (loop < 50)); ++ ++ /* Clear the Init Done bit for the next init event */ ++ reg_data = E1000_READ_REG(hw, STATUS); ++ reg_data &= ~E1000_STATUS_LAN_INIT_DONE; ++ E1000_WRITE_REG(hw, STATUS, reg_data); ++ ++ /* Make sure HW does not configure LCD from PHY extended configuration ++ before SW configuration */ ++ reg_data = E1000_READ_REG(hw, EXTCNF_CTRL); ++ if ((reg_data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE) == 0x0000) { ++ reg_data = E1000_READ_REG(hw, EXTCNF_SIZE); ++ cnf_size = reg_data & E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH; ++ cnf_size >>= 16; ++ if (cnf_size) { ++ reg_data = E1000_READ_REG(hw, EXTCNF_CTRL); ++ cnf_base_addr = reg_data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER; ++ /* cnf_base_addr is in DWORD */ ++ cnf_base_addr >>= 16; ++ ++ /* Configure LCD from extended configuration region. */ ++ ret_val = e1000_init_lcd_from_nvm_config_region(hw, cnf_base_addr, ++ cnf_size); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return E1000_SUCCESS; ++} ++ ++ ++ +--- linux/drivers/xenomai/net/drivers/e1000/e1000_param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000/e1000_param.c 2021-04-07 16:01:27.381633946 +0800 +@@ -0,0 +1,906 @@ ++/******************************************************************************* ++ ++ ++ Copyright(c) 1999 - 2006 Intel Corporation. 
All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by the Free ++ Software Foundation; either version 2 of the License, or (at your option) ++ any later version. ++ ++ This program is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., 59 ++ Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ ++ The full GNU General Public License is included in this distribution in the ++ file called LICENSE. ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++/* This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++/* All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET } ++#ifndef module_param_array ++/* Module Parameters are always initialized to -1, so that the driver ++ * can tell the difference between no user specified value or the ++ * user asking for the default value. ++ * The true default values are loaded in when e1000_check_options is called. ++ * ++ * This is a GCC extension to ANSI C. ++ * See the item "Labeled Elements in Initializers" in the section ++ * "Extensions to the C Language Family" of the GCC documentation. 
++ */ ++ ++#define E1000_PARAM(X, desc) \ ++ static const int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ MODULE_PARM(X, "1-" __MODULE_STRING(E1000_MAX_NIC) "i"); \ ++ MODULE_PARM_DESC(X, desc); ++#else ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \ ++ static int num_##X = 0; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++#endif ++ ++/* Transmit Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++ ++E1000_PARAM(TxDescriptors, "Number of transmit descriptors"); ++ ++/* Receive Descriptor Count ++ * ++ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers ++ * Valid Range: 80-4096 for 82544 and newer ++ * ++ * Default Value: 256 ++ */ ++ ++E1000_PARAM(RxDescriptors, "Number of receive descriptors"); ++ ++/* User Specified Speed Override ++ * ++ * Valid Range: 0, 10, 100, 1000 ++ * - 0 - auto-negotiate at all supported speeds ++ * - 10 - only link at 10 Mbps ++ * - 100 - only link at 100 Mbps ++ * - 1000 - only link at 1000 Mbps ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(Speed, "Speed setting"); ++ ++/* User Specified Duplex Override ++ * ++ * Valid Range: 0-2 ++ * - 0 - auto-negotiate for duplex ++ * - 1 - only link at half duplex ++ * - 2 - only link at full duplex ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(Duplex, "Duplex setting"); ++ ++/* Auto-negotiation Advertisement Override ++ * ++ * Valid Range: 0x01-0x0F, 0x20-0x2F (copper); 0x20 (fiber) ++ * ++ * The AutoNeg value is a bit mask describing which speed and duplex ++ * combinations should be advertised during auto-negotiation. ++ * The supported speed and duplex modes are listed below ++ * ++ * Bit 7 6 5 4 3 2 1 0 ++ * Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10 ++ * Duplex Full Full Half Full Half ++ * ++ * Default Value: 0x2F (copper); 0x20 (fiber) ++ */ ++ ++E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting"); ++ ++/* User Specified Flow Control Override ++ * ++ * Valid Range: 0-3 ++ * - 0 - No Flow Control ++ * - 1 - Rx only, respond to PAUSE frames but do not generate them ++ * - 2 - Tx only, generate PAUSE frames but ignore them on receive ++ * - 3 - Full Flow Control Support ++ * ++ * Default Value: Read flow control settings from the EEPROM ++ */ ++ ++E1000_PARAM(FlowControl, "Flow Control setting"); ++ ++/* XsumRX - Receive Checksum Offload Enable/Disable ++ * ++ * Valid Range: 0, 1 ++ * - 0 - disables all checksum offload ++ * - 1 - enables receive IP/TCP/UDP checksum offload ++ * on 82543 and newer -based NICs ++ * ++ * Default Value: 1 ++ */ ++ ++E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload"); ++ ++/* Transmit Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++ ++/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++ ++/* Receive Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 ++ */ ++ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++ ++/* Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(RxAbsIntDelay, "Receive 
Absolute Interrupt Delay"); ++ ++/* Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic) ++ * ++ * Default Value: 0 for rtnet ++ */ ++ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++ ++/* Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++#define AUTONEG_ADV_DEFAULT 0x2F ++#define AUTONEG_ADV_MASK 0x2F ++#define FLOW_CONTROL_DEFAULT FLOW_CONTROL_FULL ++ ++#define DEFAULT_RDTR 0 ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++#define DEFAULT_RADV 0 ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ char *name; ++ char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(int *value, struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { ++ DPRINTK(PROBE, INFO, ++ "%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ DPRINTK(PROBE, INFO, "%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ DPRINTK(PROBE, INFO, "Invalid %s value specified (%i) %s\n", ++ opt->name, *value, opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter); ++static void e1000_check_copper_options(struct e1000_adapter *adapter); ++ ++/** ++ * e1000_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. 
++ **/ ++ ++void e1000_check_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++ if (bd >= E1000_MAX_NIC) { ++ DPRINTK(PROBE, NOTICE, ++ "Warning: no configuration for board #%i\n", bd); ++ DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); ++#ifndef module_param_array ++ bd = E1000_MAX_NIC; ++#endif ++ } ++ ++ { /* Transmit Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_TXD), ++ .def = E1000_DEFAULT_TXD, ++ .arg = { .r = { .min = E1000_MIN_TXD }} ++ }; ++ struct e1000_tx_ring *tx_ring = adapter->tx_ring; ++ int i; ++ e1000_mac_type mac_type = adapter->hw.mac_type; ++ opt.arg.r.max = mac_type < e1000_82544 ? ++ E1000_MAX_TXD : E1000_MAX_82544_TXD; ++ ++#ifdef module_param_array ++ if (num_TxDescriptors > bd) { ++#endif ++ tx_ring->count = TxDescriptors[bd]; ++ e1000_validate_option(&tx_ring->count, &opt, adapter); ++ E1000_ROUNDUP(tx_ring->count, ++ REQ_TX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ tx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_tx_queues; i++) ++ tx_ring[i].count = tx_ring->count; ++ } ++ { /* Receive Descriptor Count */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Descriptors", ++ .err = "using default of " ++ __MODULE_STRING(E1000_DEFAULT_RXD), ++ .def = E1000_DEFAULT_RXD, ++ .arg = { .r = { .min = E1000_MIN_RXD }} ++ }; ++ struct e1000_rx_ring *rx_ring = adapter->rx_ring; ++ int i; ++ e1000_mac_type mac_type = adapter->hw.mac_type; ++ opt.arg.r.max = mac_type < e1000_82544 ? E1000_MAX_RXD : ++ E1000_MAX_82544_RXD; ++ ++#ifdef module_param_array ++ if (num_RxDescriptors > bd) { ++#endif ++ rx_ring->count = RxDescriptors[bd]; ++ e1000_validate_option(&rx_ring->count, &opt, adapter); ++ E1000_ROUNDUP(rx_ring->count, ++ REQ_RX_DESCRIPTOR_MULTIPLE); ++#ifdef module_param_array ++ } else { ++ rx_ring->count = opt.def; ++ } ++#endif ++ for (i = 0; i < adapter->num_rx_queues; i++) ++ rx_ring[i].count = rx_ring->count; ++ } ++ { /* Checksum Offload Enable/Disable */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Checksum Offload", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_XsumRX > bd) { ++#endif ++ int rx_csum = XsumRX[bd]; ++ e1000_validate_option(&rx_csum, &opt, adapter); ++ adapter->rx_csum = rx_csum; ++#ifdef module_param_array ++ } else { ++ adapter->rx_csum = opt.def; ++ } ++#endif ++ } ++ { /* Flow Control */ ++ ++ struct e1000_opt_list fc_list[] = ++ {{ e1000_fc_none, "Flow Control Disabled" }, ++ { e1000_fc_rx_pause,"Flow Control Receive Only" }, ++ { e1000_fc_tx_pause,"Flow Control Transmit Only" }, ++ { e1000_fc_full, "Flow Control Enabled" }, ++ { e1000_fc_default, "Flow Control Hardware Default" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Flow Control", ++ .err = "reading default settings from EEPROM", ++ .def = e1000_fc_default, ++ .arg = { .l = { .nr = ARRAY_SIZE(fc_list), ++ .p = fc_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_FlowControl > bd) { ++#endif ++ int fc = FlowControl[bd]; ++ e1000_validate_option(&fc, &opt, adapter); ++ adapter->hw.fc = adapter->hw.original_fc = fc; ++#ifdef module_param_array ++ } else { ++ adapter->hw.fc = adapter->hw.original_fc = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit 
Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxIntDelay > bd) { ++#endif ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_TxAbsIntDelay > bd) { ++#endif ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxIntDelay > bd) { ++#endif ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY }} ++ }; ++ ++#ifdef module_param_array ++ if (num_RxAbsIntDelay > bd) { ++#endif ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++#ifdef module_param_array ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++#endif ++ } ++ { /* Interrupt Throttling Rate */ ++ struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = MAX_ITR }} ++ }; ++ ++#ifdef module_param_array ++ if (num_InterruptThrottleRate > bd) { ++#endif ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ DPRINTK(PROBE, INFO, "%s turned off\n", ++ opt.name); ++ break; ++ case 1: ++ DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", ++ opt.name); ++ break; ++ default: ++ e1000_validate_option(&adapter->itr, &opt, ++ adapter); ++ break; ++ } ++#ifdef module_param_array ++ } else { ++ adapter->itr = opt.def; ++ } ++#endif ++ } ++ { /* Smart Power Down */ ++ struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_SmartPowerDownEnable > bd) { ++#endif ++ int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ adapter->smart_power_down = spd; ++#ifdef module_param_array ++ } else { ++ adapter->smart_power_down = opt.def; ++ } ++#endif ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ struct e1000_option opt = { 
++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++#ifdef module_param_array ++ if (num_KumeranLockLoss > bd) { ++#endif ++ int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ adapter->hw.kmrn_lock_loss_workaround_disabled = !kmrn_lock_loss; ++#ifdef module_param_array ++ } else { ++ adapter->hw.kmrn_lock_loss_workaround_disabled = !opt.def; ++ } ++#endif ++ } ++ ++ switch (adapter->hw.media_type) { ++ case e1000_media_type_fiber: ++ case e1000_media_type_internal_serdes: ++ e1000_check_fiber_options(adapter); ++ break; ++ case e1000_media_type_copper: ++ e1000_check_copper_options(adapter); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++/** ++ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on fiber adapters ++ **/ ++ ++static void e1000_check_fiber_options(struct e1000_adapter *adapter) ++{ ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++ if ((Speed[bd] != OPTION_UNSET)) { ++#else ++ if (num_Speed > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Speed not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((Duplex[bd] != OPTION_UNSET)) { ++#else ++ if (num_Duplex > bd) { ++#endif ++ DPRINTK(PROBE, INFO, "Duplex not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++ ++#ifndef module_param_array ++ if ((AutoNeg[bd] != OPTION_UNSET) && (AutoNeg[bd] != 0x20)) { ++#else ++ if ((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) { ++#endif ++ DPRINTK(PROBE, INFO, "AutoNeg other than 1000/Full is " ++ "not valid for fiber adapters, " ++ "parameter ignored\n"); ++ } ++} ++ ++/** ++ * e1000_check_copper_options - Range Checking for Link Options, Copper Version ++ * @adapter: board private structure ++ * ++ * Handles speed and duplex options on copper adapters ++ **/ ++ ++static void e1000_check_copper_options(struct e1000_adapter *adapter) ++{ ++ int speed, dplx, an; ++ int bd = adapter->bd_number; ++#ifndef module_param_array ++ bd = bd > E1000_MAX_NIC ? E1000_MAX_NIC : bd; ++#endif ++ ++ { /* Speed */ ++ struct e1000_opt_list speed_list[] = {{ 0, "" }, ++ { SPEED_10, "" }, ++ { SPEED_100, "" }, ++ { SPEED_1000, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Speed", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(speed_list), ++ .p = speed_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_Speed > bd) { ++#endif ++ speed = Speed[bd]; ++ e1000_validate_option(&speed, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ speed = opt.def; ++ } ++#endif ++ } ++ { /* Duplex */ ++ struct e1000_opt_list dplx_list[] = {{ 0, "" }, ++ { HALF_DUPLEX, "" }, ++ { FULL_DUPLEX, "" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "Duplex", ++ .err = "parameter ignored", ++ .def = 0, ++ .arg = { .l = { .nr = ARRAY_SIZE(dplx_list), ++ .p = dplx_list }} ++ }; ++ ++ if (e1000_check_phy_reset_block(&adapter->hw)) { ++ DPRINTK(PROBE, INFO, ++ "Link active due to SoL/IDER Session. 
" ++ "Speed/Duplex/AutoNeg parameter ignored.\n"); ++ return; ++ } ++#ifdef module_param_array ++ if (num_Duplex > bd) { ++#endif ++ dplx = Duplex[bd]; ++ e1000_validate_option(&dplx, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ dplx = opt.def; ++ } ++#endif ++ } ++ ++#ifdef module_param_array ++ if ((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) { ++#else ++ if (AutoNeg[bd] != OPTION_UNSET && (speed != 0 || dplx != 0)) { ++#endif ++ DPRINTK(PROBE, INFO, ++ "AutoNeg specified along with Speed or Duplex, " ++ "parameter ignored\n"); ++ adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT; ++ } else { /* Autoneg */ ++ struct e1000_opt_list an_list[] = ++ #define AA "AutoNeg advertising " ++ {{ 0x01, AA "10/HD" }, ++ { 0x02, AA "10/FD" }, ++ { 0x03, AA "10/FD, 10/HD" }, ++ { 0x04, AA "100/HD" }, ++ { 0x05, AA "100/HD, 10/HD" }, ++ { 0x06, AA "100/HD, 10/FD" }, ++ { 0x07, AA "100/HD, 10/FD, 10/HD" }, ++ { 0x08, AA "100/FD" }, ++ { 0x09, AA "100/FD, 10/HD" }, ++ { 0x0a, AA "100/FD, 10/FD" }, ++ { 0x0b, AA "100/FD, 10/FD, 10/HD" }, ++ { 0x0c, AA "100/FD, 100/HD" }, ++ { 0x0d, AA "100/FD, 100/HD, 10/HD" }, ++ { 0x0e, AA "100/FD, 100/HD, 10/FD" }, ++ { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x20, AA "1000/FD" }, ++ { 0x21, AA "1000/FD, 10/HD" }, ++ { 0x22, AA "1000/FD, 10/FD" }, ++ { 0x23, AA "1000/FD, 10/FD, 10/HD" }, ++ { 0x24, AA "1000/FD, 100/HD" }, ++ { 0x25, AA "1000/FD, 100/HD, 10/HD" }, ++ { 0x26, AA "1000/FD, 100/HD, 10/FD" }, ++ { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" }, ++ { 0x28, AA "1000/FD, 100/FD" }, ++ { 0x29, AA "1000/FD, 100/FD, 10/HD" }, ++ { 0x2a, AA "1000/FD, 100/FD, 10/FD" }, ++ { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" }, ++ { 0x2c, AA "1000/FD, 100/FD, 100/HD" }, ++ { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" }, ++ { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" }, ++ { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }}; ++ ++ struct e1000_option opt = { ++ .type = list_option, ++ .name = "AutoNeg", ++ .err = "parameter ignored", ++ .def = AUTONEG_ADV_DEFAULT, ++ .arg = { .l = { .nr = ARRAY_SIZE(an_list), ++ .p = an_list }} ++ }; ++ ++#ifdef module_param_array ++ if (num_AutoNeg > bd) { ++#endif ++ an = AutoNeg[bd]; ++ e1000_validate_option(&an, &opt, adapter); ++#ifdef module_param_array ++ } else { ++ an = opt.def; ++ } ++#endif ++ adapter->hw.autoneg_advertised = an; ++ } ++ ++ switch (speed + dplx) { ++ case 0: ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++#ifdef module_param_array ++ if ((num_Speed > bd) && (speed != 0 || dplx != 0)) ++#else ++ if (Speed[bd] != OPTION_UNSET || Duplex[bd] != OPTION_UNSET) ++#endif ++ DPRINTK(PROBE, INFO, ++ "Speed and duplex autonegotiation enabled\n"); ++ break; ++ case HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Half Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Half Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_100_HALF; ++ break; ++ case FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Full Duplex specified without Speed\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_10_FULL | ++ ADVERTISE_100_FULL | ++ ADVERTISE_1000_FULL; ++ break; ++ case SPEED_10: ++ DPRINTK(PROBE, INFO, "10 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at 10 Mbps only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ 
adapter->hw.autoneg_advertised = ADVERTISE_10_HALF | ++ ADVERTISE_10_FULL; ++ break; ++ case SPEED_10 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Half Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_10_half; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_10 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 10 Mbps Full Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_10_full; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_100: ++ DPRINTK(PROBE, INFO, "100 Mbps Speed specified " ++ "without Duplex\n"); ++ DPRINTK(PROBE, INFO, "Using Autonegotiation at " ++ "100 Mbps only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_100_HALF | ++ ADVERTISE_100_FULL; ++ break; ++ case SPEED_100 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Half Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_100_half; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_100 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, "Forcing to 100 Mbps Full Duplex\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 0; ++ adapter->hw.forced_speed_duplex = e1000_100_full; ++ adapter->hw.autoneg_advertised = 0; ++ break; ++ case SPEED_1000: ++ DPRINTK(PROBE, INFO, "1000 Mbps Speed specified without " ++ "Duplex\n"); ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + HALF_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Half Duplex is not supported at 1000 Mbps\n"); ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps " ++ "Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ case SPEED_1000 + FULL_DUPLEX: ++ DPRINTK(PROBE, INFO, ++ "Using Autonegotiation at 1000 Mbps Full Duplex only\n"); ++ adapter->hw.autoneg = adapter->fc_autoneg = 1; ++ adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* Speed, AutoNeg and MDI/MDI-X must all play nice */ ++ if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) { ++ DPRINTK(PROBE, INFO, ++ "Speed, AutoNeg and MDI-X specifications are " ++ "incompatible. Setting MDI-X to a compatible value.\n"); ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/natsemi.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/natsemi.c 2021-04-07 16:01:27.370633962 +0800 +@@ -0,0 +1,2094 @@ ++/* natsemi.c: A Linux PCI Ethernet driver for the NatSemi DP8381x series. */ ++/* ++ Written/copyright 1999-2001 by Donald Becker. ++ Portions copyright (c) 2001,2002 Sun Microsystems (thockin@sun.com) ++ Portions copyright 2001,2002 Manfred Spraul (manfred@colorfullife.com) ++ ++ This software may be used and distributed according to the terms of ++ the GNU General Public License (GPL), incorporated herein by reference. ++ Drivers based on or derived from this code fall under the GPL and must ++ retain the authorship, copyright and license notice. This file is not ++ a complete program and may only be used when the entire operating ++ system is licensed under the GPL. License for under other terms may be ++ available. Contact the original author for details. 
++ ++ The original author may be reached as becker@scyld.com, or at ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ Support information and updates available at ++ http://www.scyld.com/network/netsemi.html ++ ++ ++ Linux kernel modifications: ++ ++ Version 1.0.1: ++ - Spinlock fixes ++ - Bug fixes and better intr performance (Tjeerd) ++ Version 1.0.2: ++ - Now reads correct MAC address from eeprom ++ Version 1.0.3: ++ - Eliminate redundant priv->tx_full flag ++ - Call netif_start_queue from dev->tx_timeout ++ - wmb() in start_tx() to flush data ++ - Update Tx locking ++ - Clean up PCI enable (davej) ++ Version 1.0.4: ++ - Merge Donald Becker's natsemi.c version 1.07 ++ Version 1.0.5: ++ - { fill me in } ++ Version 1.0.6: ++ * ethtool support (jgarzik) ++ * Proper initialization of the card (which sometimes ++ fails to occur and leaves the card in a non-functional ++ state). (uzi) ++ ++ * Some documented register settings to optimize some ++ of the 100Mbit autodetection circuitry in rev C cards. (uzi) ++ ++ * Polling of the PHY intr for stuff like link state ++ change and auto- negotiation to finally work properly. (uzi) ++ ++ * One-liner removal of a duplicate declaration of ++ netdev_error(). (uzi) ++ ++ Version 1.0.7: (Manfred Spraul) ++ * pci dma ++ * SMP locking update ++ * full reset added into tx_timeout ++ * correct multicast hash generation (both big and little endian) ++ [copied from a natsemi driver version ++ from Myrio Corporation, Greg Smith] ++ * suspend/resume ++ ++ version 1.0.8 (Tim Hockin ) ++ * ETHTOOL_* support ++ * Wake on lan support (Erik Gilling) ++ * MXDMA fixes for serverworks ++ * EEPROM reload ++ ++ version 1.0.9 (Manfred Spraul) ++ * Main change: fix lack of synchronize ++ netif_close/netif_suspend against a last interrupt ++ or packet. ++ * do not enable superflous interrupts (e.g. the ++ drivers relies on TxDone - TxIntr not needed) ++ * wait that the hardware has really stopped in close ++ and suspend. ++ * workaround for the (at least) gcc-2.95.1 compiler ++ problem. Also simplifies the code a bit. ++ * disable_irq() in tx_timeout - needed to protect ++ against rx interrupts. ++ * stop the nic before switching into silent rx mode ++ for wol (required according to docu). ++ ++ version 1.0.10: ++ * use long for ee_addr (various) ++ * print pointers properly (DaveM) ++ * include asm/irq.h (?) 
++ ++ version 1.0.11: ++ * check and reset if PHY errors appear (Adrian Sun) ++ * WoL cleanup (Tim Hockin) ++ * Magic number cleanup (Tim Hockin) ++ * Don't reload EEPROM on every reset (Tim Hockin) ++ * Save and restore EEPROM state across reset (Tim Hockin) ++ * MDIO Cleanup (Tim Hockin) ++ * Reformat register offsets/bits (jgarzik) ++ ++ version 1.0.12: ++ * ETHTOOL_* further support (Tim Hockin) ++ ++ version 1.0.13: ++ * ETHTOOL_[G]EEPROM support (Tim Hockin) ++ ++ version 1.0.13: ++ * crc cleanup (Matt Domsch ) ++ ++ version 1.0.14: ++ * Cleanup some messages and autoneg in ethtool (Tim Hockin) ++ ++ version 1.0.15: ++ * Get rid of cable_magic flag ++ * use new (National provided) solution for cable magic issue ++ ++ version 1.0.16: ++ * call netdev_rx() for RxErrors (Manfred Spraul) ++ * formatting and cleanups ++ * change options and full_duplex arrays to be zero ++ initialized ++ * enable only the WoL and PHY interrupts in wol mode ++ ++ version 1.0.17: ++ * only do cable_magic on 83815 and early 83816 (Tim Hockin) ++ * create a function for rx refill (Manfred Spraul) ++ * combine drain_ring and init_ring (Manfred Spraul) ++ * oom handling (Manfred Spraul) ++ * hands_off instead of playing with netif_device_{de,a}ttach ++ (Manfred Spraul) ++ * be sure to write the MAC back to the chip (Manfred Spraul) ++ * lengthen EEPROM timeout, and always warn about timeouts ++ (Manfred Spraul) ++ * comments update (Manfred) ++ * do the right thing on a phy-reset (Manfred and Tim) ++ ++ TODO: ++ * big endian support with CFG:BEM instead of cpu_to_le32 ++ * support for an external PHY ++ * NAPI ++ ++ Ported to RTNET: December 2003, Erik Buit ++*/ ++ ++#if !defined(__OPTIMIZE__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* Processor type for cache alignment. */ ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++#define DRV_NAME "natsemi-rt" ++#define DRV_VERSION "1.07+LK1.0.17-RTnet-0.2" ++#define DRV_RELDATE "Dec 16, 2003" ++ ++/* Updated to recommendations in pci-skeleton v2.03. */ ++ ++/* The user-configurable values. ++ These may be modified when a driver module is loaded.*/ ++ ++#define NATSEMI_DEF_MSG (NETIF_MSG_DRV | \ ++ NETIF_MSG_LINK | \ ++ NETIF_MSG_WOL | \ ++ NETIF_MSG_RX_ERR | \ ++ NETIF_MSG_TX_ERR) ++static int local_debug = -1; ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++static int mtu; ++ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1518 effectively disables this feature. */ ++/*** RTnet *** ++static int rx_copybreak; ++ *** RTnet ***/ ++ ++/* Used to pass the media type, etc. ++ Both 'options[]' and 'full_duplex[]' should exist for driver ++ interoperability. ++ The media type is usually passed in 'options[]'. 
++*/ ++static int options[MAX_UNITS]; ++static int full_duplex[MAX_UNITS]; ++ ++/* Operational parameters that are set at compile time. */ ++ ++/* Keep the ring sizes a power of two for compile efficiency. ++ The compiler will convert '%'<2^N> into a bit mask. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define TX_QUEUE_LEN 10 /* Limit ring entries actually used, min 4. */ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++ ++#define NATSEMI_HW_TIMEOUT 400 ++#define NATSEMI_TIMER_FREQ 3*HZ ++#define NATSEMI_PG0_NREGS 64 ++#define NATSEMI_RFDR_NREGS 8 ++#define NATSEMI_PG1_NREGS 4 ++#define NATSEMI_NREGS (NATSEMI_PG0_NREGS + NATSEMI_RFDR_NREGS + \ ++ NATSEMI_PG1_NREGS) ++#define NATSEMI_REGS_VER 1 /* v1 added RFDR registers */ ++#define NATSEMI_REGS_SIZE (NATSEMI_NREGS * sizeof(u32)) ++#define NATSEMI_EEPROM_SIZE 24 /* 12 16-bit values */ ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */ ++ ++/* These identify the driver base version and may not be removed. */ ++static char version[] = ++ KERN_INFO DRV_NAME " dp8381x driver, version " ++ DRV_VERSION ", " DRV_RELDATE "\n" ++ KERN_INFO " originally by Donald Becker \n" ++ KERN_INFO " http://www.scyld.com/network/natsemi.html\n" ++ KERN_INFO " 2.4.x kernel port by Jeff Garzik, Tjeerd Mulder\n" ++ KERN_INFO " RTnet port by Erik Buit\n"; ++ ++MODULE_AUTHOR("Erik Buit"); ++MODULE_DESCRIPTION("RTnet National Semiconductor DP8381x series PCI Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++module_param(max_interrupt_work, int, 0444); ++module_param(mtu, int, 0444); ++module_param_named(debug, local_debug, int, 0444); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(max_interrupt_work, ++ "DP8381x maximum events handled per interrupt"); ++MODULE_PARM_DESC(mtu, "DP8381x MTU (all boards)"); ++MODULE_PARM_DESC(debug, "DP8381x default debug level"); ++/*** RTnet *** ++MODULE_PARM_DESC(rx_copybreak, ++ "DP8381x copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(options, "DP8381x: Bits 0-3: media type, bit 17: full duplex"); ++MODULE_PARM_DESC(full_duplex, "DP8381x full duplex setting(s) (1)"); ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This driver is designed for National Semiconductor DP83815 PCI Ethernet NIC. ++It also works with other chips in in the DP83810 series. ++ ++II. Board-specific settings ++ ++This driver requires the PCI interrupt line to be valid. ++It honors the EEPROM-set values. ++ ++III. Driver operation ++ ++IIIa. Ring buffers ++ ++This driver uses two statically allocated fixed-size descriptor lists ++formed into rings by a branch from the final descriptor to the beginning of ++the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. ++The NatSemi design uses a 'next descriptor' pointer that the driver forms ++into a list. ++ ++IIIb/c. Transmit/Receive Structure ++ ++This driver uses a zero-copy receive and transmit scheme. ++The driver allocates full frame size skbuffs for the Rx ring buffers at ++open() time and passes the skb->data field to the chip as receive data ++buffers. 
When an incoming frame is less than RX_COPYBREAK bytes long, ++a fresh skbuff is allocated and the frame is copied to the new skbuff. ++When the incoming frame is larger, the skbuff is passed directly up the ++protocol stack. Buffers consumed this way are replaced by newly allocated ++skbuffs in a later phase of receives. ++ ++The RX_COPYBREAK value is chosen to trade-off the memory wasted by ++using a full-sized skbuff for small frames vs. the copying costs of larger ++frames. New boards are typically used in generously configured machines ++and the underfilled buffers have negligible impact compared to the benefit of ++a single allocation size, so the default value of zero results in never ++copying packets. When copying is done, the cost is usually mitigated by using ++a combined copy/checksum routine. Copying also preloads the cache, which is ++most useful with small frames. ++ ++A subtle aspect of the operation is that unaligned buffers are not permitted ++by the hardware. Thus the IP header at offset 14 in an ethernet frame isn't ++longword aligned for further processing. On copies frames are put into the ++skbuff at an offset of "+2", 16-byte aligning the IP header. ++ ++IIId. Synchronization ++ ++Most operations are synchronized on the np->lock irq spinlock, except the ++performance critical codepaths: ++ ++The rx process only runs in the interrupt handler. Access from outside ++the interrupt handler is only permitted after disable_irq(). ++ ++The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap ++is set, then access is permitted under spin_lock_irq(&np->lock). ++ ++Thus configuration functions that want to access everything must call ++ disable_irq(dev->irq); ++ spin_lock_bh(dev->xmit_lock); ++ spin_lock_irq(&np->lock); ++ ++IV. Notes ++ ++NatSemi PCI network controllers are very uncommon. ++ ++IVb. References ++ ++http://www.scyld.com/expert/100mbps.html ++http://www.scyld.com/expert/NWay.html ++Datasheet is available from: ++http://www.national.com/pf/DP/DP83815.html ++ ++IVc. Errata ++ ++None characterised. ++*/ ++ ++ ++ ++enum pcistuff { ++ PCI_USES_IO = 0x01, ++ PCI_USES_MEM = 0x02, ++ PCI_USES_MASTER = 0x04, ++ PCI_ADDR0 = 0x08, ++ PCI_ADDR1 = 0x10, ++}; ++ ++/* MMIO operations required */ ++#define PCI_IOTYPE (PCI_USES_MASTER | PCI_USES_MEM | PCI_ADDR1) ++ ++ ++/* array of board data directly indexed by pci_tbl[x].driver_data */ ++static struct { ++ const char *name; ++ unsigned long flags; ++} natsemi_pci_info[] = { ++ { "NatSemi DP8381[56]", PCI_IOTYPE }, ++}; ++ ++static struct pci_device_id natsemi_pci_tbl[] = { ++ { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_83815, PCI_ANY_ID, PCI_ANY_ID, }, ++ { 0, }, ++}; ++MODULE_DEVICE_TABLE(pci, natsemi_pci_tbl); ++ ++/* Offsets to the device registers. ++ Unlike software-only systems, device drivers interact with complex hardware. ++ It's not useful to define symbolic names for every register bit in the ++ device. 
++*/ ++enum register_offsets { ++ ChipCmd = 0x00, ++ ChipConfig = 0x04, ++ EECtrl = 0x08, ++ PCIBusCfg = 0x0C, ++ IntrStatus = 0x10, ++ IntrMask = 0x14, ++ IntrEnable = 0x18, ++ IntrHoldoff = 0x16, /* DP83816 only */ ++ TxRingPtr = 0x20, ++ TxConfig = 0x24, ++ RxRingPtr = 0x30, ++ RxConfig = 0x34, ++ ClkRun = 0x3C, ++ WOLCmd = 0x40, ++ PauseCmd = 0x44, ++ RxFilterAddr = 0x48, ++ RxFilterData = 0x4C, ++ BootRomAddr = 0x50, ++ BootRomData = 0x54, ++ SiliconRev = 0x58, ++ StatsCtrl = 0x5C, ++ StatsData = 0x60, ++ RxPktErrs = 0x60, ++ RxMissed = 0x68, ++ RxCRCErrs = 0x64, ++ BasicControl = 0x80, ++ BasicStatus = 0x84, ++ AnegAdv = 0x90, ++ AnegPeer = 0x94, ++ PhyStatus = 0xC0, ++ MIntrCtrl = 0xC4, ++ MIntrStatus = 0xC8, ++ PhyCtrl = 0xE4, ++ ++ /* These are from the spec, around page 78... on a separate table. ++ * The meaning of these registers depend on the value of PGSEL. */ ++ PGSEL = 0xCC, ++ PMDCSR = 0xE4, ++ TSTDAT = 0xFC, ++ DSPCFG = 0xF4, ++ SDCFG = 0xF8 ++}; ++/* the values for the 'magic' registers above (PGSEL=1) */ ++#define PMDCSR_VAL 0x189c /* enable preferred adaptation circuitry */ ++#define TSTDAT_VAL 0x0 ++#define DSPCFG_VAL 0x5040 ++#define SDCFG_VAL 0x008c /* set voltage thresholds for Signal Detect */ ++#define DSPCFG_LOCK 0x20 /* coefficient lock bit in DSPCFG */ ++#define TSTDAT_FIXED 0xe8 /* magic number for bad coefficients */ ++ ++/* misc PCI space registers */ ++enum pci_register_offsets { ++ PCIPM = 0x44, ++}; ++ ++enum ChipCmd_bits { ++ ChipReset = 0x100, ++ RxReset = 0x20, ++ TxReset = 0x10, ++ RxOff = 0x08, ++ RxOn = 0x04, ++ TxOff = 0x02, ++ TxOn = 0x01, ++}; ++ ++enum ChipConfig_bits { ++ CfgPhyDis = 0x200, ++ CfgPhyRst = 0x400, ++ CfgExtPhy = 0x1000, ++ CfgAnegEnable = 0x2000, ++ CfgAneg100 = 0x4000, ++ CfgAnegFull = 0x8000, ++ CfgAnegDone = 0x8000000, ++ CfgFullDuplex = 0x20000000, ++ CfgSpeed100 = 0x40000000, ++ CfgLink = 0x80000000, ++}; ++ ++enum EECtrl_bits { ++ EE_ShiftClk = 0x04, ++ EE_DataIn = 0x01, ++ EE_ChipSelect = 0x08, ++ EE_DataOut = 0x02, ++}; ++ ++enum PCIBusCfg_bits { ++ EepromReload = 0x4, ++}; ++ ++/* Bits in the interrupt status/mask registers. 
*/ ++enum IntrStatus_bits { ++ IntrRxDone = 0x0001, ++ IntrRxIntr = 0x0002, ++ IntrRxErr = 0x0004, ++ IntrRxEarly = 0x0008, ++ IntrRxIdle = 0x0010, ++ IntrRxOverrun = 0x0020, ++ IntrTxDone = 0x0040, ++ IntrTxIntr = 0x0080, ++ IntrTxErr = 0x0100, ++ IntrTxIdle = 0x0200, ++ IntrTxUnderrun = 0x0400, ++ StatsMax = 0x0800, ++ SWInt = 0x1000, ++ WOLPkt = 0x2000, ++ LinkChange = 0x4000, ++ IntrHighBits = 0x8000, ++ RxStatusFIFOOver = 0x10000, ++ IntrPCIErr = 0xf00000, ++ RxResetDone = 0x1000000, ++ TxResetDone = 0x2000000, ++ IntrAbnormalSummary = 0xCD20, ++}; ++ ++/* ++ * Default Interrupts: ++ * Rx OK, Rx Packet Error, Rx Overrun, ++ * Tx OK, Tx Packet Error, Tx Underrun, ++ * MIB Service, Phy Interrupt, High Bits, ++ * Rx Status FIFO overrun, ++ * Received Target Abort, Received Master Abort, ++ * Signalled System Error, Received Parity Error ++ */ ++#define DEFAULT_INTR 0x00f1cd65 ++ ++enum TxConfig_bits { ++ TxDrthMask = 0x3f, ++ TxFlthMask = 0x3f00, ++ TxMxdmaMask = 0x700000, ++ TxMxdma_512 = 0x0, ++ TxMxdma_4 = 0x100000, ++ TxMxdma_8 = 0x200000, ++ TxMxdma_16 = 0x300000, ++ TxMxdma_32 = 0x400000, ++ TxMxdma_64 = 0x500000, ++ TxMxdma_128 = 0x600000, ++ TxMxdma_256 = 0x700000, ++ TxCollRetry = 0x800000, ++ TxAutoPad = 0x10000000, ++ TxMacLoop = 0x20000000, ++ TxHeartIgn = 0x40000000, ++ TxCarrierIgn = 0x80000000 ++}; ++ ++enum RxConfig_bits { ++ RxDrthMask = 0x3e, ++ RxMxdmaMask = 0x700000, ++ RxMxdma_512 = 0x0, ++ RxMxdma_4 = 0x100000, ++ RxMxdma_8 = 0x200000, ++ RxMxdma_16 = 0x300000, ++ RxMxdma_32 = 0x400000, ++ RxMxdma_64 = 0x500000, ++ RxMxdma_128 = 0x600000, ++ RxMxdma_256 = 0x700000, ++ RxAcceptLong = 0x8000000, ++ RxAcceptTx = 0x10000000, ++ RxAcceptRunt = 0x40000000, ++ RxAcceptErr = 0x80000000 ++}; ++ ++enum ClkRun_bits { ++ PMEEnable = 0x100, ++ PMEStatus = 0x8000, ++}; ++ ++enum WolCmd_bits { ++ WakePhy = 0x1, ++ WakeUnicast = 0x2, ++ WakeMulticast = 0x4, ++ WakeBroadcast = 0x8, ++ WakeArp = 0x10, ++ WakePMatch0 = 0x20, ++ WakePMatch1 = 0x40, ++ WakePMatch2 = 0x80, ++ WakePMatch3 = 0x100, ++ WakeMagic = 0x200, ++ WakeMagicSecure = 0x400, ++ SecureHack = 0x100000, ++ WokePhy = 0x400000, ++ WokeUnicast = 0x800000, ++ WokeMulticast = 0x1000000, ++ WokeBroadcast = 0x2000000, ++ WokeArp = 0x4000000, ++ WokePMatch0 = 0x8000000, ++ WokePMatch1 = 0x10000000, ++ WokePMatch2 = 0x20000000, ++ WokePMatch3 = 0x40000000, ++ WokeMagic = 0x80000000, ++ WakeOptsSummary = 0x7ff ++}; ++ ++enum RxFilterAddr_bits { ++ RFCRAddressMask = 0x3ff, ++ AcceptMulticast = 0x00200000, ++ AcceptMyPhys = 0x08000000, ++ AcceptAllPhys = 0x10000000, ++ AcceptAllMulticast = 0x20000000, ++ AcceptBroadcast = 0x40000000, ++ RxFilterEnable = 0x80000000 ++}; ++ ++enum StatsCtrl_bits { ++ StatsWarn = 0x1, ++ StatsFreeze = 0x2, ++ StatsClear = 0x4, ++ StatsStrobe = 0x8, ++}; ++ ++enum MIntrCtrl_bits { ++ MICRIntEn = 0x2, ++}; ++ ++enum PhyCtrl_bits { ++ PhyAddrMask = 0xf, ++}; ++ ++/* values we might find in the silicon revision register */ ++#define SRR_DP83815_C 0x0302 ++#define SRR_DP83815_D 0x0403 ++#define SRR_DP83816_A4 0x0504 ++#define SRR_DP83816_A5 0x0505 ++ ++/* The Rx and Tx buffer descriptors. */ ++/* Note that using only 32 bit fields simplifies conversion to big-endian ++ architectures. 
*/ ++struct netdev_desc { ++ u32 next_desc; ++ s32 cmd_status; ++ u32 addr; ++ u32 software_use; ++}; ++ ++/* Bits in network_desc.status */ ++enum desc_status_bits { ++ DescOwn=0x80000000, DescMore=0x40000000, DescIntr=0x20000000, ++ DescNoCRC=0x10000000, DescPktOK=0x08000000, ++ DescSizeMask=0xfff, ++ ++ DescTxAbort=0x04000000, DescTxFIFO=0x02000000, ++ DescTxCarrier=0x01000000, DescTxDefer=0x00800000, ++ DescTxExcDefer=0x00400000, DescTxOOWCol=0x00200000, ++ DescTxExcColl=0x00100000, DescTxCollCount=0x000f0000, ++ ++ DescRxAbort=0x04000000, DescRxOver=0x02000000, ++ DescRxDest=0x01800000, DescRxLong=0x00400000, ++ DescRxRunt=0x00200000, DescRxInvalid=0x00100000, ++ DescRxCRC=0x00080000, DescRxAlign=0x00040000, ++ DescRxLoop=0x00020000, DesRxColl=0x00010000, ++}; ++ ++struct netdev_private { ++ /* Descriptor rings first for alignment */ ++ dma_addr_t ring_dma; ++ struct netdev_desc *rx_ring; ++ struct netdev_desc *tx_ring; ++ /* The addresses of receive-in-place skbuffs */ ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t rx_dma[RX_RING_SIZE]; ++ /* address of a sent-in-place packet/buffer, for later free() */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t tx_dma[TX_RING_SIZE]; ++ struct net_device_stats stats; ++ /* Media monitoring timer */ ++ struct timer_list timer; ++ /* Frequently used values: keep some adjacent for cache effect */ ++ struct pci_dev *pci_dev; ++ struct netdev_desc *rx_head_desc; ++ /* Producer/consumer ring indices */ ++ unsigned int cur_rx, dirty_rx; ++ unsigned int cur_tx, dirty_tx; ++ /* Based on MTU+slack. */ ++ unsigned int rx_buf_sz; ++ int oom; ++ /* Do not touch the nic registers */ ++ int hands_off; ++ /* These values are keep track of the transceiver/media in use */ ++ unsigned int full_duplex; ++ /* Rx filter */ ++ u32 cur_rx_mode; ++ u32 rx_filter[16]; ++ /* FIFO and PCI burst thresholds */ ++ u32 tx_config, rx_config; ++ /* original contents of ClkRun register */ ++ u32 SavedClkRun; ++ /* silicon revision */ ++ u32 srr; ++ /* expected DSPCFG value */ ++ u16 dspcfg; ++ /* MII transceiver section */ ++ u16 advertising; ++ unsigned int iosize; ++ rtdm_lock_t lock; ++ u32 msg_enable; ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++static int eeprom_read(long ioaddr, int location); ++static int mdio_read(struct rtnet_device *dev, int phy_id, int reg); ++/*static void mdio_write(struct rtnet_device *dev, int phy_id, int reg, u16 data);*/ ++static void natsemi_reset(struct rtnet_device *dev); ++static void natsemi_reload_eeprom(struct rtnet_device *dev); ++static void natsemi_stop_rxtx(struct rtnet_device *dev); ++static int netdev_open(struct rtnet_device *dev); ++static void do_cable_magic(struct rtnet_device *dev); ++static void undo_cable_magic(struct rtnet_device *dev); ++static void check_link(struct rtnet_device *dev); ++/*static void netdev_timer(unsigned long data);*/ ++static void dump_ring(struct rtnet_device *dev); ++/*static void tx_timeout(struct rtnet_device *dev);*/ ++static int alloc_ring(struct rtnet_device *dev); ++static void refill_rx(struct rtnet_device *dev); ++static void init_ring(struct rtnet_device *dev); ++static void drain_tx(struct rtnet_device *dev); ++static void drain_ring(struct rtnet_device *dev); ++static void free_ring(struct rtnet_device *dev); ++/*static void reinit_ring(struct rtnet_device *dev);*/ ++static void init_registers(struct rtnet_device *dev); ++static int start_tx(struct rtskb *skb, struct rtnet_device *dev); ++static int intr_handler(rtdm_irq_t *irq_handle); ++static 
void netdev_error(struct rtnet_device *dev, int intr_status); ++static void netdev_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp); ++static void netdev_tx_done(struct rtnet_device *dev); ++static void __set_rx_mode(struct rtnet_device *dev); ++/*static void set_rx_mode(struct rtnet_device *dev);*/ ++static void __get_stats(struct rtnet_device *rtdev); ++static struct net_device_stats *get_stats(struct rtnet_device *dev); ++/*static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++static int netdev_set_wol(struct rtnet_device *dev, u32 newval); ++static int netdev_get_wol(struct rtnet_device *dev, u32 *supported, u32 *cur); ++static int netdev_set_sopass(struct rtnet_device *dev, u8 *newval); ++static int netdev_get_sopass(struct rtnet_device *dev, u8 *data); ++static int netdev_get_ecmd(struct rtnet_device *dev, struct ethtool_cmd *ecmd); ++static int netdev_set_ecmd(struct rtnet_device *dev, struct ethtool_cmd *ecmd); ++static void enable_wol_mode(struct rtnet_device *dev, int enable_intr);*/ ++static int netdev_close(struct rtnet_device *dev); ++/*static int netdev_get_regs(struct rtnet_device *dev, u8 *buf); ++static int netdev_get_eeprom(struct rtnet_device *dev, u8 *buf);*/ ++ ++ ++static int natsemi_probe1 (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct netdev_private *np; ++ int i, option, irq, chip_idx = ent->driver_data; ++ static int find_cnt = -1; ++ unsigned long ioaddr, iosize; ++ const int pcibar = 1; /* PCI base address register */ ++ int prev_eedata; ++ u32 tmp; ++ ++/* when built into the kernel, we only print version if device is found */ ++#ifndef MODULE ++ static int printed_version; ++ if (!printed_version++) ++ rtdm_printk(version); ++#endif ++ ++ i = pci_enable_device(pdev); ++ if (i) return i; ++ ++ /* natsemi has a non-standard PM control register ++ * in PCI config space. Some boards apparently need ++ * to be brought to D0 in this manner. ++ */ ++ pci_read_config_dword(pdev, PCIPM, &tmp); ++ if (tmp & PCI_PM_CTRL_STATE_MASK) { ++ /* D0 state, disable PME assertion */ ++ u32 newtmp = tmp & ~PCI_PM_CTRL_STATE_MASK; ++ pci_write_config_dword(pdev, PCIPM, newtmp); ++ } ++ ++ find_cnt++; ++ ioaddr = pci_resource_start(pdev, pcibar); ++ iosize = pci_resource_len(pdev, pcibar); ++ irq = pdev->irq; ++ ++/*** RTnet ***/ ++ if (cards[find_cnt] == 0) ++ goto err_out; ++/*** RTnet ***/ ++ ++ if (natsemi_pci_info[chip_idx].flags & PCI_USES_MASTER) ++ pci_set_master(pdev); ++ ++/*** RTnet ***/ ++ dev = rt_alloc_etherdev(sizeof(struct netdev_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ rtdm_printk(KERN_ERR "init_ethernet failed for card #%d\n", find_cnt); ++ goto err_out; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++/*** RTnet ***/ ++ ++ i = pci_request_regions(pdev, dev->name); ++ if (i) { ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++ return i; ++ } ++ ++ { ++ void *mmio = ioremap (ioaddr, iosize); ++ if (!mmio) { ++ pci_release_regions(pdev); ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++ return -ENOMEM; ++ } ++ ioaddr = (unsigned long) mmio; ++ } ++ ++ /* Work around the dropped serial bit. 
*/ ++ prev_eedata = eeprom_read(ioaddr, 6); ++ for (i = 0; i < 3; i++) { ++ int eedata = eeprom_read(ioaddr, i + 7); ++ dev->dev_addr[i*2] = (eedata << 1) + (prev_eedata >> 15); ++ dev->dev_addr[i*2+1] = eedata >> 7; ++ prev_eedata = eedata; ++ } ++ ++ dev->base_addr = ioaddr; ++ dev->irq = irq; ++ ++ np = dev->priv; ++ ++ np->pci_dev = pdev; ++ pci_set_drvdata(pdev, dev); ++ np->iosize = iosize; ++ rtdm_lock_init(&np->lock); ++ np->msg_enable = (local_debug >= 0) ? (1<hands_off = 0; ++ ++ /* Reset the chip to erase previous misconfiguration. */ ++ natsemi_reload_eeprom(dev); ++ natsemi_reset(dev); ++ ++ option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; ++ if (dev->mem_start) ++ option = dev->mem_start; ++ ++ /* The lower four bits are the media type. */ ++ if (option) { ++ if (option & 0x200) ++ np->full_duplex = 1; ++ if (option & 15) ++ rtdm_printk(KERN_INFO ++ "%s: ignoring user supplied media type %d", ++ dev->name, option & 15); ++ } ++ if (find_cnt < MAX_UNITS && full_duplex[find_cnt]) ++ np->full_duplex = 1; ++ ++ /* The chip-specific entries in the device structure. */ ++ dev->open = &netdev_open; ++ dev->hard_start_xmit = &start_tx; ++ dev->stop = &netdev_close; ++ dev->get_stats = &get_stats; ++/*** RTnet *** ++ dev->set_multicast_list = &set_rx_mode; ++ dev->do_ioctl = &netdev_ioctl; ++ dev->tx_timeout = &tx_timeout; ++ dev->watchdog_timeo = TX_TIMEOUT; ++ *** RTnet ***/ ++ ++ if (mtu) ++ dev->mtu = mtu; ++ ++/*** RTnet ***/ ++ i = rt_register_rtnetdev(dev); ++ if (i) { ++ goto err_out_unmap; ++ } ++/*** RTnet ***/ ++ ++ rtnetif_carrier_off(dev); ++ ++ if (netif_msg_drv(np)) { ++ rtdm_printk(KERN_INFO "%s: %s at %#08lx, ", ++ dev->name, natsemi_pci_info[chip_idx].name, ioaddr); ++ for (i = 0; i < ETH_ALEN-1; i++) ++ rtdm_printk("%02x:", dev->dev_addr[i]); ++ rtdm_printk("%02x, IRQ %d.\n", dev->dev_addr[i], irq); ++ } ++ ++ np->advertising = mdio_read(dev, 1, MII_ADVERTISE); ++ if ((readl((void *)(ioaddr + ChipConfig)) & 0xe000) != 0xe000 ++ && netif_msg_probe(np)) { ++ u32 chip_config = readl((void *)(ioaddr + ChipConfig)); ++ rtdm_printk(KERN_INFO "%s: Transceiver default autonegotiation %s " ++ "10%s %s duplex.\n", ++ dev->name, ++ chip_config & CfgAnegEnable ? ++ "enabled, advertise" : "disabled, force", ++ chip_config & CfgAneg100 ? "0" : "", ++ chip_config & CfgAnegFull ? "full" : "half"); ++ } ++ if (netif_msg_probe(np)) ++ rtdm_printk(KERN_INFO ++ "%s: Transceiver status %#04x advertising %#04x.\n", ++ dev->name, mdio_read(dev, 1, MII_BMSR), ++ np->advertising); ++ ++ /* save the silicon revision for later querying */ ++ np->srr = readl((void *)(ioaddr + SiliconRev)); ++ if (netif_msg_hw(np)) ++ rtdm_printk(KERN_INFO "%s: silicon revision %#04x.\n", ++ dev->name, np->srr); ++ ++ ++ return 0; ++ ++err_out_unmap: ++#ifdef USE_MEM ++ iounmap((void *)ioaddr); ++err_out_free_res: ++#endif ++ pci_release_regions(pdev); ++/*err_out_free_netdev:*/ ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++err_out: ++ return -ENODEV; ++ ++} ++ ++ ++/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. ++ The EEPROM code is for the common 93c06/46 EEPROMs with 6 bit addresses. */ ++ ++/* Delay between EEPROM clock transitions. ++ No extra delay is needed with 33Mhz PCI, but future 66Mhz access may need ++ a delay. Note that pre-2.0.34 kernels had a cache-alignment bug that ++ made udelay() unreliable. ++ The old method of using an ISA access as a delay, __SLOW_DOWN_IO__, is ++ depricated. 
++*/ ++#define eeprom_delay(ee_addr) readl((void *)(ee_addr)) ++ ++#define EE_Write0 (EE_ChipSelect) ++#define EE_Write1 (EE_ChipSelect | EE_DataIn) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++enum EEPROM_Cmds { ++ EE_WriteCmd=(5 << 6), EE_ReadCmd=(6 << 6), EE_EraseCmd=(7 << 6), ++}; ++ ++static int eeprom_read(long addr, int location) ++{ ++ int i; ++ int retval = 0; ++ long ee_addr = addr + EECtrl; ++ int read_cmd = location | EE_ReadCmd; ++ writel(EE_Write0, (void *)ee_addr); ++ ++ /* Shift the read command bits out. */ ++ for (i = 10; i >= 0; i--) { ++ short dataval = (read_cmd & (1 << i)) ? EE_Write1 : EE_Write0; ++ writel(dataval, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ writel(dataval | EE_ShiftClk, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ } ++ writel(EE_ChipSelect, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ ++ for (i = 0; i < 16; i++) { ++ writel(EE_ChipSelect | EE_ShiftClk, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ retval |= (readl((void *)ee_addr) & EE_DataOut) ? 1 << i : 0; ++ writel(EE_ChipSelect, (void *)ee_addr); ++ eeprom_delay(ee_addr); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ writel(EE_Write0, (void *)ee_addr); ++ writel(0, (void *)ee_addr); ++ return retval; ++} ++ ++/* MII transceiver control section. ++ * The 83815 series has an internal transceiver, and we present the ++ * management registers as if they were MII connected. */ ++ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int reg) ++{ ++ if (phy_id == 1 && reg < 32) ++ return readl((void *)(dev->base_addr+BasicControl+(reg<<2)))&0xffff; ++ else ++ return 0xffff; ++} ++/*** RTnet ++static void mdio_write(struct rtnet_device *dev, int phy_id, int reg, u16 data) ++{ ++ struct netdev_private *np = dev->priv; ++ if (phy_id == 1 && reg < 32) { ++ writew(data, dev->base_addr+BasicControl+(reg<<2)); ++ switch (reg) { ++ case MII_ADVERTISE: np->advertising = data; break; ++ } ++ } ++} ++RTnet ***/ ++/* CFG bits [13:16] [18:23] */ ++#define CFG_RESET_SAVE 0xfde000 ++/* WCSR bits [0:4] [9:10] */ ++#define WCSR_RESET_SAVE 0x61f ++/* RFCR bits [20] [22] [27:31] */ ++#define RFCR_RESET_SAVE 0xf8500000; ++ ++static void natsemi_reset(struct rtnet_device *dev) ++{ ++ int i; ++ u32 cfg; ++ u32 wcsr; ++ u32 rfcr; ++ u16 pmatch[3]; ++ u16 sopass[3]; ++ struct netdev_private *np = dev->priv; ++ ++ /* ++ * Resetting the chip causes some registers to be lost. ++ * Natsemi suggests NOT reloading the EEPROM while live, so instead ++ * we save the state that would have been loaded from EEPROM ++ * on a normal power-up (see the spec EEPROM map). This assumes ++ * whoever calls this will follow up with init_registers() eventually. 
++ */ ++ ++ /* CFG */ ++ cfg = readl((void *)(dev->base_addr + ChipConfig)) & CFG_RESET_SAVE; ++ /* WCSR */ ++ wcsr = readl((void *)(dev->base_addr + WOLCmd)) & WCSR_RESET_SAVE; ++ /* RFCR */ ++ rfcr = readl((void *)(dev->base_addr + RxFilterAddr)) & RFCR_RESET_SAVE; ++ /* PMATCH */ ++ for (i = 0; i < 3; i++) { ++ writel(i*2, (void *)(dev->base_addr + RxFilterAddr)); ++ pmatch[i] = readw((void *)(dev->base_addr + RxFilterData)); ++ } ++ /* SOPAS */ ++ for (i = 0; i < 3; i++) { ++ writel(0xa+(i*2), (void *)(dev->base_addr + RxFilterAddr)); ++ sopass[i] = readw((void *)(dev->base_addr + RxFilterData)); ++ } ++ ++ /* now whack the chip */ ++ writel(ChipReset, (void *)(dev->base_addr + ChipCmd)); ++ for (i=0;ibase_addr + ChipCmd)) & ChipReset)) ++ break; ++ udelay(5); ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: reset did not complete in %d usec.\n", ++ dev->name, i*5); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: reset completed in %d usec.\n", ++ dev->name, i*5); ++ } ++ ++ /* restore CFG */ ++ cfg |= readl((void *)(dev->base_addr + ChipConfig)) & ~CFG_RESET_SAVE; ++ writel(cfg, (void *)(dev->base_addr + ChipConfig)); ++ /* restore WCSR */ ++ wcsr |= readl((void *)(dev->base_addr + WOLCmd)) & ~WCSR_RESET_SAVE; ++ writel(wcsr, (void *)(dev->base_addr + WOLCmd)); ++ /* read RFCR */ ++ rfcr |= readl((void *)(dev->base_addr + RxFilterAddr)) & ~RFCR_RESET_SAVE; ++ /* restore PMATCH */ ++ for (i = 0; i < 3; i++) { ++ writel(i*2, (void *)(dev->base_addr + RxFilterAddr)); ++ writew(pmatch[i], (void *)(dev->base_addr + RxFilterData)); ++ } ++ for (i = 0; i < 3; i++) { ++ writel(0xa+(i*2), (void *)(dev->base_addr + RxFilterAddr)); ++ writew(sopass[i], (void *)(dev->base_addr + RxFilterData)); ++ } ++ /* restore RFCR */ ++ writel(rfcr, (void *)(dev->base_addr + RxFilterAddr)); ++} ++ ++static void natsemi_reload_eeprom(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ writel(EepromReload, (void *)(dev->base_addr + PCIBusCfg)); ++ for (i=0;ibase_addr + PCIBusCfg)) & EepromReload)) ++ break; ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: EEPROM did not reload in %d usec.\n", ++ dev->name, i*50); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: EEPROM reloaded in %d usec.\n", ++ dev->name, i*50); ++ } ++} ++ ++static void natsemi_stop_rxtx(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ writel(RxOff | TxOff, (void *)(ioaddr + ChipCmd)); ++ for(i=0;i< NATSEMI_HW_TIMEOUT;i++) { ++ if ((readl((void *)(ioaddr + ChipCmd)) & (TxOn|RxOn)) == 0) ++ break; ++ udelay(5); ++ } ++ if (i==NATSEMI_HW_TIMEOUT) { ++ rtdm_printk(KERN_WARNING "%s: Tx/Rx process did not stop in %d usec.\n", ++ dev->name, i*5); ++ } else if (netif_msg_hw(np)) { ++ rtdm_printk(KERN_DEBUG "%s: Tx/Rx process stopped in %d usec.\n", ++ dev->name, i*5); ++ } ++} ++ ++static int netdev_open(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int i; ++ ++ /* Reset the chip, just in case. 
*/ ++ natsemi_reset(dev); ++ ++/*** RTnet ***/ ++ rt_stack_connect(dev, &STACK_manager); ++ i = rtdm_irq_request(&np->irq_handle, dev->irq, intr_handler, ++ RTDM_IRQTYPE_SHARED, "rt_natsemi", dev); ++/*** RTnet ***/ ++/* i = request_irq(dev->irq, &intr_handler, SA_SHIRQ, dev->name, dev);*/ ++ if (i) { ++ return i; ++ } ++ ++ if (netif_msg_ifup(np)) ++ rtdm_printk(KERN_DEBUG "%s: netdev_open() irq %d.\n", ++ dev->name, dev->irq); ++ i = alloc_ring(dev); ++ if (i < 0) { ++ rtdm_irq_free(&np->irq_handle); ++ return i; ++ } ++ init_ring(dev); ++ init_registers(dev); ++ /* now set the MAC address according to dev->dev_addr */ ++ for (i = 0; i < 3; i++) { ++ u16 mac = (dev->dev_addr[2*i+1]<<8) + dev->dev_addr[2*i]; ++ ++ writel(i*2, (void *)(ioaddr + RxFilterAddr)); ++ writew(mac, (void *)(ioaddr + RxFilterData)); ++ } ++ writel(np->cur_rx_mode, (void *)(ioaddr + RxFilterAddr)); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++ if (netif_msg_ifup(np)) ++ rtdm_printk(KERN_DEBUG "%s: Done netdev_open(), status: %#08x.\n", ++ dev->name, (int)readl((void *)(ioaddr + ChipCmd))); ++ ++/*** RTnet ***/ ++ /* Set the timer to check for link beat. */ ++/*** RTnet ***/ ++ ++ return 0; ++} ++ ++static void do_cable_magic(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ if (np->srr >= SRR_DP83816_A5) ++ return; ++ ++ /* ++ * 100 MBit links with short cables can trip an issue with the chip. ++ * The problem manifests as lots of CRC errors and/or flickering ++ * activity LED while idle. This process is based on instructions ++ * from engineers at National. ++ */ ++ if (readl((void *)(dev->base_addr + ChipConfig)) & CfgSpeed100) { ++ u16 data; ++ ++ writew(1, (void *)(dev->base_addr + PGSEL)); ++ /* ++ * coefficient visibility should already be enabled via ++ * DSPCFG | 0x1000 ++ */ ++ data = readw((void *)(dev->base_addr + TSTDAT)) & 0xff; ++ /* ++ * the value must be negative, and within certain values ++ * (these values all come from National) ++ */ ++ if (!(data & 0x80) || ((data >= 0xd8) && (data <= 0xff))) { ++ struct netdev_private *np = dev->priv; ++ ++ /* the bug has been triggered - fix the coefficient */ ++ writew(TSTDAT_FIXED, (void *)(dev->base_addr + TSTDAT)); ++ /* lock the value */ ++ data = readw((void *)(dev->base_addr + DSPCFG)); ++ np->dspcfg = data | DSPCFG_LOCK; ++ writew(np->dspcfg, (void *)(dev->base_addr + DSPCFG)); ++ } ++ writew(0, (void *)(dev->base_addr + PGSEL)); ++ } ++} ++ ++static void undo_cable_magic(struct rtnet_device *dev) ++{ ++ u16 data; ++ struct netdev_private *np = dev->priv; ++ ++ if (np->srr >= SRR_DP83816_A5) ++ return; ++ ++ writew(1, (void *)(dev->base_addr + PGSEL)); ++ /* make sure the lock bit is clear */ ++ data = readw((void *)(dev->base_addr + DSPCFG)); ++ np->dspcfg = data & ~DSPCFG_LOCK; ++ writew(np->dspcfg, (void *)(dev->base_addr + DSPCFG)); ++ writew(0, (void *)(dev->base_addr + PGSEL)); ++} ++ ++static void check_link(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int duplex; ++ int chipcfg = readl((void *)(ioaddr + ChipConfig)); ++ ++ if (!(chipcfg & CfgLink)) { ++ if (rtnetif_carrier_ok(dev)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_NOTICE "%s: link down.\n", ++ dev->name); ++ rtnetif_carrier_off(dev); ++ undo_cable_magic(dev); ++ } ++ return; ++ } ++ if (!rtnetif_carrier_ok(dev)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_NOTICE "%s: link up.\n", dev->name); ++ rtnetif_carrier_on(dev); ++ do_cable_magic(dev); ++ } ++ ++ duplex = 
np->full_duplex || (chipcfg & CfgFullDuplex ? 1 : 0); ++ ++ /* if duplex is set then bit 28 must be set, too */ ++ if (duplex ^ !!(np->rx_config & RxAcceptTx)) { ++ if (netif_msg_link(np)) ++ rtdm_printk(KERN_INFO ++ "%s: Setting %s-duplex based on negotiated " ++ "link capability.\n", dev->name, ++ duplex ? "full" : "half"); ++ if (duplex) { ++ np->rx_config |= RxAcceptTx; ++ np->tx_config |= TxCarrierIgn | TxHeartIgn; ++ } else { ++ np->rx_config &= ~RxAcceptTx; ++ np->tx_config &= ~(TxCarrierIgn | TxHeartIgn); ++ } ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ writel(np->rx_config, (void *)(ioaddr + RxConfig)); ++ } ++} ++ ++static void init_registers(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ int i; ++ ++ for (i=0;ibase_addr + ChipConfig)) & CfgAnegDone) ++ break; ++ udelay(10); ++ } ++ if (i==NATSEMI_HW_TIMEOUT && netif_msg_link(np)) { ++ rtdm_printk(KERN_INFO ++ "%s: autonegotiation did not complete in %d usec.\n", ++ dev->name, i*10); ++ } ++ ++ /* On page 78 of the spec, they recommend some settings for "optimum ++ performance" to be done in sequence. These settings optimize some ++ of the 100Mbit autodetection circuitry. They say we only want to ++ do this for rev C of the chip, but engineers at NSC (Bradley ++ Kennedy) recommends always setting them. If you don't, you get ++ errors on some autonegotiations that make the device unusable. ++ */ ++ writew(1, (void *)(ioaddr + PGSEL)); ++ writew(PMDCSR_VAL, (void *)(ioaddr + PMDCSR)); ++ writew(TSTDAT_VAL, (void *)(ioaddr + TSTDAT)); ++ writew(DSPCFG_VAL, (void *)(ioaddr + DSPCFG)); ++ writew(SDCFG_VAL, (void *)(ioaddr + SDCFG)); ++ writew(0, (void *)(ioaddr + PGSEL)); ++ np->dspcfg = DSPCFG_VAL; ++ ++ /* Enable PHY Specific event based interrupts. Link state change ++ and Auto-Negotiation Completion are among the affected. ++ Read the intr status to clear it (needed for wake events). ++ */ ++ readw((void *)(ioaddr + MIntrStatus)); ++ writew(MICRIntEn, (void *)(ioaddr + MIntrCtrl)); ++ ++ /* clear any interrupts that are pending, such as wake events */ ++ readl((void *)(ioaddr + IntrStatus)); ++ ++ writel(np->ring_dma, (void *)(ioaddr + RxRingPtr)); ++ writel(np->ring_dma + RX_RING_SIZE * sizeof(struct netdev_desc), ++ (void *)(ioaddr + TxRingPtr)); ++ ++ /* Initialize other registers. ++ * Configure the PCI bus bursts and FIFO thresholds. ++ * Configure for standard, in-spec Ethernet. ++ * Start with half-duplex. check_link will update ++ * to the correct settings. ++ */ ++ ++ /* DRTH: 2: start tx if 64 bytes are in the fifo ++ * FLTH: 0x10: refill with next packet if 512 bytes are free ++ * MXDMA: 0: up to 256 byte bursts. ++ * MXDMA must be <= FLTH ++ * ECRETRY=1 ++ * ATP=1 ++ */ ++ np->tx_config = TxAutoPad | TxCollRetry | TxMxdma_256 | (0x1002); ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ ++ /* DRTH 0x10: start copying to memory if 128 bytes are in the fifo ++ * MXDMA 0: up to 256 byte bursts ++ */ ++ np->rx_config = RxMxdma_256 | 0x20; ++ writel(np->rx_config, (void *)(ioaddr + RxConfig)); ++ ++ /* Disable PME: ++ * The PME bit is initialized from the EEPROM contents. ++ * PCI cards probably have PME disabled, but motherboard ++ * implementations may have PME set to enable WakeOnLan. ++ * With PME set the chip will scan incoming packets but ++ * nothing will be written to memory. 
*/ ++ np->SavedClkRun = readl((void *)(ioaddr + ClkRun)); ++ writel(np->SavedClkRun & ~PMEEnable, (void *)(ioaddr + ClkRun)); ++ if (np->SavedClkRun & PMEStatus && netif_msg_wol(np)) { ++ rtdm_printk(KERN_NOTICE "%s: Wake-up event %#08x\n", ++ dev->name, readl((void *)(ioaddr + WOLCmd))); ++ } ++ ++ check_link(dev); ++ __set_rx_mode(dev); ++ ++ /* Enable interrupts by setting the interrupt mask. */ ++ writel(DEFAULT_INTR, (void *)(ioaddr + IntrMask)); ++ writel(1, (void *)(ioaddr + IntrEnable)); ++ ++ writel(RxOn | TxOn, (void *)(ioaddr + ChipCmd)); ++ writel(StatsClear, (void *)(ioaddr + StatsCtrl)); /* Clear Stats */ ++} ++ ++/* ++ * netdev_timer: ++ * Purpose: ++ * 1) check for link changes. Usually they are handled by the MII interrupt ++ * but it doesn't hurt to check twice. ++ * 2) check for sudden death of the NIC: ++ * It seems that a reference set for this chip went out with incorrect info, ++ * and there exist boards that aren't quite right. An unexpected voltage ++ * drop can cause the PHY to get itself in a weird state (basically reset). ++ * NOTE: this only seems to affect revC chips. ++ * 3) check of death of the RX path due to OOM ++ */ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static void dump_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_pktdata(np)) { ++ int i; ++ rtdm_printk(KERN_DEBUG " Tx ring at %p:\n", np->tx_ring); ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ rtdm_printk(KERN_DEBUG " #%d desc. %#08x %#08x %#08x.\n", ++ i, np->tx_ring[i].next_desc, ++ np->tx_ring[i].cmd_status, ++ np->tx_ring[i].addr); ++ } ++ rtdm_printk(KERN_DEBUG " Rx ring %p:\n", np->rx_ring); ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ rtdm_printk(KERN_DEBUG " #%d desc. %#08x %#08x %#08x.\n", ++ i, np->rx_ring[i].next_desc, ++ np->rx_ring[i].cmd_status, ++ np->rx_ring[i].addr); ++ } ++ } ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int alloc_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ np->rx_ring = pci_alloc_consistent(np->pci_dev, ++ sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), ++ &np->ring_dma); ++ if (!np->rx_ring) ++ return -ENOMEM; ++ np->tx_ring = &np->rx_ring[RX_RING_SIZE]; ++ return 0; ++} ++ ++static void refill_rx(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ /* Refill the Rx ring buffers. */ ++ for (; np->cur_rx - np->dirty_rx > 0; np->dirty_rx++) { ++ struct rtskb *skb; ++ int entry = np->dirty_rx % RX_RING_SIZE; ++ if (np->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); ++ np->rx_skbuff[entry] = skb; ++ if (skb == NULL) ++ break; /* Better luck next round. */ ++ np->rx_dma[entry] = pci_map_single(np->pci_dev, ++ skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]); ++ } ++ np->rx_ring[entry].cmd_status = cpu_to_le32(np->rx_buf_sz); ++ } ++ if (np->cur_rx - np->dirty_rx == RX_RING_SIZE) { ++ if (netif_msg_rx_err(np)) ++ rtdm_printk(KERN_WARNING "%s: going OOM.\n", dev->name); ++ np->oom = 1; ++ } ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. 
*/ ++static void init_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* 1) TX ring */ ++ np->dirty_tx = np->cur_tx = 0; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_skbuff[i] = NULL; ++ np->tx_ring[i].next_desc = cpu_to_le32(np->ring_dma ++ +sizeof(struct netdev_desc) ++ *((i+1)%TX_RING_SIZE+RX_RING_SIZE)); ++ np->tx_ring[i].cmd_status = 0; ++ } ++ ++ /* 2) RX ring */ ++ np->dirty_rx = 0; ++ np->cur_rx = RX_RING_SIZE; ++ np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); ++ np->oom = 0; ++ np->rx_head_desc = &np->rx_ring[0]; ++ ++ /* Please be carefull before changing this loop - at least gcc-2.95.1 ++ * miscompiles it otherwise. ++ */ ++ /* Initialize all Rx descriptors. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].next_desc = cpu_to_le32(np->ring_dma ++ +sizeof(struct netdev_desc) ++ *((i+1)%RX_RING_SIZE)); ++ np->rx_ring[i].cmd_status = cpu_to_le32(DescOwn); ++ np->rx_skbuff[i] = NULL; ++ } ++ refill_rx(dev); ++ dump_ring(dev); ++} ++ ++static void drain_tx(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (np->tx_skbuff[i]) { ++ pci_unmap_single(np->pci_dev, ++ np->rx_dma[i], np->tx_skbuff[i]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(np->tx_skbuff[i]); ++ np->stats.tx_dropped++; ++ } ++ np->tx_skbuff[i] = NULL; ++ } ++} ++ ++static void drain_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].cmd_status = 0; ++ np->rx_ring[i].addr = 0xBADF00D0; /* An invalid address. */ ++ if (np->rx_skbuff[i]) { ++ pci_unmap_single(np->pci_dev, ++ np->rx_dma[i], np->rx_skbuff[i]->len, ++ PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(np->rx_skbuff[i]); ++ } ++ np->rx_skbuff[i] = NULL; ++ } ++ drain_tx(dev); ++} ++ ++static void free_ring(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ pci_free_consistent(np->pci_dev, ++ sizeof(struct netdev_desc) * (RX_RING_SIZE+TX_RING_SIZE), ++ np->rx_ring, np->ring_dma); ++} ++ ++static int start_tx(struct rtskb *skb, struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ unsigned entry; ++/*** RTnet ***/ ++ rtdm_lockctx_t context; ++/*** RTnet ***/ ++ ++ /* Note: Ordering is important here, set the field with the ++ "ownership" bit last, and only then increment cur_tx. */ ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = np->cur_tx % TX_RING_SIZE; ++ ++ np->tx_skbuff[entry] = skb; ++ np->tx_dma[entry] = pci_map_single(np->pci_dev, ++ skb->data,skb->len, PCI_DMA_TODEVICE); ++ ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]); ++ ++/* spin_lock_irq(&np->lock);*/ ++/*** RTnet ***/ ++ rtdm_lock_get_irqsave(&np->lock, context); ++/*** RTnet ***/ ++ ++ if (!np->hands_off) { ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ np->tx_ring[entry].cmd_status = cpu_to_le32(DescOwn | skb->len); ++ /* StrongARM: Explicitly cache flush np->tx_ring and ++ * skb->data,skb->len. */ ++ wmb(); ++ np->cur_tx++; ++ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) { ++ netdev_tx_done(dev); ++ if (np->cur_tx - np->dirty_tx >= TX_QUEUE_LEN - 1) ++ rtnetif_stop_queue(dev); ++ } ++ /* Wake the potentially-idle transmit channel. 
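++		 * The TxOn write below only nudges the DMA engine; correctness
++		 * relies on the ordering above: fill addr and cmd_status with
++		 * DescOwn set, issue wmb() so the descriptor is globally
++		 * visible, and only then hit ChipCmd.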
*/ ++ writel(TxOn, (void *)(dev->base_addr + ChipCmd)); ++ } else { ++ dev_kfree_rtskb(skb); /*** RTnet ***/ ++ np->stats.tx_dropped++; ++ } ++ ++/* spin_unlock_irq(&np->lock);*/ ++/*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&np->lock, context); ++/*** RTnet ***/ ++ ++/* dev->trans_start = jiffies;*/ ++ ++ if (netif_msg_tx_queued(np)) { ++ rtdm_printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n", ++ dev->name, np->cur_tx, entry); ++ } ++ return 0; ++} ++ ++static void netdev_tx_done(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ ++ for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { ++ int entry = np->dirty_tx % TX_RING_SIZE; ++ if (np->tx_ring[entry].cmd_status & cpu_to_le32(DescOwn)) ++ break; ++ if (netif_msg_tx_done(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: tx frame #%d finished, status %#08x.\n", ++ dev->name, np->dirty_tx, ++ le32_to_cpu(np->tx_ring[entry].cmd_status)); ++ if (np->tx_ring[entry].cmd_status & cpu_to_le32(DescPktOK)) { ++ np->stats.tx_packets++; ++ np->stats.tx_bytes += np->tx_skbuff[entry]->len; ++ } else { /* Various Tx errors */ ++ int tx_status = ++ le32_to_cpu(np->tx_ring[entry].cmd_status); ++ if (tx_status & (DescTxAbort|DescTxExcColl)) ++ np->stats.tx_aborted_errors++; ++ if (tx_status & DescTxFIFO) ++ np->stats.tx_fifo_errors++; ++ if (tx_status & DescTxCarrier) ++ np->stats.tx_carrier_errors++; ++ if (tx_status & DescTxOOWCol) ++ np->stats.tx_window_errors++; ++ np->stats.tx_errors++; ++ } ++ pci_unmap_single(np->pci_dev,np->tx_dma[entry], ++ np->tx_skbuff[entry]->len, ++ PCI_DMA_TODEVICE); ++ /* Free the original skb. */ ++ dev_kfree_rtskb(np->tx_skbuff[entry]); /*** RTnet ***/ ++/* dev_kfree_skb_irq(np->tx_skbuff[entry]);*/ ++ np->tx_skbuff[entry] = NULL; ++ } ++ if (rtnetif_queue_stopped(dev) ++ && np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { ++ /* The ring is no longer full, wake queue. */ ++ rtnetif_wake_queue(dev); ++ } ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int intr_handler(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct netdev_private *np = dev->priv; ++ unsigned int old_packet_cnt = np->stats.rx_packets; /*** RTnet ***/ ++ long ioaddr = dev->base_addr; ++ int boguscnt = max_interrupt_work; ++ int ret = RTDM_IRQ_NONE; ++ ++ if (np->hands_off) ++ return ret; ++ do { ++ /* Reading automatically acknowledges all int sources. */ ++ u32 intr_status = readl((void *)(ioaddr + IntrStatus)); ++ ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Interrupt, status %#08x, mask %#08x.\n", ++ dev->name, intr_status, ++ readl((void *)(ioaddr + IntrMask))); ++ ++ if (intr_status == 0) ++ break; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ if (intr_status & ++ (IntrRxDone | IntrRxIntr | RxStatusFIFOOver | ++ IntrRxErr | IntrRxOverrun)) { ++ netdev_rx(dev, &time_stamp); ++ } ++ ++ if (intr_status & ++ (IntrTxDone | IntrTxIntr | IntrTxIdle | IntrTxErr)) { ++ rtdm_lock_get(&np->lock); ++ netdev_tx_done(dev); ++ rtdm_lock_put(&np->lock); ++ } ++ ++ /* Abnormal error summary/uncommon events handlers. 
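++		 * IntrAbnormalSummary groups the rare conditions (link change,
++		 * stats overflow, Tx underrun, wake-up packets, PCI errors);
++		 * they are handed to netdev_error() outside the Rx/Tx fast
++		 * path, and boguscnt bounds the work done in one interrupt.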
*/ ++ if (intr_status & IntrAbnormalSummary) ++ netdev_error(dev, intr_status); ++ ++ if (--boguscnt < 0) { ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_WARNING ++ "%s: Too much work at interrupt, " ++ "status=%#08x.\n", ++ dev->name, intr_status); ++ break; ++ } ++ } while (1); ++ ++ if (netif_msg_intr(np)) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt.\n", dev->name); ++ ++/*** RTnet ***/ ++ if (old_packet_cnt != np->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ return ret; ++} ++ ++/* This routine is logically part of the interrupt handler, but separated ++ for clarity and better register allocation. */ ++static void netdev_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) ++{ ++ struct netdev_private *np = dev->priv; ++ int entry = np->cur_rx % RX_RING_SIZE; ++ int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx; ++ s32 desc_status = le32_to_cpu(np->rx_head_desc->cmd_status); ++ ++ /* If the driver owns the next entry it's a new packet. Send it up. */ ++ while (desc_status < 0) { /* e.g. & DescOwn */ ++ if (netif_msg_rx_status(np)) ++ rtdm_printk(KERN_DEBUG ++ " netdev_rx() entry %d status was %#08x.\n", ++ entry, desc_status); ++ if (--boguscnt < 0) ++ break; ++ if ((desc_status&(DescMore|DescPktOK|DescRxLong)) != DescPktOK){ ++ if (desc_status & DescMore) { ++ if (netif_msg_rx_err(np)) ++ rtdm_printk(KERN_WARNING ++ "%s: Oversized(?) Ethernet " ++ "frame spanned multiple " ++ "buffers, entry %#08x " ++ "status %#08x.\n", dev->name, ++ np->cur_rx, desc_status); ++ np->stats.rx_length_errors++; ++ } else { ++ /* There was an error. */ ++ np->stats.rx_errors++; ++ if (desc_status & (DescRxAbort|DescRxOver)) ++ np->stats.rx_over_errors++; ++ if (desc_status & (DescRxLong|DescRxRunt)) ++ np->stats.rx_length_errors++; ++ if (desc_status & (DescRxInvalid|DescRxAlign)) ++ np->stats.rx_frame_errors++; ++ if (desc_status & DescRxCRC) ++ np->stats.rx_crc_errors++; ++ } ++ } else { ++ struct rtskb *skb; ++ /* Omit CRC size. */ ++ int pkt_len = (desc_status & DescSizeMask) - 4; ++ /* Check if the packet is long enough to accept ++ * without copying to a minimally-sized skbuff. */ ++/*** RTnet ***/ ++ { ++ skb = np->rx_skbuff[entry]; ++ pci_unmap_single(np->pci_dev, np->rx_dma[entry], ++ np->rx_skbuff[entry]->len, ++ PCI_DMA_FROMDEVICE); ++ rtskb_put(skb, pkt_len); ++ np->rx_skbuff[entry] = NULL; ++ } ++/*** RTnet ***/ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ /*dev->last_rx = jiffies;*/ ++/*** RTnet ***/ ++ np->stats.rx_packets++; ++ np->stats.rx_bytes += pkt_len; ++ } ++ entry = (++np->cur_rx) % RX_RING_SIZE; ++ np->rx_head_desc = &np->rx_ring[entry]; ++ desc_status = le32_to_cpu(np->rx_head_desc->cmd_status); ++ } ++ refill_rx(dev); ++ ++ /* Restart Rx engine if stopped. 
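++	 * In the out-of-memory case nothing is restarted here: the stock
++	 * driver rearmed its recovery timer at this point, and the RTnet port
++	 * leaves that path disabled (see the commented-out mod_timer call).
++	 * In the normal case RxOn is simply rewritten to ChipCmd.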
*/ ++ if (np->oom) ++ ; ++/* mod_timer(&np->timer, jiffies + 1);*/ ++ else ++ writel(RxOn, (void *)(dev->base_addr + ChipCmd)); ++} ++ ++static void netdev_error(struct rtnet_device *dev, int intr_status) ++{ ++ struct netdev_private *np = dev->priv; ++ long ioaddr = dev->base_addr; ++ ++ rtdm_lock_get(&np->lock); ++ if (intr_status & LinkChange) { ++ u16 adv = mdio_read(dev, 1, MII_ADVERTISE); ++ u16 lpa = mdio_read(dev, 1, MII_LPA); ++ if (mdio_read(dev, 1, MII_BMCR) & BMCR_ANENABLE ++ && netif_msg_link(np)) { ++ rtdm_printk(KERN_INFO ++ "%s: Autonegotiation advertising" ++ " %#04x partner %#04x.\n", dev->name, ++ adv, lpa); ++ } ++ ++ /* read MII int status to clear the flag */ ++ readw((void *)(ioaddr + MIntrStatus)); ++ check_link(dev); ++ } ++ if (intr_status & StatsMax) { ++ __get_stats(dev); ++ } ++ if (intr_status & IntrTxUnderrun) { ++ if ((np->tx_config & TxDrthMask) < 62) ++ np->tx_config += 2; ++ if (netif_msg_tx_err(np)) ++ rtdm_printk(KERN_NOTICE ++ "%s: increased Tx threshold, txcfg %#08x.\n", ++ dev->name, np->tx_config); ++ writel(np->tx_config, (void *)(ioaddr + TxConfig)); ++ } ++ if (intr_status & WOLPkt && netif_msg_wol(np)) { ++ int wol_status = readl((void *)(ioaddr + WOLCmd)); ++ rtdm_printk(KERN_NOTICE "%s: Link wake-up event %#08x\n", ++ dev->name, wol_status); ++ } ++ if (intr_status & RxStatusFIFOOver) { ++ if (netif_msg_rx_err(np) && netif_msg_intr(np)) { ++ rtdm_printk(KERN_NOTICE "%s: Rx status FIFO overrun\n", ++ dev->name); ++ } ++ np->stats.rx_fifo_errors++; ++ } ++ /* Hmmmmm, it's not clear how to recover from PCI faults. */ ++ if (intr_status & IntrPCIErr) { ++ rtdm_printk(KERN_NOTICE "%s: PCI error %#08x\n", dev->name, ++ intr_status & IntrPCIErr); ++ np->stats.tx_fifo_errors++; ++ np->stats.rx_fifo_errors++; ++ } ++ rtdm_lock_put(&np->lock); ++} ++ ++static void __get_stats(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ /* The chip only need report frame silently dropped. */ ++ np->stats.rx_crc_errors += readl((void *)(ioaddr + RxCRCErrs)); ++ np->stats.rx_missed_errors += readl((void *)(ioaddr + RxMissed)); ++} ++ ++static struct net_device_stats *get_stats(struct rtnet_device *rtdev) ++{ ++ struct netdev_private *np = rtdev->priv; ++ rtdm_lockctx_t context; ++ ++ /* The chip only need report frame silently dropped. */ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ if (rtnetif_running(rtdev) && !np->hands_off) ++ __get_stats(rtdev); ++ rtdm_lock_put_irqrestore(&np->lock, context); ++ ++ return &np->stats; ++} ++ ++#define HASH_TABLE 0x200 ++static void __set_rx_mode(struct rtnet_device *dev) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ u8 mc_filter[64]; /* Multicast hash filter */ ++ u32 rx_mode; ++ ++ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ /* Unconditionally log net taps. 
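++		 * The three branches below build rx_mode from RxFilterEnable
++		 * plus the acceptance bits (promiscuous adds AcceptAllPhys,
++		 * IFF_ALLMULTI adds AcceptAllMulticast); the multicast case
++		 * also clears the 64-byte hash table 16 bits at a time through
++		 * the RxFilterAddr/RxFilterData window at HASH_TABLE (0x200).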
*/ ++ rtdm_printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", ++ dev->name); ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptAllMulticast | AcceptAllPhys | AcceptMyPhys; ++ } else if (dev->flags & IFF_ALLMULTI) { ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptAllMulticast | AcceptMyPhys; ++ } else { ++ int i; ++ ++ memset(mc_filter, 0, sizeof(mc_filter)); ++ rx_mode = RxFilterEnable | AcceptBroadcast ++ | AcceptMulticast | AcceptMyPhys; ++ for (i = 0; i < 64; i += 2) { ++ writew(HASH_TABLE + i, (void *)(ioaddr + RxFilterAddr)); ++ writew((mc_filter[i+1]<<8) + mc_filter[i], ++ (void *)(ioaddr + RxFilterData)); ++ } ++ } ++ writel(rx_mode, (void *)(ioaddr + RxFilterAddr)); ++ np->cur_rx_mode = rx_mode; ++} ++/*** RTnet ++static void set_rx_mode(struct rtnet_device *dev) ++{ ++ struct netdev_private *np = dev->priv; ++ spin_lock_irq(&np->lock); ++ if (!np->hands_off) ++ __set_rx_mode(dev); ++ spin_unlock_irq(&np->lock); ++} ++RTnet ***/ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static void enable_wol_mode(struct rtnet_device *dev, int enable_intr) ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_wol(np)) ++ rtdm_printk(KERN_INFO "%s: remaining active for wake-on-lan\n", ++ dev->name); ++ ++ /* For WOL we must restart the rx process in silent mode. ++ * Write NULL to the RxRingPtr. Only possible if ++ * rx process is stopped ++ */ ++ writel(0, (void *)(ioaddr + RxRingPtr)); ++ ++ /* read WoL status to clear */ ++ readl((void *)(ioaddr + WOLCmd)); ++ ++ /* PME on, clear status */ ++ writel(np->SavedClkRun | PMEEnable | PMEStatus, (void *)(ioaddr + ClkRun)); ++ ++ /* and restart the rx process */ ++ writel(RxOn, (void *)(ioaddr + ChipCmd)); ++ ++ if (enable_intr) { ++ /* enable the WOL interrupt. ++ * Could be used to send a netlink message. ++ */ ++ writel(WOLPkt | LinkChange, (void *)(ioaddr + IntrMask)); ++ writel(1, (void *)(ioaddr + IntrEnable)); ++ } ++} ++ ++static int netdev_close(struct rtnet_device *dev) ++{ ++ int i; ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ ++ if (netif_msg_ifdown(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Shutting down ethercard, status was %#04x.\n", ++ dev->name, (int)readl((void *)(ioaddr + ChipCmd))); ++ if (netif_msg_pktdata(np)) ++ rtdm_printk(KERN_DEBUG ++ "%s: Queue pointers were Tx %d / %d, Rx %d / %d.\n", ++ dev->name, np->cur_tx, np->dirty_tx, ++ np->cur_rx, np->dirty_rx); ++ ++ /* ++ * FIXME: what if someone tries to close a device ++ * that is suspended? ++ * Should we reenable the nic to switch to ++ * the final WOL settings? ++ */ ++/*** RTnet *** ++ del_timer_sync(&np->timer); ++ *** RTnet ***/ ++/* disable_irq(dev->irq);*/ ++ rtdm_irq_disable(&np->irq_handle); ++ rtdm_lock_get(&np->lock); ++ /* Disable interrupts, and flush posted writes */ ++ writel(0, (void *)(ioaddr + IntrEnable)); ++ readl((void *)(ioaddr + IntrEnable)); ++ np->hands_off = 1; ++ rtdm_lock_put(&np->lock); ++ ++/*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&np->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(dev); ++/*** RTnet ***/ ++ ++/* enable_irq(dev->irq);*/ ++ ++/* free_irq(dev->irq, dev);*/ ++ ++ /* Interrupt disabled, interrupt handler released, ++ * queue stopped, timer deleted, rtnl_lock held ++ * All async codepaths that access the driver are disabled. 
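++	 * That is why the code below may clear np->hands_off again and touch
++	 * the chip (freeze statistics, stop Rx/Tx, read the final counters)
++	 * without racing against the interrupt handler, which has already
++	 * been freed.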
++ */ ++ rtdm_lock_get(&np->lock); ++ np->hands_off = 0; ++ readl((void *)(ioaddr + IntrMask)); ++ readw((void *)(ioaddr + MIntrStatus)); ++ ++ /* Freeze Stats */ ++ writel(StatsFreeze, (void *)(ioaddr + StatsCtrl)); ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ natsemi_stop_rxtx(dev); ++ ++ __get_stats(dev); ++ rtdm_lock_put(&np->lock); ++ ++ /* clear the carrier last - an interrupt could reenable it otherwise */ ++ rtnetif_carrier_off(dev); ++ rtnetif_stop_queue(dev); ++ ++ dump_ring(dev); ++ drain_ring(dev); ++ free_ring(dev); ++ ++ { ++ u32 wol = readl((void *)(ioaddr + WOLCmd)) & WakeOptsSummary; ++ if (wol) { ++ /* restart the NIC in WOL mode. ++ * The nic must be stopped for this. ++ */ ++ enable_wol_mode(dev, 0); ++ } else { ++ /* Restore PME enable bit unmolested */ ++ writel(np->SavedClkRun, (void *)(ioaddr + ClkRun)); ++ } ++ } ++ ++ return 0; ++} ++ ++ ++static void natsemi_remove1 (struct pci_dev *pdev) ++{ ++ ++ /*** RTnet ***/ ++ struct rtnet_device *dev = pci_get_drvdata(pdev); ++ ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++/*** RTnet ***/ ++ ++ pci_release_regions (pdev); ++ iounmap ((char *) dev->base_addr); ++ rtdev_free(dev); /*** RTnet ***/ ++ pci_set_drvdata(pdev, NULL); ++} ++ ++#ifdef CONFIG_PM ++ ++/* ++ * The ns83815 chip doesn't have explicit RxStop bits. ++ * Kicking the Rx or Tx process for a new packet reenables the Rx process ++ * of the nic, thus this function must be very careful: ++ * ++ * suspend/resume synchronization: ++ * entry points: ++ * netdev_open, netdev_close, netdev_ioctl, set_rx_mode, intr_handler, ++ * start_tx, tx_timeout ++ * ++ * No function accesses the hardware without checking np->hands_off. ++ * the check occurs under spin_lock_irq(&np->lock); ++ * exceptions: ++ * * netdev_ioctl: noncritical access. ++ * * netdev_open: cannot happen due to the device_detach ++ * * netdev_close: doesn't hurt. ++ * * netdev_timer: timer stopped by natsemi_suspend. ++ * * intr_handler: doesn't acquire the spinlock. suspend calls ++ * disable_irq() to enforce synchronization. ++ * ++ * Interrupts must be disabled, otherwise hands_off can cause irq storms. ++ */ ++ ++#endif /* CONFIG_PM */ ++ ++static struct pci_driver natsemi_driver = { ++ .name = DRV_NAME, ++ .id_table = natsemi_pci_tbl, ++ .probe = natsemi_probe1, ++ .remove = natsemi_remove1, ++/*#ifdef CONFIG_PM*/ ++}; ++ ++static int __init natsemi_init_mod (void) ++{ ++/* when a module, this is printed whether or not devices are found in probe */ ++#ifdef MODULE ++ rtdm_printk(version); ++#endif ++ ++ return pci_register_driver (&natsemi_driver); ++} ++ ++static void __exit natsemi_exit_mod (void) ++{ ++ pci_unregister_driver (&natsemi_driver); ++} ++ ++module_init(natsemi_init_mod); ++module_exit(natsemi_exit_mod); +--- linux/drivers/xenomai/net/drivers/macb.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/macb.c 2021-04-07 16:01:27.354633985 +0800 +@@ -0,0 +1,1821 @@ ++/* ++ * Cadence MACB/GEM Ethernet Controller driver ++ * ++ * Copyright (C) 2004-2006 Atmel Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * RTnet porting by Cristiano Mantovani & Stefano Banzi (Marposs SpA). 
++ * Copyright (C) 2014 Gilles Chanteperdrix ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "rt_macb.h" ++ ++#define MACB_RX_BUFFER_SIZE 128 ++#define RX_BUFFER_MULTIPLE 64 /* bytes */ ++#define RX_RING_SIZE 512 /* must be power of 2 */ ++#define RX_RING_BYTES (sizeof(struct macb_dma_desc) * RX_RING_SIZE) ++ ++#define TX_RING_SIZE 128 /* must be power of 2 */ ++#define TX_RING_BYTES (sizeof(struct macb_dma_desc) * TX_RING_SIZE) ++ ++/* level of occupied TX descriptors under which we wake up TX process */ ++#define MACB_TX_WAKEUP_THRESH (3 * TX_RING_SIZE / 4) ++ ++#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ ++ | MACB_BIT(ISR_ROVR)) ++#define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ ++ | MACB_BIT(ISR_RLE) \ ++ | MACB_BIT(TXERR)) ++#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)) ++ ++/* ++ * Graceful stop timeouts in us. We should allow up to ++ * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) ++ */ ++#define MACB_HALT_TIMEOUT 1230 ++ ++/* Ring buffer accessors */ ++static unsigned int macb_tx_ring_wrap(unsigned int index) ++{ ++ return index & (TX_RING_SIZE - 1); ++} ++ ++static struct macb_dma_desc *macb_tx_desc(struct macb *bp, unsigned int index) ++{ ++ return &bp->tx_ring[macb_tx_ring_wrap(index)]; ++} ++ ++static struct macb_tx_skb *macb_tx_skb(struct macb *bp, unsigned int index) ++{ ++ return &bp->tx_skb[macb_tx_ring_wrap(index)]; ++} ++ ++static unsigned int macb_rx_ring_wrap(unsigned int index) ++{ ++ return index & (RX_RING_SIZE - 1); ++} ++ ++static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) ++{ ++ return &bp->rx_ring[macb_rx_ring_wrap(index)]; ++} ++ ++static void *macb_rx_buffer(struct macb *bp, unsigned int index) ++{ ++ return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index); ++} ++ ++void rtmacb_set_hwaddr(struct macb *bp) ++{ ++ u32 bottom; ++ u16 top; ++ ++ bottom = cpu_to_le32(*((u32 *)bp->dev->dev_addr)); ++ macb_or_gem_writel(bp, SA1B, bottom); ++ top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4))); ++ macb_or_gem_writel(bp, SA1T, top); ++ ++ /* Clear unused address register sets */ ++ macb_or_gem_writel(bp, SA2B, 0); ++ macb_or_gem_writel(bp, SA2T, 0); ++ macb_or_gem_writel(bp, SA3B, 0); ++ macb_or_gem_writel(bp, SA3T, 0); ++ macb_or_gem_writel(bp, SA4B, 0); ++ macb_or_gem_writel(bp, SA4T, 0); ++} ++EXPORT_SYMBOL_GPL(rtmacb_set_hwaddr); ++ ++void rtmacb_get_hwaddr(struct macb *bp) ++{ ++ struct macb_platform_data *pdata; ++ u32 bottom; ++ u16 top; ++ u8 addr[6]; ++ int i; ++ ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ ++ /* Check all 4 address register for vaild address */ ++ for (i = 0; i < 4; i++) { ++ bottom = macb_or_gem_readl(bp, SA1B + i * 8); ++ top = macb_or_gem_readl(bp, SA1T + i * 8); ++ ++ if (pdata && pdata->rev_eth_addr) { ++ addr[5] = bottom & 0xff; ++ addr[4] = (bottom >> 8) & 0xff; ++ addr[3] = (bottom >> 16) & 0xff; ++ addr[2] = (bottom >> 24) & 0xff; ++ addr[1] = top & 0xff; ++ addr[0] = (top & 0xff00) >> 8; ++ } else { ++ addr[0] = bottom & 0xff; ++ addr[1] = (bottom >> 8) & 0xff; ++ addr[2] = (bottom >> 16) & 0xff; ++ addr[3] = (bottom >> 24) & 0xff; ++ addr[4] = top & 0xff; ++ addr[5] = (top >> 8) & 0xff; ++ } ++ ++ if (is_valid_ether_addr(addr)) { ++ 
memcpy(bp->dev->dev_addr, addr, sizeof(addr)); ++ return; ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(rtmacb_get_hwaddr); ++ ++static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) ++{ ++ struct macb *bp = bus->priv; ++ int value; ++ ++ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) ++ | MACB_BF(RW, MACB_MAN_READ) ++ | MACB_BF(PHYA, mii_id) ++ | MACB_BF(REGA, regnum) ++ | MACB_BF(CODE, MACB_MAN_CODE))); ++ ++ /* wait for end of transfer */ ++ while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR))) ++ cpu_relax(); ++ ++ value = MACB_BFEXT(DATA, macb_readl(bp, MAN)); ++ ++ return value; ++} ++ ++static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum, ++ u16 value) ++{ ++ struct macb *bp = bus->priv; ++ ++ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) ++ | MACB_BF(RW, MACB_MAN_WRITE) ++ | MACB_BF(PHYA, mii_id) ++ | MACB_BF(REGA, regnum) ++ | MACB_BF(CODE, MACB_MAN_CODE) ++ | MACB_BF(DATA, value))); ++ ++ /* wait for end of transfer */ ++ while (!MACB_BFEXT(IDLE, macb_readl(bp, NSR))) ++ cpu_relax(); ++ ++ return 0; ++} ++ ++/** ++ * macb_set_tx_clk() - Set a clock to a new frequency ++ * @clk Pointer to the clock to change ++ * @rate New frequency in Hz ++ * @dev Pointer to the struct rtnet_device ++ */ ++static void macb_set_tx_clk(struct clk *clk, int speed, struct rtnet_device *dev) ++{ ++ long ferr, rate, rate_rounded; ++ ++ switch (speed) { ++ case SPEED_10: ++ rate = 2500000; ++ break; ++ case SPEED_100: ++ rate = 25000000; ++ break; ++ case SPEED_1000: ++ rate = 125000000; ++ break; ++ default: ++ return; ++ } ++ ++ rate_rounded = clk_round_rate(clk, rate); ++ if (rate_rounded < 0) ++ return; ++ ++ /* RGMII allows 50 ppm frequency error. Test and warn if this limit ++ * is not satisfied. ++ */ ++ ferr = abs(rate_rounded - rate); ++ ferr = DIV_ROUND_UP(ferr, rate / 100000); ++ if (ferr > 5) ++ rtdev_warn(dev, "unable to generate target frequency: %ld Hz\n", ++ rate); ++ ++ if (clk_set_rate(clk, rate_rounded)) ++ rtdev_err(dev, "adjusting tx_clk failed.\n"); ++} ++ ++struct macb_dummy_netdev_priv { ++ struct rtnet_device *rtdev; ++}; ++ ++static void macb_handle_link_change(struct net_device *nrt_dev) ++{ ++ struct macb_dummy_netdev_priv *p = netdev_priv(nrt_dev); ++ struct rtnet_device *dev = p->rtdev; ++ struct macb *bp = rtnetdev_priv(dev); ++ struct phy_device *phydev = bp->phy_dev; ++ unsigned long flags; ++ ++ int status_change = 0; ++ ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ ++ if (phydev->link) { ++ if ((bp->speed != phydev->speed) || ++ (bp->duplex != phydev->duplex)) { ++ u32 reg; ++ ++ reg = macb_readl(bp, NCFGR); ++ reg &= ~(MACB_BIT(SPD) | MACB_BIT(FD)); ++ if (macb_is_gem(bp)) ++ reg &= ~GEM_BIT(GBE); ++ ++ if (phydev->duplex) ++ reg |= MACB_BIT(FD); ++ if (phydev->speed == SPEED_100) ++ reg |= MACB_BIT(SPD); ++ if (phydev->speed == SPEED_1000) ++ reg |= GEM_BIT(GBE); ++ ++ macb_or_gem_writel(bp, NCFGR, reg); ++ ++ bp->speed = phydev->speed; ++ bp->duplex = phydev->duplex; ++ status_change = 1; ++ } ++ } ++ ++ if (phydev->link != bp->link) { ++ if (!phydev->link) { ++ bp->speed = 0; ++ bp->duplex = -1; ++ } ++ bp->link = phydev->link; ++ ++ status_change = 1; ++ } ++ ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ if (!IS_ERR(bp->tx_clk)) ++ macb_set_tx_clk(bp->tx_clk, phydev->speed, dev); ++ ++ if (status_change) { ++ if (phydev->link) { ++ rtnetif_carrier_on(dev); ++ rtdev_info(dev, "link up (%d/%s)\n", ++ phydev->speed, ++ phydev->duplex == DUPLEX_FULL ? 
++ "Full" : "Half"); ++ } else { ++ rtnetif_carrier_off(dev); ++ rtdev_info(dev, "link down\n"); ++ } ++ } ++} ++ ++/* based on au1000_eth. c*/ ++static int macb_mii_probe(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct macb_dummy_netdev_priv *p; ++ struct macb_platform_data *pdata; ++ struct phy_device *phydev; ++ struct net_device *dummy; ++ int phy_irq; ++ int ret; ++ ++ phydev = phy_find_first(bp->mii_bus); ++ if (!phydev) { ++ rtdev_err(dev, "no PHY found\n"); ++ return -ENXIO; ++ } ++ ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ if (pdata && gpio_is_valid(pdata->phy_irq_pin)) { ++ ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin, "phy int"); ++ if (!ret) { ++ phy_irq = gpio_to_irq(pdata->phy_irq_pin); ++ phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; ++ } ++ } ++ ++ dummy = alloc_etherdev(sizeof(*p)); ++ p = netdev_priv(dummy); ++ p->rtdev = dev; ++ bp->phy_phony_net_device = dummy; ++ ++ /* attach the mac to the phy */ ++ ret = phy_connect_direct(dummy, phydev, &macb_handle_link_change, ++ bp->phy_interface); ++ if (ret) { ++ rtdev_err(dev, "Could not attach to PHY\n"); ++ return ret; ++ } ++ ++ /* mask with MAC supported features */ ++ if (macb_is_gem(bp)) ++ phydev->supported &= PHY_GBIT_FEATURES; ++ else ++ phydev->supported &= PHY_BASIC_FEATURES; ++ ++ phydev->advertising = phydev->supported; ++ ++ bp->link = 0; ++ bp->speed = 0; ++ bp->duplex = -1; ++ bp->phy_dev = phydev; ++ ++ return 0; ++} ++ ++int rtmacb_mii_init(struct macb *bp) ++{ ++ struct macb_platform_data *pdata; ++ struct device_node *np; ++ int err = -ENXIO, i; ++ ++ /* Enable management port */ ++ macb_writel(bp, NCR, MACB_BIT(MPE)); ++ ++ bp->mii_bus = mdiobus_alloc(); ++ if (bp->mii_bus == NULL) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ bp->mii_bus->name = "MACB_mii_bus"; ++ bp->mii_bus->read = &macb_mdio_read; ++ bp->mii_bus->write = &macb_mdio_write; ++ snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", ++ bp->pdev->name, bp->pdev->id); ++ bp->mii_bus->priv = bp; ++ bp->mii_bus->parent = &bp->pdev->dev; ++ pdata = dev_get_platdata(&bp->pdev->dev); ++ ++ bp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); ++ if (!bp->mii_bus->irq) { ++ err = -ENOMEM; ++ goto err_out_free_mdiobus; ++ } ++ ++ np = bp->pdev->dev.of_node; ++ if (np) { ++ /* try dt phy registration */ ++ err = of_mdiobus_register(bp->mii_bus, np); ++ ++ /* fallback to standard phy registration if no phy were ++ found during dt phy registration */ ++ if (!err && !phy_find_first(bp->mii_bus)) { ++ for (i = 0; i < PHY_MAX_ADDR; i++) { ++ struct phy_device *phydev; ++ ++ phydev = mdiobus_scan(bp->mii_bus, i); ++ if (IS_ERR(phydev)) { ++ err = PTR_ERR(phydev); ++ break; ++ } ++ } ++ ++ if (err) ++ goto err_out_unregister_bus; ++ } ++ } else { ++ for (i = 0; i < PHY_MAX_ADDR; i++) ++ bp->mii_bus->irq[i] = PHY_POLL; ++ ++ if (pdata) ++ bp->mii_bus->phy_mask = pdata->phy_mask; ++ ++ err = mdiobus_register(bp->mii_bus); ++ } ++ ++ if (err) ++ goto err_out_free_mdio_irq; ++ ++ err = macb_mii_probe(bp->dev); ++ if (err) ++ goto err_out_unregister_bus; ++ ++ return 0; ++ ++err_out_unregister_bus: ++ mdiobus_unregister(bp->mii_bus); ++err_out_free_mdio_irq: ++ kfree(bp->mii_bus->irq); ++err_out_free_mdiobus: ++ mdiobus_free(bp->mii_bus); ++err_out: ++ return err; ++} ++EXPORT_SYMBOL_GPL(rtmacb_mii_init); ++ ++static void macb_update_stats(struct macb *bp) ++{ ++ u32 __iomem *reg = bp->regs + MACB_PFR; ++ u32 *p = &bp->hw_stats.macb.rx_pause_frames; ++ u32 *end = 
&bp->hw_stats.macb.tx_pause_frames + 1; ++ ++ WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4); ++ ++ for(; p < end; p++, reg++) ++ *p += __raw_readl(reg); ++} ++ ++static int macb_halt_tx(struct macb *bp) ++{ ++ unsigned long halt_time, timeout; ++ u32 status; ++ ++ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); ++ ++ timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); ++ do { ++ halt_time = jiffies; ++ status = macb_readl(bp, TSR); ++ if (!(status & MACB_BIT(TGO))) ++ return 0; ++ ++ usleep_range(10, 250); ++ } while (time_before(halt_time, timeout)); ++ ++ return -ETIMEDOUT; ++} ++ ++static void macb_tx_error_task(struct work_struct *work) ++{ ++ struct macb *bp = container_of(work, struct macb, tx_error_task); ++ struct macb_tx_skb *tx_skb; ++ struct rtskb *skb; ++ unsigned int tail; ++ ++ rtdev_vdbg(bp->dev, "macb_tx_error_task: t = %u, h = %u\n", ++ bp->tx_tail, bp->tx_head); ++ ++ /* Make sure nobody is trying to queue up new packets */ ++ rtnetif_stop_queue(bp->dev); ++ ++ /* ++ * Stop transmission now ++ * (in case we have just queued new packets) ++ */ ++ if (macb_halt_tx(bp)) ++ /* Just complain for now, reinitializing TX path can be good */ ++ rtdev_err(bp->dev, "BUG: halt tx timed out\n"); ++ ++ /* No need for the lock here as nobody will interrupt us anymore */ ++ ++ /* ++ * Treat frames in TX queue including the ones that caused the error. ++ * Free transmit buffers in upper layer. ++ */ ++ for (tail = bp->tx_tail; tail != bp->tx_head; tail++) { ++ struct macb_dma_desc *desc; ++ u32 ctrl; ++ ++ desc = macb_tx_desc(bp, tail); ++ ctrl = desc->ctrl; ++ tx_skb = macb_tx_skb(bp, tail); ++ skb = tx_skb->skb; ++ ++ if (ctrl & MACB_BIT(TX_USED)) { ++ rtdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n", ++ macb_tx_ring_wrap(tail), skb->data); ++ bp->stats.tx_packets++; ++ bp->stats.tx_bytes += skb->len; ++ } else { ++ /* ++ * "Buffers exhausted mid-frame" errors may only happen ++ * if the driver is buggy, so complain loudly about those. ++ * Statistics are updated by hardware. 
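++			 * For such descriptors the code below forces TX_USED
++			 * back on so the ring entry is reclaimed; the rtskb is
++			 * then unmapped and freed exactly like a successfully
++			 * transmitted frame, only the accounting differs.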
++ */ ++ if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED)) ++ rtdev_err(bp->dev, ++ "BUG: TX buffers exhausted mid-frame\n"); ++ ++ desc->ctrl = ctrl | MACB_BIT(TX_USED); ++ } ++ ++ dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, ++ DMA_TO_DEVICE); ++ tx_skb->skb = NULL; ++ dev_kfree_rtskb(skb); ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ /* Reinitialize the TX desc queue */ ++ macb_writel(bp, TBQP, bp->tx_ring_dma); ++ /* Make TX ring reflect state of hardware */ ++ bp->tx_head = bp->tx_tail = 0; ++ ++ /* Now we are ready to start transmission again */ ++ rtnetif_wake_queue(bp->dev); ++ ++ /* Housework before enabling TX IRQ */ ++ macb_writel(bp, TSR, macb_readl(bp, TSR)); ++ macb_writel(bp, IER, MACB_TX_INT_FLAGS); ++} ++ ++static void macb_tx_interrupt(struct macb *bp) ++{ ++ unsigned int tail; ++ unsigned int head; ++ u32 status; ++ ++ status = macb_readl(bp, TSR); ++ macb_writel(bp, TSR, status); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(TCOMP)); ++ ++ rtdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n", ++ (unsigned long)status); ++ ++ head = bp->tx_head; ++ for (tail = bp->tx_tail; tail != head; tail++) { ++ struct macb_tx_skb *tx_skb; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ u32 ctrl; ++ ++ desc = macb_tx_desc(bp, tail); ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ ctrl = desc->ctrl; ++ ++ if (!(ctrl & MACB_BIT(TX_USED))) ++ break; ++ ++ tx_skb = macb_tx_skb(bp, tail); ++ skb = tx_skb->skb; ++ ++ rtdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n", ++ macb_tx_ring_wrap(tail), skb->data); ++ dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, ++ DMA_TO_DEVICE); ++ bp->stats.tx_packets++; ++ bp->stats.tx_bytes += skb->len; ++ tx_skb->skb = NULL; ++ dev_kfree_rtskb(skb); ++ } ++ ++ bp->tx_tail = tail; ++ if (rtnetif_queue_stopped(bp->dev) ++ && CIRC_CNT(bp->tx_head, bp->tx_tail, ++ TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH) ++ rtnetif_wake_queue(bp->dev); ++} ++ ++static void gem_rx_refill(struct macb *bp) ++{ ++ unsigned int entry; ++ struct rtskb *skb; ++ dma_addr_t paddr; ++ ++ while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, RX_RING_SIZE) > 0) { ++ entry = macb_rx_ring_wrap(bp->rx_prepared_head); ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ bp->rx_prepared_head++; ++ ++ if (bp->rx_skbuff[entry] == NULL) { ++ /* allocate rtskb for this free entry in ring */ ++ skb = rtnetdev_alloc_rtskb(bp->dev, bp->rx_buffer_size); ++ if (unlikely(skb == NULL)) { ++ rtdev_err(bp->dev, ++ "Unable to allocate sk_buff\n"); ++ break; ++ } ++ ++ /* now fill corresponding descriptor entry */ ++ paddr = dma_map_single(&bp->pdev->dev, skb->data, ++ bp->rx_buffer_size, DMA_FROM_DEVICE); ++ if (dma_mapping_error(&bp->pdev->dev, paddr)) { ++ dev_kfree_rtskb(skb); ++ break; ++ } ++ ++ bp->rx_skbuff[entry] = skb; ++ ++ if (entry == RX_RING_SIZE - 1) ++ paddr |= MACB_BIT(RX_WRAP); ++ bp->rx_ring[entry].addr = paddr; ++ bp->rx_ring[entry].ctrl = 0; ++ ++ /* properly align Ethernet header */ ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ } ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ rtdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n", ++ bp->rx_prepared_head, bp->rx_tail); ++} ++ ++/* Mark DMA descriptors from begin up to and not including end as unused */ ++static void discard_partial_frame(struct macb *bp, unsigned int begin, ++ unsigned int end) ++{ ++ unsigned int frag; ++ ++ for (frag = begin; frag != end; frag++) { ++ 
struct macb_dma_desc *desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ /* ++ * When this happens, the hardware stats registers for ++ * whatever caused this is updated, so we don't have to record ++ * anything. ++ */ ++} ++ ++static int gem_rx(struct macb *bp, int budget, nanosecs_abs_t *time_stamp) ++{ ++ unsigned int len; ++ unsigned int entry; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ int count = 0, status; ++ ++ status = macb_readl(bp, RSR); ++ macb_writel(bp, RSR, status); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(RCOMP)); ++ ++ while (count < budget) { ++ u32 addr, ctrl; ++ ++ entry = macb_rx_ring_wrap(bp->rx_tail); ++ desc = &bp->rx_ring[entry]; ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ addr = desc->addr; ++ ctrl = desc->ctrl; ++ ++ if (!(addr & MACB_BIT(RX_USED))) ++ break; ++ ++ bp->rx_tail++; ++ count++; ++ ++ if (!(ctrl & MACB_BIT(RX_SOF) && ctrl & MACB_BIT(RX_EOF))) { ++ rtdev_err(bp->dev, ++ "not whole frame pointed by descriptor\n"); ++ bp->stats.rx_dropped++; ++ break; ++ } ++ skb = bp->rx_skbuff[entry]; ++ if (unlikely(!skb)) { ++ rtdev_err(bp->dev, ++ "inconsistent Rx descriptor chain\n"); ++ bp->stats.rx_dropped++; ++ break; ++ } ++ skb->time_stamp = *time_stamp; ++ /* now everything is ready for receiving packet */ ++ bp->rx_skbuff[entry] = NULL; ++ len = MACB_BFEXT(RX_FRMLEN, ctrl); ++ ++ rtdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); ++ ++ rtskb_put(skb, len); ++ addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); ++ dma_unmap_single(&bp->pdev->dev, addr, ++ bp->rx_buffer_size, DMA_FROM_DEVICE); ++ ++ skb->protocol = rt_eth_type_trans(skb, bp->dev); ++ ++ bp->stats.rx_packets++; ++ bp->stats.rx_bytes += skb->len; ++ ++#if defined(DEBUG) && defined(VERBOSE_DEBUG) ++ rtdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", ++ skb->len, skb->csum); ++ print_hex_dump(KERN_DEBUG, " mac: ", DUMP_PREFIX_ADDRESS, 16, 1, ++ skb->mac_header, 16, true); ++ print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_ADDRESS, 16, 1, ++ skb->data, 32, true); ++#endif ++ ++ rtnetif_rx(skb); ++ } ++ ++ gem_rx_refill(bp); ++ ++ return count; ++} ++ ++static int macb_rx_frame(struct macb *bp, unsigned int first_frag, ++ unsigned int last_frag, nanosecs_abs_t *time_stamp) ++{ ++ unsigned int len; ++ unsigned int frag; ++ unsigned int offset; ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ ++ desc = macb_rx_desc(bp, last_frag); ++ len = MACB_BFEXT(RX_FRMLEN, desc->ctrl); ++ ++ rtdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", ++ macb_rx_ring_wrap(first_frag), ++ macb_rx_ring_wrap(last_frag), len); ++ ++ /* ++ * The ethernet header starts NET_IP_ALIGN bytes into the ++ * first buffer. Since the header is 14 bytes, this makes the ++ * payload word-aligned. ++ * ++ * Instead of calling skb_reserve(NET_IP_ALIGN), we just copy ++ * the two padding bytes into the skb so that we avoid hitting ++ * the slowpath in memcpy(), and pull them off afterwards. 
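++	 * The controller already stores the frame NET_IP_ALIGN (2) bytes into
++	 * its buffers because macb_init_hw() programs RBOF accordingly, so the
++	 * 14-byte Ethernet header ends at offset 16 and the IP header is
++	 * word-aligned. Copying the 2 padding bytes keeps both memcpy()
++	 * pointers aligned, and __rtskb_pull(skb, NET_IP_ALIGN) below drops
++	 * them again.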
++ */ ++ skb = rtnetdev_alloc_rtskb(bp->dev, len + NET_IP_ALIGN); ++ if (!skb) { ++ rtdev_notice(bp->dev, "Low memory, packet dropped.\n"); ++ bp->stats.rx_dropped++; ++ for (frag = first_frag; ; frag++) { ++ desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ if (frag == last_frag) ++ break; ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ return 1; ++ } ++ ++ offset = 0; ++ len += NET_IP_ALIGN; ++ skb->time_stamp = *time_stamp; ++ rtskb_put(skb, len); ++ ++ for (frag = first_frag; ; frag++) { ++ unsigned int frag_len = bp->rx_buffer_size; ++ ++ if (offset + frag_len > len) { ++ BUG_ON(frag != last_frag); ++ frag_len = len - offset; ++ } ++ memcpy(skb->data + offset, macb_rx_buffer(bp, frag), frag_len); ++ offset += bp->rx_buffer_size; ++ desc = macb_rx_desc(bp, frag); ++ desc->addr &= ~MACB_BIT(RX_USED); ++ ++ if (frag == last_frag) ++ break; ++ } ++ ++ /* Make descriptor updates visible to hardware */ ++ wmb(); ++ ++ __rtskb_pull(skb, NET_IP_ALIGN); ++ skb->protocol = rt_eth_type_trans(skb, bp->dev); ++ ++ bp->stats.rx_packets++; ++ bp->stats.rx_bytes += skb->len; ++ rtdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", ++ skb->len, skb->csum); ++ rtnetif_rx(skb); ++ ++ return 0; ++} ++ ++static int macb_rx(struct macb *bp, int budget, nanosecs_abs_t *time_stamp) ++{ ++ int received = 0; ++ unsigned int tail; ++ int first_frag = -1; ++ ++ for (tail = bp->rx_tail; budget > 0; tail++) { ++ struct macb_dma_desc *desc = macb_rx_desc(bp, tail); ++ u32 addr, ctrl; ++ ++ /* Make hw descriptor updates visible to CPU */ ++ rmb(); ++ ++ addr = desc->addr; ++ ctrl = desc->ctrl; ++ ++ if (!(addr & MACB_BIT(RX_USED))) ++ break; ++ ++ if (ctrl & MACB_BIT(RX_SOF)) { ++ if (first_frag != -1) ++ discard_partial_frame(bp, first_frag, tail); ++ first_frag = tail; ++ } ++ ++ if (ctrl & MACB_BIT(RX_EOF)) { ++ int dropped; ++ BUG_ON(first_frag == -1); ++ ++ dropped = macb_rx_frame(bp, first_frag, tail, time_stamp); ++ first_frag = -1; ++ if (!dropped) { ++ received++; ++ budget--; ++ } ++ } ++ } ++ ++ if (first_frag != -1) ++ bp->rx_tail = first_frag; ++ else ++ bp->rx_tail = tail; ++ ++ return received; ++} ++ ++static int macb_interrupt(rtdm_irq_t *irq_handle) ++{ ++ void *dev_id = rtdm_irq_get_arg(irq_handle, void); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *dev = dev_id; ++ struct macb *bp = rtnetdev_priv(dev); ++ unsigned received = 0; ++ u32 status, ctrl; ++ ++ status = macb_readl(bp, ISR); ++ ++ if (unlikely(!status)) ++ return RTDM_IRQ_NONE; ++ ++ rtdm_lock_get(&bp->lock); ++ ++ while (status) { ++ /* close possible race with dev_close */ ++ if (unlikely(!rtnetif_running(dev))) { ++ macb_writel(bp, IDR, -1); ++ break; ++ } ++ ++ rtdev_vdbg(bp->dev, "isr = 0x%08lx\n", (unsigned long)status); ++ ++ if (status & MACB_BIT(RCOMP)) { ++ received += bp->macbgem_ops.mog_rx(bp, 100 - received, ++ &time_stamp); ++ } ++ ++ if (unlikely(status & (MACB_TX_ERR_FLAGS))) { ++ macb_writel(bp, IDR, MACB_TX_INT_FLAGS); ++ rtdm_schedule_nrt_work(&bp->tx_error_task); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_TX_ERR_FLAGS); ++ ++ break; ++ } ++ ++ if (status & MACB_BIT(TCOMP)) ++ macb_tx_interrupt(bp); ++ ++ /* ++ * Link change detection isn't possible with RMII, so we'll ++ * add that if/when we get our hands on a full-blown MII PHY. 
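++		 * The RXUBR case below recovers from a receive used-bit-read
++		 * condition by toggling the receive enable bit (RE) in NCR off
++		 * and on, restarting the receiver without a full reset.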
++ */ ++ ++ if (status & MACB_BIT(RXUBR)) { ++ ctrl = macb_readl(bp, NCR); ++ macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE)); ++ macb_writel(bp, NCR, ctrl | MACB_BIT(RE)); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(RXUBR)); ++ } ++ ++ if (status & MACB_BIT(ISR_ROVR)) { ++ /* We missed at least one packet */ ++ if (macb_is_gem(bp)) ++ bp->hw_stats.gem.rx_overruns++; ++ else ++ bp->hw_stats.macb.rx_overruns++; ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(ISR_ROVR)); ++ } ++ ++ if (status & MACB_BIT(HRESP)) { ++ /* ++ * TODO: Reset the hardware, and maybe move the ++ * rtdev_err to a lower-priority context as well ++ * (work queue?) ++ */ ++ rtdev_err(dev, "DMA bus error: HRESP not OK\n"); ++ ++ if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) ++ macb_writel(bp, ISR, MACB_BIT(HRESP)); ++ } ++ ++ status = macb_readl(bp, ISR); ++ } ++ ++ rtdm_lock_put(&bp->lock); ++ ++ if (received) ++ rt_mark_stack_mgr(dev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int macb_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ dma_addr_t mapping; ++ unsigned int len, entry; ++ struct macb_dma_desc *desc; ++ struct macb_tx_skb *tx_skb; ++ u32 ctrl; ++ unsigned long flags; ++ ++#if defined(DEBUG) && defined(VERBOSE_DEBUG) ++ rtdev_vdbg(bp->dev, ++ "start_xmit: len %u head %p data %p tail %p end %p\n", ++ skb->len, skb->head, skb->data, ++ rtskb_tail_pointer(skb), rtskb_end_pointer(skb)); ++ print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1, ++ skb->data, 16, true); ++#endif ++ ++ len = skb->len; ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ ++ /* This is a hard error, log it. */ ++ if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) { ++ rtnetif_stop_queue(dev); ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ rtdev_err(bp->dev, "BUG! 
Tx Ring full when queue awake!\n"); ++ rtdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n", ++ bp->tx_head, bp->tx_tail); ++ return RTDEV_TX_BUSY; ++ } ++ ++ entry = macb_tx_ring_wrap(bp->tx_head); ++ rtdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); ++ mapping = dma_map_single(&bp->pdev->dev, skb->data, ++ len, DMA_TO_DEVICE); ++ if (dma_mapping_error(&bp->pdev->dev, mapping)) { ++ dev_kfree_rtskb(skb); ++ goto unlock; ++ } ++ ++ bp->tx_head++; ++ tx_skb = &bp->tx_skb[entry]; ++ tx_skb->skb = skb; ++ tx_skb->mapping = mapping; ++ rtdev_vdbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n", ++ skb->data, (unsigned long)mapping); ++ ++ ctrl = MACB_BF(TX_FRMLEN, len); ++ ctrl |= MACB_BIT(TX_LAST); ++ if (entry == (TX_RING_SIZE - 1)) ++ ctrl |= MACB_BIT(TX_WRAP); ++ ++ desc = &bp->tx_ring[entry]; ++ desc->addr = mapping; ++ desc->ctrl = ctrl; ++ ++ /* Make newly initialized descriptor visible to hardware */ ++ wmb(); ++ ++ rtskb_tx_timestamp(skb); ++ ++ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); ++ ++ if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) ++ rtnetif_stop_queue(dev); ++ ++unlock: ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ return RTDEV_TX_OK; ++} ++ ++static void macb_init_rx_buffer_size(struct macb *bp, size_t size) ++{ ++ if (!macb_is_gem(bp)) { ++ bp->rx_buffer_size = MACB_RX_BUFFER_SIZE; ++ } else { ++ bp->rx_buffer_size = size; ++ ++ if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) { ++ rtdev_dbg(bp->dev, ++ "RX buffer must be multiple of %d bytes, expanding\n", ++ RX_BUFFER_MULTIPLE); ++ bp->rx_buffer_size = ++ roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE); ++ } ++ } ++ ++ rtdev_dbg(bp->dev, "mtu [%u] rx_buffer_size [%Zu]\n", ++ bp->dev->mtu, bp->rx_buffer_size); ++} ++ ++static void gem_free_rx_buffers(struct macb *bp) ++{ ++ struct rtskb *skb; ++ struct macb_dma_desc *desc; ++ dma_addr_t addr; ++ int i; ++ ++ if (!bp->rx_skbuff) ++ return; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = bp->rx_skbuff[i]; ++ ++ if (skb == NULL) ++ continue; ++ ++ desc = &bp->rx_ring[i]; ++ addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); ++ dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, ++ DMA_FROM_DEVICE); ++ dev_kfree_rtskb(skb); ++ skb = NULL; ++ } ++ ++ kfree(bp->rx_skbuff); ++ bp->rx_skbuff = NULL; ++} ++ ++static void macb_free_rx_buffers(struct macb *bp) ++{ ++ if (bp->rx_buffers) { ++ dma_free_coherent(&bp->pdev->dev, ++ RX_RING_SIZE * bp->rx_buffer_size, ++ bp->rx_buffers, bp->rx_buffers_dma); ++ bp->rx_buffers = NULL; ++ } ++} ++ ++static void macb_free_consistent(struct macb *bp) ++{ ++ if (bp->tx_skb) { ++ kfree(bp->tx_skb); ++ bp->tx_skb = NULL; ++ } ++ bp->macbgem_ops.mog_free_rx_buffers(bp); ++ if (bp->rx_ring) { ++ dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES, ++ bp->rx_ring, bp->rx_ring_dma); ++ bp->rx_ring = NULL; ++ } ++ if (bp->tx_ring) { ++ dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES, ++ bp->tx_ring, bp->tx_ring_dma); ++ bp->tx_ring = NULL; ++ } ++} ++ ++static int gem_alloc_rx_buffers(struct macb *bp) ++{ ++ int size; ++ ++ size = RX_RING_SIZE * sizeof(struct rtskb *); ++ bp->rx_skbuff = kzalloc(size, GFP_KERNEL); ++ if (!bp->rx_skbuff) ++ return -ENOMEM; ++ else ++ rtdev_dbg(bp->dev, ++ "Allocated %d RX struct rtskb entries at %p\n", ++ RX_RING_SIZE, bp->rx_skbuff); ++ return 0; ++} ++ ++static int macb_alloc_rx_buffers(struct macb *bp) ++{ ++ int size; ++ ++ size = RX_RING_SIZE * bp->rx_buffer_size; ++ bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->rx_buffers_dma, 
GFP_KERNEL); ++ if (!bp->rx_buffers) ++ return -ENOMEM; ++ else ++ rtdev_dbg(bp->dev, ++ "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers); ++ return 0; ++} ++ ++static int macb_alloc_consistent(struct macb *bp) ++{ ++ int size; ++ ++ size = TX_RING_SIZE * sizeof(struct macb_tx_skb); ++ bp->tx_skb = kmalloc(size, GFP_KERNEL); ++ if (!bp->tx_skb) ++ goto out_err; ++ ++ size = RX_RING_BYTES; ++ bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->rx_ring_dma, GFP_KERNEL); ++ if (!bp->rx_ring) ++ goto out_err; ++ rtdev_dbg(bp->dev, ++ "Allocated RX ring of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->rx_ring_dma, bp->rx_ring); ++ ++ size = TX_RING_BYTES; ++ bp->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size, ++ &bp->tx_ring_dma, GFP_KERNEL); ++ if (!bp->tx_ring) ++ goto out_err; ++ rtdev_dbg(bp->dev, ++ "Allocated TX ring of %d bytes at %08lx (mapped %p)\n", ++ size, (unsigned long)bp->tx_ring_dma, bp->tx_ring); ++ ++ if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) ++ goto out_err; ++ ++ return 0; ++ ++out_err: ++ macb_free_consistent(bp); ++ return -ENOMEM; ++} ++ ++static void gem_init_rings(struct macb *bp) ++{ ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ bp->tx_ring[i].addr = 0; ++ bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); ++ } ++ bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); ++ ++ bp->rx_tail = bp->rx_prepared_head = bp->tx_head = bp->tx_tail = 0; ++ ++ gem_rx_refill(bp); ++} ++ ++static void macb_init_rings(struct macb *bp) ++{ ++ int i; ++ dma_addr_t addr; ++ ++ addr = bp->rx_buffers_dma; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ bp->rx_ring[i].addr = addr; ++ bp->rx_ring[i].ctrl = 0; ++ addr += bp->rx_buffer_size; ++ } ++ bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP); ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ bp->tx_ring[i].addr = 0; ++ bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); ++ } ++ bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); ++ ++ bp->rx_tail = bp->tx_head = bp->tx_tail = 0; ++} ++ ++static void macb_reset_hw(struct macb *bp) ++{ ++ /* ++ * Disable RX and TX (XXX: Should we halt the transmission ++ * more gracefully?) ++ */ ++ macb_writel(bp, NCR, 0); ++ ++ /* Clear the stats registers (XXX: Update stats first?) 
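++	 * The remaining reset steps follow the same pattern: write -1 to TSR
++	 * and RSR to clear all status flags, then IDR = -1 plus a dummy ISR
++	 * read to disable and flush any pending interrupt.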
*/ ++ macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); ++ ++ /* Clear all status flags */ ++ macb_writel(bp, TSR, -1); ++ macb_writel(bp, RSR, -1); ++ ++ /* Disable all interrupts */ ++ macb_writel(bp, IDR, -1); ++ macb_readl(bp, ISR); ++} ++ ++static u32 gem_mdc_clk_div(struct macb *bp) ++{ ++ u32 config; ++ unsigned long pclk_hz = clk_get_rate(bp->pclk); ++ ++ if (pclk_hz <= 20000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV8); ++ else if (pclk_hz <= 40000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV16); ++ else if (pclk_hz <= 80000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV32); ++ else if (pclk_hz <= 120000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV48); ++ else if (pclk_hz <= 160000000) ++ config = GEM_BF(CLK, GEM_CLK_DIV64); ++ else ++ config = GEM_BF(CLK, GEM_CLK_DIV96); ++ ++ return config; ++} ++ ++static u32 macb_mdc_clk_div(struct macb *bp) ++{ ++ u32 config; ++ unsigned long pclk_hz; ++ ++ if (macb_is_gem(bp)) ++ return gem_mdc_clk_div(bp); ++ ++ pclk_hz = clk_get_rate(bp->pclk); ++ if (pclk_hz <= 20000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV8); ++ else if (pclk_hz <= 40000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV16); ++ else if (pclk_hz <= 80000000) ++ config = MACB_BF(CLK, MACB_CLK_DIV32); ++ else ++ config = MACB_BF(CLK, MACB_CLK_DIV64); ++ ++ return config; ++} ++ ++/* ++ * Get the DMA bus width field of the network configuration register that we ++ * should program. We find the width from decoding the design configuration ++ * register to find the maximum supported data bus width. ++ */ ++static u32 macb_dbw(struct macb *bp) ++{ ++ if (!macb_is_gem(bp)) ++ return 0; ++ ++ switch (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1))) { ++ case 4: ++ return GEM_BF(DBW, GEM_DBW128); ++ case 2: ++ return GEM_BF(DBW, GEM_DBW64); ++ case 1: ++ default: ++ return GEM_BF(DBW, GEM_DBW32); ++ } ++} ++ ++/* ++ * Configure the receive DMA engine ++ * - use the correct receive buffer size ++ * - set the possibility to use INCR16 bursts ++ * (if not supported by FIFO, it will fallback to default) ++ * - set both rx/tx packet buffers to full memory size ++ * These are configurable parameters for GEM. 
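++ * These bullets map onto the DMACFG fields written below: RXBS carries the
++ * buffer size in RX_BUFFER_MULTIPLE (64 byte) units, FBLDO = 16 requests the
++ * INCR16 bursts, TXPBMS together with RXBMS = -1 selects the full-size
++ * packet buffers, and ENDIA is cleared.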
++ */ ++static void macb_configure_dma(struct macb *bp) ++{ ++ u32 dmacfg; ++ ++ if (macb_is_gem(bp)) { ++ dmacfg = gem_readl(bp, DMACFG) & ~GEM_BF(RXBS, -1L); ++ dmacfg |= GEM_BF(RXBS, bp->rx_buffer_size / RX_BUFFER_MULTIPLE); ++ dmacfg |= GEM_BF(FBLDO, 16); ++ dmacfg |= GEM_BIT(TXPBMS) | GEM_BF(RXBMS, -1L); ++ dmacfg &= ~GEM_BIT(ENDIA); ++ gem_writel(bp, DMACFG, dmacfg); ++ } ++} ++ ++/* ++ * Configure peripheral capacities according to integration options used ++ */ ++static void macb_configure_caps(struct macb *bp) ++{ ++ if (macb_is_gem(bp)) { ++ if (GEM_BFEXT(IRQCOR, gem_readl(bp, DCFG1)) == 0) ++ bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; ++ } ++ rtdev_vdbg(bp->dev, "Capabilities : %X\n", bp->caps); ++} ++ ++static void macb_init_hw(struct macb *bp) ++{ ++ u32 config; ++ ++ macb_reset_hw(bp); ++ rtmacb_set_hwaddr(bp); ++ ++ config = macb_mdc_clk_div(bp); ++ config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ ++ config |= MACB_BIT(PAE); /* PAuse Enable */ ++ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ ++ if (bp->dev->flags & IFF_PROMISC) ++ config |= MACB_BIT(CAF); /* Copy All Frames */ ++ if (!(bp->dev->flags & IFF_BROADCAST)) ++ config |= MACB_BIT(NBC); /* No BroadCast */ ++ config |= macb_dbw(bp); ++ macb_writel(bp, NCFGR, config); ++ bp->speed = SPEED_10; ++ bp->duplex = DUPLEX_HALF; ++ ++ macb_configure_dma(bp); ++ macb_configure_caps(bp); ++ ++ /* Initialize TX and RX buffers */ ++ macb_writel(bp, RBQP, bp->rx_ring_dma); ++ macb_writel(bp, TBQP, bp->tx_ring_dma); ++ ++ /* Enable TX and RX */ ++ macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); ++ ++ /* Enable interrupts */ ++ macb_writel(bp, IER, (MACB_RX_INT_FLAGS ++ | MACB_TX_INT_FLAGS ++ | MACB_BIT(HRESP))); ++ ++} ++ ++static int macb_open(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ size_t bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN; ++ int err; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ rtdev_dbg(bp->dev, "open\n"); ++ ++ /* carrier starts down */ ++ rtnetif_carrier_off(dev); ++ ++ /* if the phy is not yet register, retry later*/ ++ if (!bp->phy_dev) ++ return -EAGAIN; ++ ++ /* RX buffers initialization */ ++ macb_init_rx_buffer_size(bp, bufsz); ++ ++ err = macb_alloc_consistent(bp); ++ if (err) { ++ rtdev_err(dev, "Unable to allocate DMA memory (error %d)\n", ++ err); ++ return err; ++ } ++ ++ bp->macbgem_ops.mog_init_rings(bp); ++ macb_init_hw(bp); ++ ++ /* schedule a link state check */ ++ phy_start(bp->phy_dev); ++ ++ rtnetif_start_queue(dev); ++ ++ return 0; ++} ++ ++static int macb_close(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ unsigned long flags; ++ ++ rtnetif_stop_queue(dev); ++ ++ if (bp->phy_dev) ++ phy_stop(bp->phy_dev); ++ ++ rtdm_lock_get_irqsave(&bp->lock, flags); ++ macb_reset_hw(bp); ++ rtnetif_carrier_off(dev); ++ rtdm_lock_put_irqrestore(&bp->lock, flags); ++ ++ macb_free_consistent(bp); ++ ++ rt_stack_disconnect(dev); ++ ++ return 0; ++} ++ ++static void gem_update_stats(struct macb *bp) ++{ ++ u32 __iomem *reg = bp->regs + GEM_OTX; ++ u32 *p = &bp->hw_stats.gem.tx_octets_31_0; ++ u32 *end = &bp->hw_stats.gem.rx_udp_checksum_errors + 1; ++ ++ for (; p < end; p++, reg++) ++ *p += __raw_readl(reg); ++} ++ ++static struct net_device_stats *gem_get_stats(struct macb *bp) ++{ ++ struct gem_stats *hwstat = &bp->hw_stats.gem; ++ struct net_device_stats *nstat = &bp->stats; ++ ++ gem_update_stats(bp); ++ ++ nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + ++ 
hwstat->rx_alignment_errors + ++ hwstat->rx_resource_errors + ++ hwstat->rx_overruns + ++ hwstat->rx_oversize_frames + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersized_frames + ++ hwstat->rx_length_field_frame_errors); ++ nstat->tx_errors = (hwstat->tx_late_collisions + ++ hwstat->tx_excessive_collisions + ++ hwstat->tx_underrun + ++ hwstat->tx_carrier_sense_errors); ++ nstat->multicast = hwstat->rx_multicast_frames; ++ nstat->collisions = (hwstat->tx_single_collision_frames + ++ hwstat->tx_multiple_collision_frames + ++ hwstat->tx_excessive_collisions); ++ nstat->rx_length_errors = (hwstat->rx_oversize_frames + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersized_frames + ++ hwstat->rx_length_field_frame_errors); ++ nstat->rx_over_errors = hwstat->rx_resource_errors; ++ nstat->rx_crc_errors = hwstat->rx_frame_check_sequence_errors; ++ nstat->rx_frame_errors = hwstat->rx_alignment_errors; ++ nstat->rx_fifo_errors = hwstat->rx_overruns; ++ nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; ++ nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; ++ nstat->tx_fifo_errors = hwstat->tx_underrun; ++ ++ return nstat; ++} ++ ++struct net_device_stats *rtmacb_get_stats(struct rtnet_device *dev) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct net_device_stats *nstat = &bp->stats; ++ struct macb_stats *hwstat = &bp->hw_stats.macb; ++ ++ if (macb_is_gem(bp)) ++ return gem_get_stats(bp); ++ ++ /* read stats from hardware */ ++ macb_update_stats(bp); ++ ++ /* Convert HW stats into netdevice stats */ ++ nstat->rx_errors = (hwstat->rx_fcs_errors + ++ hwstat->rx_align_errors + ++ hwstat->rx_resource_errors + ++ hwstat->rx_overruns + ++ hwstat->rx_oversize_pkts + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersize_pkts + ++ hwstat->sqe_test_errors + ++ hwstat->rx_length_mismatch); ++ nstat->tx_errors = (hwstat->tx_late_cols + ++ hwstat->tx_excessive_cols + ++ hwstat->tx_underruns + ++ hwstat->tx_carrier_errors); ++ nstat->collisions = (hwstat->tx_single_cols + ++ hwstat->tx_multiple_cols + ++ hwstat->tx_excessive_cols); ++ nstat->rx_length_errors = (hwstat->rx_oversize_pkts + ++ hwstat->rx_jabbers + ++ hwstat->rx_undersize_pkts + ++ hwstat->rx_length_mismatch); ++ nstat->rx_over_errors = hwstat->rx_resource_errors + ++ hwstat->rx_overruns; ++ nstat->rx_crc_errors = hwstat->rx_fcs_errors; ++ nstat->rx_frame_errors = hwstat->rx_align_errors; ++ nstat->rx_fifo_errors = hwstat->rx_overruns; ++ /* XXX: What does "missed" mean? */ ++ nstat->tx_aborted_errors = hwstat->tx_excessive_cols; ++ nstat->tx_carrier_errors = hwstat->tx_carrier_errors; ++ nstat->tx_fifo_errors = hwstat->tx_underruns; ++ /* Don't know about heartbeat or window errors... 
*/ ++ ++ return nstat; ++} ++EXPORT_SYMBOL_GPL(rtmacb_get_stats); ++ ++int rtmacb_ioctl(struct rtnet_device *dev, unsigned cmd, void *rq) ++{ ++ struct macb *bp = rtnetdev_priv(dev); ++ struct phy_device *phydev = bp->phy_dev; ++ ++ if (!rtnetif_running(dev)) ++ return -EINVAL; ++ ++ if (!phydev) ++ return -ENODEV; ++ ++ return phy_mii_ioctl(phydev, rq, cmd); ++} ++EXPORT_SYMBOL_GPL(rtmacb_ioctl); ++ ++#if defined(CONFIG_OF) ++static const struct of_device_id macb_dt_ids[] = { ++ { .compatible = "cdns,at32ap7000-macb" }, ++ { .compatible = "cdns,at91sam9260-macb" }, ++ { .compatible = "cdns,macb" }, ++ { .compatible = "cdns,pc302-gem" }, ++ { .compatible = "cdns,gem" }, ++ { .compatible = "atmel,sama5d3-gem" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, macb_dt_ids); ++#endif ++ ++static int __init macb_probe(struct platform_device *pdev) ++{ ++ struct macb_platform_data *pdata; ++ struct resource *regs; ++ struct rtnet_device *dev; ++ struct macb *bp; ++ struct phy_device *phydev; ++ u32 config; ++ int err = -ENXIO; ++ struct pinctrl *pinctrl; ++ const char *mac; ++ ++ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!regs) { ++ dev_err(&pdev->dev, "no mmio resource defined\n"); ++ goto err_out; ++ } ++ ++ pinctrl = devm_pinctrl_get_select_default(&pdev->dev); ++ if (IS_ERR(pinctrl)) { ++ err = PTR_ERR(pinctrl); ++ if (err == -EPROBE_DEFER) ++ goto err_out; ++ ++ dev_warn(&pdev->dev, "No pinctrl provided\n"); ++ } ++ ++ err = -ENOMEM; ++ dev = rt_alloc_etherdev(sizeof(*bp), RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (!dev) ++ goto err_out; ++ ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ ++ /* TODO: Actually, we have some interesting features... 
*/ ++ dev->features |= 0; ++ ++ bp = rtnetdev_priv(dev); ++ bp->pdev = pdev; ++ bp->dev = dev; ++ ++ rtdm_lock_init(&bp->lock); ++ INIT_WORK(&bp->tx_error_task, macb_tx_error_task); ++ ++ bp->pclk = devm_clk_get(&pdev->dev, "pclk"); ++ if (IS_ERR(bp->pclk)) { ++ err = PTR_ERR(bp->pclk); ++ dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ bp->hclk = devm_clk_get(&pdev->dev, "hclk"); ++ if (IS_ERR(bp->hclk)) { ++ err = PTR_ERR(bp->hclk); ++ dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ bp->tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); ++ ++ err = clk_prepare_enable(bp->pclk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); ++ goto err_out_free_dev; ++ } ++ ++ err = clk_prepare_enable(bp->hclk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); ++ goto err_out_disable_pclk; ++ } ++ ++ if (!IS_ERR(bp->tx_clk)) { ++ err = clk_prepare_enable(bp->tx_clk); ++ if (err) { ++ dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", ++ err); ++ goto err_out_disable_hclk; ++ } ++ } ++ ++ bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); ++ if (!bp->regs) { ++ dev_err(&pdev->dev, "failed to map registers, aborting.\n"); ++ err = -ENOMEM; ++ goto err_out_disable_clocks; ++ } ++ ++ dev->irq = platform_get_irq(pdev, 0); ++ rt_stack_connect(dev, &STACK_manager); ++ ++ err = rtdm_irq_request(&bp->irq_handle, dev->irq, macb_interrupt, 0, ++ dev->name, dev); ++ if (err) { ++ dev_err(&pdev->dev, "Unable to request IRQ %d (error %d)\n", ++ dev->irq, err); ++ goto err_out_disable_clocks; ++ } ++ ++ dev->open = macb_open; ++ dev->stop = macb_close; ++ dev->hard_start_xmit = macb_start_xmit; ++ dev->do_ioctl = rtmacb_ioctl; ++ dev->get_stats = rtmacb_get_stats; ++ ++ dev->base_addr = regs->start; ++ ++ /* setup appropriated routines according to adapter type */ ++ if (macb_is_gem(bp)) { ++ bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers; ++ bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers; ++ bp->macbgem_ops.mog_init_rings = gem_init_rings; ++ bp->macbgem_ops.mog_rx = gem_rx; ++ } else { ++ bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers; ++ bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers; ++ bp->macbgem_ops.mog_init_rings = macb_init_rings; ++ bp->macbgem_ops.mog_rx = macb_rx; ++ } ++ ++ /* Set MII management clock divider */ ++ config = macb_mdc_clk_div(bp); ++ config |= macb_dbw(bp); ++ macb_writel(bp, NCFGR, config); ++ ++ mac = of_get_mac_address(pdev->dev.of_node); ++ if (mac) ++ memcpy(bp->dev->dev_addr, mac, ETH_ALEN); ++ else ++ rtmacb_get_hwaddr(bp); ++ ++ err = of_get_phy_mode(pdev->dev.of_node); ++ if (err < 0) { ++ pdata = dev_get_platdata(&pdev->dev); ++ if (pdata && pdata->is_rmii) ++ bp->phy_interface = PHY_INTERFACE_MODE_RMII; ++ else ++ bp->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ bp->phy_interface = err; ++ } ++ ++ if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) ++ macb_or_gem_writel(bp, USRIO, GEM_BIT(RGMII)); ++ else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) ++#if defined(CONFIG_ARCH_AT91) ++ macb_or_gem_writel(bp, USRIO, (MACB_BIT(RMII) | ++ MACB_BIT(CLKEN))); ++#else ++ macb_or_gem_writel(bp, USRIO, 0); ++#endif ++ else ++#if defined(CONFIG_ARCH_AT91) ++ macb_or_gem_writel(bp, USRIO, MACB_BIT(CLKEN)); ++#else ++ macb_or_gem_writel(bp, USRIO, MACB_BIT(MII)); ++#endif ++ ++ err = rt_register_rtnetdev(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Cannot register net 
device, aborting.\n"); ++ goto err_out_irq_free; ++ } ++ ++ err = rtmacb_mii_init(bp); ++ if (err) ++ goto err_out_unregister_netdev; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ rtnetif_carrier_off(dev); ++ ++ rtdev_info(dev, "Cadence %s at 0x%08lx irq %d (%pM)\n", ++ macb_is_gem(bp) ? "GEM" : "MACB", dev->base_addr, ++ dev->irq, dev->dev_addr); ++ ++ phydev = bp->phy_dev; ++ rtdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ phydev->drv->name, dev_name(&phydev->dev), phydev->irq); ++ ++ return 0; ++ ++err_out_unregister_netdev: ++ rt_unregister_rtnetdev(dev); ++err_out_irq_free: ++ rtdm_irq_free(&bp->irq_handle); ++err_out_disable_clocks: ++ if (!IS_ERR(bp->tx_clk)) ++ clk_disable_unprepare(bp->tx_clk); ++err_out_disable_hclk: ++ clk_disable_unprepare(bp->hclk); ++err_out_disable_pclk: ++ clk_disable_unprepare(bp->pclk); ++err_out_free_dev: ++ rtdev_free(dev); ++err_out: ++ return err; ++} ++ ++static int __exit macb_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *dev; ++ struct macb *bp; ++ ++ dev = platform_get_drvdata(pdev); ++ ++ if (dev) { ++ bp = rtnetdev_priv(dev); ++ if (bp->phy_dev) ++ phy_disconnect(bp->phy_dev); ++ mdiobus_unregister(bp->mii_bus); ++ if (bp->phy_phony_net_device) ++ free_netdev(bp->phy_phony_net_device); ++ kfree(bp->mii_bus->irq); ++ rt_rtdev_disconnect(dev); ++ rtdm_irq_free(&bp->irq_handle); ++ mdiobus_free(bp->mii_bus); ++ rt_unregister_rtnetdev(dev); ++ if (!IS_ERR(bp->tx_clk)) ++ clk_disable_unprepare(bp->tx_clk); ++ clk_disable_unprepare(bp->hclk); ++ clk_disable_unprepare(bp->pclk); ++ rtdev_free(dev); ++ } ++ ++ return 0; ++} ++ ++static struct platform_driver macb_driver = { ++ .remove = __exit_p(macb_remove), ++ .driver = { ++ .name = "macb", ++ .owner = THIS_MODULE, ++ .of_match_table = of_match_ptr(macb_dt_ids), ++ }, ++}; ++ ++static bool found; ++static int __init macb_driver_init(void) ++{ ++ found = platform_driver_probe(&macb_driver, macb_probe) == 0; ++ return 0; ++} ++module_init(macb_driver_init); ++ ++static void __exit macb_driver_exit(void) ++{ ++ if (found) ++ platform_driver_unregister(&macb_driver); ++} ++module_exit(macb_driver_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("Cadence MACB/GEM Ethernet driver"); ++MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); ++MODULE_ALIAS("platform:macb"); +--- linux/drivers/xenomai/net/drivers/eepro100.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/eepro100.c 2021-04-07 16:01:27.349633992 +0800 +@@ -0,0 +1,1845 @@ ++/* rtnet/drivers/eepro100-rt.c: An Intel i82557-559 Real-Time-Ethernet driver for Linux. */ ++/* ++ RTnet porting 2002 by Jan Kiszka ++ Originally written 1996-1999 by Donald Becker. ++ ++ The driver also contains updates by different kernel developers ++ (see incomplete list below). ++ Current maintainer is Andrey V. Savochkin . ++ Please use this email address and linux-kernel mailing list for bug reports. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ This driver is for the Intel EtherExpress Pro100 (Speedo3) design. ++ It should work with all i82557/558/559 boards. ++ ++ Version history: ++ 1998 Apr - 2000 Feb Andrey V. 
Savochkin ++ Serious fixes for multicast filter list setting, TX timeout routine; ++ RX ring refilling logic; other stuff ++ 2000 Feb Jeff Garzik ++ Convert to new PCI driver interface ++ 2000 Mar 24 Dragan Stancevic ++ Disabled FC and ER, to avoid lockups when when we get FCP interrupts. ++ 2000 Jul 17 Goutham Rao ++ PCI DMA API fixes, adding pci_dma_sync_single calls where neccesary ++ ++ 2002 May 16 Jan Kiszka ++ Ported to RTnet (RTAI version) ++*/ ++ ++static const char *version = ++"eepro100-rt.c:1.36-RTnet-0.8 2002-2006 Jan Kiszka \n" ++"eepro100-rt.c: based on eepro100.c 1.36 by D. Becker, A. V. Savochkin and others\n"; ++ ++/* A few user-configurable values that apply to all boards. ++ First set is undocumented and spelled per Intel recommendations. */ ++ ++static int txfifo = 8; /* Tx FIFO threshold in 4 byte units, 0-15 */ ++static int rxfifo = 8; /* Rx FIFO threshold, default 32 bytes. */ ++/* Tx/Rx DMA burst length, 0-127, 0 == no preemption, tx==128 -> disabled. */ ++static int txdmacount = 128; ++static int rxdmacount /* = 0 */; ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++ ++/* Maximum number of multicast addresses to filter (vs. rx-all-multicast) */ ++static int multicast_filter_limit = 64; ++ ++/* 'options' is used to pass a transceiver override or full-duplex flag ++ e.g. "options=16" for FD, "options=32" for 100mbps-only. */ ++static int full_duplex[] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int options[] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int local_debug = -1; /* The debug level */ ++ ++/* A few values that may be tweaked. */ ++/* The ring sizes should be a power of two for efficiency. */ ++#define TX_RING_SIZE 32 ++#define RX_RING_SIZE 8 /* RX_RING_SIZE*2 rtskbs will be preallocated */ ++/* How much slots multicast filter setup may take. ++ Do not descrease without changing set_rx_mode() implementaion. */ ++#define TX_MULTICAST_SIZE 2 ++#define TX_MULTICAST_RESERV (TX_MULTICAST_SIZE*2) ++/* Actual number of TX packets queued, must be ++ <= TX_RING_SIZE-TX_MULTICAST_RESERV. */ ++#define TX_QUEUE_LIMIT (TX_RING_SIZE-TX_MULTICAST_RESERV) ++/* Hysteresis marking queue as no longer full. */ ++#define TX_QUEUE_UNFULL (TX_QUEUE_LIMIT-4) ++ ++/* Operational parameters that usually are not changed. */ ++ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++/* Size of an pre-allocated Rx buffer: + slack.*/ ++#define PKT_BUF_SZ VLAN_ETH_FRAME_LEN ++ ++#if !defined(__OPTIMIZE__) || !defined(__KERNEL__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++// *** RTnet *** ++#include ++#include ++ ++#define MAX_UNITS 8 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 
1,0,1)"); ++// *** RTnet *** ++ ++MODULE_AUTHOR("Maintainer: Jan Kiszka "); ++MODULE_DESCRIPTION("Intel i82557/i82558/i82559 PCI EtherExpressPro driver"); ++MODULE_LICENSE("GPL"); ++module_param_named(debug, local_debug, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++module_param(txfifo, int, 0444); ++module_param(rxfifo, int, 0444); ++module_param(txdmacount, int, 0444); ++module_param(rxdmacount, int, 0444); ++module_param(max_interrupt_work, int, 0444); ++module_param(multicast_filter_limit, int, 0444); ++MODULE_PARM_DESC(debug, "eepro100 debug level (0-6)"); ++MODULE_PARM_DESC(options, "eepro100: Bits 0-3: tranceiver type, bit 4: full duplex, bit 5: 100Mbps"); ++MODULE_PARM_DESC(full_duplex, "eepro100 full duplex setting(s) (1)"); ++MODULE_PARM_DESC(txfifo, "eepro100 Tx FIFO threshold in 4 byte units, (0-15)"); ++MODULE_PARM_DESC(rxfifo, "eepro100 Rx FIFO threshold in 4 byte units, (0-15)"); ++MODULE_PARM_DESC(txdmaccount, "eepro100 Tx DMA burst length; 128 - disable (0-128)"); ++MODULE_PARM_DESC(rxdmaccount, "eepro100 Rx DMA burst length; 128 - disable (0-128)"); ++MODULE_PARM_DESC(max_interrupt_work, "eepro100 maximum events handled per interrupt"); ++MODULE_PARM_DESC(multicast_filter_limit, "eepro100 maximum number of filtered multicast addresses"); ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++// *** RTnet - no power management *** ++#undef pci_set_power_state ++#define pci_set_power_state null_set_power_state ++static inline int null_set_power_state(struct pci_dev *dev, int state) ++{ ++ return 0; ++} ++// *** RTnet *** ++ ++#define netdevice_start(dev) ++#define netdevice_stop(dev) ++#define netif_set_tx_timeout(dev, tf, tm) \ ++ do { \ ++ (dev)->tx_timeout = (tf); \ ++ (dev)->watchdog_timeo = (tm); \ ++ } while(0) ++ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++static int speedo_debug = 1; ++#else ++#define speedo_debug 0 ++#endif ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This device driver is designed for the Intel i82557 "Speedo3" chip, Intel's ++single-chip fast Ethernet controller for PCI, as used on the Intel ++EtherExpress Pro 100 adapter. ++ ++II. Board-specific settings ++ ++PCI bus devices are configured by the system at boot time, so no jumpers ++need to be set on the board. The system BIOS should be set to assign the ++PCI INTA signal to an otherwise unused system IRQ line. While it's ++possible to share PCI interrupt lines, it negatively impacts performance and ++only recent kernels support it. ++ ++III. Driver operation ++ ++IIIA. General ++The Speedo3 is very similar to other Intel network chips, that is to say ++"apparently designed on a different planet". This chips retains the complex ++Rx and Tx descriptors and multiple buffers pointers as previous chips, but ++also has simplified Tx and Rx buffer modes. This driver uses the "flexible" ++Tx mode, but in a simplified lower-overhead manner: it associates only a ++single buffer descriptor with each frame descriptor. ++ ++Despite the extra space overhead in each receive skbuff, the driver must use ++the simplified Rx buffer mode to assure that only a single data buffer is ++associated with each RxFD. The driver implements this by reserving space ++for the Rx descriptor at the head of each Rx skbuff. ++ ++The Speedo-3 has receive and command unit base addresses that are added to ++almost all descriptor pointers. The driver sets these to zero, so that all ++pointer fields are absolute addresses. 
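++
++For illustration, the Rx buffer layout that speedo_init_rx_ring() and
++speedo_rx_alloc() below set up inside each rtskb is:
++
++    [ 2 byte pad | struct RxFD | packet data, PKT_BUF_SZ bytes ]
++
++The rtskb is allocated with PKT_BUF_SZ + 2 + sizeof(struct RxFD) bytes, two
++bytes are reserved for IP header alignment, the RxFD is placed at the head of
++the remaining space, and the single DMA mapping covers the RxFD plus the data
++area, so the chip deposits the received frame directly behind its descriptor.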
++ ++The System Control Block (SCB) of some previous Intel chips exists on the ++chip in both PCI I/O and memory space. This driver uses the I/O space ++registers, but might switch to memory mapped mode to better support non-x86 ++processors. ++ ++IIIB. Transmit structure ++ ++The driver must use the complex Tx command+descriptor mode in order to ++have a indirect pointer to the skbuff data section. Each Tx command block ++(TxCB) is associated with two immediately appended Tx Buffer Descriptor ++(TxBD). A fixed ring of these TxCB+TxBD pairs are kept as part of the ++speedo_private data structure for each adapter instance. ++ ++The newer i82558 explicitly supports this structure, and can read the two ++TxBDs in the same PCI burst as the TxCB. ++ ++This ring structure is used for all normal transmit packets, but the ++transmit packet descriptors aren't long enough for most non-Tx commands such ++as CmdConfigure. This is complicated by the possibility that the chip has ++already loaded the link address in the previous descriptor. So for these ++commands we convert the next free descriptor on the ring to a NoOp, and point ++that descriptor's link to the complex command. ++ ++An additional complexity of these non-transmit commands are that they may be ++added asynchronous to the normal transmit queue, so we disable interrupts ++whenever the Tx descriptor ring is manipulated. ++ ++A notable aspect of these special configure commands is that they do ++work with the normal Tx ring entry scavenge method. The Tx ring scavenge ++is done at interrupt time using the 'dirty_tx' index, and checking for the ++command-complete bit. While the setup frames may have the NoOp command on the ++Tx ring marked as complete, but not have completed the setup command, this ++is not a problem. The tx_ring entry can be still safely reused, as the ++tx_skbuff[] entry is always empty for config_cmd and mc_setup frames. ++ ++Commands may have bits set e.g. CmdSuspend in the command word to either ++suspend or stop the transmit/command unit. This driver always flags the last ++command with CmdSuspend, erases the CmdSuspend in the previous command, and ++then issues a CU_RESUME. ++Note: Watch out for the potential race condition here: imagine ++ erasing the previous suspend ++ the chip processes the previous command ++ the chip processes the final command, and suspends ++ doing the CU_RESUME ++ the chip processes the next-yet-valid post-final-command. ++So blindly sending a CU_RESUME is only safe if we do it immediately after ++after erasing the previous CmdSuspend, without the possibility of an ++intervening delay. Thus the resume command is always within the ++interrupts-disabled region. This is a timing dependence, but handling this ++condition in a timing-independent way would considerably complicate the code. ++ ++Note: In previous generation Intel chips, restarting the command unit was a ++notoriously slow process. This is presumably no longer true. ++ ++IIIC. Receive structure ++ ++Because of the bus-master support on the Speedo3 this driver uses the new ++SKBUFF_RX_COPYBREAK scheme, rather than a fixed intermediate receive buffer. ++This scheme allocates full-sized skbuffs as receive buffers. The value ++SKBUFF_RX_COPYBREAK is used as the copying breakpoint: it is chosen to ++trade-off the memory wasted by passing the full-sized skbuff to the queue ++layer for all frames vs. the copying cost of copying a frame to a ++correctly-sized skbuff. ++ ++For small frames the copying cost is negligible (esp. 
considering that we ++are pre-loading the cache with immediately useful header information), so we ++allocate a new, minimally-sized skbuff. For large frames the copying cost ++is non-trivial, and the larger copy might flush the cache of useful data, so ++we pass up the skbuff the packet was received into. ++ ++IV. Notes ++ ++Thanks to Steve Williams of Intel for arranging the non-disclosure agreement ++that stated that I could disclose the information. But I still resent ++having to sign an Intel NDA when I'm helping Intel sell their own product! ++ ++*/ ++ ++static int speedo_found1(struct pci_dev *pdev, long ioaddr, int fnd_cnt, int acpi_idle_state); ++ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++static inline unsigned int io_inw(unsigned long port) ++{ ++ return inw(port); ++} ++static inline void io_outw(unsigned int val, unsigned long port) ++{ ++ outw(val, port); ++} ++ ++#ifndef USE_IO ++/* Currently alpha headers define in/out macros. ++ Undefine them. 2000/03/30 SAW */ ++#undef inb ++#undef inw ++#undef inl ++#undef outb ++#undef outw ++#undef outl ++#define inb(addr) readb((void *)(addr)) ++#define inw(addr) readw((void *)(addr)) ++#define inl(addr) readl((void *)(addr)) ++#define outb(val, addr) writeb(val, (void *)(addr)) ++#define outw(val, addr) writew(val, (void *)(addr)) ++#define outl(val, addr) writel(val, (void *)(addr)) ++#endif ++ ++/* How to wait for the command unit to accept a command. ++ Typically this takes 0 ticks. */ ++static inline void wait_for_cmd_done(long cmd_ioaddr) ++{ ++ int wait = 1000; ++ do udelay(1) ; ++ while(inb(cmd_ioaddr) && --wait >= 0); ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ if (wait < 0) ++ printk(KERN_ALERT "eepro100: wait_for_cmd_done timeout!\n"); ++#endif ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDSTATS ++static inline int rt_wait_for_cmd_done(long cmd_ioaddr, const char *cmd) ++{ ++ int wait = CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT; ++ rtmd_time_t t0, t1; ++ ++ t0 = rtdm_clock_read(); ++ while (inb(cmd_ioaddr) != 0) { ++ if (wait-- == 0) { ++ rtdm_printk(KERN_ALERT "eepro100: rt_wait_for_cmd_done(%s) " ++ "timeout!\n", cmd); ++ return 1; ++ } ++ rtdm_task_busy_sleep(1000); ++ } ++ return 0; ++} ++#else ++static inline int rt_wait_for_cmd_done(long cmd_ioaddr, const char *cmd) ++{ ++ int wait = CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT; ++ ++ while (inb(cmd_ioaddr) != 0) { ++ if (wait-- == 0) ++ return 1; ++ rtdm_task_busy_sleep(1000); ++ } ++ return 0; ++} ++#endif ++ ++/* Offsets to the various registers. ++ All accesses need not be longword aligned. */ ++enum speedo_offsets { ++ SCBStatus = 0, SCBCmd = 2, /* Rx/Command Unit command and status. */ ++ SCBPointer = 4, /* General purpose pointer. */ ++ SCBPort = 8, /* Misc. commands and operands. */ ++ SCBflash = 12, SCBeeprom = 14, /* EEPROM and flash memory control. */ ++ SCBCtrlMDI = 16, /* MDI interface control. */ ++ SCBEarlyRx = 20, /* Early receive byte count. */ ++}; ++/* Commands that can be put in a command list entry. */ ++enum commands { ++ CmdNOp = 0, CmdIASetup = 0x10000, CmdConfigure = 0x20000, ++ CmdMulticastList = 0x30000, CmdTx = 0x40000, CmdTDR = 0x50000, ++ CmdDump = 0x60000, CmdDiagnose = 0x70000, ++ CmdSuspend = 0x40000000, /* Suspend after completion. */ ++ CmdIntr = 0x20000000, /* Interrupt after completion. */ ++ CmdTxFlex = 0x00080000, /* Use "Flexible mode" for CmdTx command. 
*/ ++}; ++/* Clear CmdSuspend (1<<30) avoiding interference with the card access to the ++ status bits. Previous driver versions used separate 16 bit fields for ++ commands and statuses. --SAW ++ */ ++#if defined(__alpha__) ++# define clear_suspend(cmd) clear_bit(30, &(cmd)->cmd_status); ++#else ++# if defined(__LITTLE_ENDIAN) ++# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x4000 ++# elif defined(__BIG_ENDIAN) ++# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x0040 ++# else ++# error Unsupported byteorder ++# endif ++#endif ++ ++enum SCBCmdBits { ++ SCBMaskCmdDone=0x8000, SCBMaskRxDone=0x4000, SCBMaskCmdIdle=0x2000, ++ SCBMaskRxSuspend=0x1000, SCBMaskEarlyRx=0x0800, SCBMaskFlowCtl=0x0400, ++ SCBTriggerIntr=0x0200, SCBMaskAll=0x0100, ++ /* The rest are Rx and Tx commands. */ ++ CUStart=0x0010, CUResume=0x0020, CUStatsAddr=0x0040, CUShowStats=0x0050, ++ CUCmdBase=0x0060, /* CU Base address (set to zero) . */ ++ CUDumpStats=0x0070, /* Dump then reset stats counters. */ ++ RxStart=0x0001, RxResume=0x0002, RxAbort=0x0004, RxAddrLoad=0x0006, ++ RxResumeNoResources=0x0007, ++}; ++ ++enum SCBPort_cmds { ++ PortReset=0, PortSelfTest=1, PortPartialReset=2, PortDump=3, ++}; ++ ++/* The Speedo3 Rx and Tx frame/buffer descriptors. */ ++struct descriptor { /* A generic descriptor. */ ++ s32 cmd_status; /* All command and status fields. */ ++ u32 link; /* struct descriptor * */ ++ unsigned char params[0]; ++}; ++ ++/* The Speedo3 Rx and Tx buffer descriptors. */ ++struct RxFD { /* Receive frame descriptor. */ ++ s32 status; ++ u32 link; /* struct RxFD * */ ++ u32 rx_buf_addr; /* void * */ ++ u32 count; ++}; ++ ++/* Selected elements of the Tx/RxFD.status word. */ ++enum RxFD_bits { ++ RxComplete=0x8000, RxOK=0x2000, ++ RxErrCRC=0x0800, RxErrAlign=0x0400, RxErrTooBig=0x0200, RxErrSymbol=0x0010, ++ RxEth2Type=0x0020, RxNoMatch=0x0004, RxNoIAMatch=0x0002, ++ TxUnderrun=0x1000, StatusComplete=0x8000, ++}; ++ ++#define CONFIG_DATA_SIZE 22 ++struct TxFD { /* Transmit frame descriptor set. */ ++ s32 status; ++ u32 link; /* void * */ ++ u32 tx_desc_addr; /* Always points to the tx_buf_addr element. */ ++ s32 count; /* # of TBD (=1), Tx start thresh., etc. */ ++ /* This constitutes two "TBD" entries -- we only use one. */ ++#define TX_DESCR_BUF_OFFSET 16 ++ u32 tx_buf_addr0; /* void *, frame to be transmitted. */ ++ s32 tx_buf_size0; /* Length of Tx frame. */ ++ u32 tx_buf_addr1; /* void *, frame to be transmitted. */ ++ s32 tx_buf_size1; /* Length of Tx frame. */ ++ /* the structure must have space for at least CONFIG_DATA_SIZE starting ++ * from tx_desc_addr field */ ++}; ++ ++/* Multicast filter setting block. --SAW */ ++struct speedo_mc_block { ++ struct speedo_mc_block *next; ++ unsigned int tx; ++ dma_addr_t frame_dma; ++ unsigned int len; ++ struct descriptor frame __attribute__ ((__aligned__(16))); ++}; ++ ++/* Elements of the dump_statistics block. This block must be lword aligned. */ ++struct speedo_stats { ++ u32 tx_good_frames; ++ u32 tx_coll16_errs; ++ u32 tx_late_colls; ++ u32 tx_underruns; ++ u32 tx_lost_carrier; ++ u32 tx_deferred; ++ u32 tx_one_colls; ++ u32 tx_multi_colls; ++ u32 tx_total_colls; ++ u32 rx_good_frames; ++ u32 rx_crc_errs; ++ u32 rx_align_errs; ++ u32 rx_resource_errs; ++ u32 rx_overrun_errs; ++ u32 rx_colls_errs; ++ u32 rx_runt_errs; ++ u32 done_marker; ++}; ++ ++enum Rx_ring_state_bits { ++ RrNoMem=1, RrPostponed=2, RrNoResources=4, RrOOMReported=8, ++}; ++ ++/* Do not change the position (alignment) of the first few elements! 
++ The later elements are grouped for cache locality. ++ ++ Unfortunately, all the positions have been shifted since there. ++ A new re-alignment is required. 2000/03/06 SAW */ ++struct speedo_private { ++ struct TxFD *tx_ring; /* Commands (usually CmdTxPacket). */ ++ struct RxFD *rx_ringp[RX_RING_SIZE]; /* Rx descriptor, used as ring. */ ++ ++ // *** RTnet *** ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ // *** RTnet *** ++ ++ /* Mapped addresses of the rings. */ ++ dma_addr_t tx_ring_dma; ++#define TX_RING_ELEM_DMA(sp, n) ((sp)->tx_ring_dma + (n)*sizeof(struct TxFD)) ++ dma_addr_t rx_ring_dma[RX_RING_SIZE]; ++ struct descriptor *last_cmd; /* Last command sent. */ ++ unsigned int cur_tx, dirty_tx; /* The ring entries to be free()ed. */ ++ rtdm_lock_t lock; /* Group with Tx control cache line. */ ++ u32 tx_threshold; /* The value for txdesc.count. */ ++ struct RxFD *last_rxf; /* Last filled RX buffer. */ ++ dma_addr_t last_rxf_dma; ++ unsigned int cur_rx, dirty_rx; /* The next free ring entry */ ++ long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ ++ struct net_device_stats stats; ++ struct speedo_stats *lstats; ++ dma_addr_t lstats_dma; ++ int chip_id; ++ struct pci_dev *pdev; ++ struct speedo_mc_block *mc_setup_head;/* Multicast setup frame list head. */ ++ struct speedo_mc_block *mc_setup_tail;/* Multicast setup frame list tail. */ ++ long in_interrupt; /* Word-aligned rtdev->interrupt */ ++ unsigned char acpi_pwr; ++ signed char rx_mode; /* Current PROMISC/ALLMULTI setting. */ ++ unsigned int tx_full:1; /* The Tx queue is full. */ ++ unsigned int full_duplex:1; /* Full-duplex operation requested. */ ++ unsigned int flow_ctrl:1; /* Use 802.3x flow control. */ ++ unsigned int rx_bug:1; /* Work around receiver hang errata. */ ++ unsigned char default_port:8; /* Last rtdev->if_port value. */ ++ unsigned char rx_ring_state; /* RX ring status flags. */ ++ unsigned short phy[2]; /* PHY media interfaces available. */ ++ unsigned short advertising; /* Current PHY advertised caps. */ ++ unsigned short partner; /* Link partner caps. */ ++ rtdm_irq_t irq_handle; ++}; ++ ++/* The parameters for a CmdConfigure operation. ++ There are so many options that it would be difficult to document each bit. ++ We mostly use the default or recommended settings. */ ++static const char i82558_config_cmd[CONFIG_DATA_SIZE] = { ++ 22, 0x08, 0, 1, 0, 0, 0x22, 0x03, 1, /* 1=Use MII 0=Use AUI */ ++ 0, 0x2E, 0, 0x60, 0x08, 0x88, ++ 0x68, 0, 0x40, 0xf2, 0x84, /* Disable FC */ ++ 0x31, 0x05, }; ++ ++/* PHY media interface chips. 
*/ ++enum phy_chips { NonSuchPhy=0, I82553AB, I82553C, I82503, DP83840, S80C240, ++ S80C24, I82555, DP83840A=10, }; ++#define EE_READ_CMD (6) ++ ++static int eepro100_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent); ++static void eepro100_remove_one (struct pci_dev *pdev); ++ ++static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len); ++static int mdio_read(long ioaddr, int phy_id, int location); ++static int speedo_open(struct rtnet_device *rtdev); ++static void speedo_resume(struct rtnet_device *rtdev); ++static void speedo_init_rx_ring(struct rtnet_device *rtdev); ++static int speedo_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static void speedo_refill_rx_buffers(struct rtnet_device *rtdev, int force); ++static int speedo_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp); ++static void speedo_tx_buffer_gc(struct rtnet_device *rtdev); ++static int speedo_interrupt(rtdm_irq_t *irq_handle); ++static int speedo_close(struct rtnet_device *rtdev); ++static void set_rx_mode(struct rtnet_device *rtdev); ++static void speedo_show_state(struct rtnet_device *rtdev); ++static struct net_device_stats *speedo_get_stats(struct rtnet_device *rtdev); ++ ++ ++static inline void speedo_write_flush(long ioaddr) ++{ ++ /* Flush previous PCI writes through intermediate bridges ++ * by doing a benign read */ ++ (void)readb((void *)(ioaddr + SCBStatus)); ++} ++ ++static int eepro100_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ unsigned long ioaddr; ++ int irq; ++ int acpi_idle_state = 0, pm; ++ static int cards_found = -1; ++ ++ static int did_version /* = 0 */; /* Already printed version info. */ ++ if (speedo_debug > 0 && did_version++ == 0) ++ printk(version); ++ ++ // *** RTnet *** ++ cards_found++; ++ if (cards[cards_found] == 0) ++ goto err_out_none; ++ // *** RTnet *** ++ ++ if (!request_region(pci_resource_start(pdev, 1), ++ pci_resource_len(pdev, 1), "eepro100")) { ++ printk (KERN_ERR "eepro100: cannot reserve I/O ports\n"); ++ goto err_out_none; ++ } ++ if (!request_mem_region(pci_resource_start(pdev, 0), ++ pci_resource_len(pdev, 0), "eepro100")) { ++ printk (KERN_ERR "eepro100: cannot reserve MMIO region\n"); ++ goto err_out_free_pio_region; ++ } ++ ++ irq = pdev->irq; ++#ifdef USE_IO ++ ioaddr = pci_resource_start(pdev, 1); ++ if (speedo_debug > 2) ++ printk("Found Intel i82557 PCI Speedo at I/O %#lx, IRQ %d.\n", ++ ioaddr, irq); ++#else ++ ioaddr = (unsigned long)ioremap(pci_resource_start(pdev, 0), ++ pci_resource_len(pdev, 0)); ++ if (!ioaddr) { ++ printk(KERN_ERR "eepro100: cannot remap MMIO region %llx @ %llx\n", ++ (unsigned long long)pci_resource_len(pdev, 0), ++ (unsigned long long)pci_resource_start(pdev, 0)); ++ goto err_out_free_mmio_region; ++ } ++ if (speedo_debug > 2) ++ printk("Found Intel i82557 PCI Speedo, MMIO at %#llx, IRQ %d.\n", ++ (unsigned long long)pci_resource_start(pdev, 0), irq); ++#endif ++ ++ /* save power state b4 pci_enable_device overwrites it */ ++ pm = pci_find_capability(pdev, PCI_CAP_ID_PM); ++ if (pm) { ++ u16 pwr_command; ++ pci_read_config_word(pdev, pm + PCI_PM_CTRL, &pwr_command); ++ acpi_idle_state = pwr_command & PCI_PM_CTRL_STATE_MASK; ++ } ++ ++ if (pci_enable_device(pdev)) ++ goto err_out_free_mmio_region; ++ ++ pci_set_master(pdev); ++ ++ if (speedo_found1(pdev, ioaddr, cards_found, acpi_idle_state) != 0) ++ goto err_out_iounmap; ++ ++ return 0; ++ ++err_out_iounmap: ; ++#ifndef USE_IO ++ iounmap ((void *)ioaddr); ++#endif ++err_out_free_mmio_region: ++ 
release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); ++err_out_free_pio_region: ++ release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); ++err_out_none: ++ return -ENODEV; ++} ++ ++static int speedo_found1(struct pci_dev *pdev, ++ long ioaddr, int card_idx, int acpi_idle_state) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = NULL; ++ // *** RTnet *** ++ ++ struct speedo_private *sp; ++ const char *product; ++ int i, option; ++ u16 eeprom[0x100]; ++ int size; ++ void *tx_ring_space; ++ dma_addr_t tx_ring_dma; ++ ++ size = TX_RING_SIZE * sizeof(struct TxFD) + sizeof(struct speedo_stats); ++ tx_ring_space = pci_alloc_consistent(pdev, size, &tx_ring_dma); ++ if (tx_ring_space == NULL) ++ return -1; ++ ++ // *** RTnet *** ++ rtdev = rt_alloc_etherdev(sizeof(struct speedo_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "eepro100: Could not allocate ethernet device.\n"); ++ pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ memset(rtdev->priv, 0, sizeof(struct speedo_private)); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ // *** RTnet *** ++ ++ if (rtdev->mem_start > 0) ++ option = rtdev->mem_start; ++ else if (card_idx >= 0 && options[card_idx] >= 0) ++ option = options[card_idx]; ++ else ++ option = 0; ++ ++ /* Read the station address EEPROM before doing the reset. ++ Nominally his should even be done before accepting the device, but ++ then we wouldn't have a device name with which to report the error. ++ The size test is for 6 bit vs. 8 bit address serial EEPROMs. ++ */ ++ { ++ unsigned long iobase; ++ int read_cmd, ee_size; ++ u16 sum; ++ int j; ++ ++ /* Use IO only to avoid postponed writes and satisfy EEPROM timing ++ requirements. */ ++ iobase = pci_resource_start(pdev, 1); ++ if ((do_eeprom_cmd(iobase, EE_READ_CMD << 24, 27) & 0xffe0000) ++ == 0xffe0000) { ++ ee_size = 0x100; ++ read_cmd = EE_READ_CMD << 24; ++ } else { ++ ee_size = 0x40; ++ read_cmd = EE_READ_CMD << 22; ++ } ++ ++ for (j = 0, i = 0, sum = 0; i < ee_size; i++) { ++ u16 value = do_eeprom_cmd(iobase, read_cmd | (i << 16), 27); ++ eeprom[i] = value; ++ sum += value; ++ if (i < 3) { ++ rtdev->dev_addr[j++] = value; ++ rtdev->dev_addr[j++] = value >> 8; ++ } ++ } ++ if (sum != 0xBABA) ++ printk(KERN_WARNING "%s: Invalid EEPROM checksum %#4.4x, " ++ "check settings before activating this device!\n", ++ rtdev->name, sum); ++ /* Don't unregister_netdev(dev); as the EEPro may actually be ++ usable, especially if the MAC address is set later. ++ On the other hand, it may be unusable if MDI data is corrupted. */ ++ } ++ ++ /* Reset the chip: stop Tx and Rx processes and clear counters. ++ This takes less than 10usec and will easily finish before the next ++ action. */ ++ outl(PortReset, ioaddr + SCBPort); ++ inl(ioaddr + SCBPort); ++ udelay(10); ++ ++ if (eeprom[3] & 0x0100) ++ product = "OEM i82557/i82558 10/100 Ethernet"; ++ else ++ product = pci_name(pdev); ++ ++ printk(KERN_INFO "%s: %s, ", rtdev->name, product); ++ ++ for (i = 0; i < 5; i++) ++ printk("%2.2X:", rtdev->dev_addr[i]); ++ printk("%2.2X, ", rtdev->dev_addr[i]); ++#ifdef USE_IO ++ printk("I/O at %#3lx, ", ioaddr); ++#endif ++ printk("IRQ %d.\n", pdev->irq); ++ ++ outl(PortReset, ioaddr + SCBPort); ++ inl(ioaddr + SCBPort); ++ udelay(10); ++ ++ /* Return the chip to its original power state. 
*/ ++ pci_set_power_state(pdev, acpi_idle_state); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = pdev->irq; ++ ++ sp = rtdev->priv; ++ sp->pdev = pdev; ++ sp->acpi_pwr = acpi_idle_state; ++ sp->tx_ring = tx_ring_space; ++ sp->tx_ring_dma = tx_ring_dma; ++ sp->lstats = (struct speedo_stats *)(sp->tx_ring + TX_RING_SIZE); ++ sp->lstats_dma = TX_RING_ELEM_DMA(sp, TX_RING_SIZE); ++ ++ sp->full_duplex = option >= 0 && (option & 0x10) ? 1 : 0; ++ if (card_idx >= 0) { ++ if (full_duplex[card_idx] >= 0) ++ sp->full_duplex = full_duplex[card_idx]; ++ } ++ sp->default_port = option >= 0 ? (option & 0x0f) : 0; ++ ++ sp->phy[0] = eeprom[6]; ++ sp->phy[1] = eeprom[7]; ++ sp->rx_bug = (eeprom[3] & 0x03) == 3 ? 0 : 1; ++ if (((pdev->device > 0x1030 && (pdev->device < 0x1039))) ++ || (pdev->device == 0x2449)) { ++ sp->chip_id = 1; ++ } ++ ++ if (sp->rx_bug) ++ printk(KERN_ERR " *** Receiver lock-up bug detected ***\n" ++ KERN_ERR " Your device may not work reliably!\n"); ++ ++ // *** RTnet *** ++ /* The Speedo-specific entries in the device structure. */ ++ rtdev->open = &speedo_open; ++ rtdev->hard_start_xmit = &speedo_start_xmit; ++ rtdev->stop = &speedo_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = &speedo_get_stats; ++ //rtdev->do_ioctl = NULL; ++ ++ if ( (i=rt_register_rtnetdev(rtdev)) ) ++ { ++ pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ pci_set_drvdata (pdev, rtdev); ++ // *** RTnet *** ++ ++ return 0; ++} ++ ++/* Serial EEPROM section. ++ A "bit" grungy, but we work our way through bit-by-bit :->. */ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x01 /* EEPROM shift clock. */ ++#define EE_CS 0x02 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x04 /* EEPROM chip data in. */ ++#define EE_DATA_READ 0x08 /* EEPROM chip data out. */ ++#define EE_ENB (0x4800 | EE_CS) ++#define EE_WRITE_0 0x4802 ++#define EE_WRITE_1 0x4806 ++#define EE_OFFSET SCBeeprom ++ ++/* The fixes for the code were kindly provided by Dragan Stancevic ++ to strictly follow Intel specifications of EEPROM ++ access timing. ++ The publicly available sheet 64486302 (sec. 3.1) specifies 1us access ++ interval for serial EEPROM. However, it looks like that there is an ++ additional requirement dictating larger udelay's in the code below. ++ 2000/05/24 SAW */ ++static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len) ++{ ++ unsigned retval = 0; ++ long ee_addr = ioaddr + SCBeeprom; ++ ++ io_outw(EE_ENB, ee_addr); udelay(2); ++ io_outw(EE_ENB | EE_SHIFT_CLK, ee_addr); udelay(2); ++ ++ /* Shift the command bits out. */ ++ do { ++ short dataval = (cmd & (1 << cmd_len)) ? EE_WRITE_1 : EE_WRITE_0; ++ io_outw(dataval, ee_addr); udelay(2); ++ io_outw(dataval | EE_SHIFT_CLK, ee_addr); udelay(2); ++ retval = (retval << 1) | ((io_inw(ee_addr) & EE_DATA_READ) ? 1 : 0); ++ } while (--cmd_len >= 0); ++ io_outw(EE_ENB, ee_addr); udelay(2); ++ ++ /* Terminate the EEPROM access. */ ++ io_outw(EE_ENB & ~EE_CS, ee_addr); ++ return retval; ++} ++ ++static int mdio_read(long ioaddr, int phy_id, int location) ++{ ++ int val, boguscnt = 64*10; /* <64 usec. to complete, typ 27 ticks */ ++ outl(0x08000000 | (location<<16) | (phy_id<<21), ioaddr + SCBCtrlMDI); ++ do { ++ val = inl(ioaddr + SCBCtrlMDI); ++ if (--boguscnt < 0) { ++ printk(KERN_ERR " mdio_read() timed out with val = %8.8x.\n", val); ++ break; ++ } ++ } while (! 
(val & 0x10000000)); ++ return val & 0xffff; ++} ++ ++ ++static int ++speedo_open(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int retval; ++ ++ if (speedo_debug > 1) ++ printk(KERN_DEBUG "%s: speedo_open() irq %d.\n", rtdev->name, rtdev->irq); ++ ++ pci_set_power_state(sp->pdev, 0); ++ ++ /* Set up the Tx queue early.. */ ++ sp->cur_tx = 0; ++ sp->dirty_tx = 0; ++ sp->last_cmd = 0; ++ sp->tx_full = 0; ++ rtdm_lock_init(&sp->lock); ++ sp->in_interrupt = 0; ++ ++ // *** RTnet *** ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&sp->irq_handle, rtdev->irq, ++ speedo_interrupt, RTDM_IRQTYPE_SHARED, ++ "rt_eepro100", rtdev); ++ if (retval) { ++ return retval; ++ } ++ // *** RTnet *** ++ ++ rtdev->if_port = sp->default_port; ++ ++ speedo_init_rx_ring(rtdev); ++ ++ /* Fire up the hardware. */ ++ outw(SCBMaskAll, ioaddr + SCBCmd); ++ speedo_write_flush(ioaddr); ++ speedo_resume(rtdev); ++ ++ netdevice_start(rtdev); ++ rtnetif_start_queue(rtdev); ++ ++ /* Setup the chip and configure the multicast list. */ ++ sp->mc_setup_head = NULL; ++ sp->mc_setup_tail = NULL; ++ sp->flow_ctrl = sp->partner = 0; ++ sp->rx_mode = -1; /* Invalid -> always reset the mode. */ ++ set_rx_mode(rtdev); ++ if ((sp->phy[0] & 0x8000) == 0) ++ sp->advertising = mdio_read(ioaddr, sp->phy[0] & 0x1f, 4); ++ ++ if (mdio_read(ioaddr, sp->phy[0] & 0x1f, MII_BMSR) & BMSR_LSTATUS) ++ rtnetif_carrier_on(rtdev); ++ else ++ rtnetif_carrier_off(rtdev); ++ ++ if (speedo_debug > 2) { ++ printk(KERN_DEBUG "%s: Done speedo_open(), status %8.8x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ } ++ ++ /* No need to wait for the command unit to accept here. */ ++ if ((sp->phy[0] & 0x8000) == 0) ++ mdio_read(ioaddr, sp->phy[0] & 0x1f, 0); ++ ++ return 0; ++} ++ ++/* Start the chip hardware after a full reset. */ ++static void speedo_resume(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ ++ /* Start with a Tx threshold of 256 (0x..20.... 8 byte units). */ ++ sp->tx_threshold = 0x01208000; ++ ++ /* Set the segment registers to '0'. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(0, ioaddr + SCBPointer); ++ /* impose a delay to avoid a bug */ ++ inl(ioaddr + SCBPointer); ++ udelay(10); ++ outb(RxAddrLoad, ioaddr + SCBCmd); ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outb(CUCmdBase, ioaddr + SCBCmd); ++ ++ /* Load the statistics block and rx ring addresses. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(sp->lstats_dma, ioaddr + SCBPointer); ++ outb(CUStatsAddr, ioaddr + SCBCmd); ++ sp->lstats->done_marker = 0; ++ ++ if (sp->rx_ringp[sp->cur_rx % RX_RING_SIZE] == NULL) { ++ if (speedo_debug > 2) ++ printk(KERN_DEBUG "%s: NULL cur_rx in speedo_resume().\n", ++ rtdev->name); ++ } else { ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ } ++ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outb(CUDumpStats, ioaddr + SCBCmd); ++ udelay(30); ++ ++ /* Fill the first command with our physical address. */ ++ { ++ struct descriptor *ias_cmd; ++ ++ ias_cmd = ++ (struct descriptor *)&sp->tx_ring[sp->cur_tx++ % TX_RING_SIZE]; ++ /* Avoid a bug(?!) here by marking the command already completed. 
*/ ++ ias_cmd->cmd_status = cpu_to_le32((CmdSuspend | CmdIASetup) | 0xa000); ++ ias_cmd->link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE)); ++ memcpy(ias_cmd->params, rtdev->dev_addr, 6); ++ sp->last_cmd = ias_cmd; ++ } ++ ++ /* Start the chip's Tx process and unmask interrupts. */ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE), ++ ioaddr + SCBPointer); ++ /* We are not ACK-ing FCP and ER in the interrupt handler yet so they should ++ remain masked --Dragan */ ++ outw(CUStart | SCBMaskEarlyRx | SCBMaskFlowCtl, ioaddr + SCBCmd); ++} ++ ++static void speedo_show_state(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ unsigned int i; ++ ++ /* Print a few items for debugging. */ ++ if (speedo_debug > 0) { ++ printk(KERN_DEBUG "%s: Tx ring dump, Tx queue %u / %u:\n", rtdev->name, ++ sp->cur_tx, sp->dirty_tx); ++ for (i = 0; i < TX_RING_SIZE; i++) ++ printk(KERN_DEBUG "%s: %c%c%2d %8.8x.\n", rtdev->name, ++ i == sp->dirty_tx % TX_RING_SIZE ? '*' : ' ', ++ i == sp->cur_tx % TX_RING_SIZE ? '=' : ' ', ++ i, sp->tx_ring[i].status); ++ } ++ printk(KERN_DEBUG "%s: Printing Rx ring" ++ " (next to receive into %u, dirty index %u).\n", ++ rtdev->name, sp->cur_rx, sp->dirty_rx); ++ ++ for (i = 0; i < RX_RING_SIZE; i++) ++ printk(KERN_DEBUG "%s: %c%c%c%2d %8.8x.\n", rtdev->name, ++ sp->rx_ringp[i] == sp->last_rxf ? 'l' : ' ', ++ i == sp->dirty_rx % RX_RING_SIZE ? '*' : ' ', ++ i == sp->cur_rx % RX_RING_SIZE ? '=' : ' ', ++ i, (sp->rx_ringp[i] != NULL) ? ++ (unsigned)sp->rx_ringp[i]->status : 0); ++ ++ { ++ long ioaddr = rtdev->base_addr; ++ int phy_num = sp->phy[0] & 0x1f; ++ for (i = 0; i < 16; i++) { ++ /* FIXME: what does it mean? --SAW */ ++ if (i == 6) i = 21; ++ printk(KERN_DEBUG "%s: PHY index %d register %d is %4.4x.\n", ++ rtdev->name, phy_num, i, mdio_read(ioaddr, phy_num, i)); ++ } ++ } ++} ++ ++static struct net_device_stats *speedo_get_stats(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ return &sp->stats; ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void ++speedo_init_rx_ring(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ struct RxFD *rxf, *last_rxf = NULL; ++ dma_addr_t last_rxf_dma = 0 /* to shut up the compiler */; ++ int i; ++ ++ sp->cur_rx = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb; ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ + 2 + sizeof(struct RxFD)); ++ sp->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; /* OK. Just initially short of Rx bufs. */ ++ // *** RTnet *** ++ rtskb_reserve(skb, 2); /* IP header alignment */ ++ // *** RTnet *** ++ rxf = (struct RxFD *)skb->tail; ++ sp->rx_ringp[i] = rxf; ++ sp->rx_ring_dma[i] = ++ pci_map_single(sp->pdev, rxf, ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_BIDIRECTIONAL); ++ rtskb_reserve(skb, sizeof(struct RxFD)); ++ if (last_rxf) { ++ last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]); ++ pci_dma_sync_single_for_device(sp->pdev, last_rxf_dma, ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ } ++ last_rxf = rxf; ++ last_rxf_dma = sp->rx_ring_dma[i]; ++ rxf->status = cpu_to_le32(0x00000001); /* '1' is flag value only. */ ++ rxf->link = 0; /* None yet. */ ++ /* This field unused by i82557. 
*/ ++ rxf->rx_buf_addr = 0xffffffff; ++ rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[i], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ } ++ sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++ /* Mark the last entry as end-of-list. */ ++ last_rxf->status = cpu_to_le32(0xC0000002); /* '2' is flag value only. */ ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ sp->last_rxf = last_rxf; ++ sp->last_rxf_dma = last_rxf_dma; ++} ++ ++static int ++speedo_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int entry; ++ // *** RTnet *** ++ rtdm_lockctx_t context; ++ ++ /* Prevent interrupts from changing the Tx ring from underneath us. */ ++ rtdm_lock_get_irqsave(&sp->lock, context); ++ // *** RTnet *** ++ ++ /* Check if there are enough space. */ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ // *** RTnet *** ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ ++ rtdm_printk(KERN_ERR "%s: incorrect tbusy state, fixed.\n", rtdev->name); ++ // *** RTnet *** ++ ++ return 1; ++ } ++ ++ /* Calculate the Tx descriptor entry. */ ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ ++ sp->tx_skbuff[entry] = skb; ++ sp->tx_ring[entry].status = ++ cpu_to_le32(CmdSuspend | CmdTx | CmdTxFlex); ++ if (!(entry & ((TX_RING_SIZE>>2)-1))) ++ sp->tx_ring[entry].status |= cpu_to_le32(CmdIntr); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE)); ++ sp->tx_ring[entry].tx_desc_addr = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, entry) + TX_DESCR_BUF_OFFSET); ++ /* The data region is always in one buffer descriptor. */ ++ sp->tx_ring[entry].count = cpu_to_le32(sp->tx_threshold); ++ sp->tx_ring[entry].tx_buf_addr0 = ++ cpu_to_le32(pci_map_single(sp->pdev, skb->data, ++ skb->len, PCI_DMA_TODEVICE)); ++ sp->tx_ring[entry].tx_buf_size0 = cpu_to_le32(skb->len); ++ ++// *** RTnet *** ++// Disabled to gain shorter worst-case execution times. ++// Hope this bug is not relevant for us ++ ++ /* Trigger the command unit resume. */ ++ if (rt_wait_for_cmd_done(ioaddr + SCBCmd, __FUNCTION__) != 0) { ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ ++ return 1; ++ } ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++// *** RTnet *** ++ ++ clear_suspend(sp->last_cmd); ++ /* We want the time window between clearing suspend flag on the previous ++ command and resuming CU to be as small as possible. ++ Interrupts in between are very undesired. --SAW */ ++ outb(CUResume, ioaddr + SCBCmd); ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ /* Leave room for set_rx_mode(). If there is no more space than reserved ++ for multicast filter mark the ring as full. 
*/ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ ++ // *** RTnet *** ++ rtdm_lock_put_irqrestore(&sp->lock, context); ++ // *** RTnet *** ++ ++ return 0; ++} ++ ++static void speedo_tx_buffer_gc(struct rtnet_device *rtdev) ++{ ++ unsigned int dirty_tx; ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ dirty_tx = sp->dirty_tx; ++ while ((int)(sp->cur_tx - dirty_tx) > 0) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(sp->tx_ring[entry].status); ++ ++ if (speedo_debug > 5) ++ printk(KERN_DEBUG " scavenge candidate %d status %4.4x.\n", ++ entry, status); ++ if ((status & StatusComplete) == 0) ++ break; /* It still hasn't been processed. */ ++ if (status & TxUnderrun) ++ if (sp->tx_threshold < 0x01e08000) { ++ if (speedo_debug > 2) ++ printk(KERN_DEBUG "%s: TX underrun, threshold adjusted.\n", ++ rtdev->name); ++ sp->tx_threshold += 0x00040000; ++ } ++ /* Free the original skb. */ ++ if (sp->tx_skbuff[entry]) { ++ sp->stats.tx_packets++; /* Count only user packets. */ ++ sp->stats.tx_bytes += sp->tx_skbuff[entry]->len; ++ pci_unmap_single(sp->pdev, ++ le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0), ++ sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); ++ ++ // *** RTnet *** ++ dev_kfree_rtskb(sp->tx_skbuff[entry]); ++ // *** RTnet *** ++ ++ sp->tx_skbuff[entry] = 0; ++ } ++ dirty_tx++; ++ } ++ ++// *** RTnet *** ++// *** RTnet *** ++ ++ sp->dirty_tx = dirty_tx; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++static int speedo_interrupt(rtdm_irq_t *irq_handle) ++{ ++ // *** RTnet *** ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *rtdev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ int ret = RTDM_IRQ_NONE; ++ // *** RTnet *** ++ ++ struct speedo_private *sp; ++ long ioaddr, boguscnt = max_interrupt_work; ++ unsigned short status; ++ ++ ++ ioaddr = rtdev->base_addr; ++ sp = (struct speedo_private *)rtdev->priv; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ /* A lock to prevent simultaneous entry on SMP machines. */ ++ if (test_and_set_bit(0, (void*)&sp->in_interrupt)) { ++ rtdm_printk(KERN_ERR"%s: SMP simultaneous entry of an interrupt handler.\n", ++ rtdev->name); ++ sp->in_interrupt = 0; /* Avoid halting machine. */ ++ return ret; ++ } ++#endif ++ ++ do { ++ status = inw(ioaddr + SCBStatus); ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ /* Will change from 0xfc00 to 0xff00 when we start handling ++ FCP and ER interrupts --Dragan */ ++ outw(status & 0xfc00, ioaddr + SCBStatus); ++ speedo_write_flush(ioaddr); ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: interrupt status=%#4.4x.\n", ++ rtdev->name, status); ++ ++ if ((status & 0xfc00) == 0) ++ break; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* Always check if all rx buffers are allocated. --SAW */ ++ speedo_refill_rx_buffers(rtdev, 0); ++ ++ if ((status & 0x5000) || /* Packet received, or Rx error. */ ++ (sp->rx_ring_state&(RrNoMem|RrPostponed)) == RrPostponed) ++ /* Need to gather the postponed packet. */ ++ speedo_rx(rtdev, &packets, &time_stamp); ++ ++ if (status & 0x1000) { ++ rtdm_lock_get(&sp->lock); ++ if ((status & 0x003c) == 0x0028) { /* No more Rx buffers. 
*/ ++ struct RxFD *rxf; ++ rtdm_printk(KERN_WARNING "%s: card reports no RX buffers.\n", ++ rtdev->name); ++ rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; ++ if (rxf == NULL) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: NULL cur_rx in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else if (rxf == sp->last_rxf) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: cur_rx is last in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else ++ outb(RxResumeNoResources, ioaddr + SCBCmd); ++ } else if ((status & 0x003c) == 0x0008) { /* No resources. */ ++ struct RxFD *rxf; ++ rtdm_printk(KERN_WARNING "%s: card reports no resources.\n", ++ rtdev->name); ++ rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; ++ if (rxf == NULL) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: NULL cur_rx in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else if (rxf == sp->last_rxf) { ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG ++ "%s: cur_rx is last in speedo_interrupt().\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrNoMem|RrNoResources; ++ } else { ++ /* Restart the receiver. */ ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ } ++ } ++ sp->stats.rx_errors++; ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ if ((sp->rx_ring_state&(RrNoMem|RrNoResources)) == RrNoResources) { ++ rtdm_printk(KERN_WARNING ++ "%s: restart the receiver after a possible hang.\n", ++ rtdev->name); ++ rtdm_lock_get(&sp->lock); ++ /* Restart the receiver. ++ I'm not sure if it's always right to restart the receiver ++ here but I don't know another way to prevent receiver hangs. ++ 1999/12/25 SAW */ ++ outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], ++ ioaddr + SCBPointer); ++ outb(RxStart, ioaddr + SCBCmd); ++ sp->rx_ring_state &= ~RrNoResources; ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ /* User interrupt, Command/Tx unit interrupt or CU not active. */ ++ if (status & 0xA400) { ++ rtdm_lock_get(&sp->lock); ++ speedo_tx_buffer_gc(rtdev); ++ if (sp->tx_full ++ && (int)(sp->cur_tx - sp->dirty_tx) < TX_QUEUE_UNFULL) { ++ /* The ring is no longer full. */ ++ sp->tx_full = 0; ++ rtnetif_wake_queue(rtdev); /* Attention: under a spinlock. --SAW */ ++ } ++ rtdm_lock_put(&sp->lock); ++ } ++ ++ if (--boguscnt < 0) { ++ rtdm_printk(KERN_ERR "%s: Too much work at interrupt, status=0x%4.4x.\n", ++ rtdev->name, status); ++ /* Clear all interrupt sources. */ ++ /* Will change from 0xfc00 to 0xff00 when we start handling ++ FCP and ER interrupts --Dragan */ ++ outw(0xfc00, ioaddr + SCBStatus); ++ break; ++ } ++ } while (1); ++ ++ if (speedo_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ ++ clear_bit(0, (void*)&sp->in_interrupt); ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return ret; ++} ++ ++static inline struct RxFD *speedo_rx_alloc(struct rtnet_device *rtdev, int entry) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ struct RxFD *rxf; ++ struct rtskb *skb; ++ /* Get a fresh skbuff to replace the consumed one. 
*/ ++ skb = rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ + 2 + sizeof(struct RxFD)); ++ sp->rx_skbuff[entry] = skb; ++ if (skb == NULL) { ++ sp->rx_ringp[entry] = NULL; ++ return NULL; ++ } ++ rtskb_reserve(skb, 2); /* IP header alignment */ ++ rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail; ++ sp->rx_ring_dma[entry] = ++ pci_map_single(sp->pdev, rxf, ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ rtskb_reserve(skb, sizeof(struct RxFD)); ++ rxf->rx_buf_addr = 0xffffffff; ++ pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry], ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ return rxf; ++} ++ ++static inline void speedo_rx_link(struct rtnet_device *rtdev, int entry, ++ struct RxFD *rxf, dma_addr_t rxf_dma) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ rxf->status = cpu_to_le32(0xC0000001); /* '1' for driver use only. */ ++ rxf->link = 0; /* None yet. */ ++ rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); ++ sp->last_rxf->link = cpu_to_le32(rxf_dma); ++ sp->last_rxf->status &= cpu_to_le32(~0xC0000000); ++ pci_dma_sync_single_for_device(sp->pdev, sp->last_rxf_dma, ++ sizeof(struct RxFD), PCI_DMA_TODEVICE); ++ sp->last_rxf = rxf; ++ sp->last_rxf_dma = rxf_dma; ++} ++ ++static int speedo_refill_rx_buf(struct rtnet_device *rtdev, int force) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int entry; ++ struct RxFD *rxf; ++ ++ entry = sp->dirty_rx % RX_RING_SIZE; ++ if (sp->rx_skbuff[entry] == NULL) { ++ rxf = speedo_rx_alloc(rtdev, entry); ++ if (rxf == NULL) { ++ unsigned int forw; ++ int forw_entry; ++ if (speedo_debug > 2 || !(sp->rx_ring_state & RrOOMReported)) { ++ // *** RTnet *** ++ rtdm_printk(KERN_WARNING "%s: can't fill rx buffer (force %d)!\n", ++ rtdev->name, force); ++ // *** RTnet *** ++ sp->rx_ring_state |= RrOOMReported; ++ } ++ if (!force) ++ return -1; /* Better luck next time! */ ++ /* Borrow an skb from one of next entries. */ ++ for (forw = sp->dirty_rx + 1; forw != sp->cur_rx; forw++) ++ if (sp->rx_skbuff[forw % RX_RING_SIZE] != NULL) ++ break; ++ if (forw == sp->cur_rx) ++ return -1; ++ forw_entry = forw % RX_RING_SIZE; ++ sp->rx_skbuff[entry] = sp->rx_skbuff[forw_entry]; ++ sp->rx_skbuff[forw_entry] = NULL; ++ rxf = sp->rx_ringp[forw_entry]; ++ sp->rx_ringp[forw_entry] = NULL; ++ sp->rx_ringp[entry] = rxf; ++ } ++ } else { ++ rxf = sp->rx_ringp[entry]; ++ } ++ speedo_rx_link(rtdev, entry, rxf, sp->rx_ring_dma[entry]); ++ sp->dirty_rx++; ++ sp->rx_ring_state &= ~(RrNoMem|RrOOMReported); /* Mark the progress. */ ++ return 0; ++} ++ ++static void speedo_refill_rx_buffers(struct rtnet_device *rtdev, int force) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ /* Refill the RX ring. */ ++ while ((int)(sp->cur_rx - sp->dirty_rx) > 0 && ++ speedo_refill_rx_buf(rtdev, force) != -1); ++} ++ ++static int ++speedo_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int entry = sp->cur_rx % RX_RING_SIZE; ++ int rx_work_limit = sp->dirty_rx + RX_RING_SIZE - sp->cur_rx; ++ int alloc_ok = 1; ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG " In speedo_rx().\n"); ++ /* If we own the next entry, it's a new packet. Send it up. 
*/ ++ while (sp->rx_ringp[entry] != NULL) { ++ int status; ++ int pkt_len; ++ ++ pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry], ++ sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ status = le32_to_cpu(sp->rx_ringp[entry]->status); ++ pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; ++ ++ if (!(status & RxComplete)) ++ break; ++ ++ if (--rx_work_limit < 0) ++ break; ++ ++ /* Check for a rare out-of-memory case: the current buffer is ++ the last buffer allocated in the RX ring. --SAW */ ++ if (sp->last_rxf == sp->rx_ringp[entry]) { ++ /* Postpone the packet. It'll be reaped at an interrupt when this ++ packet is no longer the last packet in the ring. */ ++ if (speedo_debug > 2) ++ rtdm_printk(KERN_DEBUG "%s: RX packet postponed!\n", ++ rtdev->name); ++ sp->rx_ring_state |= RrPostponed; ++ break; ++ } ++ ++ if (speedo_debug > 4) ++ rtdm_printk(KERN_DEBUG " speedo_rx() status %8.8x len %d.\n", status, ++ pkt_len); ++ if ((status & (RxErrTooBig|RxOK|0x0f90)) != RxOK) { ++ if (status & RxErrTooBig) ++ rtdm_printk(KERN_ERR "%s: Ethernet frame overran the Rx buffer, " ++ "status %8.8x!\n", rtdev->name, status); ++ else if (! (status & RxOK)) { ++ /* There was a fatal error. This *should* be impossible. */ ++ sp->stats.rx_errors++; ++ rtdm_printk(KERN_ERR "%s: Anomalous event in speedo_rx(), " ++ "status %8.8x.\n", ++ rtdev->name, status); ++ } ++ } else { ++ struct rtskb *skb; ++ ++// *** RTnet *** ++ { ++// *** RTnet *** ++ /* Pass up the already-filled skbuff. */ ++ skb = sp->rx_skbuff[entry]; ++ if (skb == NULL) { ++ rtdm_printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n", ++ rtdev->name); ++ break; ++ } ++ sp->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); ++ sp->rx_ringp[entry] = NULL; ++ pci_unmap_single(sp->pdev, sp->rx_ring_dma[entry], ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ } ++ skb->protocol = rt_eth_type_trans(skb, rtdev); ++ //rtmac ++ skb->time_stamp = *time_stamp; ++ //rtmac ++ rtnetif_rx(skb); ++ (*packets)++; ++ sp->stats.rx_packets++; ++ sp->stats.rx_bytes += pkt_len; ++ } ++ entry = (++sp->cur_rx) % RX_RING_SIZE; ++ sp->rx_ring_state &= ~RrPostponed; ++ /* Refill the recently taken buffers. ++ Do it one-by-one to handle traffic bursts better. */ ++ if (alloc_ok && speedo_refill_rx_buf(rtdev, 0) == -1) ++ alloc_ok = 0; ++ } ++ ++ /* Try hard to refill the recently taken buffers. */ ++ speedo_refill_rx_buffers(rtdev, 1); ++ ++ sp->last_rx_time = jiffies; ++ ++ return 0; ++} ++ ++static int ++speedo_close(struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ int i; ++ ++ netdevice_stop(rtdev); ++ rtnetif_stop_queue(rtdev); ++ ++ if (speedo_debug > 1) ++ printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n", ++ rtdev->name, inw(ioaddr + SCBStatus)); ++ ++ /* Shutdown procedure according to Intel's e100 */ ++ outl(PortPartialReset, ioaddr + SCBPort); ++ speedo_write_flush(ioaddr); udelay(20); ++ ++ outl(PortReset, ioaddr + SCBPort); ++ speedo_write_flush(ioaddr); udelay(20); ++ ++ outw(SCBMaskAll, ioaddr + SCBCmd); ++ speedo_write_flush(ioaddr); ++ ++ // *** RTnet *** ++ if ( (i=rtdm_irq_free(&sp->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ ++ // *** RTnet *** ++ ++ /* Print a few items for debugging. */ ++ if (speedo_debug > 3) ++ speedo_show_state(rtdev); ++ ++ /* Free all the skbuffs in the Rx and Tx queues. 
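++	   Buffers still mapped for DMA are released with pci_unmap_single()
++	   before the corresponding rtskb is freed.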
*/ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb = sp->rx_skbuff[i]; ++ sp->rx_skbuff[i] = 0; ++ /* Clear the Rx descriptors. */ ++ if (skb) { ++ pci_unmap_single(sp->pdev, ++ sp->rx_ring_dma[i], ++ PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(skb); ++ } ++ } ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ struct rtskb *skb = sp->tx_skbuff[i]; ++ sp->tx_skbuff[i] = 0; ++ /* Clear the Tx descriptors. */ ++ if (skb) { ++ pci_unmap_single(sp->pdev, ++ le32_to_cpu(sp->tx_ring[i].tx_buf_addr0), ++ skb->len, PCI_DMA_TODEVICE); ++ ++ // *** RTnet *** ++ dev_kfree_rtskb(skb); ++ // *** RTnet *** ++ } ++ } ++ ++// *** RTnet *** ++// *** RTnet *** ++ ++ pci_set_power_state(sp->pdev, 2); ++ ++ return 0; ++} ++ ++ ++/* Set or clear the multicast filter for this adaptor. ++ This is very ugly with Intel chips -- we usually have to execute an ++ entire configuration command, plus process a multicast command. ++ This is complicated. We must put a large configuration command and ++ an arbitrarily-sized multicast command in the transmit list. ++ To minimize the disruption -- the previous command might have already ++ loaded the link -- we convert the current command block, normally a Tx ++ command, into a no-op and link it to the new command. ++*/ ++static void set_rx_mode(struct rtnet_device *rtdev) ++{ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ struct descriptor *last_cmd; ++ char new_rx_mode; ++ //unsigned long flags; ++ int entry/*, i*/; ++ ++ if (rtdev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ new_rx_mode = 3; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ new_rx_mode = 1; ++ } else ++ new_rx_mode = 0; ++ ++ if (speedo_debug > 3) ++ printk(KERN_DEBUG "%s: set_rx_mode %d -> %d\n", rtdev->name, ++ sp->rx_mode, new_rx_mode); ++ ++ if ((int)(sp->cur_tx - sp->dirty_tx) > TX_RING_SIZE - TX_MULTICAST_SIZE) { ++ /* The Tx ring is full -- don't add anything! Hope the mode will be ++ * set again later. */ ++ sp->rx_mode = -1; ++ return; ++ } ++ ++ if (new_rx_mode != sp->rx_mode) { ++ u8 *config_cmd_data; ++ ++ //spin_lock_irqsave(&sp->lock, flags); --- disabled for now as it runs before irq handler is active ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ last_cmd = sp->last_cmd; ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ sp->tx_skbuff[entry] = 0; /* Redundant. */ ++ sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdConfigure); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); ++ config_cmd_data = (void *)&sp->tx_ring[entry].tx_desc_addr; ++ /* Construct a full CmdConfig frame. */ ++ memcpy(config_cmd_data, i82558_config_cmd, CONFIG_DATA_SIZE); ++ config_cmd_data[1] = (txfifo << 4) | rxfifo; ++ config_cmd_data[4] = rxdmacount; ++ config_cmd_data[5] = txdmacount + 0x80; ++ config_cmd_data[15] |= (new_rx_mode & 2) ? 1 : 0; ++ /* 0x80 doesn't disable FC 0x84 does. ++ Disable Flow control since we are not ACK-ing any FC interrupts ++ for now. --Dragan */ ++ config_cmd_data[19] = 0x84; ++ config_cmd_data[19] |= sp->full_duplex ? 0x40 : 0; ++ config_cmd_data[21] = (new_rx_mode & 1) ? 0x0D : 0x05; ++ if (sp->phy[0] & 0x8000) { /* Use the AUI port instead. */ ++ config_cmd_data[15] |= 0x80; ++ config_cmd_data[8] = 0; ++ } ++ /* Trigger the command unit resume. 
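++	   The suspend bit of the previously queued command is cleared first
++	   so that the command unit walks on to the new configuration block
++	   once resumed.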
*/ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ clear_suspend(last_cmd); ++ outb(CUResume, ioaddr + SCBCmd); ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ //spin_unlock_irqrestore(&sp->lock, flags); ++ } ++ ++ if (new_rx_mode == 0) { ++ /* The simple case of 0-3 multicast list entries occurs often, and ++ fits within one tx_ring[] entry. */ ++ /*struct dev_mc_list *mclist;*/ ++ u16 *setup_params/*, *eaddrs*/; ++ ++ //spin_lock_irqsave(&sp->lock, flags); --- disabled for now as it runs before irq handler is active ++ entry = sp->cur_tx++ % TX_RING_SIZE; ++ last_cmd = sp->last_cmd; ++ sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; ++ ++ sp->tx_skbuff[entry] = 0; ++ sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdMulticastList); ++ sp->tx_ring[entry].link = ++ cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); ++ sp->tx_ring[entry].tx_desc_addr = 0; /* Really MC list count. */ ++ setup_params = (u16 *)&sp->tx_ring[entry].tx_desc_addr; ++ *setup_params++ = cpu_to_le16(0); /* mc_count */ ++// *** RTnet *** ++// *** RTnet *** ++ ++ wait_for_cmd_done(ioaddr + SCBCmd); ++ clear_suspend(last_cmd); ++ /* Immediately trigger the command unit resume. */ ++ outb(CUResume, ioaddr + SCBCmd); ++ ++ if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { ++ rtnetif_stop_queue(rtdev); ++ sp->tx_full = 1; ++ } ++ //spin_unlock_irqrestore(&sp->lock, flags); ++// *** RTnet *** ++// *** RTnet *** ++ } ++ ++ sp->rx_mode = new_rx_mode; ++} ++ ++ ++static void eepro100_remove_one (struct pci_dev *pdev) ++{ ++ // *** RTnet *** ++ struct rtnet_device *rtdev = pci_get_drvdata (pdev); ++ ++ struct speedo_private *sp = (struct speedo_private *)rtdev->priv; ++ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ // *** RTnet *** ++ ++ release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); ++ release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); ++ ++#ifndef USE_IO ++ iounmap((char *)rtdev->base_addr); ++#endif ++ ++ pci_free_consistent(pdev, TX_RING_SIZE * sizeof(struct TxFD) ++ + sizeof(struct speedo_stats), ++ sp->tx_ring, sp->tx_ring_dma); ++ pci_disable_device(pdev); ++ ++ // *** RTnet *** ++ rtdev_free(rtdev); ++ // *** RTnet *** ++} ++ ++static struct pci_device_id eepro100_pci_tbl[] = { ++ { PCI_VENDOR_ID_INTEL, 0x1229, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1209, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1029, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1030, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1031, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1032, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1033, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1034, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1035, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1036, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1037, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1038, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1039, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103A, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103B, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103C, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103D, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x103E, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1092, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1227, 
PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x1228, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x2449, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x2459, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x245D, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x27DC, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x5200, PCI_ANY_ID, PCI_ANY_ID, }, ++ { PCI_VENDOR_ID_INTEL, 0x5201, PCI_ANY_ID, PCI_ANY_ID, }, ++ { 0,} ++}; ++MODULE_DEVICE_TABLE(pci, eepro100_pci_tbl); ++ ++static struct pci_driver eepro100_driver = { ++ name: "eepro100_rt", ++ id_table: eepro100_pci_tbl, ++ probe: eepro100_init_one, ++ remove: eepro100_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++static int __init eepro100_init_module(void) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ if (local_debug >= 0 && speedo_debug != local_debug) ++ printk(KERN_INFO "eepro100.c: Debug level is %d.\n", local_debug); ++ if (local_debug >= 0) ++ speedo_debug = local_debug; ++#else /* !CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG */ ++ local_debug = speedo_debug; /* touch debug variable */ ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100_DBG */ ++ ++ return pci_register_driver(&eepro100_driver); ++} ++ ++static void __exit eepro100_cleanup_module(void) ++{ ++ pci_unregister_driver(&eepro100_driver); ++} ++ ++module_init(eepro100_init_module); ++module_exit(eepro100_cleanup_module); +--- linux/drivers/xenomai/net/drivers/smc91111.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/smc91111.c 2021-04-07 16:01:27.344633999 +0800 +@@ -0,0 +1,3531 @@ ++/*------------------------------------------------------------------------ ++ . smc91111.c ++ . This is a driver for SMSC's 91C111 single-chip Ethernet device. ++ . ++ . Copyright (C) 2001 Standard Microsystems Corporation (SMSC) ++ . Developed by Simple Network Magic Corporation (SNMC) ++ . Copyright (C) 1996 by Erik Stahlman (ES) ++ . ++ . This program is free software; you can redistribute it and/or modify ++ . it under the terms of the GNU General Public License as published by ++ . the Free Software Foundation; either version 2 of the License, or ++ . (at your option) any later version. ++ . ++ . This program is distributed in the hope that it will be useful, ++ . but WITHOUT ANY WARRANTY; without even the implied warranty of ++ . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ . GNU General Public License for more details. ++ . ++ . You should have received a copy of the GNU General Public License ++ . along with this program; if not, write to the Free Software ++ . Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ . ++ . Information contained in this file was obtained from the LAN91C111 ++ . manual from SMC. To get a copy, if you really want one, you can find ++ . information under www.smsc.com. ++ . ++ . ++ . "Features" of the SMC chip: ++ . Integrated PHY/MAC for 10/100BaseT Operation ++ . Supports internal and external MII ++ . Integrated 8K packet memory ++ . EEPROM interface for configuration ++ . ++ . Arguments: ++ . io = for the base address ++ . irq = for the IRQ ++ . nowait = 0 for normal wait states, 1 eliminates additional wait states ++ . ++ . author: ++ . Erik Stahlman ( erik@vt.edu ) ++ . Daris A Nevil ( dnevil@snmc.com ) ++ . Pramod B Bhardwaj (pramod.bhardwaj@smsc.com) ++ . ++ . ++ . Hardware multicast code from Peter Cammaert ( pc@denkart.be ) ++ . ++ . Sources: ++ . o SMSC LAN91C111 databook (www.smsc.com) ++ . 
o smc9194.c by Erik Stahlman ++ . o skeleton.c by Donald Becker ( becker@cesdis.gsfc.nasa.gov ) ++ . ++ . History: ++ . 09/24/01 Pramod B Bhardwaj, Added the changes for Kernel 2.4 ++ . 08/21/01 Pramod B Bhardwaj Added support for RevB of LAN91C111 ++ . 04/25/01 Daris A Nevil Initial public release through SMSC ++ . 03/16/01 Daris A Nevil Modified smc9194.c for use with LAN91C111 ++ ++ Ported to RTnet: March 2004, Jan Kiszka ++ ----------------------------------------------------------------------------*/ ++ ++// Use power-down feature of the chip ++#define POWER_DOWN 1 ++ ++ ++static const char version[] = ++ "SMSC LAN91C111 Driver (v2.0-rt), RTnet version - Jan Kiszka (jan.kiszka@web.de)\n\n"; ++ ++#ifdef MODULE ++#include ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include //#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++//#include ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++#include ++#include ++#endif ++ ++#include ++ ++#include "rt_smc91111.h" ++/*------------------------------------------------------------------------ ++ . ++ . Configuration options, for the experienced user to change. ++ . ++ -------------------------------------------------------------------------*/ ++ ++/* ++ . Do you want to use 32 bit xfers? This should work on all chips, as ++ . the chipset is designed to accommodate them. ++*/ ++#define USE_32_BIT 1 ++ ++ ++/* ++ .the LAN91C111 can be at any of the following port addresses. To change, ++ .for a slightly different card, you can add it to the array. Keep in ++ .mind that the array must end in zero. ++*/ ++static unsigned int smc_portlist[] __initdata = ++ { 0x200, 0x220, 0x240, 0x260, 0x280, 0x2A0, 0x2C0, 0x2E0, ++ 0x300, 0x320, 0x340, 0x360, 0x380, 0x3A0, 0x3C0, 0x3E0, 0}; ++ ++ ++/* ++ . Wait time for memory to be free. This probably shouldn't be ++ . tuned that much, as waiting for this means nothing else happens ++ . in the system ++*/ ++#define MEMORY_WAIT_TIME 16 ++ ++ ++/* ++ . Timeout in us for waiting on the completion of a previous MMU command ++ . in smc_rcv(). ++*/ ++#define MMU_CMD_TIMEOUT 5 ++ ++ ++/* ++ . DEBUGGING LEVELS ++ . ++ . 0 for normal operation ++ . 1 for slightly more details ++ . >2 for various levels of increasingly useless information ++ . 2 for interrupt tracking, status flags ++ . 3 for packet info ++ . 4 for complete packet dumps ++*/ ++//#define SMC_DEBUG 3 // Must be defined in makefile ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++#define PRINTK3(args...) rtdm_printk(args) ++#else ++#define PRINTK3(args...) ++#endif ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 1) ++#define PRINTK2(args...) rtdm_printk(args) ++#else ++#define PRINTK2(args...) ++#endif ++ ++#ifdef SMC_DEBUG ++#define PRINTK(args...) rtdm_printk(args) ++#else ++#define PRINTK(args...) ++#endif ++ ++ ++/*------------------------------------------------------------------------ ++ . ++ . The internal workings of the driver. If you are changing anything ++ . here with the SMC stuff, you should have the datasheet and know ++ . what you are doing. ++ . ++ -------------------------------------------------------------------------*/ ++#define CARDNAME "LAN91C111" ++ ++// Memory sizing constant ++#define LAN91C111_MEMORY_MULTIPLIER (1024*2) ++ ++/* store this information for the driver.. 
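++   (one instance per device, reachable through rtnet_device->priv;
++    smc_open() clears it again up to the IRQ handle before reuse)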
*/ ++struct smc_local { ++ ++// these are things that the kernel wants me to keep, so users ++ // can find out semi-useless statistics of how well the card is ++ // performing ++ struct net_device_stats stats; ++ ++ // If I have to wait until memory is available to send ++ // a packet, I will store the skbuff here, until I get the ++ // desired memory. Then, I'll send it out and free it. ++ struct rtskb * saved_skb; ++ ++ // This keeps track of how many packets that I have ++ // sent out. When an TX_EMPTY interrupt comes, I know ++ // that all of these have been sent. ++ int packets_waiting; ++ ++ // Set to true during the auto-negotiation sequence ++ int autoneg_active; ++ ++ // Address of our PHY port ++ word phyaddr; ++ ++ // Type of PHY ++ word phytype; ++ ++ // Last contents of PHY Register 18 ++ word lastPhy18; ++ ++ // Contains the current active transmission mode ++ word tcr_cur_mode; ++ ++ // Contains the current active receive mode ++ word rcr_cur_mode; ++ ++ // Contains the current active receive/phy mode ++ word rpc_cur_mode; ++ ++ /* => Pramod, Odd Byte issue */ ++ // Contains the Current ChipID ++ unsigned short ChipID; ++ ++ //Contains the Current ChipRevision ++ unsigned short ChipRev; ++ /* <= Pramod, Odd Byte issue */ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ ++ // Root directory /proc/sys/dev ++ // Second entry must be null to terminate the table ++ ctl_table root_table[2]; ++ ++ // Directory for this device /proc/sys/dev/ethX ++ // Again the second entry must be zero to terminate ++ ctl_table eth_table[2]; ++ ++ // This is the parameters (file) table ++ ctl_table param_table[CTL_SMC_LAST_ENTRY]; ++ ++ // Saves the sysctl header returned by register_sysctl_table() ++ // we send this to unregister_sysctl_table() ++ struct ctl_table_header *sysctl_header; ++ ++ // Parameter variables (files) go here ++ char ctl_info[1024]; ++ int ctl_swfdup; ++ int ctl_ephloop; ++ int ctl_miiop; ++ int ctl_autoneg; ++ int ctl_rfduplx; ++ int ctl_rspeed; ++ int ctl_afduplx; ++ int ctl_aspeed; ++ int ctl_lnkfail; ++ int ctl_forcol; ++ int ctl_filtcar; ++ int ctl_freemem; ++ int ctl_totmem; ++ int ctl_leda; ++ int ctl_ledb; ++ int ctl_chiprev; ++#ifdef SMC_DEBUG ++ int ctl_reg_bsr; ++ int ctl_reg_tcr; ++ int ctl_reg_esr; ++ int ctl_reg_rcr; ++ int ctl_reg_ctrr; ++ int ctl_reg_mir; ++ int ctl_reg_rpcr; ++ int ctl_reg_cfgr; ++ int ctl_reg_bar; ++ int ctl_reg_iar0; ++ int ctl_reg_iar1; ++ int ctl_reg_iar2; ++ int ctl_reg_gpr; ++ int ctl_reg_ctlr; ++ int ctl_reg_mcr; ++ int ctl_reg_pnr; ++ int ctl_reg_fpr; ++ int ctl_reg_ptr; ++ int ctl_reg_dr; ++ int ctl_reg_isr; ++ int ctl_reg_mtr1; ++ int ctl_reg_mtr2; ++ int ctl_reg_mtr3; ++ int ctl_reg_mtr4; ++ int ctl_reg_miir; ++ int ctl_reg_revr; ++ int ctl_reg_ercvr; ++ int ctl_reg_extr; ++ int ctl_phy_ctrl; ++ int ctl_phy_stat; ++ int ctl_phy_id1; ++ int ctl_phy_id2; ++ int ctl_phy_adc; ++ int ctl_phy_remc; ++ int ctl_phy_cfg1; ++ int ctl_phy_cfg2; ++ int ctl_phy_int; ++ int ctl_phy_mask; ++#endif // SMC_DEBUG ++ ++#endif // CONFIG_SYSCTL ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++ ++/*----------------------------------------------------------------- ++ . ++ . The driver can be entered at any of the following entry points. ++ . ++ .------------------------------------------------------------------ */ ++ ++/* ++ . This is called by register_netdev(). It is responsible for ++ . checking the portlist for the SMC9000 series chipset. If it finds ++ . one, then it will initialize the device, find the hardware information, ++ . 
and sets up the appropriate device parameters. ++ . NOTE: Interrupts are *OFF* when this procedure is called. ++ . ++ . NB:This shouldn't be static since it is referred to externally. ++*/ ++int smc_init(struct rtnet_device *dev); ++ ++/* ++ . This is called by unregister_netdev(). It is responsible for ++ . cleaning up before the driver is finally unregistered and discarded. ++*/ ++//void smc_destructor(struct net_device *dev); ++ ++/* ++ . The kernel calls this function when someone wants to use the net_device, ++ . typically 'ifconfig ethX up'. ++*/ ++static int smc_open(struct rtnet_device *dev); ++ ++/* ++ . This is called by the kernel to send a packet out into the net. it's ++ . responsible for doing a best-effort send, but if it's simply not possible ++ . to send it, the packet gets dropped. ++*/ ++//static void smc_timeout (struct net_device *dev);*/ ++/* ++ . This is called by the kernel in response to 'ifconfig ethX down'. It ++ . is responsible for cleaning up everything that the open routine ++ . does, and maybe putting the card into a powerdown state. ++*/ ++static int smc_close(struct rtnet_device *dev); ++ ++/* ++ . This routine allows the proc file system to query the driver's ++ . statistics. ++*/ ++static struct net_device_stats *smc_query_statistics(struct rtnet_device *rtdev); ++ ++/* ++ . Finally, a call to set promiscuous mode ( for TCPDUMP and related ++ . programs ) and multicast modes. ++*/ ++static void smc_set_multicast_list(struct rtnet_device *dev); ++ ++/* ++ . Configures the PHY through the MII Management interface ++*/ ++static void smc_phy_configure(struct rtnet_device* dev); ++ ++/*--------------------------------------------------------------- ++ . ++ . Interrupt level calls.. ++ . ++ ----------------------------------------------------------------*/ ++ ++/* ++ . Handles the actual interrupt ++*/ ++static int smc_interrupt(rtdm_irq_t *irq_handle); ++/* ++ . This is a separate procedure to handle the receipt of a packet, to ++ . leave the interrupt code looking slightly cleaner ++*/ ++inline static void smc_rcv( struct rtnet_device *dev ); ++/* ++ . This handles a TX interrupt, which is only called when an error ++ . relating to a packet is sent. ++*/ ++//inline static void smc_tx( struct net_device * dev ); ++ ++/* ++ . This handles interrupts generated from PHY register 18 ++*/ ++//static void smc_phy_interrupt(struct net_device* dev); ++ ++/* ++ ------------------------------------------------------------ ++ . ++ . Internal routines ++ . ++ ------------------------------------------------------------ ++*/ ++ ++/* ++ . Test if a given location contains a chip, trying to cause as ++ . little damage as possible if it's not a SMC chip. ++*/ ++static int smc_probe(struct rtnet_device *dev, int ioaddr); ++ ++/* ++ . A rather simple routine to print out a packet for debugging purposes. ++*/ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++static void print_packet( byte *, int ); ++#endif ++ ++#define tx_done(dev) 1 ++ ++/* this is called to actually send the packet to the chip */ ++static void smc_hardware_send_packet( struct rtnet_device * dev ); ++ ++/* Since I am not sure if I will have enough room in the chip's ram ++ . to store the packet, I call this routine, which either sends it ++ . now, or generates an interrupt when the card is ready for the ++ . 
packet */ ++static int smc_wait_to_send_packet( struct rtskb * skb, struct rtnet_device *dev ); ++ ++/* this does a soft reset on the device */ ++static void smc_reset( struct rtnet_device* dev ); ++ ++/* Enable Interrupts, Receive, and Transmit */ ++static void smc_enable( struct rtnet_device *dev ); ++ ++/* this puts the device in an inactive state */ ++static void smc_shutdown( int ioaddr ); ++ ++#ifndef NO_AUTOPROBE ++/* This routine will find the IRQ of the driver if one is not ++ . specified in the input to the device. */ ++static int smc_findirq( int ioaddr ); ++#endif ++ ++/* Routines to Read and Write the PHY Registers across the ++ MII Management Interface ++*/ ++ ++static word smc_read_phy_register(int ioaddr, byte phyaddr, byte phyreg); ++static void smc_write_phy_register(int ioaddr, byte phyaddr, byte phyreg, word phydata); ++ ++/* Initilizes our device's sysctl proc filesystem */ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++static void smc_sysctl_register(struct rtnet_device *); ++static void smc_sysctl_unregister(struct rtnet_device *); ++#endif /* CONFIG_SYSCTL */ ++ ++/* ++ . Function: smc_reset( struct device* dev ) ++ . Purpose: ++ . This sets the SMC91111 chip to its normal state, hopefully from whatever ++ . mess that any other DOS driver has put it in. ++ . ++ . Maybe I should reset more registers to defaults in here? SOFTRST should ++ . do that for me. ++ . ++ . Method: ++ . 1. send a SOFT RESET ++ . 2. wait for it to finish ++ . 3. enable autorelease mode ++ . 4. reset the memory management unit ++ . 5. clear all interrupts ++ . ++*/ ++static void smc_reset( struct rtnet_device* dev ) ++{ ++ //struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ ++ PRINTK2("%s:smc_reset\n", dev->name); ++ ++ /* This resets the registers mostly to defaults, but doesn't ++ affect EEPROM. That seems unnecessary */ ++ SMC_SELECT_BANK( 0 ); ++ outw( RCR_SOFTRST, ioaddr + RCR_REG ); ++ ++ /* Setup the Configuration Register */ ++ /* This is necessary because the CONFIG_REG is not affected */ ++ /* by a soft reset */ ++ ++ SMC_SELECT_BANK( 1 ); ++ outw( CONFIG_DEFAULT, ioaddr + CONFIG_REG); ++ ++ /* Setup for fast accesses if requested */ ++ /* If the card/system can't handle it then there will */ ++ /* be no recovery except for a hard reset or power cycle */ ++ ++ if (dev->dma) ++ outw( inw( ioaddr + CONFIG_REG ) | CONFIG_NO_WAIT, ++ ioaddr + CONFIG_REG ); ++ ++#ifdef POWER_DOWN ++ /* Release from possible power-down state */ ++ /* Configuration register is not affected by Soft Reset */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CONFIG_REG ) | CONFIG_EPH_POWER_EN, ++ ioaddr + CONFIG_REG ); ++#endif ++ ++ SMC_SELECT_BANK( 0 ); ++ ++ /* this should pause enough for the chip to be happy */ ++ mdelay(10); ++ ++ /* Disable transmit and receive functionality */ ++ outw( RCR_CLEAR, ioaddr + RCR_REG ); ++ outw( TCR_CLEAR, ioaddr + TCR_REG ); ++ ++ /* set the control register to automatically ++ release successfully transmitted packets, to make the best ++ use out of our limited memory */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CTL_REG ) | CTL_AUTO_RELEASE , ioaddr + CTL_REG ); ++ ++ /* Reset the MMU */ ++ SMC_SELECT_BANK( 2 ); ++ outw( MC_RESET, ioaddr + MMU_CMD_REG ); ++ ++ /* Note: It doesn't seem that waiting for the MMU busy is needed here, ++ but this is a place where future chipsets _COULD_ break. Be wary ++ of issuing another MMU command right after this */ ++ ++ /* Disable all interrupts */ ++ outb( 0, ioaddr + IM_REG ); ++} ++ ++/* ++ . 
Function: smc_enable ++ . Purpose: let the chip talk to the outside work ++ . Method: ++ . 1. Enable the transmitter ++ . 2. Enable the receiver ++ . 3. Enable interrupts ++*/ ++static void smc_enable( struct rtnet_device *dev ) ++{ ++ unsigned short ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ PRINTK2("%s:smc_enable\n", dev->name); ++ ++ SMC_SELECT_BANK( 0 ); ++ /* see the header file for options in TCR/RCR DEFAULT*/ ++ outw( lp->tcr_cur_mode, ioaddr + TCR_REG ); ++ outw( lp->rcr_cur_mode, ioaddr + RCR_REG ); ++ ++ /* now, enable interrupts */ ++ SMC_SELECT_BANK( 2 ); ++ outb( SMC_INTERRUPT_MASK, ioaddr + IM_REG ); ++} ++ ++/* ++ . Function: smc_shutdown ++ . Purpose: closes down the SMC91xxx chip. ++ . Method: ++ . 1. zero the interrupt mask ++ . 2. clear the enable receive flag ++ . 3. clear the enable xmit flags ++ . ++ . TODO: ++ . (1) maybe utilize power down mode. ++ . Why not yet? Because while the chip will go into power down mode, ++ . the manual says that it will wake up in response to any I/O requests ++ . in the register space. Empirical results do not show this working. ++*/ ++static void smc_shutdown( int ioaddr ) ++{ ++ PRINTK2("CARDNAME:smc_shutdown\n"); ++ ++ /* no more interrupts for me */ ++ SMC_SELECT_BANK( 2 ); ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* and tell the card to stay away from that nasty outside world */ ++ SMC_SELECT_BANK( 0 ); ++ outb( RCR_CLEAR, ioaddr + RCR_REG ); ++ outb( TCR_CLEAR, ioaddr + TCR_REG ); ++ ++#ifdef POWER_DOWN ++ /* finally, shut the chip down */ ++ SMC_SELECT_BANK( 1 ); ++ outw( inw( ioaddr + CONFIG_REG ) & ~CONFIG_EPH_POWER_EN, ++ ioaddr + CONFIG_REG ); ++#endif ++} ++ ++ ++/* ++ . Function: smc_wait_to_send_packet( struct sk_buff * skb, struct device * ) ++ . Purpose: ++ . Attempt to allocate memory for a packet, if chip-memory is not ++ . available, then tell the card to generate an interrupt when it ++ . is available. ++ . ++ . Algorithm: ++ . ++ . o if the saved_skb is not currently null, then drop this packet ++ . on the floor. This should never happen, because of TBUSY. ++ . o if the saved_skb is null, then replace it with the current packet, ++ . o See if I can sending it now. ++ . o (NO): Enable interrupts and let the interrupt handler deal with it. ++ . o (YES):Send it now. ++*/ ++static int smc_wait_to_send_packet( struct rtskb * skb, struct rtnet_device * dev ) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ unsigned short ioaddr = dev->base_addr; ++ word length; ++ unsigned short numPages; ++ word time_out; ++ word status; ++ ++ PRINTK3("%s:smc_wait_to_send_packet\n", dev->name); ++ ++ rtnetif_stop_queue(dev); ++ ++ if ( lp->saved_skb) { ++ /* THIS SHOULD NEVER HAPPEN. */ ++ lp->stats.tx_aborted_errors++; ++ rtdm_printk("%s: Bad Craziness - sent packet while busy.\n", ++ dev->name); ++ return 1; ++ } ++ lp->saved_skb = skb; ++ ++ length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; ++ ++ ++ /* ++ ** The MMU wants the number of pages to be the number of 256 bytes ++ ** 'pages', minus 1 ( since a packet can't ever have 0 pages :) ) ++ ** ++ ** The 91C111 ignores the size bits, but the code is left intact ++ ** for backwards and future compatibility. ++ ** ++ ** Pkt size for allocating is data length +6 (for additional status ++ ** words, length and ctl!) ++ ** ++ ** If odd size then last byte is included in this header. ++ */ ++ numPages = ((length & 0xfffe) + 6); ++ numPages >>= 8; // Divide by 256 ++ ++ if (numPages > 7 ) { ++ rtdm_printk("%s: Far too big packet error. 
\n", dev->name); ++ /* freeing the packet is a good thing here... but should ++ . any packets of this size get down here? */ ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ /* this IS an error, but, i don't want the skb saved */ ++ rtnetif_wake_queue(dev); ++ return 0; ++ } ++ /* either way, a packet is waiting now */ ++ lp->packets_waiting++; ++ ++ /* now, try to allocate the memory */ ++ SMC_SELECT_BANK( 2 ); ++ outw( MC_ALLOC | numPages, ioaddr + MMU_CMD_REG ); ++ /* ++ . Performance Hack ++ . ++ . wait a short amount of time.. if I can send a packet now, I send ++ . it now. Otherwise, I enable an interrupt and wait for one to be ++ . available. ++ . ++ . I could have handled this a slightly different way, by checking to ++ . see if any memory was available in the FREE MEMORY register. However, ++ . either way, I need to generate an allocation, and the allocation works ++ . no matter what, so I saw no point in checking free memory. ++ */ ++ time_out = MEMORY_WAIT_TIME; ++ do { ++ status = inb( ioaddr + INT_REG ); ++ if ( status & IM_ALLOC_INT ) { ++ /* acknowledge the interrupt */ ++ outb( IM_ALLOC_INT, ioaddr + INT_REG ); ++ break; ++ } ++ } while ( -- time_out ); ++ ++ if ( !time_out ) { ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ rtnetif_wake_queue(dev); ++ ++ rtdm_printk("%s: ERROR: unable to allocate card memory for " ++ "packet transmission.\n", dev->name); ++ return 0; ++ } ++ /* or YES! I can send the packet now.. */ ++ smc_hardware_send_packet(dev); ++ rtnetif_wake_queue(dev); ++ return 0; ++} ++ ++/* ++ . Function: smc_hardware_send_packet(struct device * ) ++ . Purpose: ++ . This sends the actual packet to the SMC9xxx chip. ++ . ++ . Algorithm: ++ . First, see if a saved_skb is available. ++ . ( this should NOT be called if there is no 'saved_skb' ++ . Now, find the packet number that the chip allocated ++ . Point the data pointers at it in memory ++ . Set the length word in the chip's memory ++ . Dump the packet to chip memory ++ . Check if a last byte is needed ( odd length packet ) ++ . if so, set the control flag right ++ . Tell the card to send it ++ . Enable the transmit interrupt, so I know if it failed ++ . Free the kernel data if I actually sent it. ++*/ ++static void smc_hardware_send_packet( struct rtnet_device * dev ) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ byte packet_no; ++ struct rtskb * skb = lp->saved_skb; ++ word length; ++ unsigned short ioaddr; ++ void * buf; ++ rtdm_lockctx_t context; ++ ++ PRINTK3("%s:smc_hardware_send_packet\n", dev->name); ++ ++ ioaddr = dev->base_addr; ++ ++ if ( !skb ) { ++ PRINTK("%s: In XMIT with no packet to send \n", dev->name); ++ return; ++ } ++ length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; ++ buf = skb->data; ++ ++ /* If I get here, I _know_ there is a packet slot waiting for me */ ++ packet_no = inb( ioaddr + AR_REG ); ++ if ( packet_no & AR_FAILED ) { ++ /* or isn't there? BAD CHIP! */ ++ rtdm_printk(KERN_DEBUG "%s: Memory allocation failed. 
\n", ++ dev->name); ++ kfree_rtskb(skb); ++ lp->saved_skb = NULL; ++ rtnetif_wake_queue(dev); ++ return; ++ } ++ ++ /* we have a packet address, so tell the card to use it */ ++ outb( packet_no, ioaddr + PN_REG ); ++ ++ /* point to the beginning of the packet */ ++ outw( PTR_AUTOINC , ioaddr + PTR_REG ); ++ ++ PRINTK3("%s: Trying to xmit packet of length %x\n", ++ dev->name, length); ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++ rtdm_printk("Transmitting Packet\n"); ++ print_packet( buf, length ); ++#endif ++ ++ /* send the packet length ( +6 for status, length and ctl byte ) ++ and the status word ( set to zeros ) */ ++#ifdef USE_32_BIT ++ outl( (length +6 ) << 16 , ioaddr + DATA_REG ); ++#else ++ outw( 0, ioaddr + DATA_REG ); ++ /* send the packet length ( +6 for status words, length, and ctl*/ ++ outb( (length+6) & 0xFF,ioaddr + DATA_REG ); ++ outb( (length+6) >> 8 , ioaddr + DATA_REG ); ++#endif ++ ++ /* send the actual data ++ . I _think_ it's faster to send the longs first, and then ++ . mop up by sending the last word. It depends heavily ++ . on alignment, at least on the 486. Maybe it would be ++ . a good idea to check which is optimal? But that could take ++ . almost as much time as is saved? ++ */ ++#ifdef USE_32_BIT ++ outsl(ioaddr + DATA_REG, buf, length >> 2 ); ++ if ( length & 0x2 ) ++ outw(*((word *)(buf + (length & 0xFFFFFFFC))),ioaddr +DATA_REG); ++#else ++ outsw(ioaddr + DATA_REG , buf, (length ) >> 1); ++#endif // USE_32_BIT ++ ++ /* Send the last byte, if there is one. */ ++ if ( (length & 1) == 0 ) { ++ outw( 0, ioaddr + DATA_REG ); ++ } else { ++ outb( ((char *)buf)[length -1 ], ioaddr + DATA_REG ); ++ outb( 0x20, ioaddr + DATA_REG); // Set odd bit in CONTROL BYTE ++ } ++ ++ rtdm_lock_irqsave(context); ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) { ++ nanosecs_abs_t xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* point to the patch address */ ++ outw(PTR_AUTOINC | ++ (4 + (char *)skb->xmit_stamp - (char *)skb->data), ++ ioaddr + PTR_REG); ++ /* we don't check alignments, we just write bytes */ ++ outsb(ioaddr + DATA_REG, (char *)&xmit_stamp, ++ sizeof(xmit_stamp)); ++ } ++ ++ /* enable the interrupts */ ++ SMC_ENABLE_INT( (IM_TX_INT | IM_TX_EMPTY_INT) ); ++ ++ /* and let the chipset deal with it */ ++ outw( MC_ENQUEUE , ioaddr + MMU_CMD_REG ); ++ ++ rtdm_lock_irqrestore(context); ++ ++ PRINTK2("%s: Sent packet of length %d \n", dev->name, length); ++ ++ lp->saved_skb = NULL; ++ kfree_rtskb(skb); ++ ++// dev->trans_start = jiffies; ++ ++ /* we can send another packet */ ++ rtnetif_wake_queue(dev); ++ ++ ++ return; ++} ++ ++/*------------------------------------------------------------------------- ++ | ++ | smc_init( struct device * dev ) ++ | Input parameters: ++ | dev->base_addr == 0, try to find all possible locations ++ | dev->base_addr == 1, return failure code ++ | dev->base_addr == 2, always allocate space, and return success ++ | dev->base_addr == this is the address to check ++ | ++ | Output: ++ | 0 --> there is a device ++ | anything else, error ++ | ++ --------------------------------------------------------------------------- ++*/ ++int __init smc_init(struct rtnet_device *dev) ++{ ++ int i; ++ int base_addr = dev ? 
dev->base_addr : 0; ++ ++ PRINTK2("CARDNAME:smc_init\n"); ++ ++ /* try a specific location */ ++ if (base_addr > 0x1ff) ++ return smc_probe(dev, base_addr); ++ else if ( 0 != base_addr ) ++ return -ENXIO; ++ ++ /* check every ethernet address */ ++ for (i = 0; smc_portlist[i]; i++) ++ if ( smc_probe(dev,smc_portlist[i]) ==0) ++ return 0; ++ ++ /* couldn't find anything */ ++ return -ENODEV; ++} ++ ++ ++#ifndef NO_AUTOPROBE ++/*---------------------------------------------------------------------- ++ . smc_findirq ++ . ++ . This routine has a simple purpose -- make the SMC chip generate an ++ . interrupt, so an auto-detect routine can detect it, and find the IRQ, ++ ------------------------------------------------------------------------ ++*/ ++int __init smc_findirq( int ioaddr ) ++{ ++ int timeout = 20; ++ unsigned long cookie; ++ ++ PRINTK2("CARDNAME:smc_findirq\n"); ++ ++ /* I have to do a STI() here, because this is called from ++ a routine that does an CLI during this process, making it ++ rather difficult to get interrupts for auto detection */ ++ local_irq_enable(); ++ ++ cookie = probe_irq_on(); ++ ++ /* ++ * What I try to do here is trigger an ALLOC_INT. This is done ++ * by allocating a small chunk of memory, which will give an interrupt ++ * when done. ++ */ ++ ++ ++ SMC_SELECT_BANK(2); ++ /* enable ALLOCation interrupts ONLY */ ++ outb( IM_ALLOC_INT, ioaddr + IM_REG ); ++ ++ /* ++ . Allocate 512 bytes of memory. Note that the chip was just ++ . reset so all the memory is available ++ */ ++ outw( MC_ALLOC | 1, ioaddr + MMU_CMD_REG ); ++ ++ /* ++ . Wait until positive that the interrupt has been generated ++ */ ++ while ( timeout ) { ++ byte int_status; ++ ++ int_status = inb( ioaddr + INT_REG ); ++ ++ if ( int_status & IM_ALLOC_INT ) ++ break; /* got the interrupt */ ++ timeout--; ++ } ++ ++ /* there is really nothing that I can do here if timeout fails, ++ as autoirq_report will return a 0 anyway, which is what I ++ want in this case. Plus, the clean up is needed in both ++ cases. */ ++ ++ /* DELAY HERE! ++ On a fast machine, the status might change before the interrupt ++ is given to the processor. This means that the interrupt was ++ never detected, and autoirq_report fails to report anything. ++ This should fix autoirq_* problems. ++ */ ++ mdelay(10); ++ ++ /* and disable all interrupts again */ ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* clear hardware interrupts again, because that's how it ++ was when I was called... */ ++ local_irq_disable(); ++ ++ /* and return what I found */ ++ return probe_irq_off(cookie); ++} ++#endif ++ ++/*---------------------------------------------------------------------- ++ . Function: smc_probe( int ioaddr ) ++ . ++ . Purpose: ++ . Tests to see if a given ioaddr points to an SMC91111 chip. ++ . Returns a 0 on success ++ . ++ . Algorithm: ++ . (1) see if the high byte of BANK_SELECT is 0x33 ++ . (2) compare the ioaddr with the base register's address ++ . (3) see if I recognize the chip ID in the appropriate register ++ . ++ .--------------------------------------------------------------------- ++ */ ++/*--------------------------------------------------------------- ++ . Here I do typical initialization tasks. ++ . ++ . o Initialize the structure if needed ++ . o print out my vanity message if not done so already ++ . o print out what type of hardware is detected ++ . o print out the ethernet address ++ . o find the IRQ ++ . o set up my private data ++ . o configure the dev structure with my subroutines ++ . o actually GRAB the irq. ++ . 
o GRAB the region ++ .-----------------------------------------------------------------*/ ++ ++static int __init smc_probe(struct rtnet_device *dev, int ioaddr ) ++{ ++ int i, memory, retval; ++ static unsigned version_printed = 0; ++ unsigned int bank; ++ ++ const char *version_string; ++ ++ /*registers */ ++ word revision_register; ++ word base_address_register; ++ word memory_info_register; ++ /*=> Pramod */ ++ struct smc_local *lp; ++ /*<= Pramod */ ++ ++ PRINTK2("CARDNAME:smc_probe\n"); ++ ++ /* Grab the region so that no one else tries to probe our ioports. */ ++ if (!request_region(ioaddr, SMC_IO_EXTENT, dev->name)) return -EBUSY; ++ ++ /* First, see if the high byte is 0x33 */ ++ bank = inw( ioaddr + BANK_SELECT ); ++ if ( (bank & 0xFF00) != 0x3300 ) return -ENODEV; ++ ++ /* The above MIGHT indicate a device, but I need to write to further test this. */ ++ outw( 0x0, ioaddr + BANK_SELECT ); ++ bank = inw( ioaddr + BANK_SELECT ); ++ if ( (bank & 0xFF00 ) != 0x3300 ) ++ { ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* well, we've already written once, so hopefully another time won't ++ hurt. This time, I need to switch the bank register to bank 1, ++ so I can access the base address register */ ++ SMC_SELECT_BANK(1); ++ base_address_register = inw( ioaddr + BASE_REG ); ++ if ( ioaddr != ( base_address_register >> 3 & 0x3E0 ) ) ++ { ++ printk("CARDNAME: IOADDR %x doesn't match configuration (%x)." ++ "Probably not a SMC chip\n", ++ ioaddr, base_address_register >> 3 & 0x3E0 ); ++ /* well, the base address register didn't match. Must not have ++ been a SMC chip after all. */ ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* check if the revision register is something that I recognize. ++ These might need to be added to later, as future revisions ++ could be added. */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ if ( !chip_ids[ ( revision_register >> 4 ) & 0xF ] ) ++ { ++ /* I don't recognize this chip, so... */ ++ printk("CARDNAME: IO %x: Unrecognized revision register:" ++ " %x, Contact author. \n", ++ ioaddr, revision_register ); ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* at this point I'll assume that the chip is an SMC9xxx. ++ It might be prudent to check a listing of MAC addresses ++ against the hardware address, or do some other tests. */ ++ ++ if (version_printed++ == 0) ++ printk("%s", version); ++ ++ /* fill in some of the fields */ ++ dev->base_addr = ioaddr; ++ ++ /* ++ . Get the MAC address ( bank 1, regs 4 - 9 ) ++ */ ++ SMC_SELECT_BANK( 1 ); ++ for ( i = 0; i < 6; i += 2 ) ++ { ++ word address; ++ ++ address = inw( ioaddr + ADDR0_REG + i ); ++ dev->dev_addr[ i + 1] = address >> 8; ++ dev->dev_addr[ i ] = address & 0xFF; ++ } ++ ++ /* get the memory information */ ++ ++ SMC_SELECT_BANK( 0 ); ++ memory_info_register = inw( ioaddr + MIR_REG ); ++ memory = memory_info_register & (word)0x00ff; ++ memory *= LAN91C111_MEMORY_MULTIPLIER; ++ ++ /* ++ Now, I want to find out more about the chip. This is sort of ++ redundant, but it's cleaner to have it in both, rather than having ++ one VERY long probe procedure. ++ */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ version_string = chip_ids[ ( revision_register >> 4 ) & 0xF ]; ++ if ( !version_string ) ++ { ++ /* I shouldn't get here because this call was done before.... */ ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ /* now, reset the chip, and put it into a known state */ ++ smc_reset( dev ); ++ ++ /* ++ . 
If dev->irq is 0, then the device has to be banged on to see ++ . what the IRQ is. ++ . ++ . This banging doesn't always detect the IRQ, for unknown reasons. ++ . a workaround is to reset the chip and try again. ++ . ++ . Interestingly, the DOS packet driver *SETS* the IRQ on the card to ++ . be what is requested on the command line. I don't do that, mostly ++ . because the card that I have uses a non-standard method of accessing ++ . the IRQs, and because this _should_ work in most configurations. ++ . ++ . Specifying an IRQ is done with the assumption that the user knows ++ . what (s)he is doing. No checking is done!!!! ++ . ++ */ ++ if ( dev->irq < 2 ) { ++ int trials; ++ ++ trials = 3; ++ while ( trials-- ) { ++ dev->irq = smc_findirq( ioaddr ); ++ if ( dev->irq ) ++ break; ++ /* kick the card and try again */ ++ smc_reset( dev ); ++ } ++ } ++ if (dev->irq == 0 ) { ++ printk("%s: Couldn't autodetect your IRQ. Use irq=xx.\n", ++ dev->name); ++ retval = -ENODEV; ++ goto err_out; ++ } ++ ++ if (dev->irq == 2) { ++ /* Fixup for users that don't know that IRQ 2 is really IRQ 9, ++ * or don't know which one to set. ++ */ ++ dev->irq = 9; ++ } ++ ++ /* now, print out the card info, in a short format.. */ ++ ++ printk("%s: %s(rev:%d) at %#3x IRQ:%d MEMSIZE:%db NOWAIT:%d ", ++ dev->name, ++ version_string, revision_register & 0xF, ioaddr, dev->irq, ++ memory, dev->dma); ++ /* ++ . Print the Ethernet address ++ */ ++ printk("ADDR: "); ++ for (i = 0; i < 5; i++) ++ printk("%2.2x:", dev->dev_addr[i] ); ++ printk("%2.2x \n", dev->dev_addr[5] ); ++ ++ ++ /* Initialize the private structure. */ ++ /*if (dev->priv == NULL) { ++ dev->priv = kmalloc(sizeof(struct smc_local), GFP_KERNEL); ++ if (dev->priv == NULL) { ++ retval = -ENOMEM; ++ goto err_out; ++ } ++ }*/ ++ /* set the private data to zero by default */ ++ memset(dev->priv, 0, sizeof(struct smc_local)); ++ ++ /* Fill in the fields of the device structure with ethernet values. 
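++	   For RTnet the classic ether_setup() path is skipped:
++	   rt_stack_connect() attaches the device to the RTnet stack manager
++	   instead, and the IRQ is requested through rtdm_irq_request() below.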
*/ ++// ether_setup(dev); ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ /* Grab the IRQ */ ++ retval = rtdm_irq_request(&((struct smc_local *)dev->priv)->irq_handle, ++ dev->irq, &smc_interrupt, 0, ++ "rt_smx91111", dev); ++ if (retval) { ++ printk("%s: unable to get IRQ %d (irqval=%d).\n", ++ dev->name, dev->irq, retval); ++ //kfree (dev->priv); ++ //dev->priv = NULL; ++ goto err_out; ++ } ++ ++ dev->open = smc_open; ++ dev->stop = smc_close; ++ dev->hard_start_xmit = smc_wait_to_send_packet; ++ dev->get_stats = smc_query_statistics; ++// dev->tx_timeout = smc_timeout; ++#ifdef HAVE_MULTICAST ++// dev->set_multicast_list = &smc_set_multicast_list; ++#endif ++ ++ /* => Store the ChipRevision and ChipID, to be used in resolving the Odd-Byte issue in RevB of LAN91C111; Pramod */ ++ SMC_SELECT_BANK(3); ++ revision_register = inw( ioaddr + REV_REG ); ++ lp = (struct smc_local *)dev->priv; ++ lp->ChipID = (revision_register >> 4) & 0xF; ++ lp->ChipRev = revision_register & 0xF; ++ ++ return 0; ++ ++err_out: ++ release_region (ioaddr, SMC_IO_EXTENT); ++ return retval; ++} ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++static void print_packet( byte * buf, int length ) ++{ ++ int i; ++ int remainder; ++ int lines; ++ ++ rtdm_printk("Packet of length %d \n", length ); ++ ++#if SMC_DEBUG > 3 ++ lines = length / 16; ++ remainder = length % 16; ++ ++ for ( i = 0; i < lines ; i ++ ) { ++ int cur; ++ ++ for ( cur = 0; cur < 8; cur ++ ) { ++ byte a, b; ++ ++ a = *(buf ++ ); ++ b = *(buf ++ ); ++ rtdm_printk("%02x%02x ", a, b ); ++ } ++ rtdm_printk("\n"); ++ } ++ for ( i = 0; i < remainder/2 ; i++ ) { ++ byte a, b; ++ ++ a = *(buf ++ ); ++ b = *(buf ++ ); ++ rtdm_printk("%02x%02x ", a, b ); ++ } ++ rtdm_printk("\n"); ++#endif ++} ++#endif ++ ++ ++/* ++ * Open and Initialize the board ++ * ++ * Set up everything, reset the card, etc .. ++ * ++ */ ++static int smc_open(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ int i; /* used to set hw ethernet address */ ++ ++ PRINTK2("%s:smc_open\n", dev->name); ++ ++ /* clear out all the junk that was put here before... */ ++ memset(dev->priv, 0, (size_t)&((struct smc_local *)0)->irq_handle); ++ ++ rtnetif_start_queue(dev); ++ ++ // Setup the default Register Modes ++ lp->tcr_cur_mode = TCR_DEFAULT; ++ lp->rcr_cur_mode = RCR_DEFAULT; ++ lp->rpc_cur_mode = RPC_DEFAULT; ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // Set default parameters (files) ++ lp->ctl_swfdup = 0; ++ lp->ctl_ephloop = 0; ++ lp->ctl_miiop = 0; ++ lp->ctl_autoneg = 1; ++ lp->ctl_rfduplx = 1; ++ lp->ctl_rspeed = 100; ++ lp->ctl_afduplx = 1; ++ lp->ctl_aspeed = 100; ++ lp->ctl_lnkfail = 1; ++ lp->ctl_forcol = 0; ++ lp->ctl_filtcar = 0; ++#endif /* CONFIG_SYSCTL */ ++ ++ /* reset the hardware */ ++ ++ smc_reset( dev ); ++ smc_enable( dev ); ++ ++ /* Configure the PHY */ ++ smc_phy_configure(dev); ++ ++ smc_set_multicast_list(dev); ++ ++ /* ++ According to Becker, I have to set the hardware address ++ at this point, because the (l)user can set it with an ++ ioctl. Easily done... ++ */ ++ SMC_SELECT_BANK( 1 ); ++ for ( i = 0; i < 6; i += 2 ) { ++ word address; ++ ++ address = dev->dev_addr[ i + 1 ] << 8 ; ++ address |= dev->dev_addr[ i ]; ++ outw( address, ioaddr + ADDR0_REG + i ); ++ } ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ smc_sysctl_register(dev); ++#endif /* CONFIG_SYSCTL */ ++ ++ rtnetif_start_queue(dev); ++ return 0; ++} ++ ++/*------------------------------------------------------------- ++ . ++ . 
smc_rcv - receive a packet from the card ++ . ++ . There is ( at least ) a packet waiting to be read from ++ . chip-memory. ++ . ++ . o Read the status ++ . o If an error, record it ++ . o otherwise, read in the packet ++ -------------------------------------------------------------- ++*/ ++static inline void smc_rcv(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ int packet_number; ++ word status; ++ word packet_length; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ int timeout; ++ ++ PRINTK3("%s:smc_rcv\n", dev->name); ++ ++ /* assume bank 2 */ ++ ++ packet_number = inw( ioaddr + RXFIFO_REG ); ++ ++ if ( packet_number & RXFIFO_REMPTY ) { ++ ++ /* we got called , but nothing was on the FIFO */ ++ PRINTK("%s: WARNING: smc_rcv with nothing on FIFO. \n", ++ dev->name); ++ /* don't need to restore anything */ ++ return; ++ } ++ ++ /* start reading from the start of the packet */ ++ outw( PTR_READ | PTR_RCV | PTR_AUTOINC, ioaddr + PTR_REG ); ++ inw( ioaddr + MMU_CMD_REG ); /* min delay to avoid errors... */ ++ ++ /* First two words are status and packet_length */ ++ status = inw( ioaddr + DATA_REG ); ++ packet_length = inw( ioaddr + DATA_REG ); ++ ++ packet_length &= 0x07ff; /* mask off top bits */ ++ ++ PRINTK2("RCV: STATUS %4x LENGTH %4x\n", status, packet_length ); ++ ++ if ( !(status & RS_ERRORS ) ){ ++ /* do stuff to make a new packet */ ++ struct rtskb * skb; ++ void * data; ++ ++ /* set multicast stats */ ++ if ( status & RS_MULTICAST ) ++ lp->stats.multicast++; ++ ++ // Allocate enough memory for entire receive frame, to be safe ++ skb = rtnetdev_alloc_rtskb(dev, packet_length); ++ ++ /* Adjust for having already read the first two words */ ++ packet_length -= 4; ++ ++ if ( skb == NULL ) { ++ rtdm_printk(KERN_NOTICE "%s: Low memory, packet dropped.\n", ++ dev->name); ++ lp->stats.rx_dropped++; ++ goto done; ++ } ++ ++ /* ++ ! This should work without alignment, but it could be ++ ! in the worse case ++ */ ++ /* TODO: Should I use 32bit alignment here ? */ ++ rtskb_reserve( skb, 2 ); /* 16 bit alignment */ ++ ++ /* => ++ ODD-BYTE ISSUE : The odd byte problem has been fixed in the LAN91C111 Rev B. ++ So we check if the Chip Revision, stored in smsc_local->ChipRev, is = 1. ++ If so then we increment the packet length only if RS_ODDFRAME is set. ++ If the Chip's revision is equal to 0, then we blindly increment the packet length ++ by 1, thus always assuming that the packet is odd length, leaving the higher layer ++ to decide the actual length. ++ -- Pramod ++ <= */ ++ if ((9 == lp->ChipID) && (1 == lp->ChipRev)) ++ { ++ if (status & RS_ODDFRAME) ++ data = rtskb_put( skb, packet_length + 1 ); ++ else ++ data = rtskb_put( skb, packet_length); ++ ++ } ++ else ++ { ++ // set odd length for bug in LAN91C111, REV A ++ // which never sets RS_ODDFRAME ++ data = rtskb_put( skb, packet_length + 1 ); ++ } ++ ++#ifdef USE_32_BIT ++ PRINTK3(" Reading %d dwords (and %d bytes) \n", ++ packet_length >> 2, packet_length & 3 ); ++ /* QUESTION: Like in the TX routine, do I want ++ to send the DWORDs or the bytes first, or some ++ mixture. 
A mixture might improve already slow PIO ++ performance */ ++ insl(ioaddr + DATA_REG , data, packet_length >> 2 ); ++ /* read the left over bytes */ ++ insb( ioaddr + DATA_REG, data + (packet_length & 0xFFFFFC), ++ packet_length & 0x3 ); ++#else ++ PRINTK3(" Reading %d words and %d byte(s) \n", ++ (packet_length >> 1 ), packet_length & 1 ); ++ insw(ioaddr + DATA_REG , data, packet_length >> 1); ++ ++#endif // USE_32_BIT ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2) ++ rtdm_printk("Receiving Packet\n"); ++ print_packet( data, packet_length ); ++#endif ++ ++ skb->protocol = rt_eth_type_trans(skb, dev ); ++ skb->time_stamp = time_stamp; ++ rtnetif_rx(skb); ++ lp->stats.rx_packets++; ++ } else { ++ /* error ... */ ++ lp->stats.rx_errors++; ++ ++ if ( status & RS_ALGNERR ) lp->stats.rx_frame_errors++; ++ if ( status & (RS_TOOSHORT | RS_TOOLONG ) ) ++ lp->stats.rx_length_errors++; ++ if ( status & RS_BADCRC) lp->stats.rx_crc_errors++; ++ } ++ ++ timeout = MMU_CMD_TIMEOUT; ++ while ( inw( ioaddr + MMU_CMD_REG ) & MC_BUSY ) { ++ rtdm_task_busy_sleep(1000); // Wait until not busy ++ if (--timeout == 0) { ++ rtdm_printk("%s: ERROR: timeout while waiting on MMU.\n", ++ dev->name); ++ break; ++ } ++ } ++done: ++ /* error or good, tell the card to get rid of this packet */ ++ outw( MC_RELEASE, ioaddr + MMU_CMD_REG ); ++ ++ return; ++} ++ ++/*-------------------------------------------------------------------- ++ . ++ . This is the main routine of the driver, to handle the net_device when ++ . it needs some attention. ++ . ++ . So: ++ . first, save state of the chipset ++ . branch off into routines to handle each case, and acknowledge ++ . each to the interrupt register ++ . and finally restore state. ++ . ++ ---------------------------------------------------------------------*/ ++static int smc_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *dev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ byte status; ++ word card_stats; ++ byte mask; ++ int timeout; ++ /* state registers */ ++ word saved_bank; ++ word saved_pointer; ++ ++ unsigned int old_packet_cnt = lp->stats.rx_packets; ++ ++ ++ ++ PRINTK3("%s: SMC interrupt started \n", dev->name); ++ ++/* if (dev == NULL) { ++ rtdm_printk(KERN_WARNING "%s: irq %d for unknown device.\n", ++ dev->name, irq); ++ return; ++ }*/ ++ ++/* will Linux let this happen ?? If not, this costs some speed ++ if ( dev->interrupt ) { ++ printk(KERN_WARNING "%s: interrupt inside interrupt.\n", ++ dev->name); ++ return; ++ } ++ ++ dev->interrupt = 1; */ ++ ++ saved_bank = inw( ioaddr + BANK_SELECT ); ++ ++ SMC_SELECT_BANK(2); ++ saved_pointer = inw( ioaddr + PTR_REG ); ++ ++ /* read the interrupt status register */ ++ mask = inb( ioaddr + IM_REG ); ++ ++ /* disable all interrupts */ ++ outb( 0, ioaddr + IM_REG ); ++ ++ /* ++ * The packet reception will take some time (up to several hundred us). ++ * Re-enable other irqs now so that no critical deadline will be missed. ++ */ ++ hard_local_irq_enable(); ++ ++ /* set a timeout value, so I don't stay here forever */ ++ timeout = 4; ++ ++ PRINTK2(KERN_WARNING "%s: MASK IS %x \n", dev->name, mask); ++ do { ++ /* read the status flag, and mask it */ ++ status = inb( ioaddr + INT_REG ) & mask; ++ if (!status ) ++ break; ++ ++ PRINTK3(KERN_WARNING "%s: Handling interrupt status %x \n", ++ dev->name, status); ++ ++ if (status & IM_RCV_INT) { ++ /* Got a packet(s). 
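++			   smc_rcv() copies the frame out of chip memory and
++			   queues it with rtnetif_rx(); the stack manager is
++			   kicked once at the end of this handler.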
*/ ++ PRINTK2(KERN_WARNING ++ "%s: Receive Interrupt\n", dev->name); ++ smc_rcv(dev); ++ } else if (status & IM_TX_INT ) { ++ rtdm_printk(KERN_ERR "%s: TX ERROR!\n", dev->name); ++ //smc_tx(dev); ++ // Acknowledge the interrupt ++ outb(IM_TX_INT, ioaddr + INT_REG ); ++ } else if (status & IM_TX_EMPTY_INT ) { ++ /* update stats */ ++ SMC_SELECT_BANK( 0 ); ++ card_stats = inw( ioaddr + COUNTER_REG ); ++ /* single collisions */ ++ lp->stats.collisions += card_stats & 0xF; ++ card_stats >>= 4; ++ /* multiple collisions */ ++ lp->stats.collisions += card_stats & 0xF; ++ ++ /* these are for when linux supports these statistics */ ++ SMC_SELECT_BANK( 2 ); ++ PRINTK2(KERN_WARNING "%s: TX_BUFFER_EMPTY handled\n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb( IM_TX_EMPTY_INT, ioaddr + INT_REG ); ++ mask &= ~IM_TX_EMPTY_INT; ++ lp->stats.tx_packets += lp->packets_waiting; ++ lp->packets_waiting = 0; ++ ++ } else if (status & IM_ALLOC_INT ) { ++ PRINTK2(KERN_DEBUG "%s: Allocation interrupt \n", ++ dev->name); ++ /* clear this interrupt so it doesn't happen again */ ++ mask &= ~IM_ALLOC_INT; ++ ++ } else if (status & IM_RX_OVRN_INT ) { ++ lp->stats.rx_errors++; ++ lp->stats.rx_fifo_errors++; ++ // Acknowledge the interrupt ++ outb( IM_RX_OVRN_INT, ioaddr + INT_REG ); ++ } else if (status & IM_EPH_INT ) { ++ PRINTK("%s: UNSUPPORTED: EPH INTERRUPT \n", ++ dev->name); ++ } else if (status & IM_MDINT ) { ++ //smc_phy_interrupt(dev); ++ PRINTK("%s: UNSUPPORTED: MD INTERRUPT \n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb(IM_MDINT, ioaddr + INT_REG ); ++ } else if (status & IM_ERCV_INT ) { ++ PRINTK("%s: UNSUPPORTED: ERCV INTERRUPT \n", ++ dev->name); ++ // Acknowledge the interrupt ++ outb( IM_ERCV_INT, ioaddr + INT_REG ); ++ } ++ } while ( timeout -- ); ++ ++ ++ /* restore register states */ ++ ++ SMC_SELECT_BANK( 2 ); ++ ++ outb( mask, ioaddr + IM_REG ); ++ ++ PRINTK3( KERN_WARNING "%s: MASK is now %x \n", dev->name, mask); ++ outw( saved_pointer, ioaddr + PTR_REG ); ++ ++ SMC_SELECT_BANK( saved_bank ); ++ ++ if (old_packet_cnt != lp->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ ++ hard_local_irq_disable(); ++ ++ //dev->interrupt = 0; ++ PRINTK3("%s: Interrupt done\n", dev->name); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++/*---------------------------------------------------- ++ . smc_close ++ . ++ . this makes the board clean up everything that it can ++ . and not talk to the outside world. Caused by ++ . an 'ifconfig ethX down' ++ . ++ -----------------------------------------------------*/ ++static int smc_close(struct rtnet_device *dev) ++{ ++ rtnetif_stop_queue(dev); ++ //dev->start = 0; ++ ++ PRINTK2("%s:smc_close\n", dev->name); ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ smc_sysctl_unregister(dev); ++#endif /* CONFIG_SYSCTL */ ++ ++ /* clear everything */ ++ smc_shutdown( dev->base_addr ); ++ ++ /* Update the statistics here. */ ++ ++ return 0; ++} ++ ++/*------------------------------------------------------------ ++ . Get the current statistics. ++ . This may be called with the card open or closed. ++ .-------------------------------------------------------------*/ ++static struct net_device_stats* smc_query_statistics(struct rtnet_device *rtdev) ++{ ++ struct smc_local *lp = (struct smc_local *)rtdev->priv; ++ ++ PRINTK2("%s:smc_query_statistics\n", rtdev->name); ++ ++ return &lp->stats; ++} ++ ++/*----------------------------------------------------------- ++ . smc_set_multicast_list ++ . ++ . This routine will, depending on the values passed to it, ++ . 
either make it accept multicast packets, go into ++ . promiscuous mode ( for TCPDUMP and cousins ) or accept ++ . a select set of multicast packets ++*/ ++static void smc_set_multicast_list(struct rtnet_device *dev) ++{ ++ short ioaddr = dev->base_addr; ++ ++ PRINTK2("%s:smc_set_multicast_list\n", dev->name); ++ ++ SMC_SELECT_BANK(0); ++ if ( dev->flags & IFF_PROMISC ) ++ { ++ PRINTK2("%s:smc_set_multicast_list:RCR_PRMS\n", dev->name); ++ outw( inw(ioaddr + RCR_REG ) | RCR_PRMS, ioaddr + RCR_REG ); ++ } ++ ++/* BUG? I never disable promiscuous mode if multicasting was turned on. ++ Now, I turn off promiscuous mode, but I don't do anything to multicasting ++ when promiscuous mode is turned on. ++*/ ++ ++ /* Here, I am setting this to accept all multicast packets. ++ I don't need to zero the multicast table, because the flag is ++ checked before the table is ++ */ ++ else if (dev->flags & IFF_ALLMULTI) ++ { ++ outw( inw(ioaddr + RCR_REG ) | RCR_ALMUL, ioaddr + RCR_REG ); ++ PRINTK2("%s:smc_set_multicast_list:RCR_ALMUL\n", dev->name); ++ } ++ ++ else { ++ PRINTK2("%s:smc_set_multicast_list:~(RCR_PRMS|RCR_ALMUL)\n", ++ dev->name); ++ outw( inw( ioaddr + RCR_REG ) & ~(RCR_PRMS | RCR_ALMUL), ++ ioaddr + RCR_REG ); ++ ++ /* ++ since I'm disabling all multicast entirely, I need to ++ clear the multicast list ++ */ ++ SMC_SELECT_BANK( 3 ); ++ outw( 0, ioaddr + MCAST_REG1 ); ++ outw( 0, ioaddr + MCAST_REG2 ); ++ outw( 0, ioaddr + MCAST_REG3 ); ++ outw( 0, ioaddr + MCAST_REG4 ); ++ } ++} ++ ++#ifdef MODULE ++ ++static struct rtnet_device *devSMC91111; ++int io = 0; ++int irq = 0; ++int nowait = 0; ++ ++module_param(io, int, 0444); ++module_param(irq, int, 0444); ++module_param(nowait, int, 0444); ++ ++/*------------------------------------------------------------ ++ . Module initialization function ++ .-------------------------------------------------------------*/ ++int __init init_module(void) ++{ ++ int result; ++ ++ PRINTK2("CARDNAME:init_module\n"); ++ if (io == 0) ++ printk(KERN_WARNING ++ CARDNAME": You shouldn't use auto-probing with insmod!\n" ); ++ ++ devSMC91111 = rt_alloc_etherdev(sizeof(struct smc_local), 4 * 2 + 1); ++ if (devSMC91111 == NULL) { ++ printk (KERN_ERR "init_ethernet failed\n"); ++ return -ENODEV; ++ } ++ rtdev_alloc_name(devSMC91111, "rteth%d"); ++ rt_rtdev_connect(devSMC91111, &RTDEV_manager); ++ devSMC91111->vers = RTDEV_VERS_2_0; ++ ++ /* copy the parameters from insmod into the device structure */ ++ devSMC91111->base_addr = io; ++ devSMC91111->irq = irq; ++ devSMC91111->dma = nowait; // Use DMA field for nowait ++ if ((result = smc_init(devSMC91111)) != 0) ++ return result; ++ ++ if ((result = rt_register_rtnetdev(devSMC91111)) != 0) { ++ rt_rtdev_disconnect(devSMC91111); ++ release_region(devSMC91111->base_addr, SMC_IO_EXTENT); ++ ++ rtdm_irq_free(&((struct smc_local *)devSMC91111)->irq_handle); ++ ++ rtdev_free(devSMC91111); ++ ++ return result; ++ } ++ ++ return 0; ++} ++ ++/*------------------------------------------------------------ ++ . Cleanup when module is removed with rmmod ++ .-------------------------------------------------------------*/ ++void __exit cleanup_module(void) ++{ ++ /* No need to check MOD_IN_USE, as sys_delete_module() checks. 
*/ ++ rt_unregister_rtnetdev(devSMC91111); ++ rt_rtdev_disconnect(devSMC91111); ++ ++ release_region(devSMC91111->base_addr, SMC_IO_EXTENT); ++ ++ if (devSMC91111->priv) { ++ rtdm_irq_free(&((struct smc_local *)devSMC91111->priv)->irq_handle); ++ } ++ ++ rtdev_free(devSMC91111); ++} ++ ++#endif /* MODULE */ ++ ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ ++ ++/*------------------------------------------------------------ ++ . Modify a bit in the LAN91C111 register set ++ .-------------------------------------------------------------*/ ++static word smc_modify_regbit(int bank, int ioaddr, int reg, ++ unsigned int bit, int val) ++{ ++ word regval; ++ ++ SMC_SELECT_BANK( bank ); ++ ++ regval = inw( ioaddr+reg ); ++ if (val) ++ regval |= bit; ++ else ++ regval &= ~bit; ++ ++ outw( regval, ioaddr ); ++ return(regval); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Retrieve a bit in the LAN91C111 register set ++ .-------------------------------------------------------------*/ ++static int smc_get_regbit(int bank, int ioaddr, int reg, unsigned int bit) ++{ ++ SMC_SELECT_BANK( bank ); ++ if ( inw( ioaddr+reg ) & bit) ++ return(1); ++ else ++ return(0); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Modify a LAN91C111 register (word access only) ++ .-------------------------------------------------------------*/ ++static void smc_modify_reg(int bank, int ioaddr, int reg, word val) ++{ ++ SMC_SELECT_BANK( bank ); ++ outw( val, ioaddr+reg ); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Retrieve a LAN91C111 register (word access only) ++ .-------------------------------------------------------------*/ ++static int smc_get_reg(int bank, int ioaddr, int reg) ++{ ++ SMC_SELECT_BANK( bank ); ++ return(inw( ioaddr+reg )); ++} ++ ++ ++static const char smc_info_string[] = ++"\n" ++"info Provides this information blurb\n" ++"swver Prints the software version information of this driver\n" ++"autoneg Auto-negotiate Mode = 1\n" ++"rspeed Requested Speed, 100=100Mbps, 10=10Mpbs\n" ++"rfduplx Requested Full Duplex Operation\n" ++"aspeed Actual Speed, 100=100Mbps, 10=10Mpbs\n" ++"afduplx Actual Full Duplex Operation\n" ++"lnkfail PHY Link Failure when 1\n" ++"miiop External MII when 1, Internal PHY when 0\n" ++"swfdup Switched Full Duplex Mode (allowed only in MII operation)\n" ++"ephloop EPH Block Loopback\n" ++"forcol Force a collision\n" ++"filtcar Filter leading edge of carrier sense for 12 bit times\n" ++"freemem Free buffer memory in bytes\n" ++"totmem Total buffer memory in bytes\n" ++"leda Output of LED-A (green)\n" ++"ledb Output of LED-B (yellow)\n" ++"chiprev Revision ID of the LAN91C111 chip\n" ++""; ++ ++/*------------------------------------------------------------ ++ . 
Sysctl handler for all integer parameters ++ .-------------------------------------------------------------*/ ++static int smc_sysctl_handler(ctl_table *ctl, int write, struct file * filp, ++ void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct rtnet_device *dev = (struct rtnet_device*)ctl->extra1; ++ struct smc_local *lp = (struct smc_local *)ctl->extra2; ++ int ioaddr = dev->base_addr; ++ int *valp = ctl->data; ++ int val; ++ int ret; ++ ++ // Update parameters from the real registers ++ switch (ctl->ctl_name) ++ { ++ case CTL_SMC_FORCOL: ++ *valp = smc_get_regbit(0, ioaddr, TCR_REG, TCR_FORCOL); ++ break; ++ ++ case CTL_SMC_FREEMEM: ++ *valp = ( (word)smc_get_reg(0, ioaddr, MIR_REG) >> 8 ) ++ * LAN91C111_MEMORY_MULTIPLIER; ++ break; ++ ++ ++ case CTL_SMC_TOTMEM: ++ *valp = ( smc_get_reg(0, ioaddr, MIR_REG) & (word)0x00ff ) ++ * LAN91C111_MEMORY_MULTIPLIER; ++ break; ++ ++ case CTL_SMC_CHIPREV: ++ *valp = smc_get_reg(3, ioaddr, REV_REG); ++ break; ++ ++ case CTL_SMC_AFDUPLX: ++ *valp = (lp->lastPhy18 & PHY_INT_DPLXDET) ? 1 : 0; ++ break; ++ ++ case CTL_SMC_ASPEED: ++ *valp = (lp->lastPhy18 & PHY_INT_SPDDET) ? 100 : 10; ++ break; ++ ++ case CTL_SMC_LNKFAIL: ++ *valp = (lp->lastPhy18 & PHY_INT_LNKFAIL) ? 1 : 0; ++ break; ++ ++ case CTL_SMC_LEDA: ++ *valp = (lp->rpc_cur_mode >> RPC_LSXA_SHFT) & (word)0x0007; ++ break; ++ ++ case CTL_SMC_LEDB: ++ *valp = (lp->rpc_cur_mode >> RPC_LSXB_SHFT) & (word)0x0007; ++ break; ++ ++ case CTL_SMC_MIIOP: ++ *valp = smc_get_regbit(1, ioaddr, CONFIG_REG, CONFIG_EXT_PHY); ++ break; ++ ++#ifdef SMC_DEBUG ++ case CTL_SMC_REG_BSR: // Bank Select ++ *valp = smc_get_reg(0, ioaddr, BSR_REG); ++ break; ++ ++ case CTL_SMC_REG_TCR: // Transmit Control ++ *valp = smc_get_reg(0, ioaddr, TCR_REG); ++ break; ++ ++ case CTL_SMC_REG_ESR: // EPH Status ++ *valp = smc_get_reg(0, ioaddr, EPH_STATUS_REG); ++ break; ++ ++ case CTL_SMC_REG_RCR: // Receive Control ++ *valp = smc_get_reg(0, ioaddr, RCR_REG); ++ break; ++ ++ case CTL_SMC_REG_CTRR: // Counter ++ *valp = smc_get_reg(0, ioaddr, COUNTER_REG); ++ break; ++ ++ case CTL_SMC_REG_MIR: // Memory Information ++ *valp = smc_get_reg(0, ioaddr, MIR_REG); ++ break; ++ ++ case CTL_SMC_REG_RPCR: // Receive/Phy Control ++ *valp = smc_get_reg(0, ioaddr, RPC_REG); ++ break; ++ ++ case CTL_SMC_REG_CFGR: // Configuration ++ *valp = smc_get_reg(1, ioaddr, CONFIG_REG); ++ break; ++ ++ case CTL_SMC_REG_BAR: // Base Address ++ *valp = smc_get_reg(1, ioaddr, BASE_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR0: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR0_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR1: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR1_REG); ++ break; ++ ++ case CTL_SMC_REG_IAR2: // Individual Address ++ *valp = smc_get_reg(1, ioaddr, ADDR2_REG); ++ break; ++ ++ case CTL_SMC_REG_GPR: // General Purpose ++ *valp = smc_get_reg(1, ioaddr, GP_REG); ++ break; ++ ++ case CTL_SMC_REG_CTLR: // Control ++ *valp = smc_get_reg(1, ioaddr, CTL_REG); ++ break; ++ ++ case CTL_SMC_REG_MCR: // MMU Command ++ *valp = smc_get_reg(2, ioaddr, MMU_CMD_REG); ++ break; ++ ++ case CTL_SMC_REG_PNR: // Packet Number ++ *valp = smc_get_reg(2, ioaddr, PN_REG); ++ break; ++ ++ case CTL_SMC_REG_FPR: // Allocation Result/FIFO Ports ++ *valp = smc_get_reg(2, ioaddr, RXFIFO_REG); ++ break; ++ ++ case CTL_SMC_REG_PTR: // Pointer ++ *valp = smc_get_reg(2, ioaddr, PTR_REG); ++ break; ++ ++ case CTL_SMC_REG_DR: // Data ++ *valp = smc_get_reg(2, ioaddr, DATA_REG); ++ break; ++ ++ case CTL_SMC_REG_ISR: // Interrupt Status/Mask ++ *valp = 
smc_get_reg(2, ioaddr, INT_REG); ++ break; ++ ++ case CTL_SMC_REG_MTR1: // Multicast Table Entry 1 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG1); ++ break; ++ ++ case CTL_SMC_REG_MTR2: // Multicast Table Entry 2 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG2); ++ break; ++ ++ case CTL_SMC_REG_MTR3: // Multicast Table Entry 3 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG3); ++ break; ++ ++ case CTL_SMC_REG_MTR4: // Multicast Table Entry 4 ++ *valp = smc_get_reg(3, ioaddr, MCAST_REG4); ++ break; ++ ++ case CTL_SMC_REG_MIIR: // Management Interface ++ *valp = smc_get_reg(3, ioaddr, MII_REG); ++ break; ++ ++ case CTL_SMC_REG_REVR: // Revision ++ *valp = smc_get_reg(3, ioaddr, REV_REG); ++ break; ++ ++ case CTL_SMC_REG_ERCVR: // Early RCV ++ *valp = smc_get_reg(3, ioaddr, ERCV_REG); ++ break; ++ ++ case CTL_SMC_REG_EXTR: // External ++ *valp = smc_get_reg(7, ioaddr, EXT_REG); ++ break; ++ ++ case CTL_SMC_PHY_CTRL: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CNTL_REG); ++ break; ++ ++ case CTL_SMC_PHY_STAT: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_STAT_REG); ++ break; ++ ++ case CTL_SMC_PHY_ID1: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID1_REG); ++ break; ++ ++ case CTL_SMC_PHY_ID2: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID2_REG); ++ break; ++ ++ case CTL_SMC_PHY_ADC: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_AD_REG); ++ break; ++ ++ case CTL_SMC_PHY_REMC: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_RMT_REG); ++ break; ++ ++ case CTL_SMC_PHY_CFG1: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG1_REG); ++ break; ++ ++ case CTL_SMC_PHY_CFG2: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG2_REG); ++ break; ++ ++ case CTL_SMC_PHY_INT: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_INT_REG); ++ break; ++ ++ case CTL_SMC_PHY_MASK: ++ *valp = smc_read_phy_register(ioaddr, lp->phyaddr, ++ PHY_MASK_REG); ++ break; ++ ++#endif // SMC_DEBUG ++ ++ default: ++ // Just ignore unsupported parameters ++ break; ++ } ++ ++ // Save old state ++ val = *valp; ++ ++ // Perform the generic integer operation ++ if ((ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos)) != 0) ++ return(ret); ++ ++ // Write changes out to the registers ++ if (write && *valp != val) { ++ ++ val = *valp; ++ switch (ctl->ctl_name) { ++ ++ case CTL_SMC_SWFDUP: ++ if (val) ++ lp->tcr_cur_mode |= TCR_SWFDUP; ++ else ++ lp->tcr_cur_mode &= ~TCR_SWFDUP; ++ ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_SWFDUP, val); ++ break; ++ ++ case CTL_SMC_EPHLOOP: ++ if (val) ++ lp->tcr_cur_mode |= TCR_EPH_LOOP; ++ else ++ lp->tcr_cur_mode &= ~TCR_EPH_LOOP; ++ ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_EPH_LOOP, val); ++ break; ++ ++ case CTL_SMC_FORCOL: ++ if (val) ++ lp->tcr_cur_mode |= TCR_FORCOL; ++ else ++ lp->tcr_cur_mode &= ~TCR_FORCOL; ++ ++ // Update the EPH block ++ smc_modify_regbit(0, ioaddr, TCR_REG, TCR_FORCOL, val); ++ break; ++ ++ case CTL_SMC_FILTCAR: ++ if (val) ++ lp->rcr_cur_mode |= RCR_FILT_CAR; ++ else ++ lp->rcr_cur_mode &= ~RCR_FILT_CAR; ++ ++ // Update the EPH block ++ smc_modify_regbit(0, ioaddr, RCR_REG, RCR_FILT_CAR, val); ++ break; ++ ++ case CTL_SMC_RFDUPLX: ++ // Disallow changes if in auto-negotiation mode ++ if (lp->ctl_autoneg) ++ break; ++ ++ if (val) ++ { ++ lp->rpc_cur_mode |= RPC_DPLX; ++ } ++ else ++ { ++ lp->rpc_cur_mode &= ~RPC_DPLX; ++ } ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_RSPEED: ++ // Disallow changes 
if in auto-negotiation mode ++ if (lp->ctl_autoneg) ++ break; ++ ++ if (val > 10) ++ lp->rpc_cur_mode |= RPC_SPEED; ++ else ++ lp->rpc_cur_mode &= ~RPC_SPEED; ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_AUTONEG: ++ if (val) ++ lp->rpc_cur_mode |= RPC_ANEG; ++ else ++ lp->rpc_cur_mode &= ~RPC_ANEG; ++ ++ // Reconfigure the PHY ++ smc_phy_configure(dev); ++ ++ break; ++ ++ case CTL_SMC_LEDA: ++ val &= 0x07; // Restrict to 3 ls bits ++ lp->rpc_cur_mode &= ~(word)(0x07<rpc_cur_mode |= (word)(val<rpc_cur_mode); ++ break; ++ ++ case CTL_SMC_LEDB: ++ val &= 0x07; // Restrict to 3 ls bits ++ lp->rpc_cur_mode &= ~(word)(0x07<rpc_cur_mode |= (word)(val<rpc_cur_mode); ++ break; ++ ++ case CTL_SMC_MIIOP: ++ // Update the Internal PHY block ++ smc_modify_regbit(1, ioaddr, CONFIG_REG, ++ CONFIG_EXT_PHY, val); ++ break; ++ ++#ifdef SMC_DEBUG ++ case CTL_SMC_REG_BSR: // Bank Select ++ smc_modify_reg(0, ioaddr, BSR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_TCR: // Transmit Control ++ smc_modify_reg(0, ioaddr, TCR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_ESR: // EPH Status ++ smc_modify_reg(0, ioaddr, EPH_STATUS_REG, val); ++ break; ++ ++ case CTL_SMC_REG_RCR: // Receive Control ++ smc_modify_reg(0, ioaddr, RCR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CTRR: // Counter ++ smc_modify_reg(0, ioaddr, COUNTER_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MIR: // Memory Information ++ smc_modify_reg(0, ioaddr, MIR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_RPCR: // Receive/Phy Control ++ smc_modify_reg(0, ioaddr, RPC_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CFGR: // Configuration ++ smc_modify_reg(1, ioaddr, CONFIG_REG, val); ++ break; ++ ++ case CTL_SMC_REG_BAR: // Base Address ++ smc_modify_reg(1, ioaddr, BASE_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR0: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR0_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR1: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR1_REG, val); ++ break; ++ ++ case CTL_SMC_REG_IAR2: // Individual Address ++ smc_modify_reg(1, ioaddr, ADDR2_REG, val); ++ break; ++ ++ case CTL_SMC_REG_GPR: // General Purpose ++ smc_modify_reg(1, ioaddr, GP_REG, val); ++ break; ++ ++ case CTL_SMC_REG_CTLR: // Control ++ smc_modify_reg(1, ioaddr, CTL_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MCR: // MMU Command ++ smc_modify_reg(2, ioaddr, MMU_CMD_REG, val); ++ break; ++ ++ case CTL_SMC_REG_PNR: // Packet Number ++ smc_modify_reg(2, ioaddr, PN_REG, val); ++ break; ++ ++ case CTL_SMC_REG_FPR: // Allocation Result/FIFO Ports ++ smc_modify_reg(2, ioaddr, RXFIFO_REG, val); ++ break; ++ ++ case CTL_SMC_REG_PTR: // Pointer ++ smc_modify_reg(2, ioaddr, PTR_REG, val); ++ break; ++ ++ case CTL_SMC_REG_DR: // Data ++ smc_modify_reg(2, ioaddr, DATA_REG, val); ++ break; ++ ++ case CTL_SMC_REG_ISR: // Interrupt Status/Mask ++ smc_modify_reg(2, ioaddr, INT_REG, val); ++ break; ++ ++ case CTL_SMC_REG_MTR1: // Multicast Table Entry 1 ++ smc_modify_reg(3, ioaddr, MCAST_REG1, val); ++ break; ++ ++ case CTL_SMC_REG_MTR2: // Multicast Table Entry 2 ++ smc_modify_reg(3, ioaddr, MCAST_REG2, val); ++ break; ++ ++ case CTL_SMC_REG_MTR3: // Multicast Table Entry 3 ++ smc_modify_reg(3, ioaddr, MCAST_REG3, val); ++ break; ++ ++ case CTL_SMC_REG_MTR4: // Multicast Table Entry 4 ++ smc_modify_reg(3, ioaddr, MCAST_REG4, val); ++ break; ++ ++ case CTL_SMC_REG_MIIR: // Management Interface ++ smc_modify_reg(3, ioaddr, MII_REG, val); ++ break; ++ ++ case CTL_SMC_REG_REVR: // Revision ++ smc_modify_reg(3, ioaddr, REV_REG, 
val); ++ break; ++ ++ case CTL_SMC_REG_ERCVR: // Early RCV ++ smc_modify_reg(3, ioaddr, ERCV_REG, val); ++ break; ++ ++ case CTL_SMC_REG_EXTR: // External ++ smc_modify_reg(7, ioaddr, EXT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CTRL: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CNTL_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_STAT: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_STAT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ID1: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID1_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ID2: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_ID2_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_ADC: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_AD_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_REMC: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_RMT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CFG1: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG1_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_CFG2: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_CFG2_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_INT: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_INT_REG, val); ++ break; ++ ++ case CTL_SMC_PHY_MASK: ++ smc_write_phy_register(ioaddr, lp->phyaddr, ++ PHY_MASK_REG, val); ++ break; ++ ++#endif // SMC_DEBUG ++ ++ default: ++ // Just ignore unsupported parameters ++ break; ++ } // end switch ++ ++ } // end if ++ ++ return ret; ++} ++ ++/*------------------------------------------------------------ ++ . Sysctl registration function for all parameters (files) ++ .-------------------------------------------------------------*/ ++static void smc_sysctl_register(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ static int ctl_name = CTL_SMC; ++ ctl_table* ct; ++ int i; ++ ++ // Make sure the ctl_tables start out as all zeros ++ memset(lp->root_table, 0, sizeof lp->root_table); ++ memset(lp->eth_table, 0, sizeof lp->eth_table); ++ memset(lp->param_table, 0, sizeof lp->param_table); ++ ++ // Initialize the root table ++ ct = lp->root_table; ++ ct->ctl_name = CTL_DEV; ++ ct->procname = "dev"; ++ ct->maxlen = 0; ++ ct->mode = 0555; ++ ct->child = lp->eth_table; ++ // remaining fields are zero ++ ++ // Initialize the ethX table (this device's table) ++ ct = lp->eth_table; ++ ct->ctl_name = ctl_name++; // Must be unique ++ ct->procname = dev->name; ++ ct->maxlen = 0; ++ ct->mode = 0555; ++ ct->child = lp->param_table; ++ // remaining fields are zero ++ ++ // Initialize the parameter (files) table ++ // Make sure the last entry remains null ++ ct = lp->param_table; ++ for (i = 0; i < (CTL_SMC_LAST_ENTRY-1); ++i) ++ { ++ // Initialize fields common to all table entries ++ ct[i].proc_handler = smc_sysctl_handler; ++ ct[i].extra1 = (void*)dev; // Save our device pointer ++ ct[i].extra2 = (void*)lp; // Save our smc_local data pointer ++ } ++ ++ // INFO - this is our only string parameter ++ i = 0; ++ ct[i].proc_handler = proc_dostring; // use default handler ++ ct[i].ctl_name = CTL_SMC_INFO; ++ ct[i].procname = "info"; ++ ct[i].data = (void*)smc_info_string; ++ ct[i].maxlen = sizeof smc_info_string; ++ ct[i].mode = 0444; // Read only ++ ++ // SWVER ++ ++i; ++ ct[i].proc_handler = proc_dostring; // use default handler ++ ct[i].ctl_name = CTL_SMC_SWVER; ++ ct[i].procname = "swver"; ++ ct[i].data = (void*)version; ++ ct[i].maxlen = sizeof version; ++ ct[i].mode = 0444; // Read only ++ ++ // SWFDUP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_SWFDUP; ++ ct[i].procname = "swfdup"; ++ ct[i].data 
= (void*)&(lp->ctl_swfdup); ++ ct[i].maxlen = sizeof lp->ctl_swfdup; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // EPHLOOP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_EPHLOOP; ++ ct[i].procname = "ephloop"; ++ ct[i].data = (void*)&(lp->ctl_ephloop); ++ ct[i].maxlen = sizeof lp->ctl_ephloop; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // MIIOP ++ ++i; ++ ct[i].ctl_name = CTL_SMC_MIIOP; ++ ct[i].procname = "miiop"; ++ ct[i].data = (void*)&(lp->ctl_miiop); ++ ct[i].maxlen = sizeof lp->ctl_miiop; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // AUTONEG ++ ++i; ++ ct[i].ctl_name = CTL_SMC_AUTONEG; ++ ct[i].procname = "autoneg"; ++ ct[i].data = (void*)&(lp->ctl_autoneg); ++ ct[i].maxlen = sizeof lp->ctl_autoneg; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // RFDUPLX ++ ++i; ++ ct[i].ctl_name = CTL_SMC_RFDUPLX; ++ ct[i].procname = "rfduplx"; ++ ct[i].data = (void*)&(lp->ctl_rfduplx); ++ ct[i].maxlen = sizeof lp->ctl_rfduplx; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // RSPEED ++ ++i; ++ ct[i].ctl_name = CTL_SMC_RSPEED; ++ ct[i].procname = "rspeed"; ++ ct[i].data = (void*)&(lp->ctl_rspeed); ++ ct[i].maxlen = sizeof lp->ctl_rspeed; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // AFDUPLX ++ ++i; ++ ct[i].ctl_name = CTL_SMC_AFDUPLX; ++ ct[i].procname = "afduplx"; ++ ct[i].data = (void*)&(lp->ctl_afduplx); ++ ct[i].maxlen = sizeof lp->ctl_afduplx; ++ ct[i].mode = 0444; // Read only ++ ++ // ASPEED ++ ++i; ++ ct[i].ctl_name = CTL_SMC_ASPEED; ++ ct[i].procname = "aspeed"; ++ ct[i].data = (void*)&(lp->ctl_aspeed); ++ ct[i].maxlen = sizeof lp->ctl_aspeed; ++ ct[i].mode = 0444; // Read only ++ ++ // LNKFAIL ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LNKFAIL; ++ ct[i].procname = "lnkfail"; ++ ct[i].data = (void*)&(lp->ctl_lnkfail); ++ ct[i].maxlen = sizeof lp->ctl_lnkfail; ++ ct[i].mode = 0444; // Read only ++ ++ // FORCOL ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FORCOL; ++ ct[i].procname = "forcol"; ++ ct[i].data = (void*)&(lp->ctl_forcol); ++ ct[i].maxlen = sizeof lp->ctl_forcol; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // FILTCAR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FILTCAR; ++ ct[i].procname = "filtcar"; ++ ct[i].data = (void*)&(lp->ctl_filtcar); ++ ct[i].maxlen = sizeof lp->ctl_filtcar; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // FREEMEM ++ ++i; ++ ct[i].ctl_name = CTL_SMC_FREEMEM; ++ ct[i].procname = "freemem"; ++ ct[i].data = (void*)&(lp->ctl_freemem); ++ ct[i].maxlen = sizeof lp->ctl_freemem; ++ ct[i].mode = 0444; // Read only ++ ++ // TOTMEM ++ ++i; ++ ct[i].ctl_name = CTL_SMC_TOTMEM; ++ ct[i].procname = "totmem"; ++ ct[i].data = (void*)&(lp->ctl_totmem); ++ ct[i].maxlen = sizeof lp->ctl_totmem; ++ ct[i].mode = 0444; // Read only ++ ++ // LEDA ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LEDA; ++ ct[i].procname = "leda"; ++ ct[i].data = (void*)&(lp->ctl_leda); ++ ct[i].maxlen = sizeof lp->ctl_leda; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // LEDB ++ ++i; ++ ct[i].ctl_name = CTL_SMC_LEDB; ++ ct[i].procname = "ledb"; ++ ct[i].data = (void*)&(lp->ctl_ledb); ++ ct[i].maxlen = sizeof lp->ctl_ledb; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // CHIPREV ++ ++i; ++ ct[i].ctl_name = CTL_SMC_CHIPREV; ++ ct[i].procname = "chiprev"; ++ ct[i].data = (void*)&(lp->ctl_chiprev); ++ ct[i].maxlen = sizeof lp->ctl_chiprev; ++ ct[i].mode = 0444; // Read only ++ ++#ifdef SMC_DEBUG ++ // REG_BSR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_BSR; ++ ct[i].procname = "reg_bsr"; ++ ct[i].data = 
(void*)&(lp->ctl_reg_bsr); ++ ct[i].maxlen = sizeof lp->ctl_reg_bsr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_TCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_TCR; ++ ct[i].procname = "reg_tcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_tcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_tcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ESR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ESR; ++ ct[i].procname = "reg_esr"; ++ ct[i].data = (void*)&(lp->ctl_reg_esr); ++ ct[i].maxlen = sizeof lp->ctl_reg_esr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_RCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_RCR; ++ ct[i].procname = "reg_rcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_rcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_rcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CTRR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CTRR; ++ ct[i].procname = "reg_ctrr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ctrr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ctrr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MIR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MIR; ++ ct[i].procname = "reg_mir"; ++ ct[i].data = (void*)&(lp->ctl_reg_mir); ++ ct[i].maxlen = sizeof lp->ctl_reg_mir; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_RPCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_RPCR; ++ ct[i].procname = "reg_rpcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_rpcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_rpcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CFGR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CFGR; ++ ct[i].procname = "reg_cfgr"; ++ ct[i].data = (void*)&(lp->ctl_reg_cfgr); ++ ct[i].maxlen = sizeof lp->ctl_reg_cfgr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_BAR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_BAR; ++ ct[i].procname = "reg_bar"; ++ ct[i].data = (void*)&(lp->ctl_reg_bar); ++ ct[i].maxlen = sizeof lp->ctl_reg_bar; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR0 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR0; ++ ct[i].procname = "reg_iar0"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar0); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar0; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR1; ++ ct[i].procname = "reg_iar1"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar1); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_IAR2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_IAR2; ++ ct[i].procname = "reg_iar2"; ++ ct[i].data = (void*)&(lp->ctl_reg_iar2); ++ ct[i].maxlen = sizeof lp->ctl_reg_iar2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_GPR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_GPR; ++ ct[i].procname = "reg_gpr"; ++ ct[i].data = (void*)&(lp->ctl_reg_gpr); ++ ct[i].maxlen = sizeof lp->ctl_reg_gpr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_CTLR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_CTLR; ++ ct[i].procname = "reg_ctlr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ctlr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ctlr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MCR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MCR; ++ ct[i].procname = "reg_mcr"; ++ ct[i].data = (void*)&(lp->ctl_reg_mcr); ++ ct[i].maxlen = sizeof lp->ctl_reg_mcr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_PNR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_PNR; ++ ct[i].procname = "reg_pnr"; ++ ct[i].data = (void*)&(lp->ctl_reg_pnr); ++ ct[i].maxlen = sizeof lp->ctl_reg_pnr; 
++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_FPR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_FPR; ++ ct[i].procname = "reg_fpr"; ++ ct[i].data = (void*)&(lp->ctl_reg_fpr); ++ ct[i].maxlen = sizeof lp->ctl_reg_fpr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_PTR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_PTR; ++ ct[i].procname = "reg_ptr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ptr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ptr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_DR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_DR; ++ ct[i].procname = "reg_dr"; ++ ct[i].data = (void*)&(lp->ctl_reg_dr); ++ ct[i].maxlen = sizeof lp->ctl_reg_dr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ISR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ISR; ++ ct[i].procname = "reg_isr"; ++ ct[i].data = (void*)&(lp->ctl_reg_isr); ++ ct[i].maxlen = sizeof lp->ctl_reg_isr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR1; ++ ct[i].procname = "reg_mtr1"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr1); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR2; ++ ct[i].procname = "reg_mtr2"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr2); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR3 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR3; ++ ct[i].procname = "reg_mtr3"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr3); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr3; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MTR4 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MTR4; ++ ct[i].procname = "reg_mtr4"; ++ ct[i].data = (void*)&(lp->ctl_reg_mtr4); ++ ct[i].maxlen = sizeof lp->ctl_reg_mtr4; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_MIIR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_MIIR; ++ ct[i].procname = "reg_miir"; ++ ct[i].data = (void*)&(lp->ctl_reg_miir); ++ ct[i].maxlen = sizeof lp->ctl_reg_miir; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_REVR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_REVR; ++ ct[i].procname = "reg_revr"; ++ ct[i].data = (void*)&(lp->ctl_reg_revr); ++ ct[i].maxlen = sizeof lp->ctl_reg_revr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_ERCVR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_ERCVR; ++ ct[i].procname = "reg_ercvr"; ++ ct[i].data = (void*)&(lp->ctl_reg_ercvr); ++ ct[i].maxlen = sizeof lp->ctl_reg_ercvr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // REG_EXTR ++ ++i; ++ ct[i].ctl_name = CTL_SMC_REG_EXTR; ++ ct[i].procname = "reg_extr"; ++ ct[i].data = (void*)&(lp->ctl_reg_extr); ++ ct[i].maxlen = sizeof lp->ctl_reg_extr; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Control ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CTRL; ++ ct[i].procname = "phy_ctrl"; ++ ct[i].data = (void*)&(lp->ctl_phy_ctrl); ++ ct[i].maxlen = sizeof lp->ctl_phy_ctrl; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Status ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_STAT; ++ ct[i].procname = "phy_stat"; ++ ct[i].data = (void*)&(lp->ctl_phy_stat); ++ ct[i].maxlen = sizeof lp->ctl_phy_stat; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY ID1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ID1; ++ ct[i].procname = "phy_id1"; ++ ct[i].data = (void*)&(lp->ctl_phy_id1); ++ ct[i].maxlen = sizeof lp->ctl_phy_id1; ++ ct[i].mode = 0644; // Read by all, write by 
root ++ ++ // PHY ID2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ID2; ++ ct[i].procname = "phy_id2"; ++ ct[i].data = (void*)&(lp->ctl_phy_id2); ++ ct[i].maxlen = sizeof lp->ctl_phy_id2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Advertise Capabilities ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_ADC; ++ ct[i].procname = "phy_adc"; ++ ct[i].data = (void*)&(lp->ctl_phy_adc); ++ ct[i].maxlen = sizeof lp->ctl_phy_adc; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Remote Capabilities ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_REMC; ++ ct[i].procname = "phy_remc"; ++ ct[i].data = (void*)&(lp->ctl_phy_remc); ++ ct[i].maxlen = sizeof lp->ctl_phy_remc; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Configuration 1 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CFG1; ++ ct[i].procname = "phy_cfg1"; ++ ct[i].data = (void*)&(lp->ctl_phy_cfg1); ++ ct[i].maxlen = sizeof lp->ctl_phy_cfg1; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Configuration 2 ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_CFG2; ++ ct[i].procname = "phy_cfg2"; ++ ct[i].data = (void*)&(lp->ctl_phy_cfg2); ++ ct[i].maxlen = sizeof lp->ctl_phy_cfg2; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Interrupt/Status Output ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_INT; ++ ct[i].procname = "phy_int"; ++ ct[i].data = (void*)&(lp->ctl_phy_int); ++ ct[i].maxlen = sizeof lp->ctl_phy_int; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++ // PHY Interrupt/Status Mask ++ ++i; ++ ct[i].ctl_name = CTL_SMC_PHY_MASK; ++ ct[i].procname = "phy_mask"; ++ ct[i].data = (void*)&(lp->ctl_phy_mask); ++ ct[i].maxlen = sizeof lp->ctl_phy_mask; ++ ct[i].mode = 0644; // Read by all, write by root ++ ++#endif // SMC_DEBUG ++ ++ // Register /proc/sys/dev/ethX ++ lp->sysctl_header = register_sysctl_table(lp->root_table, 1); ++} ++ ++ ++/*------------------------------------------------------------ ++ . Sysctl unregistration when driver is closed ++ .-------------------------------------------------------------*/ ++static void smc_sysctl_unregister(struct rtnet_device *dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ ++ unregister_sysctl_table(lp->sysctl_header); ++} ++ ++#endif /* endif CONFIG_SYSCTL */ ++ ++ ++//---PHY CONTROL AND CONFIGURATION----------------------------------------- ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ ++/*------------------------------------------------------------ ++ . Debugging function for viewing MII Management serial bitstream ++ .-------------------------------------------------------------*/ ++static void smc_dump_mii_stream(byte* bits, int size) ++{ ++ int i; ++ ++ printk("BIT#:"); ++ for (i = 0; i < size; ++i) ++ { ++ printk("%d", i%10); ++ } ++ ++ printk("\nMDOE:"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDOE) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\nMDO :"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDO) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\nMDI :"); ++ for (i = 0; i < size; ++i) ++ { ++ if (bits[i] & MII_MDI) ++ printk("1"); ++ else ++ printk("0"); ++ } ++ ++ printk("\n"); ++} ++#endif ++ ++/*------------------------------------------------------------ ++ . 
Reads a register from the MII Management serial interface ++ .-------------------------------------------------------------*/ ++static word smc_read_phy_register(int ioaddr, byte phyaddr, byte phyreg) ++{ ++ int oldBank; ++ int i; ++ byte mask; ++ word mii_reg; ++ byte bits[64]; ++ int clk_idx = 0; ++ int input_idx; ++ word phydata; ++ ++ // 32 consecutive ones on MDO to establish sync ++ for (i = 0; i < 32; ++i) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Start code <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Read command <10> ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Output the PHY address, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyaddr & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Output the phy register number, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyreg & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Tristate and turnaround (2 bit times) ++ bits[clk_idx++] = 0; ++ //bits[clk_idx++] = 0; ++ ++ // Input starts at this bit time ++ input_idx = clk_idx; ++ ++ // Will input 16 bits ++ for (i = 0; i < 16; ++i) ++ bits[clk_idx++] = 0; ++ ++ // Final clock bit ++ bits[clk_idx++] = 0; ++ ++ // Save the current bank ++ oldBank = inw( ioaddr+BANK_SELECT ); ++ ++ // Select bank 3 ++ SMC_SELECT_BANK( 3 ); ++ ++ // Get the current MII register value ++ mii_reg = inw( ioaddr+MII_REG ); ++ ++ // Turn off all MII Interface bits ++ mii_reg &= ~(MII_MDOE|MII_MCLK|MII_MDI|MII_MDO); ++ ++ // Clock all 64 cycles ++ for (i = 0; i < sizeof bits; ++i) ++ { ++ // Clock Low - output data ++ outw( mii_reg | bits[i], ioaddr+MII_REG ); ++ udelay(50); ++ ++ ++ // Clock Hi - input data ++ outw( mii_reg | bits[i] | MII_MCLK, ioaddr+MII_REG ); ++ udelay(50); ++ bits[i] |= inw( ioaddr+MII_REG ) & MII_MDI; ++ } ++ ++ // Return to idle state ++ // Set clock to low, data to low, and output tristated ++ outw( mii_reg, ioaddr+MII_REG ); ++ udelay(50); ++ ++ // Restore original bank select ++ SMC_SELECT_BANK( oldBank ); ++ ++ // Recover input data ++ phydata = 0; ++ for (i = 0; i < 16; ++i) ++ { ++ phydata <<= 1; ++ ++ if (bits[input_idx++] & MII_MDI) ++ phydata |= 0x0001; ++ } ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ printk("smc_read_phy_register(): phyaddr=%x,phyreg=%x,phydata=%x\n", ++ phyaddr, phyreg, phydata); ++ smc_dump_mii_stream(bits, sizeof bits); ++#endif ++ ++ return(phydata); ++} ++ ++ ++/*------------------------------------------------------------ ++ . 
Writes a register to the MII Management serial interface ++ .-------------------------------------------------------------*/ ++static void smc_write_phy_register(int ioaddr, ++ byte phyaddr, byte phyreg, word phydata) ++{ ++ int oldBank; ++ int i; ++ word mask; ++ word mii_reg; ++ byte bits[65]; ++ int clk_idx = 0; ++ ++ // 32 consecutive ones on MDO to establish sync ++ for (i = 0; i < 32; ++i) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Start code <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Write command <01> ++ bits[clk_idx++] = MII_MDOE; ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ ++ // Output the PHY address, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyaddr & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Output the phy register number, msb first ++ mask = (byte)0x10; ++ for (i = 0; i < 5; ++i) ++ { ++ if (phyreg & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Tristate and turnaround (2 bit times) ++ bits[clk_idx++] = 0; ++ bits[clk_idx++] = 0; ++ ++ // Write out 16 bits of data, msb first ++ mask = 0x8000; ++ for (i = 0; i < 16; ++i) ++ { ++ if (phydata & mask) ++ bits[clk_idx++] = MII_MDOE | MII_MDO; ++ else ++ bits[clk_idx++] = MII_MDOE; ++ ++ // Shift to next lowest bit ++ mask >>= 1; ++ } ++ ++ // Final clock bit (tristate) ++ bits[clk_idx++] = 0; ++ ++ // Save the current bank ++ oldBank = inw( ioaddr+BANK_SELECT ); ++ ++ // Select bank 3 ++ SMC_SELECT_BANK( 3 ); ++ ++ // Get the current MII register value ++ mii_reg = inw( ioaddr+MII_REG ); ++ ++ // Turn off all MII Interface bits ++ mii_reg &= ~(MII_MDOE|MII_MCLK|MII_MDI|MII_MDO); ++ ++ // Clock all cycles ++ for (i = 0; i < sizeof bits; ++i) ++ { ++ // Clock Low - output data ++ outw( mii_reg | bits[i], ioaddr+MII_REG ); ++ udelay(50); ++ ++ ++ // Clock Hi - input data ++ outw( mii_reg | bits[i] | MII_MCLK, ioaddr+MII_REG ); ++ udelay(50); ++ bits[i] |= inw( ioaddr+MII_REG ) & MII_MDI; ++ } ++ ++ // Return to idle state ++ // Set clock to low, data to low, and output tristated ++ outw( mii_reg, ioaddr+MII_REG ); ++ udelay(50); ++ ++ // Restore original bank select ++ SMC_SELECT_BANK( oldBank ); ++ ++#if defined(SMC_DEBUG) && (SMC_DEBUG > 2 ) ++ printk("smc_write_phy_register(): phyaddr=%x,phyreg=%x,phydata=%x\n", ++ phyaddr, phyreg, phydata); ++ smc_dump_mii_stream(bits, sizeof bits); ++#endif ++} ++ ++ ++/*------------------------------------------------------------ ++ . 
Finds and reports the PHY address ++ .-------------------------------------------------------------*/ ++static int smc_detect_phy(struct rtnet_device* dev) ++{ ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int ioaddr = dev->base_addr; ++ word phy_id1; ++ word phy_id2; ++ int phyaddr; ++ int found = 0; ++ ++ PRINTK3("%s:smc_detect_phy()\n", dev->name); ++ ++ // Scan all 32 PHY addresses if necessary ++ for (phyaddr = 0; phyaddr < 32; ++phyaddr) ++ { ++ // Read the PHY identifiers ++ phy_id1 = smc_read_phy_register(ioaddr, phyaddr, PHY_ID1_REG); ++ phy_id2 = smc_read_phy_register(ioaddr, phyaddr, PHY_ID2_REG); ++ ++ PRINTK3("%s: phy_id1=%x, phy_id2=%x\n", ++ dev->name, phy_id1, phy_id2); ++ ++ // Make sure it is a valid identifier ++ if ((phy_id2 > 0x0000) && (phy_id2 < 0xffff) && ++ (phy_id1 > 0x0000) && (phy_id1 < 0xffff)) ++ { ++ if ((phy_id1 != 0x8000) && (phy_id2 != 0x8000)) ++ { ++ // Save the PHY's address ++ lp->phyaddr = phyaddr; ++ found = 1; ++ break; ++ } ++ } ++ } ++ ++ if (!found) ++ { ++ PRINTK("%s: No PHY found\n", dev->name); ++ return(0); ++ } ++ ++ // Set the PHY type ++ if ( (phy_id1 == 0x0016) && ((phy_id2 & 0xFFF0) == 0xF840 ) ) ++ { ++ lp->phytype = PHY_LAN83C183; ++ PRINTK("%s: PHY=LAN83C183 (LAN91C111 Internal)\n", dev->name); ++ } ++ ++ if ( (phy_id1 == 0x0282) && ((phy_id2 & 0xFFF0) == 0x1C50) ) ++ { ++ lp->phytype = PHY_LAN83C180; ++ PRINTK("%s: PHY=LAN83C180\n", dev->name); ++ } ++ ++ return(1); ++} ++ ++/*------------------------------------------------------------ ++ . Waits the specified number of milliseconds - kernel friendly ++ .-------------------------------------------------------------*/ ++static void smc_wait_ms(unsigned int ms) ++{ ++ ++ if (!in_interrupt()) ++ { ++ current->state = TASK_UNINTERRUPTIBLE; ++ schedule_timeout(1 + ms * HZ / 1000); ++ } ++ else ++ { ++ current->state = TASK_INTERRUPTIBLE; ++ schedule_timeout(1 + ms * HZ / 1000); ++ current->state = TASK_RUNNING; ++ } ++} ++ ++/*------------------------------------------------------------ ++ . Sets the PHY to a configuration as determined by the user ++ .-------------------------------------------------------------*/ ++#ifdef DISABLED____CONFIG_SYSCTL ++static int smc_phy_fixed(struct rtnet_device* dev) ++{ ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ byte phyaddr = lp->phyaddr; ++ word my_fixed_caps; ++ word cfg1; ++ ++ PRINTK3("%s:smc_phy_fixed()\n", dev->name); ++ ++ // Enter Link Disable state ++ cfg1 = smc_read_phy_register(ioaddr, phyaddr, PHY_CFG1_REG); ++ cfg1 |= PHY_CFG1_LNKDIS; ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CFG1_REG, cfg1); ++ ++ // Set our fixed capabilities ++ // Disable auto-negotiation ++ my_fixed_caps = 0; ++ ++ if (lp->ctl_rfduplx) ++ my_fixed_caps |= PHY_CNTL_DPLX; ++ ++ if (lp->ctl_rspeed == 100) ++ my_fixed_caps |= PHY_CNTL_SPEED; ++ ++ // Write our capabilities to the phy control register ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CNTL_REG, my_fixed_caps); ++ ++ // Re-Configure the Receive/Phy Control register ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ // Success ++ return(1); ++} ++#endif // CONFIG_SYSCTL ++ ++ ++/*------------------------------------------------------------ ++ . Configures the specified PHY using Autonegotiation. Calls ++ . smc_phy_fixed() if the user has requested a certain config. 
++ .-------------------------------------------------------------*/ ++static void smc_phy_configure(struct rtnet_device* dev) ++{ ++ int ioaddr = dev->base_addr; ++ struct smc_local *lp = (struct smc_local *)dev->priv; ++ int timeout; ++ byte phyaddr; ++ word my_phy_caps; // My PHY capabilities ++ word my_ad_caps; // My Advertised capabilities ++ word status; ++ int failed = 0; ++ ++ PRINTK3("%s:smc_program_phy()\n", dev->name); ++ ++ // Set the blocking flag ++ lp->autoneg_active = 1; ++ ++ // Find the address and type of our phy ++ if (!smc_detect_phy(dev)) ++ { ++ goto smc_phy_configure_exit; ++ } ++ ++ // Get the detected phy address ++ phyaddr = lp->phyaddr; ++ ++ // Reset the PHY, setting all other bits to zero ++ smc_write_phy_register(ioaddr, phyaddr, PHY_CNTL_REG, PHY_CNTL_RST); ++ ++ // Wait for the reset to complete, or time out ++ timeout = 6; // Wait up to 3 seconds ++ while (timeout--) ++ { ++ if (!(smc_read_phy_register(ioaddr, phyaddr, PHY_CNTL_REG) ++ & PHY_CNTL_RST)) ++ { ++ // reset complete ++ break; ++ } ++ ++ smc_wait_ms(500); // wait 500 millisecs ++ if (signal_pending(current)) // Exit anyway if signaled ++ { ++ PRINTK2("%s:PHY reset interrupted by signal\n", ++ dev->name); ++ timeout = 0; ++ break; ++ } ++ } ++ ++ if (timeout < 1) ++ { ++ PRINTK2("%s:PHY reset timed out\n", dev->name); ++ goto smc_phy_configure_exit; ++ } ++ ++ // Read PHY Register 18, Status Output ++ lp->lastPhy18 = smc_read_phy_register(ioaddr, phyaddr, PHY_INT_REG); ++ ++ // Enable PHY Interrupts (for register 18) ++ // Interrupts listed here are disabled ++ smc_write_phy_register(ioaddr, phyaddr, PHY_MASK_REG, ++ PHY_INT_LOSSSYNC | PHY_INT_CWRD | PHY_INT_SSD | ++ PHY_INT_ESD | PHY_INT_RPOL | PHY_INT_JAB | ++ PHY_INT_SPDDET | PHY_INT_DPLXDET); ++ ++ /* Configure the Receive/Phy Control register */ ++ SMC_SELECT_BANK( 0 ); ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ // Copy our capabilities from PHY_STAT_REG to PHY_AD_REG ++ my_phy_caps = smc_read_phy_register(ioaddr, phyaddr, PHY_STAT_REG); ++ my_ad_caps = PHY_AD_CSMA; // I am CSMA capable ++ ++ if (my_phy_caps & PHY_STAT_CAP_T4) ++ my_ad_caps |= PHY_AD_T4; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TXF) ++ my_ad_caps |= PHY_AD_TX_FDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TXH) ++ my_ad_caps |= PHY_AD_TX_HDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TF) ++ my_ad_caps |= PHY_AD_10_FDX; ++ ++ if (my_phy_caps & PHY_STAT_CAP_TH) ++ my_ad_caps |= PHY_AD_10_HDX; ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // Disable capabilities not selected by our user ++ if (lp->ctl_rspeed != 100) ++ { ++ my_ad_caps &= ~(PHY_AD_T4|PHY_AD_TX_FDX|PHY_AD_TX_HDX); ++ } ++ ++ if (!lp->ctl_rfduplx) ++ { ++ my_ad_caps &= ~(PHY_AD_TX_FDX|PHY_AD_10_FDX); ++ } ++#endif // CONFIG_SYSCTL ++ ++ // Update our Auto-Neg Advertisement Register ++ smc_write_phy_register(ioaddr, phyaddr, PHY_AD_REG, my_ad_caps); ++ ++ PRINTK2("%s:phy caps=%x\n", dev->name, my_phy_caps); ++ PRINTK2("%s:phy advertised caps=%x\n", dev->name, my_ad_caps); ++ ++#ifdef DISABLED____CONFIG_SYSCTL ++ // If the user requested no auto neg, then go set his request ++ if (!(lp->ctl_autoneg)) ++ { ++ smc_phy_fixed(dev); ++ goto smc_phy_configure_exit; ++ } ++#endif // CONFIG_SYSCTL ++ ++ // Restart auto-negotiation process in order to advertise my caps ++ smc_write_phy_register( ioaddr, phyaddr, PHY_CNTL_REG, ++ PHY_CNTL_ANEG_EN | PHY_CNTL_ANEG_RST ); ++ ++ // Wait for the auto-negotiation to complete. This may take from ++ // 2 to 3 seconds. 
++ // Wait for the reset to complete, or time out ++ timeout = 20-1; // Wait up to 10 seconds ++ do ++ { ++ status = smc_read_phy_register(ioaddr, phyaddr, PHY_STAT_REG); ++ if (status & PHY_STAT_ANEG_ACK) ++ { ++ // auto-negotiate complete ++ break; ++ } ++ ++ smc_wait_ms(500); // wait 500 millisecs ++ if (signal_pending(current)) // Exit anyway if signaled ++ { ++ printk(KERN_DEBUG ++ "%s:PHY auto-negotiate interrupted by signal\n", ++ dev->name); ++ timeout = 0; ++ break; ++ } ++ ++ // Restart auto-negotiation if remote fault ++ if (status & PHY_STAT_REM_FLT) ++ { ++ PRINTK2("%s:PHY remote fault detected\n", dev->name); ++ ++ // Restart auto-negotiation ++ PRINTK2("%s:PHY restarting auto-negotiation\n", ++ dev->name); ++ smc_write_phy_register( ioaddr, phyaddr, PHY_CNTL_REG, ++ PHY_CNTL_ANEG_EN | PHY_CNTL_ANEG_RST | ++ PHY_CNTL_SPEED | PHY_CNTL_DPLX); ++ } ++ } ++ while (timeout--); ++ ++ if (timeout < 1) ++ { ++ printk(KERN_DEBUG "%s:PHY auto-negotiate timed out\n", ++ dev->name); ++ PRINTK2("%s:PHY auto-negotiate timed out\n", dev->name); ++ failed = 1; ++ } ++ ++ // Fail if we detected an auto-negotiate remote fault ++ if (status & PHY_STAT_REM_FLT) ++ { ++ printk(KERN_DEBUG "%s:PHY remote fault detected\n", dev->name); ++ PRINTK2("%s:PHY remote fault detected\n", dev->name); ++ failed = 1; ++ } ++ ++ // The smc_phy_interrupt() routine will be called to update lastPhy18 ++ ++ // Set our sysctl parameters to match auto-negotiation results ++ if ( lp->lastPhy18 & PHY_INT_SPDDET ) ++ { ++ PRINTK2("%s:PHY 100BaseT\n", dev->name); ++ lp->rpc_cur_mode |= RPC_SPEED; ++ } ++ else ++ { ++ PRINTK2("%s:PHY 10BaseT\n", dev->name); ++ lp->rpc_cur_mode &= ~RPC_SPEED; ++ } ++ ++ if ( lp->lastPhy18 & PHY_INT_DPLXDET ) ++ { ++ PRINTK2("%s:PHY Full Duplex\n", dev->name); ++ lp->rpc_cur_mode |= RPC_DPLX; ++ } ++ else ++ { ++ PRINTK2("%s:PHY Half Duplex\n", dev->name); ++ lp->rpc_cur_mode &= ~RPC_DPLX; ++ } ++ ++ // Re-Configure the Receive/Phy Control register ++ outw( lp->rpc_cur_mode, ioaddr + RPC_REG ); ++ ++ smc_phy_configure_exit: ++ ++ // Exit auto-negotiation ++ lp->autoneg_active = 0; ++} ++ ++ ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/drivers/tulip/eeprom.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/eeprom.c 2021-04-07 16:01:27.339634006 +0800 +@@ -0,0 +1,321 @@ ++/* ++ drivers/net/tulip/eeprom.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++ ++/* Serial EEPROM section. */ ++/* The main routine to parse the very complicated SROM structure. ++ Search www.digital.com for "21X4 SROM" to get details. ++ This code is very complex, and will require changes to support ++ additional cards, so I'll be verbose about what is going on. ++ */ ++ ++/* Known cards that have old-style EEPROMs. 
*/ ++static struct eeprom_fixup eeprom_fixups[] = { ++ {"Asante", 0, 0, 0x94, {0x1e00, 0x0000, 0x0800, 0x0100, 0x018c, ++ 0x0000, 0x0000, 0xe078, 0x0001, 0x0050, 0x0018 }}, ++ {"SMC9332DST", 0, 0, 0xC0, { 0x1e00, 0x0000, 0x0800, 0x041f, ++ 0x0000, 0x009E, /* 10baseT */ ++ 0x0004, 0x009E, /* 10baseT-FD */ ++ 0x0903, 0x006D, /* 100baseTx */ ++ 0x0905, 0x006D, /* 100baseTx-FD */ }}, ++ {"Cogent EM100", 0, 0, 0x92, { 0x1e00, 0x0000, 0x0800, 0x063f, ++ 0x0107, 0x8021, /* 100baseFx */ ++ 0x0108, 0x8021, /* 100baseFx-FD */ ++ 0x0100, 0x009E, /* 10baseT */ ++ 0x0104, 0x009E, /* 10baseT-FD */ ++ 0x0103, 0x006D, /* 100baseTx */ ++ 0x0105, 0x006D, /* 100baseTx-FD */ }}, ++ {"Maxtech NX-110", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x0513, ++ 0x1001, 0x009E, /* 10base2, CSR12 0x10*/ ++ 0x0000, 0x009E, /* 10baseT */ ++ 0x0004, 0x009E, /* 10baseT-FD */ ++ 0x0303, 0x006D, /* 100baseTx, CSR12 0x03 */ ++ 0x0305, 0x006D, /* 100baseTx-FD CSR12 0x03 */}}, ++ {"Accton EN1207", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x051F, ++ 0x1B01, 0x0000, /* 10base2, CSR12 0x1B */ ++ 0x0B00, 0x009E, /* 10baseT, CSR12 0x0B */ ++ 0x0B04, 0x009E, /* 10baseT-FD,CSR12 0x0B */ ++ 0x1B03, 0x006D, /* 100baseTx, CSR12 0x1B */ ++ 0x1B05, 0x006D, /* 100baseTx-FD CSR12 0x1B */ ++ }}, ++ {"NetWinder", 0x00, 0x10, 0x57, ++ /* Default media = MII ++ * MII block, reset sequence (3) = 0x0821 0x0000 0x0001, capabilities 0x01e1 ++ */ ++ { 0x1e00, 0x0000, 0x000b, 0x8f01, 0x0103, 0x0300, 0x0821, 0x000, 0x0001, 0x0000, 0x01e1 } ++ }, ++ {0, 0, 0, 0, {}}}; ++ ++ ++static const char *block_name[] = { ++ "21140 non-MII", ++ "21140 MII PHY", ++ "21142 Serial PHY", ++ "21142 MII PHY", ++ "21143 SYM PHY", ++ "21143 reset method" ++}; ++ ++ ++void tulip_parse_eeprom(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ /* The last media info list parsed, for multiport boards. */ ++ static struct mediatable *last_mediatable; ++ static unsigned char *last_ee_data; ++ static int controller_index; ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ unsigned char *ee_data = tp->eeprom; ++ int i; ++ ++ tp->mtable = 0; ++ /* Detect an old-style (SA only) EEPROM layout: ++ memcmp(eedata, eedata+16, 8). */ ++ for (i = 0; i < 8; i ++) ++ if (ee_data[i] != ee_data[16+i]) ++ break; ++ if (i >= 8) { ++ if (ee_data[0] == 0xff) { ++ if (last_mediatable) { ++ controller_index++; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Controller %d of multiport board.\n", ++ rtdev->name, controller_index); ++ tp->mtable = last_mediatable; ++ ee_data = last_ee_data; ++ goto subsequent_board; ++ } else ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Missing EEPROM, this interface may " ++ "not work correctly!\n", ++ rtdev->name); ++ return; ++ } ++ /* Do a fix-up based on the vendor half of the station address prefix. */ ++ for (i = 0; eeprom_fixups[i].name; i++) { ++ if (rtdev->dev_addr[0] == eeprom_fixups[i].addr0 ++ && rtdev->dev_addr[1] == eeprom_fixups[i].addr1 ++ && rtdev->dev_addr[2] == eeprom_fixups[i].addr2) { ++ if (rtdev->dev_addr[2] == 0xE8 && ee_data[0x1a] == 0x55) ++ i++; /* An Accton EN1207, not an outlaw Maxtech. */ ++ memcpy(ee_data + 26, eeprom_fixups[i].newtable, ++ sizeof(eeprom_fixups[i].newtable)); ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Old format EEPROM on '%s' board. Using" ++ " substitute media control info.\n", ++ rtdev->name, eeprom_fixups[i].name); ++ break; ++ } ++ } ++ if (eeprom_fixups[i].name == NULL) { /* No fixup found. 
*/ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Old style EEPROM with no media selection " ++ "information.\n", ++ rtdev->name); ++ return; ++ } ++ } ++ ++ controller_index = 0; ++ if (ee_data[19] > 1) { /* Multiport board. */ ++ last_ee_data = ee_data; ++ } ++subsequent_board: ++ ++ if (ee_data[27] == 0) { /* No valid media table. */ ++ } else if (tp->chip_id == DC21041) { ++ unsigned char *p = (void *)ee_data + ee_data[27 + controller_index*3]; ++ int media = get_u16(p); ++ int count = p[2]; ++ p += 3; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: 21041 Media table, default media %4.4x (%s).\n", ++ rtdev->name, media, ++ media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); ++ for (i = 0; i < count; i++) { ++ unsigned char media_block = *p++; ++ int media_code = media_block & MEDIA_MASK; ++ if (media_block & 0x40) ++ p += 6; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: 21041 media #%d, %s.\n", ++ rtdev->name, media_code, medianame[media_code]); ++ } ++ } else { ++ unsigned char *p = (void *)ee_data + ee_data[27]; ++ unsigned char csr12dir = 0; ++ int count, new_advertise = 0; ++ struct mediatable *mtable; ++ u16 media = get_u16(p); ++ ++ p += 2; ++ if (tp->flags & CSR12_IN_SROM) ++ csr12dir = *p++; ++ count = *p++; ++ ++ /* there is no phy information, don't even try to build mtable */ ++ if (count == 0) { ++ if (tulip_debug > 0) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: no phy info, aborting mtable build\n", rtdev->name); ++ return; ++ } ++ ++ mtable = (struct mediatable *) ++ kmalloc(sizeof(struct mediatable) + count*sizeof(struct medialeaf), GFP_KERNEL); ++ ++ if (mtable == NULL) ++ return; /* Horrible, impossible failure. */ ++ last_mediatable = tp->mtable = mtable; ++ mtable->defaultmedia = media; ++ mtable->leafcount = count; ++ mtable->csr12dir = csr12dir; ++ mtable->has_nonmii = mtable->has_mii = mtable->has_reset = 0; ++ mtable->csr15dir = mtable->csr15val = 0; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: EEPROM default media type %s.\n", rtdev->name, ++ media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); ++ for (i = 0; i < count; i++) { ++ struct medialeaf *leaf = &mtable->mleaf[i]; ++ ++ if ((p[0] & 0x80) == 0) { /* 21140 Compact block. */ ++ leaf->type = 0; ++ leaf->media = p[0] & 0x3f; ++ leaf->leafdata = p; ++ if ((p[2] & 0x61) == 0x01) /* Bogus, but Znyx boards do it. 
*/ ++ mtable->has_mii = 1; ++ p += 4; ++ } else { ++ leaf->type = p[1]; ++ if (p[1] == 0x05) { ++ mtable->has_reset = i; ++ leaf->media = p[2] & 0x0f; ++ } else if (tp->chip_id == DM910X && p[1] == 0x80) { ++ /* Hack to ignore Davicom delay period block */ ++ mtable->leafcount--; ++ count--; ++ i--; ++ leaf->leafdata = p + 2; ++ p += (p[0] & 0x3f) + 1; ++ continue; ++ } else if (p[1] & 1) { ++ int gpr_len, reset_len; ++ ++ mtable->has_mii = 1; ++ leaf->media = 11; ++ gpr_len=p[3]*2; ++ reset_len=p[4+gpr_len]*2; ++ new_advertise |= get_u16(&p[7+gpr_len+reset_len]); ++ } else { ++ mtable->has_nonmii = 1; ++ leaf->media = p[2] & MEDIA_MASK; ++ /* Davicom's media number for 100BaseTX is strange */ ++ if (tp->chip_id == DM910X && leaf->media == 1) ++ leaf->media = 3; ++ switch (leaf->media) { ++ case 0: new_advertise |= 0x0020; break; ++ case 4: new_advertise |= 0x0040; break; ++ case 3: new_advertise |= 0x0080; break; ++ case 5: new_advertise |= 0x0100; break; ++ case 6: new_advertise |= 0x0200; break; ++ } ++ if (p[1] == 2 && leaf->media == 0) { ++ if (p[2] & 0x40) { ++ u32 base15 = get_unaligned((u16*)&p[7]); ++ mtable->csr15dir = ++ (get_unaligned((u16*)&p[9])<<16) + base15; ++ mtable->csr15val = ++ (get_unaligned((u16*)&p[11])<<16) + base15; ++ } else { ++ mtable->csr15dir = get_unaligned((u16*)&p[3])<<16; ++ mtable->csr15val = get_unaligned((u16*)&p[5])<<16; ++ } ++ } ++ } ++ leaf->leafdata = p + 2; ++ p += (p[0] & 0x3f) + 1; ++ } ++ if (tulip_debug > 1 && leaf->media == 11) { ++ unsigned char *bp = leaf->leafdata; ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: MII interface PHY %d, setup/reset " ++ "sequences %d/%d long, capabilities %2.2x %2.2x.\n", ++ rtdev->name, bp[0], bp[1], bp[2 + bp[1]*2], ++ bp[5 + bp[2 + bp[1]*2]*2], bp[4 + bp[2 + bp[1]*2]*2]); ++ } ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Index #%d - Media %s (#%d) described " ++ "by a %s (%d) block.\n", ++ rtdev->name, i, medianame[leaf->media & 15], leaf->media, ++ leaf->type < ARRAY_SIZE(block_name) ? block_name[leaf->type] : "", ++ leaf->type); ++ } ++ if (new_advertise) ++ tp->sym_advertise = new_advertise; ++ } ++} ++/* Reading a serial EEPROM is a "bit" grungy, but we work our way through:->.*/ ++ ++/* EEPROM_Ctrl bits. */ ++#define EE_SHIFT_CLK 0x02 /* EEPROM shift clock. */ ++#define EE_CS 0x01 /* EEPROM chip select. */ ++#define EE_DATA_WRITE 0x04 /* Data from the Tulip to EEPROM. */ ++#define EE_WRITE_0 0x01 ++#define EE_WRITE_1 0x05 ++#define EE_DATA_READ 0x08 /* Data from the EEPROM chip. */ ++#define EE_ENB (0x4800 | EE_CS) ++ ++/* Delay between EEPROM clock transitions. ++ Even at 33Mhz current PCI implementations don't overrun the EEPROM clock. ++ We add a bus turn-around to insure that this remains true. */ ++#define eeprom_delay() inl(ee_addr) ++ ++/* The EEPROM commands include the alway-set leading bit. */ ++#define EE_READ_CMD (6) ++ ++/* Note: this routine returns extra data bits for size detection. */ ++int tulip_read_eeprom(long ioaddr, int location, int addr_len) ++{ ++ int i; ++ unsigned retval = 0; ++ long ee_addr = ioaddr + CSR9; ++ int read_cmd = location | (EE_READ_CMD << addr_len); ++ ++ outl(EE_ENB & ~EE_CS, ee_addr); ++ outl(EE_ENB, ee_addr); ++ ++ /* Shift the read command bits out. */ ++ for (i = 4 + addr_len; i >= 0; i--) { ++ short dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; ++ outl(EE_ENB | dataval, ee_addr); ++ eeprom_delay(); ++ outl(EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay(); ++ retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 
1 : 0); ++ } ++ outl(EE_ENB, ee_addr); ++ eeprom_delay(); ++ ++ for (i = 16; i > 0; i--) { ++ outl(EE_ENB | EE_SHIFT_CLK, ee_addr); ++ eeprom_delay(); ++ retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 1 : 0); ++ outl(EE_ENB, ee_addr); ++ eeprom_delay(); ++ } ++ ++ /* Terminate the EEPROM access. */ ++ outl(EE_ENB & ~EE_CS, ee_addr); ++ return retval; ++} ++ +--- linux/drivers/xenomai/net/drivers/tulip/tulip_core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/tulip_core.c 2021-04-07 16:01:27.334634013 +0800 +@@ -0,0 +1,1403 @@ ++/* tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */ ++ ++/* ++ Maintained by Jeff Garzik ++ Copyright 2000-2002 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#define DRV_NAME "tulip-rt" ++#define DRV_VERSION "0.9.15-pre11-rt" ++#define DRV_RELDATE "May 11, 2002" ++ ++#include ++#include "tulip.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __sparc__ ++#include ++#endif ++ ++#include ++ ++static char version[] = ++ "Linux Tulip driver version " DRV_VERSION " (" DRV_RELDATE ")\n"; ++ ++ ++/* A few user-configurable values. */ ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static unsigned int max_interrupt_work = 25; ++ ++#define MAX_UNITS 8 ++/* Used to pass the full-duplex flag, etc. */ ++static int full_duplex[MAX_UNITS]; ++static int options[MAX_UNITS]; ++static int mtu[MAX_UNITS]; /* Jumbo MTU for interfaces. */ ++ ++/* The possible media types that can be set in options[] are: */ ++const char * const medianame[32] = { ++ "10baseT", "10base2", "AUI", "100baseTx", ++ "10baseT-FDX", "100baseTx-FDX", "100baseT4", "100baseFx", ++ "100baseFx-FDX", "MII 10baseT", "MII 10baseT-FDX", "MII", ++ "10baseT(forced)", "MII 100baseTx", "MII 100baseTx-FDX", "MII 100baseT4", ++ "MII 100baseFx-HDX", "MII 100baseFx-FDX", "Home-PNA 1Mbps", "Invalid-19", ++ "","","","", "","","","", "","","","Transceiver reset", ++}; ++ ++/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */ ++#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \ ++ || defined(__sparc_) || defined(__ia64__) \ ++ || defined(__sh__) || defined(__mips__) ++static int rx_copybreak = 1518; ++#else ++static int rx_copybreak = 100; ++#endif ++ ++/* ++ Set the bus performance register. ++ Typical: Set 16 longword cache alignment, no burst limit. ++ Cache alignment bits 15:14 Burst length 13:8 ++ 0000 No alignment 0x00000000 unlimited 0800 8 longwords ++ 4000 8 longwords 0100 1 longword 1000 16 longwords ++ 8000 16 longwords 0200 2 longwords 2000 32 longwords ++ C000 32 longwords 0400 4 longwords ++ Warning: many older 486 systems are broken and require setting 0x00A04800 ++ 8 longword cache alignment, 8 longword burst. ++ ToDo: Non-Intel setting could be better. 
++*/ ++ ++#if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) ++static int csr0 = 0x01A00000 | 0xE000; ++#elif defined(__i386__) || defined(__powerpc__) ++static int csr0 = 0x01A00000 | 0x8000; ++#elif defined(__sparc__) || defined(__hppa__) ++/* The UltraSparc PCI controllers will disconnect at every 64-byte ++ * crossing anyways so it makes no sense to tell Tulip to burst ++ * any more than that. ++ */ ++static int csr0 = 0x01A00000 | 0x9000; ++#elif defined(__arm__) || defined(__sh__) ++static int csr0 = 0x01A00000 | 0x4800; ++#elif defined(__mips__) ++static int csr0 = 0x00200000 | 0x4000; ++#else ++#warning Processor architecture undefined! ++static int csr0 = 0x00A00000 | 0x4800; ++#endif ++ ++/* Operational parameters that usually are not changed. */ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (4*HZ) ++ ++ ++MODULE_AUTHOR("The Linux Kernel Team"); ++MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver"); ++MODULE_LICENSE("GPL"); ++module_param(tulip_debug, int, 0444); ++module_param(max_interrupt_work, int, 0444); ++/*MODULE_PARM(rx_copybreak, "i");*/ ++module_param(csr0, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++ ++#define PFX DRV_NAME ": " ++ ++#ifdef TULIP_DEBUG ++int tulip_debug = TULIP_DEBUG; ++#else ++int tulip_debug = 1; ++#endif ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++ ++ ++ ++/* ++ * This table use during operation for capabilities and media timer. ++ * ++ * It is indexed via the values in 'enum chips' ++ */ ++ ++struct tulip_chip_table tulip_tbl[] = { ++ /* DC21040 */ ++ { "Digital DC21040 Tulip", 128, 0x0001ebef, 0 }, ++ ++ /* DC21041 */ ++ { "Digital DC21041 Tulip", 128, 0x0001ebef, ++ HAS_MEDIA_TABLE | HAS_NWAY }, ++ ++ /* DC21140 */ ++ { "Digital DS21140 Tulip", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_PCI_MWI }, ++ ++ /* DC21142, DC21143 */ ++ { "Digital DS21143 Tulip", 128, 0x0801fbff, ++ HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY ++ | HAS_INTR_MITIGATION | HAS_PCI_MWI }, ++ ++ /* LC82C168 */ ++ { "Lite-On 82c168 PNIC", 256, 0x0001fbef, ++ HAS_MII | HAS_PNICNWAY }, ++ ++ /* MX98713 */ ++ { "Macronix 98713 PMAC", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM }, ++ ++ /* MX98715 */ ++ { "Macronix 98715 PMAC", 256, 0x0001ebef, ++ HAS_MEDIA_TABLE }, ++ ++ /* MX98725 */ ++ { "Macronix 98725 PMAC", 256, 0x0001ebef, ++ HAS_MEDIA_TABLE }, ++ ++ /* AX88140 */ ++ { "ASIX AX88140", 128, 0x0001fbff, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | MC_HASH_ONLY | IS_ASIX }, ++ ++ /* PNIC2 */ ++ { "Lite-On PNIC-II", 256, 0x0801fbff, ++ HAS_MII | HAS_NWAY | HAS_8023X | HAS_PCI_MWI }, ++ ++ /* COMET */ ++ { "ADMtek Comet", 256, 0x0001abef, ++ MC_HASH_ONLY | COMET_MAC_ADDR }, ++ ++ /* COMPEX9881 */ ++ { "Compex 9881 PMAC", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM }, ++ ++ /* I21145 */ ++ { "Intel DS21145 Tulip", 128, 0x0801fbff, ++ HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI ++ | HAS_NWAY | HAS_PCI_MWI }, ++ ++ /* DM910X */ ++ { "Davicom DM9102/DM9102A", 128, 0x0001ebef, ++ HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI }, ++}; ++ ++ ++static struct pci_device_id tulip_pci_tbl[] = { ++ { 0x1011, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21040 }, ++ { 0x1011, 0x0014, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21041 }, 
++ { 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21140 }, ++ { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21143 }, ++ { 0x11AD, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, LC82C168 }, ++ { 0x10d9, 0x0512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98713 }, ++ { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, ++/* { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98725 },*/ ++ { 0x125B, 0x1400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AX88140 }, ++ { 0x11AD, 0xc115, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PNIC2 }, ++ { 0x1317, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x0985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x1985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1317, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x13D1, 0xAB08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x104A, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x104A, 0x2774, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x11F6, 0x9881, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 }, ++ { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 }, ++ { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, ++ { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, ++ { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, ++ { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, ++ { } /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); ++ ++ ++/* A full-duplex map for media types. */ ++const char tulip_media_cap[32] = ++{0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; ++u8 t21040_csr13[] = {2,0x0C,8,4, 4,0,0,0, 0,0,0,0, 4,0,0,0}; ++ ++/* 21041 transceiver register settings: 10-T, 10-2, AUI, 10-T, 10T-FD*/ ++u16 t21041_csr13[] = { ++ csr13_mask_10bt, /* 10-T */ ++ csr13_mask_auibnc, /* 10-2 */ ++ csr13_mask_auibnc, /* AUI */ ++ csr13_mask_10bt, /* 10-T */ ++ csr13_mask_10bt, /* 10T-FD */ ++}; ++u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; ++u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; ++ ++ ++static void tulip_init_ring(/*RTnet*/struct rtnet_device *rtdev); ++static int tulip_start_xmit(struct /*RTnet*/rtskb *skb, /*RTnet*/struct rtnet_device *rtdev); ++static int tulip_open(/*RTnet*/struct rtnet_device *rtdev); ++static int tulip_close(/*RTnet*/struct rtnet_device *rtdev); ++static void tulip_up(/*RTnet*/struct rtnet_device *rtdev); ++static void tulip_down(/*RTnet*/struct rtnet_device *rtdev); ++static struct net_device_stats *tulip_get_stats(struct rtnet_device *rtdev); ++//static void set_rx_mode(struct net_device *dev); ++ ++ ++static void tulip_set_power_state (struct tulip_private *tp, ++ int sleep, int snooze) ++{ ++ if (tp->flags & HAS_ACPI) { ++ u32 tmp, newtmp; ++ pci_read_config_dword (tp->pdev, CFDD, &tmp); ++ newtmp = tmp & ~(CFDD_Sleep | CFDD_Snooze); ++ if (sleep) ++ newtmp |= CFDD_Sleep; ++ else if (snooze) ++ newtmp |= CFDD_Snooze; ++ if (tmp != newtmp) ++ pci_write_config_dword (tp->pdev, CFDD, newtmp); ++ } ++ ++} ++ ++static void tulip_up(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int i; ++ ++ /* Wake the chip from sleep/snooze mode. */ ++ tulip_set_power_state (tp, 0, 0); ++ ++ /* On some chip revs we must set the MII/SYM port before the reset!? 
*/ ++ if (tp->mii_cnt || (tp->mtable && tp->mtable->has_mii)) ++ outl(0x00040000, ioaddr + CSR6); ++ ++ /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */ ++ outl(0x00000001, ioaddr + CSR0); ++ udelay(100); ++ ++ /* Deassert reset. ++ Wait the specified 50 PCI cycles after a reset by initializing ++ Tx and Rx queues and the address filter list. */ ++ outl(tp->csr0, ioaddr + CSR0); ++ udelay(100); ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: tulip_up(), irq==%d.\n", rtdev->name, rtdev->irq); ++ ++ outl(tp->rx_ring_dma, ioaddr + CSR3); ++ outl(tp->tx_ring_dma, ioaddr + CSR4); ++ tp->cur_rx = tp->cur_tx = 0; ++ tp->dirty_rx = tp->dirty_tx = 0; ++ ++ if (tp->flags & MC_HASH_ONLY) { ++ u32 addr_low = cpu_to_le32(get_unaligned((u32 *)rtdev->dev_addr)); ++ u32 addr_high = cpu_to_le32(get_unaligned((u16 *)(rtdev->dev_addr+4))); ++ if (tp->chip_id == AX88140) { ++ outl(0, ioaddr + CSR13); ++ outl(addr_low, ioaddr + CSR14); ++ outl(1, ioaddr + CSR13); ++ outl(addr_high, ioaddr + CSR14); ++ } else if (tp->flags & COMET_MAC_ADDR) { ++ outl(addr_low, ioaddr + 0xA4); ++ outl(addr_high, ioaddr + 0xA8); ++ outl(0, ioaddr + 0xAC); ++ outl(0, ioaddr + 0xB0); ++ } ++ } else { ++ /* This is set_rx_mode(), but without starting the transmitter. */ ++ u16 *eaddrs = (u16 *)rtdev->dev_addr; ++ u16 *setup_frm = &tp->setup_frame[15*6]; ++ dma_addr_t mapping; ++ ++ /* 21140 bug: you must add the broadcast address. */ ++ memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame)); ++ /* Fill the final entry of the table with our physical address. */ ++ *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; ++ *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; ++ *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; ++ ++ mapping = pci_map_single(tp->pdev, tp->setup_frame, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ tp->tx_buffers[tp->cur_tx].skb = NULL; ++ tp->tx_buffers[tp->cur_tx].mapping = mapping; ++ ++ /* Put the setup frame on the Tx list. */ ++ tp->tx_ring[tp->cur_tx].length = cpu_to_le32(0x08000000 | 192); ++ tp->tx_ring[tp->cur_tx].buffer1 = cpu_to_le32(mapping); ++ tp->tx_ring[tp->cur_tx].status = cpu_to_le32(DescOwned); ++ ++ tp->cur_tx++; ++ } ++ ++ tp->saved_if_port = rtdev->if_port; ++ if (rtdev->if_port == 0) ++ rtdev->if_port = tp->default_port; ++ ++ /* Allow selecting a default media. */ ++ i = 0; ++ if (tp->mtable == NULL) ++ goto media_picked; ++ if (rtdev->if_port) { ++ int looking_for = tulip_media_cap[rtdev->if_port] & MediaIsMII ? 11 : ++ (rtdev->if_port == 12 ? 0 : rtdev->if_port); ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == looking_for) { ++ printk(KERN_INFO "%s: Using user-specified media %s.\n", ++ rtdev->name, medianame[rtdev->if_port]); ++ goto media_picked; ++ } ++ } ++ if ((tp->mtable->defaultmedia & 0x0800) == 0) { ++ int looking_for = tp->mtable->defaultmedia & MEDIA_MASK; ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == looking_for) { ++ printk(KERN_INFO "%s: Using EEPROM-set media %s.\n", ++ rtdev->name, medianame[looking_for]); ++ goto media_picked; ++ } ++ } ++ /* Start sensing first non-full-duplex media. */ ++ for (i = tp->mtable->leafcount - 1; ++ (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--) ++ ; ++media_picked: ++ ++ tp->csr6 = 0; ++ tp->cur_index = i; ++ tp->nwayset = 0; ++ ++ if (rtdev->if_port) { ++ if (tp->chip_id == DC21143 && ++ (tulip_media_cap[rtdev->if_port] & MediaIsMII)) { ++ /* We must reset the media CSRs when we force-select MII mode. 
*/ ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ outl(0x0008, ioaddr + CSR15); ++ } ++ tulip_select_media(rtdev, 1); ++ } else if (tp->chip_id == DC21041) { ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */ ++ tp->csr6 = 0x80020000; ++ if (tp->sym_advertise & 0x0040) ++ tp->csr6 |= FullDuplex; ++ outl(tp->csr6, ioaddr + CSR6); ++ outl(0x0000EF01, ioaddr + CSR13); ++ ++ } else if (tp->chip_id == DC21142) { ++ if (tp->mii_cnt) { ++ tulip_select_media(rtdev, 1); ++ if (tulip_debug > 1) ++ printk(KERN_INFO "%s: Using MII transceiver %d, status %4.4x.\n", ++ rtdev->name, tp->phys[0], tulip_mdio_read(rtdev, tp->phys[0], 1)); ++ outl(csr6_mask_defstate, ioaddr + CSR6); ++ tp->csr6 = csr6_mask_hdcap; ++ rtdev->if_port = 11; ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ } else ++ t21142_start_nway(rtdev); ++ } else if (tp->chip_id == PNIC2) { ++ /* for initial startup advertise 10/100 Full and Half */ ++ tp->sym_advertise = 0x01E0; ++ /* enable autonegotiate end interrupt */ ++ outl(inl(ioaddr+CSR5)| 0x00008010, ioaddr + CSR5); ++ outl(inl(ioaddr+CSR7)| 0x00008010, ioaddr + CSR7); ++ pnic2_start_nway(rtdev); ++ } else if (tp->chip_id == LC82C168 && ! tp->medialock) { ++ if (tp->mii_cnt) { ++ rtdev->if_port = 11; ++ tp->csr6 = 0x814C0000 | (tp->full_duplex ? 0x0200 : 0); ++ outl(0x0001, ioaddr + CSR15); ++ } else if (inl(ioaddr + CSR5) & TPLnkPass) ++ pnic_do_nway(rtdev); ++ else { ++ /* Start with 10mbps to do autonegotiation. */ ++ outl(0x32, ioaddr + CSR12); ++ tp->csr6 = 0x00420000; ++ outl(0x0001B078, ioaddr + 0xB8); ++ outl(0x0201B078, ioaddr + 0xB8); ++ } ++ } else if ((tp->chip_id == MX98713 || tp->chip_id == COMPEX9881) ++ && ! tp->medialock) { ++ rtdev->if_port = 0; ++ tp->csr6 = 0x01880000 | (tp->full_duplex ? 0x0200 : 0); ++ outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); ++ } else if (tp->chip_id == MX98715 || tp->chip_id == MX98725) { ++ /* Provided by BOLO, Macronix - 12/10/1998. */ ++ rtdev->if_port = 0; ++ tp->csr6 = 0x01a80200; ++ outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); ++ outl(0x11000 | inw(ioaddr + 0xa0), ioaddr + 0xa0); ++ } else if (tp->chip_id == COMET) { ++ /* Enable automatic Tx underrun recovery. */ ++ outl(inl(ioaddr + 0x88) | 1, ioaddr + 0x88); ++ rtdev->if_port = tp->mii_cnt ? 11 : 0; ++ tp->csr6 = 0x00040000; ++ } else if (tp->chip_id == AX88140) { ++ tp->csr6 = tp->mii_cnt ? 0x00040100 : 0x00000100; ++ } else ++ tulip_select_media(rtdev, 1); ++ ++ /* Start the chip's Tx to process setup frame. */ ++ tulip_stop_rxtx(tp); ++ barrier(); ++ udelay(5); ++ outl(tp->csr6 | TxOn, ioaddr + CSR6); ++ ++ /* Enable interrupts by setting the interrupt mask. 
*/ ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5); ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); ++ tulip_start_rxtx(tp); ++ outl(0, ioaddr + CSR2); /* Rx poll demand */ ++ ++ if (tulip_debug > 2) { ++ printk(KERN_DEBUG "%s: Done tulip_up(), CSR0 %8.8x, CSR5 %8.8x CSR6 %8.8x.\n", ++ rtdev->name, inl(ioaddr + CSR0), inl(ioaddr + CSR5), ++ inl(ioaddr + CSR6)); ++ } ++} ++ ++ ++static int ++tulip_open(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int retval; ++ ++ if ((retval = /*RTnet*/rtdm_irq_request(&tp->irq_handle, rtdev->irq, ++ tulip_interrupt, 0, "rt_tulip", ++ rtdev))) { ++ printk("%s: Unable to install ISR for IRQ %d\n", ++ rtdev->name,rtdev->irq); ++ return retval; ++ } ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ tulip_init_ring (rtdev); ++ ++ tulip_up (rtdev); ++ ++ rtnetif_start_queue (rtdev); ++ ++ return 0; ++} ++ ++/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ ++static void tulip_init_ring(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ ++ tp->susp_rx = 0; ++ tp->ttimer = 0; ++ tp->nir = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ tp->rx_ring[i].status = 0x00000000; ++ tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ); ++ tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1)); ++ tp->rx_buffers[i].skb = NULL; ++ tp->rx_buffers[i].mapping = 0; ++ } ++ /* Mark the last entry as wrapping the ring. */ ++ tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP); ++ tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma); ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ dma_addr_t mapping; ++ ++ /* Note the receive buffer must be longword aligned. ++ dev_alloc_skb() provides 16 byte alignment. But do *not* ++ use skb_reserve() to align the IP header! */ ++ struct /*RTnet*/rtskb *skb = /*RTnet*/rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ tp->rx_buffers[i].skb = skb; ++ if (skb == NULL) ++ break; ++ mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ tp->rx_buffers[i].mapping = mapping; ++ tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */ ++ tp->rx_ring[i].buffer1 = cpu_to_le32(mapping); ++ } ++ tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++ ++ /* The Tx buffer descriptor is filled in as needed, but we ++ do need to clear the ownership bit. */ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ tp->tx_buffers[i].skb = NULL; ++ tp->tx_buffers[i].mapping = 0; ++ tp->tx_ring[i].status = 0x00000000; ++ tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1)); ++ } ++ tp->tx_ring[i-1].buffer2 = cpu_to_le32(tp->tx_ring_dma); ++} ++ ++static int ++tulip_start_xmit(struct /*RTnet*/rtskb *skb, /*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry; ++ u32 flag; ++ dma_addr_t mapping; ++ /*RTnet*/ ++ rtdm_lockctx_t context; ++ ++ ++ rtdm_lock_get_irqsave(&tp->lock, context); ++ ++ /* TODO: move to rtdev_xmit, use queue */ ++ if (rtnetif_queue_stopped(rtdev)) { ++ dev_kfree_rtskb(skb); ++ tp->stats.tx_dropped++; ++ ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ return 0; ++ } ++ /*RTnet*/ ++ ++ /* Calculate the next Tx descriptor entry. 
*/ ++ entry = tp->cur_tx % TX_RING_SIZE; ++ ++ tp->tx_buffers[entry].skb = skb; ++ mapping = pci_map_single(tp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); ++ tp->tx_buffers[entry].mapping = mapping; ++ tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping); ++ ++ if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */ ++ flag = 0x60000000; /* No interrupt */ ++ } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) { ++ flag = 0xe0000000; /* Tx-done intr. */ ++ } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) { ++ flag = 0x60000000; /* No Tx-done intr. */ ++ } else { /* Leave room for set_rx_mode() to fill entries. */ ++ flag = 0xe0000000; /* Tx-done intr. */ ++ rtnetif_stop_queue(rtdev); ++ } ++ if (entry == TX_RING_SIZE-1) ++ flag = 0xe0000000 | DESC_RING_WRAP; ++ ++ tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag); ++ /* if we were using Transmit Automatic Polling, we would need a ++ * wmb() here. */ ++ tp->tx_ring[entry].status = cpu_to_le32(DescOwned); ++ ++ /*RTnet*/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*RTnet*/ ++ ++ wmb(); ++ ++ tp->cur_tx++; ++ ++ /* Trigger an immediate transmit demand. */ ++ outl(0, rtdev->base_addr + CSR1); ++ ++ /*RTnet*/ ++ rtdm_lock_put_irqrestore(&tp->lock, context); ++ /*RTnet*/ ++ ++ return 0; ++} ++ ++static void tulip_clean_tx_ring(struct tulip_private *tp) ++{ ++ unsigned int dirty_tx; ++ ++ for (dirty_tx = tp->dirty_tx ; tp->cur_tx - dirty_tx > 0; ++ dirty_tx++) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(tp->tx_ring[entry].status); ++ ++ if (status < 0) { ++ tp->stats.tx_errors++; /* It wasn't Txed */ ++ tp->tx_ring[entry].status = 0; ++ } ++ ++ /* Check for Tx filter setup frames. */ ++ if (tp->tx_buffers[entry].skb == NULL) { ++ /* test because dummy frames not mapped */ ++ if (tp->tx_buffers[entry].mapping) ++ pci_unmap_single(tp->pdev, ++ tp->tx_buffers[entry].mapping, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ continue; ++ } ++ ++ pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, ++ tp->tx_buffers[entry].skb->len, ++ PCI_DMA_TODEVICE); ++ ++ /* Free the original skb. */ ++ /*RTnet*/dev_kfree_rtskb(tp->tx_buffers[entry].skb); ++ tp->tx_buffers[entry].skb = NULL; ++ tp->tx_buffers[entry].mapping = 0; ++ } ++} ++ ++static struct net_device_stats *tulip_get_stats(struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ return &tp->stats; ++} ++ ++static void tulip_down (/*RTnet*/struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ ++ rtdm_irq_disable(&tp->irq_handle); ++ rtdm_lock_get(&tp->lock); /* sync with IRQ handler on other cpu -JK- */ ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ outl (0x00000000, ioaddr + CSR7); ++ ++ /* Stop the Tx and Rx processes. */ ++ tulip_stop_rxtx(tp); ++ ++ /* prepare receive buffers */ ++ tulip_refill_rx(rtdev); ++ ++ /* release any unconsumed transmit buffers */ ++ tulip_clean_tx_ring(tp); ++ ++ /* 21040 -- Leave the card in 10baseT state. 
*/ ++ if (tp->chip_id == DC21040) ++ outl (0x00000004, ioaddr + CSR13); ++ ++ if (inl (ioaddr + CSR6) != 0xffffffff) ++ tp->stats.rx_missed_errors += inl (ioaddr + CSR8) & 0xffff; ++ ++ rtdm_lock_put(&tp->lock); ++ rtdm_irq_enable(&tp->irq_handle); ++ ++ rtdev->if_port = tp->saved_if_port; ++ ++ /* Leave the driver in snooze, not sleep, mode. */ ++ tulip_set_power_state (tp, 0, 1); ++} ++ ++ ++static int tulip_close (/*RTnet*/struct rtnet_device *rtdev) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *) rtdev->priv; ++ int i; ++ ++ rtnetif_stop_queue (rtdev); ++ ++ tulip_down (rtdev); ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", ++ rtdev->name, inl (ioaddr + CSR5)); ++ ++ rtdm_irq_free(&tp->irq_handle); ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct /*RTnet*/rtskb *skb = tp->rx_buffers[i].skb; ++ dma_addr_t mapping = tp->rx_buffers[i].mapping; ++ ++ tp->rx_buffers[i].skb = NULL; ++ tp->rx_buffers[i].mapping = 0; ++ ++ tp->rx_ring[i].status = 0; /* Not owned by Tulip chip. */ ++ tp->rx_ring[i].length = 0; ++ tp->rx_ring[i].buffer1 = 0xBADF00D0; /* An invalid address. */ ++ if (skb) { ++ pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ, ++ PCI_DMA_FROMDEVICE); ++ /*RTnet*/dev_kfree_rtskb (skb); ++ } ++ } ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ struct /*RTnet*/rtskb *skb = tp->tx_buffers[i].skb; ++ ++ if (skb != NULL) { ++ pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping, ++ skb->len, PCI_DMA_TODEVICE); ++ /*RTnet*/dev_kfree_rtskb (skb); ++ } ++ tp->tx_buffers[i].skb = NULL; ++ tp->tx_buffers[i].mapping = 0; ++ } ++ ++ rt_stack_disconnect(rtdev); ++ ++ return 0; ++} ++ ++#ifdef XXX_CONFIG_TULIP_MWI ++static void tulip_mwi_config (struct pci_dev *pdev, ++ struct net_device *dev) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ u8 cache; ++ u16 pci_command; ++ u32 csr0; ++ ++ if (tulip_debug > 3) ++ printk(KERN_DEBUG "%s: tulip_mwi_config()\n", pci_name(pdev)); ++ ++ tp->csr0 = csr0 = 0; ++ ++ /* if we have any cache line size at all, we can do MRM */ ++ csr0 |= MRM; ++ ++ /* ...and barring hardware bugs, MWI */ ++ if (!(tp->chip_id == DC21143 && tp->revision == 65)) ++ csr0 |= MWI; ++ ++ /* set or disable MWI in the standard PCI command bit. 
++ * Check for the case where mwi is desired but not available ++ */ ++ if (csr0 & MWI) pci_set_mwi(pdev); ++ else pci_clear_mwi(pdev); ++ ++ /* read result from hardware (in case bit refused to enable) */ ++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command); ++ if ((csr0 & MWI) && (!(pci_command & PCI_COMMAND_INVALIDATE))) ++ csr0 &= ~MWI; ++ ++ /* if cache line size hardwired to zero, no MWI */ ++ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache); ++ if ((csr0 & MWI) && (cache == 0)) { ++ csr0 &= ~MWI; ++ pci_clear_mwi(pdev); ++ } ++ ++ /* assign per-cacheline-size cache alignment and ++ * burst length values ++ */ ++ switch (cache) { ++ case 8: ++ csr0 |= MRL | (1 << CALShift) | (16 << BurstLenShift); ++ break; ++ case 16: ++ csr0 |= MRL | (2 << CALShift) | (16 << BurstLenShift); ++ break; ++ case 32: ++ csr0 |= MRL | (3 << CALShift) | (32 << BurstLenShift); ++ break; ++ default: ++ cache = 0; ++ break; ++ } ++ ++ /* if we have a good cache line size, we by now have a good ++ * csr0, so save it and exit ++ */ ++ if (cache) ++ goto out; ++ ++ /* we don't have a good csr0 or cache line size, disable MWI */ ++ if (csr0 & MWI) { ++ pci_clear_mwi(pdev); ++ csr0 &= ~MWI; ++ } ++ ++ /* sane defaults for burst length and cache alignment ++ * originally from de4x5 driver ++ */ ++ csr0 |= (8 << BurstLenShift) | (1 << CALShift); ++ ++out: ++ tp->csr0 = csr0; ++ if (tulip_debug > 2) ++ printk(KERN_DEBUG "%s: MWI config cacheline=%d, csr0=%08x\n", ++ pci_name(pdev), cache, csr0); ++} ++#endif ++ ++ ++static int tulip_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct tulip_private *tp; ++ /* See note below on the multiport cards. */ ++ static unsigned char last_phys_addr[6] = {0x00, 'L', 'i', 'n', 'u', 'x'}; ++ static struct pci_device_id early_486_chipsets[] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424) }, ++ { PCI_DEVICE(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496) }, ++ { }, ++ }; ++#if defined(__i386__) ++ static int last_irq; ++#endif ++ u8 chip_rev; ++ unsigned int i, irq; ++ unsigned short sum; ++ u8 ee_data[EEPROM_SIZE]; ++ /*RTnet*/struct rtnet_device *rtdev; ++ long ioaddr; ++ static int board_idx = -1; ++ int chip_idx = ent->driver_data; ++ unsigned int t2104x_mode = 0; ++ unsigned int eeprom_missing = 0; ++ ++#ifndef MODULE ++ static int did_version; /* Already printed version info. */ ++ if (tulip_debug > 0 && did_version++ == 0) ++ printk(KERN_INFO "%s", version); ++#endif ++ ++ board_idx++; ++ ++ if (cards[board_idx] == 0) ++ return -ENODEV; ++ ++ /* ++ * Lan media wire a tulip chip to a wan interface. Needs a very ++ * different driver (lmc driver) ++ */ ++ ++ if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) { ++ printk(KERN_ERR PFX "skipping LMC card.\n"); ++ return -ENODEV; ++ } ++ ++ /* ++ * Early DM9100's need software CRC and the DMFE driver ++ */ ++ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9100) ++ { ++ u32 dev_rev; ++ /* Read Chip revision */ ++ pci_read_config_dword(pdev, PCI_REVISION_ID, &dev_rev); ++ if(dev_rev < 0x02000030) ++ { ++ printk(KERN_ERR PFX "skipping early DM9100 with Crc bug (use dmfe)\n"); ++ return -ENODEV; ++ } ++ } ++ ++ /* ++ * Looks for early PCI chipsets where people report hangs ++ * without the workarounds being on. ++ */ ++ ++ /* 1. Intel Saturn. Switch to 8 long words burst, 8 long word cache ++ aligned. Aries might need this too. The Saturn errata are not ++ pretty reading but thankfully it's an old 486 chipset. ++ ++ 2. The dreaded SiS496 486 chipset. 
Same workaround as Intel ++ Saturn. ++ */ ++ ++ if (pci_dev_present(early_486_chipsets)) ++ csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift); ++ ++ /* bugfix: the ASIX must have a burst limit or horrible things happen. */ ++ if (chip_idx == AX88140) { ++ if ((csr0 & 0x3f00) == 0) ++ csr0 |= 0x2000; ++ } ++ ++ /* PNIC doesn't have MWI/MRL/MRM... */ ++ if (chip_idx == LC82C168) ++ csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */ ++ ++ /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9102) ++ csr0 &= ~0x01f100ff; ++ ++#if defined(__sparc__) ++ /* DM9102A needs 32-dword alignment/burst length on sparc - chip bug? */ ++ if (pdev->vendor == 0x1282 && pdev->device == 0x9102) ++ csr0 = (csr0 & ~0xff00) | 0xe000; ++#endif ++ ++ /* ++ * And back to business ++ */ ++ ++ i = pci_enable_device(pdev); ++ if (i) { ++ printk(KERN_ERR PFX ++ "Cannot enable tulip board #%d, aborting\n", ++ board_idx); ++ return i; ++ } ++ ++ ioaddr = pci_resource_start (pdev, 0); ++ irq = pdev->irq; ++ ++ /* alloc_etherdev ensures aligned and zeroed private structures */ ++ rtdev = /*RTnet*/rt_alloc_etherdev (sizeof (*tp), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (!rtdev) { ++ printk(KERN_ERR PFX "ether device alloc failed, aborting\n"); ++ return -ENOMEM; ++ } ++ //rtdev_alloc_name(rtdev, "eth%d");//Done by register_rtdev() ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ ++ if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { ++ printk(KERN_ERR PFX "%s: I/O region (0x%llx@0x%llx) too small, " ++ "aborting\n", pci_name(pdev), ++ (unsigned long long)pci_resource_len (pdev, 0), ++ (unsigned long long)pci_resource_start (pdev, 0)); ++ goto err_out_free_netdev; ++ } ++ ++ /* grab all resources from both PIO and MMIO regions, as we ++ * don't want anyone else messing around with our hardware */ ++ if (pci_request_regions (pdev, "tulip")) ++ goto err_out_free_netdev; ++ ++#ifndef USE_IO_OPS ++ ioaddr = (unsigned long) ioremap (pci_resource_start (pdev, 1), ++ tulip_tbl[chip_idx].io_size); ++ if (!ioaddr) ++ goto err_out_free_res; ++#endif ++ ++ pci_read_config_byte (pdev, PCI_REVISION_ID, &chip_rev); ++ ++ /* ++ * initialize private data structure 'tp' ++ * it is zeroed and aligned in alloc_etherdev ++ */ ++ tp = rtdev->priv; ++ ++ tp->rx_ring = pci_alloc_consistent(pdev, ++ sizeof(struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof(struct tulip_tx_desc) * TX_RING_SIZE, ++ &tp->rx_ring_dma); ++ if (!tp->rx_ring) ++ goto err_out_mtable; ++ tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE); ++ tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE; ++ ++ tp->chip_id = chip_idx; ++ tp->flags = tulip_tbl[chip_idx].flags; ++ tp->pdev = pdev; ++ tp->base_addr = ioaddr; ++ tp->revision = chip_rev; ++ tp->csr0 = csr0; ++ rtdm_lock_init(&tp->lock); ++ spin_lock_init(&tp->mii_lock); ++ ++ rtdev->base_addr = ioaddr; ++ rtdev->irq = irq; ++ ++#ifdef XXX_CONFIG_TULIP_MWI ++ if (!force_csr0 && (tp->flags & HAS_PCI_MWI)) ++ tulip_mwi_config (pdev, rtdev); ++#else ++ /* MWI is broken for DC21143 rev 65... */ ++ if (chip_idx == DC21143 && chip_rev == 65) ++ tp->csr0 &= ~MWI; ++#endif ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ tulip_stop_rxtx(tp); ++ ++ pci_set_master(pdev); ++ ++ /* Clear the missed-packet counter. 
*/ ++ inl(ioaddr + CSR8); ++ ++ if (chip_idx == DC21041) { ++ if (inl(ioaddr + CSR9) & 0x8000) { ++ chip_idx = DC21040; ++ t2104x_mode = 1; ++ } else { ++ t2104x_mode = 2; ++ } ++ } ++ ++ /* The station address ROM is read byte serially. The register must ++ be polled, waiting for the value to be read bit serially from the ++ EEPROM. ++ */ ++ sum = 0; ++ if (chip_idx == DC21040) { ++ outl(0, ioaddr + CSR9); /* Reset the pointer with a dummy write. */ ++ for (i = 0; i < 6; i++) { ++ int value, boguscnt = 100000; ++ do ++ value = inl(ioaddr + CSR9); ++ while (value < 0 && --boguscnt > 0); ++ rtdev->dev_addr[i] = value; ++ sum += value & 0xff; ++ } ++ } else if (chip_idx == LC82C168) { ++ for (i = 0; i < 3; i++) { ++ int value, boguscnt = 100000; ++ outl(0x600 | i, ioaddr + 0x98); ++ do ++ value = inl(ioaddr + CSR9); ++ while (value < 0 && --boguscnt > 0); ++ put_unaligned(le16_to_cpu(value), ((u16*)rtdev->dev_addr) + i); ++ sum += value & 0xffff; ++ } ++ } else if (chip_idx == COMET) { ++ /* No need to read the EEPROM. */ ++ put_unaligned(inl(ioaddr + 0xA4), (u32 *)rtdev->dev_addr); ++ put_unaligned(inl(ioaddr + 0xA8), (u16 *)(rtdev->dev_addr + 4)); ++ for (i = 0; i < 6; i ++) ++ sum += rtdev->dev_addr[i]; ++ } else { ++ /* A serial EEPROM interface, we read now and sort it out later. */ ++ int sa_offset = 0; ++ int ee_addr_size = tulip_read_eeprom(ioaddr, 0xff, 8) & 0x40000 ? 8 : 6; ++ ++ for (i = 0; i < sizeof(ee_data)/2; i++) ++ ((u16 *)ee_data)[i] = ++ le16_to_cpu(tulip_read_eeprom(ioaddr, i, ee_addr_size)); ++ ++ /* DEC now has a specification (see Notes) but early board makers ++ just put the address in the first EEPROM locations. */ ++ /* This does memcmp(eedata, eedata+16, 8) */ ++ for (i = 0; i < 8; i ++) ++ if (ee_data[i] != ee_data[16+i]) ++ sa_offset = 20; ++ if (ee_data[0] == 0xff && ee_data[1] == 0xff && ee_data[2] == 0) ++ sa_offset = 2; /* Grrr, damn Matrox boards. */ ++#ifdef CONFIG_DDB5476 ++ if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 6)) { ++ /* DDB5476 MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++#ifdef CONFIG_DDB5477 ++ if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 4)) { ++ /* DDB5477 MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++#ifdef CONFIG_MIPS_COBALT ++ if ((pdev->bus->number == 0) && ++ ((PCI_SLOT(pdev->devfn) == 7) || ++ (PCI_SLOT(pdev->devfn) == 12))) { ++ /* Cobalt MAC address in first EEPROM locations. */ ++ sa_offset = 0; ++ /* No media table either */ ++ tp->flags &= ~HAS_MEDIA_TABLE; ++ } ++#endif ++ for (i = 0; i < 6; i ++) { ++ rtdev->dev_addr[i] = ee_data[i + sa_offset]; ++ sum += ee_data[i + sa_offset]; ++ } ++ } ++ /* Lite-On boards have the address byte-swapped. */ ++ if ((rtdev->dev_addr[0] == 0xA0 || rtdev->dev_addr[0] == 0xC0) ++ && rtdev->dev_addr[1] == 0x00) ++ for (i = 0; i < 6; i+=2) { ++ char tmp = rtdev->dev_addr[i]; ++ rtdev->dev_addr[i] = rtdev->dev_addr[i+1]; ++ rtdev->dev_addr[i+1] = tmp; ++ } ++ /* On the Zynx 315 Etherarray and other multiport boards only the ++ first Tulip has an EEPROM. ++ On Sparc systems the mac address is held in the OBP property ++ "local-mac-address". ++ The addresses of the subsequent ports are derived from the first. ++ Many PCI BIOSes also incorrectly report the IRQ line, so we correct ++ that here as well. 
*/ ++ if (sum == 0 || sum == 6*0xff) { ++#if defined(__sparc__) ++ struct pcidev_cookie *pcp = pdev->sysdata; ++#endif ++ eeprom_missing = 1; ++ for (i = 0; i < 5; i++) ++ rtdev->dev_addr[i] = last_phys_addr[i]; ++ rtdev->dev_addr[i] = last_phys_addr[i] + 1; ++#if defined(__sparc__) ++ if ((pcp != NULL) && prom_getproplen(pcp->prom_node, ++ "local-mac-address") == 6) { ++ prom_getproperty(pcp->prom_node, "local-mac-address", ++ rtdev->dev_addr, 6); ++ } ++#endif ++#if defined(__i386__) /* Patch up x86 BIOS bug. */ ++ if (last_irq) ++ irq = last_irq; ++#endif ++ } ++ ++ for (i = 0; i < 6; i++) ++ last_phys_addr[i] = rtdev->dev_addr[i]; ++#if defined(__i386__) ++ last_irq = irq; ++#endif ++ ++ /* The lower four bits are the media type. */ ++ if (board_idx >= 0 && board_idx < MAX_UNITS) { ++ /* Somehow required for this RTnet version, don't ask me why... */ ++ if (!options[board_idx]) ++ tp->default_port = 11; /*MII*/ ++ /*RTnet*/ ++ ++ if (options[board_idx] & MEDIA_MASK) ++ tp->default_port = options[board_idx] & MEDIA_MASK; ++ if ((options[board_idx] & FullDuplex) || full_duplex[board_idx] > 0) ++ tp->full_duplex = 1; ++ if (mtu[board_idx] > 0) ++ rtdev->mtu = mtu[board_idx]; ++ } ++ if (rtdev->mem_start & MEDIA_MASK) ++ tp->default_port = rtdev->mem_start & MEDIA_MASK; ++ if (tp->default_port) { ++ printk(KERN_INFO "tulip%d: Transceiver selection forced to %s.\n", ++ board_idx, medianame[tp->default_port & MEDIA_MASK]); ++ tp->medialock = 1; ++ if (tulip_media_cap[tp->default_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ } ++ if (tp->full_duplex) ++ tp->full_duplex_lock = 1; ++ ++ if (tulip_media_cap[tp->default_port] & MediaIsMII) { ++ u16 media2advert[] = { 0x20, 0x40, 0x03e0, 0x60, 0x80, 0x100, 0x200 }; ++ tp->mii_advertise = media2advert[tp->default_port - 9]; ++ tp->mii_advertise |= (tp->flags & HAS_8023X); /* Matching bits! */ ++ } ++ ++ if (tp->flags & HAS_MEDIA_TABLE) { ++ memcpy(tp->eeprom, ee_data, sizeof(tp->eeprom)); ++ ++ sprintf(rtdev->name, "tulip%d", board_idx); /* hack */ ++ tulip_parse_eeprom(rtdev); ++ strcpy(rtdev->name, "rteth%d"); /* un-hack */ ++ } ++ ++ if ((tp->flags & ALWAYS_CHECK_MII) || ++ (tp->mtable && tp->mtable->has_mii) || ++ ( ! tp->mtable && (tp->flags & HAS_MII))) { ++ if (tp->mtable && tp->mtable->has_mii) { ++ for (i = 0; i < tp->mtable->leafcount; i++) ++ if (tp->mtable->mleaf[i].media == 11) { ++ tp->cur_index = i; ++ tp->saved_if_port = rtdev->if_port; ++ tulip_select_media(rtdev, 2); ++ rtdev->if_port = tp->saved_if_port; ++ break; ++ } ++ } ++ ++ /* Find the connected MII xcvrs. ++ Doing this in open() would allow detecting external xcvrs ++ later, but takes much time. */ ++ tulip_find_mii (rtdev, board_idx); ++ } ++ ++ rtdev->open = tulip_open; ++ rtdev->stop = tulip_close; ++ rtdev->hard_header = rt_eth_header; ++ rtdev->hard_start_xmit = tulip_start_xmit; ++ rtdev->get_stats = tulip_get_stats; ++ ++ if (/*RTnet*/rt_register_rtnetdev(rtdev)) { ++ goto err_out_free_ring; ++ } ++ ++ printk(KERN_INFO "%s: %s rev %d at %#3lx,", ++ rtdev->name, tulip_tbl[chip_idx].chip_name, chip_rev, ioaddr); ++ pci_set_drvdata(pdev, rtdev); ++ ++ if (t2104x_mode == 1) ++ printk(" 21040 compatible mode,"); ++ else if (t2104x_mode == 2) ++ printk(" 21041 mode,"); ++ if (eeprom_missing) ++ printk(" EEPROM not present,"); ++ for (i = 0; i < 6; i++) ++ printk("%c%2.2X", i ? 
':' : ' ', rtdev->dev_addr[i]); ++ printk(", IRQ %d.\n", irq); ++ ++/*RTnet ++ if (tp->chip_id == PNIC2) ++ tp->link_change = pnic2_lnk_change; ++ else if ((tp->flags & HAS_NWAY) || tp->chip_id == DC21041) ++ tp->link_change = t21142_lnk_change; ++ else if (tp->flags & HAS_PNICNWAY) ++ tp->link_change = pnic_lnk_change; ++ *RTnet*/ ++ tp->link_change = NULL; ++ ++ /* Reset the xcvr interface and turn on heartbeat. */ ++ switch (chip_idx) { ++ case DC21041: ++ if (tp->sym_advertise == 0) ++ tp->sym_advertise = 0x0061; ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */ ++ outl(inl(ioaddr + CSR6) | csr6_fd, ioaddr + CSR6); ++ outl(0x0000EF01, ioaddr + CSR13); ++ break; ++ case DC21040: ++ outl(0x00000000, ioaddr + CSR13); ++ outl(0x00000004, ioaddr + CSR13); ++ break; ++ case DC21140: ++ case DM910X: ++ default: ++ if (tp->mtable) ++ outl(tp->mtable->csr12dir | 0x100, ioaddr + CSR12); ++ break; ++ case DC21142: ++ if (tp->mii_cnt || tulip_media_cap[rtdev->if_port] & MediaIsMII) { ++ outl(csr6_mask_defstate, ioaddr + CSR6); ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ outl(csr6_mask_hdcap, ioaddr + CSR6); ++ } else ++ t21142_start_nway(rtdev); ++ break; ++ case PNIC2: ++ /* just do a reset for sanity sake */ ++ outl(0x0000, ioaddr + CSR13); ++ outl(0x0000, ioaddr + CSR14); ++ break; ++ case LC82C168: ++ if ( ! tp->mii_cnt) { ++ tp->nway = 1; ++ tp->nwayset = 0; ++ outl(csr6_ttm | csr6_ca, ioaddr + CSR6); ++ outl(0x30, ioaddr + CSR12); ++ outl(0x0001F078, ioaddr + CSR6); ++ outl(0x0201F078, ioaddr + CSR6); /* Turn on autonegotiation. */ ++ } ++ break; ++ case MX98713: ++ case COMPEX9881: ++ outl(0x00000000, ioaddr + CSR6); ++ outl(0x000711C0, ioaddr + CSR14); /* Turn on NWay. */ ++ outl(0x00000001, ioaddr + CSR13); ++ break; ++ case MX98715: ++ case MX98725: ++ outl(0x01a80000, ioaddr + CSR6); ++ outl(0xFFFFFFFF, ioaddr + CSR14); ++ outl(0x00001000, ioaddr + CSR12); ++ break; ++ case COMET: ++ /* No initialization necessary. 
*/ ++ break; ++ } ++ ++ /* put the chip in snooze mode until opened */ ++ tulip_set_power_state (tp, 0, 1); ++ ++ return 0; ++ ++err_out_free_ring: ++ pci_free_consistent (pdev, ++ sizeof (struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof (struct tulip_tx_desc) * TX_RING_SIZE, ++ tp->rx_ring, tp->rx_ring_dma); ++ ++err_out_mtable: ++ if (tp->mtable) ++ kfree (tp->mtable); ++#ifndef USE_IO_OPS ++ iounmap((void *)ioaddr); ++ ++err_out_free_res: ++#endif ++ pci_release_regions (pdev); ++ ++err_out_free_netdev: ++ /*RTnet*/rtdev_free (rtdev); ++ return -ENODEV; ++} ++ ++ ++static void tulip_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = (struct rtnet_device *) pci_get_drvdata (pdev); ++ struct tulip_private *tp; ++ ++ if (!rtdev || !rtdev->priv) ++ return; ++ ++ tp = rtdev->priv; ++ pci_free_consistent (pdev, ++ sizeof (struct tulip_rx_desc) * RX_RING_SIZE + ++ sizeof (struct tulip_tx_desc) * TX_RING_SIZE, ++ tp->rx_ring, tp->rx_ring_dma); ++ rt_unregister_rtnetdev (rtdev); ++ if (tp->mtable) ++ kfree (tp->mtable); ++#ifndef USE_IO_OPS ++ iounmap((void *)rtdev->base_addr); ++#endif ++ /*RTnet*/ ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free (rtdev); ++ /*RTnet*/ ++ pci_release_regions (pdev); ++ pci_set_drvdata (pdev, NULL); ++ ++ /* pci_power_off (pdev, -1); */ ++} ++ ++ ++static struct pci_driver tulip_driver = { ++ name: DRV_NAME, ++ id_table: tulip_pci_tbl, ++ probe: tulip_init_one, ++ remove: tulip_remove_one, ++}; ++ ++ ++static int __init tulip_init (void) ++{ ++#ifdef MODULE ++ printk(KERN_INFO "%s", version); ++#endif ++ ++ /* copy module parms into globals */ ++ tulip_rx_copybreak = rx_copybreak; ++ tulip_max_interrupt_work = max_interrupt_work; ++ ++ /* probe for and init boards */ ++ return pci_register_driver (&tulip_driver); ++} ++ ++ ++static void __exit tulip_cleanup (void) ++{ ++ pci_unregister_driver (&tulip_driver); ++} ++ ++ ++module_init(tulip_init); ++module_exit(tulip_cleanup); +--- linux/drivers/xenomai/net/drivers/tulip/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/Makefile 2021-04-07 16:01:27.329634020 +0800 +@@ -0,0 +1,12 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_TULIP) += rt_tulip.o ++ ++rt_tulip-y := \ ++ tulip_core.o \ ++ eeprom.o \ ++ interrupt.o \ ++ media.o \ ++ 21142.o \ ++ pnic.o \ ++ pnic2.o +--- linux/drivers/xenomai/net/drivers/tulip/21142.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/21142.c 2021-04-07 16:01:27.324634028 +0800 +@@ -0,0 +1,51 @@ ++/* ++ drivers/net/tulip/21142.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. 
++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++u16 t21142_csr14[] = { 0xFFFF, 0x0705, 0x0705, 0x0000, 0x7F3D, }; ++ ++ ++void t21142_start_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int csr14 = ((tp->sym_advertise & 0x0780) << 9) | ++ ((tp->sym_advertise & 0x0020) << 1) | 0xffbf; ++ ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Restarting 21143 autonegotiation, csr14=%8.8x.\n", ++ rtdev->name, csr14); ++ outl(0x0001, ioaddr + CSR13); ++ udelay(100); ++ outl(csr14, ioaddr + CSR14); ++ tp->csr6 = 0x82420000 | (tp->sym_advertise & 0x0040 ? FullDuplex : 0); ++ outl(tp->csr6, ioaddr + CSR6); ++ if (tp->mtable && tp->mtable->csr15dir) { ++ outl(tp->mtable->csr15dir, ioaddr + CSR15); ++ outl(tp->mtable->csr15val, ioaddr + CSR15); ++ } else ++ outw(0x0008, ioaddr + CSR15); ++ outl(0x1301, ioaddr + CSR12); /* Trigger NWAY. */ ++} ++ ++ +--- linux/drivers/xenomai/net/drivers/tulip/interrupt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/interrupt.c 2021-04-07 16:01:27.319634035 +0800 +@@ -0,0 +1,391 @@ ++/* ++ drivers/net/tulip/interrupt.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++int tulip_rx_copybreak; ++unsigned int tulip_max_interrupt_work; ++ ++#ifdef CONFIG_NET_HW_FLOWCONTROL ++ ++#define MIT_SIZE 15 ++unsigned int mit_table[MIT_SIZE+1] = ++{ ++ /* CRS11 21143 hardware Mitigation Control Interrupt ++ We use only RX mitigation we other techniques for ++ TX intr. mitigation. ++ ++ 31 Cycle Size (timer control) ++ 30:27 TX timer in 16 * Cycle size ++ 26:24 TX No pkts before Int. ++ 23:20 RX timer in Cycle size ++ 19:17 RX No pkts before Int. ++ 16 Continues Mode (CM) ++ */ ++ ++ 0x0, /* IM disabled */ ++ 0x80150000, /* RX time = 1, RX pkts = 2, CM = 1 */ ++ 0x80150000, ++ 0x80270000, ++ 0x80370000, ++ 0x80490000, ++ 0x80590000, ++ 0x80690000, ++ 0x807B0000, ++ 0x808B0000, ++ 0x809D0000, ++ 0x80AD0000, ++ 0x80BD0000, ++ 0x80CF0000, ++ 0x80DF0000, ++// 0x80FF0000 /* RX time = 16, RX pkts = 7, CM = 1 */ ++ 0x80F10000 /* RX time = 16, RX pkts = 0, CM = 1 */ ++}; ++#endif ++ ++ ++int tulip_refill_rx(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry; ++ int refilled = 0; ++ ++ /* Refill the Rx ring buffers. 
*/ ++ for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { ++ entry = tp->dirty_rx % RX_RING_SIZE; ++ if (tp->rx_buffers[entry].skb == NULL) { ++ struct /*RTnet*/rtskb *skb; ++ dma_addr_t mapping; ++ ++ skb = tp->rx_buffers[entry].skb = /*RTnet*/rtnetdev_alloc_rtskb(rtdev, PKT_BUF_SZ); ++ if (skb == NULL) ++ break; ++ ++ mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, ++ PCI_DMA_FROMDEVICE); ++ tp->rx_buffers[entry].mapping = mapping; ++ ++ tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping); ++ refilled++; ++ } ++ tp->rx_ring[entry].status = cpu_to_le32(DescOwned); ++ } ++ if(tp->chip_id == LC82C168) { ++ if(((inl(rtdev->base_addr + CSR5)>>17)&0x07) == 4) { ++ /* Rx stopped due to out of buffers, ++ * restart it ++ */ ++ outl(0x01, rtdev->base_addr + CSR2); ++ } ++ } ++ return refilled; ++} ++ ++ ++static int tulip_rx(/*RTnet*/struct rtnet_device *rtdev, nanosecs_abs_t *time_stamp) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int entry = tp->cur_rx % RX_RING_SIZE; ++ int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; ++ int received = 0; ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry, ++ tp->rx_ring[entry].status); ++ /* If we own the next entry, it is a new packet. Send it up. */ ++ while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) { ++ s32 status = le32_to_cpu(tp->rx_ring[entry].status); ++ ++ if (tulip_debug > 5) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n", ++ rtdev->name, entry, status); ++ if (--rx_work_limit < 0) ++ break; ++ if ((status & 0x38008300) != 0x0300) { ++ if ((status & 0x38000300) != 0x0300) { ++ /* Ingore earlier buffers. */ ++ if ((status & 0xffff) != 0x7fff) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame " ++ "spanned multiple buffers, status %8.8x!\n", ++ rtdev->name, status); ++ tp->stats.rx_length_errors++; ++ } ++ } else if (status & RxDescFatalErr) { ++ /* There was a fatal error. */ ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n", ++ rtdev->name, status); ++ tp->stats.rx_errors++; /* end of a packet.*/ ++ if (status & 0x0890) tp->stats.rx_length_errors++; ++ if (status & 0x0004) tp->stats.rx_frame_errors++; ++ if (status & 0x0002) tp->stats.rx_crc_errors++; ++ if (status & 0x0001) tp->stats.rx_fifo_errors++; ++ } ++ } else { ++ /* Omit the four octet CRC from the length. */ ++ short pkt_len = ((status >> 16) & 0x7ff) - 4; ++ struct /*RTnet*/rtskb *skb; ++ ++#ifndef final_version ++ if (pkt_len > 1518) { ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n", ++ rtdev->name, pkt_len, pkt_len); ++ pkt_len = 1518; ++ tp->stats.rx_length_errors++; ++ } ++#endif ++ ++ { ++ unsigned char *temp = /*RTnet*/rtskb_put(skb = tp->rx_buffers[entry].skb, pkt_len); ++ ++#ifndef final_version ++ if (tp->rx_buffers[entry].mapping != ++ le32_to_cpu(tp->rx_ring[entry].buffer1)) { ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Internal fault: The skbuff addresses " ++ "do not match in tulip_rx: %08x vs. %08llx ? 
/ %p.\n", ++ rtdev->name, ++ le32_to_cpu(tp->rx_ring[entry].buffer1), ++ (unsigned long long)tp->rx_buffers[entry].mapping, ++ temp);/*RTnet*/ ++ } ++#endif ++ ++ pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, ++ PKT_BUF_SZ, PCI_DMA_FROMDEVICE); ++ ++ tp->rx_buffers[entry].skb = NULL; ++ tp->rx_buffers[entry].mapping = 0; ++ } ++ skb->protocol = /*RTnet*/rt_eth_type_trans(skb, rtdev); ++ skb->time_stamp = *time_stamp; ++ /*RTnet*/rtnetif_rx(skb); ++ ++ tp->stats.rx_packets++; ++ tp->stats.rx_bytes += pkt_len; ++ } ++ received++; ++ entry = (++tp->cur_rx) % RX_RING_SIZE; ++ } ++ return received; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. */ ++int tulip_interrupt(rtdm_irq_t *irq_handle) ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read();/*RTnet*/ ++ struct rtnet_device *rtdev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device);/*RTnet*/ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ unsigned int csr5; ++ int entry; ++ int missed; ++ int rx = 0; ++ int tx = 0; ++ int oi = 0; ++ int maxrx = RX_RING_SIZE; ++ int maxtx = TX_RING_SIZE; ++ int maxoi = TX_RING_SIZE; ++ unsigned int work_count = tulip_max_interrupt_work; ++ ++ /* Let's see whether the interrupt really is for us */ ++ csr5 = inl(ioaddr + CSR5); ++ ++ if ((csr5 & (NormalIntr|AbnormalIntr)) == 0) { ++ rtdm_printk("%s: unexpected IRQ!\n",rtdev->name); ++ return RTDM_IRQ_NONE; ++ } ++ ++ tp->nir++; ++ ++ do { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ outl(csr5 & 0x0001ffff, ioaddr + CSR5); ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n", ++ rtdev->name, csr5, inl(rtdev->base_addr + CSR5)); ++ ++ if (csr5 & (RxIntr | RxNoBuf)) { ++ rx += tulip_rx(rtdev, &time_stamp); ++ tulip_refill_rx(rtdev); ++ } ++ ++ if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) { ++ unsigned int dirty_tx; ++ ++ rtdm_lock_get(&tp->lock); ++ ++ for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0; ++ dirty_tx++) { ++ int entry = dirty_tx % TX_RING_SIZE; ++ int status = le32_to_cpu(tp->tx_ring[entry].status); ++ ++ if (status < 0) ++ break; /* It still has not been Txed */ ++ ++ /* Check for Rx filter setup frames. */ ++ if (tp->tx_buffers[entry].skb == NULL) { ++ /* test because dummy frames not mapped */ ++ if (tp->tx_buffers[entry].mapping) ++ pci_unmap_single(tp->pdev, ++ tp->tx_buffers[entry].mapping, ++ sizeof(tp->setup_frame), ++ PCI_DMA_TODEVICE); ++ continue; ++ } ++ ++ if (status & 0x8000) { ++ /* There was an major error, log it. */ ++#ifndef final_version ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", ++ rtdev->name, status); ++#endif ++ tp->stats.tx_errors++; ++ if (status & 0x4104) tp->stats.tx_aborted_errors++; ++ if (status & 0x0C00) tp->stats.tx_carrier_errors++; ++ if (status & 0x0200) tp->stats.tx_window_errors++; ++ if (status & 0x0002) tp->stats.tx_fifo_errors++; ++ if ((status & 0x0080) && tp->full_duplex == 0) ++ tp->stats.tx_heartbeat_errors++; ++ } else { ++ tp->stats.tx_bytes += ++ tp->tx_buffers[entry].skb->len; ++ tp->stats.collisions += (status >> 3) & 15; ++ tp->stats.tx_packets++; ++ } ++ ++ pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, ++ tp->tx_buffers[entry].skb->len, ++ PCI_DMA_TODEVICE); ++ ++ /* Free the original skb. 
*/ ++ /*RTnet*/dev_kfree_rtskb(tp->tx_buffers[entry].skb); ++ tp->tx_buffers[entry].skb = NULL; ++ tp->tx_buffers[entry].mapping = 0; ++ tx++; ++ rtnetif_tx(rtdev); ++ } ++ ++#ifndef final_version ++ if (tp->cur_tx - dirty_tx > TX_RING_SIZE) { ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Out-of-sync dirty pointer, %d vs. %d.\n", ++ rtdev->name, dirty_tx, tp->cur_tx); ++ dirty_tx += TX_RING_SIZE; ++ } ++#endif ++ ++ if (tp->cur_tx - dirty_tx < TX_RING_SIZE - 2) ++ /*RTnet*/rtnetif_wake_queue(rtdev); ++ ++ tp->dirty_tx = dirty_tx; ++ if (csr5 & TxDied) { ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: The transmitter stopped." ++ " CSR5 is %x, CSR6 %x, new CSR6 %x.\n", ++ rtdev->name, csr5, inl(ioaddr + CSR6), tp->csr6); ++ tulip_restart_rxtx(tp); ++ } ++ rtdm_lock_put(&tp->lock); ++ } ++ ++ /* Log errors. */ ++ if (csr5 & AbnormalIntr) { /* Abnormal error summary bit. */ ++ if (csr5 == 0xffffffff) ++ break; ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Error detected, " ++ "device may not work any more (csr5=%08x)!\n", rtdev->name, csr5); ++ /* Clear all error sources, included undocumented ones! */ ++ outl(0x0800f7ba, ioaddr + CSR5); ++ oi++; ++ } ++ if (csr5 & TimerInt) { ++ ++ if (tulip_debug > 2) ++ /*RTnet*/rtdm_printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n", ++ rtdev->name, csr5); ++ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); ++ tp->ttimer = 0; ++ oi++; ++ } ++ if (tx > maxtx || rx > maxrx || oi > maxoi) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: Too much work during an interrupt, " ++ "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", rtdev->name, csr5, tp->nir, tx, rx, oi); ++ ++ /* Acknowledge all interrupt sources. */ ++ outl(0x8001ffff, ioaddr + CSR5); ++ if (tp->flags & HAS_INTR_MITIGATION) { ++ /* Josip Loncaric at ICASE did extensive experimentation ++ to develop a good interrupt mitigation setting.*/ ++ outl(0x8b240000, ioaddr + CSR11); ++ } else if (tp->chip_id == LC82C168) { ++ /* the LC82C168 doesn't have a hw timer.*/ ++ outl(0x00, ioaddr + CSR7); ++ } else { ++ /* Mask all interrupting sources, set timer to ++ re-enable. */ ++ } ++ break; ++ } ++ ++ work_count--; ++ if (work_count == 0) ++ break; ++ ++ csr5 = inl(ioaddr + CSR5); ++ } while ((csr5 & (NormalIntr|AbnormalIntr)) != 0); ++ ++ tulip_refill_rx(rtdev); ++ ++ /* check if the card is in suspend mode */ ++ entry = tp->dirty_rx % RX_RING_SIZE; ++ if (tp->rx_buffers[entry].skb == NULL) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", rtdev->name, tp->nir, tp->cur_rx, tp->ttimer, rx); ++ if (tp->chip_id == LC82C168) ++ outl(0x00, ioaddr + CSR7); ++ else { ++ if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", rtdev->name, tp->nir); ++ outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt, ++ ioaddr + CSR7); ++ outl(TimerInt, ioaddr + CSR5); ++ outl(12, ioaddr + CSR11); ++ tp->ttimer = 1; ++ } ++ } ++ } ++ ++ if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) { ++ tp->stats.rx_dropped += missed & 0x10000 ? 
0x10000 : missed; ++ } ++ ++ if (tulip_debug > 4) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n", ++ rtdev->name, inl(ioaddr + CSR5)); ++ if (rx) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} +--- linux/drivers/xenomai/net/drivers/tulip/tulip.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/tulip.h 2021-04-07 16:01:27.314634042 +0800 +@@ -0,0 +1,490 @@ ++/* ++ drivers/net/tulip/tulip.h ++ ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#ifndef __NET_TULIP_H__ ++#define __NET_TULIP_H__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++ ++ ++/* undefine, or define to various debugging levels (>4 == obscene levels) */ ++#define TULIP_DEBUG 1 ++ ++/* undefine USE_IO_OPS for MMIO, define for PIO */ ++#ifdef CONFIG_TULIP_MMIO ++# undef USE_IO_OPS ++#else ++# define USE_IO_OPS 1 ++#endif ++ ++ ++ ++struct tulip_chip_table { ++ char *chip_name; ++ unsigned int io_size; ++ int valid_intrs; /* CSR7 interrupt enable settings */ ++ int flags; ++}; ++ ++ ++enum tbl_flag { ++ HAS_MII = 0x0001, ++ HAS_MEDIA_TABLE = 0x0002, ++ CSR12_IN_SROM = 0x0004, ++ ALWAYS_CHECK_MII = 0x0008, ++ HAS_ACPI = 0x0010, ++ MC_HASH_ONLY = 0x0020, /* Hash-only multicast filter. */ ++ HAS_PNICNWAY = 0x0080, ++ HAS_NWAY = 0x0040, /* Uses internal NWay xcvr. */ ++ HAS_INTR_MITIGATION = 0x0100, ++ IS_ASIX = 0x0200, ++ HAS_8023X = 0x0400, ++ COMET_MAC_ADDR = 0x0800, ++ HAS_PCI_MWI = 0x1000, ++}; ++ ++ ++/* chip types. careful! order is VERY IMPORTANT here, as these ++ * are used throughout the driver as indices into arrays */ ++/* Note 21142 == 21143. */ ++enum chips { ++ DC21040 = 0, ++ DC21041 = 1, ++ DC21140 = 2, ++ DC21142 = 3, DC21143 = 3, ++ LC82C168, ++ MX98713, ++ MX98715, ++ MX98725, ++ AX88140, ++ PNIC2, ++ COMET, ++ COMPEX9881, ++ I21145, ++ DM910X, ++}; ++ ++ ++enum MediaIs { ++ MediaIsFD = 1, ++ MediaAlwaysFD = 2, ++ MediaIsMII = 4, ++ MediaIsFx = 8, ++ MediaIs100 = 16 ++}; ++ ++ ++/* Offsets to the Command and Status Registers, "CSRs". All accesses ++ must be longword instructions and quadword aligned. */ ++enum tulip_offsets { ++ CSR0 = 0, ++ CSR1 = 0x08, ++ CSR2 = 0x10, ++ CSR3 = 0x18, ++ CSR4 = 0x20, ++ CSR5 = 0x28, ++ CSR6 = 0x30, ++ CSR7 = 0x38, ++ CSR8 = 0x40, ++ CSR9 = 0x48, ++ CSR10 = 0x50, ++ CSR11 = 0x58, ++ CSR12 = 0x60, ++ CSR13 = 0x68, ++ CSR14 = 0x70, ++ CSR15 = 0x78, ++}; ++ ++/* register offset and bits for CFDD PCI config reg */ ++enum pci_cfg_driver_reg { ++ CFDD = 0x40, ++ CFDD_Sleep = (1 << 31), ++ CFDD_Snooze = (1 << 30), ++}; ++ ++ ++/* The bits in the CSR5 status registers, mostly interrupt sources. 
*/ ++enum status_bits { ++ TimerInt = 0x800, ++ SytemError = 0x2000, ++ TPLnkFail = 0x1000, ++ TPLnkPass = 0x10, ++ NormalIntr = 0x10000, ++ AbnormalIntr = 0x8000, ++ RxJabber = 0x200, ++ RxDied = 0x100, ++ RxNoBuf = 0x80, ++ RxIntr = 0x40, ++ TxFIFOUnderflow = 0x20, ++ TxJabber = 0x08, ++ TxNoBuf = 0x04, ++ TxDied = 0x02, ++ TxIntr = 0x01, ++}; ++ ++ ++enum tulip_mode_bits { ++ TxThreshold = (1 << 22), ++ FullDuplex = (1 << 9), ++ TxOn = 0x2000, ++ AcceptBroadcast = 0x0100, ++ AcceptAllMulticast = 0x0080, ++ AcceptAllPhys = 0x0040, ++ AcceptRunt = 0x0008, ++ RxOn = 0x0002, ++ RxTx = (TxOn | RxOn), ++}; ++ ++ ++enum tulip_busconfig_bits { ++ MWI = (1 << 24), ++ MRL = (1 << 23), ++ MRM = (1 << 21), ++ CALShift = 14, ++ BurstLenShift = 8, ++}; ++ ++ ++/* The Tulip Rx and Tx buffer descriptors. */ ++struct tulip_rx_desc { ++ s32 status; ++ s32 length; ++ u32 buffer1; ++ u32 buffer2; ++}; ++ ++ ++struct tulip_tx_desc { ++ s32 status; ++ s32 length; ++ u32 buffer1; ++ u32 buffer2; /* We use only buffer 1. */ ++}; ++ ++ ++enum desc_status_bits { ++ DescOwned = 0x80000000, ++ RxDescFatalErr = 0x8000, ++ RxWholePkt = 0x0300, ++}; ++ ++ ++enum t21041_csr13_bits { ++ csr13_eng = (0xEF0<<4), /* for eng. purposes only, hardcode at EF0h */ ++ csr13_aui = (1<<3), /* clear to force 10bT, set to force AUI/BNC */ ++ csr13_cac = (1<<2), /* CSR13/14/15 autoconfiguration */ ++ csr13_srl = (1<<0), /* When reset, resets all SIA functions, machines */ ++ ++ csr13_mask_auibnc = (csr13_eng | csr13_aui | csr13_srl), ++ csr13_mask_10bt = (csr13_eng | csr13_srl), ++}; ++ ++enum t21143_csr6_bits { ++ csr6_sc = (1<<31), ++ csr6_ra = (1<<30), ++ csr6_ign_dest_msb = (1<<26), ++ csr6_mbo = (1<<25), ++ csr6_scr = (1<<24), /* scramble mode flag: can't be set */ ++ csr6_pcs = (1<<23), /* Enables PCS functions (symbol mode requires csr6_ps be set) default is set */ ++ csr6_ttm = (1<<22), /* Transmit Threshold Mode, set for 10baseT, 0 for 100BaseTX */ ++ csr6_sf = (1<<21), /* Store and forward. If set ignores TR bits */ ++ csr6_hbd = (1<<19), /* Heart beat disable. Disables SQE function in 10baseT */ ++ csr6_ps = (1<<18), /* Port Select. 0 (defualt) = 10baseT, 1 = 100baseTX: can't be set */ ++ csr6_ca = (1<<17), /* Collision Offset Enable. If set uses special algorithm in low collision situations */ ++ csr6_trh = (1<<15), /* Transmit Threshold high bit */ ++ csr6_trl = (1<<14), /* Transmit Threshold low bit */ ++ ++ /*************************************************************** ++ * This table shows transmit threshold values based on media * ++ * and these two registers (from PNIC1 & 2 docs) Note: this is * ++ * all meaningless if sf is set. 
* ++ ***************************************************************/ ++ ++ /*********************************** ++ * (trh,trl) * 100BaseTX * 10BaseT * ++ *********************************** ++ * (0,0) * 128 * 72 * ++ * (0,1) * 256 * 96 * ++ * (1,0) * 512 * 128 * ++ * (1,1) * 1024 * 160 * ++ ***********************************/ ++ ++ csr6_fc = (1<<12), /* Forces a collision in next transmission (for testing in loopback mode) */ ++ csr6_om_int_loop = (1<<10), /* internal (FIFO) loopback flag */ ++ csr6_om_ext_loop = (1<<11), /* external (PMD) loopback flag */ ++ /* set both and you get (PHY) loopback */ ++ csr6_fd = (1<<9), /* Full duplex mode, disables hearbeat, no loopback */ ++ csr6_pm = (1<<7), /* Pass All Multicast */ ++ csr6_pr = (1<<6), /* Promiscuous mode */ ++ csr6_sb = (1<<5), /* Start(1)/Stop(0) backoff counter */ ++ csr6_if = (1<<4), /* Inverse Filtering, rejects only addresses in address table: can't be set */ ++ csr6_pb = (1<<3), /* Pass Bad Frames, (1) causes even bad frames to be passed on */ ++ csr6_ho = (1<<2), /* Hash-only filtering mode: can't be set */ ++ csr6_hp = (1<<0), /* Hash/Perfect Receive Filtering Mode: can't be set */ ++ ++ csr6_mask_capture = (csr6_sc | csr6_ca), ++ csr6_mask_defstate = (csr6_mask_capture | csr6_mbo), ++ csr6_mask_hdcap = (csr6_mask_defstate | csr6_hbd | csr6_ps), ++ csr6_mask_hdcaptt = (csr6_mask_hdcap | csr6_trh | csr6_trl), ++ csr6_mask_fullcap = (csr6_mask_hdcaptt | csr6_fd), ++ csr6_mask_fullpromisc = (csr6_pr | csr6_pm), ++ csr6_mask_filters = (csr6_hp | csr6_ho | csr6_if), ++ csr6_mask_100bt = (csr6_scr | csr6_pcs | csr6_hbd), ++}; ++ ++ ++/* Keep the ring sizes a power of two for efficiency. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define RX_RING_SIZE 8 /* RTnet: RX_RING_SIZE*2 rtskbs will be preallocated */ ++ ++#define MEDIA_MASK 31 ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */ ++ ++#define TULIP_MIN_CACHE_LINE 8 /* in units of 32-bit words */ ++ ++#if defined(__sparc__) || defined(__hppa__) ++/* The UltraSparc PCI controllers will disconnect at every 64-byte ++ * crossing anyways so it makes no sense to tell Tulip to burst ++ * any more than that. ++ */ ++#define TULIP_MAX_CACHE_LINE 16 /* in units of 32-bit words */ ++#else ++#define TULIP_MAX_CACHE_LINE 32 /* in units of 32-bit words */ ++#endif ++ ++ ++/* Ring-wrap flag in length field, use for last ring entry. ++ 0x01000000 means chain on buffer2 address, ++ 0x02000000 means use the ring start address in CSR2/3. ++ Note: Some work-alike chips do not function correctly in chained mode. ++ The ASIX chip works only in chained mode. ++ Thus we indicates ring mode, but always write the 'next' field for ++ chained mode as well. ++*/ ++#define DESC_RING_WRAP 0x02000000 ++ ++ ++#define EEPROM_SIZE 128 /* 2 << EEPROM_ADDRLEN */ ++ ++ ++#define RUN_AT(x) (jiffies + (x)) ++ ++#if defined(__i386__) /* AKA get_unaligned() */ ++#define get_u16(ptr) (*(u16 *)(ptr)) ++#else ++#define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8)) ++#endif ++ ++struct medialeaf { ++ u8 type; ++ u8 media; ++ unsigned char *leafdata; ++}; ++ ++ ++struct mediatable { ++ u16 defaultmedia; ++ u8 leafcount; ++ u8 csr12dir; /* General purpose pin directions. */ ++ unsigned has_mii:1; ++ unsigned has_nonmii:1; ++ unsigned has_reset:6; ++ u32 csr15dir; ++ u32 csr15val; /* 21143 NWay setting. 
*/ ++ struct medialeaf mleaf[0]; ++}; ++ ++ ++struct mediainfo { ++ struct mediainfo *next; ++ int info_type; ++ int index; ++ unsigned char *info; ++}; ++ ++struct ring_info { ++ struct /*RTnet*/rtskb *skb; ++ dma_addr_t mapping; ++}; ++ ++ ++struct tulip_private { ++ const char *product_name; ++ /*RTnet*/struct rtnet_device *next_module; ++ struct tulip_rx_desc *rx_ring; ++ struct tulip_tx_desc *tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). */ ++ struct ring_info tx_buffers[TX_RING_SIZE]; ++ /* The addresses of receive-in-place skbuffs. */ ++ struct ring_info rx_buffers[RX_RING_SIZE]; ++ u16 setup_frame[96]; /* Pseudo-Tx frame to init address table. */ ++ int chip_id; ++ int revision; ++ int flags; ++ struct net_device_stats stats; ++ u32 mc_filter[2]; ++ /*RTnet*/rtdm_lock_t lock; ++ spinlock_t mii_lock; ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ ++ ++#ifdef CONFIG_NET_HW_FLOWCONTROL ++#define RX_A_NBF_STOP 0xffffff3f /* To disable RX and RX-NOBUF ints. */ ++ int fc_bit; ++ int mit_sel; ++ int mit_change; /* Signal for Interrupt Mitigtion */ ++#endif ++ unsigned int full_duplex:1; /* Full-duplex operation requested. */ ++ unsigned int full_duplex_lock:1; ++ unsigned int fake_addr:1; /* Multiport board faked address. */ ++ unsigned int default_port:4; /* Last dev->if_port value. */ ++ unsigned int media2:4; /* Secondary monitored media port. */ ++ unsigned int medialock:1; /* Don't sense media type. */ ++ unsigned int mediasense:1; /* Media sensing in progress. */ ++ unsigned int nway:1, nwayset:1; /* 21143 internal NWay. */ ++ unsigned int csr0; /* CSR0 setting. */ ++ unsigned int csr6; /* Current CSR6 control settings. */ ++ unsigned char eeprom[EEPROM_SIZE]; /* Serial EEPROM contents. */ ++ void (*link_change) (/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ u16 sym_advertise, mii_advertise; /* NWay capabilities advertised. */ ++ u16 lpar; /* 21143 Link partner ability. */ ++ u16 advertising[4]; ++ signed char phys[4], mii_cnt; /* MII device addresses. */ ++ struct mediatable *mtable; ++ int cur_index; /* Current media index. */ ++ int saved_if_port; ++ struct pci_dev *pdev; ++ int ttimer; ++ int susp_rx; ++ unsigned long nir; ++ unsigned long base_addr; ++ int pad0, pad1; /* Used for 8-byte alignment */ ++ rtdm_irq_t irq_handle; ++}; ++ ++ ++struct eeprom_fixup { ++ char *name; ++ unsigned char addr0; ++ unsigned char addr1; ++ unsigned char addr2; ++ u16 newtable[32]; /* Max length below. 
*/ ++}; ++ ++ ++/* 21142.c */ ++extern u16 t21142_csr14[]; ++void t21142_start_nway(/*RTnet*/struct rtnet_device *rtdev); ++void t21142_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++ ++/* PNIC2.c */ ++void pnic2_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++void pnic2_start_nway(/*RTnet*/struct rtnet_device *rtdev); ++void pnic2_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++/* eeprom.c */ ++void tulip_parse_eeprom(struct rtnet_device *rtdev); ++int tulip_read_eeprom(long ioaddr, int location, int addr_len); ++ ++/* interrupt.c */ ++extern unsigned int tulip_max_interrupt_work; ++extern int tulip_rx_copybreak; ++int tulip_interrupt(rtdm_irq_t *irq_handle); ++int tulip_refill_rx(/*RTnet*/struct rtnet_device *rtdev); ++ ++/* media.c */ ++int tulip_mdio_read(struct rtnet_device *dev, int phy_id, int location); ++void tulip_mdio_write(struct rtnet_device *dev, int phy_id, int location, int value); ++void tulip_select_media(struct rtnet_device *dev, int startup); ++int tulip_check_duplex(struct rtnet_device *dev); ++void tulip_find_mii (struct rtnet_device *dev, int board_idx); ++ ++/* pnic.c */ ++void pnic_do_nway(/*RTnet*/struct rtnet_device *rtdev); ++void pnic_lnk_change(/*RTnet*/struct rtnet_device *rtdev, int csr5); ++ ++/* tulip_core.c */ ++extern int tulip_debug; ++extern const char * const medianame[]; ++extern const char tulip_media_cap[]; ++extern struct tulip_chip_table tulip_tbl[]; ++extern u8 t21040_csr13[]; ++extern u16 t21041_csr13[]; ++extern u16 t21041_csr14[]; ++extern u16 t21041_csr15[]; ++ ++#ifndef USE_IO_OPS ++#undef inb ++#undef inw ++#undef inl ++#undef outb ++#undef outw ++#undef outl ++#define inb(addr) readb((void*)(addr)) ++#define inw(addr) readw((void*)(addr)) ++#define inl(addr) readl((void*)(addr)) ++#define outb(val,addr) writeb((val), (void*)(addr)) ++#define outw(val,addr) writew((val), (void*)(addr)) ++#define outl(val,addr) writel((val), (void*)(addr)) ++#endif /* !USE_IO_OPS */ ++ ++ ++ ++static inline void tulip_start_rxtx(struct tulip_private *tp) ++{ ++ long ioaddr = tp->base_addr; ++ outl(tp->csr6 | RxTx, ioaddr + CSR6); ++ barrier(); ++ (void) inl(ioaddr + CSR6); /* mmio sync */ ++} ++ ++static inline void tulip_stop_rxtx(struct tulip_private *tp) ++{ ++ long ioaddr = tp->base_addr; ++ u32 csr6 = inl(ioaddr + CSR6); ++ ++ if (csr6 & RxTx) { ++ outl(csr6 & ~RxTx, ioaddr + CSR6); ++ barrier(); ++ (void) inl(ioaddr + CSR6); /* mmio sync */ ++ } ++} ++ ++static inline void tulip_restart_rxtx(struct tulip_private *tp) ++{ ++ tulip_stop_rxtx(tp); ++ rtdm_task_busy_sleep(5); ++ tulip_start_rxtx(tp); ++} ++ ++#endif /* __NET_TULIP_H__ */ +--- linux/drivers/xenomai/net/drivers/tulip/media.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/media.c 2021-04-07 16:01:27.309634049 +0800 +@@ -0,0 +1,567 @@ ++/* ++ drivers/net/tulip/media.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. 
++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include ++#include ++#include ++#include ++#include "tulip.h" ++ ++ ++/* This is a mysterious value that can be written to CSR11 in the 21040 (only) ++ to support a pre-NWay full-duplex signaling mechanism using short frames. ++ No one knows what it should be, but if left at its default value some ++ 10base2(!) packets trigger a full-duplex-request interrupt. */ ++#define FULL_DUPLEX_MAGIC 0x6969 ++ ++/* The maximum data clock rate is 2.5 Mhz. The minimum timing is usually ++ met by back-to-back PCI I/O cycles, but we insert a delay to avoid ++ "overclocking" issues or future 66Mhz PCI. */ ++#define mdio_delay() inl(mdio_addr) ++ ++/* Read and write the MII registers using software-generated serial ++ MDIO protocol. It is just different enough from the EEPROM protocol ++ to not share code. The maxium data clock rate is 2.5 Mhz. */ ++#define MDIO_SHIFT_CLK 0x10000 ++#define MDIO_DATA_WRITE0 0x00000 ++#define MDIO_DATA_WRITE1 0x20000 ++#define MDIO_ENB 0x00000 /* Ignore the 0x02000 databook setting. */ ++#define MDIO_ENB_IN 0x40000 ++#define MDIO_DATA_READ 0x80000 ++ ++static const unsigned char comet_miireg2offset[32] = { ++ 0xB4, 0xB8, 0xBC, 0xC0, 0xC4, 0xC8, 0xCC, 0, 0,0,0,0, 0,0,0,0, ++ 0,0xD0,0,0, 0,0,0,0, 0,0,0,0, 0, 0xD4, 0xD8, 0xDC, }; ++ ++ ++/* MII transceiver control section. ++ Read and write the MII registers using software-generated serial ++ MDIO protocol. See the MII specifications or DP83840A data sheet ++ for details. */ ++ ++int tulip_mdio_read(struct rtnet_device *rtdev, int phy_id, int location) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ int read_cmd = (0xf6 << 10) | ((phy_id & 0x1f) << 5) | location; ++ int retval = 0; ++ long ioaddr = rtdev->base_addr; ++ long mdio_addr = ioaddr + CSR9; ++ unsigned long flags; ++ ++ if (location & ~0x1f) ++ return 0xffff; ++ ++ if (tp->chip_id == COMET && phy_id == 30) { ++ if (comet_miireg2offset[location]) ++ return inl(ioaddr + comet_miireg2offset[location]); ++ return 0xffff; ++ } ++ ++ spin_lock_irqsave(&tp->mii_lock, flags); ++ if (tp->chip_id == LC82C168) { ++ int i = 1000; ++ outl(0x60020000 + (phy_id<<23) + (location<<18), ioaddr + 0xA0); ++ inl(ioaddr + 0xA0); ++ inl(ioaddr + 0xA0); ++ while (--i > 0) { ++ barrier(); ++ if ( ! ((retval = inl(ioaddr + 0xA0)) & 0x80000000)) ++ break; ++ } ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return retval & 0xffff; ++ } ++ ++ /* Establish sync by sending at least 32 logic ones. */ ++ for (i = 32; i >= 0; i--) { ++ outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Shift the read command bits out. */ ++ for (i = 15; i >= 0; i--) { ++ int dataval = (read_cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0; ++ ++ outl(MDIO_ENB | dataval, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Read the two transition, 16 data, and wire-idle bits. */ ++ for (i = 19; i > 0; i--) { ++ outl(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ retval = (retval << 1) | ((inl(mdio_addr) & MDIO_DATA_READ) ? 
1 : 0); ++ outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return (retval>>1) & 0xffff; ++} ++ ++void tulip_mdio_write(struct rtnet_device *rtdev, int phy_id, int location, int val) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ int i; ++ int cmd = (0x5002 << 16) | ((phy_id & 0x1f) << 23) | (location<<18) | (val & 0xffff); ++ long ioaddr = rtdev->base_addr; ++ long mdio_addr = ioaddr + CSR9; ++ unsigned long flags; ++ ++ if (location & ~0x1f) ++ return; ++ ++ if (tp->chip_id == COMET && phy_id == 30) { ++ if (comet_miireg2offset[location]) ++ outl(val, ioaddr + comet_miireg2offset[location]); ++ return; ++ } ++ ++ spin_lock_irqsave(&tp->mii_lock, flags); ++ if (tp->chip_id == LC82C168) { ++ int i = 1000; ++ outl(cmd, ioaddr + 0xA0); ++ do { ++ barrier(); ++ if ( ! (inl(ioaddr + 0xA0) & 0x80000000)) ++ break; ++ } while (--i > 0); ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++ return; ++ } ++ ++ /* Establish sync by sending 32 logic ones. */ ++ for (i = 32; i >= 0; i--) { ++ outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Shift the command bits out. */ ++ for (i = 31; i >= 0; i--) { ++ int dataval = (cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0; ++ outl(MDIO_ENB | dataval, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ /* Clear out extra bits. */ ++ for (i = 2; i > 0; i--) { ++ outl(MDIO_ENB_IN, mdio_addr); ++ mdio_delay(); ++ outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); ++ mdio_delay(); ++ } ++ ++ spin_unlock_irqrestore(&tp->mii_lock, flags); ++} ++ ++ ++/* Set up the transceiver control registers for the selected media type. */ ++void tulip_select_media(struct rtnet_device *rtdev, int startup) ++{ ++ long ioaddr = rtdev->base_addr; ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ struct mediatable *mtable = tp->mtable; ++ u32 new_csr6; ++ int i; ++ ++ if (mtable) { ++ struct medialeaf *mleaf = &mtable->mleaf[tp->cur_index]; ++ unsigned char *p = mleaf->leafdata; ++ switch (mleaf->type) { ++ case 0: /* 21140 non-MII xcvr. */ ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Using a 21140 non-MII transceiver" ++ " with control setting %2.2x.\n", ++ rtdev->name, p[1]); ++ rtdev->if_port = p[0]; ++ if (startup) ++ outl(mtable->csr12dir | 0x100, ioaddr + CSR12); ++ outl(p[1], ioaddr + CSR12); ++ new_csr6 = 0x02000000 | ((p[2] & 0x71) << 18); ++ break; ++ case 2: case 4: { ++ u16 setup[5]; ++ u32 csr13val, csr14val, csr15dir, csr15val; ++ for (i = 0; i < 5; i++) ++ setup[i] = get_u16(&p[i*2 + 1]); ++ ++ rtdev->if_port = p[0] & MEDIA_MASK; ++ if (tulip_media_cap[rtdev->if_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ ++ if (startup && mtable->has_reset) { ++ struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; ++ unsigned char *rst = rleaf->leafdata; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Resetting the transceiver.\n", ++ rtdev->name); ++ for (i = 0; i < rst[0]; i++) ++ outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21143 non-MII %s transceiver control " ++ "%4.4x/%4.4x.\n", ++ rtdev->name, medianame[rtdev->if_port], setup[0], setup[1]); ++ if (p[0] & 0x40) { /* SIA (CSR13-15) setup values are provided. 
*/ ++ csr13val = setup[0]; ++ csr14val = setup[1]; ++ csr15dir = (setup[3]<<16) | setup[2]; ++ csr15val = (setup[4]<<16) | setup[2]; ++ outl(0, ioaddr + CSR13); ++ outl(csr14val, ioaddr + CSR14); ++ outl(csr15dir, ioaddr + CSR15); /* Direction */ ++ outl(csr15val, ioaddr + CSR15); /* Data */ ++ outl(csr13val, ioaddr + CSR13); ++ } else { ++ csr13val = 1; ++ csr14val = 0; ++ csr15dir = (setup[0]<<16) | 0x0008; ++ csr15val = (setup[1]<<16) | 0x0008; ++ if (rtdev->if_port <= 4) ++ csr14val = t21142_csr14[rtdev->if_port]; ++ if (startup) { ++ outl(0, ioaddr + CSR13); ++ outl(csr14val, ioaddr + CSR14); ++ } ++ outl(csr15dir, ioaddr + CSR15); /* Direction */ ++ outl(csr15val, ioaddr + CSR15); /* Data */ ++ if (startup) outl(csr13val, ioaddr + CSR13); ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Setting CSR15 to %8.8x/%8.8x.\n", ++ rtdev->name, csr15dir, csr15val); ++ if (mleaf->type == 4) ++ new_csr6 = 0x82020000 | ((setup[2] & 0x71) << 18); ++ else ++ new_csr6 = 0x82420000; ++ break; ++ } ++ case 1: case 3: { ++ int phy_num = p[0]; ++ int init_length = p[1]; ++ u16 *misc_info, tmp_info; ++ ++ rtdev->if_port = 11; ++ new_csr6 = 0x020E0000; ++ if (mleaf->type == 3) { /* 21142 */ ++ u16 *init_sequence = (u16*)(p+2); ++ u16 *reset_sequence = &((u16*)(p+3))[init_length]; ++ int reset_length = p[2 + init_length*2]; ++ misc_info = reset_sequence + reset_length; ++ if (startup) ++ for (i = 0; i < reset_length; i++) ++ outl(get_u16(&reset_sequence[i]) << 16, ioaddr + CSR15); ++ for (i = 0; i < init_length; i++) ++ outl(get_u16(&init_sequence[i]) << 16, ioaddr + CSR15); ++ } else { ++ u8 *init_sequence = p + 2; ++ u8 *reset_sequence = p + 3 + init_length; ++ int reset_length = p[2 + init_length]; ++ misc_info = (u16*)(reset_sequence + reset_length); ++ if (startup) { ++ outl(mtable->csr12dir | 0x100, ioaddr + CSR12); ++ for (i = 0; i < reset_length; i++) ++ outl(reset_sequence[i], ioaddr + CSR12); ++ } ++ for (i = 0; i < init_length; i++) ++ outl(init_sequence[i], ioaddr + CSR12); ++ } ++ tmp_info = get_u16(&misc_info[1]); ++ if (tmp_info) ++ tp->advertising[phy_num] = tmp_info | 1; ++ if (tmp_info && startup < 2) { ++ if (tp->mii_advertise == 0) ++ tp->mii_advertise = tp->advertising[phy_num]; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Advertising %4.4x on MII %d.\n", ++ rtdev->name, tp->mii_advertise, tp->phys[phy_num]); ++ tulip_mdio_write(rtdev, tp->phys[phy_num], 4, tp->mii_advertise); ++ } ++ break; ++ } ++ case 5: case 6: { ++ u16 setup[5]; ++ ++ new_csr6 = 0; /* FIXME */ ++ ++ for (i = 0; i < 5; i++) ++ setup[i] = get_u16(&p[i*2 + 1]); ++ ++ if (startup && mtable->has_reset) { ++ struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; ++ unsigned char *rst = rleaf->leafdata; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Resetting the transceiver.\n", ++ rtdev->name); ++ for (i = 0; i < rst[0]; i++) ++ outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); ++ } ++ ++ break; ++ } ++ default: ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Invalid media table selection %d.\n", ++ rtdev->name, mleaf->type); ++ new_csr6 = 0x020E0000; ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: Using media type %s, CSR12 is %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], ++ inl(ioaddr + CSR12) & 0xff); ++ } else if (tp->chip_id == DC21041) { ++ int port = rtdev->if_port <= 4 ? 
rtdev->if_port : 0; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21041 using media %s, CSR12 is %4.4x.\n", ++ rtdev->name, medianame[port == 3 ? 12: port], ++ inl(ioaddr + CSR12)); ++ outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ ++ outl(t21041_csr14[port], ioaddr + CSR14); ++ outl(t21041_csr15[port], ioaddr + CSR15); ++ outl(t21041_csr13[port], ioaddr + CSR13); ++ new_csr6 = 0x80020000; ++ } else if (tp->chip_id == LC82C168) { ++ if (startup && ! tp->medialock) ++ rtdev->if_port = tp->mii_cnt ? 11 : 0; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: PNIC PHY status is %3.3x, media %s.\n", ++ rtdev->name, inl(ioaddr + 0xB8), medianame[rtdev->if_port]); ++ if (tp->mii_cnt) { ++ new_csr6 = 0x810C0000; ++ outl(0x0001, ioaddr + CSR15); ++ outl(0x0201B07A, ioaddr + 0xB8); ++ } else if (startup) { ++ /* Start with 10mbps to do autonegotiation. */ ++ outl(0x32, ioaddr + CSR12); ++ new_csr6 = 0x00420000; ++ outl(0x0001B078, ioaddr + 0xB8); ++ outl(0x0201B078, ioaddr + 0xB8); ++ } else if (rtdev->if_port == 3 || rtdev->if_port == 5) { ++ outl(0x33, ioaddr + CSR12); ++ new_csr6 = 0x01860000; ++ /* Trigger autonegotiation. */ ++ outl(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); ++ } else { ++ outl(0x32, ioaddr + CSR12); ++ new_csr6 = 0x00420000; ++ outl(0x1F078, ioaddr + 0xB8); ++ } ++ } else if (tp->chip_id == DC21040) { /* 21040 */ ++ /* Turn on the xcvr interface. */ ++ int csr12 = inl(ioaddr + CSR12); ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: 21040 media type is %s, CSR12 is %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], csr12); ++ if (tulip_media_cap[rtdev->if_port] & MediaAlwaysFD) ++ tp->full_duplex = 1; ++ new_csr6 = 0x20000; ++ /* Set the full duplux match frame. */ ++ outl(FULL_DUPLEX_MAGIC, ioaddr + CSR11); ++ outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ ++ if (t21040_csr13[rtdev->if_port] & 8) { ++ outl(0x0705, ioaddr + CSR14); ++ outl(0x0006, ioaddr + CSR15); ++ } else { ++ outl(0xffff, ioaddr + CSR14); ++ outl(0x0000, ioaddr + CSR15); ++ } ++ outl(0x8f01 | t21040_csr13[rtdev->if_port], ioaddr + CSR13); ++ } else { /* Unknown chip type with no media table. */ ++ if (tp->default_port == 0) ++ rtdev->if_port = tp->mii_cnt ? 11 : 3; ++ if (tulip_media_cap[rtdev->if_port] & MediaIsMII) { ++ new_csr6 = 0x020E0000; ++ } else if (tulip_media_cap[rtdev->if_port] & MediaIsFx) { ++ new_csr6 = 0x02860000; ++ } else ++ new_csr6 = 0x03860000; ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_DEBUG "%s: No media description table, assuming " ++ "%s transceiver, CSR12 %2.2x.\n", ++ rtdev->name, medianame[rtdev->if_port], ++ inl(ioaddr + CSR12)); ++ } ++ ++ tp->csr6 = new_csr6 | (tp->csr6 & 0xfdff) | (tp->full_duplex ? 0x0200 : 0); ++ ++ mdelay(1); ++ ++ return; ++} ++ ++/* ++ Check the MII negotiated duplex and change the CSR6 setting if ++ required. ++ Return 0 if everything is OK. ++ Return < 0 if the transceiver is missing or has no link beat. 
++ */ ++int tulip_check_duplex(struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ unsigned int bmsr, lpa, negotiated, new_csr6; ++ ++ bmsr = tulip_mdio_read(rtdev, tp->phys[0], MII_BMSR); ++ lpa = tulip_mdio_read(rtdev, tp->phys[0], MII_LPA); ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: MII status %4.4x, Link partner report " ++ "%4.4x.\n", rtdev->name, bmsr, lpa); ++ if (bmsr == 0xffff) ++ return -2; ++ if ((bmsr & BMSR_LSTATUS) == 0) { ++ int new_bmsr = tulip_mdio_read(rtdev, tp->phys[0], MII_BMSR); ++ if ((new_bmsr & BMSR_LSTATUS) == 0) { ++ if (tulip_debug > 1) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: No link beat on the MII interface," ++ " status %4.4x.\n", rtdev->name, new_bmsr); ++ return -1; ++ } ++ } ++ negotiated = lpa & tp->advertising[0]; ++ tp->full_duplex = mii_duplex(tp->full_duplex_lock, negotiated); ++ ++ new_csr6 = tp->csr6; ++ ++ if (negotiated & LPA_100) new_csr6 &= ~TxThreshold; ++ else new_csr6 |= TxThreshold; ++ if (tp->full_duplex) new_csr6 |= FullDuplex; ++ else new_csr6 &= ~FullDuplex; ++ ++ if (new_csr6 != tp->csr6) { ++ tp->csr6 = new_csr6; ++ tulip_restart_rxtx(tp); ++ ++ if (tulip_debug > 0) ++ /*RTnet*/rtdm_printk(KERN_INFO "%s: Setting %s-duplex based on MII" ++ "#%d link partner capability of %4.4x.\n", ++ rtdev->name, tp->full_duplex ? "full" : "half", ++ tp->phys[0], lpa); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++void tulip_find_mii (struct rtnet_device *rtdev, int board_idx) ++{ ++ struct tulip_private *tp = rtdev->priv; ++ int phyn, phy_idx = 0; ++ int mii_reg0; ++ int mii_advert; ++ unsigned int to_advert, new_bmcr, ane_switch; ++ ++ /* Find the connected MII xcvrs. ++ Doing this in open() would allow detecting external xcvrs later, ++ but takes much time. */ ++ for (phyn = 1; phyn <= 32 && phy_idx < sizeof (tp->phys); phyn++) { ++ int phy = phyn & 0x1f; ++ int mii_status = tulip_mdio_read (rtdev, phy, MII_BMSR); ++ if ((mii_status & 0x8301) == 0x8001 || ++ ((mii_status & BMSR_100BASE4) == 0 ++ && (mii_status & 0x7800) != 0)) { ++ /* preserve Becker logic, gain indentation level */ ++ } else { ++ continue; ++ } ++ ++ mii_reg0 = tulip_mdio_read (rtdev, phy, MII_BMCR); ++ mii_advert = tulip_mdio_read (rtdev, phy, MII_ADVERTISE); ++ ane_switch = 0; ++ ++ /* if not advertising at all, gen an ++ * advertising value from the capability ++ * bits in BMSR ++ */ ++ if ((mii_advert & ADVERTISE_ALL) == 0) { ++ unsigned int tmpadv = tulip_mdio_read (rtdev, phy, MII_BMSR); ++ mii_advert = ((tmpadv >> 6) & 0x3e0) | 1; ++ } ++ ++ if (tp->mii_advertise) { ++ tp->advertising[phy_idx] = ++ to_advert = tp->mii_advertise; ++ } else if (tp->advertising[phy_idx]) { ++ to_advert = tp->advertising[phy_idx]; ++ } else { ++ tp->advertising[phy_idx] = ++ tp->mii_advertise = ++ to_advert = mii_advert; ++ } ++ ++ tp->phys[phy_idx++] = phy; ++ ++ /*RTnet*/rtdm_printk(KERN_INFO "tulip%d: MII transceiver #%d " ++ "config %4.4x status %4.4x advertising %4.4x.\n", ++ board_idx, phy, mii_reg0, mii_status, mii_advert); ++ ++ /* Fixup for DLink with miswired PHY. */ ++ if (mii_advert != to_advert) { ++ /*RTnet*/rtdm_printk(KERN_DEBUG "tulip%d: Advertising %4.4x on PHY %d," ++ " previously advertising %4.4x.\n", ++ board_idx, to_advert, phy, mii_advert); ++ tulip_mdio_write (rtdev, phy, 4, to_advert); ++ } ++ ++ /* Enable autonegotiation: some boards default to off. 
*/ ++ if (tp->default_port == 0) { ++ new_bmcr = mii_reg0 | BMCR_ANENABLE; ++ if (new_bmcr != mii_reg0) { ++ new_bmcr |= BMCR_ANRESTART; ++ ane_switch = 1; ++ } ++ } ++ /* ...or disable nway, if forcing media */ ++ else { ++ new_bmcr = mii_reg0 & ~BMCR_ANENABLE; ++ if (new_bmcr != mii_reg0) ++ ane_switch = 1; ++ } ++ ++ /* clear out bits we never want at this point */ ++ new_bmcr &= ~(BMCR_CTST | BMCR_FULLDPLX | BMCR_ISOLATE | ++ BMCR_PDOWN | BMCR_SPEED100 | BMCR_LOOPBACK | ++ BMCR_RESET); ++ ++ if (tp->full_duplex) ++ new_bmcr |= BMCR_FULLDPLX; ++ if (tulip_media_cap[tp->default_port] & MediaIs100) ++ new_bmcr |= BMCR_SPEED100; ++ ++ if (new_bmcr != mii_reg0) { ++ /* some phys need the ANE switch to ++ * happen before forced media settings ++ * will "take." However, we write the ++ * same value twice in order not to ++ * confuse the sane phys. ++ */ ++ if (ane_switch) { ++ tulip_mdio_write (rtdev, phy, MII_BMCR, new_bmcr); ++ udelay (10); ++ } ++ tulip_mdio_write (rtdev, phy, MII_BMCR, new_bmcr); ++ } ++ } ++ tp->mii_cnt = phy_idx; ++ if (tp->mtable && tp->mtable->has_mii && phy_idx == 0) { ++ /*RTnet*/rtdm_printk(KERN_INFO "tulip%d: ***WARNING***: No MII transceiver found!\n", ++ board_idx); ++ tp->phys[0] = 1; ++ } ++} +--- linux/drivers/xenomai/net/drivers/tulip/pnic.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/pnic.c 2021-04-07 16:01:27.304634056 +0800 +@@ -0,0 +1,53 @@ ++/* ++ drivers/net/tulip/pnic.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++#include ++#include "tulip.h" ++ ++ ++void pnic_do_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ u32 phy_reg = inl(ioaddr + 0xB8); ++ u32 new_csr6 = tp->csr6 & ~0x40C40200; ++ ++ if (phy_reg & 0x78000000) { /* Ignore baseT4 */ ++ if (phy_reg & 0x20000000) rtdev->if_port = 5; ++ else if (phy_reg & 0x40000000) rtdev->if_port = 3; ++ else if (phy_reg & 0x10000000) rtdev->if_port = 4; ++ else if (phy_reg & 0x08000000) rtdev->if_port = 0; ++ tp->nwayset = 1; ++ new_csr6 = (rtdev->if_port & 1) ? 0x01860000 : 0x00420000; ++ outl(0x32 | (rtdev->if_port & 1), ioaddr + CSR12); ++ if (rtdev->if_port & 1) ++ outl(0x1F868, ioaddr + 0xB8); ++ if (phy_reg & 0x30000000) { ++ tp->full_duplex = 1; ++ new_csr6 |= 0x00000200; ++ } ++ if (tulip_debug > 1) ++ /*RTnet*/printk(KERN_DEBUG "%s: PNIC autonegotiated status %8.8x, %s.\n", ++ rtdev->name, phy_reg, medianame[rtdev->if_port]); ++ if (tp->csr6 != new_csr6) { ++ tp->csr6 = new_csr6; ++ /* Restart Tx */ ++ tulip_restart_rxtx(tp); ++ } ++ } ++} ++ +--- linux/drivers/xenomai/net/drivers/tulip/pnic2.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/tulip/pnic2.c 2021-04-07 16:01:27.299634063 +0800 +@@ -0,0 +1,158 @@ ++/* ++ drivers/net/tulip/pnic2.c ++ ++ Maintained by Jeff Garzik ++ Copyright 2000,2001 The Linux Kernel Team ++ Written/copyright 1994-2001 by Donald Becker. ++ Modified to hep support PNIC_II by Kevin B. 
Hendricks ++ ++ This software may be used and distributed according to the terms ++ of the GNU General Public License, incorporated herein by reference. ++ ++ Please refer to Documentation/DocBook/tulip.{pdf,ps,html} ++ for more information on this driver, or visit the project ++ Web page at http://sourceforge.net/projects/tulip/ ++ ++*/ ++/* Ported to RTnet by Wittawat Yamwong */ ++ ++ ++/* Understanding the PNIC_II - everything is this file is based ++ * on the PNIC_II_PDF datasheet which is sorely lacking in detail ++ * ++ * As I understand things, here are the registers and bits that ++ * explain the masks and constants used in this file that are ++ * either different from the 21142/3 or important for basic operation. ++ * ++ * ++ * CSR 6 (mask = 0xfe3bd1fd of bits not to change) ++ * ----- ++ * Bit 24 - SCR ++ * Bit 23 - PCS ++ * Bit 22 - TTM (Trasmit Threshold Mode) ++ * Bit 18 - Port Select ++ * Bit 13 - Start - 1, Stop - 0 Transmissions ++ * Bit 11:10 - Loop Back Operation Mode ++ * Bit 9 - Full Duplex mode (Advertise 10BaseT-FD is CSR14<7> is set) ++ * Bit 1 - Start - 1, Stop - 0 Receive ++ * ++ * ++ * CSR 14 (mask = 0xfff0ee39 of bits not to change) ++ * ------ ++ * Bit 19 - PAUSE-Pause ++ * Bit 18 - Advertise T4 ++ * Bit 17 - Advertise 100baseTx-FD ++ * Bit 16 - Advertise 100baseTx-HD ++ * Bit 12 - LTE - Link Test Enable ++ * Bit 7 - ANE - Auto Negotiate Enable ++ * Bit 6 - HDE - Advertise 10baseT-HD ++ * Bit 2 - Reset to Power down - kept as 1 for normal operation ++ * Bit 1 - Loop Back enable for 10baseT MCC ++ * ++ * ++ * CSR 12 ++ * ------ ++ * Bit 25 - Partner can do T4 ++ * Bit 24 - Partner can do 100baseTx-FD ++ * Bit 23 - Partner can do 100baseTx-HD ++ * Bit 22 - Partner can do 10baseT-FD ++ * Bit 21 - Partner can do 10baseT-HD ++ * Bit 15 - LPN is 1 if all above bits are valid other wise 0 ++ * Bit 14:12 - autonegotiation state (write 001 to start autonegotiate) ++ * Bit 3 - Autopolarity state ++ * Bit 2 - LS10B - link state of 10baseT 0 - good, 1 - failed ++ * Bit 1 - LS100B - link state of 100baseT 0 - good, 1- faild ++ * ++ * ++ * Data Port Selection Info ++ *------------------------- ++ * ++ * CSR14<7> CSR6<18> CSR6<22> CSR6<23> CSR6<24> MODE/PORT ++ * 1 0 0 (X) 0 (X) 1 NWAY ++ * 0 0 1 0 (X) 0 10baseT ++ * 0 1 0 1 1 (X) 100baseT ++ * ++ * ++ */ ++ ++ ++ ++#include "tulip.h" ++#include ++#include ++ ++ ++void pnic2_start_nway(/*RTnet*/struct rtnet_device *rtdev) ++{ ++ struct tulip_private *tp = (struct tulip_private *)rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ int csr14; ++ int csr12; ++ ++ /* set up what to advertise during the negotiation */ ++ ++ /* load in csr14 and mask off bits not to touch ++ * comment at top of file explains mask value ++ */ ++ csr14 = (inl(ioaddr + CSR14) & 0xfff0ee39); ++ ++ /* bit 17 - advetise 100baseTx-FD */ ++ if (tp->sym_advertise & 0x0100) csr14 |= 0x00020000; ++ ++ /* bit 16 - advertise 100baseTx-HD */ ++ if (tp->sym_advertise & 0x0080) csr14 |= 0x00010000; ++ ++ /* bit 6 - advertise 10baseT-HD */ ++ if (tp->sym_advertise & 0x0020) csr14 |= 0x00000040; ++ ++ /* Now set bit 12 Link Test Enable, Bit 7 Autonegotiation Enable ++ * and bit 0 Don't PowerDown 10baseT ++ */ ++ csr14 |= 0x00001184; ++ ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: Restarting PNIC2 autonegotiation, " ++ "csr14=%8.8x.\n", rtdev->name, csr14); ++ ++ /* tell pnic2_lnk_change we are doing an nway negotiation */ ++ rtdev->if_port = 0; ++ tp->nway = tp->mediasense = 1; ++ tp->nwayset = tp->lpar = 0; ++ ++ /* now we have to set up csr6 for NWAY state */ 
++ ++ tp->csr6 = inl(ioaddr + CSR6); ++ if (tulip_debug > 1) ++ printk(KERN_DEBUG "%s: On Entry to Nway, " ++ "csr6=%8.8x.\n", rtdev->name, tp->csr6); ++ ++ /* mask off any bits not to touch ++ * comment at top of file explains mask value ++ */ ++ tp->csr6 = tp->csr6 & 0xfe3bd1fd; ++ ++ /* don't forget that bit 9 is also used for advertising */ ++ /* advertise 10baseT-FD for the negotiation (bit 9) */ ++ if (tp->sym_advertise & 0x0040) tp->csr6 |= 0x00000200; ++ ++ /* set bit 24 for nway negotiation mode ... ++ * see Data Port Selection comment at top of file ++ * and "Stop" - reset both Transmit (bit 13) and Receive (bit 1) ++ */ ++ tp->csr6 |= 0x01000000; ++ outl(csr14, ioaddr + CSR14); ++ outl(tp->csr6, ioaddr + CSR6); ++ udelay(100); ++ ++ /* all set up so now force the negotiation to begin */ ++ ++ /* read in current values and mask off all but the ++ * Autonegotiation bits 14:12. Writing a 001 to those bits ++ * should start the autonegotiation ++ */ ++ csr12 = (inl(ioaddr + CSR12) & 0xffff8fff); ++ csr12 |= 0x1000; ++ outl(csr12, ioaddr + CSR12); ++} ++ ++ +--- linux/drivers/xenomai/net/drivers/rt_at91_ether.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_at91_ether.h 2021-04-07 16:01:27.294634070 +0800 +@@ -0,0 +1,109 @@ ++/* ++ * Ethernet driver for the Atmel AT91RM9200 (Thunder) ++ * ++ * Copyright (C) SAN People (Pty) Ltd ++ * ++ * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. ++ * Initial version by Rick Bronson. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#ifndef AT91_ETHERNET ++#define AT91_ETHERNET ++ ++#include ++#include ++ ++/* Davicom 9161 PHY */ ++#define MII_DM9161_ID 0x0181b880 ++#define MII_DM9161A_ID 0x0181b8a0 ++ ++/* Davicom specific registers */ ++#define MII_DSCR_REG 16 ++#define MII_DSCSR_REG 17 ++#define MII_DSINTR_REG 21 ++ ++/* Intel LXT971A PHY */ ++#define MII_LXT971A_ID 0x001378E0 ++ ++/* Intel specific registers */ ++#define MII_ISINTE_REG 18 ++#define MII_ISINTS_REG 19 ++#define MII_LEDCTRL_REG 20 ++ ++/* Realtek RTL8201 PHY */ ++#define MII_RTL8201_ID 0x00008200 ++ ++/* Broadcom BCM5221 PHY */ ++#define MII_BCM5221_ID 0x004061e0 ++ ++/* Broadcom specific registers */ ++#define MII_BCMINTR_REG 26 ++ ++/* National Semiconductor DP83847 */ ++#define MII_DP83847_ID 0x20005c30 ++ ++/* Altima AC101L PHY */ ++#define MII_AC101L_ID 0x00225520 ++ ++/* Micrel KS8721 PHY */ ++#define MII_KS8721_ID 0x00221610 ++ ++/* ........................................................................ 
*/ ++ ++#define MAX_RBUFF_SZ 0x600 /* 1518 rounded up */ ++#define MAX_RX_DESCR 9 /* max number of receive buffers */ ++ ++#define EMAC_DESC_DONE 0x00000001 /* bit for if DMA is done */ ++#define EMAC_DESC_WRAP 0x00000002 /* bit for wrap */ ++ ++#define EMAC_BROADCAST 0x80000000 /* broadcast address */ ++#define EMAC_MULTICAST 0x40000000 /* multicast address */ ++#define EMAC_UNICAST 0x20000000 /* unicast address */ ++ ++struct rbf_t ++{ ++ unsigned int addr; ++ unsigned long size; ++}; ++ ++struct recv_desc_bufs ++{ ++ struct rbf_t descriptors[MAX_RX_DESCR]; /* must be on sizeof (rbf_t) boundary */ ++ char recv_buf[MAX_RX_DESCR][MAX_RBUFF_SZ]; /* must be on long boundary */ ++}; ++ ++struct at91_private ++{ ++ struct net_device_stats stats; ++ struct mii_if_info mii; /* ethtool support */ ++ struct at91_eth_data board_data; /* board-specific configuration */ ++ struct clk *ether_clk; /* clock */ ++ ++ /* PHY */ ++ unsigned long phy_type; /* type of PHY (PHY_ID) */ ++ rtdm_lock_t lock; /* lock for MDI interface */ ++ short phy_media; /* media interface type */ ++ unsigned short phy_address; /* 5-bit MDI address of PHY (0..31) */ ++ struct timer_list check_timer; /* Poll link status */ ++ ++ /* Transmit */ ++ struct rtskb *skb; /* holds skb until xmit interrupt completes */ ++ dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ ++ int skb_length; /* saved skb length for pci_unmap_single */ ++ ++ /* Receive */ ++ int rxBuffIndex; /* index into receive descriptor list */ ++ struct recv_desc_bufs *dlist; /* descriptor list address */ ++ struct recv_desc_bufs *dlist_phys; /* descriptor list physical address */ ++ ++ /* RT Net */ ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t phy_irq_handle; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/pcnet32.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/pcnet32.c 2021-04-07 16:01:27.289634078 +0800 +@@ -0,0 +1,1652 @@ ++/* pcnet32.c: An AMD PCnet32 ethernet driver for linux. */ ++/* ++ * Copyright 1996-1999 Thomas Bogendoerfer ++ * ++ * Derived from the lance driver written 1993,1994,1995 by Donald Becker. ++ * ++ * Copyright 1993 United States Government as represented by the ++ * Director, National Security Agency. ++ * ++ * This software may be used and distributed according to the terms ++ * of the GNU General Public License, incorporated herein by reference. ++ * ++ * This driver is for PCnet32 and PCnetPCI based ethercards ++ */ ++/************************************************************************** ++ * 23 Oct, 2000. ++ * Fixed a few bugs, related to running the controller in 32bit mode. ++ * ++ * Carsten Langgaard, carstenl@mips.com ++ * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. 
++ * ++ * Ported to RTnet: September 2003, Jan Kiszka ++ *************************************************************************/ ++ ++#define DRV_NAME "pcnet32-rt" ++#define DRV_VERSION "1.27a-RTnet-0.2" ++#define DRV_RELDATE "2003-09-24" ++#define PFX DRV_NAME ": " ++ ++static const char *version = ++ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " Jan.Kiszka@web.de\n"; ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ...(MAX_UNITS - 1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++/* ++ * PCI device identifiers for "new style" Linux PCI Device Drivers ++ */ ++static struct pci_device_id pcnet32_pci_tbl[] = { ++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, ++ PCI_ANY_ID, 0, 0, 0 }, ++ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, ++ 0, 0 }, ++ { ++ 0, ++ } ++}; ++ ++MODULE_DEVICE_TABLE(pci, pcnet32_pci_tbl); ++ ++static int cards_found = -1; ++static int pcnet32_have_pci; ++ ++/* ++ * VLB I/O addresses ++ */ ++static unsigned int pcnet32_portlist[] = { 0x300, 0x320, 0x340, 0x360, 0 }; ++ ++static int pcnet32_debug = 1; ++static int tx_start = ++ 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ ++static int pcnet32vlb; /* check for VLB cards ? */ ++ ++static struct rtnet_device *pcnet32_dev; /*** RTnet ***/ ++ ++static int max_interrupt_work = 80; ++/*** RTnet *** ++static int rx_copybreak = 200; ++ *** RTnet ***/ ++ ++#define PCNET32_PORT_AUI 0x00 ++#define PCNET32_PORT_10BT 0x01 ++#define PCNET32_PORT_GPSI 0x02 ++#define PCNET32_PORT_MII 0x03 ++ ++#define PCNET32_PORT_PORTSEL 0x03 ++#define PCNET32_PORT_ASEL 0x04 ++#define PCNET32_PORT_100 0x40 ++#define PCNET32_PORT_FD 0x80 ++ ++#define PCNET32_DMA_MASK 0xffffffff ++ ++/* ++ * table to translate option values from tulip ++ * to internal options ++ */ ++static unsigned char options_mapping[] = { ++ PCNET32_PORT_ASEL, /* 0 Auto-select */ ++ PCNET32_PORT_AUI, /* 1 BNC/AUI */ ++ PCNET32_PORT_AUI, /* 2 AUI/BNC */ ++ PCNET32_PORT_ASEL, /* 3 not supported */ ++ PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */ ++ PCNET32_PORT_ASEL, /* 5 not supported */ ++ PCNET32_PORT_ASEL, /* 6 not supported */ ++ PCNET32_PORT_ASEL, /* 7 not supported */ ++ PCNET32_PORT_ASEL, /* 8 not supported */ ++ PCNET32_PORT_MII, /* 9 MII 10baseT */ ++ PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */ ++ PCNET32_PORT_MII, /* 11 MII (autosel) */ ++ PCNET32_PORT_10BT, /* 12 10BaseT */ ++ PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */ ++ PCNET32_PORT_MII | PCNET32_PORT_100 | ++ PCNET32_PORT_FD, /* 14 MII 100BaseTx-FD */ ++ PCNET32_PORT_ASEL /* 15 not supported */ ++}; ++ ++static int options[MAX_UNITS]; ++static int full_duplex[MAX_UNITS]; ++ ++/* ++ * Theory of Operation ++ * ++ * This driver uses the same software structure as the normal lance ++ * driver. So look for a verbose description in lance.c. The differences ++ * to the normal lance driver is the use of the 32bit mode of PCnet32 ++ * and PCnetPCI chips. 
Because these chips are 32bit chips, there is no ++ * 16MB limitation and we don't need bounce buffers. ++ */ ++ ++/* ++ * History: ++ * v0.01: Initial version ++ * only tested on Alpha Noname Board ++ * v0.02: changed IRQ handling for new interrupt scheme (dev_id) ++ * tested on a ASUS SP3G ++ * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL ++ * looks like the 974 doesn't like stopping and restarting in a ++ * short period of time; now we do a reinit of the lance; the ++ * bug was triggered by doing ifconfig eth0 broadcast ++ * and hangs the machine (thanks to Klaus Liedl for debugging) ++ * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32, ++ * made it standalone (no need for lance.c) ++ * v0.13: added additional PCI detecting for special PCI devices (Compaq) ++ * v0.14: stripped down additional PCI probe (thanks to David C Niemi ++ * and sveneric@xs4all.nl for testing this on their Compaq boxes) ++ * v0.15: added 79C965 (VLB) probe ++ * added interrupt sharing for PCI chips ++ * v0.16: fixed set_multicast_list on Alpha machines ++ * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c ++ * v0.19: changed setting of autoselect bit ++ * v0.20: removed additional Compaq PCI probe; there is now a working one ++ * in arch/i386/bios32.c ++ * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu ++ * v0.22: added printing of status to ring dump ++ * v0.23: changed enet_statistics to net_devive_stats ++ * v0.90: added multicast filter ++ * added module support ++ * changed irq probe to new style ++ * added PCnetFast chip id ++ * added fix for receive stalls with Intel saturn chipsets ++ * added in-place rx skbs like in the tulip driver ++ * minor cleanups ++ * v0.91: added PCnetFast+ chip id ++ * back port to 2.0.x ++ * v1.00: added some stuff from Donald Becker's 2.0.34 version ++ * added support for byte counters in net_dev_stats ++ * v1.01: do ring dumps, only when debugging the driver ++ * increased the transmit timeout ++ * v1.02: fixed memory leak in pcnet32_init_ring() ++ * v1.10: workaround for stopped transmitter ++ * added port selection for modules ++ * detect special T1/E1 WAN card and setup port selection ++ * v1.11: fixed wrong checking of Tx errors ++ * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu) ++ * added save original kmalloc addr for freeing (mcr@solidum.com) ++ * added support for PCnetHome chip (joe@MIT.EDU) ++ * rewritten PCI card detection ++ * added dwio mode to get driver working on some PPC machines ++ * v1.21: added mii selection and mii ioctl ++ * v1.22: changed pci scanning code to make PPC people happy ++ * fixed switching to 32bit mode in pcnet32_open() (thanks ++ * to Michael Richard for noticing this one) ++ * added sub vendor/device id matching (thanks again to ++ * Michael Richard ) ++ * added chip id for 79c973/975 (thanks to Zach Brown ) ++ * v1.23 fixed small bug, when manual selecting MII speed/duplex ++ * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO ++ * underflows. Added tx_start_pt module parameter. Increased ++ * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO ++ * for FAST[+] chipsets. ++ * v1.24ac Added SMP spinlocking - Alan Cox ++ * v1.25kf Added No Interrupt on successful Tx for some Tx's ++ * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France ++ * ++ * - Fixed a few bugs, related to running the controller in 32bit mode. ++ * 23 Oct, 2000. 
Carsten Langgaard, carstenl@mips.com ++ * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. ++ * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker ++ * v1.27 improved CSR/PROM address detection, lots of cleanups, ++ * new pcnet32vlb module option, HP-PARISC support, ++ * added module parameter descriptions, ++ * initial ethtool support - Helge Deller ++ * v1.27a Sun Feb 10 2002 Go Taniguchi ++ * use alloc_etherdev and register_netdev ++ * fix pci probe not increment cards_found ++ * FD auto negotiate error workaround for xSeries250 ++ * clean up and using new mii module ++ */ ++ ++/* ++ * Set the number of Tx and Rx buffers, using Log_2(# buffers). ++ * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. ++ * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). ++ */ ++#ifndef PCNET32_LOG_TX_BUFFERS ++#define PCNET32_LOG_TX_BUFFERS 4 ++#define PCNET32_LOG_RX_BUFFERS 3 /*** RTnet ***/ ++#endif ++ ++#define TX_RING_SIZE (1 << (PCNET32_LOG_TX_BUFFERS)) ++#define TX_RING_MOD_MASK (TX_RING_SIZE - 1) ++#define TX_RING_LEN_BITS ((PCNET32_LOG_TX_BUFFERS) << 12) ++ ++#define RX_RING_SIZE (1 << (PCNET32_LOG_RX_BUFFERS)) ++#define RX_RING_MOD_MASK (RX_RING_SIZE - 1) ++#define RX_RING_LEN_BITS ((PCNET32_LOG_RX_BUFFERS) << 4) ++ ++#define PKT_BUF_SZ 1544 ++ ++/* Offsets from base I/O address. */ ++#define PCNET32_WIO_RDP 0x10 ++#define PCNET32_WIO_RAP 0x12 ++#define PCNET32_WIO_RESET 0x14 ++#define PCNET32_WIO_BDP 0x16 ++ ++#define PCNET32_DWIO_RDP 0x10 ++#define PCNET32_DWIO_RAP 0x14 ++#define PCNET32_DWIO_RESET 0x18 ++#define PCNET32_DWIO_BDP 0x1C ++ ++#define PCNET32_TOTAL_SIZE 0x20 ++ ++/* The PCNET32 Rx and Tx ring descriptors. */ ++struct pcnet32_rx_head { ++ u32 base; ++ s16 buf_length; ++ s16 status; ++ u32 msg_length; ++ u32 reserved; ++}; ++ ++struct pcnet32_tx_head { ++ u32 base; ++ s16 length; ++ s16 status; ++ u32 misc; ++ u32 reserved; ++}; ++ ++/* The PCNET32 32-Bit initialization block, described in databook. */ ++struct pcnet32_init_block { ++ u16 mode; ++ u16 tlen_rlen; ++ u8 phys_addr[6]; ++ u16 reserved; ++ u32 filter[2]; ++ /* Receive and transmit ring base, along with extra bits. */ ++ u32 rx_ring; ++ u32 tx_ring; ++}; ++ ++/* PCnet32 access functions */ ++struct pcnet32_access { ++ u16 (*read_csr)(unsigned long, int); ++ void (*write_csr)(unsigned long, int, u16); ++ u16 (*read_bcr)(unsigned long, int); ++ void (*write_bcr)(unsigned long, int, u16); ++ u16 (*read_rap)(unsigned long); ++ void (*write_rap)(unsigned long, u16); ++ void (*reset)(unsigned long); ++}; ++ ++/* ++ * The first three fields of pcnet32_private are read by the ethernet device ++ * so we allocate the structure should be allocated by pci_alloc_consistent(). ++ */ ++struct pcnet32_private { ++ /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ ++ struct pcnet32_rx_head rx_ring[RX_RING_SIZE]; ++ struct pcnet32_tx_head tx_ring[TX_RING_SIZE]; ++ struct pcnet32_init_block init_block; ++ dma_addr_t dma_addr; /* DMA address of beginning of this object, ++ returned by pci_alloc_consistent */ ++ struct pci_dev ++ *pci_dev; /* Pointer to the associated pci device structure */ ++ const char *name; ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). 
*/ ++ /*** RTnet ***/ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ /*** RTnet ***/ ++ dma_addr_t tx_dma_addr[TX_RING_SIZE]; ++ dma_addr_t rx_dma_addr[RX_RING_SIZE]; ++ struct pcnet32_access a; ++ rtdm_lock_t lock; /* Guard lock */ ++ unsigned int cur_rx, cur_tx; /* The next free ring entry */ ++ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ ++ struct net_device_stats stats; ++ char tx_full; ++ int options; ++ int shared_irq : 1, /* shared irq possible */ ++ ltint : 1, /* enable TxDone-intr inhibitor */ ++ dxsuflo : 1, /* disable transmit stop on uflo */ ++ mii : 1; /* mii port available */ ++ struct rtnet_device *next; /*** RTnet ***/ ++ struct mii_if_info mii_if; ++ rtdm_irq_t irq_handle; ++}; ++ ++static void pcnet32_probe_vlbus(void); ++static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); ++static int pcnet32_probe1(unsigned long, unsigned int, int, struct pci_dev *); ++/*** RTnet ***/ ++static int pcnet32_open(struct rtnet_device *); ++static int pcnet32_init_ring(struct rtnet_device *); ++static int pcnet32_start_xmit(struct rtskb *, struct rtnet_device *); ++static int pcnet32_rx(struct rtnet_device *, nanosecs_abs_t *time_stamp); ++//static void pcnet32_tx_timeout (struct net_device *dev); ++static int pcnet32_interrupt(rtdm_irq_t *irq_handle); ++static int pcnet32_close(struct rtnet_device *); ++static struct net_device_stats *pcnet32_get_stats(struct rtnet_device *); ++//static void pcnet32_set_multicast_list(struct net_device *); ++//static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); ++//static int mdio_read(struct net_device *dev, int phy_id, int reg_num); ++//static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); ++/*** RTnet ***/ ++ ++enum pci_flags_bit { ++ PCI_USES_IO = 1, ++ PCI_USES_MEM = 2, ++ PCI_USES_MASTER = 4, ++ PCI_ADDR0 = 0x10 << 0, ++ PCI_ADDR1 = 0x10 << 1, ++ PCI_ADDR2 = 0x10 << 2, ++ PCI_ADDR3 = 0x10 << 3, ++}; ++ ++static u16 pcnet32_wio_read_csr(unsigned long addr, int index) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ return inw(addr + PCNET32_WIO_RDP); ++} ++ ++static void pcnet32_wio_write_csr(unsigned long addr, int index, u16 val) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ outw(val, addr + PCNET32_WIO_RDP); ++} ++ ++static u16 pcnet32_wio_read_bcr(unsigned long addr, int index) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ return inw(addr + PCNET32_WIO_BDP); ++} ++ ++static void pcnet32_wio_write_bcr(unsigned long addr, int index, u16 val) ++{ ++ outw(index, addr + PCNET32_WIO_RAP); ++ outw(val, addr + PCNET32_WIO_BDP); ++} ++ ++static u16 pcnet32_wio_read_rap(unsigned long addr) ++{ ++ return inw(addr + PCNET32_WIO_RAP); ++} ++ ++static void pcnet32_wio_write_rap(unsigned long addr, u16 val) ++{ ++ outw(val, addr + PCNET32_WIO_RAP); ++} ++ ++static void pcnet32_wio_reset(unsigned long addr) ++{ ++ inw(addr + PCNET32_WIO_RESET); ++} ++ ++static int pcnet32_wio_check(unsigned long addr) ++{ ++ outw(88, addr + PCNET32_WIO_RAP); ++ return (inw(addr + PCNET32_WIO_RAP) == 88); ++} ++ ++static struct pcnet32_access pcnet32_wio = { ++ read_csr: pcnet32_wio_read_csr, ++ write_csr: pcnet32_wio_write_csr, ++ read_bcr: pcnet32_wio_read_bcr, ++ write_bcr: pcnet32_wio_write_bcr, ++ read_rap: pcnet32_wio_read_rap, ++ write_rap: pcnet32_wio_write_rap, ++ reset: pcnet32_wio_reset ++}; ++ ++static u16 pcnet32_dwio_read_csr(unsigned long addr, int index) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ return (inl(addr + 
PCNET32_DWIO_RDP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_csr(unsigned long addr, int index, u16 val) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ outl(val, addr + PCNET32_DWIO_RDP); ++} ++ ++static u16 pcnet32_dwio_read_bcr(unsigned long addr, int index) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ return (inl(addr + PCNET32_DWIO_BDP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_bcr(unsigned long addr, int index, u16 val) ++{ ++ outl(index, addr + PCNET32_DWIO_RAP); ++ outl(val, addr + PCNET32_DWIO_BDP); ++} ++ ++static u16 pcnet32_dwio_read_rap(unsigned long addr) ++{ ++ return (inl(addr + PCNET32_DWIO_RAP) & 0xffff); ++} ++ ++static void pcnet32_dwio_write_rap(unsigned long addr, u16 val) ++{ ++ outl(val, addr + PCNET32_DWIO_RAP); ++} ++ ++static void pcnet32_dwio_reset(unsigned long addr) ++{ ++ inl(addr + PCNET32_DWIO_RESET); ++} ++ ++static int pcnet32_dwio_check(unsigned long addr) ++{ ++ outl(88, addr + PCNET32_DWIO_RAP); ++ return ((inl(addr + PCNET32_DWIO_RAP) & 0xffff) == 88); ++} ++ ++static struct pcnet32_access pcnet32_dwio = { ++ read_csr: pcnet32_dwio_read_csr, ++ write_csr: pcnet32_dwio_write_csr, ++ read_bcr: pcnet32_dwio_read_bcr, ++ write_bcr: pcnet32_dwio_write_bcr, ++ read_rap: pcnet32_dwio_read_rap, ++ write_rap: pcnet32_dwio_write_rap, ++ reset: pcnet32_dwio_reset ++}; ++ ++/* only probes for non-PCI devices, the rest are handled by ++ * pci_register_driver via pcnet32_probe_pci */ ++ ++static void pcnet32_probe_vlbus(void) ++{ ++ unsigned int *port, ioaddr; ++ ++ /* search for PCnet32 VLB cards at known addresses */ ++ for (port = pcnet32_portlist; (ioaddr = *port); port++) { ++ if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, ++ "pcnet32_probe_vlbus")) { ++ /* check if there is really a pcnet chip on that ioaddr */ ++ if ((inb(ioaddr + 14) == 0x57) && ++ (inb(ioaddr + 15) == 0x57)) { ++ pcnet32_probe1(ioaddr, 0, 0, NULL); ++ } else { ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ } ++ } ++ } ++} ++ ++static int pcnet32_probe_pci(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ unsigned long ioaddr; ++ int err; ++ ++ err = pci_enable_device(pdev); ++ if (err < 0) { ++ printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); ++ return err; ++ } ++ pci_set_master(pdev); ++ ++ ioaddr = pci_resource_start(pdev, 0); ++ if (!ioaddr) { ++ printk(KERN_ERR PFX "card has no PCI IO resources, aborting\n"); ++ return -ENODEV; ++ } ++ ++ if (!dma_supported(&pdev->dev, PCNET32_DMA_MASK)) { ++ printk(KERN_ERR PFX ++ "architecture does not support 32bit PCI busmaster DMA\n"); ++ return -ENODEV; ++ } ++ ++ return pcnet32_probe1(ioaddr, pdev->irq, 1, pdev); ++} ++ ++/* pcnet32_probe1 ++ * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. ++ * pdev will be NULL when called from pcnet32_probe_vlbus. 
++ */ ++static int pcnet32_probe1(unsigned long ioaddr, unsigned int irq_line, ++ int shared, struct pci_dev *pdev) ++{ ++ struct pcnet32_private *lp; ++ dma_addr_t lp_dma_addr; ++ int i, media; ++ int fdx, mii, fset, dxsuflo, ltint; ++ int chip_version; ++ char *chipname; ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct pcnet32_access *a = NULL; ++ u8 promaddr[6]; ++ ++ // *** RTnet *** ++ cards_found++; ++ if (cards[cards_found] == 0) ++ return -ENODEV; ++ // *** RTnet *** ++ ++ /* reset the chip */ ++ pcnet32_wio_reset(ioaddr); ++ ++ /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ ++ if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { ++ a = &pcnet32_wio; ++ } else { ++ pcnet32_dwio_reset(ioaddr); ++ if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && ++ pcnet32_dwio_check(ioaddr)) { ++ a = &pcnet32_dwio; ++ } else ++ return -ENODEV; ++ } ++ ++ chip_version = ++ a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16); ++ if (pcnet32_debug > 2) ++ printk(KERN_INFO " PCnet chip version is %#x.\n", ++ chip_version); ++ if ((chip_version & 0xfff) != 0x003) ++ return -ENODEV; ++ ++ /* initialize variables */ ++ fdx = mii = fset = dxsuflo = ltint = 0; ++ chip_version = (chip_version >> 12) & 0xffff; ++ ++ switch (chip_version) { ++ case 0x2420: ++ chipname = "PCnet/PCI 79C970"; /* PCI */ ++ break; ++ case 0x2430: ++ if (shared) ++ chipname = ++ "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ ++ else ++ chipname = "PCnet/32 79C965"; /* 486/VL bus */ ++ break; ++ case 0x2621: ++ chipname = "PCnet/PCI II 79C970A"; /* PCI */ ++ fdx = 1; ++ break; ++ case 0x2623: ++ chipname = "PCnet/FAST 79C971"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ fset = 1; ++ ltint = 1; ++ break; ++ case 0x2624: ++ chipname = "PCnet/FAST+ 79C972"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ fset = 1; ++ break; ++ case 0x2625: ++ chipname = "PCnet/FAST III 79C973"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ break; ++ case 0x2626: ++ chipname = "PCnet/Home 79C978"; /* PCI */ ++ fdx = 1; ++ /* ++ * This is based on specs published at www.amd.com. This section ++ * assumes that a card with a 79C978 wants to go into 1Mb HomePNA ++ * mode. The 79C978 can also go into standard ethernet, and there ++ * probably should be some sort of module option to select the ++ * mode by which the card should operate ++ */ ++ /* switch to home wiring mode */ ++ media = a->read_bcr(ioaddr, 49); ++ if (pcnet32_debug > 2) ++ printk(KERN_DEBUG PFX "media reset to %#x.\n", media); ++ a->write_bcr(ioaddr, 49, media); ++ break; ++ case 0x2627: ++ chipname = "PCnet/FAST III 79C975"; /* PCI */ ++ fdx = 1; ++ mii = 1; ++ break; ++ default: ++ printk(KERN_INFO PFX "PCnet version %#x, no PCnet32 chip.\n", ++ chip_version); ++ return -ENODEV; ++ } ++ ++ /* ++ * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit ++ * starting until the packet is loaded. Strike one for reliability, lose ++ * one for latency - although on PCI this isnt a big loss. Older chips ++ * have FIFO's smaller than a packet, so you can't do this. 
++ */ ++ ++ if (fset) { ++ a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); ++ a->write_csr(ioaddr, 80, ++ (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); ++ dxsuflo = 1; ++ ltint = 1; ++ } ++ ++ /*** RTnet ***/ ++ dev = rt_alloc_etherdev(0, RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) ++ return -ENOMEM; ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ /*** RTnet ***/ ++ ++ printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); ++ ++ /* In most chips, after a chip reset, the ethernet address is read from the ++ * station address PROM at the base address and programmed into the ++ * "Physical Address Registers" CSR12-14. ++ * As a precautionary measure, we read the PROM values and complain if ++ * they disagree with the CSRs. Either way, we use the CSR values, and ++ * double check that they are valid. ++ */ ++ for (i = 0; i < 3; i++) { ++ unsigned int val; ++ val = a->read_csr(ioaddr, i + 12) & 0x0ffff; ++ /* There may be endianness issues here. */ ++ dev->dev_addr[2 * i] = val & 0x0ff; ++ dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff; ++ } ++ ++ /* read PROM address and compare with CSR address */ ++ for (i = 0; i < 6; i++) ++ promaddr[i] = inb(ioaddr + i); ++ ++ if (memcmp(promaddr, dev->dev_addr, 6) || ++ !is_valid_ether_addr(dev->dev_addr)) { ++#ifndef __powerpc__ ++ if (is_valid_ether_addr(promaddr)) { ++#else ++ if (!is_valid_ether_addr(dev->dev_addr) && ++ is_valid_ether_addr(promaddr)) { ++#endif ++ printk(" warning: CSR address invalid,\n"); ++ printk(KERN_INFO " using instead PROM address of"); ++ memcpy(dev->dev_addr, promaddr, 6); ++ } ++ } ++ ++ /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ ++ if (!is_valid_ether_addr(dev->dev_addr)) ++ memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); ++ ++ for (i = 0; i < 6; i++) ++ printk(" %2.2x", dev->dev_addr[i]); ++ ++ if (((chip_version + 1) & 0xfffe) == ++ 0x2624) { /* Version 0x2623 or 0x2624 */ ++ i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ ++ printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i); ++ switch (i >> 10) { ++ case 0: ++ printk(" 20 bytes,"); ++ break; ++ case 1: ++ printk(" 64 bytes,"); ++ break; ++ case 2: ++ printk(" 128 bytes,"); ++ break; ++ case 3: ++ printk("~220 bytes,"); ++ break; ++ } ++ i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ ++ printk(" BCR18(%x):", i & 0xffff); ++ if (i & (1 << 5)) ++ printk("BurstWrEn "); ++ if (i & (1 << 6)) ++ printk("BurstRdEn "); ++ if (i & (1 << 7)) ++ printk("DWordIO "); ++ if (i & (1 << 11)) ++ printk("NoUFlow "); ++ i = a->read_bcr(ioaddr, 25); ++ printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8); ++ i = a->read_bcr(ioaddr, 26); ++ printk(" SRAM_BND=0x%04x,", i << 8); ++ i = a->read_bcr(ioaddr, 27); ++ if (i & (1 << 14)) ++ printk("LowLatRx"); ++ } ++ ++ dev->base_addr = ioaddr; ++ if (request_region(ioaddr, PCNET32_TOTAL_SIZE, chipname) == NULL) ++ return -EBUSY; ++ ++ /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ ++ if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == ++ NULL) { ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENOMEM; ++ } ++ ++ memset(lp, 0, sizeof(*lp)); ++ lp->dma_addr = lp_dma_addr; ++ lp->pci_dev = pdev; ++ ++ rtdm_lock_init(&lp->lock); ++ ++ dev->priv = lp; ++ lp->name = chipname; ++ lp->shared_irq = shared; ++ lp->mii_if.full_duplex = fdx; ++ lp->dxsuflo = dxsuflo; ++ lp->ltint = ltint; ++ lp->mii = mii; ++ if 
((cards_found >= MAX_UNITS) || ++ (options[cards_found] > (int)sizeof(options_mapping))) ++ lp->options = PCNET32_PORT_ASEL; ++ else ++ lp->options = options_mapping[options[cards_found]]; ++ /*** RTnet *** ++ lp->mii_if.dev = dev; ++ lp->mii_if.mdio_read = mdio_read; ++ lp->mii_if.mdio_write = mdio_write; ++ *** RTnet ***/ ++ ++ if (fdx && !(lp->options & PCNET32_PORT_ASEL) && ++ ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) ++ lp->options |= PCNET32_PORT_FD; ++ ++ if (!a) { ++ printk(KERN_ERR PFX "No access methods\n"); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENODEV; ++ } ++ lp->a = *a; ++ ++ /* detect special T1/E1 WAN card by checking for MAC address */ ++ if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 && ++ dev->dev_addr[2] == 0x75) ++ lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI; ++ ++ lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */ ++ lp->init_block.tlen_rlen = ++ le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); ++ for (i = 0; i < 6; i++) ++ lp->init_block.phys_addr[i] = dev->dev_addr[i]; ++ lp->init_block.filter[0] = 0x00000000; ++ lp->init_block.filter[1] = 0x00000000; ++ lp->init_block.rx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); ++ lp->init_block.tx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); ++ ++ /* switch pcnet32 to 32bit mode */ ++ a->write_bcr(ioaddr, 20, 2); ++ ++ a->write_csr( ++ ioaddr, 1, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & ++ 0xffff); ++ a->write_csr( ++ ioaddr, 2, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> ++ 16); ++ ++ if (irq_line) { ++ dev->irq = irq_line; ++ } ++ ++ if (dev->irq >= 2) ++ printk(" assigned IRQ %d.\n", dev->irq); ++ else { ++ unsigned long irq_mask = probe_irq_on(); ++ ++ /* ++ * To auto-IRQ we enable the initialization-done and DMA error ++ * interrupts. For ISA boards we get a DMA error, but VLB and PCI ++ * boards will work. ++ */ ++ /* Trigger an initialization just for the interrupt. */ ++ a->write_csr(ioaddr, 0, 0x41); ++ mdelay(1); ++ ++ dev->irq = probe_irq_off(irq_mask); ++ if (dev->irq) ++ printk(", probed IRQ %d.\n", dev->irq); ++ else { ++ printk(", failed to detect IRQ line.\n"); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, ++ lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ return -ENODEV; ++ } ++ } ++ ++ /* The PCNET32-specific entries in the device structure. */ ++ dev->open = &pcnet32_open; ++ dev->hard_start_xmit = &pcnet32_start_xmit; ++ dev->stop = &pcnet32_close; ++ dev->get_stats = &pcnet32_get_stats; ++ /*** RTnet *** ++ dev->set_multicast_list = &pcnet32_set_multicast_list; ++ dev->do_ioctl = &pcnet32_ioctl; ++ dev->tx_timeout = pcnet32_tx_timeout; ++ dev->watchdog_timeo = (5*HZ); ++ *** RTnet ***/ ++ ++ lp->next = pcnet32_dev; ++ pcnet32_dev = dev; ++ ++ /* Fill in the generic fields of the device structure. 
*/ ++ /*** RTnet ***/ ++ if ((i = rt_register_rtnetdev(dev))) { ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ release_region(ioaddr, PCNET32_TOTAL_SIZE); ++ rtdev_free(dev); ++ return i; ++ } ++ /*** RTnet ***/ ++ ++ printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name); ++ return 0; ++} ++ ++static int pcnet32_open(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ unsigned long ioaddr = dev->base_addr; ++ u16 val; ++ int i; ++ ++ /*** RTnet ***/ ++ if (dev->irq == 0) ++ return -EAGAIN; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ i = rtdm_irq_request(&lp->irq_handle, dev->irq, pcnet32_interrupt, ++ RTDM_IRQTYPE_SHARED, "rt_pcnet32", dev); ++ if (i) ++ return i; ++ /*** RTnet ***/ ++ ++ /* Check for a valid station address */ ++ if (!is_valid_ether_addr(dev->dev_addr)) ++ return -EINVAL; ++ ++ /* Reset the PCNET32 */ ++ lp->a.reset(ioaddr); ++ ++ /* switch pcnet32 to 32bit mode */ ++ lp->a.write_bcr(ioaddr, 20, 2); ++ ++ if (pcnet32_debug > 1) ++ printk(KERN_DEBUG ++ "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", ++ dev->name, dev->irq, ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, tx_ring)), ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, rx_ring)), ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, init_block))); ++ ++ /* set/reset autoselect bit */ ++ val = lp->a.read_bcr(ioaddr, 2) & ~2; ++ if (lp->options & PCNET32_PORT_ASEL) ++ val |= 2; ++ lp->a.write_bcr(ioaddr, 2, val); ++ ++ /* handle full duplex setting */ ++ if (lp->mii_if.full_duplex) { ++ val = lp->a.read_bcr(ioaddr, 9) & ~3; ++ if (lp->options & PCNET32_PORT_FD) { ++ val |= 1; ++ if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI)) ++ val |= 2; ++ } else if (lp->options & PCNET32_PORT_ASEL) { ++ /* workaround of xSeries250, turn on for 79C975 only */ ++ i = ((lp->a.read_csr(ioaddr, 88) | ++ (lp->a.read_csr(ioaddr, 89) << 16)) >> ++ 12) & ++ 0xffff; ++ if (i == 0x2627) ++ val |= 3; ++ } ++ lp->a.write_bcr(ioaddr, 9, val); ++ } ++ ++ /* set/reset GPSI bit in test register */ ++ val = lp->a.read_csr(ioaddr, 124) & ~0x10; ++ if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI) ++ val |= 0x10; ++ lp->a.write_csr(ioaddr, 124, val); ++ ++ if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) { ++ val = lp->a.read_bcr(ioaddr, 32) & ++ ~0x38; /* disable Auto Negotiation, set 10Mpbs, HD */ ++ if (lp->options & PCNET32_PORT_FD) ++ val |= 0x10; ++ if (lp->options & PCNET32_PORT_100) ++ val |= 0x08; ++ lp->a.write_bcr(ioaddr, 32, val); ++ } else { ++ if (lp->options & ++ PCNET32_PORT_ASEL) { /* enable auto negotiate, setup, disable fd */ ++ val = lp->a.read_bcr(ioaddr, 32) & ~0x98; ++ val |= 0x20; ++ lp->a.write_bcr(ioaddr, 32, val); ++ } ++ } ++ ++#ifdef DO_DXSUFLO ++ if (lp->dxsuflo) { /* Disable transmit stop on underflow */ ++ val = lp->a.read_csr(ioaddr, 3); ++ val |= 0x40; ++ lp->a.write_csr(ioaddr, 3, val); ++ } ++#endif ++ ++ if (lp->ltint) { /* Enable TxDone-intr inhibitor */ ++ val = lp->a.read_csr(ioaddr, 5); ++ val |= (1 << 14); ++ lp->a.write_csr(ioaddr, 5, val); ++ } ++ ++ lp->init_block.mode = ++ le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7); ++ lp->init_block.filter[0] = 0x00000000; ++ lp->init_block.filter[1] = 0x00000000; ++ if (pcnet32_init_ring(dev)) ++ return -ENOMEM; ++ ++ /* Re-initialize the PCNET32, and start it when done. 
*/ ++ lp->a.write_csr( ++ ioaddr, 1, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & ++ 0xffff); ++ lp->a.write_csr( ++ ioaddr, 2, ++ (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> ++ 16); ++ ++ lp->a.write_csr(ioaddr, 4, 0x0915); ++ lp->a.write_csr(ioaddr, 0, 0x0001); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++ i = 0; ++ while (i++ < 100) ++ if (lp->a.read_csr(ioaddr, 0) & 0x0100) ++ break; ++ /* ++ * We used to clear the InitDone bit, 0x0100, here but Mark Stockton ++ * reports that doing so triggers a bug in the '974. ++ */ ++ lp->a.write_csr(ioaddr, 0, 0x0042); ++ ++ if (pcnet32_debug > 2) ++ printk(KERN_DEBUG ++ "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", ++ dev->name, i, ++ (u32)(lp->dma_addr + ++ offsetof(struct pcnet32_private, init_block)), ++ lp->a.read_csr(ioaddr, 0)); ++ ++ return 0; /* Always succeed */ ++} ++ ++/* ++ * The LANCE has been halted for one reason or another (busmaster memory ++ * arbitration error, Tx FIFO underflow, driver stopped it to reconfigure, ++ * etc.). Modern LANCE variants always reload their ring-buffer ++ * configuration when restarted, so we must reinitialize our ring ++ * context before restarting. As part of this reinitialization, ++ * find all packets still on the Tx ring and pretend that they had been ++ * sent (in effect, drop the packets on the floor) - the higher-level ++ * protocols will time out and retransmit. It'd be better to shuffle ++ * these skbs to a temp list and then actually re-Tx them after ++ * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com ++ */ ++ ++/*** RTnet *** ++static void ++pcnet32_purge_tx_ring(struct net_device *dev) ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (lp->tx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); ++ dev_kfree_skb(lp->tx_skbuff[i]); ++ lp->tx_skbuff[i] = NULL; ++ lp->tx_dma_addr[i] = 0; ++ } ++ } ++} ++ *** RTnet ***/ ++ ++/* Initialize the PCNET32 Rx and Tx rings. */ ++static int pcnet32_init_ring(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ lp->tx_full = 0; ++ lp->cur_rx = lp->cur_tx = 0; ++ lp->dirty_rx = lp->dirty_tx = 0; ++ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *rx_skbuff = lp->rx_skbuff[i]; /*** RTnet ***/ ++ if (rx_skbuff == NULL) { ++ if (!(rx_skbuff = lp->rx_skbuff[i] = ++ rtnetdev_alloc_rtskb( ++ dev, ++ PKT_BUF_SZ))) { /*** RTnet ***/ ++ /* there is not much, we can do at this point */ ++ printk(KERN_ERR ++ "%s: pcnet32_init_ring rtnetdev_alloc_rtskb failed.\n", ++ dev->name); ++ return -1; ++ } ++ rtskb_reserve(rx_skbuff, 2); /*** RTnet ***/ ++ } ++ lp->rx_dma_addr[i] = ++ pci_map_single(lp->pci_dev, rx_skbuff->tail, ++ rx_skbuff->len, PCI_DMA_FROMDEVICE); ++ lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); ++ lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); ++ lp->rx_ring[i].status = le16_to_cpu(0x8000); ++ } ++ /* The Tx buffer address is filled in as needed, but we do need to clear ++ the upper ownership bit. 
*/ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ lp->tx_ring[i].base = 0; ++ lp->tx_ring[i].status = 0; ++ lp->tx_dma_addr[i] = 0; ++ } ++ ++ lp->init_block.tlen_rlen = ++ le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); ++ for (i = 0; i < 6; i++) ++ lp->init_block.phys_addr[i] = dev->dev_addr[i]; ++ lp->init_block.rx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); ++ lp->init_block.tx_ring = (u32)le32_to_cpu( ++ lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); ++ return 0; ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int pcnet32_start_xmit(struct rtskb *skb, ++ struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ unsigned long ioaddr = dev->base_addr; ++ u16 status; ++ int entry; ++ rtdm_lockctx_t context; ++ ++ if (pcnet32_debug > 3) { ++ rtdm_printk(KERN_DEBUG ++ "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ } ++ ++ /*** RTnet ***/ ++ rtdm_lock_get_irqsave(&lp->lock, context); ++ /*** RTnet ***/ ++ ++ /* Default status -- will not enable Successful-TxDone ++ * interrupt when that option is available to us. ++ */ ++ status = 0x8300; ++ if ((lp->ltint) && ((lp->cur_tx - lp->dirty_tx == TX_RING_SIZE / 2) || ++ (lp->cur_tx - lp->dirty_tx >= TX_RING_SIZE - 2))) { ++ /* Enable Successful-TxDone interrupt if we have ++ * 1/2 of, or nearly all of, our ring buffer Tx'd ++ * but not yet cleaned up. Thus, most of the time, ++ * we will not enable Successful-TxDone interrupts. ++ */ ++ status = 0x9300; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ ++ /* Mask to ring buffer boundary. */ ++ entry = lp->cur_tx & TX_RING_MOD_MASK; ++ ++ /* Caution: the write order is important here, set the base address ++ with the "ownership" bits last. */ ++ ++ lp->tx_ring[entry].length = le16_to_cpu(-skb->len); ++ ++ lp->tx_ring[entry].misc = 0x00000000; ++ ++ lp->tx_skbuff[entry] = skb; ++ lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, ++ skb->len, PCI_DMA_TODEVICE); ++ lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); ++ ++ /*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*** RTnet ***/ ++ ++ wmb(); ++ lp->tx_ring[entry].status = le16_to_cpu(status); ++ ++ lp->cur_tx++; ++ lp->stats.tx_bytes += skb->len; ++ ++ /* Trigger an immediate send poll. */ ++ lp->a.write_csr(ioaddr, 0, 0x0048); ++ ++ //dev->trans_start = jiffies; /*** RTnet ***/ ++ ++ if (lp->tx_ring[(entry + 1) & TX_RING_MOD_MASK].base == 0) ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ else { ++ lp->tx_full = 1; ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ } ++ /*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&lp->lock, context); ++ /*** RTnet ***/ ++ return 0; ++} ++ ++/* The PCNET32 interrupt handler. 
*/ ++static int pcnet32_interrupt(rtdm_irq_t *irq_handle) /*** RTnet ***/ ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = rtdm_irq_get_arg( ++ irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct pcnet32_private *lp; ++ unsigned long ioaddr; ++ u16 csr0, rap; ++ int boguscnt = max_interrupt_work; ++ int must_restart; ++ unsigned int old_packet_cnt; /*** RTnet ***/ ++ int ret = RTDM_IRQ_NONE; ++ ++ /*** RTnet *** ++ if (!dev) { ++ rtdm_printk (KERN_DEBUG "%s(): irq %d for unknown device\n", ++ __FUNCTION__, irq); ++ return; ++ } ++ *** RTnet ***/ ++ ++ ioaddr = dev->base_addr; ++ lp = dev->priv; ++ old_packet_cnt = lp->stats.rx_packets; /*** RTnet ***/ ++ ++ rtdm_lock_get(&lp->lock); /*** RTnet ***/ ++ ++ rap = lp->a.read_rap(ioaddr); ++ while ((csr0 = lp->a.read_csr(ioaddr, 0)) & 0x8600 && --boguscnt >= 0) { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ lp->a.write_csr(ioaddr, 0, csr0 & ~0x004f); ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ must_restart = 0; ++ ++ if (pcnet32_debug > 5) ++ rtdm_printk( ++ KERN_DEBUG ++ "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", ++ dev->name, csr0, lp->a.read_csr(ioaddr, 0)); ++ ++ if (csr0 & 0x0400) /* Rx interrupt */ ++ pcnet32_rx(dev, &time_stamp); ++ ++ if (csr0 & 0x0200) { /* Tx-done interrupt */ ++ unsigned int dirty_tx = lp->dirty_tx; ++ ++ while (dirty_tx < lp->cur_tx) { ++ int entry = dirty_tx & TX_RING_MOD_MASK; ++ int status = (short)le16_to_cpu( ++ lp->tx_ring[entry].status); ++ ++ if (status < 0) ++ break; /* It still hasn't been Txed */ ++ ++ lp->tx_ring[entry].base = 0; ++ ++ if (status & 0x4000) { ++ /* There was an major error, log it. */ ++ int err_status = le32_to_cpu( ++ lp->tx_ring[entry].misc); ++ lp->stats.tx_errors++; ++ if (err_status & 0x04000000) ++ lp->stats.tx_aborted_errors++; ++ if (err_status & 0x08000000) ++ lp->stats.tx_carrier_errors++; ++ if (err_status & 0x10000000) ++ lp->stats.tx_window_errors++; ++#ifndef DO_DXSUFLO ++ if (err_status & 0x40000000) { ++ lp->stats.tx_fifo_errors++; ++ /* Ackk! On FIFO errors the Tx unit is turned off! */ ++ /* Remove this verbosity later! */ ++ rtdm_printk( ++ KERN_ERR ++ "%s: Tx FIFO error! CSR0=%4.4x\n", ++ dev->name, csr0); ++ must_restart = 1; ++ } ++#else ++ if (err_status & 0x40000000) { ++ lp->stats.tx_fifo_errors++; ++ if (!lp->dxsuflo) { /* If controller doesn't recover ... */ ++ /* Ackk! On FIFO errors the Tx unit is turned off! */ ++ /* Remove this verbosity later! */ ++ rtdm_printk( ++ KERN_ERR ++ "%s: Tx FIFO error! CSR0=%4.4x\n", ++ dev->name, ++ csr0); ++ must_restart = 1; ++ } ++ } ++#endif ++ } else { ++ if (status & 0x1800) ++ lp->stats.collisions++; ++ lp->stats.tx_packets++; ++ } ++ ++ /* We must free the original skb */ ++ if (lp->tx_skbuff[entry]) { ++ pci_unmap_single( ++ lp->pci_dev, ++ lp->tx_dma_addr[entry], ++ lp->tx_skbuff[entry]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb( ++ lp->tx_skbuff[entry]); /*** RTnet ***/ ++ lp->tx_skbuff[entry] = 0; ++ lp->tx_dma_addr[entry] = 0; ++ } ++ dirty_tx++; ++ } ++ ++ if (lp->cur_tx - dirty_tx >= TX_RING_SIZE) { ++ rtdm_printk( ++ KERN_ERR ++ "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n", ++ dev->name, dirty_tx, lp->cur_tx, ++ lp->tx_full); ++ dirty_tx += TX_RING_SIZE; ++ } ++ ++ if (lp->tx_full && ++ rtnetif_queue_stopped(dev) && /*** RTnet ***/ ++ dirty_tx > lp->cur_tx - TX_RING_SIZE + 2) { ++ /* The ring is no longer full, clear tbusy. 
*/ ++ lp->tx_full = 0; ++ rtnetif_wake_queue(dev); /*** RTnet ***/ ++ } ++ lp->dirty_tx = dirty_tx; ++ } ++ ++ /* Log misc errors. */ ++ if (csr0 & 0x4000) ++ lp->stats.tx_errors++; /* Tx babble. */ ++ if (csr0 & 0x1000) { ++ /* ++ * this happens when our receive ring is full. This shouldn't ++ * be a problem as we will see normal rx interrupts for the frames ++ * in the receive ring. But there are some PCI chipsets (I can reproduce ++ * this on SP3G with Intel saturn chipset) which have sometimes problems ++ * and will fill up the receive ring with error descriptors. In this ++ * situation we don't get a rx interrupt, but a missed frame interrupt sooner ++ * or later. So we try to clean up our receive ring here. ++ */ ++ pcnet32_rx(dev, &time_stamp); ++ lp->stats.rx_errors++; /* Missed a Rx frame. */ ++ } ++ if (csr0 & 0x0800) { ++ rtdm_printk( ++ KERN_ERR ++ "%s: Bus master arbitration failure, status %4.4x.\n", ++ dev->name, csr0); ++ /* unlike for the lance, there is no restart needed */ ++ } ++ ++ /*** RTnet ***/ ++ /*** RTnet ***/ ++ } ++ ++ /* Clear any other interrupt, and set interrupt enable. */ ++ lp->a.write_csr(ioaddr, 0, 0x7940); ++ lp->a.write_rap(ioaddr, rap); ++ ++ if (pcnet32_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ ++ /*** RTnet ***/ ++ rtdm_lock_put(&lp->lock); ++ ++ if (old_packet_cnt != lp->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ ++ return ret; ++ /*** RTnet ***/ ++} ++ ++static int pcnet32_rx(struct rtnet_device *dev, ++ nanosecs_abs_t *time_stamp) /*** RTnet ***/ ++{ ++ struct pcnet32_private *lp = dev->priv; ++ int entry = lp->cur_rx & RX_RING_MOD_MASK; ++ ++ /* If we own the next entry, it's a new packet. Send it up. */ ++ while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { ++ int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; ++ ++ if (status != 0x03) { /* There was an error. */ ++ /* ++ * There is a tricky error noted by John Murphy, ++ * to Russ Nelson: Even with full-sized ++ * buffers it's possible for a jabber packet to use two ++ * buffers, with only the last correctly noting the error. ++ */ ++ if (status & ++ 0x01) /* Only count a general error at the */ ++ lp->stats.rx_errors++; /* end of a packet.*/ ++ if (status & 0x20) ++ lp->stats.rx_frame_errors++; ++ if (status & 0x10) ++ lp->stats.rx_over_errors++; ++ if (status & 0x08) ++ lp->stats.rx_crc_errors++; ++ if (status & 0x04) ++ lp->stats.rx_fifo_errors++; ++ lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); ++ } else { ++ /* Malloc up new buffer, compatible with net-2e. 
*/ ++ short pkt_len = ++ (le32_to_cpu(lp->rx_ring[entry].msg_length) & ++ 0xfff) - ++ 4; ++ struct rtskb *skb; /*** RTnet ***/ ++ ++ if (pkt_len < 60) { ++ rtdm_printk(KERN_ERR "%s: Runt packet!\n", ++ dev->name); ++ lp->stats.rx_errors++; ++ } else { ++ /*** RTnet ***/ ++ /*int rx_in_place = 0;*/ ++ ++ /*if (pkt_len > rx_copybreak)*/ { ++ struct rtskb *newskb; ++ ++ if ((newskb = rtnetdev_alloc_rtskb( ++ dev, PKT_BUF_SZ))) { ++ rtskb_reserve(newskb, 2); ++ skb = lp->rx_skbuff[entry]; ++ pci_unmap_single( ++ lp->pci_dev, ++ lp->rx_dma_addr[entry], ++ skb->len, ++ PCI_DMA_FROMDEVICE); ++ rtskb_put(skb, pkt_len); ++ lp->rx_skbuff[entry] = newskb; ++ lp->rx_dma_addr ++ [entry] = pci_map_single( ++ lp->pci_dev, ++ newskb->tail, ++ newskb->len, ++ PCI_DMA_FROMDEVICE); ++ lp->rx_ring[entry] ++ .base = le32_to_cpu( ++ lp->rx_dma_addr[entry]); ++ /*rx_in_place = 1;*/ ++ } else ++ skb = NULL; ++ } /*else { ++ skb = dev_alloc_skb(pkt_len+2); ++ }*/ ++ /*** RTnet ***/ ++ ++ if (skb == NULL) { ++ int i; ++ rtdm_printk( ++ KERN_ERR ++ "%s: Memory squeeze, deferring packet.\n", ++ dev->name); ++ for (i = 0; i < RX_RING_SIZE; i++) ++ if ((short)le16_to_cpu( ++ lp->rx_ring[(entry + ++ i) & ++ RX_RING_MOD_MASK] ++ .status) < ++ 0) ++ break; ++ ++ if (i > RX_RING_SIZE - 2) { ++ lp->stats.rx_dropped++; ++ lp->rx_ring[entry].status |= ++ le16_to_cpu(0x8000); ++ lp->cur_rx++; ++ } ++ break; ++ } ++ /*** RTnet ***/ ++ lp->stats.rx_bytes += skb->len; ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ ///dev->last_rx = jiffies; ++ /*** RTnet ***/ ++ lp->stats.rx_packets++; ++ } ++ } ++ /* ++ * The docs say that the buffer length isn't touched, but Andrew Boyd ++ * of QNX reports that some revs of the 79C965 clear it. ++ */ ++ lp->rx_ring[entry].buf_length = le16_to_cpu(-PKT_BUF_SZ); ++ lp->rx_ring[entry].status |= le16_to_cpu(0x8000); ++ entry = (++lp->cur_rx) & RX_RING_MOD_MASK; ++ } ++ ++ return 0; ++} ++ ++static int pcnet32_close(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ unsigned long ioaddr = dev->base_addr; ++ struct pcnet32_private *lp = dev->priv; ++ int i; ++ ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); ++ ++ if (pcnet32_debug > 1) ++ printk(KERN_DEBUG ++ "%s: Shutting down ethercard, status was %2.2x.\n", ++ dev->name, lp->a.read_csr(ioaddr, 0)); ++ ++ /* We stop the PCNET32 here -- it occasionally polls memory if we don't. 
*/ ++ lp->a.write_csr(ioaddr, 0, 0x0004); ++ ++ /* ++ * Switch back to 16bit mode to avoid problems with dumb ++ * DOS packet driver after a warm reboot ++ */ ++ lp->a.write_bcr(ioaddr, 20, 4); ++ ++ /*** RTnet ***/ ++ if ((i = rtdm_irq_free(&lp->irq_handle)) < 0) ++ return i; ++ ++ rt_stack_disconnect(dev); ++ /*** RTnet ***/ ++ ++ /* free all allocated skbuffs */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ lp->rx_ring[i].status = 0; ++ if (lp->rx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], ++ lp->rx_skbuff[i]->len, ++ PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(lp->rx_skbuff[i]); /*** RTnet ***/ ++ } ++ lp->rx_skbuff[i] = NULL; ++ lp->rx_dma_addr[i] = 0; ++ } ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ if (lp->tx_skbuff[i]) { ++ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], ++ lp->tx_skbuff[i]->len, ++ PCI_DMA_TODEVICE); ++ dev_kfree_rtskb(lp->tx_skbuff[i]); /*** RTnet ***/ ++ } ++ lp->tx_skbuff[i] = NULL; ++ lp->tx_dma_addr[i] = 0; ++ } ++ ++ return 0; ++} ++ ++/*** RTnet ***/ ++static struct net_device_stats *pcnet32_get_stats(struct rtnet_device *rtdev) ++{ ++ struct pcnet32_private *lp = rtdev->priv; ++ unsigned long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ u16 saved_addr; ++ ++ rtdm_lock_get_irqsave(&lp->lock, context); ++ saved_addr = lp->a.read_rap(ioaddr); ++ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112); ++ lp->a.write_rap(ioaddr, saved_addr); ++ rtdm_lock_put_irqrestore(&lp->lock, context); ++ ++ return &lp->stats; ++} ++ ++/*** RTnet ***/ ++ ++static struct pci_driver pcnet32_driver = { ++ name: DRV_NAME, ++ probe: pcnet32_probe_pci, ++ id_table: pcnet32_pci_tbl, ++}; ++ ++/* An additional parameter that may be passed in... */ ++static int local_debug = -1; ++static int tx_start_pt = -1; ++ ++module_param_named(debug, local_debug, int, 0444); ++MODULE_PARM_DESC(debug, DRV_NAME " debug level (0-6)"); ++module_param(max_interrupt_work, int, 0444); ++MODULE_PARM_DESC(max_interrupt_work, ++ DRV_NAME " maximum events handled per interrupt"); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++MODULE_PARM_DESC(rx_copybreak, DRV_NAME " copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++module_param(tx_start_pt, int, 0444); ++MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)"); ++module_param(pcnet32vlb, int, 0444); ++MODULE_PARM_DESC(pcnet32vlb, DRV_NAME " Vesa local bus (VLB) support (0/1)"); ++module_param_array(options, int, NULL, 0444); ++MODULE_PARM_DESC(options, DRV_NAME " initial option setting(s) (0-15)"); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)"); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_DESCRIPTION("RTnet Driver for PCnet32 and PCnetPCI based ethercards"); ++MODULE_LICENSE("GPL"); ++ ++static int __init pcnet32_init_module(void) ++{ ++ printk(KERN_INFO "%s", version); ++ ++ if (local_debug > 0) ++ pcnet32_debug = local_debug; ++ ++ if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) ++ tx_start = tx_start_pt; ++ ++ /* find the PCI devices */ ++ if (!pci_register_driver(&pcnet32_driver)) ++ pcnet32_have_pci = 1; ++ ++ /* should we find any remaining VLbus devices ? */ ++ if (pcnet32vlb) ++ pcnet32_probe_vlbus(); ++ ++ if (cards_found) ++ printk(KERN_INFO PFX "%d cards_found.\n", cards_found); ++ ++ return (pcnet32_have_pci + cards_found) ? 
0 : -ENODEV; ++} ++ ++static void __exit pcnet32_cleanup_module(void) ++{ ++ struct rtnet_device *next_dev; /*** RTnet ***/ ++ ++ /* No need to check MOD_IN_USE, as sys_delete_module() checks. */ ++ while (pcnet32_dev) { ++ struct pcnet32_private *lp = pcnet32_dev->priv; ++ next_dev = lp->next; ++ /*** RTnet ***/ ++ rt_unregister_rtnetdev(pcnet32_dev); ++ rt_rtdev_disconnect(pcnet32_dev); ++ /*** RTnet ***/ ++ release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); ++ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); ++ /*** RTnet ***/ ++ rtdev_free(pcnet32_dev); ++ /*** RTnet ***/ ++ pcnet32_dev = next_dev; ++ } ++ ++ if (pcnet32_have_pci) ++ pci_unregister_driver(&pcnet32_driver); ++} ++ ++module_init(pcnet32_init_module); ++module_exit(pcnet32_cleanup_module); +--- linux/drivers/xenomai/net/drivers/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/Makefile 2021-04-07 16:01:27.284634085 +0800 +@@ -0,0 +1,69 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_EXP_DRIVERS) += experimental/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000) += e1000/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000E) += e1000e/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MPC52XX_FEC) += mpc52xx_fec/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_TULIP) += tulip/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_IGB) += igb/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_8139) += rt_8139too.o ++ ++rt_8139too-y := 8139too.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_AT91_ETHER) += rt_at91_ether.o ++ ++rt_at91_ether-y := at91_ether.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_EEPRO100) += rt_eepro100.o ++ ++rt_eepro100-y := eepro100.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) += rt_loopback.o ++ ++rt_loopback-y := loopback.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FCC_ENET) += rt_mpc8260_fcc_enet.o ++ ++rt_mpc8260_fcc_enet-y := mpc8260_fcc_enet.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_SCC_ENET) += rt_mpc8xx_enet.o ++ ++rt_mpc8xx_enet-y := mpc8xx_enet.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FEC_ENET) += rt_mpc8xx_fec.o ++ ++rt_mpc8xx_fec-y := mpc8xx_fec.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_FEC) += rt_fec.o ++ ++rt_fec-y := fec.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_NATSEMI) += rt_natsemi.o ++ ++rt_natsemi-y := natsemi.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_PCNET32) += rt_pcnet32.o ++ ++rt_pcnet32-y := pcnet32.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_SMC91111) += rt_smc91111.o ++ ++rt_smc91111-y := smc91111.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_MACB) += rt_macb.o ++ ++rt_macb-y := macb.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_VIA_RHINE) += rt_via-rhine.o ++ ++rt_via-rhine-y := via-rhine.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_R8169) += rt_r8169.o ++ ++rt_r8169-y := r8169.o +--- linux/drivers/xenomai/net/drivers/rt_eth1394.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_eth1394.h 2021-04-07 16:01:27.279634092 +0800 +@@ -0,0 +1,240 @@ ++/* ++ * eth1394.h -- Driver for Ethernet emulation over FireWire, (adapted from Linux1394) ++ * working under RTnet. ++ * ++ * Copyright (C) 2005 Zhang Yuchen ++ * ++ * Mainly based on work by Emanuel Pirker and Andreas E. Bombe ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __ETH1394_H ++#define __ETH1394_H ++ ++#include ++#include ++#include ++#include ++ ++ ++/* Register for incoming packets. This is 4096 bytes, which supports up to ++ * S3200 (per Table 16-3 of IEEE 1394b-2002). */ ++#define ETHER1394_REGION_ADDR_LEN 4096 ++#define ETHER1394_REGION_ADDR 0xfffff0200000ULL ++#define ETHER1394_REGION_ADDR_END (ETHER1394_REGION_ADDR + ETHER1394_REGION_ADDR_LEN) ++ ++/* GASP identifier numbers for IPv4 over IEEE 1394 */ ++#define ETHER1394_GASP_SPECIFIER_ID 0x00005E ++#define ETHER1394_GASP_SPECIFIER_ID_HI ((ETHER1394_GASP_SPECIFIER_ID >> 8) & 0xffff) ++#define ETHER1394_GASP_SPECIFIER_ID_LO (ETHER1394_GASP_SPECIFIER_ID & 0xff) ++#define ETHER1394_GASP_VERSION 1 ++ ++#define ETHER1394_GASP_OVERHEAD (2 * sizeof(quadlet_t)) /* GASP header overhead */ ++ ++#define ETHER1394_GASP_BUFFERS 16 ++ ++#define ETH1394_BC_CHANNEL 31 ++ ++#define ALL_NODES 0x003f //stolen from ieee1394_types.h ++/* Node set == 64 */ ++#define NODE_SET (ALL_NODES + 1) ++ ++enum eth1394_bc_states { ETHER1394_BC_CLOSED, ETHER1394_BC_OPENED, ++ ETHER1394_BC_CHECK, ETHER1394_BC_ERROR, ++ ETHER1394_BC_RUNNING, ++ ETHER1394_BC_STOPPED }; ++ ++#define TX_RING_SIZE 32 ++#define RX_RING_SIZE 8 /* RX_RING_SIZE*2 rtskbs will be preallocated */ ++ ++struct pdg_list { ++ struct list_head list; /* partial datagram list per node */ ++ unsigned int sz; /* partial datagram list size per node */ ++ rtdm_lock_t lock; /* partial datagram lock */ ++}; ++ ++/* IP1394 headers */ ++#include ++ ++/* Unfragmented */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_uf_hdr { ++ u16 lf:2; ++ u16 res:14; ++ u16 ether_type; /* Ethernet packet type */ ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_uf_hdr { ++ u16 res:14; ++ u16 lf:2; ++ u16 ether_type; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++/* End of IP1394 headers */ ++ ++/* Fragment types */ ++#define ETH1394_HDR_LF_UF 0 /* unfragmented */ ++#define ETH1394_HDR_LF_FF 1 /* first fragment */ ++#define ETH1394_HDR_LF_LF 2 /* last fragment */ ++#define ETH1394_HDR_LF_IF 3 /* interior fragment */ ++ ++#define IP1394_HW_ADDR_LEN 2 /* In RFC, the value is 16; here use the value for modified spec */ ++ ++/* Our arp packet (ARPHRD_IEEE1394) */ ++struct eth1394_arp { ++ u16 hw_type; /* 0x0018 */ ++ u16 proto_type; /* 0x0080 */ ++ u8 hw_addr_len; /* 2 */ ++ u8 ip_addr_len; /* 4 */ ++ u16 opcode; /* ARP Opcode: 1 for req, 2 for resp */ ++ /* Above is exactly the same format as struct arphdr */ ++ ++ unsigned char s_uniq_id[ETH_ALEN]; /* Sender's node id padded with zeros */ ++ u8 max_rec; /* Sender's max packet size */ ++ u8 sspd; /* Sender's max speed */ ++ u32 sip; /* Sender's IP Address */ ++ u32 tip; /* IP Address of requested hw addr */ ++}; ++ ++ ++/* Network timeout */ ++#define ETHER1394_TIMEOUT 100000 ++ ++/* First fragment */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_ff_hdr { ++ u16 lf:2; ++ u16 res1:2; ++ u16 dg_size:12; /* Datagram size */ ++ u16 ether_type; /* Ethernet packet type */ 
++ u16 dgl; /* Datagram label */ ++ u16 res2; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_ff_hdr { ++ u16 dg_size:12; ++ u16 res1:2; ++ u16 lf:2; ++ u16 ether_type; ++ u16 dgl; ++ u16 res2; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++/* XXX: Subsequent fragments, including last */ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_sf_hdr { ++ u16 lf:2; ++ u16 res1:2; ++ u16 dg_size:12; /* Datagram size */ ++ u16 res2:4; ++ u16 fg_off:12; /* Fragment offset */ ++ u16 dgl; /* Datagram label */ ++ u16 res3; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_sf_hdr { ++ u16 dg_size:12; ++ u16 res1:2; ++ u16 lf:2; ++ u16 fg_off:12; ++ u16 res2:4; ++ u16 dgl; ++ u16 res3; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++#if defined __BIG_ENDIAN_BITFIELD ++struct eth1394_common_hdr { ++ u16 lf:2; ++ u16 pad1:14; ++} __attribute__((packed)); ++#elif defined __LITTLE_ENDIAN_BITFIELD ++struct eth1394_common_hdr { ++ u16 pad1:14; ++ u16 lf:2; ++} __attribute__((packed)); ++#else ++#error Unknown bit field type ++#endif ++ ++struct eth1394_hdr_words { ++ u16 word1; ++ u16 word2; ++ u16 word3; ++ u16 word4; ++}; ++ ++union eth1394_hdr { ++ struct eth1394_common_hdr common; ++ struct eth1394_uf_hdr uf; ++ struct eth1394_ff_hdr ff; ++ struct eth1394_sf_hdr sf; ++ struct eth1394_hdr_words words; ++}; ++ ++typedef enum {ETH1394_GASP, ETH1394_WRREQ} eth1394_tx_type; ++ ++/* This is our task struct. It's used for the packet complete callback. */ ++struct packet_task { ++ struct list_head lh; ++ struct rtskb *skb; ++ int outstanding_pkts; ++ eth1394_tx_type tx_type; ++ int max_payload; ++ struct hpsb_packet *packet; ++ struct eth1394_priv *priv; ++ union eth1394_hdr hdr; ++ u64 addr; ++ u16 dest_node; ++ unsigned int priority; //the priority mapped to priority on 1394 transaction ++}; ++ ++/* Private structure for our ethernet driver */ ++struct eth1394_priv { ++ struct net_device_stats stats; /* Device stats */ ++ struct hpsb_host *host; /* The card for this dev */ ++ u16 maxpayload[NODE_SET]; /* Max payload per node */ ++ unsigned char sspd[NODE_SET]; /* Max speed per node */ ++ rtdm_lock_t lock; /* Private lock */ ++ int broadcast_channel; /* Async stream Broadcast Channel */ ++ enum eth1394_bc_states bc_state; /* broadcast channel state */ ++ struct hpsb_iso *iso; ++ struct pdg_list pdg[ALL_NODES]; /* partial RX datagram lists */ ++ int dgl[NODE_SET]; /* Outgoing datagram label per node */ ++ ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ struct packet_task ptask_list[20]; //the list of pre-allocated ptask structure ++}; ++ ++ ++ ++struct host_info { ++ struct hpsb_host *host; ++ struct rtnet_device *dev; ++}; ++ ++ ++#endif /* __ETH1394_H */ +--- linux/drivers/xenomai/net/drivers/via-rhine.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/via-rhine.c 2021-04-07 16:01:27.274634099 +0800 +@@ -0,0 +1,1818 @@ ++/* via-rhine.c: A Linux Ethernet device driver for VIA Rhine family chips. */ ++/* ++ Written 1998-2001 by Donald Becker. ++ ++ This software may be used and distributed according to the terms of ++ the GNU General Public License (GPL), incorporated herein by reference. ++ Drivers based on or derived from this code fall under the GPL and must ++ retain the authorship, copyright and license notice. 
This file is not ++ a complete program and may only be used when the entire operating ++ system is licensed under the GPL. ++ ++ This driver is designed for the VIA VT86C100A Rhine-I. ++ It also works with the 6102 Rhine-II, and 6105/6105M Rhine-III. ++ ++ The author may be reached as becker@scyld.com, or C/O ++ Scyld Computing Corporation ++ 410 Severn Ave., Suite 210 ++ Annapolis MD 21403 ++ ++ ++ This driver contains some changes from the original Donald Becker ++ version. He may or may not be interested in bug reports on this ++ code. You can find his versions at: ++ http://www.scyld.com/network/via-rhine.html ++ ++ ++ Linux kernel version history: ++ ++ LK1.1.0: ++ - Jeff Garzik: softnet 'n stuff ++ ++ LK1.1.1: ++ - Justin Guyett: softnet and locking fixes ++ - Jeff Garzik: use PCI interface ++ ++ LK1.1.2: ++ - Urban Widmark: minor cleanups, merges from Becker 1.03a/1.04 versions ++ ++ LK1.1.3: ++ - Urban Widmark: use PCI DMA interface (with thanks to the eepro100.c ++ code) update "Theory of Operation" with ++ softnet/locking changes ++ - Dave Miller: PCI DMA and endian fixups ++ - Jeff Garzik: MOD_xxx race fixes, updated PCI resource allocation ++ ++ LK1.1.4: ++ - Urban Widmark: fix gcc 2.95.2 problem and ++ remove writel's to fixed address 0x7c ++ ++ LK1.1.5: ++ - Urban Widmark: mdio locking, bounce buffer changes ++ merges from Beckers 1.05 version ++ added netif_running_on/off support ++ ++ LK1.1.6: ++ - Urban Widmark: merges from Beckers 1.08b version (VT6102 + mdio) ++ set netif_running_on/off on startup, del_timer_sync ++ ++ LK1.1.7: ++ - Manfred Spraul: added reset into tx_timeout ++ ++ LK1.1.9: ++ - Urban Widmark: merges from Beckers 1.10 version ++ (media selection + eeprom reload) ++ - David Vrabel: merges from D-Link "1.11" version ++ (disable WOL and PME on startup) ++ ++ LK1.1.10: ++ - Manfred Spraul: use "singlecopy" for unaligned buffers ++ don't allocate bounce buffers for !ReqTxAlign cards ++ ++ LK1.1.11: ++ - David Woodhouse: Set dev->base_addr before the first time we call ++ wait_for_reset(). It's a lot happier that way. ++ Free np->tx_bufs only if we actually allocated it. ++ ++ LK1.1.12: ++ - Martin Eriksson: Allow Memory-Mapped IO to be enabled. 
++ ++ LK1.1.13 (jgarzik): ++ - Add ethtool support ++ - Replace some MII-related magic numbers with constants ++ ++ LK1.1.14 (Ivan G.): ++ - fixes comments for Rhine-III ++ - removes W_MAX_TIMEOUT (unused) ++ - adds HasDavicomPhy for Rhine-I (basis: linuxfet driver; my card ++ is R-I and has Davicom chip, flag is referenced in kernel driver) ++ - sends chip_id as a parameter to wait_for_reset since np is not ++ initialized on first call ++ - changes mmio "else if (chip_id==VT6102)" to "else" so it will work ++ for Rhine-III's (documentation says same bit is correct) ++ - transmit frame queue message is off by one - fixed ++ - adds IntrNormalSummary to "Something Wicked" exclusion list ++ so normal interrupts will not trigger the message (src: Donald Becker) ++ (Roger Luethi) ++ - show confused chip where to continue after Tx error ++ - location of collision counter is chip specific ++ - allow selecting backoff algorithm (module parameter) ++ ++ LK1.1.15 (jgarzik): ++ - Use new MII lib helper generic_mii_ioctl ++ ++ LK1.1.16 (Roger Luethi) ++ - Etherleak fix ++ - Handle Tx buffer underrun ++ - Fix bugs in full duplex handling ++ - New reset code uses "force reset" cmd on Rhine-II ++ - Various clean ups ++ ++ LK1.1.17 (Roger Luethi) ++ - Fix race in via_rhine_start_tx() ++ - On errors, wait for Tx engine to turn off before scavenging ++ - Handle Tx descriptor write-back race on Rhine-II ++ - Force flushing for PCI posted writes ++ - More reset code changes ++ ++ Ported to RTnet: October 2003, Jan Kiszka ++*/ ++ ++#define DRV_NAME "via-rhine-rt" ++#define DRV_VERSION "1.1.17-RTnet-0.1" ++#define DRV_RELDATE "2003-10-05" ++ ++ ++/* A few user-configurable values. ++ These may be modified when a driver module is loaded. */ ++ ++static int local_debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ ++static int max_interrupt_work = 20; ++ ++/* Set the copy breakpoint for the copy-only-tiny-frames scheme. ++ Setting to > 1518 effectively disables this feature. */ ++/*** RTnet *** ++static int rx_copybreak; ++ *** RTnet ***/ ++ ++/* Select a backoff algorithm (Ethernet capture effect) */ ++static int backoff; ++ ++/* Used to pass the media type, etc. ++ Both 'options[]' and 'full_duplex[]' should exist for driver ++ interoperability. ++ The media type is usually passed in 'options[]'. ++ The default is autonegotation for speed and duplex. ++ This should rarely be overridden. ++ Use option values 0x10/0x20 for 10Mbps, 0x100,0x200 for 100Mbps. ++ Use option values 0x10 and 0x100 for forcing half duplex fixed speed. ++ Use option values 0x20 and 0x200 for forcing full duplex operation. ++*/ ++#define MAX_UNITS 8 /* More are supported, limit only on options */ ++static int options[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++/* Maximum number of multicast addresses to filter (vs. rx-all-multicast). ++ The Rhine has a 64 element 8390-like hash table. */ ++static const int multicast_filter_limit = 32; ++ ++ ++/* Operational parameters that are set at compile time. */ ++ ++/* Keep the ring sizes a power of two for compile efficiency. ++ The compiler will convert '%'<2^N> into a bit mask. ++ Making the Tx ring too large decreases the effectiveness of channel ++ bonding and packet priority. ++ There are no ill effects from too-large receive rings. */ ++#define TX_RING_SIZE 16 ++#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. 
*/ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++ ++ ++/* Operational parameters that usually are not changed. */ ++ ++/* Time in jiffies before concluding the transmitter is hung. */ ++#define TX_TIMEOUT (2*HZ) ++ ++#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ ++ ++#if !defined(__OPTIMIZE__) || !defined(__KERNEL__) ++#warning You must compile this file with the correct options! ++#warning See the last lines of the source file. ++#error You must compile this driver with "-O". ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* Processor type for cache alignment. */ ++#include ++#include ++#include ++ ++/*** RTnet ***/ ++#include ++ ++#define DEFAULT_RX_POOL_SIZE 16 ++ ++static int cards[MAX_UNITS] = { [0 ... (MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** RTnet ***/ ++ ++/* These identify the driver base version and may not be removed. */ ++static char version[] = ++KERN_INFO DRV_NAME ".c:" DRV_VERSION " " DRV_RELDATE " Jan.Kiszka@web.de\n"; ++ ++static char shortname[] = DRV_NAME; ++ ++ ++/* This driver was written to use PCI memory space, however most versions ++ of the Rhine only work correctly with I/O space accesses. */ ++/*#ifdef CONFIG_VIA_RHINE_MMIO ++#define USE_MEM ++#else*/ ++#define USE_IO ++#undef readb ++#undef readw ++#undef readl ++#undef writeb ++#undef writew ++#undef writel ++#define readb(addr) inb((unsigned long)(addr)) ++#define readw(addr) inw((unsigned long)(addr)) ++#define readl(addr) inl((unsigned long)(addr)) ++#define writeb(val,addr) outb((val),(unsigned long)(addr)) ++#define writew(val,addr) outw((val),(unsigned long)(addr)) ++#define writel(val,addr) outl((val),(unsigned long)(addr)) ++/*#endif*/ ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_DESCRIPTION("RTnet VIA Rhine PCI Fast Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++module_param(max_interrupt_work, int, 0444); ++module_param_named(debug, local_debug, int, 0444); ++/*** RTnet *** ++MODULE_PARM(rx_copybreak, "i"); ++ *** RTnet ***/ ++module_param(backoff, int, 0444); ++module_param_array(options, int, NULL, 0444); ++module_param_array(full_duplex, int, NULL, 0444); ++MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt"); ++MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)"); ++/*** RTnet *** ++MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames"); ++ *** RTnet ***/ ++MODULE_PARM_DESC(backoff, "VIA Rhine: Bits 0-3: backoff algorithm"); ++MODULE_PARM_DESC(options, "VIA Rhine: Bits 0-3: media type, bit 17: full duplex"); ++MODULE_PARM_DESC(full_duplex, "VIA Rhine full duplex setting(s) (1)"); ++ ++/* ++ Theory of Operation ++ ++I. Board Compatibility ++ ++This driver is designed for the VIA 86c100A Rhine-II PCI Fast Ethernet ++controller. ++ ++II. Board-specific settings ++ ++Boards with this chip are functional only in a bus-master PCI slot. ++ ++Many operational settings are loaded from the EEPROM to the Config word at ++offset 0x78. For most of these settings, this driver assumes that they are ++correct. ++If this driver is compiled to use PCI memory space operations the EEPROM ++must be configured to enable memory ops. ++ ++III. Driver operation ++ ++IIIa. 
Ring buffers ++ ++This driver uses two statically allocated fixed-size descriptor lists ++formed into rings by a branch from the final descriptor to the beginning of ++the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. ++ ++IIIb/c. Transmit/Receive Structure ++ ++This driver attempts to use a zero-copy receive and transmit scheme. ++ ++Alas, all data buffers are required to start on a 32 bit boundary, so ++the driver must often copy transmit packets into bounce buffers. ++ ++The driver allocates full frame size skbuffs for the Rx ring buffers at ++open() time and passes the skb->data field to the chip as receive data ++buffers. When an incoming frame is less than RX_COPYBREAK bytes long, ++a fresh skbuff is allocated and the frame is copied to the new skbuff. ++When the incoming frame is larger, the skbuff is passed directly up the ++protocol stack. Buffers consumed this way are replaced by newly allocated ++skbuffs in the last phase of via_rhine_rx(). ++ ++The RX_COPYBREAK value is chosen to trade-off the memory wasted by ++using a full-sized skbuff for small frames vs. the copying costs of larger ++frames. New boards are typically used in generously configured machines ++and the underfilled buffers have negligible impact compared to the benefit of ++a single allocation size, so the default value of zero results in never ++copying packets. When copying is done, the cost is usually mitigated by using ++a combined copy/checksum routine. Copying also preloads the cache, which is ++most useful with small frames. ++ ++Since the VIA chips are only able to transfer data to buffers on 32 bit ++boundaries, the IP header at offset 14 in an ethernet frame isn't ++longword aligned for further processing. Copying these unaligned buffers ++has the beneficial effect of 16-byte aligning the IP header. ++ ++IIId. Synchronization ++ ++The driver runs as two independent, single-threaded flows of control. One ++is the send-packet routine, which enforces single-threaded use by the ++dev->priv->lock spinlock. The other thread is the interrupt handler, which ++is single threaded by the hardware and interrupt handling software. ++ ++The send packet thread has partial control over the Tx ring. It locks the ++dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring ++is not available it stops the transmit queue by calling netif_stop_queue. ++ ++The interrupt handler has exclusive control over the Rx ring and records stats ++from the Tx ring. After reaping the stats, it marks the Tx queue entry as ++empty by incrementing the dirty_tx mark. If at least half of the entries in ++the Rx ring are available the transmit queue is woken up if it was stopped. ++ ++IV. Notes ++ ++IVb. References ++ ++Preliminary VT86C100A manual from http://www.via.com.tw/ ++http://www.scyld.com/expert/100mbps.html ++http://www.scyld.com/expert/NWay.html ++ftp://ftp.via.com.tw/public/lan/Products/NIC/VT86C100A/Datasheet/VT86C100A03.pdf ++ftp://ftp.via.com.tw/public/lan/Products/NIC/VT6102/Datasheet/VT6102_021.PDF ++ ++ ++IVc. Errata ++ ++The VT86C100A manual is not reliable information. ++The 3043 chip does not handle unaligned transmit or receive buffers, resulting ++in significant performance degradation for bounce buffer copies on transmit ++and unaligned IP headers on receive. ++The chip does not pad to minimum transmit length. ++ ++*/ ++ ++ ++/* This table drives the PCI probe routines. It's mostly boilerplate in all ++ of the drivers, and will likely be provided by some future kernel. 
++ Note the matching code -- the first table entry matchs all 56** cards but ++ second only the 1234 card. ++*/ ++ ++enum pci_flags_bit { ++ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, ++ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, ++}; ++ ++enum via_rhine_chips { ++ VT86C100A = 0, ++ VT6102, ++ VT6105, ++ VT6105M ++}; ++ ++struct via_rhine_chip_info { ++ const char *name; ++ u16 pci_flags; ++ int io_size; ++ int drv_flags; ++}; ++ ++ ++enum chip_capability_flags { ++ CanHaveMII=1, HasESIPhy=2, HasDavicomPhy=4, ++ ReqTxAlign=0x10, HasWOL=0x20, }; ++ ++#ifdef USE_MEM ++#define RHINE_IOTYPE (PCI_USES_MEM | PCI_USES_MASTER | PCI_ADDR1) ++#else ++#define RHINE_IOTYPE (PCI_USES_IO | PCI_USES_MASTER | PCI_ADDR0) ++#endif ++/* Beware of PCI posted writes */ ++#define IOSYNC do { readb((void *)dev->base_addr + StationAddr); } while (0) ++ ++/* directly indexed by enum via_rhine_chips, above */ ++static struct via_rhine_chip_info via_rhine_chip_info[] = ++{ ++ { "VIA VT86C100A Rhine", RHINE_IOTYPE, 128, ++ CanHaveMII | ReqTxAlign | HasDavicomPhy }, ++ { "VIA VT6102 Rhine-II", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++ { "VIA VT6105 Rhine-III", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++ { "VIA VT6105M Rhine-III", RHINE_IOTYPE, 256, ++ CanHaveMII | HasWOL }, ++}; ++ ++static struct pci_device_id via_rhine_pci_tbl[] = ++{ ++ {0x1106, 0x3043, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT86C100A}, ++ {0x1106, 0x3065, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6102}, ++ {0x1106, 0x3106, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6105}, ++ {0x1106, 0x3053, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VT6105M}, ++ {0,} /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, via_rhine_pci_tbl); ++ ++ ++/* Offsets to the device registers. */ ++enum register_offsets { ++ StationAddr=0x00, RxConfig=0x06, TxConfig=0x07, ChipCmd=0x08, ++ IntrStatus=0x0C, IntrEnable=0x0E, ++ MulticastFilter0=0x10, MulticastFilter1=0x14, ++ RxRingPtr=0x18, TxRingPtr=0x1C, GFIFOTest=0x54, ++ MIIPhyAddr=0x6C, MIIStatus=0x6D, PCIBusConfig=0x6E, ++ MIICmd=0x70, MIIRegAddr=0x71, MIIData=0x72, MACRegEEcsr=0x74, ++ ConfigA=0x78, ConfigB=0x79, ConfigC=0x7A, ConfigD=0x7B, ++ RxMissed=0x7C, RxCRCErrs=0x7E, MiscCmd=0x81, ++ StickyHW=0x83, IntrStatus2=0x84, WOLcrClr=0xA4, WOLcgClr=0xA7, ++ PwrcsrClr=0xAC, ++}; ++ ++/* Bits in ConfigD */ ++enum backoff_bits { ++ BackOptional=0x01, BackModify=0x02, ++ BackCaptureEffect=0x04, BackRandom=0x08 ++}; ++ ++#ifdef USE_MEM ++/* Registers we check that mmio and reg are the same. */ ++int mmio_verify_registers[] = { ++ RxConfig, TxConfig, IntrEnable, ConfigA, ConfigB, ConfigC, ConfigD, ++ 0 ++}; ++#endif ++ ++/* Bits in the interrupt status/mask registers. */ ++enum intr_status_bits { ++ IntrRxDone=0x0001, IntrRxErr=0x0004, IntrRxEmpty=0x0020, ++ IntrTxDone=0x0002, IntrTxError=0x0008, IntrTxUnderrun=0x0210, ++ IntrPCIErr=0x0040, ++ IntrStatsMax=0x0080, IntrRxEarly=0x0100, ++ IntrRxOverflow=0x0400, IntrRxDropped=0x0800, IntrRxNoBuf=0x1000, ++ IntrTxAborted=0x2000, IntrLinkChange=0x4000, ++ IntrRxWakeUp=0x8000, ++ IntrNormalSummary=0x0003, IntrAbnormalSummary=0xC260, ++ IntrTxDescRace=0x080000, /* mapped from IntrStatus2 */ ++ IntrTxErrSummary=0x082218, ++}; ++ ++/* The Rx and Tx buffer descriptors. 
*/ ++struct rx_desc { ++ s32 rx_status; ++ u32 desc_length; /* Chain flag, Buffer/frame length */ ++ u32 addr; ++ u32 next_desc; ++}; ++struct tx_desc { ++ s32 tx_status; ++ u32 desc_length; /* Chain flag, Tx Config, Frame length */ ++ u32 addr; ++ u32 next_desc; ++}; ++ ++/* Initial value for tx_desc.desc_length, Buffer size goes to bits 0-10 */ ++#define TXDESC 0x00e08000 ++ ++enum rx_status_bits { ++ RxOK=0x8000, RxWholePkt=0x0300, RxErr=0x008F ++}; ++ ++/* Bits in *_desc.*_status */ ++enum desc_status_bits { ++ DescOwn=0x80000000 ++}; ++ ++/* Bits in ChipCmd. */ ++enum chip_cmd_bits { ++ CmdInit=0x0001, CmdStart=0x0002, CmdStop=0x0004, CmdRxOn=0x0008, ++ CmdTxOn=0x0010, CmdTxDemand=0x0020, CmdRxDemand=0x0040, ++ CmdEarlyRx=0x0100, CmdEarlyTx=0x0200, CmdFDuplex=0x0400, ++ CmdNoTxPoll=0x0800, CmdReset=0x8000, ++}; ++ ++#define MAX_MII_CNT 4 ++struct netdev_private { ++ /* Descriptor rings */ ++ struct rx_desc *rx_ring; ++ struct tx_desc *tx_ring; ++ dma_addr_t rx_ring_dma; ++ dma_addr_t tx_ring_dma; ++ ++ /* The addresses of receive-in-place skbuffs. */ ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t rx_skbuff_dma[RX_RING_SIZE]; ++ ++ /* The saved address of a sent-in-place packet/buffer, for later free(). */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; /*** RTnet ***/ ++ dma_addr_t tx_skbuff_dma[TX_RING_SIZE]; ++ ++ /* Tx bounce buffers */ ++ unsigned char *tx_buf[TX_RING_SIZE]; ++ unsigned char *tx_bufs; ++ dma_addr_t tx_bufs_dma; ++ ++ struct pci_dev *pdev; ++ struct net_device_stats stats; ++ struct timer_list timer; /* Media monitoring timer. */ ++ rtdm_lock_t lock; ++ ++ /* Frequently used values: keep some adjacent for cache effect. */ ++ int chip_id, drv_flags; ++ struct rx_desc *rx_head_desc; ++ unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ ++ unsigned int cur_tx, dirty_tx; ++ unsigned int rx_buf_sz; /* Based on MTU+slack. */ ++ u16 chip_cmd; /* Current setting for ChipCmd */ ++ ++ /* These values are keep track of the transceiver/media in use. */ ++ unsigned int default_port:4; /* Last dev->if_port value. */ ++ u8 tx_thresh, rx_thresh; ++ ++ /* MII transceiver section. */ ++ unsigned char phys[MAX_MII_CNT]; /* MII device addresses. */ ++ unsigned int mii_cnt; /* number of MIIs found, but only the first one is used */ ++ u16 mii_status; /* last read MII status */ ++ struct mii_if_info mii_if; ++ unsigned int mii_if_force_media; /*** RTnet, support for older kernels (e.g. 
2.4.19) ***/ ++ ++ rtdm_irq_t irq_handle; ++}; ++ ++/*** RTnet ***/ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int location); ++static void mdio_write(struct rtnet_device *dev, int phy_id, int location, int value); ++static int via_rhine_open(struct rtnet_device *dev); ++static void via_rhine_check_duplex(struct rtnet_device *dev); ++/*static void via_rhine_timer(unsigned long data); ++static void via_rhine_tx_timeout(struct net_device *dev);*/ ++static int via_rhine_start_tx(struct rtskb *skb, struct rtnet_device *dev); ++static int via_rhine_interrupt(rtdm_irq_t *irq_handle); ++static void via_rhine_tx(struct rtnet_device *dev); ++static void via_rhine_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp); ++static void via_rhine_error(struct rtnet_device *dev, int intr_status); ++static void via_rhine_set_rx_mode(struct rtnet_device *dev); ++static struct net_device_stats *via_rhine_get_stats(struct rtnet_device *rtdev); ++/*static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);*/ ++static int via_rhine_close(struct rtnet_device *dev); ++/*** RTnet ***/ ++ ++static inline u32 get_intr_status(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ u32 intr_status; ++ ++ intr_status = readw(ioaddr + IntrStatus); ++ /* On Rhine-II, Bit 3 indicates Tx descriptor write-back race. */ ++ if (np->chip_id == VT6102) ++ intr_status |= readb(ioaddr + IntrStatus2) << 16; ++ return intr_status; ++} ++ ++static void wait_for_reset(struct rtnet_device *dev, int chip_id, char *name) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 20; ++ ++ IOSYNC; ++ ++ if (readw(ioaddr + ChipCmd) & CmdReset) { ++ printk(KERN_INFO "%s: Reset not complete yet. " ++ "Trying harder.\n", name); ++ ++ /* Rhine-II needs to be forced sometimes */ ++ if (chip_id == VT6102) ++ writeb(0x40, ioaddr + MiscCmd); ++ ++ /* VT86C100A may need long delay after reset (dlink) */ ++ /* Seen on Rhine-II as well (rl) */ ++ while ((readw(ioaddr + ChipCmd) & CmdReset) && --boguscnt) ++ udelay(5); ++ ++ } ++ ++ if (local_debug > 1) ++ printk(KERN_INFO "%s: Reset %s.\n", name, ++ boguscnt ? "succeeded" : "failed"); ++} ++ ++#ifdef USE_MEM ++static void enable_mmio(long ioaddr, int chip_id) ++{ ++ int n; ++ if (chip_id == VT86C100A) { ++ /* More recent docs say that this bit is reserved ... */ ++ n = inb(ioaddr + ConfigA) | 0x20; ++ outb(n, ioaddr + ConfigA); ++ } else { ++ n = inb(ioaddr + ConfigD) | 0x80; ++ outb(n, ioaddr + ConfigD); ++ } ++} ++#endif ++ ++static void reload_eeprom(long ioaddr) ++{ ++ int i; ++ outb(0x20, ioaddr + MACRegEEcsr); ++ /* Typically 2 cycles to reload. */ ++ for (i = 0; i < 150; i++) ++ if (! (inb(ioaddr + MACRegEEcsr) & 0x20)) ++ break; ++} ++ ++static int via_rhine_init_one (struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *dev; /*** RTnet ***/ ++ struct netdev_private *np; ++ int i, option; ++ int chip_id = (int) ent->driver_data; ++ static int card_idx = -1; ++ void *ioaddr; ++ long memaddr; ++ unsigned int io_size; ++ int pci_flags; ++#ifdef USE_MEM ++ long ioaddr0; ++#endif ++ ++/* when built into the kernel, we only print version if device is found */ ++#ifndef MODULE ++ static int printed_version; ++ if (!printed_version++) ++ printk(version); ++#endif ++ ++ card_idx++; ++ option = card_idx < MAX_UNITS ? 
options[card_idx] : 0; ++ io_size = via_rhine_chip_info[chip_id].io_size; ++ pci_flags = via_rhine_chip_info[chip_id].pci_flags; ++ ++/*** RTnet ***/ ++ if (cards[card_idx] == 0) ++ goto err_out; ++/*** RTnet ***/ ++ ++ if (pci_enable_device (pdev)) ++ goto err_out; ++ ++ /* this should always be supported */ ++ if (pci_set_dma_mask(pdev, 0xffffffff)) { ++ printk(KERN_ERR "32-bit PCI DMA addresses not supported by the card!?\n"); ++ goto err_out; ++ } ++ ++ /* sanity check */ ++ if ((pci_resource_len (pdev, 0) < io_size) || ++ (pci_resource_len (pdev, 1) < io_size)) { ++ printk (KERN_ERR "Insufficient PCI resources, aborting\n"); ++ goto err_out; ++ } ++ ++ ioaddr = (void *)pci_resource_start (pdev, 0); ++ memaddr = pci_resource_start (pdev, 1); ++ ++ if (pci_flags & PCI_USES_MASTER) ++ pci_set_master (pdev); ++ ++/*** RTnet ***/ ++ dev = rt_alloc_etherdev(sizeof(struct netdev_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ printk (KERN_ERR "init_ethernet failed for card #%d\n", card_idx); ++ goto err_out; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++/*** RTnet ***/ ++ ++ if (pci_request_regions(pdev, shortname)) ++ goto err_out_free_netdev; ++ ++#ifdef USE_MEM ++ ioaddr0 = (long)ioaddr; ++ enable_mmio(ioaddr0, chip_id); ++ ++ ioaddr = ioremap (memaddr, io_size); ++ if (!ioaddr) { ++ printk (KERN_ERR "ioremap failed for device %s, region 0x%X @ 0x%lX\n", ++ pci_name(pdev), io_size, memaddr); ++ goto err_out_free_res; ++ } ++ ++ /* Check that selected MMIO registers match the PIO ones */ ++ i = 0; ++ while (mmio_verify_registers[i]) { ++ int reg = mmio_verify_registers[i++]; ++ unsigned char a = inb(ioaddr0+reg); ++ unsigned char b = readb(ioaddr+reg); ++ if (a != b) { ++ printk (KERN_ERR "MMIO do not match PIO [%02x] (%02x != %02x)\n", ++ reg, a, b); ++ goto err_out_unmap; ++ } ++ } ++#endif ++ ++ /* D-Link provided reset code (with comment additions) */ ++ if (via_rhine_chip_info[chip_id].drv_flags & HasWOL) { ++ unsigned char byOrgValue; ++ ++ /* clear sticky bit before reset & read ethernet address */ ++ byOrgValue = readb(ioaddr + StickyHW); ++ byOrgValue = byOrgValue & 0xFC; ++ writeb(byOrgValue, ioaddr + StickyHW); ++ ++ /* (bits written are cleared?) */ ++ /* disable force PME-enable */ ++ writeb(0x80, ioaddr + WOLcgClr); ++ /* disable power-event config bit */ ++ writeb(0xFF, ioaddr + WOLcrClr); ++ /* clear power status (undocumented in vt6102 docs?) */ ++ writeb(0xFF, ioaddr + PwrcsrClr); ++ } ++ ++ /* Reset the chip to erase previous misconfiguration. */ ++ writew(CmdReset, ioaddr + ChipCmd); ++ ++ dev->base_addr = (long)ioaddr; ++ wait_for_reset(dev, chip_id, shortname); ++ ++ /* Reload the station address from the EEPROM. */ ++#ifdef USE_IO ++ reload_eeprom((long)ioaddr); ++#else ++ reload_eeprom(ioaddr0); ++ /* Reloading from eeprom overwrites cfgA-D, so we must re-enable MMIO. ++ If reload_eeprom() was done first this could be avoided, but it is ++ not known if that still works with the "win98-reboot" problem. */ ++ enable_mmio(ioaddr0, chip_id); ++#endif ++ ++ for (i = 0; i < 6; i++) ++ dev->dev_addr[i] = readb(ioaddr + StationAddr + i); ++ ++ if (!is_valid_ether_addr(dev->dev_addr)) { ++ printk(KERN_ERR "Invalid MAC address for card #%d\n", card_idx); ++ goto err_out_unmap; ++ } ++ ++ if (chip_id == VT6102) { ++ /* ++ * for 3065D, EEPROM reloaded will cause bit 0 in MAC_REG_CFGA ++ * turned on. it makes MAC receive magic packet ++ * automatically. 
So, we turn it off. (D-Link) ++ */ ++ writeb(readb(ioaddr + ConfigA) & 0xFE, ioaddr + ConfigA); ++ } ++ ++ /* Select backoff algorithm */ ++ if (backoff) ++ writeb(readb(ioaddr + ConfigD) & (0xF0 | backoff), ++ ioaddr + ConfigD); ++ ++ dev->irq = pdev->irq; ++ ++ np = dev->priv; ++ rtdm_lock_init (&np->lock); ++ np->chip_id = chip_id; ++ np->drv_flags = via_rhine_chip_info[chip_id].drv_flags; ++ np->pdev = pdev; ++/*** RTnet *** ++ np->mii_if.dev = dev; ++ np->mii_if.mdio_read = mdio_read; ++ np->mii_if.mdio_write = mdio_write; ++ np->mii_if.phy_id_mask = 0x1f; ++ np->mii_if.reg_num_mask = 0x1f; ++ *** RTnet ***/ ++ ++ if (dev->mem_start) ++ option = dev->mem_start; ++ ++ /* The chip-specific entries in the device structure. */ ++ dev->open = via_rhine_open; ++ dev->hard_start_xmit = via_rhine_start_tx; ++ dev->stop = via_rhine_close; ++ dev->get_stats = via_rhine_get_stats; ++/*** RTnet *** ++ dev->set_multicast_list = via_rhine_set_rx_mode; ++ dev->do_ioctl = netdev_ioctl; ++ dev->tx_timeout = via_rhine_tx_timeout; ++ dev->watchdog_timeo = TX_TIMEOUT; ++ *** RTnet ***/ ++ if (np->drv_flags & ReqTxAlign) ++ dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; ++ ++ /* dev->name not defined before register_netdev()! */ ++/*** RTnet ***/ ++ i = rt_register_rtnetdev(dev); ++ if (i) { ++ goto err_out_unmap; ++ } ++/*** RTnet ***/ ++ ++ /* The lower four bits are the media type. */ ++ if (option > 0) { ++ if (option & 0x220) ++ np->mii_if.full_duplex = 1; ++ np->default_port = option & 15; ++ } ++ if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0) ++ np->mii_if.full_duplex = 1; ++ ++ if (np->mii_if.full_duplex) { ++ printk(KERN_INFO "%s: Set to forced full duplex, autonegotiation" ++ " disabled.\n", dev->name); ++ np->mii_if_force_media = 1; /*** RTnet ***/ ++ } ++ ++ printk(KERN_INFO "%s: %s at 0x%lx, ", ++ dev->name, via_rhine_chip_info[chip_id].name, ++ (pci_flags & PCI_USES_IO) ? (long)ioaddr : memaddr); ++ ++ for (i = 0; i < 5; i++) ++ printk("%2.2x:", dev->dev_addr[i]); ++ printk("%2.2x, IRQ %d.\n", dev->dev_addr[i], pdev->irq); ++ ++ pci_set_drvdata(pdev, dev); ++ ++ if (np->drv_flags & CanHaveMII) { ++ int phy, phy_idx = 0; ++ np->phys[0] = 1; /* Standard for this chip. */ ++ for (phy = 1; phy < 32 && phy_idx < MAX_MII_CNT; phy++) { ++ int mii_status = mdio_read(dev, phy, 1); ++ if (mii_status != 0xffff && mii_status != 0x0000) { ++ np->phys[phy_idx++] = phy; ++ np->mii_if.advertising = mdio_read(dev, phy, 4); ++ printk(KERN_INFO "%s: MII PHY found at address %d, status " ++ "0x%4.4x advertising %4.4x Link %4.4x.\n", ++ dev->name, phy, mii_status, np->mii_if.advertising, ++ mdio_read(dev, phy, 5)); ++ ++ /* set IFF_RUNNING */ ++ if (mii_status & BMSR_LSTATUS) ++ rtnetif_carrier_on(dev); /*** RTnet ***/ ++ else ++ rtnetif_carrier_off(dev); /*** RTnet ***/ ++ } ++ } ++ np->mii_cnt = phy_idx; ++ np->mii_if.phy_id = np->phys[0]; ++ } ++ ++ /* Allow forcing the media type. */ ++ if (option > 0) { ++ if (option & 0x220) ++ np->mii_if.full_duplex = 1; ++ np->default_port = option & 0x3ff; ++ if (np->default_port & 0x330) { ++ /* FIXME: shouldn't someone check this variable? */ ++ /* np->medialock = 1; */ ++ printk(KERN_INFO " Forcing %dMbs %s-duplex operation.\n", ++ (option & 0x300 ? 100 : 10), ++ (option & 0x220 ? "full" : "half")); ++ if (np->mii_cnt) ++ mdio_write(dev, np->phys[0], MII_BMCR, ++ ((option & 0x300) ? 0x2000 : 0) | /* 100mbps? */ ++ ((option & 0x220) ? 0x0100 : 0)); /* Full duplex? 
*/ ++ } ++ } ++ ++ return 0; ++ ++err_out_unmap: ++#ifdef USE_MEM ++ iounmap((void *)ioaddr); ++err_out_free_res: ++#endif ++ pci_release_regions(pdev); ++err_out_free_netdev: ++/*** RTnet ***/ ++ rt_rtdev_disconnect(dev); ++ rtdev_free(dev); ++/*** RTnet ***/ ++err_out: ++ return -ENODEV; ++} ++ ++static int alloc_ring(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ring; ++ dma_addr_t ring_dma; ++ ++ ring = pci_alloc_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ &ring_dma); ++ if (!ring) { ++ printk(KERN_ERR "Could not allocate DMA memory.\n"); ++ return -ENOMEM; ++ } ++ if (np->drv_flags & ReqTxAlign) { ++ np->tx_bufs = pci_alloc_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, ++ &np->tx_bufs_dma); ++ if (np->tx_bufs == NULL) { ++ pci_free_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ ring, ring_dma); ++ return -ENOMEM; ++ } ++ } ++ ++ np->rx_ring = ring; ++ np->tx_ring = ring + RX_RING_SIZE * sizeof(struct rx_desc); ++ np->rx_ring_dma = ring_dma; ++ np->tx_ring_dma = ring_dma + RX_RING_SIZE * sizeof(struct rx_desc); ++ ++ return 0; ++} ++ ++void free_ring(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ ++ pci_free_consistent(np->pdev, ++ RX_RING_SIZE * sizeof(struct rx_desc) + ++ TX_RING_SIZE * sizeof(struct tx_desc), ++ np->rx_ring, np->rx_ring_dma); ++ np->tx_ring = NULL; ++ ++ if (np->tx_bufs) ++ pci_free_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, ++ np->tx_bufs, np->tx_bufs_dma); ++ ++ np->tx_bufs = NULL; ++ ++} ++ ++static void alloc_rbufs(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ dma_addr_t next; ++ int i; ++ ++ np->dirty_rx = np->cur_rx = 0; ++ ++ np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); ++ np->rx_head_desc = &np->rx_ring[0]; ++ next = np->rx_ring_dma; ++ ++ /* Init the ring entries */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].rx_status = 0; ++ np->rx_ring[i].desc_length = cpu_to_le32(np->rx_buf_sz); ++ next += sizeof(struct rx_desc); ++ np->rx_ring[i].next_desc = cpu_to_le32(next); ++ np->rx_skbuff[i] = 0; ++ } ++ /* Mark the last entry as wrapping the ring. */ ++ np->rx_ring[i-1].next_desc = cpu_to_le32(np->rx_ring_dma); ++ ++ /* Fill in the Rx buffers. Handle allocation failure gracefully. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ struct rtskb *skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); /*** RTnet ***/ ++ np->rx_skbuff[i] = skb; ++ if (skb == NULL) ++ break; ++ np->rx_skbuff_dma[i] = ++ pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, ++ PCI_DMA_FROMDEVICE); ++ ++ np->rx_ring[i].addr = cpu_to_le32(np->rx_skbuff_dma[i]); ++ np->rx_ring[i].rx_status = cpu_to_le32(DescOwn); ++ } ++ np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); ++} ++ ++static void free_rbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ /* Free all the skbuffs in the Rx queue. */ ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ np->rx_ring[i].rx_status = 0; ++ np->rx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. 
*/ ++ if (np->rx_skbuff[i]) { ++ pci_unmap_single(np->pdev, ++ np->rx_skbuff_dma[i], ++ np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ dev_kfree_rtskb(np->rx_skbuff[i]); /*** RTnet ***/ ++ } ++ np->rx_skbuff[i] = 0; ++ } ++} ++ ++static void alloc_tbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ dma_addr_t next; ++ int i; ++ ++ np->dirty_tx = np->cur_tx = 0; ++ next = np->tx_ring_dma; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_skbuff[i] = 0; ++ np->tx_ring[i].tx_status = 0; ++ np->tx_ring[i].desc_length = cpu_to_le32(TXDESC); ++ next += sizeof(struct tx_desc); ++ np->tx_ring[i].next_desc = cpu_to_le32(next); ++ np->tx_buf[i] = &np->tx_bufs[i * PKT_BUF_SZ]; ++ } ++ np->tx_ring[i-1].next_desc = cpu_to_le32(np->tx_ring_dma); ++ ++} ++ ++static void free_tbufs(struct rtnet_device* dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int i; ++ ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ np->tx_ring[i].tx_status = 0; ++ np->tx_ring[i].desc_length = cpu_to_le32(TXDESC); ++ np->tx_ring[i].addr = cpu_to_le32(0xBADF00D0); /* An invalid address. */ ++ if (np->tx_skbuff[i]) { ++ if (np->tx_skbuff_dma[i]) { ++ pci_unmap_single(np->pdev, ++ np->tx_skbuff_dma[i], ++ np->tx_skbuff[i]->len, PCI_DMA_TODEVICE); ++ } ++ dev_kfree_rtskb(np->tx_skbuff[i]); /*** RTnet ***/ ++ } ++ np->tx_skbuff[i] = 0; ++ np->tx_buf[i] = 0; ++ } ++} ++ ++static void init_registers(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int i; ++ ++ for (i = 0; i < 6; i++) ++ writeb(dev->dev_addr[i], ioaddr + StationAddr + i); ++ ++ /* Initialize other registers. */ ++ writew(0x0006, ioaddr + PCIBusConfig); /* Tune configuration??? */ ++ /* Configure initial FIFO thresholds. */ ++ writeb(0x20, ioaddr + TxConfig); ++ np->tx_thresh = 0x20; ++ np->rx_thresh = 0x60; /* Written in via_rhine_set_rx_mode(). */ ++ np->mii_if.full_duplex = 0; ++ ++ if (dev->if_port == 0) ++ dev->if_port = np->default_port; ++ ++ writel(np->rx_ring_dma, ioaddr + RxRingPtr); ++ writel(np->tx_ring_dma, ioaddr + TxRingPtr); ++ ++ via_rhine_set_rx_mode(dev); ++ ++ /* Enable interrupts by setting the interrupt mask. */ ++ writew(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | ++ IntrRxDropped | IntrRxNoBuf | IntrTxAborted | ++ IntrTxDone | IntrTxError | IntrTxUnderrun | ++ IntrPCIErr | IntrStatsMax | IntrLinkChange, ++ ioaddr + IntrEnable); ++ ++ np->chip_cmd = CmdStart|CmdTxOn|CmdRxOn|CmdNoTxPoll; ++ if (np->mii_if_force_media) /*** RTnet ***/ ++ np->chip_cmd |= CmdFDuplex; ++ writew(np->chip_cmd, ioaddr + ChipCmd); ++ ++ via_rhine_check_duplex(dev); ++ ++ /* The LED outputs of various MII xcvrs should be configured. */ ++ /* For NS or Mison phys, turn on bit 1 in register 0x17 */ ++ /* For ESI phys, turn on bit 7 in register 0x17. */ ++ mdio_write(dev, np->phys[0], 0x17, mdio_read(dev, np->phys[0], 0x17) | ++ (np->drv_flags & HasESIPhy) ? 0x0080 : 0x0001); ++} ++/* Read and write over the MII Management Data I/O (MDIO) interface. */ ++ ++static int mdio_read(struct rtnet_device *dev, int phy_id, int regnum) /*** RTnet ***/ ++{ ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 1024; ++ ++ /* Wait for a previous command to complete. 
*/ ++ while ((readb(ioaddr + MIICmd) & 0x60) && --boguscnt > 0) ++ ; ++ writeb(0x00, ioaddr + MIICmd); ++ writeb(phy_id, ioaddr + MIIPhyAddr); ++ writeb(regnum, ioaddr + MIIRegAddr); ++ writeb(0x40, ioaddr + MIICmd); /* Trigger read */ ++ boguscnt = 1024; ++ while ((readb(ioaddr + MIICmd) & 0x40) && --boguscnt > 0) ++ ; ++ return readw(ioaddr + MIIData); ++} ++ ++static void mdio_write(struct rtnet_device *dev, int phy_id, int regnum, int value) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int boguscnt = 1024; ++ ++ if (phy_id == np->phys[0]) { ++ switch (regnum) { ++ case MII_BMCR: /* Is user forcing speed/duplex? */ ++ if (value & 0x9000) /* Autonegotiation. */ ++ np->mii_if_force_media = 0; /*** RTnet ***/ ++ else ++ np->mii_if.full_duplex = (value & 0x0100) ? 1 : 0; ++ break; ++ case MII_ADVERTISE: ++ np->mii_if.advertising = value; ++ break; ++ } ++ } ++ ++ /* Wait for a previous command to complete. */ ++ while ((readb(ioaddr + MIICmd) & 0x60) && --boguscnt > 0) ++ ; ++ writeb(0x00, ioaddr + MIICmd); ++ writeb(phy_id, ioaddr + MIIPhyAddr); ++ writeb(regnum, ioaddr + MIIRegAddr); ++ writew(value, ioaddr + MIIData); ++ writeb(0x20, ioaddr + MIICmd); /* Trigger write. */ ++} ++ ++ ++static int via_rhine_open(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int i; ++ ++ /* Reset the chip. */ ++ writew(CmdReset, ioaddr + ChipCmd); ++ ++/*** RTnet ***/ ++ rt_stack_connect(dev, &STACK_manager); ++ i = rtdm_irq_request(&np->irq_handle, dev->irq, via_rhine_interrupt, ++ RTDM_IRQTYPE_SHARED, "rt_via-rhine", dev); ++/*** RTnet ***/ ++ if (i) { ++ return i; ++ } ++ ++ if (local_debug > 1) ++ printk(KERN_DEBUG "%s: via_rhine_open() irq %d.\n", ++ dev->name, np->pdev->irq); ++ ++ i = alloc_ring(dev); ++ if (i) { ++ return i; ++ } ++ alloc_rbufs(dev); ++ alloc_tbufs(dev); ++ wait_for_reset(dev, np->chip_id, dev->name); ++ init_registers(dev); ++ if (local_debug > 2) ++ printk(KERN_DEBUG "%s: Done via_rhine_open(), status %4.4x " ++ "MII status: %4.4x.\n", ++ dev->name, readw(ioaddr + ChipCmd), ++ mdio_read(dev, np->phys[0], MII_BMSR)); ++ ++ rtnetif_start_queue(dev); /*** RTnet ***/ ++ ++/*** RTnet ***/ ++ /* Set the timer to check for link beat. */ ++/*** RTnet ***/ ++ ++ return 0; ++} ++ ++static void via_rhine_check_duplex(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); ++ int negotiated = mii_lpa & np->mii_if.advertising; ++ int duplex; ++ ++ if (np->mii_if_force_media || mii_lpa == 0xffff) /*** RTnet ***/ ++ return; ++ duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040; ++ if (np->mii_if.full_duplex != duplex) { ++ np->mii_if.full_duplex = duplex; ++ if (local_debug) ++ printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d link" ++ " partner capability of %4.4x.\n", dev->name, ++ duplex ? 
"full" : "half", np->phys[0], mii_lpa); ++ if (duplex) ++ np->chip_cmd |= CmdFDuplex; ++ else ++ np->chip_cmd &= ~CmdFDuplex; ++ writew(np->chip_cmd, ioaddr + ChipCmd); ++ } ++} ++ ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int via_rhine_start_tx(struct rtskb *skb, struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ unsigned entry; ++ u32 intr_status; ++/*** RTnet ***/ ++ rtdm_lockctx_t context; ++/*** RTnet ***/ ++ ++ /* Caution: the write order is important here, set the field ++ with the "ownership" bits last. */ ++ ++ /* Calculate the next Tx descriptor entry. */ ++ entry = np->cur_tx % TX_RING_SIZE; ++ ++ if (skb->len < ETH_ZLEN) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if(skb == NULL) ++ return 0; ++ } ++ ++ np->tx_skbuff[entry] = skb; ++ ++ if ((np->drv_flags & ReqTxAlign) && ++ (((long)skb->data & 3) || /*** RTnet skb_shinfo(skb)->nr_frags != 0 || RTnet ***/ skb->ip_summed == CHECKSUM_PARTIAL) ++ ) { ++ /* Must use alignment buffer. */ ++ if (skb->len > PKT_BUF_SZ) { ++ /* packet too long, drop it */ ++ dev_kfree_rtskb(skb); /*** RTnet ***/ ++ np->tx_skbuff[entry] = NULL; ++ np->stats.tx_dropped++; ++ return 0; ++ } ++ ++/*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) { ++ rtdm_lock_get_irqsave(&np->lock, context); ++ ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ ++ rtskb_copy_and_csum_dev(skb, np->tx_buf[entry]); ++ } else { ++ /* no need to block the interrupts during copy */ ++ rtskb_copy_and_csum_dev(skb, np->tx_buf[entry]); ++ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ } ++/*** RTnet ***/ ++ ++ np->tx_skbuff_dma[entry] = 0; ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_bufs_dma + ++ (np->tx_buf[entry] - np->tx_bufs)); ++ } else { ++ np->tx_skbuff_dma[entry] = ++ pci_map_single(np->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); ++ np->tx_ring[entry].addr = cpu_to_le32(np->tx_skbuff_dma[entry]); ++ ++/*** RTnet ***/ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++/*** RTnet ***/ ++ } ++ ++ np->tx_ring[entry].desc_length = ++ cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); ++ ++ wmb(); ++ np->tx_ring[entry].tx_status = cpu_to_le32(DescOwn); ++ wmb(); ++ ++ np->cur_tx++; ++ ++ /* Non-x86 Todo: explicitly flush cache lines here. */ ++ ++ /* ++ * Wake the potentially-idle transmit channel unless errors are ++ * pending (the ISR must sort them out first). ++ */ ++ intr_status = get_intr_status(dev); ++ if ((intr_status & IntrTxErrSummary) == 0) { ++ writew(CmdTxDemand | np->chip_cmd, (void *)dev->base_addr + ChipCmd); ++ } ++ IOSYNC; ++ ++ if (np->cur_tx == np->dirty_tx + TX_QUEUE_LEN) ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ /*dev->trans_start = jiffies; *** RTnet ***/ ++ ++/*** RTnet ***/ ++ rtdm_lock_put_irqrestore(&np->lock, context); ++/*** RTnet ***/ ++ ++ if (local_debug > 4) { ++ rtdm_printk(KERN_DEBUG "%s: Transmit frame #%d queued in slot %d.\n", /*** RTnet ***/ ++ dev->name, np->cur_tx-1, entry); ++ } ++ return 0; ++} ++ ++/* The interrupt handler does all of the Rx thread work and cleans up ++ after the Tx thread. 
*/ ++static int via_rhine_interrupt(rtdm_irq_t *irq_handle) /*** RTnet ***/ ++{ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ struct rtnet_device *dev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ long ioaddr; ++ u32 intr_status; ++ int boguscnt = max_interrupt_work; ++ struct netdev_private *np = dev->priv; /*** RTnet ***/ ++ unsigned int old_packet_cnt = np->stats.rx_packets; /*** RTnet ***/ ++ int ret = RTDM_IRQ_NONE; ++ ++ ioaddr = dev->base_addr; ++ ++ while ((intr_status = get_intr_status(dev))) { ++ /* Acknowledge all of the current interrupt sources ASAP. */ ++ if (intr_status & IntrTxDescRace) ++ writeb(0x08, (void *)ioaddr + IntrStatus2); ++ writew(intr_status & 0xffff, (void *)ioaddr + IntrStatus); ++ IOSYNC; ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ if (local_debug > 4) ++ rtdm_printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ ++ if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped | ++ IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf)) ++ via_rhine_rx(dev, &time_stamp); ++ ++ if (intr_status & (IntrTxErrSummary | IntrTxDone)) { ++ if (intr_status & IntrTxErrSummary) { ++/*** RTnet ***/ ++ rtdm_printk(KERN_ERR "%s: via_rhine_interrupt(), Transmissions error\n", dev->name); ++/*** RTnet ***/ ++ } ++ via_rhine_tx(dev); ++ } ++ ++ /* Abnormal error summary/uncommon events handlers. */ ++ if (intr_status & (IntrPCIErr | IntrLinkChange | ++ IntrStatsMax | IntrTxError | IntrTxAborted | ++ IntrTxUnderrun | IntrTxDescRace)) ++ via_rhine_error(dev, intr_status); ++ ++ if (--boguscnt < 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work at interrupt, " /*** RTnet ***/ ++ "status=%#8.8x.\n", ++ dev->name, intr_status); ++ break; ++ } ++ } ++ ++ if (local_debug > 3) ++ rtdm_printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n", /*** RTnet ***/ ++ dev->name, readw((void *)ioaddr + IntrStatus)); ++ ++/*** RTnet ***/ ++ if (old_packet_cnt != np->stats.rx_packets) ++ rt_mark_stack_mgr(dev); ++ return ret; ++} ++ ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. */ ++static void via_rhine_tx(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int txstatus = 0, entry = np->dirty_tx % TX_RING_SIZE; ++ ++ rtdm_lock_get(&np->lock); /*** RTnet ***/ ++ ++ /* find and cleanup dirty tx descriptors */ ++ while (np->dirty_tx != np->cur_tx) { ++ txstatus = le32_to_cpu(np->tx_ring[entry].tx_status); ++ if (local_debug > 6) ++ rtdm_printk(KERN_DEBUG " Tx scavenge %d status %8.8x.\n", /*** RTnet ***/ ++ entry, txstatus); ++ if (txstatus & DescOwn) ++ break; ++ if (txstatus & 0x8000) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", /*** RTnet ***/ ++ dev->name, txstatus); ++ np->stats.tx_errors++; ++ if (txstatus & 0x0400) np->stats.tx_carrier_errors++; ++ if (txstatus & 0x0200) np->stats.tx_window_errors++; ++ if (txstatus & 0x0100) np->stats.tx_aborted_errors++; ++ if (txstatus & 0x0080) np->stats.tx_heartbeat_errors++; ++ if (((np->chip_id == VT86C100A) && txstatus & 0x0002) || ++ (txstatus & 0x0800) || (txstatus & 0x1000)) { ++ np->stats.tx_fifo_errors++; ++ np->tx_ring[entry].tx_status = cpu_to_le32(DescOwn); ++ break; /* Keep the skb - we try again */ ++ } ++ /* Transmitter restarted in 'abnormal' handler. 
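The handler above follows the usual RTDM interrupt pattern: the line is attached with rtdm_irq_request() in via_rhine_open(), the handler recovers its device with rtdm_irq_get_arg(), and it reports RTDM_IRQ_HANDLED or RTDM_IRQ_NONE so that RTDM_IRQTYPE_SHARED lines keep working. A reduced skeleton of that pattern is sketched below; the demo_* names and the pending-check hook are placeholders, and the include path assumes the Xenomai 3 RTDM driver API.

/*
 * Minimal RTDM interrupt skeleton (illustrative sketch only; demo_* names are
 * placeholders). It mirrors the calls used by this driver: rtdm_irq_request(),
 * rtdm_irq_free(), rtdm_irq_get_arg(), and the RTDM_IRQ_HANDLED/RTDM_IRQ_NONE
 * return codes.
 */
#include <rtdm/driver.h>        /* Xenomai 3 RTDM driver API (assumed path) */

struct demo_dev {
    rtdm_irq_t irq_handle;
    int (*hw_irq_pending)(struct demo_dev *dev);   /* placeholder hardware check */
};

static int demo_isr(rtdm_irq_t *irq_handle)
{
    struct demo_dev *dev = rtdm_irq_get_arg(irq_handle, struct demo_dev);

    /* On a shared line, first check whether this device raised the IRQ. */
    if (!dev->hw_irq_pending(dev))
        return RTDM_IRQ_NONE;

    /* ... acknowledge the interrupt and service RX/TX here ... */

    return RTDM_IRQ_HANDLED;
}

static int demo_attach(struct demo_dev *dev, unsigned int irq)
{
    /* Same flags and argument order as the rtdm_irq_request() call in open(). */
    return rtdm_irq_request(&dev->irq_handle, irq, demo_isr,
                            RTDM_IRQTYPE_SHARED, "demo-nic", dev);
}

static void demo_detach(struct demo_dev *dev)
{
    rtdm_irq_free(&dev->irq_handle);
}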
*/ ++ } else { ++ if (np->chip_id == VT86C100A) ++ np->stats.collisions += (txstatus >> 3) & 0x0F; ++ else ++ np->stats.collisions += txstatus & 0x0F; ++ if (local_debug > 6) ++ rtdm_printk(KERN_DEBUG "collisions: %1.1x:%1.1x\n", /*** RTnet ***/ ++ (txstatus >> 3) & 0xF, ++ txstatus & 0xF); ++ np->stats.tx_bytes += np->tx_skbuff[entry]->len; ++ np->stats.tx_packets++; ++ } ++ /* Free the original skb. */ ++ if (np->tx_skbuff_dma[entry]) { ++ pci_unmap_single(np->pdev, ++ np->tx_skbuff_dma[entry], ++ np->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); ++ } ++ dev_kfree_rtskb(np->tx_skbuff[entry]); /*** RTnet ***/ ++ np->tx_skbuff[entry] = NULL; ++ entry = (++np->dirty_tx) % TX_RING_SIZE; ++ } ++ if ((np->cur_tx - np->dirty_tx) < TX_QUEUE_LEN - 4) ++ rtnetif_wake_queue (dev); /*** RTnet ***/ ++ ++ rtdm_lock_put(&np->lock); /*** RTnet ***/ ++} ++ ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity and better register allocation. */ ++static void via_rhine_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ int entry = np->cur_rx % RX_RING_SIZE; ++ int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx; ++ ++ if (local_debug > 4) { ++ rtdm_printk(KERN_DEBUG "%s: via_rhine_rx(), entry %d status %8.8x.\n", /*** RTnet ***/ ++ dev->name, entry, le32_to_cpu(np->rx_head_desc->rx_status)); ++ } ++ ++ /* If EOP is set on the next entry, it's a new packet. Send it up. */ ++ while ( ! (np->rx_head_desc->rx_status & cpu_to_le32(DescOwn))) { ++ struct rx_desc *desc = np->rx_head_desc; ++ u32 desc_status = le32_to_cpu(desc->rx_status); ++ int data_size = desc_status >> 16; ++ ++ if (local_debug > 4) ++ rtdm_printk(KERN_DEBUG " via_rhine_rx() status is %8.8x.\n", /*** RTnet ***/ ++ desc_status); ++ if (--boguscnt < 0) ++ break; ++ if ( (desc_status & (RxWholePkt | RxErr)) != RxWholePkt) { ++ if ((desc_status & RxWholePkt) != RxWholePkt) { ++ rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame spanned " /*** RTnet ***/ ++ "multiple buffers, entry %#x length %d status %8.8x!\n", ++ dev->name, entry, data_size, desc_status); ++ rtdm_printk(KERN_WARNING "%s: Oversized Ethernet frame %p vs %p.\n", /*** RTnet ***/ ++ dev->name, np->rx_head_desc, &np->rx_ring[entry]); ++ np->stats.rx_length_errors++; ++ } else if (desc_status & RxErr) { ++ /* There was a error. */ ++ if (local_debug > 2) ++ rtdm_printk(KERN_DEBUG " via_rhine_rx() Rx error was %8.8x.\n", /*** RTnet ***/ ++ desc_status); ++ np->stats.rx_errors++; ++ if (desc_status & 0x0030) np->stats.rx_length_errors++; ++ if (desc_status & 0x0048) np->stats.rx_fifo_errors++; ++ if (desc_status & 0x0004) np->stats.rx_frame_errors++; ++ if (desc_status & 0x0002) ++ /* RTnet: this is only updated in the interrupt handler */ ++ np->stats.rx_crc_errors++; ++ } ++ } else { ++ struct rtskb *skb; /*** RTnet ***/ ++ /* Length should omit the CRC */ ++ int pkt_len = data_size - 4; ++ ++ /* Check if the packet is long enough to accept without copying ++ to a minimally-sized skbuff. 
*/ ++/*** RTnet ***/ ++ { ++/*** RTnet ***/ ++ skb = np->rx_skbuff[entry]; ++ if (skb == NULL) { ++ rtdm_printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n", /*** RTnet ***/ ++ dev->name); ++ break; ++ } ++ np->rx_skbuff[entry] = NULL; ++ rtskb_put(skb, pkt_len); /*** RTnet ***/ ++ pci_unmap_single(np->pdev, np->rx_skbuff_dma[entry], ++ np->rx_buf_sz, PCI_DMA_FROMDEVICE); ++ } ++/*** RTnet ***/ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ /*dev->last_rx = jiffies;*/ ++/*** RTnet ***/ ++ np->stats.rx_bytes += pkt_len; ++ np->stats.rx_packets++; ++ } ++ entry = (++np->cur_rx) % RX_RING_SIZE; ++ np->rx_head_desc = &np->rx_ring[entry]; ++ } ++ ++ /* Refill the Rx ring buffers. */ ++ for (; np->cur_rx - np->dirty_rx > 0; np->dirty_rx++) { ++ struct rtskb *skb; /*** RTnet ***/ ++ entry = np->dirty_rx % RX_RING_SIZE; ++ if (np->rx_skbuff[entry] == NULL) { ++ skb = rtnetdev_alloc_rtskb(dev, np->rx_buf_sz); /*** RTnet ***/ ++ np->rx_skbuff[entry] = skb; ++ if (skb == NULL) ++ break; /* Better luck next round. */ ++ np->rx_skbuff_dma[entry] = ++ pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, ++ PCI_DMA_FROMDEVICE); ++ np->rx_ring[entry].addr = cpu_to_le32(np->rx_skbuff_dma[entry]); ++ } ++ np->rx_ring[entry].rx_status = cpu_to_le32(DescOwn); ++ } ++ ++ /* Pre-emptively restart Rx engine. */ ++ writew(readw((void *)dev->base_addr + ChipCmd) | CmdRxOn | CmdRxDemand, ++ (void *)dev->base_addr + ChipCmd); ++} ++ ++/* Clears the "tally counters" for CRC errors and missed frames(?). ++ It has been reported that some chips need a write of 0 to clear ++ these, for others the counters are set to 1 when written to and ++ instead cleared when read. So we clear them both ways ... */ ++static inline void clear_tally_counters(void *ioaddr) ++{ ++ writel(0, ioaddr + RxMissed); ++ readw(ioaddr + RxCRCErrs); ++ readw(ioaddr + RxMissed); ++} ++ ++static void via_rhine_restart_tx(struct rtnet_device *dev) { /*** RTnet ***/ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ int entry = np->dirty_tx % TX_RING_SIZE; ++ u32 intr_status; ++ ++ /* ++ * If new errors occured, we need to sort them out before doing Tx. ++ * In that case the ISR will be back here RSN anyway. ++ */ ++ intr_status = get_intr_status(dev); ++ ++ if ((intr_status & IntrTxErrSummary) == 0) { ++ ++ /* We know better than the chip where it should continue. */ ++ writel(np->tx_ring_dma + entry * sizeof(struct tx_desc), ++ ioaddr + TxRingPtr); ++ ++ writew(CmdTxDemand | np->chip_cmd, ioaddr + ChipCmd); ++ IOSYNC; ++ } ++ else { ++ /* This should never happen */ ++ if (local_debug > 1) ++ rtdm_printk(KERN_WARNING "%s: via_rhine_restart_tx() " /*** RTnet ***/ ++ "Another error occured %8.8x.\n", ++ dev->name, intr_status); ++ } ++ ++} ++ ++static void via_rhine_error(struct rtnet_device *dev, int intr_status) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ ++ rtdm_lock_get(&np->lock); /*** RTnet ***/ ++ ++ if (intr_status & (IntrLinkChange)) { ++ if (readb(ioaddr + MIIStatus) & 0x02) { ++ /* Link failed, restart autonegotiation. 
*/ ++ if (np->drv_flags & HasDavicomPhy) ++ mdio_write(dev, np->phys[0], MII_BMCR, 0x3300); ++ } else ++ via_rhine_check_duplex(dev); ++ if (local_debug) ++ rtdm_printk(KERN_ERR "%s: MII status changed: Autonegotiation " /*** RTnet ***/ ++ "advertising %4.4x partner %4.4x.\n", dev->name, ++ mdio_read(dev, np->phys[0], MII_ADVERTISE), ++ mdio_read(dev, np->phys[0], MII_LPA)); ++ } ++ if (intr_status & IntrStatsMax) { ++ np->stats.rx_crc_errors += readw(ioaddr + RxCRCErrs); ++ np->stats.rx_missed_errors += readw(ioaddr + RxMissed); ++ clear_tally_counters(ioaddr); ++ } ++ if (intr_status & IntrTxAborted) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Abort %8.8x, frame dropped.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ } ++ if (intr_status & IntrTxUnderrun) { ++ if (np->tx_thresh < 0xE0) ++ writeb(np->tx_thresh += 0x20, ioaddr + TxConfig); ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Transmitter underrun, Tx " /*** RTnet ***/ ++ "threshold now %2.2x.\n", ++ dev->name, np->tx_thresh); ++ } ++ if (intr_status & IntrTxDescRace) { ++ if (local_debug > 2) ++ rtdm_printk(KERN_INFO "%s: Tx descriptor write-back race.\n", /*** RTnet ***/ ++ dev->name); ++ } ++ if ((intr_status & IntrTxError) && ~( IntrTxAborted | IntrTxUnderrun | ++ IntrTxDescRace )) { ++ if (np->tx_thresh < 0xE0) { ++ writeb(np->tx_thresh += 0x20, ioaddr + TxConfig); ++ } ++ if (local_debug > 1) ++ rtdm_printk(KERN_INFO "%s: Unspecified error. Tx " /*** RTnet ***/ ++ "threshold now %2.2x.\n", ++ dev->name, np->tx_thresh); ++ } ++ if (intr_status & ( IntrTxAborted | IntrTxUnderrun | IntrTxDescRace | ++ IntrTxError )) ++ via_rhine_restart_tx(dev); ++ ++ if (intr_status & ~( IntrLinkChange | IntrStatsMax | IntrTxUnderrun | ++ IntrTxError | IntrTxAborted | IntrNormalSummary | ++ IntrTxDescRace )) { ++ if (local_debug > 1) ++ rtdm_printk(KERN_ERR "%s: Something Wicked happened! %8.8x.\n", /*** RTnet ***/ ++ dev->name, intr_status); ++ } ++ ++ rtdm_lock_put(&np->lock); /*** RTnet ***/ ++} ++ ++static struct net_device_stats *via_rhine_get_stats(struct rtnet_device *rtdev) ++{ ++ struct netdev_private *np = rtdev->priv; ++ long ioaddr = rtdev->base_addr; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&np->lock, context); ++ np->stats.rx_crc_errors += readw(ioaddr + RxCRCErrs); ++ np->stats.rx_missed_errors += readw(ioaddr + RxMissed); ++ clear_tally_counters((void *)ioaddr); ++ rtdm_lock_put_irqrestore(&np->lock, context); ++ ++ return &np->stats; ++} ++ ++static void via_rhine_set_rx_mode(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ struct netdev_private *np = dev->priv; ++ void *ioaddr = (void *)dev->base_addr; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ u8 rx_mode; /* Note: 0x02=accept runt, 0x01=accept errs */ ++ ++ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ ++ /* Unconditionally log net taps. */ ++ printk(KERN_NOTICE "%s: Promiscuous mode enabled.\n", dev->name); ++ rx_mode = 0x1C; ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter0); ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter1); ++ } else if (dev->flags & IFF_ALLMULTI) { ++ /* Too many to match, or accept all multicasts. 
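The error and statistics paths above share one locking convention around the per-device rtdm_lock_t: rtdm_lock_get_irqsave()/rtdm_lock_put_irqrestore() when entered from task context, so the real-time interrupt cannot preempt the critical section, and the plain rtdm_lock_get()/rtdm_lock_put() pair when already running inside the interrupt handler. A condensed sketch of that convention, using placeholder names, is shown below.

/* Condensed sketch of the locking convention used above (placeholder names). */
#include <rtdm/driver.h>        /* rtdm_lock_t, rtdm_lockctx_t (assumed path) */

struct demo_priv {
    rtdm_lock_t lock;
    unsigned long rx_packets;   /* example of state shared with the ISR */
};

static void demo_init(struct demo_priv *p)
{
    rtdm_lock_init(&p->lock);   /* same initialiser used in via_rhine_init_one() */
}

/* Task context: interrupts must be masked while the lock is held. */
static unsigned long demo_read_stats(struct demo_priv *p)
{
    rtdm_lockctx_t context;
    unsigned long val;

    rtdm_lock_get_irqsave(&p->lock, context);
    val = p->rx_packets;
    rtdm_lock_put_irqrestore(&p->lock, context);
    return val;
}

/* Interrupt context: interrupts are already off, so the plain form is enough. */
static void demo_isr_update(struct demo_priv *p)
{
    rtdm_lock_get(&p->lock);
    p->rx_packets++;
    rtdm_lock_put(&p->lock);
}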
*/ ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter0); ++ writel(0xffffffff, (void *)ioaddr + MulticastFilter1); ++ rx_mode = 0x0C; ++ } else { ++ memset(mc_filter, 0, sizeof(mc_filter)); ++ writel(mc_filter[0], (void *)ioaddr + MulticastFilter0); ++ writel(mc_filter[1], (void *)ioaddr + MulticastFilter1); ++ rx_mode = 0x0C; ++ } ++ writeb(np->rx_thresh | rx_mode, (void *)ioaddr + RxConfig); ++} ++ ++/*** RTnet ***/ ++/*** RTnet ***/ ++ ++static int via_rhine_close(struct rtnet_device *dev) /*** RTnet ***/ ++{ ++ long ioaddr = dev->base_addr; ++ struct netdev_private *np = dev->priv; ++ int i; /*** RTnet ***/ ++ rtdm_lockctx_t context; ++ ++/*** RTnet *** ++ del_timer_sync(&np->timer); ++ *** RTnet ***/ ++ ++ rtdm_lock_get_irqsave(&np->lock, context); /*** RTnet ***/ ++ ++ rtnetif_stop_queue(dev); /*** RTnet ***/ ++ ++ if (local_debug > 1) ++ rtdm_printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n", /*** RTnet ***/ ++ dev->name, readw((void *)ioaddr + ChipCmd)); ++ ++ /* Switch to loopback mode to avoid hardware races. */ ++ writeb(np->tx_thresh | 0x02, (void *)ioaddr + TxConfig); ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ writew(0x0000, (void *)ioaddr + IntrEnable); ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ writew(CmdStop, (void *)ioaddr + ChipCmd); ++ ++ rtdm_lock_put_irqrestore(&np->lock, context); /*** RTnet ***/ ++ ++/*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&np->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(dev); ++/*** RTnet ***/ ++ ++ free_rbufs(dev); ++ free_tbufs(dev); ++ free_ring(dev); ++ ++ return 0; ++} ++ ++ ++static void via_rhine_remove_one (struct pci_dev *pdev) ++{ ++ /*** RTnet ***/ ++ struct rtnet_device *dev = pci_get_drvdata(pdev); ++ ++ rt_unregister_rtnetdev(dev); ++ rt_rtdev_disconnect(dev); ++/*** RTnet ***/ ++ ++ pci_release_regions(pdev); ++ ++#ifdef USE_MEM ++ iounmap((char *)(dev->base_addr)); ++#endif ++ ++ rtdev_free(dev); /*** RTnet ***/ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++ ++static struct pci_driver via_rhine_driver = { ++ .name = DRV_NAME, ++ .id_table = via_rhine_pci_tbl, ++ .probe = via_rhine_init_one, ++ .remove = via_rhine_remove_one, ++}; ++ ++ ++static int __init via_rhine_init (void) ++{ ++/* when a module, this is printed whether or not devices are found in probe */ ++#ifdef MODULE ++ printk(version); ++#endif ++ return pci_register_driver (&via_rhine_driver); ++} ++ ++ ++static void __exit via_rhine_cleanup (void) ++{ ++ pci_unregister_driver (&via_rhine_driver); ++} ++ ++ ++module_init(via_rhine_init); ++module_exit(via_rhine_cleanup); ++ ++ ++/* ++ * Local variables: ++ * compile-command: "gcc -DMODULE -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -c via-rhine.c `[ -f /usr/include/linux/modversions.h ] && echo -DMODVERSIONS`" ++ * c-indent-level: 4 ++ * c-basic-offset: 4 ++ * tab-width: 4 ++ * End: ++ */ +--- linux/drivers/xenomai/net/drivers/mpc8xx_enet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8xx_enet.c 2021-04-07 16:01:27.269634106 +0800 +@@ -0,0 +1,1073 @@ ++/* ++ * BK Id: SCCS/s.enet.c 1.24 01/19/02 03:07:14 dan ++ */ ++/* ++ * Ethernet driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * I copied the basic skeleton from the lance driver, because I did not ++ * know how to write the Linux driver, but I did know how the LANCE worked. 
++ * ++ * This version of the driver is somewhat selectable for the different ++ * processor/board combinations. It works for the boards I know about ++ * now, and should be easily modified to include others. Some of the ++ * configuration information is contained in and the ++ * remainder is here. ++ * ++ * Buffer descriptors are kept in the CPM dual port RAM, and the frame ++ * buffers are in the host memory. ++ * ++ * Right now, I am very watseful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Ported to RTnet. ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet MPC8xx SCC Ethernet driver"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++static unsigned int rtnet_scc = 1; /* SCC1 */ ++MODULE_PARM(rtnet_scc, "i"); ++MODULE_PARM_DESC(rtnet_scc, "SCCx port for RTnet, x=1..3 (default=1)"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* ++ * Theory of Operation ++ * ++ * The MPC8xx CPM performs the Ethernet processing on SCC1. It can use ++ * an aribtrary number of buffers on byte boundaries, but must have at ++ * least two receive buffers to prevent constant overrun conditions. ++ * ++ * The buffer descriptors are allocated from the CPM dual port memory ++ * with the data buffers allocated from host memory, just like all other ++ * serial communication protocols. The host memory buffers are allocated ++ * from the free page pool, and then divided into smaller receive and ++ * transmit buffers. The size of the buffers should be a power of two, ++ * since that nicely divides the page. This creates a ring buffer ++ * structure similar to the LANCE and other controllers. ++ * ++ * Like the LANCE driver: ++ * The driver runs as two independent, single-threaded flows of control. One ++ * is the send-packet routine, which enforces single-threaded use by the ++ * cep->tx_busy flag. The other thread is the interrupt handler, which is ++ * single threaded by the hardware and other software. ++ * ++ * The send packet thread has partial control over the Tx ring and the ++ * 'cep->tx_busy' flag. It sets the tx_busy flag whenever it's queuing a Tx ++ * packet. If the next queue slot is empty, it clears the tx_busy flag when ++ * finished otherwise it sets the 'lp->tx_full' flag. ++ * ++ * The MBX has a control register external to the MPC8xx that has some ++ * control of the Ethernet interface. Information is in the manual for ++ * your board. ++ * ++ * The RPX boards have an external control/status register. Consult the ++ * programming documents for details unique to your board. ++ * ++ * For the TQM8xx(L) modules, there is no control register interface. ++ * All functions are directly controlled using I/O pins. See . 
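As the comment above notes, the CPM keeps its buffer descriptors as a ring in dual-port RAM; unlike the Rhine driver earlier in this patch, which indexes its rings with free-running counters, this driver advances a descriptor pointer and relies on a WRAP bit in the last entry's status word. A minimal stand-alone sketch of that traversal, with simplified stand-in types and a stand-in wrap flag, follows.

/* Illustrative sketch (simplified stand-in types): walking a buffer-descriptor
 * ring by pointer and wrapping when the WRAP status bit of the last entry is
 * seen -- the scheme scc_enet_start_xmit()/scc_enet_rx() use below. */
#include <stdio.h>

#define BD_WRAP  0x2000   /* stand-in for BD_ENET_TX_WRAP / BD_ENET_RX_WRAP */
#define RING_LEN 8

struct demo_bd { unsigned short sc; unsigned short datlen; unsigned long bufaddr; };

static struct demo_bd ring[RING_LEN];

/* Advance to the next descriptor, honouring the wrap bit. */
static struct demo_bd *next_bd(struct demo_bd *bdp)
{
    if (bdp->sc & BD_WRAP)
        return &ring[0];
    return bdp + 1;
}

int main(void)
{
    struct demo_bd *bdp = &ring[0];
    int i;

    ring[RING_LEN - 1].sc |= BD_WRAP;   /* mark the last entry, as the init code does */

    /* Walk twice around the ring to show the wrap in action. */
    for (i = 0; i < 2 * RING_LEN; i++)
        bdp = next_bd(bdp);

    printf("back at entry %ld\n", (long)(bdp - ring));   /* prints 0 */
    return 0;
}

The transmit and receive loops below apply exactly this test to BD_ENET_TX_WRAP and BD_ENET_RX_WRAP.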
++ */ ++ ++/* The transmitter timeout ++ */ ++#define TX_TIMEOUT (2*HZ) ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define CPM_ENET_RX_PAGES 4 ++#define CPM_ENET_RX_FRSIZE 2048 ++#define CPM_ENET_RX_FRPPG (PAGE_SIZE / CPM_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES) ++#define TX_RING_SIZE 8 /* Must be power of two */ ++#define TX_RING_MOD_MASK 7 /* for this to work */ ++ ++/* The CPM stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* The CPM buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct scc_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ scc_t *sccp; ++ ++ /* Virtual addresses for the receive buffers because we can't ++ * do a __va() on them anymore. ++ */ ++ unsigned char *rx_vaddr[RX_RING_SIZE]; ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++}; ++ ++static int scc_enet_open(struct rtnet_device *rtdev); ++static int scc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int scc_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int scc_enet_interrupt(rtdm_irq_t *irq_handle); ++static int scc_enet_close(struct rtnet_device *rtdev); ++ ++static struct net_device_stats *scc_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++#endif ++ ++#ifndef ORIGINAL_VERSION ++static struct rtnet_device *rtdev_root = NULL; ++#endif ++ ++/* Typically, 860(T) boards use SCC1 for Ethernet, and other 8xx boards ++ * use SCC2. Some even may use SCC3. ++ * This is easily extended if necessary. ++ * These values are set when the driver is initialized. ++ */ ++static int CPM_CR_ENET; ++static int PROFF_ENET; ++static int SCC_ENET; ++static int CPMVEC_ENET; ++ ++static int ++scc_enet_open(struct rtnet_device *rtdev) ++{ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. 
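The "must be power of two" note on TX_RING_SIZE above exists because the skb indices are wrapped with a bitmask rather than a division: for a power-of-two size N, i & (N - 1) equals i % N. A small stand-alone check of that identity, reusing the values from the defines above, is sketched here.

/* Stand-alone check: masking with TX_RING_MOD_MASK (7) matches a modulo by
 * TX_RING_SIZE (8) only because 8 is a power of two. */
#include <assert.h>
#include <stdio.h>

#define TX_RING_SIZE     8
#define TX_RING_MOD_MASK 7

int main(void)
{
    unsigned int i;

    for (i = 0; i < 100; i++)
        assert((i & TX_RING_MOD_MASK) == (i % TX_RING_SIZE));

    /* e.g. the skb_cur update in scc_enet_start_xmit(): (7 + 1) & 7 == 0 */
    printf("(7 + 1) & TX_RING_MOD_MASK = %u\n", (7 + 1) & TX_RING_MOD_MASK);
    return 0;
}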
++ */ ++ rtnetif_start_queue(rtdev); ++ ++ return 0; /* Always succeed */ ++} ++ ++static int ++scc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ /* Fill in a Tx ring entry */ ++ bdp = cep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since cep->tx_busy should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* If the frame is short, tell CPM to pad it. ++ */ ++ if (skb->len <= ETH_ZLEN) ++ bdp->cbd_sc |= BD_ENET_TX_PAD; ++ else ++ bdp->cbd_sc &= ~BD_ENET_TX_PAD; ++ ++ /* Set buffer length and buffer pointer. ++ */ ++ bdp->cbd_datlen = skb->len; ++ bdp->cbd_bufaddr = __pa(skb->data); ++ ++ /* Save skb pointer. ++ */ ++ cep->tx_skbuff[cep->skb_cur] = skb; ++ ++ cep->stats.tx_bytes += skb->len; ++ cep->skb_cur = (cep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ /* Prevent interrupts from changing the Tx ring from underneath us. */ ++ // *** RTnet *** ++ rtdm_lock_get_irqsave(&cep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ flush_dcache_range((unsigned long)(skb->data), ++ (unsigned long)(skb->data + skb->len)); ++ ++ ++ /* Send it on its way. Tell CPM its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++ /* If this was the last BD in the ring, start at the beginning again. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ cep->tx_full = 1; ++ } ++ ++ cep->cur_tx = (cbd_t *)bdp; ++ ++ // *** RTnet *** ++ rtdm_lock_put_irqrestore(&cep->lock, context); ++ ++ return 0; ++} ++ ++#ifdef ORIGINAL_VERSION ++static void ++scc_enet_timeout(struct net_device *dev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)dev->priv; ++ ++ printk("%s: transmit timed out.\n", dev->name); ++ cep->stats.tx_errors++; ++#ifndef final_version ++ { ++ int i; ++ cbd_t *bdp; ++ printk(" Ring data dump: cur_tx %p%s cur_rx %p.\n", ++ cep->cur_tx, cep->tx_full ? " (full)" : "", ++ cep->cur_rx); ++ bdp = cep->tx_bd_base; ++ for (i = 0 ; i < TX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp = cep->rx_bd_base; ++ for (i = 0 ; i < RX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ } ++#endif ++ if (!cep->tx_full) ++ netif_wake_queue(dev); ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. ++ * This is called from the CPM handler, not the MPC core interrupt. 
++ */ ++static int scc_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ struct scc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort int_events; ++ int must_restart; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ int_events = cep->sccp->scc_scce; ++ cep->sccp->scc_scce = int_events; ++ must_restart = 0; ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & SCCE_ENET_RXF) { ++ scc_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Check for a transmit error. The manual is a little unclear ++ * about this, so the debug code until I get it figured out. It ++ * appears that if TXE is set, then TXB is not set. However, ++ * if carrier sense is lost during frame transmission, the TXE ++ * bit is set, "and continues the buffer transmission normally." ++ * I don't know if "normally" implies TXB is set when the buffer ++ * descriptor is closed.....trial and error :-). ++ */ ++ ++ /* Transmit OK, or non-fatal error. Update the buffer descriptors. ++ */ ++ if (int_events & (SCCE_ENET_TXE | SCCE_ENET_TXB)) { ++ rtdm_lock_get(&cep->lock); ++ bdp = cep->dirty_tx; ++ while ((bdp->cbd_sc&BD_ENET_TX_READY)==0) { ++ RT_DEBUG(__FUNCTION__": Tx ok\n"); ++ if ((bdp==cep->cur_tx) && (cep->tx_full == 0)) ++ break; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ cep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ cep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ cep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ cep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ cep->stats.tx_carrier_errors++; ++ ++ ++ /* No heartbeat or Lost carrier are not really bad errors. ++ * The others require a restart transmit command. ++ */ ++ if (bdp->cbd_sc & ++ (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { ++ must_restart = 1; ++ cep->stats.tx_errors++; ++ } ++ ++ cep->stats.tx_packets++; ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ cep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. ++ */ ++ dev_kfree_rtskb(cep->tx_skbuff[cep->skb_dirty]); ++ cep->skb_dirty = (cep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* I don't know if we can be held off from processing these ++ * interrupts for more than one frame time. I really hope ++ * not. In such a case, we would now want to check the ++ * currently available BD (cur_tx) and determine if any ++ * buffers between the dirty_tx and cur_tx have also been ++ * sent. We would want to process anything in between that ++ * does not have BD_ENET_TX_READY set. ++ */ ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (cep->tx_full) { ++ cep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ ++ cep->dirty_tx = (cbd_t *)bdp; ++ } ++ ++ if (must_restart) { ++ volatile cpm8xx_t *cp; ++ ++ /* Some transmit errors cause the transmitter to shut ++ * down. We now issue a restart transmit. 
Since the ++ * errors close the BD and update the pointers, the restart ++ * _should_ pick up without having to reset any of our ++ * pointers either. ++ */ ++ cp = cpmp; ++ cp->cp_cpcr = ++ mk_cr_cmd(CPM_CR_ENET, CPM_CR_RESTART_TX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ } ++ rtdm_lock_put(&cep->lock); ++ } ++ ++ /* Check for receive busy, i.e. packets coming but no place to ++ * put them. This "can't happen" because the receive interrupt ++ * is tossing previous frames. ++ */ ++ if (int_events & SCCE_ENET_BSY) { ++ cep->stats.rx_dropped++; ++ rtdm_printk("CPM ENET: BSY can't happen.\n"); ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static int ++scc_enet_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct scc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort pkt_len; ++ struct rtskb *skb; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = cep->cur_rx; ++ ++ for (;;) { ++ ++ if (bdp->cbd_sc & BD_ENET_RX_EMPTY) ++ break; ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, both ++ * the first and last indicators should be set. ++ */ ++ if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) != ++ (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) ++ rtdm_printk("CPM ENET: rcv is not first+last\n"); ++#endif ++ ++ /* Frame too long or too short. ++ */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) ++ cep->stats.rx_length_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ cep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ cep->stats.rx_crc_errors++; ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_CL) { ++ cep->stats.rx_frame_errors++; ++ } ++ else { ++ ++ /* Process the incoming frame. ++ */ ++ cep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ cep->stats.rx_bytes += pkt_len; ++ ++ /* This does 16 byte alignment, much more than we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len-4); ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ cep->stats.rx_dropped++; ++ } ++ else { ++ rtskb_put(skb,pkt_len-4); /* Make room */ ++ memcpy(skb->data, ++ cep->rx_vaddr[bdp - cep->rx_bd_base], ++ pkt_len-4); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++ } ++ ++ /* Clear the status flags for this buffer. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. ++ */ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. 
++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = cep->rx_bd_base; ++ else ++ bdp++; ++ ++ } ++ cep->cur_rx = (cbd_t *)bdp; ++ ++ return 0; ++} ++ ++static int ++scc_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. ++ */ ++ rtnetif_stop_queue(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *scc_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ ++ return &cep->stats; ++} ++ ++#ifdef ORIGINAL_VERSION ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). ++ */ ++ ++static void set_multicast_list(struct net_device *dev) ++{ ++ struct scc_enet_private *cep; ++ struct dev_mc_list *dmi; ++ u_char *mcptr, *tdptr; ++ volatile scc_enet_t *ep; ++ int i, j; ++ cep = (struct scc_enet_private *)dev->priv; ++ ++ /* Get pointer to SCC area in parameter RAM. ++ */ ++ ep = (scc_enet_t *)dev->base_addr; ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ cep->sccp->scc_pmsr |= SCC_PMSR_PRO; ++ } else { ++ ++ cep->sccp->scc_pmsr &= ~SCC_PMSR_PRO; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->sen_gaddr1 = 0xffff; ++ ep->sen_gaddr2 = 0xffff; ++ ep->sen_gaddr3 = 0xffff; ++ ep->sen_gaddr4 = 0xffff; ++ } ++ else { ++ /* Clear filter and add the addresses in the list. ++ */ ++ ep->sen_gaddr1 = 0; ++ ep->sen_gaddr2 = 0; ++ ep->sen_gaddr3 = 0; ++ ep->sen_gaddr4 = 0; ++ ++ dmi = dev->mc_list; ++ ++ for (i=0; imc_count; i++) { ++ ++ /* Only support group multicast for now. ++ */ ++ if (!(dmi->dmi_addr[0] & 1)) ++ continue; ++ ++ /* The address in dmi_addr is LSB first, ++ * and taddr is MSB first. We have to ++ * copy bytes MSB first from dmi_addr. ++ */ ++ mcptr = (u_char *)dmi->dmi_addr + 5; ++ tdptr = (u_char *)&ep->sen_taddrh; ++ for (j=0; j<6; j++) ++ *tdptr++ = *mcptr--; ++ ++ /* Ask CPM to run CRC and set bit in ++ * filter mask. ++ */ ++ cpmp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_SET_GADDR) | CPM_CR_FLG; ++ /* this delay is necessary here -- Cort */ ++ udelay(10); ++ while (cpmp->cp_cpcr & CPM_CR_FLG); ++ } ++ } ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* Initialize the CPM Ethernet on SCC. If EPPC-Bug loaded us, or performed ++ * some other network I/O, a whole bunch of this has already been set up. ++ * It is no big deal if we do it again, we just have to disable the ++ * transmit and receive to make sure we don't catch the CPM with some ++ * inconsistent control information. 
++ */ ++int __init scc_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct scc_enet_private *cep; ++ int i, j, k; ++ unsigned char *eap, *ba; ++ dma_addr_t mem_addr; ++ bd_t *bd; ++ volatile cbd_t *bdp; ++ volatile cpm8xx_t *cp; ++ volatile scc_t *sccp; ++ volatile scc_enet_t *ep; ++ volatile immap_t *immap; ++ ++ cp = cpmp; /* Get pointer to Communication Processor */ ++ ++ immap = (immap_t *)(mfspr(IMMR) & 0xFFFF0000); /* and to internal registers */ ++ ++ bd = (bd_t *)__res; ++ ++ /* Configure the SCC parameters (this has formerly be done ++ * by macro definitions). ++ */ ++ switch (rtnet_scc) { ++ case 3: ++ CPM_CR_ENET = CPM_CR_CH_SCC3; ++ PROFF_ENET = PROFF_SCC3; ++ SCC_ENET = 2; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC3; ++ break; ++ case 2: ++ CPM_CR_ENET = CPM_CR_CH_SCC2; ++ PROFF_ENET = PROFF_SCC2; ++ SCC_ENET = 1; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC2; ++ break; ++ case 1: ++ CPM_CR_ENET = CPM_CR_CH_SCC1; ++ PROFF_ENET = PROFF_SCC1; ++ SCC_ENET = 0; /* Index, not number! */ ++ CPMVEC_ENET = CPMVEC_SCC1; ++ break; ++ default: ++ printk(KERN_ERR "enet: SCC%d doesn't exit (check rtnet_scc)\n", rtnet_scc); ++ return -1; ++ } ++ ++ /* Allocate some private information and create an Ethernet device instance. ++ */ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ rtdev = rtdev_root = rt_alloc_etherdev(sizeof(struct scc_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ cep = (struct scc_enet_private *)rtdev->priv; ++ rtdm_lock_init(&cep->lock); ++ ++ /* Get pointer to SCC area in parameter RAM. ++ */ ++ ep = (scc_enet_t *)(&cp->cp_dparam[PROFF_ENET]); ++ ++ /* And another to the SCC register area. ++ */ ++ sccp = (volatile scc_t *)(&cp->cp_scc[SCC_ENET]); ++ cep->sccp = (scc_t *)sccp; /* Keep the pointer handy */ ++ ++ /* Disable receive and transmit in case EPPC-Bug started it. ++ */ ++ sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT); ++ ++ /* Cookbook style from the MPC860 manual..... ++ * Not all of this is necessary if EPPC-Bug has initialized ++ * the network. ++ * So far we are lucky, all board configurations use the same ++ * pins, or at least the same I/O Port for these functions..... ++ * It can't last though...... ++ */ ++ ++#if (defined(PA_ENET_RXD) && defined(PA_ENET_TXD)) ++ /* Configure port A pins for Txd and Rxd. ++ */ ++ immap->im_ioport.iop_papar |= (PA_ENET_RXD | PA_ENET_TXD); ++ immap->im_ioport.iop_padir &= ~(PA_ENET_RXD | PA_ENET_TXD); ++ immap->im_ioport.iop_paodr &= ~PA_ENET_TXD; ++#elif (defined(PB_ENET_RXD) && defined(PB_ENET_TXD)) ++ /* Configure port B pins for Txd and Rxd. ++ */ ++ immap->im_cpm.cp_pbpar |= (PB_ENET_RXD | PB_ENET_TXD); ++ immap->im_cpm.cp_pbdir &= ~(PB_ENET_RXD | PB_ENET_TXD); ++ immap->im_cpm.cp_pbodr &= ~PB_ENET_TXD; ++#else ++#error Exactly ONE pair of PA_ENET_[RT]XD, PB_ENET_[RT]XD must be defined ++#endif ++ ++#if defined(PC_ENET_LBK) ++ /* Configure port C pins to disable External Loopback ++ */ ++ immap->im_ioport.iop_pcpar &= ~PC_ENET_LBK; ++ immap->im_ioport.iop_pcdir |= PC_ENET_LBK; ++ immap->im_ioport.iop_pcso &= ~PC_ENET_LBK; ++ immap->im_ioport.iop_pcdat &= ~PC_ENET_LBK; /* Disable Loopback */ ++#endif /* PC_ENET_LBK */ ++ ++ /* Configure port C pins to enable CLSN and RENA. 
++ */ ++ immap->im_ioport.iop_pcpar &= ~(PC_ENET_CLSN | PC_ENET_RENA); ++ immap->im_ioport.iop_pcdir &= ~(PC_ENET_CLSN | PC_ENET_RENA); ++ immap->im_ioport.iop_pcso |= (PC_ENET_CLSN | PC_ENET_RENA); ++ ++ /* Configure port A for TCLK and RCLK. ++ */ ++ immap->im_ioport.iop_papar |= (PA_ENET_TCLK | PA_ENET_RCLK); ++ immap->im_ioport.iop_padir &= ~(PA_ENET_TCLK | PA_ENET_RCLK); ++ ++ /* Configure Serial Interface clock routing. ++ * First, clear all SCC bits to zero, then set the ones we want. ++ */ ++ cp->cp_sicr &= ~SICR_ENET_MASK; ++ cp->cp_sicr |= SICR_ENET_CLKRT; ++ ++ /* Manual says set SDDR, but I can't find anything with that ++ * name. I think it is a misprint, and should be SDCR. This ++ * has already been set by the communication processor initialization. ++ */ ++ ++ /* Allocate space for the buffer descriptors in the DP ram. ++ * These are relative offsets in the DP ram address space. ++ * Initialize base addresses for the buffer descriptors. ++ */ ++ i = m8xx_cpm_dpalloc(sizeof(cbd_t) * RX_RING_SIZE); ++ ep->sen_genscc.scc_rbase = i; ++ cep->rx_bd_base = (cbd_t *)&cp->cp_dpmem[i]; ++ ++ i = m8xx_cpm_dpalloc(sizeof(cbd_t) * TX_RING_SIZE); ++ ep->sen_genscc.scc_tbase = i; ++ cep->tx_bd_base = (cbd_t *)&cp->cp_dpmem[i]; ++ ++ cep->dirty_tx = cep->cur_tx = cep->tx_bd_base; ++ cep->cur_rx = cep->rx_bd_base; ++ ++ /* Issue init Rx BD command for SCC. ++ * Manual says to perform an Init Rx parameters here. We have ++ * to perform both Rx and Tx because the SCC may have been ++ * already running. ++ * In addition, we have to do it later because we don't yet have ++ * all of the BD control/status set properly. ++ cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_RX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ */ ++ ++ /* Initialize function code registers for big-endian. ++ */ ++ ep->sen_genscc.scc_rfcr = SCC_EB; ++ ep->sen_genscc.scc_tfcr = SCC_EB; ++ ++ /* Set maximum bytes per receive buffer. ++ * This appears to be an Ethernet frame size, not the buffer ++ * fragment size. It must be a multiple of four. ++ */ ++ ep->sen_genscc.scc_mrblr = PKT_MAXBLR_SIZE; ++ ++ /* Set CRC preset and mask. ++ */ ++ ep->sen_cpres = 0xffffffff; ++ ep->sen_cmask = 0xdebb20e3; ++ ++ ep->sen_crcec = 0; /* CRC Error counter */ ++ ep->sen_alec = 0; /* alignment error counter */ ++ ep->sen_disfc = 0; /* discard frame counter */ ++ ++ ep->sen_pads = 0x8888; /* Tx short frame pad character */ ++ ep->sen_retlim = 15; /* Retry limit threshold */ ++ ++ ep->sen_maxflr = PKT_MAXBUF_SIZE; /* maximum frame length register */ ++ ep->sen_minflr = PKT_MINBUF_SIZE; /* minimum frame length register */ ++ ++ ep->sen_maxd1 = PKT_MAXBLR_SIZE; /* maximum DMA1 length */ ++ ep->sen_maxd2 = PKT_MAXBLR_SIZE; /* maximum DMA2 length */ ++ ++ /* Clear hash tables. ++ */ ++ ep->sen_gaddr1 = 0; ++ ep->sen_gaddr2 = 0; ++ ep->sen_gaddr3 = 0; ++ ep->sen_gaddr4 = 0; ++ ep->sen_iaddr1 = 0; ++ ep->sen_iaddr2 = 0; ++ ep->sen_iaddr3 = 0; ++ ep->sen_iaddr4 = 0; ++ ++ /* Set Ethernet station address. ++ */ ++ eap = (unsigned char *)&(ep->sen_paddrh); ++#ifdef CONFIG_FEC_ENET ++ /* We need a second MAC address if FEC is used by Linux */ ++ for (i=5; i>=0; i--) ++ *eap++ = rtdev->dev_addr[i] = (bd->bi_enetaddr[i] | ++ (i==3 ? 
0x80 : 0)); ++#else ++ for (i=5; i>=0; i--) ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++#endif ++ ++ ep->sen_pper = 0; /* 'cause the book says so */ ++ ep->sen_taddrl = 0; /* temp address (LSB) */ ++ ep->sen_taddrm = 0; ++ ep->sen_taddrh = 0; /* temp address (MSB) */ ++ ++ /* Now allocate the host memory pages and initialize the ++ * buffer descriptors. ++ */ ++ bdp = cep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = cep->rx_bd_base; ++ k = 0; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR; ++ bdp->cbd_bufaddr = mem_addr; ++ cep->rx_vaddr[k++] = ba; ++ mem_addr += CPM_ENET_RX_FRSIZE; ++ ba += CPM_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Let's re-initialize the channel now. We have to do it later ++ * than the manual describes because we have just now finished ++ * the BD initialization. ++ */ ++ cp->cp_cpcr = mk_cr_cmd(CPM_CR_ENET, CPM_CR_INIT_TRX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ ++ cep->skb_cur = cep->skb_dirty = 0; ++ ++ sccp->scc_scce = 0xffff; /* Clear any pending events */ ++ ++ /* Enable interrupts for transmit error, complete frame ++ * received, and any transmit buffer we have also set the ++ * interrupt flag. ++ */ ++ sccp->scc_sccm = (SCCE_ENET_TXE | SCCE_ENET_RXF | SCCE_ENET_TXB); ++ ++ /* Install our interrupt handler. ++ */ ++ rtdev->irq = CPM_IRQ_OFFSET + CPMVEC_ENET; ++ rt_stack_connect(rtdev, &STACK_manager); ++ if ((i = rtdm_irq_request(&cep->irq_handle, rtdev->irq, ++ scc_enet_interrupt, 0, "rt_mpc8xx_enet", rtdev))) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ ++ /* Set GSMR_H to enable all normal operating modes. ++ * Set GSMR_L to enable Ethernet to MC68160. ++ */ ++ sccp->scc_gsmrh = 0; ++ sccp->scc_gsmrl = (SCC_GSMRL_TCI | SCC_GSMRL_TPL_48 | SCC_GSMRL_TPP_10 | SCC_GSMRL_MODE_ENET); ++ ++ /* Set sync/delimiters. ++ */ ++ sccp->scc_dsr = 0xd555; ++ ++ /* Set processing mode. Use Ethernet CRC, catch broadcast, and ++ * start frame search 22 bit times after RENA. ++ */ ++ sccp->scc_pmsr = (SCC_PMSR_ENCRC | SCC_PMSR_NIB22); ++ ++ /* It is now OK to enable the Ethernet transmitter. ++ * Unfortunately, there are board implementation differences here. ++ */ ++#if (!defined (PB_ENET_TENA) && defined (PC_ENET_TENA)) ++ immap->im_ioport.iop_pcpar |= PC_ENET_TENA; ++ immap->im_ioport.iop_pcdir &= ~PC_ENET_TENA; ++#elif ( defined (PB_ENET_TENA) && !defined (PC_ENET_TENA)) ++ cp->cp_pbpar |= PB_ENET_TENA; ++ cp->cp_pbdir |= PB_ENET_TENA; ++#else ++#error Configuration Error: define exactly ONE of PB_ENET_TENA, PC_ENET_TENA ++#endif ++ ++#if defined(CONFIG_RPXLITE) || defined(CONFIG_RPXCLASSIC) ++ /* And while we are here, set the configuration to enable ethernet. ++ */ ++ *((volatile uint *)RPX_CSR_ADDR) &= ~BCSR0_ETHLPBK; ++ *((volatile uint *)RPX_CSR_ADDR) |= ++ (BCSR0_ETHEN | BCSR0_COLTESTDIS | BCSR0_FULLDPLXDIS); ++#endif ++ ++#ifdef CONFIG_BSEIP ++ /* BSE uses port B and C for PHY control. 
++ */ ++ cp->cp_pbpar &= ~(PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ cp->cp_pbdir |= (PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ cp->cp_pbdat |= (PB_BSE_POWERUP | PB_BSE_FDXDIS); ++ ++ immap->im_ioport.iop_pcpar &= ~PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcdir |= PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcso &= ~PC_BSE_LOOPBACK; ++ immap->im_ioport.iop_pcdat &= ~PC_BSE_LOOPBACK; ++#endif ++ ++#ifdef CONFIG_FADS ++ cp->cp_pbpar |= PB_ENET_TENA; ++ cp->cp_pbdir |= PB_ENET_TENA; ++ ++ /* Enable the EEST PHY. ++ */ ++ *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; ++#endif ++ ++ rtdev->base_addr = (unsigned long)ep; ++ ++ /* The CPM Ethernet specific entries in the device structure. */ ++ rtdev->open = scc_enet_open; ++ rtdev->hard_start_xmit = scc_enet_start_xmit; ++ rtdev->stop = scc_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = scc_enet_get_stats; ++ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ printk(KERN_ERR "Couldn't register rtdev\n"); ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ sccp->scc_gsmrl |= (SCC_GSMRL_ENR | SCC_GSMRL_ENT); ++ ++ printk("%s: CPM ENET Version 0.2 on SCC%d, irq %d, addr %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, SCC_ENET+1, rtdev->irq, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++ ++ return 0; ++} ++ ++static void __exit scc_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rtdev_root; ++ struct scc_enet_private *cep = (struct scc_enet_private *)rtdev->priv; ++ volatile cpm8xx_t *cp = cpmp; ++ volatile scc_enet_t *ep; ++ ++ if (rtdev) { ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ ++ ep = (scc_enet_t *)(&cp->cp_dparam[PROFF_ENET]); ++ m8xx_cpm_dpfree(ep->sen_genscc.scc_rbase); ++ m8xx_cpm_dpfree(ep->sen_genscc.scc_tbase); ++ ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ rtdev_root = NULL; ++ } ++} ++ ++module_init(scc_enet_init); ++module_exit(scc_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/mpc8xx_fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8xx_fec.c 2021-04-07 16:01:27.264634113 +0800 +@@ -0,0 +1,2341 @@ ++/* ++ * BK Id: SCCS/s.fec.c 1.30 09/11/02 14:55:08 paulus ++ */ ++/* ++ * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * This version of the driver is specific to the FADS implementation, ++ * since the board contains control registers external to the processor ++ * for the control of the LevelOne LXT970 transceiver. The MPC860T manual ++ * describes connections using the internal parallel port I/O, which ++ * is basically all of Port D. ++ * ++ * Includes support for the following PHYs: QS6612, LXT970, LXT971/2. ++ * ++ * Right now, I am very wasteful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Much better multiple PHY support by Magnus Damm. 
++ * Copyright (c) 2000 Ericsson Radio Systems AB. ++ * ++ * Make use of MII for PHY control configurable. ++ * Some fixes. ++ * Copyright (c) 2000-2002 Wolfgang Denk, DENX Software Engineering. ++ * ++ * Fixes for tx_full condition and relink when using MII. ++ * Support for AMD AM79C874 added. ++ * Thomas Lange, thomas@corelatus.com ++ * ++ * Added code for Multicast support, Frederic Goddeeris, Paul Geerinckx ++ * Copyright (c) 2002 Siemens Atea ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/8xx_io/fec.c". ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#error "MDIO for PHY configuration is not yet supported!" ++#endif ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the MPC8xx FEC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* multicast support ++ */ ++/* #define DEBUG_MULTICAST */ ++ ++/* CRC polynomium used by the FEC for the multicast group filtering ++ */ ++#define FEC_CRC_POLY 0x04C11DB7 ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Forward declarations of some structures to support different PHYs ++*/ ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct net_device *dev, uint data); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FEC_ENET_RX_PAGES 4 ++#define FEC_ENET_RX_FRSIZE 2048 ++#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) ++#define TX_RING_SIZE 8 /* Must be power of two */ ++#define TX_RING_MOD_MASK 7 /* for this to work */ ++ ++/* Interrupt events/masks. 
++*/ ++#define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */ ++#define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */ ++#define FEC_ENET_BABT ((uint)0x20000000) /* Babbling transmitter */ ++#define FEC_ENET_GRA ((uint)0x10000000) /* Graceful stop complete */ ++#define FEC_ENET_TXF ((uint)0x08000000) /* Full frame transmitted */ ++#define FEC_ENET_TXB ((uint)0x04000000) /* A buffer was transmitted */ ++#define FEC_ENET_RXF ((uint)0x02000000) /* Full frame received */ ++#define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ ++#define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ ++#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ ++ ++/* ++*/ ++#define FEC_ECNTRL_PINMUX 0x00000004 ++#define FEC_ECNTRL_ETHER_EN 0x00000002 ++#define FEC_ECNTRL_RESET 0x00000001 ++ ++#define FEC_RCNTRL_BC_REJ 0x00000010 ++#define FEC_RCNTRL_PROM 0x00000008 ++#define FEC_RCNTRL_MII_MODE 0x00000004 ++#define FEC_RCNTRL_DRT 0x00000002 ++#define FEC_RCNTRL_LOOP 0x00000001 ++ ++#define FEC_TCNTRL_FDEN 0x00000004 ++#define FEC_TCNTRL_HBC 0x00000002 ++#define FEC_TCNTRL_GTS 0x00000001 ++ ++/* Delay to wait for FEC reset command to complete (in us) ++*/ ++#define FEC_RESET_DELAY 50 ++ ++/* The FEC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fec_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ ++ /* Virtual addresses for the receive buffers because we can't ++ * do a __va() on them anymore. 
++ */ ++ unsigned char *rx_vaddr[RX_RING_SIZE]; ++ ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ uint phy_speed; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ ++ uint sequence_done; ++ ++ uint phy_addr; ++ ++ struct timer_list phy_timer_list; ++ u16 old_status; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ int link; ++ int old_link; ++ int full_duplex; ++ ++}; ++ ++static int fec_enet_open(struct rtnet_device *rtev); ++static int fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static void fec_enet_tx(struct rtnet_device *rtdev); ++static void fec_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int fec_enet_interrupt(rtdm_irq_t *irq_handle); ++static int fec_enet_close(struct rtnet_device *dev); ++static void fec_restart(struct rtnet_device *rtdev, int duplex); ++static void fec_stop(struct rtnet_device *rtdev); ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void fec_enet_mii(struct net_device *dev); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++static struct net_device_stats *fec_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++#endif /* ORIGINAL_VERSION */ ++ ++static struct rtnet_device *rtdev_root = NULL; /* for cleanup */ ++ ++static ushort my_enet_addr[3]; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); ++static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr); ++ ++static void mdio_callback(uint regval, struct net_device *dev, uint data); ++static int mdio_read(struct net_device *dev, int phy_id, int location); ++ ++#if defined(CONFIG_FEC_DP83846A) ++static void mdio_timer_callback(unsigned long data); ++#endif /* CONFIG_FEC_DP83846A */ ++ ++/* MII processing. We keep this as simple as possible. Requests are ++ * placed on the list (if there is room). When the request is finished ++ * by the MII, an optional function may be called. ++ */ ++typedef struct mii_list { ++ uint mii_regval; ++ void (*mii_func)(uint val, struct net_device *dev, uint data); ++ struct mii_list *mii_next; ++ uint mii_data; ++} mii_list_t; ++ ++#define NMII 20 ++mii_list_t mii_cmds[NMII]; ++mii_list_t *mii_free; ++mii_list_t *mii_head; ++mii_list_t *mii_tail; ++ ++typedef struct mdio_read_data { ++ u16 regval; ++ struct task_struct *sleeping_task; ++} mdio_read_data_t; ++ ++static int mii_queue(struct net_device *dev, int request, ++ void (*func)(uint, struct net_device *, uint), uint data); ++static void mii_queue_relink(uint mii_reg, struct net_device *dev, uint data); ++ ++/* Make MII read/write commands for the FEC. ++*/ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* Transmitter timeout. ++*/ ++#define TX_TIMEOUT (2*HZ) ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Register definitions for the PHY. 
++*/ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++static int ++fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *fecp; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ fep = rtdev->priv; ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ if (!fep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ return 1; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ bdp = fep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since dev->tbusy should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* Set buffer length and buffer pointer. ++ */ ++ bdp->cbd_bufaddr = __pa(skb->data); ++ bdp->cbd_datlen = skb->len; ++ ++ /* Save skb pointer. ++ */ ++ fep->tx_skbuff[fep->skb_cur] = skb; ++ ++ fep->stats.tx_bytes += skb->len; ++ fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ rtdm_lock_get_irqsave(&fep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ flush_dcache_range((unsigned long)skb->data, ++ (unsigned long)skb->data + skb->len); ++ ++ /* Send it on its way. Tell FEC its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR ++ | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++ //rtdev->trans_start = jiffies; ++ ++ /* Trigger transmission start */ ++ fecp->fec_x_des_active = 0x01000000; ++ ++ /* If this was the last BD in the ring, start at the beginning again. 
++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) { ++ bdp = fep->tx_bd_base; ++ } else { ++ bdp++; ++ } ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ fep->tx_full = 1; ++ } ++ ++ fep->cur_tx = (cbd_t *)bdp; ++ ++ rtdm_lock_put_irqrestore(&fep->lock, context); ++ ++ return 0; ++} ++ ++#ifdef ORIGINAL_VERSION ++static void ++fec_timeout(struct net_device *dev) ++{ ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ if (fep->link || fep->old_link) { ++ /* Link status changed - print timeout message */ ++ printk("%s: transmit timed out.\n", rtdev->name); ++ } ++ ++ fep->stats.tx_errors++; ++#ifndef final_version ++ if (fep->link) { ++ int i; ++ cbd_t *bdp; ++ ++ printk ("Ring data dump: " ++ "cur_tx %p%s dirty_tx %p cur_rx %p\n", ++ fep->cur_tx, ++ fep->tx_full ? " (full)" : "", ++ fep->dirty_tx, ++ fep->cur_rx); ++ ++ bdp = fep->tx_bd_base; ++ printk(" tx: %u buffers\n", TX_RING_SIZE); ++ for (i = 0 ; i < TX_RING_SIZE; i++) { ++ printk(" %08x: %04x %04x %08x\n", ++ (uint) bdp, ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp++; ++ } ++ ++ bdp = fep->rx_bd_base; ++ printk(" rx: %lu buffers\n", RX_RING_SIZE); ++ for (i = 0 ; i < RX_RING_SIZE; i++) { ++ printk(" %08x: %04x %04x %08x\n", ++ (uint) bdp, ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp++; ++ } ++ } ++#endif ++ if (!fep->tx_full) { ++ netif_wake_queue(dev); ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. ++ * This is called from the MPC core interrupt. ++ */ ++static int fec_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ volatile fec_t *fecp; ++ uint int_events; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ while ((int_events = fecp->fec_ievent) != 0) { ++ fecp->fec_ievent = int_events; ++ if ((int_events & (FEC_ENET_HBERR | FEC_ENET_BABR | ++ FEC_ENET_BABT | FEC_ENET_EBERR)) != 0) { ++ rtdm_printk("FEC ERROR %x\n", int_events); ++ } ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & FEC_ENET_RXF) { ++ fec_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Transmit OK, or non-fatal error. Update the buffer ++ descriptors. FEC handles all errors, we just discover ++ them as part of the transmit process. ++ */ ++ if (int_events & FEC_ENET_TXF) { ++ fec_enet_tx(rtdev); ++ } ++ ++ if (int_events & FEC_ENET_MII) { ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fec_enet_mii(dev); ++#else ++ rtdm_printk("%s[%d] %s: unexpected FEC_ENET_MII event\n", ++ __FILE__,__LINE__,__FUNCTION__); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++ ++static void ++fec_enet_tx(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb; ++ struct fec_enet_private *fep = rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lock_get(&fep->lock); ++ bdp = fep->dirty_tx; ++ ++ while ((bdp->cbd_sc&BD_ENET_TX_READY) == 0) { ++ if (bdp == fep->cur_tx && fep->tx_full == 0) break; ++ ++ skb = fep->tx_skbuff[fep->skb_dirty]; ++ /* Check for errors. 
*/ ++ if (bdp->cbd_sc & (BD_ENET_TX_HB | BD_ENET_TX_LC | ++ BD_ENET_TX_RL | BD_ENET_TX_UN | ++ BD_ENET_TX_CSL)) { ++ fep->stats.tx_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ fep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ fep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ fep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ fep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ fep->stats.tx_carrier_errors++; ++ } else { ++ fep->stats.tx_packets++; ++ } ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) ++ rtdm_printk("HEY! Enet xmit interrupt and TX_READY.\n"); ++#endif ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ fep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. ++ */ ++ dev_kfree_rtskb(skb); ++ fep->tx_skbuff[fep->skb_dirty] = NULL; ++ fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (fep->tx_full) { ++ fep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ } ++ fep->dirty_tx = (cbd_t *)bdp; ++ rtdm_lock_put(&fep->lock); ++} ++ ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static void ++fec_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *fecp; ++ volatile cbd_t *bdp; ++ struct rtskb *skb; ++ ushort pkt_len; ++ __u8 *data; ++ ++ fep = rtdev->priv; ++ fecp = (volatile fec_t*)rtdev->base_addr; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = fep->cur_rx; ++ ++while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) { ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, ++ * the last indicator should be set. ++ */ ++ if ((bdp->cbd_sc & BD_ENET_RX_LAST) == 0) ++ rtdm_printk("FEC ENET: rcv is not +last\n"); ++#endif ++ ++ /* Check for errors. */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | ++ BD_ENET_RX_CR | BD_ENET_RX_OV)) { ++ fep->stats.rx_errors++; ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { ++ /* Frame too long or too short. */ ++ fep->stats.rx_length_errors++; ++ } ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ fep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ fep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ fep->stats.rx_crc_errors++; ++ } ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_CL) { ++ fep->stats.rx_errors++; ++ fep->stats.rx_frame_errors++; ++ goto rx_processing_done; ++ } ++ ++ /* Process the incoming frame. 
++ */ ++ fep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ fep->stats.rx_bytes += pkt_len; ++ data = fep->rx_vaddr[bdp - fep->rx_bd_base]; ++ ++ /* This does 16 byte alignment, exactly what we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len-4); ++ ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ fep->stats.rx_dropped++; ++ } else { ++ rtskb_put(skb,pkt_len-4); /* Make room */ ++ memcpy(skb->data, data, pkt_len-4); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++rx_processing_done: ++ ++ /* Clear the status flags for this buffer. ++ */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. ++ */ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. ++ */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = fep->rx_bd_base; ++ else ++ bdp++; ++ ++ /* Doing this here will keep the FEC running while we process ++ * incoming frames. On a heavily loaded network, we should be ++ * able to keep up at the expense of system resources. ++ */ ++ fecp->fec_r_des_active = 0x01000000; ++ } /* while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) */ ++ fep->cur_rx = (cbd_t *)bdp; ++ ++} ++ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static void ++fec_enet_mii(struct net_device *dev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *ep; ++ mii_list_t *mip; ++ uint mii_reg; ++ ++ fep = (struct fec_enet_private *)dev->priv; ++ ep = &(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec); ++ mii_reg = ep->fec_mii_data; ++ ++ if ((mip = mii_head) == NULL) { ++ printk("MII and no head!\n"); ++ return; ++ } ++ ++ if (mip->mii_func != NULL) ++ (*(mip->mii_func))(mii_reg, dev, mip->mii_data); ++ ++ mii_head = mip->mii_next; ++ mip->mii_next = mii_free; ++ mii_free = mip; ++ ++ if ((mip = mii_head) != NULL) { ++ ep->fec_mii_data = mip->mii_regval; ++ } ++} ++ ++static int ++mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_device *, uint), uint data) ++{ ++ struct fec_enet_private *fep; ++ unsigned long flags; ++ mii_list_t *mip; ++ int retval; ++ ++ /* Add PHY address to register command. ++ */ ++ fep = dev->priv; ++ regval |= fep->phy_addr << 23; ++ ++ retval = 0; ++ ++ save_flags(flags); ++ cli(); ++ ++ if ((mip = mii_free) != NULL) { ++ mii_free = mip->mii_next; ++ mip->mii_regval = regval; ++ mip->mii_func = func; ++ mip->mii_next = NULL; ++ mip->mii_data = data; ++ if (mii_head) { ++ mii_tail->mii_next = mip; ++ mii_tail = mip; ++ } else { ++ mii_head = mii_tail = mip; ++ (&(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec))->fec_mii_data = regval; ++ } ++ } else { ++ retval = 1; ++ } ++ ++ restore_flags(flags); ++ ++ return(retval); ++} ++ ++static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if(!c) ++ return; ++ ++ for(k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct, 0); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ fep->phy_status = s; ++ fep->link = (s & PHY_STAT_LINK) ? 
1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ fep->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x0100) ++ s |= PHY_CONF_100FDX; ++ ++ fep->phy_status = s; ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT970 is used by many boards */ ++ ++#ifdef CONFIG_FEC_LXT970 ++ ++#define MII_LXT970_MIRROR 16 /* Mirror register */ ++#define MII_LXT970_IER 17 /* Interrupt Enable Register */ ++#define MII_LXT970_ISR 18 /* Interrupt Status Register */ ++#define MII_LXT970_CONFIG 19 /* Configuration Register */ ++#define MII_LXT970_CSR 20 /* Chip Status Register */ ++ ++static void mii_parse_lxt970_csr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0800) { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt970 = { ++ 0x07810000, ++ "LXT970", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR and ISR to acknowledge */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT970_ISR), NULL }, ++ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_LXT970_CSR), mii_parse_lxt970_csr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT970 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_FEC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++/* ++ * I had some nice ideas of running the MDIO faster... ++ * The 971 should support 8MHz and I tried it, but things acted really ++ * weird, so 2.5 MHz ought to be enough for anyone... 
++ */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. ++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT971 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ ++ ++#ifdef CONFIG_FEC_QS6612 ++ ++/* register definitions */ ++ ++#define MII_QS6612_MCR 17 /* Mode Control Register */ ++#define MII_QS6612_FTR 27 /* Factory Test Register */ ++#define MII_QS6612_MCO 28 /* Misc. Control Register */ ++#define MII_QS6612_ISR 29 /* Interrupt Source Register */ ++#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ ++#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. */ ++ ++static void mii_parse_qs6612_pcr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch((mii_reg >> 2) & 7) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_100HDX; break; ++ case 5: s |= PHY_STAT_10FDX; break; ++ case 6: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_qs6612 = { ++ 0x00181440, ++ "QS6612", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x061), NULL }, /* 10 Mbps */ ++ ++ /* The PHY powers up isolated on the RPX, ++ * so send a command to allow operation. 
++ */ ++ ++ { mk_mii_write(MII_QS6612_PCR, 0x0dc0), NULL }, ++ ++ /* parse cr and anar to get some info */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x003a), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read ISR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_QS6612_ISR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read pcr to get info */ ++ ++ { mk_mii_read(MII_QS6612_PCR), mii_parse_qs6612_pcr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_QS6612 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Advanced Micro Devices AM79C874 is used on the ICU862 */ ++ ++#ifdef CONFIG_FEC_AM79C874 ++ ++/* register definitions for the 79C874 */ ++ ++#define MII_AM79C874_MFR 16 /* Miscellaneous Features Register */ ++#define MII_AM79C874_ICSR 17 /* Interrupt Control/Status Register */ ++#define MII_AM79C874_DR 18 /* Diagnostic Register */ ++#define MII_AM79C874_PMLR 19 /* Power Management & Loopback Register */ ++#define MII_AM79C874_MCR 21 /* Mode Control Register */ ++#define MII_AM79C874_DC 23 /* Disconnect Counter */ ++#define MII_AM79C874_REC 24 /* Receiver Error Counter */ ++ ++static void mii_parse_amd79c874_dr(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ /* Register 18: Bit 10 is data rate, 11 is Duplex */ ++ switch ((mii_reg >> 10) & 3) { ++ case 0: s |= PHY_STAT_10HDX; break; ++ case 1: s |= PHY_STAT_100HDX; break; ++ case 2: s |= PHY_STAT_10FDX; break; ++ case 3: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_amd79c874 = { ++ 0x00022561, ++ "AM79C874", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_AM79C874_ICSR, 0xff00), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_AM79C874_DR), mii_parse_amd79c874_dr }, ++ ++ /* we only need to read ICSR to acknowledge */ ++ ++ { mk_mii_read(MII_AM79C874_ICSR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_AM79C874_ICSR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_AM79C874 */ ++ ++/* -------------------------------------------------------------------- */ ++/* The National Semiconductor DP83843BVJE is used on a Mediatrix board */ ++/* -------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_FEC_DP83843 ++ ++/* Register definitions */ ++#define MII_DP83843_PHYSTS 0x10 /* PHY Status Register */ ++#define MII_DP83843_MIPSCR 0x11 /* Specific Status 
Register */ ++#define MII_DP83843_MIPGSR 0x12 /* Generic Status Register */ ++ ++static void mii_parse_dp83843_physts(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0002) ++ { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else ++ { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_dp83843 = { ++ 0x020005c1, ++ "DP83843BVJE", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_DP83843_MIPSCR, 0x0002), NULL }, /* Enable interrupts */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83843_PHYSTS), mii_parse_dp83843_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_DP83843_MIPGSR), NULL }, /* Acknowledge interrupts */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, /* Find out the current status */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83843_PHYSTS), mii_parse_dp83843_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_FEC_DP83843 */ ++ ++ ++/* ----------------------------------------------------------------- */ ++/* The National Semiconductor DP83846A is used on a Mediatrix board */ ++/* ----------------------------------------------------------------- */ ++ ++#ifdef CONFIG_FEC_DP83846A ++ ++/* Register definitions */ ++#define MII_DP83846A_PHYSTS 0x10 /* PHY Status Register */ ++ ++static void mii_parse_dp83846a_physts(uint mii_reg, struct net_device *dev, uint data) ++{ ++ volatile struct fec_enet_private *fep = (struct fec_enet_private *)dev->priv; ++ uint s = fep->phy_status; ++ int link_change_mask; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0002) { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ else { ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } ++ ++ fep->phy_status = s; ++ ++ link_change_mask = PHY_STAT_LINK | PHY_STAT_10FDX | PHY_STAT_10HDX | PHY_STAT_100FDX | PHY_STAT_100HDX; ++ if(fep->old_status != (link_change_mask & s)) ++ { ++ fep->old_status = (link_change_mask & s); ++ mii_queue_relink(mii_reg, dev, 0); ++ } ++} ++ ++static phy_info_t phy_info_dp83846a = { ++ 0x020005c2, ++ "DP83846A", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_write(MII_REG_ANAR, 0x01E1), NULL }, /* Auto-Negociation Register Control set to */ ++ /* auto-negociate 10/100MBps, Half/Full duplex */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* Enable and Restart Auto-Negotiation */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83846A_PHYSTS), 
mii_parse_dp83846a_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_DP83846A_PHYSTS), mii_parse_dp83846a_physts }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_end, } ++ } ++}; ++ ++#endif /* CONFIG_FEC_DP83846A */ ++ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_FEC_LXT970 ++ &phy_info_lxt970, ++#endif /* CONFIG_FEC_LXT970 */ ++ ++#ifdef CONFIG_FEC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_FEC_LXT971 */ ++ ++#ifdef CONFIG_FEC_QS6612 ++ &phy_info_qs6612, ++#endif /* CONFIG_FEC_QS6612 */ ++ ++#ifdef CONFIG_FEC_AM79C874 ++ &phy_info_amd79c874, ++#endif /* CONFIG_FEC_AM79C874 */ ++ ++#ifdef CONFIG_FEC_DP83843 ++ &phy_info_dp83843, ++#endif /* CONFIG_FEC_DP83843 */ ++ ++#ifdef CONFIG_FEC_DP83846A ++ &phy_info_dp83846a, ++#endif /* CONFIG_FEC_DP83846A */ ++ ++ NULL ++}; ++ ++static void mii_display_status(struct net_device *dev) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ if (!fep->link && !fep->old_link) { ++ /* Link is still down - don't print anything */ ++ return; ++ } ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!fep->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++ ++static void mii_display_config(struct net_device *dev) ++{ ++ volatile struct fec_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ fep->sequence_done = 1; ++} ++ ++static void mii_relink(struct net_device *dev) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ int duplex; ++ ++ fep->link = (fep->phy_status & PHY_STAT_LINK) ? 
1 : 0; ++ mii_display_status(dev); ++ fep->old_link = fep->link; ++ ++ if (fep->link) { ++ duplex = 0; ++ if (fep->phy_status ++ & (PHY_STAT_100FDX | PHY_STAT_10FDX)) ++ duplex = 1; ++ fec_restart(dev, duplex); ++ ++ if (netif_queue_stopped(dev)) { ++ netif_wake_queue(dev); ++ } ++ } else { ++ netif_stop_queue(dev); ++ fec_stop(dev); ++ } ++} ++ ++static void mii_queue_relink(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_relink; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++static void mii_queue_config(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_display_config; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++ ++ ++phy_cmd_t phy_cmd_relink[] = { { mk_mii_read(MII_REG_CR), mii_queue_relink }, ++ { mk_mii_end, } }; ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep; ++ int i; ++ ++ fep = dev->priv; ++ fep->phy_id |= (mii_reg & 0xffff); ++ ++ for(i = 0; phy_info[i]; i++) ++ if(phy_info[i]->id == (fep->phy_id >> 4)) ++ break; ++ ++ if(!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, fep->phy_id); ++ ++ fep->phy = phy_info[i]; ++ fep->phy_id_done = 1; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, fep->phy_addr, fep->phy->name, fep->phy_id); ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct net_device *dev, uint data) ++{ ++ struct fec_enet_private *fep; ++ uint phytype; ++ ++ fep = dev->priv; ++ ++ if ((phytype = (mii_reg & 0xffff)) != 0xffff) { ++ ++ /* Got first part of ID, now get remainder. ++ */ ++ fep->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3, 0); ++ } else { ++ fep->phy_addr++; ++ if (fep->phy_addr < 32) { ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy, 0); ++ } else { ++ printk("fec: No PHY device found.\n"); ++ } ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* This interrupt occurs when the PHY detects a link change. ++*/ ++static void ++#ifdef CONFIG_RPXCLASSIC ++mii_link_interrupt(void *dev_id) ++#else ++mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) ++#endif ++{ ++ struct net_device *dev = dev_id; ++ struct fec_enet_private *fep = dev->priv; ++ volatile immap_t *immap = (immap_t *)IMAP_ADDR; ++ volatile fec_t *fecp = &(immap->im_cpm.cp_fec); ++ unsigned int ecntrl = fecp->fec_ecntrl; ++ ++ /* ++ * Acknowledge the interrupt if possible. If we have not ++ * found the PHY yet we can't process or acknowledge the ++ * interrupt now. Instead we ignore this interrupt for now, ++ * which we can do since it is edge triggered. It will be ++ * acknowledged later by fec_enet_open(). 
++ */ ++ if (fep->phy) { ++ /* ++ * We need the FEC enabled to access the MII ++ */ ++ if ((ecntrl & FEC_ECNTRL_ETHER_EN) == 0) { ++ fecp->fec_ecntrl |= FEC_ECNTRL_ETHER_EN; ++ } ++ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); /* restart and display status */ ++ ++ if ((ecntrl & FEC_ECNTRL_ETHER_EN) == 0) { ++ fecp->fec_ecntrl = ecntrl; /* restore old settings */ ++ } ++ } ++ ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++static int ++fec_enet_open(struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. ++ */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fep->sequence_done = 0; ++ fep->link = 0; ++ ++ if (fep->phy) { ++ mii_do_cmd(dev, fep->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!fep->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, fep->phy->startup); ++ ++#if defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) && defined(CONFIG_FEC_DP83846A) ++ if(fep->phy == &phy_info_dp83846a) ++ { ++ /* Initializing timers ++ */ ++ init_timer( &fep->phy_timer_list ); ++ ++ /* Starting timer for periodic link status check ++ * After 100 milli-seconds, mdio_timer_callback function is called. ++ */ ++ fep->phy_timer_list.expires = jiffies + (100 * HZ / 1000); ++ fep->phy_timer_list.data = (unsigned long)dev; ++ fep->phy_timer_list.function = mdio_timer_callback; ++ add_timer( &fep->phy_timer_list ); ++ } ++ ++#if defined(CONFIG_IP_PNP) ++ rtdm_printk("%s: Waiting for the link to be up...\n", rtdev->name); ++ ++ while(fep->link == 0 || ((((volatile fec_t*)rtdev->base_addr)->fec_ecntrl & FEC_ECNTRL_ETHER_EN) == 0)) ++ { ++ schedule(); ++ } ++#endif /* CONFIG_IP_PNP */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO && CONFIG_FEC_DP83846A */ ++ ++ netif_start_queue(dev); ++ return 0; /* Success */ ++ } ++ return -ENODEV; /* No PHY we understand */ ++#else /* !CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ fep->link = 1; ++ rtnetif_start_queue(rtdev); ++ ++ return 0; /* Success */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++} ++ ++static int ++fec_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. ++ */ ++ rtnetif_stop_queue(rtdev); ++ ++ fec_stop(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *fec_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct fec_enet_private *fep = (struct fec_enet_private *)rtdev->priv; ++ ++ return &fep->stats; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++#if defined(CONFIG_FEC_DP83846A) ++/* Execute the ack_int command set and schedules next timer call back. */ ++static void mdio_timer_callback(unsigned long data) ++{ ++ struct net_device *dev = (struct net_device *)data; ++ struct fec_enet_private *fep = (struct fec_enet_private *)(dev->priv); ++ mii_do_cmd(dev, fep->phy->ack_int); ++ ++ if(fep->link == 0) ++ { ++ fep->phy_timer_list.expires = jiffies + (100 * HZ / 1000); /* Sleep for 100ms */ ++ } ++ else ++ { ++ fep->phy_timer_list.expires = jiffies + (1 * HZ); /* Sleep for 1 sec. 
*/ ++ } ++ add_timer( &fep->phy_timer_list ); ++} ++#endif /* CONFIG_FEC_DP83846A */ ++ ++static void mdio_callback(uint regval, struct net_device *dev, uint data) ++{ ++ mdio_read_data_t* mrd = (mdio_read_data_t *)data; ++ mrd->regval = 0xFFFF & regval; ++ wake_up_process(mrd->sleeping_task); ++} ++ ++static int mdio_read(struct net_device *dev, int phy_id, int location) ++{ ++ uint retval; ++ mdio_read_data_t* mrd = (mdio_read_data_t *)kmalloc(sizeof(*mrd), GFP_KERNEL); ++ ++ mrd->sleeping_task = current; ++ set_current_state(TASK_INTERRUPTIBLE); ++ mii_queue(dev, mk_mii_read(location), mdio_callback, (unsigned int) mrd); ++ schedule(); ++ ++ retval = mrd->regval; ++ ++ kfree(mrd); ++ ++ return retval; ++} ++ ++void mdio_write(struct net_device *dev, int phy_id, int location, int value) ++{ ++ mii_queue(dev, mk_mii_write(location, value), NULL, 0); ++} ++ ++static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ++{ ++ struct fec_enet_private *cep = (struct fec_enet_private *)dev->priv; ++ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; ++ ++ int phy = cep->phy_addr & 0x1f; ++ int retval; ++ ++ if (data == NULL) ++ { ++ retval = -EINVAL; ++ } ++ else ++ { ++ switch(cmd) ++ { ++ case SIOCETHTOOL: ++ return netdev_ethtool_ioctl(dev, (void*)rq->ifr_data); ++ break; ++ ++ case SIOCGMIIPHY: /* Get address of MII PHY in use. */ ++ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ ++ data->phy_id = phy; ++ ++ case SIOCGMIIREG: /* Read MII PHY register. */ ++ case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */ ++ data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f); ++ retval = 0; ++ break; ++ ++ case SIOCSMIIREG: /* Write MII PHY register. */ ++ case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */ ++ if (!capable(CAP_NET_ADMIN)) ++ { ++ retval = -EPERM; ++ } ++ else ++ { ++ mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in); ++ retval = 0; ++ } ++ break; ++ ++ default: ++ retval = -EOPNOTSUPP; ++ break; ++ } ++ } ++ return retval; ++} ++ ++ ++static int netdev_ethtool_ioctl (struct net_device *dev, void *useraddr) ++{ ++ u32 ethcmd; ++ ++ /* dev_ioctl() in ../../net/core/dev.c has already checked ++ capable(CAP_NET_ADMIN), so don't bother with that here. */ ++ ++ if (copy_from_user (ðcmd, useraddr, sizeof (ethcmd))) ++ return -EFAULT; ++ ++ switch (ethcmd) { ++ case ETHTOOL_GDRVINFO: ++ { ++ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; ++ strcpy (info.driver, dev->name); ++ strcpy (info.version, "0.3"); ++ strcpy (info.bus_info, ""); ++ if (copy_to_user (useraddr, &info, sizeof (info))) ++ return -EFAULT; ++ return 0; ++ } ++ default: ++ break; ++ } ++ ++ return -EOPNOTSUPP; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++#ifdef ORIGINAL_VERSION ++ ++/* Returns the CRC needed when filling in the hash table for ++ * multicast group filtering ++ * pAddr must point to a MAC address (6 bytes) ++ */ ++static u32 fec_mulicast_calc_crc(char *pAddr) ++{ ++ u8 byte; ++ int byte_count; ++ int bit_count; ++ u32 crc = 0xffffffff; ++ u8 msb; ++ ++ for (byte_count=0; byte_count<6; byte_count++) { ++ byte = pAddr[byte_count]; ++ for (bit_count=0; bit_count<8; bit_count++) { ++ msb = crc >> 31; ++ crc <<= 1; ++ if (msb ^ (byte & 0x1)) { ++ crc ^= FEC_CRC_POLY; ++ } ++ byte >>= 1; ++ } ++ } ++ return (crc); ++} ++ ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. 
++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). ++ */ ++ ++static void set_multicast_list(struct net_device *dev) ++{ ++ struct fec_enet_private *fep; ++ volatile fec_t *ep; ++ ++ fep = (struct fec_enet_private *)dev->priv; ++ ep = &(((immap_t *)IMAP_ADDR)->im_cpm.cp_fec); ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ ep->fec_r_cntrl |= FEC_RCNTRL_PROM; ++ } else { ++ ++ ep->fec_r_cntrl &= ~FEC_RCNTRL_PROM; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->fec_hash_table_high = 0xffffffff; ++ ep->fec_hash_table_low = 0xffffffff; ++ } else { ++ struct dev_mc_list *pmc = dev->mc_list; ++ ++ /* Clear Hash-Table ++ */ ++ ep->fec_hash_table_high = 0; ++ ep->fec_hash_table_low = 0; ++ ++ /* Now populate the hash table ++ */ ++#ifdef DEBUG_MULTICAST ++ if (pmc) { ++ printk ("%s: Recalculating hash-table:\n", ++ dev->name); ++ printk (" MAC Address high low\n"); ++ } ++#endif ++ ++ while (pmc) { ++ u32 crc; ++ int temp; ++ u32 csrVal; ++ int hash_index; ++ ++ crc = fec_mulicast_calc_crc(pmc->dmi_addr); ++ temp = (crc & 0x3f) >> 1; ++ hash_index = ((temp & 0x01) << 4) | ++ ((temp & 0x02) << 2) | ++ ((temp & 0x04)) | ++ ((temp & 0x08) >> 2) | ++ ((temp & 0x10) >> 4); ++ csrVal = (1 << hash_index); ++ if (crc & 1) { ++ ep->fec_hash_table_high |= csrVal; ++ } ++ else { ++ ep->fec_hash_table_low |= csrVal; ++ } ++#ifdef DEBUG_MULTICAST ++ printk (" %02x:%02x:%02x:%02x:%02x:%02x %08x %08x\n", ++ (int)pmc->dmi_addr[0], ++ (int)pmc->dmi_addr[1], ++ (int)pmc->dmi_addr[2], ++ (int)pmc->dmi_addr[3], ++ (int)pmc->dmi_addr[4], ++ (int)pmc->dmi_addr[5], ++ ep->fec_hash_table_high, ++ ep->fec_hash_table_low ++ ); ++#endif ++ pmc = pmc->next; ++ } ++ } ++ } ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* Initialize the FEC Ethernet on 860T. ++ */ ++int __init fec_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct fec_enet_private *fep; ++ int i, j, k; ++ unsigned char *eap, *iap, *ba; ++ unsigned long mem_addr; ++ volatile cbd_t *bdp; ++ cbd_t *cbd_base; ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ bd_t *bd; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ bd = (bd_t *)__res; ++ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ rtdev = rtdev_root = rt_alloc_etherdev(sizeof(struct fec_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ fep = (struct fec_enet_private *)rtdev->priv; ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ /* Whack a reset. We should wait for this. ++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_RESET; ++ for (i = 0; ++ (fecp->fec_ecntrl & FEC_ECNTRL_RESET) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC Reset timeout!\n"); ++ } ++ ++ /* Set the Ethernet address. If using multiple Enets on the 8xx, ++ * this needs some work to get unique addresses. 
++ */ ++ eap = (unsigned char *)my_enet_addr; ++ iap = bd->bi_enetaddr; ++ ++#if defined(CONFIG_SCC_ENET) && !defined(ORIGINAL_VERSION) ++ /* ++ * If a board has Ethernet configured both on a SCC and the ++ * FEC, it needs (at least) 2 MAC addresses (we know that Sun ++ * disagrees, but anyway). For the FEC port, we create ++ * another address by setting one of the address bits above ++ * something that would have (up to now) been allocated. ++ */ ++ { ++ unsigned char tmpaddr[6]; ++ for (i=0; i<6; i++) ++ tmpaddr[i] = *iap++; ++ tmpaddr[3] |= 0x80; ++ iap = tmpaddr; ++ } ++#endif ++ ++ for (i=0; i<6; i++) { ++ rtdev->dev_addr[i] = *eap++ = *iap++; ++ } ++ ++ /* Allocate memory for buffer descriptors. ++ */ ++ if (((RX_RING_SIZE + TX_RING_SIZE) * sizeof(cbd_t)) > PAGE_SIZE) { ++ printk("FEC init error. Need more space.\n"); ++ printk("FEC initialization failed.\n"); ++ return 1; ++ } ++ cbd_base = (cbd_t *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, (void *)&mem_addr); ++ ++ /* Set receive and transmit descriptor base. ++ */ ++ fep->rx_bd_base = cbd_base; ++ fep->tx_bd_base = cbd_base + RX_RING_SIZE; ++ ++ fep->skb_cur = fep->skb_dirty = 0; ++ ++ /* Initialize the receive buffer descriptors. ++ */ ++ bdp = fep->rx_bd_base; ++ k = 0; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY; ++ bdp->cbd_bufaddr = mem_addr; ++ fep->rx_vaddr[k++] = ba; ++ mem_addr += FEC_ENET_RX_FRSIZE; ++ ba += FEC_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ rtdm_lock_init(&fep->lock); ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Install our interrupt handler. ++ */ ++ rt_stack_connect(rtdev, &STACK_manager); ++ if ((i = rtdm_irq_request(&fep->irq_handle, FEC_INTERRUPT, ++ fec_enet_interrupt, 0, "rt_mpc8xx_fec", rtdev))) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++ rtdev->base_addr = (unsigned long)fecp; ++ ++#ifdef CONFIG_RPXCLASSIC ++/* If MDIO is disabled the PHY should not be allowed to ++ * generate interrupts telling us to read the PHY. ++ */ ++# ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Make Port C, bit 15 an input that causes interrupts. ++ */ ++ immap->im_ioport.iop_pcpar &= ~0x0001; ++ immap->im_ioport.iop_pcdir &= ~0x0001; ++ immap->im_ioport.iop_pcso &= ~0x0001; ++ immap->im_ioport.iop_pcint |= 0x0001; ++ cpm_install_handler(CPMVEC_PIO_PC15, mii_link_interrupt, dev); ++# endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Make LEDS reflect Link status. ++ */ ++ *((uint *) RPX_CSR_ADDR) &= ~BCSR2_FETHLEDMODE; ++#endif /* CONFIG_RPXCLASSIC */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++# ifndef PHY_INTERRUPT ++# error Want to use MII, but PHY_INTERRUPT not defined! ++# endif ++ ((immap_t *)IMAP_ADDR)->im_siu_conf.sc_siel |= ++ (0x80000000 >> PHY_INTERRUPT); ++ ++ if (request_8xxirq(PHY_INTERRUPT, mii_link_interrupt, 0, "mii", dev) != 0) ++ panic("Could not allocate MII IRQ!"); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ rtdev->base_addr = (unsigned long)fecp; ++ ++ /* The FEC Ethernet specific entries in the device structure. 
*/ ++ rtdev->open = fec_enet_open; ++ rtdev->hard_start_xmit = fec_enet_start_xmit; ++ rtdev->stop = fec_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = fec_enet_get_stats; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ rtdm_irq_disable(&fep->irq_handle); ++ rtdm_irq_free(&fep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ dev->do_ioctl = fec_enet_ioctl; ++ ++ for (i=0; iim_ioport.iop_pdpar = 0x1fff; ++ ++#else /* CONFIG_ICU862 */ ++ /* Configure port A for MII. ++ */ ++ ++ /* Has Utopia been configured? */ ++ if (immap->im_ioport.iop_pdpar & (0x8000 >> 1)) { ++ /* ++ * YES - Use MUXED mode for UTOPIA bus. ++ * This frees Port A for use by MII (see 862UM table 41-6). ++ */ ++ immap->im_ioport.utmode &= ~0x80; ++ } else { ++ /* ++ * NO - set SPLIT mode for UTOPIA bus. ++ * ++ * This doesn't really effect UTOPIA (which isn't ++ * enabled anyway) but just tells the 862 ++ * to use port A for MII (see 862UM table 41-6). ++ */ ++ immap->im_ioport.utmode |= 0x80; ++ } ++ ++# ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Now configure MII_MDC pin */ ++ immap->im_ioport.iop_pdpar |= (0x8000 >> 8); ++# endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++#endif /* CONFIG_ICU862 */ ++ ++ /* Bits moved from Rev. D onward. ++ */ ++ if ((mfspr(IMMR) & 0xffff) < 0x0501) ++ immap->im_ioport.iop_pddir = 0x1c58; /* Pre rev. D */ ++ else ++ immap->im_ioport.iop_pddir = 0x1fff; /* Rev. D and later */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed to 2.5 MHz ++ */ ++ fecp->fec_mii_speed = fep->phy_speed = ++ ((((bd->bi_intfreq + 4999999) / 2500000) / 2 ) & 0x3F ) << 1; ++#else ++ fecp->fec_mii_speed = 0; /* turn off MDIO */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifndef ORIGINAL_VERSION ++ printk("%s: FEC ENET Version 0.3, irq %d, addr %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, FEC_INTERRUPT, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++#else ++ printk ("%s: FEC ENET Version 0.3, FEC irq %d" ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ", with MDIO" ++#endif ++#ifdef PHY_INTERRUPT ++ ", MII irq %d" ++#endif ++ ", addr ", ++ dev->name, FEC_INTERRUPT ++#ifdef PHY_INTERRUPT ++ , PHY_INTERRUPT ++#endif ++ ); ++ for (i=0; i<6; i++) ++ printk("%02x%c", rtdev->dev_addr[i], (i==5) ? '\n' : ':'); ++#endif /* ORIGINAL_VERSION */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO /* start in full duplex mode, and negotiate speed */ ++ fec_restart (dev, 1); ++#else /* always use half duplex mode only */ ++ fec_restart (rtdev, 0); ++#endif ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ fep->phy_id_done = 0; ++ fep->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy, 0); ++ ++ fep->old_status = 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ return 0; ++} ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This only happens when switching between half and full ++ * duplex. ++ */ ++static void ++fec_restart(struct rtnet_device *rtdev, int duplex) ++{ ++ struct fec_enet_private *fep; ++ int i; ++ volatile cbd_t *bdp; ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ fep = rtdev->priv; ++ ++ /* Whack a reset. We should wait for this. 
++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_RESET; ++ for (i = 0; ++ (fecp->fec_ecntrl & FEC_ECNTRL_RESET) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC Reset timeout!\n"); ++ } ++ ++ /* Set station address. ++ */ ++ fecp->fec_addr_low = (my_enet_addr[0] << 16) | my_enet_addr[1]; ++ fecp->fec_addr_high = my_enet_addr[2]; ++ ++ /* Reset all multicast. ++ */ ++ fecp->fec_hash_table_high = 0; ++ fecp->fec_hash_table_low = 0; ++ ++ /* Set maximum receive buffer size. ++ */ ++ fecp->fec_r_buff_size = PKT_MAXBLR_SIZE; ++ fecp->fec_r_hash = PKT_MAXBUF_SIZE; ++ ++ /* Set receive and transmit descriptor base. ++ */ ++ fecp->fec_r_des_start = iopa((uint)(fep->rx_bd_base)); ++ fecp->fec_x_des_start = iopa((uint)(fep->tx_bd_base)); ++ ++ fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; ++ fep->cur_rx = fep->rx_bd_base; ++ ++ /* Reset SKB transmit buffers. ++ */ ++ fep->skb_cur = fep->skb_dirty = 0; ++ for (i=0; i<=TX_RING_MOD_MASK; i++) { ++ if (fep->tx_skbuff[i] != NULL) { ++ dev_kfree_rtskb(fep->tx_skbuff[i]); ++ fep->tx_skbuff[i] = NULL; ++ } ++ } ++ ++ /* Initialize the receive buffer descriptors. ++ */ ++ bdp = fep->rx_bd_base; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* ...and the same for transmmit. ++ */ ++ bdp = fep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Enable MII mode. ++ */ ++ if (duplex) { ++ fecp->fec_r_cntrl = FEC_RCNTRL_MII_MODE; /* MII enable */ ++ fecp->fec_x_cntrl = FEC_TCNTRL_FDEN; /* FD enable */ ++ } ++ else { ++ fecp->fec_r_cntrl = FEC_RCNTRL_MII_MODE | FEC_RCNTRL_DRT; ++ fecp->fec_x_cntrl = 0; ++ } ++ ++ fep->full_duplex = duplex; ++ ++ /* Enable big endian and don't care about SDMA FC. ++ */ ++ fecp->fec_fun_code = 0x78000000; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. ++ */ ++ fecp->fec_mii_speed = fep->phy_speed; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Clear any outstanding interrupt. ++ */ ++ fecp->fec_ievent = 0xffc0; ++ ++ fecp->fec_ivec = (FEC_INTERRUPT/2) << 29; ++ ++ /* Enable interrupts we wish to service. ++ */ ++ fecp->fec_imask = ( FEC_ENET_TXF | FEC_ENET_TXB | ++ FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_MII ); ++ ++ /* And last, enable the transmit and receive processing. ++ */ ++ fecp->fec_ecntrl = FEC_ECNTRL_PINMUX | FEC_ECNTRL_ETHER_EN; ++ fecp->fec_r_des_active = 0x01000000; ++ ++ /* The tx ring is no longer full. */ ++ if(fep->tx_full) ++ { ++ fep->tx_full = 0; ++ rtnetif_wake_queue(rtdev); ++ } ++} ++ ++static void ++fec_stop(struct rtnet_device *rtdev) ++{ ++ volatile immap_t *immap; ++ volatile fec_t *fecp; ++ int i; ++ struct fec_enet_private *fep; ++ ++ immap = (immap_t *)IMAP_ADDR; /* pointer to internal registers */ ++ ++ fecp = &(immap->im_cpm.cp_fec); ++ ++ if ((fecp->fec_ecntrl & FEC_ECNTRL_ETHER_EN) == 0) ++ return; /* already down */ ++ ++ fep = rtdev->priv; ++ ++ ++ fecp->fec_x_cntrl = 0x01; /* Graceful transmit stop */ ++ ++ for (i = 0; ++ ((fecp->fec_ievent & 0x10000000) == 0) && (i < FEC_RESET_DELAY); ++ ++i) { ++ udelay(1); ++ } ++ if (i == FEC_RESET_DELAY) { ++ printk ("FEC timeout on graceful transmit stop\n"); ++ } ++ ++ /* Clear outstanding MII command interrupts. 
++ */ ++ fecp->fec_ievent = FEC_ENET_MII; ++ ++ /* Enable MII command finished interrupt ++ */ ++ fecp->fec_ivec = (FEC_INTERRUPT/2) << 29; ++ fecp->fec_imask = FEC_ENET_MII; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Set MII speed. ++ */ ++ fecp->fec_mii_speed = fep->phy_speed; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Disable FEC ++ */ ++ fecp->fec_ecntrl &= ~(FEC_ECNTRL_ETHER_EN); ++} ++ ++static void __exit fec_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev = rtdev_root; ++ struct fec_enet_private *fep = rtdev->priv; ++ ++ if (rtdev) { ++ rtdm_irq_disable(&fep->irq_handle); ++ rtdm_irq_free(&fep->irq_handle); ++ ++ consistent_free(fep->rx_bd_base); ++ ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ rtdev_root = NULL; ++ } ++} ++ ++module_init(fec_enet_init); ++module_exit(fec_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/Kconfig 2021-04-07 16:01:27.258634122 +0800 +@@ -0,0 +1,138 @@ ++menu "Drivers" ++ depends on XENO_DRIVERS_NET ++ ++comment "Common PCI Drivers" ++ depends on PCI ++ ++config XENO_DRIVERS_NET_DRV_PCNET32 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "AMD PCnet32" ++ ++ ++config XENO_DRIVERS_NET_DRV_TULIP ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "DEC Tulip" ++ ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel EtherExpress PRO/100" ++ default y ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100_CMDTIMEOUT ++ depends on XENO_DRIVERS_NET && PCI ++ int "Command Timeout" ++ depends on XENO_DRIVERS_NET_DRV_EEPRO100 ++ default 20 ++ ---help--- ++ Timeout in microseconds of transmission or configuration commands that ++ are issued in real-time contexts. ++ ++config XENO_DRIVERS_NET_DRV_EEPRO100_DBG ++ depends on XENO_DRIVERS_NET && PCI ++ bool "Enable debugging and instrumentation" ++ depends on XENO_DRIVERS_NET_DRV_EEPRO100 ++ ---help--- ++ This option switches on internal debugging code of the EEPRO/100 driver. ++ It also enables the collection of worst-case command delays in real-time ++ contexts in order to reduce the command timeout (which, effectively, will ++ also reduce the worst-case transmission latency). 
++ ++ ++config XENO_DRIVERS_NET_DRV_E1000 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel(R) PRO/1000 (Gigabit)" ++ default y ++ ++config XENO_DRIVERS_NET_DRV_E1000E ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "New Intel(R) PRO/1000 PCIe (Gigabit)" ++ ++ ++config XENO_DRIVERS_NET_DRV_NATSEMI ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "NatSemi" ++ ++ ++config XENO_DRIVERS_NET_DRV_8139 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Realtek 8139" ++ default y ++ ++ ++config XENO_DRIVERS_NET_DRV_VIA_RHINE ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "VIA Rhine" ++ ++ ++config XENO_DRIVERS_NET_DRV_IGB ++ select I2C ++ select I2C_ALGOBIT ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Intel(R) 82575 (Gigabit)" ++ ++ ++config XENO_DRIVERS_NET_DRV_R8169 ++ depends on XENO_DRIVERS_NET && PCI ++ tristate "Realtek 8169 (Gigabit)" ++ ++ ++if PPC ++ ++comment "Embedded MPC Drivers" ++ depends on XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_NET_DRV_FCC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8260 FCC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_FEC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8xx FEC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_SCC_ENET ++ depends on XENO_DRIVERS_NET ++ tristate "MPC8xx SCC Ethernet" ++ ++ ++config XENO_DRIVERS_NET_DRV_MPC52XX_FEC ++ depends on XENO_DRIVERS_NET ++ tristate "MPC52xx FEC Ethernet" ++ ++endif ++ ++ ++comment "Misc Drivers" ++ ++config XENO_DRIVERS_NET_DRV_LOOPBACK ++ depends on XENO_DRIVERS_NET ++ tristate "Loopback" ++ default y ++ ++ ++config XENO_DRIVERS_NET_DRV_SMC91111 ++ depends on XENO_DRIVERS_NET ++ tristate "SMSC LAN91C111" ++ ++if ARM ++ ++config XENO_DRIVERS_NET_DRV_AT91_ETHER ++ depends on XENO_DRIVERS_NET && SOC_AT91RM9200 ++ select XENO_DRIVERS_NET_DRV_MACB ++ tristate "AT91RM9200 Board Ethernet Driver" ++ ++config XENO_DRIVERS_NET_DRV_MACB ++ depends on XENO_DRIVERS_NET ++ select AT91_PROGRAMMABLE_CLOCKS if ARCH_AT91 ++ tristate "Cadence MACB/GEM devices" ++ ---help--- ++ Driver for internal MAC-controller on AT91SAM926x microcontrollers. ++ Porting by Cristiano Mantovani and Stefano Banzi (Marposs SpA). ++ ++endif ++ ++source "drivers/xenomai/net/drivers/experimental/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/net/drivers/fec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/fec.c 2021-04-07 16:01:27.253634129 +0800 +@@ -0,0 +1,1859 @@ ++/* ++ * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. ++ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) ++ * ++ * Right now, I am very wasteful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. ++ * ++ * Much better multiple PHY support by Magnus Damm. ++ * Copyright (c) 2000 Ericsson Radio Systems AB. ++ * ++ * Support for FEC controller of ColdFire processors. ++ * Copyright (c) 2001-2005 Greg Ungerer (gerg@snapgear.com) ++ * ++ * Bug fixes and cleanup by Philippe De Muyter (phdm@macqel.be) ++ * Copyright (c) 2004-2006 Macq Electronique SA. ++ * ++ * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. 
++ * ++ * Ported from v3.5 Linux drivers/net/ethernet/freescale/fec.[ch] ++ * (git tag v3.5-709-ga6be1fc) ++ * ++ * Copyright (c) 2012 Wolfgang Grandegger ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifndef CONFIG_ARM ++#include ++#include ++#endif ++ ++/* RTnet */ ++#include ++#include ++ ++/* RTnet */ ++#include "rt_fec.h" ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the FEC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++#if defined(CONFIG_ARM) ++#define FEC_ALIGNMENT 0xf ++#else ++#define FEC_ALIGNMENT 0x3 ++#endif ++ ++#define DRIVER_NAME "rt_fec" ++ ++/* Controller is ENET-MAC */ ++#define FEC_QUIRK_ENET_MAC (1 << 0) ++/* Controller needs driver to swap frame */ ++#define FEC_QUIRK_SWAP_FRAME (1 << 1) ++/* Controller uses gasket */ ++#define FEC_QUIRK_USE_GASKET (1 << 2) ++/* Controller has GBIT support */ ++#define FEC_QUIRK_HAS_GBIT (1 << 3) ++ ++static struct platform_device_id fec_devtype[] = { ++ { ++ .name = "fec", ++/* For legacy not devicetree based support */ ++#if defined(CONFIG_SOC_IMX6Q) ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT, ++#elif defined(CONFIG_SOC_IMX28) ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, ++#elif defined(CONFIG_SOC_IMX25) ++ .driver_data = FEC_QUIRK_USE_GASKET, ++#else ++ /* keep it for coldfire */ ++ .driver_data = 0, ++#endif ++ }, { ++ .name = "imx25-fec", ++ .driver_data = FEC_QUIRK_USE_GASKET, ++ }, { ++ .name = "imx27-fec", ++ .driver_data = 0, ++ }, { ++ .name = "imx28-fec", ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, ++ }, { ++ .name = "imx6q-fec", ++ .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT, ++ }, { ++ /* sentinel */ ++ } ++}; ++MODULE_DEVICE_TABLE(platform, fec_devtype); ++ ++enum imx_fec_type { ++ IMX25_FEC = 1, /* runs on i.mx25/50/53 */ ++ IMX27_FEC, /* runs on i.mx27/35/51 */ ++ IMX28_FEC, ++ IMX6Q_FEC, ++}; ++ ++static const struct of_device_id fec_dt_ids[] = { ++ { .compatible = "fsl,imx25-fec", .data = &fec_devtype[IMX25_FEC], }, ++ { .compatible = "fsl,imx27-fec", .data = &fec_devtype[IMX27_FEC], }, ++ { .compatible = "fsl,imx28-fec", .data = &fec_devtype[IMX28_FEC], }, ++ { .compatible = "fsl,imx6q-fec", .data = &fec_devtype[IMX6Q_FEC], }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, fec_dt_ids); ++ ++static unsigned char macaddr[ETH_ALEN]; ++module_param_array(macaddr, byte, NULL, 0); ++MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); ++ ++#if defined(CONFIG_M5272) ++/* ++ * Some hardware gets it MAC address out of local flash memory. ++ * if this is non-zero then assume it is the address to get MAC from. ++ */ ++#if defined(CONFIG_NETtel) ++#define FEC_FLASHMAC 0xf0006006 ++#elif defined(CONFIG_GILBARCONAP) || defined(CONFIG_SCALES) ++#define FEC_FLASHMAC 0xf0006000 ++#elif defined(CONFIG_CANCam) ++#define FEC_FLASHMAC 0xf0020000 ++#elif defined (CONFIG_M5272C3) ++#define FEC_FLASHMAC (0xffe04000 + 4) ++#elif defined(CONFIG_MOD5272) ++#define FEC_FLASHMAC 0xffc0406b ++#else ++#define FEC_FLASHMAC 0 ++#endif ++#endif /* CONFIG_M5272 */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it it best ++ * to keep them that size. 
++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FEC_ENET_RX_PAGES 8 ++#define FEC_ENET_RX_FRSIZE RTSKB_SIZE /* Maximum size for RTnet */ ++#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) ++#define FEC_ENET_TX_FRSIZE 2048 ++#define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) ++#define TX_RING_SIZE 16 /* Must be power of two */ ++#define TX_RING_MOD_MASK 15 /* for this to work */ ++ ++#if (((RX_RING_SIZE + TX_RING_SIZE) * 8) > PAGE_SIZE) ++#error "FEC: descriptor ring size constants too large" ++#endif ++ ++/* Interrupt events/masks. */ ++#define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */ ++#define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */ ++#define FEC_ENET_BABT ((uint)0x20000000) /* Babbling transmitter */ ++#define FEC_ENET_GRA ((uint)0x10000000) /* Graceful stop complete */ ++#define FEC_ENET_TXF ((uint)0x08000000) /* Full frame transmitted */ ++#define FEC_ENET_TXB ((uint)0x04000000) /* A buffer was transmitted */ ++#define FEC_ENET_RXF ((uint)0x02000000) /* Full frame received */ ++#define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ ++#define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ ++#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ ++ ++#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII) ++ ++/* The FEC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++#define PKT_MAXBLR_SIZE 1520 ++ ++/* This device has up to three irqs on some platforms */ ++#define FEC_IRQ_NUM 3 ++ ++/* ++ * The 5270/5271/5280/5282/532x RX control register also contains maximum frame ++ * size bits. Other FEC hardware does not, so we need to take that into ++ * account when setting it. ++ */ ++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ ++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) ++#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16) ++#else ++#define OPT_FRAME_SIZE 0 ++#endif ++ ++static unsigned int rx_pool_size = 2 * RX_RING_SIZE; ++module_param(rx_pool_size, int, 0444); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++#ifndef rtnetdev_priv ++#define rtnetdev_priv(ndev) (ndev)->priv ++#endif ++ ++/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fec_enet_private { ++ /* Hardware registers of the FEC device */ ++ void __iomem *hwp; ++ ++ struct net_device *netdev; /* linux netdev needed for phy handling */ ++ ++ struct clk *clk_ipg; ++ struct clk *clk_ahb; ++ ++ /* The saved address of a sent-in-place packet/buffer, for skfree(). 
*/ ++ unsigned char *tx_bounce[TX_RING_SIZE]; ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ struct rtskb *rx_skbuff[RX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses */ ++ dma_addr_t bd_dma; ++ /* Address of Rx and Tx buffers */ ++ struct bufdesc *rx_bd_base; ++ struct bufdesc *tx_bd_base; ++ /* The next free ring entry */ ++ struct bufdesc *cur_rx, *cur_tx; ++ /* The ring entries to be free()ed */ ++ struct bufdesc *dirty_tx; ++ ++ uint tx_full; ++ /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ ++ rtdm_lock_t hw_lock; ++ ++ struct platform_device *pdev; ++ ++ int opened; ++ int dev_id; ++ ++ /* Phylib and MDIO interface */ ++ struct mii_bus *mii_bus; ++ struct phy_device *phy_dev; ++ int mii_timeout; ++ uint phy_speed; ++ phy_interface_t phy_interface; ++ int link; ++ int full_duplex; ++ struct completion mdio_done; ++ int irq[FEC_IRQ_NUM]; ++ ++ /* RTnet */ ++ struct device *dev; ++ rtdm_irq_t irq_handle[3]; ++ rtdm_nrtsig_t mdio_done_sig; ++ struct net_device_stats stats; ++}; ++ ++/* For phy handling */ ++struct fec_enet_netdev_priv { ++ struct rtnet_device *rtdev; ++}; ++ ++/* FEC MII MMFR bits definition */ ++#define FEC_MMFR_ST (1 << 30) ++#define FEC_MMFR_OP_READ (2 << 28) ++#define FEC_MMFR_OP_WRITE (1 << 28) ++#define FEC_MMFR_PA(v) ((v & 0x1f) << 23) ++#define FEC_MMFR_RA(v) ((v & 0x1f) << 18) ++#define FEC_MMFR_TA (2 << 16) ++#define FEC_MMFR_DATA(v) (v & 0xffff) ++ ++#define FEC_MII_TIMEOUT 30000 /* us */ ++ ++/* Transmitter timeout */ ++#define TX_TIMEOUT (2 * HZ) ++ ++static int mii_cnt; ++ ++static void *swap_buffer(void *bufaddr, int len) ++{ ++ int i; ++ unsigned int *buf = bufaddr; ++ ++ for (i = 0; i < (len + 3) / 4; i++, buf++) ++ *buf = cpu_to_be32(*buf); ++ ++ return bufaddr; ++} ++ ++static int ++fec_enet_start_xmit(struct rtskb *skb, struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct bufdesc *bdp; ++ void *bufaddr; ++ unsigned short status; ++ unsigned long context; ++ ++ if (!fep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ printk("%s: tx link down!.\n", ndev->name); ++ rtnetif_stop_queue(ndev); ++ return 1; /* RTnet: will call kfree_rtskb() */ ++ } ++ ++ rtdm_lock_get_irqsave(&fep->hw_lock, context); ++ ++ /* RTnet */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + ++ *skb->xmit_stamp); ++ ++ /* Fill in a Tx ring entry */ ++ bdp = fep->cur_tx; ++ ++ status = bdp->cbd_sc; ++ ++ if (status & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since ndev->tbusy should be set. ++ */ ++ printk("%s: tx queue full!.\n", ndev->name); ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ return 1; /* RTnet: will call kfree_rtskb() */ ++ } ++ ++ /* Clear all of the status flags */ ++ status &= ~BD_ENET_TX_STATS; ++ ++ /* Set buffer length and buffer pointer */ ++ bufaddr = skb->data; ++ bdp->cbd_datlen = skb->len; ++ ++ /* ++ * On some FEC implementations data must be aligned on ++ * 4-byte boundaries. Use bounce buffers to copy data ++ * and get it aligned. Ugh. 
++ */ ++ if (((unsigned long) bufaddr) & FEC_ALIGNMENT) { ++ unsigned int index; ++ index = bdp - fep->tx_bd_base; ++ memcpy(fep->tx_bounce[index], skb->data, skb->len); ++ bufaddr = fep->tx_bounce[index]; ++ } ++ ++ /* ++ * Some design made an incorrect assumption on endian mode of ++ * the system that it's running on. As the result, driver has to ++ * swap every frame going to and coming from the controller. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) ++ swap_buffer(bufaddr, skb->len); ++ ++ /* Save skb pointer */ ++ fep->tx_skbuff[fep->skb_cur] = skb; ++ ++ fep->stats.tx_bytes += skb->len; ++ fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ /* Push the data cache so the CPM does not get stale memory ++ * data. ++ */ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); ++ ++ /* Send it on its way. Tell FEC it's ready, interrupt when done, ++ * it's the last BD of the frame, and to put the CRC on the end. ++ */ ++ status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR ++ | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ bdp->cbd_sc = status; ++ ++ /* Trigger transmission start */ ++ writel(0, fep->hwp + FEC_X_DES_ACTIVE); ++ ++ /* If this was the last BD in the ring, start at the beginning again. */ ++ if (status & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp == fep->dirty_tx) { ++ fep->tx_full = 1; ++ rtnetif_stop_queue(ndev); ++ } ++ ++ fep->cur_tx = bdp; ++ ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ ++ return NETDEV_TX_OK; ++} ++ ++/* This function is called to start or restart the FEC during a link ++ * change. This only happens when switching between half and full ++ * duplex. ++ */ ++static void ++fec_restart(struct rtnet_device *ndev, int duplex) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ int i; ++ u32 temp_mac[2]; ++ u32 rcntl = OPT_FRAME_SIZE | 0x04; ++ u32 ecntl = 0x2; /* ETHEREN */ ++ ++ /* Whack a reset. We should wait for this. */ ++ writel(1, fep->hwp + FEC_ECNTRL); ++ udelay(10); ++ ++ /* ++ * enet-mac reset will reset mac address registers too, ++ * so need to reconfigure it. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); ++ writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW); ++ writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH); ++ } ++ ++ /* Clear any outstanding interrupt. */ ++ writel(0xffc00000, fep->hwp + FEC_IEVENT); ++ ++ /* Reset all multicast. */ ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++#ifndef CONFIG_M5272 ++ writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_HASH_TABLE_LOW); ++#endif ++ ++ /* Set maximum receive buffer size. */ ++ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); ++ ++ /* Set receive and transmit descriptor base. */ ++ writel(fep->bd_dma, fep->hwp + FEC_R_DES_START); ++ writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE, ++ fep->hwp + FEC_X_DES_START); ++ ++ fep->dirty_tx = fep->cur_tx = fep->tx_bd_base; ++ fep->cur_rx = fep->rx_bd_base; ++ ++ /* Reset SKB transmit buffers. 
*/ ++ fep->skb_cur = fep->skb_dirty = 0; ++ for (i = 0; i <= TX_RING_MOD_MASK; i++) { ++ if (fep->tx_skbuff[i]) { ++ dev_kfree_rtskb(fep->tx_skbuff[i]); ++ fep->tx_skbuff[i] = NULL; ++ } ++ } ++ ++ /* Enable MII mode */ ++ if (duplex) { ++ /* FD enable */ ++ writel(0x04, fep->hwp + FEC_X_CNTRL); ++ } else { ++ /* No Rcv on Xmit */ ++ rcntl |= 0x02; ++ writel(0x0, fep->hwp + FEC_X_CNTRL); ++ } ++ ++ fep->full_duplex = duplex; ++ ++ /* Set MII speed */ ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ ++ /* ++ * The phy interface and speed need to get configured ++ * differently on enet-mac. ++ */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ /* Enable flow control and length check */ ++ rcntl |= 0x40000000 | 0x00000020; ++ ++ /* RGMII, RMII or MII */ ++ if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII) ++ rcntl |= (1 << 6); ++ else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) ++ rcntl |= (1 << 8); ++ else ++ rcntl &= ~(1 << 8); ++ ++ /* 1G, 100M or 10M */ ++ if (fep->phy_dev) { ++ if (fep->phy_dev->speed == SPEED_1000) ++ ecntl |= (1 << 5); ++ else if (fep->phy_dev->speed == SPEED_100) ++ rcntl &= ~(1 << 9); ++ else ++ rcntl |= (1 << 9); ++ } ++ } else { ++#ifdef FEC_MIIGSK_ENR ++ if (id_entry->driver_data & FEC_QUIRK_USE_GASKET) { ++ u32 cfgr; ++ /* disable the gasket and wait */ ++ writel(0, fep->hwp + FEC_MIIGSK_ENR); ++ while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4) ++ udelay(1); ++ ++ /* ++ * configure the gasket: ++ * RMII, 50 MHz, no loopback, no echo ++ * MII, 25 MHz, no loopback, no echo ++ */ ++ cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII) ++ ? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII; ++ if (fep->phy_dev && fep->phy_dev->speed == SPEED_10) ++ cfgr |= BM_MIIGSK_CFGR_FRCONT_10M; ++ writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR); ++ ++ /* re-enable the gasket */ ++ writel(2, fep->hwp + FEC_MIIGSK_ENR); ++ } ++#endif ++ } ++ writel(rcntl, fep->hwp + FEC_R_CNTRL); ++ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ /* enable ENET endian swap */ ++ ecntl |= (1 << 8); ++ /* enable ENET store and forward mode */ ++ writel(1 << 8, fep->hwp + FEC_X_WMRK); ++ } ++ ++ /* And last, enable the transmit and receive processing */ ++ writel(ecntl, fep->hwp + FEC_ECNTRL); ++ writel(0, fep->hwp + FEC_R_DES_ACTIVE); ++ ++ /* Enable interrupts we wish to service */ ++ writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); ++} ++ ++static void ++fec_stop(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); ++ ++ /* We cannot expect a graceful transmit stop without link !!! */ ++ if (fep->link) { ++ writel(1, fep->hwp + FEC_X_CNTRL); /* Graceful transmit stop */ ++ udelay(10); ++ if (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_GRA)) ++ printk("fec_stop : Graceful transmit stop did not complete !\n"); ++ } ++ ++ /* Whack a reset. We should wait for this. 
*/ ++ writel(1, fep->hwp + FEC_ECNTRL); ++ udelay(10); ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); ++ ++ /* We have to keep ENET enabled to have MII interrupt stay working */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { ++ writel(2, fep->hwp + FEC_ECNTRL); ++ writel(rmii_mode, fep->hwp + FEC_R_CNTRL); ++ } ++} ++ ++static void ++fec_enet_tx(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep; ++ struct bufdesc *bdp; ++ unsigned short status; ++ struct rtskb *skb; ++ ++ fep = rtnetdev_priv(ndev); ++ rtdm_lock_get(&fep->hw_lock); ++ bdp = fep->dirty_tx; ++ ++ while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) { ++ if (bdp == fep->cur_tx && fep->tx_full == 0) ++ break; ++ ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); ++ bdp->cbd_bufaddr = 0; ++ ++ skb = fep->tx_skbuff[fep->skb_dirty]; ++ /* Check for errors. */ ++ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | ++ BD_ENET_TX_RL | BD_ENET_TX_UN | ++ BD_ENET_TX_CSL)) { ++ fep->stats.tx_errors++; ++ if (status & BD_ENET_TX_HB) /* No heartbeat */ ++ fep->stats.tx_heartbeat_errors++; ++ if (status & BD_ENET_TX_LC) /* Late collision */ ++ fep->stats.tx_window_errors++; ++ if (status & BD_ENET_TX_RL) /* Retrans limit */ ++ fep->stats.tx_aborted_errors++; ++ if (status & BD_ENET_TX_UN) /* Underrun */ ++ fep->stats.tx_fifo_errors++; ++ if (status & BD_ENET_TX_CSL) /* Carrier lost */ ++ fep->stats.tx_carrier_errors++; ++ } else { ++ fep->stats.tx_packets++; ++ } ++ ++ if (status & BD_ENET_TX_READY) ++ printk("HEY! Enet xmit interrupt and TX_READY.\n"); ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (status & BD_ENET_TX_DEF) ++ fep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit */ ++ dev_kfree_rtskb(skb); /* RTnet */ ++ fep->tx_skbuff[fep->skb_dirty] = NULL; ++ fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted */ ++ if (status & BD_ENET_TX_WRAP) ++ bdp = fep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* Since we have freed up a buffer, the ring is no longer full ++ */ ++ if (fep->tx_full) { ++ fep->tx_full = 0; ++ if (rtnetif_queue_stopped(ndev)) ++ rtnetif_wake_queue(ndev); ++ } ++ } ++ fep->dirty_tx = bdp; ++ rtdm_lock_put(&fep->hw_lock); ++} ++ ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static void ++fec_enet_rx(struct rtnet_device *ndev, int *packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct bufdesc *bdp; ++ unsigned short status; ++ struct rtskb *skb; ++ ushort pkt_len; ++ __u8 *data; ++ ++#ifdef CONFIG_M532x ++ flush_cache_all(); ++#endif ++ rtdm_lock_get(&fep->hw_lock); ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = fep->cur_rx; ++ ++ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) { ++ ++ /* Since we have allocated space to hold a complete frame, ++ * the last indicator should be set. 
++ */ ++ if ((status & BD_ENET_RX_LAST) == 0) ++ printk("FEC ENET: rcv is not +last\n"); ++ ++ if (!fep->opened) ++ goto rx_processing_done; ++ ++ /* Check for errors. */ ++ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | ++ BD_ENET_RX_CR | BD_ENET_RX_OV)) { ++ fep->stats.rx_errors++; ++ if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { ++ /* Frame too long or too short. */ ++ fep->stats.rx_length_errors++; ++ } ++ if (status & BD_ENET_RX_NO) /* Frame alignment */ ++ fep->stats.rx_frame_errors++; ++ if (status & BD_ENET_RX_CR) /* CRC Error */ ++ fep->stats.rx_crc_errors++; ++ if (status & BD_ENET_RX_OV) /* FIFO overrun */ ++ fep->stats.rx_fifo_errors++; ++ } ++ ++ /* Report late collisions as a frame error. ++ * On this error, the BD is closed, but we don't know what we ++ * have in the buffer. So, just drop this frame on the floor. ++ */ ++ if (status & BD_ENET_RX_CL) { ++ fep->stats.rx_errors++; ++ fep->stats.rx_frame_errors++; ++ goto rx_processing_done; ++ } ++ ++ /* Process the incoming frame. */ ++ fep->stats.rx_packets++; ++ pkt_len = bdp->cbd_datlen; ++ fep->stats.rx_bytes += pkt_len; ++ data = (__u8*)__va(bdp->cbd_bufaddr); ++ ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); ++ ++ if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) ++ swap_buffer(data, pkt_len); ++ ++ /* This does 16 byte alignment, exactly what we need. ++ * The packet length includes FCS, but we don't want to ++ * include that when passing upstream as it messes up ++ * bridging applications. ++ */ ++ skb = rtnetdev_alloc_rtskb(ndev, pkt_len - 4 + NET_IP_ALIGN); /* RTnet */ ++ ++ if (unlikely(!skb)) { ++ printk("%s: Memory squeeze, dropping packet.\n", ++ ndev->name); ++ fep->stats.rx_dropped++; ++ } else { ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ rtskb_put(skb, pkt_len - 4); /* Make room */ ++ memcpy(skb->data, data, pkt_len - 4); ++ skb->protocol = rt_eth_type_trans(skb, ndev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; /* RTnet */ ++ } ++ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, ++ FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); ++rx_processing_done: ++ /* Clear the status flags for this buffer */ ++ status &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty */ ++ status |= BD_ENET_RX_EMPTY; ++ bdp->cbd_sc = status; ++ ++ /* Update BD pointer to next entry */ ++ if (status & BD_ENET_RX_WRAP) ++ bdp = fep->rx_bd_base; ++ else ++ bdp++; ++ /* Doing this here will keep the FEC running while we process ++ * incoming frames. On a heavily loaded network, we should be ++ * able to keep up at the expense of system resources. ++ */ ++ writel(0, fep->hwp + FEC_R_DES_ACTIVE); ++ } ++ fep->cur_rx = bdp; ++ ++ rtdm_lock_put(&fep->hw_lock); ++} ++ ++static int ++fec_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *ndev = ++ rtdm_irq_get_arg(irq_handle, struct rtnet_device); /* RTnet */ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ uint int_events; ++ irqreturn_t ret = RTDM_IRQ_NONE; ++ /* RTnet */ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ int packets = 0; ++ ++ do { ++ int_events = readl(fep->hwp + FEC_IEVENT); ++ writel(int_events, fep->hwp + FEC_IEVENT); ++ ++ if (int_events & FEC_ENET_RXF) { ++ ret = RTDM_IRQ_HANDLED; ++ fec_enet_rx(ndev, &packets, &time_stamp); ++ } ++ ++ /* Transmit OK, or non-fatal error. Update the buffer ++ * descriptors. FEC handles all errors, we just discover ++ * them as part of the transmit process. 
++ */ ++ if (int_events & FEC_ENET_TXF) { ++ ret = RTDM_IRQ_HANDLED; ++ fec_enet_tx(ndev); ++ } ++ ++ if (int_events & FEC_ENET_MII) { ++ ret = RTDM_IRQ_HANDLED; ++ rtdm_nrtsig_pend(&fep->mdio_done_sig); ++ } ++ } while (int_events); ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(ndev); ++ ++ return ret; ++} ++ ++ ++ ++/* ------------------------------------------------------------------------- */ ++static void __inline__ fec_get_mac(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct fec_platform_data *pdata = fep->pdev->dev.platform_data; ++ unsigned char *iap, tmpaddr[ETH_ALEN]; ++ ++ /* ++ * try to get mac address in following order: ++ * ++ * 1) module parameter via kernel command line in form ++ * fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0 ++ */ ++ iap = macaddr; ++ ++#ifdef CONFIG_OF ++ /* ++ * 2) from device tree data ++ */ ++ if (!is_valid_ether_addr(iap)) { ++ struct device_node *np = fep->pdev->dev.of_node; ++ if (np) { ++ const char *mac = of_get_mac_address(np); ++ if (mac) ++ iap = (unsigned char *) mac; ++ } ++ } ++#endif ++ ++ /* ++ * 3) from flash or fuse (via platform data) ++ */ ++ if (!is_valid_ether_addr(iap)) { ++#ifdef CONFIG_M5272 ++ if (FEC_FLASHMAC) ++ iap = (unsigned char *)FEC_FLASHMAC; ++#else ++ if (pdata) ++ iap = (unsigned char *)&pdata->mac; ++#endif ++ } ++ ++ /* ++ * 4) FEC mac registers set by bootloader ++ */ ++ if (!is_valid_ether_addr(iap)) { ++ *((unsigned long *) &tmpaddr[0]) = ++ be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW)); ++ *((unsigned short *) &tmpaddr[4]) = ++ be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16); ++ iap = &tmpaddr[0]; ++ } ++ ++ memcpy(ndev->dev_addr, iap, ETH_ALEN); ++ ++ /* Adjust MAC if using macaddr */ ++ if (iap == macaddr) ++ ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id; ++} ++ ++/* ------------------------------------------------------------------------- */ ++ ++/* ++ * Phy section ++ */ ++static void fec_enet_mdio_done(rtdm_nrtsig_t *nrt_sig, void* data) ++{ ++ struct fec_enet_private *fep = data; ++ ++ complete(&fep->mdio_done); ++} ++ ++static void fec_enet_adjust_link(struct net_device *netdev) ++{ ++ struct fec_enet_netdev_priv *npriv = netdev_priv(netdev); ++ struct rtnet_device *ndev = npriv->rtdev; ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct phy_device *phy_dev = fep->phy_dev; ++ unsigned long context; ++ ++ int status_change = 0; ++ ++ rtdm_lock_get_irqsave(&fep->hw_lock, context); ++ ++ /* Prevent a state halted on mii error */ ++ if (fep->mii_timeout && phy_dev->state == PHY_HALTED) { ++ phy_dev->state = PHY_RESUMING; ++ goto spin_unlock; ++ } ++ ++ /* Duplex link change */ ++ if (phy_dev->link) { ++ if (fep->full_duplex != phy_dev->duplex) { ++ fec_restart(ndev, phy_dev->duplex); ++ /* prevent unnecessary second fec_restart() below */ ++ fep->link = phy_dev->link; ++ status_change = 1; ++ } ++ } ++ ++ /* Link on or off change */ ++ if (phy_dev->link != fep->link) { ++ fep->link = phy_dev->link; ++ if (phy_dev->link) ++ fec_restart(ndev, phy_dev->duplex); ++ else ++ fec_stop(ndev); ++ status_change = 1; ++ } ++ ++spin_unlock: ++ rtdm_lock_put_irqrestore(&fep->hw_lock, context); ++ ++ if (status_change) ++ phy_print_status(phy_dev); ++} ++ ++static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) ++{ ++ struct fec_enet_private *fep = bus->priv; ++ unsigned long time_left; ++ ++ fep->mii_timeout = 0; ++ init_completion(&fep->mdio_done); ++ ++ /* start a read op */ ++ writel(FEC_MMFR_ST | FEC_MMFR_OP_READ | ++ 
FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | ++ FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); ++ ++ /* wait for end of transfer */ ++ time_left = wait_for_completion_timeout(&fep->mdio_done, ++ usecs_to_jiffies(FEC_MII_TIMEOUT)); ++ if (time_left == 0) { ++ fep->mii_timeout = 1; ++ printk(KERN_ERR "FEC: MDIO read timeout\n"); ++ return -ETIMEDOUT; ++ } ++ ++ /* return value */ ++ return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); ++} ++ ++static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, ++ u16 value) ++{ ++ struct fec_enet_private *fep = bus->priv; ++ unsigned long time_left; ++ ++ fep->mii_timeout = 0; ++ init_completion(&fep->mdio_done); ++ ++ /* start a write op */ ++ writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE | ++ FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | ++ FEC_MMFR_TA | FEC_MMFR_DATA(value), ++ fep->hwp + FEC_MII_DATA); ++ ++ /* wait for end of transfer */ ++ time_left = wait_for_completion_timeout(&fep->mdio_done, ++ usecs_to_jiffies(FEC_MII_TIMEOUT)); ++ if (time_left == 0) { ++ fep->mii_timeout = 1; ++ printk(KERN_ERR "FEC: MDIO write timeout\n"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static int fec_enet_mdio_reset(struct mii_bus *bus) ++{ ++ return 0; ++} ++ ++static int fec_enet_mii_probe(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ struct phy_device *phy_dev = NULL; ++ char mdio_bus_id[MII_BUS_ID_SIZE]; ++ char phy_name[MII_BUS_ID_SIZE + 3]; ++ int phy_id; ++ int dev_id = fep->dev_id; ++ ++ fep->phy_dev = NULL; ++ ++ /* check for attached phy */ ++ for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { ++ if ((fep->mii_bus->phy_mask & (1 << phy_id))) ++ continue; ++ if (fep->mii_bus->phy_map[phy_id] == NULL) ++ continue; ++ if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) ++ continue; ++ if (dev_id--) ++ continue; ++ strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); ++ break; ++ } ++ ++ if (phy_id >= PHY_MAX_ADDR) { ++ printk(KERN_INFO ++ "%s: no PHY, assuming direct connection to switch\n", ++ ndev->name); ++ strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); ++ phy_id = 0; ++ } ++ ++ snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id); ++ /* attach the mac to the phy using the dummy linux netdev */ ++ phy_dev = phy_connect(fep->netdev, phy_name, &fec_enet_adjust_link, 0, ++ fep->phy_interface); ++ if (IS_ERR(phy_dev)) { ++ printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name); ++ return PTR_ERR(phy_dev); ++ } ++ ++ /* mask with MAC supported features */ ++ if (id_entry->driver_data & FEC_QUIRK_HAS_GBIT) ++ phy_dev->supported &= PHY_GBIT_FEATURES; ++ else ++ phy_dev->supported &= PHY_BASIC_FEATURES; ++ ++ phy_dev->advertising = phy_dev->supported; ++ ++ fep->phy_dev = phy_dev; ++ fep->link = 0; ++ fep->full_duplex = 0; ++ ++ printk(KERN_INFO ++ "%s: Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ ndev->name, ++ fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), ++ fep->phy_dev->irq); ++ ++ return 0; ++} ++ ++static int fec_enet_mii_init(struct platform_device *pdev) ++{ ++ static struct mii_bus *fec0_mii_bus; ++ struct rtnet_device *ndev = platform_get_drvdata(pdev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ const struct platform_device_id *id_entry = ++ platform_get_device_id(fep->pdev); ++ int err = -ENXIO, i; ++ ++ /* ++ * The dual fec interfaces are not equivalent with enet-mac. 
++ * Here are the differences: ++ * ++ * - fec0 supports MII & RMII modes while fec1 only supports RMII ++ * - fec0 acts as the 1588 time master while fec1 is slave ++ * - external phys can only be configured by fec0 ++ * ++ * That is to say fec1 can not work independently. It only works ++ * when fec0 is working. The reason behind this design is that the ++ * second interface is added primarily for Switch mode. ++ * ++ * Because of the last point above, both phys are attached on fec0 ++ * mdio interface in board design, and need to be configured by ++ * fec0 mii_bus. ++ */ ++ if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) { ++ /* fec1 uses fec0 mii_bus */ ++ if (mii_cnt && fec0_mii_bus) { ++ fep->mii_bus = fec0_mii_bus; ++ mii_cnt++; ++ return 0; ++ } ++ return -ENOENT; ++ } ++ ++ fep->mii_timeout = 0; ++ ++ /* ++ * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed) ++ * ++ * The formula for FEC MDC is 'ref_freq / (MII_SPEED x 2)' while ++ * for ENET-MAC is 'ref_freq / ((MII_SPEED + 1) x 2)'. The i.MX28 ++ * Reference Manual has an error on this, and gets fixed on i.MX6Q ++ * document. ++ */ ++ fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000); ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) ++ fep->phy_speed--; ++ fep->phy_speed <<= 1; ++ writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); ++ ++ fep->mii_bus = mdiobus_alloc(); ++ if (fep->mii_bus == NULL) { ++ err = -ENOMEM; ++ goto err_out; ++ } ++ ++ fep->mii_bus->name = "fec_enet_mii_bus"; ++ fep->mii_bus->read = fec_enet_mdio_read; ++ fep->mii_bus->write = fec_enet_mdio_write; ++ fep->mii_bus->reset = fec_enet_mdio_reset; ++ snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", ++ pdev->name, fep->dev_id + 1); ++ fep->mii_bus->priv = fep; ++ fep->mii_bus->parent = &pdev->dev; ++ ++ fep->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); ++ if (!fep->mii_bus->irq) { ++ err = -ENOMEM; ++ goto err_out_free_mdiobus; ++ } ++ ++ for (i = 0; i < PHY_MAX_ADDR; i++) ++ fep->mii_bus->irq[i] = PHY_POLL; ++ ++ rtdm_nrtsig_init(&fep->mdio_done_sig, fec_enet_mdio_done, fep); ++ ++ if (mdiobus_register(fep->mii_bus)) ++ goto err_out_destroy_nrt; ++ ++ mii_cnt++; ++ ++ /* save fec0 mii_bus */ ++ if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) ++ fec0_mii_bus = fep->mii_bus; ++ ++ return 0; ++ ++err_out_destroy_nrt: ++ rtdm_nrtsig_destroy(&fep->mdio_done_sig); ++ kfree(fep->mii_bus->irq); ++err_out_free_mdiobus: ++ mdiobus_free(fep->mii_bus); ++err_out: ++ return err; ++} ++ ++static void fec_enet_mii_remove(struct fec_enet_private *fep) ++{ ++ if (--mii_cnt == 0) { ++ mdiobus_unregister(fep->mii_bus); ++ kfree(fep->mii_bus->irq); ++ mdiobus_free(fep->mii_bus); ++ } ++ rtdm_nrtsig_destroy(&fep->mdio_done_sig); ++} ++ ++static int ++fec_enet_ioctl(struct rtnet_device *ndev, unsigned int request, void *arg) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct phy_device *phydev = fep->phy_dev; ++ struct ifreq *ifr = arg; ++ struct ethtool_value *value; ++ struct ethtool_cmd cmd; ++ int err = 0; ++ ++ if (!rtnetif_running(ndev)) ++ return -EINVAL; ++ ++ if (!phydev) ++ return -ENODEV; ++ ++ switch (request) { ++ case SIOCETHTOOL: ++ value = (struct ethtool_value *)ifr->ifr_data; ++ switch (value->cmd) { ++ case ETHTOOL_GLINK: ++ value->data = fep->link; ++ if (copy_to_user(&value->data, &fep->link, ++ sizeof(value->data))) ++ err = -EFAULT; ++ break; ++ case ETHTOOL_GSET: ++ memset(&cmd, 0, sizeof(cmd)); ++ cmd.cmd = ETHTOOL_GSET; ++ err = phy_ethtool_gset(phydev, &cmd); ++ if 
(err) ++ break; ++ if (copy_to_user(ifr->ifr_data, &cmd, sizeof(cmd))) ++ err = -EFAULT; ++ break; ++ case ETHTOOL_SSET: ++ if (copy_from_user(&cmd, ifr->ifr_data, sizeof(cmd))) ++ err = -EFAULT; ++ else ++ err = phy_ethtool_sset(phydev, &cmd); ++ break; ++ } ++ break; ++ default: ++ err = -EOPNOTSUPP; ++ break; ++ } ++ ++ return err; ++} ++ ++static void fec_enet_free_buffers(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int i; ++ struct rtskb *skb; ++ struct bufdesc *bdp; ++ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = fep->rx_skbuff[i]; ++ ++ if (bdp->cbd_bufaddr) ++ dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, ++ FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); ++ if (skb) ++ dev_kfree_rtskb(skb); /* RTnet */ ++ bdp++; ++ } ++ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) ++ kfree(fep->tx_bounce[i]); ++} ++ ++static int fec_enet_alloc_buffers(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int i; ++ struct rtskb *skb; ++ struct bufdesc *bdp; ++ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ skb = rtnetdev_alloc_rtskb(netdev, FEC_ENET_RX_FRSIZE); /* RTnet */ ++ if (!skb) { ++ fec_enet_free_buffers(ndev); ++ return -ENOMEM; ++ } ++ fep->rx_skbuff[i] = skb; ++ ++ bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data, ++ FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); ++ bdp->cbd_sc = BD_ENET_RX_EMPTY; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ fep->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL); ++ ++ bdp->cbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ return 0; ++} ++ ++static int ++fec_enet_open(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ int ret; ++ ++ /* I should reset the ring buffers here, but I don't yet know ++ * a simple way to do that. ++ */ ++ ++ ret = fec_enet_alloc_buffers(ndev); ++ if (ret) ++ return ret; ++ ++ /* RTnet */ ++ rt_stack_connect(ndev, &STACK_manager); ++ ++ /* Probe and connect to PHY when open the interface */ ++ ret = fec_enet_mii_probe(ndev); ++ if (ret) { ++ fec_enet_free_buffers(ndev); ++ return ret; ++ } ++ phy_start(fep->phy_dev); ++ rtnetif_carrier_on(ndev); ++ rtnetif_start_queue(ndev); ++ fep->opened = 1; ++ return 0; ++} ++ ++static int ++fec_enet_close(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ /* Don't know what to do yet. */ ++ fep->opened = 0; ++ rtnetif_stop_queue(ndev); ++ fec_stop(ndev); ++ ++ if (fep->phy_dev) { ++ phy_stop(fep->phy_dev); ++ phy_disconnect(fep->phy_dev); ++ } ++ ++ fec_enet_free_buffers(ndev); ++ ++ /* RTnet */ ++ rt_stack_disconnect(ndev); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_MULTICAST ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). 
++ */ ++ ++#define HASH_BITS 6 /* #bits in hash */ ++#define CRC32_POLY 0xEDB88320 ++ ++static void set_multicast_list(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct netdev_hw_addr *ha; ++ unsigned int i, bit, data, crc, tmp; ++ unsigned char hash; ++ ++ if (ndev->flags & IFF_PROMISC) { ++ tmp = readl(fep->hwp + FEC_R_CNTRL); ++ tmp |= 0x8; ++ writel(tmp, fep->hwp + FEC_R_CNTRL); ++ return; ++ } ++ ++ tmp = readl(fep->hwp + FEC_R_CNTRL); ++ tmp &= ~0x8; ++ writel(tmp, fep->hwp + FEC_R_CNTRL); ++ ++ if (ndev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's ++ */ ++ writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ ++ return; ++ } ++ ++ /* Clear filter and add the addresses in hash register ++ */ ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ ++ rtnetdev_for_each_mc_addr(ha, ndev) { ++ /* calculate crc32 value of mac address */ ++ crc = 0xffffffff; ++ ++ for (i = 0; i < ndev->addr_len; i++) { ++ data = ha->addr[i]; ++ for (bit = 0; bit < 8; bit++, data >>= 1) { ++ crc = (crc >> 1) ^ ++ (((crc ^ data) & 1) ? CRC32_POLY : 0); ++ } ++ } ++ ++ /* only upper 6 bits (HASH_BITS) are used ++ * which point to specific bit in he hash registers ++ */ ++ hash = (crc >> (32 - HASH_BITS)) & 0x3f; ++ ++ if (hash > 31) { ++ tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ tmp |= 1 << (hash - 32); ++ writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); ++ } else { ++ tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ tmp |= 1 << hash; ++ writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); ++ } ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_MULTICAST */ ++ ++#ifdef ORIGINAL_CODE ++/* Set a MAC change in hardware. 
*/ ++static int ++fec_set_mac_address(struct rtnet_device *ndev, void *p) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct sockaddr *addr = p; ++ ++ if (!is_valid_ether_addr(addr->sa_data)) ++ return -EADDRNOTAVAIL; ++ ++ memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); ++ ++ writel(ndev->dev_addr[3] | (ndev->dev_addr[2] << 8) | ++ (ndev->dev_addr[1] << 16) | (ndev->dev_addr[0] << 24), ++ fep->hwp + FEC_ADDR_LOW); ++ writel((ndev->dev_addr[5] << 16) | (ndev->dev_addr[4] << 24), ++ fep->hwp + FEC_ADDR_HIGH); ++ return 0; ++} ++ ++#ifdef CONFIG_NET_POLL_CONTROLLER ++/* ++ * fec_poll_controller: FEC Poll controller function ++ * @dev: The FEC network adapter ++ * ++ * Polled functionality used by netconsole and others in non interrupt mode ++ * ++ */ ++void fec_poll_controller(struct rtnet_device *dev) ++{ ++ int i; ++ struct fec_enet_private *fep = rtnetdev_priv(dev); ++ ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ if (fep->irq[i] > 0) { ++ disable_irq(fep->irq[i]); ++ fec_enet_interrupt(fep->irq[i], dev); ++ enable_irq(fep->irq[i]); ++ } ++ } ++} ++#endif /* ORIGINAL_CODE */ ++ ++static const struct rtnet_device_ops fec_netdev_ops = { ++ .ndo_open = fec_enet_open, ++ .ndo_stop = fec_enet_close, ++ .ndo_start_xmit = fec_enet_start_xmit, ++ .ndo_set_rx_mode = set_multicast_list, ++ .ndo_change_mtu = eth_change_mtu, ++ .ndo_validate_addr = eth_validate_addr, ++ .ndo_tx_timeout = fec_timeout, ++ .ndo_set_mac_address = fec_set_mac_address, ++#ifdef CONFIG_NET_POLL_CONTROLLER ++ .ndo_poll_controller = fec_poll_controller, ++#endif ++}; ++#endif ++ ++/* RTnet: get statistics */ ++static struct net_device_stats *fec_get_stats(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ return &fep->stats; ++} ++ ++ /* ++ * XXX: We need to clean up on failure exits here. ++ * ++ */ ++static int fec_enet_init(struct rtnet_device *ndev) ++{ ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct bufdesc *cbd_base; ++ struct bufdesc *bdp; ++ int i; ++ ++ /* Allocate memory for buffer descriptors. */ ++ cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma, ++ GFP_KERNEL); ++ if (!cbd_base) { ++ printk("FEC: allocate descriptor memory failed?\n"); ++ return -ENOMEM; ++ } ++ ++ rtdm_lock_init(&fep->hw_lock); ++ ++ /* Get the Ethernet address */ ++ fec_get_mac(ndev); ++ ++ /* Set receive and transmit descriptor base. */ ++ fep->rx_bd_base = cbd_base; ++ fep->tx_bd_base = cbd_base + RX_RING_SIZE; ++ ++ /* RTnet: specific entries in the device structure */ ++ ndev->open = fec_enet_open; ++ ndev->stop = fec_enet_close; ++ ndev->hard_start_xmit = fec_enet_start_xmit; ++ ndev->get_stats = fec_get_stats; ++ ndev->do_ioctl = fec_enet_ioctl; ++#ifdef CONFIG_XENO_DRIVERS_NET_MULTICAST ++ ndev->set_multicast_list = &set_multicast_list; ++#endif ++ ++ /* Initialize the receive buffer descriptors. */ ++ bdp = fep->rx_bd_base; ++ for (i = 0; i < RX_RING_SIZE; i++) { ++ ++ /* Initialize the BD for every fragment in the page. */ ++ bdp->cbd_sc = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* ...and the same for transmit */ ++ bdp = fep->tx_bd_base; ++ for (i = 0; i < TX_RING_SIZE; i++) { ++ ++ /* Initialize the BD for every fragment in the page. 
*/ ++ bdp->cbd_sc = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ fec_restart(ndev, 0); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_OF ++static int fec_get_phy_mode_dt(struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ ++ if (np) ++ return of_get_phy_mode(np); ++ ++ return -ENODEV; ++} ++ ++static void fec_reset_phy(struct platform_device *pdev) ++{ ++ int err, phy_reset; ++ struct device_node *np = pdev->dev.of_node; ++ ++ if (!np) ++ return; ++ ++ phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0); ++ err = gpio_request_one(phy_reset, GPIOF_OUT_INIT_LOW, "phy-reset"); ++ if (err) { ++ pr_debug("FEC: failed to get gpio phy-reset: %d\n", err); ++ return; ++ } ++ msleep(1); ++ gpio_set_value(phy_reset, 1); ++} ++#else /* CONFIG_OF */ ++static inline int fec_get_phy_mode_dt(struct platform_device *pdev) ++{ ++ return -ENODEV; ++} ++ ++static inline void fec_reset_phy(struct platform_device *pdev) ++{ ++ /* ++ * In case of platform probe, the reset has been done ++ * by machine code. ++ */ ++} ++#endif /* CONFIG_OF */ ++ ++static int fec_probe(struct platform_device *pdev) ++{ ++ struct fec_enet_netdev_priv *npriv; ++ struct fec_enet_private *fep; ++ struct fec_platform_data *pdata; ++ struct rtnet_device *ndev; ++ int i, irq, ret = 0; ++ struct resource *r; ++ const struct of_device_id *of_id; ++ static int dev_id; ++ struct pinctrl *pinctrl; ++ ++ of_id = of_match_device(fec_dt_ids, &pdev->dev); ++ if (of_id) ++ pdev->id_entry = of_id->data; ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!r) ++ return -ENXIO; ++ ++ r = request_mem_region(r->start, resource_size(r), pdev->name); ++ if (!r) ++ return -EBUSY; ++ ++ /* Init network device */ ++ ndev = rt_alloc_etherdev(sizeof(struct fec_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (!ndev) { ++ ret = -ENOMEM; ++ goto failed_alloc_etherdev; ++ } ++ ++ /* RTnet */ ++ rtdev_alloc_name(ndev, "rteth%d"); ++ rt_rtdev_connect(ndev, &RTDEV_manager); ++ ndev->vers = RTDEV_VERS_2_0; ++ ndev->sysbind = &pdev->dev; ++ ++ /* setup board info structure */ ++ fep = rtnetdev_priv(ndev); ++ memset(fep, 0, sizeof(*fep)); ++ ++ /* RTnet: allocate dummy linux netdev structure for phy handling */ ++ fep->netdev = alloc_etherdev(sizeof(struct fec_enet_netdev_priv)); ++ if (!fep->netdev) ++ goto failed_alloc_netdev; ++ SET_NETDEV_DEV(fep->netdev, &pdev->dev); ++ npriv = netdev_priv(fep->netdev); ++ npriv->rtdev = ndev; ++ ++ fep->hwp = ioremap(r->start, resource_size(r)); ++ fep->pdev = pdev; ++ fep->dev_id = dev_id++; ++ ++ if (!fep->hwp) { ++ ret = -ENOMEM; ++ goto failed_ioremap; ++ } ++ ++ platform_set_drvdata(pdev, ndev); ++ ++ ret = fec_get_phy_mode_dt(pdev); ++ if (ret < 0) { ++ pdata = pdev->dev.platform_data; ++ if (pdata) ++ fep->phy_interface = pdata->phy; ++ else ++ fep->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ fep->phy_interface = ret; ++ } ++ ++ fec_reset_phy(pdev); ++ ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ irq = platform_get_irq(pdev, i); ++ if (irq < 0) { ++ if (i) ++ break; ++ ret = irq; ++ goto failed_irq; ++ } ++ ret = rtdm_irq_request(&fep->irq_handle[i], irq, ++ fec_enet_interrupt, 0, ndev->name, ndev); ++ if (ret) { ++ while (--i >= 0) { ++ irq = platform_get_irq(pdev, i); ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++ goto failed_irq; ++ } ++ } ++ ++ pinctrl = devm_pinctrl_get_select_default(&pdev->dev); ++ if (IS_ERR(pinctrl)) { ++ ret = PTR_ERR(pinctrl); ++ goto failed_pin; ++ } ++ ++ 
fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(fep->clk_ipg)) { ++ ret = PTR_ERR(fep->clk_ipg); ++ goto failed_clk; ++ } ++ ++ fep->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); ++ if (IS_ERR(fep->clk_ahb)) { ++ ret = PTR_ERR(fep->clk_ahb); ++ goto failed_clk; ++ } ++ ++ clk_prepare_enable(fep->clk_ahb); ++ clk_prepare_enable(fep->clk_ipg); ++ ++ ret = fec_enet_init(ndev); ++ if (ret) ++ goto failed_init; ++ ++ ret = fec_enet_mii_init(pdev); ++ if (ret) ++ goto failed_mii_init; ++ ++ /* Carrier starts down, phylib will bring it up */ ++ rtnetif_carrier_off(ndev); ++ ++ /* RTnet: register the network interface */ ++ ret = rt_register_rtnetdev(ndev); ++ if (ret) ++ goto failed_register; ++ ++ return 0; ++ ++failed_register: ++ fec_enet_mii_remove(fep); ++failed_mii_init: ++failed_init: ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++failed_pin: ++failed_clk: ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ irq = platform_get_irq(pdev, i); ++ if (irq > 0) ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++failed_irq: ++ iounmap(fep->hwp); ++failed_ioremap: ++ free_netdev(fep->netdev); ++failed_alloc_netdev: ++ rtdev_free(ndev); /* RTnet */ ++failed_alloc_etherdev: ++ release_mem_region(r->start, resource_size(r)); ++ ++ return ret; ++} ++ ++static int fec_drv_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *ndev = platform_get_drvdata(pdev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ struct resource *r; ++ int i; ++ ++ /* RTnet */ ++ rt_unregister_rtnetdev(ndev); ++ rt_rtdev_disconnect(ndev); ++ ++ fec_enet_mii_remove(fep); ++ for (i = 0; i < FEC_IRQ_NUM; i++) { ++ int irq = platform_get_irq(pdev, i); ++ if (irq > 0) ++ rtdm_irq_free(&fep->irq_handle[i]); ++ } ++ ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++ iounmap(fep->hwp); ++ ++ /* RTnet */ ++ free_netdev(fep->netdev); ++ rtdev_free(ndev); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ BUG_ON(!r); ++ release_mem_region(r->start, resource_size(r)); ++ ++ platform_set_drvdata(pdev, NULL); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++static int ++fec_suspend(struct device *dev) ++{ ++ struct rtnet_device *ndev = dev_get_drvdata(dev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ if (rtnetif_running(ndev)) { ++ fec_stop(ndev); ++ rtnetif_device_detach(ndev); ++ } ++ clk_disable_unprepare(fep->clk_ahb); ++ clk_disable_unprepare(fep->clk_ipg); ++ return 0; ++} ++ ++static int ++fec_resume(struct device *dev) ++{ ++ struct rtnet_device *ndev = dev_get_drvdata(dev); ++ struct fec_enet_private *fep = rtnetdev_priv(ndev); ++ ++ clk_prepare_enable(fep->clk_ahb); ++ clk_prepare_enable(fep->clk_ipg); ++ if (rtnetif_running(ndev)) { ++ fec_restart(ndev, fep->full_duplex); ++ rtnetif_device_attach(ndev); ++ } ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops fec_pm_ops = { ++ .suspend = fec_suspend, ++ .resume = fec_resume, ++ .freeze = fec_suspend, ++ .thaw = fec_resume, ++ .poweroff = fec_suspend, ++ .restore = fec_resume, ++}; ++#endif ++ ++static struct platform_driver fec_driver = { ++ .driver = { ++ .name = DRIVER_NAME, ++ .owner = THIS_MODULE, ++#ifdef CONFIG_PM ++ .pm = &fec_pm_ops, ++#endif ++ .of_match_table = fec_dt_ids, ++ }, ++ .id_table = fec_devtype, ++ .probe = fec_probe, ++ .remove = fec_drv_remove, ++}; ++ ++module_platform_driver(fec_driver); +--- linux/drivers/xenomai/net/drivers/at91_ether.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/at91_ether.c 2021-04-07 
16:01:27.248634136 +0800 +@@ -0,0 +1,453 @@ ++/* ++ * Ethernet driver for the Atmel AT91RM9200 (Thunder) ++ * ++ * Copyright (C) 2003 SAN People (Pty) Ltd ++ * ++ * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. ++ * Initial version by Rick Bronson 01/11/2003 ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * RTnet port: ++ * Copyright (C) 2014 Gilles Chanteperdrix ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include "rt_macb.h" ++ ++/* 1518 rounded up */ ++#define MAX_RBUFF_SZ 0x600 ++/* max number of receive buffers */ ++#define MAX_RX_DESCR 9 ++ ++/* Initialize and start the Receiver and Transmit subsystems */ ++static int at91ether_start(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ dma_addr_t addr; ++ u32 ctl; ++ int i; ++ ++ lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, ++ (MAX_RX_DESCR * ++ sizeof(struct macb_dma_desc)), ++ &lp->rx_ring_dma, GFP_KERNEL); ++ if (!lp->rx_ring) ++ return -ENOMEM; ++ ++ lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * MAX_RBUFF_SZ, ++ &lp->rx_buffers_dma, GFP_KERNEL); ++ if (!lp->rx_buffers) { ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * sizeof(struct macb_dma_desc), ++ lp->rx_ring, lp->rx_ring_dma); ++ lp->rx_ring = NULL; ++ return -ENOMEM; ++ } ++ ++ addr = lp->rx_buffers_dma; ++ for (i = 0; i < MAX_RX_DESCR; i++) { ++ lp->rx_ring[i].addr = addr; ++ lp->rx_ring[i].ctrl = 0; ++ addr += MAX_RBUFF_SZ; ++ } ++ ++ /* Set the Wrap bit on the last descriptor */ ++ lp->rx_ring[MAX_RX_DESCR - 1].addr |= MACB_BIT(RX_WRAP); ++ ++ /* Reset buffer index */ ++ lp->rx_tail = 0; ++ ++ /* Program address of descriptor list in Rx Buffer Queue register */ ++ macb_writel(lp, RBQP, lp->rx_ring_dma); ++ ++ /* Enable Receive and Transmit */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE)); ++ ++ return 0; ++} ++ ++/* Open the ethernet interface */ ++static int at91ether_open(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 ctl; ++ int ret; ++ ++ rt_stack_connect(dev, &STACK_manager); ++ ++ /* Clear internal statistics */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); ++ ++ rtmacb_set_hwaddr(lp); ++ ++ ret = at91ether_start(dev); ++ if (ret) ++ return ret; ++ ++ /* Enable MAC interrupts */ ++ macb_writel(lp, IER, MACB_BIT(RCOMP) | ++ MACB_BIT(RXUBR) | ++ MACB_BIT(ISR_TUND) | ++ MACB_BIT(ISR_RLE) | ++ MACB_BIT(TCOMP) | ++ MACB_BIT(ISR_ROVR) | ++ MACB_BIT(HRESP)); ++ ++ /* schedule a link state check */ ++ phy_start(lp->phy_dev); ++ ++ rtnetif_start_queue(dev); ++ ++ return 0; ++} ++ ++/* Close the interface */ ++static int at91ether_close(struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 ctl; ++ ++ /* Disable Receiver and Transmitter */ ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE))); ++ ++ /* Disable MAC interrupts */ ++ macb_writel(lp, IDR, MACB_BIT(RCOMP) | ++ MACB_BIT(RXUBR) | ++ MACB_BIT(ISR_TUND) | ++ MACB_BIT(ISR_RLE) | ++ MACB_BIT(TCOMP) | ++ MACB_BIT(ISR_ROVR) | ++ MACB_BIT(HRESP)); ++ ++ 
rtnetif_stop_queue(dev); ++ ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * sizeof(struct macb_dma_desc), ++ lp->rx_ring, lp->rx_ring_dma); ++ lp->rx_ring = NULL; ++ ++ dma_free_coherent(&lp->pdev->dev, ++ MAX_RX_DESCR * MAX_RBUFF_SZ, ++ lp->rx_buffers, lp->rx_buffers_dma); ++ lp->rx_buffers = NULL; ++ ++ rt_stack_disconnect(dev); ++ ++ return 0; ++} ++ ++/* Transmit packet */ ++static int at91ether_start_xmit(struct rtskb *skb, struct rtnet_device *dev) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ ++ if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) { ++ rtnetif_stop_queue(dev); ++ ++ /* Store packet information (to free when Tx completed) */ ++ lp->skb = skb; ++ lp->skb_length = skb->len; ++ lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, ++ DMA_TO_DEVICE); ++ ++ /* Set address of the data in the Transmit Address register */ ++ macb_writel(lp, TAR, lp->skb_physaddr); ++ /* Set length of the packet in the Transmit Control register */ ++ macb_writel(lp, TCR, skb->len); ++ ++ } else { ++ rtdev_err(dev, "%s called, but device is busy!\n", __func__); ++ return RTDEV_TX_BUSY; ++ } ++ ++ return RTDEV_TX_OK; ++} ++ ++/* Extract received frame from buffer descriptors and sent to upper layers. ++ * (Called from interrupt context) ++ */ ++static bool at91ether_rx(struct rtnet_device *dev, nanosecs_abs_t *time_stamp) ++{ ++ struct macb *lp = rtnetdev_priv(dev); ++ unsigned char *p_recv; ++ struct rtskb *skb; ++ unsigned int pktlen; ++ bool ret = false; ++ ++ while (lp->rx_ring[lp->rx_tail].addr & MACB_BIT(RX_USED)) { ++ p_recv = lp->rx_buffers + lp->rx_tail * MAX_RBUFF_SZ; ++ pktlen = MACB_BF(RX_FRMLEN, lp->rx_ring[lp->rx_tail].ctrl); ++ skb = rtnetdev_alloc_rtskb(dev, pktlen + 2); ++ if (skb) { ++ rtskb_reserve(skb, 2); ++ memcpy(rtskb_put(skb, pktlen), p_recv, pktlen); ++ ++ skb->protocol = rt_eth_type_trans(skb, dev); ++ lp->stats.rx_packets++; ++ lp->stats.rx_bytes += pktlen; ++ ret = true; ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ } else { ++ lp->stats.rx_dropped++; ++ } ++ ++ if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH)) ++ lp->stats.multicast++; ++ ++ /* reset ownership bit */ ++ lp->rx_ring[lp->rx_tail].addr &= ~MACB_BIT(RX_USED); ++ ++ /* wrap after last buffer */ ++ if (lp->rx_tail == MAX_RX_DESCR - 1) ++ lp->rx_tail = 0; ++ else ++ lp->rx_tail++; ++ } ++ ++ return ret; ++} ++ ++/* MAC interrupt handler */ ++static int at91ether_interrupt(rtdm_irq_t *irq_handle) ++{ ++ void *dev_id = rtdm_irq_get_arg(irq_handle, void); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ struct rtnet_device *dev = dev_id; ++ struct macb *lp = rtnetdev_priv(dev); ++ u32 intstatus, ctl; ++ ++ /* MAC Interrupt Status register indicates what interrupts are pending. ++ * It is automatically cleared once read. 
++ */ ++ intstatus = macb_readl(lp, ISR); ++ ++ /* Receive complete */ ++ if ((intstatus & MACB_BIT(RCOMP)) && at91ether_rx(dev, &time_stamp)) ++ rt_mark_stack_mgr(dev); ++ ++ /* Transmit complete */ ++ if (intstatus & MACB_BIT(TCOMP)) { ++ /* The TCOM bit is set even if the transmission failed */ ++ if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE))) ++ lp->stats.tx_errors++; ++ ++ if (lp->skb) { ++ dev_kfree_rtskb(lp->skb); ++ lp->skb = NULL; ++ dma_unmap_single(NULL, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); ++ lp->stats.tx_packets++; ++ lp->stats.tx_bytes += lp->skb_length; ++ } ++ rtnetif_wake_queue(dev); ++ } ++ ++ /* Work-around for EMAC Errata section 41.3.1 */ ++ if (intstatus & MACB_BIT(RXUBR)) { ++ ctl = macb_readl(lp, NCR); ++ macb_writel(lp, NCR, ctl & ~MACB_BIT(RE)); ++ macb_writel(lp, NCR, ctl | MACB_BIT(RE)); ++ } ++ ++ if (intstatus & MACB_BIT(ISR_ROVR)) ++ rtdev_err(dev, "ROVR error\n"); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++#if defined(CONFIG_OF) ++static const struct of_device_id at91ether_dt_ids[] = { ++ { .compatible = "cdns,at91rm9200-emac" }, ++ { .compatible = "cdns,emac" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, at91ether_dt_ids); ++#endif ++ ++/* Detect MAC & PHY and perform ethernet interface initialization */ ++static int __init at91ether_probe(struct platform_device *pdev) ++{ ++ struct macb_platform_data *board_data = dev_get_platdata(&pdev->dev); ++ struct resource *regs; ++ struct rtnet_device *dev; ++ struct phy_device *phydev; ++ struct macb *lp; ++ int res; ++ u32 reg; ++ const char *mac; ++ ++ regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!regs) ++ return -ENOENT; ++ ++ dev = rt_alloc_etherdev(sizeof(struct macb), MAX_RX_DESCR * 2 + 2); ++ if (!dev) ++ return -ENOMEM; ++ ++ rtdev_alloc_name(dev, "rteth%d"); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ dev->vers = RTDEV_VERS_2_0; ++ dev->sysbind = &pdev->dev; ++ ++ lp = rtnetdev_priv(dev); ++ lp->pdev = pdev; ++ lp->dev = dev; ++ rtdm_lock_init(&lp->lock); ++ ++ /* physical base address */ ++ dev->base_addr = regs->start; ++ lp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); ++ if (!lp->regs) { ++ res = -ENOMEM; ++ goto err_free_dev; ++ } ++ ++ /* Clock */ ++ lp->pclk = devm_clk_get(&pdev->dev, "ether_clk"); ++ if (IS_ERR(lp->pclk)) { ++ res = PTR_ERR(lp->pclk); ++ goto err_free_dev; ++ } ++ clk_enable(lp->pclk); ++ ++ lp->hclk = ERR_PTR(-ENOENT); ++ lp->tx_clk = ERR_PTR(-ENOENT); ++ ++ /* Install the interrupt handler */ ++ dev->irq = platform_get_irq(pdev, 0); ++ res = rtdm_irq_request(&lp->irq_handle, dev->irq, at91ether_interrupt, 0, dev->name, dev); ++ if (res) ++ goto err_disable_clock; ++ ++ dev->open = at91ether_open; ++ dev->stop = at91ether_close; ++ dev->hard_start_xmit = at91ether_start_xmit; ++ dev->do_ioctl = rtmacb_ioctl; ++ dev->get_stats = rtmacb_get_stats; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ mac = of_get_mac_address(pdev->dev.of_node); ++ if (mac) ++ memcpy(lp->dev->dev_addr, mac, ETH_ALEN); ++ else ++ rtmacb_get_hwaddr(lp); ++ ++ res = of_get_phy_mode(pdev->dev.of_node); ++ if (res < 0) { ++ if (board_data && board_data->is_rmii) ++ lp->phy_interface = PHY_INTERFACE_MODE_RMII; ++ else ++ lp->phy_interface = PHY_INTERFACE_MODE_MII; ++ } else { ++ lp->phy_interface = res; ++ } ++ ++ macb_writel(lp, NCR, 0); ++ ++ reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); ++ if (lp->phy_interface == PHY_INTERFACE_MODE_RMII) ++ reg |= MACB_BIT(RM9200_RMII); ++ ++ macb_writel(lp, NCFGR, reg); ++ ++ /* Register the network 
interface */ ++ res = rt_register_rtnetdev(dev); ++ if (res) ++ goto err_irq_free; ++ ++ res = rtmacb_mii_init(lp); ++ if (res) ++ goto err_out_unregister_netdev; ++ ++ /* will be enabled in open() */ ++ rtnetif_carrier_off(dev); ++ ++ phydev = lp->phy_dev; ++ rtdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", ++ phydev->drv->name, dev_name(&phydev->dev), ++ phydev->irq); ++ ++ /* Display ethernet banner */ ++ rtdev_info(dev, "AT91 ethernet at 0x%08lx int=%d (%pM)\n", ++ dev->base_addr, dev->irq, dev->dev_addr); ++ ++ return 0; ++ ++err_out_unregister_netdev: ++ rt_unregister_rtnetdev(dev); ++err_irq_free: ++ rtdm_irq_free(&lp->irq_handle); ++err_disable_clock: ++ clk_disable(lp->pclk); ++err_free_dev: ++ rtdev_free(dev); ++ return res; ++} ++ ++static int at91ether_remove(struct platform_device *pdev) ++{ ++ struct rtnet_device *dev = platform_get_drvdata(pdev); ++ struct macb *lp = rtnetdev_priv(dev); ++ ++ if (lp->phy_dev) ++ phy_disconnect(lp->phy_dev); ++ ++ mdiobus_unregister(lp->mii_bus); ++ if (lp->phy_phony_net_device) ++ free_netdev(lp->phy_phony_net_device); ++ kfree(lp->mii_bus->irq); ++ rt_rtdev_disconnect(dev); ++ rtdm_irq_free(&lp->irq_handle); ++ mdiobus_free(lp->mii_bus); ++ rt_unregister_rtnetdev(dev); ++ clk_disable(lp->pclk); ++ rtdev_free(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver at91ether_driver = { ++ .remove = at91ether_remove, ++ .driver = { ++ .name = "at91_ether", ++ .owner = THIS_MODULE, ++ .of_match_table = of_match_ptr(at91ether_dt_ids), ++ }, ++}; ++ ++module_platform_driver_probe(at91ether_driver, at91ether_probe); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("AT91RM9200 EMAC Ethernet driver"); ++MODULE_AUTHOR("Andrew Victor"); ++MODULE_ALIAS("platform:at91_ether"); +--- linux/drivers/xenomai/net/drivers/mpc8260_fcc_enet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/mpc8260_fcc_enet.c 2021-04-07 16:01:27.244634142 +0800 +@@ -0,0 +1,2235 @@ ++/* ++ * Fast Ethernet Controller (FCC) driver for Motorola MPC8260. ++ * Copyright (c) 2000 MontaVista Software, Inc. Dan Malek (dmalek@jlc.net) ++ * ++ * This version of the driver is a combination of the 8xx fec and ++ * 8260 SCC Ethernet drivers. This version has some additional ++ * configuration options, which should probably be moved out of ++ * here. This driver currently works for the EST SBC8260, ++ * SBS Diablo/BCM, Embedded Planet RPX6, TQM8260, and others. ++ * ++ * Right now, I am very watseful with the buffers. I allocate memory ++ * pages and then divide them into 2K frame buffers. This way I know I ++ * have buffers large enough to hold one frame within one buffer descriptor. ++ * Once I get this working, I will use 64 or 128 byte CPM buffers, which ++ * will be much more memory efficient and will easily handle lots of ++ * small packets. Since this is a cache coherent processor and CPM, ++ * I could also preallocate SKB's and use them directly on the interface. ++ * ++ * Ported to RTnet from "linuxppc_2_4_devel/arch/ppc/8260_io/fcc_enet.c". ++ * Copyright (c) 2003 Wolfgang Grandegger (wg@denx.de) ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++#error "MDIO for PHY configuration is not yet supported!" 
++#endif ++ ++#include ++ ++MODULE_AUTHOR("Maintainer: Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTnet driver for the MPC8260 FCC Ethernet"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rx_pool_size = 0; ++MODULE_PARM(rx_pool_size, "i"); ++MODULE_PARM_DESC(rx_pool_size, "Receive buffer pool size"); ++ ++static unsigned int rtnet_fcc = 1; ++MODULE_PARM(rtnet_fcc, "i"); ++MODULE_PARM_DESC(rtnet_fcc, "FCCx port for RTnet (default=1)"); ++ ++#define RT_DEBUG(fmt,args...) ++ ++/* The transmitter timeout ++ */ ++#define TX_TIMEOUT (2*HZ) ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* Forward declarations of some structures to support different PHYs */ ++ ++typedef struct { ++ uint mii_data; ++ void (*funct)(uint mii_reg, struct net_device *dev); ++} phy_cmd_t; ++ ++typedef struct { ++ uint id; ++ char *name; ++ ++ const phy_cmd_t *config; ++ const phy_cmd_t *startup; ++ const phy_cmd_t *ack_int; ++ const phy_cmd_t *shutdown; ++} phy_info_t; ++ ++/* Register definitions for the PHY. */ ++ ++#define MII_REG_CR 0 /* Control Register */ ++#define MII_REG_SR 1 /* Status Register */ ++#define MII_REG_PHYIR1 2 /* PHY Identification Register 1 */ ++#define MII_REG_PHYIR2 3 /* PHY Identification Register 2 */ ++#define MII_REG_ANAR 4 /* A-N Advertisement Register */ ++#define MII_REG_ANLPAR 5 /* A-N Link Partner Ability Register */ ++#define MII_REG_ANER 6 /* A-N Expansion Register */ ++#define MII_REG_ANNPTR 7 /* A-N Next Page Transmit Register */ ++#define MII_REG_ANLPRNPR 8 /* A-N Link Partner Received Next Page Reg. */ ++ ++/* values for phy_status */ ++ ++#define PHY_CONF_ANE 0x0001 /* 1 auto-negotiation enabled */ ++#define PHY_CONF_LOOP 0x0002 /* 1 loopback mode enabled */ ++#define PHY_CONF_SPMASK 0x00f0 /* mask for speed */ ++#define PHY_CONF_10HDX 0x0010 /* 10 Mbit half duplex supported */ ++#define PHY_CONF_10FDX 0x0020 /* 10 Mbit full duplex supported */ ++#define PHY_CONF_100HDX 0x0040 /* 100 Mbit half duplex supported */ ++#define PHY_CONF_100FDX 0x0080 /* 100 Mbit full duplex supported */ ++ ++#define PHY_STAT_LINK 0x0100 /* 1 up - 0 down */ ++#define PHY_STAT_FAULT 0x0200 /* 1 remote fault */ ++#define PHY_STAT_ANC 0x0400 /* 1 auto-negotiation complete */ ++#define PHY_STAT_SPMASK 0xf000 /* mask for speed */ ++#define PHY_STAT_10HDX 0x1000 /* 10 Mbit half duplex selected */ ++#define PHY_STAT_10FDX 0x2000 /* 10 Mbit full duplex selected */ ++#define PHY_STAT_100HDX 0x4000 /* 100 Mbit half duplex selected */ ++#define PHY_STAT_100FDX 0x8000 /* 100 Mbit full duplex selected */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++/* The number of Tx and Rx buffers. These are allocated from the page ++ * pool. The code may assume these are power of two, so it is best ++ * to keep them that size. ++ * We don't need to allocate pages for the transmitter. We just use ++ * the skbuffer directly. ++ */ ++#define FCC_ENET_RX_PAGES 16 ++#define FCC_ENET_RX_FRSIZE 2048 ++#define FCC_ENET_RX_FRPPG (PAGE_SIZE / FCC_ENET_RX_FRSIZE) ++#define RX_RING_SIZE (FCC_ENET_RX_FRPPG * FCC_ENET_RX_PAGES) ++#define TX_RING_SIZE 16 /* Must be power of two */ ++#define TX_RING_MOD_MASK 15 /* for this to work */ ++ ++/* The FCC stores dest/src/type, data, and checksum for receive packets. ++ */ ++#define PKT_MAXBUF_SIZE 1518 ++#define PKT_MINBUF_SIZE 64 ++ ++/* Maximum input DMA size. Must be a should(?) be a multiple of 4. ++*/ ++#define PKT_MAXDMA_SIZE 1520 ++ ++/* Maximum input buffer size. Must be a multiple of 32. 
++*/ ++#define PKT_MAXBLR_SIZE 1536 ++ ++static int fcc_enet_open(struct rtnet_device *rtev); ++static int fcc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++static int fcc_enet_rx(struct rtnet_device *rtdev, int *packets, nanosecs_abs_t *time_stamp); ++static int fcc_enet_interrupt(rtdm_irq_t *irq_handle); ++static int fcc_enet_close(struct rtnet_device *dev); ++ ++static struct net_device_stats *fcc_enet_get_stats(struct rtnet_device *rtdev); ++#ifdef ORIGINAL_VERSION ++static void set_multicast_list(struct net_device *dev); ++static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); ++#endif /* ORIGINAL_VERSION */ ++ ++static void fcc_restart(struct rtnet_device *rtdev, int duplex); ++ ++/* These will be configurable for the FCC choice. ++ * Multiple ports can be configured. There is little choice among the ++ * I/O pins to the PHY, except the clocks. We will need some board ++ * dependent clock selection. ++ * Why in the hell did I put these inside #ifdef's? I dunno, maybe to ++ * help show what pins are used for each device. ++ */ ++ ++/* I/O Pin assignment for FCC1. I don't yet know the best way to do this, ++ * but there is little variation among the choices. ++ */ ++#define PA1_COL ((uint)0x00000001) ++#define PA1_CRS ((uint)0x00000002) ++#define PA1_TXER ((uint)0x00000004) ++#define PA1_TXEN ((uint)0x00000008) ++#define PA1_RXDV ((uint)0x00000010) ++#define PA1_RXER ((uint)0x00000020) ++#define PA1_TXDAT ((uint)0x00003c00) ++#define PA1_RXDAT ((uint)0x0003c000) ++#define PA1_PSORA0 (PA1_RXDAT | PA1_TXDAT) ++#define PA1_PSORA1 (PA1_COL | PA1_CRS | PA1_TXER | PA1_TXEN | \ ++ PA1_RXDV | PA1_RXER) ++#define PA1_DIRA0 (PA1_RXDAT | PA1_CRS | PA1_COL | PA1_RXER | PA1_RXDV) ++#define PA1_DIRA1 (PA1_TXDAT | PA1_TXEN | PA1_TXER) ++ ++/* CLK12 is receive, CLK11 is transmit. These are board specific. ++*/ ++#define PC_F1RXCLK ((uint)0x00000800) ++#define PC_F1TXCLK ((uint)0x00000400) ++#if defined(CONFIG_PM826) ++#ifndef CONFIG_RTAI_RTNET_DB_CR826_J30x_ON ++#define CMX1_CLK_ROUTE ((uint)0x35000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#else ++#define CMX1_CLK_ROUTE ((uint)0x37000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#endif ++#elif defined(CONFIG_CPU86) ++#define CMX1_CLK_ROUTE ((uint)0x37000000) ++#define CMX1_CLK_MASK ((uint)0x7f000000) ++#else ++#define CMX1_CLK_ROUTE ((uint)0x3e000000) ++#define CMX1_CLK_MASK ((uint)0xff000000) ++#endif /* CONFIG_PM826 */ ++ ++/* I/O Pin assignment for FCC2. I don't yet know the best way to do this, ++ * but there is little variation among the choices. ++ */ ++#define PB2_TXER ((uint)0x00000001) ++#define PB2_RXDV ((uint)0x00000002) ++#define PB2_TXEN ((uint)0x00000004) ++#define PB2_RXER ((uint)0x00000008) ++#define PB2_COL ((uint)0x00000010) ++#define PB2_CRS ((uint)0x00000020) ++#define PB2_TXDAT ((uint)0x000003c0) ++#define PB2_RXDAT ((uint)0x00003c00) ++#define PB2_PSORB0 (PB2_RXDAT | PB2_TXDAT | PB2_CRS | PB2_COL | \ ++ PB2_RXER | PB2_RXDV | PB2_TXER) ++#define PB2_PSORB1 (PB2_TXEN) ++#define PB2_DIRB0 (PB2_RXDAT | PB2_CRS | PB2_COL | PB2_RXER | PB2_RXDV) ++#define PB2_DIRB1 (PB2_TXDAT | PB2_TXEN | PB2_TXER) ++ ++/* CLK13 is receive, CLK14 is transmit. These are board dependent. ++*/ ++#define PC_F2RXCLK ((uint)0x00001000) ++#define PC_F2TXCLK ((uint)0x00002000) ++#define CMX2_CLK_ROUTE ((uint)0x00250000) ++#define CMX2_CLK_MASK ((uint)0x00ff0000) ++ ++/* I/O Pin assignment for FCC3. I don't yet know the best way to do this, ++ * but there is little variation among the choices. 
++ */ ++#define PB3_RXDV ((uint)0x00004000) ++#define PB3_RXER ((uint)0x00008000) ++#define PB3_TXER ((uint)0x00010000) ++#define PB3_TXEN ((uint)0x00020000) ++#define PB3_COL ((uint)0x00040000) ++#define PB3_CRS ((uint)0x00080000) ++#define PB3_TXDAT ((uint)0x0f000000) ++#define PB3_RXDAT ((uint)0x00f00000) ++#define PB3_PSORB0 (PB3_RXDAT | PB3_TXDAT | PB3_CRS | PB3_COL | \ ++ PB3_RXER | PB3_RXDV | PB3_TXER | PB3_TXEN) ++#define PB3_PSORB1 (0) ++#define PB3_DIRB0 (PB3_RXDAT | PB3_CRS | PB3_COL | PB3_RXER | PB3_RXDV) ++#define PB3_DIRB1 (PB3_TXDAT | PB3_TXEN | PB3_TXER) ++ ++/* CLK15 is receive, CLK16 is transmit. These are board dependent. ++*/ ++#ifdef CONFIG_IPHASE4539 ++#define PC_F3RXCLK ((uint)0x00002000) /* CLK 14 is receive */ ++#define PC_F3TXCLK ((uint)0x00008000) /* CLK 16 is transmit */ ++#define CMX3_CLK_ROUTE ((uint)0x00002f00) ++#define CMX3_CLK_MASK ((uint)0x00007f00) ++#else ++#define PC_F3RXCLK ((uint)0x00004000) ++#define PC_F3TXCLK ((uint)0x00008000) ++#define CMX3_CLK_ROUTE ((uint)0x00003700) ++#define CMX3_CLK_MASK ((uint)0x0000ff00) ++#endif ++ ++/* MII status/control serial interface. ++*/ ++#define IOP_PORT_OFF(f) ((uint)(&((iop8260_t *)0)->iop_p##f)) ++#define IOP_PORT(x) IOP_PORT_OFF(dir##x) ++ ++#define IOP_DIR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(dira)-IOP_PORT_OFF(dira)))) ++#define IOP_PAR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(para)-IOP_PORT_OFF(dira)))) ++#define IOP_SOR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(sora)-IOP_PORT_OFF(dira)))) ++#define IOP_ODR(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(odra)-IOP_PORT_OFF(dira)))) ++#define IOP_DAT(b,p) *((uint*)((void*)(b)+(p)+(IOP_PORT_OFF(data)-IOP_PORT_OFF(dira)))) ++ ++#if defined(CONFIG_TQM8260) ++/* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */ ++#define MII_MDIO ((uint)0x00000002) ++#define MII_MDCK ((uint)0x00000001) ++#elif defined (CONFIG_PM826) ++#ifndef CONFIG_RTAI_RTNET_DB_CR826_J30x_ON ++#define MII_MDIO ((uint)0x00000080) /* MDIO on PC24 */ ++#define MII_MDCK ((uint)0x00000100) /* MDCK on PC23 */ ++#else ++#define MII_MDIO ((uint)0x00000100) /* MDIO on PA23 */ ++#define MII_MDCK ((uint)0x00000200) /* MDCK on PA22 */ ++#define MII_PORT IOP_PORT(a) ++#endif /* CONFIG_RTAI_RTNET_DB_CR826_J30x_ON */ ++#elif defined (CONFIG_IPHASE4539) ++#define MII_MDIO ((uint)0x00000080) /* MDIO on PC24 */ ++#define MII_MDCK ((uint)0x00000100) /* MDCK on PC23 */ ++#else ++#define MII_MDIO ((uint)0x00000004) ++#define MII_MDCK ((uint)0x00000100) ++#endif ++ ++# if defined(CONFIG_TQM8260) ++#define MII_MDIO2 MII_MDIO ++#define MII_MDCK2 MII_MDCK ++#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) ++#define MII_MDIO2 ((uint)0x00400000) ++#define MII_MDCK2 ((uint)0x00200000) ++#elif defined(CONFIG_PM826) ++#define MII_MDIO2 ((uint)0x00000040) /* MDIO on PA25 */ ++#define MII_MDCK2 ((uint)0x00000080) /* MDCK on PA24 */ ++#define MII_PORT2 IOP_PORT(a) ++#else ++#define MII_MDIO2 ((uint)0x00000002) ++#define MII_MDCK2 ((uint)0x00000080) ++#endif ++ ++# if defined(CONFIG_TQM8260) ++#define MII_MDIO3 MII_MDIO ++#define MII_MDCK3 MII_MDCK ++#else ++#define MII_MDIO3 ((uint)0x00000001) ++#define MII_MDCK3 ((uint)0x00000040) ++#endif ++ ++#ifndef MII_PORT ++#define MII_PORT IOP_PORT(c) ++#endif ++ ++#ifndef MII_PORT2 ++#define MII_PORT2 IOP_PORT(c) ++#endif ++ ++#ifndef MII_PORT3 ++#define MII_PORT3 IOP_PORT(c) ++#endif ++ ++/* A table of information for supporting FCCs. This does two things. ++ * First, we know how many FCCs we have and they are always externally ++ * numbered from zero. 
Second, it holds control register and I/O ++ * information that could be different among board designs. ++ */ ++typedef struct fcc_info { ++ uint fc_fccnum; ++ uint fc_cpmblock; ++ uint fc_cpmpage; ++ uint fc_proff; ++ uint fc_interrupt; ++ uint fc_trxclocks; ++ uint fc_clockroute; ++ uint fc_clockmask; ++ uint fc_mdio; ++ uint fc_mdck; ++ uint fc_port; ++ struct rtnet_device *rtdev; ++} fcc_info_t; ++ ++static fcc_info_t fcc_ports[] = { ++ { 0, CPM_CR_FCC1_SBLOCK, CPM_CR_FCC1_PAGE, PROFF_FCC1, SIU_INT_FCC1, ++ (PC_F1RXCLK | PC_F1TXCLK), CMX1_CLK_ROUTE, CMX1_CLK_MASK, ++ MII_MDIO, MII_MDCK, MII_PORT }, ++ { 1, CPM_CR_FCC2_SBLOCK, CPM_CR_FCC2_PAGE, PROFF_FCC2, SIU_INT_FCC2, ++ (PC_F2RXCLK | PC_F2TXCLK), CMX2_CLK_ROUTE, CMX2_CLK_MASK, ++ MII_MDIO2, MII_MDCK2, MII_PORT2 }, ++ { 2, CPM_CR_FCC3_SBLOCK, CPM_CR_FCC3_PAGE, PROFF_FCC3, SIU_INT_FCC3, ++ (PC_F3RXCLK | PC_F3TXCLK), CMX3_CLK_ROUTE, CMX3_CLK_MASK, ++ MII_MDIO3, MII_MDCK3, MII_PORT3 }, ++}; ++ ++/* The FCC buffer descriptors track the ring buffers. The rx_bd_base and ++ * tx_bd_base always point to the base of the buffer descriptors. The ++ * cur_rx and cur_tx point to the currently available buffer. ++ * The dirty_tx tracks the current buffer that is being sent by the ++ * controller. The cur_tx and dirty_tx are equal under both completely ++ * empty and completely full conditions. The empty/ready indicator in ++ * the buffer descriptor determines the actual condition. ++ */ ++struct fcc_enet_private { ++ /* The addresses of a Tx/Rx-in-place packets/buffers. */ ++ struct rtskb *tx_skbuff[TX_RING_SIZE]; ++ ushort skb_cur; ++ ushort skb_dirty; ++ ++ /* CPM dual port RAM relative addresses. ++ */ ++ cbd_t *rx_bd_base; /* Address of Rx and Tx buffers. */ ++ cbd_t *tx_bd_base; ++ cbd_t *cur_rx, *cur_tx; /* The next free ring entry */ ++ cbd_t *dirty_tx; /* The ring entries to be free()ed. */ ++ volatile fcc_t *fccp; ++ volatile fcc_enet_t *ep; ++ struct net_device_stats stats; ++ uint tx_full; ++ rtdm_lock_t lock; ++ rtdm_irq_t irq_handle; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ uint phy_id; ++ uint phy_id_done; ++ uint phy_status; ++ phy_info_t *phy; ++ struct tq_struct phy_task; ++ ++ uint sequence_done; ++ ++ uint phy_addr; ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ int link; ++ int old_link; ++ int full_duplex; ++ ++ fcc_info_t *fip; ++}; ++ ++static void init_fcc_shutdown(fcc_info_t *fip, struct fcc_enet_private *cep, ++ volatile immap_t *immap); ++static void init_fcc_startup(fcc_info_t *fip, struct rtnet_device *rtdev); ++static void init_fcc_ioports(fcc_info_t *fip, volatile iop8260_t *io, ++ volatile immap_t *immap); ++static void init_fcc_param(fcc_info_t *fip, struct rtnet_device *rtdev, ++ volatile immap_t *immap); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++static int mii_queue(struct net_device *dev, int request, void (*func)(uint, struct net_device *)); ++static uint mii_send_receive(fcc_info_t *fip, uint cmd); ++ ++static void fcc_stop(struct net_device *dev); ++ ++/* Make MII read/write commands for the FCC. 
++*/ ++#define mk_mii_read(REG) (0x60020000 | ((REG & 0x1f) << 18)) ++#define mk_mii_write(REG, VAL) (0x50020000 | ((REG & 0x1f) << 18) | \ ++ (VAL & 0xffff)) ++#define mk_mii_end 0 ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ ++static int ++fcc_enet_start_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)rtdev->priv; ++ volatile cbd_t *bdp; ++ rtdm_lockctx_t context; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ if (!cep->link) { ++ /* Link is down or autonegotiation is in progress. */ ++ return 1; ++ } ++ ++ /* Fill in a Tx ring entry */ ++ bdp = cep->cur_tx; ++ ++#ifndef final_version ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ /* Ooops. All transmit buffers are full. Bail out. ++ * This should not happen, since cep->tx_full should be set. ++ */ ++ rtdm_printk("%s: tx queue full!.\n", rtdev->name); ++ return 1; ++ } ++#endif ++ ++ /* Clear all of the status flags. */ ++ bdp->cbd_sc &= ~BD_ENET_TX_STATS; ++ ++ /* If the frame is short, tell CPM to pad it. */ ++ if (skb->len <= ETH_ZLEN) ++ bdp->cbd_sc |= BD_ENET_TX_PAD; ++ else ++ bdp->cbd_sc &= ~BD_ENET_TX_PAD; ++ ++ /* Set buffer length and buffer pointer. */ ++ bdp->cbd_datlen = skb->len; ++ bdp->cbd_bufaddr = __pa(skb->data); ++ ++ /* Save skb pointer. */ ++ cep->tx_skbuff[cep->skb_cur] = skb; ++ ++ cep->stats.tx_bytes += skb->len; ++ cep->skb_cur = (cep->skb_cur+1) & TX_RING_MOD_MASK; ++ ++ rtdm_lock_get_irqsave(&cep->lock, context); ++ ++ /* Get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* Send it on its way. Tell CPM its ready, interrupt when done, ++ * its the last BD of the frame, and to put the CRC on the end. ++ */ ++ bdp->cbd_sc |= (BD_ENET_TX_READY | BD_ENET_TX_INTR | BD_ENET_TX_LAST | BD_ENET_TX_TC); ++ ++#ifdef ORIGINAL_VERSION ++ dev->trans_start = jiffies; ++#endif ++ ++ /* If this was the last BD in the ring, start at the beginning again. */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_READY) { ++ rtnetif_stop_queue(rtdev); ++ cep->tx_full = 1; ++ } ++ ++ cep->cur_tx = (cbd_t *)bdp; ++ ++ rtdm_lock_put_irqrestore(&cep->lock, context); ++ ++ return 0; ++} ++ ++ ++#ifdef ORIGINAL_VERSION ++static void ++fcc_enet_timeout(struct net_device *dev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)dev->priv; ++ ++ printk("%s: transmit timed out.\n", dev->name); ++ cep->stats.tx_errors++; ++#ifndef final_version ++ { ++ int i; ++ cbd_t *bdp; ++ printk(" Ring data dump: cur_tx %p%s cur_rx %p.\n", ++ cep->cur_tx, cep->tx_full ? " (full)" : "", ++ cep->cur_rx); ++ bdp = cep->tx_bd_base; ++ printk(" Tx @base %p :\n", bdp); ++ for (i = 0 ; i < TX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ bdp = cep->rx_bd_base; ++ printk(" Rx @base %p :\n", bdp); ++ for (i = 0 ; i < RX_RING_SIZE; i++, bdp++) ++ printk("%04x %04x %08x\n", ++ bdp->cbd_sc, ++ bdp->cbd_datlen, ++ bdp->cbd_bufaddr); ++ } ++#endif ++ if (!cep->tx_full) ++ netif_wake_queue(dev); ++} ++#endif /* ORIGINAL_VERSION */ ++ ++/* The interrupt handler. 
*/ ++static int fcc_enet_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); ++ int packets = 0; ++ struct fcc_enet_private *cep; ++ volatile cbd_t *bdp; ++ ushort int_events; ++ int must_restart; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ /* Get the interrupt events that caused us to be here. ++ */ ++ int_events = cep->fccp->fcc_fcce; ++ cep->fccp->fcc_fcce = int_events; ++ must_restart = 0; ++ ++ /* Handle receive event in its own function. ++ */ ++ if (int_events & FCC_ENET_RXF) { ++ fcc_enet_rx(rtdev, &packets, &time_stamp); ++ } ++ ++ /* Check for a transmit error. The manual is a little unclear ++ * about this, so the debug code until I get it figured out. It ++ * appears that if TXE is set, then TXB is not set. However, ++ * if carrier sense is lost during frame transmission, the TXE ++ * bit is set, "and continues the buffer transmission normally." ++ * I don't know if "normally" implies TXB is set when the buffer ++ * descriptor is closed.....trial and error :-). ++ */ ++ ++ /* Transmit OK, or non-fatal error. Update the buffer descriptors. ++ */ ++ if (int_events & (FCC_ENET_TXE | FCC_ENET_TXB)) { ++ rtdm_lock_get(&cep->lock); ++ bdp = cep->dirty_tx; ++ while ((bdp->cbd_sc&BD_ENET_TX_READY)==0) { ++ if ((bdp==cep->cur_tx) && (cep->tx_full == 0)) ++ break; ++ ++ if (bdp->cbd_sc & BD_ENET_TX_HB) /* No heartbeat */ ++ cep->stats.tx_heartbeat_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_LC) /* Late collision */ ++ cep->stats.tx_window_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_RL) /* Retrans limit */ ++ cep->stats.tx_aborted_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_UN) /* Underrun */ ++ cep->stats.tx_fifo_errors++; ++ if (bdp->cbd_sc & BD_ENET_TX_CSL) /* Carrier lost */ ++ cep->stats.tx_carrier_errors++; ++ ++ ++ /* No heartbeat or Lost carrier are not really bad errors. ++ * The others require a restart transmit command. ++ */ ++ if (bdp->cbd_sc & ++ (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) { ++ must_restart = 1; ++ cep->stats.tx_errors++; ++ } ++ ++ cep->stats.tx_packets++; ++ ++ /* Deferred means some collisions occurred during transmit, ++ * but we eventually sent the packet OK. ++ */ ++ if (bdp->cbd_sc & BD_ENET_TX_DEF) ++ cep->stats.collisions++; ++ ++ /* Free the sk buffer associated with this last transmit. */ ++ dev_kfree_rtskb(cep->tx_skbuff[cep->skb_dirty]); ++ cep->skb_dirty = (cep->skb_dirty + 1) & TX_RING_MOD_MASK; ++ ++ /* Update pointer to next buffer descriptor to be transmitted. */ ++ if (bdp->cbd_sc & BD_ENET_TX_WRAP) ++ bdp = cep->tx_bd_base; ++ else ++ bdp++; ++ ++ /* I don't know if we can be held off from processing these ++ * interrupts for more than one frame time. I really hope ++ * not. In such a case, we would now want to check the ++ * currently available BD (cur_tx) and determine if any ++ * buffers between the dirty_tx and cur_tx have also been ++ * sent. We would want to process anything in between that ++ * does not have BD_ENET_TX_READY set. ++ */ ++ ++ /* Since we have freed up a buffer, the ring is no longer ++ * full. ++ */ ++ if (cep->tx_full) { ++ cep->tx_full = 0; ++ if (rtnetif_queue_stopped(rtdev)) ++ rtnetif_wake_queue(rtdev); ++ } ++ ++ cep->dirty_tx = (cbd_t *)bdp; ++ } ++ ++ if (must_restart) { ++ volatile cpm8260_t *cp; ++ ++ /* Some transmit errors cause the transmitter to shut ++ * down. We now issue a restart transmit. 
Since the ++ * errors close the BD and update the pointers, the restart ++ * _should_ pick up without having to reset any of our ++ * pointers either. Also, To workaround 8260 device erratum ++ * CPM37, we must disable and then re-enable the transmitter ++ * following a Late Collision, Underrun, or Retry Limit error. ++ */ ++ cep->fccp->fcc_gfmr &= ~FCC_GFMR_ENT; ++#ifdef ORIGINAL_VERSION ++ udelay(10); /* wait a few microseconds just on principle */ ++#endif ++ cep->fccp->fcc_gfmr |= FCC_GFMR_ENT; ++ ++ cp = cpmp; ++ cp->cp_cpcr = ++ mk_cr_cmd(cep->fip->fc_cpmpage, cep->fip->fc_cpmblock, ++ 0x0c, CPM_CR_RESTART_TX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); // looks suspicious - how long may it take? ++ } ++ rtdm_lock_put(&cep->lock); ++ } ++ ++ /* Check for receive busy, i.e. packets coming but no place to ++ * put them. ++ */ ++ if (int_events & FCC_ENET_BSY) { ++ cep->stats.rx_dropped++; ++ } ++ ++ if (packets > 0) ++ rt_mark_stack_mgr(rtdev); ++ return RTDM_IRQ_HANDLED; ++} ++ ++/* During a receive, the cur_rx points to the current incoming buffer. ++ * When we update through the ring, if the next incoming buffer has ++ * not been given to the system, we just set the empty indicator, ++ * effectively tossing the packet. ++ */ ++static int ++fcc_enet_rx(struct rtnet_device *rtdev, int* packets, nanosecs_abs_t *time_stamp) ++{ ++ struct fcc_enet_private *cep; ++ volatile cbd_t *bdp; ++ struct rtskb *skb; ++ ushort pkt_len; ++ ++ RT_DEBUG(__FUNCTION__": ...\n"); ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ /* First, grab all of the stats for the incoming packet. ++ * These get messed up if we get called due to a busy condition. ++ */ ++ bdp = cep->cur_rx; ++ ++for (;;) { ++ if (bdp->cbd_sc & BD_ENET_RX_EMPTY) ++ break; ++ ++#ifndef final_version ++ /* Since we have allocated space to hold a complete frame, both ++ * the first and last indicators should be set. ++ */ ++ if ((bdp->cbd_sc & (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) != ++ (BD_ENET_RX_FIRST | BD_ENET_RX_LAST)) ++ rtdm_printk("CPM ENET: rcv is not first+last\n"); ++#endif ++ ++ /* Frame too long or too short. */ ++ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) ++ cep->stats.rx_length_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */ ++ cep->stats.rx_frame_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */ ++ cep->stats.rx_crc_errors++; ++ if (bdp->cbd_sc & BD_ENET_RX_CL) /* Late Collision */ ++ cep->stats.rx_frame_errors++; ++ ++ if (!(bdp->cbd_sc & ++ (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | BD_ENET_RX_CR ++ | BD_ENET_RX_OV | BD_ENET_RX_CL))) ++ { ++ /* Process the incoming frame. */ ++ cep->stats.rx_packets++; ++ ++ /* Remove the FCS from the packet length. */ ++ pkt_len = bdp->cbd_datlen - 4; ++ cep->stats.rx_bytes += pkt_len; ++ ++ /* This does 16 byte alignment, much more than we need. */ ++ skb = rtnetdev_alloc_rtskb(rtdev, pkt_len); ++ ++ if (skb == NULL) { ++ rtdm_printk("%s: Memory squeeze, dropping packet.\n", rtdev->name); ++ cep->stats.rx_dropped++; ++ } ++ else { ++ rtskb_put(skb,pkt_len); /* Make room */ ++ memcpy(skb->data, ++ (unsigned char *)__va(bdp->cbd_bufaddr), ++ pkt_len); ++ skb->protocol=rt_eth_type_trans(skb,rtdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ (*packets)++; ++ } ++ } ++ ++ /* Clear the status flags for this buffer. */ ++ bdp->cbd_sc &= ~BD_ENET_RX_STATS; ++ ++ /* Mark the buffer empty. 
*/ ++ bdp->cbd_sc |= BD_ENET_RX_EMPTY; ++ ++ /* Update BD pointer to next entry. */ ++ if (bdp->cbd_sc & BD_ENET_RX_WRAP) ++ bdp = cep->rx_bd_base; ++ else ++ bdp++; ++ ++ } ++ cep->cur_rx = (cbd_t *)bdp; ++ ++ return 0; ++} ++ ++static int ++fcc_enet_close(struct rtnet_device *rtdev) ++{ ++ /* Don't know what to do yet. */ ++ rtnetif_stop_queue(rtdev); ++ ++ return 0; ++} ++ ++static struct net_device_stats *fcc_enet_get_stats(struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ return &cep->stats; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ ++/* NOTE: Most of the following comes from the FEC driver for 860. The ++ * overall structure of MII code has been retained (as it's proved stable ++ * and well-tested), but actual transfer requests are processed "at once" ++ * instead of being queued (there's no interrupt-driven MII transfer ++ * mechanism, one has to toggle the data/clock bits manually). ++ */ ++static int ++mii_queue(struct net_device *dev, int regval, void (*func)(uint, struct net_device *)) ++{ ++ struct fcc_enet_private *fep; ++ int retval, tmp; ++ ++ /* Add PHY address to register command. */ ++ fep = dev->priv; ++ regval |= fep->phy_addr << 23; ++ ++ retval = 0; ++ ++ tmp = mii_send_receive(fep->fip, regval); ++ if (func) ++ func(tmp, dev); ++ ++ return retval; ++} ++ ++static void mii_do_cmd(struct net_device *dev, const phy_cmd_t *c) ++{ ++ int k; ++ ++ if(!c) ++ return; ++ ++ for(k = 0; (c+k)->mii_data != mk_mii_end; k++) ++ mii_queue(dev, (c+k)->mii_data, (c+k)->funct); ++} ++ ++static void mii_parse_sr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_LINK | PHY_STAT_FAULT | PHY_STAT_ANC); ++ ++ if (mii_reg & 0x0004) ++ s |= PHY_STAT_LINK; ++ if (mii_reg & 0x0010) ++ s |= PHY_STAT_FAULT; ++ if (mii_reg & 0x0020) ++ s |= PHY_STAT_ANC; ++ ++ fep->phy_status = s; ++ fep->link = (s & PHY_STAT_LINK) ? 
1 : 0; ++} ++ ++static void mii_parse_cr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_ANE | PHY_CONF_LOOP); ++ ++ if (mii_reg & 0x1000) ++ s |= PHY_CONF_ANE; ++ if (mii_reg & 0x4000) ++ s |= PHY_CONF_LOOP; ++ ++ fep->phy_status = s; ++} ++ ++static void mii_parse_anar(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_CONF_SPMASK); ++ ++ if (mii_reg & 0x0020) ++ s |= PHY_CONF_10HDX; ++ if (mii_reg & 0x0040) ++ s |= PHY_CONF_10FDX; ++ if (mii_reg & 0x0080) ++ s |= PHY_CONF_100HDX; ++ if (mii_reg & 0x00100) ++ s |= PHY_CONF_100FDX; ++ ++ fep->phy_status = s; ++} ++ ++/* Some boards don't have the MDIRQ line connected (PM826 is such a board) */ ++ ++static void mii_waitfor_anc(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ int regval; ++ int i; ++ ++ fep = dev->priv; ++ regval = mk_mii_read(MII_REG_SR) | (fep->phy_addr << 23); ++ ++ for (i = 0; i < 1000; i++) ++ { ++ if (mii_send_receive(fep->fip, regval) & 0x20) ++ return; ++ udelay(10000); ++ } ++ ++ printk("%s: autonegotiation timeout\n", dev->name); ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT970 is used by many boards */ ++ ++#ifdef CONFIG_FCC_LXT970 ++ ++#define MII_LXT970_MIRROR 16 /* Mirror register */ ++#define MII_LXT970_IER 17 /* Interrupt Enable Register */ ++#define MII_LXT970_ISR 18 /* Interrupt Status Register */ ++#define MII_LXT970_CONFIG 19 /* Configuration Register */ ++#define MII_LXT970_CSR 20 /* Chip Status Register */ ++ ++static void mii_parse_lxt970_csr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x0800) { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x1000) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt970 = { ++ 0x07810000, ++ "LXT970", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR and ISR to acknowledge */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT970_ISR), NULL }, ++ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_LXT970_CSR), mii_parse_lxt970_csr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT970_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT970 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Level one LXT971 is used on some of my custom boards */ ++ ++#ifdef CONFIG_FCC_LXT971 ++ ++/* register definitions for the 971 */ ++ ++#define MII_LXT971_PCR 16 /* Port Control Register */ ++#define MII_LXT971_SR2 17 /* Status Register 2 */ ++#define MII_LXT971_IER 18 /* Interrupt Enable Register */ ++#define MII_LXT971_ISR 19 /* Interrupt Status Register */ ++#define MII_LXT971_LCR 20 /* LED Control 
Register */ ++#define MII_LXT971_TCR 30 /* Transmit Control Register */ ++ ++/* ++ * I had some nice ideas of running the MDIO faster... ++ * The 971 should support 8MHz and I tried it, but things acted really ++ * weird, so 2.5 MHz ought to be enough for anyone... ++ */ ++ ++static void mii_parse_lxt971_sr2(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x4000) { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x0200) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ if (mii_reg & 0x0008) ++ s |= PHY_STAT_FAULT; ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_lxt971 = { ++ 0x0001378e, ++ "LXT971", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x021), NULL }, /* 10 Mbps, HD */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x00f2), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ ++ /* Somehow does the 971 tell me that the link is down ++ * the first read after power-up. ++ * read here to get a valid value in ack_int */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++#ifdef CONFIG_PM826 ++ { mk_mii_read(MII_REG_SR), mii_waitfor_anc }, ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* find out the current status */ ++ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_LXT971_SR2), mii_parse_lxt971_sr2 }, ++ ++ /* we only need to read ISR to acknowledge */ ++ ++ { mk_mii_read(MII_LXT971_ISR), NULL }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_LXT971_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FEC_LXT971 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ ++ ++#ifdef CONFIG_FCC_QS6612 ++ ++/* register definitions */ ++ ++#define MII_QS6612_MCR 17 /* Mode Control Register */ ++#define MII_QS6612_FTR 27 /* Factory Test Register */ ++#define MII_QS6612_MCO 28 /* Misc. Control Register */ ++#define MII_QS6612_ISR 29 /* Interrupt Source Register */ ++#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ ++#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. */ ++ ++static void mii_parse_qs6612_pcr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch((mii_reg >> 2) & 7) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_100HDX; break; ++ case 5: s |= PHY_STAT_10FDX; break; ++ case 6: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_qs6612 = { ++ 0x00181440, ++ "QS6612", ++ ++ (const phy_cmd_t []) { /* config */ ++// { mk_mii_write(MII_REG_ANAR, 0x061), NULL }, /* 10 Mbps */ ++ ++ /* The PHY powers up isolated on the RPX, ++ * so send a command to allow operation. 
++ */ ++ ++ { mk_mii_write(MII_QS6612_PCR, 0x0dc0), NULL }, ++ ++ /* parse cr and anar to get some info */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x003a), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read ISR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_QS6612_ISR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read pcr to get info */ ++ ++ { mk_mii_read(MII_QS6612_PCR), mii_parse_qs6612_pcr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_QS6612_IMR, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++ ++#endif /* CONFIG_FCC_QS6612 */ ++ ++/* ------------------------------------------------------------------------- */ ++/* The AMD Am79C873 PHY is on PM826 */ ++ ++#ifdef CONFIG_FCC_AMD79C873 ++ ++#define MII_79C873_IER 17 /* Interrupt Enable Register */ ++#define MII_79C873_DR 18 /* Diagnostic Register */ ++ ++static void mii_parse_79c873_cr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ if (mii_reg & 0x2000) { ++ if (mii_reg & 0x0100) ++ s |= PHY_STAT_100FDX; ++ else ++ s |= PHY_STAT_100HDX; ++ } else { ++ if (mii_reg & 0x0100) ++ s |= PHY_STAT_10FDX; ++ else ++ s |= PHY_STAT_10HDX; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_79c873 = { ++ 0x00181b80, ++ "AMD79C873", ++ ++ (const phy_cmd_t []) { /* config */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup */ ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++#ifdef CONFIG_PM826 ++ { mk_mii_read(MII_REG_SR), mii_waitfor_anc }, ++#endif ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ /* read SR twice: to acknowledge and to get link status */ ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ ++ /* find out the current link parameters */ ++ ++ { mk_mii_read(MII_REG_CR), mii_parse_79c873_cr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_79C873_IER, 0x0000), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++#endif /* CONFIG_FCC_AMD79C873 */ ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* The Davicom DM9131 is used on the HYMOD board */ ++ ++#ifdef CONFIG_FCC_DM9131 ++ ++/* register definitions */ ++ ++#define MII_DM9131_ACR 16 /* Aux. Config Register */ ++#define MII_DM9131_ACSR 17 /* Aux. Config/Status Register */ ++#define MII_DM9131_10TCSR 18 /* 10BaseT Config/Status Reg. */ ++#define MII_DM9131_INTR 21 /* Interrupt Register */ ++#define MII_DM9131_RECR 22 /* Receive Error Counter Reg. 
*/ ++#define MII_DM9131_DISCR 23 /* Disconnect Counter Register */ ++ ++static void mii_parse_dm9131_acsr(uint mii_reg, struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ s &= ~(PHY_STAT_SPMASK); ++ ++ switch ((mii_reg >> 12) & 0xf) { ++ case 1: s |= PHY_STAT_10HDX; break; ++ case 2: s |= PHY_STAT_10FDX; break; ++ case 4: s |= PHY_STAT_100HDX; break; ++ case 8: s |= PHY_STAT_100FDX; break; ++ } ++ ++ fep->phy_status = s; ++} ++ ++static phy_info_t phy_info_dm9131 = { ++ 0x00181b80, ++ "DM9131", ++ ++ (const phy_cmd_t []) { /* config */ ++ /* parse cr and anar to get some info */ ++ { mk_mii_read(MII_REG_CR), mii_parse_cr }, ++ { mk_mii_read(MII_REG_ANAR), mii_parse_anar }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* startup - enable interrupts */ ++ { mk_mii_write(MII_DM9131_INTR, 0x0002), NULL }, ++ { mk_mii_write(MII_REG_CR, 0x1200), NULL }, /* autonegotiate */ ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* ack_int */ ++ ++ /* we need to read INTR, SR and ANER to acknowledge */ ++ ++ { mk_mii_read(MII_DM9131_INTR), NULL }, ++ { mk_mii_read(MII_REG_SR), mii_parse_sr }, ++ { mk_mii_read(MII_REG_ANER), NULL }, ++ ++ /* read acsr to get info */ ++ ++ { mk_mii_read(MII_DM9131_ACSR), mii_parse_dm9131_acsr }, ++ { mk_mii_end, } ++ }, ++ (const phy_cmd_t []) { /* shutdown - disable interrupts */ ++ { mk_mii_write(MII_DM9131_INTR, 0x0f00), NULL }, ++ { mk_mii_end, } ++ }, ++}; ++ ++ ++#endif /* CONFIG_FEC_DM9131 */ ++ ++ ++static phy_info_t *phy_info[] = { ++ ++#ifdef CONFIG_FCC_LXT970 ++ &phy_info_lxt970, ++#endif /* CONFIG_FCC_LXT970 */ ++ ++#ifdef CONFIG_FCC_LXT971 ++ &phy_info_lxt971, ++#endif /* CONFIG_FCC_LXT971 */ ++ ++#ifdef CONFIG_FCC_QS6612 ++ &phy_info_qs6612, ++#endif /* CONFIG_FCC_QS6612 */ ++ ++#ifdef CONFIG_FCC_DM9131 ++ &phy_info_dm9131, ++#endif /* CONFIG_FCC_DM9131 */ ++ ++#ifdef CONFIG_FCC_AMD79C873 ++ &phy_info_79c873, ++#endif /* CONFIG_FCC_AMD79C873 */ ++ ++ NULL ++}; ++ ++static void mii_display_status(struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ if (!fep->link && !fep->old_link) { ++ /* Link is still down - don't print anything */ ++ return; ++ } ++ ++ printk("%s: status: ", dev->name); ++ ++ if (!fep->link) { ++ printk("link down"); ++ } else { ++ printk("link up"); ++ ++ switch(s & PHY_STAT_SPMASK) { ++ case PHY_STAT_100FDX: printk(", 100 Mbps Full Duplex"); break; ++ case PHY_STAT_100HDX: printk(", 100 Mbps Half Duplex"); break; ++ case PHY_STAT_10FDX: printk(", 10 Mbps Full Duplex"); break; ++ case PHY_STAT_10HDX: printk(", 10 Mbps Half Duplex"); break; ++ default: ++ printk(", Unknown speed/duplex"); ++ } ++ ++ if (s & PHY_STAT_ANC) ++ printk(", auto-negotiation complete"); ++ } ++ ++ if (s & PHY_STAT_FAULT) ++ printk(", remote fault"); ++ ++ printk(".\n"); ++} ++ ++static void mii_display_config(struct net_device *dev) ++{ ++ volatile struct fcc_enet_private *fep = dev->priv; ++ uint s = fep->phy_status; ++ ++ printk("%s: config: auto-negotiation ", dev->name); ++ ++ if (s & PHY_CONF_ANE) ++ printk("on"); ++ else ++ printk("off"); ++ ++ if (s & PHY_CONF_100FDX) ++ printk(", 100FDX"); ++ if (s & PHY_CONF_100HDX) ++ printk(", 100HDX"); ++ if (s & PHY_CONF_10FDX) ++ printk(", 10FDX"); ++ if (s & PHY_CONF_10HDX) ++ printk(", 10HDX"); ++ if (!(s & PHY_CONF_SPMASK)) ++ printk(", No speed/duplex selected?"); ++ ++ if (s & PHY_CONF_LOOP) ++ printk(", loopback enabled"); ++ ++ printk(".\n"); ++ ++ fep->sequence_done = 1; 
++} ++ ++static void mii_relink(struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ int duplex; ++ ++ fep->link = (fep->phy_status & PHY_STAT_LINK) ? 1 : 0; ++ mii_display_status(dev); ++ fep->old_link = fep->link; ++ ++ if (fep->link) { ++ duplex = 0; ++ if (fep->phy_status ++ & (PHY_STAT_100FDX | PHY_STAT_10FDX)) ++ duplex = 1; ++ fcc_restart(dev, duplex); ++ } else { ++ fcc_stop(dev); ++ } ++} ++ ++static void mii_queue_relink(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_relink; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++static void mii_queue_config(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep = dev->priv; ++ ++ fep->phy_task.routine = (void *)mii_display_config; ++ fep->phy_task.data = dev; ++ schedule_task(&fep->phy_task); ++} ++ ++ ++ ++phy_cmd_t phy_cmd_relink[] = { { mk_mii_read(MII_REG_CR), mii_queue_relink }, ++ { mk_mii_end, } }; ++phy_cmd_t phy_cmd_config[] = { { mk_mii_read(MII_REG_CR), mii_queue_config }, ++ { mk_mii_end, } }; ++ ++ ++/* Read remainder of PHY ID. ++*/ ++static void ++mii_discover_phy3(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ int i; ++ ++ fep = dev->priv; ++ fep->phy_id |= (mii_reg & 0xffff); ++ ++ for(i = 0; phy_info[i]; i++) ++ if(phy_info[i]->id == (fep->phy_id >> 4)) ++ break; ++ ++ if(!phy_info[i]) ++ panic("%s: PHY id 0x%08x is not supported!\n", ++ dev->name, fep->phy_id); ++ ++ fep->phy = phy_info[i]; ++ ++ printk("%s: Phy @ 0x%x, type %s (0x%08x)\n", ++ dev->name, fep->phy_addr, fep->phy->name, fep->phy_id); ++} ++ ++/* Scan all of the MII PHY addresses looking for someone to respond ++ * with a valid ID. This usually happens quickly. ++ */ ++static void ++mii_discover_phy(uint mii_reg, struct net_device *dev) ++{ ++ struct fcc_enet_private *fep; ++ uint phytype; ++ ++ fep = dev->priv; ++ ++ if ((phytype = (mii_reg & 0xfff)) != 0xfff && phytype != 0) { ++ ++ /* Got first part of ID, now get remainder. */ ++ fep->phy_id = phytype << 16; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR2), mii_discover_phy3); ++ } else { ++ fep->phy_addr++; ++ if (fep->phy_addr < 32) { ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), ++ mii_discover_phy); ++ } else { ++ printk("FCC: No PHY device found.\n"); ++ } ++ } ++} ++ ++/* This interrupt occurs when the PHY detects a link change. */ ++#if !defined (CONFIG_PM826) ++static void ++mii_link_interrupt(int irq, void * dev_id, struct pt_regs * regs) ++{ ++ struct net_device *dev = dev_id; ++ struct fcc_enet_private *fep = dev->priv; ++ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); /* restart and display status */ ++} ++#endif /* !CONFIG_PM826 */ ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++#ifdef ORIGINAL_VERSION ++/* Set or clear the multicast filter for this adaptor. ++ * Skeleton taken from sunlance driver. ++ * The CPM Ethernet implementation allows Multicast as well as individual ++ * MAC address filtering. Some of the drivers check to make sure it is ++ * a group multicast address, and discard those that are not. I guess I ++ * will do the same for now, but just remove the test if you want ++ * individual filtering as well (do the upper net layers want or support ++ * this kind of feature?). 
++ */ ++static void ++set_multicast_list(struct net_device *dev) ++{ ++ struct fcc_enet_private *cep; ++ struct dev_mc_list *dmi; ++ u_char *mcptr, *tdptr; ++ volatile fcc_enet_t *ep; ++ int i, j; ++ ++ cep = (struct fcc_enet_private *)dev->priv; ++ ++return; ++ /* Get pointer to FCC area in parameter RAM. ++ */ ++ ep = (fcc_enet_t *)dev->base_addr; ++ ++ if (dev->flags&IFF_PROMISC) { ++ ++ /* Log any net taps. */ ++ printk("%s: Promiscuous mode enabled.\n", dev->name); ++ cep->fccp->fcc_fpsmr |= FCC_PSMR_PRO; ++ } else { ++ ++ cep->fccp->fcc_fpsmr &= ~FCC_PSMR_PRO; ++ ++ if (dev->flags & IFF_ALLMULTI) { ++ /* Catch all multicast addresses, so set the ++ * filter to all 1's. ++ */ ++ ep->fen_gaddrh = 0xffffffff; ++ ep->fen_gaddrl = 0xffffffff; ++ } ++ else { ++ /* Clear filter and add the addresses in the list. ++ */ ++ ep->fen_gaddrh = 0; ++ ep->fen_gaddrl = 0; ++ ++ dmi = dev->mc_list; ++ ++ for (i=0; imc_count; i++) { ++ ++ /* Only support group multicast for now. ++ */ ++ if (!(dmi->dmi_addr[0] & 1)) ++ continue; ++ ++ /* The address in dmi_addr is LSB first, ++ * and taddr is MSB first. We have to ++ * copy bytes MSB first from dmi_addr. ++ */ ++ mcptr = (u_char *)dmi->dmi_addr + 5; ++ tdptr = (u_char *)&ep->fen_taddrh; ++ for (j=0; j<6; j++) ++ *tdptr++ = *mcptr--; ++ ++ /* Ask CPM to run CRC and set bit in ++ * filter mask. ++ */ ++ cpmp->cp_cpcr = mk_cr_cmd(cep->fip->fc_cpmpage, ++ cep->fip->fc_cpmblock, 0x0c, ++ CPM_CR_SET_GADDR) | CPM_CR_FLG; ++ udelay(10); ++ while (cpmp->cp_cpcr & CPM_CR_FLG); ++ } ++ } ++ } ++} ++ ++ ++/* Set the individual MAC address. ++ */ ++int fcc_enet_set_mac_address(struct net_device *dev, void *p) ++{ ++ struct sockaddr *addr= (struct sockaddr *) p; ++ struct fcc_enet_private *cep; ++ volatile fcc_enet_t *ep; ++ unsigned char *eap; ++ int i; ++ ++ cep = (struct fcc_enet_private *)(dev->priv); ++ ep = cep->ep; ++ ++ if (netif_running(dev)) ++ return -EBUSY; ++ ++ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); ++ ++ eap = (unsigned char *) &(ep->fen_paddrh); ++ for (i=5; i>=0; i--) ++ *eap++ = addr->sa_data[i]; ++ ++ return 0; ++} ++#endif /* ORIGINAL_VERSION */ ++ ++ ++/* Initialize the CPM Ethernet on FCC. ++ */ ++int __init fec_enet_init(void) ++{ ++ struct rtnet_device *rtdev = NULL; ++ struct fcc_enet_private *cep; ++ fcc_info_t *fip; ++ int i, np; ++ volatile immap_t *immap; ++ volatile iop8260_t *io; ++ ++ immap = (immap_t *)IMAP_ADDR; /* and to internal registers */ ++ io = &immap->im_ioport; ++ ++ for (np = 0, fip = fcc_ports; ++ np < sizeof(fcc_ports) / sizeof(fcc_info_t); ++ np++, fip++) { ++ ++ /* Skip FCC ports not used for RTnet. ++ */ ++ if (np != rtnet_fcc - 1) continue; ++ ++ /* Allocate some private information and create an Ethernet device instance. ++ */ ++ if (!rx_pool_size) ++ rx_pool_size = RX_RING_SIZE * 2; ++ ++ rtdev = rt_alloc_etherdev(sizeof(struct fcc_enet_private), ++ rx_pool_size + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "fcc_enet: Could not allocate ethernet device.\n"); ++ return -1; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ rtdm_lock_init(&cep->lock); ++ cep->fip = fip; ++ fip->rtdev = rtdev; /* need for cleanup */ ++ ++ init_fcc_shutdown(fip, cep, immap); ++ init_fcc_ioports(fip, io, immap); ++ init_fcc_param(fip, rtdev, immap); ++ ++ rtdev->base_addr = (unsigned long)(cep->ep); ++ ++ /* The CPM Ethernet specific entries in the device ++ * structure. 
++ */ ++ rtdev->open = fcc_enet_open; ++ rtdev->hard_start_xmit = fcc_enet_start_xmit; ++ rtdev->stop = fcc_enet_close; ++ rtdev->hard_header = &rt_eth_header; ++ rtdev->get_stats = fcc_enet_get_stats; ++ ++ if ((i = rt_register_rtnetdev(rtdev))) { ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ rtdev_free(rtdev); ++ return i; ++ } ++ init_fcc_startup(fip, rtdev); ++ ++ printk("%s: FCC%d ENET Version 0.4, %02x:%02x:%02x:%02x:%02x:%02x\n", ++ rtdev->name, fip->fc_fccnum + 1, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], rtdev->dev_addr[2], ++ rtdev->dev_addr[3], rtdev->dev_addr[4], rtdev->dev_addr[5]); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* Queue up command to detect the PHY and initialize the ++ * remainder of the interface. ++ */ ++ cep->phy_addr = 0; ++ mii_queue(dev, mk_mii_read(MII_REG_PHYIR1), mii_discover_phy); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ } ++ ++ return 0; ++} ++ ++/* Make sure the device is shut down during initialization. ++*/ ++static void __init ++init_fcc_shutdown(fcc_info_t *fip, struct fcc_enet_private *cep, ++ volatile immap_t *immap) ++{ ++ volatile fcc_enet_t *ep; ++ volatile fcc_t *fccp; ++ ++ /* Get pointer to FCC area in parameter RAM. ++ */ ++ ep = (fcc_enet_t *)(&immap->im_dprambase[fip->fc_proff]); ++ ++ /* And another to the FCC register area. ++ */ ++ fccp = (volatile fcc_t *)(&immap->im_fcc[fip->fc_fccnum]); ++ cep->fccp = fccp; /* Keep the pointers handy */ ++ cep->ep = ep; ++ ++ /* Disable receive and transmit in case someone left it running. ++ */ ++ fccp->fcc_gfmr &= ~(FCC_GFMR_ENR | FCC_GFMR_ENT); ++} ++ ++/* Initialize the I/O pins for the FCC Ethernet. ++*/ ++static void __init ++init_fcc_ioports(fcc_info_t *fip, volatile iop8260_t *io, ++ volatile immap_t *immap) ++{ ++ ++ /* FCC1 pins are on port A/C. FCC2/3 are port B/C. ++ */ ++ if (fip->fc_proff == PROFF_FCC1) { ++ /* Configure port A and C pins for FCC1 Ethernet. ++ */ ++ io->iop_pdira &= ~PA1_DIRA0; ++ io->iop_pdira |= PA1_DIRA1; ++ io->iop_psora &= ~PA1_PSORA0; ++ io->iop_psora |= PA1_PSORA1; ++ io->iop_ppara |= (PA1_DIRA0 | PA1_DIRA1); ++ } ++ if (fip->fc_proff == PROFF_FCC2) { ++ /* Configure port B and C pins for FCC Ethernet. ++ */ ++ io->iop_pdirb &= ~PB2_DIRB0; ++ io->iop_pdirb |= PB2_DIRB1; ++ io->iop_psorb &= ~PB2_PSORB0; ++ io->iop_psorb |= PB2_PSORB1; ++ io->iop_pparb |= (PB2_DIRB0 | PB2_DIRB1); ++ } ++ if (fip->fc_proff == PROFF_FCC3) { ++ /* Configure port B and C pins for FCC Ethernet. ++ */ ++ io->iop_pdirb &= ~PB3_DIRB0; ++ io->iop_pdirb |= PB3_DIRB1; ++ io->iop_psorb &= ~PB3_PSORB0; ++ io->iop_psorb |= PB3_PSORB1; ++ io->iop_pparb |= (PB3_DIRB0 | PB3_DIRB1); ++ } ++ ++ /* Port C has clocks...... ++ */ ++ io->iop_psorc &= ~(fip->fc_trxclocks); ++ io->iop_pdirc &= ~(fip->fc_trxclocks); ++ io->iop_pparc |= fip->fc_trxclocks; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ /* ....and the MII serial clock/data. ++ */ ++#ifndef CONFIG_PM826 ++ IOP_DAT(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ IOP_ODR(io,fip->fc_port) &= ~(fip->fc_mdio | fip->fc_mdck); ++#endif /* CONFIG_PM826 */ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ IOP_PAR(io,fip->fc_port) &= ~(fip->fc_mdio | fip->fc_mdck); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++ /* Configure Serial Interface clock routing. ++ * First, clear all FCC bits to zero, ++ * then set the ones we want. 
++ */ ++ immap->im_cpmux.cmx_fcr &= ~(fip->fc_clockmask); ++ immap->im_cpmux.cmx_fcr |= fip->fc_clockroute; ++} ++ ++static void __init ++init_fcc_param(fcc_info_t *fip, struct rtnet_device *rtdev, ++ volatile immap_t *immap) ++{ ++ unsigned char *eap; ++ unsigned long mem_addr; ++ bd_t *bd; ++ int i, j; ++ struct fcc_enet_private *cep; ++ volatile fcc_enet_t *ep; ++ volatile cbd_t *bdp; ++ volatile cpm8260_t *cp; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ep = cep->ep; ++ cp = cpmp; ++ ++ bd = (bd_t *)__res; ++ ++ /* Zero the whole thing.....I must have missed some individually. ++ * It works when I do this. ++ */ ++ memset((char *)ep, 0, sizeof(fcc_enet_t)); ++ ++ /* Allocate space for the buffer descriptors in the DP ram. ++ * These are relative offsets in the DP ram address space. ++ * Initialize base addresses for the buffer descriptors. ++ */ ++ cep->rx_bd_base = (cbd_t *)m8260_cpm_hostalloc(sizeof(cbd_t) * RX_RING_SIZE, 8); ++ ep->fen_genfcc.fcc_rbase = __pa(cep->rx_bd_base); ++ cep->tx_bd_base = (cbd_t *)m8260_cpm_hostalloc(sizeof(cbd_t) * TX_RING_SIZE, 8); ++ ep->fen_genfcc.fcc_tbase = __pa(cep->tx_bd_base); ++ ++ cep->dirty_tx = cep->cur_tx = cep->tx_bd_base; ++ cep->cur_rx = cep->rx_bd_base; ++ ++ ep->fen_genfcc.fcc_rstate = (CPMFCR_GBL | CPMFCR_EB) << 24; ++ ep->fen_genfcc.fcc_tstate = (CPMFCR_GBL | CPMFCR_EB) << 24; ++ ++ /* Set maximum bytes per receive buffer. ++ * It must be a multiple of 32. ++ */ ++ ep->fen_genfcc.fcc_mrblr = PKT_MAXBLR_SIZE; ++ ++ /* Allocate space in the reserved FCC area of DPRAM for the ++ * internal buffers. No one uses this space (yet), so we ++ * can do this. Later, we will add resource management for ++ * this area. ++ */ ++ mem_addr = CPM_FCC_SPECIAL_BASE + (fip->fc_fccnum * 128); ++ ep->fen_genfcc.fcc_riptr = mem_addr; ++ ep->fen_genfcc.fcc_tiptr = mem_addr+32; ++ ep->fen_padptr = mem_addr+64; ++ memset((char *)(&(immap->im_dprambase[(mem_addr+64)])), 0x88, 32); ++ ++ ep->fen_genfcc.fcc_rbptr = 0; ++ ep->fen_genfcc.fcc_tbptr = 0; ++ ep->fen_genfcc.fcc_rcrc = 0; ++ ep->fen_genfcc.fcc_tcrc = 0; ++ ep->fen_genfcc.fcc_res1 = 0; ++ ep->fen_genfcc.fcc_res2 = 0; ++ ++ ep->fen_camptr = 0; /* CAM isn't used in this driver */ ++ ++ /* Set CRC preset and mask. ++ */ ++ ep->fen_cmask = 0xdebb20e3; ++ ep->fen_cpres = 0xffffffff; ++ ++ ep->fen_crcec = 0; /* CRC Error counter */ ++ ep->fen_alec = 0; /* alignment error counter */ ++ ep->fen_disfc = 0; /* discard frame counter */ ++ ep->fen_retlim = 15; /* Retry limit threshold */ ++ ep->fen_pper = 0; /* Normal persistence */ ++ ++ /* Clear hash filter tables. ++ */ ++ ep->fen_gaddrh = 0; ++ ep->fen_gaddrl = 0; ++ ep->fen_iaddrh = 0; ++ ep->fen_iaddrl = 0; ++ ++ /* Clear the Out-of-sequence TxBD. ++ */ ++ ep->fen_tfcstat = 0; ++ ep->fen_tfclen = 0; ++ ep->fen_tfcptr = 0; ++ ++ ep->fen_mflr = PKT_MAXBUF_SIZE; /* maximum frame length register */ ++ ep->fen_minflr = PKT_MINBUF_SIZE; /* minimum frame length register */ ++ ++ /* Set Ethernet station address. ++ * ++ * This is supplied in the board information structure, so we ++ * copy that into the controller. 
++ */ ++ eap = (unsigned char *)&(ep->fen_paddrh); ++#if defined(CONFIG_CPU86) || defined(CONFIG_TQM8260) ++ /* ++ * TQM8260 and CPU86 use sequential MAC addresses ++ */ ++ *eap++ = rtdev->dev_addr[5] = bd->bi_enetaddr[5] + fip->fc_fccnum; ++ for (i=4; i>=0; i--) { ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++#elif defined(CONFIG_PM826) ++ *eap++ = rtdev->dev_addr[5] = bd->bi_enetaddr[5] + fip->fc_fccnum + 1; ++ for (i=4; i>=0; i--) { ++ *eap++ = rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++#else ++ /* ++ * So, far we have only been given one Ethernet address. We make ++ * it unique by toggling selected bits in the upper byte of the ++ * non-static part of the address (for the second and third ports, ++ * the first port uses the address supplied as is). ++ */ ++ for (i=5; i>=0; i--) { ++ if (i == 3 && fip->fc_fccnum != 0) { ++ rtdev->dev_addr[i] = bd->bi_enetaddr[i]; ++ rtdev->dev_addr[i] ^= (1 << (7 - fip->fc_fccnum)); ++ *eap++ = dev->dev_addr[i]; ++ } ++ else { ++ *eap++ = dev->dev_addr[i] = bd->bi_enetaddr[i]; ++ } ++ } ++#endif ++ ++ ep->fen_taddrh = 0; ++ ep->fen_taddrm = 0; ++ ep->fen_taddrl = 0; ++ ++ ep->fen_maxd1 = PKT_MAXDMA_SIZE; /* maximum DMA1 length */ ++ ep->fen_maxd2 = PKT_MAXDMA_SIZE; /* maximum DMA2 length */ ++ ++ /* Clear stat counters, in case we ever enable RMON. ++ */ ++ ep->fen_octc = 0; ++ ep->fen_colc = 0; ++ ep->fen_broc = 0; ++ ep->fen_mulc = 0; ++ ep->fen_uspc = 0; ++ ep->fen_frgc = 0; ++ ep->fen_ospc = 0; ++ ep->fen_jbrc = 0; ++ ep->fen_p64c = 0; ++ ep->fen_p65c = 0; ++ ep->fen_p128c = 0; ++ ep->fen_p256c = 0; ++ ep->fen_p512c = 0; ++ ep->fen_p1024c = 0; ++ ++ ep->fen_rfthr = 0; /* Suggested by manual */ ++ ep->fen_rfcnt = 0; ++ ep->fen_cftype = 0; ++ ++ /* Now allocate the host memory pages and initialize the ++ * buffer descriptors. ++ */ ++ bdp = cep->tx_bd_base; ++ for (i=0; icbd_sc = 0; ++ bdp->cbd_datlen = 0; ++ bdp->cbd_bufaddr = 0; ++ bdp++; ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ bdp = cep->rx_bd_base; ++ for (i=0; icbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR; ++ bdp->cbd_datlen = 0; ++ bdp->cbd_bufaddr = __pa(mem_addr); ++ mem_addr += FCC_ENET_RX_FRSIZE; ++ bdp++; ++ } ++ } ++ ++ /* Set the last buffer to wrap. ++ */ ++ bdp--; ++ bdp->cbd_sc |= BD_SC_WRAP; ++ ++ /* Let's re-initialize the channel now. We have to do it later ++ * than the manual describes because we have just now finished ++ * the BD initialization. ++ */ ++ cp->cp_cpcr = mk_cr_cmd(fip->fc_cpmpage, fip->fc_cpmblock, 0x0c, ++ CPM_CR_INIT_TRX) | CPM_CR_FLG; ++ while (cp->cp_cpcr & CPM_CR_FLG); ++ ++ cep->skb_cur = cep->skb_dirty = 0; ++} ++ ++/* Let 'er rip. ++*/ ++static void __init ++init_fcc_startup(fcc_info_t *fip, struct rtnet_device *rtdev) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *cep; ++ ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ fccp = cep->fccp; ++ ++ fccp->fcc_fcce = 0xffff; /* Clear any pending events */ ++ ++ /* Enable interrupts for transmit error, complete frame ++ * received, and any transmit buffer we have also set the ++ * interrupt flag. ++ */ ++ fccp->fcc_fccm = (FCC_ENET_TXE | FCC_ENET_RXF | FCC_ENET_TXB); ++ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ /* Install our interrupt handler. 
++ */ ++ if (rtdm_irq_request(&cep->irq_handle, fip->fc_interrupt, ++ fcc_enet_interrupt, 0, "rt_mpc8260_fcc_enet", rtdev)) { ++ printk(KERN_ERR "Couldn't request IRQ %d\n", rtdev->irq); ++ rtdev_free(rtdev); ++ return; ++ } ++ ++ ++#if defined (CONFIG_XENO_DRIVERS_NET_USE_MDIO) && !defined (CONFIG_PM826) ++# ifndef PHY_INTERRUPT ++# error Want to use MDIO, but PHY_INTERRUPT not defined! ++# endif ++ if (request_8xxirq(PHY_INTERRUPT, mii_link_interrupt, 0, ++ "mii", dev) < 0) ++ printk("Can't get MII IRQ %d\n", PHY_INTERRUPT); ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO, CONFIG_PM826 */ ++ ++ /* Set GFMR to enable Ethernet operating mode. ++ */ ++#ifndef CONFIG_EST8260 ++ fccp->fcc_gfmr = (FCC_GFMR_TCI | FCC_GFMR_MODE_ENET); ++#else ++ fccp->fcc_gfmr = FCC_GFMR_MODE_ENET; ++#endif ++ ++ /* Set sync/delimiters. ++ */ ++ fccp->fcc_fdsr = 0xd555; ++ ++ /* Set protocol specific processing mode for Ethernet. ++ * This has to be adjusted for Full Duplex operation after we can ++ * determine how to detect that. ++ */ ++ fccp->fcc_fpsmr = FCC_PSMR_ENCRC; ++ ++#ifdef CONFIG_ADS8260 ++ /* Enable the PHY. ++ */ ++ ads_csr_addr[1] |= BCSR1_FETH_RST; /* Remove reset */ ++ ads_csr_addr[1] &= ~BCSR1_FETHIEN; /* Enable */ ++#endif ++ ++#if defined(CONFIG_XENO_DRIVERS_NET_USE_MDIO) || defined(CONFIG_TQM8260) ++ /* start in full duplex mode, and negotiate speed */ ++ fcc_restart (rtdev, 1); ++#else ++ /* start in half duplex mode */ ++ fcc_restart (rtdev, 0); ++#endif ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++/* MII command/status interface. ++ * I'm not going to describe all of the details. You can find the ++ * protocol definition in many other places, including the data sheet ++ * of most PHY parts. ++ * I wonder what "they" were thinking (maybe weren't) when they leave ++ * the I2C in the CPM but I have to toggle these bits...... ++ * ++ * Timing is a critical, especially on faster CPU's ... ++ */ ++#define MDIO_DELAY 5 ++ ++#define FCC_MDIO(bit) do { \ ++ udelay(MDIO_DELAY); \ ++ if (bit) \ ++ IOP_DAT(io,fip->fc_port) |= fip->fc_mdio; \ ++ else \ ++ IOP_DAT(io,fip->fc_port) &= ~fip->fc_mdio; \ ++} while(0) ++ ++#define FCC_MDC(bit) do { \ ++ udelay(MDIO_DELAY); \ ++ if (bit) \ ++ IOP_DAT(io,fip->fc_port) |= fip->fc_mdck; \ ++ else \ ++ IOP_DAT(io,fip->fc_port) &= ~fip->fc_mdck; \ ++} while(0) ++ ++static uint ++mii_send_receive(fcc_info_t *fip, uint cmd) ++{ ++ uint retval; ++ int read_op, i, off; ++ volatile immap_t *immap; ++ volatile iop8260_t *io; ++ ++ immap = (immap_t *)IMAP_ADDR; ++ io = &immap->im_ioport; ++ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ ++ read_op = ((cmd & 0xf0000000) == 0x60000000); ++ ++ /* Write preamble ++ */ ++ for (i = 0; i < 32; i++) ++ { ++ FCC_MDC(0); ++ FCC_MDIO(1); ++ FCC_MDC(1); ++ } ++ ++ /* Write data ++ */ ++ for (i = 0, off = 31; i < (read_op ? 
14 : 32); i++, --off) ++ { ++ FCC_MDC(0); ++ FCC_MDIO((cmd >> off) & 0x00000001); ++ FCC_MDC(1); ++ } ++ ++ retval = cmd; ++ ++ if (read_op) ++ { ++ retval >>= 16; ++ ++ FCC_MDC(0); ++ IOP_DIR(io,fip->fc_port) &= ~fip->fc_mdio; ++ FCC_MDC(1); ++ FCC_MDC(0); ++ ++ for (i = 0, off = 15; i < 16; i++, off--) ++ { ++ FCC_MDC(1); ++ udelay(MDIO_DELAY); ++ retval <<= 1; ++ if (IOP_DAT(io,fip->fc_port) & fip->fc_mdio) ++ retval++; ++ FCC_MDC(0); ++ } ++ } ++ ++ IOP_DIR(io,fip->fc_port) |= (fip->fc_mdio | fip->fc_mdck); ++ ++ for (i = 0; i < 32; i++) ++ { ++ FCC_MDC(0); ++ FCC_MDIO(1); ++ FCC_MDC(1); ++ } ++ ++ return retval; ++} ++ ++static void ++fcc_stop(struct net_device *dev) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *fcp; ++ ++ fcp = (struct fcc_enet_private *)(dev->priv); ++ fccp = fcp->fccp; ++ ++ /* Disable transmit/receive */ ++ fccp->fcc_gfmr &= ~(FCC_GFMR_ENR | FCC_GFMR_ENT); ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++ ++static void ++fcc_restart(struct rtnet_device *rtdev, int duplex) ++{ ++ volatile fcc_t *fccp; ++ struct fcc_enet_private *fcp; ++ ++ fcp = (struct fcc_enet_private *)rtdev->priv; ++ fccp = fcp->fccp; ++ ++ if (duplex) ++ fccp->fcc_fpsmr |= (FCC_PSMR_FDE | FCC_PSMR_LPB); ++ else ++ fccp->fcc_fpsmr &= ~(FCC_PSMR_FDE | FCC_PSMR_LPB); ++ ++ /* Enable transmit/receive */ ++ fccp->fcc_gfmr |= FCC_GFMR_ENR | FCC_GFMR_ENT; ++} ++ ++static int ++fcc_enet_open(struct rtnet_device *rtdev) ++{ ++ struct fcc_enet_private *fep = rtdev->priv; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_USE_MDIO ++ fep->sequence_done = 0; ++ fep->link = 0; ++ ++ if (fep->phy) { ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, fep->phy->config); ++ mii_do_cmd(dev, phy_cmd_config); /* display configuration */ ++ while(!fep->sequence_done) ++ schedule(); ++ ++ mii_do_cmd(dev, fep->phy->startup); ++#ifdef CONFIG_PM826 ++ /* Read the autonegotiation results */ ++ mii_do_cmd(dev, fep->phy->ack_int); ++ mii_do_cmd(dev, phy_cmd_relink); ++#endif /* CONFIG_PM826 */ ++ rtnetif_start_queue(rtdev); ++ return 0; /* Success */ ++ } ++ return -ENODEV; /* No PHY we understand */ ++#else ++ fep->link = 1; ++ rtnetif_start_queue(rtdev); ++ return 0; /* Always succeed */ ++#endif /* CONFIG_XENO_DRIVERS_NET_USE_MDIO */ ++} ++ ++static void __exit fcc_enet_cleanup(void) ++{ ++ struct rtnet_device *rtdev; ++ volatile immap_t *immap = (immap_t *)IMAP_ADDR; ++ struct fcc_enet_private *cep; ++ fcc_info_t *fip; ++ int np; ++ ++ for (np = 0, fip = fcc_ports; ++ np < sizeof(fcc_ports) / sizeof(fcc_info_t); ++ np++, fip++) { ++ ++ /* Skip FCC ports not used for RTnet. */ ++ if (np != rtnet_fcc - 1) continue; ++ ++ rtdev = fip->rtdev; ++ cep = (struct fcc_enet_private *)rtdev->priv; ++ ++ rtdm_irq_disable(&cep->irq_handle); ++ rtdm_irq_free(&cep->irq_handle); ++ ++ init_fcc_shutdown(fip, cep, immap); ++ printk("%s: cleanup incomplete (m8260_cpm_dpfree does not exit)!\n", ++ rtdev->name); ++ rt_stack_disconnect(rtdev); ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ ++ printk("%s: unloaded\n", rtdev->name); ++ rtdev_free(rtdev); ++ fip++; ++ } ++} ++ ++module_init(fec_enet_init); ++module_exit(fcc_enet_cleanup); +--- linux/drivers/xenomai/net/drivers/r8169.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/r8169.c 2021-04-07 16:01:27.238634150 +0800 +@@ -0,0 +1,2046 @@ ++/* ++========================================================================= ++ r8169.c: A RealTek RTL8169s/8110s Gigabit Ethernet driver for Linux kernel 2.4.x. 
++ -------------------------------------------------------------------- ++ ++ History: ++ Feb 4 2002 - created initially by ShuChen . ++ May 20 2002 - Add link status force-mode and TBI mode support. ++========================================================================= ++ ++RTL8169_VERSION "1.1" <2002/10/4> ++ ++ The bit4:0 of MII register 4 is called "selector field", and have to be ++ 00001b to indicate support of IEEE std 802.3 during NWay process of ++ exchanging Link Code Word (FLP). ++ ++RTL8169_VERSION "1.2" <2003/6/17> ++ Update driver module name. ++ Modify ISR. ++ Add chip mcfg. ++ ++RTL8169_VERSION "1.3" <2003/6/20> ++ Add chip pcfg. ++ Add priv->phy_timer_t, rtl8169_phy_timer_t_handler() ++ Add rtl8169_hw_PHY_config() ++ Add rtl8169_hw_PHY_reset() ++ ++RTL8169_VERSION "1.4" <2003/7/14> ++ Add tx_bytes, rx_bytes. ++ ++RTL8169_VERSION "1.5" <2003/7/18> ++ Set 0x0000 to PHY at offset 0x0b. ++ Modify chip mcfg, pcfg ++ Force media for multiple card. ++RTL8169_VERSION "1.6" <2003/8/25> ++ Modify receive data buffer. ++ ++RTL8169_VERSION "1.7" <2003/9/18> ++ Add Jumbo Frame support. ++ ++RTL8169_VERSION "1.8" <2003/10/21> ++ Performance and CPU Utilizaion Enhancement. ++ ++RTL8169_VERSION "1.9" <2003/12/29> ++ Enable Tx/Rx flow control. ++ ++RTL8169_VERSION "2.0" <2004/03/26> ++ Beta version. ++ Support for linux 2.6.x ++ ++RTL8169_VERSION "2.1" <2004/07/05> ++ Modify parameters. ++ ++RTL8169_VERSION "2.2" <2004/08/09> ++ Add.pci_dma_sync_single. ++ Add pci_alloc_consistent()/pci_free_consistent(). ++ Revise parameters. ++ Recognize our interrupt for linux 2.6.x. ++*/ ++ ++/* ++ * Ported to RTnet by Klaus Keppler ++ * All RTnet porting stuff may be used and distributed according to the ++ * terms of the GNU General Public License (GPL). ++ * ++ * Version 2.2-04 <2005/08/22> ++ * Initial release of this driver, based on RTL8169 driver v2.2 ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include /*** RTnet ***/ ++ ++#define RTL8169_VERSION "2.2-04" ++#define MODULENAME "rt_r8169" ++#define RTL8169_DRIVER_NAME MODULENAME " RTnet Gigabit Ethernet driver " RTL8169_VERSION ++#define PFX MODULENAME ": " ++ ++//#define RTL8169_DEBUG ++#undef RTL8169_JUMBO_FRAME_SUPPORT /*** RTnet: no not enable! ***/ ++#undef RTL8169_HW_FLOW_CONTROL_SUPPORT ++ ++ ++#undef RTL8169_IOCTL_SUPPORT /*** RTnet: do not enable! ***/ ++#undef RTL8169_DYNAMIC_CONTROL ++#undef RTL8169_USE_IO ++ ++ ++#ifdef RTL8169_DEBUG ++ #define assert(expr) \ ++ if(!(expr)) { printk( "Assertion failed! %s,%s,%s,line=%d\n", #expr,__FILE__,__FUNCTION__,__LINE__); } ++ /*** RTnet / : rt_assert must be used instead of assert() within interrupt context! ***/ ++ #define rt_assert(expr) \ ++ if(!(expr)) { rtdm_printk( "Assertion failed! %s,%s,%s,line=%d\n", #expr,__FILE__,__FUNCTION__,__LINE__); } ++ /*** RTnet / : RT_DBG_PRINT must be used instead of DBG_PRINT() within interrupt context! ***/ ++ #define DBG_PRINT( fmt, args...) printk("r8169: " fmt, ## args); ++ #define RT_DBG_PRINT( fmt, args...) rtdm_printk("r8169: " fmt, ## args); ++#else ++ #define assert(expr) do {} while (0) ++ #define rt_assert(expr) do {} while (0) ++ #define DBG_PRINT( fmt, args...) ; ++ #define RT_DBG_PRINT( fmt, args...) ; ++#endif // end of #ifdef RTL8169_DEBUG ++ ++/* media options */ ++#define MAX_UNITS 8 ++static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; ++ ++/*** RTnet ***/ ++static int cards[MAX_UNITS] = { [0 ... 
(MAX_UNITS-1)] = 1 }; ++module_param_array(cards, int, NULL, 0444); ++MODULE_PARM_DESC(cards, "array of cards to be supported (e.g. 1,0,1)"); ++/*** /RTnet ***/ ++ ++/* Enable debugging output */ ++#define DEBUG_RX_SYNC 1 ++#define DEBUG_RX_OTHER 2 ++#define DEBUG_TX_SYNC 4 ++#define DEBUG_TX_OTHER 8 ++#define DEBUG_RUN 16 ++static int local_debug = -1; ++static int r8169_debug = -1; ++module_param_named(debug, local_debug, int, 0444); ++MODULE_PARM_DESC(debug, MODULENAME " debug level (bit mask, see docs!)"); ++ ++ ++/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ ++static int max_interrupt_work = 20; ++ ++/* MAC address length*/ ++#define MAC_ADDR_LEN 6 ++ ++#define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. */ ++#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define TX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ ++#define ETTh 0x3F /* 0x3F means NO threshold */ ++ ++#define ETH_HDR_LEN 14 ++#define DEFAULT_MTU 1500 ++#define DEFAULT_RX_BUF_LEN 1536 ++ ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++#define MAX_JUMBO_FRAME_MTU ( 10000 ) ++#define MAX_RX_SKBDATA_SIZE ( MAX_JUMBO_FRAME_MTU + ETH_HDR_LEN ) ++#else ++#define MAX_RX_SKBDATA_SIZE 1600 ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ ++#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ ++ ++//#define NUM_TX_DESC 64 /* Number of Tx descriptor registers*/ ++//#define NUM_RX_DESC 64 /* Number of Rx descriptor registers*/ ++ ++#define TX_RING_SIZE 16 /*** RTnet ***/ ++#define NUM_TX_DESC TX_RING_SIZE /* Number of Tx descriptor registers*/ /*** RTnet ***/ ++#define RX_RING_SIZE 8 /*** RTnet ***/ ++#define NUM_RX_DESC RX_RING_SIZE /* Number of Rx descriptor registers*/ /*** RTnet ***/ ++ ++#define RTL_MIN_IO_SIZE 0x80 ++#define TX_TIMEOUT (6*HZ) ++//#define RTL8169_TIMER_EXPIRE_TIME 100 //100 /*** RTnet ***/ ++ ++ ++#ifdef RTL8169_USE_IO ++#define RTL_W8(reg, val8) outb ((val8), ioaddr + (reg)) ++#define RTL_W16(reg, val16) outw ((val16), ioaddr + (reg)) ++#define RTL_W32(reg, val32) outl ((val32), ioaddr + (reg)) ++#define RTL_R8(reg) inb (ioaddr + (reg)) ++#define RTL_R16(reg) inw (ioaddr + (reg)) ++#define RTL_R32(reg) ((unsigned long) inl (ioaddr + (reg))) ++#else ++/* write/read MMIO register */ ++#define RTL_W8(reg, val8) writeb ((val8), (void *)ioaddr + (reg)) ++#define RTL_W16(reg, val16) writew ((val16), (void *)ioaddr + (reg)) ++#define RTL_W32(reg, val32) writel ((val32), (void *)ioaddr + (reg)) ++#define RTL_R8(reg) readb ((void *)ioaddr + (reg)) ++#define RTL_R16(reg) readw ((void *)ioaddr + (reg)) ++#define RTL_R32(reg) ((unsigned long) readl ((void *)ioaddr + (reg))) ++#endif ++ ++#define MCFG_METHOD_1 0x01 ++#define MCFG_METHOD_2 0x02 ++#define MCFG_METHOD_3 0x03 ++#define MCFG_METHOD_4 0x04 ++ ++#define PCFG_METHOD_1 0x01 //PHY Reg 0x03 bit0-3 == 0x0000 ++#define PCFG_METHOD_2 0x02 //PHY Reg 0x03 bit0-3 == 0x0001 ++#define PCFG_METHOD_3 0x03 //PHY Reg 0x03 bit0-3 == 0x0002 ++ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++#include "r8169_callback.h" ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ ++const static struct { ++ const char *name; ++ u8 mcfg; /* depend on RTL8169 docs */ ++ u32 RxConfigMask; /* should clear the bits supported by this chip */ ++} rtl_chip_info[] = { ++ { "RTL8169", MCFG_METHOD_1, 0xff7e1880 }, ++ { "RTL8169s/8110s", MCFG_METHOD_2, 0xff7e1880 }, ++ { "RTL8169s/8110s", MCFG_METHOD_3, 0xff7e1880 }, ++}; ++ ++ ++static struct pci_device_id rtl8169_pci_tbl[] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 
0x8136), 0, 0, 2 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, 1 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, 1 }, ++ { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, 1 }, /* D-Link DGE-528T */ ++ {0,}, ++}; ++ ++ ++MODULE_DEVICE_TABLE (pci, rtl8169_pci_tbl); ++ ++ ++enum RTL8169_registers { ++ MAC0 = 0x0, ++ MAR0 = 0x8, ++ TxDescStartAddr = 0x20, ++ TxHDescStartAddr= 0x28, ++ FLASH = 0x30, ++ ERSR = 0x36, ++ ChipCmd = 0x37, ++ TxPoll = 0x38, ++ IntrMask = 0x3C, ++ IntrStatus = 0x3E, ++ TxConfig = 0x40, ++ RxConfig = 0x44, ++ RxMissed = 0x4C, ++ Cfg9346 = 0x50, ++ Config0 = 0x51, ++ Config1 = 0x52, ++ Config2 = 0x53, ++ Config3 = 0x54, ++ Config4 = 0x55, ++ Config5 = 0x56, ++ MultiIntr = 0x5C, ++ PHYAR = 0x60, ++ TBICSR = 0x64, ++ TBI_ANAR = 0x68, ++ TBI_LPAR = 0x6A, ++ PHYstatus = 0x6C, ++ RxMaxSize = 0xDA, ++ CPlusCmd = 0xE0, ++ RxDescStartAddr = 0xE4, ++ ETThReg = 0xEC, ++ FuncEvent = 0xF0, ++ FuncEventMask = 0xF4, ++ FuncPresetState = 0xF8, ++ FuncForceEvent = 0xFC, ++}; ++ ++enum RTL8169_register_content { ++ /*InterruptStatusBits*/ ++ SYSErr = 0x8000, ++ PCSTimeout = 0x4000, ++ SWInt = 0x0100, ++ TxDescUnavail = 0x80, ++ RxFIFOOver = 0x40, ++ LinkChg = 0x20, ++ RxOverflow = 0x10, ++ TxErr = 0x08, ++ TxOK = 0x04, ++ RxErr = 0x02, ++ RxOK = 0x01, ++ ++ /*RxStatusDesc*/ ++ RxRES = 0x00200000, ++ RxCRC = 0x00080000, ++ RxRUNT= 0x00100000, ++ RxRWT = 0x00400000, ++ ++ /*ChipCmdBits*/ ++ CmdReset = 0x10, ++ CmdRxEnb = 0x08, ++ CmdTxEnb = 0x04, ++ RxBufEmpty = 0x01, ++ ++ /*Cfg9346Bits*/ ++ Cfg9346_Lock = 0x00, ++ Cfg9346_Unlock = 0xC0, ++ ++ /*rx_mode_bits*/ ++ AcceptErr = 0x20, ++ AcceptRunt = 0x10, ++ AcceptBroadcast = 0x08, ++ AcceptMulticast = 0x04, ++ AcceptMyPhys = 0x02, ++ AcceptAllPhys = 0x01, ++ ++ /*RxConfigBits*/ ++ RxCfgFIFOShift = 13, ++ RxCfgDMAShift = 8, ++ ++ /*TxConfigBits*/ ++ TxInterFrameGapShift = 24, ++ TxDMAShift = 8, ++ ++ /* Config2 register */ ++ MSIEnable = (1 << 5), ++ ++ /*rtl8169_PHYstatus*/ ++ TBI_Enable = 0x80, ++ TxFlowCtrl = 0x40, ++ RxFlowCtrl = 0x20, ++ _1000bpsF = 0x10, ++ _100bps = 0x08, ++ _10bps = 0x04, ++ LinkStatus = 0x02, ++ FullDup = 0x01, ++ ++ /*GIGABIT_PHY_registers*/ ++ PHY_CTRL_REG = 0, ++ PHY_STAT_REG = 1, ++ PHY_AUTO_NEGO_REG = 4, ++ PHY_1000_CTRL_REG = 9, ++ ++ /*GIGABIT_PHY_REG_BIT*/ ++ PHY_Restart_Auto_Nego = 0x0200, ++ PHY_Enable_Auto_Nego = 0x1000, ++ ++ //PHY_STAT_REG = 1; ++ PHY_Auto_Neco_Comp = 0x0020, ++ ++ //PHY_AUTO_NEGO_REG = 4; ++ PHY_Cap_10_Half = 0x0020, ++ PHY_Cap_10_Full = 0x0040, ++ PHY_Cap_100_Half = 0x0080, ++ PHY_Cap_100_Full = 0x0100, ++ ++ //PHY_1000_CTRL_REG = 9; ++ PHY_Cap_1000_Full = 0x0200, ++ PHY_Cap_1000_Half = 0x0100, ++ ++ PHY_Cap_PAUSE = 0x0400, ++ PHY_Cap_ASYM_PAUSE = 0x0800, ++ ++ PHY_Cap_Null = 0x0, ++ ++ /*_MediaType*/ ++ _10_Half = 0x01, ++ _10_Full = 0x02, ++ _100_Half = 0x04, ++ _100_Full = 0x08, ++ _1000_Full = 0x10, ++ ++ /*_TBICSRBit*/ ++ TBILinkOK = 0x02000000, ++}; ++ ++ ++ ++enum _DescStatusBit { ++ OWNbit = 0x80000000, ++ EORbit = 0x40000000, ++ FSbit = 0x20000000, ++ LSbit = 0x10000000, ++}; ++ ++ ++struct TxDesc { ++ u32 status; ++ u32 vlan_tag; ++ u32 buf_addr; ++ u32 buf_Haddr; ++}; ++ ++struct RxDesc { ++ u32 status; ++ u32 vlan_tag; ++ u32 buf_addr; ++ u32 buf_Haddr; ++}; ++ ++ ++typedef struct timer_list rt_timer_t; ++ ++enum rtl8169_features { ++ RTL_FEATURE_WOL = (1 << 0), ++ RTL_FEATURE_MSI = (1 << 1), ++ RTL_FEATURE_GMII = (1 << 2), ++}; ++ ++ ++struct rtl8169_private { ++ unsigned long ioaddr; /* memory map physical address*/ ++ struct pci_dev *pci_dev; /* 
Index of PCI device */ ++ struct net_device_stats stats; /* statistics of net device */ ++ rtdm_lock_t lock; /* spin lock flag */ /*** RTnet ***/ ++ int chipset; ++ int mcfg; ++ int pcfg; ++/* rt_timer_t r8169_timer; */ /*** RTnet ***/ ++/* unsigned long expire_time; */ /*** RTnet ***/ ++ ++ unsigned long phy_link_down_cnt; ++ unsigned long cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ ++ unsigned long cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ ++ unsigned long dirty_tx; ++ struct TxDesc *TxDescArray; /* Index of 256-alignment Tx Descriptor buffer */ ++ struct RxDesc *RxDescArray; /* Index of 256-alignment Rx Descriptor buffer */ ++ struct rtskb *Tx_skbuff[NUM_TX_DESC];/* Index of Transmit data buffer */ /*** RTnet ***/ ++ struct rtskb *Rx_skbuff[NUM_RX_DESC];/* Receive data buffer */ /*** RTnet ***/ ++ unsigned char drvinit_fail; ++ ++ dma_addr_t txdesc_array_dma_addr[NUM_TX_DESC]; ++ dma_addr_t rxdesc_array_dma_addr[NUM_RX_DESC]; ++ dma_addr_t rx_skbuff_dma_addr[NUM_RX_DESC]; ++ ++ void *txdesc_space; ++ dma_addr_t txdesc_phy_dma_addr; ++ int sizeof_txdesc_space; ++ ++ void *rxdesc_space; ++ dma_addr_t rxdesc_phy_dma_addr; ++ int sizeof_rxdesc_space; ++ ++ int curr_mtu_size; ++ int tx_pkt_len; ++ int rx_pkt_len; ++ ++ int hw_rx_pkt_len; ++ ++ int rx_buf_size; /*** RTnet / ***/ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ struct r8169_cb_t rt; ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ unsigned char linkstatus; ++ rtdm_irq_t irq_handle; /*** RTnet ***/ ++ ++ unsigned features; ++}; ++ ++ ++MODULE_AUTHOR ("Realtek, modified for RTnet by Klaus.Keppler@gmx.de"); ++MODULE_DESCRIPTION ("RealTek RTL-8169 Gigabit Ethernet driver"); ++module_param_array(media, int, NULL, 0444); ++MODULE_LICENSE("GPL"); ++ ++ ++static int rtl8169_open (struct rtnet_device *rtdev); ++static int rtl8169_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev); ++ ++static int rtl8169_interrupt(rtdm_irq_t *irq_handle); ++ ++static void rtl8169_init_ring (struct rtnet_device *rtdev); ++static void rtl8169_hw_start (struct rtnet_device *rtdev); ++static int rtl8169_close (struct rtnet_device *rtdev); ++static inline u32 ether_crc (int length, unsigned char *data); ++static void rtl8169_set_rx_mode (struct rtnet_device *rtdev); ++/* static void rtl8169_tx_timeout (struct net_device *dev); */ /*** RTnet ***/ ++static struct net_device_stats *rtl8169_get_stats(struct rtnet_device *netdev); ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu); ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++static void rtl8169_hw_PHY_config (struct rtnet_device *rtdev); ++/* static void rtl8169_hw_PHY_reset(struct net_device *dev); */ /*** RTnet ***/ ++static const u16 rtl8169_intr_mask = LinkChg | RxOverflow | RxFIFOOver | TxErr | TxOK | RxErr | RxOK | SYSErr; /*** added SYSErr ***/ ++static const unsigned int rtl8169_rx_config = (RX_FIFO_THRESH << RxCfgFIFOShift) | (RX_DMA_BURST << RxCfgDMAShift) | 0x0000000E; ++ ++/*** these functions are backported from Linux-2.6.12's r8169.c driver ***/ ++static void rtl8169_irq_mask_and_ack(unsigned long ioaddr); ++/* static void rtl8169_asic_down(unsigned long ioaddr); */ /*** RTnet ***/ ++static void rtl8169_pcierr_interrupt(struct rtnet_device *rtdev); ++ ++#define RTL8169_WRITE_GMII_REG_BIT( ioaddr, reg, bitnum, bitval )\ ++{ \ ++ int val; \ ++ if( bitval == 1 ){ val = ( RTL8169_READ_GMII_REG( ioaddr, reg ) | (bitval< 0 ; i -- ){ ++ // Check if the RTL8169 has completed writing to the 
specified MII register ++ if( ! (RTL_R32(PHYAR)&0x80000000) ){ ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( ! (RTL_R32(PHYAR)&0x80000000) ) ++ }// end of for() loop ++} ++//================================================================= ++int RTL8169_READ_GMII_REG( unsigned long ioaddr, int RegAddr ) ++{ ++ int i, value = -1; ++ ++ RTL_W32 ( PHYAR, 0x0 | (RegAddr&0xFF)<<16 ); ++ udelay(1000); ++ ++ for( i = 2000; i > 0 ; i -- ){ ++ // Check if the RTL8169 has completed retrieving data from the specified MII register ++ if( RTL_R32(PHYAR) & 0x80000000 ){ ++ value = (int)( RTL_R32(PHYAR)&0xFFFF ); ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( RTL_R32(PHYAR) & 0x80000000 ) ++ }// end of for() loop ++ return value; ++} ++ ++ ++#ifdef RTL8169_IOCTL_SUPPORT ++#include "r8169_ioctl.c" ++#endif //end #ifdef RTL8169_IOCTL_SUPPORT ++ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++#include "r8169_callback.c" ++#endif ++ ++ ++ ++//====================================================================================================== ++//====================================================================================================== ++static int rtl8169_init_board ( struct pci_dev *pdev, struct rtnet_device **dev_out, unsigned long *ioaddr_out, int region) ++{ ++ unsigned long ioaddr = 0; ++ struct rtnet_device *rtdev; ++ struct rtl8169_private *priv; ++ int rc, i; ++ unsigned long mmio_start, mmio_end, mmio_flags, mmio_len; ++ ++ ++ assert (pdev != NULL); ++ assert (ioaddr_out != NULL); ++ ++ *ioaddr_out = 0; ++ *dev_out = NULL; ++ ++ /*** RTnet ***/ ++ rtdev = rt_alloc_etherdev(sizeof(struct rtl8169_private), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (rtdev == NULL) { ++ printk (KERN_ERR PFX "unable to alloc new ethernet\n"); ++ return -ENOMEM; ++ } ++ rtdev_alloc_name(rtdev, "rteth%d"); ++ rt_rtdev_connect(rtdev, &RTDEV_manager); ++ rtdev->vers = RTDEV_VERS_2_0; ++ rtdev->sysbind = &pdev->dev; ++ /*** /RTnet ***/ ++ ++ priv = rtdev->priv; ++ ++ /* disable ASPM completely as that cause random device stop working ++ * problems as well as full system hangs for some PCIe devices users */ ++ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 | ++ PCIE_LINK_STATE_CLKPM); ++ ++ // enable device (incl. PCI PM wakeup and hotplug setup) ++ rc = pci_enable_device (pdev); ++ if (rc) ++ goto err_out; ++ ++ if (pci_set_mwi(pdev) < 0) ++ printk("R8169: Mem-Wr-Inval unavailable\n"); ++ ++ mmio_start = pci_resource_start (pdev, region); ++ mmio_end = pci_resource_end (pdev, region); ++ mmio_flags = pci_resource_flags (pdev, region); ++ mmio_len = pci_resource_len (pdev, region); ++ ++ // make sure PCI base addr 1 is MMIO ++ if (!(mmio_flags & IORESOURCE_MEM)) { ++ printk (KERN_ERR PFX "region #%d not an MMIO resource, aborting\n", region); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ ++ // check for weird/broken PCI region reporting ++ if ( mmio_len < RTL_MIN_IO_SIZE ) { ++ printk (KERN_ERR PFX "Invalid PCI region size(s), aborting\n"); ++ rc = -ENODEV; ++ goto err_out; ++ } ++ ++ ++ rc = pci_request_regions (pdev, rtdev->name); ++ if (rc) ++ goto err_out; ++ ++ // enable PCI bus-mastering ++ pci_set_master (pdev); ++ ++#ifdef RTL8169_USE_IO ++ ioaddr = pci_resource_start(pdev, 0); ++#else ++ // ioremap MMIO region ++ ioaddr = (unsigned long)ioremap (mmio_start, mmio_len); ++ if (ioaddr == 0) { ++ printk (KERN_ERR PFX "cannot remap MMIO, aborting\n"); ++ rc = -EIO; ++ goto err_out_free_res; ++ } ++#endif ++ ++ // Soft reset the chip. 
++ RTL_W8 ( ChipCmd, CmdReset); ++ ++ // Check that the chip has finished the reset. ++ for (i = 1000; i > 0; i--){ ++ if ( (RTL_R8(ChipCmd) & CmdReset) == 0){ ++ break; ++ } ++ else{ ++ udelay (10); ++ } ++ } ++ ++ { ++ u8 cfg2 = RTL_R8(Config2) & ~MSIEnable; ++ if (region) { ++ if (pci_enable_msi(pdev)) ++ printk("R8169: no MSI, Back to INTx.\n"); ++ else { ++ cfg2 |= MSIEnable; ++ priv->features |= RTL_FEATURE_MSI; ++ } ++ } ++ RTL_W8(Config2, cfg2); ++ } ++ ++ // identify config method ++ { ++ unsigned long val32 = (RTL_R32(TxConfig)&0x7c800000); ++ ++ if( val32 == (0x1<<28) ){ ++ priv->mcfg = MCFG_METHOD_4; ++ } ++ else if( val32 == (0x1<<26) ){ ++ priv->mcfg = MCFG_METHOD_3; ++ } ++ else if( val32 == (0x1<<23) ){ ++ priv->mcfg = MCFG_METHOD_2; ++ } ++ else if( val32 == 0x00000000 ){ ++ priv->mcfg = MCFG_METHOD_1; ++ } ++ else{ ++ priv->mcfg = MCFG_METHOD_1; ++ } ++ } ++ ++ { ++ unsigned char val8 = (unsigned char)(RTL8169_READ_GMII_REG(ioaddr,3)&0x000f); ++ if( val8 == 0x00 ){ ++ priv->pcfg = PCFG_METHOD_1; ++ } ++ else if( val8 == 0x01 ){ ++ priv->pcfg = PCFG_METHOD_2; ++ } ++ else if( val8 == 0x02 ){ ++ priv->pcfg = PCFG_METHOD_3; ++ } ++ else{ ++ priv->pcfg = PCFG_METHOD_3; ++ } ++ } ++ ++ ++ for (i = ARRAY_SIZE (rtl_chip_info) - 1; i >= 0; i--){ ++ if (priv->mcfg == rtl_chip_info[i].mcfg) { ++ priv->chipset = i; ++ goto match; ++ } ++ } ++ ++ //if unknown chip, assume array element #0, original RTL-8169 in this case ++ printk (KERN_DEBUG PFX "PCI device %s: unknown chip version, assuming RTL-8169\n", pci_name(pdev)); ++ priv->chipset = 0; ++ ++match: ++ *ioaddr_out = ioaddr; ++ *dev_out = rtdev; ++ return 0; ++ ++#ifndef RTL8169_USE_IO ++err_out_free_res: ++#endif ++ pci_release_regions (pdev); /*** moved outside of #ifdev ***/ ++ ++err_out: ++ /*** RTnet ***/ ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free(rtdev); ++ /*** /RTnet ***/ ++ return rc; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) ++{ ++ struct rtnet_device *rtdev = NULL; /*** RTnet ***/ ++ struct rtl8169_private *priv = NULL; ++ unsigned long ioaddr = 0; ++ static int board_idx = -1; ++ int region = ent->driver_data; ++ int i; ++ int option = -1, Cap10_100 = 0, Cap1000 = 0; ++ ++ ++ assert (pdev != NULL); ++ assert (ent != NULL); ++ ++ board_idx++; ++ ++ /*** RTnet ***/ ++ if (board_idx >= MAX_UNITS) { ++ return -ENODEV; ++ } ++ if (cards[board_idx] == 0) ++ return -ENODEV; ++ /*** RTnet ***/ ++ ++ i = rtl8169_init_board (pdev, &rtdev, &ioaddr, region); ++ if (i < 0) { ++ return i; ++ } ++ ++ priv = rtdev->priv; ++ ++ assert (ioaddr != 0); ++ assert (rtdev != NULL); ++ assert (priv != NULL); ++ ++ // Get MAC address // ++ for (i = 0; i < MAC_ADDR_LEN ; i++){ ++ rtdev->dev_addr[i] = RTL_R8( MAC0 + i ); ++ } ++ ++ rtdev->open = rtl8169_open; ++ rtdev->hard_start_xmit = rtl8169_start_xmit; ++ rtdev->get_stats = rtl8169_get_stats; ++ rtdev->stop = rtl8169_close; ++ /* dev->tx_timeout = rtl8169_tx_timeout; */ /*** RTnet ***/ ++ /* dev->set_multicast_list = rtl8169_set_rx_mode; */ /*** RTnet ***/ ++ /* dev->watchdog_timeo = TX_TIMEOUT; */ /*** RTnet ***/ ++ rtdev->irq = pdev->irq; ++ rtdev->base_addr = (unsigned long) ioaddr; ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ rtdev->change_mtu = rtl8169_change_mtu; ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++#ifdef RTL8169_IOCTL_SUPPORT ++ rtdev->do_ioctl = rtl8169_ioctl; ++#endif //end #ifdef 
RTL8169_IOCTL_SUPPORT ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ priv->rt.dev = rtdev; ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ priv = rtdev->priv; // private data // ++ priv->pci_dev = pdev; ++ priv->ioaddr = ioaddr; ++ ++//#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ priv->curr_mtu_size = rtdev->mtu; ++ priv->tx_pkt_len = rtdev->mtu + ETH_HDR_LEN; ++ priv->rx_pkt_len = rtdev->mtu + ETH_HDR_LEN; ++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8; ++//#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ DBG_PRINT("-------------------------- \n"); ++ DBG_PRINT("dev->mtu = %d \n", rtdev->mtu); ++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size); ++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len); ++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len); ++ DBG_PRINT("priv->hw_rx_pkt_len = %d \n", priv->hw_rx_pkt_len); ++ DBG_PRINT("-------------------------- \n"); ++ ++ rtdm_lock_init(&priv->lock); /*** RTnet ***/ ++ ++ /*** RTnet ***/ ++ if (rt_register_rtnetdev(rtdev) < 0) { ++ /* clean up... */ ++ pci_release_regions (pdev); ++ rt_rtdev_disconnect(rtdev); ++ rtdev_free(rtdev); ++ return -ENODEV; ++ } ++ /*** /RTnet ***/ ++ ++ pci_set_drvdata(pdev, rtdev); // pdev->driver_data = data; ++ ++ printk (KERN_DEBUG "%s: Identified chip type is '%s'.\n", rtdev->name, rtl_chip_info[priv->chipset].name); ++ printk (KERN_INFO "%s: %s at 0x%lx, " ++ "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " ++ "IRQ %d\n", ++ rtdev->name, ++ RTL8169_DRIVER_NAME, ++ rtdev->base_addr, ++ rtdev->dev_addr[0], rtdev->dev_addr[1], ++ rtdev->dev_addr[2], rtdev->dev_addr[3], ++ rtdev->dev_addr[4], rtdev->dev_addr[5], ++ rtdev->irq); ++ ++ // Config PHY ++ rtl8169_hw_PHY_config(rtdev); ++ ++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); ++ RTL_W8( 0x82, 0x01 ); ++ ++ if( priv->mcfg < MCFG_METHOD_3 ){ ++ DBG_PRINT("Set PCI Latency=0x40\n"); ++ pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x40); ++ } ++ ++ if( priv->mcfg == MCFG_METHOD_2 ){ ++ DBG_PRINT("Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); ++ RTL_W8( 0x82, 0x01 ); ++ DBG_PRINT("Set PHY Reg 0x0bh = 0x00h\n"); ++ RTL8169_WRITE_GMII_REG( ioaddr, 0x0b, 0x0000 ); //w 0x0b 15 0 0 ++ } ++ ++ // if TBI is not endbled ++ if( !(RTL_R8(PHYstatus) & TBI_Enable) ){ ++ int val = RTL8169_READ_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG ); ++ ++#ifdef RTL8169_HW_FLOW_CONTROL_SUPPORT ++ val |= PHY_Cap_PAUSE | PHY_Cap_ASYM_PAUSE ; ++#endif //end #define RTL8169_HW_FLOW_CONTROL_SUPPORT ++ ++ option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx]; ++ // Force RTL8169 in 10/100/1000 Full/Half mode. ++ if( option > 0 ){ ++ printk(KERN_INFO "%s: Force-mode Enabled. 
\n", rtdev->name); ++ Cap10_100 = 0; ++ Cap1000 = 0; ++ switch( option ){ ++ case _10_Half: ++ Cap10_100 = PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _10_Full: ++ Cap10_100 = PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _100_Half: ++ Cap10_100 = PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _100_Full: ++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_Null; ++ break; ++ case _1000_Full: ++ Cap10_100 = PHY_Cap_100_Full | PHY_Cap_100_Half | PHY_Cap_10_Full | PHY_Cap_10_Half; ++ Cap1000 = PHY_Cap_1000_Full; ++ break; ++ default: ++ break; ++ } ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG, Cap10_100 | ( val&0xC1F ) ); //leave PHY_AUTO_NEGO_REG bit4:0 unchanged ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, Cap1000 ); ++ } ++ else{ ++ printk(KERN_INFO "%s: Auto-negotiation Enabled.\n", rtdev->name); ++ ++ // enable 10/100 Full/Half Mode, leave PHY_AUTO_NEGO_REG bit4:0 unchanged ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_AUTO_NEGO_REG, ++ PHY_Cap_10_Half | PHY_Cap_10_Full | PHY_Cap_100_Half | PHY_Cap_100_Full | ( val&0xC1F ) ); ++ ++ // enable 1000 Full Mode ++// RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full ); ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_1000_CTRL_REG, PHY_Cap_1000_Full | PHY_Cap_1000_Half); //rtl8168 ++ ++ }// end of if( option > 0 ) ++ ++ // Enable auto-negotiation and restart auto-nigotiation ++ RTL8169_WRITE_GMII_REG( ioaddr, PHY_CTRL_REG, PHY_Enable_Auto_Nego | PHY_Restart_Auto_Nego ); ++ udelay(100); ++ ++ // wait for auto-negotiation process ++ for( i = 10000; i > 0; i-- ){ ++ //check if auto-negotiation complete ++ if( RTL8169_READ_GMII_REG(ioaddr, PHY_STAT_REG) & PHY_Auto_Neco_Comp ){ ++ udelay(100); ++ option = RTL_R8(PHYstatus); ++ if( option & _1000bpsF ){ ++ printk(KERN_INFO "%s: 1000Mbps Full-duplex operation.\n", rtdev->name); ++ } ++ else{ ++ printk(KERN_INFO "%s: %sMbps %s-duplex operation.\n", rtdev->name, ++ (option & _100bps) ? "100" : "10", (option & FullDup) ? "Full" : "Half" ); ++ } ++ break; ++ } ++ else{ ++ udelay(100); ++ }// end of if( RTL8169_READ_GMII_REG(ioaddr, 1) & 0x20 ) ++ }// end for-loop to wait for auto-negotiation process ++ ++ option = RTL_R8(PHYstatus); ++ if( option & _1000bpsF ){ ++ priv->linkstatus = _1000_Full; ++ } ++ else{ ++ if(option & _100bps){ ++ priv->linkstatus = (option & FullDup) ? _100_Full : _100_Half; ++ } ++ else{ ++ priv->linkstatus = (option & FullDup) ? _10_Full : _10_Half; ++ } ++ } ++ DBG_PRINT("priv->linkstatus = 0x%02x\n", priv->linkstatus); ++ ++ }// end of TBI is not enabled ++ else{ ++ udelay(100); ++ DBG_PRINT("1000Mbps Full-duplex operation, TBI Link %s!\n",(RTL_R32(TBICSR) & TBILinkOK) ? 
"OK" : "Failed" ); ++ ++ }// end of TBI is not enabled ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_remove_one (struct pci_dev *pdev) ++{ ++ struct rtnet_device *rtdev = pci_get_drvdata(pdev); ++ struct rtl8169_private *priv = rtdev->priv;; ++ ++ assert (rtdev != NULL); ++ ++ /*** RTnet ***/ ++ rt_unregister_rtnetdev(rtdev); ++ rt_rtdev_disconnect(rtdev); ++ /*** /RTnet ***/ ++ ++ if (priv->features & RTL_FEATURE_MSI) ++ pci_disable_msi(pdev); ++ ++#ifdef RTL8169_USE_IO ++#else ++ iounmap ((void *)(rtdev->base_addr)); ++#endif ++ pci_release_regions(pdev); ++ ++ rtdev_free(rtdev); /*** RTnet ***/ ++ ++ pci_disable_device(pdev); /*** Disable device now :-) ***/ ++ ++ pci_set_drvdata(pdev, NULL); ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_open (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ int retval; ++// u8 diff; ++// u32 TxPhyAddr, RxPhyAddr; ++ ++ if( priv->drvinit_fail == 1 ){ ++ printk("%s: Gigabit driver open failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ ++ /*** RTnet ***/ ++ rt_stack_connect(rtdev, &STACK_manager); ++ ++ retval = rtdm_irq_request(&priv->irq_handle, rtdev->irq, rtl8169_interrupt, 0, "rt_r8169", rtdev); ++ /*** /RTnet ***/ ++ ++ // retval = request_irq (dev->irq, rtl8169_interrupt, SA_SHIRQ, dev->name, dev); ++ if (retval) { ++ return retval; ++ } ++ ++ ++ //2004-05-11 ++ // Allocate tx/rx descriptor space ++ priv->sizeof_txdesc_space = NUM_TX_DESC * sizeof(struct TxDesc)+256; ++ priv->txdesc_space = pci_alloc_consistent( pdev, priv->sizeof_txdesc_space, &priv->txdesc_phy_dma_addr ); ++ if( priv->txdesc_space == NULL ){ ++ printk("%s: Gigabit driver alloc txdesc_space failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ priv->sizeof_rxdesc_space = NUM_RX_DESC * sizeof(struct RxDesc)+256; ++ priv->rxdesc_space = pci_alloc_consistent( pdev, priv->sizeof_rxdesc_space, &priv->rxdesc_phy_dma_addr ); ++ if( priv->rxdesc_space == NULL ){ ++ printk("%s: Gigabit driver alloc rxdesc_space failed.\n", rtdev->name ); ++ return -ENOMEM; ++ } ++ ++ if(priv->txdesc_phy_dma_addr & 0xff){ ++ printk("%s: Gigabit driver txdesc_phy_dma_addr is not 256-bytes-aligned.\n", rtdev->name ); ++ } ++ if(priv->rxdesc_phy_dma_addr & 0xff){ ++ printk("%s: Gigabit driver rxdesc_phy_dma_addr is not 256-bytes-aligned.\n", rtdev->name ); ++ } ++ // Set tx/rx descriptor space ++ priv->TxDescArray = (struct TxDesc *)priv->txdesc_space; ++ priv->RxDescArray = (struct RxDesc *)priv->rxdesc_space; ++ ++ { ++ int i; ++ struct rtskb *skb = NULL; /*** RTnet ***/ ++ priv->rx_buf_size = (rtdev->mtu <= 1500 ? DEFAULT_RX_BUF_LEN : rtdev->mtu + 32); /*** RTnet / ***/ ++ ++ for(i=0;i ***/ ++ skb = rtnetdev_alloc_rtskb(rtdev, priv->rx_buf_size); /*** RTnet ***/; ++ if( skb != NULL ) { ++ rtskb_reserve (skb, 2); // 16 byte align the IP fields. 
// ++ priv->Rx_skbuff[i] = skb; ++ } ++ else{ ++ printk("%s: Gigabit driver failed to allocate skbuff.\n", rtdev->name); ++ priv->drvinit_fail = 1; ++ } ++ } ++ } ++ ++ ++ ////////////////////////////////////////////////////////////////////////////// ++ rtl8169_init_ring(rtdev); ++ rtl8169_hw_start(rtdev); ++ ++ // ------------------------------------------------------ ++ ++ //DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt ); /*** won't work anymore... ***/ ++ ++ return 0; ++ ++}//end of rtl8169_open (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_hw_PHY_config (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ void *ioaddr = (void*)priv->ioaddr; ++ ++ DBG_PRINT("priv->mcfg=%d, priv->pcfg=%d\n",priv->mcfg,priv->pcfg); ++ ++ if( priv->mcfg == MCFG_METHOD_4 ){ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1b, 0x841e ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0e, 0x7bfb ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x09, 0x273a ); ++ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0002 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x90D0 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 ); ++ }else if((priv->mcfg == MCFG_METHOD_2)||(priv->mcfg == MCFG_METHOD_3)){ ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0001 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x15, 0x1000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x18, 0x65C7 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0x00A1 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0x0008 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x1020 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x1000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE60 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x0077 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x7000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xFA00 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xA000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xFF41 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDE20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0x0140 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0x00BB ); ++ 
RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xB000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x03, 0xDF01 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x02, 0xDF20 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x01, 0xFF95 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x00, 0xBF00 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF800 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0xF000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x04, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x1F, 0x0000 ); ++ RTL8169_WRITE_GMII_REG( (unsigned long)ioaddr, 0x0B, 0x0000 ); ++ } ++ else{ ++ DBG_PRINT("priv->mcfg=%d. Discard hw PHY config.\n",priv->mcfg); ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_hw_start (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ u32 i; ++ ++ ++ /* Soft reset the chip. */ ++ RTL_W8 ( ChipCmd, CmdReset); ++ ++ /* Check that the chip has finished the reset. */ ++ for (i = 1000; i > 0; i--){ ++ if ((RTL_R8( ChipCmd ) & CmdReset) == 0) break; ++ else udelay (10); ++ } ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Unlock); ++ RTL_W8 ( ChipCmd, CmdTxEnb | CmdRxEnb); ++ RTL_W8 ( ETThReg, ETTh); ++ ++ // For gigabit rtl8169 ++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len ); ++ ++ // Set Rx Config register ++ i = rtl8169_rx_config | ( RTL_R32( RxConfig ) & rtl_chip_info[priv->chipset].RxConfigMask); ++ RTL_W32 ( RxConfig, i); ++ ++ ++ /* Set DMA burst size and Interframe Gap Time */ ++ RTL_W32 ( TxConfig, (TX_DMA_BURST << TxDMAShift) | (InterFrameGap << TxInterFrameGapShift) ); ++ ++ ++ ++ RTL_W16( CPlusCmd, RTL_R16(CPlusCmd) ); ++ ++ if( priv->mcfg == MCFG_METHOD_2 || ++ priv->mcfg == MCFG_METHOD_3) ++ { ++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<14)|(1<<3)) ); ++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3 and bit-14\n"); ++ } ++ else ++ { ++ RTL_W16( CPlusCmd, (RTL_R16(CPlusCmd)|(1<<3)) ); ++ DBG_PRINT("Set MAC Reg C+CR Offset 0xE0: bit-3.\n"); ++ } ++ ++ { ++ //RTL_W16(0xE2, 0x1517); ++ //RTL_W16(0xE2, 0x152a); ++ //RTL_W16(0xE2, 0x282a); ++ RTL_W16(0xE2, 0x0000); /* 0xE2 = IntrMitigate */ ++ } ++ ++ priv->cur_rx = 0; ++ ++ RTL_W32 ( TxDescStartAddr, priv->txdesc_phy_dma_addr); ++ RTL_W32 ( TxDescStartAddr + 4, 0x00); ++ RTL_W32 ( RxDescStartAddr, priv->rxdesc_phy_dma_addr); ++ RTL_W32 ( RxDescStartAddr + 4, 0x00); ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Lock ); ++ udelay (10); ++ ++ RTL_W32 ( RxMissed, 0 ); ++ ++ rtl8169_set_rx_mode (rtdev); ++ ++ /* no early-rx interrupts */ ++ RTL_W16 ( MultiIntr, RTL_R16(MultiIntr) & 0xF000); ++ ++ /* enable all known interrupts by setting the interrupt mask */ ++ RTL_W16 ( IntrMask, rtl8169_intr_mask); ++ ++ rtnetif_start_queue (rtdev); /*** RTnet ***/ ++ ++}//end of rtl8169_hw_start (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_init_ring (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ int i; ++ struct rtskb *skb; ++ ++ ++ priv->cur_rx = 0; ++ priv->cur_tx = 0; ++ priv->dirty_tx = 0; ++ memset(priv->TxDescArray, 0x0, 
NUM_TX_DESC*sizeof(struct TxDesc)); ++ memset(priv->RxDescArray, 0x0, NUM_RX_DESC*sizeof(struct RxDesc)); ++ ++ ++ for (i=0 ; iTx_skbuff[i]=NULL; ++ priv->txdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->TxDescArray[i], sizeof(struct TxDesc), PCI_DMA_TODEVICE); ++ } ++ ++ for (i=0; i RxDescArray[i].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ else{ ++ priv->RxDescArray[i].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ ++ {//----------------------------------------------------------------------- ++ skb = priv->Rx_skbuff[i]; ++ priv->rx_skbuff_dma_addr[i] = pci_map_single(pdev, skb->data, priv->rx_buf_size /* MAX_RX_SKBDATA_SIZE */, PCI_DMA_FROMDEVICE); /*** ***/ ++ ++ if( skb != NULL ){ ++ priv->RxDescArray[i].buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[i]); ++ priv->RxDescArray[i].buf_Haddr = 0; ++ } ++ else{ ++ DBG_PRINT("%s: %s() Rx_skbuff == NULL\n", rtdev->name, __FUNCTION__); ++ priv->drvinit_fail = 1; ++ } ++ }//----------------------------------------------------------------------- ++ priv->rxdesc_array_dma_addr[i] = pci_map_single(pdev, &priv->RxDescArray[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE); ++ pci_dma_sync_single_for_device(pdev, priv->rxdesc_array_dma_addr[i], sizeof(struct RxDesc), PCI_DMA_TODEVICE); ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_tx_clear (struct rtl8169_private *priv) ++{ ++ int i; ++ ++ priv->cur_tx = 0; ++ for ( i = 0 ; i < NUM_TX_DESC ; i++ ){ ++ if ( priv->Tx_skbuff[i] != NULL ) { ++ dev_kfree_rtskb ( priv->Tx_skbuff[i] ); ++ priv->Tx_skbuff[i] = NULL; ++ priv->stats.tx_dropped++; ++ } ++ } ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_start_xmit (struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ struct pci_dev *pdev = priv->pci_dev; ++ int entry = priv->cur_tx % NUM_TX_DESC; ++ // int buf_len = 60; ++ dma_addr_t txbuf_dma_addr; ++ rtdm_lockctx_t context; /*** RTnet ***/ ++ u32 status, len; /* */ ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); /*** RTnet ***/ ++ ++ status = le32_to_cpu(priv->TxDescArray[entry].status); ++ ++ if( (status & OWNbit)==0 ){ ++ ++ priv->Tx_skbuff[entry] = skb; ++ ++ len = skb->len; ++ if (len < ETH_ZLEN) { ++ skb = rtskb_padto(skb, ETH_ZLEN); ++ if (skb == NULL) { ++ /* Error... */ ++ rtdm_printk("%s: Error -- rtskb_padto returned NULL; out of memory?\n", rtdev->name); ++ } ++ len = ETH_ZLEN; ++ } ++ ++ txbuf_dma_addr = pci_map_single(pdev, skb->data, len, PCI_DMA_TODEVICE); ++ ++ priv->TxDescArray[entry].buf_addr = cpu_to_le32(txbuf_dma_addr); ++ ++ /* print TX frame debug informations? */ ++ while (r8169_debug & (DEBUG_TX_SYNC | DEBUG_TX_OTHER)) { ++ unsigned short proto = 0; ++ ++ /* get ethernet protocol id */ ++ if (skb->len < 14) break; /* packet too small! */ ++ if (skb->len > 12) proto = be16_to_cpu(*((unsigned short *)(skb->data + 12))); ++ ++ if (proto == 0x9021 && !(r8169_debug & DEBUG_TX_SYNC)) { ++ /* don't show TDMA Sync frames for better debugging, so look at RTmac frame type... */ ++ unsigned short type; ++ ++ if (skb->len < 16) break; /* packet too small! 
*/ ++ type = be16_to_cpu(*((unsigned short *)(skb->data + 14))); ++ ++ if (type == 0x0001) { ++ /* TDMA-Frame; get Message ID */ ++ unsigned short tdma_version; ++ ++ if (skb->len < 20) break; /* packet too small! */ ++ tdma_version = be16_to_cpu(*((unsigned short *)(skb->data + 18))); ++ ++ if (tdma_version == 0x0201) { ++ unsigned short tdma_id; ++ ++ if (skb->len < 22) break; /* packet too small! */ ++ tdma_id = be16_to_cpu(*((unsigned short *)(skb->data + 20))); ++ ++ if (tdma_id == 0x0000 && !(r8169_debug & DEBUG_TX_SYNC)) { ++ /* TDMA sync frame found, but not allowed to print it */ ++ break; ++ } ++ } ++ } ++ ++ } ++ ++ /* print frame informations */ ++ RT_DBG_PRINT("%s: TX len = %d, skb->len = %d, eth_proto=%04x\n", __FUNCTION__, len, skb->len, proto); ++ ++ break; /* leave loop */ ++ } ++ ++ if( len > priv->tx_pkt_len ){ ++ rtdm_printk("%s: Error -- Tx packet size(%d) > mtu(%d)+14\n", rtdev->name, len, rtdev->mtu); ++ len = priv->tx_pkt_len; ++ } ++ ++ /*** RTnet ***/ ++ /* get and patch time stamp just before the transmission */ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ /*** /RTnet ***/ ++ ++ if( entry != (NUM_TX_DESC-1) ){ ++ status = (OWNbit | FSbit | LSbit) | len; ++ } ++ else{ ++ status = (OWNbit | EORbit | FSbit | LSbit) | len; ++ } ++ priv->TxDescArray[entry].status = cpu_to_le32(status); ++ ++ pci_dma_sync_single_for_device(pdev, priv->txdesc_array_dma_addr[entry], sizeof(struct TxDesc), PCI_DMA_TODEVICE); ++ ++ RTL_W8 ( TxPoll, 0x40); //set polling bit ++ ++ //rtdev->trans_start = jiffies; ++ ++ priv->stats.tx_bytes += len; ++ priv->cur_tx++; ++ }//end of if( (priv->TxDescArray[entry].status & 0x80000000)==0 ) ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++ if ( (priv->cur_tx - NUM_TX_DESC) == priv->dirty_tx ){ ++ if (r8169_debug & DEBUG_RUN) rtdm_printk(KERN_DEBUG "%s: stopping rtnetif queue", __FUNCTION__); ++ rtnetif_stop_queue (rtdev); ++ } ++ else{ ++ if (rtnetif_queue_stopped (rtdev)){ ++ if (r8169_debug & DEBUG_RUN) rtdm_printk(KERN_DEBUG "%s: waking rtnetif queue", __FUNCTION__); ++ rtnetif_wake_queue (rtdev); ++ } ++ } ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. 
*/ ++static void rtl8169_tx_interrupt (struct rtnet_device *rtdev, struct rtl8169_private *priv, unsigned long ioaddr) ++{ ++ unsigned long dirty_tx, tx_left=0; ++ //int entry = priv->cur_tx % NUM_TX_DESC; /* */ ++ int txloop_cnt = 0; ++ ++ rt_assert (rtdev != NULL); ++ rt_assert (priv != NULL); ++ rt_assert (ioaddr != 0); ++ ++ rtdm_lock_get(&priv->lock); /*** RTnet ***/ ++ ++ dirty_tx = priv->dirty_tx; ++ smp_rmb(); /*** ***/ ++ tx_left = priv->cur_tx - dirty_tx; ++ ++ while( (tx_left > 0) && (txloop_cnt < max_interrupt_work) ){ ++ unsigned int entry = dirty_tx % NUM_TX_DESC; /* */ ++ if( (le32_to_cpu(priv->TxDescArray[entry].status) & OWNbit) == 0 ){ ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ r8169_callback_tx(&(priv->rt), 1, priv->Tx_skbuff[dirty_tx % NUM_TX_DESC]->len); ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ if (priv->txdesc_array_dma_addr[entry]) ++ pci_unmap_single(priv->pci_dev, priv->txdesc_array_dma_addr[entry], priv->Tx_skbuff[entry]->len, PCI_DMA_TODEVICE); /*** ##KK## ***/ ++ dev_kfree_rtskb( priv->Tx_skbuff[entry] ); /*** RTnet; previously: dev_kfree_skb_irq() - luckily we're within an IRQ ***/ ++ priv->Tx_skbuff[entry] = NULL; ++ priv->stats.tx_packets++; ++ dirty_tx++; ++ tx_left--; ++ entry++; ++ } ++ txloop_cnt ++; ++ } ++ ++ if (priv->dirty_tx != dirty_tx) { ++ priv->dirty_tx = dirty_tx; ++ smp_wmb(); /*** ***/ ++ if (rtnetif_queue_stopped (rtdev)) ++ rtnetif_wake_queue (rtdev); ++ } ++ ++ rtdm_lock_put(&priv->lock); /*** RTnet ***/ ++ ++} ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* This routine is logically part of the interrupt handler, but isolated ++ for clarity. */ ++static void rtl8169_rx_interrupt (struct rtnet_device *rtdev, struct rtl8169_private *priv, unsigned long ioaddr, nanosecs_abs_t *time_stamp) ++{ ++ struct pci_dev *pdev = priv->pci_dev; ++ int cur_rx; ++ int pkt_size = 0 ; ++ int rxdesc_cnt = 0; ++ /* int ret; */ /*** RTnet ***/ ++ struct rtskb *n_skb = NULL; ++ struct rtskb *cur_skb; ++ struct rtskb *rx_skb; ++ struct RxDesc *rxdesc; ++ ++ rt_assert (rtdev != NULL); ++ rt_assert (priv != NULL); ++ rt_assert (ioaddr != 0); ++ ++ ++ cur_rx = priv->cur_rx; ++ ++ rxdesc = &priv->RxDescArray[cur_rx]; ++ pci_dma_sync_single_for_cpu(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ while ( ((le32_to_cpu(rxdesc->status) & OWNbit)== 0) && (rxdesc_cnt < max_interrupt_work) ){ ++ ++ rxdesc_cnt++; ++ ++ if( le32_to_cpu(rxdesc->status) & RxRES ){ ++ rtdm_printk(KERN_INFO "%s: Rx ERROR!!!\n", rtdev->name); ++ priv->stats.rx_errors++; ++ if ( le32_to_cpu(rxdesc->status) & (RxRWT|RxRUNT) ) ++ priv->stats.rx_length_errors++; ++ if ( le32_to_cpu(rxdesc->status) & RxCRC) { ++ /* in the rt_via-rhine.c there's a lock around the incrementation... 
we'll do that also here */ ++ rtdm_lock_get(&priv->lock); /*** RTnet ***/ ++ priv->stats.rx_crc_errors++; ++ rtdm_lock_put(&priv->lock); /*** RTnet ***/ ++ } ++ } ++ else{ ++ pkt_size=(int)(le32_to_cpu(rxdesc->status) & 0x00001FFF)-4; ++ ++ if( pkt_size > priv->rx_pkt_len ){ ++ rtdm_printk("%s: Error -- Rx packet size(%d) > mtu(%d)+14\n", rtdev->name, pkt_size, rtdev->mtu); ++ pkt_size = priv->rx_pkt_len; ++ } ++ ++ {// ----------------------------------------------------- ++ rx_skb = priv->Rx_skbuff[cur_rx]; ++ // n_skb = RTL8169_ALLOC_RXSKB(MAX_RX_SKBDATA_SIZE); /*** ***/ ++ n_skb = rtnetdev_alloc_rtskb(rtdev, priv->rx_buf_size); /*** RTnet ***/ ++ if( n_skb != NULL ) { ++ rtskb_reserve (n_skb, 2); // 16 byte align the IP fields. // ++ ++ // Indicate rx_skb ++ if( rx_skb != NULL ){ ++ pci_dma_sync_single_for_cpu(pdev, priv->rx_skbuff_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ rtskb_put ( rx_skb, pkt_size ); ++ rx_skb->protocol = rt_eth_type_trans ( rx_skb, rtdev ); ++ rx_skb->time_stamp = *time_stamp; /*** RTnet ***/ ++ //ret = RTL8169_NETIF_RX (rx_skb); ++ rtnetif_rx(rx_skb); /*** RTnet ***/ ++ ++// dev->last_rx = jiffies; ++ priv->stats.rx_bytes += pkt_size; ++ priv->stats.rx_packets++; ++ ++#ifdef RTL8169_DYNAMIC_CONTROL ++ r8169_callback_rx( &(priv->rt), 1, pkt_size); ++#endif //end #ifdef RTL8169_DYNAMIC_CONTROL ++ ++ }//end if( rx_skb != NULL ) ++ ++ priv->Rx_skbuff[cur_rx] = n_skb; ++ } ++ else{ ++ RT_DBG_PRINT("%s: Allocate n_skb failed! (priv->rx_buf_size = %d)\n",__FUNCTION__, priv->rx_buf_size ); ++ priv->Rx_skbuff[cur_rx] = rx_skb; ++ } ++ ++ ++ // Update rx descriptor ++ if( cur_rx == (NUM_RX_DESC-1) ){ ++ priv->RxDescArray[cur_rx].status = cpu_to_le32((OWNbit | EORbit) | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ else{ ++ priv->RxDescArray[cur_rx].status = cpu_to_le32(OWNbit | (unsigned long)priv->hw_rx_pkt_len); ++ } ++ ++ cur_skb = priv->Rx_skbuff[cur_rx]; ++ ++ if( cur_skb != NULL ){ ++ priv->rx_skbuff_dma_addr[cur_rx] = pci_map_single(pdev, cur_skb->data, priv->rx_buf_size /* MAX_RX_SKBDATA_SIZE */, PCI_DMA_FROMDEVICE); ++ rxdesc->buf_addr = cpu_to_le32(priv->rx_skbuff_dma_addr[cur_rx]); ++ } ++ else{ ++ RT_DBG_PRINT("%s: %s() cur_skb == NULL\n", rtdev->name, __FUNCTION__); ++ } ++ ++ }//------------------------------------------------------------ ++ ++ }// end of if( priv->RxDescArray[cur_rx].status & RxRES ) ++ ++ cur_rx = (cur_rx +1) % NUM_RX_DESC; ++ rxdesc = &priv->RxDescArray[cur_rx]; ++ pci_dma_sync_single_for_cpu(pdev, priv->rxdesc_array_dma_addr[cur_rx], sizeof(struct RxDesc), PCI_DMA_FROMDEVICE); ++ ++ }// end of while ( (priv->RxDescArray[cur_rx].status & 0x80000000)== 0) ++ ++ if( rxdesc_cnt >= max_interrupt_work ){ ++ RT_DBG_PRINT("%s: Too much work at Rx interrupt.\n", rtdev->name); ++ } ++ ++ priv->cur_rx = cur_rx; ++} ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++/* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. 
*/ ++static int rtl8169_interrupt(rtdm_irq_t *irq_handle) ++{ ++ /* struct net_device *dev = (struct net_device *) dev_instance; */ /*** RTnet ***/ ++ struct rtnet_device *rtdev = rtdm_irq_get_arg(irq_handle, struct rtnet_device); /*** RTnet ***/ ++ struct rtl8169_private *priv = rtdev->priv; ++ int boguscnt = max_interrupt_work; ++ unsigned long ioaddr = priv->ioaddr; ++ int status = 0; ++ unsigned int old_packet_cnt = priv->stats.rx_packets; /*** RTnet ***/ ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); /*** RTnet ***/ ++ ++ int interrupt_handled = RTDM_IRQ_NONE; /*** ***/ ++ ++ do { ++ status = RTL_R16(IntrStatus); /* read interrupt status */ ++ ++ if ((status == 0xFFFF) || (!status)) { ++ break; /* hotplug/major error/no more work/shared irq */ ++ } ++ ++ ++ interrupt_handled = RTDM_IRQ_HANDLED; ++ ++/* if (unlikely(!rtnetif_running(rtdev))) { ++ rtl8169_asic_down(ioaddr); ++ goto out; ++ } ++*/ ++ ++ /* Acknowledge interrupts */ ++ RTL_W16(IntrStatus, 0xffff); ++ ++ if (!(status & rtl8169_intr_mask)) { ++ break; ++ } ++ ++ if (unlikely(status & SYSErr)) { ++ RT_DBG_PRINT("PCI error...!? %i\n", __LINE__); ++ rtl8169_pcierr_interrupt(rtdev); ++ break; ++ } ++ ++ /*** RTnet / (Linux-2.6.12-Backport) ***/ ++ if (unlikely(status & LinkChg)) { ++ rtdm_lock_get(&priv->lock); ++ if (RTL_R8(PHYstatus) & LinkStatus) /*** only supporting XMII, not yet TBI ***/ ++ rtnetif_carrier_on(rtdev); ++ else ++ rtnetif_carrier_off(rtdev); ++ rtdm_lock_put(&priv->lock); ++ } ++ ++ // Rx interrupt ++ if (status & (RxOK | RxOverflow | RxFIFOOver)) { ++ rtl8169_rx_interrupt (rtdev, priv, ioaddr, &time_stamp); ++ } ++ ++ // Tx interrupt ++ if (status & (TxOK | TxErr)) { ++ rtl8169_tx_interrupt (rtdev, priv, ioaddr); ++ } ++ ++ boguscnt--; ++ } while (boguscnt > 0); ++ ++ if (boguscnt <= 0) { ++ rtdm_printk(KERN_WARNING "%s: Too much work at interrupt!\n", rtdev->name); ++ RTL_W16( IntrStatus, 0xffff); /* Clear all interrupt sources */ ++ } ++ ++//out: ++ ++ if (old_packet_cnt != priv->stats.rx_packets) ++ rt_mark_stack_mgr(rtdev); ++ return interrupt_handled; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int rtl8169_close (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ int i; ++ rtdm_lockctx_t context; /*** RTnet, for rtdm_lock_get_irqsave ***/ ++ ++ // ----------------------------------------- ++ /* rtl8169_delete_timer( &(priv->r8169_timer) ); */ /*** RTnet ***/ ++ ++ ++ rtdm_lock_get_irqsave (&priv->lock, context); /*** RTnet ***/ ++ ++ rtnetif_stop_queue (rtdev); /*** RTnet / : moved behind spin_lock! ***/ ++ ++ /* Stop the chip's Tx and Rx processes. */ ++ RTL_W8 ( ChipCmd, 0x00); ++ ++ /* Disable interrupts by clearing the interrupt mask. */ ++ RTL_W16 ( IntrMask, 0x0000); ++ ++ /* Update the error counts. 
*/ ++ priv->stats.rx_missed_errors += RTL_R32(RxMissed); ++ RTL_W32( RxMissed, 0); ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++ /*** RTnet ***/ ++ if ( (i=rtdm_irq_free(&priv->irq_handle))<0 ) ++ return i; ++ ++ rt_stack_disconnect(rtdev); ++ /*** /RTnet ***/ ++ ++ rtl8169_tx_clear (priv); ++ ++ //2004-05-11 ++ if(priv->txdesc_space != NULL){ ++ pci_free_consistent( ++ priv->pci_dev, ++ priv->sizeof_txdesc_space, ++ priv->txdesc_space, ++ priv->txdesc_phy_dma_addr ++ ); ++ priv->txdesc_space = NULL; ++ } ++ ++ if(priv->rxdesc_space != NULL){ ++ pci_free_consistent( ++ priv->pci_dev, ++ priv->sizeof_rxdesc_space, ++ priv->rxdesc_space, ++ priv->rxdesc_phy_dma_addr ++ ); ++ priv->rxdesc_space = NULL; ++ } ++ ++ priv->TxDescArray = NULL; ++ priv->RxDescArray = NULL; ++ ++ {//----------------------------------------------------------------------------- ++ for(i=0;iRx_skbuff[i] != NULL ) { ++ //RTL8169_FREE_RXSKB ( priv->Rx_skbuff[i] ); /*** ***/ ++ dev_kfree_rtskb(priv->Rx_skbuff[i]); /*** RTnet ***/ ++ } ++ } ++ }//----------------------------------------------------------------------------- ++ ++ //DBG_PRINT("%s: %s() alloc_rxskb_cnt = %d\n", dev->name, __FUNCTION__, alloc_rxskb_cnt ); /*** won't work anymore ***/ ++ ++ return 0; ++} ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static unsigned const ethernet_polynomial = 0x04c11db7U; ++static inline u32 ether_crc (int length, unsigned char *data) ++{ ++ int crc = -1; ++ ++ while (--length >= 0) { ++ unsigned char current_octet = *data++; ++ int bit; ++ for (bit = 0; bit < 8; bit++, current_octet >>= 1) ++ crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ? ethernet_polynomial : 0); ++ } ++ ++ return crc; ++} ++ ++ ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++static void rtl8169_set_rx_mode (struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ rtdm_lockctx_t context; ++ u32 mc_filter[2]; /* Multicast hash filter */ ++ int rx_mode; ++ u32 tmp=0; ++ ++ ++ if (rtdev->flags & IFF_PROMISC) { ++ /* Unconditionally log net taps. */ ++ printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", rtdev->name); ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys | AcceptAllPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else if (rtdev->flags & IFF_ALLMULTI) { ++ /* Too many to filter perfectly -- accept all multicasts. 
*/ ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0xffffffff; ++ } else { ++ rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; ++ mc_filter[1] = mc_filter[0] = 0; ++ } ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); /*** RTnet ***/ ++ ++ tmp = rtl8169_rx_config | rx_mode | (RTL_R32(RxConfig) & rtl_chip_info[priv->chipset].RxConfigMask); ++ ++ RTL_W32 ( RxConfig, tmp); ++ RTL_W32 ( MAR0 + 0, mc_filter[0]); ++ RTL_W32 ( MAR0 + 4, mc_filter[1]); ++ ++ rtdm_lock_put_irqrestore(&priv->lock, context); /*** RTnet ***/ ++ ++}//end of rtl8169_set_rx_mode (struct net_device *dev) ++ ++ ++ ++ ++ ++ ++ ++//================================================================================ ++static struct net_device_stats *rtl8169_get_stats(struct rtnet_device *rtdev) ++ ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ ++ return &priv->stats; ++} ++ ++ ++ ++ ++ ++ ++ ++//================================================================================ ++static struct pci_driver rtl8169_pci_driver = { ++ name: MODULENAME, ++ id_table: rtl8169_pci_tbl, ++ probe: rtl8169_init_one, ++ remove: rtl8169_remove_one, ++ suspend: NULL, ++ resume: NULL, ++}; ++ ++ ++ ++ ++ ++//====================================================================================================== ++static int __init rtl8169_init_module (void) ++{ ++ /* Enable debugging output... */ ++ if (local_debug > 0) { ++ r8169_debug = local_debug; ++ } ++ if (r8169_debug & DEBUG_RUN) printk("Initializing " MODULENAME " driver"); ++ return pci_register_driver (&rtl8169_pci_driver); ++} ++ ++ ++ ++ ++//====================================================================================================== ++static void __exit rtl8169_cleanup_module (void) ++{ ++ pci_unregister_driver (&rtl8169_pci_driver); ++} ++ ++ ++#ifdef RTL8169_JUMBO_FRAME_SUPPORT ++static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ struct rtl8169_private *priv = dev->priv; ++ unsigned long ioaddr = priv->ioaddr; ++ ++ if( new_mtu > MAX_JUMBO_FRAME_MTU ){ ++ printk("%s: Error -- new_mtu(%d) > MAX_JUMBO_FRAME_MTU(%d).\n", dev->name, new_mtu, MAX_JUMBO_FRAME_MTU); ++ return -1; ++ } ++ ++ dev->mtu = new_mtu; ++ ++ priv->curr_mtu_size = new_mtu; ++ priv->tx_pkt_len = new_mtu + ETH_HDR_LEN; ++ priv->rx_pkt_len = new_mtu + ETH_HDR_LEN; ++ priv->hw_rx_pkt_len = priv->rx_pkt_len + 8; ++ ++ RTL_W8 ( Cfg9346, Cfg9346_Unlock); ++ RTL_W16 ( RxMaxSize, (unsigned short)priv->hw_rx_pkt_len ); ++ RTL_W8 ( Cfg9346, Cfg9346_Lock); ++ ++ DBG_PRINT("-------------------------- \n"); ++ DBG_PRINT("dev->mtu = %d \n", dev->mtu); ++ DBG_PRINT("priv->curr_mtu_size = %d \n", priv->curr_mtu_size); ++ DBG_PRINT("priv->rx_pkt_len = %d \n", priv->rx_pkt_len); ++ DBG_PRINT("priv->tx_pkt_len = %d \n", priv->tx_pkt_len); ++ DBG_PRINT("RTL_W16( RxMaxSize, %d )\n", priv->hw_rx_pkt_len); ++ DBG_PRINT("-------------------------- \n"); ++ ++ rtl8169_close (dev); ++ rtl8169_open (dev); ++ ++ return 0; ++} ++#endif //end #ifdef RTL8169_JUMBO_FRAME_SUPPORT ++ ++ ++ ++/*** these functions are backported from Linux-2.6.12's r8169.c driver ***/ ++static void rtl8169_irq_mask_and_ack(unsigned long ioaddr) ++{ ++ RTL_W16(IntrMask, 0x0000); ++ ++ RTL_W16(IntrStatus, 0xffff); ++} ++ ++static void rtl8169_pcierr_interrupt(struct rtnet_device *rtdev) ++{ ++ struct rtl8169_private *priv = rtdev->priv; ++ struct pci_dev *pdev = priv->pci_dev; ++ unsigned long ioaddr = priv->ioaddr; ++ u16 pci_status, pci_cmd; ++ ++ pci_read_config_word(pdev, 
PCI_COMMAND, &pci_cmd); ++ pci_read_config_word(pdev, PCI_STATUS, &pci_status); ++ ++ rtdm_printk(KERN_ERR PFX "%s: PCI error (cmd = 0x%04x, status = 0x%04x).\n", ++ rtdev->name, pci_cmd, pci_status); ++ ++ /* ++ * The recovery sequence below admits a very elaborated explanation: ++ * - it seems to work; ++ * - I did not see what else could be done. ++ * ++ * Feel free to adjust to your needs. ++ */ ++ pci_write_config_word(pdev, PCI_COMMAND, ++ pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY); ++ ++ pci_write_config_word(pdev, PCI_STATUS, ++ pci_status & (PCI_STATUS_DETECTED_PARITY | ++ PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT | ++ PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT)); ++ ++ /* The infamous DAC f*ckup only happens at boot time */ ++ /*** *** ++ if ((priv->cp_cmd & PCIDAC) && !priv->dirty_rx && !priv->cur_rx) { ++ rtdm_printk(KERN_INFO PFX "%s: disabling PCI DAC.\n", rtdev->name); ++ priv->cp_cmd &= ~PCIDAC; ++ RTL_W16(CPlusCmd, priv->cp_cmd); ++ rtdev->features &= ~NETIF_F_HIGHDMA; ++ rtl8169_schedule_work(rtdev, rtl8169_reinit_task); ++ } ++ *** /RTnet ***/ ++ ++ /* Disable interrupts */ ++ rtl8169_irq_mask_and_ack(ioaddr); ++ ++ /* Reset the chipset */ ++ RTL_W8(ChipCmd, CmdReset); ++ ++ /* PCI commit */ ++ RTL_R8(ChipCmd); ++ ++} ++ ++ ++ ++ ++ ++ ++//====================================================================================================== ++module_init(rtl8169_init_module); ++module_exit(rtl8169_cleanup_module); +--- linux/drivers/xenomai/net/drivers/rt_smc91111.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/rt_smc91111.h 2021-04-07 16:01:27.234634156 +0800 +@@ -0,0 +1,566 @@ ++/*------------------------------------------------------------------------ ++ . smc91111.h - macros for the LAN91C111 Ethernet Driver ++ . ++ . Copyright (C) 2001 Standard Microsystems Corporation (SMSC) ++ . Developed by Simple Network Magic Corporation (SNMC) ++ . Copyright (C) 1996 by Erik Stahlman (ES) ++ . ++ . This program is free software; you can redistribute it and/or modify ++ . it under the terms of the GNU General Public License as published by ++ . the Free Software Foundation; either version 2 of the License, or ++ . (at your option) any later version. ++ . ++ . This program is distributed in the hope that it will be useful, ++ . but WITHOUT ANY WARRANTY; without even the implied warranty of ++ . MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ . GNU General Public License for more details. ++ . ++ . You should have received a copy of the GNU General Public License ++ . along with this program; if not, write to the Free Software ++ . Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ . ++ . This file contains register information and access macros for ++ . the LAN91C111 single chip ethernet controller. It is a modified ++ . version of the smc9194.h file. ++ . ++ . Information contained in this file was obtained from the LAN91C111 ++ . manual from SMC. To get a copy, if you really want one, you can find ++ . information under www.smsc.com. ++ . ++ . Authors ++ . Erik Stahlman ( erik@vt.edu ) ++ . Daris A Nevil ( dnevil@snmc.com ) ++ . ++ . History ++ . 03/16/01 Daris A Nevil Modified for use with LAN91C111 device ++ . 
++ ---------------------------------------------------------------------------*/ ++#ifndef _SMC91111_H_ ++#define _SMC91111_H_ ++ ++/* I want some simple types */ ++ ++typedef unsigned char byte; ++typedef unsigned short word; ++typedef unsigned long int dword; ++ ++ ++/* Because of bank switching, the LAN91xxx uses only 16 I/O ports */ ++ ++#define SMC_IO_EXTENT 16 ++ ++ ++/*--------------------------------------------------------------- ++ . ++ . A description of the SMSC registers is probably in order here, ++ . although for details, the SMC datasheet is invaluable. ++ . ++ . Basically, the chip has 4 banks of registers ( 0 to 3 ), which ++ . are accessed by writing a number into the BANK_SELECT register ++ . ( I also use a SMC_SELECT_BANK macro for this ). ++ . ++ . The banks are configured so that for most purposes, bank 2 is all ++ . that is needed for simple run time tasks. ++ -----------------------------------------------------------------------*/ ++ ++/* ++ . Bank Select Register: ++ . ++ . yyyy yyyy 0000 00xx ++ . xx = bank number ++ . yyyy yyyy = 0x33, for identification purposes. ++*/ ++#define BANK_SELECT 14 ++ ++// Transmit Control Register ++/* BANK 0 */ ++#define TCR_REG 0x0000 // transmit control register ++#define TCR_ENABLE 0x0001 // When 1 we can transmit ++#define TCR_LOOP 0x0002 // Controls output pin LBK ++#define TCR_FORCOL 0x0004 // When 1 will force a collision ++#define TCR_PAD_EN 0x0080 // When 1 will pad tx frames < 64 bytes w/0 ++#define TCR_NOCRC 0x0100 // When 1 will not append CRC to tx frames ++#define TCR_MON_CSN 0x0400 // When 1 tx monitors carrier ++#define TCR_FDUPLX 0x0800 // When 1 enables full duplex operation ++#define TCR_STP_SQET 0x1000 // When 1 stops tx if Signal Quality Error ++#define TCR_EPH_LOOP 0x2000 // When 1 enables EPH block loopback ++#define TCR_SWFDUP 0x8000 // When 1 enables Switched Full Duplex mode ++ ++#define TCR_CLEAR 0 /* do NOTHING */ ++/* the default settings for the TCR register : */ ++/* QUESTION: do I want to enable padding of short packets ? 
*/ ++#define TCR_DEFAULT TCR_ENABLE ++ ++ ++// EPH Status Register ++/* BANK 0 */ ++#define EPH_STATUS_REG 0x0002 ++#define ES_TX_SUC 0x0001 // Last TX was successful ++#define ES_SNGL_COL 0x0002 // Single collision detected for last tx ++#define ES_MUL_COL 0x0004 // Multiple collisions detected for last tx ++#define ES_LTX_MULT 0x0008 // Last tx was a multicast ++#define ES_16COL 0x0010 // 16 Collisions Reached ++#define ES_SQET 0x0020 // Signal Quality Error Test ++#define ES_LTXBRD 0x0040 // Last tx was a broadcast ++#define ES_TXDEFR 0x0080 // Transmit Deferred ++#define ES_LATCOL 0x0200 // Late collision detected on last tx ++#define ES_LOSTCARR 0x0400 // Lost Carrier Sense ++#define ES_EXC_DEF 0x0800 // Excessive Deferral ++#define ES_CTR_ROL 0x1000 // Counter Roll Over indication ++#define ES_LINK_OK 0x4000 // Driven by inverted value of nLNK pin ++#define ES_TXUNRN 0x8000 // Tx Underrun ++ ++ ++// Receive Control Register ++/* BANK 0 */ ++#define RCR_REG 0x0004 ++#define RCR_RX_ABORT 0x0001 // Set if a rx frame was aborted ++#define RCR_PRMS 0x0002 // Enable promiscuous mode ++#define RCR_ALMUL 0x0004 // When set accepts all multicast frames ++#define RCR_RXEN 0x0100 // IFF this is set, we can receive packets ++#define RCR_STRIP_CRC 0x0200 // When set strips CRC from rx packets ++#define RCR_ABORT_ENB 0x0200 // When set will abort rx on collision ++#define RCR_FILT_CAR 0x0400 // When set filters leading 12 bit s of carrier ++#define RCR_SOFTRST 0x8000 // resets the chip ++ ++/* the normal settings for the RCR register : */ ++#define RCR_DEFAULT (RCR_STRIP_CRC | RCR_RXEN) ++#define RCR_CLEAR 0x0 // set it to a base state ++ ++// Counter Register ++/* BANK 0 */ ++#define COUNTER_REG 0x0006 ++ ++// Memory Information Register ++/* BANK 0 */ ++#define MIR_REG 0x0008 ++ ++// Receive/Phy Control Register ++/* BANK 0 */ ++#define RPC_REG 0x000A ++#define RPC_SPEED 0x2000 // When 1 PHY is in 100Mbps mode. ++#define RPC_DPLX 0x1000 // When 1 PHY is in Full-Duplex Mode ++#define RPC_ANEG 0x0800 // When 1 PHY is in Auto-Negotiate Mode ++#define RPC_LSXA_SHFT 5 // Bits to shift LS2A,LS1A,LS0A to lsb ++#define RPC_LSXB_SHFT 2 // Bits to get LS2B,LS1B,LS0B to lsb ++#define RPC_LED_100_10 (0x00) // LED = 100Mbps OR's with 10Mbps link detect ++#define RPC_LED_RES (0x01) // LED = Reserved ++#define RPC_LED_10 (0x02) // LED = 10Mbps link detect ++#define RPC_LED_FD (0x03) // LED = Full Duplex Mode ++#define RPC_LED_TX_RX (0x04) // LED = TX or RX packet occurred ++#define RPC_LED_100 (0x05) // LED = 100Mbps link dectect ++#define RPC_LED_TX (0x06) // LED = TX packet occurred ++#define RPC_LED_RX (0x07) // LED = RX packet occurred ++#define RPC_DEFAULT (RPC_ANEG | (RPC_LED_100 << RPC_LSXA_SHFT) | (RPC_LED_FD << RPC_LSXB_SHFT) | RPC_SPEED | RPC_DPLX) ++ ++/* Bank 0 0x000C is reserved */ ++ ++// Bank Select Register ++/* All Banks */ ++#define BSR_REG 0x000E ++ ++ ++// Configuration Reg ++/* BANK 1 */ ++#define CONFIG_REG 0x0000 ++#define CONFIG_EXT_PHY 0x0200 // 1=external MII, 0=internal Phy ++#define CONFIG_GPCNTRL 0x0400 // Inverse value drives pin nCNTRL ++#define CONFIG_NO_WAIT 0x1000 // When 1 no extra wait states on ISA bus ++#define CONFIG_EPH_POWER_EN 0x8000 // When 0 EPH is placed into low power mode. 
++ ++// Default is powered-up, Internal Phy, Wait States, and pin nCNTRL=low ++#define CONFIG_DEFAULT (CONFIG_EPH_POWER_EN) ++ ++ ++// Base Address Register ++/* BANK 1 */ ++#define BASE_REG 0x0002 ++ ++ ++// Individual Address Registers ++/* BANK 1 */ ++#define ADDR0_REG 0x0004 ++#define ADDR1_REG 0x0006 ++#define ADDR2_REG 0x0008 ++ ++ ++// General Purpose Register ++/* BANK 1 */ ++#define GP_REG 0x000A ++ ++ ++// Control Register ++/* BANK 1 */ ++#define CTL_REG 0x000C ++#define CTL_RCV_BAD 0x4000 // When 1 bad CRC packets are received ++#define CTL_AUTO_RELEASE 0x0800 // When 1 tx pages are released automatically ++#define CTL_LE_ENABLE 0x0080 // When 1 enables Link Error interrupt ++#define CTL_CR_ENABLE 0x0040 // When 1 enables Counter Rollover interrupt ++#define CTL_TE_ENABLE 0x0020 // When 1 enables Transmit Error interrupt ++#define CTL_EEPROM_SELECT 0x0004 // Controls EEPROM reload & store ++#define CTL_RELOAD 0x0002 // When set reads EEPROM into registers ++#define CTL_STORE 0x0001 // When set stores registers into EEPROM ++ ++ ++// MMU Command Register ++/* BANK 2 */ ++#define MMU_CMD_REG 0x0000 ++#define MC_BUSY 1 // When 1 the last release has not completed ++#define MC_NOP (0<<5) // No Op ++#define MC_ALLOC (1<<5) // OR with number of 256 byte packets ++#define MC_RESET (2<<5) // Reset MMU to initial state ++#define MC_REMOVE (3<<5) // Remove the current rx packet ++#define MC_RELEASE (4<<5) // Remove and release the current rx packet ++#define MC_FREEPKT (5<<5) // Release packet in PNR register ++#define MC_ENQUEUE (6<<5) // Enqueue the packet for transmit ++#define MC_RSTTXFIFO (7<<5) // Reset the TX FIFOs ++ ++ ++// Packet Number Register ++/* BANK 2 */ ++#define PN_REG 0x0002 ++ ++ ++// Allocation Result Register ++/* BANK 2 */ ++#define AR_REG 0x0003 ++#define AR_FAILED 0x80 // Alocation Failed ++ ++ ++// RX FIFO Ports Register ++/* BANK 2 */ ++#define RXFIFO_REG 0x0004 // Must be read as a word ++#define RXFIFO_REMPTY 0x8000 // RX FIFO Empty ++ ++ ++// TX FIFO Ports Register ++/* BANK 2 */ ++#define TXFIFO_REG RXFIFO_REG // Must be read as a word ++#define TXFIFO_TEMPTY 0x80 // TX FIFO Empty ++ ++ ++// Pointer Register ++/* BANK 2 */ ++#define PTR_REG 0x0006 ++#define PTR_RCV 0x8000 // 1=Receive area, 0=Transmit area ++#define PTR_AUTOINC 0x4000 // Auto increment the pointer on each access ++#define PTR_READ 0x2000 // When 1 the operation is a read ++ ++ ++// Data Register ++/* BANK 2 */ ++#define DATA_REG 0x0008 ++ ++ ++// Interrupt Status/Acknowledge Register ++/* BANK 2 */ ++#define INT_REG 0x000C ++ ++ ++// Interrupt Mask Register ++/* BANK 2 */ ++#define IM_REG 0x000D ++#define IM_MDINT 0x80 // PHY MI Register 18 Interrupt ++#define IM_ERCV_INT 0x40 // Early Receive Interrupt ++#define IM_EPH_INT 0x20 // Set by Etheret Protocol Handler section ++#define IM_RX_OVRN_INT 0x10 // Set by Receiver Overruns ++#define IM_ALLOC_INT 0x08 // Set when allocation request is completed ++#define IM_TX_EMPTY_INT 0x04 // Set if the TX FIFO goes empty ++#define IM_TX_INT 0x02 // Transmit Interrrupt ++#define IM_RCV_INT 0x01 // Receive Interrupt ++ ++ ++// Multicast Table Registers ++/* BANK 3 */ ++#define MCAST_REG1 0x0000 ++#define MCAST_REG2 0x0002 ++#define MCAST_REG3 0x0004 ++#define MCAST_REG4 0x0006 ++ ++ ++// Management Interface Register (MII) ++/* BANK 3 */ ++#define MII_REG 0x0008 ++#define MII_MSK_CRS100 0x4000 // Disables CRS100 detection during tx half dup ++#define MII_MDOE 0x0008 // MII Output Enable ++#define MII_MCLK 0x0004 // MII Clock, pin MDCLK ++#define 
MII_MDI 0x0002 // MII Input, pin MDI ++#define MII_MDO 0x0001 // MII Output, pin MDO ++ ++ ++// Revision Register ++/* BANK 3 */ ++#define REV_REG 0x000A /* ( hi: chip id low: rev # ) */ ++ ++ ++// Early RCV Register ++/* BANK 3 */ ++/* this is NOT on SMC9192 */ ++#define ERCV_REG 0x000C ++#define ERCV_RCV_DISCRD 0x0080 // When 1 discards a packet being received ++#define ERCV_THRESHOLD 0x001F // ERCV Threshold Mask ++ ++// External Register ++/* BANK 7 */ ++#define EXT_REG 0x0000 ++ ++ ++#define CHIP_9192 3 ++#define CHIP_9194 4 ++#define CHIP_9195 5 ++#define CHIP_9196 6 ++#define CHIP_91100 7 ++#define CHIP_91100FD 8 ++#define CHIP_91111FD 9 ++ ++static const char * chip_ids[ 15 ] = { ++ NULL, NULL, NULL, ++ /* 3 */ "SMC91C90/91C92", ++ /* 4 */ "SMC91C94", ++ /* 5 */ "SMC91C95", ++ /* 6 */ "SMC91C96", ++ /* 7 */ "SMC91C100", ++ /* 8 */ "SMC91C100FD", ++ /* 9 */ "SMC91C11xFD", ++ NULL, NULL, ++ NULL, NULL, NULL}; ++ ++/* ++ . Transmit status bits ++*/ ++#define TS_SUCCESS 0x0001 ++#define TS_LOSTCAR 0x0400 ++#define TS_LATCOL 0x0200 ++#define TS_16COL 0x0010 ++ ++/* ++ . Receive status bits ++*/ ++#define RS_ALGNERR 0x8000 ++#define RS_BRODCAST 0x4000 ++#define RS_BADCRC 0x2000 ++#define RS_ODDFRAME 0x1000 // bug: the LAN91C111 never sets this on receive ++#define RS_TOOLONG 0x0800 ++#define RS_TOOSHORT 0x0400 ++#define RS_MULTICAST 0x0001 ++#define RS_ERRORS (RS_ALGNERR | RS_BADCRC | RS_TOOLONG | RS_TOOSHORT) ++ ++ ++// PHY Types ++enum { ++ PHY_LAN83C183 = 1, // LAN91C111 Internal PHY ++ PHY_LAN83C180 ++}; ++ ++ ++// PHY Register Addresses (LAN91C111 Internal PHY) ++ ++// PHY Control Register ++#define PHY_CNTL_REG 0x00 ++#define PHY_CNTL_RST 0x8000 // 1=PHY Reset ++#define PHY_CNTL_LPBK 0x4000 // 1=PHY Loopback ++#define PHY_CNTL_SPEED 0x2000 // 1=100Mbps, 0=10Mpbs ++#define PHY_CNTL_ANEG_EN 0x1000 // 1=Enable Auto negotiation ++#define PHY_CNTL_PDN 0x0800 // 1=PHY Power Down mode ++#define PHY_CNTL_MII_DIS 0x0400 // 1=MII 4 bit interface disabled ++#define PHY_CNTL_ANEG_RST 0x0200 // 1=Reset Auto negotiate ++#define PHY_CNTL_DPLX 0x0100 // 1=Full Duplex, 0=Half Duplex ++#define PHY_CNTL_COLTST 0x0080 // 1= MII Colision Test ++ ++// PHY Status Register ++#define PHY_STAT_REG 0x01 ++#define PHY_STAT_CAP_T4 0x8000 // 1=100Base-T4 capable ++#define PHY_STAT_CAP_TXF 0x4000 // 1=100Base-X full duplex capable ++#define PHY_STAT_CAP_TXH 0x2000 // 1=100Base-X half duplex capable ++#define PHY_STAT_CAP_TF 0x1000 // 1=10Mbps full duplex capable ++#define PHY_STAT_CAP_TH 0x0800 // 1=10Mbps half duplex capable ++#define PHY_STAT_CAP_SUPR 0x0040 // 1=recv mgmt frames with not preamble ++#define PHY_STAT_ANEG_ACK 0x0020 // 1=ANEG has completed ++#define PHY_STAT_REM_FLT 0x0010 // 1=Remote Fault detected ++#define PHY_STAT_CAP_ANEG 0x0008 // 1=Auto negotiate capable ++#define PHY_STAT_LINK 0x0004 // 1=valid link ++#define PHY_STAT_JAB 0x0002 // 1=10Mbps jabber condition ++#define PHY_STAT_EXREG 0x0001 // 1=extended registers implemented ++ ++// PHY Identifier Registers ++#define PHY_ID1_REG 0x02 // PHY Identifier 1 ++#define PHY_ID2_REG 0x03 // PHY Identifier 2 ++ ++// PHY Auto-Negotiation Advertisement Register ++#define PHY_AD_REG 0x04 ++#define PHY_AD_NP 0x8000 // 1=PHY requests exchange of Next Page ++#define PHY_AD_ACK 0x4000 // 1=got link code word from remote ++#define PHY_AD_RF 0x2000 // 1=advertise remote fault ++#define PHY_AD_T4 0x0200 // 1=PHY is capable of 100Base-T4 ++#define PHY_AD_TX_FDX 0x0100 // 1=PHY is capable of 100Base-TX FDPLX ++#define PHY_AD_TX_HDX 0x0080 // 1=PHY is capable 
of 100Base-TX HDPLX ++#define PHY_AD_10_FDX 0x0040 // 1=PHY is capable of 10Base-T FDPLX ++#define PHY_AD_10_HDX 0x0020 // 1=PHY is capable of 10Base-T HDPLX ++#define PHY_AD_CSMA 0x0001 // 1=PHY is capable of 802.3 CMSA ++ ++// PHY Auto-negotiation Remote End Capability Register ++#define PHY_RMT_REG 0x05 ++// Uses same bit definitions as PHY_AD_REG ++ ++// PHY Configuration Register 1 ++#define PHY_CFG1_REG 0x10 ++#define PHY_CFG1_LNKDIS 0x8000 // 1=Rx Link Detect Function disabled ++#define PHY_CFG1_XMTDIS 0x4000 // 1=TP Transmitter Disabled ++#define PHY_CFG1_XMTPDN 0x2000 // 1=TP Transmitter Powered Down ++#define PHY_CFG1_BYPSCR 0x0400 // 1=Bypass scrambler/descrambler ++#define PHY_CFG1_UNSCDS 0x0200 // 1=Unscramble Idle Reception Disable ++#define PHY_CFG1_EQLZR 0x0100 // 1=Rx Equalizer Disabled ++#define PHY_CFG1_CABLE 0x0080 // 1=STP(150ohm), 0=UTP(100ohm) ++#define PHY_CFG1_RLVL0 0x0040 // 1=Rx Squelch level reduced by 4.5db ++#define PHY_CFG1_TLVL_SHIFT 2 // Transmit Output Level Adjust ++#define PHY_CFG1_TLVL_MASK 0x003C ++#define PHY_CFG1_TRF_MASK 0x0003 // Transmitter Rise/Fall time ++ ++ ++// PHY Configuration Register 2 ++#define PHY_CFG2_REG 0x11 ++#define PHY_CFG2_APOLDIS 0x0020 // 1=Auto Polarity Correction disabled ++#define PHY_CFG2_JABDIS 0x0010 // 1=Jabber disabled ++#define PHY_CFG2_MREG 0x0008 // 1=Multiple register access (MII mgt) ++#define PHY_CFG2_INTMDIO 0x0004 // 1=Interrupt signaled with MDIO pulseo ++ ++// PHY Status Output (and Interrupt status) Register ++#define PHY_INT_REG 0x12 // Status Output (Interrupt Status) ++#define PHY_INT_INT 0x8000 // 1=bits have changed since last read ++#define PHY_INT_LNKFAIL 0x4000 // 1=Link Not detected ++#define PHY_INT_LOSSSYNC 0x2000 // 1=Descrambler has lost sync ++#define PHY_INT_CWRD 0x1000 // 1=Invalid 4B5B code detected on rx ++#define PHY_INT_SSD 0x0800 // 1=No Start Of Stream detected on rx ++#define PHY_INT_ESD 0x0400 // 1=No End Of Stream detected on rx ++#define PHY_INT_RPOL 0x0200 // 1=Reverse Polarity detected ++#define PHY_INT_JAB 0x0100 // 1=Jabber detected ++#define PHY_INT_SPDDET 0x0080 // 1=100Base-TX mode, 0=10Base-T mode ++#define PHY_INT_DPLXDET 0x0040 // 1=Device in Full Duplex ++ ++// PHY Interrupt/Status Mask Register ++#define PHY_MASK_REG 0x13 // Interrupt Mask ++// Uses the same bit definitions as PHY_INT_REG ++ ++ ++ ++/*------------------------------------------------------------------------- ++ . I define some macros to make it easier to do somewhat common ++ . or slightly complicated, repeated tasks. ++ --------------------------------------------------------------------------*/ ++ ++/* select a register bank, 0 to 3 */ ++ ++#define SMC_SELECT_BANK(x) { outw( x, ioaddr + BANK_SELECT ); } ++ ++/* this enables an interrupt in the interrupt mask register */ ++#define SMC_ENABLE_INT(x) {\ ++ unsigned char mask;\ ++ SMC_SELECT_BANK(2);\ ++ mask = inb( ioaddr + IM_REG );\ ++ mask |= (x);\ ++ outb( mask, ioaddr + IM_REG ); \ ++} ++ ++/* this disables an interrupt from the interrupt mask register */ ++ ++#define SMC_DISABLE_INT(x) {\ ++ unsigned char mask;\ ++ SMC_SELECT_BANK(2);\ ++ mask = inb( ioaddr + IM_REG );\ ++ mask &= ~(x);\ ++ outb( mask, ioaddr + IM_REG ); \ ++} ++ ++/*---------------------------------------------------------------------- ++ . Define the interrupts that I want to receive from the card ++ . ++ . I want: ++ . IM_EPH_INT, for nasty errors ++ . IM_RCV_INT, for happy received packets ++ . IM_RX_OVRN_INT, because I have to kick the receiver ++ . 
IM_MDINT, for PHY Register 18 Status Changes ++ --------------------------------------------------------------------------*/ ++#define SMC_INTERRUPT_MASK (IM_EPH_INT | IM_RX_OVRN_INT | IM_RCV_INT | \ ++ IM_MDINT) ++ ++ ++#ifdef CONFIG_SYSCTL ++ ++ ++/* ++ * Declarations for the sysctl interface, which allows users the ability to ++ * control the finer aspects of the LAN91C111 chip. Since the smc ++ * module currently registers its sysctl table dynamically, the sysctl path ++ * for module FOO is /proc/sys/dev/ethX/FOO ++ */ ++#define CTL_SMC (CTL_BUS+1389) // arbitrary and hopefully unused ++ ++enum { ++ CTL_SMC_INFO = 1, // Sysctl files information ++ CTL_SMC_SWVER, // Driver Software Version Info ++ CTL_SMC_SWFDUP, // Switched Full Duplex Mode ++ CTL_SMC_EPHLOOP, // EPH Block Internal Loopback ++ CTL_SMC_MIIOP, // MII Operation ++ CTL_SMC_AUTONEG, // Auto-negotiate Mode ++ CTL_SMC_RFDUPLX, // Request Full Duplex Mode ++ CTL_SMC_RSPEED, // Request Speed Selection ++ CTL_SMC_AFDUPLX, // Actual Full Duplex Mode ++ CTL_SMC_ASPEED, // Actual Speed Selection ++ CTL_SMC_LNKFAIL, // Link Failed ++ CTL_SMC_FORCOL, // Force a Collision ++ CTL_SMC_FILTCAR, // Filter Carrier ++ CTL_SMC_FREEMEM, // Free Buffer Memory ++ CTL_SMC_TOTMEM, // Total Buffer Memory ++ CTL_SMC_LEDA, // Output of LED-A ++ CTL_SMC_LEDB, // Output of LED-B ++ CTL_SMC_CHIPREV, // LAN91C111 Chip Revision ID ++#ifdef SMC_DEBUG ++ // Register access for debugging ++ CTL_SMC_REG_BSR, // Bank Select ++ CTL_SMC_REG_TCR, // Transmit Control ++ CTL_SMC_REG_ESR, // EPH Status ++ CTL_SMC_REG_RCR, // Receive Control ++ CTL_SMC_REG_CTRR, // Counter ++ CTL_SMC_REG_MIR, // Memory Information ++ CTL_SMC_REG_RPCR, // Receive/Phy Control ++ CTL_SMC_REG_CFGR, // Configuration ++ CTL_SMC_REG_BAR, // Base Address ++ CTL_SMC_REG_IAR0, // Individual Address 0 ++ CTL_SMC_REG_IAR1, // Individual Address 1 ++ CTL_SMC_REG_IAR2, // Individual Address 2 ++ CTL_SMC_REG_GPR, // General Purpose ++ CTL_SMC_REG_CTLR, // Control ++ CTL_SMC_REG_MCR, // MMU Command ++ CTL_SMC_REG_PNR, // Packet Number ++ CTL_SMC_REG_FPR, // FIFO Ports ++ CTL_SMC_REG_PTR, // Pointer ++ CTL_SMC_REG_DR, // Data ++ CTL_SMC_REG_ISR, // Interrupt Status ++ CTL_SMC_REG_MTR1, // Multicast Table Entry 1 ++ CTL_SMC_REG_MTR2, // Multicast Table Entry 2 ++ CTL_SMC_REG_MTR3, // Multicast Table Entry 3 ++ CTL_SMC_REG_MTR4, // Multicast Table Entry 4 ++ CTL_SMC_REG_MIIR, // Management Interface ++ CTL_SMC_REG_REVR, // Revision ++ CTL_SMC_REG_ERCVR, // Early RCV ++ CTL_SMC_REG_EXTR, // External ++ CTL_SMC_PHY_CTRL, // PHY Control ++ CTL_SMC_PHY_STAT, // PHY Status ++ CTL_SMC_PHY_ID1, // PHY ID1 ++ CTL_SMC_PHY_ID2, // PHY ID2 ++ CTL_SMC_PHY_ADC, // PHY Advertise Capability ++ CTL_SMC_PHY_REMC, // PHY Advertise Capability ++ CTL_SMC_PHY_CFG1, // PHY Configuration 1 ++ CTL_SMC_PHY_CFG2, // PHY Configuration 2 ++ CTL_SMC_PHY_INT, // PHY Interrupt/Status Output ++ CTL_SMC_PHY_MASK, // PHY Interrupt/Status Mask ++#endif ++ // --------------------------------------------------- ++ CTL_SMC_LAST_ENTRY // Add new entries above the line ++}; ++ ++#endif // CONFIG_SYSCTL ++ ++#endif /* _SMC_91111_H_ */ ++ ++ +--- linux/drivers/xenomai/net/drivers/e1000e/phy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/phy.c 2021-04-07 16:01:27.229634163 +0800 +@@ -0,0 +1,3381 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. 
++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++ ++#include "e1000.h" ++ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw); ++static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); ++static s32 e1000_wait_autoneg(struct e1000_hw *hw); ++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg); ++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read, bool page_set); ++static u32 e1000_get_phy_addr_for_hv_page(u32 page); ++static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read); ++ ++/* Cable length tables */ ++static const u16 e1000_m88_cable_length_table[] = { ++ 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; ++#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_m88_cable_length_table) ++ ++static const u16 e1000_igp_2_cable_length_table[] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, ++ 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, ++ 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, ++ 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, ++ 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, ++ 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, ++ 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, ++ 124}; ++#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_igp_2_cable_length_table) ++ ++#define BM_PHY_REG_PAGE(offset) \ ++ ((u16)(((offset) >> PHY_PAGE_SHIFT) & 0xFFFF)) ++#define BM_PHY_REG_NUM(offset) \ ++ ((u16)(((offset) & MAX_PHY_REG_ADDRESS) |\ ++ (((offset) >> (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT)) &\ ++ ~MAX_PHY_REG_ADDRESS))) ++ ++#define HV_INTC_FC_PAGE_START 768 ++#define I82578_ADDR_REG 29 ++#define I82577_ADDR_REG 16 ++#define I82577_CFG_REG 22 ++#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15) ++#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift 100/10 */ ++#define I82577_CTRL_REG 23 ++ ++/* 82577 specific PHY registers */ ++#define I82577_PHY_CTRL_2 18 ++#define I82577_PHY_STATUS_2 26 ++#define I82577_PHY_DIAG_STATUS 31 ++ ++/* I82577 PHY Status 2 */ ++#define I82577_PHY_STATUS2_REV_POLARITY 0x0400 ++#define I82577_PHY_STATUS2_MDIX 0x0800 ++#define I82577_PHY_STATUS2_SPEED_MASK 0x0300 ++#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 ++ ++/* I82577 PHY Control 2 */ ++#define I82577_PHY_CTRL2_AUTO_MDIX 0x0400 ++#define I82577_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 ++ ++/* I82577 PHY Diagnostics 
Status */ ++#define I82577_DSTATUS_CABLE_LENGTH 0x03FC ++#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2 ++ ++/* BM PHY Copper Specific Control 1 */ ++#define BM_CS_CTRL1 16 ++ ++#define HV_MUX_DATA_CTRL PHY_REG(776, 16) ++#define HV_MUX_DATA_CTRL_GEN_TO_MAC 0x0400 ++#define HV_MUX_DATA_CTRL_FORCE_SPEED 0x0004 ++ ++/** ++ * e1000e_check_reset_block_generic - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Read the PHY management control register and check whether a PHY reset ++ * is blocked. If a reset is not blocked return 0, otherwise ++ * return E1000_BLK_PHY_RESET (12). ++ **/ ++s32 e1000e_check_reset_block_generic(struct e1000_hw *hw) ++{ ++ u32 manc; ++ ++ manc = er32(MANC); ++ ++ return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? ++ E1000_BLK_PHY_RESET : 0; ++} ++ ++/** ++ * e1000e_get_phy_id - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++s32 e1000e_get_phy_id(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_id; ++ u16 retry_count = 0; ++ ++ if (!(phy->ops.read_reg)) ++ goto out; ++ ++ while (retry_count < 2) { ++ ret_val = e1e_rphy(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id = (u32)(phy_id << 16); ++ udelay(20); ++ ret_val = e1e_rphy(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ goto out; ++ ++ phy->id |= (u32)(phy_id & PHY_REVISION_MASK); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ ++ if (phy->id != 0 && phy->id != PHY_REVISION_MASK) ++ goto out; ++ ++ retry_count++; ++ } ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_reset_dsp - Reset PHY DSP ++ * @hw: pointer to the HW structure ++ * ++ * Reset the digital signal processor. ++ **/ ++s32 e1000e_phy_reset_dsp(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); ++ if (ret_val) ++ return ret_val; ++ ++ return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0); ++} ++ ++/** ++ * e1000e_read_phy_reg_mdic - Read MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the MDI control register in the PHY at offset and stores the ++ * information read to data. ++ **/ ++s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ e_dbg("PHY Address %d is out of range\n", offset); ++ return -E1000_ERR_PARAM; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. 
++ */ ++ mdic = ((offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_READ)); ++ ++ ew32(MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Read did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ *data = (u16) mdic; ++ ++ /* ++ * Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ udelay(100); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_write_phy_reg_mdic - Write MDI control register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write to register at offset ++ * ++ * Writes data to MDI control register in the PHY at offset. ++ **/ ++s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, mdic = 0; ++ ++ if (offset > MAX_PHY_REG_ADDRESS) { ++ e_dbg("PHY Address %d is out of range\n", offset); ++ return -E1000_ERR_PARAM; ++ } ++ ++ /* ++ * Set up Op-code, Phy Address, and register offset in the MDI ++ * Control register. The MAC will take care of interfacing with the ++ * PHY to retrieve the desired data. ++ */ ++ mdic = (((u32)data) | ++ (offset << E1000_MDIC_REG_SHIFT) | ++ (phy->addr << E1000_MDIC_PHY_SHIFT) | ++ (E1000_MDIC_OP_WRITE)); ++ ++ ew32(MDIC, mdic); ++ ++ /* ++ * Poll the ready bit to see if the MDI read completed ++ * Increasing the time out as testing showed failures with ++ * the lower time out ++ */ ++ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { ++ udelay(50); ++ mdic = er32(MDIC); ++ if (mdic & E1000_MDIC_READY) ++ break; ++ } ++ if (!(mdic & E1000_MDIC_READY)) { ++ e_dbg("MDI Write did not complete\n"); ++ return -E1000_ERR_PHY; ++ } ++ if (mdic & E1000_MDIC_ERROR) { ++ e_dbg("MDI Error\n"); ++ return -E1000_ERR_PHY; ++ } ++ ++ /* ++ * Allow some time after each MDIC transaction to avoid ++ * reading duplicate data in the next MDIC transaction. ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ udelay(100); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_read_phy_reg_m88 - Read m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_m88 - Write m88 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * @brief Set page as on IGP-like PHY(s) ++ * @param hw pointer to the HW structure ++ * @param page page to set (shifted left when necessary) ++ * ++ * Sets PHY page required for PHY register access. Assumes semaphore is ++ * already acquired. Note, this function sets phy.addr to 1 so the caller ++ * must set it appropriately (if necessary) after this function returns. ++ */ ++s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) ++{ ++ e_dbg("Setting page 0x%x\n", page); ++ ++ hw->phy.addr = 1; ++ ++ return e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); ++} ++ ++/** ++ * __e1000e_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and stores the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++static s32 __e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked) ++{ ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++release: ++ if (!locked) ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_igp - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset and stores the ++ * retrieved information in data. ++ * Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000e_read_phy_reg_igp(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_read_phy_reg_igp_locked - Read igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired. ++ **/ ++s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000e_read_phy_reg_igp(hw, offset, data, true); ++} ++ ++/** ++ * e1000e_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++static s32 __e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked) ++{ ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ IGP01E1000_PHY_PAGE_SELECT, ++ (u16)offset); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++release: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_igp - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000e_write_phy_reg_igp(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_write_phy_reg_igp_locked - Write igp PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. ++ * Assumes semaphore already acquired. ++ **/ ++s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000e_write_phy_reg_igp(hw, offset, data, true); ++} ++ ++/** ++ * __e1000_read_kmrn_reg - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary. Then reads the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release any acquired semaphores before exiting. ++ **/ ++static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ kmrnctrlsta = er32(KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_kmrn_reg - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset using the ++ * kumeran interface. The information retrieved is stored in data. ++ * Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_kmrn_reg(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_read_kmrn_reg_locked - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset using the kumeran interface. The ++ * information retrieved is stored in data. ++ * Assumes semaphore already acquired. 
++ **/ ++s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_kmrn_reg(hw, offset, data, true); ++} ++ ++/** ++ * __e1000_write_kmrn_reg - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary. Then write the data to PHY register ++ * at the offset using the kumeran interface. Release any acquired semaphores ++ * before exiting. ++ **/ ++static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ if (!locked) { ++ if (!(hw->phy.ops.acquire)) ++ goto out; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_kmrn_reg - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to the PHY register at the offset ++ * using the kumeran interface. Release the acquired semaphore before exiting. ++ **/ ++s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_kmrn_reg(hw, offset, data, false); ++} ++ ++/** ++ * e1000e_write_kmrn_reg_locked - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Write the data to PHY register at the offset using the kumeran interface. ++ * Assumes semaphore already acquired. ++ **/ ++s32 e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_kmrn_reg(hw, offset, data, true); ++} ++ ++/** ++ * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up Carrier-sense on Transmit and downshift values. ++ **/ ++s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. */ ++ ret_val = e1e_rphy(hw, I82577_CFG_REG, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; ++ ++ /* Enable downshift */ ++ phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; ++ ++ ret_val = e1e_wphy(hw, I82577_CFG_REG, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_copper_link_setup_m88 - Setup m88 PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock ++ * and downshift values are set also. ++ **/ ++s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ ++ /* Enable CRS on Tx. This must be set for half-duplex operation. 
*/ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* For BM PHY this bit is downshift enable */ ++ if (phy->type != e1000_phy_bm) ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ++ switch (phy->mdix) { ++ case 1: ++ phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; ++ break; ++ case 2: ++ phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; ++ break; ++ case 3: ++ phy_data |= M88E1000_PSCR_AUTO_X_1000T; ++ break; ++ case 0: ++ default: ++ phy_data |= M88E1000_PSCR_AUTO_X_MODE; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; ++ if (phy->disable_polarity_correction == 1) ++ phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; ++ ++ /* Enable downshift on BM (disabled by default) */ ++ if (phy->type == e1000_phy_bm) ++ phy_data |= BME1000_PSCR_ENABLE_DOWNSHIFT; ++ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((phy->type == e1000_phy_m88) && ++ (phy->revision < E1000_REVISION_4) && ++ (phy->id != BME1000_E_PHY_ID_R2)) { ++ /* ++ * Force TX_CLK in the Extended PHY Specific Control Register ++ * to 25MHz clock. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ++ if ((phy->revision == 2) && ++ (phy->id == M88E1111_I_PHY_ID)) { ++ /* 82573L PHY - set the downshift counter to 5x. */ ++ phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; ++ } else { ++ /* Configure Master and Slave downshift values */ ++ phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); ++ phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | ++ M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); ++ } ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if ((phy->type == e1000_phy_bm) && (phy->id == BME1000_E_PHY_ID_R2)) { ++ /* Set PHY page 0, register 29 to 0x0003 */ ++ ret_val = e1e_wphy(hw, 29, 0x0003); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set PHY page 0, register 30 to 0x0000 */ ++ ret_val = e1e_wphy(hw, 30, 0x0000); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Commit the changes. */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) { ++ e_dbg("Error committing the PHY changes\n"); ++ return ret_val; ++ } ++ ++ if (phy->type == e1000_phy_82578) { ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* 82578 PHY - set the downshift count to 1x. */ ++ phy_data |= I82578_EPSCR_DOWNSHIFT_ENABLE; ++ phy_data &= ~I82578_EPSCR_DOWNSHIFT_COUNTER_MASK; ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_copper_link_setup_igp - Setup igp PHY's for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for ++ * igp PHY's. 
++ **/ ++s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1000_phy_hw_reset(hw); ++ if (ret_val) { ++ e_dbg("Error resetting the PHY.\n"); ++ return ret_val; ++ } ++ ++ /* ++ * Wait 100ms for MAC to configure PHY from NVM settings, to avoid ++ * timeout issues when LFS is enabled. ++ */ ++ msleep(100); ++ ++ /* disable lplu d0 during driver init */ ++ ret_val = e1000_set_d0_lplu_state(hw, false); ++ if (ret_val) { ++ e_dbg("Error Disabling LPLU D0\n"); ++ return ret_val; ++ } ++ /* Configure mdi-mdix settings */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ ++ switch (phy->mdix) { ++ case 1: ++ data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 2: ++ data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ break; ++ case 0: ++ default: ++ data |= IGP01E1000_PSCR_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* set auto-master slave resolution settings */ ++ if (hw->mac.autoneg) { ++ /* ++ * when autonegotiation advertisement is only 1000Mbps then we ++ * should disable SmartSpeed and enable Auto MasterSlave ++ * resolution as hardware default. ++ */ ++ if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { ++ /* Disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Set auto Master/Slave resolution process */ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~CR_1000T_MS_ENABLE; ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* load defaults for future use */ ++ phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ? ++ ((data & CR_1000T_MS_VALUE) ? ++ e1000_ms_force_master : ++ e1000_ms_force_slave) : ++ e1000_ms_auto; ++ ++ switch (phy->ms_type) { ++ case e1000_ms_force_master: ++ data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_force_slave: ++ data |= CR_1000T_MS_ENABLE; ++ data &= ~(CR_1000T_MS_VALUE); ++ break; ++ case e1000_ms_auto: ++ data &= ~CR_1000T_MS_ENABLE; ++ default: ++ break; ++ } ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, data); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation ++ * @hw: pointer to the HW structure ++ * ++ * Reads the MII auto-neg advertisement register and/or the 1000T control ++ * register and if the PHY is already setup for auto-negotiation, then ++ * return successful. Otherwise, setup advertisement and flow control to ++ * the appropriate values for the wanted auto-negotiation. ++ **/ ++static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 mii_autoneg_adv_reg; ++ u16 mii_1000t_ctrl_reg = 0; ++ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* Read the MII Auto-Neg Advertisement Register (Address 4). */ ++ ret_val = e1e_rphy(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) { ++ /* Read the MII 1000Base-T Control Register (Address 9). 
*/ ++ ret_val = e1e_rphy(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Need to parse both autoneg_advertised and fc and set up ++ * the appropriate PHY registers. First we will parse for ++ * autoneg_advertised software override. Since we can advertise ++ * a plethora of combinations, we need to check each bit ++ * individually. ++ */ ++ ++ /* ++ * First we clear all the 10/100 mb speed bits in the Auto-Neg ++ * Advertisement Register (Address 4) and the 1000 mb speed bits in ++ * the 1000Base-T Control Register (Address 9). ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | ++ NWAY_AR_100TX_HD_CAPS | ++ NWAY_AR_10T_FD_CAPS | ++ NWAY_AR_10T_HD_CAPS); ++ mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); ++ ++ e_dbg("autoneg_advertised %x\n", phy->autoneg_advertised); ++ ++ /* Do we want to advertise 10 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_HALF) { ++ e_dbg("Advertise 10mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 10 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_10_FULL) { ++ e_dbg("Advertise 10mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Half Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_HALF) { ++ e_dbg("Advertise 100mb Half duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; ++ } ++ ++ /* Do we want to advertise 100 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_100_FULL) { ++ e_dbg("Advertise 100mb Full duplex\n"); ++ mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; ++ } ++ ++ /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_HALF) ++ e_dbg("Advertise 1000mb Half duplex request denied!\n"); ++ ++ /* Do we want to advertise 1000 Mb Full Duplex? */ ++ if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { ++ e_dbg("Advertise 1000mb Full duplex\n"); ++ mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; ++ } ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the PHY advertisement registers accordingly. If ++ * auto-negotiation is enabled, then software will have to set the ++ * "PAUSE" bits to the correct value in the Auto-Negotiation ++ * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- ++ * negotiation. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * but we do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ * other: No software override. The flow control configuration ++ * in the EEPROM is used. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* ++ * Flow control (Rx & Tx) is completely disabled by a ++ * software over-ride. ++ */ ++ mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled, and Tx Flow control is ++ * disabled, by a software over-ride. ++ * ++ * Since there really isn't a way to advertise that we are ++ * capable of Rx Pause ONLY, we will advertise that we ++ * support both symmetric and asymmetric Rx PAUSE. Later ++ * (in e1000e_config_fc_after_link_up) we will disable the ++ * hw's ability to send PAUSE frames. 
++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is ++ * disabled, by a software over-ride. ++ */ ++ mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; ++ mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ return ret_val; ++ } ++ ++ ret_val = e1e_wphy(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); ++ ++ if (phy->autoneg_mask & ADVERTISE_1000_FULL) ++ ret_val = e1e_wphy(hw, PHY_1000T_CTRL, mii_1000t_ctrl_reg); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link ++ * @hw: pointer to the HW structure ++ * ++ * Performs initial bounds checking on autoneg advertisement parameter, then ++ * configure to advertise the full capability. Setup the PHY to autoneg ++ * and restart the negotiation process between the link partner. If ++ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. ++ **/ ++static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ /* ++ * Perform some bounds checking on the autoneg advertisement ++ * parameter. ++ */ ++ phy->autoneg_advertised &= phy->autoneg_mask; ++ ++ /* ++ * If autoneg_advertised is zero, we assume it was not defaulted ++ * by the calling code so we set to advertise full capability. ++ */ ++ if (phy->autoneg_advertised == 0) ++ phy->autoneg_advertised = phy->autoneg_mask; ++ ++ e_dbg("Reconfiguring auto-neg advertisement params\n"); ++ ret_val = e1000_phy_setup_autoneg(hw); ++ if (ret_val) { ++ e_dbg("Error Setting up Auto-Negotiation\n"); ++ return ret_val; ++ } ++ e_dbg("Restarting Auto-Neg\n"); ++ ++ /* ++ * Restart auto-negotiation by setting the Auto Neg Enable bit and ++ * the Auto Neg Restart bit in the PHY control register. ++ */ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Does the user want to wait for Auto-Neg to complete here, or ++ * check at a later time (for example, callback routine). ++ */ ++ if (phy->autoneg_wait_to_complete) { ++ ret_val = e1000_wait_autoneg(hw); ++ if (ret_val) { ++ e_dbg("Error while waiting for " ++ "autoneg to complete\n"); ++ return ret_val; ++ } ++ } ++ ++ hw->mac.get_link_status = 1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_setup_copper_link - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Calls the appropriate function to configure the link for auto-neg or forced ++ * speed and duplex. Then we check for link, once link is established calls ++ * to configure collision distance and flow control are called. If link is ++ * not established, we return -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000e_setup_copper_link(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ bool link; ++ ++ if (hw->mac.autoneg) { ++ /* ++ * Setup autoneg and flow control advertisement and perform ++ * autonegotiation. 
++ */ ++ ret_val = e1000_copper_link_autoneg(hw); ++ if (ret_val) ++ return ret_val; ++ } else { ++ /* ++ * PHY will be set to 10H, 10F, 100H or 100F ++ * depending on user settings. ++ */ ++ e_dbg("Forcing Speed and Duplex\n"); ++ ret_val = e1000_phy_force_speed_duplex(hw); ++ if (ret_val) { ++ e_dbg("Error Forcing Speed and Duplex\n"); ++ return ret_val; ++ } ++ } ++ ++ /* ++ * Check link status. Wait up to 100 microseconds for link to become ++ * valid. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ COPPER_LINK_UP_LIMIT, ++ 10, ++ &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (link) { ++ e_dbg("Valid link established!!!\n"); ++ e1000e_config_collision_dist(hw); ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ } else { ++ e_dbg("Unable to establish link!!!\n"); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Waits for link and returns ++ * successful if link up is successful, else -E1000_ERR_PHY (-2). ++ **/ ++s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. IGP requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; ++ phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; ++ ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("IGP PSCR: %X\n", phy_data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on IGP phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. Clears the ++ * auto-crossover to force MDI manually. Resets the PHY to commit the ++ * changes. If time expires while waiting for link up, we reset the DSP. ++ * After reset, TX_CLK and CRS on Tx must be set. Return successful upon ++ * successful completion, else return corresponding error code. ++ **/ ++s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. 
++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("M88E1000 PSCR: %X\n", phy_data); ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Reset the phy to commit changes. */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on M88 phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ if (hw->phy.type != e1000_phy_m88) { ++ e_dbg("Link taking longer than expected.\n"); ++ } else { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. ++ */ ++ ret_val = e1e_wphy(hw, M88E1000_PHY_PAGE_SELECT, ++ 0x001d); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_phy_reset_dsp(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->phy.type != e1000_phy_m88) ++ return 0; ++ ++ ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Resetting the phy means we need to re-force TX_CLK in the ++ * Extended PHY Specific Control Register to 25MHz clock from ++ * the reset value of 2.5MHz. ++ */ ++ phy_data |= M88E1000_EPSCR_TX_CLK_25; ++ ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex ++ * @hw: pointer to the HW structure ++ * ++ * Forces the speed and duplex settings of the PHY. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ /* Disable MDI-X support for 10/100 */ ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= ~IFE_PMC_AUTO_MDIX; ++ data &= ~IFE_PMC_FORCE_MDIX; ++ ++ ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, data); ++ if (ret_val) ++ goto out; ++ ++ e_dbg("IFE PMC: %X\n", data); ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on IFE phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * @phy_ctrl: pointer to current value of PHY_CONTROL ++ * ++ * Forces speed and duplex on the PHY by doing the following: disable flow ++ * control, force speed/duplex on the MAC, disable auto speed detection, ++ * disable auto-negotiation, configure duplex, configure speed, configure ++ * the collision distance, write configuration to CTRL register. The ++ * caller must write to the PHY_CONTROL register for these settings to ++ * take affect. ++ **/ ++void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl; ++ ++ /* Turn off flow control when forcing speed/duplex */ ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* Force speed/duplex on the mac */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ctrl &= ~E1000_CTRL_SPD_SEL; ++ ++ /* Disable Auto Speed Detection */ ++ ctrl &= ~E1000_CTRL_ASDE; ++ ++ /* Disable autoneg on the phy */ ++ *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; ++ ++ /* Forcing Full or Half Duplex? */ ++ if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { ++ ctrl &= ~E1000_CTRL_FD; ++ *phy_ctrl &= ~MII_CR_FULL_DUPLEX; ++ e_dbg("Half Duplex\n"); ++ } else { ++ ctrl |= E1000_CTRL_FD; ++ *phy_ctrl |= MII_CR_FULL_DUPLEX; ++ e_dbg("Full Duplex\n"); ++ } ++ ++ /* Forcing 10mb or 100mb? */ ++ if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { ++ ctrl |= E1000_CTRL_SPD_100; ++ *phy_ctrl |= MII_CR_SPEED_100; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10); ++ e_dbg("Forcing 100mb\n"); ++ } else { ++ ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ *phy_ctrl |= MII_CR_SPEED_10; ++ *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); ++ e_dbg("Forcing 10mb\n"); ++ } ++ ++ e1000e_config_collision_dist(hw); ++ ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000e_set_d3_lplu_state - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D3 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. 
LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!active) { ++ data &= ~IGP02E1000_PM_D3_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= IGP02E1000_PM_D3_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_check_downshift - Checks whether a downshift in speed occurred ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * A downshift is detected by querying the PHY link health. ++ **/ ++s32 e1000e_check_downshift(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ switch (phy->type) { ++ case e1000_phy_m88: ++ case e1000_phy_gg82563: ++ case e1000_phy_bm: ++ case e1000_phy_82578: ++ offset = M88E1000_PHY_SPEC_STATUS; ++ mask = M88E1000_PSSR_DOWNSHIFT; ++ break; ++ case e1000_phy_igp_2: ++ case e1000_phy_igp_3: ++ offset = IGP01E1000_PHY_LINK_HEALTH; ++ mask = IGP01E1000_PLHR_SS_DOWNGRADE; ++ break; ++ default: ++ /* speed downshift not supported */ ++ phy->speed_downgraded = false; ++ return 0; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->speed_downgraded = (phy_data & mask); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_m88 - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY) ++ ? 
e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_igp - Checks the polarity. ++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY port status register, and the ++ * current speed (since there is no polarity at 100Mbps). ++ **/ ++s32 e1000_check_polarity_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data, offset, mask; ++ ++ /* ++ * Polarity is determined based on the speed of ++ * our connection. ++ */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ offset = IGP01E1000_PHY_PCS_INIT_REG; ++ mask = IGP01E1000_PHY_POLARITY_MASK; ++ } else { ++ /* ++ * This really only applies to 10Mbps since ++ * there is no polarity for 100Mbps (always 0). ++ */ ++ offset = IGP01E1000_PHY_PORT_STATUS; ++ mask = IGP01E1000_PSSR_POLARITY_REVERSED; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_ife - Check cable polarity for IFE PHY ++ * @hw: pointer to the HW structure ++ * ++ * Polarity is determined on the polarity reversal feature being enabled. ++ **/ ++s32 e1000_check_polarity_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, offset, mask; ++ ++ /* ++ * Polarity is determined based on the reversal feature being enabled. ++ */ ++ if (phy->polarity_correction) { ++ offset = IFE_PHY_EXTENDED_STATUS_CONTROL; ++ mask = IFE_PESC_POLARITY_REVERSED; ++ } else { ++ offset = IFE_PHY_SPECIAL_CONTROL; ++ mask = IFE_PSC_FORCE_POLARITY; ++ } ++ ++ ret_val = e1e_rphy(hw, offset, &phy_data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (phy_data & mask) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_wait_autoneg - Wait for auto-neg completion ++ * @hw: pointer to the HW structure ++ * ++ * Waits for auto-negotiation to complete or for the auto-negotiation time ++ * limit to expire, which ever happens first. ++ **/ ++static s32 e1000_wait_autoneg(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ ++ for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_AUTONEG_COMPLETE) ++ break; ++ msleep(100); ++ } ++ ++ /* ++ * PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation ++ * has completed. ++ */ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_has_link_generic - Polls PHY for link ++ * @hw: pointer to the HW structure ++ * @iterations: number of times to poll for link ++ * @usec_interval: delay between polling attempts ++ * @success: pointer to whether polling was successful or not ++ * ++ * Polls the PHY status register for link, 'iterations' number of times. 
++ **/ ++s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success) ++{ ++ s32 ret_val = 0; ++ u16 i, phy_status; ++ ++ for (i = 0; i < iterations; i++) { ++ /* ++ * Some PHYs require the PHY_STATUS register to be read ++ * twice due to the link bit being sticky. No harm doing ++ * it across the board. ++ */ ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ /* ++ * If the first read fails, another entity may have ++ * ownership of the resources, wait and try again to ++ * see if they have relinquished the resources yet. ++ */ ++ udelay(usec_interval); ++ ret_val = e1e_rphy(hw, PHY_STATUS, &phy_status); ++ if (ret_val) ++ break; ++ if (phy_status & MII_SR_LINK_STATUS) ++ break; ++ if (usec_interval >= 1000) ++ mdelay(usec_interval/1000); ++ else ++ udelay(usec_interval); ++ } ++ ++ *success = (i < iterations); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_cable_length_m88 - Determine cable length for m88 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY specific status register to retrieve the cable length ++ * information. The cable length is determined by averaging the minimum and ++ * maximum values to get the "average" cable length. The m88 PHY has four ++ * possible cable length values, which are: ++ * Register Value Cable Length ++ * 0 < 50 meters ++ * 1 50 - 80 meters ++ * 2 80 - 110 meters ++ * 3 110 - 140 meters ++ * 4 > 140 meters ++ **/ ++s32 e1000e_get_cable_length_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, index; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> ++ M88E1000_PSSR_CABLE_LENGTH_SHIFT; ++ if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_m88_cable_length_table[index]; ++ phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_cable_length_igp_2 - Determine cable length for igp2 PHY ++ * @hw: pointer to the HW structure ++ * ++ * The automatic gain control (agc) normalizes the amplitude of the ++ * received signal, adjusting for the attenuation produced by the ++ * cable. By reading the AGC registers, which represent the ++ * combination of coarse and fine gain value, the value can be put ++ * into a lookup table to obtain the approximate cable length ++ * for each channel. ++ **/ ++s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, i, agc_value = 0; ++ u16 cur_agc_index, max_agc_index = 0; ++ u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; ++ static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { ++ IGP02E1000_PHY_AGC_A, ++ IGP02E1000_PHY_AGC_B, ++ IGP02E1000_PHY_AGC_C, ++ IGP02E1000_PHY_AGC_D ++ }; ++ ++ /* Read the AGC registers for all channels */ ++ for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { ++ ret_val = e1e_rphy(hw, agc_reg_array[i], &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Getting bits 15:9, which represent the combination of ++ * coarse and fine gain values. The result is a number ++ * that can be put into the lookup table to obtain the ++ * approximate cable length. 
++ */ ++ cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & ++ IGP02E1000_AGC_LENGTH_MASK; ++ ++ /* Array index bound check. */ ++ if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || ++ (cur_agc_index == 0)) ++ return -E1000_ERR_PHY; ++ ++ /* Remove min & max AGC values from calculation. */ ++ if (e1000_igp_2_cable_length_table[min_agc_index] > ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ min_agc_index = cur_agc_index; ++ if (e1000_igp_2_cable_length_table[max_agc_index] < ++ e1000_igp_2_cable_length_table[cur_agc_index]) ++ max_agc_index = cur_agc_index; ++ ++ agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; ++ } ++ ++ agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + ++ e1000_igp_2_cable_length_table[max_agc_index]); ++ agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); ++ ++ /* Calculate cable length with the error range of +/- 10 meters. */ ++ phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ? ++ (agc_value - IGP02E1000_AGC_RANGE) : 0; ++ phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_phy_info_m88 - Retrieve PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Valid for only copper links. Read the PHY status register (sticky read) ++ * to verify that link is up. Read the PHY special control register to ++ * determine the polarity and 10base-T extended distance. Read the PHY ++ * special status register to determine MDI/MDIx and current speed. If ++ * speed is 1000, then determine cable length, local and remote receiver. ++ **/ ++s32 e1000e_get_phy_info_m88(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ if (phy->media_type != e1000_media_type_copper) { ++ e_dbg("Phy info is only valid for copper media\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->polarity_correction = (phy_data & ++ M88E1000_PSCR_POLARITY_REVERSAL); ++ ++ ret_val = e1000_check_polarity_m88(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX); ++ ++ if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ /* Set values to "undefined" */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_get_phy_info_igp - Retrieve igp PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. 
If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000e_get_phy_info_igp(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = e1000_check_polarity_igp(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->is_mdix = (data & IGP01E1000_PSSR_MDIX); ++ ++ if ((data & IGP01E1000_PSSR_SPEED_MASK) == ++ IGP01E1000_PSSR_SPEED_1000MBPS) { ++ ret_val = e1000_get_cable_length(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ return ret_val; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_ife - Retrieves various IFE PHY states ++ * @hw: pointer to the HW structure ++ * ++ * Populates "phy" structure with various feature states. ++ **/ ++s32 e1000_get_phy_info_ife(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ ret_val = e1e_rphy(hw, IFE_PHY_SPECIAL_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ phy->polarity_correction = (data & IFE_PSC_AUTO_POLARITY_DISABLE) ++ ? false : true; ++ ++ if (phy->polarity_correction) { ++ ret_val = e1000_check_polarity_ife(hw); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Polarity is forced */ ++ phy->cable_polarity = (data & IFE_PSC_FORCE_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ } ++ ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & IFE_PMC_MDIX_STATUS) ? true : false; ++ ++ /* The following parameters are undefined for 10/100 operation. */ ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_sw_reset - PHY software reset ++ * @hw: pointer to the HW structure ++ * ++ * Does a software reset of the PHY by reading the PHY control register and ++ * setting/write the control register reset bit to the PHY. 
++ **/ ++s32 e1000e_phy_sw_reset(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_ctrl; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ phy_ctrl |= MII_CR_RESET; ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_ctrl); ++ if (ret_val) ++ return ret_val; ++ ++ udelay(1); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_phy_hw_reset_generic - PHY hardware reset ++ * @hw: pointer to the HW structure ++ * ++ * Verify the reset block is not blocking us from resetting. Acquire ++ * semaphore (if necessary) and read/set/write the device control reset ++ * bit in the PHY. Wait the appropriate delay time for the device to ++ * reset and release the semaphore (if necessary). ++ **/ ++s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl; ++ ++ ret_val = e1000_check_reset_block(hw); ++ if (ret_val) ++ return 0; ++ ++ ret_val = phy->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ctrl = er32(CTRL); ++ ew32(CTRL, ctrl | E1000_CTRL_PHY_RST); ++ e1e_flush(); ++ ++ udelay(phy->reset_delay_us); ++ ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ ++ udelay(150); ++ ++ phy->ops.release(hw); ++ ++ return e1000_get_phy_cfg_done(hw); ++} ++ ++/** ++ * e1000e_get_cfg_done - Generic configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Generic function to wait 10 milli-seconds for configuration to complete ++ * and return success. ++ **/ ++s32 e1000e_get_cfg_done(struct e1000_hw *hw) ++{ ++ mdelay(10); ++ return 0; ++} ++ ++/** ++ * e1000e_phy_init_script_igp3 - Inits the IGP3 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. ++ **/ ++s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw) ++{ ++ e_dbg("Running IGP 3 PHY init script\n"); ++ ++ /* PHY init IGP 3 */ ++ /* Enable rise/fall, 10-mode work in class-A */ ++ e1e_wphy(hw, 0x2F5B, 0x9018); ++ /* Remove all caps from Replica path filter */ ++ e1e_wphy(hw, 0x2F52, 0x0000); ++ /* Bias trimming for ADC, AFE and Driver (Default) */ ++ e1e_wphy(hw, 0x2FB1, 0x8B24); ++ /* Increase Hybrid poly bias */ ++ e1e_wphy(hw, 0x2FB2, 0xF8F0); ++ /* Add 4% to Tx amplitude in Gig mode */ ++ e1e_wphy(hw, 0x2010, 0x10B0); ++ /* Disable trimming (TTT) */ ++ e1e_wphy(hw, 0x2011, 0x0000); ++ /* Poly DC correction to 94.6% + 2% for all channels */ ++ e1e_wphy(hw, 0x20DD, 0x249A); ++ /* ABS DC correction to 95.9% */ ++ e1e_wphy(hw, 0x20DE, 0x00D3); ++ /* BG temp curve trim */ ++ e1e_wphy(hw, 0x28B4, 0x04CE); ++ /* Increasing ADC OPAMP stage 1 currents to max */ ++ e1e_wphy(hw, 0x2F70, 0x29E4); ++ /* Force 1000 ( required for enabling PHY regs configuration) */ ++ e1e_wphy(hw, 0x0000, 0x0140); ++ /* Set upd_freq to 6 */ ++ e1e_wphy(hw, 0x1F30, 0x1606); ++ /* Disable NPDFE */ ++ e1e_wphy(hw, 0x1F31, 0xB814); ++ /* Disable adaptive fixed FFE (Default) */ ++ e1e_wphy(hw, 0x1F35, 0x002A); ++ /* Enable FFE hysteresis */ ++ e1e_wphy(hw, 0x1F3E, 0x0067); ++ /* Fixed FFE for short cable lengths */ ++ e1e_wphy(hw, 0x1F54, 0x0065); ++ /* Fixed FFE for medium cable lengths */ ++ e1e_wphy(hw, 0x1F55, 0x002A); ++ /* Fixed FFE for long cable lengths */ ++ e1e_wphy(hw, 0x1F56, 0x002A); ++ /* Enable Adaptive Clip Threshold */ ++ e1e_wphy(hw, 0x1F72, 0x3FB0); ++ /* AHT reset limit to 1 */ ++ e1e_wphy(hw, 0x1F76, 0xC0FF); ++ /* Set AHT master delay to 127 msec */ ++ e1e_wphy(hw, 0x1F77, 0x1DEC); ++ /* Set scan bits for AHT */ ++ e1e_wphy(hw, 0x1F78, 0xF9EF); ++ /* Set AHT Preset bits */ ++ e1e_wphy(hw, 0x1F79, 
0x0210); ++ /* Change integ_factor of channel A to 3 */ ++ e1e_wphy(hw, 0x1895, 0x0003); ++ /* Change prop_factor of channels BCD to 8 */ ++ e1e_wphy(hw, 0x1796, 0x0008); ++ /* Change cg_icount + enable integbp for channels BCD */ ++ e1e_wphy(hw, 0x1798, 0xD008); ++ /* ++ * Change cg_icount + enable integbp + change prop_factor_master ++ * to 8 for channel A ++ */ ++ e1e_wphy(hw, 0x1898, 0xD918); ++ /* Disable AHT in Slave mode on channel A */ ++ e1e_wphy(hw, 0x187A, 0x0800); ++ /* ++ * Enable LPLU and disable AN to 1000 in non-D0a states, ++ * Enable SPD+B2B ++ */ ++ e1e_wphy(hw, 0x0019, 0x008D); ++ /* Enable restart AN on an1000_dis change */ ++ e1e_wphy(hw, 0x001B, 0x2080); ++ /* Enable wh_fifo read clock in 10/100 modes */ ++ e1e_wphy(hw, 0x0014, 0x0045); ++ /* Restart AN, Speed selection is 1000 */ ++ e1e_wphy(hw, 0x0000, 0x1340); ++ ++ return 0; ++} ++ ++/* Internal function pointers */ ++ ++/** ++ * e1000_get_phy_cfg_done - Generic PHY configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Return success if silicon family did not implement a family specific ++ * get_cfg_done function. ++ **/ ++static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.get_cfg_done) ++ return hw->phy.ops.get_cfg_done(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex - Generic force PHY speed/duplex ++ * @hw: pointer to the HW structure ++ * ++ * When the silicon family has not implemented a forced speed/duplex ++ * function for the PHY, simply return 0. ++ **/ ++static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.force_speed_duplex) ++ return hw->phy.ops.force_speed_duplex(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_phy_type_from_id - Get PHY type from id ++ * @phy_id: phy_id read from the phy ++ * ++ * Returns the phy type from the id. ++ **/ ++enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id) ++{ ++ enum e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ switch (phy_id) { ++ case M88E1000_I_PHY_ID: ++ case M88E1000_E_PHY_ID: ++ case M88E1111_I_PHY_ID: ++ case M88E1011_I_PHY_ID: ++ phy_type = e1000_phy_m88; ++ break; ++ case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ ++ phy_type = e1000_phy_igp_2; ++ break; ++ case GG82563_E_PHY_ID: ++ phy_type = e1000_phy_gg82563; ++ break; ++ case IGP03E1000_E_PHY_ID: ++ phy_type = e1000_phy_igp_3; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy_type = e1000_phy_ife; ++ break; ++ case BME1000_E_PHY_ID: ++ case BME1000_E_PHY_ID_R2: ++ phy_type = e1000_phy_bm; ++ break; ++ case I82578_E_PHY_ID: ++ phy_type = e1000_phy_82578; ++ break; ++ case I82577_E_PHY_ID: ++ phy_type = e1000_phy_82577; ++ break; ++ case I82579_E_PHY_ID: ++ phy_type = e1000_phy_82579; ++ break; ++ case I217_E_PHY_ID: ++ phy_type = e1000_phy_i217; ++ break; ++ default: ++ phy_type = e1000_phy_unknown; ++ break; ++ } ++ return phy_type; ++} ++ ++/** ++ * e1000e_determine_phy_address - Determines PHY address. ++ * @hw: pointer to the HW structure ++ * ++ * This uses a trial and error method to loop through possible PHY ++ * addresses. It tests each by reading the PHY ID registers and ++ * checking for a match. 
++ **/ ++s32 e1000e_determine_phy_address(struct e1000_hw *hw) ++{ ++ s32 ret_val = -E1000_ERR_PHY_TYPE; ++ u32 phy_addr = 0; ++ u32 i; ++ enum e1000_phy_type phy_type = e1000_phy_unknown; ++ ++ hw->phy.id = phy_type; ++ ++ for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { ++ hw->phy.addr = phy_addr; ++ i = 0; ++ ++ do { ++ e1000e_get_phy_id(hw); ++ phy_type = e1000e_get_phy_type_from_id(hw->phy.id); ++ ++ /* ++ * If phy_type is valid, break - we found our ++ * PHY address ++ */ ++ if (phy_type != e1000_phy_unknown) { ++ ret_val = 0; ++ goto out; ++ } ++ usleep_range(1000, 2000); ++ i++; ++ } while (i < 10); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * @brief Retrieve PHY page address ++ * @param page page to access ++ * ++ * @return PHY address for the page requested. ++ **/ ++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg) ++{ ++ u32 phy_addr = 2; ++ ++ if ((page >= 768) || (page == 0 && reg == 25) || (reg == 31)) ++ phy_addr = 1; ++ ++ return phy_addr; ++} ++ ++/** ++ * e1000e_write_phy_reg_bm - Write BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page = offset >> IGP_PAGE_SHIFT; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, false); ++ goto out; ++ } ++ ++ hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ u32 page_shift, page_select; ++ ++ /* ++ * Page select is register 31 for phy address 1 and 22 for ++ * phy address 2 and 3. Page select is shifted only for ++ * phy address 1. ++ */ ++ if (hw->phy.addr == 1) { ++ page_shift = IGP_PAGE_SHIFT; ++ page_select = IGP01E1000_PHY_PAGE_SELECT; ++ } else { ++ page_shift = 0; ++ page_select = BM_PHY_PAGE_SELECT; ++ } ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, ++ (page << page_shift)); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_bm - Read BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. 
++ **/ ++s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page = offset >> IGP_PAGE_SHIFT; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, false); ++ goto out; ++ } ++ ++ hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset); ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ u32 page_shift, page_select; ++ ++ /* ++ * Page select is register 31 for phy address 1 and 22 for ++ * phy address 2 and 3. Page select is shifted only for ++ * phy address 1. ++ */ ++ if (hw->phy.addr == 1) { ++ page_shift = IGP_PAGE_SHIFT; ++ page_select = IGP01E1000_PHY_PAGE_SELECT; ++ } else { ++ page_shift = 0; ++ page_select = BM_PHY_PAGE_SELECT; ++ } ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, ++ (page << page_shift)); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_read_phy_reg_bm2 - Read BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and storing the retrieved information in data. Release any acquired ++ * semaphores before exiting. ++ **/ ++s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u16 page = (u16)(offset >> IGP_PAGE_SHIFT); ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, false); ++ goto out; ++ } ++ ++ hw->phy.addr = 1; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, ++ page); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_phy_reg_bm2 - Write BM PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. 
++ **/ ++s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u16 page = (u16)(offset >> IGP_PAGE_SHIFT); ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, false); ++ goto out; ++ } ++ ++ hw->phy.addr = 1; ++ ++ if (offset > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT, ++ page); ++ ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000_enable_phy_wakeup_reg_access_bm - enable access to BM wakeup registers ++ * @hw: pointer to the HW structure ++ * @phy_reg: pointer to store original contents of BM_WUC_ENABLE_REG ++ * ++ * Assumes semaphore already acquired and phy_reg points to a valid memory ++ * address to store contents of the BM_WUC_ENABLE_REG register. ++ **/ ++s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) ++{ ++ s32 ret_val; ++ u16 temp; ++ ++ /* All page select, port ctrl and wakeup registers use phy address 1 */ ++ hw->phy.addr = 1; ++ ++ /* Select Port Control Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); ++ if (ret_val) { ++ e_dbg("Could not set Port Control page\n"); ++ goto out; ++ } ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg); ++ if (ret_val) { ++ e_dbg("Could not read PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++ goto out; ++ } ++ ++ /* ++ * Enable both PHY wakeup mode and Wakeup register page writes. ++ * Prevent a power state change by disabling ME and Host PHY wakeup. ++ */ ++ temp = *phy_reg; ++ temp |= BM_WUC_ENABLE_BIT; ++ temp &= ~(BM_WUC_ME_WU_BIT | BM_WUC_HOST_WU_BIT); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, temp); ++ if (ret_val) { ++ e_dbg("Could not write PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++ goto out; ++ } ++ ++ /* Select Host Wakeup Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_WUC_PAGE << IGP_PAGE_SHIFT)); ++ ++ /* caller now able to write registers on the Wakeup registers page */ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_disable_phy_wakeup_reg_access_bm - disable access to BM wakeup regs ++ * @hw: pointer to the HW structure ++ * @phy_reg: pointer to original contents of BM_WUC_ENABLE_REG ++ * ++ * Restore BM_WUC_ENABLE_REG to its original value. ++ * ++ * Assumes semaphore already acquired and *phy_reg is the contents of the ++ * BM_WUC_ENABLE_REG before register(s) on BM_WUC_PAGE were accessed by ++ * caller. 
++ **/ ++s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg) ++{ ++ s32 ret_val = 0; ++ ++ /* Select Port Control Registers page */ ++ ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT)); ++ if (ret_val) { ++ e_dbg("Could not set Port Control page\n"); ++ goto out; ++ } ++ ++ /* Restore 769.17 to its original value */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, *phy_reg); ++ if (ret_val) ++ e_dbg("Could not restore PHY register %d.%d\n", ++ BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_access_phy_wakeup_reg_bm - Read/write BM PHY wakeup register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read or written ++ * @data: pointer to the data to read or write ++ * @read: determines if operation is read or write ++ * @page_set: BM_WUC_PAGE already set and access enabled ++ * ++ * Read the PHY register at offset and store the retrieved information in ++ * data, or write data to PHY register at offset. Note the procedure to ++ * access the PHY wakeup registers is different than reading the other PHY ++ * registers. It works as such: ++ * 1) Set 769.17.2 (page 769, register 17, bit 2) = 1 ++ * 2) Set page to 800 for host (801 if we were manageability) ++ * 3) Write the address using the address opcode (0x11) ++ * 4) Read or write the data using the data opcode (0x12) ++ * 5) Restore 769.17.2 to its original value ++ * ++ * Steps 1 and 2 are done by e1000_enable_phy_wakeup_reg_access_bm() and ++ * step 5 is done by e1000_disable_phy_wakeup_reg_access_bm(). ++ * ++ * Assumes semaphore is already acquired. When page_set==true, assumes ++ * the PHY page is set to BM_WUC_PAGE (i.e. a function in the call stack ++ * is responsible for calls to e1000_[enable|disable]_phy_wakeup_reg_bm()). 
++ **/ ++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read, bool page_set) ++{ ++ s32 ret_val; ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 phy_reg = 0; ++ ++ /* Gig must be disabled for MDIO accesses to Host Wakeup reg page */ ++ if ((hw->mac.type == e1000_pchlan) && ++ (!(er32(PHY_CTRL) & E1000_PHY_CTRL_GBE_DISABLE))) ++ e_dbg("Attempting to access page %d while gig enabled.\n", ++ page); ++ ++ if (!page_set) { ++ /* Enable access to PHY wakeup registers */ ++ ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ if (ret_val) { ++ e_dbg("Could not enable PHY wakeup reg access\n"); ++ goto out; ++ } ++ } ++ ++ e_dbg("Accessing PHY page %d reg 0x%x\n", page, reg); ++ ++ /* Write the Wakeup register page offset value using opcode 0x11 */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ADDRESS_OPCODE, reg); ++ if (ret_val) { ++ e_dbg("Could not write address opcode to page %d\n", page); ++ goto out; ++ } ++ ++ if (read) { ++ /* Read the Wakeup register page value using opcode 0x12 */ ++ ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, ++ data); ++ } else { ++ /* Write the Wakeup register page value using opcode 0x12 */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE, ++ *data); ++ } ++ ++ if (ret_val) { ++ e_dbg("Could not access PHY reg %d.%d\n", page, reg); ++ goto out; ++ } ++ ++ if (!page_set) ++ ret_val = e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_up_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1e_rphy(hw, PHY_CONTROL, &mii_reg); ++ mii_reg &= ~MII_CR_POWER_DOWN; ++ e1e_wphy(hw, PHY_CONTROL, mii_reg); ++} ++ ++/** ++ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, restore the link to previous ++ * settings. ++ **/ ++void e1000_power_down_phy_copper(struct e1000_hw *hw) ++{ ++ u16 mii_reg = 0; ++ ++ /* The PHY will retain its settings across a power down/up cycle */ ++ e1e_rphy(hw, PHY_CONTROL, &mii_reg); ++ mii_reg |= MII_CR_POWER_DOWN; ++ e1e_wphy(hw, PHY_CONTROL, mii_reg); ++ usleep_range(1000, 2000); ++} ++ ++/** ++ * e1000e_commit_phy - Soft PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Performs a soft PHY reset on those that apply. This is a function pointer ++ * entry point called by drivers. ++ **/ ++s32 e1000e_commit_phy(struct e1000_hw *hw) ++{ ++ if (hw->phy.ops.commit) ++ return hw->phy.ops.commit(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d0_lplu_state - Sets low power link up state for D0 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * Success returns 0, Failure returns 1 ++ * ++ * The low power link up (lplu) state is set to the power management level D0 ++ * and SmartSpeed is disabled when active is true, else clear lplu for D0 ++ * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. 
LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. This is a function pointer entry point called by drivers. ++ **/ ++static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) ++{ ++ if (hw->phy.ops.set_d0_lplu_state) ++ return hw->phy.ops.set_d0_lplu_state(hw, active); ++ ++ return 0; ++} ++ ++/** ++ * __e1000_read_phy_reg_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then reads the PHY register at offset ++ * and stores the retrieved information in data. Release any acquired ++ * semaphore before exiting. ++ **/ ++static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data, ++ bool locked, bool page_set) ++{ ++ s32 ret_val; ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); ++ ++ if (!locked) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data, ++ true, page_set); ++ goto out; ++ } ++ ++ if (page > 0 && page < HV_INTC_FC_PAGE_START) { ++ ret_val = e1000_access_phy_debug_regs_hv(hw, offset, ++ data, true); ++ goto out; ++ } ++ ++ if (!page_set) { ++ if (page == HV_INTC_FC_PAGE_START) ++ page = 0; ++ ++ if (reg > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000_set_page_igp(hw, ++ (page << IGP_PAGE_SHIFT)); ++ ++ hw->phy.addr = phy_addr; ++ ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ e_dbg("reading PHY page %d (or 0x%x shifted) reg 0x%x\n", page, ++ page << IGP_PAGE_SHIFT, reg); ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, ++ data); ++out: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_phy_reg_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquires semaphore then reads the PHY register at offset and stores ++ * the retrieved information in data. Release the acquired semaphore ++ * before exiting. ++ **/ ++s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, false, false); ++} ++ ++/** ++ * e1000_read_phy_reg_hv_locked - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired. ++ **/ ++s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, true, false); ++} ++ ++/** ++ * e1000_read_phy_reg_page_hv - Read HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Reads the PHY register at offset and stores the retrieved information ++ * in data. Assumes semaphore already acquired and page already set. 
++ **/ ++s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return __e1000_read_phy_reg_hv(hw, offset, data, true, true); ++} ++ ++/** ++ * __e1000_write_phy_reg_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * @locked: semaphore has already been acquired or not ++ * ++ * Acquires semaphore, if necessary, then writes the data to PHY register ++ * at the offset. Release any acquired semaphores before exiting. ++ **/ ++static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data, ++ bool locked, bool page_set) ++{ ++ s32 ret_val; ++ u16 page = BM_PHY_REG_PAGE(offset); ++ u16 reg = BM_PHY_REG_NUM(offset); ++ u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page); ++ ++ if (!locked) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Page 800 works differently than the rest so it has its own func */ ++ if (page == BM_WUC_PAGE) { ++ ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data, ++ false, page_set); ++ goto out; ++ } ++ ++ if (page > 0 && page < HV_INTC_FC_PAGE_START) { ++ ret_val = e1000_access_phy_debug_regs_hv(hw, offset, ++ &data, false); ++ goto out; ++ } ++ ++ if (!page_set) { ++ if (page == HV_INTC_FC_PAGE_START) ++ page = 0; ++ ++ /* ++ * Workaround MDIO accesses being disabled after entering IEEE ++ * Power Down (when bit 11 of the PHY Control register is set) ++ */ ++ if ((hw->phy.type == e1000_phy_82578) && ++ (hw->phy.revision >= 1) && ++ (hw->phy.addr == 2) && ++ ((MAX_PHY_REG_ADDRESS & reg) == 0) && (data & (1 << 11))) { ++ u16 data2 = 0x7EFF; ++ ret_val = e1000_access_phy_debug_regs_hv(hw, ++ (1 << 6) | 0x3, ++ &data2, false); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (reg > MAX_PHY_MULTI_PAGE_REG) { ++ /* Page is shifted left, PHY expects (page x 32) */ ++ ret_val = e1000_set_page_igp(hw, ++ (page << IGP_PAGE_SHIFT)); ++ ++ hw->phy.addr = phy_addr; ++ ++ if (ret_val) ++ goto out; ++ } ++ } ++ ++ e_dbg("writing PHY page %d (or 0x%x shifted) reg 0x%x\n", page, ++ page << IGP_PAGE_SHIFT, reg); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, ++ data); ++ ++out: ++ if (!locked) ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquires semaphore then writes the data to PHY register at the offset. ++ * Release the acquired semaphores before exiting. ++ **/ ++s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, false, false); ++} ++ ++/** ++ * e1000_write_phy_reg_hv_locked - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. Assumes semaphore ++ * already acquired. ++ **/ ++s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, true, false); ++} ++ ++/** ++ * e1000_write_phy_reg_page_hv - Write HV PHY register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Writes the data to PHY register at the offset. Assumes semaphore ++ * already acquired and page already set. 
++ **/ ++s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return __e1000_write_phy_reg_hv(hw, offset, data, true, true); ++} ++ ++/** ++ * @brief Get PHY address based on page ++ * @param page page to be accessed ++ * @return PHY address ++ */ ++static u32 e1000_get_phy_addr_for_hv_page(u32 page) ++{ ++ u32 phy_addr = 2; ++ ++ if (page >= HV_INTC_FC_PAGE_START) ++ phy_addr = 1; ++ ++ return phy_addr; ++} ++ ++/** ++ * e1000_access_phy_debug_regs_hv - Read HV PHY vendor specific high registers ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read or written ++ * @data: pointer to the data to be read or written ++ * @read: determines if operation is read or write ++ * ++ * Reads the PHY register at offset and stores the retreived information ++ * in data. Assumes semaphore already acquired. Note that the procedure ++ * to access these regs uses the address port and data port to read/write. ++ * These accesses done with PHY address 2 and without using pages. ++ **/ ++static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data, bool read) ++{ ++ s32 ret_val; ++ u32 addr_reg = 0; ++ u32 data_reg = 0; ++ ++ /* This takes care of the difference with desktop vs mobile phy */ ++ addr_reg = (hw->phy.type == e1000_phy_82578) ? ++ I82578_ADDR_REG : I82577_ADDR_REG; ++ data_reg = addr_reg + 1; ++ ++ /* All operations in this function are phy address 2 */ ++ hw->phy.addr = 2; ++ ++ /* masking with 0x3F to remove the page from offset */ ++ ret_val = e1000e_write_phy_reg_mdic(hw, addr_reg, (u16)offset & 0x3F); ++ if (ret_val) { ++ e_dbg("Could not write the Address Offset port register\n"); ++ goto out; ++ } ++ ++ /* Read or write the data value next */ ++ if (read) ++ ret_val = e1000e_read_phy_reg_mdic(hw, data_reg, data); ++ else ++ ret_val = e1000e_write_phy_reg_mdic(hw, data_reg, *data); ++ ++ if (ret_val) { ++ e_dbg("Could not access the Data port register\n"); ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_link_stall_workaround_hv - Si workaround ++ * @hw: pointer to the HW structure ++ * ++ * This function works around a Si bug where the link partner can get ++ * a link up indication before the PHY does. If small packets are sent ++ * by the link partner they can be placed in the packet buffer without ++ * being properly accounted for by the PHY and will stall preventing ++ * further packets from being received. The workaround is to clear the ++ * packet buffer after the PHY detects link up. ++ **/ ++s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (hw->phy.type != e1000_phy_82578) ++ goto out; ++ ++ /* Do not apply workaround if in PHY loopback bit 14 set */ ++ e1e_rphy(hw, PHY_CONTROL, &data); ++ if (data & PHY_CONTROL_LB) ++ goto out; ++ ++ /* check if link is up and at 1Gbps */ ++ ret_val = e1e_rphy(hw, BM_CS_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ data &= BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_MASK; ++ ++ if (data != (BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_1000)) ++ goto out; ++ ++ mdelay(200); ++ ++ /* flush the packets in the fifo buffer */ ++ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC | ++ HV_MUX_DATA_CTRL_FORCE_SPEED); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_check_polarity_82577 - Checks the polarity. 
++ * @hw: pointer to the HW structure ++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ * ++ * Polarity is determined based on the PHY specific status register. ++ **/ ++s32 e1000_check_polarity_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data); ++ ++ if (!ret_val) ++ phy->cable_polarity = (data & I82577_PHY_STATUS2_REV_POLARITY) ++ ? e1000_rev_polarity_reversed ++ : e1000_rev_polarity_normal; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Calls the PHY setup function to force speed and duplex. ++ **/ ++s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ goto out; ++ ++ udelay(1); ++ ++ if (phy->autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link on 82577 phy\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) ++ e_dbg("Link taking longer than expected.\n"); ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, ++ PHY_FORCE_LIMIT, ++ 100000, ++ &link); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_phy_info_82577 - Retrieve I82577 PHY information ++ * @hw: pointer to the HW structure ++ * ++ * Read PHY status to determine if link is up. If link is up, then ++ * set/determine 10base-T extended distance and polarity correction. Read ++ * PHY port status to determine MDI/MDIx and speed. Based on the speed, ++ * determine on the cable length, local and remote receiver. ++ **/ ++s32 e1000_get_phy_info_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ bool link; ++ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (!link) { ++ e_dbg("Phy info is only valid if link is up\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ phy->polarity_correction = true; ++ ++ ret_val = e1000_check_polarity_82577(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->is_mdix = (data & I82577_PHY_STATUS2_MDIX) ? true : false; ++ ++ if ((data & I82577_PHY_STATUS2_SPEED_MASK) == ++ I82577_PHY_STATUS2_SPEED_1000MBPS) { ++ ret_val = hw->phy.ops.get_cable_length(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1e_rphy(hw, PHY_1000T_STATUS, &data); ++ if (ret_val) ++ goto out; ++ ++ phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) ++ ? e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ ++ phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) ++ ? 
e1000_1000t_rx_status_ok ++ : e1000_1000t_rx_status_not_ok; ++ } else { ++ phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; ++ phy->local_rx = e1000_1000t_rx_status_undefined; ++ phy->remote_rx = e1000_1000t_rx_status_undefined; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY ++ * @hw: pointer to the HW structure ++ * ++ * Reads the diagnostic status register and verifies result is valid before ++ * placing it in the phy_cable_length field. ++ **/ ++s32 e1000_get_cable_length_82577(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_data, length; ++ ++ ret_val = e1e_rphy(hw, I82577_PHY_DIAG_STATUS, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ length = (phy_data & I82577_DSTATUS_CABLE_LENGTH) >> ++ I82577_DSTATUS_CABLE_LENGTH_SHIFT; ++ ++ if (length == E1000_CABLE_LENGTH_UNDEFINED) ++ ret_val = -E1000_ERR_PHY; ++ ++ phy->cable_length = length; ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/e1000e/e1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/e1000.h 2021-04-07 16:01:27.224634170 +0800 +@@ -0,0 +1,764 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* Linux PRO/1000 Ethernet Driver main header file */ ++ ++#ifndef _E1000_H_ ++#define _E1000_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "hw.h" ++ ++struct e1000_info; ++ ++#define e_dbg(format, arg...) \ ++ pr_debug(format, ## arg) ++#define e_err(format, arg...) \ ++ pr_err(format, ## arg) ++#define e_info(format, arg...) \ ++ pr_info(format, ## arg) ++#define e_warn(format, arg...) \ ++ pr_warn(format, ## arg) ++#define e_notice(format, arg...) 
\ ++ pr_notice(format, ## arg) ++ ++ ++/* Interrupt modes, as used by the IntMode parameter */ ++#define E1000E_INT_MODE_LEGACY 0 ++#define E1000E_INT_MODE_MSI 1 ++#define E1000E_INT_MODE_MSIX 2 ++ ++/* Tx/Rx descriptor defines */ ++#define E1000_DEFAULT_TXD 256 ++#define E1000_MAX_TXD 4096 ++#define E1000_MIN_TXD 64 ++ ++#define E1000_DEFAULT_RXD 256 ++#define E1000_MAX_RXD 4096 ++#define E1000_MIN_RXD 64 ++ ++#define E1000_MIN_ITR_USECS 10 /* 100000 irq/sec */ ++#define E1000_MAX_ITR_USECS 10000 /* 100 irq/sec */ ++ ++/* Early Receive defines */ ++#define E1000_ERT_2048 0x100 ++ ++#define E1000_FC_PAUSE_TIME 0x0680 /* 858 usec */ ++ ++/* How many Tx Descriptors do we need to call netif_wake_queue ? */ ++/* How many Rx Buffers do we bundle into one write to the hardware ? */ ++#define E1000_RX_BUFFER_WRITE 16 /* Must be power of 2 */ ++ ++#define AUTO_ALL_MODES 0 ++#define E1000_EEPROM_APME 0x0400 ++ ++#define E1000_MNG_VLAN_NONE (-1) ++ ++/* Number of packet split data buffers (not including the header buffer) */ ++#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) ++ ++#define DEFAULT_JUMBO 9234 ++ ++/* BM/HV Specific Registers */ ++#define BM_PORT_CTRL_PAGE 769 ++ ++#define PHY_UPPER_SHIFT 21 ++#define BM_PHY_REG(page, reg) \ ++ (((reg) & MAX_PHY_REG_ADDRESS) |\ ++ (((page) & 0xFFFF) << PHY_PAGE_SHIFT) |\ ++ (((reg) & ~MAX_PHY_REG_ADDRESS) << (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT))) ++ ++/* PHY Wakeup Registers and defines */ ++#define BM_PORT_GEN_CFG PHY_REG(BM_PORT_CTRL_PAGE, 17) ++#define BM_RCTL PHY_REG(BM_WUC_PAGE, 0) ++#define BM_WUC PHY_REG(BM_WUC_PAGE, 1) ++#define BM_WUFC PHY_REG(BM_WUC_PAGE, 2) ++#define BM_WUS PHY_REG(BM_WUC_PAGE, 3) ++#define BM_RAR_L(_i) (BM_PHY_REG(BM_WUC_PAGE, 16 + ((_i) << 2))) ++#define BM_RAR_M(_i) (BM_PHY_REG(BM_WUC_PAGE, 17 + ((_i) << 2))) ++#define BM_RAR_H(_i) (BM_PHY_REG(BM_WUC_PAGE, 18 + ((_i) << 2))) ++#define BM_RAR_CTRL(_i) (BM_PHY_REG(BM_WUC_PAGE, 19 + ((_i) << 2))) ++#define BM_MTA(_i) (BM_PHY_REG(BM_WUC_PAGE, 128 + ((_i) << 1))) ++ ++#define BM_RCTL_UPE 0x0001 /* Unicast Promiscuous Mode */ ++#define BM_RCTL_MPE 0x0002 /* Multicast Promiscuous Mode */ ++#define BM_RCTL_MO_SHIFT 3 /* Multicast Offset Shift */ ++#define BM_RCTL_MO_MASK (3 << 3) /* Multicast Offset Mask */ ++#define BM_RCTL_BAM 0x0020 /* Broadcast Accept Mode */ ++#define BM_RCTL_PMCF 0x0040 /* Pass MAC Control Frames */ ++#define BM_RCTL_RFCE 0x0080 /* Rx Flow Control Enable */ ++ ++#define HV_STATS_PAGE 778 ++#define HV_SCC_UPPER PHY_REG(HV_STATS_PAGE, 16) /* Single Collision Count */ ++#define HV_SCC_LOWER PHY_REG(HV_STATS_PAGE, 17) ++#define HV_ECOL_UPPER PHY_REG(HV_STATS_PAGE, 18) /* Excessive Coll. Count */ ++#define HV_ECOL_LOWER PHY_REG(HV_STATS_PAGE, 19) ++#define HV_MCC_UPPER PHY_REG(HV_STATS_PAGE, 20) /* Multiple Coll. 
Count */ ++#define HV_MCC_LOWER PHY_REG(HV_STATS_PAGE, 21) ++#define HV_LATECOL_UPPER PHY_REG(HV_STATS_PAGE, 23) /* Late Collision Count */ ++#define HV_LATECOL_LOWER PHY_REG(HV_STATS_PAGE, 24) ++#define HV_COLC_UPPER PHY_REG(HV_STATS_PAGE, 25) /* Collision Count */ ++#define HV_COLC_LOWER PHY_REG(HV_STATS_PAGE, 26) ++#define HV_DC_UPPER PHY_REG(HV_STATS_PAGE, 27) /* Defer Count */ ++#define HV_DC_LOWER PHY_REG(HV_STATS_PAGE, 28) ++#define HV_TNCRS_UPPER PHY_REG(HV_STATS_PAGE, 29) /* Transmit with no CRS */ ++#define HV_TNCRS_LOWER PHY_REG(HV_STATS_PAGE, 30) ++ ++#define E1000_FCRTV_PCH 0x05F40 /* PCH Flow Control Refresh Timer Value */ ++ ++/* BM PHY Copper Specific Status */ ++#define BM_CS_STATUS 17 ++#define BM_CS_STATUS_LINK_UP 0x0400 ++#define BM_CS_STATUS_RESOLVED 0x0800 ++#define BM_CS_STATUS_SPEED_MASK 0xC000 ++#define BM_CS_STATUS_SPEED_1000 0x8000 ++ ++/* 82577 Mobile Phy Status Register */ ++#define HV_M_STATUS 26 ++#define HV_M_STATUS_AUTONEG_COMPLETE 0x1000 ++#define HV_M_STATUS_SPEED_MASK 0x0300 ++#define HV_M_STATUS_SPEED_1000 0x0200 ++#define HV_M_STATUS_LINK_UP 0x0040 ++ ++#define E1000_ICH_FWSM_PCIM2PCI 0x01000000 /* ME PCIm-to-PCI active */ ++#define E1000_ICH_FWSM_PCIM2PCI_COUNT 2000 ++ ++/* Time to wait before putting the device into D3 if there's no link (in ms). */ ++#define LINK_TIMEOUT 100 ++ ++#define DEFAULT_RDTR 0 ++#define DEFAULT_RADV 8 ++#define BURST_RDTR 0x20 ++#define BURST_RADV 0x20 ++ ++/* ++ * in the case of WTHRESH, it appears at least the 82571/2 hardware ++ * writes back 4 descriptors when WTHRESH=5, and 3 descriptors when ++ * WTHRESH=4, and since we want 64 bytes at a time written back, set ++ * it to 5 ++ */ ++#define E1000_TXDCTL_DMA_BURST_ENABLE \ ++ (E1000_TXDCTL_GRAN | /* set descriptor granularity */ \ ++ E1000_TXDCTL_COUNT_DESC | \ ++ (5 << 16) | /* wthresh must be +1 more than desired */\ ++ (1 << 8) | /* hthresh */ \ ++ 0x1f) /* pthresh */ ++ ++#define E1000_RXDCTL_DMA_BURST_ENABLE \ ++ (0x01000000 | /* set descriptor granularity */ \ ++ (4 << 16) | /* set writeback threshold */ \ ++ (4 << 8) | /* set prefetch threshold */ \ ++ 0x20) /* set hthresh */ ++ ++#define E1000_TIDV_FPD (1 << 31) ++#define E1000_RDTR_FPD (1 << 31) ++ ++enum e1000_boards { ++ board_82571, ++ board_82572, ++ board_82573, ++ board_82574, ++ board_82583, ++ board_80003es2lan, ++ board_ich8lan, ++ board_ich9lan, ++ board_ich10lan, ++ board_pchlan, ++ board_pch2lan, ++ board_pch_lpt, ++}; ++ ++struct e1000_ps_page { ++ struct page *page; ++ u64 dma; /* must be u64 - written to hw */ ++}; ++ ++/* ++ * wrappers around a pointer to a socket buffer, ++ * so a DMA handle can be stored along with the buffer ++ */ ++struct e1000_buffer { ++ dma_addr_t dma; ++ struct rtskb *skb; ++ union { ++ /* Tx */ ++ struct { ++ unsigned long time_stamp; ++ u16 length; ++ u16 next_to_watch; ++ unsigned int segs; ++ unsigned int bytecount; ++ u16 mapped_as_page; ++ }; ++ /* Rx */ ++ struct { ++ /* arrays of page information for packet split */ ++ struct e1000_ps_page *ps_pages; ++ struct page *page; ++ }; ++ }; ++}; ++ ++struct e1000_ring { ++ void *desc; /* pointer to ring memory */ ++ dma_addr_t dma; /* phys address of ring */ ++ unsigned int size; /* length of ring in bytes */ ++ unsigned int count; /* number of desc. 
in ring */ ++ ++ u16 next_to_use; ++ u16 next_to_clean; ++ ++ u16 head; ++ u16 tail; ++ ++ /* array of buffer information structs */ ++ struct e1000_buffer *buffer_info; ++ ++ char name[IFNAMSIZ + 5]; ++ u32 ims_val; ++ u32 itr_val; ++ u16 itr_register; ++ int set_itr; ++ ++ struct rtskb *rx_skb_top; ++ ++ rtdm_lock_t lock; ++}; ++ ++/* PHY register snapshot values */ ++struct e1000_phy_regs { ++ u16 bmcr; /* basic mode control register */ ++ u16 bmsr; /* basic mode status register */ ++ u16 advertise; /* auto-negotiation advertisement */ ++ u16 lpa; /* link partner ability register */ ++ u16 expansion; /* auto-negotiation expansion reg */ ++ u16 ctrl1000; /* 1000BASE-T control register */ ++ u16 stat1000; /* 1000BASE-T status register */ ++ u16 estatus; /* extended status register */ ++}; ++ ++/* board specific private data structure */ ++struct e1000_adapter { ++ struct timer_list watchdog_timer; ++ struct timer_list phy_info_timer; ++ struct timer_list blink_timer; ++ ++ struct work_struct reset_task; ++ struct work_struct watchdog_task; ++ ++ const struct e1000_info *ei; ++ ++ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; ++ u32 bd_number; ++ u32 rx_buffer_len; ++ u16 mng_vlan_id; ++ u16 link_speed; ++ u16 link_duplex; ++ u16 eeprom_vers; ++ ++ /* track device up/down/testing state */ ++ unsigned long state; ++ ++ /* Interrupt Throttle Rate */ ++ u32 itr; ++ u32 itr_setting; ++ u16 tx_itr; ++ u16 rx_itr; ++ ++ /* ++ * Tx ++ */ ++ struct e1000_ring *tx_ring /* One per active queue */ ++ ____cacheline_aligned_in_smp; ++ ++ struct napi_struct napi; ++ ++ unsigned int restart_queue; ++ u32 txd_cmd; ++ ++ bool detect_tx_hung; ++ u8 tx_timeout_factor; ++ ++ u32 tx_int_delay; ++ u32 tx_abs_int_delay; ++ ++ unsigned int total_tx_bytes; ++ unsigned int total_tx_packets; ++ unsigned int total_rx_bytes; ++ unsigned int total_rx_packets; ++ ++ /* Tx stats */ ++ u64 tpt_old; ++ u64 colc_old; ++ u32 gotc; ++ u64 gotc_old; ++ u32 tx_timeout_count; ++ u32 tx_fifo_head; ++ u32 tx_head_addr; ++ u32 tx_fifo_size; ++ u32 tx_dma_failed; ++ ++ /* ++ * Rx ++ */ ++ bool (*clean_rx) (struct e1000_adapter *adapter, ++ nanosecs_abs_t *time_stamp) ++ ____cacheline_aligned_in_smp; ++ void (*alloc_rx_buf) (struct e1000_adapter *adapter, ++ int cleaned_count, gfp_t gfp); ++ struct e1000_ring *rx_ring; ++ ++ u32 rx_int_delay; ++ u32 rx_abs_int_delay; ++ ++ /* Rx stats */ ++ u64 hw_csum_err; ++ u64 hw_csum_good; ++ u64 rx_hdr_split; ++ u32 gorc; ++ u64 gorc_old; ++ u32 alloc_rx_buff_failed; ++ u32 rx_dma_failed; ++ ++ unsigned int rx_ps_pages; ++ u16 rx_ps_bsize0; ++ u32 max_frame_size; ++ u32 min_frame_size; ++ ++ /* OS defined structs */ ++ struct rtnet_device *netdev; ++ struct pci_dev *pdev; ++ ++ rtdm_irq_t irq_handle; ++ rtdm_irq_t rx_irq_handle; ++ rtdm_irq_t tx_irq_handle; ++ rtdm_nrtsig_t mod_timer_sig; ++ rtdm_nrtsig_t downshift_sig; ++ ++ /* structs defined in e1000_hw.h */ ++ struct e1000_hw hw; ++ ++ spinlock_t stats64_lock; ++ struct e1000_hw_stats stats; ++ struct e1000_phy_info phy_info; ++ struct e1000_phy_stats phy_stats; ++ ++ /* Snapshot of PHY registers */ ++ struct e1000_phy_regs phy_regs; ++ ++ struct e1000_ring test_tx_ring; ++ struct e1000_ring test_rx_ring; ++ u32 test_icr; ++ ++ u32 msg_enable; ++ unsigned int num_vectors; ++ struct msix_entry *msix_entries; ++ int int_mode; ++ u32 eiac_mask; ++ ++ u32 eeprom_wol; ++ u32 wol; ++ u32 pba; ++ u32 max_hw_frame_size; ++ ++ bool fc_autoneg; ++ ++ unsigned int flags; ++ unsigned int flags2; ++ struct work_struct downshift_task; ++ struct 
work_struct update_phy_task; ++ struct work_struct print_hang_task; ++ ++ bool idle_check; ++ int phy_hang_count; ++}; ++ ++struct e1000_info { ++ enum e1000_mac_type mac; ++ unsigned int flags; ++ unsigned int flags2; ++ u32 pba; ++ u32 max_hw_frame_size; ++ s32 (*get_variants)(struct e1000_adapter *); ++ const struct e1000_mac_operations *mac_ops; ++ const struct e1000_phy_operations *phy_ops; ++ const struct e1000_nvm_operations *nvm_ops; ++}; ++ ++/* hardware capability, feature, and workaround flags */ ++#define FLAG_HAS_AMT (1 << 0) ++#define FLAG_HAS_FLASH (1 << 1) ++#define FLAG_HAS_HW_VLAN_FILTER (1 << 2) ++#define FLAG_HAS_WOL (1 << 3) ++#define FLAG_HAS_ERT (1 << 4) ++#define FLAG_HAS_CTRLEXT_ON_LOAD (1 << 5) ++#define FLAG_HAS_SWSM_ON_LOAD (1 << 6) ++#define FLAG_HAS_JUMBO_FRAMES (1 << 7) ++#define FLAG_READ_ONLY_NVM (1 << 8) ++#define FLAG_IS_ICH (1 << 9) ++#define FLAG_HAS_MSIX (1 << 10) ++#define FLAG_HAS_SMART_POWER_DOWN (1 << 11) ++#define FLAG_IS_QUAD_PORT_A (1 << 12) ++#define FLAG_IS_QUAD_PORT (1 << 13) ++#define FLAG_TIPG_MEDIUM_FOR_80003ESLAN (1 << 14) ++#define FLAG_APME_IN_WUC (1 << 15) ++#define FLAG_APME_IN_CTRL3 (1 << 16) ++#define FLAG_APME_CHECK_PORT_B (1 << 17) ++#define FLAG_DISABLE_FC_PAUSE_TIME (1 << 18) ++#define FLAG_NO_WAKE_UCAST (1 << 19) ++#define FLAG_MNG_PT_ENABLED (1 << 20) ++#define FLAG_RESET_OVERWRITES_LAA (1 << 21) ++#define FLAG_TARC_SPEED_MODE_BIT (1 << 22) ++#define FLAG_TARC_SET_BIT_ZERO (1 << 23) ++#define FLAG_RX_NEEDS_RESTART (1 << 24) ++#define FLAG_LSC_GIG_SPEED_DROP (1 << 25) ++#define FLAG_SMART_POWER_DOWN (1 << 26) ++#define FLAG_MSI_ENABLED (1 << 27) ++/* reserved (1 << 28) */ ++#define FLAG_TSO_FORCE (1 << 29) ++#define FLAG_RX_RESTART_NOW (1 << 30) ++#define FLAG_MSI_TEST_FAILED (1 << 31) ++ ++#define FLAG2_CRC_STRIPPING (1 << 0) ++#define FLAG2_HAS_PHY_WAKEUP (1 << 1) ++#define FLAG2_IS_DISCARDING (1 << 2) ++#define FLAG2_DISABLE_ASPM_L1 (1 << 3) ++#define FLAG2_HAS_PHY_STATS (1 << 4) ++#define FLAG2_HAS_EEE (1 << 5) ++#define FLAG2_DMA_BURST (1 << 6) ++#define FLAG2_DISABLE_ASPM_L0S (1 << 7) ++#define FLAG2_DISABLE_AIM (1 << 8) ++#define FLAG2_CHECK_PHY_HANG (1 << 9) ++#define FLAG2_NO_DISABLE_RX (1 << 10) ++#define FLAG2_PCIM2PCI_ARBITER_WA (1 << 11) ++ ++#define E1000_RX_DESC_PS(R, i) \ ++ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) ++#define E1000_RX_DESC_EXT(R, i) \ ++ (&(((union e1000_rx_desc_extended *)((R).desc))[i])) ++#define E1000_GET_DESC(R, i, type) (&(((struct type *)((R).desc))[i])) ++#define E1000_TX_DESC(R, i) E1000_GET_DESC(R, i, e1000_tx_desc) ++#define E1000_CONTEXT_DESC(R, i) E1000_GET_DESC(R, i, e1000_context_desc) ++ ++enum e1000_state_t { ++ __E1000_TESTING, ++ __E1000_RESETTING, ++ __E1000_ACCESS_SHARED_RESOURCE, ++ __E1000_DOWN ++}; ++ ++enum latency_range { ++ lowest_latency = 0, ++ low_latency = 1, ++ bulk_latency = 2, ++ latency_invalid = 255 ++}; ++ ++extern char e1000e_driver_name[]; ++extern const char e1000e_driver_version[]; ++ ++extern void e1000e_check_options(struct e1000_adapter *adapter); ++extern void e1000e_set_ethtool_ops(struct net_device *netdev); ++ ++extern int e1000e_up(struct e1000_adapter *adapter); ++extern void e1000e_down(struct e1000_adapter *adapter); ++extern void e1000e_reinit_locked(struct e1000_adapter *adapter); ++extern void e1000e_reset(struct e1000_adapter *adapter); ++extern void e1000e_power_up_phy(struct e1000_adapter *adapter); ++extern int e1000e_setup_rx_resources(struct e1000_adapter *adapter); ++extern int e1000e_setup_tx_resources(struct 
e1000_adapter *adapter); ++extern void e1000e_free_rx_resources(struct e1000_adapter *adapter); ++extern void e1000e_free_tx_resources(struct e1000_adapter *adapter); ++extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, ++ struct rtnl_link_stats64 ++ *stats); ++extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter); ++extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter); ++extern void e1000e_get_hw_control(struct e1000_adapter *adapter); ++extern void e1000e_release_hw_control(struct e1000_adapter *adapter); ++ ++extern unsigned int copybreak; ++ ++extern char *e1000e_get_hw_dev_name(struct e1000_hw *hw); ++ ++extern const struct e1000_info e1000_82571_info; ++extern const struct e1000_info e1000_82572_info; ++extern const struct e1000_info e1000_82573_info; ++extern const struct e1000_info e1000_82574_info; ++extern const struct e1000_info e1000_82583_info; ++extern const struct e1000_info e1000_ich8_info; ++extern const struct e1000_info e1000_ich9_info; ++extern const struct e1000_info e1000_ich10_info; ++extern const struct e1000_info e1000_pch_info; ++extern const struct e1000_info e1000_pch2_info; ++extern const struct e1000_info e1000_pch_lpt_info; ++extern const struct e1000_info e1000_es2_info; ++ ++extern s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, ++ u32 pba_num_size); ++ ++extern s32 e1000e_commit_phy(struct e1000_hw *hw); ++ ++extern bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw); ++ ++extern bool e1000e_get_laa_state_82571(struct e1000_hw *hw); ++extern void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state); ++ ++extern void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); ++extern void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state); ++extern void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); ++extern void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw); ++extern void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw); ++extern void e1000_resume_workarounds_pchlan(struct e1000_hw *hw); ++extern s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable); ++extern s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable); ++extern void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw); ++ ++extern s32 e1000e_check_for_copper_link(struct e1000_hw *hw); ++extern s32 e1000e_check_for_fiber_link(struct e1000_hw *hw); ++extern s32 e1000e_check_for_serdes_link(struct e1000_hw *hw); ++extern s32 e1000e_setup_led_generic(struct e1000_hw *hw); ++extern s32 e1000e_cleanup_led_generic(struct e1000_hw *hw); ++extern s32 e1000e_led_on_generic(struct e1000_hw *hw); ++extern s32 e1000e_led_off_generic(struct e1000_hw *hw); ++extern s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw); ++extern void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); ++extern void e1000_set_lan_id_single_port(struct e1000_hw *hw); ++extern s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, u16 *duplex); ++extern s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw *hw, u16 *speed, u16 *duplex); ++extern s32 e1000e_disable_pcie_master(struct e1000_hw *hw); ++extern s32 e1000e_get_auto_rd_done(struct e1000_hw *hw); ++extern s32 e1000e_id_led_init(struct e1000_hw *hw); ++extern void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw); ++extern s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw); ++extern s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw); ++extern 
s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw); ++extern s32 e1000e_setup_link(struct e1000_hw *hw); ++extern void e1000_clear_vfta_generic(struct e1000_hw *hw); ++extern void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); ++extern void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, ++ u32 mc_addr_count); ++extern void e1000e_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); ++extern s32 e1000e_set_fc_watermarks(struct e1000_hw *hw); ++extern void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop); ++extern s32 e1000e_get_hw_semaphore(struct e1000_hw *hw); ++extern s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data); ++extern void e1000e_config_collision_dist(struct e1000_hw *hw); ++extern s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw); ++extern s32 e1000e_force_mac_fc(struct e1000_hw *hw); ++extern s32 e1000e_blink_led_generic(struct e1000_hw *hw); ++extern void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); ++extern s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); ++extern void e1000e_reset_adaptive(struct e1000_hw *hw); ++extern void e1000e_update_adaptive(struct e1000_hw *hw); ++ ++extern s32 e1000e_setup_copper_link(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_id(struct e1000_hw *hw); ++extern void e1000e_put_hw_semaphore(struct e1000_hw *hw); ++extern s32 e1000e_check_reset_block_generic(struct e1000_hw *hw); ++extern s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw); ++extern s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_info_igp(struct e1000_hw *hw); ++extern s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page); ++extern s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw); ++extern s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active); ++extern s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000e_phy_sw_reset(struct e1000_hw *hw); ++extern s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw); ++extern s32 e1000e_get_cfg_done(struct e1000_hw *hw); ++extern s32 e1000e_get_cable_length_m88(struct e1000_hw *hw); ++extern s32 e1000e_get_phy_info_m88(struct e1000_hw *hw); ++extern s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw); ++extern enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id); ++extern s32 e1000e_determine_phy_address(struct e1000_hw *hw); ++extern s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, ++ u16 *phy_reg); ++extern s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, ++ u16 *phy_reg); ++extern s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data); ++extern void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); ++extern s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 
e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ++ u32 usec_interval, bool *success); ++extern s32 e1000e_phy_reset_dsp(struct e1000_hw *hw); ++extern void e1000_power_up_phy_copper(struct e1000_hw *hw); ++extern void e1000_power_down_phy_copper(struct e1000_hw *hw); ++extern s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000e_check_downshift(struct e1000_hw *hw); ++extern s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data); ++extern s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++extern s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data); ++extern s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, ++ u16 data); ++extern s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw); ++extern s32 e1000_copper_link_setup_82577(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_82577(struct e1000_hw *hw); ++extern s32 e1000_get_phy_info_82577(struct e1000_hw *hw); ++extern s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw); ++extern s32 e1000_get_cable_length_82577(struct e1000_hw *hw); ++ ++extern s32 e1000_check_polarity_m88(struct e1000_hw *hw); ++extern s32 e1000_get_phy_info_ife(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_ife(struct e1000_hw *hw); ++extern s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); ++extern s32 e1000_check_polarity_igp(struct e1000_hw *hw); ++extern bool e1000_check_phy_82574(struct e1000_hw *hw); ++ ++static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.reset(hw); ++} ++ ++static inline s32 e1000_check_reset_block(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.check_reset_block(hw); ++} ++ ++static inline s32 e1e_rphy(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return hw->phy.ops.read_reg(hw, offset, data); ++} ++ ++static inline s32 e1e_rphy_locked(struct e1000_hw *hw, u32 offset, u16 *data) ++{ ++ return hw->phy.ops.read_reg_locked(hw, offset, data); ++} ++ ++static inline s32 e1e_wphy(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return hw->phy.ops.write_reg(hw, offset, data); ++} ++ ++static inline s32 e1e_wphy_locked(struct e1000_hw *hw, u32 offset, u16 data) ++{ ++ return hw->phy.ops.write_reg_locked(hw, offset, data); ++} ++ ++static inline s32 e1000_get_cable_length(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.get_cable_length(hw); ++} ++ ++extern s32 e1000e_acquire_nvm(struct e1000_hw *hw); ++extern s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++extern s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw); ++extern s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); ++extern s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); ++extern s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw); ++extern void e1000e_release_nvm(struct e1000_hw *hw); ++extern void e1000e_reload_nvm(struct e1000_hw *hw); ++extern s32 e1000_read_mac_addr_generic(struct 
e1000_hw *hw); ++ ++static inline s32 e1000e_read_mac_addr(struct e1000_hw *hw) ++{ ++ if (hw->mac.ops.read_mac_addr) ++ return hw->mac.ops.read_mac_addr(hw); ++ ++ return e1000_read_mac_addr_generic(hw); ++} ++ ++static inline s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) ++{ ++ return hw->nvm.ops.validate(hw); ++} ++ ++static inline s32 e1000e_update_nvm_checksum(struct e1000_hw *hw) ++{ ++ return hw->nvm.ops.update(hw); ++} ++ ++static inline s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ return hw->nvm.ops.read(hw, offset, words, data); ++} ++ ++static inline s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ return hw->nvm.ops.write(hw, offset, words, data); ++} ++ ++static inline s32 e1000_get_phy_info(struct e1000_hw *hw) ++{ ++ return hw->phy.ops.get_info(hw); ++} ++ ++static inline s32 e1000e_check_mng_mode(struct e1000_hw *hw) ++{ ++ return hw->mac.ops.check_mng_mode(hw); ++} ++ ++extern bool e1000e_check_mng_mode_generic(struct e1000_hw *hw); ++extern bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw); ++extern s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); ++ ++static inline u32 __er32(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readl(hw->hw_addr + reg); ++} ++ ++static inline void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) ++{ ++ writel(val, hw->hw_addr + reg); ++} ++ ++#endif /* _E1000_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000e/82571.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/82571.c 2021-04-07 16:01:27.219634178 +0800 +@@ -0,0 +1,2112 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 82571EB Gigabit Ethernet Controller ++ * 82571EB Gigabit Ethernet Controller (Copper) ++ * 82571EB Gigabit Ethernet Controller (Fiber) ++ * 82571EB Dual Port Gigabit Mezzanine Adapter ++ * 82571EB Quad Port Gigabit Mezzanine Adapter ++ * 82571PT Gigabit PT Quad Port Server ExpressModule ++ * 82572EI Gigabit Ethernet Controller (Copper) ++ * 82572EI Gigabit Ethernet Controller (Fiber) ++ * 82572EI Gigabit Ethernet Controller ++ * 82573V Gigabit Ethernet Controller (Copper) ++ * 82573E Gigabit Ethernet Controller (Copper) ++ * 82573L Gigabit Ethernet Controller ++ * 82574L Gigabit Network Connection ++ * 82583V Gigabit Network Connection ++ */ ++ ++#include "e1000.h" ++ ++#define ID_LED_RESERVED_F746 0xF746 ++#define ID_LED_DEFAULT_82573 ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_OFF1_ON2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000 ++#define AN_RETRY_COUNT 5 /* Autoneg Retry Count value */ ++#define E1000_BASE1000T_STATUS 10 ++#define E1000_IDLE_ERROR_COUNT_MASK 0xFF ++#define E1000_RECEIVE_ERROR_COUNTER 21 ++#define E1000_RECEIVE_ERROR_MAX 0xFFFF ++ ++#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 /* Manageability Operation Mode mask */ ++ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw); ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw); ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data); ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); ++static s32 e1000_setup_link_82571(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); ++static void e1000_clear_vfta_82571(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_82574(struct e1000_hw *hw); ++static s32 e1000_led_on_82574(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); ++static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); ++static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); ++static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active); ++static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active); ++ ++/** ++ * e1000_init_phy_params_82571 - Init PHY func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ return 0; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_82571; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ phy->type = e1000_phy_igp_2; ++ break; ++ case e1000_82573: ++ phy->type = e1000_phy_m88; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ phy->type = e1000_phy_bm; ++ phy->ops.acquire = e1000_get_hw_semaphore_82574; ++ phy->ops.release = e1000_put_hw_semaphore_82574; ++ phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82574; ++ phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82574; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000_get_phy_id_82571(hw); ++ if (ret_val) { ++ e_dbg("Error getting PHY ID\n"); ++ return ret_val; ++ } ++ ++ /* Verify phy id */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ if (phy->id != IGP01E1000_I_PHY_ID) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ case e1000_82573: ++ if (phy->id != M88E1111_I_PHY_ID) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ if (phy->id != BME1000_E_PHY_ID_R2) ++ ret_val = -E1000_ERR_PHY; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ e_dbg("PHY ID unknown: type = 0x%08x\n", phy->id); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_82571 - Init NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u16 size; ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (((eecd >> 15) & 0x3) == 0x3) { ++ nvm->type = e1000_nvm_flash_hw; ++ nvm->word_size = 2048; ++ /* ++ * Autonomous Flash update bit must be cleared due ++ * to Flash update issue. ++ */ ++ eecd &= ~E1000_EECD_AUPDEN; ++ ew32(EECD, eecd); ++ break; ++ } ++ /* Fall Through */ ++ default: ++ nvm->type = e1000_nvm_eeprom_spi; ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ break; ++ } ++ ++ /* Function Pointers */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ nvm->ops.acquire = e1000_get_hw_semaphore_82574; ++ nvm->ops.release = e1000_put_hw_semaphore_82574; ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_82571 - Init MAC func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_mac_params_82571(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_mac_operations *func = &mac->ops; ++ u32 swsm = 0; ++ u32 swsm2 = 0; ++ bool force_clear_smbi = false; ++ ++ /* Set media type */ ++ switch (adapter->pdev->device) { ++ case E1000_DEV_ID_82571EB_FIBER: ++ case E1000_DEV_ID_82572EI_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ hw->phy.media_type = e1000_media_type_fiber; ++ break; ++ case E1000_DEV_ID_82571EB_SERDES: ++ case E1000_DEV_ID_82572EI_SERDES: ++ case E1000_DEV_ID_82571EB_SERDES_DUAL: ++ case E1000_DEV_ID_82571EB_SERDES_QUAD: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* Adaptive IFS supported */ ++ mac->adaptive_ifs = true; ++ ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->setup_physical_interface = e1000_setup_copper_link_82571; ++ func->check_for_link = e1000e_check_for_copper_link; ++ func->get_link_up_info = e1000e_get_speed_and_duplex_copper; ++ break; ++ case e1000_media_type_fiber: ++ func->setup_physical_interface = ++ e1000_setup_fiber_serdes_link_82571; ++ func->check_for_link = e1000e_check_for_fiber_link; ++ func->get_link_up_info = ++ e1000e_get_speed_and_duplex_fiber_serdes; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->setup_physical_interface = ++ e1000_setup_fiber_serdes_link_82571; ++ func->check_for_link = e1000_check_for_serdes_link_82571; ++ func->get_link_up_info = ++ e1000e_get_speed_and_duplex_fiber_serdes; ++ break; ++ default: ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ func->set_lan_id = e1000_set_lan_id_single_port; ++ func->check_mng_mode = e1000e_check_mng_mode_generic; ++ func->led_on = e1000e_led_on_generic; ++ func->blink_led = e1000e_blink_led_generic; ++ ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ++ * ARC supported; valid only if manageability features are ++ * enabled. ++ */ ++ mac->arc_subsystem_valid = ++ (er32(FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ func->set_lan_id = e1000_set_lan_id_single_port; ++ func->check_mng_mode = e1000_check_mng_mode_82574; ++ func->led_on = e1000_led_on_82574; ++ break; ++ default: ++ func->check_mng_mode = e1000e_check_mng_mode_generic; ++ func->led_on = e1000e_led_on_generic; ++ func->blink_led = e1000e_blink_led_generic; ++ ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ break; ++ } ++ ++ /* ++ * Ensure that the inter-port SWSM.SMBI lock bit is clear before ++ * first NVM or PHY access. This should be done for single-port ++ * devices, and for one port only on dual-port devices so that ++ * for those devices we can still use the SMBI lock to synchronize ++ * inter-port accesses to the PHY & NVM. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ swsm2 = er32(SWSM2); ++ ++ if (!(swsm2 & E1000_SWSM2_LOCK)) { ++ /* Only do this for the first interface on this card */ ++ ew32(SWSM2, ++ swsm2 | E1000_SWSM2_LOCK); ++ force_clear_smbi = true; ++ } else ++ force_clear_smbi = false; ++ break; ++ default: ++ force_clear_smbi = true; ++ break; ++ } ++ ++ if (force_clear_smbi) { ++ /* Make sure SWSM.SMBI is clear */ ++ swsm = er32(SWSM); ++ if (swsm & E1000_SWSM_SMBI) { ++ /* This bit should not be set on a first interface, and ++ * indicates that the bootagent or EFI code has ++ * improperly left this bit enabled ++ */ ++ e_dbg("Please update your 82571 Bootagent\n"); ++ } ++ ew32(SWSM, swsm & ~E1000_SWSM_SMBI); ++ } ++ ++ /* ++ * Initialize device specific counter of SMBI acquisition ++ * timeouts. ++ */ ++ hw->dev_spec.e82571.smb_counter = 0; ++ ++ return 0; ++} ++ ++static s32 e1000_get_variants_82571(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ static int global_quad_port_a; /* global port a indication */ ++ struct pci_dev *pdev = adapter->pdev; ++ int is_port_b = er32(STATUS) & E1000_STATUS_FUNC_1; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_82571(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_82571(hw); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_phy_params_82571(hw); ++ if (rc) ++ return rc; ++ ++ /* tag quad port adapters first, it's used below */ ++ switch (pdev->device) { ++ case E1000_DEV_ID_82571EB_QUAD_COPPER: ++ case E1000_DEV_ID_82571EB_QUAD_FIBER: ++ case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: ++ case E1000_DEV_ID_82571PT_QUAD_COPPER: ++ adapter->flags |= FLAG_IS_QUAD_PORT; ++ /* mark the first port */ ++ if (global_quad_port_a == 0) ++ adapter->flags |= FLAG_IS_QUAD_PORT_A; ++ /* Reset for multiple quad port adapters */ ++ global_quad_port_a++; ++ if (global_quad_port_a == 4) ++ global_quad_port_a = 0; ++ break; ++ default: ++ break; ++ } ++ ++ switch (adapter->hw.mac.type) { ++ case e1000_82571: ++ /* these dual ports don't have WoL on port B at all */ ++ if (((pdev->device == E1000_DEV_ID_82571EB_FIBER) || ++ (pdev->device == E1000_DEV_ID_82571EB_SERDES) || ++ (pdev->device == E1000_DEV_ID_82571EB_COPPER)) && ++ (is_port_b)) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ /* quad ports only support WoL on port A */ ++ if (adapter->flags & FLAG_IS_QUAD_PORT && ++ (!(adapter->flags & FLAG_IS_QUAD_PORT_A))) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ /* Does not support WoL on any port */ ++ if (pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD) ++ adapter->flags &= ~FLAG_HAS_WOL; ++ break; ++ case e1000_82573: ++ if (pdev->device == E1000_DEV_ID_82573L) { ++ adapter->flags |= FLAG_HAS_JUMBO_FRAMES; ++ adapter->max_hw_frame_size = DEFAULT_JUMBO; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_get_phy_id_82571 - Retrieve the PHY ID and revision ++ * @hw: pointer to the HW structure ++ * ++ * Reads the PHY registers and stores the PHY ID and possibly the PHY ++ * revision in the hardware structure. ++ **/ ++static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 phy_id = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * The 82571 firmware may still be configuring the PHY. ++ * In this case, we cannot access the PHY until the ++ * configuration is done. So we explicitly set the ++ * PHY ID. 
++ */ ++ phy->id = IGP01E1000_I_PHY_ID; ++ break; ++ case e1000_82573: ++ return e1000e_get_phy_id(hw); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1e_rphy(hw, PHY_ID1, &phy_id); ++ if (ret_val) ++ return ret_val; ++ ++ phy->id = (u32)(phy_id << 16); ++ udelay(20); ++ ret_val = e1e_rphy(hw, PHY_ID2, &phy_id); ++ if (ret_val) ++ return ret_val; ++ ++ phy->id |= (u32)(phy_id); ++ phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 sw_timeout = hw->nvm.word_size + 1; ++ s32 fw_timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* ++ * If we have timedout 3 times on trying to acquire ++ * the inter-port SMBI semaphore, there is old code ++ * operating on the other port, and it is not ++ * releasing SMBI. Modify the number of times that ++ * we try for the semaphore to interwork with this ++ * older code. ++ */ ++ if (hw->dev_spec.e82571.smb_counter > 2) ++ sw_timeout = 1; ++ ++ /* Get the SW semaphore */ ++ while (i < sw_timeout) { ++ swsm = er32(SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == sw_timeout) { ++ e_dbg("Driver can't access device - SMBI bit is set.\n"); ++ hw->dev_spec.e82571.smb_counter++; ++ } ++ /* Get the FW semaphore. */ ++ for (i = 0; i < fw_timeout; i++) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (er32(SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == fw_timeout) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_82571(hw); ++ e_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82571 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = er32(SWSM); ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ew32(SWSM, swsm); ++} ++/** ++ * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore during reset. ++ * ++ **/ ++static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ s32 ret_val = 0; ++ s32 i = 0; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ do { ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) ++ break; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ++ usleep_range(2000, 4000); ++ i++; ++ } while (i < MDIO_OWNERSHIP_TIMEOUT); ++ ++ if (i == MDIO_OWNERSHIP_TIMEOUT) { ++ /* Release semaphores */ ++ e1000_put_hw_semaphore_82573(hw); ++ e_dbg("Driver can't access the PHY\n"); ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82573 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used during reset. 
++ * ++ **/ ++static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++} ++ ++static DEFINE_MUTEX(swflag_mutex); ++ ++/** ++ * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM. ++ * ++ **/ ++static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ mutex_lock(&swflag_mutex); ++ ret_val = e1000_get_hw_semaphore_82573(hw); ++ if (ret_val) ++ mutex_unlock(&swflag_mutex); ++ return ret_val; ++} ++ ++/** ++ * e1000_put_hw_semaphore_82574 - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ * ++ **/ ++static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) ++{ ++ e1000_put_hw_semaphore_82573(hw); ++ mutex_unlock(&swflag_mutex); ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. ++ * LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) ++{ ++ u16 data = er32(POEMB); ++ ++ if (active) ++ data |= E1000_PHY_CTRL_D0A_LPLU; ++ else ++ data &= ~E1000_PHY_CTRL_D0A_LPLU; ++ ++ ew32(POEMB, data); ++ return 0; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_82574 - Sets low power link up state for D3 ++ * @hw: pointer to the HW structure ++ * @active: boolean used to enable/disable lplu ++ * ++ * The low power link up (lplu) state is set to the power management level D3 ++ * when active is true, else clear lplu for D3. LPLU ++ * is used during Dx states where the power conservation is most important. ++ * During driver activity, SmartSpeed should be enabled so performance is ++ * maintained. ++ **/ ++static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active) ++{ ++ u16 data = er32(POEMB); ++ ++ if (!active) { ++ data &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ } else if ((hw->phy.autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (hw->phy.autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (hw->phy.autoneg_advertised == E1000_ALL_10_SPEED)) { ++ data |= E1000_PHY_CTRL_NOND0A_LPLU; ++ } ++ ++ ew32(POEMB, data); ++ return 0; ++} ++ ++/** ++ * e1000_acquire_nvm_82571 - Request for access to the EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * To gain access to the EEPROM, first we must obtain a hardware semaphore. ++ * Then for non-82573 hardware, set the EEPROM access request bit and wait ++ * for EEPROM access grant bit. If the access grant bit is not set, release ++ * hardware semaphore. 
++ **/ ++static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_get_hw_semaphore_82571(hw); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ break; ++ default: ++ ret_val = e1000e_acquire_nvm(hw); ++ break; ++ } ++ ++ if (ret_val) ++ e1000_put_hw_semaphore_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_82571 - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. ++ **/ ++static void e1000_release_nvm_82571(struct e1000_hw *hw) ++{ ++ e1000e_release_nvm(hw); ++ e1000_put_hw_semaphore_82571(hw); ++} ++ ++/** ++ * e1000_write_nvm_82571 - Write to EEPROM using appropriate interface ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * For non-82573 silicon, write data to EEPROM at offset using SPI interface. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ s32 ret_val; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data); ++ break; ++ case e1000_82571: ++ case e1000_82572: ++ ret_val = e1000e_write_nvm_spi(hw, offset, words, data); ++ break; ++ default: ++ ret_val = -E1000_ERR_NVM; ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_update_nvm_checksum_82571 - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ s32 ret_val; ++ u16 i; ++ ++ ret_val = e1000e_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * If our nvm is an EEPROM, then we're done ++ * otherwise, commit the checksum to the flash NVM. ++ */ ++ if (hw->nvm.type != e1000_nvm_flash_hw) ++ return ret_val; ++ ++ /* Check for pending operations. */ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ usleep_range(1000, 2000); ++ if ((er32(EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) ++ return -E1000_ERR_NVM; ++ ++ /* Reset the firmware if using STM opcode. */ ++ if ((er32(FLOP) & 0xFF00) == E1000_STM_OPCODE) { ++ /* ++ * The enabling of and the actual reset must be done ++ * in two write cycles. ++ */ ++ ew32(HICR, E1000_HICR_FW_RESET_ENABLE); ++ e1e_flush(); ++ ew32(HICR, E1000_HICR_FW_RESET); ++ } ++ ++ /* Commit the write to flash */ ++ eecd = er32(EECD) | E1000_EECD_FLUPD; ++ ew32(EECD, eecd); ++ ++ for (i = 0; i < E1000_FLASH_UPDATES; i++) { ++ usleep_range(1000, 2000); ++ if ((er32(EECD) & E1000_EECD_FLUPD) == 0) ++ break; ++ } ++ ++ if (i == E1000_FLASH_UPDATES) ++ return -E1000_ERR_NVM; ++ ++ return 0; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. 
++ **/ ++static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ if (hw->nvm.type == e1000_nvm_flash_hw) ++ e1000_fix_nvm_checksum_82571(hw); ++ ++ return e1000e_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * After checking for invalid values, poll the EEPROM to ensure the previous ++ * command has completed before trying to write the next word. After write ++ * poll for completion. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function, the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eewr = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eewr = (data[i] << E1000_NVM_RW_REG_DATA) | ++ ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | ++ E1000_NVM_RW_REG_START; ++ ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ ++ ew32(EEWR, eewr); ++ ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); ++ if (ret_val) ++ break; ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cfg_done_82571 - Poll for configuration done ++ * @hw: pointer to the HW structure ++ * ++ * Reads the management control register for the config done bit to be set. ++ **/ ++static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ ++ while (timeout) { ++ if (er32(EEMNGCTL) & ++ E1000_NVM_CFG_DONE_PORT_0) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) { ++ e_dbg("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When activating LPLU ++ * this function also disables smart speed and vice versa. LPLU will not be ++ * activated unless the device autonegotiation advertisement meets standards ++ * of either 10 or 10/100 or 10/100/1000 at all duplexes. This is a function ++ * pointer entry point only called by PHY setup routines. 
++ **/ ++static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (active) { ++ data |= IGP02E1000_PM_D0_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ data &= ~IGP02E1000_PM_D0_LPLU; ++ ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_reset_hw_82571 - Reset hardware ++ * @hw: pointer to the HW structure ++ * ++ * This resets the hardware into a known state. ++ **/ ++static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl, ctrl_ext; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ /* ++ * Must acquire the MDIO ownership before MAC reset. ++ * Ownership defaults to firmware after a reset. ++ */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ ret_val = e1000_get_hw_semaphore_82573(hw); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ ret_val = e1000_get_hw_semaphore_82574(hw); ++ break; ++ default: ++ break; ++ } ++ if (ret_val) ++ e_dbg("Cannot acquire MDIO ownership\n"); ++ ++ ctrl = er32(CTRL); ++ ++ e_dbg("Issuing a global reset to MAC\n"); ++ ew32(CTRL, ctrl | E1000_CTRL_RST); ++ ++ /* Must release MDIO ownership and mutex after MAC reset. */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ e1000_put_hw_semaphore_82574(hw); ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->nvm.type == e1000_nvm_flash_hw) { ++ udelay(10); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ } ++ ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. */ ++ return ret_val; ++ ++ /* ++ * Phy configuration from NVM just starts after EECD_AUTO_RD is set. ++ * Need to wait for Phy configuration completion before accessing ++ * NVM and Phy. 
++ */ ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ msleep(25); ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear any pending interrupt events. */ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ if (hw->mac.type == e1000_82571) { ++ /* Install any alternate MAC address into RAR0 */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_set_laa_state_82571(hw, true); ++ } ++ ++ /* Reinitialize the 82571 serdes link state machine */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes) ++ hw->mac.serdes_link_state = e1000_serdes_link_down; ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_hw_82571 - Initialize hardware ++ * @hw: pointer to the HW structure ++ * ++ * This inits the hardware readying it for operation. ++ **/ ++static s32 e1000_init_hw_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 i, rar_count = mac->rar_entry_count; ++ ++ e1000_initialize_hw_bits_82571(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000e_id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Disabling VLAN filtering */ ++ e_dbg("Initializing the IEEE VLAN\n"); ++ mac->ops.clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ /* ++ * If, however, a locally administered address was assigned to the ++ * 82571, we must reserve a RAR for it to work around an issue where ++ * resetting one port will reload the MAC on the other port. ++ */ ++ if (e1000e_get_laa_state_82571(hw)) ++ rar_count--; ++ e1000e_init_rx_addrs(hw, rar_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_82571(hw); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = er32(TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ switch (mac->type) { ++ case e1000_82573: ++ e1000e_enable_tx_pkt_filtering(hw); ++ /* fall through */ ++ case e1000_82574: ++ case e1000_82583: ++ reg_data = er32(GCR); ++ reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; ++ ew32(GCR, reg_data); ++ break; ++ default: ++ reg_data = er32(TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | ++ E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(1), reg_data); ++ break; ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_82571(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg |= (1 << 23) | (1 << 24) | (1 << 25) | (1 << 26); ++ break; ++ default: ++ break; ++ } ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ reg &= ~((1 << 29) | (1 << 30)); ++ reg |= (1 << 22) | (1 << 24) | (1 << 25) | (1 << 26); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ ew32(TARC(1), reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Device Control */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(CTRL); ++ reg &= ~(1 << 29); ++ ew32(CTRL, reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* Extended Device Control */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(CTRL_EXT); ++ reg &= ~(1 << 23); ++ reg |= (1 << 22); ++ ew32(CTRL_EXT, reg); ++ break; ++ default: ++ break; ++ } ++ ++ if (hw->mac.type == e1000_82571) { ++ reg = er32(PBA_ECC); ++ reg |= E1000_PBA_ECC_CORR_EN; ++ ew32(PBA_ECC, reg); ++ } ++ /* ++ * Workaround for hardware errata. ++ * Ensure that DMA Dynamic Clock gating is disabled on 82571 and 82572 ++ */ ++ ++ if ((hw->mac.type == e1000_82571) || ++ (hw->mac.type == e1000_82572)) { ++ reg = er32(CTRL_EXT); ++ reg &= ~E1000_CTRL_EXT_DMA_DYN_CLK_EN; ++ ew32(CTRL_EXT, reg); ++ } ++ ++ ++ /* PCI-Ex Control Registers */ ++ switch (hw->mac.type) { ++ case e1000_82574: ++ case e1000_82583: ++ reg = er32(GCR); ++ reg |= (1 << 22); ++ ew32(GCR, reg); ++ ++ /* ++ * Workaround for hardware errata. ++ * apply workaround for hardware errata documented in errata ++ * docs Fixes issue where some error prone or unreliable PCIe ++ * completions are occurring, particularly with ASPM enabled. ++ * Without fix, issue can cause Tx timeouts. ++ */ ++ reg = er32(GCR2); ++ reg |= 1; ++ ew32(GCR2, reg); ++ break; ++ default: ++ break; ++ } ++} ++ ++/** ++ * e1000_clear_vfta_82571 - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++static void e1000_clear_vfta_82571(struct e1000_hw *hw) ++{ ++ u32 offset; ++ u32 vfta_value = 0; ++ u32 vfta_offset = 0; ++ u32 vfta_bit_in_reg = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (hw->mng_cookie.vlan_id != 0) { ++ /* ++ * The VFTA is a 4096b bit-field, each identifying ++ * a single VLAN ID. The following operations ++ * determine which 32b entry (i.e. offset) into the ++ * array we want to set the VLAN ID (i.e. bit) of ++ * the manageability unit. 
++ */ ++ vfta_offset = (hw->mng_cookie.vlan_id >> ++ E1000_VFTA_ENTRY_SHIFT) & ++ E1000_VFTA_ENTRY_MASK; ++ vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & ++ E1000_VFTA_ENTRY_BIT_SHIFT_MASK); ++ } ++ break; ++ default: ++ break; ++ } ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ /* ++ * If the offset we want to clear is the same offset of the ++ * manageability VLAN ID, then clear all bits except that of ++ * the manageability unit. ++ */ ++ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0; ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value); ++ e1e_flush(); ++ } ++} ++ ++/** ++ * e1000_check_mng_mode_82574 - Check manageability is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Reads the NVM Initialization Control Word 2 and returns true ++ * (>0) if any manageability is enabled, else false (0). ++ **/ ++static bool e1000_check_mng_mode_82574(struct e1000_hw *hw) ++{ ++ u16 data; ++ ++ e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ return (data & E1000_NVM_INIT_CTRL2_MNGM) != 0; ++} ++ ++/** ++ * e1000_led_on_82574 - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++static s32 e1000_led_on_82574(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ u32 i; ++ ++ ctrl = hw->mac.ledctl_mode2; ++ if (!(E1000_STATUS_LU & er32(STATUS))) { ++ /* ++ * If no link, then turn LED on by setting the invert bit ++ * for each LED that's "on" (0x0E) in ledctl_mode2. ++ */ ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ctrl |= (E1000_LEDCTL_LED0_IVRT << (i * 8)); ++ } ++ ew32(LEDCTL, ctrl); ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_phy_82574 - check 82574 phy hung state ++ * @hw: pointer to the HW structure ++ * ++ * Returns whether phy is hung or not ++ **/ ++bool e1000_check_phy_82574(struct e1000_hw *hw) ++{ ++ u16 status_1kbt = 0; ++ u16 receive_errors = 0; ++ bool phy_hung = false; ++ s32 ret_val = 0; ++ ++ /* ++ * Read PHY Receive Error counter first, if its is max - all F's then ++ * read the Base1000T status register If both are max then PHY is hung. ++ */ ++ ret_val = e1e_rphy(hw, E1000_RECEIVE_ERROR_COUNTER, &receive_errors); ++ ++ if (ret_val) ++ goto out; ++ if (receive_errors == E1000_RECEIVE_ERROR_MAX) { ++ ret_val = e1e_rphy(hw, E1000_BASE1000T_STATUS, &status_1kbt); ++ if (ret_val) ++ goto out; ++ if ((status_1kbt & E1000_IDLE_ERROR_COUNT_MASK) == ++ E1000_IDLE_ERROR_COUNT_MASK) ++ phy_hung = true; ++ } ++out: ++ return phy_hung; ++} ++ ++/** ++ * e1000_setup_link_82571 - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_82571(struct e1000_hw *hw) ++{ ++ /* ++ * 82573 does not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. 
++ */ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (hw->fc.requested_mode == e1000_fc_default) ++ hw->fc.requested_mode = e1000_fc_full; ++ break; ++ default: ++ break; ++ } ++ ++ return e1000e_setup_link(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_82571 - Configure copper link settings ++ * @hw: pointer to the HW structure ++ * ++ * Configures the link for auto-neg or forced speed and duplex. Then we check ++ * for link, once link is established calls to configure collision distance ++ * and flow control are called. ++ **/ ++static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ switch (hw->phy.type) { ++ case e1000_phy_m88: ++ case e1000_phy_bm: ++ ret_val = e1000e_copper_link_setup_m88(hw); ++ break; ++ case e1000_phy_igp_2: ++ ret_val = e1000e_copper_link_setup_igp(hw); ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_setup_copper_link(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes links. ++ * Upon successful setup, poll for link. ++ **/ ++static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) ++{ ++ switch (hw->mac.type) { ++ case e1000_82571: ++ case e1000_82572: ++ /* ++ * If SerDes loopback mode is entered, there is no form ++ * of reset to take the adapter out of that mode. So we ++ * have to explicitly take the adapter out of loopback ++ * mode. This prevents drivers from twiddling their thumbs ++ * if another tool failed to take it out of loopback mode. ++ */ ++ ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); ++ break; ++ default: ++ break; ++ } ++ ++ return e1000e_setup_fiber_serdes_link(hw); ++} ++ ++/** ++ * e1000_check_for_serdes_link_82571 - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Reports the link state as up or down. ++ * ++ * If autonegotiation is supported by the link partner, the link state is ++ * determined by the result of autonegotiation. This is the most likely case. ++ * If autonegotiation is not supported by the link partner, and the link ++ * has a valid signal, force the link up. ++ * ++ * The link state is represented internally here by 4 states: ++ * ++ * 1) down ++ * 2) autoneg_progress ++ * 3) autoneg_complete (the link successfully autonegotiated) ++ * 4) forced_up (the link has been forced up, it did not autonegotiate) ++ * ++ **/ ++static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ u32 txcw; ++ u32 i; ++ s32 ret_val = 0; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { ++ ++ /* Receiver is synchronized with no invalid bits. 
*/ ++ switch (mac->serdes_link_state) { ++ case e1000_serdes_link_autoneg_complete: ++ if (!(status & E1000_STATUS_LU)) { ++ /* ++ * We have lost link, retry autoneg before ++ * reporting link failure ++ */ ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("AN_UP -> AN_PROG\n"); ++ } else { ++ mac->serdes_has_link = true; ++ } ++ break; ++ ++ case e1000_serdes_link_forced_up: ++ /* ++ * If we are receiving /C/ ordered sets, re-enable ++ * auto-negotiation in the TXCW register and disable ++ * forced link in the Device Control register in an ++ * attempt to auto-negotiate with our link partner. ++ * If the partner code word is null, stop forcing ++ * and restart auto negotiation. ++ */ ++ if ((rxcw & E1000_RXCW_C) || !(rxcw & E1000_RXCW_CW)) { ++ /* Enable autoneg, and unforce link up */ ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("FORCED_UP -> AN_PROG\n"); ++ } else { ++ mac->serdes_has_link = true; ++ } ++ break; ++ ++ case e1000_serdes_link_autoneg_progress: ++ if (rxcw & E1000_RXCW_C) { ++ /* ++ * We received /C/ ordered sets, meaning the ++ * link partner has autonegotiated, and we can ++ * trust the Link Up (LU) status bit. ++ */ ++ if (status & E1000_STATUS_LU) { ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_complete; ++ e_dbg("AN_PROG -> AN_UP\n"); ++ mac->serdes_has_link = true; ++ } else { ++ /* Autoneg completed, but failed. */ ++ mac->serdes_link_state = ++ e1000_serdes_link_down; ++ e_dbg("AN_PROG -> DOWN\n"); ++ } ++ } else { ++ /* ++ * The link partner did not autoneg. ++ * Force link up and full duplex, and change ++ * state to forced. ++ */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error config flow control\n"); ++ break; ++ } ++ mac->serdes_link_state = ++ e1000_serdes_link_forced_up; ++ mac->serdes_has_link = true; ++ e_dbg("AN_PROG -> FORCED_UP\n"); ++ } ++ break; ++ ++ case e1000_serdes_link_down: ++ default: ++ /* ++ * The link was down but the receiver has now gained ++ * valid sync, so lets see if we can bring the link ++ * up. 
++ */ ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("DOWN -> AN_PROG\n"); ++ break; ++ } ++ } else { ++ if (!(rxcw & E1000_RXCW_SYNCH)) { ++ mac->serdes_has_link = false; ++ mac->serdes_link_state = e1000_serdes_link_down; ++ e_dbg("ANYSTATE -> DOWN\n"); ++ } else { ++ /* ++ * Check several times, if Sync and Config ++ * both are consistently 1 then simply ignore ++ * the Invalid bit and restart Autoneg ++ */ ++ for (i = 0; i < AN_RETRY_COUNT; i++) { ++ udelay(10); ++ rxcw = er32(RXCW); ++ if ((rxcw & E1000_RXCW_IV) && ++ !((rxcw & E1000_RXCW_SYNCH) && ++ (rxcw & E1000_RXCW_C))) { ++ mac->serdes_has_link = false; ++ mac->serdes_link_state = ++ e1000_serdes_link_down; ++ e_dbg("ANYSTATE -> DOWN\n"); ++ break; ++ } ++ } ++ ++ if (i == AN_RETRY_COUNT) { ++ txcw = er32(TXCW); ++ txcw |= E1000_TXCW_ANE; ++ ew32(TXCW, txcw); ++ mac->serdes_link_state = ++ e1000_serdes_link_autoneg_progress; ++ mac->serdes_has_link = false; ++ e_dbg("ANYSTATE -> AN_PROG\n"); ++ } ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_valid_led_default_82571 - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. ++ **/ ++static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ switch (hw->mac.type) { ++ case e1000_82573: ++ case e1000_82574: ++ case e1000_82583: ++ if (*data == ID_LED_RESERVED_F746) ++ *data = ID_LED_DEFAULT_82573; ++ break; ++ default: ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_laa_state_82571 - Get locally administered address state ++ * @hw: pointer to the HW structure ++ * ++ * Retrieve and return the current locally administered address state. ++ **/ ++bool e1000e_get_laa_state_82571(struct e1000_hw *hw) ++{ ++ if (hw->mac.type != e1000_82571) ++ return false; ++ ++ return hw->dev_spec.e82571.laa_is_present; ++} ++ ++/** ++ * e1000e_set_laa_state_82571 - Set locally administered address state ++ * @hw: pointer to the HW structure ++ * @state: enable/disable locally administered address ++ * ++ * Enable/Disable the current locally administered address state. ++ **/ ++void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state) ++{ ++ if (hw->mac.type != e1000_82571) ++ return; ++ ++ hw->dev_spec.e82571.laa_is_present = state; ++ ++ /* If workaround is activated... */ ++ if (state) ++ /* ++ * Hold a copy of the LAA in RAR[14] This is done so that ++ * between the time RAR[0] gets clobbered and the time it ++ * gets fixed, the actual LAA is in one of the RARs and no ++ * incoming packets directed to this port are dropped. ++ * Eventually the LAA will be in RAR[0] and RAR[14]. ++ */ ++ e1000e_rar_set(hw, hw->mac.addr, hw->mac.rar_entry_count - 1); ++} ++ ++/** ++ * e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Verifies that the EEPROM has completed the update. After updating the ++ * EEPROM, we need to check bit 15 in work 0x23 for the checksum fix. 
If ++ * the checksum fix is not implemented, we need to set the bit and update ++ * the checksum. Otherwise, if bit 15 is set and the checksum is incorrect, ++ * we need to return bad checksum. ++ **/ ++static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 data; ++ ++ if (nvm->type != e1000_nvm_flash_hw) ++ return 0; ++ ++ /* ++ * Check bit 4 of word 10h. If it is 0, firmware is done updating ++ * 10h-12h. Checksum may need to be fixed. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x10, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(data & 0x10)) { ++ /* ++ * Read 0x23 and check bit 15. This bit is a 1 ++ * when the checksum has already been fixed. If ++ * the checksum is still wrong and this bit is a ++ * 1, we need to return bad checksum. Otherwise, ++ * we need to set this bit to a 1 and update the ++ * checksum. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(data & 0x8000)) { ++ data |= 0x8000; ++ ret_val = e1000_write_nvm(hw, 0x23, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_update_nvm_checksum(hw); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_mac_addr_82571 - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ if (hw->mac.type == e1000_82571) { ++ /* ++ * If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. ++ */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_82571 - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!(phy->ops.check_reset_block)) ++ return; ++ ++ /* If the management interface is not enabled, then power down */ ++ if (!(mac->ops.check_mng_mode(hw) || phy->ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw) ++{ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(PRC64); ++ er32(PRC127); ++ er32(PRC255); ++ er32(PRC511); ++ er32(PRC1023); ++ er32(PRC1522); ++ er32(PTC64); ++ er32(PTC127); ++ er32(PTC255); ++ er32(PTC511); ++ er32(PTC1023); ++ er32(PTC1522); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ er32(ICRXPTC); ++ er32(ICRXATC); ++ er32(ICTXPTC); ++ er32(ICTXATC); ++ er32(ICTXQEC); ++ er32(ICTXQMTC); ++ er32(ICRXDMTC); ++} ++ ++static const struct e1000_mac_operations e82571_mac_ops = { ++ /* .check_mng_mode: mac type dependent */ ++ /* .check_for_link: media type dependent */ ++ .id_led_init = e1000e_id_led_init, ++ .cleanup_led = e1000e_cleanup_led_generic, ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_82571, ++ .get_bus_info = e1000e_get_bus_info_pcie, ++ .set_lan_id = e1000_set_lan_id_multi_port_pcie, ++ /* .get_link_up_info: media type dependent */ ++ /* .led_on: mac type dependent */ ++ .led_off = e1000e_led_off_generic, ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .write_vfta = e1000_write_vfta_generic, ++ .clear_vfta = e1000_clear_vfta_82571, ++ .reset_hw = e1000_reset_hw_82571, ++ .init_hw = e1000_init_hw_82571, ++ .setup_link = e1000_setup_link_82571, ++ /* .setup_physical_interface: media type dependent */ ++ .setup_led = e1000e_setup_led_generic, ++ .read_mac_addr = e1000_read_mac_addr_82571, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_igp = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_igp, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = NULL, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_igp, ++ .get_cfg_done = e1000_get_cfg_done_82571, ++ .get_cable_length = e1000e_get_cable_length_igp_2, ++ .get_info = e1000e_get_phy_info_igp, ++ .read_reg = e1000e_read_phy_reg_igp, ++ .release = e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_igp, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_m88 = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_m88, ++ .get_cfg_done = e1000e_get_cfg_done, ++ .get_cable_length = e1000e_get_cable_length_m88, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000e_read_phy_reg_m88, ++ .release = e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_m88, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_phy_operations e82_phy_ops_bm = { ++ .acquire = e1000_get_hw_semaphore_82571, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000e_phy_force_speed_duplex_m88, ++ .get_cfg_done = e1000e_get_cfg_done, ++ .get_cable_length = e1000e_get_cable_length_m88, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000e_read_phy_reg_bm2, ++ .release = 
e1000_put_hw_semaphore_82571, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_82571, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000e_write_phy_reg_bm2, ++ .cfg_on_link_up = NULL, ++}; ++ ++static const struct e1000_nvm_operations e82571_nvm_ops = { ++ .acquire = e1000_acquire_nvm_82571, ++ .read = e1000e_read_nvm_eerd, ++ .release = e1000_release_nvm_82571, ++ .update = e1000_update_nvm_checksum_82571, ++ .valid_led_default = e1000_valid_led_default_82571, ++ .validate = e1000_validate_nvm_checksum_82571, ++ .write = e1000_write_nvm_82571, ++}; ++ ++const struct e1000_info e1000_82571_info = { ++ .mac = e1000_82571, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_RESET_OVERWRITES_LAA /* errata */ ++ | FLAG_TARC_SPEED_MODE_BIT /* errata */ ++ | FLAG_APME_CHECK_PORT_B, ++ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */ ++ | FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_igp, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82572_info = { ++ .mac = e1000_82572, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_TARC_SPEED_MODE_BIT, /* errata */ ++ .flags2 = FLAG2_DISABLE_ASPM_L1 /* errata 13 */ ++ | FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_igp, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82573_info = { ++ .mac = e1000_82573, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_SWSM_ON_LOAD, ++ .flags2 = FLAG2_DISABLE_ASPM_L1 ++ | FLAG2_DISABLE_ASPM_L0S, ++ .pba = 20, ++ .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_m88, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82574_info = { ++ .mac = e1000_82574, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_MSIX ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_CTRLEXT_ON_LOAD, ++ .flags2 = FLAG2_CHECK_PHY_HANG ++ | FLAG2_DISABLE_ASPM_L0S ++ | FLAG2_NO_DISABLE_RX, ++ .pba = 32, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_bm, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ ++const struct e1000_info e1000_82583_info = { ++ .mac = e1000_82583, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_SMART_POWER_DOWN ++ | FLAG_HAS_AMT ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_CTRLEXT_ON_LOAD, ++ .flags2 = FLAG2_DISABLE_ASPM_L0S ++ | FLAG2_NO_DISABLE_RX, ++ .pba = 32, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_82571, ++ .mac_ops = &e82571_mac_ops, ++ .phy_ops = &e82_phy_ops_bm, ++ .nvm_ops = &e82571_nvm_ops, ++}; ++ +--- linux/drivers/xenomai/net/drivers/e1000e/lib.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/lib.c 2021-04-07 16:01:27.214634185 +0800 +@@ -0,0 +1,2693 @@ 
++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include "e1000.h" ++ ++enum e1000_mng_mode { ++ e1000_mng_mode_none = 0, ++ e1000_mng_mode_asf, ++ e1000_mng_mode_pt, ++ e1000_mng_mode_ipmi, ++ e1000_mng_mode_host_if_only ++}; ++ ++#define E1000_FACTPS_MNGCG 0x20000000 ++ ++/* Intel(R) Active Management Technology signature */ ++#define E1000_IAMT_SIGNATURE 0x544D4149 ++ ++/** ++ * e1000e_get_bus_info_pcie - Get PCIe bus information ++ * @hw: pointer to the HW structure ++ * ++ * Determines and stores the system bus information for a particular ++ * network interface. The following bus information is determined and stored: ++ * bus speed, bus width, type (PCIe), and PCIe function. ++ **/ ++s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_bus_info *bus = &hw->bus; ++ struct e1000_adapter *adapter = hw->adapter; ++ u16 pcie_link_status, cap_offset; ++ ++ cap_offset = pci_pcie_cap(adapter->pdev); ++ if (!cap_offset) { ++ bus->width = e1000_bus_width_unknown; ++ } else { ++ pci_read_config_word(adapter->pdev, ++ cap_offset + PCIE_LINK_STATUS, ++ &pcie_link_status); ++ bus->width = (enum e1000_bus_width)((pcie_link_status & ++ PCIE_LINK_WIDTH_MASK) >> ++ PCIE_LINK_WIDTH_SHIFT); ++ } ++ ++ mac->ops.set_lan_id(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices ++ * ++ * @hw: pointer to the HW structure ++ * ++ * Determines the LAN function id by reading memory-mapped registers ++ * and swaps the port value if requested. ++ **/ ++void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ u32 reg; ++ ++ /* ++ * The status register reports the correct function number ++ * for the device regardless of function swap state. ++ */ ++ reg = er32(STATUS); ++ bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; ++} ++ ++/** ++ * e1000_set_lan_id_single_port - Set LAN id for a single port device ++ * @hw: pointer to the HW structure ++ * ++ * Sets the LAN function id to zero for a single port device. 
++ **/ ++void e1000_set_lan_id_single_port(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ ++ bus->func = 0; ++} ++ ++/** ++ * e1000_clear_vfta_generic - Clear VLAN filter table ++ * @hw: pointer to the HW structure ++ * ++ * Clears the register array which contains the VLAN filter table by ++ * setting all the values to 0. ++ **/ ++void e1000_clear_vfta_generic(struct e1000_hw *hw) ++{ ++ u32 offset; ++ ++ for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); ++ e1e_flush(); ++ } ++} ++ ++/** ++ * e1000_write_vfta_generic - Write value to VLAN filter table ++ * @hw: pointer to the HW structure ++ * @offset: register offset in VLAN filter table ++ * @value: register value written to VLAN filter table ++ * ++ * Writes value at the given offset in the register array which stores ++ * the VLAN filter table. ++ **/ ++void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) ++{ ++ E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_init_rx_addrs - Initialize receive address's ++ * @hw: pointer to the HW structure ++ * @rar_count: receive address registers ++ * ++ * Setup the receive address registers by setting the base receive address ++ * register to the devices MAC address and clearing all the other receive ++ * address registers to 0. ++ **/ ++void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) ++{ ++ u32 i; ++ u8 mac_addr[ETH_ALEN] = {0}; ++ ++ /* Setup the receive address */ ++ e_dbg("Programming MAC Address into RAR[0]\n"); ++ ++ e1000e_rar_set(hw, hw->mac.addr, 0); ++ ++ /* Zero out the other (rar_entry_count - 1) receive addresses */ ++ e_dbg("Clearing RAR[1-%u]\n", rar_count-1); ++ for (i = 1; i < rar_count; i++) ++ e1000e_rar_set(hw, mac_addr, i); ++} ++ ++/** ++ * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr ++ * @hw: pointer to the HW structure ++ * ++ * Checks the nvm for an alternate MAC address. An alternate MAC address ++ * can be setup by pre-boot software and must be treated like a permanent ++ * address and must override the actual permanent MAC address. If an ++ * alternate MAC address is found it is programmed into RAR0, replacing ++ * the permanent address that was installed into RAR0 by the Si on reset. ++ * This function will return SUCCESS unless it encounters an error while ++ * reading the EEPROM. ++ **/ ++s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 i; ++ s32 ret_val = 0; ++ u16 offset, nvm_alt_mac_addr_offset, nvm_data; ++ u8 alt_mac_addr[ETH_ALEN]; ++ ++ ret_val = e1000_read_nvm(hw, NVM_COMPAT, 1, &nvm_data); ++ if (ret_val) ++ goto out; ++ ++ /* Check for LOM (vs. 
NIC) or one of two valid mezzanine cards */ ++ if (!((nvm_data & NVM_COMPAT_LOM) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_DUAL) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES))) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1, ++ &nvm_alt_mac_addr_offset); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if ((nvm_alt_mac_addr_offset == 0xFFFF) || ++ (nvm_alt_mac_addr_offset == 0x0000)) ++ /* There is no Alternate MAC Address */ ++ goto out; ++ ++ if (hw->bus.func == E1000_FUNC_1) ++ nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; ++ for (i = 0; i < ETH_ALEN; i += 2) { ++ offset = nvm_alt_mac_addr_offset + (i >> 1); ++ ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ alt_mac_addr[i] = (u8)(nvm_data & 0xFF); ++ alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); ++ } ++ ++ /* if multicast bit is set, the alternate address will not be used */ ++ if (is_multicast_ether_addr(alt_mac_addr)) { ++ e_dbg("Ignoring Alternate Mac Address with MC bit set\n"); ++ goto out; ++ } ++ ++ /* ++ * We have a valid alternate MAC address, and we want to treat it the ++ * same as the normal permanent MAC address stored by the HW into the ++ * RAR. Do this by mapping this address into RAR0. ++ */ ++ e1000e_rar_set(hw, alt_mac_addr, 0); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000e_rar_set - Set receive address register ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address array register at index to the address passed ++ * in by addr. ++ **/ ++void e1000e_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ ++ /* ++ * HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32) addr[0] | ++ ((u32) addr[1] << 8) | ++ ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); ++ ++ rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ /* ++ * Some bridges will combine consecutive 32-bit writes into ++ * a single burst write, which will malfunction on some parts. ++ * The flushes avoid this. ++ */ ++ ew32(RAL(index), rar_low); ++ e1e_flush(); ++ ew32(RAH(index), rar_high); ++ e1e_flush(); ++} ++ ++/** ++ * e1000_hash_mc_addr - Generate a multicast hash value ++ * @hw: pointer to the HW structure ++ * @mc_addr: pointer to a multicast address ++ * ++ * Generates a multicast address hash value which is used to determine ++ * the multicast filter table array address and new table value. See ++ * e1000_mta_set_generic() ++ **/ ++static u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) ++{ ++ u32 hash_value, hash_mask; ++ u8 bit_shift = 0; ++ ++ /* Register count multiplied by bits per register */ ++ hash_mask = (hw->mac.mta_reg_count * 32) - 1; ++ ++ /* ++ * For a mc_filter_type of 0, bit_shift is the number of left-shifts ++ * where 0xFF would still fall within the hash mask. ++ */ ++ while (hash_mask >> bit_shift != 0xFF) ++ bit_shift++; ++ ++ /* ++ * The portion of the address that is used for the hash table ++ * is determined by the mc_filter_type setting. ++ * The algorithm is such that there is a total of 8 bits of shifting. 
++ * The bit_shift for a mc_filter_type of 0 represents the number of ++ * left-shifts where the MSB of mc_addr[5] would still fall within ++ * the hash_mask. Case 0 does this exactly. Since there are a total ++ * of 8 bits of shifting, then mc_addr[4] will shift right the ++ * remaining number of bits. Thus 8 - bit_shift. The rest of the ++ * cases are a variation of this algorithm...essentially raising the ++ * number of bits to shift mc_addr[5] left, while still keeping the ++ * 8-bit shifting total. ++ * ++ * For example, given the following Destination MAC Address and an ++ * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), ++ * we can see that the bit_shift for case 0 is 4. These are the hash ++ * values resulting from each mc_filter_type... ++ * [0] [1] [2] [3] [4] [5] ++ * 01 AA 00 12 34 56 ++ * LSB MSB ++ * ++ * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 ++ * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 ++ * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 ++ * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 ++ */ ++ switch (hw->mac.mc_filter_type) { ++ default: ++ case 0: ++ break; ++ case 1: ++ bit_shift += 1; ++ break; ++ case 2: ++ bit_shift += 2; ++ break; ++ case 3: ++ bit_shift += 4; ++ break; ++ } ++ ++ hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | ++ (((u16) mc_addr[5]) << bit_shift))); ++ ++ return hash_value; ++} ++ ++/** ++ * e1000e_update_mc_addr_list_generic - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates entire Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ **/ ++void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, ++ u8 *mc_addr_list, u32 mc_addr_count) ++{ ++ u32 hash_value, hash_bit, hash_reg; ++ int i; ++ ++ /* clear mta_shadow */ ++ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); ++ ++ /* update mta_shadow from mc_addr_list */ ++ for (i = 0; (u32) i < mc_addr_count; i++) { ++ hash_value = e1000_hash_mc_addr(hw, mc_addr_list); ++ ++ hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); ++ hash_bit = hash_value & 0x1F; ++ ++ hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); ++ mc_addr_list += (ETH_ALEN); ++ } ++ ++ /* replace the entire MTA table */ ++ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_clear_hw_cntrs_base - Clear base hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the base hardware counters by reading the counter registers. 
++ **/ ++void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) ++{ ++ er32(CRCERRS); ++ er32(SYMERRS); ++ er32(MPC); ++ er32(SCC); ++ er32(ECOL); ++ er32(MCC); ++ er32(LATECOL); ++ er32(COLC); ++ er32(DC); ++ er32(SEC); ++ er32(RLEC); ++ er32(XONRXC); ++ er32(XONTXC); ++ er32(XOFFRXC); ++ er32(XOFFTXC); ++ er32(FCRUC); ++ er32(GPRC); ++ er32(BPRC); ++ er32(MPRC); ++ er32(GPTC); ++ er32(GORCL); ++ er32(GORCH); ++ er32(GOTCL); ++ er32(GOTCH); ++ er32(RNBC); ++ er32(RUC); ++ er32(RFC); ++ er32(ROC); ++ er32(RJC); ++ er32(TORL); ++ er32(TORH); ++ er32(TOTL); ++ er32(TOTH); ++ er32(TPR); ++ er32(TPT); ++ er32(MPTC); ++ er32(BPTC); ++} ++ ++/** ++ * e1000e_check_for_copper_link - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++s32 e1000e_check_for_copper_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) ++ return 0; ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) ++ return ret_val; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000e_check_downshift(hw); ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ return ret_val; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000e_config_collision_dist(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) ++ e_dbg("Error configuring flow control\n"); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_check_for_fiber_link - Check for link (Fiber) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000e_check_for_fiber_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), the cable is plugged in (we have signal), ++ * and our link partner is not trying to auto-negotiate with us (we ++ * are receiving idles or data), we need to force link up. We also ++ * need to give auto-negotiation time to complete, in case the cable ++ * was just plugged in. The autoneg_failed flag does this. 
++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((ctrl & E1000_CTRL_SWDPIN1) && (!(status & E1000_STATUS_LU)) && ++ (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ return 0; ++ } ++ e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = true; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_check_for_serdes_link - Check for link (Serdes) ++ * @hw: pointer to the HW structure ++ * ++ * Checks for link up on the hardware. If link is not up and we have ++ * a signal, then we need to force link up. ++ **/ ++s32 e1000e_check_for_serdes_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 rxcw; ++ u32 ctrl; ++ u32 status; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ status = er32(STATUS); ++ rxcw = er32(RXCW); ++ ++ /* ++ * If we don't have link (auto-negotiation failed or link partner ++ * cannot auto-negotiate), and our link partner is not trying to ++ * auto-negotiate with us (we are receiving idles or data), ++ * we need to force link up. We also need to give auto-negotiation ++ * time to complete. ++ */ ++ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ ++ if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) { ++ if (mac->autoneg_failed == 0) { ++ mac->autoneg_failed = 1; ++ return 0; ++ } ++ e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); ++ ++ /* Disable auto-negotiation in the TXCW register */ ++ ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ++ ++ /* Force link-up and also force full-duplex. */ ++ ctrl = er32(CTRL); ++ ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); ++ ew32(CTRL, ctrl); ++ ++ /* Configure Flow Control after forcing link up. */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) { ++ e_dbg("Error configuring flow control\n"); ++ return ret_val; ++ } ++ } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { ++ /* ++ * If we are forcing link and we are receiving /C/ ordered ++ * sets, re-enable auto-negotiation in the TXCW register ++ * and disable forced link in the Device Control register ++ * in an attempt to auto-negotiate with our link partner. ++ */ ++ e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); ++ ew32(TXCW, mac->txcw); ++ ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); ++ ++ mac->serdes_has_link = true; ++ } else if (!(E1000_TXCW_ANE & er32(TXCW))) { ++ /* ++ * If we force link for non-auto-negotiation switch, check ++ * link status based on MAC synchronization for internal ++ * serdes media type. ++ */ ++ /* SYNCH bit and IV bit are sticky. 
*/ ++ udelay(10); ++ rxcw = er32(RXCW); ++ if (rxcw & E1000_RXCW_SYNCH) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = true; ++ e_dbg("SERDES: Link up - forced.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - force failed.\n"); ++ } ++ } ++ ++ if (E1000_TXCW_ANE & er32(TXCW)) { ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) { ++ /* SYNCH bit and IV bit are sticky, so reread rxcw. */ ++ udelay(10); ++ rxcw = er32(RXCW); ++ if (rxcw & E1000_RXCW_SYNCH) { ++ if (!(rxcw & E1000_RXCW_IV)) { ++ mac->serdes_has_link = true; ++ e_dbg("SERDES: Link up - autoneg " ++ "completed successfully.\n"); ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - invalid" ++ "codewords detected in autoneg.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - no sync.\n"); ++ } ++ } else { ++ mac->serdes_has_link = false; ++ e_dbg("SERDES: Link down - autoneg failed\n"); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_default_fc_generic - Set flow control default values ++ * @hw: pointer to the HW structure ++ * ++ * Read the EEPROM for the default values for flow control and store the ++ * values. ++ **/ ++static s32 e1000_set_default_fc_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ ++ /* ++ * Read and store word 0x0F of the EEPROM. This word contains bits ++ * that determine the hardware's default PAUSE (flow control) mode, ++ * a bit that determines whether the HW defaults to enabling or ++ * disabling auto-negotiation, and the direction of the ++ * SW defined pins. If there is no SW over-ride of the flow ++ * control setting, then the variable hw->fc will ++ * be initialized based on a value in the EEPROM. ++ */ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); ++ ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0) ++ hw->fc.requested_mode = e1000_fc_none; ++ else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == ++ NVM_WORD0F_ASM_DIR) ++ hw->fc.requested_mode = e1000_fc_tx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_link - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++s32 e1000e_setup_link(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ ++ /* ++ * In the case of the phy reset being blocked, we already have a link. ++ * We do not need to set it up again. ++ */ ++ if (e1000_check_reset_block(hw)) ++ return 0; ++ ++ /* ++ * If requested flow control is set to default, set flow control ++ * based on the EEPROM flow control settings. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ ret_val = e1000_set_default_fc_generic(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Save off the requested flow control mode for use later. Depending ++ * on the link partner's capabilities, we may or may not use this mode. 
++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ e_dbg("After fix-ups FlowControl is now = %x\n", ++ hw->fc.current_mode); ++ ++ /* Call the necessary media_type subroutine to configure the link. */ ++ ret_val = mac->ops.setup_physical_interface(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Initialize the flow control address, type, and PAUSE timer ++ * registers to their default values. This is done even if flow ++ * control is disabled, because it does not hurt anything to ++ * initialize these registers. ++ */ ++ e_dbg("Initializing the Flow Control address, type and timer regs\n"); ++ ew32(FCT, FLOW_CONTROL_TYPE); ++ ew32(FCAH, FLOW_CONTROL_ADDRESS_HIGH); ++ ew32(FCAL, FLOW_CONTROL_ADDRESS_LOW); ++ ++ ew32(FCTTV, hw->fc.pause_time); ++ ++ return e1000e_set_fc_watermarks(hw); ++} ++ ++/** ++ * e1000_commit_fc_settings_generic - Configure flow control ++ * @hw: pointer to the HW structure ++ * ++ * Write the flow control settings to the Transmit Config Word Register (TXCW) ++ * base on the flow control settings in e1000_mac_info. ++ **/ ++static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 txcw; ++ ++ /* ++ * Check for a software override of the flow control settings, and ++ * setup the device accordingly. If auto-negotiation is enabled, then ++ * software will have to set the "PAUSE" bits to the correct value in ++ * the Transmit Config Word Register (TXCW) and re-start auto- ++ * negotiation. However, if auto-negotiation is disabled, then ++ * software will have to manually configure the two flow control enable ++ * bits in the CTRL register. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause frames, ++ * but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames but we ++ * do not support receiving pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) are enabled. ++ */ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ /* Flow control completely disabled by a software over-ride. */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); ++ break; ++ case e1000_fc_rx_pause: ++ /* ++ * Rx Flow control is enabled and Tx Flow control is disabled ++ * by a software over-ride. Since there really isn't a way to ++ * advertise that we are capable of Rx Pause ONLY, we will ++ * advertise that we support both symmetric and asymmetric Rx ++ * PAUSE. Later, we will disable the adapter's ability to send ++ * PAUSE frames. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ case e1000_fc_tx_pause: ++ /* ++ * Tx Flow control is enabled, and Rx Flow control is disabled, ++ * by a software over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); ++ break; ++ case e1000_fc_full: ++ /* ++ * Flow control (both Rx and Tx) is enabled by a software ++ * over-ride. ++ */ ++ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ ew32(TXCW, txcw); ++ mac->txcw = txcw; ++ ++ return 0; ++} ++ ++/** ++ * e1000_poll_fiber_serdes_link_generic - Poll for link up ++ * @hw: pointer to the HW structure ++ * ++ * Polls for link up by reading the status register, if link fails to come ++ * up with auto-negotiation, then the link is forced if a signal is detected. 
++ **/ ++static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 i, status; ++ s32 ret_val; ++ ++ /* ++ * If we have a signal (the cable is plugged in, or assumed true for ++ * serdes media) then poll for a "Link-Up" indication in the Device ++ * Status Register. Time-out if a link isn't seen in 500 milliseconds ++ * seconds (Auto-negotiation should complete in less than 500 ++ * milliseconds even if the other end is doing it in SW). ++ */ ++ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { ++ usleep_range(10000, 20000); ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) ++ break; ++ } ++ if (i == FIBER_LINK_UP_LIMIT) { ++ e_dbg("Never got a valid link from auto-neg!!!\n"); ++ mac->autoneg_failed = 1; ++ /* ++ * AutoNeg failed to achieve a link, so we'll call ++ * mac->check_for_link. This routine will force the ++ * link up if we detect a signal. This will allow us to ++ * communicate with non-autonegotiating link partners. ++ */ ++ ret_val = mac->ops.check_for_link(hw); ++ if (ret_val) { ++ e_dbg("Error while checking for link\n"); ++ return ret_val; ++ } ++ mac->autoneg_failed = 0; ++ } else { ++ mac->autoneg_failed = 0; ++ e_dbg("Valid Link Found\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_fiber_serdes_link - Setup link for fiber/serdes ++ * @hw: pointer to the HW structure ++ * ++ * Configures collision distance and flow control for fiber and serdes ++ * links. Upon successful setup, poll for link. ++ **/ ++s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ ctrl = er32(CTRL); ++ ++ /* Take the link out of reset */ ++ ctrl &= ~E1000_CTRL_LRST; ++ ++ e1000e_config_collision_dist(hw); ++ ++ ret_val = e1000_commit_fc_settings_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Since auto-negotiation is enabled, take the link out of reset (the ++ * link will be in reset, because we previously reset the chip). This ++ * will restart auto-negotiation. If auto-negotiation is successful ++ * then the link-up status bit will be set and the flow control enable ++ * bits (RFCE and TFCE) will be set according to their negotiated value. ++ */ ++ e_dbg("Auto-negotiation enabled\n"); ++ ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ usleep_range(1000, 2000); ++ ++ /* ++ * For these adapters, the SW definable pin 1 is set when the optics ++ * detect a signal. If we have a signal, then poll for a "Link-Up" ++ * indication. ++ */ ++ if (hw->phy.media_type == e1000_media_type_internal_serdes || ++ (er32(CTRL) & E1000_CTRL_SWDPIN1)) { ++ ret_val = e1000_poll_fiber_serdes_link_generic(hw); ++ } else { ++ e_dbg("No signal detected\n"); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_config_collision_dist - Configure collision distance ++ * @hw: pointer to the HW structure ++ * ++ * Configures the collision distance to the default value and is used ++ * during link setup. Currently no func pointer exists and all ++ * implementations are handled in the generic version of this function. ++ **/ ++void e1000e_config_collision_dist(struct e1000_hw *hw) ++{ ++ u32 tctl; ++ ++ tctl = er32(TCTL); ++ ++ tctl &= ~E1000_TCTL_COLD; ++ tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; ++ ++ ew32(TCTL, tctl); ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_set_fc_watermarks - Set flow control high/low watermarks ++ * @hw: pointer to the HW structure ++ * ++ * Sets the flow control high/low threshold (watermark) registers. 
If ++ * flow control XON frame transmission is enabled, then set XON frame ++ * transmission as well. ++ **/ ++s32 e1000e_set_fc_watermarks(struct e1000_hw *hw) ++{ ++ u32 fcrtl = 0, fcrth = 0; ++ ++ /* ++ * Set the flow control receive threshold registers. Normally, ++ * these registers will be set to a default threshold that may be ++ * adjusted later by the driver's runtime code. However, if the ++ * ability to transmit pause frames is not enabled, then these ++ * registers will be set to 0. ++ */ ++ if (hw->fc.current_mode & e1000_fc_tx_pause) { ++ /* ++ * We need to set up the Receive Threshold high and low water ++ * marks as well as (optionally) enabling the transmission of ++ * XON frames. ++ */ ++ fcrtl = hw->fc.low_water; ++ fcrtl |= E1000_FCRTL_XONE; ++ fcrth = hw->fc.high_water; ++ } ++ ew32(FCRTL, fcrtl); ++ ew32(FCRTH, fcrth); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_force_mac_fc - Force the MAC's flow control settings ++ * @hw: pointer to the HW structure ++ * ++ * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the ++ * device control register to reflect the adapter settings. TFCE and RFCE ++ * need to be explicitly set by software when a copper PHY is used because ++ * autonegotiation is managed by the PHY rather than the MAC. Software must ++ * also configure these bits when link is forced on a fiber connection. ++ **/ ++s32 e1000e_force_mac_fc(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ ctrl = er32(CTRL); ++ ++ /* ++ * Because we didn't get link via the internal auto-negotiation ++ * mechanism (we either forced link or we got link via PHY ++ * auto-neg), we have to manually enable/disable transmit an ++ * receive flow control. ++ * ++ * The "Case" statement below enables/disable flow control ++ * according to the "hw->fc.current_mode" parameter. ++ * ++ * The possible values of the "fc" parameter are: ++ * 0: Flow control is completely disabled ++ * 1: Rx flow control is enabled (we can receive pause ++ * frames but not send pause frames). ++ * 2: Tx flow control is enabled (we can send pause frames ++ * frames but we do not receive pause frames). ++ * 3: Both Rx and Tx flow control (symmetric) is enabled. ++ * other: No other values should be possible at this point. ++ */ ++ e_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode); ++ ++ switch (hw->fc.current_mode) { ++ case e1000_fc_none: ++ ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); ++ break; ++ case e1000_fc_rx_pause: ++ ctrl &= (~E1000_CTRL_TFCE); ++ ctrl |= E1000_CTRL_RFCE; ++ break; ++ case e1000_fc_tx_pause: ++ ctrl &= (~E1000_CTRL_RFCE); ++ ctrl |= E1000_CTRL_TFCE; ++ break; ++ case e1000_fc_full: ++ ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); ++ break; ++ default: ++ e_dbg("Flow control param set incorrectly\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ ew32(CTRL, ctrl); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_config_fc_after_link_up - Configures flow control after link ++ * @hw: pointer to the HW structure ++ * ++ * Checks the status of auto-negotiation after link up to ensure that the ++ * speed and duplex were not forced. If the link needed to be forced, then ++ * flow control needs to be forced also. If auto-negotiation is enabled ++ * and did not fail, then we configure flow control based on our link ++ * partner. 
++ **/ ++s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val = 0; ++ u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; ++ u16 speed, duplex; ++ ++ /* ++ * Check for the case where we have fiber media and auto-neg failed ++ * so we had to force link. In this case, we need to force the ++ * configuration of the MAC to match the "fc" parameter. ++ */ ++ if (mac->autoneg_failed) { ++ if (hw->phy.media_type == e1000_media_type_fiber || ++ hw->phy.media_type == e1000_media_type_internal_serdes) ++ ret_val = e1000e_force_mac_fc(hw); ++ } else { ++ if (hw->phy.media_type == e1000_media_type_copper) ++ ret_val = e1000e_force_mac_fc(hw); ++ } ++ ++ if (ret_val) { ++ e_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ ++ /* ++ * Check for the case where we have copper media and auto-neg is ++ * enabled. In this case, we need to check and see if Auto-Neg ++ * has completed, and if so, how the PHY and link partner has ++ * flow control configured. ++ */ ++ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { ++ /* ++ * Read the MII Status Register and check to see if AutoNeg ++ * has completed. We read this twice because this reg has ++ * some "sticky" (latched) bits. ++ */ ++ ret_val = e1e_rphy(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1e_rphy(hw, PHY_STATUS, &mii_status_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { ++ e_dbg("Copper PHY and Auto Neg " ++ "has not completed.\n"); ++ return ret_val; ++ } ++ ++ /* ++ * The AutoNeg process has completed, so we now need to ++ * read both the Auto Negotiation Advertisement ++ * Register (Address 4) and the Auto_Negotiation Base ++ * Page Ability Register (Address 5) to determine how ++ * flow control was negotiated. ++ */ ++ ret_val = e1e_rphy(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg); ++ if (ret_val) ++ return ret_val; ++ ret_val = ++ e1e_rphy(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Two bits in the Auto Negotiation Advertisement Register ++ * (Address 4) and two bits in the Auto Negotiation Base ++ * Page Ability Register (Address 5) determine flow control ++ * for both the PHY and the link partner. The following ++ * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, ++ * 1999, describes these PAUSE resolution bits and how flow ++ * control is determined based upon these settings. ++ * NOTE: DC = Don't Care ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 0 | DC | DC | e1000_fc_none ++ * 0 | 1 | 0 | DC | e1000_fc_none ++ * 0 | 1 | 1 | 0 | e1000_fc_none ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ * 1 | 0 | 0 | DC | e1000_fc_none ++ * 1 | DC | 1 | DC | e1000_fc_full ++ * 1 | 1 | 0 | 0 | e1000_fc_none ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ * ++ * Are both PAUSE bits set to 1? If so, this implies ++ * Symmetric Flow Control is enabled at both ends. The ++ * ASM_DIR bits are irrelevant per the spec. 
++ * ++ * For Symmetric Flow Control: ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | DC | 1 | DC | E1000_fc_full ++ * ++ */ ++ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { ++ /* ++ * Now we need to check if the user selected Rx ONLY ++ * of pause frames. In this case, we had to advertise ++ * FULL flow control because we could not advertise Rx ++ * ONLY. Hence, we must now check to see if we need to ++ * turn OFF the TRANSMISSION of PAUSE frames. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_full) { ++ hw->fc.current_mode = e1000_fc_full; ++ e_dbg("Flow Control = FULL.\r\n"); ++ } else { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ e_dbg("Flow Control = " ++ "Rx PAUSE frames only.\r\n"); ++ } ++ } ++ /* ++ * For receiving PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 0 | 1 | 1 | 1 | e1000_fc_tx_pause ++ */ ++ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_tx_pause; ++ e_dbg("Flow Control = Tx PAUSE frames only.\r\n"); ++ } ++ /* ++ * For transmitting PAUSE frames ONLY. ++ * ++ * LOCAL DEVICE | LINK PARTNER ++ * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result ++ *-------|---------|-------|---------|-------------------- ++ * 1 | 1 | 0 | 1 | e1000_fc_rx_pause ++ */ ++ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && ++ (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && ++ !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && ++ (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { ++ hw->fc.current_mode = e1000_fc_rx_pause; ++ e_dbg("Flow Control = Rx PAUSE frames only.\r\n"); ++ } else { ++ /* ++ * Per the IEEE spec, at this point flow control ++ * should be disabled. ++ */ ++ hw->fc.current_mode = e1000_fc_none; ++ e_dbg("Flow Control = NONE.\r\n"); ++ } ++ ++ /* ++ * Now we need to do one last check... If we auto- ++ * negotiated to HALF DUPLEX, flow control should not be ++ * enabled per IEEE 802.3 spec. ++ */ ++ ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); ++ if (ret_val) { ++ e_dbg("Error getting link speed and duplex\n"); ++ return ret_val; ++ } ++ ++ if (duplex == HALF_DUPLEX) ++ hw->fc.current_mode = e1000_fc_none; ++ ++ /* ++ * Now we call a subroutine to actually force the MAC ++ * controller to use the correct flow control settings. ++ */ ++ ret_val = e1000e_force_mac_fc(hw); ++ if (ret_val) { ++ e_dbg("Error forcing flow control settings\n"); ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_speed_and_duplex_copper - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Read the status register for the current speed/duplex and store the current ++ * speed and duplex for copper connections. 
++ **/ ++s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ u32 status; ++ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_SPEED_1000) ++ *speed = SPEED_1000; ++ else if (status & E1000_STATUS_SPEED_100) ++ *speed = SPEED_100; ++ else ++ *speed = SPEED_10; ++ ++ if (status & E1000_STATUS_FD) ++ *duplex = FULL_DUPLEX; ++ else ++ *duplex = HALF_DUPLEX; ++ ++ e_dbg("%u Mbps, %s Duplex\n", ++ *speed == SPEED_1000 ? 1000 : *speed == SPEED_100 ? 100 : 10, ++ *duplex == FULL_DUPLEX ? "Full" : "Half"); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_speed_and_duplex_fiber_serdes - Retrieve current speed/duplex ++ * @hw: pointer to the HW structure ++ * @speed: stores the current speed ++ * @duplex: stores the current duplex ++ * ++ * Sets the speed and duplex to gigabit full duplex (the only possible option) ++ * for fiber/serdes links. ++ **/ ++s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw *hw, u16 *speed, u16 *duplex) ++{ ++ *speed = SPEED_1000; ++ *duplex = FULL_DUPLEX; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_get_hw_semaphore - Acquire hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the HW semaphore to access the PHY or NVM ++ **/ ++s32 e1000e_get_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ s32 timeout = hw->nvm.word_size + 1; ++ s32 i = 0; ++ ++ /* Get the SW semaphore */ ++ while (i < timeout) { ++ swsm = er32(SWSM); ++ if (!(swsm & E1000_SWSM_SMBI)) ++ break; ++ ++ udelay(50); ++ i++; ++ } ++ ++ if (i == timeout) { ++ e_dbg("Driver can't access device - SMBI bit is set.\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Get the FW semaphore. */ ++ for (i = 0; i < timeout; i++) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_SWESMBI); ++ ++ /* Semaphore acquired if bit latched */ ++ if (er32(SWSM) & E1000_SWSM_SWESMBI) ++ break; ++ ++ udelay(50); ++ } ++ ++ if (i == timeout) { ++ /* Release semaphores */ ++ e1000e_put_hw_semaphore(hw); ++ e_dbg("Driver can't access the NVM\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_put_hw_semaphore - Release hardware semaphore ++ * @hw: pointer to the HW structure ++ * ++ * Release hardware semaphore used to access the PHY or NVM ++ **/ ++void e1000e_put_hw_semaphore(struct e1000_hw *hw) ++{ ++ u32 swsm; ++ ++ swsm = er32(SWSM); ++ swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); ++ ew32(SWSM, swsm); ++} ++ ++/** ++ * e1000e_get_auto_rd_done - Check for auto read completion ++ * @hw: pointer to the HW structure ++ * ++ * Check EEPROM for Auto Read done bit. ++ **/ ++s32 e1000e_get_auto_rd_done(struct e1000_hw *hw) ++{ ++ s32 i = 0; ++ ++ while (i < AUTO_READ_DONE_TIMEOUT) { ++ if (er32(EECD) & E1000_EECD_AUTO_RD) ++ break; ++ usleep_range(1000, 2000); ++ i++; ++ } ++ ++ if (i == AUTO_READ_DONE_TIMEOUT) { ++ e_dbg("Auto read by HW from NVM has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_valid_led_default - Verify a valid default LED config ++ * @hw: pointer to the HW structure ++ * @data: pointer to the NVM (EEPROM) ++ * ++ * Read the EEPROM for the current default LED configuration. If the ++ * LED configuration is not valid, set to a valid LED configuration. 
++ **/ ++s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_id_led_init - ++ * @hw: pointer to the HW structure ++ * ++ **/ ++s32 e1000e_id_led_init(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_mask = 0x000000FF; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; ++ u16 data, i, temp; ++ const u16 led_mask = 0x0F; ++ ++ ret_val = hw->nvm.ops.valid_led_default(hw, &data); ++ if (ret_val) ++ return ret_val; ++ ++ mac->ledctl_default = er32(LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & led_mask; ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode1 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_on << (i << 3); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); ++ mac->ledctl_mode2 |= ledctl_off << (i << 3); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_led_generic - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use and saves the current state ++ * of the LED so it can be later restored. ++ **/ ++s32 e1000e_setup_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl; ++ ++ if (hw->mac.ops.setup_led != e1000e_setup_led_generic) ++ return -E1000_ERR_CONFIG; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ ledctl = er32(LEDCTL); ++ hw->mac.ledctl_default = ledctl; ++ /* Turn off LED0 */ ++ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | ++ E1000_LEDCTL_LED0_BLINK | ++ E1000_LEDCTL_LED0_MODE_MASK); ++ ledctl |= (E1000_LEDCTL_MODE_LED_OFF << ++ E1000_LEDCTL_LED0_MODE_SHIFT); ++ ew32(LEDCTL, ledctl); ++ } else if (hw->phy.media_type == e1000_media_type_copper) { ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_cleanup_led_generic - Set LED config to default operation ++ * @hw: pointer to the HW structure ++ * ++ * Remove the current LED configuration and set the LED configuration ++ * to the default value, saved from the EEPROM. ++ **/ ++s32 e1000e_cleanup_led_generic(struct e1000_hw *hw) ++{ ++ ew32(LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * e1000e_blink_led_generic - Blink LED ++ * @hw: pointer to the HW structure ++ * ++ * Blink the LEDs which are set to be on. 
++ **/ ++s32 e1000e_blink_led_generic(struct e1000_hw *hw) ++{ ++ u32 ledctl_blink = 0; ++ u32 i; ++ ++ if (hw->phy.media_type == e1000_media_type_fiber) { ++ /* always blink LED0 for PCI-E fiber */ ++ ledctl_blink = E1000_LEDCTL_LED0_BLINK | ++ (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); ++ } else { ++ /* ++ * set the blink bit for each LED that's "on" (0x0E) ++ * in ledctl_mode2 ++ */ ++ ledctl_blink = hw->mac.ledctl_mode2; ++ for (i = 0; i < 4; i++) ++ if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == ++ E1000_LEDCTL_MODE_LED_ON) ++ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK << ++ (i * 8)); ++ } ++ ++ ew32(LEDCTL, ledctl_blink); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_led_on_generic - Turn LED on ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED on. ++ **/ ++s32 e1000e_led_on_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = er32(CTRL); ++ ctrl &= ~E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ ew32(CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ ew32(LEDCTL, hw->mac.ledctl_mode2); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_led_off_generic - Turn LED off ++ * @hw: pointer to the HW structure ++ * ++ * Turn LED off. ++ **/ ++s32 e1000e_led_off_generic(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_fiber: ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SWDPIN0; ++ ctrl |= E1000_CTRL_SWDPIO0; ++ ew32(CTRL, ctrl); ++ break; ++ case e1000_media_type_copper: ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_set_pcie_no_snoop - Set PCI-express capabilities ++ * @hw: pointer to the HW structure ++ * @no_snoop: bitmap of snoop events ++ * ++ * Set the PCI-express register to snoop for events enabled in 'no_snoop'. ++ **/ ++void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop) ++{ ++ u32 gcr; ++ ++ if (no_snoop) { ++ gcr = er32(GCR); ++ gcr &= ~(PCIE_NO_SNOOP_ALL); ++ gcr |= no_snoop; ++ ew32(GCR, gcr); ++ } ++} ++ ++/** ++ * e1000e_disable_pcie_master - Disables PCI-express master access ++ * @hw: pointer to the HW structure ++ * ++ * Returns 0 if successful, else returns -10 ++ * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused ++ * the master requests to be disabled. ++ * ++ * Disables PCI-Express master access and verifies there are no pending ++ * requests. ++ **/ ++s32 e1000e_disable_pcie_master(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 timeout = MASTER_DISABLE_TIMEOUT; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; ++ ew32(CTRL, ctrl); ++ ++ while (timeout) { ++ if (!(er32(STATUS) & ++ E1000_STATUS_GIO_MASTER_ENABLE)) ++ break; ++ udelay(100); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("Master requests are pending.\n"); ++ return -E1000_ERR_MASTER_REQUESTS_PENDING; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_reset_adaptive - Reset Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Reset the Adaptive Interframe Spacing throttle to default values. 
++ **/ ++void e1000e_reset_adaptive(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!mac->adaptive_ifs) { ++ e_dbg("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ mac->current_ifs_val = 0; ++ mac->ifs_min_val = IFS_MIN; ++ mac->ifs_max_val = IFS_MAX; ++ mac->ifs_step_size = IFS_STEP; ++ mac->ifs_ratio = IFS_RATIO; ++ ++ mac->in_ifs_mode = false; ++ ew32(AIT, 0); ++out: ++ return; ++} ++ ++/** ++ * e1000e_update_adaptive - Update Adaptive Interframe Spacing ++ * @hw: pointer to the HW structure ++ * ++ * Update the Adaptive Interframe Spacing Throttle value based on the ++ * time between transmitted packets and time between collisions. ++ **/ ++void e1000e_update_adaptive(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ if (!mac->adaptive_ifs) { ++ e_dbg("Not in Adaptive IFS mode!\n"); ++ goto out; ++ } ++ ++ if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { ++ if (mac->tx_packet_delta > MIN_NUM_XMITS) { ++ mac->in_ifs_mode = true; ++ if (mac->current_ifs_val < mac->ifs_max_val) { ++ if (!mac->current_ifs_val) ++ mac->current_ifs_val = mac->ifs_min_val; ++ else ++ mac->current_ifs_val += ++ mac->ifs_step_size; ++ ew32(AIT, mac->current_ifs_val); ++ } ++ } ++ } else { ++ if (mac->in_ifs_mode && ++ (mac->tx_packet_delta <= MIN_NUM_XMITS)) { ++ mac->current_ifs_val = 0; ++ mac->in_ifs_mode = false; ++ ew32(AIT, 0); ++ } ++ } ++out: ++ return; ++} ++ ++/** ++ * e1000_raise_eec_clk - Raise EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Enable/Raise the EEPROM clock bit. ++ **/ ++static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd | E1000_EECD_SK; ++ ew32(EECD, *eecd); ++ e1e_flush(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_lower_eec_clk - Lower EEPROM clock ++ * @hw: pointer to the HW structure ++ * @eecd: pointer to the EEPROM ++ * ++ * Clear/Lower the EEPROM clock bit. ++ **/ ++static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) ++{ ++ *eecd = *eecd & ~E1000_EECD_SK; ++ ew32(EECD, *eecd); ++ e1e_flush(); ++ udelay(hw->nvm.delay_usec); ++} ++ ++/** ++ * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM ++ * @hw: pointer to the HW structure ++ * @data: data to send to the EEPROM ++ * @count: number of bits to shift out ++ * ++ * We need to shift 'count' bits out to the EEPROM. So, the value in the ++ * "data" parameter will be shifted out to the EEPROM one bit at a time. ++ * In order to do this, "data" must be broken down into bits. ++ **/ ++static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u32 mask; ++ ++ mask = 0x01 << (count - 1); ++ if (nvm->type == e1000_nvm_eeprom_spi) ++ eecd |= E1000_EECD_DO; ++ ++ do { ++ eecd &= ~E1000_EECD_DI; ++ ++ if (data & mask) ++ eecd |= E1000_EECD_DI; ++ ++ ew32(EECD, eecd); ++ e1e_flush(); ++ ++ udelay(nvm->delay_usec); ++ ++ e1000_raise_eec_clk(hw, &eecd); ++ e1000_lower_eec_clk(hw, &eecd); ++ ++ mask >>= 1; ++ } while (mask); ++ ++ eecd &= ~E1000_EECD_DI; ++ ew32(EECD, eecd); ++} ++ ++/** ++ * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM ++ * @hw: pointer to the HW structure ++ * @count: number of bits to shift in ++ * ++ * In order to read a register from the EEPROM, we need to shift 'count' bits ++ * in from the EEPROM. 
Bits are "shifted in" by raising the clock input to ++ * the EEPROM (setting the SK bit), and then reading the value of the data out ++ * "DO" bit. During this "shifting in" process the data in "DI" bit should ++ * always be clear. ++ **/ ++static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) ++{ ++ u32 eecd; ++ u32 i; ++ u16 data; ++ ++ eecd = er32(EECD); ++ ++ eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); ++ data = 0; ++ ++ for (i = 0; i < count; i++) { ++ data <<= 1; ++ e1000_raise_eec_clk(hw, &eecd); ++ ++ eecd = er32(EECD); ++ ++ eecd &= ~E1000_EECD_DI; ++ if (eecd & E1000_EECD_DO) ++ data |= 1; ++ ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++ ++ return data; ++} ++ ++/** ++ * e1000e_poll_eerd_eewr_done - Poll for EEPROM read/write completion ++ * @hw: pointer to the HW structure ++ * @ee_reg: EEPROM flag for polling ++ * ++ * Polls the EEPROM status bit for either read or write completion based ++ * upon the value of 'ee_reg'. ++ **/ ++s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) ++{ ++ u32 attempts = 100000; ++ u32 i, reg = 0; ++ ++ for (i = 0; i < attempts; i++) { ++ if (ee_reg == E1000_NVM_POLL_READ) ++ reg = er32(EERD); ++ else ++ reg = er32(EEWR); ++ ++ if (reg & E1000_NVM_RW_REG_DONE) ++ return 0; ++ ++ udelay(5); ++ } ++ ++ return -E1000_ERR_NVM; ++} ++ ++/** ++ * e1000e_acquire_nvm - Generic request for access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Set the EEPROM access request bit and wait for EEPROM access grant bit. ++ * Return successful if access grant bit set, else clear the request for ++ * EEPROM access and return -E1000_ERR_NVM (-1). ++ **/ ++s32 e1000e_acquire_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd = er32(EECD); ++ s32 timeout = E1000_NVM_GRANT_ATTEMPTS; ++ ++ ew32(EECD, eecd | E1000_EECD_REQ); ++ eecd = er32(EECD); ++ ++ while (timeout) { ++ if (eecd & E1000_EECD_GNT) ++ break; ++ udelay(5); ++ eecd = er32(EECD); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ eecd &= ~E1000_EECD_REQ; ++ ew32(EECD, eecd); ++ e_dbg("Could not acquire NVM grant\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_standby_nvm - Return EEPROM to standby state ++ * @hw: pointer to the HW structure ++ * ++ * Return the EEPROM to a standby state. ++ **/ ++static void e1000_standby_nvm(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ /* Toggle CS to flush commands */ ++ eecd |= E1000_EECD_CS; ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(nvm->delay_usec); ++ eecd &= ~E1000_EECD_CS; ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(nvm->delay_usec); ++ } ++} ++ ++/** ++ * e1000_stop_nvm - Terminate EEPROM command ++ * @hw: pointer to the HW structure ++ * ++ * Terminates the current command by inverting the EEPROM's chip select pin. ++ **/ ++static void e1000_stop_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ eecd = er32(EECD); ++ if (hw->nvm.type == e1000_nvm_eeprom_spi) { ++ /* Pull CS high */ ++ eecd |= E1000_EECD_CS; ++ e1000_lower_eec_clk(hw, &eecd); ++ } ++} ++ ++/** ++ * e1000e_release_nvm - Release exclusive access to EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Stop any current commands to the EEPROM and clear the EEPROM request bit. 
++ **/ ++void e1000e_release_nvm(struct e1000_hw *hw) ++{ ++ u32 eecd; ++ ++ e1000_stop_nvm(hw); ++ ++ eecd = er32(EECD); ++ eecd &= ~E1000_EECD_REQ; ++ ew32(EECD, eecd); ++} ++ ++/** ++ * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write ++ * @hw: pointer to the HW structure ++ * ++ * Setups the EEPROM for reading and writing. ++ **/ ++static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u8 spi_stat_reg; ++ ++ if (nvm->type == e1000_nvm_eeprom_spi) { ++ u16 timeout = NVM_MAX_RETRY_SPI; ++ ++ /* Clear SK and CS */ ++ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ++ ew32(EECD, eecd); ++ e1e_flush(); ++ udelay(1); ++ ++ /* ++ * Read "Status Register" repeatedly until the LSB is cleared. ++ * The EEPROM will signal that the command has been completed ++ * by clearing bit 0 of the internal status register. If it's ++ * not cleared within 'timeout', then error out. ++ */ ++ while (timeout) { ++ e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, ++ hw->nvm.opcode_bits); ++ spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); ++ if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) ++ break; ++ ++ udelay(5); ++ e1000_standby_nvm(hw); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("SPI NVM Status error\n"); ++ return -E1000_ERR_NVM; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_read_nvm_eerd - Reads EEPROM using EERD register ++ * @hw: pointer to the HW structure ++ * @offset: offset of word in the EEPROM to read ++ * @words: number of words to read ++ * @data: word read from the EEPROM ++ * ++ * Reads a 16 bit word from the EEPROM using the EERD register. ++ **/ ++s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 i, eerd = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * too many words for the offset, and not enough words. ++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ for (i = 0; i < words; i++) { ++ eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + ++ E1000_NVM_RW_REG_START; ++ ++ ew32(EERD, eerd); ++ ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); ++ if (ret_val) ++ break; ++ ++ data[i] = (er32(EERD) >> E1000_NVM_RW_REG_DATA); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_write_nvm_spi - Write to EEPROM using SPI ++ * @hw: pointer to the HW structure ++ * @offset: offset within the EEPROM to be written to ++ * @words: number of words to write ++ * @data: 16 bit word(s) to be written to the EEPROM ++ * ++ * Writes data to EEPROM at offset using SPI interface. ++ * ++ * If e1000e_update_nvm_checksum is not called after this function , the ++ * EEPROM will most likely contain an invalid checksum. ++ **/ ++s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ s32 ret_val; ++ u16 widx = 0; ++ ++ /* ++ * A check for invalid values: offset too large, too many words, ++ * and not enough words. 
++ */ ++ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ ret_val = nvm->ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ while (widx < words) { ++ u8 write_opcode = NVM_WRITE_OPCODE_SPI; ++ ++ ret_val = e1000_ready_nvm_eeprom(hw); ++ if (ret_val) { ++ nvm->ops.release(hw); ++ return ret_val; ++ } ++ ++ e1000_standby_nvm(hw); ++ ++ /* Send the WRITE ENABLE command (8 bit opcode) */ ++ e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, ++ nvm->opcode_bits); ++ ++ e1000_standby_nvm(hw); ++ ++ /* ++ * Some SPI eeproms use the 8th address bit embedded in the ++ * opcode ++ */ ++ if ((nvm->address_bits == 8) && (offset >= 128)) ++ write_opcode |= NVM_A8_OPCODE_SPI; ++ ++ /* Send the Write command (8-bit opcode + addr) */ ++ e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); ++ e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), ++ nvm->address_bits); ++ ++ /* Loop to allow for up to whole page write of eeprom */ ++ while (widx < words) { ++ u16 word_out = data[widx]; ++ word_out = (word_out >> 8) | (word_out << 8); ++ e1000_shift_out_eec_bits(hw, word_out, 16); ++ widx++; ++ ++ if ((((offset + widx) * 2) % nvm->page_size) == 0) { ++ e1000_standby_nvm(hw); ++ break; ++ } ++ } ++ } ++ ++ usleep_range(10000, 20000); ++ nvm->ops.release(hw); ++ return 0; ++} ++ ++/** ++ * e1000_read_pba_string_generic - Read device part number ++ * @hw: pointer to the HW structure ++ * @pba_num: pointer to device part number ++ * @pba_num_size: size of part number buffer ++ * ++ * Reads the product board assembly (PBA) number from the EEPROM and stores ++ * the value in pba_num. ++ **/ ++s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, ++ u32 pba_num_size) ++{ ++ s32 ret_val; ++ u16 nvm_data; ++ u16 pba_ptr; ++ u16 offset; ++ u16 length; ++ ++ if (pba_num == NULL) { ++ e_dbg("PBA string buffer was null\n"); ++ ret_val = E1000_ERR_INVALID_ARGUMENT; ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ /* ++ * if nvm_data is not ptr guard the PBA must be in legacy format which ++ * means pba_ptr is actually our second data word for the PBA number ++ * and we can decode it into an ascii string ++ */ ++ if (nvm_data != NVM_PBA_PTR_GUARD) { ++ e_dbg("NVM PBA number is not stored as string\n"); ++ ++ /* we will need 11 characters to store the PBA */ ++ if (pba_num_size < 11) { ++ e_dbg("PBA string buffer too small\n"); ++ return E1000_ERR_NO_SPACE; ++ } ++ ++ /* extract hex string from data and pba_ptr */ ++ pba_num[0] = (nvm_data >> 12) & 0xF; ++ pba_num[1] = (nvm_data >> 8) & 0xF; ++ pba_num[2] = (nvm_data >> 4) & 0xF; ++ pba_num[3] = nvm_data & 0xF; ++ pba_num[4] = (pba_ptr >> 12) & 0xF; ++ pba_num[5] = (pba_ptr >> 8) & 0xF; ++ pba_num[6] = '-'; ++ pba_num[7] = 0; ++ pba_num[8] = (pba_ptr >> 4) & 0xF; ++ pba_num[9] = pba_ptr & 0xF; ++ ++ /* put a null character on the end of our string */ ++ pba_num[10] = '\0'; ++ ++ /* switch all the data but the '-' to hex char */ ++ for (offset = 0; offset < 10; offset++) { ++ if (pba_num[offset] < 0xA) ++ pba_num[offset] += '0'; ++ else if (pba_num[offset] < 0x10) ++ pba_num[offset] += 'A' - 0xA; ++ } ++ ++ goto out; ++ } ++ ++ ret_val = e1000_read_nvm(hw, pba_ptr, 1, &length); ++ 
if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ ++ if (length == 0xFFFF || length == 0) { ++ e_dbg("NVM PBA number section invalid length\n"); ++ ret_val = E1000_ERR_NVM_PBA_SECTION; ++ goto out; ++ } ++ /* check if pba_num buffer is big enough */ ++ if (pba_num_size < (((u32)length * 2) - 1)) { ++ e_dbg("PBA string buffer too small\n"); ++ ret_val = E1000_ERR_NO_SPACE; ++ goto out; ++ } ++ ++ /* trim pba length from start of string */ ++ pba_ptr++; ++ length--; ++ ++ for (offset = 0; offset < length; offset++) { ++ ret_val = e1000_read_nvm(hw, pba_ptr + offset, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ goto out; ++ } ++ pba_num[offset * 2] = (u8)(nvm_data >> 8); ++ pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); ++ } ++ pba_num[offset * 2] = '\0'; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_generic - Read device MAC address ++ * @hw: pointer to the HW structure ++ * ++ * Reads the device MAC address from the EEPROM and stores the value. ++ * Since devices with two ports use the same EEPROM, we increment the ++ * last bit in the MAC address for the second port. ++ **/ ++s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) ++{ ++ u32 rar_high; ++ u32 rar_low; ++ u16 i; ++ ++ rar_high = er32(RAH(0)); ++ rar_low = er32(RAL(0)); ++ ++ for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); ++ ++ for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) ++ hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); ++ ++ for (i = 0; i < ETH_ALEN; i++) ++ hw->mac.addr[i] = hw->mac.perm_addr[i]; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_validate_nvm_checksum_generic - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM ++ * and then verifies that the sum of the EEPROM is equal to 0xBABA. ++ **/ ++s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ checksum += nvm_data; ++ } ++ ++ if (checksum != (u16) NVM_SUM) { ++ e_dbg("NVM Checksum Invalid\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_update_nvm_checksum_generic - Update EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Updates the EEPROM checksum by reading/adding each word of the EEPROM ++ * up to the checksum. Then calculates the EEPROM checksum and writes the ++ * value to the EEPROM. ++ **/ ++s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 checksum = 0; ++ u16 i, nvm_data; ++ ++ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ++ ret_val = e1000_read_nvm(hw, i, 1, &nvm_data); ++ if (ret_val) { ++ e_dbg("NVM Read Error while updating checksum.\n"); ++ return ret_val; ++ } ++ checksum += nvm_data; ++ } ++ checksum = (u16) NVM_SUM - checksum; ++ ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); ++ if (ret_val) ++ e_dbg("NVM Write Error while updating checksum.\n"); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000e_reload_nvm - Reloads EEPROM ++ * @hw: pointer to the HW structure ++ * ++ * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the ++ * extended control register. 
++ **/ ++void e1000e_reload_nvm(struct e1000_hw *hw) ++{ ++ u32 ctrl_ext; ++ ++ udelay(10); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_EE_RST; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++} ++ ++/** ++ * e1000_calculate_checksum - Calculate checksum for buffer ++ * @buffer: pointer to EEPROM ++ * @length: size of EEPROM to calculate a checksum for ++ * ++ * Calculates the checksum for some buffer on a specified length. The ++ * checksum calculated is returned. ++ **/ ++static u8 e1000_calculate_checksum(u8 *buffer, u32 length) ++{ ++ u32 i; ++ u8 sum = 0; ++ ++ if (!buffer) ++ return 0; ++ ++ for (i = 0; i < length; i++) ++ sum += buffer[i]; ++ ++ return (u8) (0 - sum); ++} ++ ++/** ++ * e1000_mng_enable_host_if - Checks host interface is enabled ++ * @hw: pointer to the HW structure ++ * ++ * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND ++ * ++ * This function checks whether the HOST IF is enabled for command operation ++ * and also checks whether the previous command is completed. It busy waits ++ * in case of previous command is not completed. ++ **/ ++static s32 e1000_mng_enable_host_if(struct e1000_hw *hw) ++{ ++ u32 hicr; ++ u8 i; ++ ++ if (!(hw->mac.arc_subsystem_valid)) { ++ e_dbg("ARC subsystem not valid.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ ++ /* Check that the host interface is enabled. */ ++ hicr = er32(HICR); ++ if ((hicr & E1000_HICR_EN) == 0) { ++ e_dbg("E1000_HOST_EN bit disabled.\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ /* check the previous command is completed */ ++ for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { ++ hicr = er32(HICR); ++ if (!(hicr & E1000_HICR_C)) ++ break; ++ mdelay(1); ++ } ++ ++ if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { ++ e_dbg("Previous command timeout failed .\n"); ++ return -E1000_ERR_HOST_INTERFACE_COMMAND; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_check_mng_mode_generic - check management mode ++ * @hw: pointer to the HW structure ++ * ++ * Reads the firmware semaphore register and returns true (>0) if ++ * manageability is enabled, else false (0). ++ **/ ++bool e1000e_check_mng_mode_generic(struct e1000_hw *hw) ++{ ++ u32 fwsm = er32(FWSM); ++ ++ return (fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT); ++} ++ ++/** ++ * e1000e_enable_tx_pkt_filtering - Enable packet filtering on Tx ++ * @hw: pointer to the HW structure ++ * ++ * Enables packet filtering on transmit packets if manageability is enabled ++ * and host interface is enabled. ++ **/ ++bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw) ++{ ++ struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; ++ u32 *buffer = (u32 *)&hw->mng_cookie; ++ u32 offset; ++ s32 ret_val, hdr_csum, csum; ++ u8 i, len; ++ ++ hw->mac.tx_pkt_filtering = true; ++ ++ /* No manageability, no filtering */ ++ if (!e1000e_check_mng_mode(hw)) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++ /* ++ * If we can't read from the host interface for whatever ++ * reason, disable filtering. ++ */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++ /* Read in the header. Length and offset are in dwords. 
*/ ++ len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; ++ offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; ++ for (i = 0; i < len; i++) ++ *(buffer + i) = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF, offset + i); ++ hdr_csum = hdr->checksum; ++ hdr->checksum = 0; ++ csum = e1000_calculate_checksum((u8 *)hdr, ++ E1000_MNG_DHCP_COOKIE_LENGTH); ++ /* ++ * If either the checksums or signature don't match, then ++ * the cookie area isn't considered valid, in which case we ++ * take the safe route of assuming Tx filtering is enabled. ++ */ ++ if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) { ++ hw->mac.tx_pkt_filtering = true; ++ goto out; ++ } ++ ++ /* Cookie area is valid, make the final check for filtering. */ ++ if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) { ++ hw->mac.tx_pkt_filtering = false; ++ goto out; ++ } ++ ++out: ++ return hw->mac.tx_pkt_filtering; ++} ++ ++/** ++ * e1000_mng_write_cmd_header - Writes manageability command header ++ * @hw: pointer to the HW structure ++ * @hdr: pointer to the host interface command header ++ * ++ * Writes the command header after does the checksum calculation. ++ **/ ++static s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, ++ struct e1000_host_mng_command_header *hdr) ++{ ++ u16 i, length = sizeof(struct e1000_host_mng_command_header); ++ ++ /* Write the whole command header structure with new checksum. */ ++ ++ hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); ++ ++ length >>= 2; ++ /* Write the relevant command block into the ram area. */ ++ for (i = 0; i < length; i++) { ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, i, ++ *((u32 *) hdr + i)); ++ e1e_flush(); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_mng_host_if_write - Write to the manageability host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface buffer ++ * @length: size of the buffer ++ * @offset: location in the buffer to write to ++ * @sum: sum of the data (not checksum) ++ * ++ * This function writes the buffer content at the offset given on the host if. ++ * It also does alignment considerations to do the writes in most efficient ++ * way. Also fills up the sum of the buffer in *buffer parameter. ++ **/ ++static s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, ++ u16 length, u16 offset, u8 *sum) ++{ ++ u8 *tmp; ++ u8 *bufptr = buffer; ++ u32 data = 0; ++ u16 remaining, i, j, prev_bytes; ++ ++ /* sum = only sum of the data and it is not checksum */ ++ ++ if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) ++ return -E1000_ERR_PARAM; ++ ++ tmp = (u8 *)&data; ++ prev_bytes = offset & 0x3; ++ offset >>= 2; ++ ++ if (prev_bytes) { ++ data = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF, offset); ++ for (j = prev_bytes; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset, data); ++ length -= j - prev_bytes; ++ offset++; ++ } ++ ++ remaining = length & 0x3; ++ length -= remaining; ++ ++ /* Calculate length in DWORDs */ ++ length >>= 2; ++ ++ /* ++ * The device driver writes the relevant command block into the ++ * ram area. 
++ */ ++ for (i = 0; i < length; i++) { ++ for (j = 0; j < sizeof(u32); j++) { ++ *(tmp + j) = *bufptr++; ++ *sum += *(tmp + j); ++ } ++ ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data); ++ } ++ if (remaining) { ++ for (j = 0; j < sizeof(u32); j++) { ++ if (j < remaining) ++ *(tmp + j) = *bufptr++; ++ else ++ *(tmp + j) = 0; ++ ++ *sum += *(tmp + j); ++ } ++ E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000e_mng_write_dhcp_info - Writes DHCP info to host interface ++ * @hw: pointer to the HW structure ++ * @buffer: pointer to the host interface ++ * @length: size of the buffer ++ * ++ * Writes the DHCP information to the host interface. ++ **/ ++s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) ++{ ++ struct e1000_host_mng_command_header hdr; ++ s32 ret_val; ++ u32 hicr; ++ ++ hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; ++ hdr.command_length = length; ++ hdr.reserved1 = 0; ++ hdr.reserved2 = 0; ++ hdr.checksum = 0; ++ ++ /* Enable the host interface */ ++ ret_val = e1000_mng_enable_host_if(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Populate the host interface with the contents of "buffer". */ ++ ret_val = e1000_mng_host_if_write(hw, buffer, length, ++ sizeof(hdr), &(hdr.checksum)); ++ if (ret_val) ++ return ret_val; ++ ++ /* Write the manageability command header */ ++ ret_val = e1000_mng_write_cmd_header(hw, &hdr); ++ if (ret_val) ++ return ret_val; ++ ++ /* Tell the ARC a new command is pending. */ ++ hicr = er32(HICR); ++ ew32(HICR, hicr | E1000_HICR_C); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_enable_mng_pass_thru - Check if management passthrough is needed ++ * @hw: pointer to the HW structure ++ * ++ * Verifies the hardware needs to leave interface enabled so that frames can ++ * be directed to and from the management interface. 
++ **/ ++bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw) ++{ ++ u32 manc; ++ u32 fwsm, factps; ++ bool ret_val = false; ++ ++ manc = er32(MANC); ++ ++ if (!(manc & E1000_MANC_RCV_TCO_EN)) ++ goto out; ++ ++ if (hw->mac.has_fwsm) { ++ fwsm = er32(FWSM); ++ factps = er32(FACTPS); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) { ++ ret_val = true; ++ goto out; ++ } ++ } else if ((hw->mac.type == e1000_82574) || ++ (hw->mac.type == e1000_82583)) { ++ u16 data; ++ ++ factps = er32(FACTPS); ++ e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); ++ ++ if (!(factps & E1000_FACTPS_MNGCG) && ++ ((data & E1000_NVM_INIT_CTRL2_MNGM) == ++ (e1000_mng_mode_pt << 13))) { ++ ret_val = true; ++ goto out; ++ } ++ } else if ((manc & E1000_MANC_SMBUS_EN) && ++ !(manc & E1000_MANC_ASF_EN)) { ++ ret_val = true; ++ goto out; ++ } ++ ++out: ++ return ret_val; ++} +--- linux/drivers/xenomai/net/drivers/e1000e/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/Makefile 2021-04-07 16:01:27.209634192 +0800 +@@ -0,0 +1,12 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_DRV_E1000E) += rt_e1000e.o ++ ++rt_e1000e-y := \ ++ 82571.o \ ++ 80003es2lan.o \ ++ ich8lan.o \ ++ lib.o \ ++ netdev.o \ ++ param.o \ ++ phy.o +--- linux/drivers/xenomai/net/drivers/e1000e/param.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/param.c 2021-04-07 16:01:27.204634199 +0800 +@@ -0,0 +1,484 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#include ++#include ++ ++#include "e1000.h" ++ ++/* ++ * This is the only thing that needs to be changed to adjust the ++ * maximum number of ports that the driver can manage. ++ */ ++ ++#define E1000_MAX_NIC 32 ++ ++#define OPTION_UNSET -1 ++#define OPTION_DISABLED 0 ++#define OPTION_ENABLED 1 ++ ++#define COPYBREAK_DEFAULT 256 ++unsigned int copybreak = COPYBREAK_DEFAULT; ++module_param(copybreak, uint, 0644); ++MODULE_PARM_DESC(copybreak, ++ "Maximum size of packet that is copied to a new buffer on receive"); ++ ++/* ++ * All parameters are treated the same, as an integer array of values. ++ * This macro just reduces the need to repeat the same declaration code ++ * over and over (plus this helps to avoid typo bugs). ++ */ ++ ++#define E1000_PARAM_INIT { [0 ... 
E1000_MAX_NIC] = OPTION_UNSET } ++#define E1000_PARAM(X, desc) \ ++ static int X[E1000_MAX_NIC+1] \ ++ = E1000_PARAM_INIT; \ ++ static unsigned int num_##X; \ ++ module_param_array_named(X, X, int, &num_##X, 0); \ ++ MODULE_PARM_DESC(X, desc); ++ ++/* ++ * Transmit Interrupt Delay in units of 1.024 microseconds ++ * Tx interrupt delay needs to typically be set to something non-zero ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay"); ++#define DEFAULT_TIDV 0 ++#define MAX_TXDELAY 0xFFFF ++#define MIN_TXDELAY 0 ++ ++/* ++ * Transmit Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); ++#define DEFAULT_TADV 0 ++#define MAX_TXABSDELAY 0xFFFF ++#define MIN_TXABSDELAY 0 ++ ++/* ++ * Receive Interrupt Delay in units of 1.024 microseconds ++ * hardware will likely hang if you set this to anything but zero. ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); ++#define MAX_RXDELAY 0xFFFF ++#define MIN_RXDELAY 0 ++ ++/* ++ * Receive Absolute Interrupt Delay in units of 1.024 microseconds ++ * ++ * Valid Range: 0-65535 ++ */ ++E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); ++#define MAX_RXABSDELAY 0xFFFF ++#define MIN_RXABSDELAY 0 ++ ++/* ++ * Interrupt Throttle Rate (interrupts/sec) ++ * ++ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) ++ * ++ * Default Value: 0 for rtnet ++ */ ++E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); ++#define DEFAULT_ITR 0 ++#define MAX_ITR 100000 ++#define MIN_ITR 100 ++ ++/* IntMode (Interrupt Mode) ++ * ++ * Valid Range: 0 - 2 ++ * ++ * Default Value: 2 (MSI-X) ++ */ ++E1000_PARAM(IntMode, "Interrupt Mode"); ++#define MAX_INTMODE 2 ++#define MIN_INTMODE 0 ++ ++/* ++ * Enable Smart Power Down of the PHY ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 0 (disabled) ++ */ ++E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down"); ++ ++/* ++ * Enable Kumeran Lock Loss workaround ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround"); ++ ++/* ++ * Write Protect NVM ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(WriteProtectNVM, "Write-protect NVM [WARNING: disabling this can lead to corrupted NVM]"); ++ ++/* ++ * Enable CRC Stripping ++ * ++ * Valid Range: 0, 1 ++ * ++ * Default Value: 1 (enabled) ++ */ ++E1000_PARAM(CrcStripping, "Enable CRC Stripping, disable if your BMC needs " \ ++ "the CRC"); ++ ++struct e1000_option { ++ enum { enable_option, range_option, list_option } type; ++ const char *name; ++ const char *err; ++ int def; ++ union { ++ struct { /* range_option info */ ++ int min; ++ int max; ++ } r; ++ struct { /* list_option info */ ++ int nr; ++ struct e1000_opt_list { int i; char *str; } *p; ++ } l; ++ } arg; ++}; ++ ++static int e1000_validate_option(unsigned int *value, ++ const struct e1000_option *opt, ++ struct e1000_adapter *adapter) ++{ ++ if (*value == OPTION_UNSET) { ++ *value = opt->def; ++ return 0; ++ } ++ ++ switch (opt->type) { ++ case enable_option: ++ switch (*value) { ++ case OPTION_ENABLED: ++ e_info("%s Enabled\n", opt->name); ++ return 0; ++ case OPTION_DISABLED: ++ e_info("%s Disabled\n", opt->name); ++ return 0; ++ } ++ break; ++ case range_option: ++ if (*value >= opt->arg.r.min && *value <= 
opt->arg.r.max) { ++ e_info("%s set to %i\n", opt->name, *value); ++ return 0; ++ } ++ break; ++ case list_option: { ++ int i; ++ struct e1000_opt_list *ent; ++ ++ for (i = 0; i < opt->arg.l.nr; i++) { ++ ent = &opt->arg.l.p[i]; ++ if (*value == ent->i) { ++ if (ent->str[0] != '\0') ++ e_info("%s\n", ent->str); ++ return 0; ++ } ++ } ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++ e_info("Invalid %s value specified (%i) %s\n", opt->name, *value, ++ opt->err); ++ *value = opt->def; ++ return -1; ++} ++ ++/** ++ * e1000e_check_options - Range Checking for Command Line Parameters ++ * @adapter: board private structure ++ * ++ * This routine checks all command line parameters for valid user ++ * input. If an invalid value is given, or if no user specified ++ * value exists, a default value is used. The final value is stored ++ * in a variable in the adapter structure. ++ **/ ++void e1000e_check_options(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int bd = adapter->bd_number; ++ ++ if (bd >= E1000_MAX_NIC) { ++ e_notice("Warning: no configuration for board #%i\n", bd); ++ e_notice("Using defaults for all values\n"); ++ } ++ ++ { /* Transmit Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_TIDV), ++ .def = DEFAULT_TIDV, ++ .arg = { .r = { .min = MIN_TXDELAY, ++ .max = MAX_TXDELAY } } ++ }; ++ ++ if (num_TxIntDelay > bd) { ++ adapter->tx_int_delay = TxIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->tx_int_delay = opt.def; ++ } ++ } ++ { /* Transmit Absolute Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Transmit Absolute Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_TADV), ++ .def = DEFAULT_TADV, ++ .arg = { .r = { .min = MIN_TXABSDELAY, ++ .max = MAX_TXABSDELAY } } ++ }; ++ ++ if (num_TxAbsIntDelay > bd) { ++ adapter->tx_abs_int_delay = TxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->tx_abs_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->tx_abs_int_delay = opt.def; ++ } ++ } ++ { /* Receive Interrupt Delay */ ++ static struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_RDTR), ++ .def = DEFAULT_RDTR, ++ .arg = { .r = { .min = MIN_RXDELAY, ++ .max = MAX_RXDELAY } } ++ }; ++ ++ if (num_RxIntDelay > bd) { ++ adapter->rx_int_delay = RxIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->rx_int_delay = opt.def; ++ } ++ } ++ { /* Receive Absolute Interrupt Delay */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Receive Absolute Interrupt Delay", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_RADV), ++ .def = DEFAULT_RADV, ++ .arg = { .r = { .min = MIN_RXABSDELAY, ++ .max = MAX_RXABSDELAY } } ++ }; ++ ++ if (num_RxAbsIntDelay > bd) { ++ adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; ++ e1000_validate_option(&adapter->rx_abs_int_delay, &opt, ++ adapter); ++ } else { ++ adapter->rx_abs_int_delay = opt.def; ++ } ++ } ++ { /* Interrupt Throttling Rate */ ++ static const struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Throttling Rate (ints/sec)", ++ .err = "using default of " ++ __MODULE_STRING(DEFAULT_ITR), ++ .def = DEFAULT_ITR, ++ .arg = { .r = { .min = MIN_ITR, ++ .max = 
MAX_ITR } } ++ }; ++ ++ if (num_InterruptThrottleRate > bd) { ++ adapter->itr = InterruptThrottleRate[bd]; ++ switch (adapter->itr) { ++ case 0: ++ e_info("%s turned off\n", opt.name); ++ break; ++ case 1: ++ e_info("%s set to dynamic mode\n", opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 3: ++ e_info("%s set to dynamic conservative mode\n", ++ opt.name); ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ break; ++ case 4: ++ e_info("%s set to simplified (2000-8000 ints) " ++ "mode\n", opt.name); ++ adapter->itr_setting = 4; ++ break; ++ default: ++ /* ++ * Save the setting, because the dynamic bits ++ * change itr. ++ */ ++ if (e1000_validate_option(&adapter->itr, &opt, ++ adapter) && ++ (adapter->itr == 3)) { ++ /* ++ * In case of invalid user value, ++ * default to conservative mode. ++ */ ++ adapter->itr_setting = adapter->itr; ++ adapter->itr = 20000; ++ } else { ++ /* ++ * Clear the lower two bits because ++ * they are used as control. ++ */ ++ adapter->itr_setting = ++ adapter->itr & ~3; ++ } ++ break; ++ } ++ } else { ++ adapter->itr_setting = opt.def; ++ adapter->itr = 0; ++ } ++ } ++ { /* Interrupt Mode */ ++ static struct e1000_option opt = { ++ .type = range_option, ++ .name = "Interrupt Mode", ++ .err = "defaulting to 2 (MSI-X)", ++ .def = E1000E_INT_MODE_MSIX, ++ .arg = { .r = { .min = MIN_INTMODE, ++ .max = MAX_INTMODE } } ++ }; ++ ++ if (num_IntMode > bd) { ++ unsigned int int_mode = IntMode[bd]; ++ e1000_validate_option(&int_mode, &opt, adapter); ++ adapter->int_mode = int_mode; ++ } else { ++ adapter->int_mode = opt.def; ++ } ++ } ++ { /* Smart Power Down */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "PHY Smart Power Down", ++ .err = "defaulting to Disabled", ++ .def = OPTION_DISABLED ++ }; ++ ++ if (num_SmartPowerDownEnable > bd) { ++ unsigned int spd = SmartPowerDownEnable[bd]; ++ e1000_validate_option(&spd, &opt, adapter); ++ if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) ++ && spd) ++ adapter->flags |= FLAG_SMART_POWER_DOWN; ++ } ++ } ++ { /* CRC Stripping */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "CRC Stripping", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (num_CrcStripping > bd) { ++ unsigned int crc_stripping = CrcStripping[bd]; ++ e1000_validate_option(&crc_stripping, &opt, adapter); ++ if (crc_stripping == OPTION_ENABLED) ++ adapter->flags2 |= FLAG2_CRC_STRIPPING; ++ } else { ++ adapter->flags2 |= FLAG2_CRC_STRIPPING; ++ } ++ } ++ { /* Kumeran Lock Loss Workaround */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Kumeran Lock Loss Workaround", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (num_KumeranLockLoss > bd) { ++ unsigned int kmrn_lock_loss = KumeranLockLoss[bd]; ++ e1000_validate_option(&kmrn_lock_loss, &opt, adapter); ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ kmrn_lock_loss); ++ } else { ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, ++ opt.def); ++ } ++ } ++ { /* Write-protect NVM */ ++ static const struct e1000_option opt = { ++ .type = enable_option, ++ .name = "Write-protect NVM", ++ .err = "defaulting to Enabled", ++ .def = OPTION_ENABLED ++ }; ++ ++ if (adapter->flags & FLAG_IS_ICH) { ++ if (num_WriteProtectNVM > bd) { ++ unsigned int write_protect_nvm = WriteProtectNVM[bd]; ++ e1000_validate_option(&write_protect_nvm, 
&opt, ++ adapter); ++ if (write_protect_nvm) ++ adapter->flags |= FLAG_READ_ONLY_NVM; ++ } else { ++ if (opt.def) ++ adapter->flags |= FLAG_READ_ONLY_NVM; ++ } ++ } ++ } ++} +--- linux/drivers/xenomai/net/drivers/e1000e/ich8lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/ich8lan.c 2021-04-07 16:01:27.199634206 +0800 +@@ -0,0 +1,4446 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 82562G 10/100 Network Connection ++ * 82562G-2 10/100 Network Connection ++ * 82562GT 10/100 Network Connection ++ * 82562GT-2 10/100 Network Connection ++ * 82562V 10/100 Network Connection ++ * 82562V-2 10/100 Network Connection ++ * 82566DC-2 Gigabit Network Connection ++ * 82566DC Gigabit Network Connection ++ * 82566DM-2 Gigabit Network Connection ++ * 82566DM Gigabit Network Connection ++ * 82566MC Gigabit Network Connection ++ * 82566MM Gigabit Network Connection ++ * 82567LM Gigabit Network Connection ++ * 82567LF Gigabit Network Connection ++ * 82567V Gigabit Network Connection ++ * 82567LM-2 Gigabit Network Connection ++ * 82567LF-2 Gigabit Network Connection ++ * 82567V-2 Gigabit Network Connection ++ * 82567LF-3 Gigabit Network Connection ++ * 82567LM-3 Gigabit Network Connection ++ * 82567LM-4 Gigabit Network Connection ++ * 82577LM Gigabit Network Connection ++ * 82577LC Gigabit Network Connection ++ * 82578DM Gigabit Network Connection ++ * 82578DC Gigabit Network Connection ++ * 82579LM Gigabit Network Connection ++ * 82579V Gigabit Network Connection ++ */ ++ ++#include "e1000.h" ++ ++#define ICH_FLASH_GFPREG 0x0000 ++#define ICH_FLASH_HSFSTS 0x0004 ++#define ICH_FLASH_HSFCTL 0x0006 ++#define ICH_FLASH_FADDR 0x0008 ++#define ICH_FLASH_FDATA0 0x0010 ++#define ICH_FLASH_PR0 0x0074 ++ ++#define ICH_FLASH_READ_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_WRITE_COMMAND_TIMEOUT 500 ++#define ICH_FLASH_ERASE_COMMAND_TIMEOUT 3000000 ++#define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF ++#define ICH_FLASH_CYCLE_REPEAT_COUNT 10 ++ ++#define ICH_CYCLE_READ 0 ++#define ICH_CYCLE_WRITE 2 ++#define ICH_CYCLE_ERASE 3 ++ ++#define FLASH_GFPREG_BASE_MASK 0x1FFF ++#define FLASH_SECTOR_ADDR_SHIFT 12 ++ ++#define ICH_FLASH_SEG_SIZE_256 256 ++#define ICH_FLASH_SEG_SIZE_4K 4096 ++#define ICH_FLASH_SEG_SIZE_8K 8192 ++#define ICH_FLASH_SEG_SIZE_64K 65536 ++ ++ ++#define E1000_ICH_FWSM_RSPCIPHY 0x00000040 /* Reset PHY on PCI Reset */ ++/* FW established a valid mode */ 
++#define E1000_ICH_FWSM_FW_VALID 0x00008000 ++ ++#define E1000_ICH_MNG_IAMT_MODE 0x2 ++ ++#define ID_LED_DEFAULT_ICH8LAN ((ID_LED_DEF1_DEF2 << 12) | \ ++ (ID_LED_DEF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_ON2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++ ++#define E1000_ICH_NVM_SIG_WORD 0x13 ++#define E1000_ICH_NVM_SIG_MASK 0xC000 ++#define E1000_ICH_NVM_VALID_SIG_MASK 0xC0 ++#define E1000_ICH_NVM_SIG_VALUE 0x80 ++ ++#define E1000_ICH8_LAN_INIT_TIMEOUT 1500 ++ ++#define E1000_FEXTNVM_SW_CONFIG 1 ++#define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M :/ */ ++ ++#define E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK 0x0C000000 ++#define E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC 0x08000000 ++ ++#define E1000_FEXTNVM4_BEACON_DURATION_MASK 0x7 ++#define E1000_FEXTNVM4_BEACON_DURATION_8USEC 0x7 ++#define E1000_FEXTNVM4_BEACON_DURATION_16USEC 0x3 ++ ++#define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL ++ ++#define E1000_ICH_RAR_ENTRIES 7 ++#define E1000_PCH2_RAR_ENTRIES 5 /* RAR[0], SHRA[0-3] */ ++#define E1000_PCH_LPT_RAR_ENTRIES 12 /* RAR[0], SHRA[0-10] */ ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++#define IGP3_KMRN_DIAG PHY_REG(770, 19) /* KMRN Diagnostic */ ++#define IGP3_VR_CTRL PHY_REG(776, 18) /* Voltage Regulator Control */ ++ ++#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002 ++#define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK 0x0300 ++#define IGP3_VR_CTRL_MODE_SHUTDOWN 0x0200 ++ ++#define HV_LED_CONFIG PHY_REG(768, 30) /* LED Configuration */ ++ ++#define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in milliseconds */ ++ ++/* SMBus Control Phy Register */ ++#define CV_SMB_CTRL PHY_REG(769, 23) ++#define CV_SMB_CTRL_FORCE_SMBUS 0x0001 ++ ++/* SMBus Address Phy Register */ ++#define HV_SMB_ADDR PHY_REG(768, 26) ++#define HV_SMB_ADDR_MASK 0x007F ++#define HV_SMB_ADDR_PEC_EN 0x0200 ++#define HV_SMB_ADDR_VALID 0x0080 ++#define HV_SMB_ADDR_FREQ_MASK 0x1100 ++#define HV_SMB_ADDR_FREQ_LOW_SHIFT 8 ++#define HV_SMB_ADDR_FREQ_HIGH_SHIFT 12 ++ ++/* PHY Power Management Control */ ++#define HV_PM_CTRL PHY_REG(770, 17) ++#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 ++ ++/* PHY Low Power Idle Control */ ++#define I82579_LPI_CTRL PHY_REG(772, 20) ++#define I82579_LPI_CTRL_ENABLE_MASK 0x6000 ++#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT 0x80 ++ ++/* EMI Registers */ ++#define I82579_EMI_ADDR 0x10 ++#define I82579_EMI_DATA 0x11 ++#define I82579_LPI_UPDATE_TIMER 0x4805 /* in 40ns units + 40 ns base value */ ++ ++#define I217_EEE_ADVERTISEMENT 0x8001 /* IEEE MMD Register 7.60 */ ++#define I217_EEE_LP_ABILITY 0x8002 /* IEEE MMD Register 7.61 */ ++#define I217_EEE_100_SUPPORTED (1 << 1) /* 100BaseTx EEE supported */ ++ ++/* Intel Rapid Start Technology Support */ ++#define I217_PROXY_CTRL PHY_REG(BM_WUC_PAGE, 70) ++#define I217_PROXY_CTRL_AUTO_DISABLE 0x0080 ++#define I217_SxCTRL PHY_REG(BM_PORT_CTRL_PAGE, 28) ++#define I217_SxCTRL_MASK 0x1000 ++#define I217_CGFREG PHY_REG(772, 29) ++#define I217_CGFREG_MASK 0x0002 ++#define I217_MEMPWR PHY_REG(772, 26) ++#define I217_MEMPWR_MASK 0x0010 ++ ++/* Strapping Option Register - RO */ ++#define E1000_STRAP 0x0000C ++#define E1000_STRAP_SMBUS_ADDRESS_MASK 0x00FE0000 ++#define E1000_STRAP_SMBUS_ADDRESS_SHIFT 17 ++#define E1000_STRAP_SMT_FREQ_MASK 0x00003000 ++#define E1000_STRAP_SMT_FREQ_SHIFT 12 ++ ++/* OEM Bits Phy Register */ ++#define HV_OEM_BITS PHY_REG(768, 25) ++#define HV_OEM_BITS_LPLU 0x0004 /* Low Power Link Up */ ++#define HV_OEM_BITS_GBE_DIS 0x0040 /* Gigabit Disable */ ++#define 
HV_OEM_BITS_RESTART_AN 0x0400 /* Restart Auto-negotiation */ ++ ++#define E1000_NVM_K1_CONFIG 0x1B /* NVM K1 Config Word */ ++#define E1000_NVM_K1_ENABLE 0x1 /* NVM Enable K1 bit */ ++ ++/* KMRN Mode Control */ ++#define HV_KMRN_MODE_CTRL PHY_REG(769, 16) ++#define HV_KMRN_MDIO_SLOW 0x0400 ++ ++/* KMRN FIFO Control and Status */ ++#define HV_KMRN_FIFO_CTRLSTA PHY_REG(770, 16) ++#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK 0x7000 ++#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT 12 ++ ++/* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ ++/* Offset 04h HSFSTS */ ++union ich8_hws_flash_status { ++ struct ich8_hsfsts { ++ u16 flcdone :1; /* bit 0 Flash Cycle Done */ ++ u16 flcerr :1; /* bit 1 Flash Cycle Error */ ++ u16 dael :1; /* bit 2 Direct Access error Log */ ++ u16 berasesz :2; /* bit 4:3 Sector Erase Size */ ++ u16 flcinprog :1; /* bit 5 flash cycle in Progress */ ++ u16 reserved1 :2; /* bit 13:6 Reserved */ ++ u16 reserved2 :6; /* bit 13:6 Reserved */ ++ u16 fldesvalid :1; /* bit 14 Flash Descriptor Valid */ ++ u16 flockdn :1; /* bit 15 Flash Config Lock-Down */ ++ } hsf_status; ++ u16 regval; ++}; ++ ++/* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ ++/* Offset 06h FLCTL */ ++union ich8_hws_flash_ctrl { ++ struct ich8_hsflctl { ++ u16 flcgo :1; /* 0 Flash Cycle Go */ ++ u16 flcycle :2; /* 2:1 Flash Cycle */ ++ u16 reserved :5; /* 7:3 Reserved */ ++ u16 fldbcount :2; /* 9:8 Flash Data Byte Count */ ++ u16 flockdn :6; /* 15:10 Reserved */ ++ } hsf_ctrl; ++ u16 regval; ++}; ++ ++/* ICH Flash Region Access Permissions */ ++union ich8_hws_flash_regacc { ++ struct ich8_flracc { ++ u32 grra :8; /* 0:7 GbE region Read Access */ ++ u32 grwa :8; /* 8:15 GbE region Write Access */ ++ u32 gmrag :8; /* 23:16 GbE Master Read Access Grant */ ++ u32 gmwag :8; /* 31:24 GbE Master Write Access Grant */ ++ } hsf_flregacc; ++ u16 regval; ++}; ++ ++/* ICH Flash Protected Region */ ++union ich8_flash_protected_range { ++ struct ich8_pr { ++ u32 base:13; /* 0:12 Protected Range Base */ ++ u32 reserved1:2; /* 13:14 Reserved */ ++ u32 rpe:1; /* 15 Read Protection Enable */ ++ u32 limit:13; /* 16:28 Protected Range Limit */ ++ u32 reserved2:2; /* 29:30 Reserved */ ++ u32 wpe:1; /* 31 Write Protection Enable */ ++ } range; ++ u32 regval; ++}; ++ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte); ++static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 *data); ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 *data); ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); ++static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); ++static s32 e1000_setup_led_pchlan(struct e1000_hw *hw); ++static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw); ++static s32 
e1000_led_on_pchlan(struct e1000_hw *hw); ++static s32 e1000_led_off_pchlan(struct e1000_hw *hw); ++static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active); ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); ++static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw); ++static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); ++static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); ++static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); ++static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index); ++static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); ++static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); ++ ++static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readw(hw->flash_address + reg); ++} ++ ++static inline u32 __er32flash(struct e1000_hw *hw, unsigned long reg) ++{ ++ return readl(hw->flash_address + reg); ++} ++ ++static inline void __ew16flash(struct e1000_hw *hw, unsigned long reg, u16 val) ++{ ++ writew(val, hw->flash_address + reg); ++} ++ ++static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) ++{ ++ writel(val, hw->flash_address + reg); ++} ++ ++#define er16flash(reg) __er16flash(hw, (reg)) ++#define er32flash(reg) __er32flash(hw, (reg)) ++#define ew16flash(reg,val) __ew16flash(hw, (reg), (val)) ++#define ew32flash(reg,val) __ew32flash(hw, (reg), (val)) ++ ++static void e1000_toggle_lanphypc_value_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; ++ ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; ++ ew32(CTRL, ctrl); ++ e1e_flush(); ++ udelay(10); ++ ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000_init_phy_params_pchlan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 fwsm; ++ s32 ret_val = 0; ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.set_page = e1000_set_page_igp; ++ phy->ops.read_reg = e1000_read_phy_reg_hv; ++ phy->ops.read_reg_locked = e1000_read_phy_reg_hv_locked; ++ phy->ops.read_reg_page = e1000_read_phy_reg_page_hv; ++ phy->ops.set_d0_lplu_state = e1000_set_lplu_state_pchlan; ++ phy->ops.set_d3_lplu_state = e1000_set_lplu_state_pchlan; ++ phy->ops.write_reg = e1000_write_phy_reg_hv; ++ phy->ops.write_reg_locked = e1000_write_phy_reg_hv_locked; ++ phy->ops.write_reg_page = e1000_write_phy_reg_page_hv; ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ ++ /* ++ * The MAC-PHY interconnect may still be in SMBus mode ++ * after Sx->S0. If the manageability engine (ME) is ++ * disabled, then toggle the LANPHYPC Value bit to force ++ * the interconnect to PCIe mode. ++ */ ++ fwsm = er32(FWSM); ++ if (!(fwsm & E1000_ICH_FWSM_FW_VALID) && !e1000_check_reset_block(hw)) { ++ e1000_toggle_lanphypc_value_ich8lan(hw); ++ msleep(50); ++ ++ /* ++ * Gate automatic PHY configuration by hardware on ++ * non-managed 82579 ++ */ ++ if (hw->mac.type == e1000_pch2lan) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ } ++ ++ /* ++ * Reset the PHY before any access to it. 
Doing so, ensures that ++ * the PHY is in a known good state before we read/write PHY registers. ++ * The generic reset is sufficient here, because we haven't determined ++ * the PHY type yet. ++ */ ++ ret_val = e1000e_phy_hw_reset_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Ungate automatic PHY configuration on non-managed 82579 */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(fwsm & E1000_ICH_FWSM_FW_VALID)) { ++ usleep_range(10000, 20000); ++ e1000_gate_hw_phy_config_ich8lan(hw, false); ++ } ++ ++ phy->id = e1000_phy_unknown; ++ switch (hw->mac.type) { ++ default: ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK)) ++ break; ++ /* fall-through */ ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ /* ++ * In case the PHY needs to be in mdio slow mode, ++ * set slow mode and try to get the PHY id again. ++ */ ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ goto out; ++ break; ++ } ++ phy->type = e1000e_get_phy_type_from_id(phy->id); ++ ++ switch (phy->type) { ++ case e1000_phy_82577: ++ case e1000_phy_82579: ++ case e1000_phy_i217: ++ phy->ops.check_polarity = e1000_check_polarity_82577; ++ phy->ops.force_speed_duplex = ++ e1000_phy_force_speed_duplex_82577; ++ phy->ops.get_cable_length = e1000_get_cable_length_82577; ++ phy->ops.get_info = e1000_get_phy_info_82577; ++ phy->ops.commit = e1000e_phy_sw_reset; ++ break; ++ case e1000_phy_82578: ++ phy->ops.check_polarity = e1000_check_polarity_m88; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88; ++ phy->ops.get_cable_length = e1000e_get_cable_length_m88; ++ phy->ops.get_info = e1000e_get_phy_info_m88; ++ break; ++ default: ++ ret_val = -E1000_ERR_PHY; ++ break; ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_phy_params_ich8lan - Initialize PHY function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific PHY parameters and function pointers. ++ **/ ++static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u16 i = 0; ++ ++ phy->addr = 1; ++ phy->reset_delay_us = 100; ++ ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; ++ ++ /* ++ * We may need to do this twice - once for IGP and if that fails, ++ * we'll set BM func pointers and try again ++ */ ++ ret_val = e1000e_determine_phy_address(hw); ++ if (ret_val) { ++ phy->ops.write_reg = e1000e_write_phy_reg_bm; ++ phy->ops.read_reg = e1000e_read_phy_reg_bm; ++ ret_val = e1000e_determine_phy_address(hw); ++ if (ret_val) { ++ e_dbg("Cannot determine PHY addr. 
Erroring out\n"); ++ return ret_val; ++ } ++ } ++ ++ phy->id = 0; ++ while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && ++ (i++ < 100)) { ++ usleep_range(1000, 2000); ++ ret_val = e1000e_get_phy_id(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Verify phy id */ ++ switch (phy->id) { ++ case IGP03E1000_E_PHY_ID: ++ phy->type = e1000_phy_igp_3; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->ops.read_reg_locked = e1000e_read_phy_reg_igp_locked; ++ phy->ops.write_reg_locked = e1000e_write_phy_reg_igp_locked; ++ phy->ops.get_info = e1000e_get_phy_info_igp; ++ phy->ops.check_polarity = e1000_check_polarity_igp; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_igp; ++ break; ++ case IFE_E_PHY_ID: ++ case IFE_PLUS_E_PHY_ID: ++ case IFE_C_E_PHY_ID: ++ phy->type = e1000_phy_ife; ++ phy->autoneg_mask = E1000_ALL_NOT_GIG; ++ phy->ops.get_info = e1000_get_phy_info_ife; ++ phy->ops.check_polarity = e1000_check_polarity_ife; ++ phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_ife; ++ break; ++ case BME1000_E_PHY_ID: ++ phy->type = e1000_phy_bm; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->ops.read_reg = e1000e_read_phy_reg_bm; ++ phy->ops.write_reg = e1000e_write_phy_reg_bm; ++ phy->ops.commit = e1000e_phy_sw_reset; ++ phy->ops.get_info = e1000e_get_phy_info_m88; ++ phy->ops.check_polarity = e1000_check_polarity_m88; ++ phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88; ++ break; ++ default: ++ return -E1000_ERR_PHY; ++ break; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific NVM parameters and function ++ * pointers. ++ **/ ++static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 gfpreg, sector_base_addr, sector_end_addr; ++ u16 i; ++ ++ /* Can't read flash registers if the register set isn't mapped. */ ++ if (!hw->flash_address) { ++ e_dbg("ERROR: Flash registers not mapped\n"); ++ return -E1000_ERR_CONFIG; ++ } ++ ++ nvm->type = e1000_nvm_flash_sw; ++ ++ gfpreg = er32flash(ICH_FLASH_GFPREG); ++ ++ /* ++ * sector_X_addr is a "sector"-aligned address (4096 bytes) ++ * Add 1 to sector_end_addr since this sector is included in ++ * the overall size. ++ */ ++ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; ++ sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; ++ ++ /* flash_base_addr is byte-aligned */ ++ nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; ++ ++ /* ++ * find total size of the NVM, then cut in half since the total ++ * size represents two separate NVM banks. ++ */ ++ nvm->flash_bank_size = (sector_end_addr - sector_base_addr) ++ << FLASH_SECTOR_ADDR_SHIFT; ++ nvm->flash_bank_size /= 2; ++ /* Adjust to word count */ ++ nvm->flash_bank_size /= sizeof(u16); ++ ++ nvm->word_size = E1000_ICH8_SHADOW_RAM_WORDS; ++ ++ /* Clear shadow ram */ ++ for (i = 0; i < nvm->word_size; i++) { ++ dev_spec->shadow_ram[i].modified = false; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_ich8lan - Initialize MAC function pointers ++ * @hw: pointer to the HW structure ++ * ++ * Initialize family-specific MAC parameters and function ++ * pointers. 
++ **/ ++static s32 e1000_init_mac_params_ich8lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ ++ /* Set media type function pointer */ ++ hw->phy.media_type = e1000_media_type_copper; ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 32; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; ++ if (mac->type == e1000_ich8lan) ++ mac->rar_entry_count--; ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ARC subsystem not supported */ ++ mac->arc_subsystem_valid = false; ++ /* Adaptive IFS supported */ ++ mac->adaptive_ifs = true; ++ ++ /* LED operations */ ++ switch (mac->type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ case e1000_ich10lan: ++ /* check management mode */ ++ mac->ops.check_mng_mode = e1000_check_mng_mode_ich8lan; ++ /* ID LED init */ ++ mac->ops.id_led_init = e1000e_id_led_init; ++ /* blink LED */ ++ mac->ops.blink_led = e1000e_blink_led_generic; ++ /* setup LED */ ++ mac->ops.setup_led = e1000e_setup_led_generic; ++ /* cleanup LED */ ++ mac->ops.cleanup_led = e1000_cleanup_led_ich8lan; ++ /* turn on/off LED */ ++ mac->ops.led_on = e1000_led_on_ich8lan; ++ mac->ops.led_off = e1000_led_off_ich8lan; ++ break; ++ case e1000_pch_lpt: ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ /* check management mode */ ++ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; ++ /* ID LED init */ ++ mac->ops.id_led_init = e1000_id_led_init_pchlan; ++ /* setup LED */ ++ mac->ops.setup_led = e1000_setup_led_pchlan; ++ /* cleanup LED */ ++ mac->ops.cleanup_led = e1000_cleanup_led_pchlan; ++ /* turn on/off LED */ ++ mac->ops.led_on = e1000_led_on_pchlan; ++ mac->ops.led_off = e1000_led_off_pchlan; ++ break; ++ default: ++ break; ++ } ++ ++ if (mac->type == e1000_pch_lpt) { ++ mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; ++ mac->ops.rar_set = e1000_rar_set_pch_lpt; ++ } ++ ++ /* Enable PCS Lock-loss workaround for ICH8 */ ++ if (mac->type == e1000_ich8lan) ++ e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true); ++ ++ /* Gate automatic PHY configuration by hardware on managed ++ * 82579 and i217 ++ */ ++ if ((mac->type == e1000_pch2lan || mac->type == e1000_pch_lpt) && ++ (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_eee_pchlan - Enable/disable EEE support ++ * @hw: pointer to the HW structure ++ * ++ * Enable/disable EEE based on setting in dev_spec structure. The bits in ++ * the LPI Control register will remain set only if/when link is up. 
++ **/ ++static s32 e1000_set_eee_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ s32 ret_val = 0; ++ u16 phy_reg; ++ ++ if ((hw->phy.type != e1000_phy_82579) && ++ (hw->phy.type != e1000_phy_i217)) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg); ++ if (ret_val) ++ return ret_val; ++ ++ if (dev_spec->eee_disable) ++ phy_reg &= ~I82579_LPI_CTRL_ENABLE_MASK; ++ else ++ phy_reg |= I82579_LPI_CTRL_ENABLE_MASK; ++ ++ ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg); ++ ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->phy.type == e1000_phy_i217) && !dev_spec->eee_disable) { ++ /* Save off link partner's EEE ability */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, ++ I217_EEE_LP_ABILITY); ++ if (ret_val) ++ goto release; ++ e1e_rphy_locked(hw, I82579_EMI_DATA, &dev_spec->eee_lp_ability); ++ ++ /* EEE is not supported in 100Half, so ignore partner's EEE ++ * in 100 ability if full-duplex is not advertised. ++ */ ++ e1e_rphy_locked(hw, PHY_LP_ABILITY, &phy_reg); ++ if (!(phy_reg & NWAY_LPAR_100TX_FD_CAPS)) ++ dev_spec->eee_lp_ability &= ~I217_EEE_100_SUPPORTED; ++release: ++ hw->phy.ops.release(hw); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_check_for_copper_link_ich8lan - Check for link (Copper) ++ * @hw: pointer to the HW structure ++ * ++ * Checks to see of the link status of the hardware has changed. If a ++ * change in link status has been detected, then we read the PHY registers ++ * to get the current speed/duplex if link exists. ++ **/ ++static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ bool link; ++ u16 phy_reg; ++ ++ /* ++ * We only want to go out to the PHY registers to see if Auto-Neg ++ * has completed and/or if our link status has changed. The ++ * get_link_status flag is set upon receiving a Link Status ++ * Change or Rx Sequence Error interrupt. ++ */ ++ if (!mac->get_link_status) { ++ ret_val = 0; ++ goto out; ++ } ++ ++ /* ++ * First we want to see if the MII Status Register reports ++ * link. If so, then we want to get the current speed/duplex ++ * of the PHY. ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (ret_val) ++ goto out; ++ ++ if (hw->mac.type == e1000_pchlan) { ++ ret_val = e1000_k1_gig_workaround_hv(hw, link); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Clear link partner's EEE ability */ ++ hw->dev_spec.ich8lan.eee_lp_ability = 0; ++ ++ if (!link) ++ goto out; /* No link detected */ ++ ++ mac->get_link_status = false; ++ ++ switch (hw->mac.type) { ++ case e1000_pch2lan: ++ ret_val = e1000_k1_workaround_lv(hw); ++ if (ret_val) ++ goto out; ++ /* fall-thru */ ++ case e1000_pchlan: ++ if (hw->phy.type == e1000_phy_82578) { ++ ret_val = e1000_link_stall_workaround_hv(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * Workaround for PCHx parts in half-duplex: ++ * Set the number of preambles removed from the packet ++ * when it is passed from the PHY to the MAC to prevent ++ * the MAC from misinterpreting the packet type. 
++ */ ++ e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg); ++ phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK; ++ ++ if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD) ++ phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT); ++ ++ e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg); ++ break; ++ default: ++ break; ++ } ++ ++ /* ++ * Check if there was DownShift, must be checked ++ * immediately after link-up ++ */ ++ e1000e_check_downshift(hw); ++ ++ /* Enable/Disable EEE after link up */ ++ ret_val = e1000_set_eee_pchlan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * If we are forcing speed/duplex, then we simply return since ++ * we have already determined whether we have link or not. ++ */ ++ if (!mac->autoneg) { ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ /* ++ * Auto-Neg is enabled. Auto Speed Detection takes care ++ * of MAC speed/duplex configuration. So we only need to ++ * configure Collision Distance in the MAC. ++ */ ++ e1000e_config_collision_dist(hw); ++ ++ /* ++ * Configure Flow Control now that Auto-Neg has completed. ++ * First, we need to restore the desired flow control ++ * settings because we may have had to re-autoneg with a ++ * different link partner. ++ */ ++ ret_val = e1000e_config_fc_after_link_up(hw); ++ if (ret_val) ++ e_dbg("Error configuring flow control\n"); ++ ++out: ++ return ret_val; ++} ++ ++static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_ich8lan(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_ich8lan(hw); ++ if (rc) ++ return rc; ++ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ case e1000_ich10lan: ++ rc = e1000_init_phy_params_ich8lan(hw); ++ break; ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ rc = e1000_init_phy_params_pchlan(hw); ++ break; ++ default: ++ break; ++ } ++ if (rc) ++ return rc; ++ ++ /* ++ * Disable Jumbo Frame support on parts with Intel 10/100 PHY or ++ * on parts with MACsec enabled in NVM (reflected in CTRL_EXT). ++ */ ++ if ((adapter->hw.phy.type == e1000_phy_ife) || ++ ((adapter->hw.mac.type >= e1000_pch2lan) && ++ (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { ++ adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; ++ adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; ++ ++ hw->mac.ops.blink_led = NULL; ++ } ++ ++ if ((adapter->hw.mac.type == e1000_ich8lan) && ++ (adapter->hw.phy.type != e1000_phy_ife)) ++ adapter->flags |= FLAG_LSC_GIG_SPEED_DROP; ++ ++ /* Enable workaround for 82579 w/ ME enabled */ ++ if ((adapter->hw.mac.type == e1000_pch2lan) && ++ (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA; ++ ++ /* Disable EEE by default until IEEE802.3az spec is finalized */ ++ if (adapter->flags2 & FLAG2_HAS_EEE) ++ adapter->hw.dev_spec.ich8lan.eee_disable = true; ++ ++ return 0; ++} ++ ++static DEFINE_MUTEX(nvm_mutex); ++ ++/** ++ * e1000_acquire_nvm_ich8lan - Acquire NVM mutex ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the mutex for performing NVM operations. ++ **/ ++static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ mutex_lock(&nvm_mutex); ++ ++ return 0; ++} ++ ++/** ++ * e1000_release_nvm_ich8lan - Release NVM mutex ++ * @hw: pointer to the HW structure ++ * ++ * Releases the mutex used while performing NVM operations. 
++ **/ ++static void e1000_release_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ mutex_unlock(&nvm_mutex); ++} ++ ++/** ++ * e1000_acquire_swflag_ich8lan - Acquire software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Acquires the software control flag for performing PHY and select ++ * MAC CSR accesses. ++ **/ ++static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; ++ s32 ret_val = 0; ++ ++ if (test_and_set_bit(__E1000_ACCESS_SHARED_RESOURCE, ++ &hw->adapter->state)) { ++ WARN(1, "e1000e: %s: contention for Phy access\n", ++ hw->adapter->netdev->name); ++ return -E1000_ERR_PHY; ++ } ++ ++ while (timeout) { ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)) ++ break; ++ ++ mdelay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("SW has already locked the resource.\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++ timeout = SW_FLAG_TIMEOUT; ++ ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ ++ while (timeout) { ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) ++ break; ++ ++ mdelay(1); ++ timeout--; ++ } ++ ++ if (!timeout) { ++ e_dbg("Failed to acquire the semaphore, FW or HW has it: " ++ "FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n", ++ er32(FWSM), extcnf_ctrl); ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ ret_val = -E1000_ERR_CONFIG; ++ goto out; ++ } ++ ++out: ++ if (ret_val) ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_swflag_ich8lan - Release software control flag ++ * @hw: pointer to the HW structure ++ * ++ * Releases the software control flag for performing PHY and select ++ * MAC CSR accesses. ++ **/ ++static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) ++{ ++ u32 extcnf_ctrl; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) { ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ } else { ++ e_dbg("Semaphore unexpectedly released by sw/fw/hw\n"); ++ } ++ ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++} ++ ++/** ++ * e1000_check_mng_mode_ich8lan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has any manageability enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. ++ **/ ++static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ return (fwsm & E1000_ICH_FWSM_FW_VALID) && ++ ((fwsm & E1000_FWSM_MODE_MASK) == ++ (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_check_mng_mode_pchlan - Checks management mode ++ * @hw: pointer to the HW structure ++ * ++ * This checks if the adapter has iAMT enabled. ++ * This is a function pointer entry point only called by read/write ++ * routines for the PHY and NVM parts. 
++ **/ ++static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ return (fwsm & E1000_ICH_FWSM_FW_VALID) && ++ (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); ++} ++ ++/** ++ * e1000_rar_set_pch_lpt - Set receive address registers ++ * @hw: pointer to the HW structure ++ * @addr: pointer to the receive address ++ * @index: receive address array register ++ * ++ * Sets the receive address register array at index to the address passed ++ * in by addr. For LPT, RAR[0] is the base address register that is to ++ * contain the MAC address. SHRA[0-10] are the shared receive address ++ * registers that are shared between the Host and manageability engine (ME). ++ **/ ++static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index) ++{ ++ u32 rar_low, rar_high; ++ u32 wlock_mac; ++ ++ /* HW expects these in little endian so we reverse the byte order ++ * from network order (big endian) to little endian ++ */ ++ rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) | ++ ((u32)addr[2] << 16) | ((u32)addr[3] << 24)); ++ ++ rar_high = ((u32)addr[4] | ((u32)addr[5] << 8)); ++ ++ /* If MAC address zero, no need to set the AV bit */ ++ if (rar_low || rar_high) ++ rar_high |= E1000_RAH_AV; ++ ++ if (index == 0) { ++ ew32(RAL(index), rar_low); ++ e1e_flush(); ++ ew32(RAH(index), rar_high); ++ e1e_flush(); ++ return; ++ } ++ ++ /* The manageability engine (ME) can lock certain SHRAR registers that ++ * it is using - those registers are unavailable for use. ++ */ ++ if (index < hw->mac.rar_entry_count) { ++ wlock_mac = er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK; ++ wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT; ++ ++ /* Check if all SHRAR registers are locked */ ++ if (wlock_mac == 1) ++ goto out; ++ ++ if ((wlock_mac == 0) || (index <= wlock_mac)) { ++ s32 ret_val; ++ ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ ++ if (ret_val) ++ goto out; ++ ++ ew32(SHRAL_PCH_LPT(index - 1), rar_low); ++ e1e_flush(); ++ ew32(SHRAH_PCH_LPT(index - 1), rar_high); ++ e1e_flush(); ++ ++ e1000_release_swflag_ich8lan(hw); ++ ++ /* verify the register updates */ ++ if ((er32(SHRAL_PCH_LPT(index - 1)) == rar_low) && ++ (er32(SHRAH_PCH_LPT(index - 1)) == rar_high)) ++ return; ++ } ++ } ++ ++out: ++ e_dbg("Failed to write receive address at index %d\n", index); ++} ++ ++/** ++ * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked ++ * @hw: pointer to the HW structure ++ * ++ * Checks if firmware is blocking the reset of the PHY. ++ * This is a function pointer entry point only called by ++ * reset routines. ++ **/ ++static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ fwsm = er32(FWSM); ++ ++ return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? 0 : E1000_BLK_PHY_RESET; ++} ++ ++/** ++ * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states ++ * @hw: pointer to the HW structure ++ * ++ * Assumes semaphore already acquired. 
++ * ++ **/ ++static s32 e1000_write_smbus_addr(struct e1000_hw *hw) ++{ ++ u16 phy_data; ++ u32 strap = er32(STRAP); ++ u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> ++ E1000_STRAP_SMT_FREQ_SHIFT; ++ s32 ret_val = 0; ++ ++ strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; ++ ++ ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ phy_data &= ~HV_SMB_ADDR_MASK; ++ phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); ++ phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; ++ ++ if (hw->phy.type == e1000_phy_i217) { ++ /* Restore SMBus frequency */ ++ if (freq--) { ++ phy_data &= ~HV_SMB_ADDR_FREQ_MASK; ++ phy_data |= (freq & (1 << 0)) << ++ HV_SMB_ADDR_FREQ_LOW_SHIFT; ++ phy_data |= (freq & (1 << 1)) << ++ (HV_SMB_ADDR_FREQ_HIGH_SHIFT - 1); ++ } else { ++ e_dbg("Unsupported SMB frequency in PHY\n"); ++ } ++ } ++ ++ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration ++ * @hw: pointer to the HW structure ++ * ++ * SW should configure the LCD from the NVM extended configuration region ++ * as a workaround for certain parts. ++ **/ ++static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; ++ s32 ret_val = 0; ++ u16 word_addr, reg_data, reg_addr, phy_page = 0; ++ ++ /* ++ * Initialize the PHY from the NVM on ICH platforms. This ++ * is needed due to an issue where the NVM configuration is ++ * not properly autoloaded after power transitions. ++ * Therefore, after each PHY reset, we will load the ++ * configuration data out of the NVM manually. ++ */ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ if (phy->type != e1000_phy_igp_3) ++ return ret_val; ++ ++ if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) || ++ (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) { ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; ++ break; ++ } ++ /* Fall-thru */ ++ case e1000_pchlan: ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; ++ break; ++ default: ++ return ret_val; ++ } ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ data = er32(FEXTNVM); ++ if (!(data & sw_cfg_mask)) ++ goto out; ++ ++ /* ++ * Make sure HW does not configure LCD from PHY ++ * extended configuration before SW configuration ++ */ ++ data = er32(EXTCNF_CTRL); ++ if ((hw->mac.type < e1000_pch2lan) && ++ (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE)) ++ goto out; ++ ++ cnf_size = er32(EXTCNF_SIZE); ++ cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; ++ cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; ++ if (!cnf_size) ++ goto out; ++ ++ cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; ++ cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; ++ ++ if (((hw->mac.type == e1000_pchlan) && ++ !(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)) || ++ (hw->mac.type > e1000_pchlan)) { ++ /* ++ * HW configures the SMBus address and LEDs when the ++ * OEM and LCD Write Enable bits are set in the NVM. ++ * When both NVM bits are cleared, SW will configure ++ * them instead. ++ */ ++ ret_val = e1000_write_smbus_addr(hw); ++ if (ret_val) ++ goto out; ++ ++ data = er32(LEDCTL); ++ ret_val = e1000_write_phy_reg_hv_locked(hw, HV_LED_CONFIG, ++ (u16)data); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* Configure LCD from extended configuration region. 
*/ ++ ++ /* cnf_base_addr is in DWORD */ ++ word_addr = (u16)(cnf_base_addr << 1); ++ ++ for (i = 0; i < cnf_size; i++) { ++ ret_val = e1000_read_nvm(hw, (word_addr + i * 2), 1, ++ ®_data); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_nvm(hw, (word_addr + i * 2 + 1), ++ 1, ®_addr); ++ if (ret_val) ++ goto out; ++ ++ /* Save off the PHY page for future writes. */ ++ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { ++ phy_page = reg_data; ++ continue; ++ } ++ ++ reg_addr &= PHY_REG_MASK; ++ reg_addr |= phy_page; ++ ++ ret_val = phy->ops.write_reg_locked(hw, (u32)reg_addr, ++ reg_data); ++ if (ret_val) ++ goto out; ++ } ++ ++out: ++ hw->phy.ops.release(hw); ++ return ret_val; ++} ++ ++/** ++ * e1000_k1_gig_workaround_hv - K1 Si workaround ++ * @hw: pointer to the HW structure ++ * @link: link up bool flag ++ * ++ * If K1 is enabled for 1Gbps, the MAC might stall when transitioning ++ * from a lower speed. This workaround disables K1 whenever link is at 1Gig ++ * If link is down, the function will restore the default K1 setting located ++ * in the NVM. ++ **/ ++static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) ++{ ++ s32 ret_val = 0; ++ u16 status_reg = 0; ++ bool k1_enable = hw->dev_spec.ich8lan.nvm_k1_enabled; ++ ++ if (hw->mac.type != e1000_pchlan) ++ goto out; ++ ++ /* Wrap the whole flow with the sw flag */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Disable K1 when link is 1Gbps, otherwise use the NVM setting */ ++ if (link) { ++ if (hw->phy.type == e1000_phy_82578) { ++ ret_val = hw->phy.ops.read_reg_locked(hw, BM_CS_STATUS, ++ &status_reg); ++ if (ret_val) ++ goto release; ++ ++ status_reg &= BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_MASK; ++ ++ if (status_reg == (BM_CS_STATUS_LINK_UP | ++ BM_CS_STATUS_RESOLVED | ++ BM_CS_STATUS_SPEED_1000)) ++ k1_enable = false; ++ } ++ ++ if (hw->phy.type == e1000_phy_82577) { ++ ret_val = hw->phy.ops.read_reg_locked(hw, HV_M_STATUS, ++ &status_reg); ++ if (ret_val) ++ goto release; ++ ++ status_reg &= HV_M_STATUS_LINK_UP | ++ HV_M_STATUS_AUTONEG_COMPLETE | ++ HV_M_STATUS_SPEED_MASK; ++ ++ if (status_reg == (HV_M_STATUS_LINK_UP | ++ HV_M_STATUS_AUTONEG_COMPLETE | ++ HV_M_STATUS_SPEED_1000)) ++ k1_enable = false; ++ } ++ ++ /* Link stall fix for link up */ ++ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), ++ 0x0100); ++ if (ret_val) ++ goto release; ++ ++ } else { ++ /* Link stall fix for link down */ ++ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), ++ 0x4100); ++ if (ret_val) ++ goto release; ++ } ++ ++ ret_val = e1000_configure_k1_ich8lan(hw, k1_enable); ++ ++release: ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_configure_k1_ich8lan - Configure K1 power state ++ * @hw: pointer to the HW structure ++ * @enable: K1 state to configure ++ * ++ * Configure the K1 power state based on the provided parameter. ++ * Assumes semaphore already acquired. 
++ * ++ * Success returns 0, Failure returns -E1000_ERR_PHY (-2) ++ **/ ++s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) ++{ ++ s32 ret_val = 0; ++ u32 ctrl_reg = 0; ++ u32 ctrl_ext = 0; ++ u32 reg = 0; ++ u16 kmrn_reg = 0; ++ ++ ret_val = e1000e_read_kmrn_reg_locked(hw, ++ E1000_KMRNCTRLSTA_K1_CONFIG, ++ &kmrn_reg); ++ if (ret_val) ++ goto out; ++ ++ if (k1_enable) ++ kmrn_reg |= E1000_KMRNCTRLSTA_K1_ENABLE; ++ else ++ kmrn_reg &= ~E1000_KMRNCTRLSTA_K1_ENABLE; ++ ++ ret_val = e1000e_write_kmrn_reg_locked(hw, ++ E1000_KMRNCTRLSTA_K1_CONFIG, ++ kmrn_reg); ++ if (ret_val) ++ goto out; ++ ++ udelay(20); ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_reg = er32(CTRL); ++ ++ reg = ctrl_reg & ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); ++ reg |= E1000_CTRL_FRCSPD; ++ ew32(CTRL, reg); ++ ++ ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); ++ e1e_flush(); ++ udelay(20); ++ ew32(CTRL, ctrl_reg); ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ udelay(20); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_oem_bits_config_ich8lan - SW-based LCD Configuration ++ * @hw: pointer to the HW structure ++ * @d0_state: boolean if entering d0 or d3 device state ++ * ++ * SW will configure Gbe Disable and LPLU based on the NVM. The four bits are ++ * collectively called OEM bits. The OEM Write Enable bit and SW Config bit ++ * in NVM determines whether HW should configure LPLU and Gbe Disable. ++ **/ ++static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state) ++{ ++ s32 ret_val = 0; ++ u32 mac_reg; ++ u16 oem_reg; ++ ++ if (hw->mac.type < e1000_pchlan) ++ return ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ if (hw->mac.type == e1000_pchlan) { ++ mac_reg = er32(EXTCNF_CTRL); ++ if (mac_reg & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) ++ goto out; ++ } ++ ++ mac_reg = er32(FEXTNVM); ++ if (!(mac_reg & E1000_FEXTNVM_SW_CONFIG_ICH8M)) ++ goto out; ++ ++ mac_reg = er32(PHY_CTRL); ++ ++ ret_val = hw->phy.ops.read_reg_locked(hw, HV_OEM_BITS, &oem_reg); ++ if (ret_val) ++ goto out; ++ ++ oem_reg &= ~(HV_OEM_BITS_GBE_DIS | HV_OEM_BITS_LPLU); ++ ++ if (d0_state) { ++ if (mac_reg & E1000_PHY_CTRL_GBE_DISABLE) ++ oem_reg |= HV_OEM_BITS_GBE_DIS; ++ ++ if (mac_reg & E1000_PHY_CTRL_D0A_LPLU) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ ++ /* Set Restart auto-neg to activate the bits */ ++ if (!e1000_check_reset_block(hw)) ++ oem_reg |= HV_OEM_BITS_RESTART_AN; ++ } else { ++ if (mac_reg & (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE)) ++ oem_reg |= HV_OEM_BITS_GBE_DIS; ++ ++ if (mac_reg & (E1000_PHY_CTRL_D0A_LPLU | ++ E1000_PHY_CTRL_NOND0A_LPLU)) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ } ++ ++ ret_val = hw->phy.ops.write_reg_locked(hw, HV_OEM_BITS, oem_reg); ++ ++out: ++ hw->phy.ops.release(hw); ++ ++ return ret_val; ++} ++ ++ ++/** ++ * e1000_set_mdio_slow_mode_hv - Set slow MDIO access mode ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, HV_KMRN_MODE_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= HV_KMRN_MDIO_SLOW; ++ ++ ret_val = e1e_wphy(hw, HV_KMRN_MODE_CTRL, data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_hv_phy_workarounds_ich8lan - A series of Phy workarounds to be ++ * done after every PHY reset. 
++ **/ ++static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 phy_data; ++ ++ if (hw->mac.type != e1000_pchlan) ++ return ret_val; ++ ++ /* Set MDIO slow mode before any other MDIO access */ ++ if (hw->phy.type == e1000_phy_82577) { ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ if (((hw->phy.type == e1000_phy_82577) && ++ ((hw->phy.revision == 1) || (hw->phy.revision == 2))) || ++ ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision == 1))) { ++ /* Disable generation of early preamble */ ++ ret_val = e1e_wphy(hw, PHY_REG(769, 25), 0x4431); ++ if (ret_val) ++ return ret_val; ++ ++ /* Preamble tuning for SSC */ ++ ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ if (hw->phy.type == e1000_phy_82578) { ++ /* ++ * Return registers to default by doing a soft reset then ++ * writing 0x3140 to the control register. ++ */ ++ if (hw->phy.revision < 2) { ++ e1000e_phy_sw_reset(hw); ++ ret_val = e1e_wphy(hw, PHY_CONTROL, 0x3140); ++ } ++ } ++ ++ /* Select page 0 */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return ret_val; ++ ++ hw->phy.addr = 1; ++ ret_val = e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 0); ++ hw->phy.ops.release(hw); ++ if (ret_val) ++ goto out; ++ ++ /* ++ * Configure the K1 Si workaround during phy reset assuming there is ++ * link so that it disables K1 if link is in 1Gbps. ++ */ ++ ret_val = e1000_k1_gig_workaround_hv(hw, true); ++ if (ret_val) ++ goto out; ++ ++ /* Workaround for link disconnects on a busy hub in half duplex */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.read_reg_locked(hw, BM_PORT_GEN_CFG, &phy_data); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.write_reg_locked(hw, BM_PORT_GEN_CFG, ++ phy_data & 0x00FF); ++release: ++ hw->phy.ops.release(hw); ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_copy_rx_addrs_to_phy_ich8lan - Copy Rx addresses from MAC to PHY ++ * @hw: pointer to the HW structure ++ **/ ++void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) ++{ ++ u32 mac_reg; ++ u16 i, phy_reg = 0; ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ if (ret_val) ++ goto release; ++ ++ /* Copy both RAL/H (rar_entry_count) and SHRAL/H (+4) to PHY */ ++ for (i = 0; i < (hw->mac.rar_entry_count + 4); i++) { ++ mac_reg = er32(RAL(i)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_L(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_M(i), ++ (u16)((mac_reg >> 16) & 0xFFFF)); ++ ++ mac_reg = er32(RAH(i)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), ++ (u16)((mac_reg & E1000_RAH_AV) ++ >> 16)); ++ } ++ ++ e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); ++ ++release: ++ hw->phy.ops.release(hw); ++} ++ ++/** ++ * e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation ++ * with 82579 PHY ++ * @hw: pointer to the HW structure ++ * @enable: flag to enable/disable workaround when enabling/disabling jumbos ++ **/ ++s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) ++{ ++ s32 ret_val = 0; ++ u16 phy_reg, data; ++ u32 mac_reg; ++ u16 i; ++ ++ if (hw->mac.type < e1000_pch2lan) ++ goto out; ++ ++ /* disable Rx path while enabling/disabling workaround */ ++ e1e_rphy(hw, PHY_REG(769, 20), &phy_reg); ++ ret_val = 
e1e_wphy(hw, PHY_REG(769, 20), phy_reg | (1 << 14)); ++ if (ret_val) ++ goto out; ++ ++ if (enable) { ++ /* ++ * Write Rx addresses (rar_entry_count for RAL/H, +4 for ++ * SHRAL/H) and initial CRC values to the MAC ++ */ ++ for (i = 0; i < (hw->mac.rar_entry_count + 4); i++) { ++ u8 mac_addr[ETH_ALEN] = {0}; ++ u32 addr_high, addr_low; ++ ++ addr_high = er32(RAH(i)); ++ if (!(addr_high & E1000_RAH_AV)) ++ continue; ++ addr_low = er32(RAL(i)); ++ mac_addr[0] = (addr_low & 0xFF); ++ mac_addr[1] = ((addr_low >> 8) & 0xFF); ++ mac_addr[2] = ((addr_low >> 16) & 0xFF); ++ mac_addr[3] = ((addr_low >> 24) & 0xFF); ++ mac_addr[4] = (addr_high & 0xFF); ++ mac_addr[5] = ((addr_high >> 8) & 0xFF); ++ ++ ew32(PCH_RAICC(i), ~ether_crc_le(ETH_ALEN, mac_addr)); ++ } ++ ++ /* Write Rx addresses to the PHY */ ++ e1000_copy_rx_addrs_to_phy_ich8lan(hw); ++ ++ /* Enable jumbo frame workaround in the MAC */ ++ mac_reg = er32(FFLT_DBG); ++ mac_reg &= ~(1 << 14); ++ mac_reg |= (7 << 15); ++ ew32(FFLT_DBG, mac_reg); ++ ++ mac_reg = er32(RCTL); ++ mac_reg |= E1000_RCTL_SECRC; ++ ew32(RCTL, mac_reg); ++ ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ data | (1 << 0)); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ data &= ~(0xF << 8); ++ data |= (0xB << 8); ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Enable jumbo frame workaround in the PHY */ ++ e1e_rphy(hw, PHY_REG(769, 23), &data); ++ data &= ~(0x7F << 5); ++ data |= (0x37 << 5); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(769, 16), &data); ++ data &= ~(1 << 13); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(776, 20), &data); ++ data &= ~(0x3FF << 2); ++ data |= (0x1A << 2); ++ ret_val = e1e_wphy(hw, PHY_REG(776, 20), data); ++ if (ret_val) ++ goto out; ++ ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0xF100); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, HV_PM_CTRL, &data); ++ ret_val = e1e_wphy(hw, HV_PM_CTRL, data | (1 << 10)); ++ if (ret_val) ++ goto out; ++ } else { ++ /* Write MAC register values back to h/w defaults */ ++ mac_reg = er32(FFLT_DBG); ++ mac_reg &= ~(0xF << 14); ++ ew32(FFLT_DBG, mac_reg); ++ ++ mac_reg = er32(RCTL); ++ mac_reg &= ~E1000_RCTL_SECRC; ++ ew32(RCTL, mac_reg); ++ ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ &data); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_CTRL_OFFSET, ++ data & ~(1 << 0)); ++ if (ret_val) ++ goto out; ++ ret_val = e1000e_read_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ &data); ++ if (ret_val) ++ goto out; ++ data &= ~(0xF << 8); ++ data |= (0xB << 8); ++ ret_val = e1000e_write_kmrn_reg(hw, ++ E1000_KMRNCTRLSTA_HD_CTRL, ++ data); ++ if (ret_val) ++ goto out; ++ ++ /* Write PHY register values back to h/w defaults */ ++ e1e_rphy(hw, PHY_REG(769, 23), &data); ++ data &= ~(0x7F << 5); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(769, 16), &data); ++ data |= (1 << 13); ++ ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, PHY_REG(776, 20), &data); ++ data &= ~(0x3FF << 2); ++ data |= (0x8 << 2); ++ ret_val = e1e_wphy(hw, 
PHY_REG(776, 20), data); ++ if (ret_val) ++ goto out; ++ ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0x7E00); ++ if (ret_val) ++ goto out; ++ e1e_rphy(hw, HV_PM_CTRL, &data); ++ ret_val = e1e_wphy(hw, HV_PM_CTRL, data & ~(1 << 10)); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* re-enable Rx path after enabling/disabling workaround */ ++ ret_val = e1e_wphy(hw, PHY_REG(769, 20), phy_reg & ~(1 << 14)); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_lv_phy_workarounds_ich8lan - A series of Phy workarounds to be ++ * done after every PHY reset. ++ **/ ++static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ if (hw->mac.type < e1000_pch2lan) ++ goto out; ++ ++ /* Set MDIO slow mode before any other MDIO access */ ++ ret_val = e1000_set_mdio_slow_mode_hv(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_k1_gig_workaround_lv - K1 Si workaround ++ * @hw: pointer to the HW structure ++ * ++ * Workaround to set the K1 beacon duration for 82579 parts ++ **/ ++static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 status_reg = 0; ++ u32 mac_reg; ++ u16 phy_reg; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ goto out; ++ ++ /* Set K1 beacon duration based on 1Gbps speed or otherwise */ ++ ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg); ++ if (ret_val) ++ goto out; ++ ++ if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) ++ == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { ++ mac_reg = er32(FEXTNVM4); ++ mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; ++ ++ ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg); ++ if (ret_val) ++ goto out; ++ ++ if (status_reg & HV_M_STATUS_SPEED_1000) { ++ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; ++ phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; ++ } else { ++ mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; ++ phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT; ++ } ++ ew32(FEXTNVM4, mac_reg); ++ ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware ++ * @hw: pointer to the HW structure ++ * @gate: boolean set to true to gate, false to ungate ++ * ++ * Gate/ungate the automatic PHY configuration via hardware; perform ++ * the configuration via software instead. ++ **/ ++static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) ++{ ++ u32 extcnf_ctrl; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ return; ++ ++ extcnf_ctrl = er32(EXTCNF_CTRL); ++ ++ if (gate) ++ extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; ++ else ++ extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; ++ ++ ew32(EXTCNF_CTRL, extcnf_ctrl); ++ return; ++} ++ ++/** ++ * e1000_lan_init_done_ich8lan - Check for PHY config completion ++ * @hw: pointer to the HW structure ++ * ++ * Check the appropriate indication the MAC has finished configuring the ++ * PHY after a software reset. ++ **/ ++static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw) ++{ ++ u32 data, loop = E1000_ICH8_LAN_INIT_TIMEOUT; ++ ++ /* Wait for basic configuration completes before proceeding */ ++ do { ++ data = er32(STATUS); ++ data &= E1000_STATUS_LAN_INIT_DONE; ++ udelay(100); ++ } while ((!data) && --loop); ++ ++ /* ++ * If basic configuration is incomplete before the above loop ++ * count reaches 0, loading the configuration from NVM will ++ * leave the PHY in a bad state possibly resulting in no link. 
++ */ ++ if (loop == 0) ++ e_dbg("LAN_INIT_DONE not set, increase timeout\n"); ++ ++ /* Clear the Init Done bit for the next init event */ ++ data = er32(STATUS); ++ data &= ~E1000_STATUS_LAN_INIT_DONE; ++ ew32(STATUS, data); ++} ++ ++/** ++ * e1000_post_phy_reset_ich8lan - Perform steps required after a PHY reset ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 reg; ++ ++ if (e1000_check_reset_block(hw)) ++ goto out; ++ ++ /* Allow time for h/w to get to quiescent state after reset */ ++ usleep_range(10000, 20000); ++ ++ /* Perform any necessary post-reset workarounds */ ++ switch (hw->mac.type) { ++ case e1000_pchlan: ++ ret_val = e1000_hv_phy_workarounds_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ break; ++ case e1000_pch2lan: ++ ret_val = e1000_lv_phy_workarounds_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ break; ++ default: ++ break; ++ } ++ ++ /* Clear the host wakeup bit after lcd reset */ ++ if (hw->mac.type >= e1000_pchlan) { ++ e1e_rphy(hw, BM_PORT_GEN_CFG, ®); ++ reg &= ~BM_WUC_HOST_WU_BIT; ++ e1e_wphy(hw, BM_PORT_GEN_CFG, reg); ++ } ++ ++ /* Configure the LCD with the extended configuration region in NVM */ ++ ret_val = e1000_sw_lcd_config_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ ++ /* Configure the LCD with the OEM bits in NVM */ ++ ret_val = e1000_oem_bits_config_ich8lan(hw, true); ++ ++ if (hw->mac.type == e1000_pch2lan) { ++ /* Ungate automatic PHY configuration on non-managed 82579 */ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ usleep_range(10000, 20000); ++ e1000_gate_hw_phy_config_ich8lan(hw, false); ++ } ++ ++ /* Set EEE LPI Update Timer to 200usec */ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_ADDR, ++ I82579_LPI_UPDATE_TIMER); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_DATA, ++ 0x1387); ++release: ++ hw->phy.ops.release(hw); ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_phy_hw_reset_ich8lan - Performs a PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Resets the PHY ++ * This is a function pointer entry point called by drivers ++ * or other shared routines. ++ **/ ++static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* Gate automatic PHY configuration by hardware on non-managed 82579 */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ ++ ret_val = e1000e_phy_hw_reset_generic(hw); ++ if (ret_val) ++ return ret_val; ++ ++ return e1000_post_phy_reset_ich8lan(hw); ++} ++ ++/** ++ * e1000_set_lplu_state_pchlan - Set Low Power Link Up state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU state according to the active flag. For PCH, if OEM write ++ * bit are disabled in the NVM, writing the LPLU bits in the MAC will not set ++ * the phy speed. This function will manually set the LPLU bit and restart ++ * auto-neg as hw would do. D3 and D0 LPLU will call the same function ++ * since it configures the same bit. 
++ **/ ++static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active) ++{ ++ s32 ret_val = 0; ++ u16 oem_reg; ++ ++ ret_val = e1e_rphy(hw, HV_OEM_BITS, &oem_reg); ++ if (ret_val) ++ goto out; ++ ++ if (active) ++ oem_reg |= HV_OEM_BITS_LPLU; ++ else ++ oem_reg &= ~HV_OEM_BITS_LPLU; ++ ++ oem_reg |= HV_OEM_BITS_RESTART_AN; ++ ret_val = e1e_wphy(hw, HV_OEM_BITS, oem_reg); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D0 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. ++ **/ ++static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val = 0; ++ u16 data; ++ ++ if (phy->type == e1000_phy_ife) ++ return ret_val; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ ++ if (active) { ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ if (ret_val) ++ return ret_val; ++ } else { ++ phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state ++ * @hw: pointer to the HW structure ++ * @active: true to enable LPLU, false to disable ++ * ++ * Sets the LPLU D3 state according to the active flag. When ++ * activating LPLU this function also disables smart speed ++ * and vice versa. LPLU will not be activated unless the ++ * device autonegotiation advertisement meets standards of ++ * either 10 or 10/100 or 10/100/1000 at all duplexes. ++ * This is a function pointer entry point only called by ++ * PHY setup routines. 
++ **/ ++static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ u32 phy_ctrl; ++ s32 ret_val; ++ u16 data; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ ++ if (!active) { ++ phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * LPLU and SmartSpeed are mutually exclusive. LPLU is used ++ * during Dx states where the power conservation is most ++ * important. During driver activity we should enable ++ * SmartSpeed, so performance is maintained. ++ */ ++ if (phy->smart_speed == e1000_smart_speed_on) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } else if (phy->smart_speed == e1000_smart_speed_off) { ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, ++ data); ++ if (ret_val) ++ return ret_val; ++ } ++ } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || ++ (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || ++ (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { ++ phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (phy->type != e1000_phy_igp_3) ++ return 0; ++ ++ /* ++ * Call gig speed drop workaround on LPLU before accessing ++ * any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* When LPLU is enabled, we should disable SmartSpeed */ ++ ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~IGP01E1000_PSCFR_SMART_SPEED; ++ ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 ++ * @hw: pointer to the HW structure ++ * @bank: pointer to the variable that returns the active bank ++ * ++ * Reads signature byte from the NVM using the flash access registers. ++ * Word 0x13 bits 15:14 = 10b indicate a valid signature for that bank. 
++ **/ ++static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) ++{ ++ u32 eecd; ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 bank1_offset = nvm->flash_bank_size * sizeof(u16); ++ u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1; ++ u8 sig_byte = 0; ++ s32 ret_val = 0; ++ ++ switch (hw->mac.type) { ++ case e1000_ich8lan: ++ case e1000_ich9lan: ++ eecd = er32(EECD); ++ if ((eecd & E1000_EECD_SEC1VAL_VALID_MASK) == ++ E1000_EECD_SEC1VAL_VALID_MASK) { ++ if (eecd & E1000_EECD_SEC1VAL) ++ *bank = 1; ++ else ++ *bank = 0; ++ ++ return 0; ++ } ++ e_dbg("Unable to determine valid NVM bank via EEC - " ++ "reading flash signature\n"); ++ /* fall-thru */ ++ default: ++ /* set bank to 0 in case flash read fails */ ++ *bank = 0; ++ ++ /* Check bank 0 */ ++ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset, ++ &sig_byte); ++ if (ret_val) ++ return ret_val; ++ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == ++ E1000_ICH_NVM_SIG_VALUE) { ++ *bank = 0; ++ return 0; ++ } ++ ++ /* Check bank 1 */ ++ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset + ++ bank1_offset, ++ &sig_byte); ++ if (ret_val) ++ return ret_val; ++ if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == ++ E1000_ICH_NVM_SIG_VALUE) { ++ *bank = 1; ++ return 0; ++ } ++ ++ e_dbg("ERROR: No valid NVM bank present\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_nvm_ich8lan - Read word(s) from the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to read. ++ * @words: Size of data to read in words ++ * @data: Pointer to the word(s) to read at offset. ++ * ++ * Reads a word(s) from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 act_offset; ++ s32 ret_val = 0; ++ u32 bank = 0; ++ u16 i, word; ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ ret_val = -E1000_ERR_NVM; ++ goto out; ++ } ++ ++ nvm->ops.acquire(hw); ++ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val) { ++ e_dbg("Could not detect valid bank, assuming bank 0\n"); ++ bank = 0; ++ } ++ ++ act_offset = (bank) ? nvm->flash_bank_size : 0; ++ act_offset += offset; ++ ++ ret_val = 0; ++ for (i = 0; i < words; i++) { ++ if (dev_spec->shadow_ram[offset+i].modified) { ++ data[i] = dev_spec->shadow_ram[offset+i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, ++ act_offset + i, ++ &word); ++ if (ret_val) ++ break; ++ data[i] = word; ++ } ++ } ++ ++ nvm->ops.release(hw); ++ ++out: ++ if (ret_val) ++ e_dbg("NVM read error: %d\n", ret_val); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_init_ich8lan - Initialize flash ++ * @hw: pointer to the HW structure ++ * ++ * This function does initial flash setup so that a new read/write/erase cycle ++ * can be started. ++ **/ ++static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) ++{ ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ ++ /* Check if the flash descriptor is valid */ ++ if (hsfsts.hsf_status.fldesvalid == 0) { ++ e_dbg("Flash descriptor invalid. 
" ++ "SW Sequencing must be used.\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Clear FCERR and DAEL in hw status by writing 1 */ ++ hsfsts.hsf_status.flcerr = 1; ++ hsfsts.hsf_status.dael = 1; ++ ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ /* ++ * Either we should have a hardware SPI cycle in progress ++ * bit to check against, in order to start a new cycle or ++ * FDONE bit should be changed in the hardware so that it ++ * is 1 after hardware reset, which can then be used as an ++ * indication whether a cycle is in progress or has been ++ * completed. ++ */ ++ ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ /* ++ * There is no cycle running at present, ++ * so we can start a cycle. ++ * Begin by setting Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ret_val = 0; ++ } else { ++ s32 i = 0; ++ ++ /* ++ * Otherwise poll for sometime so the current ++ * cycle has a chance to end before giving up. ++ */ ++ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { ++ hsfsts.regval = __er16flash(hw, ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcinprog == 0) { ++ ret_val = 0; ++ break; ++ } ++ udelay(1); ++ } ++ if (ret_val == 0) { ++ /* ++ * Successful in waiting for previous cycle to timeout, ++ * now set the Flash Cycle Done. ++ */ ++ hsfsts.hsf_status.flcdone = 1; ++ ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ } else { ++ e_dbg("Flash controller busy, cannot get access\n"); ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) ++ * @hw: pointer to the HW structure ++ * @timeout: maximum time to wait for completion ++ * ++ * This function starts a flash cycle and waits for its completion. ++ **/ ++static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) ++{ ++ union ich8_hws_flash_ctrl hsflctl; ++ union ich8_hws_flash_status hsfsts; ++ s32 ret_val = -E1000_ERR_NVM; ++ u32 i = 0; ++ ++ /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcgo = 1; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* wait till FDONE bit is set to 1 */ ++ do { ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcdone == 1) ++ break; ++ udelay(1); ++ } while (i++ < timeout); ++ ++ if (hsfsts.hsf_status.flcdone == 1 && hsfsts.hsf_status.flcerr == 0) ++ return 0; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_flash_word_ich8lan - Read word from flash ++ * @hw: pointer to the HW structure ++ * @offset: offset to data location ++ * @data: pointer to the location for storing the data ++ * ++ * Reads the flash word at offset into data. Offset is converted ++ * to bytes before read. ++ **/ ++static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ /* Must convert offset into bytes. */ ++ offset <<= 1; ++ ++ return e1000_read_flash_data_ich8lan(hw, offset, 2, data); ++} ++ ++/** ++ * e1000_read_flash_byte_ich8lan - Read byte from flash ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to read. ++ * @data: Pointer to a byte to store the value read. ++ * ++ * Reads a single byte from the NVM using the flash access registers. 
++ **/ ++static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 *data) ++{ ++ s32 ret_val; ++ u16 word = 0; ++ ++ ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); ++ if (ret_val) ++ return ret_val; ++ ++ *data = (u8)word; ++ ++ return 0; ++} ++ ++/** ++ * e1000_read_flash_data_ich8lan - Read byte or word from NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte or word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: Pointer to the word to store the value read. ++ * ++ * Reads a byte or word from the NVM using the flash access registers. ++ **/ ++static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 *data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val = -E1000_ERR_NVM; ++ u8 count = 0; ++ ++ if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ return -E1000_ERR_NVM; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ udelay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val != 0) ++ break; ++ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size - 1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_READ_COMMAND_TIMEOUT); ++ ++ /* ++ * Check if FCERR is set to 1, if set to 1, clear it ++ * and try the whole sequence a few more times, else ++ * read in (shift in) the Flash Data0, the order is ++ * least significant byte first msb to lsb ++ */ ++ if (ret_val == 0) { ++ flash_data = er32flash(ICH_FLASH_FDATA0); ++ if (size == 1) ++ *data = (u8)(flash_data & 0x000000FF); ++ else if (size == 2) ++ *data = (u16)(flash_data & 0x0000FFFF); ++ break; ++ } else { ++ /* ++ * If we've gotten here, then things are probably ++ * completely hosed, but if the error condition is ++ * detected, it won't hurt to give it another try... ++ * ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) { ++ /* Repeat for some time before giving up. */ ++ continue; ++ } else if (hsfsts.hsf_status.flcdone == 0) { ++ e_dbg("Timeout error - flash cycle " ++ "did not complete.\n"); ++ break; ++ } ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_ich8lan - Write word(s) to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the word(s) to write. ++ * @words: Size of data to write in words ++ * @data: Pointer to the word(s) to write at offset. ++ * ++ * Writes a byte or word to the NVM using the flash access registers. 
++ **/ ++static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ++ u16 *data) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u16 i; ++ ++ if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || ++ (words == 0)) { ++ e_dbg("nvm parameter(s) out of bounds\n"); ++ return -E1000_ERR_NVM; ++ } ++ ++ nvm->ops.acquire(hw); ++ ++ for (i = 0; i < words; i++) { ++ dev_spec->shadow_ram[offset+i].modified = true; ++ dev_spec->shadow_ram[offset+i].value = data[i]; ++ } ++ ++ nvm->ops.release(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM ++ * @hw: pointer to the HW structure ++ * ++ * The NVM checksum is updated by calling the generic update_nvm_checksum, ++ * which writes the checksum to the shadow ram. The changes in the shadow ++ * ram are then committed to the EEPROM by processing each bank at a time ++ * checking for the modified bit and writing only the pending changes. ++ * After a successful commit, the shadow ram is cleared and is ready for ++ * future writes. ++ **/ ++static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 i, act_offset, new_bank_offset, old_bank_offset, bank; ++ s32 ret_val; ++ u16 data; ++ ++ ret_val = e1000e_update_nvm_checksum_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ if (nvm->type != e1000_nvm_flash_sw) ++ goto out; ++ ++ nvm->ops.acquire(hw); ++ ++ /* ++ * We're writing to the opposite bank so if we're on bank 1, ++ * write to bank 0 etc. We also need to erase the segment that ++ * is going to be written ++ */ ++ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); ++ if (ret_val) { ++ e_dbg("Could not detect valid bank, assuming bank 0\n"); ++ bank = 0; ++ } ++ ++ if (bank == 0) { ++ new_bank_offset = nvm->flash_bank_size; ++ old_bank_offset = 0; ++ ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); ++ if (ret_val) ++ goto release; ++ } else { ++ old_bank_offset = nvm->flash_bank_size; ++ new_bank_offset = 0; ++ ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); ++ if (ret_val) ++ goto release; ++ } ++ ++ for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) { ++ /* ++ * Determine whether to write the value stored ++ * in the other NVM bank or a modified value stored ++ * in the shadow RAM ++ */ ++ if (dev_spec->shadow_ram[i].modified) { ++ data = dev_spec->shadow_ram[i].value; ++ } else { ++ ret_val = e1000_read_flash_word_ich8lan(hw, i + ++ old_bank_offset, ++ &data); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * If the word is 0x13, then make sure the signature bits ++ * (15:14) are 11b until the commit has completed. ++ * This will allow us to write 10b which indicates the ++ * signature is valid. We want to do this after the write ++ * has completed so that we don't mark the segment valid ++ * while the write is still in progress ++ */ ++ if (i == E1000_ICH_NVM_SIG_WORD) ++ data |= E1000_ICH_NVM_SIG_MASK; ++ ++ /* Convert offset to bytes. */ ++ act_offset = (i + new_bank_offset) << 1; ++ ++ udelay(100); ++ /* Write the bytes to the new bank. 
*/ ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset, ++ (u8)data); ++ if (ret_val) ++ break; ++ ++ udelay(100); ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ break; ++ } ++ ++ /* ++ * Don't bother writing the segment valid bits if sector ++ * programming failed. ++ */ ++ if (ret_val) { ++ /* Possibly read-only, see e1000e_write_protect_nvm_ich8lan() */ ++ e_dbg("Flash commit failed.\n"); ++ goto release; ++ } ++ ++ /* ++ * Finally validate the new segment by setting bit 15:14 ++ * to 10b in word 0x13 , this can be done without an ++ * erase as well since these bits are 11 to start with ++ * and we need to change bit 14 to 0b ++ */ ++ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ++ ret_val = e1000_read_flash_word_ich8lan(hw, act_offset, &data); ++ if (ret_val) ++ goto release; ++ ++ data &= 0xBFFF; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, ++ act_offset * 2 + 1, ++ (u8)(data >> 8)); ++ if (ret_val) ++ goto release; ++ ++ /* ++ * And invalidate the previously valid segment by setting ++ * its signature word (0x13) high_byte to 0b. This can be ++ * done without an erase because flash erase sets all bits ++ * to 1's. We can write 1's to 0's without an erase ++ */ ++ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ++ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); ++ if (ret_val) ++ goto release; ++ ++ /* Great! Everything worked, we can now clear the cached entries. */ ++ for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) { ++ dev_spec->shadow_ram[i].modified = false; ++ dev_spec->shadow_ram[i].value = 0xFFFF; ++ } ++ ++release: ++ nvm->ops.release(hw); ++ ++ /* ++ * Reload the EEPROM, or else modifications will not appear ++ * until after the next adapter reset. ++ */ ++ if (!ret_val) { ++ e1000e_reload_nvm(hw); ++ usleep_range(10000, 20000); ++ } ++ ++out: ++ if (ret_val) ++ e_dbg("NVM update error: %d\n", ret_val); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum ++ * @hw: pointer to the HW structure ++ * ++ * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. ++ * If the bit is 0, that the EEPROM had been modified, but the checksum was not ++ * calculated, in which case we need to calculate the checksum and set bit 6. ++ **/ ++static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 data; ++ ++ /* ++ * Read 0x19 and check bit 6. If this bit is 0, the checksum ++ * needs to be fixed. This bit is an indication that the NVM ++ * was prepared by OEM software and did not calculate the ++ * checksum...a likely scenario. ++ */ ++ ret_val = e1000_read_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ++ if ((data & 0x40) == 0) { ++ data |= 0x40; ++ ret_val = e1000_write_nvm(hw, 0x19, 1, &data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_update_nvm_checksum(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return e1000e_validate_nvm_checksum_generic(hw); ++} ++ ++/** ++ * e1000e_write_protect_nvm_ich8lan - Make the NVM read-only ++ * @hw: pointer to the HW structure ++ * ++ * To prevent malicious write/erase of the NVM, set it to be read-only ++ * so that the hardware ignores all write/erase cycles of the NVM via ++ * the flash control registers. The shadow-ram copy of the NVM will ++ * still be updated, however any updates to this copy will not stick ++ * across driver reloads. 
++ **/ ++void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_flash_protected_range pr0; ++ union ich8_hws_flash_status hsfsts; ++ u32 gfpreg; ++ ++ nvm->ops.acquire(hw); ++ ++ gfpreg = er32flash(ICH_FLASH_GFPREG); ++ ++ /* Write-protect GbE Sector of NVM */ ++ pr0.regval = er32flash(ICH_FLASH_PR0); ++ pr0.range.base = gfpreg & FLASH_GFPREG_BASE_MASK; ++ pr0.range.limit = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK); ++ pr0.range.wpe = true; ++ ew32flash(ICH_FLASH_PR0, pr0.regval); ++ ++ /* ++ * Lock down a subset of GbE Flash Control Registers, e.g. ++ * PR0 to prevent the write-protection from being lifted. ++ * Once FLOCKDN is set, the registers protected by it cannot ++ * be written until FLOCKDN is cleared by a hardware reset. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ hsfsts.hsf_status.flockdn = true; ++ ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval); ++ ++ nvm->ops.release(hw); ++} ++ ++/** ++ * e1000_write_flash_data_ich8lan - Writes bytes to the NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset (in bytes) of the byte/word to read. ++ * @size: Size of data to read, 1=byte 2=word ++ * @data: The byte(s) to write to the NVM. ++ * ++ * Writes one/two bytes to the NVM using the flash access registers. ++ **/ ++static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 size, u16 data) ++{ ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ u32 flash_data = 0; ++ s32 ret_val; ++ u8 count = 0; ++ ++ if (size < 1 || size > 2 || data > size * 0xff || ++ offset > ICH_FLASH_LINEAR_ADDR_MASK) ++ return -E1000_ERR_NVM; ++ ++ flash_linear_addr = (ICH_FLASH_LINEAR_ADDR_MASK & offset) + ++ hw->nvm.flash_base_addr; ++ ++ do { ++ udelay(1); ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ break; ++ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ ++ hsflctl.hsf_ctrl.fldbcount = size -1; ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ if (size == 1) ++ flash_data = (u32)data & 0x00FF; ++ else ++ flash_data = (u32)data; ++ ++ ew32flash(ICH_FLASH_FDATA0, flash_data); ++ ++ /* ++ * check if FCERR is set to 1 , if set to 1, clear it ++ * and try the whole sequence a few more times else done ++ */ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_WRITE_COMMAND_TIMEOUT); ++ if (!ret_val) ++ break; ++ ++ /* ++ * If we're here, then things are most likely ++ * completely hosed, but if the error condition ++ * is detected, it won't hurt to give it another ++ * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) ++ /* Repeat for some time before giving up. */ ++ continue; ++ if (hsfsts.hsf_status.flcdone == 0) { ++ e_dbg("Timeout error - flash cycle " ++ "did not complete."); ++ break; ++ } ++ } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_flash_byte_ich8lan - Write a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The index of the byte to read. ++ * @data: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. 
++ **/ ++static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, ++ u8 data) ++{ ++ u16 word = (u16)data; ++ ++ return e1000_write_flash_data_ich8lan(hw, offset, 1, word); ++} ++ ++/** ++ * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM ++ * @hw: pointer to the HW structure ++ * @offset: The offset of the byte to write. ++ * @byte: The byte to write to the NVM. ++ * ++ * Writes a single byte to the NVM using the flash access registers. ++ * Goes through a retry algorithm before giving up. ++ **/ ++static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, ++ u32 offset, u8 byte) ++{ ++ s32 ret_val; ++ u16 program_retries; ++ ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (!ret_val) ++ return ret_val; ++ ++ for (program_retries = 0; program_retries < 100; program_retries++) { ++ e_dbg("Retrying Byte %2.2X at offset %u\n", byte, offset); ++ udelay(100); ++ ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); ++ if (!ret_val) ++ break; ++ } ++ if (program_retries == 100) ++ return -E1000_ERR_NVM; ++ ++ return 0; ++} ++ ++/** ++ * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM ++ * @hw: pointer to the HW structure ++ * @bank: 0 for first bank, 1 for second bank, etc. ++ * ++ * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. ++ * bank N is 4096 * N + flash_reg_addr. ++ **/ ++static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ union ich8_hws_flash_status hsfsts; ++ union ich8_hws_flash_ctrl hsflctl; ++ u32 flash_linear_addr; ++ /* bank size is in 16bit words - adjust to bytes */ ++ u32 flash_bank_size = nvm->flash_bank_size * 2; ++ s32 ret_val; ++ s32 count = 0; ++ s32 j, iteration, sector_size; ++ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ ++ /* ++ * Determine HW Sector size: Read BERASE bits of hw flash status ++ * register ++ * 00: The Hw sector is 256 bytes, hence we need to erase 16 ++ * consecutive sectors. The start index for the nth Hw sector ++ * can be calculated as = bank * 4096 + n * 256 ++ * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. ++ * The start index for the nth Hw sector can be calculated ++ * as = bank * 4096 ++ * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 ++ * (ich9 only, otherwise error condition) ++ * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 ++ */ ++ switch (hsfsts.hsf_status.berasesz) { ++ case 0: ++ /* Hw sector size 256 */ ++ sector_size = ICH_FLASH_SEG_SIZE_256; ++ iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; ++ break; ++ case 1: ++ sector_size = ICH_FLASH_SEG_SIZE_4K; ++ iteration = 1; ++ break; ++ case 2: ++ sector_size = ICH_FLASH_SEG_SIZE_8K; ++ iteration = 1; ++ break; ++ case 3: ++ sector_size = ICH_FLASH_SEG_SIZE_64K; ++ iteration = 1; ++ break; ++ default: ++ return -E1000_ERR_NVM; ++ } ++ ++ /* Start with the base address, then add the sector offset. */ ++ flash_linear_addr = hw->nvm.flash_base_addr; ++ flash_linear_addr += (bank) ? 
flash_bank_size : 0; ++ ++ for (j = 0; j < iteration ; j++) { ++ do { ++ /* Steps */ ++ ret_val = e1000_flash_cycle_init_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Write a value 11 (block Erase) in Flash ++ * Cycle field in hw flash control ++ */ ++ hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); ++ hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; ++ ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); ++ ++ /* ++ * Write the last 24 bits of an index within the ++ * block into Flash Linear address field in Flash ++ * Address. ++ */ ++ flash_linear_addr += (j * sector_size); ++ ew32flash(ICH_FLASH_FADDR, flash_linear_addr); ++ ++ ret_val = e1000_flash_cycle_ich8lan(hw, ++ ICH_FLASH_ERASE_COMMAND_TIMEOUT); ++ if (ret_val == 0) ++ break; ++ ++ /* ++ * Check if FCERR is set to 1. If 1, ++ * clear it and try the whole sequence ++ * a few more times else Done ++ */ ++ hsfsts.regval = er16flash(ICH_FLASH_HSFSTS); ++ if (hsfsts.hsf_status.flcerr == 1) ++ /* repeat for some time before giving up */ ++ continue; ++ else if (hsfsts.hsf_status.flcdone == 0) ++ return ret_val; ++ } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_valid_led_default_ich8lan - Set the default LED settings ++ * @hw: pointer to the HW structure ++ * @data: Pointer to the LED settings ++ * ++ * Reads the LED default settings from the NVM to data. If the NVM LED ++ * settings is all 0's or F's, set the LED default to a valid LED default ++ * setting. ++ **/ ++static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data); ++ if (ret_val) { ++ e_dbg("NVM Read Error\n"); ++ return ret_val; ++ } ++ ++ if (*data == ID_LED_RESERVED_0000 || ++ *data == ID_LED_RESERVED_FFFF) ++ *data = ID_LED_DEFAULT_ICH8LAN; ++ ++ return 0; ++} ++ ++/** ++ * e1000_id_led_init_pchlan - store LED configurations ++ * @hw: pointer to the HW structure ++ * ++ * PCH does not control LEDs via the LEDCTL register, rather it uses ++ * the PHY LED configuration register. ++ * ++ * PCH also does not have an "always on" or "always off" mode which ++ * complicates the ID feature. Instead of using the "on" mode to indicate ++ * in ledctl_mode2 the LEDs to use for ID (see e1000e_id_led_init()), ++ * use "link_up" mode. The LEDs will still ID on request if there is no ++ * link based on logic in e1000_led_[on|off]_pchlan(). 
++ **/ ++static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ s32 ret_val; ++ const u32 ledctl_on = E1000_LEDCTL_MODE_LINK_UP; ++ const u32 ledctl_off = E1000_LEDCTL_MODE_LINK_UP | E1000_PHY_LED0_IVRT; ++ u16 data, i, temp, shift; ++ ++ /* Get default ID LED modes */ ++ ret_val = hw->nvm.ops.valid_led_default(hw, &data); ++ if (ret_val) ++ goto out; ++ ++ mac->ledctl_default = er32(LEDCTL); ++ mac->ledctl_mode1 = mac->ledctl_default; ++ mac->ledctl_mode2 = mac->ledctl_default; ++ ++ for (i = 0; i < 4; i++) { ++ temp = (data >> (i << 2)) & E1000_LEDCTL_LED0_MODE_MASK; ++ shift = (i * 5); ++ switch (temp) { ++ case ID_LED_ON1_DEF2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_ON1_OFF2: ++ mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode1 |= (ledctl_on << shift); ++ break; ++ case ID_LED_OFF1_DEF2: ++ case ID_LED_OFF1_ON2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode1 |= (ledctl_off << shift); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ switch (temp) { ++ case ID_LED_DEF1_ON2: ++ case ID_LED_ON1_ON2: ++ case ID_LED_OFF1_ON2: ++ mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode2 |= (ledctl_on << shift); ++ break; ++ case ID_LED_DEF1_OFF2: ++ case ID_LED_ON1_OFF2: ++ case ID_LED_OFF1_OFF2: ++ mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); ++ mac->ledctl_mode2 |= (ledctl_off << shift); ++ break; ++ default: ++ /* Do nothing */ ++ break; ++ } ++ } ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_bus_info_ich8lan - Get/Set the bus type and width ++ * @hw: pointer to the HW structure ++ * ++ * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability ++ * register, so the the bus width is hard coded. ++ **/ ++static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_bus_info *bus = &hw->bus; ++ s32 ret_val; ++ ++ ret_val = e1000e_get_bus_info_pcie(hw); ++ ++ /* ++ * ICH devices are "PCI Express"-ish. They have ++ * a configuration space, but do not contain ++ * PCI Express Capability registers, so bus width ++ * must be hardcoded. ++ */ ++ if (bus->width == e1000_bus_width_unknown) ++ bus->width = e1000_bus_width_pcie_x1; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_ich8lan - Reset the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Does a full reset of the hardware which includes a reset of the PHY and ++ * MAC. ++ **/ ++static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u16 reg; ++ u32 ctrl, kab; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ /* ++ * Disable the Transmit and Receive units. Then delay to allow ++ * any pending transactions to complete before we hit the MAC ++ * with the global reset. ++ */ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ /* Workaround for ICH8 bit corruption issue in FIFO memory */ ++ if (hw->mac.type == e1000_ich8lan) { ++ /* Set Tx and Rx buffer allocation to 8k apiece. */ ++ ew32(PBA, E1000_PBA_8K); ++ /* Set Packet Buffer Size to 16k. 
*/ ++ ew32(PBS, E1000_PBS_16K); ++ } ++ ++ if (hw->mac.type == e1000_pchlan) { ++ /* Save the NVM K1 bit setting*/ ++ ret_val = e1000_read_nvm(hw, E1000_NVM_K1_CONFIG, 1, ®); ++ if (ret_val) ++ return ret_val; ++ ++ if (reg & E1000_NVM_K1_ENABLE) ++ dev_spec->nvm_k1_enabled = true; ++ else ++ dev_spec->nvm_k1_enabled = false; ++ } ++ ++ ctrl = er32(CTRL); ++ ++ if (!e1000_check_reset_block(hw)) { ++ /* ++ * Full-chip reset requires MAC and PHY reset at the same ++ * time to make sure the interface between MAC and the ++ * external PHY is reset. ++ */ ++ ctrl |= E1000_CTRL_PHY_RST; ++ ++ /* ++ * Gate automatic PHY configuration by hardware on ++ * non-managed 82579 ++ */ ++ if ((hw->mac.type == e1000_pch2lan) && ++ !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) ++ e1000_gate_hw_phy_config_ich8lan(hw, true); ++ } ++ ret_val = e1000_acquire_swflag_ich8lan(hw); ++ e_dbg("Issuing a global reset to ich8lan\n"); ++ ew32(CTRL, (ctrl | E1000_CTRL_RST)); ++ /* cannot issue a flush here because it hangs the hardware */ ++ msleep(20); ++ ++ if (!ret_val) ++ clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state); ++ ++ if (ctrl & E1000_CTRL_PHY_RST) { ++ ret_val = hw->phy.ops.get_cfg_done(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_post_phy_reset_ich8lan(hw); ++ if (ret_val) ++ goto out; ++ } ++ ++ /* ++ * For PCH, this write will make sure that any noise ++ * will be detected as a CRC error and be dropped rather than show up ++ * as a bad packet to the DMA engine. ++ */ ++ if (hw->mac.type == e1000_pchlan) ++ ew32(CRC_OFFSET, 0x65656565); ++ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ kab = er32(KABGTXD); ++ kab |= E1000_KABGTXD_BGSQLBIAS; ++ ew32(KABGTXD, kab); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_ich8lan - Initialize the hardware ++ * @hw: pointer to the HW structure ++ * ++ * Prepares the hardware for transmit and receive by doing the following: ++ * - initialize hardware bits ++ * - initialize LED identification ++ * - setup receive address registers ++ * - setup flow control ++ * - setup transmit descriptors ++ * - clear statistics ++ **/ ++static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 ctrl_ext, txdctl, snoop; ++ s32 ret_val; ++ u16 i; ++ ++ e1000_initialize_hw_bits_ich8lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = mac->ops.id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Setup the receive address. */ ++ e1000e_init_rx_addrs(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* ++ * The 82578 Rx buffer will stall if wakeup is enabled in host and ++ * the ME. Disable wakeup by clearing the host wakeup bit. ++ * Reset the phy after disabling host wakeup to reset the Rx buffer. 
++ */ ++ if (hw->phy.type == e1000_phy_82578) { ++ e1e_rphy(hw, BM_PORT_GEN_CFG, &i); ++ i &= ~BM_WUC_HOST_WU_BIT; ++ e1e_wphy(hw, BM_PORT_GEN_CFG, i); ++ ret_val = e1000_phy_hw_reset_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Setup link and flow control */ ++ ret_val = e1000_setup_link_ich8lan(hw); ++ ++ /* Set the transmit descriptor write-back policy for both queues */ ++ txdctl = er32(TXDCTL(0)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ ew32(TXDCTL(0), txdctl); ++ txdctl = er32(TXDCTL(1)); ++ txdctl = (txdctl & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB; ++ txdctl = (txdctl & ~E1000_TXDCTL_PTHRESH) | ++ E1000_TXDCTL_MAX_TX_DESC_PREFETCH; ++ ew32(TXDCTL(1), txdctl); ++ ++ /* ++ * ICH8 has opposite polarity of no_snoop bits. ++ * By default, we should use snoop behavior. ++ */ ++ if (mac->type == e1000_ich8lan) ++ snoop = PCIE_ICH8_SNOOP_ALL; ++ else ++ snoop = (u32) ~(PCIE_NO_SNOOP_ALL); ++ e1000e_set_pcie_no_snoop(hw, snoop); ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ++ ew32(CTRL_EXT, ctrl_ext); ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_ich8lan(hw); ++ ++ return 0; ++} ++/** ++ * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits ++ * @hw: pointer to the HW structure ++ * ++ * Sets/Clears required hardware bits necessary for correctly setting up the ++ * hardware for transmit and receive. ++ **/ ++static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Extended Device Control */ ++ reg = er32(CTRL_EXT); ++ reg |= (1 << 22); ++ /* Enable PHY low-power state when MAC is at D3 w/o WoL */ ++ if (hw->mac.type >= e1000_pchlan) ++ reg |= E1000_CTRL_EXT_PHYPDEN; ++ ew32(CTRL_EXT, reg); ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ if (hw->mac.type == e1000_ich8lan) ++ reg |= (1 << 28) | (1 << 29); ++ reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ reg |= (1 << 24) | (1 << 26) | (1 << 30); ++ ew32(TARC(1), reg); ++ ++ /* Device Status */ ++ if (hw->mac.type == e1000_ich8lan) { ++ reg = er32(STATUS); ++ reg &= ~(1 << 31); ++ ew32(STATUS, reg); ++ } ++ ++ /* ++ * work-around descriptor data corruption issue during nfs v2 udp ++ * traffic, just disable the nfs filtering capability ++ */ ++ reg = er32(RFCTL); ++ reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS); ++ ew32(RFCTL, reg); ++} ++ ++/** ++ * e1000_setup_link_ich8lan - Setup flow control and link settings ++ * @hw: pointer to the HW structure ++ * ++ * Determines which flow control settings to use, then configures flow ++ * control. Calls the appropriate media-specific link configuration ++ * function. Assuming the adapter has a valid link partner, a valid link ++ * should be established. 
Assumes the hardware has previously been reset ++ * and the transmitter and receiver are not enabled. ++ **/ ++static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ if (e1000_check_reset_block(hw)) ++ return 0; ++ ++ /* ++ * ICH parts do not have a word in the NVM to determine ++ * the default flow control setting, so we explicitly ++ * set it to full. ++ */ ++ if (hw->fc.requested_mode == e1000_fc_default) { ++ /* Workaround h/w hang when Tx flow control enabled */ ++ if (hw->mac.type == e1000_pchlan) ++ hw->fc.requested_mode = e1000_fc_rx_pause; ++ else ++ hw->fc.requested_mode = e1000_fc_full; ++ } ++ ++ /* ++ * Save off the requested flow control mode for use later. Depending ++ * on the link partner's capabilities, we may or may not use this mode. ++ */ ++ hw->fc.current_mode = hw->fc.requested_mode; ++ ++ e_dbg("After fix-ups FlowControl is now = %x\n", ++ hw->fc.current_mode); ++ ++ /* Continue to configure the copper link. */ ++ ret_val = e1000_setup_copper_link_ich8lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ew32(FCTTV, hw->fc.pause_time); ++ if ((hw->phy.type == e1000_phy_82578) || ++ (hw->phy.type == e1000_phy_82579) || ++ (hw->phy.type == e1000_phy_i217) || ++ (hw->phy.type == e1000_phy_82577)) { ++ ew32(FCRTV_PCH, hw->fc.refresh_time); ++ ++ ret_val = e1e_wphy(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27), ++ hw->fc.pause_time); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ return e1000e_set_fc_watermarks(hw); ++} ++ ++/** ++ * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface ++ * @hw: pointer to the HW structure ++ * ++ * Configures the kumeran interface to the PHY to wait the appropriate time ++ * when polling the PHY, then call the generic setup_copper_link to finish ++ * configuring the copper link. ++ **/ ++static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each iteration ++ * and increase the max iterations when polling the phy; ++ * this fixes erroneous timeouts at 10Mbps. 
++ */ ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_TIMEOUTS, 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ switch (hw->phy.type) { ++ case e1000_phy_igp_3: ++ ret_val = e1000e_copper_link_setup_igp(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_bm: ++ case e1000_phy_82578: ++ ret_val = e1000e_copper_link_setup_m88(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_82577: ++ case e1000_phy_82579: ++ case e1000_phy_i217: ++ ret_val = e1000_copper_link_setup_82577(hw); ++ if (ret_val) ++ return ret_val; ++ break; ++ case e1000_phy_ife: ++ ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ reg_data &= ~IFE_PMC_AUTO_MDIX; ++ ++ switch (hw->phy.mdix) { ++ case 1: ++ reg_data &= ~IFE_PMC_FORCE_MDIX; ++ break; ++ case 2: ++ reg_data |= IFE_PMC_FORCE_MDIX; ++ break; ++ case 0: ++ default: ++ reg_data |= IFE_PMC_AUTO_MDIX; ++ break; ++ } ++ ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, reg_data); ++ if (ret_val) ++ return ret_val; ++ break; ++ default: ++ break; ++ } ++ return e1000e_setup_copper_link(hw); ++} ++ ++/** ++ * e1000_get_link_up_info_ich8lan - Get current link speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to store current link speed ++ * @duplex: pointer to store the current link duplex ++ * ++ * Calls the generic get_speed_and_duplex to retrieve the current link ++ * information and then calls the Kumeran lock loss workaround for links at ++ * gigabit speeds. ++ **/ ++static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, speed, duplex); ++ if (ret_val) ++ return ret_val; ++ ++ if ((hw->mac.type == e1000_ich8lan) && ++ (hw->phy.type == e1000_phy_igp_3) && ++ (*speed == SPEED_1000)) { ++ ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround ++ * @hw: pointer to the HW structure ++ * ++ * Work-around for 82566 Kumeran PCS lock loss: ++ * On link status change (i.e. PCI reset, speed change) and link is up and ++ * speed is gigabit- ++ * 0) if workaround is optionally disabled do nothing ++ * 1) wait 1ms for Kumeran link to come up ++ * 2) check Kumeran Diagnostic register PCS lock loss bit ++ * 3) if not set the link is locked (all is good), otherwise... ++ * 4) reset the PHY ++ * 5) repeat up to 10 times ++ * Note: this is only called for IGP3 copper when speed is 1gb. ++ **/ ++static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 phy_ctrl; ++ s32 ret_val; ++ u16 i, data; ++ bool link; ++ ++ if (!dev_spec->kmrn_lock_loss_workaround_enabled) ++ return 0; ++ ++ /* ++ * Make sure link is up before proceeding. If not just return. 
++ * Attempting this while link is negotiating fouled up link ++ * stability ++ */ ++ ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link); ++ if (!link) ++ return 0; ++ ++ for (i = 0; i < 10; i++) { ++ /* read once to clear */ ++ ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ return ret_val; ++ /* and again to get new status */ ++ ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data); ++ if (ret_val) ++ return ret_val; ++ ++ /* check for PCS lock */ ++ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) ++ return 0; ++ ++ /* Issue PHY reset */ ++ e1000_phy_hw_reset(hw); ++ mdelay(5); ++ } ++ /* Disable GigE link negotiation */ ++ phy_ctrl = er32(PHY_CTRL); ++ phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ /* ++ * Call gig speed drop workaround on Gig disable before accessing ++ * any PHY registers ++ */ ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* unable to acquire PCS lock */ ++ return -E1000_ERR_PHY; ++} ++ ++/** ++ * e1000_set_kmrn_lock_loss_workaround_ich8lan - Set Kumeran workaround state ++ * @hw: pointer to the HW structure ++ * @state: boolean value used to set the current Kumeran workaround state ++ * ++ * If ICH8, set the current Kumeran workaround state (enabled - true ++ * /disabled - false). ++ **/ ++void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, ++ bool state) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ ++ if (hw->mac.type != e1000_ich8lan) { ++ e_dbg("Workaround applies to ICH8 only.\n"); ++ return; ++ } ++ ++ dev_spec->kmrn_lock_loss_workaround_enabled = state; ++} ++ ++/** ++ * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 ++ * @hw: pointer to the HW structure ++ * ++ * Workaround for 82566 power-down on D3 entry: ++ * 1) disable gigabit link ++ * 2) write VR power-down enable ++ * 3) read it back ++ * Continue if successful, else issue LCD reset and repeat ++ **/ ++void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ u16 data; ++ u8 retry = 0; ++ ++ if (hw->phy.type != e1000_phy_igp_3) ++ return; ++ ++ /* Try the workaround twice (if needed) */ ++ do { ++ /* Disable link */ ++ reg = er32(PHY_CTRL); ++ reg |= (E1000_PHY_CTRL_GBE_DISABLE | ++ E1000_PHY_CTRL_NOND0A_GBE_DISABLE); ++ ew32(PHY_CTRL, reg); ++ ++ /* ++ * Call gig speed drop workaround on Gig disable before ++ * accessing any PHY registers ++ */ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ /* Write VR power-down enable */ ++ e1e_rphy(hw, IGP3_VR_CTRL, &data); ++ data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ e1e_wphy(hw, IGP3_VR_CTRL, data | IGP3_VR_CTRL_MODE_SHUTDOWN); ++ ++ /* Read it back and test */ ++ e1e_rphy(hw, IGP3_VR_CTRL, &data); ++ data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; ++ if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) ++ break; ++ ++ /* Issue PHY reset and repeat at most one more time */ ++ reg = er32(CTRL); ++ ew32(CTRL, reg | E1000_CTRL_PHY_RST); ++ retry++; ++ } while (retry); ++} ++ ++/** ++ * e1000e_gig_downshift_workaround_ich8lan - WoL from S5 stops working ++ * @hw: pointer to the HW structure ++ * ++ * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), ++ * LPLU, Gig disable, MDIC PHY reset): ++ * 1) Set Kumeran Near-end loopback ++ * 2) Clear Kumeran Near-end loopback ++ * Should only be called for ICH8[m] devices with any 1G Phy. 
++ **/ ++void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 reg_data; ++ ++ if ((hw->mac.type != e1000_ich8lan) || (hw->phy.type == e1000_phy_ife)) ++ return; ++ ++ ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ ®_data); ++ if (ret_val) ++ return; ++ reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++ if (ret_val) ++ return; ++ reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; ++ ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ++ reg_data); ++} ++ ++/** ++ * e1000_suspend_workarounds_ich8lan - workarounds needed during S0->Sx ++ * @hw: pointer to the HW structure ++ * ++ * During S0 to Sx transition, it is possible the link remains at gig ++ * instead of negotiating to a lower speed. Before going to Sx, set ++ * 'LPLU Enabled' and 'Gig Disable' to force link speed negotiation ++ * to a lower speed. For PCH and newer parts, the OEM bits PHY register ++ * (LED, GbE disable and LPLU configurations) also needs to be written. ++ * Parts that support (and are linked to a partner which support) EEE in ++ * 100Mbps should disable LPLU since 100Mbps w/ EEE requires less power ++ * than 10Mbps w/o EEE. ++ **/ ++void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) ++{ ++ struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; ++ u32 phy_ctrl; ++ s32 ret_val; ++ ++ phy_ctrl = er32(PHY_CTRL); ++ phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_GBE_DISABLE; ++ ++ if (hw->phy.type == e1000_phy_i217) { ++ u16 phy_reg; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ goto out; ++ ++ if (!dev_spec->eee_disable) { ++ u16 eee_advert; ++ ++ ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, ++ I217_EEE_ADVERTISEMENT); ++ if (ret_val) ++ goto release; ++ e1e_rphy_locked(hw, I82579_EMI_DATA, &eee_advert); ++ ++ /* Disable LPLU if both link partners support 100BaseT ++ * EEE and 100Full is advertised on both ends of the ++ * link. ++ */ ++ if ((eee_advert & I217_EEE_100_SUPPORTED) && ++ (dev_spec->eee_lp_ability & ++ I217_EEE_100_SUPPORTED) && ++ (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)) ++ phy_ctrl &= ~(E1000_PHY_CTRL_D0A_LPLU | ++ E1000_PHY_CTRL_NOND0A_LPLU); ++ } ++ ++ /* For i217 Intel Rapid Start Technology support, ++ * when the system is going into Sx and no manageability engine ++ * is present, the driver must configure proxy to reset only on ++ * power good. LPI (Low Power Idle) state must also reset only ++ * on power good, as well as the MTA (Multicast table array). ++ * The SMBus release must also be disabled on LCD reset. ++ */ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ ++ /* Enable proxy to reset only on power good. */ ++ e1e_rphy_locked(hw, I217_PROXY_CTRL, &phy_reg); ++ phy_reg |= I217_PROXY_CTRL_AUTO_DISABLE; ++ e1e_wphy_locked(hw, I217_PROXY_CTRL, phy_reg); ++ ++ /* Set bit enable LPI (EEE) to reset only on ++ * power good. ++ */ ++ e1e_rphy_locked(hw, I217_SxCTRL, &phy_reg); ++ phy_reg |= I217_SxCTRL_MASK; ++ e1e_wphy_locked(hw, I217_SxCTRL, phy_reg); ++ ++ /* Disable the SMB release on LCD reset. 
*/ ++ e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg); ++ phy_reg &= ~I217_MEMPWR; ++ e1e_wphy_locked(hw, I217_MEMPWR, phy_reg); ++ } ++ ++ /* Enable MTA to reset for Intel Rapid Start Technology ++ * Support ++ */ ++ e1e_rphy_locked(hw, I217_CGFREG, &phy_reg); ++ phy_reg |= I217_CGFREG_MASK; ++ e1e_wphy_locked(hw, I217_CGFREG, phy_reg); ++ ++release: ++ hw->phy.ops.release(hw); ++ } ++out: ++ ew32(PHY_CTRL, phy_ctrl); ++ ++ if (hw->mac.type == e1000_ich8lan) ++ e1000e_gig_downshift_workaround_ich8lan(hw); ++ ++ if (hw->mac.type >= e1000_pchlan) { ++ e1000_oem_bits_config_ich8lan(hw, false); ++ e1000_phy_hw_reset_ich8lan(hw); ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ e1000_write_smbus_addr(hw); ++ hw->phy.ops.release(hw); ++ } ++} ++ ++/** ++ * e1000_resume_workarounds_pchlan - workarounds needed during Sx->S0 ++ * @hw: pointer to the HW structure ++ * ++ * During Sx to S0 transitions on non-managed devices or managed devices ++ * on which PHY resets are not blocked, if the PHY registers cannot be ++ * accessed properly by the s/w toggle the LANPHYPC value to power cycle ++ * the PHY. ++ * On i217, setup Intel Rapid Start Technology. ++ **/ ++void e1000_resume_workarounds_pchlan(struct e1000_hw *hw) ++{ ++ u32 fwsm; ++ ++ if (hw->mac.type != e1000_pch2lan) ++ return; ++ ++ fwsm = er32(FWSM); ++ if (!(fwsm & E1000_ICH_FWSM_FW_VALID) || !e1000_check_reset_block(hw)) { ++ u16 phy_id1, phy_id2; ++ s32 ret_val; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) { ++ e_dbg("Failed to acquire PHY semaphore in resume\n"); ++ return; ++ } ++ ++ /* For i217 Intel Rapid Start Technology support when the system ++ * is transitioning from Sx and no manageability engine is present ++ * configure SMBus to restore on reset, disable proxy, and enable ++ * the reset on MTA (Multicast table array). ++ */ ++ if (hw->phy.type == e1000_phy_i217) { ++ u16 phy_reg; ++ ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) { ++ e_dbg("Failed to setup iRST\n"); ++ return; ++ } ++ ++ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { ++ /* Restore clear on SMB if no manageability engine ++ * is present ++ */ ++ ret_val = e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg); ++ if (ret_val) ++ goto _release; ++ phy_reg |= I217_MEMPWR_MASK; ++ e1e_wphy_locked(hw, I217_MEMPWR, phy_reg); ++ ++ /* Disable Proxy */ ++ e1e_wphy_locked(hw, I217_PROXY_CTRL, 0); ++ } ++ /* Enable reset on MTA */ ++ ret_val = e1e_rphy_locked(hw, I217_CGFREG, &phy_reg); ++ if (ret_val) ++ goto _release; ++ phy_reg &= ~I217_CGFREG_MASK; ++ e1e_wphy_locked(hw, I217_CGFREG, phy_reg); ++ _release: ++ if (ret_val) ++ e_dbg("Error %d in resume workarounds\n", ret_val); ++ hw->phy.ops.release(hw); ++ } ++ ++ /* Test access to the PHY registers by reading the ID regs */ ++ ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID1, &phy_id1); ++ if (ret_val) ++ goto release; ++ ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID2, &phy_id2); ++ if (ret_val) ++ goto release; ++ ++ if (hw->phy.id == ((u32)(phy_id1 << 16) | ++ (u32)(phy_id2 & PHY_REVISION_MASK))) ++ goto release; ++ ++ e1000_toggle_lanphypc_value_ich8lan(hw); ++ ++ hw->phy.ops.release(hw); ++ msleep(50); ++ e1000_phy_hw_reset(hw); ++ msleep(50); ++ return; ++ } ++ ++release: ++ hw->phy.ops.release(hw); ++ ++ return; ++} ++ ++/** ++ * e1000_cleanup_led_ich8lan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. 
++ **/ ++static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); ++ ++ ew32(LEDCTL, hw->mac.ledctl_default); ++ return 0; ++} ++ ++/** ++ * e1000_led_on_ich8lan - Turn LEDs on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LEDs. ++ **/ ++static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); ++ ++ ew32(LEDCTL, hw->mac.ledctl_mode2); ++ return 0; ++} ++ ++/** ++ * e1000_led_off_ich8lan - Turn LEDs off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LEDs. ++ **/ ++static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) ++{ ++ if (hw->phy.type == e1000_phy_ife) ++ return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, ++ (IFE_PSCL_PROBE_MODE | ++ IFE_PSCL_PROBE_LEDS_OFF)); ++ ++ ew32(LEDCTL, hw->mac.ledctl_mode1); ++ return 0; ++} ++ ++/** ++ * e1000_setup_led_pchlan - Configures SW controllable LED ++ * @hw: pointer to the HW structure ++ * ++ * This prepares the SW controllable LED for use. ++ **/ ++static s32 e1000_setup_led_pchlan(struct e1000_hw *hw) ++{ ++ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1); ++} ++ ++/** ++ * e1000_cleanup_led_pchlan - Restore the default LED operation ++ * @hw: pointer to the HW structure ++ * ++ * Return the LED back to the default configuration. ++ **/ ++static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw) ++{ ++ return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default); ++} ++ ++/** ++ * e1000_led_on_pchlan - Turn LEDs on ++ * @hw: pointer to the HW structure ++ * ++ * Turn on the LEDs. ++ **/ ++static s32 e1000_led_on_pchlan(struct e1000_hw *hw) ++{ ++ u16 data = (u16)hw->mac.ledctl_mode2; ++ u32 i, led; ++ ++ /* ++ * If no link, then turn LED on by setting the invert bit ++ * for each LED that's mode is "link_up" in ledctl_mode2. ++ */ ++ if (!(er32(STATUS) & E1000_STATUS_LU)) { ++ for (i = 0; i < 3; i++) { ++ led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; ++ if ((led & E1000_PHY_LED0_MODE_MASK) != ++ E1000_LEDCTL_MODE_LINK_UP) ++ continue; ++ if (led & E1000_PHY_LED0_IVRT) ++ data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); ++ else ++ data |= (E1000_PHY_LED0_IVRT << (i * 5)); ++ } ++ } ++ ++ return e1e_wphy(hw, HV_LED_CONFIG, data); ++} ++ ++/** ++ * e1000_led_off_pchlan - Turn LEDs off ++ * @hw: pointer to the HW structure ++ * ++ * Turn off the LEDs. ++ **/ ++static s32 e1000_led_off_pchlan(struct e1000_hw *hw) ++{ ++ u16 data = (u16)hw->mac.ledctl_mode1; ++ u32 i, led; ++ ++ /* ++ * If no link, then turn LED off by clearing the invert bit ++ * for each LED that's mode is "link_up" in ledctl_mode1. ++ */ ++ if (!(er32(STATUS) & E1000_STATUS_LU)) { ++ for (i = 0; i < 3; i++) { ++ led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; ++ if ((led & E1000_PHY_LED0_MODE_MASK) != ++ E1000_LEDCTL_MODE_LINK_UP) ++ continue; ++ if (led & E1000_PHY_LED0_IVRT) ++ data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); ++ else ++ data |= (E1000_PHY_LED0_IVRT << (i * 5)); ++ } ++ } ++ ++ return e1e_wphy(hw, HV_LED_CONFIG, data); ++} ++ ++/** ++ * e1000_get_cfg_done_ich8lan - Read config done bit after Full or PHY reset ++ * @hw: pointer to the HW structure ++ * ++ * Read appropriate register for the config done bit for completion status ++ * and configure the PHY through s/w for EEPROM-less parts. 
++ * ++ * NOTE: some silicon which is EEPROM-less will fail trying to read the ++ * config done bit, so only an error is logged and continues. If we were ++ * to return with error, EEPROM-less silicon would not be able to be reset ++ * or change link. ++ **/ ++static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u32 bank = 0; ++ u32 status; ++ ++ e1000e_get_cfg_done(hw); ++ ++ /* Wait for indication from h/w that it has completed basic config */ ++ if (hw->mac.type >= e1000_ich10lan) { ++ e1000_lan_init_done_ich8lan(hw); ++ } else { ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) { ++ /* ++ * When auto config read does not complete, do not ++ * return with an error. This can happen in situations ++ * where there is no eeprom and prevents getting link. ++ */ ++ e_dbg("Auto Read Done did not complete\n"); ++ ret_val = 0; ++ } ++ } ++ ++ /* Clear PHY Reset Asserted bit */ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_PHYRA) ++ ew32(STATUS, status & ~E1000_STATUS_PHYRA); ++ else ++ e_dbg("PHY Reset Asserted not set - needs delay\n"); ++ ++ /* If EEPROM is not marked present, init the IGP 3 PHY manually */ ++ if (hw->mac.type <= e1000_ich9lan) { ++ if (((er32(EECD) & E1000_EECD_PRES) == 0) && ++ (hw->phy.type == e1000_phy_igp_3)) { ++ e1000e_phy_init_script_igp3(hw); ++ } ++ } else { ++ if (e1000_valid_nvm_bank_detect_ich8lan(hw, &bank)) { ++ /* Maybe we should do a basic PHY config */ ++ e_dbg("EEPROM not present\n"); ++ ret_val = -E1000_ERR_CONFIG; ++ } ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(hw->mac.ops.check_mng_mode(hw) || ++ hw->phy.ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears hardware counters specific to the silicon family and calls ++ * clear_hw_cntrs_generic to clear all general purpose counters. 
++ **/ ++static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) ++{ ++ u16 phy_data; ++ s32 ret_val; ++ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ /* Clear PHY statistics registers */ ++ if ((hw->phy.type == e1000_phy_82578) || ++ (hw->phy.type == e1000_phy_82579) || ++ (hw->phy.type == e1000_phy_i217) || ++ (hw->phy.type == e1000_phy_82577)) { ++ ret_val = hw->phy.ops.acquire(hw); ++ if (ret_val) ++ return; ++ ret_val = hw->phy.ops.set_page(hw, ++ HV_STATS_PAGE << IGP_PAGE_SHIFT); ++ if (ret_val) ++ goto release; ++ hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data); ++ hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data); ++release: ++ hw->phy.ops.release(hw); ++ } ++} ++ ++static const struct e1000_mac_operations ich8_mac_ops = { ++ .id_led_init = e1000e_id_led_init, ++ /* check_mng_mode dependent on mac type */ ++ .check_for_link = e1000_check_for_copper_link_ich8lan, ++ /* cleanup_led dependent on mac type */ ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan, ++ .get_bus_info = e1000_get_bus_info_ich8lan, ++ .set_lan_id = e1000_set_lan_id_single_port, ++ .get_link_up_info = e1000_get_link_up_info_ich8lan, ++ /* led_on dependent on mac type */ ++ /* led_off dependent on mac type */ ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .reset_hw = e1000_reset_hw_ich8lan, ++ .init_hw = e1000_init_hw_ich8lan, ++ .setup_link = e1000_setup_link_ich8lan, ++ .setup_physical_interface= e1000_setup_copper_link_ich8lan, ++ /* id_led_init dependent on mac type */ ++}; ++ ++static const struct e1000_phy_operations ich8_phy_ops = { ++ .acquire = e1000_acquire_swflag_ich8lan, ++ .check_reset_block = e1000_check_reset_block_ich8lan, ++ .commit = NULL, ++ .get_cfg_done = e1000_get_cfg_done_ich8lan, ++ .get_cable_length = e1000e_get_cable_length_igp_2, ++ .read_reg = e1000e_read_phy_reg_igp, ++ .release = e1000_release_swflag_ich8lan, ++ .reset = e1000_phy_hw_reset_ich8lan, ++ .set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan, ++ .set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan, ++ .write_reg = e1000e_write_phy_reg_igp, ++}; ++ ++static const struct e1000_nvm_operations ich8_nvm_ops = { ++ .acquire = e1000_acquire_nvm_ich8lan, ++ .read = e1000_read_nvm_ich8lan, ++ .release = e1000_release_nvm_ich8lan, ++ .update = e1000_update_nvm_checksum_ich8lan, ++ .valid_led_default = e1000_valid_led_default_ich8lan, ++ .validate = e1000_validate_nvm_checksum_ich8lan, ++ .write = e1000_write_nvm_ich8lan, ++}; ++ ++const struct e1000_info e1000_ich8_info = { ++ .mac = e1000_ich8lan, ++ .flags = FLAG_HAS_WOL ++ | FLAG_IS_ICH ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | 
FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 8, ++ .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_ich9_info = { ++ .mac = e1000_ich9lan, ++ .flags = FLAG_HAS_JUMBO_FRAMES ++ | FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_ERT ++ | FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 10, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_ich10_info = { ++ .mac = e1000_ich10lan, ++ .flags = FLAG_HAS_JUMBO_FRAMES ++ | FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_ERT ++ | FLAG_HAS_FLASH ++ | FLAG_APME_IN_WUC, ++ .pba = 10, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch_info = { ++ .mac = e1000_pchlan, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_DISABLE_FC_PAUSE_TIME /* errata */ ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS, ++ .pba = 26, ++ .max_hw_frame_size = 4096, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch2_info = { ++ .mac = e1000_pch2lan, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS ++ | FLAG2_HAS_EEE, ++ .pba = 26, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; ++ ++const struct e1000_info e1000_pch_lpt_info = { ++ .mac = e1000_pch_lpt, ++ .flags = FLAG_IS_ICH ++ | FLAG_HAS_WOL ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_HAS_AMT ++ | FLAG_HAS_FLASH ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_APME_IN_WUC, ++ .flags2 = FLAG2_HAS_PHY_STATS ++ | FLAG2_HAS_EEE, ++ .pba = 26, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_ich8lan, ++ .mac_ops = &ich8_mac_ops, ++ .phy_ops = &ich8_phy_ops, ++ .nvm_ops = &ich8_nvm_ops, ++}; +--- linux/drivers/xenomai/net/drivers/e1000e/80003es2lan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/80003es2lan.c 2021-04-07 16:01:27.194634213 +0800 +@@ -0,0 +1,1515 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. 
++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++/* ++ * 80003ES2LAN Gigabit Ethernet Controller (Copper) ++ * 80003ES2LAN Gigabit Ethernet Controller (Serdes) ++ */ ++ ++#include "e1000.h" ++ ++#define E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL 0x00 ++#define E1000_KMRNCTRLSTA_OFFSET_INB_CTRL 0x02 ++#define E1000_KMRNCTRLSTA_OFFSET_HD_CTRL 0x10 ++#define E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE 0x1F ++ ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS 0x0008 ++#define E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS 0x0800 ++#define E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING 0x0010 ++ ++#define E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT 0x0004 ++#define E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT 0x0000 ++#define E1000_KMRNCTRLSTA_OPMODE_E_IDLE 0x2000 ++ ++#define E1000_KMRNCTRLSTA_OPMODE_MASK 0x000C ++#define E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO 0x0004 ++ ++#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00 /* Gigabit Carry Extend Padding */ ++#define DEFAULT_TCTL_EXT_GCEX_80003ES2LAN 0x00010000 ++ ++#define DEFAULT_TIPG_IPGT_1000_80003ES2LAN 0x8 ++#define DEFAULT_TIPG_IPGT_10_100_80003ES2LAN 0x9 ++ ++/* GG82563 PHY Specific Status Register (Page 0, Register 16 */ ++#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE 0x0002 /* 1=Reversal Disab. */ ++#define GG82563_PSCR_CROSSOVER_MODE_MASK 0x0060 ++#define GG82563_PSCR_CROSSOVER_MODE_MDI 0x0000 /* 00=Manual MDI */ ++#define GG82563_PSCR_CROSSOVER_MODE_MDIX 0x0020 /* 01=Manual MDIX */ ++#define GG82563_PSCR_CROSSOVER_MODE_AUTO 0x0060 /* 11=Auto crossover */ ++ ++/* PHY Specific Control Register 2 (Page 0, Register 26) */ ++#define GG82563_PSCR2_REVERSE_AUTO_NEG 0x2000 ++ /* 1=Reverse Auto-Negotiation */ ++ ++/* MAC Specific Control Register (Page 2, Register 21) */ ++/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */ ++#define GG82563_MSCR_TX_CLK_MASK 0x0007 ++#define GG82563_MSCR_TX_CLK_10MBPS_2_5 0x0004 ++#define GG82563_MSCR_TX_CLK_100MBPS_25 0x0005 ++#define GG82563_MSCR_TX_CLK_1000MBPS_25 0x0007 ++ ++#define GG82563_MSCR_ASSERT_CRS_ON_TX 0x0010 /* 1=Assert */ ++ ++/* DSP Distance Register (Page 5, Register 26) */ ++#define GG82563_DSPD_CABLE_LENGTH 0x0007 /* 0 = <50M ++ 1 = 50-80M ++ 2 = 80-110M ++ 3 = 110-140M ++ 4 = >140M */ ++ ++/* Kumeran Mode Control Register (Page 193, Register 16) */ ++#define GG82563_KMCR_PASS_FALSE_CARRIER 0x0800 ++ ++/* Max number of times Kumeran read/write should be validated */ ++#define GG82563_MAX_KMRN_RETRY 0x5 ++ ++/* Power Management Control Register (Page 193, Register 20) */ ++#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE 0x0001 ++ /* 1=Enable SERDES Electrical Idle */ ++ ++/* In-Band Control Register (Page 194, Register 18) */ ++#define GG82563_ICR_DIS_PADDING 0x0010 /* Disable Padding */ ++ ++/* ++ * A table for the GG82563 cable length where the range is defined ++ * with a lower bound at "index" and the upper bound at ++ * "index + 5". 
++ */ ++static const u16 e1000_gg82563_cable_length_table[] = { ++ 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; ++#define GG82563_CABLE_LENGTH_TABLE_SIZE \ ++ ARRAY_SIZE(e1000_gg82563_cable_length_table) ++ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); ++static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); ++static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 *data); ++static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 data); ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); ++ ++/** ++ * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ ++ if (hw->phy.media_type != e1000_media_type_copper) { ++ phy->type = e1000_phy_none; ++ return 0; ++ } else { ++ phy->ops.power_up = e1000_power_up_phy_copper; ++ phy->ops.power_down = e1000_power_down_phy_copper_80003es2lan; ++ } ++ ++ phy->addr = 1; ++ phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; ++ phy->reset_delay_us = 100; ++ phy->type = e1000_phy_gg82563; ++ ++ /* This can only be done after all function pointers are setup. */ ++ ret_val = e1000e_get_phy_id(hw); ++ ++ /* Verify phy id */ ++ if (phy->id != GG82563_E_PHY_ID) ++ return -E1000_ERR_PHY; ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_nvm_info *nvm = &hw->nvm; ++ u32 eecd = er32(EECD); ++ u16 size; ++ ++ nvm->opcode_bits = 8; ++ nvm->delay_usec = 1; ++ switch (nvm->override) { ++ case e1000_nvm_override_spi_large: ++ nvm->page_size = 32; ++ nvm->address_bits = 16; ++ break; ++ case e1000_nvm_override_spi_small: ++ nvm->page_size = 8; ++ nvm->address_bits = 8; ++ break; ++ default: ++ nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; ++ nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; ++ break; ++ } ++ ++ nvm->type = e1000_nvm_eeprom_spi; ++ ++ size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> ++ E1000_EECD_SIZE_EX_SHIFT); ++ ++ /* ++ * Added to a constant, "size" becomes the left-shift value ++ * for setting word_size. ++ */ ++ size += NVM_WORD_SIZE_BASE_SHIFT; ++ ++ /* EEPROM access above 16k is unsupported */ ++ if (size > 14) ++ size = 14; ++ nvm->word_size = 1 << size; ++ ++ return 0; ++} ++ ++/** ++ * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. 
++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_init_mac_params_80003es2lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_mac_info *mac = &hw->mac; ++ struct e1000_mac_operations *func = &mac->ops; ++ ++ /* Set media type */ ++ switch (adapter->pdev->device) { ++ case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: ++ hw->phy.media_type = e1000_media_type_internal_serdes; ++ break; ++ default: ++ hw->phy.media_type = e1000_media_type_copper; ++ break; ++ } ++ ++ /* Set mta register count */ ++ mac->mta_reg_count = 128; ++ /* Set rar entry count */ ++ mac->rar_entry_count = E1000_RAR_ENTRIES; ++ /* FWSM register */ ++ mac->has_fwsm = true; ++ /* ARC supported; valid only if manageability features are enabled. */ ++ mac->arc_subsystem_valid = ++ (er32(FWSM) & E1000_FWSM_MODE_MASK) ++ ? true : false; ++ /* Adaptive IFS not supported */ ++ mac->adaptive_ifs = false; ++ ++ /* check for link */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ func->setup_physical_interface = e1000_setup_copper_link_80003es2lan; ++ func->check_for_link = e1000e_check_for_copper_link; ++ break; ++ case e1000_media_type_fiber: ++ func->setup_physical_interface = e1000e_setup_fiber_serdes_link; ++ func->check_for_link = e1000e_check_for_fiber_link; ++ break; ++ case e1000_media_type_internal_serdes: ++ func->setup_physical_interface = e1000e_setup_fiber_serdes_link; ++ func->check_for_link = e1000e_check_for_serdes_link; ++ break; ++ default: ++ return -E1000_ERR_CONFIG; ++ break; ++ } ++ ++ /* set lan id for port to determine which phy lock to use */ ++ hw->mac.ops.set_lan_id(hw); ++ ++ return 0; ++} ++ ++static s32 e1000_get_variants_80003es2lan(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ s32 rc; ++ ++ rc = e1000_init_mac_params_80003es2lan(adapter); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_nvm_params_80003es2lan(hw); ++ if (rc) ++ return rc; ++ ++ rc = e1000_init_phy_params_80003es2lan(hw); ++ if (rc) ++ return rc; ++ ++ return 0; ++} ++ ++/** ++ * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to acquire access rights to the correct PHY. ++ **/ ++static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_phy_80003es2lan - Release rights to access PHY ++ * @hw: pointer to the HW structure ++ * ++ * A wrapper to release access rights to the correct PHY. ++ **/ ++static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_mac_csr_80003es2lan - Acquire rights to access Kumeran register ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the Kumeran interface. 
++ * ++ **/ ++static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = E1000_SWFW_CSR_SM; ++ ++ return e1000_acquire_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_release_mac_csr_80003es2lan - Release rights to access Kumeran Register ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the Kumeran interface ++ **/ ++static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw) ++{ ++ u16 mask; ++ ++ mask = E1000_SWFW_CSR_SM; ++ ++ e1000_release_swfw_sync_80003es2lan(hw, mask); ++} ++ ++/** ++ * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Acquire the semaphore to access the EEPROM. ++ **/ ++static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ ++ ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_acquire_nvm(hw); ++ ++ if (ret_val) ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM ++ * @hw: pointer to the HW structure ++ * ++ * Release the semaphore used to access the EEPROM. ++ **/ ++static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) ++{ ++ e1000e_release_nvm(hw); ++ e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); ++} ++ ++/** ++ * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask ++ * will also specify which port we're acquiring the lock for. ++ **/ ++static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ u32 swmask = mask; ++ u32 fwmask = mask << 16; ++ s32 i = 0; ++ s32 timeout = 50; ++ ++ while (i < timeout) { ++ if (e1000e_get_hw_semaphore(hw)) ++ return -E1000_ERR_SWFW_SYNC; ++ ++ swfw_sync = er32(SW_FW_SYNC); ++ if (!(swfw_sync & (fwmask | swmask))) ++ break; ++ ++ /* ++ * Firmware currently using resource (fwmask) ++ * or other software thread using resource (swmask) ++ */ ++ e1000e_put_hw_semaphore(hw); ++ mdelay(5); ++ i++; ++ } ++ ++ if (i == timeout) { ++ e_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n"); ++ return -E1000_ERR_SWFW_SYNC; ++ } ++ ++ swfw_sync |= swmask; ++ ew32(SW_FW_SYNC, swfw_sync); ++ ++ e1000e_put_hw_semaphore(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore ++ * @hw: pointer to the HW structure ++ * @mask: specifies which semaphore to acquire ++ * ++ * Release the SW/FW semaphore used to access the PHY or NVM. The mask ++ * will also specify which port we're releasing the lock for. ++ **/ ++static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) ++{ ++ u32 swfw_sync; ++ ++ while (e1000e_get_hw_semaphore(hw) != 0) ++ ; /* Empty */ ++ ++ swfw_sync = er32(SW_FW_SYNC); ++ swfw_sync &= ~mask; ++ ew32(SW_FW_SYNC, swfw_sync); ++ ++ e1000e_put_hw_semaphore(hw); ++} ++ ++/** ++ * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: pointer to the data returned from the operation ++ * ++ * Read the GG82563 PHY register. 
++ **/ ++static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 *data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ if (hw->dev_spec.e80003es2lan.mdic_wa_enable == true) { ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ udelay(200); ++ ++ /* ...and verify the command was successful. */ ++ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ ret_val = -E1000_ERR_PHY; ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ udelay(200); ++ ++ ret_val = e1000e_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ udelay(200); ++ } else { ++ ret_val = e1000e_read_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ } ++ ++ e1000_release_phy_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @data: value to write to the register ++ * ++ * Write to the GG82563 PHY register. ++ **/ ++static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, ++ u32 offset, u16 data) ++{ ++ s32 ret_val; ++ u32 page_select; ++ u16 temp; ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ /* Select Configuration Page */ ++ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { ++ page_select = GG82563_PHY_PAGE_SELECT; ++ } else { ++ /* ++ * Use Alternative Page Select register to access ++ * registers 30 and 31 ++ */ ++ page_select = GG82563_PHY_PAGE_SELECT_ALT; ++ } ++ ++ temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ++ ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp); ++ if (ret_val) { ++ e1000_release_phy_80003es2lan(hw); ++ return ret_val; ++ } ++ ++ if (hw->dev_spec.e80003es2lan.mdic_wa_enable == true) { ++ /* ++ * The "ready" bit in the MDIC register may be incorrectly set ++ * before the device has completed the "Page Select" MDI ++ * transaction. So we wait 200us after each MDI command... ++ */ ++ udelay(200); ++ ++ /* ...and verify the command was successful. 
*/ ++ ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp); ++ ++ if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { ++ e1000_release_phy_80003es2lan(hw); ++ return -E1000_ERR_PHY; ++ } ++ ++ udelay(200); ++ ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ ++ udelay(200); ++ } else { ++ ret_val = e1000e_write_phy_reg_mdic(hw, ++ MAX_PHY_REG_ADDRESS & offset, ++ data); ++ } ++ ++ e1000_release_phy_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_nvm_80003es2lan - Write to ESB2 NVM ++ * @hw: pointer to the HW structure ++ * @offset: offset of the register to read ++ * @words: number of words to write ++ * @data: buffer of data to write to the NVM ++ * ++ * Write "words" of data to the ESB2 NVM. ++ **/ ++static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, ++ u16 words, u16 *data) ++{ ++ return e1000e_write_nvm_spi(hw, offset, words, data); ++} ++ ++/** ++ * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete ++ * @hw: pointer to the HW structure ++ * ++ * Wait a specific amount of time for manageability processes to complete. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 timeout = PHY_CFG_TIMEOUT; ++ u32 mask = E1000_NVM_CFG_DONE_PORT_0; ++ ++ if (hw->bus.func == 1) ++ mask = E1000_NVM_CFG_DONE_PORT_1; ++ ++ while (timeout) { ++ if (er32(EEMNGCTL) & mask) ++ break; ++ usleep_range(1000, 2000); ++ timeout--; ++ } ++ if (!timeout) { ++ e_dbg("MNG configuration cycle has not completed.\n"); ++ return -E1000_ERR_RESET; ++ } ++ ++ return 0; ++} ++ ++/** ++ * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex ++ * @hw: pointer to the HW structure ++ * ++ * Force the speed and duplex settings onto the PHY. This is a ++ * function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 phy_data; ++ bool link; ++ ++ /* ++ * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI ++ * forced whenever speed and duplex are forced. ++ */ ++ ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e_dbg("GG82563 PSCR: %X\n", phy_data); ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ e1000e_phy_force_speed_duplex_setup(hw, &phy_data); ++ ++ /* Reset the phy to commit changes. */ ++ phy_data |= MII_CR_RESET; ++ ++ ret_val = e1e_wphy(hw, PHY_CONTROL, phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ udelay(1); ++ ++ if (hw->phy.autoneg_wait_to_complete) { ++ e_dbg("Waiting for forced speed/duplex link " ++ "on GG82563 phy.\n"); ++ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ ++ if (!link) { ++ /* ++ * We didn't get link. ++ * Reset the DSP and cross our fingers. 
++ */ ++ ret_val = e1000e_phy_reset_dsp(hw); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* Try once more */ ++ ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT, ++ 100000, &link); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Resetting the phy means we need to verify the TX_CLK corresponds ++ * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. ++ */ ++ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; ++ if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) ++ phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; ++ else ++ phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; ++ ++ /* ++ * In addition, we must re-enable CRS on Tx for both half and full ++ * duplex. ++ */ ++ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_get_cable_length_80003es2lan - Set approximate cable length ++ * @hw: pointer to the HW structure ++ * ++ * Find the approximate cable length as measured by the GG82563 PHY. ++ * This is a function pointer entry point called by the phy module. ++ **/ ++static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val = 0; ++ u16 phy_data, index; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); ++ if (ret_val) ++ goto out; ++ ++ index = phy_data & GG82563_DSPD_CABLE_LENGTH; ++ ++ if (index >= GG82563_CABLE_LENGTH_TABLE_SIZE - 5) { ++ ret_val = -E1000_ERR_PHY; ++ goto out; ++ } ++ ++ phy->min_cable_length = e1000_gg82563_cable_length_table[index]; ++ phy->max_cable_length = e1000_gg82563_cable_length_table[index + 5]; ++ ++ phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_get_link_up_info_80003es2lan - Report speed and duplex ++ * @hw: pointer to the HW structure ++ * @speed: pointer to speed buffer ++ * @duplex: pointer to duplex buffer ++ * ++ * Retrieve the current speed and duplex configuration. ++ **/ ++static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, ++ u16 *duplex) ++{ ++ s32 ret_val; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, ++ speed, ++ duplex); ++ hw->phy.ops.cfg_on_link_up(hw); ++ } else { ++ ret_val = e1000e_get_speed_and_duplex_fiber_serdes(hw, ++ speed, ++ duplex); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_reset_hw_80003es2lan - Reset the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Perform a global reset to the ESB2 controller. ++ **/ ++static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ ++ /* ++ * Prevent the PCI-E bus from sticking if there is no TLP connection ++ * on the last TLP read/write transaction when MAC is reset. ++ */ ++ ret_val = e1000e_disable_pcie_master(hw); ++ if (ret_val) ++ e_dbg("PCI-E Master disable polling has failed.\n"); ++ ++ e_dbg("Masking off all interrupts\n"); ++ ew32(IMC, 0xffffffff); ++ ++ ew32(RCTL, 0); ++ ew32(TCTL, E1000_TCTL_PSP); ++ e1e_flush(); ++ ++ usleep_range(10000, 20000); ++ ++ ctrl = er32(CTRL); ++ ++ ret_val = e1000_acquire_phy_80003es2lan(hw); ++ e_dbg("Issuing a global reset to MAC\n"); ++ ew32(CTRL, ctrl | E1000_CTRL_RST); ++ e1000_release_phy_80003es2lan(hw); ++ ++ ret_val = e1000e_get_auto_rd_done(hw); ++ if (ret_val) ++ /* We don't want to continue accessing MAC registers. 
*/ ++ return ret_val; ++ ++ /* Clear any pending interrupt events. */ ++ ew32(IMC, 0xffffffff); ++ er32(ICR); ++ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_init_hw_80003es2lan - Initialize the ESB2 controller ++ * @hw: pointer to the HW structure ++ * ++ * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. ++ **/ ++static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_mac_info *mac = &hw->mac; ++ u32 reg_data; ++ s32 ret_val; ++ u16 kum_reg_data; ++ u16 i; ++ ++ e1000_initialize_hw_bits_80003es2lan(hw); ++ ++ /* Initialize identification LED */ ++ ret_val = e1000e_id_led_init(hw); ++ if (ret_val) ++ e_dbg("Error initializing identification LED\n"); ++ /* This is not fatal and we should not stop init due to this */ ++ ++ /* Disabling VLAN filtering */ ++ e_dbg("Initializing the IEEE VLAN\n"); ++ mac->ops.clear_vfta(hw); ++ ++ /* Setup the receive address. */ ++ e1000e_init_rx_addrs(hw, mac->rar_entry_count); ++ ++ /* Zero out the Multicast HASH table */ ++ e_dbg("Zeroing the MTA\n"); ++ for (i = 0; i < mac->mta_reg_count; i++) ++ E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); ++ ++ /* Setup link and flow control */ ++ ret_val = e1000e_setup_link(hw); ++ ++ /* Disable IBIST slave mode (far-end loopback) */ ++ e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ &kum_reg_data); ++ kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; ++ e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ++ kum_reg_data); ++ ++ /* Set the transmit descriptor write-back policy */ ++ reg_data = er32(TXDCTL(0)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(0), reg_data); ++ ++ /* ...for both queues. */ ++ reg_data = er32(TXDCTL(1)); ++ reg_data = (reg_data & ~E1000_TXDCTL_WTHRESH) | ++ E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC; ++ ew32(TXDCTL(1), reg_data); ++ ++ /* Enable retransmit on late collisions */ ++ reg_data = er32(TCTL); ++ reg_data |= E1000_TCTL_RTLC; ++ ew32(TCTL, reg_data); ++ ++ /* Configure Gigabit Carry Extend Padding */ ++ reg_data = er32(TCTL_EXT); ++ reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; ++ reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; ++ ew32(TCTL_EXT, reg_data); ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ reg_data = er32(TIPG); ++ reg_data &= ~E1000_TIPG_IPGT_MASK; ++ reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ ew32(TIPG, reg_data); ++ ++ reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); ++ reg_data &= ~0x00100000; ++ E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); ++ ++ /* default to true to enable the MDIC W/A */ ++ hw->dev_spec.e80003es2lan.mdic_wa_enable = true; ++ ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET >> ++ E1000_KMRNCTRLSTA_OFFSET_SHIFT, ++ &i); ++ if (!ret_val) { ++ if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) == ++ E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) ++ hw->dev_spec.e80003es2lan.mdic_wa_enable = false; ++ } ++ ++ /* ++ * Clear all of the statistics registers (clear on read). It is ++ * important that we do this after we have tried to establish link ++ * because the symbol error count will increment wildly if there ++ * is no link. ++ */ ++ e1000_clear_hw_cntrs_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Initializes required hardware-dependent bits needed for normal operation. 
++ **/ ++static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 reg; ++ ++ /* Transmit Descriptor Control 0 */ ++ reg = er32(TXDCTL(0)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(0), reg); ++ ++ /* Transmit Descriptor Control 1 */ ++ reg = er32(TXDCTL(1)); ++ reg |= (1 << 22); ++ ew32(TXDCTL(1), reg); ++ ++ /* Transmit Arbitration Control 0 */ ++ reg = er32(TARC(0)); ++ reg &= ~(0xF << 27); /* 30:27 */ ++ if (hw->phy.media_type != e1000_media_type_copper) ++ reg &= ~(1 << 20); ++ ew32(TARC(0), reg); ++ ++ /* Transmit Arbitration Control 1 */ ++ reg = er32(TARC(1)); ++ if (er32(TCTL) & E1000_TCTL_MULR) ++ reg &= ~(1 << 28); ++ else ++ reg |= (1 << 28); ++ ew32(TARC(1), reg); ++} ++ ++/** ++ * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link ++ * @hw: pointer to the HW structure ++ * ++ * Setup some GG82563 PHY registers for obtaining link ++ **/ ++static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) ++{ ++ struct e1000_phy_info *phy = &hw->phy; ++ s32 ret_val; ++ u32 ctrl_ext; ++ u16 data; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ++ /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ ++ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* ++ * Options: ++ * MDI/MDI-X = 0 (default) ++ * 0 - Auto for all speeds ++ * 1 - MDI mode ++ * 2 - MDI-X mode ++ * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) ++ */ ++ ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; ++ ++ switch (phy->mdix) { ++ case 1: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDI; ++ break; ++ case 2: ++ data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; ++ break; ++ case 0: ++ default: ++ data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; ++ break; ++ } ++ ++ /* ++ * Options: ++ * disable_polarity_correction = 0 (default) ++ * Automatic Correction for Reversed Cable Polarity ++ * 0 - Disabled ++ * 1 - Enabled ++ */ ++ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ if (phy->disable_polarity_correction) ++ data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ /* SW Reset the PHY so all changes take effect */ ++ ret_val = e1000e_commit_phy(hw); ++ if (ret_val) { ++ e_dbg("Error Resetting the PHY\n"); ++ return ret_val; ++ } ++ ++ /* Bypass Rx and Tx FIFO's */ ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL, ++ E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | ++ E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ &data); ++ if (ret_val) ++ return ret_val; ++ data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE, ++ data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL_2, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ++ ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL_2, data); ++ if (ret_val) ++ return ret_val; ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext &= ~(E1000_CTRL_EXT_LINK_MODE_MASK); ++ ew32(CTRL_EXT, ctrl_ext); ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); ++ 
if (ret_val) ++ return ret_val; ++ ++ /* ++ * Do not init these registers when the HW is in IAMT mode, since the ++ * firmware will have already initialized them. We only initialize ++ * them if the HW is not in IAMT mode. ++ */ ++ if (!e1000e_check_mng_mode(hw)) { ++ /* Enable Electrical Idle on the PHY */ ++ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ++ ret_val = e1e_wphy(hw, GG82563_PHY_PWR_MGMT_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ } ++ ++ /* ++ * Workaround: Disable padding in Kumeran interface in the MAC ++ * and in the PHY to avoid CRC errors. ++ */ ++ ret_val = e1e_rphy(hw, GG82563_PHY_INBAND_CTRL, &data); ++ if (ret_val) ++ return ret_val; ++ ++ data |= GG82563_ICR_DIS_PADDING; ++ ret_val = e1e_wphy(hw, GG82563_PHY_INBAND_CTRL, data); ++ if (ret_val) ++ return ret_val; ++ ++ return 0; ++} ++ ++/** ++ * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 ++ * @hw: pointer to the HW structure ++ * ++ * Essentially a wrapper for setting up all things "copper" related. ++ * This is a function pointer entry point called by the mac module. ++ **/ ++static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) ++{ ++ u32 ctrl; ++ s32 ret_val; ++ u16 reg_data; ++ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_SLU; ++ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); ++ ew32(CTRL, ctrl); ++ ++ /* ++ * Set the mac to wait the maximum time between each ++ * iteration and increase the max iterations when ++ * polling the phy; this fixes erroneous timeouts at 10Mbps. ++ */ ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), ++ 0xFFFF); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= 0x3F; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ ®_data); ++ if (ret_val) ++ return ret_val; ++ reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1000e_setup_copper_link(hw); ++ ++ return 0; ++} ++ ++/** ++ * e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. 
++ **/ ++static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ u16 speed; ++ u16 duplex; ++ ++ if (hw->phy.media_type == e1000_media_type_copper) { ++ ret_val = e1000e_get_speed_and_duplex_copper(hw, &speed, ++ &duplex); ++ if (ret_val) ++ return ret_val; ++ ++ if (speed == SPEED_1000) ++ ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); ++ else ++ ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, duplex); ++ } ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation ++ * @hw: pointer to the HW structure ++ * @duplex: current duplex setting ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * 10/100 operation. ++ **/ ++static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) ++{ ++ s32 ret_val; ++ u32 tipg; ++ u32 i = 0; ++ u16 reg_data, reg_data2; ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = er32(TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; ++ ew32(TIPG, tipg); ++ ++ do { ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); ++ if (ret_val) ++ return ret_val; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ if (duplex == HALF_DUPLEX) ++ reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; ++ else ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return 0; ++} ++ ++/** ++ * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation ++ * @hw: pointer to the HW structure ++ * ++ * Configure the KMRN interface by applying last minute quirks for ++ * gigabit operation. ++ **/ ++static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val; ++ u16 reg_data, reg_data2; ++ u32 tipg; ++ u32 i = 0; ++ ++ reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ++ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, ++ E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, ++ reg_data); ++ if (ret_val) ++ return ret_val; ++ ++ /* Configure Transmit Inter-Packet Gap */ ++ tipg = er32(TIPG); ++ tipg &= ~E1000_TIPG_IPGT_MASK; ++ tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; ++ ew32(TIPG, tipg); ++ ++ do { ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); ++ if (ret_val) ++ return ret_val; ++ ++ ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); ++ if (ret_val) ++ return ret_val; ++ i++; ++ } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); ++ ++ reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ++ ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_kmrn_reg_80003es2lan - Read kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to be read ++ * @data: pointer to the read data ++ * ++ * Acquire semaphore, then read the PHY register at offset ++ * using the kumeran interface. The information retrieved is stored in data. ++ * Release the semaphore before exiting. 
++ **/ ++static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 *data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ ret_val = e1000_acquire_mac_csr_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ kmrnctrlsta = er32(KMRNCTRLSTA); ++ *data = (u16)kmrnctrlsta; ++ ++ e1000_release_mac_csr_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_write_kmrn_reg_80003es2lan - Write kumeran register ++ * @hw: pointer to the HW structure ++ * @offset: register offset to write to ++ * @data: data to write at register offset ++ * ++ * Acquire semaphore, then write the data to PHY register ++ * at the offset using the kumeran interface. Release semaphore ++ * before exiting. ++ **/ ++static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, ++ u16 data) ++{ ++ u32 kmrnctrlsta; ++ s32 ret_val = 0; ++ ++ ret_val = e1000_acquire_mac_csr_80003es2lan(hw); ++ if (ret_val) ++ return ret_val; ++ ++ kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & ++ E1000_KMRNCTRLSTA_OFFSET) | data; ++ ew32(KMRNCTRLSTA, kmrnctrlsta); ++ e1e_flush(); ++ ++ udelay(2); ++ ++ e1000_release_mac_csr_80003es2lan(hw); ++ ++ return ret_val; ++} ++ ++/** ++ * e1000_read_mac_addr_80003es2lan - Read device MAC address ++ * @hw: pointer to the HW structure ++ **/ ++static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) ++{ ++ s32 ret_val = 0; ++ ++ /* ++ * If there's an alternate MAC address place it in RAR0 ++ * so that it will override the Si installed default perm ++ * address. ++ */ ++ ret_val = e1000_check_alt_mac_addr_generic(hw); ++ if (ret_val) ++ goto out; ++ ++ ret_val = e1000_read_mac_addr_generic(hw); ++ ++out: ++ return ret_val; ++} ++ ++/** ++ * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down ++ * @hw: pointer to the HW structure ++ * ++ * In the case of a PHY power down to save power, or to turn off link during a ++ * driver unload, or wake on lan is not enabled, remove the link. ++ **/ ++static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) ++{ ++ /* If the management interface is not enabled, then power down */ ++ if (!(hw->mac.ops.check_mng_mode(hw) || ++ hw->phy.ops.check_reset_block(hw))) ++ e1000_power_down_phy_copper(hw); ++} ++ ++/** ++ * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters ++ * @hw: pointer to the HW structure ++ * ++ * Clears the hardware counters by reading the counter registers. 
++ **/ ++static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) ++{ ++ e1000e_clear_hw_cntrs_base(hw); ++ ++ er32(PRC64); ++ er32(PRC127); ++ er32(PRC255); ++ er32(PRC511); ++ er32(PRC1023); ++ er32(PRC1522); ++ er32(PTC64); ++ er32(PTC127); ++ er32(PTC255); ++ er32(PTC511); ++ er32(PTC1023); ++ er32(PTC1522); ++ ++ er32(ALGNERRC); ++ er32(RXERRC); ++ er32(TNCRS); ++ er32(CEXTERR); ++ er32(TSCTC); ++ er32(TSCTFC); ++ ++ er32(MGTPRC); ++ er32(MGTPDC); ++ er32(MGTPTC); ++ ++ er32(IAC); ++ er32(ICRXOC); ++ ++ er32(ICRXPTC); ++ er32(ICRXATC); ++ er32(ICTXPTC); ++ er32(ICTXATC); ++ er32(ICTXQEC); ++ er32(ICTXQMTC); ++ er32(ICRXDMTC); ++} ++ ++static const struct e1000_mac_operations es2_mac_ops = { ++ .read_mac_addr = e1000_read_mac_addr_80003es2lan, ++ .id_led_init = e1000e_id_led_init, ++ .blink_led = e1000e_blink_led_generic, ++ .check_mng_mode = e1000e_check_mng_mode_generic, ++ /* check_for_link dependent on media type */ ++ .cleanup_led = e1000e_cleanup_led_generic, ++ .clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan, ++ .get_bus_info = e1000e_get_bus_info_pcie, ++ .set_lan_id = e1000_set_lan_id_multi_port_pcie, ++ .get_link_up_info = e1000_get_link_up_info_80003es2lan, ++ .led_on = e1000e_led_on_generic, ++ .led_off = e1000e_led_off_generic, ++ .update_mc_addr_list = e1000e_update_mc_addr_list_generic, ++ .write_vfta = e1000_write_vfta_generic, ++ .clear_vfta = e1000_clear_vfta_generic, ++ .reset_hw = e1000_reset_hw_80003es2lan, ++ .init_hw = e1000_init_hw_80003es2lan, ++ .setup_link = e1000e_setup_link, ++ /* setup_physical_interface dependent on media type */ ++ .setup_led = e1000e_setup_led_generic, ++}; ++ ++static const struct e1000_phy_operations es2_phy_ops = { ++ .acquire = e1000_acquire_phy_80003es2lan, ++ .check_polarity = e1000_check_polarity_m88, ++ .check_reset_block = e1000e_check_reset_block_generic, ++ .commit = e1000e_phy_sw_reset, ++ .force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan, ++ .get_cfg_done = e1000_get_cfg_done_80003es2lan, ++ .get_cable_length = e1000_get_cable_length_80003es2lan, ++ .get_info = e1000e_get_phy_info_m88, ++ .read_reg = e1000_read_phy_reg_gg82563_80003es2lan, ++ .release = e1000_release_phy_80003es2lan, ++ .reset = e1000e_phy_hw_reset_generic, ++ .set_d0_lplu_state = NULL, ++ .set_d3_lplu_state = e1000e_set_d3_lplu_state, ++ .write_reg = e1000_write_phy_reg_gg82563_80003es2lan, ++ .cfg_on_link_up = e1000_cfg_on_link_up_80003es2lan, ++}; ++ ++static const struct e1000_nvm_operations es2_nvm_ops = { ++ .acquire = e1000_acquire_nvm_80003es2lan, ++ .read = e1000e_read_nvm_eerd, ++ .release = e1000_release_nvm_80003es2lan, ++ .update = e1000e_update_nvm_checksum_generic, ++ .valid_led_default = e1000e_valid_led_default, ++ .validate = e1000e_validate_nvm_checksum_generic, ++ .write = e1000_write_nvm_80003es2lan, ++}; ++ ++const struct e1000_info e1000_es2_info = { ++ .mac = e1000_80003es2lan, ++ .flags = FLAG_HAS_HW_VLAN_FILTER ++ | FLAG_HAS_JUMBO_FRAMES ++ | FLAG_HAS_WOL ++ | FLAG_APME_IN_CTRL3 ++ | FLAG_HAS_CTRLEXT_ON_LOAD ++ | FLAG_RX_NEEDS_RESTART /* errata */ ++ | FLAG_TARC_SET_BIT_ZERO /* errata */ ++ | FLAG_APME_CHECK_PORT_B ++ | FLAG_DISABLE_FC_PAUSE_TIME /* errata */ ++ | FLAG_TIPG_MEDIUM_FOR_80003ESLAN, ++ .flags2 = FLAG2_DMA_BURST, ++ .pba = 38, ++ .max_hw_frame_size = DEFAULT_JUMBO, ++ .get_variants = e1000_get_variants_80003es2lan, ++ .mac_ops = &es2_mac_ops, ++ .phy_ops = &es2_phy_ops, ++ .nvm_ops = &es2_nvm_ops, ++}; ++ +--- linux/drivers/xenomai/net/drivers/e1000e/defines.h 1970-01-01 08:00:00.000000000 
+0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/defines.h 2021-04-07 16:01:27.180634233 +0800 +@@ -0,0 +1,852 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_DEFINES_H_ ++#define _E1000_DEFINES_H_ ++ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ ++#define REQ_TX_DESCRIPTOR_MULTIPLE 8 ++#define REQ_RX_DESCRIPTOR_MULTIPLE 8 ++ ++/* Definitions for power management and wakeup registers */ ++/* Wake Up Control */ ++#define E1000_WUC_APME 0x00000001 /* APM Enable */ ++#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ ++#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ ++ ++/* Wake Up Filter Control */ ++#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ ++#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ ++#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ ++#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ ++#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ ++#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ ++ ++/* Wake Up Status */ ++#define E1000_WUS_LNKC E1000_WUFC_LNKC ++#define E1000_WUS_MAG E1000_WUFC_MAG ++#define E1000_WUS_EX E1000_WUFC_EX ++#define E1000_WUS_MC E1000_WUFC_MC ++#define 
E1000_WUS_BC E1000_WUFC_BC ++ ++/* Extended Device Control */ ++#define E1000_CTRL_EXT_LPCD 0x00000004 /* LCD Power Cycle Done */ ++#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */ ++#define E1000_CTRL_EXT_FORCE_SMBUS 0x00000004 /* Force SMBus mode*/ ++#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ ++#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ ++#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ ++#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clock Gating */ ++#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 ++#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 ++#define E1000_CTRL_EXT_EIAME 0x01000000 ++#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ ++#define E1000_CTRL_EXT_IAME 0x08000000 /* Interrupt acknowledge Auto-mask */ ++#define E1000_CTRL_EXT_INT_TIMER_CLR 0x20000000 /* Clear Interrupt timers after IMS clear */ ++#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ ++#define E1000_CTRL_EXT_LSECCK 0x00001000 ++#define E1000_CTRL_EXT_PHYPDEN 0x00100000 ++ ++/* Receive Descriptor bit definitions */ ++#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ ++#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ ++#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ ++#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ ++#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ ++#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ ++#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ ++#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ ++#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ ++#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ ++#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ ++#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ ++#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ ++ ++#define E1000_RXDEXT_STATERR_CE 0x01000000 ++#define E1000_RXDEXT_STATERR_SE 0x02000000 ++#define E1000_RXDEXT_STATERR_SEQ 0x04000000 ++#define E1000_RXDEXT_STATERR_CXE 0x10000000 ++#define E1000_RXDEXT_STATERR_RXE 0x80000000 ++ ++/* mask to determine if packets should be dropped due to frame errors */ ++#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXD_ERR_CE | \ ++ E1000_RXD_ERR_SE | \ ++ E1000_RXD_ERR_SEQ | \ ++ E1000_RXD_ERR_CXE | \ ++ E1000_RXD_ERR_RXE) ++ ++/* Same mask, but for extended and packet split descriptors */ ++#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ ++ E1000_RXDEXT_STATERR_CE | \ ++ E1000_RXDEXT_STATERR_SE | \ ++ E1000_RXDEXT_STATERR_SEQ | \ ++ E1000_RXDEXT_STATERR_CXE | \ ++ E1000_RXDEXT_STATERR_RXE) ++ ++#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 ++ ++/* Management Control */ ++#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ ++#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ ++#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ ++#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ ++#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ ++/* Enable MAC address filtering */ ++#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 ++/* Enable MNG packets to host memory */ ++#define E1000_MANC_EN_MNG2HOST 0x00200000 ++ ++#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ ++#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ ++#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ ++#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ ++ ++/* Receive Control */ ++#define 
E1000_RCTL_EN 0x00000002 /* enable */ ++#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ ++#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ ++#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ ++#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ ++#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ ++#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ ++#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ ++#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ ++#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min threshold size */ ++#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ ++#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ ++#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ ++#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ ++#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ ++#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ ++#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ ++/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ ++#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ ++#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ ++#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ ++#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ ++#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ ++#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ ++#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ ++#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ ++#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ ++ ++/* ++ * Use byte values for the following shift parameters ++ * Usage: ++ * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & ++ * E1000_PSRCTL_BSIZE0_MASK) | ++ * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & ++ * E1000_PSRCTL_BSIZE1_MASK) | ++ * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & ++ * E1000_PSRCTL_BSIZE2_MASK) | ++ * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; ++ * E1000_PSRCTL_BSIZE3_MASK)) ++ * where value0 = [128..16256], default=256 ++ * value1 = [1024..64512], default=4096 ++ * value2 = [0..64512], default=4096 ++ * value3 = [0..64512], default=0 ++ */ ++ ++#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F ++#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 ++#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 ++#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 ++ ++#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ ++#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ ++#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ ++#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ ++ ++/* SWFW_SYNC Definitions */ ++#define E1000_SWFW_EEP_SM 0x1 ++#define E1000_SWFW_PHY0_SM 0x2 ++#define E1000_SWFW_PHY1_SM 0x4 ++#define E1000_SWFW_CSR_SM 0x8 ++ ++/* Device Control */ ++#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ ++#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ ++#define E1000_CTRL_LRST 0x00000008 /* Link reset. 
0=normal,1=reset */ ++#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ ++#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ ++#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ ++#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ ++#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ ++#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ ++#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ ++#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ ++#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ ++#define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */ ++#define E1000_CTRL_LANPHYPC_VALUE 0x00020000 /* SW value of LANPHYPC */ ++#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ ++#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ ++#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ ++#define E1000_CTRL_RST 0x04000000 /* Global reset */ ++#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ ++#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ ++#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ ++#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ ++ ++/* ++ * Bit definitions for the Management Data IO (MDIO) and Management Data ++ * Clock (MDC) pins in the Device Control Register. ++ */ ++ ++/* Device Status */ ++#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ ++#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ ++#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ ++#define E1000_STATUS_FUNC_SHIFT 2 ++#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ ++#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ ++#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ ++#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ ++#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ ++#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ ++#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ ++#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ ++ ++/* Constants used to interpret the masked PCI-X bus speed. */ ++ ++#define HALF_DUPLEX 1 ++#define FULL_DUPLEX 2 ++ ++ ++#define ADVERTISE_10_HALF 0x0001 ++#define ADVERTISE_10_FULL 0x0002 ++#define ADVERTISE_100_HALF 0x0004 ++#define ADVERTISE_100_FULL 0x0008 ++#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ ++#define ADVERTISE_1000_FULL 0x0020 ++ ++/* 1000/H is not supported, nor spec-compliant. 
*/ ++#define E1000_ALL_SPEED_DUPLEX ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ++ ADVERTISE_1000_FULL) ++#define E1000_ALL_NOT_GIG ( ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ++ ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) ++#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) ++#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) ++ ++#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX ++ ++/* LED Control */ ++#define E1000_PHY_LED0_MODE_MASK 0x00000007 ++#define E1000_PHY_LED0_IVRT 0x00000008 ++#define E1000_PHY_LED0_MASK 0x0000001F ++ ++#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F ++#define E1000_LEDCTL_LED0_MODE_SHIFT 0 ++#define E1000_LEDCTL_LED0_IVRT 0x00000040 ++#define E1000_LEDCTL_LED0_BLINK 0x00000080 ++ ++#define E1000_LEDCTL_MODE_LINK_UP 0x2 ++#define E1000_LEDCTL_MODE_LED_ON 0xE ++#define E1000_LEDCTL_MODE_LED_OFF 0xF ++ ++/* Transmit Descriptor bit definitions */ ++#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ ++#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ ++#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ ++#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ ++#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ ++#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ ++#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ ++#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ ++#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ ++#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ ++#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ ++#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ ++#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ ++#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ ++#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ ++#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ ++#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ ++#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ ++#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ ++ ++/* Transmit Control */ ++#define E1000_TCTL_EN 0x00000002 /* enable Tx */ ++#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ ++#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ ++#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ ++#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ ++#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ ++ ++/* Transmit Arbitration Count */ ++ ++/* SerDes Control */ ++#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 ++ ++/* Receive Checksum Control */ ++#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ ++#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ ++ ++/* Header split receive */ ++#define E1000_RFCTL_NFSW_DIS 0x00000040 ++#define E1000_RFCTL_NFSR_DIS 0x00000080 ++#define E1000_RFCTL_ACK_DIS 0x00001000 ++#define E1000_RFCTL_EXTEN 0x00008000 ++#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 ++#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 ++ ++/* Collision related configuration parameters */ ++#define E1000_COLLISION_THRESHOLD 15 ++#define E1000_CT_SHIFT 4 ++#define E1000_COLLISION_DISTANCE 63 ++#define E1000_COLD_SHIFT 12 ++ ++/* Default values for the transmit IPG register */ ++#define DEFAULT_82543_TIPG_IPGT_COPPER 8 ++ ++#define E1000_TIPG_IPGT_MASK 0x000003FF 
++ ++#define DEFAULT_82543_TIPG_IPGR1 8 ++#define E1000_TIPG_IPGR1_SHIFT 10 ++ ++#define DEFAULT_82543_TIPG_IPGR2 6 ++#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 ++#define E1000_TIPG_IPGR2_SHIFT 20 ++ ++#define MAX_JUMBO_FRAME_SIZE 0x3F00 ++ ++/* Extended Configuration Control and Size */ ++#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 ++#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 ++#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 ++#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 ++#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 ++#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 ++#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 ++ ++#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 ++#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 ++#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 ++#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 ++ ++#define E1000_KABGTXD_BGSQLBIAS 0x00050000 ++ ++/* PBA constants */ ++#define E1000_PBA_8K 0x0008 /* 8KB */ ++#define E1000_PBA_16K 0x0010 /* 16KB */ ++ ++#define E1000_PBS_16K E1000_PBA_16K ++ ++#define IFS_MAX 80 ++#define IFS_MIN 40 ++#define IFS_RATIO 4 ++#define IFS_STEP 10 ++#define MIN_NUM_XMITS 1000 ++ ++/* SW Semaphore Register */ ++#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ ++#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ ++#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ ++ ++#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ ++ ++/* Interrupt Cause Read */ ++#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ ++#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ ++#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ ++#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ ++#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ ++#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ ++#define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ ++#define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ ++#define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ ++#define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ ++#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ ++ ++/* PBA ECC Register */ ++#define E1000_PBA_ECC_COUNTER_MASK 0xFFF00000 /* ECC counter mask */ ++#define E1000_PBA_ECC_COUNTER_SHIFT 20 /* ECC counter shift value */ ++#define E1000_PBA_ECC_CORR_EN 0x00000001 /* ECC correction enable */ ++#define E1000_PBA_ECC_STAT_CLR 0x00000002 /* Clear ECC error counter */ ++#define E1000_PBA_ECC_INT_EN 0x00000004 /* Enable ICR bit 5 for ECC */ ++ ++/* ++ * This defines the bits that are set in the Interrupt Mask ++ * Set/Read Register. 
Each bit is documented below: ++ * o RXT0 = Receiver Timer Interrupt (ring 0) ++ * o TXDW = Transmit Descriptor Written Back ++ * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) ++ * o RXSEQ = Receive Sequence Error ++ * o LSC = Link Status Change ++ */ ++#define IMS_ENABLE_MASK ( \ ++ E1000_IMS_RXT0 | \ ++ E1000_IMS_TXDW | \ ++ E1000_IMS_RXDMT0 | \ ++ E1000_IMS_RXSEQ | \ ++ E1000_IMS_LSC) ++ ++/* Interrupt Mask Set */ ++#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ ++#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ ++#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ ++#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ ++#define E1000_IMS_RXQ0 E1000_ICR_RXQ0 /* Rx Queue 0 Interrupt */ ++#define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ ++#define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ ++#define E1000_IMS_TXQ1 E1000_ICR_TXQ1 /* Tx Queue 1 Interrupt */ ++#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */ ++ ++/* Interrupt Cause Set */ ++#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ ++#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ ++#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ ++ ++/* Transmit Descriptor Control */ ++#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ ++#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ ++#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ ++#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ ++#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ ++#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ ++/* Enable the counting of desc. still to be processed. */ ++#define E1000_TXDCTL_COUNT_DESC 0x00400000 ++ ++/* Flow Control Constants */ ++#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 ++#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 ++#define FLOW_CONTROL_TYPE 0x8808 ++ ++/* 802.1q VLAN Packet Size */ ++#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ ++ ++/* Receive Address */ ++/* ++ * Number of high/low register pairs in the RAR. The RAR (Receive Address ++ * Registers) holds the directed and multicast addresses that we monitor. ++ * Technically, we have 16 spots. However, we reserve one of these spots ++ * (RAR[15]) for our directed address used by controllers with ++ * manageability enabled, allowing us room for 15 multicast addresses. 
++ */ ++#define E1000_RAR_ENTRIES 15 ++#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ ++#define E1000_RAL_MAC_ADDR_LEN 4 ++#define E1000_RAH_MAC_ADDR_LEN 2 ++ ++/* Error Codes */ ++#define E1000_ERR_NVM 1 ++#define E1000_ERR_PHY 2 ++#define E1000_ERR_CONFIG 3 ++#define E1000_ERR_PARAM 4 ++#define E1000_ERR_MAC_INIT 5 ++#define E1000_ERR_PHY_TYPE 6 ++#define E1000_ERR_RESET 9 ++#define E1000_ERR_MASTER_REQUESTS_PENDING 10 ++#define E1000_ERR_HOST_INTERFACE_COMMAND 11 ++#define E1000_BLK_PHY_RESET 12 ++#define E1000_ERR_SWFW_SYNC 13 ++#define E1000_NOT_IMPLEMENTED 14 ++#define E1000_ERR_INVALID_ARGUMENT 16 ++#define E1000_ERR_NO_SPACE 17 ++#define E1000_ERR_NVM_PBA_SECTION 18 ++ ++/* Loop limit on how long we wait for auto-negotiation to complete */ ++#define FIBER_LINK_UP_LIMIT 50 ++#define COPPER_LINK_UP_LIMIT 10 ++#define PHY_AUTO_NEG_LIMIT 45 ++#define PHY_FORCE_LIMIT 20 ++/* Number of 100 microseconds we wait for PCI Express master disable */ ++#define MASTER_DISABLE_TIMEOUT 800 ++/* Number of milliseconds we wait for PHY configuration done after MAC reset */ ++#define PHY_CFG_TIMEOUT 100 ++/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ ++#define MDIO_OWNERSHIP_TIMEOUT 10 ++/* Number of milliseconds for NVM auto read done after MAC reset. */ ++#define AUTO_READ_DONE_TIMEOUT 10 ++ ++/* Flow Control */ ++#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ ++#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ ++#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ ++ ++/* Transmit Configuration Word */ ++#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ ++#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ ++#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ ++#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ ++#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ ++ ++/* Receive Configuration Word */ ++#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ ++#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ ++#define E1000_RXCW_C 0x20000000 /* Receive config */ ++#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ ++ ++/* PCI Express Control */ ++#define E1000_GCR_RXD_NO_SNOOP 0x00000001 ++#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 ++#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 ++#define E1000_GCR_TXD_NO_SNOOP 0x00000008 ++#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 ++#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 ++ ++#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ ++ E1000_GCR_RXDSCW_NO_SNOOP | \ ++ E1000_GCR_RXDSCR_NO_SNOOP | \ ++ E1000_GCR_TXD_NO_SNOOP | \ ++ E1000_GCR_TXDSCW_NO_SNOOP | \ ++ E1000_GCR_TXDSCR_NO_SNOOP) ++ ++/* PHY Control Register */ ++#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ ++#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ ++#define MII_CR_POWER_DOWN 0x0800 /* Power down */ ++#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ ++#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ ++#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ ++#define MII_CR_SPEED_1000 0x0040 ++#define MII_CR_SPEED_100 0x2000 ++#define MII_CR_SPEED_10 0x0000 ++ ++/* PHY Status Register */ ++#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ ++#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ ++ ++/* Autoneg Advertisement Register */ ++#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ ++#define NWAY_AR_10T_FD_CAPS 
0x0040 /* 10T Full Duplex Capable */ ++#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ ++#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ ++#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ ++#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ ++ ++/* Link Partner Ability Register (Base Page) */ ++#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ ++#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ ++#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asymmetric Pause Direction bit */ ++ ++/* Autoneg Expansion Register */ ++#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ ++ ++/* 1000BASE-T Control Register */ ++#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ ++#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ ++ /* 0=DTE device */ ++#define CR_1000T_MS_VALUE 0x0800 /* 1=Configure PHY as Master */ ++ /* 0=Configure PHY as Slave */ ++#define CR_1000T_MS_ENABLE 0x1000 /* 1=Master/Slave manual config value */ ++ /* 0=Automatic Master/Slave config */ ++ ++/* 1000BASE-T Status Register */ ++#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ ++#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ ++ ++ ++/* PHY 1000 MII Register/Bit Definitions */ ++/* PHY Registers defined by IEEE */ ++#define PHY_CONTROL 0x00 /* Control Register */ ++#define PHY_STATUS 0x01 /* Status Register */ ++#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ ++#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ ++#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ ++#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ ++#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ ++#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ ++#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ ++#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ ++ ++#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ ++ ++/* NVM Control */ ++#define E1000_EECD_SK 0x00000001 /* NVM Clock */ ++#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ ++#define E1000_EECD_DI 0x00000004 /* NVM Data In */ ++#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ ++#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ ++#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ ++#define E1000_EECD_PRES 0x00000100 /* NVM Present */ ++#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ ++/* NVM Addressing bits based on type (0-small, 1-large) */ ++#define E1000_EECD_ADDR_BITS 0x00000400 ++#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ ++#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ ++#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ ++#define E1000_EECD_SIZE_EX_SHIFT 11 ++#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ ++#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ ++#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ ++#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) ++ ++#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write registers */ ++#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ ++#define E1000_NVM_RW_REG_START 1 /* Start operation */ ++#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ ++#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ ++#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ ++#define E1000_FLASH_UPDATES 2000 
++ ++/* NVM Word Offsets */ ++#define NVM_COMPAT 0x0003 ++#define NVM_ID_LED_SETTINGS 0x0004 ++#define NVM_INIT_CONTROL2_REG 0x000F ++#define NVM_INIT_CONTROL3_PORT_B 0x0014 ++#define NVM_INIT_3GIO_3 0x001A ++#define NVM_INIT_CONTROL3_PORT_A 0x0024 ++#define NVM_CFG 0x0012 ++#define NVM_ALT_MAC_ADDR_PTR 0x0037 ++#define NVM_CHECKSUM_REG 0x003F ++ ++#define E1000_NVM_INIT_CTRL2_MNGM 0x6000 /* Manageability Operation Mode mask */ ++ ++#define E1000_NVM_CFG_DONE_PORT_0 0x40000 /* MNG config cycle done */ ++#define E1000_NVM_CFG_DONE_PORT_1 0x80000 /* ...for second port */ ++ ++/* Mask bits for fields in Word 0x0f of the NVM */ ++#define NVM_WORD0F_PAUSE_MASK 0x3000 ++#define NVM_WORD0F_PAUSE 0x1000 ++#define NVM_WORD0F_ASM_DIR 0x2000 ++ ++/* Mask bits for fields in Word 0x1a of the NVM */ ++#define NVM_WORD1A_ASPM_MASK 0x000C ++ ++/* Mask bits for fields in Word 0x03 of the EEPROM */ ++#define NVM_COMPAT_LOM 0x0800 ++ ++/* length of string needed to store PBA number */ ++#define E1000_PBANUM_LENGTH 11 ++ ++/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ ++#define NVM_SUM 0xBABA ++ ++/* PBA (printed board assembly) number words */ ++#define NVM_PBA_OFFSET_0 8 ++#define NVM_PBA_OFFSET_1 9 ++#define NVM_PBA_PTR_GUARD 0xFAFA ++#define NVM_WORD_SIZE_BASE_SHIFT 6 ++ ++/* NVM Commands - SPI */ ++#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ ++#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ ++#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ ++#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ ++#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ ++#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ ++ ++/* SPI NVM Status Register */ ++#define NVM_STATUS_RDY_SPI 0x01 ++ ++/* Word definitions for ID LED Settings */ ++#define ID_LED_RESERVED_0000 0x0000 ++#define ID_LED_RESERVED_FFFF 0xFFFF ++#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ ++ (ID_LED_OFF1_OFF2 << 8) | \ ++ (ID_LED_DEF1_DEF2 << 4) | \ ++ (ID_LED_DEF1_DEF2)) ++#define ID_LED_DEF1_DEF2 0x1 ++#define ID_LED_DEF1_ON2 0x2 ++#define ID_LED_DEF1_OFF2 0x3 ++#define ID_LED_ON1_DEF2 0x4 ++#define ID_LED_ON1_ON2 0x5 ++#define ID_LED_ON1_OFF2 0x6 ++#define ID_LED_OFF1_DEF2 0x7 ++#define ID_LED_OFF1_ON2 0x8 ++#define ID_LED_OFF1_OFF2 0x9 ++ ++#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF ++#define IGP_ACTIVITY_LED_ENABLE 0x0300 ++#define IGP_LED3_MODE 0x07000000 ++ ++/* PCI/PCI-X/PCI-EX Config space */ ++#define PCI_HEADER_TYPE_REGISTER 0x0E ++#define PCIE_LINK_STATUS 0x12 ++ ++#define PCI_HEADER_TYPE_MULTIFUNC 0x80 ++#define PCIE_LINK_WIDTH_MASK 0x3F0 ++#define PCIE_LINK_WIDTH_SHIFT 4 ++ ++#define PHY_REVISION_MASK 0xFFFFFFF0 ++#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ ++#define MAX_PHY_MULTI_PAGE_REG 0xF ++ ++/* Bit definitions for valid PHY IDs. 
*/ ++/* ++ * I = Integrated ++ * E = External ++ */ ++#define M88E1000_E_PHY_ID 0x01410C50 ++#define M88E1000_I_PHY_ID 0x01410C30 ++#define M88E1011_I_PHY_ID 0x01410C20 ++#define IGP01E1000_I_PHY_ID 0x02A80380 ++#define M88E1111_I_PHY_ID 0x01410CC0 ++#define GG82563_E_PHY_ID 0x01410CA0 ++#define IGP03E1000_E_PHY_ID 0x02A80390 ++#define IFE_E_PHY_ID 0x02A80330 ++#define IFE_PLUS_E_PHY_ID 0x02A80320 ++#define IFE_C_E_PHY_ID 0x02A80310 ++#define BME1000_E_PHY_ID 0x01410CB0 ++#define BME1000_E_PHY_ID_R2 0x01410CB1 ++#define I82577_E_PHY_ID 0x01540050 ++#define I82578_E_PHY_ID 0x004DD040 ++#define I82579_E_PHY_ID 0x01540090 ++#define I217_E_PHY_ID 0x015400A0 ++ ++/* M88E1000 Specific Registers */ ++#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ ++#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ ++#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ ++ ++#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ ++#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ ++ ++/* M88E1000 PHY Specific Control Register */ ++#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */ ++#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 /* MDI Crossover Mode bits 6:5 */ ++ /* Manual MDI configuration */ ++#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ ++/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ ++#define M88E1000_PSCR_AUTO_X_1000T 0x0040 ++/* Auto crossover enabled all speeds */ ++#define M88E1000_PSCR_AUTO_X_MODE 0x0060 ++/* ++ * 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold) ++ * 0=Normal 10BASE-T Rx Threshold ++ */ ++#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */ ++ ++/* M88E1000 PHY Specific Status Register */ ++#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ ++#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ ++#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ ++/* 0=<50M; 1=50-80M; 2=80-110M; 3=110-140M; 4=>140M */ ++#define M88E1000_PSSR_CABLE_LENGTH 0x0380 ++#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ ++#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ ++ ++#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 ++ ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the master ++ */ ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 ++#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 ++/* ++ * Number of times we will attempt to autonegotiate before downshifting if we ++ * are the slave ++ */ ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 ++#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 ++#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ ++ ++/* M88EC018 Rev 2 specific DownShift settings */ ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 ++#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 ++ ++#define I82578_EPSCR_DOWNSHIFT_ENABLE 0x0020 ++#define I82578_EPSCR_DOWNSHIFT_COUNTER_MASK 0x001C ++ ++/* BME1000 PHY Specific Control Register */ ++#define BME1000_PSCR_ENABLE_DOWNSHIFT 0x0800 /* 1 = enable downshift */ ++ ++ ++#define PHY_PAGE_SHIFT 5 ++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \ ++ ((reg) & MAX_PHY_REG_ADDRESS)) ++ ++/* ++ * Bits... 
++ * 15-5: page ++ * 4-0: register offset ++ */ ++#define GG82563_PAGE_SHIFT 5 ++#define GG82563_REG(page, reg) \ ++ (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) ++#define GG82563_MIN_ALT_REG 30 ++ ++/* GG82563 Specific Registers */ ++#define GG82563_PHY_SPEC_CTRL \ ++ GG82563_REG(0, 16) /* PHY Specific Control */ ++#define GG82563_PHY_PAGE_SELECT \ ++ GG82563_REG(0, 22) /* Page Select */ ++#define GG82563_PHY_SPEC_CTRL_2 \ ++ GG82563_REG(0, 26) /* PHY Specific Control 2 */ ++#define GG82563_PHY_PAGE_SELECT_ALT \ ++ GG82563_REG(0, 29) /* Alternate Page Select */ ++ ++#define GG82563_PHY_MAC_SPEC_CTRL \ ++ GG82563_REG(2, 21) /* MAC Specific Control Register */ ++ ++#define GG82563_PHY_DSP_DISTANCE \ ++ GG82563_REG(5, 26) /* DSP Distance */ ++ ++/* Page 193 - Port Control Registers */ ++#define GG82563_PHY_KMRN_MODE_CTRL \ ++ GG82563_REG(193, 16) /* Kumeran Mode Control */ ++#define GG82563_PHY_PWR_MGMT_CTRL \ ++ GG82563_REG(193, 20) /* Power Management Control */ ++ ++/* Page 194 - KMRN Registers */ ++#define GG82563_PHY_INBAND_CTRL \ ++ GG82563_REG(194, 18) /* Inband Control */ ++ ++/* MDI Control */ ++#define E1000_MDIC_REG_SHIFT 16 ++#define E1000_MDIC_PHY_SHIFT 21 ++#define E1000_MDIC_OP_WRITE 0x04000000 ++#define E1000_MDIC_OP_READ 0x08000000 ++#define E1000_MDIC_READY 0x10000000 ++#define E1000_MDIC_ERROR 0x40000000 ++ ++/* SerDes Control */ ++#define E1000_GEN_POLL_TIMEOUT 640 ++ ++/* FW Semaphore */ ++#define E1000_FWSM_WLOCK_MAC_MASK 0x0380 ++#define E1000_FWSM_WLOCK_MAC_SHIFT 7 ++ ++#endif /* _E1000_DEFINES_H_ */ +--- linux/drivers/xenomai/net/drivers/e1000e/netdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/netdev.c 2021-04-07 16:01:27.175634241 +0800 +@@ -0,0 +1,4419 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "e1000.h" ++ ++#define RT_E1000E_NUM_RXD 64 ++ ++#define DRV_EXTRAVERSION "-k-rt" ++ ++#define DRV_VERSION "1.5.1" DRV_EXTRAVERSION ++char e1000e_driver_name[] = "rt_e1000e"; ++const char e1000e_driver_version[] = DRV_VERSION; ++ ++static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state); ++ ++static const struct e1000_info *e1000_info_tbl[] = { ++ [board_82571] = &e1000_82571_info, ++ [board_82572] = &e1000_82572_info, ++ [board_82573] = &e1000_82573_info, ++ [board_82574] = &e1000_82574_info, ++ [board_82583] = &e1000_82583_info, ++ [board_80003es2lan] = &e1000_es2_info, ++ [board_ich8lan] = &e1000_ich8_info, ++ [board_ich9lan] = &e1000_ich9_info, ++ [board_ich10lan] = &e1000_ich10_info, ++ [board_pchlan] = &e1000_pch_info, ++ [board_pch2lan] = &e1000_pch2_info, ++ [board_pch_lpt] = &e1000_pch_lpt_info, ++}; ++ ++struct e1000_reg_info { ++ u32 ofs; ++ char *name; ++}; ++ ++#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ ++#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ ++#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ ++#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ ++#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ ++ ++#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ ++#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ ++#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ ++#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ ++#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ ++ ++static const struct e1000_reg_info e1000_reg_info_tbl[] = { ++ ++ /* General Registers */ ++ {E1000_CTRL, "CTRL"}, ++ {E1000_STATUS, "STATUS"}, ++ {E1000_CTRL_EXT, "CTRL_EXT"}, ++ ++ /* Interrupt Registers */ ++ {E1000_ICR, "ICR"}, ++ ++ /* Rx Registers */ ++ {E1000_RCTL, "RCTL"}, ++ {E1000_RDLEN, "RDLEN"}, ++ {E1000_RDH, "RDH"}, ++ {E1000_RDT, "RDT"}, ++ {E1000_RDTR, "RDTR"}, ++ {E1000_RXDCTL(0), "RXDCTL"}, ++ {E1000_ERT, "ERT"}, ++ {E1000_RDBAL, "RDBAL"}, ++ {E1000_RDBAH, "RDBAH"}, ++ {E1000_RDFH, "RDFH"}, ++ {E1000_RDFT, "RDFT"}, ++ {E1000_RDFHS, "RDFHS"}, ++ {E1000_RDFTS, "RDFTS"}, ++ {E1000_RDFPC, "RDFPC"}, ++ ++ /* Tx Registers */ ++ {E1000_TCTL, "TCTL"}, ++ {E1000_TDBAL, "TDBAL"}, ++ {E1000_TDBAH, "TDBAH"}, ++ {E1000_TDLEN, "TDLEN"}, ++ {E1000_TDH, "TDH"}, ++ {E1000_TDT, "TDT"}, ++ {E1000_TIDV, "TIDV"}, ++ {E1000_TXDCTL(0), "TXDCTL"}, ++ {E1000_TADV, "TADV"}, ++ {E1000_TARC(0), "TARC"}, ++ {E1000_TDFH, "TDFH"}, ++ {E1000_TDFT, "TDFT"}, ++ {E1000_TDFHS, "TDFHS"}, ++ {E1000_TDFTS, "TDFTS"}, ++ {E1000_TDFPC, "TDFPC"}, ++ ++ /* List Terminator */ ++ {} ++}; ++ ++/* ++ * e1000_regdump - register printout routine ++ */ ++static void e1000_regdump(struct e1000_hw *hw, struct e1000_reg_info *reginfo) ++{ ++ int n = 0; ++ char rname[16]; ++ u32 regs[8]; ++ ++ switch (reginfo->ofs) { ++ case E1000_RXDCTL(0): ++ for (n = 0; n < 2; n++) ++ regs[n] = __er32(hw, E1000_RXDCTL(n)); ++ break; ++ case E1000_TXDCTL(0): ++ for (n = 0; n < 2; n++) ++ regs[n] = __er32(hw, E1000_TXDCTL(n)); ++ break; ++ case E1000_TARC(0): ++ for (n = 0; n < 2; n++) ++ 
regs[n] = __er32(hw, E1000_TARC(n)); ++ break; ++ default: ++ printk(KERN_INFO "%-15s %08x\n", ++ reginfo->name, __er32(hw, reginfo->ofs)); ++ return; ++ } ++ ++ snprintf(rname, 16, "%s%s", reginfo->name, "[0-1]"); ++ printk(KERN_INFO "%-15s ", rname); ++ for (n = 0; n < 2; n++) ++ printk(KERN_CONT "%08x ", regs[n]); ++ printk(KERN_CONT "\n"); ++} ++ ++/* ++ * e1000e_dump - Print registers, Tx-ring and Rx-ring ++ */ ++static void e1000e_dump(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_reg_info *reginfo; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc; ++ struct my_u0 { ++ u64 a; ++ u64 b; ++ } *u0; ++ struct e1000_buffer *buffer_info; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_packet_split *rx_desc_ps; ++ union e1000_rx_desc_extended *rx_desc; ++ struct my_u1 { ++ u64 a; ++ u64 b; ++ u64 c; ++ u64 d; ++ } *u1; ++ u32 staterr; ++ int i = 0; ++ ++ if (!netif_msg_hw(adapter)) ++ return; ++ ++ /* Print netdevice Info */ ++ if (netdev) { ++ dev_info(&adapter->pdev->dev, "Net device Info\n"); ++ printk(KERN_INFO "Device Name state " ++ "trans_start last_rx\n"); ++ printk(KERN_INFO "%-15s\n", netdev->name); ++ } ++ ++ /* Print Registers */ ++ dev_info(&adapter->pdev->dev, "Register Dump\n"); ++ printk(KERN_INFO " Register Name Value\n"); ++ for (reginfo = (struct e1000_reg_info *)e1000_reg_info_tbl; ++ reginfo->name; reginfo++) { ++ e1000_regdump(hw, reginfo); ++ } ++ ++ /* Print Tx Ring Summary */ ++ if (!netdev || !rtnetif_running(netdev)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "Tx Ring Summary\n"); ++ printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]" ++ " leng ntw timestamp\n"); ++ buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean]; ++ printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n", ++ 0, tx_ring->next_to_use, tx_ring->next_to_clean, ++ (unsigned long long)buffer_info->dma, ++ buffer_info->length, ++ buffer_info->next_to_watch, ++ (unsigned long long)buffer_info->time_stamp); ++ ++ /* Print Tx Ring */ ++ if (!netif_msg_tx_done(adapter)) ++ goto rx_ring_summary; ++ ++ dev_info(&adapter->pdev->dev, "Tx Ring Dump\n"); ++ ++ /* Transmit Descriptor Formats - DEXT[29] is 0 (Legacy) or 1 (Extended) ++ * ++ * Legacy Transmit Descriptor ++ * +--------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] (Reserved on Write Back) | ++ * +--------------------------------------------------------------+ ++ * 8 | Special | CSS | Status | CMD | CSO | Length | ++ * +--------------------------------------------------------------+ ++ * 63 48 47 36 35 32 31 24 23 16 15 0 ++ * ++ * Extended Context Descriptor (DTYP=0x0) for TSO or checksum offload ++ * 63 48 47 40 39 32 31 16 15 8 7 0 ++ * +----------------------------------------------------------------+ ++ * 0 | TUCSE | TUCS0 | TUCSS | IPCSE | IPCS0 | IPCSS | ++ * +----------------------------------------------------------------+ ++ * 8 | MSS | HDRLEN | RSV | STA | TUCMD | DTYP | PAYLEN | ++ * +----------------------------------------------------------------+ ++ * 63 48 47 40 39 36 35 32 31 24 23 20 19 0 ++ * ++ * Extended Data Descriptor (DTYP=0x1) ++ * +----------------------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +----------------------------------------------------------------+ ++ * 8 | VLAN tag | POPTS | Rsvd | Status | Command | DTYP | DTALEN | ++ * 
+----------------------------------------------------------------+ ++ * 63 48 47 40 39 36 35 32 31 24 23 20 19 0 ++ */ ++ printk(KERN_INFO "Tl[desc] [address 63:0 ] [SpeCssSCmCsLen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Legacy format\n"); ++ printk(KERN_INFO "Tc[desc] [Ce CoCsIpceCoS] [MssHlRSCm0Plen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Ext Context format\n"); ++ printk(KERN_INFO "Td[desc] [address 63:0 ] [VlaPoRSCm1Dlen]" ++ " [bi->dma ] leng ntw timestamp bi->skb " ++ "<-- Ext Data format\n"); ++ for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ u0 = (struct my_u0 *)tx_desc; ++ printk(KERN_INFO "T%c[0x%03X] %016llX %016llX %016llX " ++ "%04X %3X %016llX %p", ++ (!(le64_to_cpu(u0->b) & (1 << 29)) ? 'l' : ++ ((le64_to_cpu(u0->b) & (1 << 20)) ? 'd' : 'c')), i, ++ (unsigned long long)le64_to_cpu(u0->a), ++ (unsigned long long)le64_to_cpu(u0->b), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->length, buffer_info->next_to_watch, ++ (unsigned long long)buffer_info->time_stamp, ++ buffer_info->skb); ++ if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean) ++ printk(KERN_CONT " NTC/U\n"); ++ else if (i == tx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == tx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ ++ if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) ++ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, ++ 16, 1, phys_to_virt(buffer_info->dma), ++ buffer_info->length, true); ++ } ++ ++ /* Print Rx Ring Summary */ ++rx_ring_summary: ++ dev_info(&adapter->pdev->dev, "Rx Ring Summary\n"); ++ printk(KERN_INFO "Queue [NTU] [NTC]\n"); ++ printk(KERN_INFO " %5d %5X %5X\n", 0, ++ rx_ring->next_to_use, rx_ring->next_to_clean); ++ ++ /* Print Rx Ring */ ++ if (!netif_msg_rx_status(adapter)) ++ goto exit; ++ ++ dev_info(&adapter->pdev->dev, "Rx Ring Dump\n"); ++ switch (adapter->rx_ps_pages) { ++ case 1: ++ case 2: ++ case 3: ++ /* [Extended] Packet Split Receive Descriptor Format ++ * ++ * +-----------------------------------------------------+ ++ * 0 | Buffer Address 0 [63:0] | ++ * +-----------------------------------------------------+ ++ * 8 | Buffer Address 1 [63:0] | ++ * +-----------------------------------------------------+ ++ * 16 | Buffer Address 2 [63:0] | ++ * +-----------------------------------------------------+ ++ * 24 | Buffer Address 3 [63:0] | ++ * +-----------------------------------------------------+ ++ */ ++ printk(KERN_INFO "R [desc] [buffer 0 63:0 ] " ++ "[buffer 1 63:0 ] " ++ "[buffer 2 63:0 ] [buffer 3 63:0 ] [bi->dma ] " ++ "[bi->skb] <-- Ext Pkt Split format\n"); ++ /* [Extended] Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 13 12 8 7 4 3 0 ++ * +------------------------------------------------------+ ++ * 0 | Packet | IP | Rsvd | MRQ | Rsvd | MRQ RSS | ++ * | Checksum | Ident | | Queue | | Type | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ printk(KERN_INFO "RWB[desc] [ck ipid mrqhsh] " ++ "[vl l0 ee es] " ++ "[ l3 l2 l1 hs] [reserved ] ---------------- " ++ "[bi->skb] <-- Ext Rx Write-Back format\n"); ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ rx_desc_ps = E1000_RX_DESC_PS(*rx_ring, i); ++ u1 = (struct my_u1 
*)rx_desc_ps; ++ staterr = ++ le32_to_cpu(rx_desc_ps->wb.middle.status_error); ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ printk(KERN_INFO "RWB[0x%03X] %016llX " ++ "%016llX %016llX %016llX " ++ "---------------- %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)le64_to_cpu(u1->c), ++ (unsigned long long)le64_to_cpu(u1->d), ++ buffer_info->skb); ++ } else { ++ printk(KERN_INFO "R [0x%03X] %016llX " ++ "%016llX %016llX %016llX %016llX %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)le64_to_cpu(u1->c), ++ (unsigned long long)le64_to_cpu(u1->d), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->skb); ++ ++ if (netif_msg_pktdata(adapter)) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, 16, 1, ++ phys_to_virt(buffer_info->dma), ++ adapter->rx_ps_bsize0, true); ++ } ++ ++ if (i == rx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == rx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ } ++ break; ++ default: ++ case 0: ++ /* Extended Receive Descriptor (Read) Format ++ * ++ * +-----------------------------------------------------+ ++ * 0 | Buffer Address [63:0] | ++ * +-----------------------------------------------------+ ++ * 8 | Reserved | ++ * +-----------------------------------------------------+ ++ */ ++ printk(KERN_INFO "R [desc] [buf addr 63:0 ] " ++ "[reserved 63:0 ] [bi->dma ] " ++ "[bi->skb] <-- Ext (Read) format\n"); ++ /* Extended Receive Descriptor (Write-Back) Format ++ * ++ * 63 48 47 32 31 24 23 4 3 0 ++ * +------------------------------------------------------+ ++ * | RSS Hash | | | | ++ * 0 +-------------------+ Rsvd | Reserved | MRQ RSS | ++ * | Packet | IP | | | Type | ++ * | Checksum | Ident | | | | ++ * +------------------------------------------------------+ ++ * 8 | VLAN Tag | Length | Extended Error | Extended Status | ++ * +------------------------------------------------------+ ++ * 63 48 47 32 31 20 19 0 ++ */ ++ printk(KERN_INFO "RWB[desc] [cs ipid mrq] " ++ "[vt ln xe xs] " ++ "[bi->skb] <-- Ext (Write-Back) format\n"); ++ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ u1 = (struct my_u1 *)rx_desc; ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ if (staterr & E1000_RXD_STAT_DD) { ++ /* Descriptor Done */ ++ printk(KERN_INFO "RWB[0x%03X] %016llX " ++ "%016llX ---------------- %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ buffer_info->skb); ++ } else { ++ printk(KERN_INFO "R [0x%03X] %016llX " ++ "%016llX %016llX %p", i, ++ (unsigned long long)le64_to_cpu(u1->a), ++ (unsigned long long)le64_to_cpu(u1->b), ++ (unsigned long long)buffer_info->dma, ++ buffer_info->skb); ++ ++ if (netif_msg_pktdata(adapter)) ++ print_hex_dump(KERN_INFO, "", ++ DUMP_PREFIX_ADDRESS, 16, ++ 1, ++ phys_to_virt ++ (buffer_info->dma), ++ adapter->rx_buffer_len, ++ true); ++ } ++ ++ if (i == rx_ring->next_to_use) ++ printk(KERN_CONT " NTU\n"); ++ else if (i == rx_ring->next_to_clean) ++ printk(KERN_CONT " NTC\n"); ++ else ++ printk(KERN_CONT "\n"); ++ } ++ } ++ ++exit: ++ return; ++} ++ ++void e1000e_mod_watchdog_timer(rtdm_nrtsig_t *nrt_sig, void *data) ++{ ++ struct timer_list *timer = data; ++ ++ mod_timer(timer, jiffies + 1); ++} ++ ++void e1000e_trigger_downshift(rtdm_nrtsig_t *nrt_sig, void *data) ++{ ++ struct 
work_struct *downshift_task = data; ++ ++ schedule_work(downshift_task); ++} ++ ++/** ++ * e1000_desc_unused - calculate if we have unused descriptors ++ **/ ++static int e1000_desc_unused(struct e1000_ring *ring) ++{ ++ if (ring->next_to_clean > ring->next_to_use) ++ return ring->next_to_clean - ring->next_to_use - 1; ++ ++ return ring->count + ring->next_to_clean - ring->next_to_use - 1; ++} ++ ++/** ++ * e1000_rx_checksum - Receive Checksum Offload ++ * @adapter: board private structure ++ * @status_err: receive descriptor status and error fields ++ * @csum: receive descriptor csum field ++ * @sk_buff: socket buffer with received data ++ **/ ++static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, ++ u32 csum, struct rtskb *skb) ++{ ++ u16 status = (u16)status_err; ++ u8 errors = (u8)(status_err >> 24); ++ ++ /* Ignore Checksum bit is set */ ++ if (status & E1000_RXD_STAT_IXSM) ++ return; ++ /* TCP/UDP checksum error bit is set */ ++ if (errors & E1000_RXD_ERR_TCPE) { ++ /* let the stack verify checksum errors */ ++ adapter->hw_csum_err++; ++ return; ++ } ++ ++ /* TCP/UDP Checksum has not been calculated */ ++ if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) ++ return; ++ ++ /* It must be a TCP or UDP packet with a valid checksum */ ++ if (status & E1000_RXD_STAT_TCPCS) { ++ /* TCP checksum is good */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } else { ++ /* ++ * IP fragment with UDP payload ++ * Hardware complements the payload checksum, so we undo it ++ * and then put the value in host order for further stack use. ++ */ ++ __sum16 sum = (__force __sum16)htons(csum); ++ skb->csum = csum_unfold(~sum); ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ } ++ adapter->hw_csum_good++; ++} ++ ++/** ++ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa() ++ * @hw: pointer to the HW structure ++ * @tail: address of tail descriptor register ++ * @i: value to write to tail descriptor register ++ * ++ * When updating the tail register, the ME could be accessing Host CSR ++ * registers at the same time. Normally, this is handled in h/w by an ++ * arbiter but on some parts there is a bug that acknowledges Host accesses ++ * later than it should which could result in the descriptor register to ++ * have an incorrect value. Workaround this by checking the FWSM register ++ * which has bit 24 set while ME is accessing Host CSR registers, wait ++ * if it is set and try again a number of times. 
++ **/ ++static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail, ++ unsigned int i) ++{ ++ unsigned int j = 0; ++ ++ while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) && ++ (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI)) ++ udelay(50); ++ ++ writel(i, tail); ++ ++ if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail))) ++ return E1000_ERR_SWFW_SYNC; ++ ++ return 0; ++} ++ ++static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i) ++{ ++ u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (e1000e_update_tail_wa(hw, tail, i)) { ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ e_err("ME firmware caused invalid RDT - resetting\n"); ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i) ++{ ++ u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail); ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (e1000e_update_tail_wa(hw, tail, i)) { ++ u32 tctl = er32(TCTL); ++ ew32(TCTL, tctl & ~E1000_TCTL_EN); ++ e_err("ME firmware caused invalid TDT - resetting\n"); ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++/** ++ * e1000_alloc_rx_buffers - Replace used receive buffers ++ * @adapter: address of board private structure ++ **/ ++static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, ++ int cleaned_count, gfp_t gfp) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_extended *rx_desc; ++ struct e1000_buffer *buffer_info; ++ struct rtskb *skb; ++ unsigned int i; ++ unsigned int bufsz = adapter->rx_buffer_len; ++ ++ i = rx_ring->next_to_use; ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (cleaned_count--) { ++ skb = buffer_info->skb; ++ if (skb) { ++ rtskb_trim(skb, 0); ++ goto map_skb; ++ } ++ ++ skb = rtnetdev_alloc_rtskb(adapter->netdev, bufsz); ++ if (!skb) { ++ /* Better luck next round */ ++ adapter->alloc_rx_buff_failed++; ++ break; ++ } ++ rtskb_reserve(skb, NET_IP_ALIGN); ++ ++ buffer_info->skb = skb; ++map_skb: ++ buffer_info->dma = rtskb_data_dma_addr(skb, 0); ++ ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); ++ ++ if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) { ++ /* ++ * Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). 
++ */ ++ wmb(); ++ if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) ++ e1000e_update_rdt_wa(adapter, i); ++ else ++ writel(i, adapter->hw.hw_addr + rx_ring->tail); ++ } ++ i++; ++ if (i == rx_ring->count) ++ i = 0; ++ buffer_info = &rx_ring->buffer_info[i]; ++ } ++ ++ rx_ring->next_to_use = i; ++} ++ ++/** ++ * e1000_clean_rx_irq - Send received data up the network stack; legacy ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, ++ nanosecs_abs_t *time_stamp) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ union e1000_rx_desc_extended *rx_desc, *next_rxd; ++ struct e1000_buffer *buffer_info, *next_buffer; ++ u32 length, staterr; ++ unsigned int i; ++ int cleaned_count = 0; ++ bool data_received = false; ++ unsigned int total_rx_bytes = 0, total_rx_packets = 0; ++ ++ i = rx_ring->next_to_clean; ++ rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ buffer_info = &rx_ring->buffer_info[i]; ++ ++ while (staterr & E1000_RXD_STAT_DD) { ++ struct rtskb *skb; ++ ++ rmb(); /* read descriptor and rx_buffer_info after status DD */ ++ ++ skb = buffer_info->skb; ++ buffer_info->skb = NULL; ++ ++ prefetch(skb->data - NET_IP_ALIGN); ++ ++ i++; ++ if (i == rx_ring->count) ++ i = 0; ++ next_rxd = E1000_RX_DESC_EXT(*rx_ring, i); ++ prefetch(next_rxd); ++ ++ next_buffer = &rx_ring->buffer_info[i]; ++ ++ cleaned_count++; ++ buffer_info->dma = 0; ++ ++ length = le16_to_cpu(rx_desc->wb.upper.length); ++ ++ /* ++ * !EOP means multiple descriptors were used to store a single ++ * packet, if that's the case we need to toss it. In fact, we ++ * need to toss every packet with the EOP bit clear and the ++ * next frame that _does_ have the EOP bit set, as it is by ++ * definition only a frame fragment ++ */ ++ if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) ++ adapter->flags2 |= FLAG2_IS_DISCARDING; ++ ++ if (adapter->flags2 & FLAG2_IS_DISCARDING) { ++ /* All receives must fit into a single buffer */ ++ e_dbg("Receive packet consumed multiple buffers\n"); ++ /* recycle */ ++ buffer_info->skb = skb; ++ if (staterr & E1000_RXD_STAT_EOP) ++ adapter->flags2 &= ~FLAG2_IS_DISCARDING; ++ goto next_desc; ++ } ++ ++ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { ++ /* recycle */ ++ buffer_info->skb = skb; ++ goto next_desc; ++ } ++ ++ /* adjust length to remove Ethernet CRC */ ++ if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) ++ length -= 4; ++ ++ total_rx_bytes += length; ++ total_rx_packets++; ++ ++ rtskb_put(skb, length); ++ ++ /* Receive Checksum Offload */ ++ e1000_rx_checksum(adapter, staterr, ++ le16_to_cpu(rx_desc->wb.lower.hi_dword. 
++ csum_ip.csum), skb); ++ ++ skb->protocol = rt_eth_type_trans(skb, netdev); ++ skb->time_stamp = *time_stamp; ++ rtnetif_rx(skb); ++ data_received = true; ++ ++next_desc: ++ rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF); ++ ++ /* return some buffers to hardware, one at a time is too slow */ ++ if (cleaned_count >= E1000_RX_BUFFER_WRITE) { ++ adapter->alloc_rx_buf(adapter, cleaned_count, ++ GFP_ATOMIC); ++ cleaned_count = 0; ++ } ++ ++ /* use prefetched values */ ++ rx_desc = next_rxd; ++ buffer_info = next_buffer; ++ ++ staterr = le32_to_cpu(rx_desc->wb.upper.status_error); ++ } ++ rx_ring->next_to_clean = i; ++ ++ cleaned_count = e1000_desc_unused(rx_ring); ++ if (cleaned_count) ++ adapter->alloc_rx_buf(adapter, cleaned_count, GFP_ATOMIC); ++ ++ adapter->total_rx_bytes += total_rx_bytes; ++ adapter->total_rx_packets += total_rx_packets; ++ return data_received; ++} ++ ++static void e1000_put_txbuf(struct e1000_adapter *adapter, ++ struct e1000_buffer *buffer_info) ++{ ++ buffer_info->dma = 0; ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ buffer_info->time_stamp = 0; ++} ++ ++/** ++ * e1000_clean_tx_irq - Reclaim resources after transmit completes ++ * @adapter: board private structure ++ * ++ * the return value indicates whether actual cleaning was done, there ++ * is no guarantee that everything was cleaned ++ **/ ++static bool e1000_clean_tx_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc, *eop_desc; ++ struct e1000_buffer *buffer_info; ++ unsigned int i, eop; ++ unsigned int count = 0; ++ unsigned int total_tx_bytes = 0, total_tx_packets = 0; ++ ++ i = tx_ring->next_to_clean; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ ++ while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) && ++ (count < tx_ring->count)) { ++ bool cleaned = false; ++ rmb(); /* read buffer_info after eop_desc */ ++ for (; !cleaned; count++) { ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ buffer_info = &tx_ring->buffer_info[i]; ++ cleaned = (i == eop); ++ ++ if (cleaned) { ++ total_tx_packets += buffer_info->segs; ++ total_tx_bytes += buffer_info->bytecount; ++ } ++ ++ e1000_put_txbuf(adapter, buffer_info); ++ tx_desc->upper.data = 0; ++ ++ i++; ++ if (i == tx_ring->count) ++ i = 0; ++ } ++ ++ if (i == tx_ring->next_to_use) ++ break; ++ eop = tx_ring->buffer_info[i].next_to_watch; ++ eop_desc = E1000_TX_DESC(*tx_ring, eop); ++ } ++ ++ tx_ring->next_to_clean = i; ++ ++#define TX_WAKE_THRESHOLD 32 ++ if (count && rtnetif_carrier_ok(netdev) && ++ e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) { ++ /* Make sure that anybody stopping the queue after this ++ * sees the new next_to_clean. 
++ */ ++ smp_mb(); ++ ++ if (rtnetif_queue_stopped(netdev) && ++ !(test_bit(__E1000_DOWN, &adapter->state))) { ++ rtnetif_wake_queue(netdev); ++ ++adapter->restart_queue; ++ } ++ } ++ ++ if (adapter->detect_tx_hung) { ++ /* ++ * Detect a transmit hang in hardware, this serializes the ++ * check with the clearing of time_stamp and movement of i ++ */ ++ adapter->detect_tx_hung = 0; ++ if (tx_ring->buffer_info[i].time_stamp && ++ time_after(jiffies, tx_ring->buffer_info[i].time_stamp ++ + (adapter->tx_timeout_factor * HZ)) && ++ !(er32(STATUS) & E1000_STATUS_TXOFF)) { ++ rtnetif_stop_queue(netdev); ++ } ++ } ++ adapter->total_tx_bytes += total_tx_bytes; ++ adapter->total_tx_packets += total_tx_packets; ++ return count < tx_ring->count; ++} ++ ++/** ++ * e1000_clean_rx_ring - Free Rx Buffers per Queue ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_rx_ring(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned int i; ++ ++ /* Free all the Rx ring sk_buffs */ ++ for (i = 0; i < rx_ring->count; i++) { ++ buffer_info = &rx_ring->buffer_info[i]; ++ buffer_info->dma = 0; ++ ++ if (buffer_info->skb) { ++ kfree_rtskb(buffer_info->skb); ++ buffer_info->skb = NULL; ++ } ++ } ++ ++ /* there also may be some cached data from a chained receive */ ++ if (rx_ring->rx_skb_top) { ++ kfree_rtskb(rx_ring->rx_skb_top); ++ rx_ring->rx_skb_top = NULL; ++ } ++ ++ /* Zero out the descriptor ring */ ++ memset(rx_ring->desc, 0, rx_ring->size); ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ adapter->flags2 &= ~FLAG2_IS_DISCARDING; ++ ++ writel(0, adapter->hw.hw_addr + rx_ring->head); ++ writel(0, adapter->hw.hw_addr + rx_ring->tail); ++} ++ ++static void e1000e_downshift_workaround(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, downshift_task); ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ e1000e_gig_downshift_workaround_ich8lan(&adapter->hw); ++} ++ ++/** ++ * e1000_intr_msi - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr_msi(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ u32 icr = er32(ICR); ++ ++ /* ++ * read ICR disables interrupts using IAM ++ */ ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* ++ * ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers ++ */ ++ if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) && ++ (!(er32(STATUS) & E1000_STATUS_LU))) ++ rtdm_schedule_nrt_work(&adapter->downshift_task); ++ ++ /* ++ * 80003ES2LAN workaround-- For packet buffer work-around on ++ * link down event; disable receives here in the ISR and reset ++ * adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(adapter->netdev) && ++ adapter->flags & FLAG_RX_NEEDS_RESTART) { ++ /* disable receives */ ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ rtdm_nrtsig_pend(&adapter->mod_timer_sig); ++ } ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, 
adapter->tx_ring->ims_val); ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_intr - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static int e1000_intr(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ u32 rctl, icr = er32(ICR); ++ ++ if (!icr || test_bit(__E1000_DOWN, &adapter->state)) ++ return RTDM_IRQ_NONE; /* Not our interrupt */ ++ ++ /* ++ * IMS will not auto-mask if INT_ASSERTED is not set, and if it is ++ * not set, then the adapter didn't send an interrupt ++ */ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) ++ return RTDM_IRQ_NONE; ++ ++ /* ++ * Interrupt Auto-Mask...upon reading ICR, ++ * interrupts are masked. No need for the ++ * IMC write ++ */ ++ ++ if (icr & E1000_ICR_LSC) { ++ hw->mac.get_link_status = 1; ++ /* ++ * ICH8 workaround-- Call gig speed drop workaround on cable ++ * disconnect (LSC) before accessing any PHY registers ++ */ ++ if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) && ++ (!(er32(STATUS) & E1000_STATUS_LU))) ++ rtdm_nrtsig_pend(&adapter->downshift_sig); ++ ++ /* ++ * 80003ES2LAN workaround-- ++ * For packet buffer work-around on link down event; ++ * disable receives here in the ISR and ++ * reset adapter in watchdog ++ */ ++ if (rtnetif_carrier_ok(adapter->netdev) && ++ (adapter->flags & FLAG_RX_NEEDS_RESTART)) { ++ /* disable receives */ ++ rctl = er32(RCTL); ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ adapter->flags |= FLAG_RX_RESTART_NOW; ++ } ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ rtdm_nrtsig_pend(&adapter->mod_timer_sig); ++ } ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, adapter->tx_ring->ims_val); ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static irqreturn_t e1000_msix_other(int irq, void *data) ++{ ++ struct rtnet_device *netdev = data; ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = er32(ICR); ++ ++ if (!(icr & E1000_ICR_INT_ASSERTED)) { ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ ew32(IMS, E1000_IMS_OTHER); ++ return IRQ_NONE; ++ } ++ ++ if (icr & adapter->eiac_mask) ++ ew32(ICS, (icr & adapter->eiac_mask)); ++ ++ if (icr & E1000_ICR_OTHER) { ++ if (!(icr & E1000_ICR_LSC)) ++ goto no_link_interrupt; ++ hw->mac.get_link_status = 1; ++ /* guard against interrupt when we're going down */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->watchdog_timer, jiffies + 1); ++ } ++ ++no_link_interrupt: ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER); ++ ++ return IRQ_HANDLED; ++} ++ ++ ++static int e1000_intr_msix_tx(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ ++ ++ adapter->total_tx_bytes = 0; ++ adapter->total_tx_packets = 0; ++ ++ if (!e1000_clean_tx_irq(adapter)) ++ /* Ring was not completely cleaned, so fire another interrupt */ ++ ew32(ICS, tx_ring->ims_val); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int 
e1000_intr_msix_rx(rtdm_irq_t *irq_handle) ++{ ++ struct e1000_adapter *adapter = ++ rtdm_irq_get_arg(irq_handle, struct e1000_adapter); ++ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ /* Write the ITR value calculated at the end of the ++ * previous interrupt. ++ */ ++ if (adapter->rx_ring->set_itr) { ++ writel(1000000000 / (adapter->rx_ring->itr_val * 256), ++ adapter->hw.hw_addr + adapter->rx_ring->itr_register); ++ adapter->rx_ring->set_itr = 0; ++ } ++ ++ if (e1000_clean_rx_irq(adapter, &time_stamp)) ++ rt_mark_stack_mgr(adapter->netdev); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++/** ++ * e1000_configure_msix - Configure MSI-X hardware ++ * ++ * e1000_configure_msix sets up the hardware to properly ++ * generate MSI-X interrupts. ++ **/ ++static void e1000_configure_msix(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ int vector = 0; ++ u32 ctrl_ext, ivar = 0; ++ ++ adapter->eiac_mask = 0; ++ ++ /* Workaround issue with spurious interrupts on 82574 in MSI-X mode */ ++ if (hw->mac.type == e1000_82574) { ++ u32 rfctl = er32(RFCTL); ++ rfctl |= E1000_RFCTL_ACK_DIS; ++ ew32(RFCTL, rfctl); ++ } ++ ++#define E1000_IVAR_INT_ALLOC_VALID 0x8 ++ /* Configure Rx vector */ ++ rx_ring->ims_val = E1000_IMS_RXQ0; ++ adapter->eiac_mask |= rx_ring->ims_val; ++ if (rx_ring->itr_val) ++ writel(1000000000 / (rx_ring->itr_val * 256), ++ hw->hw_addr + rx_ring->itr_register); ++ else ++ writel(1, hw->hw_addr + rx_ring->itr_register); ++ ivar = E1000_IVAR_INT_ALLOC_VALID | vector; ++ ++ /* Configure Tx vector */ ++ tx_ring->ims_val = E1000_IMS_TXQ0; ++ vector++; ++ if (tx_ring->itr_val) ++ writel(1000000000 / (tx_ring->itr_val * 256), ++ hw->hw_addr + tx_ring->itr_register); ++ else ++ writel(1, hw->hw_addr + tx_ring->itr_register); ++ adapter->eiac_mask |= tx_ring->ims_val; ++ ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 8); ++ ++ /* set vector for Other Causes, e.g. link changes */ ++ vector++; ++ ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 16); ++ if (rx_ring->itr_val) ++ writel(1000000000 / (rx_ring->itr_val * 256), ++ hw->hw_addr + E1000_EITR_82574(vector)); ++ else ++ writel(1, hw->hw_addr + E1000_EITR_82574(vector)); ++ ++ /* Cause Tx interrupts on every write back */ ++ ivar |= (1 << 31); ++ ++ ew32(IVAR, ivar); ++ ++ /* enable MSI-X PBA support */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_PBA_CLR; ++ ++ /* Auto-Mask Other interrupts upon ICR read */ ++#define E1000_EIAC_MASK_82574 0x01F00000 ++ ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER); ++ ctrl_ext |= E1000_CTRL_EXT_EIAME; ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++} ++ ++void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter) ++{ ++ if (adapter->msix_entries) { ++ pci_disable_msix(adapter->pdev); ++ kfree(adapter->msix_entries); ++ adapter->msix_entries = NULL; ++ } else if (adapter->flags & FLAG_MSI_ENABLED) { ++ pci_disable_msi(adapter->pdev); ++ adapter->flags &= ~FLAG_MSI_ENABLED; ++ } ++} ++ ++/** ++ * e1000e_set_interrupt_capability - set MSI or MSI-X if supported ++ * ++ * Attempt to configure interrupts using the best available ++ * capabilities of the hardware and kernel. 
++ **/ ++void e1000e_set_interrupt_capability(struct e1000_adapter *adapter) ++{ ++ int err; ++ int i; ++ ++ switch (adapter->int_mode) { ++ case E1000E_INT_MODE_MSIX: ++ if (adapter->flags & FLAG_HAS_MSIX) { ++ adapter->num_vectors = 3; /* RxQ0, TxQ0 and other */ ++ adapter->msix_entries = kcalloc(adapter->num_vectors, ++ sizeof(struct msix_entry), ++ GFP_KERNEL); ++ if (adapter->msix_entries) { ++ for (i = 0; i < adapter->num_vectors; i++) ++ adapter->msix_entries[i].entry = i; ++ ++ err = pci_enable_msix_range(adapter->pdev, ++ adapter->msix_entries, ++ adapter->num_vectors, ++ adapter->num_vectors); ++ if (err == 0) ++ return; ++ } ++ /* MSI-X failed, so fall through and try MSI */ ++ e_err("Failed to initialize MSI-X interrupts. " ++ "Falling back to MSI interrupts.\n"); ++ e1000e_reset_interrupt_capability(adapter); ++ } ++ adapter->int_mode = E1000E_INT_MODE_MSI; ++ /* Fall through */ ++ case E1000E_INT_MODE_MSI: ++ if (!pci_enable_msi(adapter->pdev)) { ++ adapter->flags |= FLAG_MSI_ENABLED; ++ } else { ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ e_err("Failed to initialize MSI interrupts. Falling " ++ "back to legacy interrupts.\n"); ++ } ++ /* Fall through */ ++ case E1000E_INT_MODE_LEGACY: ++ /* Don't do anything; this is the system default */ ++ break; ++ } ++ ++ /* store the number of vectors being used */ ++ adapter->num_vectors = 1; ++} ++ ++/** ++ * e1000_request_msix - Initialize MSI-X interrupts ++ * ++ * e1000_request_msix allocates MSI-X vectors and requests interrupts from the ++ * kernel. ++ **/ ++static int e1000_request_msix(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int err = 0, vector = 0; ++ ++ if (strlen(netdev->name) < (IFNAMSIZ - 5)) ++ snprintf(adapter->rx_ring->name, ++ sizeof(adapter->rx_ring->name) - 1, ++ "%s-rx-0", netdev->name); ++ else ++ memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ); ++ err = rtdm_irq_request(&adapter->rx_irq_handle, ++ adapter->msix_entries[vector].vector, ++ e1000_intr_msix_rx, 0, adapter->rx_ring->name, ++ adapter); ++ if (err) ++ goto out; ++ adapter->rx_ring->itr_register = E1000_EITR_82574(vector); ++ adapter->rx_ring->itr_val = adapter->itr; ++ vector++; ++ ++ if (strlen(netdev->name) < (IFNAMSIZ - 5)) ++ snprintf(adapter->tx_ring->name, ++ sizeof(adapter->tx_ring->name) - 1, ++ "%s-tx-0", netdev->name); ++ else ++ memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ); ++ err = rtdm_irq_request(&adapter->tx_irq_handle, ++ adapter->msix_entries[vector].vector, ++ e1000_intr_msix_tx, 0, adapter->tx_ring->name, ++ adapter); ++ if (err) ++ goto out; ++ adapter->tx_ring->itr_register = E1000_EITR_82574(vector); ++ adapter->tx_ring->itr_val = adapter->itr; ++ vector++; ++ ++ err = request_irq(adapter->msix_entries[vector].vector, ++ e1000_msix_other, 0, netdev->name, netdev); ++ if (err) ++ goto out; ++ ++ e1000_configure_msix(adapter); ++ return 0; ++out: ++ return err; ++} ++ ++/** ++ * e1000_request_irq - initialize interrupts ++ * ++ * Attempts to configure interrupts using the best available ++ * capabilities of the hardware and kernel. 
++ **/ ++static int e1000_request_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ int err; ++ ++ if (adapter->msix_entries) { ++ err = e1000_request_msix(adapter); ++ if (!err) ++ return err; ++ /* fall back to MSI */ ++ e1000e_reset_interrupt_capability(adapter); ++ adapter->int_mode = E1000E_INT_MODE_MSI; ++ e1000e_set_interrupt_capability(adapter); ++ } ++ if (adapter->flags & FLAG_MSI_ENABLED) { ++ err = rtdm_irq_request(&adapter->irq_handle, ++ adapter->pdev->irq, e1000_intr_msi, ++ 0, netdev->name, adapter); ++ if (!err) ++ return err; ++ ++ /* fall back to legacy interrupt */ ++ e1000e_reset_interrupt_capability(adapter); ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ } ++ ++ err = rtdm_irq_request(&adapter->irq_handle, adapter->pdev->irq, ++ e1000_intr, 0, netdev->name, adapter); ++ if (err) ++ e_err("Unable to allocate interrupt, Error: %d\n", err); ++ ++ return err; ++} ++ ++static void e1000_free_irq(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ if (adapter->msix_entries) { ++ int vector = 0; ++ ++ rtdm_irq_disable(&adapter->rx_irq_handle); ++ rtdm_irq_free(&adapter->rx_irq_handle); ++ vector++; ++ ++ rtdm_irq_disable(&adapter->tx_irq_handle); ++ rtdm_irq_free(&adapter->tx_irq_handle); ++ vector++; ++ ++ /* Other Causes interrupt vector */ ++ free_irq(adapter->msix_entries[vector].vector, netdev); ++ return; ++ } ++ ++ if (adapter->flags & FLAG_MSI_ENABLED) ++ rtdm_irq_disable(&adapter->irq_handle); ++ rtdm_irq_free(&adapter->irq_handle); ++} ++ ++/** ++ * e1000_irq_disable - Mask off interrupt generation on the NIC ++ **/ ++static void e1000_irq_disable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ ew32(IMC, ~0); ++ if (adapter->msix_entries) ++ ew32(EIAC_82574, 0); ++ e1e_flush(); ++ ++ if (adapter->msix_entries) { ++ int i; ++ for (i = 0; i < adapter->num_vectors; i++) ++ synchronize_irq(adapter->msix_entries[i].vector); ++ } else { ++ synchronize_irq(adapter->pdev->irq); ++ } ++} ++ ++/** ++ * e1000_irq_enable - Enable default interrupt generation settings ++ **/ ++static void e1000_irq_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (adapter->msix_entries) { ++ ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); ++ ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); ++ } else { ++ ew32(IMS, IMS_ENABLE_MASK); ++ } ++ e1e_flush(); ++} ++ ++/** ++ * e1000e_get_hw_control - get control of the h/w from f/w ++ * @adapter: address of board private structure ++ * ++ * e1000e_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit. ++ * For ASF and Pass Through versions of f/w this means that ++ * the driver is loaded. For AMT version (only with 82573) ++ * of the f/w this means that the network i/f is open. ++ **/ ++void e1000e_get_hw_control(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware know the driver has taken over */ ++ if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm | E1000_SWSM_DRV_LOAD); ++ } else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) { ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); ++ } ++} ++ ++/** ++ * e1000e_release_hw_control - release control of the h/w to f/w ++ * @adapter: address of board private structure ++ * ++ * e1000e_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit. 
++ * For ASF and Pass Through versions of f/w this means that the ++ * driver is no longer loaded. For AMT version (only with 82573) i ++ * of the f/w this means that the network i/f is closed. ++ * ++ **/ ++void e1000e_release_hw_control(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl_ext; ++ u32 swsm; ++ ++ /* Let firmware taken over control of h/w */ ++ if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) { ++ swsm = er32(SWSM); ++ ew32(SWSM, swsm & ~E1000_SWSM_DRV_LOAD); ++ } else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) { ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); ++ } ++} ++ ++/** ++ * @e1000_alloc_ring - allocate memory for a ring structure ++ **/ ++static int e1000_alloc_ring_dma(struct e1000_adapter *adapter, ++ struct e1000_ring *ring) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ ++ ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma, ++ GFP_KERNEL); ++ if (!ring->desc) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * e1000e_setup_tx_resources - allocate Tx resources (Descriptors) ++ * @adapter: board private structure ++ * ++ * Return 0 on success, negative on failure ++ **/ ++int e1000e_setup_tx_resources(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ int err = -ENOMEM, size; ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ tx_ring->buffer_info = vzalloc(size); ++ if (!tx_ring->buffer_info) ++ goto err; ++ ++ /* round up to nearest 4K */ ++ tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc); ++ tx_ring->size = ALIGN(tx_ring->size, 4096); ++ ++ err = e1000_alloc_ring_dma(adapter, tx_ring); ++ if (err) ++ goto err; ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ ++ return 0; ++err: ++ vfree(tx_ring->buffer_info); ++ e_err("Unable to allocate memory for the transmit descriptor ring\n"); ++ return err; ++} ++ ++/** ++ * e1000e_setup_rx_resources - allocate Rx resources (Descriptors) ++ * @adapter: board private structure ++ * ++ * Returns 0 on success, negative on failure ++ **/ ++int e1000e_setup_rx_resources(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ int size, desc_len, err = -ENOMEM; ++ ++ size = sizeof(struct e1000_buffer) * rx_ring->count; ++ rx_ring->buffer_info = vzalloc(size); ++ if (!rx_ring->buffer_info) ++ goto err; ++ ++ desc_len = sizeof(union e1000_rx_desc_packet_split); ++ ++ /* Round up to nearest 4K */ ++ rx_ring->size = rx_ring->count * desc_len; ++ rx_ring->size = ALIGN(rx_ring->size, 4096); ++ ++ err = e1000_alloc_ring_dma(adapter, rx_ring); ++ if (err) ++ goto err; ++ ++ rx_ring->next_to_clean = 0; ++ rx_ring->next_to_use = 0; ++ rx_ring->rx_skb_top = NULL; ++ ++ return 0; ++ ++err: ++ vfree(rx_ring->buffer_info); ++ e_err("Unable to allocate memory for the receive descriptor ring\n"); ++ return err; ++} ++ ++/** ++ * e1000_clean_tx_ring - Free Tx Buffers ++ * @adapter: board private structure ++ **/ ++static void e1000_clean_tx_ring(struct e1000_adapter *adapter) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned long size; ++ unsigned int i; ++ ++ for (i = 0; i < tx_ring->count; i++) { ++ buffer_info = &tx_ring->buffer_info[i]; ++ e1000_put_txbuf(adapter, buffer_info); ++ } ++ ++ size = sizeof(struct e1000_buffer) * tx_ring->count; ++ memset(tx_ring->buffer_info, 0, size); ++ ++ memset(tx_ring->desc, 0, tx_ring->size); ++ ++ tx_ring->next_to_use = 0; ++ tx_ring->next_to_clean = 0; ++ 
++ writel(0, adapter->hw.hw_addr + tx_ring->head); ++ writel(0, adapter->hw.hw_addr + tx_ring->tail); ++} ++ ++/** ++ * e1000e_free_tx_resources - Free Tx Resources per Queue ++ * @adapter: board private structure ++ * ++ * Free all transmit software resources ++ **/ ++void e1000e_free_tx_resources(struct e1000_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ ++ e1000_clean_tx_ring(adapter); ++ ++ vfree(tx_ring->buffer_info); ++ tx_ring->buffer_info = NULL; ++ ++ dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc, ++ tx_ring->dma); ++ tx_ring->desc = NULL; ++} ++ ++/** ++ * e1000e_free_rx_resources - Free Rx Resources ++ * @adapter: board private structure ++ * ++ * Free all receive software resources ++ **/ ++ ++void e1000e_free_rx_resources(struct e1000_adapter *adapter) ++{ ++ struct pci_dev *pdev = adapter->pdev; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ int i; ++ ++ e1000_clean_rx_ring(adapter); ++ ++ for (i = 0; i < rx_ring->count; i++) ++ kfree(rx_ring->buffer_info[i].ps_pages); ++ ++ vfree(rx_ring->buffer_info); ++ rx_ring->buffer_info = NULL; ++ ++ dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc, ++ rx_ring->dma); ++ rx_ring->desc = NULL; ++} ++ ++/** ++ * e1000_alloc_queues - Allocate memory for all rings ++ * @adapter: board private structure to initialize ++ **/ ++static int e1000_alloc_queues(struct e1000_adapter *adapter) ++{ ++ adapter->tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); ++ if (!adapter->tx_ring) ++ goto err; ++ ++ rtdm_lock_init(&adapter->tx_ring->lock); ++ ++ adapter->rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL); ++ if (!adapter->rx_ring) ++ goto err; ++ ++ return 0; ++err: ++ e_err("Unable to allocate memory for queues\n"); ++ kfree(adapter->rx_ring); ++ kfree(adapter->tx_ring); ++ return -ENOMEM; ++} ++ ++static void e1000_vlan_rx_add_vid(struct rtnet_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vfta, index; ++ ++ /* don't update vlan cookie if already programmed */ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) ++ return; ++ ++ /* add VID to filter table */ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index); ++ vfta |= (1 << (vid & 0x1F)); ++ hw->mac.ops.write_vfta(hw, index, vfta); ++ } ++ ++ set_bit(vid, adapter->active_vlans); ++} ++ ++static void e1000_vlan_rx_kill_vid(struct rtnet_device *netdev, u16 vid) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 vfta, index; ++ ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && ++ (vid == adapter->mng_vlan_id)) { ++ /* release control to f/w */ ++ e1000e_release_hw_control(adapter); ++ return; ++ } ++ ++ /* remove VID from filter table */ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ index = (vid >> 5) & 0x7F; ++ vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index); ++ vfta &= ~(1 << (vid & 0x1F)); ++ hw->mac.ops.write_vfta(hw, index, vfta); ++ } ++ ++ clear_bit(vid, adapter->active_vlans); ++} ++ ++/** ++ * e1000e_vlan_filter_disable - helper to disable hw VLAN filtering ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = 
&adapter->hw; ++ u32 rctl; ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ /* disable VLAN receive filtering */ ++ rctl = er32(RCTL); ++ rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); ++ ew32(RCTL, rctl); ++ ++ if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) { ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ } ++ } ++} ++ ++/** ++ * e1000e_vlan_filter_enable - helper to enable HW VLAN filtering ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) { ++ /* enable VLAN receive filtering */ ++ rctl = er32(RCTL); ++ rctl |= E1000_RCTL_VFE; ++ rctl &= ~E1000_RCTL_CFIEN; ++ ew32(RCTL, rctl); ++ } ++} ++ ++/** ++ * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* disable VLAN tag insert/strip */ ++ ctrl = er32(CTRL); ++ ctrl &= ~E1000_CTRL_VME; ++ ew32(CTRL, ctrl); ++} ++ ++/** ++ * e1000e_vlan_strip_enable - helper to enable HW VLAN stripping ++ * @adapter: board private structure to initialize ++ **/ ++static void e1000e_vlan_strip_enable(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl; ++ ++ /* enable VLAN tag insert/strip */ ++ ctrl = er32(CTRL); ++ ctrl |= E1000_CTRL_VME; ++ ew32(CTRL, ctrl); ++} ++ ++static void e1000_update_mng_vlan(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ u16 vid = adapter->hw.mng_cookie.vlan_id; ++ u16 old_vid = adapter->mng_vlan_id; ++ ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { ++ e1000_vlan_rx_add_vid(netdev, vid); ++ adapter->mng_vlan_id = vid; ++ } ++ ++ if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid)) ++ e1000_vlan_rx_kill_vid(netdev, old_vid); ++} ++ ++static void e1000_restore_vlan(struct e1000_adapter *adapter) ++{ ++ u16 vid; ++ ++ e1000_vlan_rx_add_vid(adapter->netdev, 0); ++ ++ for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) ++ e1000_vlan_rx_add_vid(adapter->netdev, vid); ++} ++ ++static void e1000_init_manageability_pt(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 manc, manc2h, mdef, i, j; ++ ++ if (!(adapter->flags & FLAG_MNG_PT_ENABLED)) ++ return; ++ ++ manc = er32(MANC); ++ ++ /* ++ * enable receiving management packets to the host. this will probably ++ * generate destination unreachable messages from the host OS, but ++ * the packets will be handled on SMBUS ++ */ ++ manc |= E1000_MANC_EN_MNG2HOST; ++ manc2h = er32(MANC2H); ++ ++ switch (hw->mac.type) { ++ default: ++ manc2h |= (E1000_MANC2H_PORT_623 | E1000_MANC2H_PORT_664); ++ break; ++ case e1000_82574: ++ case e1000_82583: ++ /* ++ * Check if IPMI pass-through decision filter already exists; ++ * if so, enable it. 
++ */ ++ for (i = 0, j = 0; i < 8; i++) { ++ mdef = er32(MDEF(i)); ++ ++ /* Ignore filters with anything other than IPMI ports */ ++ if (mdef & ~(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) ++ continue; ++ ++ /* Enable this decision filter in MANC2H */ ++ if (mdef) ++ manc2h |= (1 << i); ++ ++ j |= mdef; ++ } ++ ++ if (j == (E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664)) ++ break; ++ ++ /* Create new decision filter in an empty filter */ ++ for (i = 0, j = 0; i < 8; i++) ++ if (er32(MDEF(i)) == 0) { ++ ew32(MDEF(i), (E1000_MDEF_PORT_623 | ++ E1000_MDEF_PORT_664)); ++ manc2h |= (1 << 1); ++ j++; ++ break; ++ } ++ ++ if (!j) ++ e_warn("Unable to create IPMI pass-through filter\n"); ++ break; ++ } ++ ++ ew32(MANC2H, manc2h); ++ ew32(MANC, manc); ++} ++ ++/** ++ * e1000_configure_tx - Configure Transmit Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Tx unit of the MAC after a reset. ++ **/ ++static void e1000_configure_tx(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ u64 tdba; ++ u32 tdlen, tctl, tipg, tarc; ++ u32 ipgr1, ipgr2; ++ ++ /* Setup the HW Tx Head and Tail descriptor pointers */ ++ tdba = tx_ring->dma; ++ tdlen = tx_ring->count * sizeof(struct e1000_tx_desc); ++ ew32(TDBAL, (tdba & DMA_BIT_MASK(32))); ++ ew32(TDBAH, (tdba >> 32)); ++ ew32(TDLEN, tdlen); ++ ew32(TDH, 0); ++ ew32(TDT, 0); ++ tx_ring->head = E1000_TDH; ++ tx_ring->tail = E1000_TDT; ++ ++ /* Set the default values for the Tx Inter Packet Gap timer */ ++ tipg = DEFAULT_82543_TIPG_IPGT_COPPER; /* 8 */ ++ ipgr1 = DEFAULT_82543_TIPG_IPGR1; /* 8 */ ++ ipgr2 = DEFAULT_82543_TIPG_IPGR2; /* 6 */ ++ ++ if (adapter->flags & FLAG_TIPG_MEDIUM_FOR_80003ESLAN) ++ ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; /* 7 */ ++ ++ tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT; ++ tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT; ++ ew32(TIPG, tipg); ++ ++ /* Set the Tx Interrupt Delay register */ ++ ew32(TIDV, adapter->tx_int_delay); ++ /* Tx irq moderation */ ++ ew32(TADV, adapter->tx_abs_int_delay); ++ ++ if (adapter->flags2 & FLAG2_DMA_BURST) { ++ u32 txdctl = er32(TXDCTL(0)); ++ txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH | ++ E1000_TXDCTL_WTHRESH); ++ /* ++ * set up some performance related parameters to encourage the ++ * hardware to use the bus more efficiently in bursts, depends ++ * on the tx_int_delay to be enabled, ++ * wthresh = 5 ==> burst write a cacheline (64 bytes) at a time ++ * hthresh = 1 ==> prefetch when one or more available ++ * pthresh = 0x1f ==> prefetch if internal cache 31 or less ++ * BEWARE: this seems to work but should be considered first if ++ * there are Tx hangs or other Tx related bugs ++ */ ++ txdctl |= E1000_TXDCTL_DMA_BURST_ENABLE; ++ ew32(TXDCTL(0), txdctl); ++ /* erratum work around: set txdctl the same for both queues */ ++ ew32(TXDCTL(1), txdctl); ++ } ++ ++ /* Program the Transmit Control Register */ ++ tctl = er32(TCTL); ++ tctl &= ~E1000_TCTL_CT; ++ tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | ++ (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); ++ ++ if (adapter->flags & FLAG_TARC_SPEED_MODE_BIT) { ++ tarc = er32(TARC(0)); ++ /* ++ * set the speed mode bit, we'll clear it if we're not at ++ * gigabit link later ++ */ ++#define SPEED_MODE_BIT (1 << 21) ++ tarc |= SPEED_MODE_BIT; ++ ew32(TARC(0), tarc); ++ } ++ ++ /* errata: program both queues to unweighted RR */ ++ if (adapter->flags & FLAG_TARC_SET_BIT_ZERO) { ++ tarc = er32(TARC(0)); ++ tarc |= 1; ++ ew32(TARC(0), tarc); ++ tarc = er32(TARC(1)); ++ 
tarc |= 1; ++ ew32(TARC(1), tarc); ++ } ++ ++ /* Setup Transmit Descriptor Settings for eop descriptor */ ++ adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS; ++ ++ /* only set IDE if we are delaying interrupts using the timers */ ++ if (adapter->tx_int_delay) ++ adapter->txd_cmd |= E1000_TXD_CMD_IDE; ++ ++ /* enable Report Status bit */ ++ adapter->txd_cmd |= E1000_TXD_CMD_RS; ++ ++ ew32(TCTL, tctl); ++ ++ e1000e_config_collision_dist(hw); ++} ++ ++/** ++ * e1000_setup_rctl - configure the receive control registers ++ * @adapter: Board private structure ++ **/ ++#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ ++ (((S) & (PAGE_SIZE - 1)) ? 1 : 0)) ++static void e1000_setup_rctl(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl, rfctl; ++ ++ /* Workaround Si errata on PCHx - configure jumbo frame flow */ ++ if (hw->mac.type >= e1000_pch2lan) { ++ s32 ret_val; ++ ++ if (adapter->netdev->mtu > ETH_DATA_LEN) ++ ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true); ++ else ++ ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); ++ ++ if (ret_val) ++ e_dbg("failed to enable jumbo frame workaround mode\n"); ++ } ++ ++ /* Program MC offset vector base */ ++ rctl = er32(RCTL); ++ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); ++ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | ++ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | ++ (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); ++ ++ /* Do not Store bad packets */ ++ rctl &= ~E1000_RCTL_SBP; ++ ++ /* Enable Long Packet receive */ ++ if (adapter->netdev->mtu <= ETH_DATA_LEN) ++ rctl &= ~E1000_RCTL_LPE; ++ else ++ rctl |= E1000_RCTL_LPE; ++ ++ /* Some systems expect that the CRC is included in SMBUS traffic. The ++ * hardware strips the CRC before sending to both SMBUS (BMC) and to ++ * host memory when this is enabled ++ */ ++ if (adapter->flags2 & FLAG2_CRC_STRIPPING) ++ rctl |= E1000_RCTL_SECRC; ++ ++ /* Workaround Si errata on 82577 PHY - configure IPG for jumbos */ ++ if ((hw->phy.type == e1000_phy_82577) && (rctl & E1000_RCTL_LPE)) { ++ u16 phy_data; ++ ++ e1e_rphy(hw, PHY_REG(770, 26), &phy_data); ++ phy_data &= 0xfff8; ++ phy_data |= (1 << 2); ++ e1e_wphy(hw, PHY_REG(770, 26), phy_data); ++ ++ e1e_rphy(hw, 22, &phy_data); ++ phy_data &= 0x0fff; ++ phy_data |= (1 << 14); ++ e1e_wphy(hw, 0x10, 0x2823); ++ e1e_wphy(hw, 0x11, 0x0003); ++ e1e_wphy(hw, 22, phy_data); ++ } ++ ++ /* Setup buffer sizes */ ++ rctl &= ~E1000_RCTL_SZ_4096; ++ rctl |= E1000_RCTL_BSEX; ++ switch (adapter->rx_buffer_len) { ++ case 2048: ++ default: ++ rctl |= E1000_RCTL_SZ_2048; ++ rctl &= ~E1000_RCTL_BSEX; ++ break; ++ case 4096: ++ rctl |= E1000_RCTL_SZ_4096; ++ break; ++ case 8192: ++ rctl |= E1000_RCTL_SZ_8192; ++ break; ++ case 16384: ++ rctl |= E1000_RCTL_SZ_16384; ++ break; ++ } ++ ++ /* Enable Extended Status in all Receive Descriptors */ ++ rfctl = er32(RFCTL); ++ rfctl |= E1000_RFCTL_EXTEN; ++ ++ adapter->rx_ps_pages = 0; ++ ++ ew32(RFCTL, rfctl); ++ ew32(RCTL, rctl); ++ /* just started the receive unit, no need to restart */ ++ adapter->flags &= ~FLAG_RX_RESTART_NOW; ++} ++ ++/** ++ * e1000_configure_rx - Configure Receive Unit after Reset ++ * @adapter: board private structure ++ * ++ * Configure the Rx unit of the MAC after a reset. 
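++ * In this RTnet port e1000_clean_rx_irq and e1000_alloc_rx_buffers are
++ * installed as the Rx handlers and the ring length is sized for extended
++ * Rx descriptors; packet-split pages are not used (rx_ps_pages is 0).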
++ **/ ++static void e1000_configure_rx(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_ring *rx_ring = adapter->rx_ring; ++ u64 rdba; ++ u32 rdlen, rctl, rxcsum, ctrl_ext; ++ ++ rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended); ++ adapter->clean_rx = e1000_clean_rx_irq; ++ adapter->alloc_rx_buf = e1000_alloc_rx_buffers; ++ ++ /* disable receives while setting up the descriptors */ ++ rctl = er32(RCTL); ++ if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ e1e_flush(); ++ usleep_range(10000, 20000); ++ ++ if (adapter->flags2 & FLAG2_DMA_BURST) { ++ /* ++ * set the writeback threshold (only takes effect if the RDTR ++ * is set). set GRAN=1 and write back up to 0x4 worth, and ++ * enable prefetching of 0x20 Rx descriptors ++ * granularity = 01 ++ * wthresh = 04, ++ * hthresh = 04, ++ * pthresh = 0x20 ++ */ ++ ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE); ++ ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE); ++ ++ /* ++ * override the delay timers for enabling bursting, only if ++ * the value was not set by the user via module options ++ */ ++ if (adapter->rx_int_delay == DEFAULT_RDTR) ++ adapter->rx_int_delay = BURST_RDTR; ++ if (adapter->rx_abs_int_delay == DEFAULT_RADV) ++ adapter->rx_abs_int_delay = BURST_RADV; ++ } ++ ++ /* set the Receive Delay Timer Register */ ++ ew32(RDTR, adapter->rx_int_delay); ++ ++ /* irq moderation */ ++ ew32(RADV, adapter->rx_abs_int_delay); ++ if ((adapter->itr_setting != 0) && (adapter->itr != 0)) ++ ew32(ITR, 1000000000 / (adapter->itr * 256)); ++ ++ ctrl_ext = er32(CTRL_EXT); ++ ew32(CTRL_EXT, ctrl_ext); ++ e1e_flush(); ++ ++ /* ++ * Setup the HW Rx Head and Tail Descriptor Pointers and ++ * the Base and Length of the Rx Descriptor Ring ++ */ ++ rdba = rx_ring->dma; ++ ew32(RDBAL, (rdba & DMA_BIT_MASK(32))); ++ ew32(RDBAH, (rdba >> 32)); ++ ew32(RDLEN, rdlen); ++ ew32(RDH, 0); ++ ew32(RDT, 0); ++ rx_ring->head = E1000_RDH; ++ rx_ring->tail = E1000_RDT; ++ ++ /* Enable Receive Checksum Offload for TCP and UDP */ ++ rxcsum = er32(RXCSUM); ++ if (adapter->netdev->features & NETIF_F_RXCSUM) { ++ rxcsum |= E1000_RXCSUM_TUOFL; ++ } else { ++ rxcsum &= ~E1000_RXCSUM_TUOFL; ++ /* no need to clear IPPCSE as it defaults to 0 */ ++ } ++ ew32(RXCSUM, rxcsum); ++ ++ /* Enable Receives */ ++ ew32(RCTL, rctl); ++} ++ ++/** ++ * e1000_update_mc_addr_list - Update Multicast addresses ++ * @hw: pointer to the HW structure ++ * @mc_addr_list: array of multicast addresses to program ++ * @mc_addr_count: number of multicast addresses to program ++ * ++ * Updates the Multicast Table Array. ++ * The caller must have a packed mc_addr_list of multicast addresses. ++ **/ ++static void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, ++ u32 mc_addr_count) ++{ ++ hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count); ++} ++ ++/** ++ * e1000_set_multi - Multicast and Promiscuous mode set ++ * @netdev: network interface device structure ++ * ++ * The set_multi entry point is called whenever the multicast address ++ * list or the network interface flags are updated. This routine is ++ * responsible for configuring the hardware for proper multicast, ++ * promiscuous mode, and all-multi behavior. 
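++ * Note: no per-address multicast list is maintained here; the multicast
++ * table is programmed with an empty list via
++ * e1000_update_mc_addr_list(hw, NULL, 0), so multicast reception relies
++ * on the all-multi or promiscuous settings above.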
++ **/ ++static void e1000_set_multi(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl; ++ ++ /* Check for Promiscuous and All Multicast modes */ ++ ++ rctl = er32(RCTL); ++ ++ if (netdev->flags & IFF_PROMISC) { ++ rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); ++ rctl &= ~E1000_RCTL_VFE; ++ /* Do not hardware filter VLANs in promisc mode */ ++ e1000e_vlan_filter_disable(adapter); ++ } else { ++ if (netdev->flags & IFF_ALLMULTI) { ++ rctl |= E1000_RCTL_MPE; ++ rctl &= ~E1000_RCTL_UPE; ++ } else { ++ rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE); ++ } ++ e1000e_vlan_filter_enable(adapter); ++ } ++ ++ ew32(RCTL, rctl); ++ ++ e1000_update_mc_addr_list(hw, NULL, 0); ++ ++ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ++ e1000e_vlan_strip_enable(adapter); ++ else ++ e1000e_vlan_strip_disable(adapter); ++} ++ ++/** ++ * e1000_configure - configure the hardware for Rx and Tx ++ * @adapter: private board structure ++ **/ ++static void e1000_configure(struct e1000_adapter *adapter) ++{ ++ e1000_set_multi(adapter->netdev); ++ ++ e1000_restore_vlan(adapter); ++ e1000_init_manageability_pt(adapter); ++ ++ e1000_configure_tx(adapter); ++ e1000_setup_rctl(adapter); ++ e1000_configure_rx(adapter); ++ adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring), ++ GFP_KERNEL); ++} ++ ++/** ++ * e1000e_power_up_phy - restore link in case the phy was powered down ++ * @adapter: address of board private structure ++ * ++ * The phy may be powered down to save power and turn off link when the ++ * driver is unloaded and wake on lan is not enabled (among others) ++ * *** this routine MUST be followed by a call to e1000e_reset *** ++ **/ ++void e1000e_power_up_phy(struct e1000_adapter *adapter) ++{ ++ if (adapter->hw.phy.ops.power_up) ++ adapter->hw.phy.ops.power_up(&adapter->hw); ++ ++ adapter->hw.mac.ops.setup_link(&adapter->hw); ++} ++ ++/** ++ * e1000_power_down_phy - Power down the PHY ++ * ++ * Power down the PHY so no link is implied when interface is down. ++ * The PHY cannot be powered down if management or WoL is active. ++ */ ++static void e1000_power_down_phy(struct e1000_adapter *adapter) ++{ ++ /* WoL is enabled */ ++ if (adapter->wol) ++ return; ++ ++ if (adapter->hw.phy.ops.power_down) ++ adapter->hw.phy.ops.power_down(&adapter->hw); ++} ++ ++/** ++ * e1000e_reset - bring the hardware into a known good state ++ * ++ * This function boots the hardware and enables some settings that ++ * require a configuration cycle of the hardware - those cannot be ++ * set/changed during runtime. After reset the device needs to be ++ * properly configured for Rx, Tx etc. ++ */ ++void e1000e_reset(struct e1000_adapter *adapter) ++{ ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_fc_info *fc = &adapter->hw.fc; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tx_space, min_tx_space, min_rx_space; ++ u32 pba = adapter->pba; ++ u16 hwm; ++ ++ /* reset Packet Buffer Allocation to default */ ++ ew32(PBA, pba); ++ ++ if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { ++ /* ++ * To maintain wire speed transmits, the Tx FIFO should be ++ * large enough to accommodate two full transmit packets, ++ * rounded up to the next 1KB and expressed in KB. Likewise, ++ * the Rx FIFO should be large enough to accommodate at least ++ * one full receive packet and is similarly rounded up and ++ * expressed in KB. 
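++ * (Below, min_tx_space is 2 * (max_frame_size + Tx descriptor size -
++ * FCS) and min_rx_space is max_frame_size, each aligned to 1024 bytes
++ * and shifted right by 10 to convert to KB before comparing with the
++ * Tx/Rx split read back from the PBA register.)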
++ */ ++ pba = er32(PBA); ++ /* upper 16 bits has Tx packet buffer allocation size in KB */ ++ tx_space = pba >> 16; ++ /* lower 16 bits has Rx packet buffer allocation size in KB */ ++ pba &= 0xffff; ++ /* ++ * the Tx fifo also stores 16 bytes of information about the Tx ++ * but don't include ethernet FCS because hardware appends it ++ */ ++ min_tx_space = (adapter->max_frame_size + ++ sizeof(struct e1000_tx_desc) - ++ ETH_FCS_LEN) * 2; ++ min_tx_space = ALIGN(min_tx_space, 1024); ++ min_tx_space >>= 10; ++ /* software strips receive CRC, so leave room for it */ ++ min_rx_space = adapter->max_frame_size; ++ min_rx_space = ALIGN(min_rx_space, 1024); ++ min_rx_space >>= 10; ++ ++ /* ++ * If current Tx allocation is less than the min Tx FIFO size, ++ * and the min Tx FIFO size is less than the current Rx FIFO ++ * allocation, take space away from current Rx allocation ++ */ ++ if ((tx_space < min_tx_space) && ++ ((min_tx_space - tx_space) < pba)) { ++ pba -= min_tx_space - tx_space; ++ ++ /* ++ * if short on Rx space, Rx wins and must trump Tx ++ * adjustment or use Early Receive if available ++ */ ++ if ((pba < min_rx_space) && ++ (!(adapter->flags & FLAG_HAS_ERT))) ++ /* ERT enabled in e1000_configure_rx */ ++ pba = min_rx_space; ++ } ++ ++ ew32(PBA, pba); ++ } ++ ++ /* ++ * flow control settings ++ * ++ * The high water mark must be low enough to fit one full frame ++ * (or the size used for early receive) above it in the Rx FIFO. ++ * Set it to the lower of: ++ * - 90% of the Rx FIFO size, and ++ * - the full Rx FIFO size minus the early receive size (for parts ++ * with ERT support assuming ERT set to E1000_ERT_2048), or ++ * - the full Rx FIFO size minus one full frame ++ */ ++ if (adapter->flags & FLAG_DISABLE_FC_PAUSE_TIME) ++ fc->pause_time = 0xFFFF; ++ else ++ fc->pause_time = E1000_FC_PAUSE_TIME; ++ fc->send_xon = 1; ++ fc->current_mode = fc->requested_mode; ++ ++ switch (hw->mac.type) { ++ default: ++ if ((adapter->flags & FLAG_HAS_ERT) && ++ (adapter->netdev->mtu > ETH_DATA_LEN)) ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - (E1000_ERT_2048 << 3))); ++ else ++ hwm = min(((pba << 10) * 9 / 10), ++ ((pba << 10) - adapter->max_frame_size)); ++ ++ fc->high_water = hwm & E1000_FCRTH_RTH; /* 8-byte granularity */ ++ fc->low_water = fc->high_water - 8; ++ break; ++ case e1000_pchlan: ++ /* ++ * Workaround PCH LOM adapter hangs with certain network ++ * loads. If hangs persist, try disabling Tx flow control. ++ */ ++ if (adapter->netdev->mtu > ETH_DATA_LEN) { ++ fc->high_water = 0x3500; ++ fc->low_water = 0x1500; ++ } else { ++ fc->high_water = 0x5000; ++ fc->low_water = 0x3000; ++ } ++ fc->refresh_time = 0x1000; ++ break; ++ case e1000_pch2lan: ++ case e1000_pch_lpt: ++ fc->high_water = 0x05C20; ++ fc->low_water = 0x05048; ++ fc->pause_time = 0x0650; ++ fc->refresh_time = 0x0400; ++ if (adapter->netdev->mtu > ETH_DATA_LEN) { ++ pba = 14; ++ ew32(PBA, pba); ++ } ++ break; ++ } ++ ++ /* ++ * Disable Adaptive Interrupt Moderation if 2 full packets cannot ++ * fit in receive buffer and early-receive not supported. 
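++ * (That is, when 2 * max_frame_size exceeds the packet buffer size of
++ * pba << 10 bytes and FLAG_HAS_ERT is not set, FLAG2_DISABLE_AIM is set
++ * and ITR is written with 0; the moderation setting is restored once
++ * the condition clears.)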
++ */ ++ if (adapter->itr_setting & 0x3) { ++ if (((adapter->max_frame_size * 2) > (pba << 10)) && ++ !(adapter->flags & FLAG_HAS_ERT)) { ++ if (!(adapter->flags2 & FLAG2_DISABLE_AIM)) { ++ dev_info(&adapter->pdev->dev, ++ "Interrupt Throttle Rate turned off\n"); ++ adapter->flags2 |= FLAG2_DISABLE_AIM; ++ ew32(ITR, 0); ++ } ++ } else if (adapter->flags2 & FLAG2_DISABLE_AIM) { ++ dev_info(&adapter->pdev->dev, ++ "Interrupt Throttle Rate turned on\n"); ++ adapter->flags2 &= ~FLAG2_DISABLE_AIM; ++ adapter->itr = 20000; ++ ew32(ITR, 1000000000 / (adapter->itr * 256)); ++ } ++ } ++ ++ /* Allow time for pending master requests to run */ ++ mac->ops.reset_hw(hw); ++ ++ /* ++ * For parts with AMT enabled, let the firmware know ++ * that the network interface is in control ++ */ ++ if (adapter->flags & FLAG_HAS_AMT) ++ e1000e_get_hw_control(adapter); ++ ++ ew32(WUC, 0); ++ ++ if (mac->ops.init_hw(hw)) ++ e_err("Hardware Error\n"); ++ ++ e1000_update_mng_vlan(adapter); ++ ++ /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ ++ ew32(VET, ETH_P_8021Q); ++ ++ e1000e_reset_adaptive(hw); ++ ++ if (!rtnetif_running(adapter->netdev) && ++ !test_bit(__E1000_TESTING, &adapter->state)) { ++ e1000_power_down_phy(adapter); ++ return; ++ } ++ ++ e1000_get_phy_info(hw); ++ ++ if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) && ++ !(adapter->flags & FLAG_SMART_POWER_DOWN)) { ++ u16 phy_data = 0; ++ /* ++ * speed up time to link by disabling smart power down, ignore ++ * the return value of this function because there is nothing ++ * different we would do if it failed ++ */ ++ e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data); ++ phy_data &= ~IGP02E1000_PM_SPD; ++ e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); ++ } ++} ++ ++int e1000e_up(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* hardware has been reset, we need to reload some things */ ++ e1000_configure(adapter); ++ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ if (adapter->msix_entries) ++ e1000_configure_msix(adapter); ++ e1000_irq_enable(adapter); ++ ++ rtnetif_start_queue(adapter->netdev); ++ ++ /* fire a link change interrupt to start the watchdog */ ++ if (adapter->msix_entries) ++ ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); ++ else ++ ew32(ICS, E1000_ICS_LSC); ++ ++ return 0; ++} ++ ++static void e1000e_flush_descriptors(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ if (!(adapter->flags2 & FLAG2_DMA_BURST)) ++ return; ++ ++ /* flush pending descriptor writebacks to memory */ ++ ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD); ++ ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD); ++ ++ /* execute the writes immediately */ ++ e1e_flush(); ++} ++ ++void e1000e_down(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 tctl, rctl; ++ ++ /* ++ * signal that we're down so the interrupt handler does not ++ * reschedule our watchdog timer ++ */ ++ set_bit(__E1000_DOWN, &adapter->state); ++ ++ /* disable receives in the hardware */ ++ rctl = er32(RCTL); ++ if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ++ ew32(RCTL, rctl & ~E1000_RCTL_EN); ++ /* flush and sleep below */ ++ ++ rtnetif_stop_queue(netdev); ++ ++ /* disable transmits in the hardware */ ++ tctl = er32(TCTL); ++ tctl &= ~E1000_TCTL_EN; ++ ew32(TCTL, tctl); ++ ++ /* flush both disables and wait for them to finish */ ++ e1e_flush(); ++ usleep_range(10000, 20000); ++ ++ e1000_irq_disable(adapter); ++ ++ 
del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ rtnetif_carrier_off(netdev); ++ ++ e1000e_flush_descriptors(adapter); ++ e1000_clean_tx_ring(adapter); ++ e1000_clean_rx_ring(adapter); ++ ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ ++ if (!pci_channel_offline(adapter->pdev)) ++ e1000e_reset(adapter); ++ ++ /* ++ * TODO: for power management, we could drop the link and ++ * pci_disable_device here. ++ */ ++} ++ ++void e1000e_reinit_locked(struct e1000_adapter *adapter) ++{ ++ might_sleep(); ++ while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) ++ usleep_range(1000, 2000); ++ e1000e_down(adapter); ++ e1000e_up(adapter); ++ clear_bit(__E1000_RESETTING, &adapter->state); ++} ++ ++/** ++ * e1000_sw_init - Initialize general software structures (struct e1000_adapter) ++ * @adapter: board private structure to initialize ++ * ++ * e1000_sw_init initializes the Adapter private data structure. ++ * Fields are initialized based on PCI device information and ++ * OS network device settings (MTU size). ++ **/ ++static int e1000_sw_init(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ ++ adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; ++ adapter->rx_ps_bsize0 = 128; ++ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; ++ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; ++ ++ spin_lock_init(&adapter->stats64_lock); ++ ++ e1000e_set_interrupt_capability(adapter); ++ ++ if (e1000_alloc_queues(adapter)) ++ return -ENOMEM; ++ ++ /* Explicitly disable IRQ since the NIC can be in any state. */ ++ e1000_irq_disable(adapter); ++ ++ set_bit(__E1000_DOWN, &adapter->state); ++ return 0; ++} ++ ++/** ++ * e1000_intr_msi_test - Interrupt Handler ++ * @irq: interrupt number ++ * @data: pointer to a network interface device structure ++ **/ ++static irqreturn_t e1000_intr_msi_test(int irq, void *data) ++{ ++ struct rtnet_device *netdev = data; ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 icr = er32(ICR); ++ ++ e_dbg("icr is %08X\n", icr); ++ if (icr & E1000_ICR_RXSEQ) { ++ adapter->flags &= ~FLAG_MSI_TEST_FAILED; ++ wmb(); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++/** ++ * e1000_test_msi_interrupt - Returns 0 for successful test ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c ++ **/ ++static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) ++{ ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_hw *hw = &adapter->hw; ++ int err; ++ ++ /* poll_enable hasn't been called yet, so don't need disable */ ++ /* clear any pending events */ ++ er32(ICR); ++ ++ /* free the real vector and request a test handler */ ++ e1000_free_irq(adapter); ++ e1000e_reset_interrupt_capability(adapter); ++ ++ /* Assume that the test fails, if it succeeds then the test ++ * MSI irq handler will unset this flag */ ++ adapter->flags |= FLAG_MSI_TEST_FAILED; ++ ++ err = pci_enable_msi(adapter->pdev); ++ if (err) ++ goto msi_test_failed; ++ ++ err = request_irq(adapter->pdev->irq, e1000_intr_msi_test, 0, ++ netdev->name, netdev); ++ if (err) { ++ pci_disable_msi(adapter->pdev); ++ goto msi_test_failed; ++ } ++ ++ wmb(); ++ ++ e1000_irq_enable(adapter); ++ ++ /* fire an unusual interrupt on the test handler */ ++ ew32(ICS, E1000_ICS_RXSEQ); ++ e1e_flush(); ++ msleep(50); ++ ++ e1000_irq_disable(adapter); ++ ++ rmb(); ++ ++ if (adapter->flags & FLAG_MSI_TEST_FAILED) { ++ adapter->int_mode = E1000E_INT_MODE_LEGACY; ++ 
e_info("MSI interrupt test failed, using legacy interrupt.\n"); ++ } else ++ e_dbg("MSI interrupt test succeeded!\n"); ++ ++ free_irq(adapter->pdev->irq, netdev); ++ pci_disable_msi(adapter->pdev); ++ ++msi_test_failed: ++ e1000e_set_interrupt_capability(adapter); ++ return e1000_request_irq(adapter); ++} ++ ++/** ++ * e1000_test_msi - Returns 0 if MSI test succeeds or INTx mode is restored ++ * @adapter: board private struct ++ * ++ * code flow taken from tg3.c, called with e1000 interrupts disabled. ++ **/ ++static int e1000_test_msi(struct e1000_adapter *adapter) ++{ ++ int err; ++ u16 pci_cmd; ++ ++ if (!(adapter->flags & FLAG_MSI_ENABLED)) ++ return 0; ++ ++ /* disable SERR in case the MSI write causes a master abort */ ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ if (pci_cmd & PCI_COMMAND_SERR) ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, ++ pci_cmd & ~PCI_COMMAND_SERR); ++ ++ err = e1000_test_msi_interrupt(adapter); ++ ++ /* re-enable SERR */ ++ if (pci_cmd & PCI_COMMAND_SERR) { ++ pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd); ++ pci_cmd |= PCI_COMMAND_SERR; ++ pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd); ++ } ++ ++ return err; ++} ++ ++/** ++ * e1000_open - Called when a network interface is made active ++ * @netdev: network interface device structure ++ * ++ * Returns 0 on success, negative value on failure ++ * ++ * The open entry point is called when a network interface is made ++ * active by the system (IFF_UP). At this point all resources needed ++ * for transmit and receive operations are allocated, the interrupt ++ * handler is registered with the OS, the watchdog timer is started, ++ * and the stack is notified that the interface is ready. ++ **/ ++static int e1000_open(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ struct pci_dev *pdev = adapter->pdev; ++ int err; ++ ++ /* disallow open during test */ ++ if (test_bit(__E1000_TESTING, &adapter->state)) ++ return -EBUSY; ++ ++ pm_runtime_get_sync(&pdev->dev); ++ ++ rtnetif_carrier_off(netdev); ++ ++ /* allocate transmit descriptors */ ++ err = e1000e_setup_tx_resources(adapter); ++ if (err) ++ goto err_setup_tx; ++ ++ /* allocate receive descriptors */ ++ err = e1000e_setup_rx_resources(adapter); ++ if (err) ++ goto err_setup_rx; ++ ++ /* ++ * If AMT is enabled, let the firmware know that the network ++ * interface is now open and reset the part to a known state. ++ */ ++ if (adapter->flags & FLAG_HAS_AMT) { ++ e1000e_get_hw_control(adapter); ++ e1000e_reset(adapter); ++ } ++ ++ e1000e_power_up_phy(adapter); ++ ++ adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; ++ if ((adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN)) ++ e1000_update_mng_vlan(adapter); ++ ++ /* ++ * before we allocate an interrupt, we must be ready to handle it. ++ * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt ++ * as soon as we call pci_request_irq, so we have to setup our ++ * clean_rx handler before we do so. 
++ */ ++ e1000_configure(adapter); ++ ++ rt_stack_connect(netdev, &STACK_manager); ++ ++ err = e1000_request_irq(adapter); ++ if (err) ++ goto err_req_irq; ++ ++ /* ++ * Work around PCIe errata with MSI interrupts causing some chipsets to ++ * ignore e1000e MSI messages, which means we need to test our MSI ++ * interrupt now ++ */ ++ if (adapter->int_mode != E1000E_INT_MODE_LEGACY) { ++ err = e1000_test_msi(adapter); ++ if (err) { ++ e_err("Interrupt allocation failed\n"); ++ goto err_req_irq; ++ } ++ } ++ ++ /* From here on the code is the same as e1000e_up() */ ++ clear_bit(__E1000_DOWN, &adapter->state); ++ ++ e1000_irq_enable(adapter); ++ ++ rtnetif_start_queue(netdev); ++ ++ adapter->idle_check = true; ++ pm_runtime_put(&pdev->dev); ++ ++ /* fire a link status change interrupt to start the watchdog */ ++ if (adapter->msix_entries) ++ ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); ++ else ++ ew32(ICS, E1000_ICS_LSC); ++ ++ return 0; ++ ++err_req_irq: ++ e1000e_release_hw_control(adapter); ++ e1000_power_down_phy(adapter); ++ e1000e_free_rx_resources(adapter); ++err_setup_rx: ++ e1000e_free_tx_resources(adapter); ++err_setup_tx: ++ e1000e_reset(adapter); ++ pm_runtime_put_sync(&pdev->dev); ++ ++ return err; ++} ++ ++/** ++ * e1000_close - Disables a network interface ++ * @netdev: network interface device structure ++ * ++ * Returns 0, this is not allowed to fail ++ * ++ * The close entry point is called when an interface is de-activated ++ * by the OS. The hardware is still under the drivers control, but ++ * needs to be disabled. A global MAC reset is issued to stop the ++ * hardware, and all transmit and receive resources are freed. ++ **/ ++static int e1000_close(struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct pci_dev *pdev = adapter->pdev; ++ ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ ++ pm_runtime_get_sync(&pdev->dev); ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) { ++ e1000e_down(adapter); ++ e1000_free_irq(adapter); ++ } ++ e1000_power_down_phy(adapter); ++ ++ rt_stack_disconnect(netdev); ++ ++ e1000e_free_tx_resources(adapter); ++ e1000e_free_rx_resources(adapter); ++ ++ /* ++ * kill manageability vlan ID if supported, but not if a vlan with ++ * the same ID is registered on the host OS (let 8021q kill it) ++ */ ++ if (adapter->hw.mng_cookie.status & ++ E1000_MNG_DHCP_COOKIE_STATUS_VLAN) ++ e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id); ++ ++ /* ++ * If AMT is enabled, let the firmware know that the network ++ * interface is now closed ++ */ ++ if ((adapter->flags & FLAG_HAS_AMT) && ++ !test_bit(__E1000_TESTING, &adapter->state)) ++ e1000e_release_hw_control(adapter); ++ ++ pm_runtime_put_sync(&pdev->dev); ++ ++ return 0; ++} ++ ++/** ++ * e1000e_update_phy_task - work thread to update phy ++ * @work: pointer to our work struct ++ * ++ * this worker thread exists because we must acquire a ++ * semaphore to read the phy, which we could msleep while ++ * waiting for it, and we can't msleep in a timer. 
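++ * The timer callback (e1000_update_phy_info) therefore defers to this
++ * worker through rtdm_schedule_nrt_work(), so the PHY access runs in
++ * regular Linux context instead of timer context.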
++ **/ ++static void e1000e_update_phy_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, update_phy_task); ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ e1000_get_phy_info(&adapter->hw); ++} ++ ++/* ++ * Need to wait a few seconds after link up to get diagnostic information from ++ * the phy ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void e1000_update_phy_info(struct timer_list *t) ++{ ++ struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer); ++#else /* < 4.14 */ ++static void e1000_update_phy_info(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++#endif /* < 4.14 */ ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ rtdm_schedule_nrt_work(&adapter->update_phy_task); ++} ++ ++/** ++ * e1000_phy_read_status - Update the PHY register status snapshot ++ * @adapter: board private structure ++ **/ ++static void e1000_phy_read_status(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct e1000_phy_regs *phy = &adapter->phy_regs; ++ ++ if ((er32(STATUS) & E1000_STATUS_LU) && ++ (adapter->hw.phy.media_type == e1000_media_type_copper)) { ++ int ret_val; ++ ++ ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr); ++ ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr); ++ ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise); ++ ret_val |= e1e_rphy(hw, PHY_LP_ABILITY, &phy->lpa); ++ ret_val |= e1e_rphy(hw, PHY_AUTONEG_EXP, &phy->expansion); ++ ret_val |= e1e_rphy(hw, PHY_1000T_CTRL, &phy->ctrl1000); ++ ret_val |= e1e_rphy(hw, PHY_1000T_STATUS, &phy->stat1000); ++ ret_val |= e1e_rphy(hw, PHY_EXT_STATUS, &phy->estatus); ++ if (ret_val) ++ e_warn("Error reading PHY register\n"); ++ } else { ++ /* ++ * Do not read PHY registers if link is not up ++ * Set values to typical power-on defaults ++ */ ++ phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX); ++ phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL | ++ BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE | ++ BMSR_ERCAP); ++ phy->advertise = (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP | ++ ADVERTISE_ALL | ADVERTISE_CSMA); ++ phy->lpa = 0; ++ phy->expansion = EXPANSION_ENABLENPAGE; ++ phy->ctrl1000 = ADVERTISE_1000FULL; ++ phy->stat1000 = 0; ++ phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF); ++ } ++} ++ ++static void e1000_print_link_info(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl = er32(CTRL); ++ ++ /* Link status message must follow this format for user tools */ ++ printk(KERN_INFO "e1000e: %s NIC Link is Up %d Mbps %s, " ++ "Flow Control: %s\n", ++ adapter->netdev->name, ++ adapter->link_speed, ++ (adapter->link_duplex == FULL_DUPLEX) ? ++ "Full Duplex" : "Half Duplex", ++ ((ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE)) ? ++ "Rx/Tx" : ++ ((ctrl & E1000_CTRL_RFCE) ? "Rx" : ++ ((ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"))); ++} ++ ++static bool e1000e_has_link(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ bool link_active = 0; ++ s32 ret_val = 0; ++ ++ /* ++ * get_link_status is set on LSC (link status) interrupt or ++ * Rx sequence error interrupt. 
get_link_status will stay ++ * false until the check_for_link establishes link ++ * for copper adapters ONLY ++ */ ++ switch (hw->phy.media_type) { ++ case e1000_media_type_copper: ++ if (hw->mac.get_link_status) { ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = !hw->mac.get_link_status; ++ } else { ++ link_active = 1; ++ } ++ break; ++ case e1000_media_type_fiber: ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = !!(er32(STATUS) & E1000_STATUS_LU); ++ break; ++ case e1000_media_type_internal_serdes: ++ ret_val = hw->mac.ops.check_for_link(hw); ++ link_active = adapter->hw.mac.serdes_has_link; ++ break; ++ default: ++ case e1000_media_type_unknown: ++ break; ++ } ++ ++ if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && ++ (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { ++ /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ ++ e_info("Gigabit has been disabled, downgrading speed\n"); ++ } ++ ++ return link_active; ++} ++ ++static void e1000e_enable_receives(struct e1000_adapter *adapter) ++{ ++ /* make sure the receive unit is started */ ++ if ((adapter->flags & FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & FLAG_RX_RESTART_NOW)) { ++ struct e1000_hw *hw = &adapter->hw; ++ u32 rctl = er32(RCTL); ++ ew32(RCTL, rctl | E1000_RCTL_EN); ++ adapter->flags &= ~FLAG_RX_RESTART_NOW; ++ } ++} ++ ++static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ ++ /* ++ * With 82574 controllers, PHY needs to be checked periodically ++ * for hung state and reset, if two calls return true ++ */ ++ if (e1000_check_phy_82574(hw)) ++ adapter->phy_hang_count++; ++ else ++ adapter->phy_hang_count = 0; ++ ++ if (adapter->phy_hang_count > 1) { ++ adapter->phy_hang_count = 0; ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++} ++ ++/** ++ * e1000_watchdog - Timer Call-back ++ * @data: pointer to adapter cast into an unsigned long ++ **/ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++static void e1000_watchdog(struct timer_list *t) ++{ ++ struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); ++#else /* < 4.14 */ ++static void e1000_watchdog(unsigned long data) ++{ ++ struct e1000_adapter *adapter = (struct e1000_adapter *) data; ++#endif /* < 4.14 */ ++ ++ /* Do the rest outside of interrupt context */ ++ rtdm_schedule_nrt_work(&adapter->watchdog_task); ++ ++ /* TODO: make this use queue_delayed_work() */ ++} ++ ++static void e1000_watchdog_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter = container_of(work, ++ struct e1000_adapter, watchdog_task); ++ struct rtnet_device *netdev = adapter->netdev; ++ struct e1000_mac_info *mac = &adapter->hw.mac; ++ struct e1000_phy_info *phy = &adapter->hw.phy; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 link, tctl; ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ link = e1000e_has_link(adapter); ++ if ((rtnetif_carrier_ok(netdev)) && link) { ++ e1000e_enable_receives(adapter); ++ goto link_up; ++ } ++ ++ if ((e1000e_enable_tx_pkt_filtering(hw)) && ++ (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id)) ++ e1000_update_mng_vlan(adapter); ++ ++ if (link) { ++ if (!rtnetif_carrier_ok(netdev)) { ++ bool txb2b = 1; ++ ++ /* update snapshot of PHY registers on LSC */ ++ e1000_phy_read_status(adapter); ++ mac->ops.get_link_up_info(&adapter->hw, ++ &adapter->link_speed, ++ &adapter->link_duplex); ++ e1000_print_link_info(adapter); ++ /* ++ * On supported PHYs, 
check for duplex mismatch only ++ * if link has autonegotiated at 10/100 half ++ */ ++ if ((hw->phy.type == e1000_phy_igp_3 || ++ hw->phy.type == e1000_phy_bm) && ++ (hw->mac.autoneg == true) && ++ (adapter->link_speed == SPEED_10 || ++ adapter->link_speed == SPEED_100) && ++ (adapter->link_duplex == HALF_DUPLEX)) { ++ u16 autoneg_exp; ++ ++ e1e_rphy(hw, PHY_AUTONEG_EXP, &autoneg_exp); ++ ++ if (!(autoneg_exp & NWAY_ER_LP_NWAY_CAPS)) ++ e_info("Autonegotiated half duplex but" ++ " link partner cannot autoneg. " ++ " Try forcing full duplex if " ++ "link gets many collisions.\n"); ++ } ++ ++ /* adjust timeout factor according to speed/duplex */ ++ adapter->tx_timeout_factor = 1; ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ txb2b = 0; ++ adapter->tx_timeout_factor = 16; ++ break; ++ case SPEED_100: ++ txb2b = 0; ++ adapter->tx_timeout_factor = 10; ++ break; ++ } ++ ++ /* ++ * workaround: re-program speed mode bit after ++ * link-up event ++ */ ++ if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) && ++ !txb2b) { ++ u32 tarc0; ++ tarc0 = er32(TARC(0)); ++ tarc0 &= ~SPEED_MODE_BIT; ++ ew32(TARC(0), tarc0); ++ } ++ ++ /* ++ * disable TSO for pcie and 10/100 speeds, to avoid ++ * some hardware issues ++ */ ++ if (!(adapter->flags & FLAG_TSO_FORCE)) { ++ switch (adapter->link_speed) { ++ case SPEED_10: ++ case SPEED_100: ++ e_info("10/100 speed: disabling TSO\n"); ++ netdev->features &= ~NETIF_F_TSO; ++ netdev->features &= ~NETIF_F_TSO6; ++ break; ++ case SPEED_1000: ++ netdev->features |= NETIF_F_TSO; ++ netdev->features |= NETIF_F_TSO6; ++ break; ++ default: ++ /* oops */ ++ break; ++ } ++ } ++ ++ /* ++ * enable transmits in the hardware, need to do this ++ * after setting TARC(0) ++ */ ++ tctl = er32(TCTL); ++ tctl |= E1000_TCTL_EN; ++ ew32(TCTL, tctl); ++ ++ /* ++ * Perform any post-link-up configuration before ++ * reporting link up. ++ */ ++ if (phy->ops.cfg_on_link_up) ++ phy->ops.cfg_on_link_up(hw); ++ ++ rtnetif_carrier_on(netdev); ++ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ } ++ } else { ++ if (rtnetif_carrier_ok(netdev)) { ++ adapter->link_speed = 0; ++ adapter->link_duplex = 0; ++ /* Link status message must follow this format */ ++ printk(KERN_INFO "e1000e: %s NIC Link is Down\n", ++ adapter->netdev->name); ++ rtnetif_carrier_off(netdev); ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->phy_info_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++ ++ if (adapter->flags & FLAG_RX_NEEDS_RESTART) ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ } ++ } ++ ++link_up: ++ spin_lock(&adapter->stats64_lock); ++ ++ mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; ++ adapter->tpt_old = adapter->stats.tpt; ++ mac->collision_delta = adapter->stats.colc - adapter->colc_old; ++ adapter->colc_old = adapter->stats.colc; ++ ++ adapter->gorc = adapter->stats.gorc - adapter->gorc_old; ++ adapter->gorc_old = adapter->stats.gorc; ++ adapter->gotc = adapter->stats.gotc - adapter->gotc_old; ++ adapter->gotc_old = adapter->stats.gotc; ++ spin_unlock(&adapter->stats64_lock); ++ ++ e1000e_update_adaptive(&adapter->hw); ++ ++ if (!rtnetif_carrier_ok(netdev) && ++ (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) { ++ /* ++ * We've lost link, so the controller stops DMA, ++ * but we've got queued Tx work that's never going ++ * to get done, so reset controller to flush Tx. ++ * (Do the reset outside of interrupt context). 
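++ * Here that means queueing reset_task via rtdm_schedule_nrt_work() and
++ * returning from the watchdog immediately.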
++ */ ++ rtdm_schedule_nrt_work(&adapter->reset_task); ++ /* return immediately since reset is imminent */ ++ return; ++ } ++ ++ /* Simple mode for Interrupt Throttle Rate (ITR) */ ++ if (adapter->itr_setting == 4) { ++ /* ++ * Symmetric Tx/Rx gets a reduced ITR=2000; ++ * Total asymmetrical Tx or Rx gets ITR=8000; ++ * everyone else is between 2000-8000. ++ */ ++ u32 goc = (adapter->gotc + adapter->gorc) / 10000; ++ u32 dif = (adapter->gotc > adapter->gorc ? ++ adapter->gotc - adapter->gorc : ++ adapter->gorc - adapter->gotc) / 10000; ++ u32 itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000; ++ ++ ew32(ITR, 1000000000 / (itr * 256)); ++ } ++ ++ /* Cause software interrupt to ensure Rx ring is cleaned */ ++ if (adapter->msix_entries) ++ ew32(ICS, adapter->rx_ring->ims_val); ++ else ++ ew32(ICS, E1000_ICS_RXDMT0); ++ ++ /* flush pending descriptors to memory before detecting Tx hang */ ++ e1000e_flush_descriptors(adapter); ++ ++ /* Force detection of hung controller every watchdog period */ ++ adapter->detect_tx_hung = 1; ++ ++ /* ++ * With 82571 controllers, LAA may be overwritten due to controller ++ * reset from the other port. Set the appropriate LAA in RAR[0] ++ */ ++ if (e1000e_get_laa_state_82571(hw)) ++ e1000e_rar_set(hw, adapter->hw.mac.addr, 0); ++ ++ if (adapter->flags2 & FLAG2_CHECK_PHY_HANG) ++ e1000e_check_82574_phy_workaround(adapter); ++ ++ /* Reset the timer */ ++ if (!test_bit(__E1000_DOWN, &adapter->state)) ++ mod_timer(&adapter->watchdog_timer, ++ round_jiffies(jiffies + 2 * HZ)); ++} ++ ++#define E1000_TX_FLAGS_CSUM 0x00000001 ++#define E1000_TX_FLAGS_VLAN 0x00000002 ++#define E1000_TX_FLAGS_TSO 0x00000004 ++#define E1000_TX_FLAGS_IPV4 0x00000008 ++#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 ++#define E1000_TX_FLAGS_VLAN_SHIFT 16 ++ ++#define E1000_MAX_PER_TXD 8192 ++#define E1000_MAX_TXD_PWR 12 ++ ++static int e1000_tx_map(struct e1000_adapter *adapter, ++ struct rtskb *skb, unsigned int first) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_buffer *buffer_info; ++ unsigned int offset = 0, size, i; ++ ++ i = tx_ring->next_to_use; ++ ++ buffer_info = &tx_ring->buffer_info[i]; ++ size = skb->len; ++ ++ buffer_info->length = size; ++ buffer_info->time_stamp = jiffies; ++ buffer_info->next_to_watch = i; ++ buffer_info->dma = rtskb_data_dma_addr(skb, offset); ++ buffer_info->mapped_as_page = false; ++ ++ tx_ring->buffer_info[i].skb = skb; ++ tx_ring->buffer_info[i].segs = 1; ++ tx_ring->buffer_info[i].bytecount = size; ++ tx_ring->buffer_info[first].next_to_watch = i; ++ ++ return 1; ++} ++ ++static void e1000_tx_queue(struct e1000_adapter *adapter, ++ int tx_flags, int count) ++{ ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ struct e1000_tx_desc *tx_desc = NULL; ++ struct e1000_buffer *buffer_info; ++ u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; ++ unsigned int i; ++ ++ if (tx_flags & E1000_TX_FLAGS_CSUM) { ++ txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; ++ txd_upper |= E1000_TXD_POPTS_TXSM << 8; ++ } ++ ++ if (tx_flags & E1000_TX_FLAGS_VLAN) { ++ txd_lower |= E1000_TXD_CMD_VLE; ++ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); ++ } ++ ++ i = tx_ring->next_to_use; ++ ++ do { ++ buffer_info = &tx_ring->buffer_info[i]; ++ tx_desc = E1000_TX_DESC(*tx_ring, i); ++ tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma); ++ tx_desc->lower.data = ++ cpu_to_le32(txd_lower | buffer_info->length); ++ tx_desc->upper.data = cpu_to_le32(txd_upper); ++ ++ i++; ++ if (i == tx_ring->count) ++ i = 0; ++ } while (--count > 0); ++ ++ 
tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); ++ ++ /* ++ * Force memory writes to complete before letting h/w ++ * know there are new descriptors to fetch. (Only ++ * applicable for weak-ordered memory model archs, ++ * such as IA-64). ++ */ ++ wmb(); ++ ++ tx_ring->next_to_use = i; ++ ++ if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) ++ e1000e_update_tdt_wa(adapter, i); ++ else ++ writel(i, adapter->hw.hw_addr + tx_ring->tail); ++ ++ /* ++ * we need this if more than one processor can write to our tail ++ * at a time, it synchronizes IO on IA64/Altix systems ++ */ ++ mmiowb(); ++} ++ ++#define MINIMUM_DHCP_PACKET_SIZE 282 ++static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, ++ struct rtskb *skb) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u16 length, offset; ++ ++ if (skb->len <= MINIMUM_DHCP_PACKET_SIZE) ++ return 0; ++ ++ if (((struct ethhdr *) skb->data)->h_proto != htons(ETH_P_IP)) ++ return 0; ++ ++ { ++ const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data+14); ++ struct udphdr *udp; ++ ++ if (ip->protocol != IPPROTO_UDP) ++ return 0; ++ ++ udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2)); ++ if (ntohs(udp->dest) != 67) ++ return 0; ++ ++ offset = (u8 *)udp + 8 - skb->data; ++ length = skb->len - offset; ++ return e1000e_mng_write_dhcp_info(hw, (u8 *)udp + 8, length); ++ } ++ ++ return 0; ++} ++ ++#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) ++static int e1000_xmit_frame(struct rtskb *skb, struct rtnet_device *netdev) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_ring *tx_ring = adapter->tx_ring; ++ rtdm_lockctx_t context; ++ unsigned int first; ++ unsigned int tx_flags = 0; ++ int count = 0; ++ ++ if (test_bit(__E1000_DOWN, &adapter->state)) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ if (skb->len <= 0) { ++ kfree_rtskb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ count++; ++ ++ count += skb->len; ++ ++ if (adapter->hw.mac.tx_pkt_filtering) ++ e1000_transfer_dhcp_info(adapter, skb); ++ ++ rtdm_lock_get_irqsave(&tx_ring->lock, context); ++ ++ first = tx_ring->next_to_use; ++ ++ if (skb->xmit_stamp) ++ *skb->xmit_stamp = ++ cpu_to_be64(rtdm_clock_read() + *skb->xmit_stamp); ++ ++ /* if count is 0 then mapping error has occurred */ ++ count = e1000_tx_map(adapter, skb, first); ++ if (count) { ++ e1000_tx_queue(adapter, tx_flags, count); ++ rtdm_lock_put_irqrestore(&tx_ring->lock, context); ++ } else { ++ tx_ring->buffer_info[first].time_stamp = 0; ++ tx_ring->next_to_use = first; ++ rtdm_lock_put_irqrestore(&tx_ring->lock, context); ++ kfree_rtskb(skb); ++ } ++ ++ return NETDEV_TX_OK; ++} ++ ++static void e1000_reset_task(struct work_struct *work) ++{ ++ struct e1000_adapter *adapter; ++ adapter = container_of(work, struct e1000_adapter, reset_task); ++ ++ /* don't run the task if already down */ ++ if (test_bit(__E1000_DOWN, &adapter->state)) ++ return; ++ ++ if (!((adapter->flags & FLAG_RX_NEEDS_RESTART) && ++ (adapter->flags & FLAG_RX_RESTART_NOW))) { ++ e1000e_dump(adapter); ++ e_err("Reset adapter\n"); ++ } ++ e1000e_reinit_locked(adapter); ++} ++ ++static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ u32 i, mac_reg; ++ u16 phy_reg, wuc_enable; ++ int retval = 0; ++ ++ /* copy MAC RARs to PHY RARs */ ++ e1000_copy_rx_addrs_to_phy_ich8lan(hw); ++ ++ retval = hw->phy.ops.acquire(hw); ++ if (retval) { ++ e_err("Could not acquire PHY\n"); ++ return retval; ++ } ++ ++ /* Enable access to wakeup registers on and set page to BM_WUC_PAGE */ ++ retval = 
e1000_enable_phy_wakeup_reg_access_bm(hw, &wuc_enable); ++ if (retval) ++ goto out; ++ ++ /* copy MAC MTA to PHY MTA - only needed for pchlan */ ++ for (i = 0; i < adapter->hw.mac.mta_reg_count; i++) { ++ mac_reg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); ++ hw->phy.ops.write_reg_page(hw, BM_MTA(i), ++ (u16)(mac_reg & 0xFFFF)); ++ hw->phy.ops.write_reg_page(hw, BM_MTA(i) + 1, ++ (u16)((mac_reg >> 16) & 0xFFFF)); ++ } ++ ++ /* configure PHY Rx Control register */ ++ hw->phy.ops.read_reg_page(&adapter->hw, BM_RCTL, &phy_reg); ++ mac_reg = er32(RCTL); ++ if (mac_reg & E1000_RCTL_UPE) ++ phy_reg |= BM_RCTL_UPE; ++ if (mac_reg & E1000_RCTL_MPE) ++ phy_reg |= BM_RCTL_MPE; ++ phy_reg &= ~(BM_RCTL_MO_MASK); ++ if (mac_reg & E1000_RCTL_MO_3) ++ phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) ++ << BM_RCTL_MO_SHIFT); ++ if (mac_reg & E1000_RCTL_BAM) ++ phy_reg |= BM_RCTL_BAM; ++ if (mac_reg & E1000_RCTL_PMCF) ++ phy_reg |= BM_RCTL_PMCF; ++ mac_reg = er32(CTRL); ++ if (mac_reg & E1000_CTRL_RFCE) ++ phy_reg |= BM_RCTL_RFCE; ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_RCTL, phy_reg); ++ ++ /* enable PHY wakeup in MAC register */ ++ ew32(WUFC, wufc); ++ ew32(WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); ++ ++ /* configure and enable PHY wakeup in PHY registers */ ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_WUFC, wufc); ++ hw->phy.ops.write_reg_page(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); ++ ++ /* activate PHY wakeup */ ++ wuc_enable |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ++ retval = e1000_disable_phy_wakeup_reg_access_bm(hw, &wuc_enable); ++ if (retval) ++ e_err("Could not set PHY Host Wakeup bit\n"); ++out: ++ hw->phy.ops.release(hw); ++ ++ return retval; ++} ++ ++static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake, ++ bool runtime) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u32 ctrl, ctrl_ext, rctl, status; ++ /* Runtime suspend should only enable wakeup for link changes */ ++ u32 wufc = runtime ? 
E1000_WUFC_LNKC : adapter->wol; ++ int retval = 0; ++ ++ rtnetif_device_detach(netdev); ++ ++ if (rtnetif_running(netdev)) { ++ WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); ++ e1000e_down(adapter); ++ e1000_free_irq(adapter); ++ } ++ e1000e_reset_interrupt_capability(adapter); ++ ++ retval = pci_save_state(pdev); ++ if (retval) ++ return retval; ++ ++ status = er32(STATUS); ++ if (status & E1000_STATUS_LU) ++ wufc &= ~E1000_WUFC_LNKC; ++ ++ if (wufc) { ++ e1000_setup_rctl(adapter); ++ e1000_set_multi(netdev); ++ ++ /* turn on all-multi mode if wake on multicast is enabled */ ++ if (wufc & E1000_WUFC_MC) { ++ rctl = er32(RCTL); ++ rctl |= E1000_RCTL_MPE; ++ ew32(RCTL, rctl); ++ } ++ ++ ctrl = er32(CTRL); ++ /* advertise wake from D3Cold */ ++ #define E1000_CTRL_ADVD3WUC 0x00100000 ++ /* phy power management enable */ ++ #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 ++ ctrl |= E1000_CTRL_ADVD3WUC; ++ if (!(adapter->flags2 & FLAG2_HAS_PHY_WAKEUP)) ++ ctrl |= E1000_CTRL_EN_PHY_PWR_MGMT; ++ ew32(CTRL, ctrl); ++ ++ if (adapter->hw.phy.media_type == e1000_media_type_fiber || ++ adapter->hw.phy.media_type == ++ e1000_media_type_internal_serdes) { ++ /* keep the laser running in D3 */ ++ ctrl_ext = er32(CTRL_EXT); ++ ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; ++ ew32(CTRL_EXT, ctrl_ext); ++ } ++ ++ if (adapter->flags & FLAG_IS_ICH) ++ e1000_suspend_workarounds_ich8lan(&adapter->hw); ++ ++ /* Allow time for pending master requests to run */ ++ e1000e_disable_pcie_master(&adapter->hw); ++ ++ if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) { ++ /* enable wakeup by the PHY */ ++ retval = e1000_init_phy_wakeup(adapter, wufc); ++ if (retval) ++ return retval; ++ } else { ++ /* enable wakeup by the MAC */ ++ ew32(WUFC, wufc); ++ ew32(WUC, E1000_WUC_PME_EN); ++ } ++ } else { ++ ew32(WUC, 0); ++ ew32(WUFC, 0); ++ } ++ ++ *enable_wake = !!wufc; ++ ++ /* make sure adapter isn't asleep if manageability is enabled */ ++ if ((adapter->flags & FLAG_MNG_PT_ENABLED) || ++ (hw->mac.ops.check_mng_mode(hw))) ++ *enable_wake = true; ++ ++ if (adapter->hw.phy.type == e1000_phy_igp_3) ++ e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); ++ ++ /* ++ * Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. ++ */ ++ e1000e_release_hw_control(adapter); ++ ++ pci_disable_device(pdev); ++ ++ return 0; ++} ++ ++static void e1000_power_off(struct pci_dev *pdev, bool sleep, bool wake) ++{ ++ if (sleep && wake) { ++ pci_prepare_to_sleep(pdev); ++ return; ++ } ++ ++ pci_wake_from_d3(pdev, wake); ++ pci_set_power_state(pdev, PCI_D3hot); ++} ++ ++static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, ++ bool wake) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ /* ++ * The pci-e switch on some quad port adapters will report a ++ * correctable error when the MAC transitions from D0 to D3. To ++ * prevent this we need to mask off the correctable errors on the ++ * downstream port of the pci-e switch. 
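++ * (The code below clears PCI_EXP_DEVCTL_CERE in the parent bridge's
++ * Device Control register around the power-off and restores the saved
++ * value afterwards.)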
++ */ ++ if (adapter->flags & FLAG_IS_QUAD_PORT) { ++ struct pci_dev *us_dev = pdev->bus->self; ++ int pos = pci_pcie_cap(us_dev); ++ u16 devctl; ++ ++ pci_read_config_word(us_dev, pos + PCI_EXP_DEVCTL, &devctl); ++ pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, ++ (devctl & ~PCI_EXP_DEVCTL_CERE)); ++ ++ e1000_power_off(pdev, sleep, wake); ++ ++ pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, devctl); ++ } else { ++ e1000_power_off(pdev, sleep, wake); ++ } ++} ++ ++static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) ++{ ++ int pos; ++ u16 reg16; ++ ++ /* ++ * Both device and parent should have the same ASPM setting. ++ * Disable ASPM in downstream component first and then upstream. ++ */ ++ pos = pci_pcie_cap(pdev); ++ pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); ++ reg16 &= ~state; ++ pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16); ++ ++ if (!pdev->bus->self) ++ return; ++ ++ pos = pci_pcie_cap(pdev->bus->self); ++ pci_read_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, ®16); ++ reg16 &= ~state; ++ pci_write_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, reg16); ++} ++ ++static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) ++{ ++ dev_info(&pdev->dev, "Disabling ASPM %s %s\n", ++ (state & PCIE_LINK_STATE_L0S) ? "L0s" : "", ++ (state & PCIE_LINK_STATE_L1) ? "L1" : ""); ++ ++ __e1000e_disable_aspm(pdev, state); ++} ++ ++static void e1000_shutdown(struct pci_dev *pdev) ++{ ++ bool wake = false; ++ ++ __e1000_shutdown(pdev, &wake, false); ++ ++ if (system_state == SYSTEM_POWER_OFF) ++ e1000_complete_shutdown(pdev, false, wake); ++} ++ ++/** ++ * e1000_io_error_detected - called when PCI error is detected ++ * @pdev: Pointer to PCI device ++ * @state: The current pci connection state ++ * ++ * This function is called after a PCI bus error affecting ++ * this device has been detected. ++ */ ++static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ rtnetif_device_detach(netdev); ++ ++ if (state == pci_channel_io_perm_failure) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ if (rtnetif_running(netdev)) ++ e1000e_down(adapter); ++ pci_disable_device(pdev); ++ ++ /* Request a slot slot reset. */ ++ return PCI_ERS_RESULT_NEED_RESET; ++} ++ ++/** ++ * e1000_io_slot_reset - called after the pci bus has been reset. ++ * @pdev: Pointer to PCI device ++ * ++ * Restart the card from scratch, as if from a cold-boot. Implementation ++ * resembles the first-half of the e1000_resume routine. 
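++ * ASPM L0s/L1 is disabled again when the adapter flags require it, the
++ * PCI device is re-enabled and its saved state restored, wake-up from D3
++ * is disarmed, the hardware is reset and the Wake Up Status register is
++ * cleared.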
++ */ ++static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ struct e1000_hw *hw = &adapter->hw; ++ u16 aspm_disable_flag = 0; ++ int err; ++ pci_ers_result_t result; ++ ++ if (adapter->flags2 & FLAG2_DISABLE_ASPM_L0S) ++ aspm_disable_flag = PCIE_LINK_STATE_L0S; ++ if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1) ++ aspm_disable_flag |= PCIE_LINK_STATE_L1; ++ if (aspm_disable_flag) ++ e1000e_disable_aspm(pdev, aspm_disable_flag); ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) { ++ dev_err(&pdev->dev, ++ "Cannot re-enable PCI device after reset.\n"); ++ result = PCI_ERS_RESULT_DISCONNECT; ++ } else { ++ pci_set_master(pdev); ++ pdev->state_saved = true; ++ pci_restore_state(pdev); ++ ++ pci_enable_wake(pdev, PCI_D3hot, 0); ++ pci_enable_wake(pdev, PCI_D3cold, 0); ++ ++ e1000e_reset(adapter); ++ ew32(WUS, ~0); ++ result = PCI_ERS_RESULT_RECOVERED; ++ } ++ ++ pci_cleanup_aer_uncorrect_error_status(pdev); ++ ++ return result; ++} ++ ++/** ++ * e1000_io_resume - called when traffic can start flowing again. ++ * @pdev: Pointer to PCI device ++ * ++ * This callback is called when the error recovery driver tells us that ++ * its OK to resume normal operation. Implementation resembles the ++ * second-half of the e1000_resume routine. ++ */ ++static void e1000_io_resume(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ ++ e1000_init_manageability_pt(adapter); ++ ++ if (rtnetif_running(netdev)) { ++ if (e1000e_up(adapter)) { ++ dev_err(&pdev->dev, ++ "can't bring device back up after reset\n"); ++ return; ++ } ++ } ++ ++ rtnetif_device_attach(netdev); ++ ++ /* ++ * If the controller has AMT, do not set DRV_LOAD until the interface ++ * is up. For all other cases, let the f/w know that the h/w is now ++ * under the control of the driver. ++ */ ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_get_hw_control(adapter); ++ ++} ++ ++static void e1000_print_device_info(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ struct rtnet_device *netdev = adapter->netdev; ++ u32 ret_val; ++ u8 pba_str[E1000_PBANUM_LENGTH]; ++ ++ /* print bus type/speed/width info */ ++ e_info("(PCI Express:2.5GT/s:%s) %pM\n", ++ /* bus width */ ++ ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : ++ "Width x1"), ++ /* MAC address */ ++ netdev->dev_addr); ++ e_info("Intel(R) PRO/%s Network Connection\n", ++ (hw->phy.type == e1000_phy_ife) ? 
"10/100" : "1000"); ++ ret_val = e1000_read_pba_string_generic(hw, pba_str, ++ E1000_PBANUM_LENGTH); ++ if (ret_val) ++ strncpy((char *)pba_str, "Unknown", sizeof(pba_str) - 1); ++ e_info("MAC: %d, PHY: %d, PBA No: %s\n", ++ hw->mac.type, hw->phy.type, pba_str); ++} ++ ++static void e1000_eeprom_checks(struct e1000_adapter *adapter) ++{ ++ struct e1000_hw *hw = &adapter->hw; ++ int ret_val; ++ u16 buf = 0; ++ ++ if (hw->mac.type != e1000_82573) ++ return; ++ ++ ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &buf); ++ if (!ret_val && (!(le16_to_cpu(buf) & (1 << 0)))) { ++ /* Deep Smart Power Down (DSPD) */ ++ dev_warn(&adapter->pdev->dev, ++ "Warning: detected DSPD enabled in EEPROM\n"); ++ } ++} ++ ++static dma_addr_t e1000_map_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ dma_addr_t addr; ++ ++ addr = dma_map_single(dev, skb->buf_start, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++ if (dma_mapping_error(dev, addr)) { ++ dev_err(dev, "DMA map failed\n"); ++ return RTSKB_UNMAPPED; ++ } ++ return addr; ++} ++ ++static void e1000_unmap_rtskb(struct rtnet_device *netdev, ++ struct rtskb *skb) ++{ ++ struct e1000_adapter *adapter = netdev->priv; ++ struct device *dev = &adapter->pdev->dev; ++ ++ dma_unmap_single(dev, skb->buf_dma_addr, RTSKB_SIZE, ++ DMA_BIDIRECTIONAL); ++} ++ ++/** ++ * e1000_probe - Device Initialization Routine ++ * @pdev: PCI device information struct ++ * @ent: entry in e1000_pci_tbl ++ * ++ * Returns 0 on success, negative on failure ++ * ++ * e1000_probe initializes an adapter identified by a pci_dev structure. ++ * The OS initialization, configuring of the adapter private structure, ++ * and a hardware reset occur. ++ **/ ++static int e1000_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtnet_device *netdev; ++ struct e1000_adapter *adapter; ++ struct e1000_hw *hw; ++ const struct e1000_info *ei = e1000_info_tbl[ent->driver_data]; ++ resource_size_t mmio_start, mmio_len; ++ resource_size_t flash_start, flash_len; ++ ++ static int cards_found; ++ u16 aspm_disable_flag = 0; ++ int i, err, pci_using_dac; ++ u16 eeprom_data = 0; ++ u16 eeprom_apme_mask = E1000_EEPROM_APME; ++ ++ if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S) ++ aspm_disable_flag = PCIE_LINK_STATE_L0S; ++ if (ei->flags2 & FLAG2_DISABLE_ASPM_L1) ++ aspm_disable_flag |= PCIE_LINK_STATE_L1; ++ if (aspm_disable_flag) ++ e1000e_disable_aspm(pdev, aspm_disable_flag); ++ ++ err = pci_enable_device_mem(pdev); ++ if (err) ++ return err; ++ ++ pci_using_dac = 0; ++ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) { ++ err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); ++ if (!err) ++ pci_using_dac = 1; ++ } else { ++ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); ++ if (err) { ++ err = dma_set_coherent_mask(&pdev->dev, ++ DMA_BIT_MASK(32)); ++ if (err) { ++ dev_err(&pdev->dev, "No usable DMA " ++ "configuration, aborting\n"); ++ goto err_dma; ++ } ++ } ++ } ++ ++ err = pci_request_selected_regions_exclusive(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM), ++ e1000e_driver_name); ++ if (err) ++ goto err_pci_reg; ++ ++ /* AER (Advanced Error Reporting) hooks */ ++ pci_enable_pcie_error_reporting(pdev); ++ ++ pci_set_master(pdev); ++ /* PCI config space info */ ++ err = pci_save_state(pdev); ++ if (err) ++ goto err_alloc_etherdev; ++ ++ err = -ENOMEM; ++ netdev = rt_alloc_etherdev(sizeof(*adapter), ++ 2 * RT_E1000E_NUM_RXD + 256); ++ if (!netdev) ++ goto 
err_alloc_etherdev; ++ ++ rtdev_alloc_name(netdev, "rteth%d"); ++ rt_rtdev_connect(netdev, &RTDEV_manager); ++ netdev->vers = RTDEV_VERS_2_0; ++ netdev->sysbind = &pdev->dev; ++ ++ netdev->irq = pdev->irq; ++ ++ pci_set_drvdata(pdev, netdev); ++ adapter = netdev->priv; ++ hw = &adapter->hw; ++ adapter->netdev = netdev; ++ adapter->pdev = pdev; ++ adapter->ei = ei; ++ adapter->pba = ei->pba; ++ adapter->flags = ei->flags; ++ adapter->flags2 = ei->flags2; ++ adapter->hw.adapter = adapter; ++ adapter->hw.mac.type = ei->mac; ++ adapter->max_hw_frame_size = ei->max_hw_frame_size; ++ adapter->msg_enable = (1 << NETIF_MSG_DRV | NETIF_MSG_PROBE) - 1; ++ ++ mmio_start = pci_resource_start(pdev, 0); ++ mmio_len = pci_resource_len(pdev, 0); ++ ++ err = -EIO; ++ adapter->hw.hw_addr = ioremap(mmio_start, mmio_len); ++ if (!adapter->hw.hw_addr) ++ goto err_ioremap; ++ ++ if ((adapter->flags & FLAG_HAS_FLASH) && ++ (pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { ++ flash_start = pci_resource_start(pdev, 1); ++ flash_len = pci_resource_len(pdev, 1); ++ adapter->hw.flash_address = ioremap(flash_start, flash_len); ++ if (!adapter->hw.flash_address) ++ goto err_flashmap; ++ } ++ ++ /* construct the net_device struct */ ++ netdev->open = e1000_open; ++ netdev->stop = e1000_close; ++ netdev->hard_start_xmit = e1000_xmit_frame; ++ //netdev->get_stats = e1000_get_stats; ++ netdev->map_rtskb = e1000_map_rtskb; ++ netdev->unmap_rtskb = e1000_unmap_rtskb; ++ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ++ ++ netdev->mem_start = mmio_start; ++ netdev->mem_end = mmio_start + mmio_len; ++ ++ adapter->bd_number = cards_found++; ++ ++ e1000e_check_options(adapter); ++ ++ /* setup adapter struct */ ++ err = e1000_sw_init(adapter); ++ if (err) ++ goto err_sw_init; ++ ++ memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); ++ memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); ++ memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); ++ ++ err = ei->get_variants(adapter); ++ if (err) ++ goto err_hw_init; ++ ++ if ((adapter->flags & FLAG_IS_ICH) && ++ (adapter->flags & FLAG_READ_ONLY_NVM)) ++ e1000e_write_protect_nvm_ich8lan(&adapter->hw); ++ ++ hw->mac.ops.get_bus_info(&adapter->hw); ++ ++ adapter->hw.phy.autoneg_wait_to_complete = 0; ++ ++ /* Copper options */ ++ if (adapter->hw.phy.media_type == e1000_media_type_copper) { ++ adapter->hw.phy.mdix = AUTO_ALL_MODES; ++ adapter->hw.phy.disable_polarity_correction = 0; ++ adapter->hw.phy.ms_type = e1000_ms_hw_default; ++ } ++ ++ if (e1000_check_reset_block(&adapter->hw)) ++ e_info("PHY reset is blocked due to SOL/IDER session.\n"); ++ ++ /* Set initial default active device features */ ++ netdev->features = (NETIF_F_SG | ++ NETIF_F_HW_VLAN_CTAG_RX | ++ NETIF_F_HW_VLAN_CTAG_TX | ++ NETIF_F_TSO | ++ NETIF_F_TSO6 | ++ NETIF_F_RXCSUM | ++ NETIF_F_HW_CSUM); ++ ++ if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) ++ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ++ ++ if (pci_using_dac) { ++ netdev->features |= NETIF_F_HIGHDMA; ++ } ++ ++ if (e1000e_enable_mng_pass_thru(&adapter->hw)) ++ adapter->flags |= FLAG_MNG_PT_ENABLED; ++ ++ /* ++ * before reading the NVM, reset the controller to ++ * put the device in a known good starting state ++ */ ++ adapter->hw.mac.ops.reset_hw(&adapter->hw); ++ ++ /* ++ * systems with ASPM and others may see the checksum fail on the first ++ * attempt. 
Let's give it a few tries ++ */ ++ for (i = 0;; i++) { ++ if (e1000_validate_nvm_checksum(&adapter->hw) >= 0) ++ break; ++ if (i == 2) { ++ e_err("The NVM Checksum Is Not Valid\n"); ++ err = -EIO; ++ goto err_eeprom; ++ } ++ } ++ ++ e1000_eeprom_checks(adapter); ++ ++ /* copy the MAC address */ ++ if (e1000e_read_mac_addr(&adapter->hw)) ++ e_err("NVM Read Error while reading MAC address\n"); ++ ++ memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); ++ timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); ++#else /* < 4.14 */ ++ init_timer(&adapter->watchdog_timer); ++ adapter->watchdog_timer.function = e1000_watchdog; ++ adapter->watchdog_timer.data = (unsigned long) adapter; ++ ++ init_timer(&adapter->phy_info_timer); ++ adapter->phy_info_timer.function = e1000_update_phy_info; ++ adapter->phy_info_timer.data = (unsigned long) adapter; ++#endif /* < 4.14 */ ++ ++ INIT_WORK(&adapter->reset_task, e1000_reset_task); ++ INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); ++ INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround); ++ INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task); ++ ++ rtdm_nrtsig_init(&adapter->mod_timer_sig, e1000e_mod_watchdog_timer, ++ (void*)&adapter->watchdog_timer); ++ rtdm_nrtsig_init(&adapter->downshift_sig, e1000e_trigger_downshift, ++ &adapter->downshift_task); ++ ++ /* Initialize link parameters. User can change them with ethtool */ ++ adapter->hw.mac.autoneg = 1; ++ adapter->fc_autoneg = 1; ++ adapter->hw.fc.requested_mode = e1000_fc_default; ++ adapter->hw.fc.current_mode = e1000_fc_default; ++ adapter->hw.phy.autoneg_advertised = 0x2f; ++ ++ /* ring size defaults */ ++ adapter->rx_ring->count = RT_E1000E_NUM_RXD; ++ adapter->tx_ring->count = 256; ++ ++ /* ++ * Initial Wake on LAN setting - If APM wake is enabled in ++ * the EEPROM, enable the ACPI Magic Packet filter ++ */ ++ if (adapter->flags & FLAG_APME_IN_WUC) { ++ /* APME bit in EEPROM is mapped to WUC.APME */ ++ eeprom_data = er32(WUC); ++ eeprom_apme_mask = E1000_WUC_APME; ++ if ((hw->mac.type > e1000_ich10lan) && ++ (eeprom_data & E1000_WUC_PHY_WAKE)) ++ adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP; ++ } else if (adapter->flags & FLAG_APME_IN_CTRL3) { ++ if (adapter->flags & FLAG_APME_CHECK_PORT_B && ++ (adapter->hw.bus.func == 1)) ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); ++ else ++ e1000_read_nvm(&adapter->hw, ++ NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); ++ } ++ ++ /* fetch WoL from EEPROM */ ++ if (eeprom_data & eeprom_apme_mask) ++ adapter->eeprom_wol |= E1000_WUFC_MAG; ++ ++ /* ++ * now that we have the eeprom settings, apply the special cases ++ * where the eeprom may be wrong or the board simply won't support ++ * wake on lan on a particular port ++ */ ++ if (!(adapter->flags & FLAG_HAS_WOL)) ++ adapter->eeprom_wol = 0; ++ ++ /* initialize the wol settings based on the eeprom settings */ ++ adapter->wol = adapter->eeprom_wol; ++ device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); ++ ++ /* save off EEPROM version number */ ++ e1000_read_nvm(&adapter->hw, 5, 1, &adapter->eeprom_vers); ++ ++ /* reset the hardware with the new settings */ ++ e1000e_reset(adapter); ++ ++ /* ++ * If the controller has AMT, do not set DRV_LOAD until the interface ++ * is up. For all other cases, let the f/w know that the h/w is now ++ * under the control of the driver. 
++ */ ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_get_hw_control(adapter); ++ ++ strncpy(netdev->name, "rteth%d", sizeof(netdev->name) - 1); ++ err = rt_register_rtnetdev(netdev); ++ if (err) ++ goto err_register; ++ ++ /* carrier off reporting is important to ethtool even BEFORE open */ ++ rtnetif_carrier_off(netdev); ++ ++ e1000_print_device_info(adapter); ++ ++ if (pci_dev_run_wake(pdev)) ++ pm_runtime_put_noidle(&pdev->dev); ++ ++ return 0; ++ ++err_register: ++ rtdm_nrtsig_destroy(&adapter->downshift_sig); ++ rtdm_nrtsig_destroy(&adapter->mod_timer_sig); ++ if (!(adapter->flags & FLAG_HAS_AMT)) ++ e1000e_release_hw_control(adapter); ++err_eeprom: ++ if (!e1000_check_reset_block(&adapter->hw)) ++ e1000_phy_hw_reset(&adapter->hw); ++err_hw_init: ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++err_sw_init: ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ e1000e_reset_interrupt_capability(adapter); ++err_flashmap: ++ iounmap(adapter->hw.hw_addr); ++err_ioremap: ++ rtdev_free(netdev); ++err_alloc_etherdev: ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++err_pci_reg: ++err_dma: ++ pci_disable_device(pdev); ++ return err; ++} ++ ++/** ++ * e1000_remove - Device Removal Routine ++ * @pdev: PCI device information struct ++ * ++ * e1000_remove is called by the PCI subsystem to alert the driver ++ * that it should release a PCI device. The could be caused by a ++ * Hot-Plug event, or because the driver is going to be removed from ++ * memory. ++ **/ ++static void e1000_remove(struct pci_dev *pdev) ++{ ++ struct rtnet_device *netdev = pci_get_drvdata(pdev); ++ struct e1000_adapter *adapter = netdev->priv; ++ bool down = test_bit(__E1000_DOWN, &adapter->state); ++ ++ /* ++ * The timers may be rescheduled, so explicitly disable them ++ * from being rescheduled. ++ */ ++ if (!down) ++ set_bit(__E1000_DOWN, &adapter->state); ++ del_timer_sync(&adapter->watchdog_timer); ++ del_timer_sync(&adapter->phy_info_timer); ++ ++ rtdm_nrtsig_destroy(&adapter->downshift_sig); ++ rtdm_nrtsig_destroy(&adapter->mod_timer_sig); ++ ++ cancel_work_sync(&adapter->reset_task); ++ cancel_work_sync(&adapter->watchdog_task); ++ cancel_work_sync(&adapter->downshift_task); ++ cancel_work_sync(&adapter->update_phy_task); ++ ++ if (!(netdev->flags & IFF_UP)) ++ e1000_power_down_phy(adapter); ++ ++ /* Don't lie to e1000_close() down the road. */ ++ if (!down) ++ clear_bit(__E1000_DOWN, &adapter->state); ++ rt_unregister_rtnetdev(netdev); ++ ++ if (pci_dev_run_wake(pdev)) ++ pm_runtime_get_noresume(&pdev->dev); ++ ++ /* ++ * Release control of h/w to f/w. If f/w is AMT enabled, this ++ * would have already happened in close and is redundant. 
++ */ ++ e1000e_release_hw_control(adapter); ++ ++ e1000e_reset_interrupt_capability(adapter); ++ kfree(adapter->tx_ring); ++ kfree(adapter->rx_ring); ++ ++ iounmap(adapter->hw.hw_addr); ++ if (adapter->hw.flash_address) ++ iounmap(adapter->hw.flash_address); ++ pci_release_selected_regions(pdev, ++ pci_select_bars(pdev, IORESOURCE_MEM)); ++ ++ rtdev_free(netdev); ++ ++ /* AER disable */ ++ pci_disable_pcie_error_reporting(pdev); ++ ++ pci_disable_device(pdev); ++} ++ ++/* PCI Error Recovery (ERS) */ ++static struct pci_error_handlers e1000_err_handler = { ++ .error_detected = e1000_io_error_detected, ++ .slot_reset = e1000_io_slot_reset, ++ .resume = e1000_io_resume, ++}; ++ ++static const struct pci_device_id e1000_pci_tbl[] = { ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD), board_82571 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER), board_82571 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_COPPER), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_FIBER), board_82572 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_SERDES), board_82572 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E), board_82573 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E_IAMT), board_82573 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82573L), board_82573 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82574L), board_82574 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82574LA), board_82574 }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_82583V), board_82583 }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_DPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_SPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_DPT), ++ board_80003es2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_SPT), ++ board_80003es2lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_G), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_GT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_AMT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_C), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M_AMT), board_ich8lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_82567V_3), board_ich8lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_G), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_BM), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan }, ++ ++ { 
PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LM), board_ich10lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LF), board_ich10lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_V), board_ich10lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LM), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LC), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DM), board_pchlan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DC), board_pchlan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_LM), board_pch2lan }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_V), board_pch2lan }, ++ ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_LM), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_V), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM), board_pch_lpt }, ++ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V), board_pch_lpt }, ++ ++ { } /* terminate list */ ++}; ++MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); ++ ++/* PCI Device API Driver */ ++static struct pci_driver e1000_driver = { ++ .name = e1000e_driver_name, ++ .id_table = e1000_pci_tbl, ++ .probe = e1000_probe, ++ .remove = e1000_remove, ++ .shutdown = e1000_shutdown, ++ .err_handler = &e1000_err_handler ++}; ++ ++/** ++ * e1000_init_module - Driver Registration Routine ++ * ++ * e1000_init_module is the first routine called when the driver is ++ * loaded. All it does is register with the PCI subsystem. ++ **/ ++static int __init e1000_init_module(void) ++{ ++ int ret; ++ pr_info("Intel(R) PRO/1000 Network Driver - %s\n", ++ e1000e_driver_version); ++ pr_info("Copyright(c) 1999 - 2011 Intel Corporation.\n"); ++ ret = pci_register_driver(&e1000_driver); ++ ++ return ret; ++} ++module_init(e1000_init_module); ++ ++/** ++ * e1000_exit_module - Driver Exit Cleanup Routine ++ * ++ * e1000_exit_module is called just before the driver is removed ++ * from memory. ++ **/ ++static void __exit e1000_exit_module(void) ++{ ++ pci_unregister_driver(&e1000_driver); ++} ++module_exit(e1000_exit_module); ++ ++ ++MODULE_AUTHOR("Intel Corporation, "); ++MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver"); ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(DRV_VERSION); ++ ++/* e1000_main.c */ +--- linux/drivers/xenomai/net/drivers/e1000e/hw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/e1000e/hw.h 2021-04-07 16:01:27.170634248 +0800 +@@ -0,0 +1,997 @@ ++/******************************************************************************* ++ ++ Intel PRO/1000 Linux driver ++ Copyright(c) 1999 - 2011 Intel Corporation. ++ ++ This program is free software; you can redistribute it and/or modify it ++ under the terms and conditions of the GNU General Public License, ++ version 2, as published by the Free Software Foundation. ++ ++ This program is distributed in the hope it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ You should have received a copy of the GNU General Public License along with ++ this program; if not, write to the Free Software Foundation, Inc., ++ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ ++ The full GNU General Public License is included in this distribution in ++ the file called "COPYING". ++ ++ Contact Information: ++ Linux NICS ++ e1000-devel Mailing List ++ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ++ ++*******************************************************************************/ ++ ++#ifndef _E1000_HW_H_ ++#define _E1000_HW_H_ ++ ++#include ++ ++struct e1000_hw; ++struct e1000_adapter; ++ ++#include "defines.h" ++ ++#define er32(reg) __er32(hw, E1000_##reg) ++#define ew32(reg,val) __ew32(hw, E1000_##reg, (val)) ++#define e1e_flush() er32(STATUS) ++ ++#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) \ ++ (writel((value), ((a)->hw_addr + reg + ((offset) << 2)))) ++ ++#define E1000_READ_REG_ARRAY(a, reg, offset) \ ++ (readl((a)->hw_addr + reg + ((offset) << 2))) ++ ++enum e1e_registers { ++ E1000_CTRL = 0x00000, /* Device Control - RW */ ++ E1000_STATUS = 0x00008, /* Device Status - RO */ ++ E1000_EECD = 0x00010, /* EEPROM/Flash Control - RW */ ++ E1000_EERD = 0x00014, /* EEPROM Read - RW */ ++ E1000_CTRL_EXT = 0x00018, /* Extended Device Control - RW */ ++ E1000_FLA = 0x0001C, /* Flash Access - RW */ ++ E1000_MDIC = 0x00020, /* MDI Control - RW */ ++ E1000_SCTL = 0x00024, /* SerDes Control - RW */ ++ E1000_FCAL = 0x00028, /* Flow Control Address Low - RW */ ++ E1000_FCAH = 0x0002C, /* Flow Control Address High -RW */ ++ E1000_FEXTNVM4 = 0x00024, /* Future Extended NVM 4 - RW */ ++ E1000_FEXTNVM = 0x00028, /* Future Extended NVM - RW */ ++ E1000_FCT = 0x00030, /* Flow Control Type - RW */ ++ E1000_VET = 0x00038, /* VLAN Ether Type - RW */ ++ E1000_ICR = 0x000C0, /* Interrupt Cause Read - R/clr */ ++ E1000_ITR = 0x000C4, /* Interrupt Throttling Rate - RW */ ++ E1000_ICS = 0x000C8, /* Interrupt Cause Set - WO */ ++ E1000_IMS = 0x000D0, /* Interrupt Mask Set - RW */ ++ E1000_IMC = 0x000D8, /* Interrupt Mask Clear - WO */ ++ E1000_EIAC_82574 = 0x000DC, /* Ext. 
Interrupt Auto Clear - RW */ ++ E1000_IAM = 0x000E0, /* Interrupt Acknowledge Auto Mask */ ++ E1000_IVAR = 0x000E4, /* Interrupt Vector Allocation - RW */ ++ E1000_EITR_82574_BASE = 0x000E8, /* Interrupt Throttling - RW */ ++#define E1000_EITR_82574(_n) (E1000_EITR_82574_BASE + (_n << 2)) ++ E1000_RCTL = 0x00100, /* Rx Control - RW */ ++ E1000_FCTTV = 0x00170, /* Flow Control Transmit Timer Value - RW */ ++ E1000_TXCW = 0x00178, /* Tx Configuration Word - RW */ ++ E1000_RXCW = 0x00180, /* Rx Configuration Word - RO */ ++ E1000_TCTL = 0x00400, /* Tx Control - RW */ ++ E1000_TCTL_EXT = 0x00404, /* Extended Tx Control - RW */ ++ E1000_TIPG = 0x00410, /* Tx Inter-packet gap -RW */ ++ E1000_AIT = 0x00458, /* Adaptive Interframe Spacing Throttle -RW */ ++ E1000_LEDCTL = 0x00E00, /* LED Control - RW */ ++ E1000_EXTCNF_CTRL = 0x00F00, /* Extended Configuration Control */ ++ E1000_EXTCNF_SIZE = 0x00F08, /* Extended Configuration Size */ ++ E1000_PHY_CTRL = 0x00F10, /* PHY Control Register in CSR */ ++#define E1000_POEMB E1000_PHY_CTRL /* PHY OEM Bits */ ++ E1000_PBA = 0x01000, /* Packet Buffer Allocation - RW */ ++ E1000_PBS = 0x01008, /* Packet Buffer Size */ ++ E1000_EEMNGCTL = 0x01010, /* MNG EEprom Control */ ++ E1000_EEWR = 0x0102C, /* EEPROM Write Register - RW */ ++ E1000_FLOP = 0x0103C, /* FLASH Opcode Register */ ++ E1000_PBA_ECC = 0x01100, /* PBA ECC Register */ ++ E1000_ERT = 0x02008, /* Early Rx Threshold - RW */ ++ E1000_FCRTL = 0x02160, /* Flow Control Receive Threshold Low - RW */ ++ E1000_FCRTH = 0x02168, /* Flow Control Receive Threshold High - RW */ ++ E1000_PSRCTL = 0x02170, /* Packet Split Receive Control - RW */ ++ E1000_RDBAL = 0x02800, /* Rx Descriptor Base Address Low - RW */ ++ E1000_RDBAH = 0x02804, /* Rx Descriptor Base Address High - RW */ ++ E1000_RDLEN = 0x02808, /* Rx Descriptor Length - RW */ ++ E1000_RDH = 0x02810, /* Rx Descriptor Head - RW */ ++ E1000_RDT = 0x02818, /* Rx Descriptor Tail - RW */ ++ E1000_RDTR = 0x02820, /* Rx Delay Timer - RW */ ++ E1000_RXDCTL_BASE = 0x02828, /* Rx Descriptor Control - RW */ ++#define E1000_RXDCTL(_n) (E1000_RXDCTL_BASE + (_n << 8)) ++ E1000_RADV = 0x0282C, /* Rx Interrupt Absolute Delay Timer - RW */ ++ ++/* Convenience macros ++ * ++ * Note: "_n" is the queue number of the register to be written to. 
++ * ++ * Example usage: ++ * E1000_RDBAL_REG(current_rx_queue) ++ * ++ */ ++#define E1000_RDBAL_REG(_n) (E1000_RDBAL + (_n << 8)) ++ E1000_KABGTXD = 0x03004, /* AFE Band Gap Transmit Ref Data */ ++ E1000_TDBAL = 0x03800, /* Tx Descriptor Base Address Low - RW */ ++ E1000_TDBAH = 0x03804, /* Tx Descriptor Base Address High - RW */ ++ E1000_TDLEN = 0x03808, /* Tx Descriptor Length - RW */ ++ E1000_TDH = 0x03810, /* Tx Descriptor Head - RW */ ++ E1000_TDT = 0x03818, /* Tx Descriptor Tail - RW */ ++ E1000_TIDV = 0x03820, /* Tx Interrupt Delay Value - RW */ ++ E1000_TXDCTL_BASE = 0x03828, /* Tx Descriptor Control - RW */ ++#define E1000_TXDCTL(_n) (E1000_TXDCTL_BASE + (_n << 8)) ++ E1000_TADV = 0x0382C, /* Tx Interrupt Absolute Delay Val - RW */ ++ E1000_TARC_BASE = 0x03840, /* Tx Arbitration Count (0) */ ++#define E1000_TARC(_n) (E1000_TARC_BASE + (_n << 8)) ++ E1000_CRCERRS = 0x04000, /* CRC Error Count - R/clr */ ++ E1000_ALGNERRC = 0x04004, /* Alignment Error Count - R/clr */ ++ E1000_SYMERRS = 0x04008, /* Symbol Error Count - R/clr */ ++ E1000_RXERRC = 0x0400C, /* Receive Error Count - R/clr */ ++ E1000_MPC = 0x04010, /* Missed Packet Count - R/clr */ ++ E1000_SCC = 0x04014, /* Single Collision Count - R/clr */ ++ E1000_ECOL = 0x04018, /* Excessive Collision Count - R/clr */ ++ E1000_MCC = 0x0401C, /* Multiple Collision Count - R/clr */ ++ E1000_LATECOL = 0x04020, /* Late Collision Count - R/clr */ ++ E1000_COLC = 0x04028, /* Collision Count - R/clr */ ++ E1000_DC = 0x04030, /* Defer Count - R/clr */ ++ E1000_TNCRS = 0x04034, /* Tx-No CRS - R/clr */ ++ E1000_SEC = 0x04038, /* Sequence Error Count - R/clr */ ++ E1000_CEXTERR = 0x0403C, /* Carrier Extension Error Count - R/clr */ ++ E1000_RLEC = 0x04040, /* Receive Length Error Count - R/clr */ ++ E1000_XONRXC = 0x04048, /* XON Rx Count - R/clr */ ++ E1000_XONTXC = 0x0404C, /* XON Tx Count - R/clr */ ++ E1000_XOFFRXC = 0x04050, /* XOFF Rx Count - R/clr */ ++ E1000_XOFFTXC = 0x04054, /* XOFF Tx Count - R/clr */ ++ E1000_FCRUC = 0x04058, /* Flow Control Rx Unsupported Count- R/clr */ ++ E1000_PRC64 = 0x0405C, /* Packets Rx (64 bytes) - R/clr */ ++ E1000_PRC127 = 0x04060, /* Packets Rx (65-127 bytes) - R/clr */ ++ E1000_PRC255 = 0x04064, /* Packets Rx (128-255 bytes) - R/clr */ ++ E1000_PRC511 = 0x04068, /* Packets Rx (255-511 bytes) - R/clr */ ++ E1000_PRC1023 = 0x0406C, /* Packets Rx (512-1023 bytes) - R/clr */ ++ E1000_PRC1522 = 0x04070, /* Packets Rx (1024-1522 bytes) - R/clr */ ++ E1000_GPRC = 0x04074, /* Good Packets Rx Count - R/clr */ ++ E1000_BPRC = 0x04078, /* Broadcast Packets Rx Count - R/clr */ ++ E1000_MPRC = 0x0407C, /* Multicast Packets Rx Count - R/clr */ ++ E1000_GPTC = 0x04080, /* Good Packets Tx Count - R/clr */ ++ E1000_GORCL = 0x04088, /* Good Octets Rx Count Low - R/clr */ ++ E1000_GORCH = 0x0408C, /* Good Octets Rx Count High - R/clr */ ++ E1000_GOTCL = 0x04090, /* Good Octets Tx Count Low - R/clr */ ++ E1000_GOTCH = 0x04094, /* Good Octets Tx Count High - R/clr */ ++ E1000_RNBC = 0x040A0, /* Rx No Buffers Count - R/clr */ ++ E1000_RUC = 0x040A4, /* Rx Undersize Count - R/clr */ ++ E1000_RFC = 0x040A8, /* Rx Fragment Count - R/clr */ ++ E1000_ROC = 0x040AC, /* Rx Oversize Count - R/clr */ ++ E1000_RJC = 0x040B0, /* Rx Jabber Count - R/clr */ ++ E1000_MGTPRC = 0x040B4, /* Management Packets Rx Count - R/clr */ ++ E1000_MGTPDC = 0x040B8, /* Management Packets Dropped Count - R/clr */ ++ E1000_MGTPTC = 0x040BC, /* Management Packets Tx Count - R/clr */ ++ E1000_TORL = 0x040C0, /* Total Octets Rx Low - R/clr */ ++ 
E1000_TORH = 0x040C4, /* Total Octets Rx High - R/clr */ ++ E1000_TOTL = 0x040C8, /* Total Octets Tx Low - R/clr */ ++ E1000_TOTH = 0x040CC, /* Total Octets Tx High - R/clr */ ++ E1000_TPR = 0x040D0, /* Total Packets Rx - R/clr */ ++ E1000_TPT = 0x040D4, /* Total Packets Tx - R/clr */ ++ E1000_PTC64 = 0x040D8, /* Packets Tx (64 bytes) - R/clr */ ++ E1000_PTC127 = 0x040DC, /* Packets Tx (65-127 bytes) - R/clr */ ++ E1000_PTC255 = 0x040E0, /* Packets Tx (128-255 bytes) - R/clr */ ++ E1000_PTC511 = 0x040E4, /* Packets Tx (256-511 bytes) - R/clr */ ++ E1000_PTC1023 = 0x040E8, /* Packets Tx (512-1023 bytes) - R/clr */ ++ E1000_PTC1522 = 0x040EC, /* Packets Tx (1024-1522 Bytes) - R/clr */ ++ E1000_MPTC = 0x040F0, /* Multicast Packets Tx Count - R/clr */ ++ E1000_BPTC = 0x040F4, /* Broadcast Packets Tx Count - R/clr */ ++ E1000_TSCTC = 0x040F8, /* TCP Segmentation Context Tx - R/clr */ ++ E1000_TSCTFC = 0x040FC, /* TCP Segmentation Context Tx Fail - R/clr */ ++ E1000_IAC = 0x04100, /* Interrupt Assertion Count */ ++ E1000_ICRXPTC = 0x04104, /* Irq Cause Rx Packet Timer Expire Count */ ++ E1000_ICRXATC = 0x04108, /* Irq Cause Rx Abs Timer Expire Count */ ++ E1000_ICTXPTC = 0x0410C, /* Irq Cause Tx Packet Timer Expire Count */ ++ E1000_ICTXATC = 0x04110, /* Irq Cause Tx Abs Timer Expire Count */ ++ E1000_ICTXQEC = 0x04118, /* Irq Cause Tx Queue Empty Count */ ++ E1000_ICTXQMTC = 0x0411C, /* Irq Cause Tx Queue MinThreshold Count */ ++ E1000_ICRXDMTC = 0x04120, /* Irq Cause Rx Desc MinThreshold Count */ ++ E1000_ICRXOC = 0x04124, /* Irq Cause Receiver Overrun Count */ ++ E1000_RXCSUM = 0x05000, /* Rx Checksum Control - RW */ ++ E1000_RFCTL = 0x05008, /* Receive Filter Control */ ++ E1000_MTA = 0x05200, /* Multicast Table Array - RW Array */ ++ E1000_RAL_BASE = 0x05400, /* Receive Address Low - RW */ ++#define E1000_RAL(_n) (E1000_RAL_BASE + ((_n) * 8)) ++#define E1000_RA (E1000_RAL(0)) ++ E1000_RAH_BASE = 0x05404, /* Receive Address High - RW */ ++#define E1000_RAH(_n) (E1000_RAH_BASE + ((_n) * 8)) ++ E1000_SHRAL_PCH_LPT_BASE = 0x05408, ++#define E1000_SHRAL_PCH_LPT(_n) (E1000_SHRAL_PCH_LPT_BASE + ((_n) * 8)) ++ E1000_SHRAH_PCH_LTP_BASE = 0x0540C, ++#define E1000_SHRAH_PCH_LPT(_n) (E1000_SHRAH_PCH_LTP_BASE + ((_n) * 8)) ++ E1000_VFTA = 0x05600, /* VLAN Filter Table Array - RW Array */ ++ E1000_WUC = 0x05800, /* Wakeup Control - RW */ ++ E1000_WUFC = 0x05808, /* Wakeup Filter Control - RW */ ++ E1000_WUS = 0x05810, /* Wakeup Status - RO */ ++ E1000_MANC = 0x05820, /* Management Control - RW */ ++ E1000_FFLT = 0x05F00, /* Flexible Filter Length Table - RW Array */ ++ E1000_HOST_IF = 0x08800, /* Host Interface */ ++ ++ E1000_KMRNCTRLSTA = 0x00034, /* MAC-PHY interface - RW */ ++ E1000_MANC2H = 0x05860, /* Management Control To Host - RW */ ++ E1000_MDEF_BASE = 0x05890, /* Management Decision Filters */ ++#define E1000_MDEF(_n) (E1000_MDEF_BASE + ((_n) * 4)) ++ E1000_SW_FW_SYNC = 0x05B5C, /* Software-Firmware Synchronization - RW */ ++ E1000_GCR = 0x05B00, /* PCI-Ex Control */ ++ E1000_GCR2 = 0x05B64, /* PCI-Ex Control #2 */ ++ E1000_FACTPS = 0x05B30, /* Function Active and Power State to MNG */ ++ E1000_SWSM = 0x05B50, /* SW Semaphore */ ++ E1000_FWSM = 0x05B54, /* FW Semaphore */ ++ E1000_SWSM2 = 0x05B58, /* Driver-only SW semaphore */ ++ E1000_FFLT_DBG = 0x05F04, /* Debug Register */ ++ E1000_PCH_RAICC_BASE = 0x05F50, /* Receive Address Initial CRC */ ++#define E1000_PCH_RAICC(_n) (E1000_PCH_RAICC_BASE + ((_n) * 4)) ++#define E1000_CRC_OFFSET E1000_PCH_RAICC_BASE ++ E1000_HICR = 0x08F00, /* Host 
Interface Control */ ++}; ++ ++#define E1000_MAX_PHY_ADDR 4 ++ ++/* IGP01E1000 Specific Registers */ ++#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ ++#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ ++#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ ++#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ ++#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ ++#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ ++#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ ++#define IGP_PAGE_SHIFT 5 ++#define PHY_REG_MASK 0x1F ++ ++#define BM_WUC_PAGE 800 ++#define BM_WUC_ADDRESS_OPCODE 0x11 ++#define BM_WUC_DATA_OPCODE 0x12 ++#define BM_WUC_ENABLE_PAGE 769 ++#define BM_WUC_ENABLE_REG 17 ++#define BM_WUC_ENABLE_BIT (1 << 2) ++#define BM_WUC_HOST_WU_BIT (1 << 4) ++#define BM_WUC_ME_WU_BIT (1 << 5) ++ ++#define BM_WUC PHY_REG(BM_WUC_PAGE, 1) ++#define BM_WUFC PHY_REG(BM_WUC_PAGE, 2) ++#define BM_WUS PHY_REG(BM_WUC_PAGE, 3) ++ ++#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 ++#define IGP01E1000_PHY_POLARITY_MASK 0x0078 ++ ++#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 ++#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ ++ ++#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 ++ ++#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ ++#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ ++#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ ++ ++#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 ++ ++#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 ++#define IGP01E1000_PSSR_MDIX 0x0800 ++#define IGP01E1000_PSSR_SPEED_MASK 0xC000 ++#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 ++ ++#define IGP02E1000_PHY_CHANNEL_NUM 4 ++#define IGP02E1000_PHY_AGC_A 0x11B1 ++#define IGP02E1000_PHY_AGC_B 0x12B1 ++#define IGP02E1000_PHY_AGC_C 0x14B1 ++#define IGP02E1000_PHY_AGC_D 0x18B1 ++ ++#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course - 15:13, Fine - 12:9 */ ++#define IGP02E1000_AGC_LENGTH_MASK 0x7F ++#define IGP02E1000_AGC_RANGE 15 ++ ++/* manage.c */ ++#define E1000_VFTA_ENTRY_SHIFT 5 ++#define E1000_VFTA_ENTRY_MASK 0x7F ++#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F ++ ++#define E1000_HICR_EN 0x01 /* Enable bit - RO */ ++/* Driver sets this bit when done to put command in RAM */ ++#define E1000_HICR_C 0x02 ++#define E1000_HICR_FW_RESET_ENABLE 0x40 ++#define E1000_HICR_FW_RESET 0x80 ++ ++#define E1000_FWSM_MODE_MASK 0xE ++#define E1000_FWSM_MODE_SHIFT 1 ++ ++#define E1000_MNG_IAMT_MODE 0x3 ++#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 ++#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 ++#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 ++#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 ++#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 ++#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 ++ ++/* nvm.c */ ++#define E1000_STM_OPCODE 0xDB00 ++ ++#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 ++#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 ++#define E1000_KMRNCTRLSTA_REN 0x00200000 ++#define E1000_KMRNCTRLSTA_CTRL_OFFSET 0x1 /* Kumeran Control */ ++#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ ++#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ ++#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ ++#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */ ++#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ ++#define E1000_KMRNCTRLSTA_K1_CONFIG 0x7 ++#define E1000_KMRNCTRLSTA_K1_ENABLE 0x0002 ++#define E1000_KMRNCTRLSTA_HD_CTRL 0x10 /* Kumeran HD Control */ ++ ++#define IFE_PHY_EXTENDED_STATUS_CONTROL 
0x10 ++#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Control */ ++#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Control */ ++#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ ++ ++/* IFE PHY Extended Status Control */ ++#define IFE_PESC_POLARITY_REVERSED 0x0100 ++ ++/* IFE PHY Special Control */ ++#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 ++#define IFE_PSC_FORCE_POLARITY 0x0020 ++ ++/* IFE PHY Special Control and LED Control */ ++#define IFE_PSCL_PROBE_MODE 0x0020 ++#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ ++#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ ++ ++/* IFE PHY MDIX Control */ ++#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ ++#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ ++#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto MDI/MDI-X, 0=disable */ ++ ++#define E1000_CABLE_LENGTH_UNDEFINED 0xFF ++ ++#define E1000_DEV_ID_82571EB_COPPER 0x105E ++#define E1000_DEV_ID_82571EB_FIBER 0x105F ++#define E1000_DEV_ID_82571EB_SERDES 0x1060 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 ++#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 ++#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 ++#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC ++#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 ++#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA ++#define E1000_DEV_ID_82572EI_COPPER 0x107D ++#define E1000_DEV_ID_82572EI_FIBER 0x107E ++#define E1000_DEV_ID_82572EI_SERDES 0x107F ++#define E1000_DEV_ID_82572EI 0x10B9 ++#define E1000_DEV_ID_82573E 0x108B ++#define E1000_DEV_ID_82573E_IAMT 0x108C ++#define E1000_DEV_ID_82573L 0x109A ++#define E1000_DEV_ID_82574L 0x10D3 ++#define E1000_DEV_ID_82574LA 0x10F6 ++#define E1000_DEV_ID_82583V 0x150C ++ ++#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 ++#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 ++#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA ++#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB ++ ++#define E1000_DEV_ID_ICH8_82567V_3 0x1501 ++#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 ++#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A ++#define E1000_DEV_ID_ICH8_IGP_C 0x104B ++#define E1000_DEV_ID_ICH8_IFE 0x104C ++#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 ++#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 ++#define E1000_DEV_ID_ICH8_IGP_M 0x104D ++#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD ++#define E1000_DEV_ID_ICH9_BM 0x10E5 ++#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 ++#define E1000_DEV_ID_ICH9_IGP_M 0x10BF ++#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB ++#define E1000_DEV_ID_ICH9_IGP_C 0x294C ++#define E1000_DEV_ID_ICH9_IFE 0x10C0 ++#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 ++#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 ++#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC ++#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD ++#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE ++#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE ++#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF ++#define E1000_DEV_ID_ICH10_D_BM_V 0x1525 ++#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA ++#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB ++#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF ++#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 ++#define E1000_DEV_ID_PCH2_LV_LM 0x1502 ++#define E1000_DEV_ID_PCH2_LV_V 0x1503 ++#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A ++#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B ++#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A ++#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 ++ ++#define E1000_REVISION_4 4 ++ ++#define E1000_FUNC_1 1 ++ ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 ++#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 ++ 
++enum e1000_mac_type { ++ e1000_82571, ++ e1000_82572, ++ e1000_82573, ++ e1000_82574, ++ e1000_82583, ++ e1000_80003es2lan, ++ e1000_ich8lan, ++ e1000_ich9lan, ++ e1000_ich10lan, ++ e1000_pchlan, ++ e1000_pch2lan, ++ e1000_pch_lpt, ++}; ++ ++enum e1000_media_type { ++ e1000_media_type_unknown = 0, ++ e1000_media_type_copper = 1, ++ e1000_media_type_fiber = 2, ++ e1000_media_type_internal_serdes = 3, ++ e1000_num_media_types ++}; ++ ++enum e1000_nvm_type { ++ e1000_nvm_unknown = 0, ++ e1000_nvm_none, ++ e1000_nvm_eeprom_spi, ++ e1000_nvm_flash_hw, ++ e1000_nvm_flash_sw ++}; ++ ++enum e1000_nvm_override { ++ e1000_nvm_override_none = 0, ++ e1000_nvm_override_spi_small, ++ e1000_nvm_override_spi_large ++}; ++ ++enum e1000_phy_type { ++ e1000_phy_unknown = 0, ++ e1000_phy_none, ++ e1000_phy_m88, ++ e1000_phy_igp, ++ e1000_phy_igp_2, ++ e1000_phy_gg82563, ++ e1000_phy_igp_3, ++ e1000_phy_ife, ++ e1000_phy_bm, ++ e1000_phy_82578, ++ e1000_phy_82577, ++ e1000_phy_82579, ++ e1000_phy_i217, ++}; ++ ++enum e1000_bus_width { ++ e1000_bus_width_unknown = 0, ++ e1000_bus_width_pcie_x1, ++ e1000_bus_width_pcie_x2, ++ e1000_bus_width_pcie_x4 = 4, ++ e1000_bus_width_32, ++ e1000_bus_width_64, ++ e1000_bus_width_reserved ++}; ++ ++enum e1000_1000t_rx_status { ++ e1000_1000t_rx_status_not_ok = 0, ++ e1000_1000t_rx_status_ok, ++ e1000_1000t_rx_status_undefined = 0xFF ++}; ++ ++enum e1000_rev_polarity{ ++ e1000_rev_polarity_normal = 0, ++ e1000_rev_polarity_reversed, ++ e1000_rev_polarity_undefined = 0xFF ++}; ++ ++enum e1000_fc_mode { ++ e1000_fc_none = 0, ++ e1000_fc_rx_pause, ++ e1000_fc_tx_pause, ++ e1000_fc_full, ++ e1000_fc_default = 0xFF ++}; ++ ++enum e1000_ms_type { ++ e1000_ms_hw_default = 0, ++ e1000_ms_force_master, ++ e1000_ms_force_slave, ++ e1000_ms_auto ++}; ++ ++enum e1000_smart_speed { ++ e1000_smart_speed_default = 0, ++ e1000_smart_speed_on, ++ e1000_smart_speed_off ++}; ++ ++enum e1000_serdes_link_state { ++ e1000_serdes_link_down = 0, ++ e1000_serdes_link_autoneg_progress, ++ e1000_serdes_link_autoneg_complete, ++ e1000_serdes_link_forced_up ++}; ++ ++/* Receive Descriptor */ ++struct e1000_rx_desc { ++ __le64 buffer_addr; /* Address of the descriptor's data buffer */ ++ __le16 length; /* Length of data DMAed into data buffer */ ++ __le16 csum; /* Packet checksum */ ++ u8 status; /* Descriptor status */ ++ u8 errors; /* Descriptor Errors */ ++ __le16 special; ++}; ++ ++/* Receive Descriptor - Extended */ ++union e1000_rx_desc_extended { ++ struct { ++ __le64 buffer_addr; ++ __le64 reserved; ++ } read; ++ struct { ++ struct { ++ __le32 mrq; /* Multiple Rx Queues */ ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length; ++ __le16 vlan; /* VLAN tag */ ++ } upper; ++ } wb; /* writeback */ ++}; ++ ++#define MAX_PS_BUFFERS 4 ++/* Receive Descriptor - Packet Split */ ++union e1000_rx_desc_packet_split { ++ struct { ++ /* one buffer for protocol header(s), three data buffers */ ++ __le64 buffer_addr[MAX_PS_BUFFERS]; ++ } read; ++ struct { ++ struct { ++ __le32 mrq; /* Multiple Rx Queues */ ++ union { ++ __le32 rss; /* RSS Hash */ ++ struct { ++ __le16 ip_id; /* IP id */ ++ __le16 csum; /* Packet Checksum */ ++ } csum_ip; ++ } hi_dword; ++ } lower; ++ struct { ++ __le32 status_error; /* ext status/error */ ++ __le16 length0; /* length of buffer 0 */ ++ __le16 vlan; /* VLAN tag */ ++ } middle; ++ struct { ++ __le16 
header_status; ++ __le16 length[3]; /* length of buffers 1-3 */ ++ } upper; ++ __le64 reserved; ++ } wb; /* writeback */ ++}; ++ ++/* Transmit Descriptor */ ++struct e1000_tx_desc { ++ __le64 buffer_addr; /* Address of the descriptor's data buffer */ ++ union { ++ __le32 data; ++ struct { ++ __le16 length; /* Data buffer length */ ++ u8 cso; /* Checksum offset */ ++ u8 cmd; /* Descriptor control */ ++ } flags; ++ } lower; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 css; /* Checksum start */ ++ __le16 special; ++ } fields; ++ } upper; ++}; ++ ++/* Offload Context Descriptor */ ++struct e1000_context_desc { ++ union { ++ __le32 ip_config; ++ struct { ++ u8 ipcss; /* IP checksum start */ ++ u8 ipcso; /* IP checksum offset */ ++ __le16 ipcse; /* IP checksum end */ ++ } ip_fields; ++ } lower_setup; ++ union { ++ __le32 tcp_config; ++ struct { ++ u8 tucss; /* TCP checksum start */ ++ u8 tucso; /* TCP checksum offset */ ++ __le16 tucse; /* TCP checksum end */ ++ } tcp_fields; ++ } upper_setup; ++ __le32 cmd_and_length; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 hdr_len; /* Header length */ ++ __le16 mss; /* Maximum segment size */ ++ } fields; ++ } tcp_seg_setup; ++}; ++ ++/* Offload data descriptor */ ++struct e1000_data_desc { ++ __le64 buffer_addr; /* Address of the descriptor's buffer address */ ++ union { ++ __le32 data; ++ struct { ++ __le16 length; /* Data buffer length */ ++ u8 typ_len_ext; ++ u8 cmd; ++ } flags; ++ } lower; ++ union { ++ __le32 data; ++ struct { ++ u8 status; /* Descriptor status */ ++ u8 popts; /* Packet Options */ ++ __le16 special; /* */ ++ } fields; ++ } upper; ++}; ++ ++/* Statistics counters collected by the MAC */ ++struct e1000_hw_stats { ++ u64 crcerrs; ++ u64 algnerrc; ++ u64 symerrs; ++ u64 rxerrc; ++ u64 mpc; ++ u64 scc; ++ u64 ecol; ++ u64 mcc; ++ u64 latecol; ++ u64 colc; ++ u64 dc; ++ u64 tncrs; ++ u64 sec; ++ u64 cexterr; ++ u64 rlec; ++ u64 xonrxc; ++ u64 xontxc; ++ u64 xoffrxc; ++ u64 xofftxc; ++ u64 fcruc; ++ u64 prc64; ++ u64 prc127; ++ u64 prc255; ++ u64 prc511; ++ u64 prc1023; ++ u64 prc1522; ++ u64 gprc; ++ u64 bprc; ++ u64 mprc; ++ u64 gptc; ++ u64 gorc; ++ u64 gotc; ++ u64 rnbc; ++ u64 ruc; ++ u64 rfc; ++ u64 roc; ++ u64 rjc; ++ u64 mgprc; ++ u64 mgpdc; ++ u64 mgptc; ++ u64 tor; ++ u64 tot; ++ u64 tpr; ++ u64 tpt; ++ u64 ptc64; ++ u64 ptc127; ++ u64 ptc255; ++ u64 ptc511; ++ u64 ptc1023; ++ u64 ptc1522; ++ u64 mptc; ++ u64 bptc; ++ u64 tsctc; ++ u64 tsctfc; ++ u64 iac; ++ u64 icrxptc; ++ u64 icrxatc; ++ u64 ictxptc; ++ u64 ictxatc; ++ u64 ictxqec; ++ u64 ictxqmtc; ++ u64 icrxdmtc; ++ u64 icrxoc; ++}; ++ ++struct e1000_phy_stats { ++ u32 idle_errors; ++ u32 receive_errors; ++}; ++ ++struct e1000_host_mng_dhcp_cookie { ++ u32 signature; ++ u8 status; ++ u8 reserved0; ++ u16 vlan_id; ++ u32 reserved1; ++ u16 reserved2; ++ u8 reserved3; ++ u8 checksum; ++}; ++ ++/* Host Interface "Rev 1" */ ++struct e1000_host_command_header { ++ u8 command_id; ++ u8 command_length; ++ u8 command_options; ++ u8 checksum; ++}; ++ ++#define E1000_HI_MAX_DATA_LENGTH 252 ++struct e1000_host_command_info { ++ struct e1000_host_command_header command_header; ++ u8 command_data[E1000_HI_MAX_DATA_LENGTH]; ++}; ++ ++/* Host Interface "Rev 2" */ ++struct e1000_host_mng_command_header { ++ u8 command_id; ++ u8 checksum; ++ u16 reserved1; ++ u16 reserved2; ++ u16 command_length; ++}; ++ ++#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 ++struct e1000_host_mng_command_info { ++ struct 
e1000_host_mng_command_header command_header; ++ u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; ++}; ++ ++/* Function pointers and static data for the MAC. */ ++struct e1000_mac_operations { ++ s32 (*id_led_init)(struct e1000_hw *); ++ s32 (*blink_led)(struct e1000_hw *); ++ bool (*check_mng_mode)(struct e1000_hw *); ++ s32 (*check_for_link)(struct e1000_hw *); ++ s32 (*cleanup_led)(struct e1000_hw *); ++ void (*clear_hw_cntrs)(struct e1000_hw *); ++ void (*clear_vfta)(struct e1000_hw *); ++ s32 (*get_bus_info)(struct e1000_hw *); ++ void (*set_lan_id)(struct e1000_hw *); ++ s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); ++ s32 (*led_on)(struct e1000_hw *); ++ s32 (*led_off)(struct e1000_hw *); ++ void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); ++ s32 (*reset_hw)(struct e1000_hw *); ++ s32 (*init_hw)(struct e1000_hw *); ++ s32 (*setup_link)(struct e1000_hw *); ++ s32 (*setup_physical_interface)(struct e1000_hw *); ++ s32 (*setup_led)(struct e1000_hw *); ++ void (*write_vfta)(struct e1000_hw *, u32, u32); ++ void (*config_collision_dist)(struct e1000_hw *); ++ void (*rar_set)(struct e1000_hw *, u8 *, u32); ++ s32 (*read_mac_addr)(struct e1000_hw *); ++}; ++ ++/* ++ * When to use various PHY register access functions: ++ * ++ * Func Caller ++ * Function Does Does When to use ++ * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * X_reg L,P,A n/a for simple PHY reg accesses ++ * X_reg_locked P,A L for multiple accesses of different regs ++ * on different pages ++ * X_reg_page A L,P for multiple accesses of different regs ++ * on the same page ++ * ++ * Where X=[read|write], L=locking, P=sets page, A=register access ++ * ++ */ ++struct e1000_phy_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*cfg_on_link_up)(struct e1000_hw *); ++ s32 (*check_polarity)(struct e1000_hw *); ++ s32 (*check_reset_block)(struct e1000_hw *); ++ s32 (*commit)(struct e1000_hw *); ++ s32 (*force_speed_duplex)(struct e1000_hw *); ++ s32 (*get_cfg_done)(struct e1000_hw *hw); ++ s32 (*get_cable_length)(struct e1000_hw *); ++ s32 (*get_info)(struct e1000_hw *); ++ s32 (*set_page)(struct e1000_hw *, u16); ++ s32 (*read_reg)(struct e1000_hw *, u32, u16 *); ++ s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); ++ s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*reset)(struct e1000_hw *); ++ s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); ++ s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); ++ s32 (*write_reg)(struct e1000_hw *, u32, u16); ++ s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); ++ s32 (*write_reg_page)(struct e1000_hw *, u32, u16); ++ void (*power_up)(struct e1000_hw *); ++ void (*power_down)(struct e1000_hw *); ++}; ++ ++/* Function pointers for the NVM. 
*/ ++struct e1000_nvm_operations { ++ s32 (*acquire)(struct e1000_hw *); ++ s32 (*read)(struct e1000_hw *, u16, u16, u16 *); ++ void (*release)(struct e1000_hw *); ++ s32 (*update)(struct e1000_hw *); ++ s32 (*valid_led_default)(struct e1000_hw *, u16 *); ++ s32 (*validate)(struct e1000_hw *); ++ s32 (*write)(struct e1000_hw *, u16, u16, u16 *); ++}; ++ ++struct e1000_mac_info { ++ struct e1000_mac_operations ops; ++ u8 addr[ETH_ALEN]; ++ u8 perm_addr[ETH_ALEN]; ++ ++ enum e1000_mac_type type; ++ ++ u32 collision_delta; ++ u32 ledctl_default; ++ u32 ledctl_mode1; ++ u32 ledctl_mode2; ++ u32 mc_filter_type; ++ u32 tx_packet_delta; ++ u32 txcw; ++ ++ u16 current_ifs_val; ++ u16 ifs_max_val; ++ u16 ifs_min_val; ++ u16 ifs_ratio; ++ u16 ifs_step_size; ++ u16 mta_reg_count; ++ ++ /* Maximum size of the MTA register table in all supported adapters */ ++ #define MAX_MTA_REG 128 ++ u32 mta_shadow[MAX_MTA_REG]; ++ u16 rar_entry_count; ++ ++ u8 forced_speed_duplex; ++ ++ bool adaptive_ifs; ++ bool has_fwsm; ++ bool arc_subsystem_valid; ++ bool autoneg; ++ bool autoneg_failed; ++ bool get_link_status; ++ bool in_ifs_mode; ++ bool serdes_has_link; ++ bool tx_pkt_filtering; ++ enum e1000_serdes_link_state serdes_link_state; ++}; ++ ++struct e1000_phy_info { ++ struct e1000_phy_operations ops; ++ ++ enum e1000_phy_type type; ++ ++ enum e1000_1000t_rx_status local_rx; ++ enum e1000_1000t_rx_status remote_rx; ++ enum e1000_ms_type ms_type; ++ enum e1000_ms_type original_ms_type; ++ enum e1000_rev_polarity cable_polarity; ++ enum e1000_smart_speed smart_speed; ++ ++ u32 addr; ++ u32 id; ++ u32 reset_delay_us; /* in usec */ ++ u32 revision; ++ ++ enum e1000_media_type media_type; ++ ++ u16 autoneg_advertised; ++ u16 autoneg_mask; ++ u16 cable_length; ++ u16 max_cable_length; ++ u16 min_cable_length; ++ ++ u8 mdix; ++ ++ bool disable_polarity_correction; ++ bool is_mdix; ++ bool polarity_correction; ++ bool speed_downgraded; ++ bool autoneg_wait_to_complete; ++}; ++ ++struct e1000_nvm_info { ++ struct e1000_nvm_operations ops; ++ ++ enum e1000_nvm_type type; ++ enum e1000_nvm_override override; ++ ++ u32 flash_bank_size; ++ u32 flash_base_addr; ++ ++ u16 word_size; ++ u16 delay_usec; ++ u16 address_bits; ++ u16 opcode_bits; ++ u16 page_size; ++}; ++ ++struct e1000_bus_info { ++ enum e1000_bus_width width; ++ ++ u16 func; ++}; ++ ++struct e1000_fc_info { ++ u32 high_water; /* Flow control high-water mark */ ++ u32 low_water; /* Flow control low-water mark */ ++ u16 pause_time; /* Flow control pause timer */ ++ u16 refresh_time; /* Flow control refresh timer */ ++ bool send_xon; /* Flow control send XON */ ++ bool strict_ieee; /* Strict IEEE mode */ ++ enum e1000_fc_mode current_mode; /* FC mode in effect */ ++ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ ++}; ++ ++struct e1000_dev_spec_82571 { ++ bool laa_is_present; ++ u32 smb_counter; ++}; ++ ++struct e1000_dev_spec_80003es2lan { ++ bool mdic_wa_enable; ++}; ++ ++struct e1000_shadow_ram { ++ u16 value; ++ bool modified; ++}; ++ ++#define E1000_ICH8_SHADOW_RAM_WORDS 2048 ++ ++struct e1000_dev_spec_ich8lan { ++ bool kmrn_lock_loss_workaround_enabled; ++ struct e1000_shadow_ram shadow_ram[E1000_ICH8_SHADOW_RAM_WORDS]; ++ bool nvm_k1_enabled; ++ bool eee_disable; ++ u16 eee_lp_ability; ++}; ++ ++struct e1000_hw { ++ struct e1000_adapter *adapter; ++ ++ u8 __iomem *hw_addr; ++ u8 __iomem *flash_address; ++ ++ struct e1000_mac_info mac; ++ struct e1000_fc_info fc; ++ struct e1000_phy_info phy; ++ struct e1000_nvm_info nvm; ++ struct 
e1000_bus_info bus; ++ struct e1000_host_mng_dhcp_cookie mng_cookie; ++ ++ union { ++ struct e1000_dev_spec_82571 e82571; ++ struct e1000_dev_spec_80003es2lan e80003es2lan; ++ struct e1000_dev_spec_ich8lan ich8lan; ++ } dev_spec; ++}; ++ ++#endif +--- linux/drivers/xenomai/net/drivers/eth1394.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/drivers/eth1394.c 2021-04-07 16:01:27.165634255 +0800 +@@ -0,0 +1,1536 @@ ++/* ++ * eth1394.h -- RTnet Driver for Ethernet emulation over FireWire ++ * (adapted from Linux1394) ++ * ++ * Copyright (C) 2005 Zhang Yuchen ++ * ++ * Mainly based on work by Emanuel Pirker and Andreas E. Bombe ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define rtos_spinlock_t rtdm_lock_t ++#define nanosecs_abs_t nanosecs_t ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define driver_name "RT-ETH1394" ++ ++ ++#define ETH1394_PRINT_G(level, fmt, args...) \ ++ rtdm_printk(level "%s: " fmt, driver_name, ## args) ++ ++#define ETH1394_PRINT(level, dev_name, fmt, args...) \ ++ rtdm_printk(level "%s: %s: " fmt, driver_name, dev_name, ## args) ++ ++//#define ETH1394_DEBUG 1 ++ ++#ifdef ETH1394_DEBUG ++#define DEBUGP(fmt, args...) \ ++ rtdm_printk(KERN_ERR "%s:%s[%d]: " fmt "\n", driver_name, __FUNCTION__, __LINE__, ## args) ++#else ++#define DEBUGP(fmt, args...) ++#endif ++ ++#define TRACE() rtdm_printk(KERN_ERR "%s:%s[%d] ---- TRACE\n", driver_name, __FUNCTION__, __LINE__) ++ ++/* Change this to IEEE1394_SPEED_S100 to make testing easier */ ++#define ETH1394_SPEED_DEF 0x03 /*IEEE1394_SPEED_MAX*/ ++ ++/* For now, this needs to be 1500, so that XP works with us */ ++#define ETH1394_DATA_LEN 1500/*ETH_DATA_LEN*/ ++ ++struct fragment_info { ++ struct list_head list; ++ int offset; ++ int len; ++}; ++ ++struct partial_datagram { ++ struct list_head list; ++ u16 dgl; ++ u16 dg_size; ++ u16 ether_type; ++ struct rtskb *skb; ++ char *pbuf; ++ struct list_head frag_info; ++}; ++ ++ static const u16 eth1394_speedto_maxpayload[] = { ++/* S100, S200, S400, S800, S1600, S3200 */ ++ 512, 1024, 2048, 4096, 4096, 4096 ++}; ++ ++static struct hpsb_highlevel eth1394_highlevel; ++ ++/* Use common.lf to determine header len */ ++static const int hdr_type_len[] = { ++ sizeof (struct eth1394_uf_hdr), ++ sizeof (struct eth1394_ff_hdr), ++ sizeof (struct eth1394_sf_hdr), ++ sizeof (struct eth1394_sf_hdr) ++}; ++ ++/* The max_partial_datagrams parameter is the maximum number of fragmented ++ * datagrams per node that eth1394 will keep in memory. Providing an upper ++ * bound allows us to limit the amount of memory that partial datagrams ++ * consume in the event that some partial datagrams are never completed. 
This ++ * should probably change to a sysctl item or the like if possible. ++ */ ++static int max_partial_datagrams = 25; ++module_param(max_partial_datagrams, int, 0444); ++MODULE_PARM_DESC(max_partial_datagrams, ++ "Maximum number of partially received fragmented datagrams " ++ "(default = 25)."); ++ ++ ++static int eth1394_header(struct rtskb *skb, struct rtnet_device *dev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned len); ++ ++static int eth1394_write(struct hpsb_host *host,struct hpsb_packet *packet, unsigned int length); ++ ++static inline void purge_partial_datagram(struct list_head *old); ++static int eth1394_tx(struct rtskb *skb, struct rtnet_device *dev); ++static void eth1394_iso(struct hpsb_iso *iso, void *arg); ++ ++/* Function for incoming 1394 packets */ ++static struct hpsb_address_ops eth1394_ops = { ++ .write = eth1394_write, ++}; ++ ++static void eth1394_add_host (struct hpsb_host *host); ++static void eth1394_remove_host (struct hpsb_host *host); ++static void eth1394_host_reset (struct hpsb_host *host); ++ ++/* Ieee1394 highlevel driver functions */ ++static struct hpsb_highlevel eth1394_highlevel = { ++ .name = driver_name, ++ .add_host = eth1394_add_host, ++ .remove_host = eth1394_remove_host, ++ .host_reset = eth1394_host_reset, ++}; ++ ++static void eth1394_iso_shutdown(struct eth1394_priv *priv) ++{ ++ priv->bc_state = ETHER1394_BC_CLOSED; ++ ++ if (priv->iso != NULL) { ++ //~ if (!in_interrupt()) ++ hpsb_iso_shutdown(priv->iso); ++ priv->iso = NULL; ++ } ++} ++ ++static int eth1394_init_bc(struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ ++ /* First time sending? Need a broadcast channel for ARP and for ++ * listening on */ ++ if (priv->bc_state == ETHER1394_BC_CHECK) { ++ quadlet_t bc; ++ ++ /* Get the local copy of the broadcast channel and check its ++ * validity (the IRM should validate it for us) */ ++ ++ bc = priv->host->csr.broadcast_channel; ++ ++ if ((bc & 0x80000000) != 0x80000000) { //used to be 0xc0000000 ++ /* broadcast channel not validated yet */ ++ ETH1394_PRINT(KERN_WARNING, dev->name, ++ "Error BROADCAST_CHANNEL register valid " ++ "bit not set, can't send IP traffic\n"); ++ ++ eth1394_iso_shutdown(priv); ++ ++ return -EAGAIN; ++ } ++ if (priv->broadcast_channel != (bc & 0x3f)) { ++ /* This really shouldn't be possible, but just in case ++ * the IEEE 1394 spec changes regarding broadcast ++ * channels in the future. 
*/ ++ ++ eth1394_iso_shutdown(priv); ++ ++ //~ if (in_interrupt()) ++ //~ return -EAGAIN; ++ ++ priv->broadcast_channel = bc & 0x3f; ++ ETH1394_PRINT(KERN_INFO, dev->name, ++ "Changing to broadcast channel %d...\n", ++ priv->broadcast_channel); ++ ++ priv->iso = hpsb_iso_recv_init(priv->host, 16 * 4096, ++ 16, priv->broadcast_channel, HPSB_ISO_DMA_PACKET_PER_BUFFER, ++ 1, eth1394_iso, 0, "eth1394_iso", IEEE1394_PRIORITY_HIGHEST); ++ ++ if (priv->iso == NULL) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "failed to change broadcast " ++ "channel\n"); ++ return -EAGAIN; ++ } ++ } ++ if (hpsb_iso_recv_start(priv->iso, -1, (1 << 3), -1) < 0) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Could not start data stream reception\n"); ++ ++ eth1394_iso_shutdown(priv); ++ ++ return -EAGAIN; ++ } ++ priv->bc_state = ETHER1394_BC_OPENED; ++ } ++ ++ return 0; ++} ++ ++static int eth1394_open (struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ /* Something bad happened, don't even try */ ++ if (priv->bc_state == ETHER1394_BC_CLOSED) ++ { ++ return -EAGAIN; ++ } ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ ret = eth1394_init_bc(dev); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ if (ret) ++ return ret; ++ rt_stack_connect(dev,&STACK_manager); ++ rtnetif_start_queue (dev); ++ return 0; ++} ++ ++static int eth1394_stop (struct rtnet_device *dev) ++{ ++ rtnetif_stop_queue (dev); ++ rt_stack_disconnect(dev); ++ return 0; ++} ++ ++/* Return statistics to the caller */ ++static struct net_device_stats *eth1394_stats (struct rtnet_device *dev) ++{ ++ return &(((struct eth1394_priv *)dev->priv)->stats); ++} ++ ++static inline void eth1394_register_limits(int nodeid, u16 maxpayload, ++ unsigned char sspd, ++ struct eth1394_priv *priv) ++{ ++ ++ if (nodeid < 0 || nodeid >= ALL_NODES) { ++ ETH1394_PRINT_G (KERN_ERR, "Cannot register invalid nodeid %d\n", nodeid); ++ return; ++ } ++ ++ priv->maxpayload[nodeid] = maxpayload; ++ priv->sspd[nodeid] = sspd; ++ priv->maxpayload[ALL_NODES] = min(priv->maxpayload[ALL_NODES], maxpayload); ++ priv->sspd[ALL_NODES] = min(priv->sspd[ALL_NODES], sspd); ++ ++ return; ++} ++ ++ ++static void eth1394_reset_priv (struct rtnet_device *dev, int set_mtu) ++{ ++ rtdm_lockctx_t context; ++ int i; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ struct hpsb_host *host = priv->host; ++ int phy_id = NODEID_TO_NODE(host->node_id); ++ u16 maxpayload = 1 << (host->csr.max_rec + 1); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ /* Clear the speed/payload/offset tables */ ++ memset (priv->maxpayload, 0, sizeof (priv->maxpayload)); ++ memset (priv->sspd, 0, sizeof (priv->sspd)); ++ ++ priv->sspd[ALL_NODES] = ETH1394_SPEED_DEF; ++ priv->maxpayload[ALL_NODES] = eth1394_speedto_maxpayload[priv->sspd[ALL_NODES]]; ++ ++ priv->bc_state = ETHER1394_BC_CHECK; ++ ++ /* Register our limits now */ ++ eth1394_register_limits(phy_id, maxpayload, ++ host->speed_map[(phy_id << 6) + phy_id], priv); ++ ++ /* We'll use our maxpayload as the default mtu */ ++ if (set_mtu) { ++ dev->mtu = min(ETH1394_DATA_LEN, (int)(priv->maxpayload[phy_id] - ++ (sizeof(union eth1394_hdr) + ETHER1394_GASP_OVERHEAD))); ++ ++ //~ /* Set our hardware address while we're at it */ ++ //~ *(u64*)dev->dev_addr = guid; ++ //~ *(u64*)dev->broadcast = ~0x0ULL; ++ *(u16*)dev->dev_addr = LOCAL_BUS | phy_id; //we directly use FireWire address for our MAC address ++ *(u16*)dev->broadcast = LOCAL_BUS | ALL_NODES; ++ } ++ 
++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ for (i = 0; i < ALL_NODES; i++) { ++ struct list_head *lh, *n; ++ ++ rtdm_lock_get_irqsave(&priv->pdg[i].lock, context); ++ if (!set_mtu) { ++ list_for_each_safe(lh, n, &priv->pdg[i].list) { ++ //~ purge_partial_datagram(lh); ++ } ++ } ++ INIT_LIST_HEAD(&(priv->pdg[i].list)); ++ priv->pdg[i].sz = 0; ++ rtdm_lock_put_irqrestore(&priv->pdg[i].lock, context); ++ } ++ ++} ++ ++static void eth1394_add_host (struct hpsb_host *host) ++{ ++ int i; ++ struct host_info *hi = NULL; ++ ++ //*******RTnet******** ++ struct rtnet_device *dev = NULL; ++ // ++ struct eth1394_priv *priv; ++ ++ /* We should really have our own alloc_hpsbdev() function in ++ * net_init.c instead of calling the one for ethernet then hijacking ++ * it for ourselves. That way we'd be a real networking device. */ ++ ++ //******RTnet****** ++ ++ dev = rt_alloc_etherdev(sizeof (struct eth1394_priv), ++ RX_RING_SIZE * 2 + TX_RING_SIZE); ++ if (dev == NULL) { ++ ETH1394_PRINT_G (KERN_ERR, "Out of memory trying to allocate " ++ "etherdevice for IEEE 1394 device\n"); ++ goto free_dev; ++ } ++ rtdev_alloc_name(dev, "rteth%d"); ++ memset(dev->priv, 0, sizeof(struct eth1394_priv)); ++ rt_rtdev_connect(dev, &RTDEV_manager); ++ ++ //dev->init = eth1394_init_dev; ++ ++ dev->vers = RTDEV_VERS_2_0; ++ dev->open = eth1394_open; ++ dev->hard_start_xmit = eth1394_tx; ++ dev->stop = eth1394_stop; ++ dev->hard_header = eth1394_header; ++ dev->get_stats = eth1394_stats; ++ dev->flags = IFF_BROADCAST | IFF_MULTICAST; ++ dev->addr_len = ETH_ALEN; ++ dev->hard_header_len = ETH_HLEN; ++ dev->type = ARPHRD_IEEE1394; ++ ++ //rtdev->do_ioctl = NULL; ++ priv = (struct eth1394_priv *)dev->priv; ++ ++ rtdm_lock_init(&priv->lock); ++ priv->host = host; ++ ++ for (i = 0; i < ALL_NODES; i++) { ++ rtdm_lock_init(&priv->pdg[i].lock); ++ INIT_LIST_HEAD(&priv->pdg[i].list); ++ priv->pdg[i].sz = 0; ++ } ++ ++ hi = hpsb_create_hostinfo(ð1394_highlevel, host, sizeof(*hi)); ++ if (hi == NULL) { ++ ETH1394_PRINT_G (KERN_ERR, "Out of memory trying to create " ++ "hostinfo for IEEE 1394 device\n"); ++ goto free_hi; ++ } ++ ++ if(rt_register_rtnetdev(dev)) ++ { ++ ETH1394_PRINT (KERN_ERR, dev->name, "Error registering network driver\n"); ++ goto free_hi; ++ } ++ ++ ETH1394_PRINT (KERN_ERR, dev->name, "IEEE-1394 IPv4 over 1394 Ethernet\n"); ++ ++ hi->host = host; ++ hi->dev = dev; ++ ++ eth1394_reset_priv (dev, 1); ++ ++ /* Ignore validity in hopes that it will be set in the future. It'll ++ * be checked when the eth device is opened. 
*/ ++ priv->broadcast_channel = host->csr.broadcast_channel & 0x3f; ++ ++ priv->iso = hpsb_iso_recv_init(host, (ETHER1394_GASP_BUFFERS * 2 * ++ 2048), // XXX workaround for limitation in rawiso ++ //(1 << (host->csr.max_rec + 1))), ++ ETHER1394_GASP_BUFFERS, ++ priv->broadcast_channel, ++ HPSB_ISO_DMA_PACKET_PER_BUFFER, ++ 1, eth1394_iso, 0, "eth1394_iso", IEEE1394_PRIORITY_HIGHEST); ++ ++ ++ ++ if (priv->iso == NULL) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Could not allocate isochronous receive context " ++ "for the broadcast channel\n"); ++ priv->bc_state = ETHER1394_BC_ERROR; ++ goto unregister_dev; ++ } else { ++ if (hpsb_iso_recv_start(priv->iso, -1, (1 << 3), -1) < 0){ ++ priv->bc_state = ETHER1394_BC_STOPPED; ++ goto unregister_dev; ++ } ++ else ++ priv->bc_state = ETHER1394_BC_RUNNING; ++ } ++ ++ hpsb_register_addrspace(ð1394_highlevel, host, ð1394_ops, ETHER1394_REGION_ADDR, ++ ETHER1394_REGION_ADDR_END); ++ ++ return; ++ ++unregister_dev: ++ rt_unregister_rtnetdev(dev); ++free_hi: ++ hpsb_destroy_hostinfo(ð1394_highlevel, host); ++free_dev: ++ rtdev_free(dev); ++ ++ return; ++} ++ ++static void eth1394_remove_host (struct hpsb_host *host) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ ++ if (hi != NULL) { ++ struct eth1394_priv *priv = (struct eth1394_priv *)hi->dev->priv; ++ ++ eth1394_iso_shutdown(priv); ++ ++ if (hi->dev) { ++ rt_stack_disconnect(hi->dev); ++ rt_unregister_rtnetdev (hi->dev); ++ rtdev_free(hi->dev); ++ } ++ } ++ return; ++} ++ ++static void eth1394_host_reset (struct hpsb_host *host) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ struct rtnet_device *dev; ++ ++ /* This can happen for hosts that we don't use */ ++ if (hi == NULL) ++ return; ++ ++ dev = hi->dev; ++ ++ /* Reset our private host data, but not our mtu */ ++ rtnetif_stop_queue (dev); ++ eth1394_reset_priv (dev, 1); ++ rtnetif_wake_queue (dev); ++} ++ ++ ++/****************************************** ++ * HW Header net device functions ++ ******************************************/ ++/* These functions have been adapted from net/ethernet/eth.c */ ++ ++ ++/* Create a fake MAC header for an arbitrary protocol layer. ++ * saddr=NULL means use device source address ++ * daddr=NULL means leave destination address (eg unresolved arp). 
*/ ++static int eth1394_header(struct rtskb *skb, struct rtnet_device *dev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned len) ++{ ++ struct ethhdr *eth = (struct ethhdr *)rtskb_push(skb,ETH_HLEN); ++ memset(eth, 0, sizeof(*eth)); ++ ++ eth->h_proto = htons(type); ++ ++ if (saddr) ++ memcpy(eth->h_source, saddr, sizeof(nodeid_t)); ++ else ++ memcpy(eth->h_source, dev->dev_addr, sizeof(nodeid_t)); ++ ++ if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) ++ { ++ memset(eth->h_dest, 0, dev->addr_len); ++ return(dev->hard_header_len); ++ } ++ ++ if (daddr) ++ { ++ memcpy(eth->h_dest,daddr, sizeof(nodeid_t)); ++ return dev->hard_header_len; ++ } ++ ++ return -dev->hard_header_len; ++ ++} ++ ++ ++/****************************************** ++ * Datagram reception code ++ ******************************************/ ++ ++/* Copied from net/ethernet/eth.c */ ++static inline u16 eth1394_type_trans(struct rtskb *skb, ++ struct rtnet_device *dev) ++{ ++ struct ethhdr *eth; ++ unsigned char *rawp; ++ ++ skb->mac.raw = skb->data; ++ rtskb_pull (skb, ETH_HLEN); ++ eth = (struct ethhdr*)skb->mac.raw; ++ ++ if (*eth->h_dest & 1) { ++ if (memcmp(eth->h_dest, dev->broadcast, dev->addr_len)==0) ++ skb->pkt_type = PACKET_BROADCAST; ++ } else { ++ if (memcmp(eth->h_dest, dev->dev_addr, dev->addr_len)) ++ skb->pkt_type = PACKET_OTHERHOST; ++ } ++ ++ if (ntohs (eth->h_proto) >= 1536) ++ return eth->h_proto; ++ ++ rawp = skb->data; ++ ++ if (*(unsigned short *)rawp == 0xFFFF) ++ return htons (ETH_P_802_3); ++ ++ return htons (ETH_P_802_2); ++} ++ ++/* Parse an encapsulated IP1394 header into an ethernet frame packet. ++ * We also perform ARP translation here, if need be. */ ++static inline u16 eth1394_parse_encap(struct rtskb *skb, ++ struct rtnet_device *dev, ++ nodeid_t srcid, nodeid_t destid, ++ u16 ether_type) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ unsigned short ret = 0; ++ ++ /* If this is an ARP packet, convert it. First, we want to make ++ * use of some of the fields, since they tell us a little bit ++ * about the sending machine. */ ++ if (ether_type == __constant_htons (ETH_P_ARP)) { ++ rtdm_lockctx_t context; ++ struct eth1394_arp *arp1394 = ++ (struct eth1394_arp*)((u8 *)skb->data); ++ struct arphdr *arp = ++ (struct arphdr *)((u8 *)skb->data); ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ u8 max_rec = min(priv->host->csr.max_rec, ++ (u8)(arp1394->max_rec)); ++ int sspd = arp1394->sspd; ++ u16 maxpayload; ++ /* Sanity check. MacOSX seems to be sending us 131 in this ++ * field (atleast on my Panther G5). Not sure why. */ ++ if (sspd > 5 || sspd < 0) ++ sspd = 0; ++ ++ maxpayload = min(eth1394_speedto_maxpayload[sspd], (u16)(1 << (max_rec + 1))); ++ ++ ++ ++ /* Update our speed/payload/fifo_offset table */ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ eth1394_register_limits(NODEID_TO_NODE(srcid), maxpayload, ++ arp1394->sspd, ++ priv); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ /* Now that we're done with the 1394 specific stuff, we'll ++ * need to alter some of the data. Believe it or not, all ++ * that needs to be done is sender_IP_address needs to be ++ * moved, the destination hardware address get stuffed ++ * in and the hardware address length set to 8. ++ * ++ * IMPORTANT: The code below overwrites 1394 specific data ++ * needed above data so keep the call to ++ * eth1394_register_limits() before munging the data for the ++ * higher level IP stack. 
*/ ++ ++ arp->ar_hln = ETH_ALEN; ++ arp_ptr += arp->ar_hln; /* skip over sender unique id */ ++ *(u32*)arp_ptr = arp1394->sip; /* move sender IP addr */ ++ arp_ptr += arp->ar_pln; /* skip over sender IP addr */ ++ ++ if (arp->ar_op == 1) ++ /* just set ARP req target unique ID to 0 */ ++ memset(arp_ptr, 0, ETH_ALEN); ++ else ++ memcpy(arp_ptr, dev->dev_addr, ETH_ALEN); ++ } ++ ++ /* Now add the ethernet header. */ ++ //no need to add ethernet header now, since we did not get rid of it on the sending side ++ if (dev->hard_header (skb, dev, __constant_ntohs (ether_type), ++ &destid, &srcid, skb->len) >= 0) ++ ret = eth1394_type_trans(skb, dev); ++ ++ return ret; ++} ++ ++static inline int fragment_overlap(struct list_head *frag_list, int offset, int len) ++{ ++ struct list_head *lh; ++ struct fragment_info *fi; ++ ++ list_for_each(lh, frag_list) { ++ fi = list_entry(lh, struct fragment_info, list); ++ ++ if ( ! ((offset > (fi->offset + fi->len - 1)) || ++ ((offset + len - 1) < fi->offset))) ++ return 1; ++ } ++ return 0; ++} ++ ++static inline struct list_head *find_partial_datagram(struct list_head *pdgl, int dgl) ++{ ++ struct list_head *lh; ++ struct partial_datagram *pd; ++ ++ list_for_each(lh, pdgl) { ++ pd = list_entry(lh, struct partial_datagram, list); ++ if (pd->dgl == dgl) ++ return lh; ++ } ++ return NULL; ++} ++ ++/* Assumes that new fragment does not overlap any existing fragments */ ++static inline int new_fragment(struct list_head *frag_info, int offset, int len) ++{ ++ struct list_head *lh; ++ struct fragment_info *fi, *fi2, *new; ++ ++ list_for_each(lh, frag_info) { ++ fi = list_entry(lh, struct fragment_info, list); ++ if ((fi->offset + fi->len) == offset) { ++ /* The new fragment can be tacked on to the end */ ++ fi->len += len; ++ /* Did the new fragment plug a hole? */ ++ fi2 = list_entry(lh->next, struct fragment_info, list); ++ if ((fi->offset + fi->len) == fi2->offset) { ++ /* glue fragments together */ ++ fi->len += fi2->len; ++ list_del(lh->next); ++ kfree(fi2); ++ } ++ return 0; ++ } else if ((offset + len) == fi->offset) { ++ /* The new fragment can be tacked on to the beginning */ ++ fi->offset = offset; ++ fi->len += len; ++ /* Did the new fragment plug a hole? 
*/ ++ fi2 = list_entry(lh->prev, struct fragment_info, list); ++ if ((fi2->offset + fi2->len) == fi->offset) { ++ /* glue fragments together */ ++ fi2->len += fi->len; ++ list_del(lh); ++ kfree(fi); ++ } ++ return 0; ++ } else if (offset > (fi->offset + fi->len)) { ++ break; ++ } else if ((offset + len) < fi->offset) { ++ lh = lh->prev; ++ break; ++ } ++ } ++ ++ new = kmalloc(sizeof(struct fragment_info), GFP_ATOMIC); ++ if (!new) ++ return -ENOMEM; ++ ++ new->offset = offset; ++ new->len = len; ++ ++ list_add(&new->list, lh); ++ ++ return 0; ++} ++ ++static inline int new_partial_datagram(struct rtnet_device *dev, ++ struct list_head *pdgl, int dgl, ++ int dg_size, char *frag_buf, ++ int frag_off, int frag_len) ++{ ++ struct partial_datagram *new; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ ++ new = kmalloc(sizeof(struct partial_datagram), GFP_ATOMIC); ++ if (!new) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&new->frag_info); ++ ++ if (new_fragment(&new->frag_info, frag_off, frag_len) < 0) { ++ kfree(new); ++ return -ENOMEM; ++ } ++ ++ new->dgl = dgl; ++ new->dg_size = dg_size; ++ ++ new->skb = rtnetdev_alloc_rtskb(dev, dg_size + dev->hard_header_len + 15); ++ if (!new->skb) { ++ struct fragment_info *fi = list_entry(new->frag_info.next, ++ struct fragment_info, ++ list); ++ kfree(fi); ++ kfree(new); ++ return -ENOMEM; ++ } ++ ++ rtskb_reserve(new->skb, (dev->hard_header_len + 15) & ~15); ++ new->pbuf = rtskb_put(new->skb, dg_size); ++ memcpy(new->pbuf + frag_off, frag_buf, frag_len); ++ ++ list_add(&new->list, pdgl); ++ ++ return 0; ++} ++ ++static inline int update_partial_datagram(struct list_head *pdgl, struct list_head *lh, ++ char *frag_buf, int frag_off, int frag_len) ++{ ++ struct partial_datagram *pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (new_fragment(&pd->frag_info, frag_off, frag_len) < 0) { ++ return -ENOMEM; ++ } ++ ++ memcpy(pd->pbuf + frag_off, frag_buf, frag_len); ++ ++ /* Move list entry to beginnig of list so that oldest partial ++ * datagrams percolate to the end of the list */ ++ list_del(lh); ++ list_add(lh, pdgl); ++ ++ return 0; ++} ++ ++static inline void purge_partial_datagram(struct list_head *old) ++{ ++ struct partial_datagram *pd = list_entry(old, struct partial_datagram, list); ++ struct list_head *lh, *n; ++ ++ list_for_each_safe(lh, n, &pd->frag_info) { ++ struct fragment_info *fi = list_entry(lh, struct fragment_info, list); ++ list_del(lh); ++ kfree(fi); ++ } ++ list_del(old); ++ kfree_rtskb(pd->skb); ++ kfree(pd); ++} ++ ++static inline int is_datagram_complete(struct list_head *lh, int dg_size) ++{ ++ struct partial_datagram *pd = list_entry(lh, struct partial_datagram, list); ++ struct fragment_info *fi = list_entry(pd->frag_info.next, ++ struct fragment_info, list); ++ ++ return (fi->len == dg_size); ++} ++ ++ ++ ++ ++/* Packet reception. We convert the IP1394 encapsulation header to an ++ * ethernet header, and fill it with some of our other fields. This is ++ * an incoming packet from the 1394 bus. */ ++static int eth1394_data_handler(struct rtnet_device *dev, int srcid, int destid, ++ char *buf, int len, nanosecs_abs_t time_stamp) ++{ ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ struct eth1394_priv *priv; ++ union eth1394_hdr *hdr = (union eth1394_hdr *)buf; ++ u16 ether_type = 0; /* initialized to clear warning */ ++ int hdr_len; ++ ++ //~ nanosecs_abs_t time_stamp = rtdm_clock_read(); ++ ++ priv = (struct eth1394_priv *)dev->priv; ++ ++ /* First, did we receive a fragmented or unfragmented datagram? 
*/ ++ hdr->words.word1 = ntohs(hdr->words.word1); ++ ++ hdr_len = hdr_type_len[hdr->common.lf]; ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_UF) { ++ DEBUGP("a single datagram has been received\n"); ++ /* An unfragmented datagram has been received by the ieee1394 ++ * bus. Build an skbuff around it so we can pass it to the ++ * high level network layer. */ ++ ++ //~ if(rtpkb_acquire((struct rtpkb*)packet, &priv->skb_pool)){ ++ //~ HPSB_PRINT (KERN_ERR, "eth1394 rx: low on mem\n"); ++ //~ priv->stats.rx_dropped++; ++ //~ return -1; ++ //~ } ++ ++ skb = rtnetdev_alloc_rtskb(dev, len + dev->hard_header_len + 15); ++ if (!skb) { ++ ETH1394_PRINT_G(KERN_ERR, "eth1394 rx: low on mem\n"); ++ priv->stats.rx_dropped++; ++ return -1; ++ } ++ //~ skb = (struct rtskb *)packet;//we can do this, because these two belong to the same common object, rtpkb. ++ //~ rtpkb_put(skb, len-hdr_len); ++ //~ skb->data = (u8 *)packet->data + hdr_len; //we jump over the 1394-specific fragment overhead ++ //~ rtskb_put(skb, ); ++ rtskb_reserve(skb, (dev->hard_header_len + 15) & ~15);//we reserve the space to put in fake MAC address ++ memcpy(rtskb_put(skb, len - hdr_len), buf + hdr_len, len - hdr_len); ++ ether_type = hdr->uf.ether_type; ++ } else { ++ /* A datagram fragment has been received, now the fun begins. */ ++ struct list_head *pdgl, *lh; ++ struct partial_datagram *pd; ++ int fg_off; ++ int fg_len = len - hdr_len; ++ int dg_size; ++ int dgl; ++ int retval; ++ int sid = NODEID_TO_NODE(srcid); ++ struct pdg_list *pdg = &(priv->pdg[sid]); ++ ++ DEBUGP("a datagram fragment has been received\n"); ++ hdr->words.word3 = ntohs(hdr->words.word3); ++ /* The 4th header word is reserved so no need to do ntohs() */ ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_FF) { ++ //first fragment ++ ether_type = hdr->ff.ether_type; ++ dgl = hdr->ff.dgl; ++ dg_size = hdr->ff.dg_size + 1; ++ fg_off = 0; ++ } else { ++ hdr->words.word2 = ntohs(hdr->words.word2); ++ dgl = hdr->sf.dgl; ++ dg_size = hdr->sf.dg_size + 1; ++ fg_off = hdr->sf.fg_off; ++ } ++ rtdm_lock_get_irqsave(&pdg->lock, context); ++ ++ pdgl = &(pdg->list); ++ lh = find_partial_datagram(pdgl, dgl); ++ ++ if (lh == NULL) { ++ if (pdg->sz == max_partial_datagrams) { ++ /* remove the oldest */ ++ purge_partial_datagram(pdgl->prev); ++ pdg->sz--; ++ } ++ ++ retval = new_partial_datagram(dev, pdgl, dgl, dg_size, ++ buf + hdr_len, fg_off, ++ fg_len); ++ if (retval < 0) { ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ pdg->sz++; ++ lh = find_partial_datagram(pdgl, dgl); ++ } else { ++ struct partial_datagram *pd; ++ ++ pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (fragment_overlap(&pd->frag_info, fg_off, fg_len)) { ++ /* Overlapping fragments, obliterate old ++ * datagram and start new one. */ ++ purge_partial_datagram(lh); ++ retval = new_partial_datagram(dev, pdgl, dgl, ++ dg_size, ++ buf + hdr_len, ++ fg_off, fg_len); ++ if (retval < 0) { ++ pdg->sz--; ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ } else { ++ retval = update_partial_datagram(pdgl, lh, ++ buf + hdr_len, ++ fg_off, fg_len); ++ if (retval < 0) { ++ /* Couldn't save off fragment anyway ++ * so might as well obliterate the ++ * datagram now. 
*/ ++ purge_partial_datagram(lh); ++ pdg->sz--; ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ goto bad_proto; ++ } ++ } /* fragment overlap */ ++ } /* new datagram or add to existing one */ ++ ++ pd = list_entry(lh, struct partial_datagram, list); ++ ++ if (hdr->common.lf == ETH1394_HDR_LF_FF) { ++ pd->ether_type = ether_type; ++ } ++ ++ if (is_datagram_complete(lh, dg_size)) { ++ ether_type = pd->ether_type; ++ pdg->sz--; ++ //skb = skb_get(pd->skb); ++ skb = pd->skb; ++ purge_partial_datagram(lh); ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ } else { ++ /* Datagram is not complete, we're done for the ++ * moment. */ ++ rtdm_lock_put_irqrestore(&pdg->lock, context); ++ return 0; ++ } ++ } /* unframgented datagram or fragmented one */ ++ ++ /* Write metadata, and then pass to the receive level */ ++ skb->ip_summed = CHECKSUM_UNNECESSARY; /* don't check it */ ++ ++ /* Parse the encapsulation header. This actually does the job of ++ * converting to an ethernet frame header, aswell as arp ++ * conversion if needed. ARP conversion is easier in this ++ * direction, since we are using ethernet as our backend. */ ++ skb->protocol = eth1394_parse_encap(skb, dev, srcid, destid, ++ ether_type); ++ ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (!skb->protocol) { ++ DEBUG_PRINT("pointer to %s(%s)%d\n",__FILE__,__FUNCTION__,__LINE__); ++ priv->stats.rx_errors++; ++ priv->stats.rx_dropped++; ++ //dev_kfree_skb_any(skb); ++ kfree_rtskb(skb); ++ goto bad_proto; ++ } ++ ++ skb->time_stamp = time_stamp; ++ /*if (netif_rx(skb) == NET_RX_DROP) { ++ priv->stats.rx_errors++; ++ priv->stats.rx_dropped++; ++ goto bad_proto; ++ }*/ ++ rtnetif_rx(skb);//finally, we deliver the packet ++ ++ /* Statistics */ ++ priv->stats.rx_packets++; ++ priv->stats.rx_bytes += skb->len; ++ rt_mark_stack_mgr(dev); ++ ++bad_proto: ++ if (rtnetif_queue_stopped(dev)) ++ rtnetif_wake_queue(dev); ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ //dev->last_rx = jiffies; ++ ++ return 0; ++} ++ ++ ++static int eth1394_write(struct hpsb_host *host, struct hpsb_packet *packet, unsigned int length) ++{ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, host); ++ int ret; ++ ++ if (hi == NULL) { ++ ETH1394_PRINT_G(KERN_ERR, "Could not find net device for host %s\n", ++ host->driver->name); ++ return RCODE_ADDRESS_ERROR; ++ } ++ ++ //we need to parse the packet now ++ ret = eth1394_data_handler(hi->dev, packet->header[1]>>16, //source id ++ packet->header[0]>>16, //dest id ++ (char *)packet->data, //data ++ packet->data_size, packet->time_stamp); ++ //we only get the request packet, serve it, but dont free it, since it does not belong to us!!!! 
++ ++ if(ret) ++ return RCODE_ADDRESS_ERROR; ++ else ++ return RCODE_COMPLETE; ++} ++ ++ ++/** ++ * callback function for broadcast channel ++ * called from hpsb_iso_wake( ) ++ */ ++static void eth1394_iso(struct hpsb_iso *iso, void *arg) ++{ ++ quadlet_t *data; ++ char *buf; ++ struct rtnet_device *dev; ++ unsigned int len; ++ u32 specifier_id; ++ u16 source_id; ++ int i; ++ int nready; ++ ++ struct host_info *hi = hpsb_get_hostinfo(ð1394_highlevel, iso->host); ++ if (hi == NULL) { ++ ETH1394_PRINT_G(KERN_ERR, "Could not find net device for host %s\n", ++ iso->host->driver->name); ++ return; ++ } ++ ++ dev = hi->dev; ++ ++ nready = hpsb_iso_n_ready(iso); ++ for (i = 0; i < nready; i++) { ++ struct hpsb_iso_packet_info *info = ++ &iso->infos[(iso->first_packet + i) % iso->buf_packets]; ++ data = (quadlet_t*) (iso->data_buf.kvirt + info->offset); ++ ++ /* skip over GASP header */ ++ buf = (char *)data + 8; ++ len = info->len - 8; ++ ++ specifier_id = (((be32_to_cpu(data[0]) & 0xffff) << 8) | ++ ((be32_to_cpu(data[1]) & 0xff000000) >> 24)); ++ source_id = be32_to_cpu(data[0]) >> 16; ++ ++ if (info->channel != (iso->host->csr.broadcast_channel & 0x3f) || ++ specifier_id != ETHER1394_GASP_SPECIFIER_ID) { ++ /* This packet is not for us */ ++ continue; ++ } ++ eth1394_data_handler(dev, source_id, LOCAL_BUS | ALL_NODES, ++ buf, len, rtdm_clock_read()); ++ } ++ ++ hpsb_iso_recv_release_packets(iso, i); ++ ++ //dev->last_rx = jiffies; ++} ++ ++/****************************************** ++ * Datagram transmission code ++ ******************************************/ ++ ++/* Convert a standard ARP packet to 1394 ARP. The first 8 bytes (the entire ++ * arphdr) is the same format as the ip1394 header, so they overlap. The rest ++ * needs to be munged a bit. The remainder of the arphdr is formatted based ++ * on hwaddr len and ipaddr len. We know what they'll be, so it's easy to ++ * judge. ++ * ++ * Now that the EUI is used for the hardware address all we need to do to make ++ * this work for 1394 is to insert 2 quadlets that contain max_rec size, ++ * speed, and unicast FIFO address information between the sender_unique_id ++ * and the IP addresses. ++ */ ++ ++//we dont need the EUI id now. fifo_hi should contain the bus id and node id. ++//fifo_lo should contain the highest 32 bits of in-node address. ++static inline void eth1394_arp_to_1394arp(struct rtskb *skb, ++ struct rtnet_device *dev) ++{ ++ struct eth1394_priv *priv = (struct eth1394_priv *)(dev->priv); ++ u16 phy_id = NODEID_TO_NODE(priv->host->node_id); ++ ++ struct arphdr *arp = (struct arphdr *)skb->data; ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ struct eth1394_arp *arp1394 = (struct eth1394_arp *)skb->data; ++ ++ arp1394->hw_addr_len = 6; ++ arp1394->sip = *(u32*)(arp_ptr + ETH_ALEN); ++ arp1394->max_rec = priv->host->csr.max_rec; ++ arp1394->sspd = priv->sspd[phy_id]; ++ ++ return; ++} ++ ++/* We need to encapsulate the standard header with our own. We use the ++ * ethernet header's proto for our own. */ ++static inline unsigned int eth1394_encapsulate_prep(unsigned int max_payload, ++ int proto, ++ union eth1394_hdr *hdr, ++ u16 dg_size, u16 dgl) ++{ ++ unsigned int adj_max_payload = max_payload - hdr_type_len[ETH1394_HDR_LF_UF]; ++ ++ /* Does it all fit in one packet? 
*/ ++ if (dg_size <= adj_max_payload) { ++ hdr->uf.lf = ETH1394_HDR_LF_UF; ++ hdr->uf.ether_type = proto; ++ } else { ++ hdr->ff.lf = ETH1394_HDR_LF_FF; ++ hdr->ff.ether_type = proto; ++ hdr->ff.dg_size = dg_size - 1; ++ hdr->ff.dgl = dgl; ++ adj_max_payload = max_payload - hdr_type_len[ETH1394_HDR_LF_FF]; ++ } ++ return((dg_size + (adj_max_payload - 1)) / adj_max_payload); ++} ++ ++static inline unsigned int eth1394_encapsulate(struct rtskb *skb, ++ unsigned int max_payload, ++ union eth1394_hdr *hdr) ++{ ++ union eth1394_hdr *bufhdr; ++ int ftype = hdr->common.lf; ++ int hdrsz = hdr_type_len[ftype]; ++ unsigned int adj_max_payload = max_payload - hdrsz; ++ ++ switch(ftype) { ++ case ETH1394_HDR_LF_UF: ++ bufhdr = (union eth1394_hdr *)rtskb_push(skb, hdrsz); ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = hdr->words.word2; ++ break; ++ ++ case ETH1394_HDR_LF_FF: ++ bufhdr = (union eth1394_hdr *)rtskb_push(skb, hdrsz); ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = hdr->words.word2; ++ bufhdr->words.word3 = htons(hdr->words.word3); ++ bufhdr->words.word4 = 0; ++ ++ /* Set frag type here for future interior fragments */ ++ hdr->common.lf = ETH1394_HDR_LF_IF; ++ hdr->sf.fg_off = 0; ++ break; ++ ++ default: ++ hdr->sf.fg_off += adj_max_payload; ++ bufhdr = (union eth1394_hdr *)rtskb_pull(skb, adj_max_payload); ++ if (max_payload >= skb->len) ++ hdr->common.lf = ETH1394_HDR_LF_LF; ++ bufhdr->words.word1 = htons(hdr->words.word1); ++ bufhdr->words.word2 = htons(hdr->words.word2); ++ bufhdr->words.word3 = htons(hdr->words.word3); ++ bufhdr->words.word4 = 0; ++ } ++ ++ return min(max_payload, skb->len); ++} ++ ++//just allocate a hpsb_packet header, without payload. ++static inline struct hpsb_packet *eth1394_alloc_common_packet(struct hpsb_host *host, unsigned int priority) ++{ ++ struct hpsb_packet *p; ++ ++ p = hpsb_alloc_packet(0,&host->pool, priority); ++ if (p) { ++ p->host = host; ++ p->data = NULL; ++ p->generation = get_hpsb_generation(host); ++ p->type = hpsb_async; ++ } ++ return p; ++} ++ ++//prepare an asynchronous write packet ++static inline int eth1394_prep_write_packet(struct hpsb_packet *p, ++ struct hpsb_host *host, ++ nodeid_t node, u64 addr, ++ void * data, int tx_len) ++{ ++ p->node_id = node; ++ ++ p->tcode = TCODE_WRITEB; ++ ++ p->header[1] = (host->node_id << 16) | (addr >> 32); ++ p->header[2] = addr & 0xffffffff; ++ ++ p->header_size = 16; ++ p->expect_response = 1; ++ ++ if (hpsb_get_tlabel(p)) { ++ ETH1394_PRINT_G(KERN_ERR, "No more tlabels left while sending " ++ "to node " NODE_BUS_FMT "\n", NODE_BUS_ARGS(host, node)); ++ return -1; ++ } ++ p->header[0] = (p->node_id << 16) | (p->tlabel << 10) ++ | (1 << 8) | (TCODE_WRITEB << 4); ++ ++ p->header[3] = tx_len << 16; ++ p->data_size = tx_len + (tx_len % 4 ? 4 - (tx_len % 4) : 0); ++ p->data = (quadlet_t*)data; ++ ++ return 0; ++} ++ ++//prepare gasp packet from skb. ++static inline void eth1394_prep_gasp_packet(struct hpsb_packet *p, ++ struct eth1394_priv *priv, ++ struct rtskb *skb, int length) ++{ ++ p->header_size = 4; ++ p->tcode = TCODE_STREAM_DATA; ++ ++ p->header[0] = (length << 16) | (3 << 14) ++ | ((priv->broadcast_channel) << 8) ++ | (TCODE_STREAM_DATA << 4); ++ p->data_size = length; ++ p->data = ((quadlet_t*)skb->data) - 2; //we need 64bits for extra spec_id and gasp version. 
++ p->data[0] = cpu_to_be32((priv->host->node_id << 16) | ++ ETHER1394_GASP_SPECIFIER_ID_HI); ++ p->data[1] = cpu_to_be32((ETHER1394_GASP_SPECIFIER_ID_LO << 24) | ++ ETHER1394_GASP_VERSION); ++ ++ /* Setting the node id to ALL_NODES (not LOCAL_BUS | ALL_NODES) ++ * prevents hpsb_send_packet() from setting the speed to an arbitrary ++ * value based on packet->node_id if packet->node_id is not set. */ ++ p->node_id = ALL_NODES; ++ p->speed_code = priv->sspd[ALL_NODES]; ++} ++ ++ ++static inline void eth1394_free_packet(struct hpsb_packet *packet) ++{ ++ if (packet->tcode != TCODE_STREAM_DATA) ++ hpsb_free_tlabel(packet); ++ hpsb_free_packet(packet); ++} ++ ++static void eth1394_complete_cb(struct hpsb_packet *packet, void *__ptask); ++ ++ ++/** ++ * this function does the real calling of hpsb_send_packet ++ *But before that, it also constructs the FireWire packet according to ++ * ptask ++ */ ++static int eth1394_send_packet(struct packet_task *ptask, unsigned int tx_len, nanosecs_abs_t *xmit_stamp) ++{ ++ struct eth1394_priv *priv = ptask->priv; ++ struct hpsb_packet *packet = NULL; ++ int ret; ++ ++ packet = eth1394_alloc_common_packet(priv->host, ptask->priority); ++ if (!packet) { ++ ret = -ENOMEM; ++ return ret; ++ } ++ if(xmit_stamp) ++ packet->xmit_stamp = xmit_stamp; ++ ++ if (ptask->tx_type == ETH1394_GASP) { ++ int length = tx_len + (2 * sizeof(quadlet_t)); //for the extra gasp overhead ++ ++ eth1394_prep_gasp_packet(packet, priv, ptask->skb, length); ++ } else if (eth1394_prep_write_packet(packet, priv->host, ++ ptask->dest_node, ++ ptask->addr, ptask->skb->data, ++ tx_len)) { ++ hpsb_free_packet(packet); ++ return -1; ++ } ++ ++ ptask->packet = packet; ++ hpsb_set_packet_complete_task(ptask->packet, eth1394_complete_cb, ++ ptask); ++ ++ ret = hpsb_send_packet(packet); ++ if (ret != 0) { ++ eth1394_free_packet(packet); ++ } ++ ++ return ret; ++} ++ ++ ++/* Task function to be run when a datagram transmission is completed */ ++static inline void eth1394_dg_complete(struct packet_task *ptask, int fail) ++{ ++ struct rtskb *skb = ptask->skb; ++ struct rtnet_device *dev = skb->rtdev; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ rtdm_lockctx_t context; ++ ++ /* Statistics */ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (fail) { ++ priv->stats.tx_dropped++; ++ priv->stats.tx_errors++; ++ } else { ++ priv->stats.tx_bytes += skb->len; ++ priv->stats.tx_packets++; ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ //dev_kfree_skb_any(skb); ++ kfree_rtskb(skb); ++ //~ kmem_cache_free(packet_task_cache, ptask); ++ //this means this ptask structure has been freed ++ ptask->packet=NULL; ++} ++ ++ ++/* Callback for when a packet has been sent and the status of that packet is ++ * known */ ++static void eth1394_complete_cb(struct hpsb_packet *packet, void *__ptask) ++{ ++ struct packet_task *ptask = (struct packet_task *)__ptask; ++ int fail = 0; ++ ++ if (packet->tcode != TCODE_STREAM_DATA) ++ fail = hpsb_packet_success(packet); ++ ++ //we have no rights to free packet, since it belongs to RT-FireWire kernel. 
++ //~ eth1394_free_packet(packet); ++ ++ ptask->outstanding_pkts--; ++ if (ptask->outstanding_pkts > 0 && !fail) ++ { ++ int tx_len; ++ ++ /* Add the encapsulation header to the fragment */ ++ tx_len = eth1394_encapsulate(ptask->skb, ptask->max_payload, ++ &ptask->hdr); ++ if (eth1394_send_packet(ptask, tx_len, NULL)) ++ eth1394_dg_complete(ptask, 1); ++ } else { ++ eth1394_dg_complete(ptask, fail); ++ } ++} ++ ++ ++ ++/** ++ *Transmit a packet (called by kernel) ++ * this is the dev->hard_start_transmit ++ */ ++static int eth1394_tx (struct rtskb *skb, struct rtnet_device *dev) ++{ ++ ++ struct ethhdr *eth; ++ struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; ++ int proto; ++ rtdm_lockctx_t context; ++ nodeid_t dest_node; ++ eth1394_tx_type tx_type; ++ int ret = 0; ++ unsigned int tx_len; ++ unsigned int max_payload; ++ u16 dg_size; ++ u16 dgl; ++ ++ //we try to find the available ptask struct, if failed, we can not send packet ++ struct packet_task *ptask = NULL; ++ int i; ++ for(i=0;i<20;i++){ ++ if(priv->ptask_list[i].packet == NULL){ ++ ptask = &priv->ptask_list[i]; ++ break; ++ } ++ } ++ if(ptask == NULL) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ if (priv->bc_state == ETHER1394_BC_CLOSED) { ++ ETH1394_PRINT(KERN_ERR, dev->name, ++ "Cannot send packet, no broadcast channel available.\n"); ++ ret = -EAGAIN; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ goto fail; ++ } ++ if ((ret = eth1394_init_bc(dev))) { ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ goto fail; ++ } ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ //if ((skb = skb_share_check (skb, kmflags)) == NULL) { ++ // ret = -ENOMEM; ++ // goto fail; ++ //} ++ ++ /* Get rid of the fake eth1394 header, but save a pointer */ ++ eth = (struct ethhdr*)skb->data; ++ rtskb_pull(skb, ETH_HLEN); ++ //dont get rid of the fake eth1394 header, since we need it on the receiving side ++ //eth = (struct ethhdr*)skb->data; ++ ++ //~ //find the node id via our fake MAC address ++ //~ ne = hpsb_guid_get_entry(be64_to_cpu(*(u64*)eth->h_dest)); ++ //~ if (!ne) ++ //~ dest_node = LOCAL_BUS | ALL_NODES; ++ //~ else ++ //~ dest_node = ne->nodeid; ++ //now it is much easier ++ dest_node = *(u16*)eth->h_dest; ++ if(dest_node != 0xffff) ++ DEBUGP("%s: dest_node is %x\n", __FUNCTION__, dest_node); ++ ++ proto = eth->h_proto; ++ ++ /* If this is an ARP packet, convert it */ ++ if (proto == __constant_htons (ETH_P_ARP)) ++ eth1394_arp_to_1394arp (skb, dev); ++ ++ max_payload = priv->maxpayload[NODEID_TO_NODE(dest_node)]; ++ DEBUGP("%s: max_payload is %d\n", __FUNCTION__, max_payload); ++ ++ /* This check should be unnecessary, but we'll keep it for safety for ++ * a while longer. */ ++ if (max_payload < 512) { ++ DEBUGP("max_payload too small: %d (setting to 512)\n", ++ max_payload); ++ max_payload = 512; ++ } ++ ++ /* Set the transmission type for the packet. ARP packets and IP ++ * broadcast packets are sent via GASP. 
*/ ++ if (memcmp(eth->h_dest, dev->broadcast, sizeof(nodeid_t)) == 0 || ++ proto == __constant_htons(ETH_P_ARP) || ++ (proto == __constant_htons(ETH_P_IP) && ++ IN_MULTICAST(__constant_ntohl(skb->nh.iph->daddr)))) { ++ tx_type = ETH1394_GASP; ++ max_payload -= ETHER1394_GASP_OVERHEAD; //we have extra overhead for gasp packet ++ } else { ++ tx_type = ETH1394_WRREQ; ++ } ++ ++ dg_size = skb->len; ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ dgl = priv->dgl[NODEID_TO_NODE(dest_node)]; ++ if (max_payload < dg_size + hdr_type_len[ETH1394_HDR_LF_UF]) ++ priv->dgl[NODEID_TO_NODE(dest_node)]++; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ ptask->hdr.words.word1 = 0; ++ ptask->hdr.words.word2 = 0; ++ ptask->hdr.words.word3 = 0; ++ ptask->hdr.words.word4 = 0; ++ ptask->skb = skb; ++ ptask->priv = priv; ++ ptask->tx_type = tx_type; ++ ++ if (tx_type != ETH1394_GASP) { ++ u64 addr; ++ ++ /* This test is just temporary until ConfigROM support has ++ * been added to eth1394. Until then, we need an ARP packet ++ * after a bus reset from the current destination node so that ++ * we can get FIFO information. */ ++ //~ if (priv->fifo[NODEID_TO_NODE(dest_node)] == 0ULL) { ++ //~ ret = -EAGAIN; ++ //~ goto fail; ++ //~ } ++ ++ //~ rtos_spin_lock_irqsave(&priv->lock, flags); ++ //~ addr = priv->fifo[NODEID_TO_NODE(dest_node)]; ++ addr = ETHER1394_REGION_ADDR; ++ //~ rtos_spin_unlock_irqrestore(&priv->lock, flags); ++ ++ ptask->addr = addr; ++ ptask->dest_node = dest_node; ++ } ++ ++ ptask->tx_type = tx_type; ++ ptask->max_payload = max_payload; ++ ptask->outstanding_pkts = eth1394_encapsulate_prep(max_payload, proto, ++ &ptask->hdr, dg_size, ++ dgl); ++ ++ /* Add the encapsulation header to the fragment */ ++ tx_len = eth1394_encapsulate(skb, max_payload, &ptask->hdr); ++ //dev->trans_start = jiffies; ++ //~ if(skb->xmit_stamp) ++ //~ *skb->xmit_stamp = cpu_to_be64(rtos_get_time() + *skb->xmit_stamp); ++ ++ ++ if (eth1394_send_packet(ptask, tx_len, skb->xmit_stamp)) ++ goto fail; ++ ++ rtnetif_wake_queue(dev); ++ return 0; ++fail: ++ if (ptask!=NULL){ ++ //~ kmem_cache_free(packet_task_cache, ptask); ++ ptask->packet=NULL; ++ ptask=NULL; ++ } ++ ++ if (skb != NULL) ++ dev_kfree_rtskb(skb); ++ ++ rtdm_lock_get_irqsave(&priv->lock, context); ++ priv->stats.tx_dropped++; ++ priv->stats.tx_errors++; ++ rtdm_lock_put_irqrestore(&priv->lock, context); ++ ++ if (rtnetif_queue_stopped(dev)) ++ rtnetif_wake_queue(dev); ++ ++ return 0; /* returning non-zero causes serious problems */ ++} ++ ++static int eth1394_init(void) ++{ ++ hpsb_register_highlevel(ð1394_highlevel); ++ ++ return 0; ++} ++ ++static void eth1394_exit(void) ++{ ++ hpsb_unregister_highlevel(ð1394_highlevel); ++} ++ ++module_init(eth1394_init); ++module_exit(eth1394_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/addons/proxy.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/proxy.c 2021-04-07 16:01:27.149634278 +0800 +@@ -0,0 +1,442 @@ ++/* rtnetproxy.c: a Linux network driver that uses the RTnet driver to ++ * transport IP data from/to Linux kernel mode. ++ * This allows the usage of TCP/IP from linux space using via the RTNET ++ * network adapter. ++ * ++ * ++ * Usage: ++ * ++ * insmod rtnetproxy.o (only after having rtnet up and running) ++ * ++ * ifconfig rtproxy up IP_ADDRESS netmask NETMASK ++ * ++ * Use it like any other network device from linux. 
++ * ++ * Restrictions: ++ * Only IPV4 based protocols are supported, UDP and ICMP can be send out ++ * but not received - as these are handled directly by rtnet! ++ * ++ * ++ * ++ * Based on the linux net driver dummy.c by Nick Holloway ++ * ++ * ++ * Changelog: ++ * ++ * 08-Nov-2002 Mathias Koehrer - Clear separation between rtai context and ++ * standard linux driver context. ++ * Data exchange via ringbuffers. ++ * A RTAI thread is used for rtnet transmission. ++ * ++ * 05-Nov-2002 Mathias Koehrer - Initial version! ++ * Development based on rtnet 0.2.6, ++ * rtai-24.1.10, kernel 2.4.19 ++ * ++ * ++ * Mathias Koehrer - mathias_koehrer@yahoo.de ++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include /* For the statistics structure. */ ++#include /* For ARPHRD_ETHER */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct net_device *dev_rtnetproxy; ++ ++/* ************************************************************************** ++ * SKB pool management (JK): ++ * ************************************************************************ */ ++#define DEFAULT_PROXY_RTSKBS 32 ++ ++static unsigned int proxy_rtskbs = DEFAULT_PROXY_RTSKBS; ++module_param(proxy_rtskbs, uint, 0444); ++MODULE_PARM_DESC(proxy_rtskbs, ++ "Number of realtime socket buffers in proxy pool"); ++ ++static struct rtskb_pool rtskb_pool; ++ ++static struct rtskb_queue tx_queue; ++static struct rtskb_queue rx_queue; ++ ++/* handle for non-real-time signal */ ++static rtdm_nrtsig_t rtnetproxy_rx_signal; ++ ++/* Thread for transmission */ ++static rtdm_task_t rtnetproxy_tx_task; ++ ++static rtdm_event_t rtnetproxy_tx_event; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++static char *rtdev_attach = "rteth0"; ++module_param(rtdev_attach, charp, 0444); ++MODULE_PARM_DESC(rtdev_attach, "Attach to the specified RTnet device"); ++ ++struct rtnet_device *rtnetproxy_rtdev; ++#endif ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * T R A N S M I T ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++static void rtnetproxy_tx_loop(void *arg) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&rtnetproxy_tx_event) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&tx_queue)) != NULL) { ++ rtdev = rtskb->rtdev; ++ rtdev_xmit_proxy(rtskb); ++ rtdev_dereference(rtdev); ++ } ++ } ++} ++ ++/* ************************************************************************ ++ * hard_xmit ++ * ++ * This function runs in linux kernel context and is executed whenever ++ * there is a frame to be sent out. 
++ * ************************************************************************ */ ++static int rtnetproxy_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct ethhdr *eth = (struct ethhdr *)skb->data; ++ struct rtskb *rtskb; ++ int len = skb->len; ++#ifndef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ struct dest_route rt; ++ struct iphdr *iph; ++ u32 saddr, daddr; ++#endif ++ ++ switch (ntohs(eth->h_proto)) { ++ case ETH_P_IP: ++ if (len < sizeof(struct ethhdr) + sizeof(struct iphdr)) ++ goto drop1; ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ case ETH_P_ARP: ++#endif ++ break; ++ default: ++ drop1: ++ dev->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ return NETDEV_TX_OK; ++ } ++ ++ rtskb = alloc_rtskb(len, &rtskb_pool); ++ if (!rtskb) ++ return NETDEV_TX_BUSY; ++ ++ memcpy(rtskb_put(rtskb, len), skb->data, len); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ dev_kfree_skb(skb); ++ ++ rtskb->rtdev = rtnetproxy_rtdev; ++ if (rtdev_reference(rtnetproxy_rtdev) == 0) { ++ dev->stats.tx_dropped++; ++ kfree_rtskb(rtskb); ++ return NETDEV_TX_BUSY; ++ } ++ ++#else /* !CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ iph = (struct iphdr *)(skb->data + sizeof(struct ethhdr)); ++ saddr = iph->saddr; ++ daddr = iph->daddr; ++ ++ dev_kfree_skb(skb); ++ ++ if (rt_ip_route_output(&rt, daddr, INADDR_ANY) < 0) { ++ drop2: ++ dev->stats.tx_dropped++; ++ kfree_rtskb(rtskb); ++ return NETDEV_TX_OK; ++ } ++ if (rt.rtdev->local_ip != saddr) { ++ rtdev_dereference(rt.rtdev); ++ goto drop2; ++ } ++ ++ eth = (struct ethhdr *)rtskb->data; ++ memcpy(eth->h_source, rt.rtdev->dev_addr, rt.rtdev->addr_len); ++ memcpy(eth->h_dest, rt.dev_addr, rt.rtdev->addr_len); ++ ++ rtskb->rtdev = rt.rtdev; ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ ++ dev->stats.tx_packets++; ++ dev->stats.tx_bytes += len; ++ ++ rtskb_queue_tail(&tx_queue, rtskb); ++ rtdm_event_signal(&rtnetproxy_tx_event); ++ ++ return NETDEV_TX_OK; ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * R E C E I V E ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++/* ************************************************************************ ++ * This function runs in real-time context. ++ * ++ * It is called from inside rtnet whenever a packet has been received that ++ * has to be processed by rtnetproxy. ++ * ************************************************************************ */ ++static void rtnetproxy_recv(struct rtskb *rtskb) ++{ ++ /* Acquire rtskb (JK) */ ++ if (rtskb_acquire(rtskb, &rtskb_pool) != 0) { ++ dev_rtnetproxy->stats.rx_dropped++; ++ rtdm_printk("rtnetproxy_recv: No free rtskb in pool\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ if (rtskb_queue_tail_check(&rx_queue, rtskb)) ++ rtdm_nrtsig_pend(&rtnetproxy_rx_signal); ++} ++ ++/* ************************************************************************ ++ * This function runs in kernel mode. ++ * It is activated from rtnetproxy_signal_handler whenever rtnet received a ++ * frame to be processed by rtnetproxy. 
++ * ************************************************************************ */ ++static inline void rtnetproxy_kernel_recv(struct rtskb *rtskb) ++{ ++ struct sk_buff *skb; ++ struct net_device *dev = dev_rtnetproxy; ++ ++ int header_len = rtskb->rtdev->hard_header_len; ++ int len = rtskb->len + header_len; ++ ++ /* Copy the realtime skb (rtskb) to the standard skb: */ ++ skb = dev_alloc_skb(len + 2); ++ skb_reserve(skb, 2); ++ ++ memcpy(skb_put(skb, len), rtskb->data - header_len, len); ++ ++ /* Set some relevant entries in the skb: */ ++ skb->protocol = eth_type_trans(skb, dev); ++ skb->dev = dev; ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ skb->pkt_type = PACKET_HOST; /* Extremely important! Why?!? */ ++ ++ /* the rtskb stamp is useless (different clock), get new one */ ++ __net_timestamp(skb); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++ dev->last_rx = jiffies; ++#endif ++ dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_packets++; ++ ++ netif_rx(skb); /* pass it to the received stuff */ ++} ++ ++/* ************************************************************************ ++ * This function runs in kernel mode. ++ * It is activated from rtnetproxy_recv whenever rtnet received a frame to ++ * be processed by rtnetproxy. ++ * ************************************************************************ */ ++static void rtnetproxy_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ rtnetproxy_kernel_recv(rtskb); ++ kfree_rtskb(rtskb); ++ } ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * G E N E R A L ++ * ************************************************************************ ++ * ************************************************************************ */ ++ ++static void fake_multicast_support(struct net_device *dev) ++{ ++} ++ ++#ifdef CONFIG_NET_FASTROUTE ++static int rtnetproxy_accept_fastpath(struct net_device *dev, ++ struct dst_entry *dst) ++{ ++ return -1; ++} ++#endif ++ ++static int rtnetproxy_open(struct net_device *dev) ++{ ++ int err = try_module_get(THIS_MODULE); ++ if (err == 0) ++ return -EIDRM; ++ ++ return 0; ++} ++ ++static int rtnetproxy_stop(struct net_device *dev) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static const struct net_device_ops rtnetproxy_netdev_ops = { ++ .ndo_open = rtnetproxy_open, ++ .ndo_stop = rtnetproxy_stop, ++ .ndo_start_xmit = rtnetproxy_xmit, ++ .ndo_set_rx_mode = fake_multicast_support, ++}; ++ ++/* ************************************************************************ ++ * device init ++ * ************************************************************************ */ ++static void __init rtnetproxy_init(struct net_device *dev) ++{ ++ /* Fill in device structure with ethernet-generic values. 
*/ ++ ether_setup(dev); ++ ++ dev->tx_queue_len = 0; ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ memcpy(dev->dev_addr, rtnetproxy_rtdev->dev_addr, MAX_ADDR_LEN); ++#else ++ dev->flags |= IFF_NOARP; ++#endif ++ dev->flags &= ~IFF_MULTICAST; ++ ++ dev->netdev_ops = &rtnetproxy_netdev_ops; ++} ++ ++/* ************************************************************************ ++ * ************************************************************************ ++ * I N I T ++ * ************************************************************************ ++ * ************************************************************************ */ ++static int __init rtnetproxy_init_module(void) ++{ ++ int err; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if ((rtnetproxy_rtdev = rtdev_get_by_name(rtdev_attach)) == NULL) { ++ printk("Couldn't attach to %s\n", rtdev_attach); ++ return -EINVAL; ++ } ++ printk("RTproxy attached to %s\n", rtdev_attach); ++#endif ++ ++ /* Initialize the proxy's rtskb pool (JK) */ ++ if (rtskb_module_pool_init(&rtskb_pool, proxy_rtskbs) < proxy_rtskbs) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ dev_rtnetproxy = ++ alloc_netdev(0, "rtproxy", NET_NAME_UNKNOWN, rtnetproxy_init); ++ if (!dev_rtnetproxy) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ rtdm_nrtsig_init(&rtnetproxy_rx_signal, rtnetproxy_signal_handler, ++ NULL); ++ ++ rtskb_queue_init(&tx_queue); ++ rtskb_queue_init(&rx_queue); ++ ++ err = register_netdev(dev_rtnetproxy); ++ if (err < 0) ++ goto err3; ++ ++ /* Init the task for transmission */ ++ rtdm_event_init(&rtnetproxy_tx_event, 0); ++ err = rtdm_task_init(&rtnetproxy_tx_task, "rtnetproxy", ++ rtnetproxy_tx_loop, 0, RTDM_TASK_LOWEST_PRIORITY, ++ 0); ++ if (err) ++ goto err4; ++ ++ /* Register with RTnet */ ++ rt_ip_fallback_handler = rtnetproxy_recv; ++ ++ printk("rtnetproxy installed as \"%s\"\n", dev_rtnetproxy->name); ++ ++ return 0; ++ ++err4: ++ unregister_netdev(dev_rtnetproxy); ++ ++err3: ++ rtdm_nrtsig_destroy(&rtnetproxy_rx_signal); ++ ++ free_netdev(dev_rtnetproxy); ++ ++err1: ++ rtskb_pool_release(&rtskb_pool); ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ rtdev_dereference(rtnetproxy_rtdev); ++#endif ++ return err; ++} ++ ++static void __exit rtnetproxy_cleanup_module(void) ++{ ++ struct rtskb *rtskb; ++ ++ /* Unregister the fallback at rtnet */ ++ rt_ip_fallback_handler = NULL; ++ ++ /* Unregister the net device: */ ++ unregister_netdev(dev_rtnetproxy); ++ free_netdev(dev_rtnetproxy); ++ ++ rtdm_event_destroy(&rtnetproxy_tx_event); ++ rtdm_task_destroy(&rtnetproxy_tx_task); ++ ++ /* free the non-real-time signal */ ++ rtdm_nrtsig_destroy(&rtnetproxy_rx_signal); ++ ++ while ((rtskb = rtskb_dequeue(&tx_queue)) != NULL) { ++ rtdev_dereference(rtskb->rtdev); ++ kfree_rtskb(rtskb); ++ } ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++ ++ rtskb_pool_release(&rtskb_pool); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ rtdev_dereference(rtnetproxy_rtdev); ++#endif ++} ++ ++module_init(rtnetproxy_init_module); ++module_exit(rtnetproxy_cleanup_module); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/addons/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/Makefile 2021-04-07 16:01:27.144634285 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) += rtcap.o ++ ++rtcap-y := cap.o ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) += rtnetproxy.o ++ ++rtnetproxy-y := proxy.o +--- 
linux/drivers/xenomai/net/addons/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/Kconfig 2021-04-07 16:01:27.139634292 +0800 +@@ -0,0 +1,44 @@ ++menu "Add-Ons" ++ depends on XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_NET_ADDON_RTCAP ++ depends on XENO_DRIVERS_NET && m ++ select ETHERNET ++ tristate "Real-Time Capturing Support" ++ default n ++ ---help--- ++ This feature allows to capture real-time packets traversing the RTnet ++ stack. It can both be used to sniff passively on a network (in this ++ case you may want to enable the promisc mode of your real-time NIC via ++ rtifconfig) and to log the traffic the node receives and transmits ++ during normal operation. RTcap consists of additional hooks in the ++ RTnet stack and a separate module as interface to standard network ++ analysis tools like Ethereal. ++ ++ For further information see Documentation/README.rtcap. ++ ++config XENO_DRIVERS_NET_ADDON_PROXY ++ depends on XENO_DRIVERS_NET_RTIPV4 && m ++ select ETHERNET ++ tristate "IP protocol proxy for Linux" ++ default n ++ ---help--- ++ Enables a forward-to-Linux module for all IP protocols that are not ++ handled by the IPv4 implemenation of RTnet (TCP, UDP, etc.). Only use ++ when you know what you are doing - it can easily break your real-time ++ requirements! ++ ++ See Documentation/README.rtnetproxy for further information. ++ ++config XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ depends on XENO_DRIVERS_NET_ADDON_PROXY ++ bool "Enable ARP handling via protocol proxy" ++ default n ++ ---help--- ++ Enables ARP support for the IP protocol proxy. Incoming ARP replies ++ are then delivered to both, the RTnet and the Linux network stack, ++ but only answered by Linux. The IP protocol proxy gets attached to ++ the RTnet device specified by the module parameter "rtdev_attach", ++ rteth0 by default. ++ ++endmenu +--- linux/drivers/xenomai/net/addons/cap.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/addons/cap.c 2021-04-07 16:01:27.134634299 +0800 +@@ -0,0 +1,503 @@ ++/*** ++ * ++ * rtcap/rtcap.c ++ * ++ * Real-Time Capturing Interface ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include /* for netdev_priv() */ ++ ++MODULE_LICENSE("GPL"); ++ ++static unsigned int rtcap_rtskbs = 128; ++module_param(rtcap_rtskbs, uint, 0444); ++MODULE_PARM_DESC(rtcap_rtskbs, "Number of real-time socket buffers per " ++ "real-time device"); ++ ++#define TAP_DEV 1 ++#define RTMAC_TAP_DEV 2 ++#define XMIT_HOOK 4 ++ ++static rtdm_nrtsig_t cap_signal; ++static struct rtskb_queue cap_queue; ++static struct rtskb_pool cap_pool; ++ ++static struct tap_device_t { ++ struct net_device *tap_dev; ++ struct net_device *rtmac_tap_dev; ++ struct net_device_stats tap_dev_stats; ++ int present; ++ int (*orig_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++} tap_device[MAX_RT_DEVICES]; ++ ++void rtcap_rx_hook(struct rtskb *rtskb) ++{ ++ bool trigger = false; ++ ++ if ((rtskb->cap_comp_skb = rtskb_pool_dequeue(&cap_pool)) == 0) { ++ tap_device[rtskb->rtdev->ifindex].tap_dev_stats.rx_dropped++; ++ return; ++ } ++ ++ if (cap_queue.first == NULL) { ++ cap_queue.first = rtskb; ++ trigger = true; ++ } else ++ cap_queue.last->cap_next = rtskb; ++ cap_queue.last = rtskb; ++ rtskb->cap_next = NULL; ++ ++ rtskb->cap_flags |= RTSKB_CAP_SHARED; ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&cap_signal); ++} ++ ++int rtcap_xmit_hook(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tap_device_t *tap_dev = &tap_device[rtskb->rtdev->ifindex]; ++ rtdm_lockctx_t context; ++ bool trigger = false; ++ ++ if ((rtskb->cap_comp_skb = rtskb_pool_dequeue(&cap_pool)) == 0) { ++ tap_dev->tap_dev_stats.rx_dropped++; ++ return tap_dev->orig_xmit(rtskb, rtdev); ++ } ++ ++ rtskb->cap_next = NULL; ++ rtskb->cap_start = rtskb->data; ++ rtskb->cap_len = rtskb->len; ++ rtskb->cap_flags |= RTSKB_CAP_SHARED; ++ ++ rtskb->time_stamp = rtdm_clock_read(); ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (cap_queue.first == NULL) { ++ cap_queue.first = rtskb; ++ trigger = true; ++ } else ++ cap_queue.last->cap_next = rtskb; ++ cap_queue.last = rtskb; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&cap_signal); ++ ++ return tap_dev->orig_xmit(rtskb, rtdev); ++} ++ ++int rtcap_loopback_xmit_hook(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tap_device_t *tap_dev = &tap_device[rtskb->rtdev->ifindex]; ++ ++ rtskb->time_stamp = rtdm_clock_read(); ++ ++ return tap_dev->orig_xmit(rtskb, rtdev); ++} ++ ++void rtcap_kfree_rtskb(struct rtskb *rtskb) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *comp_skb; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (rtskb->cap_flags & RTSKB_CAP_SHARED) { ++ rtskb->cap_flags &= ~RTSKB_CAP_SHARED; ++ ++ comp_skb = rtskb->cap_comp_skb; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb_pool_queue_tail(comp_skb->pool, comp_skb); ++ ++ return; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb->chain_end = rtskb; ++ rtskb_pool_queue_tail(rtskb->pool, rtskb); ++} ++ ++static void convert_timestamp(nanosecs_abs_t timestamp, struct sk_buff *skb) ++{ ++#ifdef CONFIG_KTIME_SCALAR ++ skb->tstamp.tv64 = timestamp; ++#else /* !CONFIG_KTIME_SCALAR */ ++ unsigned long rem; ++ ++ rem = do_div(timestamp, NSEC_PER_SEC); ++ skb->tstamp = ktime_set((long)timestamp, rem); ++#endif /* !CONFIG_KTIME_SCALAR */ ++} ++ ++static void rtcap_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ struct sk_buff *skb; ++ struct sk_buff *rtmac_skb; ++ struct net_device_stats *stats; ++ int 
ifindex; ++ int active; ++ rtdm_lockctx_t context; ++ ++ while (1) { ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if ((rtskb = cap_queue.first) == NULL) { ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ break; ++ } ++ ++ cap_queue.first = rtskb->cap_next; ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ ifindex = rtskb->rtdev->ifindex; ++ active = tap_device[ifindex].present; ++ ++ if (active) { ++ if ((tap_device[ifindex].tap_dev->flags & IFF_UP) == 0) ++ active &= ~TAP_DEV; ++ if (active & RTMAC_TAP_DEV && ++ !(tap_device[ifindex].rtmac_tap_dev->flags & ++ IFF_UP)) ++ active &= ~RTMAC_TAP_DEV; ++ } ++ ++ if (active == 0) { ++ tap_device[ifindex].tap_dev_stats.rx_dropped++; ++ rtcap_kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ skb = dev_alloc_skb(rtskb->cap_len); ++ if (skb) { ++ memcpy(skb_put(skb, rtskb->cap_len), rtskb->cap_start, ++ rtskb->cap_len); ++ ++ if (active & TAP_DEV) { ++ skb->dev = tap_device[ifindex].tap_dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ convert_timestamp(rtskb->time_stamp, skb); ++ ++ rtmac_skb = NULL; ++ if ((rtskb->cap_flags & ++ RTSKB_CAP_RTMAC_STAMP) && ++ (active & RTMAC_TAP_DEV)) { ++ rtmac_skb = skb_clone(skb, GFP_ATOMIC); ++ if (rtmac_skb != NULL) ++ convert_timestamp( ++ rtskb->cap_rtmac_stamp, ++ rtmac_skb); ++ } ++ ++ rtcap_kfree_rtskb(rtskb); ++ ++ stats = &tap_device[ifindex].tap_dev_stats; ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ if (rtmac_skb != NULL) { ++ rtmac_skb->dev = tap_device[ifindex] ++ .rtmac_tap_dev; ++ netif_rx(rtmac_skb); ++ } ++ netif_rx(skb); ++ } else if (rtskb->cap_flags & RTSKB_CAP_RTMAC_STAMP) { ++ skb->dev = tap_device[ifindex].rtmac_tap_dev; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ convert_timestamp(rtskb->cap_rtmac_stamp, skb); ++ ++ rtcap_kfree_rtskb(rtskb); ++ ++ stats = &tap_device[ifindex].tap_dev_stats; ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ netif_rx(skb); ++ } else { ++ dev_kfree_skb(skb); ++ rtcap_kfree_rtskb(rtskb); ++ } ++ } else { ++ printk("RTcap: unable to allocate linux skb\n"); ++ rtcap_kfree_rtskb(rtskb); ++ } ++ } ++} ++ ++static int tap_dev_open(struct net_device *dev) ++{ ++ int err; ++ ++ err = try_module_get(THIS_MODULE); ++ if (err == 0) ++ return -EIDRM; ++ ++ memcpy(dev->dev_addr, ++ (*(struct rtnet_device **)netdev_priv(dev))->dev_addr, ++ MAX_ADDR_LEN); ++ ++ return 0; ++} ++ ++static int tap_dev_stop(struct net_device *dev) ++{ ++ module_put(THIS_MODULE); ++ return 0; ++} ++ ++static int tap_dev_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ netif_stop_queue(dev); ++ return 1; ++} ++ ++static struct net_device_stats *tap_dev_get_stats(struct net_device *dev) ++{ ++ struct rtnet_device *rtdev = *(struct rtnet_device **)netdev_priv(dev); ++ ++ return &tap_device[rtdev->ifindex].tap_dev_stats; ++} ++ ++static int tap_dev_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ return -EINVAL; ++} ++ ++static const struct net_device_ops tap_netdev_ops = { ++ .ndo_open = tap_dev_open, ++ .ndo_stop = tap_dev_stop, ++ .ndo_start_xmit = tap_dev_xmit, ++ .ndo_get_stats = tap_dev_get_stats, ++ .ndo_change_mtu = tap_dev_change_mtu, ++}; ++ ++static void tap_dev_setup(struct net_device *dev) ++{ ++ ether_setup(dev); ++ ++ dev->netdev_ops = &tap_netdev_ops; ++ dev->mtu = 1500; ++ dev->flags &= ~IFF_MULTICAST; ++} ++ ++void cleanup_tap_devices(void) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if ((tap_device[i].present & TAP_DEV) != 0) { ++ if ((tap_device[i].present 
& XMIT_HOOK) != 0) { ++ rtdev = *(struct rtnet_device **)netdev_priv( ++ tap_device[i].tap_dev); ++ ++ mutex_lock(&rtdev->nrt_lock); ++ rtdev->hard_start_xmit = ++ tap_device[i].orig_xmit; ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = ++ tap_device[i].orig_xmit; ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ } ++ ++ if ((tap_device[i].present & RTMAC_TAP_DEV) != 0) { ++ unregister_netdev(tap_device[i].rtmac_tap_dev); ++ free_netdev(tap_device[i].rtmac_tap_dev); ++ } ++ ++ unregister_netdev(tap_device[i].tap_dev); ++ free_netdev(tap_device[i].tap_dev); ++ } ++} ++ ++int __init rtcap_init(void) ++{ ++ struct rtnet_device *rtdev; ++ struct net_device *dev; ++ int ret; ++ int devices = 0; ++ int i; ++ ++ printk("RTcap: real-time capturing interface\n"); ++ ++ rtskb_queue_init(&cap_queue); ++ ++ rtdm_nrtsig_init(&cap_signal, rtcap_signal_handler, NULL); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ tap_device[i].present = 0; ++ ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev != NULL) { ++ mutex_lock(&rtdev->nrt_lock); ++ ++ if (test_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) { ++ mutex_unlock(&rtdev->nrt_lock); ++ printk("RTcap: %s busy, skipping device!\n", ++ rtdev->name); ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ if (rtdev->mac_priv != NULL) { ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ printk("RTcap: RTmac discipline already active on device %s. " ++ "Load RTcap before RTmac!\n", ++ rtdev->name); ++ ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ memset(&tap_device[i].tap_dev_stats, 0, ++ sizeof(struct net_device_stats)); ++ ++ dev = alloc_netdev(sizeof(struct rtnet_device *), ++ rtdev->name, NET_NAME_UNKNOWN, ++ tap_dev_setup); ++ if (!dev) { ++ ret = -ENOMEM; ++ goto error3; ++ } ++ ++ tap_device[i].tap_dev = dev; ++ *(struct rtnet_device **)netdev_priv(dev) = rtdev; ++ ++ ret = register_netdev(dev); ++ if (ret < 0) ++ goto error3; ++ ++ tap_device[i].present = TAP_DEV; ++ ++ tap_device[i].orig_xmit = rtdev->hard_start_xmit; ++ ++ if ((rtdev->flags & IFF_LOOPBACK) == 0) { ++ dev = alloc_netdev( ++ sizeof(struct rtnet_device *), ++ rtdev->name, NET_NAME_UNKNOWN, ++ tap_dev_setup); ++ if (!dev) { ++ ret = -ENOMEM; ++ goto error3; ++ } ++ ++ tap_device[i].rtmac_tap_dev = dev; ++ *(struct rtnet_device **)netdev_priv(dev) = ++ rtdev; ++ strncat(dev->name, "-mac", ++ IFNAMSIZ - strlen(dev->name)); ++ ++ ret = register_netdev(dev); ++ if (ret < 0) ++ goto error3; ++ ++ tap_device[i].present |= RTMAC_TAP_DEV; ++ ++ rtdev->hard_start_xmit = rtcap_xmit_hook; ++ } else ++ rtdev->hard_start_xmit = ++ rtcap_loopback_xmit_hook; ++ ++ /* If the device requires no xmit_lock, start_xmit points equals ++ * hard_start_xmit => we have to update this as well ++ */ ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = rtdev->hard_start_xmit; ++ ++ tap_device[i].present |= XMIT_HOOK; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ devices++; ++ } ++ } ++ ++ if (devices == 0) { ++ printk("RTcap: no real-time devices found!\n"); ++ ret = -ENODEV; ++ goto error2; ++ } ++ ++ if (rtskb_module_pool_init(&cap_pool, rtcap_rtskbs * devices) < ++ rtcap_rtskbs * devices) { ++ rtskb_pool_release(&cap_pool); ++ ret = -ENOMEM; ++ goto error2; ++ } ++ ++ /* register capturing handlers with RTnet core ++ * (adding the handler need no locking) */ ++ rtcap_handler = rtcap_rx_hook; ++ ++ return 0; ++ ++error3: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ printk("RTcap: unable to register %s!\n", dev->name); ++ ++error2: ++ cleanup_tap_devices(); ++ 
rtdm_nrtsig_destroy(&cap_signal); ++ ++ return ret; ++} ++ ++void rtcap_cleanup(void) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_nrtsig_destroy(&cap_signal); ++ ++ /* unregister capturing handlers ++ * (take lock to avoid any unloading code before handler was left) */ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ rtcap_handler = NULL; ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ /* empty queue (should be already empty) */ ++ rtcap_signal_handler(0, NULL /* we ignore them anyway */); ++ ++ cleanup_tap_devices(); ++ ++ rtskb_pool_release(&cap_pool); ++ ++ printk("RTcap: unloaded\n"); ++} ++ ++module_init(rtcap_init); ++module_exit(rtcap_cleanup); +--- linux/drivers/xenomai/net/stack/rtskb.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtskb.c 2021-04-07 16:01:27.129634306 +0800 +@@ -0,0 +1,535 @@ ++/*** ++ * ++ * stack/rtskb.c - rtskb implementation for rtnet ++ * ++ * Copyright (C) 2002 Ulrich Marx , ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static unsigned int global_rtskbs = DEFAULT_GLOBAL_RTSKBS; ++module_param(global_rtskbs, uint, 0444); ++MODULE_PARM_DESC(global_rtskbs, ++ "Number of realtime socket buffers in global pool"); ++ ++/* Linux slab pool for rtskbs */ ++static struct kmem_cache *rtskb_slab_pool; ++ ++/* pool of rtskbs for global use */ ++struct rtskb_pool global_pool; ++EXPORT_SYMBOL_GPL(global_pool); ++ ++/* pool statistics */ ++unsigned int rtskb_pools = 0; ++unsigned int rtskb_pools_max = 0; ++unsigned int rtskb_amount = 0; ++unsigned int rtskb_amount_max = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++/* RTcap interface */ ++rtdm_lock_t rtcap_lock; ++EXPORT_SYMBOL_GPL(rtcap_lock); ++ ++void (*rtcap_handler)(struct rtskb *skb) = NULL; ++EXPORT_SYMBOL_GPL(rtcap_handler); ++#endif ++ ++/*** ++ * rtskb_copy_and_csum_bits ++ */ ++unsigned int rtskb_copy_and_csum_bits(const struct rtskb *skb, int offset, ++ u8 *to, int len, unsigned int csum) ++{ ++ int copy; ++ ++ /* Copy header. 
*/ ++ if ((copy = skb->len - offset) > 0) { ++ if (copy > len) ++ copy = len; ++ csum = csum_partial_copy_nocheck(skb->data + offset, to, copy, ++ csum); ++ if ((len -= copy) == 0) ++ return csum; ++ offset += copy; ++ to += copy; ++ } ++ ++ RTNET_ASSERT(len == 0, ); ++ return csum; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_copy_and_csum_bits); ++ ++/*** ++ * rtskb_copy_and_csum_dev ++ */ ++void rtskb_copy_and_csum_dev(const struct rtskb *skb, u8 *to) ++{ ++ unsigned int csum; ++ unsigned int csstart; ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ csstart = skb->h.raw - skb->data; ++ ++ if (csstart > skb->len) ++ BUG(); ++ } else ++ csstart = skb->len; ++ ++ memcpy(to, skb->data, csstart); ++ ++ csum = 0; ++ if (csstart != skb->len) ++ csum = rtskb_copy_and_csum_bits(skb, csstart, to + csstart, ++ skb->len - csstart, 0); ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ unsigned int csstuff = csstart + skb->csum; ++ ++ *((unsigned short *)(to + csstuff)) = csum_fold(csum); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_copy_and_csum_dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++/** ++ * skb_over_panic - private function ++ * @skb: buffer ++ * @sz: size ++ * @here: address ++ * ++ * Out of line support code for rtskb_put(). Not user callable. ++ */ ++void rtskb_over_panic(struct rtskb *skb, int sz, void *here) ++{ ++ rtdm_printk("RTnet: rtskb_put :over: %p:%d put:%d dev:%s\n", here, ++ skb->len, sz, (skb->rtdev) ? skb->rtdev->name : ""); ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_over_panic); ++ ++/** ++ * skb_under_panic - private function ++ * @skb: buffer ++ * @sz: size ++ * @here: address ++ * ++ * Out of line support code for rtskb_push(). Not user callable. ++ */ ++void rtskb_under_panic(struct rtskb *skb, int sz, void *here) ++{ ++ rtdm_printk("RTnet: rtskb_push :under: %p:%d put:%d dev:%s\n", here, ++ skb->len, sz, (skb->rtdev) ? 
skb->rtdev->name : ""); ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_under_panic); ++#endif /* CONFIG_XENO_DRIVERS_NET_CHECKED */ ++ ++static struct rtskb *__rtskb_pool_dequeue(struct rtskb_pool *pool) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ struct rtskb *skb; ++ ++ if (pool->lock_ops && !pool->lock_ops->trylock(pool->lock_cookie)) ++ return NULL; ++ skb = __rtskb_dequeue(queue); ++ if (skb == NULL && pool->lock_ops) ++ pool->lock_ops->unlock(pool->lock_cookie); ++ ++ return skb; ++} ++ ++struct rtskb *rtskb_pool_dequeue(struct rtskb_pool *pool) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ rtdm_lockctx_t context; ++ struct rtskb *skb; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ skb = __rtskb_pool_dequeue(pool); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return skb; ++} ++EXPORT_SYMBOL_GPL(rtskb_pool_dequeue); ++ ++static void __rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ ++ __rtskb_queue_tail(queue, skb); ++ if (pool->lock_ops) ++ pool->lock_ops->unlock(pool->lock_cookie); ++} ++ ++void rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb) ++{ ++ struct rtskb_queue *queue = &pool->queue; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_pool_queue_tail(pool, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++EXPORT_SYMBOL_GPL(rtskb_pool_queue_tail); ++ ++/*** ++ * alloc_rtskb - allocate an rtskb from a pool ++ * @size: required buffer size (to check against maximum boundary) ++ * @pool: pool to take the rtskb from ++ */ ++struct rtskb *alloc_rtskb(unsigned int size, struct rtskb_pool *pool) ++{ ++ struct rtskb *skb; ++ ++ RTNET_ASSERT(size <= SKB_DATA_ALIGN(RTSKB_SIZE), return NULL;); ++ ++ skb = rtskb_pool_dequeue(pool); ++ if (!skb) ++ return NULL; ++ ++ /* Load the data pointers. 
*/ ++ skb->data = skb->buf_start; ++ skb->tail = skb->buf_start; ++ skb->end = skb->buf_start + size; ++ ++ /* Set up other states */ ++ skb->chain_end = skb; ++ skb->len = 0; ++ skb->pkt_type = PACKET_HOST; ++ skb->xmit_stamp = NULL; ++ skb->ip_summed = CHECKSUM_NONE; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ skb->cap_flags = 0; ++#endif ++ ++ return skb; ++} ++ ++EXPORT_SYMBOL_GPL(alloc_rtskb); ++ ++/*** ++ * kfree_rtskb ++ * @skb rtskb ++ */ ++void kfree_rtskb(struct rtskb *skb) ++{ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ rtdm_lockctx_t context; ++ struct rtskb *comp_skb; ++ struct rtskb *next_skb; ++ struct rtskb *chain_end; ++#endif ++ ++ RTNET_ASSERT(skb != NULL, return;); ++ RTNET_ASSERT(skb->pool != NULL, return;); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ next_skb = skb; ++ chain_end = skb->chain_end; ++ ++ do { ++ skb = next_skb; ++ next_skb = skb->next; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ ++ if (skb->cap_flags & RTSKB_CAP_SHARED) { ++ skb->cap_flags &= ~RTSKB_CAP_SHARED; ++ ++ comp_skb = skb->cap_comp_skb; ++ skb->pool = xchg(&comp_skb->pool, skb->pool); ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ rtskb_pool_queue_tail(comp_skb->pool, comp_skb); ++ } else { ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++ ++ skb->chain_end = skb; ++ rtskb_pool_queue_tail(skb->pool, skb); ++ } ++ ++ } while (chain_end != skb); ++ ++#else /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++ rtskb_pool_queue_tail(skb->pool, skb); ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++} ++ ++EXPORT_SYMBOL_GPL(kfree_rtskb); ++ ++/*** ++ * rtskb_pool_init ++ * @pool: pool to be initialized ++ * @initial_size: number of rtskbs to allocate ++ * return: number of actually allocated rtskbs ++ */ ++unsigned int rtskb_pool_init(struct rtskb_pool *pool, unsigned int initial_size, ++ const struct rtskb_pool_lock_ops *lock_ops, ++ void *lock_cookie) ++{ ++ unsigned int i; ++ ++ rtskb_queue_init(&pool->queue); ++ ++ i = rtskb_pool_extend(pool, initial_size); ++ ++ rtskb_pools++; ++ if (rtskb_pools > rtskb_pools_max) ++ rtskb_pools_max = rtskb_pools; ++ ++ pool->lock_ops = lock_ops; ++ pool->lock_cookie = lock_cookie; ++ ++ return i; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_pool_init); ++ ++static int rtskb_module_pool_trylock(void *cookie) ++{ ++ int err = 1; ++ if (cookie) ++ err = try_module_get(cookie); ++ return err; ++} ++ ++static void rtskb_module_pool_unlock(void *cookie) ++{ ++ if (cookie) ++ module_put(cookie); ++} ++ ++static const struct rtskb_pool_lock_ops rtskb_module_lock_ops = { ++ .trylock = rtskb_module_pool_trylock, ++ .unlock = rtskb_module_pool_unlock, ++}; ++ ++unsigned int __rtskb_module_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ struct module *module) ++{ ++ return rtskb_pool_init(pool, initial_size, &rtskb_module_lock_ops, ++ module); ++} ++EXPORT_SYMBOL_GPL(__rtskb_module_pool_init); ++ ++/*** ++ * __rtskb_pool_release ++ * @pool: pool to release ++ */ ++void rtskb_pool_release(struct rtskb_pool *pool) ++{ ++ struct rtskb *skb; ++ ++ while ((skb = rtskb_dequeue(&pool->queue)) != NULL) { ++ rtdev_unmap_rtskb(skb); ++ kmem_cache_free(rtskb_slab_pool, skb); ++ rtskb_amount--; ++ } ++ ++ rtskb_pools--; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_pool_release); ++ ++unsigned int rtskb_pool_extend(struct rtskb_pool *pool, unsigned int add_rtskbs) ++{ ++ unsigned int i; ++ struct rtskb *skb; ++ ++ RTNET_ASSERT(pool != NULL, return -EINVAL;); ++ ++ for (i = 0; i < add_rtskbs; i++) { ++ /* get rtskb 
from slab pool */ ++ if (!(skb = kmem_cache_alloc(rtskb_slab_pool, GFP_KERNEL))) { ++ printk(KERN_ERR ++ "RTnet: rtskb allocation from slab pool failed\n"); ++ break; ++ } ++ ++ /* fill the header with zero */ ++ memset(skb, 0, sizeof(struct rtskb)); ++ ++ skb->chain_end = skb; ++ skb->pool = pool; ++ skb->buf_start = ++ ((unsigned char *)skb) + ALIGN_RTSKB_STRUCT_LEN; ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ skb->buf_end = skb->buf_start + SKB_DATA_ALIGN(RTSKB_SIZE) - 1; ++#endif ++ ++ if (rtdev_map_rtskb(skb) < 0) { ++ kmem_cache_free(rtskb_slab_pool, skb); ++ break; ++ } ++ ++ rtskb_queue_tail(&pool->queue, skb); ++ ++ rtskb_amount++; ++ if (rtskb_amount > rtskb_amount_max) ++ rtskb_amount_max = rtskb_amount; ++ } ++ ++ return i; ++} ++ ++unsigned int rtskb_pool_shrink(struct rtskb_pool *pool, unsigned int rem_rtskbs) ++{ ++ unsigned int i; ++ struct rtskb *skb; ++ ++ for (i = 0; i < rem_rtskbs; i++) { ++ if ((skb = rtskb_dequeue(&pool->queue)) == NULL) ++ break; ++ ++ rtdev_unmap_rtskb(skb); ++ kmem_cache_free(rtskb_slab_pool, skb); ++ rtskb_amount--; ++ } ++ ++ return i; ++} ++ ++/* Note: acquires only the first skb of a chain! */ ++int rtskb_acquire(struct rtskb *rtskb, struct rtskb_pool *comp_pool) ++{ ++ struct rtskb *comp_rtskb; ++ struct rtskb_pool *release_pool; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&comp_pool->queue.lock, context); ++ ++ comp_rtskb = __rtskb_pool_dequeue(comp_pool); ++ if (!comp_rtskb) { ++ rtdm_lock_put_irqrestore(&comp_pool->queue.lock, context); ++ return -ENOMEM; ++ } ++ ++ rtdm_lock_put(&comp_pool->queue.lock); ++ ++ comp_rtskb->chain_end = comp_rtskb; ++ comp_rtskb->pool = release_pool = rtskb->pool; ++ ++ rtdm_lock_get(&release_pool->queue.lock); ++ ++ __rtskb_pool_queue_tail(release_pool, comp_rtskb); ++ ++ rtdm_lock_put_irqrestore(&release_pool->queue.lock, context); ++ ++ rtskb->pool = comp_pool; ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_acquire); ++ ++/* clone rtskb to another, allocating the new rtskb from pool */ ++struct rtskb *rtskb_clone(struct rtskb *rtskb, struct rtskb_pool *pool) ++{ ++ struct rtskb *clone_rtskb; ++ unsigned int total_len; ++ ++ clone_rtskb = alloc_rtskb(rtskb->end - rtskb->buf_start, pool); ++ if (clone_rtskb == NULL) ++ return NULL; ++ ++ /* Note: We don't clone ++ - rtskb.sk ++ - rtskb.xmit_stamp ++ until real use cases show up. 
*/ ++ ++ clone_rtskb->priority = rtskb->priority; ++ clone_rtskb->rtdev = rtskb->rtdev; ++ clone_rtskb->time_stamp = rtskb->time_stamp; ++ ++ clone_rtskb->mac.raw = clone_rtskb->buf_start; ++ clone_rtskb->nh.raw = clone_rtskb->buf_start; ++ clone_rtskb->h.raw = clone_rtskb->buf_start; ++ ++ clone_rtskb->data += rtskb->data - rtskb->buf_start; ++ clone_rtskb->tail += rtskb->tail - rtskb->buf_start; ++ clone_rtskb->mac.raw += rtskb->mac.raw - rtskb->buf_start; ++ clone_rtskb->nh.raw += rtskb->nh.raw - rtskb->buf_start; ++ clone_rtskb->h.raw += rtskb->h.raw - rtskb->buf_start; ++ ++ clone_rtskb->protocol = rtskb->protocol; ++ clone_rtskb->pkt_type = rtskb->pkt_type; ++ ++ clone_rtskb->ip_summed = rtskb->ip_summed; ++ clone_rtskb->csum = rtskb->csum; ++ ++ total_len = rtskb->len + rtskb->data - rtskb->mac.raw; ++ memcpy(clone_rtskb->mac.raw, rtskb->mac.raw, total_len); ++ clone_rtskb->len = rtskb->len; ++ ++ return clone_rtskb; ++} ++ ++EXPORT_SYMBOL_GPL(rtskb_clone); ++ ++int rtskb_pools_init(void) ++{ ++ rtskb_slab_pool = kmem_cache_create("rtskb_slab_pool", ++ ALIGN_RTSKB_STRUCT_LEN + ++ SKB_DATA_ALIGN(RTSKB_SIZE), ++ 0, SLAB_HWCACHE_ALIGN, NULL); ++ if (rtskb_slab_pool == NULL) ++ return -ENOMEM; ++ ++ /* reset the statistics (cache is accounted separately) */ ++ rtskb_pools = 0; ++ rtskb_pools_max = 0; ++ rtskb_amount = 0; ++ rtskb_amount_max = 0; ++ ++ /* create the global rtskb pool */ ++ if (rtskb_module_pool_init(&global_pool, global_rtskbs) < global_rtskbs) ++ goto err_out; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ rtdm_lock_init(&rtcap_lock); ++#endif ++ ++ return 0; ++ ++err_out: ++ rtskb_pool_release(&global_pool); ++ kmem_cache_destroy(rtskb_slab_pool); ++ ++ return -ENOMEM; ++} ++ ++void rtskb_pools_release(void) ++{ ++ rtskb_pool_release(&global_pool); ++ kmem_cache_destroy(rtskb_slab_pool); ++} +--- linux/drivers/xenomai/net/stack/rtnet_rtpc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_rtpc.c 2021-04-07 16:01:27.124634313 +0800 +@@ -0,0 +1,258 @@ ++/*** ++ * ++ * stack/rtnet_rtpc.c ++ * ++ * RTnet - real-time networking subsystem ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++static DEFINE_RTDM_LOCK(pending_calls_lock); ++static DEFINE_RTDM_LOCK(processed_calls_lock); ++static rtdm_event_t dispatch_event; ++static rtdm_task_t dispatch_task; ++static rtdm_nrtsig_t rtpc_nrt_signal; ++ ++LIST_HEAD(pending_calls); ++LIST_HEAD(processed_calls); ++ ++#ifndef __wait_event_interruptible_timeout ++#define __wait_event_interruptible_timeout(wq, condition, ret) \ ++ do { \ ++ wait_queue_t __wait; \ ++ init_waitqueue_entry(&__wait, current); \ ++ \ ++ add_wait_queue(&wq, &__wait); \ ++ for (;;) { \ ++ set_current_state(TASK_INTERRUPTIBLE); \ ++ if (condition) \ ++ break; \ ++ if (!signal_pending(current)) { \ ++ ret = schedule_timeout(ret); \ ++ if (!ret) \ ++ break; \ ++ continue; \ ++ } \ ++ ret = -ERESTARTSYS; \ ++ break; \ ++ } \ ++ current->state = TASK_RUNNING; \ ++ remove_wait_queue(&wq, &__wait); \ ++ } while (0) ++#endif ++ ++#ifndef wait_event_interruptible_timeout ++#define wait_event_interruptible_timeout(wq, condition, timeout) \ ++ ({ \ ++ long __ret = timeout; \ ++ if (!(condition)) \ ++ __wait_event_interruptible_timeout(wq, condition, \ ++ __ret); \ ++ __ret; \ ++ }) ++#endif ++ ++int rtnet_rtpc_dispatch_call(rtpc_proc proc, unsigned int timeout, ++ void *priv_data, size_t priv_data_size, ++ rtpc_copy_back_proc copy_back_handler, ++ rtpc_cleanup_proc cleanup_handler) ++{ ++ struct rt_proc_call *call; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ call = kmalloc(sizeof(struct rt_proc_call) + priv_data_size, ++ GFP_KERNEL); ++ if (call == NULL) ++ return -ENOMEM; ++ ++ memcpy(call->priv_data, priv_data, priv_data_size); ++ ++ call->processed = 0; ++ call->proc = proc; ++ call->result = 0; ++ call->cleanup_handler = cleanup_handler; ++ atomic_set(&call->ref_count, 2); /* dispatcher + rt-procedure */ ++ init_waitqueue_head(&call->call_wq); ++ ++ rtdm_lock_get_irqsave(&pending_calls_lock, context); ++ list_add_tail(&call->list_entry, &pending_calls); ++ rtdm_lock_put_irqrestore(&pending_calls_lock, context); ++ ++ rtdm_event_signal(&dispatch_event); ++ ++ if (timeout > 0) { ++ ret = wait_event_interruptible_timeout( ++ call->call_wq, call->processed, (timeout * HZ) / 1000); ++ if (ret == 0) ++ ret = -ETIME; ++ } else ++ ret = wait_event_interruptible(call->call_wq, call->processed); ++ ++ if (ret >= 0) { ++ if (copy_back_handler != NULL) ++ copy_back_handler(call, priv_data); ++ ret = call->result; ++ } ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++ ++ return ret; ++} ++ ++static inline struct rt_proc_call *rtpc_dequeue_pending_call(void) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call = NULL; ++ ++ rtdm_lock_get_irqsave(&pending_calls_lock, context); ++ if (!list_empty(&pending_calls)) { ++ call = (struct rt_proc_call *)pending_calls.next; ++ list_del(&call->list_entry); ++ } ++ rtdm_lock_put_irqrestore(&pending_calls_lock, context); ++ ++ return call; ++} ++ ++static inline void rtpc_queue_processed_call(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ bool trigger; ++ ++ rtdm_lock_get_irqsave(&processed_calls_lock, context); ++ trigger = list_empty(&processed_calls); ++ list_add_tail(&call->list_entry, &processed_calls); ++ rtdm_lock_put_irqrestore(&processed_calls_lock, context); ++ ++ if (trigger) ++ rtdm_nrtsig_pend(&rtpc_nrt_signal); ++} ++ ++static inline struct rt_proc_call *rtpc_dequeue_processed_call(void) ++{ ++ 
rtdm_lockctx_t context; ++ struct rt_proc_call *call = NULL; ++ ++ rtdm_lock_get_irqsave(&processed_calls_lock, context); ++ if (!list_empty(&processed_calls)) { ++ call = (struct rt_proc_call *)processed_calls.next; ++ list_del(&call->list_entry); ++ } ++ rtdm_lock_put_irqrestore(&processed_calls_lock, context); ++ ++ return call; ++} ++ ++static void rtpc_dispatch_handler(void *arg) ++{ ++ struct rt_proc_call *call; ++ int ret; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&dispatch_event) < 0) ++ break; ++ ++ while ((call = rtpc_dequeue_pending_call())) { ++ ret = call->proc(call); ++ if (ret != -CALL_PENDING) ++ rtpc_complete_call(call, ret); ++ } ++ } ++} ++ ++static void rtpc_signal_handler(rtdm_nrtsig_t *nrt_sig, void *arg) ++{ ++ struct rt_proc_call *call; ++ ++ while ((call = rtpc_dequeue_processed_call()) != NULL) { ++ call->processed = 1; ++ wake_up(&call->call_wq); ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++ } ++} ++ ++void rtnet_rtpc_complete_call(struct rt_proc_call *call, int result) ++{ ++ call->result = result; ++ rtpc_queue_processed_call(call); ++} ++ ++void rtnet_rtpc_complete_call_nrt(struct rt_proc_call *call, int result) ++{ ++ RTNET_ASSERT(!rtdm_in_rt_context(), ++ rtnet_rtpc_complete_call(call, result); ++ return;); ++ ++ call->processed = 1; ++ wake_up(&call->call_wq); ++ ++ if (atomic_dec_and_test(&call->ref_count)) { ++ if (call->cleanup_handler != NULL) ++ call->cleanup_handler(&call->priv_data); ++ kfree(call); ++ } ++} ++ ++int __init rtpc_init(void) ++{ ++ int ret; ++ ++ rtdm_nrtsig_init(&rtpc_nrt_signal, rtpc_signal_handler, NULL); ++ ++ rtdm_event_init(&dispatch_event, 0); ++ ++ ret = rtdm_task_init(&dispatch_task, "rtnet-rtpc", ++ rtpc_dispatch_handler, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&dispatch_event); ++ rtdm_nrtsig_destroy(&rtpc_nrt_signal); ++ } ++ ++ return ret; ++} ++ ++void rtpc_cleanup(void) ++{ ++ rtdm_event_destroy(&dispatch_event); ++ rtdm_task_destroy(&dispatch_task); ++ rtdm_nrtsig_destroy(&rtpc_nrt_signal); ++} ++ ++EXPORT_SYMBOL_GPL(rtnet_rtpc_dispatch_call); ++EXPORT_SYMBOL_GPL(rtnet_rtpc_complete_call); ++EXPORT_SYMBOL_GPL(rtnet_rtpc_complete_call_nrt); +--- linux/drivers/xenomai/net/stack/rtnet_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_module.c 2021-04-07 16:01:27.120634319 +0800 +@@ -0,0 +1,411 @@ ++/*** ++ * ++ * stack/rtnet_module.c - module framework, proc file system ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("RTnet stack core"); ++ ++struct class *rtnet_class; ++ ++struct rtnet_mgr STACK_manager; ++struct rtnet_mgr RTDEV_manager; ++ ++EXPORT_SYMBOL_GPL(STACK_manager); ++EXPORT_SYMBOL_GPL(RTDEV_manager); ++ ++const char rtnet_rtdm_provider_name[] = ++ "(C) 1999-2008 RTnet Development Team, http://www.rtnet.org"; ++ ++EXPORT_SYMBOL_GPL(rtnet_rtdm_provider_name); ++ ++void rtnet_corectl_register(void); ++void rtnet_corectl_unregister(void); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++/*** ++ * proc filesystem section ++ */ ++struct xnvfile_directory rtnet_proc_root; ++EXPORT_SYMBOL_GPL(rtnet_proc_root); ++ ++static int rtnet_devices_nrt_lock_get(struct xnvfile *vfile) ++{ ++ return mutex_lock_interruptible(&rtnet_devices_nrt_lock); ++} ++ ++static void rtnet_devices_nrt_lock_put(struct xnvfile *vfile) ++{ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++static struct xnvfile_lock_ops rtnet_devices_nrt_lock_ops = { ++ .get = rtnet_devices_nrt_lock_get, ++ .put = rtnet_devices_nrt_lock_put, ++}; ++ ++static void *rtnet_devices_begin(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos == 0) ++ return VFILE_SEQ_START; ++ ++ return (void *)2UL; ++} ++ ++static void *rtnet_devices_next(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos >= MAX_RT_DEVICES) ++ return NULL; ++ ++ return (void *)2UL; ++} ++ ++static int rtnet_devices_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct rtnet_device *rtdev; ++ ++ if (data == NULL) { ++ xnvfile_printf(it, "Index\tName\t\tFlags\n"); ++ return 0; ++ } ++ ++ rtdev = __rtdev_get_by_index(it->pos); ++ if (rtdev == NULL) ++ return VFILE_SEQ_SKIP; ++ ++ xnvfile_printf(it, "%d\t%-15s %s%s%s%s\n", rtdev->ifindex, rtdev->name, ++ (rtdev->flags & IFF_UP) ? "UP" : "DOWN", ++ (rtdev->flags & IFF_BROADCAST) ? " BROADCAST" : "", ++ (rtdev->flags & IFF_LOOPBACK) ? " LOOPBACK" : "", ++ (rtdev->flags & IFF_PROMISC) ? 
" PROMISC" : ""); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_devices_vfile_ops = { ++ .begin = rtnet_devices_begin, ++ .next = rtnet_devices_next, ++ .show = rtnet_devices_show, ++}; ++ ++static struct xnvfile_regular rtnet_devices_vfile = { ++ .entry = { .lockops = &rtnet_devices_nrt_lock_ops, }, ++ .ops = &rtnet_devices_vfile_ops, ++}; ++ ++static int rtnet_rtskb_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ unsigned int rtskb_len; ++ ++ rtskb_len = ALIGN_RTSKB_STRUCT_LEN + SKB_DATA_ALIGN(RTSKB_SIZE); ++ ++ xnvfile_printf(it, ++ "Statistics\t\tCurrent\tMaximum\n" ++ "rtskb pools\t\t%d\t%d\n" ++ "rtskbs\t\t\t%d\t%d\n" ++ "rtskb memory need\t%d\t%d\n", ++ rtskb_pools, rtskb_pools_max, rtskb_amount, ++ rtskb_amount_max, rtskb_amount * rtskb_len, ++ rtskb_amount_max * rtskb_len); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtskb_vfile_ops = { ++ .show = rtnet_rtskb_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtskb_vfile = { ++ .ops = &rtnet_rtskb_vfile_ops, ++}; ++ ++static int rtnet_version_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ const char verstr[] = "RTnet for Xenomai v" XENO_VERSION_STRING "\n" ++ "RTcap: " ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ "yes\n" ++#else ++ "no\n" ++#endif ++ "rtnetproxy: " ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ "yes\n" ++#else ++ "no\n" ++#endif ++ "bug checks: " ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ "yes\n" ++#else ++ "no\n" ++#endif ++ ; ++ ++ xnvfile_printf(it, "%s", verstr); ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_version_vfile_ops = { ++ .show = rtnet_version_show, ++}; ++ ++static struct xnvfile_regular rtnet_version_vfile = { ++ .ops = &rtnet_version_vfile_ops, ++}; ++ ++static void *rtnet_stats_begin(struct xnvfile_regular_iterator *it) ++{ ++ return (void *)1UL; ++} ++ ++static void *rtnet_stats_next(struct xnvfile_regular_iterator *it) ++{ ++ if (it->pos >= MAX_RT_DEVICES) ++ return NULL; ++ ++ return (void *)1UL; ++} ++ ++static int rtnet_stats_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct net_device_stats *stats; ++ struct rtnet_device *rtdev; ++ ++ if (it->pos == 0) { ++ xnvfile_printf(it, ++ "Inter-| Receive " ++ " | Transmit\n"); ++ xnvfile_printf(it, ++ " face |bytes packets errs drop fifo frame " ++ "compressed multicast|bytes packets errs " ++ "drop fifo colls carrier compressed\n"); ++ return 0; ++ } ++ ++ rtdev = __rtdev_get_by_index(it->pos); ++ if (rtdev == NULL) ++ return VFILE_SEQ_SKIP; ++ ++ if (rtdev->get_stats == NULL) { ++ xnvfile_printf(it, "%6s: No statistics available.\n", ++ rtdev->name); ++ return 0; ++ } ++ ++ stats = rtdev->get_stats(rtdev); ++ xnvfile_printf( ++ it, ++ "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " ++ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", ++ rtdev->name, stats->rx_bytes, stats->rx_packets, ++ stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, ++ stats->rx_fifo_errors, ++ stats->rx_length_errors + stats->rx_over_errors + ++ stats->rx_crc_errors + stats->rx_frame_errors, ++ stats->rx_compressed, stats->multicast, stats->tx_bytes, ++ stats->tx_packets, stats->tx_errors, stats->tx_dropped, ++ stats->tx_fifo_errors, stats->collisions, ++ stats->tx_carrier_errors + stats->tx_aborted_errors + ++ stats->tx_window_errors + stats->tx_heartbeat_errors, ++ stats->tx_compressed); ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_stats_vfile_ops = { ++ .begin = rtnet_stats_begin, ++ .next = rtnet_stats_next, ++ .show = 
rtnet_stats_show, ++}; ++ ++static struct xnvfile_regular rtnet_stats_vfile = { ++ .entry = { .lockops = &rtnet_devices_nrt_lock_ops, }, ++ .ops = &rtnet_stats_vfile_ops, ++}; ++ ++static int rtnet_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_dir("rtnet", &rtnet_proc_root, NULL); ++ if (err < 0) ++ goto error1; ++ ++ err = xnvfile_init_regular("devices", &rtnet_devices_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error2; ++ ++ err = xnvfile_init_regular("rtskb", &rtnet_rtskb_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error3; ++ ++ err = xnvfile_init_regular("version", &rtnet_version_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error4; ++ ++ err = xnvfile_init_regular("stats", &rtnet_stats_vfile, ++ &rtnet_proc_root); ++ if (err < 0) ++ goto error5; ++ ++ return 0; ++ ++error5: ++ xnvfile_destroy_regular(&rtnet_version_vfile); ++ ++error4: ++ xnvfile_destroy_regular(&rtnet_rtskb_vfile); ++ ++error3: ++ xnvfile_destroy_regular(&rtnet_devices_vfile); ++ ++error2: ++ xnvfile_destroy_dir(&rtnet_proc_root); ++ ++error1: ++ printk("RTnet: unable to initialize /proc entries\n"); ++ return err; ++} ++ ++static void rtnet_proc_unregister(void) ++{ ++ xnvfile_destroy_regular(&rtnet_stats_vfile); ++ xnvfile_destroy_regular(&rtnet_version_vfile); ++ xnvfile_destroy_regular(&rtnet_rtskb_vfile); ++ xnvfile_destroy_regular(&rtnet_devices_vfile); ++ xnvfile_destroy_dir(&rtnet_proc_root); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/** ++ * rtnet_init() ++ */ ++int __init rtnet_init(void) ++{ ++ int err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("\n*** RTnet for Xenomai v" XENO_VERSION_STRING " ***\n\n"); ++ printk("RTnet: initialising real-time networking\n"); ++ ++ rtnet_class = class_create(THIS_MODULE, "rtnet"); ++ if (IS_ERR(rtnet_class)) ++ return PTR_ERR(rtnet_class); ++ ++ if ((err = rtskb_pools_init()) != 0) ++ goto err_out1; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if ((err = rtnet_proc_register()) != 0) ++ goto err_out2; ++#endif ++ ++ /* initialize the Stack-Manager */ ++ if ((err = rt_stack_mgr_init(&STACK_manager)) != 0) ++ goto err_out3; ++ ++ /* initialize the RTDEV-Manager */ ++ if ((err = rt_rtdev_mgr_init(&RTDEV_manager)) != 0) ++ goto err_out4; ++ ++ rtnet_chrdev_init(); ++ ++ if ((err = rtwlan_init()) != 0) ++ goto err_out5; ++ ++ if ((err = rtpc_init()) != 0) ++ goto err_out6; ++ ++ rtnet_corectl_register(); ++ ++ return 0; ++ ++err_out6: ++ rtwlan_exit(); ++ ++err_out5: ++ rtnet_chrdev_release(); ++ rt_rtdev_mgr_delete(&RTDEV_manager); ++ ++err_out4: ++ rt_stack_mgr_delete(&STACK_manager); ++ ++err_out3: ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtnet_proc_unregister(); ++ ++err_out2: ++#endif ++ rtskb_pools_release(); ++ ++err_out1: ++ class_destroy(rtnet_class); ++ ++ return err; ++} ++ ++/** ++ * rtnet_release() ++ */ ++void __exit rtnet_release(void) ++{ ++ rtnet_corectl_unregister(); ++ ++ rtpc_cleanup(); ++ ++ rtwlan_exit(); ++ ++ rtnet_chrdev_release(); ++ ++ rt_stack_mgr_delete(&STACK_manager); ++ rt_rtdev_mgr_delete(&RTDEV_manager); ++ ++ rtskb_pools_release(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtnet_proc_unregister(); ++#endif ++ ++ class_destroy(rtnet_class); ++ ++ printk("RTnet: unloaded\n"); ++} ++ ++module_init(rtnet_init); ++module_exit(rtnet_release); +--- linux/drivers/xenomai/net/stack/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/Makefile 2021-04-07 16:01:27.115634326 +0800 +@@ -0,0 +1,26 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include -Ikernel/ ++ 
++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4) += ipv4/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTPACKET) += packet/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTMAC) += rtmac/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTCFG) += rtcfg/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET) += rtnet.o ++ ++rtnet-y := \ ++ corectl.o \ ++ iovec.o \ ++ rtdev.o \ ++ rtdev_mgr.o \ ++ rtnet_chrdev.o \ ++ rtnet_module.o \ ++ rtnet_rtpc.o \ ++ rtskb.o \ ++ socket.o \ ++ stack_mgr.o \ ++ eth.o ++ ++rtnet-$(CONFIG_XENO_DRIVERS_NET_RTWLAN) += rtwlan.o +--- linux/drivers/xenomai/net/stack/packet/af_packet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/af_packet.c 2021-04-07 16:01:27.110634333 +0800 +@@ -0,0 +1,670 @@ ++/*** ++ * ++ * packet/af_packet.c ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++/*** ++ * rt_packet_rcv ++ */ ++static int rt_packet_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ int ifindex = sock->prot.packet.ifindex; ++ void (*callback_func)(struct rtdm_fd *, void *); ++ void *callback_arg; ++ rtdm_lockctx_t context; ++ ++ if (unlikely((ifindex != 0) && (ifindex != skb->rtdev->ifindex))) ++ return -EUNATCH; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ if (pt->type == htons(ETH_P_ALL)) { ++ struct rtskb *clone_skb = rtskb_clone(skb, &sock->skb_pool); ++ if (clone_skb == NULL) ++ goto out; ++ skb = clone_skb; ++ } else ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ if (unlikely(rtskb_acquire(skb, &sock->skb_pool) < 0)) { ++ kfree_rtskb(skb); ++ goto out; ++ } ++ ++ rtskb_queue_tail(&sock->incoming, skb); ++ rtdm_sem_up(&sock->pending_sem); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ callback_func = sock->callback_func; ++ callback_arg = sock->callback_arg; ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ if (callback_func) ++ callback_func(rt_socket_fd(sock), callback_arg); ++ ++out: ++ return 0; ++} ++ ++static bool rt_packet_trylock(struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ struct rtdm_fd *fd = rtdm_private_to_fd(sock); ++ ++ if (rtdm_fd_lock(fd) < 0) ++ return false; ++ ++ return true; ++} ++ ++static void rt_packet_unlock(struct rtpacket_type *pt) ++{ ++ struct rtsocket *sock = ++ container_of(pt, struct rtsocket, prot.packet.packet_type); ++ struct rtdm_fd *fd = rtdm_private_to_fd(sock); ++ ++ rtdm_fd_unlock(fd); ++} ++ ++/*** ++ * rt_packet_bind ++ */ ++static int rt_packet_bind(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr *addr, socklen_t 
addrlen) ++{ ++ struct sockaddr_ll _sll, *sll; ++ struct rtpacket_type *pt = &sock->prot.packet.packet_type; ++ int new_type; ++ int ret; ++ rtdm_lockctx_t context; ++ ++ if (addrlen < sizeof(struct sockaddr_ll)) ++ return -EINVAL; ++ ++ sll = rtnet_get_arg(fd, &_sll, addr, sizeof(_sll)); ++ if (IS_ERR(sll)) ++ return PTR_ERR(sll); ++ ++ if (sll->sll_family != AF_PACKET) ++ return -EINVAL; ++ ++ new_type = ++ (sll->sll_protocol != 0) ? sll->sll_protocol : sock->protocol; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ /* release existing binding */ ++ if (pt->type != 0) ++ rtdev_remove_pack(pt); ++ ++ pt->type = new_type; ++ sock->prot.packet.ifindex = sll->sll_ifindex; ++ ++ /* if protocol is non-zero, register the packet type */ ++ if (new_type != 0) { ++ pt->handler = rt_packet_rcv; ++ pt->err_handler = NULL; ++ pt->trylock = rt_packet_trylock; ++ pt->unlock = rt_packet_unlock; ++ ++ ret = rtdev_add_pack(pt); ++ } else ++ ret = 0; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_packet_getsockname ++ */ ++static int rt_packet_getsockname(struct rtdm_fd *fd, struct rtsocket *sock, ++ struct sockaddr *addr, socklen_t *addrlen) ++{ ++ struct sockaddr_ll _sll, *sll; ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ socklen_t _namelen, *namelen; ++ int ret; ++ ++ namelen = rtnet_get_arg(fd, &_namelen, addrlen, sizeof(_namelen)); ++ if (IS_ERR(namelen)) ++ return PTR_ERR(namelen); ++ ++ if (*namelen < sizeof(struct sockaddr_ll)) ++ return -EINVAL; ++ ++ sll = rtnet_get_arg(fd, &_sll, addr, sizeof(_sll)); ++ if (IS_ERR(sll)) ++ return PTR_ERR(sll); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ sll->sll_family = AF_PACKET; ++ sll->sll_ifindex = sock->prot.packet.ifindex; ++ sll->sll_protocol = sock->protocol; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ rtdev = rtdev_get_by_index(sll->sll_ifindex); ++ if (rtdev != NULL) { ++ sll->sll_hatype = rtdev->type; ++ sll->sll_halen = rtdev->addr_len; ++ memcpy(sll->sll_addr, rtdev->dev_addr, rtdev->addr_len); ++ rtdev_dereference(rtdev); ++ } else { ++ sll->sll_hatype = 0; ++ sll->sll_halen = 0; ++ } ++ ++ *namelen = sizeof(struct sockaddr_ll); ++ ++ ret = rtnet_put_arg(fd, addr, sll, sizeof(*sll)); ++ if (ret) ++ return ret; ++ ++ return rtnet_put_arg(fd, addrlen, namelen, sizeof(*namelen)); ++} ++ ++/*** ++ * rt_packet_socket - initialize a packet socket ++ */ ++static int rt_packet_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if ((ret = rt_socket_init(fd, protocol)) != 0) ++ return ret; ++ ++ sock->prot.packet.packet_type.type = protocol; ++ sock->prot.packet.ifindex = 0; ++ sock->prot.packet.packet_type.trylock = rt_packet_trylock; ++ sock->prot.packet.packet_type.unlock = rt_packet_unlock; ++ ++ /* if protocol is non-zero, register the packet type */ ++ if (protocol != 0) { ++ sock->prot.packet.packet_type.handler = rt_packet_rcv; ++ sock->prot.packet.packet_type.err_handler = NULL; ++ ++ if ((ret = rtdev_add_pack(&sock->prot.packet.packet_type)) < ++ 0) { ++ rt_socket_cleanup(fd); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt_packet_close ++ */ ++static void rt_packet_close(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct rtpacket_type *pt = &sock->prot.packet.packet_type; ++ struct rtskb *del; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ if (pt->type != 0) { ++ 
rtdev_remove_pack(pt); ++ pt->type = 0; ++ } ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ /* free packets in incoming queue */ ++ while ((del = rtskb_dequeue(&sock->incoming)) != NULL) { ++ kfree_rtskb(del); ++ } ++ ++ rt_socket_cleanup(fd); ++} ++ ++/*** ++ * rt_packet_ioctl ++ */ ++static int rt_packet_ioctl(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ const struct _rtdm_getsockaddr_args *getaddr; ++ struct _rtdm_getsockaddr_args _getaddr; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_packet_bind(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ case _RTIOC_GETSOCKNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ return rt_packet_getsockname(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ default: ++ return rt_socket_if_ioctl(fd, request, arg); ++ } ++} ++ ++/*** ++ * rt_packet_recvmsg ++ */ ++static ssize_t rt_packet_recvmsg(struct rtdm_fd *fd, struct user_msghdr *u_msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ssize_t len; ++ size_t copy_len; ++ struct rtskb *rtskb; ++ struct sockaddr_ll sll; ++ int ret, flags; ++ nanosecs_rel_t timeout = sock->timeout; ++ struct user_msghdr _msg, *msg; ++ socklen_t namelen; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ msg = rtnet_get_arg(fd, &_msg, u_msg, sizeof(_msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ /* non-blocking receive? */ ++ if (msg_flags & MSG_DONTWAIT) ++ timeout = -1; ++ ++ ret = rtdm_sem_timeddown(&sock->pending_sem, timeout, NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ default: ++ ret = -EBADF; /* socket has been closed */ ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ rtskb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(rtskb != NULL, return -EFAULT;); ++ ++ /* copy the address if required. 
*/ ++ if (msg->msg_name) { ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ memset(&sll, 0, sizeof(sll)); ++ sll.sll_family = AF_PACKET; ++ sll.sll_hatype = rtdev->type; ++ sll.sll_protocol = rtskb->protocol; ++ sll.sll_pkttype = rtskb->pkt_type; ++ sll.sll_ifindex = rtdev->ifindex; ++ ++ /* Ethernet specific - we rather need some parse handler here */ ++ memcpy(sll.sll_addr, rtskb->mac.ethernet->h_source, ETH_ALEN); ++ sll.sll_halen = ETH_ALEN; ++ ret = rtnet_put_arg(fd, msg->msg_name, &sll, sizeof(sll)); ++ if (ret) ++ goto fail; ++ ++ namelen = sizeof(sll); ++ ret = rtnet_put_arg(fd, &u_msg->msg_namelen, &namelen, ++ sizeof(namelen)); ++ if (ret) ++ goto fail; ++ } ++ ++ /* Include the header in raw delivery */ ++ if (rtdm_fd_to_context(fd)->device->driver->socket_type != SOCK_DGRAM) ++ rtskb_push(rtskb, rtskb->data - rtskb->mac.raw); ++ ++ /* The data must not be longer than the available buffer size */ ++ copy_len = rtskb->len; ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ if (len < 0) { ++ copy_len = len; ++ goto out; ++ } ++ ++ if (copy_len > len) { ++ copy_len = len; ++ flags = msg->msg_flags | MSG_TRUNC; ++ ret = rtnet_put_arg(fd, &u_msg->msg_flags, &flags, ++ sizeof(flags)); ++ if (ret) ++ goto fail; ++ } ++ ++ copy_len = rtnet_write_to_iov(fd, iov, msg->msg_iovlen, rtskb->data, ++ copy_len); ++out: ++ if ((msg_flags & MSG_PEEK) == 0) { ++ kfree_rtskb(rtskb); ++ } else { ++ rtskb_queue_head(&sock->incoming, rtskb); ++ rtdm_sem_up(&sock->pending_sem); ++ } ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return copy_len; ++fail: ++ copy_len = ret; ++ goto out; ++} ++ ++/*** ++ * rt_packet_sendmsg ++ */ ++static ssize_t rt_packet_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ struct sockaddr_ll _sll, *sll; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned short proto; ++ unsigned char *addr; ++ int ifindex; ++ ssize_t ret; ++ struct user_msghdr _msg; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ if (msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ if (msg_flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ if (msg->msg_name == NULL) { ++ /* Note: We do not care about races with rt_packet_bind here - ++ the user has to do so. 
*/ ++ ifindex = sock->prot.packet.ifindex; ++ proto = sock->prot.packet.packet_type.type; ++ addr = NULL; ++ sll = NULL; ++ } else { ++ sll = rtnet_get_arg(fd, &_sll, msg->msg_name, sizeof(_sll)); ++ if (IS_ERR(sll)) { ++ ret = PTR_ERR(sll); ++ goto abort; ++ } ++ ++ if ((msg->msg_namelen < sizeof(struct sockaddr_ll)) || ++ (msg->msg_namelen < ++ (sll->sll_halen + ++ offsetof(struct sockaddr_ll, sll_addr))) || ++ ((sll->sll_family != AF_PACKET) && ++ (sll->sll_family != AF_UNSPEC))) { ++ ret = -EINVAL; ++ goto abort; ++ } ++ ++ ifindex = sll->sll_ifindex; ++ proto = sll->sll_protocol; ++ addr = sll->sll_addr; ++ } ++ ++ if ((rtdev = rtdev_get_by_index(ifindex)) == NULL) { ++ ret = -ENODEV; ++ goto abort; ++ } ++ ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ rtskb = alloc_rtskb(rtdev->hard_header_len + len, &sock->skb_pool); ++ if (rtskb == NULL) { ++ ret = -ENOBUFS; ++ goto out; ++ } ++ ++ /* If an RTmac discipline is active, this becomes a pure sanity check to ++ avoid writing beyond rtskb boundaries. The hard check is then performed ++ upon rtdev_xmit() by the discipline's xmit handler. */ ++ if (len > ++ rtdev->mtu + ++ ((rtdm_fd_to_context(fd)->device->driver->socket_type == ++ SOCK_RAW) ? ++ rtdev->hard_header_len : ++ 0)) { ++ ret = -EMSGSIZE; ++ goto err; ++ } ++ ++ if ((sll != NULL) && (sll->sll_halen != rtdev->addr_len)) { ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ rtskb->rtdev = rtdev; ++ rtskb->priority = sock->priority; ++ ++ if (rtdev->hard_header) { ++ int hdr_len; ++ ++ ret = -EINVAL; ++ hdr_len = rtdev->hard_header(rtskb, rtdev, ntohs(proto), addr, ++ NULL, len); ++ if (rtdm_fd_to_context(fd)->device->driver->socket_type != ++ SOCK_DGRAM) { ++ rtskb->tail = rtskb->data; ++ rtskb->len = 0; ++ } else if (hdr_len < 0) ++ goto err; ++ } ++ ++ ret = rtnet_read_from_iov(fd, iov, msg->msg_iovlen, ++ rtskb_put(rtskb, len), len); ++ ++ if ((rtdev->flags & IFF_UP) != 0) { ++ if ((ret = rtdev_xmit(rtskb)) == 0) ++ ret = len; ++ } else { ++ ret = -ENETDOWN; ++ goto err; ++ } ++ ++out: ++ rtdev_dereference(rtdev); ++abort: ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++err: ++ kfree_rtskb(rtskb); ++ goto out; ++} ++ ++static struct rtdm_driver packet_proto_drv = { ++ .profile_info = RTDM_PROFILE_INFO(packet, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_PACKET, ++ .socket_type = SOCK_DGRAM, ++ ++ ++ .ops = { ++ .socket = rt_packet_socket, ++ .close = rt_packet_close, ++ .ioctl_rt = rt_packet_ioctl, ++ .ioctl_nrt = rt_packet_ioctl, ++ .recvmsg_rt = rt_packet_recvmsg, ++ .sendmsg_rt = rt_packet_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct rtdm_device packet_proto_dev = { ++ .driver = &packet_proto_drv, ++ .label = "packet", ++}; ++ ++static struct rtdm_driver raw_packet_proto_drv = { ++ .profile_info = RTDM_PROFILE_INFO(raw_packet, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_PACKET, ++ .socket_type = SOCK_RAW, ++ ++ .ops = { ++ .socket = rt_packet_socket, ++ .close = rt_packet_close, ++ .ioctl_rt = rt_packet_ioctl, ++ .ioctl_nrt = rt_packet_ioctl, ++ .recvmsg_rt = rt_packet_recvmsg, ++ .sendmsg_rt = rt_packet_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct 
rtdm_device raw_packet_proto_dev = { ++ .driver = &raw_packet_proto_drv, ++ .label = "raw_packet", ++}; ++ ++static int __init rt_packet_proto_init(void) ++{ ++ int err; ++ ++ err = rtdm_dev_register(&packet_proto_dev); ++ if (err) ++ return err; ++ ++ err = rtdm_dev_register(&raw_packet_proto_dev); ++ if (err) ++ rtdm_dev_unregister(&packet_proto_dev); ++ ++ return err; ++} ++ ++static void rt_packet_proto_release(void) ++{ ++ rtdm_dev_unregister(&packet_proto_dev); ++ rtdm_dev_unregister(&raw_packet_proto_dev); ++} ++ ++module_init(rt_packet_proto_init); ++module_exit(rt_packet_proto_release); ++ ++/********************************************************** ++ * Utilities * ++ **********************************************************/ ++ ++static int hex2int(unsigned char hex_char) ++{ ++ if ((hex_char >= '0') && (hex_char <= '9')) ++ return hex_char - '0'; ++ else if ((hex_char >= 'a') && (hex_char <= 'f')) ++ return hex_char - 'a' + 10; ++ else if ((hex_char >= 'A') && (hex_char <= 'F')) ++ return hex_char - 'A' + 10; ++ else ++ return -EINVAL; ++} ++ ++int rt_eth_aton(unsigned char *addr_buf, const char *mac) ++{ ++ int i = 0; ++ int nibble; ++ ++ while (1) { ++ if (*mac == 0) ++ return -EINVAL; ++ ++ if ((nibble = hex2int(*mac++)) < 0) ++ return nibble; ++ *addr_buf = nibble << 4; ++ ++ if (*mac == 0) ++ return -EINVAL; ++ ++ if ((nibble = hex2int(*mac++)) < 0) ++ return nibble; ++ *addr_buf++ |= nibble; ++ ++ if (++i == 6) ++ break; ++ ++ if ((*mac == 0) || (*mac++ != ':')) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rt_eth_aton); +--- linux/drivers/xenomai/net/stack/packet/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/Makefile 2021-04-07 16:01:27.106634339 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTPACKET) += rtpacket.o ++ ++rtpacket-y := af_packet.o +--- linux/drivers/xenomai/net/stack/packet/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/packet/Kconfig 2021-04-07 16:01:27.101634346 +0800 +@@ -0,0 +1,14 @@ ++config XENO_DRIVERS_NET_RTPACKET ++ depends on XENO_DRIVERS_NET ++ tristate "Real-Time Packet Socket Support" ++ default y ++ ---help--- ++ Enables real-time packet sockets for RTnet. This support is ++ implemented in a separate module. When loaded, application programs ++ can send and received so-called "cooked" packets directly at OSI layer ++ 2 (device layer). This means that RTnet will still maintain the ++ device-dependent packet header but leave the full data segment to the ++ user. ++ ++ Examples like raw-ethernet or netshm make use of this support. See ++ also Linux man page packet(7). +--- linux/drivers/xenomai/net/stack/eth.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/eth.c 2021-04-07 16:01:27.096634353 +0800 +@@ -0,0 +1,131 @@ ++/*** ++ * ++ * stack/eth.c - Ethernet-specific functions ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* ++ * Create the Ethernet MAC header for an arbitrary protocol layer ++ * ++ * saddr=NULL means use device source address ++ * daddr=NULL means leave destination address (eg unresolved arp) ++ */ ++int rt_eth_header(struct rtskb *skb, struct rtnet_device *rtdev, ++ unsigned short type, void *daddr, void *saddr, unsigned len) ++{ ++ struct ethhdr *eth = (struct ethhdr *)rtskb_push(skb, ETH_HLEN); ++ ++ /* ++ * Set rtskb mac field ++ */ ++ ++ skb->mac.ethernet = eth; ++ ++ /* ++ * Set the protocol type. For a packet of type ETH_P_802_3 we put the length ++ * in here instead. It is up to the 802.2 layer to carry protocol information. ++ */ ++ ++ if (type != ETH_P_802_3) ++ eth->h_proto = htons(type); ++ else ++ eth->h_proto = htons(len); ++ ++ /* ++ * Set the source hardware address. ++ */ ++ ++ if (saddr) ++ memcpy(eth->h_source, saddr, rtdev->addr_len); ++ else ++ memcpy(eth->h_source, rtdev->dev_addr, rtdev->addr_len); ++ ++ if (rtdev->flags & (IFF_LOOPBACK | IFF_NOARP)) { ++ memset(eth->h_dest, 0, rtdev->addr_len); ++ return rtdev->hard_header_len; ++ } ++ ++ if (daddr) { ++ memcpy(eth->h_dest, daddr, rtdev->addr_len); ++ return rtdev->hard_header_len; ++ } ++ ++ return -rtdev->hard_header_len; ++} ++ ++unsigned short rt_eth_type_trans(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ struct ethhdr *eth; ++ unsigned char *rawp; ++ ++ rtcap_mark_incoming(skb); ++ ++ skb->mac.raw = skb->data; ++ rtskb_pull(skb, rtdev->hard_header_len); ++ eth = skb->mac.ethernet; ++ ++ if (*eth->h_dest & 1) { ++ if (memcmp(eth->h_dest, rtdev->broadcast, ETH_ALEN) == 0) ++ skb->pkt_type = PACKET_BROADCAST; ++ else ++ skb->pkt_type = PACKET_MULTICAST; ++ } ++ ++ /* ++ * This ALLMULTI check should be redundant by 1.4 ++ * so don't forget to remove it. ++ * ++ * Seems, you forgot to remove it. All silly devices ++ * seems to set IFF_PROMISC. ++ */ ++ ++ else if (1 /*rtdev->flags&IFF_PROMISC*/) { ++ if (memcmp(eth->h_dest, rtdev->dev_addr, ETH_ALEN)) ++ skb->pkt_type = PACKET_OTHERHOST; ++ } ++ ++ if (ntohs(eth->h_proto) >= 1536) ++ return eth->h_proto; ++ ++ rawp = skb->data; ++ ++ /* ++ * This is a magic hack to spot IPX packets. Older Novell breaks ++ * the protocol design and runs IPX over 802.3 without an 802.2 LLC ++ * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This ++ * won't work for fault tolerant netware but does for the rest. ++ */ ++ if (*(unsigned short *)rawp == 0xFFFF) ++ return htons(ETH_P_802_3); ++ ++ /* ++ * Real 802.2 LLC ++ */ ++ return htons(ETH_P_802_2); ++} ++ ++EXPORT_SYMBOL_GPL(rt_eth_header); ++EXPORT_SYMBOL_GPL(rt_eth_type_trans); +--- linux/drivers/xenomai/net/stack/ipv4/protocol.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/protocol.c 2021-04-07 16:01:27.092634359 +0800 +@@ -0,0 +1,88 @@ ++/*** ++ * ++ * ipv4/protocol.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++struct rtinet_protocol *rt_inet_protocols[MAX_RT_INET_PROTOCOLS]; ++ ++/*** ++ * rt_inet_add_protocol ++ */ ++void rt_inet_add_protocol(struct rtinet_protocol *prot) ++{ ++ unsigned char hash = rt_inet_hashkey(prot->protocol); ++ ++ if (rt_inet_protocols[hash] == NULL) ++ rt_inet_protocols[hash] = prot; ++} ++EXPORT_SYMBOL_GPL(rt_inet_add_protocol); ++ ++/*** ++ * rt_inet_del_protocol ++ */ ++void rt_inet_del_protocol(struct rtinet_protocol *prot) ++{ ++ unsigned char hash = rt_inet_hashkey(prot->protocol); ++ ++ if (prot == rt_inet_protocols[hash]) ++ rt_inet_protocols[hash] = NULL; ++} ++EXPORT_SYMBOL_GPL(rt_inet_del_protocol); ++ ++/*** ++ * rt_inet_socket - initialize an Internet socket ++ * @sock: socket structure ++ * @protocol: protocol id ++ */ ++int rt_inet_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtinet_protocol *prot; ++ ++ if (protocol == 0) ++ switch (rtdm_fd_to_context(fd)->device->driver->socket_type) { ++ case SOCK_DGRAM: ++ protocol = IPPROTO_UDP; ++ break; ++ case SOCK_STREAM: ++ protocol = IPPROTO_TCP; ++ break; ++ } ++ ++ prot = rt_inet_protocols[rt_inet_hashkey(protocol)]; ++ ++ /* create the socket (call the socket creator) */ ++ if ((prot != NULL) && (prot->protocol == protocol)) ++ return prot->init_socket(fd); ++ else { ++ rtdm_printk("RTnet: protocol with id %d not found\n", protocol); ++ ++ return -ENOPROTOOPT; ++ } ++} ++EXPORT_SYMBOL_GPL(rt_inet_socket); +--- linux/drivers/xenomai/net/stack/ipv4/route.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/route.c 2021-04-07 16:01:27.087634366 +0800 +@@ -0,0 +1,1057 @@ ++/*** ++ * ++ * ipv4/route.c - real-time routing ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * Rewritten version of the original route by David Schleef and Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* FIXME: should also become some tunable parameter */ ++#define ROUTER_FORWARD_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MAX_PRIO + \ ++ (QUEUE_MIN_PRIO - QUEUE_MAX_PRIO + 1) / 2, \ ++ RTSKB_DEF_RT_CHANNEL) ++ ++/* First-level routing: explicite host routes */ ++struct host_route { ++ struct host_route *next; ++ struct dest_route dest_host; ++}; ++ ++/* Second-level routing: routes to other networks */ ++struct net_route { ++ struct net_route *next; ++ u32 dest_net_ip; ++ u32 dest_net_mask; ++ u32 gw_ip; ++}; ++ ++#if (CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES & \ ++ (CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES - 1)) ++#error CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES must be power of 2 ++#endif ++#if CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES < 256 ++#define HOST_HASH_TBL_SIZE 64 ++#else ++#define HOST_HASH_TBL_SIZE \ ++ ((CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES / 256) * 64) ++#endif ++#define HOST_HASH_KEY_MASK (HOST_HASH_TBL_SIZE - 1) ++ ++static struct host_route host_routes[CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES]; ++static struct host_route *free_host_route; ++static int allocated_host_routes; ++static struct host_route *host_hash_tbl[HOST_HASH_TBL_SIZE]; ++static DEFINE_RTDM_LOCK(host_table_lock); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++#if (CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES & \ ++ (CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES - 1)) ++#error CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES must be power of 2 ++#endif ++#if CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES < 256 ++#define NET_HASH_TBL_SIZE 64 ++#else ++#define NET_HASH_TBL_SIZE \ ++ ((CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES / 256) * 64) ++#endif ++#define NET_HASH_KEY_MASK (NET_HASH_TBL_SIZE - 1) ++#define NET_HASH_KEY_SHIFT 8 ++ ++static struct net_route net_routes[CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES]; ++static struct net_route *free_net_route; ++static int allocated_net_routes; ++static struct net_route *net_hash_tbl[NET_HASH_TBL_SIZE + 1]; ++static unsigned int net_hash_key_shift = NET_HASH_KEY_SHIFT; ++static DEFINE_RTDM_LOCK(net_table_lock); ++ ++module_param(net_hash_key_shift, uint, 0444); ++MODULE_PARM_DESC(net_hash_key_shift, "destination right shift for " ++ "network hash key (default: 8)"); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++/*** ++ * proc filesystem section ++ */ ++#ifdef CONFIG_XENO_OPT_VFILE ++static int rtnet_ipv4_route_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ u32 mask; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ xnvfile_printf(it, ++ "Host routes allocated/total:\t%d/%d\n" ++ "Host hash table size:\t\t%d\n", ++ allocated_host_routes, ++ CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES, ++ HOST_HASH_TBL_SIZE); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ xnvfile_printf(it, ++ "Network routes allocated/total:\t%d/%d\n" ++ "Network hash table size:\t%d\n" ++ "Network hash key shift/mask:\t%d/%08X\n", ++ allocated_net_routes, ++ CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES, ++ NET_HASH_TBL_SIZE, net_hash_key_shift, mask); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++ xnvfile_printf(it, "IP Router:\t\t\tyes\n"); ++#else ++ xnvfile_printf(it, "IP Router:\t\t\tno\n"); ++#endif ++ ++ return 0; ++} ++ ++static int rtnet_ipv4_module_lock(struct xnvfile *vfile) ++{ ++ bool 
res = try_module_get(THIS_MODULE); ++ if (!res) ++ return -EIDRM; ++ ++ return 0; ++} ++ ++static void rtnet_ipv4_module_unlock(struct xnvfile *vfile) ++{ ++ module_put(THIS_MODULE); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_module_lock_ops = { ++ .get = rtnet_ipv4_module_lock, ++ .put = rtnet_ipv4_module_unlock, ++}; ++ ++static struct xnvfile_regular_ops rtnet_ipv4_route_vfile_ops = { ++ .show = rtnet_ipv4_route_show, ++}; ++ ++static struct xnvfile_regular rtnet_ipv4_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_module_lock_ops, ++ }, ++ .ops = &rtnet_ipv4_route_vfile_ops, ++}; ++ ++static rtdm_lockctx_t rtnet_ipv4_host_route_lock_ctx; ++ ++static int rtnet_ipv4_host_route_lock(struct xnvfile *vfile) ++{ ++ rtdm_lock_get_irqsave(&host_table_lock, rtnet_ipv4_host_route_lock_ctx); ++ return 0; ++} ++ ++static void rtnet_ipv4_host_route_unlock(struct xnvfile *vfile) ++{ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ rtnet_ipv4_host_route_lock_ctx); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_host_route_lock_ops = { ++ .get = rtnet_ipv4_host_route_lock, ++ .put = rtnet_ipv4_host_route_unlock, ++}; ++ ++struct rtnet_ipv4_host_route_priv { ++ unsigned key; ++ struct host_route *entry_ptr; ++}; ++ ++struct rtnet_ipv4_host_route_data { ++ int key; ++ char name[IFNAMSIZ]; ++ struct dest_route dest_host; ++}; ++ ++struct xnvfile_rev_tag host_route_tag; ++ ++static void *rtnet_ipv4_host_route_begin(struct xnvfile_snapshot_iterator *it) ++{ ++ struct rtnet_ipv4_host_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_host_route_data *data; ++ unsigned routes; ++ int err; ++ ++ routes = allocated_host_routes; ++ if (!routes) ++ return VFILE_SEQ_EMPTY; ++ ++ data = kmalloc(sizeof(*data) * routes, GFP_KERNEL); ++ if (data == NULL) ++ return NULL; ++ ++ err = rtnet_ipv4_module_lock(NULL); ++ if (err < 0) { ++ kfree(data); ++ return VFILE_SEQ_EMPTY; ++ } ++ ++ priv->key = -1; ++ priv->entry_ptr = NULL; ++ return data; ++} ++ ++static void rtnet_ipv4_host_route_end(struct xnvfile_snapshot_iterator *it, ++ void *buf) ++{ ++ rtnet_ipv4_module_unlock(NULL); ++ kfree(buf); ++} ++ ++static int rtnet_ipv4_host_route_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_host_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_host_route_data *p = data; ++ struct rtnet_device *rtdev; ++ ++ if (priv->entry_ptr == NULL) { ++ if (++priv->key >= HOST_HASH_TBL_SIZE) ++ return 0; ++ ++ priv->entry_ptr = host_hash_tbl[priv->key]; ++ if (priv->entry_ptr == NULL) ++ return VFILE_SEQ_SKIP; ++ } ++ ++ rtdev = priv->entry_ptr->dest_host.rtdev; ++ ++ if (!rtdev_reference(rtdev)) ++ return -EIDRM; ++ ++ memcpy(&p->name, rtdev->name, sizeof(p->name)); ++ ++ rtdev_dereference(rtdev); ++ ++ p->key = priv->key; ++ ++ memcpy(&p->dest_host, &priv->entry_ptr->dest_host, ++ sizeof(p->dest_host)); ++ ++ priv->entry_ptr = priv->entry_ptr->next; ++ ++ return 1; ++} ++ ++static int rtnet_ipv4_host_route_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_host_route_data *p = data; ++ ++ if (p == NULL) { ++ xnvfile_printf(it, "Hash\tDestination\tHW Address\t\tDevice\n"); ++ return 0; ++ } ++ ++ xnvfile_printf(it, ++ "%02X\t%u.%u.%u.%-3u\t" ++ "%02X:%02X:%02X:%02X:%02X:%02X\t%s\n", ++ p->key, NIPQUAD(p->dest_host.ip), ++ p->dest_host.dev_addr[0], p->dest_host.dev_addr[1], ++ p->dest_host.dev_addr[2], p->dest_host.dev_addr[3], ++ p->dest_host.dev_addr[4], p->dest_host.dev_addr[5], ++ p->name); ++ return 0; ++} ++ ++static 
struct xnvfile_snapshot_ops rtnet_ipv4_host_route_vfile_ops = { ++ .begin = rtnet_ipv4_host_route_begin, ++ .end = rtnet_ipv4_host_route_end, ++ .next = rtnet_ipv4_host_route_next, ++ .show = rtnet_ipv4_host_route_show, ++}; ++ ++static struct xnvfile_snapshot rtnet_ipv4_host_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_host_route_lock_ops, ++ }, ++ .privsz = sizeof(struct rtnet_ipv4_host_route_priv), ++ .datasz = sizeof(struct rtnet_ipv4_host_route_data), ++ .tag = &host_route_tag, ++ .ops = &rtnet_ipv4_host_route_vfile_ops, ++}; ++ ++static struct xnvfile_link rtnet_ipv4_arp_vfile; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++static rtdm_lockctx_t rtnet_ipv4_net_route_lock_ctx; ++ ++static int rtnet_ipv4_net_route_lock(struct xnvfile *vfile) ++{ ++ rtdm_lock_get_irqsave(&net_table_lock, rtnet_ipv4_net_route_lock_ctx); ++ return 0; ++} ++ ++static void rtnet_ipv4_net_route_unlock(struct xnvfile *vfile) ++{ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ rtnet_ipv4_net_route_lock_ctx); ++} ++ ++static struct xnvfile_lock_ops rtnet_ipv4_net_route_lock_ops = { ++ .get = rtnet_ipv4_net_route_lock, ++ .put = rtnet_ipv4_net_route_unlock, ++}; ++ ++struct rtnet_ipv4_net_route_priv { ++ unsigned key; ++ struct net_route *entry_ptr; ++}; ++ ++struct rtnet_ipv4_net_route_data { ++ int key; ++ u32 dest_net_ip; ++ u32 dest_net_mask; ++ u32 gw_ip; ++}; ++ ++struct xnvfile_rev_tag net_route_tag; ++ ++static void *rtnet_ipv4_net_route_begin(struct xnvfile_snapshot_iterator *it) ++{ ++ struct rtnet_ipv4_net_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_net_route_data *data; ++ unsigned routes; ++ int err; ++ ++ routes = allocated_net_routes; ++ if (!routes) ++ return VFILE_SEQ_EMPTY; ++ ++ data = kmalloc(sizeof(*data) * routes, GFP_KERNEL); ++ if (data == NULL) ++ return NULL; ++ ++ err = rtnet_ipv4_module_lock(NULL); ++ if (err < 0) { ++ kfree(data); ++ return VFILE_SEQ_EMPTY; ++ } ++ ++ priv->key = -1; ++ priv->entry_ptr = NULL; ++ return data; ++} ++ ++static void rtnet_ipv4_net_route_end(struct xnvfile_snapshot_iterator *it, ++ void *buf) ++{ ++ rtnet_ipv4_module_unlock(NULL); ++ kfree(buf); ++} ++ ++static int rtnet_ipv4_net_route_next(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_net_route_priv *priv = xnvfile_iterator_priv(it); ++ struct rtnet_ipv4_net_route_data *p = data; ++ ++ if (priv->entry_ptr == NULL) { ++ if (++priv->key >= NET_HASH_TBL_SIZE + 1) ++ return 0; ++ ++ priv->entry_ptr = net_hash_tbl[priv->key]; ++ if (priv->entry_ptr == NULL) ++ return VFILE_SEQ_SKIP; ++ } ++ ++ p->key = priv->key; ++ p->dest_net_ip = priv->entry_ptr->dest_net_ip; ++ p->dest_net_mask = priv->entry_ptr->dest_net_mask; ++ p->gw_ip = priv->entry_ptr->gw_ip; ++ ++ priv->entry_ptr = priv->entry_ptr->next; ++ ++ return 1; ++} ++ ++static int rtnet_ipv4_net_route_show(struct xnvfile_snapshot_iterator *it, ++ void *data) ++{ ++ struct rtnet_ipv4_net_route_data *p = data; ++ ++ if (p == NULL) { ++ xnvfile_printf(it, "Hash\tDestination\tMask\t\t\tGateway\n"); ++ return 0; ++ } ++ ++ if (p->key < NET_HASH_TBL_SIZE) ++ xnvfile_printf(it, ++ "%02X\t%u.%u.%u.%-3u\t%u.%u.%u.%-3u" ++ "\t\t%u.%u.%u.%-3u\n", ++ p->key, NIPQUAD(p->dest_net_ip), ++ NIPQUAD(p->dest_net_mask), NIPQUAD(p->gw_ip)); ++ else ++ xnvfile_printf(it, ++ "*\t%u.%u.%u.%-3u\t%u.%u.%u.%-3u\t\t" ++ "%u.%u.%u.%-3u\n", ++ NIPQUAD(p->dest_net_ip), ++ NIPQUAD(p->dest_net_mask), NIPQUAD(p->gw_ip)); ++ ++ return 0; ++} ++ ++static struct xnvfile_snapshot_ops rtnet_ipv4_net_route_vfile_ops = { 
++ .begin = rtnet_ipv4_net_route_begin, ++ .end = rtnet_ipv4_net_route_end, ++ .next = rtnet_ipv4_net_route_next, ++ .show = rtnet_ipv4_net_route_show, ++}; ++ ++static struct xnvfile_snapshot rtnet_ipv4_net_route_vfile = { ++ .entry = { ++ .lockops = &rtnet_ipv4_net_route_lock_ops, ++ }, ++ .privsz = sizeof(struct rtnet_ipv4_net_route_priv), ++ .datasz = sizeof(struct rtnet_ipv4_net_route_data), ++ .tag = &net_route_tag, ++ .ops = &rtnet_ipv4_net_route_vfile_ops, ++}; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++static int __init rt_route_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_regular("route", &rtnet_ipv4_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ err = xnvfile_init_snapshot("host_route", &rtnet_ipv4_host_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err2; ++ ++ /* create "arp" as an alias for "host_route" */ ++ err = xnvfile_init_link("arp", "host_route", &rtnet_ipv4_arp_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err3; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ err = xnvfile_init_snapshot("net_route", &rtnet_ipv4_net_route_vfile, ++ &ipv4_proc_root); ++ if (err < 0) ++ goto err4; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ return 0; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++err4: ++ xnvfile_destroy_link(&rtnet_ipv4_arp_vfile); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++err3: ++ xnvfile_destroy_snapshot(&rtnet_ipv4_host_route_vfile); ++ ++err2: ++ xnvfile_destroy_regular(&rtnet_ipv4_route_vfile); ++ ++err1: ++ printk("RTnet: unable to initialize /proc entries (route)\n"); ++ return err; ++} ++ ++static void rt_route_proc_unregister(void) ++{ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ xnvfile_destroy_snapshot(&rtnet_ipv4_net_route_vfile); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ xnvfile_destroy_link(&rtnet_ipv4_arp_vfile); ++ xnvfile_destroy_snapshot(&rtnet_ipv4_host_route_vfile); ++ xnvfile_destroy_regular(&rtnet_ipv4_route_vfile); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/*** ++ * rt_alloc_host_route - allocates new host route ++ */ ++static inline struct host_route *rt_alloc_host_route(void) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ if ((rt = free_host_route) != NULL) { ++ free_host_route = rt->next; ++ allocated_host_routes++; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return rt; ++} ++ ++/*** ++ * rt_free_host_route - releases host route ++ * ++ * Note: must be called with host_table_lock held ++ */ ++static inline void rt_free_host_route(struct host_route *rt) ++{ ++ rt->next = free_host_route; ++ free_host_route = rt; ++ allocated_host_routes--; ++} ++ ++/*** ++ * rt_ip_route_add_host: add or update host route ++ */ ++int rt_ip_route_add_host(u32 addr, unsigned char *dev_addr, ++ struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *new_route; ++ struct host_route *rt; ++ unsigned int key; ++ int ret = 0; ++ ++ rtdm_lock_get_irqsave(&rtdev->rtdev_lock, context); ++ ++ if ((!test_bit(PRIV_FLAG_UP, &rtdev->priv_flags) || ++ test_and_set_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags))) { ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ return -EBUSY; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ ++ if ((new_route = rt_alloc_host_route()) != NULL) { ++ new_route->dest_host.ip = addr; ++ new_route->dest_host.rtdev = rtdev; ++ 
memcpy(new_route->dest_host.dev_addr, dev_addr, ++ rtdev->addr_len); ++ } ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ xnvfile_touch_tag(&host_route_tag); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (rt->dest_host.rtdev->local_ip == rtdev->local_ip)) { ++ rt->dest_host.rtdev = rtdev; ++ memcpy(rt->dest_host.dev_addr, dev_addr, ++ rtdev->addr_len); ++ ++ if (new_route) ++ rt_free_host_route(new_route); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ goto out; ++ } ++ ++ rt = rt->next; ++ } ++ ++ if (new_route) { ++ new_route->next = host_hash_tbl[key]; ++ host_hash_tbl[key] = new_route; ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: no more host routes available\n"); ++ ret = -ENOBUFS; ++ } ++ ++out: ++ clear_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags); ++ ++ return ret; ++} ++ ++/*** ++ * rt_ip_route_del_host - deletes specified host route ++ */ ++int rt_ip_route_del_host(u32 addr, struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ struct host_route **last_ptr; ++ unsigned int key; ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ last_ptr = &host_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (!rtdev || ++ (rt->dest_host.rtdev->local_ip == rtdev->local_ip))) { ++ *last_ptr = rt->next; ++ ++ rt_free_host_route(rt); ++ ++ xnvfile_touch_tag(&host_route_tag); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return -ENOENT; ++} ++ ++/*** ++ * rt_ip_route_del_all - deletes all routes associated with a specified device ++ */ ++void rt_ip_route_del_all(struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *host_rt; ++ struct host_route **last_host_ptr; ++ unsigned int key; ++ u32 ip; ++ ++ for (key = 0; key < HOST_HASH_TBL_SIZE; key++) { ++ host_start_over: ++ last_host_ptr = &host_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ host_rt = host_hash_tbl[key]; ++ while (host_rt != NULL) { ++ if (host_rt->dest_host.rtdev == rtdev) { ++ *last_host_ptr = host_rt->next; ++ ++ rt_free_host_route(host_rt); ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ context); ++ ++ goto host_start_over; ++ } ++ ++ last_host_ptr = &host_rt->next; ++ host_rt = host_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ } ++ ++ if ((ip = rtdev->local_ip) != 0) ++ rt_ip_route_del_host(ip, rtdev); ++} ++ ++/*** ++ * rt_ip_route_get_host - check if specified host route is resolved ++ */ ++int rt_ip_route_get_host(u32 addr, char *if_name, unsigned char *dev_addr, ++ struct rtnet_device *rtdev) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *rt; ++ unsigned int key; ++ ++ key = ntohl(addr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ rt = host_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_host.ip == addr) && ++ (!rtdev || ++ rt->dest_host.rtdev->local_ip == rtdev->local_ip)) { ++ memcpy(dev_addr, rt->dest_host.dev_addr, ++ rt->dest_host.rtdev->addr_len); ++ strncpy(if_name, rt->dest_host.rtdev->name, IFNAMSIZ); ++ ++ 
rtdm_lock_put_irqrestore(&host_table_lock, context); ++ return 0; ++ } ++ ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++ return -ENOENT; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++/*** ++ * rt_alloc_net_route - allocates new network route ++ */ ++static inline struct net_route *rt_alloc_net_route(void) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *rt; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ if ((rt = free_net_route) != NULL) { ++ free_net_route = rt->next; ++ allocated_net_routes++; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return rt; ++} ++ ++/*** ++ * rt_free_net_route - releases network route ++ * ++ * Note: must be called with net_table_lock held ++ */ ++static inline void rt_free_net_route(struct net_route *rt) ++{ ++ rt->next = free_net_route; ++ free_net_route = rt; ++ allocated_host_routes--; ++} ++ ++/*** ++ * rt_ip_route_add_net: add or update network route ++ */ ++int rt_ip_route_add_net(u32 addr, u32 mask, u32 gw_addr) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *new_route; ++ struct net_route *rt; ++ struct net_route **last_ptr; ++ unsigned int key; ++ u32 shifted_mask; ++ ++ addr &= mask; ++ ++ if ((new_route = rt_alloc_net_route()) != NULL) { ++ new_route->dest_net_ip = addr; ++ new_route->dest_net_mask = mask; ++ new_route->gw_ip = gw_addr; ++ } ++ ++ shifted_mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ if ((mask & shifted_mask) == shifted_mask) ++ key = (ntohl(addr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ else ++ key = NET_HASH_TBL_SIZE; ++ last_ptr = &net_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ xnvfile_touch_tag(&net_route_tag); ++ ++ rt = net_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_net_ip == addr) && (rt->dest_net_mask == mask)) { ++ rt->gw_ip = gw_addr; ++ ++ if (new_route) ++ rt_free_net_route(new_route); ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ if (new_route) { ++ new_route->next = *last_ptr; ++ *last_ptr = new_route; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } else { ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: no more network routes available\n"); ++ return -ENOBUFS; ++ } ++} ++ ++/*** ++ * rt_ip_route_del_net - deletes specified network route ++ */ ++int rt_ip_route_del_net(u32 addr, u32 mask) ++{ ++ rtdm_lockctx_t context; ++ struct net_route *rt; ++ struct net_route **last_ptr; ++ unsigned int key; ++ u32 shifted_mask; ++ ++ addr &= mask; ++ ++ shifted_mask = NET_HASH_KEY_MASK << net_hash_key_shift; ++ if ((mask & shifted_mask) == shifted_mask) ++ key = (ntohl(addr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ else ++ key = NET_HASH_TBL_SIZE; ++ last_ptr = &net_hash_tbl[key]; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ rt = net_hash_tbl[key]; ++ while (rt != NULL) { ++ if ((rt->dest_net_ip == addr) && (rt->dest_net_mask == mask)) { ++ *last_ptr = rt->next; ++ ++ rt_free_net_route(rt); ++ ++ xnvfile_touch_tag(&net_route_tag); ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return 0; ++ } ++ ++ last_ptr = &rt->next; ++ rt = rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ return -ENOENT; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++/*** ++ * rt_ip_route_output - looks up output route ++ * ++ * Note: increments refcount on 
returned rtdev in rt_buf ++ */ ++int rt_ip_route_output(struct dest_route *rt_buf, u32 daddr, u32 saddr) ++{ ++ rtdm_lockctx_t context; ++ struct host_route *host_rt; ++ unsigned int key; ++ ++#ifndef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++#define DADDR daddr ++#else ++#define DADDR real_daddr ++ ++ struct net_route *net_rt; ++ int lookup_gw = 1; ++ u32 real_daddr = daddr; ++ ++restart: ++#endif /* !CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ key = ntohl(daddr) & HOST_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&host_table_lock, context); ++ ++ host_rt = host_hash_tbl[key]; ++ if (likely(saddr == INADDR_ANY)) ++ while (host_rt != NULL) { ++ if (host_rt->dest_host.ip == daddr) { ++ host_route_found: ++ if (!rtdev_reference( ++ host_rt->dest_host.rtdev)) { ++ rtdm_lock_put_irqrestore( ++ &host_table_lock, context); ++ goto next; ++ } ++ ++ memcpy(rt_buf->dev_addr, ++ &host_rt->dest_host.dev_addr, ++ sizeof(rt_buf->dev_addr)); ++ rt_buf->rtdev = host_rt->dest_host.rtdev; ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, ++ context); ++ ++ rt_buf->ip = DADDR; ++ ++ return 0; ++ } ++ next: ++ host_rt = host_rt->next; ++ } ++ else ++ while (host_rt != NULL) { ++ if ((host_rt->dest_host.ip == daddr) && ++ (host_rt->dest_host.rtdev->local_ip == saddr)) ++ goto host_route_found; ++ host_rt = host_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&host_table_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ if (lookup_gw) { ++ lookup_gw = 0; ++ key = (ntohl(daddr) >> net_hash_key_shift) & NET_HASH_KEY_MASK; ++ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ net_rt = net_hash_tbl[key]; ++ while (net_rt != NULL) { ++ if (net_rt->dest_net_ip == ++ (daddr & net_rt->dest_net_mask)) { ++ daddr = net_rt->gw_ip; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ context); ++ ++ /* start over, now using the gateway ip as destination */ ++ goto restart; ++ } ++ ++ net_rt = net_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ ++ /* last try: no hash key */ ++ rtdm_lock_get_irqsave(&net_table_lock, context); ++ ++ net_rt = net_hash_tbl[NET_HASH_TBL_SIZE]; ++ while (net_rt != NULL) { ++ if (net_rt->dest_net_ip == ++ (daddr & net_rt->dest_net_mask)) { ++ daddr = net_rt->gw_ip; ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, ++ context); ++ ++ /* start over, now using the gateway ip as destination */ ++ goto restart; ++ } ++ ++ net_rt = net_rt->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&net_table_lock, context); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++ /*ERRMSG*/ rtdm_printk("RTnet: host %u.%u.%u.%u unreachable\n", ++ NIPQUAD(daddr)); ++ return -EHOSTUNREACH; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++int rt_ip_route_forward(struct rtskb *rtskb, u32 daddr) ++{ ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ struct dest_route dest; ++ ++ if (likely((daddr == rtdev->local_ip) || ++ (daddr == rtdev->broadcast_ip) || ++ (rtdev->flags & IFF_LOOPBACK))) ++ return 0; ++ ++ if (rtskb_acquire(rtskb, &global_pool) != 0) { ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: router overloaded, dropping packet\n"); ++ goto error; ++ } ++ ++ if (rt_ip_route_output(&dest, daddr, INADDR_ANY) < 0) { ++ /*ERRMSG*/ rtdm_printk( ++ "RTnet: unable to forward packet from %u.%u.%u.%u\n", ++ NIPQUAD(rtskb->nh.iph->saddr)); ++ goto error; ++ } ++ ++ rtskb->rtdev = dest.rtdev; ++ rtskb->priority = ROUTER_FORWARD_PRIO; ++ ++ if ((dest.rtdev->hard_header) && ++ (dest.rtdev->hard_header(rtskb, dest.rtdev, ETH_P_IP, dest.dev_addr, ++ 
dest.rtdev->dev_addr, rtskb->len) < 0)) ++ goto error; ++ ++ rtdev_xmit(rtskb); ++ ++ return 1; ++ ++error: ++ kfree_rtskb(rtskb); ++ return 1; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++/*** ++ * rt_ip_routing_init: initialize ++ */ ++int __init rt_ip_routing_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < CONFIG_XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES - 2; i++) ++ host_routes[i].next = &host_routes[i + 1]; ++ free_host_route = &host_routes[0]; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ for (i = 0; i < CONFIG_XENO_DRIVERS_NET_RTIPV4_NET_ROUTES - 2; i++) ++ net_routes[i].next = &net_routes[i + 1]; ++ free_net_route = &net_routes[0]; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ return rt_route_proc_register(); ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ return 0; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++/*** ++ * rt_ip_routing_realease ++ */ ++void rt_ip_routing_release(void) ++{ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_route_proc_unregister(); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++EXPORT_SYMBOL_GPL(rt_ip_route_add_host); ++EXPORT_SYMBOL_GPL(rt_ip_route_del_host); ++EXPORT_SYMBOL_GPL(rt_ip_route_del_all); ++EXPORT_SYMBOL_GPL(rt_ip_route_output); +--- linux/drivers/xenomai/net/stack/ipv4/icmp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/icmp.c 2021-04-07 16:01:27.082634374 +0800 +@@ -0,0 +1,497 @@ ++/*** ++ * ++ * ipv4/icmp.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2002 Vinay Sridhara ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** ++ * Structure for sending the icmp packets ++ */ ++struct icmp_bxm { ++ unsigned int csum; ++ size_t head_len; ++ size_t data_len; ++ off_t offset; ++ struct { ++ struct icmphdr icmph; ++ nanosecs_abs_t timestamp; ++ } head; ++ union { ++ struct rtskb *skb; ++ void *buf; ++ } data; ++}; ++ ++struct rt_icmp_control { ++ void (*handler)(struct rtskb *skb); ++ short error; /* This ICMP is classed as an error message */ ++}; ++ ++static DEFINE_RTDM_LOCK(echo_calls_lock); ++LIST_HEAD(echo_calls); ++ ++static struct { ++ /* ++ * Scratch pad, provided so that rt_socket_dereference(&icmp_socket); ++ * remains legal. ++ */ ++ struct rtdm_dev_context dummy; ++ ++ /* ++ * Socket for icmp replies ++ * It is not part of the socket pool. It may furthermore be used ++ * concurrently by multiple tasks because all fields are static excect ++ * skb_pool, but that one is spinlock protected. 
++ */ ++ struct rtsocket socket; ++} icmp_socket_container; ++ ++#define icmp_fd (&icmp_socket_container.dummy.fd) ++#define icmp_socket ((struct rtsocket *)rtdm_fd_to_private(icmp_fd)) ++ ++void rt_icmp_queue_echo_request(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ list_add_tail(&call->list_entry, &echo_calls); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++} ++ ++void rt_icmp_dequeue_echo_request(struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ list_del(&call->list_entry); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++} ++ ++void rt_icmp_cleanup_echo_requests(void) ++{ ++ rtdm_lockctx_t context; ++ struct list_head *entry; ++ struct list_head *next; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ entry = echo_calls.next; ++ INIT_LIST_HEAD(&echo_calls); ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ ++ while (entry != &echo_calls) { ++ next = entry->next; ++ rtpc_complete_call_nrt((struct rt_proc_call *)entry, -EINTR); ++ entry = next; ++ } ++ ++ /* purge any pending ICMP fragments */ ++ rt_ip_frag_invalidate_socket(icmp_socket); ++} ++ ++/*** ++ * rt_icmp_discard - dummy function ++ */ ++static void rt_icmp_discard(struct rtskb *skb) ++{ ++} ++ ++static int rt_icmp_glue_reply_bits(const void *p, unsigned char *to, ++ unsigned int offset, unsigned int fraglen) ++{ ++ struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; ++ struct icmphdr *icmph; ++ unsigned long csum; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ if (offset != 0) ++ return -EMSGSIZE; ++ ++ csum = csum_partial_copy_nocheck((void *)&icmp_param->head, to, ++ icmp_param->head_len, ++ icmp_param->csum); ++ ++ csum = rtskb_copy_and_csum_bits(icmp_param->data.skb, ++ icmp_param->offset, ++ to + icmp_param->head_len, ++ fraglen - icmp_param->head_len, csum); ++ ++ icmph = (struct icmphdr *)to; ++ ++ icmph->checksum = csum_fold(csum); ++ ++ return 0; ++} ++ ++/*** ++ * common reply function ++ */ ++static void rt_icmp_send_reply(struct icmp_bxm *icmp_param, struct rtskb *skb) ++{ ++ struct dest_route rt; ++ int err; ++ ++ icmp_param->head.icmph.checksum = 0; ++ icmp_param->csum = 0; ++ ++ /* route back to the source address via the incoming device */ ++ if (rt_ip_route_output(&rt, skb->nh.iph->saddr, skb->rtdev->local_ip) != ++ 0) ++ return; ++ ++ rt_socket_reference(icmp_socket); ++ err = rt_ip_build_xmit(icmp_socket, rt_icmp_glue_reply_bits, icmp_param, ++ sizeof(struct icmphdr) + icmp_param->data_len, ++ &rt, MSG_DONTWAIT); ++ if (err) ++ rt_socket_dereference(icmp_socket); ++ ++ rtdev_dereference(rt.rtdev); ++ ++ RTNET_ASSERT(err == 0, ++ rtdm_printk("RTnet: %s() error in xmit\n", __FUNCTION__);); ++ (void)err; ++} ++ ++/*** ++ * rt_icmp_echo - handles echo replies on our previously sent requests ++ */ ++static void rt_icmp_echo_reply(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call; ++ struct ipv4_cmd *cmd; ++ ++ rtdm_lock_get_irqsave(&echo_calls_lock, context); ++ ++ if (!list_empty(&echo_calls)) { ++ call = (struct rt_proc_call *)echo_calls.next; ++ list_del(&call->list_entry); ++ ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&echo_calls_lock, context); ++ return; ++ } ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ ++ cmd->args.ping.ip_addr = skb->nh.iph->saddr; ++ cmd->args.ping.rtt = 0; ++ ++ if ((skb->h.icmph->un.echo.id == 
cmd->args.ping.id) && ++ (ntohs(skb->h.icmph->un.echo.sequence) == ++ cmd->args.ping.sequence) && ++ skb->len == cmd->args.ping.msg_size) { ++ if (skb->len >= sizeof(nanosecs_abs_t)) ++ cmd->args.ping.rtt = rtdm_clock_read() - ++ *((nanosecs_abs_t *)skb->data); ++ rtpc_complete_call(call, sizeof(struct icmphdr) + skb->len); ++ } else ++ rtpc_complete_call(call, 0); ++} ++ ++/*** ++ * rt_icmp_echo_request - handles echo requests sent by other stations ++ */ ++static void rt_icmp_echo_request(struct rtskb *skb) ++{ ++ struct icmp_bxm icmp_param; ++ ++ icmp_param.head.icmph = *skb->h.icmph; ++ icmp_param.head.icmph.type = ICMP_ECHOREPLY; ++ icmp_param.data.skb = skb; ++ icmp_param.offset = 0; ++ icmp_param.data_len = skb->len; ++ icmp_param.head_len = sizeof(struct icmphdr); ++ ++ rt_icmp_send_reply(&icmp_param, skb); ++ ++ return; ++} ++ ++static int rt_icmp_glue_request_bits(const void *p, unsigned char *to, ++ unsigned int offset, unsigned int fraglen) ++{ ++ struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; ++ struct icmphdr *icmph; ++ unsigned long csum; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ RTNET_ASSERT( ++ offset == 0, ++ rtdm_printk("RTnet: %s() does not support fragmentation.\n", ++ __FUNCTION__); ++ return -1;); ++ ++ csum = csum_partial_copy_nocheck((void *)&icmp_param->head, to, ++ icmp_param->head_len, ++ icmp_param->csum); ++ ++ csum = csum_partial_copy_nocheck(icmp_param->data.buf, ++ to + icmp_param->head_len, ++ fraglen - icmp_param->head_len, csum); ++ ++ icmph = (struct icmphdr *)to; ++ ++ icmph->checksum = csum_fold(csum); ++ ++ return 0; ++} ++ ++/*** ++ * common request function ++ */ ++static int rt_icmp_send_request(u32 daddr, struct icmp_bxm *icmp_param) ++{ ++ struct dest_route rt; ++ unsigned int size; ++ int err; ++ ++ icmp_param->head.icmph.checksum = 0; ++ icmp_param->csum = 0; ++ ++ if ((err = rt_ip_route_output(&rt, daddr, INADDR_ANY)) < 0) ++ return err; ++ ++ /* TODO: add support for fragmented ICMP packets */ ++ size = icmp_param->head_len + icmp_param->data_len; ++ if (size + 20 /* ip header */ > ++ rt.rtdev->get_mtu(rt.rtdev, RT_ICMP_PRIO)) ++ err = -EMSGSIZE; ++ else { ++ rt_socket_reference(icmp_socket); ++ err = rt_ip_build_xmit(icmp_socket, rt_icmp_glue_request_bits, ++ icmp_param, size, &rt, MSG_DONTWAIT); ++ if (err) ++ rt_socket_dereference(icmp_socket); ++ } ++ ++ rtdev_dereference(rt.rtdev); ++ ++ return err; ++} ++ ++/*** ++ * rt_icmp_echo_request - sends an echo request to the specified address ++ */ ++int rt_icmp_send_echo(u32 daddr, u16 id, u16 sequence, size_t msg_size) ++{ ++ struct icmp_bxm icmp_param; ++ unsigned char pattern_buf[msg_size]; ++ off_t pos; ++ ++ /* first purge any potentially pending ICMP fragments */ ++ rt_ip_frag_invalidate_socket(icmp_socket); ++ ++ icmp_param.head.icmph.type = ICMP_ECHO; ++ icmp_param.head.icmph.code = 0; ++ icmp_param.head.icmph.un.echo.id = id; ++ icmp_param.head.icmph.un.echo.sequence = htons(sequence); ++ icmp_param.offset = 0; ++ ++ if (msg_size >= sizeof(nanosecs_abs_t)) { ++ icmp_param.head_len = ++ sizeof(struct icmphdr) + sizeof(nanosecs_abs_t); ++ icmp_param.data_len = msg_size - sizeof(nanosecs_abs_t); ++ ++ for (pos = 0; pos < icmp_param.data_len; pos++) ++ pattern_buf[pos] = pos & 0xFF; ++ ++ icmp_param.head.timestamp = rtdm_clock_read(); ++ } else { ++ icmp_param.head_len = sizeof(struct icmphdr) + msg_size; ++ icmp_param.data_len = 0; ++ ++ for (pos = 0; pos < msg_size; pos++) ++ pattern_buf[pos] = pos & 0xFF; ++ } ++ icmp_param.data.buf = pattern_buf; ++ ++ return 
rt_icmp_send_request(daddr, &icmp_param); ++} ++ ++/*** ++ * rt_icmp_socket ++ */ ++int rt_icmp_socket(struct rtdm_fd *fd) ++{ ++ /* we don't support user-created ICMP sockets */ ++ return -ENOPROTOOPT; ++} ++ ++static struct rt_icmp_control rt_icmp_pointers[NR_ICMP_TYPES + 1] = { ++ /* ECHO REPLY (0) */ ++ { rt_icmp_echo_reply, 0 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* DEST UNREACH (3) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* SOURCE QUENCH (4) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* REDIRECT (5) */ ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* ECHO (8) */ ++ { rt_icmp_echo_request, 0 }, ++ { rt_icmp_discard, 1 }, ++ { rt_icmp_discard, 1 }, ++ ++ /* TIME EXCEEDED (11) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* PARAMETER PROBLEM (12) */ ++ { rt_icmp_discard, 1 }, ++ ++ /* TIMESTAMP (13) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* TIMESTAMP REPLY (14) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* INFO (15) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* INFO REPLY (16) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* ADDR MASK (17) */ ++ { rt_icmp_discard, 0 }, ++ ++ /* ADDR MASK REPLY (18) */ ++ { rt_icmp_discard, 0 } ++}; ++ ++/*** ++ * rt_icmp_dest_pool ++ */ ++struct rtsocket *rt_icmp_dest_socket(struct rtskb *skb) ++{ ++ rt_socket_reference(icmp_socket); ++ return icmp_socket; ++} ++ ++/*** ++ * rt_icmp_rcv ++ */ ++void rt_icmp_rcv(struct rtskb *skb) ++{ ++ struct icmphdr *icmpHdr = skb->h.icmph; ++ unsigned int length = skb->len; ++ ++ /* check header sanity and don't accept fragmented packets */ ++ if ((length < sizeof(struct icmphdr)) || (skb->next != NULL)) { ++ rtdm_printk("RTnet: improper length in icmp packet\n"); ++ goto cleanup; ++ } ++ ++ if (ip_compute_csum((unsigned char *)icmpHdr, length)) { ++ rtdm_printk("RTnet: invalid checksum in icmp packet %d\n", ++ length); ++ goto cleanup; ++ } ++ ++ if (!rtskb_pull(skb, sizeof(struct icmphdr))) { ++ rtdm_printk("RTnet: pull failed %p\n", (skb->sk)); ++ goto cleanup; ++ } ++ ++ if (icmpHdr->type > NR_ICMP_TYPES) { ++ rtdm_printk("RTnet: invalid icmp type\n"); ++ goto cleanup; ++ } ++ ++ /* sane packet, process it */ ++ rt_icmp_pointers[icmpHdr->type].handler(skb); ++ ++cleanup: ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * rt_icmp_rcv_err ++ */ ++void rt_icmp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("RTnet: rt_icmp_rcv err\n"); ++} ++ ++/*** ++ * ICMP-Initialisation ++ */ ++static struct rtinet_protocol icmp_protocol = { .protocol = IPPROTO_ICMP, ++ .dest_socket = ++ &rt_icmp_dest_socket, ++ .rcv_handler = &rt_icmp_rcv, ++ .err_handler = &rt_icmp_rcv_err, ++ .init_socket = ++ &rt_icmp_socket }; ++ ++/*** ++ * rt_icmp_init ++ */ ++void __init rt_icmp_init(void) ++{ ++ int skbs; ++ ++ skbs = rt_bare_socket_init(icmp_fd, IPPROTO_ICMP, RT_ICMP_PRIO, ++ ICMP_REPLY_POOL_SIZE); ++ BUG_ON(skbs < 0); ++ if (skbs < ICMP_REPLY_POOL_SIZE) ++ printk("RTnet: allocated only %d icmp rtskbs\n", skbs); ++ ++ icmp_socket->prot.inet.tos = 0; ++ icmp_fd->refs = 1; ++ ++ rt_inet_add_protocol(&icmp_protocol); ++} ++ ++/*** ++ * rt_icmp_release ++ */ ++void rt_icmp_release(void) ++{ ++ rt_icmp_cleanup_echo_requests(); ++ rt_inet_del_protocol(&icmp_protocol); ++ rt_bare_socket_cleanup(icmp_socket); ++} +--- linux/drivers/xenomai/net/stack/ipv4/af_inet.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/af_inet.c 2021-04-07 16:01:27.077634381 +0800 +@@ -0,0 +1,340 @@ ++/*** ++ * ++ * ipv4/af_inet.c ++ * ++ * rtnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic 
Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++struct route_solicit_params { ++ struct rtnet_device *rtdev; ++ __u32 ip_addr; ++}; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_directory ipv4_proc_root; ++EXPORT_SYMBOL_GPL(ipv4_proc_root); ++#endif ++ ++static int route_solicit_handler(struct rt_proc_call *call) ++{ ++ struct route_solicit_params *param; ++ struct rtnet_device *rtdev; ++ ++ param = rtpc_get_priv(call, struct route_solicit_params); ++ rtdev = param->rtdev; ++ ++ if ((rtdev->flags & IFF_UP) == 0) ++ return -ENODEV; ++ ++ rt_arp_solicit(rtdev, param->ip_addr); ++ ++ return 0; ++} ++ ++static void cleanup_route_solicit(void *priv_data) ++{ ++ struct route_solicit_params *param; ++ ++ param = (struct route_solicit_params *)priv_data; ++ rtdev_dereference(param->rtdev); ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++static int ping_handler(struct rt_proc_call *call) ++{ ++ struct ipv4_cmd *cmd; ++ int err; ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ ++ rt_icmp_queue_echo_request(call); ++ ++ err = rt_icmp_send_echo(cmd->args.ping.ip_addr, cmd->args.ping.id, ++ cmd->args.ping.sequence, ++ cmd->args.ping.msg_size); ++ if (err < 0) { ++ rt_icmp_dequeue_echo_request(call); ++ return err; ++ } ++ ++ return -CALL_PENDING; ++} ++ ++static void ping_complete_handler(struct rt_proc_call *call, void *priv_data) ++{ ++ struct ipv4_cmd *cmd; ++ struct ipv4_cmd *usr_cmd = (struct ipv4_cmd *)priv_data; ++ ++ if (rtpc_get_result(call) < 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct ipv4_cmd); ++ usr_cmd->args.ping.ip_addr = cmd->args.ping.ip_addr; ++ usr_cmd->args.ping.rtt = cmd->args.ping.rtt; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++static int ipv4_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct ipv4_cmd cmd; ++ struct route_solicit_params params; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ switch (request) { ++ case IOC_RT_HOST_ROUTE_ADD: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ ret = rt_ip_route_add_host(cmd.args.addhost.ip_addr, ++ cmd.args.addhost.dev_addr, rtdev); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_SOLICIT: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ if (!rtdev_reference(rtdev)) { ++ mutex_unlock(&rtdev->nrt_lock); ++ return -EIDRM; ++ } ++ ++ params.rtdev = rtdev; ++ params.ip_addr = cmd.args.solicit.ip_addr; ++ ++ /* We need the rtpc wrapping because rt_arp_solicit can block on a ++ * real-time lock in the NIC's xmit 
routine. */ ++ ret = rtpc_dispatch_call(route_solicit_handler, 0, ¶ms, ++ sizeof(params), NULL, ++ cleanup_route_solicit); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_DELETE: ++ case IOC_RT_HOST_ROUTE_DELETE_DEV: ++ ret = rt_ip_route_del_host(cmd.args.delhost.ip_addr, rtdev); ++ break; ++ ++ case IOC_RT_HOST_ROUTE_GET: ++ case IOC_RT_HOST_ROUTE_GET_DEV: ++ ret = rt_ip_route_get_host(cmd.args.gethost.ip_addr, ++ cmd.head.if_name, ++ cmd.args.gethost.dev_addr, rtdev); ++ if (ret >= 0) { ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ ret = -EFAULT; ++ } ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ case IOC_RT_NET_ROUTE_ADD: ++ ret = rt_ip_route_add_net(cmd.args.addnet.net_addr, ++ cmd.args.addnet.net_mask, ++ cmd.args.addnet.gw_addr); ++ break; ++ ++ case IOC_RT_NET_ROUTE_DELETE: ++ ret = rt_ip_route_del_net(cmd.args.delnet.net_addr, ++ cmd.args.delnet.net_mask); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++ case IOC_RT_PING: ++ ret = rtpc_dispatch_call(ping_handler, cmd.args.ping.timeout, ++ &cmd, sizeof(cmd), ++ ping_complete_handler, NULL); ++ if (ret >= 0) { ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ ret = -EFAULT; ++ } ++ if (ret < 0) ++ rt_icmp_cleanup_echo_requests(); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++unsigned long rt_inet_aton(const char *ip) ++{ ++ int p, n, c; ++ union { ++ unsigned long l; ++ char c[4]; ++ } u; ++ p = n = 0; ++ while ((c = *ip++)) { ++ if (c != '.') { ++ n = n * 10 + c - '0'; ++ } else { ++ if (n > 0xFF) { ++ return 0; ++ } ++ u.c[p++] = n; ++ n = 0; ++ } ++ } ++ u.c[3] = n; ++ return u.l; ++} ++ ++static void rt_ip_ifup(struct rtnet_device *rtdev, ++ struct rtnet_core_cmd *up_cmd) ++{ ++ struct rtnet_device *tmp; ++ int i; ++ ++ rt_ip_route_del_all(rtdev); /* cleanup routing table */ ++ ++ if (up_cmd->args.up.ip_addr != 0xFFFFFFFF) { ++ rtdev->local_ip = up_cmd->args.up.ip_addr; ++ rtdev->broadcast_ip = up_cmd->args.up.broadcast_ip; ++ } ++ ++ if (rtdev->local_ip != 0) { ++ if (rtdev->flags & IFF_LOOPBACK) { ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if ((tmp = rtdev_get_by_index(i)) != NULL) { ++ rt_ip_route_add_host(tmp->local_ip, ++ rtdev->dev_addr, ++ rtdev); ++ rtdev_dereference(tmp); ++ } ++ } else if ((tmp = rtdev_get_loopback()) != NULL) { ++ rt_ip_route_add_host(rtdev->local_ip, tmp->dev_addr, ++ tmp); ++ rtdev_dereference(tmp); ++ } ++ ++ if (rtdev->flags & IFF_BROADCAST) ++ rt_ip_route_add_host(up_cmd->args.up.broadcast_ip, ++ rtdev->broadcast, rtdev); ++ } ++} ++ ++static void rt_ip_ifdown(struct rtnet_device *rtdev) ++{ ++ rt_ip_route_del_all(rtdev); ++} ++ ++static struct rtdev_event_hook rtdev_hook = { .unregister_device = rt_ip_ifdown, ++ .ifup = rt_ip_ifup, ++ .ifdown = rt_ip_ifdown }; ++ ++static struct rtnet_ioctls ipv4_ioctls = { .service_name = "IPv4", ++ .ioctl_type = RTNET_IOC_TYPE_IPV4, ++ .handler = ipv4_ioctl }; ++ ++static int __init rt_ipv4_proto_init(void) ++{ ++ int i; ++ int result; ++ ++ /* Network-Layer */ ++ rt_ip_init(); ++ rt_arp_init(); ++ ++ /* Transport-Layer */ ++ for (i = 0; i < MAX_RT_INET_PROTOCOLS; i++) ++ rt_inet_protocols[i] = NULL; ++ ++ rt_icmp_init(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ result = xnvfile_init_dir("ipv4", &ipv4_proc_root, &rtnet_proc_root); ++ if (result < 0) ++ goto err1; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ if ((result = rt_ip_routing_init()) < 
0) ++ goto err2; ++ if ((result = rtnet_register_ioctls(&ipv4_ioctls)) < 0) ++ goto err3; ++ ++ rtdev_add_event_hook(&rtdev_hook); ++ ++ return 0; ++ ++err3: ++ rt_ip_routing_release(); ++ ++err2: ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_dir(&ipv4_proc_root); ++err1: ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ rt_icmp_release(); ++ rt_arp_release(); ++ rt_ip_release(); ++ ++ return result; ++} ++ ++static void __exit rt_ipv4_proto_release(void) ++{ ++ rt_ip_release(); ++ ++ rtdev_del_event_hook(&rtdev_hook); ++ rtnet_unregister_ioctls(&ipv4_ioctls); ++ rt_ip_routing_release(); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ xnvfile_destroy_dir(&ipv4_proc_root); ++#endif ++ ++ /* Transport-Layer */ ++ rt_icmp_release(); ++ ++ /* Network-Layer */ ++ rt_arp_release(); ++} ++ ++module_init(rt_ipv4_proto_init); ++module_exit(rt_ipv4_proto_release); ++ ++EXPORT_SYMBOL_GPL(rt_inet_aton); +--- linux/drivers/xenomai/net/stack/ipv4/tcp/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/Makefile 2021-04-07 16:01:27.073634386 +0800 +@@ -0,0 +1,7 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP) += rttcp.o ++ ++rttcp-y := \ ++ tcp.o \ ++ timerwheel.o +--- linux/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.h 2021-04-07 16:01:27.068634393 +0800 +@@ -0,0 +1,62 @@ ++/*** ++ * ++ * ipv4/tcp/timerwheel.h - timerwheel interface for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TIMERWHEEL_H_ ++#define __TIMERWHEEL_H_ ++ ++#include ++#include ++ ++#define TIMERWHEEL_TIMER_UNUSED -1 ++ ++typedef void (*timerwheel_timer_handler)(void *); ++ ++struct timerwheel_timer { ++ struct list_head link; ++ timerwheel_timer_handler handler; ++ void *data; ++ int slot; ++ volatile int refcount; /* only written by wheel task */ ++}; ++ ++static inline void timerwheel_init_timer(struct timerwheel_timer *timer, ++ timerwheel_timer_handler handler, ++ void *data) ++{ ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ timer->handler = handler; ++ timer->data = data; ++ timer->refcount = 0; ++} ++ ++/* passed data must remain valid till a timer fireup */ ++int timerwheel_add_timer(struct timerwheel_timer *timer, ++ nanosecs_rel_t expires); ++ ++int timerwheel_remove_timer(struct timerwheel_timer *timer); ++ ++void timerwheel_remove_timer_sync(struct timerwheel_timer *timer); ++ ++int timerwheel_init(nanosecs_rel_t timeout, unsigned int granularity); ++ ++void timerwheel_cleanup(void); ++ ++#endif +--- linux/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/timerwheel.c 2021-04-07 16:01:27.063634401 +0800 +@@ -0,0 +1,220 @@ ++/*** ++ * ++ * ipv4/tcp/timerwheel.c - timerwheel implementation for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include "timerwheel.h" ++ ++static struct { ++ /* timer pivot task */ ++ rtdm_task_t pivot_task; ++ ++ /* time length for one period of rotation of timerwheel */ ++ nanosecs_rel_t timeout; ++ ++ /* timer wheel slots for storing timers up to timerwheel_timeout */ ++ unsigned int slots; ++ ++ /* timer wheel interval timeout */ ++ nanosecs_rel_t interval; ++ ++ /* timer wheel interval timeout */ ++ unsigned int interval_base; ++ ++ /* timerwheel array */ ++ struct list_head *ring; ++ ++ /* timerwheel slot counter */ ++ unsigned int current_slot; ++ ++ /* timerwheel current slot lock */ ++ rtdm_lock_t slot_lock; ++} wheel; ++ ++static struct timerwheel_timer *timerwheel_get_from_current_slot(void) ++{ ++ struct timerwheel_timer *timer = NULL; ++ struct list_head *slot_list; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ slot_list = &wheel.ring[wheel.current_slot]; ++ ++ if (!list_empty(slot_list)) { ++ timer = list_first_entry(slot_list, struct timerwheel_timer, ++ link); ++ list_del(&timer->link); ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ timer->refcount++; ++ } ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return timer; ++} ++ ++int timerwheel_add_timer(struct timerwheel_timer *timer, nanosecs_rel_t expires) ++{ ++ rtdm_lockctx_t context; ++ int slot; ++ ++ slot = expires >> wheel.interval_base; ++ ++ if (slot >= wheel.slots) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ /* cancel timer if it's still running */ ++ if (timer->slot >= 0) ++ list_del(&timer->link); ++ ++ slot = slot + wheel.current_slot; ++ if (slot >= wheel.slots) ++ slot = slot - wheel.slots; ++ ++ list_add_tail(&timer->link, &wheel.ring[slot]); ++ timer->slot = slot; ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return 0; ++} ++ ++static int timerwheel_sleep(void) ++{ ++ int ret; ++ ++ ret = rtdm_task_sleep(wheel.interval); ++ if (ret < 0) ++ return ret; ++ ++ wheel.current_slot++; ++ if (wheel.current_slot == wheel.slots) ++ wheel.current_slot = 0; ++ ++ return 0; ++} ++ ++static void timerwheel_pivot(void *arg) ++{ ++ struct timerwheel_timer *timer; ++ int ret; ++ ++ while (1) { ++ ret = timerwheel_sleep(); ++ if (ret < 0) { ++ rtdm_printk( ++ "timerwheel: timerwheel_pivot interrupted %d\n", ++ -ret); ++ break; ++ } ++ ++ while ((timer = timerwheel_get_from_current_slot())) { ++ timer->handler(timer->data); ++ ++ smp_mb(); ++ timer->refcount--; ++ } ++ } ++} ++ ++int timerwheel_remove_timer(struct timerwheel_timer *timer) ++{ ++ rtdm_lockctx_t context; ++ int ret; ++ ++ rtdm_lock_get_irqsave(&wheel.slot_lock, context); ++ ++ if (timer->slot >= 0) { ++ list_del(&timer->link); ++ timer->slot = TIMERWHEEL_TIMER_UNUSED; ++ ret = 0; ++ } else ++ ret = -ENOENT; ++ ++ rtdm_lock_put_irqrestore(&wheel.slot_lock, context); ++ ++ return ret; ++} ++ ++void timerwheel_remove_timer_sync(struct timerwheel_timer *timer) ++{ ++ u64 interval_ms = wheel.interval; ++ ++ do_div(interval_ms, 1000000); ++ ++ timerwheel_remove_timer(timer); ++ ++ while (timer->refcount > 0) ++ msleep(interval_ms); ++} ++ ++/* ++ timeout - maximum expiration timeout for timers ++ granularity - is an exponent of 2 representing nanoseconds for ++ one wheel tick ++ heapsize - is a number of timers to allocate ++*/ ++int __init timerwheel_init(nanosecs_rel_t timeout, unsigned int granularity) ++{ ++ int i; ++ int err; ++ ++ /* the least possible slot timeout is set for 1ms */ ++ if (granularity < 10) 
++ return -EINVAL; ++ ++ wheel.timeout = timeout; ++ wheel.interval_base = granularity; ++ wheel.slots = (timeout >> granularity) + 1; ++ wheel.interval = (1 << granularity); ++ wheel.current_slot = 0; ++ ++ wheel.ring = ++ kmalloc(sizeof(struct list_head) * wheel.slots, GFP_KERNEL); ++ if (!wheel.ring) ++ return -ENOMEM; ++ ++ for (i = 0; i < wheel.slots; i++) ++ INIT_LIST_HEAD(&wheel.ring[i]); ++ ++ rtdm_lock_init(&wheel.slot_lock); ++ ++ err = rtdm_task_init(&wheel.pivot_task, "rttcp timerwheel", ++ timerwheel_pivot, NULL, 1, 0); ++ if (err) { ++ printk("timerwheel: error on pivot task initialization: %d\n", ++ err); ++ kfree(wheel.ring); ++ } ++ ++ return err; ++} ++ ++void timerwheel_cleanup(void) ++{ ++ rtdm_task_destroy(&wheel.pivot_task); ++ kfree(wheel.ring); ++} +--- linux/drivers/xenomai/net/stack/ipv4/tcp/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/Kconfig 2021-04-07 16:01:27.059634406 +0800 +@@ -0,0 +1,18 @@ ++config XENO_DRIVERS_NET_RTIPV4_TCP ++ tristate "TCP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ Enables TCP support of the RTnet Real-Time IPv4 protocol. ++ ++ When the RTnet IPv4 is enabled while this feature is disabled, TCP ++ will be forwarded to the Linux network stack. ++ ++config XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ bool "TCP error injection" ++ depends on XENO_DRIVERS_NET_RTIPV4_TCP ++ ---help--- ++ Enables error injection for incoming TCP packets. This can be used ++ to test both protocol as well as application behavior under error ++ conditions. The per-socket error rate is 0 by default and can be ++ tuned during runtime via the error_rate and multi_error module ++ parameters. +--- linux/drivers/xenomai/net/stack/ipv4/tcp/tcp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/tcp/tcp.c 2021-04-07 16:01:27.054634413 +0800 +@@ -0,0 +1,2462 @@ ++/*** ++ * ++ * ipv4/tcp/tcp.c - TCP implementation for RTnet ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "timerwheel.h" ++ ++static unsigned int close_timeout = 1000; ++module_param(close_timeout, uint, 0664); ++MODULE_PARM_DESC(close_timeout, ++ "max time (ms) to wait during close for FIN-ACK handshake to complete, default 1000"); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ ++static unsigned int error_rate; ++module_param(error_rate, uint, 0664); ++MODULE_PARM_DESC(error_rate, "simulate packet loss after every n packets"); ++ ++static unsigned int multi_error = 1; ++module_param(multi_error, uint, 0664); ++MODULE_PARM_DESC(multi_error, "on simulated error, drop n packets in a row"); ++ ++static unsigned int counter_start = 1234; ++module_param(counter_start, uint, 0664); ++MODULE_PARM_DESC(counter_start, "start value of per-socket packet counter " ++ "(used for error injection)"); ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++struct tcp_sync { ++ u32 seq; ++ u32 ack_seq; ++ ++ /* Local window size sent to peer */ ++ u16 window; ++ /* Last received destination peer window size */ ++ u16 dst_window; ++}; ++ ++/* ++ connection timeout ++*/ ++/* 5 second */ ++static const nanosecs_rel_t rt_tcp_connection_timeout = 1000000000ull; ++ ++/* retransmission timerwheel timeout */ ++static const u64 rt_tcp_retransmit_timeout = 100000000ull; ++ ++/* ++ keepalive constants ++*/ ++/* 75 second */ ++static const u64 rt_tcp_keepalive_intvl = 75000000000ull; ++/* 9 probes to send */ ++static const u8 rt_tcp_keepalive_probes = 9; ++/* 2 hour */ ++static const u64 rt_tcp_keepalive_timeout = 7200000000000ull; ++ ++/* ++ retransmission timeout ++*/ ++/* 50 millisecond */ ++static const nanosecs_rel_t rt_tcp_retransmission_timeout = 50000000ull; ++/* ++ maximum allowed number of retransmissions ++*/ ++static const unsigned int max_retransmits = 3; ++ ++struct tcp_keepalive { ++ u8 enabled; ++ u32 probes; ++ rtdm_timer_t timer; ++}; ++ ++/*** ++ * This structure is used to register a TCP socket for reception. All ++ * structures are kept in the port_registry array to increase the cache ++ * locality during the critical port lookup in rt_tcp_v4_lookup(). 
++ */ ++ ++/* if dport & daddr are zeroes, it means a listening socket */ ++/* otherwise this is a data structure, which describes a connection */ ++ ++/* NB: sock->prot.inet.saddr & sock->prot.inet.sport values are not used */ ++struct tcp_socket { ++ struct rtsocket sock; /* set up by rt_socket_init() implicitly */ ++ u16 sport; /* local port */ ++ u32 saddr; /* local ip-addr */ ++ u16 dport; /* destination port */ ++ u32 daddr; /* destination ip-addr */ ++ ++ u8 tcp_state; /* tcp connection state */ ++ ++ u8 is_binding; /* if set, tcp socket is in port binding progress */ ++ u8 is_bound; /* if set, tcp socket is already port bound */ ++ u8 is_valid; /* if set, read() and write() can process */ ++ u8 is_accepting; /* if set, accept() is in progress */ ++ u8 is_accepted; /* if set, accept() is already called */ ++ u8 is_closed; /* close() call for resource deallocation follows */ ++ ++ rtdm_event_t send_evt; /* write request is permissible */ ++ rtdm_event_t conn_evt; /* connection event */ ++ ++ struct dest_route rt; ++ struct tcp_sync sync; ++ struct tcp_keepalive keepalive; ++ rtdm_lock_t socket_lock; ++ ++ struct hlist_node link; ++ ++ nanosecs_rel_t sk_sndtimeo; ++ ++ /* retransmission routine data */ ++ u32 nacked_first; ++ unsigned int timer_state; ++ struct rtskb_queue retransmit_queue; ++ struct timerwheel_timer timer; ++ ++ struct completion fin_handshake; ++ rtdm_nrtsig_t close_sig; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ unsigned int packet_counter; ++ unsigned int error_rate; ++ unsigned int multi_error; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++}; ++ ++struct rt_tcp_dispatched_packet_send_cmd { ++ __be32 flags; /* packet flags value */ ++ struct tcp_socket *ts; ++}; ++ ++/*** ++ * Automatic port number assignment ++ ++ * The automatic assignment of port numbers to unbound sockets is realised as ++ * a simple addition of two values: ++ * - the socket ID (lower 8 bits of file descriptor) which is set during ++ * initialisation and left unchanged afterwards ++ * - the start value tcp_auto_port_start which is a module parameter ++ ++ * tcp_auto_port_mask, also a module parameter, is used to define the range of ++ * port numbers which are used for automatic assignment. Any number within ++ * this range will be rejected when passed to bind_rt(). 
++ ++ */ ++ ++MODULE_LICENSE("GPL"); ++ ++static struct { ++ struct rtdm_dev_context dummy; ++ struct tcp_socket rst_socket; ++} rst_socket_container; ++ ++#define rst_fd (&rst_socket_container.dummy.fd) ++#define rst_socket (*(struct tcp_socket *)rtdm_fd_to_private(rst_fd)) ++ ++static u32 tcp_auto_port_start = 1024; ++static u32 tcp_auto_port_mask = ~(RT_TCP_SOCKETS - 1); ++static u32 free_ports = RT_TCP_SOCKETS; ++#define RT_PORT_BITMAP_WORDS \ ++ ((RT_TCP_SOCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++static unsigned long port_bitmap[RT_PORT_BITMAP_WORDS]; ++ ++static struct tcp_socket *port_registry[RT_TCP_SOCKETS]; ++static DEFINE_RTDM_LOCK(tcp_socket_base_lock); ++ ++static struct hlist_head port_hash[RT_TCP_SOCKETS * 2]; ++#define port_hash_mask (RT_TCP_SOCKETS * 2 - 1) ++ ++module_param(tcp_auto_port_start, uint, 0444); ++module_param(tcp_auto_port_mask, uint, 0444); ++MODULE_PARM_DESC(tcp_auto_port_start, "Start of automatically assigned " ++ "port range for TCP"); ++MODULE_PARM_DESC(tcp_auto_port_mask, "Mask that defines port range for TCP " ++ "for automatic assignment"); ++ ++static inline struct tcp_socket *port_hash_search(u32 saddr, u16 sport) ++{ ++ u32 bucket = sport & port_hash_mask; ++ struct tcp_socket *ts; ++ ++ hlist_for_each_entry (ts, &port_hash[bucket], link) ++ if (ts->sport == sport && ++ (saddr == INADDR_ANY || ts->saddr == saddr || ++ ts->saddr == INADDR_ANY)) ++ return ts; ++ ++ return NULL; ++} ++ ++static int port_hash_insert(struct tcp_socket *ts, u32 saddr, u16 sport) ++{ ++ u32 bucket; ++ ++ if (port_hash_search(saddr, sport)) ++ return -EADDRINUSE; ++ ++ bucket = sport & port_hash_mask; ++ ts->saddr = saddr; ++ ts->sport = sport; ++ ts->daddr = 0; ++ ts->dport = 0; ++ ++ hlist_add_head(&ts->link, &port_hash[bucket]); ++ ++ return 0; ++} ++ ++static inline void port_hash_del(struct tcp_socket *ts) ++{ ++ hlist_del(&ts->link); ++} ++ ++/*** ++ * rt_tcp_v4_lookup ++ */ ++static struct rtsocket *rt_tcp_v4_lookup(u32 daddr, u16 dport) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ int ret; ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ts = port_hash_search(daddr, dport); ++ ++ if (ts != NULL) { ++ ret = rt_socket_reference(&ts->sock); ++ if (ret == 0 || (ret == -EIDRM && ts->is_closed)) { ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, ++ context); ++ ++ return &ts->sock; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ return NULL; ++} ++ ++/* test seq1 <= seq2 */ ++static inline int rt_tcp_before(__u32 seq1, __u32 seq2) ++{ ++ return (__s32)(seq1 - seq2) <= 0; ++} ++ ++/* test seq1 => seq2 */ ++static inline int rt_tcp_after(__u32 seq1, __u32 seq2) ++{ ++ return (__s32)(seq2 - seq1) <= 0; ++} ++ ++static inline u32 rt_tcp_compute_ack_seq(struct tcphdr *th, u32 len) ++{ ++ u32 ack_seq = ntohl(th->seq) + len; ++ ++ if (unlikely(th->syn || th->fin)) ++ ack_seq++; ++ ++ return ack_seq; ++} ++ ++static void rt_tcp_keepalive_start(struct tcp_socket *ts) ++{ ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_timer_start(&ts->keepalive.timer, rt_tcp_keepalive_timeout, ++ 0, RTDM_TIMERMODE_RELATIVE); ++ } ++} ++ ++static void rt_tcp_keepalive_stop(struct tcp_socket *ts) ++{ ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_timer_stop(&ts->keepalive.timer); ++ } ++} ++ ++#ifdef YET_UNUSED ++static void rt_tcp_keepalive_timer(rtdm_timer_t *timer); ++ ++static void rt_tcp_keepalive_enable(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive; ++ ++ 
rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ keepalive = &ts->keepalive; ++ ++ if (keepalive->enabled) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ keepalive->probes = rt_tcp_keepalive_probes; ++ ++ rtdm_timer_init(&keepalive->timer, rt_tcp_keepalive_timer, ++ "RT TCP keepalive timer"); ++ ++ rt_tcp_keepalive_start(ts); ++ ++ keepalive->enabled = 1; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++} ++#endif ++ ++static void rt_tcp_keepalive_disable(struct tcp_socket *ts) ++{ ++ struct tcp_keepalive *keepalive; ++ ++ keepalive = &ts->keepalive; ++ ++ if (!keepalive->enabled) { ++ return; ++ } ++ ++ rt_tcp_keepalive_stop(ts); ++ rtdm_timer_destroy(&keepalive->timer); ++ ++ keepalive->enabled = 0; ++} ++ ++static void rt_tcp_keepalive_feed(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ keepalive = &ts->keepalive; ++ ++ if (ts->tcp_state == TCP_ESTABLISHED && ts->keepalive.enabled) { ++ keepalive->probes = rt_tcp_keepalive_probes; ++ ++ /* Restart keepalive timer */ ++ rtdm_timer_stop(&keepalive->timer); ++ rtdm_timer_start(&keepalive->timer, rt_tcp_keepalive_timeout, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++} ++ ++static int rt_tcp_socket_invalidate(struct tcp_socket *ts, u8 to_state) ++{ ++ int signal = ts->is_valid; ++ ++ ts->tcp_state = to_state; ++ ++ /* ++ multiple invalidation could happen without fuss, ++ see rt_tcp_close(), rt_tcp_rcv(), timeout expiration etc. ++ */ ++ if (ts->is_valid) { ++ ts->is_valid = 0; ++ ++ if (ts->keepalive.enabled) { ++ rt_tcp_keepalive_stop(ts); ++ } ++ } ++ ++ return signal; ++} ++ ++static void rt_tcp_socket_invalidate_signal(struct tcp_socket *ts) ++{ ++ /* awake all readers and writers destroying events */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ rtdm_event_destroy(&ts->send_evt); ++} ++ ++static void rt_tcp_socket_validate(struct tcp_socket *ts) ++{ ++ ts->tcp_state = TCP_ESTABLISHED; ++ ++ ts->is_valid = 1; ++ ++ if (ts->keepalive.enabled) { ++ rt_tcp_keepalive_start(ts); ++ } ++ ++ rtdm_event_init(&ts->send_evt, 0); ++} ++ ++/*** ++ * rt_tcp_retransmit_handler - timerwheel handler to process a retransmission ++ * @data: pointer to a rttcp socket structure ++ */ ++static void rt_tcp_retransmit_handler(void *data) ++{ ++ struct tcp_socket *ts = (struct tcp_socket *)data; ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ int signal; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (unlikely(rtskb_queue_empty(&ts->retransmit_queue))) { ++ /* handled, but retransmission queue is empty */ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ rtdm_printk("rttcp: bug in RT TCP retransmission routine\n"); ++ return; ++ } ++ ++ if (ts->tcp_state == TCP_CLOSE) { ++ /* socket is already closed */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (ts->timer_state) { ++ /* more tries */ ++ ts->timer_state--; ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ ++ /* warning, rtskb_clone is under lock */ ++ skb = rtskb_clone(ts->retransmit_queue.first, ++ &ts->sock.skb_pool); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* BUG, window changes are not respected */ ++ if (unlikely(rtdev_xmit(skb)) != 0) { ++ kfree_rtskb(skb); ++ rtdm_printk( ++ "rttcp: packet retransmission from timer 
failed\n"); ++ } ++ } else { ++ ts->timer_state = max_retransmits; ++ ++ /* report about connection lost */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ /* retransmission queue will be cleaned up in rt_tcp_socket_destruct */ ++ rtdm_printk("rttcp: connection is lost by NACK timeout\n"); ++ } ++} ++ ++/*** ++ * rt_tcp_retransmit_ack - remove skbs from retransmission queue on ACK ++ * @ts: rttcp socket ++ * @ack_seq: received ACK sequence value ++ */ ++static void rt_tcp_retransmit_ack(struct tcp_socket *ts, u32 ack_seq) ++{ ++ struct rtskb *skb; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ /* ++ ACK, but retransmission queue is empty ++ This could happen on repeated ACKs ++ */ ++ if (rtskb_queue_empty(&ts->retransmit_queue)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ /* ++ Check ts->nacked_first value firstly to ensure that ++ skb for retransmission is present in the queue, otherwise ++ retransmission queue will be drained completely ++ */ ++ if (!rt_tcp_before(ts->nacked_first, ack_seq)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (timerwheel_remove_timer(&ts->timer) != 0) { ++ /* already timed out */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++dequeue_loop: ++ if (ts->tcp_state == TCP_CLOSE) { ++ /* warn about queue safety in race with anyone, ++ who closes the socket */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if ((skb = __rtskb_dequeue(&ts->retransmit_queue)) == NULL) { ++ ts->timer_state = max_retransmits; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return; ++ } ++ ++ if (rt_tcp_before(ts->nacked_first, ack_seq)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ kfree_rtskb(skb); ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ goto dequeue_loop; ++ } ++ ++ /* Put NACKed skb back to queue */ ++ /* BUG, need to respect half-acknowledged packets */ ++ ts->nacked_first = ntohl(skb->h.th->seq) + 1; ++ ++ __rtskb_queue_head(&ts->retransmit_queue, skb); ++ ++ /* Have more packages in retransmission queue, restart the timer */ ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++} ++ ++/*** ++ * rt_tcp_retransmit_send - enqueue a skb to retransmission queue (not locked) ++ * @ts: rttcp socket ++ * @skb: a copied skb for enqueueing ++ */ ++static void rt_tcp_retransmit_send(struct tcp_socket *ts, struct rtskb *skb) ++{ ++ if (rtskb_queue_empty(&ts->retransmit_queue)) { ++ /* retransmission queue is empty */ ++ ts->nacked_first = ntohl(skb->h.th->seq) + 1; ++ ++ __rtskb_queue_tail(&ts->retransmit_queue, skb); ++ ++ timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout); ++ } else { ++ /* retransmission queue is not empty */ ++ __rtskb_queue_tail(&ts->retransmit_queue, skb); ++ } ++} ++ ++static int rt_ip_build_frame(struct rtskb *skb, struct rtsocket *sk, ++ struct dest_route *rt, struct iphdr *iph) ++{ ++ int ret; ++ struct rtnet_device *rtdev = rt->rtdev; ++ ++ RTNET_ASSERT(rtdev->hard_header, return -EBADF;); ++ ++ if (!rtdev_reference(rt->rtdev)) ++ return -EIDRM; ++ ++ iph->ihl = 5; /* 20 byte header only - no TCP options */ ++ ++ skb->nh.iph = iph; ++ ++ iph->version = 4; ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(skb->len); /* length of IP header 
and IP payload */ ++ iph->id = htons(0x00); /* zero IP frame id */ ++ iph->frag_off = htons(IP_DF); /* and no more frames */ ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required to compute correct checksum */ ++ iph->check = ip_fast_csum((u8 *)iph, 5 /*iph->ihl*/); ++ ++ ret = rtdev->hard_header(skb, rtdev, ETH_P_IP, rt->dev_addr, ++ rtdev->dev_addr, skb->len); ++ rtdev_dereference(rt->rtdev); ++ ++ if (ret != rtdev->hard_header_len) { ++ rtdm_printk("rttcp: rt_ip_build_frame: error on lower level\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void rt_tcp_build_header(struct tcp_socket *ts, struct rtskb *skb, ++ __be32 flags, u8 is_keepalive) ++{ ++ u32 wcheck; ++ u8 tcphdrlen = 20; ++ u8 iphdrlen = 20; ++ struct tcphdr *th; ++ ++ th = skb->h.th; ++ th->source = ts->sport; ++ th->dest = ts->dport; ++ ++ th->seq = htonl(ts->sync.seq); ++ ++ if (unlikely(is_keepalive)) ++ th->seq--; ++ ++ tcp_flag_word(th) = flags; ++ th->ack_seq = htonl(ts->sync.ack_seq); ++ th->window = htons(ts->sync.window); ++ ++ th->doff = tcphdrlen >> 2; /* No options for now */ ++ th->res1 = 0; ++ th->check = 0; ++ th->urg_ptr = 0; ++ ++ /* compute checksum */ ++ wcheck = csum_partial(th, tcphdrlen, 0); ++ ++ if (skb->len - tcphdrlen - iphdrlen) { ++ wcheck = csum_partial(skb->data + tcphdrlen + iphdrlen, ++ skb->len - tcphdrlen - iphdrlen, wcheck); ++ } ++ ++ th->check = ++ tcp_v4_check(skb->len - iphdrlen, ts->saddr, ts->daddr, wcheck); ++} ++ ++static int rt_tcp_segment(struct dest_route *rt, struct tcp_socket *ts, ++ __be32 flags, u32 data_len, u8 *data_ptr, ++ u8 is_keepalive) ++{ ++ struct tcphdr *th; ++ struct rtsocket *sk = &ts->sock; ++ struct rtnet_device *rtdev = rt->rtdev; ++ struct rtskb *skb; ++ struct iphdr *iph; ++ struct rtskb *cloned_skb; ++ rtdm_lockctx_t context; ++ ++ int ret; ++ ++ u32 hh_len = (rtdev->hard_header_len + 15) & ~15; ++ u32 prio = (volatile unsigned int)sk->priority; ++ u32 mtu = rtdev->get_mtu(rtdev, prio); ++ ++ u8 *data = NULL; ++ ++ if ((skb = alloc_rtskb(mtu + hh_len + 15, &sk->skb_pool)) == NULL) { ++ rtdm_printk( ++ "rttcp: no more elements in skb_pool for allocation\n"); ++ return -ENOBUFS; ++ } ++ ++ /* rtskb_reserve(skb, hh_len + 20); */ ++ rtskb_reserve(skb, hh_len); ++ ++ iph = (struct iphdr *)rtskb_put(skb, 20); /* length of IP header */ ++ skb->nh.iph = iph; ++ ++ th = (struct tcphdr *)rtskb_put(skb, 20); /* length of TCP header */ ++ skb->h.th = th; ++ ++ if (data_len) { /* check for available place */ ++ data = (u8 *)rtskb_put(skb, ++ data_len); /* length of TCP payload */ ++ if (!memcpy(data, (void *)data_ptr, data_len)) { ++ ret = -EFAULT; ++ goto error; ++ } ++ } ++ ++ /* used local phy MTU value */ ++ if (data_len > mtu) ++ data_len = mtu; ++ ++ skb->rtdev = rtdev; ++ skb->priority = prio; ++ ++ /* do not validate socket connection on xmit ++ this should be done at upper level */ ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ rt_tcp_build_header(ts, skb, flags, is_keepalive); ++ ++ if ((ret = rt_ip_build_frame(skb, sk, rt, iph)) != 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto error; ++ } ++ ++ /* add rtskb entry to the socket retransmission queue */ ++ if (ts->tcp_state != TCP_CLOSE && ++ ((flags & (TCP_FLAG_SYN | TCP_FLAG_FIN)) || data_len)) { ++ /* rtskb_clone below is called under lock, this is an admission, ++ because for now there is no rtskb copy by reference */ ++ cloned_skb = rtskb_clone(skb, &ts->sock.skb_pool); ++ if 
(!cloned_skb) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk("rttcp: cann't clone skb\n"); ++ ret = -ENOMEM; ++ goto error; ++ } ++ ++ rt_tcp_retransmit_send(ts, cloned_skb); ++ } ++ ++ /* need to update sync here, because it is safe way in ++ comparison with races on fast ACK response */ ++ if (flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) ++ ts->sync.seq++; ++ ++ ts->sync.seq += data_len; ++ ts->sync.dst_window -= data_len; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* ignore return value from rtdev_xmit */ ++ /* the packet was enqueued and on error will be retransmitted later */ ++ /* on critical error after retransmission timeout the connection will ++ be closed by connection lost */ ++ rtdev_xmit(skb); ++ ++ return data_len; ++ ++error: ++ kfree_rtskb(skb); ++ return ret; ++} ++ ++static int rt_tcp_send(struct tcp_socket *ts, __be32 flags) ++{ ++ struct dest_route rt; ++ int ret; ++ ++ /* ++ * We may not have a route yet during setup. But once it is set, it stays ++ * until the socket died. ++ */ ++ if (likely(ts->rt.rtdev)) { ++ ret = rt_tcp_segment(&ts->rt, ts, flags, 0, NULL, 0); ++ } else { ++ ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr); ++ if (ret == 0) { ++ ret = rt_tcp_segment(&rt, ts, flags, 0, NULL, 0); ++ rtdev_dereference(rt.rtdev); ++ } ++ } ++ if (ret < 0) ++ rtdm_printk("rttcp: can't send a packet: err %d\n", -ret); ++ return ret; ++} ++ ++#ifdef YET_UNUSED ++static void rt_tcp_keepalive_timer(rtdm_timer_t *timer) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_keepalive *keepalive = ++ container_of(timer, struct tcp_keepalive, timer); ++ ++ struct tcp_socket *ts = ++ container_of(keepalive, struct tcp_socket, keepalive); ++ int signal = 0; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (keepalive->probes) { ++ /* Send a probe */ ++ if (rt_tcp_segment(&ts->rt, ts, 0, 0, NULL, 1) < 0) { ++ /* data receiving and sending is not possible anymore */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ keepalive->probes--; ++ rtdm_timer_start_in_handler(&keepalive->timer, ++ rt_tcp_keepalive_intvl, 0, ++ RTDM_TIMERMODE_RELATIVE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ /* data receiving and sending is not possible anymore */ ++ ++ signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++} ++#endif ++ ++static inline u32 rt_tcp_initial_seq(void) ++{ ++ uint64_t clock_val = rtdm_clock_read_monotonic(); ++ return (u32)(clock_val ^ (clock_val >> 32)); ++} ++ ++/*** ++ * rt_tcp_dest_socket ++ */ ++static struct rtsocket *rt_tcp_dest_socket(struct rtskb *skb) ++{ ++ struct tcphdr *th = skb->h.th; ++ ++ u32 saddr = skb->nh.iph->saddr; ++ u32 daddr = skb->nh.iph->daddr; ++ u32 sport = th->source; ++ u32 dport = th->dest; ++ ++ u32 data_len; ++ ++ if (tcp_v4_check(skb->len, saddr, daddr, ++ csum_partial(skb->data, skb->len, 0))) { ++ rtdm_printk("rttcp: invalid TCP packet checksum, dropped\n"); ++ return NULL; /* Invalid checksum, drop the packet */ ++ } ++ ++ /* find the destination socket */ ++ if ((skb->sk = rt_tcp_v4_lookup(daddr, dport)) == NULL) { ++ /* ++ rtdm_printk("Not found addr:0x%08x, port: 0x%04x\n", daddr, dport); ++ */ ++ if (!th->rst) { ++ /* No listening socket found, send RST|ACK */ ++ rst_socket.saddr = daddr; ++ rst_socket.daddr = saddr; ++ rst_socket.sport = dport; ++ rst_socket.dport 
= sport; ++ ++ data_len = skb->len - (th->doff << 2); ++ ++ rst_socket.sync.seq = 0; ++ rst_socket.sync.ack_seq = ++ rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (rt_ip_route_output(&rst_socket.rt, daddr, saddr) == ++ 0) { ++ rt_socket_reference(&rst_socket.sock); ++ rt_tcp_send(&rst_socket, ++ TCP_FLAG_ACK | TCP_FLAG_RST); ++ rtdev_dereference(rst_socket.rt.rtdev); ++ } ++ } ++ } ++ ++ return skb->sk; ++} ++ ++static void rt_tcp_window_update(struct tcp_socket *ts, u16 window) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->sync.dst_window) { ++ ts->sync.dst_window = window; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ if (!window) { ++ /* clear send event status */ ++ rtdm_event_clear(&ts->send_evt); ++ } ++ } else { ++ if (window) { ++ ts->sync.dst_window = window; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* set send event status */ ++ rtdm_event_signal(&ts->send_evt); ++ } else { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ } ++} ++ ++/*** ++ * rt_tcp_rcv ++ */ ++static void rt_tcp_rcv(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ struct tcphdr *th = skb->h.th; ++ unsigned int data_len = skb->len - (th->doff << 2); ++ u32 seq = ntohl(th->seq); ++ int signal; ++ ++ ts = container_of(skb->sk, struct tcp_socket, sock); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ if (ts->error_rate > 0) { ++ if ((ts->packet_counter++ % error_rate) < ts->multi_error) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++ /* Check for daddr/dport correspondence to values stored in ++ selected socket from hash */ ++ if (ts->tcp_state != TCP_LISTEN && (ts->daddr != skb->nh.iph->saddr || ++ ts->dport != skb->h.th->source)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ /* Check if it is a keepalive probe */ ++ if (ts->sync.ack_seq == (seq + 1) && ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } ++ ++ if (ts->tcp_state == TCP_SYN_SENT) { ++ ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (th->syn && th->ack) { ++ rt_tcp_socket_validate(ts); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_event_signal(&ts->conn_evt); ++ /* Send ACK */ ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } ++ ++ ts->tcp_state = TCP_CLOSE; ++ ts->sync.seq = ntohl(th->ack_seq); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Send RST|ACK */ ++ rtdm_event_signal(&ts->conn_evt); ++ rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* Check for SEQ correspondence to determine the connection relevance */ ++ ++ /* OR-list of conditions to be satisfied: ++ * ++ * th->ack && rt_tcp_after(ts->nacked_first, ntohl(th->ack_seq)) ++ * th->ack && th->rst && ... 
++ * th->syn && (ts->tcp_state == TCP_LISTEN || ++ ts->tcp_state == TCP_SYN_SENT) ++ * rt_tcp_after(seq, ts->sync.ack_seq) && ++ rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window) ++ */ ++ ++ if ((rt_tcp_after(seq, ts->sync.ack_seq) && ++ rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window)) || ++ th->rst || ++ (th->syn && ++ (ts->tcp_state == TCP_LISTEN || ts->tcp_state == TCP_SYN_SENT))) { ++ /* everything is ok */ ++ } else if (rt_tcp_after(seq, ts->sync.ack_seq - data_len)) { ++ /* retransmission of data we already acked */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto drop; ++ } else { ++ /* drop forward ack */ ++ if (th->ack && ++ /* but reset ack from old connection */ ++ ts->tcp_state == TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk( ++ "rttcp: dropped unappropriate ACK packet %u\n", ++ ts->sync.ack_seq); ++ goto drop; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_printk("rttcp: sequence number is not in window, " ++ "dropped (failed: %u <= %u <= %u)\n", ++ ts->sync.ack_seq, seq, ++ ts->sync.ack_seq + ts->sync.window); ++ ++ /* That's a forced RST for a lost connection */ ++ rst_socket.saddr = skb->nh.iph->daddr; ++ rst_socket.daddr = skb->nh.iph->saddr; ++ rst_socket.sport = th->dest; ++ rst_socket.dport = th->source; ++ ++ rst_socket.sync.seq = ntohl(th->ack_seq); ++ rst_socket.sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (rt_ip_route_output(&rst_socket.rt, rst_socket.daddr, ++ rst_socket.saddr) == 0) { ++ rt_socket_reference(&rst_socket.sock); ++ rt_tcp_send(&rst_socket, TCP_FLAG_RST | TCP_FLAG_ACK); ++ rtdev_dereference(rst_socket.rt.rtdev); ++ } ++ goto drop; ++ } ++ ++ if (th->rst) { ++ if (ts->tcp_state == TCP_SYN_RECV) { ++ ts->tcp_state = TCP_LISTEN; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } else { ++ /* Drop our half-open connection, peer obviously went away. 
*/ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ goto drop; ++ } ++ } ++ ++ ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len); ++ ++ if (th->fin) { ++ if (ts->tcp_state == TCP_ESTABLISHED) { ++ /* Send ACK */ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE_WAIT); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ goto feed; ++ } else if ((ts->tcp_state == TCP_FIN_WAIT1 && th->ack) || ++ ts->tcp_state == TCP_FIN_WAIT2) { ++ /* Send ACK */ ++ ts->tcp_state = TCP_TIME_WAIT; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ /* data receiving is not possible anymore */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto feed; ++ } else if (ts->tcp_state == TCP_FIN_WAIT1) { ++ /* Send ACK */ ++ ts->tcp_state = TCP_CLOSING; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ /* data receiving is not possible anymore */ ++ rtdm_sem_destroy(&ts->sock.pending_sem); ++ goto feed; ++ } else { ++ /* just drop it */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ } ++ ++ if (th->syn) { ++ /* Need to differentiate LISTEN socket from ESTABLISHED one */ ++ /* Both of them have the same sport/saddr, but different dport/daddr */ ++ /* dport is unknown if it is the first connection of n */ ++ ++ if (ts->tcp_state == TCP_LISTEN) { ++ /* Need to store ts->seq while sending SYN earlier */ ++ /* The socket shall be in TCP_LISTEN state */ ++ ++ /* safe to update ts->saddr here due to a single task for ++ rt_tcp_rcv() and rt_tcp_dest_socket() callers */ ++ ts->saddr = skb->nh.iph->daddr; ++ ++ ts->daddr = skb->nh.iph->saddr; ++ ts->dport = th->source; ++ ts->sync.seq = rt_tcp_initial_seq(); ++ ts->sync.window = 4096; ++ ts->tcp_state = TCP_SYN_RECV; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Send SYN|ACK */ ++ rt_tcp_send(ts, TCP_FLAG_SYN | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* Send RST|ACK */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK); ++ goto drop; ++ } ++ ++ /* ACK received without SYN, FIN or RST flags */ ++ if (th->ack) { ++ /* Check ack sequence */ ++ if (rt_tcp_before(ts->sync.seq + 1, ntohl(th->ack_seq))) { ++ rtdm_printk("rttcp: unexpected ACK %u %u %u\n", ++ ts->sync.seq, ts->nacked_first, ++ ntohl(th->ack_seq)); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ if (ts->tcp_state == TCP_LAST_ACK) { ++ /* close connection and free socket data */ ++ ts->tcp_state = TCP_CLOSE; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* socket destruction will be done on close() */ ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto drop; ++ } else if (ts->tcp_state == TCP_FIN_WAIT1) { ++ ts->tcp_state = TCP_FIN_WAIT2; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto feed; ++ } else if (ts->tcp_state == TCP_SYN_RECV) { ++ rt_tcp_socket_validate(ts); ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdm_event_signal(&ts->conn_evt); ++ goto feed; ++ } else if (ts->tcp_state == TCP_CLOSING) { ++ ts->tcp_state = TCP_TIME_WAIT; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ /* socket destruction will be done on close() */ ++ rtdm_nrtsig_pend(&ts->close_sig); ++ goto feed; ++ } ++ } 
++ ++ if (ts->tcp_state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto drop; ++ } ++ ++ if (data_len == 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ goto feed; ++ } ++ ++ /* Send ACK */ ++ ts->sync.window -= data_len; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); ++ ++ rtskb_queue_tail(&skb->sk->incoming, skb); ++ rtdm_sem_up(&ts->sock.pending_sem); ++ ++ /* inform retransmission subsystem about arrived ack */ ++ if (th->ack) { ++ rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq)); ++ } ++ ++ rt_tcp_keepalive_feed(ts); ++ rt_tcp_window_update(ts, ntohs(th->window)); ++ ++ return; ++ ++feed: ++ /* inform retransmission subsystem about arrived ack */ ++ if (th->ack) { ++ rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq)); ++ } ++ ++ rt_tcp_keepalive_feed(ts); ++ rt_tcp_window_update(ts, ntohs(th->window)); ++ ++drop: ++ kfree_rtskb(skb); ++ return; ++} ++ ++/*** ++ * rt_tcp_rcv_err ++ */ ++static void rt_tcp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("rttcp: rt_tcp_rcv err\n"); ++} ++ ++static int rt_tcp_window_send(struct tcp_socket *ts, u32 data_len, u8 *data_ptr) ++{ ++ u32 dst_window = ts->sync.dst_window; ++ int ret; ++ ++ if (data_len > dst_window) ++ data_len = dst_window; ++ ++ if ((ret = rt_tcp_segment(&ts->rt, ts, TCP_FLAG_ACK, data_len, data_ptr, ++ 0)) < 0) { ++ rtdm_printk("rttcp: cann't send a packet: err %d\n", -ret); ++ return ret; ++ } ++ ++ return ret; ++} ++ ++static void rt_tcp_close_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ complete_all((struct completion *)arg); ++} ++ ++static int rt_tcp_socket_create(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ int i; ++ int index; ++ struct rtsocket *sock = &ts->sock; ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ sock->prot.inet.state = TCP_CLOSE; ++ sock->prot.inet.tos = 0; ++ /* ++ rtdm_printk("rttcp: rt_tcp_socket_create 0x%p\n", ts); ++ */ ++ rtdm_lock_init(&ts->socket_lock); ++ ++ ts->rt.rtdev = NULL; ++ ++ ts->tcp_state = TCP_CLOSE; ++ ++ ts->is_accepting = 0; ++ ts->is_accepted = 0; ++ ts->is_binding = 0; ++ ts->is_bound = 0; ++ ts->is_valid = 0; ++ ts->is_closed = 0; ++ ++ ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE; ++ ++ rtdm_event_init(&ts->conn_evt, 0); ++ ++ ts->keepalive.enabled = 0; ++ ++ ts->timer_state = max_retransmits; ++ timerwheel_init_timer(&ts->timer, rt_tcp_retransmit_handler, ts); ++ rtskb_queue_init(&ts->retransmit_queue); ++ ++ init_completion(&ts->fin_handshake); ++ rtdm_nrtsig_init(&ts->close_sig, rt_tcp_close_signal_handler, ++ &ts->fin_handshake); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION ++ ts->packet_counter = counter_start; ++ ts->error_rate = error_rate; ++ ts->multi_error = multi_error; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */ ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ /* enforce maximum number of TCP sockets */ ++ if (free_ports == 0) { ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ rtdm_nrtsig_destroy(&ts->close_sig); ++ return -EAGAIN; ++ } ++ free_ports--; ++ ++ /* find free auto-port in bitmap */ ++ for (i = 0; i < RT_PORT_BITMAP_WORDS; i++) ++ if (port_bitmap[i] != (unsigned long)-1) ++ break; ++ index = ffz(port_bitmap[i]); ++ set_bit(index, &port_bitmap[i]); ++ index += i * 32; ++ sock->prot.inet.reg_index = index; ++ sock->prot.inet.sport = index + tcp_auto_port_start; ++ ++ /* register TCP socket */ ++ port_registry[index] = ts; ++ port_hash_insert(ts, INADDR_ANY, sock->prot.inet.sport); 
++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ return 0; ++} ++ ++/*** ++ * rt_tcp_socket - create a new TCP-Socket ++ * @s: socket ++ */ ++static int rt_tcp_socket(struct rtdm_fd *fd) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ int ret; ++ ++ if ((ret = rt_socket_init(fd, IPPROTO_TCP)) != 0) ++ return ret; ++ ++ if ((ret = rt_tcp_socket_create(ts)) != 0) ++ rt_socket_cleanup(fd); ++ ++ return ret; ++} ++ ++static int rt_tcp_dispatched_packet_send(struct rt_proc_call *call) ++{ ++ int ret; ++ struct rt_tcp_dispatched_packet_send_cmd *cmd; ++ ++ cmd = rtpc_get_priv(call, struct rt_tcp_dispatched_packet_send_cmd); ++ ret = rt_tcp_send(cmd->ts, cmd->flags); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_socket_destruct ++ * this function requires non realtime context ++ */ ++static void rt_tcp_socket_destruct(struct tcp_socket *ts) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *skb; ++ int index; ++ int signal; ++ struct rtsocket *sock = &ts->sock; ++ ++ /* ++ rtdm_printk("rttcp: rt_tcp_socket_destruct 0x%p\n", ts); ++ */ ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ if (sock->prot.inet.reg_index >= 0) { ++ index = sock->prot.inet.reg_index; ++ ++ clear_bit(index % BITS_PER_LONG, ++ &port_bitmap[index / BITS_PER_LONG]); ++ port_hash_del(port_registry[index]); ++ free_ports++; ++ sock->prot.inet.reg_index = -1; ++ } ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE); ++ ++ rt_tcp_keepalive_disable(ts); ++ ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ /* dereference rtdev */ ++ if (ts->rt.rtdev != NULL) { ++ rtdev_dereference(ts->rt.rtdev); ++ ts->rt.rtdev = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rtdm_event_destroy(&ts->conn_evt); ++ ++ rtdm_nrtsig_destroy(&ts->close_sig); ++ ++ /* cleanup already collected fragments */ ++ rt_ip_frag_invalidate_socket(sock); ++ ++ /* free packets in incoming queue */ ++ while ((skb = rtskb_dequeue(&sock->incoming)) != NULL) ++ kfree_rtskb(skb); ++ ++ /* ensure that the timer is no longer running */ ++ timerwheel_remove_timer_sync(&ts->timer); ++ ++ /* free packets in retransmission queue */ ++ while ((skb = __rtskb_dequeue(&ts->retransmit_queue)) != NULL) ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * rt_tcp_close ++ */ ++static void rt_tcp_close(struct rtdm_fd *fd) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ struct rt_tcp_dispatched_packet_send_cmd send_cmd; ++ rtdm_lockctx_t context; ++ int signal = 0; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ ts->is_closed = 1; ++ ++ if (ts->tcp_state == TCP_ESTABLISHED || ts->tcp_state == TCP_SYN_RECV) { ++ /* close() from ESTABLISHED */ ++ send_cmd.ts = ts; ++ send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK; ++ signal = rt_tcp_socket_invalidate(ts, TCP_FIN_WAIT1); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd, ++ sizeof(send_cmd), NULL, NULL); ++ /* result is ignored */ ++ ++ /* Give the peer some time to reply to our FIN. ++ Since it is not relevant what exactly causes the wait ++ function to return its result is ignored. 
*/ ++ wait_for_completion_interruptible_timeout(&ts->fin_handshake, ++ msecs_to_jiffies(close_timeout)); ++ } else if (ts->tcp_state == TCP_CLOSE_WAIT) { ++ /* Send FIN in CLOSE_WAIT */ ++ send_cmd.ts = ts; ++ send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK; ++ signal = rt_tcp_socket_invalidate(ts, TCP_LAST_ACK); ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd, ++ sizeof(send_cmd), NULL, NULL); ++ /* result is ignored */ ++ ++ /* Give the peer some time to reply to our FIN. ++ Since it is not relevant what exactly causes the wait ++ function to return its result is ignored. */ ++ wait_for_completion_interruptible_timeout(&ts->fin_handshake, ++ msecs_to_jiffies(close_timeout)); ++ } else { ++ /* ++ rt_tcp_socket_validate() has not been called at all, ++ hence socket state is TCP_SYN_SENT or TCP_LISTEN, ++ or socket is in one of close states, ++ hence rt_tcp_socket_invalidate() was called, ++ but close() is called at first time ++ */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } ++ ++ if (signal) ++ rt_tcp_socket_invalidate_signal(ts); ++ ++ rt_tcp_socket_destruct(ts); ++ ++ rt_socket_cleanup(fd); ++} ++ ++/*** ++ * rt_tcp_bind - bind socket to local address ++ * @s: socket ++ * @addr: local address ++ */ ++static int rt_tcp_bind(struct rtdm_fd *fd, struct tcp_socket *ts, ++ const struct sockaddr __user *addr, socklen_t addrlen) ++{ ++ struct sockaddr_in *usin, _usin; ++ rtdm_lockctx_t context; ++ int index; ++ int bound = 0; ++ int ret = 0; ++ ++ usin = rtnet_get_arg(fd, &_usin, addr, sizeof(_usin)); ++ if (IS_ERR(usin)) ++ return PTR_ERR(usin); ++ ++ if ((addrlen < (int)sizeof(struct sockaddr_in)) || ++ ((usin->sin_port & tcp_auto_port_mask) == tcp_auto_port_start)) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->tcp_state != TCP_CLOSE || ts->is_bound || ts->is_binding) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ ts->is_binding = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ if ((index = ts->sock.prot.inet.reg_index) < 0) { ++ /* socket is destroyed */ ++ ret = -EBADF; ++ goto unlock_out; ++ } ++ ++ port_hash_del(ts); ++ if (port_hash_insert(ts, usin->sin_addr.s_addr, ++ usin->sin_port ?: index + tcp_auto_port_start)) { ++ port_hash_insert(ts, ts->saddr, ts->sport); ++ ++ ret = -EADDRINUSE; ++ goto unlock_out; ++ } ++ ++ bound = 1; ++ ++unlock_out: ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ts->is_bound = bound; ++ ts->is_binding = 0; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_connect ++ */ ++static int rt_tcp_connect(struct rtdm_fd *fd, struct tcp_socket *ts, ++ const struct sockaddr __user *serv_addr, ++ socklen_t addrlen) ++{ ++ struct sockaddr_in *usin, _usin; ++ struct dest_route rt; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ if (addrlen < (int)sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ usin = rtnet_get_arg(fd, &_usin, serv_addr, sizeof(_usin)); ++ if (IS_ERR(usin)) ++ return PTR_ERR(usin); ++ ++ if (usin->sin_family != AF_INET) ++ return -EAFNOSUPPORT; ++ ++ ret = rt_ip_route_output(&rt, usin->sin_addr.s_addr, ts->saddr); ++ if (ret < 0) { ++ /* no route to host */ ++ return -ENETUNREACH; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->is_closed) { ++ 
rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ret = -EBADF; ++ goto err_deref; ++ } ++ ++ if (ts->tcp_state != TCP_CLOSE || ts->is_binding) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ret = -EINVAL; ++ goto err_deref; ++ } ++ ++ if (ts->rt.rtdev == NULL) ++ memcpy(&ts->rt, &rt, sizeof(rt)); ++ else ++ rtdev_dereference(rt.rtdev); ++ ++ ts->saddr = rt.rtdev->local_ip; ++ ++ ts->daddr = usin->sin_addr.s_addr; ++ ts->dport = usin->sin_port; ++ ++ ts->sync.seq = rt_tcp_initial_seq(); ++ ts->sync.ack_seq = 0; ++ ts->sync.window = 4096; ++ ts->sync.dst_window = 0; ++ ++ ts->tcp_state = TCP_SYN_SENT; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ /* Complete three-way handshake */ ++ ret = rt_tcp_send(ts, TCP_FLAG_SYN); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't send SYN\n"); ++ return ret; ++ } ++ ++ ret = rtdm_event_timedwait(&ts->conn_evt, rt_tcp_connection_timeout, ++ NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ return ret; ++ ++ default: ++ return -EBADF; ++ } ++ ++ if (ts->tcp_state == TCP_SYN_SENT) { ++ /* received conn_evt, but connection is not established */ ++ return -ECONNREFUSED; ++ } ++ ++ return ret; ++ ++err_deref: ++ rtdev_dereference(rt.rtdev); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_listen ++ */ ++static int rt_tcp_listen(struct tcp_socket *ts, unsigned long backlog) ++{ ++ int ret; ++ rtdm_lockctx_t context; ++ ++ /* Ignore backlog value, maximum number of queued connections is 1 */ ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->is_closed) { ++ ret = -EBADF; ++ goto unlock_out; ++ } ++ ++ if (ts->tcp_state != TCP_CLOSE || ts->is_binding) { ++ ret = -EINVAL; ++ goto unlock_out; ++ } ++ ++ ts->tcp_state = TCP_LISTEN; ++ ret = 0; ++ ++unlock_out: ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_accept ++ */ ++static int rt_tcp_accept(struct rtdm_fd *fd, struct tcp_socket *ts, ++ struct sockaddr *addr, socklen_t __user *addrlen) ++{ ++ /* Return sockaddr, but bind it with rt_socket_init, so it would be ++ possible to read/write from it in future, return valid file descriptor */ ++ ++ int ret; ++ socklen_t *uaddrlen, _uaddrlen; ++ struct sockaddr_in sin; ++ nanosecs_rel_t timeout = ts->sock.timeout; ++ rtdm_lockctx_t context; ++ struct dest_route rt; ++ ++ uaddrlen = rtnet_get_arg(fd, &_uaddrlen, addrlen, sizeof(_uaddrlen)); ++ if (IS_ERR(uaddrlen)) ++ return PTR_ERR(uaddrlen); ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->is_accepting || ts->is_accepted) { ++ /* socket is already accepted or is accepting a connection right now */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EALREADY; ++ } ++ ++ if (ts->tcp_state != TCP_LISTEN || ++ *uaddrlen < sizeof(struct sockaddr_in)) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ ts->is_accepting = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ ret = rtdm_event_timedwait(&ts->conn_evt, timeout, NULL); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -ETIMEDOUT: ++ case -EINTR: ++ goto err; ++ ++ default: ++ ret = -EBADF; ++ goto err; ++ } ++ ++ /* accept() reported about connection establishment */ ++ ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr); ++ if (ret < 0) { ++ /* strange, no route to host, keep status quo */ ++ ret = -EPROTO; ++ goto err; ++ } ++ ++ if (addr) { ++ sin.sin_family = AF_INET; ++ sin.sin_port = ts->dport; ++ 
sin.sin_addr.s_addr = ts->daddr; ++ ret = rtnet_put_arg(fd, addr, &sin, sizeof(sin)); ++ if (ret) { ++ rtdev_dereference(rt.rtdev); ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->tcp_state != TCP_ESTABLISHED) { ++ /* protocol error */ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rtdev_dereference(rt.rtdev); ++ ret = -EPROTO; ++ goto err; ++ } ++ ++ if (ts->rt.rtdev == NULL) ++ memcpy(&ts->rt, &rt, sizeof(rt)); ++ else ++ rtdev_dereference(rt.rtdev); ++ ++ ts->is_accepted = 1; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ ret = rtdm_fd_ufd(rt_socket_fd(&ts->sock)); ++ ++err: ++ /* it is not critical to leave this unlocked ++ due to single entry nature of accept() */ ++ ts->is_accepting = 0; ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_shutdown ++ */ ++static int rt_tcp_shutdown(struct tcp_socket *ts, unsigned long how) ++{ ++ return -EOPNOTSUPP; ++} ++ ++/*** ++ * rt_tcp_setsockopt ++ */ ++static int rt_tcp_setsockopt(struct rtdm_fd *fd, struct tcp_socket *ts, ++ int level, int optname, const void *optval, ++ socklen_t optlen) ++{ ++ /* uint64_t val; */ ++ struct timeval tv; ++ rtdm_lockctx_t context; ++ ++ switch (optname) { ++ case SO_KEEPALIVE: ++ if (optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ /* commented out, because current implementation transmits ++ keepalive probes from interrupt context */ ++ /* ++ val = *(unsigned long*)optval; ++ ++ if (val) ++ rt_tcp_keepalive_enable(ts); ++ else ++ rt_tcp_keepalive_disable(ts); ++ */ ++ return 0; ++ ++ case SO_SNDTIMEO: ++ if (optlen < sizeof(tv)) ++ return -EINVAL; ++ if (rtdm_copy_from_user(fd, &tv, optval, sizeof(tv))) ++ return -EFAULT; ++ if (tv.tv_usec < 0 || tv.tv_usec >= 1000000) ++ return -EDOM; ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (tv.tv_sec < 0) { ++ ts->sk_sndtimeo = RTDM_TIMEOUT_NONE; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE; ++ if (tv.tv_sec == 0 && tv.tv_usec == 0) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / 1000000000ull - 1)) ++ ts->sk_sndtimeo = ++ (tv.tv_sec * 1000000 + tv.tv_usec) * 1000; ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ return 0; ++ ++ case SO_REUSEADDR: ++ /* to implement */ ++ return -EOPNOTSUPP; ++ } ++ ++ return -ENOPROTOOPT; ++} ++ ++/*** ++ * rt_tcp_getsockopt ++ */ ++static int rt_tcp_getsockopt(struct rtdm_fd *fd, struct tcp_socket *ts, ++ int level, int optname, void *optval, ++ socklen_t *optlen) ++{ ++ int ret = 0; ++ ++ if (*optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case SO_ERROR: ++ ret = 0; /* used in nonblocking connect(), extend later */ ++ break; ++ ++ default: ++ ret = -ENOPROTOOPT; ++ break; ++ } ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_ioctl ++ */ ++static int rt_tcp_ioctl(struct rtdm_fd *fd, unsigned int request, ++ void __user *arg) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ const struct _rtdm_getsockaddr_args *getaddr; ++ struct _rtdm_getsockaddr_args _getaddr; ++ const struct _rtdm_getsockopt_args *getopt; ++ struct _rtdm_getsockopt_args _getopt; ++ const struct _rtdm_setsockopt_args *setopt; ++ struct _rtdm_setsockopt_args _setopt; ++ int in_rt; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == 
RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ in_rt = rtdm_in_rt_context(); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_tcp_bind(fd, ts, setaddr->addr, setaddr->addrlen); ++ case _RTIOC_CONNECT: ++ if (!in_rt) ++ return -ENOSYS; ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ return rt_tcp_connect(fd, ts, setaddr->addr, setaddr->addrlen); ++ ++ case _RTIOC_LISTEN: ++ return rt_tcp_listen(ts, (unsigned long)arg); ++ ++ case _RTIOC_ACCEPT: ++ if (!in_rt) ++ return -ENOSYS; ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ return rt_tcp_accept(fd, ts, getaddr->addr, getaddr->addrlen); ++ ++ case _RTIOC_SHUTDOWN: ++ return rt_tcp_shutdown(ts, (unsigned long)arg); ++ ++ case _RTIOC_SETSOCKOPT: ++ setopt = rtnet_get_arg(fd, &_setopt, arg, sizeof(_setopt)); ++ if (IS_ERR(setopt)) ++ return PTR_ERR(setopt); ++ ++ if (setopt->level != SOL_SOCKET) ++ break; ++ ++ return rt_tcp_setsockopt(fd, ts, setopt->level, setopt->optname, ++ setopt->optval, setopt->optlen); ++ ++ case _RTIOC_GETSOCKOPT: ++ getopt = rtnet_get_arg(fd, &_getopt, arg, sizeof(_getopt)); ++ if (IS_ERR(getopt)) ++ return PTR_ERR(getopt); ++ ++ if (getopt->level != SOL_SOCKET) ++ break; ++ ++ return rt_tcp_getsockopt(fd, ts, getopt->level, getopt->optname, ++ getopt->optval, getopt->optlen); ++ default: ++ break; ++ } ++ ++ return rt_ip_ioctl(fd, request, arg); ++} ++ ++/*** ++ * rt_tcp_read ++ */ ++static ssize_t rt_tcp_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ struct rtsocket *sock = &ts->sock; ++ ++ struct rtskb *skb; ++ struct rtskb *first_skb; ++ nanosecs_rel_t timeout = sock->timeout; ++ size_t data_len; ++ size_t th_len; ++ size_t copied = 0; ++ size_t block_size; ++ u8 *user_buf = buf; ++ int ret; ++ rtdm_lockctx_t context; ++ ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ return -EFAULT; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ if (ts->is_closed) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EBADF; ++ } ++ ++ if (!ts->is_valid) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return 0; ++ } ++ ++ if (ts->tcp_state != TCP_ESTABLISHED && ++ ts->tcp_state != TCP_FIN_WAIT2) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ++ while (copied < nbyte) { ++ ret = rtdm_sem_timeddown(&ts->sock.pending_sem, timeout, ++ &timeout_seq); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ return (copied ? 
copied : ret); ++ ++ case -EIDRM: /* event is destroyed */ ++ if (ts->is_closed) ++ return -EBADF; ++ ++ return copied; ++ ++ default: ++ if (ts->is_closed) { ++ return -EBADF; ++ } ++ ++ return 0; ++ } ++ ++ skb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(skb != NULL, return -EFAULT;); ++ ++ th_len = (skb->h.th->doff) << 2; ++ ++ data_len = skb->len - th_len; ++ ++ __rtskb_pull(skb, th_len); ++ ++ first_skb = skb; ++ ++ /* iterate over all IP fragments */ ++ iterate_fragments: ++ block_size = skb->len; ++ copied += block_size; ++ data_len -= block_size; ++ ++ if (copied > nbyte) { ++ block_size -= copied - nbyte; ++ copied = nbyte; ++ ++ if (rtdm_copy_to_user(fd, user_buf, skb->data, ++ block_size)) { ++ kfree_rtskb(first_skb); /* or store the data? */ ++ return -EFAULT; ++ } ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->sync.window) { ++ ts->sync.window += block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, ++ context); ++ } else { ++ ts->sync.window = block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, ++ context); ++ rt_tcp_send(ts, ++ TCP_FLAG_ACK); /* window update */ ++ } ++ ++ __rtskb_pull(skb, block_size); ++ __rtskb_push(first_skb, sizeof(struct tcphdr)); ++ first_skb->h.th->doff = 5; ++ rtskb_queue_head(&sock->incoming, first_skb); ++ rtdm_sem_up(&ts->sock.pending_sem); ++ ++ return copied; ++ } ++ ++ if (rtdm_copy_to_user(fd, user_buf, skb->data, block_size)) { ++ kfree_rtskb(first_skb); /* or store the data? */ ++ return -EFAULT; ++ } ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ if (ts->sync.window) { ++ ts->sync.window += block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ } else { ++ ts->sync.window = block_size; ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ rt_tcp_send(ts, TCP_FLAG_ACK); /* window update */ ++ } ++ ++ if ((skb = skb->next) != NULL) { ++ user_buf += data_len; ++ goto iterate_fragments; ++ } ++ ++ kfree_rtskb(first_skb); ++ } ++ ++ return copied; ++} ++ ++/*** ++ * rt_tcp_write ++ */ ++static ssize_t rt_tcp_write(struct rtdm_fd *fd, const void __user *user_buf, ++ size_t nbyte) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ uint32_t sent_len = 0; ++ rtdm_lockctx_t context; ++ int ret = 0; ++ nanosecs_rel_t sk_sndtimeo; ++ void *buf; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ return -EFAULT; ++ } ++ ++ rtdm_lock_get_irqsave(&ts->socket_lock, context); ++ ++ sk_sndtimeo = ts->sk_sndtimeo; ++ ++ if (!ts->is_valid) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EPIPE; ++ } ++ ++ if ((ts->daddr | ts->dport) == 0 || ts->tcp_state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ return -EINVAL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ts->socket_lock, context); ++ ++ buf = xnmalloc(nbyte); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ ret = rtdm_copy_from_user(fd, buf, user_buf, nbyte); ++ if (ret) { ++ xnfree(buf); ++ return ret; ++ } ++ ++ while (sent_len < nbyte) { ++ ret = rtdm_event_timedwait(&ts->send_evt, sk_sndtimeo, NULL); ++ ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ xnfree(buf); ++ return sent_len ?: ret; ++ ++ case -EIDRM: /* event is destroyed */ ++ default: ++ if (ts->is_closed) { ++ xnfree(buf); ++ return -EBADF; ++ } ++ ++ xnfree(buf); ++ return sent_len ?: ret; ++ } ++ ++ ret = rt_tcp_window_send(ts, nbyte - sent_len, ++ ((u8 *)buf) + sent_len); ++ ++ if (ret < 0) { /* check this branch correctness */ ++ rtdm_event_signal(&ts->send_evt); ++ break; ++ } 
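++		/* account for the bytes handed off in this pass; while the peer
++		   still advertises window space, re-arm send_evt so the next
++		   pass of the loop does not block */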
++ ++ sent_len += ret; ++ if (ts->sync.dst_window) ++ rtdm_event_signal(&ts->send_evt); ++ } ++ ++ xnfree(buf); ++ return (ret < 0 ? ret : sent_len); ++} ++ ++/*** ++ * rt_tcp_recvmsg ++ */ ++static ssize_t rt_tcp_recvmsg(struct rtdm_fd *fd, struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct user_msghdr _msg; ++ ssize_t ret; ++ size_t len; ++ void *buf; ++ ++ if (msg_flags) ++ return -EOPNOTSUPP; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ /* loop over all vectors to be implemented */ ++ if (msg->msg_iovlen != 1) ++ return -EOPNOTSUPP; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ len = iov[0].iov_len; ++ if (len > 0) { ++ buf = iov[0].iov_base; ++ ret = rt_tcp_read(fd, buf, len); ++ } ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_sendmsg ++ */ ++static ssize_t rt_tcp_sendmsg(struct rtdm_fd *fd, const struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct user_msghdr _msg; ++ ssize_t ret; ++ size_t len; ++ ++ if (msg_flags) ++ return -EOPNOTSUPP; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ /* loop over all vectors to be implemented */ ++ if (msg->msg_iovlen != 1) ++ return -EOPNOTSUPP; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ len = iov[0].iov_len; ++ if (len > 0) ++ ret = rt_tcp_write(fd, iov[0].iov_base, len); ++ ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_select ++ */ ++static int rt_tcp_select(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct tcp_socket *ts = rtdm_fd_to_private(fd); ++ ++ switch (type) { ++ case XNSELECT_READ: ++ return rtdm_sem_select(&ts->sock.pending_sem, selector, ++ XNSELECT_READ, fd_index); ++ case XNSELECT_WRITE: ++ return rtdm_event_select(&ts->send_evt, selector, ++ XNSELECT_WRITE, fd_index); ++ default: ++ return -EBADF; ++ } ++ ++ return -EINVAL; ++} ++ ++/*** ++ * TCP-Initialisation ++ */ ++static struct rtinet_protocol tcp_protocol = { .protocol = IPPROTO_TCP, ++ .dest_socket = ++ &rt_tcp_dest_socket, ++ .rcv_handler = &rt_tcp_rcv, ++ .err_handler = &rt_tcp_rcv_err, ++ .init_socket = &rt_tcp_socket }; ++ ++static struct rtdm_driver tcp_driver = { ++ .profile_info = RTDM_PROFILE_INFO(tcp, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct tcp_socket), ++ ++ .protocol_family = PF_INET, ++ .socket_type = SOCK_STREAM, ++ ++ .ops = { ++ .socket = rt_inet_socket, ++ .close = rt_tcp_close, ++ .ioctl_rt = rt_tcp_ioctl, ++ .ioctl_nrt = rt_tcp_ioctl, ++ .read_rt = rt_tcp_read, ++ .write_rt = rt_tcp_write, ++ .recvmsg_rt = rt_tcp_recvmsg, ++ .sendmsg_rt = rt_tcp_sendmsg, ++ .select = rt_tcp_select, ++ }, ++}; ++ ++static struct rtdm_device tcp_device = { ++ .driver = &tcp_driver, ++ .label = "tcp", ++}; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++/*** ++ * rt_tcp_proc_read ++ */ ++static inline char *rt_tcp_string_of_state(u8 state) ++{ ++ switch (state) { ++ case TCP_ESTABLISHED: ++ return "ESTABLISHED"; ++ case TCP_SYN_SENT: ++ return "SYN_SENT"; ++ case TCP_SYN_RECV: ++ return "SYN_RECV"; ++ case TCP_FIN_WAIT1: ++ return "FIN_WAIT1"; ++ case TCP_FIN_WAIT2: ++ return "FIN_WAIT2"; ++ case TCP_TIME_WAIT: ++ return 
"TIME_WAIT"; ++ case TCP_CLOSE: ++ return "CLOSE"; ++ case TCP_CLOSE_WAIT: ++ return "CLOSE_WAIT"; ++ case TCP_LAST_ACK: ++ return "LASK_ACK"; ++ case TCP_LISTEN: ++ return "LISTEN"; ++ case TCP_CLOSING: ++ return "CLOSING"; ++ default: ++ return "UNKNOWN"; ++ } ++} ++ ++static int rtnet_ipv4_tcp_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ rtdm_lockctx_t context; ++ struct tcp_socket *ts; ++ u32 saddr, daddr; ++ u16 sport = 0, dport = 0; /* set to 0 to silence compiler */ ++ char sbuffer[24]; ++ char dbuffer[24]; ++ int state; ++ int index; ++ ++ xnvfile_printf(it, "Hash Local Address " ++ "Foreign Address State\n"); ++ ++ for (index = 0; index < RT_TCP_SOCKETS; index++) { ++ rtdm_lock_get_irqsave(&tcp_socket_base_lock, context); ++ ++ ts = port_registry[index]; ++ state = ts ? ts->tcp_state : TCP_CLOSE; ++ ++ if (ts && ts->tcp_state != TCP_CLOSE) { ++ saddr = ts->saddr; ++ sport = ts->sport; ++ daddr = ts->daddr; ++ dport = ts->dport; ++ } ++ ++ rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context); ++ ++ if (state != TCP_CLOSE) { ++ snprintf(sbuffer, sizeof(sbuffer), "%u.%u.%u.%u:%u", ++ NIPQUAD(saddr), ntohs(sport)); ++ snprintf(dbuffer, sizeof(dbuffer), "%u.%u.%u.%u:%u", ++ NIPQUAD(daddr), ntohs(dport)); ++ ++ xnvfile_printf(it, "%04X %-23s %-23s %s\n", ++ sport & port_hash_mask, sbuffer, dbuffer, ++ rt_tcp_string_of_state(state)); ++ } ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_ipv4_tcp_vfile_ops = { ++ .show = rtnet_ipv4_tcp_show, ++}; ++ ++static struct xnvfile_regular rtnet_ipv4_tcp_vfile = { ++ .ops = &rtnet_ipv4_tcp_vfile_ops, ++}; ++ ++/*** ++ * rt_tcp_proc_register ++ */ ++static int __init rt_tcp_proc_register(void) ++{ ++ return xnvfile_init_regular("tcp", &rtnet_ipv4_tcp_vfile, ++ &ipv4_proc_root); ++} ++ ++/*** ++ * rt_tcp_proc_unregister ++ */ ++ ++static void rt_tcp_proc_unregister(void) ++{ ++ xnvfile_destroy_regular(&rtnet_ipv4_tcp_vfile); ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++/*** ++ * rt_tcp_init ++ */ ++int __init rt_tcp_init(void) ++{ ++ unsigned int skbs; ++ int i; ++ int ret; ++ ++ if ((tcp_auto_port_start < 0) || ++ (tcp_auto_port_start >= 0x10000 - RT_TCP_SOCKETS)) ++ tcp_auto_port_start = 1024; ++ tcp_auto_port_start = ++ htons(tcp_auto_port_start & (tcp_auto_port_mask & 0xFFFF)); ++ tcp_auto_port_mask = htons(tcp_auto_port_mask | 0xFFFF0000); ++ ++ for (i = 0; i < ARRAY_SIZE(port_hash); i++) ++ INIT_HLIST_HEAD(&port_hash[i]); ++ ++ /* Perform essential initialization of the RST|ACK socket */ ++ skbs = rt_bare_socket_init(rst_fd, IPPROTO_TCP, RT_TCP_RST_PRIO, ++ RT_TCP_RST_POOL_SIZE); ++ if (skbs < RT_TCP_RST_POOL_SIZE) ++ printk("rttcp: allocated only %d RST|ACK rtskbs\n", skbs); ++ rst_socket.sock.prot.inet.tos = 0; ++ rst_fd->refs = 1; ++ rtdm_lock_init(&rst_socket.socket_lock); ++ ++ /* ++ * 100 ms forwarding timer with 8.38 ms slots ++ */ ++ ret = timerwheel_init(100000000ull, 23); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't initialize timerwheel task: %d\n", ++ -ret); ++ goto out_1; ++ } ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ if ((ret = rt_tcp_proc_register()) < 0) { ++ rtdm_printk("rttcp: cann't initialize proc entry: %d\n", -ret); ++ goto out_2; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ rt_inet_add_protocol(&tcp_protocol); ++ ++ ret = rtdm_dev_register(&tcp_device); ++ if (ret < 0) { ++ rtdm_printk("rttcp: cann't register RT TCP: %d\n", -ret); ++ goto out_3; ++ } ++ ++ return ret; ++ ++out_3: ++ rt_inet_del_protocol(&tcp_protocol); ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_tcp_proc_unregister(); 
++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++out_2: ++ timerwheel_cleanup(); ++ ++out_1: ++ rt_bare_socket_cleanup(&rst_socket.sock); ++ ++ return ret; ++} ++ ++/*** ++ * rt_tcp_release ++ */ ++void __exit rt_tcp_release(void) ++{ ++ rt_inet_del_protocol(&tcp_protocol); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rt_tcp_proc_unregister(); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ timerwheel_cleanup(); ++ ++ rt_bare_socket_cleanup(&rst_socket.sock); ++ ++ rtdm_dev_unregister(&tcp_device); ++} ++ ++module_init(rt_tcp_init); ++module_exit(rt_tcp_release); +--- linux/drivers/xenomai/net/stack/ipv4/ip_sock.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_sock.c 2021-04-07 16:01:27.049634420 +0800 +@@ -0,0 +1,194 @@ ++/*** ++ * ++ * ipv4/ip_sock.c ++ * ++ * Copyright (C) 2003 Hans-Peter Bock ++ * 2004, 2005 Jan Kiszka ++ * 2019 Sebastian Smolorz ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++int rt_ip_setsockopt(struct rtdm_fd *fd, struct rtsocket *s, int level, ++ int optname, const void __user *optval, socklen_t optlen) ++{ ++ int err = 0; ++ unsigned int _tos, *tos; ++ ++ if (level != SOL_IP) ++ return -ENOPROTOOPT; ++ ++ if (optlen < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case IP_TOS: ++ tos = rtnet_get_arg(fd, &_tos, optval, sizeof(_tos)); ++ if (IS_ERR(tos)) ++ return PTR_ERR(tos); ++ else ++ s->prot.inet.tos = *tos; ++ break; ++ ++ default: ++ err = -ENOPROTOOPT; ++ break; ++ } ++ ++ return err; ++} ++ ++int rt_ip_getsockopt(struct rtdm_fd *fd, struct rtsocket *s, int level, ++ int optname, void __user *optval, socklen_t __user *optlen) ++{ ++ int err = 0; ++ unsigned int tos; ++ socklen_t _len, *len; ++ ++ len = rtnet_get_arg(fd, &_len, optlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(unsigned int)) ++ return -EINVAL; ++ ++ switch (optname) { ++ case IP_TOS: ++ tos = s->prot.inet.tos; ++ err = rtnet_put_arg(fd, optval, &tos, sizeof(tos)); ++ if (!err) { ++ *len = sizeof(unsigned int); ++ err = rtnet_put_arg(fd, optlen, len, sizeof(socklen_t)); ++ } ++ break; ++ ++ default: ++ err = -ENOPROTOOPT; ++ break; ++ } ++ ++ return err; ++} ++ ++int rt_ip_getsockname(struct rtdm_fd *fd, struct rtsocket *s, ++ struct sockaddr __user *addr, socklen_t __user *addrlen) ++{ ++ struct sockaddr_in _sin; ++ socklen_t *len, _len; ++ int ret; ++ ++ len = rtnet_get_arg(fd, &_len, addrlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = s->prot.inet.saddr; ++ _sin.sin_port = s->prot.inet.sport; ++ memset(&_sin.sin_zero, 0, sizeof(_sin.sin_zero)); ++ ret = rtnet_put_arg(fd, addr, &_sin, sizeof(_sin)); ++ if (ret) ++ return ret; ++ ++ *len = 
sizeof(struct sockaddr_in); ++ ret = rtnet_put_arg(fd, addrlen, len, sizeof(socklen_t)); ++ ++ return ret; ++} ++ ++int rt_ip_getpeername(struct rtdm_fd *fd, struct rtsocket *s, ++ struct sockaddr __user *addr, socklen_t __user *addrlen) ++{ ++ struct sockaddr_in _sin; ++ socklen_t *len, _len; ++ int ret; ++ ++ len = rtnet_get_arg(fd, &_len, addrlen, sizeof(_len)); ++ if (IS_ERR(len)) ++ return PTR_ERR(len); ++ ++ if (*len < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = s->prot.inet.daddr; ++ _sin.sin_port = s->prot.inet.dport; ++ memset(&_sin.sin_zero, 0, sizeof(_sin.sin_zero)); ++ ret = rtnet_put_arg(fd, addr, &_sin, sizeof(_sin)); ++ if (ret) ++ return ret; ++ ++ *len = sizeof(struct sockaddr_in); ++ ret = rtnet_put_arg(fd, addrlen, len, sizeof(socklen_t)); ++ ++ return ret; ++} ++ ++int rt_ip_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct _rtdm_getsockaddr_args _getaddr, *getaddr; ++ struct _rtdm_getsockopt_args _getopt, *getopt; ++ struct _rtdm_setsockopt_args _setopt, *setopt; ++ ++ switch (request) { ++ case _RTIOC_SETSOCKOPT: ++ setopt = rtnet_get_arg(fd, &_setopt, arg, sizeof(_setopt)); ++ if (IS_ERR(setopt)) ++ return PTR_ERR(setopt); ++ ++ return rt_ip_setsockopt(fd, sock, setopt->level, ++ setopt->optname, setopt->optval, ++ setopt->optlen); ++ ++ case _RTIOC_GETSOCKOPT: ++ getopt = rtnet_get_arg(fd, &_getopt, arg, sizeof(_getopt)); ++ if (IS_ERR(getopt)) ++ return PTR_ERR(getopt); ++ ++ return rt_ip_getsockopt(fd, sock, getopt->level, ++ getopt->optname, getopt->optval, ++ getopt->optlen); ++ ++ case _RTIOC_GETSOCKNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ ++ return rt_ip_getsockname(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ case _RTIOC_GETPEERNAME: ++ getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr)); ++ if (IS_ERR(getaddr)) ++ return PTR_ERR(getaddr); ++ ++ return rt_ip_getpeername(fd, sock, getaddr->addr, ++ getaddr->addrlen); ++ ++ default: ++ return rt_socket_if_ioctl(fd, request, arg); ++ } ++} ++EXPORT_SYMBOL_GPL(rt_ip_ioctl); +--- linux/drivers/xenomai/net/stack/ipv4/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/Makefile 2021-04-07 16:01:27.045634426 +0800 +@@ -0,0 +1,19 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP) += udp/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP) += tcp/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4) += rtipv4.o ++ ++rtipv4-y := \ ++ route.o \ ++ protocol.o \ ++ arp.o \ ++ af_inet.o \ ++ ip_input.o \ ++ ip_sock.o \ ++ ip_output.o \ ++ ip_fragment.o ++ ++rtipv4-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP) += icmp.o +--- linux/drivers/xenomai/net/stack/ipv4/arp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/arp.c 2021-04-07 16:01:27.040634433 +0800 +@@ -0,0 +1,212 @@ ++/*** ++ * ++ * ipv4/arp.h - Adress Resolution Protocol for RTnet ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++#include ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ ++/*** ++ * arp_send: Create and send an arp packet. If (dest_hw == NULL), ++ * we create a broadcast message. ++ */ ++void rt_arp_send(int type, int ptype, u32 dest_ip, struct rtnet_device *rtdev, ++ u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, ++ unsigned char *target_hw) ++{ ++ struct rtskb *skb; ++ struct arphdr *arp; ++ unsigned char *arp_ptr; ++ ++ if (rtdev->flags & IFF_NOARP) ++ return; ++ ++ if (!(skb = alloc_rtskb(sizeof(struct arphdr) + ++ 2 * (rtdev->addr_len + 4) + ++ rtdev->hard_header_len + 15, ++ &global_pool))) ++ return; ++ ++ rtskb_reserve(skb, (rtdev->hard_header_len + 15) & ~15); ++ ++ skb->nh.raw = skb->data; ++ arp = (struct arphdr *)rtskb_put( ++ skb, sizeof(struct arphdr) + 2 * (rtdev->addr_len + 4)); ++ ++ skb->rtdev = rtdev; ++ skb->protocol = __constant_htons(ETH_P_ARP); ++ skb->priority = RT_ARP_SKB_PRIO; ++ if (src_hw == NULL) ++ src_hw = rtdev->dev_addr; ++ if (dest_hw == NULL) ++ dest_hw = rtdev->broadcast; ++ ++ /* ++ * Fill the device header for the ARP frame ++ */ ++ if (rtdev->hard_header && ++ (rtdev->hard_header(skb, rtdev, ptype, dest_hw, src_hw, skb->len) < ++ 0)) ++ goto out; ++ ++ arp->ar_hrd = htons(rtdev->type); ++ arp->ar_pro = __constant_htons(ETH_P_IP); ++ arp->ar_hln = rtdev->addr_len; ++ arp->ar_pln = 4; ++ arp->ar_op = htons(type); ++ ++ arp_ptr = (unsigned char *)(arp + 1); ++ ++ memcpy(arp_ptr, src_hw, rtdev->addr_len); ++ arp_ptr += rtdev->addr_len; ++ ++ memcpy(arp_ptr, &src_ip, 4); ++ arp_ptr += 4; ++ ++ if (target_hw != NULL) ++ memcpy(arp_ptr, target_hw, rtdev->addr_len); ++ else ++ memset(arp_ptr, 0, rtdev->addr_len); ++ arp_ptr += rtdev->addr_len; ++ ++ memcpy(arp_ptr, &dest_ip, 4); ++ ++ /* send the frame */ ++ rtdev_xmit(skb); ++ ++ return; ++ ++out: ++ kfree_rtskb(skb); ++} ++ ++/*** ++ * arp_rcv: Receive an arp request by the device layer. ++ */ ++int rt_arp_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtnet_device *rtdev = skb->rtdev; ++ struct arphdr *arp = skb->nh.arph; ++ unsigned char *arp_ptr = (unsigned char *)(arp + 1); ++ unsigned char *sha; ++ u32 sip, tip; ++ u16 dev_type = rtdev->type; ++ ++ /* ++ * The hardware length of the packet should match the hardware length ++ * of the device. Similarly, the hardware types should match. The ++ * device should be ARP-able. Also, if pln is not 4, then the lookup ++ * is not from an IP number. We can't currently handle this, so toss ++ * it. ++ */ ++ if ((arp->ar_hln != rtdev->addr_len) || (rtdev->flags & IFF_NOARP) || ++ (skb->pkt_type == PACKET_OTHERHOST) || ++ (skb->pkt_type == PACKET_LOOPBACK) || (arp->ar_pln != 4)) ++ goto out; ++ ++ switch (dev_type) { ++ default: ++ if ((arp->ar_pro != __constant_htons(ETH_P_IP)) && ++ (htons(dev_type) != arp->ar_hrd)) ++ goto out; ++ break; ++ case ARPHRD_ETHER: ++ /* ++ * ETHERNET devices will accept ARP hardware types of either ++ * 1 (Ethernet) or 6 (IEEE 802.2). 
++ */ ++ if ((arp->ar_hrd != __constant_htons(ARPHRD_ETHER)) && ++ (arp->ar_hrd != __constant_htons(ARPHRD_IEEE802))) { ++ goto out; ++ } ++ if (arp->ar_pro != __constant_htons(ETH_P_IP)) { ++ goto out; ++ } ++ break; ++ } ++ ++ /* Understand only these message types */ ++ if ((arp->ar_op != __constant_htons(ARPOP_REPLY)) && ++ (arp->ar_op != __constant_htons(ARPOP_REQUEST))) ++ goto out; ++ ++ /* ++ * Extract fields ++ */ ++ sha = arp_ptr; ++ arp_ptr += rtdev->addr_len; ++ memcpy(&sip, arp_ptr, 4); ++ ++ arp_ptr += 4; ++ arp_ptr += rtdev->addr_len; ++ memcpy(&tip, arp_ptr, 4); ++ ++ /* process only requests/replies directed to us */ ++ if (tip == rtdev->local_ip) { ++ rt_ip_route_add_host(sip, sha, rtdev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if (!rt_ip_fallback_handler) ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++ if (arp->ar_op == __constant_htons(ARPOP_REQUEST)) { ++ rt_arp_send(ARPOP_REPLY, ETH_P_ARP, sip, rtdev, ++ tip, sha, rtdev->dev_addr, sha); ++ goto out1; ++ } ++ } ++ ++out: ++#ifdef CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP ++ if (rt_ip_fallback_handler) { ++ rt_ip_fallback_handler(skb); ++ return 0; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY_ARP */ ++out1: ++ kfree_rtskb(skb); ++ return 0; ++} ++ ++static struct rtpacket_type arp_packet_type = { ++ type: __constant_htons(ETH_P_ARP), ++ handler: &rt_arp_rcv ++}; ++ ++/*** ++ * rt_arp_init ++ */ ++void __init rt_arp_init(void) ++{ ++ rtdev_add_pack(&arp_packet_type); ++} ++ ++/*** ++ * rt_arp_release ++ */ ++void rt_arp_release(void) ++{ ++ rtdev_remove_pack(&arp_packet_type); ++} +--- linux/drivers/xenomai/net/stack/ipv4/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/Kconfig 2021-04-07 16:01:27.035634441 +0800 +@@ -0,0 +1,75 @@ ++config XENO_DRIVERS_NET_RTIPV4 ++ depends on XENO_DRIVERS_NET ++ tristate "Real-Time IPv4" ++ default y ++ ---help--- ++ Enables the real-time capable IPv4 support of RTnet. The protocol is ++ implemented as a separate module. Supplementing tools (rtroute, ++ rtping) and examples are provided as well. Moreover, RTcfg will ++ include IPv4 support when this option is switched on. ++ ++ For further information see also Documentation/README.routing and ++ Documentation/README.ipfragmentation. ++ ++config XENO_DRIVERS_NET_RTIPV4_ICMP ++ bool "ICMP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default y ++ ---help--- ++ Enables ICMP support of the RTnet Real-Time IPv4 protocol. ++ ++ When the RTnet-Proxy is enabled while this feature is disabled, ICMP ++ will be forwarded to the Linux network stack. ++ ++config XENO_DRIVERS_NET_RTIPV4_HOST_ROUTES ++ int "Maximum host routing table entries" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default 32 ++ ---help--- ++ Each IPv4 supporting interface and each remote host that is directly ++ reachable via via some output interface requires a host routing table ++ entry. If you run larger networks with may hosts per subnet, you may ++ have to increase this limit. Must be power of 2! ++ ++config XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ bool "IP Network Routing" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ Enables routing across IPv4 real-time networks. You will only require ++ this feature in complex networks, while switching it off for flat, ++ single-segment networks improves code size and the worst-case routing ++ decision delay. ++ ++ See Documentation/README.routing for further information. 
++ ++config XENO_DRIVERS_NET_RTIPV4_NET_ROUTES ++ int "Maximum network routing table entries" ++ depends on XENO_DRIVERS_NET_RTIPV4_NETROUTING ++ default 16 ++ ---help--- ++ Each route describing a target network reachable via a router ++ requires an entry in the network routing table. If you run very ++ complex realtime networks, you may have to increase this limit. Must ++ be power of 2! ++ ++config XENO_DRIVERS_NET_RTIPV4_ROUTER ++ bool "IP Router" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ ---help--- ++ When switched on, the RTnet station will be able to forward IPv4 ++ packets that are not directed to the station itself. Typically used in ++ combination with CONFIG_RTNET_RTIPV4_NETROUTING. ++ ++ See Documentation/README.routing for further information. ++ ++config XENO_DRIVERS_NET_RTIPV4_DEBUG ++ bool "RTipv4 Debugging" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default n ++ ++ ---help--- ++ Enables debug message output of the RTipv4 layer. Typically, you ++ may want to turn this on for tracing issues in packet delivery. ++ ++source "drivers/xenomai/net/stack/ipv4/udp/Kconfig" ++source "drivers/xenomai/net/stack/ipv4/tcp/Kconfig" +--- linux/drivers/xenomai/net/stack/ipv4/ip_output.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_output.c 2021-04-07 16:01:27.031634446 +0800 +@@ -0,0 +1,267 @@ ++/*** ++ * ++ * ipv4/ip_output.c - prepare outgoing IP packets ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_RTDM_LOCK(rt_ip_id_lock); ++static u16 rt_ip_id_count = 0; ++ ++/*** ++ * Slow path for fragmented packets ++ */ ++int rt_ip_build_xmit_slow(struct rtsocket *sk, ++ int getfrag(const void *, char *, unsigned int, ++ unsigned int), ++ const void *frag, unsigned length, ++ struct dest_route *rt, int msg_flags, ++ unsigned int mtu, unsigned int prio) ++{ ++ int err, next_err; ++ struct rtskb *skb; ++ struct rtskb *next_skb; ++ struct iphdr *iph; ++ struct rtnet_device *rtdev = rt->rtdev; ++ unsigned int fragdatalen; ++ unsigned int offset = 0; ++ u16 msg_rt_ip_id; ++ rtdm_lockctx_t context; ++ unsigned int rtskb_size; ++ int hh_len = (rtdev->hard_header_len + 15) & ~15; ++ ++#define FRAGHEADERLEN sizeof(struct iphdr) ++ ++ fragdatalen = ((mtu - FRAGHEADERLEN) & ~7); ++ ++ /* Store id in local variable */ ++ rtdm_lock_get_irqsave(&rt_ip_id_lock, context); ++ msg_rt_ip_id = rt_ip_id_count++; ++ rtdm_lock_put_irqrestore(&rt_ip_id_lock, context); ++ ++ rtskb_size = mtu + hh_len + 15; ++ ++ /* TODO: delay previous skb until ALL errors are catched which may occure ++ during next skb setup */ ++ ++ /* Preallocate first rtskb */ ++ skb = alloc_rtskb(rtskb_size, &sk->skb_pool); ++ if (skb == NULL) ++ return -ENOBUFS; ++ ++ for (offset = 0; offset < length; offset += fragdatalen) { ++ int fraglen; /* The length (IP, including ip-header) of this ++ very fragment */ ++ __u16 frag_off = offset >> 3; ++ ++ next_err = 0; ++ if (offset >= length - fragdatalen) { ++ /* last fragment */ ++ fraglen = FRAGHEADERLEN + length - offset; ++ next_skb = NULL; ++ } else { ++ fraglen = FRAGHEADERLEN + fragdatalen; ++ frag_off |= IP_MF; ++ ++ next_skb = alloc_rtskb(rtskb_size, &sk->skb_pool); ++ if (next_skb == NULL) { ++ frag_off &= ~IP_MF; /* cut the chain */ ++ next_err = -ENOBUFS; ++ } ++ } ++ ++ rtskb_reserve(skb, hh_len); ++ ++ skb->rtdev = rtdev; ++ skb->nh.iph = iph = (struct iphdr *)rtskb_put(skb, fraglen); ++ skb->priority = prio; ++ ++ iph->version = 4; ++ iph->ihl = 5; /* 20 byte header - no options */ ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(fraglen); ++ iph->id = htons(msg_rt_ip_id); ++ iph->frag_off = htons(frag_off); ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required! */ ++ iph->check = ip_fast_csum((unsigned char *)iph, 5 /*iph->ihl*/); ++ ++ if ((err = getfrag(frag, ((char *)iph) + 5 /*iph->ihl*/ * 4, ++ offset, fraglen - FRAGHEADERLEN))) ++ goto error; ++ ++ if (rtdev->hard_header) { ++ err = rtdev->hard_header(skb, rtdev, ETH_P_IP, ++ rt->dev_addr, rtdev->dev_addr, ++ skb->len); ++ if (err < 0) ++ goto error; ++ } ++ ++ err = rtdev_xmit(skb); ++ ++ skb = next_skb; ++ ++ if (err != 0) { ++ err = -EAGAIN; ++ goto error; ++ } ++ ++ if (next_err != 0) ++ return next_err; ++ } ++ return 0; ++ ++error: ++ if (skb != NULL) { ++ kfree_rtskb(skb); ++ ++ if (next_skb != NULL) ++ kfree_rtskb(next_skb); ++ } ++ return err; ++} ++ ++/*** ++ * Fast path for unfragmented packets. 
++ */ ++int rt_ip_build_xmit(struct rtsocket *sk, ++ int getfrag(const void *, char *, unsigned int, ++ unsigned int), ++ const void *frag, unsigned length, struct dest_route *rt, ++ int msg_flags) ++{ ++ int err = 0; ++ struct rtskb *skb; ++ struct iphdr *iph; ++ int hh_len; ++ u16 msg_rt_ip_id; ++ rtdm_lockctx_t context; ++ struct rtnet_device *rtdev = rt->rtdev; ++ unsigned int prio; ++ unsigned int mtu; ++ ++ /* sk->priority may encode both priority and output channel. Make sure ++ we use a consitent value, also for the MTU which is derived from the ++ channel. */ ++ prio = (volatile unsigned int)sk->priority; ++ mtu = rtdev->get_mtu(rtdev, prio); ++ ++ /* ++ * Try the simple case first. This leaves fragmented frames, and by choice ++ * RAW frames within 20 bytes of maximum size(rare) to the long path ++ */ ++ length += sizeof(struct iphdr); ++ ++ if (length > mtu) ++ return rt_ip_build_xmit_slow(sk, getfrag, frag, ++ length - sizeof(struct iphdr), rt, ++ msg_flags, mtu, prio); ++ ++ /* Store id in local variable */ ++ rtdm_lock_get_irqsave(&rt_ip_id_lock, context); ++ msg_rt_ip_id = rt_ip_id_count++; ++ rtdm_lock_put_irqrestore(&rt_ip_id_lock, context); ++ ++ hh_len = (rtdev->hard_header_len + 15) & ~15; ++ ++ skb = alloc_rtskb(length + hh_len + 15, &sk->skb_pool); ++ if (skb == NULL) ++ return -ENOBUFS; ++ ++ rtskb_reserve(skb, hh_len); ++ ++ skb->rtdev = rtdev; ++ skb->nh.iph = iph = (struct iphdr *)rtskb_put(skb, length); ++ skb->priority = prio; ++ ++ iph->version = 4; ++ iph->ihl = 5; ++ iph->tos = sk->prot.inet.tos; ++ iph->tot_len = htons(length); ++ iph->id = htons(msg_rt_ip_id); ++ iph->frag_off = htons(IP_DF); ++ iph->ttl = 255; ++ iph->protocol = sk->protocol; ++ iph->saddr = rtdev->local_ip; ++ iph->daddr = rt->ip; ++ iph->check = 0; /* required! */ ++ iph->check = ip_fast_csum((unsigned char *)iph, 5 /*iph->ihl*/); ++ ++ if ((err = getfrag(frag, ((char *)iph) + 5 /*iph->ihl*/ * 4, 0, ++ length - 5 /*iph->ihl*/ * 4))) ++ goto error; ++ ++ if (rtdev->hard_header) { ++ err = rtdev->hard_header(skb, rtdev, ETH_P_IP, rt->dev_addr, ++ rtdev->dev_addr, skb->len); ++ if (err < 0) ++ goto error; ++ } ++ ++ err = rtdev_xmit(skb); ++ ++ if (err) ++ return -EAGAIN; ++ else ++ return 0; ++ ++error: ++ kfree_rtskb(skb); ++ return err; ++} ++EXPORT_SYMBOL_GPL(rt_ip_build_xmit); ++ ++/*** ++ * IP protocol layer initialiser ++ */ ++static struct rtpacket_type ip_packet_type = { .type = __constant_htons( ++ ETH_P_IP), ++ .handler = &rt_ip_rcv }; ++ ++/*** ++ * ip_init ++ */ ++void __init rt_ip_init(void) ++{ ++ rtdev_add_pack(&ip_packet_type); ++ rt_ip_fragment_init(); ++} ++ ++/*** ++ * ip_release ++ */ ++void rt_ip_release(void) ++{ ++ rtdev_remove_pack(&ip_packet_type); ++ rt_ip_fragment_cleanup(); ++} +--- linux/drivers/xenomai/net/stack/ipv4/ip_fragment.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_fragment.c 2021-04-07 16:01:27.026634453 +0800 +@@ -0,0 +1,327 @@ ++/* ip_fragment.c ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003 Mathias Koehrer ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++#include ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++/* ++ * This defined sets the number of incoming fragmented IP messages that ++ * can be handled in parallel. ++ */ ++#define COLLECTOR_COUNT 10 ++ ++struct ip_collector { ++ int in_use; ++ __u32 saddr; ++ __u32 daddr; ++ __u16 id; ++ __u8 protocol; ++ ++ struct rtskb_queue frags; ++ struct rtsocket *sock; ++ unsigned int buf_size; ++}; ++ ++static struct ip_collector collector[COLLECTOR_COUNT]; ++ ++static void alloc_collector(struct rtskb *skb, struct rtsocket *sock) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ struct iphdr *iph = skb->nh.iph; ++ ++ /* ++ * Find a free collector ++ * ++ * Note: We once used to clean up probably outdated chains, but the ++ * algorithm was not stable enough and could cause incorrect drops even ++ * under medium load. If we run in overload, we will loose data anyhow. ++ * What we should do in the future is to account collectors per socket or ++ * socket owner and set quotations. ++ * Garbage collection is now performed only on socket close. ++ */ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (!p_coll->in_use) { ++ p_coll->in_use = 1; ++ p_coll->buf_size = skb->len; ++ p_coll->frags.first = skb; ++ p_coll->frags.last = skb; ++ p_coll->saddr = iph->saddr; ++ p_coll->daddr = iph->daddr; ++ p_coll->id = iph->id; ++ p_coll->protocol = iph->protocol; ++ p_coll->sock = sock; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ ++ return; ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++ ++ rtdm_printk("RTnet: IP fragmentation - no collector available\n"); ++ kfree_rtskb(skb); ++} ++ ++/* ++ * Return a pointer to the collector that holds the message which ++ * fits to the iphdr of the passed rtskb. 
++ * */ ++static struct rtskb *add_to_collector(struct rtskb *skb, unsigned int offset, ++ int more_frags) ++{ ++ int i, err; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ struct iphdr *iph = skb->nh.iph; ++ struct rtskb *first_skb; ++ ++ /* Search in existing collectors */ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (p_coll->in_use && (iph->saddr == p_coll->saddr) && ++ (iph->daddr == p_coll->daddr) && (iph->id == p_coll->id) && ++ (iph->protocol == p_coll->protocol)) { ++ first_skb = p_coll->frags.first; ++ ++ /* Acquire the rtskb at the expense of the protocol pool */ ++ if (rtskb_acquire(skb, &p_coll->sock->skb_pool) != 0) { ++ /* We have to drop this fragment => clean up the whole chain */ ++ p_coll->in_use = 0; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ ++#ifdef FRAG_DBG ++ rtdm_printk( ++ "RTnet: Compensation pool empty - IP fragments " ++ "dropped (saddr:%x, daddr:%x)\n", ++ iph->saddr, iph->daddr); ++#endif ++ ++ kfree_rtskb(first_skb); ++ kfree_rtskb(skb); ++ return NULL; ++ } ++ ++ /* Optimized version of __rtskb_queue_tail */ ++ skb->next = NULL; ++ p_coll->frags.last->next = skb; ++ p_coll->frags.last = skb; ++ ++ /* Extend the chain */ ++ first_skb->chain_end = skb; ++ ++ /* Sanity check: unordered fragments are not allowed! */ ++ if (offset != p_coll->buf_size) { ++ /* We have to drop this fragment => clean up the whole chain */ ++ p_coll->in_use = 0; ++ skb = first_skb; ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ break; /* leave the for loop */ ++ } ++ ++ p_coll->buf_size += skb->len; ++ ++ if (!more_frags) { ++ p_coll->in_use = 0; ++ ++ err = rt_socket_reference(p_coll->sock); ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ ++ if (err < 0) { ++ kfree_rtskb(first_skb); ++ return NULL; ++ } ++ ++ return first_skb; ++ } else { ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, ++ context); ++ return NULL; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return NULL; ++ } ++#endif ++ ++#ifdef FRAG_DBG ++ rtdm_printk("RTnet: Unordered IP fragment (saddr:%x, daddr:%x)" ++ " - dropped\n", ++ iph->saddr, iph->daddr); ++#endif ++ ++ kfree_rtskb(skb); ++ return NULL; ++} ++ ++/* ++ * Cleans up all collectors referring to the specified socket. ++ * This is now the only kind of garbage collection we do. 
++ */ ++void rt_ip_frag_invalidate_socket(struct rtsocket *sock) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if ((p_coll->in_use) && (p_coll->sock == sock)) { ++ p_coll->in_use = 0; ++ kfree_rtskb(p_coll->frags.first); ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++} ++EXPORT_SYMBOL_GPL(rt_ip_frag_invalidate_socket); ++ ++/* ++ * Cleans up all existing collectors ++ */ ++static void cleanup_all_collectors(void) ++{ ++ int i; ++ rtdm_lockctx_t context; ++ struct ip_collector *p_coll; ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) { ++ p_coll = &collector[i]; ++ rtdm_lock_get_irqsave(&p_coll->frags.lock, context); ++ ++ if (p_coll->in_use) { ++ p_coll->in_use = 0; ++ kfree_rtskb(p_coll->frags.first); ++ } ++ ++ rtdm_lock_put_irqrestore(&p_coll->frags.lock, context); ++ } ++} ++ ++/* ++ * This function returns an rtskb that contains the complete, accumulated IP message. ++ * If not all fragments of the IP message have been received yet, it returns NULL ++ * Note: the IP header must have already been pulled from the rtskb! ++ * */ ++struct rtskb *rt_ip_defrag(struct rtskb *skb, struct rtinet_protocol *ipprot) ++{ ++ unsigned int more_frags; ++ unsigned int offset; ++ struct rtsocket *sock; ++ struct iphdr *iph = skb->nh.iph; ++ int ret; ++ ++ /* Parse the IP header */ ++ offset = ntohs(iph->frag_off); ++ more_frags = offset & IP_MF; ++ offset &= IP_OFFSET; ++ offset <<= 3; /* offset is in 8-byte chunks */ ++ ++ /* First fragment? */ ++ if (offset == 0) { ++ /* Get the destination socket */ ++ if ((sock = ipprot->dest_socket(skb)) == NULL) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return NULL; ++ } ++#endif ++ /* Drop the rtskb */ ++ kfree_rtskb(skb); ++ return NULL; ++ } ++ ++ /* Acquire the rtskb, to unlock the device skb pool */ ++ ret = rtskb_acquire(skb, &sock->skb_pool); ++ ++ if (ret != 0) { ++ /* Drop the rtskb */ ++ kfree_rtskb(skb); ++ } else { ++ /* Allocates a new collector */ ++ alloc_collector(skb, sock); ++ } ++ ++ /* Packet is queued or freed, socket can be released */ ++ rt_socket_dereference(sock); ++ ++ return NULL; ++ } else { ++ /* Add to an existing collector */ ++ return add_to_collector(skb, offset, more_frags); ++ } ++} ++ ++int __init rt_ip_fragment_init(void) ++{ ++ int i; ++ ++ /* Probably not needed (static variable...) */ ++ memset(collector, 0, sizeof(collector)); ++ ++ for (i = 0; i < COLLECTOR_COUNT; i++) ++ rtdm_lock_init(&collector[i].frags.lock); ++ ++ return 0; ++} ++ ++void rt_ip_fragment_cleanup(void) ++{ ++ cleanup_all_collectors(); ++} +--- linux/drivers/xenomai/net/stack/ipv4/ip_input.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/ip_input.c 2021-04-07 16:01:27.021634460 +0800 +@@ -0,0 +1,159 @@ ++/*** ++ * ++ * ipv4/ip_input.c - process incoming IP packets ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++#include ++ ++rt_ip_fallback_handler_t rt_ip_fallback_handler = NULL; ++EXPORT_SYMBOL_GPL(rt_ip_fallback_handler); ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++/*** ++ * rt_ip_local_deliver ++ */ ++static inline void rt_ip_local_deliver(struct rtskb *skb) ++{ ++ struct iphdr *iph = skb->nh.iph; ++ unsigned short protocol = iph->protocol; ++ struct rtinet_protocol *ipprot; ++ struct rtsocket *sock; ++ int err; ++ ++ ipprot = rt_inet_protocols[rt_inet_hashkey(protocol)]; ++ ++ /* Check if we are supporting the protocol */ ++ if ((ipprot != NULL) && (ipprot->protocol == protocol)) { ++ __rtskb_pull(skb, iph->ihl * 4); ++ ++ /* Point into the IP datagram, just past the header. */ ++ skb->h.raw = skb->data; ++ ++ /* Reassemble IP fragments */ ++ if (iph->frag_off & htons(IP_MF | IP_OFFSET)) { ++ skb = rt_ip_defrag(skb, ipprot); ++ if (!skb) ++ return; ++ ++ sock = skb->sk; ++ } else { ++ /* Get the destination socket */ ++ if ((sock = ipprot->dest_socket(skb)) == NULL) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ if (rt_ip_fallback_handler) { ++ __rtskb_push(skb, iph->ihl * 4); ++ rt_ip_fallback_handler(skb); ++ return; ++ } ++#endif ++ kfree_rtskb(skb); ++ return; ++ } ++ ++ /* Acquire the rtskb, to unlock the device skb pool */ ++ err = rtskb_acquire(skb, &sock->skb_pool); ++ ++ if (err) { ++ kfree_rtskb(skb); ++ rt_socket_dereference(sock); ++ return; ++ } ++ } ++ ++ /* Deliver the packet to the next layer */ ++ ipprot->rcv_handler(skb); ++ ++ /* Packet is queued, socket can be released */ ++ rt_socket_dereference(sock); ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++ } else if (rt_ip_fallback_handler) { ++ /* If a fallback handler for IP protocol has been installed, ++ * call it. */ ++ rt_ip_fallback_handler(skb); ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ } else { ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_DEBUG)) ++ rtdm_printk("RTnet: no protocol found\n"); ++ kfree_rtskb(skb); ++ } ++} ++ ++/*** ++ * rt_ip_rcv ++ */ ++int rt_ip_rcv(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct iphdr *iph; ++ __u32 len; ++ ++ /* When the interface is in promisc. mode, drop all the crap ++ * that it receives, do not try to analyse it. ++ */ ++ if (skb->pkt_type == PACKET_OTHERHOST) ++ goto drop; ++ ++ iph = skb->nh.iph; ++ ++ /* ++ * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. ++ * ++ * Is the datagram acceptable? ++ * ++ * 1. Length at least the size of an ip header ++ * 2. Version of 4 ++ * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] ++ * 4. 
Doesn't have a bogus length ++ */ ++ if (iph->ihl < 5 || iph->version != 4) ++ goto drop; ++ ++ if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) ++ goto drop; ++ ++ len = ntohs(iph->tot_len); ++ if ((skb->len < len) || (len < ((__u32)iph->ihl << 2))) ++ goto drop; ++ ++ rtskb_trim(skb, len); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++ if (rt_ip_route_forward(skb, iph->daddr)) ++ return 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++ rt_ip_local_deliver(skb); ++ return 0; ++ ++drop: ++ kfree_rtskb(skb); ++ return 0; ++} +--- linux/drivers/xenomai/net/stack/ipv4/udp/udp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/udp.c 2021-04-07 16:01:27.016634468 +0800 +@@ -0,0 +1,839 @@ ++/*** ++ * ++ * ipv4/udp.c - UDP implementation for RTnet ++ * ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** ++ * This structure is used to register a UDP socket for reception. All ++ + structures are kept in the port_registry array to increase the cache ++ * locality during the critical port lookup in rt_udp_v4_lookup(). ++ */ ++struct udp_socket { ++ u16 sport; /* local port */ ++ u32 saddr; /* local ip-addr */ ++ struct rtsocket *sock; ++ struct hlist_node link; ++}; ++ ++/*** ++ * Automatic port number assignment ++ ++ * The automatic assignment of port numbers to unbound sockets is realised as ++ * a simple addition of two values: ++ * - the socket ID (lower 8 bits of file descriptor) which is set during ++ * initialisation and left unchanged afterwards ++ * - the start value auto_port_start which is a module parameter ++ ++ * auto_port_mask, also a module parameter, is used to define the range of ++ * port numbers which are used for automatic assignment. Any number within ++ * this range will be rejected when passed to bind_rt(). 
++ ++ */ ++static unsigned int auto_port_start = 1024; ++static unsigned int auto_port_mask = ~(RT_UDP_SOCKETS - 1); ++static int free_ports = RT_UDP_SOCKETS; ++#define RT_PORT_BITMAP_WORDS \ ++ ((RT_UDP_SOCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG) ++static unsigned long port_bitmap[RT_PORT_BITMAP_WORDS]; ++static struct udp_socket port_registry[RT_UDP_SOCKETS]; ++static DEFINE_RTDM_LOCK(udp_socket_base_lock); ++ ++static struct hlist_head port_hash[RT_UDP_SOCKETS * 2]; ++#define port_hash_mask (RT_UDP_SOCKETS * 2 - 1) ++ ++MODULE_LICENSE("GPL"); ++ ++module_param(auto_port_start, uint, 0444); ++module_param(auto_port_mask, uint, 0444); ++MODULE_PARM_DESC(auto_port_start, "Start of automatically assigned port range"); ++MODULE_PARM_DESC(auto_port_mask, ++ "Mask that defines port range for automatic assignment"); ++ ++static inline struct udp_socket *port_hash_search(u32 saddr, u16 sport) ++{ ++ unsigned bucket = sport & port_hash_mask; ++ struct udp_socket *sock; ++ ++ hlist_for_each_entry (sock, &port_hash[bucket], link) ++ if (sock->sport == sport && ++ (saddr == INADDR_ANY || sock->saddr == saddr || ++ sock->saddr == INADDR_ANY)) ++ return sock; ++ ++ return NULL; ++} ++ ++static inline int port_hash_insert(struct udp_socket *sock, u32 saddr, ++ u16 sport) ++{ ++ unsigned bucket; ++ ++ if (port_hash_search(saddr, sport)) ++ return -EADDRINUSE; ++ ++ bucket = sport & port_hash_mask; ++ sock->saddr = saddr; ++ sock->sport = sport; ++ hlist_add_head(&sock->link, &port_hash[bucket]); ++ return 0; ++} ++ ++static inline void port_hash_del(struct udp_socket *sock) ++{ ++ hlist_del(&sock->link); ++} ++ ++/*** ++ * rt_udp_v4_lookup ++ */ ++static inline struct rtsocket *rt_udp_v4_lookup(u32 daddr, u16 dport) ++{ ++ rtdm_lockctx_t context; ++ struct udp_socket *sock; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ sock = port_hash_search(daddr, dport); ++ if (sock && rt_socket_reference(sock->sock) == 0) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return sock->sock; ++ } ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return NULL; ++} ++ ++/*** ++ * rt_udp_bind - bind socket to local address ++ * @s: socket ++ * @addr: local address ++ */ ++int rt_udp_bind(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr __user *addr, socklen_t addrlen) ++{ ++ struct sockaddr_in _sin, *sin; ++ rtdm_lockctx_t context; ++ int index; ++ int err = 0; ++ ++ if (addrlen < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ sin = rtnet_get_arg(fd, &_sin, addr, sizeof(_sin)); ++ if (IS_ERR(sin)) ++ return PTR_ERR(sin); ++ ++ if ((sin->sin_port & auto_port_mask) == auto_port_start) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if ((index = sock->prot.inet.reg_index) < 0) { ++ /* socket is being closed */ ++ err = -EBADF; ++ goto unlock_out; ++ } ++ if (sock->prot.inet.state != TCP_CLOSE) { ++ err = -EINVAL; ++ goto unlock_out; ++ } ++ ++ port_hash_del(&port_registry[index]); ++ if (port_hash_insert(&port_registry[index], sin->sin_addr.s_addr, ++ sin->sin_port ?: index + auto_port_start)) { ++ port_hash_insert(&port_registry[index], ++ port_registry[index].saddr, ++ port_registry[index].sport); ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ return -EADDRINUSE; ++ } ++ ++ /* set the source-addr */ ++ sock->prot.inet.saddr = port_registry[index].saddr; ++ ++ /* set source port, if not set by user */ ++ sock->prot.inet.sport = port_registry[index].sport; ++ ++unlock_out: ++ 
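++	/* common exit path: release the socket table lock and return the bind status */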
rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return err; ++} ++ ++/*** ++ * rt_udp_connect ++ */ ++int rt_udp_connect(struct rtdm_fd *fd, struct rtsocket *sock, ++ const struct sockaddr __user *serv_addr, socklen_t addrlen) ++{ ++ struct sockaddr _sa, *sa; ++ struct sockaddr_in _sin, *sin; ++ rtdm_lockctx_t context; ++ int index; ++ ++ if (addrlen < sizeof(struct sockaddr)) ++ return -EINVAL; ++ ++ sa = rtnet_get_arg(fd, &_sa, serv_addr, sizeof(_sa)); ++ if (IS_ERR(sa)) ++ return PTR_ERR(sa); ++ ++ if (sa->sa_family == AF_UNSPEC) { ++ if ((index = sock->prot.inet.reg_index) < 0) ++ /* socket is being closed */ ++ return -EBADF; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ /* Note: The following line differs from standard ++ stacks, and we also don't remove the socket from ++ the port list. Might get fixed in the future... */ ++ sock->prot.inet.sport = index + auto_port_start; ++ sock->prot.inet.daddr = INADDR_ANY; ++ sock->prot.inet.dport = 0; ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ } else { ++ if (addrlen < sizeof(struct sockaddr_in)) ++ return -EINVAL; ++ ++ sin = rtnet_get_arg(fd, &_sin, serv_addr, sizeof(_sin)); ++ if (IS_ERR(sin)) ++ return PTR_ERR(sin); ++ ++ if (sin->sin_family != AF_INET) ++ return -EINVAL; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if (sock->prot.inet.state != TCP_CLOSE) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, ++ context); ++ return -EINVAL; ++ } ++ ++ sock->prot.inet.state = TCP_ESTABLISHED; ++ sock->prot.inet.daddr = sin->sin_addr.s_addr; ++ sock->prot.inet.dport = sin->sin_port; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ } ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_socket - create a new UDP-Socket ++ * @s: socket ++ */ ++int rt_udp_socket(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret; ++ int i; ++ int index; ++ rtdm_lockctx_t context; ++ ++ if ((ret = rt_socket_init(fd, IPPROTO_UDP)) != 0) ++ return ret; ++ ++ sock->prot.inet.saddr = INADDR_ANY; ++ sock->prot.inet.state = TCP_CLOSE; ++ sock->prot.inet.tos = 0; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ /* enforce maximum number of UDP sockets */ ++ if (free_ports == 0) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ rt_socket_cleanup(fd); ++ return -EAGAIN; ++ } ++ free_ports--; ++ ++ /* find free auto-port in bitmap */ ++ for (i = 0; i < RT_PORT_BITMAP_WORDS; i++) ++ if (port_bitmap[i] != (unsigned long)-1) ++ break; ++ index = ffz(port_bitmap[i]); ++ set_bit(index, &port_bitmap[i]); ++ index += i * 32; ++ sock->prot.inet.reg_index = index; ++ sock->prot.inet.sport = index + auto_port_start; ++ ++ /* register UDP socket */ ++ port_hash_insert(&port_registry[index], INADDR_ANY, ++ sock->prot.inet.sport); ++ port_registry[index].sock = sock; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_close ++ */ ++void rt_udp_close(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ struct rtskb *del; ++ int port; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ sock->prot.inet.state = TCP_CLOSE; ++ ++ if (sock->prot.inet.reg_index >= 0) { ++ port = sock->prot.inet.reg_index; ++ clear_bit(port % BITS_PER_LONG, ++ &port_bitmap[port / BITS_PER_LONG]); ++ port_hash_del(&port_registry[port]); ++ ++ free_ports++; ++ ++ 
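++		/* mark the socket as unregistered; other paths treat a negative
++		   reg_index as "socket is being closed" */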
sock->prot.inet.reg_index = -1; ++ } ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ /* cleanup already collected fragments */ ++ rt_ip_frag_invalidate_socket(sock); ++ ++ /* free packets in incoming queue */ ++ while ((del = rtskb_dequeue(&sock->incoming)) != NULL) ++ kfree_rtskb(del); ++ ++ rt_socket_cleanup(fd); ++} ++ ++int rt_udp_ioctl(struct rtdm_fd *fd, unsigned int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ const struct _rtdm_setsockaddr_args *setaddr; ++ struct _rtdm_setsockaddr_args _setaddr; ++ ++ /* fast path for common socket IOCTLs */ ++ if (_IOC_TYPE(request) == RTIOC_TYPE_NETWORK) ++ return rt_socket_common_ioctl(fd, request, arg); ++ ++ switch (request) { ++ case _RTIOC_BIND: ++ case _RTIOC_CONNECT: ++ setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr)); ++ if (IS_ERR(setaddr)) ++ return PTR_ERR(setaddr); ++ if (request == _RTIOC_BIND) ++ return rt_udp_bind(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ return rt_udp_connect(fd, sock, setaddr->addr, ++ setaddr->addrlen); ++ ++ default: ++ return rt_ip_ioctl(fd, request, arg); ++ } ++} ++ ++/*** ++ * rt_udp_recvmsg ++ */ ++/*** ++ * rt_udp_recvmsg ++ */ ++ssize_t rt_udp_recvmsg(struct rtdm_fd *fd, struct user_msghdr *u_msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ struct rtskb *skb; ++ struct rtskb *first_skb; ++ size_t copied = 0; ++ size_t block_size; ++ size_t data_len; ++ struct udphdr *uh; ++ struct sockaddr_in sin; ++ nanosecs_rel_t timeout = sock->timeout; ++ int ret, flags; ++ struct user_msghdr _msg, *msg; ++ socklen_t namelen; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ msg = rtnet_get_arg(fd, &_msg, u_msg, sizeof(_msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ /* non-blocking receive? */ ++ if (msg_flags & MSG_DONTWAIT) ++ timeout = -1; ++ ++ ret = rtdm_sem_timeddown(&sock->pending_sem, timeout, NULL); ++ if (unlikely(ret < 0)) ++ switch (ret) { ++ default: ++ ret = -EBADF; /* socket has been closed */ ++ case -EWOULDBLOCK: ++ case -ETIMEDOUT: ++ case -EINTR: ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ skb = rtskb_dequeue_chain(&sock->incoming); ++ RTNET_ASSERT(skb != NULL, return -EFAULT;); ++ uh = skb->h.uh; ++ first_skb = skb; ++ ++ /* copy the address if required. 
*/ ++ if (msg->msg_name) { ++ memset(&sin, 0, sizeof(sin)); ++ sin.sin_family = AF_INET; ++ sin.sin_port = uh->source; ++ sin.sin_addr.s_addr = skb->nh.iph->saddr; ++ ret = rtnet_put_arg(fd, msg->msg_name, &sin, sizeof(sin)); ++ if (ret) ++ goto fail; ++ ++ namelen = sizeof(sin); ++ ret = rtnet_put_arg(fd, &u_msg->msg_namelen, &namelen, ++ sizeof(namelen)); ++ if (ret) ++ goto fail; ++ } ++ ++ data_len = ntohs(uh->len) - sizeof(struct udphdr); ++ ++ /* remove the UDP header */ ++ __rtskb_pull(skb, sizeof(struct udphdr)); ++ ++ flags = msg->msg_flags & ~MSG_TRUNC; ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ ++ /* iterate over all IP fragments */ ++ do { ++ rtskb_trim(skb, data_len); ++ ++ block_size = skb->len; ++ copied += block_size; ++ data_len -= block_size; ++ ++ /* The data must not be longer than the available buffer size */ ++ if (copied > len) { ++ block_size -= copied - len; ++ copied = len; ++ flags |= MSG_TRUNC; ++ } ++ ++ /* copy the data */ ++ ret = rtnet_write_to_iov(fd, iov, msg->msg_iovlen, skb->data, ++ block_size); ++ if (ret) ++ goto fail; ++ ++ /* next fragment */ ++ skb = skb->next; ++ } while (skb && !(flags & MSG_TRUNC)); ++ ++ /* did we copied all bytes? */ ++ if (data_len > 0) ++ flags |= MSG_TRUNC; ++ ++ if (flags != msg->msg_flags) { ++ ret = rtnet_put_arg(fd, &u_msg->msg_flags, &flags, ++ sizeof(flags)); ++ if (ret) ++ goto fail; ++ } ++out: ++ if ((msg_flags & MSG_PEEK) == 0) ++ kfree_rtskb(first_skb); ++ else { ++ __rtskb_push(first_skb, sizeof(struct udphdr)); ++ rtskb_queue_head(&sock->incoming, first_skb); ++ rtdm_sem_up(&sock->pending_sem); ++ } ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return copied; ++fail: ++ copied = ret; ++ goto out; ++} ++ ++/*** ++ * struct udpfakehdr ++ */ ++struct udpfakehdr { ++ struct udphdr uh; ++ u32 daddr; ++ u32 saddr; ++ struct rtdm_fd *fd; ++ struct iovec *iov; ++ int iovlen; ++ u32 wcheck; ++}; ++ ++/*** ++ * ++ */ ++static int rt_udp_getfrag(const void *p, unsigned char *to, unsigned int offset, ++ unsigned int fraglen) ++{ ++ struct udpfakehdr *ufh = (struct udpfakehdr *)p; ++ int ret; ++ ++ // We should optimize this function a bit (copy+csum...)! ++ if (offset) { ++ ret = rtnet_read_from_iov(ufh->fd, ufh->iov, ufh->iovlen, to, ++ fraglen); ++ return ret < 0 ? 
ret : 0; ++ } ++ ++ ret = rtnet_read_from_iov(ufh->fd, ufh->iov, ufh->iovlen, ++ to + sizeof(struct udphdr), ++ fraglen - sizeof(struct udphdr)); ++ if (ret < 0) ++ return ret; ++ ++ /* Checksum of the complete data part of the UDP message: */ ++ ufh->wcheck = ++ csum_partial(to + sizeof(struct udphdr), ++ fraglen - sizeof(struct udphdr), ufh->wcheck); ++ ++ /* Checksum of the udp header: */ ++ ufh->wcheck = csum_partial((unsigned char *)ufh, sizeof(struct udphdr), ++ ufh->wcheck); ++ ++ ufh->uh.check = ++ csum_tcpudp_magic(ufh->saddr, ufh->daddr, ntohs(ufh->uh.len), ++ IPPROTO_UDP, ufh->wcheck); ++ ++ if (ufh->uh.check == 0) ++ ufh->uh.check = -1; ++ ++ memcpy(to, ufh, sizeof(struct udphdr)); ++ ++ return 0; ++} ++ ++/*** ++ * rt_udp_sendmsg ++ */ ++ssize_t rt_udp_sendmsg(struct rtdm_fd *fd, const struct user_msghdr *msg, ++ int msg_flags) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ size_t len; ++ int ulen; ++ struct sockaddr_in _sin, *sin; ++ struct udpfakehdr ufh; ++ struct dest_route rt; ++ u32 saddr; ++ u32 daddr; ++ u16 dport; ++ int err; ++ rtdm_lockctx_t context; ++ struct user_msghdr _msg; ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ ++ if (msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ ++ if (msg_flags & ~(MSG_DONTROUTE | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ msg = rtnet_get_arg(fd, &_msg, msg, sizeof(*msg)); ++ if (IS_ERR(msg)) ++ return PTR_ERR(msg); ++ ++ if (msg->msg_iovlen < 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen == 0) ++ return 0; ++ ++ err = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (err) ++ return err; ++ ++ len = rtdm_get_iov_flatlen(iov, msg->msg_iovlen); ++ if ((len < 0) || ++ (len > 0xFFFF - sizeof(struct iphdr) - sizeof(struct udphdr))) { ++ err = -EMSGSIZE; ++ goto out; ++ } ++ ++ ulen = len + sizeof(struct udphdr); ++ ++ if (msg->msg_name && msg->msg_namelen == sizeof(*sin)) { ++ sin = rtnet_get_arg(fd, &_sin, msg->msg_name, sizeof(_sin)); ++ if (IS_ERR(sin)) { ++ err = PTR_ERR(sin); ++ goto out; ++ } ++ ++ if (sin->sin_family != AF_INET && ++ sin->sin_family != AF_UNSPEC) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ daddr = sin->sin_addr.s_addr; ++ dport = sin->sin_port; ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ } else { ++ rtdm_lock_get_irqsave(&udp_socket_base_lock, context); ++ ++ if (sock->prot.inet.state != TCP_ESTABLISHED) { ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, ++ context); ++ err = -ENOTCONN; ++ goto out; ++ } ++ ++ daddr = sock->prot.inet.daddr; ++ dport = sock->prot.inet.dport; ++ } ++ ++ saddr = sock->prot.inet.saddr; ++ ufh.uh.source = sock->prot.inet.sport; ++ ++ rtdm_lock_put_irqrestore(&udp_socket_base_lock, context); ++ ++ if ((daddr | dport) == 0) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* get output route */ ++ err = rt_ip_route_output(&rt, daddr, saddr); ++ if (err) ++ goto out; ++ ++ /* we found a route, remember the routing dest-addr could be the netmask */ ++ ufh.saddr = saddr != INADDR_ANY ? 
saddr : rt.rtdev->local_ip; ++ ufh.daddr = daddr; ++ ufh.uh.dest = dport; ++ ufh.uh.len = htons(ulen); ++ ufh.uh.check = 0; ++ ufh.fd = fd; ++ ufh.iov = iov; ++ ufh.iovlen = msg->msg_iovlen; ++ ufh.wcheck = 0; ++ ++ err = rt_ip_build_xmit(sock, rt_udp_getfrag, &ufh, ulen, &rt, ++ msg_flags); ++ ++ /* Drop the reference obtained in rt_ip_route_output() */ ++ rtdev_dereference(rt.rtdev); ++out: ++ rtdm_drop_iovec(iov, iov_fast); ++ ++ return err ?: len; ++} ++ ++/*** ++ * rt_udp_check ++ */ ++static inline unsigned short rt_udp_check(struct udphdr *uh, int len, ++ unsigned long saddr, ++ unsigned long daddr, ++ unsigned long base) ++{ ++ return (csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); ++} ++ ++struct rtsocket *rt_udp_dest_socket(struct rtskb *skb) ++{ ++ struct udphdr *uh = skb->h.uh; ++ unsigned short ulen = ntohs(uh->len); ++ u32 saddr = skb->nh.iph->saddr; ++ u32 daddr = skb->nh.iph->daddr; ++ struct rtnet_device *rtdev = skb->rtdev; ++ ++ if (uh->check == 0) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ /* ip_summed (yet) never equals CHECKSUM_PARTIAL ++ else ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ if ( !rt_udp_check(uh, ulen, saddr, daddr, skb->csum) ) ++ return NULL; ++ ++ skb->ip_summed = CHECKSUM_NONE; ++ }*/ ++ ++ if (skb->ip_summed != CHECKSUM_UNNECESSARY) ++ skb->csum = ++ csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); ++ ++ /* patch broadcast daddr */ ++ if (daddr == rtdev->broadcast_ip) ++ daddr = rtdev->local_ip; ++ ++ /* find the destination socket */ ++ skb->sk = rt_udp_v4_lookup(daddr, uh->dest); ++ ++ return skb->sk; ++} ++ ++/*** ++ * rt_udp_rcv ++ */ ++void rt_udp_rcv(struct rtskb *skb) ++{ ++ struct rtsocket *sock = skb->sk; ++ void (*callback_func)(struct rtdm_fd *, void *); ++ void *callback_arg; ++ rtdm_lockctx_t context; ++ ++ rtskb_queue_tail(&sock->incoming, skb); ++ rtdm_sem_up(&sock->pending_sem); ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ callback_func = sock->callback_func; ++ callback_arg = sock->callback_arg; ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ ++ if (callback_func) ++ callback_func(rt_socket_fd(sock), callback_arg); ++} ++ ++/*** ++ * rt_udp_rcv_err ++ */ ++void rt_udp_rcv_err(struct rtskb *skb) ++{ ++ rtdm_printk("RTnet: rt_udp_rcv err\n"); ++} ++ ++/*** ++ * UDP-Initialisation ++ */ ++static struct rtinet_protocol udp_protocol = { .protocol = IPPROTO_UDP, ++ .dest_socket = ++ &rt_udp_dest_socket, ++ .rcv_handler = &rt_udp_rcv, ++ .err_handler = &rt_udp_rcv_err, ++ .init_socket = &rt_udp_socket }; ++ ++static struct rtdm_driver udp_driver = { ++ .profile_info = RTDM_PROFILE_INFO(udp, ++ RTDM_CLASS_NETWORK, ++ RTDM_SUBCLASS_RTNET, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtsocket), ++ ++ .protocol_family = PF_INET, ++ .socket_type = SOCK_DGRAM, ++ ++ /* default is UDP */ ++ .ops = { ++ .socket = rt_inet_socket, ++ .close = rt_udp_close, ++ .ioctl_rt = rt_udp_ioctl, ++ .ioctl_nrt = rt_udp_ioctl, ++ .recvmsg_rt = rt_udp_recvmsg, ++ .sendmsg_rt = rt_udp_sendmsg, ++ .select = rt_socket_select_bind, ++ }, ++}; ++ ++static struct rtdm_device udp_device = { ++ .driver = &udp_driver, ++ .label = "udp", ++}; ++ ++/*** ++ * rt_udp_init ++ */ ++static int __init rt_udp_init(void) ++{ ++ int i, err; ++ ++ if ((auto_port_start < 0) || ++ (auto_port_start >= 0x10000 - RT_UDP_SOCKETS)) ++ auto_port_start = 1024; ++ auto_port_start = htons(auto_port_start & (auto_port_mask & 
0xFFFF)); ++ auto_port_mask = htons(auto_port_mask | 0xFFFF0000); ++ ++ rt_inet_add_protocol(&udp_protocol); ++ ++ for (i = 0; i < ARRAY_SIZE(port_hash); i++) ++ INIT_HLIST_HEAD(&port_hash[i]); ++ ++ err = rtdm_dev_register(&udp_device); ++ if (err) ++ rt_inet_del_protocol(&udp_protocol); ++ return err; ++} ++ ++/*** ++ * rt_udp_release ++ */ ++static void __exit rt_udp_release(void) ++{ ++ rtdm_dev_unregister(&udp_device); ++ rt_inet_del_protocol(&udp_protocol); ++} ++ ++module_init(rt_udp_init); ++module_exit(rt_udp_release); +--- linux/drivers/xenomai/net/stack/ipv4/udp/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/Makefile 2021-04-07 16:01:27.012634474 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP) += rtudp.o ++ ++rtudp-y := udp.o +--- linux/drivers/xenomai/net/stack/ipv4/udp/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/ipv4/udp/Kconfig 2021-04-07 16:01:27.007634481 +0800 +@@ -0,0 +1,6 @@ ++config XENO_DRIVERS_NET_RTIPV4_UDP ++ tristate "UDP support" ++ depends on XENO_DRIVERS_NET_RTIPV4 ++ default y ++ ---help--- ++ Enables UDP support of the RTnet Real-Time IPv4 protocol. +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_proto.c 2021-04-07 16:01:27.002634488 +0800 +@@ -0,0 +1,127 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_proto.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++static struct rtskb_queue nrt_rtskb_queue; ++static rtdm_task_t wrapper_task; ++static rtdm_event_t wakeup_sem; ++ ++int nomac_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ /* unused here, just to demonstrate access to the discipline state ++ struct nomac_priv *nomac = ++ (struct nomac_priv *)rtdev->mac_priv->disc_priv; */ ++ int ret; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++} ++ ++int nomac_nrt_packet_tx(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ /* unused here, just to demonstrate access to the discipline state ++ struct nomac_priv *nomac = ++ (struct nomac_priv *)rtdev->mac_priv->disc_priv; */ ++ int ret; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ /* note: this routine may be called both in rt and non-rt context ++ * => detect and wrap the context if necessary */ ++ if (!rtdm_in_rt_context()) { ++ rtskb_queue_tail(&nrt_rtskb_queue, rtskb); ++ rtdm_event_signal(&wakeup_sem); ++ return 0; ++ } else { ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++ } ++} ++ ++void nrt_xmit_task(void *arg) ++{ ++ struct rtskb *rtskb; ++ struct rtnet_device *rtdev; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&wakeup_sem) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&nrt_rtskb_queue))) { ++ rtdev = rtskb->rtdev; ++ ++ /* no MAC: we simply transmit the packet under xmit_lock */ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ rtmac_xmit(rtskb); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ } ++ } ++} ++ ++int nomac_packet_rx(struct rtskb *rtskb) ++{ ++ /* actually, NoMAC doesn't expect any control packet */ ++ kfree_rtskb(rtskb); ++ ++ return 0; ++} ++ ++int __init nomac_proto_init(void) ++{ ++ int ret; ++ ++ rtskb_queue_init(&nrt_rtskb_queue); ++ rtdm_event_init(&wakeup_sem, 0); ++ ++ ret = rtdm_task_init(&wrapper_task, "rtnet-nomac", nrt_xmit_task, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&wakeup_sem); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void nomac_proto_cleanup(void) ++{ ++ rtdm_event_destroy(&wakeup_sem); ++ rtdm_task_destroy(&wrapper_task); ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_ioctl.c 2021-04-07 16:01:26.998634493 +0800 +@@ -0,0 +1,99 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_ioctl.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++static int nomac_ioctl_attach(struct rtnet_device *rtdev) ++{ ++ struct nomac_priv *nomac; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &nomac_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ nomac = (struct nomac_priv *)rtdev->mac_priv->disc_priv; ++ if (nomac->magic != NOMAC_MAGIC) ++ return -ENOTTY; ++ ++ /* ... */ ++ ++ return 0; ++} ++ ++static int nomac_ioctl_detach(struct rtnet_device *rtdev) ++{ ++ struct nomac_priv *nomac; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ nomac = (struct nomac_priv *)rtdev->mac_priv->disc_priv; ++ if (nomac->magic != NOMAC_MAGIC) ++ return -ENOTTY; ++ ++ ret = rtmac_disc_detach(rtdev); ++ ++ /* ... */ ++ ++ return ret; ++} ++ ++int nomac_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct nomac_config cfg; ++ int ret; ++ ++ ret = copy_from_user(&cfg, (void *)arg, sizeof(cfg)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ switch (request) { ++ case NOMAC_IOC_ATTACH: ++ ret = nomac_ioctl_attach(rtdev); ++ break; ++ ++ case NOMAC_IOC_DETACH: ++ ret = nomac_ioctl_detach(rtdev); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/Makefile 2021-04-07 16:01:26.993634501 +0800 +@@ -0,0 +1,9 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_NOMAC) += nomac.o ++ ++nomac-y := \ ++ nomac_dev.o \ ++ nomac_ioctl.o \ ++ nomac_module.o \ ++ nomac_proto.o +--- linux/drivers/xenomai/net/stack/rtmac/nomac/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/Kconfig 2021-04-07 16:01:26.988634508 +0800 +@@ -0,0 +1,9 @@ ++config XENO_DRIVERS_NET_NOMAC ++ tristate "NoMAC discipline for RTmac" ++ depends on XENO_DRIVERS_NET_RTMAC ++ default n ++ ---help--- ++ This no-operation RTmac discipline is intended to act as a template ++ for new implementations. However, it can be compiled and used (see ++ nomaccfg management tool), but don't expect any improved determinism ++ of your network. ;) +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_dev.c 2021-04-07 16:01:26.984634513 +0800 +@@ -0,0 +1,84 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_dev.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++static int nomac_dev_openclose(void) ++{ ++ return 0; ++} ++ ++static int nomac_dev_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct nomac_priv *nomac; ++ ++ nomac = container_of(rtdm_fd_to_context(fd)->device, struct nomac_priv, ++ api_device); ++ ++ switch (request) { ++ case RTMAC_RTIOC_TIMEOFFSET: ++ ++ case RTMAC_RTIOC_WAITONCYCLE: ++ ++ default: ++ return -ENOTTY; ++ } ++} ++ ++static struct rtdm_driver ++ nomac_driver = { .profile_info = RTDM_PROFILE_INFO( ++ nomac, RTDM_CLASS_RTMAC, ++ RTDM_SUBCLASS_UNMANAGED, RTNET_RTDM_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = 0, ++ .ops = { ++ .open = (typeof(nomac_driver.ops.open)) ++ nomac_dev_openclose, ++ .ioctl_rt = nomac_dev_ioctl, ++ .ioctl_nrt = nomac_dev_ioctl, ++ .close = (typeof(nomac_driver.ops.close)) ++ nomac_dev_openclose, ++ } }; ++ ++int nomac_dev_init(struct rtnet_device *rtdev, struct nomac_priv *nomac) ++{ ++ char *pos; ++ ++ strcpy(nomac->device_name, "NOMAC"); ++ for (pos = rtdev->name + strlen(rtdev->name) - 1; ++ (pos >= rtdev->name) && ((*pos) >= '0') && (*pos <= '9'); pos--) ++ ; ++ strncat(nomac->device_name + 5, pos + 1, IFNAMSIZ - 5); ++ ++ nomac->api_driver = nomac_driver; ++ nomac->api_device.driver = &nomac->api_driver; ++ nomac->api_device.label = nomac->device_name; ++ ++ return rtdm_dev_register(&nomac->api_device); ++} +--- linux/drivers/xenomai/net/stack/rtmac/nomac/nomac_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/nomac/nomac_module.c 2021-04-07 16:01:26.979634520 +0800 +@@ -0,0 +1,161 @@ ++/*** ++ * ++ * rtmac/nomac/nomac_module.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++LIST_HEAD(nomac_devices); ++DEFINE_MUTEX(nomac_nrt_lock); ++ ++int nomac_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct nomac_priv *entry; ++ ++ mutex_lock(&nomac_nrt_lock); ++ ++ xnvfile_printf(it, "Interface API Device State\n"); ++ ++ list_for_each_entry (entry, &nomac_devices, list_entry) ++ xnvfile_printf(it, "%-15s %-15s Attached\n", entry->rtdev->name, ++ entry->api_device.name); ++ ++ mutex_unlock(&nomac_nrt_lock); ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int nomac_attach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct nomac_priv *nomac = (struct nomac_priv *)priv; ++ int ret; ++ ++ nomac->magic = NOMAC_MAGIC; ++ nomac->rtdev = rtdev; ++ ++ /* ... */ ++ ++ ret = nomac_dev_init(rtdev, nomac); ++ if (ret < 0) ++ return ret; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ mutex_lock(&nomac_nrt_lock); ++ list_add(&nomac->list_entry, &nomac_devices); ++ mutex_unlock(&nomac_nrt_lock); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ return 0; ++} ++ ++int nomac_detach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct nomac_priv *nomac = (struct nomac_priv *)priv; ++ ++ nomac_dev_release(nomac); ++ ++ /* ... */ ++#ifdef CONFIG_XENO_OPT_VFILE ++ mutex_lock(&nomac_nrt_lock); ++ list_del(&nomac->list_entry); ++ mutex_unlock(&nomac_nrt_lock); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct rtmac_proc_entry nomac_proc_entries[] = { ++ { name: "nomac", handler: nomac_proc_read }, ++}; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct rtmac_disc nomac_disc = { ++ name: "NoMAC", ++ priv_size: sizeof(struct nomac_priv), ++ disc_type: __constant_htons(RTMAC_TYPE_NOMAC), ++ ++ packet_rx: nomac_packet_rx, ++ rt_packet_tx: nomac_rt_packet_tx, ++ nrt_packet_tx: nomac_nrt_packet_tx, ++ ++ get_mtu: NULL, ++ ++ vnic_xmit: RTMAC_DEFAULT_VNIC, ++ ++ attach: nomac_attach, ++ detach: nomac_detach, ++ ++ ioctls: { ++ service_name: "RTmac/NoMAC", ++ ioctl_type: RTNET_IOC_TYPE_RTMAC_NOMAC, ++ handler: nomac_ioctl ++ }, ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ proc_entries: nomac_proc_entries, ++ nr_proc_entries: ARRAY_SIZE(nomac_proc_entries), ++#endif /* CONFIG_XENO_OPT_VFILE */ ++}; ++ ++int __init nomac_init(void) ++{ ++ int ret; ++ ++ printk("RTmac/NoMAC: init void media access control mechanism\n"); ++ ++ ret = nomac_proto_init(); ++ if (ret < 0) ++ return ret; ++ ++ ret = rtmac_disc_register(&nomac_disc); ++ if (ret < 0) { ++ nomac_proto_cleanup(); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++void nomac_release(void) ++{ ++ rtmac_disc_deregister(&nomac_disc); ++ nomac_proto_cleanup(); ++ ++ printk("RTmac/NoMAC: unloaded\n"); ++} ++ ++module_init(nomac_init); ++module_exit(nomac_release); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/Makefile 2021-04-07 16:01:26.974634528 +0800 +@@ -0,0 +1,15 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_NOMAC) += nomac/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_TDMA) += tdma/ ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTMAC) += rtmac.o ++ ++rtmac-y := \ ++ rtmac_disc.o \ ++ rtmac_module.o \ ++ rtmac_proc.o \ ++ rtmac_proto.o \ ++ rtmac_syms.o \ ++ rtmac_vnic.o +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_vnic.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_vnic.c 2021-04-07 16:01:26.970634533 +0800 +@@ -0,0 +1,334 @@ ++/* rtmac_vnic.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include /* for netdev_priv() */ ++#include ++#include ++#include ++ ++static unsigned int vnic_rtskbs = DEFAULT_VNIC_RTSKBS; ++module_param(vnic_rtskbs, uint, 0444); ++MODULE_PARM_DESC(vnic_rtskbs, ++ "Number of realtime socket buffers per virtual NIC"); ++ ++static rtdm_nrtsig_t vnic_signal; ++static struct rtskb_queue rx_queue; ++ ++int rtmac_vnic_rx(struct rtskb *rtskb, u16 type) ++{ ++ struct rtmac_priv *mac_priv = rtskb->rtdev->mac_priv; ++ struct rtskb_pool *pool = &mac_priv->vnic_skb_pool; ++ ++ if (rtskb_acquire(rtskb, pool) != 0) { ++ mac_priv->vnic_stats.rx_dropped++; ++ kfree_rtskb(rtskb); ++ return -1; ++ } ++ ++ rtskb->protocol = type; ++ ++ if (rtskb_queue_tail_check(&rx_queue, rtskb)) ++ rtdm_nrtsig_pend(&vnic_signal); ++ ++ return 0; ++} ++ ++static void rtmac_vnic_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg) ++{ ++ struct rtskb *rtskb; ++ struct sk_buff *skb; ++ unsigned hdrlen; ++ struct net_device_stats *stats; ++ struct rtnet_device *rtdev; ++ ++ while (1) { ++ rtskb = rtskb_dequeue(&rx_queue); ++ if (!rtskb) ++ break; ++ ++ rtdev = rtskb->rtdev; ++ hdrlen = rtdev->hard_header_len; ++ ++ skb = dev_alloc_skb(hdrlen + rtskb->len + 2); ++ if (skb) { ++ /* the rtskb stamp is useless (different clock), get new one */ ++ __net_timestamp(skb); ++ ++ skb_reserve(skb, ++ 2); /* Align IP on 16 byte boundaries */ ++ ++ /* copy Ethernet header */ ++ memcpy(skb_put(skb, hdrlen), ++ rtskb->data - hdrlen - sizeof(struct rtmac_hdr), ++ hdrlen); ++ ++ /* patch the protocol field in the original Ethernet header */ ++ ((struct ethhdr *)skb->data)->h_proto = rtskb->protocol; ++ ++ /* copy data */ ++ memcpy(skb_put(skb, rtskb->len), rtskb->data, ++ rtskb->len); ++ ++ skb->dev = rtskb->rtdev->mac_priv->vnic; ++ skb->protocol = eth_type_trans(skb, skb->dev); ++ ++ stats = &rtskb->rtdev->mac_priv->vnic_stats; ++ ++ kfree_rtskb(rtskb); ++ ++ stats->rx_packets++; ++ stats->rx_bytes += skb->len; ++ ++ netif_rx(skb); ++ } else { ++ printk("RTmac: VNIC fails to allocate linux skb\n"); ++ kfree_rtskb(rtskb); ++ } ++ } ++} ++ ++static int rtmac_vnic_copy_mac(struct net_device *dev) ++{ ++ memcpy(dev->dev_addr, ++ (*(struct rtnet_device **)netdev_priv(dev))->dev_addr, ++ MAX_ADDR_LEN); ++ ++ return 0; ++} ++ ++int rtmac_vnic_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ struct rtnet_device *rtdev = *(struct rtnet_device **)netdev_priv(dev); ++ struct net_device_stats *stats = &rtdev->mac_priv->vnic_stats; ++ struct rtskb_pool 
*pool = &rtdev->mac_priv->vnic_skb_pool; ++ struct ethhdr *ethernet = (struct ethhdr *)skb->data; ++ struct rtskb *rtskb; ++ int res; ++ int data_len; ++ ++ rtskb = alloc_rtskb((skb->len + sizeof(struct rtmac_hdr) + 15) & ~15, ++ pool); ++ if (!rtskb) ++ return NETDEV_TX_BUSY; ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len + sizeof(struct rtmac_hdr)); ++ ++ data_len = skb->len - dev->hard_header_len; ++ memcpy(rtskb_put(rtskb, data_len), skb->data + dev->hard_header_len, ++ data_len); ++ ++ res = rtmac_add_header(rtdev, ethernet->h_dest, rtskb, ++ ntohs(ethernet->h_proto), RTMAC_FLAG_TUNNEL); ++ if (res < 0) { ++ stats->tx_dropped++; ++ kfree_rtskb(rtskb); ++ goto done; ++ } ++ ++ RTNET_ASSERT(rtdev->mac_disc->nrt_packet_tx != NULL, kfree_rtskb(rtskb); ++ goto done;); ++ ++ res = rtdev->mac_disc->nrt_packet_tx(rtskb); ++ if (res < 0) { ++ stats->tx_dropped++; ++ kfree_rtskb(rtskb); ++ } else { ++ stats->tx_packets++; ++ stats->tx_bytes += skb->len; ++ } ++ ++done: ++ dev_kfree_skb(skb); ++ return NETDEV_TX_OK; ++} ++ ++static struct net_device_stats *rtmac_vnic_get_stats(struct net_device *dev) ++{ ++ return &(*(struct rtnet_device **)netdev_priv(dev)) ++ ->mac_priv->vnic_stats; ++} ++ ++static int rtmac_vnic_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ if ((new_mtu < 68) || ++ ((unsigned)new_mtu > 1500 - sizeof(struct rtmac_hdr))) ++ return -EINVAL; ++ dev->mtu = new_mtu; ++ return 0; ++} ++ ++void rtmac_vnic_set_max_mtu(struct rtnet_device *rtdev, unsigned int max_mtu) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ struct net_device *vnic = mac_priv->vnic; ++ unsigned int prev_mtu = mac_priv->vnic_max_mtu; ++ ++ mac_priv->vnic_max_mtu = max_mtu - sizeof(struct rtmac_hdr); ++ ++ /* set vnic mtu in case max_mtu is smaller than the current mtu or ++ the current mtu was set to previous max_mtu */ ++ rtnl_lock(); ++ if ((vnic->mtu > mac_priv->vnic_max_mtu) || ++ (prev_mtu == mac_priv->vnic_max_mtu)) { ++ dev_set_mtu(vnic, mac_priv->vnic_max_mtu); ++ } ++ rtnl_unlock(); ++} ++ ++static struct net_device_ops vnic_netdev_ops = { ++ .ndo_open = rtmac_vnic_copy_mac, ++ .ndo_get_stats = rtmac_vnic_get_stats, ++ .ndo_change_mtu = rtmac_vnic_change_mtu, ++}; ++ ++static void rtmac_vnic_setup(struct net_device *dev) ++{ ++ ether_setup(dev); ++ ++ dev->netdev_ops = &vnic_netdev_ops; ++ dev->flags &= ~IFF_MULTICAST; ++} ++ ++int rtmac_vnic_add(struct rtnet_device *rtdev, vnic_xmit_handler vnic_xmit) ++{ ++ int res; ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ struct net_device *vnic; ++ char buf[IFNAMSIZ]; ++ ++ /* does the discipline request vnic support? 
*/ ++ if (!vnic_xmit) ++ return 0; ++ ++ mac_priv->vnic = NULL; ++ mac_priv->vnic_max_mtu = rtdev->mtu - sizeof(struct rtmac_hdr); ++ memset(&mac_priv->vnic_stats, 0, sizeof(mac_priv->vnic_stats)); ++ ++ /* create the rtskb pool */ ++ if (rtskb_pool_init(&mac_priv->vnic_skb_pool, vnic_rtskbs, NULL, NULL) < ++ vnic_rtskbs) { ++ res = -ENOMEM; ++ goto error; ++ } ++ ++ snprintf(buf, sizeof(buf), "vnic%d", rtdev->ifindex - 1); ++ ++ vnic = alloc_netdev(sizeof(struct rtnet_device *), buf, ++ NET_NAME_UNKNOWN, rtmac_vnic_setup); ++ if (!vnic) { ++ res = -ENOMEM; ++ goto error; ++ } ++ ++ vnic_netdev_ops.ndo_start_xmit = vnic_xmit; ++ vnic->mtu = mac_priv->vnic_max_mtu; ++ *(struct rtnet_device **)netdev_priv(vnic) = rtdev; ++ rtmac_vnic_copy_mac(vnic); ++ ++ res = register_netdev(vnic); ++ if (res < 0) ++ goto error; ++ ++ mac_priv->vnic = vnic; ++ ++ return 0; ++ ++error: ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++ return res; ++} ++ ++int rtmac_vnic_unregister(struct rtnet_device *rtdev) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ ++ if (mac_priv->vnic) { ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++ unregister_netdev(mac_priv->vnic); ++ free_netdev(mac_priv->vnic); ++ mac_priv->vnic = NULL; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_vnics_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ int err; ++ ++ xnvfile_printf(it, "RT-NIC name\tVNIC name\n"); ++ ++ for (i = 1; i <= MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev == NULL) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ return err; ++ } ++ ++ if (rtdev->mac_priv != NULL) { ++ struct rtmac_priv *rtmac; ++ ++ rtmac = (struct rtmac_priv *)rtdev->mac_priv; ++ xnvfile_printf(it, "%-15s %s\n", rtdev->name, ++ rtmac->vnic->name); ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int __init rtmac_vnic_module_init(void) ++{ ++ rtskb_queue_init(&rx_queue); ++ ++ rtdm_nrtsig_init(&vnic_signal, rtmac_vnic_signal_handler, NULL); ++ ++ return 0; ++} ++ ++void rtmac_vnic_module_cleanup(void) ++{ ++ struct rtskb *rtskb; ++ ++ rtdm_nrtsig_destroy(&vnic_signal); ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_ioctl.c 2021-04-07 16:01:26.965634541 +0800 +@@ -0,0 +1,663 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_ioctl.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++static int tdma_ioctl_master(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ u64 cycle_ms; ++ unsigned int table_size; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &tdma_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) { ++ /* note: we don't clean up an unknown discipline */ ++ return -ENOTTY; ++ } ++ ++ if (test_bit(TDMA_FLAG_ATTACHED, &tdma->flags)) { ++ /* already attached */ ++ return -EBUSY; ++ } ++ ++ set_bit(TDMA_FLAG_MASTER, &tdma->flags); ++ ++ tdma->cal_rounds = cfg->args.master.cal_rounds; ++ ++ /* search at least 3 cycle periods for other masters */ ++ cycle_ms = cfg->args.master.cycle_period; ++ do_div(cycle_ms, 1000000); ++ if (cycle_ms == 0) ++ cycle_ms = 1; ++ msleep(3 * cycle_ms); ++ ++ if (rtskb_module_pool_init(&tdma->cal_rtskb_pool, ++ cfg->args.master.max_cal_requests) != ++ cfg->args.master.max_cal_requests) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ ++ table_size = sizeof(struct tdma_slot *) * ++ ((cfg->args.master.max_slot_id >= 1) ? ++ cfg->args.master.max_slot_id + 1 : ++ 2); ++ ++ tdma->slot_table = (struct tdma_slot **)kmalloc(table_size, GFP_KERNEL); ++ if (!tdma->slot_table) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ tdma->max_slot_id = cfg->args.master.max_slot_id; ++ memset(tdma->slot_table, 0, table_size); ++ ++ tdma->cycle_period = cfg->args.master.cycle_period; ++ tdma->sync_job.ref_count = 0; ++ INIT_LIST_HEAD(&tdma->sync_job.entry); ++ ++ if (cfg->args.master.backup_sync_offset == 0) ++ tdma->sync_job.id = XMIT_SYNC; ++ else { ++ set_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags); ++ tdma->sync_job.id = BACKUP_SYNC; ++ tdma->backup_sync_inc = cfg->args.master.backup_sync_offset + ++ tdma->cycle_period; ++ } ++ ++ /* did we detect another active master? */ ++ if (test_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags)) { ++ /* become a slave, we need to calibrate first */ ++ tdma->sync_job.id = WAIT_ON_SYNC; ++ } else { ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ printk("TDMA: warning, no primary master detected!\n"); ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ tdma->current_cycle_start = rtdm_clock_read(); ++ } ++ ++ tdma->first_job = tdma->current_job = &tdma->sync_job; ++ ++ rtdm_event_signal(&tdma->worker_wakeup); ++ ++ set_bit(TDMA_FLAG_ATTACHED, &tdma->flags); ++ ++ return 0; ++ ++err_out: ++ rtmac_disc_detach(rtdev); ++ return ret; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++static int tdma_ioctl_slave(struct rtnet_device *rtdev, struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ unsigned int table_size; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) { ++ ret = rtmac_disc_attach(rtdev, &tdma_disc); ++ if (ret < 0) ++ return ret; ++ } ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) { ++ /* note: we don't clean up an unknown discipline */ ++ return -ENOTTY; ++ } ++ ++ if (test_bit(TDMA_FLAG_ATTACHED, &tdma->flags)) { ++ /* already attached */ ++ return -EBUSY; ++ } ++ ++ tdma->cal_rounds = cfg->args.slave.cal_rounds; ++ if (tdma->cal_rounds == 0) ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ ++ table_size = sizeof(struct tdma_slot *) * ++ ((cfg->args.slave.max_slot_id >= 1) ? 
++ cfg->args.slave.max_slot_id + 1 : ++ 2); ++ ++ tdma->slot_table = (struct tdma_slot **)kmalloc(table_size, GFP_KERNEL); ++ if (!tdma->slot_table) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ tdma->max_slot_id = cfg->args.slave.max_slot_id; ++ memset(tdma->slot_table, 0, table_size); ++ ++ tdma->sync_job.id = WAIT_ON_SYNC; ++ tdma->sync_job.ref_count = 0; ++ INIT_LIST_HEAD(&tdma->sync_job.entry); ++ ++ tdma->first_job = tdma->current_job = &tdma->sync_job; ++ ++ rtdm_event_signal(&tdma->worker_wakeup); ++ ++ set_bit(TDMA_FLAG_ATTACHED, &tdma->flags); ++ ++ return 0; ++ ++err_out: ++ rtmac_disc_detach(rtdev); ++ return ret; ++} ++ ++static int tdma_ioctl_cal_result_size(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ if (!test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) ++ return tdma->cal_rounds; ++ else ++ return 0; ++} ++ ++int start_calibration(struct rt_proc_call *call) ++{ ++ struct tdma_request_cal *req_cal; ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ ++ req_cal = rtpc_get_priv(call, struct tdma_request_cal); ++ tdma = req_cal->tdma; ++ ++ /* there are no slots yet, simply add this job after first_job */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ tdma->calibration_call = call; ++ tdma->job_list_revision++; ++ list_add(&req_cal->head.entry, &tdma->first_job->entry); ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return -CALL_PENDING; ++} ++ ++void copyback_calibration(struct rt_proc_call *call, void *priv_data) ++{ ++ struct tdma_request_cal *req_cal; ++ struct tdma_priv *tdma; ++ int i; ++ u64 value; ++ u64 average = 0; ++ u64 min = 0x7FFFFFFFFFFFFFFFLL; ++ u64 max = 0; ++ ++ req_cal = rtpc_get_priv(call, struct tdma_request_cal); ++ tdma = req_cal->tdma; ++ ++ for (i = 0; i < tdma->cal_rounds; i++) { ++ value = req_cal->result_buffer[i]; ++ average += value; ++ if (value < min) ++ min = value; ++ if (value > max) ++ max = value; ++ if ((req_cal->cal_results) && ++ (copy_to_user(&req_cal->cal_results[i], &value, ++ sizeof(value)) != 0)) ++ rtpc_set_result(call, -EFAULT); ++ } ++ do_div(average, tdma->cal_rounds); ++ tdma->master_packet_delay_ns = average; ++ ++ average += 500; ++ do_div(average, 1000); ++ min += 500; ++ do_div(min, 1000); ++ max += 500; ++ do_div(max, 1000); ++ printk("TDMA: calibrated master-to-slave packet delay: " ++ "%ld us (min/max: %ld/%ld us)\n", ++ (unsigned long)average, (unsigned long)min, (unsigned long)max); ++} ++ ++void cleanup_calibration(void *priv_data) ++{ ++ struct tdma_request_cal *req_cal; ++ ++ req_cal = (struct tdma_request_cal *)priv_data; ++ kfree(req_cal->result_buffer); ++} ++ ++static int tdma_ioctl_set_slot(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ int id; ++ int jnt_id; ++ struct tdma_slot *slot, *old_slot; ++ struct tdma_job *job, *prev_job; ++ struct tdma_request_cal req_cal; ++ struct rtskb *rtskb; ++ unsigned int job_list_revision; ++ rtdm_lockctx_t context; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ id = cfg->args.set_slot.id; ++ if (id > tdma->max_slot_id) ++ return -EINVAL; ++ ++ if (cfg->args.set_slot.size == 0) ++ cfg->args.set_slot.size = rtdev->mtu; ++ else if (cfg->args.set_slot.size > rtdev->mtu) ++ 
return -EINVAL; ++ ++ jnt_id = cfg->args.set_slot.joint_slot; ++ if ((jnt_id >= 0) && ++ ((jnt_id >= tdma->max_slot_id) || (tdma->slot_table[jnt_id] == 0) || ++ (tdma->slot_table[jnt_id]->mtu != cfg->args.set_slot.size))) ++ return -EINVAL; ++ ++ slot = (struct tdma_slot *)kmalloc(sizeof(struct tdma_slot), ++ GFP_KERNEL); ++ if (!slot) ++ return -ENOMEM; ++ ++ if (!test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) { ++ req_cal.head.id = XMIT_REQ_CAL; ++ req_cal.head.ref_count = 0; ++ req_cal.tdma = tdma; ++ req_cal.offset = cfg->args.set_slot.offset; ++ req_cal.period = cfg->args.set_slot.period; ++ req_cal.phasing = cfg->args.set_slot.phasing; ++ req_cal.cal_rounds = tdma->cal_rounds; ++ req_cal.cal_results = cfg->args.set_slot.cal_results; ++ ++ req_cal.result_buffer = ++ kmalloc(req_cal.cal_rounds * sizeof(u64), GFP_KERNEL); ++ if (!req_cal.result_buffer) { ++ kfree(slot); ++ return -ENOMEM; ++ } ++ ++ ret = rtpc_dispatch_call(start_calibration, 0, &req_cal, ++ sizeof(req_cal), copyback_calibration, ++ cleanup_calibration); ++ if (ret < 0) { ++ /* kick out any pending calibration job before returning */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ job = list_entry(tdma->first_job->entry.next, ++ struct tdma_job, entry); ++ if (job != tdma->first_job) { ++ __list_del(job->entry.prev, job->entry.next); ++ ++ while (job->ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, ++ context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, ++ context); ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ kfree(slot); ++ return ret; ++ } ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) { ++ u32 cycle_no = (volatile u32)tdma->current_cycle; ++ u64 cycle_ms; ++ ++ /* switch back to [backup] master mode */ ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ tdma->sync_job.id = BACKUP_SYNC; ++ else ++ tdma->sync_job.id = XMIT_SYNC; ++ ++ /* wait 2 cycle periods for the mode switch */ ++ cycle_ms = tdma->cycle_period; ++ do_div(cycle_ms, 1000000); ++ if (cycle_ms == 0) ++ cycle_ms = 1; ++ msleep(2 * cycle_ms); ++ ++ /* catch the very unlikely case that the current master died ++ while we just switched the mode */ ++ if (cycle_no == (volatile u32)tdma->current_cycle) { ++ kfree(slot); ++ return -ETIME; ++ } ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ set_bit(TDMA_FLAG_CALIBRATED, &tdma->flags); ++ } ++ ++ slot->head.id = id; ++ slot->head.ref_count = 0; ++ slot->period = cfg->args.set_slot.period; ++ slot->phasing = cfg->args.set_slot.phasing; ++ slot->mtu = cfg->args.set_slot.size; ++ slot->size = cfg->args.set_slot.size + rtdev->hard_header_len; ++ slot->offset = cfg->args.set_slot.offset; ++ slot->queue = &slot->local_queue; ++ rtskb_prio_queue_init(&slot->local_queue); ++ ++ if (jnt_id >= 0) /* all other validation tests performed above */ ++ slot->queue = tdma->slot_table[jnt_id]->queue; ++ ++ old_slot = tdma->slot_table[id]; ++ if ((id == DEFAULT_NRT_SLOT) && ++ (old_slot == tdma->slot_table[DEFAULT_SLOT])) ++ old_slot = NULL; ++ ++restart: ++ job_list_revision = tdma->job_list_revision; ++ ++ if (!old_slot) { ++ job = tdma->first_job; ++ while (1) { ++ prev_job = job; ++ job = list_entry(job->entry.next, struct tdma_job, ++ entry); ++ if (((job->id >= 0) && ++ ((slot->offset < SLOT_JOB(job)->offset) || ++ ((slot->offset == SLOT_JOB(job)->offset) && ++ (slot->head.id <= SLOT_JOB(job)->head.id)))) || ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ ((job->id == XMIT_RPL_CAL) && ++ (slot->offset 
< ++ REPLY_CAL_JOB(job)->reply_offset)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ (job == tdma->first_job)) ++ break; ++ } ++ ++ } else ++ prev_job = list_entry(old_slot->head.entry.prev, ++ struct tdma_job, entry); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ if (job_list_revision != tdma->job_list_revision) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ msleep(100); ++ goto restart; ++ } ++ ++ if (old_slot) ++ __list_del(old_slot->head.entry.prev, ++ old_slot->head.entry.next); ++ ++ list_add(&slot->head.entry, &prev_job->entry); ++ tdma->slot_table[id] = slot; ++ if ((id == DEFAULT_SLOT) && ++ (tdma->slot_table[DEFAULT_NRT_SLOT] == old_slot)) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = slot; ++ ++ if (old_slot) { ++ while (old_slot->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* search for other slots linked to the old one */ ++ for (jnt_id = 0; jnt_id < tdma->max_slot_id; jnt_id++) ++ if ((tdma->slot_table[jnt_id] != 0) && ++ (tdma->slot_table[jnt_id]->queue == ++ &old_slot->local_queue)) { ++ /* found a joint slot, move or detach it now */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ while (tdma->slot_table[jnt_id]->head.ref_count > ++ 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, ++ context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, ++ context); ++ } ++ ++ /* If the new slot size is larger, detach the other slot, ++ * update it otherwise. */ ++ if (slot->mtu > tdma->slot_table[jnt_id]->mtu) ++ tdma->slot_table[jnt_id]->queue = ++ &tdma->slot_table[jnt_id] ++ ->local_queue; ++ else { ++ tdma->slot_table[jnt_id]->mtu = ++ slot->mtu; ++ tdma->slot_table[jnt_id]->queue = ++ slot->queue; ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ } ++ } else ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ rtmac_vnic_set_max_mtu(rtdev, cfg->args.set_slot.size); ++ ++ if (old_slot) { ++ /* avoid that the formerly joint queue gets purged */ ++ old_slot->queue = &old_slot->local_queue; ++ ++ /* Without any reference to the old job and no joint slots we can ++ * safely purge its queue without lock protection. ++ * NOTE: Reconfiguring a slot during runtime may lead to packet ++ * drops! 
*/ ++ while ((rtskb = __rtskb_prio_dequeue(old_slot->queue))) ++ kfree_rtskb(rtskb); ++ ++ kfree(old_slot); ++ } ++ ++ return 0; ++} ++ ++int tdma_cleanup_slot(struct tdma_priv *tdma, struct tdma_slot *slot) ++{ ++ struct rtskb *rtskb; ++ unsigned int id, jnt_id; ++ rtdm_lockctx_t context; ++ ++ if (!slot) ++ return -EINVAL; ++ ++ id = slot->head.id; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ __list_del(slot->head.entry.prev, slot->head.entry.next); ++ ++ if (id == DEFAULT_NRT_SLOT) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = ++ tdma->slot_table[DEFAULT_SLOT]; ++ else { ++ if ((id == DEFAULT_SLOT) && ++ (tdma->slot_table[DEFAULT_NRT_SLOT] == slot)) ++ tdma->slot_table[DEFAULT_NRT_SLOT] = NULL; ++ tdma->slot_table[id] = NULL; ++ } ++ ++ while (slot->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* search for other slots linked to this one */ ++ for (jnt_id = 0; jnt_id < tdma->max_slot_id; jnt_id++) ++ if ((tdma->slot_table[jnt_id] != 0) && ++ (tdma->slot_table[jnt_id]->queue == &slot->local_queue)) { ++ /* found a joint slot, detach it now under lock protection */ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ while (tdma->slot_table[jnt_id]->head.ref_count > 0) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ msleep(100); ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ } ++ tdma->slot_table[jnt_id]->queue = ++ &tdma->slot_table[jnt_id]->local_queue; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ } ++ ++ /* avoid that the formerly joint queue gets purged */ ++ slot->queue = &slot->local_queue; ++ ++ /* No need to protect the queue access here - ++ * no one is referring to this job anymore ++ * (ref_count == 0, all joint slots detached). 
*/ ++ while ((rtskb = __rtskb_prio_dequeue(slot->queue))) ++ kfree_rtskb(rtskb); ++ ++ kfree(slot); ++ ++ return 0; ++} ++ ++static int tdma_ioctl_remove_slot(struct rtnet_device *rtdev, ++ struct tdma_config *cfg) ++{ ++ struct tdma_priv *tdma; ++ int id; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ id = cfg->args.remove_slot.id; ++ if (id > tdma->max_slot_id) ++ return -EINVAL; ++ ++ if ((id == DEFAULT_NRT_SLOT) && (tdma->slot_table[DEFAULT_NRT_SLOT] == ++ tdma->slot_table[DEFAULT_SLOT])) ++ return -EINVAL; ++ ++ return tdma_cleanup_slot(tdma, tdma->slot_table[id]); ++} ++ ++static int tdma_ioctl_detach(struct rtnet_device *rtdev) ++{ ++ struct tdma_priv *tdma; ++ int ret; ++ ++ if (rtdev->mac_priv == NULL) ++ return -ENOTTY; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ if (tdma->magic != TDMA_MAGIC) ++ return -ENOTTY; ++ ++ ret = rtmac_disc_detach(rtdev); ++ ++ return ret; ++} ++ ++int tdma_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct tdma_config cfg; ++ int ret; ++ ++ ret = copy_from_user(&cfg, (void *)arg, sizeof(cfg)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ switch (request) { ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case TDMA_IOC_MASTER: ++ ret = tdma_ioctl_master(rtdev, &cfg); ++ break; ++#endif ++ case TDMA_IOC_SLAVE: ++ ret = tdma_ioctl_slave(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_CAL_RESULT_SIZE: ++ ret = tdma_ioctl_cal_result_size(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_SET_SLOT: ++ ret = tdma_ioctl_set_slot(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_REMOVE_SLOT: ++ ret = tdma_ioctl_remove_slot(rtdev, &cfg); ++ break; ++ ++ case TDMA_IOC_DETACH: ++ ret = tdma_ioctl_detach(rtdev); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/Makefile 2021-04-07 16:01:26.960634548 +0800 +@@ -0,0 +1,10 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_TDMA) += tdma.o ++ ++tdma-y := \ ++ tdma_dev.o \ ++ tdma_ioctl.o \ ++ tdma_module.o \ ++ tdma_proto.o \ ++ tdma_worker.o +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_worker.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_worker.c 2021-04-07 16:01:26.956634553 +0800 +@@ -0,0 +1,231 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_worker.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++ ++static void do_slot_job(struct tdma_priv *tdma, struct tdma_slot *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct rtskb *rtskb; ++ ++ if ((job->period != 1) && ++ (tdma->current_cycle % job->period != job->phasing)) ++ return; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for slot begin, then send one pending packet */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + SLOT_JOB(job)->offset, ++ RTDM_TIMERMODE_REALTIME); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ rtskb = __rtskb_prio_dequeue(SLOT_JOB(job)->queue); ++ if (!rtskb) ++ return; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ rtmac_xmit(rtskb); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static void do_xmit_sync_job(struct tdma_priv *tdma, rtdm_lockctx_t lockctx) ++{ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for beginning of next cycle, then send sync */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + tdma->cycle_period, ++ RTDM_TIMERMODE_REALTIME); ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ tdma->current_cycle++; ++ tdma->current_cycle_start += tdma->cycle_period; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ tdma_xmit_sync_frame(tdma); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static void do_backup_sync_job(struct tdma_priv *tdma, rtdm_lockctx_t lockctx) ++{ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ /* wait for backup slot */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + tdma->backup_sync_inc, ++ RTDM_TIMERMODE_REALTIME); ++ ++ /* take over sync transmission if all earlier masters failed */ ++ if (!test_and_clear_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags)) { ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ tdma->current_cycle++; ++ tdma->current_cycle_start += tdma->cycle_period; ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ tdma_xmit_sync_frame(tdma); ++ ++ set_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags); ++ } else ++ clear_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++} ++ ++static struct tdma_job *do_request_cal_job(struct tdma_priv *tdma, ++ struct tdma_request_cal *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct rt_proc_call *call; ++ struct tdma_job *prev_job; ++ int err; ++ ++ if ((job->period != 1) && ++ (tdma->current_cycle % job->period != job->phasing)) ++ return &job->head; ++ ++ /* remove job until we get a reply */ ++ __list_del(job->head.entry.prev, job->head.entry.next); ++ job->head.ref_count--; ++ prev_job = tdma->current_job = ++ list_entry(job->head.entry.prev, struct tdma_job, entry); ++ prev_job->ref_count++; ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + job->offset, ++ RTDM_TIMERMODE_REALTIME); ++ err = tdma_xmit_request_cal_frame( ++ tdma, tdma->current_cycle + job->period, job->offset); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ /* terminate call on error */ ++ if (err < 0) { ++ call = tdma->calibration_call; ++ tdma->calibration_call = NULL; ++ ++ if (call) { ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ rtpc_complete_call(call, err); ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ } ++ } ++ ++ return prev_job; ++} ++ ++static struct tdma_job 
*do_reply_cal_job(struct tdma_priv *tdma, ++ struct tdma_reply_cal *job, ++ rtdm_lockctx_t lockctx) ++{ ++ struct tdma_job *prev_job; ++ ++ if (job->reply_cycle > tdma->current_cycle) ++ return &job->head; ++ ++ /* remove the job */ ++ __list_del(job->head.entry.prev, job->head.entry.next); ++ job->head.ref_count--; ++ prev_job = tdma->current_job = ++ list_entry(job->head.entry.prev, struct tdma_job, entry); ++ prev_job->ref_count++; ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ++ if (job->reply_cycle == tdma->current_cycle) { ++ /* send reply in the assigned slot */ ++ rtdm_task_sleep_abs(tdma->current_cycle_start + ++ job->reply_offset, ++ RTDM_TIMERMODE_REALTIME); ++ rtmac_xmit(job->reply_rtskb); ++ } else { ++ /* cleanup if cycle already passed */ ++ kfree_rtskb(job->reply_rtskb); ++ } ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ return prev_job; ++} ++ ++void tdma_worker(void *arg) ++{ ++ struct tdma_priv *tdma = arg; ++ struct tdma_job *job; ++ rtdm_lockctx_t lockctx; ++ int ret; ++ ++ ret = rtdm_event_wait(&tdma->worker_wakeup); ++ if (ret) ++ return; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ ++ job = tdma->first_job; ++ ++ while (!rtdm_task_should_stop()) { ++ job->ref_count++; ++ switch (job->id) { ++ case WAIT_ON_SYNC: ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++ ret = rtdm_event_wait(&tdma->sync_event); ++ if (ret) ++ return; ++ rtdm_lock_get_irqsave(&tdma->lock, lockctx); ++ break; ++ ++ case XMIT_REQ_CAL: ++ job = do_request_cal_job(tdma, REQUEST_CAL_JOB(job), ++ lockctx); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case XMIT_SYNC: ++ do_xmit_sync_job(tdma, lockctx); ++ break; ++ ++ case BACKUP_SYNC: ++ do_backup_sync_job(tdma, lockctx); ++ break; ++ ++ case XMIT_RPL_CAL: ++ job = do_reply_cal_job(tdma, REPLY_CAL_JOB(job), ++ lockctx); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ default: ++ do_slot_job(tdma, SLOT_JOB(job), lockctx); ++ break; ++ } ++ job->ref_count--; ++ ++ job = tdma->current_job = ++ list_entry(job->entry.next, struct tdma_job, entry); ++ } ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, lockctx); ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/Kconfig 2021-04-07 16:01:26.951634561 +0800 +@@ -0,0 +1,21 @@ ++config XENO_DRIVERS_NET_TDMA ++ tristate "TDMA discipline for RTmac" ++ depends on XENO_DRIVERS_NET_RTMAC ++ default y ++ ---help--- ++ The Time Division Multiple Access discipline is the default RTmac ++ protocol for Ethernet networks. It consists of a master synchronising ++ the access of the slaves to the media by periodically issuing frames. ++ Backup masters can be set up to take over if the primary master fails. ++ TDMA also provides a global clock across all participants. The tdmacfg ++ tool can be used to configure a real-time NIC to use TDMA. ++ ++ See Documenatation/README.rtmac for further details. ++ ++config XENO_DRIVERS_NET_TDMA_MASTER ++ bool "TDMA master support" ++ depends on XENO_DRIVERS_NET_TDMA ++ default y ++ ---help--- ++ Enables TDMA master and backup master support for the node. This can ++ be switched of to reduce the memory footprint of pure slave nodes. 
+--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_module.c 2021-04-07 16:01:26.946634568 +0800 +@@ -0,0 +1,317 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_module.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int tdma_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int d, err = 0; ++ struct rtnet_device *rtdev; ++ struct tdma_priv *tdma; ++ const char *state; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ u64 cycle; ++#endif ++ ++ xnvfile_printf(it, "Interface API Device Operation Mode " ++ "Cycle State\n"); ++ ++ for (d = 1; d <= MAX_RT_DEVICES; d++) { ++ rtdev = rtdev_get_by_index(d); ++ if (!rtdev) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ break; ++ } ++ ++ if (!rtdev->mac_priv) ++ goto unlock_dev; ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ xnvfile_printf(it, "%-15s %-15s ", rtdev->name, ++ tdma->api_device.name); ++ ++ if (test_bit(TDMA_FLAG_CALIBRATED, &tdma->flags)) { ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags) && ++ !test_bit(TDMA_FLAG_BACKUP_ACTIVE, &tdma->flags)) ++ state = "stand-by"; ++ else ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ state = "active"; ++ } else ++ state = "init"; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) { ++ cycle = tdma->cycle_period + 500; ++ do_div(cycle, 1000); ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) ++ xnvfile_printf(it, "Backup Master %-7ld %s\n", ++ (unsigned long)cycle, state); ++ else ++ xnvfile_printf(it, "Master %-7ld %s\n", ++ (unsigned long)cycle, state); ++ } else ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ xnvfile_printf(it, "Slave - %s\n", ++ state); ++ ++ unlock_dev: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return err; ++} ++ ++int tdma_slots_proc_read(struct xnvfile_regular_iterator *it, void *data) ++{ ++ int d, i, err = 0; ++ struct rtnet_device *rtdev; ++ struct tdma_priv *tdma; ++ struct tdma_slot *slot; ++ int jnt_id; ++ u64 slot_offset; ++ ++ xnvfile_printf(it, "Interface " ++ "Slots (id[->joint]:offset:phasing/period:size)\n"); ++ ++ for (d = 1; d <= MAX_RT_DEVICES; d++) { ++ rtdev = rtdev_get_by_index(d); ++ if (!rtdev) ++ continue; ++ ++ err = mutex_lock_interruptible(&rtdev->nrt_lock); ++ if (err < 0) { ++ rtdev_dereference(rtdev); ++ break; ++ } ++ ++ if 
(!rtdev->mac_priv) ++ goto unlock_dev; ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ xnvfile_printf(it, "%-15s ", rtdev->name); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_BACKUP_MASTER, &tdma->flags)) { ++ slot_offset = tdma->backup_sync_inc - ++ tdma->cycle_period + 500; ++ do_div(slot_offset, 1000); ++ xnvfile_printf(it, "bak:%ld ", ++ (unsigned long)slot_offset); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_TDMA_MASTER */ ++ ++ if (tdma->slot_table) ++ for (i = 0; i <= tdma->max_slot_id; i++) { ++ slot = tdma->slot_table[i]; ++ if (!slot || ++ ((i == DEFAULT_NRT_SLOT) && ++ (tdma->slot_table[DEFAULT_SLOT] == slot))) ++ continue; ++ ++ if (slot->queue == &slot->local_queue) { ++ xnvfile_printf(it, "%d", i); ++ } else ++ for (jnt_id = 0; ++ jnt_id <= tdma->max_slot_id; ++ jnt_id++) ++ if (&tdma->slot_table[jnt_id] ++ ->local_queue == ++ slot->queue) { ++ xnvfile_printf(it, ++ "%d->%d", ++ i, ++ jnt_id); ++ break; ++ } ++ ++ slot_offset = slot->offset + 500; ++ do_div(slot_offset, 1000); ++ xnvfile_printf(it, ":%ld:%d/%d:%d ", ++ (unsigned long)slot_offset, ++ slot->phasing + 1, slot->period, ++ slot->mtu); ++ } ++ ++ xnvfile_printf(it, "\n"); ++ ++ unlock_dev: ++ mutex_unlock(&rtdev->nrt_lock); ++ rtdev_dereference(rtdev); ++ } ++ ++ return err; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++int tdma_attach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct tdma_priv *tdma = (struct tdma_priv *)priv; ++ int ret; ++ ++ memset(tdma, 0, sizeof(struct tdma_priv)); ++ ++ tdma->magic = TDMA_MAGIC; ++ tdma->rtdev = rtdev; ++ ++ rtdm_lock_init(&tdma->lock); ++ ++ rtdm_event_init(&tdma->worker_wakeup, 0); ++ rtdm_event_init(&tdma->xmit_event, 0); ++ rtdm_event_init(&tdma->sync_event, 0); ++ ++ ret = tdma_dev_init(rtdev, tdma); ++ if (ret < 0) ++ goto err_out1; ++ ++ ret = rtdm_task_init(&tdma->worker_task, "rtnet-tdma", tdma_worker, ++ tdma, DEF_WORKER_PRIO, 0); ++ if (ret != 0) ++ goto err_out2; ++ ++ return 0; ++ ++err_out2: ++ tdma_dev_release(tdma); ++ ++err_out1: ++ rtdm_event_destroy(&tdma->sync_event); ++ rtdm_event_destroy(&tdma->xmit_event); ++ rtdm_event_destroy(&tdma->worker_wakeup); ++ ++ return ret; ++} ++ ++int tdma_detach(struct rtnet_device *rtdev, void *priv) ++{ ++ struct tdma_priv *tdma = (struct tdma_priv *)priv; ++ struct tdma_job *job, *tmp; ++ ++ rtdm_event_destroy(&tdma->sync_event); ++ rtdm_event_destroy(&tdma->xmit_event); ++ rtdm_event_destroy(&tdma->worker_wakeup); ++ ++ tdma_dev_release(tdma); ++ ++ rtdm_task_destroy(&tdma->worker_task); ++ ++ list_for_each_entry_safe (job, tmp, &tdma->first_job->entry, entry) { ++ if (job->id >= 0) ++ tdma_cleanup_slot(tdma, SLOT_JOB(job)); ++ else if (job->id == XMIT_RPL_CAL) { ++ __list_del(job->entry.prev, job->entry.next); ++ kfree_rtskb(REPLY_CAL_JOB(job)->reply_rtskb); ++ } ++ } ++ ++ if (tdma->slot_table) ++ kfree(tdma->slot_table); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ if (test_bit(TDMA_FLAG_MASTER, &tdma->flags)) ++ rtskb_pool_release(&tdma->cal_rtskb_pool); ++#endif ++ ++ return 0; ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct rtmac_proc_entry tdma_proc_entries[] = { ++ { name: "tdma", handler: tdma_proc_read }, ++ { name: "tdma_slots", handler: tdma_slots_proc_read }, ++}; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++struct rtmac_disc tdma_disc = { ++ name: "TDMA", ++ priv_size: sizeof(struct tdma_priv), ++ disc_type: __constant_htons(RTMAC_TYPE_TDMA), ++ ++ packet_rx: tdma_packet_rx, ++ rt_packet_tx: tdma_rt_packet_tx, ++ nrt_packet_tx: tdma_nrt_packet_tx, ++ ++ 
get_mtu: tdma_get_mtu, ++ ++ vnic_xmit: RTMAC_DEFAULT_VNIC, ++ ++ attach: tdma_attach, ++ detach: tdma_detach, ++ ++ ioctls: { ++ service_name: "RTmac/TDMA", ++ ioctl_type: RTNET_IOC_TYPE_RTMAC_TDMA, ++ handler: tdma_ioctl ++ }, ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ proc_entries: tdma_proc_entries, ++ nr_proc_entries: ARRAY_SIZE(tdma_proc_entries), ++#endif /* CONFIG_XENO_OPT_VFILE */ ++}; ++ ++int __init tdma_init(void) ++{ ++ int ret; ++ ++ printk("RTmac/TDMA: init time division multiple access control " ++ "mechanism\n"); ++ ++ ret = rtmac_disc_register(&tdma_disc); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ ++void tdma_release(void) ++{ ++ rtmac_disc_deregister(&tdma_disc); ++ ++ printk("RTmac/TDMA: unloaded\n"); ++} ++ ++module_init(tdma_init); ++module_exit(tdma_release); ++ ++MODULE_AUTHOR("Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_proto.c 2021-04-07 16:01:26.942634574 +0800 +@@ -0,0 +1,407 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_proto.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include "asm/div64.h" ++ ++#include ++#include ++#include ++ ++void tdma_xmit_sync_frame(struct tdma_priv *tdma) ++{ ++ struct rtnet_device *rtdev = tdma->rtdev; ++ struct rtskb *rtskb; ++ struct tdma_frm_sync *sync; ++ ++ rtskb = alloc_rtskb(rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_sync) + 15, ++ &global_pool); ++ if (!rtskb) ++ goto err_out; ++ ++ rtskb_reserve(rtskb, ++ (rtdev->hard_header_len + sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ sync = (struct tdma_frm_sync *)rtskb_put(rtskb, ++ sizeof(struct tdma_frm_sync)); ++ ++ if (rtmac_add_header(rtdev, rtdev->broadcast, rtskb, RTMAC_TYPE_TDMA, ++ 0) < 0) { ++ kfree_rtskb(rtskb); ++ goto err_out; ++ } ++ ++ sync->head.version = __constant_htons(TDMA_FRM_VERSION); ++ sync->head.id = __constant_htons(TDMA_FRM_SYNC); ++ ++ sync->cycle_no = htonl(tdma->current_cycle); ++ sync->xmit_stamp = tdma->clock_offset; ++ sync->sched_xmit_stamp = ++ cpu_to_be64(tdma->clock_offset + tdma->current_cycle_start); ++ ++ rtskb->xmit_stamp = &sync->xmit_stamp; ++ ++ rtmac_xmit(rtskb); ++ ++ /* signal local waiters */ ++ rtdm_event_pulse(&tdma->sync_event); ++ ++ return; ++ ++err_out: ++ /*ERROR*/ rtdm_printk("TDMA: Failed to transmit sync frame!\n"); ++ return; ++} ++ ++int tdma_xmit_request_cal_frame(struct tdma_priv *tdma, u32 reply_cycle, ++ u64 reply_slot_offset) ++{ ++ struct rtnet_device *rtdev = tdma->rtdev; ++ struct rtskb *rtskb; ++ struct tdma_frm_req_cal *req_cal; ++ int ret; ++ ++ rtskb = alloc_rtskb(rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_req_cal) + 15, ++ &global_pool); ++ ret = -ENOMEM; ++ if (!rtskb) ++ goto err_out; ++ ++ rtskb_reserve(rtskb, ++ (rtdev->hard_header_len + sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ req_cal = (struct tdma_frm_req_cal *)rtskb_put( ++ rtskb, sizeof(struct tdma_frm_req_cal)); ++ ++ if ((ret = rtmac_add_header(rtdev, tdma->master_hw_addr, rtskb, ++ RTMAC_TYPE_TDMA, 0)) < 0) { ++ kfree_rtskb(rtskb); ++ goto err_out; ++ } ++ ++ req_cal->head.version = __constant_htons(TDMA_FRM_VERSION); ++ req_cal->head.id = __constant_htons(TDMA_FRM_REQ_CAL); ++ ++ req_cal->xmit_stamp = 0; ++ req_cal->reply_cycle = htonl(reply_cycle); ++ req_cal->reply_slot_offset = cpu_to_be64(reply_slot_offset); ++ ++ rtskb->xmit_stamp = &req_cal->xmit_stamp; ++ ++ ret = rtmac_xmit(rtskb); ++ if (ret < 0) ++ goto err_out; ++ ++ return 0; ++ ++err_out: ++ /*ERROR*/ rtdm_printk("TDMA: Failed to transmit request calibration " ++ "frame!\n"); ++ return ret; ++} ++ ++int tdma_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ int ret = 0; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[(rtskb->priority & RTSKB_CHANNEL_MASK) >> ++ RTSKB_CHANNEL_SHIFT]; ++ ++ if (unlikely(!slot)) { ++ ret = -EAGAIN; ++ goto err_out; ++ } ++ ++ if (unlikely(rtskb->len > slot->size)) { ++ ret = -EMSGSIZE; ++ goto err_out; ++ } ++ ++ __rtskb_prio_queue_tail(slot->queue, rtskb); ++ ++err_out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return ret; ++} ++ ++int tdma_nrt_packet_tx(struct rtskb *rtskb) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ int ret = 0; ++ ++ tdma = (struct tdma_priv *)rtskb->rtdev->mac_priv->disc_priv; ++ ++ rtcap_mark_rtmac_enqueue(rtskb); ++ ++ rtskb->priority 
= RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO, DEFAULT_NRT_SLOT); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[DEFAULT_NRT_SLOT]; ++ ++ if (unlikely(!slot)) { ++ ret = -EAGAIN; ++ goto err_out; ++ } ++ ++ if (unlikely(rtskb->len > slot->size)) { ++ ret = -EMSGSIZE; ++ goto err_out; ++ } ++ ++ __rtskb_prio_queue_tail(slot->queue, rtskb); ++ ++err_out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return ret; ++} ++ ++int tdma_packet_rx(struct rtskb *rtskb) ++{ ++ struct tdma_priv *tdma; ++ struct tdma_frm_head *head; ++ u64 delay; ++ u64 cycle_start; ++ nanosecs_rel_t clock_offset; ++ struct rt_proc_call *call; ++ struct tdma_request_cal *req_cal_job; ++ rtdm_lockctx_t context; ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ struct rtskb *reply_rtskb; ++ struct rtnet_device *rtdev; ++ struct tdma_frm_rpl_cal *rpl_cal_frm; ++ struct tdma_reply_cal *rpl_cal_job; ++ struct tdma_job *job; ++#endif ++ ++ tdma = (struct tdma_priv *)rtskb->rtdev->mac_priv->disc_priv; ++ ++ head = (struct tdma_frm_head *)rtskb->data; ++ ++ if (head->version != __constant_htons(TDMA_FRM_VERSION)) ++ goto kfree_out; ++ ++ switch (head->id) { ++ case __constant_htons(TDMA_FRM_SYNC): ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_sync)); ++ ++ /* see "Time Arithmetics" in the TDMA specification */ ++ clock_offset = be64_to_cpu(SYNC_FRM(head)->xmit_stamp) + ++ tdma->master_packet_delay_ns; ++ clock_offset -= rtskb->time_stamp; ++ ++ cycle_start = be64_to_cpu(SYNC_FRM(head)->sched_xmit_stamp) - ++ clock_offset; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ tdma->current_cycle = ntohl(SYNC_FRM(head)->cycle_no); ++ tdma->current_cycle_start = cycle_start; ++ tdma->clock_offset = clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ /* note: Ethernet-specific! */ ++ memcpy(tdma->master_hw_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ set_bit(TDMA_FLAG_RECEIVED_SYNC, &tdma->flags); ++ ++ rtdm_event_pulse(&tdma->sync_event); ++ break; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ case __constant_htons(TDMA_FRM_REQ_CAL): ++ RTNET_ASSERT(test_bit(TDMA_FLAG_MASTER, &tdma->flags) && ++ test_bit(TDMA_FLAG_CALIBRATED, ++ &tdma->flags), ++ break;); ++ ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_req_cal)); ++ ++ rtdev = rtskb->rtdev; ++ ++ reply_rtskb = alloc_rtskb( ++ rtdev->hard_header_len + sizeof(struct rtmac_hdr) + ++ sizeof(struct tdma_frm_rpl_cal) + 15, ++ &tdma->cal_rtskb_pool); ++ if (unlikely(!reply_rtskb)) { ++ /*ERROR*/ rtdm_printk( ++ "TDMA: Too many calibration requests " ++ "pending!\n"); ++ break; ++ } ++ ++ rtskb_reserve(reply_rtskb, (rtdev->hard_header_len + ++ sizeof(struct rtmac_hdr) + 15) & ++ ~15); ++ ++ rpl_cal_frm = (struct tdma_frm_rpl_cal *)rtskb_put( ++ reply_rtskb, sizeof(struct tdma_frm_rpl_cal)); ++ ++ /* note: Ethernet-specific! 
*/ ++ if (unlikely(rtmac_add_header( ++ rtdev, rtskb->mac.ethernet->h_source, ++ reply_rtskb, RTMAC_TYPE_TDMA, 0) < 0)) { ++ kfree_rtskb(reply_rtskb); ++ break; ++ } ++ ++ rpl_cal_frm->head.version = __constant_htons(TDMA_FRM_VERSION); ++ rpl_cal_frm->head.id = __constant_htons(TDMA_FRM_RPL_CAL); ++ ++ rpl_cal_frm->request_xmit_stamp = REQ_CAL_FRM(head)->xmit_stamp; ++ rpl_cal_frm->reception_stamp = cpu_to_be64(rtskb->time_stamp); ++ rpl_cal_frm->xmit_stamp = 0; ++ ++ reply_rtskb->xmit_stamp = &rpl_cal_frm->xmit_stamp; ++ ++ /* use reply_rtskb memory behind the frame as job buffer */ ++ rpl_cal_job = (struct tdma_reply_cal *)reply_rtskb->tail; ++ RTNET_ASSERT(reply_rtskb->tail + ++ sizeof(struct tdma_reply_cal) <= ++ reply_rtskb->buf_end, ++ rtskb_over_panic(reply_rtskb, ++ sizeof(struct tdma_reply_cal), ++ current_text_addr());); ++ ++ rpl_cal_job->head.id = XMIT_RPL_CAL; ++ rpl_cal_job->head.ref_count = 0; ++ rpl_cal_job->reply_cycle = ++ ntohl(REQ_CAL_FRM(head)->reply_cycle); ++ rpl_cal_job->reply_rtskb = reply_rtskb; ++ rpl_cal_job->reply_offset = ++ be64_to_cpu(REQ_CAL_FRM(head)->reply_slot_offset); ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ job = tdma->current_job; ++ while (1) { ++ job = list_entry(job->entry.prev, struct tdma_job, ++ entry); ++ if ((job == tdma->first_job) || ++ ((job->id >= 0) && (SLOT_JOB(job)->offset < ++ rpl_cal_job->reply_offset)) || ++ ((job->id == XMIT_RPL_CAL) && ++ (REPLY_CAL_JOB(job)->reply_offset < ++ rpl_cal_job->reply_offset))) ++ break; ++ } ++ list_add(&rpl_cal_job->head.entry, &job->entry); ++ tdma->job_list_revision++; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ break; ++#endif ++ ++ case __constant_htons(TDMA_FRM_RPL_CAL): ++ rtskb_pull(rtskb, sizeof(struct tdma_frm_rpl_cal)); ++ ++ /* see "Time Arithmetics" in the TDMA specification */ ++ delay = (rtskb->time_stamp - ++ be64_to_cpu(RPL_CAL_FRM(head)->request_xmit_stamp)) - ++ (be64_to_cpu(RPL_CAL_FRM(head)->xmit_stamp) - ++ be64_to_cpu(RPL_CAL_FRM(head)->reception_stamp)); ++ delay = (delay + 1) >> 1; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ call = tdma->calibration_call; ++ if (call == NULL) { ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ break; ++ } ++ req_cal_job = rtpc_get_priv(call, struct tdma_request_cal); ++ ++ req_cal_job->result_buffer[--req_cal_job->cal_rounds] = delay; ++ ++ if (req_cal_job->cal_rounds > 0) { ++ tdma->job_list_revision++; ++ list_add(&req_cal_job->head.entry, ++ &tdma->first_job->entry); ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ } else { ++ tdma->calibration_call = NULL; ++ ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ rtpc_complete_call(call, 0); ++ } ++ ++ break; ++ ++ default: ++ /*ERROR*/ rtdm_printk("TDMA: Unknown frame %d!\n", ++ ntohs(head->id)); ++ } ++ ++kfree_out: ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++unsigned int tdma_get_mtu(struct rtnet_device *rtdev, unsigned int priority) ++{ ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t context; ++ struct tdma_slot *slot; ++ unsigned int mtu; ++ ++ tdma = (struct tdma_priv *)rtdev->mac_priv->disc_priv; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, context); ++ ++ slot = tdma->slot_table[(priority & RTSKB_CHANNEL_MASK) >> ++ RTSKB_CHANNEL_SHIFT]; ++ ++ if (unlikely(!slot)) { ++ mtu = rtdev->mtu; ++ goto out; ++ } ++ ++ mtu = slot->mtu; ++ ++out: ++ rtdm_lock_put_irqrestore(&tdma->lock, context); ++ ++ return mtu; ++} +--- linux/drivers/xenomai/net/stack/rtmac/tdma/tdma_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/net/stack/rtmac/tdma/tdma_dev.c 2021-04-07 16:01:26.937634581 +0800 +@@ -0,0 +1,186 @@ ++/*** ++ * ++ * rtmac/tdma/tdma_dev.c ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++struct tdma_dev_ctx { ++ rtdm_task_t *cycle_waiter; ++}; ++ ++static int tdma_dev_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ ++ ctx->cycle_waiter = NULL; ++ ++ return 0; ++} ++ ++static void tdma_dev_close(struct rtdm_fd *fd) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ cobalt_atomic_enter(lock_ctx); ++ if (ctx->cycle_waiter) ++ rtdm_task_unblock(ctx->cycle_waiter); ++ cobalt_atomic_leave(lock_ctx); ++} ++ ++static int wait_on_sync(struct tdma_dev_ctx *tdma_ctx, rtdm_event_t *sync_event) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ cobalt_atomic_enter(lock_ctx); ++ /* keep it simple: only one waiter per device instance allowed */ ++ if (!tdma_ctx->cycle_waiter) { ++ tdma_ctx->cycle_waiter = rtdm_task_current(); ++ ret = rtdm_event_wait(sync_event); ++ tdma_ctx->cycle_waiter = NULL; ++ } else ++ ret = -EBUSY; ++ cobalt_atomic_leave(lock_ctx); ++ ++ return ret; ++} ++ ++static int tdma_dev_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct tdma_dev_ctx *ctx = rtdm_fd_to_private(fd); ++ struct tdma_priv *tdma; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ tdma = container_of(rtdm_fd_to_context(fd)->device, struct tdma_priv, ++ api_device); ++ ++ switch (request) { ++ case RTMAC_RTIOC_TIMEOFFSET: { ++ nanosecs_rel_t offset; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lock_ctx); ++ offset = tdma->clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, arg, sizeof(__s64)) || ++ rtdm_copy_to_user(fd, arg, &offset, sizeof(__s64))) ++ return -EFAULT; ++ } else ++ *(__s64 *)arg = offset; ++ ++ return 0; ++ } ++ case RTMAC_RTIOC_WAITONCYCLE: ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if ((long)arg != TDMA_WAIT_ON_SYNC) ++ return -EINVAL; ++ ++ return wait_on_sync(ctx, &tdma->sync_event); ++ ++ case RTMAC_RTIOC_WAITONCYCLE_EX: { ++ struct rtmac_waitinfo *waitinfo = (struct rtmac_waitinfo *)arg; ++ struct rtmac_waitinfo waitinfo_buf; ++ ++#define WAITINFO_HEAD_SIZE \ ++ ((char *)&waitinfo_buf.cycle_no - (char *)&waitinfo_buf) ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, waitinfo, ++ sizeof(struct rtmac_waitinfo)) || ++ rtdm_copy_from_user(fd, &waitinfo_buf, arg, ++ WAITINFO_HEAD_SIZE)) ++ return -EFAULT; ++ ++ waitinfo = &waitinfo_buf; ++ } ++ ++ if 
((waitinfo->type != TDMA_WAIT_ON_SYNC) || ++ (waitinfo->size < sizeof(struct rtmac_waitinfo))) ++ return -EINVAL; ++ ++ ret = wait_on_sync(ctx, &tdma->sync_event); ++ if (ret) ++ return ret; ++ ++ rtdm_lock_get_irqsave(&tdma->lock, lock_ctx); ++ waitinfo->cycle_no = tdma->current_cycle; ++ waitinfo->cycle_start = tdma->current_cycle_start; ++ waitinfo->clock_offset = tdma->clock_offset; ++ rtdm_lock_put_irqrestore(&tdma->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, arg, &waitinfo_buf, ++ sizeof(struct rtmac_waitinfo))) ++ return -EFAULT; ++ } ++ ++ return 0; ++ } ++ default: ++ return -ENOTTY; ++ } ++} ++ ++static struct rtdm_driver tdma_driver = { .profile_info = RTDM_PROFILE_INFO( ++ tdma, RTDM_CLASS_RTMAC, ++ RTDM_SUBCLASS_TDMA, ++ RTNET_RTDM_VER), ++ .device_flags = RTDM_NAMED_DEVICE, ++ .device_count = 1, ++ .context_size = ++ sizeof(struct tdma_dev_ctx), ++ .ops = { ++ .open = tdma_dev_open, ++ .ioctl_rt = tdma_dev_ioctl, ++ .ioctl_nrt = tdma_dev_ioctl, ++ .close = tdma_dev_close, ++ } }; ++ ++int tdma_dev_init(struct rtnet_device *rtdev, struct tdma_priv *tdma) ++{ ++ char *pos; ++ ++ strcpy(tdma->device_name, "TDMA"); ++ for (pos = rtdev->name + strlen(rtdev->name) - 1; ++ (pos >= rtdev->name) && ((*pos) >= '0') && (*pos <= '9'); pos--) ++ ; ++ strncat(tdma->device_name + 4, pos + 1, IFNAMSIZ - 4); ++ ++ tdma->api_driver = tdma_driver; ++ tdma->api_device.driver = &tdma->api_driver; ++ tdma->api_device.label = tdma->device_name; ++ ++ return rtdm_dev_register(&tdma->api_device); ++} +--- linux/drivers/xenomai/net/stack/rtmac/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/Kconfig 2021-04-07 16:01:26.932634588 +0800 +@@ -0,0 +1,16 @@ ++menuconfig XENO_DRIVERS_NET_RTMAC ++ depends on XENO_DRIVERS_NET ++ tristate "RTmac Layer" ++ default y ++ ---help--- ++ The Real-Time Media Access Control layer allows to extend the RTnet ++ stack with software-based access control mechanisms (also called ++ disciplines) for nondeterministic transport media. Disciplines can be ++ attached and detached per real-time device. RTmac also provides a ++ framework for tunnelling non-time-critical packets through real-time ++ networks by installing virtual NICs (VNIC) in the Linux domain. ++ ++ See Documentation/README.rtmac for further information. ++ ++source "drivers/xenomai/net/stack/rtmac/tdma/Kconfig" ++source "drivers/xenomai/net/stack/rtmac/nomac/Kconfig" +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_proc.c 2021-04-07 16:01:26.928634593 +0800 +@@ -0,0 +1,132 @@ ++/*** ++ * ++ * rtmac_proc.c ++ * ++ * rtmac - real-time networking medium access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++struct xnvfile_directory rtmac_proc_root; ++ ++static struct xnvfile_regular_ops rtnet_rtmac_disciplines_vfile_ops = { ++ .show = rtnet_rtmac_disciplines_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtmac_disciplines_vfile = { ++ .ops = &rtnet_rtmac_disciplines_vfile_ops, ++}; ++ ++static struct xnvfile_regular_ops rtnet_rtmac_vnics_vfile_ops = { ++ .show = rtnet_rtmac_vnics_show, ++}; ++ ++static struct xnvfile_regular rtnet_rtmac_vnics_vfile = { ++ .ops = &rtnet_rtmac_vnics_vfile_ops, ++}; ++ ++static int rtnet_rtmac_disc_show(struct xnvfile_regular_iterator *it, ++ void *data) ++{ ++ struct rtmac_proc_entry *entry; ++ entry = container_of(it->vfile, struct rtmac_proc_entry, vfile); ++ return entry->handler(it, data); ++} ++ ++static struct xnvfile_regular_ops rtnet_rtmac_disc_vfile_ops = { ++ .show = rtnet_rtmac_disc_show, ++}; ++ ++int rtmac_disc_proc_register(struct rtmac_disc *disc) ++{ ++ int i, err; ++ struct rtmac_proc_entry *entry; ++ ++ for (i = 0; i < disc->nr_proc_entries; i++) { ++ entry = &disc->proc_entries[i]; ++ ++ entry->vfile.ops = &rtnet_rtmac_disc_vfile_ops; ++ err = xnvfile_init_regular(entry->name, &entry->vfile, ++ &rtmac_proc_root); ++ if (err < 0) { ++ while (--i >= 0) ++ xnvfile_destroy_regular( ++ &disc->proc_entries[i].vfile); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++void rtmac_disc_proc_unregister(struct rtmac_disc *disc) ++{ ++ int i; ++ ++ for (i = 0; i < disc->nr_proc_entries; i++) ++ xnvfile_destroy_regular(&disc->proc_entries[i].vfile); ++} ++ ++int rtmac_proc_register(void) ++{ ++ int err; ++ ++ err = xnvfile_init_dir("rtmac", &rtmac_proc_root, &rtnet_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ err = xnvfile_init_regular("disciplines", ++ &rtnet_rtmac_disciplines_vfile, ++ &rtmac_proc_root); ++ if (err < 0) ++ goto err2; ++ ++ err = xnvfile_init_regular("vnics", &rtnet_rtmac_vnics_vfile, ++ &rtmac_proc_root); ++ if (err < 0) ++ goto err3; ++ ++ return 0; ++ ++err3: ++ xnvfile_destroy_regular(&rtnet_rtmac_disciplines_vfile); ++ ++err2: ++ xnvfile_destroy_dir(&rtmac_proc_root); ++ ++err1: ++ /*ERRMSG*/ printk("RTmac: unable to initialize /proc entries\n"); ++ return err; ++} ++ ++void rtmac_proc_release(void) ++{ ++ xnvfile_destroy_regular(&rtnet_rtmac_vnics_vfile); ++ xnvfile_destroy_regular(&rtnet_rtmac_disciplines_vfile); ++ xnvfile_destroy_dir(&rtmac_proc_root); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_disc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_disc.c 2021-04-07 16:01:26.923634601 +0800 +@@ -0,0 +1,271 @@ ++/*** ++ * ++ * rtmac_disc.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static DEFINE_MUTEX(disc_list_lock); ++static LIST_HEAD(disc_list); ++ ++/*** ++ * rtmac_disc_attach ++ * ++ * @rtdev attaches a discipline to a device ++ * @disc discipline to attach ++ * ++ * 0 success ++ * -EBUSY other discipline active ++ * -ENOMEM could not allocate memory ++ * ++ * Note: must be called with rtdev->nrt_lock acquired ++ */ ++int rtmac_disc_attach(struct rtnet_device *rtdev, struct rtmac_disc *disc) ++{ ++ int ret; ++ struct rtmac_priv *priv; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->attach != NULL, return -EINVAL;); ++ ++ if (rtdev->mac_disc) { ++ printk("RTmac: another discipline for rtdev '%s' active.\n", ++ rtdev->name); ++ return -EBUSY; ++ } ++ ++ if (rtdev->flags & IFF_LOOPBACK) ++ return -EINVAL; ++ ++ if (!try_module_get(disc->owner)) ++ return -EIDRM; ++ ++ if (!rtdev_reference(rtdev)) { ++ ret = -EIDRM; ++ goto err_module_put; ++ } ++ ++ /* alloc memory */ ++ priv = kmalloc(sizeof(struct rtmac_priv) + disc->priv_size, GFP_KERNEL); ++ if (!priv) { ++ printk("RTmac: kmalloc returned NULL for rtmac!\n"); ++ return -ENOMEM; ++ } ++ priv->orig_start_xmit = rtdev->start_xmit; ++ ++ /* call attach function of discipline */ ++ ret = disc->attach(rtdev, priv->disc_priv); ++ if (ret < 0) ++ goto err_kfree_priv; ++ ++ /* now attach RTmac to device */ ++ rtdev->mac_disc = disc; ++ rtdev->mac_priv = priv; ++ rtdev->start_xmit = disc->rt_packet_tx; ++ if (disc->get_mtu) ++ rtdev->get_mtu = disc->get_mtu; ++ rtdev->mac_detach = rtmac_disc_detach; ++ ++ /* create the VNIC */ ++ ret = rtmac_vnic_add(rtdev, disc->vnic_xmit); ++ if (ret < 0) { ++ printk("RTmac: Warning, VNIC creation failed for rtdev %s.\n", ++ rtdev->name); ++ goto err_disc_detach; ++ } ++ ++ return 0; ++ ++err_disc_detach: ++ disc->detach(rtdev, priv->disc_priv); ++err_kfree_priv: ++ kfree(priv); ++ rtdev_dereference(rtdev); ++err_module_put: ++ module_put(disc->owner); ++ return ret; ++} ++ ++/*** ++ * rtmac_disc_detach ++ * ++ * @rtdev detaches a discipline from a device ++ * ++ * 0 success ++ * -1 discipline has no detach function ++ * -EINVAL called with rtdev=NULL ++ * -ENODEV no discipline active on dev ++ * ++ * Note: must be called with rtdev->nrt_lock acquired ++ */ ++int rtmac_disc_detach(struct rtnet_device *rtdev) ++{ ++ int ret; ++ struct rtmac_disc *disc; ++ struct rtmac_priv *priv; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ disc = rtdev->mac_disc; ++ if (!disc) ++ return -ENODEV; ++ ++ RTNET_ASSERT(disc->detach != NULL, return -EINVAL;); ++ ++ priv = rtdev->mac_priv; ++ RTNET_ASSERT(priv != NULL, return -EINVAL;); ++ ++ ret = rtmac_vnic_unregister(rtdev); ++ if (ret < 0) ++ return ret; ++ ++ /* call release function of discipline */ ++ ret = disc->detach(rtdev, priv->disc_priv); ++ if (ret < 0) ++ return ret; ++ ++ rtmac_vnic_cleanup(rtdev); ++ ++ /* restore start_xmit and get_mtu */ ++ rtdev->start_xmit = priv->orig_start_xmit; 
++ rtdev->get_mtu = rt_hard_mtu; ++ ++ /* remove pointers from rtdev */ ++ rtdev->mac_disc = NULL; ++ rtdev->mac_priv = NULL; ++ rtdev->mac_detach = NULL; ++ ++ rtdev_dereference(rtdev); ++ ++ kfree(priv); ++ ++ module_put(disc->owner); ++ ++ return 0; ++} ++ ++static struct rtmac_disc *rtmac_get_disc_by_name(const char *name) ++{ ++ struct list_head *disc; ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_for_each (disc, &disc_list) { ++ if (strcmp(((struct rtmac_disc *)disc)->name, name) == 0) { ++ mutex_unlock(&disc_list_lock); ++ return (struct rtmac_disc *)disc; ++ } ++ } ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return NULL; ++} ++ ++int __rtmac_disc_register(struct rtmac_disc *disc, struct module *module) ++{ ++ int ret; ++ ++ RTNET_ASSERT(disc != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->name != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->rt_packet_tx != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->nrt_packet_tx != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->attach != NULL, return -EINVAL;); ++ RTNET_ASSERT(disc->detach != NULL, return -EINVAL;); ++ ++ disc->owner = module; ++ ++ if (rtmac_get_disc_by_name(disc->name) != NULL) { ++ printk("RTmac: discipline '%s' already registered!\n", ++ disc->name); ++ return -EBUSY; ++ } ++ ++ ret = rtnet_register_ioctls(&disc->ioctls); ++ if (ret < 0) ++ return ret; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = rtmac_disc_proc_register(disc); ++ if (ret < 0) { ++ rtnet_unregister_ioctls(&disc->ioctls); ++ return ret; ++ } ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_add(&disc->list, &disc_list); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return 0; ++} ++ ++void rtmac_disc_deregister(struct rtmac_disc *disc) ++{ ++ RTNET_ASSERT(disc != NULL, return;); ++ ++ mutex_lock(&disc_list_lock); ++ ++ list_del(&disc->list); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ rtnet_unregister_ioctls(&disc->ioctls); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_disc_proc_unregister(disc); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_disciplines_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtmac_disc *disc; ++ int err; ++ ++ err = mutex_lock_interruptible(&disc_list_lock); ++ if (err < 0) ++ return err; ++ ++ xnvfile_printf(it, "Name\t\tID\n"); ++ ++ list_for_each_entry (disc, &disc_list, list) ++ xnvfile_printf(it, "%-15s %04X\n", disc->name, ++ ntohs(disc->disc_type)); ++ ++ mutex_unlock(&disc_list_lock); ++ ++ return 0; ++} ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_proto.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_proto.c 2021-04-07 16:01:26.918634608 +0800 +@@ -0,0 +1,68 @@ ++/*** ++ * ++ * rtmac/rtmac_proto.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++int rtmac_proto_rx(struct rtskb *skb, struct rtpacket_type *pt) ++{ ++ struct rtmac_disc *disc = skb->rtdev->mac_disc; ++ struct rtmac_hdr *hdr; ++ ++ if (disc == NULL) { ++ goto error; ++ } ++ ++ hdr = (struct rtmac_hdr *)skb->data; ++ rtskb_pull(skb, sizeof(struct rtmac_hdr)); ++ ++ if (hdr->ver != RTMAC_VERSION) { ++ rtdm_printk( ++ "RTmac: received unsupported RTmac protocol version on " ++ "device %s. Got 0x%x but expected 0x%x\n", ++ skb->rtdev->name, hdr->ver, RTMAC_VERSION); ++ goto error; ++ } ++ ++ if (hdr->flags & RTMAC_FLAG_TUNNEL) ++ rtmac_vnic_rx(skb, hdr->type); ++ else if (disc->disc_type == hdr->type) ++ disc->packet_rx(skb); ++ return 0; ++ ++error: ++ kfree_rtskb(skb); ++ return 0; ++} ++ ++struct rtpacket_type rtmac_packet_type = { .type = __constant_htons(ETH_RTMAC), ++ .handler = rtmac_proto_rx }; ++ ++void rtmac_proto_release(void) ++{ ++ rtdev_remove_pack(&rtmac_packet_type); ++} +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_module.c 2021-04-07 16:01:26.914634613 +0800 +@@ -0,0 +1,80 @@ ++/* rtmac_module.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++int __init rtmac_init(void) ++{ ++ int ret = 0; ++ ++ printk("RTmac: init realtime media access control\n"); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ret = rtmac_proc_register(); ++ if (ret < 0) ++ return ret; ++#endif ++ ++ ret = rtmac_vnic_module_init(); ++ if (ret < 0) ++ goto error1; ++ ++ ret = rtmac_proto_init(); ++ if (ret < 0) ++ goto error2; ++ ++ return 0; ++ ++error2: ++ rtmac_vnic_module_cleanup(); ++ ++error1: ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_proc_release(); ++#endif ++ return ret; ++} ++ ++void rtmac_release(void) ++{ ++ rtmac_proto_release(); ++ rtmac_vnic_module_cleanup(); ++#ifdef CONFIG_XENO_OPT_VFILE ++ rtmac_proc_release(); ++#endif ++ ++ printk("RTmac: unloaded\n"); ++} ++ ++module_init(rtmac_init); ++module_exit(rtmac_release); ++ ++MODULE_AUTHOR("Marc Kleine-Budde, Jan Kiszka"); ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/net/stack/rtmac/rtmac_syms.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtmac/rtmac_syms.c 2021-04-07 16:01:26.909634620 +0800 +@@ -0,0 +1,36 @@ ++/* rtmac_syms.c ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++EXPORT_SYMBOL_GPL(__rtmac_disc_register); ++EXPORT_SYMBOL_GPL(rtmac_disc_deregister); ++ ++EXPORT_SYMBOL_GPL(rtmac_disc_attach); ++EXPORT_SYMBOL_GPL(rtmac_disc_detach); ++ ++EXPORT_SYMBOL_GPL(rtmac_vnic_set_max_mtu); ++ ++EXPORT_SYMBOL_GPL(rtmac_vnic_xmit); +--- linux/drivers/xenomai/net/stack/rtdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtdev.c 2021-04-07 16:01:26.905634626 +0800 +@@ -0,0 +1,940 @@ ++/*** ++ * ++ * stack/rtdev.c - NIC device driver layer ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include /* ARPHRD_ETHER */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int device_rtskbs = DEFAULT_DEVICE_RTSKBS; ++module_param(device_rtskbs, uint, 0444); ++MODULE_PARM_DESC(device_rtskbs, "Number of additional global realtime socket " ++ "buffers per network adapter"); ++ ++struct rtnet_device *rtnet_devices[MAX_RT_DEVICES]; ++static struct rtnet_device *loopback_device; ++static DEFINE_RTDM_LOCK(rtnet_devices_rt_lock); ++static LIST_HEAD(rtskb_mapped_list); ++static LIST_HEAD(rtskb_mapwait_list); ++ ++LIST_HEAD(event_hook_list); ++DEFINE_MUTEX(rtnet_devices_nrt_lock); ++ ++static int rtdev_locked_xmit(struct rtskb *skb, struct rtnet_device *rtdev); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) ++#define atomic_fetch_add_unless __atomic_add_unless ++#endif ++ ++int rtdev_reference(struct rtnet_device *rtdev) ++{ ++ smp_mb__before_atomic(); ++ if (rtdev->rt_owner && ++ atomic_fetch_add_unless(&rtdev->refcount, 1, 0) == 0) { ++ if (!try_module_get(rtdev->rt_owner)) ++ return 0; ++ if (atomic_inc_return(&rtdev->refcount) != 1) ++ module_put(rtdev->rt_owner); ++ } ++ return 1; ++} ++EXPORT_SYMBOL_GPL(rtdev_reference); ++ ++struct rtskb *rtnetdev_alloc_rtskb(struct rtnet_device *rtdev, ++ unsigned int size) ++{ ++ struct rtskb *rtskb = alloc_rtskb(size, &rtdev->dev_pool); ++ if (rtskb) ++ rtskb->rtdev = rtdev; ++ return rtskb; ++} ++EXPORT_SYMBOL_GPL(rtnetdev_alloc_rtskb); ++ ++/*** ++ * __rtdev_get_by_name - find a rtnet_device by its name ++ * @name: name to find ++ * @note: caller must hold rtnet_devices_nrt_lock ++ */ ++static struct rtnet_device *__rtdev_get_by_name(const char *name) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if ((rtdev != NULL) && ++ (strncmp(rtdev->name, name, IFNAMSIZ) == 0)) ++ return rtdev; ++ } ++ return NULL; ++} ++ ++/*** ++ * rtdev_get_by_name - find and lock a rtnet_device by its name ++ * @name: name to find ++ */ ++struct rtnet_device *rtdev_get_by_name(const char *name) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_name(name); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_get_by_index - find and lock a rtnet_device by its ifindex ++ * @ifindex: index of device ++ */ ++struct rtnet_device *rtdev_get_by_index(int ifindex) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ if ((ifindex <= 0) || (ifindex > MAX_RT_DEVICES)) ++ return NULL; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_index(ifindex); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * __rtdev_get_by_hwaddr - find a rtnetdevice by its mac-address ++ * @type: Type of the net_device (may be ARPHRD_ETHER) ++ * @hw_addr: MAC-Address ++ */ ++static inline struct rtnet_device *__rtdev_get_by_hwaddr(unsigned short type, ++ char *hw_addr) ++{ ++ int i; ++ struct rtnet_device *rtdev; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if ((rtdev != NULL) && (rtdev->type == type) && ++ (!memcmp(rtdev->dev_addr, hw_addr, rtdev->addr_len))) { ++ return rtdev; ++ } ++ } ++ return NULL; ++} ++ 
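A usage note on the lookup helpers above: every successful rtdev_get_by_*() call returns the device with a reference taken through rtdev_reference(), so the caller has to drop it again with rtdev_dereference() once the device is no longer needed. A minimal sketch, illustrative only and written against the helpers defined in this file (it is not code from the patch):

static void example_show_mtu(void)
{
	struct rtnet_device *rtdev = rtdev_get_by_name("rteth0");

	if (!rtdev)
		return;	/* not registered, or currently going away */

	/* the held reference keeps the device and its rt_owner module alive */
	printk("RTnet: %s has an MTU of %u\n", rtdev->name, rtdev->mtu);

	rtdev_dereference(rtdev);
}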
++/*** ++ * rtdev_get_by_hwaddr - find and lock a rtnetdevice by its mac-address ++ * @type: Type of the net_device (may be ARPHRD_ETHER) ++ * @hw_addr: MAC-Address ++ */ ++struct rtnet_device *rtdev_get_by_hwaddr(unsigned short type, char *hw_addr) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = __rtdev_get_by_hwaddr(type, hw_addr); ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_get_by_hwaddr - find and lock the loopback device if available ++ */ ++struct rtnet_device *rtdev_get_loopback(void) ++{ ++ struct rtnet_device *rtdev; ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ rtdev = loopback_device; ++ if (rtdev != NULL && !rtdev_reference(rtdev)) ++ rtdev = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_alloc_name - allocate a name for the rtnet_device ++ * @rtdev: the rtnet_device ++ * @name_mask: a name mask (e.g. "rteth%d" for ethernet) ++ * ++ * This function have to be called from the driver probe function. ++ */ ++void rtdev_alloc_name(struct rtnet_device *rtdev, const char *mask) ++{ ++ char buf[IFNAMSIZ]; ++ int i; ++ struct rtnet_device *tmp; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ snprintf(buf, IFNAMSIZ, mask, i); ++ if ((tmp = rtdev_get_by_name(buf)) == NULL) { ++ strncpy(rtdev->name, buf, IFNAMSIZ); ++ break; ++ } else ++ rtdev_dereference(tmp); ++ } ++} ++ ++static int rtdev_pool_trylock(void *cookie) ++{ ++ return rtdev_reference(cookie); ++} ++ ++static void rtdev_pool_unlock(void *cookie) ++{ ++ rtdev_dereference(cookie); ++} ++ ++static const struct rtskb_pool_lock_ops rtdev_ops = { ++ .trylock = rtdev_pool_trylock, ++ .unlock = rtdev_pool_unlock, ++}; ++ ++int rtdev_init(struct rtnet_device *rtdev, unsigned dev_pool_size) ++{ ++ int ret; ++ ++ ret = rtskb_pool_init(&rtdev->dev_pool, dev_pool_size, &rtdev_ops, ++ rtdev); ++ if (ret < dev_pool_size) { ++ printk(KERN_ERR "RTnet: cannot allocate rtnet device pool\n"); ++ rtskb_pool_release(&rtdev->dev_pool); ++ return -ENOMEM; ++ } ++ ++ rtdm_mutex_init(&rtdev->xmit_mutex); ++ rtdm_lock_init(&rtdev->rtdev_lock); ++ mutex_init(&rtdev->nrt_lock); ++ ++ atomic_set(&rtdev->refcount, 0); ++ ++ /* scale global rtskb pool */ ++ rtdev->add_rtskbs = rtskb_pool_extend(&global_pool, device_rtskbs); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdev_init); ++ ++void rtdev_destroy(struct rtnet_device *rtdev) ++{ ++ rtskb_pool_release(&rtdev->dev_pool); ++ rtskb_pool_shrink(&global_pool, rtdev->add_rtskbs); ++ rtdev->stack_event = NULL; ++ rtdm_mutex_destroy(&rtdev->xmit_mutex); ++} ++EXPORT_SYMBOL_GPL(rtdev_destroy); ++ ++/*** ++ * rtdev_alloc ++ * @int sizeof_priv: ++ * ++ * allocate memory for a new rt-network-adapter ++ */ ++struct rtnet_device *rtdev_alloc(unsigned sizeof_priv, unsigned dev_pool_size) ++{ ++ struct rtnet_device *rtdev; ++ unsigned alloc_size; ++ int ret; ++ ++ /* ensure 32-byte alignment of the private area */ ++ alloc_size = sizeof(*rtdev) + sizeof_priv + 31; ++ ++ rtdev = kzalloc(alloc_size, GFP_KERNEL); ++ if (rtdev == NULL) { ++ printk(KERN_ERR "RTnet: cannot allocate rtnet device\n"); ++ return NULL; ++ } ++ ++ ret = rtdev_init(rtdev, dev_pool_size); ++ if (ret) { ++ kfree(rtdev); ++ return NULL; ++ } ++ ++ if (sizeof_priv) ++ rtdev->priv = (void *)(((long)(rtdev + 1) + 31) & 
~31); ++ ++ return rtdev; ++} ++ ++/*** ++ * rtdev_free ++ */ ++void rtdev_free(struct rtnet_device *rtdev) ++{ ++ if (rtdev != NULL) { ++ rtdev_destroy(rtdev); ++ kfree(rtdev); ++ } ++} ++EXPORT_SYMBOL_GPL(rtdev_free); ++ ++static void init_etherdev(struct rtnet_device *rtdev, struct module *module) ++{ ++ rtdev->hard_header = rt_eth_header; ++ rtdev->type = ARPHRD_ETHER; ++ rtdev->hard_header_len = ETH_HLEN; ++ rtdev->mtu = 1500; /* eth_mtu */ ++ rtdev->addr_len = ETH_ALEN; ++ rtdev->flags = IFF_BROADCAST; /* TODO: IFF_MULTICAST; */ ++ rtdev->get_mtu = rt_hard_mtu; ++ rtdev->rt_owner = module; ++ ++ memset(rtdev->broadcast, 0xFF, ETH_ALEN); ++ strcpy(rtdev->name, "rteth%d"); ++} ++ ++/** ++ * rt_init_etherdev - sets up an ethernet device ++ * @module: module initializing the device ++ * ++ * Fill in the fields of the device structure with ethernet-generic ++ * values. This routine can be used to set up a pre-allocated device ++ * structure. The device still needs to be registered afterwards. ++ */ ++int __rt_init_etherdev(struct rtnet_device *rtdev, unsigned dev_pool_size, ++ struct module *module) ++{ ++ int ret; ++ ++ ret = rtdev_init(rtdev, dev_pool_size); ++ if (ret) ++ return ret; ++ ++ init_etherdev(rtdev, module); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__rt_init_etherdev); ++ ++/** ++ * rt_alloc_etherdev - Allocates and sets up an ethernet device ++ * @sizeof_priv: size of additional driver-private structure to ++ * be allocated for this ethernet device ++ * @dev_pool_size: size of the rx pool ++ * @module: module creating the device ++ * ++ * Allocates then fills in the fields of a new device structure with ++ * ethernet-generic values. Basically does everything except ++ * registering the device. ++ * ++ * A 32-byte alignment is enforced for the private data area. ++ */ ++struct rtnet_device *__rt_alloc_etherdev(unsigned sizeof_priv, ++ unsigned dev_pool_size, ++ struct module *module) ++{ ++ struct rtnet_device *rtdev; ++ ++ rtdev = rtdev_alloc(sizeof_priv, dev_pool_size); ++ if (!rtdev) ++ return NULL; ++ ++ init_etherdev(rtdev, module); ++ ++ return rtdev; ++} ++EXPORT_SYMBOL_GPL(__rt_alloc_etherdev); ++ ++static inline int __rtdev_new_index(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) ++ if (rtnet_devices[i] == NULL) ++ return i + 1; ++ ++ return -ENOMEM; ++} ++ ++static int rtskb_map(struct rtnet_device *rtdev, struct rtskb *skb) ++{ ++ dma_addr_t addr; ++ ++ addr = rtdev->map_rtskb(rtdev, skb); ++ ++ if (WARN_ON(addr == RTSKB_UNMAPPED)) ++ return -ENOMEM; ++ ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED && addr != skb->buf_dma_addr) { ++ printk("RTnet: device %s maps skb differently than others. 
" ++ "Different IOMMU domain?\nThis is not supported.\n", ++ rtdev->name); ++ return -EACCES; ++ } ++ ++ skb->buf_dma_addr = addr; ++ ++ return 0; ++} ++ ++int rtdev_map_rtskb(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev; ++ int err = 0; ++ int i; ++ ++ skb->buf_dma_addr = RTSKB_UNMAPPED; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if (rtdev && rtdev->map_rtskb) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ } ++ } ++ ++ if (!err) { ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED) ++ list_add(&skb->entry, &rtskb_mapped_list); ++ else ++ list_add(&skb->entry, &rtskb_mapwait_list); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ return err; ++} ++ ++static int rtdev_map_all_rtskbs(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb, *n; ++ int err = 0; ++ ++ if (!rtdev->map_rtskb) ++ return 0; ++ ++ list_for_each_entry (skb, &rtskb_mapped_list, entry) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ } ++ ++ list_for_each_entry_safe (skb, n, &rtskb_mapwait_list, entry) { ++ err = rtskb_map(rtdev, skb); ++ if (err) ++ break; ++ list_del(&skb->entry); ++ list_add(&skb->entry, &rtskb_mapped_list); ++ } ++ ++ return err; ++} ++ ++void rtdev_unmap_rtskb(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_del(&skb->entry); ++ ++ if (skb->buf_dma_addr != RTSKB_UNMAPPED) { ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtnet_devices[i]; ++ if (rtdev && rtdev->unmap_rtskb) { ++ rtdev->unmap_rtskb(rtdev, skb); ++ } ++ } ++ } ++ ++ skb->buf_dma_addr = RTSKB_UNMAPPED; ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++static void rtdev_unmap_all_rtskbs(struct rtnet_device *rtdev) ++{ ++ struct rtskb *skb; ++ ++ if (!rtdev->unmap_rtskb) ++ return; ++ ++ list_for_each_entry (skb, &rtskb_mapped_list, entry) { ++ rtdev->unmap_rtskb(rtdev, skb); ++ } ++} ++ ++/*** ++ * rt_register_rtnetdev: register a new rtnet_device (linux-like) ++ * @rtdev: the device ++ */ ++int rt_register_rtnetdev(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ int ifindex; ++ int err; ++ ++ /* requires at least driver layer version 2.0 */ ++ if (rtdev->vers < RTDEV_VERS_2_0) ++ return -EINVAL; ++ ++ if (rtdev->features & NETIF_F_LLTX) ++ rtdev->start_xmit = rtdev->hard_start_xmit; ++ else ++ rtdev->start_xmit = rtdev_locked_xmit; ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ ifindex = __rtdev_new_index(); ++ if (ifindex < 0) { ++ err = ifindex; ++ goto fail; ++ } ++ rtdev->ifindex = ifindex; ++ ++ if (strchr(rtdev->name, '%') != NULL) ++ rtdev_alloc_name(rtdev, rtdev->name); ++ ++ if (__rtdev_get_by_name(rtdev->name) != NULL) { ++ err = -EEXIST; ++ goto fail; ++ } ++ ++ rtdev->sysdev = ++ device_create(rtnet_class, NULL, MKDEV(0, rtdev->ifindex), ++ rtdev, rtdev->name); ++ if (IS_ERR(rtdev->sysdev)) { ++ err = PTR_ERR(rtdev->sysdev); ++ goto fail; ++ } ++ ++ if (rtdev->sysbind) { ++ err = sysfs_create_link(&rtdev->sysdev->kobj, ++ &rtdev->sysbind->kobj, "adapter"); ++ if (err) ++ goto fail_link; ++ } ++ ++ err = rtdev_map_all_rtskbs(rtdev); ++ if (err) ++ goto fail_map; ++ ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ if (rtdev->flags & IFF_LOOPBACK) { ++ /* allow only one loopback device */ ++ if (loopback_device) { ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, ++ context); ++ err = -EEXIST; ++ goto fail_loopback; ++ } ++ loopback_device = rtdev; ++ } 
++ rtnet_devices[rtdev->ifindex - 1] = rtdev; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, entry); ++ if (hook->register_device) ++ hook->register_device(rtdev); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ /* Default state at registration is that the device is present. */ ++ set_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++ ++ printk("RTnet: registered %s\n", rtdev->name); ++ ++ return 0; ++ ++fail_loopback: ++ rtdev_unmap_all_rtskbs(rtdev); ++fail_map: ++ if (rtdev->sysbind) ++ sysfs_remove_link(&rtdev->sysdev->kobj, "adapter"); ++fail_link: ++ device_destroy(rtnet_class, MKDEV(0, rtdev->ifindex)); ++fail: ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ return err; ++} ++ ++/*** ++ * rt_unregister_rtnetdev: unregister a rtnet_device ++ * @rtdev: the device ++ */ ++int rt_unregister_rtnetdev(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ ++ RTNET_ASSERT(rtdev->ifindex != 0, ++ printk("RTnet: device %s/%p was not registered\n", ++ rtdev->name, rtdev); ++ return -ENODEV;); ++ ++ if (rtdev->sysbind) ++ sysfs_remove_link(&rtdev->sysdev->kobj, "adapter"); ++ ++ device_destroy(rtnet_class, MKDEV(0, rtdev->ifindex)); ++ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ rtdm_lock_get_irqsave(&rtnet_devices_rt_lock, context); ++ ++ RTNET_ASSERT(atomic_read(&rtdev->refcount == 0), BUG()); ++ rtnet_devices[rtdev->ifindex - 1] = NULL; ++ if (rtdev->flags & IFF_LOOPBACK) ++ loopback_device = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtnet_devices_rt_lock, context); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, entry); ++ if (hook->unregister_device) ++ hook->unregister_device(rtdev); ++ } ++ ++ rtdev_unmap_all_rtskbs(rtdev); ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ clear_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++ ++ RTNET_ASSERT(atomic_read(&rtdev->refcount) == 0, ++ printk("RTnet: rtdev reference counter < 0!\n");); ++ ++ printk("RTnet: unregistered %s\n", rtdev->name); ++ ++ return 0; ++} ++ ++void rtdev_add_event_hook(struct rtdev_event_hook *hook) ++{ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ list_add(&hook->entry, &event_hook_list); ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++void rtdev_del_event_hook(struct rtdev_event_hook *hook) ++{ ++ mutex_lock(&rtnet_devices_nrt_lock); ++ list_del(&hook->entry); ++ mutex_unlock(&rtnet_devices_nrt_lock); ++} ++ ++int rtdev_up(struct rtnet_device *rtdev, struct rtnet_core_cmd *cmd) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ int ret = 0; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* We cannot change the promisc flag or the hardware address if ++ the device is already up. */ ++ if ((rtdev->flags & IFF_UP) && ++ (((cmd->args.up.set_dev_flags | cmd->args.up.clear_dev_flags) & ++ IFF_PROMISC) || ++ (cmd->args.up.dev_addr_type != ARPHRD_VOID))) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (cmd->args.up.dev_addr_type != ARPHRD_VOID && ++ cmd->args.up.dev_addr_type != rtdev->type) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* Skip upon extraneous call only after args have been checked. 
*/ ++ if (test_and_set_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) ++ goto out; ++ ++ rtdev->flags |= cmd->args.up.set_dev_flags; ++ rtdev->flags &= ~cmd->args.up.clear_dev_flags; ++ ++ if (cmd->args.up.dev_addr_type != ARPHRD_VOID) ++ memcpy(rtdev->dev_addr, cmd->args.up.dev_addr, MAX_ADDR_LEN); ++ ++ ret = rtdev_open(rtdev); /* also == 0 if rtdev is already up */ ++ ++ if (ret == 0) { ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, ++ entry); ++ if (hook->ifup) ++ hook->ifup(rtdev, cmd); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ } else ++ clear_bit(PRIV_FLAG_UP, &rtdev->priv_flags); ++out: ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdev_up); ++ ++int rtdev_down(struct rtnet_device *rtdev) ++{ ++ struct list_head *entry; ++ struct rtdev_event_hook *hook; ++ rtdm_lockctx_t context; ++ int ret = 0; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* spin lock required for sync with routing code */ ++ rtdm_lock_get_irqsave(&rtdev->rtdev_lock, context); ++ ++ if (test_bit(PRIV_FLAG_ADDING_ROUTE, &rtdev->priv_flags)) { ++ ret = -EBUSY; ++ goto fail; ++ } ++ ++ if (!test_and_clear_bit(PRIV_FLAG_UP, &rtdev->priv_flags)) ++ goto fail; ++ ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ ++ if (rtdev->mac_detach != NULL) ++ ret = rtdev->mac_detach(rtdev); ++ ++ if (ret == 0) { ++ mutex_lock(&rtnet_devices_nrt_lock); ++ ++ list_for_each (entry, &event_hook_list) { ++ hook = list_entry(entry, struct rtdev_event_hook, ++ entry); ++ if (hook->ifdown) ++ hook->ifdown(rtdev); ++ } ++ ++ mutex_unlock(&rtnet_devices_nrt_lock); ++ ++ ret = rtdev_close(rtdev); ++ } ++out: ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ return ret; ++fail: ++ rtdm_lock_put_irqrestore(&rtdev->rtdev_lock, context); ++ goto out; ++} ++EXPORT_SYMBOL_GPL(rtdev_down); ++ ++/*** ++ * rtdev_open ++ * ++ * Prepare an interface for use. ++ */ ++int rtdev_open(struct rtnet_device *rtdev) ++{ ++ int ret = 0; ++ ++ if (rtdev->flags & IFF_UP) /* Is it already up? 
*/ ++ return 0; ++ ++ if (!rtdev_reference(rtdev)) ++ return -EIDRM; ++ ++ if (rtdev->open) /* Call device private open method */ ++ ret = rtdev->open(rtdev); ++ ++ if (!ret) { ++ rtdev->flags |= IFF_UP; ++ set_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++ } else ++ rtdev_dereference(rtdev); ++ ++ return ret; ++} ++ ++/*** ++ * rtdev_close ++ */ ++int rtdev_close(struct rtnet_device *rtdev) ++{ ++ int ret = 0; ++ ++ if (!(rtdev->flags & IFF_UP)) ++ return 0; ++ ++ if (rtdev->stop) ++ ret = rtdev->stop(rtdev); ++ ++ rtdev->flags &= ~(IFF_UP | IFF_RUNNING); ++ clear_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++ ++ if (ret == 0) ++ rtdev_dereference(rtdev); ++ ++ return ret; ++} ++ ++static int rtdev_locked_xmit(struct rtskb *skb, struct rtnet_device *rtdev) ++{ ++ int ret; ++ ++ rtdm_mutex_lock(&rtdev->xmit_mutex); ++ ret = rtdev->hard_start_xmit(skb, rtdev); ++ rtdm_mutex_unlock(&rtdev->xmit_mutex); ++ ++ return ret; ++} ++ ++/*** ++ * rtdev_xmit - send real-time packet ++ */ ++int rtdev_xmit(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev; ++ int err; ++ ++ RTNET_ASSERT(rtskb != NULL, return -EINVAL;); ++ ++ rtdev = rtskb->rtdev; ++ ++ if (!rtnetif_carrier_ok(rtdev)) { ++ err = -EAGAIN; ++ kfree_rtskb(rtskb); ++ return err; ++ } ++ ++ if (rtskb_acquire(rtskb, &rtdev->dev_pool) != 0) { ++ err = -ENOBUFS; ++ kfree_rtskb(rtskb); ++ return err; ++ } ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ err = rtdev->start_xmit(rtskb, rtdev); ++ if (err) { ++ /* on error we must free the rtskb here */ ++ kfree_rtskb(rtskb); ++ ++ rtdm_printk("hard_start_xmit returned %d\n", err); ++ } ++ ++ return err; ++} ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++/*** ++ * rtdev_xmit_proxy - send rtproxy packet ++ */ ++int rtdev_xmit_proxy(struct rtskb *rtskb) ++{ ++ struct rtnet_device *rtdev; ++ int err; ++ ++ RTNET_ASSERT(rtskb != NULL, return -EINVAL;); ++ ++ rtdev = rtskb->rtdev; ++ ++ RTNET_ASSERT(rtdev != NULL, return -EINVAL;); ++ ++ /* TODO: make these lines race-condition-safe */ ++ if (rtdev->mac_disc) { ++ RTNET_ASSERT(rtdev->mac_disc->nrt_packet_tx != NULL, ++ return -EINVAL;); ++ ++ err = rtdev->mac_disc->nrt_packet_tx(rtskb); ++ } else { ++ err = rtdev->start_xmit(rtskb, rtdev); ++ if (err) { ++ /* on error we must free the rtskb here */ ++ kfree_rtskb(rtskb); ++ ++ rtdm_printk("hard_start_xmit returned %d\n", err); ++ } ++ } ++ ++ return err; ++} ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_PROXY */ ++ ++unsigned int rt_hard_mtu(struct rtnet_device *rtdev, unsigned int priority) ++{ ++ return rtdev->mtu; ++} ++ ++EXPORT_SYMBOL_GPL(rtdev_alloc_name); ++ ++EXPORT_SYMBOL_GPL(rt_register_rtnetdev); ++EXPORT_SYMBOL_GPL(rt_unregister_rtnetdev); ++ ++EXPORT_SYMBOL_GPL(rtdev_add_event_hook); ++EXPORT_SYMBOL_GPL(rtdev_del_event_hook); ++ ++EXPORT_SYMBOL_GPL(rtdev_get_by_name); ++EXPORT_SYMBOL_GPL(rtdev_get_by_index); ++EXPORT_SYMBOL_GPL(rtdev_get_by_hwaddr); ++EXPORT_SYMBOL_GPL(rtdev_get_loopback); ++ ++EXPORT_SYMBOL_GPL(rtdev_xmit); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++EXPORT_SYMBOL_GPL(rtdev_xmit_proxy); ++#endif ++ ++EXPORT_SYMBOL_GPL(rt_hard_mtu); +--- linux/drivers/xenomai/net/stack/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/Kconfig 2021-04-07 16:01:26.900634633 +0800 +@@ -0,0 +1,41 @@ ++menu "Protocol Stack" ++ depends on XENO_DRIVERS_NET ++ ++comment "Stack parameters" ++ ++config XENO_DRIVERS_NET_RX_FIFO_SIZE ++ int "Size of central RX-FIFO" ++ depends on XENO_DRIVERS_NET ++ default 32 ++ 
---help--- ++ Size of FIFO between NICs and stack manager task. Must be power ++ of two! Effectively, only CONFIG_RTNET_RX_FIFO_SIZE-1 slots will ++ be usable. ++ ++config XENO_DRIVERS_NET_ETH_P_ALL ++ depends on XENO_DRIVERS_NET ++ bool "Support for ETH_P_ALL" ++ ---help--- ++ Enables core support for registering listeners on all layer 3 ++ protocols (ETH_P_ALL). Internally this is currently realised by ++ clone-copying incoming frames for those listners, future versions ++ will implement buffer sharing for efficiency reasons. Use with ++ care, every ETH_P_ALL-listener adds noticable overhead to the ++ reception path. ++ ++config XENO_DRIVERS_NET_RTWLAN ++ depends on XENO_DRIVERS_NET ++ bool "Real-Time WLAN" ++ ---help--- ++ Enables core support for real-time wireless LAN. RT-WLAN is based ++ on low-level access to 802.11-compliant adapters and is currently ++ in an experimental stage. ++ ++comment "Protocols" ++ ++source "drivers/xenomai/net/stack/ipv4/Kconfig" ++source "drivers/xenomai/net/stack/packet/Kconfig" ++source "drivers/xenomai/net/stack/rtmac/Kconfig" ++source "drivers/xenomai/net/stack/rtcfg/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/net/stack/include/rtnet_internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_internal.h 2021-04-07 16:01:26.895634641 +0800 +@@ -0,0 +1,75 @@ ++/*** ++ * ++ * rtnet_internal.h - internal declarations ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_INTERNAL_H_ ++#define __RTNET_INTERNAL_H_ ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++#define RTNET_ASSERT(expr, func) \ ++ if (!(expr)) { \ ++ rtdm_printk("Assertion failed! 
%s:%s:%d %s\n", __FILE__, \ ++ __FUNCTION__, __LINE__, (#expr)); \ ++ func \ ++ } ++#else ++#define RTNET_ASSERT(expr, func) ++#endif /* CONFIG_XENO_DRIVERS_NET_CHECKED */ ++ ++/* some configurables */ ++ ++#define RTNET_DEF_STACK_PRIORITY \ ++ RTDM_TASK_HIGHEST_PRIORITY + RTDM_TASK_LOWER_PRIORITY ++/*#define RTNET_RTDEV_PRIORITY 5*/ ++ ++struct rtnet_device; ++ ++/*struct rtnet_msg { ++ int msg_type; ++ struct rtnet_device *rtdev; ++};*/ ++ ++struct rtnet_mgr { ++ rtdm_task_t task; ++ /* MBX mbx;*/ ++ rtdm_event_t event; ++}; ++ ++extern struct rtnet_mgr STACK_manager; ++extern struct rtnet_mgr RTDEV_manager; ++ ++extern const char rtnet_rtdm_provider_name[]; ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_directory rtnet_proc_root; ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++extern struct class *rtnet_class; ++ ++#endif /* __RTNET_INTERNAL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_socket.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_socket.h 2021-04-07 16:01:26.890634648 +0800 +@@ -0,0 +1,108 @@ ++/*** ++ * ++ * include/rtnet_socket.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_SOCKET_H_ ++#define __RTNET_SOCKET_H_ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++struct rtsocket { ++ unsigned short protocol; ++ ++ struct rtskb_pool skb_pool; ++ unsigned int pool_size; ++ struct mutex pool_nrt_lock; ++ ++ struct rtskb_queue incoming; ++ ++ rtdm_lock_t param_lock; ++ ++ unsigned int priority; ++ nanosecs_rel_t timeout; /* receive timeout, 0 for infinite */ ++ ++ rtdm_sem_t pending_sem; ++ ++ void (*callback_func)(struct rtdm_fd *, void *arg); ++ void *callback_arg; ++ ++ unsigned long flags; ++ ++ union { ++ /* IP specific */ ++ struct { ++ u32 saddr; /* source ip-addr (bind) */ ++ u32 daddr; /* destination ip-addr */ ++ u16 sport; /* source port */ ++ u16 dport; /* destination port */ ++ ++ int reg_index; /* index in port registry */ ++ u8 tos; ++ u8 state; ++ } inet; ++ ++ /* packet socket specific */ ++ struct { ++ struct rtpacket_type packet_type; ++ int ifindex; ++ } packet; ++ } prot; ++}; ++ ++static inline struct rtdm_fd *rt_socket_fd(struct rtsocket *sock) ++{ ++ return rtdm_private_to_fd(sock); ++} ++ ++void *rtnet_get_arg(struct rtdm_fd *fd, void *tmp, const void *src, size_t len); ++ ++int rtnet_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len); ++ ++#define rt_socket_reference(sock) rtdm_fd_lock(rt_socket_fd(sock)) ++#define rt_socket_dereference(sock) rtdm_fd_unlock(rt_socket_fd(sock)) ++ ++int rt_socket_init(struct rtdm_fd *fd, unsigned short protocol); ++ ++void rt_socket_cleanup(struct rtdm_fd *fd); ++int rt_socket_common_ioctl(struct rtdm_fd *fd, int request, void __user *arg); ++int rt_socket_if_ioctl(struct rtdm_fd *fd, int request, void __user *arg); ++int rt_socket_select_bind(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index); ++ ++int rt_bare_socket_init(struct rtdm_fd *fd, unsigned short protocol, ++ unsigned int priority, unsigned int pool_size); ++ ++static inline void rt_bare_socket_cleanup(struct rtsocket *sock) ++{ ++ rtskb_pool_release(&sock->skb_pool); ++} ++ ++#endif /* __RTNET_SOCKET_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac.h 2021-04-07 16:01:26.886634653 +0800 +@@ -0,0 +1,92 @@ ++/*** ++ * ++ * include/rtmac.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2004-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * As a special exception to the GNU General Public license, the RTnet ++ * project allows you to use this header file in unmodified form to produce ++ * application programs executing in user-space which use RTnet services by ++ * normal system calls. 
The resulting executable will not be covered by the ++ * GNU General Public License merely as a result of this header file use. ++ * Instead, this header file use will be considered normal use of RTnet and ++ * not a "derived work" in the sense of the GNU General Public License. ++ * ++ * This exception does not apply when the application code is built as a ++ * static or dynamically loadable portion of the Linux kernel nor does the ++ * exception override other reasons justifying application of the GNU General ++ * Public License. ++ * ++ * This exception applies only to the code released by the RTnet project ++ * under the name RTnet and bearing this exception notice. If you copy code ++ * from other sources into a copy of RTnet, the exception does not apply to ++ * the code that you add in this way. ++ * ++ */ ++ ++#ifndef __RTMAC_H_ ++#define __RTMAC_H_ ++ ++#include ++ ++/* sub-classes: RTDM_CLASS_RTMAC */ ++#define RTDM_SUBCLASS_TDMA 0 ++#define RTDM_SUBCLASS_UNMANAGED 1 ++ ++#define RTIOC_TYPE_RTMAC RTDM_CLASS_RTMAC ++ ++/* ** Common Cycle Event Types ** */ ++/* standard event, wake up once per cycle */ ++#define RTMAC_WAIT_ON_DEFAULT 0x00 ++/* wake up on media access of the station, may trigger multiple times per ++ cycle */ ++#define RTMAC_WAIT_ON_XMIT 0x01 ++ ++/* ** TDMA-specific Cycle Event Types ** */ ++/* tigger on on SYNC frame reception/transmission */ ++#define TDMA_WAIT_ON_SYNC RTMAC_WAIT_ON_DEFAULT ++#define TDMA_WAIT_ON_SOF TDMA_WAIT_ON_SYNC /* legacy support */ ++ ++/* RTMAC_RTIOC_WAITONCYCLE_EX control and status data */ ++struct rtmac_waitinfo { ++ /** Set to wait type before invoking the service */ ++ unsigned int type; ++ ++ /** Set to sizeof(struct rtmac_waitinfo) before invoking the service */ ++ size_t size; ++ ++ /** Counter of elementary cycles of the underlying RTmac discipline ++ (if applicable) */ ++ unsigned long cycle_no; ++ ++ /** Date (in local time) of the last elementary cycle start of the RTmac ++ discipline (if applicable) */ ++ nanosecs_abs_t cycle_start; ++ ++ /** Offset of the local clock to the global clock provided by the RTmac ++ discipline (if applicable): t_global = t_local + clock_offset */ ++ nanosecs_rel_t clock_offset; ++}; ++ ++/* RTmac Discipline IOCTLs */ ++#define RTMAC_RTIOC_TIMEOFFSET _IOR(RTIOC_TYPE_RTMAC, 0x00, int64_t) ++#define RTMAC_RTIOC_WAITONCYCLE _IOW(RTIOC_TYPE_RTMAC, 0x01, unsigned int) ++#define RTMAC_RTIOC_WAITONCYCLE_EX \ ++ _IOWR(RTIOC_TYPE_RTMAC, 0x02, struct rtmac_waitinfo) ++ ++#endif /* __RTMAC_H_ */ +--- linux/drivers/xenomai/net/stack/include/ethernet/eth.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ethernet/eth.h 2021-04-07 16:01:26.881634661 +0800 +@@ -0,0 +1,32 @@ ++/* ethernet/eth.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_ETH_H_ ++#define __RTNET_ETH_H_ ++ ++#include ++#include ++ ++extern int rt_eth_header(struct rtskb *skb, struct rtnet_device *rtdev, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned int len); ++extern unsigned short rt_eth_type_trans(struct rtskb *skb, ++ struct rtnet_device *dev); ++ ++#endif /* __RTNET_ETH_H_ */ +--- linux/drivers/xenomai/net/stack/include/stack_mgr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/stack_mgr.h 2021-04-07 16:01:26.877634666 +0800 +@@ -0,0 +1,95 @@ ++/*** ++ * ++ * stack_mgr.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __STACK_MGR_H_ ++#define __STACK_MGR_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#include ++#include ++ ++/*** ++ * network layer protocol (layer 3) ++ */ ++ ++#define RTPACKET_HASH_TBL_SIZE 64 ++#define RTPACKET_HASH_KEY_MASK (RTPACKET_HASH_TBL_SIZE - 1) ++ ++struct rtpacket_type { ++ struct list_head list_entry; ++ ++ unsigned short type; ++ short refcount; ++ ++ int (*handler)(struct rtskb *, struct rtpacket_type *); ++ int (*err_handler)(struct rtskb *, struct rtnet_device *, ++ struct rtpacket_type *); ++ bool (*trylock)(struct rtpacket_type *); ++ void (*unlock)(struct rtpacket_type *); ++ ++ struct module *owner; ++}; ++ ++int __rtdev_add_pack(struct rtpacket_type *pt, struct module *module); ++#define rtdev_add_pack(pt) __rtdev_add_pack(pt, THIS_MODULE) ++ ++void rtdev_remove_pack(struct rtpacket_type *pt); ++ ++static inline bool rtdev_lock_pack(struct rtpacket_type *pt) ++{ ++ return try_module_get(pt->owner); ++} ++ ++static inline void rtdev_unlock_pack(struct rtpacket_type *pt) ++{ ++ module_put(pt->owner); ++} ++ ++void rt_stack_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr); ++void rt_stack_disconnect(struct rtnet_device *rtdev); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++void rt_stack_deliver(struct rtskb *rtskb); ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++int rt_stack_mgr_init(struct rtnet_mgr *mgr); ++void rt_stack_mgr_delete(struct rtnet_mgr *mgr); ++ ++void rtnetif_rx(struct rtskb *skb); ++ ++static inline void rtnetif_tx(struct rtnet_device *rtdev) ++{ ++} ++ ++static inline void rt_mark_stack_mgr(struct rtnet_device *rtdev) ++{ ++ rtdm_event_signal(rtdev->stack_event); ++} ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __STACK_MGR_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtskb.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtskb.h 2021-04-07 
16:01:26.872634674 +0800 +@@ -0,0 +1,809 @@ ++/*** ++ * ++ * include/rtskb.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTSKB_H_ ++#define __RTSKB_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++#include ++#include ++ ++/*** ++ ++rtskb Management - A Short Introduction ++--------------------------------------- ++ ++1. rtskbs (Real-Time Socket Buffers) ++ ++A rtskb consists of a management structure (struct rtskb) and a fixed-sized ++(RTSKB_SIZE) data buffer. It is used to store network packets on their way from ++the API routines through the stack to the NICs or vice versa. rtskbs are ++allocated as one chunk of memory which contains both the managment structure ++and the buffer memory itself. ++ ++ ++2. rtskb Queues ++ ++A rtskb queue is described by struct rtskb_queue. A queue can contain an ++unlimited number of rtskbs in an ordered way. A rtskb can either be added to ++the head (rtskb_queue_head()) or the tail of a queue (rtskb_queue_tail()). When ++a rtskb is removed from a queue (rtskb_dequeue()), it is always taken from the ++head. Queues are normally spin lock protected unless the __variants of the ++queuing functions are used. ++ ++ ++3. Prioritized rtskb Queues ++ ++A prioritized queue contains a number of normal rtskb queues within an array. ++The array index of a sub-queue correspond to the priority of the rtskbs within ++this queue. For enqueuing a rtskb (rtskb_prio_queue_head()), its priority field ++is evaluated and the rtskb is then placed into the appropriate sub-queue. When ++dequeuing a rtskb, the first rtskb of the first non-empty sub-queue with the ++highest priority is returned. The current implementation supports 32 different ++priority levels, the lowest if defined by QUEUE_MIN_PRIO, the highest by ++QUEUE_MAX_PRIO. ++ ++ ++4. rtskb Pools ++ ++As rtskbs must not be allocated by a normal memory manager during runtime, ++preallocated rtskbs are kept ready in several pools. Most packet producers ++(NICs, sockets, etc.) have their own pools in order to be independent of the ++load situation of other parts of the stack. ++ ++When a pool is created (rtskb_pool_init()), the required rtskbs are allocated ++from a Linux slab cache. Pools can be extended (rtskb_pool_extend()) or ++shrinked (rtskb_pool_shrink()) during runtime. When shutting down the ++program/module, every pool has to be released (rtskb_pool_release()). All these ++commands demand to be executed within a non real-time context. ++ ++Pools are organized as normal rtskb queues (struct rtskb_queue). When a rtskb ++is allocated (alloc_rtskb()), it is actually dequeued from the pool's queue. ++When freeing a rtskb (kfree_rtskb()), the rtskb is enqueued to its owning pool. 
++rtskbs can be exchanged between pools (rtskb_acquire()). In this case, the ++passed rtskb switches over to from its owning pool to a given pool, but only if ++this pool can pass an empty rtskb from its own queue back. ++ ++ ++5. rtskb Chains ++ ++To ease the defragmentation of larger IP packets, several rtskbs can form a ++chain. For these purposes, the first rtskb (and only the first!) provides a ++pointer to the last rtskb in the chain. When enqueuing the first rtskb of a ++chain, the whole chain is automatically placed into the destined queue. But, ++to dequeue a complete chain specialized calls are required (postfix: _chain). ++While chains also get freed en bloc (kfree_rtskb()) when passing the first ++rtskbs, it is not possible to allocate a chain from a pool (alloc_rtskb()); a ++newly allocated rtskb is always reset to a "single rtskb chain". Furthermore, ++the acquisition of complete chains is NOT supported (rtskb_acquire()). ++ ++ ++6. Capturing Support (Optional) ++ ++When incoming or outgoing packets are captured, the assigned rtskb needs to be ++shared between the stack, the driver, and the capturing service. In contrast to ++many other network stacks, RTnet does not create a new rtskb head and ++re-references the payload. Instead, additional fields at the end of the rtskb ++structure are use for sharing a rtskb with a capturing service. If the sharing ++bit (RTSKB_CAP_SHARED) in cap_flags is set, the rtskb will not be returned to ++the owning pool upon the call of kfree_rtskb. Instead this bit will be reset, ++and a compensation rtskb stored in cap_comp_skb will be returned to the owning ++pool. cap_start and cap_len can be used to mirror the dimension of the full ++packet. This is required because the data and len fields will be modified while ++walking through the stack. cap_next allows to add a rtskb to a separate queue ++which is independent of any queue described in 2. ++ ++Certain setup tasks for capturing packets can not become part of a capturing ++module, they have to be embedded into the stack. For this purpose, several ++inline functions are provided. rtcap_mark_incoming() is used to save the packet ++dimension right before it is modifed by the stack. rtcap_report_incoming() ++calls the capturing handler, if present, in order to let it process the ++received rtskb (e.g. allocate compensation rtskb, mark original rtskb as ++shared, and enqueue it). ++ ++Outgoing rtskb have to be captured by adding a hook function to the chain of ++hard_start_xmit functions of a device. To measure the delay caused by RTmac ++between the request and the actual transmission, a time stamp can be taken using ++rtcap_mark_rtmac_enqueue(). This function is typically called by RTmac ++disciplines when they add a rtskb to their internal transmission queue. In such ++a case, the RTSKB_CAP_RTMAC_STAMP bit is set in cap_flags to indicate that the ++cap_rtmac_stamp field now contains valid data. 
++ ++ ***/ ++ ++#ifndef CHECKSUM_PARTIAL ++#define CHECKSUM_PARTIAL CHECKSUM_HW ++#endif ++ ++#define RTSKB_CAP_SHARED 1 /* rtskb shared between stack and RTcap */ ++#define RTSKB_CAP_RTMAC_STAMP 2 /* cap_rtmac_stamp is valid */ ++ ++#define RTSKB_UNMAPPED 0 ++ ++struct rtskb_queue; ++struct rtsocket; ++struct rtnet_device; ++ ++/*** ++ * rtskb - realtime socket buffer ++ */ ++struct rtskb { ++ struct rtskb *next; /* used for queuing rtskbs */ ++ struct rtskb *chain_end; /* marks the end of a rtskb chain starting ++ with this very rtskb */ ++ ++ struct rtskb_pool *pool; /* owning pool */ ++ ++ unsigned int priority; /* bit 0..15: prio, 16..31: user-defined */ ++ ++ struct rtsocket *sk; /* assigned socket */ ++ struct rtnet_device *rtdev; /* source or destination device */ ++ ++ nanosecs_abs_t time_stamp; /* arrival or transmission (RTcap) time */ ++ ++ /* patch address of the transmission time stamp, can be NULL ++ * calculation: *xmit_stamp = cpu_to_be64(time_in_ns + *xmit_stamp) ++ */ ++ nanosecs_abs_t *xmit_stamp; ++ ++ /* transport layer */ ++ union { ++ struct tcphdr *th; ++ struct udphdr *uh; ++ struct icmphdr *icmph; ++ struct iphdr *ipihdr; ++ unsigned char *raw; ++ } h; ++ ++ /* network layer */ ++ union { ++ struct iphdr *iph; ++ struct arphdr *arph; ++ unsigned char *raw; ++ } nh; ++ ++ /* link layer */ ++ union { ++ struct ethhdr *ethernet; ++ unsigned char *raw; ++ } mac; ++ ++ unsigned short protocol; ++ unsigned char pkt_type; ++ ++ unsigned char ip_summed; ++ unsigned int csum; ++ ++ unsigned char *data; ++ unsigned char *tail; ++ unsigned char *end; ++ unsigned int len; ++ ++ dma_addr_t buf_dma_addr; ++ ++ unsigned char *buf_start; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++ unsigned char *buf_end; ++#endif ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ int cap_flags; /* see RTSKB_CAP_xxx */ ++ struct rtskb *cap_comp_skb; /* compensation rtskb */ ++ struct rtskb *cap_next; /* used for capture queue */ ++ unsigned char *cap_start; /* start offset for capturing */ ++ unsigned int cap_len; /* capture length of this rtskb */ ++ nanosecs_abs_t cap_rtmac_stamp; /* RTmac enqueuing time */ ++#endif ++ ++ struct list_head entry; /* for global rtskb list */ ++}; ++ ++struct rtskb_queue { ++ struct rtskb *first; ++ struct rtskb *last; ++ rtdm_lock_t lock; ++}; ++ ++struct rtskb_pool_lock_ops { ++ int (*trylock)(void *cookie); ++ void (*unlock)(void *cookie); ++}; ++ ++struct rtskb_pool { ++ struct rtskb_queue queue; ++ const struct rtskb_pool_lock_ops *lock_ops; ++ void *lock_cookie; ++}; ++ ++#define QUEUE_MAX_PRIO 0 ++#define QUEUE_MIN_PRIO 31 ++ ++struct rtskb_prio_queue { ++ rtdm_lock_t lock; ++ unsigned long usage; /* bit array encoding non-empty sub-queues */ ++ struct rtskb_queue queue[QUEUE_MIN_PRIO + 1]; ++}; ++ ++#define RTSKB_PRIO_MASK 0x0000FFFF /* bits 0..15: xmit prio */ ++#define RTSKB_CHANNEL_MASK 0xFFFF0000 /* bits 16..31: xmit channel */ ++#define RTSKB_CHANNEL_SHIFT 16 ++ ++#define RTSKB_DEF_RT_CHANNEL SOCK_DEF_RT_CHANNEL ++#define RTSKB_DEF_NRT_CHANNEL SOCK_DEF_NRT_CHANNEL ++#define RTSKB_USER_CHANNEL SOCK_USER_CHANNEL ++ ++/* Note: always keep SOCK_XMIT_PARAMS consistent with definitions above! 
*/ ++#define RTSKB_PRIO_VALUE SOCK_XMIT_PARAMS ++ ++/* default values for the module parameter */ ++#define DEFAULT_GLOBAL_RTSKBS 0 /* default number of rtskb's in global pool */ ++#define DEFAULT_DEVICE_RTSKBS \ ++ 16 /* default additional rtskbs per network adapter */ ++#define DEFAULT_SOCKET_RTSKBS 16 /* default number of rtskb's in socket pools */ ++ ++#define ALIGN_RTSKB_STRUCT_LEN SKB_DATA_ALIGN(sizeof(struct rtskb)) ++#define RTSKB_SIZE 1544 /* maximum needed by pcnet32-rt */ ++ ++extern unsigned int rtskb_pools; /* current number of rtskb pools */ ++extern unsigned int rtskb_pools_max; /* maximum number of rtskb pools */ ++extern unsigned int rtskb_amount; /* current number of allocated rtskbs */ ++extern unsigned int rtskb_amount_max; /* maximum number of allocated rtskbs */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_CHECKED ++extern void rtskb_over_panic(struct rtskb *skb, int len, void *here); ++extern void rtskb_under_panic(struct rtskb *skb, int len, void *here); ++#endif ++ ++extern struct rtskb *rtskb_pool_dequeue(struct rtskb_pool *pool); ++ ++extern void rtskb_pool_queue_tail(struct rtskb_pool *pool, struct rtskb *skb); ++ ++extern struct rtskb *alloc_rtskb(unsigned int size, struct rtskb_pool *pool); ++ ++extern void kfree_rtskb(struct rtskb *skb); ++#define dev_kfree_rtskb(a) kfree_rtskb(a) ++ ++static inline void rtskb_tx_timestamp(struct rtskb *skb) ++{ ++ nanosecs_abs_t *ts = skb->xmit_stamp; ++ ++ if (!ts) ++ return; ++ ++ *ts = cpu_to_be64(rtdm_clock_read() + *ts); ++} ++ ++/*** ++ * rtskb_queue_init - initialize the queue ++ * @queue ++ */ ++static inline void rtskb_queue_init(struct rtskb_queue *queue) ++{ ++ rtdm_lock_init(&queue->lock); ++ queue->first = NULL; ++ queue->last = NULL; ++} ++ ++/*** ++ * rtskb_prio_queue_init - initialize the prioritized queue ++ * @prioqueue ++ */ ++static inline void rtskb_prio_queue_init(struct rtskb_prio_queue *prioqueue) ++{ ++ memset(prioqueue, 0, sizeof(struct rtskb_prio_queue)); ++ rtdm_lock_init(&prioqueue->lock); ++} ++ ++/*** ++ * rtskb_queue_empty ++ * @queue ++ */ ++static inline int rtskb_queue_empty(struct rtskb_queue *queue) ++{ ++ return (queue->first == NULL); ++} ++ ++/*** ++ * rtskb__prio_queue_empty ++ * @queue ++ */ ++static inline int rtskb_prio_queue_empty(struct rtskb_prio_queue *prioqueue) ++{ ++ return (prioqueue->usage == 0); ++} ++ ++/*** ++ * __rtskb_queue_head - insert a buffer at the queue head (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_queue_head(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ struct rtskb *chain_end = skb->chain_end; ++ ++ chain_end->next = queue->first; ++ ++ if (queue->first == NULL) ++ queue->last = chain_end; ++ queue->first = skb; ++} ++ ++/*** ++ * rtskb_queue_head - insert a buffer at the queue head (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_queue_head(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_queue_head(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++ ++/*** ++ * __rtskb_prio_queue_head - insert a buffer at the prioritized queue head ++ * (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_prio_queue_head(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ unsigned int prio = skb->priority & RTSKB_PRIO_MASK; ++ ++ RTNET_ASSERT(prio <= 31, prio = 31;); ++ ++ 
__rtskb_queue_head(&prioqueue->queue[prio], skb); ++ __set_bit(prio, &prioqueue->usage); ++} ++ ++/*** ++ * rtskb_prio_queue_head - insert a buffer at the prioritized queue head ++ * (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_prio_queue_head(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ __rtskb_prio_queue_head(prioqueue, skb); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++} ++ ++/*** ++ * __rtskb_queue_tail - insert a buffer at the queue tail (w/o locks) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_queue_tail(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ struct rtskb *chain_end = skb->chain_end; ++ ++ chain_end->next = NULL; ++ ++ if (queue->first == NULL) ++ queue->first = skb; ++ else ++ queue->last->next = skb; ++ queue->last = chain_end; ++} ++ ++/*** ++ * rtskb_queue_tail - insert a buffer at the queue tail (lock protected) ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_queue_tail(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ __rtskb_queue_tail(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++} ++ ++/*** ++ * rtskb_queue_tail_check - variant of rtskb_queue_tail ++ * returning true on empty->non empty transition. ++ * @queue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline bool rtskb_queue_tail_check(struct rtskb_queue *queue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ bool ret; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ ret = queue->first == NULL; ++ __rtskb_queue_tail(queue, skb); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return ret; ++} ++ ++/*** ++ * __rtskb_prio_queue_tail - insert a buffer at the prioritized queue tail ++ * (w/o locks) ++ * @prioqueue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void __rtskb_prio_queue_tail(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ unsigned int prio = skb->priority & RTSKB_PRIO_MASK; ++ ++ RTNET_ASSERT(prio <= 31, prio = 31;); ++ ++ __rtskb_queue_tail(&prioqueue->queue[prio], skb); ++ __set_bit(prio, &prioqueue->usage); ++} ++ ++/*** ++ * rtskb_prio_queue_tail - insert a buffer at the prioritized queue tail ++ * (lock protected) ++ * @prioqueue: queue to use ++ * @skb: buffer to queue ++ */ ++static inline void rtskb_prio_queue_tail(struct rtskb_prio_queue *prioqueue, ++ struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ __rtskb_prio_queue_tail(prioqueue, skb); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++} ++ ++/*** ++ * __rtskb_dequeue - remove from the head of the queue (w/o locks) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *__rtskb_dequeue(struct rtskb_queue *queue) ++{ ++ struct rtskb *result; ++ ++ if ((result = queue->first) != NULL) { ++ queue->first = result->next; ++ result->next = NULL; ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_dequeue - remove from the head of the queue (lock protected) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *rtskb_dequeue(struct rtskb_queue *queue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ result = __rtskb_dequeue(queue); ++ 
rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * __rtskb_prio_dequeue - remove from the head of the prioritized queue ++ * (w/o locks) ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++__rtskb_prio_dequeue(struct rtskb_prio_queue *prioqueue) ++{ ++ int prio; ++ struct rtskb *result = NULL; ++ struct rtskb_queue *sub_queue; ++ ++ if (prioqueue->usage) { ++ prio = ffz(~prioqueue->usage); ++ sub_queue = &prioqueue->queue[prio]; ++ result = __rtskb_dequeue(sub_queue); ++ if (rtskb_queue_empty(sub_queue)) ++ __change_bit(prio, &prioqueue->usage); ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_prio_dequeue - remove from the head of the prioritized queue ++ * (lock protected) ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++rtskb_prio_dequeue(struct rtskb_prio_queue *prioqueue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ result = __rtskb_prio_dequeue(prioqueue); ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * __rtskb_dequeue_chain - remove a chain from the head of the queue ++ * (w/o locks) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *__rtskb_dequeue_chain(struct rtskb_queue *queue) ++{ ++ struct rtskb *result; ++ struct rtskb *chain_end; ++ ++ if ((result = queue->first) != NULL) { ++ chain_end = result->chain_end; ++ queue->first = chain_end->next; ++ chain_end->next = NULL; ++ } ++ ++ return result; ++} ++ ++/*** ++ * rtskb_dequeue_chain - remove a chain from the head of the queue ++ * (lock protected) ++ * @queue: queue to remove from ++ */ ++static inline struct rtskb *rtskb_dequeue_chain(struct rtskb_queue *queue) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&queue->lock, context); ++ result = __rtskb_dequeue_chain(queue); ++ rtdm_lock_put_irqrestore(&queue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * rtskb_prio_dequeue_chain - remove a chain from the head of the ++ * prioritized queue ++ * @prioqueue: queue to remove from ++ */ ++static inline struct rtskb * ++rtskb_prio_dequeue_chain(struct rtskb_prio_queue *prioqueue) ++{ ++ rtdm_lockctx_t context; ++ int prio; ++ struct rtskb *result = NULL; ++ struct rtskb_queue *sub_queue; ++ ++ rtdm_lock_get_irqsave(&prioqueue->lock, context); ++ if (prioqueue->usage) { ++ prio = ffz(~prioqueue->usage); ++ sub_queue = &prioqueue->queue[prio]; ++ result = __rtskb_dequeue_chain(sub_queue); ++ if (rtskb_queue_empty(sub_queue)) ++ __change_bit(prio, &prioqueue->usage); ++ } ++ rtdm_lock_put_irqrestore(&prioqueue->lock, context); ++ ++ return result; ++} ++ ++/*** ++ * rtskb_queue_purge - clean the queue ++ * @queue ++ */ ++static inline void rtskb_queue_purge(struct rtskb_queue *queue) ++{ ++ struct rtskb *skb; ++ while ((skb = rtskb_dequeue(queue)) != NULL) ++ kfree_rtskb(skb); ++} ++ ++static inline int rtskb_headlen(const struct rtskb *skb) ++{ ++ return skb->len; ++} ++ ++static inline void rtskb_reserve(struct rtskb *skb, unsigned int len) ++{ ++ skb->data += len; ++ skb->tail += len; ++} ++ ++static inline unsigned char *__rtskb_put(struct rtskb *skb, unsigned int len) ++{ ++ unsigned char *tmp = skb->tail; ++ ++ skb->tail += len; ++ skb->len += len; ++ return tmp; ++} ++ ++#define rtskb_put(skb, length) \ ++ ({ \ ++ struct rtskb *__rtskb = (skb); \ ++ unsigned int __len = (length); \ ++ unsigned char *tmp = __rtskb->tail; \ ++ \ ++ __rtskb->tail += __len; \ 
++ __rtskb->len += __len; \ ++ \ ++ RTNET_ASSERT(__rtskb->tail <= __rtskb->buf_end, \ ++ rtskb_over_panic(__rtskb, __len, \ ++ current_text_addr());); \ ++ \ ++ tmp; \ ++ }) ++ ++static inline unsigned char *__rtskb_push(struct rtskb *skb, unsigned int len) ++{ ++ skb->data -= len; ++ skb->len += len; ++ return skb->data; ++} ++ ++#define rtskb_push(skb, length) \ ++ ({ \ ++ struct rtskb *__rtskb = (skb); \ ++ unsigned int __len = (length); \ ++ \ ++ __rtskb->data -= __len; \ ++ __rtskb->len += __len; \ ++ \ ++ RTNET_ASSERT(__rtskb->data >= __rtskb->buf_start, \ ++ rtskb_under_panic(__rtskb, __len, \ ++ current_text_addr());); \ ++ \ ++ __rtskb->data; \ ++ }) ++ ++static inline unsigned char *__rtskb_pull(struct rtskb *skb, unsigned int len) ++{ ++ RTNET_ASSERT(len <= skb->len, return NULL;); ++ ++ skb->len -= len; ++ ++ return skb->data += len; ++} ++ ++static inline unsigned char *rtskb_pull(struct rtskb *skb, unsigned int len) ++{ ++ if (len > skb->len) ++ return NULL; ++ ++ skb->len -= len; ++ ++ return skb->data += len; ++} ++ ++static inline void rtskb_trim(struct rtskb *skb, unsigned int len) ++{ ++ if (skb->len > len) { ++ skb->len = len; ++ skb->tail = skb->data + len; ++ } ++} ++ ++static inline struct rtskb *rtskb_padto(struct rtskb *rtskb, unsigned int len) ++{ ++ RTNET_ASSERT(len <= (unsigned int)(rtskb->buf_end + 1 - rtskb->data), ++ return NULL;); ++ ++ memset(rtskb->data + rtskb->len, 0, len - rtskb->len); ++ ++ return rtskb; ++} ++ ++static inline dma_addr_t rtskb_data_dma_addr(struct rtskb *rtskb, ++ unsigned int offset) ++{ ++ return rtskb->buf_dma_addr + rtskb->data - rtskb->buf_start + offset; ++} ++ ++extern struct rtskb_pool global_pool; ++ ++extern unsigned int rtskb_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ const struct rtskb_pool_lock_ops *lock_ops, ++ void *lock_cookie); ++ ++extern unsigned int __rtskb_module_pool_init(struct rtskb_pool *pool, ++ unsigned int initial_size, ++ struct module *module); ++ ++#define rtskb_module_pool_init(pool, size) \ ++ __rtskb_module_pool_init(pool, size, THIS_MODULE) ++ ++extern void rtskb_pool_release(struct rtskb_pool *pool); ++ ++extern unsigned int rtskb_pool_extend(struct rtskb_pool *pool, ++ unsigned int add_rtskbs); ++extern unsigned int rtskb_pool_shrink(struct rtskb_pool *pool, ++ unsigned int rem_rtskbs); ++extern int rtskb_acquire(struct rtskb *rtskb, struct rtskb_pool *comp_pool); ++extern struct rtskb *rtskb_clone(struct rtskb *rtskb, struct rtskb_pool *pool); ++ ++extern int rtskb_pools_init(void); ++extern void rtskb_pools_release(void); ++ ++extern unsigned int rtskb_copy_and_csum_bits(const struct rtskb *skb, ++ int offset, u8 *to, int len, ++ unsigned int csum); ++extern void rtskb_copy_and_csum_dev(const struct rtskb *skb, u8 *to); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP) ++ ++extern rtdm_lock_t rtcap_lock; ++extern void (*rtcap_handler)(struct rtskb *skb); ++ ++static inline void rtcap_mark_incoming(struct rtskb *skb) ++{ ++ skb->cap_start = skb->data; ++ skb->cap_len = skb->len; ++} ++ ++static inline void rtcap_report_incoming(struct rtskb *skb) ++{ ++ rtdm_lockctx_t context; ++ ++ rtdm_lock_get_irqsave(&rtcap_lock, context); ++ if (rtcap_handler != NULL) ++ rtcap_handler(skb); ++ ++ rtdm_lock_put_irqrestore(&rtcap_lock, context); ++} ++ ++static inline void rtcap_mark_rtmac_enqueue(struct rtskb *skb) ++{ ++ /* rtskb start and length are probably not valid yet */ ++ skb->cap_flags |= RTSKB_CAP_RTMAC_STAMP; ++ skb->cap_rtmac_stamp = rtdm_clock_read(); ++} ++ ++#else 
/* ifndef CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++#define rtcap_mark_incoming(skb) ++#define rtcap_report_incoming(skb) ++#define rtcap_mark_rtmac_enqueue(skb) ++ ++#endif /* CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTSKB_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_iovec.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_iovec.h 2021-04-07 16:01:26.867634681 +0800 +@@ -0,0 +1,38 @@ ++/* rtnet_iovec.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_IOVEC_H_ ++#define __RTNET_IOVEC_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++struct user_msghdr; ++struct rtdm_fd; ++ ++ssize_t rtnet_write_to_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ const void *data, size_t len); ++ ++ssize_t rtnet_read_from_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ void *data, size_t len); ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTNET_IOVEC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtdev.h 2021-04-07 16:01:26.863634686 +0800 +@@ -0,0 +1,275 @@ ++/*** ++ * ++ * rtdev.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTDEV_H_ ++#define __RTDEV_H_ ++ ++#define MAX_RT_DEVICES 8 ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#include ++#include ++ ++#define RTDEV_VERS_2_0 0x0200 ++ ++#define PRIV_FLAG_UP 0 ++#define PRIV_FLAG_ADDING_ROUTE 1 ++ ++#ifndef NETIF_F_LLTX ++#define NETIF_F_LLTX 4096 ++#endif ++ ++#define RTDEV_TX_OK 0 ++#define RTDEV_TX_BUSY 1 ++ ++enum rtnet_link_state { ++ __RTNET_LINK_STATE_XOFF = 0, ++ __RTNET_LINK_STATE_START, ++ __RTNET_LINK_STATE_PRESENT, ++ __RTNET_LINK_STATE_NOCARRIER, ++}; ++#define RTNET_LINK_STATE_XOFF (1 << __RTNET_LINK_STATE_XOFF) ++#define RTNET_LINK_STATE_START (1 << __RTNET_LINK_STATE_START) ++#define RTNET_LINK_STATE_PRESENT (1 << __RTNET_LINK_STATE_PRESENT) ++#define RTNET_LINK_STATE_NOCARRIER (1 << __RTNET_LINK_STATE_NOCARRIER) ++ ++/*** ++ * rtnet_device ++ */ ++struct rtnet_device { ++ /* Many field are borrowed from struct net_device in ++ * - WY ++ */ ++ unsigned int vers; ++ ++ char name[IFNAMSIZ]; ++ struct device *sysbind; /* device bound in sysfs (optional) */ ++ ++ unsigned long rmem_end; /* shmem "recv" end */ ++ unsigned long rmem_start; /* shmem "recv" start */ ++ unsigned long mem_end; /* shared mem end */ ++ unsigned long mem_start; /* shared mem start */ ++ unsigned long base_addr; /* device I/O address */ ++ unsigned int irq; /* device IRQ number */ ++ ++ /* ++ * Some hardware also needs these fields, but they are not ++ * part of the usual set specified in Space.c. ++ */ ++ unsigned char if_port; /* Selectable AUI, TP,..*/ ++ unsigned char dma; /* DMA channel */ ++ __u16 __padding; ++ ++ unsigned long link_state; ++ int ifindex; ++ atomic_t refcount; ++ ++ struct device *sysdev; /* node in driver model for sysfs */ ++ struct module *rt_owner; /* like classic owner, but * ++ * forces correct macro usage */ ++ ++ unsigned int flags; /* interface flags (a la BSD) */ ++ unsigned long priv_flags; /* internal flags */ ++ unsigned short type; /* interface hardware type */ ++ unsigned short hard_header_len; /* hardware hdr length */ ++ unsigned int mtu; /* eth = 1536, tr = 4... */ ++ void *priv; /* pointer to private data */ ++ netdev_features_t features; /* [RT]NETIF_F_* */ ++ ++ /* Interface address info. 
*/ ++ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ ++ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ ++ unsigned char addr_len; /* hardware address length */ ++ ++ int promiscuity; ++ int allmulti; ++ ++ __u32 local_ip; /* IP address in network order */ ++ __u32 broadcast_ip; /* broadcast IP in network order */ ++ ++ rtdm_event_t *stack_event; ++ ++ rtdm_mutex_t xmit_mutex; /* protects xmit routine */ ++ rtdm_lock_t rtdev_lock; /* management lock */ ++ struct mutex nrt_lock; /* non-real-time locking */ ++ ++ unsigned int add_rtskbs; /* additionally allocated global rtskbs */ ++ ++ struct rtskb_pool dev_pool; ++ ++ /* RTmac related fields */ ++ struct rtmac_disc *mac_disc; ++ struct rtmac_priv *mac_priv; ++ int (*mac_detach)(struct rtnet_device *rtdev); ++ ++ /* Device operations */ ++ int (*open)(struct rtnet_device *rtdev); ++ int (*stop)(struct rtnet_device *rtdev); ++ int (*hard_header)(struct rtskb *, struct rtnet_device *, ++ unsigned short type, void *daddr, void *saddr, ++ unsigned int len); ++ int (*rebuild_header)(struct rtskb *); ++ int (*hard_start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ int (*hw_reset)(struct rtnet_device *rtdev); ++ ++ /* Transmission hook, managed by the stack core, RTcap, and RTmac ++ * ++ * If xmit_lock is used, start_xmit points either to rtdev_locked_xmit or ++ * the RTmac discipline handler. If xmit_lock is not required, start_xmit ++ * points to hard_start_xmit or the discipline handler. ++ */ ++ int (*start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ ++ /* MTU hook, managed by the stack core and RTmac */ ++ unsigned int (*get_mtu)(struct rtnet_device *rtdev, ++ unsigned int priority); ++ ++ int (*do_ioctl)(struct rtnet_device *rtdev, struct ifreq *ifr, int cmd); ++ struct net_device_stats *(*get_stats)(struct rtnet_device *rtdev); ++ ++ /* DMA pre-mapping hooks */ ++ dma_addr_t (*map_rtskb)(struct rtnet_device *rtdev, struct rtskb *skb); ++ void (*unmap_rtskb)(struct rtnet_device *rtdev, struct rtskb *skb); ++}; ++ ++struct rtnet_core_cmd; ++ ++struct rtdev_event_hook { ++ struct list_head entry; ++ void (*register_device)(struct rtnet_device *rtdev); ++ void (*unregister_device)(struct rtnet_device *rtdev); ++ void (*ifup)(struct rtnet_device *rtdev, struct rtnet_core_cmd *up_cmd); ++ void (*ifdown)(struct rtnet_device *rtdev); ++}; ++ ++extern struct list_head event_hook_list; ++extern struct mutex rtnet_devices_nrt_lock; ++extern struct rtnet_device *rtnet_devices[]; ++ ++int __rt_init_etherdev(struct rtnet_device *rtdev, unsigned int dev_pool_size, ++ struct module *module); ++ ++#define rt_init_etherdev(__rtdev, __dev_pool_size) \ ++ __rt_init_etherdev(__rtdev, __dev_pool_size, THIS_MODULE) ++ ++struct rtnet_device *__rt_alloc_etherdev(unsigned sizeof_priv, ++ unsigned dev_pool_size, ++ struct module *module); ++#define rt_alloc_etherdev(priv_size, rx_size) \ ++ __rt_alloc_etherdev(priv_size, rx_size, THIS_MODULE) ++ ++void rtdev_destroy(struct rtnet_device *rtdev); ++ ++void rtdev_free(struct rtnet_device *rtdev); ++ ++int rt_register_rtnetdev(struct rtnet_device *rtdev); ++int rt_unregister_rtnetdev(struct rtnet_device *rtdev); ++ ++void rtdev_add_event_hook(struct rtdev_event_hook *hook); ++void rtdev_del_event_hook(struct rtdev_event_hook *hook); ++ ++void rtdev_alloc_name(struct rtnet_device *rtdev, const char *name_mask); ++ ++/** ++ * __rtdev_get_by_index - find a rtnet_device by its ifindex ++ * @ifindex: index of device ++ * @note: caller must hold rtnet_devices_nrt_lock ++ */ ++static inline 
struct rtnet_device *__rtdev_get_by_index(int ifindex) ++{ ++ return rtnet_devices[ifindex - 1]; ++} ++ ++struct rtnet_device *rtdev_get_by_name(const char *if_name); ++struct rtnet_device *rtdev_get_by_index(int ifindex); ++struct rtnet_device *rtdev_get_by_hwaddr(unsigned short type, char *ha); ++struct rtnet_device *rtdev_get_loopback(void); ++ ++int rtdev_reference(struct rtnet_device *rtdev); ++ ++static inline void rtdev_dereference(struct rtnet_device *rtdev) ++{ ++ smp_mb__before_atomic(); ++ if (rtdev->rt_owner && atomic_dec_and_test(&rtdev->refcount)) ++ module_put(rtdev->rt_owner); ++} ++ ++int rtdev_xmit(struct rtskb *skb); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++int rtdev_xmit_proxy(struct rtskb *skb); ++#endif ++ ++unsigned int rt_hard_mtu(struct rtnet_device *rtdev, unsigned int priority); ++ ++int rtdev_open(struct rtnet_device *rtdev); ++int rtdev_close(struct rtnet_device *rtdev); ++ ++int rtdev_up(struct rtnet_device *rtdev, struct rtnet_core_cmd *cmd); ++int rtdev_down(struct rtnet_device *rtdev); ++ ++int rtdev_map_rtskb(struct rtskb *skb); ++void rtdev_unmap_rtskb(struct rtskb *skb); ++ ++struct rtskb *rtnetdev_alloc_rtskb(struct rtnet_device *dev, unsigned int size); ++ ++#define rtnetdev_priv(dev) ((dev)->priv) ++ ++#define rtdev_emerg(__dev, format, args...) \ ++ pr_emerg("%s: " format, (__dev)->name, ##args) ++#define rtdev_alert(__dev, format, args...) \ ++ pr_alert("%s: " format, (__dev)->name, ##args) ++#define rtdev_crit(__dev, format, args...) \ ++ pr_crit("%s: " format, (__dev)->name, ##args) ++#define rtdev_err(__dev, format, args...) \ ++ pr_err("%s: " format, (__dev)->name, ##args) ++#define rtdev_warn(__dev, format, args...) \ ++ pr_warn("%s: " format, (__dev)->name, ##args) ++#define rtdev_notice(__dev, format, args...) \ ++ pr_notice("%s: " format, (__dev)->name, ##args) ++#define rtdev_info(__dev, format, args...) \ ++ pr_info("%s: " format, (__dev)->name, ##args) ++#define rtdev_dbg(__dev, format, args...) \ ++ pr_debug("%s: " format, (__dev)->name, ##args) ++ ++#ifdef VERBOSE_DEBUG ++#define rtdev_vdbg rtdev_dbg ++#else ++#define rtdev_vdbg(__dev, format, args...) \ ++ ({ \ ++ if (0) \ ++ pr_debug("%s: " format, (__dev)->name, ##args); \ ++ \ ++ 0; \ ++ }) ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/tdma_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/tdma_chrdev.h 2021-04-07 16:01:26.858634693 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * include/tdma_chrdev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_CHRDEV_H_ ++#define __TDMA_CHRDEV_H_ ++ ++#ifndef __KERNEL__ ++#include ++#endif ++ ++#include ++ ++#define MIN_SLOT_SIZE 60 ++ ++struct tdma_config { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ __u64 cycle_period; ++ __u64 backup_sync_offset; ++ __u32 cal_rounds; ++ __u32 max_cal_requests; ++ __u32 max_slot_id; ++ } master; ++ ++ struct { ++ __u32 cal_rounds; ++ __u32 max_slot_id; ++ } slave; ++ ++ struct { ++ __s32 id; ++ __u32 period; ++ __u64 offset; ++ __u32 phasing; ++ __u32 size; ++ __s32 joint_slot; ++ __u32 cal_timeout; ++ __u64 *cal_results; ++ } set_slot; ++ ++ struct { ++ __s32 id; ++ } remove_slot; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define TDMA_IOC_MASTER _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 0, struct tdma_config) ++#define TDMA_IOC_SLAVE _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 1, struct tdma_config) ++#define TDMA_IOC_CAL_RESULT_SIZE \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 2, struct tdma_config) ++#define TDMA_IOC_SET_SLOT _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 3, struct tdma_config) ++#define TDMA_IOC_REMOVE_SLOT \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 4, struct tdma_config) ++#define TDMA_IOC_DETACH _IOW(RTNET_IOC_TYPE_RTMAC_TDMA, 5, struct tdma_config) ++ ++#endif /* __TDMA_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_output.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_output.h 2021-04-07 16:01:26.853634701 +0800 +@@ -0,0 +1,42 @@ ++/*** ++ * ++ * include/ipv4/ip_output.h - prepare outgoing IP packets ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_IP_OUTPUT_H_ ++#define __RTNET_IP_OUTPUT_H_ ++ ++#include ++ ++#include ++#include ++ ++extern int rt_ip_build_xmit(struct rtsocket *sk, ++ int getfrag(const void *, unsigned char *, ++ unsigned int, unsigned int), ++ const void *frag, unsigned length, ++ struct dest_route *rt, int flags); ++ ++extern void __init rt_ip_init(void); ++extern void rt_ip_release(void); ++ ++#endif /* __RTNET_IP_OUTPUT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/route.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/route.h 2021-04-07 16:01:26.849634706 +0800 +@@ -0,0 +1,60 @@ ++/*** ++ * ++ * include/ipv4/route.h - real-time routing ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * Rewritten version of the original route by David Schleef and Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_ROUTE_H_ ++#define __RTNET_ROUTE_H_ ++ ++#include ++#include ++ ++#include ++ ++struct dest_route { ++ u32 ip; ++ unsigned char dev_addr[MAX_ADDR_LEN]; ++ struct rtnet_device *rtdev; ++}; ++ ++int rt_ip_route_add_host(u32 addr, unsigned char *dev_addr, ++ struct rtnet_device *rtdev); ++void rt_ip_route_del_all(struct rtnet_device *rtdev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING ++int rt_ip_route_add_net(u32 addr, u32 mask, u32 gw_addr); ++int rt_ip_route_del_net(u32 addr, u32 mask); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING */ ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER ++int rt_ip_route_forward(struct rtskb *rtskb, u32 daddr); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER */ ++ ++int rt_ip_route_del_host(u32 addr, struct rtnet_device *rtdev); ++int rt_ip_route_get_host(u32 addr, char *if_name, unsigned char *dev_addr, ++ struct rtnet_device *rtdev); ++int rt_ip_route_output(struct dest_route *rt_buf, u32 daddr, u32 saddr); ++ ++int __init rt_ip_routing_init(void); ++void rt_ip_routing_release(void); ++ ++#endif /* __RTNET_ROUTE_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_sock.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_sock.h 2021-04-07 16:01:26.844634713 +0800 +@@ -0,0 +1,31 @@ ++/*** ++ * ++ * include/ipv4/ip_sock.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_IP_SOCK_H_ ++#define __RTNET_IP_SOCK_H_ ++ ++#include ++ ++extern int rt_ip_ioctl(struct rtdm_fd *fd, int request, void *arg); ++ ++#endif /* __RTNET_IP_SOCK_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/af_inet.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/af_inet.h 2021-04-07 16:01:26.839634720 +0800 +@@ -0,0 +1,35 @@ ++/*** ++ * ++ * include/ipv4/af_inet.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_AF_INET_H_ ++#define __RTNET_AF_INET_H_ ++ ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++extern struct xnvfile_directory ipv4_proc_root; ++#endif ++ ++#endif /* __RTNET_AF_INET_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/protocol.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/protocol.h 2021-04-07 16:01:26.835634726 +0800 +@@ -0,0 +1,54 @@ ++/*** ++ * ++ * include/ipv4/protocol.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_PROTOCOL_H_ ++#define __RTNET_PROTOCOL_H_ ++ ++#include ++#include ++ ++#define MAX_RT_INET_PROTOCOLS 32 ++ ++/*** ++ * transport layer protocol ++ */ ++struct rtinet_protocol { ++ char *name; ++ unsigned short protocol; ++ ++ struct rtsocket *(*dest_socket)(struct rtskb *); ++ void (*rcv_handler)(struct rtskb *); ++ void (*err_handler)(struct rtskb *); ++ int (*init_socket)(struct rtdm_fd *); ++}; ++ ++extern struct rtinet_protocol *rt_inet_protocols[]; ++ ++#define rt_inet_hashkey(id) (id & (MAX_RT_INET_PROTOCOLS - 1)) ++extern void rt_inet_add_protocol(struct rtinet_protocol *prot); ++extern void rt_inet_del_protocol(struct rtinet_protocol *prot); ++extern int rt_inet_socket(struct rtdm_fd *fd, int protocol); ++ ++#endif /* __RTNET_PROTOCOL_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/udp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/udp.h 2021-04-07 16:01:26.830634733 +0800 +@@ -0,0 +1,33 @@ ++/*** ++ * ++ * include/ipv4/udp.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_UDP_H_ ++#define __RTNET_UDP_H_ ++ ++/* Maximum number of active udp sockets ++ Only increase with care (look-up delays!), must be power of 2 */ ++#define RT_UDP_SOCKETS 64 ++ ++#endif /* __RTNET_UDP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/arp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/arp.h 2021-04-07 16:01:26.825634741 +0800 +@@ -0,0 +1,51 @@ ++/*** ++ * ++ * include/ipv4/arp.h - Adress Resolution Protocol for RTnet ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_ARP_H_ ++#define __RTNET_ARP_H_ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define RT_ARP_SKB_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++void rt_arp_send(int type, int ptype, u32 dest_ip, struct rtnet_device *rtdev, ++ u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, ++ unsigned char *target_hw); ++ ++static inline void rt_arp_solicit(struct rtnet_device *rtdev, u32 target) ++{ ++ rt_arp_send(ARPOP_REQUEST, ETH_P_ARP, target, rtdev, rtdev->local_ip, ++ NULL, NULL, NULL); ++} ++ ++void __init rt_arp_init(void); ++void rt_arp_release(void); ++ ++#endif /* __RTNET_ARP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/icmp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/icmp.h 2021-04-07 16:01:26.820634748 +0800 +@@ -0,0 +1,56 @@ ++/*** ++ * ++ * ipv4/icmp.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999, 2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_ICMP_H_ ++#define __RTNET_ICMP_H_ ++ ++#include ++ ++#include ++#include ++#include ++ ++#define RT_ICMP_PRIO RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++#define ICMP_REPLY_POOL_SIZE 8 ++ ++void rt_icmp_queue_echo_request(struct rt_proc_call *call); ++void rt_icmp_dequeue_echo_request(struct rt_proc_call *call); ++void rt_icmp_cleanup_echo_requests(void); ++int rt_icmp_send_echo(u32 daddr, u16 id, u16 sequence, size_t msg_size); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP ++void __init rt_icmp_init(void); ++void rt_icmp_release(void); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++#define rt_icmp_init() \ ++ do { \ ++ } while (0) ++#define rt_icmp_release() \ ++ do { \ ++ } while (0) ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP */ ++ ++#endif /* __RTNET_ICMP_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_fragment.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_fragment.h 2021-04-07 16:01:26.816634753 +0800 +@@ -0,0 +1,37 @@ ++/* ipv4/ip_fragment.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_IP_FRAGMENT_H_ ++#define __RTNET_IP_FRAGMENT_H_ ++ ++#include ++ ++#include ++#include ++ ++extern struct rtskb *rt_ip_defrag(struct rtskb *skb, ++ struct rtinet_protocol *ipprot); ++ ++extern void rt_ip_frag_invalidate_socket(struct rtsocket *sock); ++ ++extern int __init rt_ip_fragment_init(void); ++extern void rt_ip_fragment_cleanup(void); ++ ++#endif /* __RTNET_IP_FRAGMENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/ip_input.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/ip_input.h 2021-04-07 16:01:26.811634761 +0800 +@@ -0,0 +1,45 @@ ++/* ipv4/ip_input.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++#ifndef __RTNET_IP_INPUT_H_ ++#define __RTNET_IP_INPUT_H_ ++ ++#include ++#include ++ ++extern int rt_ip_rcv(struct rtskb *skb, struct rtpacket_type *pt); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY) ++typedef void (*rt_ip_fallback_handler_t)(struct rtskb *skb); ++ ++/* ++ * This hook can be used to register a fallback handler for incoming ++ * IP packets. Typically this is done to move over to the standard Linux ++ * IP protocol (e.g. for handling TCP). ++ * Manipulating the fallback handler is expected to happen only when the ++ * RTnetinterfaces are shut down (avoiding race conditions). ++ * ++ * Note that merging RT and non-RT traffic this way most likely breaks hard ++ * real-time constraints! ++ */ ++extern rt_ip_fallback_handler_t rt_ip_fallback_handler; ++#endif ++ ++#endif /* __RTNET_IP_INPUT_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4/tcp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4/tcp.h 2021-04-07 16:01:26.806634768 +0800 +@@ -0,0 +1,50 @@ ++/*** ++ * ++ * include/ipv4/tcp.h ++ * ++ * Copyright (C) 2009 Vladimir Zapolskiy ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_TCP_H_ ++#define __RTNET_TCP_H_ ++ ++#include ++#include ++ ++/* Maximum number of active tcp sockets, must be power of 2 */ ++#define RT_TCP_SOCKETS 32 ++ ++/*Maximum number of active tcp connections, must be power of 2 */ ++#define RT_TCP_CONNECTIONS 64 ++ ++/* Maximum size of TCP input window */ ++#define RT_TCP_WINDOW 4096 ++ ++/* Maximum number of retransmissions of invalid segments */ ++#define RT_TCP_RETRANSMIT 3 ++ ++/* Number of milliseconds to wait for ACK */ ++#define RT_TCP_WAIT_TIME 10 ++ ++/* Priority of RST|ACK replies (error condition => non-RT prio) */ ++#define RT_TCP_RST_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++/* rtskb pool for sending socket-less RST|ACK */ ++#define RT_TCP_RST_POOL_SIZE 8 ++ ++#endif /* __RTNET_TCP_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtdev_mgr.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtdev_mgr.h 2021-04-07 16:01:26.802634774 +0800 +@@ -0,0 +1,39 @@ ++/* rtdev_mgr.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTDEV_MGR_H_ ++#define __RTDEV_MGR_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++ ++extern void rtnetif_err_rx(struct rtnet_device *rtdev); ++extern void rtnetif_err_tx(struct rtnet_device *rtdev); ++ ++extern void rt_rtdev_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr); ++extern void rt_rtdev_disconnect(struct rtnet_device *rtdev); ++extern int rt_rtdev_mgr_init(struct rtnet_mgr *mgr); ++extern void rt_rtdev_mgr_delete(struct rtnet_mgr *mgr); ++extern int rt_rtdev_mgr_start(struct rtnet_mgr *mgr); ++extern int rt_rtdev_mgr_stop(struct rtnet_mgr *mgr); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTDEV_MGR_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_chrdev.h 2021-04-07 16:01:26.797634781 +0800 +@@ -0,0 +1,116 @@ ++/*** ++ * ++ * include/rtnet_chrdev.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999,2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003,2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTNET_CHRDEV_H_ ++#define __RTNET_CHRDEV_H_ ++ ++#include ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++/* new extensible interface */ ++struct rtnet_ioctls { ++ /* internal usage only */ ++ struct list_head entry; ++ atomic_t ref_count; ++ ++ /* provider specification */ ++ const char *service_name; ++ unsigned int ioctl_type; ++ int (*handler)(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++}; ++ ++extern int rtnet_register_ioctls(struct rtnet_ioctls *ioctls); ++extern void rtnet_unregister_ioctls(struct rtnet_ioctls *ioctls); ++ ++extern int __init rtnet_chrdev_init(void); ++extern void rtnet_chrdev_release(void); ++ ++#else /* ifndef __KERNEL__ */ ++ ++#include /* IFNAMSIZ */ ++#include ++ ++#endif /* __KERNEL__ */ ++ ++#define RTNET_MINOR 240 /* user interface for /dev/rtnet */ ++#define DEV_ADDR_LEN 32 /* avoids inconsistent MAX_ADDR_LEN */ ++ ++struct rtnet_ioctl_head { ++ char if_name[IFNAMSIZ]; ++}; ++ ++struct rtnet_core_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ /*** rtifconfig **/ ++ struct { ++ __u32 ip_addr; ++ __u32 broadcast_ip; ++ __u32 set_dev_flags; ++ __u32 clear_dev_flags; ++ __u32 dev_addr_type; ++ __u32 __padding; ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ } up; ++ ++ struct { ++ __u32 ifindex; ++ __u32 type; ++ __u32 ip_addr; ++ __u32 broadcast_ip; ++ __u32 mtu; ++ __u32 flags; ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ } info; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define RTNET_IOC_NODEV_PARAM 0x80 ++ ++#define RTNET_IOC_TYPE_CORE 0 ++#define RTNET_IOC_TYPE_RTCFG 1 ++#define RTNET_IOC_TYPE_IPV4 2 ++#define RTNET_IOC_TYPE_RTMAC_NOMAC 100 ++#define RTNET_IOC_TYPE_RTMAC_TDMA 110 ++ ++#define IOC_RT_IFUP _IOW(RTNET_IOC_TYPE_CORE, 0, struct rtnet_core_cmd) ++#define IOC_RT_IFDOWN _IOW(RTNET_IOC_TYPE_CORE, 1, struct rtnet_core_cmd) ++#define IOC_RT_IFINFO \ ++ _IOWR(RTNET_IOC_TYPE_CORE, 2 | RTNET_IOC_NODEV_PARAM, \ ++ struct rtnet_core_cmd) ++ ++#endif /* __RTNET_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_vnic.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_vnic.h 2021-04-07 16:01:26.793634786 +0800 +@@ -0,0 +1,59 @@ ++/* include/rtmac/rtmac_vnic.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#ifndef __RTMAC_VNIC_H_ ++#define __RTMAC_VNIC_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#include ++ ++#define DEFAULT_VNIC_RTSKBS 32 ++ ++int rtmac_vnic_rx(struct rtskb *skb, u16 type); ++ ++int rtmac_vnic_xmit(struct sk_buff *skb, struct net_device *dev); ++ ++void rtmac_vnic_set_max_mtu(struct rtnet_device *rtdev, unsigned int max_mtu); ++ ++int rtmac_vnic_add(struct rtnet_device *rtdev, vnic_xmit_handler vnic_xmit); ++int rtmac_vnic_unregister(struct rtnet_device *rtdev); ++ ++static inline void rtmac_vnic_cleanup(struct rtnet_device *rtdev) ++{ ++ struct rtmac_priv *mac_priv = rtdev->mac_priv; ++ ++ rtskb_pool_release(&mac_priv->vnic_skb_pool); ++} ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_vnics_show(struct xnvfile_regular_iterator *it, void *data); ++#endif ++ ++int __init rtmac_vnic_module_init(void); ++void rtmac_vnic_module_cleanup(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTMAC_VNIC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac.h 2021-04-07 16:01:26.788634793 +0800 +@@ -0,0 +1,51 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_H_ ++#define __NOMAC_H_ ++ ++#include ++ ++#include ++ ++#define RTMAC_TYPE_NOMAC 0 ++ ++#define NOMAC_MAGIC 0x004D0A0C ++ ++struct nomac_priv { ++ unsigned int magic; ++ struct rtnet_device *rtdev; ++ char device_name[32]; ++ struct rtdm_driver api_driver; ++ struct rtdm_device api_device; ++ /* ... */ ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct list_head list_entry; ++#endif ++}; ++ ++extern struct rtmac_disc nomac_disc; ++ ++#endif /* __NOMAC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_dev.h 2021-04-07 16:01:26.783634801 +0800 +@@ -0,0 +1,37 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_dev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_DEV_H_ ++#define __NOMAC_DEV_H_ ++ ++#include ++ ++int nomac_dev_init(struct rtnet_device *rtdev, struct nomac_priv *nomac); ++ ++static inline void nomac_dev_release(struct nomac_priv *nomac) ++{ ++ rtdm_dev_unregister(&nomac->api_device); ++} ++ ++#endif /* __NOMAC_DEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_proto.h 2021-04-07 16:01:26.778634808 +0800 +@@ -0,0 +1,38 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_proto.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __NOMAC_PROTO_H_ ++#define __NOMAC_PROTO_H_ ++ ++#include ++ ++int nomac_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev); ++int nomac_nrt_packet_tx(struct rtskb *rtskb); ++ ++int nomac_packet_rx(struct rtskb *rtskb); ++ ++int nomac_proto_init(void); ++void nomac_proto_cleanup(void); ++ ++#endif /* __NOMAC_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/nomac/nomac_ioctl.h 2021-04-07 16:01:26.774634813 +0800 +@@ -0,0 +1,31 @@ ++/*** ++ * ++ * include/rtmac/nomac/nomac_ioctl.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __NOMAC_IOCTL_H_ ++#define __NOMAC_IOCTL_H_ ++ ++int nomac_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++ ++#endif /* __NOMAC_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_proto.h 2021-04-07 16:01:26.769634821 +0800 +@@ -0,0 +1,78 @@ ++/*** ++ * ++ * include/rtmac/rtmac_proto.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTMAC_PROTO_H_ ++#define __RTMAC_PROTO_H_ ++ ++#include ++ ++#define RTMAC_VERSION 0x02 ++#define ETH_RTMAC 0x9021 ++ ++#define RTMAC_FLAG_TUNNEL 0x01 ++ ++struct rtmac_hdr { ++ u16 type; ++ u8 ver; ++ u8 flags; ++} __attribute__((packed)); ++ ++static inline int rtmac_add_header(struct rtnet_device *rtdev, void *daddr, ++ struct rtskb *skb, u16 type, u8 flags) ++{ ++ struct rtmac_hdr *hdr = ++ (struct rtmac_hdr *)rtskb_push(skb, sizeof(struct rtmac_hdr)); ++ ++ hdr->type = htons(type); ++ hdr->ver = RTMAC_VERSION; ++ hdr->flags = flags; ++ ++ skb->rtdev = rtdev; ++ ++ if (rtdev->hard_header && ++ (rtdev->hard_header(skb, rtdev, ETH_RTMAC, daddr, rtdev->dev_addr, ++ skb->len) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static inline int rtmac_xmit(struct rtskb *skb) ++{ ++ struct rtnet_device *rtdev = skb->rtdev; ++ int ret; ++ ++ ret = rtdev->hard_start_xmit(skb, rtdev); ++ if (ret != 0) ++ kfree_rtskb(skb); ++ ++ return ret; ++} ++ ++extern struct rtpacket_type rtmac_packet_type; ++ ++#define rtmac_proto_init() rtdev_add_pack(&rtmac_packet_type) ++void rtmac_proto_release(void); ++ ++#endif /* __RTMAC_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_ioctl.h 2021-04-07 16:01:26.765634826 +0800 +@@ -0,0 +1,35 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_ioctl.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_IOCTL_H_ ++#define __TDMA_IOCTL_H_ ++ ++#include ++ ++int tdma_cleanup_slot(struct tdma_priv *tdma, struct tdma_slot *slot); ++ ++int tdma_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg); ++ ++#endif /* __TDMA_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_worker.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_worker.h 2021-04-07 16:01:26.760634834 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_worker.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_WORKER_H_ ++#define __TDMA_WORKER_H_ ++ ++#include ++ ++#define DEF_WORKER_PRIO RTDM_TASK_HIGHEST_PRIORITY ++ ++void tdma_worker(void *arg); ++ ++#endif /* __TDMA_WORKER_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma.h 2021-04-07 16:01:26.755634841 +0800 +@@ -0,0 +1,161 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_H_ ++#define __TDMA_H_ ++ ++#include ++ ++#include ++#include ++ ++#define RTMAC_TYPE_TDMA 0x0001 ++ ++#define TDMA_MAGIC 0x3A0D4D0A ++ ++#define TDMA_FLAG_CALIBRATED 1 ++#define TDMA_FLAG_RECEIVED_SYNC 2 ++#define TDMA_FLAG_MASTER 3 /* also set for backup masters */ ++#define TDMA_FLAG_BACKUP_MASTER 4 ++#define TDMA_FLAG_ATTACHED 5 ++#define TDMA_FLAG_BACKUP_ACTIVE 6 ++ ++#define DEFAULT_SLOT 0 ++#define DEFAULT_NRT_SLOT 1 ++ ++/* job IDs */ ++#define WAIT_ON_SYNC -1 ++#define XMIT_SYNC -2 ++#define BACKUP_SYNC -3 ++#define XMIT_REQ_CAL -4 ++#define XMIT_RPL_CAL -5 ++ ++struct tdma_priv; ++ ++struct tdma_job { ++ struct list_head entry; ++ int id; ++ unsigned int ref_count; ++}; ++ ++#define SLOT_JOB(job) ((struct tdma_slot *)(job)) ++ ++struct tdma_slot { ++ struct tdma_job head; ++ ++ u64 offset; ++ unsigned int period; ++ unsigned int phasing; ++ unsigned int mtu; ++ unsigned int size; ++ struct rtskb_prio_queue *queue; ++ struct rtskb_prio_queue local_queue; ++}; ++ ++#define REQUEST_CAL_JOB(job) ((struct tdma_request_cal *)(job)) ++ ++struct tdma_request_cal { ++ struct tdma_job head; ++ ++ struct tdma_priv *tdma; ++ u64 offset; ++ unsigned int period; ++ unsigned int phasing; ++ unsigned int cal_rounds; ++ u64 *cal_results; ++ u64 *result_buffer; ++}; ++ ++#define REPLY_CAL_JOB(job) ((struct tdma_reply_cal *)(job)) ++ ++struct tdma_reply_cal { ++ struct tdma_job head; ++ ++ u32 reply_cycle; ++ u64 reply_offset; ++ struct rtskb *reply_rtskb; ++}; ++ ++struct tdma_priv { ++ unsigned int magic; ++ struct rtnet_device *rtdev; ++ char device_name[32]; ++ struct rtdm_driver api_driver; ++ struct rtdm_device api_device; ++ ++#ifdef ALIGN_RTOS_TASK ++ __u8 __align[(ALIGN_RTOS_TASK - ++ ((sizeof(unsigned int) + sizeof(struct rtnet_device *) + ++ sizeof(struct rtdm_device)) & ++ (ALIGN_RTOS_TASK - 1))) & ++ (ALIGN_RTOS_TASK - 1)]; ++#endif ++ rtdm_task_t worker_task; ++ rtdm_event_t worker_wakeup; ++ rtdm_event_t xmit_event; ++ rtdm_event_t sync_event; ++ ++ unsigned long flags; ++ unsigned int cal_rounds; ++ u32 current_cycle; ++ u64 current_cycle_start; ++ u64 master_packet_delay_ns; ++ nanosecs_rel_t clock_offset; ++ ++ struct tdma_job sync_job; ++ struct tdma_job *first_job; ++ struct tdma_job *current_job; ++ volatile unsigned int job_list_revision; ++ ++ unsigned int max_slot_id; ++ struct tdma_slot **slot_table; ++ ++ struct rt_proc_call *calibration_call; ++ unsigned char master_hw_addr[MAX_ADDR_LEN]; ++ ++ rtdm_lock_t lock; ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_TDMA_MASTER ++ struct rtskb_pool cal_rtskb_pool; ++ u64 cycle_period; ++ u64 backup_sync_inc; ++#endif ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct list_head list_entry; ++#endif ++}; ++ ++extern struct rtmac_disc tdma_disc; ++ ++#define print_jobs() \ ++ do { \ ++ struct tdma_job *entry; \ ++ rtdm_printk("%s:%d - ", __FUNCTION__, __LINE__); \ ++ list_for_each_entry (entry, &tdma->first_job->entry, entry) \ ++ rtdm_printk("%d ", entry->id); \ ++ rtdm_printk("\n"); \ ++ } while (0) ++ ++#endif /* __TDMA_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_dev.h 2021-04-07 16:01:26.751634846 +0800 +@@ -0,0 +1,37 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_dev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify 
++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __TDMA_DEV_H_ ++#define __TDMA_DEV_H_ ++ ++#include ++ ++int tdma_dev_init(struct rtnet_device *rtdev, struct tdma_priv *tdma); ++ ++static inline void tdma_dev_release(struct tdma_priv *tdma) ++{ ++ rtdm_dev_unregister(&tdma->api_device); ++} ++ ++#endif /* __TDMA_DEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_proto.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/tdma/tdma_proto.h 2021-04-07 16:01:26.746634853 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * include/rtmac/tdma/tdma_proto.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __TDMA_PROTO_H_ ++#define __TDMA_PROTO_H_ ++ ++#include ++ ++#include ++ ++#define TDMA_FRM_VERSION 0x0201 ++ ++#define TDMA_FRM_SYNC 0x0000 ++#define TDMA_FRM_REQ_CAL 0x0010 ++#define TDMA_FRM_RPL_CAL 0x0011 ++ ++struct tdma_frm_head { ++ u16 version; ++ u16 id; ++} __attribute__((packed)); ++ ++#define SYNC_FRM(head) ((struct tdma_frm_sync *)(head)) ++ ++struct tdma_frm_sync { ++ struct tdma_frm_head head; ++ u32 cycle_no; ++ u64 xmit_stamp; ++ u64 sched_xmit_stamp; ++} __attribute__((packed)); ++ ++#define REQ_CAL_FRM(head) ((struct tdma_frm_req_cal *)(head)) ++ ++struct tdma_frm_req_cal { ++ struct tdma_frm_head head; ++ u64 xmit_stamp; ++ u32 reply_cycle; ++ u64 reply_slot_offset; ++} __attribute__((packed)); ++ ++#define RPL_CAL_FRM(head) ((struct tdma_frm_rpl_cal *)(head)) ++ ++struct tdma_frm_rpl_cal { ++ struct tdma_frm_head head; ++ u64 request_xmit_stamp; ++ u64 reception_stamp; ++ u64 xmit_stamp; ++} __attribute__((packed)); ++ ++void tdma_xmit_sync_frame(struct tdma_priv *tdma); ++int tdma_xmit_request_cal_frame(struct tdma_priv *tdma, u32 reply_cycle, ++ u64 reply_slot_offset); ++ ++int tdma_rt_packet_tx(struct rtskb *rtskb, struct rtnet_device *rtdev); ++int tdma_nrt_packet_tx(struct rtskb *rtskb); ++ ++int tdma_packet_rx(struct rtskb *rtskb); ++ ++unsigned int tdma_get_mtu(struct rtnet_device *rtdev, unsigned int priority); ++ ++#endif /* __TDMA_PROTO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_disc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_disc.h 2021-04-07 16:01:26.741634861 +0800 +@@ -0,0 +1,95 @@ ++/*** ++ * ++ * include/rtmac/rtmac_disc.h ++ * ++ * rtmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTMAC_DISC_H_ ++#define __RTMAC_DISC_H_ ++ ++#include ++#include ++ ++#include ++#include ++ ++#define RTMAC_NO_VNIC NULL ++#define RTMAC_DEFAULT_VNIC rtmac_vnic_xmit ++ ++typedef int (*vnic_xmit_handler)(struct sk_buff *skb, struct net_device *dev); ++ ++struct rtmac_priv { ++ int (*orig_start_xmit)(struct rtskb *skb, struct rtnet_device *dev); ++ struct net_device *vnic; ++ struct net_device_stats vnic_stats; ++ struct rtskb_pool vnic_skb_pool; ++ unsigned int vnic_max_mtu; ++ ++ u8 disc_priv[0] __attribute__((aligned(16))); ++}; ++ ++struct rtmac_proc_entry { ++ const char *name; ++ int (*handler)(struct xnvfile_regular_iterator *it, void *data); ++ struct xnvfile_regular vfile; ++}; ++ ++struct rtmac_disc { ++ struct list_head list; ++ ++ const char *name; ++ unsigned int priv_size; /* size of rtmac_priv.disc_priv */ ++ u16 disc_type; ++ ++ int (*packet_rx)(struct rtskb *skb); ++ /* rt_packet_tx prototype must be compatible with hard_start_xmit */ ++ int (*rt_packet_tx)(struct rtskb *skb, struct rtnet_device *dev); ++ int (*nrt_packet_tx)(struct rtskb *skb); ++ ++ unsigned int (*get_mtu)(struct rtnet_device *rtdev, ++ unsigned int priority); ++ ++ vnic_xmit_handler vnic_xmit; ++ ++ int (*attach)(struct rtnet_device *rtdev, void *disc_priv); ++ int (*detach)(struct rtnet_device *rtdev, void *disc_priv); ++ ++ struct rtnet_ioctls ioctls; ++ ++ struct rtmac_proc_entry *proc_entries; ++ unsigned nr_proc_entries; ++ ++ struct module *owner; ++}; ++ ++int rtmac_disc_attach(struct rtnet_device *rtdev, struct rtmac_disc *disc); ++int rtmac_disc_detach(struct rtnet_device *rtdev); ++ ++int __rtmac_disc_register(struct rtmac_disc *disc, struct module *module); ++#define rtmac_disc_register(disc) __rtmac_disc_register(disc, THIS_MODULE) ++ ++void rtmac_disc_deregister(struct rtmac_disc *disc); ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++int rtnet_rtmac_disciplines_show(struct xnvfile_regular_iterator *it, void *d); ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* __RTMAC_DISC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtmac/rtmac_proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtmac/rtmac_proc.h 2021-04-07 16:01:26.737634866 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtmac/rtmac_proc.h ++ * ++ * rtmac - real-time networking medium access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde ++ * 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTMAC_PROC_H_ ++#define __RTMAC_PROC_H_ ++ ++int rtmac_disc_proc_register(struct rtmac_disc *disc); ++void rtmac_disc_proc_unregister(struct rtmac_disc *disc); ++ ++int rtmac_proc_register(void); ++void rtmac_proc_release(void); ++ ++#endif /* __RTMAC_PROC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtwlan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtwlan.h 2021-04-07 16:01:26.732634874 +0800 +@@ -0,0 +1,263 @@ ++/* rtwlan.h ++ * ++ * This file is a rtnet adaption from ieee80211/ieee80211.h used by the ++ * rt2x00-2.0.0-b3 sourceforge project ++ * ++ * Merged with mainline ieee80211.h in Aug 2004. Original ieee802_11 ++ * remains copyright by the original authors ++ * ++ * Portions of the merged code are based on Host AP (software wireless ++ * LAN access point) driver for Intersil Prism2/2.5/3. ++ * ++ * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen ++ * ++ * Copyright (c) 2002-2003, Jouni Malinen ++ * ++ * Adaption to a generic IEEE 802.11 stack by James Ketrenos ++ * ++ * Copyright (c) 2004-2005, Intel Corporation ++ * ++ * Adaption to rtnet ++ * Copyright (c) 2006, Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef RTWLAN_H ++#define RTWLAN_H ++ ++#include /* ETH_ALEN */ ++#include /* ARRAY_SIZE */ ++ ++#include ++#include ++ ++#define IEEE80211_1ADDR_LEN 10 ++#define IEEE80211_2ADDR_LEN 16 ++#define IEEE80211_3ADDR_LEN 24 ++#define IEEE80211_4ADDR_LEN 30 ++#define IEEE80211_FCS_LEN 4 ++#define IEEE80211_HLEN (IEEE80211_4ADDR_LEN) ++#define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) ++ ++#define MIN_FRAG_THRESHOLD 256U ++#define MAX_FRAG_THRESHOLD 2346U ++ ++/* Frame control field constants */ ++#define IEEE80211_FCTL_VERS 0x0003 ++#define IEEE80211_FCTL_FTYPE 0x000c ++#define IEEE80211_FCTL_STYPE 0x00f0 ++#define IEEE80211_FCTL_TODS 0x0100 ++#define IEEE80211_FCTL_FROMDS 0x0200 ++#define IEEE80211_FCTL_MOREFRAGS 0x0400 ++#define IEEE80211_FCTL_RETRY 0x0800 ++#define IEEE80211_FCTL_PM 0x1000 ++#define IEEE80211_FCTL_MOREDATA 0x2000 ++#define IEEE80211_FCTL_PROTECTED 0x4000 ++#define IEEE80211_FCTL_ORDER 0x8000 ++ ++#define IEEE80211_FTYPE_MGMT 0x0000 ++#define IEEE80211_FTYPE_CTL 0x0004 ++#define IEEE80211_FTYPE_DATA 0x0008 ++ ++/* management */ ++#define IEEE80211_STYPE_ASSOC_REQ 0x0000 ++#define IEEE80211_STYPE_ASSOC_RESP 0x0010 ++#define IEEE80211_STYPE_REASSOC_REQ 0x0020 ++#define IEEE80211_STYPE_REASSOC_RESP 0x0030 ++#define IEEE80211_STYPE_PROBE_REQ 0x0040 ++#define IEEE80211_STYPE_PROBE_RESP 0x0050 ++#define IEEE80211_STYPE_BEACON 0x0080 ++#define IEEE80211_STYPE_ATIM 0x0090 ++#define IEEE80211_STYPE_DISASSOC 0x00A0 ++#define IEEE80211_STYPE_AUTH 0x00B0 ++#define IEEE80211_STYPE_DEAUTH 0x00C0 ++#define IEEE80211_STYPE_ACTION 0x00D0 ++ ++/* control */ ++#define IEEE80211_STYPE_PSPOLL 0x00A0 ++#define IEEE80211_STYPE_RTS 0x00B0 ++#define IEEE80211_STYPE_CTS 0x00C0 ++#define IEEE80211_STYPE_ACK 0x00D0 ++#define IEEE80211_STYPE_CFEND 0x00E0 ++#define IEEE80211_STYPE_CFENDACK 0x00F0 ++ ++/* data */ ++#define IEEE80211_STYPE_DATA 0x0000 ++#define IEEE80211_STYPE_DATA_CFACK 0x0010 ++#define IEEE80211_STYPE_DATA_CFPOLL 0x0020 ++#define IEEE80211_STYPE_DATA_CFACKPOLL 0x0030 ++#define IEEE80211_STYPE_NULLFUNC 0x0040 ++#define IEEE80211_STYPE_CFACK 0x0050 ++#define IEEE80211_STYPE_CFPOLL 0x0060 ++#define IEEE80211_STYPE_CFACKPOLL 0x0070 ++#define IEEE80211_STYPE_QOS_DATA 0x0080 ++ ++#define RTWLAN_SCTL_SEQ 0xFFF0 ++ ++#define WLAN_FC_GET_VERS(fc) ((fc)&IEEE80211_FCTL_VERS) ++#define WLAN_FC_GET_TYPE(fc) ((fc)&IEEE80211_FCTL_FTYPE) ++#define WLAN_FC_GET_STYPE(fc) ((fc)&IEEE80211_FCTL_STYPE) ++ ++#define IEEE80211_DSSS_RATE_1MB 0x02 ++#define IEEE80211_DSSS_RATE_2MB 0x04 ++#define IEEE80211_DSSS_RATE_5MB 0x0B ++#define IEEE80211_DSSS_RATE_11MB 0x16 ++#define IEEE80211_OFDM_RATE_6MB 0x0C ++#define IEEE80211_OFDM_RATE_9MB 0x12 ++#define IEEE80211_OFDM_RATE_12MB 0x18 ++#define IEEE80211_OFDM_RATE_18MB 0x24 ++#define IEEE80211_OFDM_RATE_24MB 0x30 ++#define IEEE80211_OFDM_RATE_36MB 0x48 ++#define IEEE80211_OFDM_RATE_48MB 0x60 ++#define IEEE80211_OFDM_RATE_54MB 0x6C ++#define IEEE80211_BASIC_RATE_MASK 0x80 ++ ++#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" ++#define MAC_ARG(x) \ ++ ((u8 *)(x))[0], ((u8 *)(x))[1], ((u8 *)(x))[2], ((u8 *)(x))[3], \ ++ ((u8 *)(x))[4], ((u8 *)(x))[5] ++ ++#ifdef CONFIG_RTWLAN_DEBUG ++#define RTWLAN_DEBUG_PRINTK(__message...) \ ++ do { \ ++ rtdm_printk(__message); \ ++ } while (0) ++#define RTWLAN_DEBUG(__message, __args...) \ ++ RTWLAN_DEBUG_PRINTK(KERN_DEBUG "rtwlan->%s: Debug - " __message, \ ++ __FUNCTION__, ##__args); ++#else ++#define RTWLAN_DEBUG(__message...) 
\ ++ do { \ ++ } while (0) ++#endif ++ ++struct rtwlan_stats { ++ unsigned long rx_packets; /* total packets received */ ++ unsigned long tx_packets; /* total packets transmitted */ ++ unsigned long tx_retry; /* total packets transmitted with retry */ ++}; ++ ++struct rtwlan_device { ++ struct rtwlan_stats stats; ++ ++ struct rtskb_pool skb_pool; ++ ++ int mode; ++ ++ int (*hard_start_xmit)(struct rtskb *rtskb, ++ struct rtnet_device *rtnet_dev); ++ ++ /* This must be the last item */ ++ u8 priv[0]; ++}; ++ ++/* Minimal header; can be used for passing 802.11 frames with sufficient ++ * information to determine what type of underlying data type is actually ++ * stored in the data. */ ++struct ieee80211_hdr { ++ u16 frame_ctl; ++ u16 duration_id; ++ u8 payload[0]; ++} __attribute__((packed)); ++ ++struct ieee80211_hdr_3addr { ++ u16 frame_ctl; ++ u16 duration_id; ++ u8 addr1[ETH_ALEN]; ++ u8 addr2[ETH_ALEN]; ++ u8 addr3[ETH_ALEN]; ++ u16 seq_ctl; ++ u8 payload[0]; ++} __attribute__((packed)); ++ ++static inline int ieee80211_get_hdrlen(u16 fc) ++{ ++ int hdrlen = IEEE80211_3ADDR_LEN; ++ u16 stype = WLAN_FC_GET_STYPE(fc); ++ ++ switch (WLAN_FC_GET_TYPE(fc)) { ++ case IEEE80211_FTYPE_DATA: ++ if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) ++ hdrlen = IEEE80211_4ADDR_LEN; ++ if (stype & IEEE80211_STYPE_QOS_DATA) ++ hdrlen += 2; ++ break; ++ ++ case IEEE80211_FTYPE_CTL: ++ switch (WLAN_FC_GET_STYPE(fc)) { ++ case IEEE80211_STYPE_CTS: ++ case IEEE80211_STYPE_ACK: ++ hdrlen = IEEE80211_1ADDR_LEN; ++ break; ++ ++ default: ++ hdrlen = IEEE80211_2ADDR_LEN; ++ break; ++ } ++ break; ++ } ++ ++ return hdrlen; ++} ++ ++static inline int ieee80211_is_ofdm_rate(u8 rate) ++{ ++ switch (rate & ~IEEE80211_BASIC_RATE_MASK) { ++ case IEEE80211_OFDM_RATE_6MB: ++ case IEEE80211_OFDM_RATE_9MB: ++ case IEEE80211_OFDM_RATE_12MB: ++ case IEEE80211_OFDM_RATE_18MB: ++ case IEEE80211_OFDM_RATE_24MB: ++ case IEEE80211_OFDM_RATE_36MB: ++ case IEEE80211_OFDM_RATE_48MB: ++ case IEEE80211_OFDM_RATE_54MB: ++ return 1; ++ } ++ return 0; ++} ++ ++static inline int ieee80211_is_dsss_rate(u8 rate) ++{ ++ switch (rate & ~IEEE80211_BASIC_RATE_MASK) { ++ case IEEE80211_DSSS_RATE_1MB: ++ case IEEE80211_DSSS_RATE_2MB: ++ case IEEE80211_DSSS_RATE_5MB: ++ case IEEE80211_DSSS_RATE_11MB: ++ return 1; ++ } ++ return 0; ++} ++ ++static inline void *rtwlan_priv(struct rtwlan_device *rtwlan_dev) ++{ ++ return (void *)rtwlan_dev + sizeof(struct rtwlan_device); ++} ++ ++struct rtnet_device *rtwlan_alloc_dev(unsigned sizeof_priv, ++ unsigned dev_pool_size); ++int rtwlan_rx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev); ++int rtwlan_tx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTWLAN ++int __init rtwlan_init(void); ++void rtwlan_exit(void); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTWLAN */ ++#define rtwlan_init() 0 ++#define rtwlan_exit() ++#endif /* CONFIG_XENO_DRIVERS_NET_RTWLAN */ ++ ++#endif +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_proc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_proc.h 2021-04-07 16:01:26.727634881 +0800 +@@ -0,0 +1,63 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_proc.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * 
(at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_PROC_H_ ++#define __RTCFG_PROC_H_ ++ ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++extern struct mutex nrt_proc_lock; ++ ++void rtcfg_update_conn_proc_entries(int ifindex); ++void rtcfg_remove_conn_proc_entries(int ifindex); ++ ++int rtcfg_init_proc(void); ++void rtcfg_cleanup_proc(void); ++ ++static inline void rtcfg_lockwr_proc(int ifindex) ++{ ++ mutex_lock(&nrt_proc_lock); ++ rtcfg_remove_conn_proc_entries(ifindex); ++} ++ ++static inline void rtcfg_unlockwr_proc(int ifindex) ++{ ++ rtcfg_update_conn_proc_entries(ifindex); ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++#else ++ ++#define rtcfg_lockwr_proc(x) \ ++ do { \ ++ } while (0) ++#define rtcfg_unlockwr_proc(x) \ ++ do { \ ++ } while (0) ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ ++ ++#endif /* __RTCFG_PROC_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_timer.h 2021-04-07 16:01:26.723634886 +0800 +@@ -0,0 +1,34 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_timer.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_TIMER_H_ ++#define __RTCFG_TIMER_H_ ++ ++void rtcfg_timer(rtdm_timer_t *t); ++ ++void rtcfg_timer_run(void); ++ ++void rtcfg_thread_signal(void); ++ ++#endif /* __RTCFG_TIMER_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_file.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_file.h 2021-04-07 16:01:26.718634893 +0800 +@@ -0,0 +1,43 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_file.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_FILE_H_ ++#define __RTCFG_FILE_H_ ++ ++#include ++#include ++ ++struct rtcfg_file { ++ struct list_head entry; ++ int ref_count; ++ const char *name; ++ size_t size; ++ void *buffer; ++}; ++ ++struct rtcfg_file *rtcfg_get_file(const char *filename); ++void rtcfg_add_file(struct rtcfg_file *file); ++int rtcfg_release_file(struct rtcfg_file *file); ++ ++#endif /* __RTCFG_FILE_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_conn_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_conn_event.h 2021-04-07 16:01:26.713634901 +0800 +@@ -0,0 +1,69 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_conn_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_CONN_EVENT_H_ ++#define __RTCFG_CONN_EVENT_H_ ++ ++#include ++ ++#include ++#include ++#include ++ ++typedef enum { ++ RTCFG_CONN_SEARCHING, ++ RTCFG_CONN_STAGE_1, ++ RTCFG_CONN_STAGE_2, ++ RTCFG_CONN_READY, ++ RTCFG_CONN_DEAD ++} RTCFG_CONN_STATE; ++ ++struct rtcfg_connection { ++ struct list_head entry; ++ int ifindex; ++ RTCFG_CONN_STATE state; ++ u8 mac_addr[MAX_ADDR_LEN]; ++ unsigned int addr_type; ++ union { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 ip_addr; ++#endif ++ } addr; ++ void *stage1_data; ++ size_t stage1_size; ++ struct rtcfg_file *stage2_file; ++ u32 cfg_offs; ++ unsigned int flags; ++ unsigned int burstrate; ++ nanosecs_abs_t last_frame; ++ u64 cfg_timeout; ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_regular proc_entry; ++#endif ++}; ++ ++int rtcfg_do_conn_event(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#endif /* __RTCFG_CONN_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_client_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_client_event.h 2021-04-07 16:01:26.708634908 +0800 +@@ -0,0 +1,45 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_client_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_CLIENT_EVENT_H_ ++#define __RTCFG_CLIENT_EVENT_H_ ++ ++#include ++ ++int rtcfg_main_state_client_0(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_1(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_announced(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_all_known(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_all_frames(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_2(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++int rtcfg_main_state_client_ready(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#endif /* __RTCFG_CLIENT_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_event.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_event.h 2021-04-07 16:01:26.704634913 +0800 +@@ -0,0 +1,121 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_event.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_EVENT_H_ ++#define __RTCFG_EVENT_H_ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#define FLAG_TIMER_STARTED 16 ++#define FLAG_TIMER_SHUTDOWN 17 ++#define FLAG_TIMER_PENDING 18 ++ ++#define _FLAG_TIMER_STARTED (1 << FLAG_TIMER_STARTED) ++#define _FLAG_TIMER_SHUTDOWN (1 << FLAG_TIMER_SHUTDOWN) ++#define _FLAG_TIMER_PENDING (1 << FLAG_TIMER_PENDING) ++ ++typedef enum { ++ RTCFG_MAIN_OFF, ++ RTCFG_MAIN_SERVER_RUNNING, ++ RTCFG_MAIN_CLIENT_0, ++ RTCFG_MAIN_CLIENT_1, ++ RTCFG_MAIN_CLIENT_ANNOUNCED, ++ RTCFG_MAIN_CLIENT_ALL_KNOWN, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES, ++ RTCFG_MAIN_CLIENT_2, ++ RTCFG_MAIN_CLIENT_READY ++} RTCFG_MAIN_STATE; ++ ++struct rtcfg_station { ++ u8 mac_addr[ETH_ALEN]; /* Ethernet-specific! 
*/ ++ u8 flags; ++}; ++ ++struct rtcfg_device { ++ RTCFG_MAIN_STATE state; ++ u32 other_stations; ++ u32 stations_found; ++ u32 stations_ready; ++ rtdm_mutex_t dev_mutex; ++ struct list_head event_calls; ++ rtdm_lock_t event_calls_lock; ++ rtdm_timer_t timer; ++ unsigned long flags; ++ unsigned int burstrate; ++#ifdef CONFIG_XENO_OPT_VFILE ++ struct xnvfile_directory proc_entry; ++ struct xnvfile_regular proc_state_vfile; ++ struct xnvfile_regular proc_stations_vfile; ++#endif ++ ++ union { ++ struct { ++ unsigned int addr_type; ++ union { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 ip_addr; ++#endif ++ } srv_addr; ++ u8 srv_mac_addr[MAX_ADDR_LEN]; ++ u8 *stage2_buffer; ++ u32 cfg_len; ++ u32 cfg_offs; ++ unsigned int packet_counter; ++ u32 chain_len; ++ struct rtskb *stage2_chain; ++ u32 max_stations; ++ struct rtcfg_station *station_addr_list; ++ } clt; ++ ++ struct { ++ u32 clients_configured; ++ struct list_head conn_list; ++ u16 heartbeat; ++ u64 heartbeat_timeout; ++ } srv; ++ } spec; ++}; ++ ++extern struct rtcfg_device device[MAX_RT_DEVICES]; ++extern const char *rtcfg_event[]; ++extern const char *rtcfg_main_state[]; ++ ++int rtcfg_do_main_event(int ifindex, RTCFG_EVENT event_id, void *event_data); ++void rtcfg_next_main_state(int ifindex, RTCFG_MAIN_STATE state); ++ ++void rtcfg_queue_blocking_call(int ifindex, struct rt_proc_call *call); ++struct rt_proc_call *rtcfg_dequeue_blocking_call(int ifindex); ++void rtcfg_complete_cmd(int ifindex, RTCFG_EVENT event_id, int result); ++void rtcfg_reset_device(int ifindex); ++ ++void rtcfg_init_state_machines(void); ++void rtcfg_cleanup_state_machines(void); ++ ++#endif /* __RTCFG_EVENT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_ioctl.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_ioctl.h 2021-04-07 16:01:26.699634921 +0800 +@@ -0,0 +1,33 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_ioctl.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTCFG_IOCTL_H_ ++#define __RTCFG_IOCTL_H_ ++ ++extern struct rtnet_ioctls rtcfg_ioctls; ++ ++#define rtcfg_init_ioctls() rtnet_register_ioctls(&rtcfg_ioctls) ++#define rtcfg_cleanup_ioctls() rtnet_unregister_ioctls(&rtcfg_ioctls) ++ ++#endif /* __RTCFG_IOCTL_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg.h 2021-04-07 16:01:26.694634928 +0800 +@@ -0,0 +1,47 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_H_INTERNAL_ ++#define __RTCFG_H_INTERNAL_ ++ ++#include ++ ++#define MIN(a, b) ((a) < (b) ? (a) : (b)) ++ ++/*** ++ * RTcfg debugging ++ */ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++ ++extern int rtcfg_debug; ++ ++/* use 0 for production, 1 for verification, >2 for debug */ ++#define RTCFG_DEFAULT_DEBUG_LEVEL 10 ++ ++#define RTCFG_DEBUG(n, args...) (rtcfg_debug >= (n)) ? (rtdm_printk(args)) : 0 ++#else ++#define RTCFG_DEBUG(n, args...) ++#endif /* CONFIG_RTCFG_DEBUG */ ++ ++#endif /* __RTCFG_H_INTERNAL_ */ +--- linux/drivers/xenomai/net/stack/include/rtcfg/rtcfg_frame.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg/rtcfg_frame.h 2021-04-07 16:01:26.690634934 +0800 +@@ -0,0 +1,139 @@ ++/*** ++ * ++ * include/rtcfg/rtcfg_frame.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __RTCFG_FRAME_H_ ++#define __RTCFG_FRAME_H_ ++ ++#include ++#include ++#include ++ ++#include ++ ++#define ETH_RTCFG 0x9022 ++ ++#define RTCFG_SKB_PRIO \ ++ RTSKB_PRIO_VALUE(QUEUE_MIN_PRIO - 1, RTSKB_DEF_NRT_CHANNEL) ++ ++#define RTCFG_ID_STAGE_1_CFG 0 ++#define RTCFG_ID_ANNOUNCE_NEW 1 ++#define RTCFG_ID_ANNOUNCE_REPLY 2 ++#define RTCFG_ID_STAGE_2_CFG 3 ++#define RTCFG_ID_STAGE_2_CFG_FRAG 4 ++#define RTCFG_ID_ACK_CFG 5 ++#define RTCFG_ID_READY 6 ++#define RTCFG_ID_HEARTBEAT 7 ++#define RTCFG_ID_DEAD_STATION 8 ++ ++#define RTCFG_ADDRSIZE_MAC 0 ++#define RTCFG_ADDRSIZE_IP 4 ++#define RTCFG_MAX_ADDRSIZE RTCFG_ADDRSIZE_IP ++ ++#define RTCFG_FLAG_STAGE_2_DATA 0 ++#define RTCFG_FLAG_READY 1 ++ ++#define _RTCFG_FLAG_STAGE_2_DATA (1 << RTCFG_FLAG_STAGE_2_DATA) ++#define _RTCFG_FLAG_READY (1 << RTCFG_FLAG_READY) ++ ++struct rtcfg_frm_head { ++#if defined(__LITTLE_ENDIAN_BITFIELD) ++ u8 id : 5; ++ u8 version : 3; ++#elif defined(__BIG_ENDIAN_BITFIELD) ++ u8 version : 3; ++ u8 id : 5; ++#else ++#error unsupported byte order ++#endif ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_1_cfg { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 client_addr[0]; ++ u8 server_addr[0]; ++ u8 burstrate; ++ u16 cfg_len; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_announce { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 addr[0]; ++ u8 flags; ++ u8 burstrate; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_2_cfg { ++ struct rtcfg_frm_head head; ++ u8 flags; ++ u32 stations; ++ u16 heartbeat_period; ++ u32 cfg_len; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_stage_2_cfg_frag { ++ struct rtcfg_frm_head head; ++ u32 frag_offs; ++ u8 cfg_data[0]; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_ack_cfg { ++ struct rtcfg_frm_head head; ++ u32 ack_len; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_simple { ++ struct rtcfg_frm_head head; ++} __attribute__((packed)); ++ ++struct rtcfg_frm_dead_station { ++ struct rtcfg_frm_head head; ++ u8 addr_type; ++ u8 logical_addr[0]; ++ u8 physical_addr[32]; ++} __attribute__((packed)); ++ ++int rtcfg_send_stage_1(struct rtcfg_connection *conn); ++int rtcfg_send_stage_2(struct rtcfg_connection *conn, int send_data); ++int rtcfg_send_stage_2_frag(struct rtcfg_connection *conn); ++int rtcfg_send_announce_new(int ifindex); ++int rtcfg_send_announce_reply(int ifindex, u8 *dest_mac_addr); ++int rtcfg_send_ack(int ifindex); ++int rtcfg_send_dead_station(struct rtcfg_connection *conn); ++ ++int rtcfg_send_simple_frame(int ifindex, int frame_id, u8 *dest_addr); ++ ++#define rtcfg_send_ready(ifindex) \ ++ rtcfg_send_simple_frame(ifindex, RTCFG_ID_READY, NULL) ++#define rtcfg_send_heartbeat(ifindex) \ ++ rtcfg_send_simple_frame(ifindex, RTCFG_ID_HEARTBEAT, \ ++ device[ifindex].spec.clt.srv_mac_addr) ++ ++int __init rtcfg_init_frames(void); ++void rtcfg_cleanup_frames(void); ++ ++#endif /* __RTCFG_FRAME_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_rtpc.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_rtpc.h 2021-04-07 16:01:26.685634941 +0800 +@@ -0,0 +1,71 @@ ++/*** ++ * ++ * include/rtnet_rtpc.h ++ * ++ * RTnet - real-time networking subsystem ++ * ++ * Copyright (C) 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your 
option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTNET_RTPC_H_ ++#define __RTNET_RTPC_H_ ++ ++#include ++ ++#include ++ ++struct rt_proc_call; ++ ++typedef int (*rtpc_proc)(struct rt_proc_call *call); ++typedef void (*rtpc_copy_back_proc)(struct rt_proc_call *call, void *priv_data); ++typedef void (*rtpc_cleanup_proc)(void *priv_data); ++ ++struct rt_proc_call { ++ struct list_head list_entry; ++ int processed; ++ rtpc_proc proc; ++ int result; ++ atomic_t ref_count; ++ wait_queue_head_t call_wq; ++ rtpc_cleanup_proc cleanup_handler; ++ char priv_data[0] __attribute__((aligned(8))); ++}; ++ ++#define CALL_PENDING 1000 /* result value for blocked calls */ ++ ++int rtnet_rtpc_dispatch_call(rtpc_proc rt_proc, unsigned int timeout, ++ void *priv_data, size_t priv_data_size, ++ rtpc_copy_back_proc copy_back_handler, ++ rtpc_cleanup_proc cleanup_handler); ++ ++void rtnet_rtpc_complete_call(struct rt_proc_call *call, int result); ++void rtnet_rtpc_complete_call_nrt(struct rt_proc_call *call, int result); ++ ++#define rtpc_dispatch_call rtnet_rtpc_dispatch_call ++#define rtpc_complete_call rtnet_rtpc_complete_call ++#define rtpc_complete_call_nrt rtnet_rtpc_complete_call_nrt ++ ++#define rtpc_get_priv(call, type) (type *)(call->priv_data) ++#define rtpc_get_result(call) call->result ++#define rtpc_set_result(call, new_result) call->result = new_result ++#define rtpc_set_cleanup_handler(call, handler) call->cleanup_handler = handler; ++ ++int __init rtpc_init(void); ++void rtpc_cleanup(void); ++ ++#endif /* __RTNET_RTPC_H_ */ +--- linux/drivers/xenomai/net/stack/include/nomac_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/nomac_chrdev.h 2021-04-07 16:01:26.680634948 +0800 +@@ -0,0 +1,39 @@ ++/*** ++ * ++ * include/nomac_chrdev.h ++ * ++ * RTmac - real-time networking media access control subsystem ++ * Copyright (C) 2002 Marc Kleine-Budde , ++ * 2003, 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#ifndef __NOMAC_CHRDEV_H_ ++#define __NOMAC_CHRDEV_H_ ++ ++#include ++ ++struct nomac_config { ++ struct rtnet_ioctl_head head; ++}; ++ ++#define NOMAC_IOC_ATTACH \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_NOMAC, 0, struct nomac_config) ++#define NOMAC_IOC_DETACH \ ++ _IOW(RTNET_IOC_TYPE_RTMAC_NOMAC, 1, struct nomac_config) ++ ++#endif /* __NOMAC_CHRDEV_H_ */ +--- linux/drivers/xenomai/net/stack/include/ipv4_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/ipv4_chrdev.h 2021-04-07 16:01:26.676634953 +0800 +@@ -0,0 +1,94 @@ ++/*** ++ * ++ * include/ipv4.h ++ * ++ * Real-Time IP/UDP/ICMP stack ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __IPV4_H_ ++#define __RTCFG_H_ ++ ++#include ++ ++struct ipv4_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ /*** rtroute ***/ ++ struct { ++ __u32 ip_addr; ++ } solicit; ++ ++ struct { ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ __u32 ip_addr; ++ } gethost; ++ ++ struct { ++ __u8 dev_addr[DEV_ADDR_LEN]; ++ __u32 ip_addr; ++ } addhost; ++ ++ struct { ++ __u32 ip_addr; ++ } delhost; ++ ++ struct { ++ __u32 net_addr; ++ __u32 net_mask; ++ __u32 gw_addr; ++ } addnet; ++ ++ struct { ++ __u32 net_addr; ++ __u32 net_mask; ++ } delnet; ++ ++ /*** rtping ***/ ++ struct { ++ __u32 ip_addr; ++ __u16 id; ++ __u16 sequence; ++ __u32 msg_size; ++ __u32 timeout; ++ __s64 rtt; ++ } ping; ++ ++ __u64 __padding[8]; ++ } args; ++}; ++ ++#define IOC_RT_HOST_ROUTE_ADD _IOW(RTNET_IOC_TYPE_IPV4, 0, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_SOLICIT _IOW(RTNET_IOC_TYPE_IPV4, 1, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_DELETE \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 2 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_NET_ROUTE_ADD \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 3 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_NET_ROUTE_DELETE \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 4 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_PING \ ++ _IOWR(RTNET_IOC_TYPE_IPV4, 5 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_DELETE_DEV \ ++ _IOW(RTNET_IOC_TYPE_IPV4, 6, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_GET \ ++ _IOWR(RTNET_IOC_TYPE_IPV4, 7 | RTNET_IOC_NODEV_PARAM, struct ipv4_cmd) ++#define IOC_RT_HOST_ROUTE_GET_DEV _IOWR(RTNET_IOC_TYPE_IPV4, 8, struct ipv4_cmd) ++ ++#endif /* __IPV4_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtnet_port.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtnet_port.h 2021-04-07 16:01:26.671634961 +0800 +@@ -0,0 +1,113 @@ ++/* include/rtnet_port.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2003 Wittawat Yamwong ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the 
Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++#ifndef __RTNET_PORT_H_ ++#define __RTNET_PORT_H_ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static inline void rtnetif_start_queue(struct rtnet_device *rtdev) ++{ ++ clear_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline void rtnetif_wake_queue(struct rtnet_device *rtdev) ++{ ++ if (test_and_clear_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state)) ++ /*TODO __netif_schedule(dev); */; ++} ++ ++static inline void rtnetif_stop_queue(struct rtnet_device *rtdev) ++{ ++ set_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline int rtnetif_queue_stopped(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_XOFF, &rtdev->link_state); ++} ++ ++static inline int rtnetif_running(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_START, &rtdev->link_state); ++} ++ ++static inline int rtnetif_device_present(struct rtnet_device *rtdev) ++{ ++ return test_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state); ++} ++ ++static inline void rtnetif_device_detach(struct rtnet_device *rtdev) ++{ ++ if (test_and_clear_bit(__RTNET_LINK_STATE_PRESENT, ++ &rtdev->link_state) && ++ rtnetif_running(rtdev)) { ++ rtnetif_stop_queue(rtdev); ++ } ++} ++ ++static inline void rtnetif_device_attach(struct rtnet_device *rtdev) ++{ ++ if (!test_and_set_bit(__RTNET_LINK_STATE_PRESENT, &rtdev->link_state) && ++ rtnetif_running(rtdev)) { ++ rtnetif_wake_queue(rtdev); ++ /* __netdev_watchdog_up(rtdev); */ ++ } ++} ++ ++static inline void rtnetif_carrier_on(struct rtnet_device *rtdev) ++{ ++ clear_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++ /* ++ if (netif_running(dev)) ++ __netdev_watchdog_up(dev); ++ */ ++} ++ ++static inline void rtnetif_carrier_off(struct rtnet_device *rtdev) ++{ ++ set_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++} ++ ++static inline int rtnetif_carrier_ok(struct rtnet_device *rtdev) ++{ ++ return !test_bit(__RTNET_LINK_STATE_NOCARRIER, &rtdev->link_state); ++} ++ ++#define NIPQUAD(addr) \ ++ ((unsigned char *)&addr)[0], ((unsigned char *)&addr)[1], \ ++ ((unsigned char *)&addr)[2], ((unsigned char *)&addr)[3] ++#define NIPQUAD_FMT "%u.%u.%u.%u" ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTNET_PORT_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtskb_fifo.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtskb_fifo.h 2021-04-07 16:01:26.666634968 +0800 +@@ -0,0 +1,144 @@ ++/*** ++ * ++ * include/rtskb_fifo.h ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTSKB_FIFO_H_ ++#define __RTSKB_FIFO_H_ ++ ++#include ++ ++struct rtskb_fifo { ++ unsigned long read_pos ____cacheline_aligned_in_smp; ++ rtdm_lock_t read_lock; ++ unsigned long size_mask; ++ unsigned long write_pos ____cacheline_aligned_in_smp; ++ rtdm_lock_t write_lock; ++ struct rtskb *buffer[0]; ++}; ++ ++#define DECLARE_RTSKB_FIFO(name_prefix, size) \ ++ struct { \ ++ struct rtskb_fifo fifo; \ ++ struct rtskb *__buffer[(size)]; \ ++ } name_prefix ++ ++static inline int __rtskb_fifo_insert(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ unsigned long pos = fifo->write_pos; ++ unsigned long new_pos = (pos + 1) & fifo->size_mask; ++ ++ if (unlikely(new_pos == fifo->read_pos)) ++ return -EAGAIN; ++ ++ fifo->buffer[pos] = rtskb; ++ ++ /* rtskb must have been written before write_pos update */ ++ smp_wmb(); ++ ++ fifo->write_pos = new_pos; ++ ++ return 0; ++} ++ ++static inline int rtskb_fifo_insert(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ rtdm_lockctx_t context; ++ int result; ++ ++ rtdm_lock_get_irqsave(&fifo->write_lock, context); ++ result = __rtskb_fifo_insert(fifo, rtskb); ++ rtdm_lock_put_irqrestore(&fifo->write_lock, context); ++ ++ return result; ++} ++ ++static inline int rtskb_fifo_insert_inirq(struct rtskb_fifo *fifo, ++ struct rtskb *rtskb) ++{ ++ int result; ++ ++ rtdm_lock_get(&fifo->write_lock); ++ result = __rtskb_fifo_insert(fifo, rtskb); ++ rtdm_lock_put(&fifo->write_lock); ++ ++ return result; ++} ++ ++static inline struct rtskb *__rtskb_fifo_remove(struct rtskb_fifo *fifo) ++{ ++ unsigned long pos = fifo->read_pos; ++ struct rtskb *result; ++ ++ /* check FIFO status first */ ++ if (unlikely(pos == fifo->write_pos)) ++ return NULL; ++ ++ /* at least one rtskb is enqueued, so get the next one */ ++ result = fifo->buffer[pos]; ++ ++ /* result must have been read before read_pos update */ ++ smp_rmb(); ++ ++ fifo->read_pos = (pos + 1) & fifo->size_mask; ++ ++ /* read_pos must have been written for a consitent fifo state on exit */ ++ smp_wmb(); ++ ++ return result; ++} ++ ++static inline struct rtskb *rtskb_fifo_remove(struct rtskb_fifo *fifo) ++{ ++ rtdm_lockctx_t context; ++ struct rtskb *result; ++ ++ rtdm_lock_get_irqsave(&fifo->read_lock, context); ++ result = __rtskb_fifo_remove(fifo); ++ rtdm_lock_put_irqrestore(&fifo->read_lock, context); ++ ++ return result; ++} ++ ++static inline struct rtskb *rtskb_fifo_remove_inirq(struct rtskb_fifo *fifo) ++{ ++ struct rtskb *result; ++ ++ rtdm_lock_get(&fifo->read_lock); ++ result = __rtskb_fifo_remove(fifo); ++ rtdm_lock_put(&fifo->read_lock); ++ ++ return result; ++} ++ ++/* for now inlined... 
*/ ++static inline void rtskb_fifo_init(struct rtskb_fifo *fifo, unsigned long size) ++{ ++ fifo->read_pos = 0; ++ fifo->write_pos = 0; ++ fifo->size_mask = size - 1; ++ rtdm_lock_init(&fifo->read_lock); ++ rtdm_lock_init(&fifo->write_lock); ++} ++ ++#endif /* __RTSKB_FIFO_H_ */ +--- linux/drivers/xenomai/net/stack/include/rtwlan_io.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtwlan_io.h 2021-04-07 16:01:26.662634974 +0800 +@@ -0,0 +1,104 @@ ++/* rtwlan_io.h ++ * ++ * Copyright (C) 2006 Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef RTWLAN_IO ++#define RTWLAN_IO ++ ++#include ++ ++#define RTWLAN_TXMODE_RAW 0 ++#define RTWLAN_TXMODE_ACK 1 ++#define RTWLAN_TXMODE_MCAST 2 ++ ++#define ENORTWLANDEV 0xff08 ++ ++struct rtwlan_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ unsigned int bitrate; ++ unsigned int channel; ++ unsigned int retry; ++ unsigned int txpower; ++ unsigned int mode; ++ unsigned int autoresponder; ++ unsigned int dropbcast; ++ unsigned int dropmcast; ++ unsigned int bbpsens; ++ } set; ++ ++ struct { ++ unsigned int address; ++ unsigned int value; ++ } reg; ++ ++ struct { ++ int ifindex; ++ unsigned int flags; ++ unsigned int bitrate; ++ unsigned int channel; ++ unsigned int retry; ++ unsigned int txpower; ++ unsigned int bbpsens; ++ unsigned int mode; ++ unsigned int autoresponder; ++ unsigned int dropbcast; ++ unsigned int dropmcast; ++ unsigned int rx_packets; ++ unsigned int tx_packets; ++ unsigned int tx_retry; ++ } info; ++ } args; ++}; ++ ++#define RTNET_IOC_TYPE_RTWLAN 8 ++ ++#define IOC_RTWLAN_IFINFO \ ++ _IOWR(RTNET_IOC_TYPE_RTWLAN, 0 | RTNET_IOC_NODEV_PARAM, \ ++ struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BITRATE _IOWR(RTNET_IOC_TYPE_RTWLAN, 1, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_CHANNEL _IOWR(RTNET_IOC_TYPE_RTWLAN, 2, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_TXPOWER _IOWR(RTNET_IOC_TYPE_RTWLAN, 3, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_RETRY _IOWR(RTNET_IOC_TYPE_RTWLAN, 4, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_TXMODE _IOWR(RTNET_IOC_TYPE_RTWLAN, 5, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_DROPBCAST _IOWR(RTNET_IOC_TYPE_RTWLAN, 6, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_DROPMCAST _IOWR(RTNET_IOC_TYPE_RTWLAN, 7, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_REGREAD _IOWR(RTNET_IOC_TYPE_RTWLAN, 8, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_REGWRITE _IOWR(RTNET_IOC_TYPE_RTWLAN, 9, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPWRITE _IOWR(RTNET_IOC_TYPE_RTWLAN, 10, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPREAD _IOWR(RTNET_IOC_TYPE_RTWLAN, 11, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_BBPSENS _IOWR(RTNET_IOC_TYPE_RTWLAN, 12, struct rtwlan_cmd) ++ ++#define IOC_RTWLAN_AUTORESP _IOWR(RTNET_IOC_TYPE_RTWLAN, 13, struct rtwlan_cmd) ++ ++#endif +--- 
linux/drivers/xenomai/net/stack/include/rtcfg_chrdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/include/rtcfg_chrdev.h 2021-04-07 16:01:26.657634981 +0800 +@@ -0,0 +1,176 @@ ++/*** ++ * ++ * include/rtcfg.h ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004, 2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#ifndef __RTCFG_H_ ++#define __RTCFG_H_ ++ ++#include ++ ++#define ERTCFG_START 0x0F00 ++#define ESTAGE1SIZE ERTCFG_START ++ ++#define FLAG_STAGE_2_DATA 0x0001 ++#define FLAG_READY 0x0002 ++#define FLAG_ASSIGN_ADDR_BY_MAC 0x0100 ++ ++#define RTCFG_ADDR_MAC 0x00 ++#define RTCFG_ADDR_IP 0x01 ++#define RTCFG_ADDR_MASK 0xFF ++ ++typedef enum { ++ RTCFG_CMD_SERVER, ++ RTCFG_CMD_ADD, ++ RTCFG_CMD_DEL, ++ RTCFG_CMD_WAIT, ++ RTCFG_CMD_CLIENT, ++ RTCFG_CMD_ANNOUNCE, ++ RTCFG_CMD_READY, ++ RTCFG_CMD_DETACH, ++ ++ /* internal usage only */ ++ RTCFG_TIMER, ++ RTCFG_FRM_STAGE_1_CFG, ++ RTCFG_FRM_ANNOUNCE_NEW, ++ RTCFG_FRM_ANNOUNCE_REPLY, ++ RTCFG_FRM_STAGE_2_CFG, ++ RTCFG_FRM_STAGE_2_CFG_FRAG, ++ RTCFG_FRM_ACK_CFG, ++ RTCFG_FRM_READY, ++ RTCFG_FRM_HEARTBEAT, ++ RTCFG_FRM_DEAD_STATION ++} RTCFG_EVENT; ++ ++struct rtskb; ++struct rtcfg_station; ++struct rtcfg_connection; ++struct rtcfg_file; ++ ++struct rtcfg_cmd { ++ struct rtnet_ioctl_head head; ++ ++ union { ++ struct { ++ __u32 period; ++ __u32 burstrate; ++ __u32 heartbeat; ++ __u32 threshold; ++ __u32 flags; ++ } server; ++ ++ struct { ++ __u32 addr_type; ++ __u32 ip_addr; ++ __u8 mac_addr[DEV_ADDR_LEN]; ++ __u32 timeout; ++ __u16 stage1_size; ++ __u16 __padding; ++ void *stage1_data; ++ const char *stage2_filename; ++ ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ } add; ++ ++ struct { ++ __u32 addr_type; ++ __u32 ip_addr; ++ __u8 mac_addr[DEV_ADDR_LEN]; ++ ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ } del; ++ ++ struct { ++ __u32 timeout; ++ } wait; ++ ++ struct { ++ __u32 timeout; ++ __u32 max_stations; ++ __u64 buffer_size; ++ void *buffer; ++ ++ /* internal usage only */ ++ struct rtcfg_station *station_buf; ++ struct rtskb *rtskb; ++ } client; ++ ++ struct { ++ __u32 timeout; ++ __u32 flags; ++ __u32 burstrate; ++ __u32 __padding; ++ __u64 buffer_size; ++ void *buffer; ++ ++ /* internal usage only */ ++ struct rtskb *rtskb; ++ } announce; ++ ++ struct { ++ __u32 timeout; ++ } ready; ++ ++ struct { ++ /* internal usage only */ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *stage2_file; ++ struct rtcfg_station *station_addr_list; ++ struct rtskb *stage2_chain; ++ } detach; ++ ++ __u64 __padding[16]; ++ } args; ++ ++ /* internal usage only */ ++ union { ++ struct { ++ int ifindex; ++ RTCFG_EVENT event_id; ++ } data; ++ ++ __u64 __padding[2]; ++ 
} internal; ++}; ++ ++#define RTCFG_IOC_SERVER \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_SERVER, struct rtcfg_cmd) ++#define RTCFG_IOC_ADD \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_ADD, struct rtcfg_cmd) ++#define RTCFG_IOC_DEL \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_DEL, struct rtcfg_cmd) ++#define RTCFG_IOC_WAIT \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_WAIT, struct rtcfg_cmd) ++#define RTCFG_IOC_CLIENT \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_CLIENT, struct rtcfg_cmd) ++#define RTCFG_IOC_ANNOUNCE \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_ANNOUNCE, struct rtcfg_cmd) ++#define RTCFG_IOC_READY \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_READY, struct rtcfg_cmd) ++#define RTCFG_IOC_DETACH \ ++ _IOW(RTNET_IOC_TYPE_RTCFG, RTCFG_CMD_DETACH, struct rtcfg_cmd) ++ ++#endif /* __RTCFG_H_ */ +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_proc.c 2021-04-07 16:01:26.652634988 +0800 +@@ -0,0 +1,347 @@ ++/*** ++ * ++ * rtcfg/rtcfg_proc.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++DEFINE_MUTEX(nrt_proc_lock); ++static struct xnvfile_directory rtcfg_proc_root; ++ ++static int rtnet_rtcfg_proc_lock_get(struct xnvfile *vfile) ++{ ++ return mutex_lock_interruptible(&nrt_proc_lock); ++} ++ ++static void rtnet_rtcfg_proc_lock_put(struct xnvfile *vfile) ++{ ++ return mutex_unlock(&nrt_proc_lock); ++} ++ ++static struct xnvfile_lock_ops rtnet_rtcfg_proc_lock_ops = { ++ .get = rtnet_rtcfg_proc_lock_get, ++ .put = rtnet_rtcfg_proc_lock_put, ++}; ++ ++int rtnet_rtcfg_dev_state_show(struct xnvfile_regular_iterator *it, void *data) ++{ ++ struct rtcfg_device *rtcfg_dev = xnvfile_priv(it->vfile); ++ const char *state_name[] = { "OFF", ++ "SERVER_RUNNING", ++ "CLIENT_0", ++ "CLIENT_1", ++ "CLIENT_ANNOUNCED", ++ "CLIENT_ALL_KNOWN", ++ "CLIENT_ALL_FRAMES", ++ "CLIENT_2", ++ "CLIENT_READY" }; ++ ++ xnvfile_printf(it, ++ "state:\t\t\t%d (%s)\n" ++ "flags:\t\t\t%08lX\n" ++ "other stations:\t\t%d\n" ++ "stations found:\t\t%d\n" ++ "stations ready:\t\t%d\n", ++ rtcfg_dev->state, state_name[rtcfg_dev->state], ++ rtcfg_dev->flags, rtcfg_dev->other_stations, ++ rtcfg_dev->stations_found, rtcfg_dev->stations_ready); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ xnvfile_printf(it, ++ "configured clients:\t%d\n" ++ "burstrate:\t\t%d\n" ++ "heartbeat period:\t%d ms\n", ++ rtcfg_dev->spec.srv.clients_configured, ++ rtcfg_dev->burstrate, ++ rtcfg_dev->spec.srv.heartbeat); ++ } else if (rtcfg_dev->state != RTCFG_MAIN_OFF) { ++ xnvfile_printf( ++ it, ++ "address type:\t\t%d\n" ++ "server address:\t\t%02X:%02X:%02X:%02X:%02X:%02X\n" ++ "stage 2 config:\t\t%d/%d\n", ++ rtcfg_dev->spec.clt.addr_type, ++ rtcfg_dev->spec.clt.srv_mac_addr[0], ++ rtcfg_dev->spec.clt.srv_mac_addr[1], ++ rtcfg_dev->spec.clt.srv_mac_addr[2], ++ rtcfg_dev->spec.clt.srv_mac_addr[3], ++ rtcfg_dev->spec.clt.srv_mac_addr[4], ++ rtcfg_dev->spec.clt.srv_mac_addr[5], ++ rtcfg_dev->spec.clt.cfg_offs, ++ rtcfg_dev->spec.clt.cfg_len); ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_state_vfile_ops = { ++ .show = rtnet_rtcfg_dev_state_show, ++}; ++ ++int rtnet_rtcfg_dev_stations_show(struct xnvfile_regular_iterator *it, void *d) ++{ ++ struct rtcfg_device *rtcfg_dev = xnvfile_priv(it->vfile); ++ struct rtcfg_connection *conn; ++ struct rtcfg_station *station; ++ int i; ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ list_for_each_entry (conn, &rtcfg_dev->spec.srv.conn_list, ++ entry) { ++ if ((conn->state != RTCFG_CONN_SEARCHING) && ++ (conn->state != RTCFG_CONN_DEAD)) ++ xnvfile_printf( ++ it, ++ "%02X:%02X:%02X:%02X:%02X:%02X\t%02X\n", ++ conn->mac_addr[0], conn->mac_addr[1], ++ conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5], ++ conn->flags); ++ } ++ } else if (rtcfg_dev->spec.clt.station_addr_list) { ++ for (i = 0; i < rtcfg_dev->stations_found; i++) { ++ station = &rtcfg_dev->spec.clt.station_addr_list[i]; ++ ++ xnvfile_printf( ++ it, "%02X:%02X:%02X:%02X:%02X:%02X\t%02X\n", ++ station->mac_addr[0], station->mac_addr[1], ++ station->mac_addr[2], station->mac_addr[3], ++ station->mac_addr[4], station->mac_addr[5], ++ station->flags); ++ } ++ } ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_stations_vfile_ops = { ++ .show = rtnet_rtcfg_dev_stations_show, ++}; ++ ++int rtnet_rtcfg_dev_conn_state_show(struct xnvfile_regular_iterator *it, ++ void *d) ++{ ++ struct rtcfg_connection *conn = 
xnvfile_priv(it->vfile); ++ char *state_name[] = { "SEARCHING", "STAGE_1", "STAGE_2", "READY", ++ "DEAD" }; ++ ++ xnvfile_printf(it, ++ "state:\t\t\t%d (%s)\n" ++ "flags:\t\t\t%02X\n" ++ "stage 1 size:\t\t%zd\n" ++ "stage 2 filename:\t%s\n" ++ "stage 2 size:\t\t%zd\n" ++ "stage 2 offset:\t\t%d\n" ++ "burstrate:\t\t%d\n" ++ "mac address:\t\t%02X:%02X:%02X:%02X:%02X:%02X\n", ++ conn->state, state_name[conn->state], conn->flags, ++ conn->stage1_size, ++ (conn->stage2_file) ? conn->stage2_file->name : "-", ++ (conn->stage2_file) ? conn->stage2_file->size : 0, ++ conn->cfg_offs, conn->burstrate, conn->mac_addr[0], ++ conn->mac_addr[1], conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5]); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if ((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ++ xnvfile_printf(it, "ip:\t\t\t%u.%u.%u.%u\n", ++ NIPQUAD(conn->addr.ip_addr)); ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ return 0; ++} ++ ++static struct xnvfile_regular_ops rtnet_rtcfg_dev_conn_state_vfile_ops = { ++ .show = rtnet_rtcfg_dev_conn_state_show, ++}; ++ ++void rtcfg_update_conn_proc_entries(int ifindex) ++{ ++ struct rtcfg_device *dev = &device[ifindex]; ++ struct rtcfg_connection *conn; ++ char name_buf[64]; ++ ++ if (dev->state != RTCFG_MAIN_SERVER_RUNNING) ++ return; ++ ++ list_for_each_entry (conn, &dev->spec.srv.conn_list, entry) { ++ switch (conn->addr_type & RTCFG_ADDR_MASK) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: ++ snprintf(name_buf, 64, "CLIENT_%u.%u.%u.%u", ++ NIPQUAD(conn->addr.ip_addr)); ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ default: /* RTCFG_ADDR_MAC */ ++ snprintf(name_buf, 64, ++ "CLIENT_%02X%02X%02X%02X%02X%02X", ++ conn->mac_addr[0], conn->mac_addr[1], ++ conn->mac_addr[2], conn->mac_addr[3], ++ conn->mac_addr[4], conn->mac_addr[5]); ++ break; ++ } ++ memset(&conn->proc_entry, '\0', sizeof(conn->proc_entry)); ++ conn->proc_entry.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ conn->proc_entry.ops = &rtnet_rtcfg_dev_conn_state_vfile_ops; ++ xnvfile_priv(&conn->proc_entry) = conn; ++ ++ xnvfile_init_regular(name_buf, &conn->proc_entry, ++ &dev->proc_entry); ++ } ++} ++ ++void rtcfg_remove_conn_proc_entries(int ifindex) ++{ ++ struct rtcfg_device *dev = &device[ifindex]; ++ struct rtcfg_connection *conn; ++ ++ if (dev->state != RTCFG_MAIN_SERVER_RUNNING) ++ return; ++ ++ list_for_each_entry (conn, &dev->spec.srv.conn_list, entry) ++ xnvfile_destroy_regular(&conn->proc_entry); ++} ++ ++void rtcfg_new_rtdev(struct rtnet_device *rtdev) ++{ ++ struct rtcfg_device *dev = &device[rtdev->ifindex]; ++ int err; ++ ++ mutex_lock(&nrt_proc_lock); ++ ++ memset(&dev->proc_entry, '\0', sizeof(dev->proc_entry)); ++ err = xnvfile_init_dir(rtdev->name, &dev->proc_entry, &rtcfg_proc_root); ++ if (err < 0) ++ goto error1; ++ ++ memset(&dev->proc_state_vfile, '\0', sizeof(dev->proc_state_vfile)); ++ dev->proc_state_vfile.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ dev->proc_state_vfile.ops = &rtnet_rtcfg_dev_state_vfile_ops; ++ xnvfile_priv(&dev->proc_state_vfile) = dev; ++ ++ err = xnvfile_init_regular("state", &dev->proc_state_vfile, ++ &dev->proc_entry); ++ if (err < 0) ++ goto error2; ++ ++ memset(&dev->proc_stations_vfile, '\0', ++ sizeof(dev->proc_stations_vfile)); ++ dev->proc_stations_vfile.entry.lockops = &rtnet_rtcfg_proc_lock_ops; ++ dev->proc_stations_vfile.ops = &rtnet_rtcfg_dev_stations_vfile_ops; ++ xnvfile_priv(&dev->proc_stations_vfile) = dev; ++ ++ err = 
xnvfile_init_regular("stations_list", &dev->proc_stations_vfile, ++ &dev->proc_entry); ++ if (err < 0) ++ goto error3; ++ ++ mutex_unlock(&nrt_proc_lock); ++ ++ return; ++ ++error3: ++ xnvfile_destroy_regular(&dev->proc_state_vfile); ++error2: ++ xnvfile_destroy_dir(&dev->proc_entry); ++error1: ++ dev->proc_entry.entry.pde = NULL; ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++void rtcfg_remove_rtdev(struct rtnet_device *rtdev) ++{ ++ struct rtcfg_device *dev = &device[rtdev->ifindex]; ++ ++ // To-Do: issue down command ++ ++ mutex_lock(&nrt_proc_lock); ++ ++ if (dev->proc_entry.entry.pde) { ++ rtcfg_remove_conn_proc_entries(rtdev->ifindex); ++ ++ xnvfile_destroy_regular(&dev->proc_stations_vfile); ++ xnvfile_destroy_regular(&dev->proc_state_vfile); ++ xnvfile_destroy_dir(&dev->proc_entry); ++ dev->proc_entry.entry.pde = NULL; ++ } ++ ++ mutex_unlock(&nrt_proc_lock); ++} ++ ++static struct rtdev_event_hook rtdev_hook = { .register_device = ++ rtcfg_new_rtdev, ++ .unregister_device = ++ rtcfg_remove_rtdev, ++ .ifup = NULL, ++ .ifdown = NULL }; ++ ++int rtcfg_init_proc(void) ++{ ++ struct rtnet_device *rtdev; ++ int i, err; ++ ++ err = xnvfile_init_dir("rtcfg", &rtcfg_proc_root, &rtnet_proc_root); ++ if (err < 0) ++ goto err1; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev) { ++ rtcfg_new_rtdev(rtdev); ++ rtdev_dereference(rtdev); ++ } ++ } ++ ++ rtdev_add_event_hook(&rtdev_hook); ++ return 0; ++ ++err1: ++ printk("RTcfg: unable to initialise /proc entries\n"); ++ return err; ++} ++ ++void rtcfg_cleanup_proc(void) ++{ ++ struct rtnet_device *rtdev; ++ int i; ++ ++ rtdev_del_event_hook(&rtdev_hook); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev) { ++ rtcfg_remove_rtdev(rtdev); ++ rtdev_dereference(rtdev); ++ } ++ } ++ ++ xnvfile_destroy_dir(&rtcfg_proc_root); ++} ++ ++#endif /* CONFIG_XENO_OPT_VFILE */ +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_conn_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_conn_event.c 2021-04-07 16:01:26.647634995 +0800 +@@ -0,0 +1,364 @@ ++/*** ++ * ++ * rtcfg/rtcfg_conn_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++/****************************** states ***************************************/ ++static int rtcfg_conn_state_searching(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_stage_1(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_stage_2(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_ready(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++static int rtcfg_conn_state_dead(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++const char *rtcfg_conn_state[] = { "RTCFG_CONN_SEARCHING", "RTCFG_CONN_STAGE_1", ++ "RTCFG_CONN_STAGE_2", "RTCFG_CONN_READY", ++ "RTCFG_CONN_DEAD" }; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG */ ++ ++static void rtcfg_conn_recv_announce_new(struct rtcfg_connection *conn, ++ struct rtskb *rtskb); ++static void rtcfg_conn_check_cfg_timeout(struct rtcfg_connection *conn); ++static void rtcfg_conn_check_heartbeat(struct rtcfg_connection *conn); ++ ++static int (*state[])(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data) = { ++ rtcfg_conn_state_searching, rtcfg_conn_state_stage_1, ++ rtcfg_conn_state_stage_2, rtcfg_conn_state_ready, rtcfg_conn_state_dead ++}; ++ ++int rtcfg_do_conn_event(struct rtcfg_connection *conn, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ int conn_state = conn->state; ++ ++ RTCFG_DEBUG(3, "RTcfg: %s() conn=%p, event=%s, state=%s\n", ++ __FUNCTION__, conn, rtcfg_event[event_id], ++ rtcfg_conn_state[conn_state]); ++ ++ return (*state[conn_state])(conn, event_id, event_data); ++} ++ ++static void rtcfg_next_conn_state(struct rtcfg_connection *conn, ++ RTCFG_CONN_STATE state) ++{ ++ RTCFG_DEBUG(4, "RTcfg: next connection state=%s \n", ++ rtcfg_conn_state[state]); ++ ++ conn->state = state; ++} ++ ++static int rtcfg_conn_state_searching(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ rtcfg_conn_recv_announce_new(conn, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ conn->last_frame = rtskb->time_stamp; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_READY); ++ ++ rtcfg_dev->stations_found++; ++ rtcfg_dev->stations_ready++; ++ rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, RTCFG_CMD_WAIT, 0); ++ ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_stage_1(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtcfg_frm_ack_cfg *ack_cfg; ++ int packets; ++ ++ switch (event_id) { ++ case RTCFG_FRM_ACK_CFG: ++ conn->last_frame = rtskb->time_stamp; ++ ++ ack_cfg = (struct rtcfg_frm_ack_cfg *)rtskb->data; ++ conn->cfg_offs = ntohl(ack_cfg->ack_len); ++ ++ if ((conn->flags & _RTCFG_FLAG_STAGE_2_DATA) != 0) { ++ if (conn->cfg_offs >= conn->stage2_file->size) { ++ 
rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, ++ RTCFG_CMD_WAIT, 0); ++ rtcfg_next_conn_state( ++ conn, ((conn->flags & ++ _RTCFG_FLAG_READY) != 0) ? ++ RTCFG_CONN_READY : ++ RTCFG_CONN_STAGE_2); ++ } else { ++ packets = conn->burstrate; ++ while ((conn->cfg_offs < ++ conn->stage2_file->size) && ++ (packets > 0)) { ++ rtcfg_send_stage_2_frag(conn); ++ packets--; ++ } ++ } ++ } else { ++ rtcfg_dev->spec.srv.clients_configured++; ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, ++ RTCFG_CMD_WAIT, 0); ++ rtcfg_next_conn_state( ++ conn, ((conn->flags & _RTCFG_FLAG_READY) != 0) ? ++ RTCFG_CONN_READY : ++ RTCFG_CONN_STAGE_2); ++ } ++ ++ break; ++ ++ case RTCFG_TIMER: ++ rtcfg_conn_check_cfg_timeout(conn); ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_stage_2(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ ++ switch (event_id) { ++ case RTCFG_FRM_READY: ++ conn->last_frame = rtskb->time_stamp; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_READY); ++ ++ conn->flags |= _RTCFG_FLAG_READY; ++ rtcfg_dev->stations_ready++; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(conn->ifindex, RTCFG_CMD_READY, 0); ++ ++ break; ++ ++ case RTCFG_TIMER: ++ rtcfg_conn_check_cfg_timeout(conn); ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_ready(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_TIMER: ++ rtcfg_conn_check_heartbeat(conn); ++ break; ++ ++ case RTCFG_FRM_HEARTBEAT: ++ conn->last_frame = rtskb->time_stamp; ++ break; ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int rtcfg_conn_state_dead(struct rtcfg_connection *conn, ++ RTCFG_EVENT event_id, void *event_data) ++{ ++ switch (event_id) { ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ rtcfg_conn_recv_announce_new(conn, (struct rtskb *)event_data); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ /* Spec to-do: signal station that it is assumed to be dead ++ (=> reboot command?) 
*/ ++ ++ default: ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for conn %p in %s()\n", ++ rtcfg_event[event_id], conn, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static void rtcfg_conn_recv_announce_new(struct rtcfg_connection *conn, ++ struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtcfg_frm_announce *announce_new; ++ int packets; ++ ++ conn->last_frame = rtskb->time_stamp; ++ ++ announce_new = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ conn->flags = announce_new->flags; ++ if (announce_new->burstrate < conn->burstrate) ++ conn->burstrate = announce_new->burstrate; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_STAGE_1); ++ ++ rtcfg_dev->stations_found++; ++ if ((conn->flags & _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready++; ++ ++ if (((conn->flags & _RTCFG_FLAG_STAGE_2_DATA) != 0) && ++ (conn->stage2_file != NULL)) { ++ packets = conn->burstrate - 1; ++ ++ rtcfg_send_stage_2(conn, 1); ++ ++ while ((conn->cfg_offs < conn->stage2_file->size) && ++ (packets > 0)) { ++ rtcfg_send_stage_2_frag(conn); ++ packets--; ++ } ++ } else { ++ rtcfg_send_stage_2(conn, 0); ++ conn->flags &= ~_RTCFG_FLAG_STAGE_2_DATA; ++ } ++} ++ ++static void rtcfg_conn_check_cfg_timeout(struct rtcfg_connection *conn) ++{ ++ struct rtcfg_device *rtcfg_dev; ++ ++ if (!conn->cfg_timeout) ++ return; ++ ++ if (rtdm_clock_read() >= conn->last_frame + conn->cfg_timeout) { ++ rtcfg_dev = &device[conn->ifindex]; ++ ++ rtcfg_dev->stations_found--; ++ if (conn->state == RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_SEARCHING); ++ conn->cfg_offs = 0; ++ conn->flags = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (conn->addr_type == RTCFG_ADDR_IP) { ++ struct rtnet_device *rtdev; ++ ++ /* MAC address yet unknown -> use broadcast address */ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return; ++ memcpy(conn->mac_addr, rtdev->broadcast, MAX_ADDR_LEN); ++ rtdev_dereference(rtdev); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } ++} ++ ++static void rtcfg_conn_check_heartbeat(struct rtcfg_connection *conn) ++{ ++ u64 timeout; ++ struct rtcfg_device *rtcfg_dev; ++ ++ timeout = device[conn->ifindex].spec.srv.heartbeat_timeout; ++ if (!timeout) ++ return; ++ ++ if (rtdm_clock_read() >= conn->last_frame + timeout) { ++ rtcfg_dev = &device[conn->ifindex]; ++ ++ rtcfg_dev->stations_found--; ++ rtcfg_dev->stations_ready--; ++ rtcfg_dev->spec.srv.clients_configured--; ++ ++ rtcfg_send_dead_station(conn); ++ ++ rtcfg_next_conn_state(conn, RTCFG_CONN_DEAD); ++ conn->cfg_offs = 0; ++ conn->flags = 0; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if ((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) { ++ struct rtnet_device *rtdev = ++ rtdev_get_by_index(conn->ifindex); ++ ++ rt_ip_route_del_host(conn->addr.ip_addr, rtdev); ++ ++ if (rtdev == NULL) ++ return; ++ ++ if (!(conn->addr_type & FLAG_ASSIGN_ADDR_BY_MAC)) ++ /* MAC address yet unknown -> use broadcast address */ ++ memcpy(conn->mac_addr, rtdev->broadcast, ++ MAX_ADDR_LEN); ++ ++ rtdev_dereference(rtdev); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } ++} +--- linux/drivers/xenomai/net/stack/rtcfg/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/Makefile 2021-04-07 16:01:26.643635001 +0800 +@@ -0,0 +1,14 @@ ++ccflags-y += -Idrivers/xenomai/net/stack/include ++ ++obj-$(CONFIG_XENO_DRIVERS_NET_RTCFG) += rtcfg.o ++ ++rtcfg-y := \ ++ 
rtcfg_module.o \
++	rtcfg_event.o \
++	rtcfg_client_event.o \
++	rtcfg_conn_event.o \
++	rtcfg_ioctl.o \
++	rtcfg_frame.o \
++	rtcfg_timer.o \
++	rtcfg_file.o \
++	rtcfg_proc.o
+--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_module.c 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_module.c 2021-04-07 16:01:26.638635008 +0800
+@@ -0,0 +1,83 @@
++/***
++ *
++ * rtcfg/rtcfg_module.c
++ *
++ * Real-Time Configuration Distribution Protocol
++ *
++ * Copyright (C) 2003, 2004 Jan Kiszka
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++
++MODULE_LICENSE("GPL");
++
++int __init rtcfg_init(void)
++{
++	int ret;
++
++	printk("RTcfg: init real-time configuration distribution protocol\n");
++
++	ret = rtcfg_init_ioctls();
++	if (ret != 0)
++		goto error1;
++
++	rtcfg_init_state_machines();
++
++	ret = rtcfg_init_frames();
++	if (ret != 0)
++		goto error2;
++
++#ifdef CONFIG_XENO_OPT_VFILE
++	ret = rtcfg_init_proc();
++	if (ret != 0) {
++		rtcfg_cleanup_frames();
++		goto error2;
++	}
++#endif
++
++	return 0;
++
++error2:
++	rtcfg_cleanup_state_machines();
++	rtcfg_cleanup_ioctls();
++
++error1:
++	return ret;
++}
++
++void rtcfg_cleanup(void)
++{
++#ifdef CONFIG_XENO_OPT_VFILE
++	rtcfg_cleanup_proc();
++#endif
++	rtcfg_cleanup_frames();
++	rtcfg_cleanup_state_machines();
++	rtcfg_cleanup_ioctls();
++
++	printk("RTcfg: unloaded\n");
++}
++
++module_init(rtcfg_init);
++module_exit(rtcfg_cleanup);
+--- linux/drivers/xenomai/net/stack/rtcfg/Kconfig 1970-01-01 08:00:00.000000000 +0800
++++ linux-patched/drivers/xenomai/net/stack/rtcfg/Kconfig 2021-04-07 16:01:26.633635015 +0800
+@@ -0,0 +1,23 @@
++config XENO_DRIVERS_NET_RTCFG
++	depends on XENO_DRIVERS_NET
++	tristate "RTcfg Service"
++	default y
++	---help---
++	The Real-Time Configuration service configures and monitors nodes in
++	a RTnet network. It works both with plain MAC as well as with IPv4
++	addresses (in case CONFIG_RTNET_RTIPV4 has been switched on). RTcfg
++	consists of a configuration server, which can run on the same station
++	as the TDMA master e.g., and one or more clients. Clients can join and
++	leave the network during runtime without interfering with other
++	stations. Besides network configuration, the RTcfg server can also
++	distribute custom data.
++
++	See Documentation/README.rtcfg for further information.
++
++config XENO_DRIVERS_NET_RTCFG_DEBUG
++	bool "RTcfg Debugging"
++	depends on XENO_DRIVERS_NET_RTCFG
++	default n
++	---help---
++	Enables debug message output of the RTcfg state machines. Switch on if
++	you have to trace some problem related to RTcfg.
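
The Kconfig help above describes the split into one configuration server and several clients, with optional distribution of custom data (the stage 2 file). As a simplified illustration of the burst pacing that rtcfg_send_stage_2() and rtcfg_send_stage_2_frag() implement further down in this patch, the following stand-alone sketch (not part of the patch) models a server that sends at most burstrate fragments per round, each capped by the MTU minus the fragment header, and then waits for the client's acknowledged offset before continuing. The file size, payload size, and burst rate are invented example values, not taken from the patch.

```c
/*
 * Hypothetical, stand-alone model of the stage 2 pacing used by the RTcfg
 * server: push at most `burstrate` fragments, then wait for the client's
 * RTCFG_FRM_ACK_CFG (which carries the received offset) before the next
 * burst. All numeric values are made-up examples.
 */
#include <stdio.h>
#include <stddef.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	size_t file_size = 10000;        /* size of the stage 2 config file */
	size_t frag_payload = 1500 - 18; /* assumed MTU minus frame header */
	unsigned int burstrate = 4;      /* negotiated burst rate */
	size_t cfg_offs = 0;             /* mirrors conn->cfg_offs on the server */
	unsigned int burst = 0;

	while (cfg_offs < file_size) {
		unsigned int packets = burstrate;

		/* one burst: corresponds to the fragment loops in the patch */
		while (cfg_offs < file_size && packets > 0) {
			size_t frag = MIN(frag_payload, file_size - cfg_offs);
			cfg_offs += frag;
			packets--;
		}
		printf("burst %u: client would ack offset %zu of %zu\n",
		       ++burst, cfg_offs, file_size);
		/* the real server resumes only after the ACK frame arrives */
	}
	return 0;
}
```

Capping each round at the negotiated burst rate keeps the configuration traffic from monopolizing the real-time network while a client is being brought up.
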
+--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_file.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_file.c 2021-04-07 16:01:26.629635021 +0800 +@@ -0,0 +1,81 @@ ++/*** ++ * ++ * rtcfg/rtcfg_file.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2004 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* Note: ++ * We don't need any special lock protection while manipulating the ++ * rtcfg_files list. The list is only accessed through valid connections, and ++ * connections are already lock-protected. ++ */ ++LIST_HEAD(rtcfg_files); ++ ++struct rtcfg_file *rtcfg_get_file(const char *filename) ++{ ++ struct list_head *entry; ++ struct rtcfg_file *file; ++ ++ RTCFG_DEBUG(4, "RTcfg: looking for file %s\n", filename); ++ ++ list_for_each (entry, &rtcfg_files) { ++ file = list_entry(entry, struct rtcfg_file, entry); ++ ++ if (strcmp(file->name, filename) == 0) { ++ file->ref_count++; ++ ++ RTCFG_DEBUG(4, ++ "RTcfg: reusing file entry, now %d users\n", ++ file->ref_count); ++ ++ return file; ++ } ++ } ++ ++ return NULL; ++} ++ ++void rtcfg_add_file(struct rtcfg_file *file) ++{ ++ RTCFG_DEBUG(4, "RTcfg: adding file %s to list\n", file->name); ++ ++ file->ref_count = 1; ++ list_add_tail(&file->entry, &rtcfg_files); ++} ++ ++int rtcfg_release_file(struct rtcfg_file *file) ++{ ++ if (--file->ref_count == 0) { ++ RTCFG_DEBUG(4, "RTcfg: removing file %s from list\n", ++ file->name); ++ ++ list_del(&file->entry); ++ } ++ ++ return file->ref_count; ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_frame.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_frame.c 2021-04-07 16:01:26.624635028 +0800 +@@ -0,0 +1,571 @@ ++/*** ++ * ++ * rtcfg/rtcfg_frame.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned int num_rtskbs = 32; ++module_param(num_rtskbs, uint, 0444); ++MODULE_PARM_DESC(num_rtskbs, "Number of realtime socket buffers used by RTcfg"); ++ ++static struct rtskb_pool rtcfg_pool; ++static rtdm_task_t rx_task; ++static rtdm_event_t rx_event; ++static struct rtskb_queue rx_queue; ++ ++void rtcfg_thread_signal(void) ++{ ++ rtdm_event_signal(&rx_event); ++} ++ ++static int rtcfg_rx_handler(struct rtskb *rtskb, struct rtpacket_type *pt) ++{ ++ if (rtskb_acquire(rtskb, &rtcfg_pool) == 0) { ++ rtskb_queue_tail(&rx_queue, rtskb); ++ rtcfg_thread_signal(); ++ } else ++ kfree_rtskb(rtskb); ++ ++ return 0; ++} ++ ++static void rtcfg_rx_task(void *arg) ++{ ++ struct rtskb *rtskb; ++ struct rtcfg_frm_head *frm_head; ++ struct rtnet_device *rtdev; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(&rx_event) < 0) ++ break; ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue))) { ++ rtdev = rtskb->rtdev; ++ ++ if (rtskb->pkt_type == PACKET_OTHERHOST) { ++ kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_head)) { ++ RTCFG_DEBUG( ++ 1, ++ "RTcfg: %s() received an invalid frame\n", ++ __FUNCTION__); ++ kfree_rtskb(rtskb); ++ continue; ++ } ++ ++ frm_head = (struct rtcfg_frm_head *)rtskb->data; ++ ++ if (rtcfg_do_main_event(rtskb->rtdev->ifindex, ++ frm_head->id + ++ RTCFG_FRM_STAGE_1_CFG, ++ rtskb) < 0) ++ kfree_rtskb(rtskb); ++ } ++ ++ rtcfg_timer_run(); ++ } ++} ++ ++int rtcfg_send_frame(struct rtskb *rtskb, struct rtnet_device *rtdev, ++ u8 *dest_addr) ++{ ++ int ret; ++ ++ rtskb->rtdev = rtdev; ++ rtskb->priority = RTCFG_SKB_PRIO; ++ ++ if (rtdev->hard_header) { ++ ret = rtdev->hard_header(rtskb, rtdev, ETH_RTCFG, dest_addr, ++ rtdev->dev_addr, rtskb->len); ++ if (ret < 0) ++ goto err; ++ } ++ ++ if ((rtdev->flags & IFF_UP) != 0) { ++ ret = 0; ++ if (rtdev_xmit(rtskb) != 0) ++ ret = -EAGAIN; ++ } else { ++ ret = -ENETDOWN; ++ goto err; ++ } ++ ++ rtdev_dereference(rtdev); ++ return ret; ++ ++err: ++ kfree_rtskb(rtskb); ++ rtdev_dereference(rtdev); ++ return ret; ++} ++ ++int rtcfg_send_stage_1(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_1_cfg *stage_1_frm; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_1_cfg) + conn->stage1_size + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ? 
++ 2 * RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_1_frm = (struct rtcfg_frm_stage_1_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ stage_1_frm->head.id = RTCFG_ID_STAGE_1_CFG; ++ stage_1_frm->head.version = 0; ++ stage_1_frm->addr_type = conn->addr_type & RTCFG_ADDR_MASK; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (stage_1_frm->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ memcpy(stage_1_frm->client_addr, &(conn->addr.ip_addr), 4); ++ ++ stage_1_frm = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(stage_1_frm->server_addr, &(rtdev->local_ip), 4); ++ ++ stage_1_frm = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_frm) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ stage_1_frm->burstrate = device[conn->ifindex].burstrate; ++ stage_1_frm->cfg_len = htons(conn->stage1_size); ++ ++ memcpy(rtskb_put(rtskb, conn->stage1_size), conn->stage1_data, ++ conn->stage1_size); ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_stage_2(struct rtcfg_connection *conn, int send_data) ++{ ++ struct rtnet_device *rtdev; ++ struct rtcfg_device *rtcfg_dev = &device[conn->ifindex]; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_2_cfg *stage_2_frm; ++ size_t total_size; ++ size_t frag_size; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (send_data) { ++ total_size = conn->stage2_file->size; ++ frag_size = MIN(rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO) - ++ sizeof(struct rtcfg_frm_stage_2_cfg), ++ total_size); ++ } else { ++ total_size = 0; ++ frag_size = 0; ++ } ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_2_cfg) + frag_size; ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_2_frm = (struct rtcfg_frm_stage_2_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_2_cfg)); ++ ++ stage_2_frm->head.id = RTCFG_ID_STAGE_2_CFG; ++ stage_2_frm->head.version = 0; ++ stage_2_frm->flags = rtcfg_dev->flags; ++ stage_2_frm->stations = htonl(rtcfg_dev->other_stations); ++ stage_2_frm->heartbeat_period = htons(rtcfg_dev->spec.srv.heartbeat); ++ stage_2_frm->cfg_len = htonl(total_size); ++ ++ if (send_data) ++ memcpy(rtskb_put(rtskb, frag_size), conn->stage2_file->buffer, ++ frag_size); ++ conn->cfg_offs = frag_size; ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_stage_2_frag(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_stage_2_cfg_frag *stage_2_frm; ++ size_t frag_size; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ frag_size = MIN(rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO) - ++ sizeof(struct rtcfg_frm_stage_2_cfg_frag), ++ conn->stage2_file->size - conn->cfg_offs); ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_stage_2_cfg_frag) + frag_size; ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ 
rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ stage_2_frm = (struct rtcfg_frm_stage_2_cfg_frag *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_stage_2_cfg_frag)); ++ ++ stage_2_frm->head.id = RTCFG_ID_STAGE_2_CFG_FRAG; ++ stage_2_frm->head.version = 0; ++ stage_2_frm->frag_offs = htonl(conn->cfg_offs); ++ ++ memcpy(rtskb_put(rtskb, frag_size), ++ conn->stage2_file->buffer + conn->cfg_offs, frag_size); ++ conn->cfg_offs += frag_size; ++ ++ return rtcfg_send_frame(rtskb, rtdev, conn->mac_addr); ++} ++ ++int rtcfg_send_announce_new(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_announce *announce_new; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_announce) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((rtcfg_dev->spec.clt.addr_type & RTCFG_ADDR_MASK) == ++ RTCFG_ADDR_IP) ? ++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ announce_new = (struct rtcfg_frm_announce *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_announce)); ++ ++ announce_new->head.id = RTCFG_ID_ANNOUNCE_NEW; ++ announce_new->head.version = 0; ++ announce_new->addr_type = rtcfg_dev->spec.clt.addr_type; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (announce_new->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(announce_new->addr, &(rtdev->local_ip), 4); ++ ++ announce_new = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_new) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ announce_new->flags = rtcfg_dev->flags; ++ announce_new->burstrate = rtcfg_dev->burstrate; ++ ++ return rtcfg_send_frame(rtskb, rtdev, rtdev->broadcast); ++} ++ ++int rtcfg_send_announce_reply(int ifindex, u8 *dest_mac_addr) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_announce *announce_rpl; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_announce) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((rtcfg_dev->spec.clt.addr_type == RTCFG_ADDR_IP) ? 
++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ announce_rpl = (struct rtcfg_frm_announce *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_announce)); ++ ++ announce_rpl->head.id = RTCFG_ID_ANNOUNCE_REPLY; ++ announce_rpl->head.version = 0; ++ announce_rpl->addr_type = rtcfg_dev->spec.clt.addr_type; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (announce_rpl->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(announce_rpl->addr, &(rtdev->local_ip), 4); ++ ++ announce_rpl = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_rpl) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ announce_rpl->flags = rtcfg_dev->flags & _RTCFG_FLAG_READY; ++ announce_rpl->burstrate = 0; /* padding field */ ++ ++ return rtcfg_send_frame(rtskb, rtdev, dest_mac_addr); ++} ++ ++int rtcfg_send_ack(int ifindex) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_ack_cfg *ack_frm; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + sizeof(struct rtcfg_frm_ack_cfg); ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ ack_frm = (struct rtcfg_frm_ack_cfg *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_ack_cfg)); ++ ++ ack_frm->head.id = RTCFG_ID_ACK_CFG; ++ ack_frm->head.version = 0; ++ ack_frm->ack_len = htonl(device[ifindex].spec.clt.cfg_offs); ++ ++ return rtcfg_send_frame(rtskb, rtdev, ++ device[ifindex].spec.clt.srv_mac_addr); ++} ++ ++int rtcfg_send_simple_frame(int ifindex, int frame_id, u8 *dest_addr) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_simple *simple_frm; ++ ++ rtdev = rtdev_get_by_index(ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + sizeof(struct rtcfg_frm_simple); ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ simple_frm = (struct rtcfg_frm_simple *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_simple)); ++ ++ simple_frm->head.id = frame_id; ++ simple_frm->head.version = 0; ++ ++ return rtcfg_send_frame(rtskb, rtdev, ++ (dest_addr) ? dest_addr : rtdev->broadcast); ++} ++ ++int rtcfg_send_dead_station(struct rtcfg_connection *conn) ++{ ++ struct rtnet_device *rtdev; ++ struct rtskb *rtskb; ++ unsigned int rtskb_size; ++ struct rtcfg_frm_dead_station *dead_station_frm; ++ ++ rtdev = rtdev_get_by_index(conn->ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ rtskb_size = rtdev->hard_header_len + ++ sizeof(struct rtcfg_frm_dead_station) + ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ (((conn->addr_type & RTCFG_ADDR_MASK) == RTCFG_ADDR_IP) ? 
++ RTCFG_ADDRSIZE_IP : ++ 0); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ 0; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ rtskb = alloc_rtskb(rtskb_size, &rtcfg_pool); ++ if (rtskb == NULL) { ++ rtdev_dereference(rtdev); ++ return -ENOBUFS; ++ } ++ ++ rtskb_reserve(rtskb, rtdev->hard_header_len); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station *)rtskb_put( ++ rtskb, sizeof(struct rtcfg_frm_dead_station)); ++ ++ dead_station_frm->head.id = RTCFG_ID_DEAD_STATION; ++ dead_station_frm->head.version = 0; ++ dead_station_frm->addr_type = conn->addr_type & RTCFG_ADDR_MASK; ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ if (dead_station_frm->addr_type == RTCFG_ADDR_IP) { ++ rtskb_put(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ memcpy(dead_station_frm->logical_addr, &(conn->addr.ip_addr), ++ 4); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station ++ *)(((u8 *)dead_station_frm) + ++ RTCFG_ADDRSIZE_IP); ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ /* Ethernet-specific! */ ++ memcpy(dead_station_frm->physical_addr, conn->mac_addr, ETH_ALEN); ++ memset(&dead_station_frm->physical_addr[ETH_ALEN], 0, ++ sizeof(dead_station_frm->physical_addr) - ETH_ALEN); ++ ++ return rtcfg_send_frame(rtskb, rtdev, rtdev->broadcast); ++} ++ ++static struct rtpacket_type rtcfg_packet_type = { .type = __constant_htons( ++ ETH_RTCFG), ++ .handler = rtcfg_rx_handler }; ++ ++int __init rtcfg_init_frames(void) ++{ ++ int ret; ++ ++ if (rtskb_module_pool_init(&rtcfg_pool, num_rtskbs) < num_rtskbs) ++ return -ENOMEM; ++ ++ rtskb_queue_init(&rx_queue); ++ rtdm_event_init(&rx_event, 0); ++ ++ ret = rtdm_task_init(&rx_task, "rtcfg-rx", rtcfg_rx_task, 0, ++ RTDM_TASK_LOWEST_PRIORITY, 0); ++ if (ret < 0) { ++ rtdm_event_destroy(&rx_event); ++ goto error1; ++ } ++ ++ ret = rtdev_add_pack(&rtcfg_packet_type); ++ if (ret < 0) ++ goto error2; ++ ++ return 0; ++ ++error2: ++ rtdm_event_destroy(&rx_event); ++ rtdm_task_destroy(&rx_task); ++ ++error1: ++ rtskb_pool_release(&rtcfg_pool); ++ ++ return ret; ++} ++ ++void rtcfg_cleanup_frames(void) ++{ ++ struct rtskb *rtskb; ++ ++ rtdev_remove_pack(&rtcfg_packet_type); ++ ++ rtdm_event_destroy(&rx_event); ++ rtdm_task_destroy(&rx_task); ++ ++ while ((rtskb = rtskb_dequeue(&rx_queue)) != NULL) { ++ kfree_rtskb(rtskb); ++ } ++ ++ rtskb_pool_release(&rtcfg_pool); ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_ioctl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_ioctl.c 2021-04-07 16:01:26.619635035 +0800 +@@ -0,0 +1,421 @@ ++/*** ++ * ++ * rtcfg/rtcfg_ioctl.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int rtcfg_event_handler(struct rt_proc_call *call) ++{ ++ struct rtcfg_cmd *cmd_event; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ return rtcfg_do_main_event(cmd_event->internal.data.ifindex, ++ cmd_event->internal.data.event_id, call); ++} ++ ++void keep_cmd_add(struct rt_proc_call *call, void *priv_data) ++{ ++ /* do nothing on error (<0), or if file already present (=0) */ ++ if (rtpc_get_result(call) <= 0) ++ return; ++ ++ /* Don't cleanup any buffers, we are going to recycle them! */ ++ rtpc_set_cleanup_handler(call, NULL); ++} ++ ++void cleanup_cmd_add(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ /* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ buf = cmd->args.add.conn_buf; ++ if (buf != NULL) ++ kfree(buf); ++ ++ buf = cmd->args.add.stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ ++ if (cmd->args.add.stage2_file != NULL) { ++ buf = cmd->args.add.stage2_file->buffer; ++ if (buf != NULL) ++ vfree(buf); ++ kfree(cmd->args.add.stage2_file); ++ } ++} ++ ++void cleanup_cmd_del(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ /* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ if (cmd->args.del.conn_buf != NULL) { ++ buf = cmd->args.del.conn_buf->stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ kfree(cmd->args.del.conn_buf); ++ } ++ ++ if (cmd->args.del.stage2_file != NULL) { ++ buf = cmd->args.del.stage2_file->buffer; ++ if (buf != NULL) ++ vfree(buf); ++ kfree(cmd->args.del.stage2_file); ++ } ++} ++ ++void copy_stage_1_data(struct rt_proc_call *call, void *priv_data) ++{ ++ struct rtcfg_cmd *cmd; ++ int result = rtpc_get_result(call); ++ ++ if (result <= 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd->args.client.buffer_size < (size_t)result) ++ rtpc_set_result(call, -ENOSPC); ++ else if (copy_to_user(cmd->args.client.buffer, ++ cmd->args.client.rtskb->data, result) != 0) ++ rtpc_set_result(call, -EFAULT); ++} ++ ++void cleanup_cmd_client(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *station_buf; ++ struct rtskb *rtskb; ++ ++ station_buf = cmd->args.client.station_buf; ++ if (station_buf != NULL) ++ kfree(station_buf); ++ ++ rtskb = cmd->args.client.rtskb; ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++void copy_stage_2_data(struct rt_proc_call *call, void *priv_data) ++{ ++ struct rtcfg_cmd *cmd; ++ int result = rtpc_get_result(call); ++ struct rtskb *rtskb; ++ ++ if (result <= 0) ++ return; ++ ++ cmd = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd->args.announce.buffer_size < (size_t)result) ++ rtpc_set_result(call, -ENOSPC); ++ else { ++ rtskb = cmd->args.announce.rtskb; ++ do { ++ if (copy_to_user(cmd->args.announce.buffer, rtskb->data, ++ rtskb->len) != 0) { ++ rtpc_set_result(call, -EFAULT); ++ break; ++ } ++ cmd->args.announce.buffer += rtskb->len; ++ rtskb = rtskb->next; ++ } while (rtskb != NULL); ++ } ++} ++ ++void cleanup_cmd_announce(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ struct rtskb *rtskb; ++ ++ rtskb = cmd->args.announce.rtskb; ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++void cleanup_cmd_detach(void *priv_data) ++{ ++ struct rtcfg_cmd *cmd = (struct rtcfg_cmd *)priv_data; ++ void *buf; ++ ++ 
/* unlock proc and update directory structure */ ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ++ if (cmd->args.detach.conn_buf) { ++ buf = cmd->args.detach.conn_buf->stage1_data; ++ if (buf != NULL) ++ kfree(buf); ++ kfree(cmd->args.detach.conn_buf); ++ } ++ ++ if (cmd->args.detach.stage2_file != NULL) { ++ buf = cmd->args.detach.stage2_file->buffer; ++ if (buf) ++ vfree(buf); ++ kfree(cmd->args.detach.stage2_file); ++ } ++ ++ if (cmd->args.detach.station_addr_list) ++ kfree(cmd->args.detach.station_addr_list); ++ ++ if (cmd->args.detach.stage2_chain) ++ kfree_rtskb(cmd->args.detach.stage2_chain); ++} ++ ++int rtcfg_ioctl_add(struct rtnet_device *rtdev, struct rtcfg_cmd *cmd) ++{ ++ struct rtcfg_connection *conn_buf; ++ struct rtcfg_file *file = NULL; ++ void *data_buf; ++ size_t size; ++ int ret; ++ ++ conn_buf = kmalloc(sizeof(struct rtcfg_connection), GFP_KERNEL); ++ if (conn_buf == NULL) ++ return -ENOMEM; ++ cmd->args.add.conn_buf = conn_buf; ++ ++ data_buf = NULL; ++ size = cmd->args.add.stage1_size; ++ if (size > 0) { ++ /* check stage 1 data size */ ++ if (sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP + size > ++ rtdev->get_mtu(rtdev, RTCFG_SKB_PRIO)) { ++ ret = -ESTAGE1SIZE; ++ goto err; ++ } ++ ++ data_buf = kmalloc(size, GFP_KERNEL); ++ if (data_buf == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = copy_from_user(data_buf, cmd->args.add.stage1_data, size); ++ if (ret != 0) { ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ cmd->args.add.stage1_data = data_buf; ++ ++ if (cmd->args.add.stage2_filename != NULL) { ++ size = strnlen_user(cmd->args.add.stage2_filename, PATH_MAX); ++ ++ file = kmalloc(sizeof(struct rtcfg_file) + size, GFP_KERNEL); ++ if (file == NULL) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ file->name = (char *)file + sizeof(struct rtcfg_file); ++ file->buffer = NULL; ++ ++ ret = copy_from_user( ++ (void *)file + sizeof(struct rtcfg_file), ++ (const void *)cmd->args.add.stage2_filename, size); ++ if (ret != 0) { ++ ret = -EFAULT; ++ goto err; ++ } ++ } ++ cmd->args.add.stage2_file = file; ++ ++ /* lock proc structure for modification */ ++ rtcfg_lockwr_proc(cmd->internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, cmd, sizeof(*cmd), ++ keep_cmd_add, cleanup_cmd_add); ++ ++ /* load file if missing */ ++ if (ret > 0) { ++ struct file *filp; ++ mm_segment_t oldfs; ++ ++ filp = filp_open(file->name, O_RDONLY, 0); ++ if (IS_ERR(filp)) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ret = PTR_ERR(filp); ++ goto err; ++ } ++ ++ file->size = filp->f_path.dentry->d_inode->i_size; ++ ++ /* allocate buffer even for empty files */ ++ file->buffer = vmalloc((file->size) ? 
file->size : 1); ++ if (file->buffer == NULL) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ fput(filp); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ filp->f_pos = 0; ++ ++ ret = filp->f_op->read(filp, file->buffer, file->size, ++ &filp->f_pos); ++ ++ set_fs(oldfs); ++ fput(filp); ++ ++ if (ret != (int)file->size) { ++ rtcfg_unlockwr_proc(cmd->internal.data.ifindex); ++ ret = -EIO; ++ goto err; ++ } ++ ++ /* dispatch again, this time with new file attached */ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, cmd, ++ sizeof(*cmd), NULL, cleanup_cmd_add); ++ } ++ ++ return ret; ++ ++err: ++ kfree(conn_buf); ++ if (data_buf != NULL) ++ kfree(data_buf); ++ if (file != NULL) { ++ if (file->buffer != NULL) ++ vfree(file->buffer); ++ kfree(file); ++ } ++ return ret; ++} ++ ++int rtcfg_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtcfg_cmd cmd; ++ struct rtcfg_station *station_buf; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ cmd.internal.data.ifindex = rtdev->ifindex; ++ cmd.internal.data.event_id = _IOC_NR(request); ++ ++ switch (request) { ++ case RTCFG_IOC_SERVER: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_ADD: ++ ret = rtcfg_ioctl_add(rtdev, &cmd); ++ break; ++ ++ case RTCFG_IOC_DEL: ++ cmd.args.del.conn_buf = NULL; ++ cmd.args.del.stage2_file = NULL; ++ ++ /* lock proc structure for modification ++ (unlock in cleanup_cmd_del) */ ++ rtcfg_lockwr_proc(cmd.internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, cleanup_cmd_del); ++ break; ++ ++ case RTCFG_IOC_WAIT: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.wait.timeout, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_CLIENT: ++ station_buf = kmalloc(sizeof(struct rtcfg_station) * ++ cmd.args.client.max_stations, ++ GFP_KERNEL); ++ if (station_buf == NULL) ++ return -ENOMEM; ++ cmd.args.client.station_buf = station_buf; ++ cmd.args.client.rtskb = NULL; ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.client.timeout, &cmd, ++ sizeof(cmd), copy_stage_1_data, ++ cleanup_cmd_client); ++ break; ++ ++ case RTCFG_IOC_ANNOUNCE: ++ cmd.args.announce.rtskb = NULL; ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.announce.timeout, &cmd, ++ sizeof(cmd), copy_stage_2_data, ++ cleanup_cmd_announce); ++ break; ++ ++ case RTCFG_IOC_READY: ++ ret = rtpc_dispatch_call(rtcfg_event_handler, ++ cmd.args.ready.timeout, &cmd, ++ sizeof(cmd), NULL, NULL); ++ break; ++ ++ case RTCFG_IOC_DETACH: ++ do { ++ cmd.args.detach.conn_buf = NULL; ++ cmd.args.detach.stage2_file = NULL; ++ cmd.args.detach.station_addr_list = NULL; ++ cmd.args.detach.stage2_chain = NULL; ++ ++ /* lock proc structure for modification ++ (unlock in cleanup_cmd_detach) */ ++ rtcfg_lockwr_proc(cmd.internal.data.ifindex); ++ ++ ret = rtpc_dispatch_call(rtcfg_event_handler, 0, &cmd, ++ sizeof(cmd), NULL, ++ cleanup_cmd_detach); ++ } while (ret == -EAGAIN); ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++struct rtnet_ioctls rtcfg_ioctls = { .service_name = "RTcfg", ++ .ioctl_type = RTNET_IOC_TYPE_RTCFG, ++ .handler = rtcfg_ioctl }; +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_event.c 2021-04-07 16:01:26.614635042 +0800 
+@@ -0,0 +1,745 @@ ++/*** ++ * ++ * rtcfg/rtcfg_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/*** Common and Server States ***/ ++static int rtcfg_main_state_off(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++static int rtcfg_main_state_server_running(int ifindex, RTCFG_EVENT event_id, ++ void *event_data); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG ++const char *rtcfg_event[] = { "RTCFG_CMD_SERVER", ++ "RTCFG_CMD_ADD", ++ "RTCFG_CMD_DEL", ++ "RTCFG_CMD_WAIT", ++ "RTCFG_CMD_CLIENT", ++ "RTCFG_CMD_ANNOUNCE", ++ "RTCFG_CMD_READY", ++ "RTCFG_CMD_DETACH", ++ "RTCFG_TIMER", ++ "RTCFG_FRM_STAGE_1_CFG", ++ "RTCFG_FRM_ANNOUNCE_NEW", ++ "RTCFG_FRM_ANNOUNCE_REPLY", ++ "RTCFG_FRM_STAGE_2_CFG", ++ "RTCFG_FRM_STAGE_2_CFG_FRAG", ++ "RTCFG_FRM_ACK_CFG", ++ "RTCFG_FRM_READY", ++ "RTCFG_FRM_HEARTBEAT", ++ "RTCFG_FRM_DEAD_STATION" }; ++ ++const char *rtcfg_main_state[] = { "RTCFG_MAIN_OFF", ++ "RTCFG_MAIN_SERVER_RUNNING", ++ "RTCFG_MAIN_CLIENT_0", ++ "RTCFG_MAIN_CLIENT_1", ++ "RTCFG_MAIN_CLIENT_ANNOUNCED", ++ "RTCFG_MAIN_CLIENT_ALL_KNOWN", ++ "RTCFG_MAIN_CLIENT_ALL_FRAMES", ++ "RTCFG_MAIN_CLIENT_2", ++ "RTCFG_MAIN_CLIENT_READY" }; ++ ++int rtcfg_debug = RTCFG_DEFAULT_DEBUG_LEVEL; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTCFG_DEBUG */ ++ ++struct rtcfg_device device[MAX_RT_DEVICES]; ++ ++static int (*state[])(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) = { rtcfg_main_state_off, ++ rtcfg_main_state_server_running, ++ rtcfg_main_state_client_0, ++ rtcfg_main_state_client_1, ++ rtcfg_main_state_client_announced, ++ rtcfg_main_state_client_all_known, ++ rtcfg_main_state_client_all_frames, ++ rtcfg_main_state_client_2, ++ rtcfg_main_state_client_ready }; ++ ++static int rtcfg_server_add(struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_del(struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_detach(int ifindex, struct rtcfg_cmd *cmd_event); ++static int rtcfg_server_recv_announce(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb); ++static int rtcfg_server_recv_ack(int ifindex, struct rtskb *rtskb); ++static int rtcfg_server_recv_simple_frame(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb); ++ ++int rtcfg_do_main_event(int ifindex, RTCFG_EVENT event_id, void *event_data) ++{ ++ int main_state; ++ ++ rtdm_mutex_lock(&device[ifindex].dev_mutex); ++ ++ main_state = device[ifindex].state; ++ ++ RTCFG_DEBUG(3, "RTcfg: %s() rtdev=%d, event=%s, state=%s\n", ++ __FUNCTION__, ifindex, rtcfg_event[event_id], ++ rtcfg_main_state[main_state]); ++ ++ return (*state[main_state])(ifindex, event_id, event_data); ++} ++ ++void rtcfg_next_main_state(int ifindex, 
RTCFG_MAIN_STATE state) ++{ ++ RTCFG_DEBUG(4, "RTcfg: next main state=%s \n", rtcfg_main_state[state]); ++ ++ device[ifindex].state = state; ++} ++ ++static int rtcfg_main_state_off(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_cmd *cmd_event; ++ int ret; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ switch (event_id) { ++ case RTCFG_CMD_SERVER: ++ INIT_LIST_HEAD(&rtcfg_dev->spec.srv.conn_list); ++ ++ ret = rtdm_timer_init(&rtcfg_dev->timer, rtcfg_timer, ++ "rtcfg-timer"); ++ if (ret == 0) { ++ ret = rtdm_timer_start( ++ &rtcfg_dev->timer, XN_INFINITE, ++ (nanosecs_rel_t)cmd_event->args.server.period * ++ 1000000, ++ RTDM_TIMERMODE_RELATIVE); ++ if (ret < 0) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ } ++ if (ret < 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return ret; ++ } ++ ++ if (cmd_event->args.server.flags & _RTCFG_FLAG_READY) ++ set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags); ++ set_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags); ++ ++ rtcfg_dev->burstrate = cmd_event->args.server.burstrate; ++ ++ rtcfg_dev->spec.srv.heartbeat = ++ cmd_event->args.server.heartbeat; ++ ++ rtcfg_dev->spec.srv.heartbeat_timeout = ++ ((u64)cmd_event->args.server.heartbeat) * 1000000 * ++ cmd_event->args.server.threshold; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_SERVER_RUNNING); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ break; ++ ++ case RTCFG_CMD_CLIENT: ++ rtcfg_dev->spec.clt.station_addr_list = ++ cmd_event->args.client.station_buf; ++ cmd_event->args.client.station_buf = NULL; ++ ++ rtcfg_dev->spec.clt.max_stations = ++ cmd_event->args.client.max_stations; ++ rtcfg_dev->other_stations = -1; ++ ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Server States ***/ ++ ++static int rtcfg_main_state_server_running(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ struct rtcfg_device *rtcfg_dev; ++ struct rtskb *rtskb; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ADD: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_add(cmd_event); ++ ++ case RTCFG_CMD_DEL: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_del(cmd_event); ++ ++ case RTCFG_CMD_WAIT: ++ call = (struct rt_proc_call *)event_data; ++ ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->spec.srv.clients_configured == ++ rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_READY: ++ call = (struct rt_proc_call *)event_data; ++ ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ if (!test_and_set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags)) ++ rtcfg_send_ready(ifindex); ++ ++ 
rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ call = (struct rt_proc_call *)event_data; ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ return rtcfg_server_detach(ifindex, cmd_event); ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_announce(ifindex, event_id, rtskb); ++ ++ case RTCFG_FRM_ACK_CFG: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_ack(ifindex, rtskb); ++ ++ case RTCFG_FRM_READY: ++ case RTCFG_FRM_HEARTBEAT: ++ rtskb = (struct rtskb *)event_data; ++ return rtcfg_server_recv_simple_frame(ifindex, event_id, rtskb); ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Server Command Event Handlers ***/ ++ ++static int rtcfg_server_add(struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_device *rtcfg_dev; ++ struct rtcfg_connection *conn; ++ struct rtcfg_connection *new_conn; ++ struct list_head *entry; ++ unsigned int addr_type; ++ ++ rtcfg_dev = &device[cmd_event->internal.data.ifindex]; ++ addr_type = cmd_event->args.add.addr_type & RTCFG_ADDR_MASK; ++ ++ new_conn = cmd_event->args.add.conn_buf; ++ memset(new_conn, 0, sizeof(struct rtcfg_connection)); ++ ++ new_conn->ifindex = cmd_event->internal.data.ifindex; ++ new_conn->state = RTCFG_CONN_SEARCHING; ++ new_conn->addr_type = cmd_event->args.add.addr_type; ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ new_conn->addr.ip_addr = cmd_event->args.add.ip_addr; ++#endif ++ new_conn->stage1_data = cmd_event->args.add.stage1_data; ++ new_conn->stage1_size = cmd_event->args.add.stage1_size; ++ new_conn->burstrate = rtcfg_dev->burstrate; ++ new_conn->cfg_timeout = ((u64)cmd_event->args.add.timeout) * 1000000; ++ ++ if (cmd_event->args.add.addr_type == RTCFG_ADDR_IP) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ struct rtnet_device *rtdev; ++ ++ /* MAC address yet unknown -> use broadcast address */ ++ rtdev = rtdev_get_by_index(cmd_event->internal.data.ifindex); ++ if (rtdev == NULL) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -ENODEV; ++ } ++ memcpy(new_conn->mac_addr, rtdev->broadcast, MAX_ADDR_LEN); ++ rtdev_dereference(rtdev); ++#else /* !CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ return -EPROTONOSUPPORT; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ } else ++ memcpy(new_conn->mac_addr, cmd_event->args.add.mac_addr, ++ MAX_ADDR_LEN); ++ ++ /* get stage 2 file */ ++ if (cmd_event->args.add.stage2_file != NULL) { ++ if (cmd_event->args.add.stage2_file->buffer != NULL) { ++ new_conn->stage2_file = cmd_event->args.add.stage2_file; ++ rtcfg_add_file(new_conn->stage2_file); ++ ++ cmd_event->args.add.stage2_file = NULL; ++ } else { ++ new_conn->stage2_file = rtcfg_get_file( ++ cmd_event->args.add.stage2_file->name); ++ if (new_conn->stage2_file == NULL) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return 1; ++ } ++ } ++ } ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ if ( ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((addr_type == RTCFG_ADDR_IP) && ++ (conn->addr.ip_addr == cmd_event->args.add.ip_addr)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ((addr_type == RTCFG_ADDR_MAC) && ++ (memcmp(conn->mac_addr, new_conn->mac_addr, ++ MAX_ADDR_LEN) == 0))) { ++ 
rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if ((new_conn->stage2_file) && ++ (rtcfg_release_file(new_conn->stage2_file) == 0)) { ++ /* Note: This assignment cannot overwrite a valid file pointer. ++ * Effectively, it will only be executed when ++ * new_conn->stage2_file is the pointer originally passed by ++ * rtcfg_ioctl. But checking this assumptions does not cause ++ * any harm :o) ++ */ ++ RTNET_ASSERT(cmd_event->args.add.stage2_file == ++ NULL, ++ ;); ++ ++ cmd_event->args.add.stage2_file = ++ new_conn->stage2_file; ++ } ++ ++ return -EEXIST; ++ } ++ } ++ ++ list_add_tail(&new_conn->entry, &rtcfg_dev->spec.srv.conn_list); ++ rtcfg_dev->other_stations++; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.add.conn_buf = NULL; ++ cmd_event->args.add.stage1_data = NULL; ++ ++ return 0; ++} ++ ++static int rtcfg_server_del(struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_connection *conn; ++ struct list_head *entry; ++ unsigned int addr_type; ++ struct rtcfg_device *rtcfg_dev; ++ ++ rtcfg_dev = &device[cmd_event->internal.data.ifindex]; ++ addr_type = cmd_event->args.add.addr_type & RTCFG_ADDR_MASK; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ if ((addr_type == conn->addr_type) && ++ ( ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ ((addr_type == RTCFG_ADDR_IP) && ++ (conn->addr.ip_addr == ++ cmd_event->args.add.ip_addr)) || ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ((addr_type == RTCFG_ADDR_MAC) && ++ (memcmp(conn->mac_addr, ++ cmd_event->args.add.mac_addr, ++ MAX_ADDR_LEN) == 0)))) { ++ list_del(&conn->entry); ++ rtcfg_dev->other_stations--; ++ ++ if (conn->state > RTCFG_CONN_SEARCHING) { ++ rtcfg_dev->stations_found--; ++ if (conn->state >= RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ if (conn->flags & _RTCFG_FLAG_READY) ++ rtcfg_dev->stations_ready--; ++ } ++ ++ if ((conn->stage2_file) && ++ (rtcfg_release_file(conn->stage2_file) == 0)) ++ cmd_event->args.del.stage2_file = ++ conn->stage2_file; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.del.conn_buf = conn; ++ ++ return 0; ++ } ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -ENOENT; ++} ++ ++static int rtcfg_server_detach(int ifindex, struct rtcfg_cmd *cmd_event) ++{ ++ struct rtcfg_connection *conn; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (!list_empty(&rtcfg_dev->spec.srv.conn_list)) { ++ conn = list_entry(rtcfg_dev->spec.srv.conn_list.next, ++ struct rtcfg_connection, entry); ++ ++ list_del(&conn->entry); ++ rtcfg_dev->other_stations--; ++ ++ if (conn->state > RTCFG_CONN_SEARCHING) { ++ rtcfg_dev->stations_found--; ++ if (conn->state >= RTCFG_CONN_STAGE_2) ++ rtcfg_dev->spec.srv.clients_configured--; ++ if (conn->flags & _RTCFG_FLAG_READY) ++ rtcfg_dev->stations_ready--; ++ } ++ ++ if ((conn->stage2_file) && ++ (rtcfg_release_file(conn->stage2_file) == 0)) ++ cmd_event->args.detach.stage2_file = conn->stage2_file; ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ cmd_event->args.detach.conn_buf = conn; ++ ++ return -EAGAIN; ++ } ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ rtcfg_reset_device(ifindex); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_OFF); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return 0; ++} ++ ++/*** Server Frame Event Handlers ***/ ++ ++static int rtcfg_server_recv_announce(int ifindex, RTCFG_EVENT event_id, ++ struct 
rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_frm_announce *announce; ++ struct rtcfg_connection *conn; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_announce)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid announce frame\n"); ++ return -EINVAL; ++ } ++ ++ announce = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ switch (announce->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ u32 announce_addr; ++ case RTCFG_ADDR_IP: ++ memcpy(&announce_addr, announce->addr, 4); ++ ++ if (((conn->addr_type & RTCFG_ADDR_MASK) == ++ RTCFG_ADDR_IP) && ++ (announce_addr == conn->addr.ip_addr)) { ++ /* save MAC address - Ethernet-specific! */ ++ memcpy(conn->mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN); ++ ++ /* update routing table */ ++ rt_ip_route_add_host(conn->addr.ip_addr, ++ conn->mac_addr, ++ rtskb->rtdev); ++ ++ /* remove IP address */ ++ __rtskb_pull(rtskb, RTCFG_ADDRSIZE_IP); ++ ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ goto out; ++ } ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* Ethernet-specific! */ ++ if (memcmp(conn->mac_addr, ++ rtskb->mac.ethernet->h_source, ++ ETH_ALEN) == 0) { ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ goto out; ++ } ++ break; ++ } ++ } ++ ++out: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static int rtcfg_server_recv_ack(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_ack_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid ack_cfg frame\n"); ++ return -EINVAL; ++ } ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ /* find the corresponding connection - Ethernet-specific! */ ++ if (memcmp(conn->mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN) != 0) ++ continue; ++ ++ rtcfg_do_conn_event(conn, RTCFG_FRM_ACK_CFG, rtskb); ++ ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static int rtcfg_server_recv_simple_frame(int ifindex, RTCFG_EVENT event_id, ++ struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, entry); ++ ++ /* find the corresponding connection - Ethernet-specific! 
*/ ++ if (memcmp(conn->mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN) != 0) ++ continue; ++ ++ rtcfg_do_conn_event(conn, event_id, rtskb); ++ ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++/*** Utility Functions ***/ ++ ++void rtcfg_queue_blocking_call(int ifindex, struct rt_proc_call *call) ++{ ++ rtdm_lockctx_t context; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtdm_lock_get_irqsave(&rtcfg_dev->event_calls_lock, context); ++ list_add_tail(&call->list_entry, &rtcfg_dev->event_calls); ++ rtdm_lock_put_irqrestore(&rtcfg_dev->event_calls_lock, context); ++} ++ ++struct rt_proc_call *rtcfg_dequeue_blocking_call(int ifindex) ++{ ++ rtdm_lockctx_t context; ++ struct rt_proc_call *call; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtdm_lock_get_irqsave(&rtcfg_dev->event_calls_lock, context); ++ if (!list_empty(&rtcfg_dev->event_calls)) { ++ call = (struct rt_proc_call *)rtcfg_dev->event_calls.next; ++ list_del(&call->list_entry); ++ } else ++ call = NULL; ++ rtdm_lock_put_irqrestore(&rtcfg_dev->event_calls_lock, context); ++ ++ return call; ++} ++ ++void rtcfg_complete_cmd(int ifindex, RTCFG_EVENT event_id, int result) ++{ ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ rtpc_complete_call(call, (cmd_event->internal.data.event_id == ++ event_id) ? ++ result : ++ -EINVAL); ++ } ++} ++ ++void rtcfg_reset_device(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ rtcfg_dev->other_stations = 0; ++ rtcfg_dev->stations_found = 0; ++ rtcfg_dev->stations_ready = 0; ++ rtcfg_dev->flags = 0; ++ rtcfg_dev->burstrate = 0; ++ ++ memset(&rtcfg_dev->spec, 0, sizeof(rtcfg_dev->spec)); ++ INIT_LIST_HEAD(&rtcfg_dev->spec.srv.conn_list); ++} ++ ++void rtcfg_init_state_machines(void) ++{ ++ int i; ++ struct rtcfg_device *rtcfg_dev; ++ ++ memset(device, 0, sizeof(device)); ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtcfg_dev = &device[i]; ++ rtcfg_dev->state = RTCFG_MAIN_OFF; ++ ++ rtdm_mutex_init(&rtcfg_dev->dev_mutex); ++ ++ INIT_LIST_HEAD(&rtcfg_dev->event_calls); ++ rtdm_lock_init(&rtcfg_dev->event_calls_lock); ++ } ++} ++ ++void rtcfg_cleanup_state_machines(void) ++{ ++ int i; ++ struct rtcfg_device *rtcfg_dev; ++ struct rtcfg_connection *conn; ++ struct list_head *entry; ++ struct list_head *tmp; ++ struct rt_proc_call *call; ++ ++ for (i = 0; i < MAX_RT_DEVICES; i++) { ++ rtcfg_dev = &device[i]; ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ ++ /* ++ * No need to synchronize with rtcfg_timer here: the task running ++ * rtcfg_timer is already dead. 
++ */ ++ ++ rtdm_mutex_destroy(&rtcfg_dev->dev_mutex); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ list_for_each_safe (entry, tmp, ++ &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry( ++ entry, struct rtcfg_connection, entry); ++ ++ if (conn->stage1_data != NULL) ++ kfree(conn->stage1_data); ++ ++ if ((conn->stage2_file != NULL) && ++ (rtcfg_release_file(conn->stage2_file) == ++ 0)) { ++ vfree(conn->stage2_file->buffer); ++ kfree(conn->stage2_file); ++ } ++ ++ kfree(entry); ++ } ++ } else if (rtcfg_dev->state != RTCFG_MAIN_OFF) { ++ if (rtcfg_dev->spec.clt.station_addr_list != NULL) ++ kfree(rtcfg_dev->spec.clt.station_addr_list); ++ ++ if (rtcfg_dev->spec.clt.stage2_chain != NULL) ++ kfree_rtskb(rtcfg_dev->spec.clt.stage2_chain); ++ } ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(i); ++ if (call == NULL) ++ break; ++ ++ rtpc_complete_call_nrt(call, -ENODEV); ++ } ++ } ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_timer.c 2021-04-07 16:01:26.610635048 +0800 +@@ -0,0 +1,110 @@ ++/*** ++ * ++ * rtcfg/rtcfg_timer.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void rtcfg_timer(rtdm_timer_t *t) ++{ ++ struct rtcfg_device *rtcfg_dev = ++ container_of(t, struct rtcfg_device, timer); ++ ++ set_bit(FLAG_TIMER_PENDING, &rtcfg_dev->flags); ++ rtcfg_thread_signal(); ++} ++ ++void rtcfg_timer_run_one(int ifindex) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct list_head *entry; ++ struct rtcfg_connection *conn; ++ int last_stage_1 = -1; ++ int burst_credit; ++ int index; ++ int ret, shutdown; ++ ++ shutdown = test_and_clear_bit(FLAG_TIMER_SHUTDOWN, &rtcfg_dev->flags); ++ ++ if (!test_and_clear_bit(FLAG_TIMER_PENDING, &rtcfg_dev->flags) || ++ shutdown) ++ return; ++ ++ rtdm_mutex_lock(&rtcfg_dev->dev_mutex); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_SERVER_RUNNING) { ++ index = 0; ++ burst_credit = rtcfg_dev->burstrate; ++ ++ list_for_each (entry, &rtcfg_dev->spec.srv.conn_list) { ++ conn = list_entry(entry, struct rtcfg_connection, ++ entry); ++ ++ if ((conn->state == RTCFG_CONN_SEARCHING) || ++ (conn->state == RTCFG_CONN_DEAD)) { ++ if ((burst_credit > 0) && ++ (index > last_stage_1)) { ++ if ((ret = rtcfg_send_stage_1(conn)) < ++ 0) { ++ RTCFG_DEBUG( ++ 2, ++ "RTcfg: error %d while sending " ++ "stage 1 frame\n", ++ ret); ++ } ++ burst_credit--; ++ last_stage_1 = index; ++ } ++ } else { ++ /* skip connection in history */ ++ if (last_stage_1 == (index - 1)) ++ last_stage_1 = index; ++ ++ rtcfg_do_conn_event(conn, RTCFG_TIMER, NULL); ++ } ++ index++; ++ } ++ ++ /* handle pointer overrun of the last stage 1 transmission */ ++ if (last_stage_1 == (index - 1)) ++ last_stage_1 = -1; ++ } else if (rtcfg_dev->state == RTCFG_MAIN_CLIENT_READY) ++ rtcfg_send_heartbeat(ifindex); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++} ++ ++void rtcfg_timer_run(void) ++{ ++ int ifindex; ++ ++ for (ifindex = 0; ifindex < MAX_RT_DEVICES; ifindex++) ++ rtcfg_timer_run_one(ifindex); ++} +--- linux/drivers/xenomai/net/stack/rtcfg/rtcfg_client_event.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtcfg/rtcfg_client_event.c 2021-04-07 16:01:26.605635055 +0800 +@@ -0,0 +1,1175 @@ ++/*** ++ * ++ * rtcfg/rtcfg_client_event.c ++ * ++ * Real-Time Configuration Distribution Protocol ++ * ++ * Copyright (C) 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++static int rtcfg_client_get_frag(int ifindex, struct rt_proc_call *call); ++static void rtcfg_client_detach(int ifindex, struct rt_proc_call *call); ++static void rtcfg_client_recv_stage_1(int ifindex, struct rtskb *rtskb); ++static int rtcfg_client_recv_announce(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_stage_2_cfg(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_stage_2_frag(int ifindex, struct rtskb *rtskb); ++static int rtcfg_client_recv_ready(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_recv_dead_station(int ifindex, struct rtskb *rtskb); ++static void rtcfg_client_update_server(int ifindex, struct rtskb *rtskb); ++ ++/*** Client States ***/ ++ ++int rtcfg_main_state_client_0(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ rtcfg_client_recv_stage_1(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_1(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_cmd *cmd_event; ++ int ret; ++ ++ switch (event_id) { ++ case RTCFG_CMD_CLIENT: ++ /* second trial (buffer was probably too small) */ ++ rtcfg_queue_blocking_call(ifindex, ++ (struct rt_proc_call *)event_data); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_ANNOUNCE: ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd_event->args.announce.burstrate == 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -EINVAL; ++ } ++ ++ rtcfg_queue_blocking_call(ifindex, ++ (struct rt_proc_call *)event_data); ++ ++ if (cmd_event->args.announce.flags & _RTCFG_FLAG_STAGE_2_DATA) ++ set_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags); ++ if (cmd_event->args.announce.flags & _RTCFG_FLAG_READY) ++ set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags); ++ if (cmd_event->args.announce.burstrate < rtcfg_dev->burstrate) ++ rtcfg_dev->burstrate = ++ cmd_event->args.announce.burstrate; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_ANNOUNCED); ++ ++ ret = rtcfg_send_announce_new(ifindex); ++ if (ret < 0) { ++ rtcfg_dequeue_blocking_call(ifindex); ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return ret; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, 
rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_announced(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ANNOUNCE: ++ return rtcfg_client_get_frag(ifindex, call); ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG: ++ rtcfg_client_recv_stage_2_cfg(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG_FRAG: ++ rtcfg_client_recv_stage_2_frag(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int rtcfg_main_state_client_all_known(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ ++ switch (event_id) { ++ case RTCFG_CMD_ANNOUNCE: ++ return rtcfg_client_get_frag(ifindex, call); ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_STAGE_2_CFG_FRAG: ++ rtcfg_client_recv_stage_2_frag(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ 
kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_all_frames(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, ++ 0); ++ ++ rtcfg_next_main_state( ++ ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? ++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_REPLY: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_found == ++ rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, ++ 0); ++ ++ rtcfg_next_main_state( ++ ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? ++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_2(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_READY: ++ rtcfg_dev = &device[ifindex]; ++ ++ if (rtcfg_dev->stations_ready == rtcfg_dev->other_stations) ++ rtpc_complete_call(call, 0); ++ else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_READY); ++ ++ if (!test_and_set_bit(RTCFG_FLAG_READY, &rtcfg_dev->flags)) ++ rtcfg_send_ready(ifindex); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++ ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ 
rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ /* ignore */ ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ kfree_rtskb(rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int rtcfg_main_state_client_ready(int ifindex, RTCFG_EVENT event_id, ++ void *event_data) ++{ ++ struct rtskb *rtskb = (struct rtskb *)event_data; ++ struct rt_proc_call *call = (struct rt_proc_call *)event_data; ++ struct rtcfg_device *rtcfg_dev; ++ ++ switch (event_id) { ++ case RTCFG_CMD_DETACH: ++ rtcfg_client_detach(ifindex, call); ++ break; ++ ++ case RTCFG_FRM_READY: ++ if (rtcfg_client_recv_ready(ifindex, rtskb) == 0) { ++ rtcfg_dev = &device[ifindex]; ++ if (rtcfg_dev->stations_ready == ++ rtcfg_dev->other_stations) ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_READY, 0); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ } ++ break; ++ ++ case RTCFG_FRM_ANNOUNCE_NEW: ++ if (rtcfg_client_recv_announce(ifindex, rtskb) == 0) { ++ rtcfg_send_announce_reply( ++ ifindex, rtskb->mac.ethernet->h_source); ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ } ++ kfree_rtskb(rtskb); ++ break; ++ ++ case RTCFG_FRM_DEAD_STATION: ++ rtcfg_client_recv_dead_station(ifindex, rtskb); ++ break; ++ ++ case RTCFG_FRM_STAGE_1_CFG: ++ rtcfg_client_update_server(ifindex, rtskb); ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&device[ifindex].dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown event %s for rtdev %d in %s()\n", ++ rtcfg_event[event_id], ifindex, __FUNCTION__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/*** Client Command Event Handlers ***/ ++ ++static int rtcfg_client_get_frag(int ifindex, struct rt_proc_call *call) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) == 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ return -EINVAL; ++ } ++ ++ rtcfg_send_ack(ifindex); ++ ++ if (rtcfg_dev->spec.clt.cfg_offs >= rtcfg_dev->spec.clt.cfg_len) { ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) { ++ rtpc_complete_call(call, 0); ++ ++ rtcfg_next_main_state(ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? 
++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } else { ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES); ++ rtcfg_queue_blocking_call(ifindex, call); ++ } ++ } else ++ rtcfg_queue_blocking_call(ifindex, call); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return -CALL_PENDING; ++} ++ ++/* releases rtcfg_dev->dev_mutex on return */ ++static void rtcfg_client_detach(int ifindex, struct rt_proc_call *call) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rtcfg_cmd *cmd_event; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ cmd_event->args.detach.station_addr_list = ++ rtcfg_dev->spec.clt.station_addr_list; ++ cmd_event->args.detach.stage2_chain = rtcfg_dev->spec.clt.stage2_chain; ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ rtpc_complete_call(call, -ENODEV); ++ } ++ ++ if (test_and_clear_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags)) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ rtcfg_reset_device(ifindex); ++ ++ rtcfg_next_main_state(cmd_event->internal.data.ifindex, RTCFG_MAIN_OFF); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++} ++ ++/*** Client Frame Event Handlers ***/ ++ ++static void rtcfg_client_recv_stage_1(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_1_cfg *stage_1_cfg; ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u8 addr_type; ++ int ret; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_1_cfg = (struct rtcfg_frm_stage_1_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ addr_type = stage_1_cfg->addr_type; ++ ++ switch (stage_1_cfg->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ struct rtnet_device *rtdev, *tmp; ++ u32 daddr, saddr, mask, bcast; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg " ++ "frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtdev = rtskb->rtdev; ++ ++ memcpy(&daddr, stage_1_cfg->client_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(&saddr, stage_1_cfg->server_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ __rtskb_pull(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ /* Broadcast: IP is used to address client */ ++ if (rtskb->pkt_type == PACKET_BROADCAST) { ++ /* directed to us? 
*/ ++ if (daddr != rtdev->local_ip) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* Unicast: IP address is assigned by the server */ ++ } else { ++ /* default netmask */ ++ if (ntohl(daddr) <= 0x7FFFFFFF) /* 127.255.255.255 */ ++ mask = 0x000000FF; /* 255.0.0.0 */ ++ else if (ntohl(daddr) <= ++ 0xBFFFFFFF) /* 191.255.255.255 */ ++ mask = 0x0000FFFF; /* 255.255.0.0 */ ++ else ++ mask = 0x00FFFFFF; /* 255.255.255.0 */ ++ bcast = daddr | (~mask); ++ ++ rt_ip_route_del_all(rtdev); /* cleanup routing table */ ++ ++ rtdev->local_ip = daddr; ++ rtdev->broadcast_ip = bcast; ++ ++ if ((tmp = rtdev_get_loopback()) != NULL) { ++ rt_ip_route_add_host(daddr, tmp->dev_addr, tmp); ++ rtdev_dereference(tmp); ++ } ++ ++ if (rtdev->flags & IFF_BROADCAST) ++ rt_ip_route_add_host(bcast, rtdev->broadcast, ++ rtdev); ++ } ++ ++ /* update routing table */ ++ rt_ip_route_add_host(saddr, rtskb->mac.ethernet->h_source, ++ rtdev); ++ ++ rtcfg_dev->spec.clt.srv_addr.ip_addr = saddr; ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ stage_1_cfg->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtcfg_dev->spec.clt.addr_type = addr_type; ++ ++ /* Ethernet-specific */ ++ memcpy(rtcfg_dev->spec.clt.srv_mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ rtcfg_dev->burstrate = stage_1_cfg->burstrate; ++ ++ rtcfg_next_main_state(ifindex, RTCFG_MAIN_CLIENT_1); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ if (cmd_event->internal.data.event_id == RTCFG_CMD_CLIENT) { ++ ret = 0; ++ ++ /* note: only the first pending call gets data */ ++ if ((rtskb != NULL) && ++ (cmd_event->args.client.buffer_size > 0)) { ++ ret = ntohs(stage_1_cfg->cfg_len); ++ ++ cmd_event->args.client.rtskb = rtskb; ++ rtskb = NULL; ++ } ++ } else ++ ret = -EINVAL; ++ ++ rtpc_complete_call(call, ret); ++ } ++ ++ if (rtskb) ++ kfree_rtskb(rtskb); ++} ++ ++static int rtcfg_add_to_station_list(struct rtcfg_device *rtcfg_dev, ++ u8 *mac_addr, u8 flags) ++{ ++ if (rtcfg_dev->stations_found == rtcfg_dev->spec.clt.max_stations) { ++ RTCFG_DEBUG( ++ 1, "RTcfg: insufficient memory for storing new station " ++ "address\n"); ++ return -ENOMEM; ++ } ++ ++ /* Ethernet-specific! */ ++ memcpy(&rtcfg_dev->spec.clt.station_addr_list[rtcfg_dev->stations_found] ++ .mac_addr, ++ mac_addr, ETH_ALEN); ++ ++ rtcfg_dev->spec.clt.station_addr_list[rtcfg_dev->stations_found].flags = ++ flags; ++ ++ rtcfg_dev->stations_found++; ++ if ((flags & _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready++; ++ ++ return 0; ++} ++ ++/* Notes: ++ * o rtcfg_client_recv_announce does not release the passed rtskb. ++ * o On success, rtcfg_client_recv_announce returns without releasing the ++ * device lock. 
++ */ ++static int rtcfg_client_recv_announce(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_announce *announce_frm; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ u32 announce_frm_addr; ++ int result; ++ ++ announce_frm = (struct rtcfg_frm_announce *)rtskb->data; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_announce)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid announce frame (id: %d)\n", ++ announce_frm->head.id); ++ return -EINVAL; ++ } ++ ++ switch (announce_frm->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: ++ if (rtskb->len < ++ sizeof(struct rtcfg_frm_announce) + RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid announce frame " ++ "(id: %d)\n", ++ announce_frm->head.id); ++ return -EINVAL; ++ } ++ ++ memcpy(&announce_frm_addr, announce_frm->addr, 4); ++ ++ /* update routing table */ ++ rt_ip_route_add_host(announce_frm_addr, ++ rtskb->mac.ethernet->h_source, ++ rtskb->rtdev); ++ ++ announce_frm = ++ (struct rtcfg_frm_announce *)(((u8 *)announce_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ break; ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ announce_frm->addr_type, __FUNCTION__); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! */ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN) == 0) ++ return 0; ++ ++ result = rtcfg_add_to_station_list( ++ rtcfg_dev, rtskb->mac.ethernet->h_source, announce_frm->flags); ++ if (result < 0) ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ return result; ++} ++ ++static void rtcfg_client_queue_frag(int ifindex, struct rtskb *rtskb, ++ size_t data_len) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ struct rt_proc_call *call; ++ struct rtcfg_cmd *cmd_event; ++ int result; ++ ++ rtskb_trim(rtskb, data_len); ++ ++ if (rtcfg_dev->spec.clt.stage2_chain == NULL) ++ rtcfg_dev->spec.clt.stage2_chain = rtskb; ++ else { ++ rtcfg_dev->spec.clt.stage2_chain->chain_end->next = rtskb; ++ rtcfg_dev->spec.clt.stage2_chain->chain_end = rtskb; ++ } ++ ++ rtcfg_dev->spec.clt.cfg_offs += data_len; ++ rtcfg_dev->spec.clt.chain_len += data_len; ++ ++ if ((rtcfg_dev->spec.clt.cfg_offs >= rtcfg_dev->spec.clt.cfg_len) || ++ (++rtcfg_dev->spec.clt.packet_counter == rtcfg_dev->burstrate)) { ++ while (1) { ++ call = rtcfg_dequeue_blocking_call(ifindex); ++ if (call == NULL) ++ break; ++ ++ cmd_event = rtpc_get_priv(call, struct rtcfg_cmd); ++ ++ result = 0; ++ ++ /* note: only the first pending call gets data */ ++ if (rtcfg_dev->spec.clt.stage2_chain != NULL) { ++ result = rtcfg_dev->spec.clt.chain_len; ++ cmd_event->args.announce.rtskb = ++ rtcfg_dev->spec.clt.stage2_chain; ++ rtcfg_dev->spec.clt.stage2_chain = NULL; ++ } ++ ++ rtpc_complete_call(call, ++ (cmd_event->internal.data.event_id == ++ RTCFG_CMD_ANNOUNCE) ? 
++ result : ++ -EINVAL); ++ } ++ ++ rtcfg_dev->spec.clt.packet_counter = 0; ++ rtcfg_dev->spec.clt.chain_len = 0; ++ } ++} ++ ++static void rtcfg_client_recv_stage_2_cfg(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_2_cfg *stage_2_cfg; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ size_t data_len; ++ int ret; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_2_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_2_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_2_cfg = (struct rtcfg_frm_stage_2_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_2_cfg)); ++ ++ if (stage_2_cfg->heartbeat_period) { ++ ret = rtdm_timer_init(&rtcfg_dev->timer, rtcfg_timer, ++ "rtcfg-timer"); ++ if (ret == 0) { ++ ret = rtdm_timer_start( ++ &rtcfg_dev->timer, XN_INFINITE, ++ (nanosecs_rel_t)ntohs( ++ stage_2_cfg->heartbeat_period) * ++ 1000000, ++ RTDM_TIMERMODE_RELATIVE); ++ if (ret < 0) ++ rtdm_timer_destroy(&rtcfg_dev->timer); ++ } ++ ++ if (ret < 0) ++ /*ERRMSG*/ rtdm_printk( ++ "RTcfg: unable to create timer task\n"); ++ else ++ set_bit(FLAG_TIMER_STARTED, &rtcfg_dev->flags); ++ } ++ ++ /* add server to station list */ ++ if (rtcfg_add_to_station_list(rtcfg_dev, rtskb->mac.ethernet->h_source, ++ stage_2_cfg->flags) < 0) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unable to process stage_2_cfg frage\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ rtcfg_dev->other_stations = ntohl(stage_2_cfg->stations); ++ rtcfg_dev->spec.clt.cfg_len = ntohl(stage_2_cfg->cfg_len); ++ data_len = MIN(rtcfg_dev->spec.clt.cfg_len, rtskb->len); ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) && ++ (data_len > 0)) { ++ rtcfg_client_queue_frag(ifindex, rtskb, data_len); ++ rtskb = NULL; ++ ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_KNOWN); ++ } else { ++ if (rtcfg_dev->stations_found == rtcfg_dev->other_stations) { ++ rtcfg_complete_cmd(ifindex, RTCFG_CMD_ANNOUNCE, 0); ++ ++ rtcfg_next_main_state(ifindex, ++ test_bit(RTCFG_FLAG_READY, ++ &rtcfg_dev->flags) ? 
++ RTCFG_MAIN_CLIENT_READY : ++ RTCFG_MAIN_CLIENT_2); ++ } else ++ rtcfg_next_main_state(ifindex, ++ RTCFG_MAIN_CLIENT_ALL_FRAMES); ++ ++ rtcfg_send_ack(ifindex); ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++static void rtcfg_client_recv_stage_2_frag(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_2_cfg_frag *stage_2_frag; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ size_t data_len; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_2_cfg_frag)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, ++ "RTcfg: received invalid stage_2_cfg_frag frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_2_frag = (struct rtcfg_frm_stage_2_cfg_frag *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_2_cfg_frag)); ++ ++ data_len = ++ MIN(rtcfg_dev->spec.clt.cfg_len - rtcfg_dev->spec.clt.cfg_offs, ++ rtskb->len); ++ ++ if (test_bit(RTCFG_FLAG_STAGE_2_DATA, &rtcfg_dev->flags) == 0) { ++ RTCFG_DEBUG(1, "RTcfg: unexpected stage 2 fragment, we did not " ++ "request any data!\n"); ++ ++ } else if (rtcfg_dev->spec.clt.cfg_offs != ++ ntohl(stage_2_frag->frag_offs)) { ++ RTCFG_DEBUG(1, ++ "RTcfg: unexpected stage 2 fragment (expected: %d, " ++ "received: %d)\n", ++ rtcfg_dev->spec.clt.cfg_offs, ++ ntohl(stage_2_frag->frag_offs)); ++ ++ rtcfg_send_ack(ifindex); ++ rtcfg_dev->spec.clt.packet_counter = 0; ++ } else { ++ rtcfg_client_queue_frag(ifindex, rtskb, data_len); ++ rtskb = NULL; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ if (rtskb != NULL) ++ kfree_rtskb(rtskb); ++} ++ ++/* Notes: ++ * o On success, rtcfg_client_recv_ready returns without releasing the ++ * device lock. ++ */ ++static int rtcfg_client_recv_ready(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_simple)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid ready frame\n"); ++ kfree_rtskb(rtskb); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! 
*/ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ rtskb->mac.ethernet->h_source, ETH_ALEN) == 0) { ++ if ((rtcfg_dev->spec.clt.station_addr_list[i].flags & ++ _RTCFG_FLAG_READY) == 0) { ++ rtcfg_dev->spec.clt.station_addr_list[i].flags |= ++ _RTCFG_FLAG_READY; ++ rtcfg_dev->stations_ready++; ++ } ++ break; ++ } ++ ++ kfree_rtskb(rtskb); ++ return 0; ++} ++ ++static void rtcfg_client_recv_dead_station(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_dead_station *dead_station_frm; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ u32 i; ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station *)rtskb->data; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_dead_station)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid dead station frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ switch (dead_station_frm->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ u32 ip; ++ ++ if (rtskb->len < ++ sizeof(struct rtcfg_frm_dead_station) + RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG( ++ 1, ++ "RTcfg: received invalid dead station frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ memcpy(&ip, dead_station_frm->logical_addr, 4); ++ ++ /* only delete remote IPs from routing table */ ++ if (rtskb->rtdev->local_ip != ip) ++ rt_ip_route_del_host(ip, rtskb->rtdev); ++ ++ dead_station_frm = (struct rtcfg_frm_dead_station ++ *)(((u8 *)dead_station_frm) + ++ RTCFG_ADDRSIZE_IP); ++ ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ dead_station_frm->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ for (i = 0; i < rtcfg_dev->stations_found; i++) ++ /* Ethernet-specific! 
*/ ++ if (memcmp(rtcfg_dev->spec.clt.station_addr_list[i].mac_addr, ++ dead_station_frm->physical_addr, ETH_ALEN) == 0) { ++ if ((rtcfg_dev->spec.clt.station_addr_list[i].flags & ++ _RTCFG_FLAG_READY) != 0) ++ rtcfg_dev->stations_ready--; ++ ++ rtcfg_dev->stations_found--; ++ memmove(&rtcfg_dev->spec.clt.station_addr_list[i], ++ &rtcfg_dev->spec.clt.station_addr_list[i + 1], ++ sizeof(struct rtcfg_station) * ++ (rtcfg_dev->stations_found - i)); ++ ++ if (rtcfg_dev->state == RTCFG_MAIN_CLIENT_ALL_KNOWN) ++ rtcfg_next_main_state( ++ ifindex, RTCFG_MAIN_CLIENT_ANNOUNCED); ++ break; ++ } ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++} ++ ++static void rtcfg_client_update_server(int ifindex, struct rtskb *rtskb) ++{ ++ struct rtcfg_frm_stage_1_cfg *stage_1_cfg; ++ struct rtcfg_device *rtcfg_dev = &device[ifindex]; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg frame\n"); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ stage_1_cfg = (struct rtcfg_frm_stage_1_cfg *)rtskb->data; ++ __rtskb_pull(rtskb, sizeof(struct rtcfg_frm_stage_1_cfg)); ++ ++ switch (stage_1_cfg->addr_type) { ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4) ++ case RTCFG_ADDR_IP: { ++ struct rtnet_device *rtdev; ++ u32 daddr, saddr; ++ ++ if (rtskb->len < sizeof(struct rtcfg_frm_stage_1_cfg) + ++ 2 * RTCFG_ADDRSIZE_IP) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: received invalid stage_1_cfg " ++ "frame\n"); ++ kfree_rtskb(rtskb); ++ break; ++ } ++ ++ rtdev = rtskb->rtdev; ++ ++ memcpy(&daddr, stage_1_cfg->client_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ memcpy(&saddr, stage_1_cfg->server_addr, 4); ++ stage_1_cfg = ++ (struct rtcfg_frm_stage_1_cfg *)(((u8 *)stage_1_cfg) + ++ RTCFG_ADDRSIZE_IP); ++ ++ __rtskb_pull(rtskb, 2 * RTCFG_ADDRSIZE_IP); ++ ++ /* directed to us? */ ++ if ((rtskb->pkt_type == PACKET_BROADCAST) && ++ (daddr != rtdev->local_ip)) { ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* update routing table */ ++ rt_ip_route_add_host(saddr, rtskb->mac.ethernet->h_source, ++ rtdev); ++ ++ rtcfg_dev->spec.clt.srv_addr.ip_addr = saddr; ++ break; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4 */ ++ ++ case RTCFG_ADDR_MAC: ++ /* nothing to do */ ++ break; ++ ++ default: ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ RTCFG_DEBUG(1, "RTcfg: unknown addr_type %d in %s()\n", ++ stage_1_cfg->addr_type, __FUNCTION__); ++ kfree_rtskb(rtskb); ++ return; ++ } ++ ++ /* Ethernet-specific */ ++ memcpy(rtcfg_dev->spec.clt.srv_mac_addr, rtskb->mac.ethernet->h_source, ++ ETH_ALEN); ++ ++ rtcfg_send_announce_reply(ifindex, rtskb->mac.ethernet->h_source); ++ ++ rtdm_mutex_unlock(&rtcfg_dev->dev_mutex); ++ ++ kfree_rtskb(rtskb); ++} +--- linux/drivers/xenomai/net/stack/rtdev_mgr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtdev_mgr.c 2021-04-07 16:01:26.600635062 +0800 +@@ -0,0 +1,127 @@ ++/*** ++ * ++ * stack/rtdev_mgr.c - device error manager ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++/*** ++ * rtnetif_err_rx: will be called from the driver ++ * ++ * ++ * @rtdev - the network-device ++ */ ++void rtnetif_err_rx(struct rtnet_device *rtdev) ++{ ++} ++ ++/*** ++ * rtnetif_err_tx: will be called from the driver ++ * ++ * ++ * @rtdev - the network-device ++ */ ++void rtnetif_err_tx(struct rtnet_device *rtdev) ++{ ++} ++ ++/*** ++ * do_rtdev_task ++ */ ++/*static void do_rtdev_task(int mgr_id) ++{ ++ struct rtnet_msg msg; ++ struct rtnet_mgr *mgr = (struct rtnet_mgr *)mgr_id; ++ ++ while (1) { ++ rt_mbx_receive(&(mgr->mbx), &msg, sizeof(struct rtnet_msg)); ++ if (msg.rtdev) { ++ rt_printk("RTnet: error on rtdev %s\n", msg.rtdev->name); ++ } ++ } ++}*/ ++ ++/*** ++ * rt_rtdev_connect ++ */ ++void rt_rtdev_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr) ++{ ++ /* rtdev->rtdev_mbx=&(mgr->mbx);*/ ++} ++ ++/*** ++ * rt_rtdev_disconnect ++ */ ++void rt_rtdev_disconnect(struct rtnet_device *rtdev) ++{ ++ /* rtdev->rtdev_mbx=NULL;*/ ++} ++ ++/*** ++ * rt_rtdev_mgr_start ++ */ ++int rt_rtdev_mgr_start(struct rtnet_mgr *mgr) ++{ ++ return /*(rt_task_resume(&(mgr->task)))*/ 0; ++} ++ ++/*** ++ * rt_rtdev_mgr_stop ++ */ ++int rt_rtdev_mgr_stop(struct rtnet_mgr *mgr) ++{ ++ return /*(rt_task_suspend(&(mgr->task)))*/ 0; ++} ++ ++/*** ++ * rt_rtdev_mgr_init ++ */ ++int rt_rtdev_mgr_init(struct rtnet_mgr *mgr) ++{ ++ int ret = 0; ++ ++ /* if ( (ret=rt_mbx_init (&(mgr->mbx), sizeof(struct rtnet_msg))) ) ++ return ret; ++ if ( (ret=rt_task_init(&(mgr->task), &do_rtdev_task, (int)mgr, 4096, RTNET_RTDEV_PRIORITY, 0, 0)) ) ++ return ret; ++ if ( (ret=rt_task_resume(&(mgr->task))) ) ++ return ret;*/ ++ ++ return (ret); ++} ++ ++/*** ++ * rt_rtdev_mgr_delete ++ */ ++void rt_rtdev_mgr_delete(struct rtnet_mgr *mgr) ++{ ++ /* rt_task_delete(&(mgr->task)); ++ rt_mbx_delete(&(mgr->mbx));*/ ++} ++ ++EXPORT_SYMBOL_GPL(rtnetif_err_rx); ++EXPORT_SYMBOL_GPL(rtnetif_err_tx); ++ ++EXPORT_SYMBOL_GPL(rt_rtdev_connect); ++EXPORT_SYMBOL_GPL(rt_rtdev_disconnect); +--- linux/drivers/xenomai/net/stack/stack_mgr.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/stack_mgr.c 2021-04-07 16:01:26.595635069 +0800 +@@ -0,0 +1,256 @@ ++/*** ++ * ++ * stack/stack_mgr.c - Stack-Manager ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * Copyright (C) 2003-2006 Jan Kiszka ++ * Copyright (C) 2006 Jorge Almeida ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static unsigned int stack_mgr_prio = RTNET_DEF_STACK_PRIORITY; ++module_param(stack_mgr_prio, uint, 0444); ++MODULE_PARM_DESC(stack_mgr_prio, "Priority of the stack manager task"); ++ ++#if (CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE & \ ++ (CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE - 1)) != 0 ++#error CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE must be power of 2! ++#endif ++static DECLARE_RTSKB_FIFO(rx, CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE); ++ ++struct list_head rt_packets[RTPACKET_HASH_TBL_SIZE]; ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++struct list_head rt_packets_all; ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++DEFINE_RTDM_LOCK(rt_packets_lock); ++ ++/*** ++ * rtdev_add_pack: add protocol (Layer 3) ++ * @pt: the new protocol ++ */ ++int __rtdev_add_pack(struct rtpacket_type *pt, struct module *module) ++{ ++ int ret = 0; ++ rtdm_lockctx_t context; ++ ++ INIT_LIST_HEAD(&pt->list_entry); ++ pt->refcount = 0; ++ if (pt->trylock == NULL) ++ pt->trylock = rtdev_lock_pack; ++ if (pt->unlock == NULL) ++ pt->unlock = rtdev_unlock_pack; ++ pt->owner = module; ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ ++ if (pt->type == htons(ETH_P_ALL)) ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ list_add_tail(&pt->list_entry, &rt_packets_all); ++#else /* !CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ret = -EINVAL; ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ else ++ list_add_tail( ++ &pt->list_entry, ++ &rt_packets[ntohs(pt->type) & RTPACKET_HASH_KEY_MASK]); ++ ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(__rtdev_add_pack); ++ ++/*** ++ * rtdev_remove_pack: remove protocol (Layer 3) ++ * @pt: protocol ++ */ ++void rtdev_remove_pack(struct rtpacket_type *pt) ++{ ++ rtdm_lockctx_t context; ++ ++ RTNET_ASSERT(pt != NULL, return;); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ list_del(&pt->list_entry); ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++} ++ ++EXPORT_SYMBOL_GPL(rtdev_remove_pack); ++ ++/*** ++ * rtnetif_rx: will be called from the driver interrupt handler ++ * (IRQs disabled!) 
and send a message to rtdev-owned stack-manager ++ * ++ * @skb - the packet ++ */ ++void rtnetif_rx(struct rtskb *skb) ++{ ++ RTNET_ASSERT(skb != NULL, return;); ++ RTNET_ASSERT(skb->rtdev != NULL, return;); ++ ++ if (unlikely(rtskb_fifo_insert_inirq(&rx.fifo, skb) < 0)) { ++ rtdm_printk("RTnet: dropping packet in %s()\n", __FUNCTION__); ++ kfree_rtskb(skb); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(rtnetif_rx); ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++#define __DELIVER_PREFIX ++#else /* !CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++#define __DELIVER_PREFIX static inline ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++__DELIVER_PREFIX void rt_stack_deliver(struct rtskb *rtskb) ++{ ++ unsigned short hash; ++ struct rtpacket_type *pt_entry; ++ rtdm_lockctx_t context; ++ struct rtnet_device *rtdev = rtskb->rtdev; ++ int err; ++ int eth_p_all_hit = 0; ++ ++ rtcap_report_incoming(rtskb); ++ ++ rtskb->nh.raw = rtskb->data; ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ eth_p_all_hit = 0; ++ list_for_each_entry (pt_entry, &rt_packets_all, list_entry) { ++ if (!pt_entry->trylock(pt_entry)) ++ continue; ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ pt_entry->handler(rtskb, pt_entry); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ pt_entry->unlock(pt_entry); ++ eth_p_all_hit = 1; ++ } ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ++ hash = ntohs(rtskb->protocol) & RTPACKET_HASH_KEY_MASK; ++ ++ list_for_each_entry (pt_entry, &rt_packets[hash], list_entry) ++ if (pt_entry->type == rtskb->protocol) { ++ if (!pt_entry->trylock(pt_entry)) ++ continue; ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ err = pt_entry->handler(rtskb, pt_entry); ++ ++ rtdm_lock_get_irqsave(&rt_packets_lock, context); ++ pt_entry->unlock(pt_entry); ++ ++ if (likely(!err)) { ++ rtdm_lock_put_irqrestore(&rt_packets_lock, ++ context); ++ return; ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&rt_packets_lock, context); ++ ++ /* Don't warn if ETH_P_ALL listener were present or when running in ++ promiscuous mode (RTcap). 
*/ ++ if (unlikely(!eth_p_all_hit && !(rtdev->flags & IFF_PROMISC))) ++ rtdm_printk("RTnet: no one cared for packet with layer 3 " ++ "protocol type 0x%04x\n", ++ ntohs(rtskb->protocol)); ++ ++ kfree_rtskb(rtskb); ++} ++ ++#if IS_ENABLED(CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK) ++EXPORT_SYMBOL_GPL(rt_stack_deliver); ++#endif /* CONFIG_XENO_DRIVERS_NET_DRV_LOOPBACK */ ++ ++static void rt_stack_mgr_task(void *arg) ++{ ++ rtdm_event_t *mgr_event = &((struct rtnet_mgr *)arg)->event; ++ struct rtskb *rtskb; ++ ++ while (!rtdm_task_should_stop()) { ++ if (rtdm_event_wait(mgr_event) < 0) ++ break; ++ ++ /* we are the only reader => no locking required */ ++ while ((rtskb = __rtskb_fifo_remove(&rx.fifo))) ++ rt_stack_deliver(rtskb); ++ } ++} ++ ++/*** ++ * rt_stack_connect ++ */ ++void rt_stack_connect(struct rtnet_device *rtdev, struct rtnet_mgr *mgr) ++{ ++ rtdev->stack_event = &mgr->event; ++} ++ ++EXPORT_SYMBOL_GPL(rt_stack_connect); ++ ++/*** ++ * rt_stack_disconnect ++ */ ++void rt_stack_disconnect(struct rtnet_device *rtdev) ++{ ++ rtdev->stack_event = NULL; ++} ++ ++EXPORT_SYMBOL_GPL(rt_stack_disconnect); ++ ++/*** ++ * rt_stack_mgr_init ++ */ ++int rt_stack_mgr_init(struct rtnet_mgr *mgr) ++{ ++ int i; ++ ++ rtskb_fifo_init(&rx.fifo, CONFIG_XENO_DRIVERS_NET_RX_FIFO_SIZE); ++ ++ for (i = 0; i < RTPACKET_HASH_TBL_SIZE; i++) ++ INIT_LIST_HEAD(&rt_packets[i]); ++#ifdef CONFIG_XENO_DRIVERS_NET_ETH_P_ALL ++ INIT_LIST_HEAD(&rt_packets_all); ++#endif /* CONFIG_XENO_DRIVERS_NET_ETH_P_ALL */ ++ ++ rtdm_event_init(&mgr->event, 0); ++ ++ return rtdm_task_init(&mgr->task, "rtnet-stack", rt_stack_mgr_task, mgr, ++ stack_mgr_prio, 0); ++} ++ ++/*** ++ * rt_stack_mgr_delete ++ */ ++void rt_stack_mgr_delete(struct rtnet_mgr *mgr) ++{ ++ rtdm_event_destroy(&mgr->event); ++ rtdm_task_destroy(&mgr->task); ++} +--- linux/drivers/xenomai/net/stack/socket.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/socket.c 2021-04-07 16:01:26.591635075 +0800 +@@ -0,0 +1,395 @@ ++/*** ++ * ++ * stack/socket.c - sockets implementation for rtnet ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define SKB_POOL_CLOSED 0 ++ ++static unsigned int socket_rtskbs = DEFAULT_SOCKET_RTSKBS; ++module_param(socket_rtskbs, uint, 0444); ++MODULE_PARM_DESC(socket_rtskbs, ++ "Default number of realtime socket buffers in socket pools"); ++ ++/************************************************************************ ++ * internal socket functions * ++ ************************************************************************/ ++ ++int rt_bare_socket_init(struct rtdm_fd *fd, unsigned short protocol, ++ unsigned int priority, unsigned int pool_size) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int err; ++ ++ err = rtskb_pool_init(&sock->skb_pool, pool_size, NULL, fd); ++ if (err < 0) ++ return err; ++ ++ sock->protocol = protocol; ++ sock->priority = priority; ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(rt_bare_socket_init); ++ ++/*** ++ * rt_socket_init - initialises a new socket structure ++ */ ++int rt_socket_init(struct rtdm_fd *fd, unsigned short protocol) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ unsigned int pool_size; ++ ++ sock->flags = 0; ++ sock->callback_func = NULL; ++ ++ rtskb_queue_init(&sock->incoming); ++ ++ sock->timeout = 0; ++ ++ rtdm_lock_init(&sock->param_lock); ++ rtdm_sem_init(&sock->pending_sem, 0); ++ ++ pool_size = rt_bare_socket_init(fd, protocol, ++ RTSKB_PRIO_VALUE(SOCK_DEF_PRIO, ++ RTSKB_DEF_RT_CHANNEL), ++ socket_rtskbs); ++ sock->pool_size = pool_size; ++ mutex_init(&sock->pool_nrt_lock); ++ ++ if (pool_size < socket_rtskbs) { ++ /* fix statistics */ ++ if (pool_size == 0) ++ rtskb_pools--; ++ ++ rt_socket_cleanup(fd); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rt_socket_init); ++ ++/*** ++ * rt_socket_cleanup - releases resources allocated for the socket ++ */ ++void rt_socket_cleanup(struct rtdm_fd *fd) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ++ rtdm_sem_destroy(&sock->pending_sem); ++ ++ mutex_lock(&sock->pool_nrt_lock); ++ ++ set_bit(SKB_POOL_CLOSED, &sock->flags); ++ ++ if (sock->pool_size > 0) ++ rtskb_pool_release(&sock->skb_pool); ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++} ++EXPORT_SYMBOL_GPL(rt_socket_cleanup); ++ ++/*** ++ * rt_socket_common_ioctl ++ */ ++int rt_socket_common_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ int ret = 0; ++ struct rtnet_callback *callback; ++ const unsigned int *val; ++ unsigned int _val; ++ const nanosecs_rel_t *timeout; ++ nanosecs_rel_t _timeout; ++ rtdm_lockctx_t context; ++ ++ switch (request) { ++ case RTNET_RTIOC_XMITPARAMS: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ sock->priority = *val; ++ break; ++ ++ case RTNET_RTIOC_TIMEOUT: ++ timeout = rtnet_get_arg(fd, &_timeout, arg, sizeof(_timeout)); ++ if (IS_ERR(timeout)) ++ return PTR_ERR(timeout); ++ sock->timeout = *timeout; ++ break; ++ ++ case RTNET_RTIOC_CALLBACK: ++ if (rtdm_fd_is_user(fd)) ++ return -EACCES; ++ ++ rtdm_lock_get_irqsave(&sock->param_lock, context); ++ ++ callback = arg; ++ sock->callback_func = callback->func; ++ sock->callback_arg = callback->arg; ++ ++ rtdm_lock_put_irqrestore(&sock->param_lock, context); ++ break; ++ ++ case RTNET_RTIOC_EXTPOOL: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ 
++ mutex_lock(&sock->pool_nrt_lock); ++ ++ if (test_bit(SKB_POOL_CLOSED, &sock->flags)) { ++ mutex_unlock(&sock->pool_nrt_lock); ++ return -EBADF; ++ } ++ ret = rtskb_pool_extend(&sock->skb_pool, *val); ++ sock->pool_size += ret; ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++ ++ if (ret == 0 && *val > 0) ++ ret = -ENOMEM; ++ ++ break; ++ ++ case RTNET_RTIOC_SHRPOOL: ++ val = rtnet_get_arg(fd, &_val, arg, sizeof(_val)); ++ if (IS_ERR(val)) ++ return PTR_ERR(val); ++ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ mutex_lock(&sock->pool_nrt_lock); ++ ++ ret = rtskb_pool_shrink(&sock->skb_pool, *val); ++ sock->pool_size -= ret; ++ ++ mutex_unlock(&sock->pool_nrt_lock); ++ ++ if (ret == 0 && *val > 0) ++ ret = -EBUSY; ++ ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rt_socket_common_ioctl); ++ ++/*** ++ * rt_socket_if_ioctl ++ */ ++int rt_socket_if_ioctl(struct rtdm_fd *fd, int request, void __user *arg) ++{ ++ struct rtnet_device *rtdev; ++ struct ifreq _ifr, *ifr, *u_ifr; ++ struct sockaddr_in _sin; ++ struct ifconf _ifc, *ifc, *u_ifc; ++ int ret = 0, size = 0, i; ++ short flags; ++ ++ if (request == SIOCGIFCONF) { ++ u_ifc = arg; ++ ifc = rtnet_get_arg(fd, &_ifc, u_ifc, sizeof(_ifc)); ++ if (IS_ERR(ifc)) ++ return PTR_ERR(ifc); ++ ++ for (u_ifr = ifc->ifc_req, i = 1; i <= MAX_RT_DEVICES; ++ i++, u_ifr++) { ++ rtdev = rtdev_get_by_index(i); ++ if (rtdev == NULL) ++ continue; ++ ++ if ((rtdev->flags & IFF_UP) == 0) { ++ rtdev_dereference(rtdev); ++ continue; ++ } ++ ++ size += sizeof(struct ifreq); ++ if (size > ifc->ifc_len) { ++ rtdev_dereference(rtdev); ++ size = ifc->ifc_len; ++ break; ++ } ++ ++ ret = rtnet_put_arg(fd, u_ifr->ifr_name, rtdev->name, ++ IFNAMSIZ); ++ if (ret == 0) { ++ memset(&_sin, 0, sizeof(_sin)); ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = rtdev->local_ip; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_addr, &_sin, ++ sizeof(_sin)); ++ } ++ ++ rtdev_dereference(rtdev); ++ if (ret) ++ return ret; ++ } ++ ++ return rtnet_put_arg(fd, &u_ifc->ifc_len, &size, sizeof(size)); ++ } ++ ++ u_ifr = arg; ++ ifr = rtnet_get_arg(fd, &_ifr, u_ifr, sizeof(_ifr)); ++ if (IS_ERR(ifr)) ++ return PTR_ERR(ifr); ++ ++ if (request == SIOCGIFNAME) { ++ rtdev = rtdev_get_by_index(ifr->ifr_ifindex); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ret = rtnet_put_arg(fd, u_ifr->ifr_name, rtdev->name, IFNAMSIZ); ++ goto out; ++ } ++ ++ rtdev = rtdev_get_by_name(ifr->ifr_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ switch (request) { ++ case SIOCGIFINDEX: ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_ifindex, &rtdev->ifindex, ++ sizeof(u_ifr->ifr_ifindex)); ++ break; ++ ++ case SIOCGIFFLAGS: ++ flags = rtdev->flags; ++ if ((ifr->ifr_flags & IFF_UP) && ++ (rtdev->link_state & ++ (RTNET_LINK_STATE_PRESENT | RTNET_LINK_STATE_NOCARRIER)) == ++ RTNET_LINK_STATE_PRESENT) ++ flags |= IFF_RUNNING; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_flags, &flags, ++ sizeof(u_ifr->ifr_flags)); ++ break; ++ ++ case SIOCGIFHWADDR: ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_hwaddr.sa_data, ++ rtdev->dev_addr, rtdev->addr_len); ++ if (!ret) ++ ret = rtnet_put_arg( ++ fd, &u_ifr->ifr_hwaddr.sa_family, &rtdev->type, ++ sizeof(u_ifr->ifr_hwaddr.sa_family)); ++ break; ++ ++ case SIOCGIFADDR: ++ memset(&_sin, 0, sizeof(_sin)); ++ _sin.sin_family = AF_INET; ++ _sin.sin_addr.s_addr = rtdev->local_ip; ++ ret = rtnet_put_arg(fd, &u_ifr->ifr_addr, &_sin, sizeof(_sin)); ++ break; ++ ++ case SIOCETHTOOL: ++ if (rtdev->do_ioctl != NULL) { ++ if (rtdm_in_rt_context()) ++ return 
-ENOSYS; ++ ret = rtdev->do_ioctl(rtdev, ifr, request); ++ } else ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case SIOCDEVPRIVATE ... SIOCDEVPRIVATE + 15: ++ if (rtdev->do_ioctl != NULL) ++ ret = rtdev->do_ioctl(rtdev, ifr, request); ++ else ++ ret = -EOPNOTSUPP; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++out: ++ rtdev_dereference(rtdev); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rt_socket_if_ioctl); ++ ++int rt_socket_select_bind(struct rtdm_fd *fd, rtdm_selector_t *selector, ++ enum rtdm_selecttype type, unsigned fd_index) ++{ ++ struct rtsocket *sock = rtdm_fd_to_private(fd); ++ ++ switch (type) { ++ case XNSELECT_READ: ++ return rtdm_sem_select(&sock->pending_sem, selector, ++ XNSELECT_READ, fd_index); ++ default: ++ return -EBADF; ++ } ++ ++ return -EINVAL; ++} ++EXPORT_SYMBOL_GPL(rt_socket_select_bind); ++ ++void *rtnet_get_arg(struct rtdm_fd *fd, void *tmp, const void *src, size_t len) ++{ ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) ++ return (void *)src; ++ ++ ret = rtdm_copy_from_user(fd, tmp, src, len); ++ if (ret) ++ return ERR_PTR(ret); ++ ++ return tmp; ++} ++EXPORT_SYMBOL_GPL(rtnet_get_arg); ++ ++int rtnet_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ if (dst != src) ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_to_user(fd, dst, src, len); ++} ++EXPORT_SYMBOL_GPL(rtnet_put_arg); +--- linux/drivers/xenomai/net/stack/rtnet_chrdev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtnet_chrdev.c 2021-04-07 16:01:26.586635082 +0800 +@@ -0,0 +1,240 @@ ++/*** ++ * ++ * stack/rtnet_chrdev.c - implements char device for management interface ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static DEFINE_SPINLOCK(ioctl_handler_lock); ++static LIST_HEAD(ioctl_handlers); ++ ++static long rtnet_ioctl(struct file *file, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtnet_ioctl_head head; ++ struct rtnet_device *rtdev = NULL; ++ struct rtnet_ioctls *ioctls; ++ struct list_head *entry; ++ int ret; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ ret = copy_from_user(&head, (void *)arg, sizeof(head)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ spin_lock(&ioctl_handler_lock); ++ ++ list_for_each (entry, &ioctl_handlers) { ++ ioctls = list_entry(entry, struct rtnet_ioctls, entry); ++ ++ if (ioctls->ioctl_type == _IOC_TYPE(request)) { ++ atomic_inc(&ioctls->ref_count); ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ if ((_IOC_NR(request) & RTNET_IOC_NODEV_PARAM) == 0) { ++ rtdev = rtdev_get_by_name(head.if_name); ++ if (!rtdev) { ++ atomic_dec(&ioctls->ref_count); ++ return -ENODEV; ++ } ++ } ++ ++ ret = ioctls->handler(rtdev, request, arg); ++ ++ if (rtdev) ++ rtdev_dereference(rtdev); ++ atomic_dec(&ioctls->ref_count); ++ ++ return ret; ++ } ++ } ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ return -ENOTTY; ++} ++ ++static int rtnet_core_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtnet_core_cmd cmd; ++ int ret; ++ ++ ret = copy_from_user(&cmd, (void *)arg, sizeof(cmd)); ++ if (ret != 0) ++ return -EFAULT; ++ ++ switch (request) { ++ case IOC_RT_IFUP: ++ ret = rtdev_up(rtdev, &cmd); ++ break; ++ ++ case IOC_RT_IFDOWN: ++ ret = rtdev_down(rtdev); ++ break; ++ ++ case IOC_RT_IFINFO: ++ if (cmd.args.info.ifindex > 0) ++ rtdev = rtdev_get_by_index(cmd.args.info.ifindex); ++ else ++ rtdev = rtdev_get_by_name(cmd.head.if_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) { ++ rtdev_dereference(rtdev); ++ return -ERESTARTSYS; ++ } ++ ++ memcpy(cmd.head.if_name, rtdev->name, IFNAMSIZ); ++ cmd.args.info.ifindex = rtdev->ifindex; ++ cmd.args.info.type = rtdev->type; ++ cmd.args.info.ip_addr = rtdev->local_ip; ++ cmd.args.info.broadcast_ip = rtdev->broadcast_ip; ++ cmd.args.info.mtu = rtdev->mtu; ++ cmd.args.info.flags = rtdev->flags; ++ if ((cmd.args.info.flags & IFF_UP) && ++ (rtdev->link_state & ++ (RTNET_LINK_STATE_PRESENT | RTNET_LINK_STATE_NOCARRIER)) == ++ RTNET_LINK_STATE_PRESENT) ++ cmd.args.info.flags |= IFF_RUNNING; ++ ++ memcpy(cmd.args.info.dev_addr, rtdev->dev_addr, MAX_ADDR_LEN); ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ return ret; ++} ++ ++int rtnet_register_ioctls(struct rtnet_ioctls *ioctls) ++{ ++ struct list_head *entry; ++ struct rtnet_ioctls *registered_ioctls; ++ ++ RTNET_ASSERT(ioctls->handler != NULL, return -EINVAL;); ++ ++ spin_lock(&ioctl_handler_lock); ++ ++ list_for_each (entry, &ioctl_handlers) { ++ registered_ioctls = ++ list_entry(entry, struct rtnet_ioctls, entry); ++ if (registered_ioctls->ioctl_type == ioctls->ioctl_type) { ++ spin_unlock(&ioctl_handler_lock); ++ return -EEXIST; ++ } ++ } ++ ++ list_add_tail(&ioctls->entry, 
&ioctl_handlers); ++ atomic_set(&ioctls->ref_count, 0); ++ ++ spin_unlock(&ioctl_handler_lock); ++ ++ return 0; ++} ++ ++void rtnet_unregister_ioctls(struct rtnet_ioctls *ioctls) ++{ ++ spin_lock(&ioctl_handler_lock); ++ ++ while (atomic_read(&ioctls->ref_count) != 0) { ++ spin_unlock(&ioctl_handler_lock); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(1 * HZ); /* wait a second */ ++ ++ spin_lock(&ioctl_handler_lock); ++ } ++ ++ list_del(&ioctls->entry); ++ ++ spin_unlock(&ioctl_handler_lock); ++} ++ ++static struct file_operations rtnet_fops = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = rtnet_ioctl, ++}; ++ ++static struct miscdevice rtnet_chr_misc_dev = { ++ .minor = RTNET_MINOR, ++ .name = "rtnet", ++ .fops = &rtnet_fops, ++}; ++ ++static struct rtnet_ioctls core_ioctls = { .service_name = "RTnet Core", ++ .ioctl_type = RTNET_IOC_TYPE_CORE, ++ .handler = rtnet_core_ioctl }; ++ ++/** ++ * rtnet_chrdev_init - ++ * ++ */ ++int __init rtnet_chrdev_init(void) ++{ ++ int err; ++ ++ err = misc_register(&rtnet_chr_misc_dev); ++ if (err) { ++ printk("RTnet: unable to register rtnet management device/class " ++ "(error %d)\n", ++ err); ++ return err; ++ } ++ ++ rtnet_register_ioctls(&core_ioctls); ++ return 0; ++} ++ ++/** ++ * rtnet_chrdev_release - ++ * ++ */ ++void rtnet_chrdev_release(void) ++{ ++ misc_deregister(&rtnet_chr_misc_dev); ++} ++ ++EXPORT_SYMBOL_GPL(rtnet_register_ioctls); ++EXPORT_SYMBOL_GPL(rtnet_unregister_ioctls); +--- linux/drivers/xenomai/net/stack/rtwlan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/rtwlan.c 2021-04-07 16:01:26.581635089 +0800 +@@ -0,0 +1,219 @@ ++/* rtwlan.c ++ * ++ * rtwlan protocol stack ++ * Copyright (c) 2006, Daniel Gregorek ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++ ++#include ++ ++#include ++ ++int rtwlan_rx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev) ++{ ++ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rtskb->data; ++ u16 fc = le16_to_cpu(hdr->frame_ctl); ++ ++ /* strip rtwlan header */ ++ rtskb_pull(rtskb, ieee80211_get_hdrlen(fc)); ++ rtskb->protocol = rt_eth_type_trans(rtskb, rtnet_dev); ++ ++ /* forward rtskb to rtnet */ ++ rtnetif_rx(rtskb); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_rx); ++ ++int rtwlan_tx(struct rtskb *rtskb, struct rtnet_device *rtnet_dev) ++{ ++ struct rtwlan_device *rtwlan_dev = rtnetdev_priv(rtnet_dev); ++ struct ieee80211_hdr_3addr header = { /* Ensure zero initialized */ ++ .duration_id = 0, ++ .seq_ctl = 0 ++ }; ++ int ret; ++ u8 dest[ETH_ALEN], src[ETH_ALEN]; ++ ++ /* Get source and destination addresses */ ++ ++ memcpy(src, rtskb->data + ETH_ALEN, ETH_ALEN); ++ ++ if (rtwlan_dev->mode == RTWLAN_TXMODE_MCAST) { ++ memcpy(dest, rtnet_dev->dev_addr, ETH_ALEN); ++ dest[0] |= 0x01; ++ } else { ++ memcpy(dest, rtskb->data, ETH_ALEN); ++ } ++ ++ /* ++ * Generate ieee80211 compatible header ++ */ ++ memcpy(header.addr3, src, ETH_ALEN); /* BSSID */ ++ memcpy(header.addr2, src, ETH_ALEN); /* SA */ ++ memcpy(header.addr1, dest, ETH_ALEN); /* DA */ ++ ++ /* Write frame control field */ ++ header.frame_ctl = ++ cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); ++ ++ memcpy(rtskb_push(rtskb, IEEE80211_3ADDR_LEN), &header, ++ IEEE80211_3ADDR_LEN); ++ ++ ret = (*rtwlan_dev->hard_start_xmit)(rtskb, rtnet_dev); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_tx); ++ ++/** ++ * rtalloc_wlandev - Allocates and sets up a wlan device ++ * @sizeof_priv: size of additional driver-private structure to ++ * be allocated for this wlan device ++ * ++ * Fill in the fields of the device structure with wlan-generic ++ * values. Basically does everything except registering the device. ++ * ++ * A 32-byte alignment is enforced for the private data area. ++ */ ++ ++struct rtnet_device *rtwlan_alloc_dev(unsigned sizeof_priv, ++ unsigned dev_pool_size) ++{ ++ struct rtnet_device *rtnet_dev; ++ ++ RTWLAN_DEBUG("Start.\n"); ++ ++ rtnet_dev = rt_alloc_etherdev( ++ sizeof(struct rtwlan_device) + sizeof_priv, dev_pool_size); ++ if (!rtnet_dev) ++ return NULL; ++ ++ rtnet_dev->hard_start_xmit = rtwlan_tx; ++ ++ rtdev_alloc_name(rtnet_dev, "rtwlan%d"); ++ ++ return rtnet_dev; ++} ++ ++EXPORT_SYMBOL_GPL(rtwlan_alloc_dev); ++ ++int rtwlan_ioctl(struct rtnet_device *rtdev, unsigned int request, ++ unsigned long arg) ++{ ++ struct rtwlan_cmd cmd; ++ struct ifreq ifr; ++ int ret = 0; ++ ++ if (copy_from_user(&cmd, (void *)arg, sizeof(cmd)) != 0) ++ return -EFAULT; ++ ++ /* ++ * FIXME: proper do_ioctl() should expect a __user pointer ++ * arg. This only works with the existing WLAN support because the ++ * only driver currently providing this feature is broken, not ++ * doing the copy_to/from_user dance. 
++ */ ++ memset(&ifr, 0, sizeof(ifr)); ++ ifr.ifr_data = &cmd; ++ ++ switch (request) { ++ case IOC_RTWLAN_IFINFO: ++ if (cmd.args.info.ifindex > 0) ++ rtdev = rtdev_get_by_index(cmd.args.info.ifindex); ++ else ++ rtdev = rtdev_get_by_name(cmd.head.if_name); ++ if (rtdev == NULL) ++ return -ENODEV; ++ ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) { ++ rtdev_dereference(rtdev); ++ return -ERESTARTSYS; ++ } ++ ++ if (rtdev->do_ioctl) ++ ret = rtdev->do_ioctl(rtdev, &ifr, request); ++ else ++ ret = -ENORTWLANDEV; ++ ++ memcpy(cmd.head.if_name, rtdev->name, IFNAMSIZ); ++ cmd.args.info.ifindex = rtdev->ifindex; ++ cmd.args.info.flags = rtdev->flags; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ rtdev_dereference(rtdev); ++ ++ break; ++ ++ case IOC_RTWLAN_TXMODE: ++ case IOC_RTWLAN_BITRATE: ++ case IOC_RTWLAN_CHANNEL: ++ case IOC_RTWLAN_RETRY: ++ case IOC_RTWLAN_TXPOWER: ++ case IOC_RTWLAN_AUTORESP: ++ case IOC_RTWLAN_DROPBCAST: ++ case IOC_RTWLAN_DROPMCAST: ++ case IOC_RTWLAN_REGREAD: ++ case IOC_RTWLAN_REGWRITE: ++ case IOC_RTWLAN_BBPWRITE: ++ case IOC_RTWLAN_BBPREAD: ++ case IOC_RTWLAN_BBPSENS: ++ if (mutex_lock_interruptible(&rtdev->nrt_lock)) ++ return -ERESTARTSYS; ++ ++ if (rtdev->do_ioctl) ++ ret = rtdev->do_ioctl(rtdev, &ifr, request); ++ else ++ ret = -ENORTWLANDEV; ++ ++ mutex_unlock(&rtdev->nrt_lock); ++ ++ break; ++ ++ default: ++ ret = -ENOTTY; ++ } ++ ++ if (copy_to_user((void *)arg, &cmd, sizeof(cmd)) != 0) ++ return -EFAULT; ++ ++ return ret; ++} ++ ++struct rtnet_ioctls rtnet_wlan_ioctls = { ++ service_name: "rtwlan ioctl", ++ ioctl_type: RTNET_IOC_TYPE_RTWLAN, ++ handler: rtwlan_ioctl ++}; ++ ++int __init rtwlan_init(void) ++{ ++ if (rtnet_register_ioctls(&rtnet_wlan_ioctls)) ++ rtdm_printk(KERN_ERR "Failed to register rtnet_wlan_ioctl!\n"); ++ ++ return 0; ++} ++ ++void rtwlan_exit(void) ++{ ++ rtnet_unregister_ioctls(&rtnet_wlan_ioctls); ++} +--- linux/drivers/xenomai/net/stack/iovec.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/iovec.c 2021-04-07 16:01:26.576635096 +0800 +@@ -0,0 +1,103 @@ ++/*** ++ * ++ * stack/iovec.c ++ * ++ * RTnet - real-time networking subsystem ++ * Copyright (C) 1999,2000 Zentropic Computing, LLC ++ * 2002 Ulrich Marx ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ssize_t rtnet_write_to_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ const void *data, size_t len) ++{ ++ ssize_t ret = 0; ++ size_t nbytes; ++ int n; ++ ++ for (n = 0; len > 0 && n < iovlen; n++, iov++) { ++ if (iov->iov_len == 0) ++ continue; ++ ++ nbytes = iov->iov_len; ++ if (nbytes > len) ++ nbytes = len; ++ ++ ret = rtnet_put_arg(fd, iov->iov_base, data, nbytes); ++ if (ret) ++ break; ++ ++ len -= nbytes; ++ data += nbytes; ++ iov->iov_len -= nbytes; ++ iov->iov_base += nbytes; ++ ret += nbytes; ++ if (ret < 0) { ++ ret = -EINVAL; ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtnet_write_to_iov); ++ ++ssize_t rtnet_read_from_iov(struct rtdm_fd *fd, struct iovec *iov, int iovlen, ++ void *data, size_t len) ++{ ++ ssize_t ret = 0; ++ size_t nbytes; ++ int n; ++ ++ for (n = 0; len > 0 && n < iovlen; n++, iov++) { ++ if (iov->iov_len == 0) ++ continue; ++ ++ nbytes = iov->iov_len; ++ if (nbytes > len) ++ nbytes = len; ++ ++ if (!rtdm_fd_is_user(fd)) ++ memcpy(data, iov->iov_base, nbytes); ++ else { ++ ret = rtdm_copy_from_user(fd, data, iov->iov_base, ++ nbytes); ++ if (ret) ++ break; ++ } ++ ++ len -= nbytes; ++ data += nbytes; ++ iov->iov_len -= nbytes; ++ iov->iov_base += nbytes; ++ ret += nbytes; ++ if (ret < 0) { ++ ret = -EINVAL; ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtnet_read_from_iov); +--- linux/drivers/xenomai/net/stack/corectl.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/stack/corectl.c 2021-04-07 16:01:26.572635102 +0800 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 2016 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++static int rtnet_corectl_call(struct notifier_block *self, unsigned long arg, ++ void *cookie) ++{ ++ struct cobalt_config_vector *vec = cookie; ++ int ret = 0; ++ ++ if (arg != _CC_COBALT_GET_NET_CONFIG) ++ return NOTIFY_DONE; ++ ++ if (vec->u_bufsz < sizeof(ret)) ++ return notifier_from_errno(-EINVAL); ++ ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET)) ++ ret |= _CC_COBALT_NET; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ETH_P_ALL)) ++ ret |= _CC_COBALT_NET_ETH_P_ALL; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4)) ++ ret |= _CC_COBALT_NET_IPV4; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_ICMP)) ++ ret |= _CC_COBALT_NET_ICMP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING)) ++ ret |= _CC_COBALT_NET_NETROUTING; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTE)) ++ ret |= _CC_COBALT_NET_ROUTER; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTIPV4_UDP)) ++ ret |= _CC_COBALT_NET_UDP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTPACKET)) ++ ret |= _CC_COBALT_NET_AF_PACKET; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_TDMA)) ++ ret |= _CC_COBALT_NET_TDMA; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_NOMAC)) ++ ret |= _CC_COBALT_NET_NOMAC; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_RTCFG)) ++ ret |= _CC_COBALT_NET_CFG; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_RTCAP)) ++ ret |= _CC_COBALT_NET_CAP; ++ if (IS_ENABLED(CONFIG_XENO_DRIVERS_NET_ADDON_PROXY)) ++ ret |= _CC_COBALT_NET_PROXY; ++ ++ ret = cobalt_copy_to_user(vec->u_buf, &ret, sizeof(ret)); ++ ++ return ret ? notifier_from_errno(-EFAULT) : NOTIFY_STOP; ++} ++ ++static struct notifier_block rtnet_corectl_notifier = { ++ .notifier_call = rtnet_corectl_call, ++}; ++ ++void rtnet_corectl_register(void) ++{ ++ cobalt_add_config_chain(&rtnet_corectl_notifier); ++} ++ ++void rtnet_corectl_unregister(void) ++{ ++ cobalt_remove_config_chain(&rtnet_corectl_notifier); ++} +--- linux/drivers/xenomai/net/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/Makefile 2021-04-07 16:01:26.567635109 +0800 +@@ -0,0 +1 @@ ++obj-$(CONFIG_XENO_DRIVERS_NET) += stack/ drivers/ addons/ +--- linux/drivers/xenomai/net/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/net/Kconfig 2021-04-07 16:01:26.562635116 +0800 +@@ -0,0 +1,25 @@ ++menu "RTnet" ++ ++config XENO_DRIVERS_NET ++ depends on m ++ select NET ++ tristate "RTnet, TCP/IP socket interface" ++ ++if XENO_DRIVERS_NET ++ ++config XENO_DRIVERS_RTNET_CHECKED ++ bool "Internal Bug Checks" ++ default n ++ ---help--- ++ Switch on if you face crashes when RTnet is running or if you suspect ++ any other RTnet-related issues. This feature will add a few sanity ++ checks at critical points that will produce warnings on the kernel ++ console in case certain internal bugs are detected. ++ ++source "drivers/xenomai/net/stack/Kconfig" ++source "drivers/xenomai/net/drivers/Kconfig" ++source "drivers/xenomai/net/addons/Kconfig" ++ ++endif ++ ++endmenu +--- linux/drivers/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/Kconfig 2021-04-07 16:01:26.552635131 +0800 +@@ -0,0 +1,35 @@ ++menu "Drivers" ++ ++config XENO_OPT_RTDM_COMPAT_DEVNODE ++ bool "Enable legacy pathnames for named RTDM devices" ++ default y ++ help ++ This compatibility option allows applications to open named ++ RTDM devices using the legacy naming scheme, i.e. 
++ ++ fd = open("devname", ...); ++ or ++ fd = open("/dev/devname", ...); ++ ++ When such a request is received by RTDM, a warning message is ++ issued to the kernel log whenever XENO_OPT_DEBUG_LEGACY is ++ also enabled in the kernel configuration. ++ ++ Applications should open named devices via their actual device ++ nodes instead, i.e. ++ ++ fd = open("/dev/rtdm/devname", ...); ++ ++source "drivers/xenomai/autotune/Kconfig" ++source "drivers/xenomai/serial/Kconfig" ++source "drivers/xenomai/testing/Kconfig" ++source "drivers/xenomai/can/Kconfig" ++source "drivers/xenomai/net/Kconfig" ++source "drivers/xenomai/analogy/Kconfig" ++source "drivers/xenomai/ipc/Kconfig" ++source "drivers/xenomai/udd/Kconfig" ++source "drivers/xenomai/gpio/Kconfig" ++source "drivers/xenomai/gpiopwm/Kconfig" ++source "drivers/xenomai/spi/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/udd/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/Makefile 2021-04-07 16:01:26.547635138 +0800 +@@ -0,0 +1,5 @@ ++ccflags-y += -Ikernel ++ ++obj-$(CONFIG_XENO_DRIVERS_UDD) += xeno_udd.o ++ ++xeno_udd-y := udd.o +--- linux/drivers/xenomai/udd/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/Kconfig 2021-04-07 16:01:26.543635144 +0800 +@@ -0,0 +1,10 @@ ++menu "UDD support" ++ ++config XENO_DRIVERS_UDD ++ tristate "User-space device driver framework" ++ help ++ ++ A RTDM-based driver for enabling interrupt control and I/O ++ memory access interfaces to user-space device drivers. ++ ++endmenu +--- linux/drivers/xenomai/udd/udd.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/udd/udd.c 2021-04-07 16:01:26.538635151 +0800 +@@ -0,0 +1,658 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct udd_context { ++ u32 event_count; ++}; ++ ++static int udd_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct udd_context *context; ++ struct udd_device *udd; ++ int ret; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.open) { ++ ret = udd->ops.open(fd, oflags); ++ if (ret) ++ return ret; ++ } ++ ++ context = rtdm_fd_to_private(fd); ++ context->event_count = 0; ++ ++ return 0; ++} ++ ++static void udd_close(struct rtdm_fd *fd) ++{ ++ struct udd_device *udd; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.close) ++ udd->ops.close(fd); ++} ++ ++static int udd_ioctl_rt(struct rtdm_fd *fd, ++ unsigned int request, void __user *arg) ++{ ++ struct udd_signotify signfy; ++ struct udd_reserved *ur; ++ struct udd_device *udd; ++ rtdm_event_t done; ++ int ret; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->ops.ioctl) { ++ ret = udd->ops.ioctl(fd, request, arg); ++ if (ret != -ENOSYS) ++ return ret; ++ } ++ ++ ur = &udd->__reserved; ++ ++ switch (request) { ++ case UDD_RTIOC_IRQSIG: ++ ret = rtdm_safe_copy_from_user(fd, &signfy, arg, sizeof(signfy)); ++ if (ret) ++ return ret; ++ /* Early check, we'll redo at each signal issue. */ ++ if (signfy.pid <= 0) ++ ur->signfy.pid = -1; ++ else { ++ if (signfy.sig < SIGRTMIN || signfy.sig > SIGRTMAX) ++ return -EINVAL; ++ if (cobalt_thread_find_local(signfy.pid) == NULL) ++ return -EINVAL; ++ ur->signfy = signfy; ++ } ++ break; ++ case UDD_RTIOC_IRQEN: ++ case UDD_RTIOC_IRQDIS: ++ if (udd->irq == UDD_IRQ_NONE || udd->irq == UDD_IRQ_CUSTOM) ++ return -EIO; ++ rtdm_event_init(&done, 0); ++ if (request == UDD_RTIOC_IRQEN) ++ udd_enable_irq(udd, &done); ++ else ++ udd_disable_irq(udd, &done); ++ ret = rtdm_event_wait(&done); ++ if (ret != -EIDRM) ++ rtdm_event_destroy(&done); ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t udd_read_rt(struct rtdm_fd *fd, ++ void __user *buf, size_t len) ++{ ++ struct udd_context *context; ++ struct udd_reserved *ur; ++ struct udd_device *udd; ++ rtdm_lockctx_t ctx; ++ ssize_t ret = 0; ++ u32 count; ++ ++ if (len != sizeof(count)) ++ return -EINVAL; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->irq == UDD_IRQ_NONE) ++ return -EIO; ++ ++ ur = &udd->__reserved; ++ context = rtdm_fd_to_private(fd); ++ ++ cobalt_atomic_enter(ctx); ++ ++ if (ur->event_count != context->event_count) ++ rtdm_event_clear(&ur->pulse); ++ else ++ ret = rtdm_event_wait(&ur->pulse); ++ ++ count = ur->event_count; ++ ++ cobalt_atomic_leave(ctx); ++ ++ if (ret) ++ return ret; ++ ++ context->event_count = count; ++ ret = rtdm_copy_to_user(fd, buf, &count, sizeof(count)); ++ ++ return ret ?: sizeof(count); ++} ++ ++static ssize_t udd_write_rt(struct rtdm_fd *fd, ++ const void __user *buf, size_t len) ++{ ++ int ret; ++ u32 val; ++ ++ if (len != sizeof(val)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_from_user(fd, &val, buf, sizeof(val)); ++ if (ret) ++ return ret; ++ ++ ret = udd_ioctl_rt(fd, val ? 
UDD_RTIOC_IRQEN : UDD_RTIOC_IRQDIS, NULL); ++ ++ return ret ?: len; ++} ++ ++static int udd_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct udd_device *udd; ++ ++ udd = container_of(rtdm_fd_device(fd), struct udd_device, __reserved.device); ++ if (udd->irq == UDD_IRQ_NONE) ++ return -EIO; ++ ++ return rtdm_event_select(&udd->__reserved.pulse, ++ selector, type, index); ++} ++ ++static int udd_irq_handler(rtdm_irq_t *irqh) ++{ ++ struct udd_device *udd; ++ int ret; ++ ++ udd = rtdm_irq_get_arg(irqh, struct udd_device); ++ ret = udd->ops.interrupt(udd); ++ if (ret == RTDM_IRQ_HANDLED) ++ udd_notify_event(udd); ++ ++ return ret; ++} ++ ++static int mapper_open(struct rtdm_fd *fd, int oflags) ++{ ++ int minor = rtdm_fd_minor(fd); ++ struct udd_device *udd; ++ ++ /* ++ * Check that we are opening a mapper instance pointing at a ++ * valid memory region. e.g. UDD creates the companion device ++ * "foo,mapper" on the fly when registering the main device ++ * "foo". Userland may then open("/dev/foo,mapper0", ...) ++ * followed by a call to mmap() for mapping the memory region ++ * #0 as declared in the mem_regions[] array of the main ++ * device. ++ * ++ * We support sparse region arrays, so the device minor shall ++ * match the mem_regions[] index exactly. ++ */ ++ if (minor < 0 || minor >= UDD_NR_MAPS) ++ return -EIO; ++ ++ udd = udd_get_device(fd); ++ if (udd->mem_regions[minor].type == UDD_MEM_NONE) ++ return -EIO; ++ ++ return 0; ++} ++ ++static void mapper_close(struct rtdm_fd *fd) ++{ ++ /* nop */ ++} ++ ++static int mapper_mmap(struct rtdm_fd *fd, struct vm_area_struct *vma) ++{ ++ struct udd_memregion *rn; ++ struct udd_device *udd; ++ size_t len; ++ int ret; ++ ++ udd = udd_get_device(fd); ++ if (udd->ops.mmap) ++ /* Offload to client driver if handler is present. */ ++ return udd->ops.mmap(fd, vma); ++ ++ /* Otherwise DIY using the RTDM helpers. */ ++ ++ len = vma->vm_end - vma->vm_start; ++ rn = udd->mem_regions + rtdm_fd_minor(fd); ++ if (rn->len < len) ++ /* Can't map that much, bail out. */ ++ return -EINVAL; ++ ++ switch (rn->type) { ++ case UDD_MEM_PHYS: ++ ret = rtdm_mmap_iomem(vma, rn->addr); ++ break; ++ case UDD_MEM_LOGICAL: ++ ret = rtdm_mmap_kmem(vma, (void *)rn->addr); ++ break; ++ case UDD_MEM_VIRTUAL: ++ ret = rtdm_mmap_vmem(vma, (void *)rn->addr); ++ break; ++ default: ++ ret = -EINVAL; /* Paranoid, can't happen. 
*/ ++ } ++ ++ return ret; ++} ++ ++static inline int check_memregion(struct udd_device *udd, ++ struct udd_memregion *rn) ++{ ++ if (rn->name == NULL) ++ return -EINVAL; ++ ++ if (rn->addr == 0) ++ return -EINVAL; ++ ++ if (rn->len == 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static inline int register_mapper(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ struct rtdm_driver *drv = &ur->mapper_driver; ++ struct udd_mapper *mapper; ++ struct udd_memregion *rn; ++ int n, ret; ++ ++ ur->mapper_name = kasformat("%s,mapper%%d", udd->device_name); ++ if (ur->mapper_name == NULL) ++ return -ENOMEM; ++ ++ drv->profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(mapper, RTDM_CLASS_MEMORY, ++ RTDM_SUBCLASS_GENERIC, 0); ++ drv->device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR; ++ drv->device_count = UDD_NR_MAPS; ++ drv->base_minor = 0; ++ drv->ops = (struct rtdm_fd_ops){ ++ .open = mapper_open, ++ .close = mapper_close, ++ .mmap = mapper_mmap, ++ }; ++ ++ for (n = 0, mapper = ur->mapdev; n < UDD_NR_MAPS; n++, mapper++) { ++ rn = udd->mem_regions + n; ++ if (rn->type == UDD_MEM_NONE) ++ continue; ++ mapper->dev.driver = drv; ++ mapper->dev.label = ur->mapper_name; ++ mapper->dev.minor = n; ++ mapper->udd = udd; ++ ret = rtdm_dev_register(&mapper->dev); ++ if (ret) ++ goto undo; ++ } ++ ++ return 0; ++undo: ++ while (--n >= 0) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ ++ return ret; ++} ++ ++/** ++ * @brief Register a UDD device ++ * ++ * This routine registers a mini-driver at the UDD core. ++ * ++ * @param udd @ref udd_device "UDD device descriptor" which should ++ * describe the new device properties. ++ * ++ * @return Zero is returned upon success, otherwise a negative error ++ * code is received, from the set of error codes defined by ++ * rtdm_dev_register(). In addition, the following error codes can be ++ * returned: ++ * ++ * - -EINVAL, some of the memory regions declared in the ++ * udd_device.mem_regions[] array have invalid properties, i.e. bad ++ * type, NULL name, zero length or address. Any undeclared region ++ * entry from the array must bear the UDD_MEM_NONE type. ++ * ++ * - -EINVAL, if udd_device.irq is different from UDD_IRQ_CUSTOM and ++ * UDD_IRQ_NONE but invalid, causing rtdm_irq_request() to fail. ++ * ++ * - -EINVAL, if udd_device.device_flags contains invalid flags. ++ * ++ * - -ENOSYS, if this service is called while the real-time core is disabled. ++ * ++ * @coretags{secondary-only} ++ */ ++int udd_register_device(struct udd_device *udd) ++{ ++ struct rtdm_device *dev = &udd->__reserved.device; ++ struct udd_reserved *ur = &udd->__reserved; ++ struct rtdm_driver *drv = &ur->driver; ++ struct udd_memregion *rn; ++ int ret, n; ++ ++ if (udd->device_flags & RTDM_PROTOCOL_DEVICE) ++ return -EINVAL; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM && ++ udd->ops.interrupt == NULL) ++ return -EINVAL; ++ ++ for (n = 0, ur->nr_maps = 0; n < UDD_NR_MAPS; n++) { ++ /* We allow sparse region arrays. 
*/ ++ rn = udd->mem_regions + n; ++ if (rn->type == UDD_MEM_NONE) ++ continue; ++ ret = check_memregion(udd, rn); ++ if (ret) ++ return ret; ++ udd->__reserved.nr_maps++; ++ } ++ ++ drv->profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(udd->device_name, RTDM_CLASS_UDD, ++ udd->device_subclass, 0); ++ drv->device_flags = RTDM_NAMED_DEVICE|udd->device_flags; ++ drv->device_count = 1; ++ drv->context_size = sizeof(struct udd_context); ++ drv->ops = (struct rtdm_fd_ops){ ++ .open = udd_open, ++ .ioctl_rt = udd_ioctl_rt, ++ .read_rt = udd_read_rt, ++ .write_rt = udd_write_rt, ++ .close = udd_close, ++ .select = udd_select, ++ }; ++ ++ dev->driver = drv; ++ dev->label = udd->device_name; ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ return ret; ++ ++ if (ur->nr_maps > 0) { ++ ret = register_mapper(udd); ++ if (ret) ++ goto fail_mapper; ++ } else ++ ur->mapper_name = NULL; ++ ++ ur->event_count = 0; ++ rtdm_event_init(&ur->pulse, 0); ++ ur->signfy.pid = -1; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) { ++ ret = rtdm_irq_request(&ur->irqh, udd->irq, ++ udd_irq_handler, 0, ++ dev->name, udd); ++ if (ret) ++ goto fail_irq_request; ++ } ++ ++ return 0; ++ ++fail_irq_request: ++ for (n = 0; n < UDD_NR_MAPS; n++) { ++ rn = udd->mem_regions + n; ++ if (rn->type != UDD_MEM_NONE) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ } ++fail_mapper: ++ rtdm_dev_unregister(dev); ++ if (ur->mapper_name) ++ kfree(ur->mapper_name); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(udd_register_device); ++ ++/** ++ * @brief Unregister a UDD device ++ * ++ * This routine unregisters a mini-driver from the UDD core. This ++ * routine waits until all connections to @a udd have been closed ++ * prior to unregistering. ++ * ++ * @param udd UDD device descriptor ++ * ++ * @return Zero is returned upon success, otherwise -ENXIO is received ++ * if this service is called while the Cobalt kernel is disabled. ++ * ++ * @coretags{secondary-only} ++ */ ++int udd_unregister_device(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ struct udd_memregion *rn; ++ int n; ++ ++ rtdm_event_destroy(&ur->pulse); ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ rtdm_irq_free(&ur->irqh); ++ ++ for (n = 0; n < UDD_NR_MAPS; n++) { ++ rn = udd->mem_regions + n; ++ if (rn->type != UDD_MEM_NONE) ++ rtdm_dev_unregister(&ur->mapdev[n].dev); ++ } ++ ++ if (ur->mapper_name) ++ kfree(ur->mapper_name); ++ ++ rtdm_dev_unregister(&ur->device); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(udd_unregister_device); ++ ++/** ++ * @brief Notify an IRQ event for an unmanaged interrupt ++ * ++ * When the UDD core shall hand over the interrupt management for a ++ * device to the mini-driver (see UDD_IRQ_CUSTOM), the latter should ++ * notify the UDD core when IRQ events are received by calling this ++ * service. ++ * ++ * As a result, the UDD core wakes up any Cobalt thread waiting for ++ * interrupts on the device via a read(2) or select(2) call. ++ * ++ * @param udd UDD device descriptor receiving the IRQ. ++ * ++ * @coretags{coreirq-only} ++ * ++ * @note In case the @ref udd_irq_handler "IRQ handler" from the ++ * mini-driver requested the UDD core not to re-enable the interrupt ++ * line, the application may later request the unmasking by issuing ++ * the UDD_RTIOC_IRQEN ioctl(2) command. Writing a non-zero integer to ++ * the device via the write(2) system call has the same effect. 
++ */ ++void udd_notify_event(struct udd_device *udd) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ union sigval sival; ++ rtdm_lockctx_t ctx; ++ ++ cobalt_atomic_enter(ctx); ++ ur->event_count++; ++ rtdm_event_signal(&ur->pulse); ++ cobalt_atomic_leave(ctx); ++ ++ if (ur->signfy.pid > 0) { ++ sival.sival_int = (int)ur->event_count; ++ __cobalt_sigqueue(ur->signfy.pid, ur->signfy.sig, &sival); ++ } ++} ++EXPORT_SYMBOL_GPL(udd_notify_event); ++ ++struct irqswitch_work { ++ struct ipipe_work_header work; /* Must be first. */ ++ rtdm_irq_t *irqh; ++ int enabled; ++ rtdm_event_t *done; ++}; ++ ++static void lostage_irqswitch_line(struct ipipe_work_header *work) ++{ ++ struct irqswitch_work *rq; ++ ++ /* ++ * This runs from secondary mode, we may flip the IRQ state ++ * now. ++ */ ++ rq = container_of(work, struct irqswitch_work, work); ++ if (rq->enabled) ++ rtdm_irq_enable(rq->irqh); ++ else ++ rtdm_irq_disable(rq->irqh); ++ ++ if (rq->done) ++ rtdm_event_signal(rq->done); ++} ++ ++static void switch_irq_line(rtdm_irq_t *irqh, int enable, rtdm_event_t *done) ++{ ++ struct irqswitch_work switchwork = { ++ .work = { ++ .size = sizeof(switchwork), ++ .handler = lostage_irqswitch_line, ++ }, ++ .irqh = irqh, ++ .enabled = enable, ++ .done = done, ++ }; ++ ++ /* ++ * Not pretty, but we may not traverse the kernel code for ++ * enabling/disabling IRQ lines from primary mode. So we have ++ * to send a deferrable root request (i.e. low-level APC) to ++ * be callable from real-time context. ++ */ ++ ipipe_post_work_root(&switchwork, work); ++} ++ ++/** ++ * @brief Enable the device IRQ line ++ * ++ * This service issues a request to the regular kernel for enabling ++ * the IRQ line registered by the driver. If the caller runs in ++ * primary mode, the request is scheduled but deferred until the ++ * current CPU leaves the real-time domain (see note). Otherwise, the ++ * request is immediately handled. ++ * ++ * @param udd The UDD driver handling the IRQ to disable. If no IRQ ++ * was registered by the driver at the UDD core, this routine has no ++ * effect. ++ * ++ * @param done Optional event to signal upon completion. If non-NULL, ++ * @a done will be posted by a call to rtdm_event_signal() after the ++ * interrupt line is enabled. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note The deferral is required as some interrupt management code ++ * involved in enabling interrupt lines may not be safely executed ++ * from primary mode. By passing a valid @a done object address, the ++ * caller can wait for the request to complete, by sleeping on ++ * rtdm_event_wait(). ++ */ ++void udd_enable_irq(struct udd_device *udd, rtdm_event_t *done) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ switch_irq_line(&ur->irqh, 1, done); ++} ++EXPORT_SYMBOL_GPL(udd_enable_irq); ++ ++/** ++ * @brief Disable the device IRQ line ++ * ++ * This service issues a request to the regular kernel for disabling ++ * the IRQ line registered by the driver. If the caller runs in ++ * primary mode, the request is scheduled but deferred until the ++ * current CPU leaves the real-time domain (see note). Otherwise, the ++ * request is immediately handled. ++ * ++ * @param udd The UDD driver handling the IRQ to disable. If no IRQ ++ * was registered by the driver at the UDD core, this routine has no ++ * effect. ++ * ++ * @param done Optional event to signal upon completion. 
If non-NULL, ++ * @a done will be posted by a call to rtdm_event_signal() after the ++ * interrupt line is disabled. ++ * ++ * @coretags{unrestricted} ++ * ++ * @note The deferral is required as some interrupt management code ++ * involved in disabling interrupt lines may not be safely executed ++ * from primary mode. By passing a valid @a done object address, the ++ * caller can wait for the request to complete, by sleeping on ++ * rtdm_event_wait(). ++ */ ++void udd_disable_irq(struct udd_device *udd, rtdm_event_t *done) ++{ ++ struct udd_reserved *ur = &udd->__reserved; ++ ++ if (udd->irq != UDD_IRQ_NONE && udd->irq != UDD_IRQ_CUSTOM) ++ switch_irq_line(&ur->irqh, 0, done); ++} ++EXPORT_SYMBOL_GPL(udd_disable_irq); ++ ++/** ++ * @brief RTDM file descriptor to target UDD device ++ * ++ * Retrieves the UDD device from a RTDM file descriptor. ++ * ++ * @param fd File descriptor received by an ancillary I/O handler ++ * from a mini-driver based on the UDD core. ++ * ++ * @return A pointer to the UDD device to which @a fd refers to. ++ * ++ * @note This service is intended for use by mini-drivers based on the ++ * UDD core exclusively. Passing file descriptors referring to other ++ * RTDM devices will certainly lead to invalid results. ++ * ++ * @coretags{mode-unrestricted} ++ */ ++struct udd_device *udd_get_device(struct rtdm_fd *fd) ++{ ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ ++ if (dev->driver->profile_info.class_id == RTDM_CLASS_MEMORY) ++ return container_of(dev, struct udd_mapper, dev)->udd; ++ ++ return container_of(dev, struct udd_device, __reserved.device); ++} ++EXPORT_SYMBOL_GPL(udd_get_device); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/serial/16550A_pnp.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_pnp.h 2021-04-07 16:01:26.533635158 +0800 +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (C) 2006-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#if defined(CONFIG_PNP) && \ ++ (defined(CONFIG_XENO_DRIVERS_16550A_PIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY)) ++ ++#include ++ ++#define UNKNOWN_DEV 0x3000 ++ ++/* Bluntly cloned from drivers/serial/8250_pnp.c */ ++static const struct pnp_device_id rt_16550_pnp_tbl[] = { ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "AAC000F", 0 }, ++ /* Anchor Datacomm BV */ ++ /* SXPro 144 External Data Fax Modem Plug & Play */ ++ { "ADC0001", 0 }, ++ /* SXPro 288 External Data Fax Modem Plug & Play */ ++ { "ADC0002", 0 }, ++ /* PROLiNK 1456VH ISA PnP K56flex Fax Modem */ ++ { "AEI0250", 0 }, ++ /* Actiontec ISA PNP 56K X2 Fax Modem */ ++ { "AEI1240", 0 }, ++ /* Rockwell 56K ACF II Fax+Data+Voice Modem */ ++ { "AKY1021", 0 /*SPCI_FL_NO_SHIRQ*/ }, ++ /* AZT3005 PnP SOUND DEVICE */ ++ { "AZT4001", 0 }, ++ /* Best Data Products Inc. 
Smart One 336F PnP Modem */ ++ { "BDP3336", 0 }, ++ /* Boca Research */ ++ /* Boca Complete Ofc Communicator 14.4 Data-FAX */ ++ { "BRI0A49", 0 }, ++ /* Boca Research 33,600 ACF Modem */ ++ { "BRI1400", 0 }, ++ /* Boca 33.6 Kbps Internal FD34FSVD */ ++ { "BRI3400", 0 }, ++ /* Boca 33.6 Kbps Internal FD34FSVD */ ++ { "BRI0A49", 0 }, ++ /* Best Data Products Inc. Smart One 336F PnP Modem */ ++ { "BDP3336", 0 }, ++ /* Computer Peripherals Inc */ ++ /* EuroViVa CommCenter-33.6 SP PnP */ ++ { "CPI4050", 0 }, ++ /* Creative Labs */ ++ /* Creative Labs Phone Blaster 28.8 DSVD PnP Voice */ ++ { "CTL3001", 0 }, ++ /* Creative Labs Modem Blaster 28.8 DSVD PnP Voice */ ++ { "CTL3011", 0 }, ++ /* Creative */ ++ /* Creative Modem Blaster Flash56 DI5601-1 */ ++ { "DMB1032", 0 }, ++ /* Creative Modem Blaster V.90 DI5660 */ ++ { "DMB2001", 0 }, ++ /* E-Tech */ ++ /* E-Tech CyberBULLET PC56RVP */ ++ { "ETT0002", 0 }, ++ /* FUJITSU */ ++ /* Fujitsu 33600 PnP-I2 R Plug & Play */ ++ { "FUJ0202", 0 }, ++ /* Fujitsu FMV-FX431 Plug & Play */ ++ { "FUJ0205", 0 }, ++ /* Fujitsu 33600 PnP-I4 R Plug & Play */ ++ { "FUJ0206", 0 }, ++ /* Fujitsu Fax Voice 33600 PNP-I5 R Plug & Play */ ++ { "FUJ0209", 0 }, ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "GVC000F", 0 }, ++ /* Hayes */ ++ /* Hayes Optima 288 V.34-V.FC + FAX + Voice Plug & Play */ ++ { "HAY0001", 0 }, ++ /* Hayes Optima 336 V.34 + FAX + Voice PnP */ ++ { "HAY000C", 0 }, ++ /* Hayes Optima 336B V.34 + FAX + Voice PnP */ ++ { "HAY000D", 0 }, ++ /* Hayes Accura 56K Ext Fax Modem PnP */ ++ { "HAY5670", 0 }, ++ /* Hayes Accura 56K Ext Fax Modem PnP */ ++ { "HAY5674", 0 }, ++ /* Hayes Accura 56K Fax Modem PnP */ ++ { "HAY5675", 0 }, ++ /* Hayes 288, V.34 + FAX */ ++ { "HAYF000", 0 }, ++ /* Hayes Optima 288 V.34 + FAX + Voice, Plug & Play */ ++ { "HAYF001", 0 }, ++ /* IBM */ ++ /* IBM Thinkpad 701 Internal Modem Voice */ ++ { "IBM0033", 0 }, ++ /* Intertex */ ++ /* Intertex 28k8 33k6 Voice EXT PnP */ ++ { "IXDC801", 0 }, ++ /* Intertex 33k6 56k Voice EXT PnP */ ++ { "IXDC901", 0 }, ++ /* Intertex 28k8 33k6 Voice SP EXT PnP */ ++ { "IXDD801", 0 }, ++ /* Intertex 33k6 56k Voice SP EXT PnP */ ++ { "IXDD901", 0 }, ++ /* Intertex 28k8 33k6 Voice SP INT PnP */ ++ { "IXDF401", 0 }, ++ /* Intertex 28k8 33k6 Voice SP EXT PnP */ ++ { "IXDF801", 0 }, ++ /* Intertex 33k6 56k Voice SP EXT PnP */ ++ { "IXDF901", 0 }, ++ /* Kortex International */ ++ /* KORTEX 28800 Externe PnP */ ++ { "KOR4522", 0 }, ++ /* KXPro 33.6 Vocal ASVD PnP */ ++ { "KORF661", 0 }, ++ /* Lasat */ ++ /* LASAT Internet 33600 PnP */ ++ { "LAS4040", 0 }, ++ /* Lasat Safire 560 PnP */ ++ { "LAS4540", 0 }, ++ /* Lasat Safire 336 PnP */ ++ { "LAS5440", 0 }, ++ /* Microcom, Inc. 
*/ ++ /* Microcom TravelPorte FAST V.34 Plug & Play */ ++ { "MNP0281", 0 }, ++ /* Microcom DeskPorte V.34 FAST or FAST+ Plug & Play */ ++ { "MNP0336", 0 }, ++ /* Microcom DeskPorte FAST EP 28.8 Plug & Play */ ++ { "MNP0339", 0 }, ++ /* Microcom DeskPorte 28.8P Plug & Play */ ++ { "MNP0342", 0 }, ++ /* Microcom DeskPorte FAST ES 28.8 Plug & Play */ ++ { "MNP0500", 0 }, ++ /* Microcom DeskPorte FAST ES 28.8 Plug & Play */ ++ { "MNP0501", 0 }, ++ /* Microcom DeskPorte 28.8S Internal Plug & Play */ ++ { "MNP0502", 0 }, ++ /* Motorola */ ++ /* Motorola BitSURFR Plug & Play */ ++ { "MOT1105", 0 }, ++ /* Motorola TA210 Plug & Play */ ++ { "MOT1111", 0 }, ++ /* Motorola HMTA 200 (ISDN) Plug & Play */ ++ { "MOT1114", 0 }, ++ /* Motorola BitSURFR Plug & Play */ ++ { "MOT1115", 0 }, ++ /* Motorola Lifestyle 28.8 Internal */ ++ { "MOT1190", 0 }, ++ /* Motorola V.3400 Plug & Play */ ++ { "MOT1501", 0 }, ++ /* Motorola Lifestyle 28.8 V.34 Plug & Play */ ++ { "MOT1502", 0 }, ++ /* Motorola Power 28.8 V.34 Plug & Play */ ++ { "MOT1505", 0 }, ++ /* Motorola ModemSURFR External 28.8 Plug & Play */ ++ { "MOT1509", 0 }, ++ /* Motorola Premier 33.6 Desktop Plug & Play */ ++ { "MOT150A", 0 }, ++ /* Motorola VoiceSURFR 56K External PnP */ ++ { "MOT150F", 0 }, ++ /* Motorola ModemSURFR 56K External PnP */ ++ { "MOT1510", 0 }, ++ /* Motorola ModemSURFR 56K Internal PnP */ ++ { "MOT1550", 0 }, ++ /* Motorola ModemSURFR Internal 28.8 Plug & Play */ ++ { "MOT1560", 0 }, ++ /* Motorola Premier 33.6 Internal Plug & Play */ ++ { "MOT1580", 0 }, ++ /* Motorola OnlineSURFR 28.8 Internal Plug & Play */ ++ { "MOT15B0", 0 }, ++ /* Motorola VoiceSURFR 56K Internal PnP */ ++ { "MOT15F0", 0 }, ++ /* Com 1 */ ++ /* Deskline K56 Phone System PnP */ ++ { "MVX00A1", 0 }, ++ /* PC Rider K56 Phone System PnP */ ++ { "MVX00F2", 0 }, ++ /* NEC 98NOTE SPEAKER PHONE FAX MODEM(33600bps) */ ++ { "nEC8241", 0 }, ++ /* Pace 56 Voice Internal Plug & Play Modem */ ++ { "PMC2430", 0 }, ++ /* Generic */ ++ /* Generic standard PC COM port */ ++ { "PNP0500", 0 }, ++ /* Generic 16550A-compatible COM port */ ++ { "PNP0501", 0 }, ++ /* Compaq 14400 Modem */ ++ { "PNPC000", 0 }, ++ /* Compaq 2400/9600 Modem */ ++ { "PNPC001", 0 }, ++ /* Dial-Up Networking Serial Cable between 2 PCs */ ++ { "PNPC031", 0 }, ++ /* Dial-Up Networking Parallel Cable between 2 PCs */ ++ { "PNPC032", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC100", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC101", 0 }, ++ /* Standard 28800 bps Modem*/ ++ { "PNPC102", 0 }, ++ /* Standard Modem*/ ++ { "PNPC103", 0 }, ++ /* Standard 9600 bps Modem*/ ++ { "PNPC104", 0 }, ++ /* Standard 14400 bps Modem*/ ++ { "PNPC105", 0 }, ++ /* Standard 28800 bps Modem*/ ++ { "PNPC106", 0 }, ++ /* Standard Modem */ ++ { "PNPC107", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC108", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC109", 0 }, ++ /* Standard 28800 bps Modem */ ++ { "PNPC10A", 0 }, ++ /* Standard Modem */ ++ { "PNPC10B", 0 }, ++ /* Standard 9600 bps Modem */ ++ { "PNPC10C", 0 }, ++ /* Standard 14400 bps Modem */ ++ { "PNPC10D", 0 }, ++ /* Standard 28800 bps Modem */ ++ { "PNPC10E", 0 }, ++ /* Standard Modem */ ++ { "PNPC10F", 0 }, ++ /* Standard PCMCIA Card Modem */ ++ { "PNP2000", 0 }, ++ /* Rockwell */ ++ /* Modular Technology */ ++ /* Rockwell 33.6 DPF Internal PnP */ ++ /* Modular Technology 33.6 Internal PnP */ ++ { "ROK0030", 0 }, ++ /* Kortex International */ ++ /* KORTEX 14400 Externe PnP */ ++ { "ROK0100", 0 }, ++ /* Rockwell 28.8 */ ++ { "ROK4120", 0 }, ++ /* Viking 
Components, Inc */ ++ /* Viking 28.8 INTERNAL Fax+Data+Voice PnP */ ++ { "ROK4920", 0 }, ++ /* Rockwell */ ++ /* British Telecom */ ++ /* Modular Technology */ ++ /* Rockwell 33.6 DPF External PnP */ ++ /* BT Prologue 33.6 External PnP */ ++ /* Modular Technology 33.6 External PnP */ ++ { "RSS00A0", 0 }, ++ /* Viking 56K FAX INT */ ++ { "RSS0262", 0 }, ++ /* K56 par,VV,Voice,Speakphone,AudioSpan,PnP */ ++ { "RSS0250", 0 }, ++ /* SupraExpress 28.8 Data/Fax PnP modem */ ++ { "SUP1310", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1421", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1590", 0 }, ++ /* SupraExpress 336i Sp ASVD */ ++ { "SUP1620", 0 }, ++ /* SupraExpress 33.6 Data/Fax PnP modem */ ++ { "SUP1760", 0 }, ++ /* SupraExpress 56i Sp Intl */ ++ { "SUP2171", 0 }, ++ /* Phoebe Micro */ ++ /* Phoebe Micro 33.6 Data Fax 1433VQH Plug & Play */ ++ { "TEX0011", 0 }, ++ /* Archtek America Corp. */ ++ /* Archtek SmartLink Modem 3334BT Plug & Play */ ++ { "UAC000F", 0 }, ++ /* 3Com Corp. */ ++ /* Gateway Telepath IIvi 33.6 */ ++ { "USR0000", 0 }, ++ /* U.S. Robotics Sporster 33.6K Fax INT PnP */ ++ { "USR0002", 0 }, ++ /* Sportster Vi 14.4 PnP FAX Voicemail */ ++ { "USR0004", 0 }, ++ /* U.S. Robotics 33.6K Voice INT PnP */ ++ { "USR0006", 0 }, ++ /* U.S. Robotics 33.6K Voice EXT PnP */ ++ { "USR0007", 0 }, ++ /* U.S. Robotics Courier V.Everything INT PnP */ ++ { "USR0009", 0 }, ++ /* U.S. Robotics 33.6K Voice INT PnP */ ++ { "USR2002", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR2070", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP */ ++ { "USR2080", 0 }, ++ /* U.S. Robotics 56K FAX INT */ ++ { "USR3031", 0 }, ++ /* U.S. Robotics 56K FAX INT */ ++ { "USR3050", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR3070", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP */ ++ { "USR3080", 0 }, ++ /* U.S. Robotics 56K Voice INT PnP */ ++ { "USR3090", 0 }, ++ /* U.S. Robotics 56K Message */ ++ { "USR9100", 0 }, ++ /* U.S. Robotics 56K FAX EXT PnP*/ ++ { "USR9160", 0 }, ++ /* U.S. Robotics 56K FAX INT PnP*/ ++ { "USR9170", 0 }, ++ /* U.S. Robotics 56K Voice EXT PnP*/ ++ { "USR9180", 0 }, ++ /* U.S. 
Robotics 56K Voice INT PnP*/ ++ { "USR9190", 0 }, ++ /* Wacom tablets */ ++ { "WACF004", 0 }, ++ { "WACF005", 0 }, ++ { "WACF006", 0 }, ++ /* Compaq touchscreen */ ++ { "FPI2002", 0 }, ++ /* Fujitsu Stylistic touchscreens */ ++ { "FUJ02B2", 0 }, ++ { "FUJ02B3", 0 }, ++ /* Fujitsu Stylistic LT touchscreens */ ++ { "FUJ02B4", 0 }, ++ /* Passive Fujitsu Stylistic touchscreens */ ++ { "FUJ02B6", 0 }, ++ { "FUJ02B7", 0 }, ++ { "FUJ02B8", 0 }, ++ { "FUJ02B9", 0 }, ++ { "FUJ02BC", 0 }, ++ /* Rockwell's (PORALiNK) 33600 INT PNP */ ++ { "WCI0003", 0 }, ++ /* Unkown PnP modems */ ++ { "PNPCXXX", UNKNOWN_DEV }, ++ /* More unkown PnP modems */ ++ { "PNPDXXX", UNKNOWN_DEV }, ++ { "", 0 } ++}; ++ ++static int rt_16550_pnp_probe(struct pnp_dev *dev, ++ const struct pnp_device_id *dev_id) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (pnp_port_valid(dev, 0) && ++ pnp_port_start(dev, 0) == io[i]) { ++ if (!irq[i]) ++ irq[i] = pnp_irq(dev, 0); ++ return 0; ++ } ++ ++ return -ENODEV; ++} ++ ++static struct pnp_driver rt_16550_pnp_driver = { ++ .name = RT_16550_DRIVER_NAME, ++ .id_table = rt_16550_pnp_tbl, ++ .probe = rt_16550_pnp_probe, ++}; ++ ++static int pnp_registered; ++ ++static inline void rt_16550_pnp_init(void) ++{ ++ if (pnp_register_driver(&rt_16550_pnp_driver) == 0) ++ pnp_registered = 1; ++} ++ ++static inline void rt_16550_pnp_cleanup(void) ++{ ++ if (pnp_registered) ++ pnp_unregister_driver(&rt_16550_pnp_driver); ++} ++ ++#else /* !CONFIG_PNP || !(..._16550A_IO || ..._16550A_ANY) */ ++ ++#define rt_16550_pnp_init() do { } while (0) ++#define rt_16550_pnp_cleanup() do { } while (0) ++ ++#endif /* !CONFIG_PNP || !(..._16550A_IO || ..._16550A_ANY) */ +--- linux/drivers/xenomai/serial/16550A_io.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_io.h 2021-04-07 16:01:26.528635165 +0800 +@@ -0,0 +1,210 @@ ++/* ++ * Copyright (C) 2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++/* Manages the I/O access method of the driver. 
*/ ++ ++typedef enum { MODE_PIO, MODE_MMIO } io_mode_t; ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++static unsigned long io[MAX_DEVICES]; ++module_param_array(io, ulong, NULL, 0400); ++MODULE_PARM_DESC(io, "I/O port addresses of the serial devices"); ++#endif /* CONFIG_XENO_DRIVERS_16550A_PIO || CONFIG_XENO_DRIVERS_16550A_ANY */ ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_MMIO) || \ ++ defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++static unsigned long mem[MAX_DEVICES]; ++static void *mapped_io[MAX_DEVICES]; ++module_param_array(mem, ulong, NULL, 0400); ++MODULE_PARM_DESC(mem, "I/O memory addresses of the serial devices"); ++#endif /* CONFIG_XENO_DRIVERS_16550A_MMIO || CONFIG_XENO_DRIVERS_16550A_ANY */ ++ ++#ifdef CONFIG_XENO_DRIVERS_16550A_PIO ++ ++#define RT_16550_IO_INLINE inline ++ ++extern void *mapped_io[]; /* dummy */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return io[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return 1; ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return MODE_PIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return MODE_PIO; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ ctx->base_addr = io[dev_id]; ++} ++ ++#elif defined(CONFIG_XENO_DRIVERS_16550A_MMIO) ++ ++#define RT_16550_IO_INLINE inline ++ ++extern unsigned long io[]; /* dummy */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return mem[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return 1; ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return (unsigned long)mapped_io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return MODE_MMIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return MODE_MMIO; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ ctx->base_addr = (unsigned long)mapped_io[dev_id]; ++} ++ ++#elif defined(CONFIG_XENO_DRIVERS_16550A_ANY) ++ ++#define RT_16550_IO_INLINE /* uninline */ ++ ++static inline unsigned long rt_16550_addr_param(int dev_id) ++{ ++ return (io[dev_id]) ? io[dev_id] : mem[dev_id]; ++} ++ ++static inline int rt_16550_addr_param_valid(int dev_id) ++{ ++ return !(io[dev_id] && mem[dev_id]); ++} ++ ++static inline unsigned long rt_16550_base_addr(int dev_id) ++{ ++ return (io[dev_id]) ? io[dev_id] : (unsigned long)mapped_io[dev_id]; ++} ++ ++static inline io_mode_t rt_16550_io_mode(int dev_id) ++{ ++ return (io[dev_id]) ? 
MODE_PIO : MODE_MMIO; ++} ++ ++static inline io_mode_t ++rt_16550_io_mode_from_ctx(struct rt_16550_context *ctx) ++{ ++ return ctx->io_mode; ++} ++ ++static inline void ++rt_16550_init_io_ctx(int dev_id, struct rt_16550_context *ctx) ++{ ++ if (io[dev_id]) { ++ ctx->base_addr = io[dev_id]; ++ ctx->io_mode = MODE_PIO; ++ } else { ++ ctx->base_addr = (unsigned long)mapped_io[dev_id]; ++ ctx->io_mode = MODE_MMIO; ++ } ++} ++ ++#else ++# error Unsupported I/O access method ++#endif ++ ++static RT_16550_IO_INLINE u8 ++rt_16550_reg_in(io_mode_t io_mode, unsigned long base, int off) ++{ ++ switch (io_mode) { ++ case MODE_PIO: ++ return inb(base + off); ++ default: /* MODE_MMIO */ ++ return readb((void *)base + off); ++ } ++} ++ ++static RT_16550_IO_INLINE void ++rt_16550_reg_out(io_mode_t io_mode, unsigned long base, int off, u8 val) ++{ ++ switch (io_mode) { ++ case MODE_PIO: ++ outb(val, base + off); ++ break; ++ case MODE_MMIO: ++ writeb(val, (void *)base + off); ++ break; ++ } ++} ++ ++static int rt_16550_init_io(int dev_id, char* name) ++{ ++ switch (rt_16550_io_mode(dev_id)) { ++ case MODE_PIO: ++ if (!request_region(rt_16550_addr_param(dev_id), 8, name)) ++ return -EBUSY; ++ break; ++ case MODE_MMIO: ++ mapped_io[dev_id] = ioremap(rt_16550_addr_param(dev_id), 8); ++ if (!mapped_io[dev_id]) ++ return -EBUSY; ++ break; ++ } ++ return 0; ++} ++ ++static void rt_16550_release_io(int dev_id) ++{ ++ switch (rt_16550_io_mode(dev_id)) { ++ case MODE_PIO: ++ release_region(io[dev_id], 8); ++ break; ++ case MODE_MMIO: ++ iounmap(mapped_io[dev_id]); ++ break; ++ } ++} +--- linux/drivers/xenomai/serial/16550A.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A.c 2021-04-07 16:01:26.524635171 +0800 +@@ -0,0 +1,1188 @@ ++/* ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM-based driver for 16550A UARTs"); ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_VERSION("1.5.2"); ++MODULE_LICENSE("GPL"); ++ ++#define RT_16550_DRIVER_NAME "xeno_16550A" ++ ++#define MAX_DEVICES 8 ++ ++#define IN_BUFFER_SIZE 4096 ++#define OUT_BUFFER_SIZE 4096 ++ ++#define DEFAULT_BAUD_BASE 115200 ++#define DEFAULT_TX_FIFO 16 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++#define LCR_DLAB 0x80 ++ ++#define FCR_FIFO 0x01 ++#define FCR_RESET_RX 0x02 ++#define FCR_RESET_TX 0x04 ++ ++#define IER_RX 0x01 ++#define IER_TX 0x02 ++#define IER_STAT 0x04 ++#define IER_MODEM 0x08 ++ ++#define IIR_MODEM 0x00 ++#define IIR_PIRQ 0x01 ++#define IIR_TX 0x02 ++#define IIR_RX 0x04 ++#define IIR_STAT 0x06 ++#define IIR_MASK 0x07 ++ ++#define RHR 0 /* Receive Holding Buffer */ ++#define THR 0 /* Transmit Holding Buffer */ ++#define DLL 0 /* Divisor Latch LSB */ ++#define IER 1 /* Interrupt Enable Register */ ++#define DLM 1 /* Divisor Latch MSB */ ++#define IIR 2 /* Interrupt Id Register */ ++#define FCR 2 /* Fifo Control Register */ ++#define LCR 3 /* Line Control Register */ ++#define MCR 4 /* Modem Control Register */ ++#define LSR 5 /* Line Status Register */ ++#define MSR 6 /* Modem Status Register */ ++ ++struct rt_16550_context { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ unsigned long base_addr; /* hardware IO base address */ ++#ifdef CONFIG_XENO_DRIVERS_16550A_ANY ++ int io_mode; /* hardware IO-access mode */ ++#endif ++ int tx_fifo; /* cached global tx_fifo[] */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ int ier_status; /* IER cache */ ++ int mcr_status; /* MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++}; ++ ++static const struct rtser_config default_config = { ++ 0xFFFF, RTSER_DEF_BAUD, RTSER_DEF_PARITY, RTSER_DEF_BITS, ++ RTSER_DEF_STOPB, RTSER_DEF_HAND, RTSER_DEF_FIFO_DEPTH, 0, ++ RTSER_DEF_TIMEOUT, RTSER_DEF_TIMEOUT, RTSER_DEF_TIMEOUT, ++ RTSER_DEF_TIMESTAMP_HISTORY, RTSER_DEF_EVENT_MASK, RTSER_DEF_RS485 ++}; ++ ++static struct rtdm_device *device[MAX_DEVICES]; ++ ++static unsigned int irq[MAX_DEVICES]; ++static unsigned long irqtype[MAX_DEVICES] = { ++ [0 ... 
MAX_DEVICES-1] = RTDM_IRQTYPE_SHARED | RTDM_IRQTYPE_EDGE ++}; ++static unsigned int baud_base[MAX_DEVICES]; ++static int tx_fifo[MAX_DEVICES]; ++ ++module_param_array(irq, uint, NULL, 0400); ++module_param_array(baud_base, uint, NULL, 0400); ++module_param_array(tx_fifo, int, NULL, 0400); ++ ++MODULE_PARM_DESC(irq, "IRQ numbers of the serial devices"); ++MODULE_PARM_DESC(baud_base, "Maximum baud rate of the serial device " ++ "(internal clock rate / 16)"); ++MODULE_PARM_DESC(tx_fifo, "Transmitter FIFO size"); ++ ++#include "16550A_io.h" ++#include "16550A_pnp.h" ++#include "16550A_pci.h" ++ ++static inline int rt_16550_rx_interrupt(struct rt_16550_context *ctx, ++ uint64_t * timestamp) ++{ ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ int rbytes = 0; ++ int lsr = 0; ++ int c; ++ ++ do { ++ c = rt_16550_reg_in(mode, base, RHR); /* read input char */ ++ ++ ctx->in_buf[ctx->in_tail] = c; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (++ctx->in_npend > IN_BUFFER_SIZE) { ++ lsr |= RTSER_SOFT_OVERRUN_ERR; ++ ctx->in_npend--; ++ } ++ ++ rbytes++; ++ lsr &= ~RTSER_LSR_DATA; ++ lsr |= (rt_16550_reg_in(mode, base, LSR) & ++ (RTSER_LSR_DATA | RTSER_LSR_OVERRUN_ERR | ++ RTSER_LSR_PARITY_ERR | RTSER_LSR_FRAMING_ERR | ++ RTSER_LSR_BREAK_IND)); ++ } while (lsr & RTSER_LSR_DATA); ++ ++ /* save new errors */ ++ ctx->status |= lsr; ++ ++ /* If we are enforcing the RTSCTS control flow and the input ++ buffer is busy above the specified high watermark, clear ++ RTS. */ ++/* if (uart->i_count >= uart->config.rts_hiwm && ++ (uart->config.handshake & RT_UART_RTSCTS) != 0 && ++ (uart->modem & MCR_RTS) != 0) { ++ uart->modem &= ~MCR_RTS; ++ rt_16550_reg_out(mode, base, MCR, uart->modem); ++ }*/ ++ ++ return rbytes; ++} ++ ++static void rt_16550_tx_fill(struct rt_16550_context *ctx) ++{ ++ int c; ++ int count; ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ ++/* if (uart->modem & MSR_CTS)*/ ++ { ++ for (count = ctx->tx_fifo; ++ (count > 0) && (ctx->out_npend > 0); ++ count--, ctx->out_npend--) { ++ c = ctx->out_buf[ctx->out_head++]; ++ rt_16550_reg_out(mode, base, THR, c); ++ ctx->out_head &= (OUT_BUFFER_SIZE - 1); ++ } ++ } ++} ++ ++static inline void rt_16550_stat_interrupt(struct rt_16550_context *ctx) ++{ ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ ctx->status |= (rt_16550_reg_in(mode, base, LSR) & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_LSR_BREAK_IND)); ++} ++ ++static int rt_16550_interrupt(rtdm_irq_t * irq_context) ++{ ++ struct rt_16550_context *ctx; ++ unsigned long base; ++ int mode; ++ int iir; ++ uint64_t timestamp = rtdm_clock_read(); ++ int rbytes = 0; ++ int events = 0; ++ int modem; ++ int ret = RTDM_IRQ_NONE; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_16550_context); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ while (1) { ++ iir = rt_16550_reg_in(mode, base, IIR) & IIR_MASK; ++ if (iir & IIR_PIRQ) ++ break; ++ ++ if (iir == IIR_RX) { ++ rbytes += rt_16550_rx_interrupt(ctx, ×tamp); ++ events |= RTSER_EVENT_RXPEND; ++ } else if (iir == IIR_STAT) ++ rt_16550_stat_interrupt(ctx); ++ else if (iir == IIR_TX) ++ rt_16550_tx_fill(ctx); ++ else if (iir == IIR_MODEM) { ++ modem = rt_16550_reg_in(mode, base, MSR); ++ if (modem & (modem << 4)) ++ events |= 
RTSER_EVENT_MODEMHI; ++ if ((modem ^ 0xF0) & (modem << 4)) ++ events |= RTSER_EVENT_MODEMLO; ++ } ++ ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else ++ ctx->in_nwait -= rbytes; ++ } ++ ++ if (ctx->status) { ++ events |= RTSER_EVENT_ERRPEND; ++ ctx->ier_status &= ~IER_STAT; ++ } ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->ier_status & IER_TX) && (ctx->out_npend == 0)) { ++ /* mask transmitter empty interrupt */ ++ ctx->ier_status &= ~IER_TX; ++ ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ /* update interrupt mask */ ++ rt_16550_reg_out(mode, base, IER, ctx->ier_status); ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ return ret; ++} ++ ++static int rt_16550_set_config(struct rt_16550_context *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ unsigned long base = ctx->base_addr; ++ int mode = rt_16550_io_mode_from_ctx(ctx); ++ int err = 0; ++ ++ /* make line configuration atomic and IRQ-safe */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) { ++ int dev_id = rtdm_fd_minor(rtdm_private_to_fd(ctx)); ++ int baud_div; ++ ++ ctx->config.baud_rate = config->baud_rate; ++ baud_div = (baud_base[dev_id] + (ctx->config.baud_rate>>1)) / ++ ctx->config.baud_rate; ++ rt_16550_reg_out(mode, base, LCR, LCR_DLAB); ++ rt_16550_reg_out(mode, base, DLL, baud_div & 0xff); ++ rt_16550_reg_out(mode, base, DLM, baud_div >> 8); ++ } ++ ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ ++ if (config->config_mask & (RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | ++ RTSER_SET_STOP_BITS | ++ RTSER_SET_BAUD)) { ++ rt_16550_reg_out(mode, base, LCR, ++ (ctx->config.parity << 3) | ++ (ctx->config.stop_bits << 2) | ++ ctx->config.data_bits); ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ } ++ ++ if (config->config_mask & RTSER_SET_FIFO_DEPTH) { ++ ctx->config.fifo_depth = config->fifo_depth & FIFO_MASK; ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | FCR_RESET_RX | FCR_RESET_TX); ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ care not to use and change timeouts at the same time. 
*/ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* change timestamp history atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ /* change event mask atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) ++ && ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ ++ if (config->event_mask & (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ /* enable modem status interrupt */ ++ ctx->ier_status |= IER_MODEM; ++ else ++ /* disable modem status interrupt */ ++ ctx->ier_status &= ~IER_MODEM; ++ rt_16550_reg_out(mode, base, IER, ctx->ier_status); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ /* change handshake atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.handshake = config->handshake; ++ ++ switch (ctx->config.handshake) { ++ case RTSER_RTSCTS_HAND: ++ // ...? 
++ ++ default: /* RTSER_NO_HAND */ ++ ctx->mcr_status = ++ RTSER_MCR_DTR | RTSER_MCR_RTS | RTSER_MCR_OUT2; ++ break; ++ } ++ rt_16550_reg_out(mode, base, MCR, ctx->mcr_status); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ return err; ++} ++ ++void rt_16550_cleanup_ctx(struct rt_16550_context *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++int rt_16550_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_16550_context *ctx; ++ int dev_id = rtdm_fd_minor(fd); ++ int err; ++ uint64_t *dummy; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ rt_16550_init_io_ctx(dev_id, ctx); ++ ++ ctx->tx_fifo = tx_fifo[dev_id]; ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ rt_16550_set_config(ctx, &default_config, &dummy); ++ ++ err = rtdm_irq_request(&ctx->irq_handle, irq[dev_id], ++ rt_16550_interrupt, irqtype[dev_id], ++ rtdm_fd_device(fd)->name, ctx); ++ if (err) { ++ /* reset DTR and RTS */ ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ctx->base_addr, ++ MCR, 0); ++ ++ rt_16550_cleanup_ctx(ctx); ++ ++ return err; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* enable interrupts */ ++ ctx->ier_status = IER_RX; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ctx->base_addr, IER, ++ IER_RX); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ return 0; ++} ++ ++void rt_16550_close(struct rtdm_fd *fd) ++{ ++ struct rt_16550_context *ctx; ++ unsigned long base; ++ int mode; ++ uint64_t *in_history; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* reset DTR and RTS */ ++ rt_16550_reg_out(mode, base, MCR, 0); ++ ++ /* mask all UART interrupts and clear pending ones. 
*/ ++ rt_16550_reg_out(mode, base, IER, 0); ++ rt_16550_reg_in(mode, base, IIR); ++ rt_16550_reg_in(mode, base, LSR); ++ rt_16550_reg_in(mode, base, RHR); ++ rt_16550_reg_in(mode, base, MSR); ++ ++ in_history = ctx->in_history; ++ ctx->in_history = NULL; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ rt_16550_cleanup_ctx(ctx); ++ ++ kfree(in_history); ++} ++ ++int rt_16550_ioctl(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_16550_context *ctx; ++ int err = 0; ++ unsigned long base; ++ int mode; ++ ++ ctx = rtdm_fd_to_private(fd); ++ base = ctx->base_addr; ++ mode = rt_16550_io_mode_from_ctx(ctx); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct ++ rtser_config)); ++ else ++ memcpy(arg, &ctx->config, ++ sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = ++ rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate > ++ baud_base[rtdm_fd_minor(fd)] || ++ config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* ++ * Reflect the call to non-RT as we will likely ++ * allocate or free the buffer. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (config->timestamp_history & ++ RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_16550_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ status_buf.line_status = ++ rt_16550_reg_in(mode, base, LSR) | status; ++ status_buf.modem_status = ++ rt_16550_reg_in(mode, base, MSR); ++ ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = ++ rt_16550_reg_in(mode, base, LSR) | status; ++ ((struct rtser_status *)arg)->modem_status = ++ rt_16550_reg_in(mode, base, MSR); ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr; ++ rt_16550_reg_out(mode, base, MCR, new_mcr); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ /* Only enable error interrupt ++ when the user waits for it. */ ++ if (ctx->config.event_mask & RTSER_EVENT_ERRPEND) { ++ ctx->ier_status |= IER_STAT; ++ rt_16550_reg_out(mode, base, IER, ++ ctx->ier_status); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ++ ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++ wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ int lcr = ((long)arg & RTSER_BREAK_SET) << 6; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ lcr |= ++ (ctx->config.parity << 3) | (ctx->config.stop_bits << 2) | ++ ctx->config.data_bits; ++ ++ rt_16550_reg_out(mode, base, LCR, lcr); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_16550_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_16550_reg_out(mode, base, FCR, fcr); ++ rt_16550_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++ssize_t rt_16550_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct rt_16550_context *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ /* switch on error interrupt - the user is ready to listen */ ++ if ((ctx->ier_status & IER_STAT) == 0) { ++ ctx->ier_status |= IER_STAT; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, IER, ++ ctx->ier_status); ++ } 
++ ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ block = subblock = (pending <= nbyte) ? pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ separately. */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ if ((ctx->in_npend -= block) == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ non-blocking call or contains the error ++ returned by rtdm_event_wait[_until] */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ before exit. */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = read; ++ ++ return ret; ++} ++ ++ssize_t rt_16550_write(struct rtdm_fd *fd, const void *buf, size_t nbyte) ++{ ++ struct rt_16550_context *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ int lsr; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.tx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? 
nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ end separately. */ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ lsr = rt_16550_reg_in(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, LSR); ++ if (lsr & RTSER_LSR_THR_EMTPY) ++ rt_16550_tx_fill(ctx); ++ ++ if (ctx->out_npend > 0 && !(ctx->ier_status & IER_TX)) { ++ /* unmask tx interrupt */ ++ ctx->ier_status |= IER_TX; ++ rt_16550_reg_out(rt_16550_io_mode_from_ctx(ctx), ++ ctx->base_addr, IER, ++ ctx->ier_status); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = ++ rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ if (ret == -EWOULDBLOCK) { ++ /* Fix error code for non-blocking mode. 
*/ ++ ret = -EAGAIN; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver uart16550A_driver = { ++ .profile_info = RTDM_PROFILE_INFO(uart16550A, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .device_count = MAX_DEVICES, ++ .context_size = sizeof(struct rt_16550_context), ++ .ops = { ++ .open = rt_16550_open, ++ .close = rt_16550_close, ++ .ioctl_rt = rt_16550_ioctl, ++ .ioctl_nrt = rt_16550_ioctl, ++ .read_rt = rt_16550_read, ++ .write_rt = rt_16550_write, ++ }, ++}; ++ ++void rt_16550_exit(void); ++ ++int __init rt_16550_init(void) ++{ ++ struct rtdm_device *dev; ++ unsigned long base; ++ char *name; ++ int mode; ++ int err; ++ int i; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ rt_16550_pnp_init(); ++ rt_16550_pci_init(); ++ ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if (!rt_16550_addr_param(i)) ++ continue; ++ ++ err = -EINVAL; ++ if (!irq[i] || !rt_16550_addr_param_valid(i)) ++ goto cleanup_out; ++ ++ dev = kmalloc(sizeof(struct rtdm_device) + ++ RTDM_MAX_DEVNAME_LEN, GFP_KERNEL); ++ err = -ENOMEM; ++ if (!dev) ++ goto cleanup_out; ++ ++ dev->driver = &uart16550A_driver; ++ dev->label = "rtser%d"; ++ name = (char *)(dev + 1); ++ ksformat(name, RTDM_MAX_DEVNAME_LEN, dev->label, i); ++ ++ err = rt_16550_init_io(i, name); ++ if (err) ++ goto kfree_out; ++ ++ if (baud_base[i] == 0) ++ baud_base[i] = DEFAULT_BAUD_BASE; ++ ++ if (tx_fifo[i] == 0) ++ tx_fifo[i] = DEFAULT_TX_FIFO; ++ ++ /* Mask all UART interrupts and clear pending ones. */ ++ base = rt_16550_base_addr(i); ++ mode = rt_16550_io_mode(i); ++ rt_16550_reg_out(mode, base, IER, 0); ++ rt_16550_reg_in(mode, base, IIR); ++ rt_16550_reg_in(mode, base, LSR); ++ rt_16550_reg_in(mode, base, RHR); ++ rt_16550_reg_in(mode, base, MSR); ++ ++ err = rtdm_dev_register(dev); ++ ++ if (err) ++ goto release_io_out; ++ ++ device[i] = dev; ++ } ++ ++ return 0; ++ ++ release_io_out: ++ rt_16550_release_io(i); ++ ++ kfree_out: ++ kfree(dev); ++ ++ cleanup_out: ++ rt_16550_exit(); ++ ++ return err; ++} ++ ++void rt_16550_exit(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (device[i]) { ++ rtdm_dev_unregister(device[i]); ++ rt_16550_release_io(i); ++ kfree(device[i]); ++ } ++ ++ rt_16550_pci_cleanup(); ++ rt_16550_pnp_cleanup(); ++} ++ ++module_init(rt_16550_init); ++module_exit(rt_16550_exit); +--- linux/drivers/xenomai/serial/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/Makefile 2021-04-07 16:01:26.519635178 +0800 +@@ -0,0 +1,8 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_16550A) += xeno_16550A.o ++obj-$(CONFIG_XENO_DRIVERS_MPC52XX_UART) += xeno_mpc52xx_uart.o ++obj-$(CONFIG_XENO_DRIVERS_IMX_UART) += xeno_imx_uart.o ++ ++xeno_16550A-y := 16550A.o ++xeno_mpc52xx_uart-y := mpc52xx_uart.o ++xeno_imx_uart-y := rt_imx_uart.o +--- linux/drivers/xenomai/serial/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/Kconfig 2021-04-07 16:01:26.514635185 +0800 +@@ -0,0 +1,79 @@ ++menu "Serial drivers" ++ ++config XENO_DRIVERS_16550A ++ tristate "16550A UART driver" ++ help ++ Real-time UART driver for 16550A compatible controllers. See ++ doc/txt/16550A-driver.txt for more details. 
++ ++choice ++ prompt "Hardware access mode" ++ depends on XENO_DRIVERS_16550A ++ default XENO_DRIVERS_16550A_PIO ++ ++config XENO_DRIVERS_16550A_PIO ++ bool "Port-based I/O" ++ help ++ Hardware access only via I/O ports. Use module parameter ++ "io=[,[,...]]" to specify the base port of a device. ++ ++config XENO_DRIVERS_16550A_MMIO ++ bool "Memory-mapped I/O" ++ help ++ Hardware access only via memory mapping. Use module paramter ++ "mem=[,[,...]]" to specify the physical base address of ++ a device. ++ ++config XENO_DRIVERS_16550A_ANY ++ bool "Any access mode" ++ help ++ Decide at module load-time (or via kernel parameter) which access ++ mode to use for which device. This mode is useful when devices of ++ both types can be present in a system, also at the same time. ++ ++ Both "io" and "mem" module parameters are available, but always only ++ one of them can be applied on a particular device. Use, e.g., ++ "io=0x3f8,0 mem=0,0xe0000000" to address device 1 via IO base port ++ 0x3f8 and device 2 via physical base address 0xe0000000. ++ ++endchoice ++ ++config XENO_DRIVERS_16550A_PCI ++ depends on PCI && (XENO_DRIVERS_16550A_PIO || XENO_DRIVERS_16550A_ANY) ++ bool "PCI board support" ++ default n ++ help ++ ++ This option activates support for PCI serial boards. ++ ++config XENO_DRIVERS_16550A_PCI_MOXA ++ depends on XENO_DRIVERS_16550A_PCI ++ bool "Moxa PCI boards" ++ default n ++ help ++ ++ This option activates support for the following Moxa boards: ++ PCI Serial Boards: ++ C104H/PCI, C168H/PCI ++ CP-114, CP-132 ++ Universal PCI Serial Boards: ++ CP-102U, CP-102UL, CP-104U ++ CP-112UL, CP-114UL, CP-118U ++ CP-132U, CP-134U, CP-138U ++ CP-168U ++ ++config XENO_DRIVERS_MPC52XX_UART ++ depends on PPC_MPC52xx ++ tristate "MPC52xx PSC UART driver" ++ help ++ Real-time UART driver for the PSC on the MPC5200 processor. ++ ++config XENO_DRIVERS_IMX_UART ++ depends on ARCH_IMX || ARCH_MXC ++ tristate "RT IMX UART driver" ++ select RATIONAL ++ help ++ Real-time UART driver for the Freescale Semiconductor MXC Internal ++ UART compatible controllers. ++ ++endmenu +--- linux/drivers/xenomai/serial/16550A_pci.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/16550A_pci.h 2021-04-07 16:01:26.510635191 +0800 +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (C) 2006-2007 Jan Kiszka . ++ * Copyright (C) 2011 Stefan Kisdaroczi . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI) ++ ++#include ++ ++struct rt_16550_pci_board { ++ char *name; ++ resource_size_t resource_base_addr; ++ unsigned int nports; ++ unsigned int port_ofs; ++ unsigned long irqtype; ++ unsigned int baud_base; ++ int tx_fifo; ++}; ++ ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI_MOXA) ++ ++#define PCI_DEVICE_ID_CP112UL 0x1120 ++#define PCI_DEVICE_ID_CP114UL 0x1143 ++#define PCI_DEVICE_ID_CP138U 0x1380 ++ ++static const struct rt_16550_pci_board rt_16550_moxa_c104 = { ++ .name = "Moxa C104H/PCI", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_c168 = { ++ .name = "Moxa C168H/PCI", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp114 = { ++ .name = "Moxa CP-114", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp132 = { ++ .name = "Moxa CP-132", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp102u = { ++ .name = "Moxa CP-102U", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp102ul = { ++ .name = "Moxa CP-102UL", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp104u = { ++ .name = "Moxa CP-104U", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp112ul = { ++ .name = "Moxa CP-112UL", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp114ul = { ++ .name = "Moxa CP-114UL", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp118u = { ++ .name = "Moxa CP-118U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp132u = { ++ .name = "Moxa CP-132U", ++ .resource_base_addr = 2, ++ .nports = 2, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp134u = { ++ .name = "Moxa CP-134U", ++ .resource_base_addr = 2, ++ .nports = 4, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp138u = { ++ .name = "Moxa CP-138U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = 
RTDM_IRQTYPE_SHARED, ++}; ++ ++static const struct rt_16550_pci_board rt_16550_moxa_cp168u = { ++ .name = "Moxa CP-168U", ++ .resource_base_addr = 2, ++ .nports = 8, ++ .port_ofs = 8, ++ .baud_base = 921600, ++ .tx_fifo = 16, ++ .irqtype = RTDM_IRQTYPE_SHARED, ++}; ++#endif ++ ++const struct pci_device_id rt_16550_pci_table[] = { ++#if defined(CONFIG_XENO_DRIVERS_16550A_PCI_MOXA) ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_C104), ++ .driver_data = (unsigned long)&rt_16550_moxa_c104}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_C168), ++ .driver_data = (unsigned long)&rt_16550_moxa_c168}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP114), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp114}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP132), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp132}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP102U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp102u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP102UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp102ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP104U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp104u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP112UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp112ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP114UL), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp114ul}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP118U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp118u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP132U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp132u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP134U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp134u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_CP138U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp138u}, ++ {PCI_VDEVICE(MOXA, PCI_DEVICE_ID_MOXA_CP168U), ++ .driver_data = (unsigned long)&rt_16550_moxa_cp168u}, ++#endif ++ { } ++}; ++ ++static int rt_16550_pci_probe(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rt_16550_pci_board *board; ++ int err; ++ int i; ++ int port = 0; ++ int base_addr; ++ int max_devices = 0; ++ ++ if (!ent->driver_data) ++ return -ENODEV; ++ ++ board = (struct rt_16550_pci_board *)ent->driver_data; ++ ++ for (i = 0; i < MAX_DEVICES; i++) ++ if (!rt_16550_addr_param(i)) ++ max_devices++; ++ ++ if (board->nports > max_devices) ++ return -ENODEV; ++ ++ if ((err = pci_enable_device(pdev))) ++ return err; ++ ++ base_addr = pci_resource_start(pdev, board->resource_base_addr); ++ ++ for (i = 0; i < MAX_DEVICES; i++) { ++ if ((port < board->nports) && (!rt_16550_addr_param(i))) { ++ io[i] = base_addr + port * board->port_ofs; ++ irq[i] = pdev->irq; ++ irqtype[i] = board->irqtype; ++ baud_base[i] = board->baud_base; ++ tx_fifo[i] = board->tx_fifo; ++ port++; ++ } ++ } ++ ++ return 0; ++} ++ ++static void rt_16550_pci_remove(struct pci_dev *pdev) { ++ pci_disable_device( pdev ); ++}; ++ ++static struct pci_driver rt_16550_pci_driver = { ++ .name = RT_16550_DRIVER_NAME, ++ .id_table = rt_16550_pci_table, ++ .probe = rt_16550_pci_probe, ++ .remove = rt_16550_pci_remove ++}; ++ ++static int pci_registered; ++ ++static inline void rt_16550_pci_init(void) ++{ ++ if (pci_register_driver(&rt_16550_pci_driver) == 0) ++ pci_registered = 1; ++} ++ ++static inline void rt_16550_pci_cleanup(void) ++{ ++ if (pci_registered) ++ pci_unregister_driver(&rt_16550_pci_driver); ++} ++ ++#else /* Linux < 2.6.0 || !CONFIG_PCI || !(..._16550A_PCI */ ++ ++#define rt_16550_pci_init() do { } while (0) ++#define rt_16550_pci_cleanup() do { } while (0) ++ 
++#endif /* Linux < 2.6.0 || !CONFIG_PCI || !(..._16550A_PCI */ +--- linux/drivers/xenomai/serial/mpc52xx_uart.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/mpc52xx_uart.c 2021-04-07 16:01:26.505635198 +0800 +@@ -0,0 +1,1438 @@ ++/* ++ * Copyright (C) 2011 Wolfgang Grandegger . ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++ ++MODULE_DESCRIPTION("RTDM-based driver for MPC52xx UARTs"); ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_VERSION("1.0.0"); ++MODULE_LICENSE("GPL"); ++ ++#define RT_MPC52XX_UART_DRVNAM "xeno_mpc52xx_uart" ++ ++#define IN_BUFFER_SIZE 512 ++#define OUT_BUFFER_SIZE 512 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++ ++struct rt_mpc52xx_uart_port { ++ const struct device *dev; ++ struct mpc52xx_psc __iomem *psc; ++ struct mpc52xx_psc_fifo __iomem *fifo; ++ unsigned int uartclk; ++ int irq; ++ int num; ++}; ++ ++struct rt_mpc52xx_uart_ctx { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ ++ int mcr_status; /* emulated MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++ ++ unsigned int imr_status; /* interrupt mask register cache */ ++ int tx_empty; /* shift register empty flag */ ++ ++ struct rt_mpc52xx_uart_port *port; /* Port related data */ ++}; ++ ++static const struct rtser_config default_config = { ++ .config_mask = 0xFFFF, ++ .baud_rate = RTSER_DEF_BAUD, ++ .parity = RTSER_DEF_PARITY, ++ .data_bits = RTSER_DEF_BITS, ++ .stop_bits 
= RTSER_DEF_STOPB, ++ .handshake = RTSER_DEF_HAND, ++ .fifo_depth = RTSER_DEF_FIFO_DEPTH, ++ .rx_timeout = RTSER_DEF_TIMEOUT, ++ .tx_timeout = RTSER_DEF_TIMEOUT, ++ .event_timeout = RTSER_DEF_TIMEOUT, ++ .timestamp_history = RTSER_DEF_TIMESTAMP_HISTORY, ++ .event_mask = RTSER_DEF_EVENT_MASK, ++ .rs485 = RTSER_DEF_RS485, ++}; ++ ++/* lookup table for matching device nodes to index numbers */ ++static struct device_node *rt_mpc52xx_uart_nodes[MPC52xx_PSC_MAXNUM]; ++ ++static inline void psc_fifo_init(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ out_8(&ctx->port->fifo->rfcntl, 0x00); ++ out_be16(&ctx->port->fifo->rfalarm, 0x1ff); ++ out_8(&ctx->port->fifo->tfcntl, 0x07); ++ out_be16(&ctx->port->fifo->tfalarm, 0x80); ++} ++ ++static inline int psc_raw_rx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_RXRDY; ++} ++ ++static inline int psc_raw_tx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_TXRDY; ++} ++ ++static inline int psc_rx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_isr) & ++ ctx->imr_status & MPC52xx_PSC_IMR_RXRDY; ++} ++ ++static int psc_tx_rdy(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_isr) & ++ ctx->imr_status & MPC52xx_PSC_IMR_TXRDY; ++} ++ ++static inline int psc_tx_empty(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_be16(&ctx->port->psc->mpc52xx_psc_status) & ++ MPC52xx_PSC_SR_TXEMP; ++} ++ ++static inline void psc_start_tx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status |= MPC52xx_PSC_IMR_TXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_stop_tx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_TXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_stop_rx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_RXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static inline void psc_write_char(struct rt_mpc52xx_uart_ctx *ctx, ++ unsigned char c) ++{ ++ out_8(&ctx->port->psc->mpc52xx_psc_buffer_8, c); ++} ++ ++static inline unsigned char psc_read_char(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ return in_8(&ctx->port->psc->mpc52xx_psc_buffer_8); ++} ++ ++static inline void psc_disable_ints(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ ctx->imr_status = 0; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static void psc_set_mcr(struct rt_mpc52xx_uart_ctx *ctx, ++ unsigned int mcr) ++{ ++ if (mcr & RTSER_MCR_RTS) ++ out_8(&ctx->port->psc->op1, MPC52xx_PSC_OP_RTS); ++ else ++ out_8(&ctx->port->psc->op0, MPC52xx_PSC_OP_RTS); ++} ++ ++/* FIXME: status interrupts not yet handled properly */ ++static unsigned int psc_get_msr(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ unsigned int msr = RTSER_MSR_DSR; ++ u8 status = in_8(&ctx->port->psc->mpc52xx_psc_ipcr); ++ ++ if (!(status & MPC52xx_PSC_CTS)) ++ msr |= RTSER_MSR_CTS; ++ if (!(status & MPC52xx_PSC_DCD)) ++ msr |= RTSER_MSR_DCD; ++ ++ return msr; ++} ++ ++static void psc_enable_ms(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ ++ /* clear D_*-bits by reading them */ ++ in_8(&psc->mpc52xx_psc_ipcr); ++ /* enable CTS and DCD as IPC interrupts */ ++ out_8(&psc->mpc52xx_psc_acr, MPC52xx_PSC_IEC_CTS | MPC52xx_PSC_IEC_DCD); ++ ++ ctx->imr_status |= MPC52xx_PSC_IMR_IPC; ++ 
out_be16(&psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static void psc_disable_ms(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ ++ /* disable CTS and DCD as IPC interrupts */ ++ out_8(&psc->mpc52xx_psc_acr, 0); ++ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_IPC; ++ out_be16(&psc->mpc52xx_psc_imr, ctx->imr_status); ++} ++ ++static struct of_device_id mpc5200_gpio_ids[] = { ++ { .compatible = "fsl,mpc5200-gpio", }, ++ { .compatible = "mpc5200-gpio", }, ++ {} ++}; ++ ++static void rt_mpc52xx_uart_init_hw(struct rt_mpc52xx_uart_port *port) ++{ ++ struct mpc52xx_gpio __iomem *gpio; ++ struct device_node *gpio_np; ++ u32 port_config; ++ ++ if (port->num == 6) { ++ gpio_np = of_find_matching_node(NULL, mpc5200_gpio_ids); ++ gpio = of_iomap(gpio_np, 0); ++ of_node_put(gpio_np); ++ if (!gpio) { ++ dev_err(port->dev, "PSC%d port_config: " ++ "couldn't map gpio ids\n", port->num); ++ return; ++ } ++ port_config = in_be32(&gpio->port_config); ++ port_config &= 0xFF0FFFFF; /* port config for PSC6 */ ++ port_config |= 0x00500000; ++ dev_dbg(port->dev, "PSC%d port_config: old:%x new:%x\n", ++ port->num, in_be32(&gpio->port_config), port_config); ++ out_be32(&gpio->port_config, port_config); ++ iounmap(gpio); ++ } ++} ++ ++static inline void rt_mpc52xx_uart_put_char(struct rt_mpc52xx_uart_ctx *ctx, ++ uint64_t *timestamp, ++ unsigned char ch) ++{ ++ ctx->in_buf[ctx->in_tail] = ch; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (++ctx->in_npend > IN_BUFFER_SIZE) { ++ ctx->status |= RTSER_SOFT_OVERRUN_ERR; ++ ctx->in_npend--; ++ } ++} ++ ++static inline int rt_mpc52xx_uart_rx_interrupt(struct rt_mpc52xx_uart_ctx *ctx, ++ uint64_t *timestamp) ++{ ++ int rbytes = 0; ++ int psc_status; ++ ++ psc_status = in_be16(&ctx->port->psc->mpc52xx_psc_status); ++ while (psc_status & MPC52xx_PSC_SR_RXRDY) { ++ /* read input character */ ++ rt_mpc52xx_uart_put_char(ctx, timestamp, psc_read_char(ctx)); ++ rbytes++; ++ ++ /* save new errors */ ++ if (psc_status & (MPC52xx_PSC_SR_OE | MPC52xx_PSC_SR_PE | ++ MPC52xx_PSC_SR_FE | MPC52xx_PSC_SR_RB)) { ++ if (psc_status & MPC52xx_PSC_SR_PE) ++ ctx->status |= RTSER_LSR_PARITY_ERR; ++ if (psc_status & MPC52xx_PSC_SR_FE) ++ ctx->status |= RTSER_LSR_FRAMING_ERR; ++ if (psc_status & MPC52xx_PSC_SR_RB) ++ ctx->status |= RTSER_LSR_BREAK_IND; ++ ++ /* ++ * Overrun is special, since it's reported ++ * immediately, and doesn't affect the current ++ * character. 
++ */ ++ if (psc_status & MPC52xx_PSC_SR_OE) { ++ ctx->status |= RTSER_LSR_OVERRUN_ERR; ++ rt_mpc52xx_uart_put_char(ctx, timestamp, 0); ++ rbytes++; ++ } ++ ++ /* Clear error condition */ ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_RST_ERR_STAT); ++ } ++ ++ psc_status = in_be16(&ctx->port->psc->mpc52xx_psc_status); ++ }; ++ ++ return rbytes; ++} ++ ++static inline int rt_mpc52xx_uart_tx_interrupt(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ while (psc_raw_tx_rdy(ctx) && (ctx->out_npend > 0)) { ++ if (ctx->config.rs485 && ++ (ctx->mcr_status & RTSER_MCR_RTS) == 0) { ++ /* switch RTS */ ++ ctx->mcr_status |= RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Set RTS, mcr_status=%#x\n", ++ ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ if (ctx->config.rs485 || ++ ((ctx->config.event_mask & RTSER_EVENT_TXEMPTY) && ++ (ctx->imr_status & MPC52xx_PSC_IMR_TXEMP) == 0)) { ++ /* enable tx-empty interrupt */ ++ ctx->imr_status |= MPC52xx_PSC_IMR_TXEMP; ++ dev_dbg(ctx->port->dev, "Enable TXEMP interrupt, " ++ "imr_status=%#x\n", ctx->imr_status); ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ++ ctx->imr_status); ++ } ++ ++ psc_write_char(ctx, ctx->out_buf[ctx->out_head++]); ++ ctx->out_head &= OUT_BUFFER_SIZE - 1; ++ ctx->out_npend--; ++ } ++ ++ return ctx->out_npend; ++} ++ ++static int rt_mpc52xx_uart_interrupt(rtdm_irq_t *irq_context) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ uint64_t timestamp = rtdm_clock_read(); ++ int rbytes = 0; ++ int events = 0; ++ int ret = RTDM_IRQ_NONE; ++ int goon = 1; ++ int n; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_mpc52xx_uart_ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ while (goon) { ++ goon = 0; ++ if (psc_rx_rdy(ctx)) { ++ dev_dbg(ctx->port->dev, "RX interrupt\n"); ++ n = rt_mpc52xx_uart_rx_interrupt(ctx, ×tamp); ++ if (n) { ++ rbytes += n; ++ events |= RTSER_EVENT_RXPEND; ++ } ++ } ++ if (psc_tx_rdy(ctx)) ++ goon |= rt_mpc52xx_uart_tx_interrupt(ctx); ++ ++ if (psc_tx_empty(ctx)) { ++ if (ctx->config.rs485 && ++ (ctx->mcr_status & RTSER_MCR_RTS)) { ++ /* reset RTS */ ++ ctx->mcr_status &= ~RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Reset RTS, " ++ "mcr_status=%#x\n", ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ /* disable tx-empty interrupt */ ++ ctx->imr_status &= ~MPC52xx_PSC_IMR_TXEMP; ++ dev_dbg(ctx->port->dev, "Disable TXEMP interrupt, " ++ "imr_status=%#x\n", ctx->imr_status); ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ++ ctx->imr_status); ++ ++ events |= RTSER_EVENT_TXEMPTY; ++ ctx->tx_empty = 1; ++ } ++ ++ if (ctx->config.event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) { ++ u8 status = in_8(&ctx->port->psc->mpc52xx_psc_ipcr); ++ ++ if (status & MPC52xx_PSC_D_DCD) ++ events |= (status & MPC52xx_PSC_DCD) ? ++ RTSER_EVENT_MODEMLO : ++ RTSER_EVENT_MODEMHI; ++ if (status & MPC52xx_PSC_D_CTS) ++ events |= (status & MPC52xx_PSC_CTS) ? 
++ RTSER_EVENT_MODEMLO : ++ RTSER_EVENT_MODEMHI; ++ dev_dbg(ctx->port->dev, "Modem line changed, " ++ "events=%#x\n", events); ++ } ++ ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else ++ ctx->in_nwait -= rbytes; ++ } ++ ++ if (ctx->status) ++ events |= RTSER_EVENT_ERRPEND; ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->imr_status & MPC52xx_PSC_IMR_TXRDY) && ++ (ctx->out_npend == 0)) { ++ psc_stop_tx(ctx); ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ return ret; ++} ++ ++ ++static int rt_mpc52xx_uart_set_config(struct rt_mpc52xx_uart_ctx *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int err = 0; ++ ++ /* make line configuration atomic and IRQ-safe */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) ++ ctx->config.baud_rate = config->baud_rate; ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ if (config->config_mask & RTSER_SET_HANDSHAKE) ++ ctx->config.handshake = config->handshake; ++ ++ if (config->config_mask & (RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | RTSER_SET_STOP_BITS | ++ RTSER_SET_BAUD | RTSER_SET_HANDSHAKE)) { ++ struct mpc52xx_psc *psc = ctx->port->psc; ++ unsigned char mr1 = 0, mr2 = 0; ++ unsigned int divisor; ++ u16 prescaler; ++ ++ switch (ctx->config.data_bits) { ++ case RTSER_5_BITS: ++ mr1 |= MPC52xx_PSC_MODE_5_BITS; ++ break; ++ case RTSER_6_BITS: ++ mr1 |= MPC52xx_PSC_MODE_6_BITS; ++ break; ++ case RTSER_7_BITS: ++ mr1 |= MPC52xx_PSC_MODE_7_BITS; ++ break; ++ case RTSER_8_BITS: ++ default: ++ mr1 |= MPC52xx_PSC_MODE_8_BITS; ++ break; ++ } ++ ++ switch (ctx->config.parity) { ++ case RTSER_ODD_PARITY: ++ mr1 |= MPC52xx_PSC_MODE_PARODD; ++ break; ++ case RTSER_EVEN_PARITY: ++ mr1 |= MPC52xx_PSC_MODE_PAREVEN; ++ break; ++ case RTSER_NO_PARITY: ++ default: ++ mr1 |= MPC52xx_PSC_MODE_PARNONE; ++ break; ++ } ++ ++ if (ctx->config.stop_bits == RTSER_2_STOPB) ++ mr2 |= (ctx->config.data_bits == RTSER_5_BITS) ? 
++ MPC52xx_PSC_MODE_ONE_STOP_5_BITS : ++ MPC52xx_PSC_MODE_TWO_STOP; ++ else ++ mr2 |= MPC52xx_PSC_MODE_ONE_STOP; ++ ++ if (ctx->config.handshake == RTSER_RTSCTS_HAND) { ++ mr1 |= MPC52xx_PSC_MODE_RXRTS; ++ mr2 |= MPC52xx_PSC_MODE_TXCTS; ++ } else if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ ctx->mcr_status = ++ RTSER_MCR_DTR | RTSER_MCR_RTS | RTSER_MCR_OUT2; ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ ++ /* Reset the TX & RX */ ++ out_8(&psc->command, MPC52xx_PSC_RST_RX); ++ out_8(&psc->command, MPC52xx_PSC_RST_TX); ++ ++ /* Send new mode settings */ ++ out_8(&psc->command, MPC52xx_PSC_SEL_MODE_REG_1); ++ out_8(&psc->mode, mr1); ++ out_8(&psc->mode, mr2); ++ ++ /* Set baudrate */ ++ divisor = (ctx->port->uartclk + 16 * ctx->config.baud_rate) / ++ (32 * ctx->config.baud_rate); ++ prescaler = 0xdd00; ++ out_be16(&psc->mpc52xx_psc_clock_select, prescaler); ++ out_8(&psc->ctur, divisor >> 8); ++ out_8(&psc->ctlr, divisor & 0xff); ++ ++ dev_info(ctx->port->dev, ++ "mr1=%#x mr2=%#x baud=%d divisor=%d prescaler=%x\n", ++ mr1, mr2, ctx->config.baud_rate, divisor, prescaler); ++ ++ /* Reenable TX & RX */ ++ out_8(&psc->command, MPC52xx_PSC_TX_ENABLE); ++ out_8(&psc->command, MPC52xx_PSC_RX_ENABLE); ++ ++ /* Enable RX */ ++ ctx->imr_status |= MPC52xx_PSC_IMR_RXRDY; ++ out_be16(&ctx->port->psc->mpc52xx_psc_imr, ctx->imr_status); ++ ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ } ++ ++ if (config->config_mask & RTSER_SET_RS485) { ++ ctx->config.rs485 = config->rs485; ++ if (config->rs485) { ++ /* reset RTS */ ++ ctx->mcr_status &= ~RTSER_MCR_RTS; ++ dev_dbg(ctx->port->dev, "Reset RTS, mcr_status=%#x\n", ++ ctx->mcr_status); ++ psc_set_mcr(ctx, ctx->mcr_status); ++ } ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ care not to use and change timeouts at the same time. 
*/ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* change timestamp history atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ /* change event mask atomically */ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) && ++ ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_TXEMPTY) && ++ !ctx->out_npend && ctx->tx_empty) ++ ctx->ioc_events |= RTSER_EVENT_TXEMPTY; ++ ++ if (config->event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ psc_enable_ms(ctx); ++ else ++ psc_disable_ms(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ } ++ ++ return err; ++} ++ ++void rt_mpc52xx_uart_cleanup_ctx(struct rt_mpc52xx_uart_ctx *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++static int rt_mpc52xx_uart_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ uint64_t *dummy; ++ int err; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ctx->port = (struct rt_mpc52xx_uart_port *)rtdm_fd_device(fd)->device_data; ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ psc_disable_ints(ctx); ++ ++ /* Reset/activate the port, clear and enable interrupts */ ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RST_RX); ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RST_TX); ++ ++ out_be32(&ctx->port->psc->sicr, 0); /* UART mode DCD ignored */ ++ ++ psc_fifo_init(ctx); ++ ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_TX_ENABLE); ++ out_8(&ctx->port->psc->command, MPC52xx_PSC_RX_ENABLE); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rt_mpc52xx_uart_set_config(ctx, &default_config, &dummy); ++ ++ err = rtdm_irq_request(&ctx->irq_handle, ctx->port->irq, ++ rt_mpc52xx_uart_interrupt, 0, ++ rtdm_fd_device(fd)->name, ctx); ++ if (err) { ++ psc_set_mcr(ctx, 0); ++ rt_mpc52xx_uart_cleanup_ctx(ctx); ++ ++ return err; ++ } ++ ++ return 0; ++} 
++ ++static void rt_mpc52xx_uart_close(struct rtdm_fd *fd) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ uint64_t *in_history; ++ rtdm_lockctx_t lock_ctx; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* reset DTR and RTS */ ++ psc_set_mcr(ctx, 0); ++ ++ psc_disable_ints(ctx); ++ ++ in_history = ctx->in_history; ++ ctx->in_history = NULL; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ rt_mpc52xx_uart_cleanup_ctx(ctx); ++ ++ kfree(in_history); ++} ++ ++static int rt_mpc52xx_uart_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_mpc52xx_uart_ctx *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct ++ rtser_config)); ++ else ++ memcpy(arg, &ctx->config, sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ /* ++ * Reflect the call to non-RT as we will likely ++ * allocate or free the buffer. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_mpc52xx_uart_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ status_buf.line_status = status; ++ status_buf.modem_status = psc_get_msr(ctx); ++ ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = status; ++ ((struct rtser_status *)arg)->modem_status = ++ psc_get_msr(ctx); ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ if ((new_mcr & RTSER_MCR_RTS) != RTSER_MCR_RTS) ++ dev_warn(ctx->port->dev, ++ "MCR: Only RTS is supported\n"); ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr & RTSER_MCR_RTS; ++ psc_set_mcr(ctx, ctx->mcr_status); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++ wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTSER_BREAK_SET) ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_START_BRK); ++ else ++ out_8(&ctx->port->psc->command, ++ MPC52xx_PSC_STOP_BRK); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++#ifdef ISREADY ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_mpc52xx_uart_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_mpc52xx_uart_reg_out(mode, base, FCR, fcr); ++ rt_mpc52xx_uart_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++#endif ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++static ssize_t rt_mpc52xx_uart_read(struct rtdm_fd *fd, void *buf, ++ size_t nbyte) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ block = subblock = (pending <= nbyte) ? 
pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ separately. */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ if ((ctx->in_npend -= block) == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ non-blocking call or contains the error ++ returned by rtdm_event_wait[_until] */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ before exit. */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = read; ++ ++ return ret; ++} ++ ++static ssize_t rt_mpc52xx_uart_write(struct rtdm_fd *fd, ++ const void *buf, ++ size_t nbyte) ++{ ++ struct rt_mpc52xx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ end separately. 
*/ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ /* Mark shift register not empty */ ++ ctx->ioc_events &= ~RTSER_EVENT_TXEMPTY; ++ ctx->tx_empty = 0; ++ ++ psc_start_tx(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ return immediately. */ ++ return -EBADF; ++ } ++ if (ret == -EWOULDBLOCK) { ++ /* Fix error code for non-blocking mode. */ ++ ret = -EAGAIN; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT) || (ret == -EINTR))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver mpc52xx_uart_driver = { ++ .profile_info = RTDM_PROFILE_INFO(imx_uart, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_count = MPC52xx_PSC_MAXNUM, ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .context_size = sizeof(struct rt_mpc52xx_uart_ctx), ++ .ops = { ++ .open = rt_mpc52xx_uart_open, ++ .close = rt_mpc52xx_uart_close, ++ .ioctl_rt = rt_mpc52xx_uart_ioctl, ++ .ioctl_nrt = rt_mpc52xx_uart_ioctl, ++ .read_rt = rt_mpc52xx_uart_read, ++ .write_rt = rt_mpc52xx_uart_write, ++ }, ++}; ++ ++static int rt_mpc52xx_uart_of_probe(struct platform_device *op) ++{ ++ struct rt_mpc52xx_uart_port *port; ++ struct rtdm_device *dev; ++ struct resource res; ++ int ret, idx; ++ ++ dev_dbg(&op->dev, "mpc52xx_uart_probe(op=%p)\n", op); ++ ++ /* Check validity & presence */ ++ for (idx = 0; idx < MPC52xx_PSC_MAXNUM; idx++) ++ if (rt_mpc52xx_uart_nodes[idx] == op->dev.of_node) ++ break; ++ if (idx >= MPC52xx_PSC_MAXNUM) ++ return -EINVAL; ++ ++ port = kmalloc(sizeof(*port), GFP_KERNEL); ++ if (!port) { ++ dev_err(&op->dev, "Could allocate port space\n"); ++ return -ENOMEM; ++ } ++ port->dev = &op->dev; ++ ++ /* ++ * Set the uart clock to the input clock of the psc, the different ++ * prescalers are taken into account in the set_baudrate() methods ++ * of the respective chip ++ */ ++ port->uartclk = mpc5xxx_get_bus_frequency(op->dev.of_node); ++ if (port->uartclk == 0) { ++ dev_err(&op->dev, "Could not find uart clock frequency\n"); ++ ret = -EINVAL; ++ goto out_kfree_port; ++ } ++ ++ /* Fetch register locations */ ++ ret = of_address_to_resource(op->dev.of_node, 0, &res); ++ if (ret) { ++ dev_err(&op->dev, "Could not get resources\n"); ++ goto out_kfree_port; ++ } ++ port->num = ((res.start >> 8) & 0xf) / 2; ++ if (port->num < 6) ++ port->num++; ++ ++ if (!request_mem_region(res.start, resource_size(&res), ++ RT_MPC52XX_UART_DRVNAM)) { ++ ret = -EBUSY; ++ goto 
out_kfree_port; ++ } ++ ++ port->psc = ioremap(res.start, resource_size(&res)); ++ if (!port->psc) { ++ dev_err(&op->dev, "Could not map PSC registers\n"); ++ ret = -ENOMEM; ++ goto out_release_mem_region; ++ } ++ port->fifo = (struct mpc52xx_psc_fifo __iomem *)(port->psc + 1); ++ ++ port->irq = irq_of_parse_and_map(op->dev.of_node, 0); ++ if (port->irq <= 0) { ++ dev_err(&op->dev, "Could not get irq\n"); ++ ret = -ENODEV; ++ goto out_iounmap; ++ } ++ ++ dev = kmalloc(sizeof(struct rtdm_device), GFP_KERNEL); ++ if (!dev) { ++ dev_err(&op->dev, "Could allocate device context\n"); ++ ret = -ENOMEM; ++ goto out_dispose_irq_mapping; ++ } ++ ++ dev->driver = &mpc52xx_uart_driver; ++ dev->label = "rtserPSC%d"; ++ dev->device_data = port; ++ ++ rt_mpc52xx_uart_init_hw(port); ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto out_kfree_dev; ++ ++ dev_set_drvdata(&op->dev, dev); ++ ++ dev_info(&op->dev, "%s on PSC%d at 0x%p, irq=%d, clk=%i\n", ++ dev->name, port->num, port->psc, port->irq, ++ port->uartclk); ++ ++ return 0; ++ ++out_kfree_dev: ++ kfree(dev); ++out_dispose_irq_mapping: ++ irq_dispose_mapping(port->irq); ++out_iounmap: ++ iounmap(port->psc); ++out_release_mem_region: ++ release_mem_region(res.start, resource_size(&res)); ++out_kfree_port: ++ kfree(port); ++ ++ return ret; ++} ++ ++static int rt_mpc52xx_uart_of_remove(struct platform_device *op) ++{ ++ struct rtdm_device *dev = dev_get_drvdata(&op->dev); ++ struct rt_mpc52xx_uart_port *port = dev->device_data; ++ struct resource res; ++ ++ dev_set_drvdata(&op->dev, NULL); ++ ++ rtdm_dev_unregister(dev); ++ irq_dispose_mapping(port->irq); ++ iounmap(port->psc); ++ if (!of_address_to_resource(op->dev.of_node, 0, &res)) ++ release_mem_region(res.start, resource_size(&res)); ++ kfree(port); ++ kfree(dev); ++ ++ return 0; ++} ++ ++static struct of_device_id rt_mpc52xx_uart_of_match[] = { ++ { .compatible = "fsl,mpc5200b-psc-uart", }, ++ { .compatible = "fsl,mpc5200-psc-uart", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, rt_mpc52xx_uart_of_match); ++ ++static struct platform_driver rt_mpc52xx_uart_of_driver = { ++ .probe = rt_mpc52xx_uart_of_probe, ++ .remove = rt_mpc52xx_uart_of_remove, ++ .driver = { ++ .name = "rt-mpc52xx-psc-uart", ++ .owner = THIS_MODULE, ++ .of_match_table = rt_mpc52xx_uart_of_match, ++ }, ++}; ++ ++static void rt_mpc52xx_uart_of_enumerate(void) ++{ ++ struct device_node *np; ++ int idx = 0; ++ ++ /* Assign index to each PSC in device tree line the linux driver does */ ++ for_each_matching_node(np, rt_mpc52xx_uart_of_match) { ++ of_node_get(np); ++ rt_mpc52xx_uart_nodes[idx] = np; ++ idx++; ++ } ++} ++ ++static int __init rt_mpc52xx_uart_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ printk(KERN_INFO "RTserial: MPC52xx PSC UART driver\n"); ++ ++ rt_mpc52xx_uart_of_enumerate(); ++ ++ ret = platform_driver_register(&rt_mpc52xx_uart_of_driver); ++ if (ret) { ++ printk(KERN_ERR ++ "%s; Could not register driver (err=%d)\n", ++ __func__, ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void __exit rt_mpc52xx_uart_exit(void) ++{ ++ platform_driver_unregister(&rt_mpc52xx_uart_of_driver); ++} ++ ++module_init(rt_mpc52xx_uart_init); ++module_exit(rt_mpc52xx_uart_exit); +--- linux/drivers/xenomai/serial/rt_imx_uart.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/serial/rt_imx_uart.c 2021-04-07 16:01:26.490635219 +0800 +@@ -0,0 +1,1677 @@ ++/* ++ * Copyright 2012 Wolfgang Grandegger ++ * ++ * Derived from the Linux IMX UART driver (drivers/tty/serial/imx.c) ++ 
* and 16650A RTserial driver. ++ * ++ * Copyright (C) 2005-2007 Jan Kiszka . ++ * Copyright (C) 2004 Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTDM-based driver for IMX UARTs"); ++MODULE_VERSION("1.0.0"); ++MODULE_LICENSE("GPL"); ++ ++#define DRIVER_NAME "xeno_imx_uart" ++ ++/* Register definitions */ ++#define URXD0 0x0 /* Receiver Register */ ++#define URTX0 0x40 /* Transmitter Register */ ++#define UCR1 0x80 /* Control Register 1 */ ++#define UCR2 0x84 /* Control Register 2 */ ++#define UCR3 0x88 /* Control Register 3 */ ++#define UCR4 0x8c /* Control Register 4 */ ++#define UFCR 0x90 /* FIFO Control Register */ ++#define USR1 0x94 /* Status Register 1 */ ++#define USR2 0x98 /* Status Register 2 */ ++#define UESC 0x9c /* Escape Character Register */ ++#define UTIM 0xa0 /* Escape Timer Register */ ++#define UBIR 0xa4 /* BRM Incremental Register */ ++#define UBMR 0xa8 /* BRM Modulator Register */ ++#define UBRC 0xac /* Baud Rate Count Register */ ++#define MX2_ONEMS 0xb0 /* One Millisecond register */ ++#define IMX1_UTS 0xd0 /* UART Test Register on i.mx1 */ ++#define IMX21_UTS 0xb4 /* UART Test Register on all other i.mx*/ ++ ++ ++ ++/* UART Control Register Bit Fields.*/ ++#define URXD_CHARRDY (1<<15) ++#define URXD_ERR (1<<14) ++#define URXD_OVRRUN (1<<13) ++#define URXD_FRMERR (1<<12) ++#define URXD_BRK (1<<11) ++#define URXD_PRERR (1<<10) ++#define UCR1_ADEN (1<<15) /* Auto dectect interrupt */ ++#define UCR1_ADBR (1<<14) /* Auto detect baud rate */ ++#define UCR1_TRDYEN (1<<13) /* Transmitter ready interrupt enable */ ++#define UCR1_IDEN (1<<12) /* Idle condition interrupt */ ++#define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ ++#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ ++#define UCR1_IREN (1<<7) /* Infrared interface enable */ ++#define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ ++#define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ ++#define UCR1_SNDBRK (1<<4) /* Send break */ ++#define UCR1_TDMAEN (1<<3) /* Transmitter ready DMA enable */ ++#define MX1_UCR1_UARTCLKEN (1<<2) /* UART clock enabled, mx1 only */ ++#define UCR1_DOZE (1<<1) /* Doze */ ++#define UCR1_UARTEN (1<<0) /* UART enabled */ ++#define UCR2_ESCI (1<<15) /* Escape seq interrupt enable */ ++#define UCR2_IRTS (1<<14) /* Ignore RTS pin */ ++#define UCR2_CTSC (1<<13) /* CTS pin control */ ++#define UCR2_CTS (1<<12) /* Clear to send */ ++#define UCR2_ESCEN (1<<11) /* Escape enable */ ++#define UCR2_PREN (1<<8) /* Parity enable */ ++#define UCR2_PROE (1<<7) /* Parity odd/even */ ++#define UCR2_STPB (1<<6) /* Stop */ ++#define UCR2_WS (1<<5) /* Word size */ ++#define UCR2_RTSEN (1<<4) /* Request to send interrupt enable */ ++#define UCR2_ATEN (1<<3) /* 
Aging Timer Enable */ ++#define UCR2_TXEN (1<<2) /* Transmitter enabled */ ++#define UCR2_RXEN (1<<1) /* Receiver enabled */ ++#define UCR2_SRST (1<<0) /* SW reset */ ++#define UCR3_DTREN (1<<13) /* DTR interrupt enable */ ++#define UCR3_PARERREN (1<<12) /* Parity enable */ ++#define UCR3_FRAERREN (1<<11) /* Frame error interrupt enable */ ++#define UCR3_DSR (1<<10) /* Data set ready */ ++#define UCR3_DCD (1<<9) /* Data carrier detect */ ++#define UCR3_RI (1<<8) /* Ring indicator */ ++#define UCR3_ADNIMP (1<<7) /* Autobaud Detection Not Improved */ ++#define UCR3_RXDSEN (1<<6) /* Receive status interrupt enable */ ++#define UCR3_AIRINTEN (1<<5) /* Async IR wake interrupt enable */ ++#define UCR3_AWAKEN (1<<4) /* Async wake interrupt enable */ ++#define UCR3_DTRDEN (1<<3) /* Data Terminal Ready Delta Enable. */ ++#define MX1_UCR3_REF25 (1<<3) /* Ref freq 25 MHz, only on mx1 */ ++#define MX1_UCR3_REF30 (1<<2) /* Ref Freq 30 MHz, only on mx1 */ ++#define MX2_UCR3_RXDMUXSEL (1<<2) /* RXD Muxed Input Select, on mx2/mx3 */ ++#define UCR3_INVT (1<<1) /* Inverted Infrared transmission */ ++#define UCR3_BPEN (1<<0) /* Preset registers enable */ ++#define UCR4_CTSTL_SHF 10 /* CTS trigger level shift */ ++#define UCR4_CTSTL_MASK 0x3F /* CTS trigger is 6 bits wide */ ++#define UCR4_INVR (1<<9) /* Inverted infrared reception */ ++#define UCR4_ENIRI (1<<8) /* Serial infrared interrupt enable */ ++#define UCR4_WKEN (1<<7) /* Wake interrupt enable */ ++#define UCR4_REF16 (1<<6) /* Ref freq 16 MHz */ ++#define UCR4_IRSC (1<<5) /* IR special case */ ++#define UCR4_TCEN (1<<3) /* Transmit complete interrupt enable */ ++#define UCR4_BKEN (1<<2) /* Break condition interrupt enable */ ++#define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */ ++#define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ ++#define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ ++#define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ ++#define UFCR_RFDIV_REG(x) (((x) < 7 ? 
6 - (x) : 6) << 7) ++#define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ ++#define UFCR_DCEDTE (1<<6) ++#define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */ ++#define USR1_RTSS (1<<14) /* RTS pin status */ ++#define USR1_TRDY (1<<13) /* Transmitter ready interrupt/dma flag */ ++#define USR1_RTSD (1<<12) /* RTS delta */ ++#define USR1_ESCF (1<<11) /* Escape seq interrupt flag */ ++#define USR1_FRAMERR (1<<10) /* Frame error interrupt flag */ ++#define USR1_RRDY (1<<9) /* Receiver ready interrupt/dma flag */ ++#define USR1_AGTIM (1<<8) /* Ageing Timer Interrupt Flag */ ++#define USR1_DTRD (1<<7) /* DTR Delta */ ++#define USR1_RXDS (1<<6) /* Receiver idle interrupt flag */ ++#define USR1_AIRINT (1<<5) /* Async IR wake interrupt flag */ ++#define USR1_AWAKE (1<<4) /* Async wake interrupt flag */ ++#define USR2_ADET (1<<15) /* Auto baud rate detect complete */ ++#define USR2_TXFE (1<<14) /* Transmit buffer FIFO empty */ ++#define USR2_DTRF (1<<13) /* DTR edge interrupt flag */ ++#define USR2_IDLE (1<<12) /* Idle condition */ ++#define USR2_RIDELT (1<<10) /* Ring Indicator Delta */ ++#define USR2_RIIN (1<<9) /* Ring Indicator Input */ ++#define USR2_IRINT (1<<8) /* Serial infrared interrupt flag */ ++#define USR2_WAKE (1<<7) /* Wake */ ++#define USR2_DCDDELT (1<<6) /* Data Carrier Detect Delta */ ++#define USR2_DCDIN (1<<5) /* Data Carrier Detect Input */ ++#define USR2_RTSF (1<<4) /* RTS edge interrupt flag */ ++#define USR2_TXDC (1<<3) /* Transmitter complete */ ++#define USR2_BRCD (1<<2) /* Break condition */ ++#define USR2_ORE (1<<1) /* Overrun error */ ++#define USR2_RDR (1<<0) /* Recv data ready */ ++#define UTS_FRCPERR (1<<13) /* Force parity error */ ++#define UTS_LOOP (1<<12) /* Loop tx and rx */ ++#define UTS_TXEMPTY (1<<6) /* TxFIFO empty */ ++#define UTS_RXEMPTY (1<<5) /* RxFIFO empty */ ++#define UTS_TXFULL (1<<4) /* TxFIFO full */ ++#define UTS_RXFULL (1<<3) /* RxFIFO full */ ++#define UTS_SOFTRST (1<<0) /* Software reset */ ++ ++#define IN_BUFFER_SIZE 4096 ++#define OUT_BUFFER_SIZE 4096 ++ ++#define TX_FIFO_SIZE 32 ++ ++#define PARITY_MASK 0x03 ++#define DATA_BITS_MASK 0x03 ++#define STOP_BITS_MASK 0x01 ++#define FIFO_MASK 0xC0 ++#define EVENT_MASK 0x0F ++ ++#define IER_RX 0x01 ++#define IER_TX 0x02 ++#define IER_STAT 0x04 ++#define IER_MODEM 0x08 ++ ++#define IMX_ISR_PASS_LIMIT 256 ++#define UART_CREAD_BIT 256 ++ ++#define RT_IMX_UART_MAX 5 ++ ++static int tx_fifo[RT_IMX_UART_MAX]; ++module_param_array(tx_fifo, int, NULL, 0400); ++MODULE_PARM_DESC(tx_fifo, "Transmitter FIFO size"); ++ ++/* i.MX21 type uart runs on all i.mx except i.MX1 and i.MX6q */ ++enum imx_uart_type { ++ IMX1_UART, ++ IMX21_UART, ++ IMX53_UART, ++ IMX6Q_UART, ++}; ++ ++/* device type dependent stuff */ ++struct imx_uart_data { ++ unsigned int uts_reg; ++ enum imx_uart_type devtype; ++}; ++ ++ ++struct rt_imx_uart_port { ++ unsigned char __iomem *membase; /* read/write[bwl] */ ++ resource_size_t mapbase; /* for ioremap */ ++ unsigned int irq; /* irq number */ ++ int tx_fifo; /* TX fifo size*/ ++ unsigned int have_rtscts; ++ unsigned int use_dcedte; ++ unsigned int use_hwflow; ++ struct clk *clk_ipg; /* clock id for UART clock */ ++ struct clk *clk_per; /* clock id for UART clock */ ++ const struct imx_uart_data *devdata; ++ unsigned int uartclk; /* base uart clock */ ++ struct rtdm_device rtdm_dev; /* RTDM device structure */ ++}; ++ ++ ++static struct imx_uart_data imx_uart_devdata[] = { ++ [IMX1_UART] = { ++ .uts_reg = IMX1_UTS, ++ .devtype = IMX1_UART, ++ }, ++ [IMX21_UART] = { ++ 
.uts_reg = IMX21_UTS, ++ .devtype = IMX21_UART, ++ }, ++ [IMX53_UART] = { ++ .uts_reg = IMX21_UTS, ++ .devtype = IMX53_UART, ++ }, ++ [IMX6Q_UART] = { ++ .uts_reg = IMX21_UTS, ++ .devtype = IMX6Q_UART, ++ }, ++}; ++ ++static const struct platform_device_id rt_imx_uart_id_table[] = { ++ { ++ .name = "imx1-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX1_UART], ++ }, { ++ .name = "imx21-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX21_UART], ++ }, { ++ .name = "imx53-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX53_UART], ++ }, { ++ .name = "imx6q-uart", ++ .driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX6Q_UART], ++ }, { ++ /* sentinel */ ++ } ++}; ++MODULE_DEVICE_TABLE(platform, rt_imx_uart_id_table); ++ ++static const struct of_device_id rt_imx_uart_dt_ids[] = { ++ { ++ .compatible = "fsl,imx6q-uart", ++ .data = &imx_uart_devdata[IMX6Q_UART], }, ++ { ++ .compatible = "fsl,imx53-uart", ++ .data = &imx_uart_devdata[IMX53_UART], }, ++ { ++ .compatible = "fsl,imx1-uart", ++ .data = &imx_uart_devdata[IMX1_UART], }, ++ { ++ .compatible = "fsl,imx21-uart", ++ .data = &imx_uart_devdata[IMX21_UART], }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, rt_imx_uart_dt_ids); ++ ++struct rt_imx_uart_ctx { ++ struct rtser_config config; /* current device configuration */ ++ ++ rtdm_irq_t irq_handle; /* device IRQ handle */ ++ rtdm_lock_t lock; /* lock to protect context struct */ ++ ++ int in_head; /* RX ring buffer, head pointer */ ++ int in_tail; /* RX ring buffer, tail pointer */ ++ size_t in_npend; /* pending bytes in RX ring */ ++ int in_nwait; /* bytes the user waits for */ ++ rtdm_event_t in_event; /* raised to unblock reader */ ++ char in_buf[IN_BUFFER_SIZE]; /* RX ring buffer */ ++ ++ volatile unsigned long in_lock; /* single-reader lock */ ++ uint64_t *in_history; /* RX timestamp buffer */ ++ ++ int out_head; /* TX ring buffer, head pointer */ ++ int out_tail; /* TX ring buffer, tail pointer */ ++ size_t out_npend; /* pending bytes in TX ring */ ++ rtdm_event_t out_event; /* raised to unblock writer */ ++ char out_buf[OUT_BUFFER_SIZE]; /* TX ring buffer */ ++ rtdm_mutex_t out_lock; /* single-writer mutex */ ++ ++ uint64_t last_timestamp; /* timestamp of last event */ ++ int ioc_events; /* recorded events */ ++ rtdm_event_t ioc_event; /* raised to unblock event waiter */ ++ volatile unsigned long ioc_event_lock; /* single-waiter lock */ ++ ++ int ier_status; /* IER cache */ ++ int mcr_status; /* MCR cache */ ++ int status; /* cache for LSR + soft-states */ ++ int saved_errors; /* error cache for RTIOC_GET_STATUS */ ++ ++ /* ++ * The port structure holds all the information about the UART ++ * port like base address, and so on. 
++ */ ++ struct rt_imx_uart_port *port; ++}; ++ ++static const struct rtser_config default_config = { ++ .config_mask = 0xFFFF, ++ .baud_rate = RTSER_DEF_BAUD, ++ .parity = RTSER_DEF_PARITY, ++ .data_bits = RTSER_DEF_BITS, ++ .stop_bits = RTSER_DEF_STOPB, ++ .handshake = RTSER_DEF_HAND, ++ .fifo_depth = RTSER_DEF_FIFO_DEPTH, ++ .rx_timeout = RTSER_DEF_TIMEOUT, ++ .tx_timeout = RTSER_DEF_TIMEOUT, ++ .event_timeout = RTSER_DEF_TIMEOUT, ++ .timestamp_history = RTSER_DEF_TIMESTAMP_HISTORY, ++ .event_mask = RTSER_DEF_EVENT_MASK, ++}; ++ ++static void rt_imx_uart_stop_tx(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long temp; ++ ++ temp = readl(ctx->port->membase + UCR1); ++ writel(temp & ~UCR1_TXMPTYEN, ctx->port->membase + UCR1); ++} ++ ++static void rt_imx_uart_start_tx(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long temp; ++ ++ temp = readl(ctx->port->membase + UCR1); ++ writel(temp | UCR1_TXMPTYEN, ctx->port->membase + UCR1); ++} ++ ++static void rt_imx_uart_enable_ms(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long ucr3; ++ ++ /* ++ * RTS interrupt is enabled only if we are using interrupt-driven ++ * software controlled hardware flow control ++ */ ++ if (!ctx->port->use_hwflow) { ++ unsigned long ucr1 = readl(ctx->port->membase + UCR1); ++ ++ ucr1 |= UCR1_RTSDEN; ++ writel(ucr1, ctx->port->membase + UCR1); ++ } ++ ucr3 = readl(ctx->port->membase + UCR3); ++ ucr3 |= UCR3_DTREN; ++ if (ctx->port->use_dcedte) /* DTE mode */ ++ ucr3 |= UCR3_DCD | UCR3_RI; ++ writel(ucr3, ctx->port->membase + UCR3); ++} ++ ++static int rt_imx_uart_rx_chars(struct rt_imx_uart_ctx *ctx, ++ uint64_t *timestamp) ++{ ++ unsigned int rx, temp; ++ int rbytes = 0; ++ int lsr = 0; ++ ++ while (readl(ctx->port->membase + USR2) & USR2_RDR) { ++ rx = readl(ctx->port->membase + URXD0); ++ temp = readl(ctx->port->membase + USR2); ++ if (temp & USR2_BRCD) { ++ writel(USR2_BRCD, ctx->port->membase + USR2); ++ lsr |= RTSER_LSR_BREAK_IND; ++ } ++ ++ if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR)) { ++ if (rx & URXD_PRERR) ++ lsr |= RTSER_LSR_PARITY_ERR; ++ else if (rx & URXD_FRMERR) ++ lsr |= RTSER_LSR_FRAMING_ERR; ++ if (rx & URXD_OVRRUN) ++ lsr |= RTSER_LSR_OVERRUN_ERR; ++ } ++ ++ /* save received character */ ++ ctx->in_buf[ctx->in_tail] = rx & 0xff; ++ if (ctx->in_history) ++ ctx->in_history[ctx->in_tail] = *timestamp; ++ ctx->in_tail = (ctx->in_tail + 1) & (IN_BUFFER_SIZE - 1); ++ ++ if (unlikely(ctx->in_npend >= IN_BUFFER_SIZE)) ++ lsr |= RTSER_SOFT_OVERRUN_ERR; ++ else ++ ctx->in_npend++; ++ ++ rbytes++; ++ } ++ ++ /* save new errors */ ++ ctx->status |= lsr; ++ ++ return rbytes; ++} ++ ++static void rt_imx_uart_tx_chars(struct rt_imx_uart_ctx *ctx) ++{ ++ int ch; ++ unsigned int uts_reg = ctx->port->devdata->uts_reg; ++ ++ while (ctx->out_npend > 0 && ++ !(readl(ctx->port->membase + uts_reg) & UTS_TXFULL)) { ++ ch = ctx->out_buf[ctx->out_head++]; ++ writel(ch, ctx->port->membase + URTX0); ++ ctx->out_head &= (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend--; ++ } ++} ++ ++static int rt_imx_uart_modem_status(struct rt_imx_uart_ctx *ctx, ++ unsigned int usr1, ++ unsigned int usr2) ++{ ++ int events = 0; ++ ++ /* Clear the status bits that triggered the interrupt */ ++ writel(usr1, ctx->port->membase + USR1); ++ writel(usr2, ctx->port->membase + USR2); ++ ++ if (ctx->port->use_dcedte) { /* DTE mode */ ++ if (usr2 & USR2_DCDDELT) ++ events |= !(usr2 & USR2_DCDIN) ? ++ RTSER_EVENT_MODEMHI : RTSER_EVENT_MODEMLO; ++ } ++ if (!ctx->port->use_hwflow && (usr1 & USR1_RTSD)) { ++ events |= (usr1 & USR1_RTSS) ? 
++ RTSER_EVENT_MODEMHI : RTSER_EVENT_MODEMLO; ++ } ++ ++ return events; ++} ++ ++static int rt_imx_uart_int(rtdm_irq_t *irq_context) ++{ ++ uint64_t timestamp = rtdm_clock_read(); ++ struct rt_imx_uart_ctx *ctx; ++ unsigned int usr1, usr2, ucr1; ++ int rbytes = 0, events = 0; ++ int ret = RTDM_IRQ_NONE; ++ ++ ctx = rtdm_irq_get_arg(irq_context, struct rt_imx_uart_ctx); ++ ++ rtdm_lock_get(&ctx->lock); ++ ++ usr1 = readl(ctx->port->membase + USR1); ++ usr2 = readl(ctx->port->membase + USR2); ++ ucr1 = readl(ctx->port->membase + UCR1); ++ ++ /* ++ * Read if there is data available ++ */ ++ if (usr1 & USR1_RRDY) { ++ if (likely(ucr1 & UCR1_RRDYEN)) { ++ rbytes = rt_imx_uart_rx_chars(ctx, ×tamp); ++ events |= RTSER_EVENT_RXPEND; ++ } ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ /* ++ * Send data if there is data to be sent ++ */ ++ if (usr1 & USR1_TRDY) { ++ if (likely(ucr1 & UCR1_TXMPTYEN)) ++ rt_imx_uart_tx_chars(ctx); ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ /* ++ * Handle modem status events ++ */ ++ if ((usr1 & (USR1_RTSD | USR1_DTRD)) || ++ (usr2 & (USR2_DCDDELT | USR2_RIDELT))) { ++ events |= rt_imx_uart_modem_status(ctx, usr1, usr2); ++ ret = RTDM_IRQ_HANDLED; ++ } ++ ++ if (ctx->in_nwait > 0) { ++ if ((ctx->in_nwait <= rbytes) || ctx->status) { ++ ctx->in_nwait = 0; ++ rtdm_event_signal(&ctx->in_event); ++ } else { ++ ctx->in_nwait -= rbytes; ++ } ++ } ++ ++ if (ctx->status) { ++ events |= RTSER_EVENT_ERRPEND; ++#ifdef FIXME ++ ctx->ier_status &= ~IER_STAT; ++#endif ++ } ++ ++ if (events & ctx->config.event_mask) { ++ int old_events = ctx->ioc_events; ++ ++ ctx->last_timestamp = timestamp; ++ ctx->ioc_events = events; ++ ++ if (!old_events) ++ rtdm_event_signal(&ctx->ioc_event); ++ } ++ ++ if ((ctx->ier_status & IER_TX) && (ctx->out_npend == 0)) { ++ rt_imx_uart_stop_tx(ctx); ++ ctx->ier_status &= ~IER_TX; ++ rtdm_event_signal(&ctx->out_event); ++ } ++ ++ rtdm_lock_put(&ctx->lock); ++ ++ if (ret != RTDM_IRQ_HANDLED) ++ pr_warn("%s: unhandled interrupt\n", __func__); ++ return ret; ++} ++ ++static unsigned int rt_imx_uart_get_msr(struct rt_imx_uart_ctx *ctx) ++{ ++ unsigned long usr1 = readl(ctx->port->membase + USR1); ++ unsigned long usr2 = readl(ctx->port->membase + USR2); ++ unsigned int msr = 0; ++ ++ if (usr1 & USR1_RTSD) ++ msr |= RTSER_MSR_DCTS; ++ if (usr1 & USR1_DTRD) ++ msr |= RTSER_MSR_DDSR; ++ if (usr2 & USR2_RIDELT) ++ msr |= RTSER_MSR_TERI; ++ if (usr2 & USR2_DCDDELT) ++ msr |= RTSER_MSR_DDCD; ++ ++ if (usr1 & USR1_RTSS) ++ msr |= RTSER_MSR_CTS; ++ ++ if (ctx->port->use_dcedte) { /* DTE mode */ ++ if (!(usr2 & USR2_DCDIN)) ++ msr |= RTSER_MSR_DCD; ++ if (!(usr2 & USR2_RIIN)) ++ msr |= RTSER_MSR_RI; ++ } ++ ++ return msr; ++} ++ ++static void rt_imx_uart_set_mcr(struct rt_imx_uart_ctx *ctx, ++ unsigned int mcr) ++{ ++ unsigned int uts_reg = ctx->port->devdata->uts_reg; ++ unsigned long ucr2 = readl(ctx->port->membase + UCR2); ++ unsigned long ucr3 = readl(ctx->port->membase + UCR3); ++ unsigned long uts = readl(ctx->port->membase + uts_reg); ++ ++ if (mcr & RTSER_MCR_RTS) { ++ /* ++ * Return to hardware-driven hardware flow control if the ++ * option is enabled ++ */ ++ if (ctx->port->use_hwflow) { ++ ucr2 |= UCR2_CTSC; ++ } else { ++ ucr2 |= UCR2_CTS; ++ ucr2 &= ~UCR2_CTSC; ++ } ++ } else { ++ ucr2 &= ~(UCR2_CTS | UCR2_CTSC); ++ } ++ writel(ucr2, ctx->port->membase + UCR2); ++ ++ if (mcr & RTSER_MCR_DTR) ++ ucr3 |= UCR3_DSR; ++ else ++ ucr3 &= ~UCR3_DSR; ++ writel(ucr3, ctx->port->membase + UCR3); ++ ++ if (mcr & RTSER_MCR_LOOP) ++ uts |= UTS_LOOP; ++ else ++ uts &= ~UTS_LOOP; ++ 
writel(uts, ctx->port->membase + uts_reg); ++} ++ ++static void rt_imx_uart_break_ctl(struct rt_imx_uart_ctx *ctx, ++ int break_state) ++{ ++ unsigned long ucr1 = readl(ctx->port->membase + UCR1); ++ ++ if (break_state == RTSER_BREAK_SET) ++ ucr1 |= UCR1_SNDBRK; ++ else ++ ucr1 &= ~UCR1_SNDBRK; ++ writel(ucr1, ctx->port->membase + UCR1); ++} ++ ++static int rt_imx_uart_set_config(struct rt_imx_uart_ctx *ctx, ++ const struct rtser_config *config, ++ uint64_t **in_history_ptr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int err = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ if (config->config_mask & RTSER_SET_BAUD) ++ ctx->config.baud_rate = config->baud_rate; ++ if (config->config_mask & RTSER_SET_DATA_BITS) ++ ctx->config.data_bits = config->data_bits & DATA_BITS_MASK; ++ if (config->config_mask & RTSER_SET_PARITY) ++ ctx->config.parity = config->parity & PARITY_MASK; ++ if (config->config_mask & RTSER_SET_STOP_BITS) ++ ctx->config.stop_bits = config->stop_bits & STOP_BITS_MASK; ++ ++ /* Timeout manipulation is not atomic. The user is supposed to take ++ * care not to use and change timeouts at the same time. ++ */ ++ if (config->config_mask & RTSER_SET_TIMEOUT_RX) ++ ctx->config.rx_timeout = config->rx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_TX) ++ ctx->config.tx_timeout = config->tx_timeout; ++ if (config->config_mask & RTSER_SET_TIMEOUT_EVENT) ++ ctx->config.event_timeout = config->event_timeout; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ if (config->timestamp_history & RTSER_RX_TIMESTAMP_HISTORY) { ++ if (!ctx->in_history) { ++ ctx->in_history = *in_history_ptr; ++ *in_history_ptr = NULL; ++ if (!ctx->in_history) ++ err = -ENOMEM; ++ } ++ } else { ++ *in_history_ptr = ctx->in_history; ++ ctx->in_history = NULL; ++ } ++ } ++ ++ if (config->config_mask & RTSER_SET_EVENT_MASK) { ++ ctx->config.event_mask = config->event_mask & EVENT_MASK; ++ ctx->ioc_events = 0; ++ ++ if ((config->event_mask & RTSER_EVENT_RXPEND) && ++ (ctx->in_npend > 0)) ++ ctx->ioc_events |= RTSER_EVENT_RXPEND; ++ ++ if ((config->event_mask & RTSER_EVENT_ERRPEND) ++ && ctx->status) ++ ctx->ioc_events |= RTSER_EVENT_ERRPEND; ++ } ++ ++ if (config->config_mask & RTSER_SET_HANDSHAKE) { ++ ctx->config.handshake = config->handshake; ++ ++ switch (ctx->config.handshake) { ++ case RTSER_RTSCTS_HAND: ++ /* ...? 
*/ ++ ++ default: /* RTSER_NO_HAND */ ++ ctx->mcr_status = RTSER_MCR_RTS | RTSER_MCR_OUT1; ++ break; ++ } ++ rt_imx_uart_set_mcr(ctx, ctx->mcr_status); ++ } ++ ++ /* configure hardware with new parameters */ ++ if (config->config_mask & (RTSER_SET_BAUD | ++ RTSER_SET_PARITY | ++ RTSER_SET_DATA_BITS | ++ RTSER_SET_STOP_BITS | ++ RTSER_SET_EVENT_MASK | ++ RTSER_SET_HANDSHAKE)) { ++ struct rt_imx_uart_port *port = ctx->port; ++ unsigned int ucr2, old_ucr1, old_txrxen, old_ucr2; ++ unsigned int baud = ctx->config.baud_rate; ++ unsigned int div, ufcr; ++ unsigned long num, denom; ++ uint64_t tdiv64; ++ ++ if (ctx->config.data_bits == RTSER_8_BITS) ++ ucr2 = UCR2_WS | UCR2_IRTS; ++ else ++ ucr2 = UCR2_IRTS; ++ ++ if (ctx->config.handshake == RTSER_RTSCTS_HAND) { ++ if (port->have_rtscts) { ++ ucr2 &= ~UCR2_IRTS; ++ ucr2 |= UCR2_CTSC; ++ } ++ } ++ ++ if (ctx->config.stop_bits == RTSER_2_STOPB) ++ ucr2 |= UCR2_STPB; ++ if (ctx->config.parity == RTSER_ODD_PARITY || ++ ctx->config.parity == RTSER_EVEN_PARITY) { ++ ucr2 |= UCR2_PREN; ++ if (ctx->config.parity == RTSER_ODD_PARITY) ++ ucr2 |= UCR2_PROE; ++ } ++ ++ /* ++ * disable interrupts and drain transmitter ++ */ ++ old_ucr1 = readl(port->membase + UCR1); ++ old_ucr1 &= ~UCR1_RTSDEN; /* reset in rt_imx_uart_enable_ms()*/ ++ writel(old_ucr1 & ~(UCR1_TXMPTYEN | UCR1_RRDYEN), ++ port->membase + UCR1); ++ old_ucr2 = readl(port->membase + USR2); ++ writel(old_ucr2 & ~UCR2_ATEN, port->membase + USR2); ++ while (!(readl(port->membase + USR2) & USR2_TXDC)) ++ barrier(); ++ ++ /* then, disable everything */ ++ old_txrxen = readl(port->membase + UCR2); ++ writel(old_txrxen & ~(UCR2_TXEN | UCR2_RXEN), ++ port->membase + UCR2); ++ old_txrxen &= (UCR2_TXEN | UCR2_RXEN); ++ div = port->uartclk / (baud * 16); ++ if (div > 7) ++ div = 7; ++ if (!div) ++ div = 1; ++ ++ rational_best_approximation(16 * div * baud, port->uartclk, ++ 1 << 16, 1 << 16, &num, &denom); ++ ++ tdiv64 = port->uartclk; ++ tdiv64 *= num; ++ do_div(tdiv64, denom * 16 * div); ++ ++ num -= 1; ++ denom -= 1; ++ ++ ufcr = readl(port->membase + UFCR); ++ ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div); ++ ++ if (port->use_dcedte) ++ ufcr |= UFCR_DCEDTE; ++ ++ writel(ufcr, port->membase + UFCR); ++ ++ writel(num, port->membase + UBIR); ++ writel(denom, port->membase + UBMR); ++ ++ writel(port->uartclk / div / 1000, port->membase + MX2_ONEMS); ++ ++ writel(old_ucr1, port->membase + UCR1); ++ ++ /* set the parity, stop bits and data size */ ++ writel(ucr2 | old_txrxen, port->membase + UCR2); ++ ++ if (config->event_mask & ++ (RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO)) ++ rt_imx_uart_enable_ms(ctx); ++ ++ ctx->status = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ return err; ++} ++ ++void rt_imx_uart_cleanup_ctx(struct rt_imx_uart_ctx *ctx) ++{ ++ rtdm_event_destroy(&ctx->in_event); ++ rtdm_event_destroy(&ctx->out_event); ++ rtdm_event_destroy(&ctx->ioc_event); ++ rtdm_mutex_destroy(&ctx->out_lock); ++} ++ ++#define TXTL 2 /* reset default */ ++#define RXTL 1 /* reset default */ ++ ++static int rt_imx_uart_setup_ufcr(struct rt_imx_uart_port *port) ++{ ++ unsigned int val; ++ unsigned int ufcr_rfdiv; ++ ++ /* set receiver / transmitter trigger level. 
++ * RFDIV is set such way to satisfy requested uartclk value ++ */ ++ val = TXTL << 10 | RXTL; ++ ufcr_rfdiv = (clk_get_rate(port->clk_per) + port->uartclk / 2) / ++ port->uartclk; ++ ++ if (!ufcr_rfdiv) ++ ufcr_rfdiv = 1; ++ ++ val |= UFCR_RFDIV_REG(ufcr_rfdiv); ++ ++ writel(val, port->membase + UFCR); ++ ++ return 0; ++} ++ ++/* half the RX buffer size */ ++#define CTSTL 16 ++ ++static void uart_reset(struct rt_imx_uart_port *port) ++{ ++ unsigned int uts_reg = port->devdata->uts_reg; ++ int n = 100; ++ u32 temp; ++ ++ /* Reset fifo's and state machines */ ++ temp = readl(port->membase + UCR2); ++ temp &= ~UCR2_SRST; ++ writel(temp, port->membase + UCR2); ++ n = 100; ++ while (!(readl(port->membase + uts_reg) & UTS_SOFTRST) && --n > 0) ++ udelay(1); ++} ++ ++static int rt_imx_uart_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ struct rt_imx_uart_port *port; ++ rtdm_lockctx_t lock_ctx; ++ unsigned long temp; ++ uint64_t *dummy; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ctx->port = (struct rt_imx_uart_port *)rtdm_fd_device(fd)->device_data; ++ ++ port = ctx->port; ++ ++ /* IPC initialisation - cannot fail with used parameters */ ++ rtdm_lock_init(&ctx->lock); ++ rtdm_event_init(&ctx->in_event, 0); ++ rtdm_event_init(&ctx->out_event, 0); ++ rtdm_event_init(&ctx->ioc_event, 0); ++ rtdm_mutex_init(&ctx->out_lock); ++ ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->in_nwait = 0; ++ ctx->in_lock = 0; ++ ctx->in_history = NULL; ++ ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ ++ ctx->ioc_events = 0; ++ ctx->ioc_event_lock = 0; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ++ /* ++ * disable the DREN bit (Data Ready interrupt enable) before ++ * requesting IRQs ++ */ ++ temp = readl(port->membase + UCR4); ++ ++ /* set the trigger level for CTS */ ++ temp &= ~(UCR4_CTSTL_MASK << UCR4_CTSTL_SHF); ++ temp |= CTSTL << UCR4_CTSTL_SHF; ++ writel(temp & ~UCR4_DREN, port->membase + UCR4); ++ ++ uart_reset(port); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ /* ++ * Finally, clear status and enable interrupts ++ */ ++ writel(USR1_RTSD | USR1_DTRD, port->membase + USR1); ++ writel(USR2_ORE, port->membase + USR2); ++ ++ temp = readl(port->membase + UCR1) & ~UCR1_RRDYEN; ++ temp |= UCR1_UARTEN; ++ if (port->have_rtscts) ++ temp |= UCR1_RTSDEN; ++ writel(temp, port->membase + UCR1); ++ ++ temp = readl(port->membase + UCR4); ++ temp |= UCR4_OREN; ++ writel(temp, port->membase + UCR4); ++ ++ temp = readl(port->membase + UCR2) & ~(UCR2_ATEN|UCR2_RTSEN); ++ temp |= (UCR2_RXEN | UCR2_TXEN); ++ if (!port->have_rtscts) ++ temp |= UCR2_IRTS; ++ writel(temp, port->membase + UCR2); ++ ++ temp = readl(port->membase + UCR3); ++ temp |= MX2_UCR3_RXDMUXSEL; ++ writel(temp, port->membase + UCR3); ++ ++ temp = readl(port->membase + UCR1); ++ temp |= UCR1_RRDYEN; ++ writel(temp, port->membase + UCR1); ++ ++ temp = readl(port->membase + UCR2); ++ temp |= UCR2_ATEN; ++ writel(temp, port->membase + UCR2); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rt_imx_uart_set_config(ctx, &default_config, &dummy); ++ ++ rt_imx_uart_setup_ufcr(port); ++ ++ return rtdm_irq_request(&ctx->irq_handle, ++ port->irq, rt_imx_uart_int, 0, ++ rtdm_fd_device(fd)->name, ctx); ++} ++ ++void rt_imx_uart_close(struct rtdm_fd *fd) ++{ ++ struct rt_imx_uart_port *port; ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ unsigned long temp; ++ ++ ctx = rtdm_fd_to_private(fd); ++ port = ctx->port; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, 
lock_ctx); ++ ++ temp = readl(port->membase + UCR2); ++ temp &= ~(UCR2_ATEN|UCR2_RTSEN|UCR2_RXEN|UCR2_TXEN|UCR2_IRTS); ++ writel(temp, port->membase + UCR2); ++ /* ++ * Disable all interrupts, port and break condition, then ++ * reset. ++ */ ++ temp = readl(port->membase + UCR1); ++ temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); ++ writel(temp, port->membase + UCR1); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ rtdm_irq_free(&ctx->irq_handle); ++ ++ uart_reset(port); ++ ++ rt_imx_uart_cleanup_ctx(ctx); ++ kfree(ctx->in_history); ++} ++ ++static int rt_imx_uart_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ rtdm_lockctx_t lock_ctx; ++ struct rt_imx_uart_ctx *ctx; ++ int err = 0; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ switch (request) { ++ case RTSER_RTIOC_GET_CONFIG: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->config, ++ sizeof(struct rtser_config)); ++ else ++ memcpy(arg, &ctx->config, ++ sizeof(struct rtser_config)); ++ break; ++ ++ case RTSER_RTIOC_SET_CONFIG: { ++ struct rtser_config *config; ++ struct rtser_config config_buf; ++ uint64_t *hist_buf = NULL; ++ ++ /* ++ * We may call regular kernel services ahead, ask for ++ * re-entering secondary mode if need be. ++ */ ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ config = (struct rtser_config *)arg; ++ ++ if (rtdm_fd_is_user(fd)) { ++ err = ++ rtdm_safe_copy_from_user(fd, &config_buf, ++ arg, ++ sizeof(struct ++ rtser_config)); ++ if (err) ++ return err; ++ ++ config = &config_buf; ++ } ++ ++ if ((config->config_mask & RTSER_SET_BAUD) && ++ (config->baud_rate > clk_get_rate(ctx->port->clk_per) / 16 || ++ config->baud_rate <= 0)) ++ /* invalid baudrate for this port */ ++ return -EINVAL; ++ ++ if (config->config_mask & RTSER_SET_TIMESTAMP_HISTORY) { ++ if (config->timestamp_history & ++ RTSER_RX_TIMESTAMP_HISTORY) ++ hist_buf = kmalloc(IN_BUFFER_SIZE * ++ sizeof(nanosecs_abs_t), ++ GFP_KERNEL); ++ } ++ ++ rt_imx_uart_set_config(ctx, config, &hist_buf); ++ ++ if (hist_buf) ++ kfree(hist_buf); ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_STATUS: { ++ int status, msr; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ status = ctx->saved_errors | ctx->status; ++ ctx->status = 0; ++ ctx->saved_errors = 0; ++ ctx->ioc_events &= ~RTSER_EVENT_ERRPEND; ++ ++ msr = rt_imx_uart_get_msr(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) { ++ struct rtser_status status_buf; ++ ++ ++ status_buf.line_status = status; ++ status_buf.modem_status = msr; ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &status_buf, ++ sizeof(struct ++ rtser_status)); ++ } else { ++ ((struct rtser_status *)arg)->line_status = 0; ++ ((struct rtser_status *)arg)->modem_status = msr; ++ } ++ break; ++ } ++ ++ case RTSER_RTIOC_GET_CONTROL: ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, ++ &ctx->mcr_status, ++ sizeof(int)); ++ else ++ *(int *)arg = ctx->mcr_status; ++ ++ break; ++ ++ case RTSER_RTIOC_SET_CONTROL: { ++ int new_mcr = (long)arg; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ctx->mcr_status = new_mcr; ++ rt_imx_uart_set_mcr(ctx, new_mcr); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++ case RTSER_RTIOC_WAIT_EVENT: { ++ struct rtser_event ev = { .rxpend_timestamp = 0 }; ++ rtdm_toseq_t timeout_seq; ++ ++ if (!rtdm_in_rt_context()) ++ return -ENOSYS; ++ ++ /* Only one waiter allowed, stop any further attempts here. 
*/ ++ if (test_and_set_bit(0, &ctx->ioc_event_lock)) ++ return -EBUSY; ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.event_timeout); ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (!ctx->ioc_events) { ++ /* Only enable error interrupt ++ * when the user waits for it. ++ */ ++ if (ctx->config.event_mask & RTSER_EVENT_ERRPEND) { ++ ctx->ier_status |= IER_STAT; ++#ifdef FIXME ++ rt_imx_uart_reg_out(mode, base, IER, ++ ctx->ier_status); ++#endif ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ err = rtdm_event_timedwait(&ctx->ioc_event, ++ ctx->config.event_timeout, ++ &timeout_seq); ++ if (err) { ++ /* Device has been closed? */ ++ if (err == -EIDRM) ++ err = -EBADF; ++ goto wait_unlock_out; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ ev.events = ctx->ioc_events; ++ ctx->ioc_events &= ++ ~(RTSER_EVENT_MODEMHI | RTSER_EVENT_MODEMLO); ++ ++ ev.last_timestamp = ctx->last_timestamp; ++ ev.rx_pending = ctx->in_npend; ++ ++ if (ctx->in_history) ++ ev.rxpend_timestamp = ctx->in_history[ctx->in_head]; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ if (rtdm_fd_is_user(fd)) ++ err = ++ rtdm_safe_copy_to_user(fd, arg, &ev, ++ sizeof(struct ++ rtser_event)); ++ else ++ memcpy(arg, &ev, sizeof(struct rtser_event)); ++ ++wait_unlock_out: ++ /* release the simple event waiter lock */ ++ clear_bit(0, &ctx->ioc_event_lock); ++ break; ++ } ++ ++ case RTSER_RTIOC_BREAK_CTL: { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ rt_imx_uart_break_ctl(ctx, (int)arg); ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++ ++#ifdef FIXME ++ case RTIOC_PURGE: { ++ int fcr = 0; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ if ((long)arg & RTDM_PURGE_RX_BUFFER) { ++ ctx->in_head = 0; ++ ctx->in_tail = 0; ++ ctx->in_npend = 0; ++ ctx->status = 0; ++ fcr |= FCR_FIFO | FCR_RESET_RX; ++ rt_imx_uart_reg_in(mode, base, RHR); ++ } ++ if ((long)arg & RTDM_PURGE_TX_BUFFER) { ++ ctx->out_head = 0; ++ ctx->out_tail = 0; ++ ctx->out_npend = 0; ++ fcr |= FCR_FIFO | FCR_RESET_TX; ++ } ++ if (fcr) { ++ rt_imx_uart_reg_out(mode, base, FCR, fcr); ++ rt_imx_uart_reg_out(mode, base, FCR, ++ FCR_FIFO | ctx->config.fifo_depth); ++ } ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ break; ++ } ++#endif ++ ++ default: ++ err = -ENOTTY; ++ } ++ ++ return err; ++} ++ ++ssize_t rt_imx_uart_read(struct rtdm_fd *fd, void *buf, size_t nbyte) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t read = 0; ++ int pending; ++ int block; ++ int subblock; ++ int in_pos; ++ char *out_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret = -EAGAIN; /* for non-blocking read */ ++ int nonblocking; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_rw_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* non-blocking is handled separately here */ ++ nonblocking = (ctx->config.rx_timeout < 0); ++ ++ /* only one reader allowed, stop any further attempts here */ ++ if (test_and_set_bit(0, &ctx->in_lock)) ++ return -EBUSY; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ while (1) { ++ if (ctx->status) { ++ if (ctx->status & RTSER_LSR_BREAK_IND) ++ ret = -EPIPE; ++ else ++ ret = -EIO; ++ ctx->saved_errors = ctx->status & ++ (RTSER_LSR_OVERRUN_ERR | RTSER_LSR_PARITY_ERR | ++ RTSER_LSR_FRAMING_ERR | RTSER_SOFT_OVERRUN_ERR); ++ ctx->status = 0; ++ break; ++ } ++ ++ pending = ctx->in_npend; ++ ++ if (pending > 0) { ++ 
block = subblock = (pending <= nbyte) ? pending : nbyte; ++ in_pos = ctx->in_head; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (in_pos + subblock > IN_BUFFER_SIZE) { ++ /* Treat the block between head and buffer end ++ * separately. ++ */ ++ subblock = IN_BUFFER_SIZE - in_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user ++ (fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], ++ subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ ++ subblock = block - subblock; ++ in_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, out_pos, ++ &ctx->in_buf[in_pos], ++ subblock) != 0) { ++ ret = -EFAULT; ++ goto break_unlocked; ++ } ++ } else ++ memcpy(out_pos, &ctx->in_buf[in_pos], subblock); ++ ++ read += subblock; ++ out_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->in_head = ++ (ctx->in_head + block) & (IN_BUFFER_SIZE - 1); ++ ctx->in_npend -= block; ++ if (ctx->in_npend == 0) ++ ctx->ioc_events &= ~RTSER_EVENT_RXPEND; ++ ++ if (nbyte == 0) ++ break; /* All requested bytes read. */ ++ ++ continue; ++ } ++ ++ if (nonblocking) ++ /* ret was set to EAGAIN in case of a real ++ * non-blocking call or contains the error ++ * returned by rtdm_event_wait[_until] ++ */ ++ break; ++ ++ ctx->in_nwait = nbyte; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->in_event, ++ ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ * return immediately. ++ */ ++ return -EBADF; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ nonblocking = 1; ++ if (ctx->in_npend > 0) { ++ /* Final turn: collect pending bytes ++ * before exit. ++ */ ++ continue; ++ } ++ ++ ctx->in_nwait = 0; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++break_unlocked: ++ /* Release the simple reader lock, */ ++ clear_bit(0, &ctx->in_lock); ++ ++ if ((read > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT))) ++ ret = read; ++ ++ return ret; ++} ++ ++static ssize_t rt_imx_uart_write(struct rtdm_fd *fd, const void *buf, ++ size_t nbyte) ++{ ++ struct rt_imx_uart_ctx *ctx; ++ rtdm_lockctx_t lock_ctx; ++ size_t written = 0; ++ int free; ++ int block; ++ int subblock; ++ int out_pos; ++ char *in_pos = (char *)buf; ++ rtdm_toseq_t timeout_seq; ++ ssize_t ret; ++ ++ if (nbyte == 0) ++ return 0; ++ ++ if (rtdm_fd_is_user(fd) && !rtdm_read_user_ok(fd, buf, nbyte)) ++ return -EFAULT; ++ ++ ctx = rtdm_fd_to_private(fd); ++ ++ rtdm_toseq_init(&timeout_seq, ctx->config.rx_timeout); ++ ++ /* Make write operation atomic. */ ++ ret = rtdm_mutex_timedlock(&ctx->out_lock, ctx->config.rx_timeout, ++ &timeout_seq); ++ if (ret) ++ return ret; ++ ++ while (nbyte > 0) { ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ free = OUT_BUFFER_SIZE - ctx->out_npend; ++ ++ if (free > 0) { ++ block = subblock = (nbyte <= free) ? nbyte : free; ++ out_pos = ctx->out_tail; ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ /* Do we have to wrap around the buffer end? */ ++ if (out_pos + subblock > OUT_BUFFER_SIZE) { ++ /* Treat the block between head and buffer ++ * end separately. 
++ */ ++ subblock = OUT_BUFFER_SIZE - out_pos; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, ++ &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, ++ subblock); ++ ++ written += subblock; ++ in_pos += subblock; ++ ++ subblock = block - subblock; ++ out_pos = 0; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_from_user ++ (fd, &ctx->out_buf[out_pos], ++ in_pos, subblock) != 0) { ++ ret = -EFAULT; ++ break; ++ } ++ } else ++ memcpy(&ctx->out_buf[out_pos], in_pos, block); ++ ++ written += subblock; ++ in_pos += subblock; ++ nbyte -= block; ++ ++ rtdm_lock_get_irqsave(&ctx->lock, lock_ctx); ++ ++ ctx->out_tail = ++ (ctx->out_tail + block) & (OUT_BUFFER_SIZE - 1); ++ ctx->out_npend += block; ++ ++ ctx->ier_status |= IER_TX; ++ rt_imx_uart_start_tx(ctx); ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ continue; ++ } ++ ++ rtdm_lock_put_irqrestore(&ctx->lock, lock_ctx); ++ ++ ret = rtdm_event_timedwait(&ctx->out_event, ++ ctx->config.tx_timeout, ++ &timeout_seq); ++ if (ret < 0) { ++ if (ret == -EIDRM) { ++ /* Device has been closed - ++ * return immediately. ++ */ ++ ret = -EBADF; ++ } ++ break; ++ } ++ } ++ ++ rtdm_mutex_unlock(&ctx->out_lock); ++ ++ if ((written > 0) && ((ret == 0) || (ret == -EAGAIN) || ++ (ret == -ETIMEDOUT))) ++ ret = written; ++ ++ return ret; ++} ++ ++static struct rtdm_driver imx_uart_driver = { ++ .profile_info = RTDM_PROFILE_INFO(imx_uart, ++ RTDM_CLASS_SERIAL, ++ RTDM_SUBCLASS_16550A, ++ RTSER_PROFILE_VER), ++ .device_count = RT_IMX_UART_MAX, ++ .device_flags = RTDM_NAMED_DEVICE | RTDM_EXCLUSIVE, ++ .context_size = sizeof(struct rt_imx_uart_ctx), ++ .ops = { ++ .open = rt_imx_uart_open, ++ .close = rt_imx_uart_close, ++ .ioctl_rt = rt_imx_uart_ioctl, ++ .ioctl_nrt = rt_imx_uart_ioctl, ++ .read_rt = rt_imx_uart_read, ++ .write_rt = rt_imx_uart_write, ++ }, ++}; ++ ++ ++#ifdef CONFIG_OF ++ ++/* ++ * This function returns 1 iff pdev isn't a device instatiated by dt, 0 iff it ++ * could successfully get all information from dt or a negative errno. 
++ */ ++static int rt_imx_uart_probe_dt(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ struct device_node *np = pdev->dev.of_node; ++ const struct of_device_id *of_id = ++ of_match_device(rt_imx_uart_dt_ids, &pdev->dev); ++ int ret; ++ ++ if (!np) ++ /* no device tree device */ ++ return 1; ++ ++ ret = of_alias_get_id(np, "serial"); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret); ++ return ret; ++ } ++ ++ pdev->id = ret; ++ ++ if (of_get_property(np, "uart-has-rtscts", NULL) || ++ of_get_property(np, "fsl,uart-has-rtscts", NULL) /* deprecated */) ++ port->have_rtscts = 1; ++ if (of_get_property(np, "fsl,irda-mode", NULL)) ++ dev_warn(&pdev->dev, "IRDA not yet supported\n"); ++ ++ if (of_get_property(np, "fsl,dte-mode", NULL)) ++ port->use_dcedte = 1; ++ ++ port->devdata = of_id->data; ++ ++ return 0; ++} ++#else ++static inline int rt_imx_uart_probe_dt(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ return 1; ++} ++#endif ++ ++static void rt_imx_uart_probe_pdata(struct rt_imx_uart_port *port, ++ struct platform_device *pdev) ++{ ++ struct imxuart_platform_data *pdata = dev_get_platdata(&pdev->dev); ++ ++ port->devdata = (struct imx_uart_data *) pdev->id_entry->driver_data; ++ ++ if (!pdata) ++ return; ++ ++ if (pdata->flags & IMXUART_HAVE_RTSCTS) ++ port->have_rtscts = 1; ++} ++ ++static int rt_imx_uart_probe(struct platform_device *pdev) ++{ ++ struct rtdm_device *dev; ++ struct rt_imx_uart_port *port; ++ struct resource *res; ++ int ret; ++ ++ port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); ++ if (!port) ++ return -ENOMEM; ++ ++ ret = rt_imx_uart_probe_dt(port, pdev); ++ if (ret > 0) ++ rt_imx_uart_probe_pdata(port, pdev); ++ else if (ret < 0) ++ return ret; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENODEV; ++ ++ port->irq = platform_get_irq(pdev, 0); ++ ++ if (port->irq <= 0) ++ return -ENODEV; ++ ++ port->membase = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(port->membase)) ++ return PTR_ERR(port->membase); ++ ++ dev = &port->rtdm_dev; ++ dev->driver = &imx_uart_driver; ++ dev->label = "rtser%d"; ++ dev->device_data = port; ++ ++ if (!tx_fifo[pdev->id] || tx_fifo[pdev->id] > TX_FIFO_SIZE) ++ port->tx_fifo = TX_FIFO_SIZE; ++ else ++ port->tx_fifo = tx_fifo[pdev->id]; ++ ++ port->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(port->clk_ipg)) ++ return PTR_ERR(port->clk_ipg); ++ ++ port->clk_per = devm_clk_get(&pdev->dev, "per"); ++ if (IS_ERR(port->clk_per)) ++ return PTR_ERR(port->clk_per); ++ ++ clk_prepare_enable(port->clk_ipg); ++ clk_prepare_enable(port->clk_per); ++ port->uartclk = clk_get_rate(port->clk_per); ++ ++ port->use_hwflow = 1; ++ ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ return ret; ++ ++ platform_set_drvdata(pdev, port); ++ ++ pr_info("%s on IMX UART%d: membase=0x%p irq=%d uartclk=%d\n", ++ dev->name, pdev->id, port->membase, port->irq, port->uartclk); ++ return 0; ++} ++ ++static int rt_imx_uart_remove(struct platform_device *pdev) ++{ ++ struct imxuart_platform_data *pdata; ++ struct rt_imx_uart_port *port = platform_get_drvdata(pdev); ++ struct rtdm_device *dev = &port->rtdm_dev; ++ ++ pdata = pdev->dev.platform_data; ++ platform_set_drvdata(pdev, NULL); ++ ++ clk_disable_unprepare(port->clk_ipg); ++ clk_disable_unprepare(port->clk_per); ++ rtdm_dev_unregister(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver rt_imx_uart_driver = { ++ .probe = rt_imx_uart_probe, ++ .remove = rt_imx_uart_remove, 
++ .id_table = rt_imx_uart_id_table, ++ .driver = { ++ .name = DRIVER_NAME, ++ .owner = THIS_MODULE, ++ .of_match_table = rt_imx_uart_dt_ids, ++ }, ++ .prevent_deferred_probe = true, ++}; ++ ++ ++static int __init rt_imx_uart_init(void) ++{ ++ int ret; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ ret = platform_driver_register(&rt_imx_uart_driver); ++ if (ret) { ++ pr_err("%s; Could not register driver (err=%d)\n", ++ __func__, ret); ++ } ++ ++ return ret; ++} ++ ++static void __exit rt_imx_uart_exit(void) ++{ ++ platform_driver_unregister(&rt_imx_uart_driver); ++} ++ ++module_init(rt_imx_uart_init); ++module_exit(rt_imx_uart_exit); +--- linux/drivers/xenomai/can/rtcan_raw.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw.h 2021-04-07 16:01:26.481635232 +0800 +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_RAW_H_ ++#define __RTCAN_RAW_H_ ++ ++#ifdef __KERNEL__ ++ ++int rtcan_raw_ioctl_dev(struct rtdm_fd *fd, int request, void *arg); ++ ++int rtcan_raw_check_filter(struct rtcan_socket *sock, ++ int ifindex, struct rtcan_filter_list *flist); ++int rtcan_raw_add_filter(struct rtcan_socket *sock, int ifindex); ++void rtcan_raw_remove_filter(struct rtcan_socket *sock); ++ ++void rtcan_rcv(struct rtcan_device *rtcandev, struct rtcan_skb *skb); ++ ++void rtcan_loopback(struct rtcan_device *rtcandev); ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++#define rtcan_loopback_enabled(sock) (sock->loopback) ++#define rtcan_loopback_pending(dev) (dev->tx_socket) ++#else /* !CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++#define rtcan_loopback_enabled(sock) (0) ++#define rtcan_loopback_pending(dev) (0) ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++void __rtcan_raw_enable_bus_err(struct rtcan_socket *sock); ++static inline void rtcan_raw_enable_bus_err(struct rtcan_socket *sock) ++{ ++ if ((sock->err_mask & CAN_ERR_BUSERROR)) ++ __rtcan_raw_enable_bus_err(sock); ++} ++#else ++#define rtcan_raw_enable_bus_err(sock) ++#endif ++ ++int __init rtcan_raw_proto_register(void); ++void __exit rtcan_raw_proto_unregister(void); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTCAN_RAW_H_ */ +--- linux/drivers/xenomai/can/rtcan_socket.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_socket.h 2021-04-07 16:01:26.475635241 +0800 +@@ -0,0 +1,207 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Derived from RTnet project file include/stack/socket.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. 
Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_SOCKET_H_ ++#define __RTCAN_SOCKET_H_ ++ ++#include ++ ++#include ++ ++ ++ ++/* This MUST BE 2^N */ ++#define RTCAN_RXBUF_SIZE CONFIG_XENO_DRIVERS_CAN_RXBUF_SIZE ++ ++/* Size of timestamp */ ++#define RTCAN_TIMESTAMP_SIZE sizeof(nanosecs_abs_t) ++ ++/* Bit in the can_dlc member of struct ring_buffer_frame used to indicate ++ * whether a frame has got a timestamp or not */ ++#define RTCAN_HAS_TIMESTAMP 0x80 ++ ++/* Mask for clearing bit RTCAN_HAS_TIMESTAMP */ ++#define RTCAN_HAS_NO_TIMESTAMP 0x7F ++ ++#define RTCAN_SOCK_UNBOUND -1 ++#define RTCAN_FLIST_NO_FILTER (struct rtcan_filter_list *)-1 ++#define rtcan_flist_no_filter(f) ((f) == RTCAN_FLIST_NO_FILTER) ++#define rtcan_sock_has_filter(s) ((s)->flistlen > 0) ++#define rtcan_sock_is_bound(s) ((s)->flistlen >= 0) ++ ++/* ++ * Internal frame representation within the ring buffer of a ++ * struct rtcan_socket. ++ * ++ * The data array is of arbitrary size when the frame is actually ++ * stored in a socket's ring buffer. The timestamp member exists if the ++ * socket was set to take timestamps (then it follows direcly after the ++ * arbitrary-sized data array), otherwise it does not exist. ++ */ ++struct rtcan_rb_frame { ++ ++ /* CAN ID representation equal to struct can_frame */ ++ uint32_t can_id; ++ ++ /* Interface index from which the frame originates */ ++ unsigned char can_ifindex; ++ ++ /* DLC (between 0 and 15) and mark if frame has got a timestamp. The ++ * existence of a timestamp is indicated by the RTCAN_HAS_TIMESTAMP ++ * bit. */ ++ unsigned char can_dlc; ++ ++ /* Data bytes */ ++ uint8_t data[8]; ++ ++ /* High precision timestamp indicating when the frame was received. ++ * Exists when RTCAN_HAS_TIMESTAMP bit in can_dlc is set. */ ++ nanosecs_abs_t timestamp; ++ ++} __attribute__ ((packed)); ++ ++ ++/* Size of struct rtcan_rb_frame without any data bytes and timestamp */ ++#define EMPTY_RB_FRAME_SIZE \ ++ sizeof(struct rtcan_rb_frame) - 8 - RTCAN_TIMESTAMP_SIZE ++ ++ ++/* ++ * Wrapper structure around a struct rtcan_rb_frame with actual size ++ * of the frame. ++ * ++ * This isn't really a socket buffer but only a sort of. It is constructed ++ * within the interrupt routine when a CAN frame is read from ++ * the controller. Then it's passed to the reception handler where only ++ * rb_frame finds its way to the sockets' ring buffers. ++ */ ++struct rtcan_skb { ++ /* Actual size of following rb_frame (without timestamp) */ ++ size_t rb_frame_size; ++ /* Frame to be stored in the sockets' ring buffers (as is) */ ++ struct rtcan_rb_frame rb_frame; ++}; ++ ++struct rtcan_filter_list { ++ int flistlen; ++ struct can_filter flist[1]; ++}; ++ ++/* ++ * Internal CAN socket structure. 
++ * ++ * Every socket has an internal ring buffer for incoming messages. A message ++ * is not stored as a struct can_frame (in order to save buffer space) ++ * but as struct rtcan_rb_frame of arbitrary length depending on the ++ * actual payload. ++ */ ++struct rtcan_socket { ++ ++ struct list_head socket_list; ++ ++ unsigned long flags; ++ ++ /* Transmission timeout in ns. Protected by rtcan_socket_lock ++ * in all socket structures. */ ++ nanosecs_rel_t tx_timeout; ++ ++ /* Reception timeout in ns. Protected by rtcan_socket_lock ++ * in all socket structures. */ ++ nanosecs_rel_t rx_timeout; ++ ++ ++ /* Begin of first frame data in the ring buffer. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ int recv_head; ++ ++ /* End of last frame data in the ring buffer. I.e. position of first ++ * free byte in the ring buffer. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ int recv_tail; ++ ++ /* Ring buffer for incoming CAN frames. Protected by ++ * rtcan_socket_lock in all socket structures. */ ++ unsigned char recv_buf[RTCAN_RXBUF_SIZE]; ++ ++ /* Semaphore for receivers and incoming messages */ ++ rtdm_sem_t recv_sem; ++ ++ ++ /* All senders waiting to be able to send ++ * via this socket are queued here */ ++ struct list_head tx_wait_head; ++ ++ ++ /* Interface index the socket is bound to. Protected by ++ * rtcan_recv_list_lock in all socket structures. */ ++ atomic_t ifindex; ++ ++ /* Length of filter list. I.e. how many entries does this socket occupy in ++ * the reception list. 0 if unbound. Protected by ++ * rtcan_recv_list_lock in all socket structures. */ ++ int flistlen; ++ ++ uint32_t err_mask; ++ ++ uint32_t rx_buf_full; ++ ++ struct rtcan_filter_list *flist; ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ int loopback; ++#endif ++}; ++ ++ ++ ++/* ++ * Get the RTDM context from a struct rtcan_socket ++ * ++ * @param[in] sock Pointer to socket structure ++ * ++ * @return Pointer to a file descriptor of type struct rtdm_fd this socket ++ * belongs to ++ */ ++/* FIXME: to be replaced with container_of */ ++static inline struct rtdm_fd *rtcan_socket_to_fd(struct rtcan_socket *sock) ++{ ++ return rtdm_private_to_fd(sock); ++} ++ ++/* Spinlock protecting the ring buffers and the timeouts of all ++ * rtcan_sockets */ ++extern rtdm_lock_t rtcan_socket_lock; ++extern struct list_head rtcan_socket_list; ++ ++extern void rtcan_socket_init(struct rtdm_fd *fd); ++extern void rtcan_socket_cleanup(struct rtdm_fd *fd); ++ ++ ++#endif /* __RTCAN_SOCKET_H_ */ +--- linux/drivers/xenomai/can/rtcan_dev.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_dev.h 2021-04-07 16:01:26.470635248 +0800 +@@ -0,0 +1,205 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/include/rtdev.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#ifndef __RTCAN_DEV_H_ ++#define __RTCAN_DEV_H_ ++ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++ ++#include "rtcan_list.h" ++ ++ ++/* Number of MSCAN devices the driver can handle */ ++#define RTCAN_MAX_DEVICES CONFIG_XENO_DRIVERS_CAN_MAX_DEVICES ++ ++/* Maximum number of single filters per controller which can be registered ++ * for reception at the same time using Bind */ ++#define RTCAN_MAX_RECEIVERS CONFIG_XENO_DRIVERS_CAN_MAX_RECEIVERS ++ ++/* Suppress handling of refcount if module support is not enabled ++ * or modules cannot be unloaded */ ++ ++#if defined(CONFIG_MODULES) && defined(CONFIG_MODULE_UNLOAD) ++#define RTCAN_USE_REFCOUNT ++#endif ++ ++/* ++ * CAN harware-dependent bit-timing constant ++ * ++ * Used for calculating and checking bit-timing parameters ++ */ ++struct can_bittiming_const { ++ char name[16]; /* Name of the CAN controller hardware */ ++ __u32 tseg1_min; /* Time segement 1 = prop_seg + phase_seg1 */ ++ __u32 tseg1_max; ++ __u32 tseg2_min; /* Time segement 2 = phase_seg2 */ ++ __u32 tseg2_max; ++ __u32 sjw_max; /* Synchronisation jump width */ ++ __u32 brp_min; /* Bit-rate prescaler */ ++ __u32 brp_max; ++ __u32 brp_inc; ++}; ++ ++struct rtcan_device { ++ unsigned int version; ++ ++ char name[IFNAMSIZ]; ++ ++ char *ctrl_name; /* Name of CAN controller */ ++ char *board_name;/* Name of CAN board */ ++ ++ unsigned long base_addr; /* device I/O address */ ++ rtdm_irq_t irq_handle; /* RTDM IRQ handle */ ++ ++ int ifindex; ++#ifdef RTCAN_USE_REFCOUNT ++ atomic_t refcount; ++#endif ++ ++ void *priv; /* pointer to chip private data */ ++ ++ void *board_priv;/* pointer to board private data*/ ++ ++ struct semaphore nrt_lock; /* non-real-time locking */ ++ ++ /* Spinlock for all devices (but not for all attributes) and also for HW ++ * access to all CAN controllers ++ */ ++ rtdm_lock_t device_lock; ++ ++ /* Acts as a mutex allowing only one sender to write to the MSCAN ++ * simultaneously. Created when the controller goes into operating mode, ++ * destroyed if it goes into reset mode. */ ++ rtdm_sem_t tx_sem; ++ ++ /* Baudrate of this device. Protected by device_lock in all device ++ * structures. */ ++ unsigned int can_sys_clock; ++ ++ ++ /* Baudrate of this device. Protected by device_lock in all device ++ * structures. */ ++ can_baudrate_t baudrate; ++ ++ struct can_bittime bit_time; ++ const struct can_bittiming_const *bittiming_const; ++ ++ /* State which the controller is in. Protected by device_lock in all ++ * device structures. */ ++ can_state_t state; ++ ++ /* State which the controller was before sleeping. Protected by ++ * device_lock in all device structures. */ ++ can_state_t state_before_sleep; ++ ++ /* Controller specific settings. Protected by device_lock in all ++ * device structures. */ ++ can_ctrlmode_t ctrl_mode; ++ ++ /* Device operations */ ++ int (*hard_start_xmit)(struct rtcan_device *dev, ++ struct can_frame *frame); ++ int (*do_set_mode)(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx); ++ can_state_t (*do_get_state)(struct rtcan_device *dev); ++ int (*do_set_bit_time)(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx); ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++ void (*do_enable_bus_err)(struct rtcan_device *dev); ++#endif ++ ++ /* Reception list head. 
This list contains all filters which have been ++ * registered via a bind call. */ ++ struct rtcan_recv *recv_list; ++ ++ /* Empty list head. This list contains all empty entries not needed ++ * by the reception list and therefore is disjunctive with it. */ ++ struct rtcan_recv *empty_list; ++ ++ /* Preallocated array for the list entries. To increase cache ++ * locality all list elements are kept in this array. */ ++ struct rtcan_recv receivers[RTCAN_MAX_RECEIVERS]; ++ ++ /* Indicates the length of the empty list */ ++ int free_entries; ++ ++ /* A few statistics counters */ ++ unsigned int tx_count; ++ unsigned int rx_count; ++ unsigned int err_count; ++ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *proc_root; ++#endif ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ struct rtcan_skb tx_skb; ++ struct rtcan_socket *tx_socket; ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++}; ++ ++ ++extern struct semaphore rtcan_devices_nrt_lock; ++ ++ ++void rtcan_dev_free(struct rtcan_device *dev); ++ ++int rtcan_dev_register(struct rtcan_device *dev); ++int rtcan_dev_unregister(struct rtcan_device *dev); ++ ++struct rtcan_device *rtcan_dev_alloc(int sizeof_priv, int sizeof_board_priv); ++void rtcan_dev_alloc_name (struct rtcan_device *dev, const char *name_mask); ++ ++struct rtcan_device *rtcan_dev_get_by_name(const char *if_name); ++struct rtcan_device *rtcan_dev_get_by_index(int ifindex); ++ ++#ifdef RTCAN_USE_REFCOUNT ++#define rtcan_dev_reference(dev) atomic_inc(&(dev)->refcount) ++#define rtcan_dev_dereference(dev) atomic_dec(&(dev)->refcount) ++#else ++#define rtcan_dev_reference(dev) do {} while(0) ++#define rtcan_dev_dereference(dev) do {} while(0) ++#endif ++ ++#ifdef CONFIG_PROC_FS ++int rtcan_dev_create_proc(struct rtcan_device* dev); ++void rtcan_dev_remove_proc(struct rtcan_device* dev); ++#else /* !CONFIG_PROC_FS */ ++static inline int rtcan_dev_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++static inline void rtcan_dev_remove_proc(struct rtcan_device* dev) { } ++#endif /* !CONFIG_PROC_FS */ ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* __RTCAN_DEV_H_ */ +--- linux/drivers/xenomai/can/rtcan_raw.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw.c 2021-04-07 16:01:26.465635255 +0800 +@@ -0,0 +1,1003 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Parts of this software are based on the following: ++ * ++ * - RTAI CAN device driver for SJA1000 controllers by Jan Kiszka ++ * ++ * - linux-can.patch, a CAN socket framework for Linux, ++ * Copyright (C) 2004, 2005, Robert Schwebel, Benedikt Spranger, ++ * Marc Kleine-Budde, Sascha Hauer, Pengutronix ++ * ++ * - RTnet (www.rtnet.org) ++ * ++ * - serial device driver and profile included in Xenomai (RTDM), ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_version.h" ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++ ++ ++/* ++ * Set if socket wants to receive a high precision timestamp together with ++ * CAN frames ++ */ ++#define RTCAN_GET_TIMESTAMP 0 ++ ++ ++MODULE_AUTHOR("RT-Socket-CAN Development Team"); ++MODULE_DESCRIPTION("RTDM CAN raw socket device driver"); ++MODULE_VERSION(__stringify(RTCAN_MAJOR_VER) ++ __stringify(RTCAN_MINOR_VER) ++ __stringify(RTCAN_BUGFIX_VER)); ++MODULE_LICENSE("GPL"); ++ ++void rtcan_tx_push(struct rtcan_device *dev, struct rtcan_socket *sock, ++ can_frame_t *frame); ++ ++static inline int rtcan_accept_msg(uint32_t can_id, can_filter_t *filter) ++{ ++ if ((filter->can_mask & CAN_INV_FILTER)) ++ return ((can_id & filter->can_mask) != filter->can_id); ++ else ++ return ((can_id & filter->can_mask) == filter->can_id); ++} ++ ++ ++static void rtcan_rcv_deliver(struct rtcan_recv *recv_listener, ++ struct rtcan_skb *skb) ++{ ++ int size_free; ++ size_t cpy_size, first_part_size; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct rtdm_fd *fd = rtdm_private_to_fd(recv_listener->sock); ++ struct rtcan_socket *sock; ++ ++ if (rtdm_fd_lock(fd) < 0) ++ return; ++ ++ sock = recv_listener->sock; ++ ++ cpy_size = skb->rb_frame_size; ++ /* Check if socket wants to receive a timestamp */ ++ if (test_bit(RTCAN_GET_TIMESTAMP, &sock->flags)) { ++ cpy_size += RTCAN_TIMESTAMP_SIZE; ++ frame->can_dlc |= RTCAN_HAS_TIMESTAMP; ++ } else ++ frame->can_dlc &= RTCAN_HAS_NO_TIMESTAMP; ++ ++ /* Calculate free size in the ring buffer */ ++ size_free = sock->recv_head - sock->recv_tail; ++ if (size_free <= 0) ++ size_free += RTCAN_RXBUF_SIZE; ++ ++ /* Test if ring buffer has enough space. */ ++ if (size_free > cpy_size) { ++ /* Check if we must wrap around the end of buffer */ ++ if ((sock->recv_tail + cpy_size) > RTCAN_RXBUF_SIZE) { ++ /* Wrap around: Two memcpy operations */ ++ ++ first_part_size = RTCAN_RXBUF_SIZE - sock->recv_tail; ++ ++ memcpy(&sock->recv_buf[sock->recv_tail], (void *)frame, ++ first_part_size); ++ memcpy(&sock->recv_buf[0], (void *)frame + ++ first_part_size, cpy_size - first_part_size); ++ } else ++ memcpy(&sock->recv_buf[sock->recv_tail], (void *)frame, ++ cpy_size); ++ ++ /* Adjust tail */ ++ sock->recv_tail = (sock->recv_tail + cpy_size) & ++ (RTCAN_RXBUF_SIZE - 1); ++ ++ /*Notify the delivery of the message */ ++ rtdm_sem_up(&sock->recv_sem); ++ ++ } else { ++ /* Overflow of socket's ring buffer! 
*/ ++ sock->rx_buf_full++; ++ RTCAN_RTDM_DBG("rtcan: socket buffer overflow, message discarded\n"); ++ } ++ ++ rtdm_fd_unlock(fd); ++} ++ ++ ++void rtcan_rcv(struct rtcan_device *dev, struct rtcan_skb *skb) ++{ ++ nanosecs_abs_t timestamp = rtdm_clock_read(); ++ /* Entry in reception list, begin with head */ ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ ++ /* Copy timestamp to skb */ ++ memcpy((void *)&skb->rb_frame + skb->rb_frame_size, ++ ×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ if ((frame->can_id & CAN_ERR_FLAG)) { ++ dev->err_count++; ++ while (recv_listener != NULL) { ++ if ((frame->can_id & recv_listener->sock->err_mask)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ } else { ++ dev->rx_count++; ++ while (recv_listener != NULL) { ++ if (rtcan_accept_msg(frame->can_id, &recv_listener->can_filter)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ } ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ ++void rtcan_tx_push(struct rtcan_device *dev, struct rtcan_socket *sock, ++ can_frame_t *frame) ++{ ++ struct rtcan_rb_frame *rb_frame = &dev->tx_skb.rb_frame; ++ ++ RTCAN_ASSERT(dev->tx_socket == 0, ++ rtdm_printk("(%d) TX skb still in use", dev->ifindex);); ++ ++ rb_frame->can_id = frame->can_id; ++ rb_frame->can_dlc = frame->can_dlc; ++ dev->tx_skb.rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ if (frame->can_dlc && !(frame->can_id & CAN_RTR_FLAG)) { ++ memcpy(rb_frame->data, frame->data, frame->can_dlc); ++ dev->tx_skb.rb_frame_size += frame->can_dlc; ++ } ++ rb_frame->can_ifindex = dev->ifindex; ++ dev->tx_socket = sock; ++} ++ ++void rtcan_loopback(struct rtcan_device *dev) ++{ ++ nanosecs_abs_t timestamp = rtdm_clock_read(); ++ /* Entry in reception list, begin with head */ ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtcan_rb_frame *frame = &dev->tx_skb.rb_frame; ++ ++ memcpy((void *)&dev->tx_skb.rb_frame + dev->tx_skb.rb_frame_size, ++ ×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ while (recv_listener != NULL) { ++ dev->rx_count++; ++ if ((dev->tx_socket != recv_listener->sock) && ++ rtcan_accept_msg(frame->can_id, &recv_listener->can_filter)) { ++ recv_listener->match_count++; ++ rtcan_rcv_deliver(recv_listener, &dev->tx_skb); ++ } ++ recv_listener = recv_listener->next; ++ } ++ dev->tx_socket = NULL; ++} ++ ++EXPORT_SYMBOL_GPL(rtcan_loopback); ++ ++#endif /* CONFIG_XENO_DRIVERS_CAN_LOOPBACK */ ++ ++ ++int rtcan_raw_socket(struct rtdm_fd *fd, int protocol) ++{ ++ /* Only protocol CAN_RAW is supported */ ++ if (protocol != CAN_RAW && protocol != 0) ++ return -EPROTONOSUPPORT; ++ ++ rtcan_socket_init(fd); ++ ++ return 0; ++} ++ ++ ++static inline void rtcan_raw_unbind(struct rtcan_socket *sock) ++{ ++ rtcan_raw_remove_filter(sock); ++ if (!rtcan_flist_no_filter(sock->flist) && sock->flist) ++ rtdm_free(sock->flist); ++ sock->flist = NULL; ++ sock->flistlen = RTCAN_SOCK_UNBOUND; ++ atomic_set(&sock->ifindex, 0); ++} ++ ++ ++static void rtcan_raw_close(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Check if socket is bound */ ++ if (rtcan_sock_is_bound(sock)) ++ rtcan_raw_unbind(sock); ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ ++ rtcan_socket_cleanup(fd); ++} ++ ++ ++int 
rtcan_raw_bind(struct rtdm_fd *fd, ++ struct sockaddr_can *scan) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ int ret = 0; ++ ++ /* Check address family and ++ check if given length of filter list is plausible */ ++ if (scan->can_family != AF_CAN) ++ return -EINVAL; ++ /* Check range of ifindex, must be between 0 and RTCAN_MAX_DEVICES */ ++ if (scan->can_ifindex < 0 || scan->can_ifindex > RTCAN_MAX_DEVICES) ++ return -ENODEV; ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ if ((ret = rtcan_raw_check_filter(sock, scan->can_ifindex, ++ sock->flist))) ++ goto out; ++ rtcan_raw_remove_filter(sock); ++ /* Add filter and mark socket as bound */ ++ sock->flistlen = rtcan_raw_add_filter(sock, scan->can_ifindex); ++ ++ /* Set new interface index the socket is now bound to */ ++ atomic_set(&sock->ifindex, scan->can_ifindex); ++ ++ out: ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return ret; ++} ++ ++ ++static int rtcan_raw_setsockopt(struct rtdm_fd *fd, ++ struct _rtdm_setsockopt_args *so) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct rtcan_filter_list *flist; ++ int ifindex = atomic_read(&sock->ifindex); ++ rtdm_lockctx_t lock_ctx; ++ can_err_mask_t err_mask; ++ int val, ret = 0; ++ ++ if (so->level != SOL_CAN_RAW) ++ return -ENOPROTOOPT; ++ ++ switch (so->optname) { ++ ++ case CAN_RAW_FILTER: ++ if (so->optlen == 0) { ++ flist = RTCAN_FLIST_NO_FILTER; ++ } else { ++ int flistlen; ++ flistlen = so->optlen / sizeof(struct can_filter); ++ if (flistlen < 1 || flistlen > RTCAN_MAX_RECEIVERS || ++ so->optlen % sizeof(struct can_filter) != 0) ++ return -EINVAL; ++ ++ flist = (struct rtcan_filter_list *)rtdm_malloc(so->optlen + sizeof(int)); ++ if (flist == NULL) ++ return -ENOMEM; ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, flist->flist, ++ so->optval, so->optlen)) { ++ rtdm_free(flist); ++ return -EFAULT; ++ } ++ } else ++ memcpy(flist->flist, so->optval, so->optlen); ++ flist->flistlen = flistlen; ++ } ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Check if there is space for the filter list if already bound */ ++ if (rtcan_sock_is_bound(sock)) { ++ if (!rtcan_flist_no_filter(flist) && ++ (ret = rtcan_raw_check_filter(sock, ifindex, flist))) { ++ rtdm_free(flist); ++ goto out_filter; ++ } ++ rtcan_raw_remove_filter(sock); ++ } ++ ++ /* Remove previous list and attach the new one */ ++ if (!rtcan_flist_no_filter(flist) && sock->flist) ++ rtdm_free(sock->flist); ++ sock->flist = flist; ++ ++ if (rtcan_sock_is_bound(sock)) ++ sock->flistlen = rtcan_raw_add_filter(sock, ifindex); ++ ++ out_filter: ++ /* Release lock for reception lists */ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ break; ++ ++ case CAN_RAW_ERR_FILTER: ++ ++ if (so->optlen != sizeof(can_err_mask_t)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, &err_mask, so->optval, so->optlen)) ++ return -EFAULT; ++ } else ++ memcpy(&err_mask, so->optval, so->optlen); ++ ++ /* Get lock for reception lists */ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ sock->err_mask = err_mask; ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ break; ++ ++ case CAN_RAW_LOOPBACK: ++ ++ if (so->optlen != sizeof(int)) ++ return -EINVAL; ++ ++ if 
(rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, so->optval, so->optlen) || ++ rtdm_copy_from_user(fd, &val, so->optval, so->optlen)) ++ return -EFAULT; ++ } else ++ memcpy(&val, so->optval, so->optlen); ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ sock->loopback = val; ++#else ++ if (val) ++ return -EOPNOTSUPP; ++#endif ++ break; ++ ++ default: ++ ret = -ENOPROTOOPT; ++ } ++ ++ return ret; ++} ++ ++ ++int rtcan_raw_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret = 0; ++ ++ switch (request) { ++ case _RTIOC_BIND: { ++ struct _rtdm_setsockaddr_args *setaddr, setaddr_buf; ++ struct sockaddr_can *sockaddr, sockaddr_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy argument structure from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct _rtdm_setsockaddr_args)) || ++ rtdm_copy_from_user(fd, &setaddr_buf, arg, ++ sizeof(struct _rtdm_setsockaddr_args))) ++ return -EFAULT; ++ ++ setaddr = &setaddr_buf; ++ ++ /* Check size */ ++ if (setaddr->addrlen != sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ /* Copy argument structure from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct sockaddr_can)) || ++ rtdm_copy_from_user(fd, &sockaddr_buf, setaddr->addr, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ sockaddr = &sockaddr_buf; ++ } else { ++ setaddr = (struct _rtdm_setsockaddr_args *)arg; ++ sockaddr = (struct sockaddr_can *)setaddr->addr; ++ } ++ ++ /* Now, all required data are in kernel space */ ++ ret = rtcan_raw_bind(fd, sockaddr); ++ ++ break; ++ } ++ ++ case _RTIOC_SETSOCKOPT: { ++ struct _rtdm_setsockopt_args *setopt; ++ struct _rtdm_setsockopt_args setopt_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct _rtdm_setsockopt_args)) || ++ rtdm_copy_from_user(fd, &setopt_buf, arg, ++ sizeof(struct _rtdm_setsockopt_args))) ++ return -EFAULT; ++ ++ setopt = &setopt_buf; ++ } else ++ setopt = (struct _rtdm_setsockopt_args *)arg; ++ ++ return rtcan_raw_setsockopt(fd, setopt); ++ } ++ ++ case RTCAN_RTIOC_TAKE_TIMESTAMP: { ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ long timestamp_switch = (long)arg; ++ ++ if (timestamp_switch == RTCAN_TAKE_TIMESTAMPS) ++ set_bit(RTCAN_GET_TIMESTAMP, &sock->flags); ++ else ++ clear_bit(RTCAN_GET_TIMESTAMP, &sock->flags); ++ break; ++ } ++ ++ case RTCAN_RTIOC_RCV_TIMEOUT: ++ case RTCAN_RTIOC_SND_TIMEOUT: { ++ /* Do some work these requests have in common. */ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ ++ nanosecs_rel_t *timeout = (nanosecs_rel_t *)arg; ++ nanosecs_rel_t timeo_buf; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy 64 bit timeout value from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(nanosecs_rel_t)) || ++ rtdm_copy_from_user(fd, &timeo_buf, ++ arg, sizeof(nanosecs_rel_t))) ++ return -EFAULT; ++ ++ timeout = &timeo_buf; ++ } ++ ++ /* Now the differences begin between the requests. 
*/ ++ if (request == RTCAN_RTIOC_RCV_TIMEOUT) ++ sock->rx_timeout = *timeout; ++ else ++ sock->tx_timeout = *timeout; ++ ++ break; ++ } ++ ++ default: ++ ret = rtcan_raw_ioctl_dev(fd, request, arg); ++ break; ++ } ++ ++ return ret; ++} ++ ++ ++#define MEMCPY_FROM_RING_BUF(to, len) \ ++do { \ ++ if (unlikely((recv_buf_index + len) > RTCAN_RXBUF_SIZE)) { \ ++ /* Wrap around end of buffer */ \ ++ first_part_size = RTCAN_RXBUF_SIZE - recv_buf_index; \ ++ memcpy(to, &recv_buf[recv_buf_index], first_part_size); \ ++ memcpy((void *)to + first_part_size, recv_buf, \ ++ len - first_part_size); \ ++ } else \ ++ memcpy(to, &recv_buf[recv_buf_index], len); \ ++ recv_buf_index = (recv_buf_index + len) & (RTCAN_RXBUF_SIZE - 1); \ ++} while (0) ++ ++ssize_t rtcan_raw_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct sockaddr_can scan; ++ nanosecs_rel_t timeout; ++ struct iovec *iov = (struct iovec *)msg->msg_iov; ++ struct iovec iov_buf; ++ can_frame_t frame; ++ nanosecs_abs_t timestamp = 0; ++ unsigned char ifindex; ++ unsigned char can_dlc; ++ unsigned char *recv_buf; ++ int recv_buf_index; ++ size_t first_part_size; ++ size_t payload_size; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ /* Clear frame memory location */ ++ memset(&frame, 0, sizeof(can_frame_t)); ++ ++ /* Check flags */ ++ if (flags & ~(MSG_DONTWAIT | MSG_PEEK)) ++ return -EINVAL; ++ ++ ++ /* Check if msghdr entries are sane */ ++ ++ if (msg->msg_name != NULL) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, msg->msg_name, msg->msg_namelen)) ++ return -EFAULT; ++ } ++ ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ } ++ ++ /* Check msg_iovlen, only one buffer allowed */ ++ if (msg->msg_iovlen != 1) ++ return -EMSGSIZE; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy IO vector from userspace */ ++ if (!rtdm_rw_user_ok(fd, msg->msg_iov, ++ sizeof(struct iovec)) || ++ rtdm_copy_from_user(fd, &iov_buf, msg->msg_iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ iov = &iov_buf; ++ } ++ ++ /* Check size of buffer */ ++ if (iov->iov_len < sizeof(can_frame_t)) ++ return -EMSGSIZE; ++ ++ /* Check buffer if in user space */ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, iov->iov_base, iov->iov_len)) ++ return -EFAULT; ++ } ++ ++ if (msg->msg_control != NULL) { ++ if (msg->msg_controllen < sizeof(nanosecs_abs_t)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ if (!rtdm_rw_user_ok(fd, msg->msg_control, ++ msg->msg_controllen)) ++ return -EFAULT; ++ } ++ ++ } else { ++ if (msg->msg_controllen != 0) ++ return -EINVAL; ++ } ++ ++ rtcan_raw_enable_bus_err(sock); ++ ++ /* Set RX timeout */ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sock->rx_timeout; ++ ++ /* Fetch message (ok, try it ...) */ ++ ret = rtdm_sem_timeddown(&sock->recv_sem, timeout, NULL); ++ ++ /* Error code returned? */ ++ if (unlikely(ret)) { ++ /* Which error code? */ ++ ++ if (ret == -EIDRM) ++ /* Socket was closed */ ++ return -EBADF; ++ ++ else if (ret == -EWOULDBLOCK) ++ /* We would block but don't want to */ ++ return -EAGAIN; ++ ++ else ++ /* Return all other error codes unmodified. */ ++ return ret; ++ } ++ ++ ++ /* OK, we've got mail. 
*/ ++ ++ rtdm_lock_get_irqsave(&rtcan_socket_lock, lock_ctx); ++ ++ ++ /* Construct a struct can_frame with data from socket's ring buffer */ ++ recv_buf_index = sock->recv_head; ++ recv_buf = sock->recv_buf; ++ ++ ++ /* Begin with CAN ID */ ++ MEMCPY_FROM_RING_BUF(&frame.can_id, sizeof(uint32_t)); ++ ++ ++ /* Fetch interface index */ ++ ifindex = recv_buf[recv_buf_index]; ++ recv_buf_index = (recv_buf_index + 1) & (RTCAN_RXBUF_SIZE - 1); ++ ++ ++ /* Fetch DLC (with indicator if a timestamp exists) */ ++ can_dlc = recv_buf[recv_buf_index]; ++ recv_buf_index = (recv_buf_index + 1) & (RTCAN_RXBUF_SIZE - 1); ++ ++ frame.can_dlc = can_dlc & RTCAN_HAS_NO_TIMESTAMP; ++ payload_size = (frame.can_dlc > 8) ? 8 : frame.can_dlc; ++ ++ ++ /* If frame is an RTR or one with no payload it's not necessary ++ * to copy the data bytes. */ ++ if (!(frame.can_id & CAN_RTR_FLAG) && payload_size) ++ /* Copy data bytes */ ++ MEMCPY_FROM_RING_BUF(frame.data, payload_size); ++ ++ /* Is a timestamp available and is the caller actually interested? */ ++ if (msg->msg_controllen && (can_dlc & RTCAN_HAS_TIMESTAMP)) ++ /* Copy timestamp */ ++ MEMCPY_FROM_RING_BUF(×tamp, RTCAN_TIMESTAMP_SIZE); ++ ++ /* Message completely read from the socket's ring buffer. Now check if ++ * caller is just peeking. */ ++ if (flags & MSG_PEEK) ++ /* Next one, please! */ ++ rtdm_sem_up(&sock->recv_sem); ++ else ++ /* Adjust begin of first message in the ring buffer. */ ++ sock->recv_head = recv_buf_index; ++ ++ ++ /* Release lock */ ++ rtdm_lock_put_irqrestore(&rtcan_socket_lock, lock_ctx); ++ ++ ++ /* Create CAN socket address to give back */ ++ if (msg->msg_namelen) { ++ scan.can_family = AF_CAN; ++ scan.can_ifindex = ifindex; ++ } ++ ++ ++ /* Last duty: Copy all back to the caller's buffers. */ ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy to user space */ ++ ++ /* Copy socket address */ ++ if (msg->msg_namelen) { ++ if (rtdm_copy_to_user(fd, msg->msg_name, &scan, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ ++ msg->msg_namelen = sizeof(struct sockaddr_can); ++ } ++ ++ /* Copy CAN frame */ ++ if (rtdm_copy_to_user(fd, iov->iov_base, &frame, ++ sizeof(can_frame_t))) ++ return -EFAULT; ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ /* ... and copy it, too. 
*/ ++ if (rtdm_copy_to_user(fd, msg->msg_iov, iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ /* Copy timestamp if existent and wanted */ ++ if (msg->msg_controllen) { ++ if (can_dlc & RTCAN_HAS_TIMESTAMP) { ++ if (rtdm_copy_to_user(fd, msg->msg_control, ++ ×tamp, RTCAN_TIMESTAMP_SIZE)) ++ return -EFAULT; ++ ++ msg->msg_controllen = RTCAN_TIMESTAMP_SIZE; ++ } else ++ msg->msg_controllen = 0; ++ } ++ ++ } else { ++ /* Kernel space */ ++ ++ /* Copy socket address */ ++ if (msg->msg_namelen) { ++ memcpy(msg->msg_name, &scan, sizeof(struct sockaddr_can)); ++ msg->msg_namelen = sizeof(struct sockaddr_can); ++ } ++ ++ /* Copy CAN frame */ ++ memcpy(iov->iov_base, &frame, sizeof(can_frame_t)); ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ ++ /* Copy timestamp if existent and wanted */ ++ if (msg->msg_controllen) { ++ if (can_dlc & RTCAN_HAS_TIMESTAMP) { ++ memcpy(msg->msg_control, ×tamp, RTCAN_TIMESTAMP_SIZE); ++ msg->msg_controllen = RTCAN_TIMESTAMP_SIZE; ++ } else ++ msg->msg_controllen = 0; ++ } ++ } ++ ++ ++ return sizeof(can_frame_t); ++} ++ ++ ++ssize_t rtcan_raw_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct sockaddr_can *scan = (struct sockaddr_can *)msg->msg_name; ++ struct sockaddr_can scan_buf; ++ struct iovec *iov = (struct iovec *)msg->msg_iov; ++ struct iovec iov_buf; ++ can_frame_t *frame; ++ can_frame_t frame_buf; ++ rtdm_lockctx_t lock_ctx; ++ nanosecs_rel_t timeout = 0; ++ struct tx_wait_queue tx_wait; ++ struct rtcan_device *dev; ++ int ifindex = 0; ++ int ret = 0; ++ spl_t s; ++ ++ ++ if (flags & MSG_OOB) /* Mirror BSD error message compatibility */ ++ return -EOPNOTSUPP; ++ ++ /* Only MSG_DONTWAIT is a valid flag. */ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ /* Check msg_iovlen, only one buffer allowed */ ++ if (msg->msg_iovlen != 1) ++ return -EMSGSIZE; ++ ++ if (scan == NULL) { ++ /* No socket address. Will use bound interface for sending */ ++ ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ ++ /* We only want a consistent value here, a spin lock would be ++ * overkill. Nevertheless, the binding could change till we have ++ * the chance to send. Blame the user, though. */ ++ ifindex = atomic_read(&sock->ifindex); ++ ++ if (!ifindex) ++ /* Socket isn't bound or bound to all interfaces. Go out. 
*/ ++ return -ENXIO; ++ } else { ++ /* Socket address given */ ++ if (msg->msg_namelen < sizeof(struct sockaddr_can)) ++ return -EINVAL; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy socket address from userspace */ ++ if (!rtdm_read_user_ok(fd, msg->msg_name, ++ sizeof(struct sockaddr_can)) || ++ rtdm_copy_from_user(fd, &scan_buf, msg->msg_name, ++ sizeof(struct sockaddr_can))) ++ return -EFAULT; ++ ++ scan = &scan_buf; ++ } ++ ++ /* Check address family */ ++ if (scan->can_family != AF_CAN) ++ return -EINVAL; ++ ++ ifindex = scan->can_ifindex; ++ } ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy IO vector from userspace */ ++ if (!rtdm_rw_user_ok(fd, msg->msg_iov, ++ sizeof(struct iovec)) || ++ rtdm_copy_from_user(fd, &iov_buf, msg->msg_iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ ++ iov = &iov_buf; ++ } ++ ++ /* Check size of buffer */ ++ if (iov->iov_len != sizeof(can_frame_t)) ++ return -EMSGSIZE; ++ ++ frame = (can_frame_t *)iov->iov_base; ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy CAN frame from userspace */ ++ if (!rtdm_read_user_ok(fd, iov->iov_base, ++ sizeof(can_frame_t)) || ++ rtdm_copy_from_user(fd, &frame_buf, iov->iov_base, ++ sizeof(can_frame_t))) ++ return -EFAULT; ++ ++ frame = &frame_buf; ++ } ++ ++ /* Adjust iovec in the common way */ ++ iov->iov_base += sizeof(can_frame_t); ++ iov->iov_len -= sizeof(can_frame_t); ++ /* ... and copy it back to userspace if necessary */ ++ if (rtdm_fd_is_user(fd)) { ++ if (rtdm_copy_to_user(fd, msg->msg_iov, iov, ++ sizeof(struct iovec))) ++ return -EFAULT; ++ } ++ ++ /* At last, we've got the frame ... */ ++ ++ /* Check if DLC between 0 and 15 */ ++ if (frame->can_dlc > 15) ++ return -EINVAL; ++ ++ /* Check if it is a standard frame and the ID between 0 and 2031 */ ++ if (!(frame->can_id & CAN_EFF_FLAG)) { ++ u32 id = frame->can_id & CAN_EFF_MASK; ++ if (id > (CAN_SFF_MASK - 16)) ++ return -EINVAL; ++ } ++ ++ if ((dev = rtcan_dev_get_by_index(ifindex)) == NULL) ++ return -ENXIO; ++ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sock->tx_timeout; ++ ++ tx_wait.rt_task = rtdm_task_current(); ++ ++ /* Register the task at the socket's TX wait queue and decrement ++ * the TX semaphore. This must be atomic. Finally, the task must ++ * be deregistered again (also atomic). */ ++ cobalt_atomic_enter(s); ++ ++ list_add(&tx_wait.tx_wait_list, &sock->tx_wait_head); ++ ++ /* Try to pass the guard in order to access the controller */ ++ ret = rtdm_sem_timeddown(&dev->tx_sem, timeout, NULL); ++ ++ /* Only dequeue task again if socket isn't being closed i.e. if ++ * this task was not unblocked within the close() function. */ ++ if (likely(!list_empty(&tx_wait.tx_wait_list))) ++ /* Dequeue this task from the TX wait queue */ ++ list_del_init(&tx_wait.tx_wait_list); ++ else ++ /* The socket was closed. */ ++ ret = -EBADF; ++ ++ cobalt_atomic_leave(s); ++ ++ /* Error code returned? */ ++ if (ret != 0) { ++ /* Which error code? */ ++ switch (ret) { ++ case -EIDRM: ++ /* Controller is stopped or bus-off */ ++ ret = -ENETDOWN; ++ goto send_out1; ++ ++ case -EWOULDBLOCK: ++ /* We would block but don't want to */ ++ ret = -EAGAIN; ++ goto send_out1; ++ ++ default: ++ /* Return all other error codes unmodified. 
*/ ++ goto send_out1; ++ } ++ } ++ ++ /* We got access */ ++ ++ ++ /* Push message onto stack for loopback when TX done */ ++ if (rtcan_loopback_enabled(sock)) ++ rtcan_tx_push(dev, sock, frame); ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ ++ /* Controller should be operating */ ++ if (!CAN_STATE_OPERATING(dev->state)) { ++ if (dev->state == CAN_STATE_SLEEPING) { ++ ret = -ECOMM; ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ rtdm_sem_up(&dev->tx_sem); ++ goto send_out1; ++ } ++ ret = -ENETDOWN; ++ goto send_out2; ++ } ++ ++ dev->tx_count++; ++ ret = dev->hard_start_xmit(dev, frame); ++ ++ /* Return number of bytes sent upon successful completion */ ++ if (ret == 0) ++ ret = sizeof(can_frame_t); ++ ++ send_out2: ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ send_out1: ++ rtcan_dev_dereference(dev); ++ return ret; ++} ++ ++ ++static struct rtdm_driver rtcan_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtcan, ++ RTDM_CLASS_CAN, ++ RTDM_SUBCLASS_GENERIC, ++ RTCAN_PROFILE_VER), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtcan_socket), ++ .protocol_family = PF_CAN, ++ .socket_type = SOCK_RAW, ++ .ops = { ++ .socket = rtcan_raw_socket, ++ .close = rtcan_raw_close, ++ .ioctl_nrt = rtcan_raw_ioctl, ++ .recvmsg_rt = rtcan_raw_recvmsg, ++ .sendmsg_rt = rtcan_raw_sendmsg, ++ }, ++}; ++ ++static struct rtdm_device rtcan_device = { ++ .driver = &rtcan_driver, ++ .label = "rtcan", ++}; ++ ++int __init rtcan_raw_proto_register(void) ++{ ++ return rtdm_dev_register(&rtcan_device); ++} ++ ++void __exit rtcan_raw_proto_unregister(void) ++{ ++ rtdm_dev_unregister(&rtcan_device); ++} ++ ++ ++EXPORT_SYMBOL_GPL(rtcan_rcv); +--- linux/drivers/xenomai/can/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/Makefile 2021-04-07 16:01:26.460635262 +0800 +@@ -0,0 +1,10 @@ ++ ++ccflags-y += -Idrivers/xenomai/can ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN) += xeno_can.o mscan/ sja1000/ ++obj-$(CONFIG_XENO_DRIVERS_CAN_FLEXCAN) += xeno_can_flexcan.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_VIRT) += xeno_can_virt.o ++ ++xeno_can-y := rtcan_dev.o rtcan_socket.o rtcan_module.o rtcan_raw.o rtcan_raw_dev.o rtcan_raw_filter.o ++xeno_can_virt-y := rtcan_virt.o ++xeno_can_flexcan-y := rtcan_flexcan.o +--- linux/drivers/xenomai/can/rtcan_internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_internal.h 2021-04-07 16:01:26.455635269 +0800 +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/include/rtnet_internal.h: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#ifndef __RTCAN_INTERNAL_H_ ++#define __RTCAN_INTERNAL_H_ ++ ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++#define RTCAN_ASSERT(expr, func) \ ++ if (!(expr)) { \ ++ rtdm_printk("Assertion failed! %s:%s:%d %s\n", \ ++ __FILE__, __FUNCTION__, __LINE__, (#expr)); \ ++ func \ ++ } ++#else ++#define RTCAN_ASSERT(expr, func) ++#endif /* CONFIG_RTCAN_CHECKED */ ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++# define RTCAN_DBG(fmt,args...) do { printk(fmt ,##args); } while (0) ++# define RTCAN_RTDM_DBG(fmt,args...) do { rtdm_printk(fmt ,##args); } while (0) ++#else ++# define RTCAN_DBG(fmt,args...) do {} while (0) ++# define RTCAN_RTDM_DBG(fmt,args...) do {} while (0) ++#endif ++ ++#define rtcan_priv(dev) (dev)->priv ++#define rtcandev_dbg(dev, fmt, args...) \ ++ printk(KERN_DEBUG "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_info(dev, fmt, args...) \ ++ printk(KERN_INFO "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_warn(dev, fmt, args...) \ ++ printk(KERN_WARNING "%s: " fmt, (dev)->name, ##args) ++#define rtcandev_err(dev, fmt, args...) \ ++ printk(KERN_ERR "%s: " fmt, (dev)->name, ##args) ++ ++#endif /* __RTCAN_INTERNAL_H_ */ +--- linux/drivers/xenomai/can/rtcan_list.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_list.h 2021-04-07 16:01:26.449635278 +0800 +@@ -0,0 +1,68 @@ ++/* ++ * List management for the RTDM RTCAN device driver ++ * ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_LIST_H_ ++#define __RTCAN_LIST_H_ ++ ++#include "rtcan_socket.h" ++ ++ ++/* ++ * List element in a single linked list used for registering reception sockets. ++ * Every single struct can_filter which was bound to a socket gets such a ++ * list entry. There is no member for the CAN interface because there is one ++ * reception list for every CAN controller. This is because when a CAN message ++ * is received it is clear from which interface and therefore minimizes ++ * searching time. ++ */ ++struct rtcan_recv { ++ can_filter_t can_filter; /* filter used for deciding if ++ * a socket wants to get a CAN ++ * message */ ++ unsigned int match_count; /* count accepted messages */ ++ struct rtcan_socket *sock; /* pointer to registered socket ++ */ ++ struct rtcan_recv *next; /* pointer to next list element ++ */ ++}; ++ ++ ++/* ++ * Element in a TX wait queue. ++ * ++ * Every socket holds a TX wait queue where all RT tasks are queued when they ++ * are blocked while waiting to be able to transmit a message via this socket. ++ * ++ * Every sender holds its own element. 
++ */ ++struct tx_wait_queue { ++ struct list_head tx_wait_list; /* List pointers */ ++ rtdm_task_t *rt_task; /* Pointer to task handle */ ++}; ++ ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++extern rtdm_lock_t rtcan_recv_list_lock; ++ ++ ++#endif /* __RTCAN_LIST_H_ */ +--- linux/drivers/xenomai/can/rtcan_module.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_module.c 2021-04-07 16:01:26.445635284 +0800 +@@ -0,0 +1,450 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/rtcan_module.c: ++ * ++ * Copyright (C) 2002 Ulrich Marx ++ * 2003-2006 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_LICENSE("GPL"); ++ ++ ++const char rtcan_rtdm_provider_name[] = ++ "(C) 2006 RT-Socket-CAN Development Team"; ++ ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *rtcan_proc_root; ++ ++static void rtcan_dev_get_ctrlmode_name(can_ctrlmode_t ctrlmode, ++ char* name, int max_len) ++{ ++ snprintf(name, max_len, "%s%s", ++ ctrlmode & CAN_CTRLMODE_LISTENONLY ? "listen-only " : "", ++ ctrlmode & CAN_CTRLMODE_LOOPBACK ? 
"loopback " : ""); ++} ++ ++static char *rtcan_state_names[] = { ++ "active", "warning", "passive" , "bus-off", ++ "scanning", "stopped", "sleeping" ++}; ++ ++static void rtcan_dev_get_state_name(can_state_t state, ++ char* name, int max_len) ++{ ++ if (state >= CAN_STATE_ACTIVE && ++ state <= CAN_STATE_SLEEPING) ++ strncpy(name, rtcan_state_names[state], max_len); ++ else ++ strncpy(name, "unknown", max_len); ++} ++ ++static void rtcan_dev_get_baudrate_name(can_baudrate_t baudrate, ++ char* name, int max_len) ++{ ++ switch (baudrate) { ++ case CAN_BAUDRATE_UNCONFIGURED: ++ strncpy(name, "undefined", max_len); ++ break; ++ case CAN_BAUDRATE_UNKNOWN: ++ strncpy(name, "unknown", max_len); ++ break; ++ default: ++ ksformat(name, max_len, "%d", baudrate); ++ break; ++ } ++} ++ ++static void rtcan_dev_get_bittime_name(struct can_bittime *bit_time, ++ char* name, int max_len) ++{ ++ switch (bit_time->type) { ++ case CAN_BITTIME_STD: ++ ksformat(name, max_len, ++ "brp=%d prop_seg=%d phase_seg1=%d " ++ "phase_seg2=%d sjw=%d sam=%d", ++ bit_time->std.brp, ++ bit_time->std.prop_seg, ++ bit_time->std.phase_seg1, ++ bit_time->std.phase_seg2, ++ bit_time->std.sjw, ++ bit_time->std.sam); ++ break; ++ case CAN_BITTIME_BTR: ++ ksformat(name, max_len, "btr0=0x%02x btr1=0x%02x", ++ bit_time->btr.btr0, bit_time->btr.btr1); ++ break; ++ default: ++ strncpy(name, "unknown", max_len); ++ break; ++ } ++} ++ ++static void rtcan_get_timeout_name(nanosecs_rel_t timeout, ++ char* name, int max_len) ++{ ++ if (timeout == RTDM_TIMEOUT_INFINITE) ++ strncpy(name, "infinite", max_len); ++ else ++ ksformat(name, max_len, "%lld", (long long)timeout); ++} ++ ++static int rtcan_read_proc_devices(struct seq_file *p, void *data) ++{ ++ int i; ++ struct rtcan_device *dev; ++ char state_name[20], baudrate_name[20]; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* Name___________ _Baudrate State___ _TX_Counts _TX_Counts ____Errors ++ * rtcan0 125000 stopped 1234567890 1234567890 1234567890 ++ * rtcan1 undefined warning 1234567890 1234567890 1234567890 ++ * rtcan2 undefined scanning 1234567890 1234567890 1234567890 ++ */ ++ seq_printf(p, "Name___________ _Baudrate State___ TX_Counter RX_Counter " ++ "____Errors\n"); ++ ++ for (i = 1; i <= RTCAN_MAX_DEVICES; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) != NULL) { ++ rtcan_dev_get_state_name(dev->state, ++ state_name, sizeof(state_name)); ++ rtcan_dev_get_baudrate_name(dev->baudrate, ++ baudrate_name, sizeof(baudrate_name)); ++ seq_printf(p, "%-15s %9s %-8s %10d %10d %10d\n", ++ dev->name, baudrate_name, state_name, dev->tx_count, ++ dev->rx_count, dev->err_count); ++ rtcan_dev_dereference(dev); ++ } ++ } ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_devices_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_devices, NULL); ++} ++ ++static const struct file_operations rtcan_proc_devices_ops = { ++ .open = rtcan_proc_devices_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int rtcan_read_proc_sockets(struct seq_file *p, void *data) ++{ ++ struct rtcan_socket *sock; ++ struct rtdm_fd *fd; ++ struct rtcan_device *dev; ++ char name[IFNAMSIZ] = "not-bound"; ++ char rx_timeout[20], tx_timeout[20]; ++ rtdm_lockctx_t lock_ctx; ++ int ifindex; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ /* Name___________ Filter ErrMask RX_Timeout TX_Timeout RX_BufFull TX_Lo ++ * rtcan0 1 0x00010 
1234567890 1234567890 1234567890 12345 ++ */ ++ seq_printf(p, "Name___________ Filter ErrMask RX_Timeout_ns " ++ "TX_Timeout_ns RX_BufFull TX_Lo\n"); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ list_for_each_entry(sock, &rtcan_socket_list, socket_list) { ++ fd = rtcan_socket_to_fd(sock); ++ if (rtcan_sock_is_bound(sock)) { ++ ifindex = atomic_read(&sock->ifindex); ++ if (ifindex) { ++ dev = rtcan_dev_get_by_index(ifindex); ++ if (dev) { ++ strncpy(name, dev->name, IFNAMSIZ); ++ rtcan_dev_dereference(dev); ++ } ++ } else ++ ksformat(name, sizeof(name), "%d", ifindex); ++ } ++ rtcan_get_timeout_name(sock->tx_timeout, ++ tx_timeout, sizeof(tx_timeout)); ++ rtcan_get_timeout_name(sock->rx_timeout, ++ rx_timeout, sizeof(rx_timeout)); ++ seq_printf(p, "%-15s %6d 0x%05x %13s %13s %10d %5d\n", ++ name, sock->flistlen, sock->err_mask, ++ rx_timeout, tx_timeout, sock->rx_buf_full, ++ rtcan_loopback_enabled(sock)); ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_sockets_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_sockets, NULL); ++} ++ ++static const struct file_operations rtcan_proc_sockets_ops = { ++ .open = rtcan_proc_sockets_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++static int rtcan_read_proc_info(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = p->private; ++ char state_name[20], baudrate_name[20]; ++ char ctrlmode_name[80], bittime_name[80]; ++ ++ if (down_interruptible(&rtcan_devices_nrt_lock)) ++ return -ERESTARTSYS; ++ ++ rtcan_dev_get_state_name(dev->state, ++ state_name, sizeof(state_name)); ++ rtcan_dev_get_ctrlmode_name(dev->ctrl_mode, ++ ctrlmode_name, sizeof(ctrlmode_name)); ++ rtcan_dev_get_baudrate_name(dev->baudrate, ++ baudrate_name, sizeof(baudrate_name)); ++ rtcan_dev_get_bittime_name(&dev->bit_time, ++ bittime_name, sizeof(bittime_name)); ++ ++ seq_printf(p, "Device %s\n", dev->name); ++ seq_printf(p, "Controller %s\n", dev->ctrl_name); ++ seq_printf(p, "Board %s\n", dev->board_name); ++ seq_printf(p, "Clock-Hz %d\n", dev->can_sys_clock); ++ seq_printf(p, "Baudrate %s\n", baudrate_name); ++ seq_printf(p, "Bit-time %s\n", bittime_name); ++ seq_printf(p, "Ctrl-Mode %s\n", ctrlmode_name); ++ seq_printf(p, "State %s\n", state_name); ++ seq_printf(p, "TX-Counter %d\n", dev->tx_count); ++ seq_printf(p, "RX-Counter %d\n", dev->rx_count); ++ seq_printf(p, "Errors %d\n", dev->err_count); ++#ifdef RTCAN_USE_REFCOUNT ++ seq_printf(p, "Refcount %d\n", atomic_read(&dev->refcount)); ++#endif ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ return 0; ++} ++ ++static int rtcan_proc_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_info, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_proc_info_ops = { ++ .open = rtcan_proc_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++ ++static int rtcan_read_proc_filter(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = p->private; ++ struct rtcan_recv *recv_listener = dev->recv_list; ++ struct rtdm_fd *fd; ++ rtdm_lockctx_t lock_ctx; ++ ++ /* __CAN_ID__ _CAN_Mask_ Inv MatchCount ++ * 0x12345678 0x12345678 no 1234567890 ++ */ ++ ++ seq_printf(p, "__CAN_ID__ _CAN_Mask_ Inv MatchCount\n"); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ ++ /* Loop over the reception list of the 
device */ ++ while (recv_listener != NULL) { ++ fd = rtcan_socket_to_fd(recv_listener->sock); ++ ++ seq_printf(p, "0x%08x 0x%08x %s %10d\n", ++ recv_listener->can_filter.can_id, ++ recv_listener->can_filter.can_mask & ~CAN_INV_FILTER, ++ (recv_listener->can_filter.can_mask & CAN_INV_FILTER) ? ++ "yes" : " no", ++ recv_listener->match_count); ++ ++ recv_listener = recv_listener->next; ++ } ++ ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return 0; ++} ++ ++static int rtcan_proc_filter_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_filter, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_proc_filter_ops = { ++ .open = rtcan_proc_filter_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++ ++static int rtcan_read_proc_version(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "RT-Socket-CAN %d.%d.%d\n", ++ RTCAN_MAJOR_VER, RTCAN_MINOR_VER, RTCAN_BUGFIX_VER); ++ ++ return 0; ++} ++ ++static int rtcan_proc_version_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_read_proc_version, NULL); ++} ++ ++static const struct file_operations rtcan_proc_version_ops = { ++ .open = rtcan_proc_version_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++void rtcan_dev_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("info", dev->proc_root); ++ remove_proc_entry("filters", dev->proc_root); ++ remove_proc_entry(dev->name, rtcan_proc_root); ++ ++ dev->proc_root = NULL; ++} ++ ++int rtcan_dev_create_proc(struct rtcan_device* dev) ++{ ++ if (!rtcan_proc_root) ++ return -EINVAL; ++ ++ dev->proc_root = proc_mkdir(dev->name, rtcan_proc_root); ++ if (!dev->proc_root) { ++ printk("%s: unable to create /proc device entries\n", dev->name); ++ return -1; ++ } ++ ++ proc_create_data("info", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_proc_info_ops, dev); ++ proc_create_data("filters", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_proc_filter_ops, dev); ++ return 0; ++ ++} ++ ++ ++static int rtcan_proc_register(void) ++{ ++ rtcan_proc_root = proc_mkdir("rtcan", NULL); ++ if (!rtcan_proc_root) { ++ printk("rtcan: unable to initialize /proc entries\n"); ++ return -1; ++ } ++ ++ proc_create("devices", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_devices_ops); ++ proc_create("version", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_version_ops); ++ proc_create("sockets", S_IFREG | S_IRUGO | S_IWUSR, rtcan_proc_root, ++ &rtcan_proc_sockets_ops); ++ return 0; ++} ++ ++ ++ ++static void rtcan_proc_unregister(void) ++{ ++ remove_proc_entry("devices", rtcan_proc_root); ++ remove_proc_entry("version", rtcan_proc_root); ++ remove_proc_entry("sockets", rtcan_proc_root); ++ remove_proc_entry("rtcan", 0); ++} ++#endif /* CONFIG_PROC_FS */ ++ ++ ++ ++int __init rtcan_init(void) ++{ ++ int err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("RT-Socket-CAN %d.%d.%d - %s\n", ++ RTCAN_MAJOR_VER, RTCAN_MINOR_VER, RTCAN_BUGFIX_VER, ++ rtcan_rtdm_provider_name); ++ ++ if ((err = rtcan_raw_proto_register()) != 0) ++ goto out; ++ ++#ifdef CONFIG_PROC_FS ++ if ((err = rtcan_proc_register()) != 0) ++ goto out; ++#endif ++ ++ out: ++ return err; ++} ++ ++ ++void __exit rtcan_exit(void) ++{ ++ rtcan_raw_proto_unregister(); ++#ifdef CONFIG_PROC_FS ++ rtcan_proc_unregister(); ++#endif ++ ++ printk("rtcan: unloaded\n"); ++} ++ ++ 
++module_init(rtcan_init); ++module_exit(rtcan_exit); +--- linux/drivers/xenomai/can/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/Kconfig 2021-04-07 16:01:26.440635291 +0800 +@@ -0,0 +1,91 @@ ++menu "CAN drivers" ++ ++config XENO_DRIVERS_CAN ++ tristate "RT-Socket-CAN, CAN raw socket interface" ++ help ++ RT-Socket-CAN is a real-time socket interface for CAN controllers. ++ ++config XENO_DRIVERS_CAN_DEBUG ++ depends on XENO_DRIVERS_CAN && PROC_FS ++ bool "Enable debug output" ++ default y ++ help ++ ++ This option activates debugging checks and enhanced output for the ++ RT-Socket-CAN driver. It also allows to list the hardware registers ++ of the registered CAN controllers. It is a recommended option for ++ getting started and analysing potential problems. For production ++ purposes, it should be switched off (for the sake of latency). ++ ++config XENO_DRIVERS_CAN_LOOPBACK ++ depends on XENO_DRIVERS_CAN ++ bool "Enable TX loopback to local sockets" ++ default n ++ help ++ ++ This options adds support for TX loopback to local sockets. Normally, ++ messages sent to the CAN bus are not visible to sockets listening to ++ the same local device. When this option is enabled, TX messages are ++ looped back locally when the transmit has been done by default. This ++ behaviour can be deactivated or reactivated with "setsockopt". Enable ++ this option, if you want to have a "net-alike" behaviour. ++ ++config XENO_DRIVERS_CAN_RXBUF_SIZE ++ depends on XENO_DRIVERS_CAN ++ int "Size of receive ring buffers (must be 2^N)" ++ default 1024 ++ ++config XENO_DRIVERS_CAN_MAX_DEVICES ++ depends on XENO_DRIVERS_CAN ++ int "Maximum number of devices" ++ default 4 ++ ++config XENO_DRIVERS_CAN_MAX_RECEIVERS ++ depends on XENO_DRIVERS_CAN ++ int "Maximum number of receive filters per device" ++ default 16 ++ help ++ ++ The driver maintains a receive filter list per device for fast access. ++ ++config XENO_DRIVERS_CAN_BUS_ERR ++ depends on XENO_DRIVERS_CAN ++ bool ++ default n ++ help ++ ++ To avoid unnecessary bus error interrupt flooding, this option enables ++ bus error interrupts when an application is calling a receive function ++ on a socket listening on bus errors. After one bus error has occured, ++ the interrupt will be disabled to allow the application time for error ++ processing. This option is automatically selected for CAN controllers ++ supporting bus error interrupts like the SJA1000. ++ ++config XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ depends on XENO_DRIVERS_CAN ++ bool "Old bit-time calculation algorithm (deprecated)" ++ default n ++ help ++ ++ This option allows to enable the old algorithm to calculate the ++ CAN bit-timing parameters for backward compatibility. ++ ++config XENO_DRIVERS_CAN_VIRT ++ depends on XENO_DRIVERS_CAN ++ tristate "Virtual CAN bus driver" ++ help ++ ++ This driver provides two CAN ports that are virtually interconnected. ++ More ports can be enabled with the module parameter "devices". ++ ++config XENO_DRIVERS_CAN_FLEXCAN ++ depends on XENO_DRIVERS_CAN && OF && !XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ tristate "Freescale FLEXCAN based chips" ++ help ++ ++ Say Y here if you want to support for Freescale FlexCAN. 
++ ++source "drivers/xenomai/can/mscan/Kconfig" ++source "drivers/xenomai/can/sja1000/Kconfig" ++ ++endmenu +--- linux/drivers/xenomai/can/rtcan_raw_filter.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw_filter.c 2021-04-07 16:01:26.435635298 +0800 +@@ -0,0 +1,256 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_internal.h" ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++ ++ ++#if 0 ++void rtcan_raw_print_filter(struct rtcan_device *dev) ++{ ++ int i; ++ struct rtcan_recv *r = dev->receivers; ++ ++ rtdm_printk("%s: recv_list=%p empty_list=%p free_entries=%d\n", ++ dev->name, dev->recv_list, dev->empty_list, dev->free_entries); ++ for (i = 0; i < RTCAN_MAX_RECEIVERS; i++, r++) { ++ rtdm_printk("%2d %p sock=%p next=%p id=%x mask=%x\n", ++ i, r, r->sock, r->next, ++ r->can_filter.can_id, r->can_filter.can_mask); ++ } ++} ++#else ++#define rtcan_raw_print_filter(dev) ++#endif ++ ++ ++static inline void rtcan_raw_mount_filter(can_filter_t *recv_filter, ++ can_filter_t *filter) ++{ ++ if (filter->can_id & CAN_INV_FILTER) { ++ recv_filter->can_id = filter->can_id & ~CAN_INV_FILTER; ++ recv_filter->can_mask = filter->can_mask | CAN_INV_FILTER; ++ } else { ++ recv_filter->can_id = filter->can_id; ++ recv_filter->can_mask = filter->can_mask & ~CAN_INV_FILTER; ++ } ++ ++ /* Apply mask for fast filter check */ ++ recv_filter->can_id &= recv_filter->can_mask; ++} ++ ++ ++int rtcan_raw_check_filter(struct rtcan_socket *sock, int ifindex, ++ struct rtcan_filter_list *flist) ++{ ++ int old_ifindex = 0, old_flistlen_all = 0; ++ int free_entries, i, begin, end; ++ struct rtcan_device *dev; ++ int flistlen; ++ ++ if (rtcan_flist_no_filter(flist)) ++ return 0; ++ ++ /* Check if filter list has been defined by user */ ++ flistlen = (flist) ? flist->flistlen : 1; ++ ++ /* Now we check if a reception list would overflow. This takes some ++ * preparation, so let's go ... */ ++ ++ /* Check current bind status */ ++ if (rtcan_sock_has_filter(sock)) { ++ /* Socket is bound */ ++ i = atomic_read(&sock->ifindex); ++ ++ if (i == 0) ++ /* Socket was bound to ALL interfaces */ ++ old_flistlen_all = sock->flistlen; ++ else /* Socket was bound to only one interface */ ++ old_ifindex = i; ++ } ++ ++ if (ifindex) { ++ /* We bind the socket to only one interface. */ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket must be bound to all interfaces. 
*/ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ /* Check if there is space for the new binding */ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ free_entries = dev->free_entries + old_flistlen_all; ++ rtcan_dev_dereference(dev); ++ if (i == old_ifindex) ++ free_entries += sock->flistlen; ++ /* Compare free list space to new filter list length */ ++ if (free_entries < flistlen) ++ return -ENOSPC; ++ } ++ ++ return 0; ++} ++ ++ ++int rtcan_raw_add_filter(struct rtcan_socket *sock, int ifindex) ++{ ++ int i, j, begin, end; ++ struct rtcan_recv *first, *last; ++ struct rtcan_device *dev; ++ /* Check if filter list has been defined by user */ ++ int flistlen; ++ ++ if (rtcan_flist_no_filter(sock->flist)) { ++ return 0; ++ } ++ ++ flistlen = (sock->flist) ? sock->flist->flistlen : 0; ++ ++ if (ifindex) { ++ /* We bind the socket to only one interface. */ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket must be bound to all interfaces. */ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ /* Take first entry of empty list */ ++ first = last = dev->empty_list; ++ /* Check if filter list is empty */ ++ if (flistlen) { ++ /* Filter list is not empty */ ++ /* Register first filter */ ++ rtcan_raw_mount_filter(&last->can_filter, ++ &sock->flist->flist[0]); ++ last->match_count = 0; ++ last->sock = sock; ++ for (j = 1; j < flistlen; j++) { ++ /* Register remaining filters */ ++ last = last->next; ++ rtcan_raw_mount_filter(&last->can_filter, ++ &sock->flist->flist[j]); ++ last->sock = sock; ++ last->match_count = 0; ++ } ++ /* Decrease free entries counter by length of filter list */ ++ dev->free_entries -= flistlen; ++ ++ } else { ++ /* Filter list is empty. Socket must be bound to all CAN IDs. */ ++ /* Fill list entry members */ ++ last->can_filter.can_id = last->can_filter.can_mask = 0; ++ last->sock = sock; ++ last->match_count = 0; ++ /* Decrease free entries counter by 1 ++ * (one filter for all CAN frames) */ ++ dev->free_entries--; ++ } ++ ++ /* Set new empty list header */ ++ dev->empty_list = last->next; ++ /* Add new partial recv list to the head of reception list */ ++ last->next = dev->recv_list; ++ /* Adjust rececption list pointer */ ++ dev->recv_list = first; ++ ++ rtcan_raw_print_filter(dev); ++ rtcan_dev_dereference(dev); ++ } ++ ++ return (flistlen) ? flistlen : 1; ++} ++ ++ ++void rtcan_raw_remove_filter(struct rtcan_socket *sock) ++{ ++ int i, j, begin, end; ++ struct rtcan_recv *first, *next, *last; ++ int ifindex = atomic_read(&sock->ifindex); ++ struct rtcan_device *dev; ++ ++ if (!rtcan_sock_has_filter(sock)) /* nothing to do */ ++ return; ++ ++ if (ifindex) { ++ /* Socket was bound to one interface only. 
*/ ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ /* Socket was bound to all interfaces */ ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ /* Search for first list entry pointing to this socket */ ++ first = NULL; ++ next = dev->recv_list; ++ while (next->sock != sock) { ++ first = next; ++ next = first->next; ++ } ++ ++ /* Now go to the end of the old filter list */ ++ last = next; ++ for (j = 1; j < sock->flistlen; j++) ++ last = last->next; ++ ++ /* Detach found first list entry from reception list */ ++ if (first) ++ first->next = last->next; ++ else ++ dev->recv_list = last->next; ++ /* Add partial list to the head of empty list */ ++ last->next = dev->empty_list; ++ /* Adjust empty list pointer */ ++ dev->empty_list = next; ++ ++ /* Increase free entries counter by length of old filter list */ ++ dev->free_entries += sock->flistlen; ++ ++ rtcan_raw_print_filter(dev); ++ rtcan_dev_dereference(dev); ++ } ++} +--- linux/drivers/xenomai/can/rtcan_raw_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_raw_dev.c 2021-04-07 16:01:26.430635305 +0800 +@@ -0,0 +1,455 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger, ++ * Copyright (C) 2005 Marc Kleine-Budde, Pengutronix ++ * Copyright (C) 2006 Andrey Volkov, Varma Electronics ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ ++#define RTCAN_MAX_TSEG1 15 ++#define RTCAN_MAX_TSEG2 7 ++ ++/* ++ * Calculate standard bit-time values for odd bitrates. 
++ * Most parts of this code is from Arnaud Westenberg ++ */ ++static int rtcan_calc_bit_time(struct rtcan_device *dev, ++ can_baudrate_t rate, ++ struct can_bittime_std *bit_time) ++{ ++ int best_error = 1000000000; ++ int error; ++ int best_tseg=0, best_brp=0, best_rate=0, brp=0; ++ int tseg=0, tseg1=0, tseg2=0; ++ int clock = dev->can_sys_clock; ++ int sjw = 0; ++ int sampl_pt = 90; ++ ++ /* some heuristic specials */ ++ if (rate > ((1000000 + 500000) / 2)) ++ sampl_pt = 75; ++ ++ if (rate < ((12500 + 10000) / 2)) ++ sampl_pt = 75; ++ ++ if (rate < ((100000 + 125000) / 2)) ++ sjw = 1; ++ ++ /* tseg even = round down, odd = round up */ ++ for (tseg = (0 + 0 + 2) * 2; ++ tseg <= (RTCAN_MAX_TSEG2 + RTCAN_MAX_TSEG1 + 2) * 2 + 1; ++ tseg++) { ++ brp = clock / ((1 + tseg / 2) * rate) + tseg % 2; ++ if ((brp == 0) || (brp > 64)) ++ continue; ++ ++ error = rate - clock / (brp * (1 + tseg / 2)); ++ if (error < 0) ++ error = -error; ++ ++ if (error <= best_error) { ++ best_error = error; ++ best_tseg = tseg/2; ++ best_brp = brp - 1; ++ best_rate = clock / (brp * (1 + tseg / 2)); ++ } ++ } ++ ++ if (best_error && (rate / best_error < 10)) { ++ RTCAN_RTDM_DBG("%s: bitrate %d is not possible with %d Hz clock\n", ++ dev->name, rate, clock); ++ return -EDOM; ++ } ++ ++ tseg2 = best_tseg - (sampl_pt * (best_tseg + 1)) / 100; ++ ++ if (tseg2 < 0) ++ tseg2 = 0; ++ ++ if (tseg2 > RTCAN_MAX_TSEG2) ++ tseg2 = RTCAN_MAX_TSEG2; ++ ++ tseg1 = best_tseg - tseg2 - 2; ++ ++ if (tseg1 > RTCAN_MAX_TSEG1) { ++ tseg1 = RTCAN_MAX_TSEG1; ++ tseg2 = best_tseg-tseg1-2; ++ } ++ ++ bit_time->brp = best_brp + 1; ++ bit_time->prop_seg = 0; ++ bit_time->phase_seg1 = tseg1 + 1; ++ bit_time->phase_seg2 = tseg2 + 1; ++ bit_time->sjw = sjw + 1; ++ bit_time->sam = 0; ++ ++ return 0; ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD */ ++ ++/* This is the bit-time calculation method from the Linux kernel */ ++ ++#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ ++ ++static int can_update_spt(const struct can_bittiming_const *btc, ++ unsigned int sampl_pt, unsigned int tseg, ++ unsigned int *tseg1, unsigned int *tseg2) ++{ ++ *tseg2 = tseg + 1 - (sampl_pt * (tseg + 1)) / 1000; ++ *tseg2 = clamp(*tseg2, btc->tseg2_min, btc->tseg2_max); ++ *tseg1 = tseg - *tseg2; ++ if (*tseg1 > btc->tseg1_max) { ++ *tseg1 = btc->tseg1_max; ++ *tseg2 = tseg - *tseg1; ++ } ++ ++ return 1000 * (tseg + 1 - *tseg2) / (tseg + 1); ++} ++ ++static int rtcan_calc_bit_time(struct rtcan_device *dev, ++ can_baudrate_t bitrate, ++ struct can_bittime_std *bt) ++{ ++ const struct can_bittiming_const *btc = dev->bittiming_const; ++ long rate; /* current bitrate */ ++ long rate_error;/* difference between current and target value */ ++ long best_rate_error = 1000000000; ++ int spt; /* current sample point in thousandth */ ++ int spt_error; /* difference between current and target value */ ++ int best_spt_error = 1000; ++ int sampl_pt; /* target sample point */ ++ int best_tseg = 0, best_brp = 0; /* current best values for tseg and brp */ ++ unsigned int brp, tsegall, tseg, tseg1, tseg2; ++ u64 v64; ++ ++ if (!dev->bittiming_const) ++ return -ENOTSUPP; ++ ++ /* Use CIA recommended sample points */ ++ if (bitrate > 800000) ++ sampl_pt = 750; ++ else if (bitrate > 500000) ++ sampl_pt = 800; ++ else ++ sampl_pt = 875; ++ ++ /* tseg even = round down, odd = round up */ ++ for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; ++ tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { ++ tsegall = 1 + tseg / 2; ++ ++ /* Compute all possible tseg 
choices (tseg=tseg1+tseg2) */ ++ brp = dev->can_sys_clock / (tsegall * bitrate) + tseg % 2; ++ ++ /* chose brp step which is possible in system */ ++ brp = (brp / btc->brp_inc) * btc->brp_inc; ++ if ((brp < btc->brp_min) || (brp > btc->brp_max)) ++ continue; ++ ++ rate = dev->can_sys_clock / (brp * tsegall); ++ rate_error = abs((long)(bitrate - rate)); ++ ++ /* tseg brp biterror */ ++ if (rate_error > best_rate_error) ++ continue; ++ ++ /* reset sample point error if we have a better bitrate */ ++ if (rate_error < best_rate_error) ++ best_spt_error = 1000; ++ ++ spt = can_update_spt(btc, sampl_pt, tseg / 2, &tseg1, &tseg2); ++ spt_error = abs((long)(sampl_pt - spt)); ++ if (spt_error > best_spt_error) ++ continue; ++ ++ best_spt_error = spt_error; ++ best_rate_error = rate_error; ++ best_tseg = tseg / 2; ++ best_brp = brp; ++ ++ if (rate_error == 0 && spt_error == 0) ++ break; ++ } ++ ++ if (best_rate_error) { ++ /* Error in one-tenth of a percent */ ++ rate_error = (best_rate_error * 1000) / bitrate; ++ if (rate_error > CAN_CALC_MAX_ERROR) { ++ rtcandev_err(dev, ++ "bitrate error %ld.%ld%% too high\n", ++ rate_error / 10, rate_error % 10); ++ return -EDOM; ++ } else { ++ rtcandev_warn(dev, "bitrate error %ld.%ld%%\n", ++ rate_error / 10, rate_error % 10); ++ } ++ } ++ ++ /* real sample point */ ++ sampl_pt = can_update_spt(btc, sampl_pt, best_tseg, &tseg1, &tseg2); ++ ++ v64 = (u64)best_brp * 1000000000UL; ++ do_div(v64, dev->can_sys_clock); ++ bt->prop_seg = tseg1 / 2; ++ bt->phase_seg1 = tseg1 - bt->prop_seg; ++ bt->phase_seg2 = tseg2; ++ bt->sjw = 1; ++ bt->sam = 0; ++ bt->brp = best_brp; ++ ++ /* real bit-rate */ ++ rate = dev->can_sys_clock / (bt->brp * (tseg1 + tseg2 + 1)); ++ ++ rtcandev_dbg(dev, "real bitrate %ld, sampling point %d.%d%%\n", ++ rate, sampl_pt/10, sampl_pt%10); ++ ++ return 0; ++} ++ ++#endif /* CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD */ ++ ++static inline int rtcan_raw_ioctl_dev_get(struct rtcan_device *dev, ++ int request, struct can_ifreq *ifr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ ++ switch (request) { ++ ++ case SIOCGIFINDEX: ++ ifr->ifr_ifindex = dev->ifindex; ++ break; ++ ++ case SIOCGCANSTATE: ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ if (dev->do_get_state) ++ dev->state = dev->do_get_state(dev); ++ ifr->ifr_ifru.state = dev->state; ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ break; ++ ++ case SIOCGCANCTRLMODE: ++ ifr->ifr_ifru.ctrlmode = dev->ctrl_mode; ++ break; ++ ++ case SIOCGCANBAUDRATE: ++ ifr->ifr_ifru.baudrate = dev->baudrate; ++ break; ++ ++ case SIOCGCANCUSTOMBITTIME: ++ ifr->ifr_ifru.bittime = dev->bit_time; ++ break; ++ } ++ ++ return 0; ++} ++ ++static inline int rtcan_raw_ioctl_dev_set(struct rtcan_device *dev, ++ int request, struct can_ifreq *ifr) ++{ ++ rtdm_lockctx_t lock_ctx; ++ int ret = 0, started = 0; ++ struct can_bittime bit_time, *bt; ++ ++ switch (request) { ++ case SIOCSCANBAUDRATE: ++ if (!dev->do_set_bit_time) ++ return 0; ++ ret = rtcan_calc_bit_time(dev, ifr->ifr_ifru.baudrate, &bit_time.std); ++ if (ret) ++ break; ++ bit_time.type = CAN_BITTIME_STD; ++ break; ++ } ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ ++ if (dev->do_get_state) ++ dev->state = dev->do_get_state(dev); ++ ++ switch (request) { ++ case SIOCSCANCTRLMODE: ++ case SIOCSCANBAUDRATE: ++ case SIOCSCANCUSTOMBITTIME: ++ if ((started = CAN_STATE_OPERATING(dev->state))) { ++ if ((ret = dev->do_set_mode(dev, CAN_MODE_STOP, &lock_ctx))) ++ goto out; ++ } ++ break; ++ } ++ ++ switch (request) { ++ case SIOCSCANMODE: ++ if 
(dev->do_set_mode && ++ !(ifr->ifr_ifru.mode == CAN_MODE_START && ++ CAN_STATE_OPERATING(dev->state))) ++ ret = dev->do_set_mode(dev, ifr->ifr_ifru.mode, &lock_ctx); ++ break; ++ ++ case SIOCSCANCTRLMODE: ++ dev->ctrl_mode = ifr->ifr_ifru.ctrlmode; ++ break; ++ ++ case SIOCSCANBAUDRATE: ++ ret = dev->do_set_bit_time(dev, &bit_time, &lock_ctx); ++ if (!ret) { ++ dev->baudrate = ifr->ifr_ifru.baudrate; ++ dev->bit_time = bit_time; ++ } ++ break; ++ ++ case SIOCSCANCUSTOMBITTIME: ++ bt = &ifr->ifr_ifru.bittime; ++ ret = dev->do_set_bit_time(dev, bt, &lock_ctx); ++ if (!ret) { ++ dev->bit_time = *bt; ++ if (bt->type == CAN_BITTIME_STD && bt->std.brp) ++ dev->baudrate = (dev->can_sys_clock / ++ (bt->std.brp * (1 + bt->std.prop_seg + ++ bt->std.phase_seg1 + ++ bt->std.phase_seg2))); ++ else ++ dev->baudrate = CAN_BAUDRATE_UNKNOWN; ++ } ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ out: ++ if (started) ++ dev->do_set_mode(dev, CAN_MODE_START, &lock_ctx); ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ ++ return ret; ++} ++ ++int rtcan_raw_ioctl_dev(struct rtdm_fd *fd, int request, void *arg) ++{ ++ struct can_ifreq *ifr; ++ int ret = 0, get = 0; ++ union { ++ /* ++ * We need to deal with callers still passing struct ifreq ++ * instead of can_ifreq, which might have a larger memory ++ * footprint (but can't be smaller though). Field offsets ++ * will be the same regardless. ++ */ ++ struct ifreq ifr_legacy; ++ struct can_ifreq ifr_can; ++ } ifr_buf; ++ struct rtcan_device *dev; ++ ++ switch (request) { ++ ++ case SIOCGIFINDEX: ++ case SIOCGCANSTATE: ++ case SIOCGCANBAUDRATE: ++ case SIOCGCANCUSTOMBITTIME: ++ get = 1; ++ /* Falldown wanted. */ ++ case SIOCSCANMODE: ++ case SIOCSCANCTRLMODE: ++ case SIOCSCANBAUDRATE: ++ case SIOCSCANCUSTOMBITTIME: ++ ++ if (rtdm_fd_is_user(fd)) { ++ /* Copy struct can_ifreq from userspace */ ++ if (!rtdm_read_user_ok(fd, arg, ++ sizeof(struct can_ifreq)) || ++ rtdm_copy_from_user(fd, &ifr_buf, arg, ++ sizeof(struct can_ifreq))) ++ return -EFAULT; ++ ++ ifr = &ifr_buf.ifr_can; ++ } else ++ ifr = (struct can_ifreq *)arg; ++ ++ /* Get interface index and data */ ++ dev = rtcan_dev_get_by_name(ifr->ifr_name); ++ if (dev == NULL) ++ return -ENODEV; ++ ++ if (get) { ++ ret = rtcan_raw_ioctl_dev_get(dev, request, ifr); ++ rtcan_dev_dereference(dev); ++ if (ret == 0 && rtdm_fd_is_user(fd)) { ++ /* ++ * Since we yet tested if user memory is rw safe, ++ * we can copy to user space directly. 
++ */ ++ if (rtdm_copy_to_user(fd, arg, ifr, ++ sizeof(struct can_ifreq))) ++ return -EFAULT; ++ } ++ } else { ++ ret = rtcan_raw_ioctl_dev_set(dev, request, ifr); ++ rtcan_dev_dereference(dev); ++ } ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_BUS_ERR ++void __rtcan_raw_enable_bus_err(struct rtcan_socket *sock) ++{ ++ int i, begin, end; ++ struct rtcan_device *dev; ++ rtdm_lockctx_t lock_ctx; ++ int ifindex = atomic_read(&sock->ifindex); ++ ++ if (ifindex) { ++ begin = ifindex; ++ end = ifindex; ++ } else { ++ begin = 1; ++ end = RTCAN_MAX_DEVICES; ++ } ++ ++ for (i = begin; i <= end; i++) { ++ if ((dev = rtcan_dev_get_by_index(i)) == NULL) ++ continue; ++ ++ if (dev->do_enable_bus_err) { ++ rtdm_lock_get_irqsave(&dev->device_lock, lock_ctx); ++ dev->do_enable_bus_err(dev); ++ rtdm_lock_put_irqrestore(&dev->device_lock, lock_ctx); ++ } ++ rtcan_dev_dereference(dev); ++ } ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_BUS_ERR*/ +--- linux/drivers/xenomai/can/rtcan_flexcan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_flexcan.c 2021-04-07 16:01:26.425635312 +0800 +@@ -0,0 +1,1536 @@ ++/* ++ * RTDM-based FLEXCAN CAN controller driver ++ * ++ * Rebased on linux 4.14.58 flexcan driver: ++ * Copyright (c) 2018 Philippe Gerum ++ * ++ * Original port to RTDM: ++ * Copyright (c) 2012 Wolfgang Grandegger ++ * ++ * Copyright (c) 2005-2006 Varma Electronics Oy ++ * Copyright (c) 2009 Sascha Hauer, Pengutronix ++ * Copyright (c) 2010-2017 Pengutronix, Marc Kleine-Budde ++ * Copyright (c) 2014 David Jander, Protonic Holland ++ * ++ * Based on code originally by Andrey Volkov ++ * ++ * LICENCE: ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation version 2. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++#include ++ ++#define DRV_NAME "flexcan" ++#define DEV_NAME "rtcan%d" ++ ++#define CAN_MAX_DLC 8 ++#define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) ++ ++/* 8 for RX fifo and 2 error handling */ ++#define FLEXCAN_NAPI_WEIGHT (8 + 2) ++ ++/* FLEXCAN module configuration register (CANMCR) bits */ ++#define FLEXCAN_MCR_MDIS BIT(31) ++#define FLEXCAN_MCR_FRZ BIT(30) ++#define FLEXCAN_MCR_FEN BIT(29) ++#define FLEXCAN_MCR_HALT BIT(28) ++#define FLEXCAN_MCR_NOT_RDY BIT(27) ++#define FLEXCAN_MCR_WAK_MSK BIT(26) ++#define FLEXCAN_MCR_SOFTRST BIT(25) ++#define FLEXCAN_MCR_FRZ_ACK BIT(24) ++#define FLEXCAN_MCR_SUPV BIT(23) ++#define FLEXCAN_MCR_SLF_WAK BIT(22) ++#define FLEXCAN_MCR_WRN_EN BIT(21) ++#define FLEXCAN_MCR_LPM_ACK BIT(20) ++#define FLEXCAN_MCR_WAK_SRC BIT(19) ++#define FLEXCAN_MCR_DOZE BIT(18) ++#define FLEXCAN_MCR_SRX_DIS BIT(17) ++#define FLEXCAN_MCR_IRMQ BIT(16) ++#define FLEXCAN_MCR_LPRIO_EN BIT(13) ++#define FLEXCAN_MCR_AEN BIT(12) ++/* MCR_MAXMB: maximum used MBs is MAXMB + 1 */ ++#define FLEXCAN_MCR_MAXMB(x) ((x) & 0x7f) ++#define FLEXCAN_MCR_IDAM_A (0x0 << 8) ++#define FLEXCAN_MCR_IDAM_B (0x1 << 8) ++#define FLEXCAN_MCR_IDAM_C (0x2 << 8) ++#define FLEXCAN_MCR_IDAM_D (0x3 << 8) ++ ++/* FLEXCAN control register (CANCTRL) bits */ ++#define FLEXCAN_CTRL_PRESDIV(x) (((x) & 0xff) << 24) ++#define FLEXCAN_CTRL_RJW(x) (((x) & 0x03) << 22) ++#define FLEXCAN_CTRL_PSEG1(x) (((x) & 0x07) << 19) ++#define FLEXCAN_CTRL_PSEG2(x) (((x) & 0x07) << 16) ++#define FLEXCAN_CTRL_BOFF_MSK BIT(15) ++#define FLEXCAN_CTRL_ERR_MSK BIT(14) ++#define FLEXCAN_CTRL_CLK_SRC BIT(13) ++#define FLEXCAN_CTRL_LPB BIT(12) ++#define FLEXCAN_CTRL_TWRN_MSK BIT(11) ++#define FLEXCAN_CTRL_RWRN_MSK BIT(10) ++#define FLEXCAN_CTRL_SMP BIT(7) ++#define FLEXCAN_CTRL_BOFF_REC BIT(6) ++#define FLEXCAN_CTRL_TSYN BIT(5) ++#define FLEXCAN_CTRL_LBUF BIT(4) ++#define FLEXCAN_CTRL_LOM BIT(3) ++#define FLEXCAN_CTRL_PROPSEG(x) ((x) & 0x07) ++#define FLEXCAN_CTRL_ERR_BUS (FLEXCAN_CTRL_ERR_MSK) ++#define FLEXCAN_CTRL_ERR_STATE \ ++ (FLEXCAN_CTRL_TWRN_MSK | FLEXCAN_CTRL_RWRN_MSK | \ ++ FLEXCAN_CTRL_BOFF_MSK) ++#define FLEXCAN_CTRL_ERR_ALL \ ++ (FLEXCAN_CTRL_ERR_BUS | FLEXCAN_CTRL_ERR_STATE) ++ ++/* FLEXCAN control register 2 (CTRL2) bits */ ++#define FLEXCAN_CTRL2_ECRWRE BIT(29) ++#define FLEXCAN_CTRL2_WRMFRZ BIT(28) ++#define FLEXCAN_CTRL2_RFFN(x) (((x) & 0x0f) << 24) ++#define FLEXCAN_CTRL2_TASD(x) (((x) & 0x1f) << 19) ++#define FLEXCAN_CTRL2_MRP BIT(18) ++#define FLEXCAN_CTRL2_RRS BIT(17) ++#define FLEXCAN_CTRL2_EACEN BIT(16) ++ ++/* FLEXCAN memory error control register (MECR) bits */ ++#define FLEXCAN_MECR_ECRWRDIS BIT(31) ++#define FLEXCAN_MECR_HANCEI_MSK BIT(19) ++#define FLEXCAN_MECR_FANCEI_MSK BIT(18) ++#define FLEXCAN_MECR_CEI_MSK BIT(16) ++#define FLEXCAN_MECR_HAERRIE BIT(15) ++#define FLEXCAN_MECR_FAERRIE BIT(14) ++#define FLEXCAN_MECR_EXTERRIE BIT(13) ++#define FLEXCAN_MECR_RERRDIS BIT(9) ++#define FLEXCAN_MECR_ECCDIS BIT(8) ++#define FLEXCAN_MECR_NCEFAFRZ BIT(7) ++ ++/* FLEXCAN error and status register (ESR) bits */ ++#define FLEXCAN_ESR_TWRN_INT BIT(17) ++#define FLEXCAN_ESR_RWRN_INT BIT(16) ++#define FLEXCAN_ESR_BIT1_ERR BIT(15) ++#define FLEXCAN_ESR_BIT0_ERR BIT(14) ++#define FLEXCAN_ESR_ACK_ERR BIT(13) ++#define FLEXCAN_ESR_CRC_ERR BIT(12) ++#define FLEXCAN_ESR_FRM_ERR BIT(11) ++#define FLEXCAN_ESR_STF_ERR 
BIT(10) ++#define FLEXCAN_ESR_TX_WRN BIT(9) ++#define FLEXCAN_ESR_RX_WRN BIT(8) ++#define FLEXCAN_ESR_IDLE BIT(7) ++#define FLEXCAN_ESR_TXRX BIT(6) ++#define FLEXCAN_EST_FLT_CONF_SHIFT (4) ++#define FLEXCAN_ESR_FLT_CONF_MASK (0x3 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_FLT_CONF_ACTIVE (0x0 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_FLT_CONF_PASSIVE (0x1 << FLEXCAN_EST_FLT_CONF_SHIFT) ++#define FLEXCAN_ESR_BOFF_INT BIT(2) ++#define FLEXCAN_ESR_ERR_INT BIT(1) ++#define FLEXCAN_ESR_WAK_INT BIT(0) ++#define FLEXCAN_ESR_ERR_BUS \ ++ (FLEXCAN_ESR_BIT1_ERR | FLEXCAN_ESR_BIT0_ERR | \ ++ FLEXCAN_ESR_ACK_ERR | FLEXCAN_ESR_CRC_ERR | \ ++ FLEXCAN_ESR_FRM_ERR | FLEXCAN_ESR_STF_ERR) ++#define FLEXCAN_ESR_ERR_STATE \ ++ (FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | FLEXCAN_ESR_BOFF_INT) ++#define FLEXCAN_ESR_ERR_ALL \ ++ (FLEXCAN_ESR_ERR_BUS | FLEXCAN_ESR_ERR_STATE) ++#define FLEXCAN_ESR_ALL_INT \ ++ (FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | \ ++ FLEXCAN_ESR_BOFF_INT | FLEXCAN_ESR_ERR_INT) ++ ++/* FLEXCAN interrupt flag register (IFLAG) bits */ ++/* Errata ERR005829 step7: Reserve first valid MB */ ++#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8 ++#define FLEXCAN_TX_MB_OFF_FIFO 9 ++#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0 ++#define FLEXCAN_TX_MB_OFF_TIMESTAMP 1 ++#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_OFF_TIMESTAMP + 1) ++#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST 63 ++#define FLEXCAN_RX_MB_TIMESTAMP_COUNT (FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST - \ ++ FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST + 1) ++#define FLEXCAN_IFLAG_MB(x) BIT(x) ++#define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7) ++#define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6) ++#define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5) ++ ++/* FLEXCAN message buffers */ ++#define FLEXCAN_MB_CODE_MASK (0xf << 24) ++#define FLEXCAN_MB_CODE_RX_BUSY_BIT (0x1 << 24) ++#define FLEXCAN_MB_CODE_RX_INACTIVE (0x0 << 24) ++#define FLEXCAN_MB_CODE_RX_EMPTY (0x4 << 24) ++#define FLEXCAN_MB_CODE_RX_FULL (0x2 << 24) ++#define FLEXCAN_MB_CODE_RX_OVERRUN (0x6 << 24) ++#define FLEXCAN_MB_CODE_RX_RANSWER (0xa << 24) ++ ++#define FLEXCAN_MB_CODE_TX_INACTIVE (0x8 << 24) ++#define FLEXCAN_MB_CODE_TX_ABORT (0x9 << 24) ++#define FLEXCAN_MB_CODE_TX_DATA (0xc << 24) ++#define FLEXCAN_MB_CODE_TX_TANSWER (0xe << 24) ++ ++#define FLEXCAN_MB_CNT_SRR BIT(22) ++#define FLEXCAN_MB_CNT_IDE BIT(21) ++#define FLEXCAN_MB_CNT_RTR BIT(20) ++#define FLEXCAN_MB_CNT_LENGTH(x) (((x) & 0xf) << 16) ++#define FLEXCAN_MB_CNT_TIMESTAMP(x) ((x) & 0xffff) ++ ++#define FLEXCAN_TIMEOUT_US (50) ++ ++/* FLEXCAN hardware feature flags ++ * ++ * Below is some version info we got: ++ * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- ++ * Filter? connected? Passive detection ception in MB ++ * MX25 FlexCAN2 03.00.00.00 no no ? no no ++ * MX28 FlexCAN2 03.00.04.00 yes yes no no no ++ * MX35 FlexCAN2 03.00.00.00 no no ? no no ++ * MX53 FlexCAN2 03.00.00.00 yes no no no no ++ * MX6s FlexCAN3 10.00.12.00 yes yes no no yes ++ * VF610 FlexCAN3 ? no yes no yes yes? ++ * ++ * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. 
++ */ ++#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1) /* [TR]WRN_INT not connected */ ++#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ ++#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ ++#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ ++#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ ++#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ ++ ++/* Structure of the message buffer */ ++struct flexcan_mb { ++ u32 can_ctrl; ++ u32 can_id; ++ u32 data[2]; ++}; ++ ++/* Structure of the hardware registers */ ++struct flexcan_regs { ++ u32 mcr; /* 0x00 */ ++ u32 ctrl; /* 0x04 */ ++ u32 timer; /* 0x08 */ ++ u32 _reserved1; /* 0x0c */ ++ u32 rxgmask; /* 0x10 */ ++ u32 rx14mask; /* 0x14 */ ++ u32 rx15mask; /* 0x18 */ ++ u32 ecr; /* 0x1c */ ++ u32 esr; /* 0x20 */ ++ u32 imask2; /* 0x24 */ ++ u32 imask1; /* 0x28 */ ++ u32 iflag2; /* 0x2c */ ++ u32 iflag1; /* 0x30 */ ++ union { /* 0x34 */ ++ u32 gfwr_mx28; /* MX28, MX53 */ ++ u32 ctrl2; /* MX6, VF610 */ ++ }; ++ u32 esr2; /* 0x38 */ ++ u32 imeur; /* 0x3c */ ++ u32 lrfr; /* 0x40 */ ++ u32 crcr; /* 0x44 */ ++ u32 rxfgmask; /* 0x48 */ ++ u32 rxfir; /* 0x4c */ ++ u32 _reserved3[12]; /* 0x50 */ ++ struct flexcan_mb mb[64]; /* 0x80 */ ++ /* FIFO-mode: ++ * MB ++ * 0x080...0x08f 0 RX message buffer ++ * 0x090...0x0df 1-5 reserverd ++ * 0x0e0...0x0ff 6-7 8 entry ID table ++ * (mx25, mx28, mx35, mx53) ++ * 0x0e0...0x2df 6-7..37 8..128 entry ID table ++ * size conf'ed via ctrl2::RFFN ++ * (mx6, vf610) ++ */ ++ u32 _reserved4[256]; /* 0x480 */ ++ u32 rximr[64]; /* 0x880 */ ++ u32 _reserved5[24]; /* 0x980 */ ++ u32 gfwr_mx6; /* 0x9e0 - MX6 */ ++ u32 _reserved6[63]; /* 0x9e4 */ ++ u32 mecr; /* 0xae0 */ ++ u32 erriar; /* 0xae4 */ ++ u32 erridpr; /* 0xae8 */ ++ u32 errippr; /* 0xaec */ ++ u32 rerrar; /* 0xaf0 */ ++ u32 rerrdr; /* 0xaf4 */ ++ u32 rerrsynr; /* 0xaf8 */ ++ u32 errsr; /* 0xafc */ ++}; ++ ++struct flexcan_devtype_data { ++ u32 quirks; /* quirks needed for different IP cores */ ++}; ++ ++struct flexcan_timestamped_frame { ++ struct rtcan_skb skb; ++ u32 timestamp; ++ struct list_head next; ++}; ++ ++struct flexcan_priv { ++ unsigned int irq; ++ unsigned int mb_first; ++ unsigned int mb_last; ++ struct can_bittime bittiming; ++ struct flexcan_timestamped_frame *ts_frames; ++ ++ struct flexcan_regs __iomem *regs; ++ struct flexcan_mb __iomem *tx_mb; ++ struct flexcan_mb __iomem *tx_mb_reserved; ++ u8 tx_mb_idx; ++ u32 reg_ctrl_default; ++ u32 reg_imask1_default; ++ u32 reg_imask2_default; ++ ++ struct clk *clk_ipg; ++ struct clk *clk_per; ++ const struct flexcan_devtype_data *devtype_data; ++ struct regulator *reg_xceiver; ++ ++ unsigned long bus_errors; ++}; ++ ++static const struct flexcan_devtype_data fsl_p1010_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | ++ FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_imx28_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | ++ FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct flexcan_devtype_data fsl_vf610_devtype_data = { ++ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | ++ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | ++ 
FLEXCAN_QUIRK_BROKEN_PERR_STATE, ++}; ++ ++static const struct can_bittiming_const flexcan_bittiming_const = { ++ .name = DRV_NAME, ++ .tseg1_min = 4, ++ .tseg1_max = 16, ++ .tseg2_min = 2, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 256, ++ .brp_inc = 1, ++}; ++ ++/* Abstract off the read/write for arm versus ppc. This ++ * assumes that PPC uses big-endian registers and everything ++ * else uses little-endian registers, independent of CPU ++ * endianness. ++ */ ++#if defined(CONFIG_PPC) ++static inline u32 flexcan_read(void __iomem *addr) ++{ ++ return in_be32(addr); ++} ++ ++static inline void flexcan_write(u32 val, void __iomem *addr) ++{ ++ out_be32(addr, val); ++} ++#else ++static inline u32 flexcan_read(void __iomem *addr) ++{ ++ return readl(addr); ++} ++ ++static inline void flexcan_write(u32 val, void __iomem *addr) ++{ ++ writel(val, addr); ++} ++#endif ++ ++static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_ctrl = (priv->reg_ctrl_default | FLEXCAN_CTRL_ERR_MSK); ++ ++ flexcan_write(reg_ctrl, ®s->ctrl); ++} ++ ++static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_ctrl = (priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_MSK); ++ ++ flexcan_write(reg_ctrl, ®s->ctrl); ++} ++ ++static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) ++{ ++ if (!priv->reg_xceiver) ++ return 0; ++ ++ return regulator_enable(priv->reg_xceiver); ++} ++ ++static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv) ++{ ++ if (!priv->reg_xceiver) ++ return 0; ++ ++ return regulator_disable(priv->reg_xceiver); ++} ++ ++static int flexcan_chip_enable(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg &= ~FLEXCAN_MCR_MDIS; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_disable(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_MDIS; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ udelay(10); ++ ++ if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_freeze(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = 1000 * 1000 * 10 / dev->baudrate; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_HALT; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) ++ udelay(100); ++ ++ if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_unfreeze(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ u32 reg; ++ ++ reg = flexcan_read(®s->mcr); ++ reg &= ~FLEXCAN_MCR_HALT; ++ flexcan_write(reg, ®s->mcr); ++ ++ while (timeout-- && (flexcan_read(®s->mcr) & 
FLEXCAN_MCR_FRZ_ACK)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_chip_softreset(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; ++ ++ flexcan_write(FLEXCAN_MCR_SOFTRST, ®s->mcr); ++ while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST)) ++ udelay(10); ++ ++ if (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++static int flexcan_start_xmit(struct rtcan_device *dev, struct can_frame *cf) ++{ ++ const struct flexcan_priv *priv = rtcan_priv(dev); ++ u32 can_id, data, ctrl; ++ ++ ctrl = FLEXCAN_MB_CODE_TX_DATA | (cf->can_dlc << 16); ++ if (cf->can_id & CAN_EFF_FLAG) { ++ can_id = cf->can_id & CAN_EFF_MASK; ++ ctrl |= FLEXCAN_MB_CNT_IDE | FLEXCAN_MB_CNT_SRR; ++ } else { ++ can_id = (cf->can_id & CAN_SFF_MASK) << 18; ++ } ++ ++ if (cf->can_id & CAN_RTR_FLAG) ++ ctrl |= FLEXCAN_MB_CNT_RTR; ++ ++ if (cf->can_dlc > CAN_MAX_DLC) ++ cf->can_dlc = CAN_MAX_DLC; ++ ++ if (cf->can_dlc > 0) { ++ data = be32_to_cpup((__be32 *)&cf->data[0]); ++ flexcan_write(data, &priv->tx_mb->data[0]); ++ } ++ if (cf->can_dlc > 4) { ++ data = be32_to_cpup((__be32 *)&cf->data[4]); ++ flexcan_write(data, &priv->tx_mb->data[1]); ++ } ++ ++ flexcan_write(can_id, &priv->tx_mb->can_id); ++ flexcan_write(ctrl, &priv->tx_mb->can_ctrl); ++ ++ /* Errata ERR005829 step8: ++ * Write twice INACTIVE(0x8) code to first MB. ++ */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ ++ return 0; ++} ++ ++static void init_err_skb(struct rtcan_skb *skb) ++{ ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ cf->can_id = CAN_ERR_FLAG; ++ cf->can_dlc = CAN_ERR_DLC; ++ memset(&cf->data[0], 0, cf->can_dlc); ++} ++ ++static void flexcan_irq_bus_err(struct rtcan_device *dev, ++ u32 reg_esr, struct rtcan_skb *skb) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ init_err_skb(skb); ++ ++ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; ++ ++ if (reg_esr & FLEXCAN_ESR_BIT1_ERR) { ++ rtcandev_dbg(dev, "BIT1_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT1; ++ } ++ if (reg_esr & FLEXCAN_ESR_BIT0_ERR) { ++ rtcandev_dbg(dev, "BIT0_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT0; ++ } ++ if (reg_esr & FLEXCAN_ESR_ACK_ERR) { ++ rtcandev_dbg(dev, "ACK_ERR irq\n"); ++ cf->can_id |= CAN_ERR_ACK; ++ cf->data[3] = CAN_ERR_PROT_LOC_ACK; ++ } ++ if (reg_esr & FLEXCAN_ESR_CRC_ERR) { ++ rtcandev_dbg(dev, "CRC_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_BIT; ++ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; ++ } ++ if (reg_esr & FLEXCAN_ESR_FRM_ERR) { ++ rtcandev_dbg(dev, "FRM_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_FORM; ++ } ++ if (reg_esr & FLEXCAN_ESR_STF_ERR) { ++ rtcandev_dbg(dev, "STF_ERR irq\n"); ++ cf->data[2] |= CAN_ERR_PROT_STUFF; ++ } ++ ++ priv->bus_errors++; ++} ++ ++struct berr_counter { ++ u16 txerr; ++ u16 rxerr; ++}; ++ ++static void flexcan_change_state(struct rtcan_device *dev, ++ struct rtcan_rb_frame *cf, ++ struct berr_counter *bec, ++ can_state_t new_state) ++{ ++ switch (dev->state) { ++ case CAN_STATE_ERROR_ACTIVE: ++ /* ++ * from: ERROR_ACTIVE ++ * to : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF ++ * => : there was a warning int ++ */ ++ if (new_state >= CAN_STATE_ERROR_WARNING && ++ 
new_state <= CAN_STATE_BUS_OFF) { ++ rtcandev_dbg(dev, "Error Warning IRQ\n"); ++ ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = (bec->txerr > bec->rxerr) ? ++ CAN_ERR_CRTL_TX_WARNING : ++ CAN_ERR_CRTL_RX_WARNING; ++ } ++ case CAN_STATE_ERROR_WARNING: /* fallthrough */ ++ /* ++ * from: ERROR_ACTIVE, ERROR_WARNING ++ * to : ERROR_PASSIVE, BUS_OFF ++ * => : error passive int ++ */ ++ if (new_state >= CAN_STATE_ERROR_PASSIVE && ++ new_state <= CAN_STATE_BUS_OFF) { ++ rtcandev_dbg(dev, "Error Passive IRQ\n"); ++ ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = (bec->txerr > bec->rxerr) ? ++ CAN_ERR_CRTL_TX_PASSIVE : ++ CAN_ERR_CRTL_RX_PASSIVE; ++ } ++ break; ++ case CAN_STATE_BUS_OFF: ++ rtcandev_err(dev, "BUG! " ++ "hardware recovered automatically from BUS_OFF\n"); ++ break; ++ default: ++ break; ++ } ++ ++ /* process state changes depending on the new state */ ++ switch (new_state) { ++ case CAN_STATE_ERROR_ACTIVE: ++ rtcandev_dbg(dev, "Error Active\n"); ++ cf->can_id |= CAN_ERR_PROT; ++ cf->data[2] = CAN_ERR_PROT_ACTIVE; ++ break; ++ case CAN_STATE_BUS_OFF: ++ cf->can_id |= CAN_ERR_BUSOFF; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ default: ++ break; ++ } ++ ++ dev->state = new_state; ++} ++ ++static bool flexcan_irq_state(struct rtcan_device *dev, u32 reg_esr, ++ struct rtcan_skb *skb) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ enum CAN_STATE new_state, rx_state, tx_state; ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ struct berr_counter bec; ++ u32 reg; ++ int flt; ++ ++ reg = flexcan_read(®s->ecr); ++ bec.txerr = (reg >> 0) & 0xff; ++ bec.rxerr = (reg >> 8) & 0xff; ++ ++ flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK; ++ if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) { ++ tx_state = unlikely(reg_esr & FLEXCAN_ESR_TX_WRN) ? ++ CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE; ++ rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ? ++ CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE; ++ new_state = max(tx_state, rx_state); ++ } else ++ new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ? ++ CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF; ++ ++ /* state hasn't changed */ ++ if (likely(new_state == dev->state)) ++ return false; ++ ++ init_err_skb(skb); ++ ++ flexcan_change_state(dev, cf, &bec, new_state); ++ ++ return true; ++} ++ ++static unsigned int flexcan_mailbox_read(struct rtcan_device *dev, ++ struct rtcan_skb *skb, ++ u32 *timestamp, unsigned int n) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ struct flexcan_mb __iomem *mb = ®s->mb[n]; ++ u32 reg_ctrl, reg_id, reg_iflag1, code; ++ struct rtcan_rb_frame *cf = &skb->rb_frame; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ do { ++ reg_ctrl = flexcan_read(&mb->can_ctrl); ++ } while (reg_ctrl & FLEXCAN_MB_CODE_RX_BUSY_BIT); ++ ++ /* is this MB empty? 
*/ ++ code = reg_ctrl & FLEXCAN_MB_CODE_MASK; ++ if ((code != FLEXCAN_MB_CODE_RX_FULL) && ++ (code != FLEXCAN_MB_CODE_RX_OVERRUN)) ++ return 0; ++ } else { ++ reg_iflag1 = flexcan_read(®s->iflag1); ++ if (!(reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE)) ++ return 0; ++ ++ reg_ctrl = flexcan_read(&mb->can_ctrl); ++ } ++ ++ /* increase timstamp to full 32 bit */ ++ *timestamp = reg_ctrl << 16; ++ ++ cf->can_dlc = get_can_dlc((reg_ctrl >> 16) & 0xf); ++ reg_id = flexcan_read(&mb->can_id); ++ if (reg_ctrl & FLEXCAN_MB_CNT_IDE) ++ cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; ++ else ++ cf->can_id = (reg_id >> 18) & CAN_SFF_MASK; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ if (reg_ctrl & FLEXCAN_MB_CNT_RTR) ++ cf->can_id |= CAN_RTR_FLAG; ++ else ++ skb->rb_frame_size += cf->can_dlc; ++ ++ put_unaligned_be32(flexcan_read(&mb->data[0]), cf->data + 0); ++ put_unaligned_be32(flexcan_read(&mb->data[1]), cf->data + 4); ++ ++ cf->can_ifindex = dev->ifindex; ++ ++ /* mark as read */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ /* Clear IRQ */ ++ if (n < 32) ++ flexcan_write(BIT(n), ®s->iflag1); ++ else ++ flexcan_write(BIT(n - 32), ®s->iflag2); ++ } else { ++ flexcan_write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->iflag1); ++ flexcan_read(®s->timer); ++ } ++ ++ return 1; ++} ++ ++static inline bool flexcan_rx_le(struct flexcan_priv *priv, unsigned int a, unsigned int b) ++{ ++ if (priv->mb_first < priv->mb_last) ++ return a <= b; ++ ++ return a >= b; ++} ++ ++static inline unsigned int flexcan_rx_inc(struct flexcan_priv *priv, unsigned int *val) ++{ ++ if (priv->mb_first < priv->mb_last) ++ return (*val)++; ++ ++ return (*val)--; ++} ++ ++static int flexcan_mailbox_read_timestamp(struct rtcan_device *dev, u64 pending) ++{ ++ struct flexcan_timestamped_frame *new, *pos, *tmp; ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct list_head q, *head; ++ int i, count = 0; ++ ++ INIT_LIST_HEAD(&q); ++ ++ for (i = priv->mb_first; ++ flexcan_rx_le(priv, i, priv->mb_last); ++ flexcan_rx_inc(priv, &i)) { ++ if (!(pending & BIT_ULL(i))) ++ continue; ++ ++ new = priv->ts_frames + (i - priv->mb_first); ++ if (!flexcan_mailbox_read(dev, &new->skb, &new->timestamp, i)) ++ break; ++ ++ head = &q; ++ if (list_empty(&q)) ++ goto add; ++ ++ list_for_each_entry_reverse(pos, &q, next) { ++ /* ++ * Substract two u32 and return result as int, ++ * to keep difference steady around the u32 ++ * overflow. 
++ */ ++ if (((int)(new->timestamp - pos->timestamp)) >= 0) { ++ head = &pos->next; ++ break; ++ } ++ } ++ add: ++ list_add(&new->next, head); ++ count++; ++ } ++ ++ if (list_empty(&q)) ++ return 0; ++ ++ list_for_each_entry_safe(pos, tmp, &q, next) ++ rtcan_rcv(dev, &pos->skb); ++ ++ return count; ++} ++ ++static void flexcan_mailbox_read_fifo(struct rtcan_device *dev) ++{ ++ struct rtcan_skb skb; ++ u32 timestamp; ++ ++ for (;;) { ++ if (!flexcan_mailbox_read(dev, &skb, ×tamp, 0)) ++ break; ++ rtcan_rcv(dev, &skb); ++ } ++} ++ ++static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv) ++{ ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 iflag1, iflag2; ++ ++ iflag2 = flexcan_read(®s->iflag2) & priv->reg_imask2_default; ++ iflag1 = flexcan_read(®s->iflag1) & priv->reg_imask1_default & ++ ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx); ++ ++ return (u64)iflag2 << 32 | iflag1; ++} ++ ++static int flexcan_do_rx(struct rtcan_device *dev, u32 reg_iflag1) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ struct rtcan_skb skb; ++ struct rtcan_rb_frame *cf = &skb.rb_frame; ++ bool input = false; ++ u64 reg; ++ int ret; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ while ((reg = flexcan_read_reg_iflag_rx(priv))) { ++ input = true; ++ ret = flexcan_mailbox_read_timestamp(dev, reg); ++ if (!ret) ++ break; ++ } ++ } else { ++ if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_OVERFLOW) { ++ flexcan_write(FLEXCAN_IFLAG_RX_FIFO_OVERFLOW, ®s->iflag1); ++ init_err_skb(&skb); ++ cf->can_id |= CAN_ERR_CRTL; ++ cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ input = true; ++ } else if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) { ++ flexcan_mailbox_read_fifo(dev); ++ input = true; ++ } ++ } ++ ++ return input; ++} ++ ++static int flexcan_irq(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_device *dev = rtdm_irq_get_arg(irq_handle, void); ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_iflag1, reg_esr; ++ struct rtcan_skb skb; ++ int handled; ++ ++ rtdm_lock_get(&dev->device_lock); ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ ++ reg_iflag1 = flexcan_read(®s->iflag1); ++ ++ /* reception interrupt */ ++ if (flexcan_do_rx(dev, reg_iflag1)) ++ handled = RTDM_IRQ_HANDLED; ++ ++ /* transmission complete interrupt */ ++ if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { ++ /* after sending a RTR frame MB is in RX mode */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb->can_ctrl); ++ flexcan_write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag1); ++ rtdm_sem_up(&dev->tx_sem); ++ if (rtcan_loopback_pending(dev)) ++ rtcan_loopback(dev); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ reg_esr = flexcan_read(®s->esr); ++ ++ /* ACK all bus error and state change IRQ sources */ ++ if (reg_esr & FLEXCAN_ESR_ALL_INT) { ++ flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ /* state change interrupt or broken error state quirk fix is enabled */ ++ if (reg_esr & FLEXCAN_ESR_ERR_STATE) ++ handled = RTDM_IRQ_HANDLED; ++ else if (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE | ++ FLEXCAN_QUIRK_BROKEN_PERR_STATE)) ++ goto esr_err; ++ ++ if (reg_esr & FLEXCAN_ESR_ERR_STATE) { ++ esr_err: ++ if (flexcan_irq_state(dev, reg_esr, &skb)) { ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ /* bus error IRQ - report unconditionally */ ++ if (reg_esr & FLEXCAN_ESR_ERR_BUS) { ++ flexcan_irq_bus_err(dev, 
reg_esr, &skb); ++ rtcan_rcv(dev, &skb); ++ handled = RTDM_IRQ_HANDLED; ++ } ++ ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ rtdm_lock_put(&dev->device_lock); ++ ++ return handled; ++} ++ ++static void flexcan_set_bittiming(struct rtcan_device *dev) ++{ ++ const struct flexcan_priv *priv = rtcan_priv(dev); ++ const struct can_bittime *bt = &priv->bittiming; ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg; ++ ++ reg = flexcan_read(®s->ctrl); ++ reg &= ~(FLEXCAN_CTRL_PRESDIV(0xff) | ++ FLEXCAN_CTRL_RJW(0x3) | ++ FLEXCAN_CTRL_PSEG1(0x7) | ++ FLEXCAN_CTRL_PSEG2(0x7) | ++ FLEXCAN_CTRL_PROPSEG(0x7) | ++ FLEXCAN_CTRL_LPB | ++ FLEXCAN_CTRL_SMP | ++ FLEXCAN_CTRL_LOM); ++ ++ reg |= FLEXCAN_CTRL_PRESDIV(bt->std.brp - 1) | ++ FLEXCAN_CTRL_PSEG1(bt->std.phase_seg1 - 1) | ++ FLEXCAN_CTRL_PSEG2(bt->std.phase_seg2 - 1) | ++ FLEXCAN_CTRL_RJW(bt->std.sjw - 1) | ++ FLEXCAN_CTRL_PROPSEG(bt->std.prop_seg - 1); ++ ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ reg |= FLEXCAN_CTRL_LPB; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY) ++ reg |= FLEXCAN_CTRL_LOM; ++ if (dev->ctrl_mode & CAN_CTRLMODE_3_SAMPLES) ++ reg |= FLEXCAN_CTRL_SMP; ++ ++ rtcandev_dbg(dev, "writing ctrl=0x%08x\n", reg); ++ flexcan_write(reg, ®s->ctrl); ++ ++ /* print chip status */ ++ rtcandev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__, ++ flexcan_read(®s->mcr), flexcan_read(®s->ctrl)); ++} ++ ++/* flexcan_chip_start ++ * ++ * this functions is entered with clocks enabled ++ * ++ */ ++static int flexcan_chip_start(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr; ++ int err, i; ++ ++ err = clk_prepare_enable(priv->clk_ipg); ++ if (err) ++ return err; ++ ++ err = clk_prepare_enable(priv->clk_per); ++ if (err) ++ goto out_disable_ipg; ++ ++ /* enable module */ ++ err = flexcan_chip_enable(priv); ++ if (err) ++ goto out_disable_per; ++ ++ /* soft reset */ ++ err = flexcan_chip_softreset(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ flexcan_set_bittiming(dev); ++ ++ /* MCR ++ * ++ * enable freeze ++ * enable fifo ++ * halt now ++ * only supervisor access ++ * enable warning int ++ * disable local echo ++ * enable individual RX masking ++ * choose format C ++ * set max mailbox number ++ */ ++ reg_mcr = flexcan_read(®s->mcr); ++ reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); ++ reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | ++ FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ | ++ FLEXCAN_MCR_IDAM_C; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ reg_mcr &= ~FLEXCAN_MCR_FEN; ++ reg_mcr |= FLEXCAN_MCR_MAXMB(priv->mb_last); ++ } else { ++ reg_mcr |= FLEXCAN_MCR_FEN | ++ FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); ++ } ++ rtcandev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr); ++ flexcan_write(reg_mcr, ®s->mcr); ++ ++ /* CTRL ++ * ++ * disable timer sync feature ++ * ++ * disable auto busoff recovery ++ * transmit lowest buffer first ++ * ++ * enable tx and rx warning interrupt ++ * enable bus off interrupt ++ * (== FLEXCAN_CTRL_ERR_STATE) ++ */ ++ reg_ctrl = flexcan_read(®s->ctrl); ++ reg_ctrl &= ~FLEXCAN_CTRL_TSYN; ++ reg_ctrl |= FLEXCAN_CTRL_BOFF_REC | FLEXCAN_CTRL_LBUF | ++ FLEXCAN_CTRL_ERR_STATE; ++ ++ /* enable the "error interrupt" (FLEXCAN_CTRL_ERR_MSK), ++ * on most Flexcan cores, too. Otherwise we don't get ++ * any error warning or passive interrupts. 
++ */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE) ++ reg_ctrl |= FLEXCAN_CTRL_ERR_MSK; ++ else ++ reg_ctrl &= ~FLEXCAN_CTRL_ERR_MSK; ++ ++ /* save for later use */ ++ priv->reg_ctrl_default = reg_ctrl; ++ /* leave interrupts disabled for now */ ++ reg_ctrl &= ~FLEXCAN_CTRL_ERR_ALL; ++ rtcandev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl); ++ flexcan_write(reg_ctrl, ®s->ctrl); ++ ++ if ((priv->devtype_data->quirks & FLEXCAN_QUIRK_ENABLE_EACEN_RRS)) { ++ reg_ctrl2 = flexcan_read(®s->ctrl2); ++ reg_ctrl2 |= FLEXCAN_CTRL2_EACEN | FLEXCAN_CTRL2_RRS; ++ flexcan_write(reg_ctrl2, ®s->ctrl2); ++ } ++ ++ /* clear and invalidate all mailboxes first */ ++ for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) { ++ flexcan_write(FLEXCAN_MB_CODE_RX_INACTIVE, ++ ®s->mb[i].can_ctrl); ++ } ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ for (i = priv->mb_first; i <= priv->mb_last; i++) ++ flexcan_write(FLEXCAN_MB_CODE_RX_EMPTY, ++ ®s->mb[i].can_ctrl); ++ } ++ ++ /* Errata ERR005829: mark first TX mailbox as INACTIVE */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb_reserved->can_ctrl); ++ ++ /* mark TX mailbox as INACTIVE */ ++ flexcan_write(FLEXCAN_MB_CODE_TX_INACTIVE, ++ &priv->tx_mb->can_ctrl); ++ ++ /* acceptance mask/acceptance code (accept everything) */ ++ flexcan_write(0x0, ®s->rxgmask); ++ flexcan_write(0x0, ®s->rx14mask); ++ flexcan_write(0x0, ®s->rx15mask); ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_RXFG) ++ flexcan_write(0x0, ®s->rxfgmask); ++ ++ /* clear acceptance filters */ ++ for (i = 0; i < ARRAY_SIZE(regs->mb); i++) ++ flexcan_write(0, ®s->rximr[i]); ++ ++ /* On Vybrid, disable memory error detection interrupts ++ * and freeze mode. ++ * This also works around errata e5295 which generates ++ * false positive memory errors and put the device in ++ * freeze mode. 
++ */ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_MECR) { ++ /* Follow the protocol as described in "Detection ++ * and Correction of Memory Errors" to write to ++ * MECR register ++ */ ++ reg_ctrl2 = flexcan_read(®s->ctrl2); ++ reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE; ++ flexcan_write(reg_ctrl2, ®s->ctrl2); ++ ++ reg_mecr = flexcan_read(®s->mecr); ++ reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; ++ flexcan_write(reg_mecr, ®s->mecr); ++ reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | ++ FLEXCAN_MECR_FANCEI_MSK); ++ flexcan_write(reg_mecr, ®s->mecr); ++ } ++ ++ err = flexcan_transceiver_enable(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ /* synchronize with the can bus */ ++ err = flexcan_chip_unfreeze(priv); ++ if (err) ++ goto out_transceiver_disable; ++ ++ dev->state = CAN_STATE_ERROR_ACTIVE; ++ ++ /* enable interrupts atomically */ ++ rtdm_irq_disable(&dev->irq_handle); ++ flexcan_write(priv->reg_ctrl_default, ®s->ctrl); ++ flexcan_write(priv->reg_imask1_default, ®s->imask1); ++ flexcan_write(priv->reg_imask2_default, ®s->imask2); ++ rtdm_irq_enable(&dev->irq_handle); ++ ++ /* print chip status */ ++ rtcandev_dbg(dev, "%s: reading mcr=0x%08x ctrl=0x%08x\n", __func__, ++ flexcan_read(®s->mcr), flexcan_read(®s->ctrl)); ++ ++ return 0; ++ ++ out_transceiver_disable: ++ flexcan_transceiver_disable(priv); ++ out_chip_disable: ++ flexcan_chip_disable(priv); ++ out_disable_per: ++ clk_disable_unprepare(priv->clk_per); ++ out_disable_ipg: ++ clk_disable_unprepare(priv->clk_ipg); ++ ++ return err; ++} ++ ++/* flexcan_chip_stop ++ * ++ * this functions is entered with clocks enabled ++ */ ++static void flexcan_chip_stop(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ ++ /* freeze + disable module */ ++ flexcan_chip_freeze(dev); ++ flexcan_chip_disable(priv); ++ ++ /* Disable all interrupts */ ++ flexcan_write(0, ®s->imask2); ++ flexcan_write(0, ®s->imask1); ++ flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, ++ ®s->ctrl); ++ ++ flexcan_transceiver_disable(priv); ++ ++ clk_disable_unprepare(priv->clk_per); ++ clk_disable_unprepare(priv->clk_ipg); ++} ++ ++static int flexcan_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ int err = 0; ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ ++ switch (dev->state) { ++ ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_STOPPED: ++ /* Register IRQ handler and pass device structure as arg */ ++ err = rtdm_irq_request(&dev->irq_handle, priv->irq, ++ flexcan_irq, 0, DRV_NAME, ++ dev); ++ if (err) { ++ rtcandev_err(dev, "couldn't request irq %d\n", ++ priv->irq); ++ goto out; ++ } ++ ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ ++ /* start chip and queuing */ ++ err = flexcan_chip_start(dev); ++ if (err) { ++ rtdm_irq_free(&dev->irq_handle); ++ rtdm_sem_destroy(&dev->tx_sem); ++ goto out; ++ } ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* start chip and queuing */ ++ err = flexcan_chip_start(dev); ++ if (err) { ++ rtdm_sem_destroy(&dev->tx_sem); ++ goto out; ++ } ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ default: ++ err = 0; ++ break; ++ } ++ ++out: ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ return err; ++} ++ ++static int flexcan_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t 
*lock_ctx) ++{ ++ if (!CAN_STATE_OPERATING(dev->state)) ++ return 0; ++ ++ dev->state = CAN_STATE_STOPPED; ++ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ ++ flexcan_chip_stop(dev); ++ rtdm_irq_free(&dev->irq_handle); ++ rtdm_sem_destroy(&dev->tx_sem); ++ ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ return 0; ++} ++ ++static int flexcan_set_mode(struct rtcan_device *dev, can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ if (mode == CAN_MODE_START) ++ return flexcan_mode_start(dev, lock_ctx); ++ ++ if (mode == CAN_MODE_STOP) ++ return flexcan_mode_stop(dev, lock_ctx); ++ ++ return -EOPNOTSUPP; ++} ++ ++static int flexcan_copy_bittiming(struct rtcan_device *dev, ++ struct can_bittime *bt, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ ++ memcpy(&priv->bittiming, bt, sizeof(*bt)); ++ ++ return 0; ++} ++ ++static int register_flexcandev(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ struct flexcan_regs __iomem *regs = priv->regs; ++ u32 reg, err; ++ ++ err = clk_prepare_enable(priv->clk_ipg); ++ if (err) ++ return err; ++ ++ err = clk_prepare_enable(priv->clk_per); ++ if (err) ++ goto out_disable_ipg; ++ ++ /* select "bus clock", chip must be disabled */ ++ err = flexcan_chip_disable(priv); ++ if (err) ++ goto out_disable_per; ++ reg = flexcan_read(®s->ctrl); ++ reg |= FLEXCAN_CTRL_CLK_SRC; ++ flexcan_write(reg, ®s->ctrl); ++ ++ err = flexcan_chip_enable(priv); ++ if (err) ++ goto out_chip_disable; ++ ++ /* set freeze, halt and activate FIFO, restrict register access */ ++ reg = flexcan_read(®s->mcr); ++ reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | ++ FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; ++ flexcan_write(reg, ®s->mcr); ++ ++ /* Currently we only support newer versions of this core ++ * featuring a RX hardware FIFO (although this driver doesn't ++ * make use of it on some cores). Older cores, found on some ++ * Coldfire derivates are not tested. 
++ */ ++ reg = flexcan_read(®s->mcr); ++ if (!(reg & FLEXCAN_MCR_FEN)) { ++ rtcandev_err(dev, "Could not enable RX FIFO, unsupported core\n"); ++ err = -ENODEV; ++ goto out_chip_disable; ++ } ++ ++ err = rtcan_dev_register(dev); ++ ++ /* disable core and turn off clocks */ ++ out_chip_disable: ++ flexcan_chip_disable(priv); ++ out_disable_per: ++ clk_disable_unprepare(priv->clk_per); ++ out_disable_ipg: ++ clk_disable_unprepare(priv->clk_ipg); ++ ++ return err; ++} ++ ++static void unregister_flexcandev(struct rtcan_device *dev) ++{ ++ struct flexcan_priv *priv = rtcan_priv(dev); ++ ++ rtcan_dev_unregister(dev); ++ if (priv->ts_frames) ++ kfree(priv->ts_frames); ++} ++ ++static const struct of_device_id flexcan_of_match[] = { ++ { .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, }, ++ { .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, }, ++ { .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, }, ++ { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(of, flexcan_of_match); ++ ++static const struct platform_device_id flexcan_id_table[] = { ++ { .name = "flexcan", .driver_data = (kernel_ulong_t)&fsl_p1010_devtype_data, }, ++ { /* sentinel */ }, ++}; ++MODULE_DEVICE_TABLE(platform, flexcan_id_table); ++ ++static int flexcan_probe(struct platform_device *pdev) ++{ ++ const struct of_device_id *of_id; ++ const struct flexcan_devtype_data *devtype_data; ++ struct rtcan_device *dev; ++ struct flexcan_priv *priv; ++ struct regulator *reg_xceiver; ++ struct resource *mem; ++ struct clk *clk_ipg = NULL, *clk_per = NULL; ++ struct flexcan_regs __iomem *regs; ++ int err, irq; ++ u32 clock_freq = 0; ++ ++ reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); ++ if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER) ++ return -EPROBE_DEFER; ++ else if (IS_ERR(reg_xceiver)) ++ reg_xceiver = NULL; ++ ++ if (pdev->dev.of_node) ++ of_property_read_u32(pdev->dev.of_node, ++ "clock-frequency", &clock_freq); ++ ++ if (!clock_freq) { ++ clk_ipg = devm_clk_get(&pdev->dev, "ipg"); ++ if (IS_ERR(clk_ipg)) { ++ dev_err(&pdev->dev, "no ipg clock defined\n"); ++ return PTR_ERR(clk_ipg); ++ } ++ ++ clk_per = devm_clk_get(&pdev->dev, "per"); ++ if (IS_ERR(clk_per)) { ++ dev_err(&pdev->dev, "no per clock defined\n"); ++ return PTR_ERR(clk_per); ++ } ++ clock_freq = clk_get_rate(clk_per); ++ } ++ ++ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ irq = platform_get_irq(pdev, 0); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ regs = devm_ioremap_resource(&pdev->dev, mem); ++ if (IS_ERR(regs)) ++ return PTR_ERR(regs); ++ ++ of_id = of_match_device(flexcan_of_match, &pdev->dev); ++ if (of_id) { ++ devtype_data = of_id->data; ++ } else if (platform_get_device_id(pdev)->driver_data) { ++ devtype_data = (struct flexcan_devtype_data *) ++ platform_get_device_id(pdev)->driver_data; ++ } else { ++ return -ENODEV; ++ } ++ ++ dev = rtcan_dev_alloc(sizeof(struct flexcan_priv), 0); ++ if (!dev) ++ return -ENOMEM; ++ ++ platform_set_drvdata(pdev, dev); ++ ++ priv = rtcan_priv(dev); ++ priv->regs = regs; ++ priv->irq = irq; ++ priv->clk_ipg = clk_ipg; ++ priv->clk_per = clk_per; ++ priv->devtype_data = devtype_data; ++ priv->reg_xceiver = reg_xceiver; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP; ++ priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP]; ++ } else { ++ priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO; ++ 
priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO]; ++ } ++ priv->tx_mb = ®s->mb[priv->tx_mb_idx]; ++ ++ priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); ++ priv->reg_imask2_default = 0; ++ ++ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { ++ u64 imask; ++ ++ priv->mb_first = FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST; ++ priv->mb_last = FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST; ++ priv->ts_frames = kzalloc(sizeof(*priv->ts_frames) * ++ FLEXCAN_RX_MB_TIMESTAMP_COUNT, GFP_KERNEL); ++ if (priv->ts_frames == NULL) { ++ err = -ENOMEM; ++ goto failed_fralloc; ++ } ++ ++ imask = GENMASK_ULL(priv->mb_last, priv->mb_first); ++ priv->reg_imask1_default |= imask; ++ priv->reg_imask2_default |= imask >> 32; ++ } else { ++ priv->reg_imask1_default |= FLEXCAN_IFLAG_RX_FIFO_OVERFLOW | ++ FLEXCAN_IFLAG_RX_FIFO_AVAILABLE; ++ priv->ts_frames = NULL; ++ } ++ ++ dev->ctrl_name = "FLEXCAN"; ++ dev->board_name = "FLEXCAN"; ++ dev->base_addr = (unsigned long)regs; ++ dev->can_sys_clock = clock_freq; ++ dev->hard_start_xmit = flexcan_start_xmit; ++ dev->do_set_mode = flexcan_set_mode; ++ dev->do_set_bit_time = flexcan_copy_bittiming; ++ dev->bittiming_const = &flexcan_bittiming_const; ++ dev->state = CAN_STATE_STOPPED; ++ strncpy(dev->name, DEV_NAME, IFNAMSIZ); ++ ++ err = register_flexcandev(dev); ++ if (err) { ++ dev_err(&pdev->dev, "registering netdev failed\n"); ++ goto failed_register; ++ } ++ ++ dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%d)\n", ++ priv->regs, priv->irq); ++ ++ return 0; ++ ++ failed_register: ++ if (priv->ts_frames) ++ kfree(priv->ts_frames); ++ failed_fralloc: ++ rtcan_dev_free(dev); ++ return err; ++} ++ ++static int flexcan_remove(struct platform_device *pdev) ++{ ++ struct rtcan_device *dev = platform_get_drvdata(pdev); ++ ++ unregister_flexcandev(dev); ++ rtcan_dev_free(dev); ++ ++ return 0; ++} ++ ++static struct platform_driver flexcan_driver = { ++ .driver = { ++ .name = DRV_NAME, ++ .of_match_table = flexcan_of_match, ++ }, ++ .probe = flexcan_probe, ++ .remove = flexcan_remove, ++ .id_table = flexcan_id_table, ++}; ++ ++module_platform_driver(flexcan_driver); ++ ++MODULE_AUTHOR("Wolfgang Grandegger , " ++ "Sascha Hauer , " ++ "Marc Kleine-Budde "); ++MODULE_LICENSE("GPL v2"); ++MODULE_DESCRIPTION("RT-CAN port driver for flexcan based chip"); +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_regs.h 2021-04-07 16:01:26.420635319 +0800 +@@ -0,0 +1,226 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Based on linux-2.4.25/include/asm-ppc/mpc5xxx.h ++ * Prototypes, etc. for the Motorola MPC5xxx embedded cpu chips ++ * ++ * Author: Dale Farnsworth ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __RTCAN_MSCAN_REGS_H_ ++#define __RTCAN_MSCAN_REGS_H_ ++ ++#include ++#include ++#include ++ ++static inline void __iomem *mpc5xxx_gpio_find_and_map(void) ++{ ++ struct device_node *ofn; ++ ofn = of_find_compatible_node(NULL, NULL, "mpc5200-gpio"); ++ if (!ofn) ++ ofn = of_find_compatible_node(NULL, NULL, "fsl,mpc5200-gpio"); ++ return ofn ? of_iomap(ofn, 0) : NULL; ++} ++ ++#define MPC5xxx_GPIO mpc5xxx_gpio_find_and_map() ++#define mpc5xxx_gpio mpc52xx_gpio ++ ++#define mpc5xxx_get_of_node(ofdev) (ofdev)->dev.of_node ++ ++#define MSCAN_CAN1_ADDR (MSCAN_MBAR + 0x0900) /* MSCAN Module 1 */ ++#define MSCAN_CAN2_ADDR (MSCAN_MBAR + 0x0980) /* MSCAN Module 2 */ ++#define MSCAN_SIZE 0x80 ++ ++/* MSCAN control register 0 (CANCTL0) bits */ ++#define MSCAN_RXFRM 0x80 ++#define MSCAN_RXACT 0x40 ++#define MSCAN_CSWAI 0x20 ++#define MSCAN_SYNCH 0x10 ++#define MSCAN_TIME 0x08 ++#define MSCAN_WUPE 0x04 ++#define MSCAN_SLPRQ 0x02 ++#define MSCAN_INITRQ 0x01 ++ ++/* MSCAN control register 1 (CANCTL1) bits */ ++#define MSCAN_CANE 0x80 ++#define MSCAN_CLKSRC 0x40 ++#define MSCAN_LOOPB 0x20 ++#define MSCAN_LISTEN 0x10 ++#define MSCAN_WUPM 0x04 ++#define MSCAN_SLPAK 0x02 ++#define MSCAN_INITAK 0x01 ++ ++/* MSCAN receiver flag register (CANRFLG) bits */ ++#define MSCAN_WUPIF 0x80 ++#define MSCAN_CSCIF 0x40 ++#define MSCAN_RSTAT1 0x20 ++#define MSCAN_RSTAT0 0x10 ++#define MSCAN_TSTAT1 0x08 ++#define MSCAN_TSTAT0 0x04 ++#define MSCAN_OVRIF 0x02 ++#define MSCAN_RXF 0x01 ++ ++/* MSCAN receiver interrupt enable register (CANRIER) bits */ ++#define MSCAN_WUPIE 0x80 ++#define MSCAN_CSCIE 0x40 ++#define MSCAN_RSTATE1 0x20 ++#define MSCAN_RSTATE0 0x10 ++#define MSCAN_TSTATE1 0x08 ++#define MSCAN_TSTATE0 0x04 ++#define MSCAN_OVRIE 0x02 ++#define MSCAN_RXFIE 0x01 ++ ++/* MSCAN transmitter flag register (CANTFLG) bits */ ++#define MSCAN_TXE2 0x04 ++#define MSCAN_TXE1 0x02 ++#define MSCAN_TXE0 0x01 ++#define MSCAN_TXE (MSCAN_TXE2 | MSCAN_TXE1 | MSCAN_TXE0) ++ ++/* MSCAN transmitter interrupt enable register (CANTIER) bits */ ++#define MSCAN_TXIE2 0x04 ++#define MSCAN_TXIE1 0x02 ++#define MSCAN_TXIE0 0x01 ++#define MSCAN_TXIE (MSCAN_TXIE2 | MSCAN_TXIE1 | MSCAN_TXIE0) ++ ++/* MSCAN transmitter message abort request (CANTARQ) bits */ ++#define MSCAN_ABTRQ2 0x04 ++#define MSCAN_ABTRQ1 0x02 ++#define MSCAN_ABTRQ0 0x01 ++ ++/* MSCAN transmitter message abort ack (CANTAAK) bits */ ++#define MSCAN_ABTAK2 0x04 ++#define MSCAN_ABTAK1 0x02 ++#define MSCAN_ABTAK0 0x01 ++ ++/* MSCAN transmit buffer selection (CANTBSEL) bits */ ++#define MSCAN_TX2 0x04 ++#define MSCAN_TX1 0x02 ++#define MSCAN_TX0 0x01 ++ ++/* MSCAN ID acceptance control register (CANIDAC) bits */ ++#define MSCAN_IDAM1 0x20 ++#define MSCAN_IDAM0 0x10 ++#define MSCAN_IDHIT2 0x04 ++#define MSCAN_IDHIT1 0x02 ++#define MSCAN_IDHIT0 0x01 ++ ++struct mscan_msgbuf { ++ volatile u8 idr[0x8]; /* 0x00 */ ++ volatile u8 dsr[0x10]; /* 0x08 */ ++ volatile u8 dlr; /* 0x18 */ ++ volatile u8 tbpr; /* 0x19 */ /* This register is not applicable for receive buffers */ ++ volatile u16 rsrv1; /* 0x1A */ ++ volatile u8 tsrh; /* 0x1C */ ++ volatile u8 tsrl; /* 0x1D */ ++ volatile u16 rsrv2; /* 0x1E */ ++}; ++ ++struct mscan_regs { ++ volatile u8 canctl0; /* MSCAN + 0x00 */ ++ volatile u8 canctl1; /* MSCAN + 0x01 */ ++ volatile u16 rsrv1; /* MSCAN + 0x02 */ ++ volatile u8 canbtr0; /* MSCAN + 0x04 */ ++ volatile u8 canbtr1; /* MSCAN + 0x05 */ ++ volatile u16 rsrv2; /* MSCAN + 0x06 */ ++ volatile u8 canrflg; /* MSCAN + 0x08 */ ++ volatile u8 canrier; /* MSCAN + 0x09 */ ++ 
volatile u16 rsrv3; /* MSCAN + 0x0A */ ++ volatile u8 cantflg; /* MSCAN + 0x0C */ ++ volatile u8 cantier; /* MSCAN + 0x0D */ ++ volatile u16 rsrv4; /* MSCAN + 0x0E */ ++ volatile u8 cantarq; /* MSCAN + 0x10 */ ++ volatile u8 cantaak; /* MSCAN + 0x11 */ ++ volatile u16 rsrv5; /* MSCAN + 0x12 */ ++ volatile u8 cantbsel; /* MSCAN + 0x14 */ ++ volatile u8 canidac; /* MSCAN + 0x15 */ ++ volatile u16 rsrv6[3]; /* MSCAN + 0x16 */ ++ volatile u8 canrxerr; /* MSCAN + 0x1C */ ++ volatile u8 cantxerr; /* MSCAN + 0x1D */ ++ volatile u16 rsrv7; /* MSCAN + 0x1E */ ++ volatile u8 canidar0; /* MSCAN + 0x20 */ ++ volatile u8 canidar1; /* MSCAN + 0x21 */ ++ volatile u16 rsrv8; /* MSCAN + 0x22 */ ++ volatile u8 canidar2; /* MSCAN + 0x24 */ ++ volatile u8 canidar3; /* MSCAN + 0x25 */ ++ volatile u16 rsrv9; /* MSCAN + 0x26 */ ++ volatile u8 canidmr0; /* MSCAN + 0x28 */ ++ volatile u8 canidmr1; /* MSCAN + 0x29 */ ++ volatile u16 rsrv10; /* MSCAN + 0x2A */ ++ volatile u8 canidmr2; /* MSCAN + 0x2C */ ++ volatile u8 canidmr3; /* MSCAN + 0x2D */ ++ volatile u16 rsrv11; /* MSCAN + 0x2E */ ++ volatile u8 canidar4; /* MSCAN + 0x30 */ ++ volatile u8 canidar5; /* MSCAN + 0x31 */ ++ volatile u16 rsrv12; /* MSCAN + 0x32 */ ++ volatile u8 canidar6; /* MSCAN + 0x34 */ ++ volatile u8 canidar7; /* MSCAN + 0x35 */ ++ volatile u16 rsrv13; /* MSCAN + 0x36 */ ++ volatile u8 canidmr4; /* MSCAN + 0x38 */ ++ volatile u8 canidmr5; /* MSCAN + 0x39 */ ++ volatile u16 rsrv14; /* MSCAN + 0x3A */ ++ volatile u8 canidmr6; /* MSCAN + 0x3C */ ++ volatile u8 canidmr7; /* MSCAN + 0x3D */ ++ volatile u16 rsrv15; /* MSCAN + 0x3E */ ++ ++ struct mscan_msgbuf canrxfg; /* MSCAN + 0x40 */ /* Foreground receive buffer */ ++ struct mscan_msgbuf cantxfg; /* MSCAN + 0x60 */ /* Foreground transmit buffer */ ++}; ++ ++/* Clock source selection ++ */ ++#define MSCAN_CLKSRC_BUS 0 ++#define MSCAN_CLKSRC_XTAL MSCAN_CLKSRC ++#define MSCAN_CLKSRC_IPS MSCAN_CLKSRC ++ ++/* Message type access macros. 
++ */ ++#define MSCAN_BUF_STD_RTR 0x10 ++#define MSCAN_BUF_EXT_RTR 0x01 ++#define MSCAN_BUF_EXTENDED 0x08 ++ ++#define MSCAN_IDAM1 0x20 ++/* Value for the interrupt enable register */ ++#define MSCAN_RIER (MSCAN_OVRIE | \ ++ MSCAN_RXFIE | \ ++ MSCAN_WUPIF | \ ++ MSCAN_CSCIE | \ ++ MSCAN_RSTATE0 | \ ++ MSCAN_RSTATE1 | \ ++ MSCAN_TSTATE0 | \ ++ MSCAN_TSTATE1) ++ ++#define BTR0_BRP_MASK 0x3f ++#define BTR0_SJW_SHIFT 6 ++#define BTR0_SJW_MASK (0x3 << BTR0_SJW_SHIFT) ++ ++#define BTR1_TSEG1_MASK 0xf ++#define BTR1_TSEG2_SHIFT 4 ++#define BTR1_TSEG2_MASK (0x7 << BTR1_TSEG2_SHIFT) ++#define BTR1_SAM_SHIFT 7 ++ ++#define BTR0_SET_BRP(brp) (((brp) - 1) & BTR0_BRP_MASK) ++#define BTR0_SET_SJW(sjw) ((((sjw) - 1) << BTR0_SJW_SHIFT) & \ ++ BTR0_SJW_MASK) ++ ++#define BTR1_SET_TSEG1(tseg1) (((tseg1) - 1) & BTR1_TSEG1_MASK) ++#define BTR1_SET_TSEG2(tseg2) ((((tseg2) - 1) << BTR1_TSEG2_SHIFT) & \ ++ BTR1_TSEG2_MASK) ++#define BTR1_SET_SAM(sam) (((sam) & 1) << BTR1_SAM_SHIFT) ++ ++#endif /* __RTCAN_MSCAN_REGS_H_ */ +--- linux/drivers/xenomai/can/mscan/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/Makefile 2021-04-07 16:01:26.415635326 +0800 +@@ -0,0 +1,6 @@ ++ ++ccflags-y += -Idrivers/xenomai/can -Idrivers/xenomai/can/mscan ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN_MSCAN) += xeno_can_mscan.o ++ ++xeno_can_mscan-y := rtcan_mscan.o rtcan_mscan_proc.o rtcan_mscan_mpc5xxx.o +--- linux/drivers/xenomai/can/mscan/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/Kconfig 2021-04-07 16:01:26.410635334 +0800 +@@ -0,0 +1,8 @@ ++config XENO_DRIVERS_CAN_MSCAN ++ depends on XENO_DRIVERS_CAN && (PPC_MPC52xx || PPC_MPC512x) ++ tristate "MSCAN driver for MPC52xx and MPC512x" ++ default n ++ help ++ ++ This driver is for the MSCAN on the MPC5200 and MPC512x processor ++ from Freescale. +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_mpc5xxx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_mpc5xxx.c 2021-04-07 16:01:26.405635341 +0800 +@@ -0,0 +1,392 @@ ++/* ++ * CAN bus driver for the Freescale MPC5xxx embedded CPU. ++ * ++ * Copyright (C) 2004-2005 Andrey Volkov , ++ * Varma Electronics Oy ++ * Copyright (C) 2008-2010 Wolfgang Grandegger ++ * Copyright (C) 2009 Wolfram Sang, Pengutronix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtcan_dev.h" ++#include "rtcan_mscan_regs.h" ++#include "rtcan_mscan.h" ++ ++#define of_device platform_device ++#define of_platform_driver platform_driver ++#define of_register_platform_driver platform_driver_register ++#define of_unregister_platform_driver platform_driver_unregister ++ ++static char mscan_ctrl_name_mpc5200[] = "MSCAN-MPC5200"; ++static char mscan_ctrl_name_mpc512x[] = "MSCAN-MPC512x"; ++static char mscan_board_name[] = "unkown"; ++ ++struct mpc5xxx_can_data { ++ unsigned int type; ++ u32 (*get_clock)(struct of_device *ofdev, const char *clock_name, ++ int *mscan_clksrc); ++}; ++ ++#ifdef CONFIG_PPC_MPC52xx ++static struct of_device_id mpc52xx_cdm_ids[] = { ++ { .compatible = "fsl,mpc5200-cdm", }, ++ {} ++}; ++ ++static u32 mpc52xx_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ unsigned int pvr; ++ struct mpc52xx_cdm __iomem *cdm; ++ struct device_node *np_cdm; ++ unsigned int freq; ++ u32 val; ++ ++ pvr = mfspr(SPRN_PVR); ++ ++ /* ++ * Either the oscillator clock (SYS_XTAL_IN) or the IP bus clock ++ * (IP_CLK) can be selected as MSCAN clock source. According to ++ * the MPC5200 user's manual, the oscillator clock is the better ++ * choice as it has less jitter. For this reason, it is selected ++ * by default. Unfortunately, it can not be selected for the old ++ * MPC5200 Rev. A chips due to a hardware bug (check errata). ++ */ ++ if (clock_name && strcmp(clock_name, "ip") == 0) ++ *mscan_clksrc = MSCAN_CLKSRC_BUS; ++ else ++ *mscan_clksrc = MSCAN_CLKSRC_XTAL; ++ ++ freq = mpc5xxx_get_bus_frequency(mpc5xxx_get_of_node(ofdev)); ++ if (!freq) ++ return 0; ++ ++ if (*mscan_clksrc == MSCAN_CLKSRC_BUS || pvr == 0x80822011) ++ return freq; ++ ++ /* Determine SYS_XTAL_IN frequency from the clock domain settings */ ++ np_cdm = of_find_matching_node(NULL, mpc52xx_cdm_ids); ++ if (!np_cdm) { ++ dev_err(&ofdev->dev, "can't get clock node!\n"); ++ return 0; ++ } ++ cdm = of_iomap(np_cdm, 0); ++ ++ if (in_8(&cdm->ipb_clk_sel) & 0x1) ++ freq *= 2; ++ val = in_be32(&cdm->rstcfg); ++ ++ freq *= (val & (1 << 5)) ? 8 : 4; ++ freq /= (val & (1 << 6)) ? 
12 : 16; ++ ++ of_node_put(np_cdm); ++ iounmap(cdm); ++ ++ return freq; ++} ++#else /* !CONFIG_PPC_MPC5200 */ ++static u32 mpc52xx_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ return 0; ++} ++#endif /* CONFIG_PPC_MPC52xx */ ++ ++#ifdef CONFIG_PPC_MPC512x ++struct mpc512x_clockctl { ++ u32 spmr; /* System PLL Mode Reg */ ++ u32 sccr[2]; /* System Clk Ctrl Reg 1 & 2 */ ++ u32 scfr1; /* System Clk Freq Reg 1 */ ++ u32 scfr2; /* System Clk Freq Reg 2 */ ++ u32 reserved; ++ u32 bcr; /* Bread Crumb Reg */ ++ u32 pccr[12]; /* PSC Clk Ctrl Reg 0-11 */ ++ u32 spccr; /* SPDIF Clk Ctrl Reg */ ++ u32 cccr; /* CFM Clk Ctrl Reg */ ++ u32 dccr; /* DIU Clk Cnfg Reg */ ++ u32 mccr[4]; /* MSCAN Clk Ctrl Reg 1-3 */ ++}; ++ ++static struct of_device_id mpc512x_clock_ids[] = { ++ { .compatible = "fsl,mpc5121-clock", }, ++ {} ++}; ++ ++static u32 mpc512x_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ struct mpc512x_clockctl __iomem *clockctl; ++ struct device_node *np_clock; ++ struct clk *sys_clk, *ref_clk; ++ int plen, clockidx, clocksrc = -1; ++ u32 sys_freq, val, clockdiv = 1, freq = 0; ++ const u32 *pval; ++ ++ np_clock = of_find_matching_node(NULL, mpc512x_clock_ids); ++ if (!np_clock) { ++ dev_err(&ofdev->dev, "couldn't find clock node\n"); ++ return -ENODEV; ++ } ++ clockctl = of_iomap(np_clock, 0); ++ if (!clockctl) { ++ dev_err(&ofdev->dev, "couldn't map clock registers\n"); ++ return 0; ++ } ++ ++ /* Determine the MSCAN device index from the physical address */ ++ pval = of_get_property(mpc5xxx_get_of_node(ofdev), "reg", &plen); ++ BUG_ON(!pval || plen < sizeof(*pval)); ++ clockidx = (*pval & 0x80) ? 1 : 0; ++ if (*pval & 0x2000) ++ clockidx += 2; ++ ++ /* ++ * Clock source and divider selection: 3 different clock sources ++ * can be selected: "ip", "ref" or "sys". For the latter two, a ++ * clock divider can be defined as well. If the clock source is ++ * not specified by the device tree, we first try to find an ++ * optimal CAN source clock based on the system clock. If that ++ * is not posslible, the reference clock will be used. 
++ */ ++ if (clock_name && !strcmp(clock_name, "ip")) { ++ *mscan_clksrc = MSCAN_CLKSRC_IPS; ++ freq = mpc5xxx_get_bus_frequency(mpc5xxx_get_of_node(ofdev)); ++ } else { ++ *mscan_clksrc = MSCAN_CLKSRC_BUS; ++ ++ pval = of_get_property(mpc5xxx_get_of_node(ofdev), ++ "fsl,mscan-clock-divider", &plen); ++ if (pval && plen == sizeof(*pval)) ++ clockdiv = *pval; ++ if (!clockdiv) ++ clockdiv = 1; ++ ++ if (!clock_name || !strcmp(clock_name, "sys")) { ++ sys_clk = clk_get(&ofdev->dev, "sys_clk"); ++ if (!sys_clk) { ++ dev_err(&ofdev->dev, "couldn't get sys_clk\n"); ++ goto exit_unmap; ++ } ++ /* Get and round up/down sys clock rate */ ++ sys_freq = 1000000 * ++ ((clk_get_rate(sys_clk) + 499999) / 1000000); ++ ++ if (!clock_name) { ++ /* A multiple of 16 MHz would be optimal */ ++ if ((sys_freq % 16000000) == 0) { ++ clocksrc = 0; ++ clockdiv = sys_freq / 16000000; ++ freq = sys_freq / clockdiv; ++ } ++ } else { ++ clocksrc = 0; ++ freq = sys_freq / clockdiv; ++ } ++ } ++ ++ if (clocksrc < 0) { ++ ref_clk = clk_get(&ofdev->dev, "ref_clk"); ++ if (!ref_clk) { ++ dev_err(&ofdev->dev, "couldn't get ref_clk\n"); ++ goto exit_unmap; ++ } ++ clocksrc = 1; ++ freq = clk_get_rate(ref_clk) / clockdiv; ++ } ++ } ++ ++ /* Disable clock */ ++ out_be32(&clockctl->mccr[clockidx], 0x0); ++ if (clocksrc >= 0) { ++ /* Set source and divider */ ++ val = (clocksrc << 14) | ((clockdiv - 1) << 17); ++ out_be32(&clockctl->mccr[clockidx], val); ++ /* Enable clock */ ++ out_be32(&clockctl->mccr[clockidx], val | 0x10000); ++ } ++ ++ /* Enable MSCAN clock domain */ ++ val = in_be32(&clockctl->sccr[1]); ++ if (!(val & (1 << 25))) ++ out_be32(&clockctl->sccr[1], val | (1 << 25)); ++ ++ dev_dbg(&ofdev->dev, "using '%s' with frequency divider %d\n", ++ *mscan_clksrc == MSCAN_CLKSRC_IPS ? "ips_clk" : ++ clocksrc == 1 ? 
"ref_clk" : "sys_clk", clockdiv); ++ ++exit_unmap: ++ of_node_put(np_clock); ++ iounmap(clockctl); ++ ++ return freq; ++} ++#else /* !CONFIG_PPC_MPC512x */ ++static u32 mpc512x_can_get_clock(struct of_device *ofdev, ++ const char *clock_name, ++ int *mscan_clksrc) ++{ ++ return 0; ++} ++#endif /* CONFIG_PPC_MPC512x */ ++ ++static struct of_device_id mpc5xxx_can_table[]; ++static int mpc5xxx_can_probe(struct of_device *ofdev) ++{ ++ struct device_node *np = mpc5xxx_get_of_node(ofdev); ++ struct mpc5xxx_can_data *data; ++ struct rtcan_device *dev; ++ void __iomem *base; ++ const char *clock_name = NULL; ++ int irq, mscan_clksrc = 0; ++ int err = -ENOMEM; ++ ++ const struct of_device_id *id; ++ ++ id = of_match_device(mpc5xxx_can_table, &ofdev->dev); ++ if (!id) ++ return -EINVAL; ++ ++ data = (struct mpc5xxx_can_data *)id->data; ++ ++ base = of_iomap(np, 0); ++ if (!base) { ++ dev_err(&ofdev->dev, "couldn't ioremap\n"); ++ return err; ++ } ++ ++ irq = irq_of_parse_and_map(np, 0); ++ if (!irq) { ++ dev_err(&ofdev->dev, "no irq found\n"); ++ err = -ENODEV; ++ goto exit_unmap_mem; ++ } ++ ++ dev = rtcan_dev_alloc(0, 0); ++ if (!dev) ++ goto exit_dispose_irq; ++ ++ clock_name = of_get_property(np, "fsl,mscan-clock-source", NULL); ++ ++ BUG_ON(!data); ++ dev->can_sys_clock = data->get_clock(ofdev, clock_name, ++ &mscan_clksrc); ++ if (!dev->can_sys_clock) { ++ dev_err(&ofdev->dev, "couldn't get MSCAN clock properties\n"); ++ goto exit_free_mscan; ++ } ++ ++ if (data->type == MSCAN_TYPE_MPC5121) ++ dev->ctrl_name = mscan_ctrl_name_mpc512x; ++ else ++ dev->ctrl_name = mscan_ctrl_name_mpc5200; ++ dev->board_name = mscan_board_name; ++ dev->base_addr = (unsigned long)base; ++ ++ err = rtcan_mscan_register(dev, irq, mscan_clksrc); ++ if (err) { ++ dev_err(&ofdev->dev, "registering %s failed (err=%d)\n", ++ RTCAN_DRV_NAME, err); ++ goto exit_free_mscan; ++ } ++ ++ dev_set_drvdata(&ofdev->dev, dev); ++ ++ dev_info(&ofdev->dev, "MSCAN at 0x%p, irq %d, clock %d Hz\n", ++ base, irq, dev->can_sys_clock); ++ ++ return 0; ++ ++exit_free_mscan: ++ rtcan_dev_free(dev); ++exit_dispose_irq: ++ irq_dispose_mapping(irq); ++exit_unmap_mem: ++ iounmap(base); ++ ++ return err; ++} ++ ++static int mpc5xxx_can_remove(struct of_device *ofdev) ++{ ++ struct rtcan_device *dev = dev_get_drvdata(&ofdev->dev); ++ ++ dev_set_drvdata(&ofdev->dev, NULL); ++ ++ rtcan_mscan_unregister(dev); ++ iounmap((void *)dev->base_addr); ++ rtcan_dev_free(dev); ++ ++ return 0; ++} ++ ++static struct mpc5xxx_can_data mpc5200_can_data = { ++ .type = MSCAN_TYPE_MPC5200, ++ .get_clock = mpc52xx_can_get_clock, ++}; ++ ++static struct mpc5xxx_can_data mpc5121_can_data = { ++ .type = MSCAN_TYPE_MPC5121, ++ .get_clock = mpc512x_can_get_clock, ++}; ++ ++static struct of_device_id mpc5xxx_can_table[] = { ++ { .compatible = "fsl,mpc5200-mscan", .data = &mpc5200_can_data, }, ++ /* Note that only MPC5121 Rev. 
2 (and later) is supported */ ++ { .compatible = "fsl,mpc5121-mscan", .data = &mpc5121_can_data, }, ++ {}, ++}; ++ ++static struct of_platform_driver mpc5xxx_can_driver = { ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = RTCAN_DRV_NAME, ++ .of_match_table = mpc5xxx_can_table, ++ }, ++ .probe = mpc5xxx_can_probe, ++ .remove = mpc5xxx_can_remove, ++}; ++ ++static int __init mpc5xxx_can_init(void) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ return of_register_platform_driver(&mpc5xxx_can_driver); ++} ++module_init(mpc5xxx_can_init); ++ ++static void __exit mpc5xxx_can_exit(void) ++{ ++ return of_unregister_platform_driver(&mpc5xxx_can_driver); ++}; ++module_exit(mpc5xxx_can_exit); ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RT-Socket-CAN driver for MPC5200 and MPC521x"); ++MODULE_LICENSE("GPL v2"); +--- linux/drivers/xenomai/can/mscan/rtcan_mscan.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan.h 2021-04-07 16:01:26.400635348 +0800 +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (C) 2009 Wolfgang Grandegger ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_MSCAN_H_ ++#define __RTCAN_MSCAN_H_ ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "rtcan_mscan" ++ ++/* MSCAN type variants */ ++enum { ++ MSCAN_TYPE_MPC5200, ++ MSCAN_TYPE_MPC5121 ++}; ++ ++extern int rtcan_mscan_register(struct rtcan_device *dev, int irq, ++ int mscan_clksrc); ++extern int rtcan_mscan_unregister(struct rtcan_device *dev); ++ ++extern int rtcan_mscan_create_proc(struct rtcan_device* dev); ++extern void rtcan_mscan_remove_proc(struct rtcan_device* dev); ++ ++#endif /* __RTCAN_MSCAN_H_ */ +--- linux/drivers/xenomai/can/mscan/rtcan_mscan.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan.c 2021-04-07 16:01:26.396635354 +0800 +@@ -0,0 +1,797 @@ ++/* ++ * Copyright (C) 2006-2010 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Derived from the PCAN project file driver/src/pcan_mpc5200.c: ++ * ++ * Copyright (c) 2003 Wolfgang Denk, DENX Software Engineering, wd@denx.de. ++ * ++ * Copyright (c) 2005 Felix Daners, Plugit AG, felix.daners@plugit.ch ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++#include "rtcan_internal.h" ++#include "rtcan_mscan_regs.h" ++#include "rtcan_mscan.h" ++ ++#define MSCAN_SET_MODE_RETRIES 255 ++ ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++static struct can_bittiming_const mscan_bittiming_const = { ++ .name = "mscan", ++ .tseg1_min = 4, ++ .tseg1_max = 16, ++ .tseg2_min = 2, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 64, ++ .brp_inc = 1, ++}; ++#endif ++ ++/** ++ * Reception Interrupt handler ++ * ++ * Inline function first called within @ref rtcan_mscan_interrupt when an RX ++ * interrupt was detected. Here the HW registers are read out and composed ++ * to a struct rtcan_skb. ++ * ++ * @param[out] skb Pointer to an instance of struct rtcan_skb which will be ++ * filled with received CAN message ++ * @param[in] dev Device ID ++ */ ++static inline void rtcan_mscan_rx_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb) ++{ ++ int i; ++ unsigned char size; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ frame->can_dlc = in_8(®s->canrxfg.dlr) & 0x0F; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload size) */ ++ size = (frame->can_dlc > 8) ? 8 : frame->can_dlc; ++ ++ if (in_8(®s->canrxfg.idr[1]) & MSCAN_BUF_EXTENDED) { ++ frame->can_id = ((in_8(®s->canrxfg.idr[0]) << 21) | ++ ((in_8(®s->canrxfg.idr[1]) & 0xE0) << 13) | ++ ((in_8(®s->canrxfg.idr[1]) & 0x07) << 15) | ++ (in_8(®s->canrxfg.idr[4]) << 7) | ++ (in_8(®s->canrxfg.idr[5]) >> 1)); ++ ++ frame->can_id |= CAN_EFF_FLAG; ++ ++ if ((in_8(®s->canrxfg.idr[5]) & MSCAN_BUF_EXT_RTR)) { ++ frame->can_id |= CAN_RTR_FLAG; ++ } else { ++ for (i = 0; i < size; i++) ++ frame->data[i] = ++ in_8(®s->canrxfg.dsr[i + ++ (i / 2) * 2]); ++ skb->rb_frame_size += size; ++ } ++ ++ } else { ++ frame->can_id = ((in_8(®s->canrxfg.idr[0]) << 3) | ++ (in_8(®s->canrxfg.idr[1]) >> 5)); ++ ++ if ((in_8(®s->canrxfg.idr[1]) & MSCAN_BUF_STD_RTR)) { ++ frame->can_id |= CAN_RTR_FLAG; ++ } else { ++ for (i = 0; i < size; i++) ++ frame->data[i] = ++ in_8(®s->canrxfg.dsr[i + ++ (i / 2) * 2]); ++ skb->rb_frame_size += size; ++ } ++ } ++ ++ ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++static can_state_t mscan_stat_map[4] = { ++ CAN_STATE_ACTIVE, ++ CAN_STATE_BUS_WARNING, ++ CAN_STATE_BUS_PASSIVE, ++ CAN_STATE_BUS_OFF ++}; ++ ++static inline void rtcan_mscan_err_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb, ++ int r_status) ++{ ++ u8 rstat, tstat; ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ ++ frame->can_id = CAN_ERR_FLAG; ++ frame->can_dlc = CAN_ERR_DLC; ++ ++ memset(&frame->data[0], 0, frame->can_dlc); ++ ++ if ((r_status & MSCAN_OVRIF)) { ++ frame->can_id |= CAN_ERR_CRTL; ++ frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ ++ } else if ((r_status & (MSCAN_CSCIF))) { ++ ++ rstat = (r_status & (MSCAN_TSTAT0 | ++ MSCAN_TSTAT1)) >> 2 & 0x3; ++ tstat = (r_status & (MSCAN_RSTAT0 | ++ MSCAN_RSTAT1)) >> 4 & 0x3; ++ dev->state = 
mscan_stat_map[max(rstat, tstat)]; ++ ++ switch (dev->state) { ++ case CAN_STATE_BUS_OFF: ++ /* Bus-off condition */ ++ frame->can_id |= CAN_ERR_BUSOFF; ++ dev->state = CAN_STATE_BUS_OFF; ++ /* Disable receiver interrupts */ ++ out_8(®s->canrier, 0); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ ++ case CAN_STATE_BUS_PASSIVE: ++ frame->can_id |= CAN_ERR_CRTL; ++ if (tstat > rstat) ++ frame->data[1] = CAN_ERR_CRTL_TX_PASSIVE; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_PASSIVE; ++ break; ++ ++ case CAN_STATE_BUS_WARNING: ++ frame->can_id |= CAN_ERR_CRTL; ++ if (tstat > rstat) ++ frame->data[1] = CAN_ERR_CRTL_TX_WARNING; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_WARNING; ++ break; ++ ++ default: ++ break; ++ ++ } ++ } ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++/** Interrupt handler */ ++static int rtcan_mscan_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_skb skb; ++ struct rtcan_device *dev; ++ struct mscan_regs *regs; ++ u8 canrflg; ++ int recv_lock_free = 1; ++ int ret = RTDM_IRQ_NONE; ++ ++ ++ dev = (struct rtcan_device *)rtdm_irq_get_arg(irq_handle, void); ++ regs = (struct mscan_regs *)dev->base_addr; ++ ++ rtdm_lock_get(&dev->device_lock); ++ ++ canrflg = in_8(®s->canrflg); ++ ++ ret = RTDM_IRQ_HANDLED; ++ ++ /* Transmit Interrupt? */ ++ if ((in_8(®s->cantier) & MSCAN_TXIE0) && ++ (in_8(®s->cantflg) & MSCAN_TXE0)) { ++ out_8(®s->cantier, 0); ++ /* Wake up a sender */ ++ rtdm_sem_up(&dev->tx_sem); ++ ++ if (rtcan_loopback_pending(dev)) { ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ rtcan_loopback(dev); ++ } ++ } ++ ++ /* Wakeup interrupt? */ ++ if ((canrflg & MSCAN_WUPIF)) { ++ rtdm_printk("WUPIF interrupt\n"); ++ } ++ ++ /* Receive Interrupt? */ ++ if ((canrflg & MSCAN_RXF)) { ++ ++ /* Read out HW registers */ ++ rtcan_mscan_rx_interrupt(dev, &skb); ++ ++ /* Take more locks. Ensure that they are taken and ++ * released only once in the IRQ handler. */ ++ /* WARNING: Nested locks are dangerous! But they are ++ * nested only in this routine so a deadlock should ++ * not be possible. */ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass received frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ ++ /* Error Interrupt? */ ++ if ((canrflg & (MSCAN_CSCIF | MSCAN_OVRIF))) { ++ /* Check error condition and fill error frame */ ++ rtcan_mscan_err_interrupt(dev, &skb, canrflg); ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass error frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ ++ /* Acknowledge the handled interrupt within the controller. ++ * Only do so for the receiver interrupts. ++ */ ++ if (canrflg) ++ out_8(®s->canrflg, canrflg); ++ ++ if (!recv_lock_free) { ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ } ++ rtdm_lock_put(&dev->device_lock); ++ ++ return ret; ++} ++ ++/** ++ * Set controller into reset mode. Called from @ref rtcan_mscan_ioctl ++ * (main usage), init_module and cleanup_module. ++ * ++ * @param dev_id Device ID ++ * @param lock_ctx Pointer to saved IRQ context (if stored before calling ++ * this function). Only evaluated if @c locked is true. 
++ * @param locked Boolean value indicating if function was called in an ++ * spin locked and IRQ disabled context ++ * ++ * @return 0 on success, otherwise: ++ * - -EAGAIN: Reset mode bit could not be verified after setting it. ++ * See also note. ++ * ++ * @note According to the MSCAN specification, it is necessary to check ++ * the reset mode bit in PeliCAN mode after having set it. So we do. But if ++ * using a ISA card like the PHYTEC eNET card this should not be necessary ++ * because the CAN controller clock of this card (16 MHz) is twice as high ++ * as the ISA bus clock. ++ */ ++static int rtcan_mscan_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ int rinit = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ u8 reg; ++ ++ state = dev->state; ++ /* If controller is not operating anyway, go out */ ++ if (!CAN_STATE_OPERATING(state)) ++ goto out; ++ ++ /* Switch to sleep mode */ ++ setbits8(®s->canctl0, MSCAN_SLPRQ); ++ reg = in_8(®s->canctl1); ++ while (!(reg & MSCAN_SLPAK) && ++ (rinit < MSCAN_SET_MODE_RETRIES)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ rinit++; ++ reg = in_8(®s->canctl1); ++ } ++ /* ++ * The mscan controller will fail to enter sleep mode, ++ * while there are irregular activities on bus, like ++ * somebody keeps retransmitting. This behavior is ++ * undocumented and seems to differ between mscan built ++ * in mpc5200b and mpc5200. We proceed in that case, ++ * since otherwise the slprq will be kept set and the ++ * controller will get stuck. NOTE: INITRQ or CSWAI ++ * will abort all active transmit actions, if still ++ * any, at once. ++ */ ++ if (rinit >= MSCAN_SET_MODE_RETRIES) ++ rtdm_printk("rtcan_mscan: device failed to enter sleep mode. " ++ "We proceed anyhow.\n"); ++ else ++ dev->state = CAN_STATE_SLEEPING; ++ ++ rinit = 0; ++ setbits8(®s->canctl0, MSCAN_INITRQ); ++ ++ reg = in_8(®s->canctl1); ++ while (!(reg & MSCAN_INITAK) && ++ (rinit < MSCAN_SET_MODE_RETRIES)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ rinit++; ++ reg = in_8(®s->canctl1); ++ } ++ if (rinit >= MSCAN_SET_MODE_RETRIES) ++ ret = -ENODEV; ++ ++ /* Volatile state could have changed while we slept busy. */ ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ ++out: ++ return ret; ++} ++ ++/** ++ * Set controller into operating mode. ++ * ++ * Called from @ref rtcan_mscan_ioctl in spin locked and IRQ disabled ++ * context. ++ * ++ * @param dev_id Device ID ++ * @param lock_ctx Pointer to saved IRQ context (only used when coming ++ * from @ref CAN_STATE_SLEEPING, see also note) ++ * ++ * @return 0 on success, otherwise: ++ * - -EINVAL: No Baud rate set before request to set start mode ++ * ++ * @note If coming from @c CAN_STATE_SLEEPING, the controller must wait ++ * some time to avoid bus errors. Measured on an PHYTEC eNET card, ++ * this time was 110 microseconds. 
++ */ ++static int rtcan_mscan_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0, retries = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* We won't forget that state in the device structure is volatile and ++ * access to it will not be optimized by the compiler. So ... */ ++ state = dev->state; ++ ++ switch (state) { ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ case CAN_STATE_STOPPED: ++ /* Set error active state */ ++ state = CAN_STATE_ACTIVE; ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ ++ if ((dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY)) { ++ setbits8(®s->canctl1, MSCAN_LISTEN); ++ } else { ++ clrbits8(®s->canctl1, MSCAN_LISTEN); ++ } ++ if ((dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK)) { ++ setbits8(®s->canctl1, MSCAN_LOOPB); ++ } else { ++ clrbits8(®s->canctl1, MSCAN_LOOPB); ++ } ++ ++ /* Switch to normal mode */ ++ clrbits8(®s->canctl0, MSCAN_INITRQ); ++ clrbits8(®s->canctl0, MSCAN_SLPRQ); ++ while ((in_8(®s->canctl1) & MSCAN_INITAK) || ++ (in_8(®s->canctl1) & MSCAN_SLPAK)) { ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_put_irqrestore(&dev->device_lock, ++ *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (likely(lock_ctx != NULL)) ++ rtdm_lock_get_irqsave(&dev->device_lock, ++ *lock_ctx); ++ retries++; ++ } ++ /* Enable interrupts */ ++ setbits8(®s->canrier, MSCAN_RIER); ++ ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Trigger bus-off recovery */ ++ out_8(®s->canrier, MSCAN_RIER); ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Set error active state */ ++ state = CAN_STATE_ACTIVE; ++ ++ break; ++ ++ default: ++ /* Never reached, but we don't want nasty compiler warnings */ ++ break; ++ } ++ /* Store new state in device structure (or old state) */ ++ dev->state = state; ++ ++ return ret; ++} ++ ++static int rtcan_mscan_set_bit_time(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ u8 btr0, btr1; ++ ++ switch (bit_time->type) { ++ case CAN_BITTIME_BTR: ++ btr0 = bit_time->btr.btr0; ++ btr1 = bit_time->btr.btr1; ++ break; ++ ++ case CAN_BITTIME_STD: ++ btr0 = (BTR0_SET_BRP(bit_time->std.brp) | ++ BTR0_SET_SJW(bit_time->std.sjw)); ++ btr1 = (BTR1_SET_TSEG1(bit_time->std.prop_seg + ++ bit_time->std.phase_seg1) | ++ BTR1_SET_TSEG2(bit_time->std.phase_seg2) | ++ BTR1_SET_SAM(bit_time->std.sam)); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ out_8(®s->canbtr0, btr0); ++ out_8(®s->canbtr1, btr1); ++ ++ rtdm_printk("%s: btr0=0x%02x btr1=0x%02x\n", dev->name, btr0, btr1); ++ ++ return 0; ++} ++ ++static int rtcan_mscan_set_mode(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0, retries = 0; ++ can_state_t state; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ switch (mode) { ++ ++ case CAN_MODE_STOP: ++ ret = rtcan_mscan_mode_stop(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_START: ++ ret = rtcan_mscan_mode_start(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_SLEEP: ++ ++ state = dev->state; ++ ++ /* Controller must operate, otherwise go out */ ++ if (!CAN_STATE_OPERATING(state)) { ++ ret = -ENETDOWN; ++ goto mode_sleep_out; ++ } ++ ++ /* Is controller sleeping yet? 
If yes, go out */ ++ if (state == CAN_STATE_SLEEPING) ++ goto mode_sleep_out; ++ ++ /* Remember into which state to return when we ++ * wake up */ ++ dev->state_before_sleep = state; ++ state = CAN_STATE_SLEEPING; ++ ++ /* Let's take a nap. (Now I REALLY understand ++ * the meaning of interrupts ...) */ ++ out_8(®s->canrier, 0); ++ out_8(®s->cantier, 0); ++ setbits8(®s->canctl0, ++ MSCAN_SLPRQ /*| MSCAN_INITRQ*/ | MSCAN_WUPE); ++ while (!(in_8(®s->canctl1) & MSCAN_SLPAK)) { ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ if (retries++ >= 1000) ++ break; ++ } ++ rtdm_printk("Fallen asleep after %d tries.\n", retries); ++ clrbits8(®s->canctl0, MSCAN_INITRQ); ++ while ((in_8(®s->canctl1) & MSCAN_INITAK)) { ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ if (retries++ >= 1000) ++ break; ++ } ++ rtdm_printk("Back to normal after %d tries.\n", retries); ++ out_8(®s->canrier, MSCAN_WUPIE); ++ ++ mode_sleep_out: ++ dev->state = state; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ } ++ ++ return ret; ++} ++ ++/** ++ * Start a transmission to a MSCAN ++ * ++ * Inline function called within @ref rtcan_mscan_sendmsg. ++ * This is the completion of a send call when hardware access is granted. ++ * Spinlock is taken before calling this function. ++ * ++ * @param[in] frame Pointer to CAN frame which is about to be sent ++ * @param[in] dev Device ID ++ */ ++static int rtcan_mscan_start_xmit(struct rtcan_device *dev, can_frame_t *frame) ++{ ++ int i, id; ++ /* "Real" size of the payload */ ++ unsigned char size; ++ /* Content of frame information register */ ++ unsigned char dlc; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* Is TX buffer empty? */ ++ if (!(in_8(®s->cantflg) & MSCAN_TXE0)) { ++ rtdm_printk("rtcan_mscan_start_xmit: TX buffer not empty"); ++ return -EIO; ++ } ++ /* Select the buffer we've found. */ ++ out_8(®s->cantbsel, MSCAN_TXE0); ++ ++ /* Get DLC and ID */ ++ dlc = frame->can_dlc; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload) */ ++ size = (dlc > 8) ? 8 : dlc; ++ ++ id = frame->can_id; ++ if (frame->can_id & CAN_EFF_FLAG) { ++ out_8(®s->cantxfg.idr[0], (id & 0x1fe00000) >> 21); ++ out_8(®s->cantxfg.idr[1], ((id & 0x001c0000) >> 13) | ++ ((id & 0x00038000) >> 15) | ++ 0x18); /* set SRR and IDE bits */ ++ ++ out_8(®s->cantxfg.idr[4], (id & 0x00007f80) >> 7); ++ out_8(®s->cantxfg.idr[5], (id & 0x0000007f) << 1); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ setbits8(®s->cantxfg.idr[5], 0x1); ++ else { ++ clrbits8(®s->cantxfg.idr[5], 0x1); ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ out_8(®s->cantxfg.dsr[i + (i / 2) * 2], ++ frame->data[i]); ++ } ++ ++ } else { ++ /* Send standard frame */ ++ ++ out_8(®s->cantxfg.idr[0], (id & 0x000007f8) >> 3); ++ out_8(®s->cantxfg.idr[1], (id & 0x00000007) << 5); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ setbits8(®s->cantxfg.idr[1], 0x10); ++ else { ++ clrbits8(®s->cantxfg.idr[1], 0x10); ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ out_8(®s->cantxfg.dsr[i + (i / 2) * 2], ++ frame->data[i]); ++ } ++ } ++ ++ out_8(®s->cantxfg.dlr, frame->can_dlc); ++ out_8(®s->cantxfg.tbpr, 0); /* all messages have the same prio */ ++ ++ /* Trigger transmission. 
*/ ++ out_8(®s->cantflg, MSCAN_TXE0); ++ ++ /* Enable interrupt. */ ++ setbits8(®s->cantier, MSCAN_TXIE0); ++ ++ return 0; ++} ++ ++/** ++ * MSCAN Chip configuration ++ * ++ * Called during @ref init_module. Here, the configuration registers which ++ * must be set only once are written with the right values. The controller ++ * is left in reset mode and goes into operating mode not until the IOCTL ++ * for starting it is triggered. ++ * ++ * @param[in] dev Device ID of the controller to be configured ++ */ ++static inline void __init mscan_chip_config(struct mscan_regs *regs, ++ int mscan_clksrc) ++{ ++ /* Choose IP bus as clock source. ++ */ ++ if (mscan_clksrc) ++ setbits8(®s->canctl1, MSCAN_CLKSRC); ++ clrbits8(®s->canctl1, MSCAN_LISTEN); ++ ++ /* Configure MSCAN to accept all incoming messages. ++ */ ++ out_8(®s->canidar0, 0x00); ++ out_8(®s->canidar1, 0x00); ++ out_8(®s->canidar2, 0x00); ++ out_8(®s->canidar3, 0x00); ++ out_8(®s->canidmr0, 0xFF); ++ out_8(®s->canidmr1, 0xFF); ++ out_8(®s->canidmr2, 0xFF); ++ out_8(®s->canidmr3, 0xFF); ++ out_8(®s->canidar4, 0x00); ++ out_8(®s->canidar5, 0x00); ++ out_8(®s->canidar6, 0x00); ++ out_8(®s->canidar7, 0x00); ++ out_8(®s->canidmr4, 0xFF); ++ out_8(®s->canidmr5, 0xFF); ++ out_8(®s->canidmr6, 0xFF); ++ out_8(®s->canidmr7, 0xFF); ++ clrbits8(®s->canidac, MSCAN_IDAM0 | MSCAN_IDAM1); ++} ++ ++/** ++ * MSCAN Chip registration ++ * ++ * Called during @ref init_module. ++ * ++ * @param[in] dev Device ID of the controller to be registered ++ * @param[in] mscan_clksrc clock source to be used ++ */ ++int rtcan_mscan_register(struct rtcan_device *dev, int irq, int mscan_clksrc) ++{ ++ int ret; ++ struct mscan_regs *regs; ++ ++ regs = (struct mscan_regs *)dev->base_addr; ++ ++ /* Enable MSCAN module. */ ++ setbits8(®s->canctl1, MSCAN_CANE); ++ udelay(100); ++ ++ /* Set dummy state for following call */ ++ dev->state = CAN_STATE_ACTIVE; ++ ++ /* Enter reset mode */ ++ rtcan_mscan_mode_stop(dev, NULL); ++ ++ /* Give device an interface name (so that programs using this driver ++ don't need to know the device ID) */ ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ dev->hard_start_xmit = rtcan_mscan_start_xmit; ++ dev->do_set_mode = rtcan_mscan_set_mode; ++ dev->do_set_bit_time = rtcan_mscan_set_bit_time; ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ dev->bittiming_const = &mscan_bittiming_const; ++#endif ++ ++ /* Register IRQ handler and pass device structure as arg */ ++ ret = rtdm_irq_request(&dev->irq_handle, irq, rtcan_mscan_interrupt, ++ 0, RTCAN_DRV_NAME, (void *)dev); ++ if (ret) { ++ printk("ERROR! rtdm_irq_request for IRQ %d failed\n", irq); ++ goto out_can_disable; ++ } ++ ++ mscan_chip_config(regs, mscan_clksrc); ++ ++ /* Register RTDM device */ ++ ret = rtcan_dev_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR while trying to register RTCAN device!\n"); ++ goto out_irq_free; ++ } ++ ++ rtcan_mscan_create_proc(dev); ++ ++ return 0; ++ ++out_irq_free: ++ rtdm_irq_free(&dev->irq_handle); ++ ++out_can_disable: ++ /* Disable MSCAN module. 
*/ ++ clrbits8(®s->canctl1, MSCAN_CANE); ++ ++ return ret; ++} ++ ++/** ++ * MSCAN Chip deregistration ++ * ++ * Called during @ref cleanup_module ++ * ++ * @param[in] dev Device ID of the controller to be registered ++ */ ++int rtcan_mscan_unregister(struct rtcan_device *dev) ++{ ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++ ++ printk("Unregistering %s device %s\n", RTCAN_DRV_NAME, dev->name); ++ ++ rtcan_mscan_mode_stop(dev, NULL); ++ rtdm_irq_free(&dev->irq_handle); ++ rtcan_mscan_remove_proc(dev); ++ rtcan_dev_unregister(dev); ++ ++ /* Disable MSCAN module. */ ++ clrbits8(®s->canctl1, MSCAN_CANE); ++ ++ return 0; ++} +--- linux/drivers/xenomai/can/mscan/rtcan_mscan_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/mscan/rtcan_mscan_proc.c 2021-04-07 16:01:26.391635361 +0800 +@@ -0,0 +1,152 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include "rtcan_dev.h" ++#include "rtcan_internal.h" ++#include "rtcan_mscan_regs.h" ++ ++#define MSCAN_REG_ARGS(reg) \ ++ "%-8s 0x%02x\n", #reg, (int)(in_8(®s->reg)) & 0xff ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++ ++static int rtcan_mscan_proc_regs(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = (struct rtcan_device *)data; ++ struct mscan_regs *regs = (struct mscan_regs *)dev->base_addr; ++#ifdef MPC5xxx_GPIO ++ struct mpc5xxx_gpio *gpio = (struct mpc5xxx_gpio *)MPC5xxx_GPIO; ++ u32 port_config; ++#endif ++ u8 canctl0, canctl1; ++ ++ seq_printf(p, "MSCAN registers at %p\n", regs); ++ ++ canctl0 = in_8(®s->canctl0); ++ seq_printf(p, "canctl0 0x%02x%s%s%s%s%s%s%s%s\n", ++ canctl0, ++ (canctl0 & MSCAN_RXFRM) ? " rxfrm" :"", ++ (canctl0 & MSCAN_RXACT) ? " rxact" :"", ++ (canctl0 & MSCAN_CSWAI) ? " cswai" :"", ++ (canctl0 & MSCAN_SYNCH) ? " synch" :"", ++ (canctl0 & MSCAN_TIME) ? " time" :"", ++ (canctl0 & MSCAN_WUPE) ? " wupe" :"", ++ (canctl0 & MSCAN_SLPRQ) ? " slprq" :"", ++ (canctl0 & MSCAN_INITRQ)? " initrq":"" ); ++ canctl1 = in_8(®s->canctl1); ++ seq_printf(p, "canctl1 0x%02x%s%s%s%s%s%s%s\n", ++ canctl1, ++ (canctl1 & MSCAN_CANE) ? " cane" :"", ++ (canctl1 & MSCAN_CLKSRC)? " clksrc":"", ++ (canctl1 & MSCAN_LOOPB) ? " loopb" :"", ++ (canctl1 & MSCAN_LISTEN)? " listen":"", ++ (canctl1 & MSCAN_WUPM) ? " wump" :"", ++ (canctl1 & MSCAN_SLPAK) ? " slpak" :"", ++ (canctl1 & MSCAN_INITAK)? 
" initak":""); ++ seq_printf(p, MSCAN_REG_ARGS(canbtr0 )); ++ seq_printf(p, MSCAN_REG_ARGS(canbtr1 )); ++ seq_printf(p, MSCAN_REG_ARGS(canrflg )); ++ seq_printf(p, MSCAN_REG_ARGS(canrier )); ++ seq_printf(p, MSCAN_REG_ARGS(cantflg )); ++ seq_printf(p, MSCAN_REG_ARGS(cantier )); ++ seq_printf(p, MSCAN_REG_ARGS(cantarq )); ++ seq_printf(p, MSCAN_REG_ARGS(cantaak )); ++ seq_printf(p, MSCAN_REG_ARGS(cantbsel)); ++ seq_printf(p, MSCAN_REG_ARGS(canidac )); ++ seq_printf(p, MSCAN_REG_ARGS(canrxerr)); ++ seq_printf(p, MSCAN_REG_ARGS(cantxerr)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar0)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar1)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar2)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar3)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr0)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr1)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr2)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr3)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar4)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar5)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar6)); ++ seq_printf(p, MSCAN_REG_ARGS(canidar7)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr4)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr5)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr6)); ++ seq_printf(p, MSCAN_REG_ARGS(canidmr7)); ++ ++#ifdef MPC5xxx_GPIO ++ seq_printf(p, "GPIO registers\n"); ++ port_config = in_be32(&gpio->port_config); ++ seq_printf(p, "port_config 0x%08x %s\n", port_config, ++ (port_config & 0x10000000 ? ++ "CAN1 on I2C1, CAN2 on TMR0/1 pins" : ++ (port_config & 0x70) == 0x10 ? ++ "CAN1/2 on PSC2 pins" : ++ "MSCAN1/2 not routed")); ++#endif ++ ++ return 0; ++} ++ ++static int rtcan_mscan_proc_regs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_mscan_proc_regs, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_mscan_proc_regs_ops = { ++ .open = rtcan_mscan_proc_regs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int rtcan_mscan_create_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return -EINVAL; ++ ++ proc_create_data("registers", S_IFREG | S_IRUGO | S_IWUSR, ++ dev->proc_root, &rtcan_mscan_proc_regs_ops, dev); ++ return 0; ++} ++ ++void rtcan_mscan_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("registers", dev->proc_root); ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_DEBUG */ ++ ++void rtcan_mscan_remove_proc(struct rtcan_device* dev) ++{ ++} ++ ++int rtcan_mscan_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_DEBUG */ +--- linux/drivers/xenomai/can/rtcan_dev.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_dev.c 2021-04-07 16:01:26.386635368 +0800 +@@ -0,0 +1,321 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from RTnet project file stack/rtdev.c: ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtcan_internal.h" ++#include "rtcan_dev.h" ++ ++ ++static struct rtcan_device *rtcan_devices[RTCAN_MAX_DEVICES]; ++static DEFINE_RTDM_LOCK(rtcan_devices_rt_lock); ++ ++static int rtcan_global_init_done; ++ ++DEFINE_SEMAPHORE(rtcan_devices_nrt_lock); ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++rtdm_lock_t rtcan_socket_lock; ++ ++/* Spinlock for all reception lists and also for some members in ++ * struct rtcan_socket */ ++rtdm_lock_t rtcan_recv_list_lock; ++ ++ ++ ++static inline void rtcan_global_init(void) ++{ ++ if (!rtcan_global_init_done) { ++ rtdm_lock_init(&rtcan_socket_lock); ++ rtdm_lock_init(&rtcan_recv_list_lock); ++ rtcan_global_init_done = 1; ++ } ++} ++ ++ ++static inline struct rtcan_device *__rtcan_dev_get_by_name(const char *name) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) { ++ dev = rtcan_devices[i]; ++ if ((dev != NULL) && (strncmp(dev->name, name, IFNAMSIZ) == 0)) ++ return dev; ++ } ++ return NULL; ++} ++ ++ ++struct rtcan_device *rtcan_dev_get_by_name(const char *name) ++{ ++ struct rtcan_device *dev; ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lockctx_t context; ++#endif ++ ++ ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++#endif ++ ++ dev = __rtcan_dev_get_by_name(name); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ if (dev != NULL) ++ atomic_inc(&dev->refcount); ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++#endif ++ ++ return dev; ++} ++ ++ ++static inline struct rtcan_device *__rtcan_dev_get_by_index(int ifindex) ++{ ++ return rtcan_devices[ifindex - 1]; ++} ++ ++ ++struct rtcan_device *rtcan_dev_get_by_index(int ifindex) ++{ ++ struct rtcan_device *dev; ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lockctx_t context; ++#endif ++ ++ ++ if ((ifindex <= 0) || (ifindex > RTCAN_MAX_DEVICES)) ++ return NULL; ++ ++#ifdef RTCAN_USE_REFCOUNT ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++#endif ++ ++ dev = __rtcan_dev_get_by_index(ifindex); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ if (dev != NULL) ++ atomic_inc(&dev->refcount); ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++#endif ++ ++ return dev; ++} ++ ++ ++void rtcan_dev_alloc_name(struct rtcan_device *dev, const char *mask) ++{ ++ char buf[IFNAMSIZ]; ++ struct rtcan_device *tmp; ++ int i; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) { ++ ksformat(buf, IFNAMSIZ, mask, i); ++ if ((tmp = rtcan_dev_get_by_name(buf)) == NULL) { ++ strncpy(dev->name, buf, IFNAMSIZ); ++ break; ++ } ++#ifdef RTCAN_USE_REFCOUNT ++ else ++ rtcan_dev_dereference(tmp); ++#endif ++ } ++} ++ ++ ++struct rtcan_device *rtcan_dev_alloc(int sizeof_priv, int sizeof_board_priv) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_recv *recv_list_elem; ++ int alloc_size; ++ int j; ++ ++ ++ alloc_size = sizeof(*dev) + sizeof_priv + sizeof_board_priv; ++ ++ dev = (struct rtcan_device *)kmalloc(alloc_size, GFP_KERNEL); ++ if (dev == NULL) { ++ printk(KERN_ERR "rtcan: cannot allocate rtcan device\n"); ++ return NULL; ++ } ++ ++ memset(dev, 0, alloc_size); ++ ++ sema_init(&dev->nrt_lock, 1); ++ ++ rtdm_lock_init(&dev->device_lock); ++ ++ /* Init TX Semaphore, will be destroyed forthwith ++ * when setting stop mode */ ++ 
rtdm_sem_init(&dev->tx_sem, 0); ++#ifdef RTCAN_USE_REFCOUNT ++ atomic_set(&dev->refcount, 0); ++#endif ++ ++ /* Initialize receive list */ ++ dev->empty_list = recv_list_elem = dev->receivers; ++ for (j = 0; j < RTCAN_MAX_RECEIVERS - 1; j++, recv_list_elem++) ++ recv_list_elem->next = recv_list_elem + 1; ++ recv_list_elem->next = NULL; ++ dev->free_entries = RTCAN_MAX_RECEIVERS; ++ ++ if (sizeof_priv) ++ dev->priv = (void *)((unsigned long)dev + sizeof(*dev)); ++ if (sizeof_board_priv) ++ dev->board_priv = (void *)((unsigned long)dev + sizeof(*dev) + sizeof_priv); ++ ++ return dev; ++} ++ ++void rtcan_dev_free (struct rtcan_device *dev) ++{ ++ if (dev != NULL) { ++ rtdm_sem_destroy(&dev->tx_sem); ++ kfree(dev); ++ } ++} ++ ++ ++static inline int __rtcan_dev_new_index(void) ++{ ++ int i; ++ ++ ++ for (i = 0; i < RTCAN_MAX_DEVICES; i++) ++ if (rtcan_devices[i] == NULL) ++ return i+1; ++ ++ return -ENOMEM; ++} ++ ++ ++int rtcan_dev_register(struct rtcan_device *dev) ++{ ++ rtdm_lockctx_t context; ++ int ret; ++ ++ down(&rtcan_devices_nrt_lock); ++ ++ rtcan_global_init(); ++ ++ if ((ret = __rtcan_dev_new_index()) < 0) { ++ up(&rtcan_devices_nrt_lock); ++ return ret; ++ } ++ dev->ifindex = ret; ++ ++ if (strchr(dev->name,'%') != NULL) ++ rtcan_dev_alloc_name(dev, dev->name); ++ ++ if (__rtcan_dev_get_by_name(dev->name) != NULL) { ++ up(&rtcan_devices_nrt_lock); ++ return -EEXIST; ++ } ++ ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ ++ rtcan_devices[dev->ifindex - 1] = dev; ++ ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ rtcan_dev_create_proc(dev); ++ ++ up(&rtcan_devices_nrt_lock); ++ ++ printk("rtcan: registered %s\n", dev->name); ++ ++ return 0; ++} ++ ++ ++int rtcan_dev_unregister(struct rtcan_device *dev) ++{ ++ rtdm_lockctx_t context; ++ ++ ++ RTCAN_ASSERT(dev->ifindex != 0, ++ printk("RTCAN: device %s/%p was not registered\n", ++ dev->name, dev); return -ENODEV;); ++ ++ /* If device is running, close it first. 
*/ ++ if (CAN_STATE_OPERATING(dev->state)) ++ return -EBUSY; ++ ++ down(&rtcan_devices_nrt_lock); ++ ++ rtcan_dev_remove_proc(dev); ++ ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ while (atomic_read(&dev->refcount) > 0) { ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ up(&rtcan_devices_nrt_lock); ++ ++ RTCAN_DBG("RTCAN: unregistering %s deferred (refcount = %d)\n", ++ dev->name, atomic_read(&dev->refcount)); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(1*HZ); /* wait a second */ ++ ++ down(&rtcan_devices_nrt_lock); ++ rtdm_lock_get_irqsave(&rtcan_devices_rt_lock, context); ++ } ++#endif ++ rtcan_devices[dev->ifindex - 1] = NULL; ++ ++ rtdm_lock_put_irqrestore(&rtcan_devices_rt_lock, context); ++ up(&rtcan_devices_nrt_lock); ++ ++#ifdef RTCAN_USE_REFCOUNT ++ RTCAN_ASSERT(atomic_read(&dev->refcount) == 0, ++ printk("RTCAN: dev reference counter < 0!\n");); ++#endif ++ ++ printk("RTCAN: unregistered %s\n", dev->name); ++ ++ return 0; ++} ++ ++ ++EXPORT_SYMBOL_GPL(rtcan_socket_lock); ++EXPORT_SYMBOL_GPL(rtcan_recv_list_lock); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_free); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_alloc); ++EXPORT_SYMBOL_GPL(rtcan_dev_alloc_name); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_register); ++EXPORT_SYMBOL_GPL(rtcan_dev_unregister); ++ ++EXPORT_SYMBOL_GPL(rtcan_dev_get_by_name); ++EXPORT_SYMBOL_GPL(rtcan_dev_get_by_index); +--- linux/drivers/xenomai/can/rtcan_socket.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_socket.c 2021-04-07 16:01:26.381635375 +0800 +@@ -0,0 +1,105 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Based on stack/socket.c - sockets implementation for RTnet ++ * ++ * Copyright (C) 1999 Lineo, Inc ++ * 1999, 2002 David A. Schleef ++ * 2002 Ulrich Marx ++ * 2003-2005 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include "rtcan_socket.h" ++#include "rtcan_list.h" ++ ++ ++LIST_HEAD(rtcan_socket_list); ++ ++void rtcan_socket_init(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ rtdm_lockctx_t lock_ctx; ++ ++ ++ rtdm_sem_init(&sock->recv_sem, 0); ++ ++ sock->recv_head = 0; ++ sock->recv_tail = 0; ++ atomic_set(&sock->ifindex, 0); ++ sock->flistlen = RTCAN_SOCK_UNBOUND; ++ sock->flist = NULL; ++ sock->err_mask = 0; ++ sock->rx_buf_full = 0; ++ sock->flags = 0; ++#ifdef CONFIG_XENO_DRIVERS_CAN_LOOPBACK ++ sock->loopback = 1; ++#endif ++ ++ sock->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ sock->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ ++ INIT_LIST_HEAD(&sock->tx_wait_head); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ list_add(&sock->socket_list, &rtcan_socket_list); ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++} ++ ++ ++void rtcan_socket_cleanup(struct rtdm_fd *fd) ++{ ++ struct rtcan_socket *sock = rtdm_fd_to_private(fd); ++ struct tx_wait_queue *tx_waiting; ++ rtdm_lockctx_t lock_ctx; ++ int tx_list_empty; ++ ++ /* Wake up sleeping senders. This is re-entrant-safe. */ ++ do { ++ cobalt_atomic_enter(lock_ctx); ++ /* Is someone there? */ ++ if (list_empty(&sock->tx_wait_head)) ++ tx_list_empty = 1; ++ else { ++ tx_list_empty = 0; ++ ++ /* Get next entry pointing to a waiting task */ ++ tx_waiting = list_entry(sock->tx_wait_head.next, ++ struct tx_wait_queue, tx_wait_list); ++ ++ /* Remove it from list */ ++ list_del_init(&tx_waiting->tx_wait_list); ++ ++ /* Wake task up (atomic section is left implicitly) */ ++ rtdm_task_unblock(tx_waiting->rt_task); ++ } ++ cobalt_atomic_leave(lock_ctx); ++ } while (!tx_list_empty); ++ ++ rtdm_sem_destroy(&sock->recv_sem); ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ if (sock->socket_list.next) { ++ list_del(&sock->socket_list); ++ sock->socket_list.next = NULL; ++ } ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++} +--- linux/drivers/xenomai/can/rtcan_virt.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_virt.c 2021-04-07 16:01:26.376635382 +0800 +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (C) 2006 Jan Kiszka ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++ ++#include ++#include ++#include ++#include "rtcan_dev.h" ++#include "rtcan_raw.h" ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "VIRT" ++#define RTCAN_MAX_VIRT_DEVS 8 ++ ++#define VIRT_TX_BUFS 1 ++ ++static char *virt_ctlr_name = ""; ++static char *virt_board_name = ""; ++ ++MODULE_AUTHOR("Jan Kiszka "); ++MODULE_DESCRIPTION("Virtual RT-Socket-CAN driver"); ++MODULE_LICENSE("GPL"); ++ ++static unsigned int devices = 2; ++ ++module_param(devices, uint, 0400); ++MODULE_PARM_DESC(devices, "Number of devices on the virtual bus"); ++ ++static struct rtcan_device *rtcan_virt_devs[RTCAN_MAX_VIRT_DEVS]; ++ ++ ++static int rtcan_virt_start_xmit(struct rtcan_device *tx_dev, ++ can_frame_t *tx_frame) ++{ ++ int i; ++ struct rtcan_device *rx_dev; ++ struct rtcan_skb skb; ++ struct rtcan_rb_frame *rx_frame = &skb.rb_frame; ++ rtdm_lockctx_t lock_ctx; ++ ++ /* we can transmit immediately again */ ++ rtdm_sem_up(&tx_dev->tx_sem); ++ ++ skb.rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ ++ rx_frame->can_dlc = tx_frame->can_dlc; ++ rx_frame->can_id = tx_frame->can_id; ++ ++ if (!(tx_frame->can_id & CAN_RTR_FLAG)) { ++ memcpy(rx_frame->data, tx_frame->data, tx_frame->can_dlc); ++ skb.rb_frame_size += tx_frame->can_dlc; ++ } ++ ++ rtdm_lock_get_irqsave(&rtcan_recv_list_lock, lock_ctx); ++ rtdm_lock_get(&rtcan_socket_lock); ++ ++ ++ /* Deliver to all other devices on the virtual bus */ ++ for (i = 0; i < devices; i++) { ++ rx_dev = rtcan_virt_devs[i]; ++ if (rx_dev->state == CAN_STATE_ACTIVE) { ++ if (tx_dev != rx_dev) { ++ rx_frame->can_ifindex = rx_dev->ifindex; ++ rtcan_rcv(rx_dev, &skb); ++ } else if (rtcan_loopback_pending(tx_dev)) ++ rtcan_loopback(tx_dev); ++ } ++ } ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put_irqrestore(&rtcan_recv_list_lock, lock_ctx); ++ ++ return 0; ++} ++ ++ ++static int rtcan_virt_set_mode(struct rtcan_device *dev, can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int err = 0; ++ ++ switch (mode) { ++ case CAN_MODE_STOP: ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ break; ++ ++ case CAN_MODE_START: ++ rtdm_sem_init(&dev->tx_sem, VIRT_TX_BUFS); ++ dev->state = CAN_STATE_ACTIVE; ++ break; ++ ++ default: ++ err = -EOPNOTSUPP; ++ } ++ ++ return err; ++} ++ ++ ++static int __init rtcan_virt_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ int err; ++ ++ if ((dev = rtcan_dev_alloc(0, 0)) == NULL) ++ return -ENOMEM; ++ ++ dev->ctrl_name = virt_ctlr_name; ++ dev->board_name = virt_board_name; ++ ++ rtcan_virt_set_mode(dev, CAN_MODE_STOP, NULL); ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ dev->hard_start_xmit = rtcan_virt_start_xmit; ++ dev->do_set_mode = rtcan_virt_set_mode; ++ ++ /* Register RTDM device */ ++ err = rtcan_dev_register(dev); ++ if (err) { ++ printk(KERN_ERR "ERROR %d while trying to register RTCAN device!\n", err); ++ goto error_out; ++ } ++ ++ /* Remember initialized devices */ ++ rtcan_virt_devs[idx] = dev; ++ ++ printk("%s: %s driver loaded\n", dev->name, RTCAN_DRV_NAME); ++ ++ return 0; ++ ++ error_out: ++ rtcan_dev_free(dev); ++ return err; ++} ++ ++ ++/** Init module */ ++static int __init rtcan_virt_init(void) ++{ ++ int i, err = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < devices; i++) { ++ err = rtcan_virt_init_one(i); ++ if (err) { ++ while (--i >= 0) { ++ struct rtcan_device *dev = rtcan_virt_devs[i]; ++ ++ rtcan_dev_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++ break; ++ } ++ } ++ ++ return err; ++} ++ ++ ++/** 
Cleanup module */ ++static void __exit rtcan_virt_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0; i < devices; i++) { ++ dev = rtcan_virt_devs[i]; ++ ++ printk("Unloading %s device %s\n", RTCAN_DRV_NAME, dev->name); ++ ++ rtcan_virt_set_mode(dev, CAN_MODE_STOP, NULL); ++ rtcan_dev_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_virt_init); ++module_exit(rtcan_virt_exit); +--- linux/drivers/xenomai/can/rtcan_version.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/rtcan_version.h 2021-04-07 16:01:26.371635389 +0800 +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __RTCAN_VERSION_H_ ++#define __RTCAN_VERSION_H_ ++ ++#define RTCAN_MAJOR_VER 0 ++#define RTCAN_MINOR_VER 90 ++#define RTCAN_BUGFIX_VER 2 ++ ++#endif /* __RTCAN_VERSION_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000.h 2021-04-07 16:01:26.366635396 +0800 +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2006, Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __SJA1000_H_ ++#define __SJA1000_H_ ++ ++#include ++ ++struct rtcan_sja1000 { ++ unsigned char (*read_reg)(struct rtcan_device *dev, int off); ++ void (*write_reg)(struct rtcan_device *dev, int off, unsigned char val); ++ void (*irq_ack)(struct rtcan_device *dev); ++ unsigned short irq_num; ++ unsigned short irq_flags; ++ unsigned char ocr; ++ unsigned char cdr; ++ char bus_err_on; ++}; ++ ++#ifdef CONFIG_FS_PROCFS ++int rtcan_sja_create_proc(struct rtcan_device* dev); ++void rtcan_sja_remove_proc(struct rtcan_device* dev); ++#else ++static inline int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ return 0; } ++static inline void rtcan_sja_remove_proc(struct rtcan_device* dev) { } ++#endif ++int rtcan_sja1000_register(struct rtcan_device *dev); ++void rtcan_sja1000_unregister(struct rtcan_device *dev); ++ ++ ++#endif /* __SJA1000_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_plx_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_plx_pci.c 2021-04-07 16:01:26.361635404 +0800 +@@ -0,0 +1,600 @@ ++/* ++ * Copyright (C) 2008-2010 Pavel Cheblakov ++ * ++ * Derived from the ems_pci.c driver: ++ * Copyright (C) 2007 Wolfgang Grandegger ++ * Copyright (C) 2008 Markus Plessing ++ * Copyright (C) 2008 Sebastian Haas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DRV_NAME "rt_sja1000_plx_pci" ++#define RTCAN_DEV_NAME "rtcan%d" ++ ++MODULE_AUTHOR("Pavel Cheblakov "); ++MODULE_DESCRIPTION("RTCAN driver for PLX90xx PCI-bridge cards with " ++ "the SJA1000 chips"); ++MODULE_SUPPORTED_DEVICE("Adlink PCI-7841/cPCI-7841, " ++ "Adlink PCI-7841/cPCI-7841 SE, " ++ "Marathon CAN-bus-PCI, " ++ "TEWS TECHNOLOGIES TPMC810, " ++ "esd CAN-PCI/CPCI/PCI104/200, " ++ "esd CAN-PCI/PMC/266, " ++ "esd CAN-PCIe/2000") ++MODULE_LICENSE("GPL v2"); ++ ++#define PLX_PCI_MAX_CHAN 2 ++ ++struct plx_pci_card { ++ int channels; /* detected channels count */ ++ struct rtcan_device *rtcan_dev[PLX_PCI_MAX_CHAN]; ++ void __iomem *conf_addr; ++ ++ /* Pointer to device-dependent reset function */ ++ void (*reset_func)(struct pci_dev *pdev); ++}; ++ ++#define PLX_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* PLX9030/9050/9052 registers */ ++#define PLX_INTCSR 0x4c /* Interrupt Control/Status */ ++#define PLX_CNTRL 0x50 /* User I/O, Direct Slave Response, ++ * Serial EEPROM, and Initialization ++ * Control register ++ */ ++ ++#define PLX_LINT1_EN 0x1 /* Local interrupt 1 enable */ ++#define PLX_LINT2_EN (1 << 3) /* Local interrupt 2 enable */ ++#define PLX_PCI_INT_EN (1 << 6) /* PCI Interrupt Enable */ ++#define PLX_PCI_RESET (1 << 30) /* PCI Adapter Software Reset */ ++ ++/* PLX9056 registers */ ++#define PLX9056_INTCSR 0x68 /* Interrupt Control/Status */ ++#define PLX9056_CNTRL 0x6c /* Control / Software Reset */ ++ ++#define PLX9056_LINTI (1 << 11) ++#define PLX9056_PCI_INT_EN (1 << 8) ++#define PLX9056_PCI_RCR (1 << 29) /* Read Configuration Registers */ ++ ++/* ++ * The board configuration is probably following: ++ * RX1 is connected to ground. ++ * TX1 is not connected. ++ * CLKO is not connected. ++ * Setting the OCR register to 0xDA is a good idea. ++ * This means normal output mode, push-pull and the correct polarity. ++ */ ++#define PLX_PCI_OCR (SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL) ++ ++/* ++ * In the CDR register, you should set CBP to 1. ++ * You will probably also want to set the clock divider value to 7 ++ * (meaning direct oscillator output) because the second SJA1000 chip ++ * is driven by the first one CLKOUT output. 
++ */ ++#define PLX_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CAN_MODE) ++ ++/* SJA1000 Control Register in the BasicCAN Mode */ ++#define SJA_CR 0x00 ++ ++/* States of some SJA1000 registers after hardware reset in the BasicCAN mode*/ ++#define REG_CR_BASICCAN_INITIAL 0x21 ++#define REG_CR_BASICCAN_INITIAL_MASK 0xa1 ++#define REG_SR_BASICCAN_INITIAL 0x0c ++#define REG_IR_BASICCAN_INITIAL 0xe0 ++ ++/* States of some SJA1000 registers after hardware reset in the PeliCAN mode*/ ++#define REG_MOD_PELICAN_INITIAL 0x01 ++#define REG_SR_PELICAN_INITIAL 0x3c ++#define REG_IR_PELICAN_INITIAL 0x00 ++ ++#define ADLINK_PCI_VENDOR_ID 0x144A ++#define ADLINK_PCI_DEVICE_ID 0x7841 ++ ++#define ESD_PCI_SUB_SYS_ID_PCI200 0x0004 ++#define ESD_PCI_SUB_SYS_ID_PCI266 0x0009 ++#define ESD_PCI_SUB_SYS_ID_PMC266 0x000e ++#define ESD_PCI_SUB_SYS_ID_CPCI200 0x010b ++#define ESD_PCI_SUB_SYS_ID_PCIE2000 0x0200 ++#define ESD_PCI_SUB_SYS_ID_PCI104200 0x0501 ++ ++#define MARATHON_PCI_DEVICE_ID 0x2715 ++ ++#define TEWS_PCI_VENDOR_ID 0x1498 ++#define TEWS_PCI_DEVICE_ID_TMPC810 0x032A ++ ++static void plx_pci_reset_common(struct pci_dev *pdev); ++static void plx_pci_reset_marathon(struct pci_dev *pdev); ++static void plx9056_pci_reset_common(struct pci_dev *pdev); ++ ++struct plx_pci_channel_map { ++ u32 bar; ++ u32 offset; ++ u32 size; /* 0x00 - auto, e.g. length of entire bar */ ++}; ++ ++struct plx_pci_card_info { ++ const char *name; ++ int channel_count; ++ u32 can_clock; ++ u8 ocr; /* output control register */ ++ u8 cdr; /* clock divider register */ ++ ++ /* Parameters for mapping local configuration space */ ++ struct plx_pci_channel_map conf_map; ++ ++ /* Parameters for mapping the SJA1000 chips */ ++ struct plx_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CHAN]; ++ ++ /* Pointer to device-dependent reset function */ ++ void (*reset_func)(struct pci_dev *pdev); ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_adlink = { ++ "Adlink PCI-7841/cPCI-7841", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {1, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x80, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_adlink_se = { ++ "Adlink PCI-7841/cPCI-7841 SE", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x80, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd200 = { ++ "esd CAN-PCI/CPCI/PCI104/200", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9030/9050 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd266 = { ++ "esd CAN-PCI/PMC/266", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx9056_pci_reset_common ++ /* based on PLX9056 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_esd2000 = { ++ "esd CAN-PCIe/2000", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x80}, {2, 0x100, 0x80} }, ++ &plx9056_pci_reset_common ++ /* based on PEX8311 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_marathon = { ++ "Marathon CAN-bus-PCI", 2, ++ PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x00, 0x00}, {4, 0x00, 0x00} }, ++ &plx_pci_reset_marathon ++ /* based on PLX9052 */ ++}; ++ ++static struct plx_pci_card_info plx_pci_card_info_tews = { ++ "TEWS TECHNOLOGIES TPMC810", 2, ++ 
PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, ++ {0, 0x00, 0x00}, { {2, 0x000, 0x80}, {2, 0x100, 0x80} }, ++ &plx_pci_reset_common ++ /* based on PLX9030 */ ++}; ++ ++static const struct pci_device_id plx_pci_tbl[] = { ++ { ++ /* Adlink PCI-7841/cPCI-7841 */ ++ ADLINK_PCI_VENDOR_ID, ADLINK_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ PCI_CLASS_NETWORK_OTHER << 8, ~0, ++ (kernel_ulong_t)&plx_pci_card_info_adlink ++ }, ++ { ++ /* Adlink PCI-7841/cPCI-7841 SE */ ++ ADLINK_PCI_VENDOR_ID, ADLINK_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ PCI_CLASS_COMMUNICATION_OTHER << 8, ~0, ++ (kernel_ulong_t)&plx_pci_card_info_adlink_se ++ }, ++ { ++ /* esd CAN-PCI/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-CPCI/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_CPCI200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-PCI104/200 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI104200, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd200 ++ }, ++ { ++ /* esd CAN-PCI/266 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCI266, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd266 ++ }, ++ { ++ /* esd CAN-PMC/266 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PMC266, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd266 ++ }, ++ { ++ /* esd CAN-PCIE/2000 */ ++ PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9056, ++ PCI_VENDOR_ID_ESDGMBH, ESD_PCI_SUB_SYS_ID_PCIE2000, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_esd2000 ++ }, ++ { ++ /* Marathon CAN-bus-PCI card */ ++ PCI_VENDOR_ID_PLX, MARATHON_PCI_DEVICE_ID, ++ PCI_ANY_ID, PCI_ANY_ID, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_marathon ++ }, ++ { ++ /* TEWS TECHNOLOGIES TPMC810 card */ ++ TEWS_PCI_VENDOR_ID, TEWS_PCI_DEVICE_ID_TMPC810, ++ PCI_ANY_ID, PCI_ANY_ID, ++ 0, 0, ++ (kernel_ulong_t)&plx_pci_card_info_tews ++ }, ++ { 0,} ++}; ++MODULE_DEVICE_TABLE(pci, plx_pci_tbl); ++ ++static u8 plx_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ return ioread8((void* __iomem)dev->base_addr + port); ++} ++ ++static void plx_pci_write_reg(struct rtcan_device *dev, int port, u8 val) ++{ ++ iowrite8(val, (void* __iomem)dev->base_addr + port); ++} ++ ++/* ++ * Check if a CAN controller is present at the specified location ++ * by trying to switch 'em from the Basic mode into the PeliCAN mode. ++ * Also check states of some registers in reset mode. ++ */ ++static inline int plx_pci_check_sja1000(struct rtcan_device *dev) ++{ ++ int flag = 0; ++ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* ++ * Check registers after hardware reset (the Basic mode) ++ * See states on p. 10 of the Datasheet. ++ */ ++ if ((chip->read_reg(dev, SJA_CR) & REG_CR_BASICCAN_INITIAL_MASK) == ++ REG_CR_BASICCAN_INITIAL && ++ (chip->read_reg(dev, SJA_SR) == REG_SR_BASICCAN_INITIAL) && ++ (chip->read_reg(dev, SJA_IR) == REG_IR_BASICCAN_INITIAL)) ++ flag = 1; ++ ++ /* Bring the SJA1000 into the PeliCAN mode*/ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* ++ * Check registers after reset in the PeliCAN mode. ++ * See states on p. 23 of the Datasheet. 
++ */ ++ if (chip->read_reg(dev, SJA_MOD) == REG_MOD_PELICAN_INITIAL && ++ chip->read_reg(dev, SJA_SR) == REG_SR_PELICAN_INITIAL && ++ chip->read_reg(dev, SJA_IR) == REG_IR_PELICAN_INITIAL) ++ return flag; ++ ++ return 0; ++} ++ ++/* ++ * PLX9030/50/52 software reset ++ * Also LRESET# asserts and brings to reset device on the Local Bus (if wired). ++ * For most cards it's enough for reset the SJA1000 chips. ++ */ ++static void plx_pci_reset_common(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ u32 cntrl; ++ ++ cntrl = ioread32(card->conf_addr + PLX_CNTRL); ++ cntrl |= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX_CNTRL); ++ udelay(100); ++ cntrl ^= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX_CNTRL); ++}; ++ ++/* ++ * PLX9056 software reset ++ * Assert LRESET# and reset device(s) on the Local Bus (if wired). ++ */ ++static void plx9056_pci_reset_common(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ u32 cntrl; ++ ++ /* issue a local bus reset */ ++ cntrl = ioread32(card->conf_addr + PLX9056_CNTRL); ++ cntrl |= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ udelay(100); ++ cntrl ^= PLX_PCI_RESET; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ ++ /* reload local configuration from EEPROM */ ++ cntrl |= PLX9056_PCI_RCR; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++ ++ /* ++ * There is no safe way to poll for the end ++ * of reconfiguration process. Waiting for 10ms ++ * is safe. ++ */ ++ mdelay(10); ++ ++ cntrl ^= PLX9056_PCI_RCR; ++ iowrite32(cntrl, card->conf_addr + PLX9056_CNTRL); ++}; ++ ++/* Special reset function for Marathon card */ ++static void plx_pci_reset_marathon(struct pci_dev *pdev) ++{ ++ void __iomem *reset_addr; ++ int i; ++ int reset_bar[2] = {3, 5}; ++ ++ plx_pci_reset_common(pdev); ++ ++ for (i = 0; i < 2; i++) { ++ reset_addr = pci_iomap(pdev, reset_bar[i], 0); ++ if (!reset_addr) { ++ dev_err(&pdev->dev, "Failed to remap reset " ++ "space %d (BAR%d)\n", i, reset_bar[i]); ++ } else { ++ /* reset the SJA1000 chip */ ++ iowrite8(0x1, reset_addr); ++ udelay(100); ++ pci_iounmap(pdev, reset_addr); ++ } ++ } ++} ++ ++static void plx_pci_del_card(struct pci_dev *pdev) ++{ ++ struct plx_pci_card *card = pci_get_drvdata(pdev); ++ struct rtcan_device *dev; ++ int i = 0; ++ ++ for (i = 0; i < card->channels; i++) { ++ dev = card->rtcan_dev[i]; ++ if (!dev) ++ continue; ++ ++ dev_info(&pdev->dev, "Removing %s\n", dev->name); ++ rtcan_sja1000_unregister(dev); ++ if (dev->base_addr) ++ pci_iounmap(pdev, (void* __iomem)dev->base_addr); ++ rtcan_dev_free(dev); ++ } ++ ++ card->reset_func(pdev); ++ ++ /* ++ * Disable interrupts from PCI-card and disable local ++ * interrupts ++ */ ++ if (pdev->device != PCI_DEVICE_ID_PLX_9056) ++ iowrite32(0x0, card->conf_addr + PLX_INTCSR); ++ else ++ iowrite32(0x0, card->conf_addr + PLX9056_INTCSR); ++ ++ if (card->conf_addr) ++ pci_iounmap(pdev, card->conf_addr); ++ ++ kfree(card); ++ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++/* ++ * Probe PLX90xx based device for the SJA1000 chips and register each ++ * available CAN channel to SJA1000 Socket-CAN subsystem. 
++ */ ++static int plx_pci_add_card(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtcan_sja1000 *chip; ++ struct rtcan_device *dev; ++ struct plx_pci_card *card; ++ struct plx_pci_card_info *ci; ++ int err, i; ++ u32 val; ++ void __iomem *addr; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ ci = (struct plx_pci_card_info *)ent->driver_data; ++ ++ if (pci_enable_device(pdev) < 0) { ++ dev_err(&pdev->dev, "Failed to enable PCI device\n"); ++ return -ENODEV; ++ } ++ ++ dev_info(&pdev->dev, "Detected \"%s\" card at slot #%i\n", ++ ci->name, PCI_SLOT(pdev->devfn)); ++ ++ /* Allocate card structures to hold addresses, ... */ ++ card = kzalloc(sizeof(*card), GFP_KERNEL); ++ if (!card) { ++ dev_err(&pdev->dev, "Unable to allocate memory\n"); ++ pci_disable_device(pdev); ++ return -ENOMEM; ++ } ++ ++ pci_set_drvdata(pdev, card); ++ ++ card->channels = 0; ++ ++ /* Remap PLX90xx configuration space */ ++ addr = pci_iomap(pdev, ci->conf_map.bar, ci->conf_map.size); ++ if (!addr) { ++ err = -ENOMEM; ++ dev_err(&pdev->dev, "Failed to remap configuration space " ++ "(BAR%d)\n", ci->conf_map.bar); ++ goto failure_cleanup; ++ } ++ card->conf_addr = addr + ci->conf_map.offset; ++ ++ ci->reset_func(pdev); ++ card->reset_func = ci->reset_func; ++ ++ /* Detect available channels */ ++ for (i = 0; i < ci->channel_count; i++) { ++ struct plx_pci_channel_map *cm = &ci->chan_map_tbl[i]; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct plx_pci_card)); ++ if (!dev) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ dev->board_name = (char *)ci->name; ++ ++ card->rtcan_dev[i] = dev; ++ chip = card->rtcan_dev[i]->priv; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ /* ++ * Remap IO space of the SJA1000 chips ++ * This is device-dependent mapping ++ */ ++ addr = pci_iomap(pdev, cm->bar, cm->size); ++ if (!addr) { ++ err = -ENOMEM; ++ dev_err(&pdev->dev, "Failed to remap BAR%d\n", cm->bar); ++ goto failure_cleanup; ++ } ++ ++ dev->base_addr = (unsigned long)(addr + cm->offset); ++ chip->read_reg = plx_pci_read_reg; ++ chip->write_reg = plx_pci_write_reg; ++ ++ /* Check if channel is present */ ++ if (plx_pci_check_sja1000(dev)) { ++ dev->can_sys_clock = ci->can_clock; ++ chip->ocr = ci->ocr; ++ chip->cdr = ci->cdr; ++ ++ /* Register SJA1000 device */ ++ err = rtcan_sja1000_register(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Registering device failed " ++ "(err=%d)\n", err); ++ rtcan_dev_free(dev); ++ goto failure_cleanup; ++ } ++ ++ card->channels++; ++ ++ dev_info(&pdev->dev, "Channel #%d at 0x%p, irq %d " ++ "registered as %s\n", i + 1, ++ (void* __iomem)dev->base_addr, chip->irq_num, ++ dev->name); ++ } else { ++ dev_err(&pdev->dev, "Channel #%d not detected\n", ++ i + 1); ++ rtcan_dev_free(dev); ++ } ++ } ++ ++ if (!card->channels) { ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ ++ /* ++ * Enable interrupts from PCI-card (PLX90xx) and enable Local_1, ++ * Local_2 interrupts from the SJA1000 chips ++ */ ++ if (pdev->device != PCI_DEVICE_ID_PLX_9056) { ++ val = ioread32(card->conf_addr + PLX_INTCSR); ++ if (pdev->subsystem_vendor == PCI_VENDOR_ID_ESDGMBH) ++ val |= PLX_LINT1_EN | PLX_PCI_INT_EN; ++ else ++ val |= PLX_LINT1_EN | PLX_LINT2_EN | PLX_PCI_INT_EN; ++ iowrite32(val, card->conf_addr + PLX_INTCSR); ++ } else { ++ iowrite32(PLX9056_LINTI | PLX9056_PCI_INT_EN, ++ card->conf_addr + PLX9056_INTCSR); ++ } ++ return 0; ++ ++failure_cleanup: ++ dev_err(&pdev->dev, 
"Error: %d. Cleaning Up.\n", err); ++ ++ plx_pci_del_card(pdev); ++ ++ return err; ++} ++ ++static struct pci_driver plx_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = plx_pci_tbl, ++ .probe = plx_pci_add_card, ++ .remove = plx_pci_del_card, ++}; ++ ++module_pci_driver(plx_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_peak_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_peak_pci.c 2021-04-07 16:01:26.356635411 +0800 +@@ -0,0 +1,357 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from the PCAN project file driver/src/pcan_pci.c: ++ * ++ * Copyright (C) 2001-2006 PEAK System-Technik GmbH ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "PEAK-PCI-CAN" ++ ++static char *peak_pci_board_name = "PEAK-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for PEAK-PCI cards"); ++MODULE_SUPPORTED_DEVICE("PEAK-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_peak_pci ++{ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ int channel; ++ volatile void __iomem *base_addr; ++ volatile void __iomem *conf_addr; ++}; ++ ++#define PEAK_PCI_CAN_SYS_CLOCK (16000000 / 2) ++ ++#define PELICAN_SINGLE (SJA_CDR_CAN_MODE | SJA_CDR_CBP | 0x07 | SJA_CDR_CLK_OFF) ++#define PELICAN_MASTER (SJA_CDR_CAN_MODE | SJA_CDR_CBP | 0x07 ) ++#define PELICAN_DEFAULT (SJA_CDR_CAN_MODE ) ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++// important PITA registers ++#define PITA_ICR 0x00 // interrupt control register ++#define PITA_GPIOICR 0x18 // general purpose IO interface control register ++#define PITA_MISC 0x1C // miscellanoes register ++ ++#define PEAK_PCI_VENDOR_ID 0x001C // the PCI device and vendor IDs ++#define PEAK_PCI_DEVICE_ID 0x0001 // Device ID for PCI and older PCIe cards ++#define PEAK_PCIE_DEVICE_ID 0x0003 // Device ID for newer PCIe cards (IPEH-003027) ++#define PEAK_CPCI_DEVICE_ID 0x0004 // for nextgen cPCI slot cards ++#define PEAK_MPCI_DEVICE_ID 0x0005 // for nextgen miniPCI slot cards ++#define PEAK_PC_104P_DEVICE_ID 0x0006 // PCAN-PC/104+ cards ++#define PEAK_PCI_104E_DEVICE_ID 0x0007 // PCAN-PCI/104 Express cards ++#define PEAK_MPCIE_DEVICE_ID 0x0008 // The miniPCIe slot cards ++#define PEAK_PCIE_OEM_ID 0x0009 // PCAN-PCI Express OEM ++ ++#define PCI_CONFIG_PORT_SIZE 0x1000 // size of the config io-memory ++#define PCI_PORT_SIZE 0x0400 // size of a channel io-memory ++ 
++static struct pci_device_id peak_pci_tbl[] = { ++ {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,}, ++ { } ++}; ++MODULE_DEVICE_TABLE (pci, peak_pci_tbl); ++ ++ ++static u8 rtcan_peak_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ return readb(board->base_addr + ((unsigned long)port << 2)); ++} ++ ++static void rtcan_peak_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ writeb(data, board->base_addr + ((unsigned long)port << 2)); ++} ++ ++static void rtcan_peak_pci_irq_ack(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ u16 pita_icr_low; ++ ++ /* Select and clear in Pita stored interrupt */ ++ pita_icr_low = readw(board->conf_addr + PITA_ICR); ++ if (board->channel == CHANNEL_SLAVE) { ++ if (pita_icr_low & 0x0001) ++ writew(0x0001, board->conf_addr + PITA_ICR); ++ } ++ else { ++ if (pita_icr_low & 0x0002) ++ writew(0x0002, board->conf_addr + PITA_ICR); ++ } ++} ++ ++static void rtcan_peak_pci_del_chan(struct rtcan_device *dev, ++ int init_step) ++{ ++ struct rtcan_peak_pci *board; ++ u16 pita_icr_high; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ switch (init_step) { ++ case 0: /* Full cleanup */ ++ printk("Removing %s %s device %s\n", ++ peak_pci_board_name, dev->ctrl_name, dev->name); ++ rtcan_sja1000_unregister(dev); ++ case 5: ++ pita_icr_high = readw(board->conf_addr + PITA_ICR + 2); ++ if (board->channel == CHANNEL_SLAVE) { ++ pita_icr_high &= ~0x0001; ++ } else { ++ pita_icr_high &= ~0x0002; ++ } ++ writew(pita_icr_high, board->conf_addr + PITA_ICR + 2); ++ case 4: ++ iounmap((void *)board->base_addr); ++ case 3: ++ if (board->channel != CHANNEL_SLAVE) ++ iounmap((void *)board->conf_addr); ++ case 2: ++ rtcan_dev_free(dev); ++ case 1: ++ break; ++ } ++ ++} ++ ++static int rtcan_peak_pci_add_chan(struct pci_dev *pdev, int channel, ++ struct rtcan_device **master_dev) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_peak_pci *board; ++ u16 pita_icr_high; ++ unsigned long addr; ++ int ret, init_step = 1; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_peak_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ init_step = 2; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->channel = channel; ++ ++ if (channel != CHANNEL_SLAVE) { ++ ++ addr = pci_resource_start(pdev, 0); ++ board->conf_addr = ioremap(addr, PCI_CONFIG_PORT_SIZE); ++ if (board->conf_addr == 0) { ++ ret = -ENODEV; ++ goto failure; ++ } ++ init_step = 3; ++ ++ /* Set GPIO control register */ ++ writew(0x0005, board->conf_addr + PITA_GPIOICR + 2); ++ ++ if (channel == CHANNEL_MASTER) ++ writeb(0x00, board->conf_addr + PITA_GPIOICR); /* enable both */ ++ else ++ writeb(0x04, board->conf_addr 
+ PITA_GPIOICR); /* enable single */ ++ ++ writeb(0x05, board->conf_addr + PITA_MISC + 3); /* toggle reset */ ++ mdelay(5); ++ writeb(0x04, board->conf_addr + PITA_MISC + 3); /* leave parport mux mode */ ++ } else { ++ struct rtcan_peak_pci *master_board = ++ (struct rtcan_peak_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ board->conf_addr = master_board->conf_addr; ++ } ++ ++ addr = pci_resource_start(pdev, 1); ++ if (channel == CHANNEL_SLAVE) ++ addr += 0x400; ++ ++ board->base_addr = ioremap(addr, PCI_PORT_SIZE); ++ if (board->base_addr == 0) { ++ ret = -ENODEV; ++ goto failure; ++ } ++ init_step = 4; ++ ++ dev->board_name = peak_pci_board_name; ++ ++ chip->read_reg = rtcan_peak_pci_read_reg; ++ chip->write_reg = rtcan_peak_pci_write_reg; ++ chip->irq_ack = rtcan_peak_pci_irq_ack; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = PEAK_PCI_CAN_SYS_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ /* Clock divider register */ ++ if (channel == CHANNEL_MASTER) ++ chip->cdr = PELICAN_MASTER; ++ else ++ chip->cdr = PELICAN_SINGLE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ pita_icr_high = readw(board->conf_addr + PITA_ICR + 2); ++ if (channel == CHANNEL_SLAVE) { ++ pita_icr_high |= 0x0001; ++ } else { ++ pita_icr_high |= 0x0002; ++ } ++ writew(pita_icr_high, board->conf_addr + PITA_ICR + 2); ++ init_step = 5; ++ ++ printk("%s: base_addr=%p conf_addr=%p irq=%d\n", RTCAN_DRV_NAME, ++ board->base_addr, board->conf_addr, chip->irq_num); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR %d while trying to register SJA1000 device!\n", ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ failure: ++ rtcan_peak_pci_del_chan(dev, init_step); ++ return ret; ++} ++ ++static int peak_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret; ++ u16 sub_sys_id; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ printk("%s: initializing device %04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device); ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ if ((ret = pci_read_config_word(pdev, 0x2e, &sub_sys_id))) ++ goto failure_cleanup; ++ ++ /* Enable memory space */ ++ if ((ret = pci_write_config_word(pdev, 0x04, 2))) ++ goto failure_cleanup; ++ ++ if ((ret = pci_write_config_word(pdev, 0x44, 0))) ++ goto failure_cleanup; ++ ++ if (sub_sys_id > 3) { ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_MASTER, ++ &master_dev))) ++ goto failure_cleanup; ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_SLAVE, ++ &master_dev))) ++ goto failure_cleanup; ++ } else { ++ if ((ret = rtcan_peak_pci_add_chan(pdev, CHANNEL_SINGLE, ++ &master_dev))) ++ goto failure_cleanup; ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ return 0; ++ ++ failure_cleanup: ++ if (master_dev) ++ rtcan_peak_pci_del_chan(master_dev, 0); ++ ++ pci_release_regions(pdev); ++ ++ failure: ++ return ret; ++ ++} ++ ++static void peak_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_peak_pci *board = (struct rtcan_peak_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ 
rtcan_peak_pci_del_chan(board->slave_dev, 0); ++ rtcan_peak_pci_del_chan(dev, 0); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_peak_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = peak_pci_tbl, ++ .probe = peak_pci_init_one, ++ .remove = peak_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_peak_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000_regs.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000_regs.h 2021-04-07 16:01:26.351635418 +0800 +@@ -0,0 +1,206 @@ ++/* ++ * Copyright (C) 2005,2006 Sebastian Smolorz ++ * ++ * ++ * Based on drivers/can/sja1000.h in linux-can.patch, a CAN socket ++ * framework for Linux: ++ * ++ * Copyright (C) 2005, Sascha Hauer, Pengutronix ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __SJA1000_REGS_H_ ++#define __SJA1000_REGS_H_ ++ ++ ++/* PeliCAN mode address map */ ++ ++/* reset and operating mode */ ++#define SJA_MOD 0 /* Mode register */ ++#define SJA_CMR 1 /* Command register */ ++#define SJA_SR 2 /* Status register */ ++#define SJA_IR 3 /* Interrupt register */ ++#define SJA_IER 4 /* Interrupt enable register */ ++#define SJA_BTR0 6 /* Bus timing register 0 */ ++#define SJA_BTR1 7 /* Bus timing register 1 */ ++#define SJA_OCR 8 /* Output control register */ ++#define SJA_ALC 11 /* Arbitration lost capture */ ++#define SJA_ECC 12 /* Error code capture register */ ++#define SJA_RXERR 14 /* Receive error counter */ ++#define SJA_TXERR 15 /* Transmit error counter */ ++#define SJA_CDR 31 /* Clock divider register */ ++ ++/* reset mode */ ++#define SJA_ACR0 16 /* Acceptance code register 0 */ ++#define SJA_ACR1 17 /* Acceptance code register 1 */ ++#define SJA_ACR2 18 /* Acceptance code register 2 */ ++#define SJA_ACR3 19 /* Acceptance code register 3 */ ++#define SJA_AMR0 20 /* Acceptance mask register 0 */ ++#define SJA_AMR1 21 /* Acceptance mask register 1 */ ++#define SJA_AMR2 22 /* Acceptance mask register 2 */ ++#define SJA_AMR3 23 /* Acceptance mask register 3 */ ++ ++/* operating mode */ ++#define SJA_FIR 16 /* Frame information register */ ++#define SJA_ID1 17 /* Identifier 1 */ ++#define SJA_ID2 18 /* Identifier 2 */ ++#define SJA_ID3 19 /* Identifier 3 (EFF only) */ ++#define SJA_ID4 20 /* Identifier 4 (EFF only) */ ++ ++#define SJA_DATA_SFF(x) (19 + (x)) /* Data registers in case of standard ++ * frame format; 0 <= x <= 7 */ ++#define SJA_DATA_EFF(x) (21 + (x)) /* Data registers in case of extended ++ * frame format; 0 <= x <= 7 */ ++ ++/* Mode register */ ++enum SJA1000_PELI_MOD { ++ SJA_MOD_RM = 1, /* Reset Mode */ ++ SJA_MOD_LOM = 1<<1, /* Listen Only Mode */ ++ SJA_MOD_STM = 1<<2, /* Self Test Mode */ ++ SJA_MOD_AFM = 1<<3, /* Acceptance Filter Mode */ ++ SJA_MOD_SM = 1<<4 
/* Sleep Mode */ ++}; ++ ++/* Command register */ ++enum SJA1000_PELI_CMR { ++ SJA_CMR_TR = 1, /* Transmission request */ ++ SJA_CMR_AT = 1<<1, /* Abort Transmission */ ++ SJA_CMR_RRB = 1<<2, /* Release Receive Buffer */ ++ SJA_CMR_CDO = 1<<3, /* Clear Data Overrun */ ++ SJA_CMR_SRR = 1<<4 /* Self reception request */ ++}; ++ ++/* Status register */ ++enum SJA1000_PELI_SR { ++ SJA_SR_RBS = 1, /* Receive Buffer Status */ ++ SJA_SR_DOS = 1<<1, /* Data Overrun Status */ ++ SJA_SR_TBS = 1<<2, /* Transmit Buffer Status */ ++ SJA_SR_ES = 1<<6, /* Error Status */ ++ SJA_SR_BS = 1<<7 /* Bus Status */ ++}; ++ ++/* Interrupt register */ ++enum SJA1000_PELI_IR { ++ SJA_IR_RI = 1, /* Receive Interrupt */ ++ SJA_IR_TI = 1<<1, /* Transmit Interrupt */ ++ SJA_IR_EI = 1<<2, /* Error Warning Interrupt */ ++ SJA_IR_DOI = 1<<3, /* Data Overrun Interrupt */ ++ SJA_IR_WUI = 1<<4, /* Wake-Up Interrupt */ ++ SJA_IR_EPI = 1<<5, /* Error Passive Interrupt */ ++ SJA_IR_ALI = 1<<6, /* Arbitration Lost Interrupt */ ++ SJA_IR_BEI = 1<<7, /* Bus Error Interrupt */ ++}; ++ ++/* Interrupt enable register */ ++enum SJA1000_PELI_IER { ++ SJA_IER_RIE = 1, /* Receive Interrupt Enable */ ++ SJA_IER_TIE = 1<<1, /* Transmit Interrupt Enable */ ++ SJA_IER_EIE = 1<<2, /* Error Warning Interrupt Enable */ ++ SJA_IER_DOIE = 1<<3, /* Data Overrun Interrupt Enable */ ++ SJA_IER_WUIE = 1<<4, /* Wake-Up Interrupt Enable */ ++ SJA_IER_EPIE = 1<<5, /* Error Passive Interrupt Enable */ ++ SJA_IER_ALIE = 1<<6, /* Arbitration Lost Interrupt Enable */ ++ SJA_IER_BEIE = 1<<7, /* Bus Error Interrupt Enable */ ++}; ++ ++/* Bus timing register 0 */ ++enum SJA1000_PELI_BTR0 { ++ /* Period of the CAN system clock t_SCl ++ * (t_CLK = time period of XTAL frequency) */ ++ SJA_BTR0_T_SCL_2_T_CLK = 0, /* t_SCl = 2 x t_CLK */ ++ SJA_BTR0_T_SCL_4_T_CLK = 1, /* t_SCl = 4 x t_CLK */ ++ SJA_BTR0_T_SCL_6_T_CLK = 2, /* t_SCl = 6 x t_CLK */ ++ SJA_BTR0_T_SCL_8_T_CLK = 3, /* t_SCl = 8 x t_CLK */ ++ SJA_BTR0_T_SCL_10_T_CLK = 4, /* t_SCl = 10 x t_CLK */ ++ SJA_BTR0_T_SCL_12_T_CLK = 5, /* t_SCl = 12 x t_CLK */ ++ SJA_BTR0_T_SCL_14_T_CLK = 6, /* t_SCl = 14 x t_CLK */ ++ SJA_BTR0_T_SCL_16_T_CLK = 7, /* t_SCl = 16 x t_CLK */ ++ SJA_BTR0_T_SCL_20_T_CLK = 9, /* t_SCl = 20 x t_CLK */ ++ SJA_BTR0_T_SCL_40_T_CLK = 19, /* t_SCl = 40 x t_CLK */ ++ SJA_BTR0_T_SCL_100_T_CLK = 49, /* t_SCl = 100 x t_CLK */ ++ ++}; ++ ++/* Bus timing register 1 */ ++enum SJA1000_PELI_BTR1 { ++ /* Time segment 1 */ ++ SJA_BTR1_T_SEG1_1_T_SCL = 0, /* t_SEG1 = 1 x t_SCl */ ++ SJA_BTR1_T_SEG1_2_T_SCL = 1, /* t_SEG1 = 2 x t_SCl */ ++ SJA_BTR1_T_SEG1_3_T_SCL = 2, /* t_SEG1 = 3 x t_SCl */ ++ SJA_BTR1_T_SEG1_4_T_SCL = 3, /* t_SEG1 = 4 x t_SCl */ ++ SJA_BTR1_T_SEG1_5_T_SCL = 4, /* t_SEG1 = 5 x t_SCl */ ++ SJA_BTR1_T_SEG1_6_T_SCL = 5, /* t_SEG1 = 6 x t_SCl */ ++ SJA_BTR1_T_SEG1_7_T_SCL = 6, /* t_SEG1 = 7 x t_SCl */ ++ SJA_BTR1_T_SEG1_8_T_SCL = 7, /* t_SEG1 = 8 x t_SCl */ ++ /* Time segment 2 */ ++ SJA_BTR1_T_SEG2_1_T_SCL = 0<<4, /* t_SEG2 = 1 x t_SCl */ ++ SJA_BTR1_T_SEG2_2_T_SCL = 1<<4, /* t_SEG2 = 2 x t_SCl */ ++ SJA_BTR1_T_SEG2_3_T_SCL = 2<<4, /* t_SEG2 = 3 x t_SCl */ ++ SJA_BTR1_T_SEG2_4_T_SCL = 3<<4, /* t_SEG2 = 4 x t_SCl */ ++ SJA_BTR1_T_SEG2_5_T_SCL = 4<<4, /* t_SEG2 = 5 x t_SCl */ ++ SJA_BTR1_T_SEG2_6_T_SCL = 5<<4, /* t_SEG2 = 6 x t_SCl */ ++ SJA_BTR1_T_SEG2_7_T_SCL = 6<<4, /* t_SEG2 = 7 x t_SCl */ ++ SJA_BTR1_T_SEG2_8_T_SCL = 7<<4, /* t_SEG2 = 8 x t_SCl */ ++}; ++ ++/* One bit time = t_SCl + t_SEG1 + t_SEG2 */ ++ ++ ++/* Output control register */ ++enum SJA1000_PELI_OCR { ++ 
SJA_OCR_MODE_BIPHASE = 0, ++ SJA_OCR_MODE_TEST = 1, ++ SJA_OCR_MODE_NORMAL = 2, ++ SJA_OCR_MODE_CLOCK = 3, ++ SJA_OCR_TX0_INVERT = 1<<2, ++ SJA_OCR_TX0_PULLDOWN = 1<<3, ++ SJA_OCR_TX0_PULLUP = 2<<3, ++ SJA_OCR_TX0_PUSHPULL = 3<<3, ++ SJA_OCR_TX1_INVERT = 1<<5, ++ SJA_OCR_TX1_PULLDOWN = 1<<6, ++ SJA_OCR_TX1_PULLUP = 2<<6, ++ SJA_OCR_TX1_PUSHPULL = 3<<6 ++}; ++ ++/* Error code capture register */ ++enum SJA1000_PELI_ECC { ++ /* The segmentation field gives information about the location of ++ * errors on the bus */ ++ SJA_ECC_SEG_MASK = 31, /* Segmentation field mask */ ++ SJA_ECC_DIR = 1<<5, /* Transfer direction */ ++ SJA_ECC_ERR_BIT = 0<<6, ++ SJA_ECC_ERR_FORM = 1<<6, ++ SJA_ECC_ERR_STUFF = 2<<6, ++ SJA_ECC_ERR_MASK = 3<<6 /* Error code mask */ ++}; ++ ++/* Frame information register */ ++enum SJA1000_PELI_FIR { ++ SJA_FIR_DLC_MASK = 15, /* Data length code mask */ ++ SJA_FIR_RTR = 1<<6, /* Remote transmission request */ ++ SJA_FIR_EFF = 1<<7 /* Extended frame format */ ++}; ++ ++/* Clock divider register */ ++enum SJA1000_PELI_CDR { ++ SJA_CDR_CLKOUT_MASK = 0x07, ++ SJA_CDR_CLK_OFF = 1<<3, /* Clock off (CLKOUT pin) */ ++ SJA_CDR_CBP = 1<<6, /* CAN input comparator bypass */ ++ SJA_CDR_CAN_MODE = 1<<7 /* CAN mode: 1 = PeliCAN */ ++}; ++ ++#endif /* __SJA1000_REGS_H_ */ +--- linux/drivers/xenomai/can/sja1000/rtcan_adv_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_adv_pci.c 2021-04-07 16:01:26.346635425 +0800 +@@ -0,0 +1,361 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2012 Thierry Bultel ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define ADV_PCI_BASE_SIZE 0x80 ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "ADV-PCI-CAN" ++ ++static char *adv_pci_board_name = "ADV-PCI"; ++ ++MODULE_AUTHOR("Thierry Bultel "); ++MODULE_DESCRIPTION("RTCAN board driver for Advantech PCI cards"); ++MODULE_SUPPORTED_DEVICE("ADV-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_adv_pci { ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ void __iomem *conf_addr; ++ void __iomem *base_addr; ++}; ++ ++/* ++ * According to the datasheet, ++ * internal clock is 1/2 of the external oscillator frequency ++ * which is 16 MHz ++ */ ++#define ADV_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* ++ * Output control register ++ Depends on the board configuration ++ */ ++ ++#define ADV_PCI_OCR (SJA_OCR_MODE_NORMAL |\ ++ SJA_OCR_TX0_PUSHPULL |\ ++ SJA_OCR_TX1_PUSHPULL |\ ++ SJA_OCR_TX1_INVERT) ++ ++/* ++ * In the CDR register, you should set CBP to 1. 
++ */ ++#define ADV_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CAN_MODE) ++ ++#define ADV_PCI_VENDOR_ID 0x13fe ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++#define ADV_PCI_DEVICE(device_id)\ ++ { ADV_PCI_VENDOR_ID, device_id, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 } ++ ++static const struct pci_device_id adv_pci_tbl[] = { ++ ADV_PCI_DEVICE(0x1680), ++ ADV_PCI_DEVICE(0x3680), ++ ADV_PCI_DEVICE(0x2052), ++ ADV_PCI_DEVICE(0x1681), ++ ADV_PCI_DEVICE(0xc001), ++ ADV_PCI_DEVICE(0xc002), ++ ADV_PCI_DEVICE(0xc004), ++ ADV_PCI_DEVICE(0xc101), ++ ADV_PCI_DEVICE(0xc102), ++ ADV_PCI_DEVICE(0xc104), ++ /* required last entry */ ++ { } ++}; ++ ++MODULE_DEVICE_TABLE(pci, adv_pci_tbl); ++ ++static u8 rtcan_adv_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ return ioread8(board->base_addr + port); ++} ++ ++static void rtcan_adv_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ iowrite8(data, board->base_addr + port); ++} ++ ++static void rtcan_adv_pci_del_chan(struct pci_dev *pdev, ++ struct rtcan_device *dev) ++{ ++ struct rtcan_adv_pci *board; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ rtcan_sja1000_unregister(dev); ++ ++ pci_iounmap(pdev, board->base_addr); ++ ++ rtcan_dev_free(dev); ++} ++ ++ ++static int rtcan_adv_pci_add_chan(struct pci_dev *pdev, ++ int channel, ++ unsigned int bar, ++ unsigned int offset, ++ struct rtcan_device **master_dev) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_adv_pci *board; ++ void __iomem *base_addr; ++ int ret; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_adv_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_adv_pci *master_board = ++ (struct rtcan_adv_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ ++ if (offset) { ++ base_addr = master_board->base_addr+offset; ++ } else { ++ base_addr = pci_iomap(pdev, bar, ADV_PCI_BASE_SIZE); ++ if (!base_addr) { ++ ret = -EIO; ++ goto failure; ++ } ++ } ++ } else { ++ base_addr = pci_iomap(pdev, bar, ADV_PCI_BASE_SIZE) + offset; ++ if (!base_addr) { ++ ret = -EIO; ++ goto failure; ++ } ++ } ++ ++ board->pci_dev = pdev; ++ board->conf_addr = NULL; ++ board->base_addr = base_addr; ++ ++ dev->board_name = adv_pci_board_name; ++ ++ chip->read_reg = rtcan_adv_pci_read_reg; ++ chip->write_reg = rtcan_adv_pci_write_reg; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = ADV_PCI_CAN_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = ADV_PCI_OCR; ++ ++ /* Clock divider register */ ++ chip->cdr = ADV_PCI_CDR; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Make sure SJA1000 is in reset mode */ ++ chip->write_reg(dev, SJA_MOD, SJA_MOD_RM); ++ /* Set PeliCAN mode */ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* check if mode is set */ ++ ret = chip->read_reg(dev, SJA_CDR); ++ if (ret != SJA_CDR_CAN_MODE) { ++ ret = -EIO; ++ goto failure_iounmap; ++ } ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: 
base_addr=%p conf_addr=%p irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 device!\n", ++ ret); ++ goto failure_iounmap; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++failure_iounmap: ++ if (channel != CHANNEL_SLAVE || !offset) ++ pci_iounmap(pdev, base_addr); ++failure: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static int adv_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel; ++ unsigned int nb_ports = 0; ++ unsigned int bar = 0; ++ unsigned int bar_flag = 0; ++ unsigned int offset = 0; ++ unsigned int ix; ++ ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ dev_info(&pdev->dev, "RTCAN Registering card"); ++ ++ ret = pci_enable_device(pdev); ++ if (ret) ++ goto failure; ++ ++ dev_info(&pdev->dev, "RTCAN detected Advantech PCI card at slot #%i\n", ++ PCI_SLOT(pdev->devfn)); ++ ++ ret = pci_request_regions(pdev, RTCAN_DRV_NAME); ++ if (ret) ++ goto failure_device; ++ ++ switch (pdev->device) { ++ case 0xc001: ++ case 0xc002: ++ case 0xc004: ++ case 0xc101: ++ case 0xc102: ++ case 0xc104: ++ nb_ports = pdev->device & 0x7; ++ offset = 0x100; ++ bar = 0; ++ break; ++ case 0x1680: ++ case 0x2052: ++ nb_ports = 2; ++ bar = 2; ++ bar_flag = 1; ++ break; ++ case 0x1681: ++ nb_ports = 1; ++ bar = 2; ++ bar_flag = 1; ++ break; ++ default: ++ goto failure_regions; ++ } ++ ++ if (nb_ports > 1) ++ channel = CHANNEL_MASTER; ++ else ++ channel = CHANNEL_SINGLE; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x:%04x\n", ++ RTCAN_DRV_NAME, ++ pdev->vendor, ++ pdev->device, ++ pdev->subsystem_device); ++ ++ ret = rtcan_adv_pci_add_chan(pdev, channel, bar, offset, &master_dev); ++ if (ret) ++ goto failure_iounmap; ++ ++ /* register slave channel, if any */ ++ ++ for (ix = 1; ix < nb_ports; ix++) { ++ ret = rtcan_adv_pci_add_chan(pdev, ++ CHANNEL_SLAVE, ++ bar + (bar_flag ? 
ix : 0), ++ offset * ix, ++ &master_dev); ++ if (ret) ++ goto failure_iounmap; ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ ++ return 0; ++ ++failure_iounmap: ++ if (master_dev) ++ rtcan_adv_pci_del_chan(pdev, master_dev); ++ ++failure_regions: ++ pci_release_regions(pdev); ++ ++failure_device: ++ pci_disable_device(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void adv_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_adv_pci *board = (struct rtcan_adv_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ rtcan_adv_pci_del_chan(pdev, board->slave_dev); ++ ++ rtcan_adv_pci_del_chan(pdev, dev); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_adv_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = adv_pci_tbl, ++ .probe = adv_pci_init_one, ++ .remove = adv_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_adv_pci_driver); +--- linux/drivers/xenomai/can/sja1000/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/Makefile 2021-04-07 16:01:26.341635432 +0800 +@@ -0,0 +1,24 @@ ++ccflags-y += -Idrivers/xenomai/can -Idrivers/xenomai/can/sja1000 ++ ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000) += xeno_can_sja1000.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PEAK_PCI) += xeno_can_peak_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PEAK_DNG) += xeno_can_peak_dng.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_PLX_PCI) += xeno_can_plx_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_IXXAT_PCI) += xeno_can_ixxat_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ADV_PCI) += xeno_can_adv_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_EMS_PCI) += xeno_can_ems_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ESD_PCI) += xeno_can_esd_pci.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_ISA) += xeno_can_isa.o ++obj-$(CONFIG_XENO_DRIVERS_CAN_SJA1000_MEM) += xeno_can_mem.o ++ ++xeno_can_sja1000-y := rtcan_sja1000.o ++xeno_can_sja1000-$(CONFIG_FS_PROCFS) += rtcan_sja1000_proc.o ++xeno_can_peak_pci-y := rtcan_peak_pci.o ++xeno_can_peak_dng-y := rtcan_peak_dng.o ++xeno_can_plx_pci-y := rtcan_plx_pci.o ++xeno_can_ixxat_pci-y := rtcan_ixxat_pci.o ++xeno_can_adv_pci-y := rtcan_adv_pci.o ++xeno_can_ems_pci-y := rtcan_ems_pci.o ++xeno_can_esd_pci-y := rtcan_esd_pci.o ++xeno_can_isa-y := rtcan_isa.o ++xeno_can_mem-y := rtcan_mem.o +--- linux/drivers/xenomai/can/sja1000/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/Kconfig 2021-04-07 16:01:26.337635438 +0800 +@@ -0,0 +1,100 @@ ++config XENO_DRIVERS_CAN_SJA1000 ++ depends on XENO_DRIVERS_CAN ++ tristate "Philips SJA1000 CAN controller" ++ select XENO_DRIVERS_CAN_BUS_ERR ++ ++config XENO_DRIVERS_CAN_SJA1000_ISA ++ depends on XENO_DRIVERS_CAN_SJA1000 ++ tristate "Standard ISA controllers" ++ help ++ ++ This driver is for CAN devices connected to the ISA bus of a PC ++ or a PC/104 system. The I/O port, interrupt number and a few other ++ hardware specific parameters can be defined via module parameters. ++ ++config XENO_DRIVERS_CAN_SJA1000_MEM ++ depends on XENO_DRIVERS_CAN_SJA1000 ++ tristate "Memory mapped controllers" ++ help ++ ++ This driver is for memory mapped CAN devices. The memory address, ++ interrupt number and a few other hardware specific parameters can ++ be defined via module parameters. 
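++# The ISA and memory-mapped back-ends above take their hardware description
++# as module parameters at load time, for example (I/O port and IRQ values
++# are placeholders for your board): modprobe xeno_can_isa io=0x2c0 irq=10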
++ ++config XENO_DRIVERS_CAN_SJA1000_PEAK_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "PEAK PCI Card" ++ help ++ ++ This driver is for the PCAN PCI, the PC-PCI CAN plug-in card (1 or ++ 2 channel) from PEAK Systems (http://www.peak-system.com). To get ++ the second channel working, Xenomai's shared interrupt support ++ must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_IXXAT_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "IXXAT PCI Card" ++ help ++ ++ This driver is for the IXXAT PC-I 04/PCI card (1 or 2 channel) ++ from the IXXAT Automation GmbH (http://www.ixxat.de). To get ++ the second channel working, Xenomai's shared interrupt support ++ must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_ADV_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "ADVANTECH PCI Cards" ++ help ++ ++ This driver is for the ADVANTECH PCI cards (1 or more channels) ++ It supports the 1680U and some other ones. ++ ++ ++config XENO_DRIVERS_CAN_SJA1000_PLX_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "PLX90xx PCI-bridge based Cards" ++ help ++ ++ This driver is for CAN interface cards based on ++ the PLX90xx PCI bridge. ++ Driver supports now: ++ - Adlink PCI-7841/cPCI-7841 card (http://www.adlinktech.com/) ++ - Adlink PCI-7841/cPCI-7841 SE card ++ - esd CAN-PCI/CPCI/PCI104/200 (http://www.esd.eu/) ++ - esd CAN-PCI/PMC/266 ++ - esd CAN-PCIe/2000 ++ - Marathon CAN-bus-PCI card (http://www.marathon.ru/) ++ - TEWS TECHNOLOGIES TPMC810 card (http://www.tews.com/) ++ ++config XENO_DRIVERS_CAN_SJA1000_EMS_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "EMS CPC PCI Card" ++ help ++ ++ This driver is for the 2 channel CPC PCI card from EMS Dr. Thomas ++ Wünsche (http://www.ems-wuensche.de). To get the second channel ++ working, Xenomai's shared interrupt support must be enabled. ++ ++config XENO_DRIVERS_CAN_SJA1000_ESD_PCI ++ depends on XENO_DRIVERS_CAN_SJA1000 && PCI ++ tristate "ESD PCI Cards (DEPRECATED)" ++ help ++ ++ This driver supports the esd PCI CAN cards CAN-PCI/200, ++ CAN-PCI/266, CAN-PMC/266 (PMC), CAN-CPCI/200 (CompactPCI), ++ CAN-PCIe2000 (PCI Express) and CAN-PCI104/200 (PCI104) ++ from the esd electronic system design gmbh (http://www.esd.eu). ++ ++ This driver is deprecated. It's functionality is now provided by ++ "PLX90xx PCI-bridge based Cards" driver. ++ ++config XENO_DRIVERS_CAN_SJA1000_PEAK_DNG ++ depends on XENO_DRIVERS_CAN_SJA1000 && !PARPORT ++ tristate "PEAK Parallel Port Dongle" ++ help ++ ++ This driver is for the PCAN Dongle, the PC parallel port to CAN ++ converter from PEAK Systems (http://www.peak-system.com). You need ++ to disable parallel port support in the kernel (CONFIG_PARPORT) for ++ proper operation. The interface type (sp or epp), I/O port and ++ interrupt number should be defined via module parameters. 
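Editorial note, not part of the patch itself: the rtcan_sja1000.c file that follows defines the BTR0/BTR1 packing macros used by rtcan_sja_set_bit_time(). As a worked example, with the 8 MHz CAN system clock used by the boards above (16 MHz oscillator divided by two), 500 kbit/s can be reached with 16 time quanta per bit, which those macros turn into the classic 0x00/0x1C register pair. A minimal sketch, assuming the BTR0_SET_*/BTR1_SET_* macros and the u8 type from the sources this patch adds (the helper name is illustrative only):

    static inline void example_500k_bittime(u8 *btr0, u8 *btr1)
    {
    	/* 16 tq per bit: sync(1) + tseg1(13) + tseg2(2), brp = 1, sjw = 1 */
    	*btr0 = BTR0_SET_BRP(1) | BTR0_SET_SJW(1);	/* = 0x00 */
    	*btr1 = BTR1_SET_TSEG1(13) | BTR1_SET_TSEG2(2) |
    		BTR1_SET_SAM(0);			/* = 0x1C */
    	/* one bit = 16 x 125 ns = 2 us, i.e. 500 kbit/s */
    }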
+--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000.c 2021-04-07 16:01:26.332635445 +0800 +@@ -0,0 +1,842 @@ ++/* ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * Parts of this software are based on the following: ++ * ++ * - RTAI CAN device driver for SJA1000 controllers by Jan Kiszka ++ * ++ * - linux-can.patch, a CAN socket framework for Linux, ++ * Copyright (C) 2004, 2005, Robert Schwebel, Benedikt Spranger, ++ * Marc Kleine-Budde, Sascha Hauer, Pengutronix ++ * ++ * - RTnet (www.rtnet.org) ++ * ++ * - serial device driver and profile included in Xenomai (RTDM), ++ * Copyright (C) 2005 Jan Kiszka . ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#define BTR0_BRP_MASK 0x3f ++#define BTR0_SJW_SHIFT 6 ++#define BTR0_SJW_MASK (0x3 << BTR0_SJW_SHIFT) ++ ++#define BTR1_TSEG1_MASK 0xf ++#define BTR1_TSEG2_SHIFT 4 ++#define BTR1_TSEG2_MASK (0x7 << BTR1_TSEG2_SHIFT) ++#define BTR1_SAM_SHIFT 7 ++ ++#define BTR0_SET_BRP(brp) (((brp) - 1) & BTR0_BRP_MASK) ++#define BTR0_SET_SJW(sjw) ((((sjw) - 1) << BTR0_SJW_SHIFT) & BTR0_SJW_MASK) ++ ++#define BTR1_SET_TSEG1(tseg1) (((tseg1) - 1) & BTR1_TSEG1_MASK) ++#define BTR1_SET_TSEG2(tseg2) ((((tseg2) - 1) << BTR1_TSEG2_SHIFT) & BTR1_TSEG2_MASK) ++#define BTR1_SET_SAM(sam) (((sam) & 1) << BTR1_SAM_SHIFT) ++ ++/* Value for the interrupt enable register */ ++#define SJA1000_IER SJA_IER_RIE | SJA_IER_TIE | \ ++ SJA_IER_EIE | SJA_IER_WUIE | \ ++ SJA_IER_EPIE | SJA_IER_BEIE | \ ++ SJA_IER_ALIE | SJA_IER_DOIE ++ ++static char *sja_ctrl_name = "SJA1000"; ++ ++#define STATE_OPERATING(state) \ ++ ((state) != CAN_STATE_STOPPED && (state) != CAN_STATE_BUS_OFF) ++ ++#define STATE_RESET(state) \ ++ ((state) == CAN_STATE_STOPPED || (state) == CAN_STATE_BUS_OFF) ++ ++ ++MODULE_AUTHOR("Sebastian.Smolorz@stud.uni-hannover.de"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("RT-Socket-CAN driver for SJA1000"); ++MODULE_SUPPORTED_DEVICE("SJA1000 CAN controller"); ++ ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++static struct can_bittiming_const sja1000_bittiming_const = { ++ .name = "sja1000", ++ .tseg1_min = 1, ++ .tseg1_max = 16, ++ .tseg2_min = 1, ++ .tseg2_max = 8, ++ .sjw_max = 4, ++ .brp_min = 1, ++ .brp_max = 64, ++ .brp_inc = 1, ++}; ++#endif ++ ++static inline void rtcan_sja_rx_interrupt(struct rtcan_device *dev, ++ struct rtcan_skb *skb) ++{ ++ int i; ++ /* "Real" size of the payload */ ++ u8 size; ++ /* Content of frame information register */ ++ u8 fir; ++ /* Ring buffer frame within skb */ ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ struct rtcan_sja1000 *chip = dev->priv; ++ 
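++	/*
++	 * Register layout used below: a 29-bit extended ID is spread over
++	 * ID1..ID4 (ID4 carries the lowest 5 ID bits in its upper half),
++	 * while an 11-bit standard ID uses ID1 plus the top 3 bits of ID2.
++	 */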
++ /* Read out frame information register */ ++ fir = chip->read_reg(dev, SJA_FIR); ++ ++ /* Extract data length code */ ++ frame->can_dlc = fir & SJA_FIR_DLC_MASK; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload size) */ ++ size = (frame->can_dlc > 8) ? 8 : frame->can_dlc; ++ ++ ++ if (fir & SJA_FIR_EFF) { ++ /* Extended frame */ ++ frame->can_id = CAN_EFF_FLAG; ++ ++ /* Read ID */ ++ frame->can_id |= chip->read_reg(dev, SJA_ID1) << 21; ++ frame->can_id |= chip->read_reg(dev, SJA_ID2) << 13; ++ frame->can_id |= chip->read_reg(dev, SJA_ID3) << 5; ++ frame->can_id |= chip->read_reg(dev, SJA_ID4) >> 3; ++ ++ if (!(fir & SJA_FIR_RTR)) { ++ /* No RTR, read data bytes */ ++ for (i = 0; i < size; i++) ++ frame->data[i] = chip->read_reg(dev, ++ SJA_DATA_EFF(i)); ++ } ++ ++ } else { ++ /* Standard frame */ ++ ++ /* Read ID */ ++ frame->can_id = chip->read_reg(dev, SJA_ID1) << 3; ++ frame->can_id |= chip->read_reg(dev, SJA_ID2) >> 5; ++ ++ if (!(fir & SJA_FIR_RTR)) { ++ /* No RTR, read data bytes */ ++ for (i = 0; i < size; i++) ++ frame->data[i] = chip->read_reg(dev, SJA_DATA_SFF(i)); ++ } ++ } ++ ++ /* Release Receive Buffer */ ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_RRB); ++ ++ ++ /* RTR? */ ++ if (fir & SJA_FIR_RTR) { ++ frame->can_id |= CAN_RTR_FLAG; ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE; ++ } else ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + size; ++ ++ /* Store the interface index */ ++ frame->can_ifindex = dev->ifindex; ++} ++ ++ ++static inline void rtcan_sja_err_interrupt(struct rtcan_device *dev, ++ struct rtcan_sja1000 *chip, ++ struct rtcan_skb *skb, ++ u8 irq_source) ++{ ++ struct rtcan_rb_frame *frame = &skb->rb_frame; ++ can_state_t state = dev->state; ++ u8 status, txerr, rxerr; ++ ++ status = chip->read_reg(dev, SJA_SR); ++ txerr = chip->read_reg(dev, SJA_TXERR); ++ rxerr = chip->read_reg(dev, SJA_RXERR); ++ ++ skb->rb_frame_size = EMPTY_RB_FRAME_SIZE + CAN_ERR_DLC; ++ ++ frame->can_id = CAN_ERR_FLAG; ++ frame->can_dlc = CAN_ERR_DLC; ++ ++ memset(&frame->data[0], 0, frame->can_dlc); ++ ++ /* Data overrun interrupt? */ ++ if (irq_source & SJA_IR_DOI) { ++ frame->can_id |= CAN_ERR_CRTL; ++ frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; ++ } ++ ++ /* Arbitratio lost interrupt? */ ++ if (irq_source & SJA_IR_ALI) { ++ frame->can_id |= CAN_ERR_LOSTARB; ++ frame->data[0] = chip->read_reg(dev, SJA_ALC) & 0x1f; ++ } ++ ++ /* Bus error interrupt? */ ++ if (irq_source & SJA_IR_BEI) { ++ u8 ecc = chip->read_reg(dev, SJA_ECC); ++ ++ frame->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; ++ ++ switch (ecc & SJA_ECC_ERR_MASK) { ++ case SJA_ECC_ERR_BIT: ++ frame->data[2] |= CAN_ERR_PROT_BIT; ++ break; ++ case SJA_ECC_ERR_FORM: ++ frame->data[2] |= CAN_ERR_PROT_FORM; ++ break; ++ case SJA_ECC_ERR_STUFF: ++ frame->data[2] |= CAN_ERR_PROT_STUFF; ++ break; ++ default: ++ frame->data[2] |= CAN_ERR_PROT_UNSPEC; ++ frame->data[3] = ecc & SJA_ECC_SEG_MASK; ++ break; ++ } ++ /* Error occured during transmission? */ ++ if ((ecc & SJA_ECC_DIR) == 0) ++ frame->data[2] |= CAN_ERR_PROT_TX; ++ } ++ ++ /* Error passive interrupt? */ ++ if (unlikely(irq_source & SJA_IR_EPI)) { ++ if (state == CAN_STATE_BUS_WARNING) { ++ state = CAN_STATE_BUS_PASSIVE; ++ } else { ++ state = CAN_STATE_BUS_WARNING; ++ } ++ } ++ ++ /* Error warning interrupt? 
*/ ++ if (irq_source & SJA_IR_EI) { ++ ++ /* Test bus status (bus-off condition) */ ++ if (status & SJA_SR_BS) { ++ /* Bus-off */ ++ state = CAN_STATE_BUS_OFF; ++ frame->can_id |= CAN_ERR_BUSOFF; ++ /* Only allow error warning interrupts ++ (otherwise an EPI would arise during bus-off ++ recovery) */ ++ chip->write_reg(dev, SJA_IER, SJA_IER_EIE); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } ++ ++ /* Test error status (error warning limit) */ ++ else if (status & SJA_SR_ES) ++ /* error warning limit reached */ ++ state = CAN_STATE_BUS_WARNING; ++ ++ /* Re-entrance into error active state from bus-warn? */ ++ else if (state == CAN_STATE_BUS_WARNING) ++ state = CAN_STATE_ACTIVE; ++ ++ else ++ /* Bus-off recovery complete, enable all interrupts again */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ } ++ ++ if (state != dev->state && ++ (state == CAN_STATE_BUS_WARNING || state == CAN_STATE_BUS_PASSIVE)) { ++ frame->can_id |= CAN_ERR_PROT; ++ if (txerr > rxerr) ++ frame->data[1] = CAN_ERR_CRTL_TX_WARNING; ++ else ++ frame->data[1] = CAN_ERR_CRTL_RX_WARNING; ++ } ++ ++ dev->state = state; ++ frame->can_ifindex = dev->ifindex; ++} ++ ++static int rtcan_sja_interrupt(rtdm_irq_t *irq_handle) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_skb skb; ++ int recv_lock_free = 1; ++ int irq_count = 0; ++ int ret = RTDM_IRQ_NONE; ++ u8 irq_source; ++ ++ ++ /* Get the ID of the device which registered this IRQ. */ ++ dev = (struct rtcan_device *)rtdm_irq_get_arg(irq_handle, void); ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* Take spinlock protecting HW register access and device structures. */ ++ rtdm_lock_get(&dev->device_lock); ++ ++ /* Loop as long as the device reports an event */ ++ while ((irq_source = chip->read_reg(dev, SJA_IR))) { ++ ret = RTDM_IRQ_HANDLED; ++ irq_count++; ++ ++ /* Now look up which interrupts appeared */ ++ ++ /* Wake-up interrupt? */ ++ if (irq_source & SJA_IR_WUI) ++ dev->state = dev->state_before_sleep; ++ ++ /* Error Interrupt? */ ++ if (irq_source & (SJA_IR_EI | SJA_IR_DOI | SJA_IR_EPI | ++ SJA_IR_ALI | SJA_IR_BEI)) { ++ ++ /* Check error condition and fill error frame */ ++ if (!((irq_source & SJA_IR_BEI) && (chip->bus_err_on-- < 2))) { ++ rtcan_sja_err_interrupt(dev, chip, &skb, irq_source); ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ /* Pass error frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ /* Transmit Interrupt? */ ++ if (irq_source & SJA_IR_TI) { ++ /* Wake up a sender */ ++ rtdm_sem_up(&dev->tx_sem); ++ ++ if (rtcan_loopback_pending(dev)) { ++ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ rtcan_loopback(dev); ++ } ++ } ++ ++ /* Receive Interrupt? */ ++ if (irq_source & SJA_IR_RI) { ++ ++ /* Read out HW registers */ ++ rtcan_sja_rx_interrupt(dev, &skb); ++ ++ /* Take more locks. Ensure that they are taken and ++ * released only once in the IRQ handler. */ ++ /* WARNING: Nested locks are dangerous! But they are ++ * nested only in this routine so a deadlock should ++ * not be possible. 
*/ ++ if (recv_lock_free) { ++ recv_lock_free = 0; ++ rtdm_lock_get(&rtcan_recv_list_lock); ++ rtdm_lock_get(&rtcan_socket_lock); ++ } ++ ++ /* Pass received frame out to the sockets */ ++ rtcan_rcv(dev, &skb); ++ } ++ } ++ ++ if (chip->irq_ack) ++ chip->irq_ack(dev); ++ ++ /* Release spinlocks */ ++ if (!recv_lock_free) { ++ rtdm_lock_put(&rtcan_socket_lock); ++ rtdm_lock_put(&rtcan_recv_list_lock); ++ } ++ rtdm_lock_put(&dev->device_lock); ++ ++ return ret; ++} ++ ++ ++ ++/* ++ * Inline function to decide if controller is operating ++ * ++ * Catch the very unlikely case that setting stop mode ++ * returned without success before this call but in the ++ * meantime the controller went into reset mode. ++ */ ++static inline int rtcan_sja_is_operating(struct rtcan_device *dev, ++ can_state_t *state) ++{ ++ int is_operating = STATE_OPERATING(*state); ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ if (unlikely(is_operating && chip->read_reg(dev, SJA_MOD) & SJA_MOD_RM)) { ++ *state = CAN_STATE_STOPPED; ++ is_operating = 0; ++ /* Disable the controller's interrupts */ ++ chip->write_reg(dev, SJA_IER, 0x00); ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } ++ ++ return is_operating; ++} ++ ++ ++/* ++ * Set controller into reset mode. ++ * ++ * According to the SJA1000 specification, it is necessary to check the ++ * reset mode bit in PeliCAN mode after having set it. So we do. But if ++ * using a ISA card like the PHYTEC eNET card this should not be necessary ++ * because the CAN controller clock of this card (16 MHz) is twice as high ++ * as the ISA bus clock. ++ */ ++static int rtcan_sja_mode_stop(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ /* Max. 50 loops busy sleep. If the controller is stopped while in ++ * sleep mode 20-40 loops are needed (tested on PHYTEC eNET). */ ++ int wait_loop = 50; ++ can_state_t state; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ state = dev->state; ++ /* If controller is not operating anyway, go out */ ++ if (STATE_RESET(state)) ++ goto out; ++ ++ /* Disable the controller's interrupts */ ++ chip->write_reg(dev, SJA_IER, 0x00); ++ ++ /* Set reset mode bit */ ++ chip->write_reg(dev, SJA_MOD, SJA_MOD_RM); ++ ++ /* Read reset mode bit, multiple tests */ ++ do { ++ if (chip->read_reg(dev, SJA_MOD) & SJA_MOD_RM) ++ break; ++ ++ if (lock_ctx) ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ /* Busy sleep 1 microsecond */ ++ rtdm_task_busy_sleep(1000); ++ if (lock_ctx) ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ } while(--wait_loop); ++ ++ ++ if (wait_loop) { ++ /* Volatile state could have changed while we slept busy. */ ++ dev->state = CAN_STATE_STOPPED; ++ /* Wake up waiting senders */ ++ rtdm_sem_destroy(&dev->tx_sem); ++ } else { ++ ret = -EAGAIN; ++ /* Enable interrupts again as we did not succeed */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ } ++ ++ out: ++ return ret; ++} ++ ++ ++ ++/* ++ * Set controller into operating mode. ++ * ++ * If coming from CAN_STATE_SLEEPING, the controller must wait ++ * some time to avoid bus errors. Measured on an PHYTEC eNET card, ++ * this time was 110 microseconds. 
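++ * The wait is done with the device lock temporarily released, so other
++ * contexts are not blocked for the whole wake-up delay.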
++ */ ++static int rtcan_sja_mode_start(struct rtcan_device *dev, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ u8 mod_reg; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* We won't forget that state in the device structure is volatile and ++ * access to it will not be optimized by the compiler. So ... */ ++ ++ mod_reg = 0; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LISTENONLY) ++ mod_reg |= SJA_MOD_LOM; ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ mod_reg |= SJA_MOD_STM; ++ ++ switch (dev->state) { ++ ++ case CAN_STATE_ACTIVE: ++ case CAN_STATE_BUS_WARNING: ++ case CAN_STATE_BUS_PASSIVE: ++ break; ++ ++ case CAN_STATE_STOPPED: ++ /* Clear error counters */ ++ chip->write_reg(dev, SJA_RXERR , 0); ++ chip->write_reg(dev, SJA_TXERR , 0); ++ /* Clear error code capture (i.e. read it) */ ++ chip->read_reg(dev, SJA_ECC); ++ /* Set error active state */ ++ dev->state = CAN_STATE_ACTIVE; ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Enable interrupts */ ++ chip->write_reg(dev, SJA_IER, SJA1000_IER); ++ ++ /* Clear reset mode bit in SJA1000 */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ ++ break; ++ ++ case CAN_STATE_SLEEPING: ++ /* Trigger Wake-up interrupt */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ ++ /* Ok, coming from sleep mode is problematic. We have to wait ++ * for the SJA1000 to get on both feet again. */ ++ rtdm_lock_put_irqrestore(&dev->device_lock, *lock_ctx); ++ rtdm_task_busy_sleep(110000); ++ rtdm_lock_get_irqsave(&dev->device_lock, *lock_ctx); ++ ++ /* Meanwhile, the Wake-up interrupt was serviced and has set the ++ * right state. As we don't want to set it back jump out. */ ++ goto out; ++ ++ break; ++ ++ case CAN_STATE_BUS_OFF: ++ /* Trigger bus-off recovery */ ++ chip->write_reg(dev, SJA_MOD, mod_reg); ++ /* Set up sender "mutex" */ ++ rtdm_sem_init(&dev->tx_sem, 1); ++ /* Set error active state */ ++ dev->state = CAN_STATE_ACTIVE; ++ ++ break; ++ ++ default: ++ /* Never reached, but we don't want nasty compiler warnings ... */ ++ break; ++ } ++ ++ out: ++ return ret; ++} ++ ++can_state_t rtcan_sja_get_state(struct rtcan_device *dev) ++{ ++ can_state_t state = dev->state; ++ rtcan_sja_is_operating(dev, &state); ++ return state; ++} ++ ++int rtcan_sja_set_mode(struct rtcan_device *dev, ++ can_mode_t mode, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ int ret = 0; ++ can_state_t state; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000*)dev->priv; ++ ++ switch (mode) { ++ ++ case CAN_MODE_STOP: ++ ret = rtcan_sja_mode_stop(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_START: ++ ret = rtcan_sja_mode_start(dev, lock_ctx); ++ break; ++ ++ case CAN_MODE_SLEEP: ++ ++ state = dev->state; ++ ++ /* Controller must operate, otherwise go out */ ++ if (!rtcan_sja_is_operating(dev, &state)) { ++ ret = -ENETDOWN; ++ goto mode_sleep_out; ++ } ++ ++ /* Is controller sleeping yet? If yes, go out */ ++ if (state == CAN_STATE_SLEEPING) ++ goto mode_sleep_out; ++ ++ /* Remember into which state to return when we ++ * wake up */ ++ dev->state_before_sleep = state; ++ ++ /* Let's take a nap. (Now I REALLY understand ++ * the meaning of interrupts ...) 
*/ ++ state = CAN_STATE_SLEEPING; ++ chip->write_reg(dev, SJA_MOD, ++ chip->read_reg(dev, SJA_MOD) | SJA_MOD_SM); ++ ++ mode_sleep_out: ++ dev->state = state; ++ break; ++ ++ default: ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ ++ return ret; ++} ++ ++int rtcan_sja_set_bit_time(struct rtcan_device *dev, ++ struct can_bittime *bit_time, ++ rtdm_lockctx_t *lock_ctx) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ u8 btr0, btr1; ++ ++ switch (bit_time->type) { ++ case CAN_BITTIME_BTR: ++ btr0 = bit_time->btr.btr0; ++ btr1 = bit_time->btr.btr1; ++ break; ++ ++ case CAN_BITTIME_STD: ++ btr0 = (BTR0_SET_BRP(bit_time->std.brp) | ++ BTR0_SET_SJW(bit_time->std.sjw)); ++ btr1 = (BTR1_SET_TSEG1(bit_time->std.prop_seg + ++ bit_time->std.phase_seg1) | ++ BTR1_SET_TSEG2(bit_time->std.phase_seg2) | ++ BTR1_SET_SAM(bit_time->std.sam)); ++ ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ printk("%s: btr0=%#x btr1=%#x\n", __func__, btr0, btr1); ++ chip->write_reg(dev, SJA_BTR0, btr0); ++ chip->write_reg(dev, SJA_BTR1, btr1); ++ ++ return 0; ++} ++ ++void rtcan_sja_enable_bus_err(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ if (chip->bus_err_on < 2) { ++ if (chip->bus_err_on < 1) ++ chip->read_reg(dev, SJA_ECC); ++ chip->bus_err_on = 2; ++ } ++} ++ ++/* ++ * Start a transmission to a SJA1000 device ++ */ ++static int rtcan_sja_start_xmit(struct rtcan_device *dev, ++ can_frame_t *frame) ++{ ++ int i; ++ /* "Real" size of the payload */ ++ u8 size; ++ /* Content of frame information register */ ++ u8 fir; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ ++ /* Get DLC */ ++ fir = frame->can_dlc; ++ ++ /* If DLC exceeds 8 bytes adjust it to 8 (for the payload) */ ++ size = (fir > 8) ? 8 : fir; ++ ++ ++ if (frame->can_id & CAN_EFF_FLAG) { ++ /* Send extended frame */ ++ fir |= SJA_FIR_EFF; ++ ++ /* Write ID */ ++ chip->write_reg(dev, SJA_ID1, frame->can_id >> 21); ++ chip->write_reg(dev, SJA_ID2, frame->can_id >> 13); ++ chip->write_reg(dev, SJA_ID3, frame->can_id >> 5); ++ chip->write_reg(dev, SJA_ID4, frame->can_id << 3); ++ ++ /* RTR? */ ++ if (frame->can_id & CAN_RTR_FLAG) ++ fir |= SJA_FIR_RTR; ++ ++ else { ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ chip->write_reg(dev, SJA_DATA_EFF(i), ++ frame->data[i]); ++ } ++ ++ } else { ++ /* Send standard frame */ ++ ++ /* Write ID */ ++ chip->write_reg(dev, SJA_ID1, frame->can_id >> 3); ++ chip->write_reg(dev, SJA_ID2, frame->can_id << 5); ++ ++ /* RTR? 
*/ ++ if (frame->can_id & CAN_RTR_FLAG) ++ fir |= SJA_FIR_RTR; ++ ++ else { ++ /* No RTR, write data bytes */ ++ for (i = 0; i < size; i++) ++ chip->write_reg(dev, SJA_DATA_SFF(i), ++ frame->data[i]); ++ } ++ } ++ ++ ++ /* Write frame information register */ ++ chip->write_reg(dev, SJA_FIR, fir); ++ ++ /* Push the 'send' button */ ++ if (dev->ctrl_mode & CAN_CTRLMODE_LOOPBACK) ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_SRR); ++ else ++ chip->write_reg(dev, SJA_CMR, SJA_CMR_TR); ++ ++ return 0; ++} ++ ++ ++ ++/* ++ * SJA1000 chip configuration ++ */ ++static void sja1000_chip_config(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000* )dev->priv; ++ ++ chip->write_reg(dev, SJA_CDR, chip->cdr); ++ chip->write_reg(dev, SJA_OCR, chip->ocr); ++ ++ chip->write_reg(dev, SJA_AMR0, 0xFF); ++ chip->write_reg(dev, SJA_AMR1, 0xFF); ++ chip->write_reg(dev, SJA_AMR2, 0xFF); ++ chip->write_reg(dev, SJA_AMR3, 0xFF); ++} ++ ++ ++int rtcan_sja1000_register(struct rtcan_device *dev) ++{ ++ int ret; ++ struct rtcan_sja1000 *chip = dev->priv; ++ ++ if (chip == NULL) ++ return -EINVAL; ++ ++ /* Set dummy state for following call */ ++ dev->state = CAN_STATE_ACTIVE; ++ /* Enter reset mode */ ++ rtcan_sja_mode_stop(dev, NULL); ++ ++ if ((chip->read_reg(dev, SJA_SR) & ++ (SJA_SR_RBS | SJA_SR_DOS | SJA_SR_TBS)) != SJA_SR_TBS) { ++ printk("ERROR! No SJA1000 device found!\n"); ++ return -ENODEV; ++ } ++ ++ dev->ctrl_name = sja_ctrl_name; ++ ++ dev->hard_start_xmit = rtcan_sja_start_xmit; ++ dev->do_set_mode = rtcan_sja_set_mode; ++ dev->do_get_state = rtcan_sja_get_state; ++ dev->do_set_bit_time = rtcan_sja_set_bit_time; ++ dev->do_enable_bus_err = rtcan_sja_enable_bus_err; ++#ifndef CONFIG_XENO_DRIVERS_CAN_CALC_BITTIME_OLD ++ dev->bittiming_const = &sja1000_bittiming_const; ++#endif ++ ++ chip->bus_err_on = 1; ++ ++ ret = rtdm_irq_request(&dev->irq_handle, ++ chip->irq_num, rtcan_sja_interrupt, ++ chip->irq_flags, sja_ctrl_name, dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d: IRQ %d is %s!\n", ++ ret, chip->irq_num, ret == -EBUSY ? 
++ "busy, check shared interrupt support" : "invalid"); ++ return ret; ++ } ++ ++ sja1000_chip_config(dev); ++ ++ /* Register RTDM device */ ++ ret = rtcan_dev_register(dev); ++ if (ret) { ++ printk(KERN_ERR ++ "ERROR %d while trying to register RTCAN device!\n", ret); ++ goto out_irq_free; ++ } ++ ++ rtcan_sja_create_proc(dev); ++ ++ return 0; ++ ++ out_irq_free: ++ rtdm_irq_free(&dev->irq_handle); ++ ++ return ret; ++} ++ ++ ++/* Cleanup module */ ++void rtcan_sja1000_unregister(struct rtcan_device *dev) ++{ ++ printk("Unregistering SJA1000 device %s\n", dev->name); ++ ++ rtdm_irq_disable(&dev->irq_handle); ++ rtcan_sja_mode_stop(dev, NULL); ++ rtdm_irq_free(&dev->irq_handle); ++ rtcan_sja_remove_proc(dev); ++ rtcan_dev_unregister(dev); ++} ++ ++int __init rtcan_sja_init(void) ++{ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ printk("RTCAN SJA1000 driver initialized\n"); ++ return 0; ++} ++ ++ ++void __exit rtcan_sja_exit(void) ++{ ++ printk("%s removed\n", sja_ctrl_name); ++} ++ ++module_init(rtcan_sja_init); ++module_exit(rtcan_sja_exit); ++ ++EXPORT_SYMBOL_GPL(rtcan_sja1000_register); ++EXPORT_SYMBOL_GPL(rtcan_sja1000_unregister); +--- linux/drivers/xenomai/can/sja1000/rtcan_isa.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_isa.c 2021-04-07 16:01:26.326635454 +0800 +@@ -0,0 +1,201 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Copyright (C) 2005, 2006, 2009 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "sja1000-isa" ++ ++#define RTCAN_ISA_MAX_DEV 4 ++ ++static char *isa_board_name = "ISA-Board"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for standard ISA boards"); ++MODULE_SUPPORTED_DEVICE("ISA board"); ++MODULE_LICENSE("GPL"); ++ ++static u16 io[RTCAN_ISA_MAX_DEV]; ++static int irq[RTCAN_ISA_MAX_DEV]; ++static u32 can_clock[RTCAN_ISA_MAX_DEV]; ++static u8 ocr[RTCAN_ISA_MAX_DEV]; ++static u8 cdr[RTCAN_ISA_MAX_DEV]; ++ ++module_param_array(io, ushort, NULL, 0444); ++module_param_array(irq, int, NULL, 0444); ++module_param_array(can_clock, uint, NULL, 0444); ++module_param_array(ocr, byte, NULL, 0444); ++module_param_array(cdr, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(io, "The io-port address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++MODULE_PARM_DESC(can_clock, "External clock frequency (default 16 MHz)"); ++MODULE_PARM_DESC(ocr, "Value of output control register (default 0x1a)"); ++MODULE_PARM_DESC(cdr, "Value of clock divider register (default 0xc8"); ++ ++#define RTCAN_ISA_PORT_SIZE 32 ++ ++struct rtcan_isa ++{ ++ u16 io; ++}; ++ ++static struct rtcan_device *rtcan_isa_devs[RTCAN_ISA_MAX_DEV]; ++ ++static u8 rtcan_isa_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_isa *board = (struct rtcan_isa *)dev->board_priv; ++ return inb(board->io + port); ++} ++ ++static void rtcan_isa_writereg(struct rtcan_device *dev, int port, u8 val) ++{ ++ struct rtcan_isa *board = (struct rtcan_isa *)dev->board_priv; ++ outb(val, board->io + port); ++} ++ ++ ++int __init rtcan_isa_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_isa *board; ++ int ret; ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_isa))) == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_isa *)dev->board_priv; ++ ++ dev->board_name = isa_board_name; ++ ++ board->io = io[idx]; ++ ++ chip->irq_num = irq[idx]; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED | RTDM_IRQTYPE_EDGE; ++ ++ chip->read_reg = rtcan_isa_readreg; ++ chip->write_reg = rtcan_isa_writereg; ++ ++ /* Check and request I/O ports */ ++ if (!request_region(board->io, RTCAN_ISA_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ /* Clock frequency in Hz */ ++ if (can_clock[idx]) ++ dev->can_sys_clock = can_clock[idx] / 2; ++ else ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ if (ocr[idx]) ++ chip->ocr = ocr[idx]; ++ else ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ if (cdr[idx]) ++ chip->cdr = cdr[idx]; ++ else ++ chip->cdr = SJA_CDR_CAN_MODE | SJA_CDR_CLK_OFF | SJA_CDR_CBP; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto out_free_region; ++ } ++ ++ rtcan_isa_devs[idx] = dev; ++ return 0; ++ ++ out_free_region: ++ release_region(board->io, RTCAN_ISA_PORT_SIZE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static void rtcan_isa_exit(void); ++ ++/** Init module */ ++static int __init rtcan_isa_init(void) ++{ ++ int i, err; ++ int devices = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < RTCAN_ISA_MAX_DEV && 
io[i] != 0; i++) { ++ err = rtcan_isa_init_one(i); ++ if (err) { ++ rtcan_isa_exit(); ++ return err; ++ } ++ devices++; ++ } ++ if (devices) ++ return 0; ++ ++ printk(KERN_ERR "ERROR! No devices specified! " ++ "Use io=[,...] irq=[,...]\n"); ++ return -EINVAL; ++} ++ ++ ++/** Cleanup module */ ++static void rtcan_isa_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0; i < RTCAN_ISA_MAX_DEV; i++) { ++ dev = rtcan_isa_devs[i]; ++ if (!dev) ++ continue; ++ rtcan_sja1000_unregister(dev); ++ release_region(io[i], RTCAN_ISA_PORT_SIZE); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_isa_init); ++module_exit(rtcan_isa_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_peak_dng.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_peak_dng.c 2021-04-07 16:01:26.321635461 +0800 +@@ -0,0 +1,390 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * Derived from the PCAN project file driver/src/pcan_dongle.c: ++ * ++ * Copyright (C) 2001-2006 PEAK System-Technik GmbH ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "PEAK-Dongle" ++ ++#define RTCAN_PEAK_DNG_MAX_DEV 1 ++ ++static char *dongle_board_name = "PEAK-Dongle"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for PEAK-Dongle"); ++MODULE_SUPPORTED_DEVICE("PEAK-Dongle CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++static char *type[RTCAN_PEAK_DNG_MAX_DEV]; ++static ushort io[RTCAN_PEAK_DNG_MAX_DEV]; ++static char irq[RTCAN_PEAK_DNG_MAX_DEV]; ++ ++module_param_array(type, charp, NULL, 0444); ++module_param_array(io, ushort, NULL, 0444); ++module_param_array(irq, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(type, "The type of interface (sp, epp)"); ++MODULE_PARM_DESC(io, "The io-port address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++ ++#define DONGLE_TYPE_SP 0 ++#define DONGLE_TYPE_EPP 1 ++ ++#define DNG_PORT_SIZE 4 /* the address range of the dongle-port */ ++#define ECR_PORT_SIZE 1 /* size of the associated ECR register */ ++ ++struct rtcan_peak_dng ++{ ++ u16 ioport; ++ u16 ecr; /* ECR register in case of EPP */ ++ u8 old_data; /* the overwritten contents of the port registers */ ++ u8 old_ctrl; ++ u8 old_ecr; ++ u8 type; ++}; ++ ++static struct rtcan_device *rtcan_peak_dng_devs[RTCAN_PEAK_DNG_MAX_DEV]; ++ ++static u16 dng_ports[] = {0x378, 0x278, 0x3bc, 0x2bc}; ++static u8 dng_irqs[] = {7, 5, 7, 5}; ++ ++static unsigned char nibble_decode[32] = ++{ ++ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, ++ 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, ++ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, ++ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 ++}; ++ ++/* Enable and disable irqs */ ++static inline void rtcan_parport_disable_irq(u32 port) ++{ ++ u32 pc = port + 2; ++ outb(inb(pc) & ~0x10, pc); ++} ++ ++static inline void rtcan_parport_enable_irq(u32 port) ++{ ++ u32 pc = port + 2; ++ outb(inb(pc) | 0x10, pc); ++} ++ ++/* Functions for SP port */ ++static u8 rtcan_peak_dng_sp_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pb = pa + 1; ++ u32 pc = pb + 1; ++ u8 b0, b1 ; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ outb((port & 0x1F) | 0x80, pa); ++ outb((0x0B ^ 0x0C) | irq_enable, pc); ++ b1=nibble_decode[inb(pb)>>3]; ++ outb(0x40, pa); ++ b0=nibble_decode[inb(pb)>>3]; ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ ++ return (b1 << 4) | b0 ; ++} ++ ++static void rtcan_peak_dng_writereg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pc = pa + 2; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++ outb(port & 0x1F, pa); ++ outb((0x0B ^ 0x0C) | irq_enable, pc); ++ outb(data, pa); ++ outb((0x0B ^ 0x0D) | irq_enable, pc); ++} ++ ++/* Functions for EPP port */ ++static u8 rtcan_peak_dng_epp_readreg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 pa = dng->ioport; ++ u32 pc = pa + 2; ++ u8 val; ++ u8 irq_enable = inb(pc) & 0x10; /* don't influence irq_enable */ ++ ++ outb((0x0B ^ 0x0F) | irq_enable, pc); ++ outb((port & 0x1F) | 0x80, pa); ++ outb((0x0B ^ 0x2E) | irq_enable, pc); ++ val = inb(pa); ++ 
outb((0x0B ^ 0x0F) | irq_enable, pc); ++ ++ return val; ++} ++ ++ ++/* to switch epp on or restore register */ ++static void dongle_set_ecr(u16 port, struct rtcan_peak_dng *dng) ++{ ++ u32 ecr = dng->ecr; ++ ++ dng->old_ecr = inb(ecr); ++ outb((dng->old_ecr & 0x1F) | 0x20, ecr); ++ ++ if (dng->old_ecr == 0xff) ++ printk(KERN_DEBUG "%s: realy ECP mode configured?\n", RTCAN_DRV_NAME); ++} ++ ++static void dongle_restore_ecr(u16 port, struct rtcan_peak_dng *dng) ++{ ++ u32 ecr = dng->ecr; ++ ++ outb(dng->old_ecr, ecr); ++ ++ printk(KERN_DEBUG "%s: restore ECR\n", RTCAN_DRV_NAME); ++} ++ ++static inline void rtcan_peak_dng_enable(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 port = dng->ioport; ++ ++ /* save old port contents */ ++ dng->old_data = inb(port); ++ dng->old_ctrl = inb(port + 2); ++ ++ /* switch to epp mode if possible */ ++ if (dng->type == DONGLE_TYPE_EPP) ++ dongle_set_ecr(port, dng); ++ ++ rtcan_parport_enable_irq(port); ++} ++ ++static inline void rtcan_peak_dng_disable(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ u32 port = dng->ioport; ++ ++ rtcan_parport_disable_irq(port); ++ ++ if (dng->type == DONGLE_TYPE_EPP) ++ dongle_restore_ecr(port, dng); ++ ++ /* restore port state */ ++ outb(dng->old_data, port); ++ outb(dng->old_ctrl, port + 2); ++} ++ ++/** Init module */ ++int __init rtcan_peak_dng_init_one(int idx) ++{ ++ int ret, dtype; ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *sja; ++ struct rtcan_peak_dng *dng; ++ ++ if (strncmp(type[idx], "sp", 2) == 0) ++ dtype = DONGLE_TYPE_SP; ++ else if (strncmp(type[idx], "epp", 3) == 0) ++ dtype = DONGLE_TYPE_EPP; ++ else { ++ printk("%s: type %s is invalid, use \"sp\" or \"epp\".", ++ RTCAN_DRV_NAME, type[idx]); ++ return -EINVAL; ++ } ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_peak_dng))) == NULL) ++ return -ENOMEM; ++ ++ sja = (struct rtcan_sja1000 *)dev->priv; ++ dng = (struct rtcan_peak_dng *)dev->board_priv; ++ ++ dev->board_name = dongle_board_name; ++ ++ if (io[idx]) ++ dng->ioport = io[idx]; ++ else ++ dng->ioport = dng_ports[idx]; ++ ++ if (irq[idx]) ++ sja->irq_num = irq[idx]; ++ else ++ sja->irq_num = dng_irqs[idx]; ++ sja->irq_flags = 0; ++ ++ if (dtype == DONGLE_TYPE_SP) { ++ sja->read_reg = rtcan_peak_dng_sp_readreg; ++ sja->write_reg = rtcan_peak_dng_writereg; ++ dng->ecr = 0; /* set to anything */ ++ } else { ++ sja->read_reg = rtcan_peak_dng_epp_readreg; ++ sja->write_reg = rtcan_peak_dng_writereg; ++ dng->ecr = dng->ioport + 0x402; ++ } ++ ++ /* Check and request I/O ports */ ++ if (!request_region(dng->ioport, DNG_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ if (dng->type == DONGLE_TYPE_EPP) { ++ if (!request_region(dng->ecr, ECR_PORT_SIZE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_free_region; ++ } ++ } ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ sja->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ sja->cdr = SJA_CDR_CAN_MODE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ rtcan_peak_dng_enable(dev); ++ ++ /* Register RTDM device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR while trying to register SJA1000 device %d!\n", ++ ret); ++ goto out_free_region2; ++ } ++ ++ rtcan_peak_dng_devs[idx] = dev; ++ return 0; ++ ++ out_free_region2: ++ if (dng->type == 
DONGLE_TYPE_EPP) ++ release_region(dng->ecr, ECR_PORT_SIZE); ++ ++ out_free_region: ++ release_region(dng->ioport, DNG_PORT_SIZE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++void rtcan_peak_dng_exit_one(struct rtcan_device *dev) ++{ ++ struct rtcan_peak_dng *dng = (struct rtcan_peak_dng *)dev->board_priv; ++ ++ rtcan_sja1000_unregister(dev); ++ rtcan_peak_dng_disable(dev); ++ if (dng->type == DONGLE_TYPE_EPP) ++ release_region(dng->ecr, ECR_PORT_SIZE); ++ release_region(dng->ioport, DNG_PORT_SIZE); ++ rtcan_dev_free(dev); ++} ++ ++static const struct pnp_device_id rtcan_peak_dng_pnp_tbl[] = { ++ /* Standard LPT Printer Port */ ++ {.id = "PNP0400", .driver_data = 0}, ++ /* ECP Printer Port */ ++ {.id = "PNP0401", .driver_data = 0}, ++ { } ++}; ++ ++static int rtcan_peak_dng_pnp_probe(struct pnp_dev *dev, ++ const struct pnp_device_id *id) ++{ ++ return 0; ++} ++ ++static struct pnp_driver rtcan_peak_dng_pnp_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = rtcan_peak_dng_pnp_tbl, ++ .probe = rtcan_peak_dng_pnp_probe, ++}; ++ ++static int pnp_registered; ++ ++/** Cleanup module */ ++static void rtcan_peak_dng_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ ++ for (i = 0, dev = rtcan_peak_dng_devs[i]; ++ i < RTCAN_PEAK_DNG_MAX_DEV && dev != NULL; ++ i++) ++ rtcan_peak_dng_exit_one(dev); ++ ++ if (pnp_registered) ++ pnp_unregister_driver(&rtcan_peak_dng_pnp_driver); ++} ++ ++/** Init module */ ++static int __init rtcan_peak_dng_init(void) ++{ ++ int i, ret = -EINVAL, done = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ if (pnp_register_driver(&rtcan_peak_dng_pnp_driver) == 0) ++ pnp_registered = 1; ++ ++ for (i = 0; ++ i < RTCAN_PEAK_DNG_MAX_DEV && type[i] != 0; ++ i++) { ++ ++ if ((ret = rtcan_peak_dng_init_one(i)) != 0) { ++ printk(KERN_ERR "%s: Init failed with %d\n", RTCAN_DRV_NAME, ret); ++ goto cleanup; ++ } ++ done++; ++ } ++ if (done) ++ return 0; ++ ++ printk(KERN_ERR "%s: Please specify type=epp or type=sp\n", ++ RTCAN_DRV_NAME); ++ ++cleanup: ++ rtcan_peak_dng_exit(); ++ return ret; ++} ++ ++module_init(rtcan_peak_dng_init); ++module_exit(rtcan_peak_dng_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_sja1000_proc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_sja1000_proc.c 2021-04-07 16:01:26.316635468 +0800 +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++ ++#ifdef CONFIG_XENO_DRIVERS_CAN_DEBUG ++ ++static int rtcan_sja_proc_regs(struct seq_file *p, void *data) ++{ ++ struct rtcan_device *dev = (struct rtcan_device *)data; ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ int i; ++ ++ seq_printf(p, "SJA1000 registers"); ++ for (i = 0; i < 0x20; i++) { ++ if ((i % 0x10) == 0) ++ seq_printf(p, "\n%02x:", i); ++ seq_printf(p, " %02x", chip->read_reg(dev, i)); ++ } ++ seq_printf(p, "\n"); ++ return 0; ++} ++ ++static int rtcan_sja_proc_regs_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rtcan_sja_proc_regs, PDE_DATA(inode)); ++} ++ ++static const struct file_operations rtcan_sja_proc_regs_ops = { ++ .open = rtcan_sja_proc_regs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return -EINVAL; ++ ++ proc_create_data("registers", S_IFREG | S_IRUGO | S_IWUSR, dev->proc_root, ++ &rtcan_sja_proc_regs_ops, dev); ++ return 0; ++} ++ ++void rtcan_sja_remove_proc(struct rtcan_device* dev) ++{ ++ if (!dev->proc_root) ++ return; ++ ++ remove_proc_entry("registers", dev->proc_root); ++} ++ ++#else /* !CONFIG_XENO_DRIVERS_CAN_DEBUG */ ++ ++void rtcan_sja_remove_proc(struct rtcan_device* dev) ++{ ++} ++ ++int rtcan_sja_create_proc(struct rtcan_device* dev) ++{ ++ return 0; ++} ++#endif /* CONFIG_XENO_DRIVERS_CAN_DEBUG */ +--- linux/drivers/xenomai/can/sja1000/rtcan_ems_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_ems_pci.c 2021-04-07 16:01:26.307635481 +0800 +@@ -0,0 +1,394 @@ ++/* ++ * Copyright (C) 2007, 2016 Wolfgang Grandegger ++ * Copyright (C) 2008 Markus Plessing ++ * Copyright (C) 2008 Sebastian Haas ++ * ++ * Derived from Linux CAN SJA1000 PCI driver "ems_pci". ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, see . ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "EMS-CPC-PCI-CAN" ++ ++static char *ems_pci_board_name = "EMS-CPC-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for EMS CPC-PCI/PCIe/104P CAN cards"); ++MODULE_SUPPORTED_DEVICE("EMS CPC-PCI/PCIe/104P CAN card"); ++MODULE_LICENSE("GPL v2"); ++ ++#define EMS_PCI_V1_MAX_CHAN 2 ++#define EMS_PCI_V2_MAX_CHAN 4 ++#define EMS_PCI_MAX_CHAN EMS_PCI_V2_MAX_CHAN ++ ++struct ems_pci_card { ++ int version; ++ int channels; ++ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *rtcan_dev[EMS_PCI_MAX_CHAN]; ++ ++ void __iomem *conf_addr; ++ void __iomem *base_addr; ++}; ++ ++#define EMS_PCI_CAN_CLOCK (16000000 / 2) ++ ++/* ++ * Register definitions and descriptions are from LinCAN 0.3.3. 
++ * ++ * PSB4610 PITA-2 bridge control registers ++ */ ++#define PITA2_ICR 0x00 /* Interrupt Control Register */ ++#define PITA2_ICR_INT0 0x00000002 /* [RC] INT0 Active/Clear */ ++#define PITA2_ICR_INT0_EN 0x00020000 /* [RW] Enable INT0 */ ++ ++#define PITA2_MISC 0x1c /* Miscellaneous Register */ ++#define PITA2_MISC_CONFIG 0x04000000 /* Multiplexed parallel interface */ ++ ++/* ++ * Register definitions for the PLX 9030 ++ */ ++#define PLX_ICSR 0x4c /* Interrupt Control/Status register */ ++#define PLX_ICSR_LINTI1_ENA 0x0001 /* LINTi1 Enable */ ++#define PLX_ICSR_PCIINT_ENA 0x0040 /* PCI Interrupt Enable */ ++#define PLX_ICSR_LINTI1_CLR 0x0400 /* Local Edge Triggerable Interrupt Clear */ ++#define PLX_ICSR_ENA_CLR (PLX_ICSR_LINTI1_ENA | PLX_ICSR_PCIINT_ENA | \ ++ PLX_ICSR_LINTI1_CLR) ++ ++/* ++ * The board configuration is probably following: ++ * RX1 is connected to ground. ++ * TX1 is not connected. ++ * CLKO is not connected. ++ * Setting the OCR register to 0xDA is a good idea. ++ * This means normal output mode, push-pull and the correct polarity. ++ */ ++#define EMS_PCI_OCR (SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL) ++ ++/* ++ * In the CDR register, you should set CBP to 1. ++ * You will probably also want to set the clock divider value to 7 ++ * (meaning direct oscillator output) because the second SJA1000 chip ++ * is driven by the first one CLKOUT output. ++ */ ++#define EMS_PCI_CDR (SJA_CDR_CBP | SJA_CDR_CLKOUT_MASK) ++ ++#define EMS_PCI_V1_BASE_BAR 1 ++#define EMS_PCI_V1_CONF_SIZE 4096 /* size of PITA control area */ ++#define EMS_PCI_V2_BASE_BAR 2 ++#define EMS_PCI_V2_CONF_SIZE 128 /* size of PLX control area */ ++#define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers starts */ ++#define EMS_PCI_CAN_CTRL_SIZE 0x200 /* memory size for each controller */ ++ ++#define EMS_PCI_BASE_SIZE 4096 /* size of controller area */ ++ ++static const struct pci_device_id ems_pci_tbl[] = { ++ /* CPC-PCI v1 */ ++ {PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,}, ++ /* CPC-PCI v2 */ ++ {PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_PLX, 0x4000}, ++ /* CPC-104P v2 */ ++ {PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_PLX, 0x4002}, ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, ems_pci_tbl); ++ ++/* ++ * Helper to read internal registers from card logic (not CAN) ++ */ ++static u8 ems_pci_v1_readb(struct ems_pci_card *card, unsigned int port) ++{ ++ return readb((void __iomem *)card->base_addr + (port * 4)); ++} ++ ++static u8 ems_pci_v1_read_reg(struct rtcan_device *dev, int port) ++{ ++ return readb((void __iomem *)dev->base_addr + (port * 4)); ++} ++ ++static void ems_pci_v1_write_reg(struct rtcan_device *dev, ++ int port, u8 val) ++{ ++ writeb(val, (void __iomem *)dev->base_addr + (port * 4)); ++} ++ ++static void ems_pci_v1_post_irq(struct rtcan_device *dev) ++{ ++ struct ems_pci_card *card = (struct ems_pci_card *)dev->board_priv; ++ ++ /* reset int flag of pita */ ++ writel(PITA2_ICR_INT0_EN | PITA2_ICR_INT0, ++ card->conf_addr + PITA2_ICR); ++} ++ ++static u8 ems_pci_v2_read_reg(struct rtcan_device *dev, int port) ++{ ++ return readb((void __iomem *)dev->base_addr + port); ++} ++ ++static void ems_pci_v2_write_reg(struct rtcan_device *dev, ++ int port, u8 val) ++{ ++ writeb(val, (void __iomem *)dev->base_addr + port); ++} ++ ++static void ems_pci_v2_post_irq(struct rtcan_device *dev) ++{ ++ struct ems_pci_card *card = (struct ems_pci_card *)dev->board_priv; ++ ++ writel(PLX_ICSR_ENA_CLR, card->conf_addr + PLX_ICSR); ++} ++ ++/* ++ * Check if a CAN 
controller is present at the specified location ++ * by trying to set 'em into the PeliCAN mode ++ */ ++static inline int ems_pci_check_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_sja1000 *chip = (struct rtcan_sja1000 *)dev->priv; ++ unsigned char res; ++ ++ /* Make sure SJA1000 is in reset mode */ ++ chip->write_reg(dev, SJA_MOD, 1); ++ ++ chip->write_reg(dev, SJA_CDR, SJA_CDR_CAN_MODE); ++ ++ /* read reset-values */ ++ res = chip->read_reg(dev, SJA_CDR); ++ ++ if (res == SJA_CDR_CAN_MODE) ++ return 1; ++ ++ return 0; ++} ++ ++static void ems_pci_del_card(struct pci_dev *pdev) ++{ ++ struct ems_pci_card *card = pci_get_drvdata(pdev); ++ struct rtcan_device *dev; ++ int i = 0; ++ ++ for (i = 0; i < card->channels; i++) { ++ dev = card->rtcan_dev[i]; ++ ++ if (!dev) ++ continue; ++ ++ dev_info(&pdev->dev, "Removing %s.\n", dev->name); ++ rtcan_sja1000_unregister(dev); ++ rtcan_dev_free(dev); ++ } ++ ++ if (card->base_addr != NULL) ++ pci_iounmap(card->pci_dev, card->base_addr); ++ ++ if (card->conf_addr != NULL) ++ pci_iounmap(card->pci_dev, card->conf_addr); ++ ++ kfree(card); ++ ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static void ems_pci_card_reset(struct ems_pci_card *card) ++{ ++ /* Request board reset */ ++ writeb(0, card->base_addr); ++} ++ ++/* ++ * Probe PCI device for EMS CAN signature and register each available ++ * CAN channel to RTCAN subsystem. ++ */ ++static int ems_pci_add_card(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ struct rtcan_sja1000 *chip; ++ struct rtcan_device *dev; ++ struct ems_pci_card *card; ++ int max_chan, conf_size, base_bar; ++ int err, i; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ /* Enabling PCI device */ ++ if (pci_enable_device(pdev) < 0) { ++ dev_err(&pdev->dev, "Enabling PCI device failed\n"); ++ return -ENODEV; ++ } ++ ++ /* Allocating card structures to hold addresses, ... */ ++ card = kzalloc(sizeof(*card), GFP_KERNEL); ++ if (card == NULL) { ++ pci_disable_device(pdev); ++ return -ENOMEM; ++ } ++ ++ pci_set_drvdata(pdev, card); ++ ++ card->pci_dev = pdev; ++ ++ card->channels = 0; ++ ++ if (pdev->vendor == PCI_VENDOR_ID_PLX) { ++ card->version = 2; /* CPC-PCI v2 */ ++ max_chan = EMS_PCI_V2_MAX_CHAN; ++ base_bar = EMS_PCI_V2_BASE_BAR; ++ conf_size = EMS_PCI_V2_CONF_SIZE; ++ } else { ++ card->version = 1; /* CPC-PCI v1 */ ++ max_chan = EMS_PCI_V1_MAX_CHAN; ++ base_bar = EMS_PCI_V1_BASE_BAR; ++ conf_size = EMS_PCI_V1_CONF_SIZE; ++ } ++ ++ /* Remap configuration space and controller memory area */ ++ card->conf_addr = pci_iomap(pdev, 0, conf_size); ++ if (card->conf_addr == NULL) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE); ++ if (card->base_addr == NULL) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ if (card->version == 1) { ++ /* Configure PITA-2 parallel interface (enable MUX) */ ++ writel(PITA2_MISC_CONFIG, card->conf_addr + PITA2_MISC); ++ ++ /* Check for unique EMS CAN signature */ ++ if (ems_pci_v1_readb(card, 0) != 0x55 || ++ ems_pci_v1_readb(card, 1) != 0xAA || ++ ems_pci_v1_readb(card, 2) != 0x01 || ++ ems_pci_v1_readb(card, 3) != 0xCB || ++ ems_pci_v1_readb(card, 4) != 0x11) { ++ dev_err(&pdev->dev, ++ "Not EMS Dr. 
Thomas Wuensche interface\n"); ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ } ++ ++ ems_pci_card_reset(card); ++ ++ for (i = 0; i < max_chan; i++) { ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), 0); ++ if (!dev) { ++ err = -ENOMEM; ++ goto failure_cleanup; ++ } ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ dev->board_name = ems_pci_board_name; ++ dev->board_priv = card; ++ ++ card->rtcan_dev[i] = dev; ++ chip = card->rtcan_dev[i]->priv; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ dev->base_addr = (unsigned long)card->base_addr + ++ EMS_PCI_CAN_BASE_OFFSET + (i * EMS_PCI_CAN_CTRL_SIZE); ++ if (card->version == 1) { ++ chip->read_reg = ems_pci_v1_read_reg; ++ chip->write_reg = ems_pci_v1_write_reg; ++ chip->irq_ack = ems_pci_v1_post_irq; ++ } else { ++ chip->read_reg = ems_pci_v2_read_reg; ++ chip->write_reg = ems_pci_v2_write_reg; ++ chip->irq_ack = ems_pci_v2_post_irq; ++ } ++ ++ /* Check if channel is present */ ++ if (ems_pci_check_chan(dev)) { ++ dev->can_sys_clock = EMS_PCI_CAN_CLOCK; ++ chip->ocr = EMS_PCI_OCR | SJA_OCR_MODE_NORMAL; ++ chip->cdr = EMS_PCI_CDR | SJA_CDR_CAN_MODE; ++ ++ if (card->version == 1) ++ /* reset int flag of pita */ ++ writel(PITA2_ICR_INT0_EN | PITA2_ICR_INT0, ++ card->conf_addr + PITA2_ICR); ++ else ++ /* enable IRQ in PLX 9030 */ ++ writel(PLX_ICSR_ENA_CLR, ++ card->conf_addr + PLX_ICSR); ++ ++ /* Register SJA1000 device */ ++ err = rtcan_sja1000_register(dev); ++ if (err) { ++ dev_err(&pdev->dev, "Registering device failed " ++ "(err=%d)\n", err); ++ rtcan_dev_free(dev); ++ goto failure_cleanup; ++ } ++ ++ card->channels++; ++ ++ dev_info(&pdev->dev, "Channel #%d at 0x%p, irq %d " ++ "registered as %s\n", i + 1, ++ (void* __iomem)dev->base_addr, chip->irq_num, ++ dev->name); ++ } else { ++ dev_err(&pdev->dev, "Channel #%d not detected\n", ++ i + 1); ++ rtcan_dev_free(dev); ++ } ++ } ++ ++ if (!card->channels) { ++ err = -ENODEV; ++ goto failure_cleanup; ++ } ++ ++ return 0; ++ ++failure_cleanup: ++ dev_err(&pdev->dev, "Error: %d. Cleaning Up.\n", err); ++ ++ ems_pci_del_card(pdev); ++ ++ return err; ++} ++ ++static struct pci_driver ems_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = ems_pci_tbl, ++ .probe = ems_pci_add_card, ++ .remove = ems_pci_del_card, ++}; ++ ++module_pci_driver(ems_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_mem.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_mem.c 2021-04-07 16:01:26.302635488 +0800 +@@ -0,0 +1,216 @@ ++/* ++ * Copyright (C) 2006 Matthias Fuchs , ++ * Jan Kiszka ++ * ++ * RTCAN driver for memory mapped SJA1000 CAN controller ++ * This code has been tested on esd's CPCI405/EPPC405 PPC405 systems. ++ * ++ * This driver is derived from the rtcan-isa driver by ++ * Wolfgang Grandegger and Sebastian Smolorz. ++ * ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * Copyright (C) 2005, 2006 Sebastian Smolorz ++ * ++ * ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; eitherer version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "sja1000-mem" ++ ++#define RTCAN_MEM_MAX_DEV 4 ++ ++static char *mem_board_name = "mem mapped"; ++ ++MODULE_AUTHOR("Matthias Fuchs "); ++MODULE_DESCRIPTION("RTCAN driver for memory mapped SJA1000 controller"); ++MODULE_SUPPORTED_DEVICE("mem mapped"); ++MODULE_LICENSE("GPL"); ++ ++static u32 mem[RTCAN_MEM_MAX_DEV]; ++static int irq[RTCAN_MEM_MAX_DEV]; ++static u32 can_clock[RTCAN_MEM_MAX_DEV]; ++static u8 ocr[RTCAN_MEM_MAX_DEV]; ++static u8 cdr[RTCAN_MEM_MAX_DEV]; ++ ++module_param_array(mem, uint, NULL, 0444); ++module_param_array(irq, int, NULL, 0444); ++module_param_array(can_clock, uint, NULL, 0444); ++module_param_array(ocr, byte, NULL, 0444); ++module_param_array(cdr, byte, NULL, 0444); ++ ++MODULE_PARM_DESC(mem, "The io-memory address"); ++MODULE_PARM_DESC(irq, "The interrupt number"); ++MODULE_PARM_DESC(can_clock, "External clock frequency (default 16 MHz)"); ++MODULE_PARM_DESC(ocr, "Value of output control register (default 0x1a)"); ++MODULE_PARM_DESC(cdr, "Value of clock divider register (default 0xc8"); ++ ++#define RTCAN_MEM_RANGE 0x80 ++ ++struct rtcan_mem ++{ ++ volatile void __iomem *vmem; ++}; ++ ++static struct rtcan_device *rtcan_mem_devs[RTCAN_MEM_MAX_DEV]; ++ ++static u8 rtcan_mem_readreg(struct rtcan_device *dev, int reg) ++{ ++ struct rtcan_mem *board = (struct rtcan_mem *)dev->board_priv; ++ return readb(board->vmem + reg); ++} ++ ++static void rtcan_mem_writereg(struct rtcan_device *dev, int reg, u8 val) ++{ ++ struct rtcan_mem *board = (struct rtcan_mem *)dev->board_priv; ++ writeb(val, board->vmem + reg); ++} ++ ++int __init rtcan_mem_init_one(int idx) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_mem *board; ++ int ret; ++ ++ if ((dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_mem))) == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_mem *)dev->board_priv; ++ ++ dev->board_name = mem_board_name; ++ ++ chip->irq_num = irq[idx]; ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->read_reg = rtcan_mem_readreg; ++ chip->write_reg = rtcan_mem_writereg; ++ ++ if (!request_mem_region(mem[idx], RTCAN_MEM_RANGE, RTCAN_DRV_NAME)) { ++ ret = -EBUSY; ++ goto out_dev_free; ++ } ++ ++ /* ioremap io memory */ ++ if (!(board->vmem = ioremap(mem[idx], RTCAN_MEM_RANGE))) { ++ ret = -EBUSY; ++ goto out_release_mem; ++ } ++ ++ /* Clock frequency in Hz */ ++ if (can_clock[idx]) ++ dev->can_sys_clock = can_clock[idx] / 2; ++ else ++ dev->can_sys_clock = 8000000; /* 16/2 MHz */ ++ ++ /* Output control register */ ++ if (ocr[idx]) ++ chip->ocr = ocr[idx]; ++ else ++ chip->ocr = SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_PUSHPULL; ++ ++ if (cdr[idx]) ++ chip->cdr = cdr[idx]; ++ else ++ chip->cdr = SJA_CDR_CAN_MODE | SJA_CDR_CLK_OFF | SJA_CDR_CBP; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto out_iounmap; ++ } ++ ++ rtcan_mem_devs[idx] = dev; ++ return 0; ++ ++ out_iounmap: ++ iounmap((void *)board->vmem); ++ ++ out_release_mem: ++ 
release_mem_region(mem[idx], RTCAN_MEM_RANGE); ++ ++ out_dev_free: ++ rtcan_dev_free(dev); ++ ++ return ret; ++} ++ ++static void rtcan_mem_exit(void); ++ ++/** Init module */ ++static int __init rtcan_mem_init(void) ++{ ++ int i, err; ++ int devices = 0; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (i = 0; i < RTCAN_MEM_MAX_DEV && mem[i] != 0; i++) { ++ err = rtcan_mem_init_one(i); ++ if (err) { ++ rtcan_mem_exit(); ++ return err; ++ } ++ devices++; ++ } ++ if (devices) ++ return 0; ++ ++ printk(KERN_ERR "ERROR! No devices specified! " ++ "Use mem=[,...] irq=[,...]\n"); ++ return -EINVAL; ++} ++ ++ ++/** Cleanup module */ ++static void rtcan_mem_exit(void) ++{ ++ int i; ++ struct rtcan_device *dev; ++ volatile void __iomem *vmem; ++ ++ for (i = 0; i < RTCAN_MEM_MAX_DEV; i++) { ++ dev = rtcan_mem_devs[i]; ++ if (!dev) ++ continue; ++ vmem = ((struct rtcan_mem *)dev->board_priv)->vmem; ++ rtcan_sja1000_unregister(dev); ++ iounmap((void *)vmem); ++ release_mem_region(mem[i], RTCAN_MEM_RANGE); ++ rtcan_dev_free(dev); ++ } ++} ++ ++module_init(rtcan_mem_init); ++module_exit(rtcan_mem_exit); +--- linux/drivers/xenomai/can/sja1000/rtcan_esd_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_esd_pci.c 2021-04-07 16:01:26.297635495 +0800 +@@ -0,0 +1,346 @@ ++/* ++ * Copyright (C) 2009 Sebastian Smolorz ++ * ++ * This driver is based on the Socket-CAN driver esd_pci.c, ++ * Copyright (C) 2007 Wolfgang Grandegger ++ * Copyright (C) 2008 Sascha Hauer , Pengutronix ++ * Copyright (C) 2009 Matthias Fuchs , esd gmbh ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the version 2 of the GNU General Public License ++ * as published by the Free Software Foundation ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "ESD-PCI-CAN" ++ ++static char *esd_pci_board_name = "ESD-PCI"; ++ ++MODULE_AUTHOR("Sebastian Smolorz board_priv; ++ return readb(board->base_addr + port); ++} ++ ++static void rtcan_esd_pci_write_reg(struct rtcan_device *dev, int port, u8 val) ++{ ++ struct rtcan_esd_pci *board = (struct rtcan_esd_pci *)dev->board_priv; ++ writeb(val, board->base_addr + port); ++} ++ ++static void rtcan_esd_pci_del_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_esd_pci *board; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ printk("Removing %s %s device %s\n", ++ esd_pci_board_name, dev->ctrl_name, dev->name); ++ ++ rtcan_sja1000_unregister(dev); ++ ++ rtcan_dev_free(dev); ++} ++ ++static int rtcan_esd_pci_add_chan(struct pci_dev *pdev, int channel, ++ struct rtcan_device **master_dev, ++ void __iomem *conf_addr, ++ void __iomem *base_addr) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_esd_pci *board; ++ int ret; ++ ++ dev = rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_esd_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->conf_addr = conf_addr; ++ board->base_addr = base_addr; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_esd_pci *master_board = ++ (struct rtcan_esd_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ } ++ ++ dev->board_name = esd_pci_board_name; ++ ++ chip->read_reg = rtcan_esd_pci_read_reg; ++ chip->write_reg = rtcan_esd_pci_write_reg; ++ ++ dev->can_sys_clock = ESD_PCI_CAN_CLOCK; ++ ++ chip->ocr = ESD_PCI_OCR; ++ chip->cdr = ESD_PCI_CDR; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: base_addr=0x%p conf_addr=0x%p irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 " ++ "device!\n", ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ ++failure: ++ rtcan_dev_free(dev); ++ return ret; ++} ++ ++static int esd_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel; ++ void __iomem *base_addr; ++ void __iomem *conf_addr; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x %04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device, ++ pdev->subsystem_vendor, pdev->subsystem_device); ++ ++ conf_addr = pci_iomap(pdev, 0, ESD_PCI_BASE_SIZE); ++ if (conf_addr == NULL) { ++ ret = -ENODEV; ++ goto failure_release_pci; ++ } ++ ++ base_addr = pci_iomap(pdev, 2, ESD_PCI_BASE_SIZE); ++ if (base_addr == NULL) { ++ ret = -ENODEV; ++ goto failure_iounmap_conf; ++ } ++ ++ /* Check if second channel is available */ ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ 
writeb(SJA_CDR_CBP, base_addr + CHANNEL_OFFSET + SJA_CDR); ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ if (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) == 0x21) { ++ writeb(SJA_MOD_SM | SJA_MOD_AFM | SJA_MOD_STM | SJA_MOD_LOM | ++ SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ if (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) == 0x3f) ++ channel = CHANNEL_MASTER; ++ else { ++ writeb(SJA_MOD_RM, ++ base_addr + CHANNEL_OFFSET + SJA_MOD); ++ channel = CHANNEL_SINGLE; ++ } ++ } else { ++ writeb(SJA_MOD_RM, base_addr + CHANNEL_OFFSET + SJA_MOD); ++ channel = CHANNEL_SINGLE; ++ } ++ ++ if ((ret = rtcan_esd_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr))) ++ goto failure_iounmap_base; ++ ++ if (channel != CHANNEL_SINGLE) { ++ channel = CHANNEL_SLAVE; ++ if ((ret = rtcan_esd_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr + CHANNEL_OFFSET))) ++ goto failure_iounmap_base; ++ } ++ ++ if ((pdev->device == PCI_DEVICE_ID_PLX_9050) || ++ (pdev->device == PCI_DEVICE_ID_PLX_9030)) { ++ /* Enable interrupts in PLX9050 */ ++ writel(INTCSR_LINTI1 | INTCSR_PCI, conf_addr + INTCSR_OFFSET); ++ } else { ++ /* Enable interrupts in PLX9056*/ ++ writel(INTCSR9056_LINTI | INTCSR9056_PCI, ++ conf_addr + INTCSR9056_OFFSET); ++ } ++ ++ pci_set_drvdata(pdev, master_dev); ++ ++ return 0; ++ ++ ++failure_iounmap_base: ++ if (master_dev) ++ rtcan_esd_pci_del_chan(master_dev); ++ pci_iounmap(pdev, base_addr); ++ ++failure_iounmap_conf: ++ pci_iounmap(pdev, conf_addr); ++ ++failure_release_pci: ++ pci_release_regions(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void esd_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_esd_pci *board = (struct rtcan_esd_pci *)dev->board_priv; ++ ++ if ((pdev->device == PCI_DEVICE_ID_PLX_9050) || ++ (pdev->device == PCI_DEVICE_ID_PLX_9030)) { ++ /* Disable interrupts in PLX9050*/ ++ writel(0, board->conf_addr + INTCSR_OFFSET); ++ } else { ++ /* Disable interrupts in PLX9056*/ ++ writel(0, board->conf_addr + INTCSR9056_OFFSET); ++ } ++ ++ if (board->slave_dev) ++ rtcan_esd_pci_del_chan(board->slave_dev); ++ rtcan_esd_pci_del_chan(dev); ++ ++ ++ pci_iounmap(pdev, board->base_addr); ++ pci_iounmap(pdev, board->conf_addr); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_esd_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = esd_pci_tbl, ++ .probe = esd_pci_init_one, ++ .remove = esd_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_esd_pci_driver); +--- linux/drivers/xenomai/can/sja1000/rtcan_ixxat_pci.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/can/sja1000/rtcan_ixxat_pci.c 2021-04-07 16:01:26.292635502 +0800 +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (C) 2006 Wolfgang Grandegger ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* CAN device profile */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTCAN_DEV_NAME "rtcan%d" ++#define RTCAN_DRV_NAME "IXXAT-PCI-CAN" ++ ++static char *ixxat_pci_board_name = "IXXAT-PCI"; ++ ++MODULE_AUTHOR("Wolfgang Grandegger "); ++MODULE_DESCRIPTION("RTCAN board driver for IXXAT-PCI cards"); ++MODULE_SUPPORTED_DEVICE("IXXAT-PCI card CAN controller"); ++MODULE_LICENSE("GPL"); ++ ++struct rtcan_ixxat_pci ++{ ++ struct pci_dev *pci_dev; ++ struct rtcan_device *slave_dev; ++ int conf_addr; ++ void __iomem *base_addr; ++}; ++ ++#define IXXAT_PCI_CAN_SYS_CLOCK (16000000 / 2) ++ ++#define CHANNEL_SINGLE 0 /* this is a single channel device */ ++#define CHANNEL_MASTER 1 /* multi channel device, this device is master */ ++#define CHANNEL_SLAVE 2 /* multi channel device, this is slave */ ++ ++#define CHANNEL_OFFSET 0x200 ++#define CHANNEL_MASTER_RESET 0x110 ++#define CHANNEL_SLAVE_RESET (CHANNEL_MASTER_RESET + CHANNEL_OFFSET) ++ ++#define IXXAT_INTCSR_OFFSET 0x4c /* Offset in PLX9050 conf registers */ ++#define IXXAT_INTCSR_SLAVE 0x41 /* LINT1 and PCI interrupt enabled */ ++#define IXXAT_INTCSR_MASTER 0x08 /* LINT2 enabled */ ++#define IXXAT_SJA_MOD_MASK 0xa1 /* Mask for reading dual/single channel */ ++ ++/* PCI vender, device and sub-device ID */ ++#define IXXAT_PCI_VENDOR_ID 0x10b5 ++#define IXXAT_PCI_DEVICE_ID 0x9050 ++#define IXXAT_PCI_SUB_SYS_ID 0x2540 ++ ++#define IXXAT_CONF_PORT_SIZE 0x0080 ++#define IXXAT_BASE_PORT_SIZE 0x0400 ++ ++static struct pci_device_id ixxat_pci_tbl[] = { ++ {IXXAT_PCI_VENDOR_ID, IXXAT_PCI_DEVICE_ID, ++ IXXAT_PCI_VENDOR_ID, IXXAT_PCI_SUB_SYS_ID, 0, 0, 0}, ++ { } ++}; ++MODULE_DEVICE_TABLE (pci, ixxat_pci_tbl); ++ ++ ++static u8 rtcan_ixxat_pci_read_reg(struct rtcan_device *dev, int port) ++{ ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ return readb(board->base_addr + port); ++} ++ ++static void rtcan_ixxat_pci_write_reg(struct rtcan_device *dev, int port, u8 data) ++{ ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ writeb(data, board->base_addr + port); ++} ++ ++static void rtcan_ixxat_pci_del_chan(struct rtcan_device *dev) ++{ ++ struct rtcan_ixxat_pci *board; ++ u8 intcsr; ++ ++ if (!dev) ++ return; ++ ++ board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ printk("Removing %s %s device %s\n", ++ ixxat_pci_board_name, dev->ctrl_name, dev->name); ++ ++ rtcan_sja1000_unregister(dev); ++ ++ /* Disable PCI interrupts */ ++ intcsr = inb(board->conf_addr + IXXAT_INTCSR_OFFSET); ++ if (board->slave_dev) { ++ intcsr &= ~IXXAT_INTCSR_MASTER; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ writeb(0x1, board->base_addr + CHANNEL_MASTER_RESET); ++ iounmap(board->base_addr); ++ } else { ++ intcsr &= ~IXXAT_INTCSR_SLAVE; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ writeb(0x1, board->base_addr + CHANNEL_SLAVE_RESET ); ++ } ++ rtcan_dev_free(dev); ++} ++ ++static int rtcan_ixxat_pci_add_chan(struct pci_dev *pdev, ++ int channel, ++ struct rtcan_device **master_dev, ++ int conf_addr, ++ void __iomem *base_addr) ++{ ++ struct rtcan_device *dev; ++ struct rtcan_sja1000 *chip; ++ struct rtcan_ixxat_pci *board; ++ u8 intcsr; ++ int ret; ++ ++ dev = 
rtcan_dev_alloc(sizeof(struct rtcan_sja1000), ++ sizeof(struct rtcan_ixxat_pci)); ++ if (dev == NULL) ++ return -ENOMEM; ++ ++ chip = (struct rtcan_sja1000 *)dev->priv; ++ board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ board->pci_dev = pdev; ++ board->conf_addr = conf_addr; ++ board->base_addr = base_addr; ++ ++ if (channel == CHANNEL_SLAVE) { ++ struct rtcan_ixxat_pci *master_board = ++ (struct rtcan_ixxat_pci *)(*master_dev)->board_priv; ++ master_board->slave_dev = dev; ++ } ++ ++ dev->board_name = ixxat_pci_board_name; ++ ++ chip->read_reg = rtcan_ixxat_pci_read_reg; ++ chip->write_reg = rtcan_ixxat_pci_write_reg; ++ ++ /* Clock frequency in Hz */ ++ dev->can_sys_clock = IXXAT_PCI_CAN_SYS_CLOCK; ++ ++ /* Output control register */ ++ chip->ocr = (SJA_OCR_MODE_NORMAL | SJA_OCR_TX0_INVERT | ++ SJA_OCR_TX0_PUSHPULL | SJA_OCR_TX1_PUSHPULL); ++ ++ /* Clock divider register */ ++ chip->cdr = SJA_CDR_CAN_MODE; ++ ++ strncpy(dev->name, RTCAN_DEV_NAME, IFNAMSIZ); ++ ++ /* Enable PCI interrupts */ ++ intcsr = inb(board->conf_addr + IXXAT_INTCSR_OFFSET); ++ if (channel == CHANNEL_SLAVE) ++ intcsr |= IXXAT_INTCSR_SLAVE; ++ else ++ intcsr |= IXXAT_INTCSR_MASTER; ++ outb(intcsr, board->conf_addr + IXXAT_INTCSR_OFFSET); ++ ++ /* Register and setup interrupt handling */ ++ chip->irq_flags = RTDM_IRQTYPE_SHARED; ++ chip->irq_num = pdev->irq; ++ ++ RTCAN_DBG("%s: base_addr=0x%p conf_addr=%#x irq=%d ocr=%#x cdr=%#x\n", ++ RTCAN_DRV_NAME, board->base_addr, board->conf_addr, ++ chip->irq_num, chip->ocr, chip->cdr); ++ ++ /* Register SJA1000 device */ ++ ret = rtcan_sja1000_register(dev); ++ if (ret) { ++ printk(KERN_ERR "ERROR %d while trying to register SJA1000 device!\n", ++ ret); ++ goto failure; ++ } ++ ++ if (channel != CHANNEL_SLAVE) ++ *master_dev = dev; ++ ++ return 0; ++ ++ failure: ++ rtcan_dev_free(dev); ++ return ret; ++} ++ ++static int ixxat_pci_init_one(struct pci_dev *pdev, ++ const struct pci_device_id *ent) ++{ ++ int ret, channel, conf_addr; ++ unsigned long addr; ++ void __iomem *base_addr; ++ struct rtcan_device *master_dev = NULL; ++ ++ if (!rtdm_available()) ++ return -ENODEV; ++ ++ if ((ret = pci_enable_device (pdev))) ++ goto failure; ++ ++ if ((ret = pci_request_regions(pdev, RTCAN_DRV_NAME))) ++ goto failure; ++ ++ RTCAN_DBG("%s: Initializing device %04x:%04x:%04x\n", ++ RTCAN_DRV_NAME, pdev->vendor, pdev->device, ++ pdev->subsystem_device); ++ ++ /* Enable memory and I/O space */ ++ if ((ret = pci_write_config_word(pdev, 0x04, 0x3))) ++ goto failure_release_pci; ++ ++ conf_addr = pci_resource_start(pdev, 1); ++ ++ addr = pci_resource_start(pdev, 2); ++ base_addr = ioremap(addr, IXXAT_BASE_PORT_SIZE); ++ if (base_addr == 0) { ++ ret = -ENODEV; ++ goto failure_release_pci; ++ } ++ ++ /* Check if second channel is available after reset */ ++ writeb(0x1, base_addr + CHANNEL_MASTER_RESET); ++ writeb(0x1, base_addr + CHANNEL_SLAVE_RESET); ++ udelay(100); ++ if ( (readb(base_addr + CHANNEL_OFFSET + SJA_MOD) & IXXAT_SJA_MOD_MASK ) != 0x21 || ++ readb(base_addr + CHANNEL_OFFSET + SJA_SR ) != 0x0c || ++ readb(base_addr + CHANNEL_OFFSET + SJA_IR ) != 0xe0) ++ channel = CHANNEL_SINGLE; ++ else ++ channel = CHANNEL_MASTER; ++ ++ if ((ret = rtcan_ixxat_pci_add_chan(pdev, channel, &master_dev, ++ conf_addr, base_addr))) ++ goto failure_iounmap; ++ ++ if (channel != CHANNEL_SINGLE) { ++ channel = CHANNEL_SLAVE; ++ if ((ret = rtcan_ixxat_pci_add_chan(pdev, channel, ++ &master_dev, conf_addr, ++ base_addr + CHANNEL_OFFSET))) ++ goto failure_iounmap; ++ } ++ ++ pci_set_drvdata(pdev, 
master_dev); ++ return 0; ++ ++failure_iounmap: ++ if (master_dev) ++ rtcan_ixxat_pci_del_chan(master_dev); ++ iounmap(base_addr); ++ ++failure_release_pci: ++ pci_release_regions(pdev); ++ ++failure: ++ return ret; ++} ++ ++static void ixxat_pci_remove_one(struct pci_dev *pdev) ++{ ++ struct rtcan_device *dev = pci_get_drvdata(pdev); ++ struct rtcan_ixxat_pci *board = (struct rtcan_ixxat_pci *)dev->board_priv; ++ ++ if (board->slave_dev) ++ rtcan_ixxat_pci_del_chan(board->slave_dev); ++ rtcan_ixxat_pci_del_chan(dev); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++} ++ ++static struct pci_driver rtcan_ixxat_pci_driver = { ++ .name = RTCAN_DRV_NAME, ++ .id_table = ixxat_pci_tbl, ++ .probe = ixxat_pci_init_one, ++ .remove = ixxat_pci_remove_one, ++}; ++ ++module_pci_driver(rtcan_ixxat_pci_driver); +--- linux/drivers/xenomai/ipc/iddp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/iddp.c 2021-04-07 16:01:26.283635515 +0800 +@@ -0,0 +1,988 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define IDDP_SOCKET_MAGIC 0xa37a37a8 ++ ++struct iddp_message { ++ struct list_head next; ++ int from; ++ size_t rdoff; ++ size_t len; ++ char data[]; ++}; ++ ++struct iddp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ struct xnheap *bufpool; ++ struct xnheap privpool; ++ rtdm_waitqueue_t *poolwaitq; ++ rtdm_waitqueue_t privwaitq; ++ size_t poolsz; ++ rtdm_sem_t insem; ++ struct list_head inq; ++ u_long status; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ nanosecs_rel_t rx_timeout; ++ nanosecs_rel_t tx_timeout; ++ unsigned long stalls; /* Buffer stall counter. 
*/ ++ struct rtipc_private *priv; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct xnmap *portmap; ++ ++static rtdm_waitqueue_t poolwaitq; ++ ++#define _IDDP_BINDING 0 ++#define _IDDP_BOUND 1 ++#define _IDDP_CONNECTED 2 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__iddp_link_target(void *obj) ++{ ++ struct iddp_socket *sk = obj; ++ ++ return kasformat("%d", sk->name.sipc_port); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __iddp_pnode = { ++ .node = { ++ .dirname = "iddp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __iddp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __iddp_pnode = { ++ .node = { ++ .dirname = "iddp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static inline void __iddp_init_mbuf(struct iddp_message *mbuf, size_t len) ++{ ++ mbuf->rdoff = 0; ++ mbuf->len = len; ++ INIT_LIST_HEAD(&mbuf->next); ++} ++ ++static struct iddp_message * ++__iddp_alloc_mbuf(struct iddp_socket *sk, size_t len, ++ nanosecs_rel_t timeout, int flags, int *pret) ++{ ++ struct iddp_message *mbuf = NULL; ++ rtdm_toseq_t timeout_seq; ++ rtdm_lockctx_t s; ++ int ret = 0; ++ ++ rtdm_toseq_init(&timeout_seq, timeout); ++ ++ for (;;) { ++ mbuf = xnheap_alloc(sk->bufpool, len + sizeof(*mbuf)); ++ if (mbuf) { ++ __iddp_init_mbuf(mbuf, len); ++ break; ++ } ++ if (flags & MSG_DONTWAIT) { ++ ret = -EAGAIN; ++ break; ++ } ++ /* ++ * No luck, no buffer free. Wait for a buffer to be ++ * released and retry. Admittedly, we might create a ++ * thundering herd effect if many waiters put a lot of ++ * memory pressure on the pool, but in this case, the ++ * pool size should be adjusted. 
++ */ ++ rtdm_waitqueue_lock(sk->poolwaitq, s); ++ ++sk->stalls; ++ ret = rtdm_timedwait_locked(sk->poolwaitq, timeout, &timeout_seq); ++ rtdm_waitqueue_unlock(sk->poolwaitq, s); ++ if (unlikely(ret == -EIDRM)) ++ ret = -ECONNRESET; ++ if (ret) ++ break; ++ } ++ ++ *pret = ret; ++ ++ return mbuf; ++} ++ ++static void __iddp_free_mbuf(struct iddp_socket *sk, ++ struct iddp_message *mbuf) ++{ ++ xnheap_free(sk->bufpool, mbuf); ++ rtdm_waitqueue_broadcast(sk->poolwaitq); ++} ++ ++static int iddp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ ++ sk->magic = IDDP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->bufpool = &cobalt_heap; ++ sk->poolwaitq = &poolwaitq; ++ sk->poolsz = 0; ++ sk->status = 0; ++ sk->handle = 0; ++ sk->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->stalls = 0; ++ *sk->label = 0; ++ INIT_LIST_HEAD(&sk->inq); ++ rtdm_sem_init(&sk->insem, 0); ++ rtdm_waitqueue_init(&sk->privwaitq); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void iddp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ struct iddp_message *mbuf; ++ rtdm_lockctx_t s; ++ void *poolmem; ++ u32 poolsz; ++ ++ rtdm_sem_destroy(&sk->insem); ++ rtdm_waitqueue_destroy(&sk->privwaitq); ++ ++ if (test_bit(_IDDP_BOUND, &sk->status)) { ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ if (sk->name.sipc_port > -1) { ++ cobalt_atomic_enter(s); ++ xnmap_remove(portmap, sk->name.sipc_port); ++ cobalt_atomic_leave(s); ++ } ++ if (sk->bufpool != &cobalt_heap) { ++ poolmem = xnheap_get_membase(&sk->privpool); ++ poolsz = xnheap_get_size(&sk->privpool); ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ return; ++ } ++ } ++ ++ /* Send unread datagrams back to the system heap. */ ++ while (!list_empty(&sk->inq)) { ++ mbuf = list_entry(sk->inq.next, struct iddp_message, next); ++ list_del(&mbuf->next); ++ xnheap_free(&cobalt_heap, mbuf); ++ } ++ ++ kfree(sk); ++ ++ return; ++} ++ ++static ssize_t __iddp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ ssize_t maxlen, len, wrlen, vlen; ++ rtdm_toseq_t timeout_seq, *toseq; ++ int nvec, rdoff, ret, dofree; ++ struct iddp_message *mbuf; ++ nanosecs_rel_t timeout; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ if (!test_bit(_IDDP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ maxlen = rtdm_get_iov_flatlen(iov, iovlen); ++ if (maxlen == 0) ++ return 0; ++ ++ if (flags & MSG_DONTWAIT) { ++ timeout = RTDM_TIMEOUT_NONE; ++ toseq = NULL; ++ } else { ++ timeout = sk->rx_timeout; ++ toseq = &timeout_seq; ++ } ++ ++ /* We want to pick one buffer from the queue. */ ++ ++ for (;;) { ++ ret = rtdm_sem_timeddown(&sk->insem, timeout, toseq); ++ if (unlikely(ret)) { ++ if (ret == -EIDRM) ++ return -ECONNRESET; ++ return ret; ++ } ++ /* We may have spurious wakeups. */ ++ cobalt_atomic_enter(s); ++ if (!list_empty(&sk->inq)) ++ break; ++ cobalt_atomic_leave(s); ++ } ++ ++ /* Pull heading message from input queue. 
*/ ++ mbuf = list_entry(sk->inq.next, struct iddp_message, next); ++ rdoff = mbuf->rdoff; ++ len = mbuf->len - rdoff; ++ if (saddr) { ++ saddr->sipc_family = AF_RTIPC; ++ saddr->sipc_port = mbuf->from; ++ } ++ if (maxlen >= len) { ++ list_del(&mbuf->next); ++ dofree = 1; ++ if (list_empty(&sk->inq)) /* -> non-readable */ ++ xnselect_signal(&priv->recv_block, 0); ++ ++ } else { ++ /* Buffer is only partially read: repost. */ ++ mbuf->rdoff += maxlen; ++ len = maxlen; ++ dofree = 0; ++ } ++ ++ if (!dofree) ++ rtdm_sem_up(&sk->insem); ++ ++ cobalt_atomic_leave(s); ++ ++ /* Now, write "len" bytes from mbuf->data to the vector cells */ ++ for (nvec = 0, wrlen = len; nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ break; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ rdoff += vlen; ++ } ++ ++ if (dofree) ++ __iddp_free_mbuf(sk, mbuf); ++ ++ return ret ?: len; ++} ++ ++static ssize_t iddp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __iddp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. 
*/ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t iddp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __iddp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __iddp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state, *rsk; ++ struct iddp_message *mbuf; ++ ssize_t len, rdlen, vlen; ++ int nvec, wroff, ret; ++ struct rtdm_fd *rfd; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ cobalt_atomic_enter(s); ++ rfd = xnmap_fetch_nocheck(portmap, daddr->sipc_port); ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_IDDP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ mbuf = __iddp_alloc_mbuf(rsk, len, sk->tx_timeout, flags, &ret); ++ if (unlikely(ret)) { ++ rtdm_fd_unlock(rfd); ++ return ret; ++ } ++ ++ /* Now, move "len" bytes to mbuf->data from the vector cells */ ++ for (nvec = 0, rdlen = len, wroff = 0; ++ nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wroff, &bufd, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wroff, &bufd, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ wroff += vlen; ++ } ++ ++ cobalt_atomic_enter(s); ++ ++ /* ++ * CAUTION: we must remain atomic from the moment we signal ++ * POLLIN, until sem_up has happened. ++ */ ++ if (list_empty(&rsk->inq)) /* -> readable */ ++ xnselect_signal(&rsk->priv->recv_block, POLLIN); ++ ++ mbuf->from = sk->name.sipc_port; ++ ++ if (flags & MSG_OOB) ++ list_add(&mbuf->next, &rsk->inq); ++ else ++ list_add_tail(&mbuf->next, &rsk->inq); ++ ++ rtdm_sem_up(&rsk->insem); /* Will resched. */ ++ ++ cobalt_atomic_leave(s); ++ ++ rtdm_fd_unlock(rfd); ++ ++ return len; ++ ++fail: ++ __iddp_free_mbuf(rsk, mbuf); ++ ++ rtdm_fd_unlock(rfd); ++ ++ return ret; ++} ++ ++static ssize_t iddp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct iddp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ if (flags & ~(MSG_OOB | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. 
*/ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __iddp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t iddp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct iddp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __iddp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __iddp_bind_socket(struct rtdm_fd *fd, ++ struct sockaddr_ipc *sa) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ int ret = 0, port; ++ rtdm_lockctx_t s; ++ void *poolmem; ++ size_t poolsz; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_IDDP_BOUND, &sk->status) || ++ __test_and_set_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ ++ /* Will auto-select a free port number if unspec (-1). */ ++ port = sa->sipc_port; ++ cobalt_atomic_enter(s); ++ port = xnmap_enter(portmap, port, fd); ++ cobalt_atomic_leave(s); ++ if (port < 0) ++ return port == -EEXIST ? -EADDRINUSE : -ENOMEM; ++ ++ sa->sipc_port = port; ++ ++ /* ++ * Allocate a local buffer pool if we were told to do so via ++ * setsockopt() before we got there. ++ */ ++ poolsz = sk->poolsz; ++ if (poolsz > 0) { ++ poolsz = PAGE_ALIGN(poolsz); ++ poolmem = xnheap_vmalloc(poolsz); ++ if (poolmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ ret = xnheap_init(&sk->privpool, poolmem, poolsz); ++ if (ret) { ++ xnheap_vfree(poolmem); ++ goto fail; ++ } ++ xnheap_set_name(&sk->privpool, "iddp-pool@%d", port); ++ sk->poolwaitq = &sk->privwaitq; ++ sk->bufpool = &sk->privpool; ++ } ++ ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. 
*/ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, ++ &sk->handle, &__iddp_pnode.node); ++ if (ret) { ++ if (poolsz > 0) { ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } ++ goto fail; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ __clear_bit(_IDDP_BINDING, &sk->status); ++ __set_bit(_IDDP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++fail: ++ xnmap_remove(portmap, port); ++ clear_bit(_IDDP_BINDING, &sk->status); ++ ++ return ret; ++} ++ ++static int __iddp_connect_socket(struct iddp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct iddp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_IDDP_NRPORT) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRPORT-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via IDDP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. ++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->rx_timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != IDDP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->name.sipc_port; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_IDDP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. 
*/ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_IDDP_CONNECTED, &sk->status); ++ else ++ __set_bit(_IDDP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __iddp_setsockopt(struct iddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->rx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ case SO_SNDTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->tx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_IDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case IDDP_POOLSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ /* ++ * We may not do this more than once, and we have to ++ * do this before the first binding. ++ */ ++ if (test_bit(_IDDP_BOUND, &sk->status) || ++ test_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->poolsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case IDDP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ /* ++ * We may attach a label to a client socket which was ++ * previously bound in IDDP. 
++ */ ++ if (test_bit(_IDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __iddp_getsockopt(struct iddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ ret = rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->rx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ case SO_SNDTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->tx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_IDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case IDDP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __iddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct iddp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ ret = __iddp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __iddp_bind_socket(fd, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __iddp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __iddp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int iddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __iddp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static int iddp_init(void) ++{ ++ portmap = xnmap_create(CONFIG_XENO_OPT_IDDP_NRPORT, 0, 0); ++ if (portmap == NULL) ++ return -ENOMEM; ++ ++ rtdm_waitqueue_init(&poolwaitq); ++ ++ return 0; ++} ++ ++static void iddp_exit(void) ++{ ++ rtdm_waitqueue_destroy(&poolwaitq); ++ xnmap_delete(portmap); ++} ++ ++static unsigned int iddp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iddp_socket *sk = priv->state; ++ unsigned int mask = 0; 
++ struct rtdm_fd *rfd; ++ ++ if (test_bit(_IDDP_BOUND, &sk->status) && !list_empty(&sk->inq)) ++ mask |= POLLIN; ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists. Otherwise POLLOUT is always set, assuming the ++ * client is likely to use explicit addressing in send ++ * operations. ++ * ++ * If the peer exists, we still can't really know whether ++ * writing to the socket would block as it depends on the ++ * message size and other highly dynamic factors, so pretend ++ * it would not. ++ */ ++ if (test_bit(_IDDP_CONNECTED, &sk->status)) { ++ rfd = xnmap_fetch_nocheck(portmap, sk->peer.sipc_port); ++ if (rfd) ++ mask |= POLLOUT; ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++struct rtipc_protocol iddp_proto_driver = { ++ .proto_name = "iddp", ++ .proto_statesz = sizeof(struct iddp_socket), ++ .proto_init = iddp_init, ++ .proto_exit = iddp_exit, ++ .proto_ops = { ++ .socket = iddp_socket, ++ .close = iddp_close, ++ .recvmsg = iddp_recvmsg, ++ .sendmsg = iddp_sendmsg, ++ .read = iddp_read, ++ .write = iddp_write, ++ .ioctl = iddp_ioctl, ++ .pollstate = iddp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/Makefile 2021-04-07 16:01:26.278635522 +0800 +@@ -0,0 +1,8 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_RTIPC) += xeno_rtipc.o ++ ++xeno_rtipc-y := rtipc.o ++ ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_XDDP) += xddp.o ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_IDDP) += iddp.o ++xeno_rtipc-$(CONFIG_XENO_DRIVERS_RTIPC_BUFP) += bufp.o +--- linux/drivers/xenomai/ipc/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/Kconfig 2021-04-07 16:01:26.274635528 +0800 +@@ -0,0 +1,81 @@ ++menu "Real-time IPC drivers" ++ ++config XENO_DRIVERS_RTIPC ++ tristate "RTIPC protocol family" ++ help ++ ++ This driver provides the real-time IPC protocol family ++ (PF_RTIPC) over RTDM. ++ ++config XENO_DRIVERS_RTIPC_XDDP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_PIPE ++ default y ++ bool "XDDP cross-domain datagram protocol" ++ help ++ ++ Xenomai's XDDP protocol enables threads to exchange datagrams ++ across the Xenomai/Linux domain boundary, using "message ++ pipes". ++ ++ Message pipes are bi-directional FIFO communication channels ++ allowing data exchange between real-time Xenomai threads and ++ regular (i.e. non real-time) user-space processes. Message ++ pipes are datagram-based and thus natively preserve message ++ boundaries, but they can also be used in byte stream mode when ++ sending from the real-time to the non real-time domain. ++ ++ The maximum number of communication ports available in the ++ system can be configured using the XENO_OPT_PIPE_NRDEV option ++ from the Nucleus menu. ++ ++config XENO_DRIVERS_RTIPC_IDDP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_MAP ++ default y ++ bool "IDDP intra-domain datagram protocol" ++ help ++ ++ Xenomai's IDDP protocol enables real-time threads to exchange ++ datagrams within the Xenomai domain. ++ ++config XENO_OPT_IDDP_NRPORT ++ depends on XENO_DRIVERS_RTIPC_IDDP ++ int "Number of IDDP communication ports" ++ default 32 ++ help ++ ++ This parameter defines the number of IDDP ports available in ++ the system for creating receiver endpoints. Port numbers range ++ from 0 to CONFIG_XENO_OPT_IDDP_NRPORT - 1. 
++ ++config XENO_DRIVERS_RTIPC_BUFP ++ depends on XENO_DRIVERS_RTIPC ++ select XENO_OPT_MAP ++ default y ++ bool "Buffer protocol" ++ help ++ ++ The buffer protocol implements a byte-oriented, one-way ++ Producer-Consumer data path, which makes it a bit faster than ++ datagram-oriented protocols. All messages written are buffered ++ into a single memory area in strict FIFO order, until read by ++ the consumer. ++ ++ This protocol prevents short writes, and only allows short ++ reads when a potential deadlock situation arises (i.e. readers ++ and writers waiting for each other indefinitely), which ++ usually means that the buffer size does not fit the use peer ++ threads are making from the protocol. ++ ++config XENO_OPT_BUFP_NRPORT ++ depends on XENO_DRIVERS_RTIPC_BUFP ++ int "Number of BUFP communication ports" ++ default 32 ++ help ++ ++ This parameter defines the number of BUFP ports available in ++ the system for creating receiver endpoints. Port numbers range ++ from 0 to CONFIG_XENO_OPT_BUFP_NRPORT - 1. ++ ++endmenu +--- linux/drivers/xenomai/ipc/xddp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/xddp.c 2021-04-07 16:01:26.269635535 +0800 +@@ -0,0 +1,1130 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define XDDP_SOCKET_MAGIC 0xa21a21a2 ++ ++struct xddp_message { ++ struct xnpipe_mh mh; ++ char data[]; ++}; ++ ++struct xddp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ ++ int minor; ++ size_t poolsz; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ struct rtdm_fd *fd; /* i.e. 
RTDM socket fd */ ++ ++ struct xddp_message *buffer; ++ int buffer_port; ++ struct xnheap *bufpool; ++ struct xnheap privpool; ++ size_t fillsz; ++ size_t curbufsz; /* Current streaming buffer size */ ++ u_long status; ++ rtdm_lock_t lock; ++ ++ nanosecs_rel_t timeout; /* connect()/recvmsg() timeout */ ++ size_t reqbufsz; /* Requested streaming buffer size */ ++ ++ int (*monitor)(struct rtdm_fd *fd, int event, long arg); ++ struct rtipc_private *priv; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct rtdm_fd *portmap[CONFIG_XENO_OPT_PIPE_NRDEV]; /* indexes RTDM fildes */ ++ ++#define _XDDP_SYNCWAIT 0 ++#define _XDDP_ATOMIC 1 ++#define _XDDP_BINDING 2 ++#define _XDDP_BOUND 3 ++#define _XDDP_CONNECTED 4 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__xddp_link_target(void *obj) ++{ ++ struct xddp_socket *sk = obj; ++ ++ return kasformat("/dev/rtp%d", sk->minor); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __xddp_pnode = { ++ .node = { ++ .dirname = "xddp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __xddp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __xddp_pnode = { ++ .node = { ++ .dirname = "xddp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static void *__xddp_alloc_handler(size_t size, void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ void *buf; ++ ++ /* Try to allocate memory for the incoming message. */ ++ buf = xnheap_alloc(sk->bufpool, size); ++ if (unlikely(buf == NULL)) { ++ if (sk->monitor) ++ sk->monitor(sk->fd, XDDP_EVTNOBUF, size); ++ if (size > xnheap_get_size(sk->bufpool)) ++ buf = (void *)-1; /* Will never succeed. */ ++ } ++ ++ return buf; ++} ++ ++static int __xddp_resize_streambuf(struct xddp_socket *sk) /* sk->lock held */ ++{ ++ if (sk->buffer) ++ xnheap_free(sk->bufpool, sk->buffer); ++ ++ if (sk->reqbufsz == 0) { ++ sk->buffer = NULL; ++ sk->curbufsz = 0; ++ return 0; ++ } ++ ++ sk->buffer = xnheap_alloc(sk->bufpool, sk->reqbufsz); ++ if (sk->buffer == NULL) { ++ sk->curbufsz = 0; ++ return -ENOMEM; ++ } ++ ++ sk->curbufsz = sk->reqbufsz; ++ ++ return 0; ++} ++ ++static void __xddp_free_handler(void *buf, void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ rtdm_lockctx_t s; ++ ++ if (buf != sk->buffer) { ++ xnheap_free(sk->bufpool, buf); ++ return; ++ } ++ ++ /* Reset the streaming buffer. */ ++ ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ sk->fillsz = 0; ++ sk->buffer_port = -1; ++ __clear_bit(_XDDP_SYNCWAIT, &sk->status); ++ __clear_bit(_XDDP_ATOMIC, &sk->status); ++ ++ /* ++ * If a XDDP_BUFSZ request is pending, resize the streaming ++ * buffer on-the-fly. ++ */ ++ if (unlikely(sk->curbufsz != sk->reqbufsz)) ++ __xddp_resize_streambuf(sk); ++ ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++} ++ ++static void __xddp_output_handler(struct xnpipe_mh *mh, void *skarg) /* nklock held */ ++{ ++ struct xddp_socket *sk = skarg; ++ ++ if (sk->monitor) ++ sk->monitor(sk->fd, XDDP_EVTOUT, xnpipe_m_size(mh)); ++} ++ ++static int __xddp_input_handler(struct xnpipe_mh *mh, int retval, void *skarg) /* nklock held */ ++{ ++ struct xddp_socket *sk = skarg; ++ ++ if (sk->monitor) { ++ if (retval == 0) ++ /* Callee may alter the return value passed to userland. 
*/ ++ retval = sk->monitor(sk->fd, XDDP_EVTIN, xnpipe_m_size(mh)); ++ else if (retval == -EPIPE && mh == NULL) ++ sk->monitor(sk->fd, XDDP_EVTDOWN, 0); ++ } ++ ++ if (retval == 0 && ++ (__xnpipe_pollstate(sk->minor) & POLLIN) != 0 && ++ xnselect_signal(&sk->priv->recv_block, POLLIN)) ++ xnsched_run(); ++ ++ return retval; ++} ++ ++static void __xddp_release_handler(void *skarg) /* nklock free */ ++{ ++ struct xddp_socket *sk = skarg; ++ void *poolmem; ++ u32 poolsz; ++ ++ if (sk->bufpool == &sk->privpool) { ++ poolmem = xnheap_get_membase(&sk->privpool); ++ poolsz = xnheap_get_size(&sk->privpool); ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } else if (sk->buffer) ++ xnfree(sk->buffer); ++ ++ kfree(sk); ++} ++ ++static int xddp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state; ++ ++ sk->magic = XDDP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->minor = -1; ++ sk->handle = 0; ++ *sk->label = 0; ++ sk->poolsz = 0; ++ sk->buffer = NULL; ++ sk->buffer_port = -1; ++ sk->bufpool = NULL; ++ sk->fillsz = 0; ++ sk->status = 0; ++ sk->timeout = RTDM_TIMEOUT_INFINITE; ++ sk->curbufsz = 0; ++ sk->reqbufsz = 0; ++ sk->monitor = NULL; ++ rtdm_lock_init(&sk->lock); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void xddp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state; ++ rtdm_lockctx_t s; ++ ++ sk->monitor = NULL; ++ ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ return; ++ ++ cobalt_atomic_enter(s); ++ portmap[sk->name.sipc_port] = NULL; ++ cobalt_atomic_leave(s); ++ ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ ++ xnpipe_disconnect(sk->minor); ++} ++ ++static ssize_t __xddp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_message *mbuf = NULL; /* Fake GCC */ ++ struct xddp_socket *sk = priv->state; ++ ssize_t maxlen, len, wrlen, vlen; ++ nanosecs_rel_t timeout; ++ struct xnpipe_mh *mh; ++ int nvec, rdoff, ret; ++ struct xnbufd bufd; ++ spl_t s; ++ ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ maxlen = rtdm_get_iov_flatlen(iov, iovlen); ++ if (maxlen == 0) ++ return 0; ++ ++ timeout = (flags & MSG_DONTWAIT) ? RTDM_TIMEOUT_NONE : sk->timeout; ++ /* Pull heading message from the input queue. */ ++ len = xnpipe_recv(sk->minor, &mh, timeout); ++ if (len < 0) ++ return len == -EIDRM ? 0 : len; ++ if (len > maxlen) { ++ ret = -ENOBUFS; ++ goto out; ++ } ++ ++ mbuf = container_of(mh, struct xddp_message, mh); ++ ++ if (saddr) ++ *saddr = sk->name; ++ ++ /* Write "len" bytes from mbuf->data to the vector cells */ ++ for (ret = 0, nvec = 0, rdoff = 0, wrlen = len; ++ nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_from_kmem(&bufd, mbuf->data + rdoff, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto out; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ rdoff += vlen; ++ } ++out: ++ xnheap_free(sk->bufpool, mbuf); ++ cobalt_atomic_enter(s); ++ if ((__xnpipe_pollstate(sk->minor) & POLLIN) == 0 && ++ xnselect_signal(&priv->recv_block, 0)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return ret ?: len; ++} ++ ++static ssize_t xddp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __xddp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. */ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t xddp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __xddp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __xddp_stream(struct xddp_socket *sk, ++ int from, struct xnbufd *bufd) ++{ ++ struct xddp_message *mbuf; ++ size_t fillptr, rembytes; ++ rtdm_lockctx_t s; ++ ssize_t outbytes; ++ int ret; ++ ++ /* ++ * xnpipe_msend() and xnpipe_mfixup() routines will only grab ++ * the nklock directly or indirectly, so holding our socket ++ * lock across those calls is fine. ++ */ ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ /* ++ * There are two cases in which we must remove the cork ++ * unconditionally and send the incoming data as a standalone ++ * datagram: the destination port does not support streaming, ++ * or its streaming buffer is already filled with data issued ++ * from another port. ++ */ ++ if (sk->curbufsz == 0 || ++ (sk->buffer_port >= 0 && sk->buffer_port != from)) { ++ /* This will end up into a standalone datagram. */ ++ outbytes = 0; ++ goto out; ++ } ++ ++ mbuf = sk->buffer; ++ rembytes = sk->curbufsz - sizeof(*mbuf) - sk->fillsz; ++ outbytes = bufd->b_len > rembytes ? rembytes : bufd->b_len; ++ if (likely(outbytes > 0)) { ++ repeat: ++ /* Mark the beginning of a should-be-atomic section. */ ++ __set_bit(_XDDP_ATOMIC, &sk->status); ++ fillptr = sk->fillsz; ++ sk->fillsz += outbytes; ++ ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ ret = xnbufd_copy_to_kmem(mbuf->data + fillptr, ++ bufd, outbytes); ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ ++ if (ret < 0) { ++ outbytes = ret; ++ __clear_bit(_XDDP_ATOMIC, &sk->status); ++ goto out; ++ } ++ ++ /* We haven't been atomic, let's try again. 
*/ ++ if (!__test_and_clear_bit(_XDDP_ATOMIC, &sk->status)) ++ goto repeat; ++ ++ if (__test_and_set_bit(_XDDP_SYNCWAIT, &sk->status)) ++ outbytes = xnpipe_mfixup(sk->minor, ++ &mbuf->mh, outbytes); ++ else { ++ sk->buffer_port = from; ++ outbytes = xnpipe_send(sk->minor, &mbuf->mh, ++ outbytes + sizeof(*mbuf), ++ XNPIPE_NORMAL); ++ if (outbytes > 0) ++ outbytes -= sizeof(*mbuf); ++ } ++ } ++ ++out: ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ ++ return outbytes; ++} ++ ++static ssize_t __xddp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ ssize_t len, rdlen, wrlen, vlen, ret, sublen; ++ struct xddp_socket *sk = priv->state; ++ struct xddp_message *mbuf; ++ struct xddp_socket *rsk; ++ struct rtdm_fd *rfd; ++ int nvec, to, from; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ from = sk->name.sipc_port; ++ to = daddr->sipc_port; ++ ++ cobalt_atomic_enter(s); ++ rfd = portmap[to]; ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_XDDP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ sublen = len; ++ nvec = 0; ++ ++ /* ++ * If active, the streaming buffer is already pending on the ++ * output queue, so we basically have nothing to do during a ++ * MSG_MORE -> MSG_NONE transition. Therefore, we only have to ++ * take care of filling that buffer when MSG_MORE is ++ * given. Yummie. ++ */ ++ if (flags & MSG_MORE) { ++ for (rdlen = sublen, wrlen = 0; ++ nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __xddp_stream(rsk, from, &bufd); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __xddp_stream(rsk, from, &bufd); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail_unlock; ++ wrlen += ret; ++ rdlen -= ret; ++ iov[nvec].iov_base += ret; ++ iov[nvec].iov_len -= ret; ++ /* ++ * In case of a short write to the streaming ++ * buffer, send the unsent part as a ++ * standalone datagram. ++ */ ++ if (ret < vlen) { ++ sublen = rdlen; ++ goto nostream; ++ } ++ } ++ len = wrlen; ++ goto done; ++ } ++ ++nostream: ++ mbuf = xnheap_alloc(rsk->bufpool, sublen + sizeof(*mbuf)); ++ if (unlikely(mbuf == NULL)) { ++ ret = -ENOMEM; ++ goto fail_unlock; ++ } ++ ++ /* ++ * Move "sublen" bytes to mbuf->data from the vector cells ++ */ ++ for (rdlen = sublen, wrlen = 0; nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wrlen, &bufd, vlen); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = xnbufd_copy_to_kmem(mbuf->data + wrlen, &bufd, vlen); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail_freebuf; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ wrlen += vlen; ++ } ++ ++ ret = xnpipe_send(rsk->minor, &mbuf->mh, ++ sublen + sizeof(*mbuf), ++ (flags & MSG_OOB) ? 
++ XNPIPE_URGENT : XNPIPE_NORMAL); ++ ++ if (unlikely(ret < 0)) { ++ fail_freebuf: ++ xnheap_free(rsk->bufpool, mbuf); ++ fail_unlock: ++ rtdm_fd_unlock(rfd); ++ return ret; ++ } ++done: ++ rtdm_fd_unlock(rfd); ++ ++ return len; ++} ++ ++static ssize_t xddp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct xddp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ /* ++ * We accept MSG_DONTWAIT, but do not care about it, since ++ * writing to the real-time endpoint of a message pipe must be ++ * a non-blocking operation. ++ */ ++ if (flags & ~(MSG_MORE | MSG_OOB | MSG_DONTWAIT)) ++ return -EINVAL; ++ ++ /* ++ * MSG_MORE and MSG_OOB are mutually exclusive in our ++ * implementation. ++ */ ++ if ((flags & (MSG_MORE | MSG_OOB)) == (MSG_MORE | MSG_OOB)) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. */ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __xddp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t xddp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct xddp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __xddp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __xddp_bind_socket(struct rtipc_private *priv, ++ struct sockaddr_ipc *sa) ++{ ++ struct xddp_socket *sk = priv->state; ++ struct xnpipe_operations ops; ++ rtdm_lockctx_t s; ++ size_t poolsz; ++ void *poolmem; ++ int ret = 0; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ /* Allow special port -1 for auto-selection. 
*/ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ __test_and_set_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ ++ poolsz = sk->poolsz; ++ if (poolsz > 0) { ++ poolsz = PAGE_ALIGN(poolsz); ++ poolsz += PAGE_ALIGN(sk->reqbufsz); ++ poolmem = xnheap_vmalloc(poolsz); ++ if (poolmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ ret = xnheap_init(&sk->privpool, poolmem, poolsz); ++ if (ret) { ++ xnheap_vfree(poolmem); ++ goto fail; ++ } ++ ++ sk->bufpool = &sk->privpool; ++ } else ++ sk->bufpool = &cobalt_heap; ++ ++ if (sk->reqbufsz > 0) { ++ sk->buffer = xnheap_alloc(sk->bufpool, sk->reqbufsz); ++ if (sk->buffer == NULL) { ++ ret = -ENOMEM; ++ goto fail_freeheap; ++ } ++ sk->curbufsz = sk->reqbufsz; ++ } ++ ++ sk->fd = rtdm_private_to_fd(priv); ++ ++ ops.output = &__xddp_output_handler; ++ ops.input = &__xddp_input_handler; ++ ops.alloc_ibuf = &__xddp_alloc_handler; ++ ops.free_ibuf = &__xddp_free_handler; ++ ops.free_obuf = &__xddp_free_handler; ++ ops.release = &__xddp_release_handler; ++ ++ ret = xnpipe_connect(sa->sipc_port, &ops, sk); ++ if (ret < 0) { ++ if (ret == -EBUSY) ++ ret = -EADDRINUSE; ++ fail_freeheap: ++ if (poolsz > 0) { ++ xnheap_destroy(&sk->privpool); ++ xnheap_vfree(poolmem); ++ } ++ fail: ++ clear_bit(_XDDP_BINDING, &sk->status); ++ return ret; ++ } ++ ++ sk->minor = ret; ++ sa->sipc_port = ret; ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. */ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (poolsz > 0) ++ xnheap_set_name(sk->bufpool, "xddp-pool@%d", sa->sipc_port); ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, &sk->handle, ++ &__xddp_pnode.node); ++ if (ret) { ++ /* The release handler will cleanup the pool for us. */ ++ xnpipe_disconnect(sk->minor); ++ return ret; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ portmap[sk->minor] = rtdm_private_to_fd(priv); ++ __clear_bit(_XDDP_BINDING, &sk->status); ++ __set_bit(_XDDP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __xddp_connect_socket(struct xddp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct xddp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_PIPE_NRDEV) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRDEV-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via XDDP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. 
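For reference, a minimal user-space XDDP endpoint following the addressing rules enumerated above could be set up as in the sketch below. The port number and pool size are arbitrary placeholders, and the program is assumed to be built against libcobalt so that socket(), setsockopt() and bind() resolve to the Cobalt wrappers.

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <rtdm/ipc.h>

/* Sketch only: port 7 and the 16 KiB pool size are placeholders. */
static int open_xddp_endpoint(void)
{
	struct sockaddr_ipc saddr;
	size_t poolsz = 16384;	/* dedicated local pool, in bytes */
	int s, ret;

	s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_XDDP);
	if (s < 0)
		return -errno;

	/* Optional: give this endpoint its own buffer pool (XDDP_POOLSZ)
	 * instead of drawing from the Cobalt system heap. */
	ret = setsockopt(s, SOL_XDDP, XDDP_POOLSZ, &poolsz, sizeof(poolsz));
	if (ret)
		return -errno;

	/* Bind to a fixed port; passing -1 would auto-select a free one. */
	memset(&saddr, 0, sizeof(saddr));
	saddr.sipc_family = AF_RTIPC;
	saddr.sipc_port = 7;
	ret = bind(s, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret)
		return -errno;

	/* The non-real-time side reaches this endpoint through /dev/rtp7. */
	return s;
}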
++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != XDDP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->minor; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_XDDP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. */ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_XDDP_CONNECTED, &sk->status); ++ else ++ __set_bit(_XDDP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __xddp_setsockopt(struct xddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ int (*monitor)(struct rtdm_fd *fd, int event, long arg); ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_XDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case XDDP_BUFSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len > 0) { ++ len += sizeof(struct xddp_message); ++ if (sk->bufpool && ++ len > xnheap_get_size(sk->bufpool)) { ++ return -EINVAL; ++ } ++ } ++ rtdm_lock_get_irqsave(&sk->lock, s); ++ sk->reqbufsz = len; ++ if (len != sk->curbufsz && ++ !test_bit(_XDDP_SYNCWAIT, &sk->status) && ++ test_bit(_XDDP_BOUND, &sk->status)) ++ ret = __xddp_resize_streambuf(sk); ++ rtdm_lock_put_irqrestore(&sk->lock, s); ++ break; ++ ++ case XDDP_POOLSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ test_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->poolsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case XDDP_MONITOR: ++ /* Monitoring is available from kernel-space only. 
*/ ++ if (rtdm_fd_is_user(fd)) ++ return -EPERM; ++ if (sopt.optlen != sizeof(monitor)) ++ return -EINVAL; ++ if (rtipc_get_arg(NULL, &monitor, sopt.optval, sizeof(monitor))) ++ return -EFAULT; ++ sk->monitor = monitor; ++ break; ++ ++ case XDDP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ if (test_bit(_XDDP_BOUND, &sk->status) || ++ test_bit(_XDDP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __xddp_getsockopt(struct xddp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len))) ++ return -EFAULT; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_XDDP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case XDDP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __xddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct xddp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret == 0) ++ ret = __xddp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __xddp_bind_socket(priv, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __xddp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __xddp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int xddp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __xddp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static unsigned int xddp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xddp_socket *sk = priv->state, *rsk; ++ unsigned int mask = 0, pollstate; ++ struct rtdm_fd 
*rfd; ++ ++ pollstate = __xnpipe_pollstate(sk->minor); ++ if (test_bit(_XDDP_BOUND, &sk->status)) ++ mask |= (pollstate & POLLIN); ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists, is bound and can receive data. Otherwise POLLOUT is ++ * always set, assuming the client is likely to use explicit ++ * addressing in send operations. ++ */ ++ if (test_bit(_XDDP_CONNECTED, &sk->status)) { ++ rfd = portmap[sk->peer.sipc_port]; ++ if (rfd) { ++ rsk = rtipc_fd_to_state(rfd); ++ mask |= (pollstate & POLLOUT); ++ } ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++struct rtipc_protocol xddp_proto_driver = { ++ .proto_name = "xddp", ++ .proto_statesz = sizeof(struct xddp_socket), ++ .proto_ops = { ++ .socket = xddp_socket, ++ .close = xddp_close, ++ .recvmsg = xddp_recvmsg, ++ .sendmsg = xddp_sendmsg, ++ .read = xddp_read, ++ .write = xddp_write, ++ .ioctl = xddp_ioctl, ++ .pollstate = xddp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/bufp.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/bufp.c 2021-04-07 16:01:26.264635542 +0800 +@@ -0,0 +1,1100 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++#define BUFP_SOCKET_MAGIC 0xa61a61a6 ++ ++struct bufp_socket { ++ int magic; ++ struct sockaddr_ipc name; ++ struct sockaddr_ipc peer; ++ ++ void *bufmem; ++ size_t bufsz; ++ u_long status; ++ xnhandle_t handle; ++ char label[XNOBJECT_NAME_LEN]; ++ ++ off_t rdoff; ++ off_t rdrsvd; ++ int rdsem; ++ off_t wroff; ++ off_t wrrsvd; ++ int wrsem; ++ size_t fillsz; ++ rtdm_event_t i_event; ++ rtdm_event_t o_event; ++ ++ nanosecs_rel_t rx_timeout; ++ nanosecs_rel_t tx_timeout; ++ ++ struct rtipc_private *priv; ++}; ++ ++struct bufp_wait_context { ++ struct rtipc_wait_context wc; ++ size_t len; ++ struct bufp_socket *sk; ++}; ++ ++static struct sockaddr_ipc nullsa = { ++ .sipc_family = AF_RTIPC, ++ .sipc_port = -1 ++}; ++ ++static struct xnmap *portmap; ++ ++#define _BUFP_BINDING 0 ++#define _BUFP_BOUND 1 ++#define _BUFP_CONNECTED 2 ++ ++#ifdef CONFIG_XENO_OPT_VFILE ++ ++static char *__bufp_link_target(void *obj) ++{ ++ struct bufp_socket *sk = obj; ++ ++ return kasformat("%d", sk->name.sipc_port); ++} ++ ++extern struct xnptree rtipc_ptree; ++ ++static struct xnpnode_link __bufp_pnode = { ++ .node = { ++ .dirname = "bufp", ++ .root = &rtipc_ptree, ++ .ops = &xnregistry_vlink_ops, ++ }, ++ .target = __bufp_link_target, ++}; ++ ++#else /* !CONFIG_XENO_OPT_VFILE */ ++ ++static struct xnpnode_link __bufp_pnode = { ++ .node = { ++ .dirname = "bufp", ++ }, ++}; ++ ++#endif /* !CONFIG_XENO_OPT_VFILE */ ++ ++static int bufp_socket(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ ++ sk->magic = BUFP_SOCKET_MAGIC; ++ sk->name = nullsa; /* Unbound */ ++ sk->peer = nullsa; ++ sk->bufmem = NULL; ++ sk->bufsz = 0; ++ sk->rdoff = 0; ++ sk->wroff = 0; ++ sk->fillsz = 0; ++ sk->rdrsvd = 0; ++ sk->wrrsvd = 0; ++ sk->status = 0; ++ sk->handle = 0; ++ sk->rx_timeout = RTDM_TIMEOUT_INFINITE; ++ sk->tx_timeout = RTDM_TIMEOUT_INFINITE; ++ *sk->label = 0; ++ rtdm_event_init(&sk->i_event, 0); ++ rtdm_event_init(&sk->o_event, 0); ++ sk->priv = priv; ++ ++ return 0; ++} ++ ++static void bufp_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ rtdm_lockctx_t s; ++ ++ rtdm_event_destroy(&sk->i_event); ++ rtdm_event_destroy(&sk->o_event); ++ ++ if (test_bit(_BUFP_BOUND, &sk->status)) { ++ if (sk->name.sipc_port > -1) { ++ cobalt_atomic_enter(s); ++ xnmap_remove(portmap, sk->name.sipc_port); ++ cobalt_atomic_leave(s); ++ } ++ ++ if (sk->handle) ++ xnregistry_remove(sk->handle); ++ ++ if (sk->bufmem) ++ xnheap_vfree(sk->bufmem); ++ } ++ ++ kfree(sk); ++} ++ ++static ssize_t __bufp_readbuf(struct bufp_socket *sk, ++ struct xnbufd *bufd, ++ int flags) ++{ ++ struct bufp_wait_context wait, *bufwc; ++ struct rtipc_wait_context *wc; ++ struct xnthread *waiter; ++ size_t rbytes, n, avail; ++ ssize_t len, ret, xret; ++ rtdm_toseq_t toseq; ++ rtdm_lockctx_t s; ++ off_t rdoff; ++ int resched; ++ ++ len = bufd->b_len; ++ ++ rtdm_toseq_init(&toseq, sk->rx_timeout); ++ ++ cobalt_atomic_enter(s); ++redo: ++ for (;;) { ++ /* ++ * We should be able to read a complete message of the ++ * requested length, or block. ++ */ ++ avail = sk->fillsz - sk->rdrsvd; ++ if (avail < len) ++ goto wait; ++ ++ /* Reserve a read slot into the circular buffer. 
*/ ++ rdoff = sk->rdoff; ++ sk->rdoff = (rdoff + len) % sk->bufsz; ++ sk->rdrsvd += len; ++ sk->rdsem++; ++ rbytes = ret = len; ++ ++ do { ++ if (rdoff + rbytes > sk->bufsz) ++ n = sk->bufsz - rdoff; ++ else ++ n = rbytes; ++ /* ++ * Drop the lock before copying data to ++ * user. The read slot is consumed in any ++ * case: the non-copied portion of the message ++ * is lost on bad write. ++ */ ++ cobalt_atomic_leave(s); ++ xret = xnbufd_copy_from_kmem(bufd, sk->bufmem + rdoff, n); ++ cobalt_atomic_enter(s); ++ if (xret < 0) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ rbytes -= n; ++ rdoff = (rdoff + n) % sk->bufsz; ++ } while (rbytes > 0); ++ ++ if (--sk->rdsem > 0) ++ goto out; ++ ++ resched = 0; ++ if (sk->fillsz == sk->bufsz) /* -> becomes writable */ ++ resched |= xnselect_signal(&sk->priv->send_block, POLLOUT); ++ ++ sk->fillsz -= sk->rdrsvd; ++ sk->rdrsvd = 0; ++ ++ if (sk->fillsz == 0) /* -> becomes non-readable */ ++ resched |= xnselect_signal(&sk->priv->recv_block, 0); ++ ++ /* ++ * Wake up all threads pending on the output wait ++ * queue, if we freed enough room for the leading one ++ * to post its message. ++ */ ++ waiter = rtipc_peek_wait_head(&sk->o_event); ++ if (waiter == NULL) ++ goto out; ++ ++ wc = rtipc_get_wait_context(waiter); ++ XENO_BUG_ON(COBALT, wc == NULL); ++ bufwc = container_of(wc, struct bufp_wait_context, wc); ++ if (bufwc->len + sk->fillsz <= sk->bufsz) ++ /* This call rescheds internally. */ ++ rtdm_event_pulse(&sk->o_event); ++ else if (resched) ++ xnsched_run(); ++ /* ++ * We cannot fail anymore once some data has been ++ * copied via the buffer descriptor, so no need to ++ * check for any reason to invalidate the latter. ++ */ ++ goto out; ++ ++ wait: ++ if (flags & MSG_DONTWAIT) { ++ ret = -EWOULDBLOCK; ++ break; ++ } ++ ++ /* ++ * Check whether writers are already waiting for ++ * sending data, while we are about to wait for ++ * receiving some. In such a case, we have a ++ * pathological use of the buffer. We must allow for a ++ * short read to prevent a deadlock. ++ */ ++ if (sk->fillsz > 0 && rtipc_peek_wait_head(&sk->o_event)) { ++ len = sk->fillsz; ++ goto redo; ++ } ++ ++ wait.len = len; ++ wait.sk = sk; ++ rtipc_prepare_wait(&wait.wc); ++ /* ++ * Keep the nucleus lock across the wait call, so that ++ * we don't miss a pulse. ++ */ ++ ret = rtdm_event_timedwait(&sk->i_event, ++ sk->rx_timeout, &toseq); ++ if (unlikely(ret)) ++ break; ++ } ++out: ++ cobalt_atomic_leave(s); ++ ++ return ret; ++} ++ ++static ssize_t __bufp_recvmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ struct sockaddr_ipc *saddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state; ++ ssize_t len, wrlen, vlen, ret; ++ struct xnbufd bufd; ++ int nvec; ++ ++ if (!test_bit(_BUFP_BOUND, &sk->status)) ++ return -EAGAIN; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ /* ++ * We may only return complete messages to readers, so there ++ * is no point in waiting for messages which are larger than ++ * what the buffer can hold. ++ */ ++ if (len > sk->bufsz) ++ return -EINVAL; ++ ++ /* ++ * Write "len" bytes from the buffer to the vector cells. Each ++ * cell is handled as a separate message. ++ */ ++ for (nvec = 0, wrlen = len; nvec < iovlen && wrlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = wrlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : wrlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_readbuf(sk, &bufd, flags); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_readbuf(sk, &bufd, flags); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ return ret; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ wrlen -= vlen; ++ if (ret < vlen) ++ /* Short reads may happen in rare cases. */ ++ break; ++ } ++ ++ /* ++ * There is no way to determine who the sender was since we ++ * process data in byte-oriented mode, so we just copy our own ++ * sockaddr to send back a valid address. ++ */ ++ if (saddr) ++ *saddr = sk->name; ++ ++ return len - wrlen; ++} ++ ++static ssize_t bufp_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct sockaddr_ipc saddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen < sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ } else if (msg->msg_namelen != 0) ++ return -EINVAL; ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __bufp_recvmsg(fd, iov, msg->msg_iovlen, flags, &saddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy the updated I/O vector back */ ++ if (rtdm_put_iovec(fd, iov, msg, iov_fast)) ++ return -EFAULT; ++ ++ /* Copy the source address if required. */ ++ if (msg->msg_name) { ++ if (rtipc_put_arg(fd, msg->msg_name, ++ &saddr, sizeof(saddr))) ++ return -EFAULT; ++ msg->msg_namelen = sizeof(struct sockaddr_ipc); ++ } ++ ++ return ret; ++} ++ ++static ssize_t bufp_read(struct rtdm_fd *fd, void *buf, size_t len) ++{ ++ struct iovec iov = { .iov_base = buf, .iov_len = len }; ++ ++ return __bufp_recvmsg(fd, &iov, 1, 0, NULL); ++} ++ ++static ssize_t __bufp_writebuf(struct bufp_socket *rsk, ++ struct bufp_socket *sk, ++ struct xnbufd *bufd, ++ int flags) ++{ ++ struct bufp_wait_context wait, *bufwc; ++ struct rtipc_wait_context *wc; ++ struct xnthread *waiter; ++ size_t wbytes, n, avail; ++ ssize_t len, ret, xret; ++ rtdm_toseq_t toseq; ++ rtdm_lockctx_t s; ++ off_t wroff; ++ int resched; ++ ++ len = bufd->b_len; ++ ++ rtdm_toseq_init(&toseq, sk->tx_timeout); ++ ++ cobalt_atomic_enter(s); ++ ++ for (;;) { ++ /* ++ * No short or scattered writes: we should write the ++ * entire message atomically or block. ++ */ ++ avail = rsk->fillsz + rsk->wrrsvd; ++ if (avail + len > rsk->bufsz) ++ goto wait; ++ ++ /* Reserve a write slot into the circular buffer. */ ++ wroff = rsk->wroff; ++ rsk->wroff = (wroff + len) % rsk->bufsz; ++ rsk->wrrsvd += len; ++ rsk->wrsem++; ++ wbytes = ret = len; ++ ++ do { ++ if (wroff + wbytes > rsk->bufsz) ++ n = rsk->bufsz - wroff; ++ else ++ n = wbytes; ++ /* ++ * We have to drop the lock while reading in ++ * data, but we can't rollback on bad read ++ * from user because some other thread might ++ * have populated the memory ahead of our ++ * write slot already: bluntly clear the ++ * unavailable bytes on copy error. 
++ */ ++ cobalt_atomic_leave(s); ++ xret = xnbufd_copy_to_kmem(rsk->bufmem + wroff, bufd, n); ++ cobalt_atomic_enter(s); ++ if (xret < 0) { ++ memset(rsk->bufmem + wroff, 0, n); ++ ret = -EFAULT; ++ break; ++ } ++ ++ wbytes -= n; ++ wroff = (wroff + n) % rsk->bufsz; ++ } while (wbytes > 0); ++ ++ if (--rsk->wrsem > 0) ++ goto out; ++ ++ resched = 0; ++ if (rsk->fillsz == 0) /* -> becomes readable */ ++ resched |= xnselect_signal(&rsk->priv->recv_block, POLLIN); ++ ++ rsk->fillsz += rsk->wrrsvd; ++ rsk->wrrsvd = 0; ++ ++ if (rsk->fillsz == rsk->bufsz) /* becomes non-writable */ ++ resched |= xnselect_signal(&rsk->priv->send_block, 0); ++ /* ++ * Wake up all threads pending on the input wait ++ * queue, if we accumulated enough data to feed the ++ * leading one. ++ */ ++ waiter = rtipc_peek_wait_head(&rsk->i_event); ++ if (waiter == NULL) ++ goto out; ++ ++ wc = rtipc_get_wait_context(waiter); ++ XENO_BUG_ON(COBALT, wc == NULL); ++ bufwc = container_of(wc, struct bufp_wait_context, wc); ++ if (bufwc->len <= rsk->fillsz) ++ rtdm_event_pulse(&rsk->i_event); ++ else if (resched) ++ xnsched_run(); ++ /* ++ * We cannot fail anymore once some data has been ++ * copied via the buffer descriptor, so no need to ++ * check for any reason to invalidate the latter. ++ */ ++ goto out; ++ wait: ++ if (flags & MSG_DONTWAIT) { ++ ret = -EWOULDBLOCK; ++ break; ++ } ++ ++ wait.len = len; ++ wait.sk = rsk; ++ rtipc_prepare_wait(&wait.wc); ++ /* ++ * Keep the nucleus lock across the wait call, so that ++ * we don't miss a pulse. ++ */ ++ ret = rtdm_event_timedwait(&rsk->o_event, ++ sk->tx_timeout, &toseq); ++ if (unlikely(ret)) ++ break; ++ } ++out: ++ cobalt_atomic_leave(s); ++ ++ return ret; ++} ++ ++static ssize_t __bufp_sendmsg(struct rtdm_fd *fd, ++ struct iovec *iov, int iovlen, int flags, ++ const struct sockaddr_ipc *daddr) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state, *rsk; ++ ssize_t len, rdlen, vlen, ret = 0; ++ struct rtdm_fd *rfd; ++ struct xnbufd bufd; ++ rtdm_lockctx_t s; ++ int nvec; ++ ++ len = rtdm_get_iov_flatlen(iov, iovlen); ++ if (len == 0) ++ return 0; ++ ++ cobalt_atomic_enter(s); ++ rfd = xnmap_fetch_nocheck(portmap, daddr->sipc_port); ++ if (rfd && rtdm_fd_lock(rfd) < 0) ++ rfd = NULL; ++ cobalt_atomic_leave(s); ++ if (rfd == NULL) ++ return -ECONNRESET; ++ ++ rsk = rtipc_fd_to_state(rfd); ++ if (!test_bit(_BUFP_BOUND, &rsk->status)) { ++ rtdm_fd_unlock(rfd); ++ return -ECONNREFUSED; ++ } ++ ++ /* ++ * We may only send complete messages, so there is no point in ++ * accepting messages which are larger than what the buffer ++ * can hold. ++ */ ++ if (len > rsk->bufsz) { ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* ++ * Read "len" bytes to the buffer from the vector cells. Each ++ * cell is handled as a separate message. ++ */ ++ for (nvec = 0, rdlen = len; nvec < iovlen && rdlen > 0; nvec++) { ++ if (iov[nvec].iov_len == 0) ++ continue; ++ vlen = rdlen >= iov[nvec].iov_len ? 
iov[nvec].iov_len : rdlen; ++ if (rtdm_fd_is_user(fd)) { ++ xnbufd_map_uread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_writebuf(rsk, sk, &bufd, flags); ++ xnbufd_unmap_uread(&bufd); ++ } else { ++ xnbufd_map_kread(&bufd, iov[nvec].iov_base, vlen); ++ ret = __bufp_writebuf(rsk, sk, &bufd, flags); ++ xnbufd_unmap_kread(&bufd); ++ } ++ if (ret < 0) ++ goto fail; ++ iov[nvec].iov_base += vlen; ++ iov[nvec].iov_len -= vlen; ++ rdlen -= vlen; ++ } ++ ++ rtdm_fd_unlock(rfd); ++ ++ return len - rdlen; ++fail: ++ rtdm_fd_unlock(rfd); ++ ++ return ret; ++} ++ ++static ssize_t bufp_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov; ++ struct bufp_socket *sk = priv->state; ++ struct sockaddr_ipc daddr; ++ ssize_t ret; ++ ++ if (flags & ~MSG_DONTWAIT) ++ return -EINVAL; ++ ++ if (msg->msg_name) { ++ if (msg->msg_namelen != sizeof(struct sockaddr_ipc)) ++ return -EINVAL; ++ ++ /* Fetch the destination address to send to. */ ++ if (rtipc_get_arg(fd, &daddr, msg->msg_name, sizeof(daddr))) ++ return -EFAULT; ++ ++ if (daddr.sipc_port < 0 || ++ daddr.sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ } else { ++ if (msg->msg_namelen != 0) ++ return -EINVAL; ++ daddr = sk->peer; ++ if (daddr.sipc_port < 0) ++ return -EDESTADDRREQ; ++ } ++ ++ if (msg->msg_iovlen >= UIO_MAXIOV) ++ return -EINVAL; ++ ++ /* Copy I/O vector in */ ++ ret = rtdm_get_iovec(fd, &iov, msg, iov_fast); ++ if (ret) ++ return ret; ++ ++ ret = __bufp_sendmsg(fd, iov, msg->msg_iovlen, flags, &daddr); ++ if (ret <= 0) { ++ rtdm_drop_iovec(iov, iov_fast); ++ return ret; ++ } ++ ++ /* Copy updated I/O vector back */ ++ return rtdm_put_iovec(fd, iov, msg, iov_fast) ?: ret; ++} ++ ++static ssize_t bufp_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct iovec iov = { .iov_base = (void *)buf, .iov_len = len }; ++ struct bufp_socket *sk = priv->state; ++ ++ if (sk->peer.sipc_port < 0) ++ return -EDESTADDRREQ; ++ ++ return __bufp_sendmsg(fd, &iov, 1, 0, &sk->peer); ++} ++ ++static int __bufp_bind_socket(struct rtipc_private *priv, ++ struct sockaddr_ipc *sa) ++{ ++ struct bufp_socket *sk = priv->state; ++ int ret = 0, port; ++ struct rtdm_fd *fd; ++ rtdm_lockctx_t s; ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ ++ cobalt_atomic_enter(s); ++ if (test_bit(_BUFP_BOUND, &sk->status) || ++ __test_and_set_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EADDRINUSE; ++ cobalt_atomic_leave(s); ++ ++ if (ret) ++ return ret; ++ ++ /* Will auto-select a free port number if unspec (-1). */ ++ port = sa->sipc_port; ++ fd = rtdm_private_to_fd(priv); ++ cobalt_atomic_enter(s); ++ port = xnmap_enter(portmap, port, fd); ++ cobalt_atomic_leave(s); ++ if (port < 0) ++ return port == -EEXIST ? -EADDRINUSE : -ENOMEM; ++ ++ sa->sipc_port = port; ++ ++ /* ++ * The caller must have told us how much memory is needed for ++ * buffer space via setsockopt(), before we got there. ++ */ ++ if (sk->bufsz == 0) ++ return -ENOBUFS; ++ ++ sk->bufmem = xnheap_vmalloc(sk->bufsz); ++ if (sk->bufmem == NULL) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ sk->name = *sa; ++ /* Set default destination if unset at binding time. 
*/ ++ if (sk->peer.sipc_port < 0) ++ sk->peer = *sa; ++ ++ if (*sk->label) { ++ ret = xnregistry_enter(sk->label, sk, ++ &sk->handle, &__bufp_pnode.node); ++ if (ret) { ++ xnheap_vfree(sk->bufmem); ++ goto fail; ++ } ++ } ++ ++ cobalt_atomic_enter(s); ++ __clear_bit(_BUFP_BINDING, &sk->status); ++ __set_bit(_BUFP_BOUND, &sk->status); ++ if (xnselect_signal(&priv->send_block, POLLOUT)) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++fail: ++ xnmap_remove(portmap, port); ++ clear_bit(_BUFP_BINDING, &sk->status); ++ ++ return ret; ++} ++ ++static int __bufp_connect_socket(struct bufp_socket *sk, ++ struct sockaddr_ipc *sa) ++{ ++ struct sockaddr_ipc _sa; ++ struct bufp_socket *rsk; ++ int ret, resched = 0; ++ rtdm_lockctx_t s; ++ xnhandle_t h; ++ ++ if (sa == NULL) { ++ _sa = nullsa; ++ sa = &_sa; ++ goto set_assoc; ++ } ++ ++ if (sa->sipc_family != AF_RTIPC) ++ return -EINVAL; ++ ++ if (sa->sipc_port < -1 || ++ sa->sipc_port >= CONFIG_XENO_OPT_BUFP_NRPORT) ++ return -EINVAL; ++ /* ++ * - If a valid sipc_port is passed in the [0..NRPORT-1] range, ++ * it is used verbatim and the connection succeeds ++ * immediately, regardless of whether the destination is ++ * bound at the time of the call. ++ * ++ * - If sipc_port is -1 and a label was set via BUFP_LABEL, ++ * connect() blocks for the requested amount of time (see ++ * SO_RCVTIMEO) until a socket is bound to the same label. ++ * ++ * - If sipc_port is -1 and no label is given, the default ++ * destination address is cleared, meaning that any subsequent ++ * write() to the socket will return -EDESTADDRREQ, until a ++ * valid destination address is set via connect() or bind(). ++ * ++ * - In all other cases, -EINVAL is returned. ++ */ ++ if (sa->sipc_port < 0 && *sk->label) { ++ ret = xnregistry_bind(sk->label, ++ sk->rx_timeout, XN_RELATIVE, &h); ++ if (ret) ++ return ret; ++ ++ cobalt_atomic_enter(s); ++ rsk = xnregistry_lookup(h, NULL); ++ if (rsk == NULL || rsk->magic != BUFP_SOCKET_MAGIC) ++ ret = -EINVAL; ++ else { ++ /* Fetch labeled port number. */ ++ sa->sipc_port = rsk->name.sipc_port; ++ resched = xnselect_signal(&sk->priv->send_block, POLLOUT); ++ } ++ cobalt_atomic_leave(s); ++ if (ret) ++ return ret; ++ } else if (sa->sipc_port < 0) ++ sa = &nullsa; ++set_assoc: ++ cobalt_atomic_enter(s); ++ if (!test_bit(_BUFP_BOUND, &sk->status)) ++ /* Set default name. */ ++ sk->name = *sa; ++ /* Set default destination. 
*/ ++ sk->peer = *sa; ++ if (sa->sipc_port < 0) ++ __clear_bit(_BUFP_CONNECTED, &sk->status); ++ else ++ __set_bit(_BUFP_CONNECTED, &sk->status); ++ if (resched) ++ xnsched_run(); ++ cobalt_atomic_leave(s); ++ ++ return 0; ++} ++ ++static int __bufp_setsockopt(struct bufp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_setsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ size_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptin(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->rx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ case SO_SNDTIMEO: ++ ret = rtipc_get_timeval(fd, &tv, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ sk->tx_timeout = rtipc_timeval_to_ns(&tv); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_BUFP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case BUFP_BUFSZ: ++ ret = rtipc_get_length(fd, &len, sopt.optval, sopt.optlen); ++ if (ret) ++ return ret; ++ if (len == 0) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ /* ++ * We may not do this more than once, and we have to ++ * do this before the first binding. ++ */ ++ if (test_bit(_BUFP_BOUND, &sk->status) || ++ test_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else ++ sk->bufsz = len; ++ cobalt_atomic_leave(s); ++ break; ++ ++ case BUFP_LABEL: ++ if (sopt.optlen < sizeof(plabel)) ++ return -EINVAL; ++ if (rtipc_get_arg(fd, &plabel, sopt.optval, sizeof(plabel))) ++ return -EFAULT; ++ cobalt_atomic_enter(s); ++ /* ++ * We may attach a label to a client socket which was ++ * previously bound in BUFP. 
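For reference, a BUFP endpoint is typically configured in the same spirit: the ring size must be set through BUFP_BUFSZ before the first bind(), and a label may be attached so peers can connect() by name rather than by port number. The sketch below uses placeholder values and assumes a libcobalt-linked program.

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <rtdm/ipc.h>

/* Sketch only: buffer size and label are placeholders. */
static int open_bufp_endpoint(void)
{
	struct rtipc_port_label plabel;
	struct sockaddr_ipc saddr;
	size_t bufsz = 32768;	/* ring buffer size, required before bind() */
	int s, ret;

	s = socket(AF_RTIPC, SOCK_DGRAM, IPCPROTO_BUFP);
	if (s < 0)
		return -errno;

	ret = setsockopt(s, SOL_BUFP, BUFP_BUFSZ, &bufsz, sizeof(bufsz));
	if (ret)
		return -errno;

	/* Optional: register a label, so a peer may connect() with
	 * sipc_port == -1 and the same label to resolve the port. */
	memset(&plabel, 0, sizeof(plabel));
	strcpy(plabel.label, "sensor-feed");
	ret = setsockopt(s, SOL_BUFP, BUFP_LABEL, &plabel, sizeof(plabel));
	if (ret)
		return -errno;

	/* Let the driver auto-select a free port. */
	memset(&saddr, 0, sizeof(saddr));
	saddr.sipc_family = AF_RTIPC;
	saddr.sipc_port = -1;
	ret = bind(s, (struct sockaddr *)&saddr, sizeof(saddr));
	if (ret)
		return -errno;

	return s;
}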
++ */ ++ if (test_bit(_BUFP_BINDING, &sk->status)) ++ ret = -EALREADY; ++ else { ++ strcpy(sk->label, plabel.label); ++ sk->label[XNOBJECT_NAME_LEN-1] = 0; ++ } ++ cobalt_atomic_leave(s); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __bufp_getsockopt(struct bufp_socket *sk, ++ struct rtdm_fd *fd, ++ void *arg) ++{ ++ struct _rtdm_getsockopt_args sopt; ++ struct rtipc_port_label plabel; ++ struct timeval tv; ++ rtdm_lockctx_t s; ++ socklen_t len; ++ int ret; ++ ++ ret = rtipc_get_sockoptout(fd, &sopt, arg); ++ if (ret) ++ return ret; ++ ++ if (rtipc_get_arg(fd, &len, sopt.optlen, sizeof(len))) ++ return -EFAULT; ++ ++ if (sopt.level == SOL_SOCKET) { ++ switch (sopt.optname) { ++ ++ case SO_RCVTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->rx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ case SO_SNDTIMEO: ++ rtipc_ns_to_timeval(&tv, sk->tx_timeout); ++ ret = rtipc_put_timeval(fd, sopt.optval, &tv, len); ++ if (ret) ++ return ret; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++ } ++ ++ if (sopt.level != SOL_BUFP) ++ return -ENOPROTOOPT; ++ ++ switch (sopt.optname) { ++ ++ case BUFP_LABEL: ++ if (len < sizeof(plabel)) ++ return -EINVAL; ++ cobalt_atomic_enter(s); ++ strcpy(plabel.label, sk->label); ++ cobalt_atomic_leave(s); ++ if (rtipc_put_arg(fd, sopt.optval, &plabel, sizeof(plabel))) ++ return -EFAULT; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int __bufp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct sockaddr_ipc saddr, *saddrp = &saddr; ++ struct bufp_socket *sk = priv->state; ++ int ret = 0; ++ ++ switch (request) { ++ ++ COMPAT_CASE(_RTIOC_CONNECT): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ ret = __bufp_connect_socket(sk, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_BIND): ++ ret = rtipc_get_sockaddr(fd, &saddrp, arg); ++ if (ret) ++ return ret; ++ if (saddrp == NULL) ++ return -EFAULT; ++ ret = __bufp_bind_socket(priv, saddrp); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->name); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETPEERNAME): ++ ret = rtipc_put_sockaddr(fd, arg, &sk->peer); ++ break; ++ ++ COMPAT_CASE(_RTIOC_SETSOCKOPT): ++ ret = __bufp_setsockopt(sk, fd, arg); ++ break; ++ ++ COMPAT_CASE(_RTIOC_GETSOCKOPT): ++ ret = __bufp_getsockopt(sk, fd, arg); ++ break; ++ ++ case _RTIOC_LISTEN: ++ COMPAT_CASE(_RTIOC_ACCEPT): ++ ret = -EOPNOTSUPP; ++ break; ++ ++ case _RTIOC_SHUTDOWN: ++ ret = -ENOTCONN; ++ break; ++ ++ default: ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static int bufp_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ int ret; ++ ++ switch (request) { ++ COMPAT_CASE(_RTIOC_BIND): ++ if (rtdm_in_rt_context()) ++ return -ENOSYS; /* Try downgrading to NRT */ ++ default: ++ ret = __bufp_ioctl(fd, request, arg); ++ } ++ ++ return ret; ++} ++ ++static unsigned int bufp_pollstate(struct rtdm_fd *fd) /* atomic */ ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct bufp_socket *sk = priv->state, *rsk; ++ unsigned int mask = 0; ++ struct rtdm_fd *rfd; ++ ++ if (test_bit(_BUFP_BOUND, &sk->status) && sk->fillsz > 0) ++ mask |= POLLIN; ++ ++ /* ++ * If the socket is connected, POLLOUT means that the peer ++ * exists, is bound and can receive data. 
Otherwise POLLOUT is ++ * always set, assuming the client is likely to use explicit ++ * addressing in send operations. ++ */ ++ if (test_bit(_BUFP_CONNECTED, &sk->status)) { ++ rfd = xnmap_fetch_nocheck(portmap, sk->peer.sipc_port); ++ if (rfd) { ++ rsk = rtipc_fd_to_state(rfd); ++ if (rsk->fillsz < rsk->bufsz) ++ mask |= POLLOUT; ++ } ++ } else ++ mask |= POLLOUT; ++ ++ return mask; ++} ++ ++static int bufp_init(void) ++{ ++ portmap = xnmap_create(CONFIG_XENO_OPT_BUFP_NRPORT, 0, 0); ++ if (portmap == NULL) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void bufp_exit(void) ++{ ++ xnmap_delete(portmap); ++} ++ ++struct rtipc_protocol bufp_proto_driver = { ++ .proto_name = "bufp", ++ .proto_statesz = sizeof(struct bufp_socket), ++ .proto_init = bufp_init, ++ .proto_exit = bufp_exit, ++ .proto_ops = { ++ .socket = bufp_socket, ++ .close = bufp_close, ++ .recvmsg = bufp_recvmsg, ++ .sendmsg = bufp_sendmsg, ++ .read = bufp_read, ++ .write = bufp_write, ++ .ioctl = bufp_ioctl, ++ .pollstate = bufp_pollstate, ++ } ++}; +--- linux/drivers/xenomai/ipc/internal.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/internal.h 2021-04-07 16:01:26.260635548 +0800 +@@ -0,0 +1,134 @@ ++/** ++ * This file is part of the Xenomai project. ++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _RTIPC_INTERNAL_H ++#define _RTIPC_INTERNAL_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct rtipc_protocol; ++ ++struct rtipc_private { ++ struct rtipc_protocol *proto; ++ DECLARE_XNSELECT(send_block); ++ DECLARE_XNSELECT(recv_block); ++ void *state; ++}; ++ ++struct rtipc_protocol { ++ const char *proto_name; ++ int proto_statesz; ++ int (*proto_init)(void); ++ void (*proto_exit)(void); ++ struct { ++ int (*socket)(struct rtdm_fd *fd); ++ void (*close)(struct rtdm_fd *fd); ++ ssize_t (*recvmsg)(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags); ++ ssize_t (*sendmsg)(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags); ++ ssize_t (*read)(struct rtdm_fd *fd, ++ void *buf, size_t len); ++ ssize_t (*write)(struct rtdm_fd *fd, ++ const void *buf, size_t len); ++ int (*ioctl)(struct rtdm_fd *fd, ++ unsigned int request, void *arg); ++ unsigned int (*pollstate)(struct rtdm_fd *fd); ++ } proto_ops; ++}; ++ ++static inline void *rtipc_fd_to_state(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *p = rtdm_fd_to_private(fd); ++ return p->state; ++} ++ ++static inline nanosecs_rel_t rtipc_timeval_to_ns(const struct timeval *tv) ++{ ++ nanosecs_rel_t ns = tv->tv_usec * 1000; ++ ++ if (tv->tv_sec) ++ ns += (nanosecs_rel_t)tv->tv_sec * 1000000000UL; ++ ++ return ns; ++} ++ ++static inline void rtipc_ns_to_timeval(struct timeval *tv, nanosecs_rel_t ns) ++{ ++ unsigned long nsecs; ++ ++ tv->tv_sec = xnclock_divrem_billion(ns, &nsecs); ++ tv->tv_usec = nsecs / 1000; ++} ++ ++int rtipc_get_sockaddr(struct rtdm_fd *fd, ++ struct sockaddr_ipc **saddrp, ++ const void *arg); ++ ++int rtipc_put_sockaddr(struct rtdm_fd *fd, void *arg, ++ const struct sockaddr_ipc *saddr); ++ ++int rtipc_get_sockoptout(struct rtdm_fd *fd, ++ struct _rtdm_getsockopt_args *sopt, ++ const void *arg); ++ ++int rtipc_put_sockoptout(struct rtdm_fd *fd, void *arg, ++ const struct _rtdm_getsockopt_args *sopt); ++ ++int rtipc_get_sockoptin(struct rtdm_fd *fd, ++ struct _rtdm_setsockopt_args *sopt, ++ const void *arg); ++ ++int rtipc_get_timeval(struct rtdm_fd *fd, struct timeval *tv, ++ const void *arg, size_t arglen); ++ ++int rtipc_put_timeval(struct rtdm_fd *fd, void *arg, ++ const struct timeval *tv, size_t arglen); ++ ++int rtipc_get_length(struct rtdm_fd *fd, size_t *lenp, ++ const void *arg, size_t arglen); ++ ++int rtipc_get_arg(struct rtdm_fd *fd, void *dst, const void *src, ++ size_t len); ++ ++int rtipc_put_arg(struct rtdm_fd *fd, void *dst, const void *src, ++ size_t len); ++ ++extern struct rtipc_protocol xddp_proto_driver; ++ ++extern struct rtipc_protocol iddp_proto_driver; ++ ++extern struct rtipc_protocol bufp_proto_driver; ++ ++extern struct xnptree rtipc_ptree; ++ ++#define rtipc_wait_context xnthread_wait_context ++#define rtipc_prepare_wait xnthread_prepare_wait ++#define rtipc_get_wait_context xnthread_get_wait_context ++#define rtipc_peek_wait_head(obj) xnsynch_peek_pendq(&(obj)->synch_base) ++ ++#define COMPAT_CASE(__op) case __op __COMPAT_CASE(__op ## _COMPAT) ++ ++#endif /* !_RTIPC_INTERNAL_H */ +--- linux/drivers/xenomai/ipc/rtipc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/ipc/rtipc.c 2021-04-07 16:01:26.255635555 +0800 +@@ -0,0 +1,523 @@ ++/** ++ * This file is part of the Xenomai project. 
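The timeval helpers shown in internal.h above (rtipc_timeval_to_ns() / rtipc_ns_to_timeval()) back the standard SO_RCVTIMEO option (and, for BUFP, SO_SNDTIMEO). From user space a receive timeout is therefore set the usual way, as in this sketch with an arbitrary 500 ms bound:

#include <sys/socket.h>
#include <sys/time.h>

/* Sketch only: bound blocking receives on an AF_RTIPC socket to 500 ms;
 * the driver converts the timeval to nanoseconds internally. */
static int set_rx_timeout(int s)
{
	struct timeval tv = { .tv_sec = 0, .tv_usec = 500000 };

	return setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}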
++ * ++ * @note Copyright (C) 2009 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "internal.h" ++ ++MODULE_DESCRIPTION("Real-time IPC interface"); ++MODULE_AUTHOR("Philippe Gerum "); ++MODULE_LICENSE("GPL"); ++ ++static struct rtipc_protocol *protocols[IPCPROTO_MAX] = { ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_XDDP ++ [IPCPROTO_XDDP - 1] = &xddp_proto_driver, ++#endif ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_IDDP ++ [IPCPROTO_IDDP - 1] = &iddp_proto_driver, ++#endif ++#ifdef CONFIG_XENO_DRIVERS_RTIPC_BUFP ++ [IPCPROTO_BUFP - 1] = &bufp_proto_driver, ++#endif ++}; ++ ++DEFINE_XNPTREE(rtipc_ptree, "rtipc"); ++ ++int rtipc_get_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_from_user(fd, dst, src, len); ++} ++ ++int rtipc_put_arg(struct rtdm_fd *fd, void *dst, const void *src, size_t len) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ memcpy(dst, src, len); ++ return 0; ++ } ++ ++ return rtdm_copy_to_user(fd, dst, src, len); ++} ++ ++int rtipc_get_sockaddr(struct rtdm_fd *fd, struct sockaddr_ipc **saddrp, ++ const void *arg) ++{ ++ const struct _rtdm_setsockaddr_args *p; ++ struct _rtdm_setsockaddr_args sreq; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ p = arg; ++ if (p->addrlen > 0) { ++ if (p->addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ memcpy(*saddrp, p->addr, sizeof(**saddrp)); ++ } else { ++ if (p->addr) ++ return -EINVAL; ++ *saddrp = NULL; ++ } ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_setsockaddr_args csreq; ++ ret = rtdm_safe_copy_from_user(fd, &csreq, arg, sizeof(csreq)); ++ if (ret) ++ return ret; ++ if (csreq.addrlen > 0) { ++ if (csreq.addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ return rtdm_safe_copy_from_user(fd, *saddrp, ++ compat_ptr(csreq.addr), ++ sizeof(**saddrp)); ++ } ++ if (csreq.addr) ++ return -EINVAL; ++ ++ *saddrp = NULL; ++ ++ return 0; ++ } ++#endif ++ ++ ret = rtdm_safe_copy_from_user(fd, &sreq, arg, sizeof(sreq)); ++ if (ret) ++ return ret; ++ if (sreq.addrlen > 0) { ++ if (sreq.addrlen != sizeof(**saddrp)) ++ return -EINVAL; ++ return rtdm_safe_copy_from_user(fd, *saddrp, ++ sreq.addr, sizeof(**saddrp)); ++ } ++ if (sreq.addr) ++ return -EINVAL; ++ ++ *saddrp = NULL; ++ ++ return 0; ++} ++ ++int rtipc_put_sockaddr(struct rtdm_fd *fd, void *arg, ++ const struct sockaddr_ipc *saddr) ++{ ++ const struct _rtdm_getsockaddr_args *p; ++ struct _rtdm_getsockaddr_args sreq; ++ socklen_t len; ++ int ret; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ p = arg; ++ if (*p->addrlen < sizeof(*saddr)) ++ return -EINVAL; ++ memcpy(p->addr, saddr, sizeof(*saddr)); ++ *p->addrlen = sizeof(*saddr); ++ return 0; ++ } ++ ++#ifdef 
CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockaddr_args csreq; ++ ret = rtdm_safe_copy_from_user(fd, &csreq, arg, sizeof(csreq)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_safe_copy_from_user(fd, &len, ++ compat_ptr(csreq.addrlen), ++ sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (len < sizeof(*saddr)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_to_user(fd, compat_ptr(csreq.addr), ++ saddr, sizeof(*saddr)); ++ if (ret) ++ return ret; ++ ++ len = sizeof(*saddr); ++ return rtdm_safe_copy_to_user(fd, compat_ptr(csreq.addrlen), ++ &len, sizeof(len)); ++ } ++#endif ++ ++ sreq.addr = NULL; ++ sreq.addrlen = NULL; ++ ret = rtdm_safe_copy_from_user(fd, &sreq, arg, sizeof(sreq)); ++ if (ret) ++ return ret; ++ ++ ret = rtdm_safe_copy_from_user(fd, &len, sreq.addrlen, sizeof(len)); ++ if (ret) ++ return ret; ++ ++ if (len < sizeof(*saddr)) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_to_user(fd, sreq.addr, saddr, sizeof(*saddr)); ++ if (ret) ++ return ret; ++ ++ len = sizeof(*saddr); ++ ++ return rtdm_safe_copy_to_user(fd, sreq.addrlen, &len, sizeof(len)); ++} ++ ++int rtipc_get_sockoptout(struct rtdm_fd *fd, struct _rtdm_getsockopt_args *sopt, ++ const void *arg) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *sopt = *(struct _rtdm_getsockopt_args *)arg; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockopt_args csopt; ++ int ret; ++ ret = rtdm_safe_copy_from_user(fd, &csopt, arg, sizeof(csopt)); ++ if (ret) ++ return ret; ++ sopt->level = csopt.level; ++ sopt->optname = csopt.optname; ++ sopt->optval = compat_ptr(csopt.optval); ++ sopt->optlen = compat_ptr(csopt.optlen); ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_from_user(fd, sopt, arg, sizeof(*sopt)); ++} ++ ++int rtipc_put_sockoptout(struct rtdm_fd *fd, void *arg, ++ const struct _rtdm_getsockopt_args *sopt) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *(struct _rtdm_getsockopt_args *)arg = *sopt; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_getsockopt_args csopt; ++ int ret; ++ csopt.level = sopt->level; ++ csopt.optname = sopt->optname; ++ csopt.optval = ptr_to_compat(sopt->optval); ++ csopt.optlen = ptr_to_compat(sopt->optlen); ++ ret = rtdm_safe_copy_to_user(fd, arg, &csopt, sizeof(csopt)); ++ if (ret) ++ return ret; ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_to_user(fd, arg, sopt, sizeof(*sopt)); ++} ++ ++int rtipc_get_sockoptin(struct rtdm_fd *fd, struct _rtdm_setsockopt_args *sopt, ++ const void *arg) ++{ ++ if (!rtdm_fd_is_user(fd)) { ++ *sopt = *(struct _rtdm_setsockopt_args *)arg; ++ return 0; ++ } ++ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ struct compat_rtdm_setsockopt_args csopt; ++ int ret; ++ ret = rtdm_safe_copy_from_user(fd, &csopt, arg, sizeof(csopt)); ++ if (ret) ++ return ret; ++ sopt->level = csopt.level; ++ sopt->optname = csopt.optname; ++ sopt->optval = compat_ptr(csopt.optval); ++ sopt->optlen = csopt.optlen; ++ return 0; ++ } ++#endif ++ ++ return rtdm_safe_copy_from_user(fd, sopt, arg, sizeof(*sopt)); ++} ++ ++int rtipc_get_timeval(struct rtdm_fd *fd, struct timeval *tv, ++ const void *arg, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ if (arglen != sizeof(struct compat_timeval)) ++ return -EINVAL; ++ return sys32_get_timeval(tv, arg); ++ } ++#endif ++ ++ if (arglen != sizeof(*tv)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *tv = *(struct timeval *)arg; 
++ return 0; ++ } ++ ++ return rtdm_safe_copy_from_user(fd, tv, arg, sizeof(*tv)); ++} ++ ++int rtipc_put_timeval(struct rtdm_fd *fd, void *arg, ++ const struct timeval *tv, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ if (arglen != sizeof(struct compat_timeval)) ++ return -EINVAL; ++ return sys32_put_timeval(arg, tv); ++ } ++#endif ++ ++ if (arglen != sizeof(*tv)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *(struct timeval *)arg = *tv; ++ return 0; ++ } ++ ++ return rtdm_safe_copy_to_user(fd, arg, tv, sizeof(*tv)); ++} ++ ++int rtipc_get_length(struct rtdm_fd *fd, size_t *lenp, ++ const void *arg, size_t arglen) ++{ ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++ if (rtdm_fd_is_compat(fd)) { ++ const compat_size_t *csz; ++ if (arglen != sizeof(*csz)) ++ return -EINVAL; ++ csz = arg; ++ return csz == NULL || ++ !access_rok(csz, sizeof(*csz)) || ++ __xn_get_user(*lenp, csz) ? -EFAULT : 0; ++ } ++#endif ++ ++ if (arglen != sizeof(size_t)) ++ return -EINVAL; ++ ++ if (!rtdm_fd_is_user(fd)) { ++ *lenp = *(size_t *)arg; ++ return 0; ++ } ++ ++ return rtdm_safe_copy_from_user(fd, lenp, arg, sizeof(*lenp)); ++} ++ ++static int rtipc_socket(struct rtdm_fd *fd, int protocol) ++{ ++ struct rtipc_protocol *proto; ++ struct rtipc_private *priv; ++ int ret; ++ ++ if (protocol < 0 || protocol >= IPCPROTO_MAX) ++ return -EPROTONOSUPPORT; ++ ++ if (protocol == IPCPROTO_IPC) ++ /* Default protocol is IDDP */ ++ protocol = IPCPROTO_IDDP; ++ ++ proto = protocols[protocol - 1]; ++ if (proto == NULL) /* Not compiled in? */ ++ return -ENOPROTOOPT; ++ ++ priv = rtdm_fd_to_private(fd); ++ priv->proto = proto; ++ priv->state = kmalloc(proto->proto_statesz, GFP_KERNEL); ++ if (priv->state == NULL) ++ return -ENOMEM; ++ ++ xnselect_init(&priv->send_block); ++ xnselect_init(&priv->recv_block); ++ ++ ret = proto->proto_ops.socket(fd); ++ if (ret) ++ kfree(priv->state); ++ ++ return ret; ++} ++ ++static void rtipc_close(struct rtdm_fd *fd) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ /* ++ * CAUTION: priv->state shall be released by the ++ * proto_ops.close() handler when appropriate (which may be ++ * done asynchronously later, see XDDP). 
++ */ ++ priv->proto->proto_ops.close(fd); ++ xnselect_destroy(&priv->recv_block); ++ xnselect_destroy(&priv->send_block); ++} ++ ++static ssize_t rtipc_recvmsg(struct rtdm_fd *fd, ++ struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.recvmsg(fd, msg, flags); ++} ++ ++static ssize_t rtipc_sendmsg(struct rtdm_fd *fd, ++ const struct user_msghdr *msg, int flags) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.sendmsg(fd, msg, flags); ++} ++ ++static ssize_t rtipc_read(struct rtdm_fd *fd, ++ void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.read(fd, buf, len); ++} ++ ++static ssize_t rtipc_write(struct rtdm_fd *fd, ++ const void *buf, size_t len) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.write(fd, buf, len); ++} ++ ++static int rtipc_ioctl(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ return priv->proto->proto_ops.ioctl(fd, request, arg); ++} ++ ++static int rtipc_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct rtipc_private *priv = rtdm_fd_to_private(fd); ++ struct xnselect_binding *binding; ++ unsigned int pollstate, mask; ++ struct xnselect *block; ++ spl_t s; ++ int ret; ++ ++ if (type != XNSELECT_READ && type != XNSELECT_WRITE) ++ return -EINVAL; ++ ++ binding = xnmalloc(sizeof(*binding)); ++ if (binding == NULL) ++ return -ENOMEM; ++ ++ cobalt_atomic_enter(s); ++ ++ pollstate = priv->proto->proto_ops.pollstate(fd); ++ ++ if (type == XNSELECT_READ) { ++ mask = pollstate & POLLIN; ++ block = &priv->recv_block; ++ } else { ++ mask = pollstate & POLLOUT; ++ block = &priv->send_block; ++ } ++ ++ ret = xnselect_bind(block, binding, selector, type, index, mask); ++ ++ cobalt_atomic_leave(s); ++ ++ if (ret) ++ xnfree(binding); ++ ++ return ret; ++} ++ ++static struct rtdm_driver rtipc_driver = { ++ .profile_info = RTDM_PROFILE_INFO(rtipc, ++ RTDM_CLASS_RTIPC, ++ RTDM_SUBCLASS_GENERIC, ++ 1), ++ .device_flags = RTDM_PROTOCOL_DEVICE, ++ .device_count = 1, ++ .context_size = sizeof(struct rtipc_private), ++ .protocol_family = PF_RTIPC, ++ .socket_type = SOCK_DGRAM, ++ .ops = { ++ .socket = rtipc_socket, ++ .close = rtipc_close, ++ .recvmsg_rt = rtipc_recvmsg, ++ .recvmsg_nrt = NULL, ++ .sendmsg_rt = rtipc_sendmsg, ++ .sendmsg_nrt = NULL, ++ .ioctl_rt = rtipc_ioctl, ++ .ioctl_nrt = rtipc_ioctl, ++ .read_rt = rtipc_read, ++ .read_nrt = NULL, ++ .write_rt = rtipc_write, ++ .write_nrt = NULL, ++ .select = rtipc_select, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &rtipc_driver, ++ .label = "rtipc", ++}; ++ ++int __init __rtipc_init(void) ++{ ++ int ret, n; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (n = 0; n < IPCPROTO_MAX; n++) { ++ if (protocols[n] && protocols[n]->proto_init) { ++ ret = protocols[n]->proto_init(); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return rtdm_dev_register(&device); ++} ++ ++void __exit __rtipc_exit(void) ++{ ++ int n; ++ ++ rtdm_dev_unregister(&device); ++ ++ for (n = 0; n < IPCPROTO_MAX; n++) { ++ if (protocols[n] && protocols[n]->proto_exit) ++ protocols[n]->proto_exit(); ++ } ++} ++ ++module_init(__rtipc_init); ++module_exit(__rtipc_exit); +--- linux/drivers/xenomai/autotune/autotune.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
linux-patched/drivers/xenomai/autotune/autotune.c 2021-04-07 16:01:26.250635562 +0800 +@@ -0,0 +1,820 @@ ++/* ++ * This file is part of the Xenomai project. ++ * ++ * Copyright (C) 2014 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++MODULE_DESCRIPTION("Xenomai/cobalt core clock autotuner"); ++MODULE_AUTHOR("Philippe Gerum "); ++MODULE_LICENSE("GPL"); ++ ++/* Auto-tuning services for the Cobalt core clock. */ ++ ++#define SAMPLING_TIME 500000000UL ++#define ADJUSTMENT_STEP 500 ++#define WARMUP_STEPS 10 ++#define AUTOTUNE_STEPS 40 ++ ++#define progress(__tuner, __fmt, __args...) \ ++ do { \ ++ if (!(__tuner)->quiet) \ ++ printk(XENO_INFO "autotune(%s) " __fmt "\n", \ ++ (__tuner)->name, ##__args); \ ++ } while (0) ++ ++struct tuning_score { ++ int pmean; ++ int stddev; ++ int minlat; ++ unsigned int step; ++ unsigned int gravity; ++}; ++ ++struct tuner_state { ++ xnticks_t ideal; ++ xnticks_t step; ++ int min_lat; ++ int max_lat; ++ int prev_mean; ++ long long prev_sqs; ++ long long cur_sqs; ++ unsigned int sum; ++ unsigned int cur_samples; ++ unsigned int max_samples; ++}; ++ ++struct gravity_tuner { ++ const char *name; ++ unsigned int (*get_gravity)(struct gravity_tuner *tuner); ++ void (*set_gravity)(struct gravity_tuner *tuner, unsigned int gravity); ++ unsigned int (*adjust_gravity)(struct gravity_tuner *tuner, int adjust); ++ int (*init_tuner)(struct gravity_tuner *tuner); ++ int (*start_tuner)(struct gravity_tuner *tuner, xnticks_t start_time, ++ xnticks_t interval); ++ void (*destroy_tuner)(struct gravity_tuner *tuner); ++ struct tuner_state state; ++ rtdm_event_t done; ++ int status; ++ int quiet; ++ struct tuning_score scores[AUTOTUNE_STEPS]; ++ int nscores; ++ atomic_t refcount; ++}; ++ ++struct irq_gravity_tuner { ++ rtdm_timer_t timer; ++ struct gravity_tuner tuner; ++}; ++ ++struct kthread_gravity_tuner { ++ rtdm_task_t task; ++ rtdm_event_t barrier; ++ xnticks_t start_time; ++ xnticks_t interval; ++ struct gravity_tuner tuner; ++}; ++ ++struct uthread_gravity_tuner { ++ rtdm_timer_t timer; ++ rtdm_event_t pulse; ++ struct gravity_tuner tuner; ++}; ++ ++struct autotune_context { ++ struct gravity_tuner *tuner; ++ struct autotune_setup setup; ++ rtdm_lock_t tuner_lock; ++}; ++ ++static inline void init_tuner(struct gravity_tuner *tuner) ++{ ++ rtdm_event_init(&tuner->done, 0); ++ tuner->status = 0; ++ atomic_set(&tuner->refcount, 0); ++} ++ ++static inline void destroy_tuner(struct gravity_tuner *tuner) ++{ ++ rtdm_event_destroy(&tuner->done); ++} ++ ++static inline void done_sampling(struct gravity_tuner *tuner, ++ int status) ++{ ++ tuner->status = status; ++ rtdm_event_signal(&tuner->done); ++} ++ ++static int add_sample(struct gravity_tuner *tuner, xnticks_t timestamp) ++{ ++ 
struct tuner_state *state; ++ int n, delta, cur_mean; ++ ++ state = &tuner->state; ++ ++ delta = (int)(timestamp - state->ideal); ++ if (delta < state->min_lat) ++ state->min_lat = delta; ++ if (delta > state->max_lat) ++ state->max_lat = delta; ++ if (delta < 0) ++ delta = 0; ++ ++ state->sum += delta; ++ state->ideal += state->step; ++ n = ++state->cur_samples; ++ ++ /* ++ * Knuth citing Welford in TAOCP (Vol 2), single-pass ++ * computation of variance using a recurrence relation. ++ */ ++ if (n == 1) ++ state->prev_mean = delta; ++ else { ++ cur_mean = state->prev_mean + (delta - state->prev_mean) / n; ++ state->cur_sqs = state->prev_sqs + (delta - state->prev_mean) ++ * (delta - cur_mean); ++ state->prev_mean = cur_mean; ++ state->prev_sqs = state->cur_sqs; ++ } ++ ++ if (n >= state->max_samples) { ++ done_sampling(tuner, 0); ++ return 1; /* Finished. */ ++ } ++ ++ return 0; /* Keep going. */ ++} ++ ++static void timer_handler(rtdm_timer_t *timer) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ xnticks_t now; ++ ++ irq_tuner = container_of(timer, struct irq_gravity_tuner, timer); ++ now = xnclock_read_raw(&nkclock); ++ ++ if (add_sample(&irq_tuner->tuner, now)) ++ rtdm_timer_stop_in_handler(timer); ++} ++ ++static int init_irq_tuner(struct gravity_tuner *tuner) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ int ret; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ ret = rtdm_timer_init(&irq_tuner->timer, timer_handler, "autotune"); ++ if (ret) ++ return ret; ++ ++ init_tuner(tuner); ++ ++ return 0; ++} ++ ++static void destroy_irq_tuner(struct gravity_tuner *tuner) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ rtdm_timer_destroy(&irq_tuner->timer); ++ destroy_tuner(tuner); ++} ++ ++static unsigned int get_irq_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.irq; ++} ++ ++static void set_irq_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.irq = gravity; ++} ++ ++static unsigned int adjust_irq_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.irq += adjust; ++} ++ ++static int start_irq_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct irq_gravity_tuner *irq_tuner; ++ ++ irq_tuner = container_of(tuner, struct irq_gravity_tuner, tuner); ++ ++ return rtdm_timer_start(&irq_tuner->timer, start_time, ++ interval, RTDM_TIMERMODE_ABSOLUTE); ++} ++ ++struct irq_gravity_tuner irq_tuner = { ++ .tuner = { ++ .name = "irqhand", ++ .init_tuner = init_irq_tuner, ++ .destroy_tuner = destroy_irq_tuner, ++ .get_gravity = get_irq_gravity, ++ .set_gravity = set_irq_gravity, ++ .adjust_gravity = adjust_irq_gravity, ++ .start_tuner = start_irq_tuner, ++ }, ++}; ++ ++void task_handler(void *arg) ++{ ++ struct kthread_gravity_tuner *k_tuner = arg; ++ xnticks_t now; ++ int ret = 0; ++ ++ for (;;) { ++ if (rtdm_task_should_stop()) ++ break; ++ ++ ret = rtdm_event_wait(&k_tuner->barrier); ++ if (ret) ++ break; ++ ++ ret = rtdm_task_set_period(&k_tuner->task, k_tuner->start_time, ++ k_tuner->interval); ++ if (ret) ++ break; ++ ++ for (;;) { ++ ret = rtdm_task_wait_period(NULL); ++ if (ret && ret != -ETIMEDOUT) ++ goto out; ++ ++ now = xnclock_read_raw(&nkclock); ++ if (add_sample(&k_tuner->tuner, now)) { ++ rtdm_task_set_period(&k_tuner->task, 0, 0); ++ break; ++ } ++ } ++ } ++out: ++ done_sampling(&k_tuner->tuner, ret); ++ rtdm_task_destroy(&k_tuner->task); ++} ++ ++static int 
init_kthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ init_tuner(tuner); ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ rtdm_event_init(&k_tuner->barrier, 0); ++ ++ return rtdm_task_init(&k_tuner->task, "autotune", ++ task_handler, k_tuner, ++ RTDM_TASK_HIGHEST_PRIORITY, 0); ++} ++ ++static void destroy_kthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ rtdm_task_destroy(&k_tuner->task); ++ rtdm_event_destroy(&k_tuner->barrier); ++} ++ ++static unsigned int get_kthread_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.kernel; ++} ++ ++static void set_kthread_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.kernel = gravity; ++} ++ ++static unsigned int adjust_kthread_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.kernel += adjust; ++} ++ ++static int start_kthread_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct kthread_gravity_tuner *k_tuner; ++ ++ k_tuner = container_of(tuner, struct kthread_gravity_tuner, tuner); ++ ++ k_tuner->start_time = start_time; ++ k_tuner->interval = interval; ++ rtdm_event_signal(&k_tuner->barrier); ++ ++ return 0; ++} ++ ++struct kthread_gravity_tuner kthread_tuner = { ++ .tuner = { ++ .name = "kthread", ++ .init_tuner = init_kthread_tuner, ++ .destroy_tuner = destroy_kthread_tuner, ++ .get_gravity = get_kthread_gravity, ++ .set_gravity = set_kthread_gravity, ++ .adjust_gravity = adjust_kthread_gravity, ++ .start_tuner = start_kthread_tuner, ++ }, ++}; ++ ++static void pulse_handler(rtdm_timer_t *timer) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(timer, struct uthread_gravity_tuner, timer); ++ rtdm_event_signal(&u_tuner->pulse); ++} ++ ++static int init_uthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ int ret; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ret = rtdm_timer_init(&u_tuner->timer, pulse_handler, "autotune"); ++ if (ret) ++ return ret; ++ ++ xntimer_set_gravity(&u_tuner->timer, XNTIMER_UGRAVITY); /* gasp... 
*/ ++ rtdm_event_init(&u_tuner->pulse, 0); ++ init_tuner(tuner); ++ ++ return 0; ++} ++ ++static void destroy_uthread_tuner(struct gravity_tuner *tuner) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ rtdm_timer_destroy(&u_tuner->timer); ++ rtdm_event_destroy(&u_tuner->pulse); ++} ++ ++static unsigned int get_uthread_gravity(struct gravity_tuner *tuner) ++{ ++ return nkclock.gravity.user; ++} ++ ++static void set_uthread_gravity(struct gravity_tuner *tuner, unsigned int gravity) ++{ ++ nkclock.gravity.user = gravity; ++} ++ ++static unsigned int adjust_uthread_gravity(struct gravity_tuner *tuner, int adjust) ++{ ++ return nkclock.gravity.user += adjust; ++} ++ ++static int start_uthread_tuner(struct gravity_tuner *tuner, ++ xnticks_t start_time, xnticks_t interval) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ++ return rtdm_timer_start(&u_tuner->timer, start_time, ++ interval, RTDM_TIMERMODE_ABSOLUTE); ++} ++ ++static int add_uthread_sample(struct gravity_tuner *tuner, ++ nanosecs_abs_t user_timestamp) ++{ ++ struct uthread_gravity_tuner *u_tuner; ++ int ret; ++ ++ u_tuner = container_of(tuner, struct uthread_gravity_tuner, tuner); ++ ++ if (user_timestamp && ++ add_sample(tuner, xnclock_ns_to_ticks(&nkclock, user_timestamp))) { ++ rtdm_timer_stop(&u_tuner->timer); ++ /* Tell the caller to park until next round. */ ++ ret = -EPIPE; ++ } else ++ ret = rtdm_event_wait(&u_tuner->pulse); ++ ++ return ret; ++} ++ ++struct uthread_gravity_tuner uthread_tuner = { ++ .tuner = { ++ .name = "uthread", ++ .init_tuner = init_uthread_tuner, ++ .destroy_tuner = destroy_uthread_tuner, ++ .get_gravity = get_uthread_gravity, ++ .set_gravity = set_uthread_gravity, ++ .adjust_gravity = adjust_uthread_gravity, ++ .start_tuner = start_uthread_tuner, ++ }, ++}; ++ ++static inline void build_score(struct gravity_tuner *tuner, int step) ++{ ++ struct tuner_state *state = &tuner->state; ++ unsigned int variance, n; ++ ++ n = state->cur_samples; ++ tuner->scores[step].pmean = state->sum / n; ++ variance = n > 1 ? xnarch_llimd(state->cur_sqs, 1, n - 1) : 0; ++ tuner->scores[step].stddev = int_sqrt(variance); ++ tuner->scores[step].minlat = state->min_lat; ++ tuner->scores[step].gravity = tuner->get_gravity(tuner); ++ tuner->scores[step].step = step; ++ tuner->nscores++; ++} ++ ++static int cmp_score_mean(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->pmean - sr->pmean; ++} ++ ++static int cmp_score_stddev(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->stddev - sr->stddev; ++} ++ ++static int cmp_score_minlat(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->minlat - sr->minlat; ++} ++ ++static int cmp_score_gravity(const void *c, const void *r) ++{ ++ const struct tuning_score *sc = c, *sr = r; ++ return sc->gravity - sr->gravity; ++} ++ ++static int filter_mean(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_mean, NULL); ++ ++ /* Top half of the best pondered means. */ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_stddev(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_stddev, NULL); ++ ++ /* Top half of the best standard deviations. 
*/ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_minlat(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_minlat, NULL); ++ ++ /* Top half of the minimum latencies. */ ++ ++ return (tuner->nscores + 1) / 2; ++} ++ ++static int filter_gravity(struct gravity_tuner *tuner) ++{ ++ sort(tuner->scores, tuner->nscores, sizeof(struct tuning_score), ++ cmp_score_gravity, NULL); ++ ++ /* Smallest gravity required among the shortest latencies. */ ++ ++ return tuner->nscores; ++} ++ ++static void dump_scores(struct gravity_tuner *tuner) ++{ ++ int n; ++ ++ if (tuner->quiet) ++ return; ++ ++ for (n = 0; n < tuner->nscores; n++) ++ printk(KERN_INFO ++ ".. S%.2d pmean=%Ld stddev=%Lu minlat=%Lu gravity=%Lu\n", ++ tuner->scores[n].step, ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].pmean), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].stddev), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].minlat), ++ xnclock_ticks_to_ns(&nkclock, tuner->scores[n].gravity)); ++} ++ ++static inline void filter_score(struct gravity_tuner *tuner, ++ int (*filter)(struct gravity_tuner *tuner)) ++{ ++ tuner->nscores = filter(tuner); ++ dump_scores(tuner); ++} ++ ++static int tune_gravity(struct gravity_tuner *tuner, int period) ++{ ++ struct tuner_state *state = &tuner->state; ++ int ret, step, gravity_limit, adjust; ++ unsigned int orig_gravity; ++ ++ state->step = xnclock_ns_to_ticks(&nkclock, period); ++ state->max_samples = SAMPLING_TIME / (period ?: 1); ++ orig_gravity = tuner->get_gravity(tuner); ++ tuner->set_gravity(tuner, 0); ++ tuner->nscores = 0; ++ /* Gravity adjustment step */ ++ adjust = xnclock_ns_to_ticks(&nkclock, ADJUSTMENT_STEP) ?: 1; ++ gravity_limit = 0; ++ progress(tuner, "warming up..."); ++ ++ for (step = 0; step < WARMUP_STEPS + AUTOTUNE_STEPS; step++) { ++ state->ideal = xnclock_read_raw(&nkclock) + state->step * WARMUP_STEPS; ++ state->min_lat = xnclock_ns_to_ticks(&nkclock, SAMPLING_TIME); ++ state->max_lat = 0; ++ state->prev_mean = 0; ++ state->prev_sqs = 0; ++ state->cur_sqs = 0; ++ state->sum = 0; ++ state->cur_samples = 0; ++ ++ ret = tuner->start_tuner(tuner, ++ xnclock_ticks_to_ns(&nkclock, state->ideal), ++ period); ++ if (ret) ++ goto fail; ++ ++ /* Tuner stops when posting. */ ++ ret = rtdm_event_wait(&tuner->done); ++ if (ret) ++ goto fail; ++ ++ ret = tuner->status; ++ if (ret) ++ goto fail; ++ ++ if (step < WARMUP_STEPS) { ++ if (state->min_lat > gravity_limit) { ++ gravity_limit = state->min_lat; ++ progress(tuner, "gravity limit set to %Lu ns (%d)", ++ xnclock_ticks_to_ns(&nkclock, gravity_limit), state->min_lat); ++ } ++ continue; ++ } ++ ++ /* ++ * We should not be early by more than the gravity ++ * value minus one tick, to account for the rounding ++ * error involved when the timer frequency is lower ++ * than 1e9 / ADJUSTMENT_STEP. ++ */ ++ if (state->min_lat < 0) { ++ if (tuner->get_gravity(tuner) < -state->min_lat - 1) { ++ printk(XENO_WARNING ++ "autotune(%s) failed with early shot (%Ld ns)\n", ++ tuner->name, ++ xnclock_ticks_to_ns(&nkclock, ++ -(tuner->get_gravity(tuner) + ++ state->min_lat))); ++ ret = -EAGAIN; ++ goto fail; ++ } ++ break; ++ } ++ ++ if (((step - WARMUP_STEPS) % 5) == 0) ++ progress(tuner, "calibrating... (slice %d)", ++ (step - WARMUP_STEPS) / 5 + 1); ++ ++ build_score(tuner, step - WARMUP_STEPS); ++ ++ /* ++ * Anticipating by more than the minimum latency ++ * detected at warmup would make no sense: cap the ++ * gravity we may try. 
++ */ ++ if (tuner->adjust_gravity(tuner, adjust) > gravity_limit) { ++ progress(tuner, "beyond gravity limit at %Lu ns", ++ xnclock_ticks_to_ns(&nkclock, ++ tuner->get_gravity(tuner))); ++ break; ++ } ++ } ++ ++ progress(tuner, "calibration scores"); ++ dump_scores(tuner); ++ progress(tuner, "pondered mean filter"); ++ filter_score(tuner, filter_mean); ++ progress(tuner, "standard deviation filter"); ++ filter_score(tuner, filter_stddev); ++ progress(tuner, "minimum latency filter"); ++ filter_score(tuner, filter_minlat); ++ progress(tuner, "gravity filter"); ++ filter_score(tuner, filter_gravity); ++ tuner->set_gravity(tuner, tuner->scores[0].gravity); ++ ++ return 0; ++fail: ++ tuner->set_gravity(tuner, orig_gravity); ++ ++ return ret; ++} ++ ++static int autotune_ioctl_nrt(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct autotune_context *context; ++ struct autotune_setup setup; ++ struct gravity_tuner *tuner, *old_tuner; ++ rtdm_lockctx_t lock_ctx; ++ int ret; ++ ++ switch (request) { ++ case AUTOTUNE_RTIOC_RESET: ++ xnclock_reset_gravity(&nkclock); ++ return 0; ++ case AUTOTUNE_RTIOC_IRQ: ++ tuner = &irq_tuner.tuner; ++ break; ++ case AUTOTUNE_RTIOC_KERN: ++ tuner = &kthread_tuner.tuner; ++ break; ++ case AUTOTUNE_RTIOC_USER: ++ tuner = &uthread_tuner.tuner; ++ break; ++ default: ++ return -ENOSYS; ++ } ++ ++ ret = rtdm_copy_from_user(fd, &setup, arg, sizeof(setup)); ++ if (ret) ++ return ret; ++ ++ ret = tuner->init_tuner(tuner); ++ if (ret) ++ return ret; ++ ++ context = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&context->tuner_lock, lock_ctx); ++ ++ old_tuner = context->tuner; ++ if (old_tuner && atomic_read(&old_tuner->refcount) > 0) { ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ tuner->destroy_tuner(tuner); ++ return -EBUSY; ++ } ++ ++ context->tuner = tuner; ++ context->setup = setup; ++ ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ ++ if (old_tuner) ++ old_tuner->destroy_tuner(old_tuner); ++ ++ if (setup.quiet <= 1) ++ printk(XENO_INFO "autotune(%s) started\n", tuner->name); ++ ++ return ret; ++} ++ ++static int autotune_ioctl_rt(struct rtdm_fd *fd, unsigned int request, void *arg) ++{ ++ struct autotune_context *context; ++ struct gravity_tuner *tuner; ++ rtdm_lockctx_t lock_ctx; ++ __u64 timestamp; ++ __u32 gravity; ++ int ret; ++ ++ context = rtdm_fd_to_private(fd); ++ ++ rtdm_lock_get_irqsave(&context->tuner_lock, lock_ctx); ++ ++ tuner = context->tuner; ++ if (tuner) ++ atomic_inc(&tuner->refcount); ++ ++ rtdm_lock_put_irqrestore(&context->tuner_lock, lock_ctx); ++ ++ if (tuner == NULL) ++ return -ENOSYS; ++ ++ switch (request) { ++ case AUTOTUNE_RTIOC_RUN: ++ tuner->quiet = context->setup.quiet; ++ ret = tune_gravity(tuner, context->setup.period); ++ if (ret) ++ break; ++ gravity = xnclock_ticks_to_ns(&nkclock, ++ tuner->get_gravity(tuner)); ++ ret = rtdm_safe_copy_to_user(fd, arg, &gravity, ++ sizeof(gravity)); ++ break; ++ case AUTOTUNE_RTIOC_PULSE: ++ if (tuner != &uthread_tuner.tuner) { ++ ret = -EINVAL; ++ break; ++ } ++ ret = rtdm_safe_copy_from_user(fd, ×tamp, arg, ++ sizeof(timestamp)); ++ if (ret) ++ break; ++ ret = add_uthread_sample(tuner, timestamp); ++ break; ++ default: ++ ret = -ENOSYS; ++ } ++ ++ atomic_dec(&tuner->refcount); ++ ++ return ret; ++} ++ ++static int autotune_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct autotune_context *context; ++ ++ context = rtdm_fd_to_private(fd); ++ context->tuner = NULL; ++ rtdm_lock_init(&context->tuner_lock); ++ ++ return 0; ++} ++ ++static void 
autotune_close(struct rtdm_fd *fd) ++{ ++ struct autotune_context *context; ++ struct gravity_tuner *tuner; ++ ++ context = rtdm_fd_to_private(fd); ++ tuner = context->tuner; ++ if (tuner) { ++ if (context->setup.quiet <= 1) ++ printk(XENO_INFO "autotune finished [%Lui/%Luk/%Luu]\n", ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, irq)), ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, kernel)), ++ xnclock_ticks_to_ns(&nkclock, ++ xnclock_get_gravity(&nkclock, user))); ++ tuner->destroy_tuner(tuner); ++ } ++} ++ ++static struct rtdm_driver autotune_driver = { ++ .profile_info = RTDM_PROFILE_INFO(autotune, ++ RTDM_CLASS_AUTOTUNE, ++ RTDM_SUBCLASS_AUTOTUNE, ++ 0), ++ .device_flags = RTDM_NAMED_DEVICE|RTDM_EXCLUSIVE, ++ .device_count = 1, ++ .context_size = sizeof(struct autotune_context), ++ .ops = { ++ .open = autotune_open, ++ .ioctl_rt = autotune_ioctl_rt, ++ .ioctl_nrt = autotune_ioctl_nrt, ++ .close = autotune_close, ++ }, ++}; ++ ++static struct rtdm_device device = { ++ .driver = &autotune_driver, ++ .label = "autotune", ++}; ++ ++static int __init autotune_init(void) ++{ ++ return rtdm_dev_register(&device); ++} ++ ++static void __exit autotune_exit(void) ++{ ++ rtdm_dev_unregister(&device); ++} ++ ++module_init(autotune_init); ++module_exit(autotune_exit); +--- linux/drivers/xenomai/autotune/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/autotune/Makefile 2021-04-07 16:01:26.245635569 +0800 +@@ -0,0 +1,4 @@ ++ ++obj-$(CONFIG_XENO_DRIVERS_AUTOTUNE) += xeno_autotune.o ++ ++xeno_autotune-y := autotune.o +--- linux/drivers/xenomai/autotune/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/autotune/Kconfig 2021-04-07 16:01:26.240635577 +0800 +@@ -0,0 +1,3 @@ ++ ++config XENO_DRIVERS_AUTOTUNE ++ tristate +--- linux/drivers/xenomai/gpio/gpio-sun8i-h3.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-sun8i-h3.c 2021-04-07 16:01:26.235635584 +0800 +@@ -0,0 +1,43 @@ ++/** ++ * Copyright (C) 2017 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_H3 3 ++ ++static int __init h3_gpio_init(void) ++{ ++ int ret; ++ ++ ret = rtdm_gpiochip_scan_of(NULL, "allwinner,sun8i-h3-pinctrl", ++ RTDM_SUBCLASS_H3); ++ if (ret) ++ return ret; ++ ++ return rtdm_gpiochip_scan_of(NULL, "allwinner,sun8i-h3-r-pinctrl", ++ RTDM_SUBCLASS_H3); ++} ++module_init(h3_gpio_init); ++ ++static void __exit h3_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_H3); ++} ++module_exit(h3_gpio_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/xenomai/gpio/gpio-core.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-core.c 2021-04-07 16:01:26.230635591 +0800 +@@ -0,0 +1,640 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct rtdm_gpio_chan { ++ int requested : 1, ++ has_direction : 1, ++ is_output : 1, ++ is_interrupt : 1, ++ want_timestamp : 1; ++}; ++ ++static LIST_HEAD(rtdm_gpio_chips); ++ ++static DEFINE_MUTEX(chip_lock); ++ ++static int gpio_pin_interrupt(rtdm_irq_t *irqh) ++{ ++ struct rtdm_gpio_pin *pin; ++ ++ pin = rtdm_irq_get_arg(irqh, struct rtdm_gpio_pin); ++ ++ pin->timestamp = rtdm_clock_read_monotonic(); ++ rtdm_event_signal(&pin->event); ++ ++ return RTDM_IRQ_HANDLED; ++} ++ ++static int request_gpio_irq(unsigned int gpio, struct rtdm_gpio_pin *pin, ++ struct rtdm_gpio_chan *chan, ++ int trigger) ++{ ++ int ret, irq_trigger, irq; ++ ++ if (trigger & ~GPIO_TRIGGER_MASK) ++ return -EINVAL; ++ ++ if (!chan->requested) { ++ ret = gpio_request(gpio, pin->name); ++ if (ret) { ++ if (ret != -EPROBE_DEFER) ++ printk(XENO_ERR ++ "can not request GPIO%d\n", gpio); ++ return ret; ++ } ++ chan->requested = true; ++ } ++ ++ ret = gpio_direction_input(gpio); ++ if (ret) { ++ printk(XENO_ERR "cannot set GPIO%d as input\n", gpio); ++ goto fail; ++ } ++ ++ chan->has_direction = true; ++ gpio_export(gpio, true); ++ ++ rtdm_event_clear(&pin->event); ++ ++ /* ++ * Attempt to hook the interrupt associated to that pin. We ++ * might fail getting a valid IRQ number, in case the GPIO ++ * chip did not define any mapping handler (->to_irq). If so, ++ * just assume that either we have no IRQ indeed, or interrupt ++ * handling may be open coded elsewhere. 
++ */ ++ irq = gpio_to_irq(gpio); ++ if (irq < 0) ++ goto done; ++ ++ irq_trigger = 0; ++ if (trigger & GPIO_TRIGGER_EDGE_RISING) ++ irq_trigger |= IRQ_TYPE_EDGE_RISING; ++ if (trigger & GPIO_TRIGGER_EDGE_FALLING) ++ irq_trigger |= IRQ_TYPE_EDGE_FALLING; ++ if (trigger & GPIO_TRIGGER_LEVEL_HIGH) ++ irq_trigger |= IRQ_TYPE_LEVEL_HIGH; ++ if (trigger & GPIO_TRIGGER_LEVEL_LOW) ++ irq_trigger |= IRQ_TYPE_LEVEL_LOW; ++ ++ if (irq_trigger) ++ irq_set_irq_type(irq, irq_trigger); ++ ++ ret = rtdm_irq_request(&pin->irqh, irq, gpio_pin_interrupt, ++ 0, pin->name, pin); ++ if (ret) { ++ printk(XENO_ERR "cannot request GPIO%d interrupt\n", gpio); ++ goto fail; ++ } ++ ++ ++ rtdm_irq_enable(&pin->irqh); ++done: ++ chan->is_interrupt = true; ++ ++ return 0; ++fail: ++ gpio_free(gpio); ++ chan->requested = false; ++ ++ return ret; ++} ++ ++static void release_gpio_irq(unsigned int gpio, struct rtdm_gpio_pin *pin, ++ struct rtdm_gpio_chan *chan) ++{ ++ if (chan->is_interrupt) { ++ rtdm_irq_free(&pin->irqh); ++ chan->is_interrupt = false; ++ } ++ gpio_free(gpio); ++ chan->requested = false; ++} ++ ++static int gpio_pin_ioctl_nrt(struct rtdm_fd *fd, ++ unsigned int request, void *arg) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ int ret = 0, val, trigger; ++ struct rtdm_gpio_pin *pin; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ switch (request) { ++ case GPIO_RTIOC_DIR_OUT: ++ ret = rtdm_safe_copy_from_user(fd, &val, arg, sizeof(val)); ++ if (ret) ++ return ret; ++ ret = gpio_direction_output(gpio, val); ++ if (ret == 0) { ++ chan->has_direction = true; ++ chan->is_output = true; ++ } ++ break; ++ case GPIO_RTIOC_DIR_IN: ++ ret = gpio_direction_input(gpio); ++ if (ret == 0) ++ chan->has_direction = true; ++ break; ++ case GPIO_RTIOC_IRQEN: ++ if (chan->is_interrupt) { ++ return -EBUSY; ++ } ++ ret = rtdm_safe_copy_from_user(fd, &trigger, ++ arg, sizeof(trigger)); ++ if (ret) ++ return ret; ++ ret = request_gpio_irq(gpio, pin, chan, trigger); ++ break; ++ case GPIO_RTIOC_IRQDIS: ++ if (chan->is_interrupt) { ++ release_gpio_irq(gpio, pin, chan); ++ chan->requested = false; ++ chan->is_interrupt = false; ++ } ++ break; ++ case GPIO_RTIOC_REQS: ++ ret = gpio_request(gpio, pin->name); ++ if (ret) ++ return ret; ++ else ++ chan->requested = true; ++ break; ++ case GPIO_RTIOC_RELS: ++ gpio_free(gpio); ++ chan->requested = false; ++ break; ++ case GPIO_RTIOC_TS: ++ ret = rtdm_safe_copy_from_user(fd, &val, arg, sizeof(val)); ++ if (ret) ++ return ret; ++ chan->want_timestamp = !!val; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return ret; ++} ++ ++static ssize_t gpio_pin_read_rt(struct rtdm_fd *fd, ++ void __user *buf, size_t len) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_readout rdo; ++ struct rtdm_gpio_pin *pin; ++ int ret; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (chan->is_output) ++ return -EINVAL; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ if (chan->want_timestamp) { ++ if (len < sizeof(rdo)) ++ return -EINVAL; ++ ++ if (!(fd->oflags & O_NONBLOCK)) { ++ ret = rtdm_event_wait(&pin->event); ++ if (ret) ++ return ret; ++ rdo.timestamp = pin->timestamp; ++ } else ++ rdo.timestamp = rtdm_clock_read_monotonic(); ++ ++ len = sizeof(rdo); ++ rdo.value = gpiod_get_raw_value(pin->desc); ++ ret = rtdm_safe_copy_to_user(fd, buf, &rdo, len); ++ } else { ++ if 
(len < sizeof(rdo.value)) ++ return -EINVAL; ++ ++ if (!(fd->oflags & O_NONBLOCK)) { ++ ret = rtdm_event_wait(&pin->event); ++ if (ret) ++ return ret; ++ } ++ ++ len = sizeof(rdo.value); ++ rdo.value = gpiod_get_raw_value(pin->desc); ++ ret = rtdm_safe_copy_to_user(fd, buf, &rdo.value, len); ++ } ++ ++ return ret ?: len; ++} ++ ++static ssize_t gpio_pin_write_rt(struct rtdm_fd *fd, ++ const void __user *buf, size_t len) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_pin *pin; ++ int value, ret; ++ ++ if (len < sizeof(value)) ++ return -EINVAL; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (!chan->is_output) ++ return -EINVAL; ++ ++ ret = rtdm_safe_copy_from_user(fd, &value, buf, sizeof(value)); ++ if (ret) ++ return ret; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ gpiod_set_raw_value(pin->desc, value); ++ ++ return sizeof(value); ++} ++ ++static int gpio_pin_select(struct rtdm_fd *fd, struct xnselector *selector, ++ unsigned int type, unsigned int index) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ struct rtdm_gpio_pin *pin; ++ ++ if (!chan->has_direction) ++ return -EAGAIN; ++ ++ if (chan->is_output) ++ return -EINVAL; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ++ return rtdm_event_select(&pin->event, selector, type, index); ++} ++ ++int gpio_pin_open(struct rtdm_fd *fd, int oflags) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ int ret = 0; ++ struct rtdm_gpio_pin *pin; ++ ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ ret = gpio_request(gpio, pin->name); ++ if (ret) { ++ printk(XENO_ERR "failed to request pin %d : %d\n", gpio, ret); ++ return ret; ++ } else { ++ chan->requested = true; ++ } ++ ++ return 0; ++} ++ ++static void gpio_pin_close(struct rtdm_fd *fd) ++{ ++ struct rtdm_gpio_chan *chan = rtdm_fd_to_private(fd); ++ struct rtdm_device *dev = rtdm_fd_device(fd); ++ unsigned int gpio = rtdm_fd_minor(fd); ++ struct rtdm_gpio_pin *pin; ++ ++ if (chan->requested) { ++ pin = container_of(dev, struct rtdm_gpio_pin, dev); ++ release_gpio_irq(gpio, pin, chan); ++ } ++} ++ ++static void delete_pin_devices(struct rtdm_gpio_chip *rgc) ++{ ++ struct rtdm_gpio_pin *pin; ++ struct rtdm_device *dev; ++ int offset; ++ ++ for (offset = 0; offset < rgc->gc->ngpio; offset++) { ++ pin = rgc->pins + offset; ++ dev = &pin->dev; ++ rtdm_dev_unregister(dev); ++ rtdm_event_destroy(&pin->event); ++ kfree(dev->label); ++ kfree(pin->name); ++ } ++} ++ ++static int create_pin_devices(struct rtdm_gpio_chip *rgc) ++{ ++ struct gpio_chip *gc = rgc->gc; ++ struct rtdm_gpio_pin *pin; ++ struct rtdm_device *dev; ++ int offset, ret, gpio; ++ ++ for (offset = 0; offset < gc->ngpio; offset++) { ++ ret = -ENOMEM; ++ gpio = gc->base + offset; ++ pin = rgc->pins + offset; ++ pin->name = kasprintf(GFP_KERNEL, "gpio%d", gpio); ++ if (pin->name == NULL) ++ goto fail_name; ++ pin->desc = gpio_to_desc(gpio); ++ if (pin->desc == NULL) { ++ ret = -ENODEV; ++ goto fail_desc; ++ } ++ dev = &pin->dev; ++ dev->driver = &rgc->driver; ++ dev->label = kasprintf(GFP_KERNEL, "%s/gpio%%d", gc->label); ++ if (dev->label == NULL) ++ goto fail_label; ++ dev->minor = gpio; ++ dev->device_data = rgc; ++ ret = rtdm_dev_register(dev); ++ if (ret) ++ goto fail_register; ++ rtdm_event_init(&pin->event, 0); ++ } ++ ++ return 0; ++ 
++fail_register: ++ kfree(dev->label); ++fail_desc: ++fail_label: ++ kfree(pin->name); ++fail_name: ++ delete_pin_devices(rgc); ++ ++ return ret; ++} ++ ++static char *gpio_pin_devnode(struct device *dev, umode_t *mode) ++{ ++ return kasprintf(GFP_KERNEL, "rtdm/%s/%s", ++ dev->class->name, ++ dev_name(dev)); ++} ++ ++int rtdm_gpiochip_add(struct rtdm_gpio_chip *rgc, ++ struct gpio_chip *gc, int gpio_subclass) ++{ ++ int ret; ++ ++ rgc->devclass = class_create(gc->owner, gc->label); ++ if (IS_ERR(rgc->devclass)) { ++ printk(XENO_ERR "cannot create sysfs class\n"); ++ return PTR_ERR(rgc->devclass); ++ } ++ rgc->devclass->devnode = gpio_pin_devnode; ++ ++ rgc->driver.profile_info = (struct rtdm_profile_info) ++ RTDM_PROFILE_INFO(rtdm_gpio_chip, ++ RTDM_CLASS_GPIO, ++ gpio_subclass, ++ 0); ++ rgc->driver.device_flags = RTDM_NAMED_DEVICE|RTDM_FIXED_MINOR; ++ rgc->driver.base_minor = gc->base; ++ rgc->driver.device_count = gc->ngpio; ++ rgc->driver.context_size = sizeof(struct rtdm_gpio_chan); ++ rgc->driver.ops = (struct rtdm_fd_ops){ ++ .open = gpio_pin_open, ++ .close = gpio_pin_close, ++ .ioctl_nrt = gpio_pin_ioctl_nrt, ++ .read_rt = gpio_pin_read_rt, ++ .write_rt = gpio_pin_write_rt, ++ .select = gpio_pin_select, ++ }; ++ ++ rtdm_drv_set_sysclass(&rgc->driver, rgc->devclass); ++ ++ rgc->gc = gc; ++ rtdm_lock_init(&rgc->lock); ++ ++ ret = create_pin_devices(rgc); ++ if (ret) ++ class_destroy(rgc->devclass); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_add); ++ ++struct rtdm_gpio_chip * ++rtdm_gpiochip_alloc(struct gpio_chip *gc, int gpio_subclass) ++{ ++ struct rtdm_gpio_chip *rgc; ++ size_t asize; ++ int ret; ++ ++ if (gc->ngpio == 0) ++ return ERR_PTR(-EINVAL); ++ ++ asize = sizeof(*rgc) + gc->ngpio * sizeof(struct rtdm_gpio_pin); ++ rgc = kzalloc(asize, GFP_KERNEL); ++ if (rgc == NULL) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = rtdm_gpiochip_add(rgc, gc, gpio_subclass); ++ if (ret) { ++ kfree(rgc); ++ return ERR_PTR(ret); ++ } ++ ++ mutex_lock(&chip_lock); ++ list_add(&rgc->next, &rtdm_gpio_chips); ++ mutex_unlock(&chip_lock); ++ ++ return rgc; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_alloc); ++ ++void rtdm_gpiochip_remove(struct rtdm_gpio_chip *rgc) ++{ ++ mutex_lock(&chip_lock); ++ list_del(&rgc->next); ++ mutex_unlock(&chip_lock); ++ delete_pin_devices(rgc); ++ class_destroy(rgc->devclass); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_remove); ++ ++int rtdm_gpiochip_post_event(struct rtdm_gpio_chip *rgc, ++ unsigned int offset) ++{ ++ struct rtdm_gpio_pin *pin; ++ ++ if (offset >= rgc->gc->ngpio) ++ return -EINVAL; ++ ++ pin = rgc->pins + offset; ++ pin->timestamp = rtdm_clock_read_monotonic(); ++ rtdm_event_signal(&pin->event); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_post_event); ++ ++static int gpiochip_match_name(struct gpio_chip *chip, void *data) ++{ ++ const char *name = data; ++ ++ return !strcmp(chip->label, name); ++} ++ ++static struct gpio_chip *find_chip_by_name(const char *name) ++{ ++ return gpiochip_find((void *)name, gpiochip_match_name); ++} ++ ++int rtdm_gpiochip_add_by_name(struct rtdm_gpio_chip *rgc, ++ const char *label, int gpio_subclass) ++{ ++ struct gpio_chip *gc = find_chip_by_name(label); ++ ++ if (gc == NULL) ++ return -EPROBE_DEFER; ++ ++ return rtdm_gpiochip_add(rgc, gc, gpio_subclass); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_add_by_name); ++ ++#ifdef CONFIG_OF ++ ++#include ++ ++struct gpiochip_holder { ++ struct gpio_chip *chip; ++ struct list_head next; ++}; ++ ++struct gpiochip_match_data { ++ struct device *parent; ++ struct list_head list; 
++}; ++ ++static int match_gpio_chip(struct gpio_chip *gc, void *data) ++{ ++ struct gpiochip_match_data *d = data; ++ struct gpiochip_holder *h; ++ ++ if (cobalt_gpiochip_dev(gc) == d->parent) { ++ h = kmalloc(sizeof(*h), GFP_KERNEL); ++ if (h) { ++ h->chip = gc; ++ list_add(&h->next, &d->list); ++ } ++ } ++ ++ /* ++ * Iterate over all existing GPIO chips, we may have several ++ * hosted by the same pin controller mapping different ranges. ++ */ ++ return 0; ++} ++ ++int rtdm_gpiochip_scan_of(struct device_node *from, const char *compat, ++ int type) ++{ ++ struct gpiochip_match_data match; ++ struct gpiochip_holder *h, *n; ++ struct device_node *np = from; ++ struct platform_device *pdev; ++ struct rtdm_gpio_chip *rgc; ++ int ret = -ENODEV, _ret; ++ ++ if (!rtdm_available()) ++ return -ENOSYS; ++ ++ for (;;) { ++ np = of_find_compatible_node(np, NULL, compat); ++ if (np == NULL) ++ break; ++ pdev = of_find_device_by_node(np); ++ of_node_put(np); ++ if (pdev == NULL) ++ break; ++ match.parent = &pdev->dev; ++ INIT_LIST_HEAD(&match.list); ++ gpiochip_find(&match, match_gpio_chip); ++ if (!list_empty(&match.list)) { ++ ret = 0; ++ list_for_each_entry_safe(h, n, &match.list, next) { ++ list_del(&h->next); ++ _ret = 0; ++ rgc = rtdm_gpiochip_alloc(h->chip, type); ++ if (IS_ERR(rgc)) ++ _ret = PTR_ERR(rgc); ++ kfree(h); ++ if (_ret && !ret) ++ ret = _ret; ++ } ++ if (ret) ++ break; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_scan_of); ++ ++int rtdm_gpiochip_scan_array_of(struct device_node *from, ++ const char *compat[], ++ int nentries, int type) ++{ ++ int ret = -ENODEV, _ret, n; ++ ++ for (n = 0; n < nentries; n++) { ++ _ret = rtdm_gpiochip_scan_of(from, compat[n], type); ++ if (_ret) { ++ if (_ret != -ENODEV) ++ return _ret; ++ } else ++ ret = 0; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_scan_array_of); ++ ++void rtdm_gpiochip_remove_of(int type) ++{ ++ struct rtdm_gpio_chip *rgc, *n; ++ ++ mutex_lock(&chip_lock); ++ ++ list_for_each_entry_safe(rgc, n, &rtdm_gpio_chips, next) { ++ if (rgc->driver.profile_info.subclass_id == type) { ++ mutex_unlock(&chip_lock); ++ rtdm_gpiochip_remove(rgc); ++ kfree(rgc); ++ mutex_lock(&chip_lock); ++ } ++ } ++ ++ mutex_unlock(&chip_lock); ++} ++EXPORT_SYMBOL_GPL(rtdm_gpiochip_remove_of); ++ ++#endif /* CONFIG_OF */ +--- linux/drivers/xenomai/gpio/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/Makefile 2021-04-07 16:01:26.225635598 +0800 +@@ -0,0 +1,14 @@ ++ccflags-$(CONFIG_XENO_DRIVERS_GPIO_DEBUG) := -DDEBUG ++ ++obj-$(CONFIG_XENO_DRIVERS_GPIO_BCM2835) += xeno-gpio-bcm2835.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_MXC) += xeno-gpio-mxc.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_SUN8I_H3) += xeno-gpio-sun8i-h3.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_ZYNQ7000) += xeno-gpio-zynq7000.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO_XILINX) += xeno-gpio-xilinx.o ++obj-$(CONFIG_XENO_DRIVERS_GPIO) += gpio-core.o ++ ++xeno-gpio-bcm2835-y := gpio-bcm2835.o ++xeno-gpio-mxc-y := gpio-mxc.o ++xeno-gpio-sun8i-h3-y := gpio-sun8i-h3.o ++xeno-gpio-zynq7000-y := gpio-zynq7000.o ++xeno-gpio-xilinx-y := gpio-xilinx.o +--- linux/drivers/xenomai/gpio/gpio-xilinx.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-xilinx.c 2021-04-07 16:01:26.220635605 +0800 +@@ -0,0 +1,40 @@ ++/** ++ * @note Copyright (C) 2017 Greg Gallagher ++ * ++ * This driver controls the gpio that can be located on the PL ++ * of the Zynq SOC ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it 
under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_XILINX 5 ++ ++static int __init xilinx_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "xlnx,xps-gpio-1.00.a", ++ RTDM_SUBCLASS_XILINX); ++} ++module_init(xilinx_gpio_init); ++ ++static void __exit xilinx_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_XILINX); ++} ++module_exit(xilinx_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/gpio-zynq7000.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-zynq7000.c 2021-04-07 16:01:26.215635612 +0800 +@@ -0,0 +1,40 @@ ++/** ++ * @note Copyright (C) 2017 Greg Gallagher ++ * ++ * This driver is inspired by: ++ * gpio-bcm2835.c, please see original file for copyright information ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_ZYNQ7000 4 ++ ++static int __init zynq7000_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "xlnx,zynq-gpio-1.0", ++ RTDM_SUBCLASS_ZYNQ7000); ++} ++module_init(zynq7000_gpio_init); ++ ++static void __exit zynq7000_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_ZYNQ7000); ++} ++module_exit(zynq7000_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/Kconfig 2021-04-07 16:01:26.210635619 +0800 +@@ -0,0 +1,57 @@ ++menu "Real-time GPIO drivers" ++ ++config XENO_DRIVERS_GPIO ++ bool "GPIO controller" ++ depends on GPIOLIB ++ help ++ ++ Real-time capable GPIO module. ++ ++if XENO_DRIVERS_GPIO ++ ++config XENO_DRIVERS_GPIO_BCM2835 ++ depends on MACH_BCM2708 || ARCH_BCM2835 ++ tristate "Support for BCM2835 GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Broadcom's BCM2835 SoC. ++ ++config XENO_DRIVERS_GPIO_MXC ++ depends on GPIO_MXC ++ tristate "Support for MXC GPIOs" ++ help ++ ++ Suitable for the GPIO controller available from ++ Freescale/NXP's MXC architecture. 
++ ++config XENO_DRIVERS_GPIO_SUN8I_H3 ++ depends on MACH_SUN8I && PINCTRL_SUN8I_H3 ++ tristate "Support for SUN8I H3 GPIOs" ++ help ++ ++ Suitable for the GPIO controller available from Allwinner's H3 ++ SoC, as found on the NanoPI boards. ++ ++config XENO_DRIVERS_GPIO_ZYNQ7000 ++ depends on ARCH_ZYNQ ++ tristate "Support for Zynq7000 GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Xilinx's Zynq7000 SoC. ++ ++config XENO_DRIVERS_GPIO_XILINX ++ depends on ARCH_ZYNQ ++ tristate "Support for Xilinx GPIOs" ++ help ++ ++ Enables support for the GPIO controller available from ++ Xilinx's softcore IP. ++ ++config XENO_DRIVERS_GPIO_DEBUG ++ bool "Enable GPIO core debugging features" ++ ++endif ++ ++endmenu +--- linux/drivers/xenomai/gpio/gpio-bcm2835.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-bcm2835.c 2021-04-07 16:01:26.205635627 +0800 +@@ -0,0 +1,37 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_BCM2835 1 ++ ++static int __init bcm2835_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_of(NULL, "brcm,bcm2835-gpio", ++ RTDM_SUBCLASS_BCM2835); ++} ++module_init(bcm2835_gpio_init); ++ ++static void __exit bcm2835_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_BCM2835); ++} ++module_exit(bcm2835_gpio_exit); ++ ++MODULE_LICENSE("GPL"); ++ +--- linux/drivers/xenomai/gpio/gpio-mxc.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/drivers/xenomai/gpio/gpio-mxc.c 2021-04-07 16:01:26.199635635 +0800 +@@ -0,0 +1,42 @@ ++/** ++ * @note Copyright (C) 2016 Philippe Gerum ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++ ++#define RTDM_SUBCLASS_MXC 2 ++ ++static const char *compat_array[] = { ++ "fsl,imx6q-gpio", ++ "fsl,imx7d-gpio", ++}; ++ ++static int __init mxc_gpio_init(void) ++{ ++ return rtdm_gpiochip_scan_array_of(NULL, compat_array, ++ ARRAY_SIZE(compat_array), ++ RTDM_SUBCLASS_MXC); ++} ++module_init(mxc_gpio_init); ++ ++static void __exit mxc_gpio_exit(void) ++{ ++ rtdm_gpiochip_remove_of(RTDM_SUBCLASS_MXC); ++} ++module_exit(mxc_gpio_exit); ++ ++MODULE_LICENSE("GPL"); +--- linux/drivers/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ linux-patched/drivers/Makefile 2021-04-07 16:01:25.590636505 +0800 +@@ -187,3 +187,5 @@ + obj-$(CONFIG_SIOX) += siox/ + obj-$(CONFIG_GNSS) += gnss/ + obj-y += uacce/ ++ ++obj-$(CONFIG_XENOMAI) += xenomai/ +--- linux/arch/x86/include/ipipe/thread_info.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/include/ipipe/thread_info.h 2021-04-07 16:01:25.732636302 +0800 +@@ -0,0 +1,38 @@ ++/** ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef _COBALT_IPIPE_THREAD_INFO_H ++#define _COBALT_IPIPE_THREAD_INFO_H ++ ++struct xnthread; ++struct cobalt_process; ++ ++struct ipipe_threadinfo { ++ /* Core thread backlink. */ ++ struct xnthread *thread; ++ /* User process backlink. NULL for core threads. */ ++ struct cobalt_process *process; ++}; ++ ++static inline void __ipipe_init_threadinfo(struct ipipe_threadinfo *p) ++{ ++ p->thread = NULL; ++ p->process = NULL; ++} ++ ++#endif /* !_COBALT_IPIPE_THREAD_INFO_H */ +--- linux/arch/x86/xenomai/machine.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/machine.c 2021-04-07 16:01:25.721636318 +0800 +@@ -0,0 +1,134 @@ ++/** ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * Xenomai is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, Inc., 675 Mass Ave, ++ * Cambridge MA 02139, USA; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++long strncpy_from_user_nocheck(char *dst, const char __user *src, long count) ++{ ++ int ret; ++ char c; ++ long n; ++ ++ for (n = 0; n < count; n++, src++, dst++) { ++ ret = __xn_get_user(c, src); ++ if (ret) ++ return -EFAULT; ++ *dst = c; ++ if (c == 0) ++ break; ++ } ++ ++ return n; ++} ++EXPORT_SYMBOL_GPL(strncpy_from_user_nocheck); ++ ++static unsigned long mach_x86_calibrate(void) ++{ ++ unsigned long delay = (cobalt_pipeline.timer_freq + HZ / 2) / HZ; ++ unsigned long long t0, t1, dt; ++ unsigned long flags; ++ int i; ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ ipipe_timer_set(delay); ++ ++ ipipe_read_tsc(t0); ++ ++ for (i = 0; i < 100; i++) ++ ipipe_timer_set(delay); ++ ++ ipipe_read_tsc(t1); ++ dt = t1 - t0; ++ ++ ipipe_critical_exit(flags); ++ ++ /* ++ * Reset the max trace, since it contains the calibration time ++ * now. ++ */ ++ ipipe_trace_max_reset(); ++ ++ /* ++ * Compute average with a 5% margin to avoid negative ++ * latencies with PIT. ++ */ ++ return xnarch_ulldiv(dt, i + 5, NULL); ++} ++ ++static int mach_x86_init(void) ++{ ++ int ret; ++ ++ ret = mach_x86_thread_init(); ++ if (ret) ++ return ret; ++ ++ mach_x86_c1e_disable(); ++ mach_x86_smi_init(); ++ mach_x86_smi_disable(); ++ ++ return 0; ++} ++ ++static void mach_x86_cleanup(void) ++{ ++ mach_x86_smi_restore(); ++ mach_x86_thread_cleanup(); ++} ++ ++static const char *const fault_labels[] = { ++ [0] = "Divide error", ++ [1] = "Debug", ++ [2] = "", /* NMI is not pipelined. */ ++ [3] = "Int3", ++ [4] = "Overflow", ++ [5] = "Bounds", ++ [6] = "Invalid opcode", ++ [7] = "FPU not available", ++ [8] = "Double fault", ++ [9] = "FPU segment overrun", ++ [10] = "Invalid TSS", ++ [11] = "Segment not present", ++ [12] = "Stack segment", ++ [13] = "General protection", ++ [14] = "Page fault", ++ [15] = "Spurious interrupt", ++ [16] = "FPU error", ++ [17] = "Alignment check", ++ [18] = "Machine check", ++ [19] = "SIMD error", ++ [20] = NULL, ++}; ++ ++struct cobalt_machine cobalt_machine = { ++ .name = "x86", ++ .init = mach_x86_init, ++ .late_init = NULL, ++ .cleanup = mach_x86_cleanup, ++ .calibrate = mach_x86_calibrate, ++ .prefault = NULL, ++ .fault_labels = fault_labels, ++}; +--- linux/arch/x86/xenomai/smi.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/smi.c 2021-04-07 16:01:25.716636325 +0800 +@@ -0,0 +1,168 @@ ++/** ++ * SMI workaround for x86. ++ * ++ * Cut/Pasted from Vitor Angelo "smi" module. ++ * Adapted by Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEVFN 0xf8 /* device 31, function 0 */ ++ ++#define PMBASE_B0 0x40 ++#define PMBASE_B1 0x41 ++ ++#define SMI_CTRL_ADDR 0x30 ++ ++static int smi_state; ++static char smi_state_arg[16] = "detect"; ++module_param_string(smi, smi_state_arg, sizeof(smi_state_arg), 0444); ++ ++static unsigned int smi_masked_bits = 1; /* Global disable bit */ ++module_param_named(smi_mask, smi_masked_bits, int, 0400); ++ ++static unsigned int smi_saved_bits; ++static unsigned short smi_en_addr; ++ ++#define mask_bits(v, p) outl(inl(p)&~(v),(p)) ++#define set_bits(v, p) outl(inl(p)|(v), (p)) ++ ++static int smi_reboot(struct notifier_block *nb, ulong event, void *buf); ++ ++static struct notifier_block smi_notifier = { ++ .notifier_call = smi_reboot ++}; ++ ++static int smi_reboot(struct notifier_block *nb, ulong event, void *buf) ++{ ++ if (((event == SYS_RESTART) || (event == SYS_HALT) || ++ (event == SYS_POWER_OFF)) && smi_en_addr) ++ set_bits(smi_saved_bits, smi_en_addr); ++ ++ return NOTIFY_DONE; ++} ++ ++void mach_x86_smi_disable(void) ++{ ++ if (smi_en_addr == 0) ++ return; ++ ++ smi_saved_bits = inl(smi_en_addr) & smi_masked_bits; ++ mask_bits(smi_masked_bits, smi_en_addr); ++ ++ if (inl(smi_en_addr) & smi_masked_bits) ++ printk(XENO_WARNING "SMI workaround failed!\n"); ++ else ++ printk(XENO_INFO "SMI workaround enabled\n"); ++ ++ register_reboot_notifier(&smi_notifier); ++} ++ ++void mach_x86_smi_restore(void) ++{ ++ if (smi_en_addr == 0) ++ return; ++ ++ printk(XENO_INFO "SMI configuration restored\n"); ++ ++ set_bits(smi_saved_bits, smi_en_addr); ++ ++ unregister_reboot_notifier(&smi_notifier); ++} ++ ++static unsigned short get_smi_en_addr(struct pci_dev *dev) ++{ ++ u_int8_t byte0, byte1; ++ ++ pci_read_config_byte(dev, PMBASE_B0, &byte0); ++ pci_read_config_byte(dev, PMBASE_B1, &byte1); ++ return SMI_CTRL_ADDR + (((byte1 << 1) | (byte0 >> 7)) << 7); // bits 7-15 ++} ++ ++ ++static const char *smi_state_labels[] = { ++ "disabled", ++ "detect", ++ "enabled", ++}; ++ ++static void setup_smi_state(void) ++{ ++ static char warn_bad_state[] = ++ XENO_WARNING "invalid SMI state '%s'\n"; ++ char *p; ++ int n; ++ ++ /* Backward compat with legacy state specifiers. */ ++ n = simple_strtol(smi_state_arg, &p, 10); ++ if (*p == '\0') { ++ smi_state = n; ++ return; ++ } ++ ++ for (n = 0; n < ARRAY_SIZE(smi_state_labels); n++) ++ if (strcmp(smi_state_labels[n], smi_state_arg) == 0) { ++ smi_state = n - 1; ++ return; ++ } ++ ++ printk(warn_bad_state, smi_state_arg); ++} ++ ++void mach_x86_smi_init(void) ++{ ++ struct pci_dev *dev = NULL; ++ ++ setup_smi_state(); ++ ++ if (smi_state < 0) ++ return; ++ ++ /* ++ * Do not use pci_register_driver, pci_enable_device, ... ++ * Just register the used ports. ++ */ ++ dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); ++ if (dev == NULL || dev->bus->number || ++ dev->devfn != DEVFN || dev->vendor != PCI_VENDOR_ID_INTEL) { ++ pci_dev_put(dev); ++ return; ++ } ++ ++ if (smi_state == 0) { ++ printk(XENO_WARNING "SMI-enabled chipset found, but SMI workaround disabled\n" ++ " (see xenomai.smi parameter). 
You might encounter\n" ++ " high latencies!\n"); ++ pci_dev_put(dev); ++ return; ++ } ++ ++ printk(XENO_INFO "SMI-enabled chipset found\n"); ++ smi_en_addr = get_smi_en_addr(dev); ++ ++ pci_dev_put(dev); ++} +--- linux/arch/x86/xenomai/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/Makefile 2021-04-07 16:01:25.711636332 +0800 +@@ -0,0 +1,5 @@ ++ ++obj-$(CONFIG_XENOMAI) += xenomai.o ++xenomai-y := machine.o thread.o smi.o c1e.o ++ ++ccflags-y := -Iarch/x86/xenomai/include -Iinclude/xenomai +--- linux/arch/x86/xenomai/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/Kconfig 2021-04-07 16:01:25.707636338 +0800 +@@ -0,0 +1,8 @@ ++config XENO_ARCH_FPU ++ def_bool y ++ ++config XENO_ARCH_SYS3264 ++ def_bool IA32_EMULATION ++ ++source "kernel/xenomai/Kconfig" ++source "drivers/xenomai/Kconfig" +--- linux/arch/x86/xenomai/include/asm/xenomai/machine.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/machine.h 2021-04-07 16:01:25.702636345 +0800 +@@ -0,0 +1,43 @@ ++/** ++ * Copyright (C) 2007-2012 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_MACHINE_H ++#define _COBALT_X86_ASM_MACHINE_H ++ ++#include ++ ++static inline __attribute_const__ unsigned long ffnz(unsigned long ul) ++{ ++#ifdef __i386__ ++ __asm__("bsfl %1, %0":"=r,r" (ul) : "r,?m" (ul)); ++#else ++ __asm__("bsfq %1, %0":"=r" (ul) : "rm" (ul)); ++#endif ++ return ul; ++} ++ ++#define XNARCH_HOST_TICK_IRQ __ipipe_hrtimer_irq ++ ++long strncpy_from_user_nocheck(char *dst, ++ const char __user *src, ++ long count); ++ ++/* Read this last to enable default settings. */ ++#include ++ ++#endif /* !_COBALT_X86_ASM_MACHINE_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/smi.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/smi.h 2021-04-07 16:01:25.697636352 +0800 +@@ -0,0 +1,32 @@ ++/** ++ * Copyright © 2005 Gilles Chanteperdrix. ++ * ++ * SMI workaround for x86. ++ * ++ * Xenomai free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#ifndef _COBALT_X86_ASM_SMI_H ++#define _COBALT_X86_ASM_SMI_H ++ ++#ifndef _COBALT_X86_ASM_MACHINE_H ++#error "please don't include asm/smi.h directly" ++#endif ++ ++void mach_x86_smi_disable(void); ++void mach_x86_smi_restore(void); ++void mach_x86_smi_init(void); ++ ++#endif /* !_COBALT_X86_ASM_SMI_64_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall32.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall32.h 2021-04-07 16:01:25.693636358 +0800 +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL32_H ++#define _COBALT_X86_ASM_SYSCALL32_H ++ ++#include ++ ++#ifdef CONFIG_X86_X32 ++ ++#define __COBALT_X32_BASE 128 ++ ++#define __COBALT_SYSNR32x(__reg) \ ++ ({ \ ++ long __nr = __reg; \ ++ if (__nr & __X32_SYSCALL_BIT) { \ ++ __nr &= ~__X32_SYSCALL_BIT; \ ++ __nr += __COBALT_X32_BASE; \ ++ } \ ++ __nr; \ ++ }) ++ ++#define __COBALT_COMPAT32x(__reg) \ ++ (((__reg) & __X32_SYSCALL_BIT) ? __COBALT_COMPATX_BIT : 0) ++ ++#if __NR_COBALT_SYSCALLS > __COBALT_X32_BASE ++#error "__NR_COBALT_SYSCALLS > __COBALT_X32_BASE" ++#endif ++ ++#define __syshand32x__(__name) ((cobalt_syshand)(CoBaLt32x_ ## __name)) ++ ++#define __COBALT_CALL32x_INITHAND(__handler) \ ++ [__COBALT_X32_BASE ... __COBALT_X32_BASE + __NR_COBALT_SYSCALLS-1] = __handler, ++ ++#define __COBALT_CALL32x_INITMODE(__mode) \ ++ [__COBALT_X32_BASE ... __COBALT_X32_BASE + __NR_COBALT_SYSCALLS-1] = __mode, ++ ++/* x32 default entry (no thunk) */ ++#define __COBALT_CALL32x_ENTRY(__name, __handler) \ ++ [sc_cobalt_ ## __name + __COBALT_X32_BASE] = __handler, ++ ++/* x32 thunk installation */ ++#define __COBALT_CALL32x_pure_THUNK(__name) \ ++ __COBALT_CALL32x_ENTRY(__name, __syshand32x__(__name)) ++ ++#define __COBALT_CALL32x_THUNK(__name) \ ++ __COBALT_CALL32x_ENTRY(__name, __syshand32emu__(__name)) ++ ++/* x32 thunk implementation. */ ++#define COBALT_SYSCALL32x(__name, __mode, __args) \ ++ long CoBaLt32x_ ## __name __args ++ ++/* x32 thunk declaration. */ ++#define COBALT_SYSCALL32x_DECL(__name, __args) \ ++ long CoBaLt32x_ ## __name __args ++ ++#else /* !CONFIG_X86_X32 */ ++ ++/* x32 support disabled. */ ++ ++#define __COBALT_SYSNR32x(__reg) (__reg) ++ ++#define __COBALT_COMPAT32x(__reg) 0 ++ ++#define __COBALT_CALL32x_INITHAND(__handler) ++ ++#define __COBALT_CALL32x_INITMODE(__mode) ++ ++#define __COBALT_CALL32x_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32x_pure_THUNK(__name) ++ ++#define __COBALT_CALL32x_THUNK(__name) ++ ++#define COBALT_SYSCALL32x_DECL(__name, __args) ++ ++#endif /* !CONFIG_X86_X32 */ ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++#define __COBALT_IA32_BASE 256 /* Power of two. 
*/ ++ ++#define __COBALT_SYSNR32emu(__reg) \ ++ ({ \ ++ long __nr = __reg; \ ++ if (in_ia32_syscall()) \ ++ __nr += __COBALT_IA32_BASE; \ ++ __nr; \ ++ }) ++ ++#define __COBALT_COMPAT32emu(__reg) \ ++ (in_ia32_syscall() ? __COBALT_COMPAT_BIT : 0) ++ ++#if __NR_COBALT_SYSCALLS > __COBALT_IA32_BASE ++#error "__NR_COBALT_SYSCALLS > __COBALT_IA32_BASE" ++#endif ++ ++#define __syshand32emu__(__name) ((cobalt_syshand)(CoBaLt32emu_ ## __name)) ++ ++#define __COBALT_CALL32emu_INITHAND(__handler) \ ++ [__COBALT_IA32_BASE ... __COBALT_IA32_BASE + __NR_COBALT_SYSCALLS-1] = __handler, ++ ++#define __COBALT_CALL32emu_INITMODE(__mode) \ ++ [__COBALT_IA32_BASE ... __COBALT_IA32_BASE + __NR_COBALT_SYSCALLS-1] = __mode, ++ ++/* ia32 default entry (no thunk) */ ++#define __COBALT_CALL32emu_ENTRY(__name, __handler) \ ++ [sc_cobalt_ ## __name + __COBALT_IA32_BASE] = __handler, ++ ++/* ia32 thunk installation */ ++#define __COBALT_CALL32emu_THUNK(__name) \ ++ __COBALT_CALL32emu_ENTRY(__name, __syshand32emu__(__name)) ++ ++/* ia32 thunk implementation. */ ++#define COBALT_SYSCALL32emu(__name, __mode, __args) \ ++ long CoBaLt32emu_ ## __name __args ++ ++/* ia32 thunk declaration. */ ++#define COBALT_SYSCALL32emu_DECL(__name, __args) \ ++ long CoBaLt32emu_ ## __name __args ++ ++#else /* !CONFIG_IA32_EMULATION */ ++ ++/* ia32 emulation support disabled. */ ++ ++#define __COBALT_SYSNR32emu(__reg) (__reg) ++ ++#define __COBALT_COMPAT32emu(__reg) 0 ++ ++#define __COBALT_CALL32emu_INITHAND(__handler) ++ ++#define __COBALT_CALL32emu_INITMODE(__mode) ++ ++#define __COBALT_CALL32emu_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32emu_THUNK(__name) ++ ++#define COBALT_SYSCALL32emu_DECL(__name, __args) ++ ++#endif /* !CONFIG_IA32_EMULATION */ ++ ++#define __COBALT_CALL32_ENTRY(__name, __handler) \ ++ __COBALT_CALL32x_ENTRY(__name, __handler) \ ++ __COBALT_CALL32emu_ENTRY(__name, __handler) ++ ++#define __COBALT_CALL32_INITHAND(__handler) \ ++ __COBALT_CALL32x_INITHAND(__handler) \ ++ __COBALT_CALL32emu_INITHAND(__handler) ++ ++#define __COBALT_CALL32_INITMODE(__mode) \ ++ __COBALT_CALL32x_INITMODE(__mode) \ ++ __COBALT_CALL32emu_INITMODE(__mode) ++ ++/* Already checked for __COBALT_SYSCALL_BIT */ ++#define __COBALT_CALL32_SYSNR(__reg) \ ++ ({ \ ++ long __nr; \ ++ __nr = __COBALT_SYSNR32x(__reg); \ ++ if (__nr == (__reg)) \ ++ __nr = __COBALT_SYSNR32emu(__reg); \ ++ __nr; \ ++ }) ++ ++#define __COBALT_CALL_COMPAT(__reg) \ ++ ({ \ ++ int __ret = __COBALT_COMPAT32x(__reg); \ ++ if (__ret == 0) \ ++ __ret = __COBALT_COMPAT32emu(__reg); \ ++ __ret; \ ++ }) ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL32_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/calibration.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/calibration.h 2021-04-07 16:01:25.688636365 +0800 +@@ -0,0 +1,70 @@ ++/* ++ * Copyright (C) 2001,2002,2003,2004,2005 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_CALIBRATION_H ++#define _COBALT_X86_ASM_CALIBRATION_H ++ ++#include ++ ++static inline unsigned long __get_bogomips(void) ++{ ++ return this_cpu_read(cpu_info.loops_per_jiffy)/(500000/HZ); ++} ++ ++static inline void xnarch_get_latencies(struct xnclock_gravity *p) ++{ ++ unsigned long sched_latency; ++ ++#if CONFIG_XENO_OPT_TIMING_SCHEDLAT != 0 ++ sched_latency = CONFIG_XENO_OPT_TIMING_SCHEDLAT; ++#else /* !CONFIG_XENO_OPT_TIMING_SCHEDLAT */ ++ ++ if (strcmp(ipipe_timer_name(), "lapic") == 0) { ++#ifdef CONFIG_SMP ++ if (num_online_cpus() > 1) ++ sched_latency = 3350; ++ else ++ sched_latency = 2000; ++#else /* !SMP */ ++ sched_latency = 1000; ++#endif /* !SMP */ ++ } else if (strcmp(ipipe_timer_name(), "pit")) { /* HPET */ ++#ifdef CONFIG_SMP ++ if (num_online_cpus() > 1) ++ sched_latency = 3350; ++ else ++ sched_latency = 1500; ++#else /* !SMP */ ++ sched_latency = 1000; ++#endif /* !SMP */ ++ } else { ++ sched_latency = (__get_bogomips() < 250 ? 17000 : ++ __get_bogomips() < 2500 ? 4200 : ++ 3500); ++#ifdef CONFIG_SMP ++ sched_latency += 1000; ++#endif /* CONFIG_SMP */ ++ } ++#endif /* !CONFIG_XENO_OPT_TIMING_SCHEDLAT */ ++ ++ p->user = sched_latency; ++ p->kernel = CONFIG_XENO_OPT_TIMING_KSCHEDLAT; ++ p->irq = CONFIG_XENO_OPT_TIMING_IRQLAT; ++} ++ ++#endif /* !_COBALT_X86_ASM_CALIBRATION_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/c1e.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/c1e.h 2021-04-07 16:01:25.683636372 +0800 +@@ -0,0 +1,23 @@ ++/* ++ * Copyright (C) 2014 Gilles Chanteperdrix . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++#ifndef C1E_H ++#define C1E_H ++ ++void mach_x86_c1e_disable(void); ++ ++#endif /* C1E_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall.h 2021-04-07 16:01:25.679636378 +0800 +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL_H ++#define _COBALT_X86_ASM_SYSCALL_H ++ ++#include ++#include ++#include ++ ++/* ++ * Cobalt and Linux syscall numbers can be fetched from ORIG_AX, ++ * masking out the __COBALT_SYSCALL_BIT marker. Make sure to offset ++ * the number by __COBALT_X32_BASE for Cobalt 32-bit compat syscalls ++ * only. ++ */ ++#define __xn_reg_sys(regs) ((regs)->orig_ax) ++#define __xn_reg_rval(regs) ((regs)->ax) ++#ifdef __i386__ ++#define __xn_reg_arg1(regs) ((regs)->bx) ++#define __xn_reg_arg2(regs) ((regs)->cx) ++#define __xn_reg_arg3(regs) ((regs)->dx) ++#define __xn_reg_arg4(regs) ((regs)->si) ++#define __xn_reg_arg5(regs) ((regs)->di) ++#else /* x86_64 */ ++#define __xn_reg_arg1(regs) ((regs)->di) ++#define __xn_reg_arg2(regs) ((regs)->si) ++#define __xn_reg_arg3(regs) ((regs)->dx) ++#define __xn_reg_arg4(regs) ((regs)->r10) ++#define __xn_reg_arg5(regs) ((regs)->r8) ++#endif /* x86_64 */ ++#define __xn_reg_pc(regs) ((regs)->ip) ++#define __xn_reg_sp(regs) ((regs)->sp) ++ ++#define __xn_syscall_p(regs) (__xn_reg_sys(regs) & __COBALT_SYSCALL_BIT) ++#ifdef CONFIG_XENO_ARCH_SYS3264 ++#define __xn_syscall(regs) __COBALT_CALL32_SYSNR(__xn_reg_sys(regs) \ ++ & ~__COBALT_SYSCALL_BIT) ++#else ++#define __xn_syscall(regs) (__xn_reg_sys(regs) & ~__COBALT_SYSCALL_BIT) ++#endif ++ ++/* ++ * Root syscall number with predicate (valid only if ++ * !__xn_syscall_p(__regs)). ++ */ ++#define __xn_rootcall_p(__regs, __code) \ ++ ({ \ ++ *(__code) = __xn_reg_sys(__regs); \ ++ *(__code) < ipipe_root_nr_syscalls(current_thread_info()); \ ++ }) ++ ++static inline void __xn_error_return(struct pt_regs *regs, int v) ++{ ++ __xn_reg_rval(regs) = v; ++} ++ ++static inline void __xn_status_return(struct pt_regs *regs, long v) ++{ ++ __xn_reg_rval(regs) = v; ++} ++ ++static inline int __xn_interrupted_p(struct pt_regs *regs) ++{ ++ return __xn_reg_rval(regs) == -EINTR; ++} ++ ++static inline ++int xnarch_local_syscall(unsigned long a1, unsigned long a2, ++ unsigned long a3, unsigned long a4, ++ unsigned long a5) ++{ ++ return -ENOSYS; ++} ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/syscall.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/syscall.h 2021-04-07 16:01:25.674636385 +0800 +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (C) 2001-2014 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_X86_ASM_UAPI_SYSCALL_H ++#define _COBALT_X86_ASM_UAPI_SYSCALL_H ++ ++#ifdef __ILP32__ ++#define __xn_syscall_base __COBALT_X32_BASE ++#else ++#define __xn_syscall_base 0 ++#endif ++ ++#define __xn_syscode(__nr) (__COBALT_SYSCALL_BIT | (__nr + __xn_syscall_base)) ++ ++#endif /* !_COBALT_X86_ASM_UAPI_SYSCALL_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/fptest.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/fptest.h 2021-04-07 16:01:25.669636392 +0800 +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_X86_ASM_UAPI_FPTEST_H ++#define _COBALT_X86_ASM_UAPI_FPTEST_H ++ ++#define __COBALT_HAVE_SSE2 0x1 ++#define __COBALT_HAVE_AVX 0x2 ++ ++static inline void fp_regs_set(int features, unsigned int val) ++{ ++ unsigned long long vec[4] = { val, 0, val, 0 }; ++ unsigned i; ++ ++ for (i = 0; i < 8; i++) ++ __asm__ __volatile__("fildl %0": /* no output */ :"m"(val)); ++ ++ if (features & __COBALT_HAVE_AVX) { ++ __asm__ __volatile__( ++ "vmovupd %0,%%ymm0;" ++ "vmovupd %0,%%ymm1;" ++ "vmovupd %0,%%ymm2;" ++ "vmovupd %0,%%ymm3;" ++ "vmovupd %0,%%ymm4;" ++ "vmovupd %0,%%ymm5;" ++ "vmovupd %0,%%ymm6;" ++ "vmovupd %0,%%ymm7;" ++ : : "m"(vec[0]), "m"(vec[1]), "m"(vec[2]), "m"(vec[3])); ++ } else if (features & __COBALT_HAVE_SSE2) { ++ __asm__ __volatile__( ++ "movupd %0,%%xmm0;" ++ "movupd %0,%%xmm1;" ++ "movupd %0,%%xmm2;" ++ "movupd %0,%%xmm3;" ++ "movupd %0,%%xmm4;" ++ "movupd %0,%%xmm5;" ++ "movupd %0,%%xmm6;" ++ "movupd %0,%%xmm7;" ++ : : "m"(vec[0]), "m"(vec[1]), "m"(vec[2]), "m"(vec[3])); ++ } ++} ++ ++static inline unsigned int fp_regs_check(int features, unsigned int val, ++ int (*report)(const char *fmt, ...)) ++{ ++ unsigned long long vec[8][4]; ++ unsigned int i, result = val; ++ unsigned e[8]; ++ ++ for (i = 0; i < 8; i++) ++ __asm__ __volatile__("fistpl %0":"=m"(e[7 - i])); ++ ++ if (features & __COBALT_HAVE_AVX) { ++ __asm__ __volatile__( ++ "vmovupd %%ymm0,%0;" ++ "vmovupd %%ymm1,%1;" ++ "vmovupd %%ymm2,%2;" ++ "vmovupd %%ymm3,%3;" ++ "vmovupd %%ymm4,%4;" ++ "vmovupd %%ymm5,%5;" ++ "vmovupd %%ymm6,%6;" ++ "vmovupd %%ymm7,%7;" ++ : "=m" (vec[0][0]), "=m" (vec[1][0]), ++ "=m" (vec[2][0]), "=m" (vec[3][0]), ++ "=m" (vec[4][0]), "=m" (vec[5][0]), ++ "=m" (vec[6][0]), "=m" (vec[7][0])); ++ } else if (features & __COBALT_HAVE_SSE2) { ++ __asm__ __volatile__( ++ "movupd %%xmm0,%0;" ++ "movupd %%xmm1,%1;" ++ "movupd %%xmm2,%2;" ++ "movupd %%xmm3,%3;" ++ "movupd %%xmm4,%4;" ++ "movupd %%xmm5,%5;" ++ "movupd %%xmm6,%6;" ++ "movupd %%xmm7,%7;" ++ : "=m" (vec[0][0]), "=m" (vec[1][0]), ++ "=m" (vec[2][0]), "=m" (vec[3][0]), ++ "=m" (vec[4][0]), "=m" (vec[5][0]), ++ "=m" (vec[6][0]), "=m" (vec[7][0])); ++ } ++ ++ for (i = 0; i < 8; 
i++) ++ if (e[i] != val) { ++ report("r%d: %u != %u\n", i, e[i], val); ++ result = e[i]; ++ } ++ ++ if (features & __COBALT_HAVE_AVX) { ++ for (i = 0; i < 8; i++) { ++ int error = 0; ++ if (vec[i][0] != val) { ++ result = vec[i][0]; ++ error = 1; ++ } ++ if (vec[i][2] != val) { ++ result = vec[i][2]; ++ error = 1; ++ } ++ if (error) ++ report("ymm%d: %llu/%llu != %u/%u\n", ++ i, (unsigned long long)vec[i][0], ++ (unsigned long long)vec[i][2], ++ val, val); ++ } ++ } else if (features & __COBALT_HAVE_SSE2) { ++ for (i = 0; i < 8; i++) ++ if (vec[i][0] != val) { ++ report("xmm%d: %llu != %u\n", ++ i, (unsigned long long)vec[i][0], val); ++ result = vec[i][0]; ++ } ++ } ++ ++ return result; ++} ++ ++#endif /* _COBALT_X86_ASM_UAPI_FPTEST_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/features.h 2021-04-07 16:01:25.665636398 +0800 +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (C) 2005-2013 Philippe Gerum . ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. ++ */ ++#ifndef _COBALT_X86_ASM_UAPI_FEATURES_H ++#define _COBALT_X86_ASM_UAPI_FEATURES_H ++ ++/* The ABI revision level we use on this arch. */ ++#define XENOMAI_ABI_REV 17UL ++ ++#define XENOMAI_FEAT_DEP __xn_feat_generic_mask ++ ++#define XENOMAI_FEAT_MAN __xn_feat_generic_man_mask ++ ++#define XNARCH_HAVE_LLMULSHFT 1 ++#define XNARCH_HAVE_NODIV_LLIMD 1 ++ ++struct cobalt_featinfo_archdep { /* no arch-specific feature */ }; ++ ++#include ++ ++static inline const char *get_feature_label(unsigned int feature) ++{ ++ return get_generic_feature_label(feature); ++} ++ ++#endif /* !_COBALT_X86_ASM_UAPI_FEATURES_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/uapi/arith.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/uapi/arith.h 2021-04-07 16:01:25.660636405 +0800 +@@ -0,0 +1,243 @@ ++/** ++ * Arithmetic/conversion routines for x86. ++ * ++ * Copyright © 2005 Gilles Chanteperdrix, 32bit version. ++ * Copyright © 2007 Jan Kiszka, 64bit version. ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
++ */ ++#ifndef _COBALT_X86_ASM_UAPI_ARITH_H ++#define _COBALT_X86_ASM_UAPI_ARITH_H ++#define _COBALT_X86_ASM_UAPI_ARITH_H ++ ++#include ++ ++#ifdef __i386__ ++ ++#define xnarch_u64tou32(ull, h, l) ({ \ ++ unsigned long long _ull = (ull); \ ++ (l) = _ull & 0xffffffff; \ ++ (h) = _ull >> 32; \ ++}) ++ ++#define xnarch_u64fromu32(h, l) ({ \ ++ unsigned long long _ull; \ ++ asm ( "": "=A"(_ull) : "d"(h), "a"(l)); \ ++ _ull; \ ++}) ++ ++/* const helper for xnarch_uldivrem, so that the compiler will eliminate ++ multiple calls with same arguments, at no additionnal cost. */ ++static inline __attribute__((__const__)) unsigned long long ++__mach_x86_32_uldivrem(const unsigned long long ull, const unsigned long d) ++{ ++ unsigned long long ret; ++ __asm__ ("divl %1" : "=A,A"(ret) : "r,?m"(d), "A,A"(ull)); ++ /* Exception if quotient does not fit on unsigned long. */ ++ return ret; ++} ++ ++/* Fast long long division: when the quotient and remainder fit on 32 bits. */ ++static inline unsigned long mach_x86_32_uldivrem(unsigned long long ull, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long q, r; ++ ull = __mach_x86_32_uldivrem(ull, d); ++ __asm__ ( "": "=d"(r), "=a"(q) : "A"(ull)); ++ if(rp) ++ *rp = r; ++ return q; ++} ++#define xnarch_uldivrem(ull, d, rp) mach_x86_32_uldivrem((ull),(d),(rp)) ++ ++/* Division of an unsigned 96 bits ((h << 32) + l) by an unsigned 32 bits. ++ Building block for ulldiv. */ ++static inline unsigned long long mach_x86_32_div96by32(const unsigned long long h, ++ const unsigned long l, ++ const unsigned long d, ++ unsigned long *const rp) ++{ ++ unsigned long rh; ++ const unsigned long qh = xnarch_uldivrem(h, d, &rh); ++ const unsigned long long t = xnarch_u64fromu32(rh, l); ++ const unsigned long ql = xnarch_uldivrem(t, d, rp); ++ ++ return xnarch_u64fromu32(qh, ql); ++} ++ ++/* Slow long long division. Uses xnarch_uldivrem, hence has the same property: ++ the compiler removes redundant calls. 
*/ ++static inline unsigned long long ++mach_x86_32_ulldiv(const unsigned long long ull, ++ const unsigned d, ++ unsigned long *const rp) ++{ ++ unsigned long h, l; ++ xnarch_u64tou32(ull, h, l); ++ return mach_x86_32_div96by32(h, l, d, rp); ++} ++#define xnarch_ulldiv(ull,d,rp) mach_x86_32_ulldiv((ull),(d),(rp)) ++ ++/* Fast scaled-math-based replacement for long long multiply-divide */ ++#define xnarch_llmulshft(ll, m, s) \ ++({ \ ++ long long __ret; \ ++ unsigned __lo, __hi; \ ++ \ ++ __asm__ ( \ ++ /* HI = HIWORD(ll) * m */ \ ++ "mov %%eax,%%ecx\n\t" \ ++ "mov %%edx,%%eax\n\t" \ ++ "imull %[__m]\n\t" \ ++ "mov %%eax,%[__lo]\n\t" \ ++ "mov %%edx,%[__hi]\n\t" \ ++ \ ++ /* LO = LOWORD(ll) * m */ \ ++ "mov %%ecx,%%eax\n\t" \ ++ "mull %[__m]\n\t" \ ++ \ ++ /* ret = (HI << 32) + LO */ \ ++ "add %[__lo],%%edx\n\t" \ ++ "adc $0,%[__hi]\n\t" \ ++ \ ++ /* ret = ret >> s */ \ ++ "mov %[__s],%%ecx\n\t" \ ++ "shrd %%cl,%%edx,%%eax\n\t" \ ++ "shrd %%cl,%[__hi],%%edx\n\t" \ ++ : "=A" (__ret), [__lo] "=&r" (__lo), [__hi] "=&r" (__hi) \ ++ : "A" (ll), [__m] "m" (m), [__s] "m" (s) \ ++ : "ecx"); \ ++ __ret; \ ++}) ++ ++static inline __attribute__((const)) unsigned long long ++mach_x86_32_nodiv_ullimd(const unsigned long long op, ++ const unsigned long long frac, ++ unsigned rhs_integ) ++{ ++ register unsigned rl __asm__("ecx"); ++ register unsigned rm __asm__("esi"); ++ register unsigned rh __asm__("edi"); ++ unsigned fracl, frach, opl, oph; ++ volatile unsigned integ = rhs_integ; ++ register unsigned long long t; ++ ++ xnarch_u64tou32(op, oph, opl); ++ xnarch_u64tou32(frac, frach, fracl); ++ ++ __asm__ ("mov %[oph], %%eax\n\t" ++ "mull %[frach]\n\t" ++ "mov %%eax, %[rm]\n\t" ++ "mov %%edx, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[fracl]\n\t" ++ "mov %%edx, %[rl]\n\t" ++ "shl $1, %%eax\n\t" ++ "adc $0, %[rl]\n\t" ++ "adc $0, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[oph], %%eax\n\t" ++ "mull %[fracl]\n\t" ++ "add %%eax, %[rl]\n\t" ++ "adc %%edx, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[frach]\n\t" ++ "add %%eax, %[rl]\n\t" ++ "adc %%edx, %[rm]\n\t" ++ "adc $0, %[rh]\n\t" ++ "mov %[opl], %%eax\n\t" ++ "mull %[integ]\n\t" ++ "add %[rm], %%eax\n\t" ++ "adc %%edx, %[rh]\n\t" ++ "mov %[oph], %%edx\n\t" ++ "imul %[integ], %%edx\n\t" ++ "add %[rh], %%edx\n\t" ++ : [rl]"=&c"(rl), [rm]"=&S"(rm), [rh]"=&D"(rh), "=&A"(t) ++ : [opl]"m"(opl), [oph]"m"(oph), ++ [fracl]"m"(fracl), [frach]"m"(frach), [integ]"m"(integ) ++ : "cc"); ++ ++ return t; ++} ++ ++#define xnarch_nodiv_ullimd(op, frac, integ) \ ++ mach_x86_32_nodiv_ullimd((op), (frac), (integ)) ++ ++#else /* x86_64 */ ++ ++static inline __attribute__((__const__)) long long ++mach_x86_64_llimd (long long op, unsigned m, unsigned d) ++{ ++ long long result; ++ ++ __asm__ ( ++ "imul %[m]\n\t" ++ "idiv %[d]\n\t" ++ : "=a" (result) ++ : "a" (op), [m] "r" ((unsigned long long)m), ++ [d] "r" ((unsigned long long)d) ++ : "rdx"); ++ ++ return result; ++} ++#define xnarch_llimd(ll,m,d) mach_x86_64_llimd((ll),(m),(d)) ++ ++static inline __attribute__((__const__)) long long ++mach_x86_64_llmulshft(long long op, unsigned m, unsigned s) ++{ ++ long long result; ++ ++ __asm__ ( ++ "imulq %[m]\n\t" ++ "shrd %%cl,%%rdx,%%rax\n\t" ++ : "=a,a" (result) ++ : "a,a" (op), [m] "m,r" ((unsigned long long)m), ++ "c,c" (s) ++ : "rdx"); ++ ++ return result; ++} ++#define xnarch_llmulshft(op, m, s) mach_x86_64_llmulshft((op), (m), (s)) ++ ++static inline __attribute__((__const__)) unsigned long long ++mach_x86_64_nodiv_ullimd(unsigned long long op, 
++ unsigned long long frac, unsigned rhs_integ) ++{ ++ register unsigned long long rl __asm__("rax") = frac; ++ register unsigned long long rh __asm__("rdx"); ++ register unsigned long long integ __asm__("rsi") = rhs_integ; ++ register unsigned long long t __asm__("r8") = 0x80000000ULL; ++ ++ __asm__ ("mulq %[op]\n\t" ++ "addq %[t], %[rl]\n\t" ++ "adcq $0, %[rh]\n\t" ++ "imulq %[op], %[integ]\n\t" ++ "leaq (%[integ], %[rh], 1),%[rl]": ++ [rh]"=&d"(rh), [rl]"+&a"(rl), [integ]"+S"(integ): ++ [op]"D"(op), [t]"r"(t): "cc"); ++ ++ return rl; ++} ++ ++#define xnarch_nodiv_ullimd(op, frac, integ) \ ++ mach_x86_64_nodiv_ullimd((op), (frac), (integ)) ++ ++#endif /* x86_64 */ ++ ++#include ++ ++#endif /* _COBALT_X86_ASM_UAPI_ARITH_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/syscall32-table.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/syscall32-table.h 2021-04-07 16:01:25.655636412 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (C) 2014 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_SYSCALL32_TABLE_H ++#define _COBALT_X86_ASM_SYSCALL32_TABLE_H ++ ++/* ++ * CAUTION: This file is read verbatim into the main syscall ++ * table. Only preprocessor stuff and syscall entries here. 
++ */ ++ ++__COBALT_CALL32emu_THUNK(thread_create) ++__COBALT_CALL32emu_THUNK(thread_setschedparam_ex) ++__COBALT_CALL32emu_THUNK(thread_getschedparam_ex) ++__COBALT_CALL32emu_THUNK(thread_setschedprio) ++__COBALT_CALL32emu_THUNK(sem_open) ++__COBALT_CALL32x_THUNK(sem_open) ++__COBALT_CALL32emu_THUNK(sem_timedwait) ++__COBALT_CALL32emu_THUNK(clock_getres) ++__COBALT_CALL32emu_THUNK(clock_gettime) ++__COBALT_CALL32emu_THUNK(clock_settime) ++__COBALT_CALL32emu_THUNK(clock_nanosleep) ++__COBALT_CALL32emu_THUNK(mutex_timedlock) ++__COBALT_CALL32emu_THUNK(cond_wait_prologue) ++__COBALT_CALL32emu_THUNK(mq_open) ++__COBALT_CALL32x_THUNK(mq_open) ++__COBALT_CALL32emu_THUNK(mq_getattr) ++__COBALT_CALL32x_THUNK(mq_getattr) ++__COBALT_CALL32emu_THUNK(mq_timedsend) ++__COBALT_CALL32emu_THUNK(mq_timedreceive) ++__COBALT_CALL32x_pure_THUNK(mq_timedreceive) ++__COBALT_CALL32emu_THUNK(mq_notify) ++__COBALT_CALL32x_THUNK(mq_notify) ++__COBALT_CALL32emu_THUNK(sched_weightprio) ++__COBALT_CALL32emu_THUNK(sched_setconfig_np) ++__COBALT_CALL32emu_THUNK(sched_getconfig_np) ++__COBALT_CALL32emu_THUNK(sched_setscheduler_ex) ++__COBALT_CALL32emu_THUNK(sched_getscheduler_ex) ++__COBALT_CALL32emu_THUNK(timer_create) ++__COBALT_CALL32x_THUNK(timer_create) ++__COBALT_CALL32emu_THUNK(timer_settime) ++__COBALT_CALL32emu_THUNK(timer_gettime) ++__COBALT_CALL32emu_THUNK(timerfd_settime) ++__COBALT_CALL32emu_THUNK(timerfd_gettime) ++__COBALT_CALL32emu_THUNK(sigwait) ++__COBALT_CALL32x_THUNK(sigwait) ++__COBALT_CALL32emu_THUNK(sigtimedwait) ++__COBALT_CALL32x_THUNK(sigtimedwait) ++__COBALT_CALL32emu_THUNK(sigwaitinfo) ++__COBALT_CALL32x_THUNK(sigwaitinfo) ++__COBALT_CALL32emu_THUNK(sigpending) ++__COBALT_CALL32x_THUNK(sigpending) ++__COBALT_CALL32emu_THUNK(sigqueue) ++__COBALT_CALL32x_THUNK(sigqueue) ++__COBALT_CALL32emu_THUNK(monitor_wait) ++__COBALT_CALL32emu_THUNK(event_wait) ++__COBALT_CALL32emu_THUNK(select) ++__COBALT_CALL32x_THUNK(select) ++__COBALT_CALL32emu_THUNK(recvmsg) ++__COBALT_CALL32x_THUNK(recvmsg) ++__COBALT_CALL32emu_THUNK(sendmsg) ++__COBALT_CALL32x_THUNK(sendmsg) ++__COBALT_CALL32emu_THUNK(mmap) ++__COBALT_CALL32x_THUNK(mmap) ++__COBALT_CALL32emu_THUNK(backtrace) ++__COBALT_CALL32x_THUNK(backtrace) ++ ++#endif /* !_COBALT_X86_ASM_SYSCALL32_TABLE_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/wrappers.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/wrappers.h 2021-04-07 16:01:25.651636418 +0800 +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (C) 2005 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_WRAPPERS_H ++#define _COBALT_X86_ASM_WRAPPERS_H ++ ++#include /* Read the generic portion. 
*/ ++ ++#define __get_user_inatomic __get_user ++#define __put_user_inatomic __put_user ++ ++#if LINUX_VERSION_CODE > KERNEL_VERSION(4,9,108) && \ ++ LINUX_VERSION_CODE < KERNEL_VERSION(4,10,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++#if LINUX_VERSION_CODE > KERNEL_VERSION(4,4,137) && \ ++ LINUX_VERSION_CODE < KERNEL_VERSION(4,5,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++#define IPIPE_X86_FPU_EAGER ++#endif ++ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) ++#include ++#include ++#else ++#include ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,0,0) ++ ++static inline void kernel_fpu_disable(void) ++{ ++ __thread_clear_has_fpu(current); ++} ++ ++static inline void kernel_fpu_enable(void) ++{ ++} ++ ++static inline bool kernel_fpu_disabled(void) ++{ ++ return __thread_has_fpu(current) == 0 && (read_cr0() & X86_CR0_TS) == 0; ++} ++#endif /* linux < 4.1.0 */ ++ ++#endif /* _COBALT_X86_ASM_WRAPPERS_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/fptest.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/fptest.h 2021-04-07 16:01:25.646636425 +0800 +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (C) 2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_FPTEST_H ++#define _COBALT_X86_ASM_FPTEST_H ++ ++#include ++#include ++#include ++#include ++ ++static inline int fp_kernel_supported(void) ++{ ++ return 1; ++} ++ ++static inline int fp_linux_begin(void) ++{ ++#if defined(CONFIG_X86_USE_3DNOW) \ ++ || defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) ++ /* Ther kernel uses x86 FPU, we can not also use it in our tests. */ ++ static int once = 0; ++ if (!once) { ++ once = 1; ++ printk("%s:%d: Warning: Linux is compiled to use FPU in " ++ "kernel-space.\nFor this reason, switchtest can not " ++ "test using FPU in Linux kernel-space.\n", ++ __FILE__, __LINE__); ++ } ++ return -EBUSY; ++#endif /* 3DNow or RAID 456 */ ++ kernel_fpu_begin(); ++ /* kernel_fpu_begin() does no re-initialize the fpu context, but ++ fp_regs_set() implicitely expects an initialized fpu context, so ++ initialize it here. 
*/ ++ __asm__ __volatile__("fninit"); ++ return 0; ++} ++ ++static inline void fp_linux_end(void) ++{ ++ kernel_fpu_end(); ++} ++ ++static inline int fp_detect(void) ++{ ++ int features = 0; ++ ++#ifndef cpu_has_xmm2 ++#ifdef cpu_has_sse2 ++#define cpu_has_xmm2 cpu_has_sse2 ++#else ++#define cpu_has_xmm2 0 ++#endif ++#endif ++ if (cpu_has_xmm2) ++ features |= __COBALT_HAVE_SSE2; ++ ++#ifndef cpu_has_avx ++#define cpu_has_avx 0 ++#endif ++ if (cpu_has_avx) ++ features |= __COBALT_HAVE_AVX; ++ ++ return features; ++} ++ ++#endif /* _COBALT_X86_ASM_FPTEST_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/features.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/features.h 2021-04-07 16:01:25.641636432 +0800 +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2005-2013 Philippe Gerum . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. ++ */ ++#ifndef _COBALT_X86_ASM_FEATURES_H ++#define _COBALT_X86_ASM_FEATURES_H ++ ++struct cobalt_featinfo; ++static inline void collect_arch_features(struct cobalt_featinfo *p) { } ++ ++#include ++ ++#endif /* !_COBALT_X86_ASM_FEATURES_H */ +--- linux/arch/x86/xenomai/include/asm/xenomai/thread.h 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/include/asm/xenomai/thread.h 2021-04-07 16:01:25.637636438 +0800 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2004-2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++#ifndef _COBALT_X86_ASM_THREAD_H ++#define _COBALT_X86_ASM_THREAD_H ++ ++#include ++#include ++#include ++ ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) ++typedef union thread_xstate x86_fpustate; ++#define x86_fpustate_ptr(t) ((t)->fpu.state) ++#else ++typedef union fpregs_state x86_fpustate; ++#define x86_fpustate_ptr(t) ((t)->fpu.active_state) ++#endif ++#endif ++ ++struct xnarchtcb { ++ struct xntcb core; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ unsigned long sp; ++ unsigned long *spp; ++ unsigned long ip; ++ unsigned long *ipp; ++#endif ++#ifdef IPIPE_X86_FPU_EAGER ++ struct fpu *kfpu; ++#else ++ x86_fpustate *fpup; ++ unsigned int root_used_math: 1; ++ x86_fpustate *kfpu_state; ++#endif ++ unsigned int root_kfpu: 1; ++}; ++ ++#define xnarch_fpu_ptr(tcb) ((tcb)->fpup) ++ ++#define xnarch_fault_regs(d) ((d)->regs) ++#define xnarch_fault_trap(d) ((d)->exception) ++#define xnarch_fault_code(d) ((d)->regs->orig_ax) ++#define xnarch_fault_pc(d) ((d)->regs->ip) ++#define xnarch_fault_fpu_p(d) ((d)->exception == X86_TRAP_NM) ++#define xnarch_fault_pf_p(d) ((d)->exception == X86_TRAP_PF) ++#define xnarch_fault_bp_p(d) ((current->ptrace & PT_PTRACED) && \ ++ ((d)->exception == X86_TRAP_DB || (d)->exception == X86_TRAP_BP)) ++#define xnarch_fault_notify(d) (!xnarch_fault_bp_p(d)) ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to); ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d); ++ ++void xnarch_leave_root(struct xnthread *root); ++ ++void xnarch_init_root_tcb(struct xnthread *thread); ++ ++void xnarch_init_shadow_tcb(struct xnthread *thread); ++ ++void xnarch_switch_to(struct xnthread *out, struct xnthread *in); ++ ++static inline void xnarch_enter_root(struct xnthread *root) { } ++ ++static inline int xnarch_escalate(void) ++{ ++ if (ipipe_root_p) { ++ ipipe_raise_irq(cobalt_pipeline.escalate_virq); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++int mach_x86_thread_init(void); ++void mach_x86_thread_cleanup(void); ++ ++#endif /* !_COBALT_X86_ASM_THREAD_H */ +--- linux/arch/x86/xenomai/thread.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/thread.c 2021-04-07 16:01:25.632636445 +0800 +@@ -0,0 +1,569 @@ ++/* ++ * Copyright (C) 2001-2013 Philippe Gerum . ++ * Copyright (C) 2004-2006 Gilles Chanteperdrix . ++ * ++ * Xenomai is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * Xenomai is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with Xenomai; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ++ * 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct kmem_cache *xstate_cache; ++ ++#ifdef IPIPE_X86_FPU_EAGER ++#define fpu_kernel_xstate_size sizeof(struct fpu) ++#else ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0) ++#define fpu_kernel_xstate_size xstate_size ++#endif ++#endif /* IPIPE_X86_FPU_EAGER */ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) ++#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) ++#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR) ++#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) ++#endif ++ ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0) ++#include ++#include ++#define x86_fpregs_active(t) __thread_has_fpu(t) ++#define x86_fpregs_deactivate(t) __thread_clear_has_fpu(t) ++#define x86_fpregs_activate(t) __thread_set_has_fpu(t) ++#define x86_xstate_alignment __alignof__(union thread_xstate) ++#else ++#include ++ ++static inline int x86_fpregs_active(struct task_struct *t) ++{ ++ return t->thread.fpu.fpregs_active; ++} ++ ++static inline void x86_fpregs_deactivate(struct task_struct *t) ++{ ++ if (x86_fpregs_active(t)) ++ __fpregs_deactivate(&t->thread.fpu); ++} ++ ++static inline void x86_fpregs_activate(struct task_struct *t) ++{ ++ if (!x86_fpregs_active(t)) ++ __fpregs_activate(&t->thread.fpu); ++} ++ ++#define x86_xstate_alignment __alignof__(union fpregs_state) ++ ++#endif ++#else /* IPIPE_X86_FPU_EAGER */ ++#define x86_xstate_alignment __alignof__(union fpregs_state) ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++/* ++ * This is obsolete context switch code uselessly duplicating ++ * mainline's. ++ */ ++#ifdef CONFIG_X86_32 ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ ++#define __CANARY_OUTPUT \ ++ , [stack_canary] "=m" (stack_canary.canary) ++ ++#define __CANARY_INPUT \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++ ++#define __CANARY_SWITCH \ ++ "movl %P[task_canary](%%edx), %%ebx\n\t" \ ++ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" ++ ++#else /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define __CANARY_OUTPUT ++#define __CANARY_INPUT ++#define __CANARY_SWITCH ++ ++#endif /* !CONFIG_CC_STACKPROTECTOR */ ++ ++static inline void do_switch_threads(struct xnarchtcb *out_tcb, ++ struct xnarchtcb *in_tcb, ++ struct task_struct *outproc, ++ struct task_struct *inproc) ++{ ++ long ebx_out, ecx_out, edi_out, esi_out; ++ ++ __asm__ __volatile__("pushfl\n\t" ++ "pushl %%ebp\n\t" ++ "movl %[spp_out_ptr],%%ecx\n\t" ++ "movl %%esp,(%%ecx)\n\t" ++ "movl %[ipp_out_ptr],%%ecx\n\t" ++ "movl $1f,(%%ecx)\n\t" ++ "movl %[spp_in_ptr],%%ecx\n\t" ++ "movl %[ipp_in_ptr],%%edi\n\t" ++ "movl (%%ecx),%%esp\n\t" ++ "pushl (%%edi)\n\t" ++ __CANARY_SWITCH ++ "jmp __switch_to\n\t" ++ "1: popl %%ebp\n\t" ++ "popfl\n\t" ++ : "=b"(ebx_out), ++ "=&c"(ecx_out), ++ "=S"(esi_out), ++ "=D"(edi_out), ++ "+a"(outproc), ++ "+d"(inproc) ++ __CANARY_OUTPUT ++ : [spp_out_ptr] "m"(out_tcb->spp), ++ [ipp_out_ptr] "m"(out_tcb->ipp), ++ [spp_in_ptr] "m"(in_tcb->spp), ++ [ipp_in_ptr] "m"(in_tcb->ipp) ++ __CANARY_INPUT ++ : "memory"); ++} ++ ++#else /* CONFIG_X86_64 */ ++ ++#define __SWITCH_CLOBBER_LIST , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ ++#define __CANARY_OUTPUT \ ++ , [gs_canary] "=m" (irq_stack_union.stack_canary) ++ ++#define __CANARY_INPUT \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ ++ , [current_task] "m" (current_task) ++ ++#define __CANARY_SWITCH \ ++ "movq 
"__percpu_arg([current_task])",%%rsi\n\t" \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" ++ ++#else /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define __CANARY_OUTPUT ++#define __CANARY_INPUT ++#define __CANARY_SWITCH ++ ++#endif /* !CONFIG_CC_STACKPROTECTOR */ ++ ++#define do_switch_threads(prev, next, p_rsp, n_rsp, p_rip, n_rip) \ ++ ({ \ ++ long __rdi, __rsi, __rax, __rbx, __rcx, __rdx; \ ++ \ ++ __asm__ __volatile__("pushfq\n\t" \ ++ "pushq %%rbp\n\t" \ ++ "movq %%rsi, %%rbp\n\t" \ ++ "movq %%rsp, (%%rdx)\n\t" \ ++ "movq $1f, (%%rax)\n\t" \ ++ "movq (%%rcx), %%rsp\n\t" \ ++ "pushq (%%rbx)\n\t" \ ++ "jmp __switch_to\n\t" \ ++ "1:\n\t" \ ++ __CANARY_SWITCH \ ++ "movq %%rbp, %%rsi\n\t" \ ++ "popq %%rbp\n\t" \ ++ "popfq\n\t" \ ++ : "=S" (__rsi), "=D" (__rdi), "=a" (__rax), \ ++ "=b" (__rbx), "=c" (__rcx), "=d" (__rdx) \ ++ __CANARY_OUTPUT \ ++ : "0" (next), "1" (prev), "5" (p_rsp), "4" (n_rsp), \ ++ "2" (p_rip), "3" (n_rip) \ ++ __CANARY_INPUT \ ++ : "memory", "cc" __SWITCH_CLOBBER_LIST); \ ++ }) ++ ++#endif /* CONFIG_X86_64 */ ++ ++#else /* LINUX_VERSION_CODE >= 4.8 */ ++ ++#include ++ ++#endif /* LINUX_VERSION_CODE >= 4.8 */ ++ ++void xnarch_switch_to(struct xnthread *out, struct xnthread *in) ++{ ++ struct xnarchtcb *out_tcb = &out->tcb, *in_tcb = &in->tcb; ++ struct task_struct *prev, *next, *last; ++ struct mm_struct *prev_mm, *next_mm; ++ ++ prev = out_tcb->core.host_task; ++#ifndef IPIPE_X86_FPU_EAGER ++ if (x86_fpregs_active(prev)) ++ /* ++ * __switch_to will try and use __unlazy_fpu, so we ++ * need to clear the ts bit. ++ */ ++ clts(); ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ ++ next = in_tcb->core.host_task; ++#ifndef IPIPE_X86_FPU_EAGER ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ++ next->thread.fpu.counter = 0; ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0) ++ next->thread.fpu_counter = 0; ++#else ++ next->fpu_counter = 0; ++#endif ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ prev_mm = out_tcb->core.active_mm; ++ next_mm = in_tcb->core.mm; ++ if (next_mm == NULL) { ++ in_tcb->core.active_mm = prev_mm; ++ enter_lazy_tlb(prev_mm, next); ++ } else { ++ ipipe_switch_mm_head(prev_mm, next_mm, next); ++ /* ++ * We might be switching back to the root thread, ++ * which we preempted earlier, shortly after "current" ++ * dropped its mm context in the do_exit() path ++ * (next->mm == NULL). In that particular case, the ++ * kernel expects a lazy TLB state for leaving the mm. ++ */ ++ if (next->mm == NULL) ++ enter_lazy_tlb(prev_mm, next); ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++#ifdef CONFIG_X86_32 ++ do_switch_threads(out_tcb, in_tcb, prev, next); ++#else /* CONFIG_X86_64 */ ++ do_switch_threads(prev, next, ++ out_tcb->spp, in_tcb->spp, ++ out_tcb->ipp, in_tcb->ipp); ++#endif /* CONFIG_X86_64 */ ++ (void)last; ++#else /* LINUX_VERSION_CODE >= 4.8 */ ++ switch_to(prev, next, last); ++#endif /* LINUX_VERSION_CODE >= 4.8 */ ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ stts(); ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++} ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ ++#ifdef CONFIG_X86_64 ++#define XSAVE_PREFIX "0x48," ++#define XSAVE_SUFFIX "q" ++#else ++#define XSAVE_PREFIX ++#define XSAVE_SUFFIX ++#endif ++ ++static inline void __do_save_fpu_state(x86_fpustate *fpup) ++{ ++#ifdef cpu_has_xsave ++ if (cpu_has_xsave) { ++#ifdef CONFIG_AS_AVX ++ __asm__ __volatile__("xsave" XSAVE_SUFFIX " %0" ++ : "=m" (fpup->xsave) : "a" (-1), "d" (-1) ++ : "memory"); ++#else /* !CONFIG_AS_AVX */ ++ __asm __volatile__(".byte " XSAVE_PREFIX "0x0f,0xae,0x27" ++ : : "D" (&fpup->xsave), "m" (fpup->xsave), ++ "a" (-1), "d" (-1) ++ : "memory"); ++#endif /* !CONFIG_AS_AVX */ ++ return; ++ } ++#endif /* cpu_has_xsave */ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_fxsr) ++ __asm__ __volatile__("fxsave %0; fnclex":"=m"(*fpup)); ++ else ++ __asm__ __volatile__("fnsave %0; fwait":"=m"(*fpup)); ++#else /* CONFIG_X86_64 */ ++#ifdef CONFIG_AS_FXSAVEQ ++ __asm __volatile__("fxsaveq %0" : "=m" (fpup->fxsave)); ++#else /* !CONFIG_AS_FXSAVEQ */ ++ __asm__ __volatile__("rex64/fxsave (%[fx])" ++ : "=m" (fpup->fxsave) ++ : [fx] "R" (&fpup->fxsave)); ++#endif /* !CONFIG_AS_FXSAVEQ */ ++#endif /* CONFIG_X86_64 */ ++} ++ ++static inline void __do_restore_fpu_state(x86_fpustate *fpup) ++{ ++#ifdef cpu_has_xsave ++ if (cpu_has_xsave) { ++#ifdef CONFIG_AS_AVX ++ __asm__ __volatile__("xrstor" XSAVE_SUFFIX " %0" ++ : : "m" (fpup->xsave), "a" (-1), "d" (-1) ++ : "memory"); ++#else /* !CONFIG_AS_AVX */ ++ __asm__ __volatile__(".byte " XSAVE_PREFIX "0x0f,0xae,0x2f" ++ : : "D" (&fpup->xsave), "m" (fpup->xsave), ++ "a" (-1), "d" (-1) ++ : "memory"); ++#endif /* !CONFIG_AS_AVX */ ++ return; ++ } ++#endif /* cpu_has_xsave */ ++#ifdef CONFIG_X86_32 ++ if (cpu_has_fxsr) ++ __asm__ __volatile__("fxrstor %0": /* no output */ :"m"(*fpup)); ++ else ++ __asm__ __volatile__("frstor %0": /* no output */ :"m"(*fpup)); ++#else /* CONFIG_X86_64 */ ++#ifdef CONFIG_AS_FXSAVEQ ++ __asm__ __volatile__("fxrstorq %0" : : "m" (fpup->fxsave)); ++#else /* !CONFIG_AS_FXSAVEQ */ ++ __asm__ __volatile__("rex64/fxrstor (%0)" ++ : : "R" (&fpup->fxsave), "m" (fpup->fxsave)); ++#endif /* !CONFIG_AS_FXSAVEQ */ ++#endif /* CONFIG_X86_64 */ ++} ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(to); ++ struct task_struct *p = tcb->core.host_task; ++ ++ if (x86_fpregs_active(p)) ++ return 0; ++ ++ if (!(p->flags & PF_USED_MATH)) { ++ /* ++ * The faulting task is a shadow using the FPU for the first ++ * time, initialize the FPU context and tell linux about it. ++ */ ++ __asm__ __volatile__("clts; fninit"); ++ ++ if (cpu_has_xmm) { ++ unsigned long __mxcsr = 0x1f80UL & 0xffbfUL; ++ __asm__ __volatile__("ldmxcsr %0"::"m"(__mxcsr)); ++ } ++ p->flags |= PF_USED_MATH; ++ } else { ++ /* ++ * The faulting task already used FPU in secondary ++ * mode. ++ */ ++ clts(); ++ __do_restore_fpu_state(tcb->fpup); ++ } ++ ++ x86_fpregs_activate(p); ++ ++ xnlock_get(&nklock); ++ xnthread_set_state(to, XNFPU); ++ xnlock_put(&nklock); ++ ++ return 1; ++} ++#else /* IPIPE_X86_FPU_EAGER */ ++ ++int xnarch_handle_fpu_fault(struct xnthread *from, ++ struct xnthread *to, struct ipipe_trap_data *d) ++{ ++ /* in eager mode there are no such faults */ ++ BUG_ON(1); ++} ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++ ++#define current_task_used_kfpu() kernel_fpu_disabled() ++ ++#define tcb_used_kfpu(t) ((t)->root_kfpu) ++ ++#ifndef IPIPE_X86_FPU_EAGER ++void xnarch_leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *const rootcb = xnthread_archtcb(root); ++ struct task_struct *const p = current; ++ x86_fpustate *const current_task_fpup = x86_fpustate_ptr(&p->thread); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) && defined(CONFIG_X86_64) ++ rootcb->spp = &p->thread.sp; ++ rootcb->ipp = &p->thread.rip; ++#endif ++ if (!current_task_used_kfpu()) { ++ rootcb->root_kfpu = 0; ++ rootcb->fpup = x86_fpregs_active(p) ? current_task_fpup : NULL; ++ return; ++ } ++ ++ /* ++ * We need to save the kernel FPU context before preempting, ++ * store it in our root control block. ++ */ ++ rootcb->root_kfpu = 1; ++ rootcb->fpup = current_task_fpup; ++ rootcb->root_used_math = !!(p->flags & PF_USED_MATH); ++ x86_fpustate_ptr(&p->thread) = rootcb->kfpu_state; ++ x86_fpregs_activate(p); ++ p->flags |= PF_USED_MATH; ++ kernel_fpu_enable(); ++} ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to) ++{ ++ x86_fpustate *const prev_fpup = from ? from->tcb.fpup : NULL; ++ struct xnarchtcb *const tcb = xnthread_archtcb(to); ++ struct task_struct *const p = tcb->core.host_task; ++ x86_fpustate *const next_task_fpup = x86_fpustate_ptr(&p->thread); ++ ++ /* Restore lazy mode only if root fpu owner is not current. */ ++ if (xnthread_test_state(to, XNROOT) && ++ prev_fpup != next_task_fpup && ++ !tcb_used_kfpu(tcb)) ++ return; ++ ++ clts(); ++ /* ++ * The only case where we can skip restoring the FPU is: ++ * - the fpu context of the next task is the current fpu ++ * context; ++ * - root thread has not used fpu in kernel-space; ++ * - cpu has fxsr (because if it does not, last context switch ++ * reinitialized fpu) ++ */ ++ if (prev_fpup != next_task_fpup || !cpu_has_fxsr) ++ __do_restore_fpu_state(next_task_fpup); ++ ++ if (!tcb_used_kfpu(tcb)) { ++ x86_fpregs_activate(p); ++ return; ++ } ++ kernel_fpu_disable(); ++ ++ x86_fpustate_ptr(&p->thread) = to->tcb.fpup; ++ if (!tcb->root_used_math) { ++ x86_fpregs_deactivate(p); ++ p->flags &= ~PF_USED_MATH; ++ } ++} ++#else /* IPIPE_X86_FPU_EAGER */ ++void xnarch_leave_root(struct xnthread *root) ++{ ++ struct xnarchtcb *const rootcb = xnthread_archtcb(root); ++ ++ rootcb->root_kfpu = current_task_used_kfpu(); ++ ++ if (!tcb_used_kfpu(rootcb)) ++ return; ++ ++ /* save fpregs from in-kernel use */ ++ copy_fpregs_to_fpstate(rootcb->kfpu); ++ kernel_fpu_enable(); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0) ++ /* restore current's fpregs */ ++ __cpu_invalidate_fpregs_state(); ++ switch_fpu_finish(¤t->thread.fpu, smp_processor_id()); ++#else ++ /* mark current thread as not owning the FPU anymore */ ++ if (fpregs_active()) ++ fpregs_deactivate(¤t->thread.fpu); ++#endif ++} ++ ++void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to) ++{ ++ struct xnarchtcb *const to_tcb = xnthread_archtcb(to); ++ ++ if (!tcb_used_kfpu(to_tcb)) ++ return; ++ ++ copy_kernel_to_fpregs(&to_tcb->kfpu->state); ++ kernel_fpu_disable(); ++} ++#endif /* ! 
IPIPE_X86_FPU_EAGER */ ++ ++void xnarch_init_root_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ tcb->sp = 0; ++ tcb->spp = &tcb->sp; ++ tcb->ipp = &tcb->ip; ++#endif ++#ifndef IPIPE_X86_FPU_EAGER ++ tcb->fpup = NULL; ++ tcb->kfpu_state = kmem_cache_zalloc(xstate_cache, GFP_KERNEL); ++#else /* IPIPE_X86_FPU_EAGER */ ++ tcb->kfpu = kmem_cache_zalloc(xstate_cache, GFP_KERNEL); ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ tcb->root_kfpu = 0; ++} ++ ++void xnarch_init_shadow_tcb(struct xnthread *thread) ++{ ++ struct xnarchtcb *tcb = xnthread_archtcb(thread); ++ struct task_struct *p = tcb->core.host_task; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) ++ tcb->sp = 0; ++ tcb->spp = &p->thread.sp; ++#ifdef CONFIG_X86_32 ++ tcb->ipp = &p->thread.ip; ++#else ++ tcb->ipp = &p->thread.rip; /* raw naming intended. */ ++#endif ++#endif ++#ifndef IPIPE_X86_FPU_EAGER ++ tcb->fpup = x86_fpustate_ptr(&p->thread); ++ tcb->kfpu_state = NULL; ++#else /* IPIPE_X86_FPU_EAGER */ ++ tcb->kfpu = NULL; ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++ tcb->root_kfpu = 0; ++ ++#ifndef IPIPE_X86_FPU_EAGER ++ /* XNFPU is set upon first FPU fault */ ++ xnthread_clear_state(thread, XNFPU); ++#else /* IPIPE_X86_FPU_EAGER */ ++ /* XNFPU is always set */ ++ xnthread_set_state(thread, XNFPU); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) ++ fpu__activate_fpstate_read(&p->thread.fpu); ++#else ++ fpu__initialize(&p->thread.fpu); ++#endif ++#endif /* ! IPIPE_X86_FPU_EAGER */ ++} ++ ++int mach_x86_thread_init(void) ++{ ++ xstate_cache = kmem_cache_create("cobalt_x86_xstate", ++ fpu_kernel_xstate_size, ++ x86_xstate_alignment, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0) ++ SLAB_NOTRACK, ++#else ++ 0, ++#endif ++ NULL); ++ if (xstate_cache == NULL) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++void mach_x86_thread_cleanup(void) ++{ ++ kmem_cache_destroy(xstate_cache); ++} +--- linux/arch/x86/xenomai/c1e.c 1970-01-01 08:00:00.000000000 +0800 ++++ linux-patched/arch/x86/xenomai/c1e.c 2021-04-07 16:01:25.627636452 +0800 +@@ -0,0 +1,72 @@ ++/* ++ * Disable Intel automatic promotion to C1E mode. ++ * Lifted from drivers/idle/intel_idle.c ++ * Copyright (c) 2013, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++#include ++#include ++#include ++#include ++ ++#define ICPU(model) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, 1UL } ++ ++static const struct x86_cpu_id c1e_ids[] = { ++ ICPU(0x1a), ++ ICPU(0x1e), ++ ICPU(0x1f), ++ ICPU(0x25), ++ ICPU(0x2c), ++ ICPU(0x2e), ++ ICPU(0x2f), ++ ICPU(0x2a), ++ ICPU(0x2d), ++ ICPU(0x3a), ++ ICPU(0x3e), ++ ICPU(0x3c), ++ ICPU(0x3f), ++ ICPU(0x45), ++ ICPU(0x46), ++ ICPU(0x4D), ++ {} ++}; ++ ++#undef ICPU ++ ++static void c1e_promotion_disable(void *dummy) ++{ ++ unsigned long long msr_bits; ++ ++ rdmsrl(MSR_IA32_POWER_CTL, msr_bits); ++ msr_bits &= ~0x2; ++ wrmsrl(MSR_IA32_POWER_CTL, msr_bits); ++} ++ ++void mach_x86_c1e_disable(void) ++{ ++ const struct x86_cpu_id *id; ++ ++ id = x86_match_cpu(c1e_ids); ++ if (id) { ++ printk("[Xenomai] disabling automatic C1E state promotion on Intel processor\n"); ++ /* ++ * cpu uses C1E, disable this feature (copied from ++ * intel_idle driver) ++ */ ++ on_each_cpu(c1e_promotion_disable, NULL, 1); ++ } ++} +--- linux/arch/x86/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ linux-patched/arch/x86/Makefile 2021-04-07 16:01:25.584636514 +0800 +@@ -338,3 +338,6 @@ + echo ' FDARGS="..." arguments for the booted kernel' + echo ' FDINITRD=file initrd for the booted kernel' + endef ++ ++KBUILD_CFLAGS += -Iarch/$(SRCARCH)/xenomai/include -Iinclude/xenomai ++core-$(CONFIG_XENOMAI) += arch/x86/xenomai/ +--- linux/init/Kconfig 2021-04-07 16:00:26.626720756 +0800 ++++ linux-patched/init/Kconfig 2021-04-07 16:01:25.577636524 +0800 +@@ -2056,3 +2056,54 @@ + # . + config ARCH_HAS_SYSCALL_WRAPPER + def_bool n ++menuconfig XENOMAI ++ depends on X86_TSC || !X86 ++ bool "Xenomai/cobalt" ++ select IPIPE ++ select IPIPE_WANT_APIREV_2 ++ default y ++ help ++ Xenomai's Cobalt core is a real-time extension to the Linux ++ kernel, which exhibits very short interrupt and scheduling ++ latency, without affecting the regular kernel services. ++ ++ This option enables the set of extended kernel services ++ required to run the real-time applications in user-space, ++ over the Xenomai libraries. ++ ++ Please visit http://xenomai.org for more information. ++ ++if XENOMAI ++source "arch/x86/xenomai/Kconfig" ++endif ++ ++if MIGRATION ++comment "WARNING! Page migration (CONFIG_MIGRATION) may increase" ++comment "latency." ++endif ++ ++if APM || CPU_FREQ || ACPI_PROCESSOR || INTEL_IDLE ++comment "WARNING! At least one of APM, CPU frequency scaling, ACPI 'processor'" ++comment "or CPU idle features is enabled. Any of these options may" ++comment "cause troubles with Xenomai. You should disable them." 
++endif ++ ++if !GENERIC_CLOCKEVENTS ++comment "NOTE: Xenomai 3.x requires CONFIG_GENERIC_CLOCKEVENTS" ++endif ++ ++config XENO_VERSION_MAJOR ++ int ++ default 3 ++ ++config XENO_VERSION_MINOR ++ int ++ default 1 ++ ++config XENO_REVISION_LEVEL ++ int ++ default 0 ++ ++config XENO_VERSION_STRING ++ string ++ default "3.1" diff --git a/enable_irq.patch b/enable_irq_arm64.patch similarity index 100% rename from enable_irq.patch rename to enable_irq_arm64.patch diff --git a/ipipe-core-4.19.55-oe1.patch b/ipipe-core-4.19.55-oe1_arm64.patch similarity index 100% rename from ipipe-core-4.19.55-oe1.patch rename to ipipe-core-4.19.55-oe1_arm64.patch diff --git a/ipipe-core-4.19.90-oe1_x86.patch b/ipipe-core-4.19.90-oe1_x86.patch new file mode 100755 index 0000000000000000000000000000000000000000..f9319362f92cf4d96fe4f175a9a82ac7dfb1d5ee --- /dev/null +++ b/ipipe-core-4.19.90-oe1_x86.patch @@ -0,0 +1,142264 @@ +diff -uprN kernel/arch/x86/entry/common.c kernel_new/arch/x86/entry/common.c +--- kernel/arch/x86/entry/common.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/common.c 2021-04-01 18:28:07.548863405 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -47,6 +48,22 @@ __visible inline void enter_from_user_mo + static inline void enter_from_user_mode(void) {} + #endif + ++#ifdef CONFIG_IPIPE ++#define disable_local_irqs() do { \ ++ hard_local_irq_disable(); \ ++ trace_hardirqs_off(); \ ++} while (0) ++#define enable_local_irqs() do { \ ++ trace_hardirqs_on(); \ ++ hard_local_irq_enable(); \ ++} while (0) ++#define check_irqs_disabled() hard_irqs_disabled() ++#else ++#define disable_local_irqs() local_irq_disable() ++#define enable_local_irqs() local_irq_enable() ++#define check_irqs_disabled() irqs_disabled() ++#endif ++ + static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) + { + #ifdef CONFIG_X86_64 +@@ -147,7 +164,7 @@ static void exit_to_usermode_loop(struct + */ + while (true) { + /* We have work to do. */ +- local_irq_enable(); ++ enable_local_irqs(); + + if (cached_flags & _TIF_NEED_RESCHED) + schedule(); +@@ -172,7 +189,7 @@ static void exit_to_usermode_loop(struct + fire_user_return_notifiers(); + + /* Disable IRQs and retry */ +- local_irq_disable(); ++ disable_local_irqs(); + + cached_flags = READ_ONCE(current_thread_info()->flags); + +@@ -192,11 +209,23 @@ __visible inline void prepare_exit_to_us + lockdep_assert_irqs_disabled(); + lockdep_sys_exit(); + ++again: + cached_flags = READ_ONCE(ti->flags); + + if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) + exit_to_usermode_loop(regs, cached_flags); + ++ if (ipipe_user_intret_notifier_enabled(ti)) { ++ int ret; ++ ++ enable_local_irqs(); ++ ret = __ipipe_notify_user_intreturn(); ++ disable_local_irqs(); ++ ++ if (ret == 0) ++ goto again; ++ } ++ + #ifdef CONFIG_COMPAT + /* + * Compat syscalls set TS_COMPAT. Make sure we clear it before +@@ -255,8 +284,8 @@ __visible inline void syscall_return_slo + CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && +- WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) +- local_irq_enable(); ++ WARN(check_irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) ++ enable_local_irqs(); + + rseq_syscall(regs); + +@@ -264,10 +293,13 @@ __visible inline void syscall_return_slo + * First do one-time work. If these work items are enabled, we + * want to run them exactly once per syscall exit with IRQs on. 
+ */ +- if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) ++ if (unlikely((!IS_ENABLED(CONFIG_IPIPE) || ++ syscall_get_nr(current, regs) < ++ ipipe_root_nr_syscalls(ti)) && ++ (cached_flags & SYSCALL_EXIT_WORK_FLAGS))) + syscall_slow_exit_work(regs, cached_flags); + +- local_irq_disable(); ++ disable_local_irqs(); + prepare_exit_to_usermode(regs); + } + +@@ -275,10 +307,20 @@ __visible inline void syscall_return_slo + __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) + { + struct thread_info *ti; ++ int ret; + + enter_from_user_mode(); +- local_irq_enable(); ++ enable_local_irqs(); + ti = current_thread_info(); ++ ++ ret = ipipe_handle_syscall(ti, nr & __SYSCALL_MASK, regs); ++ if (ret > 0) { ++ disable_local_irqs(); ++ return; ++ } ++ if (ret < 0) ++ goto done; ++ + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) + nr = syscall_trace_enter(regs); + +@@ -292,12 +334,45 @@ __visible void do_syscall_64(unsigned lo + nr = array_index_nospec(nr, NR_syscalls); + regs->ax = sys_call_table[nr](regs); + } +- ++done: + syscall_return_slowpath(regs); + } + #endif + + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) ++ ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_X86_32 ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return ipipe_handle_syscall(ti, nr, regs); ++} ++#else ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ struct pt_regs regs64 = *regs; ++ int ret; ++ ++ regs64.di = (unsigned int)regs->bx; ++ regs64.si = (unsigned int)regs->cx; ++ regs64.r10 = (unsigned int)regs->si; ++ regs64.r8 = (unsigned int)regs->di; ++ regs64.r9 = (unsigned int)regs->bp; ++ ret = ipipe_handle_syscall(ti, nr, ®s64); ++ regs->ax = (unsigned int)regs64.ax; ++ ++ return ret; ++} ++#endif /* CONFIG_X86_32 */ ++#else /* CONFIG_IPIPE */ ++static inline int pipeline_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return 0; ++} ++#endif /* CONFIG_IPIPE */ + /* + * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does + * all entry and exit work and returns with IRQs off. This function is +@@ -308,11 +383,20 @@ static __always_inline void do_syscall_3 + { + struct thread_info *ti = current_thread_info(); + unsigned int nr = (unsigned int)regs->orig_ax; ++ int ret; + + #ifdef CONFIG_IA32_EMULATION + ti->status |= TS_COMPAT; + #endif + ++ ret = pipeline_syscall(ti, nr, regs); ++ if (ret > 0) { ++ disable_local_irqs(); ++ return; ++ } ++ if (ret < 0) ++ goto done; ++ + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { + /* + * Subtlety here: if ptrace pokes something larger than +@@ -340,7 +424,7 @@ static __always_inline void do_syscall_3 + (unsigned int)regs->di, (unsigned int)regs->bp); + #endif /* CONFIG_IA32_EMULATION */ + } +- ++done: + syscall_return_slowpath(regs); + } + +@@ -348,7 +432,7 @@ static __always_inline void do_syscall_3 + __visible void do_int80_syscall_32(struct pt_regs *regs) + { + enter_from_user_mode(); +- local_irq_enable(); ++ enable_local_irqs(); + do_syscall_32_irqs_on(regs); + } + +@@ -372,7 +456,7 @@ __visible long do_fast_syscall_32(struct + + enter_from_user_mode(); + +- local_irq_enable(); ++ enable_local_irqs(); + + /* Fetch EBP from where the vDSO stashed it. */ + if ( +@@ -390,7 +474,7 @@ __visible long do_fast_syscall_32(struct + ) { + + /* User code screwed up. 
*/ +- local_irq_disable(); ++ disable_local_irqs(); + regs->ax = -EFAULT; + prepare_exit_to_usermode(regs); + return 0; /* Keep it simple: use IRET. */ +diff -uprN kernel/arch/x86/entry/entry_64.S kernel_new/arch/x86/entry/entry_64.S +--- kernel/arch/x86/entry/entry_64.S 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/entry_64.S 2021-04-01 18:28:07.548863405 +0800 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -63,7 +64,12 @@ END(native_usergs_sysret64) + .endm + + .macro TRACE_IRQS_IRETQ +- TRACE_IRQS_FLAGS EFLAGS(%rsp) ++#ifdef CONFIG_TRACE_IRQFLAGS ++ btl $9, EFLAGS(%rsp) /* interrupts off? */ ++ jnc 1f ++ TRACE_IRQS_ON_VIRT ++1: ++#endif + .endm + + /* +@@ -77,7 +83,8 @@ END(native_usergs_sysret64) + * make sure the stack pointer does not get reset back to the top + * of the debug stack, and instead just reuses the current stack. + */ +-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) ++#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) \ ++ && !defined(CONFIG_IPIPE) + + .macro TRACE_IRQS_OFF_DEBUG + call debug_stack_set_zero +@@ -395,6 +402,7 @@ END(__switch_to_asm) + */ + ENTRY(ret_from_fork) + UNWIND_HINT_EMPTY ++ HARD_COND_ENABLE_INTERRUPTS + movq %rax, %rdi + call schedule_tail /* rdi: 'prev' task parameter */ + +@@ -638,8 +646,13 @@ ENTRY(interrupt_entry) + + 1: + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 +- /* We entered an interrupt context - irqs are off: */ ++#ifndef CONFIG_IPIPE ++ /* We entered an interrupt context - irqs are off unless ++ pipelining is enabled, in which case we defer tracing until ++ __ipipe_do_sync_stage() where the virtual IRQ state is ++ updated for the root stage. */ + TRACE_IRQS_OFF ++#endif + + ret + END(interrupt_entry) +@@ -667,7 +680,17 @@ common_interrupt: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 ++#ifdef CONFIG_IPIPE ++ call __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ LEAVE_IRQ_STACK ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++#else + call do_IRQ /* rdi points to pt_regs */ ++#endif + /* 0(%rsp): old RSP */ + ret_from_intr: + DISABLE_INTERRUPTS(CLBR_ANY) +@@ -682,6 +705,7 @@ ret_from_intr: + GLOBAL(retint_user) + mov %rsp,%rdi + call prepare_exit_to_usermode ++retint_user_early: + TRACE_IRQS_IRETQ + + GLOBAL(swapgs_restore_regs_and_return_to_usermode) +@@ -733,13 +757,18 @@ retint_kernel: + jnc 1f + 0: cmpl $0, PER_CPU_VAR(__preempt_count) + jnz 1f ++#ifdef CONFIG_IPIPE ++ call __ipipe_preempt_schedule_irq ++#else + call preempt_schedule_irq ++#endif + jmp 0b + 1: + #endif + /* + * The iretq could re-enable interrupts: +- */ ++ */ ++retint_kernel_early: + TRACE_IRQS_IRETQ + + GLOBAL(restore_regs_and_return_to_kernel) +@@ -858,6 +887,28 @@ _ASM_NOKPROBE(common_interrupt) + /* + * APIC interrupts. 
+ */ ++#ifdef CONFIG_IPIPE ++.macro apicinterrupt2 num sym ++ENTRY(\sym) ++ UNWIND_HINT_IRET_REGS ++ ASM_CLAC ++ pushq $~(\num) ++.Lcommon_\sym: ++ call interrupt_entry ++ UNWIND_HINT_REGS indirect=1 ++ call __ipipe_handle_irq ++ testl %eax, %eax ++ jnz ret_from_intr ++ LEAVE_IRQ_STACK ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++END(\sym) ++.endm ++.macro apicinterrupt3 num sym do_sym ++apicinterrupt2 \num \sym ++.endm ++#else /* !CONFIG_IPIPE */ + .macro apicinterrupt3 num sym do_sym + ENTRY(\sym) + UNWIND_HINT_IRET_REGS +@@ -870,6 +921,7 @@ ENTRY(\sym) + END(\sym) + _ASM_NOKPROBE(\sym) + .endm ++#endif /* !CONFIG_IPIPE */ + + /* Make sure APIC interrupt handlers end up in the irqentry section: */ + #define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +@@ -915,6 +967,14 @@ apicinterrupt THERMAL_APIC_VECTOR therm + apicinterrupt CALL_FUNCTION_SINGLE_VECTOR call_function_single_interrupt smp_call_function_single_interrupt + apicinterrupt CALL_FUNCTION_VECTOR call_function_interrupt smp_call_function_interrupt + apicinterrupt RESCHEDULE_VECTOR reschedule_interrupt smp_reschedule_interrupt ++#ifdef CONFIG_IPIPE ++apicinterrupt2 IPIPE_RESCHEDULE_VECTOR ipipe_reschedule_interrupt ++apicinterrupt2 IPIPE_CRITICAL_VECTOR ipipe_critical_interrupt ++#endif ++#endif ++ ++#ifdef CONFIG_IPIPE ++apicinterrupt2 IPIPE_HRTIMER_VECTOR ipipe_hrtimer_interrupt + #endif + + apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt +@@ -929,7 +989,47 @@ apicinterrupt IRQ_WORK_VECTOR irq_work + */ + #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) + +-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 ++.macro ipipe_idtentry_prologue paranoid=0 trapnr=-1 skip_label=-invalid- ++#ifdef CONFIG_IPIPE ++ movq EFLAGS(%rsp), %r14 /* regs->flags */ ++ movq %rsp, %rdi /* pt_regs pointer */ ++ movl $\trapnr, %esi /* trap number */ ++ subq $8, %rsp ++ movq %rsp, %rdx /* &flags */ ++ call __ipipe_trap_prologue ++ popq %r13 ++ mov %rax, %r12 /* save propagation status */ ++ .if \paranoid == 0 /* paranoid may not skip handler */ ++ testl %eax, %eax ++ jg \skip_label /* skip regular handler if > 0 */ ++ .endif ++#endif ++.endm ++ ++.macro ipipe_idtentry_epilogue paranoid=0 skip_label=-invalid- ++#ifdef CONFIG_IPIPE ++ testl %r12d, %r12d ++ jnz 1000f ++ movq %rsp, %rdi /* pt_regs pointer */ ++ movq %r13, %rsi /* &flags from prologue */ ++ movq %r14, %rdx /* original regs->flags before fixup */ ++ call __ipipe_trap_epilogue ++1000: ++ .if \paranoid == 0 /* paranoid implies normal epilogue */ ++ testl %r12d, %r12d ++ jz 1001f ++\skip_label: ++ UNWIND_HINT_REGS ++ DISABLE_INTERRUPTS(CLBR_ANY) ++ testb $3, CS(%rsp) ++ jz retint_kernel_early ++ jmp retint_user_early ++ .endif ++1001: ++#endif ++.endm ++ ++.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 create_gap=0 trapnr=-1 + ENTRY(\sym) + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + +@@ -979,6 +1079,8 @@ ENTRY(\sym) + .endif + .endif + ++ ipipe_idtentry_prologue paranoid=\paranoid trapnr=\trapnr skip_label=kernel_skip_\@ ++ + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code +@@ -994,6 +1096,8 @@ ENTRY(\sym) + + call \do_sym + ++ ipipe_idtentry_epilogue paranoid=\paranoid skip_label=kernel_skip_\@ ++ + .if \shift_ist != -1 + addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + .endif +@@ -1014,6 +1118,8 @@ ENTRY(\sym) + .Lfrom_usermode_switch_stack_\@: + call error_entry + ++ ipipe_idtentry_prologue paranoid=\paranoid trapnr=\trapnr 
skip_label=user_skip_\@ ++ + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code +@@ -1025,25 +1131,27 @@ ENTRY(\sym) + + call \do_sym + ++ ipipe_idtentry_epilogue paranoid=\paranoid skip_label=user_skip_\@ ++ + jmp error_exit + .endif + _ASM_NOKPROBE(\sym) + END(\sym) + .endm + +-idtentry divide_error do_divide_error has_error_code=0 +-idtentry overflow do_overflow has_error_code=0 +-idtentry bounds do_bounds has_error_code=0 +-idtentry invalid_op do_invalid_op has_error_code=0 +-idtentry device_not_available do_device_not_available has_error_code=0 +-idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +-idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 +-idtentry invalid_TSS do_invalid_TSS has_error_code=1 +-idtentry segment_not_present do_segment_not_present has_error_code=1 +-idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 +-idtentry coprocessor_error do_coprocessor_error has_error_code=0 +-idtentry alignment_check do_alignment_check has_error_code=1 +-idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 ++idtentry divide_error do_divide_error has_error_code=0 trapnr=0 ++idtentry overflow do_overflow has_error_code=0 trapnr=4 ++idtentry bounds do_bounds has_error_code=0 trapnr=5 ++idtentry invalid_op do_invalid_op has_error_code=0 trapnr=6 ++idtentry device_not_available do_device_not_available has_error_code=0 trapnr=7 ++idtentry double_fault do_double_fault has_error_code=1 paranoid=2 trapnr=8 ++idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 trapnr=9 ++idtentry invalid_TSS do_invalid_TSS has_error_code=1 trapnr=10 ++idtentry segment_not_present do_segment_not_present has_error_code=1 trapnr=11 ++idtentry spurious_interrupt_bug do_spurious_interrupt_bug has_error_code=0 trapnr=15 ++idtentry coprocessor_error do_coprocessor_error has_error_code=0 trapnr=16 ++idtentry alignment_check do_alignment_check has_error_code=1 trapnr=17 ++idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 trapnr=19 + + + /* +@@ -1087,10 +1195,14 @@ bad_gs: + ENTRY(do_softirq_own_stack) + pushq %rbp + mov %rsp, %rbp ++ HARD_COND_DISABLE_INTERRUPTS + ENTER_IRQ_STACK regs=0 old_rsp=%r11 ++ HARD_COND_ENABLE_INTERRUPTS + call __do_softirq ++ HARD_COND_DISABLE_INTERRUPTS + LEAVE_IRQ_STACK regs=0 + leaveq ++ HARD_COND_ENABLE_INTERRUPTS + ret + ENDPROC(do_softirq_own_stack) + +@@ -1191,24 +1303,28 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ + hv_stimer0_callback_vector hv_stimer0_vector_handler + #endif /* CONFIG_HYPERV */ + ++#ifdef CONFIG_IPIPE ++idtentry debug do_debug has_error_code=0 paranoid=1 trapnr=1 ++#else + idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +-idtentry int3 do_int3 has_error_code=0 create_gap=1 +-idtentry stack_segment do_stack_segment has_error_code=1 ++#endif ++idtentry int3 do_int3 has_error_code=0 create_gap=1 trapnr=3 ++idtentry stack_segment do_stack_segment has_error_code=1 trapnr=12 + + #ifdef CONFIG_XEN + idtentry xennmi do_nmi has_error_code=0 + idtentry xendebug do_debug has_error_code=0 + #endif + +-idtentry general_protection do_general_protection has_error_code=1 +-idtentry page_fault do_page_fault has_error_code=1 ++idtentry general_protection do_general_protection has_error_code=1 trapnr=13 ++idtentry page_fault do_page_fault has_error_code=1 trapnr=14 + + #ifdef CONFIG_KVM_GUEST +-idtentry async_page_fault do_async_page_fault has_error_code=1 ++idtentry async_page_fault 
do_async_page_fault has_error_code=1 trapnr=14 + #endif + + #ifdef CONFIG_X86_MCE +-idtentry machine_check do_mce has_error_code=0 paranoid=1 ++idtentry machine_check do_mce has_error_code=0 paranoid=1 trapnr=18 + #endif + + /* +diff -uprN kernel/arch/x86/entry/thunk_64.S kernel_new/arch/x86/entry/thunk_64.S +--- kernel/arch/x86/entry/thunk_64.S 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/thunk_64.S 2021-04-01 18:28:07.651863292 +0800 +@@ -40,6 +40,7 @@ + + #ifdef CONFIG_TRACE_IRQFLAGS + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 ++ THUNK trace_hardirqs_on_virt_thunk,trace_hardirqs_on_virt_caller,1 + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 + #endif + +diff -uprN kernel/arch/x86/entry/vsyscall/vsyscall_gtod.c kernel_new/arch/x86/entry/vsyscall/vsyscall_gtod.c +--- kernel/arch/x86/entry/vsyscall/vsyscall_gtod.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/entry/vsyscall/vsyscall_gtod.c 2021-04-01 18:28:07.651863292 +0800 +@@ -14,6 +14,7 @@ + */ + + #include ++#include + #include + #include + +@@ -75,4 +76,7 @@ void update_vsyscall(struct timekeeper * + } + + gtod_write_end(vdata); ++ ++ if (tk->tkr_mono.clock == &clocksource_tsc) ++ ipipe_update_hostrt(tk); + } +diff -uprN kernel/arch/x86/events/core.c kernel_new/arch/x86/events/core.c +--- kernel/arch/x86/events/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/events/core.c 2021-04-01 18:28:07.651863292 +0800 +@@ -2111,7 +2111,7 @@ static int x86_pmu_event_init(struct per + + static void refresh_pce(void *ignored) + { +- load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); ++ load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm)); + } + + static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) +diff -uprN kernel/arch/x86/events/core.c.orig kernel_new/arch/x86/events/core.c.orig +--- kernel/arch/x86/events/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/events/core.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2568 @@ ++/* ++ * Performance events x86 architecture code ++ * ++ * Copyright (C) 2008 Thomas Gleixner ++ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2009 Jaswinder Singh Rajput ++ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter ++ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra ++ * Copyright (C) 2009 Intel Corporation, ++ * Copyright (C) 2009 Google, Inc., Stephane Eranian ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "perf_event.h" ++ ++struct x86_pmu x86_pmu __read_mostly; ++ ++DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { ++ .enabled = 1, ++}; ++ ++DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); ++ ++u64 __read_mostly hw_cache_event_ids ++ [PERF_COUNT_HW_CACHE_MAX] ++ [PERF_COUNT_HW_CACHE_OP_MAX] ++ [PERF_COUNT_HW_CACHE_RESULT_MAX]; ++u64 __read_mostly hw_cache_extra_regs ++ [PERF_COUNT_HW_CACHE_MAX] ++ [PERF_COUNT_HW_CACHE_OP_MAX] ++ [PERF_COUNT_HW_CACHE_RESULT_MAX]; ++ ++/* ++ * Propagate event elapsed time into the generic event. ++ * Can only be executed on the CPU where the event is active. ++ * Returns the delta events processed. 
++ */ ++u64 x86_perf_event_update(struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ int shift = 64 - x86_pmu.cntval_bits; ++ u64 prev_raw_count, new_raw_count; ++ int idx = hwc->idx; ++ u64 delta; ++ ++ if (idx == INTEL_PMC_IDX_FIXED_BTS) ++ return 0; ++ ++ /* ++ * Careful: an NMI might modify the previous event value. ++ * ++ * Our tactic to handle this is to first atomically read and ++ * exchange a new raw count - then add that new-prev delta ++ * count to the generic event atomically: ++ */ ++again: ++ prev_raw_count = local64_read(&hwc->prev_count); ++ rdpmcl(hwc->event_base_rdpmc, new_raw_count); ++ ++ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, ++ new_raw_count) != prev_raw_count) ++ goto again; ++ ++ /* ++ * Now we have the new raw value and have updated the prev ++ * timestamp already. We can now calculate the elapsed delta ++ * (event-)time and add that to the generic event. ++ * ++ * Careful, not all hw sign-extends above the physical width ++ * of the count. ++ */ ++ delta = (new_raw_count << shift) - (prev_raw_count << shift); ++ delta >>= shift; ++ ++ local64_add(delta, &event->count); ++ local64_sub(delta, &hwc->period_left); ++ ++ return new_raw_count; ++} ++ ++/* ++ * Find and validate any extra registers to set up. ++ */ ++static int x86_pmu_extra_regs(u64 config, struct perf_event *event) ++{ ++ struct hw_perf_event_extra *reg; ++ struct extra_reg *er; ++ ++ reg = &event->hw.extra_reg; ++ ++ if (!x86_pmu.extra_regs) ++ return 0; ++ ++ for (er = x86_pmu.extra_regs; er->msr; er++) { ++ if (er->event != (config & er->config_mask)) ++ continue; ++ if (event->attr.config1 & ~er->valid_mask) ++ return -EINVAL; ++ /* Check if the extra msrs can be safely accessed*/ ++ if (!er->extra_msr_access) ++ return -ENXIO; ++ ++ reg->idx = er->idx; ++ reg->config = event->attr.config1; ++ reg->reg = er->msr; ++ break; ++ } ++ return 0; ++} ++ ++static atomic_t active_events; ++static atomic_t pmc_refcount; ++static DEFINE_MUTEX(pmc_reserve_mutex); ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++static bool reserve_pmc_hardware(void) ++{ ++ int i; ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) ++ goto perfctr_fail; ++ } ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) ++ goto eventsel_fail; ++ } ++ ++ return true; ++ ++eventsel_fail: ++ for (i--; i >= 0; i--) ++ release_evntsel_nmi(x86_pmu_config_addr(i)); ++ ++ i = x86_pmu.num_counters; ++ ++perfctr_fail: ++ for (i--; i >= 0; i--) ++ release_perfctr_nmi(x86_pmu_event_addr(i)); ++ ++ return false; ++} ++ ++static void release_pmc_hardware(void) ++{ ++ int i; ++ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ release_perfctr_nmi(x86_pmu_event_addr(i)); ++ release_evntsel_nmi(x86_pmu_config_addr(i)); ++ } ++} ++ ++#else ++ ++static bool reserve_pmc_hardware(void) { return true; } ++static void release_pmc_hardware(void) {} ++ ++#endif ++ ++static bool check_hw_exists(void) ++{ ++ u64 val, val_fail = -1, val_new= ~0; ++ int i, reg, reg_fail = -1, ret = 0; ++ int bios_fail = 0; ++ int reg_safe = -1; ++ ++ /* ++ * Check to see if the BIOS enabled any of the counters, if so ++ * complain and bail. 
++ */ ++ for (i = 0; i < x86_pmu.num_counters; i++) { ++ reg = x86_pmu_config_addr(i); ++ ret = rdmsrl_safe(reg, &val); ++ if (ret) ++ goto msr_fail; ++ if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { ++ bios_fail = 1; ++ val_fail = val; ++ reg_fail = reg; ++ } else { ++ reg_safe = i; ++ } ++ } ++ ++ if (x86_pmu.num_counters_fixed) { ++ reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ++ ret = rdmsrl_safe(reg, &val); ++ if (ret) ++ goto msr_fail; ++ for (i = 0; i < x86_pmu.num_counters_fixed; i++) { ++ if (val & (0x03 << i*4)) { ++ bios_fail = 1; ++ val_fail = val; ++ reg_fail = reg; ++ } ++ } ++ } ++ ++ /* ++ * If all the counters are enabled, the below test will always ++ * fail. The tools will also become useless in this scenario. ++ * Just fail and disable the hardware counters. ++ */ ++ ++ if (reg_safe == -1) { ++ reg = reg_safe; ++ goto msr_fail; ++ } ++ ++ /* ++ * Read the current value, change it and read it back to see if it ++ * matches, this is needed to detect certain hardware emulators ++ * (qemu/kvm) that don't trap on the MSR access and always return 0s. ++ */ ++ reg = x86_pmu_event_addr(reg_safe); ++ if (rdmsrl_safe(reg, &val)) ++ goto msr_fail; ++ val ^= 0xffffUL; ++ ret = wrmsrl_safe(reg, val); ++ ret |= rdmsrl_safe(reg, &val_new); ++ if (ret || val != val_new) ++ goto msr_fail; ++ ++ /* ++ * We still allow the PMU driver to operate: ++ */ ++ if (bios_fail) { ++ pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); ++ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", ++ reg_fail, val_fail); ++ } ++ ++ return true; ++ ++msr_fail: ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { ++ pr_cont("PMU not available due to virtualization, using software events only.\n"); ++ } else { ++ pr_cont("Broken PMU hardware detected, using software events only.\n"); ++ pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n", ++ reg, val_new); ++ } ++ ++ return false; ++} ++ ++static void hw_perf_event_destroy(struct perf_event *event) ++{ ++ x86_release_hardware(); ++ atomic_dec(&active_events); ++} ++ ++void hw_perf_lbr_event_destroy(struct perf_event *event) ++{ ++ hw_perf_event_destroy(event); ++ ++ /* undo the lbr/bts event accounting */ ++ x86_del_exclusive(x86_lbr_exclusive_lbr); ++} ++ ++static inline int x86_pmu_initialized(void) ++{ ++ return x86_pmu.handle_irq != NULL; ++} ++ ++static inline int ++set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) ++{ ++ struct perf_event_attr *attr = &event->attr; ++ unsigned int cache_type, cache_op, cache_result; ++ u64 config, val; ++ ++ config = attr->config; ++ ++ cache_type = (config >> 0) & 0xff; ++ if (cache_type >= PERF_COUNT_HW_CACHE_MAX) ++ return -EINVAL; ++ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); ++ ++ cache_op = (config >> 8) & 0xff; ++ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) ++ return -EINVAL; ++ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); ++ ++ cache_result = (config >> 16) & 0xff; ++ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) ++ return -EINVAL; ++ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); ++ ++ val = hw_cache_event_ids[cache_type][cache_op][cache_result]; ++ ++ if (val == 0) ++ return -ENOENT; ++ ++ if (val == -1) ++ return -EINVAL; ++ ++ hwc->config |= val; ++ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; ++ return x86_pmu_extra_regs(val, event); ++} ++ ++int x86_reserve_hardware(void) ++{ ++ int err = 0; ++ ++ if (!atomic_inc_not_zero(&pmc_refcount)) { ++ 
mutex_lock(&pmc_reserve_mutex); ++ if (atomic_read(&pmc_refcount) == 0) { ++ if (!reserve_pmc_hardware()) ++ err = -EBUSY; ++ else ++ reserve_ds_buffers(); ++ } ++ if (!err) ++ atomic_inc(&pmc_refcount); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++ ++ return err; ++} ++ ++void x86_release_hardware(void) ++{ ++ if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) { ++ release_pmc_hardware(); ++ release_ds_buffers(); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++} ++ ++/* ++ * Check if we can create event of a certain type (that no conflicting events ++ * are present). ++ */ ++int x86_add_exclusive(unsigned int what) ++{ ++ int i; ++ ++ /* ++ * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS. ++ * LBR and BTS are still mutually exclusive. ++ */ ++ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) ++ goto out; ++ ++ if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { ++ mutex_lock(&pmc_reserve_mutex); ++ for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) { ++ if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i])) ++ goto fail_unlock; ++ } ++ atomic_inc(&x86_pmu.lbr_exclusive[what]); ++ mutex_unlock(&pmc_reserve_mutex); ++ } ++ ++out: ++ atomic_inc(&active_events); ++ return 0; ++ ++fail_unlock: ++ mutex_unlock(&pmc_reserve_mutex); ++ return -EBUSY; ++} ++ ++void x86_del_exclusive(unsigned int what) ++{ ++ atomic_dec(&active_events); ++ ++ /* ++ * See the comment in x86_add_exclusive(). ++ */ ++ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) ++ return; ++ ++ atomic_dec(&x86_pmu.lbr_exclusive[what]); ++} ++ ++int x86_setup_perfctr(struct perf_event *event) ++{ ++ struct perf_event_attr *attr = &event->attr; ++ struct hw_perf_event *hwc = &event->hw; ++ u64 config; ++ ++ if (!is_sampling_event(event)) { ++ hwc->sample_period = x86_pmu.max_period; ++ hwc->last_period = hwc->sample_period; ++ local64_set(&hwc->period_left, hwc->sample_period); ++ } ++ ++ if (attr->type == PERF_TYPE_RAW) ++ return x86_pmu_extra_regs(event->attr.config, event); ++ ++ if (attr->type == PERF_TYPE_HW_CACHE) ++ return set_ext_hw_attr(hwc, event); ++ ++ if (attr->config >= x86_pmu.max_events) ++ return -EINVAL; ++ ++ attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); ++ ++ /* ++ * The generic map: ++ */ ++ config = x86_pmu.event_map(attr->config); ++ ++ if (config == 0) ++ return -ENOENT; ++ ++ if (config == -1LL) ++ return -EINVAL; ++ ++ hwc->config |= config; ++ ++ return 0; ++} ++ ++/* ++ * check that branch_sample_type is compatible with ++ * settings needed for precise_ip > 1 which implies ++ * using the LBR to capture ALL taken branches at the ++ * priv levels of the measurement ++ */ ++static inline int precise_br_compat(struct perf_event *event) ++{ ++ u64 m = event->attr.branch_sample_type; ++ u64 b = 0; ++ ++ /* must capture all branches */ ++ if (!(m & PERF_SAMPLE_BRANCH_ANY)) ++ return 0; ++ ++ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_user) ++ b |= PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_kernel) ++ b |= PERF_SAMPLE_BRANCH_KERNEL; ++ ++ /* ++ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 ++ */ ++ ++ return m == b; ++} ++ ++int x86_pmu_max_precise(void) ++{ ++ int precise = 0; ++ ++ /* Support for constant skid */ ++ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { ++ precise++; ++ ++ /* Support for IP fixup */ ++ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) ++ precise++; ++ ++ if (x86_pmu.pebs_prec_dist) ++ precise++; ++ } ++ 
return precise; ++} ++ ++int x86_pmu_hw_config(struct perf_event *event) ++{ ++ if (event->attr.precise_ip) { ++ int precise = x86_pmu_max_precise(); ++ ++ if (event->attr.precise_ip > precise) ++ return -EOPNOTSUPP; ++ ++ /* There's no sense in having PEBS for non sampling events: */ ++ if (!is_sampling_event(event)) ++ return -EINVAL; ++ } ++ /* ++ * check that PEBS LBR correction does not conflict with ++ * whatever the user is asking with attr->branch_sample_type ++ */ ++ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) { ++ u64 *br_type = &event->attr.branch_sample_type; ++ ++ if (has_branch_stack(event)) { ++ if (!precise_br_compat(event)) ++ return -EOPNOTSUPP; ++ ++ /* branch_sample_type is compatible */ ++ ++ } else { ++ /* ++ * user did not specify branch_sample_type ++ * ++ * For PEBS fixups, we capture all ++ * the branches at the priv level of the ++ * event. ++ */ ++ *br_type = PERF_SAMPLE_BRANCH_ANY; ++ ++ if (!event->attr.exclude_user) ++ *br_type |= PERF_SAMPLE_BRANCH_USER; ++ ++ if (!event->attr.exclude_kernel) ++ *br_type |= PERF_SAMPLE_BRANCH_KERNEL; ++ } ++ } ++ ++ if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) ++ event->attach_state |= PERF_ATTACH_TASK_DATA; ++ ++ /* ++ * Generate PMC IRQs: ++ * (keep 'enabled' bit clear for now) ++ */ ++ event->hw.config = ARCH_PERFMON_EVENTSEL_INT; ++ ++ /* ++ * Count user and OS events unless requested not to ++ */ ++ if (!event->attr.exclude_user) ++ event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; ++ if (!event->attr.exclude_kernel) ++ event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; ++ ++ if (event->attr.type == PERF_TYPE_RAW) ++ event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; ++ ++ if (event->attr.sample_period && x86_pmu.limit_period) { ++ if (x86_pmu.limit_period(event, event->attr.sample_period) > ++ event->attr.sample_period) ++ return -EINVAL; ++ } ++ ++ return x86_setup_perfctr(event); ++} ++ ++/* ++ * Setup the hardware configuration for a given attr_type ++ */ ++static int __x86_pmu_event_init(struct perf_event *event) ++{ ++ int err; ++ ++ if (!x86_pmu_initialized()) ++ return -ENODEV; ++ ++ err = x86_reserve_hardware(); ++ if (err) ++ return err; ++ ++ atomic_inc(&active_events); ++ event->destroy = hw_perf_event_destroy; ++ ++ event->hw.idx = -1; ++ event->hw.last_cpu = -1; ++ event->hw.last_tag = ~0ULL; ++ ++ /* mark unused */ ++ event->hw.extra_reg.idx = EXTRA_REG_NONE; ++ event->hw.branch_reg.idx = EXTRA_REG_NONE; ++ ++ return x86_pmu.hw_config(event); ++} ++ ++void x86_pmu_disable_all(void) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx; ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ u64 val; ++ ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ rdmsrl(x86_pmu_config_addr(idx), val); ++ if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) ++ continue; ++ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; ++ wrmsrl(x86_pmu_config_addr(idx), val); ++ } ++} ++ ++/* ++ * There may be PMI landing after enabled=0. The PMI hitting could be before or ++ * after disable_all. ++ * ++ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler. ++ * It will not be re-enabled in the NMI handler again, because enabled=0. After ++ * handling the NMI, disable_all will be called, which will not change the ++ * state either. If PMI hits after disable_all, the PMU is already disabled ++ * before entering NMI handler. The NMI handler will not change the state ++ * either. ++ * ++ * So either situation is harmless. 
++ */ ++static void x86_pmu_disable(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ if (!x86_pmu_initialized()) ++ return; ++ ++ if (!cpuc->enabled) ++ return; ++ ++ cpuc->n_added = 0; ++ cpuc->enabled = 0; ++ barrier(); ++ ++ x86_pmu.disable_all(); ++} ++ ++void x86_pmu_enable_all(int added) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx; ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ struct hw_perf_event *hwc = &cpuc->events[idx]->hw; ++ ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ ++ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); ++ } ++} ++ ++static struct pmu pmu; ++ ++static inline int is_x86_event(struct perf_event *event) ++{ ++ return event->pmu == &pmu; ++} ++ ++/* ++ * Event scheduler state: ++ * ++ * Assign events iterating over all events and counters, beginning ++ * with events with least weights first. Keep the current iterator ++ * state in struct sched_state. ++ */ ++struct sched_state { ++ int weight; ++ int event; /* event index */ ++ int counter; /* counter index */ ++ int unassigned; /* number of events to be assigned left */ ++ int nr_gp; /* number of GP counters used */ ++ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++}; ++ ++/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ ++#define SCHED_STATES_MAX 2 ++ ++struct perf_sched { ++ int max_weight; ++ int max_events; ++ int max_gp; ++ int saved_states; ++ struct event_constraint **constraints; ++ struct sched_state state; ++ struct sched_state saved[SCHED_STATES_MAX]; ++}; ++ ++/* ++ * Initialize interator that runs through all events and counters. ++ */ ++static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, ++ int num, int wmin, int wmax, int gpmax) ++{ ++ int idx; ++ ++ memset(sched, 0, sizeof(*sched)); ++ sched->max_events = num; ++ sched->max_weight = wmax; ++ sched->max_gp = gpmax; ++ sched->constraints = constraints; ++ ++ for (idx = 0; idx < num; idx++) { ++ if (constraints[idx]->weight == wmin) ++ break; ++ } ++ ++ sched->state.event = idx; /* start with min weight */ ++ sched->state.weight = wmin; ++ sched->state.unassigned = num; ++} ++ ++static void perf_sched_save_state(struct perf_sched *sched) ++{ ++ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) ++ return; ++ ++ sched->saved[sched->saved_states] = sched->state; ++ sched->saved_states++; ++} ++ ++static bool perf_sched_restore_state(struct perf_sched *sched) ++{ ++ if (!sched->saved_states) ++ return false; ++ ++ sched->saved_states--; ++ sched->state = sched->saved[sched->saved_states]; ++ ++ /* continue with next counter: */ ++ clear_bit(sched->state.counter++, sched->state.used); ++ ++ return true; ++} ++ ++/* ++ * Select a counter for the current event to schedule. Return true on ++ * success. 
++ */ ++static bool __perf_sched_find_counter(struct perf_sched *sched) ++{ ++ struct event_constraint *c; ++ int idx; ++ ++ if (!sched->state.unassigned) ++ return false; ++ ++ if (sched->state.event >= sched->max_events) ++ return false; ++ ++ c = sched->constraints[sched->state.event]; ++ /* Prefer fixed purpose counters */ ++ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { ++ idx = INTEL_PMC_IDX_FIXED; ++ for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { ++ if (!__test_and_set_bit(idx, sched->state.used)) ++ goto done; ++ } ++ } ++ ++ /* Grab the first unused counter starting with idx */ ++ idx = sched->state.counter; ++ for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { ++ if (!__test_and_set_bit(idx, sched->state.used)) { ++ if (sched->state.nr_gp++ >= sched->max_gp) ++ return false; ++ ++ goto done; ++ } ++ } ++ ++ return false; ++ ++done: ++ sched->state.counter = idx; ++ ++ if (c->overlap) ++ perf_sched_save_state(sched); ++ ++ return true; ++} ++ ++static bool perf_sched_find_counter(struct perf_sched *sched) ++{ ++ while (!__perf_sched_find_counter(sched)) { ++ if (!perf_sched_restore_state(sched)) ++ return false; ++ } ++ ++ return true; ++} ++ ++/* ++ * Go through all unassigned events and find the next one to schedule. ++ * Take events with the least weight first. Return true on success. ++ */ ++static bool perf_sched_next_event(struct perf_sched *sched) ++{ ++ struct event_constraint *c; ++ ++ if (!sched->state.unassigned || !--sched->state.unassigned) ++ return false; ++ ++ do { ++ /* next event */ ++ sched->state.event++; ++ if (sched->state.event >= sched->max_events) { ++ /* next weight */ ++ sched->state.event = 0; ++ sched->state.weight++; ++ if (sched->state.weight > sched->max_weight) ++ return false; ++ } ++ c = sched->constraints[sched->state.event]; ++ } while (c->weight != sched->state.weight); ++ ++ sched->state.counter = 0; /* start with first counter */ ++ ++ return true; ++} ++ ++/* ++ * Assign a counter for each event. 
++ */ ++int perf_assign_events(struct event_constraint **constraints, int n, ++ int wmin, int wmax, int gpmax, int *assign) ++{ ++ struct perf_sched sched; ++ ++ perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax); ++ ++ do { ++ if (!perf_sched_find_counter(&sched)) ++ break; /* failed */ ++ if (assign) ++ assign[sched.state.event] = sched.state.counter; ++ } while (perf_sched_next_event(&sched)); ++ ++ return sched.state.unassigned; ++} ++EXPORT_SYMBOL_GPL(perf_assign_events); ++ ++int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) ++{ ++ struct event_constraint *c; ++ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++ struct perf_event *e; ++ int i, wmin, wmax, unsched = 0; ++ struct hw_perf_event *hwc; ++ ++ bitmap_zero(used_mask, X86_PMC_IDX_MAX); ++ ++ if (x86_pmu.start_scheduling) ++ x86_pmu.start_scheduling(cpuc); ++ ++ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { ++ cpuc->event_constraint[i] = NULL; ++ c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); ++ cpuc->event_constraint[i] = c; ++ ++ wmin = min(wmin, c->weight); ++ wmax = max(wmax, c->weight); ++ } ++ ++ /* ++ * fastpath, try to reuse previous register ++ */ ++ for (i = 0; i < n; i++) { ++ hwc = &cpuc->event_list[i]->hw; ++ c = cpuc->event_constraint[i]; ++ ++ /* never assigned */ ++ if (hwc->idx == -1) ++ break; ++ ++ /* constraint still honored */ ++ if (!test_bit(hwc->idx, c->idxmsk)) ++ break; ++ ++ /* not already used */ ++ if (test_bit(hwc->idx, used_mask)) ++ break; ++ ++ __set_bit(hwc->idx, used_mask); ++ if (assign) ++ assign[i] = hwc->idx; ++ } ++ ++ /* slow path */ ++ if (i != n) { ++ int gpmax = x86_pmu.num_counters; ++ ++ /* ++ * Do not allow scheduling of more than half the available ++ * generic counters. ++ * ++ * This helps avoid counter starvation of sibling thread by ++ * ensuring at most half the counters cannot be in exclusive ++ * mode. There is no designated counters for the limits. Any ++ * N/2 counters can be used. This helps with events with ++ * specific counter constraints. ++ */ ++ if (is_ht_workaround_enabled() && !cpuc->is_fake && ++ READ_ONCE(cpuc->excl_cntrs->exclusive_present)) ++ gpmax /= 2; ++ ++ unsched = perf_assign_events(cpuc->event_constraint, n, wmin, ++ wmax, gpmax, assign); ++ } ++ ++ /* ++ * In case of success (unsched = 0), mark events as committed, ++ * so we do not put_constraint() in case new events are added ++ * and fail to be scheduled ++ * ++ * We invoke the lower level commit callback to lock the resource ++ * ++ * We do not need to do all of this in case we are called to ++ * validate an event group (assign == NULL) ++ */ ++ if (!unsched && assign) { ++ for (i = 0; i < n; i++) { ++ e = cpuc->event_list[i]; ++ e->hw.flags |= PERF_X86_EVENT_COMMITTED; ++ if (x86_pmu.commit_scheduling) ++ x86_pmu.commit_scheduling(cpuc, i, assign[i]); ++ } ++ } else { ++ for (i = 0; i < n; i++) { ++ e = cpuc->event_list[i]; ++ /* ++ * do not put_constraint() on comitted events, ++ * because they are good to go ++ */ ++ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) ++ continue; ++ ++ /* ++ * release events that failed scheduling ++ */ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(cpuc, e); ++ } ++ } ++ ++ if (x86_pmu.stop_scheduling) ++ x86_pmu.stop_scheduling(cpuc); ++ ++ return unsched ? 
-EINVAL : 0; ++} ++ ++/* ++ * dogrp: true if must collect siblings events (group) ++ * returns total number of events and error code ++ */ ++static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) ++{ ++ struct perf_event *event; ++ int n, max_count; ++ ++ max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; ++ ++ /* current number of events already accepted */ ++ n = cpuc->n_events; ++ ++ if (is_x86_event(leader)) { ++ if (n >= max_count) ++ return -EINVAL; ++ cpuc->event_list[n] = leader; ++ n++; ++ } ++ if (!dogrp) ++ return n; ++ ++ for_each_sibling_event(event, leader) { ++ if (!is_x86_event(event) || ++ event->state <= PERF_EVENT_STATE_OFF) ++ continue; ++ ++ if (n >= max_count) ++ return -EINVAL; ++ ++ cpuc->event_list[n] = event; ++ n++; ++ } ++ return n; ++} ++ ++static inline void x86_assign_hw_event(struct perf_event *event, ++ struct cpu_hw_events *cpuc, int i) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ ++ hwc->idx = cpuc->assign[i]; ++ hwc->last_cpu = smp_processor_id(); ++ hwc->last_tag = ++cpuc->tags[i]; ++ ++ if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { ++ hwc->config_base = 0; ++ hwc->event_base = 0; ++ } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { ++ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ++ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); ++ hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; ++ } else { ++ hwc->config_base = x86_pmu_config_addr(hwc->idx); ++ hwc->event_base = x86_pmu_event_addr(hwc->idx); ++ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); ++ } ++} ++ ++static inline int match_prev_assignment(struct hw_perf_event *hwc, ++ struct cpu_hw_events *cpuc, ++ int i) ++{ ++ return hwc->idx == cpuc->assign[i] && ++ hwc->last_cpu == smp_processor_id() && ++ hwc->last_tag == cpuc->tags[i]; ++} ++ ++static void x86_pmu_start(struct perf_event *event, int flags); ++ ++static void x86_pmu_enable(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct perf_event *event; ++ struct hw_perf_event *hwc; ++ int i, added = cpuc->n_added; ++ ++ if (!x86_pmu_initialized()) ++ return; ++ ++ if (cpuc->enabled) ++ return; ++ ++ if (cpuc->n_added) { ++ int n_running = cpuc->n_events - cpuc->n_added; ++ /* ++ * apply assignment obtained either from ++ * hw_perf_group_sched_in() or x86_pmu_enable() ++ * ++ * step1: save events moving to new counters ++ */ ++ for (i = 0; i < n_running; i++) { ++ event = cpuc->event_list[i]; ++ hwc = &event->hw; ++ ++ /* ++ * we can avoid reprogramming counter if: ++ * - assigned same counter as last time ++ * - running on same CPU as last time ++ * - no other event has used the counter since ++ */ ++ if (hwc->idx == -1 || ++ match_prev_assignment(hwc, cpuc, i)) ++ continue; ++ ++ /* ++ * Ensure we don't accidentally enable a stopped ++ * counter simply because we rescheduled. 
++ */ ++ if (hwc->state & PERF_HES_STOPPED) ++ hwc->state |= PERF_HES_ARCH; ++ ++ x86_pmu_stop(event, PERF_EF_UPDATE); ++ } ++ ++ /* ++ * step2: reprogram moved events into new counters ++ */ ++ for (i = 0; i < cpuc->n_events; i++) { ++ event = cpuc->event_list[i]; ++ hwc = &event->hw; ++ ++ if (!match_prev_assignment(hwc, cpuc, i)) ++ x86_assign_hw_event(event, cpuc, i); ++ else if (i < n_running) ++ continue; ++ ++ if (hwc->state & PERF_HES_ARCH) ++ continue; ++ ++ x86_pmu_start(event, PERF_EF_RELOAD); ++ } ++ cpuc->n_added = 0; ++ perf_events_lapic_init(); ++ } ++ ++ cpuc->enabled = 1; ++ barrier(); ++ ++ x86_pmu.enable_all(added); ++} ++ ++static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); ++ ++/* ++ * Set the next IRQ period, based on the hwc->period_left value. ++ * To be called with the event disabled in hw: ++ */ ++int x86_perf_event_set_period(struct perf_event *event) ++{ ++ struct hw_perf_event *hwc = &event->hw; ++ s64 left = local64_read(&hwc->period_left); ++ s64 period = hwc->sample_period; ++ int ret = 0, idx = hwc->idx; ++ ++ if (idx == INTEL_PMC_IDX_FIXED_BTS) ++ return 0; ++ ++ /* ++ * If we are way outside a reasonable range then just skip forward: ++ */ ++ if (unlikely(left <= -period)) { ++ left = period; ++ local64_set(&hwc->period_left, left); ++ hwc->last_period = period; ++ ret = 1; ++ } ++ ++ if (unlikely(left <= 0)) { ++ left += period; ++ local64_set(&hwc->period_left, left); ++ hwc->last_period = period; ++ ret = 1; ++ } ++ /* ++ * Quirk: certain CPUs dont like it if just 1 hw_event is left: ++ */ ++ if (unlikely(left < 2)) ++ left = 2; ++ ++ if (left > x86_pmu.max_period) ++ left = x86_pmu.max_period; ++ ++ if (x86_pmu.limit_period) ++ left = x86_pmu.limit_period(event, left); ++ ++ per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; ++ ++ /* ++ * The hw event starts counting from this event offset, ++ * mark it to be able to extra future deltas: ++ */ ++ local64_set(&hwc->prev_count, (u64)-left); ++ ++ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); ++ ++ /* ++ * Due to erratum on certan cpu we need ++ * a second write to be sure the register ++ * is updated properly ++ */ ++ if (x86_pmu.perfctr_second_write) { ++ wrmsrl(hwc->event_base, ++ (u64)(-left) & x86_pmu.cntval_mask); ++ } ++ ++ perf_event_update_userpage(event); ++ ++ return ret; ++} ++ ++void x86_pmu_enable_event(struct perf_event *event) ++{ ++ if (__this_cpu_read(cpu_hw_events.enabled)) ++ __x86_pmu_enable_event(&event->hw, ++ ARCH_PERFMON_EVENTSEL_ENABLE); ++} ++ ++/* ++ * Add a single event to the PMU. ++ * ++ * The event is added to the group of enabled events ++ * but only if it can be scehduled with existing events. ++ */ ++static int x86_pmu_add(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct hw_perf_event *hwc; ++ int assign[X86_PMC_IDX_MAX]; ++ int n, n0, ret; ++ ++ hwc = &event->hw; ++ ++ n0 = cpuc->n_events; ++ ret = n = collect_events(cpuc, event, false); ++ if (ret < 0) ++ goto out; ++ ++ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; ++ if (!(flags & PERF_EF_START)) ++ hwc->state |= PERF_HES_ARCH; ++ ++ /* ++ * If group events scheduling transaction was started, ++ * skip the schedulability test here, it will be performed ++ * at commit time (->commit_txn) as a whole. ++ * ++ * If commit fails, we'll call ->del() on all events ++ * for which ->add() was called. 
++ */ ++ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) ++ goto done_collect; ++ ++ ret = x86_pmu.schedule_events(cpuc, n, assign); ++ if (ret) ++ goto out; ++ /* ++ * copy new assignment, now we know it is possible ++ * will be used by hw_perf_enable() ++ */ ++ memcpy(cpuc->assign, assign, n*sizeof(int)); ++ ++done_collect: ++ /* ++ * Commit the collect_events() state. See x86_pmu_del() and ++ * x86_pmu_*_txn(). ++ */ ++ cpuc->n_events = n; ++ cpuc->n_added += n - n0; ++ cpuc->n_txn += n - n0; ++ ++ if (x86_pmu.add) { ++ /* ++ * This is before x86_pmu_enable() will call x86_pmu_start(), ++ * so we enable LBRs before an event needs them etc.. ++ */ ++ x86_pmu.add(event); ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++static void x86_pmu_start(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int idx = event->hw.idx; ++ ++ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) ++ return; ++ ++ if (WARN_ON_ONCE(idx == -1)) ++ return; ++ ++ if (flags & PERF_EF_RELOAD) { ++ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); ++ x86_perf_event_set_period(event); ++ } ++ ++ event->hw.state = 0; ++ ++ cpuc->events[idx] = event; ++ __set_bit(idx, cpuc->active_mask); ++ __set_bit(idx, cpuc->running); ++ x86_pmu.enable(event); ++ perf_event_update_userpage(event); ++} ++ ++void perf_event_print_debug(void) ++{ ++ u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; ++ u64 pebs, debugctl; ++ struct cpu_hw_events *cpuc; ++ unsigned long flags; ++ int cpu, idx; ++ ++ if (!x86_pmu.num_counters) ++ return; ++ ++ local_irq_save(flags); ++ ++ cpu = smp_processor_id(); ++ cpuc = &per_cpu(cpu_hw_events, cpu); ++ ++ if (x86_pmu.version >= 2) { ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); ++ ++ pr_info("\n"); ++ pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); ++ pr_info("CPU#%d: status: %016llx\n", cpu, status); ++ pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); ++ pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); ++ if (x86_pmu.pebs_constraints) { ++ rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); ++ pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); ++ } ++ if (x86_pmu.lbr_nr) { ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); ++ } ++ } ++ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); ++ rdmsrl(x86_pmu_event_addr(idx), pmc_count); ++ ++ prev_left = per_cpu(pmc_prev_left[idx], cpu); ++ ++ pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", ++ cpu, idx, pmc_ctrl); ++ pr_info("CPU#%d: gen-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ pr_info("CPU#%d: gen-PMC%d left: %016llx\n", ++ cpu, idx, prev_left); ++ } ++ for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); ++ ++ pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ } ++ local_irq_restore(flags); ++} ++ ++void x86_pmu_stop(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ struct hw_perf_event *hwc = &event->hw; ++ ++ if (test_bit(hwc->idx, cpuc->active_mask)) { ++ x86_pmu.disable(event); ++ __clear_bit(hwc->idx, cpuc->active_mask); ++ cpuc->events[hwc->idx] = NULL; ++ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); ++ hwc->state |= 
PERF_HES_STOPPED; ++ } ++ ++ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { ++ /* ++ * Drain the remaining delta count out of a event ++ * that we are disabling: ++ */ ++ x86_perf_event_update(event); ++ hwc->state |= PERF_HES_UPTODATE; ++ } ++} ++ ++static void x86_pmu_del(struct perf_event *event, int flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int i; ++ ++ /* ++ * event is descheduled ++ */ ++ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; ++ ++ /* ++ * If we're called during a txn, we only need to undo x86_pmu.add. ++ * The events never got scheduled and ->cancel_txn will truncate ++ * the event_list. ++ * ++ * XXX assumes any ->del() called during a TXN will only be on ++ * an event added during that same TXN. ++ */ ++ if (cpuc->txn_flags & PERF_PMU_TXN_ADD) ++ goto do_del; ++ ++ /* ++ * Not a TXN, therefore cleanup properly. ++ */ ++ x86_pmu_stop(event, PERF_EF_UPDATE); ++ ++ for (i = 0; i < cpuc->n_events; i++) { ++ if (event == cpuc->event_list[i]) ++ break; ++ } ++ ++ if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ ++ return; ++ ++ /* If we have a newly added event; make sure to decrease n_added. */ ++ if (i >= cpuc->n_events - cpuc->n_added) ++ --cpuc->n_added; ++ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(cpuc, event); ++ ++ /* Delete the array entry. */ ++ while (++i < cpuc->n_events) { ++ cpuc->event_list[i-1] = cpuc->event_list[i]; ++ cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; ++ } ++ --cpuc->n_events; ++ ++ perf_event_update_userpage(event); ++ ++do_del: ++ if (x86_pmu.del) { ++ /* ++ * This is after x86_pmu_stop(); so we disable LBRs after any ++ * event can need them etc.. ++ */ ++ x86_pmu.del(event); ++ } ++} ++ ++int x86_pmu_handle_irq(struct pt_regs *regs) ++{ ++ struct perf_sample_data data; ++ struct cpu_hw_events *cpuc; ++ struct perf_event *event; ++ int idx, handled = 0; ++ u64 val; ++ ++ cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ /* ++ * Some chipsets need to unmask the LVTPC in a particular spot ++ * inside the nmi handler. As a result, the unmasking was pushed ++ * into all the nmi handlers. ++ * ++ * This generic handler doesn't seem to have any issues where the ++ * unmasking occurs so it was left at the top. ++ */ ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ ++ for (idx = 0; idx < x86_pmu.num_counters; idx++) { ++ if (!test_bit(idx, cpuc->active_mask)) ++ continue; ++ ++ event = cpuc->events[idx]; ++ ++ val = x86_perf_event_update(event); ++ if (val & (1ULL << (x86_pmu.cntval_bits - 1))) ++ continue; ++ ++ /* ++ * event overflow ++ */ ++ handled++; ++ perf_sample_data_init(&data, 0, event->hw.last_period); ++ ++ if (!x86_perf_event_set_period(event)) ++ continue; ++ ++ if (perf_event_overflow(event, &data, regs)) ++ x86_pmu_stop(event, 0); ++ } ++ ++ if (handled) ++ inc_irq_stat(apic_perf_irqs); ++ ++ return handled; ++} ++ ++void perf_events_lapic_init(void) ++{ ++ if (!x86_pmu.apic || !x86_pmu_initialized()) ++ return; ++ ++ /* ++ * Always use NMI for PMU ++ */ ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++} ++ ++static int ++perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) ++{ ++ u64 start_clock; ++ u64 finish_clock; ++ int ret; ++ ++ /* ++ * All PMUs/events that share this PMI handler should make sure to ++ * increment active_events for their events. 
++ */ ++ if (!atomic_read(&active_events)) ++ return NMI_DONE; ++ ++ start_clock = sched_clock(); ++ ret = x86_pmu.handle_irq(regs); ++ finish_clock = sched_clock(); ++ ++ perf_sample_event_took(finish_clock - start_clock); ++ ++ return ret; ++} ++NOKPROBE_SYMBOL(perf_event_nmi_handler); ++ ++struct event_constraint emptyconstraint; ++struct event_constraint unconstrained; ++ ++static int x86_pmu_prepare_cpu(unsigned int cpu) ++{ ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); ++ int i; ++ ++ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) ++ cpuc->kfree_on_online[i] = NULL; ++ if (x86_pmu.cpu_prepare) ++ return x86_pmu.cpu_prepare(cpu); ++ return 0; ++} ++ ++static int x86_pmu_dead_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_dead) ++ x86_pmu.cpu_dead(cpu); ++ return 0; ++} ++ ++static int x86_pmu_online_cpu(unsigned int cpu) ++{ ++ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); ++ int i; ++ ++ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { ++ kfree(cpuc->kfree_on_online[i]); ++ cpuc->kfree_on_online[i] = NULL; ++ } ++ return 0; ++} ++ ++static int x86_pmu_starting_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_starting) ++ x86_pmu.cpu_starting(cpu); ++ return 0; ++} ++ ++static int x86_pmu_dying_cpu(unsigned int cpu) ++{ ++ if (x86_pmu.cpu_dying) ++ x86_pmu.cpu_dying(cpu); ++ return 0; ++} ++ ++static void __init pmu_check_apic(void) ++{ ++ if (boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ x86_pmu.apic = 0; ++ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); ++ pr_info("no hardware sampling interrupt available.\n"); ++ ++ /* ++ * If we have a PMU initialized but no APIC ++ * interrupts, we cannot sample hardware ++ * events (user-space has to fall back and ++ * sample via a hrtimer based software event): ++ */ ++ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; ++ ++} ++ ++static struct attribute_group x86_pmu_format_group = { ++ .name = "format", ++ .attrs = NULL, ++}; ++ ++/* ++ * Remove all undefined events (x86_pmu.event_map(id) == 0) ++ * out of events_attr attributes. ++ */ ++static void __init filter_events(struct attribute **attrs) ++{ ++ struct device_attribute *d; ++ struct perf_pmu_events_attr *pmu_attr; ++ int offset = 0; ++ int i, j; ++ ++ for (i = 0; attrs[i]; i++) { ++ d = (struct device_attribute *)attrs[i]; ++ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); ++ /* str trumps id */ ++ if (pmu_attr->event_str) ++ continue; ++ if (x86_pmu.event_map(i + offset)) ++ continue; ++ ++ for (j = i; attrs[j]; j++) ++ attrs[j] = attrs[j + 1]; ++ ++ /* Check the shifted attr. */ ++ i--; ++ ++ /* ++ * event_map() is index based, the attrs array is organized ++ * by increasing event index. 
If we shift the events, then ++ * we need to compensate for the event_map(), otherwise ++ * we are looking up the wrong event in the map ++ */ ++ offset++; ++ } ++} ++ ++/* Merge two pointer arrays */ ++__init struct attribute **merge_attr(struct attribute **a, struct attribute **b) ++{ ++ struct attribute **new; ++ int j, i; ++ ++ for (j = 0; a[j]; j++) ++ ; ++ for (i = 0; b[i]; i++) ++ j++; ++ j++; ++ ++ new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); ++ if (!new) ++ return NULL; ++ ++ j = 0; ++ for (i = 0; a[i]; i++) ++ new[j++] = a[i]; ++ for (i = 0; b[i]; i++) ++ new[j++] = b[i]; ++ new[j] = NULL; ++ ++ return new; ++} ++ ++ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) ++{ ++ struct perf_pmu_events_attr *pmu_attr = \ ++ container_of(attr, struct perf_pmu_events_attr, attr); ++ u64 config = x86_pmu.event_map(pmu_attr->id); ++ ++ /* string trumps id */ ++ if (pmu_attr->event_str) ++ return sprintf(page, "%s", pmu_attr->event_str); ++ ++ return x86_pmu.events_sysfs_show(page, config); ++} ++EXPORT_SYMBOL_GPL(events_sysfs_show); ++ ++ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, ++ char *page) ++{ ++ struct perf_pmu_events_ht_attr *pmu_attr = ++ container_of(attr, struct perf_pmu_events_ht_attr, attr); ++ ++ /* ++ * Report conditional events depending on Hyper-Threading. ++ * ++ * This is overly conservative as usually the HT special ++ * handling is not needed if the other CPU thread is idle. ++ * ++ * Note this does not (and cannot) handle the case when thread ++ * siblings are invisible, for example with virtualization ++ * if they are owned by some other guest. The user tool ++ * has to re-read when a thread sibling gets onlined later. ++ */ ++ return sprintf(page, "%s", ++ topology_max_smt_threads() > 1 ? ++ pmu_attr->event_str_ht : ++ pmu_attr->event_str_noht); ++} ++ ++EVENT_ATTR(cpu-cycles, CPU_CYCLES ); ++EVENT_ATTR(instructions, INSTRUCTIONS ); ++EVENT_ATTR(cache-references, CACHE_REFERENCES ); ++EVENT_ATTR(cache-misses, CACHE_MISSES ); ++EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS ); ++EVENT_ATTR(branch-misses, BRANCH_MISSES ); ++EVENT_ATTR(bus-cycles, BUS_CYCLES ); ++EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND ); ++EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND ); ++EVENT_ATTR(ref-cycles, REF_CPU_CYCLES ); ++ ++static struct attribute *empty_attrs; ++ ++static struct attribute *events_attr[] = { ++ EVENT_PTR(CPU_CYCLES), ++ EVENT_PTR(INSTRUCTIONS), ++ EVENT_PTR(CACHE_REFERENCES), ++ EVENT_PTR(CACHE_MISSES), ++ EVENT_PTR(BRANCH_INSTRUCTIONS), ++ EVENT_PTR(BRANCH_MISSES), ++ EVENT_PTR(BUS_CYCLES), ++ EVENT_PTR(STALLED_CYCLES_FRONTEND), ++ EVENT_PTR(STALLED_CYCLES_BACKEND), ++ EVENT_PTR(REF_CPU_CYCLES), ++ NULL, ++}; ++ ++static struct attribute_group x86_pmu_events_group = { ++ .name = "events", ++ .attrs = events_attr, ++}; ++ ++ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) ++{ ++ u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; ++ u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24; ++ bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE); ++ bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL); ++ bool any = (config & ARCH_PERFMON_EVENTSEL_ANY); ++ bool inv = (config & ARCH_PERFMON_EVENTSEL_INV); ++ ssize_t ret; ++ ++ /* ++ * We have whole page size to spend and just little data ++ * to write, so we can safely use sprintf. 
++ */ ++ ret = sprintf(page, "event=0x%02llx", event); ++ ++ if (umask) ++ ret += sprintf(page + ret, ",umask=0x%02llx", umask); ++ ++ if (edge) ++ ret += sprintf(page + ret, ",edge"); ++ ++ if (pc) ++ ret += sprintf(page + ret, ",pc"); ++ ++ if (any) ++ ret += sprintf(page + ret, ",any"); ++ ++ if (inv) ++ ret += sprintf(page + ret, ",inv"); ++ ++ if (cmask) ++ ret += sprintf(page + ret, ",cmask=0x%02llx", cmask); ++ ++ ret += sprintf(page + ret, "\n"); ++ ++ return ret; ++} ++ ++static struct attribute_group x86_pmu_attr_group; ++static struct attribute_group x86_pmu_caps_group; ++ ++static int __init init_hw_perf_events(void) ++{ ++ struct x86_pmu_quirk *quirk; ++ int err; ++ ++ pr_info("Performance Events: "); ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ err = intel_pmu_init(); ++ break; ++ case X86_VENDOR_AMD: ++ err = amd_pmu_init(); ++ break; ++ case X86_VENDOR_HYGON: ++ err = amd_pmu_init(); ++ x86_pmu.name = "HYGON"; ++ break; ++ default: ++ err = -ENOTSUPP; ++ } ++ if (err != 0) { ++ pr_cont("no PMU driver, software events only.\n"); ++ return 0; ++ } ++ ++ pmu_check_apic(); ++ ++ /* sanity check that the hardware exists or is emulated */ ++ if (!check_hw_exists()) ++ return 0; ++ ++ pr_cont("%s PMU driver.\n", x86_pmu.name); ++ ++ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ ++ ++ for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) ++ quirk->func(); ++ ++ if (!x86_pmu.intel_ctrl) ++ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; ++ ++ perf_events_lapic_init(); ++ register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); ++ ++ unconstrained = (struct event_constraint) ++ __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, ++ 0, x86_pmu.num_counters, 0, 0); ++ ++ x86_pmu_format_group.attrs = x86_pmu.format_attrs; ++ ++ if (x86_pmu.caps_attrs) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_caps_group.attrs = tmp; ++ } ++ ++ if (x86_pmu.event_attrs) ++ x86_pmu_events_group.attrs = x86_pmu.event_attrs; ++ ++ if (!x86_pmu.events_sysfs_show) ++ x86_pmu_events_group.attrs = &empty_attrs; ++ else ++ filter_events(x86_pmu_events_group.attrs); ++ ++ if (x86_pmu.cpu_events) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_events_group.attrs = tmp; ++ } ++ ++ if (x86_pmu.attrs) { ++ struct attribute **tmp; ++ ++ tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); ++ if (!WARN_ON(!tmp)) ++ x86_pmu_attr_group.attrs = tmp; ++ } ++ ++ pr_info("... version: %d\n", x86_pmu.version); ++ pr_info("... bit width: %d\n", x86_pmu.cntval_bits); ++ pr_info("... generic registers: %d\n", x86_pmu.num_counters); ++ pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); ++ pr_info("... max period: %016Lx\n", x86_pmu.max_period); ++ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); ++ pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); ++ ++ /* ++ * Install callbacks. Core will call them for each online ++ * cpu. 
++ */ ++ err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare", ++ x86_pmu_prepare_cpu, x86_pmu_dead_cpu); ++ if (err) ++ return err; ++ ++ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, ++ "perf/x86:starting", x86_pmu_starting_cpu, ++ x86_pmu_dying_cpu); ++ if (err) ++ goto out; ++ ++ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online", ++ x86_pmu_online_cpu, NULL); ++ if (err) ++ goto out1; ++ ++ err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); ++ if (err) ++ goto out2; ++ ++ return 0; ++ ++out2: ++ cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE); ++out1: ++ cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); ++out: ++ cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); ++ return err; ++} ++early_initcall(init_hw_perf_events); ++ ++static inline void x86_pmu_read(struct perf_event *event) ++{ ++ if (x86_pmu.read) ++ return x86_pmu.read(event); ++ x86_perf_event_update(event); ++} ++ ++/* ++ * Start group events scheduling transaction ++ * Set the flag to make pmu::enable() not perform the ++ * schedulability test, it will be performed at commit time ++ * ++ * We only support PERF_PMU_TXN_ADD transactions. Save the ++ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD ++ * transactions. ++ */ ++static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ ++ ++ cpuc->txn_flags = txn_flags; ++ if (txn_flags & ~PERF_PMU_TXN_ADD) ++ return; ++ ++ perf_pmu_disable(pmu); ++ __this_cpu_write(cpu_hw_events.n_txn, 0); ++} ++ ++/* ++ * Stop group events scheduling transaction ++ * Clear the flag and pmu::enable() will perform the ++ * schedulability test. ++ */ ++static void x86_pmu_cancel_txn(struct pmu *pmu) ++{ ++ unsigned int txn_flags; ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ ++ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ ++ ++ txn_flags = cpuc->txn_flags; ++ cpuc->txn_flags = 0; ++ if (txn_flags & ~PERF_PMU_TXN_ADD) ++ return; ++ ++ /* ++ * Truncate collected array by the number of events added in this ++ * transaction. See x86_pmu_add() and x86_pmu_*_txn(). ++ */ ++ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); ++ __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); ++ perf_pmu_enable(pmu); ++} ++ ++/* ++ * Commit group events scheduling transaction ++ * Perform the group schedulability test as a whole ++ * Return 0 if success ++ * ++ * Does not cancel the transaction on failure; expects the caller to do this. ++ */ ++static int x86_pmu_commit_txn(struct pmu *pmu) ++{ ++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); ++ int assign[X86_PMC_IDX_MAX]; ++ int n, ret; ++ ++ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ ++ ++ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { ++ cpuc->txn_flags = 0; ++ return 0; ++ } ++ ++ n = cpuc->n_events; ++ ++ if (!x86_pmu_initialized()) ++ return -EAGAIN; ++ ++ ret = x86_pmu.schedule_events(cpuc, n, assign); ++ if (ret) ++ return ret; ++ ++ /* ++ * copy new assignment, now we know it is possible ++ * will be used by hw_perf_enable() ++ */ ++ memcpy(cpuc->assign, assign, n*sizeof(int)); ++ ++ cpuc->txn_flags = 0; ++ perf_pmu_enable(pmu); ++ return 0; ++} ++/* ++ * a fake_cpuc is used to validate event groups. Due to ++ * the extra reg logic, we need to also allocate a fake ++ * per_core and per_cpu structure. 
Otherwise, group events ++ * using extra reg may conflict without the kernel being ++ * able to catch this when the last event gets added to ++ * the group. ++ */ ++static void free_fake_cpuc(struct cpu_hw_events *cpuc) ++{ ++ intel_cpuc_finish(cpuc); ++ kfree(cpuc); ++} ++ ++static struct cpu_hw_events *allocate_fake_cpuc(void) ++{ ++ struct cpu_hw_events *cpuc; ++ int cpu = raw_smp_processor_id(); ++ ++ cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); ++ if (!cpuc) ++ return ERR_PTR(-ENOMEM); ++ cpuc->is_fake = 1; ++ ++ if (intel_cpuc_prepare(cpuc, cpu)) ++ goto error; ++ ++ return cpuc; ++error: ++ free_fake_cpuc(cpuc); ++ return ERR_PTR(-ENOMEM); ++} ++ ++/* ++ * validate that we can schedule this event ++ */ ++static int validate_event(struct perf_event *event) ++{ ++ struct cpu_hw_events *fake_cpuc; ++ struct event_constraint *c; ++ int ret = 0; ++ ++ fake_cpuc = allocate_fake_cpuc(); ++ if (IS_ERR(fake_cpuc)) ++ return PTR_ERR(fake_cpuc); ++ ++ c = x86_pmu.get_event_constraints(fake_cpuc, -1, event); ++ ++ if (!c || !c->weight) ++ ret = -EINVAL; ++ ++ if (x86_pmu.put_event_constraints) ++ x86_pmu.put_event_constraints(fake_cpuc, event); ++ ++ free_fake_cpuc(fake_cpuc); ++ ++ return ret; ++} ++ ++/* ++ * validate a single event group ++ * ++ * validation include: ++ * - check events are compatible which each other ++ * - events do not compete for the same counter ++ * - number of events <= number of counters ++ * ++ * validation ensures the group can be loaded onto the ++ * PMU if it was the only group available. ++ */ ++static int validate_group(struct perf_event *event) ++{ ++ struct perf_event *leader = event->group_leader; ++ struct cpu_hw_events *fake_cpuc; ++ int ret = -EINVAL, n; ++ ++ fake_cpuc = allocate_fake_cpuc(); ++ if (IS_ERR(fake_cpuc)) ++ return PTR_ERR(fake_cpuc); ++ /* ++ * the event is not yet connected with its ++ * siblings therefore we must first collect ++ * existing siblings, then add the new event ++ * before we can simulate the scheduling ++ */ ++ n = collect_events(fake_cpuc, leader, true); ++ if (n < 0) ++ goto out; ++ ++ fake_cpuc->n_events = n; ++ n = collect_events(fake_cpuc, event, false); ++ if (n < 0) ++ goto out; ++ ++ fake_cpuc->n_events = n; ++ ++ ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); ++ ++out: ++ free_fake_cpuc(fake_cpuc); ++ return ret; ++} ++ ++static int x86_pmu_event_init(struct perf_event *event) ++{ ++ struct pmu *tmp; ++ int err; ++ ++ switch (event->attr.type) { ++ case PERF_TYPE_RAW: ++ case PERF_TYPE_HARDWARE: ++ case PERF_TYPE_HW_CACHE: ++ break; ++ ++ default: ++ return -ENOENT; ++ } ++ ++ err = __x86_pmu_event_init(event); ++ if (!err) { ++ /* ++ * we temporarily connect event to its pmu ++ * such that validate_group() can classify ++ * it as an x86 event using is_x86_event() ++ */ ++ tmp = event->pmu; ++ event->pmu = &pmu; ++ ++ if (event->group_leader != event) ++ err = validate_group(event); ++ else ++ err = validate_event(event); ++ ++ event->pmu = tmp; ++ } ++ if (err) { ++ if (event->destroy) ++ event->destroy(event); ++ } ++ ++ if (READ_ONCE(x86_pmu.attr_rdpmc) && ++ !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) ++ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; ++ ++ return err; ++} ++ ++static void refresh_pce(void *ignored) ++{ ++ load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); ++} ++ ++static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) ++{ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return; ++ ++ /* ++ * This function relies on not being called concurrently in 
two ++ * tasks in the same mm. Otherwise one task could observe ++ * perf_rdpmc_allowed > 1 and return all the way back to ++ * userspace with CR4.PCE clear while another task is still ++ * doing on_each_cpu_mask() to propagate CR4.PCE. ++ * ++ * For now, this can't happen because all callers hold mmap_sem ++ * for write. If this changes, we'll need a different solution. ++ */ ++ lockdep_assert_held_exclusive(&mm->mmap_sem); ++ ++ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) ++ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); ++} ++ ++static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) ++{ ++ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return; ++ ++ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) ++ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); ++} ++ ++static int x86_pmu_event_idx(struct perf_event *event) ++{ ++ int idx = event->hw.idx; ++ ++ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) ++ return 0; ++ ++ if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { ++ idx -= INTEL_PMC_IDX_FIXED; ++ idx |= 1 << 30; ++ } ++ ++ return idx + 1; ++} ++ ++static ssize_t get_attr_rdpmc(struct device *cdev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); ++} ++ ++static ssize_t set_attr_rdpmc(struct device *cdev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long val; ++ ssize_t ret; ++ ++ ret = kstrtoul(buf, 0, &val); ++ if (ret) ++ return ret; ++ ++ if (val > 2) ++ return -EINVAL; ++ ++ if (x86_pmu.attr_rdpmc_broken) ++ return -ENOTSUPP; ++ ++ if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) { ++ /* ++ * Changing into or out of always available, aka ++ * perf-event-bypassing mode. This path is extremely slow, ++ * but only root can trigger it, so it's okay. 
++ */ ++ if (val == 2) ++ static_branch_inc(&rdpmc_always_available_key); ++ else ++ static_branch_dec(&rdpmc_always_available_key); ++ on_each_cpu(refresh_pce, NULL, 1); ++ } ++ ++ x86_pmu.attr_rdpmc = val; ++ ++ return count; ++} ++ ++static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); ++ ++static struct attribute *x86_pmu_attrs[] = { ++ &dev_attr_rdpmc.attr, ++ NULL, ++}; ++ ++static struct attribute_group x86_pmu_attr_group = { ++ .attrs = x86_pmu_attrs, ++}; ++ ++static ssize_t max_precise_show(struct device *cdev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); ++} ++ ++static DEVICE_ATTR_RO(max_precise); ++ ++static struct attribute *x86_pmu_caps_attrs[] = { ++ &dev_attr_max_precise.attr, ++ NULL ++}; ++ ++static struct attribute_group x86_pmu_caps_group = { ++ .name = "caps", ++ .attrs = x86_pmu_caps_attrs, ++}; ++ ++static const struct attribute_group *x86_pmu_attr_groups[] = { ++ &x86_pmu_attr_group, ++ &x86_pmu_format_group, ++ &x86_pmu_events_group, ++ &x86_pmu_caps_group, ++ NULL, ++}; ++ ++static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) ++{ ++ if (x86_pmu.sched_task) ++ x86_pmu.sched_task(ctx, sched_in); ++} ++ ++void perf_check_microcode(void) ++{ ++ if (x86_pmu.check_microcode) ++ x86_pmu.check_microcode(); ++} ++ ++static int x86_pmu_check_period(struct perf_event *event, u64 value) ++{ ++ if (x86_pmu.check_period && x86_pmu.check_period(event, value)) ++ return -EINVAL; ++ ++ if (value && x86_pmu.limit_period) { ++ if (x86_pmu.limit_period(event, value) > value) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static struct pmu pmu = { ++ .pmu_enable = x86_pmu_enable, ++ .pmu_disable = x86_pmu_disable, ++ ++ .attr_groups = x86_pmu_attr_groups, ++ ++ .event_init = x86_pmu_event_init, ++ ++ .event_mapped = x86_pmu_event_mapped, ++ .event_unmapped = x86_pmu_event_unmapped, ++ ++ .add = x86_pmu_add, ++ .del = x86_pmu_del, ++ .start = x86_pmu_start, ++ .stop = x86_pmu_stop, ++ .read = x86_pmu_read, ++ ++ .start_txn = x86_pmu_start_txn, ++ .cancel_txn = x86_pmu_cancel_txn, ++ .commit_txn = x86_pmu_commit_txn, ++ ++ .event_idx = x86_pmu_event_idx, ++ .sched_task = x86_pmu_sched_task, ++ .task_ctx_size = sizeof(struct x86_perf_task_context), ++ .check_period = x86_pmu_check_period, ++}; ++ ++void arch_perf_update_userpage(struct perf_event *event, ++ struct perf_event_mmap_page *userpg, u64 now) ++{ ++ struct cyc2ns_data data; ++ u64 offset; ++ ++ userpg->cap_user_time = 0; ++ userpg->cap_user_time_zero = 0; ++ userpg->cap_user_rdpmc = ++ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); ++ userpg->pmc_width = x86_pmu.cntval_bits; ++ ++ if (!using_native_sched_clock() || !sched_clock_stable()) ++ return; ++ ++ cyc2ns_read_begin(&data); ++ ++ offset = data.cyc2ns_offset + __sched_clock_offset; ++ ++ /* ++ * Internal timekeeping for enabled/running/stopped times ++ * is always in the local_clock domain. ++ */ ++ userpg->cap_user_time = 1; ++ userpg->time_mult = data.cyc2ns_mul; ++ userpg->time_shift = data.cyc2ns_shift; ++ userpg->time_offset = offset - now; ++ ++ /* ++ * cap_user_time_zero doesn't make sense when we're using a different ++ * time base for the records. 
++ */ ++ if (!event->attr.use_clockid) { ++ userpg->cap_user_time_zero = 1; ++ userpg->time_zero = offset; ++ } ++ ++ cyc2ns_read_end(); ++} ++ ++void ++perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) ++{ ++ struct unwind_state state; ++ unsigned long addr; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ /* TODO: We don't support guest os callchain now */ ++ return; ++ } ++ ++ if (perf_callchain_store(entry, regs->ip)) ++ return; ++ ++ for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); ++ unwind_next_frame(&state)) { ++ addr = unwind_get_return_address(&state); ++ if (!addr || perf_callchain_store(entry, addr)) ++ return; ++ } ++} ++ ++static inline int ++valid_user_frame(const void __user *fp, unsigned long size) ++{ ++ return (__range_not_ok(fp, size, TASK_SIZE) == 0); ++} ++ ++static unsigned long get_segment_base(unsigned int segment) ++{ ++ struct desc_struct *desc; ++ unsigned int idx = segment >> 3; ++ ++ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { ++#ifdef CONFIG_MODIFY_LDT_SYSCALL ++ struct ldt_struct *ldt; ++ ++ /* IRQs are off, so this synchronizes with smp_store_release */ ++ ldt = READ_ONCE(current->active_mm->context.ldt); ++ if (!ldt || idx >= ldt->nr_entries) ++ return 0; ++ ++ desc = &ldt->entries[idx]; ++#else ++ return 0; ++#endif ++ } else { ++ if (idx >= GDT_ENTRIES) ++ return 0; ++ ++ desc = raw_cpu_ptr(gdt_page.gdt) + idx; ++ } ++ ++ return get_desc_base(desc); ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++ ++#include ++ ++static inline int ++perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) ++{ ++ /* 32-bit process in 64-bit kernel. */ ++ unsigned long ss_base, cs_base; ++ struct stack_frame_ia32 frame; ++ const void __user *fp; ++ ++ if (!test_thread_flag(TIF_IA32)) ++ return 0; ++ ++ cs_base = get_segment_base(regs->cs); ++ ss_base = get_segment_base(regs->ss); ++ ++ fp = compat_ptr(ss_base + regs->bp); ++ pagefault_disable(); ++ while (entry->nr < entry->max_stack) { ++ unsigned long bytes; ++ frame.next_frame = 0; ++ frame.return_address = 0; ++ ++ if (!valid_user_frame(fp, sizeof(frame))) ++ break; ++ ++ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); ++ if (bytes != 0) ++ break; ++ bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4); ++ if (bytes != 0) ++ break; ++ ++ perf_callchain_store(entry, cs_base + frame.return_address); ++ fp = compat_ptr(ss_base + frame.next_frame); ++ } ++ pagefault_enable(); ++ return 1; ++} ++#else ++static inline int ++perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) ++{ ++ return 0; ++} ++#endif ++ ++void ++perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) ++{ ++ struct stack_frame frame; ++ const unsigned long __user *fp; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ /* TODO: We don't support guest os callchain now */ ++ return; ++ } ++ ++ /* ++ * We don't know what to do with VM86 stacks.. ignore them for now. 
++ */ ++ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) ++ return; ++ ++ fp = (unsigned long __user *)regs->bp; ++ ++ perf_callchain_store(entry, regs->ip); ++ ++ if (!nmi_uaccess_okay()) ++ return; ++ ++ if (perf_callchain_user32(regs, entry)) ++ return; ++ ++ pagefault_disable(); ++ while (entry->nr < entry->max_stack) { ++ unsigned long bytes; ++ ++ frame.next_frame = NULL; ++ frame.return_address = 0; ++ ++ if (!valid_user_frame(fp, sizeof(frame))) ++ break; ++ ++ bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); ++ if (bytes != 0) ++ break; ++ bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); ++ if (bytes != 0) ++ break; ++ ++ perf_callchain_store(entry, frame.return_address); ++ fp = (void __user *)frame.next_frame; ++ } ++ pagefault_enable(); ++} ++ ++/* ++ * Deal with code segment offsets for the various execution modes: ++ * ++ * VM86 - the good olde 16 bit days, where the linear address is ++ * 20 bits and we use regs->ip + 0x10 * regs->cs. ++ * ++ * IA32 - Where we need to look at GDT/LDT segment descriptor tables ++ * to figure out what the 32bit base address is. ++ * ++ * X32 - has TIF_X32 set, but is running in x86_64 ++ * ++ * X86_64 - CS,DS,SS,ES are all zero based. ++ */ ++static unsigned long code_segment_base(struct pt_regs *regs) ++{ ++ /* ++ * For IA32 we look at the GDT/LDT segment base to convert the ++ * effective IP to a linear address. ++ */ ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * If we are in VM86 mode, add the segment offset to convert to a ++ * linear address. ++ */ ++ if (regs->flags & X86_VM_MASK) ++ return 0x10 * regs->cs; ++ ++ if (user_mode(regs) && regs->cs != __USER_CS) ++ return get_segment_base(regs->cs); ++#else ++ if (user_mode(regs) && !user_64bit_mode(regs) && ++ regs->cs != __USER32_CS) ++ return get_segment_base(regs->cs); ++#endif ++ return 0; ++} ++ ++unsigned long perf_instruction_pointer(struct pt_regs *regs) ++{ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) ++ return perf_guest_cbs->get_guest_ip(); ++ ++ return regs->ip + code_segment_base(regs); ++} ++ ++unsigned long perf_misc_flags(struct pt_regs *regs) ++{ ++ int misc = 0; ++ ++ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { ++ if (perf_guest_cbs->is_user_mode()) ++ misc |= PERF_RECORD_MISC_GUEST_USER; ++ else ++ misc |= PERF_RECORD_MISC_GUEST_KERNEL; ++ } else { ++ if (user_mode(regs)) ++ misc |= PERF_RECORD_MISC_USER; ++ else ++ misc |= PERF_RECORD_MISC_KERNEL; ++ } ++ ++ if (regs->flags & PERF_EFLAGS_EXACT) ++ misc |= PERF_RECORD_MISC_EXACT_IP; ++ ++ return misc; ++} ++ ++void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) ++{ ++ cap->version = x86_pmu.version; ++ cap->num_counters_gp = x86_pmu.num_counters; ++ cap->num_counters_fixed = x86_pmu.num_counters_fixed; ++ cap->bit_width_gp = x86_pmu.cntval_bits; ++ cap->bit_width_fixed = x86_pmu.cntval_bits; ++ cap->events_mask = (unsigned int)x86_pmu.events_maskl; ++ cap->events_mask_len = x86_pmu.events_mask_len; ++} ++EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); +diff -uprN kernel/arch/x86/include/asm/apic.h kernel_new/arch/x86/include/asm/apic.h +--- kernel/arch/x86/include/asm/apic.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/apic.h 2021-04-01 18:28:07.651863292 +0800 +@@ -439,7 +439,17 @@ static inline void apic_set_eoi_write(vo + + extern void apic_ack_irq(struct irq_data *data); + ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++struct irq_data; ++void move_xxapic_irq(struct irq_data *data); ++#endif ++#define ack_APIC_irq() do { } 
while(0) ++static inline void __ack_APIC_irq(void) ++#else /* !CONFIG_IPIPE */ ++#define __ack_APIC_irq() ack_APIC_irq() + static inline void ack_APIC_irq(void) ++#endif /* CONFIG_IPIPE */ + { + /* + * ack_APIC_irq() actually gets compiled as a single instruction +diff -uprN kernel/arch/x86/include/asm/debugreg.h kernel_new/arch/x86/include/asm/debugreg.h +--- kernel/arch/x86/include/asm/debugreg.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/debugreg.h 2021-04-01 18:28:07.651863292 +0800 +@@ -94,7 +94,7 @@ extern void aout_dump_debugregs(struct u + + extern void hw_breakpoint_restore(void); + +-#ifdef CONFIG_X86_64 ++#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE) + DECLARE_PER_CPU(int, debug_stack_usage); + static inline void debug_stack_usage_inc(void) + { +diff -uprN kernel/arch/x86/include/asm/desc.h kernel_new/arch/x86/include/asm/desc.h +--- kernel/arch/x86/include/asm/desc.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/desc.h 2021-04-01 18:28:07.651863292 +0800 +@@ -309,7 +309,7 @@ static inline void force_reload_TR(void) + */ + static inline void refresh_tss_limit(void) + { +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(!hard_irqs_disabled() && preemptible()); + + if (unlikely(this_cpu_read(__tss_limit_invalid))) + force_reload_TR(); +@@ -326,7 +326,7 @@ static inline void refresh_tss_limit(voi + */ + static inline void invalidate_tss_limit(void) + { +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(!hard_irqs_disabled() && preemptible()); + + if (unlikely(test_thread_flag(TIF_IO_BITMAP))) + force_reload_TR(); +@@ -391,7 +391,7 @@ void alloc_intr_gate(unsigned int n, con + + extern unsigned long system_vectors[]; + +-#ifdef CONFIG_X86_64 ++#if defined(CONFIG_X86_64) && !defined(CONFIG_IPIPE) + DECLARE_PER_CPU(u32, debug_idt_ctr); + static inline bool is_debug_idt_enabled(void) + { +diff -uprN kernel/arch/x86/include/asm/fpu/internal.h kernel_new/arch/x86/include/asm/fpu/internal.h +--- kernel/arch/x86/include/asm/fpu/internal.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/fpu/internal.h 2021-04-01 18:28:07.652863290 +0800 +@@ -607,4 +607,24 @@ static inline void xsetbv(u32 index, u64 + : : "a" (eax), "d" (edx), "c" (index)); + } + ++DECLARE_PER_CPU(bool, in_kernel_fpu); ++ ++static inline void kernel_fpu_disable(void) ++{ ++ WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); ++ this_cpu_write(in_kernel_fpu, true); ++} ++ ++static inline void kernel_fpu_enable(void) ++{ ++ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); ++ this_cpu_write(in_kernel_fpu, false); ++} ++ ++static inline bool kernel_fpu_disabled(void) ++{ ++ return this_cpu_read(in_kernel_fpu); ++} ++ ++ + #endif /* _ASM_X86_FPU_INTERNAL_H */ +diff -uprN kernel/arch/x86/include/asm/i8259.h kernel_new/arch/x86/include/asm/i8259.h +--- kernel/arch/x86/include/asm/i8259.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/i8259.h 2021-04-01 18:28:07.652863290 +0800 +@@ -26,7 +26,7 @@ extern unsigned int cached_irq_mask; + #define SLAVE_ICW4_DEFAULT 0x01 + #define PIC_ICW4_AEOI 2 + +-extern raw_spinlock_t i8259A_lock; ++IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock); + + /* the PIC may need a careful delay on some platforms, hence specific calls */ + static inline unsigned char inb_pic(unsigned int port) +diff -uprN kernel/arch/x86/include/asm/ipipe_base.h kernel_new/arch/x86/include/asm/ipipe_base.h +--- kernel/arch/x86/include/asm/ipipe_base.h 1970-01-01 08:00:00.000000000 +0800 ++++ 
kernel_new/arch/x86/include/asm/ipipe_base.h 2021-04-01 18:28:07.652863290 +0800 +@@ -0,0 +1,156 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe_base.h ++ * ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __X86_IPIPE_BASE_H ++#define __X86_IPIPE_BASE_H ++ ++#include ++#include ++ ++#ifdef CONFIG_X86_32 ++/* 32 from IDT + iret_error + mayday trap */ ++#define IPIPE_TRAP_MAYDAY 33 /* Internal recovery trap */ ++#define IPIPE_NR_FAULTS 34 ++#else ++/* 32 from IDT + mayday trap */ ++#define IPIPE_TRAP_MAYDAY 32 /* Internal recovery trap */ ++#define IPIPE_NR_FAULTS 33 ++#endif ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++/* ++ * Special APIC interrupts are mapped above the last defined external ++ * IRQ number. ++ */ ++#define nr_apic_vectors (NR_VECTORS - FIRST_SYSTEM_VECTOR) ++#define IPIPE_FIRST_APIC_IRQ NR_IRQS ++#define IPIPE_HRTIMER_IPI ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR) ++#ifdef CONFIG_SMP ++#define IPIPE_RESCHEDULE_IPI ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR) ++#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR) ++#endif /* CONFIG_SMP */ ++#define IPIPE_NR_XIRQS (NR_IRQS + nr_apic_vectors) ++#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR) ++#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ) ++#else ++#define IPIPE_NR_XIRQS NR_IRQS ++#endif /* !CONFIG_X86_LOCAL_APIC */ ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++ ++extern unsigned int cpu_khz; ++ ++static inline const char *ipipe_clock_name(void) ++{ ++ return "tsc"; ++} ++ ++#define __ipipe_cpu_freq ({ u64 __freq = 1000ULL * cpu_khz; __freq; }) ++#define __ipipe_hrclock_freq __ipipe_cpu_freq ++ ++#ifdef CONFIG_X86_32 ++ ++#define ipipe_read_tsc(t) \ ++ __asm__ __volatile__("rdtsc" : "=A"(t)) ++ ++#define ipipe_tsc2ns(t) \ ++({ \ ++ unsigned long long delta = (t) * 1000000ULL; \ ++ unsigned long long freq = __ipipe_hrclock_freq; \ ++ do_div(freq, 1000); \ ++ do_div(delta, (unsigned)freq + 1); \ ++ (unsigned long)delta; \ ++}) ++ ++#define ipipe_tsc2us(t) \ ++({ \ ++ unsigned long long delta = (t) * 1000ULL; \ ++ unsigned long long freq = __ipipe_hrclock_freq; \ ++ do_div(freq, 1000); \ ++ do_div(delta, (unsigned)freq + 1); \ ++ (unsigned long)delta; \ ++}) ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrl %1, %0":"=r"(ul) : "r"(ul)); ++ return ul; ++} ++ ++#else /* X86_64 */ ++ ++#define ipipe_read_tsc(t) do { \ ++ unsigned int __a,__d; \ ++ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ ++ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ ++} while(0) ++ ++#define ipipe_tsc2ns(t) (((t) * 1000UL) / (__ipipe_hrclock_freq / 1000000UL)) ++#define ipipe_tsc2us(t) ((t) / (__ipipe_hrclock_freq / 
1000000UL)) ++ ++static inline unsigned long __ipipe_ffnz(unsigned long ul) ++{ ++ __asm__("bsrq %1, %0":"=r"(ul) ++ : "rm"(ul)); ++ return ul; ++} ++ ++#ifdef CONFIG_IA32_EMULATION ++#define ipipe_root_nr_syscalls(ti) \ ++ ((ti->status & TS_COMPAT) ? IA32_NR_syscalls : NR_syscalls) ++#endif /* CONFIG_IA32_EMULATION */ ++ ++#endif /* X86_64 */ ++ ++struct pt_regs; ++struct irq_desc; ++struct ipipe_vm_notifier; ++ ++static inline unsigned __ipipe_get_irq_vector(int irq) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ unsigned int __ipipe_get_ioapic_irq_vector(int irq); ++ return __ipipe_get_ioapic_irq_vector(irq); ++#elif defined(CONFIG_X86_LOCAL_APIC) ++ return irq >= IPIPE_FIRST_APIC_IRQ ? ++ ipipe_apic_irq_vector(irq) : ISA_IRQ_VECTOR(irq); ++#else ++ return ISA_IRQ_VECTOR(irq); ++#endif ++} ++ ++void ipipe_hrtimer_interrupt(void); ++ ++void ipipe_reschedule_interrupt(void); ++ ++void ipipe_critical_interrupt(void); ++ ++int __ipipe_handle_irq(struct pt_regs *regs); ++ ++void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy); ++ ++extern int __ipipe_hrtimer_irq; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* !__X86_IPIPE_BASE_H */ +diff -uprN kernel/arch/x86/include/asm/ipipe.h kernel_new/arch/x86/include/asm/ipipe.h +--- kernel/arch/x86/include/asm/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/ipipe.h 2021-04-01 18:28:07.652863290 +0800 +@@ -0,0 +1,70 @@ ++/* -*- linux-c -*- ++ * arch/x86/include/asm/ipipe.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __X86_IPIPE_H ++#define __X86_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#define IPIPE_CORE_RELEASE 9 ++ ++struct ipipe_domain; ++ ++struct ipipe_arch_sysinfo { ++}; ++ ++#define ipipe_processor_id() raw_smp_processor_id() ++ ++/* Private interface -- Internal use only */ ++ ++#define __ipipe_early_core_setup() do { } while(0) ++ ++#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq) ++#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq) ++ ++#ifdef CONFIG_SMP ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); ++#else ++#define __ipipe_hook_critical_ipi(ipd) do { } while(0) ++#endif ++ ++void __ipipe_enable_pipeline(void); ++ ++#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF) ++ ++#define ipipe_notify_root_preemption() __ipipe_notify_vm_preemption() ++ ++#endif /* CONFIG_IPIPE */ ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++#define __ipipe_move_root_irq(__desc) \ ++ do { \ ++ if (!IS_ERR_OR_NULL(__desc)) { \ ++ struct irq_chip *__chip = irq_desc_get_chip(__desc); \ ++ if (__chip->irq_move) \ ++ __chip->irq_move(irq_desc_get_irq_data(__desc)); \ ++ } \ ++ } while (0) ++#else /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++#define __ipipe_move_root_irq(irq) do { } while (0) ++#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */ ++ ++#endif /* !__X86_IPIPE_H */ +diff -uprN kernel/arch/x86/include/asm/irqflags.h kernel_new/arch/x86/include/asm/irqflags.h +--- kernel/arch/x86/include/asm/irqflags.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/irqflags.h 2021-04-01 18:28:07.652863290 +0800 +@@ -8,6 +8,10 @@ + + #include + ++#include ++#include ++#include ++ + /* Provide __cpuidle; we can't safely include */ + #define __cpuidle __attribute__((__section__(".cpuidle.text"))) + +@@ -66,14 +70,76 @@ static inline __cpuidle void native_halt + asm volatile("hlt": : :"memory"); + } + ++static inline int native_irqs_disabled(void) ++{ ++ unsigned long flags = native_save_fl(); ++ ++ return !(flags & X86_EFLAGS_IF); ++} ++ + #endif + + #ifdef CONFIG_PARAVIRT + #include ++#define HARD_COND_ENABLE_INTERRUPTS ++#define HARD_COND_DISABLE_INTERRUPTS + #else + #ifndef __ASSEMBLY__ + #include + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_halt_root(int use_mwait); ++ ++static inline notrace unsigned long arch_local_save_flags(void) ++{ ++ unsigned long flags; ++ ++ flags = (!ipipe_test_root()) << 9; ++ barrier(); ++ return flags; ++} ++ ++static inline notrace void arch_local_irq_restore(unsigned long flags) ++{ ++ barrier(); ++ ipipe_restore_root(!(flags & X86_EFLAGS_IF)); ++} ++ ++static inline notrace void arch_local_irq_disable(void) ++{ ++ ipipe_stall_root(); ++ barrier(); ++} ++ ++static inline notrace void arch_local_irq_enable(void) ++{ ++ barrier(); ++ ipipe_unstall_root(); ++} ++ ++static inline __cpuidle void arch_safe_halt(void) ++{ ++ barrier(); ++ __ipipe_halt_root(0); ++} ++ ++/* Merge virtual+real interrupt mask bits into a single word. 
*/ ++static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real) ++{ ++ return (real & ~(1L << 31)) | ((unsigned long)(virt != 0) << 31); ++} ++ ++/* Converse operation of arch_mangle_irq_bits() */ ++static inline int arch_demangle_irq_bits(unsigned long *x) ++{ ++ int virt = (*x & (1L << 31)) != 0; ++ *x &= ~(1L << 31); ++ return virt; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ + static inline notrace unsigned long arch_local_save_flags(void) + { + return native_save_fl(); +@@ -103,6 +169,8 @@ static inline __cpuidle void arch_safe_h + native_safe_halt(); + } + ++#endif /* !CONFIG_IPIPE */ ++ + /* + * Used when interrupts are already enabled or to + * shutdown the processor: +@@ -126,6 +194,14 @@ static inline notrace unsigned long arch + #define ENABLE_INTERRUPTS(x) sti + #define DISABLE_INTERRUPTS(x) cli + ++#ifdef CONFIG_IPIPE ++#define HARD_COND_ENABLE_INTERRUPTS sti ++#define HARD_COND_DISABLE_INTERRUPTS cli ++#else /* !CONFIG_IPIPE */ ++#define HARD_COND_ENABLE_INTERRUPTS ++#define HARD_COND_DISABLE_INTERRUPTS ++#endif /* !CONFIG_IPIPE */ ++ + #ifdef CONFIG_X86_64 + #define SWAPGS swapgs + /* +@@ -174,40 +250,156 @@ static inline int arch_irqs_disabled(voi + + return arch_irqs_disabled_flags(flags); + } ++ ++#ifdef CONFIG_IPIPE ++ ++static inline unsigned long hard_local_irq_save_notrace(void) ++{ ++ unsigned long flags; ++ ++ flags = native_save_fl(); ++ native_irq_disable(); ++ ++ return flags; ++} ++ ++static inline void hard_local_irq_restore_notrace(unsigned long flags) ++{ ++ native_restore_fl(flags); ++} ++ ++static inline void hard_local_irq_disable_notrace(void) ++{ ++ native_irq_disable(); ++} ++ ++static inline void hard_local_irq_enable_notrace(void) ++{ ++ native_irq_enable(); ++} ++ ++static inline int hard_irqs_disabled(void) ++{ ++ return native_irqs_disabled(); ++} ++ ++#define hard_irqs_disabled_flags(flags) arch_irqs_disabled_flags(flags) ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ++static inline void hard_local_irq_disable(void) ++{ ++ if (!native_irqs_disabled()) { ++ native_irq_disable(); ++ ipipe_trace_begin(0x80000000); ++ } ++} ++ ++static inline void hard_local_irq_enable(void) ++{ ++ if (native_irqs_disabled()) { ++ ipipe_trace_end(0x80000000); ++ native_irq_enable(); ++ } ++} ++ ++static inline unsigned long hard_local_irq_save(void) ++{ ++ unsigned long flags; ++ ++ flags = native_save_fl(); ++ if (flags & X86_EFLAGS_IF) { ++ native_irq_disable(); ++ ipipe_trace_begin(0x80000001); ++ } ++ ++ return flags; ++} ++ ++static inline void hard_local_irq_restore(unsigned long flags) ++{ ++ if (flags & X86_EFLAGS_IF) ++ ipipe_trace_end(0x80000001); ++ ++ native_restore_fl(flags); ++} ++ ++#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++static inline unsigned long hard_local_irq_save(void) ++{ ++ return hard_local_irq_save_notrace(); ++} ++ ++static inline void hard_local_irq_restore(unsigned long flags) ++{ ++ hard_local_irq_restore_notrace(flags); ++} ++ ++static inline void hard_local_irq_enable(void) ++{ ++ hard_local_irq_enable_notrace(); ++} ++ ++static inline void hard_local_irq_disable(void) ++{ ++ hard_local_irq_disable_notrace(); ++} ++ ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ ++static inline unsigned long hard_local_save_flags(void) ++{ ++ return native_save_fl(); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + #endif /* !__ASSEMBLY__ */ + + #ifdef __ASSEMBLY__ + #ifdef CONFIG_TRACE_IRQFLAGS + # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; ++#ifdef CONFIG_IPIPE ++# define TRACE_IRQS_ON_VIRT call trace_hardirqs_on_virt_thunk; ++#else ++# 
define TRACE_IRQS_ON_VIRT TRACE_IRQS_ON ++#endif + # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; + #else + # define TRACE_IRQS_ON ++# define TRACE_IRQS_ON_VIRT + # define TRACE_IRQS_OFF + #endif + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # ifdef CONFIG_X86_64 +-# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk ++# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk + # define LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + call lockdep_sys_exit_thunk; \ + cli; \ + TRACE_IRQS_OFF; ++ + # else +-# define LOCKDEP_SYS_EXIT \ ++# define LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ ++ pushfl; \ ++ sti; \ + call lockdep_sys_exit; \ ++ popfl; \ + popl %edx; \ + popl %ecx; \ + popl %eax; ++ + # define LOCKDEP_SYS_EXIT_IRQ + # endif + #else + # define LOCKDEP_SYS_EXIT + # define LOCKDEP_SYS_EXIT_IRQ + #endif +-#endif /* __ASSEMBLY__ */ + ++#endif /* __ASSEMBLY__ */ + #endif +diff -uprN kernel/arch/x86/include/asm/irq_vectors.h kernel_new/arch/x86/include/asm/irq_vectors.h +--- kernel/arch/x86/include/asm/irq_vectors.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/irq_vectors.h 2021-04-01 18:28:07.652863290 +0800 +@@ -106,13 +106,18 @@ + + #define LOCAL_TIMER_VECTOR 0xec + +-#define NR_VECTORS 256 ++/* Interrupt pipeline IPIs */ ++#define IPIPE_HRTIMER_VECTOR 0xeb ++#define IPIPE_RESCHEDULE_VECTOR 0xea ++#define IPIPE_CRITICAL_VECTOR 0xe9 ++ ++/* ++ * I-pipe: Lowest vector number which may be assigned to a special ++ * APIC IRQ. We must know this at build time. ++ */ ++#define FIRST_SYSTEM_VECTOR IPIPE_CRITICAL_VECTOR + +-#ifdef CONFIG_X86_LOCAL_APIC +-#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +-#else +-#define FIRST_SYSTEM_VECTOR NR_VECTORS +-#endif ++#define NR_VECTORS 256 + + /* + * Size the maximum number of interrupts. 
+diff -uprN kernel/arch/x86/include/asm/mmu_context.h kernel_new/arch/x86/include/asm/mmu_context.h +--- kernel/arch/x86/include/asm/mmu_context.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/mmu_context.h 2021-04-01 18:28:07.652863290 +0800 +@@ -27,16 +27,16 @@ static inline void paravirt_activate_mm( + + DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); + +-static inline void load_mm_cr4(struct mm_struct *mm) ++static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) + { + if (static_branch_unlikely(&rdpmc_always_available_key) || + atomic_read(&mm->context.perf_rdpmc_allowed)) +- cr4_set_bits(X86_CR4_PCE); ++ cr4_set_bits_irqsoff(X86_CR4_PCE); + else +- cr4_clear_bits(X86_CR4_PCE); ++ cr4_clear_bits_irqsoff(X86_CR4_PCE); + } + #else +-static inline void load_mm_cr4(struct mm_struct *mm) {} ++static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {} + #endif + + #ifdef CONFIG_MODIFY_LDT_SYSCALL +@@ -173,7 +173,8 @@ static inline void switch_ldt(struct mm_ + load_mm_ldt(next); + #endif + +- DEBUG_LOCKS_WARN_ON(preemptible()); ++ DEBUG_LOCKS_WARN_ON(preemptible() && ++ (!IS_ENABLED(CONFIG_IPIPE) || !hard_irqs_disabled())); + } + + void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +@@ -213,6 +214,9 @@ extern void switch_mm_irqs_off(struct mm + struct task_struct *tsk); + #define switch_mm_irqs_off switch_mm_irqs_off + ++#define ipipe_switch_mm_head(prev, next, tsk) \ ++ switch_mm_irqs_off(prev, next, tsk) ++ + #define activate_mm(prev, next) \ + do { \ + paravirt_activate_mm((prev), (next)); \ +diff -uprN kernel/arch/x86/include/asm/thread_info.h kernel_new/arch/x86/include/asm/thread_info.h +--- kernel/arch/x86/include/asm/thread_info.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/thread_info.h 2021-04-01 18:28:07.652863290 +0800 +@@ -52,10 +52,15 @@ + struct task_struct; + #include + #include ++#include + + struct thread_info { + unsigned long flags; /* low level flags */ + u32 status; /* thread synchronous flags */ ++#ifdef CONFIG_IPIPE ++ unsigned long ipipe_flags; ++ struct ipipe_threadinfo ipipe_data; ++#endif + }; + + #define INIT_THREAD_INFO(tsk) \ +@@ -165,6 +170,17 @@ struct thread_info { + #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + ++/* ti->ipipe_flags */ ++#define TIP_HEAD 0 /* Runs in head domain */ ++#define TIP_NOTIFY 1 /* Notify head domain about kernel events */ ++#define TIP_MAYDAY 2 /* MAYDAY call is pending */ ++#define TIP_USERINTRET 3 /* Notify on IRQ/trap return to root userspace */ ++ ++#define _TIP_HEAD (1 << TIP_HEAD) ++#define _TIP_NOTIFY (1 << TIP_NOTIFY) ++#define _TIP_MAYDAY (1 << TIP_MAYDAY) ++#define _TIP_USERINTRET (1 << TIP_USERINTRET) ++ + #define STACK_WARN (THREAD_SIZE/8) + + /* +diff -uprN kernel/arch/x86/include/asm/tlbflush.h kernel_new/arch/x86/include/asm/tlbflush.h +--- kernel/arch/x86/include/asm/tlbflush.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/tlbflush.h 2021-04-01 18:28:07.652863290 +0800 +@@ -304,26 +304,42 @@ static inline void __cr4_set(unsigned lo + } + + /* Set in this cpu's CR4. */ +-static inline void cr4_set_bits(unsigned long mask) ++static inline void cr4_set_bits_irqsoff(unsigned long mask) + { +- unsigned long cr4, flags; ++ unsigned long cr4; + +- local_irq_save(flags); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); +- local_irq_restore(flags); + } + + /* Clear in this cpu's CR4. 
*/ +-static inline void cr4_clear_bits(unsigned long mask) ++static inline void cr4_clear_bits_irqsoff(unsigned long mask) + { +- unsigned long cr4, flags; ++ unsigned long cr4; + +- local_irq_save(flags); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); ++} ++ ++/* Set in this cpu's CR4. */ ++static inline void cr4_set_bits(unsigned long mask) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ cr4_set_bits_irqsoff(mask); ++ local_irq_restore(flags); ++} ++ ++/* Clear in this cpu's CR4. */ ++static inline void cr4_clear_bits(unsigned long mask) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); + } + +diff -uprN kernel/arch/x86/include/asm/tsc.h kernel_new/arch/x86/include/asm/tsc.h +--- kernel/arch/x86/include/asm/tsc.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/tsc.h 2021-04-01 18:28:07.652863290 +0800 +@@ -15,6 +15,8 @@ + */ + typedef unsigned long long cycles_t; + ++extern struct clocksource clocksource_tsc; ++ + extern unsigned int cpu_khz; + extern unsigned int tsc_khz; + +diff -uprN kernel/arch/x86/include/asm/uaccess.h kernel_new/arch/x86/include/asm/uaccess.h +--- kernel/arch/x86/include/asm/uaccess.h 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/include/asm/uaccess.h 2021-04-01 18:28:07.653863289 +0800 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -70,7 +71,7 @@ static inline bool __chk_range_not_ok(un + }) + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +-# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task()) ++# define WARN_ON_IN_IRQ() WARN_ON_ONCE(ipipe_root_p && !in_task()) + #else + # define WARN_ON_IN_IRQ() + #endif +diff -uprN kernel/arch/x86/Kconfig kernel_new/arch/x86/Kconfig +--- kernel/arch/x86/Kconfig 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/Kconfig 2021-04-01 18:28:07.653863289 +0800 +@@ -118,7 +118,7 @@ config X86 + select HAVE_ALIGNED_STRUCT_PAGE if SLUB + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE +- select HAVE_ARCH_JUMP_LABEL ++ select HAVE_ARCH_JUMP_LABEL if !IPIPE + select HAVE_ARCH_KASAN if X86_64 + select HAVE_ARCH_KGDB + select HAVE_ARCH_MMAP_RND_BITS if MMU +@@ -134,7 +134,7 @@ config X86 + select HAVE_ARCH_WITHIN_STACK_FRAMES + select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL +- select HAVE_CONTEXT_TRACKING if X86_64 ++ select HAVE_CONTEXT_TRACKING if X86_64 && !IPIPE + select HAVE_COPY_THREAD_TLS + select HAVE_C_RECORDMCOUNT + select HAVE_DEBUG_KMEMLEAK +@@ -155,6 +155,12 @@ config X86 + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_IPIPE_SUPPORT if X86_64 ++ select HAVE_IPIPE_TRACER_SUPPORT ++ select IPIPE_HAVE_HOSTRT if IPIPE ++ select IPIPE_HAVE_SAFE_THREAD_INFO if IPIPE ++ select IPIPE_WANT_PTE_PINNING if IPIPE ++ select IPIPE_HAVE_VM_NOTIFIER if IPIPE + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 +@@ -745,6 +751,7 @@ if HYPERVISOR_GUEST + + config PARAVIRT + bool "Enable paravirtualization code" ++ depends on !IPIPE + ---help--- + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly +@@ -934,7 +941,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT + + config MAXSMP + bool "Enable Maximum number of SMP Processors and NUMA Nodes" +- depends on X86_64 && SMP && DEBUG_KERNEL ++ depends on X86_64 && SMP && 
DEBUG_KERNEL && !IPIPE + select CPUMASK_OFFSTACK + ---help--- + Enable maximum number of CPUS and NUMA Nodes for this architecture. +@@ -1034,6 +1041,8 @@ config SCHED_MC_PRIO + + If unsure say Y here. + ++source "kernel/ipipe/Kconfig" ++ + config UP_LATE_INIT + def_bool y + depends on !SMP && X86_LOCAL_APIC +diff -uprN kernel/arch/x86/Kconfig.orig kernel_new/arch/x86/Kconfig.orig +--- kernel/arch/x86/Kconfig.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/Kconfig.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2982 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# Select 32 or 64 bit ++config 64BIT ++ bool "64-bit kernel" if "$(ARCH)" = "x86" ++ default "$(ARCH)" != "i386" ++ ---help--- ++ Say yes to build a 64-bit kernel - formerly known as x86_64 ++ Say no to build a 32-bit kernel - formerly known as i386 ++ ++config X86_32 ++ def_bool y ++ depends on !64BIT ++ # Options that are inherently 32-bit kernel only: ++ select ARCH_WANT_IPC_PARSE_VERSION ++ select CLKSRC_I8253 ++ select CLONE_BACKWARDS ++ select HAVE_GENERIC_DMA_COHERENT ++ select MODULES_USE_ELF_REL ++ select OLD_SIGACTION ++ ++config X86_64 ++ def_bool y ++ depends on 64BIT ++ # Options that are inherently 64-bit kernel only: ++ select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA ++ select ARCH_SUPPORTS_INT128 ++ select ARCH_USE_CMPXCHG_LOCKREF ++ select HAVE_ARCH_SOFT_DIRTY ++ select MODULES_USE_ELF_RELA ++ select NEED_DMA_MAP_STATE ++ select SWIOTLB ++ select X86_DEV_DMA_OPS ++ select ARCH_HAS_SYSCALL_WRAPPER ++ ++# ++# Arch settings ++# ++# ( Note that options that are marked 'if X86_64' could in principle be ++# ported to 32-bit as well. ) ++# ++config X86 ++ def_bool y ++ # ++ # Note: keep this list sorted alphabetically ++ # ++ select ACPI_LEGACY_TABLES_LOOKUP if ACPI ++ select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI ++ select ANON_INODES ++ select ARCH_32BIT_OFF_T if X86_32 ++ select ARCH_CLOCKSOURCE_DATA ++ select ARCH_DISCARD_MEMBLOCK ++ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI ++ select ARCH_HAS_DEBUG_VIRTUAL ++ select ARCH_HAS_DEVMEM_IS_ALLOWED ++ select ARCH_HAS_ELF_RANDOMIZE ++ select ARCH_HAS_FAST_MULTIPLIER ++ select ARCH_HAS_FILTER_PGPROT ++ select ARCH_HAS_FORTIFY_SOURCE ++ select ARCH_HAS_GCOV_PROFILE_ALL ++ select ARCH_HAS_KCOV if X86_64 ++ select ARCH_HAS_MEMBARRIER_SYNC_CORE ++ select ARCH_HAS_PMEM_API if X86_64 ++ select ARCH_HAS_PTE_SPECIAL ++ select ARCH_HAS_REFCOUNT ++ select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 ++ select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE ++ select ARCH_HAS_SET_MEMORY ++ select ARCH_HAS_SG_CHAIN ++ select ARCH_HAS_STRICT_KERNEL_RWX ++ select ARCH_HAS_STRICT_MODULE_RWX ++ select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE ++ select ARCH_HAS_UBSAN_SANITIZE_ALL ++ select ARCH_HAS_ZONE_DEVICE if X86_64 ++ select ARCH_HAVE_NMI_SAFE_CMPXCHG ++ select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI ++ select ARCH_MIGHT_HAVE_PC_PARPORT ++ select ARCH_MIGHT_HAVE_PC_SERIO ++ select ARCH_SUPPORTS_ACPI ++ select ARCH_SUPPORTS_ATOMIC_RMW ++ select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 ++ select ARCH_USE_BUILTIN_BSWAP ++ select ARCH_USE_QUEUED_RWLOCKS ++ select ARCH_USE_QUEUED_SPINLOCKS ++ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ select ARCH_WANTS_DYNAMIC_TASK_STRUCT ++ select ARCH_WANTS_THP_SWAP if X86_64 ++ select BUILDTIME_EXTABLE_SORT ++ select CLKEVT_I8253 ++ select CLOCKSOURCE_VALIDATE_LAST_CYCLE ++ select CLOCKSOURCE_WATCHDOG ++ select DCACHE_WORD_ACCESS ++ select DMA_DIRECT_OPS ++ select EDAC_ATOMIC_SCRUB ++ select EDAC_SUPPORT ++ select GENERIC_CLOCKEVENTS 
++ select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) ++ select GENERIC_CLOCKEVENTS_MIN_ADJUST ++ select GENERIC_CMOS_UPDATE ++ select GENERIC_CPU_AUTOPROBE ++ select GENERIC_CPU_VULNERABILITIES ++ select GENERIC_EARLY_IOREMAP ++ select GENERIC_FIND_FIRST_BIT ++ select GENERIC_IOMAP ++ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP ++ select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC ++ select GENERIC_IRQ_MIGRATION if SMP ++ select GENERIC_IRQ_PROBE ++ select GENERIC_IRQ_RESERVATION_MODE ++ select GENERIC_IRQ_SHOW ++ select GENERIC_PENDING_IRQ if SMP ++ select GENERIC_SMP_IDLE_THREAD ++ select GENERIC_STRNCPY_FROM_USER ++ select GENERIC_STRNLEN_USER ++ select GENERIC_TIME_VSYSCALL ++ select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 ++ select HAVE_ACPI_APEI if ACPI ++ select HAVE_ACPI_APEI_NMI if ACPI ++ select HAVE_ALIGNED_STRUCT_PAGE if SLUB ++ select HAVE_ARCH_AUDITSYSCALL ++ select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE ++ select HAVE_ARCH_JUMP_LABEL ++ select HAVE_ARCH_KASAN if X86_64 ++ select HAVE_ARCH_KGDB ++ select HAVE_ARCH_MMAP_RND_BITS if MMU ++ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT ++ select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT ++ select HAVE_ARCH_PREL32_RELOCATIONS ++ select HAVE_ARCH_SECCOMP_FILTER ++ select HAVE_ARCH_THREAD_STRUCT_WHITELIST ++ select HAVE_ARCH_TRACEHOOK ++ select HAVE_ARCH_TRANSPARENT_HUGEPAGE ++ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 ++ select HAVE_ARCH_VMAP_STACK if X86_64 ++ select HAVE_ARCH_WITHIN_STACK_FRAMES ++ select HAVE_CMPXCHG_DOUBLE ++ select HAVE_CMPXCHG_LOCAL ++ select HAVE_CONTEXT_TRACKING if X86_64 ++ select HAVE_COPY_THREAD_TLS ++ select HAVE_C_RECORDMCOUNT ++ select HAVE_DEBUG_KMEMLEAK ++ select HAVE_DEBUG_STACKOVERFLOW ++ select HAVE_DMA_CONTIGUOUS ++ select HAVE_DYNAMIC_FTRACE ++ select HAVE_DYNAMIC_FTRACE_WITH_REGS ++ select HAVE_EBPF_JIT ++ select HAVE_EFFICIENT_UNALIGNED_ACCESS ++ select HAVE_EXIT_THREAD ++ select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE ++ select HAVE_FTRACE_MCOUNT_RECORD ++ select HAVE_FUNCTION_GRAPH_TRACER ++ select HAVE_FUNCTION_TRACER ++ select HAVE_GCC_PLUGINS ++ select HAVE_HW_BREAKPOINT ++ select HAVE_IDE ++ select HAVE_IOREMAP_PROT ++ select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 ++ select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_KERNEL_BZIP2 ++ select HAVE_KERNEL_GZIP ++ select HAVE_KERNEL_LZ4 ++ select HAVE_KERNEL_LZMA ++ select HAVE_KERNEL_LZO ++ select HAVE_KERNEL_XZ ++ select HAVE_KPROBES ++ select HAVE_KPROBES_ON_FTRACE ++ select HAVE_FUNCTION_ERROR_INJECTION ++ select HAVE_KRETPROBES ++ select HAVE_KVM ++ select HAVE_LIVEPATCH_FTRACE if X86_64 ++ select HAVE_LIVEPATCH_WO_FTRACE if X86_64 ++ select HAVE_MEMBLOCK ++ select HAVE_MEMBLOCK_NODE_MAP ++ select HAVE_MIXED_BREAKPOINTS_REGS ++ select HAVE_MOD_ARCH_SPECIFIC ++ select HAVE_NMI ++ select HAVE_OPROFILE ++ select HAVE_OPTPROBES ++ select HAVE_PCSPKR_PLATFORM ++ select HAVE_PERF_EVENTS ++ select HAVE_PERF_EVENTS_NMI ++ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI ++ select HAVE_PERF_REGS ++ select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_RCU_TABLE_FREE if PARAVIRT ++ select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE ++ select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION ++ select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR ++ select HAVE_STACK_VALIDATION if X86_64 ++ select HAVE_RSEQ ++ select HAVE_SYSCALL_TRACEPOINTS ++ select HAVE_UNSTABLE_SCHED_CLOCK ++ 
select HAVE_USER_RETURN_NOTIFIER ++ select HOTPLUG_SMT if SMP ++ select IRQ_FORCED_THREADING ++ select NEED_SG_DMA_LENGTH ++ select PCI_LOCKLESS_CONFIG ++ select PERF_EVENTS ++ select RTC_LIB ++ select RTC_MC146818_LIB ++ select SPARSE_IRQ ++ select SRCU ++ select SYSCTL_EXCEPTION_TRACE ++ select THREAD_INFO_IN_TASK ++ select USER_STACKTRACE_SUPPORT ++ select VIRT_TO_BUS ++ select X86_FEATURE_NAMES if PROC_FS ++ ++config INSTRUCTION_DECODER ++ def_bool y ++ depends on KPROBES || PERF_EVENTS || UPROBES ++ ++config OUTPUT_FORMAT ++ string ++ default "elf32-i386" if X86_32 ++ default "elf64-x86-64" if X86_64 ++ ++config ARCH_DEFCONFIG ++ string ++ default "arch/x86/configs/i386_defconfig" if X86_32 ++ default "arch/x86/configs/x86_64_defconfig" if X86_64 ++ ++config LOCKDEP_SUPPORT ++ def_bool y ++ ++config STACKTRACE_SUPPORT ++ def_bool y ++ ++config MMU ++ def_bool y ++ ++config ARCH_MMAP_RND_BITS_MIN ++ default 28 if 64BIT ++ default 8 ++ ++config ARCH_MMAP_RND_BITS_MAX ++ default 32 if 64BIT ++ default 16 ++ ++config ARCH_MMAP_RND_COMPAT_BITS_MIN ++ default 8 ++ ++config ARCH_MMAP_RND_COMPAT_BITS_MAX ++ default 16 ++ ++config SBUS ++ bool ++ ++config GENERIC_ISA_DMA ++ def_bool y ++ depends on ISA_DMA_API ++ ++config GENERIC_BUG ++ def_bool y ++ depends on BUG ++ select GENERIC_BUG_RELATIVE_POINTERS if X86_64 ++ ++config GENERIC_BUG_RELATIVE_POINTERS ++ bool ++ ++config GENERIC_HWEIGHT ++ def_bool y ++ ++config ARCH_MAY_HAVE_PC_FDC ++ def_bool y ++ depends on ISA_DMA_API ++ ++config RWSEM_XCHGADD_ALGORITHM ++ def_bool y ++ ++config GENERIC_CALIBRATE_DELAY ++ def_bool y ++ ++config ARCH_HAS_CPU_RELAX ++ def_bool y ++ ++config ARCH_HAS_CACHE_LINE_SIZE ++ def_bool y ++ ++config ARCH_HAS_FILTER_PGPROT ++ def_bool y ++ ++config HAVE_SETUP_PER_CPU_AREA ++ def_bool y ++ ++config NEED_PER_CPU_EMBED_FIRST_CHUNK ++ def_bool y ++ ++config NEED_PER_CPU_PAGE_FIRST_CHUNK ++ def_bool y ++ ++config ARCH_HIBERNATION_POSSIBLE ++ def_bool y ++ ++config ARCH_SUSPEND_POSSIBLE ++ def_bool y ++ ++config ARCH_WANT_HUGE_PMD_SHARE ++ def_bool y ++ ++config ARCH_WANT_GENERAL_HUGETLB ++ def_bool y ++ ++config ZONE_DMA32 ++ def_bool y if X86_64 ++ ++config AUDIT_ARCH ++ def_bool y if X86_64 ++ ++config ARCH_SUPPORTS_OPTIMIZED_INLINING ++ def_bool y ++ ++config ARCH_SUPPORTS_DEBUG_PAGEALLOC ++ def_bool y ++ ++config KASAN_SHADOW_OFFSET ++ hex ++ depends on KASAN ++ default 0xdffffc0000000000 ++ ++config HAVE_INTEL_TXT ++ def_bool y ++ depends on INTEL_IOMMU && ACPI ++ ++config X86_32_SMP ++ def_bool y ++ depends on X86_32 && SMP ++ ++config X86_64_SMP ++ def_bool y ++ depends on X86_64 && SMP ++ ++config X86_32_LAZY_GS ++ def_bool y ++ depends on X86_32 && !STACKPROTECTOR ++ ++config ARCH_SUPPORTS_UPROBES ++ def_bool y ++ ++config FIX_EARLYCON_MEM ++ def_bool y ++ ++config DYNAMIC_PHYSICAL_MASK ++ bool ++ ++config PGTABLE_LEVELS ++ int ++ default 5 if X86_5LEVEL ++ default 4 if X86_64 ++ default 3 if X86_PAE ++ default 2 ++ ++config CC_HAS_SANE_STACKPROTECTOR ++ bool ++ default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC)) if 64BIT ++ default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC)) ++ help ++ We have to make sure stack protector is unconditionally disabled if ++ the compiler produces broken code. ++ ++menu "Processor type and features" ++ ++config ZONE_DMA ++ bool "DMA memory allocation support" if EXPERT ++ default y ++ help ++ DMA memory allocation support allows devices with less than 32-bit ++ addressing to allocate within the first 16MB of address space. 
++ Disable if no such devices will be used. ++ ++ If unsure, say Y. ++ ++config SMP ++ bool "Symmetric multi-processing support" ++ ---help--- ++ This enables support for systems with more than one CPU. If you have ++ a system with only one CPU, say N. If you have a system with more ++ than one CPU, say Y. ++ ++ If you say N here, the kernel will run on uni- and multiprocessor ++ machines, but will use only one CPU of a multiprocessor machine. If ++ you say Y here, the kernel will run on many, but not all, ++ uniprocessor machines. On a uniprocessor machine, the kernel ++ will run faster if you say N here. ++ ++ Note that if you say Y here and choose architecture "586" or ++ "Pentium" under "Processor family", the kernel will not work on 486 ++ architectures. Similarly, multiprocessor kernels for the "PPro" ++ architecture may not work on all Pentium based boards. ++ ++ People using multiprocessor machines who say Y here should also say ++ Y to "Enhanced Real Time Clock Support", below. The "Advanced Power ++ Management" code will be disabled if you say Y here. ++ ++ See also , ++ and the SMP-HOWTO available at ++ . ++ ++ If you don't know what to do here, say N. ++ ++config X86_FEATURE_NAMES ++ bool "Processor feature human-readable names" if EMBEDDED ++ default y ++ ---help--- ++ This option compiles in a table of x86 feature bits and corresponding ++ names. This is required to support /proc/cpuinfo and a few kernel ++ messages. You can disable this to save space, at the expense of ++ making those few kernel messages show numeric feature bits instead. ++ ++ If in doubt, say Y. ++ ++config X86_X2APIC ++ bool "Support x2apic" ++ depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) ++ ---help--- ++ This enables x2apic support on CPUs that have this feature. ++ ++ This allows 32-bit apic IDs (so it can support very large systems), ++ and accesses the local apic via MSRs not via mmio. ++ ++ If you don't know what to do here, say N. ++ ++config X86_MPPARSE ++ bool "Enable MPS table" if ACPI || SFI ++ default y ++ depends on X86_LOCAL_APIC ++ ---help--- ++ For old smp systems that do not have proper acpi support. Newer systems ++ (esp with 64bit cpus) with acpi support, MADT and DSDT will override it ++ ++config GOLDFISH ++ def_bool y ++ depends on X86_GOLDFISH ++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ select STACK_VALIDATION if HAVE_STACK_VALIDATION ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config INTEL_RDT ++ bool "Intel Resource Director Technology support" ++ default n ++ depends on X86 && CPU_SUP_INTEL ++ select KERNFS ++ help ++ Select to enable resource allocation and monitoring which are ++ sub-features of Intel Resource Director Technology(RDT). More ++ information about RDT can be found in the Intel x86 ++ Architecture Software Developer Manual. ++ ++ Say N if unsure. ++ ++if X86_32 ++config X86_BIGSMP ++ bool "Support for big SMP systems with more than 8 CPUs" ++ depends on SMP ++ ---help--- ++ This option is needed for the systems that have more than 8 CPUs ++ ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. 
(which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 32 bit x86 platforms: ++ Goldfish (Android emulator) ++ AMD Elan ++ RDC R-321x SoC ++ SGI 320/540 (Visual Workstation) ++ STA2X11-based (e.g. Northville) ++ Moorestown MID devices ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++ ++if X86_64 ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. (which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 64 bit x86 platforms: ++ Numascale NumaChip ++ ScaleMP vSMP ++ SGI Ultraviolet ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++# This is an alphabetically sorted list of 64 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions ++config X86_NUMACHIP ++ bool "Numascale NumaChip" ++ depends on X86_64 ++ depends on X86_EXTENDED_PLATFORM ++ depends on NUMA ++ depends on SMP ++ depends on X86_X2APIC ++ depends on PCI_MMCONFIG ++ ---help--- ++ Adds support for Numascale NumaChip large-SMP systems. Needed to ++ enable more than ~168 cores. ++ If you don't have one of these, you should say N here. ++ ++config X86_VSMP ++ bool "ScaleMP vSMP" ++ select HYPERVISOR_GUEST ++ select PARAVIRT ++ depends on X86_64 && PCI ++ depends on X86_EXTENDED_PLATFORM ++ depends on SMP ++ ---help--- ++ Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is ++ supposed to run on these EM64T-based machines. Only choose this option ++ if you have one of these machines. ++ ++config X86_UV ++ bool "SGI Ultraviolet" ++ depends on X86_64 ++ depends on X86_EXTENDED_PLATFORM ++ depends on NUMA ++ depends on EFI ++ depends on X86_X2APIC ++ depends on PCI ++ ---help--- ++ This option is needed in order to support SGI Ultraviolet systems. ++ If you don't have one of these, you should say N here. ++ ++# Following is an alphabetically sorted list of 32 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions ++ ++config X86_GOLDFISH ++ bool "Goldfish (Virtual Platform)" ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ Enable support for the Goldfish virtual platform used primarily ++ for Android development. Unless you are building for the Android ++ Goldfish emulator say N here. ++ ++config X86_INTEL_CE ++ bool "CE4100 TV platform" ++ depends on PCI ++ depends on PCI_GODIRECT ++ depends on X86_IO_APIC ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ select X86_REBOOTFIXUPS ++ select OF ++ select OF_EARLY_FLATTREE ++ ---help--- ++ Select for the Intel CE media processor (CE4100) SOC. ++ This option compiles in support for the CE4100 SOC for settop ++ boxes and media devices. 
++ ++config X86_INTEL_MID ++ bool "Intel MID platform support" ++ depends on X86_EXTENDED_PLATFORM ++ depends on X86_PLATFORM_DEVICES ++ depends on PCI ++ depends on X86_64 || (PCI_GOANY && X86_32) ++ depends on X86_IO_APIC ++ select SFI ++ select I2C ++ select DW_APB_TIMER ++ select APB_TIMER ++ select INTEL_SCU_IPC ++ select MFD_INTEL_MSIC ++ ---help--- ++ Select to build a kernel capable of supporting Intel MID (Mobile ++ Internet Device) platform systems which do not have the PCI legacy ++ interfaces. If you are building for a PC class system say N here. ++ ++ Intel MID platforms are based on an Intel processor and chipset which ++ consume less power than most of the x86 derivatives. ++ ++config X86_INTEL_QUARK ++ bool "Intel Quark platform support" ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ depends on X86_PLATFORM_DEVICES ++ depends on X86_TSC ++ depends on PCI ++ depends on PCI_GOANY ++ depends on X86_IO_APIC ++ select IOSF_MBI ++ select INTEL_IMR ++ select COMMON_CLK ++ ---help--- ++ Select to include support for Quark X1000 SoC. ++ Say Y here if you have a Quark based system such as the Arduino ++ compatible Intel Galileo. ++ ++config X86_INTEL_LPSS ++ bool "Intel Low Power Subsystem Support" ++ depends on X86 && ACPI ++ select COMMON_CLK ++ select PINCTRL ++ select IOSF_MBI ++ ---help--- ++ Select to build support for Intel Low Power Subsystem such as ++ found on Intel Lynxpoint PCH. Selecting this option enables ++ things like clock tree (common clock framework) and pincontrol ++ which are needed by the LPSS peripheral drivers. ++ ++config X86_AMD_PLATFORM_DEVICE ++ bool "AMD ACPI2Platform devices support" ++ depends on ACPI ++ select COMMON_CLK ++ select PINCTRL ++ ---help--- ++ Select to interpret AMD specific ACPI device to platform device ++ such as I2C, UART, GPIO found on AMD Carrizo and later chipsets. ++ I2C and UART depend on COMMON_CLK to set clock. GPIO driver is ++ implemented under PINCTRL subsystem. ++ ++config IOSF_MBI ++ tristate "Intel SoC IOSF Sideband support for SoC platforms" ++ depends on PCI ++ ---help--- ++ This option enables sideband register access support for Intel SoC ++ platforms. On these platforms the IOSF sideband is used in lieu of ++ MSR's for some register accesses, mostly but not limited to thermal ++ and power. Drivers may query the availability of this device to ++ determine if they need the sideband in order to work on these ++ platforms. The sideband is available on the following SoC products. ++ This list is not meant to be exclusive. ++ - BayTrail ++ - Braswell ++ - Quark ++ ++ You should say Y if you are running a kernel on one of these SoC's. ++ ++config IOSF_MBI_DEBUG ++ bool "Enable IOSF sideband access through debugfs" ++ depends on IOSF_MBI && DEBUG_FS ++ ---help--- ++ Select this option to expose the IOSF sideband access registers (MCR, ++ MDR, MCRX) through debugfs to write and read register information from ++ different units on the SoC. This is most useful for obtaining device ++ state information for debug and analysis. As this is a general access ++ mechanism, users of this option would have specific knowledge of the ++ device they want to access. ++ ++ If you don't require the option or are in doubt, say N. ++ ++config X86_RDC321X ++ bool "RDC R-321x SoC" ++ depends on X86_32 ++ depends on X86_EXTENDED_PLATFORM ++ select M486 ++ select X86_REBOOTFIXUPS ++ ---help--- ++ This option is needed for RDC R-321x system-on-chip, also known ++ as R-8610-(G). 
++ If you don't have one of these chips, you should say N here. ++ ++config X86_32_NON_STANDARD ++ bool "Support non-standard 32-bit SMP architectures" ++ depends on X86_32 && SMP ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ This option compiles in the bigsmp and STA2X11 default ++ subarchitectures. It is intended for a generic binary ++ kernel. If you select them all, kernel will probe it one by ++ one and will fallback to default. ++ ++# Alphabetically sorted list of Non standard 32 bit platforms ++ ++config X86_SUPPORTS_MEMORY_FAILURE ++ def_bool y ++ # MCE code calls memory_failure(): ++ depends on X86_MCE ++ # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: ++ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: ++ depends on X86_64 || !SPARSEMEM ++ select ARCH_SUPPORTS_MEMORY_FAILURE ++ ++config STA2X11 ++ bool "STA2X11 Companion Chip Support" ++ depends on X86_32_NON_STANDARD && PCI ++ select ARCH_HAS_PHYS_TO_DMA ++ select X86_DEV_DMA_OPS ++ select X86_DMA_REMAP ++ select SWIOTLB ++ select MFD_STA2X11 ++ select GPIOLIB ++ default n ++ ---help--- ++ This adds support for boards based on the STA2X11 IO-Hub, ++ a.k.a. "ConneXt". The chip is used in place of the standard ++ PC chipset, so all "standard" peripherals are missing. If this ++ option is selected the kernel will still be able to boot on ++ standard PC machines. ++ ++config X86_32_IRIS ++ tristate "Eurobraille/Iris poweroff module" ++ depends on X86_32 ++ ---help--- ++ The Iris machines from EuroBraille do not have APM or ACPI support ++ to shut themselves down properly. A special I/O sequence is ++ needed to do so, which is what this module does at ++ kernel shutdown. ++ ++ This is only for Iris machines from EuroBraille. ++ ++ If unused, say N. ++ ++config SCHED_OMIT_FRAME_POINTER ++ def_bool y ++ prompt "Single-depth WCHAN output" ++ depends on X86 ++ ---help--- ++ Calculate simpler /proc//wchan values. If this option ++ is disabled then wchan values will recurse back to the ++ caller function. This provides more accurate wchan values, ++ at the expense of slightly more scheduling overhead. ++ ++ If in doubt, say "Y". ++ ++menuconfig HYPERVISOR_GUEST ++ bool "Linux guest support" ++ ---help--- ++ Say Y here to enable options for running Linux under various hyper- ++ visors. This option enables basic hypervisor detection and platform ++ setup. ++ ++ If you say N, all options in this submenu will be skipped and ++ disabled, and Linux guest support won't be built in. ++ ++if HYPERVISOR_GUEST ++ ++config PARAVIRT ++ bool "Enable paravirtualization code" ++ ---help--- ++ This changes the kernel so it can modify itself when it is run ++ under a hypervisor, potentially improving performance significantly ++ over full virtualization. However, when run without a hypervisor ++ the kernel is theoretically slower and slightly larger. ++ ++config PARAVIRT_DEBUG ++ bool "paravirt-ops debugging" ++ depends on PARAVIRT && DEBUG_KERNEL ++ ---help--- ++ Enable to debug paravirt_ops internals. Specifically, BUG if ++ a paravirt_op is missing when it is called. ++ ++config PARAVIRT_SPINLOCKS ++ bool "Paravirtualization layer for spinlocks" ++ depends on PARAVIRT && SMP ++ ---help--- ++ Paravirtualized spinlocks allow a pvops backend to replace the ++ spinlock implementation with something virtualization-friendly ++ (for example, block the virtual CPU rather than spinning). ++ ++ It has a minimal impact on native kernels and gives a nice performance ++ benefit on paravirtualized KVM / Xen kernels. 
++ ++ If you are unsure how to answer this question, answer Y. ++ ++config QUEUED_LOCK_STAT ++ bool "Paravirt queued spinlock statistics" ++ depends on PARAVIRT_SPINLOCKS && DEBUG_FS ++ ---help--- ++ Enable the collection of statistical data on the slowpath ++ behavior of paravirtualized queued spinlocks and report ++ them on debugfs. ++ ++source "arch/x86/xen/Kconfig" ++ ++config KVM_GUEST ++ bool "KVM Guest support (including kvmclock)" ++ depends on PARAVIRT ++ select PARAVIRT_CLOCK ++ default y ++ ---help--- ++ This option enables various optimizations for running under the KVM ++ hypervisor. It includes a paravirtualized clock, so that instead ++ of relying on a PIT (or probably other) emulation by the ++ underlying device model, the host provides the guest with ++ timing infrastructure such as time of day, and system time ++ ++config KVM_DEBUG_FS ++ bool "Enable debug information for KVM Guests in debugfs" ++ depends on KVM_GUEST && DEBUG_FS ++ default n ++ ---help--- ++ This option enables collection of various statistics for KVM guest. ++ Statistics are displayed in debugfs filesystem. Enabling this option ++ may incur significant overhead. ++ ++config PARAVIRT_TIME_ACCOUNTING ++ bool "Paravirtual steal time accounting" ++ depends on PARAVIRT ++ default n ++ ---help--- ++ Select this option to enable fine granularity task steal time ++ accounting. Time spent executing other tasks in parallel with ++ the current vCPU is discounted from the vCPU power. To account for ++ that, there can be a small performance impact. ++ ++ If in doubt, say N here. ++ ++config PARAVIRT_CLOCK ++ bool ++ ++config JAILHOUSE_GUEST ++ bool "Jailhouse non-root cell support" ++ depends on X86_64 && PCI ++ select X86_PM_TIMER ++ ---help--- ++ This option allows to run Linux as guest in a Jailhouse non-root ++ cell. You can leave this option disabled if you only want to start ++ Jailhouse and run Linux afterwards in the root cell. ++ ++endif #HYPERVISOR_GUEST ++ ++config NO_BOOTMEM ++ def_bool y ++ ++source "arch/x86/Kconfig.cpu" ++ ++config HPET_TIMER ++ def_bool X86_64 ++ prompt "HPET Timer Support" if X86_32 ++ ---help--- ++ Use the IA-PC HPET (High Precision Event Timer) to manage ++ time in preference to the PIT and RTC, if a HPET is ++ present. ++ HPET is the next generation timer replacing legacy 8254s. ++ The HPET provides a stable time base on SMP ++ systems, unlike the TSC, but it is more expensive to access, ++ as it is off-chip. The interface used is documented ++ in the HPET spec, revision 1. ++ ++ You can safely choose Y here. However, HPET will only be ++ activated if the platform and the BIOS support this feature. ++ Otherwise the 8254 will be used for timing services. ++ ++ Choose N to continue using the legacy 8254 timer. ++ ++config HPET_EMULATE_RTC ++ def_bool y ++ depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) ++ ++config APB_TIMER ++ def_bool y if X86_INTEL_MID ++ prompt "Intel MID APB Timer Support" if X86_INTEL_MID ++ select DW_APB_TIMER ++ depends on X86_INTEL_MID && SFI ++ help ++ APB timer is the replacement for 8254, HPET on X86 MID platforms. ++ The APBT provides a stable time base on SMP ++ systems, unlike the TSC, but it is more expensive to access, ++ as it is off-chip. APB timers are always running regardless of CPU ++ C states, they are used as per CPU clockevent device when possible. ++ ++# Mark as expert because too many people got it wrong. ++# The code disables itself when not needed. 
++config DMI ++ default y ++ select DMI_SCAN_MACHINE_NON_EFI_FALLBACK ++ bool "Enable DMI scanning" if EXPERT ++ ---help--- ++ Enabled scanning of DMI to identify machine quirks. Say Y ++ here unless you have verified that your setup is not ++ affected by entries in the DMI blacklist. Required by PNP ++ BIOS code. ++ ++config GART_IOMMU ++ bool "Old AMD GART IOMMU support" ++ select IOMMU_HELPER ++ select SWIOTLB ++ depends on X86_64 && PCI && AMD_NB ++ ---help--- ++ Provides a driver for older AMD Athlon64/Opteron/Turion/Sempron ++ GART based hardware IOMMUs. ++ ++ The GART supports full DMA access for devices with 32-bit access ++ limitations, on systems with more than 3 GB. This is usually needed ++ for USB, sound, many IDE/SATA chipsets and some other devices. ++ ++ Newer systems typically have a modern AMD IOMMU, supported via ++ the CONFIG_AMD_IOMMU=y config option. ++ ++ In normal configurations this driver is only active when needed: ++ there's more than 3 GB of memory and the system contains a ++ 32-bit limited device. ++ ++ If unsure, say Y. ++ ++config CALGARY_IOMMU ++ bool "IBM Calgary IOMMU support" ++ select IOMMU_HELPER ++ select SWIOTLB ++ depends on X86_64 && PCI ++ ---help--- ++ Support for hardware IOMMUs in IBM's xSeries x366 and x460 ++ systems. Needed to run systems with more than 3GB of memory ++ properly with 32-bit PCI devices that do not support DAC ++ (Double Address Cycle). Calgary also supports bus level ++ isolation, where all DMAs pass through the IOMMU. This ++ prevents them from going anywhere except their intended ++ destination. This catches hard-to-find kernel bugs and ++ mis-behaving drivers and devices that do not use the DMA-API ++ properly to set up their DMA buffers. The IOMMU can be ++ turned off at boot time with the iommu=off parameter. ++ Normally the kernel will make the right choice by itself. ++ If unsure, say Y. ++ ++config CALGARY_IOMMU_ENABLED_BY_DEFAULT ++ def_bool y ++ prompt "Should Calgary be enabled by default?" ++ depends on CALGARY_IOMMU ++ ---help--- ++ Should Calgary be enabled by default? if you choose 'y', Calgary ++ will be used (if it exists). If you choose 'n', Calgary will not be ++ used even if it exists. If you choose 'n' and would like to use ++ Calgary anyway, pass 'iommu=calgary' on the kernel command line. ++ If unsure, say Y. ++ ++config MAXSMP ++ bool "Enable Maximum number of SMP Processors and NUMA Nodes" ++ depends on X86_64 && SMP && DEBUG_KERNEL ++ select CPUMASK_OFFSTACK ++ ---help--- ++ Enable maximum number of CPUS and NUMA Nodes for this architecture. ++ If unsure, say N. ++ ++# ++# The maximum number of CPUs supported: ++# ++# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT, ++# and which can be configured interactively in the ++# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range. ++# ++# The ranges are different on 32-bit and 64-bit kernels, depending on ++# hardware capabilities and scalability features of the kernel. ++# ++# ( If MAXSMP is enabled we just use the highest possible value and disable ++# interactive configuration. 
) ++# ++ ++config NR_CPUS_RANGE_BEGIN ++ int ++ default NR_CPUS_RANGE_END if MAXSMP ++ default 1 if !SMP ++ default 2 ++ ++config NR_CPUS_RANGE_END ++ int ++ depends on X86_32 ++ default 64 if SMP && X86_BIGSMP ++ default 8 if SMP && !X86_BIGSMP ++ default 1 if !SMP ++ ++config NR_CPUS_RANGE_END ++ int ++ depends on X86_64 ++ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK) ++ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK) ++ default 1 if !SMP ++ ++config NR_CPUS_DEFAULT ++ int ++ depends on X86_32 ++ default 32 if X86_BIGSMP ++ default 8 if SMP ++ default 1 if !SMP ++ ++config NR_CPUS_DEFAULT ++ int ++ depends on X86_64 ++ default 8192 if MAXSMP ++ default 64 if SMP ++ default 1 if !SMP ++ ++config NR_CPUS ++ int "Maximum number of CPUs" if SMP && !MAXSMP ++ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END ++ default NR_CPUS_DEFAULT ++ ---help--- ++ This allows you to specify the maximum number of CPUs which this ++ kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum ++ supported value is 8192, otherwise the maximum value is 512. The ++ minimum value which makes sense is 2. ++ ++ This is purely to save memory: each supported CPU adds about 8KB ++ to the kernel image. ++ ++config SCHED_SMT ++ def_bool y if SMP ++ ++config SCHED_MC ++ def_bool y ++ prompt "Multi-core scheduler support" ++ depends on SMP ++ ---help--- ++ Multi-core scheduler support improves the CPU scheduler's decision ++ making when dealing with multi-core CPU chips at a cost of slightly ++ increased overhead in some places. If unsure say N here. ++ ++config SCHED_MC_PRIO ++ bool "CPU core priorities scheduler support" ++ depends on SCHED_MC && CPU_SUP_INTEL ++ select X86_INTEL_PSTATE ++ select CPU_FREQ ++ default y ++ ---help--- ++ Intel Turbo Boost Max Technology 3.0 enabled CPUs have a ++ core ordering determined at manufacturing time, which allows ++ certain cores to reach higher turbo frequencies (when running ++ single threaded workloads) than others. ++ ++ Enabling this kernel feature teaches the scheduler about ++ the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the ++ scheduler's CPU selection logic accordingly, so that higher ++ overall system performance can be achieved. ++ ++ This feature will have no effect on CPUs without this feature. ++ ++ If unsure say Y here. ++ ++config UP_LATE_INIT ++ def_bool y ++ depends on !SMP && X86_LOCAL_APIC ++ ++config X86_UP_APIC ++ bool "Local APIC support on uniprocessors" if !PCI_MSI ++ default PCI_MSI ++ depends on X86_32 && !SMP && !X86_32_NON_STANDARD ++ ---help--- ++ A local APIC (Advanced Programmable Interrupt Controller) is an ++ integrated interrupt controller in the CPU. If you have a single-CPU ++ system which has a processor with a local APIC, you can say Y here to ++ enable and use it. If you say Y here even though your machine doesn't ++ have a local APIC, then the kernel will still run with no slowdown at ++ all. The local APIC supports CPU-generated self-interrupts (timer, ++ performance counters), and the NMI watchdog which detects hard ++ lockups. ++ ++config X86_UP_IOAPIC ++ bool "IO-APIC support on uniprocessors" ++ depends on X86_UP_APIC ++ ---help--- ++ An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an ++ SMP-capable replacement for PC-style interrupt controllers. Most ++ SMP systems and many recent uniprocessor systems have one. ++ ++ If you have a single-CPU system with an IO-APIC, you can say Y here ++ to use it. 
If you say Y here even though your machine doesn't have ++ an IO-APIC, then the kernel will still run with no slowdown at all. ++ ++config X86_LOCAL_APIC ++ def_bool y ++ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI ++ select IRQ_DOMAIN_HIERARCHY ++ select PCI_MSI_IRQ_DOMAIN if PCI_MSI ++ ++config X86_IO_APIC ++ def_bool y ++ depends on X86_LOCAL_APIC || X86_UP_IOAPIC ++ ++config X86_REROUTE_FOR_BROKEN_BOOT_IRQS ++ bool "Reroute for broken boot IRQs" ++ depends on X86_IO_APIC ++ ---help--- ++ This option enables a workaround that fixes a source of ++ spurious interrupts. This is recommended when threaded ++ interrupt handling is used on systems where the generation of ++ superfluous "boot interrupts" cannot be disabled. ++ ++ Some chipsets generate a legacy INTx "boot IRQ" when the IRQ ++ entry in the chipset's IO-APIC is masked (as, e.g. the RT ++ kernel does during interrupt handling). On chipsets where this ++ boot IRQ generation cannot be disabled, this workaround keeps ++ the original IRQ line masked so that only the equivalent "boot ++ IRQ" is delivered to the CPUs. The workaround also tells the ++ kernel to set up the IRQ handler on the boot IRQ line. In this ++ way only one interrupt is delivered to the kernel. Otherwise ++ the spurious second interrupt may cause the kernel to bring ++ down (vital) interrupt lines. ++ ++ Only affects "broken" chipsets. Interrupt sharing may be ++ increased on these systems. ++ ++config X86_MCE ++ bool "Machine Check / overheating reporting" ++ select GENERIC_ALLOCATOR ++ default y ++ ---help--- ++ Machine Check support allows the processor to notify the ++ kernel if it detects a problem (e.g. overheating, data corruption). ++ The action the kernel takes depends on the severity of the problem, ++ ranging from warning messages to halting the machine. ++ ++config X86_MCELOG_LEGACY ++ bool "Support for deprecated /dev/mcelog character device" ++ depends on X86_MCE ++ ---help--- ++ Enable support for /dev/mcelog which is needed by the old mcelog ++ userspace logging daemon. Consider switching to the new generation ++ rasdaemon solution. ++ ++config X86_MCE_INTEL ++ def_bool y ++ prompt "Intel MCE features" ++ depends on X86_MCE && X86_LOCAL_APIC ++ ---help--- ++ Additional support for intel specific MCE features such as ++ the thermal monitor. ++ ++config X86_MCE_AMD ++ def_bool y ++ prompt "AMD MCE features" ++ depends on X86_MCE && X86_LOCAL_APIC && AMD_NB ++ ---help--- ++ Additional support for AMD specific MCE features such as ++ the DRAM Error Threshold. ++ ++config X86_ANCIENT_MCE ++ bool "Support for old Pentium 5 / WinChip machine checks" ++ depends on X86_32 && X86_MCE ++ ---help--- ++ Include support for machine check handling on old Pentium 5 or WinChip ++ systems. These typically need to be enabled explicitly on the command ++ line. ++ ++config X86_MCE_THRESHOLD ++ depends on X86_MCE_AMD || X86_MCE_INTEL ++ def_bool y ++ ++config X86_MCE_INJECT ++ depends on X86_MCE && X86_LOCAL_APIC && DEBUG_FS ++ tristate "Machine check injector support" ++ ---help--- ++ Provide support for injecting machine checks for testing purposes. ++ If you don't know what a machine check is and you don't do kernel ++ QA it is safe to say n. 
++ ++config X86_THERMAL_VECTOR ++ def_bool y ++ depends on X86_MCE_INTEL ++ ++source "arch/x86/events/Kconfig" ++ ++config X86_LEGACY_VM86 ++ bool "Legacy VM86 support" ++ default n ++ depends on X86_32 ++ ---help--- ++ This option allows user programs to put the CPU into V8086 ++ mode, which is an 80286-era approximation of 16-bit real mode. ++ ++ Some very old versions of X and/or vbetool require this option ++ for user mode setting. Similarly, DOSEMU will use it if ++ available to accelerate real mode DOS programs. However, any ++ recent version of DOSEMU, X, or vbetool should be fully ++ functional even without kernel VM86 support, as they will all ++ fall back to software emulation. Nevertheless, if you are using ++ a 16-bit DOS program where 16-bit performance matters, vm86 ++ mode might be faster than emulation and you might want to ++ enable this option. ++ ++ Note that any app that works on a 64-bit kernel is unlikely to ++ need this option, as 64-bit kernels don't, and can't, support ++ V8086 mode. This option is also unrelated to 16-bit protected ++ mode and is not needed to run most 16-bit programs under Wine. ++ ++ Enabling this option increases the complexity of the kernel ++ and slows down exception handling a tiny bit. ++ ++ If unsure, say N here. ++ ++config VM86 ++ bool ++ default X86_LEGACY_VM86 ++ ++config X86_16BIT ++ bool "Enable support for 16-bit segments" if EXPERT ++ default y ++ depends on MODIFY_LDT_SYSCALL ++ ---help--- ++ This option is required by programs like Wine to run 16-bit ++ protected mode legacy code on x86 processors. Disabling ++ this option saves about 300 bytes on i386, or around 6K text ++ plus 16K runtime memory on x86-64, ++ ++config X86_ESPFIX32 ++ def_bool y ++ depends on X86_16BIT && X86_32 ++ ++config X86_ESPFIX64 ++ def_bool y ++ depends on X86_16BIT && X86_64 ++ ++config X86_VSYSCALL_EMULATION ++ bool "Enable vsyscall emulation" if EXPERT ++ default y ++ depends on X86_64 ++ ---help--- ++ This enables emulation of the legacy vsyscall page. Disabling ++ it is roughly equivalent to booting with vsyscall=none, except ++ that it will also disable the helpful warning if a program ++ tries to use a vsyscall. With this option set to N, offending ++ programs will just segfault, citing addresses of the form ++ 0xffffffffff600?00. ++ ++ This option is required by many programs built before 2013, and ++ care should be used even with newer programs if set to N. ++ ++ Disabling this option saves about 7K of kernel size and ++ possibly 4K of additional runtime pagetable memory. ++ ++config TOSHIBA ++ tristate "Toshiba Laptop support" ++ depends on X86_32 ++ ---help--- ++ This adds a driver to safely access the System Management Mode of ++ the CPU on Toshiba portables with a genuine Toshiba BIOS. It does ++ not work on models with a Phoenix BIOS. The System Management Mode ++ is used to set the BIOS and power saving options on Toshiba portables. ++ ++ For information on utilities to make use of this driver see the ++ Toshiba Linux utilities web site at: ++ . ++ ++ Say Y if you intend to run this kernel on a Toshiba portable. ++ Say N otherwise. ++ ++config I8K ++ tristate "Dell i8k legacy laptop support" ++ select HWMON ++ select SENSORS_DELL_SMM ++ ---help--- ++ This option enables legacy /proc/i8k userspace interface in hwmon ++ dell-smm-hwmon driver. Character file /proc/i8k reports bios version, ++ temperature and allows controlling fan speeds of Dell laptops via ++ System Management Mode. 
For old Dell laptops (like Dell Inspiron 8000) ++ it reports also power and hotkey status. For fan speed control is ++ needed userspace package i8kutils. ++ ++ Say Y if you intend to run this kernel on old Dell laptops or want to ++ use userspace package i8kutils. ++ Say N otherwise. ++ ++config X86_REBOOTFIXUPS ++ bool "Enable X86 board specific fixups for reboot" ++ depends on X86_32 ++ ---help--- ++ This enables chipset and/or board specific fixups to be done ++ in order to get reboot to work correctly. This is only needed on ++ some combinations of hardware and BIOS. The symptom, for which ++ this config is intended, is when reboot ends with a stalled/hung ++ system. ++ ++ Currently, the only fixup is for the Geode machines using ++ CS5530A and CS5536 chipsets and the RDC R-321x SoC. ++ ++ Say Y if you want to enable the fixup. Currently, it's safe to ++ enable this option even if you don't need it. ++ Say N otherwise. ++ ++config MICROCODE ++ bool "CPU microcode loading support" ++ default y ++ depends on CPU_SUP_AMD || CPU_SUP_INTEL ++ select FW_LOADER ++ ---help--- ++ If you say Y here, you will be able to update the microcode on ++ Intel and AMD processors. The Intel support is for the IA32 family, ++ e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The ++ AMD support is for families 0x10 and later. You will obviously need ++ the actual microcode binary data itself which is not shipped with ++ the Linux kernel. ++ ++ The preferred method to load microcode from a detached initrd is described ++ in Documentation/x86/microcode.txt. For that you need to enable ++ CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the ++ initrd for microcode blobs. ++ ++ In addition, you can build the microcode into the kernel. For that you ++ need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE ++ config option. ++ ++config MICROCODE_INTEL ++ bool "Intel microcode loading support" ++ depends on MICROCODE ++ default MICROCODE ++ select FW_LOADER ++ ---help--- ++ This options enables microcode patch loading support for Intel ++ processors. ++ ++ For the current Intel microcode data package go to ++ and search for ++ 'Linux Processor Microcode Data File'. ++ ++config MICROCODE_AMD ++ bool "AMD microcode loading support" ++ depends on MICROCODE ++ select FW_LOADER ++ ---help--- ++ If you select this option, microcode patch loading support for AMD ++ processors will be enabled. ++ ++config MICROCODE_OLD_INTERFACE ++ def_bool y ++ depends on MICROCODE ++ ++config X86_MSR ++ tristate "/dev/cpu/*/msr - Model-specific register support" ++ ---help--- ++ This device gives privileged processes access to the x86 ++ Model-Specific Registers (MSRs). It is a character device with ++ major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. ++ MSR accesses are directed to a specific CPU on multi-processor ++ systems. ++ ++config X86_CPUID ++ tristate "/dev/cpu/*/cpuid - CPU information support" ++ ---help--- ++ This device gives processes access to the x86 CPUID instruction to ++ be executed on a specific processor. It is a character device ++ with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to ++ /dev/cpu/31/cpuid. ++ ++choice ++ prompt "High Memory Support" ++ default HIGHMEM4G ++ depends on X86_32 ++ ++config NOHIGHMEM ++ bool "off" ++ ---help--- ++ Linux can use up to 64 Gigabytes of physical memory on x86 systems. ++ However, the address space of 32-bit x86 processors is only 4 ++ Gigabytes large. 
That means that, if you have a large amount of ++ physical memory, not all of it can be "permanently mapped" by the ++ kernel. The physical memory that's not permanently mapped is called ++ "high memory". ++ ++ If you are compiling a kernel which will never run on a machine with ++ more than 1 Gigabyte total physical RAM, answer "off" here (default ++ choice and suitable for most users). This will result in a "3GB/1GB" ++ split: 3GB are mapped so that each process sees a 3GB virtual memory ++ space and the remaining part of the 4GB virtual memory space is used ++ by the kernel to permanently map as much physical memory as ++ possible. ++ ++ If the machine has between 1 and 4 Gigabytes physical RAM, then ++ answer "4GB" here. ++ ++ If more than 4 Gigabytes is used then answer "64GB" here. This ++ selection turns Intel PAE (Physical Address Extension) mode on. ++ PAE implements 3-level paging on IA32 processors. PAE is fully ++ supported by Linux, PAE mode is implemented on all recent Intel ++ processors (Pentium Pro and better). NOTE: If you say "64GB" here, ++ then the kernel will not boot on CPUs that don't support PAE! ++ ++ The actual amount of total physical memory will either be ++ auto detected or can be forced by using a kernel command line option ++ such as "mem=256M". (Try "man bootparam" or see the documentation of ++ your boot loader (lilo or loadlin) about how to pass options to the ++ kernel at boot time.) ++ ++ If unsure, say "off". ++ ++config HIGHMEM4G ++ bool "4GB" ++ ---help--- ++ Select this if you have a 32-bit processor and between 1 and 4 ++ gigabytes of physical RAM. ++ ++config HIGHMEM64G ++ bool "64GB" ++ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 ++ select X86_PAE ++ ---help--- ++ Select this if you have a 32-bit processor and more than 4 ++ gigabytes of physical RAM. ++ ++endchoice ++ ++choice ++ prompt "Memory split" if EXPERT ++ default VMSPLIT_3G ++ depends on X86_32 ++ ---help--- ++ Select the desired split between kernel and user memory. ++ ++ If the address range available to the kernel is less than the ++ physical memory installed, the remaining memory will be available ++ as "high memory". Accessing high memory is a little more costly ++ than low memory, as it needs to be mapped into the kernel first. ++ Note that increasing the kernel address space limits the range ++ available to user programs, making the address space there ++ tighter. Selecting anything other than the default 3G/1G split ++ will also likely make your kernel incompatible with binary-only ++ kernel modules. ++ ++ If you are not absolutely sure what you are doing, leave this ++ option alone! 
++ ++ config VMSPLIT_3G ++ bool "3G/1G user/kernel split" ++ config VMSPLIT_3G_OPT ++ depends on !X86_PAE ++ bool "3G/1G user/kernel split (for full 1G low memory)" ++ config VMSPLIT_2G ++ bool "2G/2G user/kernel split" ++ config VMSPLIT_2G_OPT ++ depends on !X86_PAE ++ bool "2G/2G user/kernel split (for full 2G low memory)" ++ config VMSPLIT_1G ++ bool "1G/3G user/kernel split" ++endchoice ++ ++config PAGE_OFFSET ++ hex ++ default 0xB0000000 if VMSPLIT_3G_OPT ++ default 0x80000000 if VMSPLIT_2G ++ default 0x78000000 if VMSPLIT_2G_OPT ++ default 0x40000000 if VMSPLIT_1G ++ default 0xC0000000 ++ depends on X86_32 ++ ++config HIGHMEM ++ def_bool y ++ depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) ++ ++config X86_PAE ++ bool "PAE (Physical Address Extension) Support" ++ depends on X86_32 && !HIGHMEM4G ++ select PHYS_ADDR_T_64BIT ++ select SWIOTLB ++ ---help--- ++ PAE is required for NX support, and furthermore enables ++ larger swapspace support for non-overcommit purposes. It ++ has the cost of more pagetable lookup overhead, and also ++ consumes more pagetable space per process. ++ ++config X86_5LEVEL ++ bool "Enable 5-level page tables support" ++ select DYNAMIC_MEMORY_LAYOUT ++ select SPARSEMEM_VMEMMAP ++ depends on X86_64 ++ ---help--- ++ 5-level paging enables access to larger address space: ++ upto 128 PiB of virtual address space and 4 PiB of ++ physical address space. ++ ++ It will be supported by future Intel CPUs. ++ ++ A kernel with the option enabled can be booted on machines that ++ support 4- or 5-level paging. ++ ++ See Documentation/x86/x86_64/5level-paging.txt for more ++ information. ++ ++ Say N if unsure. ++ ++config X86_DIRECT_GBPAGES ++ def_bool y ++ depends on X86_64 && !DEBUG_PAGEALLOC ++ ---help--- ++ Certain kernel features effectively disable kernel ++ linear 1 GB mappings (even if the CPU otherwise ++ supports them), so don't confuse the user by printing ++ that we have them enabled. ++ ++config ARCH_HAS_MEM_ENCRYPT ++ def_bool y ++ ++config AMD_MEM_ENCRYPT ++ bool "AMD Secure Memory Encryption (SME) support" ++ depends on X86_64 && CPU_SUP_AMD ++ select DYNAMIC_PHYSICAL_MASK ++ select ARCH_USE_MEMREMAP_PROT ++ ---help--- ++ Say yes to enable support for the encryption of system memory. ++ This requires an AMD processor that supports Secure Memory ++ Encryption (SME). ++ ++config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT ++ bool "Activate AMD Secure Memory Encryption (SME) by default" ++ default y ++ depends on AMD_MEM_ENCRYPT ++ ---help--- ++ Say yes to have system memory encrypted by default if running on ++ an AMD processor that supports Secure Memory Encryption (SME). ++ ++ If set to Y, then the encryption of system memory can be ++ deactivated with the mem_encrypt=off command line option. ++ ++ If set to N, then the encryption of system memory can be ++ activated with the mem_encrypt=on command line option. ++ ++# Common NUMA Features ++config NUMA ++ bool "Numa Memory Allocation and Scheduler Support" ++ depends on SMP ++ depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP) ++ default y if X86_BIGSMP ++ ---help--- ++ Enable NUMA (Non Uniform Memory Access) support. ++ ++ The kernel will try to allocate memory used by a CPU on the ++ local memory controller of the CPU and add some more ++ NUMA awareness to the kernel. ++ ++ For 64-bit this is recommended if the system is Intel Core i7 ++ (or later), AMD Opteron, or EM64T NUMA. ++ ++ For 32-bit this is only needed if you boot a 32-bit ++ kernel on a 64-bit NUMA platform. ++ ++ Otherwise, you should say N. 
++ ++config NUMA_AWARE_SPINLOCKS ++ bool "Numa-aware spinlocks" ++ depends on NUMA ++ depends on QUEUED_SPINLOCKS ++ # For now, we depend on PARAVIRT_SPINLOCKS to make the patching work. ++ # This is awkward, but hopefully would be resolved once static_call() ++ # is available. ++ depends on PARAVIRT_SPINLOCKS ++ default y ++ help ++ Introduce NUMA (Non Uniform Memory Access) awareness into ++ the slow path of spinlocks. ++ ++ In this variant of qspinlock, the kernel will try to keep the lock ++ on the same node, thus reducing the number of remote cache misses, ++ while trading some of the short term fairness for better performance. ++ ++ Say N if you want absolute first come first serve fairness. ++ ++config AMD_NUMA ++ def_bool y ++ prompt "Old style AMD Opteron NUMA detection" ++ depends on X86_64 && NUMA && PCI ++ ---help--- ++ Enable AMD NUMA node topology detection. You should say Y here if ++ you have a multi processor AMD system. This uses an old method to ++ read the NUMA configuration directly from the builtin Northbridge ++ of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, ++ which also takes priority if both are compiled in. ++ ++config X86_64_ACPI_NUMA ++ def_bool y ++ prompt "ACPI NUMA detection" ++ depends on X86_64 && NUMA && ACPI && PCI ++ select ACPI_NUMA ++ ---help--- ++ Enable ACPI SRAT based node topology detection. ++ ++# Some NUMA nodes have memory ranges that span ++# other nodes. Even though a pfn is valid and ++# between a node's start and end pfns, it may not ++# reside on that node. See memmap_init_zone() ++# for details. ++config NODES_SPAN_OTHER_NODES ++ def_bool y ++ depends on X86_64_ACPI_NUMA ++ ++config NUMA_EMU ++ bool "NUMA emulation" ++ depends on NUMA ++ ---help--- ++ Enable NUMA emulation. A flat machine will be split ++ into virtual nodes when booted with "numa=fake=N", where N is the ++ number of nodes. This is only useful for debugging. ++ ++config NODES_SHIFT ++ int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP ++ range 1 10 ++ default "10" if MAXSMP ++ default "6" if X86_64 ++ default "3" ++ depends on NEED_MULTIPLE_NODES ++ ---help--- ++ Specify the maximum number of NUMA Nodes available on the target ++ system. Increases memory reserved to accommodate various tables. ++ ++config ARCH_HAVE_MEMORY_PRESENT ++ def_bool y ++ depends on X86_32 && DISCONTIGMEM ++ ++config ARCH_FLATMEM_ENABLE ++ def_bool y ++ depends on X86_32 && !NUMA ++ ++config ARCH_DISCONTIGMEM_ENABLE ++ def_bool y ++ depends on NUMA && X86_32 ++ ++config ARCH_DISCONTIGMEM_DEFAULT ++ def_bool y ++ depends on NUMA && X86_32 ++ ++config ARCH_SPARSEMEM_ENABLE ++ def_bool y ++ depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD ++ select SPARSEMEM_STATIC if X86_32 ++ select SPARSEMEM_VMEMMAP_ENABLE if X86_64 ++ ++config ARCH_SPARSEMEM_DEFAULT ++ def_bool y ++ depends on X86_64 ++ ++config ARCH_SELECT_MEMORY_MODEL ++ def_bool y ++ depends on ARCH_SPARSEMEM_ENABLE ++ ++config ARCH_MEMORY_PROBE ++ bool "Enable sysfs memory/probe interface" ++ depends on X86_64 && MEMORY_HOTPLUG ++ help ++ This option enables a sysfs memory/probe interface for testing. ++ See Documentation/memory-hotplug.txt for more information. ++ If you are unsure how to answer this question, answer N. 
++ ++config ARCH_PROC_KCORE_TEXT ++ def_bool y ++ depends on X86_64 && PROC_KCORE ++ ++config ILLEGAL_POINTER_VALUE ++ hex ++ default 0 if X86_32 ++ default 0xdead000000000000 if X86_64 ++ ++config X86_PMEM_LEGACY_DEVICE ++ bool ++ ++config X86_PMEM_LEGACY ++ tristate "Support non-standard NVDIMMs and ADR protected memory" ++ depends on PHYS_ADDR_T_64BIT ++ depends on BLK_DEV ++ select X86_PMEM_LEGACY_DEVICE ++ select LIBNVDIMM ++ help ++ Treat memory marked using the non-standard e820 type of 12 as used ++ by the Intel Sandy Bridge-EP reference BIOS as protected memory. ++ The kernel will offer these regions to the 'pmem' driver so ++ they can be used for persistent storage. ++ ++ Say Y if unsure. ++ ++config HIGHPTE ++ bool "Allocate 3rd-level pagetables from highmem" ++ depends on HIGHMEM ++ ---help--- ++ The VM uses one page table entry for each page of physical memory. ++ For systems with a lot of RAM, this can be wasteful of precious ++ low memory. Setting this option will put user-space page table ++ entries in high memory. ++ ++config X86_CHECK_BIOS_CORRUPTION ++ bool "Check for low memory corruption" ++ ---help--- ++ Periodically check for memory corruption in low memory, which ++ is suspected to be caused by BIOS. Even when enabled in the ++ configuration, it is disabled at runtime. Enable it by ++ setting "memory_corruption_check=1" on the kernel command ++ line. By default it scans the low 64k of memory every 60 ++ seconds; see the memory_corruption_check_size and ++ memory_corruption_check_period parameters in ++ Documentation/admin-guide/kernel-parameters.rst to adjust this. ++ ++ When enabled with the default parameters, this option has ++ almost no overhead, as it reserves a relatively small amount ++ of memory and scans it infrequently. It both detects corruption ++ and prevents it from affecting the running system. ++ ++ It is, however, intended as a diagnostic tool; if repeatable ++ BIOS-originated corruption always affects the same memory, ++ you can use memmap= to prevent the kernel from using that ++ memory. ++ ++config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK ++ bool "Set the default setting of memory_corruption_check" ++ depends on X86_CHECK_BIOS_CORRUPTION ++ default y ++ ---help--- ++ Set whether the default state of memory_corruption_check is ++ on or off. ++ ++config X86_RESERVE_LOW ++ int "Amount of low memory, in kilobytes, to reserve for the BIOS" ++ default 64 ++ range 4 640 ++ ---help--- ++ Specify the amount of low memory to reserve for the BIOS. ++ ++ The first page contains BIOS data structures that the kernel ++ must not use, so that page must always be reserved. ++ ++ By default we reserve the first 64K of physical RAM, as a ++ number of BIOSes are known to corrupt that memory range ++ during events such as suspend/resume or monitor cable ++ insertion, so it must not be used by the kernel. ++ ++ You can set this to 4 if you are absolutely sure that you ++ trust the BIOS to get all its memory reservations and usages ++ right. If you know your BIOS have problems beyond the ++ default 64K area, you can set this to 640 to avoid using the ++ entire low memory range. ++ ++ If you have doubts about the BIOS (e.g. suspend/resume does ++ not work or there's kernel crashes after certain hardware ++ hotplug events) then you might want to enable ++ X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check ++ typical corruption patterns. ++ ++ Leave this to the default value of 64 if you are unsure. 
++ ++config MATH_EMULATION ++ bool ++ depends on MODIFY_LDT_SYSCALL ++ prompt "Math emulation" if X86_32 ++ ---help--- ++ Linux can emulate a math coprocessor (used for floating point ++ operations) if you don't have one. 486DX and Pentium processors have ++ a math coprocessor built in, 486SX and 386 do not, unless you added ++ a 487DX or 387, respectively. (The messages during boot time can ++ give you some hints here ["man dmesg"].) Everyone needs either a ++ coprocessor or this emulation. ++ ++ If you don't have a math coprocessor, you need to say Y here; if you ++ say Y here even though you have a coprocessor, the coprocessor will ++ be used nevertheless. (This behavior can be changed with the kernel ++ command line option "no387", which comes handy if your coprocessor ++ is broken. Try "man bootparam" or see the documentation of your boot ++ loader (lilo or loadlin) about how to pass options to the kernel at ++ boot time.) This means that it is a good idea to say Y here if you ++ intend to use this kernel on different machines. ++ ++ More information about the internals of the Linux math coprocessor ++ emulation can be found in . ++ ++ If you are not sure, say Y; apart from resulting in a 66 KB bigger ++ kernel, it won't hurt. ++ ++config MTRR ++ def_bool y ++ prompt "MTRR (Memory Type Range Register) support" if EXPERT ++ ---help--- ++ On Intel P6 family processors (Pentium Pro, Pentium II and later) ++ the Memory Type Range Registers (MTRRs) may be used to control ++ processor access to memory ranges. This is most useful if you have ++ a video (VGA) card on a PCI or AGP bus. Enabling write-combining ++ allows bus write transfers to be combined into a larger transfer ++ before bursting over the PCI/AGP bus. This can increase performance ++ of image write operations 2.5 times or more. Saying Y here creates a ++ /proc/mtrr file which may be used to manipulate your processor's ++ MTRRs. Typically the X server should use this. ++ ++ This code has a reasonably generic interface so that similar ++ control registers on other processors can be easily supported ++ as well: ++ ++ The Cyrix 6x86, 6x86MX and M II processors have Address Range ++ Registers (ARRs) which provide a similar functionality to MTRRs. For ++ these, the ARRs are used to emulate the MTRRs. ++ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two ++ MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing ++ write-combining. All of these processors are supported by this code ++ and it makes sense to say Y here if you have one of them. ++ ++ Saying Y here also fixes a problem with buggy SMP BIOSes which only ++ set the MTRRs for the boot CPU and not for the secondary CPUs. This ++ can lead to all sorts of problems, so it's good to say Y here. ++ ++ You can safely say Y even if your machine doesn't have MTRRs, you'll ++ just add about 9 KB to your kernel. ++ ++ See for more information. ++ ++config MTRR_SANITIZER ++ def_bool y ++ prompt "MTRR cleanup support" ++ depends on MTRR ++ ---help--- ++ Convert MTRR layout from continuous to discrete, so X drivers can ++ add writeback entries. ++ ++ Can be disabled with disable_mtrr_cleanup on the kernel command line. ++ The largest mtrr entry size for a continuous block can be set with ++ mtrr_chunk_size. ++ ++ If unsure, say Y. 
++ ++config MTRR_SANITIZER_ENABLE_DEFAULT ++ int "MTRR cleanup enable value (0-1)" ++ range 0 1 ++ default "0" ++ depends on MTRR_SANITIZER ++ ---help--- ++ Enable mtrr cleanup default value ++ ++config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT ++ int "MTRR cleanup spare reg num (0-7)" ++ range 0 7 ++ default "1" ++ depends on MTRR_SANITIZER ++ ---help--- ++ mtrr cleanup spare entries default, it can be changed via ++ mtrr_spare_reg_nr=N on the kernel command line. ++ ++config X86_PAT ++ def_bool y ++ prompt "x86 PAT support" if EXPERT ++ depends on MTRR ++ ---help--- ++ Use PAT attributes to setup page level cache control. ++ ++ PATs are the modern equivalents of MTRRs and are much more ++ flexible than MTRRs. ++ ++ Say N here if you see bootup problems (boot crash, boot hang, ++ spontaneous reboots) or a non-working video driver. ++ ++ If unsure, say Y. ++ ++config ARCH_USES_PG_UNCACHED ++ def_bool y ++ depends on X86_PAT ++ ++config ARCH_RANDOM ++ def_bool y ++ prompt "x86 architectural random number generator" if EXPERT ++ ---help--- ++ Enable the x86 architectural RDRAND instruction ++ (Intel Bull Mountain technology) to generate random numbers. ++ If supported, this is a high bandwidth, cryptographically ++ secure hardware random number generator. ++ ++config X86_SMAP ++ def_bool y ++ prompt "Supervisor Mode Access Prevention" if EXPERT ++ ---help--- ++ Supervisor Mode Access Prevention (SMAP) is a security ++ feature in newer Intel processors. There is a small ++ performance cost if this enabled and turned on; there is ++ also a small increase in the kernel size if this is enabled. ++ ++ If unsure, say Y. ++ ++config X86_INTEL_UMIP ++ def_bool y ++ depends on CPU_SUP_INTEL ++ prompt "Intel User Mode Instruction Prevention" if EXPERT ++ ---help--- ++ The User Mode Instruction Prevention (UMIP) is a security ++ feature in newer Intel processors. If enabled, a general ++ protection fault is issued if the SGDT, SLDT, SIDT, SMSW ++ or STR instructions are executed in user mode. These instructions ++ unnecessarily expose information about the hardware state. ++ ++ The vast majority of applications do not use these instructions. ++ For the very few that do, software emulation is provided in ++ specific cases in protected and virtual-8086 modes. Emulated ++ results are dummy. ++ ++config X86_INTEL_MPX ++ prompt "Intel MPX (Memory Protection Extensions)" ++ def_bool n ++ # Note: only available in 64-bit mode due to VMA flags shortage ++ depends on CPU_SUP_INTEL && X86_64 ++ select ARCH_USES_HIGH_VMA_FLAGS ++ ---help--- ++ MPX provides hardware features that can be used in ++ conjunction with compiler-instrumented code to check ++ memory references. It is designed to detect buffer ++ overflow or underflow bugs. ++ ++ This option enables running applications which are ++ instrumented or otherwise use MPX. It does not use MPX ++ itself inside the kernel or to protect the kernel ++ against bad memory references. ++ ++ Enabling this option will make the kernel larger: ++ ~8k of kernel text and 36 bytes of data on a 64-bit ++ defconfig. It adds a long to the 'mm_struct' which ++ will increase the kernel memory overhead of each ++ process and adds some branches to paths used during ++ exec() and munmap(). ++ ++ For details, see Documentation/x86/intel_mpx.txt ++ ++ If unsure, say N. 
++ ++config X86_INTEL_MEMORY_PROTECTION_KEYS ++ prompt "Intel Memory Protection Keys" ++ def_bool y ++ # Note: only available in 64-bit mode ++ depends on CPU_SUP_INTEL && X86_64 ++ select ARCH_USES_HIGH_VMA_FLAGS ++ select ARCH_HAS_PKEYS ++ ---help--- ++ Memory Protection Keys provides a mechanism for enforcing ++ page-based protections, but without requiring modification of the ++ page tables when an application changes protection domains. ++ ++ For details, see Documentation/x86/protection-keys.txt ++ ++ If unsure, say y. ++ ++choice ++ prompt "TSX enable mode" ++ depends on CPU_SUP_INTEL ++ default X86_INTEL_TSX_MODE_OFF ++ help ++ Intel's TSX (Transactional Synchronization Extensions) feature ++ allows to optimize locking protocols through lock elision which ++ can lead to a noticeable performance boost. ++ ++ On the other hand it has been shown that TSX can be exploited ++ to form side channel attacks (e.g. TAA) and chances are there ++ will be more of those attacks discovered in the future. ++ ++ Therefore TSX is not enabled by default (aka tsx=off). An admin ++ might override this decision by tsx=on the command line parameter. ++ Even with TSX enabled, the kernel will attempt to enable the best ++ possible TAA mitigation setting depending on the microcode available ++ for the particular machine. ++ ++ This option allows to set the default tsx mode between tsx=on, =off ++ and =auto. See Documentation/admin-guide/kernel-parameters.txt for more ++ details. ++ ++ Say off if not sure, auto if TSX is in use but it should be used on safe ++ platforms or on if TSX is in use and the security aspect of tsx is not ++ relevant. ++ ++config X86_INTEL_TSX_MODE_OFF ++ bool "off" ++ help ++ TSX is disabled if possible - equals to tsx=off command line parameter. ++ ++config X86_INTEL_TSX_MODE_ON ++ bool "on" ++ help ++ TSX is always enabled on TSX capable HW - equals the tsx=on command ++ line parameter. ++ ++config X86_INTEL_TSX_MODE_AUTO ++ bool "auto" ++ help ++ TSX is enabled on TSX capable HW that is believed to be safe against ++ side channel attacks- equals the tsx=auto command line parameter. ++endchoice ++ ++config EFI ++ bool "EFI runtime service support" ++ depends on ACPI ++ select UCS2_STRING ++ select EFI_RUNTIME_WRAPPERS ++ ---help--- ++ This enables the kernel to use EFI runtime services that are ++ available (such as the EFI variable services). ++ ++ This option is only useful on systems that have EFI firmware. ++ In addition, you should use the latest ELILO loader available ++ at in order to take advantage ++ of EFI runtime services. However, even with this option, the ++ resultant kernel should continue to boot on existing non-EFI ++ platforms. ++ ++config EFI_STUB ++ bool "EFI stub support" ++ depends on EFI && !X86_USE_3DNOW ++ select RELOCATABLE ++ ---help--- ++ This kernel feature allows a bzImage to be loaded directly ++ by EFI firmware without the use of a bootloader. ++ ++ See Documentation/efi-stub.txt for more information. ++ ++config EFI_MIXED ++ bool "EFI mixed-mode support" ++ depends on EFI_STUB && X86_64 ++ ---help--- ++ Enabling this feature allows a 64-bit kernel to be booted ++ on a 32-bit firmware, provided that your CPU supports 64-bit ++ mode. ++ ++ Note that it is not possible to boot a mixed-mode enabled ++ kernel via the EFI boot stub - a bootloader that supports ++ the EFI handover protocol must be used. ++ ++ If unsure, say N. 
++ ++config SECCOMP ++ def_bool y ++ prompt "Enable seccomp to safely compute untrusted bytecode" ++ ---help--- ++ This kernel feature is useful for number crunching applications ++ that may need to compute untrusted bytecode during their ++ execution. By using pipes or other transports made available to ++ the process as file descriptors supporting the read/write ++ syscalls, it's possible to isolate those applications in ++ their own address space using seccomp. Once seccomp is ++ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled ++ and the task is only allowed to execute a few safe syscalls ++ defined by each seccomp mode. ++ ++ If unsure, say Y. Only embedded should say N here. ++ ++source kernel/Kconfig.hz ++ ++config KEXEC ++ bool "kexec system call" ++ select KEXEC_CORE ++ ---help--- ++ kexec is a system call that implements the ability to shutdown your ++ current kernel, and to start another kernel. It is like a reboot ++ but it is independent of the system firmware. And like a reboot ++ you can start any kernel with it, not just Linux. ++ ++ The name comes from the similarity to the exec system call. ++ ++ It is an ongoing process to be certain the hardware in a machine ++ is properly shutdown, so do not be surprised if this code does not ++ initially work for you. As of this writing the exact hardware ++ interface is strongly in flux, so no good recommendation can be ++ made. ++ ++config KEXEC_FILE ++ bool "kexec file based system call" ++ select KEXEC_CORE ++ select BUILD_BIN2C ++ depends on X86_64 ++ depends on CRYPTO=y ++ depends on CRYPTO_SHA256=y ++ ---help--- ++ This is new version of kexec system call. This system call is ++ file based and takes file descriptors as system call argument ++ for kernel and initramfs as opposed to list of segments as ++ accepted by previous system call. ++ ++config ARCH_HAS_KEXEC_PURGATORY ++ def_bool KEXEC_FILE ++ ++config KEXEC_VERIFY_SIG ++ bool "Verify kernel signature during kexec_file_load() syscall" ++ depends on KEXEC_FILE ++ ---help--- ++ This option makes kernel signature verification mandatory for ++ the kexec_file_load() syscall. ++ ++ In addition to that option, you need to enable signature ++ verification for the corresponding kernel image type being ++ loaded in order for this to work. ++ ++config KEXEC_BZIMAGE_VERIFY_SIG ++ bool "Enable bzImage signature verification support" ++ depends on KEXEC_VERIFY_SIG ++ depends on SIGNED_PE_FILE_VERIFICATION ++ select SYSTEM_TRUSTED_KEYRING ++ ---help--- ++ Enable bzImage signature verification support. ++ ++config CRASH_DUMP ++ bool "kernel crash dumps" ++ depends on X86_64 || (X86_32 && HIGHMEM) ++ ---help--- ++ Generate crash dump after being started by kexec. ++ This should be normally only set in special crash dump kernels ++ which are loaded in the main kernel with kexec-tools into ++ a specially reserved region and then later executed after ++ a crash by kdump/kexec. The crash dump kernel must be compiled ++ to a memory address not used by the main kernel or BIOS using ++ PHYSICAL_START, or it must be built as a relocatable image ++ (CONFIG_RELOCATABLE=y). 
++ For more details see Documentation/kdump/kdump.txt ++ ++config KEXEC_JUMP ++ bool "kexec jump" ++ depends on KEXEC && HIBERNATION ++ ---help--- ++ Jump between original kernel and kexeced kernel and invoke ++ code in physical address mode via KEXEC ++ ++config PHYSICAL_START ++ hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) ++ default "0x1000000" ++ ---help--- ++ This gives the physical address where the kernel is loaded. ++ ++ If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then ++ bzImage will decompress itself to above physical address and ++ run from there. Otherwise, bzImage will run from the address where ++ it has been loaded by the boot loader and will ignore above physical ++ address. ++ ++ In normal kdump cases one does not have to set/change this option ++ as now bzImage can be compiled as a completely relocatable image ++ (CONFIG_RELOCATABLE=y) and be used to load and run from a different ++ address. This option is mainly useful for the folks who don't want ++ to use a bzImage for capturing the crash dump and want to use a ++ vmlinux instead. vmlinux is not relocatable hence a kernel needs ++ to be specifically compiled to run from a specific memory area ++ (normally a reserved region) and this option comes handy. ++ ++ So if you are using bzImage for capturing the crash dump, ++ leave the value here unchanged to 0x1000000 and set ++ CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux ++ for capturing the crash dump change this value to start of ++ the reserved region. In other words, it can be set based on ++ the "X" value as specified in the "crashkernel=YM@XM" ++ command line boot parameter passed to the panic-ed ++ kernel. Please take a look at Documentation/kdump/kdump.txt ++ for more details about crash dumps. ++ ++ Usage of bzImage for capturing the crash dump is recommended as ++ one does not have to build two kernels. Same kernel can be used ++ as production kernel and capture kernel. Above option should have ++ gone away after relocatable bzImage support is introduced. But it ++ is present because there are users out there who continue to use ++ vmlinux for dump capture. This option should go away down the ++ line. ++ ++ Don't change this unless you know what you are doing. ++ ++config RELOCATABLE ++ bool "Build a relocatable kernel" ++ default y ++ ---help--- ++ This builds a kernel image that retains relocation information ++ so it can be loaded someplace besides the default 1MB. ++ The relocations tend to make the kernel binary about 10% larger, ++ but are discarded at runtime. ++ ++ One use is for the kexec on panic case where the recovery kernel ++ must live at a different physical address than the primary ++ kernel. ++ ++ Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address ++ it has been loaded at and the compile time physical address ++ (CONFIG_PHYSICAL_START) is used as the minimum location. ++ ++config RANDOMIZE_BASE ++ bool "Randomize the address of the kernel image (KASLR)" ++ depends on RELOCATABLE ++ default y ++ ---help--- ++ In support of Kernel Address Space Layout Randomization (KASLR), ++ this randomizes the physical address at which the kernel image ++ is decompressed and the virtual address where the kernel ++ image is mapped, as a security feature that deters exploit ++ attempts relying on knowledge of the location of kernel ++ code internals. ++ ++ On 64-bit, the kernel physical and virtual addresses are ++ randomized separately. 
The physical address will be anywhere ++ between 16MB and the top of physical memory (up to 64TB). The ++ virtual address will be randomized from 16MB up to 1GB (9 bits ++ of entropy). Note that this also reduces the memory space ++ available to kernel modules from 1.5GB to 1GB. ++ ++ On 32-bit, the kernel physical and virtual addresses are ++ randomized together. They will be randomized from 16MB up to ++ 512MB (8 bits of entropy). ++ ++ Entropy is generated using the RDRAND instruction if it is ++ supported. If RDTSC is supported, its value is mixed into ++ the entropy pool as well. If neither RDRAND nor RDTSC are ++ supported, then entropy is read from the i8254 timer. The ++ usable entropy is limited by the kernel being built using ++ 2GB addressing, and that PHYSICAL_ALIGN must be at a ++ minimum of 2MB. As a result, only 10 bits of entropy are ++ theoretically possible, but the implementations are further ++ limited due to memory layouts. ++ ++ If unsure, say Y. ++ ++# Relocation on x86 needs some additional build support ++config X86_NEED_RELOCS ++ def_bool y ++ depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE) ++ ++config PHYSICAL_ALIGN ++ hex "Alignment value to which kernel should be aligned" ++ default "0x200000" ++ range 0x2000 0x1000000 if X86_32 ++ range 0x200000 0x1000000 if X86_64 ++ ---help--- ++ This value puts the alignment restrictions on physical address ++ where kernel is loaded and run from. Kernel is compiled for an ++ address which meets above alignment restriction. ++ ++ If bootloader loads the kernel at a non-aligned address and ++ CONFIG_RELOCATABLE is set, kernel will move itself to nearest ++ address aligned to above value and run from there. ++ ++ If bootloader loads the kernel at a non-aligned address and ++ CONFIG_RELOCATABLE is not set, kernel will ignore the run time ++ load address and decompress itself to the address it has been ++ compiled for and run from there. The address for which kernel is ++ compiled already meets above alignment restrictions. Hence the ++ end result is that kernel runs from a physical address meeting ++ above alignment restrictions. ++ ++ On 32-bit this value must be a multiple of 0x2000. On 64-bit ++ this value must be a multiple of 0x200000. ++ ++ Don't change this unless you know what you are doing. ++ ++config DYNAMIC_MEMORY_LAYOUT ++ bool ++ ---help--- ++ This option makes base addresses of vmalloc and vmemmap as well as ++ __PAGE_OFFSET movable during boot. ++ ++config RANDOMIZE_MEMORY ++ bool "Randomize the kernel memory sections" ++ depends on X86_64 ++ depends on RANDOMIZE_BASE ++ select DYNAMIC_MEMORY_LAYOUT ++ default RANDOMIZE_BASE ++ ---help--- ++ Randomizes the base virtual address of kernel memory sections ++ (physical memory mapping, vmalloc & vmemmap). This security feature ++ makes exploits relying on predictable memory locations less reliable. ++ ++ The order of allocations remains unchanged. Entropy is generated in ++ the same way as RANDOMIZE_BASE. Current implementation in the optimal ++ configuration have in average 30,000 different possible virtual ++ addresses for each memory section. ++ ++ If unsure, say Y. ++ ++config RANDOMIZE_MEMORY_PHYSICAL_PADDING ++ hex "Physical memory mapping padding" if EXPERT ++ depends on RANDOMIZE_MEMORY ++ default "0xa" if MEMORY_HOTPLUG ++ default "0x0" ++ range 0x1 0x40 if MEMORY_HOTPLUG ++ range 0x0 0x40 ++ ---help--- ++ Define the padding in terabytes added to the existing physical ++ memory size during kernel memory randomization. 
It is useful ++ for memory hotplug support but reduces the entropy available for ++ address randomization. ++ ++ If unsure, leave at the default value. ++ ++config HOTPLUG_CPU ++ def_bool y ++ depends on SMP ++ ++config BOOTPARAM_HOTPLUG_CPU0 ++ bool "Set default setting of cpu0_hotpluggable" ++ default n ++ depends on HOTPLUG_CPU ++ ---help--- ++ Set whether default state of cpu0_hotpluggable is on or off. ++ ++ Say Y here to enable CPU0 hotplug by default. If this switch ++ is turned on, there is no need to give cpu0_hotplug kernel ++ parameter and the CPU0 hotplug feature is enabled by default. ++ ++ Please note: there are two known CPU0 dependencies if you want ++ to enable the CPU0 hotplug feature either by this switch or by ++ cpu0_hotplug kernel parameter. ++ ++ First, resume from hibernate or suspend always starts from CPU0. ++ So hibernate and suspend are prevented if CPU0 is offline. ++ ++ Second dependency is PIC interrupts always go to CPU0. CPU0 can not ++ offline if any interrupt can not migrate out of CPU0. There may ++ be other CPU0 dependencies. ++ ++ Please make sure the dependencies are under your control before ++ you enable this feature. ++ ++ Say N if you don't want to enable CPU0 hotplug feature by default. ++ You still can enable the CPU0 hotplug feature at boot by kernel ++ parameter cpu0_hotplug. ++ ++config DEBUG_HOTPLUG_CPU0 ++ def_bool n ++ prompt "Debug CPU0 hotplug" ++ depends on HOTPLUG_CPU ++ ---help--- ++ Enabling this option offlines CPU0 (if CPU0 can be offlined) as ++ soon as possible and boots up userspace with CPU0 offlined. User ++ can online CPU0 back after boot time. ++ ++ To debug CPU0 hotplug, you need to enable CPU0 offline/online ++ feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during ++ compilation or giving cpu0_hotplug kernel parameter at boot. ++ ++ If unsure, say N. ++ ++config COMPAT_VDSO ++ def_bool n ++ prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" ++ depends on COMPAT_32 ++ ---help--- ++ Certain buggy versions of glibc will crash if they are ++ presented with a 32-bit vDSO that is not mapped at the address ++ indicated in its segment table. ++ ++ The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a ++ and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and ++ 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is ++ the only released version with the bug, but OpenSUSE 9 ++ contains a buggy "glibc 2.3.2". ++ ++ The symptom of the bug is that everything crashes on startup, saying: ++ dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed! ++ ++ Saying Y here changes the default value of the vdso32 boot ++ option from 1 to 0, which turns off the 32-bit vDSO entirely. ++ This works around the glibc bug but hurts performance. ++ ++ If unsure, say N: if you are compiling your own kernel, you ++ are unlikely to be using a buggy version of glibc. ++ ++choice ++ prompt "vsyscall table for legacy applications" ++ depends on X86_64 ++ default LEGACY_VSYSCALL_EMULATE ++ help ++ Legacy user code that does not know how to find the vDSO expects ++ to be able to issue three syscalls by calling fixed addresses in ++ kernel space. Since this location is not randomized with ASLR, ++ it can be used to assist security vulnerability exploitation. ++ ++ This setting can be changed at boot time via the kernel command ++ line parameter vsyscall=[emulate|none]. 
++ ++ On a system with recent enough glibc (2.14 or newer) and no ++ static binaries, you can say None without a performance penalty ++ to improve security. ++ ++ If unsure, select "Emulate". ++ ++ config LEGACY_VSYSCALL_EMULATE ++ bool "Emulate" ++ help ++ The kernel traps and emulates calls into the fixed ++ vsyscall address mapping. This makes the mapping ++ non-executable, but it still contains known contents, ++ which could be used in certain rare security vulnerability ++ exploits. This configuration is recommended when userspace ++ still uses the vsyscall area. ++ ++ config LEGACY_VSYSCALL_NONE ++ bool "None" ++ help ++ There will be no vsyscall mapping at all. This will ++ eliminate any risk of ASLR bypass due to the vsyscall ++ fixed address mapping. Attempts to use the vsyscalls ++ will be reported to dmesg, so that either old or ++ malicious userspace programs can be identified. ++ ++endchoice ++ ++config CMDLINE_BOOL ++ bool "Built-in kernel command line" ++ ---help--- ++ Allow for specifying boot arguments to the kernel at ++ build time. On some systems (e.g. embedded ones), it is ++ necessary or convenient to provide some or all of the ++ kernel boot arguments with the kernel itself (that is, ++ to not rely on the boot loader to provide them.) ++ ++ To compile command line arguments into the kernel, ++ set this option to 'Y', then fill in the ++ boot arguments in CONFIG_CMDLINE. ++ ++ Systems with fully functional boot loaders (i.e. non-embedded) ++ should leave this option set to 'N'. ++ ++config CMDLINE ++ string "Built-in kernel command string" ++ depends on CMDLINE_BOOL ++ default "" ++ ---help--- ++ Enter arguments here that should be compiled into the kernel ++ image and used at boot time. If the boot loader provides a ++ command line at boot time, it is appended to this string to ++ form the full kernel command line, when the system boots. ++ ++ However, you can use the CONFIG_CMDLINE_OVERRIDE option to ++ change this behavior. ++ ++ In most cases, the command line (whether built-in or provided ++ by the boot loader) should specify the device for the root ++ file system. ++ ++config CMDLINE_OVERRIDE ++ bool "Built-in command line overrides boot loader arguments" ++ depends on CMDLINE_BOOL ++ ---help--- ++ Set this option to 'Y' to have the kernel ignore the boot loader ++ command line, and use ONLY the built-in command line. ++ ++ This is used to work around broken boot loaders. This should ++ be set to 'N' under normal conditions. ++ ++config MODIFY_LDT_SYSCALL ++ bool "Enable the LDT (local descriptor table)" if EXPERT ++ default y ++ ---help--- ++ Linux can allow user programs to install a per-process x86 ++ Local Descriptor Table (LDT) using the modify_ldt(2) system ++ call. This is required to run 16-bit or segmented code such as ++ DOSEMU or some Wine programs. It is also used by some very old ++ threading libraries. ++ ++ Enabling this feature adds a small amount of overhead to ++ context switches and increases the low-level kernel attack ++ surface. Disabling it removes the modify_ldt(2) system call. ++ ++ Saying 'N' here may make sense for embedded or server kernels. 
++ ++source "kernel/livepatch/Kconfig" ++ ++endmenu ++ ++config ARCH_HAS_ADD_PAGES ++ def_bool y ++ depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG ++ ++config ARCH_ENABLE_MEMORY_HOTPLUG ++ def_bool y ++ depends on X86_64 || (X86_32 && HIGHMEM) ++ ++config ARCH_ENABLE_MEMORY_HOTREMOVE ++ def_bool y ++ depends on MEMORY_HOTPLUG ++ ++config USE_PERCPU_NUMA_NODE_ID ++ def_bool y ++ depends on NUMA ++ ++config ARCH_ENABLE_SPLIT_PMD_PTLOCK ++ def_bool y ++ depends on X86_64 || X86_PAE ++ ++config ARCH_ENABLE_HUGEPAGE_MIGRATION ++ def_bool y ++ depends on X86_64 && HUGETLB_PAGE && MIGRATION ++ ++config ARCH_ENABLE_THP_MIGRATION ++ def_bool y ++ depends on X86_64 && TRANSPARENT_HUGEPAGE ++ ++menu "Power management and ACPI options" ++ ++config ARCH_HIBERNATION_HEADER ++ def_bool y ++ depends on X86_64 && HIBERNATION ++ ++source "kernel/power/Kconfig" ++ ++source "drivers/acpi/Kconfig" ++ ++source "drivers/sfi/Kconfig" ++ ++config X86_APM_BOOT ++ def_bool y ++ depends on APM ++ ++menuconfig APM ++ tristate "APM (Advanced Power Management) BIOS support" ++ depends on X86_32 && PM_SLEEP ++ ---help--- ++ APM is a BIOS specification for saving power using several different ++ techniques. This is mostly useful for battery powered laptops with ++ APM compliant BIOSes. If you say Y here, the system time will be ++ reset after a RESUME operation, the /proc/apm device will provide ++ battery status information, and user-space programs will receive ++ notification of APM "events" (e.g. battery status change). ++ ++ If you select "Y" here, you can disable actual use of the APM ++ BIOS by passing the "apm=off" option to the kernel at boot time. ++ ++ Note that the APM support is almost completely disabled for ++ machines with more than one CPU. ++ ++ In order to use APM, you will need supporting software. For location ++ and more information, read ++ and the Battery Powered Linux mini-HOWTO, available from ++ . ++ ++ This driver does not spin down disk drives (see the hdparm(8) ++ manpage ("man 8 hdparm") for that), and it doesn't turn off ++ VESA-compliant "green" monitors. ++ ++ This driver does not support the TI 4000M TravelMate and the ACER ++ 486/DX4/75 because they don't have compliant BIOSes. Many "green" ++ desktop machines also don't have compliant BIOSes, and this driver ++ may cause those machines to panic during the boot phase. ++ ++ Generally, if you don't have a battery in your machine, there isn't ++ much point in using this driver and you should say N. If you get ++ random kernel OOPSes or reboots that don't seem to be related to ++ anything, try disabling/enabling this option (or disabling/enabling ++ APM in your BIOS). ++ ++ Some other things you should try when experiencing seemingly random, ++ "weird" problems: ++ ++ 1) make sure that you have enough swap space and that it is ++ enabled. ++ 2) pass the "no-hlt" option to the kernel ++ 3) switch on floating point emulation in the kernel and pass ++ the "no387" option to the kernel ++ 4) pass the "floppy=nodma" option to the kernel ++ 5) pass the "mem=4M" option to the kernel (thereby disabling ++ all but the first 4 MB of RAM) ++ 6) make sure that the CPU is not over clocked. ++ 7) read the sig11 FAQ at ++ 8) disable the cache from your BIOS settings ++ 9) install a fan for the video card or exchange video RAM ++ 10) install a better fan for the CPU ++ 11) exchange RAM chips ++ 12) exchange the motherboard. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called apm. 
++ ++if APM ++ ++config APM_IGNORE_USER_SUSPEND ++ bool "Ignore USER SUSPEND" ++ ---help--- ++ This option will ignore USER SUSPEND requests. On machines with a ++ compliant APM BIOS, you want to say N. However, on the NEC Versa M ++ series notebooks, it is necessary to say Y because of a BIOS bug. ++ ++config APM_DO_ENABLE ++ bool "Enable PM at boot time" ++ ---help--- ++ Enable APM features at boot time. From page 36 of the APM BIOS ++ specification: "When disabled, the APM BIOS does not automatically ++ power manage devices, enter the Standby State, enter the Suspend ++ State, or take power saving steps in response to CPU Idle calls." ++ This driver will make CPU Idle calls when Linux is idle (unless this ++ feature is turned off -- see "Do CPU IDLE calls", below). This ++ should always save battery power, but more complicated APM features ++ will be dependent on your BIOS implementation. You may need to turn ++ this option off if your computer hangs at boot time when using APM ++ support, or if it beeps continuously instead of suspending. Turn ++ this off if you have a NEC UltraLite Versa 33/C or a Toshiba ++ T400CDT. This is off by default since most machines do fine without ++ this feature. ++ ++config APM_CPU_IDLE ++ depends on CPU_IDLE ++ bool "Make CPU Idle calls when idle" ++ ---help--- ++ Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. ++ On some machines, this can activate improved power savings, such as ++ a slowed CPU clock rate, when the machine is idle. These idle calls ++ are made after the idle loop has run for some length of time (e.g., ++ 333 mS). On some machines, this will cause a hang at boot time or ++ whenever the CPU becomes idle. (On machines with more than one CPU, ++ this option does nothing.) ++ ++config APM_DISPLAY_BLANK ++ bool "Enable console blanking using APM" ++ ---help--- ++ Enable console blanking using the APM. Some laptops can use this to ++ turn off the LCD backlight when the screen blanker of the Linux ++ virtual console blanks the screen. Note that this is only used by ++ the virtual console screen blanker, and won't turn off the backlight ++ when using the X Window system. This also doesn't have anything to ++ do with your VESA-compliant power-saving monitor. Further, this ++ option doesn't work for all laptops -- it might not turn off your ++ backlight at all, or it might print a lot of errors to the console, ++ especially if you are using gpm. ++ ++config APM_ALLOW_INTS ++ bool "Allow interrupts during APM BIOS calls" ++ ---help--- ++ Normally we disable external interrupts while we are making calls to ++ the APM BIOS as a measure to lessen the effects of a badly behaving ++ BIOS implementation. The BIOS should reenable interrupts if it ++ needs to. Unfortunately, some BIOSes do not -- especially those in ++ many of the newer IBM Thinkpads. If you experience hangs when you ++ suspend, try setting this to Y. Otherwise, say N. ++ ++endif # APM ++ ++source "drivers/cpufreq/Kconfig" ++ ++source "drivers/cpuidle/Kconfig" ++ ++source "drivers/idle/Kconfig" ++ ++endmenu ++ ++ ++menu "Bus options (PCI etc.)" ++ ++config PCI ++ bool "PCI support" ++ default y ++ ---help--- ++ Find out whether you have a PCI motherboard. PCI is the name of a ++ bus system, i.e. the way the CPU talks to the other stuff inside ++ your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or ++ VESA. If you have PCI, say Y, otherwise N. 
++ ++choice ++ prompt "PCI access mode" ++ depends on X86_32 && PCI ++ default PCI_GOANY ++ ---help--- ++ On PCI systems, the BIOS can be used to detect the PCI devices and ++ determine their configuration. However, some old PCI motherboards ++ have BIOS bugs and may crash if this is done. Also, some embedded ++ PCI-based systems don't have any BIOS at all. Linux can also try to ++ detect the PCI hardware directly without using the BIOS. ++ ++ With this option, you can specify how Linux should detect the ++ PCI devices. If you choose "BIOS", the BIOS will be used, ++ if you choose "Direct", the BIOS won't be used, and if you ++ choose "MMConfig", then PCI Express MMCONFIG will be used. ++ If you choose "Any", the kernel will try MMCONFIG, then the ++ direct access method and falls back to the BIOS if that doesn't ++ work. If unsure, go with the default, which is "Any". ++ ++config PCI_GOBIOS ++ bool "BIOS" ++ ++config PCI_GOMMCONFIG ++ bool "MMConfig" ++ ++config PCI_GODIRECT ++ bool "Direct" ++ ++config PCI_GOOLPC ++ bool "OLPC XO-1" ++ depends on OLPC ++ ++config PCI_GOANY ++ bool "Any" ++ ++endchoice ++ ++config PCI_BIOS ++ def_bool y ++ depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY) ++ ++# x86-64 doesn't support PCI BIOS access from long mode so always go direct. ++config PCI_DIRECT ++ def_bool y ++ depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG)) ++ ++config PCI_MMCONFIG ++ bool "Support mmconfig PCI config space access" if X86_64 ++ default y ++ depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST) ++ depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) ++ ++config PCI_OLPC ++ def_bool y ++ depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) ++ ++config PCI_XEN ++ def_bool y ++ depends on PCI && XEN ++ select SWIOTLB_XEN ++ ++config PCI_DOMAINS ++ def_bool y ++ depends on PCI ++ ++config MMCONF_FAM10H ++ def_bool y ++ depends on X86_64 && PCI_MMCONFIG && ACPI ++ ++config PCI_CNB20LE_QUIRK ++ bool "Read CNB20LE Host Bridge Windows" if EXPERT ++ depends on PCI ++ help ++ Read the PCI windows out of the CNB20LE host bridge. This allows ++ PCI hotplug to work on systems with the CNB20LE chipset which do ++ not have ACPI. ++ ++ There's no public spec for this chipset, and this functionality ++ is known to be incomplete. ++ ++ You should say N unless you know you need this. ++ ++source "drivers/pci/Kconfig" ++ ++config ISA_BUS ++ bool "ISA bus support on modern systems" if EXPERT ++ help ++ Expose ISA bus device drivers and options available for selection and ++ configuration. Enable this option if your target machine has an ISA ++ bus. ISA is an older system, displaced by PCI and newer bus ++ architectures -- if your target machine is modern, it probably does ++ not have an ISA bus. ++ ++ If unsure, say N. ++ ++# x86_64 have no ISA slots, but can have ISA-style DMA. ++config ISA_DMA_API ++ bool "ISA-style DMA support" if (X86_64 && EXPERT) ++ default y ++ help ++ Enables ISA-style DMA support for devices requiring such controllers. ++ If unsure, say Y. ++ ++if X86_32 ++ ++config ISA ++ bool "ISA support" ++ ---help--- ++ Find out whether you have ISA slots on your motherboard. ISA is the ++ name of a bus system, i.e. the way the CPU talks to the other stuff ++ inside your box. Other bus systems are PCI, EISA, MicroChannel ++ (MCA) or VESA. ISA is an older system, now being displaced by PCI; ++ newer boards don't support it. If you have ISA, say Y, otherwise N. 
++ ++config EISA ++ bool "EISA support" ++ depends on ISA ++ ---help--- ++ The Extended Industry Standard Architecture (EISA) bus was ++ developed as an open alternative to the IBM MicroChannel bus. ++ ++ The EISA bus provided some of the features of the IBM MicroChannel ++ bus while maintaining backward compatibility with cards made for ++ the older ISA bus. The EISA bus saw limited use between 1988 and ++ 1995 when it was made obsolete by the PCI bus. ++ ++ Say Y here if you are building a kernel for an EISA-based machine. ++ ++ Otherwise, say N. ++ ++source "drivers/eisa/Kconfig" ++ ++config SCx200 ++ tristate "NatSemi SCx200 support" ++ ---help--- ++ This provides basic support for National Semiconductor's ++ (now AMD's) Geode processors. The driver probes for the ++ PCI-IDs of several on-chip devices, so its a good dependency ++ for other scx200_* drivers. ++ ++ If compiled as a module, the driver is named scx200. ++ ++config SCx200HR_TIMER ++ tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" ++ depends on SCx200 ++ default y ++ ---help--- ++ This driver provides a clocksource built upon the on-chip ++ 27MHz high-resolution timer. Its also a workaround for ++ NSC Geode SC-1100's buggy TSC, which loses time when the ++ processor goes idle (as is done by the scheduler). The ++ other workaround is idle=poll boot option. ++ ++config OLPC ++ bool "One Laptop Per Child support" ++ depends on !X86_PAE ++ select GPIOLIB ++ select OF ++ select OF_PROMTREE ++ select IRQ_DOMAIN ++ ---help--- ++ Add support for detecting the unique features of the OLPC ++ XO hardware. ++ ++config OLPC_XO1_PM ++ bool "OLPC XO-1 Power Management" ++ depends on OLPC && MFD_CS5535=y && PM_SLEEP ++ ---help--- ++ Add support for poweroff and suspend of the OLPC XO-1 laptop. ++ ++config OLPC_XO1_RTC ++ bool "OLPC XO-1 Real Time Clock" ++ depends on OLPC_XO1_PM && RTC_DRV_CMOS ++ ---help--- ++ Add support for the XO-1 real time clock, which can be used as a ++ programmable wakeup source. ++ ++config OLPC_XO1_SCI ++ bool "OLPC XO-1 SCI extras" ++ depends on OLPC && OLPC_XO1_PM && GPIO_CS5535=y ++ depends on INPUT=y ++ select POWER_SUPPLY ++ ---help--- ++ Add support for SCI-based features of the OLPC XO-1 laptop: ++ - EC-driven system wakeups ++ - Power button ++ - Ebook switch ++ - Lid switch ++ - AC adapter status updates ++ - Battery status updates ++ ++config OLPC_XO15_SCI ++ bool "OLPC XO-1.5 SCI extras" ++ depends on OLPC && ACPI ++ select POWER_SUPPLY ++ ---help--- ++ Add support for SCI-based features of the OLPC XO-1.5 laptop: ++ - EC-driven system wakeups ++ - AC adapter status updates ++ - Battery status updates ++ ++config ALIX ++ bool "PCEngines ALIX System Support (LED setup)" ++ select GPIOLIB ++ ---help--- ++ This option enables system support for the PCEngines ALIX. ++ At present this just sets up LEDs for GPIO control on ++ ALIX2/3/6 boards. However, other system specific setup should ++ get added here. ++ ++ Note: You must still enable the drivers for GPIO and LED support ++ (GPIO_CS5535 & LEDS_GPIO) to actually use the LEDs ++ ++ Note: You have to set alix.force=1 for boards with Award BIOS. ++ ++config NET5501 ++ bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)" ++ select GPIOLIB ++ ---help--- ++ This option enables system support for the Soekris Engineering net5501. 
++ ++config GEOS ++ bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" ++ select GPIOLIB ++ depends on DMI ++ ---help--- ++ This option enables system support for the Traverse Technologies GEOS. ++ ++config TS5500 ++ bool "Technologic Systems TS-5500 platform support" ++ depends on MELAN ++ select CHECK_SIGNATURE ++ select NEW_LEDS ++ select LEDS_CLASS ++ ---help--- ++ This option enables system support for the Technologic Systems TS-5500. ++ ++endif # X86_32 ++ ++config AMD_NB ++ def_bool y ++ depends on CPU_SUP_AMD && PCI ++ ++source "drivers/pcmcia/Kconfig" ++ ++config RAPIDIO ++ tristate "RapidIO support" ++ depends on PCI ++ default n ++ help ++ If enabled this option will include drivers and the core ++ infrastructure code to support RapidIO interconnect devices. ++ ++source "drivers/rapidio/Kconfig" ++ ++config X86_SYSFB ++ bool "Mark VGA/VBE/EFI FB as generic system framebuffer" ++ help ++ Firmwares often provide initial graphics framebuffers so the BIOS, ++ bootloader or kernel can show basic video-output during boot for ++ user-guidance and debugging. Historically, x86 used the VESA BIOS ++ Extensions and EFI-framebuffers for this, which are mostly limited ++ to x86. ++ This option, if enabled, marks VGA/VBE/EFI framebuffers as generic ++ framebuffers so the new generic system-framebuffer drivers can be ++ used on x86. If the framebuffer is not compatible with the generic ++ modes, it is advertised as fallback platform framebuffer so legacy ++ drivers like efifb, vesafb and uvesafb can pick it up. ++ If this option is not selected, all system framebuffers are always ++ marked as fallback platform framebuffers as usual. ++ ++ Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will ++ not be able to pick up generic system framebuffers if this option ++ is selected. You are highly encouraged to enable simplefb as ++ replacement if you select this option. simplefb can correctly deal ++ with generic system framebuffers. But you should still keep vesafb ++ and others enabled as fallback if a system framebuffer is ++ incompatible with simplefb. ++ ++ If unsure, say Y. ++ ++endmenu ++ ++ ++menu "Binary Emulations" ++ ++config IA32_EMULATION ++ bool "IA32 Emulation" ++ depends on X86_64 ++ select ARCH_WANT_OLD_COMPAT_IPC ++ select BINFMT_ELF ++ select COMPAT_BINFMT_ELF ++ select COMPAT_OLD_SIGACTION ++ ---help--- ++ Include code to run legacy 32-bit programs under a ++ 64-bit kernel. You should likely turn this on, unless you're ++ 100% sure that you don't have any 32-bit programs left. ++ ++config IA32_AOUT ++ tristate "IA32 a.out support" ++ depends on IA32_EMULATION ++ depends on BROKEN ++ ---help--- ++ Support old a.out binaries in the 32bit emulation. ++ ++config X86_X32 ++ bool "x32 ABI for 64-bit mode" ++ depends on X86_64 ++ ---help--- ++ Include code to run binaries for the x32 native 32-bit ABI ++ for 64-bit processors. An x32 process gets access to the ++ full 64-bit register file and wide data path while leaving ++ pointers at 32 bits for smaller memory footprint. ++ ++ You will need a recent binutils (2.22 or later) with ++ elf32_x86_64 support enabled to compile a kernel with this ++ option set. 
++ ++config COMPAT_32 ++ def_bool y ++ depends on IA32_EMULATION || X86_32 ++ select HAVE_UID16 ++ select OLD_SIGSUSPEND3 ++ ++config COMPAT ++ def_bool y ++ depends on IA32_EMULATION || X86_X32 ++ ++if COMPAT ++config COMPAT_FOR_U64_ALIGNMENT ++ def_bool y ++ ++config SYSVIPC_COMPAT ++ def_bool y ++ depends on SYSVIPC ++endif ++ ++endmenu ++ ++ ++config HAVE_ATOMIC_IOMAP ++ def_bool y ++ depends on X86_32 ++ ++config X86_DEV_DMA_OPS ++ bool ++ depends on X86_64 || STA2X11 ++ ++config X86_DMA_REMAP ++ bool ++ depends on STA2X11 ++ ++config HAVE_GENERIC_GUP ++ def_bool y ++ ++source "drivers/firmware/Kconfig" ++ ++source "arch/x86/kvm/Kconfig" +diff -uprN kernel/arch/x86/kernel/apic/apic.c kernel_new/arch/x86/kernel/apic/apic.c +--- kernel/arch/x86/kernel/apic/apic.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic.c 2021-04-01 18:28:07.653863289 +0800 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -269,10 +270,10 @@ void native_apic_icr_write(u32 low, u32 + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + u64 native_apic_icr_read(void) +@@ -479,16 +480,20 @@ static int lapic_next_deadline(unsigned + + static int lapic_timer_shutdown(struct clock_event_device *evt) + { ++ unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + ++ flags = hard_local_irq_save(); + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + apic_write(APIC_TMICT, 0); ++ hard_local_irq_restore(flags); ++ + return 0; + } + +@@ -523,6 +528,17 @@ static void lapic_timer_broadcast(const + #endif + } + ++#ifdef CONFIG_IPIPE ++static void lapic_itimer_ack(void) ++{ ++ __ack_APIC_irq(); ++} ++ ++static DEFINE_PER_CPU(struct ipipe_timer, lapic_itimer) = { ++ .irq = ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), ++ .ack = lapic_itimer_ack, ++}; ++#endif /* CONFIG_IPIPE */ + + /* + * The local apic timer can be used for any function which is CPU local. +@@ -653,6 +669,16 @@ static void setup_APIC_timer(void) + + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of(smp_processor_id()); ++#ifdef CONFIG_IPIPE ++ if (!(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) ++ levt->ipipe_timer = this_cpu_ptr(&lapic_itimer); ++ else { ++ static atomic_t once = ATOMIC_INIT(-1); ++ if (atomic_inc_and_test(&once)) ++ printk(KERN_INFO ++ "I-pipe: cannot use LAPIC as a tick device\n"); ++ } ++#endif /* CONFIG_IPIPE */ + + if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { + levt->name = "lapic-deadline"; +@@ -1239,7 +1265,7 @@ void lapic_shutdown(void) + if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + #ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) +@@ -1249,7 +1275,7 @@ void lapic_shutdown(void) + disable_local_APIC(); + + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /** +@@ -1493,7 +1519,7 @@ static bool apic_check_and_ack(union api + * per set bit. 
+ */ + for_each_set_bit(bit, isr->map, APIC_IR_BITS) +- ack_APIC_irq(); ++ __ack_APIC_irq(); + return true; + } + +@@ -2126,7 +2152,7 @@ __visible void __irq_entry smp_spurious_ + if (v & (1 << (vector & 0x1f))) { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", + vector, smp_processor_id()); +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } else { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", + vector, smp_processor_id()); +@@ -2581,12 +2607,12 @@ static int lapic_suspend(void) + apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); + #endif + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + disable_local_APIC(); + + irq_remapping_disable(); + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return 0; + } + +@@ -2599,7 +2625,7 @@ static void lapic_resume(void) + if (!apic_pm_state.active) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + /* + * IO-APIC and PIC have their own resume routines. +@@ -2657,7 +2683,7 @@ static void lapic_resume(void) + + irq_remapping_reenable(x2apic_mode); + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +diff -uprN kernel/arch/x86/kernel/apic/apic.c.orig kernel_new/arch/x86/kernel/apic/apic.c.orig +--- kernel/arch/x86/kernel/apic/apic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,2857 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000, 2009 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. ++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int num_processors; ++ ++unsigned disabled_cpus; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); ++ ++u8 boot_cpu_apic_version; ++ ++/* ++ * The highest APIC ID seen during enumeration. ++ */ ++static unsigned int max_physical_apicid; ++ ++/* ++ * Bitmask of physically existing CPUs: ++ */ ++physid_mask_t phys_cpu_present_map; ++ ++/* ++ * Processor to be disabled specified by kernel parameter ++ * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to ++ * avoid undefined behaviour caused by sending INIT from AP to BSP. ++ */ ++static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; ++ ++/* ++ * This variable controls which CPUs receive external NMIs. By default, ++ * external NMIs are delivered only to the BSP. 
++ */ ++static int apic_extnmi = APIC_EXTNMI_BSP; ++ ++/* ++ * Map cpu index to physical APIC ID ++ */ ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); ++ ++#ifdef CONFIG_X86_32 ++ ++/* ++ * On x86_32, the mapping between cpu and logical apicid may vary ++ * depending on apic in use. The following early percpu variable is ++ * used for the mapping. This is where the behaviors of x86_64 and 32 ++ * actually diverge. Let's keep it ugly for now. ++ */ ++DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); ++ ++/* Local APIC was disabled by the BIOS and enabled by the kernel */ ++static int enabled_via_apicbase; ++ ++/* ++ * Handle interrupt mode configuration register (IMCR). ++ * This register controls whether the interrupt signals ++ * that reach the BSP come from the master PIC or from the ++ * local APIC. Before entering Symmetric I/O Mode, either ++ * the BIOS or the operating system must switch out of ++ * PIC Mode by changing the IMCR. ++ */ ++static inline void imcr_pic_to_apic(void) ++{ ++ /* select IMCR register */ ++ outb(0x70, 0x22); ++ /* NMI and 8259 INTR go through APIC */ ++ outb(0x01, 0x23); ++} ++ ++static inline void imcr_apic_to_pic(void) ++{ ++ /* select IMCR register */ ++ outb(0x70, 0x22); ++ /* NMI and 8259 INTR go directly to BSP */ ++ outb(0x00, 0x23); ++} ++#endif ++ ++/* ++ * Knob to control our willingness to enable the local APIC. ++ * ++ * +1=force-enable ++ */ ++static int force_enable_local_apic __initdata; ++ ++/* ++ * APIC command line parameters ++ */ ++static int __init parse_lapic(char *arg) ++{ ++ if (IS_ENABLED(CONFIG_X86_32) && !arg) ++ force_enable_local_apic = 1; ++ else if (arg && !strncmp(arg, "notscdeadline", 13)) ++ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); ++ return 0; ++} ++early_param("lapic", parse_lapic); ++ ++#ifdef CONFIG_X86_64 ++static int apic_calibrate_pmtmr __initdata; ++static __init int setup_apicpmtimer(char *s) ++{ ++ apic_calibrate_pmtmr = 1; ++ notsc_setup(NULL); ++ return 0; ++} ++__setup("apicpmtimer", setup_apicpmtimer); ++#endif ++ ++unsigned long mp_lapic_addr; ++int disable_apic; ++/* Disable local APIC timer from the kernel commandline or via dmi quirk */ ++static int disable_apic_timer __initdata; ++/* Local APIC timer works in C2 */ ++int local_apic_timer_c2_ok; ++EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); ++ ++/* ++ * Debug level, exported for io_apic.c ++ */ ++int apic_verbosity; ++ ++int pic_mode; ++ ++/* Have we found an MP table */ ++int smp_found_config; ++ ++static struct resource lapic_resource = { ++ .name = "Local APIC", ++ .flags = IORESOURCE_MEM | IORESOURCE_BUSY, ++}; ++ ++unsigned int lapic_timer_frequency = 0; ++ ++static void apic_pm_activate(void); ++ ++static unsigned long apic_phys; ++ ++/* ++ * Get the LAPIC version ++ */ ++static inline int lapic_get_version(void) ++{ ++ return GET_APIC_VERSION(apic_read(APIC_LVR)); ++} ++ ++/* ++ * Check, if the APIC is integrated or a separate chip ++ */ ++static inline int lapic_is_integrated(void) ++{ ++ return APIC_INTEGRATED(lapic_get_version()); ++} ++ ++/* ++ * Check, whether this is a modern or a first generation APIC ++ */ ++static int modern_apic(void) ++{ ++ /* AMD systems use old APIC versions, so check 
the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ ++ /* Hygon systems use modern APIC */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ return 1; ++ ++ return lapic_get_version() >= 0x14; ++} ++ ++/* ++ * right after this call apic become NOOP driven ++ * so apic->write/read doesn't do anything ++ */ ++static void __init apic_disable(void) ++{ ++ pr_info("APIC: switched to apic NOOP\n"); ++ apic = &apic_noop; ++} ++ ++void native_apic_wait_icr_idle(void) ++{ ++ while (apic_read(APIC_ICR) & APIC_ICR_BUSY) ++ cpu_relax(); ++} ++ ++u32 native_safe_apic_wait_icr_idle(void) ++{ ++ u32 send_status; ++ int timeout; ++ ++ timeout = 0; ++ do { ++ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; ++ if (!send_status) ++ break; ++ inc_irq_stat(icr_read_retry_count); ++ udelay(100); ++ } while (timeout++ < 1000); ++ ++ return send_status; ++} ++ ++void native_apic_icr_write(u32 low, u32 id) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); ++ apic_write(APIC_ICR, low); ++ local_irq_restore(flags); ++} ++ ++u64 native_apic_icr_read(void) ++{ ++ u32 icr1, icr2; ++ ++ icr2 = apic_read(APIC_ICR2); ++ icr1 = apic_read(APIC_ICR); ++ ++ return icr1 | ((u64)icr2 << 32); ++} ++ ++#ifdef CONFIG_X86_32 ++/** ++ * get_physical_broadcast - Get number of physical broadcast IDs ++ */ ++int get_physical_broadcast(void) ++{ ++ return modern_apic() ? 0xff : 0xf; ++} ++#endif ++ ++/** ++ * lapic_get_maxlvt - get the maximum number of local vector table entries ++ */ ++int lapic_get_maxlvt(void) ++{ ++ /* ++ * - we always have APIC integrated on 64bit mode ++ * - 82489DXs do not report # of LVT entries ++ */ ++ return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; ++} ++ ++/* ++ * Local APIC timer ++ */ ++ ++/* Clock divisor */ ++#define APIC_DIVISOR 16 ++#define TSC_DIVISOR 8 ++ ++/* ++ * This function sets up the local APIC timer, with a timeout of ++ * 'clocks' APIC bus clock. During calibration we actually call ++ * this function twice on the boot CPU, once with a bogus timeout ++ * value, second time for real. The other (noncalibrating) CPUs ++ * call this function only once, with the real, calibrated value. ++ * ++ * We do reads before writes even if unnecessary, to get around the ++ * P5 APIC double write bug. ++ */ ++static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) ++{ ++ unsigned int lvtt_value, tmp_value; ++ ++ lvtt_value = LOCAL_TIMER_VECTOR; ++ if (!oneshot) ++ lvtt_value |= APIC_LVT_TIMER_PERIODIC; ++ else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; ++ ++ if (!lapic_is_integrated()) ++ lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); ++ ++ if (!irqen) ++ lvtt_value |= APIC_LVT_MASKED; ++ ++ apic_write(APIC_LVTT, lvtt_value); ++ ++ if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { ++ /* ++ * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, ++ * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. ++ * According to Intel, MFENCE can do the serialization here. 
++ */ ++ asm volatile("mfence" : : : "memory"); ++ ++ printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); ++ return; ++ } ++ ++ /* ++ * Divide PICLK by 16 ++ */ ++ tmp_value = apic_read(APIC_TDCR); ++ apic_write(APIC_TDCR, ++ (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ++ APIC_TDR_DIV_16); ++ ++ if (!oneshot) ++ apic_write(APIC_TMICT, clocks / APIC_DIVISOR); ++} ++ ++/* ++ * Setup extended LVT, AMD specific ++ * ++ * Software should use the LVT offsets the BIOS provides. The offsets ++ * are determined by the subsystems using it like those for MCE ++ * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts ++ * are supported. Beginning with family 10h at least 4 offsets are ++ * available. ++ * ++ * Since the offsets must be consistent for all cores, we keep track ++ * of the LVT offsets in software and reserve the offset for the same ++ * vector also to be used on other cores. An offset is freed by ++ * setting the entry to APIC_EILVT_MASKED. ++ * ++ * If the BIOS is right, there should be no conflicts. Otherwise a ++ * "[Firmware Bug]: ..." error message is generated. However, if ++ * software does not properly determines the offsets, it is not ++ * necessarily a BIOS bug. ++ */ ++ ++static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX]; ++ ++static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) ++{ ++ return (old & APIC_EILVT_MASKED) ++ || (new == APIC_EILVT_MASKED) ++ || ((new & ~APIC_EILVT_MASKED) == old); ++} ++ ++static unsigned int reserve_eilvt_offset(int offset, unsigned int new) ++{ ++ unsigned int rsvd, vector; ++ ++ if (offset >= APIC_EILVT_NR_MAX) ++ return ~0; ++ ++ rsvd = atomic_read(&eilvt_offsets[offset]); ++ do { ++ vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ ++ if (vector && !eilvt_entry_is_changeable(vector, new)) ++ /* may not change if vectors are different */ ++ return rsvd; ++ rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); ++ } while (rsvd != new); ++ ++ rsvd &= ~APIC_EILVT_MASKED; ++ if (rsvd && rsvd != vector) ++ pr_info("LVT offset %d assigned for vector 0x%02x\n", ++ offset, rsvd); ++ ++ return new; ++} ++ ++/* ++ * If mask=1, the LVT entry does not generate interrupts while mask=0 ++ * enables the vector. See also the BKDGs. Must be called with ++ * preemption disabled. 
++ */ ++ ++int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) ++{ ++ unsigned long reg = APIC_EILVTn(offset); ++ unsigned int new, old, reserved; ++ ++ new = (mask << 16) | (msg_type << 8) | vector; ++ old = apic_read(reg); ++ reserved = reserve_eilvt_offset(offset, new); ++ ++ if (reserved != new) { ++ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " ++ "vector 0x%x, but the register is already in use for " ++ "vector 0x%x on another cpu\n", ++ smp_processor_id(), reg, offset, new, reserved); ++ return -EINVAL; ++ } ++ ++ if (!eilvt_entry_is_changeable(old, new)) { ++ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " ++ "vector 0x%x, but the register is already in use for " ++ "vector 0x%x on this cpu\n", ++ smp_processor_id(), reg, offset, new, old); ++ return -EBUSY; ++ } ++ ++ apic_write(reg, new); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(setup_APIC_eilvt); ++ ++/* ++ * Program the next event, relative to now ++ */ ++static int lapic_next_event(unsigned long delta, ++ struct clock_event_device *evt) ++{ ++ apic_write(APIC_TMICT, delta); ++ return 0; ++} ++ ++static int lapic_next_deadline(unsigned long delta, ++ struct clock_event_device *evt) ++{ ++ u64 tsc; ++ ++ tsc = rdtsc(); ++ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); ++ return 0; ++} ++ ++static int lapic_timer_shutdown(struct clock_event_device *evt) ++{ ++ unsigned int v; ++ ++ /* Lapic used as dummy for broadcast ? */ ++ if (evt->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ v = apic_read(APIC_LVTT); ++ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); ++ apic_write(APIC_LVTT, v); ++ apic_write(APIC_TMICT, 0); ++ return 0; ++} ++ ++static inline int ++lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) ++{ ++ /* Lapic used as dummy for broadcast ? */ ++ if (evt->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); ++ return 0; ++} ++ ++static int lapic_timer_set_periodic(struct clock_event_device *evt) ++{ ++ return lapic_timer_set_periodic_oneshot(evt, false); ++} ++ ++static int lapic_timer_set_oneshot(struct clock_event_device *evt) ++{ ++ return lapic_timer_set_periodic_oneshot(evt, true); ++} ++ ++/* ++ * Local APIC timer broadcast function ++ */ ++static void lapic_timer_broadcast(const struct cpumask *mask) ++{ ++#ifdef CONFIG_SMP ++ apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#endif ++} ++ ++ ++/* ++ * The local apic timer can be used for any function which is CPU local. 
++ */ ++static struct clock_event_device lapic_clockevent = { ++ .name = "lapic", ++ .features = CLOCK_EVT_FEAT_PERIODIC | ++ CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP ++ | CLOCK_EVT_FEAT_DUMMY, ++ .shift = 32, ++ .set_state_shutdown = lapic_timer_shutdown, ++ .set_state_periodic = lapic_timer_set_periodic, ++ .set_state_oneshot = lapic_timer_set_oneshot, ++ .set_state_oneshot_stopped = lapic_timer_shutdown, ++ .set_next_event = lapic_next_event, ++ .broadcast = lapic_timer_broadcast, ++ .rating = 100, ++ .irq = -1, ++}; ++static DEFINE_PER_CPU(struct clock_event_device, lapic_events); ++ ++#define DEADLINE_MODEL_MATCH_FUNC(model, func) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func } ++ ++#define DEADLINE_MODEL_MATCH_REV(model, rev) \ ++ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } ++ ++static u32 hsx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x02: return 0x3a; /* EP */ ++ case 0x04: return 0x0f; /* EX */ ++ } ++ ++ return ~0U; ++} ++ ++static u32 bdx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x02: return 0x00000011; ++ case 0x03: return 0x0700000e; ++ case 0x04: return 0x0f00000c; ++ case 0x05: return 0x0e000003; ++ } ++ ++ return ~0U; ++} ++ ++static u32 skx_deadline_rev(void) ++{ ++ switch (boot_cpu_data.x86_stepping) { ++ case 0x03: return 0x01000136; ++ case 0x04: return 0x02000014; ++ } ++ ++ if (boot_cpu_data.x86_stepping > 4) ++ return 0; ++ ++ return ~0U; ++} ++ ++static const struct x86_cpu_id deadline_match[] = { ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), ++ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2), ++ ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52), ++ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52), ++ ++ {}, ++}; ++ ++static void apic_check_deadline_errata(void) ++{ ++ const struct x86_cpu_id *m; ++ u32 rev; ++ ++ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || ++ boot_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return; ++ ++ m = x86_match_cpu(deadline_match); ++ if (!m) ++ return; ++ ++ /* ++ * Function pointers will have the MSB set due to address layout, ++ * immediate revisions will not. ++ */ ++ if ((long)m->driver_data < 0) ++ rev = ((u32 (*)(void))(m->driver_data))(); ++ else ++ rev = (u32)m->driver_data; ++ ++ if (boot_cpu_data.microcode >= rev) ++ return; ++ ++ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); ++ pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " ++ "please update microcode to version: 0x%x (or later)\n", rev); ++} ++ ++/* ++ * Setup the local APIC timer for this CPU. Copy the initialized values ++ * of the boot CPU and register the clock event in the framework. 
++ */ ++static void setup_APIC_timer(void) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ ++ if (this_cpu_has(X86_FEATURE_ARAT)) { ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; ++ /* Make LAPIC timer preferrable over percpu HPET */ ++ lapic_clockevent.rating = 150; ++ } ++ ++ memcpy(levt, &lapic_clockevent, sizeof(*levt)); ++ levt->cpumask = cpumask_of(smp_processor_id()); ++ ++ if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { ++ levt->name = "lapic-deadline"; ++ levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | ++ CLOCK_EVT_FEAT_DUMMY); ++ levt->set_next_event = lapic_next_deadline; ++ clockevents_config_and_register(levt, ++ tsc_khz * (1000 / TSC_DIVISOR), ++ 0xF, ~0UL); ++ } else ++ clockevents_register_device(levt); ++} ++ ++/* ++ * Install the updated TSC frequency from recalibration at the TSC ++ * deadline clockevent devices. ++ */ ++static void __lapic_update_tsc_freq(void *info) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ ++ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ return; ++ ++ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR)); ++} ++ ++void lapic_update_tsc_freq(void) ++{ ++ /* ++ * The clockevent device's ->mult and ->shift can both be ++ * changed. In order to avoid races, schedule the frequency ++ * update code on each CPU. ++ */ ++ on_each_cpu(__lapic_update_tsc_freq, NULL, 0); ++} ++ ++/* ++ * In this functions we calibrate APIC bus clocks to the external timer. ++ * ++ * We want to do the calibration only once since we want to have local timer ++ * irqs syncron. CPUs connected by the same APIC bus have the very same bus ++ * frequency. ++ * ++ * This was previously done by reading the PIT/HPET and waiting for a wrap ++ * around to find out, that a tick has elapsed. I have a box, where the PIT ++ * readout is broken, so it never gets out of the wait loop again. This was ++ * also reported by others. ++ * ++ * Monitoring the jiffies value is inaccurate and the clockevents ++ * infrastructure allows us to do a simple substitution of the interrupt ++ * handler. ++ * ++ * The calibration routine also uses the pm_timer when possible, as the PIT ++ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes ++ * back to normal later in the boot process). ++ */ ++ ++#define LAPIC_CAL_LOOPS (HZ/10) ++ ++static __initdata int lapic_cal_loops = -1; ++static __initdata long lapic_cal_t1, lapic_cal_t2; ++static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; ++static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; ++static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; ++ ++/* ++ * Temporary interrupt handler and polled calibration function. 
++ */ ++static void __init lapic_cal_handler(struct clock_event_device *dev) ++{ ++ unsigned long long tsc = 0; ++ long tapic = apic_read(APIC_TMCCT); ++ unsigned long pm = acpi_pm_read_early(); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) ++ tsc = rdtsc(); ++ ++ switch (lapic_cal_loops++) { ++ case 0: ++ lapic_cal_t1 = tapic; ++ lapic_cal_tsc1 = tsc; ++ lapic_cal_pm1 = pm; ++ lapic_cal_j1 = jiffies; ++ break; ++ ++ case LAPIC_CAL_LOOPS: ++ lapic_cal_t2 = tapic; ++ lapic_cal_tsc2 = tsc; ++ if (pm < lapic_cal_pm1) ++ pm += ACPI_PM_OVRRUN; ++ lapic_cal_pm2 = pm; ++ lapic_cal_j2 = jiffies; ++ break; ++ } ++} ++ ++static int __init ++calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) ++{ ++ const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; ++ const long pm_thresh = pm_100ms / 100; ++ unsigned long mult; ++ u64 res; ++ ++#ifndef CONFIG_X86_PM_TIMER ++ return -1; ++#endif ++ ++ apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); ++ ++ /* Check, if the PM timer is available */ ++ if (!deltapm) ++ return -1; ++ ++ mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); ++ ++ if (deltapm > (pm_100ms - pm_thresh) && ++ deltapm < (pm_100ms + pm_thresh)) { ++ apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); ++ return 0; ++ } ++ ++ res = (((u64)deltapm) * mult) >> 22; ++ do_div(res, 1000000); ++ pr_warning("APIC calibration not consistent " ++ "with PM-Timer: %ldms instead of 100ms\n",(long)res); ++ ++ /* Correct the lapic counter value */ ++ res = (((u64)(*delta)) * pm_100ms); ++ do_div(res, deltapm); ++ pr_info("APIC delta adjusted to PM-Timer: " ++ "%lu (%ld)\n", (unsigned long)res, *delta); ++ *delta = (long)res; ++ ++ /* Correct the tsc counter value */ ++ if (boot_cpu_has(X86_FEATURE_TSC)) { ++ res = (((u64)(*deltatsc)) * pm_100ms); ++ do_div(res, deltapm); ++ apic_printk(APIC_VERBOSE, "TSC delta adjusted to " ++ "PM-Timer: %lu (%ld)\n", ++ (unsigned long)res, *deltatsc); ++ *deltatsc = (long)res; ++ } ++ ++ return 0; ++} ++ ++static int __init lapic_init_clockevent(void) ++{ ++ if (!lapic_timer_frequency) ++ return -1; ++ ++ /* Calculate the scaled math multiplication factor */ ++ lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, ++ TICK_NSEC, lapic_clockevent.shift); ++ lapic_clockevent.max_delta_ns = ++ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); ++ lapic_clockevent.max_delta_ticks = 0x7FFFFFFF; ++ lapic_clockevent.min_delta_ns = ++ clockevent_delta2ns(0xF, &lapic_clockevent); ++ lapic_clockevent.min_delta_ticks = 0xF; ++ ++ return 0; ++} ++ ++static int __init calibrate_APIC_clock(void) ++{ ++ struct clock_event_device *levt = this_cpu_ptr(&lapic_events); ++ u64 tsc_perj = 0, tsc_start = 0; ++ unsigned long jif_start; ++ unsigned long deltaj; ++ long delta, deltatsc; ++ int pm_referenced = 0; ++ ++ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) ++ return 0; ++ ++ /* ++ * Check if lapic timer has already been calibrated by platform ++ * specific routine, such as tsc calibration code. If so just fill ++ * in the clockevent structure and return. ++ */ ++ if (!lapic_init_clockevent()) { ++ apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", ++ lapic_timer_frequency); ++ /* ++ * Direct calibration methods must have an always running ++ * local APIC timer, no need for broadcast timer. ++ */ ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ return 0; ++ } ++ ++ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" ++ "calibrating APIC timer ...\n"); ++ ++ /* ++ * There are platforms w/o global clockevent devices. 
Instead of ++ * making the calibration conditional on that, use a polling based ++ * approach everywhere. ++ */ ++ local_irq_disable(); ++ ++ /* ++ * Setup the APIC counter to maximum. There is no way the lapic ++ * can underflow in the 100ms detection time frame ++ */ ++ __setup_APIC_LVTT(0xffffffff, 0, 0); ++ ++ /* ++ * Methods to terminate the calibration loop: ++ * 1) Global clockevent if available (jiffies) ++ * 2) TSC if available and frequency is known ++ */ ++ jif_start = READ_ONCE(jiffies); ++ ++ if (tsc_khz) { ++ tsc_start = rdtsc(); ++ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); ++ } ++ ++ /* ++ * Enable interrupts so the tick can fire, if a global ++ * clockevent device is available ++ */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { ++ /* Wait for a tick to elapse */ ++ while (1) { ++ if (tsc_khz) { ++ u64 tsc_now = rdtsc(); ++ if ((tsc_now - tsc_start) >= tsc_perj) { ++ tsc_start += tsc_perj; ++ break; ++ } ++ } else { ++ unsigned long jif_now = READ_ONCE(jiffies); ++ ++ if (time_after(jif_now, jif_start)) { ++ jif_start = jif_now; ++ break; ++ } ++ } ++ cpu_relax(); ++ } ++ ++ /* Invoke the calibration routine */ ++ local_irq_disable(); ++ lapic_cal_handler(NULL); ++ local_irq_enable(); ++ } ++ ++ local_irq_disable(); ++ ++ /* Build delta t1-t2 as apic timer counts down */ ++ delta = lapic_cal_t1 - lapic_cal_t2; ++ apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); ++ ++ deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); ++ ++ /* we trust the PM based calibration if possible */ ++ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, ++ &delta, &deltatsc); ++ ++ lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; ++ lapic_init_clockevent(); ++ ++ apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); ++ apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); ++ apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", ++ lapic_timer_frequency); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) { ++ apic_printk(APIC_VERBOSE, "..... CPU clock speed is " ++ "%ld.%04ld MHz.\n", ++ (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), ++ (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); ++ } ++ ++ apic_printk(APIC_VERBOSE, "..... host bus clock speed is " ++ "%u.%04u MHz.\n", ++ lapic_timer_frequency / (1000000 / HZ), ++ lapic_timer_frequency % (1000000 / HZ)); ++ ++ /* ++ * Do a sanity check on the APIC calibration result ++ */ ++ if (lapic_timer_frequency < (1000000 / HZ)) { ++ local_irq_enable(); ++ pr_warning("APIC frequency too slow, disabling apic timer\n"); ++ return -1; ++ } ++ ++ levt->features &= ~CLOCK_EVT_FEAT_DUMMY; ++ ++ /* ++ * PM timer calibration failed or not turned on so lets try APIC ++ * timer based calibration, if a global clockevent device is ++ * available. ++ */ ++ if (!pm_referenced && global_clock_event) { ++ apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); ++ ++ /* ++ * Setup the apic timer manually ++ */ ++ levt->event_handler = lapic_cal_handler; ++ lapic_timer_set_periodic(levt); ++ lapic_cal_loops = -1; ++ ++ /* Let the interrupts run */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) ++ cpu_relax(); ++ ++ /* Stop the lapic timer */ ++ local_irq_disable(); ++ lapic_timer_shutdown(levt); ++ ++ /* Jiffies delta */ ++ deltaj = lapic_cal_j2 - lapic_cal_j1; ++ apic_printk(APIC_VERBOSE, "... 
jiffies delta = %lu\n", deltaj); ++ ++ /* Check, if the jiffies result is consistent */ ++ if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) ++ apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); ++ else ++ levt->features |= CLOCK_EVT_FEAT_DUMMY; ++ } ++ local_irq_enable(); ++ ++ if (levt->features & CLOCK_EVT_FEAT_DUMMY) { ++ pr_warning("APIC timer disabled due to verification failure\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Setup the boot APIC ++ * ++ * Calibrate and verify the result. ++ */ ++void __init setup_boot_APIC_clock(void) ++{ ++ /* ++ * The local apic timer can be disabled via the kernel ++ * commandline or from the CPU detection code. Register the lapic ++ * timer as a dummy clock event source on SMP systems, so the ++ * broadcast mechanism is used. On UP systems simply ignore it. ++ */ ++ if (disable_apic_timer) { ++ pr_info("Disabling APIC timer\n"); ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) { ++ lapic_clockevent.mult = 1; ++ setup_APIC_timer(); ++ } ++ return; ++ } ++ ++ if (calibrate_APIC_clock()) { ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) ++ setup_APIC_timer(); ++ return; ++ } ++ ++ /* ++ * If nmi_watchdog is set to IO_APIC, we need the ++ * PIT/HPET going. Otherwise register lapic as a dummy ++ * device. ++ */ ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ ++ /* Setup the lapic or request the broadcast */ ++ setup_APIC_timer(); ++ amd_e400_c1e_apic_setup(); ++} ++ ++void setup_secondary_APIC_clock(void) ++{ ++ setup_APIC_timer(); ++ amd_e400_c1e_apic_setup(); ++} ++ ++/* ++ * The guts of the apic timer interrupt ++ */ ++static void local_apic_timer_interrupt(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(&lapic_events); ++ ++ /* ++ * Normally we should not be here till LAPIC has been initialized but ++ * in some cases like kdump, its possible that there is a pending LAPIC ++ * timer interrupt from previous kernel's context and is delivered in ++ * new kernel the moment interrupts are enabled. ++ * ++ * Interrupts are enabled early and LAPIC is setup much later, hence ++ * its possible that when we get here evt->event_handler is NULL. ++ * Check for event_handler being NULL and discard the interrupt as ++ * spurious. ++ */ ++ if (!evt->event_handler) { ++ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", ++ smp_processor_id()); ++ /* Switch it off */ ++ lapic_timer_shutdown(evt); ++ return; ++ } ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ inc_irq_stat(apic_timer_irqs); ++ ++ evt->event_handler(evt); ++} ++ ++/* ++ * Local APIC timer interrupt. This is the most natural way for doing ++ * local interrupts, but local timer interrupts can be emulated by ++ * broadcast interrupts too. [in case the hw doesn't support APIC timers] ++ * ++ * [ if a single-CPU system runs an SMP kernel then we call the local ++ * interrupt as well. Thus we cannot inline the local irq ... ] ++ */ ++__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ /* ++ * NOTE! We'd better ACK the irq immediately, ++ * because timer handling can be slow. ++ * ++ * update_process_times() expects us to have done irq_enter(). ++ * Besides, if we don't timer interrupts ignore the global ++ * interrupt lock, which is the WrongThing (tm) to do. 
++ */ ++ entering_ack_irq(); ++ trace_local_timer_entry(LOCAL_TIMER_VECTOR); ++ local_apic_timer_interrupt(); ++ trace_local_timer_exit(LOCAL_TIMER_VECTOR); ++ exiting_irq(); ++ ++ set_irq_regs(old_regs); ++} ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * Local APIC start and shutdown ++ */ ++ ++/** ++ * clear_local_APIC - shutdown the local APIC ++ * ++ * This is called, when a CPU is disabled and before rebooting, so the state of ++ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS ++ * leftovers during boot. ++ */ ++void clear_local_APIC(void) ++{ ++ int maxlvt; ++ u32 v; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!x2apic_mode && !apic_phys) ++ return; ++ ++ maxlvt = lapic_get_maxlvt(); ++ /* ++ * Masking an LVT entry can trigger a local APIC error ++ * if the vector is zero. Mask LVTERR first to prevent this. ++ */ ++ if (maxlvt >= 3) { ++ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ ++ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); ++ } ++ /* ++ * Careful: we have to set masks only first to deassert ++ * any level-triggered sources. ++ */ ++ v = apic_read(APIC_LVTT); ++ apic_write(APIC_LVTT, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT1); ++ apic_write(APIC_LVT1, v | APIC_LVT_MASKED); ++ if (maxlvt >= 4) { ++ v = apic_read(APIC_LVTPC); ++ apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); ++ } ++ ++ /* lets not touch this if we didn't frob it */ ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) { ++ v = apic_read(APIC_LVTTHMR); ++ apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); ++ } ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) { ++ v = apic_read(APIC_LVTCMCI); ++ if (!(v & APIC_LVT_MASKED)) ++ apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); ++ } ++#endif ++ ++ /* ++ * Clean APIC state for other OSs: ++ */ ++ apic_write(APIC_LVTT, APIC_LVT_MASKED); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ apic_write(APIC_LVT1, APIC_LVT_MASKED); ++ if (maxlvt >= 3) ++ apic_write(APIC_LVTERR, APIC_LVT_MASKED); ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, APIC_LVT_MASKED); ++ ++ /* Integrated APIC (!82489DX) ? */ ++ if (lapic_is_integrated()) { ++ if (maxlvt > 3) ++ /* Clear ESR due to Pentium errata 3AP and 11AP */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++} ++ ++/** ++ * disable_local_APIC - clear and disable the local APIC ++ */ ++void disable_local_APIC(void) ++{ ++ unsigned int value; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!x2apic_mode && !apic_phys) ++ return; ++ ++ clear_local_APIC(); ++ ++ /* ++ * Disable APIC (implies clearing of registers ++ * for 82489DX!). ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_SPIV_APIC_ENABLED; ++ apic_write(APIC_SPIV, value); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * When LAPIC was disabled by the BIOS and enabled by the kernel, ++ * restore the disabled state. ++ */ ++ if (enabled_via_apicbase) { ++ unsigned int l, h; ++ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_ENABLE; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++#endif ++} ++ ++/* ++ * If Linux enabled the LAPIC against the BIOS default disable it down before ++ * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and ++ * not power-off. Additionally clear all LVT entries before disable_local_APIC ++ * for the case where Linux didn't enable the LAPIC. 
++ */ ++void lapic_shutdown(void) ++{ ++ unsigned long flags; ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) ++ return; ++ ++ local_irq_save(flags); ++ ++#ifdef CONFIG_X86_32 ++ if (!enabled_via_apicbase) ++ clear_local_APIC(); ++ else ++#endif ++ disable_local_APIC(); ++ ++ ++ local_irq_restore(flags); ++} ++ ++/** ++ * sync_Arb_IDs - synchronize APIC bus arbitration IDs ++ */ ++void __init sync_Arb_IDs(void) ++{ ++ /* ++ * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not ++ * needed on AMD. ++ */ ++ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ return; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ ++ apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); ++ apic_write(APIC_ICR, APIC_DEST_ALLINC | ++ APIC_INT_LEVELTRIG | APIC_DM_INIT); ++} ++ ++enum apic_intr_mode_id apic_intr_mode; ++ ++static int __init apic_intr_mode_select(void) ++{ ++ /* Check kernel option */ ++ if (disable_apic) { ++ pr_info("APIC disabled via kernel command line\n"); ++ return APIC_PIC; ++ } ++ ++ /* Check BIOS */ ++#ifdef CONFIG_X86_64 ++ /* On 64-bit, the APIC must be integrated, Check local APIC only */ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ disable_apic = 1; ++ pr_info("APIC disabled by BIOS\n"); ++ return APIC_PIC; ++ } ++#else ++ /* On 32-bit, the APIC may be integrated APIC or 82489DX */ ++ ++ /* Neither 82489DX nor integrated APIC ? */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { ++ disable_apic = 1; ++ return APIC_PIC; ++ } ++ ++ /* If the BIOS pretends there is an integrated APIC ? */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && ++ APIC_INTEGRATED(boot_cpu_apic_version)) { ++ disable_apic = 1; ++ pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", ++ boot_cpu_physical_apicid); ++ return APIC_PIC; ++ } ++#endif ++ ++ /* Check MP table or ACPI MADT configuration */ ++ if (!smp_found_config) { ++ disable_ioapic_support(); ++ if (!acpi_lapic) { ++ pr_info("APIC: ACPI MADT or MP tables are not detected\n"); ++ return APIC_VIRTUAL_WIRE_NO_CONFIG; ++ } ++ return APIC_VIRTUAL_WIRE; ++ } ++ ++#ifdef CONFIG_SMP ++ /* If SMP should be disabled, then really disable it! */ ++ if (!setup_max_cpus) { ++ pr_info("APIC: SMP mode deactivated\n"); ++ return APIC_SYMMETRIC_IO_NO_ROUTING; ++ } ++ ++ if (read_apic_id() != boot_cpu_physical_apicid) { ++ panic("Boot APIC ID in local APIC unexpected (%d vs %d)", ++ read_apic_id(), boot_cpu_physical_apicid); ++ /* Or can we switch back to PIC here? */ ++ } ++#endif ++ ++ return APIC_SYMMETRIC_IO; ++} ++ ++/* ++ * An initial setup of the virtual wire mode. ++ */ ++void __init init_bsp_APIC(void) ++{ ++ unsigned int value; ++ ++ /* ++ * Don't do the setup now if we have a SMP BIOS as the ++ * through-I/O-APIC virtual wire mode might be active. ++ */ ++ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ ++ /* ++ * Enable APIC. ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ ++#ifdef CONFIG_X86_32 ++ /* This bit is reserved on P4/Xeon and should be cleared */ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ (boot_cpu_data.x86 == 15)) ++ value &= ~APIC_SPIV_FOCUS_DISABLED; ++ else ++#endif ++ value |= APIC_SPIV_FOCUS_DISABLED; ++ value |= SPURIOUS_APIC_VECTOR; ++ apic_write(APIC_SPIV, value); ++ ++ /* ++ * Set up the virtual wire mode. 
++ */ ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ value = APIC_DM_NMI; ++ if (!lapic_is_integrated()) /* 82489DX */ ++ value |= APIC_LVT_LEVEL_TRIGGER; ++ if (apic_extnmi == APIC_EXTNMI_NONE) ++ value |= APIC_LVT_MASKED; ++ apic_write(APIC_LVT1, value); ++} ++ ++/* Init the interrupt delivery mode for the BSP */ ++void __init apic_intr_mode_init(void) ++{ ++ bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); ++ ++ apic_intr_mode = apic_intr_mode_select(); ++ ++ switch (apic_intr_mode) { ++ case APIC_PIC: ++ pr_info("APIC: Keep in PIC mode(8259)\n"); ++ return; ++ case APIC_VIRTUAL_WIRE: ++ pr_info("APIC: Switch to virtual wire mode setup\n"); ++ default_setup_apic_routing(); ++ break; ++ case APIC_VIRTUAL_WIRE_NO_CONFIG: ++ pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); ++ upmode = true; ++ default_setup_apic_routing(); ++ break; ++ case APIC_SYMMETRIC_IO: ++ pr_info("APIC: Switch to symmetric I/O mode setup\n"); ++ default_setup_apic_routing(); ++ break; ++ case APIC_SYMMETRIC_IO_NO_ROUTING: ++ pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); ++ break; ++ } ++ ++ apic_bsp_setup(upmode); ++} ++ ++static void lapic_setup_esr(void) ++{ ++ unsigned int oldvalue, value, maxlvt; ++ ++ if (!lapic_is_integrated()) { ++ pr_info("No ESR for 82489DX.\n"); ++ return; ++ } ++ ++ if (apic->disable_esr) { ++ /* ++ * Something untraceable is creating bad interrupts on ++ * secondary quads ... for the moment, just leave the ++ * ESR disabled - we can't do anything useful with the ++ * errors anyway - mbligh ++ */ ++ pr_info("Leaving ESR disabled.\n"); ++ return; ++ } ++ ++ maxlvt = lapic_get_maxlvt(); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ oldvalue = apic_read(APIC_ESR); ++ ++ /* enables sending errors */ ++ value = ERROR_APIC_VECTOR; ++ apic_write(APIC_LVTERR, value); ++ ++ /* ++ * spec says clear errors after enabling vector. ++ */ ++ if (maxlvt > 3) ++ apic_write(APIC_ESR, 0); ++ value = apic_read(APIC_ESR); ++ if (value != oldvalue) ++ apic_printk(APIC_VERBOSE, "ESR value before enabling " ++ "vector: 0x%08x after: 0x%08x\n", ++ oldvalue, value); ++} ++ ++#define APIC_IR_REGS APIC_ISR_NR ++#define APIC_IR_BITS (APIC_IR_REGS * 32) ++#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG) ++ ++union apic_ir { ++ unsigned long map[APIC_IR_MAPSIZE]; ++ u32 regs[APIC_IR_REGS]; ++}; ++ ++static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) ++{ ++ int i, bit; ++ ++ /* Read the IRRs */ ++ for (i = 0; i < APIC_IR_REGS; i++) ++ irr->regs[i] = apic_read(APIC_IRR + i * 0x10); ++ ++ /* Read the ISRs */ ++ for (i = 0; i < APIC_IR_REGS; i++) ++ isr->regs[i] = apic_read(APIC_ISR + i * 0x10); ++ ++ /* ++ * If the ISR map is not empty. ACK the APIC and run another round ++ * to verify whether a pending IRR has been unblocked and turned ++ * into a ISR. ++ */ ++ if (!bitmap_empty(isr->map, APIC_IR_BITS)) { ++ /* ++ * There can be multiple ISR bits set when a high priority ++ * interrupt preempted a lower priority one. Issue an ACK ++ * per set bit. ++ */ ++ for_each_set_bit(bit, isr->map, APIC_IR_BITS) ++ ack_APIC_irq(); ++ return true; ++ } ++ ++ return !bitmap_empty(irr->map, APIC_IR_BITS); ++} ++ ++/* ++ * After a crash, we no longer service the interrupts and a pending ++ * interrupt from previous kernel might still have ISR bit set. 
++ * ++ * Most probably by now the CPU has serviced that pending interrupt and it ++ * might not have done the ack_APIC_irq() because it thought, interrupt ++ * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear ++ * the ISR bit and cpu thinks it has already serivced the interrupt. Hence ++ * a vector might get locked. It was noticed for timer irq (vector ++ * 0x31). Issue an extra EOI to clear ISR. ++ * ++ * If there are pending IRR bits they turn into ISR bits after a higher ++ * priority ISR bit has been acked. ++ */ ++static void apic_pending_intr_clear(void) ++{ ++ union apic_ir irr, isr; ++ unsigned int i; ++ ++ /* 512 loops are way oversized and give the APIC a chance to obey. */ ++ for (i = 0; i < 512; i++) { ++ if (!apic_check_and_ack(&irr, &isr)) ++ return; ++ } ++ /* Dump the IRR/ISR content if that failed */ ++ pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); ++} ++ ++/** ++ * setup_local_APIC - setup the local APIC ++ * ++ * Used to setup local APIC while initializing BSP or bringing up APs. ++ * Always called with preemption disabled. ++ */ ++static void setup_local_APIC(void) ++{ ++ int cpu = smp_processor_id(); ++ unsigned int value; ++ ++ ++ if (disable_apic) { ++ disable_ioapic_support(); ++ return; ++ } ++ ++ /* ++ * If this comes from kexec/kcrash the APIC might be enabled in ++ * SPIV. Soft disable it before doing further initialization. ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_SPIV_APIC_ENABLED; ++ apic_write(APIC_SPIV, value); ++ ++#ifdef CONFIG_X86_32 ++ /* Pound the ESR really hard over the head with a big hammer - mbligh */ ++ if (lapic_is_integrated() && apic->disable_esr) { ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ } ++#endif ++ perf_events_lapic_init(); ++ ++ /* ++ * Double-check whether this APIC is really registered. ++ * This is meaningless in clustered apic mode, so we skip it. ++ */ ++ BUG_ON(!apic->apic_id_registered()); ++ ++ /* ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++ apic->init_apic_ldr(); ++ ++#ifdef CONFIG_X86_32 ++ if (apic->dest_logical) { ++ int logical_apicid, ldr_apicid; ++ ++ /* ++ * APIC LDR is initialized. If logical_apicid mapping was ++ * initialized during get_smp_config(), make sure it matches ++ * the actual value. ++ */ ++ logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); ++ ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); ++ if (logical_apicid != BAD_APICID) ++ WARN_ON(logical_apicid != ldr_apicid); ++ /* Always use the value from LDR. */ ++ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; ++ } ++#endif ++ ++ /* ++ * Set Task Priority to 'accept all'. We never change this ++ * later on. ++ */ ++ value = apic_read(APIC_TASKPRI); ++ value &= ~APIC_TPRI_MASK; ++ apic_write(APIC_TASKPRI, value); ++ ++ /* Clear eventually stale ISR/IRR bits */ ++ apic_pending_intr_clear(); ++ ++ /* ++ * Now that we are all set up, enable the APIC ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ /* ++ * Enable APIC ++ */ ++ value |= APIC_SPIV_APIC_ENABLED; ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Some unknown Intel IO/APIC (or APIC) errata is biting us with ++ * certain networking cards. 
If high frequency interrupts are ++ * happening on a particular IOAPIC pin, plus the IOAPIC routing ++ * entry is masked/unmasked at a high rate as well then sooner or ++ * later IOAPIC line gets 'stuck', no more interrupts are received ++ * from the device. If focus CPU is disabled then the hang goes ++ * away, oh well :-( ++ * ++ * [ This bug can be reproduced easily with a level-triggered ++ * PCI Ne2000 networking cards and PII/PIII processors, dual ++ * BX chipset. ] ++ */ ++ /* ++ * Actually disabling the focus CPU check just makes the hang less ++ * frequent as it makes the interrupt distributon model be more ++ * like LRU than MRU (the short-term load is more even across CPUs). ++ */ ++ ++ /* ++ * - enable focus processor (bit==0) ++ * - 64bit mode always use processor focus ++ * so no need to set it ++ */ ++ value &= ~APIC_SPIV_FOCUS_DISABLED; ++#endif ++ ++ /* ++ * Set spurious IRQ vector ++ */ ++ value |= SPURIOUS_APIC_VECTOR; ++ apic_write(APIC_SPIV, value); ++ ++ /* ++ * Set up LVT0, LVT1: ++ * ++ * set up through-local-APIC on the boot CPU's LINT0. This is not ++ * strictly necessary in pure symmetric-IO mode, but sometimes ++ * we delegate interrupts to the 8259A. ++ */ ++ /* ++ * TODO: set up through-local-APIC from through-I/O-APIC? --macro ++ */ ++ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; ++ if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { ++ value = APIC_DM_EXTINT; ++ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); ++ } else { ++ value = APIC_DM_EXTINT | APIC_LVT_MASKED; ++ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); ++ } ++ apic_write(APIC_LVT0, value); ++ ++ /* ++ * Only the BSP sees the LINT1 NMI signal by default. This can be ++ * modified by apic_extnmi= boot option. ++ */ ++ if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) || ++ apic_extnmi == APIC_EXTNMI_ALL) ++ value = APIC_DM_NMI; ++ else ++ value = APIC_DM_NMI | APIC_LVT_MASKED; ++ ++ /* Is 82489DX ? */ ++ if (!lapic_is_integrated()) ++ value |= APIC_LVT_LEVEL_TRIGGER; ++ apic_write(APIC_LVT1, value); ++ ++#ifdef CONFIG_X86_MCE_INTEL ++ /* Recheck CMCI information after local APIC is up on CPU #0 */ ++ if (!cpu) ++ cmci_recheck(); ++#endif ++} ++ ++static void end_local_APIC_setup(void) ++{ ++ lapic_setup_esr(); ++ ++#ifdef CONFIG_X86_32 ++ { ++ unsigned int value; ++ /* Disable the local apic timer */ ++ value = apic_read(APIC_LVTT); ++ value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); ++ apic_write(APIC_LVTT, value); ++ } ++#endif ++ ++ apic_pm_activate(); ++} ++ ++/* ++ * APIC setup function for application processors. 
Called from smpboot.c ++ */ ++void apic_ap_setup(void) ++{ ++ setup_local_APIC(); ++ end_local_APIC_setup(); ++} ++ ++#ifdef CONFIG_X86_X2APIC ++int x2apic_mode; ++ ++enum { ++ X2APIC_OFF, ++ X2APIC_ON, ++ X2APIC_DISABLED, ++}; ++static int x2apic_state; ++ ++static void __x2apic_disable(void) ++{ ++ u64 msr; ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) ++ return; ++ ++ rdmsrl(MSR_IA32_APICBASE, msr); ++ if (!(msr & X2APIC_ENABLE)) ++ return; ++ /* Disable xapic and x2apic first and then reenable xapic mode */ ++ wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); ++ wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); ++ printk_once(KERN_INFO "x2apic disabled\n"); ++} ++ ++static void __x2apic_enable(void) ++{ ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_APICBASE, msr); ++ if (msr & X2APIC_ENABLE) ++ return; ++ wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); ++ printk_once(KERN_INFO "x2apic enabled\n"); ++} ++ ++static int __init setup_nox2apic(char *str) ++{ ++ if (x2apic_enabled()) { ++ int apicid = native_apic_msr_read(APIC_ID); ++ ++ if (apicid >= 255) { ++ pr_warning("Apicid: %08x, cannot enforce nox2apic\n", ++ apicid); ++ return 0; ++ } ++ pr_warning("x2apic already enabled.\n"); ++ __x2apic_disable(); ++ } ++ setup_clear_cpu_cap(X86_FEATURE_X2APIC); ++ x2apic_state = X2APIC_DISABLED; ++ x2apic_mode = 0; ++ return 0; ++} ++early_param("nox2apic", setup_nox2apic); ++ ++/* Called from cpu_init() to enable x2apic on (secondary) cpus */ ++void x2apic_setup(void) ++{ ++ /* ++ * If x2apic is not in ON state, disable it if already enabled ++ * from BIOS. ++ */ ++ if (x2apic_state != X2APIC_ON) { ++ __x2apic_disable(); ++ return; ++ } ++ __x2apic_enable(); ++} ++ ++static __init void x2apic_disable(void) ++{ ++ u32 x2apic_id, state = x2apic_state; ++ ++ x2apic_mode = 0; ++ x2apic_state = X2APIC_DISABLED; ++ ++ if (state != X2APIC_ON) ++ return; ++ ++ x2apic_id = read_apic_id(); ++ if (x2apic_id >= 255) ++ panic("Cannot disable x2apic, id: %08x\n", x2apic_id); ++ ++ __x2apic_disable(); ++ register_lapic_address(mp_lapic_addr); ++} ++ ++static __init void x2apic_enable(void) ++{ ++ if (x2apic_state != X2APIC_OFF) ++ return; ++ ++ x2apic_mode = 1; ++ x2apic_state = X2APIC_ON; ++ __x2apic_enable(); ++} ++ ++static __init void try_to_enable_x2apic(int remap_mode) ++{ ++ if (x2apic_state == X2APIC_DISABLED) ++ return; ++ ++ if (remap_mode != IRQ_REMAP_X2APIC_MODE) { ++ /* IR is required if there is APIC ID > 255 even when running ++ * under KVM ++ */ ++ if (max_physical_apicid > 255 || ++ !x86_init.hyper.x2apic_available()) { ++ pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); ++ x2apic_disable(); ++ return; ++ } ++ ++ /* ++ * without IR all CPUs can be addressed by IOAPIC/MSI ++ * only in physical mode ++ */ ++ x2apic_phys = 1; ++ } ++ x2apic_enable(); ++} ++ ++void __init check_x2apic(void) ++{ ++ if (x2apic_enabled()) { ++ pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); ++ x2apic_mode = 1; ++ x2apic_state = X2APIC_ON; ++ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) { ++ x2apic_state = X2APIC_DISABLED; ++ } ++} ++#else /* CONFIG_X86_X2APIC */ ++static int __init validate_x2apic(void) ++{ ++ if (!apic_is_x2apic_enabled()) ++ return 0; ++ /* ++ * Checkme: Can we simply turn off x2apic here instead of panic? 
++ */ ++ panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); ++} ++early_initcall(validate_x2apic); ++ ++static inline void try_to_enable_x2apic(int remap_mode) { } ++static inline void __x2apic_enable(void) { } ++#endif /* !CONFIG_X86_X2APIC */ ++ ++void __init enable_IR_x2apic(void) ++{ ++ unsigned long flags; ++ int ret, ir_stat; ++ ++ if (skip_ioapic_setup) { ++ pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); ++ return; ++ } ++ ++ ir_stat = irq_remapping_prepare(); ++ if (ir_stat < 0 && !x2apic_supported()) ++ return; ++ ++ ret = save_ioapic_entries(); ++ if (ret) { ++ pr_info("Saving IO-APIC state failed: %d\n", ret); ++ return; ++ } ++ ++ local_irq_save(flags); ++ legacy_pic->mask_all(); ++ mask_ioapic_entries(); ++ ++ /* If irq_remapping_prepare() succeeded, try to enable it */ ++ if (ir_stat >= 0) ++ ir_stat = irq_remapping_enable(); ++ /* ir_stat contains the remap mode or an error code */ ++ try_to_enable_x2apic(ir_stat); ++ ++ if (ir_stat < 0) ++ restore_ioapic_entries(); ++ legacy_pic->restore_mask(); ++ local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_X86_64 ++/* ++ * Detect and enable local APICs on non-SMP boards. ++ * Original code written by Keir Fraser. ++ * On AMD64 we trust the BIOS - if it says no APIC it is likely ++ * not correctly set up (usually the APIC timer won't work etc.) ++ */ ++static int __init detect_init_APIC(void) ++{ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ pr_info("No local APIC present\n"); ++ return -1; ++ } ++ ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ return 0; ++} ++#else ++ ++static int __init apic_verify(void) ++{ ++ u32 features, h, l; ++ ++ /* ++ * The APIC feature bit should now be enabled ++ * in `cpuid' ++ */ ++ features = cpuid_edx(1); ++ if (!(features & (1 << X86_FEATURE_APIC))) { ++ pr_warning("Could not enable APIC!\n"); ++ return -1; ++ } ++ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* The BIOS may have set up the APIC at some other address */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (l & MSR_IA32_APICBASE_ENABLE) ++ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; ++ } ++ ++ pr_info("Found and enabled local APIC!\n"); ++ return 0; ++} ++ ++int __init apic_force_enable(unsigned long addr) ++{ ++ u32 h, l; ++ ++ if (disable_apic) ++ return -1; ++ ++ /* ++ * Some BIOSes disable the local APIC in the APIC_BASE ++ * MSR. This can only be done in software for Intel P6 or later ++ * and AMD K7 (Model > 1) or later. ++ */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (!(l & MSR_IA32_APICBASE_ENABLE)) { ++ pr_info("Local APIC disabled by BIOS -- reenabling.\n"); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | addr; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ enabled_via_apicbase = 1; ++ } ++ } ++ return apic_verify(); ++} ++ ++/* ++ * Detect and initialize APIC ++ */ ++static int __init detect_init_APIC(void) ++{ ++ /* Disabled by kernel option? 
*/ ++ if (disable_apic) ++ return -1; ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_AMD: ++ if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || ++ (boot_cpu_data.x86 >= 15)) ++ break; ++ goto no_apic; ++ case X86_VENDOR_HYGON: ++ break; ++ case X86_VENDOR_INTEL: ++ if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || ++ (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC))) ++ break; ++ goto no_apic; ++ default: ++ goto no_apic; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) { ++ /* ++ * Over-ride BIOS and try to enable the local APIC only if ++ * "lapic" specified. ++ */ ++ if (!force_enable_local_apic) { ++ pr_info("Local APIC disabled by BIOS -- " ++ "you can enable it with \"lapic\"\n"); ++ return -1; ++ } ++ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) ++ return -1; ++ } else { ++ if (apic_verify()) ++ return -1; ++ } ++ ++ apic_pm_activate(); ++ ++ return 0; ++ ++no_apic: ++ pr_info("No local APIC present or hardware disabled\n"); ++ return -1; ++} ++#endif ++ ++/** ++ * init_apic_mappings - initialize APIC mappings ++ */ ++void __init init_apic_mappings(void) ++{ ++ unsigned int new_apicid; ++ ++ apic_check_deadline_errata(); ++ ++ if (x2apic_mode) { ++ boot_cpu_physical_apicid = read_apic_id(); ++ return; ++ } ++ ++ /* If no local APIC can be found return early */ ++ if (!smp_found_config && detect_init_APIC()) { ++ /* lets NOP'ify apic operations */ ++ pr_info("APIC: disable apic facility\n"); ++ apic_disable(); ++ } else { ++ apic_phys = mp_lapic_addr; ++ ++ /* ++ * If the system has ACPI MADT tables or MP info, the LAPIC ++ * address is already registered. ++ */ ++ if (!acpi_lapic && !smp_found_config) ++ register_lapic_address(apic_phys); ++ } ++ ++ /* ++ * Fetch the APIC ID of the BSP in case we have a ++ * default configuration (or the MP table is broken). ++ */ ++ new_apicid = read_apic_id(); ++ if (boot_cpu_physical_apicid != new_apicid) { ++ boot_cpu_physical_apicid = new_apicid; ++ /* ++ * yeah -- we lie about apic_version ++ * in case if apic was disabled via boot option ++ * but it's not a problem for SMP compiled kernel ++ * since apic_intr_mode_select is prepared for such ++ * a case and disable smp mode ++ */ ++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ } ++} ++ ++void __init register_lapic_address(unsigned long address) ++{ ++ mp_lapic_addr = address; ++ ++ if (!x2apic_mode) { ++ set_fixmap_nocache(FIX_APIC_BASE, address); ++ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", ++ APIC_BASE, address); ++ } ++ if (boot_cpu_physical_apicid == -1U) { ++ boot_cpu_physical_apicid = read_apic_id(); ++ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ } ++} ++ ++/* ++ * Local APIC interrupts ++ */ ++ ++/* ++ * This interrupt should _never_ happen with our APIC/SMP architecture ++ */ ++__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) ++{ ++ u8 vector = ~regs->orig_ax; ++ u32 v; ++ ++ entering_irq(); ++ trace_spurious_apic_entry(vector); ++ ++ inc_irq_stat(irq_spurious_count); ++ ++ /* ++ * If this is a spurious interrupt then do not acknowledge ++ */ ++ if (vector == SPURIOUS_APIC_VECTOR) { ++ /* See SDM vol 3 */ ++ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", ++ smp_processor_id()); ++ goto out; ++ } ++ ++ /* ++ * If it is a vectored one, verify it's set in the ISR. If set, ++ * acknowledge it. 
++ */ ++ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); ++ if (v & (1 << (vector & 0x1f))) { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", ++ vector, smp_processor_id()); ++ ack_APIC_irq(); ++ } else { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", ++ vector, smp_processor_id()); ++ } ++out: ++ trace_spurious_apic_exit(vector); ++ exiting_irq(); ++} ++ ++/* ++ * This interrupt should never happen with our APIC/SMP architecture ++ */ ++__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) ++{ ++ static const char * const error_interrupt_reason[] = { ++ "Send CS error", /* APIC Error Bit 0 */ ++ "Receive CS error", /* APIC Error Bit 1 */ ++ "Send accept error", /* APIC Error Bit 2 */ ++ "Receive accept error", /* APIC Error Bit 3 */ ++ "Redirectable IPI", /* APIC Error Bit 4 */ ++ "Send illegal vector", /* APIC Error Bit 5 */ ++ "Received illegal vector", /* APIC Error Bit 6 */ ++ "Illegal register address", /* APIC Error Bit 7 */ ++ }; ++ u32 v, i = 0; ++ ++ entering_irq(); ++ trace_error_apic_entry(ERROR_APIC_VECTOR); ++ ++ /* First tickle the hardware, only then report what went on. -- REW */ ++ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ v = apic_read(APIC_ESR); ++ ack_APIC_irq(); ++ atomic_inc(&irq_err_count); ++ ++ apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", ++ smp_processor_id(), v); ++ ++ v &= 0xff; ++ while (v) { ++ if (v & 0x1) ++ apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); ++ i++; ++ v >>= 1; ++ } ++ ++ apic_printk(APIC_DEBUG, KERN_CONT "\n"); ++ ++ trace_error_apic_exit(ERROR_APIC_VECTOR); ++ exiting_irq(); ++} ++ ++/** ++ * connect_bsp_APIC - attach the APIC to the interrupt system ++ */ ++static void __init connect_bsp_APIC(void) ++{ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ /* ++ * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's ++ * local APIC to INT and NMI lines. ++ */ ++ apic_printk(APIC_VERBOSE, "leaving PIC mode, " ++ "enabling APIC mode.\n"); ++ imcr_pic_to_apic(); ++ } ++#endif ++} ++ ++/** ++ * disconnect_bsp_APIC - detach the APIC from the interrupt system ++ * @virt_wire_setup: indicates, whether virtual wire mode is selected ++ * ++ * Virtual wire mode is necessary to deliver legacy interrupts even when the ++ * APIC is disabled. ++ */ ++void disconnect_bsp_APIC(int virt_wire_setup) ++{ ++ unsigned int value; ++ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Put the board back into PIC mode (has an effect only on ++ * certain older boards). Note that APIC interrupts, including ++ * IPIs, won't work beyond this point! The only exception are ++ * INIT IPIs. 
++ */ ++ apic_printk(APIC_VERBOSE, "disabling APIC mode, " ++ "entering PIC mode.\n"); ++ imcr_apic_to_pic(); ++ return; ++ } ++#endif ++ ++ /* Go back to Virtual Wire compatibility mode */ ++ ++ /* For the spurious interrupt use vector F, and enable it */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ value |= 0xf; ++ apic_write(APIC_SPIV, value); ++ ++ if (!virt_wire_setup) { ++ /* ++ * For LVT0 make it edge triggered, active high, ++ * external and enabled ++ */ ++ value = apic_read(APIC_LVT0); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); ++ apic_write(APIC_LVT0, value); ++ } else { ++ /* Disable LVT0 */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ } ++ ++ /* ++ * For LVT1 make it edge triggered, active high, ++ * nmi and enabled ++ */ ++ value = apic_read(APIC_LVT1); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); ++ apic_write(APIC_LVT1, value); ++} ++ ++/* ++ * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated ++ * contiguously, it equals to current allocated max logical CPU ID plus 1. ++ * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, ++ * so the maximum of nr_logical_cpuids is nr_cpu_ids. ++ * ++ * NOTE: Reserve 0 for BSP. ++ */ ++static int nr_logical_cpuids = 1; ++ ++/* ++ * Used to store mapping between logical CPU IDs and APIC IDs. ++ */ ++static int cpuid_to_apicid[] = { ++ [0 ... NR_CPUS - 1] = -1, ++}; ++ ++#ifdef CONFIG_SMP ++/** ++ * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread ++ * @id: APIC ID to check ++ */ ++bool apic_id_is_primary_thread(unsigned int apicid) ++{ ++ u32 mask; ++ ++ if (smp_num_siblings == 1) ++ return true; ++ /* Isolate the SMT bit(s) in the APICID and check for 0 */ ++ mask = (1U << (fls(smp_num_siblings) - 1)) - 1; ++ return !(apicid & mask); ++} ++#endif ++ ++/* ++ * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids ++ * and cpuid_to_apicid[] synchronized. ++ */ ++static int allocate_logical_cpuid(int apicid) ++{ ++ int i; ++ ++ /* ++ * cpuid <-> apicid mapping is persistent, so when a cpu is up, ++ * check if the kernel has allocated a cpuid for it. ++ */ ++ for (i = 0; i < nr_logical_cpuids; i++) { ++ if (cpuid_to_apicid[i] == apicid) ++ return i; ++ } ++ ++ /* Allocate a new cpuid. */ ++ if (nr_logical_cpuids >= nr_cpu_ids) { ++ WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " ++ "Processor %d/0x%x and the rest are ignored.\n", ++ nr_cpu_ids, nr_logical_cpuids, apicid); ++ return -EINVAL; ++ } ++ ++ cpuid_to_apicid[nr_logical_cpuids] = apicid; ++ return nr_logical_cpuids++; ++} ++ ++int generic_processor_info(int apicid, int version) ++{ ++ int cpu, max = nr_cpu_ids; ++ bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, ++ phys_cpu_present_map); ++ ++ /* ++ * boot_cpu_physical_apicid is designed to have the apicid ++ * returned by read_apic_id(), i.e, the apicid of the ++ * currently booting-up processor. However, on some platforms, ++ * it is temporarily modified by the apicid reported as BSP ++ * through MP table. 
Concretely: ++ * ++ * - arch/x86/kernel/mpparse.c: MP_processor_info() ++ * - arch/x86/mm/amdtopology.c: amd_numa_init() ++ * ++ * This function is executed with the modified ++ * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel ++ * parameter doesn't work to disable APs on kdump 2nd kernel. ++ * ++ * Since fixing handling of boot_cpu_physical_apicid requires ++ * another discussion and tests on each platform, we leave it ++ * for now and here we use read_apic_id() directly in this ++ * function, generic_processor_info(). ++ */ ++ if (disabled_cpu_apicid != BAD_APICID && ++ disabled_cpu_apicid != read_apic_id() && ++ disabled_cpu_apicid == apicid) { ++ int thiscpu = num_processors + disabled_cpus; ++ ++ pr_warning("APIC: Disabling requested cpu." ++ " Processor %d/0x%x ignored.\n", ++ thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -ENODEV; ++ } ++ ++ /* ++ * If boot cpu has not been detected yet, then only allow upto ++ * nr_cpu_ids - 1 processors and keep one slot free for boot cpu ++ */ ++ if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && ++ apicid != boot_cpu_physical_apicid) { ++ int thiscpu = max + disabled_cpus - 1; ++ ++ pr_warning( ++ "APIC: NR_CPUS/possible_cpus limit of %i almost" ++ " reached. Keeping one slot for boot cpu." ++ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -ENODEV; ++ } ++ ++ if (num_processors >= nr_cpu_ids) { ++ int thiscpu = max + disabled_cpus; ++ ++ pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " ++ "reached. Processor %d/0x%x ignored.\n", ++ max, thiscpu, apicid); ++ ++ disabled_cpus++; ++ return -EINVAL; ++ } ++ ++ if (apicid == boot_cpu_physical_apicid) { ++ /* ++ * x86_bios_cpu_apicid is required to have processors listed ++ * in same order as logical cpu numbers. Hence the first ++ * entry is BSP, and so on. ++ * boot_cpu_init() already hold bit 0 in cpu_present_mask ++ * for BSP. ++ */ ++ cpu = 0; ++ ++ /* Logical cpuid 0 is reserved for BSP. */ ++ cpuid_to_apicid[0] = apicid; ++ } else { ++ cpu = allocate_logical_cpuid(apicid); ++ if (cpu < 0) { ++ disabled_cpus++; ++ return -EINVAL; ++ } ++ } ++ ++ /* ++ * Validate version ++ */ ++ if (version == 0x0) { ++ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", ++ cpu, apicid); ++ version = 0x10; ++ } ++ ++ if (version != boot_cpu_apic_version) { ++ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", ++ boot_cpu_apic_version, cpu, version); ++ } ++ ++ if (apicid > max_physical_apicid) ++ max_physical_apicid = apicid; ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) ++ early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; ++ early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; ++#endif ++#ifdef CONFIG_X86_32 ++ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ++ apic->x86_32_early_logical_apicid(cpu); ++#endif ++ set_cpu_possible(cpu, true); ++ physid_set(apicid, phys_cpu_present_map); ++ set_cpu_present(cpu, true); ++ num_processors++; ++ ++ return cpu; ++} ++ ++int hard_smp_processor_id(void) ++{ ++ return read_apic_id(); ++} ++ ++/* ++ * Override the generic EOI implementation with an optimized version. ++ * Only called during early boot when only one CPU is active and with ++ * interrupts disabled, so we know this does not race with actual APIC driver ++ * use. 
++ */ ++void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) ++{ ++ struct apic **drv; ++ ++ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { ++ /* Should happen once for each apic */ ++ WARN_ON((*drv)->eoi_write == eoi_write); ++ (*drv)->native_eoi_write = (*drv)->eoi_write; ++ (*drv)->eoi_write = eoi_write; ++ } ++} ++ ++static void __init apic_bsp_up_setup(void) ++{ ++#ifdef CONFIG_X86_64 ++ apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); ++#else ++ /* ++ * Hack: In case of kdump, after a crash, kernel might be booting ++ * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid ++ * might be zero if read from MP tables. Get it from LAPIC. ++ */ ++# ifdef CONFIG_CRASH_DUMP ++ boot_cpu_physical_apicid = read_apic_id(); ++# endif ++#endif ++ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); ++} ++ ++/** ++ * apic_bsp_setup - Setup function for local apic and io-apic ++ * @upmode: Force UP mode (for APIC_init_uniprocessor) ++ * ++ * Returns: ++ * apic_id of BSP APIC ++ */ ++void __init apic_bsp_setup(bool upmode) ++{ ++ connect_bsp_APIC(); ++ if (upmode) ++ apic_bsp_up_setup(); ++ setup_local_APIC(); ++ ++ enable_IO_APIC(); ++ end_local_APIC_setup(); ++ irq_remap_enable_fault_handling(); ++ setup_IO_APIC(); ++} ++ ++#ifdef CONFIG_UP_LATE_INIT ++void __init up_late_init(void) ++{ ++ if (apic_intr_mode == APIC_PIC) ++ return; ++ ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++} ++#endif ++ ++/* ++ * Power management ++ */ ++#ifdef CONFIG_PM ++ ++static struct { ++ /* ++ * 'active' is true if the local APIC was enabled by us and ++ * not the BIOS; this signifies that we are also responsible ++ * for disabling it before entering apm/acpi suspend ++ */ ++ int active; ++ /* r/w apic fields */ ++ unsigned int apic_id; ++ unsigned int apic_taskpri; ++ unsigned int apic_ldr; ++ unsigned int apic_dfr; ++ unsigned int apic_spiv; ++ unsigned int apic_lvtt; ++ unsigned int apic_lvtpc; ++ unsigned int apic_lvt0; ++ unsigned int apic_lvt1; ++ unsigned int apic_lvterr; ++ unsigned int apic_tmict; ++ unsigned int apic_tdcr; ++ unsigned int apic_thmr; ++ unsigned int apic_cmci; ++} apic_pm_state; ++ ++static int lapic_suspend(void) ++{ ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return 0; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ apic_pm_state.apic_id = apic_read(APIC_ID); ++ apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); ++ apic_pm_state.apic_ldr = apic_read(APIC_LDR); ++ apic_pm_state.apic_dfr = apic_read(APIC_DFR); ++ apic_pm_state.apic_spiv = apic_read(APIC_SPIV); ++ apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); ++ if (maxlvt >= 4) ++ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); ++ apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); ++ apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); ++ apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); ++ apic_pm_state.apic_tmict = apic_read(APIC_TMICT); ++ apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) ++ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) ++ apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI); ++#endif ++ ++ local_irq_save(flags); ++ disable_local_APIC(); ++ ++ irq_remapping_disable(); ++ ++ local_irq_restore(flags); ++ return 0; ++} ++ ++static void lapic_resume(void) ++{ ++ unsigned int l, h; ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return; ++ ++ local_irq_save(flags); 
++ ++ /* ++ * IO-APIC and PIC have their own resume routines. ++ * We just mask them here to make sure the interrupt ++ * subsystem is completely quiet while we enable x2apic ++ * and interrupt-remapping. ++ */ ++ mask_ioapic_entries(); ++ legacy_pic->mask_all(); ++ ++ if (x2apic_mode) { ++ __x2apic_enable(); ++ } else { ++ /* ++ * Make sure the APICBASE points to the right address ++ * ++ * FIXME! This will be wrong if we ever support suspend on ++ * SMP! We'll need to do this as part of the CPU restore! ++ */ ++ if (boot_cpu_data.x86 >= 6) { ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++ } ++ ++ maxlvt = lapic_get_maxlvt(); ++ apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); ++ apic_write(APIC_ID, apic_pm_state.apic_id); ++ apic_write(APIC_DFR, apic_pm_state.apic_dfr); ++ apic_write(APIC_LDR, apic_pm_state.apic_ldr); ++ apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); ++ apic_write(APIC_SPIV, apic_pm_state.apic_spiv); ++ apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); ++ apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ if (maxlvt >= 5) ++ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) ++ apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci); ++#endif ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); ++ apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); ++ apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); ++ apic_write(APIC_TMICT, apic_pm_state.apic_tmict); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ ++ irq_remapping_reenable(x2apic_mode); ++ ++ local_irq_restore(flags); ++} ++ ++/* ++ * This device has no shutdown method - fully functioning local APICs ++ * are needed on every CPU up until machine_halt/restart/poweroff. ++ */ ++ ++static struct syscore_ops lapic_syscore_ops = { ++ .resume = lapic_resume, ++ .suspend = lapic_suspend, ++}; ++ ++static void apic_pm_activate(void) ++{ ++ apic_pm_state.active = 1; ++} ++ ++static int __init init_lapic_sysfs(void) ++{ ++ /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ ++ if (boot_cpu_has(X86_FEATURE_APIC)) ++ register_syscore_ops(&lapic_syscore_ops); ++ ++ return 0; ++} ++ ++/* local apic needs to resume before other devices access its registers. */ ++core_initcall(init_lapic_sysfs); ++ ++#else /* CONFIG_PM */ ++ ++static void apic_pm_activate(void) { } ++ ++#endif /* CONFIG_PM */ ++ ++#ifdef CONFIG_X86_64 ++ ++static int multi_checked; ++static int multi; ++ ++static int set_multi(const struct dmi_system_id *d) ++{ ++ if (multi) ++ return 0; ++ pr_info("APIC: %s detected, Multi Chassis\n", d->ident); ++ multi = 1; ++ return 0; ++} ++ ++static const struct dmi_system_id multi_dmi_table[] = { ++ { ++ .callback = set_multi, ++ .ident = "IBM System Summit2", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "IBM"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), ++ }, ++ }, ++ {} ++}; ++ ++static void dmi_check_multi(void) ++{ ++ if (multi_checked) ++ return; ++ ++ dmi_check_system(multi_dmi_table); ++ multi_checked = 1; ++} ++ ++/* ++ * apic_is_clustered_box() -- Check if we can expect good TSC ++ * ++ * Thus far, the major user of this is IBM's Summit2 series: ++ * Clustered boxes may have unsynced TSC problems if they are ++ * multi-chassis. 
++ * Use DMI to check them ++ */ ++int apic_is_clustered_box(void) ++{ ++ dmi_check_multi(); ++ return multi; ++} ++#endif ++ ++/* ++ * APIC command line parameters ++ */ ++static int __init setup_disableapic(char *arg) ++{ ++ disable_apic = 1; ++ setup_clear_cpu_cap(X86_FEATURE_APIC); ++ return 0; ++} ++early_param("disableapic", setup_disableapic); ++ ++/* same as disableapic, for compatibility */ ++static int __init setup_nolapic(char *arg) ++{ ++ return setup_disableapic(arg); ++} ++early_param("nolapic", setup_nolapic); ++ ++static int __init parse_lapic_timer_c2_ok(char *arg) ++{ ++ local_apic_timer_c2_ok = 1; ++ return 0; ++} ++early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); ++ ++static int __init parse_disable_apic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("noapictimer", parse_disable_apic_timer); ++ ++static int __init parse_nolapic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("nolapic_timer", parse_nolapic_timer); ++ ++static int __init apic_set_verbosity(char *arg) ++{ ++ if (!arg) { ++#ifdef CONFIG_X86_64 ++ skip_ioapic_setup = 0; ++ return 0; ++#endif ++ return -EINVAL; ++ } ++ ++ if (strcmp("debug", arg) == 0) ++ apic_verbosity = APIC_DEBUG; ++ else if (strcmp("verbose", arg) == 0) ++ apic_verbosity = APIC_VERBOSE; ++#ifdef CONFIG_X86_64 ++ else { ++ pr_warning("APIC Verbosity level %s not recognised" ++ " use apic=verbose or apic=debug\n", arg); ++ return -EINVAL; ++ } ++#endif ++ ++ return 0; ++} ++early_param("apic", apic_set_verbosity); ++ ++static int __init lapic_insert_resource(void) ++{ ++ if (!apic_phys) ++ return -1; ++ ++ /* Put local APIC into the resource map. */ ++ lapic_resource.start = apic_phys; ++ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; ++ insert_resource(&iomem_resource, &lapic_resource); ++ ++ return 0; ++} ++ ++/* ++ * need call insert after e820__reserve_resources() ++ * that is using request_resource ++ */ ++late_initcall(lapic_insert_resource); ++ ++static int __init apic_set_disabled_cpu_apicid(char *arg) ++{ ++ if (!arg || !get_option(&arg, &disabled_cpu_apicid)) ++ return -EINVAL; ++ ++ return 0; ++} ++early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); ++ ++static int __init apic_set_extnmi(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; ++ ++ if (!strncmp("all", arg, 3)) ++ apic_extnmi = APIC_EXTNMI_ALL; ++ else if (!strncmp("none", arg, 4)) ++ apic_extnmi = APIC_EXTNMI_NONE; ++ else if (!strncmp("bsp", arg, 3)) ++ apic_extnmi = APIC_EXTNMI_BSP; ++ else { ++ pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++early_param("apic_extnmi", apic_set_extnmi); +diff -uprN kernel/arch/x86/kernel/apic/apic_flat_64.c kernel_new/arch/x86/kernel/apic/apic_flat_64.c +--- kernel/arch/x86/kernel/apic/apic_flat_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/apic_flat_64.c 2021-04-01 18:28:07.653863289 +0800 +@@ -58,9 +58,9 @@ static void _flat_send_IPI_mask(unsigned + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +diff -uprN kernel/arch/x86/kernel/apic/io_apic.c kernel_new/arch/x86/kernel/apic/io_apic.c +--- kernel/arch/x86/kernel/apic/io_apic.c 2020-12-21 21:59:17.000000000 +0800 ++++ 
kernel_new/arch/x86/kernel/apic/io_apic.c 2021-04-01 18:28:07.654863288 +0800 +@@ -77,7 +77,7 @@ + #define for_each_irq_pin(entry, head) \ + list_for_each_entry(entry, &head, list) + +-static DEFINE_RAW_SPINLOCK(ioapic_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock); + static DEFINE_MUTEX(ioapic_mutex); + static unsigned int ioapic_dynirq_base; + static int ioapic_initialized; +@@ -465,13 +465,19 @@ static void io_apic_sync(struct irq_pin_ + readl(&io_apic->data); + } + ++static inline void __mask_ioapic(struct mp_chip_data *data) ++{ ++ io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++} ++ + static void mask_ioapic_irq(struct irq_data *irq_data) + { + struct mp_chip_data *data = irq_data->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++ ipipe_lock_irq(irq_data->irq); ++ __mask_ioapic(data); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -487,6 +493,7 @@ static void unmask_ioapic_irq(struct irq + + raw_spin_lock_irqsave(&ioapic_lock, flags); + __unmask_ioapic(data); ++ ipipe_unlock_irq(irq_data->irq); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -530,14 +537,20 @@ static void __eoi_ioapic_pin(int apic, i + } + } + +-static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++static void _eoi_ioapic_pin(int vector, struct mp_chip_data *data) + { +- unsigned long flags; + struct irq_pin_list *entry; + +- raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, data->irq_2_pin) + __eoi_ioapic_pin(entry->apic, entry->pin, vector); ++} ++ ++void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ _eoi_ioapic_pin(vector, data); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -1203,6 +1216,19 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector + + static struct irq_chip ioapic_chip, ioapic_ir_chip; + ++#ifdef CONFIG_IPIPE ++static void startup_legacy_irq(unsigned irq) ++{ ++ unsigned long flags; ++ legacy_pic->mask(irq); ++ flags = hard_local_irq_save(); ++ __ipipe_unlock_irq(irq); ++ hard_local_irq_restore(flags); ++} ++#else /* !CONFIG_IPIPE */ ++#define startup_legacy_irq(irq) legacy_pic->mask(irq) ++#endif /* !CONFIG_IPIPE */ ++ + static void __init setup_IO_APIC_irqs(void) + { + unsigned int ioapic, pin; +@@ -1686,11 +1712,12 @@ static unsigned int startup_ioapic_irq(s + + raw_spin_lock_irqsave(&ioapic_lock, flags); + if (irq < nr_legacy_irqs()) { +- legacy_pic->mask(irq); ++ startup_legacy_irq(irq); + if (legacy_pic->irq_pending(irq)) + was_pending = 1; + } + __unmask_ioapic(data->chip_data); ++ ipipe_unlock_irq(irq); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +@@ -1698,7 +1725,7 @@ static unsigned int startup_ioapic_irq(s + + atomic_t irq_mis_count; + +-#ifdef CONFIG_GENERIC_PENDING_IRQ ++#if defined(CONFIG_GENERIC_PENDING_IRQ) || (defined(CONFIG_IPIPE) && defined(CONFIG_SMP)) + static bool io_apic_level_ack_pending(struct mp_chip_data *data) + { + struct irq_pin_list *entry; +@@ -1783,9 +1810,9 @@ static void ioapic_ack_level(struct irq_ + { + struct irq_cfg *cfg = irqd_cfg(irq_data); + unsigned long v; +- bool masked; + int i; +- ++#ifndef CONFIG_IPIPE ++ bool masked; + irq_complete_move(cfg); + masked = ioapic_irqd_mask(irq_data); + +@@ -1843,6 +1870,24 @@ static void ioapic_ack_level(struct irq_ + } + + ioapic_irqd_unmask(irq_data, masked); ++#else /* CONFIG_IPIPE */ ++ /* ++ * Prevent 
low priority IRQs grabbed by high priority domains ++ * from being delayed, waiting for a high priority interrupt ++ * handler running in a low priority domain to complete. ++ * This code assumes hw interrupts off. ++ */ ++ i = cfg->vector; ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ if (unlikely(!(v & (1 << (i & 0x1f))))) { ++ /* IO-APIC erratum: see comment above. */ ++ atomic_inc(&irq_mis_count); ++ raw_spin_lock(&ioapic_lock); ++ _eoi_ioapic_pin(cfg->vector, irq_data->chip_data); ++ raw_spin_unlock(&ioapic_lock); ++ } ++ __ack_APIC_irq(); ++#endif /* CONFIG_IPIPE */ + } + + static void ioapic_ir_ack_level(struct irq_data *irq_data) +@@ -1938,6 +1983,69 @@ static int ioapic_irq_get_chip_state(str + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++#ifdef CONFIG_SMP ++ ++void move_xxapic_irq(struct irq_data *irq_data) ++{ ++ unsigned int irq = irq_data->irq; ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct mp_chip_data *data = irq_data->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ ++ if (desc->handle_irq == &handle_edge_irq) { ++ raw_spin_lock(&desc->lock); ++ irq_complete_move(cfg); ++ irq_move_irq(irq_data); ++ raw_spin_unlock(&desc->lock); ++ } else if (desc->handle_irq == &handle_fasteoi_irq) { ++ raw_spin_lock(&desc->lock); ++ irq_complete_move(cfg); ++ if (unlikely(irqd_is_setaffinity_pending(irq_data))) { ++ if (!io_apic_level_ack_pending(data)) ++ irq_move_masked_irq(irq_data); ++ unmask_ioapic_irq(irq_data); ++ } ++ raw_spin_unlock(&desc->lock); ++ } else ++ WARN_ON_ONCE(1); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static void hold_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ raw_spin_lock(&ioapic_lock); ++ __mask_ioapic(data); ++ raw_spin_unlock(&ioapic_lock); ++ ioapic_ack_level(irq_data); ++} ++ ++static void hold_ioapic_ir_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ raw_spin_lock(&ioapic_lock); ++ __mask_ioapic(data); ++ raw_spin_unlock(&ioapic_lock); ++ ioapic_ir_ack_level(irq_data); ++} ++ ++static void release_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_ioapic(data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .irq_startup = startup_ioapic_irq, +@@ -1948,6 +2056,13 @@ static struct irq_chip ioapic_chip __rea + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++ .irq_move = move_xxapic_irq, ++#endif ++ .irq_hold = hold_ioapic_irq, ++ .irq_release = release_ioapic_irq, ++#endif + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -1961,6 +2076,13 @@ static struct irq_chip ioapic_ir_chip __ + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++#ifdef CONFIG_IPIPE ++#ifdef CONFIG_SMP ++ .irq_move = move_xxapic_irq, ++#endif ++ .irq_hold = hold_ioapic_ir_irq, ++ .irq_release = release_ioapic_irq, ++#endif + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -1992,23 +2114,29 @@ static inline void init_IO_APIC_traps(vo + + static void mask_lapic_irq(struct irq_data *data) + { +- unsigned long v; ++ unsigned long v, flags; + ++ flags = hard_cond_local_irq_save(); ++ ipipe_lock_irq(data->irq); 
+ v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ hard_cond_local_irq_restore(flags); + } + + static void unmask_lapic_irq(struct irq_data *data) + { +- unsigned long v; ++ unsigned long v, flags; + ++ flags = hard_cond_local_irq_save(); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++ ipipe_unlock_irq(data->irq); ++ hard_cond_local_irq_restore(flags); + } + + static void ack_lapic_irq(struct irq_data *data) + { +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + static struct irq_chip lapic_chip __read_mostly = { +@@ -2016,6 +2144,9 @@ static struct irq_chip lapic_chip __read + .irq_mask = mask_lapic_irq, + .irq_unmask = unmask_lapic_irq, + .irq_ack = ack_lapic_irq, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif + }; + + static void lapic_register_intr(int irq) +@@ -2135,7 +2266,7 @@ static inline void __init check_timer(vo + /* + * get/set the timer IRQ vector: + */ +- legacy_pic->mask(0); ++ startup_legacy_irq(0); + + /* + * As IRQ0 is to be enabled in the 8259A, the virtual +@@ -2232,6 +2363,10 @@ static inline void __init check_timer(vo + "...trying to set up timer as Virtual Wire IRQ...\n"); + + lapic_register_intr(0); ++#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64) ++ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq; ++ irq_to_desc(0)->ipipe_end = __ipipe_nop_irq; ++#endif + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + legacy_pic->unmask(0); + +@@ -2240,7 +2375,7 @@ static inline void __init check_timer(vo + goto out; + } + local_irq_disable(); +- legacy_pic->mask(0); ++ startup_legacy_irq(0); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); + +@@ -2612,6 +2747,21 @@ int acpi_get_override_irq(u32 gsi, int * + return 0; + } + ++#ifdef CONFIG_IPIPE ++unsigned int __ipipe_get_ioapic_irq_vector(int irq) ++{ ++ if (irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS) ++ return ipipe_apic_irq_vector(irq); ++ else if (irq == IRQ_MOVE_CLEANUP_VECTOR) ++ return irq; ++ else { ++ if (irq_cfg(irq) == NULL) ++ return ISA_IRQ_VECTOR(irq); /* Assume ISA. */ ++ return irq_cfg(irq)->vector; ++ } ++} ++#endif /* CONFIG_IPIPE */ ++ + /* + * This function updates target affinity of IOAPIC interrupts to include + * the CPUs which came online during SMP bringup. +@@ -3006,7 +3156,7 @@ int mp_irqdomain_alloc(struct irq_domain + mp_setup_entry(cfg, data, info->ioapic_entry); + mp_register_handler(virq, data->trigger); + if (virq < nr_legacy_irqs()) +- legacy_pic->mask(virq); ++ startup_legacy_irq(virq); + local_irq_restore(flags); + + apic_printk(APIC_VERBOSE, KERN_DEBUG +diff -uprN kernel/arch/x86/kernel/apic/io_apic.c.orig kernel_new/arch/x86/kernel/apic/io_apic.c.orig +--- kernel/arch/x86/kernel/apic/io_apic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/io_apic.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,3067 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. 
Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ * ++ * Historical information which is worth to be preserved: ++ * ++ * - SiS APIC rmw bug: ++ * ++ * We used to have a workaround for a bug in SiS chips which ++ * required to rewrite the index register for a read-modify-write ++ * operation as the chip lost the index information which was ++ * setup for the read already. We cache the data now, so that ++ * workaround has been removed. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* time_after() */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define for_each_ioapic(idx) \ ++ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) ++#define for_each_ioapic_reverse(idx) \ ++ for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--) ++#define for_each_pin(idx, pin) \ ++ for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++) ++#define for_each_ioapic_pin(idx, pin) \ ++ for_each_ioapic((idx)) \ ++ for_each_pin((idx), (pin)) ++#define for_each_irq_pin(entry, head) \ ++ list_for_each_entry(entry, &head, list) ++ ++static DEFINE_RAW_SPINLOCK(ioapic_lock); ++static DEFINE_MUTEX(ioapic_mutex); ++static unsigned int ioapic_dynirq_base; ++static int ioapic_initialized; ++ ++struct irq_pin_list { ++ struct list_head list; ++ int apic, pin; ++}; ++ ++struct mp_chip_data { ++ struct list_head irq_2_pin; ++ struct IO_APIC_route_entry entry; ++ int trigger; ++ int polarity; ++ u32 count; ++ bool isa_irq; ++}; ++ ++struct mp_ioapic_gsi { ++ u32 gsi_base; ++ u32 gsi_end; ++}; ++ ++static struct ioapic { ++ /* ++ * # of IRQ routing registers ++ */ ++ int nr_registers; ++ /* ++ * Saved state during suspend/resume, or while enabling intr-remap. ++ */ ++ struct IO_APIC_route_entry *saved_registers; ++ /* I/O APIC config */ ++ struct mpc_ioapic mp_config; ++ /* IO APIC gsi routing info */ ++ struct mp_ioapic_gsi gsi_config; ++ struct ioapic_domain_cfg irqdomain_cfg; ++ struct irq_domain *irqdomain; ++ struct resource *iomem_res; ++} ioapics[MAX_IO_APICS]; ++ ++#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver ++ ++int mpc_ioapic_id(int ioapic_idx) ++{ ++ return ioapics[ioapic_idx].mp_config.apicid; ++} ++ ++unsigned int mpc_ioapic_addr(int ioapic_idx) ++{ ++ return ioapics[ioapic_idx].mp_config.apicaddr; ++} ++ ++static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) ++{ ++ return &ioapics[ioapic_idx].gsi_config; ++} ++ ++static inline int mp_ioapic_pin_count(int ioapic) ++{ ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ ++ return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; ++} ++ ++static inline u32 mp_pin_to_gsi(int ioapic, int pin) ++{ ++ return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin; ++} ++ ++static inline bool mp_is_legacy_irq(int irq) ++{ ++ return irq >= 0 && irq < nr_legacy_irqs(); ++} ++ ++/* ++ * Initialize all legacy IRQs and all pins on the first IOAPIC ++ * if we have legacy interrupt controller. Kernel boot option "pirq=" ++ * may rely on non-legacy pins on the first IOAPIC. 
++ */ ++static inline int mp_init_irq_at_boot(int ioapic, int irq) ++{ ++ if (!nr_legacy_irqs()) ++ return 0; ++ ++ return ioapic == 0 || mp_is_legacy_irq(irq); ++} ++ ++static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) ++{ ++ return ioapics[ioapic].irqdomain; ++} ++ ++int nr_ioapics; ++ ++/* The one past the highest gsi number used */ ++u32 gsi_top; ++ ++/* MP IRQ source entries */ ++struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* # of MP IRQ source entries */ ++int mp_irq_entries; ++ ++#ifdef CONFIG_EISA ++int mp_bus_id_to_type[MAX_MP_BUSSES]; ++#endif ++ ++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); ++ ++int skip_ioapic_setup; ++ ++/** ++ * disable_ioapic_support() - disables ioapic support at runtime ++ */ ++void disable_ioapic_support(void) ++{ ++#ifdef CONFIG_PCI ++ noioapicquirk = 1; ++ noioapicreroute = -1; ++#endif ++ skip_ioapic_setup = 1; ++} ++ ++static int __init parse_noapic(char *str) ++{ ++ /* disable IO-APIC */ ++ disable_ioapic_support(); ++ return 0; ++} ++early_param("noapic", parse_noapic); ++ ++/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ ++void mp_save_irq(struct mpc_intsrc *m) ++{ ++ int i; ++ ++ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," ++ " IRQ %02x, APIC ID %x, APIC INT %02x\n", ++ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, ++ m->srcbusirq, m->dstapic, m->dstirq); ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ if (!memcmp(&mp_irqs[i], m, sizeof(*m))) ++ return; ++ } ++ ++ memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m)); ++ if (++mp_irq_entries == MAX_IRQ_SOURCES) ++ panic("Max # of irq sources exceeded!!\n"); ++} ++ ++static void alloc_ioapic_saved_registers(int idx) ++{ ++ size_t size; ++ ++ if (ioapics[idx].saved_registers) ++ return; ++ ++ size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers; ++ ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL); ++ if (!ioapics[idx].saved_registers) ++ pr_err("IOAPIC %d: suspend/resume impossible!\n", idx); ++} ++ ++static void free_ioapic_saved_registers(int idx) ++{ ++ kfree(ioapics[idx].saved_registers); ++ ioapics[idx].saved_registers = NULL; ++} ++ ++int __init arch_early_ioapic_init(void) ++{ ++ int i; ++ ++ if (!nr_legacy_irqs()) ++ io_apic_irqs = ~0UL; ++ ++ for_each_ioapic(i) ++ alloc_ioapic_saved_registers(i); ++ ++ return 0; ++} ++ ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++ unsigned int unused2[11]; ++ unsigned int eoi; ++}; ++ ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mpc_ioapic_addr(idx) & ~PAGE_MASK); ++} ++ ++static inline void io_apic_eoi(unsigned int apic, unsigned int vector) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(vector, &io_apic->eoi); ++} ++ ++unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); ++} ++ ++static void io_apic_write(unsigned int apic, unsigned int reg, ++ unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++ ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; ++ ++static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * 
pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ ++ return eu.entry; ++} ++ ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ eu.entry = __ioapic_read_entry(apic, pin); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return eu.entry; ++} ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. ++ */ ++static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ union entry_union eu = {{0, 0}}; ++ ++ eu.entry = e; ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! ++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = IOAPIC_MASKED }; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. ++ */ ++static int __add_pin_to_irq_node(struct mp_chip_data *data, ++ int node, int apic, int pin) ++{ ++ struct irq_pin_list *entry; ++ ++ /* don't allow duplicates */ ++ for_each_irq_pin(entry, data->irq_2_pin) ++ if (entry->apic == apic && entry->pin == pin) ++ return 0; ++ ++ entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node); ++ if (!entry) { ++ pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", ++ node, apic, pin); ++ return -ENOMEM; ++ } ++ entry->apic = apic; ++ entry->pin = pin; ++ list_add_tail(&entry->list, &data->irq_2_pin); ++ ++ return 0; ++} ++ ++static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin) ++{ ++ struct irq_pin_list *tmp, *entry; ++ ++ list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list) ++ if (entry->apic == apic && entry->pin == pin) { ++ list_del(&entry->list); ++ kfree(entry); ++ return; ++ } ++} ++ ++static void add_pin_to_irq_node(struct mp_chip_data *data, ++ int node, int apic, int pin) ++{ ++ if (__add_pin_to_irq_node(data, node, apic, pin)) ++ panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); ++} ++ ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry; ++ ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ /* every one is different, right? 
*/ ++ return; ++ } ++ } ++ ++ /* old apic/pin didn't exist, so just add new ones */ ++ add_pin_to_irq_node(data, node, newapic, newpin); ++} ++ ++static void io_apic_modify_irq(struct mp_chip_data *data, ++ int mask_and, int mask_or, ++ void (*final)(struct irq_pin_list *entry)) ++{ ++ union entry_union eu; ++ struct irq_pin_list *entry; ++ ++ eu.entry = data->entry; ++ eu.w1 &= mask_and; ++ eu.w1 |= mask_or; ++ data->entry = eu.entry; ++ ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1); ++ if (final) ++ final(entry); ++ } ++} ++ ++static void io_apic_sync(struct irq_pin_list *entry) ++{ ++ /* ++ * Synchronize the IO-APIC and the CPU by doing ++ * a dummy read from the IO-APIC ++ */ ++ struct io_apic __iomem *io_apic; ++ ++ io_apic = io_apic_base(entry->apic); ++ readl(&io_apic->data); ++} ++ ++static void mask_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void __unmask_ioapic(struct mp_chip_data *data) ++{ ++ io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL); ++} ++ ++static void unmask_ioapic_irq(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_ioapic(data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * IO-APIC versions below 0x20 don't support EOI register. ++ * For the record, here is the information about various versions: ++ * 0Xh 82489DX ++ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant ++ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant ++ * 30h-FFh Reserved ++ * ++ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic ++ * version as 0x2. This is an error with documentation and these ICH chips ++ * use io-apic's of version 0x20. ++ * ++ * For IO-APIC's with EOI register, we use that to do an explicit EOI. ++ * Otherwise, we simulate the EOI message manually by changing the trigger ++ * mode to edge and then back to level, with RTE being masked during this. ++ */ ++static void __eoi_ioapic_pin(int apic, int pin, int vector) ++{ ++ if (mpc_ioapic_ver(apic) >= 0x20) { ++ io_apic_eoi(apic, vector); ++ } else { ++ struct IO_APIC_route_entry entry, entry1; ++ ++ entry = entry1 = __ioapic_read_entry(apic, pin); ++ ++ /* ++ * Mask the entry and change the trigger mode to edge. ++ */ ++ entry1.mask = IOAPIC_MASKED; ++ entry1.trigger = IOAPIC_EDGE; ++ ++ __ioapic_write_entry(apic, pin, entry1); ++ ++ /* ++ * Restore the previous level triggered entry. 
++ */ ++ __ioapic_write_entry(apic, pin, entry); ++ } ++} ++ ++static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) ++{ ++ unsigned long flags; ++ struct irq_pin_list *entry; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ for_each_irq_pin(entry, data->irq_2_pin) ++ __eoi_ioapic_pin(entry->apic, entry->pin, vector); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ ++ /* ++ * Make sure the entry is masked and re-read the contents to check ++ * if it is a level triggered pin and if the remote-IRR is set. ++ */ ++ if (entry.mask == IOAPIC_UNMASKED) { ++ entry.mask = IOAPIC_MASKED; ++ ioapic_write_entry(apic, pin, entry); ++ entry = ioapic_read_entry(apic, pin); ++ } ++ ++ if (entry.irr) { ++ unsigned long flags; ++ ++ /* ++ * Make sure the trigger mode is set to level. Explicit EOI ++ * doesn't clear the remote-IRR if the trigger mode is not ++ * set to level. ++ */ ++ if (entry.trigger == IOAPIC_EDGE) { ++ entry.trigger = IOAPIC_LEVEL; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ __eoi_ioapic_pin(apic, pin, entry.vector); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ } ++ ++ /* ++ * Clear the rest of the bits in the IO-APIC RTE except for the mask ++ * bit. ++ */ ++ ioapic_mask_entry(apic, pin); ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.irr) ++ pr_err("Unable to reset IRR for apic: %d, pin :%d\n", ++ mpc_ioapic_id(apic), pin); ++} ++ ++void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic_pin(apic, pin) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries[MAX_PIRQS] = { ++ [0 ... MAX_PIRQS - 1] = -1 ++}; ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++#endif /* CONFIG_X86_32 */ ++ ++/* ++ * Saves all the IO-APIC RTE's ++ */ ++int save_ioapic_entries(void) ++{ ++ int apic, pin; ++ int err = 0; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) { ++ err = -ENOMEM; ++ continue; ++ } ++ ++ for_each_pin(apic, pin) ++ ioapics[apic].saved_registers[pin] = ++ ioapic_read_entry(apic, pin); ++ } ++ ++ return err; ++} ++ ++/* ++ * Mask all IO APIC entries. 
++ */ ++void mask_ioapic_entries(void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) ++ continue; ++ ++ for_each_pin(apic, pin) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = ioapics[apic].saved_registers[pin]; ++ if (entry.mask == IOAPIC_UNMASKED) { ++ entry.mask = IOAPIC_MASKED; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ } ++ } ++} ++ ++/* ++ * Restore IO APIC entries which was saved in the ioapic structure. ++ */ ++int restore_ioapic_entries(void) ++{ ++ int apic, pin; ++ ++ for_each_ioapic(apic) { ++ if (!ioapics[apic].saved_registers) ++ continue; ++ ++ for_each_pin(apic, pin) ++ ioapic_write_entry(apic, pin, ++ ioapics[apic].saved_registers[pin]); ++ } ++ return 0; ++} ++ ++/* ++ * Find the IRQ entry number of a certain pin. ++ */ ++static int find_irq_entry(int ioapic_idx, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].irqtype == type && ++ (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || ++ mp_irqs[i].dstapic == MP_APIC_ALL) && ++ mp_irqs[i].dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ ++ return mp_irqs[i].dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ break; ++ } ++ ++ if (i < mp_irq_entries) { ++ int ioapic_idx; ++ ++ for_each_ioapic(ioapic_idx) ++ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) ++ return ioapic_idx; ++ } ++ ++ return -1; ++} ++ ++#ifdef CONFIG_EISA ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < nr_legacy_irqs()) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++#endif ++ ++/* ISA interrupts are always active high edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (IOAPIC_EDGE) ++#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH) ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) ++#define default_EISA_polarity(idx) default_ISA_polarity(idx) ++ ++/* PCI interrupts are always active low level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (IOAPIC_LEVEL) ++#define default_PCI_polarity(idx) (IOAPIC_POL_LOW) ++ ++static int irq_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) { ++ case MP_IRQPOL_DEFAULT: ++ /* conforms to spec, ie. 
bus-type dependent polarity */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ return default_ISA_polarity(idx); ++ else ++ return default_PCI_polarity(idx); ++ case MP_IRQPOL_ACTIVE_HIGH: ++ return IOAPIC_POL_HIGH; ++ case MP_IRQPOL_RESERVED: ++ pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); ++ case MP_IRQPOL_ACTIVE_LOW: ++ default: /* Pointless default required due to do gcc stupidity */ ++ return IOAPIC_POL_LOW; ++ } ++} ++ ++#ifdef CONFIG_EISA ++static int eisa_irq_trigger(int idx, int bus, int trigger) ++{ ++ switch (mp_bus_id_to_type[bus]) { ++ case MP_BUS_PCI: ++ case MP_BUS_ISA: ++ return trigger; ++ case MP_BUS_EISA: ++ return default_EISA_trigger(idx); ++ } ++ pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus); ++ return IOAPIC_LEVEL; ++} ++#else ++static inline int eisa_irq_trigger(int idx, int bus, int trigger) ++{ ++ return trigger; ++} ++#endif ++ ++static int irq_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) { ++ case MP_IRQTRIG_DEFAULT: ++ /* conforms to spec, ie. bus-type dependent trigger mode */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ trigger = default_ISA_trigger(idx); ++ else ++ trigger = default_PCI_trigger(idx); ++ /* Take EISA into account */ ++ return eisa_irq_trigger(idx, bus, trigger); ++ case MP_IRQTRIG_EDGE: ++ return IOAPIC_EDGE; ++ case MP_IRQTRIG_RESERVED: ++ pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); ++ case MP_IRQTRIG_LEVEL: ++ default: /* Pointless default required due to do gcc stupidity */ ++ return IOAPIC_LEVEL; ++ } ++} ++ ++void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node, ++ int trigger, int polarity) ++{ ++ init_irq_alloc_info(info, NULL); ++ info->type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ info->ioapic_node = node; ++ info->ioapic_trigger = trigger; ++ info->ioapic_polarity = polarity; ++ info->ioapic_valid = 1; ++} ++ ++#ifndef CONFIG_ACPI ++int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); ++#endif ++ ++static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst, ++ struct irq_alloc_info *src, ++ u32 gsi, int ioapic_idx, int pin) ++{ ++ int trigger, polarity; ++ ++ copy_irq_alloc_info(dst, src); ++ dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ dst->ioapic_id = mpc_ioapic_id(ioapic_idx); ++ dst->ioapic_pin = pin; ++ dst->ioapic_valid = 1; ++ if (src && src->ioapic_valid) { ++ dst->ioapic_node = src->ioapic_node; ++ dst->ioapic_trigger = src->ioapic_trigger; ++ dst->ioapic_polarity = src->ioapic_polarity; ++ } else { ++ dst->ioapic_node = NUMA_NO_NODE; ++ if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) { ++ dst->ioapic_trigger = trigger; ++ dst->ioapic_polarity = polarity; ++ } else { ++ /* ++ * PCI interrupts are always active low level ++ * triggered. ++ */ ++ dst->ioapic_trigger = IOAPIC_LEVEL; ++ dst->ioapic_polarity = IOAPIC_POL_LOW; ++ } ++ } ++} ++ ++static int ioapic_alloc_attr_node(struct irq_alloc_info *info) ++{ ++ return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE; ++} ++ ++static void mp_register_handler(unsigned int irq, unsigned long trigger) ++{ ++ irq_flow_handler_t hdl; ++ bool fasteoi; ++ ++ if (trigger) { ++ irq_set_status_flags(irq, IRQ_LEVEL); ++ fasteoi = true; ++ } else { ++ irq_clear_status_flags(irq, IRQ_LEVEL); ++ fasteoi = false; ++ } ++ ++ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; ++ __irq_set_handler(irq, hdl, 0, fasteoi ? 
"fasteoi" : "edge"); ++} ++ ++static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info) ++{ ++ struct mp_chip_data *data = irq_get_chip_data(irq); ++ ++ /* ++ * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger ++ * and polarity attirbutes. So allow the first user to reprogram the ++ * pin with real trigger and polarity attributes. ++ */ ++ if (irq < nr_legacy_irqs() && data->count == 1) { ++ if (info->ioapic_trigger != data->trigger) ++ mp_register_handler(irq, info->ioapic_trigger); ++ data->entry.trigger = data->trigger = info->ioapic_trigger; ++ data->entry.polarity = data->polarity = info->ioapic_polarity; ++ } ++ ++ return data->trigger == info->ioapic_trigger && ++ data->polarity == info->ioapic_polarity; ++} ++ ++static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, ++ struct irq_alloc_info *info) ++{ ++ bool legacy = false; ++ int irq = -1; ++ int type = ioapics[ioapic].irqdomain_cfg.type; ++ ++ switch (type) { ++ case IOAPIC_DOMAIN_LEGACY: ++ /* ++ * Dynamically allocate IRQ number for non-ISA IRQs in the first ++ * 16 GSIs on some weird platforms. ++ */ ++ if (!ioapic_initialized || gsi >= nr_legacy_irqs()) ++ irq = gsi; ++ legacy = mp_is_legacy_irq(irq); ++ break; ++ case IOAPIC_DOMAIN_STRICT: ++ irq = gsi; ++ break; ++ case IOAPIC_DOMAIN_DYNAMIC: ++ break; ++ default: ++ WARN(1, "ioapic: unknown irqdomain type %d\n", type); ++ return -1; ++ } ++ ++ return __irq_domain_alloc_irqs(domain, irq, 1, ++ ioapic_alloc_attr_node(info), ++ info, legacy, NULL); ++} ++ ++/* ++ * Need special handling for ISA IRQs because there may be multiple IOAPIC pins ++ * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping ++ * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are ++ * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H). ++ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and ++ * some BIOSes may use MP Interrupt Source records to override IRQ numbers for ++ * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be ++ * multiple pins sharing the same legacy IRQ number when ACPI is disabled. ++ */ ++static int alloc_isa_irq_from_domain(struct irq_domain *domain, ++ int irq, int ioapic, int pin, ++ struct irq_alloc_info *info) ++{ ++ struct mp_chip_data *data; ++ struct irq_data *irq_data = irq_get_irq_data(irq); ++ int node = ioapic_alloc_attr_node(info); ++ ++ /* ++ * Legacy ISA IRQ has already been allocated, just add pin to ++ * the pin list assoicated with this IRQ and program the IOAPIC ++ * entry. 
The IOAPIC entry ++ */ ++ if (irq_data && irq_data->parent_data) { ++ if (!mp_check_pin_attr(irq, info)) ++ return -EBUSY; ++ if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic, ++ info->ioapic_pin)) ++ return -ENOMEM; ++ } else { ++ info->flags |= X86_IRQ_ALLOC_LEGACY; ++ irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, ++ NULL); ++ if (irq >= 0) { ++ irq_data = irq_domain_get_irq_data(domain, irq); ++ data = irq_data->chip_data; ++ data->isa_irq = true; ++ } ++ } ++ ++ return irq; ++} ++ ++static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, ++ unsigned int flags, struct irq_alloc_info *info) ++{ ++ int irq; ++ bool legacy = false; ++ struct irq_alloc_info tmp; ++ struct mp_chip_data *data; ++ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); ++ ++ if (!domain) ++ return -ENOSYS; ++ ++ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { ++ irq = mp_irqs[idx].srcbusirq; ++ legacy = mp_is_legacy_irq(irq); ++ } ++ ++ mutex_lock(&ioapic_mutex); ++ if (!(flags & IOAPIC_MAP_ALLOC)) { ++ if (!legacy) { ++ irq = irq_find_mapping(domain, pin); ++ if (irq == 0) ++ irq = -ENOENT; ++ } ++ } else { ++ ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin); ++ if (legacy) ++ irq = alloc_isa_irq_from_domain(domain, irq, ++ ioapic, pin, &tmp); ++ else if ((irq = irq_find_mapping(domain, pin)) == 0) ++ irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp); ++ else if (!mp_check_pin_attr(irq, &tmp)) ++ irq = -EBUSY; ++ if (irq >= 0) { ++ data = irq_get_chip_data(irq); ++ data->count++; ++ } ++ } ++ mutex_unlock(&ioapic_mutex); ++ ++ return irq; ++} ++ ++static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags) ++{ ++ u32 gsi = mp_pin_to_gsi(ioapic, pin); ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! ++ */ ++ if (mp_irqs[idx].dstirq != pin) ++ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ int irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ return irq; ++ } ++ } ++ } ++#endif ++ ++ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL); ++} ++ ++int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) ++{ ++ int ioapic, pin, idx; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return -ENODEV; ++ ++ pin = mp_find_ioapic_pin(ioapic, gsi); ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if ((flags & IOAPIC_MAP_CHECK) && idx < 0) ++ return -ENODEV; ++ ++ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); ++} ++ ++void mp_unmap_irq(int irq) ++{ ++ struct irq_data *irq_data = irq_get_irq_data(irq); ++ struct mp_chip_data *data; ++ ++ if (!irq_data || !irq_data->domain) ++ return; ++ ++ data = irq_data->chip_data; ++ if (!data || data->isa_irq) ++ return; ++ ++ mutex_lock(&ioapic_mutex); ++ if (--data->count == 0) ++ irq_domain_free_irqs(irq, 1); ++ mutex_unlock(&ioapic_mutex); ++} ++ ++/* ++ * Find a specific PCI IRQ entry. 
++ * Not an __init, possibly needed by modules ++ */ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int irq, i, best_ioapic = -1, best_idx = -1; ++ ++ apic_printk(APIC_DEBUG, ++ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", ++ bus, slot, pin); ++ if (test_bit(bus, mp_bus_not_pci)) { ++ apic_printk(APIC_VERBOSE, ++ "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ int ioapic_idx, found = 0; ++ ++ if (bus != lbus || mp_irqs[i].irqtype != mp_INT || ++ slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f)) ++ continue; ++ ++ for_each_ioapic(ioapic_idx) ++ if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || ++ mp_irqs[i].dstapic == MP_APIC_ALL) { ++ found = 1; ++ break; ++ } ++ if (!found) ++ continue; ++ ++ /* Skip ISA IRQs */ ++ irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0); ++ if (irq > 0 && !IO_APIC_IRQ(irq)) ++ continue; ++ ++ if (pin == (mp_irqs[i].srcbusirq & 3)) { ++ best_idx = i; ++ best_ioapic = ioapic_idx; ++ goto out; ++ } ++ ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_idx < 0) { ++ best_idx = i; ++ best_ioapic = ioapic_idx; ++ } ++ } ++ if (best_idx < 0) ++ return -1; ++ ++out: ++ return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq, ++ IOAPIC_MAP_ALLOC); ++} ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++static struct irq_chip ioapic_chip, ioapic_ir_chip; ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ unsigned int ioapic, pin; ++ int idx; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for_each_ioapic_pin(ioapic, pin) { ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if (idx < 0) ++ apic_printk(APIC_VERBOSE, ++ KERN_DEBUG " apic %d pin %d not connected\n", ++ mpc_ioapic_id(ioapic), pin); ++ else ++ pin_2_irq(idx, ioapic, pin, ++ ioapic ? 0 : IOAPIC_MAP_ALLOC); ++ } ++} ++ ++void ioapic_zap_locks(void) ++{ ++ raw_spin_lock_init(&ioapic_lock); ++} ++ ++static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) ++{ ++ int i; ++ char buf[256]; ++ struct IO_APIC_route_entry entry; ++ struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry; ++ ++ printk(KERN_DEBUG "IOAPIC %d:\n", apic); ++ for (i = 0; i <= nr_entries; i++) { ++ entry = ioapic_read_entry(apic, i); ++ snprintf(buf, sizeof(buf), ++ " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)", ++ i, ++ entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ", ++ entry.trigger == IOAPIC_LEVEL ? "level" : "edge ", ++ entry.polarity == IOAPIC_POL_LOW ? "low " : "high", ++ entry.vector, entry.irr, entry.delivery_status); ++ if (ir_entry->format) ++ printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", ++ buf, (ir_entry->index2 << 15) | ir_entry->index, ++ ir_entry->zero); ++ else ++ printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", ++ buf, ++ entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ? 
++ "logical " : "physical", ++ entry.dest, entry.delivery_mode); ++ } ++} ++ ++static void __init print_IO_APIC(int ioapic_idx) ++{ ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ reg_01.raw = io_apic_read(ioapic_idx, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(ioapic_idx, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(ioapic_idx, 3); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); ++ printk(KERN_DEBUG "....... : max redirection entries: %02X\n", ++ reg_01.bits.entries); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %02X\n", ++ reg_01.bits.version); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ } ++ ++ printk(KERN_DEBUG ".... IRQ redirection table:\n"); ++ io_apic_print_entries(ioapic_idx, reg_01.bits.entries); ++} ++ ++void __init print_IO_APICs(void) ++{ ++ int ioapic_idx; ++ unsigned int irq; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for_each_ioapic(ioapic_idx) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mpc_ioapic_id(ioapic_idx), ++ ioapics[ioapic_idx].nr_registers); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for_each_ioapic(ioapic_idx) ++ print_IO_APIC(ioapic_idx); ++ ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for_each_active_irq(irq) { ++ struct irq_pin_list *entry; ++ struct irq_chip *chip; ++ struct mp_chip_data *data; ++ ++ chip = irq_get_chip(irq); ++ if (chip != &ioapic_chip && chip != &ioapic_ir_chip) ++ continue; ++ data = irq_get_chip_data(irq); ++ if (!data) ++ continue; ++ if (list_empty(&data->irq_2_pin)) ++ continue; ++ ++ printk(KERN_DEBUG "IRQ%d ", irq); ++ for_each_irq_pin(entry, data->irq_2_pin) ++ pr_cont("-> %d:%d", entry->apic, entry->pin); ++ pr_cont("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... 
done.\n"); ++} ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++void __init enable_IO_APIC(void) ++{ ++ int i8259_apic, i8259_pin; ++ int apic, pin; ++ ++ if (skip_ioapic_setup) ++ nr_ioapics = 0; ++ ++ if (!nr_legacy_irqs() || !nr_ioapics) ++ return; ++ ++ for_each_ioapic_pin(apic, pin) { ++ /* See if any of the pins is in ExtINT mode */ ++ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin); ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++void native_restore_boot_irq_mode(void) ++{ ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. ++ */ ++ if (ioapic_i8259.pin != -1) { ++ struct IO_APIC_route_entry entry; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = IOAPIC_UNMASKED; ++ entry.trigger = IOAPIC_EDGE; ++ entry.polarity = IOAPIC_POL_HIGH; ++ entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; ++ entry.delivery_mode = dest_ExtINT; ++ entry.dest = read_apic_id(); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) ++ disconnect_bsp_APIC(ioapic_i8259.pin != -1); ++} ++ ++void restore_boot_irq_mode(void) ++{ ++ if (!nr_legacy_irqs()) ++ return; ++ ++ x86_apic_ops.restore(); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++void __init setup_ioapic_ids_from_mpc_nocheck(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int ioapic_idx; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. 
++ */ ++ for_each_ioapic(ioapic_idx) { ++ /* Read the register 0 value */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mpc_ioapic_id(ioapic_idx); ++ ++ if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ ioapic_idx, mpc_ioapic_id(ioapic_idx)); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (apic->check_apicid_used(&phys_id_present_map, ++ mpc_ioapic_id(ioapic_idx))) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ ioapic_idx, mpc_ioapic_id(ioapic_idx)); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ ioapics[ioapic_idx].mp_config.apicid = i; ++ } else { ++ physid_mask_t tmp; ++ apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), ++ &tmp); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mpc_ioapic_id(ioapic_idx)); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mpc_ioapic_id(ioapic_idx)) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].dstapic == old_id) ++ mp_irqs[i].dstapic ++ = mpc_ioapic_id(ioapic_idx); ++ ++ /* ++ * Update the ID register according to the right value ++ * from the MPC table if they are different. ++ */ ++ if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) ++ continue; ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mpc_ioapic_id(ioapic_idx)); ++ ++ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic_idx, 0, reg_00.raw); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) ++ pr_cont("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++ ++void __init setup_ioapic_ids_from_mpc(void) ++{ ++ ++ if (acpi_ioapic) ++ return; ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. 
++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(boot_cpu_apic_version)) ++ return; ++ setup_ioapic_ids_from_mpc_nocheck(); ++} ++#endif ++ ++int no_timer_check __initdata; ++ ++static int __init notimercheck(char *s) ++{ ++ no_timer_check = 1; ++ return 1; ++} ++__setup("no_timer_check", notimercheck); ++ ++static void __init delay_with_tsc(void) ++{ ++ unsigned long long start, now; ++ unsigned long end = jiffies + 4; ++ ++ start = rdtsc(); ++ ++ /* ++ * We don't know the TSC frequency yet, but waiting for ++ * 40000000000/HZ TSC cycles is safe: ++ * 4 GHz == 10 jiffies ++ * 1 GHz == 40 jiffies ++ */ ++ do { ++ rep_nop(); ++ now = rdtsc(); ++ } while ((now - start) < 40000000000ULL / HZ && ++ time_before_eq(jiffies, end)); ++} ++ ++static void __init delay_without_tsc(void) ++{ ++ unsigned long end = jiffies + 4; ++ int band = 1; ++ ++ /* ++ * We don't know any frequency yet, but waiting for ++ * 40940000000/HZ cycles is safe: ++ * 4 GHz == 10 jiffies ++ * 1 GHz == 40 jiffies ++ * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 ++ */ ++ do { ++ __delay(((1U << band++) * 10000000UL) / HZ); ++ } while (band < 12 && time_before_eq(jiffies, end)); ++} ++ ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ unsigned long flags; ++ ++ if (no_timer_check) ++ return 1; ++ ++ local_save_flags(flags); ++ local_irq_enable(); ++ ++ if (boot_cpu_has(X86_FEATURE_TSC)) ++ delay_with_tsc(); ++ else ++ delay_without_tsc(); ++ ++ local_irq_restore(flags); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ ++ /* jiffies wrap? */ ++ if (time_after(jiffies, t1 + 4)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... 
++ */ ++static unsigned int startup_ioapic_irq(struct irq_data *data) ++{ ++ int was_pending = 0, irq = data->irq; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < nr_legacy_irqs()) { ++ legacy_pic->mask(irq); ++ if (legacy_pic->irq_pending(irq)) ++ was_pending = 1; ++ } ++ __unmask_ioapic(data->chip_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++atomic_t irq_mis_count; ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static bool io_apic_level_ack_pending(struct mp_chip_data *data) ++{ ++ struct irq_pin_list *entry; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ for_each_irq_pin(entry, data->irq_2_pin) { ++ unsigned int reg; ++ int pin; ++ ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ /* Is the remote IRR bit set? */ ++ if (reg & IO_APIC_REDIR_REMOTE_IRR) { ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ return true; ++ } ++ } ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return false; ++} ++ ++static inline bool ioapic_irqd_mask(struct irq_data *data) ++{ ++ /* If we are moving the IRQ we need to mask it */ ++ if (unlikely(irqd_is_setaffinity_pending(data))) { ++ if (!irqd_irq_masked(data)) ++ mask_ioapic_irq(data); ++ return true; ++ } ++ return false; ++} ++ ++static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) ++{ ++ if (unlikely(masked)) { ++ /* Only migrate the irq if the ack has been received. ++ * ++ * On rare occasions the broadcast level triggered ack gets ++ * delayed going to ioapics, and if we reprogram the ++ * vector while Remote IRR is still set the irq will never ++ * fire again. ++ * ++ * To prevent this scenario we read the Remote IRR bit ++ * of the ioapic. This has two effects. ++ * - On any sane system the read of the ioapic will ++ * flush writes (and acks) going to the ioapic from ++ * this cpu. ++ * - We get to see if the ACK has actually been delivered. ++ * ++ * Based on failed experiments of reprogramming the ++ * ioapic entry from outside of irq context starting ++ * with masking the ioapic entry and then polling until ++ * Remote IRR was clear before reprogramming the ++ * ioapic I don't trust the Remote IRR bit to be ++ * completey accurate. ++ * ++ * However there appears to be no other way to plug ++ * this race, so if the Remote IRR bit is not ++ * accurate and is causing problems then it is a hardware bug ++ * and you can go talk to the chipset vendor about it. ++ */ ++ if (!io_apic_level_ack_pending(data->chip_data)) ++ irq_move_masked_irq(data); ++ /* If the IRQ is masked in the core, leave it: */ ++ if (!irqd_irq_masked(data)) ++ unmask_ioapic_irq(data); ++ } ++} ++#else ++static inline bool ioapic_irqd_mask(struct irq_data *data) ++{ ++ return false; ++} ++static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) ++{ ++} ++#endif ++ ++static void ioapic_ack_level(struct irq_data *irq_data) ++{ ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ unsigned long v; ++ bool masked; ++ int i; ++ ++ irq_complete_move(cfg); ++ masked = ioapic_irqd_mask(irq_data); ++ ++ /* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. 
As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ * ++ * Also in the case when cpu goes offline, fixup_irqs() will forward ++ * any unhandled interrupt on the offlined cpu to the new cpu ++ * destination that is handling the corresponding interrupt. This ++ * interrupt forwarding is done via IPI's. Hence, in this case also ++ * level-triggered io-apic interrupt will be seen as an edge ++ * interrupt in the IRR. And we can't rely on the cpu's EOI ++ * to be broadcasted to the IO-APIC's which will clear the remoteIRR ++ * corresponding to the level-triggered interrupt. Hence on IO-APIC's ++ * supporting EOI register, we do an explicit EOI to clear the ++ * remote IRR and on IO-APIC's which don't have an EOI register, ++ * we use the above logic (mask+edge followed by unmask+level) from ++ * Manfred Spraul to clear the remote IRR. ++ */ ++ i = cfg->vector; ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++ ++ /* ++ * We must acknowledge the irq before we move it or the acknowledge will ++ * not propagate properly. ++ */ ++ ack_APIC_irq(); ++ ++ /* ++ * Tail end of clearing remote IRR bit (either by delivering the EOI ++ * message via io-apic EOI register write or simulating it using ++ * mask+edge followed by unnask+level logic) manually when the ++ * level triggered interrupt is seen as the edge triggered interrupt ++ * at the cpu. ++ */ ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ eoi_ioapic_pin(cfg->vector, irq_data->chip_data); ++ } ++ ++ ioapic_irqd_unmask(irq_data, masked); ++} ++ ++static void ioapic_ir_ack_level(struct irq_data *irq_data) ++{ ++ struct mp_chip_data *data = irq_data->chip_data; ++ ++ /* ++ * Intr-remapping uses pin number as the virtual vector ++ * in the RTE. Actual vector is programmed in ++ * intr-remapping table entry. Hence for the io-apic ++ * EOI we use the pin number. ++ */ ++ apic_ack_irq(irq_data); ++ eoi_ioapic_pin(data->entry.vector, data); ++} ++ ++static void ioapic_configure_entry(struct irq_data *irqd) ++{ ++ struct mp_chip_data *mpd = irqd->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irqd); ++ struct irq_pin_list *entry; ++ ++ /* ++ * Only update when the parent is the vector domain, don't touch it ++ * if the parent is the remapping domain. Check the installed ++ * ioapic chip to verify that. 
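/*
 * A minimal standalone sketch (not part of the patch itself) of the TMR
 * lookup that ioapic_ack_level() performs above: the local APIC exposes
 * the Trigger Mode Register as eight 32-bit words spaced 0x10 bytes
 * apart, so vector v lives in word v / 32 (byte offset (v & ~0x1f) >> 1)
 * at bit position v % 32. read_tmr_word() is a placeholder for the MMIO
 * read, not a real kernel API.
 */
static inline int vector_is_level_in_tmr(unsigned int v,
					 unsigned int (*read_tmr_word)(unsigned int byte_off))
{
	unsigned int byte_off = (v & ~0x1f) >> 1;	/* (v / 32) * 0x10 */
	unsigned int bit = v & 0x1f;			/* v % 32 */

	return (read_tmr_word(byte_off) >> bit) & 1;
}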
++ */ ++ if (irqd->chip == &ioapic_chip) { ++ mpd->entry.dest = cfg->dest_apicid; ++ mpd->entry.vector = cfg->vector; ++ } ++ for_each_irq_pin(entry, mpd->irq_2_pin) ++ __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); ++} ++ ++static int ioapic_set_affinity(struct irq_data *irq_data, ++ const struct cpumask *mask, bool force) ++{ ++ struct irq_data *parent = irq_data->parent_data; ++ unsigned long flags; ++ int ret; ++ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) ++ ioapic_configure_entry(irq_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return ret; ++} ++ ++/* ++ * Interrupt shutdown masks the ioapic pin, but the interrupt might already ++ * be in flight, but not yet serviced by the target CPU. That means ++ * __synchronize_hardirq() would return and claim that everything is calmed ++ * down. So free_irq() would proceed and deactivate the interrupt and free ++ * resources. ++ * ++ * Once the target CPU comes around to service it it will find a cleared ++ * vector and complain. While the spurious interrupt is harmless, the full ++ * release of resources might prevent the interrupt from being acknowledged ++ * which keeps the hardware in a weird state. ++ * ++ * Verify that the corresponding Remote-IRR bits are clear. ++ */ ++static int ioapic_irq_get_chip_state(struct irq_data *irqd, ++ enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct mp_chip_data *mcd = irqd->chip_data; ++ struct IO_APIC_route_entry rentry; ++ struct irq_pin_list *p; ++ ++ if (which != IRQCHIP_STATE_ACTIVE) ++ return -EINVAL; ++ ++ *state = false; ++ raw_spin_lock(&ioapic_lock); ++ for_each_irq_pin(p, mcd->irq_2_pin) { ++ rentry = __ioapic_read_entry(p->apic, p->pin); ++ /* ++ * The remote IRR is only valid in level trigger mode. It's ++ * meaning is undefined for edge triggered interrupts and ++ * irrelevant because the IO-APIC treats them as fire and ++ * forget. ++ */ ++ if (rentry.irr && rentry.trigger) { ++ *state = true; ++ break; ++ } ++ } ++ raw_spin_unlock(&ioapic_lock); ++ return 0; ++} ++ ++static struct irq_chip ioapic_chip __read_mostly = { ++ .name = "IO-APIC", ++ .irq_startup = startup_ioapic_irq, ++ .irq_mask = mask_ioapic_irq, ++ .irq_unmask = unmask_ioapic_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_eoi = ioapic_ack_level, ++ .irq_set_affinity = ioapic_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static struct irq_chip ioapic_ir_chip __read_mostly = { ++ .name = "IR-IO-APIC", ++ .irq_startup = startup_ioapic_irq, ++ .irq_mask = mask_ioapic_irq, ++ .irq_unmask = unmask_ioapic_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_eoi = ioapic_ir_ack_level, ++ .irq_set_affinity = ioapic_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ struct irq_cfg *cfg; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ cfg = irq_cfg(irq); ++ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < nr_legacy_irqs()) ++ legacy_pic->make_irq(irq); ++ else ++ /* Strange. Oh, well.. 
*/ ++ irq_set_chip(irq, &no_irq_chip); ++ } ++ } ++} ++ ++/* ++ * The local APIC irq-chip implementation: ++ */ ++ ++static void mask_lapic_irq(struct irq_data *data) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void unmask_lapic_irq(struct irq_data *data) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq(struct irq_data *data) ++{ ++ ack_APIC_irq(); ++} ++ ++static struct irq_chip lapic_chip __read_mostly = { ++ .name = "local-APIC", ++ .irq_mask = mask_lapic_irq, ++ .irq_unmask = unmask_lapic_irq, ++ .irq_ack = ack_lapic_irq, ++}; ++ ++static void lapic_register_intr(int irq) ++{ ++ irq_clear_status_flags(irq, IRQ_LEVEL); ++ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, ++ "edge"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. --macro ++ */ ++static inline void __init unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ if (pin == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (apic == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ ++ entry0 = ioapic_read_entry(apic, pin); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL; ++ entry1.mask = IOAPIC_UNMASKED; ++ entry1.dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = IOAPIC_EDGE; ++ entry1.vector = 0; ++ ++ ioapic_write_entry(apic, pin, entry1); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ ioapic_write_entry(apic, pin, entry0); ++} ++ ++static int disable_timer_pin_1 __initdata; ++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ ++static int __init disable_timer_pin_setup(char *arg) ++{ ++ disable_timer_pin_1 = 1; ++ return 0; ++} ++early_param("disable_timer_pin_1", disable_timer_pin_setup); ++ ++static int mp_alloc_timer_irq(int ioapic, int pin) ++{ ++ int irq = -1; ++ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic); ++ ++ if (domain) { ++ struct irq_alloc_info info; ++ ++ ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0); ++ info.ioapic_id = mpc_ioapic_id(ioapic); ++ info.ioapic_pin = pin; ++ mutex_lock(&ioapic_mutex); ++ irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info); ++ mutex_unlock(&ioapic_mutex); ++ } ++ ++ return irq; ++} ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. 
Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ * ++ * FIXME: really need to revamp this for all platforms. ++ */ ++static inline void __init check_timer(void) ++{ ++ struct irq_data *irq_data = irq_get_irq_data(0); ++ struct mp_chip_data *data = irq_data->chip_data; ++ struct irq_cfg *cfg = irqd_cfg(irq_data); ++ int node = cpu_to_node(0); ++ int apic1, pin1, apic2, pin2; ++ unsigned long flags; ++ int no_pin1 = 0; ++ ++ local_irq_save(flags); ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ legacy_pic->mask(0); ++ ++ /* ++ * As IRQ0 is to be enabled in the 8259A, the virtual ++ * wire has to be disabled in the local APIC. Also ++ * timer interrupts need to be acknowledged manually in ++ * the 8259A for the i82489DX when using the NMI ++ * watchdog as that APIC treats NMIs as level-triggered. ++ * The AEOI mode will finish them in the 8259A ++ * automatically. ++ */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ legacy_pic->init(1); ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " ++ "apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ cfg->vector, apic1, pin1, apic2, pin2); ++ ++ /* ++ * Some BIOS writers are clueless and report the ExtINTA ++ * I/O APIC input from the cascaded 8259A as the timer ++ * interrupt input. So just in case, if only one pin ++ * was found above, try it both directly and through the ++ * 8259A. ++ */ ++ if (pin1 == -1) { ++ panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); ++ pin1 = pin2; ++ apic1 = apic2; ++ no_pin1 = 1; ++ } else if (pin2 == -1) { ++ pin2 = pin1; ++ apic2 = apic1; ++ } ++ ++ if (pin1 != -1) { ++ /* Ok, does IRQ0 through the IOAPIC work? */ ++ if (no_pin1) { ++ mp_alloc_timer_irq(apic1, pin1); ++ } else { ++ /* ++ * for edge trigger, it's already unmasked, ++ * so only need to unmask if it is level-trigger ++ * do we really have level trigger timer? ++ */ ++ int idx; ++ idx = find_irq_entry(apic1, pin1, mp_INT); ++ if (idx != -1 && irq_trigger(idx)) ++ unmask_ioapic_irq(irq_get_irq_data(0)); ++ } ++ irq_domain_deactivate_irq(irq_data); ++ irq_domain_activate_irq(irq_data, false); ++ if (timer_irq_works()) { ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ goto out; ++ } ++ panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); ++ local_irq_disable(); ++ clear_IO_APIC_pin(apic1, pin1); ++ if (!no_pin1) ++ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " ++ "8254 timer not connected to IO-APIC\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " ++ "(IRQ0) through the 8259A ...\n"); ++ apic_printk(APIC_QUIET, KERN_INFO ++ "..... (found apic %d pin %d) ...\n", apic2, pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); ++ irq_domain_deactivate_irq(irq_data); ++ irq_domain_activate_irq(irq_data, false); ++ legacy_pic->unmask(0); ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); ++ goto out; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ local_irq_disable(); ++ legacy_pic->mask(0); ++ clear_IO_APIC_pin(apic2, pin2); ++ apic_printk(APIC_QUIET, KERN_INFO "....... 
failed.\n"); ++ } ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as Virtual Wire IRQ...\n"); ++ ++ lapic_register_intr(0); ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ ++ legacy_pic->unmask(0); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ legacy_pic->mask(0); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as ExtINT IRQ...\n"); ++ ++ legacy_pic->init(0); ++ legacy_pic->make_irq(0); ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); ++ if (apic_is_x2apic_enabled()) ++ apic_printk(APIC_QUIET, KERN_INFO ++ "Perhaps problem with the pre-enabled x2apic mode\n" ++ "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option.\n"); ++out: ++ local_irq_restore(flags); ++} ++ ++/* ++ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available ++ * to devices. However there may be an I/O APIC pin available for ++ * this interrupt regardless. The pin may be left unconnected, but ++ * typically it will be reused as an ExtINT cascade interrupt for ++ * the master 8259A. In the MPS case such a pin will normally be ++ * reported as an ExtINT interrupt in the MP table. With ACPI ++ * there is no provision for ExtINT interrupts, and in the absence ++ * of an override it would be treated as an ordinary ISA I/O APIC ++ * interrupt, that is edge-triggered and unmasked by default. We ++ * used to do this, but it caused problems on some systems because ++ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using ++ * the same ExtINT cascade interrupt to drive the local APIC of the ++ * bootstrap processor. Therefore we refrain from routing IRQ2 to ++ * the I/O APIC in all cases now. No actual device should request ++ * it anyway. 
--macro ++ */ ++#define PIC_IRQS (1UL << PIC_CASCADE_IR) ++ ++static int mp_irqdomain_create(int ioapic) ++{ ++ struct irq_alloc_info info; ++ struct irq_domain *parent; ++ int hwirqs = mp_ioapic_pin_count(ioapic); ++ struct ioapic *ip = &ioapics[ioapic]; ++ struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg; ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ struct fwnode_handle *fn; ++ char *name = "IO-APIC"; ++ ++ if (cfg->type == IOAPIC_DOMAIN_INVALID) ++ return 0; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_IOAPIC; ++ info.ioapic_id = mpc_ioapic_id(ioapic); ++ parent = irq_remapping_get_ir_irq_domain(&info); ++ if (!parent) ++ parent = x86_vector_domain; ++ else ++ name = "IO-APIC-IR"; ++ ++ /* Handle device tree enumerated APICs proper */ ++ if (cfg->dev) { ++ fn = of_node_to_fwnode(cfg->dev); ++ } else { ++ fn = irq_domain_alloc_named_id_fwnode(name, ioapic); ++ if (!fn) ++ return -ENOMEM; ++ } ++ ++ ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, ++ (void *)(long)ioapic); ++ ++ /* Release fw handle if it was allocated above */ ++ if (!cfg->dev) ++ irq_domain_free_fwnode(fn); ++ ++ if (!ip->irqdomain) ++ return -ENOMEM; ++ ++ ip->irqdomain->parent = parent; ++ ++ if (cfg->type == IOAPIC_DOMAIN_LEGACY || ++ cfg->type == IOAPIC_DOMAIN_STRICT) ++ ioapic_dynirq_base = max(ioapic_dynirq_base, ++ gsi_cfg->gsi_end + 1); ++ ++ return 0; ++} ++ ++static void ioapic_destroy_irqdomain(int idx) ++{ ++ if (ioapics[idx].irqdomain) { ++ irq_domain_remove(ioapics[idx].irqdomain); ++ ioapics[idx].irqdomain = NULL; ++ } ++} ++ ++void __init setup_IO_APIC(void) ++{ ++ int ioapic; ++ ++ if (skip_ioapic_setup || !nr_ioapics) ++ return; ++ ++ io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; ++ ++ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); ++ for_each_ioapic(ioapic) ++ BUG_ON(mp_irqdomain_create(ioapic)); ++ ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++ x86_init.mpparse.setup_ioapic_ids(); ++ ++ sync_Arb_IDs(); ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ if (nr_legacy_irqs()) ++ check_timer(); ++ ++ ioapic_initialized = 1; ++} ++ ++static void resume_ioapic_id(int ioapic_idx) ++{ ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic_idx, 0); ++ if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { ++ reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); ++ io_apic_write(ioapic_idx, 0, reg_00.raw); ++ } ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void ioapic_resume(void) ++{ ++ int ioapic_idx; ++ ++ for_each_ioapic_reverse(ioapic_idx) ++ resume_ioapic_id(ioapic_idx); ++ ++ restore_ioapic_entries(); ++} ++ ++static struct syscore_ops ioapic_syscore_ops = { ++ .suspend = save_ioapic_entries, ++ .resume = ioapic_resume, ++}; ++ ++static int __init ioapic_init_ops(void) ++{ ++ register_syscore_ops(&ioapic_syscore_ops); ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_ops); ++ ++static int io_apic_get_redir_entries(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* The register returns the maximum index redir index ++ * supported, which is one less than the total number of redir ++ * entries. 
++ */ ++ return reg_01.bits.entries + 1; ++} ++ ++unsigned int arch_dynirq_lower_bound(unsigned int from) ++{ ++ /* ++ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use ++ * gsi_top if ioapic_dynirq_base hasn't been initialized yet. ++ */ ++ if (!ioapic_initialized) ++ return gsi_top; ++ /* ++ * For DT enabled machines ioapic_dynirq_base is irrelevant and not ++ * updated. So simply return @from if ioapic_dynirq_base == 0. ++ */ ++ return ioapic_dynirq_base ? : from; ++} ++ ++#ifdef CONFIG_X86_32 ++static int io_apic_get_unique_id(int ioapic, int apic_id) ++{ ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. ++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (apic->check_apicid_used(&apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!apic->check_apicid_used(&apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ apic->apicid_to_cpu_present(apic_id, &tmp); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ pr_err("IOAPIC[%d]: Unable to change apic_id!\n", ++ ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++ ++ return apic_id; ++} ++ ++static u8 io_apic_unique_id(int idx, u8 id) ++{ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ !APIC_XAPIC(boot_cpu_apic_version)) ++ return io_apic_get_unique_id(idx, id); ++ else ++ return id; ++} ++#else ++static u8 io_apic_unique_id(int idx, u8 id) ++{ ++ union IO_APIC_reg_00 reg_00; ++ DECLARE_BITMAP(used, 256); ++ unsigned long flags; ++ u8 new_id; ++ int i; ++ ++ bitmap_zero(used, 256); ++ for_each_ioapic(i) ++ __set_bit(mpc_ioapic_id(i), used); ++ ++ /* Hand out the requested id if available */ ++ if (!test_bit(id, used)) ++ return id; ++ ++ /* ++ * Read the current id from the ioapic and keep it if ++ * available. 
++ */ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ new_id = reg_00.bits.ID; ++ if (!test_bit(new_id, used)) { ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Using reg apic_id %d instead of %d\n", ++ idx, new_id, id); ++ return new_id; ++ } ++ ++ /* ++ * Get the next free id and write it to the ioapic. ++ */ ++ new_id = find_first_zero_bit(used, 256); ++ reg_00.bits.ID = new_id; ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(idx, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(idx, 0); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ /* Sanity check */ ++ BUG_ON(reg_00.bits.ID != new_id); ++ ++ return new_id; ++} ++#endif ++ ++static int io_apic_get_version(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++ ++int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) ++{ ++ int ioapic, pin, idx; ++ ++ if (skip_ioapic_setup) ++ return -1; ++ ++ ioapic = mp_find_ioapic(gsi); ++ if (ioapic < 0) ++ return -1; ++ ++ pin = mp_find_ioapic_pin(ioapic, gsi); ++ if (pin < 0) ++ return -1; ++ ++ idx = find_irq_entry(ioapic, pin, mp_INT); ++ if (idx < 0) ++ return -1; ++ ++ *trigger = irq_trigger(idx); ++ *polarity = irq_polarity(idx); ++ return 0; ++} ++ ++/* ++ * This function updates target affinity of IOAPIC interrupts to include ++ * the CPUs which came online during SMP bringup. ++ */ ++#define IOAPIC_RESOURCE_NAME_SIZE 11 ++ ++static struct resource *ioapic_resources; ++ ++static struct resource * __init ioapic_setup_resources(void) ++{ ++ unsigned long n; ++ struct resource *res; ++ char *mem; ++ int i; ++ ++ if (nr_ioapics == 0) ++ return NULL; ++ ++ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); ++ n *= nr_ioapics; ++ ++ mem = alloc_bootmem(n); ++ res = (void *)mem; ++ ++ mem += sizeof(struct resource) * nr_ioapics; ++ ++ for_each_ioapic(i) { ++ res[i].name = mem; ++ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); ++ mem += IOAPIC_RESOURCE_NAME_SIZE; ++ ioapics[i].iomem_res = &res[i]; ++ } ++ ++ ioapic_resources = res; ++ ++ return res; ++} ++ ++void __init io_apic_init_mappings(void) ++{ ++ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; ++ struct resource *ioapic_res; ++ int i; ++ ++ ioapic_res = ioapic_setup_resources(); ++ for_each_ioapic(i) { ++ if (smp_found_config) { ++ ioapic_phys = mpc_ioapic_addr(i); ++#ifdef CONFIG_X86_32 ++ if (!ioapic_phys) { ++ printk(KERN_ERR ++ "WARNING: bogus zero IO-APIC " ++ "address found in MPTABLE, " ++ "disabling IO/APIC support!\n"); ++ smp_found_config = 0; ++ skip_ioapic_setup = 1; ++ goto fake_ioapic_page; ++ } ++#endif ++ } else { ++#ifdef CONFIG_X86_32 ++fake_ioapic_page: ++#endif ++ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ++ ioapic_phys = __pa(ioapic_phys); ++ } ++ set_fixmap_nocache(idx, ioapic_phys); ++ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", ++ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), ++ ioapic_phys); ++ idx++; ++ ++ ioapic_res->start = ioapic_phys; ++ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ++ ioapic_res++; ++ } ++} ++ ++void __init ioapic_insert_resources(void) ++{ ++ int i; ++ struct resource *r = ioapic_resources; ++ ++ if (!r) { ++ if (nr_ioapics > 0) ++ printk(KERN_ERR 
++ "IO APIC resources couldn't be allocated.\n"); ++ return; ++ } ++ ++ for_each_ioapic(i) { ++ insert_resource(&iomem_resource, r); ++ r++; ++ } ++} ++ ++int mp_find_ioapic(u32 gsi) ++{ ++ int i; ++ ++ if (nr_ioapics == 0) ++ return -1; ++ ++ /* Find the IOAPIC that manages this GSI. */ ++ for_each_ioapic(i) { ++ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); ++ if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end) ++ return i; ++ } ++ ++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); ++ return -1; ++} ++ ++int mp_find_ioapic_pin(int ioapic, u32 gsi) ++{ ++ struct mp_ioapic_gsi *gsi_cfg; ++ ++ if (WARN_ON(ioapic < 0)) ++ return -1; ++ ++ gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ if (WARN_ON(gsi > gsi_cfg->gsi_end)) ++ return -1; ++ ++ return gsi - gsi_cfg->gsi_base; ++} ++ ++static int bad_ioapic_register(int idx) ++{ ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ ++ reg_00.raw = io_apic_read(idx, 0); ++ reg_01.raw = io_apic_read(idx, 1); ++ reg_02.raw = io_apic_read(idx, 2); ++ ++ if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) { ++ pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n", ++ mpc_ioapic_addr(idx)); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int find_free_ioapic_entry(void) ++{ ++ int idx; ++ ++ for (idx = 0; idx < MAX_IO_APICS; idx++) ++ if (ioapics[idx].nr_registers == 0) ++ return idx; ++ ++ return MAX_IO_APICS; ++} ++ ++/** ++ * mp_register_ioapic - Register an IOAPIC device ++ * @id: hardware IOAPIC ID ++ * @address: physical address of IOAPIC register area ++ * @gsi_base: base of GSI associated with the IOAPIC ++ * @cfg: configuration information for the IOAPIC ++ */ ++int mp_register_ioapic(int id, u32 address, u32 gsi_base, ++ struct ioapic_domain_cfg *cfg) ++{ ++ bool hotplug = !!ioapic_initialized; ++ struct mp_ioapic_gsi *gsi_cfg; ++ int idx, ioapic, entries; ++ u32 gsi_end; ++ ++ if (!address) { ++ pr_warn("Bogus (zero) I/O APIC address found, skipping!\n"); ++ return -EINVAL; ++ } ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].mp_config.apicaddr == address) { ++ pr_warn("address 0x%x conflicts with IOAPIC%d\n", ++ address, ioapic); ++ return -EEXIST; ++ } ++ ++ idx = find_free_ioapic_entry(); ++ if (idx >= MAX_IO_APICS) { ++ pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n", ++ MAX_IO_APICS, idx); ++ return -ENOSPC; ++ } ++ ++ ioapics[idx].mp_config.type = MP_IOAPIC; ++ ioapics[idx].mp_config.flags = MPC_APIC_USABLE; ++ ioapics[idx].mp_config.apicaddr = address; ++ ++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); ++ if (bad_ioapic_register(idx)) { ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENODEV; ++ } ++ ++ ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id); ++ ioapics[idx].mp_config.apicver = io_apic_get_version(idx); ++ ++ /* ++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups ++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 
++ */ ++ entries = io_apic_get_redir_entries(idx); ++ gsi_end = gsi_base + entries - 1; ++ for_each_ioapic(ioapic) { ++ gsi_cfg = mp_ioapic_gsi_routing(ioapic); ++ if ((gsi_base >= gsi_cfg->gsi_base && ++ gsi_base <= gsi_cfg->gsi_end) || ++ (gsi_end >= gsi_cfg->gsi_base && ++ gsi_end <= gsi_cfg->gsi_end)) { ++ pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n", ++ gsi_base, gsi_end, ++ gsi_cfg->gsi_base, gsi_cfg->gsi_end); ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENOSPC; ++ } ++ } ++ gsi_cfg = mp_ioapic_gsi_routing(idx); ++ gsi_cfg->gsi_base = gsi_base; ++ gsi_cfg->gsi_end = gsi_end; ++ ++ ioapics[idx].irqdomain = NULL; ++ ioapics[idx].irqdomain_cfg = *cfg; ++ ++ /* ++ * If mp_register_ioapic() is called during early boot stage when ++ * walking ACPI/SFI/DT tables, it's too early to create irqdomain, ++ * we are still using bootmem allocator. So delay it to setup_IO_APIC(). ++ */ ++ if (hotplug) { ++ if (mp_irqdomain_create(idx)) { ++ clear_fixmap(FIX_IO_APIC_BASE_0 + idx); ++ return -ENOMEM; ++ } ++ alloc_ioapic_saved_registers(idx); ++ } ++ ++ if (gsi_cfg->gsi_end >= gsi_top) ++ gsi_top = gsi_cfg->gsi_end + 1; ++ if (nr_ioapics <= idx) ++ nr_ioapics = idx + 1; ++ ++ /* Set nr_registers to mark entry present */ ++ ioapics[idx].nr_registers = entries; ++ ++ pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", ++ idx, mpc_ioapic_id(idx), ++ mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), ++ gsi_cfg->gsi_base, gsi_cfg->gsi_end); ++ ++ return 0; ++} ++ ++int mp_unregister_ioapic(u32 gsi_base) ++{ ++ int ioapic, pin; ++ int found = 0; ++ ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) { ++ found = 1; ++ break; ++ } ++ if (!found) { ++ pr_warn("can't find IOAPIC for GSI %d\n", gsi_base); ++ return -ENODEV; ++ } ++ ++ for_each_pin(ioapic, pin) { ++ u32 gsi = mp_pin_to_gsi(ioapic, pin); ++ int irq = mp_map_gsi_to_irq(gsi, 0, NULL); ++ struct mp_chip_data *data; ++ ++ if (irq >= 0) { ++ data = irq_get_chip_data(irq); ++ if (data && data->count) { ++ pr_warn("pin%d on IOAPIC%d is still in use.\n", ++ pin, ioapic); ++ return -EBUSY; ++ } ++ } ++ } ++ ++ /* Mark entry not present */ ++ ioapics[ioapic].nr_registers = 0; ++ ioapic_destroy_irqdomain(ioapic); ++ free_ioapic_saved_registers(ioapic); ++ if (ioapics[ioapic].iomem_res) ++ release_resource(ioapics[ioapic].iomem_res); ++ clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic); ++ memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic])); ++ ++ return 0; ++} ++ ++int mp_ioapic_registered(u32 gsi_base) ++{ ++ int ioapic; ++ ++ for_each_ioapic(ioapic) ++ if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) ++ return 1; ++ ++ return 0; ++} ++ ++static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data, ++ struct irq_alloc_info *info) ++{ ++ if (info && info->ioapic_valid) { ++ data->trigger = info->ioapic_trigger; ++ data->polarity = info->ioapic_polarity; ++ } else if (acpi_get_override_irq(gsi, &data->trigger, ++ &data->polarity) < 0) { ++ /* PCI interrupts are always active low level triggered. 
*/ ++ data->trigger = IOAPIC_LEVEL; ++ data->polarity = IOAPIC_POL_LOW; ++ } ++} ++ ++static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data, ++ struct IO_APIC_route_entry *entry) ++{ ++ memset(entry, 0, sizeof(*entry)); ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; ++ entry->dest = cfg->dest_apicid; ++ entry->vector = cfg->vector; ++ entry->trigger = data->trigger; ++ entry->polarity = data->polarity; ++ /* ++ * Mask level triggered irqs. Edge triggered irqs are masked ++ * by the irq core code in case they fire. ++ */ ++ if (data->trigger == IOAPIC_LEVEL) ++ entry->mask = IOAPIC_MASKED; ++ else ++ entry->mask = IOAPIC_UNMASKED; ++} ++ ++int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ int ret, ioapic, pin; ++ struct irq_cfg *cfg; ++ struct irq_data *irq_data; ++ struct mp_chip_data *data; ++ struct irq_alloc_info *info = arg; ++ unsigned long flags; ++ ++ if (!info || nr_irqs > 1) ++ return -EINVAL; ++ irq_data = irq_domain_get_irq_data(domain, virq); ++ if (!irq_data) ++ return -EINVAL; ++ ++ ioapic = mp_irqdomain_ioapic_idx(domain); ++ pin = info->ioapic_pin; ++ if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0) ++ return -EEXIST; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ info->ioapic_entry = &data->entry; ++ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); ++ if (ret < 0) { ++ kfree(data); ++ return ret; ++ } ++ ++ INIT_LIST_HEAD(&data->irq_2_pin); ++ irq_data->hwirq = info->ioapic_pin; ++ irq_data->chip = (domain->parent == x86_vector_domain) ? ++ &ioapic_chip : &ioapic_ir_chip; ++ irq_data->chip_data = data; ++ mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info); ++ ++ cfg = irqd_cfg(irq_data); ++ add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); ++ ++ local_irq_save(flags); ++ if (info->ioapic_entry) ++ mp_setup_entry(cfg, data, info->ioapic_entry); ++ mp_register_handler(virq, data->trigger); ++ if (virq < nr_legacy_irqs()) ++ legacy_pic->mask(virq); ++ local_irq_restore(flags); ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", ++ ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector, ++ virq, data->trigger, data->polarity, cfg->dest_apicid); ++ ++ return 0; ++} ++ ++void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs) ++{ ++ struct irq_data *irq_data; ++ struct mp_chip_data *data; ++ ++ BUG_ON(nr_irqs != 1); ++ irq_data = irq_domain_get_irq_data(domain, virq); ++ if (irq_data && irq_data->chip_data) { ++ data = irq_data->chip_data; ++ __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain), ++ (int)irq_data->hwirq); ++ WARN_ON(!list_empty(&data->irq_2_pin)); ++ kfree(irq_data->chip_data); ++ } ++ irq_domain_free_irqs_top(domain, virq, nr_irqs); ++} ++ ++int mp_irqdomain_activate(struct irq_domain *domain, ++ struct irq_data *irq_data, bool reserve) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&ioapic_lock, flags); ++ ioapic_configure_entry(irq_data); ++ raw_spin_unlock_irqrestore(&ioapic_lock, flags); ++ return 0; ++} ++ ++void mp_irqdomain_deactivate(struct irq_domain *domain, ++ struct irq_data *irq_data) ++{ ++ /* It won't be called for IRQ with multiple IOAPIC pins associated */ ++ ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain), ++ (int)irq_data->hwirq); ++} ++ ++int mp_irqdomain_ioapic_idx(struct irq_domain *domain) 
++{ ++ return (int)(long)domain->host_data; ++} ++ ++const struct irq_domain_ops mp_ioapic_irqdomain_ops = { ++ .alloc = mp_irqdomain_alloc, ++ .free = mp_irqdomain_free, ++ .activate = mp_irqdomain_activate, ++ .deactivate = mp_irqdomain_deactivate, ++}; +diff -uprN kernel/arch/x86/kernel/apic/ipi.c kernel_new/arch/x86/kernel/apic/ipi.c +--- kernel/arch/x86/kernel/apic/ipi.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/ipi.c 2021-04-01 18:28:07.654863288 +0800 +@@ -28,7 +28,9 @@ void __default_send_IPI_shortcut(unsigne + * to the APIC. + */ + unsigned int cfg; ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + /* + * Wait for idle. + */ +@@ -43,6 +45,8 @@ void __default_send_IPI_shortcut(unsigne + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); ++ ++ hard_cond_local_irq_restore(flags); + } + + /* +@@ -51,8 +55,9 @@ void __default_send_IPI_shortcut(unsigne + */ + void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) + { +- unsigned long cfg; ++ unsigned long cfg, flags; + ++ flags = hard_cond_local_irq_save(); + /* + * Wait for idle. + */ +@@ -76,6 +81,8 @@ void __default_send_IPI_dest_field(unsig + * Send the IPI. The write to APIC_ICR fires this off. + */ + native_apic_mem_write(APIC_ICR, cfg); ++ ++ hard_cond_local_irq_restore(flags); + } + + void default_send_IPI_single_phys(int cpu, int vector) +@@ -98,12 +105,12 @@ void default_send_IPI_mask_sequence_phys + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, +@@ -115,14 +122,14 @@ void default_send_IPI_mask_allbutself_ph + + /* See Hack comment above */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +@@ -147,12 +154,12 @@ void default_send_IPI_mask_sequence_logi + * should be modified to do 1 message per cluster ID - mbligh + */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, +@@ -164,7 +171,7 @@ void default_send_IPI_mask_allbutself_lo + + /* See Hack comment above */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; +@@ -172,7 +179,7 @@ void default_send_IPI_mask_allbutself_lo + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +@@ -186,10 +193,10 @@ void default_send_IPI_mask_logical(const + if (!mask) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, 
apic->dest_logical); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + void default_send_IPI_allbutself(int vector) +diff -uprN kernel/arch/x86/kernel/apic/msi.c kernel_new/arch/x86/kernel/apic/msi.c +--- kernel/arch/x86/kernel/apic/msi.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c 2021-04-02 09:05:45.996880795 +0800 +@@ -181,7 +181,10 @@ static struct irq_chip pci_msi_controlle + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_set_affinity = msi_set_affinity, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +@@ -281,7 +284,10 @@ static struct irq_chip pci_msi_ir_contro + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info pci_msi_ir_domain_info = { +@@ -323,7 +329,10 @@ static struct irq_chip dmar_msi_controll + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = dmar_msi_write_msg, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, +@@ -420,7 +429,10 @@ static struct irq_chip hpet_msi_controll + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_write_msi_msg = hpet_msi_write_msg, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) ++ .irq_move = move_xxapic_irq, ++#endif ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, +diff -uprN kernel/arch/x86/kernel/apic/msi.c.orig kernel_new/arch/x86/kernel/apic/msi.c.orig +--- kernel/arch/x86/kernel/apic/msi.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,511 @@ ++/* ++ * Support of MSI, HPET and DMAR interrupts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * Moved from arch/x86/kernel/apic/io_apic.c. ++ * Jiang Liu ++ * Convert to hierarchical irqdomain ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct irq_domain *msi_default_domain; ++ ++static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) ++{ ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ ++ if (x2apic_enabled()) ++ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid); ++ ++ msg->address_lo = ++ MSI_ADDR_BASE_LO | ++ ((apic->irq_dest_mode == 0) ? 
++ MSI_ADDR_DEST_MODE_PHYSICAL : ++ MSI_ADDR_DEST_MODE_LOGICAL) | ++ MSI_ADDR_REDIRECTION_CPU | ++ MSI_ADDR_DEST_ID(cfg->dest_apicid); ++ ++ msg->data = ++ MSI_DATA_TRIGGER_EDGE | ++ MSI_DATA_LEVEL_ASSERT | ++ MSI_DATA_DELIVERY_FIXED | ++ MSI_DATA_VECTOR(cfg->vector); ++} ++ ++static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ __irq_msi_compose_msg(irqd_cfg(data), msg); ++} ++ ++static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) ++{ ++ struct msi_msg msg[2] = { [1] = { }, }; ++ ++ __irq_msi_compose_msg(cfg, msg); ++ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); ++} ++ ++static int ++msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) ++{ ++ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); ++ struct irq_data *parent = irqd->parent_data; ++ unsigned int cpu; ++ int ret; ++ ++ /* Save the current configuration */ ++ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); ++ old_cfg = *cfg; ++ ++ /* Allocate a new target vector */ ++ ret = parent->chip->irq_set_affinity(parent, mask, force); ++ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) ++ return ret; ++ ++ /* ++ * For non-maskable and non-remapped MSI interrupts the migration ++ * to a different destination CPU and a different vector has to be ++ * done careful to handle the possible stray interrupt which can be ++ * caused by the non-atomic update of the address/data pair. ++ * ++ * Direct update is possible when: ++ * - The MSI is maskable (remapped MSI does not use this code path)). ++ * The quirk bit is not set in this case. ++ * - The new vector is the same as the old vector ++ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) ++ * - The new destination CPU is the same as the old destination CPU ++ */ ++ if (!irqd_msi_nomask_quirk(irqd) || ++ cfg->vector == old_cfg.vector || ++ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || ++ cfg->dest_apicid == old_cfg.dest_apicid) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Paranoia: Validate that the interrupt target is the local ++ * CPU. ++ */ ++ if (WARN_ON_ONCE(cpu != smp_processor_id())) { ++ irq_msi_update_msg(irqd, cfg); ++ return ret; ++ } ++ ++ /* ++ * Redirect the interrupt to the new vector on the current CPU ++ * first. This might cause a spurious interrupt on this vector if ++ * the device raises an interrupt right between this update and the ++ * update to the final destination CPU. ++ * ++ * If the vector is in use then the installed device handler will ++ * denote it as spurious which is no harm as this is a rare event ++ * and interrupt handlers have to cope with spurious interrupts ++ * anyway. If the vector is unused, then it is marked so it won't ++ * trigger the 'No irq handler for vector' warning in do_IRQ(). ++ * ++ * This requires to hold vector lock to prevent concurrent updates to ++ * the affected vector. ++ */ ++ lock_vector_lock(); ++ ++ /* ++ * Mark the new target vector on the local CPU if it is currently ++ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in ++ * the CPU hotplug path for a similar purpose. This cannot be ++ * undone here as the current CPU has interrupts disabled and ++ * cannot handle the interrupt before the whole set_affinity() ++ * section is done. In the CPU unplug case, the current CPU is ++ * about to vanish and will not handle any interrupts anymore. The ++ * vector is cleaned up when the CPU comes online again. 
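/*
 * Condensed restatement of the rules spelled out in the comments above
 * (field and macro names follow the surrounding code; the helper itself
 * is only a sketch, not something the file defines): the MSI address/data
 * pair may be rewritten directly, without the temporary redirect through
 * the local CPU, when any of the following holds.
 */
static bool msi_can_update_directly(const struct irq_cfg *new_cfg,
				    const struct irq_cfg *old_cfg,
				    bool nomask_quirk)
{
	return !nomask_quirk ||					/* MSI is maskable */
	       new_cfg->vector == old_cfg->vector ||		/* same vector */
	       old_cfg->vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||/* interrupt starts up */
	       new_cfg->dest_apicid == old_cfg->dest_apicid;	/* same destination CPU */
}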
++ */ ++ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) ++ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); ++ ++ /* Redirect it to the new vector on the local CPU temporarily */ ++ old_cfg.vector = cfg->vector; ++ irq_msi_update_msg(irqd, &old_cfg); ++ ++ /* Now transition it to the target CPU */ ++ irq_msi_update_msg(irqd, cfg); ++ ++ /* ++ * All interrupts after this point are now targeted at the new ++ * vector/CPU. ++ * ++ * Drop vector lock before testing whether the temporary assignment ++ * to the local CPU was hit by an interrupt raised in the device, ++ * because the retrigger function acquires vector lock again. ++ */ ++ unlock_vector_lock(); ++ ++ /* ++ * Check whether the transition raced with a device interrupt and ++ * is pending in the local APICs IRR. It is safe to do this outside ++ * of vector lock as the irq_desc::lock of this interrupt is still ++ * held and interrupts are disabled: The check is not accessing the ++ * underlying vector store. It's just checking the local APIC's ++ * IRR. ++ */ ++ if (lapic_vector_set_in_irr(cfg->vector)) ++ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); ++ ++ return ret; ++} ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip pci_msi_controller = { ++ .name = "PCI-MSI", ++ .irq_unmask = pci_msi_unmask_irq, ++ .irq_mask = pci_msi_mask_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_set_affinity = msi_set_affinity, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ++{ ++ struct irq_domain *domain; ++ struct irq_alloc_info info; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_MSI; ++ info.msi_dev = dev; ++ ++ domain = irq_remapping_get_irq_domain(&info); ++ if (domain == NULL) ++ domain = msi_default_domain; ++ if (domain == NULL) ++ return -ENOSYS; ++ ++ return msi_domain_alloc_irqs(domain, &dev->dev, nvec); ++} ++ ++void native_teardown_msi_irq(unsigned int irq) ++{ ++ irq_domain_free_irqs(irq, 1); ++} ++ ++static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->msi_hwirq; ++} ++ ++int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, ++ msi_alloc_info_t *arg) ++{ ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct msi_desc *desc = first_pci_msi_entry(pdev); ++ ++ init_irq_alloc_info(arg, NULL); ++ arg->msi_dev = pdev; ++ if (desc->msi_attrib.is_msix) { ++ arg->type = X86_IRQ_ALLOC_TYPE_MSIX; ++ } else { ++ arg->type = X86_IRQ_ALLOC_TYPE_MSI; ++ arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(pci_msi_prepare); ++ ++void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) ++{ ++ arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc); ++} ++EXPORT_SYMBOL_GPL(pci_msi_set_desc); ++ ++static struct msi_domain_ops pci_msi_domain_ops = { ++ .get_hwirq = pci_msi_get_hwirq, ++ .msi_prepare = pci_msi_prepare, ++ .set_desc = pci_msi_set_desc, ++}; ++ ++static struct msi_domain_info pci_msi_domain_info = { ++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | ++ MSI_FLAG_PCI_MSIX, ++ .ops = &pci_msi_domain_ops, ++ .chip = &pci_msi_controller, ++ .handler = handle_edge_irq, ++ .handler_name = "edge", ++}; ++ ++void __init arch_init_msi_domain(struct irq_domain *parent) ++{ ++ struct 
fwnode_handle *fn; ++ ++ if (disable_apic) ++ return; ++ ++ fn = irq_domain_alloc_named_fwnode("PCI-MSI"); ++ if (fn) { ++ msi_default_domain = ++ pci_msi_create_irq_domain(fn, &pci_msi_domain_info, ++ parent); ++ irq_domain_free_fwnode(fn); ++ } ++ if (!msi_default_domain) ++ pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); ++ else ++ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; ++} ++ ++#ifdef CONFIG_IRQ_REMAP ++static struct irq_chip pci_msi_ir_controller = { ++ .name = "IR-PCI-MSI", ++ .irq_unmask = pci_msi_unmask_irq, ++ .irq_mask = pci_msi_mask_irq, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static struct msi_domain_info pci_msi_ir_domain_info = { ++ .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | ++ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, ++ .ops = &pci_msi_domain_ops, ++ .chip = &pci_msi_ir_controller, ++ .handler = handle_edge_irq, ++ .handler_name = "edge", ++}; ++ ++struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, ++ const char *name, int id) ++{ ++ struct fwnode_handle *fn; ++ struct irq_domain *d; ++ ++ fn = irq_domain_alloc_named_id_fwnode(name, id); ++ if (!fn) ++ return NULL; ++ d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); ++ irq_domain_free_fwnode(fn); ++ return d; ++} ++#endif ++ ++#ifdef CONFIG_DMAR_TABLE ++static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ dmar_msi_write(data->irq, msg); ++} ++ ++static struct irq_chip dmar_msi_controller = { ++ .name = "DMAR-MSI", ++ .irq_unmask = dmar_msi_unmask, ++ .irq_mask = dmar_msi_mask, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_set_affinity = msi_domain_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_write_msi_msg = dmar_msi_write_msg, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->dmar_id; ++} ++ ++static int dmar_msi_init(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq, ++ irq_hw_number_t hwirq, msi_alloc_info_t *arg) ++{ ++ irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL, ++ handle_edge_irq, arg->dmar_data, "edge"); ++ ++ return 0; ++} ++ ++static struct msi_domain_ops dmar_msi_domain_ops = { ++ .get_hwirq = dmar_msi_get_hwirq, ++ .msi_init = dmar_msi_init, ++}; ++ ++static struct msi_domain_info dmar_msi_domain_info = { ++ .ops = &dmar_msi_domain_ops, ++ .chip = &dmar_msi_controller, ++}; ++ ++static struct irq_domain *dmar_get_irq_domain(void) ++{ ++ static struct irq_domain *dmar_domain; ++ static DEFINE_MUTEX(dmar_lock); ++ struct fwnode_handle *fn; ++ ++ mutex_lock(&dmar_lock); ++ if (dmar_domain) ++ goto out; ++ ++ fn = irq_domain_alloc_named_fwnode("DMAR-MSI"); ++ if (fn) { ++ dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, ++ x86_vector_domain); ++ irq_domain_free_fwnode(fn); ++ } ++out: ++ mutex_unlock(&dmar_lock); ++ return dmar_domain; ++} ++ ++int dmar_alloc_hwirq(int id, int node, void *arg) ++{ ++ struct irq_domain *domain = dmar_get_irq_domain(); ++ struct irq_alloc_info info; ++ ++ if (!domain) ++ return -1; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_DMAR; ++ info.dmar_id = id; ++ info.dmar_data = arg; ++ ++ return irq_domain_alloc_irqs(domain, 1, node, &info); 
++} ++ ++void dmar_free_hwirq(int irq) ++{ ++ irq_domain_free_irqs(irq, 1); ++} ++#endif ++ ++/* ++ * MSI message composition ++ */ ++#ifdef CONFIG_HPET_TIMER ++static inline int hpet_dev_id(struct irq_domain *domain) ++{ ++ struct msi_domain_info *info = msi_get_domain_info(domain); ++ ++ return (int)(long)info->data; ++} ++ ++static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ hpet_msi_write(irq_data_get_irq_handler_data(data), msg); ++} ++ ++static struct irq_chip hpet_msi_controller __ro_after_init = { ++ .name = "HPET-MSI", ++ .irq_unmask = hpet_msi_unmask, ++ .irq_mask = hpet_msi_mask, ++ .irq_ack = irq_chip_ack_parent, ++ .irq_set_affinity = msi_domain_set_affinity, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++ .irq_write_msi_msg = hpet_msi_write_msg, ++ .flags = IRQCHIP_SKIP_SET_WAKE, ++}; ++ ++static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, ++ msi_alloc_info_t *arg) ++{ ++ return arg->hpet_index; ++} ++ ++static int hpet_msi_init(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq, ++ irq_hw_number_t hwirq, msi_alloc_info_t *arg) ++{ ++ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); ++ irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL, ++ handle_edge_irq, arg->hpet_data, "edge"); ++ ++ return 0; ++} ++ ++static void hpet_msi_free(struct irq_domain *domain, ++ struct msi_domain_info *info, unsigned int virq) ++{ ++ irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); ++} ++ ++static struct msi_domain_ops hpet_msi_domain_ops = { ++ .get_hwirq = hpet_msi_get_hwirq, ++ .msi_init = hpet_msi_init, ++ .msi_free = hpet_msi_free, ++}; ++ ++static struct msi_domain_info hpet_msi_domain_info = { ++ .ops = &hpet_msi_domain_ops, ++ .chip = &hpet_msi_controller, ++}; ++ ++struct irq_domain *hpet_create_irq_domain(int hpet_id) ++{ ++ struct msi_domain_info *domain_info; ++ struct irq_domain *parent, *d; ++ struct irq_alloc_info info; ++ struct fwnode_handle *fn; ++ ++ if (x86_vector_domain == NULL) ++ return NULL; ++ ++ domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL); ++ if (!domain_info) ++ return NULL; ++ ++ *domain_info = hpet_msi_domain_info; ++ domain_info->data = (void *)(long)hpet_id; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_HPET; ++ info.hpet_id = hpet_id; ++ parent = irq_remapping_get_ir_irq_domain(&info); ++ if (parent == NULL) ++ parent = x86_vector_domain; ++ else ++ hpet_msi_controller.name = "IR-HPET-MSI"; ++ ++ fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name, ++ hpet_id); ++ if (!fn) { ++ kfree(domain_info); ++ return NULL; ++ } ++ ++ d = msi_create_irq_domain(fn, domain_info, parent); ++ irq_domain_free_fwnode(fn); ++ return d; ++} ++ ++int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, ++ int dev_num) ++{ ++ struct irq_alloc_info info; ++ ++ init_irq_alloc_info(&info, NULL); ++ info.type = X86_IRQ_ALLOC_TYPE_HPET; ++ info.hpet_data = dev; ++ info.hpet_id = hpet_dev_id(domain); ++ info.hpet_index = dev_num; ++ ++ return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); ++} ++#endif +diff -uprN kernel/arch/x86/kernel/apic/msi.c.rej kernel_new/arch/x86/kernel/apic/msi.c.rej +--- kernel/arch/x86/kernel/apic/msi.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/msi.c.rej 2021-04-01 18:28:07.654863288 +0800 +@@ -0,0 +1,14 @@ ++--- arch/x86/kernel/apic/msi.c 2019-12-18 03:36:04.000000000 +0800 +++++ arch/x86/kernel/apic/msi.c 2021-03-22 
09:21:43.194415288 +0800 ++@@ -61,7 +61,10 @@ static struct irq_chip pci_msi_controlle ++ .irq_ack = irq_chip_ack_parent, ++ .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_compose_msi_msg = irq_msi_compose_msg, ++- .flags = IRQCHIP_SKIP_SET_WAKE, +++#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) +++ .irq_move = move_xxapic_irq, +++#endif +++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, ++ }; ++ ++ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +diff -uprN kernel/arch/x86/kernel/apic/vector.c kernel_new/arch/x86/kernel/apic/vector.c +--- kernel/arch/x86/kernel/apic/vector.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/vector.c 2021-04-01 18:28:07.654863288 +0800 +@@ -41,7 +41,7 @@ struct apic_chip_data { + + struct irq_domain *x86_vector_domain; + EXPORT_SYMBOL_GPL(x86_vector_domain); +-static DEFINE_RAW_SPINLOCK(vector_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock); + static cpumask_var_t vector_searchmask; + static struct irq_chip lapic_controller; + static struct irq_matrix *vector_matrix; +@@ -121,7 +121,9 @@ static void apic_update_irq_cfg(struct i + { + struct apic_chip_data *apicd = apic_chip_data(irqd); + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + apicd->hw_irq_cfg.vector = vector; + apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); +@@ -137,7 +139,9 @@ static void apic_update_vector(struct ir + struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, + apicd->cpu); +@@ -227,7 +231,9 @@ assign_vector_locked(struct irq_data *ir + unsigned int cpu = apicd->cpu; + int vector = apicd->vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + /* + * If the current target CPU is online and in the new requested +@@ -334,7 +340,9 @@ static void clear_irq_vector(struct irq_ + bool managed = irqd_affinity_is_managed(irqd); + unsigned int vector = apicd->vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + if (!vector) + return; +@@ -745,7 +753,9 @@ void lapic_online(void) + { + unsigned int vector; + ++#ifndef CONFIG_IPIPE + lockdep_assert_held(&vector_lock); ++#endif + + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); +@@ -806,13 +816,17 @@ static int apic_retrigger_irq(struct irq + + void apic_ack_irq(struct irq_data *irqd) + { ++#ifndef CONFIG_IPIPE + irq_move_irq(irqd); +- ack_APIC_irq(); ++#endif /* !CONFIG_IPIPE */ ++ __ack_APIC_irq(); + } + + void apic_ack_edge(struct irq_data *irqd) + { ++#ifndef CONFIG_IPIPE + irq_complete_move(irqd_cfg(irqd)); ++#endif /* !CONFIG_IPIPE */ + apic_ack_irq(irqd); + } + +diff -uprN kernel/arch/x86/kernel/apic/vector.c.orig kernel_new/arch/x86/kernel/apic/vector.c.orig +--- kernel/arch/x86/kernel/apic/vector.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/vector.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1249 @@ ++/* ++ * Local APIC related interfaces to support IOAPIC, MSI, etc. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * Moved from arch/x86/kernel/apic/io_apic.c. 
++ * Jiang Liu ++ * Enable support of hierarchical irqdomains ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++struct apic_chip_data { ++ struct irq_cfg hw_irq_cfg; ++ unsigned int vector; ++ unsigned int prev_vector; ++ unsigned int cpu; ++ unsigned int prev_cpu; ++ unsigned int irq; ++ struct hlist_node clist; ++ unsigned int move_in_progress : 1, ++ is_managed : 1, ++ can_reserve : 1, ++ has_reserved : 1; ++}; ++ ++struct irq_domain *x86_vector_domain; ++EXPORT_SYMBOL_GPL(x86_vector_domain); ++static DEFINE_RAW_SPINLOCK(vector_lock); ++static cpumask_var_t vector_searchmask; ++static struct irq_chip lapic_controller; ++static struct irq_matrix *vector_matrix; ++#ifdef CONFIG_SMP ++static DEFINE_PER_CPU(struct hlist_head, cleanup_list); ++#endif ++ ++void lock_vector_lock(void) ++{ ++ /* Used to the online set of cpus does not change ++ * during assign_irq_vector. ++ */ ++ raw_spin_lock(&vector_lock); ++} ++ ++void unlock_vector_lock(void) ++{ ++ raw_spin_unlock(&vector_lock); ++} ++ ++void init_irq_alloc_info(struct irq_alloc_info *info, ++ const struct cpumask *mask) ++{ ++ memset(info, 0, sizeof(*info)); ++ info->mask = mask; ++} ++ ++void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) ++{ ++ if (src) ++ *dst = *src; ++ else ++ memset(dst, 0, sizeof(*dst)); ++} ++ ++static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) ++{ ++ if (!irqd) ++ return NULL; ++ ++ while (irqd->parent_data) ++ irqd = irqd->parent_data; ++ ++ return irqd->chip_data; ++} ++ ++struct irq_cfg *irqd_cfg(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ return apicd ? &apicd->hw_irq_cfg : NULL; ++} ++EXPORT_SYMBOL_GPL(irqd_cfg); ++ ++struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ return irqd_cfg(irq_get_irq_data(irq)); ++} ++ ++static struct apic_chip_data *alloc_apic_chip_data(int node) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); ++ if (apicd) ++ INIT_HLIST_NODE(&apicd->clist); ++ return apicd; ++} ++ ++static void free_apic_chip_data(struct apic_chip_data *apicd) ++{ ++ kfree(apicd); ++} ++ ++static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, ++ unsigned int cpu) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ lockdep_assert_held(&vector_lock); ++ ++ apicd->hw_irq_cfg.vector = vector; ++ apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); ++ irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); ++ trace_vector_config(irqd->irq, vector, cpu, ++ apicd->hw_irq_cfg.dest_apicid); ++} ++ ++static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, ++ unsigned int newcpu) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ struct irq_desc *desc = irq_data_to_desc(irqd); ++ bool managed = irqd_affinity_is_managed(irqd); ++ ++ lockdep_assert_held(&vector_lock); ++ ++ trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, ++ apicd->cpu); ++ ++ /* ++ * If there is no vector associated or if the associated vector is ++ * the shutdown vector, which is associated to make PCI/MSI ++ * shutdown mode work, then there is nothing to release. 
Clear out ++ * prev_vector for this and the offlined target case. ++ */ ++ apicd->prev_vector = 0; ++ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) ++ goto setnew; ++ /* ++ * If the target CPU of the previous vector is online, then mark ++ * the vector as move in progress and store it for cleanup when the ++ * first interrupt on the new vector arrives. If the target CPU is ++ * offline then the regular release mechanism via the cleanup ++ * vector is not possible and the vector can be immediately freed ++ * in the underlying matrix allocator. ++ */ ++ if (cpu_online(apicd->cpu)) { ++ apicd->move_in_progress = true; ++ apicd->prev_vector = apicd->vector; ++ apicd->prev_cpu = apicd->cpu; ++ } else { ++ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, ++ managed); ++ } ++ ++setnew: ++ apicd->vector = newvec; ++ apicd->cpu = newcpu; ++ BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); ++ per_cpu(vector_irq, newcpu)[newvec] = desc; ++} ++ ++static void vector_assign_managed_shutdown(struct irq_data *irqd) ++{ ++ unsigned int cpu = cpumask_first(cpu_online_mask); ++ ++ apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); ++} ++ ++static int reserve_managed_vector(struct irq_data *irqd) ++{ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ int ret; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ apicd->is_managed = true; ++ ret = irq_matrix_reserve_managed(vector_matrix, affmsk); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ trace_vector_reserve_managed(irqd->irq, ret); ++ return ret; ++} ++ ++static void reserve_irq_vector_locked(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ irq_matrix_reserve(vector_matrix); ++ apicd->can_reserve = true; ++ apicd->has_reserved = true; ++ irqd_set_can_reserve(irqd); ++ trace_vector_reserve(irqd->irq, 0); ++ vector_assign_managed_shutdown(irqd); ++} ++ ++static int reserve_irq_vector(struct irq_data *irqd) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ reserve_irq_vector_locked(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return 0; ++} ++ ++static int ++assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ bool resvd = apicd->has_reserved; ++ unsigned int cpu = apicd->cpu; ++ int vector = apicd->vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ /* ++ * If the current target CPU is online and in the new requested ++ * affinity mask, there is no point in moving the interrupt from ++ * one CPU to another. ++ */ ++ if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) ++ return 0; ++ ++ /* ++ * Careful here. @apicd might either have move_in_progress set or ++ * be enqueued for cleanup. Assigning a new vector would either ++ * leave a stale vector on some CPU around or in case of a pending ++ * cleanup corrupt the hlist. 
++ */ ++ if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) ++ return -EBUSY; ++ ++ vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); ++ trace_vector_alloc(irqd->irq, vector, resvd, vector); ++ if (vector < 0) ++ return vector; ++ apic_update_vector(irqd, vector, cpu); ++ apic_update_irq_cfg(irqd, vector, cpu); ++ ++ return 0; ++} ++ ++static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ unsigned long flags; ++ int ret; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ ret = assign_vector_locked(irqd, vector_searchmask); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return ret; ++} ++ ++static int assign_irq_vector_any_locked(struct irq_data *irqd) ++{ ++ /* Get the affinity mask - either irq_default_affinity or (user) set */ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ int node = irq_data_get_node(irqd); ++ ++ if (node == NUMA_NO_NODE) ++ goto all; ++ /* Try the intersection of @affmsk and node mask */ ++ cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); ++ if (!assign_vector_locked(irqd, vector_searchmask)) ++ return 0; ++ /* Try the node mask */ ++ if (!assign_vector_locked(irqd, cpumask_of_node(node))) ++ return 0; ++all: ++ /* Try the full affinity mask */ ++ cpumask_and(vector_searchmask, affmsk, cpu_online_mask); ++ if (!assign_vector_locked(irqd, vector_searchmask)) ++ return 0; ++ /* Try the full online mask */ ++ return assign_vector_locked(irqd, cpu_online_mask); ++} ++ ++static int ++assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) ++{ ++ if (irqd_affinity_is_managed(irqd)) ++ return reserve_managed_vector(irqd); ++ if (info->mask) ++ return assign_irq_vector(irqd, info->mask); ++ /* ++ * Make only a global reservation with no guarantee. A real vector ++ * is associated at activation time. 
++ */ ++ return reserve_irq_vector(irqd); ++} ++ ++static int ++assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) ++{ ++ const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ int vector, cpu; ++ ++ cpumask_and(vector_searchmask, dest, affmsk); ++ ++ /* set_affinity might call here for nothing */ ++ if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) ++ return 0; ++ vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask, ++ &cpu); ++ trace_vector_alloc_managed(irqd->irq, vector, vector); ++ if (vector < 0) ++ return vector; ++ apic_update_vector(irqd, vector, cpu); ++ apic_update_irq_cfg(irqd, vector, cpu); ++ return 0; ++} ++ ++static void clear_irq_vector(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ bool managed = irqd_affinity_is_managed(irqd); ++ unsigned int vector = apicd->vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ if (!vector) ++ return; ++ ++ trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, ++ apicd->prev_cpu); ++ ++ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; ++ irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); ++ apicd->vector = 0; ++ ++ /* Clean up move in progress */ ++ vector = apicd->prev_vector; ++ if (!vector) ++ return; ++ ++ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; ++ irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); ++ apicd->prev_vector = 0; ++ apicd->move_in_progress = 0; ++ hlist_del_init(&apicd->clist); ++} ++ ++static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ ++ trace_vector_deactivate(irqd->irq, apicd->is_managed, ++ apicd->can_reserve, false); ++ ++ /* Regular fixed assigned interrupt */ ++ if (!apicd->is_managed && !apicd->can_reserve) ++ return; ++ /* If the interrupt has a global reservation, nothing to do */ ++ if (apicd->has_reserved) ++ return; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ clear_irq_vector(irqd); ++ if (apicd->can_reserve) ++ reserve_irq_vector_locked(irqd); ++ else ++ vector_assign_managed_shutdown(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++} ++ ++static int activate_reserved(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ int ret; ++ ++ ret = assign_irq_vector_any_locked(irqd); ++ if (!ret) { ++ apicd->has_reserved = false; ++ /* ++ * Core might have disabled reservation mode after ++ * allocating the irq descriptor. Ideally this should ++ * happen before allocation time, but that would require ++ * completely convoluted ways of transporting that ++ * information. ++ */ ++ if (!irqd_can_reserve(irqd)) ++ apicd->can_reserve = false; ++ } ++ ++ /* ++ * Check to ensure that the effective affinity mask is a subset ++ * the user supplied affinity mask, and warn the user if it is not ++ */ ++ if (!cpumask_subset(irq_data_get_effective_affinity_mask(irqd), ++ irq_data_get_affinity_mask(irqd))) { ++ pr_warn("irq %u: Affinity broken due to vector space exhaustion.\n", ++ irqd->irq); ++ } ++ ++ return ret; ++} ++ ++static int activate_managed(struct irq_data *irqd) ++{ ++ const struct cpumask *dest = irq_data_get_affinity_mask(irqd); ++ int ret; ++ ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { ++ /* Something in the core code broke! 
Survive gracefully */ ++ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); ++ return -EINVAL; ++ } ++ ++ ret = assign_managed_vector(irqd, vector_searchmask); ++ /* ++ * This should not happen. The vector reservation got buggered. Handle ++ * it gracefully. ++ */ ++ if (WARN_ON_ONCE(ret < 0)) { ++ pr_err("Managed startup irq %u, no vector available\n", ++ irqd->irq); ++ } ++ return ret; ++} ++ ++static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, ++ bool reserve) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ int ret = 0; ++ ++ trace_vector_activate(irqd->irq, apicd->is_managed, ++ apicd->can_reserve, reserve); ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ if (!apicd->can_reserve && !apicd->is_managed) ++ assign_irq_vector_any_locked(irqd); ++ else if (reserve || irqd_is_managed_and_shutdown(irqd)) ++ vector_assign_managed_shutdown(irqd); ++ else if (apicd->is_managed) ++ ret = activate_managed(irqd); ++ else if (apicd->has_reserved) ++ ret = activate_reserved(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return ret; ++} ++ ++static void vector_free_reserved_and_managed(struct irq_data *irqd) ++{ ++ const struct cpumask *dest = irq_data_get_affinity_mask(irqd); ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ ++ trace_vector_teardown(irqd->irq, apicd->is_managed, ++ apicd->has_reserved); ++ ++ if (apicd->has_reserved) ++ irq_matrix_remove_reserved(vector_matrix); ++ if (apicd->is_managed) ++ irq_matrix_remove_managed(vector_matrix, dest); ++} ++ ++static void x86_vector_free_irqs(struct irq_domain *domain, ++ unsigned int virq, unsigned int nr_irqs) ++{ ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ unsigned long flags; ++ int i; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); ++ if (irqd && irqd->chip_data) { ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ clear_irq_vector(irqd); ++ vector_free_reserved_and_managed(irqd); ++ apicd = irqd->chip_data; ++ irq_domain_reset_irq_data(irqd); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ free_apic_chip_data(apicd); ++ } ++ } ++} ++ ++static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, ++ struct apic_chip_data *apicd) ++{ ++ unsigned long flags; ++ bool realloc = false; ++ ++ apicd->vector = ISA_IRQ_VECTOR(virq); ++ apicd->cpu = 0; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ /* ++ * If the interrupt is activated, then it must stay at this vector ++ * position. That's usually the timer interrupt (0). 
++ */ ++ if (irqd_is_activated(irqd)) { ++ trace_vector_setup(virq, true, 0); ++ apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); ++ } else { ++ /* Release the vector */ ++ apicd->can_reserve = true; ++ irqd_set_can_reserve(irqd); ++ clear_irq_vector(irqd); ++ realloc = true; ++ } ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ return realloc; ++} ++ ++static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ struct irq_alloc_info *info = arg; ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ int i, err, node; ++ ++ if (disable_apic) ++ return -ENXIO; ++ ++ /* Currently vector allocator can't guarantee contiguous allocations */ ++ if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) ++ return -ENOSYS; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ irqd = irq_domain_get_irq_data(domain, virq + i); ++ BUG_ON(!irqd); ++ node = irq_data_get_node(irqd); ++ WARN_ON_ONCE(irqd->chip_data); ++ apicd = alloc_apic_chip_data(node); ++ if (!apicd) { ++ err = -ENOMEM; ++ goto error; ++ } ++ ++ apicd->irq = virq + i; ++ irqd->chip = &lapic_controller; ++ irqd->chip_data = apicd; ++ irqd->hwirq = virq + i; ++ irqd_set_single_target(irqd); ++ ++ /* Don't invoke affinity setter on deactivated interrupts */ ++ irqd_set_affinity_on_activate(irqd); ++ ++ /* ++ * Legacy vectors are already assigned when the IOAPIC ++ * takes them over. They stay on the same vector. This is ++ * required for check_timer() to work correctly as it might ++ * switch back to legacy mode. Only update the hardware ++ * config. ++ */ ++ if (info->flags & X86_IRQ_ALLOC_LEGACY) { ++ if (!vector_configure_legacy(virq + i, irqd, apicd)) ++ continue; ++ } ++ ++ err = assign_irq_vector_policy(irqd, info); ++ trace_vector_setup(virq + i, false, err); ++ if (err) { ++ irqd->chip_data = NULL; ++ free_apic_chip_data(apicd); ++ goto error; ++ } ++ } ++ ++ return 0; ++ ++error: ++ x86_vector_free_irqs(domain, virq, i); ++ return err; ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, ++ struct irq_data *irqd, int ind) ++{ ++ struct apic_chip_data apicd; ++ unsigned long flags; ++ int irq; ++ ++ if (!irqd) { ++ irq_matrix_debug_show(m, vector_matrix, ind); ++ return; ++ } ++ ++ irq = irqd->irq; ++ if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { ++ seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); ++ seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); ++ return; ++ } ++ ++ if (!irqd->chip_data) { ++ seq_printf(m, "%*sVector: Not assigned\n", ind, ""); ++ return; ++ } ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ memcpy(&apicd, irqd->chip_data, sizeof(apicd)); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ ++ seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); ++ seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); ++ if (apicd.prev_vector) { ++ seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); ++ seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); ++ } ++ seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); ++ seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); ++ seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); ++ seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 
1 : 0); ++ seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); ++} ++#endif ++ ++static const struct irq_domain_ops x86_vector_domain_ops = { ++ .alloc = x86_vector_alloc_irqs, ++ .free = x86_vector_free_irqs, ++ .activate = x86_vector_activate, ++ .deactivate = x86_vector_deactivate, ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++ .debug_show = x86_vector_debug_show, ++#endif ++}; ++ ++int __init arch_probe_nr_irqs(void) ++{ ++ int nr; ++ ++ if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) ++ nr_irqs = NR_VECTORS * nr_cpu_ids; ++ ++ nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; ++#if defined(CONFIG_PCI_MSI) ++ /* ++ * for MSI and HT dyn irq ++ */ ++ if (gsi_top <= NR_IRQS_LEGACY) ++ nr += 8 * nr_cpu_ids; ++ else ++ nr += gsi_top * 16; ++#endif ++ if (nr < nr_irqs) ++ nr_irqs = nr; ++ ++ /* ++ * We don't know if PIC is present at this point so we need to do ++ * probe() to get the right number of legacy IRQs. ++ */ ++ return legacy_pic->probe(); ++} ++ ++void lapic_assign_legacy_vector(unsigned int irq, bool replace) ++{ ++ /* ++ * Use assign system here so it wont get accounted as allocated ++ * and moveable in the cpu hotplug check and it prevents managed ++ * irq reservation from touching it. ++ */ ++ irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); ++} ++ ++void __init lapic_assign_system_vectors(void) ++{ ++ unsigned int i, vector = 0; ++ ++ for_each_set_bit_from(vector, system_vectors, NR_VECTORS) ++ irq_matrix_assign_system(vector_matrix, vector, false); ++ ++ if (nr_legacy_irqs() > 1) ++ lapic_assign_legacy_vector(PIC_CASCADE_IR, false); ++ ++ /* System vectors are reserved, online it */ ++ irq_matrix_online(vector_matrix); ++ ++ /* Mark the preallocated legacy interrupts */ ++ for (i = 0; i < nr_legacy_irqs(); i++) { ++ if (i != PIC_CASCADE_IR) ++ irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); ++ } ++} ++ ++int __init arch_early_irq_init(void) ++{ ++ struct fwnode_handle *fn; ++ ++ fn = irq_domain_alloc_named_fwnode("VECTOR"); ++ BUG_ON(!fn); ++ x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, ++ NULL); ++ BUG_ON(x86_vector_domain == NULL); ++ irq_domain_free_fwnode(fn); ++ irq_set_default_host(x86_vector_domain); ++ ++ arch_init_msi_domain(x86_vector_domain); ++ ++ BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); ++ ++ /* ++ * Allocate the vector matrix allocator data structure and limit the ++ * search area. ++ */ ++ vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, ++ FIRST_SYSTEM_VECTOR); ++ BUG_ON(!vector_matrix); ++ ++ return arch_early_ioapic_init(); ++} ++ ++#ifdef CONFIG_SMP ++ ++static struct irq_desc *__setup_vector_irq(int vector) ++{ ++ int isairq = vector - ISA_IRQ_VECTOR(0); ++ ++ /* Check whether the irq is in the legacy space */ ++ if (isairq < 0 || isairq >= nr_legacy_irqs()) ++ return VECTOR_UNUSED; ++ /* Check whether the irq is handled by the IOAPIC */ ++ if (test_bit(isairq, &io_apic_irqs)) ++ return VECTOR_UNUSED; ++ return irq_to_desc(isairq); ++} ++ ++/* Online the local APIC infrastructure and initialize the vectors */ ++void lapic_online(void) ++{ ++ unsigned int vector; ++ ++ lockdep_assert_held(&vector_lock); ++ ++ /* Online the vector matrix array for this CPU */ ++ irq_matrix_online(vector_matrix); ++ ++ /* ++ * The interrupt affinity logic never targets interrupts to offline ++ * CPUs. The exception are the legacy PIC interrupts. 
In general ++ * they are only targeted to CPU0, but depending on the platform ++ * they can be distributed to any online CPU in hardware. The ++ * kernel has no influence on that. So all active legacy vectors ++ * must be installed on all CPUs. All non legacy interrupts can be ++ * cleared. ++ */ ++ for (vector = 0; vector < NR_VECTORS; vector++) ++ this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); ++} ++ ++void lapic_offline(void) ++{ ++ lock_vector_lock(); ++ irq_matrix_offline(vector_matrix); ++ unlock_vector_lock(); ++} ++ ++static int apic_set_affinity(struct irq_data *irqd, ++ const struct cpumask *dest, bool force) ++{ ++ int err; ++ ++ if (WARN_ON_ONCE(!irqd_is_activated(irqd))) ++ return -EIO; ++ ++ raw_spin_lock(&vector_lock); ++ cpumask_and(vector_searchmask, dest, cpu_online_mask); ++ if (irqd_affinity_is_managed(irqd)) ++ err = assign_managed_vector(irqd, vector_searchmask); ++ else ++ err = assign_vector_locked(irqd, vector_searchmask); ++ raw_spin_unlock(&vector_lock); ++ return err ? err : IRQ_SET_MASK_OK; ++} ++ ++#else ++# define apic_set_affinity NULL ++#endif ++ ++static int apic_retrigger_irq(struct irq_data *irqd) ++{ ++ struct apic_chip_data *apicd = apic_chip_data(irqd); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&vector_lock, flags); ++ apic->send_IPI(apicd->cpu, apicd->vector); ++ raw_spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return 1; ++} ++ ++void apic_ack_irq(struct irq_data *irqd) ++{ ++ irq_move_irq(irqd); ++ ack_APIC_irq(); ++} ++ ++void apic_ack_edge(struct irq_data *irqd) ++{ ++ irq_complete_move(irqd_cfg(irqd)); ++ apic_ack_irq(irqd); ++} ++ ++static struct irq_chip lapic_controller = { ++ .name = "APIC", ++ .irq_ack = apic_ack_edge, ++ .irq_set_affinity = apic_set_affinity, ++ .irq_retrigger = apic_retrigger_irq, ++}; ++ ++#ifdef CONFIG_SMP ++ ++static void free_moved_vector(struct apic_chip_data *apicd) ++{ ++ unsigned int vector = apicd->prev_vector; ++ unsigned int cpu = apicd->prev_cpu; ++ bool managed = apicd->is_managed; ++ ++ /* ++ * This should never happen. Managed interrupts are not ++ * migrated except on CPU down, which does not involve the ++ * cleanup vector. But try to keep the accounting correct ++ * nevertheless. ++ */ ++ WARN_ON_ONCE(managed); ++ ++ trace_vector_free_moved(apicd->irq, cpu, vector, managed); ++ irq_matrix_free(vector_matrix, cpu, vector, managed); ++ per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; ++ hlist_del_init(&apicd->clist); ++ apicd->prev_vector = 0; ++ apicd->move_in_progress = 0; ++} ++ ++asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) ++{ ++ struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); ++ struct apic_chip_data *apicd; ++ struct hlist_node *tmp; ++ ++ entering_ack_irq(); ++ /* Prevent vectors vanishing under us */ ++ raw_spin_lock(&vector_lock); ++ ++ hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { ++ unsigned int irr, vector = apicd->prev_vector; ++ ++ /* ++ * Paranoia: Check if the vector that needs to be cleaned ++ * up is registered at the APICs IRR. If so, then this is ++ * not the best time to clean it up. Clean it up in the ++ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR ++ * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest ++ * priority external vector, so on return from this ++ * interrupt the device interrupt will happen first. 
++ */ ++ irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); ++ if (irr & (1U << (vector % 32))) { ++ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); ++ continue; ++ } ++ free_moved_vector(apicd); ++ } ++ ++ raw_spin_unlock(&vector_lock); ++ exiting_irq(); ++} ++ ++static void __send_cleanup_vector(struct apic_chip_data *apicd) ++{ ++ unsigned int cpu; ++ ++ raw_spin_lock(&vector_lock); ++ apicd->move_in_progress = 0; ++ cpu = apicd->prev_cpu; ++ if (cpu_online(cpu)) { ++ hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); ++ apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); ++ } else { ++ apicd->prev_vector = 0; ++ } ++ raw_spin_unlock(&vector_lock); ++} ++ ++void send_cleanup_vector(struct irq_cfg *cfg) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); ++ if (apicd->move_in_progress) ++ __send_cleanup_vector(apicd); ++} ++ ++static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) ++{ ++ struct apic_chip_data *apicd; ++ ++ apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); ++ if (likely(!apicd->move_in_progress)) ++ return; ++ ++ if (vector == apicd->vector && apicd->cpu == smp_processor_id()) ++ __send_cleanup_vector(apicd); ++} ++ ++void irq_complete_move(struct irq_cfg *cfg) ++{ ++ __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); ++} ++ ++/* ++ * Called from fixup_irqs() with @desc->lock held and interrupts disabled. ++ */ ++void irq_force_complete_move(struct irq_desc *desc) ++{ ++ struct apic_chip_data *apicd; ++ struct irq_data *irqd; ++ unsigned int vector; ++ ++ /* ++ * The function is called for all descriptors regardless of which ++ * irqdomain they belong to. For example if an IRQ is provided by ++ * an irq_chip as part of a GPIO driver, the chip data for that ++ * descriptor is specific to the irq_chip in question. ++ * ++ * Check first that the chip_data is what we expect ++ * (apic_chip_data) before touching it any further. ++ */ ++ irqd = irq_domain_get_irq_data(x86_vector_domain, ++ irq_desc_get_irq(desc)); ++ if (!irqd) ++ return; ++ ++ raw_spin_lock(&vector_lock); ++ apicd = apic_chip_data(irqd); ++ if (!apicd) ++ goto unlock; ++ ++ /* ++ * If prev_vector is empty, no action required. ++ */ ++ vector = apicd->prev_vector; ++ if (!vector) ++ goto unlock; ++ ++ /* ++ * This is tricky. If the cleanup of the old vector has not been ++ * done yet, then the following setaffinity call will fail with ++ * -EBUSY. This can leave the interrupt in a stale state. ++ * ++ * All CPUs are stuck in stop machine with interrupts disabled so ++ * calling __irq_complete_move() would be completely pointless. ++ * ++ * 1) The interrupt is in move_in_progress state. That means that we ++ * have not seen an interrupt since the io_apic was reprogrammed to ++ * the new vector. ++ * ++ * 2) The interrupt has fired on the new vector, but the cleanup IPIs ++ * have not been processed yet. ++ */ ++ if (apicd->move_in_progress) { ++ /* ++ * In theory there is a race: ++ * ++ * set_ioapic(new_vector) <-- Interrupt is raised before update ++ * is effective, i.e. it's raised on ++ * the old vector. ++ * ++ * So if the target cpu cannot handle that interrupt before ++ * the old vector is cleaned up, we get a spurious interrupt ++ * and in the worst case the ioapic irq line becomes stale. 
++ * ++ * But in case of cpu hotplug this should be a non issue ++ * because if the affinity update happens right before all ++ * cpus rendevouz in stop machine, there is no way that the ++ * interrupt can be blocked on the target cpu because all cpus ++ * loops first with interrupts enabled in stop machine, so the ++ * old vector is not yet cleaned up when the interrupt fires. ++ * ++ * So the only way to run into this issue is if the delivery ++ * of the interrupt on the apic/system bus would be delayed ++ * beyond the point where the target cpu disables interrupts ++ * in stop machine. I doubt that it can happen, but at least ++ * there is a theroretical chance. Virtualization might be ++ * able to expose this, but AFAICT the IOAPIC emulation is not ++ * as stupid as the real hardware. ++ * ++ * Anyway, there is nothing we can do about that at this point ++ * w/o refactoring the whole fixup_irq() business completely. ++ * We print at least the irq number and the old vector number, ++ * so we have the necessary information when a problem in that ++ * area arises. ++ */ ++ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", ++ irqd->irq, vector); ++ } ++ free_moved_vector(apicd); ++unlock: ++ raw_spin_unlock(&vector_lock); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Note, this is not accurate accounting, but at least good enough to ++ * prevent that the actual interrupt move will run out of vectors. ++ */ ++int lapic_can_unplug_cpu(void) ++{ ++ unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); ++ int ret = 0; ++ ++ raw_spin_lock(&vector_lock); ++ tomove = irq_matrix_allocated(vector_matrix); ++ avl = irq_matrix_available(vector_matrix, true); ++ if (avl < tomove) { ++ pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", ++ cpu, tomove, avl); ++ ret = -ENOSPC; ++ goto out; ++ } ++ rsvd = irq_matrix_reserved(vector_matrix); ++ if (avl < rsvd) { ++ pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", ++ rsvd, avl); ++ } ++out: ++ raw_spin_unlock(&vector_lock); ++ return ret; ++} ++#endif /* HOTPLUG_CPU */ ++#endif /* SMP */ ++ ++static void __init print_APIC_field(int base) ++{ ++ int i; ++ ++ printk(KERN_DEBUG); ++ ++ for (i = 0; i < 8; i++) ++ pr_cont("%08x", apic_read(base + i*0x10)); ++ ++ pr_cont("\n"); ++} ++ ++static void __init print_local_APIC(void *dummy) ++{ ++ unsigned int i, v, ver, maxlvt; ++ u64 icr; ++ ++ pr_debug("printing local APIC contents on CPU#%d/%d:\n", ++ smp_processor_id(), hard_smp_processor_id()); ++ v = apic_read(APIC_ID); ++ pr_info("... APIC ID: %08x (%01x)\n", v, read_apic_id()); ++ v = apic_read(APIC_LVR); ++ pr_info("... APIC VERSION: %08x\n", v); ++ ver = GET_APIC_VERSION(v); ++ maxlvt = lapic_get_maxlvt(); ++ ++ v = apic_read(APIC_TASKPRI); ++ pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); ++ ++ /* !82489DX */ ++ if (APIC_INTEGRATED(ver)) { ++ if (!APIC_XAPIC(ver)) { ++ v = apic_read(APIC_ARBPRI); ++ pr_debug("... APIC ARBPRI: %08x (%02x)\n", ++ v, v & APIC_ARBPRI_MASK); ++ } ++ v = apic_read(APIC_PROCPRI); ++ pr_debug("... APIC PROCPRI: %08x\n", v); ++ } ++ ++ /* ++ * Remote read supported only in the 82489DX and local APIC for ++ * Pentium processors. ++ */ ++ if (!APIC_INTEGRATED(ver) || maxlvt == 3) { ++ v = apic_read(APIC_RRR); ++ pr_debug("... APIC RRR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_LDR); ++ pr_debug("... APIC LDR: %08x\n", v); ++ if (!x2apic_enabled()) { ++ v = apic_read(APIC_DFR); ++ pr_debug("... 
APIC DFR: %08x\n", v); ++ } ++ v = apic_read(APIC_SPIV); ++ pr_debug("... APIC SPIV: %08x\n", v); ++ ++ pr_debug("... APIC ISR field:\n"); ++ print_APIC_field(APIC_ISR); ++ pr_debug("... APIC TMR field:\n"); ++ print_APIC_field(APIC_TMR); ++ pr_debug("... APIC IRR field:\n"); ++ print_APIC_field(APIC_IRR); ++ ++ /* !82489DX */ ++ if (APIC_INTEGRATED(ver)) { ++ /* Due to the Pentium erratum 3AP. */ ++ if (maxlvt > 3) ++ apic_write(APIC_ESR, 0); ++ ++ v = apic_read(APIC_ESR); ++ pr_debug("... APIC ESR: %08x\n", v); ++ } ++ ++ icr = apic_icr_read(); ++ pr_debug("... APIC ICR: %08x\n", (u32)icr); ++ pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32)); ++ ++ v = apic_read(APIC_LVTT); ++ pr_debug("... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { ++ /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ pr_debug("... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ pr_debug("... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ pr_debug("... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { ++ /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ pr_debug("... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ pr_debug("... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ pr_debug("... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ pr_debug("... APIC TDCR: %08x\n", v); ++ ++ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { ++ v = apic_read(APIC_EFEAT); ++ maxlvt = (v >> 16) & 0xff; ++ pr_debug("... APIC EFEAT: %08x\n", v); ++ v = apic_read(APIC_ECTRL); ++ pr_debug("... APIC ECTRL: %08x\n", v); ++ for (i = 0; i < maxlvt; i++) { ++ v = apic_read(APIC_EILVTn(i)); ++ pr_debug("... APIC EILVT%d: %08x\n", i, v); ++ } ++ } ++ pr_cont("\n"); ++} ++ ++static void __init print_local_APICs(int maxcpu) ++{ ++ int cpu; ++ ++ if (!maxcpu) ++ return; ++ ++ preempt_disable(); ++ for_each_online_cpu(cpu) { ++ if (cpu >= maxcpu) ++ break; ++ smp_call_function_single(cpu, print_local_APIC, NULL, 1); ++ } ++ preempt_enable(); ++} ++ ++static void __init print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (!nr_legacy_irqs()) ++ return; ++ ++ pr_debug("\nprinting PIC contents\n"); ++ ++ raw_spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ pr_debug("... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ pr_debug("... PIC IRR: %04x\n", v); ++ ++ outb(0x0b, 0xa0); ++ outb(0x0b, 0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a, 0xa0); ++ outb(0x0a, 0x20); ++ ++ raw_spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ pr_debug("... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ pr_debug("... 
PIC ELCR: %04x\n", v); ++} ++ ++static int show_lapic __initdata = 1; ++static __init int setup_show_lapic(char *arg) ++{ ++ int num = -1; ++ ++ if (strcmp(arg, "all") == 0) { ++ show_lapic = CONFIG_NR_CPUS; ++ } else { ++ get_option(&arg, &num); ++ if (num >= 0) ++ show_lapic = num; ++ } ++ ++ return 1; ++} ++__setup("show_lapic=", setup_show_lapic); ++ ++static int __init print_ICs(void) ++{ ++ if (apic_verbosity == APIC_QUIET) ++ return 0; ++ ++ print_PIC(); ++ ++ /* don't print out if apic is not there */ ++ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config()) ++ return 0; ++ ++ print_local_APICs(show_lapic); ++ print_IO_APICs(); ++ ++ return 0; ++} ++ ++late_initcall(print_ICs); +diff -uprN kernel/arch/x86/kernel/apic/x2apic_cluster.c kernel_new/arch/x86/kernel/apic/x2apic_cluster.c +--- kernel/arch/x86/kernel/apic/x2apic_cluster.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/x2apic_cluster.c 2021-04-01 18:28:07.654863288 +0800 +@@ -44,7 +44,7 @@ __x2apic_send_IPI_mask(const struct cpum + u32 dest; + + x2apic_wrmsr_fence(); +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); +@@ -68,7 +68,7 @@ __x2apic_send_IPI_mask(const struct cpum + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + } + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +diff -uprN kernel/arch/x86/kernel/apic/x2apic_phys.c kernel_new/arch/x86/kernel/apic/x2apic_phys.c +--- kernel/arch/x86/kernel/apic/x2apic_phys.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/apic/x2apic_phys.c 2021-04-01 18:28:07.654863288 +0800 +@@ -55,7 +55,7 @@ __x2apic_send_IPI_mask(const struct cpum + + x2apic_wrmsr_fence(); + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { +@@ -64,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpum + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +diff -uprN kernel/arch/x86/kernel/asm-offsets.c kernel_new/arch/x86/kernel/asm-offsets.c +--- kernel/arch/x86/kernel/asm-offsets.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/asm-offsets.c 2021-04-01 18:28:07.654863288 +0800 +@@ -38,6 +38,9 @@ void common(void) { + + BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); ++#ifdef CONFIG_IPIPE ++ OFFSET(TASK_TI_ipipe, task_struct, thread_info.ipipe_flags); ++#endif + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); + + BLANK(); +diff -uprN kernel/arch/x86/kernel/cpu/common.c kernel_new/arch/x86/kernel/cpu/common.c +--- kernel/arch/x86/kernel/cpu/common.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/common.c 2021-04-01 18:28:07.655863287 +0800 +@@ -1674,6 +1674,7 @@ void syscall_init(void) + DEFINE_PER_CPU(struct orig_ist, orig_ist); + + static DEFINE_PER_CPU(unsigned long, debug_stack_addr); ++#ifndef CONFIG_IPIPE + DEFINE_PER_CPU(int, debug_stack_usage); + + int is_debug_stack(unsigned long addr) +@@ -1701,6 +1702,7 @@ void debug_stack_reset(void) + load_current_idt(); + } + NOKPROBE_SYMBOL(debug_stack_reset); ++#endif /* !CONFIG_IPIPE */ + + #else /* CONFIG_X86_64 */ + +diff -uprN kernel/arch/x86/kernel/cpu/common.c.orig 
kernel_new/arch/x86/kernel/cpu/common.c.orig +--- kernel/arch/x86/kernel/cpu/common.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/common.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1993 @@ ++/* cpu_feature_enabled() cannot be used this early */ ++#define USE_EARLY_PGTABLE_L5 ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#endif ++ ++#include "cpu.h" ++ ++u32 elf_hwcap2 __read_mostly; ++ ++/* all of these masks are initialized in setup_cpu_local_masks() */ ++cpumask_var_t cpu_initialized_mask; ++cpumask_var_t cpu_callout_mask; ++cpumask_var_t cpu_callin_mask; ++ ++/* representing cpus for which sibling maps can be computed */ ++cpumask_var_t cpu_sibling_setup_mask; ++ ++/* Number of siblings per CPU package */ ++int smp_num_siblings = 1; ++EXPORT_SYMBOL(smp_num_siblings); ++ ++/* Last level cache ID of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; ++ ++/* correctly size the local cpu masks */ ++void __init setup_cpu_local_masks(void) ++{ ++ alloc_bootmem_cpumask_var(&cpu_initialized_mask); ++ alloc_bootmem_cpumask_var(&cpu_callin_mask); ++ alloc_bootmem_cpumask_var(&cpu_callout_mask); ++ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); ++} ++ ++static void default_init(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ cpu_detect_cache_sizes(c); ++#else ++ /* Not much we can do here... */ ++ /* Check if at least it has cpuid */ ++ if (c->cpuid_level == -1) { ++ /* No cpuid. It must be an ancient CPU */ ++ if (c->x86 == 4) ++ strcpy(c->x86_model_id, "486"); ++ else if (c->x86 == 3) ++ strcpy(c->x86_model_id, "386"); ++ } ++#endif ++} ++ ++static const struct cpu_dev default_cpu = { ++ .c_init = default_init, ++ .c_vendor = "Unknown", ++ .c_x86_vendor = X86_VENDOR_UNKNOWN, ++}; ++ ++static const struct cpu_dev *this_cpu = &default_cpu; ++ ++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { ++#ifdef CONFIG_X86_64 ++ /* ++ * We need valid kernel segments for data and code in long mode too ++ * IRET will check the segment types kkeil 2000/10/28 ++ * Also sysret mandates a special GDT layout ++ * ++ * TLS descriptors are currently at a different place compared to i386. ++ * Hopefully nobody expects them at a fixed place (Wine?) 
++ */ ++ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), ++#else ++ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), ++ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), ++ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), ++ /* ++ * Segments used for calling PnP BIOS have byte granularity. ++ * They code segments and data segments have fixed 64k limits, ++ * the transfer segment sizes are set at run time. ++ */ ++ /* 32-bit code */ ++ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), ++ /* 16-bit code */ ++ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), ++ /* 16-bit data */ ++ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), ++ /* ++ * The APM segments have byte granularity and their bases ++ * are set at run time. All have 64k limits. ++ */ ++ /* 32-bit code */ ++ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), ++ /* 16-bit code */ ++ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), ++ /* data */ ++ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), ++ ++ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), ++ GDT_STACK_CANARY_INIT ++#endif ++} }; ++EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); ++ ++static int __init x86_mpx_setup(char *s) ++{ ++ /* require an exact match without trailing characters */ ++ if (strlen(s)) ++ return 0; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_MPX)) ++ return 1; ++ ++ setup_clear_cpu_cap(X86_FEATURE_MPX); ++ pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); ++ return 1; ++} ++__setup("nompx", x86_mpx_setup); ++ ++#ifdef CONFIG_X86_64 ++static int __init x86_nopcid_setup(char *s) ++{ ++ /* nopcid doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_PCID)) ++ return 0; ++ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++ pr_info("nopcid: PCID feature disabled\n"); ++ return 0; ++} ++early_param("nopcid", x86_nopcid_setup); ++#endif ++ ++static int __init x86_noinvpcid_setup(char *s) ++{ ++ /* noinvpcid doesn't accept parameters */ ++ if (s) ++ return -EINVAL; ++ ++ /* do not emit a message if the feature is not present */ ++ if (!boot_cpu_has(X86_FEATURE_INVPCID)) ++ return 0; ++ ++ setup_clear_cpu_cap(X86_FEATURE_INVPCID); ++ pr_info("noinvpcid: INVPCID feature disabled\n"); ++ return 0; ++} ++early_param("noinvpcid", x86_noinvpcid_setup); ++ ++#ifdef CONFIG_X86_32 ++static int cachesize_override = -1; ++static int disable_x86_serial_nr = 1; ++ ++static int __init cachesize_setup(char *str) ++{ ++ get_option(&str, &cachesize_override); ++ return 1; ++} ++__setup("cachesize=", cachesize_setup); ++ ++static int __init x86_sep_setup(char *s) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SEP); ++ return 1; ++} ++__setup("nosep", x86_sep_setup); ++ ++/* Standard 
macro to see if a specific flag is changeable */ ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ u32 f1, f2; ++ ++ /* ++ * Cyrix and IDT cpus allow disabling of CPUID ++ * so the code below may return different results ++ * when it is executed before and after enabling ++ * the CPUID. Add "volatile" to not allow gcc to ++ * optimize the subsequent calls to this function. ++ */ ++ asm volatile ("pushfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "movl %0, %1 \n\t" ++ "xorl %2, %0 \n\t" ++ "pushl %0 \n\t" ++ "popfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "popfl \n\t" ++ ++ : "=&r" (f1), "=&r" (f2) ++ : "ir" (flag)); ++ ++ return ((f1^f2) & flag) != 0; ++} ++ ++/* Probe for the CPUID instruction */ ++int have_cpuid_p(void) ++{ ++ return flag_is_changeable_p(X86_EFLAGS_ID); ++} ++ ++static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++ unsigned long lo, hi; ++ ++ if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) ++ return; ++ ++ /* Disable processor serial number: */ ++ ++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ ++ pr_notice("CPU serial number disabled.\n"); ++ clear_cpu_cap(c, X86_FEATURE_PN); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); ++} ++ ++static int __init x86_serial_nr_setup(char *s) ++{ ++ disable_x86_serial_nr = 0; ++ return 1; ++} ++__setup("serialnumber", x86_serial_nr_setup); ++#else ++static inline int flag_is_changeable_p(u32 flag) ++{ ++ return 1; ++} ++static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) ++{ ++} ++#endif ++ ++static __init int setup_disable_smep(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SMEP); ++ /* Check for things that depend on SMEP being enabled: */ ++ check_mpx_erratum(&boot_cpu_data); ++ return 1; ++} ++__setup("nosmep", setup_disable_smep); ++ ++static __always_inline void setup_smep(struct cpuinfo_x86 *c) ++{ ++ if (cpu_has(c, X86_FEATURE_SMEP)) ++ cr4_set_bits(X86_CR4_SMEP); ++} ++ ++static __init int setup_disable_smap(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_SMAP); ++ return 1; ++} ++__setup("nosmap", setup_disable_smap); ++ ++static __always_inline void setup_smap(struct cpuinfo_x86 *c) ++{ ++ unsigned long eflags = native_save_fl(); ++ ++ /* This should have been cleared long ago */ ++ BUG_ON(eflags & X86_EFLAGS_AC); ++ ++ if (cpu_has(c, X86_FEATURE_SMAP)) { ++#ifdef CONFIG_X86_SMAP ++ cr4_set_bits(X86_CR4_SMAP); ++#else ++ cr4_clear_bits(X86_CR4_SMAP); ++#endif ++ } ++} ++ ++static __always_inline void setup_umip(struct cpuinfo_x86 *c) ++{ ++ /* Check the boot processor, plus build option for UMIP. */ ++ if (!cpu_feature_enabled(X86_FEATURE_UMIP)) ++ goto out; ++ ++ /* Check the current processor's cpuid bits. */ ++ if (!cpu_has(c, X86_FEATURE_UMIP)) ++ goto out; ++ ++ cr4_set_bits(X86_CR4_UMIP); ++ ++ pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); ++ ++ return; ++ ++out: ++ /* ++ * Make sure UMIP is disabled in case it was enabled in a ++ * previous boot (e.g., via kexec). ++ */ ++ cr4_clear_bits(X86_CR4_UMIP); ++} ++ ++/* ++ * Protection Keys are not available in 32-bit mode. 
++ */ ++static bool pku_disabled; ++ ++static __always_inline void setup_pku(struct cpuinfo_x86 *c) ++{ ++ /* check the boot processor, plus compile options for PKU: */ ++ if (!cpu_feature_enabled(X86_FEATURE_PKU)) ++ return; ++ /* checks the actual processor's cpuid bits: */ ++ if (!cpu_has(c, X86_FEATURE_PKU)) ++ return; ++ if (pku_disabled) ++ return; ++ ++ cr4_set_bits(X86_CR4_PKE); ++ /* ++ * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE ++ * cpuid bit to be set. We need to ensure that we ++ * update that bit in this CPU's "cpu_info". ++ */ ++ get_cpu_cap(c); ++} ++ ++#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS ++static __init int setup_disable_pku(char *arg) ++{ ++ /* ++ * Do not clear the X86_FEATURE_PKU bit. All of the ++ * runtime checks are against OSPKE so clearing the ++ * bit does nothing. ++ * ++ * This way, we will see "pku" in cpuinfo, but not ++ * "ospke", which is exactly what we want. It shows ++ * that the CPU has PKU, but the OS has not enabled it. ++ * This happens to be exactly how a system would look ++ * if we disabled the config option. ++ */ ++ pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n"); ++ pku_disabled = true; ++ return 1; ++} ++__setup("nopku", setup_disable_pku); ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Some CPU features depend on higher CPUID levels, which may not always ++ * be available due to CPUID level capping or broken virtualization ++ * software. Add those features to this table to auto-disable them. ++ */ ++struct cpuid_dependent_feature { ++ u32 feature; ++ u32 level; ++}; ++ ++static const struct cpuid_dependent_feature ++cpuid_dependent_features[] = { ++ { X86_FEATURE_MWAIT, 0x00000005 }, ++ { X86_FEATURE_DCA, 0x00000009 }, ++ { X86_FEATURE_XSAVE, 0x0000000d }, ++ { 0, 0 } ++}; ++ ++static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) ++{ ++ const struct cpuid_dependent_feature *df; ++ ++ for (df = cpuid_dependent_features; df->feature; df++) { ++ ++ if (!cpu_has(c, df->feature)) ++ continue; ++ /* ++ * Note: cpuid_level is set to -1 if unavailable, but ++ * extended_extended_level is set to 0 if unavailable ++ * and the legitimate extended levels are all negative ++ * when signed; hence the weird messing around with ++ * signs here... ++ */ ++ if (!((s32)df->level < 0 ? ++ (u32)df->level > (u32)c->extended_cpuid_level : ++ (s32)df->level > (s32)c->cpuid_level)) ++ continue; ++ ++ clear_cpu_cap(c, df->feature); ++ if (!warn) ++ continue; ++ ++ pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", ++ x86_cap_flag(df->feature), df->level); ++ } ++} ++ ++/* ++ * Naming convention should be: [()] ++ * This table only is used unless init_() below doesn't set it; ++ * in particular, if CPUID levels 0x80000002..4 are supported, this ++ * isn't used ++ */ ++ ++/* Look up CPU names by table lookup. 
*/ ++static const char *table_lookup_model(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_32 ++ const struct legacy_cpu_model_info *info; ++ ++ if (c->x86_model >= 16) ++ return NULL; /* Range check */ ++ ++ if (!this_cpu) ++ return NULL; ++ ++ info = this_cpu->legacy_models; ++ ++ while (info->family) { ++ if (info->family == c->x86) ++ return info->model_names[c->x86_model]; ++ info++; ++ } ++#endif ++ return NULL; /* Not found */ ++} ++ ++__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; ++__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; ++ ++void load_percpu_segment(int cpu) ++{ ++#ifdef CONFIG_X86_32 ++ loadsegment(fs, __KERNEL_PERCPU); ++#else ++ __loadsegment_simple(gs, 0); ++ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); ++#endif ++ load_stack_canary_segment(); ++} ++ ++#ifdef CONFIG_X86_32 ++/* The 32-bit entry code needs to find cpu_entry_area. */ ++DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); ++#endif ++ ++#ifdef CONFIG_X86_64 ++/* ++ * Special IST stacks which the CPU switches to when it calls ++ * an IST-marked descriptor entry. Up to 7 stacks (hardware ++ * limit), all of them are 4K, except the debug stack which ++ * is 8K. ++ */ ++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, ++ [DEBUG_STACK - 1] = DEBUG_STKSZ ++}; ++#endif ++ ++/* Load the original GDT from the per-cpu structure */ ++void load_direct_gdt(int cpu) ++{ ++ struct desc_ptr gdt_descr; ++ ++ gdt_descr.address = (long)get_cpu_gdt_rw(cpu); ++ gdt_descr.size = GDT_SIZE - 1; ++ load_gdt(&gdt_descr); ++} ++EXPORT_SYMBOL_GPL(load_direct_gdt); ++ ++/* Load a fixmap remapping of the per-cpu GDT */ ++void load_fixmap_gdt(int cpu) ++{ ++ struct desc_ptr gdt_descr; ++ ++ gdt_descr.address = (long)get_cpu_gdt_ro(cpu); ++ gdt_descr.size = GDT_SIZE - 1; ++ load_gdt(&gdt_descr); ++} ++EXPORT_SYMBOL_GPL(load_fixmap_gdt); ++ ++/* ++ * Current gdt points %fs at the "master" per-cpu area: after this, ++ * it's on the real one. 
++ */ ++void switch_to_new_gdt(int cpu) ++{ ++ /* Load the original GDT */ ++ load_direct_gdt(cpu); ++ /* Reload the per-cpu base */ ++ load_percpu_segment(cpu); ++} ++ ++static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ++ ++static void get_model_name(struct cpuinfo_x86 *c) ++{ ++ unsigned int *v; ++ char *p, *q, *s; ++ ++ if (c->extended_cpuid_level < 0x80000004) ++ return; ++ ++ v = (unsigned int *)c->x86_model_id; ++ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); ++ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); ++ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); ++ c->x86_model_id[48] = 0; ++ ++ /* Trim whitespace */ ++ p = q = s = &c->x86_model_id[0]; ++ ++ while (*p == ' ') ++ p++; ++ ++ while (*p) { ++ /* Note the last non-whitespace index */ ++ if (!isspace(*p)) ++ s = q; ++ ++ *q++ = *p++; ++ } ++ ++ *(s + 1) = '\0'; ++} ++ ++void detect_num_cpu_cores(struct cpuinfo_x86 *c) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ ++ c->x86_max_cores = 1; ++ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) ++ return; ++ ++ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); ++ if (eax & 0x1f) ++ c->x86_max_cores = (eax >> 26) + 1; ++} ++ ++void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) ++{ ++ unsigned int n, dummy, ebx, ecx, edx, l2size; ++ ++ n = c->extended_cpuid_level; ++ ++ if (n >= 0x80000005) { ++ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); ++ c->x86_cache_size = (ecx>>24) + (edx>>24); ++#ifdef CONFIG_X86_64 ++ /* On K8 L1 TLB is inclusive, so don't count it */ ++ c->x86_tlbsize = 0; ++#endif ++ } ++ ++ if (n < 0x80000006) /* Some chips just has a large L1. */ ++ return; ++ ++ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); ++ l2size = ecx >> 16; ++ ++#ifdef CONFIG_X86_64 ++ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); ++#else ++ /* do processor-specific cache resizing */ ++ if (this_cpu->legacy_cache_size) ++ l2size = this_cpu->legacy_cache_size(c, l2size); ++ ++ /* Allow user to override all this if necessary. 
*/ ++ if (cachesize_override != -1) ++ l2size = cachesize_override; ++ ++ if (l2size == 0) ++ return; /* Again, no L2 cache is possible */ ++#endif ++ ++ c->x86_cache_size = l2size; ++} ++ ++u16 __read_mostly tlb_lli_4k[NR_INFO]; ++u16 __read_mostly tlb_lli_2m[NR_INFO]; ++u16 __read_mostly tlb_lli_4m[NR_INFO]; ++u16 __read_mostly tlb_lld_4k[NR_INFO]; ++u16 __read_mostly tlb_lld_2m[NR_INFO]; ++u16 __read_mostly tlb_lld_4m[NR_INFO]; ++u16 __read_mostly tlb_lld_1g[NR_INFO]; ++ ++static void cpu_detect_tlb(struct cpuinfo_x86 *c) ++{ ++ if (this_cpu->c_detect_tlb) ++ this_cpu->c_detect_tlb(c); ++ ++ pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", ++ tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], ++ tlb_lli_4m[ENTRIES]); ++ ++ pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", ++ tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], ++ tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); ++} ++ ++int detect_ht_early(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ u32 eax, ebx, ecx, edx; ++ ++ if (!cpu_has(c, X86_FEATURE_HT)) ++ return -1; ++ ++ if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) ++ return -1; ++ ++ if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) ++ return -1; ++ ++ cpuid(1, &eax, &ebx, &ecx, &edx); ++ ++ smp_num_siblings = (ebx & 0xff0000) >> 16; ++ if (smp_num_siblings == 1) ++ pr_info_once("CPU0: Hyper-Threading is disabled\n"); ++#endif ++ return 0; ++} ++ ++void detect_ht(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ int index_msb, core_bits; ++ ++ if (detect_ht_early(c) < 0) ++ return; ++ ++ index_msb = get_count_order(smp_num_siblings); ++ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); ++ ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ ++ index_msb = get_count_order(smp_num_siblings); ++ ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ++ ((1 << core_bits) - 1); ++#endif ++} ++ ++static void get_cpu_vendor(struct cpuinfo_x86 *c) ++{ ++ char *v = c->x86_vendor_id; ++ int i; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) { ++ if (!cpu_devs[i]) ++ break; ++ ++ if (!strcmp(v, cpu_devs[i]->c_ident[0]) || ++ (cpu_devs[i]->c_ident[1] && ++ !strcmp(v, cpu_devs[i]->c_ident[1]))) { ++ ++ this_cpu = cpu_devs[i]; ++ c->x86_vendor = this_cpu->c_x86_vendor; ++ return; ++ } ++ } ++ ++ pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \ ++ "CPU: Your system may be unstable.\n", v); ++ ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ this_cpu = &default_cpu; ++} ++ ++void cpu_detect(struct cpuinfo_x86 *c) ++{ ++ /* Get vendor name */ ++ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, ++ (unsigned int *)&c->x86_vendor_id[0], ++ (unsigned int *)&c->x86_vendor_id[8], ++ (unsigned int *)&c->x86_vendor_id[4]); ++ ++ c->x86 = 4; ++ /* Intel-defined flags: level 0x00000001 */ ++ if (c->cpuid_level >= 0x00000001) { ++ u32 junk, tfms, cap0, misc; ++ ++ cpuid(0x00000001, &tfms, &misc, &junk, &cap0); ++ c->x86 = x86_family(tfms); ++ c->x86_model = x86_model(tfms); ++ c->x86_stepping = x86_stepping(tfms); ++ ++ if (cap0 & (1<<19)) { ++ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; ++ c->x86_cache_alignment = c->x86_clflush_size; ++ } ++ } ++} ++ ++static void apply_forced_caps(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ for (i = 0; i < NCAPINTS + NBUGINTS; i++) { ++ c->x86_capability[i] &= ~cpu_caps_cleared[i]; ++ c->x86_capability[i] |= cpu_caps_set[i]; ++ } ++} ++ ++static void init_speculation_control(struct cpuinfo_x86 *c) ++{ ++ /* ++ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB 
support, ++ * and they also have a different bit for STIBP support. Also, ++ * a hypervisor might have set the individual AMD bits even on ++ * Intel CPUs, for finer-grained selection of what's available. ++ */ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++ ++ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || ++ cpu_has(c, X86_FEATURE_VIRT_SSBD)) ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { ++ set_cpu_cap(c, X86_FEATURE_IBRS); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_AMD_IBPB)) ++ set_cpu_cap(c, X86_FEATURE_IBPB); ++ ++ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ set_cpu_cap(c, X86_FEATURE_STIBP); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ } ++ ++ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { ++ set_cpu_cap(c, X86_FEATURE_SSBD); ++ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); ++ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); ++ } ++} ++ ++static void init_cqm(struct cpuinfo_x86 *c) ++{ ++ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { ++ c->x86_cache_max_rmid = -1; ++ c->x86_cache_occ_scale = -1; ++ return; ++ } ++ ++ /* will be overridden if occupancy monitoring exists */ ++ c->x86_cache_max_rmid = cpuid_ebx(0xf); ++ ++ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { ++ u32 eax, ebx, ecx, edx; ++ ++ /* QoS sub-leaf, EAX=0Fh, ECX=1 */ ++ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_cache_max_rmid = ecx; ++ c->x86_cache_occ_scale = ebx; ++ } ++} ++ ++void get_cpu_cap(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ ++ /* Intel-defined flags: level 0x00000001 */ ++ if (c->cpuid_level >= 0x00000001) { ++ cpuid(0x00000001, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_1_ECX] = ecx; ++ c->x86_capability[CPUID_1_EDX] = edx; ++ } ++ ++ /* Thermal and Power Management Leaf: level 0x00000006 (eax) */ ++ if (c->cpuid_level >= 0x00000006) ++ c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); ++ ++ /* Additional Intel-defined flags: level 0x00000007 */ ++ if (c->cpuid_level >= 0x00000007) { ++ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); ++ c->x86_capability[CPUID_7_0_EBX] = ebx; ++ c->x86_capability[CPUID_7_ECX] = ecx; ++ c->x86_capability[CPUID_7_EDX] = edx; ++ } ++ ++ /* Extended state features: level 0x0000000d */ ++ if (c->cpuid_level >= 0x0000000d) { ++ cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_D_1_EAX] = eax; ++ } ++ ++ /* AMD-defined flags: level 0x80000001 */ ++ eax = cpuid_eax(0x80000000); ++ c->extended_cpuid_level = eax; ++ ++ if ((eax & 0xffff0000) == 0x80000000) { ++ if (eax >= 0x80000001) { ++ cpuid(0x80000001, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_8000_0001_ECX] = ecx; ++ c->x86_capability[CPUID_8000_0001_EDX] = edx; ++ } ++ } ++ ++ if (c->extended_cpuid_level >= 0x80000007) { ++ cpuid(0x80000007, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_capability[CPUID_8000_0007_EBX] = ebx; ++ c->x86_power = edx; ++ } ++ ++ if (c->extended_cpuid_level >= 0x80000008) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ c->x86_capability[CPUID_8000_0008_EBX] = ebx; ++ } ++ ++ if (c->extended_cpuid_level >= 0x8000000a) ++ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); ++ ++ init_scattered_cpuid_features(c); ++ init_speculation_control(c); ++ 
init_cqm(c); ++ ++ /* ++ * Clear/Set all flags overridden by options, after probe. ++ * This needs to happen each time we re-probe, which may happen ++ * several times during CPU initialization. ++ */ ++ apply_forced_caps(c); ++} ++ ++void get_cpu_address_sizes(struct cpuinfo_x86 *c) ++{ ++ u32 eax, ebx, ecx, edx; ++ ++ if (c->extended_cpuid_level >= 0x80000008) { ++ cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_virt_bits = (eax >> 8) & 0xff; ++ c->x86_phys_bits = eax & 0xff; ++ } ++#ifdef CONFIG_X86_32 ++ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) ++ c->x86_phys_bits = 36; ++#endif ++ c->x86_cache_bits = c->x86_phys_bits; ++} ++ ++static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_32 ++ int i; ++ ++ /* ++ * First of all, decide if this is a 486 or higher ++ * It's a 486 if we can modify the AC flag ++ */ ++ if (flag_is_changeable_p(X86_EFLAGS_AC)) ++ c->x86 = 4; ++ else ++ c->x86 = 3; ++ ++ for (i = 0; i < X86_VENDOR_NUM; i++) ++ if (cpu_devs[i] && cpu_devs[i]->c_identify) { ++ c->x86_vendor_id[0] = 0; ++ cpu_devs[i]->c_identify(c); ++ if (c->x86_vendor_id[0]) { ++ get_cpu_vendor(c); ++ break; ++ } ++ } ++#endif ++} ++ ++#define NO_SPECULATION BIT(0) ++#define NO_MELTDOWN BIT(1) ++#define NO_SSB BIT(2) ++#define NO_L1TF BIT(3) ++#define NO_MDS BIT(4) ++#define MSBDS_ONLY BIT(5) ++#define NO_SWAPGS BIT(6) ++#define NO_ITLB_MULTIHIT BIT(7) ++ ++#define VULNWL(_vendor, _family, _model, _whitelist) \ ++ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } ++ ++#define VULNWL_INTEL(model, whitelist) \ ++ VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) ++ ++#define VULNWL_AMD(family, whitelist) \ ++ VULNWL(AMD, family, X86_MODEL_ANY, whitelist) ++ ++#define VULNWL_HYGON(family, whitelist) \ ++ VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) ++ ++static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { ++ VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), ++ VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), ++ ++ /* Intel Family 6 */ ++ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(CORE_YONAH, NO_SSB), ++ ++ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ /* ++ * Technically, swapgs isn't serializing on AMD (despite it previously 
++ * being documented as such in the APM). But according to AMD, %gs is ++ * updated non-speculatively, and the issuing of %gs-relative memory ++ * operands will be blocked until the %gs update completes, which is ++ * good enough for our purposes. ++ */ ++ ++ VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), ++ ++ /* AMD Family 0xf - 0x12 */ ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ ++ /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ {} ++}; ++ ++#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ ++ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ ++ INTEL_FAM6_##model, steppings, \ ++ X86_FEATURE_ANY, issues) ++ ++#define SRBDS BIT(0) ++ ++static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { ++ VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_CORE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_ULT, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(HASWELL_GT3E, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_GT3E, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_CORE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_MOBILE, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_DESKTOP, X86_STEPPING_ANY, SRBDS), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_MOBILE, X86_STEPPINGS(0x0, 0xC), SRBDS), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_DESKTOP,X86_STEPPINGS(0x0, 0xD), SRBDS), ++ {} ++}; ++ ++static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) ++{ ++ const struct x86_cpu_id *m = x86_match_cpu(table); ++ ++ return m && !!(m->driver_data & which); ++} ++ ++u64 x86_read_arch_cap_msr(void) ++{ ++ u64 ia32_cap = 0; ++ ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ return ia32_cap; ++} ++ ++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ ++ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && ++ !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) ++ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V1); ++ setup_force_cpu_bug(X86_BUG_SPECTRE_V2); ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && ++ !(ia32_cap & ARCH_CAP_SSB_NO) && ++ !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) ++ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); ++ ++ if (ia32_cap & ARCH_CAP_IBRS_ALL) ++ setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && ++ !(ia32_cap & ARCH_CAP_MDS_NO)) { ++ setup_force_cpu_bug(X86_BUG_MDS); ++ if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) ++ setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); ++ } ++ ++ if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) ++ setup_force_cpu_bug(X86_BUG_SWAPGS); ++ ++ /* ++ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: ++ * - 
TSX is supported or ++ * - TSX_CTRL is present ++ * ++ * TSX_CTRL check is needed for cases when TSX could be disabled before ++ * the kernel boot e.g. kexec. ++ * TSX_CTRL check alone is not sufficient for cases when the microcode ++ * update is not present or running as guest that don't get TSX_CTRL. ++ */ ++ if (!(ia32_cap & ARCH_CAP_TAA_NO) && ++ (cpu_has(c, X86_FEATURE_RTM) || ++ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) ++ setup_force_cpu_bug(X86_BUG_TAA); ++ ++ /* ++ * SRBDS affects CPUs which support RDRAND or RDSEED and are listed ++ * in the vulnerability blacklist. ++ */ ++ if ((cpu_has(c, X86_FEATURE_RDRAND) || ++ cpu_has(c, X86_FEATURE_RDSEED)) && ++ cpu_matches(cpu_vuln_blacklist, SRBDS)) ++ setup_force_cpu_bug(X86_BUG_SRBDS); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) ++ return; ++ ++ /* Rogue Data Cache Load? No! */ ++ if (ia32_cap & ARCH_CAP_RDCL_NO) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); ++ ++ if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) ++ return; ++ ++ setup_force_cpu_bug(X86_BUG_L1TF); ++} ++ ++/* ++ * The NOPL instruction is supposed to exist on all CPUs of family >= 6; ++ * unfortunately, that's not true in practice because of early VIA ++ * chips and (more importantly) broken virtualizers that are not easy ++ * to detect. In the latter case it doesn't even *fail* reliably, so ++ * probing for it doesn't even work. Disable it completely on 32-bit ++ * unless we can find a reliable way to detect all the broken cases. ++ * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). ++ */ ++static void detect_nopl(void) ++{ ++#ifdef CONFIG_X86_32 ++ setup_clear_cpu_cap(X86_FEATURE_NOPL); ++#else ++ setup_force_cpu_cap(X86_FEATURE_NOPL); ++#endif ++} ++ ++/* ++ * Do minimum CPU detection early. ++ * Fields really needed: vendor, cpuid_level, family, model, mask, ++ * cache alignment. ++ * The others are not touched to avoid unwanted side effects. ++ * ++ * WARNING: this function is only called on the boot CPU. Don't add code ++ * here that is supposed to run on all CPUs. ++ */ ++static void __init early_identify_cpu(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; ++#else ++ c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; ++ ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ c->extended_cpuid_level = 0; ++ ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); ++ ++ /* cyrix could have cpuid enabled via c_identify()*/ ++ if (have_cpuid_p()) { ++ cpu_detect(c); ++ get_cpu_vendor(c); ++ get_cpu_cap(c); ++ get_cpu_address_sizes(c); ++ setup_force_cpu_cap(X86_FEATURE_CPUID); ++ ++ if (this_cpu->c_early_init) ++ this_cpu->c_early_init(c); ++ ++ c->cpu_index = 0; ++ filter_cpuid_features(c, false); ++ ++ if (this_cpu->c_bsp_init) ++ this_cpu->c_bsp_init(c); ++ } else { ++ setup_clear_cpu_cap(X86_FEATURE_CPUID); ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_ALWAYS); ++ ++ cpu_set_bug_bits(c); ++ ++ fpu__init_system(c); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Regardless of whether PCID is enumerated, the SDM says ++ * that it can't be enabled in 32-bit mode. ++ */ ++ setup_clear_cpu_cap(X86_FEATURE_PCID); ++#endif ++ ++ /* ++ * Later in the boot process pgtable_l5_enabled() relies on ++ * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not ++ * enabled by this point we need to clear the feature bit to avoid ++ * false-positives at the later stage. 
++ * ++ * pgtable_l5_enabled() can be false here for several reasons: ++ * - 5-level paging is disabled compile-time; ++ * - it's 32-bit kernel; ++ * - machine doesn't support 5-level paging; ++ * - user specified 'no5lvl' in kernel command line. ++ */ ++ if (!pgtable_l5_enabled()) ++ setup_clear_cpu_cap(X86_FEATURE_LA57); ++ ++ detect_nopl(); ++} ++ ++void __init early_cpu_init(void) ++{ ++ const struct cpu_dev *const *cdev; ++ int count = 0; ++ ++#ifdef CONFIG_PROCESSOR_SELECT ++ pr_info("KERNEL supported cpus:\n"); ++#endif ++ ++ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { ++ const struct cpu_dev *cpudev = *cdev; ++ ++ if (count >= X86_VENDOR_NUM) ++ break; ++ cpu_devs[count] = cpudev; ++ count++; ++ ++#ifdef CONFIG_PROCESSOR_SELECT ++ { ++ unsigned int j; ++ ++ for (j = 0; j < 2; j++) { ++ if (!cpudev->c_ident[j]) ++ continue; ++ pr_info(" %s %s\n", cpudev->c_vendor, ++ cpudev->c_ident[j]); ++ } ++ } ++#endif ++ } ++ early_identify_cpu(&boot_cpu_data); ++} ++ ++static void detect_null_seg_behavior(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_X86_64 ++ /* ++ * Empirically, writing zero to a segment selector on AMD does ++ * not clear the base, whereas writing zero to a segment ++ * selector on Intel does clear the base. Intel's behavior ++ * allows slightly faster context switches in the common case ++ * where GS is unused by the prev and next threads. ++ * ++ * Since neither vendor documents this anywhere that I can see, ++ * detect it directly instead of hardcoding the choice by ++ * vendor. ++ * ++ * I've designated AMD's behavior as the "bug" because it's ++ * counterintuitive and less friendly. ++ */ ++ ++ unsigned long old_base, tmp; ++ rdmsrl(MSR_FS_BASE, old_base); ++ wrmsrl(MSR_FS_BASE, 1); ++ loadsegment(fs, 0); ++ rdmsrl(MSR_FS_BASE, tmp); ++ if (tmp != 0) ++ set_cpu_bug(c, X86_BUG_NULL_SEG); ++ wrmsrl(MSR_FS_BASE, old_base); ++#endif ++} ++ ++static void generic_identify(struct cpuinfo_x86 *c) ++{ ++ c->extended_cpuid_level = 0; ++ ++ if (!have_cpuid_p()) ++ identify_cpu_without_cpuid(c); ++ ++ /* cyrix could have cpuid enabled via c_identify()*/ ++ if (!have_cpuid_p()) ++ return; ++ ++ cpu_detect(c); ++ ++ get_cpu_vendor(c); ++ ++ get_cpu_cap(c); ++ ++ get_cpu_address_sizes(c); ++ ++ if (c->cpuid_level >= 0x00000001) { ++ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; ++#ifdef CONFIG_X86_32 ++# ifdef CONFIG_SMP ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); ++# else ++ c->apicid = c->initial_apicid; ++# endif ++#endif ++ c->phys_proc_id = c->initial_apicid; ++ } ++ ++ get_model_name(c); /* Default name */ ++ ++ detect_null_seg_behavior(c); ++ ++ /* ++ * ESPFIX is a strange bug. All real CPUs have it. Paravirt ++ * systems that run Linux at CPL > 0 may or may not have the ++ * issue, but, even if they have the issue, there's absolutely ++ * nothing we can do about it because we can't use the real IRET ++ * instruction. ++ * ++ * NB: For the time being, only 32-bit kernels support ++ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose ++ * whether to apply espfix using paravirt hooks. If any ++ * non-paravirt system ever shows up that does *not* have the ++ * ESPFIX issue, we can change this. 
++ */ ++#ifdef CONFIG_X86_32 ++# ifdef CONFIG_PARAVIRT ++ do { ++ extern void native_iret(void); ++ if (pv_cpu_ops.iret == native_iret) ++ set_cpu_bug(c, X86_BUG_ESPFIX); ++ } while (0); ++# else ++ set_cpu_bug(c, X86_BUG_ESPFIX); ++# endif ++#endif ++} ++ ++static void x86_init_cache_qos(struct cpuinfo_x86 *c) ++{ ++ /* ++ * The heavy lifting of max_rmid and cache_occ_scale are handled ++ * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu ++ * in case CQM bits really aren't there in this CPU. ++ */ ++ if (c != &boot_cpu_data) { ++ boot_cpu_data.x86_cache_max_rmid = ++ min(boot_cpu_data.x86_cache_max_rmid, ++ c->x86_cache_max_rmid); ++ } ++} ++ ++/* ++ * Validate that ACPI/mptables have the same information about the ++ * effective APIC id and update the package map. ++ */ ++static void validate_apic_and_package_id(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ unsigned int apicid, cpu = smp_processor_id(); ++ ++ apicid = apic->cpu_present_to_apicid(cpu); ++ ++ if (apicid != c->apicid) { ++ pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", ++ cpu, apicid, c->initial_apicid); ++ } ++ BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); ++#else ++ c->logical_proc_id = 0; ++#endif ++} ++ ++/* ++ * This does the hard work of actually picking apart the CPU stuff... ++ */ ++static void identify_cpu(struct cpuinfo_x86 *c) ++{ ++ int i; ++ ++ c->loops_per_jiffy = loops_per_jiffy; ++ c->x86_cache_size = 0; ++ c->x86_vendor = X86_VENDOR_UNKNOWN; ++ c->x86_model = c->x86_stepping = 0; /* So far unknown... */ ++ c->x86_vendor_id[0] = '\0'; /* Unset */ ++ c->x86_model_id[0] = '\0'; /* Unset */ ++ c->x86_max_cores = 1; ++ c->x86_coreid_bits = 0; ++ c->cu_id = 0xff; ++#ifdef CONFIG_X86_64 ++ c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; ++#else ++ c->cpuid_level = -1; /* CPUID not detected */ ++ c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; ++#endif ++ c->x86_cache_alignment = c->x86_clflush_size; ++ memset(&c->x86_capability, 0, sizeof c->x86_capability); ++ ++ generic_identify(c); ++ ++ if (this_cpu->c_identify) ++ this_cpu->c_identify(c); ++ ++ /* Clear/Set all flags overridden by options, after probe */ ++ apply_forced_caps(c); ++ ++#ifdef CONFIG_X86_64 ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); ++#endif ++ ++ /* ++ * Vendor-specific initialization. In this section we ++ * canonicalize the feature flags, meaning if there are ++ * features a certain CPU supports which CPUID doesn't ++ * tell us, CPUID claiming incorrect flags, or other bugs, ++ * we handle them here. ++ * ++ * At the end of this section, c->x86_capability better ++ * indicate the features this CPU genuinely supports! ++ */ ++ if (this_cpu->c_init) ++ this_cpu->c_init(c); ++ ++ /* Disable the PN if appropriate */ ++ squash_the_stupid_serial_number(c); ++ ++ /* Set up SMEP/SMAP/UMIP */ ++ setup_smep(c); ++ setup_smap(c); ++ setup_umip(c); ++ ++ /* ++ * The vendor-specific functions might have changed features. ++ * Now we do "generic changes." ++ */ ++ ++ /* Filter out anything that depends on CPUID levels we don't have */ ++ filter_cpuid_features(c, true); ++ ++ /* If the model name is still unset, do table lookup. */ ++ if (!c->x86_model_id[0]) { ++ const char *p; ++ p = table_lookup_model(c); ++ if (p) ++ strcpy(c->x86_model_id, p); ++ else ++ /* Last resort... 
*/ ++ sprintf(c->x86_model_id, "%02x/%02x", ++ c->x86, c->x86_model); ++ } ++ ++#ifdef CONFIG_X86_64 ++ detect_ht(c); ++#endif ++ ++ x86_init_rdrand(c); ++ x86_init_cache_qos(c); ++ setup_pku(c); ++ ++ /* ++ * Clear/Set all flags overridden by options, need do it ++ * before following smp all cpus cap AND. ++ */ ++ apply_forced_caps(c); ++ ++ /* ++ * On SMP, boot_cpu_data holds the common feature set between ++ * all CPUs; so make sure that we indicate which features are ++ * common between the CPUs. The first time this routine gets ++ * executed, c == &boot_cpu_data. ++ */ ++ if (c != &boot_cpu_data) { ++ /* AND the already accumulated flags with these */ ++ for (i = 0; i < NCAPINTS; i++) ++ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; ++ ++ /* OR, i.e. replicate the bug flags */ ++ for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++) ++ c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; ++ } ++ ++ /* Init Machine Check Exception if available. */ ++ mcheck_cpu_init(c); ++ ++ select_idle_routine(c); ++ ++#ifdef CONFIG_NUMA ++ numa_add_cpu(smp_processor_id()); ++#endif ++} ++ ++/* ++ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions ++ * on 32-bit kernels: ++ */ ++#ifdef CONFIG_X86_32 ++void enable_sep_cpu(void) ++{ ++ struct tss_struct *tss; ++ int cpu; ++ ++ if (!boot_cpu_has(X86_FEATURE_SEP)) ++ return; ++ ++ cpu = get_cpu(); ++ tss = &per_cpu(cpu_tss_rw, cpu); ++ ++ /* ++ * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- ++ * see the big comment in struct x86_hw_tss's definition. ++ */ ++ ++ tss->x86_tss.ss1 = __KERNEL_CS; ++ wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); ++ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); ++ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); ++ ++ put_cpu(); ++} ++#endif ++ ++void __init identify_boot_cpu(void) ++{ ++ identify_cpu(&boot_cpu_data); ++#ifdef CONFIG_X86_32 ++ sysenter_setup(); ++ enable_sep_cpu(); ++#endif ++ cpu_detect_tlb(&boot_cpu_data); ++ tsx_init(); ++} ++ ++void identify_secondary_cpu(struct cpuinfo_x86 *c) ++{ ++ BUG_ON(c == &boot_cpu_data); ++ identify_cpu(c); ++#ifdef CONFIG_X86_32 ++ enable_sep_cpu(); ++#endif ++ mtrr_ap_init(); ++ validate_apic_and_package_id(c); ++ x86_spec_ctrl_setup_ap(); ++ update_srbds_msr(); ++} ++ ++static __init int setup_noclflush(char *arg) ++{ ++ setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); ++ setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT); ++ return 1; ++} ++__setup("noclflush", setup_noclflush); ++ ++void print_cpu_info(struct cpuinfo_x86 *c) ++{ ++ const char *vendor = NULL; ++ ++ if (c->x86_vendor < X86_VENDOR_NUM) { ++ vendor = this_cpu->c_vendor; ++ } else { ++ if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ } ++ ++ if (vendor && !strstr(c->x86_model_id, vendor)) ++ pr_cont("%s ", vendor); ++ ++ if (c->x86_model_id[0]) ++ pr_cont("%s", c->x86_model_id); ++ else ++ pr_cont("%d86", c->x86); ++ ++ pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); ++ ++ if (c->x86_stepping || c->cpuid_level >= 0) ++ pr_cont(", stepping: 0x%x)\n", c->x86_stepping); ++ else ++ pr_cont(")\n"); ++} ++ ++/* ++ * clearcpuid= was already parsed in fpu__init_parse_early_param. ++ * But we need to keep a dummy __setup around otherwise it would ++ * show up as an environment variable for init. 
++ */ ++static __init int setup_clearcpuid(char *arg) ++{ ++ return 1; ++} ++__setup("clearcpuid=", setup_clearcpuid); ++ ++#ifdef CONFIG_X86_64 ++DEFINE_PER_CPU_FIRST(union irq_stack_union, ++ irq_stack_union) __aligned(PAGE_SIZE) __visible; ++EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); ++ ++/* ++ * The following percpu variables are hot. Align current_task to ++ * cacheline size such that they fall in the same cacheline. ++ */ ++DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = ++ &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++ ++DEFINE_PER_CPU(char *, irq_stack_ptr) = ++ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; ++ ++DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; ++ ++DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; ++EXPORT_PER_CPU_SYMBOL(__preempt_count); ++ ++/* May not be marked __init: used by software suspend */ ++void syscall_init(void) ++{ ++ extern char _entry_trampoline[]; ++ extern char entry_SYSCALL_64_trampoline[]; ++ ++ int cpu = smp_processor_id(); ++ unsigned long SYSCALL64_entry_trampoline = ++ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + ++ (entry_SYSCALL_64_trampoline - _entry_trampoline); ++ ++ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); ++ if (static_cpu_has(X86_FEATURE_PTI)) ++ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); ++ else ++ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); ++ ++#ifdef CONFIG_IA32_EMULATION ++ wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); ++ /* ++ * This only works on Intel CPUs. ++ * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. ++ * This does not cause SYSENTER to jump to the wrong location, because ++ * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). ++ */ ++ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); ++ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); ++#else ++ wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); ++ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); ++ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); ++ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); ++#endif ++ ++ /* Flags to clear on syscall */ ++ wrmsrl(MSR_SYSCALL_MASK, ++ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| ++ X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); ++} ++ ++/* ++ * Copies of the original ist values from the tss are only accessed during ++ * debugging, no special alignment required. 
++ */ ++DEFINE_PER_CPU(struct orig_ist, orig_ist); ++ ++static DEFINE_PER_CPU(unsigned long, debug_stack_addr); ++DEFINE_PER_CPU(int, debug_stack_usage); ++ ++int is_debug_stack(unsigned long addr) ++{ ++ return __this_cpu_read(debug_stack_usage) || ++ (addr <= __this_cpu_read(debug_stack_addr) && ++ addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); ++} ++NOKPROBE_SYMBOL(is_debug_stack); ++ ++DEFINE_PER_CPU(u32, debug_idt_ctr); ++ ++void debug_stack_set_zero(void) ++{ ++ this_cpu_inc(debug_idt_ctr); ++ load_current_idt(); ++} ++NOKPROBE_SYMBOL(debug_stack_set_zero); ++ ++void debug_stack_reset(void) ++{ ++ if (WARN_ON(!this_cpu_read(debug_idt_ctr))) ++ return; ++ if (this_cpu_dec_return(debug_idt_ctr) == 0) ++ load_current_idt(); ++} ++NOKPROBE_SYMBOL(debug_stack_reset); ++ ++#else /* CONFIG_X86_64 */ ++ ++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; ++EXPORT_PER_CPU_SYMBOL(__preempt_count); ++ ++/* ++ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find ++ * the top of the kernel stack. Use an extra percpu variable to track the ++ * top of the kernel stack directly. ++ */ ++DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = ++ (unsigned long)&init_thread_union + THREAD_SIZE; ++EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); ++ ++#ifdef CONFIG_STACKPROTECTOR ++DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); ++#endif ++ ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Clear all 6 debug registers: ++ */ ++static void clear_all_debug_regs(void) ++{ ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ /* Ignore db4, db5 */ ++ if ((i == 4) || (i == 5)) ++ continue; ++ ++ set_debugreg(0, i); ++ } ++} ++ ++#ifdef CONFIG_KGDB ++/* ++ * Restore debug regs if using kgdbwait and you have a kernel debugger ++ * connection established. ++ */ ++static void dbg_restore_debug_regs(void) ++{ ++ if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) ++ arch_kgdb_ops.correct_hw_break(); ++} ++#else /* ! CONFIG_KGDB */ ++#define dbg_restore_debug_regs() ++#endif /* ! CONFIG_KGDB */ ++ ++static void wait_for_master_cpu(int cpu) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * wait for ACK from master CPU before continuing ++ * with AP initialization ++ */ ++ WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); ++ while (!cpumask_test_cpu(cpu, cpu_callout_mask)) ++ cpu_relax(); ++#endif ++} ++ ++/* ++ * cpu_init() initializes state that is per-CPU. Some data is already ++ * initialized (naturally) in the bootstrap process, such as the GDT ++ * and IDT. We reload them nevertheless, this function acts as a ++ * 'CPU state barrier', nothing should get across. ++ * A lot of state is already set up in PDA init for 64 bit ++ */ ++#ifdef CONFIG_X86_64 ++ ++void cpu_init(void) ++{ ++ struct orig_ist *oist; ++ struct task_struct *me; ++ struct tss_struct *t; ++ unsigned long v; ++ int cpu = raw_smp_processor_id(); ++ int i; ++ ++ wait_for_master_cpu(cpu); ++ ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. 
++ */ ++ cr4_init_shadow(); ++ ++ if (cpu) ++ load_ucode_ap(); ++ ++ t = &per_cpu(cpu_tss_rw, cpu); ++ oist = &per_cpu(orig_ist, cpu); ++ ++#ifdef CONFIG_NUMA ++ if (this_cpu_read(numa_node) == 0 && ++ early_cpu_to_node(cpu) != NUMA_NO_NODE) ++ set_numa_node(early_cpu_to_node(cpu)); ++#endif ++ ++ me = current; ++ ++ pr_debug("Initializing CPU#%d\n", cpu); ++ ++ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ ++ /* ++ * Initialize the per-CPU GDT with the boot GDT, ++ * and set up the GDT descriptor: ++ */ ++ ++ switch_to_new_gdt(cpu); ++ loadsegment(fs, 0); ++ ++ load_current_idt(); ++ ++ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); ++ syscall_init(); ++ ++ wrmsrl(MSR_FS_BASE, 0); ++ wrmsrl(MSR_KERNEL_GS_BASE, 0); ++ barrier(); ++ ++ x86_configure_nx(); ++ x2apic_setup(); ++ ++ /* ++ * set up and load the per-CPU TSS ++ */ ++ if (!oist->ist[0]) { ++ char *estacks = get_cpu_entry_area(cpu)->exception_stacks; ++ ++ for (v = 0; v < N_EXCEPTION_STACKS; v++) { ++ estacks += exception_stack_sizes[v]; ++ oist->ist[v] = t->x86_tss.ist[v] = ++ (unsigned long)estacks; ++ if (v == DEBUG_STACK-1) ++ per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; ++ } ++ } ++ ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; ++ ++ /* ++ * <= is required because the CPU will access up to ++ * 8 bits beyond the end of the IO permission bitmap. ++ */ ++ for (i = 0; i <= IO_BITMAP_LONGS; i++) ++ t->io_bitmap[i] = ~0UL; ++ ++ mmgrab(&init_mm); ++ me->active_mm = &init_mm; ++ BUG_ON(me->mm); ++ initialize_tlbstate_and_flush(); ++ enter_lazy_tlb(&init_mm, me); ++ ++ /* ++ * Initialize the TSS. sp0 points to the entry trampoline stack ++ * regardless of what task is running. ++ */ ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); ++ load_TR_desc(); ++ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); ++ ++ load_mm_ldt(&init_mm); ++ ++ clear_all_debug_regs(); ++ dbg_restore_debug_regs(); ++ ++ fpu__init_cpu(); ++ ++ if (is_uv_system()) ++ uv_cpu_init(); ++ ++ load_fixmap_gdt(cpu); ++} ++ ++#else ++ ++void cpu_init(void) ++{ ++ int cpu = smp_processor_id(); ++ struct task_struct *curr = current; ++ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); ++ ++ wait_for_master_cpu(cpu); ++ ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. ++ */ ++ cr4_init_shadow(); ++ ++ show_ucode_info_early(); ++ ++ pr_info("Initializing CPU#%d\n", cpu); ++ ++ if (cpu_feature_enabled(X86_FEATURE_VME) || ++ boot_cpu_has(X86_FEATURE_TSC) || ++ boot_cpu_has(X86_FEATURE_DE)) ++ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); ++ ++ load_current_idt(); ++ switch_to_new_gdt(cpu); ++ ++ /* ++ * Set up and load the per-CPU TSS and LDT ++ */ ++ mmgrab(&init_mm); ++ curr->active_mm = &init_mm; ++ BUG_ON(curr->mm); ++ initialize_tlbstate_and_flush(); ++ enter_lazy_tlb(&init_mm, curr); ++ ++ /* ++ * Initialize the TSS. sp0 points to the entry trampoline stack ++ * regardless of what task is running. 
++ */ ++ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); ++ load_TR_desc(); ++ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); ++ ++ load_mm_ldt(&init_mm); ++ ++ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; ++ ++#ifdef CONFIG_DOUBLEFAULT ++ /* Set up doublefault TSS pointer in the GDT */ ++ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); ++#endif ++ ++ clear_all_debug_regs(); ++ dbg_restore_debug_regs(); ++ ++ fpu__init_cpu(); ++ ++ load_fixmap_gdt(cpu); ++} ++#endif ++ ++static void bsp_resume(void) ++{ ++ if (this_cpu->c_bsp_resume) ++ this_cpu->c_bsp_resume(&boot_cpu_data); ++} ++ ++static struct syscore_ops cpu_syscore_ops = { ++ .resume = bsp_resume, ++}; ++ ++static int __init init_cpu_syscore(void) ++{ ++ register_syscore_ops(&cpu_syscore_ops); ++ return 0; ++} ++core_initcall(init_cpu_syscore); ++ ++/* ++ * The microcode loader calls this upon late microcode load to recheck features, ++ * only when microcode has been updated. Caller holds microcode_mutex and CPU ++ * hotplug lock. ++ */ ++void microcode_check(void) ++{ ++ struct cpuinfo_x86 info; ++ ++ perf_check_microcode(); ++ ++ /* Reload CPUID max function as it might've changed. */ ++ info.cpuid_level = cpuid_eax(0); ++ ++ /* ++ * Copy all capability leafs to pick up the synthetic ones so that ++ * memcmp() below doesn't fail on that. The ones coming from CPUID will ++ * get overwritten in get_cpu_cap(). ++ */ ++ memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); ++ ++ get_cpu_cap(&info); ++ ++ if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) ++ return; ++ ++ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); ++ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); ++} +diff -uprN kernel/arch/x86/kernel/cpu/mtrr/cyrix.c kernel_new/arch/x86/kernel/cpu/mtrr/cyrix.c +--- kernel/arch/x86/kernel/cpu/mtrr/cyrix.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/mtrr/cyrix.c 2021-04-01 18:28:07.655863287 +0800 +@@ -19,7 +19,7 @@ cyrix_get_arr(unsigned int reg, unsigned + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ +@@ -29,7 +29,7 @@ cyrix_get_arr(unsigned int reg, unsigned + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; +@@ -179,6 +179,7 @@ static void cyrix_set_arr(unsigned int r + unsigned long size, mtrr_type type) + { + unsigned char arr, arr_type, arr_size; ++ unsigned long flags; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + +@@ -222,6 +223,8 @@ static void cyrix_set_arr(unsigned int r + } + } + ++ flags = hard_local_irq_save(); ++ + prepare_set(); + + base <<= PAGE_SHIFT; +@@ -231,6 +234,8 @@ static void cyrix_set_arr(unsigned int r + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); ++ ++ hard_local_irq_restore(flags); + } + + typedef struct { +@@ -248,8 +253,10 @@ static unsigned char ccr_state[7] = { 0, + + static void cyrix_set_all(void) + { ++ unsigned long flags; + int i; + ++ flags = hard_local_irq_save(); + prepare_set(); + + /* the CCRs are not contiguous */ +@@ -264,6 +271,7 @@ 
static void cyrix_set_all(void) + } + + post_set(); ++ hard_local_irq_restore(flags); + } + + static const struct mtrr_ops cyrix_mtrr_ops = { +diff -uprN kernel/arch/x86/kernel/cpu/mtrr/generic.c kernel_new/arch/x86/kernel/cpu/mtrr/generic.c +--- kernel/arch/x86/kernel/cpu/mtrr/generic.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/cpu/mtrr/generic.c 2021-04-01 18:28:07.655863287 +0800 +@@ -785,7 +785,7 @@ static void generic_set_all(void) + unsigned long mask, count; + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + prepare_set(); + + /* Actually set the state */ +@@ -795,7 +795,7 @@ static void generic_set_all(void) + pat_init(); + + post_set(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { +@@ -819,12 +819,13 @@ static void generic_set_all(void) + static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) + { +- unsigned long flags; ++ unsigned long rflags, vflags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + +- local_irq_save(flags); ++ local_irq_save(vflags); ++ rflags = hard_local_irq_save(); + prepare_set(); + + if (size == 0) { +@@ -845,7 +846,8 @@ static void generic_set_mtrr(unsigned in + } + + post_set(); +- local_irq_restore(flags); ++ hard_local_irq_restore(rflags); ++ local_irq_restore(vflags); + } + + int generic_validate_add_page(unsigned long base, unsigned long size, +diff -uprN kernel/arch/x86/kernel/fpu/core.c kernel_new/arch/x86/kernel/fpu/core.c +--- kernel/arch/x86/kernel/fpu/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/fpu/core.c 2021-04-01 18:28:07.655863287 +0800 +@@ -35,30 +35,13 @@ union fpregs_state init_fpstate __read_m + * + * - to debug kernel_fpu_begin()/end() correctness + */ +-static DEFINE_PER_CPU(bool, in_kernel_fpu); ++DEFINE_PER_CPU(bool, in_kernel_fpu); + + /* + * Track which context is using the FPU on the CPU: + */ + DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +-static void kernel_fpu_disable(void) +-{ +- WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); +- this_cpu_write(in_kernel_fpu, true); +-} +- +-static void kernel_fpu_enable(void) +-{ +- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); +- this_cpu_write(in_kernel_fpu, false); +-} +- +-static bool kernel_fpu_disabled(void) +-{ +- return this_cpu_read(in_kernel_fpu); +-} +- + static bool interrupted_kernel_fpu_idle(void) + { + return !kernel_fpu_disabled(); +@@ -96,9 +79,11 @@ EXPORT_SYMBOL(irq_fpu_usable); + static void __kernel_fpu_begin(void) + { + struct fpu *fpu = ¤t->thread.fpu; ++ unsigned long flags; + + WARN_ON_FPU(!irq_fpu_usable()); + ++ flags = hard_cond_local_irq_save(); + kernel_fpu_disable(); + + if (fpu->initialized) { +@@ -110,16 +95,20 @@ static void __kernel_fpu_begin(void) + } else { + __cpu_invalidate_fpregs_state(); + } ++ hard_cond_local_irq_restore(flags); + } + + static void __kernel_fpu_end(void) + { + struct fpu *fpu = ¤t->thread.fpu; ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + if (fpu->initialized) + copy_kernel_to_fpregs(&fpu->state); + + kernel_fpu_enable(); ++ hard_cond_local_irq_restore(flags); + } + + void kernel_fpu_begin(void) +@@ -143,9 +132,11 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end); + */ + void fpu__save(struct fpu *fpu) + { ++ unsigned long flags; ++ + WARN_ON_FPU(fpu != ¤t->thread.fpu); + +- preempt_disable(); ++ flags = hard_preempt_disable(); + 
trace_x86_fpu_before_save(fpu); + if (fpu->initialized) { + if (!copy_fpregs_to_fpstate(fpu)) { +@@ -153,7 +144,7 @@ void fpu__save(struct fpu *fpu) + } + } + trace_x86_fpu_after_save(fpu); +- preempt_enable(); ++ hard_preempt_enable(flags); + } + EXPORT_SYMBOL_GPL(fpu__save); + +@@ -315,6 +306,9 @@ void fpu__prepare_write(struct fpu *fpu) + */ + void fpu__restore(struct fpu *fpu) + { ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); + fpu__initialize(fpu); + + /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ +@@ -324,9 +318,18 @@ void fpu__restore(struct fpu *fpu) + copy_kernel_to_fpregs(&fpu->state); + trace_x86_fpu_after_restore(fpu); + kernel_fpu_enable(); ++ hard_local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(fpu__restore); + ++#ifdef CONFIG_IPIPE ++#define FWAIT_PROLOGUE "sti\n" ++#define FWAIT_EPILOGUE "cli\n" ++#else ++#define FWAIT_PROLOGUE ++#define FWAIT_EPILOGUE ++#endif ++ + /* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents +@@ -338,13 +341,16 @@ EXPORT_SYMBOL_GPL(fpu__restore); + */ + void fpu__drop(struct fpu *fpu) + { +- preempt_disable(); ++ unsigned long flags; + ++ flags = hard_preempt_disable(); + if (fpu == ¤t->thread.fpu) { + if (fpu->initialized) { + /* Ignore delayed exceptions from user space */ +- asm volatile("1: fwait\n" ++ asm volatile(FWAIT_PROLOGUE ++ "1: fwait\n" + "2:\n" ++ FWAIT_EPILOGUE + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } +@@ -354,7 +360,7 @@ void fpu__drop(struct fpu *fpu) + + trace_x86_fpu_dropped(fpu); + +- preempt_enable(); ++ hard_preempt_enable(flags); + } + + /* +@@ -382,6 +388,8 @@ static inline void copy_init_fpstate_to_ + */ + void fpu__clear(struct fpu *fpu) + { ++ unsigned long flags; ++ + WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ + + fpu__drop(fpu); +@@ -390,11 +398,11 @@ void fpu__clear(struct fpu *fpu) + * Make sure fpstate is cleared and initialized. 
+ */ + if (static_cpu_has(X86_FEATURE_FPU)) { +- preempt_disable(); ++ flags = hard_local_irq_save(); + fpu__initialize(fpu); + user_fpu_begin(); + copy_init_fpstate_to_fpregs(); +- preempt_enable(); ++ hard_local_irq_restore(flags); + } + } + +diff -uprN kernel/arch/x86/kernel/i8259.c kernel_new/arch/x86/kernel/i8259.c +--- kernel/arch/x86/kernel/i8259.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/i8259.c 2021-04-01 18:28:07.655863287 +0800 +@@ -33,7 +33,7 @@ + static void init_8259A(int auto_eoi); + + static int i8259A_auto_eoi; +-DEFINE_RAW_SPINLOCK(i8259A_lock); ++IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock); + + /* + * 8259A PIC functions to handle ISA devices: +@@ -61,6 +61,7 @@ static void mask_8259A_irq(unsigned int + unsigned long flags; + + raw_spin_lock_irqsave(&i8259A_lock, flags); ++ ipipe_lock_irq(irq); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); +@@ -76,15 +77,18 @@ static void disable_8259A_irq(struct irq + + static void unmask_8259A_irq(unsigned int irq) + { +- unsigned int mask = ~(1 << irq); ++ unsigned int mask = (1 << irq); + unsigned long flags; + + raw_spin_lock_irqsave(&i8259A_lock, flags); +- cached_irq_mask &= mask; +- if (irq & 8) +- outb(cached_slave_mask, PIC_SLAVE_IMR); +- else +- outb(cached_master_mask, PIC_MASTER_IMR); ++ if (cached_irq_mask & mask) { ++ cached_irq_mask &= ~mask; ++ if (irq & 8) ++ outb(cached_slave_mask, PIC_SLAVE_IMR); ++ else ++ outb(cached_master_mask, PIC_MASTER_IMR); ++ ipipe_unlock_irq(irq); ++ } + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + } + +@@ -171,6 +175,18 @@ static void mask_and_ack_8259A(struct ir + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; ++#ifdef CONFIG_IPIPE ++ if (irq == 0) { ++ /* ++ * Fast timer ack -- don't mask (unless supposedly ++ * spurious). We trace outb's in order to detect ++ * broken hardware inducing large delays. ++ */ ++ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. 
*/ ++ raw_spin_unlock_irqrestore(&i8259A_lock, flags); ++ return; ++ } ++#endif /* CONFIG_IPIPE */ + cached_irq_mask |= irqmask; + + handle_real_irq: +@@ -227,6 +243,7 @@ struct irq_chip i8259A_chip = { + .irq_disable = disable_8259A_irq, + .irq_unmask = enable_8259A_irq, + .irq_mask_ack = mask_and_ack_8259A, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static char irq_trigger[2]; +diff -uprN kernel/arch/x86/kernel/idt.c kernel_new/arch/x86/kernel/idt.c +--- kernel/arch/x86/kernel/idt.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/idt.c 2021-04-01 18:28:07.655863287 +0800 +@@ -116,6 +116,10 @@ static const __initconst struct idt_data + INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt), + INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt), + INTG(REBOOT_VECTOR, reboot_interrupt), ++#ifdef CONFIG_IPIPE ++ INTG(IPIPE_RESCHEDULE_VECTOR, ipipe_reschedule_interrupt), ++ INTG(IPIPE_CRITICAL_VECTOR, ipipe_critical_interrupt), ++#endif + #endif + + #ifdef CONFIG_X86_THERMAL_VECTOR +@@ -146,6 +150,9 @@ static const __initconst struct idt_data + #endif + INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), + INTG(ERROR_APIC_VECTOR, error_interrupt), ++#ifdef CONFIG_IPIPE ++ INTG(IPIPE_HRTIMER_VECTOR, ipipe_hrtimer_interrupt), ++#endif + #endif + }; + +@@ -310,9 +317,26 @@ void __init idt_setup_apic_and_irq_gates + { + int i = FIRST_EXTERNAL_VECTOR; + void *entry; ++ unsigned int __maybe_unused cpu, ret; + + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); + ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++ /* ++ * The cleanup vector is not part of the system vector range ++ * but rather belongs to the external IRQ range, however we ++ * still need to map it early to a legit interrupt number for ++ * pipelining. Allocate a specific descriptor manually for it, ++ * using IRQ_MOVE_CLEANUP_VECTOR as both the vector number and ++ * interrupt number, so that we know the latter at build time. ++ */ ++ ret = irq_alloc_descs(IRQ_MOVE_CLEANUP_VECTOR, 0, 1, 0); ++ BUG_ON(IRQ_MOVE_CLEANUP_VECTOR != ret); ++ for_each_possible_cpu(cpu) ++ per_cpu(vector_irq, cpu)[IRQ_MOVE_CLEANUP_VECTOR] = ++ irq_to_desc(IRQ_MOVE_CLEANUP_VECTOR); ++#endif ++ + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); +diff -uprN kernel/arch/x86/kernel/ipipe.c kernel_new/arch/x86/kernel/ipipe.c +--- kernel/arch/x86/kernel/ipipe.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/ipipe.c 2021-04-01 18:28:07.655863287 +0800 +@@ -0,0 +1,564 @@ ++/* -*- linux-c -*- ++ * linux/arch/x86/kernel/ipipe.c ++ * ++ * Copyright (C) 2002-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * Architecture-dependent I-PIPE support for x86. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_LOCAL_APIC ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_IO_APIC ++#include ++#endif /* CONFIG_X86_IO_APIC */ ++#include ++#endif /* CONFIG_X86_LOCAL_APIC */ ++#include ++#include ++#include ++#include ++#include ++ ++void smp_apic_timer_interrupt(struct pt_regs *regs); ++void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); ++void smp_kvm_posted_intr_ipi(struct pt_regs *regs); ++void smp_spurious_interrupt(struct pt_regs *regs); ++void smp_error_interrupt(struct pt_regs *regs); ++void smp_x86_platform_ipi(struct pt_regs *regs); ++void smp_irq_work_interrupt(struct pt_regs *regs); ++void smp_reschedule_interrupt(struct pt_regs *regs); ++void smp_call_function_interrupt(struct pt_regs *regs); ++void smp_call_function_single_interrupt(struct pt_regs *regs); ++void smp_irq_move_cleanup_interrupt(struct pt_regs *regs); ++void smp_reboot_interrupt(void); ++void smp_thermal_interrupt(struct pt_regs *regs); ++void smp_threshold_interrupt(struct pt_regs *regs); ++ ++DEFINE_PER_CPU(unsigned long, __ipipe_cr2); ++EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2); ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *info) ++{ ++ info->sys_nr_cpus = num_online_cpus(); ++ info->sys_cpu_freq = __ipipe_cpu_freq; ++ info->sys_hrtimer_irq = per_cpu(ipipe_percpu.hrtimer_irq, 0); ++ info->sys_hrtimer_freq = __ipipe_hrtimer_freq; ++ info->sys_hrclock_freq = __ipipe_hrclock_freq; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_get_sysinfo); ++ ++static void __ipipe_do_IRQ(unsigned int irq, void *cookie) ++{ ++ void (*handler)(struct pt_regs *regs); ++ struct pt_regs *regs; ++ ++ regs = raw_cpu_ptr(&ipipe_percpu.tick_regs); ++ regs->orig_ax = ~__ipipe_get_irq_vector(irq); ++ handler = (typeof(handler))cookie; ++ handler(regs); ++} ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++static void __ipipe_noack_apic(struct irq_desc *desc) ++{ ++} ++ ++static void __ipipe_ack_apic(struct irq_desc *desc) ++{ ++ __ack_APIC_irq(); ++} ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++/* ++ * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw ++ * interrupts are off, and secondary CPUs are still lost in space. ++ */ ++void __init __ipipe_enable_pipeline(void) ++{ ++ unsigned int irq; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++ /* Map the APIC system vectors. 
*/ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), ++ __ipipe_do_IRQ, smp_apic_timer_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_HAVE_KVM ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(POSTED_INTR_WAKEUP_VECTOR), ++ __ipipe_do_IRQ, smp_kvm_posted_intr_wakeup_ipi, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(POSTED_INTR_VECTOR), ++ __ipipe_do_IRQ, smp_kvm_posted_intr_ipi, ++ __ipipe_ack_apic); ++#endif ++ ++#if defined(CONFIG_X86_MCE_AMD) && defined(CONFIG_X86_64) ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(DEFERRED_ERROR_VECTOR), ++ __ipipe_do_IRQ, smp_deferred_error_interrupt, ++ __ipipe_ack_apic); ++#endif ++ ++#ifdef CONFIG_X86_UV ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(UV_BAU_MESSAGE), ++ __ipipe_do_IRQ, uv_bau_message_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_UV */ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_noack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(ERROR_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_error_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_X86_THERMAL_VECTOR ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_thermal_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_THERMAL_VECTOR */ ++ ++#ifdef CONFIG_X86_MCE_THRESHOLD ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR), ++ __ipipe_do_IRQ, smp_threshold_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_X86_MCE_THRESHOLD */ ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR), ++ __ipipe_do_IRQ, smp_x86_platform_ipi, ++ __ipipe_ack_apic); ++ ++ /* ++ * We expose two high priority APIC vectors the head domain ++ * may use respectively for hires timing and SMP rescheduling. ++ * We should never receive them in the root domain. ++ */ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_HRTIMER_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IPIPE_RESCHEDULE_VECTOR), ++ __ipipe_do_IRQ, smp_spurious_interrupt, ++ __ipipe_ack_apic); ++ ++#ifdef CONFIG_IRQ_WORK ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(IRQ_WORK_VECTOR), ++ __ipipe_do_IRQ, smp_irq_work_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_IRQ_WORK */ ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_SMP ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(RESCHEDULE_VECTOR), ++ __ipipe_do_IRQ, smp_reschedule_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), ++ __ipipe_do_IRQ, smp_call_function_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR), ++ __ipipe_do_IRQ, smp_call_function_single_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ IRQ_MOVE_CLEANUP_VECTOR, ++ __ipipe_do_IRQ, smp_irq_move_cleanup_interrupt, ++ __ipipe_ack_apic); ++ ++ ipipe_request_irq(ipipe_root_domain, ++ ipipe_apic_vector_irq(REBOOT_VECTOR), ++ __ipipe_do_IRQ, smp_reboot_interrupt, ++ __ipipe_ack_apic); ++#endif /* CONFIG_SMP */ ++ ++ /* ++ * Finally, request the remaining ISA and IO-APIC ++ * interrupts. 
Interrupts which have already been requested ++ * will just beget a silent -EBUSY error, that's ok. ++ */ ++ for (irq = 0; irq < IPIPE_NR_XIRQS; irq++) ++ ipipe_request_irq(ipipe_root_domain, irq, ++ __ipipe_do_IRQ, do_IRQ, ++ NULL); ++} ++ ++#ifdef CONFIG_SMP ++int irq_activate(struct irq_desc *desc); ++ ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ int err; ++ ++ cpumask_and(&cpumask, &cpumask, cpu_online_mask); ++ if (cpumask_empty(&cpumask) || ipipe_virtual_irq_p(irq)) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return -EINVAL; ++ ++ chip = irq_desc_get_chip(desc); ++ if (chip->irq_set_affinity == NULL) ++ return -ENOSYS; ++ ++ err = irq_activate(desc); ++ if (err) ++ return err; ++ ++ chip->irq_set_affinity(irq_get_irq_data(irq), &cpumask, true); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_set_irq_affinity); ++ ++void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ cpumask_clear_cpu(ipipe_processor_id(), &cpumask); ++ if (likely(!cpumask_empty(&cpumask))) ++ apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi)); ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_send_ipi); ++ ++void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) ++{ ++ unsigned int ipi = IPIPE_CRITICAL_IPI; ++ ++ ipd->irqs[ipi].ackfn = __ipipe_ack_apic; ++ ipd->irqs[ipi].handler = __ipipe_do_critical_sync; ++ ipd->irqs[ipi].cookie = NULL; ++ ipd->irqs[ipi].control = IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++void __ipipe_halt_root(int use_mwait) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ /* Emulate sti+hlt sequence over the root domain. */ ++ ++ hard_local_irq_disable(); ++ ++ p = ipipe_this_cpu_root_context(); ++ ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) { ++ __ipipe_sync_stage(); ++ hard_local_irq_enable(); ++ } else { ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++ ipipe_trace_end(0x8000000E); ++#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ ++ if (use_mwait) ++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" ++ :: "a" (0), "c" (0)); ++ else ++ asm volatile("sti; hlt": : :"memory"); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_halt_root); ++ ++static inline void __ipipe_fixup_if(bool stalled, struct pt_regs *regs) ++{ ++ /* ++ * Have the saved hw state look like the domain stall bit, so ++ * that __ipipe_unstall_iret_root() restores the proper ++ * pipeline state for the root stage upon exit. ++ */ ++ if (stalled) ++ regs->flags &= ~X86_EFLAGS_IF; ++ else ++ regs->flags |= X86_EFLAGS_IF; ++} ++ ++dotraplinkage int __ipipe_trap_prologue(struct pt_regs *regs, int trapnr, unsigned long *flags) ++{ ++ bool entry_irqs_off = hard_irqs_disabled(); ++ struct ipipe_domain *ipd; ++ unsigned long cr2; ++ ++ if (trapnr == X86_TRAP_PF) ++ cr2 = native_read_cr2(); ++ ++ /* ++ * KGDB and ftrace may poke int3/debug ops into the kernel ++ * code. Trap those exceptions early, do conditional fixups to ++ * the interrupt state depending on the current domain, let ++ * the regular handler see them. ++ */ ++ if (unlikely(!user_mode(regs) && ++ (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP))) { ++ ++ if (ipipe_root_p) ++ goto root_fixup; ++ ++ /* ++ * Skip interrupt state fixup from the head domain, ++ * but do call the regular handler which is assumed to ++ * run fine within such context. 
++ */ ++ return -1; ++ } ++ ++ /* ++ * Now that we have filtered out all debug traps which might ++ * happen anywhere in kernel code in theory, detect attempts ++ * to probe kernel memory (i.e. calls to probe_kernel_{read, ++ * write}()). If that happened over the head domain, do the ++ * fixup immediately then return right after upon success. If ++ * that fails, the kernel is likely to crash but let's follow ++ * the standard recovery procedure in that case anyway. ++ */ ++ if (unlikely(!ipipe_root_p && faulthandler_disabled())) { ++ if (fixup_exception(regs, trapnr)) ++ return 1; ++ } ++ ++ if (unlikely(__ipipe_notify_trap(trapnr, regs))) ++ return 1; ++ ++ if (likely(ipipe_root_p)) { ++ root_fixup: ++ /* ++ * If no head domain is installed, or in case we faulted in ++ * the iret path of x86-32, regs->flags does not match the root ++ * domain state. The fault handler may evaluate it. So fix this ++ * up with the current state. ++ */ ++ local_save_flags(*flags); ++ __ipipe_fixup_if(raw_irqs_disabled_flags(*flags), regs); ++ ++ /* ++ * Sync Linux interrupt state with hardware state on ++ * entry. ++ */ ++ if (entry_irqs_off) ++ local_irq_disable(); ++ } else { ++ /* Plan for restoring the original flags at fault. */ ++ *flags = regs->flags; ++ ++ /* ++ * Detect unhandled faults over the head domain, ++ * switching to root so that it can handle the fault ++ * cleanly. ++ */ ++ hard_local_irq_disable(); ++ ipd = __ipipe_current_domain; ++ __ipipe_set_current_domain(ipipe_root_domain); ++ ++ /* Sync Linux interrupt state with hardware state on entry. */ ++ if (entry_irqs_off) ++ local_irq_disable(); ++ ++ ipipe_trace_panic_freeze(); ++ ++ /* Always warn about user land and unfixable faults. */ ++ if (user_mode(regs) || ++ !search_exception_tables(instruction_pointer(regs))) { ++ printk(KERN_ERR "BUG: Unhandled exception over domain" ++ " %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++ } else if (IS_ENABLED(CONFIG_IPIPE_DEBUG)) { ++ /* Also report fixable ones when debugging is enabled. */ ++ printk(KERN_WARNING "WARNING: Fixable exception over " ++ "domain %s at 0x%lx - switching to ROOT\n", ++ ipd->name, instruction_pointer(regs)); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++ } ++ } ++ ++ if (trapnr == X86_TRAP_PF) ++ write_cr2(cr2); ++ ++ return 0; ++} ++ ++dotraplinkage ++void __ipipe_trap_epilogue(struct pt_regs *regs, ++ unsigned long flags, unsigned long regs_flags) ++{ ++ ipipe_restore_root(raw_irqs_disabled_flags(flags)); ++ __ipipe_fixup_if(raw_irqs_disabled_flags(regs_flags), regs); ++} ++ ++static inline int __ipipe_irq_from_vector(int vector, int *irq) ++{ ++ struct irq_desc *desc; ++ ++ if (vector >= FIRST_SYSTEM_VECTOR) { ++ *irq = ipipe_apic_vector_irq(vector); ++ return 0; ++ } ++ ++ desc = __this_cpu_read(vector_irq[vector]); ++ if (likely(!IS_ERR_OR_NULL(desc))) { ++ *irq = irq_desc_get_irq(desc); ++ return 0; ++ } ++ ++ if (vector == IRQ_MOVE_CLEANUP_VECTOR) { ++ *irq = vector; ++ return 0; ++ } ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ __ack_APIC_irq(); ++#endif ++ pr_err("unexpected IRQ trap at vector %#x\n", vector); ++ return -1; ++} ++ ++int __ipipe_handle_irq(struct pt_regs *regs) ++{ ++ struct ipipe_percpu_data *p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ int irq, vector = regs->orig_ax, flags = 0; ++ struct pt_regs *tick_regs; ++ ++ if (likely(vector < 0)) { ++ if (__ipipe_irq_from_vector(~vector, &irq) < 0) ++ goto out; ++ } else { /* Software-generated. 
*/ ++ irq = vector; ++ flags = IPIPE_IRQF_NOACK; ++ } ++ ++ ipipe_trace_irqbegin(irq, regs); ++ ++ /* ++ * Given our deferred dispatching model for regular IRQs, we ++ * only record CPU regs for the last timer interrupt, so that ++ * the timer handler charges CPU times properly. It is assumed ++ * that no other interrupt handler cares for such information. ++ */ ++ if (irq == p->hrtimer_irq || p->hrtimer_irq == -1) { ++ tick_regs = &p->tick_regs; ++ tick_regs->flags = regs->flags; ++ tick_regs->cs = regs->cs; ++ tick_regs->ip = regs->ip; ++ tick_regs->bp = regs->bp; ++#ifdef CONFIG_X86_64 ++ tick_regs->ss = regs->ss; ++ tick_regs->sp = regs->sp; ++#endif ++ if (!__ipipe_root_p) ++ tick_regs->flags &= ~X86_EFLAGS_IF; ++ } ++ ++ __ipipe_dispatch_irq(irq, flags); ++ ++ if (user_mode(regs) && ipipe_test_thread_flag(TIP_MAYDAY)) ++ __ipipe_call_mayday(regs); ++ ++ ipipe_trace_irqend(irq, regs); ++ ++out: ++ if (!__ipipe_root_p || ++ test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status)) ++ return 0; ++ ++ return 1; ++} ++ ++void __ipipe_arch_share_current(int flags) ++{ ++ struct task_struct *p = current; ++ ++ /* ++ * Setup a clean extended FPU state for kernel threads. ++ */ ++ if (p->mm == NULL) ++ memcpy(&p->thread.fpu.state, ++ &init_fpstate, fpu_kernel_xstate_size); ++} ++ ++struct task_struct *__switch_to(struct task_struct *prev_p, ++ struct task_struct *next_p); ++EXPORT_SYMBOL_GPL(do_munmap); ++EXPORT_SYMBOL_GPL(__switch_to); ++EXPORT_SYMBOL_GPL(show_stack); ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++EXPORT_SYMBOL(tasklist_lock); ++#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ ++ ++#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64) ++EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); ++#endif +diff -uprN kernel/arch/x86/kernel/irq_64.c kernel_new/arch/x86/kernel/irq_64.c +--- kernel/arch/x86/kernel/irq_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/irq_64.c 2021-04-01 18:28:07.655863287 +0800 +@@ -47,28 +47,30 @@ static inline void stack_overflow_check( + u64 irq_stack_top, irq_stack_bottom; + u64 estack_top, estack_bottom; + u64 curbase = (u64)task_stack_page(current); ++ unsigned long sp; + + if (user_mode(regs)) + return; + +- if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && +- regs->sp <= curbase + THREAD_SIZE) ++ sp = IS_ENABLED(CONFIG_IPIPE) ? 
current_stack_pointer : regs->sp; ++ if (sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && ++ sp <= curbase + THREAD_SIZE) + return; + + irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + + STACK_TOP_MARGIN; + irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); +- if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) ++ if (sp >= irq_stack_top && sp <= irq_stack_bottom) + return; + + oist = this_cpu_ptr(&orig_ist); + estack_bottom = (u64)oist->ist[DEBUG_STACK]; + estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; +- if (regs->sp >= estack_top && regs->sp <= estack_bottom) ++ if (sp >= estack_top && sp <= estack_bottom) + return; + + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", +- current->comm, curbase, regs->sp, ++ current->comm, curbase, sp, + irq_stack_top, irq_stack_bottom, + estack_top, estack_bottom, (void *)regs->ip); + +diff -uprN kernel/arch/x86/kernel/irq.c kernel_new/arch/x86/kernel/irq.c +--- kernel/arch/x86/kernel/irq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/irq.c 2021-04-01 18:28:07.656863286 +0800 +@@ -48,7 +48,7 @@ void ack_bad_irq(unsigned int irq) + * completely. + * But only ack when the APIC is enabled -AK + */ +- ack_APIC_irq(); ++ __ack_APIC_irq(); + } + + #define irq_stats(x) (&per_cpu(irq_stat, x)) +@@ -236,12 +236,13 @@ __visible unsigned int __irq_entry do_IR + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + ++ desc = __this_cpu_read(vector_irq[vector]); ++ __ipipe_move_root_irq(desc); + entering_irq(); + + /* entering_irq() tells RCU that we're not quiescent. Check it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + +- desc = __this_cpu_read(vector_irq[vector]); + + if (!handle_irq(desc, regs)) { + ack_APIC_irq(); +diff -uprN kernel/arch/x86/kernel/kgdb.c kernel_new/arch/x86/kernel/kgdb.c +--- kernel/arch/x86/kernel/kgdb.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/kgdb.c 2021-04-01 18:28:07.656863286 +0800 +@@ -598,9 +598,9 @@ kgdb_notify(struct notifier_block *self, + unsigned long flags; + int ret; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + ret = __kgdb_notify(ptr, cmd); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + return ret; + } +diff -uprN kernel/arch/x86/kernel/Makefile kernel_new/arch/x86/kernel/Makefile +--- kernel/arch/x86/kernel/Makefile 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/Makefile 2021-04-01 18:28:07.656863286 +0800 +@@ -79,6 +79,7 @@ obj-y += reboot.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_PCI) += early-quirks.o ++obj-$(CONFIG_IPIPE) += ipipe.o + apm-y := apm_32.o + obj-$(CONFIG_APM) += apm.o + obj-$(CONFIG_SMP) += smp.o +diff -uprN kernel/arch/x86/kernel/process_64.c kernel_new/arch/x86/kernel/process_64.c +--- kernel/arch/x86/kernel/process_64.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process_64.c 2021-04-01 18:28:07.656863286 +0800 +@@ -431,7 +431,7 @@ __switch_to(struct task_struct *prev_p, + struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && + this_cpu_read(irq_count) != -1); +diff -uprN kernel/arch/x86/kernel/process.c kernel_new/arch/x86/kernel/process.c +--- 
kernel/arch/x86/kernel/process.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process.c 2021-04-01 18:28:07.656863286 +0800 +@@ -113,8 +113,16 @@ void exit_thread(struct task_struct *tsk + if (bp) { + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); + +- t->io_bitmap_ptr = NULL; ++ /* ++ * The caller may be preempted via I-pipe: to make ++ * sure TIF_IO_BITMAP always denotes a valid I/O ++ * bitmap when set, we clear it _before_ the I/O ++ * bitmap pointer. No cache coherence issue ahead as ++ * migration is currently locked (the primary domain ++ * may never migrate either). ++ */ + clear_thread_flag(TIF_IO_BITMAP); ++ t->io_bitmap_ptr = NULL; + /* + * Careful, clear this in the TSS too: + */ +@@ -411,7 +419,9 @@ static __always_inline void __speculatio + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + ++#ifndef CONFIG_IPIPE + lockdep_assert_irqs_disabled(); ++#endif + + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { +@@ -459,9 +469,9 @@ void speculation_ctrl_update(unsigned lo + unsigned long flags; + + /* Forced update. Make sure all relevant TIF flags are different */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + __speculation_ctrl_update(~tif, tif); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* Called from seccomp/prctl update */ +@@ -574,7 +584,7 @@ bool xen_set_default_idle(void) + + void stop_this_cpu(void *dummy) + { +- local_irq_disable(); ++ hard_local_irq_disable(); + /* + * Remove this CPU: + */ +@@ -670,7 +680,11 @@ static __cpuidle void mwait_idle(void) + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + if (!need_resched()) ++#ifdef CONFIG_IPIPE ++ __ipipe_halt_root(1); ++#else + __sti_mwait(0, 0); ++#endif + else + local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); +@@ -730,6 +744,10 @@ void __init arch_post_acpi_subsys_init(v + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + pr_info("System has AMD C1E enabled\n"); ++#ifdef CONFIG_IPIPE ++ pr_info("I-pipe: will not be able to use LAPIC as a tick device\n" ++ "I-pipe: disable C1E power state in your BIOS\n"); ++#endif + } + + static int __init idle_setup(char *str) +diff -uprN kernel/arch/x86/kernel/process.c.orig kernel_new/arch/x86/kernel/process.c.orig +--- kernel/arch/x86/kernel/process.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/process.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,854 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "process.h" ++ ++/* ++ * per-CPU TSS segments. Threads are completely 'soft' on Linux, ++ * no more per-task TSS's. The TSS size is kept cacheline-aligned ++ * so they are allowed to end up in the .data..cacheline_aligned ++ * section. Since TSS's are completely CPU-local, we want them ++ * on exact cacheline boundaries, to eliminate cacheline ping-pong. 
++ */ ++__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { ++ .x86_tss = { ++ /* ++ * .sp0 is only used when entering ring 0 from a lower ++ * privilege level. Since the init task never runs anything ++ * but ring 0 code, there is no need for a valid value here. ++ * Poison it. ++ */ ++ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, ++ ++ /* ++ * .sp1 is cpu_current_top_of_stack. The init task never ++ * runs user code, but cpu_current_top_of_stack should still ++ * be well defined before the first context switch. ++ */ ++ .sp1 = TOP_OF_INIT_STACK, ++ ++#ifdef CONFIG_X86_32 ++ .ss0 = __KERNEL_DS, ++ .ss1 = __KERNEL_CS, ++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, ++#endif ++ }, ++#ifdef CONFIG_X86_32 ++ /* ++ * Note that the .io_bitmap member must be extra-big. This is because ++ * the CPU will access an additional byte beyond the end of the IO ++ * permission bitmap. The extra byte must be all 1 bits, and must ++ * be within the limit. ++ */ ++ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, ++#endif ++}; ++EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); ++ ++DEFINE_PER_CPU(bool, __tss_limit_invalid); ++EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); ++ ++/* ++ * this gets called so that we can store lazy state into memory and copy the ++ * current task into the new thread. ++ */ ++int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) ++{ ++ memcpy(dst, src, arch_task_struct_size); ++#ifdef CONFIG_VM86 ++ dst->thread.vm86 = NULL; ++#endif ++ ++ return fpu__copy(&dst->thread.fpu, &src->thread.fpu); ++} ++ ++/* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(struct task_struct *tsk) ++{ ++ struct thread_struct *t = &tsk->thread; ++ unsigned long *bp = t->io_bitmap_ptr; ++ struct fpu *fpu = &t->fpu; ++ ++ if (bp) { ++ struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); ++ ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ /* ++ * Careful, clear this in the TSS too: ++ */ ++ memset(tss->io_bitmap, 0xff, t->io_bitmap_max); ++ t->io_bitmap_max = 0; ++ put_cpu(); ++ kfree(bp); ++ } ++ ++ free_vm86(t); ++ ++ fpu__drop(fpu); ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++ flush_ptrace_hw_breakpoint(tsk); ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ ++ fpu__clear(&tsk->thread.fpu); ++} ++ ++void disable_TSC(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. ++ */ ++ cr4_set_bits(X86_CR4_TSD); ++ preempt_enable(); ++} ++ ++static void enable_TSC(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. 
++ */ ++ cr4_clear_bits(X86_CR4_TSD); ++ preempt_enable(); ++} ++ ++int get_tsc_mode(unsigned long adr) ++{ ++ unsigned int val; ++ ++ if (test_thread_flag(TIF_NOTSC)) ++ val = PR_TSC_SIGSEGV; ++ else ++ val = PR_TSC_ENABLE; ++ ++ return put_user(val, (unsigned int __user *)adr); ++} ++ ++int set_tsc_mode(unsigned int val) ++{ ++ if (val == PR_TSC_SIGSEGV) ++ disable_TSC(); ++ else if (val == PR_TSC_ENABLE) ++ enable_TSC(); ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++DEFINE_PER_CPU(u64, msr_misc_features_shadow); ++ ++static void set_cpuid_faulting(bool on) ++{ ++ u64 msrval; ++ ++ msrval = this_cpu_read(msr_misc_features_shadow); ++ msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; ++ msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); ++ this_cpu_write(msr_misc_features_shadow, msrval); ++ wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval); ++} ++ ++static void disable_cpuid(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOCPUID)) { ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOCPUID in the current running context. ++ */ ++ set_cpuid_faulting(true); ++ } ++ preempt_enable(); ++} ++ ++static void enable_cpuid(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOCPUID)) { ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOCPUID in the current running context. ++ */ ++ set_cpuid_faulting(false); ++ } ++ preempt_enable(); ++} ++ ++static int get_cpuid_mode(void) ++{ ++ return !test_thread_flag(TIF_NOCPUID); ++} ++ ++static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled) ++{ ++ if (!static_cpu_has(X86_FEATURE_CPUID_FAULT)) ++ return -ENODEV; ++ ++ if (cpuid_enabled) ++ enable_cpuid(); ++ else ++ disable_cpuid(); ++ ++ return 0; ++} ++ ++/* ++ * Called immediately after a successful exec. ++ */ ++void arch_setup_new_exec(void) ++{ ++ /* If cpuid was previously disabled for this task, re-enable it. */ ++ if (test_thread_flag(TIF_NOCPUID)) ++ enable_cpuid(); ++} ++ ++static inline void switch_to_bitmap(struct thread_struct *prev, ++ struct thread_struct *next, ++ unsigned long tifp, unsigned long tifn) ++{ ++ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); ++ ++ if (tifn & _TIF_IO_BITMAP) { ++ /* ++ * Copy the relevant range of the IO bitmap. ++ * Normally this is 128 bytes or less: ++ */ ++ memcpy(tss->io_bitmap, next->io_bitmap_ptr, ++ max(prev->io_bitmap_max, next->io_bitmap_max)); ++ /* ++ * Make sure that the TSS limit is correct for the CPU ++ * to notice the IO bitmap. ++ */ ++ refresh_tss_limit(); ++ } else if (tifp & _TIF_IO_BITMAP) { ++ /* ++ * Clear any possible leftover bits: ++ */ ++ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); ++ } ++} ++ ++#ifdef CONFIG_SMP ++ ++struct ssb_state { ++ struct ssb_state *shared_state; ++ raw_spinlock_t lock; ++ unsigned int disable_state; ++ unsigned long local_state; ++}; ++ ++#define LSTATE_SSB 0 ++ ++static DEFINE_PER_CPU(struct ssb_state, ssb_state); ++ ++void speculative_store_bypass_ht_init(void) ++{ ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ st->local_state = 0; ++ ++ /* ++ * Shared state setup happens once on the first bringup ++ * of the CPU. It's not destroyed on CPU hotunplug. ++ */ ++ if (st->shared_state) ++ return; ++ ++ raw_spin_lock_init(&st->lock); ++ ++ /* ++ * Go over HT siblings and check whether one of them has set up the ++ * shared state pointer already. 
++ */ ++ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { ++ if (cpu == this_cpu) ++ continue; ++ ++ if (!per_cpu(ssb_state, cpu).shared_state) ++ continue; ++ ++ /* Link it to the state of the sibling: */ ++ st->shared_state = per_cpu(ssb_state, cpu).shared_state; ++ return; ++ } ++ ++ /* ++ * First HT sibling to come up on the core. Link shared state of ++ * the first HT sibling to itself. The siblings on the same core ++ * which come up later will see the shared state pointer and link ++ * themself to the state of this CPU. ++ */ ++ st->shared_state = st; ++} ++ ++/* ++ * Logic is: First HT sibling enables SSBD for both siblings in the core ++ * and last sibling to disable it, disables it for the whole core. This how ++ * MSR_SPEC_CTRL works in "hardware": ++ * ++ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL ++ */ ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ struct ssb_state *st = this_cpu_ptr(&ssb_state); ++ u64 msr = x86_amd_ls_cfg_base; ++ ++ if (!static_cpu_has(X86_FEATURE_ZEN)) { ++ msr |= ssbd_tif_to_amd_ls_cfg(tifn); ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ return; ++ } ++ ++ if (tifn & _TIF_SSBD) { ++ /* ++ * Since this can race with prctl(), block reentry on the ++ * same CPU. ++ */ ++ if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ msr |= x86_amd_ls_cfg_ssbd_mask; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ /* First sibling enables SSBD: */ ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ st->shared_state->disable_state++; ++ raw_spin_unlock(&st->shared_state->lock); ++ } else { ++ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) ++ return; ++ ++ raw_spin_lock(&st->shared_state->lock); ++ st->shared_state->disable_state--; ++ if (!st->shared_state->disable_state) ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++ raw_spin_unlock(&st->shared_state->lock); ++ } ++} ++#else ++static __always_inline void amd_set_core_ssb_state(unsigned long tifn) ++{ ++ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); ++ ++ wrmsrl(MSR_AMD64_LS_CFG, msr); ++} ++#endif ++ ++static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) ++{ ++ /* ++ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, ++ * so ssbd_tif_to_spec_ctrl() just works. ++ */ ++ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); ++} ++ ++/* ++ * Update the MSRs managing speculation control, during context switch. ++ * ++ * tifp: Previous task's thread flags ++ * tifn: Next task's thread flags ++ */ ++static __always_inline void __speculation_ctrl_update(unsigned long tifp, ++ unsigned long tifn) ++{ ++ unsigned long tif_diff = tifp ^ tifn; ++ u64 msr = x86_spec_ctrl_base; ++ bool updmsr = false; ++ ++ lockdep_assert_irqs_disabled(); ++ ++ /* Handle change of TIF_SSBD depending on the mitigation method. */ ++ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { ++ if (tif_diff & _TIF_SSBD) ++ amd_set_ssb_virt_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { ++ if (tif_diff & _TIF_SSBD) ++ amd_set_core_ssb_state(tifn); ++ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || ++ static_cpu_has(X86_FEATURE_AMD_SSBD)) { ++ updmsr |= !!(tif_diff & _TIF_SSBD); ++ msr |= ssbd_tif_to_spec_ctrl(tifn); ++ } ++ ++ /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. 
*/ ++ if (IS_ENABLED(CONFIG_SMP) && ++ static_branch_unlikely(&switch_to_cond_stibp)) { ++ updmsr |= !!(tif_diff & _TIF_SPEC_IB); ++ msr |= stibp_tif_to_spec_ctrl(tifn); ++ } ++ ++ if (updmsr) ++ wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++} ++ ++static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) ++{ ++ if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { ++ if (task_spec_ssb_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SSBD); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SSBD); ++ ++ if (task_spec_ib_disable(tsk)) ++ set_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ else ++ clear_tsk_thread_flag(tsk, TIF_SPEC_IB); ++ } ++ /* Return the updated threadinfo flags*/ ++ return task_thread_info(tsk)->flags; ++} ++ ++void speculation_ctrl_update(unsigned long tif) ++{ ++ unsigned long flags; ++ ++ /* Forced update. Make sure all relevant TIF flags are different */ ++ local_irq_save(flags); ++ __speculation_ctrl_update(~tif, tif); ++ local_irq_restore(flags); ++} ++ ++/* Called from seccomp/prctl update */ ++void speculation_ctrl_update_current(void) ++{ ++ preempt_disable(); ++ speculation_ctrl_update(speculation_ctrl_update_tif(current)); ++ preempt_enable(); ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) ++{ ++ struct thread_struct *prev, *next; ++ unsigned long tifp, tifn; ++ ++ prev = &prev_p->thread; ++ next = &next_p->thread; ++ ++ tifn = READ_ONCE(task_thread_info(next_p)->flags); ++ tifp = READ_ONCE(task_thread_info(prev_p)->flags); ++ switch_to_bitmap(prev, next, tifp, tifn); ++ ++ propagate_user_return_notify(prev_p, next_p); ++ ++ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) && ++ arch_has_block_step()) { ++ unsigned long debugctl, msk; ++ ++ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ debugctl &= ~DEBUGCTLMSR_BTF; ++ msk = tifn & _TIF_BLOCKSTEP; ++ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT; ++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); ++ } ++ ++ if ((tifp ^ tifn) & _TIF_NOTSC) ++ cr4_toggle_bits_irqsoff(X86_CR4_TSD); ++ ++ if ((tifp ^ tifn) & _TIF_NOCPUID) ++ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); ++ ++ if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { ++ __speculation_ctrl_update(tifp, tifn); ++ } else { ++ speculation_ctrl_update_tif(prev_p); ++ tifn = speculation_ctrl_update_tif(next_p); ++ ++ /* Enforce MSR update to ensure consistent state */ ++ __speculation_ctrl_update(~tifn, tifn); ++ } ++} ++ ++/* ++ * Idle related variables and functions ++ */ ++unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; ++EXPORT_SYMBOL(boot_option_idle_override); ++ ++static void (*x86_idle)(void); ++ ++#ifndef CONFIG_SMP ++static inline void play_dead(void) ++{ ++ BUG(); ++} ++#endif ++ ++void arch_cpu_idle_enter(void) ++{ ++ tsc_verify_tsc_adjust(false); ++ local_touch_nmi(); ++} ++ ++void arch_cpu_idle_dead(void) ++{ ++ play_dead(); ++} ++ ++/* ++ * Called from the generic idle code. ++ */ ++void arch_cpu_idle(void) ++{ ++ x86_idle(); ++} ++ ++/* ++ * We use this if we don't have any better idle routine.. 
++ */ ++void __cpuidle default_idle(void) ++{ ++ trace_cpu_idle_rcuidle(1, smp_processor_id()); ++ safe_halt(); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); ++} ++#ifdef CONFIG_APM_MODULE ++EXPORT_SYMBOL(default_idle); ++#endif ++ ++#ifdef CONFIG_XEN ++bool xen_set_default_idle(void) ++{ ++ bool ret = !!x86_idle; ++ ++ x86_idle = default_idle; ++ ++ return ret; ++} ++#endif ++ ++void stop_this_cpu(void *dummy) ++{ ++ local_irq_disable(); ++ /* ++ * Remove this CPU: ++ */ ++ set_cpu_online(smp_processor_id(), false); ++ disable_local_APIC(); ++ mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); ++ ++ /* ++ * Use wbinvd on processors that support SME. This provides support ++ * for performing a successful kexec when going from SME inactive ++ * to SME active (or vice-versa). The cache must be cleared so that ++ * if there are entries with the same physical address, both with and ++ * without the encryption bit, they don't race each other when flushed ++ * and potentially end up with the wrong entry being committed to ++ * memory. ++ */ ++ if (boot_cpu_has(X86_FEATURE_SME)) ++ native_wbinvd(); ++ for (;;) { ++ /* ++ * Use native_halt() so that memory contents don't change ++ * (stack usage and variables) after possibly issuing the ++ * native_wbinvd() above. ++ */ ++ native_halt(); ++ } ++} ++ ++/* ++ * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power ++ * states (local apic timer and TSC stop). ++ */ ++static void amd_e400_idle(void) ++{ ++ /* ++ * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E ++ * gets set after static_cpu_has() places have been converted via ++ * alternatives. ++ */ ++ if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { ++ default_idle(); ++ return; ++ } ++ ++ tick_broadcast_enter(); ++ ++ default_idle(); ++ ++ /* ++ * The switch back from broadcast mode needs to be called with ++ * interrupts disabled. ++ */ ++ local_irq_disable(); ++ tick_broadcast_exit(); ++ local_irq_enable(); ++} ++ ++/* ++ * Intel Core2 and older machines prefer MWAIT over HALT for C1. ++ * We can't rely on cpuidle installing MWAIT, because it will not load ++ * on systems that support only C1 -- so the boot default must be MWAIT. ++ * ++ * Some AMD machines are the opposite, they depend on using HALT. ++ * ++ * So for default C1, which is used during boot until cpuidle loads, ++ * use MWAIT-C1 on Intel HW that has it, else use HALT. ++ */ ++static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) ++{ ++ if (c->x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT ++ * with interrupts enabled and no flags, which is backwards compatible with the ++ * original MWAIT implementation. 
++ */ ++static __cpuidle void mwait_idle(void) ++{ ++ if (!current_set_polling_and_test()) { ++ trace_cpu_idle_rcuidle(1, smp_processor_id()); ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { ++ mb(); /* quirk */ ++ clflush((void *)¤t_thread_info()->flags); ++ mb(); /* quirk */ ++ } ++ ++ __monitor((void *)¤t_thread_info()->flags, 0, 0); ++ if (!need_resched()) ++ __sti_mwait(0, 0); ++ else ++ local_irq_enable(); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); ++ } else { ++ local_irq_enable(); ++ } ++ __current_clr_polling(); ++} ++ ++void select_idle_routine(const struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) ++ pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); ++#endif ++ if (x86_idle || boot_option_idle_override == IDLE_POLL) ++ return; ++ ++ if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { ++ pr_info("using AMD E400 aware idle routine\n"); ++ x86_idle = amd_e400_idle; ++ } else if (prefer_mwait_c1_over_halt(c)) { ++ pr_info("using mwait in idle threads\n"); ++ x86_idle = mwait_idle; ++ } else ++ x86_idle = default_idle; ++} ++ ++void amd_e400_c1e_apic_setup(void) ++{ ++ if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) { ++ pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id()); ++ local_irq_disable(); ++ tick_broadcast_force(); ++ local_irq_enable(); ++ } ++} ++ ++void __init arch_post_acpi_subsys_init(void) ++{ ++ u32 lo, hi; ++ ++ if (!boot_cpu_has_bug(X86_BUG_AMD_E400)) ++ return; ++ ++ /* ++ * AMD E400 detection needs to happen after ACPI has been enabled. If ++ * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in ++ * MSR_K8_INT_PENDING_MSG. ++ */ ++ rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); ++ if (!(lo & K8_INTP_C1E_ACTIVE_MASK)) ++ return; ++ ++ boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E); ++ ++ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) ++ mark_tsc_unstable("TSC halt in AMD C1E"); ++ pr_info("System has AMD C1E enabled\n"); ++} ++ ++static int __init idle_setup(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "poll")) { ++ pr_info("using polling idle threads\n"); ++ boot_option_idle_override = IDLE_POLL; ++ cpu_idle_poll_ctrl(true); ++ } else if (!strcmp(str, "halt")) { ++ /* ++ * When the boot option of idle=halt is added, halt is ++ * forced to be used for CPU idle. In such case CPU C2/C3 ++ * won't be used again. ++ * To continue to load the CPU idle driver, don't touch ++ * the boot_option_idle_override. ++ */ ++ x86_idle = default_idle; ++ boot_option_idle_override = IDLE_HALT; ++ } else if (!strcmp(str, "nomwait")) { ++ /* ++ * If the boot option of "idle=nomwait" is added, ++ * it means that mwait will be disabled for CPU C2/C3 ++ * states. In such case it won't touch the variable ++ * of boot_option_idle_override. ++ */ ++ boot_option_idle_override = IDLE_NOMWAIT; ++ } else ++ return -1; ++ ++ return 0; ++} ++early_param("idle", idle_setup); ++ ++unsigned long arch_align_stack(unsigned long sp) ++{ ++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) ++ sp -= get_random_int() % 8192; ++ return sp & ~0xf; ++} ++ ++unsigned long arch_randomize_brk(struct mm_struct *mm) ++{ ++ return randomize_page(mm->brk, 0x02000000); ++} ++ ++/* ++ * Called from fs/proc with a reference on @p to find the function ++ * which called into schedule(). This needs to be done carefully ++ * because the task might wake up and we might look at a stack ++ * changing under us. 
++ */ ++unsigned long get_wchan(struct task_struct *p) ++{ ++ unsigned long start, bottom, top, sp, fp, ip, ret = 0; ++ int count = 0; ++ ++ if (!p || p == current || p->state == TASK_RUNNING) ++ return 0; ++ ++ if (!try_get_task_stack(p)) ++ return 0; ++ ++ start = (unsigned long)task_stack_page(p); ++ if (!start) ++ goto out; ++ ++ /* ++ * Layout of the stack page: ++ * ++ * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) ++ * PADDING ++ * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING ++ * stack ++ * ----------- bottom = start ++ * ++ * The tasks stack pointer points at the location where the ++ * framepointer is stored. The data on the stack is: ++ * ... IP FP ... IP FP ++ * ++ * We need to read FP and IP, so we need to adjust the upper ++ * bound by another unsigned long. ++ */ ++ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; ++ top -= 2 * sizeof(unsigned long); ++ bottom = start; ++ ++ sp = READ_ONCE(p->thread.sp); ++ if (sp < bottom || sp > top) ++ goto out; ++ ++ fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp); ++ do { ++ if (fp < bottom || fp > top) ++ goto out; ++ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); ++ if (!in_sched_functions(ip)) { ++ ret = ip; ++ goto out; ++ } ++ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); ++ } while (count++ < 16 && p->state != TASK_RUNNING); ++ ++out: ++ put_task_stack(p); ++ return ret; ++} ++ ++long do_arch_prctl_common(struct task_struct *task, int option, ++ unsigned long cpuid_enabled) ++{ ++ switch (option) { ++ case ARCH_GET_CPUID: ++ return get_cpuid_mode(); ++ case ARCH_SET_CPUID: ++ return set_cpuid_mode(task, cpuid_enabled); ++ } ++ ++ return -EINVAL; ++} +diff -uprN kernel/arch/x86/kernel/smpboot.c kernel_new/arch/x86/kernel/smpboot.c +--- kernel/arch/x86/kernel/smpboot.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smpboot.c 2021-04-01 18:28:07.656863286 +0800 +@@ -1074,7 +1074,7 @@ int native_cpu_up(unsigned int cpu, stru + { + int apicid = apic->cpu_present_to_apicid(cpu); + int cpu0_nmi_registered = 0; +- unsigned long flags; ++ unsigned long vflags, rflags; + int err, ret = 0; + + lockdep_assert_irqs_enabled(); +@@ -1123,9 +1123,11 @@ int native_cpu_up(unsigned int cpu, stru + * Check TSC synchronization with the AP (keep irqs disabled + * while doing so): + */ +- local_irq_save(flags); ++ local_irq_save(vflags); ++ rflags = hard_local_irq_save(); + check_tsc_sync_source(cpu); +- local_irq_restore(flags); ++ hard_local_irq_restore(rflags); ++ local_irq_restore(vflags); + + while (!cpu_online(cpu)) { + cpu_relax(); +diff -uprN kernel/arch/x86/kernel/smpboot.c.orig kernel_new/arch/x86/kernel/smpboot.c.orig +--- kernel/arch/x86/kernel/smpboot.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smpboot.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1701 @@ ++ /* ++ * x86 SMP booting functions ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998, 1999, 2000, 2009 Ingo Molnar ++ * Copyright 2001 Andi Kleen, SuSE Labs. ++ * ++ * Much of the core SMP work is based on previous work by Thomas Radke, to ++ * whom a great many thanks are extended. ++ * ++ * Thanks to Intel for making available several different Pentium, ++ * Pentium Pro and Pentium-II/Xeon MP machines. ++ * Original development of Linux SMP code supported by Caldera. ++ * ++ * This code is released under the GNU General Public License version 2 or ++ * later. 
++ * ++ * Fixes ++ * Felix Koop : NR_CPUS used properly ++ * Jose Renau : Handle single CPU case. ++ * Alan Cox : By repeated request 8) - Total BogoMIPS report. ++ * Greg Wright : Fix for kernel stacks panic. ++ * Erich Boleyn : MP v1.4 and additional changes. ++ * Matthias Sattler : Changes for 2.1 kernel map. ++ * Michel Lespinasse : Changes for 2.1 kernel map. ++ * Michael Chastain : Change trampoline.S to gnu as. ++ * Alan Cox : Dumb bug: 'B' step PPro's are fine ++ * Ingo Molnar : Added APIC timers, based on code ++ * from Jose Renau ++ * Ingo Molnar : various cleanups and rewrites ++ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs ++ * Andi Kleen : Changed for SMP boot into long mode. ++ * Martin J. Bligh : Added support for multi-quad systems ++ * Dave Jones : Report invalid combinations of Athlon CPUs. ++ * Rusty Russell : Hacked into shape for new "hotplug" boot process. ++ * Andi Kleen : Converted to new state machine. ++ * Ashok Raj : CPU hotplug support ++ * Glauber Costa : i386 and x86_64 integration ++ */ ++ ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* representing HT siblings of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); ++EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); ++ ++/* representing HT and core siblings of each logical CPU */ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); ++EXPORT_PER_CPU_SYMBOL(cpu_core_map); ++ ++DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); ++ ++/* Per CPU bogomips and other parameters */ ++DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); ++EXPORT_PER_CPU_SYMBOL(cpu_info); ++ ++/* Logical package management. We might want to allocate that dynamically */ ++unsigned int __max_logical_packages __read_mostly; ++EXPORT_SYMBOL(__max_logical_packages); ++static unsigned int logical_packages __read_mostly; ++ ++/* Maximum number of SMT threads on any online core */ ++int __read_mostly __max_smt_threads = 1; ++ ++/* Flag to indicate if a complete sched domain rebuild is required */ ++bool x86_topology_update; ++ ++int arch_update_cpu_topology(void) ++{ ++ int retval = x86_topology_update; ++ ++ x86_topology_update = false; ++ return retval; ++} ++ ++static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&rtc_lock, flags); ++ CMOS_WRITE(0xa, 0xf); ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = ++ start_eip >> 4; ++ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = ++ start_eip & 0xf; ++} ++ ++static inline void smpboot_restore_warm_reset_vector(void) ++{ ++ unsigned long flags; ++ ++ /* ++ * Paranoid: Set warm reset code and vector here back ++ * to default values. 
++ */ ++ spin_lock_irqsave(&rtc_lock, flags); ++ CMOS_WRITE(0, 0xf); ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ ++ *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; ++} ++ ++/* ++ * Report back to the Boot Processor during boot time or to the caller processor ++ * during CPU online. ++ */ ++static void smp_callin(void) ++{ ++ int cpuid, phys_id; ++ ++ /* ++ * If waken up by an INIT in an 82489DX configuration ++ * cpu_callout_mask guarantees we don't get here before ++ * an INIT_deassert IPI reaches our local APIC, so it is ++ * now safe to touch our local APIC. ++ */ ++ cpuid = smp_processor_id(); ++ ++ /* ++ * (This works even if the APIC is not enabled.) ++ */ ++ phys_id = read_apic_id(); ++ ++ /* ++ * the boot CPU has finished the init stage and is spinning ++ * on callin_map until we finish. We are free to set up this ++ * CPU, first the APIC. (this is probably redundant on most ++ * boards) ++ */ ++ apic_ap_setup(); ++ ++ /* ++ * Save our processor parameters. Note: this information ++ * is needed for clock calibration. ++ */ ++ smp_store_cpu_info(cpuid); ++ ++ /* ++ * The topology information must be up to date before ++ * calibrate_delay() and notify_cpu_starting(). ++ */ ++ set_cpu_sibling_map(raw_smp_processor_id()); ++ ++ /* ++ * Get our bogomips. ++ * Update loops_per_jiffy in cpu_data. Previous call to ++ * smp_store_cpu_info() stored a value that is close but not as ++ * accurate as the value just calculated. ++ */ ++ calibrate_delay(); ++ cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; ++ pr_debug("Stack at about %p\n", &cpuid); ++ ++ wmb(); ++ ++ notify_cpu_starting(cpuid); ++ ++ /* ++ * Allow the master to continue. ++ */ ++ cpumask_set_cpu(cpuid, cpu_callin_mask); ++} ++ ++static int cpu0_logical_apicid; ++static int enable_start_cpu0; ++/* ++ * Activate a secondary processor. ++ */ ++static void notrace start_secondary(void *unused) ++{ ++ /* ++ * Don't put *anything* except direct CPU state initialization ++ * before cpu_init(), SMP booting is too fragile that we want to ++ * limit the things done here to the most necessary things. ++ */ ++ if (boot_cpu_has(X86_FEATURE_PCID)) ++ __write_cr4(__read_cr4() | X86_CR4_PCIDE); ++ ++#ifdef CONFIG_X86_32 ++ /* switch away from the initial page table */ ++ load_cr3(swapper_pg_dir); ++ /* ++ * Initialize the CR4 shadow before doing anything that could ++ * try to read it. ++ */ ++ cr4_init_shadow(); ++ __flush_tlb_all(); ++#endif ++ load_current_idt(); ++ cpu_init(); ++ x86_cpuinit.early_percpu_clock_init(); ++ preempt_disable(); ++ smp_callin(); ++ ++ enable_start_cpu0 = 0; ++ ++ /* otherwise gcc will move up smp_processor_id before the cpu_init */ ++ barrier(); ++ /* ++ * Check TSC synchronization with the boot CPU: ++ */ ++ check_tsc_sync_target(); ++ ++ speculative_store_bypass_ht_init(); ++ ++ /* ++ * Lock vector_lock, set CPU online and bring the vector ++ * allocator online. Online must be set with vector_lock held ++ * to prevent a concurrent irq setup/teardown from seeing a ++ * half valid vector space. 
++ */ ++ lock_vector_lock(); ++ set_cpu_online(smp_processor_id(), true); ++ lapic_online(); ++ unlock_vector_lock(); ++ cpu_set_state_online(smp_processor_id()); ++ x86_platform.nmi_init(); ++ ++ /* enable local interrupts */ ++ local_irq_enable(); ++ ++ /* to prevent fake stack check failure in clock setup */ ++ boot_init_stack_canary(); ++ ++ x86_cpuinit.setup_percpu_clockev(); ++ ++ wmb(); ++ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); ++} ++ ++/** ++ * topology_is_primary_thread - Check whether CPU is the primary SMT thread ++ * @cpu: CPU to check ++ */ ++bool topology_is_primary_thread(unsigned int cpu) ++{ ++ return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); ++} ++ ++/** ++ * topology_smt_supported - Check whether SMT is supported by the CPUs ++ */ ++bool topology_smt_supported(void) ++{ ++ return smp_num_siblings > 1; ++} ++ ++/** ++ * topology_phys_to_logical_pkg - Map a physical package id to a logical ++ * ++ * Returns logical package id or -1 if not found ++ */ ++int topology_phys_to_logical_pkg(unsigned int phys_pkg) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ ++ if (c->initialized && c->phys_proc_id == phys_pkg) ++ return c->logical_proc_id; ++ } ++ return -1; ++} ++EXPORT_SYMBOL(topology_phys_to_logical_pkg); ++ ++/** ++ * topology_update_package_map - Update the physical to logical package map ++ * @pkg: The physical package id as retrieved via CPUID ++ * @cpu: The cpu for which this is updated ++ */ ++int topology_update_package_map(unsigned int pkg, unsigned int cpu) ++{ ++ int new; ++ ++ /* Already available somewhere? */ ++ new = topology_phys_to_logical_pkg(pkg); ++ if (new >= 0) ++ goto found; ++ ++ new = logical_packages++; ++ if (new != pkg) { ++ pr_info("CPU %u Converting physical %u to logical package %u\n", ++ cpu, pkg, new); ++ } ++found: ++ cpu_data(cpu).logical_proc_id = new; ++ return 0; ++} ++ ++void __init smp_store_boot_cpu_info(void) ++{ ++ int id = 0; /* CPU 0 */ ++ struct cpuinfo_x86 *c = &cpu_data(id); ++ ++ *c = boot_cpu_data; ++ c->cpu_index = id; ++ topology_update_package_map(c->phys_proc_id, id); ++ c->initialized = true; ++} ++ ++/* ++ * The bootstrap kernel entry code has set these up. Save them for ++ * a given CPU ++ */ ++void smp_store_cpu_info(int id) ++{ ++ struct cpuinfo_x86 *c = &cpu_data(id); ++ ++ /* Copy boot_cpu_data only on the first bringup */ ++ if (!c->initialized) ++ *c = boot_cpu_data; ++ c->cpu_index = id; ++ /* ++ * During boot time, CPU0 has this setup already. Save the info when ++ * bringing up AP or offlined CPU0. ++ */ ++ identify_secondary_cpu(c); ++ c->initialized = true; ++} ++ ++static bool ++topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ return (cpu_to_node(cpu1) == cpu_to_node(cpu2)); ++} ++ ++static bool ++topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ return !WARN_ONCE(!topology_same_node(c, o), ++ "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " ++ "[node: %d != %d]. 
Ignoring dependency.\n", ++ cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); ++} ++ ++#define link_mask(mfunc, c1, c2) \ ++do { \ ++ cpumask_set_cpu((c1), mfunc(c2)); \ ++ cpumask_set_cpu((c2), mfunc(c1)); \ ++} while (0) ++ ++static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ if (c->phys_proc_id == o->phys_proc_id && ++ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { ++ if (c->cpu_core_id == o->cpu_core_id) ++ return topology_sane(c, o, "smt"); ++ ++ if ((c->cu_id != 0xff) && ++ (o->cu_id != 0xff) && ++ (c->cu_id == o->cu_id)) ++ return topology_sane(c, o, "smt"); ++ } ++ ++ } else if (c->phys_proc_id == o->phys_proc_id && ++ c->cpu_core_id == o->cpu_core_id) { ++ return topology_sane(c, o, "smt"); ++ } ++ ++ return false; ++} ++ ++/* ++ * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs. ++ * ++ * These are Intel CPUs that enumerate an LLC that is shared by ++ * multiple NUMA nodes. The LLC on these systems is shared for ++ * off-package data access but private to the NUMA node (half ++ * of the package) for on-package access. ++ * ++ * CPUID (the source of the information about the LLC) can only ++ * enumerate the cache as being shared *or* unshared, but not ++ * this particular configuration. The CPU in this case enumerates ++ * the cache to be shared across the entire package (spanning both ++ * NUMA nodes). ++ */ ++ ++static const struct x86_cpu_id snc_cpu[] = { ++ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, ++ {} ++}; ++ ++static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ int cpu1 = c->cpu_index, cpu2 = o->cpu_index; ++ ++ /* Do not match if we do not have a valid APICID for cpu: */ ++ if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID) ++ return false; ++ ++ /* Do not match if LLC id does not match: */ ++ if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2)) ++ return false; ++ ++ /* ++ * Allow the SNC topology without warning. Return of false ++ * means 'c' does not share the LLC of 'o'. This will be ++ * reflected to userspace. ++ */ ++ if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu)) ++ return false; ++ ++ return topology_sane(c, o, "llc"); ++} ++ ++/* ++ * Unlike the other levels, we do not enforce keeping a ++ * multicore group inside a NUMA node. If this happens, we will ++ * discard the MC level of the topology later. ++ */ ++static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) ++{ ++ if (c->phys_proc_id == o->phys_proc_id) ++ return true; ++ return false; ++} ++ ++#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) ++static inline int x86_sched_itmt_flags(void) ++{ ++ return sysctl_sched_itmt_enabled ? 
SD_ASYM_PACKING : 0; ++} ++ ++#ifdef CONFIG_SCHED_MC ++static int x86_core_flags(void) ++{ ++ return cpu_core_flags() | x86_sched_itmt_flags(); ++} ++#endif ++#ifdef CONFIG_SCHED_SMT ++static int x86_smt_flags(void) ++{ ++ return cpu_smt_flags() | x86_sched_itmt_flags(); ++} ++#endif ++#endif ++ ++static struct sched_domain_topology_level x86_numa_in_package_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, ++#endif ++ { NULL, }, ++}; ++ ++static struct sched_domain_topology_level x86_topology[] = { ++#ifdef CONFIG_SCHED_SMT ++ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, ++#endif ++#ifdef CONFIG_SCHED_MC ++ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, ++#endif ++ { cpu_cpu_mask, SD_INIT_NAME(DIE) }, ++ { NULL, }, ++}; ++ ++/* ++ * Set if a package/die has multiple NUMA nodes inside. ++ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel ++ * Sub-NUMA Clustering have this. ++ */ ++static bool x86_has_numa_in_package; ++ ++void set_cpu_sibling_map(int cpu) ++{ ++ bool has_smt = smp_num_siblings > 1; ++ bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ struct cpuinfo_x86 *o; ++ int i, threads; ++ ++ cpumask_set_cpu(cpu, cpu_sibling_setup_mask); ++ ++ if (!has_mp) { ++ cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); ++ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); ++ cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); ++ c->booted_cores = 1; ++ return; ++ } ++ ++ for_each_cpu(i, cpu_sibling_setup_mask) { ++ o = &cpu_data(i); ++ ++ if ((i == cpu) || (has_smt && match_smt(c, o))) ++ link_mask(topology_sibling_cpumask, cpu, i); ++ ++ if ((i == cpu) || (has_mp && match_llc(c, o))) ++ link_mask(cpu_llc_shared_mask, cpu, i); ++ ++ } ++ ++ /* ++ * This needs a separate iteration over the cpus because we rely on all ++ * topology_sibling_cpumask links to be set-up. ++ */ ++ for_each_cpu(i, cpu_sibling_setup_mask) { ++ o = &cpu_data(i); ++ ++ if ((i == cpu) || (has_mp && match_die(c, o))) { ++ link_mask(topology_core_cpumask, cpu, i); ++ ++ /* ++ * Does this new cpu bringup a new core? ++ */ ++ if (cpumask_weight( ++ topology_sibling_cpumask(cpu)) == 1) { ++ /* ++ * for each core in package, increment ++ * the booted_cores for this new cpu ++ */ ++ if (cpumask_first( ++ topology_sibling_cpumask(i)) == i) ++ c->booted_cores++; ++ /* ++ * increment the core count for all ++ * the other cpus in this package ++ */ ++ if (i != cpu) ++ cpu_data(i).booted_cores++; ++ } else if (i != cpu && !c->booted_cores) ++ c->booted_cores = cpu_data(i).booted_cores; ++ } ++ if (match_die(c, o) && !topology_same_node(c, o)) ++ x86_has_numa_in_package = true; ++ } ++ ++ threads = cpumask_weight(topology_sibling_cpumask(cpu)); ++ if (threads > __max_smt_threads) ++ __max_smt_threads = threads; ++} ++ ++/* maps the cpu to the sched domain representing multi-core */ ++const struct cpumask *cpu_coregroup_mask(int cpu) ++{ ++ return cpu_llc_shared_mask(cpu); ++} ++ ++static void impress_friends(void) ++{ ++ int cpu; ++ unsigned long bogosum = 0; ++ /* ++ * Allow the user to impress friends. 
++ */ ++ pr_debug("Before bogomips\n"); ++ for_each_possible_cpu(cpu) ++ if (cpumask_test_cpu(cpu, cpu_callout_mask)) ++ bogosum += cpu_data(cpu).loops_per_jiffy; ++ pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", ++ num_online_cpus(), ++ bogosum/(500000/HZ), ++ (bogosum/(5000/HZ))%100); ++ ++ pr_debug("Before bogocount - setting activated=1\n"); ++} ++ ++void __inquire_remote_apic(int apicid) ++{ ++ unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; ++ const char * const names[] = { "ID", "VERSION", "SPIV" }; ++ int timeout; ++ u32 status; ++ ++ pr_info("Inquiring remote APIC 0x%x...\n", apicid); ++ ++ for (i = 0; i < ARRAY_SIZE(regs); i++) { ++ pr_info("... APIC 0x%x %s: ", apicid, names[i]); ++ ++ /* ++ * Wait for idle. ++ */ ++ status = safe_apic_wait_icr_idle(); ++ if (status) ++ pr_cont("a previous APIC delivery may have failed\n"); ++ ++ apic_icr_write(APIC_DM_REMRD | regs[i], apicid); ++ ++ timeout = 0; ++ do { ++ udelay(100); ++ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; ++ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); ++ ++ switch (status) { ++ case APIC_ICR_RR_VALID: ++ status = apic_read(APIC_RRR); ++ pr_cont("%08x\n", status); ++ break; ++ default: ++ pr_cont("failed\n"); ++ } ++ } ++} ++ ++/* ++ * The Multiprocessor Specification 1.4 (1997) example code suggests ++ * that there should be a 10ms delay between the BSP asserting INIT ++ * and de-asserting INIT, when starting a remote processor. ++ * But that slows boot and resume on modern processors, which include ++ * many cores and don't require that delay. ++ * ++ * Cmdline "init_cpu_udelay=" is available to over-ride this delay. ++ * Modern processor families are quirked to remove the delay entirely. ++ */ ++#define UDELAY_10MS_DEFAULT 10000 ++ ++static unsigned int init_udelay = UINT_MAX; ++ ++static int __init cpu_init_udelay(char *str) ++{ ++ get_option(&str, &init_udelay); ++ ++ return 0; ++} ++early_param("cpu_init_udelay", cpu_init_udelay); ++ ++static void __init smp_quirk_init_udelay(void) ++{ ++ /* if cmdline changed it from default, leave it alone */ ++ if (init_udelay != UINT_MAX) ++ return; ++ ++ /* if modern processor, use no delay */ ++ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || ++ ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) || ++ ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { ++ init_udelay = 0; ++ return; ++ } ++ /* else, use legacy delay */ ++ init_udelay = UDELAY_10MS_DEFAULT; ++} ++ ++/* ++ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal ++ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this ++ * won't ... remember to clear down the APIC, etc later. ++ */ ++int ++wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) ++{ ++ unsigned long send_status, accept_status = 0; ++ int maxlvt; ++ ++ /* Target chip */ ++ /* Boot on the stack */ ++ /* Kick the second */ ++ apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ udelay(200); ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ maxlvt = lapic_get_maxlvt(); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. 
*/ ++ apic_write(APIC_ESR, 0); ++ accept_status = (apic_read(APIC_ESR) & 0xEF); ++ } ++ pr_debug("NMI sent\n"); ++ ++ if (send_status) ++ pr_err("APIC never delivered???\n"); ++ if (accept_status) ++ pr_err("APIC delivery error (%lx)\n", accept_status); ++ ++ return (send_status | accept_status); ++} ++ ++static int ++wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) ++{ ++ unsigned long send_status = 0, accept_status = 0; ++ int maxlvt, num_starts, j; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ /* ++ * Be paranoid about clearing APIC errors. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++ ++ pr_debug("Asserting INIT\n"); ++ ++ /* ++ * Turn INIT on target chip ++ */ ++ /* ++ * Send IPI ++ */ ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, ++ phys_apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ udelay(init_udelay); ++ ++ pr_debug("Deasserting INIT\n"); ++ ++ /* Target chip */ ++ /* Send IPI */ ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ mb(); ++ ++ /* ++ * Should we send STARTUP IPIs ? ++ * ++ * Determine this based on the APIC version. ++ * If we don't have an integrated APIC, don't send the STARTUP IPIs. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) ++ num_starts = 2; ++ else ++ num_starts = 0; ++ ++ /* ++ * Run STARTUP IPI loop. ++ */ ++ pr_debug("#startup loops: %d\n", num_starts); ++ ++ for (j = 1; j <= num_starts; j++) { ++ pr_debug("Sending STARTUP #%d\n", j); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ pr_debug("After apic_write\n"); ++ ++ /* ++ * STARTUP IPI ++ */ ++ ++ /* Target chip */ ++ /* Boot on the stack */ ++ /* Kick the second */ ++ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), ++ phys_apicid); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ if (init_udelay == 0) ++ udelay(10); ++ else ++ udelay(300); ++ ++ pr_debug("Startup point 1\n"); ++ ++ pr_debug("Waiting for send to finish...\n"); ++ send_status = safe_apic_wait_icr_idle(); ++ ++ /* ++ * Give the other CPU some time to accept the IPI. ++ */ ++ if (init_udelay == 0) ++ udelay(10); ++ else ++ udelay(200); ++ ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ accept_status = (apic_read(APIC_ESR) & 0xEF); ++ if (send_status || accept_status) ++ break; ++ } ++ pr_debug("After Startup\n"); ++ ++ if (send_status) ++ pr_err("APIC never delivered???\n"); ++ if (accept_status) ++ pr_err("APIC delivery error (%lx)\n", accept_status); ++ ++ return (send_status | accept_status); ++} ++ ++/* reduce the number of lines printed when booting a large cpu count system */ ++static void announce_cpu(int cpu, int apicid) ++{ ++ static int current_node = -1; ++ int node = early_cpu_to_node(cpu); ++ static int width, node_width; ++ ++ if (!width) ++ width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ ++ ++ if (!node_width) ++ node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ ++ ++ if (cpu == 1) ++ printk(KERN_INFO "x86: Booting SMP configuration:\n"); ++ ++ if (system_state < SYSTEM_RUNNING) { ++ if (node != current_node) { ++ if (current_node > (-1)) ++ pr_cont("\n"); ++ current_node = node; ++ ++ printk(KERN_INFO ".... 
node %*s#%d, CPUs: ", ++ node_width - num_digits(node), " ", node); ++ } ++ ++ /* Add padding for the BSP */ ++ if (cpu == 1) ++ pr_cont("%*s", width + 1, " "); ++ ++ pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); ++ ++ } else ++ pr_info("Booting Node %d Processor %d APIC 0x%x\n", ++ node, cpu, apicid); ++} ++ ++static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) ++{ ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) ++ return NMI_HANDLED; ++ ++ return NMI_DONE; ++} ++ ++/* ++ * Wake up AP by INIT, INIT, STARTUP sequence. ++ * ++ * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS ++ * boot-strap code which is not a desired behavior for waking up BSP. To ++ * void the boot-strap code, wake up CPU0 by NMI instead. ++ * ++ * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined ++ * (i.e. physically hot removed and then hot added), NMI won't wake it up. ++ * We'll change this code in the future to wake up hard offlined CPU0 if ++ * real platform and request are available. ++ */ ++static int ++wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, ++ int *cpu0_nmi_registered) ++{ ++ int id; ++ int boot_error; ++ ++ preempt_disable(); ++ ++ /* ++ * Wake up AP by INIT, INIT, STARTUP sequence. ++ */ ++ if (cpu) { ++ boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); ++ goto out; ++ } ++ ++ /* ++ * Wake up BSP by nmi. ++ * ++ * Register a NMI handler to help wake up CPU0. ++ */ ++ boot_error = register_nmi_handler(NMI_LOCAL, ++ wakeup_cpu0_nmi, 0, "wake_cpu0"); ++ ++ if (!boot_error) { ++ enable_start_cpu0 = 1; ++ *cpu0_nmi_registered = 1; ++ if (apic->dest_logical == APIC_DEST_LOGICAL) ++ id = cpu0_logical_apicid; ++ else ++ id = apicid; ++ boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); ++ } ++ ++out: ++ preempt_enable(); ++ ++ return boot_error; ++} ++ ++void common_cpu_up(unsigned int cpu, struct task_struct *idle) ++{ ++ /* Just in case we booted with a single CPU. */ ++ alternatives_enable_smp(); ++ ++ per_cpu(current_task, cpu) = idle; ++ ++#ifdef CONFIG_X86_32 ++ /* Stack for startup_32 can be just as for start_secondary onwards */ ++ irq_ctx_init(cpu); ++ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); ++#else ++ initial_gs = per_cpu_offset(cpu); ++#endif ++} ++ ++/* ++ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad ++ * (ie clustered apic addressing mode), this is a LOGICAL apic ID. ++ * Returns zero if CPU booted OK, else error code from ++ * ->wakeup_secondary_cpu. ++ */ ++static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, ++ int *cpu0_nmi_registered) ++{ ++ volatile u32 *trampoline_status = ++ (volatile u32 *) __va(real_mode_header->trampoline_status); ++ /* start_ip had better be page-aligned! */ ++ unsigned long start_ip = real_mode_header->trampoline_start; ++ ++ unsigned long boot_error = 0; ++ unsigned long timeout; ++ ++ idle->thread.sp = (unsigned long)task_pt_regs(idle); ++ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); ++ initial_code = (unsigned long)start_secondary; ++ initial_stack = idle->thread.sp; ++ ++ /* Enable the espfix hack for this CPU */ ++ init_espfix_ap(cpu); ++ ++ /* So we see what's up */ ++ announce_cpu(cpu, apicid); ++ ++ /* ++ * This grunge runs the startup process for ++ * the targeted processor. 
++ */ ++ ++ if (x86_platform.legacy.warm_reset) { ++ ++ pr_debug("Setting warm reset code and vector.\n"); ++ ++ smpboot_setup_warm_reset_vector(start_ip); ++ /* ++ * Be paranoid about clearing APIC errors. ++ */ ++ if (APIC_INTEGRATED(boot_cpu_apic_version)) { ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++ } ++ ++ /* ++ * AP might wait on cpu_callout_mask in cpu_init() with ++ * cpu_initialized_mask set if previous attempt to online ++ * it timed-out. Clear cpu_initialized_mask so that after ++ * INIT/SIPI it could start with a clean state. ++ */ ++ cpumask_clear_cpu(cpu, cpu_initialized_mask); ++ smp_mb(); ++ ++ /* ++ * Wake up a CPU in difference cases: ++ * - Use the method in the APIC driver if it's defined ++ * Otherwise, ++ * - Use an INIT boot APIC message for APs or NMI for BSP. ++ */ ++ if (apic->wakeup_secondary_cpu) ++ boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); ++ else ++ boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, ++ cpu0_nmi_registered); ++ ++ if (!boot_error) { ++ /* ++ * Wait 10s total for first sign of life from AP ++ */ ++ boot_error = -1; ++ timeout = jiffies + 10*HZ; ++ while (time_before(jiffies, timeout)) { ++ if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { ++ /* ++ * Tell AP to proceed with initialization ++ */ ++ cpumask_set_cpu(cpu, cpu_callout_mask); ++ boot_error = 0; ++ break; ++ } ++ schedule(); ++ } ++ } ++ ++ if (!boot_error) { ++ /* ++ * Wait till AP completes initial initialization ++ */ ++ while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { ++ /* ++ * Allow other tasks to run while we wait for the ++ * AP to come online. This also gives a chance ++ * for the MTRR work(triggered by the AP coming online) ++ * to be completed in the stop machine context. ++ */ ++ schedule(); ++ } ++ } ++ ++ /* mark "stuck" area as not stuck */ ++ *trampoline_status = 0; ++ ++ if (x86_platform.legacy.warm_reset) { ++ /* ++ * Cleanup possible dangling ends... ++ */ ++ smpboot_restore_warm_reset_vector(); ++ } ++ ++ return boot_error; ++} ++ ++int native_cpu_up(unsigned int cpu, struct task_struct *tidle) ++{ ++ int apicid = apic->cpu_present_to_apicid(cpu); ++ int cpu0_nmi_registered = 0; ++ unsigned long flags; ++ int err, ret = 0; ++ ++ lockdep_assert_irqs_enabled(); ++ ++ pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); ++ ++ if (apicid == BAD_APICID || ++ !physid_isset(apicid, phys_cpu_present_map) || ++ !apic->apic_id_valid(apicid)) { ++ pr_err("%s: bad cpu %d\n", __func__, cpu); ++ return -EINVAL; ++ } ++ ++ /* ++ * Already booted CPU? ++ */ ++ if (cpumask_test_cpu(cpu, cpu_callin_mask)) { ++ pr_debug("do_boot_cpu %d Already started\n", cpu); ++ return -ENOSYS; ++ } ++ ++ /* ++ * Save current MTRR state in case it was changed since early boot ++ * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: ++ */ ++ mtrr_save_state(); ++ ++ /* x86 CPUs take themselves offline, so delayed offline is OK. 
*/ ++ err = cpu_check_up_prepare(cpu); ++ if (err && err != -EBUSY) ++ return err; ++ ++ /* the FPU context is blank, nobody can own it */ ++ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; ++ ++ common_cpu_up(cpu, tidle); ++ ++ err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); ++ if (err) { ++ pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); ++ ret = -EIO; ++ goto unreg_nmi; ++ } ++ ++ /* ++ * Check TSC synchronization with the AP (keep irqs disabled ++ * while doing so): ++ */ ++ local_irq_save(flags); ++ check_tsc_sync_source(cpu); ++ local_irq_restore(flags); ++ ++ while (!cpu_online(cpu)) { ++ cpu_relax(); ++ touch_nmi_watchdog(); ++ } ++ ++unreg_nmi: ++ /* ++ * Clean up the nmi handler. Do this after the callin and callout sync ++ * to avoid impact of possible long unregister time. ++ */ ++ if (cpu0_nmi_registered) ++ unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); ++ ++ return ret; ++} ++ ++/** ++ * arch_disable_smp_support() - disables SMP support for x86 at runtime ++ */ ++void arch_disable_smp_support(void) ++{ ++ disable_ioapic_support(); ++} ++ ++/* ++ * Fall back to non SMP mode after errors. ++ * ++ * RED-PEN audit/test this more. I bet there is more state messed up here. ++ */ ++static __init void disable_smp(void) ++{ ++ pr_info("SMP disabled\n"); ++ ++ disable_ioapic_support(); ++ ++ init_cpu_present(cpumask_of(0)); ++ init_cpu_possible(cpumask_of(0)); ++ ++ if (smp_found_config) ++ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); ++ else ++ physid_set_mask_of_physid(0, &phys_cpu_present_map); ++ cpumask_set_cpu(0, topology_sibling_cpumask(0)); ++ cpumask_set_cpu(0, topology_core_cpumask(0)); ++} ++ ++/* ++ * Various sanity checks. ++ */ ++static void __init smp_sanity_check(void) ++{ ++ preempt_disable(); ++ ++#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) ++ if (def_to_bigsmp && nr_cpu_ids > 8) { ++ unsigned int cpu; ++ unsigned nr; ++ ++ pr_warn("More than 8 CPUs detected - skipping them\n" ++ "Use CONFIG_X86_BIGSMP\n"); ++ ++ nr = 0; ++ for_each_present_cpu(cpu) { ++ if (nr >= 8) ++ set_cpu_present(cpu, false); ++ nr++; ++ } ++ ++ nr = 0; ++ for_each_possible_cpu(cpu) { ++ if (nr >= 8) ++ set_cpu_possible(cpu, false); ++ nr++; ++ } ++ ++ nr_cpu_ids = 8; ++ } ++#endif ++ ++ if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { ++ pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", ++ hard_smp_processor_id()); ++ ++ physid_set(hard_smp_processor_id(), phys_cpu_present_map); ++ } ++ ++ /* ++ * Should not be necessary because the MP table should list the boot ++ * CPU too, but we do it for the sake of robustness anyway. ++ */ ++ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { ++ pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", ++ boot_cpu_physical_apicid); ++ physid_set(hard_smp_processor_id(), phys_cpu_present_map); ++ } ++ preempt_enable(); ++} ++ ++static void __init smp_cpu_index_default(void) ++{ ++ int i; ++ struct cpuinfo_x86 *c; ++ ++ for_each_possible_cpu(i) { ++ c = &cpu_data(i); ++ /* mark all to hotplug */ ++ c->cpu_index = nr_cpu_ids; ++ } ++} ++ ++static void __init smp_get_logical_apicid(void) ++{ ++ if (x2apic_mode) ++ cpu0_logical_apicid = apic_read(APIC_LDR); ++ else ++ cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); ++} ++ ++/* ++ * Prepare for SMP bootup. ++ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter ++ * for common interface support. 
++ */ ++void __init native_smp_prepare_cpus(unsigned int max_cpus) ++{ ++ unsigned int i; ++ ++ smp_cpu_index_default(); ++ ++ /* ++ * Setup boot CPU information ++ */ ++ smp_store_boot_cpu_info(); /* Final full version of the data */ ++ cpumask_copy(cpu_callin_mask, cpumask_of(0)); ++ mb(); ++ ++ for_each_possible_cpu(i) { ++ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); ++ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); ++ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); ++ } ++ ++ /* ++ * Set 'default' x86 topology, this matches default_topology() in that ++ * it has NUMA nodes as a topology level. See also ++ * native_smp_cpus_done(). ++ * ++ * Must be done before set_cpus_sibling_map() is ran. ++ */ ++ set_sched_topology(x86_topology); ++ ++ set_cpu_sibling_map(0); ++ ++ smp_sanity_check(); ++ ++ switch (apic_intr_mode) { ++ case APIC_PIC: ++ case APIC_VIRTUAL_WIRE_NO_CONFIG: ++ disable_smp(); ++ return; ++ case APIC_SYMMETRIC_IO_NO_ROUTING: ++ disable_smp(); ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++ return; ++ case APIC_VIRTUAL_WIRE: ++ case APIC_SYMMETRIC_IO: ++ break; ++ } ++ ++ /* Setup local timer */ ++ x86_init.timers.setup_percpu_clockev(); ++ ++ smp_get_logical_apicid(); ++ ++ pr_info("CPU0: "); ++ print_cpu_info(&cpu_data(0)); ++ ++ native_pv_lock_init(); ++ ++ uv_system_init(); ++ ++ set_mtrr_aps_delayed_init(); ++ ++ smp_quirk_init_udelay(); ++ ++ speculative_store_bypass_ht_init(); ++} ++ ++void arch_enable_nonboot_cpus_begin(void) ++{ ++ set_mtrr_aps_delayed_init(); ++} ++ ++void arch_enable_nonboot_cpus_end(void) ++{ ++ mtrr_aps_init(); ++} ++ ++/* ++ * Early setup to make printk work. ++ */ ++void __init native_smp_prepare_boot_cpu(void) ++{ ++ int me = smp_processor_id(); ++ switch_to_new_gdt(me); ++ /* already set me in cpu_online_mask in boot_cpu_init() */ ++ cpumask_set_cpu(me, cpu_callout_mask); ++ cpu_set_state_online(me); ++} ++ ++void __init calculate_max_logical_packages(void) ++{ ++ int ncpus; ++ ++ /* ++ * Today neither Intel nor AMD support heterogenous systems so ++ * extrapolate the boot cpu's data to all packages. ++ */ ++ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); ++ __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); ++ pr_info("Max logical packages: %u\n", __max_logical_packages); ++} ++ ++void __init native_smp_cpus_done(unsigned int max_cpus) ++{ ++ pr_debug("Boot done\n"); ++ ++ calculate_max_logical_packages(); ++ ++ if (x86_has_numa_in_package) ++ set_sched_topology(x86_numa_in_package_topology); ++ ++ nmi_selftest(); ++ impress_friends(); ++ mtrr_aps_init(); ++} ++ ++static int __initdata setup_possible_cpus = -1; ++static int __init _setup_possible_cpus(char *str) ++{ ++ get_option(&str, &setup_possible_cpus); ++ return 0; ++} ++early_param("possible_cpus", _setup_possible_cpus); ++ ++ ++/* ++ * cpu_possible_mask should be static, it cannot change as cpu's ++ * are onlined, or offlined. The reason is per-cpu data-structures ++ * are allocated by some modules at init time, and dont expect to ++ * do this dynamically on cpu arrival/departure. ++ * cpu_present_mask on the other hand can change dynamically. ++ * In case when cpu_hotplug is not compiled, then we resort to current ++ * behaviour, which is cpu_possible == cpu_present. ++ * - Ashok Raj ++ * ++ * Three ways to find out the number of additional hotplug CPUs: ++ * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 
++ * - The user can overwrite it with possible_cpus=NUM ++ * - Otherwise don't reserve additional CPUs. ++ * We do this because additional CPUs waste a lot of memory. ++ * -AK ++ */ ++__init void prefill_possible_map(void) ++{ ++ int i, possible; ++ ++ /* No boot processor was found in mptable or ACPI MADT */ ++ if (!num_processors) { ++ if (boot_cpu_has(X86_FEATURE_APIC)) { ++ int apicid = boot_cpu_physical_apicid; ++ int cpu = hard_smp_processor_id(); ++ ++ pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); ++ ++ /* Make sure boot cpu is enumerated */ ++ if (apic->cpu_present_to_apicid(0) == BAD_APICID && ++ apic->apic_id_valid(apicid)) ++ generic_processor_info(apicid, boot_cpu_apic_version); ++ } ++ ++ if (!num_processors) ++ num_processors = 1; ++ } ++ ++ i = setup_max_cpus ?: 1; ++ if (setup_possible_cpus == -1) { ++ possible = num_processors; ++#ifdef CONFIG_HOTPLUG_CPU ++ if (setup_max_cpus) ++ possible += disabled_cpus; ++#else ++ if (possible > i) ++ possible = i; ++#endif ++ } else ++ possible = setup_possible_cpus; ++ ++ total_cpus = max_t(int, possible, num_processors + disabled_cpus); ++ ++ /* nr_cpu_ids could be reduced via nr_cpus= */ ++ if (possible > nr_cpu_ids) { ++ pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", ++ possible, nr_cpu_ids); ++ possible = nr_cpu_ids; ++ } ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ if (!setup_max_cpus) ++#endif ++ if (possible > i) { ++ pr_warn("%d Processors exceeds max_cpus limit of %u\n", ++ possible, setup_max_cpus); ++ possible = i; ++ } ++ ++ nr_cpu_ids = possible; ++ ++ pr_info("Allowing %d CPUs, %d hotplug CPUs\n", ++ possible, max_t(int, possible - num_processors, 0)); ++ ++ reset_cpu_possible_mask(); ++ ++ for (i = 0; i < possible; i++) ++ set_cpu_possible(i, true); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++/* Recompute SMT state for all CPUs on offline */ ++static void recompute_smt_state(void) ++{ ++ int max_threads, cpu; ++ ++ max_threads = 0; ++ for_each_online_cpu (cpu) { ++ int threads = cpumask_weight(topology_sibling_cpumask(cpu)); ++ ++ if (threads > max_threads) ++ max_threads = threads; ++ } ++ __max_smt_threads = max_threads; ++} ++ ++static void remove_siblinginfo(int cpu) ++{ ++ int sibling; ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ ++ for_each_cpu(sibling, topology_core_cpumask(cpu)) { ++ cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); ++ /*/ ++ * last thread sibling in this cpu core going down ++ */ ++ if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1) ++ cpu_data(sibling).booted_cores--; ++ } ++ ++ for_each_cpu(sibling, topology_sibling_cpumask(cpu)) ++ cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); ++ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) ++ cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); ++ cpumask_clear(cpu_llc_shared_mask(cpu)); ++ cpumask_clear(topology_sibling_cpumask(cpu)); ++ cpumask_clear(topology_core_cpumask(cpu)); ++ c->cpu_core_id = 0; ++ c->booted_cores = 0; ++ cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); ++ recompute_smt_state(); ++} ++ ++static void remove_cpu_from_maps(int cpu) ++{ ++ set_cpu_online(cpu, false); ++ cpumask_clear_cpu(cpu, cpu_callout_mask); ++ cpumask_clear_cpu(cpu, cpu_callin_mask); ++ /* was set by cpu_init() */ ++ cpumask_clear_cpu(cpu, cpu_initialized_mask); ++ numa_remove_cpu(cpu); ++} ++ ++void cpu_disable_common(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ remove_siblinginfo(cpu); ++ ++ /* It's now safe to remove this processor from the online map */ ++ lock_vector_lock(); ++ remove_cpu_from_maps(cpu); ++ 
unlock_vector_lock(); ++ fixup_irqs(); ++ lapic_offline(); ++} ++ ++int native_cpu_disable(void) ++{ ++ int ret; ++ ++ ret = lapic_can_unplug_cpu(); ++ if (ret) ++ return ret; ++ ++ clear_local_APIC(); ++ cpu_disable_common(); ++ ++ return 0; ++} ++ ++int common_cpu_die(unsigned int cpu) ++{ ++ int ret = 0; ++ ++ /* We don't do anything here: idle task is faking death itself. */ ++ ++ /* They ack this in play_dead() by setting CPU_DEAD */ ++ if (cpu_wait_death(cpu, 5)) { ++ if (system_state == SYSTEM_RUNNING) ++ pr_info("CPU %u is now offline\n", cpu); ++ } else { ++ pr_err("CPU %u didn't die...\n", cpu); ++ ret = -1; ++ } ++ ++ return ret; ++} ++ ++void native_cpu_die(unsigned int cpu) ++{ ++ common_cpu_die(cpu); ++} ++ ++void play_dead_common(void) ++{ ++ idle_task_exit(); ++ ++ /* Ack it */ ++ (void)cpu_report_death(); ++ ++ /* ++ * With physical CPU hotplug, we should halt the cpu ++ */ ++ local_irq_disable(); ++} ++ ++static bool wakeup_cpu0(void) ++{ ++ if (smp_processor_id() == 0 && enable_start_cpu0) ++ return true; ++ ++ return false; ++} ++ ++/* ++ * We need to flush the caches before going to sleep, lest we have ++ * dirty data in our caches when we come back up. ++ */ ++static inline void mwait_play_dead(void) ++{ ++ unsigned int eax, ebx, ecx, edx; ++ unsigned int highest_cstate = 0; ++ unsigned int highest_subcstate = 0; ++ void *mwait_ptr; ++ int i; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ return; ++ if (!this_cpu_has(X86_FEATURE_MWAIT)) ++ return; ++ if (!this_cpu_has(X86_FEATURE_CLFLUSH)) ++ return; ++ if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) ++ return; ++ ++ eax = CPUID_MWAIT_LEAF; ++ ecx = 0; ++ native_cpuid(&eax, &ebx, &ecx, &edx); ++ ++ /* ++ * eax will be 0 if EDX enumeration is not valid. ++ * Initialized below to cstate, sub_cstate value when EDX is valid. ++ */ ++ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { ++ eax = 0; ++ } else { ++ edx >>= MWAIT_SUBSTATE_SIZE; ++ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { ++ if (edx & MWAIT_SUBSTATE_MASK) { ++ highest_cstate = i; ++ highest_subcstate = edx & MWAIT_SUBSTATE_MASK; ++ } ++ } ++ eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | ++ (highest_subcstate - 1); ++ } ++ ++ /* ++ * This should be a memory location in a cache line which is ++ * unlikely to be touched by other processors. The actual ++ * content is immaterial as it is not actually modified in any way. ++ */ ++ mwait_ptr = ¤t_thread_info()->flags; ++ ++ wbinvd(); ++ ++ while (1) { ++ /* ++ * The CLFLUSH is a workaround for erratum AAI65 for ++ * the Xeon 7400 series. It's not clear it is actually ++ * needed, but it should be harmless in either case. ++ * The WBINVD is insufficient due to the spurious-wakeup ++ * case where we return around the loop. ++ */ ++ mb(); ++ clflush(mwait_ptr); ++ mb(); ++ __monitor(mwait_ptr, 0, 0); ++ mb(); ++ __mwait(eax, 0); ++ /* ++ * If NMI wants to wake up CPU0, start CPU0. ++ */ ++ if (wakeup_cpu0()) ++ start_cpu0(); ++ } ++} ++ ++void hlt_play_dead(void) ++{ ++ if (__this_cpu_read(cpu_info.x86) >= 4) ++ wbinvd(); ++ ++ while (1) { ++ native_halt(); ++ /* ++ * If NMI wants to wake up CPU0, start CPU0. ++ */ ++ if (wakeup_cpu0()) ++ start_cpu0(); ++ } ++} ++ ++void native_play_dead(void) ++{ ++ play_dead_common(); ++ tboot_shutdown(TB_SHUTDOWN_WFS); ++ ++ mwait_play_dead(); /* Only returns on failure */ ++ if (cpuidle_play_dead()) ++ hlt_play_dead(); ++} ++ ++#else /* ... 
!CONFIG_HOTPLUG_CPU */ ++int native_cpu_disable(void) ++{ ++ return -ENOSYS; ++} ++ ++void native_cpu_die(unsigned int cpu) ++{ ++ /* We said "no" in __cpu_disable */ ++ BUG(); ++} ++ ++void native_play_dead(void) ++{ ++ BUG(); ++} ++ ++#endif +diff -uprN kernel/arch/x86/kernel/smp.c kernel_new/arch/x86/kernel/smp.c +--- kernel/arch/x86/kernel/smp.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/smp.c 2021-04-01 18:28:07.657863285 +0800 +@@ -255,10 +255,10 @@ static void native_stop_other_cpus(int w + udelay(1); + } + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + /* +diff -uprN kernel/arch/x86/kernel/traps.c kernel_new/arch/x86/kernel/traps.c +--- kernel/arch/x86/kernel/traps.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/traps.c 2021-04-01 18:28:07.657863285 +0800 +@@ -14,6 +14,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -77,13 +78,13 @@ DECLARE_BITMAP(system_vectors, NR_VECTOR + static inline void cond_local_irq_enable(struct pt_regs *regs) + { + if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); ++ hard_local_irq_enable_notrace(); + } + + static inline void cond_local_irq_disable(struct pt_regs *regs) + { + if (regs->flags & X86_EFLAGS_IF) +- local_irq_disable(); ++ hard_local_irq_disable_notrace(); + } + + /* +@@ -544,7 +545,7 @@ do_general_protection(struct pt_regs *re + } + + if (v8086_mode(regs)) { +- local_irq_enable(); ++ hard_local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + return; + } +@@ -931,7 +932,7 @@ dotraplinkage void do_iret_error(struct + siginfo_t info; + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); +- local_irq_enable(); ++ hard_local_irq_enable(); + + clear_siginfo(&info); + info.si_signo = SIGILL; +diff -uprN kernel/arch/x86/kernel/tsc.c kernel_new/arch/x86/kernel/tsc.c +--- kernel/arch/x86/kernel/tsc.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/tsc.c 2021-04-01 18:28:07.657863285 +0800 +@@ -731,11 +731,11 @@ static unsigned long pit_hpet_ptimer_cal + * calibration, which will take at least 50ms, and + * read the end value. + */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + tsc1 = tsc_read_refs(&ref1, hpet); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); + tsc2 = tsc_read_refs(&ref2, hpet); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); +@@ -844,9 +844,9 @@ unsigned long native_calibrate_cpu_early + if (!fast_calibrate) + fast_calibrate = cpu_khz_from_msr(); + if (!fast_calibrate) { +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + fast_calibrate = quick_pit_calibrate(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + return fast_calibrate; + } +@@ -1109,7 +1109,7 @@ static struct clocksource clocksource_ts + * this one will immediately take over. We will only register if TSC has + * been found good. 
+ */ +-static struct clocksource clocksource_tsc = { ++struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, +diff -uprN kernel/arch/x86/kernel/vm86_32.c kernel_new/arch/x86/kernel/vm86_32.c +--- kernel/arch/x86/kernel/vm86_32.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kernel/vm86_32.c 2021-04-01 18:28:07.657863285 +0800 +@@ -147,12 +147,14 @@ void save_v86_state(struct kernel_vm86_r + } + + preempt_disable(); ++ hard_cond_local_irq_disable(); + tsk->thread.sp0 = vm86->saved_sp0; + tsk->thread.sysenter_cs = __KERNEL_CS; + update_task_stack(tsk); + refresh_sysenter_cs(&tsk->thread); + vm86->saved_sp0 = 0; + preempt_enable(); ++ hard_cond_local_irq_enable(); + + memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); + +@@ -365,6 +367,7 @@ static long do_sys_vm86(struct vm86plus_ + vm86->saved_sp0 = tsk->thread.sp0; + lazy_save_gs(vm86->regs32.gs); + ++ hard_cond_local_irq_disable(); + /* make room for real-mode segments */ + preempt_disable(); + tsk->thread.sp0 += 16; +@@ -376,6 +379,7 @@ static long do_sys_vm86(struct vm86plus_ + + update_task_stack(tsk); + preempt_enable(); ++ hard_cond_local_irq_enable(); + + if (vm86->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); +diff -uprN kernel/arch/x86/kvm/svm.c kernel_new/arch/x86/kvm/svm.c +--- kernel/arch/x86/kvm/svm.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/svm.c 2021-04-01 18:28:07.657863285 +0800 +@@ -5637,7 +5637,7 @@ static void svm_vcpu_run(struct kvm_vcpu + */ + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + +- local_irq_enable(); ++ hard_local_irq_enable(); + + asm volatile ( + "push %%" _ASM_BP "; \n\t" +@@ -5763,7 +5763,7 @@ static void svm_vcpu_run(struct kvm_vcpu + + reload_tss(vcpu); + +- local_irq_disable(); ++ hard_local_irq_disable(); + + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + +@@ -6150,6 +6150,7 @@ out: + + static void svm_handle_external_intr(struct kvm_vcpu *vcpu) + { ++ hard_cond_local_irq_enable(); + local_irq_enable(); + /* + * We must have an instruction with interrupts enabled, so +diff -uprN kernel/arch/x86/kvm/svm.c.orig kernel_new/arch/x86/kvm/svm.c.orig +--- kernel/arch/x86/kvm/svm.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/svm.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,7244 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * AMD SVM support ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. ++ * ++ * Authors: ++ * Yaniv Kamay ++ * Avi Kivity ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#define pr_fmt(fmt) "SVM: " fmt ++ ++#include ++ ++#include "irq.h" ++#include "mmu.h" ++#include "kvm_cache_regs.h" ++#include "x86.h" ++#include "cpuid.h" ++#include "pmu.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include "trace.h" ++ ++#define __ex(x) __kvm_handle_fault_on_reboot(x) ++ ++MODULE_AUTHOR("Qumranet"); ++MODULE_LICENSE("GPL"); ++ ++static const struct x86_cpu_id svm_cpu_id[] = { ++ X86_FEATURE_MATCH(X86_FEATURE_SVM), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); ++ ++#define IOPM_ALLOC_ORDER 2 ++#define MSRPM_ALLOC_ORDER 1 ++ ++#define SEG_TYPE_LDT 2 ++#define SEG_TYPE_BUSY_TSS16 3 ++ ++#define SVM_FEATURE_NPT (1 << 0) ++#define SVM_FEATURE_LBRV (1 << 1) ++#define SVM_FEATURE_SVML (1 << 2) ++#define SVM_FEATURE_NRIP (1 << 3) ++#define SVM_FEATURE_TSC_RATE (1 << 4) ++#define SVM_FEATURE_VMCB_CLEAN (1 << 5) ++#define SVM_FEATURE_FLUSH_ASID (1 << 6) ++#define SVM_FEATURE_DECODE_ASSIST (1 << 7) ++#define SVM_FEATURE_PAUSE_FILTER (1 << 10) ++ ++#define SVM_AVIC_DOORBELL 0xc001011b ++ ++#define NESTED_EXIT_HOST 0 /* Exit handled on host level */ ++#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ ++#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ ++ ++#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) ++ ++#define TSC_RATIO_RSVD 0xffffff0000000000ULL ++#define TSC_RATIO_MIN 0x0000000000000001ULL ++#define TSC_RATIO_MAX 0x000000ffffffffffULL ++ ++#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF) ++ ++/* ++ * 0xff is broadcast, so the max index allowed for physical APIC ID ++ * table is 0xfe. APIC IDs above 0xff are reserved. 
++ */ ++#define AVIC_MAX_PHYSICAL_ID_COUNT 255 ++ ++#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 ++#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 ++#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF ++ ++/* AVIC GATAG is encoded using VM and VCPU IDs */ ++#define AVIC_VCPU_ID_BITS 8 ++#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1) ++ ++#define AVIC_VM_ID_BITS 24 ++#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS) ++#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1) ++ ++#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \ ++ (y & AVIC_VCPU_ID_MASK)) ++#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK) ++#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK) ++ ++static bool erratum_383_found __read_mostly; ++ ++static const u32 host_save_user_msrs[] = { ++#ifdef CONFIG_X86_64 ++ MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, ++ MSR_FS_BASE, ++#endif ++ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, ++ MSR_TSC_AUX, ++}; ++ ++#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) ++ ++struct kvm_sev_info { ++ bool active; /* SEV enabled guest */ ++ unsigned int asid; /* ASID used for this guest */ ++ unsigned int handle; /* SEV firmware handle */ ++ int fd; /* SEV device fd */ ++ unsigned long pages_locked; /* Number of pages locked */ ++ struct list_head regions_list; /* List of registered regions */ ++}; ++ ++struct kvm_svm { ++ struct kvm kvm; ++ ++ /* Struct members for AVIC */ ++ u32 avic_vm_id; ++ u32 ldr_mode; ++ struct page *avic_logical_id_table_page; ++ struct page *avic_physical_id_table_page; ++ struct hlist_node hnode; ++ ++ struct kvm_sev_info sev_info; ++}; ++ ++struct kvm_vcpu; ++ ++struct nested_state { ++ struct vmcb *hsave; ++ u64 hsave_msr; ++ u64 vm_cr_msr; ++ u64 vmcb; ++ ++ /* These are the merged vectors */ ++ u32 *msrpm; ++ ++ /* gpa pointers to the real vectors */ ++ u64 vmcb_msrpm; ++ u64 vmcb_iopm; ++ ++ /* A VMEXIT is required but not yet emulated */ ++ bool exit_required; ++ ++ /* cache for intercepts of the guest */ ++ u32 intercept_cr; ++ u32 intercept_dr; ++ u32 intercept_exceptions; ++ u64 intercept; ++ ++ /* Nested Paging related state */ ++ u64 nested_cr3; ++}; ++ ++#define MSRPM_OFFSETS 16 ++static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; ++ ++/* ++ * Set osvw_len to higher value when updated Revision Guides ++ * are published and we know what the new status bits are ++ */ ++static uint64_t osvw_len = 4, osvw_status; ++ ++struct vcpu_svm { ++ struct kvm_vcpu vcpu; ++ struct vmcb *vmcb; ++ unsigned long vmcb_pa; ++ struct svm_cpu_data *svm_data; ++ uint64_t asid_generation; ++ uint64_t sysenter_esp; ++ uint64_t sysenter_eip; ++ uint64_t tsc_aux; ++ ++ u64 msr_decfg; ++ ++ u64 next_rip; ++ ++ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; ++ struct { ++ u16 fs; ++ u16 gs; ++ u16 ldt; ++ u64 gs_base; ++ } host; ++ ++ u64 spec_ctrl; ++ /* ++ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be ++ * translated into the appropriate L2_CFG bits on the host to ++ * perform speculative control. 
++ */ ++ u64 virt_spec_ctrl; ++ ++ u32 *msrpm; ++ ++ ulong nmi_iret_rip; ++ ++ struct nested_state nested; ++ ++ bool nmi_singlestep; ++ u64 nmi_singlestep_guest_rflags; ++ ++ unsigned int3_injected; ++ unsigned long int3_rip; ++ ++ /* cached guest cpuid flags for faster access */ ++ bool nrips_enabled : 1; ++ ++ u32 ldr_reg; ++ struct page *avic_backing_page; ++ u64 *avic_physical_id_cache; ++ bool avic_is_running; ++ ++ /* ++ * Per-vcpu list of struct amd_svm_iommu_ir: ++ * This is used mainly to store interrupt remapping information used ++ * when update the vcpu affinity. This avoids the need to scan for ++ * IRTE and try to match ga_tag in the IOMMU driver. ++ */ ++ struct list_head ir_list; ++ spinlock_t ir_list_lock; ++ ++ /* which host CPU was used for running this vcpu */ ++ unsigned int last_cpu; ++}; ++ ++/* ++ * This is a wrapper of struct amd_iommu_ir_data. ++ */ ++struct amd_svm_iommu_ir { ++ struct list_head node; /* Used by SVM for per-vcpu ir_list */ ++ void *data; /* Storing pointer to struct amd_ir_data */ ++}; ++ ++#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) ++#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) ++ ++#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) ++#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) ++#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) ++#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) ++ ++static DEFINE_PER_CPU(u64, current_tsc_ratio); ++#define TSC_RATIO_DEFAULT 0x0100000000ULL ++ ++#define MSR_INVALID 0xffffffffU ++ ++static const struct svm_direct_access_msrs { ++ u32 index; /* Index of the MSR */ ++ bool always; /* True if intercept is always on */ ++} direct_access_msrs[] = { ++ { .index = MSR_STAR, .always = true }, ++ { .index = MSR_IA32_SYSENTER_CS, .always = true }, ++#ifdef CONFIG_X86_64 ++ { .index = MSR_GS_BASE, .always = true }, ++ { .index = MSR_FS_BASE, .always = true }, ++ { .index = MSR_KERNEL_GS_BASE, .always = true }, ++ { .index = MSR_LSTAR, .always = true }, ++ { .index = MSR_CSTAR, .always = true }, ++ { .index = MSR_SYSCALL_MASK, .always = true }, ++#endif ++ { .index = MSR_IA32_SPEC_CTRL, .always = false }, ++ { .index = MSR_IA32_PRED_CMD, .always = false }, ++ { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, ++ { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, ++ { .index = MSR_IA32_LASTINTFROMIP, .always = false }, ++ { .index = MSR_IA32_LASTINTTOIP, .always = false }, ++ { .index = MSR_INVALID, .always = false }, ++}; ++ ++/* enable NPT for AMD64 and X86 with PAE */ ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) ++static bool npt_enabled = true; ++#else ++static bool npt_enabled; ++#endif ++ ++/* ++ * These 2 parameters are used to config the controls for Pause-Loop Exiting: ++ * pause_filter_count: On processors that support Pause filtering(indicated ++ * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter ++ * count value. On VMRUN this value is loaded into an internal counter. ++ * Each time a pause instruction is executed, this counter is decremented ++ * until it reaches zero at which time a #VMEXIT is generated if pause ++ * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause ++ * Intercept Filtering for more details. ++ * This also indicate if ple logic enabled. ++ * ++ * pause_filter_thresh: In addition, some processor families support advanced ++ * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on ++ * the amount of time a guest is allowed to execute in a pause loop. 
++ * In this mode, a 16-bit pause filter threshold field is added in the ++ * VMCB. The threshold value is a cycle count that is used to reset the ++ * pause counter. As with simple pause filtering, VMRUN loads the pause ++ * count value from VMCB into an internal counter. Then, on each pause ++ * instruction the hardware checks the elapsed number of cycles since ++ * the most recent pause instruction against the pause filter threshold. ++ * If the elapsed cycle count is greater than the pause filter threshold, ++ * then the internal pause count is reloaded from the VMCB and execution ++ * continues. If the elapsed cycle count is less than the pause filter ++ * threshold, then the internal pause count is decremented. If the count ++ * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is ++ * triggered. If advanced pause filtering is supported and pause filter ++ * threshold field is set to zero, the filter will operate in the simpler, ++ * count only mode. ++ */ ++ ++static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; ++module_param(pause_filter_thresh, ushort, 0444); ++ ++static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; ++module_param(pause_filter_count, ushort, 0444); ++ ++/* Default doubles per-vcpu window every exit. */ ++static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; ++module_param(pause_filter_count_grow, ushort, 0444); ++ ++/* Default resets per-vcpu window every exit to pause_filter_count. */ ++static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; ++module_param(pause_filter_count_shrink, ushort, 0444); ++ ++/* Default is to compute the maximum so we can never overflow. */ ++static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; ++module_param(pause_filter_count_max, ushort, 0444); ++ ++/* allow nested paging (virtualized MMU) for all guests */ ++static int npt = true; ++module_param(npt, int, S_IRUGO); ++ ++/* allow nested virtualization in KVM/SVM */ ++static int nested = true; ++module_param(nested, int, S_IRUGO); ++ ++/* enable / disable AVIC */ ++static int avic; ++#ifdef CONFIG_X86_LOCAL_APIC ++module_param(avic, int, S_IRUGO); ++#endif ++ ++/* enable/disable Virtual VMLOAD VMSAVE */ ++static int vls = true; ++module_param(vls, int, 0444); ++ ++/* enable/disable Virtual GIF */ ++static int vgif = true; ++module_param(vgif, int, 0444); ++ ++/* enable/disable SEV support */ ++static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); ++module_param(sev, int, 0444); ++ ++static u8 rsm_ins_bytes[] = "\x0f\xaa"; ++ ++static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); ++static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); ++static void svm_complete_interrupts(struct vcpu_svm *svm); ++ ++static int nested_svm_exit_handled(struct vcpu_svm *svm); ++static int nested_svm_intercept(struct vcpu_svm *svm); ++static int nested_svm_vmexit(struct vcpu_svm *svm); ++static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, ++ bool has_error_code, u32 error_code); ++ ++enum { ++ VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, ++ pause filter count */ ++ VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ ++ VMCB_ASID, /* ASID */ ++ VMCB_INTR, /* int_ctl, int_vector */ ++ VMCB_NPT, /* npt_en, nCR3, gPAT */ ++ VMCB_CR, /* CR0, CR3, CR4, EFER */ ++ VMCB_DR, /* DR6, DR7 */ ++ VMCB_DT, /* GDT, IDT */ ++ VMCB_SEG, /* CS, DS, SS, ES, CPL */ ++ VMCB_CR2, /* CR2 only */ ++ VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, 
LAST_EX_FROM, LAST_EX_TO */ ++ VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE, ++ * AVIC PHYSICAL_TABLE pointer, ++ * AVIC LOGICAL_TABLE pointer ++ */ ++ VMCB_DIRTY_MAX, ++}; ++ ++/* TPR and CR2 are always written before VMRUN */ ++#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) ++ ++#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL ++ ++static unsigned int max_sev_asid; ++static unsigned int min_sev_asid; ++static unsigned long *sev_asid_bitmap; ++#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) ++ ++struct enc_region { ++ struct list_head list; ++ unsigned long npages; ++ struct page **pages; ++ unsigned long uaddr; ++ unsigned long size; ++}; ++ ++ ++static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) ++{ ++ return container_of(kvm, struct kvm_svm, kvm); ++} ++ ++static inline bool svm_sev_enabled(void) ++{ ++ return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; ++} ++ ++static inline bool sev_guest(struct kvm *kvm) ++{ ++#ifdef CONFIG_KVM_AMD_SEV ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return sev->active; ++#else ++ return false; ++#endif ++} ++ ++static inline int sev_get_asid(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return sev->asid; ++} ++ ++static inline void mark_all_dirty(struct vmcb *vmcb) ++{ ++ vmcb->control.clean = 0; ++} ++ ++static inline void mark_all_clean(struct vmcb *vmcb) ++{ ++ vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) ++ & ~VMCB_ALWAYS_DIRTY_MASK; ++} ++ ++static inline void mark_dirty(struct vmcb *vmcb, int bit) ++{ ++ vmcb->control.clean &= ~(1 << bit); ++} ++ ++static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) ++{ ++ return container_of(vcpu, struct vcpu_svm, vcpu); ++} ++ ++static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) ++{ ++ svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK; ++ mark_dirty(svm->vmcb, VMCB_AVIC); ++} ++ ++static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 *entry = svm->avic_physical_id_cache; ++ ++ if (!entry) ++ return false; ++ ++ return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); ++} ++ ++static void recalc_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb_control_area *c, *h; ++ struct nested_state *g; ++ ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ if (!is_guest_mode(&svm->vcpu)) ++ return; ++ ++ c = &svm->vmcb->control; ++ h = &svm->nested.hsave->control; ++ g = &svm->nested; ++ ++ c->intercept_cr = h->intercept_cr | g->intercept_cr; ++ c->intercept_dr = h->intercept_dr | g->intercept_dr; ++ c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; ++ c->intercept = h->intercept | g->intercept; ++} ++ ++static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) ++{ ++ if (is_guest_mode(&svm->vcpu)) ++ return svm->nested.hsave; ++ else ++ return svm->vmcb; ++} ++ ++static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_cr |= (1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_cr &= ~(1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ return vmcb->control.intercept_cr & (1U << bit); ++} ++ ++static inline 
void set_dr_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) ++ | (1 << INTERCEPT_DR1_READ) ++ | (1 << INTERCEPT_DR2_READ) ++ | (1 << INTERCEPT_DR3_READ) ++ | (1 << INTERCEPT_DR4_READ) ++ | (1 << INTERCEPT_DR5_READ) ++ | (1 << INTERCEPT_DR6_READ) ++ | (1 << INTERCEPT_DR7_READ) ++ | (1 << INTERCEPT_DR0_WRITE) ++ | (1 << INTERCEPT_DR1_WRITE) ++ | (1 << INTERCEPT_DR2_WRITE) ++ | (1 << INTERCEPT_DR3_WRITE) ++ | (1 << INTERCEPT_DR4_WRITE) ++ | (1 << INTERCEPT_DR5_WRITE) ++ | (1 << INTERCEPT_DR6_WRITE) ++ | (1 << INTERCEPT_DR7_WRITE); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_dr_intercepts(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_dr = 0; ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_exceptions |= (1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept_exceptions &= ~(1U << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void set_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept |= (1ULL << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline void clr_intercept(struct vcpu_svm *svm, int bit) ++{ ++ struct vmcb *vmcb = get_host_vmcb(svm); ++ ++ vmcb->control.intercept &= ~(1ULL << bit); ++ ++ recalc_intercepts(svm); ++} ++ ++static inline bool vgif_enabled(struct vcpu_svm *svm) ++{ ++ return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); ++} ++ ++static inline void enable_gif(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ svm->vmcb->control.int_ctl |= V_GIF_MASK; ++ else ++ svm->vcpu.arch.hflags |= HF_GIF_MASK; ++} ++ ++static inline void disable_gif(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ svm->vmcb->control.int_ctl &= ~V_GIF_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_GIF_MASK; ++} ++ ++static inline bool gif_set(struct vcpu_svm *svm) ++{ ++ if (vgif_enabled(svm)) ++ return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); ++ else ++ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); ++} ++ ++static unsigned long iopm_base; ++ ++struct kvm_ldttss_desc { ++ u16 limit0; ++ u16 base0; ++ unsigned base1:8, type:5, dpl:2, p:1; ++ unsigned limit1:4, zero0:3, g:1, base2:8; ++ u32 base3; ++ u32 zero1; ++} __attribute__((packed)); ++ ++struct svm_cpu_data { ++ int cpu; ++ ++ u64 asid_generation; ++ u32 max_asid; ++ u32 next_asid; ++ u32 min_asid; ++ struct kvm_ldttss_desc *tss_desc; ++ ++ struct page *save_area; ++ struct vmcb *current_vmcb; ++ ++ /* index = sev_asid, value = vmcb pointer */ ++ struct vmcb **sev_vmcbs; ++}; ++ ++static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); ++ ++struct svm_init_data { ++ int cpu; ++ int r; ++}; ++ ++static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; ++ ++#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) ++#define MSRS_RANGE_SIZE 2048 ++#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) ++ ++static u32 svm_msrpm_offset(u32 msr) ++{ ++ u32 offset; ++ int i; ++ ++ for (i = 0; i < NUM_MSR_MAPS; i++) { ++ if (msr < msrpm_ranges[i] || ++ msr >= msrpm_ranges[i] + MSRS_IN_RANGE) ++ continue; ++ ++ offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ ++ offset += (i * MSRS_RANGE_SIZE); /* add range 
offset */ ++ ++ /* Now we have the u8 offset - but need the u32 offset */ ++ return offset / 4; ++ } ++ ++ /* MSR not in any range */ ++ return MSR_INVALID; ++} ++ ++#define MAX_INST_SIZE 15 ++ ++static inline void clgi(void) ++{ ++ asm volatile (__ex(SVM_CLGI)); ++} ++ ++static inline void stgi(void) ++{ ++ asm volatile (__ex(SVM_STGI)); ++} ++ ++static inline void invlpga(unsigned long addr, u32 asid) ++{ ++ asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); ++} ++ ++static int get_npt_level(struct kvm_vcpu *vcpu) ++{ ++#ifdef CONFIG_X86_64 ++ return PT64_ROOT_4LEVEL; ++#else ++ return PT32E_ROOT_LEVEL; ++#endif ++} ++ ++static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ vcpu->arch.efer = efer; ++ ++ if (!npt_enabled) { ++ /* Shadow paging assumes NX to be available. */ ++ efer |= EFER_NX; ++ ++ if (!(efer & EFER_LMA)) ++ efer &= ~EFER_LME; ++ } ++ ++ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; ++ mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); ++} ++ ++static int is_external_interrupt(u32 info) ++{ ++ info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; ++ return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); ++} ++ ++static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 ret = 0; ++ ++ if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ++ ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; ++ return ret; ++} ++ ++static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (mask == 0) ++ svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; ++ else ++ svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; ++ ++} ++ ++static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm->vmcb->control.next_rip != 0) { ++ WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); ++ svm->next_rip = svm->vmcb->control.next_rip; ++ } ++ ++ if (!svm->next_rip) { ++ if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != ++ EMULATE_DONE) ++ printk(KERN_DEBUG "%s: NOP\n", __func__); ++ return; ++ } ++ if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) ++ printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", ++ __func__, kvm_rip_read(vcpu), svm->next_rip); ++ ++ kvm_rip_write(vcpu, svm->next_rip); ++ svm_set_interrupt_shadow(vcpu, 0); ++} ++ ++static void svm_queue_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ unsigned nr = vcpu->arch.exception.nr; ++ bool has_error_code = vcpu->arch.exception.has_error_code; ++ bool reinject = vcpu->arch.exception.injected; ++ u32 error_code = vcpu->arch.exception.error_code; ++ ++ /* ++ * If we are within a nested VM we'd better #VMEXIT and let the guest ++ * handle the exception ++ */ ++ if (!reinject && ++ nested_svm_check_exception(svm, nr, has_error_code, error_code)) ++ return; ++ ++ if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { ++ unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); ++ ++ /* ++ * For guest debugging where we have to reinject #BP if some ++ * INT3 is guest-owned: ++ * Emulate nRIP by moving RIP forward. Will fail if injection ++ * raises a fault that is not intercepted. Still better than ++ * failing in all cases. ++ */ ++ skip_emulated_instruction(&svm->vcpu); ++ rip = kvm_rip_read(&svm->vcpu); ++ svm->int3_rip = rip + svm->vmcb->save.cs.base; ++ svm->int3_injected = rip - old_rip; ++ } ++ ++ svm->vmcb->control.event_inj = nr ++ | SVM_EVTINJ_VALID ++ | (has_error_code ? 
SVM_EVTINJ_VALID_ERR : 0) ++ | SVM_EVTINJ_TYPE_EXEPT; ++ svm->vmcb->control.event_inj_err = error_code; ++} ++ ++static void svm_init_erratum_383(void) ++{ ++ u32 low, high; ++ int err; ++ u64 val; ++ ++ if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) ++ return; ++ ++ /* Use _safe variants to not break nested virtualization */ ++ val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); ++ if (err) ++ return; ++ ++ val |= (1ULL << 47); ++ ++ low = lower_32_bits(val); ++ high = upper_32_bits(val); ++ ++ native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); ++ ++ erratum_383_found = true; ++} ++ ++static void svm_init_osvw(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Guests should see errata 400 and 415 as fixed (assuming that ++ * HLT and IO instructions are intercepted). ++ */ ++ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; ++ vcpu->arch.osvw.status = osvw_status & ~(6ULL); ++ ++ /* ++ * By increasing VCPU's osvw.length to 3 we are telling the guest that ++ * all osvw.status bits inside that length, including bit 0 (which is ++ * reserved for erratum 298), are valid. However, if host processor's ++ * osvw_len is 0 then osvw_status[0] carries no information. We need to ++ * be conservative here and therefore we tell the guest that erratum 298 ++ * is present (because we really don't know). ++ */ ++ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) ++ vcpu->arch.osvw.status |= 1; ++} ++ ++static int has_svm(void) ++{ ++ const char *msg; ++ ++ if (!cpu_has_svm(&msg)) { ++ printk(KERN_INFO "has_svm: %s\n", msg); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void svm_hardware_disable(void) ++{ ++ /* Make sure we clean up behind us */ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) ++ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); ++ ++ cpu_svm_disable(); ++ ++ amd_pmu_disable_virt(); ++} ++ ++static int svm_hardware_enable(void) ++{ ++ ++ struct svm_cpu_data *sd; ++ uint64_t efer; ++ struct desc_struct *gdt; ++ int me = raw_smp_processor_id(); ++ ++ rdmsrl(MSR_EFER, efer); ++ if (efer & EFER_SVME) ++ return -EBUSY; ++ ++ if (!has_svm()) { ++ pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); ++ return -EINVAL; ++ } ++ sd = per_cpu(svm_data, me); ++ if (!sd) { ++ pr_err("%s: svm_data is NULL on %d\n", __func__, me); ++ return -EINVAL; ++ } ++ ++ sd->asid_generation = 1; ++ sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; ++ sd->next_asid = sd->max_asid + 1; ++ sd->min_asid = max_sev_asid + 1; ++ ++ gdt = get_current_gdt_rw(); ++ sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); ++ ++ wrmsrl(MSR_EFER, efer | EFER_SVME); ++ ++ wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); ++ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); ++ __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); ++ } ++ ++ ++ /* ++ * Get OSVW bits. ++ * ++ * Note that it is possible to have a system with mixed processor ++ * revisions and therefore different OSVW bits. If bits are not the same ++ * on different processors then choose the worst case (i.e. if erratum ++ * is present on one processor and not on another then assume that the ++ * erratum is present everywhere). 
++ */ ++ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { ++ uint64_t len, status = 0; ++ int err; ++ ++ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); ++ if (!err) ++ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, ++ &err); ++ ++ if (err) ++ osvw_status = osvw_len = 0; ++ else { ++ if (len < osvw_len) ++ osvw_len = len; ++ osvw_status |= status; ++ osvw_status &= (1ULL << osvw_len) - 1; ++ } ++ } else ++ osvw_status = osvw_len = 0; ++ ++ svm_init_erratum_383(); ++ ++ amd_pmu_enable_virt(); ++ ++ return 0; ++} ++ ++static void svm_cpu_uninit(int cpu) ++{ ++ struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); ++ ++ if (!sd) ++ return; ++ ++ per_cpu(svm_data, raw_smp_processor_id()) = NULL; ++ kfree(sd->sev_vmcbs); ++ __free_page(sd->save_area); ++ kfree(sd); ++} ++ ++static int svm_cpu_init(int cpu) ++{ ++ struct svm_cpu_data *sd; ++ ++ sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); ++ if (!sd) ++ return -ENOMEM; ++ sd->cpu = cpu; ++ sd->save_area = alloc_page(GFP_KERNEL); ++ if (!sd->save_area) ++ goto free_cpu_data; ++ ++ if (svm_sev_enabled()) { ++ sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, ++ sizeof(void *), ++ GFP_KERNEL); ++ if (!sd->sev_vmcbs) ++ goto free_save_area; ++ } ++ ++ per_cpu(svm_data, cpu) = sd; ++ ++ return 0; ++ ++free_save_area: ++ __free_page(sd->save_area); ++free_cpu_data: ++ kfree(sd); ++ return -ENOMEM; ++ ++} ++ ++static bool valid_msr_intercept(u32 index) ++{ ++ int i; ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) ++ if (direct_access_msrs[i].index == index) ++ return true; ++ ++ return false; ++} ++ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) ++{ ++ u8 bit_write; ++ unsigned long tmp; ++ u32 offset; ++ u32 *msrpm; ++ ++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: ++ to_svm(vcpu)->msrpm; ++ ++ offset = svm_msrpm_offset(msr); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ return !!test_bit(bit_write, &tmp); ++} ++ ++static void set_msr_interception(u32 *msrpm, unsigned msr, ++ int read, int write) ++{ ++ u8 bit_read, bit_write; ++ unsigned long tmp; ++ u32 offset; ++ ++ /* ++ * If this warning triggers extend the direct_access_msrs list at the ++ * beginning of the file ++ */ ++ WARN_ON(!valid_msr_intercept(msr)); ++ ++ offset = svm_msrpm_offset(msr); ++ bit_read = 2 * (msr & 0x0f); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); ++ write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); ++ ++ msrpm[offset] = tmp; ++} ++ ++static void svm_vcpu_init_msrpm(u32 *msrpm) ++{ ++ int i; ++ ++ memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { ++ if (!direct_access_msrs[i].always) ++ continue; ++ ++ set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); ++ } ++} ++ ++static void add_msr_offset(u32 offset) ++{ ++ int i; ++ ++ for (i = 0; i < MSRPM_OFFSETS; ++i) { ++ ++ /* Offset already in list? */ ++ if (msrpm_offsets[i] == offset) ++ return; ++ ++ /* Slot used by another offset? */ ++ if (msrpm_offsets[i] != MSR_INVALID) ++ continue; ++ ++ /* Add offset to list */ ++ msrpm_offsets[i] = offset; ++ ++ return; ++ } ++ ++ /* ++ * If this BUG triggers the msrpm_offsets table has an overflow. Just ++ * increase MSRPM_OFFSETS in this case. 
++ */ ++ BUG(); ++} ++ ++static void init_msrpm_offsets(void) ++{ ++ int i; ++ ++ memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); ++ ++ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { ++ u32 offset; ++ ++ offset = svm_msrpm_offset(direct_access_msrs[i].index); ++ BUG_ON(offset == MSR_INVALID); ++ ++ add_msr_offset(offset); ++ } ++} ++ ++static void svm_enable_lbrv(struct vcpu_svm *svm) ++{ ++ u32 *msrpm = svm->msrpm; ++ ++ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); ++} ++ ++static void svm_disable_lbrv(struct vcpu_svm *svm) ++{ ++ u32 *msrpm = svm->msrpm; ++ ++ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); ++ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); ++} ++ ++static void disable_nmi_singlestep(struct vcpu_svm *svm) ++{ ++ svm->nmi_singlestep = false; ++ ++ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { ++ /* Clear our flags if they were not set by the guest */ ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) ++ svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) ++ svm->vmcb->save.rflags &= ~X86_EFLAGS_RF; ++ } ++} ++ ++/* Note: ++ * This hash table is used to map VM_ID to a struct kvm_svm, ++ * when handling AMD IOMMU GALOG notification to schedule in ++ * a particular vCPU. ++ */ ++#define SVM_VM_DATA_HASH_BITS 8 ++static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); ++static u32 next_vm_id = 0; ++static bool next_vm_id_wrapped = 0; ++static DEFINE_SPINLOCK(svm_vm_data_hash_lock); ++ ++/* Note: ++ * This function is called from IOMMU driver to notify ++ * SVM to schedule in a particular vCPU of a particular VM. ++ */ ++static int avic_ga_log_notifier(u32 ga_tag) ++{ ++ unsigned long flags; ++ struct kvm_svm *kvm_svm; ++ struct kvm_vcpu *vcpu = NULL; ++ u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); ++ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); ++ ++ pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { ++ if (kvm_svm->avic_vm_id != vm_id) ++ continue; ++ vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); ++ break; ++ } ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++ ++ /* Note: ++ * At this point, the IOMMU should have already set the pending ++ * bit in the vAPIC backing page. So, we just need to schedule ++ * in the vcpu. 
++ */ ++ if (vcpu) ++ kvm_vcpu_wake_up(vcpu); ++ ++ return 0; ++} ++ ++static __init int sev_hardware_setup(void) ++{ ++ struct sev_user_data_status *status; ++ int rc; ++ ++ /* Maximum number of encrypted guests supported simultaneously */ ++ max_sev_asid = cpuid_ecx(0x8000001F); ++ ++ if (!max_sev_asid) ++ return 1; ++ ++ /* Minimum ASID value that should be used for SEV guest */ ++ min_sev_asid = cpuid_edx(0x8000001F); ++ ++ /* Initialize SEV ASID bitmap */ ++ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); ++ if (!sev_asid_bitmap) ++ return 1; ++ ++ status = kmalloc(sizeof(*status), GFP_KERNEL); ++ if (!status) ++ return 1; ++ ++ /* ++ * Check SEV platform status. ++ * ++ * PLATFORM_STATUS can be called in any state, if we failed to query ++ * the PLATFORM status then either PSP firmware does not support SEV ++ * feature or SEV firmware is dead. ++ */ ++ rc = sev_platform_status(status, NULL); ++ if (rc) ++ goto err; ++ ++ pr_info("SEV supported\n"); ++ ++err: ++ kfree(status); ++ return rc; ++} ++ ++static void grow_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ int old = control->pause_filter_count; ++ ++ control->pause_filter_count = __grow_ple_window(old, ++ pause_filter_count, ++ pause_filter_count_grow, ++ pause_filter_count_max); ++ ++ if (control->pause_filter_count != old) ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ trace_kvm_ple_window_grow(vcpu->vcpu_id, ++ control->pause_filter_count, old); ++} ++ ++static void shrink_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ int old = control->pause_filter_count; ++ ++ control->pause_filter_count = ++ __shrink_ple_window(old, ++ pause_filter_count, ++ pause_filter_count_shrink, ++ pause_filter_count); ++ if (control->pause_filter_count != old) ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ ++ trace_kvm_ple_window_shrink(vcpu->vcpu_id, ++ control->pause_filter_count, old); ++} ++ ++static __init int svm_hardware_setup(void) ++{ ++ int cpu; ++ struct page *iopm_pages; ++ void *iopm_va; ++ int r; ++ ++ iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); ++ ++ if (!iopm_pages) ++ return -ENOMEM; ++ ++ iopm_va = page_address(iopm_pages); ++ memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); ++ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; ++ ++ init_msrpm_offsets(); ++ ++ if (boot_cpu_has(X86_FEATURE_NX)) ++ kvm_enable_efer_bits(EFER_NX); ++ ++ if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) ++ kvm_enable_efer_bits(EFER_FFXSR); ++ ++ if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ kvm_has_tsc_control = true; ++ kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; ++ kvm_tsc_scaling_ratio_frac_bits = 32; ++ } ++ ++ /* Check for pause filtering support */ ++ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { ++ pause_filter_count = 0; ++ pause_filter_thresh = 0; ++ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { ++ pause_filter_thresh = 0; ++ } ++ ++ if (nested) { ++ printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); ++ kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); ++ } ++ ++ if (sev) { ++ if (boot_cpu_has(X86_FEATURE_SEV) && ++ IS_ENABLED(CONFIG_KVM_AMD_SEV)) { ++ r = sev_hardware_setup(); ++ if (r) ++ sev = false; ++ } else { ++ sev = false; ++ } ++ } ++ ++ for_each_possible_cpu(cpu) { ++ r = svm_cpu_init(cpu); ++ if (r) ++ goto err; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_NPT)) ++ npt_enabled = false; ++ ++ if (npt_enabled && !npt) 
{ ++ printk(KERN_INFO "kvm: Nested Paging disabled\n"); ++ npt_enabled = false; ++ } ++ ++ if (npt_enabled) { ++ printk(KERN_INFO "kvm: Nested Paging enabled\n"); ++ kvm_enable_tdp(); ++ } else ++ kvm_disable_tdp(); ++ ++ if (avic) { ++ if (!npt_enabled || ++ !boot_cpu_has(X86_FEATURE_AVIC) || ++ !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { ++ avic = false; ++ } else { ++ pr_info("AVIC enabled\n"); ++ ++ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); ++ } ++ } ++ ++ if (vls) { ++ if (!npt_enabled || ++ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || ++ !IS_ENABLED(CONFIG_X86_64)) { ++ vls = false; ++ } else { ++ pr_info("Virtual VMLOAD VMSAVE supported\n"); ++ } ++ } ++ ++ if (vgif) { ++ if (!boot_cpu_has(X86_FEATURE_VGIF)) ++ vgif = false; ++ else ++ pr_info("Virtual GIF supported\n"); ++ } ++ ++ return 0; ++ ++err: ++ __free_pages(iopm_pages, IOPM_ALLOC_ORDER); ++ iopm_base = 0; ++ return r; ++} ++ ++static __exit void svm_hardware_unsetup(void) ++{ ++ int cpu; ++ ++ if (svm_sev_enabled()) ++ bitmap_free(sev_asid_bitmap); ++ ++ for_each_possible_cpu(cpu) ++ svm_cpu_uninit(cpu); ++ ++ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); ++ iopm_base = 0; ++} ++ ++static void init_seg(struct vmcb_seg *seg) ++{ ++ seg->selector = 0; ++ seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | ++ SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ ++ seg->limit = 0xffff; ++ seg->base = 0; ++} ++ ++static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) ++{ ++ seg->selector = 0; ++ seg->attrib = SVM_SELECTOR_P_MASK | type; ++ seg->limit = 0xffff; ++ seg->base = 0; ++} ++ ++static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (is_guest_mode(vcpu)) ++ return svm->nested.hsave->control.tsc_offset; ++ ++ return vcpu->arch.tsc_offset; ++} ++ ++static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 g_tsc_offset = 0; ++ ++ if (is_guest_mode(vcpu)) { ++ /* Write L1's TSC offset. 
*/ ++ g_tsc_offset = svm->vmcb->control.tsc_offset - ++ svm->nested.hsave->control.tsc_offset; ++ svm->nested.hsave->control.tsc_offset = offset; ++ } else ++ trace_kvm_write_tsc_offset(vcpu->vcpu_id, ++ svm->vmcb->control.tsc_offset, ++ offset); ++ ++ svm->vmcb->control.tsc_offset = offset + g_tsc_offset; ++ ++ mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ return svm->vmcb->control.tsc_offset; ++} ++ ++static void avic_init_vmcb(struct vcpu_svm *svm) ++{ ++ struct vmcb *vmcb = svm->vmcb; ++ struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); ++ phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); ++ phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); ++ phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); ++ ++ vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; ++ vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; ++ vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; ++ vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; ++ vmcb->control.int_ctl |= AVIC_ENABLE_MASK; ++} ++ ++static void init_vmcb(struct vcpu_svm *svm) ++{ ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ struct vmcb_save_area *save = &svm->vmcb->save; ++ ++ svm->vcpu.arch.hflags = 0; ++ ++ set_cr_intercept(svm, INTERCEPT_CR0_READ); ++ set_cr_intercept(svm, INTERCEPT_CR3_READ); ++ set_cr_intercept(svm, INTERCEPT_CR4_READ); ++ set_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ set_cr_intercept(svm, INTERCEPT_CR3_WRITE); ++ set_cr_intercept(svm, INTERCEPT_CR4_WRITE); ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ set_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ ++ set_dr_intercepts(svm); ++ ++ set_exception_intercept(svm, PF_VECTOR); ++ set_exception_intercept(svm, UD_VECTOR); ++ set_exception_intercept(svm, MC_VECTOR); ++ set_exception_intercept(svm, AC_VECTOR); ++ set_exception_intercept(svm, DB_VECTOR); ++ /* ++ * Guest access to VMware backdoor ports could legitimately ++ * trigger #GP because of TSS I/O permission bitmap. ++ * We intercept those #GP and allow access to them anyway ++ * as VMware does. 
++ */ ++ if (enable_vmware_backdoor) ++ set_exception_intercept(svm, GP_VECTOR); ++ ++ set_intercept(svm, INTERCEPT_INTR); ++ set_intercept(svm, INTERCEPT_NMI); ++ set_intercept(svm, INTERCEPT_SMI); ++ set_intercept(svm, INTERCEPT_SELECTIVE_CR0); ++ set_intercept(svm, INTERCEPT_RDPMC); ++ set_intercept(svm, INTERCEPT_CPUID); ++ set_intercept(svm, INTERCEPT_INVD); ++ set_intercept(svm, INTERCEPT_INVLPG); ++ set_intercept(svm, INTERCEPT_INVLPGA); ++ set_intercept(svm, INTERCEPT_IOIO_PROT); ++ set_intercept(svm, INTERCEPT_MSR_PROT); ++ set_intercept(svm, INTERCEPT_TASK_SWITCH); ++ set_intercept(svm, INTERCEPT_SHUTDOWN); ++ set_intercept(svm, INTERCEPT_VMRUN); ++ set_intercept(svm, INTERCEPT_VMMCALL); ++ set_intercept(svm, INTERCEPT_VMLOAD); ++ set_intercept(svm, INTERCEPT_VMSAVE); ++ set_intercept(svm, INTERCEPT_STGI); ++ set_intercept(svm, INTERCEPT_CLGI); ++ set_intercept(svm, INTERCEPT_SKINIT); ++ set_intercept(svm, INTERCEPT_WBINVD); ++ set_intercept(svm, INTERCEPT_XSETBV); ++ set_intercept(svm, INTERCEPT_RSM); ++ ++ if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { ++ set_intercept(svm, INTERCEPT_MONITOR); ++ set_intercept(svm, INTERCEPT_MWAIT); ++ } ++ ++ if (!kvm_hlt_in_guest(svm->vcpu.kvm)) ++ set_intercept(svm, INTERCEPT_HLT); ++ ++ control->iopm_base_pa = __sme_set(iopm_base); ++ control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); ++ control->int_ctl = V_INTR_MASKING_MASK; ++ ++ init_seg(&save->es); ++ init_seg(&save->ss); ++ init_seg(&save->ds); ++ init_seg(&save->fs); ++ init_seg(&save->gs); ++ ++ save->cs.selector = 0xf000; ++ save->cs.base = 0xffff0000; ++ /* Executable/Readable Code Segment */ ++ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | ++ SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; ++ save->cs.limit = 0xffff; ++ ++ save->gdtr.limit = 0xffff; ++ save->idtr.limit = 0xffff; ++ ++ init_sys_seg(&save->ldtr, SEG_TYPE_LDT); ++ init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); ++ ++ svm_set_efer(&svm->vcpu, 0); ++ save->dr6 = 0xffff0ff0; ++ kvm_set_rflags(&svm->vcpu, 2); ++ save->rip = 0x0000fff0; ++ svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; ++ ++ /* ++ * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. ++ * It also updates the guest-visible cr0 value. ++ */ ++ svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); ++ kvm_mmu_reset_context(&svm->vcpu); ++ ++ save->cr4 = X86_CR4_PAE; ++ /* rdx = ?? */ ++ ++ if (npt_enabled) { ++ /* Setup VMCB for Nested Paging */ ++ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; ++ clr_intercept(svm, INTERCEPT_INVLPG); ++ clr_exception_intercept(svm, PF_VECTOR); ++ clr_cr_intercept(svm, INTERCEPT_CR3_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); ++ save->g_pat = svm->vcpu.arch.pat; ++ save->cr3 = 0; ++ save->cr4 = 0; ++ } ++ svm->asid_generation = 0; ++ ++ svm->nested.vmcb = 0; ++ svm->vcpu.arch.hflags = 0; ++ ++ if (pause_filter_count) { ++ control->pause_filter_count = pause_filter_count; ++ if (pause_filter_thresh) ++ control->pause_filter_thresh = pause_filter_thresh; ++ set_intercept(svm, INTERCEPT_PAUSE); ++ } else { ++ clr_intercept(svm, INTERCEPT_PAUSE); ++ } ++ ++ if (kvm_vcpu_apicv_active(&svm->vcpu)) ++ avic_init_vmcb(svm); ++ ++ /* ++ * If hardware supports Virtual VMLOAD VMSAVE then enable it ++ * in VMCB and clear intercepts to avoid #VMEXIT. 
++ */ ++ if (vls) { ++ clr_intercept(svm, INTERCEPT_VMLOAD); ++ clr_intercept(svm, INTERCEPT_VMSAVE); ++ svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; ++ } ++ ++ if (vgif) { ++ clr_intercept(svm, INTERCEPT_STGI); ++ clr_intercept(svm, INTERCEPT_CLGI); ++ svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; ++ } ++ ++ if (sev_guest(svm->vcpu.kvm)) { ++ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; ++ clr_exception_intercept(svm, UD_VECTOR); ++ } ++ ++ mark_all_dirty(svm->vmcb); ++ ++ enable_gif(svm); ++ ++} ++ ++static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, ++ unsigned int index) ++{ ++ u64 *avic_physical_id_table; ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ ++ if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) ++ return NULL; ++ ++ avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page); ++ ++ return &avic_physical_id_table[index]; ++} ++ ++/** ++ * Note: ++ * AVIC hardware walks the nested page table to check permissions, ++ * but does not use the SPA address specified in the leaf page ++ * table entry since it uses address in the AVIC_BACKING_PAGE pointer ++ * field of the VMCB. Therefore, we set up the ++ * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. ++ */ ++static int avic_init_access_page(struct kvm_vcpu *vcpu) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ int ret = 0; ++ ++ mutex_lock(&kvm->slots_lock); ++ if (kvm->arch.apic_access_page_done) ++ goto out; ++ ++ ret = __x86_set_memory_region(kvm, ++ APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, ++ APIC_DEFAULT_PHYS_BASE, ++ PAGE_SIZE); ++ if (ret) ++ goto out; ++ ++ kvm->arch.apic_access_page_done = true; ++out: ++ mutex_unlock(&kvm->slots_lock); ++ return ret; ++} ++ ++static int avic_init_backing_page(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ u64 *entry, new_entry; ++ int id = vcpu->vcpu_id; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ ret = avic_init_access_page(vcpu); ++ if (ret) ++ return ret; ++ ++ if (id >= AVIC_MAX_PHYSICAL_ID_COUNT) ++ return -EINVAL; ++ ++ if (!svm->vcpu.arch.apic->regs) ++ return -EINVAL; ++ ++ svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs); ++ ++ /* Setting AVIC backing page address in the phy APIC ID table */ ++ entry = avic_get_physical_id_entry(vcpu, id); ++ if (!entry) ++ return -EINVAL; ++ ++ new_entry = READ_ONCE(*entry); ++ new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & ++ AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | ++ AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); ++ WRITE_ONCE(*entry, new_entry); ++ ++ svm->avic_physical_id_cache = entry; ++ ++ return 0; ++} ++ ++static void __sev_asid_free(int asid) ++{ ++ struct svm_cpu_data *sd; ++ int cpu, pos; ++ ++ pos = asid - 1; ++ clear_bit(pos, sev_asid_bitmap); ++ ++ for_each_possible_cpu(cpu) { ++ sd = per_cpu(svm_data, cpu); ++ sd->sev_vmcbs[pos] = NULL; ++ } ++} ++ ++static void sev_asid_free(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ __sev_asid_free(sev->asid); ++} ++ ++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) ++{ ++ struct sev_data_decommission *decommission; ++ struct sev_data_deactivate *data; ++ ++ if (!handle) ++ return; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return; ++ ++ /* deactivate handle */ ++ data->handle = handle; ++ sev_guest_deactivate(data, NULL); ++ ++ wbinvd_on_all_cpus(); ++ sev_guest_df_flush(NULL); ++ kfree(data); ++ ++ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); ++ if (!decommission) ++ return; ++ ++ /* decommission handle */ ++ decommission->handle 
= handle; ++ sev_guest_decommission(decommission, NULL); ++ ++ kfree(decommission); ++} ++ ++static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, ++ unsigned long ulen, unsigned long *n, ++ int write) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ unsigned long npages, npinned, size; ++ unsigned long locked, lock_limit; ++ struct page **pages; ++ unsigned long first, last; ++ ++ if (ulen == 0 || uaddr + ulen < uaddr) ++ return NULL; ++ ++ /* Calculate number of pages. */ ++ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; ++ last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; ++ npages = (last - first + 1); ++ ++ locked = sev->pages_locked + npages; ++ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { ++ pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit); ++ return NULL; ++ } ++ ++ /* Avoid using vmalloc for smaller buffers. */ ++ size = npages * sizeof(struct page *); ++ if (size > PAGE_SIZE) ++ pages = vmalloc(size); ++ else ++ pages = kmalloc(size, GFP_KERNEL); ++ ++ if (!pages) ++ return NULL; ++ ++ /* Pin the user virtual address. */ ++ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); ++ if (npinned != npages) { ++ pr_err("SEV: Failure locking %lu pages.\n", npages); ++ goto err; ++ } ++ ++ *n = npages; ++ sev->pages_locked = locked; ++ ++ return pages; ++ ++err: ++ if (npinned > 0) ++ release_pages(pages, npinned); ++ ++ kvfree(pages); ++ return NULL; ++} ++ ++static void sev_unpin_memory(struct kvm *kvm, struct page **pages, ++ unsigned long npages) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ release_pages(pages, npages); ++ kvfree(pages); ++ sev->pages_locked -= npages; ++} ++ ++static void sev_clflush_pages(struct page *pages[], unsigned long npages) ++{ ++ uint8_t *page_virtual; ++ unsigned long i; ++ ++ if (npages == 0 || pages == NULL) ++ return; ++ ++ for (i = 0; i < npages; i++) { ++ page_virtual = kmap_atomic(pages[i]); ++ clflush_cache_range(page_virtual, PAGE_SIZE); ++ kunmap_atomic(page_virtual); ++ } ++} ++ ++static void __unregister_enc_region_locked(struct kvm *kvm, ++ struct enc_region *region) ++{ ++ /* ++ * The guest may change the memory encryption attribute from C=0 -> C=1 ++ * or vice versa for this memory range. Lets make sure caches are ++ * flushed to ensure that guest data gets written into memory with ++ * correct C-bit. ++ */ ++ sev_clflush_pages(region->pages, region->npages); ++ ++ sev_unpin_memory(kvm, region->pages, region->npages); ++ list_del(®ion->list); ++ kfree(region); ++} ++ ++static struct kvm *svm_vm_alloc(void) ++{ ++ struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); ++ return &kvm_svm->kvm; ++} ++ ++static void svm_vm_free(struct kvm *kvm) ++{ ++ vfree(to_kvm_svm(kvm)); ++} ++ ++static void sev_vm_destroy(struct kvm *kvm) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct list_head *head = &sev->regions_list; ++ struct list_head *pos, *q; ++ ++ if (!sev_guest(kvm)) ++ return; ++ ++ mutex_lock(&kvm->lock); ++ ++ /* ++ * if userspace was terminated before unregistering the memory regions ++ * then lets unpin all the registered memory. 
++ */ ++ if (!list_empty(head)) { ++ list_for_each_safe(pos, q, head) { ++ __unregister_enc_region_locked(kvm, ++ list_entry(pos, struct enc_region, list)); ++ } ++ } ++ ++ mutex_unlock(&kvm->lock); ++ ++ sev_unbind_asid(kvm, sev->handle); ++ sev_asid_free(kvm); ++} ++ ++static void avic_vm_destroy(struct kvm *kvm) ++{ ++ unsigned long flags; ++ struct kvm_svm *kvm_svm = to_kvm_svm(kvm); ++ ++ if (!avic) ++ return; ++ ++ if (kvm_svm->avic_logical_id_table_page) ++ __free_page(kvm_svm->avic_logical_id_table_page); ++ if (kvm_svm->avic_physical_id_table_page) ++ __free_page(kvm_svm->avic_physical_id_table_page); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ hash_del(&kvm_svm->hnode); ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++} ++ ++static void svm_vm_destroy(struct kvm *kvm) ++{ ++ avic_vm_destroy(kvm); ++ sev_vm_destroy(kvm); ++} ++ ++static int avic_vm_init(struct kvm *kvm) ++{ ++ unsigned long flags; ++ int err = -ENOMEM; ++ struct kvm_svm *kvm_svm = to_kvm_svm(kvm); ++ struct kvm_svm *k2; ++ struct page *p_page; ++ struct page *l_page; ++ u32 vm_id; ++ ++ if (!avic) ++ return 0; ++ ++ /* Allocating physical APIC ID table (4KB) */ ++ p_page = alloc_page(GFP_KERNEL); ++ if (!p_page) ++ goto free_avic; ++ ++ kvm_svm->avic_physical_id_table_page = p_page; ++ clear_page(page_address(p_page)); ++ ++ /* Allocating logical APIC ID table (4KB) */ ++ l_page = alloc_page(GFP_KERNEL); ++ if (!l_page) ++ goto free_avic; ++ ++ kvm_svm->avic_logical_id_table_page = l_page; ++ clear_page(page_address(l_page)); ++ ++ spin_lock_irqsave(&svm_vm_data_hash_lock, flags); ++ again: ++ vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; ++ if (vm_id == 0) { /* id is 1-based, zero is not okay */ ++ next_vm_id_wrapped = 1; ++ goto again; ++ } ++ /* Is it still in use? Only possible if wrapped at least once */ ++ if (next_vm_id_wrapped) { ++ hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { ++ if (k2->avic_vm_id == vm_id) ++ goto again; ++ } ++ } ++ kvm_svm->avic_vm_id = vm_id; ++ hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); ++ spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); ++ ++ return 0; ++ ++free_avic: ++ avic_vm_destroy(kvm); ++ return err; ++} ++ ++static inline int ++avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) ++{ ++ int ret = 0; ++ unsigned long flags; ++ struct amd_svm_iommu_ir *ir; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm)) ++ return 0; ++ ++ /* ++ * Here, we go through the per-vcpu ir_list to update all existing ++ * interrupt remapping table entry targeting this vcpu. ++ */ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ ++ if (list_empty(&svm->ir_list)) ++ goto out; ++ ++ list_for_each_entry(ir, &svm->ir_list, node) { ++ ret = amd_iommu_update_ga(cpu, r, ir->data); ++ if (ret) ++ break; ++ } ++out: ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++ return ret; ++} ++ ++static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ u64 entry; ++ /* ID = 0xff (broadcast), ID > 0xff (reserved) */ ++ int h_physical_id = kvm_cpu_get_apicid(cpu); ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* ++ * Since the host physical APIC id is 8 bits, ++ * we can support host APIC ID upto 255. 
++ */ ++ if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) ++ return; ++ ++ entry = READ_ONCE(*(svm->avic_physical_id_cache)); ++ WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; ++ entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ if (svm->avic_is_running) ++ entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ ++ WRITE_ONCE(*(svm->avic_physical_id_cache), entry); ++ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, ++ svm->avic_is_running); ++} ++ ++static void avic_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ u64 entry; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ entry = READ_ONCE(*(svm->avic_physical_id_cache)); ++ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) ++ avic_update_iommu_vcpu_affinity(vcpu, -1, 0); ++ ++ entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; ++ WRITE_ONCE(*(svm->avic_physical_id_cache), entry); ++} ++ ++/** ++ * This function is called during VCPU halt/unhalt. ++ */ ++static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->avic_is_running = is_run; ++ if (is_run) ++ avic_vcpu_load(vcpu, vcpu->cpu); ++ else ++ avic_vcpu_put(vcpu); ++} ++ ++static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 dummy; ++ u32 eax = 1; ++ ++ vcpu->arch.microcode_version = 0x01000065; ++ svm->spec_ctrl = 0; ++ svm->virt_spec_ctrl = 0; ++ ++ if (!init_event) { ++ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | ++ MSR_IA32_APICBASE_ENABLE; ++ if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) ++ svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; ++ } ++ init_vmcb(svm); ++ ++ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, eax); ++ ++ if (kvm_vcpu_apicv_active(vcpu) && !init_event) ++ avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); ++} ++ ++static int avic_init_vcpu(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ return 0; ++ ++ ret = avic_init_backing_page(&svm->vcpu); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&svm->ir_list); ++ spin_lock_init(&svm->ir_list_lock); ++ ++ return ret; ++} ++ ++static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) ++{ ++ struct vcpu_svm *svm; ++ struct page *page; ++ struct page *msrpm_pages; ++ struct page *hsave_page; ++ struct page *nested_msrpm_pages; ++ int err; ++ ++ svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ if (!svm) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ err = kvm_vcpu_init(&svm->vcpu, kvm, id); ++ if (err) ++ goto free_svm; ++ ++ err = -ENOMEM; ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ goto uninit; ++ ++ msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); ++ if (!msrpm_pages) ++ goto free_page1; ++ ++ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); ++ if (!nested_msrpm_pages) ++ goto free_page2; ++ ++ hsave_page = alloc_page(GFP_KERNEL); ++ if (!hsave_page) ++ goto free_page3; ++ ++ err = avic_init_vcpu(svm); ++ if (err) ++ goto free_page4; ++ ++ /* We initialize this flag to true to make sure that the is_running ++ * bit would be set the first time the vcpu is loaded. 
++ */ ++ svm->avic_is_running = true; ++ ++ svm->nested.hsave = page_address(hsave_page); ++ ++ svm->msrpm = page_address(msrpm_pages); ++ svm_vcpu_init_msrpm(svm->msrpm); ++ ++ svm->nested.msrpm = page_address(nested_msrpm_pages); ++ svm_vcpu_init_msrpm(svm->nested.msrpm); ++ ++ svm->vmcb = page_address(page); ++ clear_page(svm->vmcb); ++ svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); ++ svm->asid_generation = 0; ++ init_vmcb(svm); ++ ++ svm_init_osvw(&svm->vcpu); ++ ++ return &svm->vcpu; ++ ++free_page4: ++ __free_page(hsave_page); ++free_page3: ++ __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); ++free_page2: ++ __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); ++free_page1: ++ __free_page(page); ++uninit: ++ kvm_vcpu_uninit(&svm->vcpu); ++free_svm: ++ kmem_cache_free(kvm_vcpu_cache, svm); ++out: ++ return ERR_PTR(err); ++} ++ ++static void svm_clear_current_vmcb(struct vmcb *vmcb) ++{ ++ int i; ++ ++ for_each_online_cpu(i) ++ cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); ++} ++ ++static void svm_free_vcpu(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So, ensure that no logical CPU has this ++ * vmcb page recorded as its current vmcb. ++ */ ++ svm_clear_current_vmcb(svm->vmcb); ++ ++ __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); ++ __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); ++ __free_page(virt_to_page(svm->nested.hsave)); ++ __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); ++ kvm_vcpu_uninit(vcpu); ++ kmem_cache_free(kvm_vcpu_cache, svm); ++} ++ ++static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ int i; ++ ++ if (unlikely(cpu != vcpu->cpu)) { ++ svm->asid_generation = 0; ++ mark_all_dirty(svm->vmcb); ++ } ++ ++#ifdef CONFIG_X86_64 ++ rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base); ++#endif ++ savesegment(fs, svm->host.fs); ++ savesegment(gs, svm->host.gs); ++ svm->host.ldt = kvm_read_ldt(); ++ ++ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) ++ rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); ++ ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { ++ u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; ++ if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { ++ __this_cpu_write(current_tsc_ratio, tsc_ratio); ++ wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); ++ } ++ } ++ /* This assumes that the kernel never uses MSR_TSC_AUX */ ++ if (static_cpu_has(X86_FEATURE_RDTSCP)) ++ wrmsrl(MSR_TSC_AUX, svm->tsc_aux); ++ ++ if (sd->current_vmcb != svm->vmcb) { ++ sd->current_vmcb = svm->vmcb; ++ indirect_branch_prediction_barrier(); ++ } ++ avic_vcpu_load(vcpu, cpu); ++} ++ ++static void svm_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int i; ++ ++ avic_vcpu_put(vcpu); ++ ++ ++vcpu->stat.host_state_reload; ++ kvm_load_ldt(svm->host.ldt); ++#ifdef CONFIG_X86_64 ++ loadsegment(fs, svm->host.fs); ++ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase); ++ load_gs_index(svm->host.gs); ++#else ++#ifdef CONFIG_X86_32_LAZY_GS ++ loadsegment(gs, svm->host.gs); ++#endif ++#endif ++ for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) ++ wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); ++} ++ ++static void svm_vcpu_blocking(struct kvm_vcpu *vcpu) ++{ ++ avic_set_running(vcpu, false); ++} ++ ++static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) ++{ ++ avic_set_running(vcpu, true); ++} ++ 
++static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ unsigned long rflags = svm->vmcb->save.rflags; ++ ++ if (svm->nmi_singlestep) { ++ /* Hide our flags if they were not set by the guest */ ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) ++ rflags &= ~X86_EFLAGS_TF; ++ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) ++ rflags &= ~X86_EFLAGS_RF; ++ } ++ return rflags; ++} ++ ++static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ if (to_svm(vcpu)->nmi_singlestep) ++ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); ++ ++ /* ++ * Any change of EFLAGS.VM is accompanied by a reload of SS ++ * (caused by either a task switch or an inter-privilege IRET), ++ * so we do not need to update the CPL here. ++ */ ++ to_svm(vcpu)->vmcb->save.rflags = rflags; ++} ++ ++static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) ++{ ++ switch (reg) { ++ case VCPU_EXREG_PDPTR: ++ BUG_ON(!npt_enabled); ++ load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static void svm_set_vintr(struct vcpu_svm *svm) ++{ ++ set_intercept(svm, INTERCEPT_VINTR); ++} ++ ++static void svm_clear_vintr(struct vcpu_svm *svm) ++{ ++ clr_intercept(svm, INTERCEPT_VINTR); ++} ++ ++static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; ++ ++ switch (seg) { ++ case VCPU_SREG_CS: return &save->cs; ++ case VCPU_SREG_DS: return &save->ds; ++ case VCPU_SREG_ES: return &save->es; ++ case VCPU_SREG_FS: return &save->fs; ++ case VCPU_SREG_GS: return &save->gs; ++ case VCPU_SREG_SS: return &save->ss; ++ case VCPU_SREG_TR: return &save->tr; ++ case VCPU_SREG_LDTR: return &save->ldtr; ++ } ++ BUG(); ++ return NULL; ++} ++ ++static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ return s->base; ++} ++ ++static void svm_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ var->base = s->base; ++ var->limit = s->limit; ++ var->selector = s->selector; ++ var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; ++ var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; ++ var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; ++ var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; ++ var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; ++ var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; ++ var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; ++ ++ /* ++ * AMD CPUs circa 2014 track the G bit for all segments except CS. ++ * However, the SVM spec states that the G bit is not observed by the ++ * CPU, and some VMware virtual CPUs drop the G bit for all segments. ++ * So let's synthesize a legal G bit for all segments, this helps ++ * running KVM nested. It also helps cross-vendor migration, because ++ * Intel's vmentry has a check on the 'G' bit. 
++ */ ++ var->g = s->limit > 0xfffff; ++ ++ /* ++ * AMD's VMCB does not have an explicit unusable field, so emulate it ++ * for cross vendor migration purposes by "not present" ++ */ ++ var->unusable = !var->present; ++ ++ switch (seg) { ++ case VCPU_SREG_TR: ++ /* ++ * Work around a bug where the busy flag in the tr selector ++ * isn't exposed ++ */ ++ var->type |= 0x2; ++ break; ++ case VCPU_SREG_DS: ++ case VCPU_SREG_ES: ++ case VCPU_SREG_FS: ++ case VCPU_SREG_GS: ++ /* ++ * The accessed bit must always be set in the segment ++ * descriptor cache, although it can be cleared in the ++ * descriptor, the cached bit always remains at 1. Since ++ * Intel has a check on this, set it here to support ++ * cross-vendor migration. ++ */ ++ if (!var->unusable) ++ var->type |= 0x1; ++ break; ++ case VCPU_SREG_SS: ++ /* ++ * On AMD CPUs sometimes the DB bit in the segment ++ * descriptor is left as 1, although the whole segment has ++ * been made unusable. Clear it here to pass an Intel VMX ++ * entry check when cross vendor migrating. ++ */ ++ if (var->unusable) ++ var->db = 0; ++ /* This is symmetric with svm_set_segment() */ ++ var->dpl = to_svm(vcpu)->vmcb->save.cpl; ++ break; ++ } ++} ++ ++static int svm_get_cpl(struct kvm_vcpu *vcpu) ++{ ++ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; ++ ++ return save->cpl; ++} ++ ++static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ dt->size = svm->vmcb->save.idtr.limit; ++ dt->address = svm->vmcb->save.idtr.base; ++} ++ ++static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.idtr.limit = dt->size; ++ svm->vmcb->save.idtr.base = dt->address ; ++ mark_dirty(svm->vmcb, VMCB_DT); ++} ++ ++static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ dt->size = svm->vmcb->save.gdtr.limit; ++ dt->address = svm->vmcb->save.gdtr.base; ++} ++ ++static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.gdtr.limit = dt->size; ++ svm->vmcb->save.gdtr.base = dt->address ; ++ mark_dirty(svm->vmcb, VMCB_DT); ++} ++ ++static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void svm_decache_cr3(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static void update_cr0_intercept(struct vcpu_svm *svm) ++{ ++ ulong gcr0 = svm->vcpu.arch.cr0; ++ u64 *hcr0 = &svm->vmcb->save.cr0; ++ ++ *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) ++ | (gcr0 & SVM_CR0_SELECTIVE_MASK); ++ ++ mark_dirty(svm->vmcb, VMCB_CR); ++ ++ if (gcr0 == *hcr0) { ++ clr_cr_intercept(svm, INTERCEPT_CR0_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ } else { ++ set_cr_intercept(svm, INTERCEPT_CR0_READ); ++ set_cr_intercept(svm, INTERCEPT_CR0_WRITE); ++ } ++} ++ ++static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++#ifdef CONFIG_X86_64 ++ if (vcpu->arch.efer & EFER_LME) { ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { ++ vcpu->arch.efer |= EFER_LMA; ++ svm->vmcb->save.efer |= EFER_LMA | EFER_LME; ++ } ++ ++ if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { ++ vcpu->arch.efer &= ~EFER_LMA; ++ svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); ++ } ++ } ++#endif ++ vcpu->arch.cr0 = cr0; ++ ++ if (!npt_enabled) ++ cr0 |= X86_CR0_PG | X86_CR0_WP; ++ ++ /* ++ * re-enable caching here 
because the QEMU bios ++ * does not do it - this results in some delay at ++ * reboot ++ */ ++ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ cr0 &= ~(X86_CR0_CD | X86_CR0_NW); ++ svm->vmcb->save.cr0 = cr0; ++ mark_dirty(svm->vmcb, VMCB_CR); ++ update_cr0_intercept(svm); ++} ++ ++static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; ++ unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; ++ ++ if (cr4 & X86_CR4_VMXE) ++ return 1; ++ ++ if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) ++ svm_flush_tlb(vcpu, true); ++ ++ vcpu->arch.cr4 = cr4; ++ if (!npt_enabled) ++ cr4 |= X86_CR4_PAE; ++ cr4 |= host_cr4_mce; ++ to_svm(vcpu)->vmcb->save.cr4 = cr4; ++ mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); ++ return 0; ++} ++ ++static void svm_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_seg *s = svm_seg(vcpu, seg); ++ ++ s->base = var->base; ++ s->limit = var->limit; ++ s->selector = var->selector; ++ s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); ++ s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; ++ s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; ++ s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; ++ s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; ++ s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; ++ s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; ++ s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; ++ ++ /* ++ * This is always accurate, except if SYSRET returned to a segment ++ * with SS.DPL != 3. Intel does not have this quirk, and always ++ * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it ++ * would entail passing the CPL to userspace and back. 
++ */ ++ if (seg == VCPU_SREG_SS) ++ /* This is symmetric with svm_get_segment() */ ++ svm->vmcb->save.cpl = (var->dpl & 3); ++ ++ mark_dirty(svm->vmcb, VMCB_SEG); ++} ++ ++static void update_bp_intercept(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ clr_exception_intercept(svm, BP_VECTOR); ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ set_exception_intercept(svm, BP_VECTOR); ++ } else ++ vcpu->guest_debug = 0; ++} ++ ++static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) ++{ ++ if (sd->next_asid > sd->max_asid) { ++ ++sd->asid_generation; ++ sd->next_asid = sd->min_asid; ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; ++ } ++ ++ svm->asid_generation = sd->asid_generation; ++ svm->vmcb->control.asid = sd->next_asid++; ++ ++ mark_dirty(svm->vmcb, VMCB_ASID); ++} ++ ++static u64 svm_get_dr6(struct kvm_vcpu *vcpu) ++{ ++ return to_svm(vcpu)->vmcb->save.dr6; ++} ++ ++static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.dr6 = value; ++ mark_dirty(svm->vmcb, VMCB_DR); ++} ++ ++static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ get_debugreg(vcpu->arch.db[0], 0); ++ get_debugreg(vcpu->arch.db[1], 1); ++ get_debugreg(vcpu->arch.db[2], 2); ++ get_debugreg(vcpu->arch.db[3], 3); ++ vcpu->arch.dr6 = svm_get_dr6(vcpu); ++ vcpu->arch.dr7 = svm->vmcb->save.dr7; ++ ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; ++ set_dr_intercepts(svm); ++} ++ ++static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.dr7 = value; ++ mark_dirty(svm->vmcb, VMCB_DR); ++} ++ ++static int pf_interception(struct vcpu_svm *svm) ++{ ++ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); ++ u64 error_code = svm->vmcb->control.exit_info_1; ++ ++ return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address, ++ static_cpu_has(X86_FEATURE_DECODEASSISTS) ? ++ svm->vmcb->control.insn_bytes : NULL, ++ svm->vmcb->control.insn_len); ++} ++ ++static int npf_interception(struct vcpu_svm *svm) ++{ ++ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); ++ u64 error_code = svm->vmcb->control.exit_info_1; ++ ++ trace_kvm_page_fault(fault_address, error_code); ++ return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, ++ static_cpu_has(X86_FEATURE_DECODEASSISTS) ? 
++ svm->vmcb->control.insn_bytes : NULL, ++ svm->vmcb->control.insn_len); ++} ++ ++static int db_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ ++ if (!(svm->vcpu.guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && ++ !svm->nmi_singlestep) { ++ kvm_queue_exception(&svm->vcpu, DB_VECTOR); ++ return 1; ++ } ++ ++ if (svm->nmi_singlestep) { ++ disable_nmi_singlestep(svm); ++ /* Make sure we check for pending NMIs upon entry */ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ ++ if (svm->vcpu.guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ kvm_run->debug.arch.pc = ++ svm->vmcb->save.cs.base + svm->vmcb->save.rip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int bp_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; ++ kvm_run->debug.arch.exception = BP_VECTOR; ++ return 0; ++} ++ ++static int ud_interception(struct vcpu_svm *svm) ++{ ++ return handle_ud(&svm->vcpu); ++} ++ ++static int ac_interception(struct vcpu_svm *svm) ++{ ++ kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); ++ return 1; ++} ++ ++static int gp_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ u32 error_code = svm->vmcb->control.exit_info_1; ++ int er; ++ ++ WARN_ON_ONCE(!enable_vmware_backdoor); ++ ++ er = kvm_emulate_instruction(vcpu, ++ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ else if (er != EMULATE_DONE) ++ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); ++ return 1; ++} ++ ++static bool is_erratum_383(void) ++{ ++ int err, i; ++ u64 value; ++ ++ if (!erratum_383_found) ++ return false; ++ ++ value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); ++ if (err) ++ return false; ++ ++ /* Bit 62 may or may not be set for this mce */ ++ value &= ~(1ULL << 62); ++ ++ if (value != 0xb600000000010015ULL) ++ return false; ++ ++ /* Clear MCi_STATUS registers */ ++ for (i = 0; i < 6; ++i) ++ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); ++ ++ value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); ++ if (!err) { ++ u32 low, high; ++ ++ value &= ~(1ULL << 2); ++ low = lower_32_bits(value); ++ high = upper_32_bits(value); ++ ++ native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); ++ } ++ ++ /* Flush tlb to evict multi-match entries */ ++ __flush_tlb_all(); ++ ++ return true; ++} ++ ++static void svm_handle_mce(struct vcpu_svm *svm) ++{ ++ if (is_erratum_383()) { ++ /* ++ * Erratum 383 triggered. Guest state is corrupt so kill the ++ * guest. ++ */ ++ pr_err("KVM: Guest triggered AMD Erratum 383\n"); ++ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); ++ ++ return; ++ } ++ ++ /* ++ * On an #MC intercept the MCE handler is not called automatically in ++ * the host. So do it by hand here. ++ */ ++ asm volatile ( ++ "int $0x12\n"); ++ /* not sure if we ever come back to this point */ ++ ++ return; ++} ++ ++static int mc_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int shutdown_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ ++ /* ++ * VMCB is undefined after a SHUTDOWN intercept ++ * so reinitialize it. 
++ */ ++ clear_page(svm->vmcb); ++ init_vmcb(svm); ++ ++ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; ++ return 0; ++} ++ ++static int io_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ ++ int size, in, string; ++ unsigned port; ++ ++ ++svm->vcpu.stat.io_exits; ++ string = (io_info & SVM_IOIO_STR_MASK) != 0; ++ in = (io_info & SVM_IOIO_TYPE_MASK) != 0; ++ if (string) ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++ ++ port = io_info >> 16; ++ size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; ++ svm->next_rip = svm->vmcb->control.exit_info_2; ++ ++ return kvm_fast_pio(&svm->vcpu, size, port, in); ++} ++ ++static int nmi_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int intr_interception(struct vcpu_svm *svm) ++{ ++ ++svm->vcpu.stat.irq_exits; ++ return 1; ++} ++ ++static int nop_on_interception(struct vcpu_svm *svm) ++{ ++ return 1; ++} ++ ++static int halt_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; ++ return kvm_emulate_halt(&svm->vcpu); ++} ++ ++static int vmmcall_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_emulate_hypercall(&svm->vcpu); ++} ++ ++static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ return svm->nested.nested_cr3; ++} ++ ++static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 cr3 = svm->nested.nested_cr3; ++ u64 pdpte; ++ int ret; ++ ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, ++ offset_in_page(cr3) + index * 8, 8); ++ if (ret) ++ return 0; ++ return pdpte; ++} ++ ++static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, ++ unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.nested_cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_NPT); ++} ++ ++static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { ++ /* ++ * TODO: track the cause of the nested page fault, and ++ * correctly fill in the high bits of exit_info_1. ++ */ ++ svm->vmcb->control.exit_code = SVM_EXIT_NPF; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = (1ULL << 32); ++ svm->vmcb->control.exit_info_2 = fault->address; ++ } ++ ++ svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; ++ svm->vmcb->control.exit_info_1 |= fault->error_code; ++ ++ /* ++ * The present bit is always zero for page structure faults on real ++ * hardware. 
++ */ ++ if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) ++ svm->vmcb->control.exit_info_1 &= ~1; ++ ++ nested_svm_vmexit(svm); ++} ++ ++static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(mmu_is_nested(vcpu)); ++ kvm_init_shadow_mmu(vcpu); ++ vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; ++ vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; ++ vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; ++ vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; ++ vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); ++ reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); ++ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; ++} ++ ++static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.walk_mmu = &vcpu->arch.mmu; ++} ++ ++static int nested_svm_check_permissions(struct vcpu_svm *svm) ++{ ++ if (!(svm->vcpu.arch.efer & EFER_SVME) || ++ !is_paging(&svm->vcpu)) { ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (svm->vmcb->save.cpl) { ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, ++ bool has_error_code, u32 error_code) ++{ ++ int vmexit; ++ ++ if (!is_guest_mode(&svm->vcpu)) ++ return 0; ++ ++ vmexit = nested_svm_intercept(svm); ++ if (vmexit != NESTED_EXIT_DONE) ++ return 0; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = error_code; ++ ++ /* ++ * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. ++ * The fix is to add the ancillary datum (CR2 or DR6) to structs ++ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be ++ * written only when inject_pending_event runs (DR6 would written here ++ * too). This should be conditional on a new capability---if the ++ * capability is disabled, kvm_multiple_exception would write the ++ * ancillary information to CR2 or DR6, for backwards ABI-compatibility. ++ */ ++ if (svm->vcpu.arch.exception.nested_apf) ++ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; ++ else ++ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; ++ ++ svm->nested.exit_required = true; ++ return vmexit; ++} ++ ++/* This function returns true if it is save to enable the irq window */ ++static inline bool nested_svm_intr(struct vcpu_svm *svm) ++{ ++ if (!is_guest_mode(&svm->vcpu)) ++ return true; ++ ++ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) ++ return true; ++ ++ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) ++ return false; ++ ++ /* ++ * if vmexit was already requested (by intercepted exception ++ * for instance) do not overwrite it with "external interrupt" ++ * vmexit. ++ */ ++ if (svm->nested.exit_required) ++ return false; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_INTR; ++ svm->vmcb->control.exit_info_1 = 0; ++ svm->vmcb->control.exit_info_2 = 0; ++ ++ if (svm->nested.intercept & 1ULL) { ++ /* ++ * The #vmexit can't be emulated here directly because this ++ * code path runs with irqs and preemption disabled. A ++ * #vmexit emulation might sleep. Only signal request for ++ * the #vmexit here. 
++ */ ++ svm->nested.exit_required = true; ++ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* This function returns true if it is save to enable the nmi window */ ++static inline bool nested_svm_nmi(struct vcpu_svm *svm) ++{ ++ if (!is_guest_mode(&svm->vcpu)) ++ return true; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) ++ return true; ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_NMI; ++ svm->nested.exit_required = true; ++ ++ return false; ++} ++ ++static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) ++{ ++ struct page *page; ++ ++ might_sleep(); ++ ++ page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ goto error; ++ ++ *_page = page; ++ ++ return kmap(page); ++ ++error: ++ kvm_inject_gp(&svm->vcpu, 0); ++ ++ return NULL; ++} ++ ++static void nested_svm_unmap(struct page *page) ++{ ++ kunmap(page); ++ kvm_release_page_dirty(page); ++} ++ ++static int nested_svm_intercept_ioio(struct vcpu_svm *svm) ++{ ++ unsigned port, size, iopm_len; ++ u16 val, mask; ++ u8 start_bit; ++ u64 gpa; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) ++ return NESTED_EXIT_HOST; ++ ++ port = svm->vmcb->control.exit_info_1 >> 16; ++ size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> ++ SVM_IOIO_SIZE_SHIFT; ++ gpa = svm->nested.vmcb_iopm + (port / 8); ++ start_bit = port % 8; ++ iopm_len = (start_bit + size > 8) ? 2 : 1; ++ mask = (0xf >> (4 - size)) << start_bit; ++ val = 0; ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len)) ++ return NESTED_EXIT_DONE; ++ ++ return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; ++} ++ ++static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) ++{ ++ u32 offset, msr, value; ++ int write, mask; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) ++ return NESTED_EXIT_HOST; ++ ++ msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; ++ offset = svm_msrpm_offset(msr); ++ write = svm->vmcb->control.exit_info_1 & 1; ++ mask = 1 << ((2 * (msr & 0xf)) + write); ++ ++ if (offset == MSR_INVALID) ++ return NESTED_EXIT_DONE; ++ ++ /* Offset is in 32 bit units but need in 8 bit units */ ++ offset *= 4; ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) ++ return NESTED_EXIT_DONE; ++ ++ return (value & mask) ? 
NESTED_EXIT_DONE : NESTED_EXIT_HOST; ++} ++ ++/* DB exceptions for our internal use must not cause vmexit */ ++static int nested_svm_intercept_db(struct vcpu_svm *svm) ++{ ++ unsigned long dr6; ++ ++ /* if we're not singlestepping, it's not ours */ ++ if (!svm->nmi_singlestep) ++ return NESTED_EXIT_DONE; ++ ++ /* if it's not a singlestep exception, it's not ours */ ++ if (kvm_get_dr(&svm->vcpu, 6, &dr6)) ++ return NESTED_EXIT_DONE; ++ if (!(dr6 & DR6_BS)) ++ return NESTED_EXIT_DONE; ++ ++ /* if the guest is singlestepping, it should get the vmexit */ ++ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { ++ disable_nmi_singlestep(svm); ++ return NESTED_EXIT_DONE; ++ } ++ ++ /* it's ours, the nested hypervisor must not see this one */ ++ return NESTED_EXIT_HOST; ++} ++ ++static int nested_svm_exit_special(struct vcpu_svm *svm) ++{ ++ u32 exit_code = svm->vmcb->control.exit_code; ++ ++ switch (exit_code) { ++ case SVM_EXIT_INTR: ++ case SVM_EXIT_NMI: ++ case SVM_EXIT_EXCP_BASE + MC_VECTOR: ++ return NESTED_EXIT_HOST; ++ case SVM_EXIT_NPF: ++ /* For now we are always handling NPFs when using them */ ++ if (npt_enabled) ++ return NESTED_EXIT_HOST; ++ break; ++ case SVM_EXIT_EXCP_BASE + PF_VECTOR: ++ /* When we're shadowing, trap PFs, but not async PF */ ++ if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) ++ return NESTED_EXIT_HOST; ++ break; ++ default: ++ break; ++ } ++ ++ return NESTED_EXIT_CONTINUE; ++} ++ ++/* ++ * If this function returns true, this #vmexit was already handled ++ */ ++static int nested_svm_intercept(struct vcpu_svm *svm) ++{ ++ u32 exit_code = svm->vmcb->control.exit_code; ++ int vmexit = NESTED_EXIT_HOST; ++ ++ switch (exit_code) { ++ case SVM_EXIT_MSR: ++ vmexit = nested_svm_exit_handled_msr(svm); ++ break; ++ case SVM_EXIT_IOIO: ++ vmexit = nested_svm_intercept_ioio(svm); ++ break; ++ case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { ++ u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); ++ if (svm->nested.intercept_cr & bit) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { ++ u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); ++ if (svm->nested.intercept_dr & bit) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_EXCP_BASE ... 
SVM_EXIT_EXCP_BASE + 0x1f: { ++ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); ++ if (svm->nested.intercept_exceptions & excp_bits) { ++ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) ++ vmexit = nested_svm_intercept_db(svm); ++ else ++ vmexit = NESTED_EXIT_DONE; ++ } ++ /* async page fault always cause vmexit */ ++ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && ++ svm->vcpu.arch.exception.nested_apf != 0) ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ case SVM_EXIT_ERR: { ++ vmexit = NESTED_EXIT_DONE; ++ break; ++ } ++ default: { ++ u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); ++ if (svm->nested.intercept & exit_bits) ++ vmexit = NESTED_EXIT_DONE; ++ } ++ } ++ ++ return vmexit; ++} ++ ++static int nested_svm_exit_handled(struct vcpu_svm *svm) ++{ ++ int vmexit; ++ ++ vmexit = nested_svm_intercept(svm); ++ ++ if (vmexit == NESTED_EXIT_DONE) ++ nested_svm_vmexit(svm); ++ ++ return vmexit; ++} ++ ++static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) ++{ ++ struct vmcb_control_area *dst = &dst_vmcb->control; ++ struct vmcb_control_area *from = &from_vmcb->control; ++ ++ dst->intercept_cr = from->intercept_cr; ++ dst->intercept_dr = from->intercept_dr; ++ dst->intercept_exceptions = from->intercept_exceptions; ++ dst->intercept = from->intercept; ++ dst->iopm_base_pa = from->iopm_base_pa; ++ dst->msrpm_base_pa = from->msrpm_base_pa; ++ dst->tsc_offset = from->tsc_offset; ++ dst->asid = from->asid; ++ dst->tlb_ctl = from->tlb_ctl; ++ dst->int_ctl = from->int_ctl; ++ dst->int_vector = from->int_vector; ++ dst->int_state = from->int_state; ++ dst->exit_code = from->exit_code; ++ dst->exit_code_hi = from->exit_code_hi; ++ dst->exit_info_1 = from->exit_info_1; ++ dst->exit_info_2 = from->exit_info_2; ++ dst->exit_int_info = from->exit_int_info; ++ dst->exit_int_info_err = from->exit_int_info_err; ++ dst->nested_ctl = from->nested_ctl; ++ dst->event_inj = from->event_inj; ++ dst->event_inj_err = from->event_inj_err; ++ dst->nested_cr3 = from->nested_cr3; ++ dst->virt_ext = from->virt_ext; ++} ++ ++static int nested_svm_vmexit(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct vmcb *hsave = svm->nested.hsave; ++ struct vmcb *vmcb = svm->vmcb; ++ struct page *page; ++ ++ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, ++ vmcb->control.exit_info_1, ++ vmcb->control.exit_info_2, ++ vmcb->control.exit_int_info, ++ vmcb->control.exit_int_info_err, ++ KVM_ISA_SVM); ++ ++ nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ /* Exit Guest-Mode */ ++ leave_guest_mode(&svm->vcpu); ++ svm->nested.vmcb = 0; ++ ++ /* Give the current vmcb to the guest */ ++ disable_gif(svm); ++ ++ nested_vmcb->save.es = vmcb->save.es; ++ nested_vmcb->save.cs = vmcb->save.cs; ++ nested_vmcb->save.ss = vmcb->save.ss; ++ nested_vmcb->save.ds = vmcb->save.ds; ++ nested_vmcb->save.gdtr = vmcb->save.gdtr; ++ nested_vmcb->save.idtr = vmcb->save.idtr; ++ nested_vmcb->save.efer = svm->vcpu.arch.efer; ++ nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); ++ nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); ++ nested_vmcb->save.cr2 = vmcb->save.cr2; ++ nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; ++ nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); ++ nested_vmcb->save.rip = vmcb->save.rip; ++ nested_vmcb->save.rsp = vmcb->save.rsp; ++ nested_vmcb->save.rax = vmcb->save.rax; ++ nested_vmcb->save.dr7 = vmcb->save.dr7; ++ nested_vmcb->save.dr6 = vmcb->save.dr6; ++ nested_vmcb->save.cpl = vmcb->save.cpl; ++ 
++ nested_vmcb->control.int_ctl = vmcb->control.int_ctl; ++ nested_vmcb->control.int_vector = vmcb->control.int_vector; ++ nested_vmcb->control.int_state = vmcb->control.int_state; ++ nested_vmcb->control.exit_code = vmcb->control.exit_code; ++ nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; ++ nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; ++ nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; ++ nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; ++ nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; ++ ++ if (svm->nrips_enabled) ++ nested_vmcb->control.next_rip = vmcb->control.next_rip; ++ ++ /* ++ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have ++ * to make sure that we do not lose injected events. So check event_inj ++ * here and copy it to exit_int_info if it is valid. ++ * Exit_int_info and event_inj can't be both valid because the case ++ * below only happens on a VMRUN instruction intercept which has ++ * no valid exit_int_info set. ++ */ ++ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { ++ struct vmcb_control_area *nc = &nested_vmcb->control; ++ ++ nc->exit_int_info = vmcb->control.event_inj; ++ nc->exit_int_info_err = vmcb->control.event_inj_err; ++ } ++ ++ nested_vmcb->control.tlb_ctl = 0; ++ nested_vmcb->control.event_inj = 0; ++ nested_vmcb->control.event_inj_err = 0; ++ ++ /* We always set V_INTR_MASKING and remember the old value in hflags */ ++ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) ++ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; ++ ++ /* Restore the original control entries */ ++ copy_vmcb_control_area(vmcb, hsave); ++ ++ svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ svm->nested.nested_cr3 = 0; ++ ++ /* Restore selected save entries */ ++ svm->vmcb->save.es = hsave->save.es; ++ svm->vmcb->save.cs = hsave->save.cs; ++ svm->vmcb->save.ss = hsave->save.ss; ++ svm->vmcb->save.ds = hsave->save.ds; ++ svm->vmcb->save.gdtr = hsave->save.gdtr; ++ svm->vmcb->save.idtr = hsave->save.idtr; ++ kvm_set_rflags(&svm->vcpu, hsave->save.rflags); ++ svm_set_efer(&svm->vcpu, hsave->save.efer); ++ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); ++ svm_set_cr4(&svm->vcpu, hsave->save.cr4); ++ if (npt_enabled) { ++ svm->vmcb->save.cr3 = hsave->save.cr3; ++ svm->vcpu.arch.cr3 = hsave->save.cr3; ++ } else { ++ (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); ++ } ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); ++ svm->vmcb->save.dr7 = 0; ++ svm->vmcb->save.cpl = 0; ++ svm->vmcb->control.exit_int_info = 0; ++ ++ mark_all_dirty(svm->vmcb); ++ ++ nested_svm_unmap(page); ++ ++ nested_svm_uninit_mmu_context(&svm->vcpu); ++ kvm_mmu_reset_context(&svm->vcpu); ++ kvm_mmu_load(&svm->vcpu); ++ ++ /* ++ * Drop what we picked up for L2 via svm_complete_interrupts() so it ++ * doesn't end up in L1. ++ */ ++ svm->vcpu.arch.nmi_injected = false; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ return 0; ++} ++ ++static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) ++{ ++ /* ++ * This function merges the msr permission bitmaps of kvm and the ++ * nested vmcb. 
It is optimized in that it only merges the parts where ++ * the kvm msr permission bitmap may contain zero bits ++ */ ++ int i; ++ ++ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) ++ return true; ++ ++ for (i = 0; i < MSRPM_OFFSETS; i++) { ++ u32 value, p; ++ u64 offset; ++ ++ if (msrpm_offsets[i] == 0xffffffff) ++ break; ++ ++ p = msrpm_offsets[i]; ++ offset = svm->nested.vmcb_msrpm + (p * 4); ++ ++ if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) ++ return false; ++ ++ svm->nested.msrpm[p] = svm->msrpm[p] | value; ++ } ++ ++ svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); ++ ++ return true; ++} ++ ++static bool nested_vmcb_checks(struct vmcb *vmcb) ++{ ++ if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) ++ return false; ++ ++ if (vmcb->control.asid == 0) ++ return false; ++ ++ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && ++ !npt_enabled) ++ return false; ++ ++ return true; ++} ++ ++static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, ++ struct vmcb *nested_vmcb, struct page *page) ++{ ++ if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) ++ svm->vcpu.arch.hflags |= HF_HIF_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_HIF_MASK; ++ ++ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { ++ kvm_mmu_unload(&svm->vcpu); ++ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; ++ nested_svm_init_mmu_context(&svm->vcpu); ++ } ++ ++ /* Load the nested guest state */ ++ svm->vmcb->save.es = nested_vmcb->save.es; ++ svm->vmcb->save.cs = nested_vmcb->save.cs; ++ svm->vmcb->save.ss = nested_vmcb->save.ss; ++ svm->vmcb->save.ds = nested_vmcb->save.ds; ++ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; ++ svm->vmcb->save.idtr = nested_vmcb->save.idtr; ++ kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); ++ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); ++ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); ++ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); ++ if (npt_enabled) { ++ svm->vmcb->save.cr3 = nested_vmcb->save.cr3; ++ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; ++ } else ++ (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); ++ ++ /* Guest paging mode is active - reset mmu */ ++ kvm_mmu_reset_context(&svm->vcpu); ++ ++ svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); ++ ++ /* In case we don't even reach vcpu_run, the fields are not updated */ ++ svm->vmcb->save.rax = nested_vmcb->save.rax; ++ svm->vmcb->save.rsp = nested_vmcb->save.rsp; ++ svm->vmcb->save.rip = nested_vmcb->save.rip; ++ svm->vmcb->save.dr7 = nested_vmcb->save.dr7; ++ svm->vmcb->save.dr6 = nested_vmcb->save.dr6; ++ svm->vmcb->save.cpl = nested_vmcb->save.cpl; ++ ++ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; ++ svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; ++ ++ /* cache intercepts */ ++ svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; ++ svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; ++ svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; ++ svm->nested.intercept = nested_vmcb->control.intercept; ++ ++ svm_flush_tlb(&svm->vcpu, true); ++ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; ++ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) ++ 
svm->vcpu.arch.hflags |= HF_VINTR_MASK; ++ else ++ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; ++ ++ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { ++ /* We only want the cr8 intercept bits of the guest */ ++ clr_cr_intercept(svm, INTERCEPT_CR8_READ); ++ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ } ++ ++ /* We don't want to see VMMCALLs from a nested guest */ ++ clr_intercept(svm, INTERCEPT_VMMCALL); ++ ++ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; ++ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; ++ ++ svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; ++ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; ++ svm->vmcb->control.int_state = nested_vmcb->control.int_state; ++ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; ++ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; ++ ++ nested_svm_unmap(page); ++ ++ /* Enter Guest-Mode */ ++ enter_guest_mode(&svm->vcpu); ++ ++ /* ++ * Merge guest and host intercepts - must be called with vcpu in ++ * guest-mode to take affect here ++ */ ++ recalc_intercepts(svm); ++ ++ svm->nested.vmcb = vmcb_gpa; ++ ++ enable_gif(svm); ++ ++ mark_all_dirty(svm->vmcb); ++} ++ ++static bool nested_svm_vmrun(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct vmcb *hsave = svm->nested.hsave; ++ struct vmcb *vmcb = svm->vmcb; ++ struct page *page; ++ u64 vmcb_gpa; ++ ++ vmcb_gpa = svm->vmcb->save.rax; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return false; ++ ++ if (!nested_vmcb_checks(nested_vmcb)) { ++ nested_vmcb->control.exit_code = SVM_EXIT_ERR; ++ nested_vmcb->control.exit_code_hi = 0; ++ nested_vmcb->control.exit_info_1 = 0; ++ nested_vmcb->control.exit_info_2 = 0; ++ ++ nested_svm_unmap(page); ++ ++ return false; ++ } ++ ++ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, ++ nested_vmcb->save.rip, ++ nested_vmcb->control.int_ctl, ++ nested_vmcb->control.event_inj, ++ nested_vmcb->control.nested_ctl); ++ ++ trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, ++ nested_vmcb->control.intercept_cr >> 16, ++ nested_vmcb->control.intercept_exceptions, ++ nested_vmcb->control.intercept); ++ ++ /* Clear internal status */ ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ /* ++ * Save the old vmcb, so we don't need to pick what we save, but can ++ * restore everything when a VMEXIT occurs ++ */ ++ hsave->save.es = vmcb->save.es; ++ hsave->save.cs = vmcb->save.cs; ++ hsave->save.ss = vmcb->save.ss; ++ hsave->save.ds = vmcb->save.ds; ++ hsave->save.gdtr = vmcb->save.gdtr; ++ hsave->save.idtr = vmcb->save.idtr; ++ hsave->save.efer = svm->vcpu.arch.efer; ++ hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); ++ hsave->save.cr4 = svm->vcpu.arch.cr4; ++ hsave->save.rflags = kvm_get_rflags(&svm->vcpu); ++ hsave->save.rip = kvm_rip_read(&svm->vcpu); ++ hsave->save.rsp = vmcb->save.rsp; ++ hsave->save.rax = vmcb->save.rax; ++ if (npt_enabled) ++ hsave->save.cr3 = vmcb->save.cr3; ++ else ++ hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); ++ ++ copy_vmcb_control_area(hsave, vmcb); ++ ++ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page); ++ ++ return true; ++} ++ ++static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) ++{ ++ to_vmcb->save.fs = from_vmcb->save.fs; ++ to_vmcb->save.gs = from_vmcb->save.gs; ++ to_vmcb->save.tr = from_vmcb->save.tr; ++ to_vmcb->save.ldtr = from_vmcb->save.ldtr; ++ to_vmcb->save.kernel_gs_base = 
from_vmcb->save.kernel_gs_base; ++ to_vmcb->save.star = from_vmcb->save.star; ++ to_vmcb->save.lstar = from_vmcb->save.lstar; ++ to_vmcb->save.cstar = from_vmcb->save.cstar; ++ to_vmcb->save.sfmask = from_vmcb->save.sfmask; ++ to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; ++ to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; ++ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; ++} ++ ++static int vmload_interception(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ nested_svm_vmloadsave(nested_vmcb, svm->vmcb); ++ nested_svm_unmap(page); ++ ++ return ret; ++} ++ ++static int vmsave_interception(struct vcpu_svm *svm) ++{ ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); ++ if (!nested_vmcb) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ nested_svm_vmloadsave(svm->vmcb, nested_vmcb); ++ nested_svm_unmap(page); ++ ++ return ret; ++} ++ ++static int vmrun_interception(struct vcpu_svm *svm) ++{ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ /* Save rip after vmrun instruction */ ++ kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); ++ ++ if (!nested_svm_vmrun(svm)) ++ return 1; ++ ++ if (!nested_svm_vmrun_msrpm(svm)) ++ goto failed; ++ ++ return 1; ++ ++failed: ++ ++ svm->vmcb->control.exit_code = SVM_EXIT_ERR; ++ svm->vmcb->control.exit_code_hi = 0; ++ svm->vmcb->control.exit_info_1 = 0; ++ svm->vmcb->control.exit_info_2 = 0; ++ ++ nested_svm_vmexit(svm); ++ ++ return 1; ++} ++ ++static int stgi_interception(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ /* ++ * If VGIF is enabled, the STGI intercept is only added to ++ * detect the opening of the SMI/NMI window; remove it now. ++ */ ++ if (vgif_enabled(svm)) ++ clr_intercept(svm, INTERCEPT_STGI); ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ ++ enable_gif(svm); ++ ++ return ret; ++} ++ ++static int clgi_interception(struct vcpu_svm *svm) ++{ ++ int ret; ++ ++ if (nested_svm_check_permissions(svm)) ++ return 1; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ ret = kvm_skip_emulated_instruction(&svm->vcpu); ++ ++ disable_gif(svm); ++ ++ /* After a CLGI no interrupts should come */ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) { ++ svm_clear_vintr(svm); ++ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; ++ mark_dirty(svm->vmcb, VMCB_INTR); ++ } ++ ++ return ret; ++} ++ ++static int invlpga_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ ++ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), ++ kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
*/ ++ kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int skinit_interception(struct vcpu_svm *svm) ++{ ++ trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); ++ ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++} ++ ++static int wbinvd_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_wbinvd(&svm->vcpu); ++} ++ ++static int xsetbv_interception(struct vcpu_svm *svm) ++{ ++ u64 new_bv = kvm_read_edx_eax(&svm->vcpu); ++ u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ ++ if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++ ++ return 1; ++} ++ ++static int task_switch_interception(struct vcpu_svm *svm) ++{ ++ u16 tss_selector; ++ int reason; ++ int int_type = svm->vmcb->control.exit_int_info & ++ SVM_EXITINTINFO_TYPE_MASK; ++ int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; ++ uint32_t type = ++ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; ++ uint32_t idt_v = ++ svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; ++ bool has_error_code = false; ++ u32 error_code = 0; ++ ++ tss_selector = (u16)svm->vmcb->control.exit_info_1; ++ ++ if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) ++ reason = TASK_SWITCH_IRET; ++ else if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) ++ reason = TASK_SWITCH_JMP; ++ else if (idt_v) ++ reason = TASK_SWITCH_GATE; ++ else ++ reason = TASK_SWITCH_CALL; ++ ++ if (reason == TASK_SWITCH_GATE) { ++ switch (type) { ++ case SVM_EXITINTINFO_TYPE_NMI: ++ svm->vcpu.arch.nmi_injected = false; ++ break; ++ case SVM_EXITINTINFO_TYPE_EXEPT: ++ if (svm->vmcb->control.exit_info_2 & ++ (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { ++ has_error_code = true; ++ error_code = ++ (u32)svm->vmcb->control.exit_info_2; ++ } ++ kvm_clear_exception_queue(&svm->vcpu); ++ break; ++ case SVM_EXITINTINFO_TYPE_INTR: ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (reason != TASK_SWITCH_GATE || ++ int_type == SVM_EXITINTINFO_TYPE_SOFT || ++ (int_type == SVM_EXITINTINFO_TYPE_EXEPT && ++ (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) ++ skip_emulated_instruction(&svm->vcpu); ++ ++ if (int_type != SVM_EXITINTINFO_TYPE_SOFT) ++ int_vec = -1; ++ ++ if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, ++ has_error_code, error_code) == EMULATE_FAIL) { ++ svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ svm->vcpu.run->internal.ndata = 0; ++ return 0; ++ } ++ return 1; ++} ++ ++static int cpuid_interception(struct vcpu_svm *svm) ++{ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ return kvm_emulate_cpuid(&svm->vcpu); ++} ++ ++static int iret_interception(struct vcpu_svm *svm) ++{ ++ ++svm->vcpu.stat.nmi_window_exits; ++ clr_intercept(svm, INTERCEPT_IRET); ++ svm->vcpu.arch.hflags |= HF_IRET_MASK; ++ svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ return 1; ++} ++ ++static int invlpg_interception(struct vcpu_svm *svm) ++{ ++ if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; ++ ++ kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 
++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int emulate_on_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; ++} ++ ++static int rsm_interception(struct vcpu_svm *svm) ++{ ++ return kvm_emulate_instruction_from_buffer(&svm->vcpu, ++ rsm_ins_bytes, 2) == EMULATE_DONE; ++} ++ ++static int rdpmc_interception(struct vcpu_svm *svm) ++{ ++ int err; ++ ++ if (!static_cpu_has(X86_FEATURE_NRIPS)) ++ return emulate_on_interception(svm); ++ ++ err = kvm_rdpmc(&svm->vcpu); ++ return kvm_complete_insn_gp(&svm->vcpu, err); ++} ++ ++static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, ++ unsigned long val) ++{ ++ unsigned long cr0 = svm->vcpu.arch.cr0; ++ bool ret = false; ++ u64 intercept; ++ ++ intercept = svm->nested.intercept; ++ ++ if (!is_guest_mode(&svm->vcpu) || ++ (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) ++ return false; ++ ++ cr0 &= ~SVM_CR0_SELECTIVE_MASK; ++ val &= ~SVM_CR0_SELECTIVE_MASK; ++ ++ if (cr0 ^ val) { ++ svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; ++ ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); ++ } ++ ++ return ret; ++} ++ ++#define CR_VALID (1ULL << 63) ++ ++static int cr_interception(struct vcpu_svm *svm) ++{ ++ int reg, cr; ++ unsigned long val; ++ int err; ++ ++ if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return emulate_on_interception(svm); ++ ++ if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) ++ return emulate_on_interception(svm); ++ ++ reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; ++ if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) ++ cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; ++ else ++ cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; ++ ++ err = 0; ++ if (cr >= 16) { /* mov to cr */ ++ cr -= 16; ++ val = kvm_register_read(&svm->vcpu, reg); ++ switch (cr) { ++ case 0: ++ if (!check_selective_cr0_intercepted(svm, val)) ++ err = kvm_set_cr0(&svm->vcpu, val); ++ else ++ return 1; ++ ++ break; ++ case 3: ++ err = kvm_set_cr3(&svm->vcpu, val); ++ break; ++ case 4: ++ err = kvm_set_cr4(&svm->vcpu, val); ++ break; ++ case 8: ++ err = kvm_set_cr8(&svm->vcpu, val); ++ break; ++ default: ++ WARN(1, "unhandled write to CR%d", cr); ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ } else { /* mov from cr */ ++ switch (cr) { ++ case 0: ++ val = kvm_read_cr0(&svm->vcpu); ++ break; ++ case 2: ++ val = svm->vcpu.arch.cr2; ++ break; ++ case 3: ++ val = kvm_read_cr3(&svm->vcpu); ++ break; ++ case 4: ++ val = kvm_read_cr4(&svm->vcpu); ++ break; ++ case 8: ++ val = kvm_get_cr8(&svm->vcpu); ++ break; ++ default: ++ WARN(1, "unhandled read from CR%d", cr); ++ kvm_queue_exception(&svm->vcpu, UD_VECTOR); ++ return 1; ++ } ++ kvm_register_write(&svm->vcpu, reg, val); ++ } ++ return kvm_complete_insn_gp(&svm->vcpu, err); ++} ++ ++static int dr_interception(struct vcpu_svm *svm) ++{ ++ int reg, dr; ++ unsigned long val; ++ ++ if (svm->vcpu.guest_debug == 0) { ++ /* ++ * No more DR vmexits; force a reload of the debug registers ++ * and reenter on this instruction. The next vmexit will ++ * retrieve the full state of the debug registers. 
++ */ ++ clr_dr_intercepts(svm); ++ svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; ++ return 1; ++ } ++ ++ if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) ++ return emulate_on_interception(svm); ++ ++ reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; ++ dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; ++ ++ if (dr >= 16) { /* mov to DRn */ ++ if (!kvm_require_dr(&svm->vcpu, dr - 16)) ++ return 1; ++ val = kvm_register_read(&svm->vcpu, reg); ++ kvm_set_dr(&svm->vcpu, dr - 16, val); ++ } else { ++ if (!kvm_require_dr(&svm->vcpu, dr)) ++ return 1; ++ kvm_get_dr(&svm->vcpu, dr, &val); ++ kvm_register_write(&svm->vcpu, reg, val); ++ } ++ ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++} ++ ++static int cr8_write_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_run *kvm_run = svm->vcpu.run; ++ int r; ++ ++ u8 cr8_prev = kvm_get_cr8(&svm->vcpu); ++ /* instruction emulation calls kvm_set_cr8() */ ++ r = cr_interception(svm); ++ if (lapic_in_kernel(&svm->vcpu)) ++ return r; ++ if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) ++ return r; ++ kvm_run->exit_reason = KVM_EXIT_SET_TPR; ++ return 0; ++} ++ ++static int svm_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ msr->data = 0; ++ ++ switch (msr->index) { ++ case MSR_F10H_DECFG: ++ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) ++ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; ++ break; ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ switch (msr_info->index) { ++ case MSR_STAR: ++ msr_info->data = svm->vmcb->save.star; ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_LSTAR: ++ msr_info->data = svm->vmcb->save.lstar; ++ break; ++ case MSR_CSTAR: ++ msr_info->data = svm->vmcb->save.cstar; ++ break; ++ case MSR_KERNEL_GS_BASE: ++ msr_info->data = svm->vmcb->save.kernel_gs_base; ++ break; ++ case MSR_SYSCALL_MASK: ++ msr_info->data = svm->vmcb->save.sfmask; ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ msr_info->data = svm->vmcb->save.sysenter_cs; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ msr_info->data = svm->sysenter_eip; ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ msr_info->data = svm->sysenter_esp; ++ break; ++ case MSR_TSC_AUX: ++ if (!boot_cpu_has(X86_FEATURE_RDTSCP)) ++ return 1; ++ msr_info->data = svm->tsc_aux; ++ break; ++ /* ++ * Nobody will change the following 5 values in the VMCB so we can ++ * safely return them on rdmsr. They will always be 0 until LBRV is ++ * implemented. 
++ */ ++ case MSR_IA32_DEBUGCTLMSR: ++ msr_info->data = svm->vmcb->save.dbgctl; ++ break; ++ case MSR_IA32_LASTBRANCHFROMIP: ++ msr_info->data = svm->vmcb->save.br_from; ++ break; ++ case MSR_IA32_LASTBRANCHTOIP: ++ msr_info->data = svm->vmcb->save.br_to; ++ break; ++ case MSR_IA32_LASTINTFROMIP: ++ msr_info->data = svm->vmcb->save.last_excp_from; ++ break; ++ case MSR_IA32_LASTINTTOIP: ++ msr_info->data = svm->vmcb->save.last_excp_to; ++ break; ++ case MSR_VM_HSAVE_PA: ++ msr_info->data = svm->nested.hsave_msr; ++ break; ++ case MSR_VM_CR: ++ msr_info->data = svm->nested.vm_cr_msr; ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) ++ return 1; ++ ++ msr_info->data = svm->spec_ctrl; ++ break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) ++ return 1; ++ ++ msr_info->data = svm->virt_spec_ctrl; ++ break; ++ case MSR_F15H_IC_CFG: { ++ ++ int family, model; ++ ++ family = guest_cpuid_family(vcpu); ++ model = guest_cpuid_model(vcpu); ++ ++ if (family < 0 || model < 0) ++ return kvm_get_msr_common(vcpu, msr_info); ++ ++ msr_info->data = 0; ++ ++ if (family == 0x15 && ++ (model >= 0x2 && model < 0x20)) ++ msr_info->data = 0x1E; ++ } ++ break; ++ case MSR_F10H_DECFG: ++ msr_info->data = svm->msr_decfg; ++ break; ++ default: ++ return kvm_get_msr_common(vcpu, msr_info); ++ } ++ return 0; ++} ++ ++static int rdmsr_interception(struct vcpu_svm *svm) ++{ ++ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ struct msr_data msr_info; ++ ++ msr_info.index = ecx; ++ msr_info.host_initiated = false; ++ if (svm_get_msr(&svm->vcpu, &msr_info)) { ++ trace_kvm_msr_read_ex(ecx); ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } else { ++ trace_kvm_msr_read(ecx, msr_info.data); ++ ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, ++ msr_info.data & 0xffffffff); ++ kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, ++ msr_info.data >> 32); ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++} ++ ++static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int svm_dis, chg_mask; ++ ++ if (data & ~SVM_VM_CR_VALID_MASK) ++ return 1; ++ ++ chg_mask = SVM_VM_CR_VALID_MASK; ++ ++ if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) ++ chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); ++ ++ svm->nested.vm_cr_msr &= ~chg_mask; ++ svm->nested.vm_cr_msr |= (data & chg_mask); ++ ++ svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; ++ ++ /* check for svm_disable while efer.svme is set */ ++ if (svm_dis && (vcpu->arch.efer & EFER_SVME)) ++ return 1; ++ ++ return 0; ++} ++ ++static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ u32 ecx = msr->index; ++ u64 data = msr->data; ++ switch (ecx) { ++ case MSR_IA32_CR_PAT: ++ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) ++ return 1; ++ vcpu->arch.pat = data; ++ svm->vmcb->save.g_pat = data; ++ mark_dirty(svm->vmcb, VMCB_NPT); ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) ++ return 1; ++ ++ svm->spec_ctrl = data; ++ ++ if 
(!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_svm_vmrun_msrpm. ++ * We update the L1 MSR bit as well since it will end up ++ * touching the MSR anyway now. ++ */ ++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); ++ break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ if (is_guest_mode(vcpu)) ++ break; ++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); ++ break; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) ++ return 1; ++ ++ if (data & ~SPEC_CTRL_SSBD) ++ return 1; ++ ++ svm->virt_spec_ctrl = data; ++ break; ++ case MSR_STAR: ++ svm->vmcb->save.star = data; ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_LSTAR: ++ svm->vmcb->save.lstar = data; ++ break; ++ case MSR_CSTAR: ++ svm->vmcb->save.cstar = data; ++ break; ++ case MSR_KERNEL_GS_BASE: ++ svm->vmcb->save.kernel_gs_base = data; ++ break; ++ case MSR_SYSCALL_MASK: ++ svm->vmcb->save.sfmask = data; ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ svm->vmcb->save.sysenter_cs = data; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ svm->sysenter_eip = data; ++ svm->vmcb->save.sysenter_eip = data; ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ svm->sysenter_esp = data; ++ svm->vmcb->save.sysenter_esp = data; ++ break; ++ case MSR_TSC_AUX: ++ if (!boot_cpu_has(X86_FEATURE_RDTSCP)) ++ return 1; ++ ++ /* ++ * This is rare, so we update the MSR here instead of using ++ * direct_access_msrs. Doing that would require a rdmsr in ++ * svm_vcpu_put. 
++ */ ++ svm->tsc_aux = data; ++ wrmsrl(MSR_TSC_AUX, svm->tsc_aux); ++ break; ++ case MSR_IA32_DEBUGCTLMSR: ++ if (!boot_cpu_has(X86_FEATURE_LBRV)) { ++ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", ++ __func__, data); ++ break; ++ } ++ if (data & DEBUGCTL_RESERVED_BITS) ++ return 1; ++ ++ svm->vmcb->save.dbgctl = data; ++ mark_dirty(svm->vmcb, VMCB_LBR); ++ if (data & (1ULL<<0)) ++ svm_enable_lbrv(svm); ++ else ++ svm_disable_lbrv(svm); ++ break; ++ case MSR_VM_HSAVE_PA: ++ svm->nested.hsave_msr = data; ++ break; ++ case MSR_VM_CR: ++ return svm_set_vm_cr(vcpu, data); ++ case MSR_VM_IGNNE: ++ vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); ++ break; ++ case MSR_F10H_DECFG: { ++ struct kvm_msr_entry msr_entry; ++ ++ msr_entry.index = msr->index; ++ if (svm_get_msr_feature(&msr_entry)) ++ return 1; ++ ++ /* Check the supported bits */ ++ if (data & ~msr_entry.data) ++ return 1; ++ ++ /* Don't allow the guest to change a bit, #GP */ ++ if (!msr->host_initiated && (data ^ msr_entry.data)) ++ return 1; ++ ++ svm->msr_decfg = data; ++ break; ++ } ++ case MSR_IA32_APICBASE: ++ if (kvm_vcpu_apicv_active(vcpu)) ++ avic_update_vapic_bar(to_svm(vcpu), data); ++ /* Follow through */ ++ default: ++ return kvm_set_msr_common(vcpu, msr); ++ } ++ return 0; ++} ++ ++static int wrmsr_interception(struct vcpu_svm *svm) ++{ ++ struct msr_data msr; ++ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); ++ u64 data = kvm_read_edx_eax(&svm->vcpu); ++ ++ msr.data = data; ++ msr.index = ecx; ++ msr.host_initiated = false; ++ ++ svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; ++ if (kvm_set_msr(&svm->vcpu, &msr)) { ++ trace_kvm_msr_write_ex(ecx, data); ++ kvm_inject_gp(&svm->vcpu, 0); ++ return 1; ++ } else { ++ trace_kvm_msr_write(ecx, data); ++ return kvm_skip_emulated_instruction(&svm->vcpu); ++ } ++} ++ ++static int msr_interception(struct vcpu_svm *svm) ++{ ++ if (svm->vmcb->control.exit_info_1) ++ return wrmsr_interception(svm); ++ else ++ return rdmsr_interception(svm); ++} ++ ++static int interrupt_window_interception(struct vcpu_svm *svm) ++{ ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ svm_clear_vintr(svm); ++ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; ++ mark_dirty(svm->vmcb, VMCB_INTR); ++ ++svm->vcpu.stat.irq_window_exits; ++ return 1; ++} ++ ++static int pause_interception(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ bool in_kernel = (svm_get_cpl(vcpu) == 0); ++ ++ if (pause_filter_thresh) ++ grow_ple_window(vcpu); ++ ++ kvm_vcpu_on_spin(vcpu, in_kernel); ++ return 1; ++} ++ ++static int nop_interception(struct vcpu_svm *svm) ++{ ++ return kvm_skip_emulated_instruction(&(svm->vcpu)); ++} ++ ++static int monitor_interception(struct vcpu_svm *svm) ++{ ++ printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); ++ return nop_interception(svm); ++} ++ ++static int mwait_interception(struct vcpu_svm *svm) ++{ ++ printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); ++ return nop_interception(svm); ++} ++ ++enum avic_ipi_failure_cause { ++ AVIC_IPI_FAILURE_INVALID_INT_TYPE, ++ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, ++ AVIC_IPI_FAILURE_INVALID_TARGET, ++ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, ++}; ++ ++static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) ++{ ++ u32 icrh = svm->vmcb->control.exit_info_1 >> 32; ++ u32 icrl = svm->vmcb->control.exit_info_1; ++ u32 id = svm->vmcb->control.exit_info_2 >> 32; ++ u32 index = svm->vmcb->control.exit_info_2 & 0xFF; ++ struct kvm_lapic *apic = 
svm->vcpu.arch.apic; ++ ++ trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); ++ ++ switch (id) { ++ case AVIC_IPI_FAILURE_INVALID_INT_TYPE: ++ /* ++ * AVIC hardware handles the generation of ++ * IPIs when the specified Message Type is Fixed ++ * (also known as fixed delivery mode) and ++ * the Trigger Mode is edge-triggered. The hardware ++ * also supports self and broadcast delivery modes ++ * specified via the Destination Shorthand(DSH) ++ * field of the ICRL. Logical and physical APIC ID ++ * formats are supported. All other IPI types cause ++ * a #VMEXIT, which needs to emulated. ++ */ ++ kvm_lapic_reg_write(apic, APIC_ICR2, icrh); ++ kvm_lapic_reg_write(apic, APIC_ICR, icrl); ++ break; ++ case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { ++ int i; ++ struct kvm_vcpu *vcpu; ++ struct kvm *kvm = svm->vcpu.kvm; ++ struct kvm_lapic *apic = svm->vcpu.arch.apic; ++ ++ /* ++ * At this point, we expect that the AVIC HW has already ++ * set the appropriate IRR bits on the valid target ++ * vcpus. So, we just need to kick the appropriate vcpu. ++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ bool m = kvm_apic_match_dest(vcpu, apic, ++ icrl & KVM_APIC_SHORT_MASK, ++ GET_APIC_DEST_FIELD(icrh), ++ icrl & KVM_APIC_DEST_MASK); ++ ++ if (m && !avic_vcpu_is_running(vcpu)) ++ kvm_vcpu_wake_up(vcpu); ++ } ++ break; ++ } ++ case AVIC_IPI_FAILURE_INVALID_TARGET: ++ break; ++ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: ++ WARN_ONCE(1, "Invalid backing page\n"); ++ break; ++ default: ++ pr_err("Unknown IPI interception\n"); ++ } ++ ++ return 1; ++} ++ ++static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) ++{ ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ int index; ++ u32 *logical_apic_id_table; ++ int dlid = GET_APIC_LOGICAL_ID(ldr); ++ ++ if (!dlid) ++ return NULL; ++ ++ if (flat) { /* flat */ ++ index = ffs(dlid) - 1; ++ if (index > 7) ++ return NULL; ++ } else { /* cluster */ ++ int cluster = (dlid & 0xf0) >> 4; ++ int apic = ffs(dlid & 0x0f) - 1; ++ ++ if ((apic < 0) || (apic > 7) || ++ (cluster >= 0xf)) ++ return NULL; ++ index = (cluster << 2) + apic; ++ } ++ ++ logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); ++ ++ return &logical_apic_id_table[index]; ++} ++ ++static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, ++ bool valid) ++{ ++ bool flat; ++ u32 *entry, new_entry; ++ ++ flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT; ++ entry = avic_get_logical_id_entry(vcpu, ldr, flat); ++ if (!entry) ++ return -EINVAL; ++ ++ new_entry = READ_ONCE(*entry); ++ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; ++ new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); ++ if (valid) ++ new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; ++ else ++ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; ++ WRITE_ONCE(*entry, new_entry); ++ ++ return 0; ++} ++ ++static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); ++ ++ if (!ldr) ++ return 1; ++ ++ ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); ++ if (ret && svm->ldr_reg) { ++ avic_ldr_write(vcpu, 0, svm->ldr_reg, false); ++ svm->ldr_reg = 0; ++ } else { ++ svm->ldr_reg = ldr; ++ } ++ return ret; ++} ++ ++static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) ++{ ++ u64 *old, *new; ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID); 
++ u32 id = (apic_id_reg >> 24) & 0xff; ++ ++ if (vcpu->vcpu_id == id) ++ return 0; ++ ++ old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); ++ new = avic_get_physical_id_entry(vcpu, id); ++ if (!new || !old) ++ return 1; ++ ++ /* We need to move physical_id_entry to new offset */ ++ *new = *old; ++ *old = 0ULL; ++ to_svm(vcpu)->avic_physical_id_cache = new; ++ ++ /* ++ * Also update the guest physical APIC ID in the logical ++ * APIC ID table entry if already setup the LDR. ++ */ ++ if (svm->ldr_reg) ++ avic_handle_ldr_update(vcpu); ++ ++ return 0; ++} ++ ++static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); ++ u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); ++ u32 mod = (dfr >> 28) & 0xf; ++ ++ /* ++ * We assume that all local APICs are using the same type. ++ * If this changes, we need to flush the AVIC logical ++ * APID id table. ++ */ ++ if (kvm_svm->ldr_mode == mod) ++ return 0; ++ ++ clear_page(page_address(kvm_svm->avic_logical_id_table_page)); ++ kvm_svm->ldr_mode = mod; ++ ++ if (svm->ldr_reg) ++ avic_handle_ldr_update(vcpu); ++ return 0; ++} ++ ++static int avic_unaccel_trap_write(struct vcpu_svm *svm) ++{ ++ struct kvm_lapic *apic = svm->vcpu.arch.apic; ++ u32 offset = svm->vmcb->control.exit_info_1 & ++ AVIC_UNACCEL_ACCESS_OFFSET_MASK; ++ ++ switch (offset) { ++ case APIC_ID: ++ if (avic_handle_apic_id_update(&svm->vcpu)) ++ return 0; ++ break; ++ case APIC_LDR: ++ if (avic_handle_ldr_update(&svm->vcpu)) ++ return 0; ++ break; ++ case APIC_DFR: ++ avic_handle_dfr_update(&svm->vcpu); ++ break; ++ default: ++ break; ++ } ++ ++ kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); ++ ++ return 1; ++} ++ ++static bool is_avic_unaccelerated_access_trap(u32 offset) ++{ ++ bool ret = false; ++ ++ switch (offset) { ++ case APIC_ID: ++ case APIC_EOI: ++ case APIC_RRR: ++ case APIC_LDR: ++ case APIC_DFR: ++ case APIC_SPIV: ++ case APIC_ESR: ++ case APIC_ICR: ++ case APIC_LVTT: ++ case APIC_LVTTHMR: ++ case APIC_LVTPC: ++ case APIC_LVT0: ++ case APIC_LVT1: ++ case APIC_LVTERR: ++ case APIC_TMICT: ++ case APIC_TDCR: ++ ret = true; ++ break; ++ default: ++ break; ++ } ++ return ret; ++} ++ ++static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ++{ ++ int ret = 0; ++ u32 offset = svm->vmcb->control.exit_info_1 & ++ AVIC_UNACCEL_ACCESS_OFFSET_MASK; ++ u32 vector = svm->vmcb->control.exit_info_2 & ++ AVIC_UNACCEL_ACCESS_VECTOR_MASK; ++ bool write = (svm->vmcb->control.exit_info_1 >> 32) & ++ AVIC_UNACCEL_ACCESS_WRITE_MASK; ++ bool trap = is_avic_unaccelerated_access_trap(offset); ++ ++ trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset, ++ trap, write, vector); ++ if (trap) { ++ /* Handling Trap */ ++ WARN_ONCE(!write, "svm: Handling trap read.\n"); ++ ret = avic_unaccel_trap_write(svm); ++ } else { ++ /* Handling Fault */ ++ ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); ++ } ++ ++ return ret; ++} ++ ++static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { ++ [SVM_EXIT_READ_CR0] = cr_interception, ++ [SVM_EXIT_READ_CR3] = cr_interception, ++ [SVM_EXIT_READ_CR4] = cr_interception, ++ [SVM_EXIT_READ_CR8] = cr_interception, ++ [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, ++ [SVM_EXIT_WRITE_CR0] = cr_interception, ++ [SVM_EXIT_WRITE_CR3] = cr_interception, ++ [SVM_EXIT_WRITE_CR4] = cr_interception, ++ [SVM_EXIT_WRITE_CR8] = cr8_write_interception, ++ [SVM_EXIT_READ_DR0] = dr_interception, ++ [SVM_EXIT_READ_DR1] = 
dr_interception, ++ [SVM_EXIT_READ_DR2] = dr_interception, ++ [SVM_EXIT_READ_DR3] = dr_interception, ++ [SVM_EXIT_READ_DR4] = dr_interception, ++ [SVM_EXIT_READ_DR5] = dr_interception, ++ [SVM_EXIT_READ_DR6] = dr_interception, ++ [SVM_EXIT_READ_DR7] = dr_interception, ++ [SVM_EXIT_WRITE_DR0] = dr_interception, ++ [SVM_EXIT_WRITE_DR1] = dr_interception, ++ [SVM_EXIT_WRITE_DR2] = dr_interception, ++ [SVM_EXIT_WRITE_DR3] = dr_interception, ++ [SVM_EXIT_WRITE_DR4] = dr_interception, ++ [SVM_EXIT_WRITE_DR5] = dr_interception, ++ [SVM_EXIT_WRITE_DR6] = dr_interception, ++ [SVM_EXIT_WRITE_DR7] = dr_interception, ++ [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, ++ [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, ++ [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, ++ [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, ++ [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, ++ [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, ++ [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, ++ [SVM_EXIT_INTR] = intr_interception, ++ [SVM_EXIT_NMI] = nmi_interception, ++ [SVM_EXIT_SMI] = nop_on_interception, ++ [SVM_EXIT_INIT] = nop_on_interception, ++ [SVM_EXIT_VINTR] = interrupt_window_interception, ++ [SVM_EXIT_RDPMC] = rdpmc_interception, ++ [SVM_EXIT_CPUID] = cpuid_interception, ++ [SVM_EXIT_IRET] = iret_interception, ++ [SVM_EXIT_INVD] = emulate_on_interception, ++ [SVM_EXIT_PAUSE] = pause_interception, ++ [SVM_EXIT_HLT] = halt_interception, ++ [SVM_EXIT_INVLPG] = invlpg_interception, ++ [SVM_EXIT_INVLPGA] = invlpga_interception, ++ [SVM_EXIT_IOIO] = io_interception, ++ [SVM_EXIT_MSR] = msr_interception, ++ [SVM_EXIT_TASK_SWITCH] = task_switch_interception, ++ [SVM_EXIT_SHUTDOWN] = shutdown_interception, ++ [SVM_EXIT_VMRUN] = vmrun_interception, ++ [SVM_EXIT_VMMCALL] = vmmcall_interception, ++ [SVM_EXIT_VMLOAD] = vmload_interception, ++ [SVM_EXIT_VMSAVE] = vmsave_interception, ++ [SVM_EXIT_STGI] = stgi_interception, ++ [SVM_EXIT_CLGI] = clgi_interception, ++ [SVM_EXIT_SKINIT] = skinit_interception, ++ [SVM_EXIT_WBINVD] = wbinvd_interception, ++ [SVM_EXIT_MONITOR] = monitor_interception, ++ [SVM_EXIT_MWAIT] = mwait_interception, ++ [SVM_EXIT_XSETBV] = xsetbv_interception, ++ [SVM_EXIT_NPF] = npf_interception, ++ [SVM_EXIT_RSM] = rsm_interception, ++ [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, ++ [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, ++}; ++ ++static void dump_vmcb(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ struct vmcb_save_area *save = &svm->vmcb->save; ++ ++ pr_err("VMCB Control Area:\n"); ++ pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); ++ pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); ++ pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); ++ pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); ++ pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); ++ pr_err("%-20s%016llx\n", "intercepts:", control->intercept); ++ pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); ++ pr_err("%-20s%d\n", "pause filter threshold:", ++ control->pause_filter_thresh); ++ pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); ++ pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); ++ pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); ++ pr_err("%-20s%d\n", "asid:", control->asid); ++ pr_err("%-20s%d\n", "tlb_ctl:", 
control->tlb_ctl); ++ pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); ++ pr_err("%-20s%08x\n", "int_vector:", control->int_vector); ++ pr_err("%-20s%08x\n", "int_state:", control->int_state); ++ pr_err("%-20s%08x\n", "exit_code:", control->exit_code); ++ pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); ++ pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); ++ pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); ++ pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); ++ pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); ++ pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); ++ pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); ++ pr_err("%-20s%08x\n", "event_inj:", control->event_inj); ++ pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); ++ pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); ++ pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); ++ pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); ++ pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); ++ pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id); ++ pr_err("VMCB State Save Area:\n"); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "es:", ++ save->es.selector, save->es.attrib, ++ save->es.limit, save->es.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "cs:", ++ save->cs.selector, save->cs.attrib, ++ save->cs.limit, save->cs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ss:", ++ save->ss.selector, save->ss.attrib, ++ save->ss.limit, save->ss.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ds:", ++ save->ds.selector, save->ds.attrib, ++ save->ds.limit, save->ds.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "fs:", ++ save->fs.selector, save->fs.attrib, ++ save->fs.limit, save->fs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "gs:", ++ save->gs.selector, save->gs.attrib, ++ save->gs.limit, save->gs.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "gdtr:", ++ save->gdtr.selector, save->gdtr.attrib, ++ save->gdtr.limit, save->gdtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "ldtr:", ++ save->ldtr.selector, save->ldtr.attrib, ++ save->ldtr.limit, save->ldtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "idtr:", ++ save->idtr.selector, save->idtr.attrib, ++ save->idtr.limit, save->idtr.base); ++ pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", ++ "tr:", ++ save->tr.selector, save->tr.attrib, ++ save->tr.limit, save->tr.base); ++ pr_err("cpl: %d efer: %016llx\n", ++ save->cpl, save->efer); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cr0:", save->cr0, "cr2:", save->cr2); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cr3:", save->cr3, "cr4:", save->cr4); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "dr6:", save->dr6, "dr7:", save->dr7); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "rip:", save->rip, "rflags:", save->rflags); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "rsp:", save->rsp, "rax:", save->rax); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "star:", save->star, "lstar:", save->lstar); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "cstar:", save->cstar, "sfmask:", save->sfmask); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "kernel_gs_base:", save->kernel_gs_base, ++ "sysenter_cs:", save->sysenter_cs); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "sysenter_esp:", 
save->sysenter_esp, ++ "sysenter_eip:", save->sysenter_eip); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "gpat:", save->g_pat, "dbgctl:", save->dbgctl); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "br_from:", save->br_from, "br_to:", save->br_to); ++ pr_err("%-15s %016llx %-13s %016llx\n", ++ "excp_from:", save->last_excp_from, ++ "excp_to:", save->last_excp_to); ++} ++ ++static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ++{ ++ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; ++ ++ *info1 = control->exit_info_1; ++ *info2 = control->exit_info_2; ++} ++ ++static int handle_exit(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct kvm_run *kvm_run = vcpu->run; ++ u32 exit_code = svm->vmcb->control.exit_code; ++ ++ trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); ++ ++ if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) ++ vcpu->arch.cr0 = svm->vmcb->save.cr0; ++ if (npt_enabled) ++ vcpu->arch.cr3 = svm->vmcb->save.cr3; ++ ++ if (unlikely(svm->nested.exit_required)) { ++ nested_svm_vmexit(svm); ++ svm->nested.exit_required = false; ++ ++ return 1; ++ } ++ ++ if (is_guest_mode(vcpu)) { ++ int vmexit; ++ ++ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, ++ svm->vmcb->control.exit_info_1, ++ svm->vmcb->control.exit_info_2, ++ svm->vmcb->control.exit_int_info, ++ svm->vmcb->control.exit_int_info_err, ++ KVM_ISA_SVM); ++ ++ vmexit = nested_svm_exit_special(svm); ++ ++ if (vmexit == NESTED_EXIT_CONTINUE) ++ vmexit = nested_svm_exit_handled(svm); ++ ++ if (vmexit == NESTED_EXIT_DONE) ++ return 1; ++ } ++ ++ svm_complete_interrupts(svm); ++ ++ if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { ++ kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ kvm_run->fail_entry.hardware_entry_failure_reason ++ = svm->vmcb->control.exit_code; ++ pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); ++ dump_vmcb(vcpu); ++ return 0; ++ } ++ ++ if (is_external_interrupt(svm->vmcb->control.exit_int_info) && ++ exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && ++ exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && ++ exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) ++ printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " ++ "exit_code 0x%x\n", ++ __func__, svm->vmcb->control.exit_int_info, ++ exit_code); ++ ++ if (exit_code >= ARRAY_SIZE(svm_exit_handlers) ++ || !svm_exit_handlers[exit_code]) { ++ WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ return svm_exit_handlers[exit_code](svm); ++} ++ ++static void reload_tss(struct kvm_vcpu *vcpu) ++{ ++ int cpu = raw_smp_processor_id(); ++ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ sd->tss_desc->type = 9; /* available 32/64-bit TSS */ ++ load_TR_desc(); ++} ++ ++static void pre_sev_run(struct vcpu_svm *svm, int cpu) ++{ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ int asid = sev_get_asid(svm->vcpu.kvm); ++ ++ /* Assign the asid allocated with this SEV guest */ ++ svm->vmcb->control.asid = asid; ++ ++ /* ++ * Flush guest TLB: ++ * ++ * 1) when different VMCB for the same ASID is to be run on the same host CPU. ++ * 2) or this VMCB was executed on different host CPU in previous VMRUNs. 
++ */ ++ if (sd->sev_vmcbs[asid] == svm->vmcb && ++ svm->last_cpu == cpu) ++ return; ++ ++ svm->last_cpu = cpu; ++ sd->sev_vmcbs[asid] = svm->vmcb; ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; ++ mark_dirty(svm->vmcb, VMCB_ASID); ++} ++ ++static void pre_svm_run(struct vcpu_svm *svm) ++{ ++ int cpu = raw_smp_processor_id(); ++ ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ ++ if (sev_guest(svm->vcpu.kvm)) ++ return pre_sev_run(svm, cpu); ++ ++ /* FIXME: handle wraparound of asid_generation */ ++ if (svm->asid_generation != sd->asid_generation) ++ new_asid(svm, sd); ++} ++ ++static void svm_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; ++ vcpu->arch.hflags |= HF_NMI_MASK; ++ set_intercept(svm, INTERCEPT_IRET); ++ ++vcpu->stat.nmi_injections; ++} ++ ++static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) ++{ ++ struct vmcb_control_area *control; ++ ++ /* The following fields are ignored when AVIC is enabled */ ++ control = &svm->vmcb->control; ++ control->int_vector = irq; ++ control->int_ctl &= ~V_INTR_PRIO_MASK; ++ control->int_ctl |= V_IRQ_MASK | ++ ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); ++ mark_dirty(svm->vmcb, VMCB_INTR); ++} ++ ++static void svm_set_irq(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ BUG_ON(!(gif_set(svm))); ++ ++ trace_kvm_inj_virq(vcpu->arch.interrupt.nr); ++ ++vcpu->stat.irq_injections; ++ ++ svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | ++ SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; ++} ++ ++static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) ++{ ++ return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm_nested_virtualize_tpr(vcpu) || ++ kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++ ++ if (irr == -1) ++ return; ++ ++ if (tpr >= irr) ++ set_cr_intercept(svm, INTERCEPT_CR8_WRITE); ++} ++ ++static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ return; ++} ++ ++static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu) ++{ ++ return avic && irqchip_split(vcpu->kvm); ++} ++ ++static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) ++{ ++} ++ ++static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) ++{ ++} ++ ++/* Note: Currently only used by Hyper-V. 
*/ ++static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ ++ if (!kvm_vcpu_apicv_active(&svm->vcpu)) ++ return; ++ ++ vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; ++ mark_dirty(vmcb, VMCB_INTR); ++} ++ ++static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) ++{ ++ return; ++} ++ ++static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) ++{ ++ kvm_lapic_set_irr(vec, vcpu->arch.apic); ++ smp_mb__after_atomic(); ++ ++ if (avic_vcpu_is_running(vcpu)) ++ wrmsrl(SVM_AVIC_DOORBELL, ++ kvm_cpu_get_apicid(vcpu->cpu)); ++ else ++ kvm_vcpu_wake_up(vcpu); ++} ++ ++static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return false; ++} ++ ++static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ++{ ++ unsigned long flags; ++ struct amd_svm_iommu_ir *cur; ++ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ list_for_each_entry(cur, &svm->ir_list, node) { ++ if (cur->data != pi->ir_data) ++ continue; ++ list_del(&cur->node); ++ kfree(cur); ++ break; ++ } ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++} ++ ++static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ++{ ++ int ret = 0; ++ unsigned long flags; ++ struct amd_svm_iommu_ir *ir; ++ ++ /** ++ * In some cases, the existing irte is updaed and re-set, ++ * so we need to check here if it's already been * added ++ * to the ir_list. ++ */ ++ if (pi->ir_data && (pi->prev_ga_tag != 0)) { ++ struct kvm *kvm = svm->vcpu.kvm; ++ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); ++ struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); ++ struct vcpu_svm *prev_svm; ++ ++ if (!prev_vcpu) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ prev_svm = to_svm(prev_vcpu); ++ svm_ir_list_del(prev_svm, pi); ++ } ++ ++ /** ++ * Allocating new amd_iommu_pi_data, which will get ++ * add to the per-vcpu ir_list. ++ */ ++ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL); ++ if (!ir) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ir->data = pi->ir_data; ++ ++ spin_lock_irqsave(&svm->ir_list_lock, flags); ++ list_add(&ir->node, &svm->ir_list); ++ spin_unlock_irqrestore(&svm->ir_list_lock, flags); ++out: ++ return ret; ++} ++ ++/** ++ * Note: ++ * The HW cannot support posting multicast/broadcast ++ * interrupts to a vCPU. So, we still use legacy interrupt ++ * remapping for these kind of interrupts. ++ * ++ * For lowest-priority interrupts, we only support ++ * those with single CPU as the destination, e.g. user ++ * configures the interrupts via /proc/irq or uses ++ * irqbalance to make the interrupts single-CPU. 
++ */ ++static int ++get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, ++ struct vcpu_data *vcpu_info, struct vcpu_svm **svm) ++{ ++ struct kvm_lapic_irq irq; ++ struct kvm_vcpu *vcpu = NULL; ++ ++ kvm_set_msi_irq(kvm, e, &irq); ++ ++ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { ++ pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n", ++ __func__, irq.vector); ++ return -1; ++ } ++ ++ pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, ++ irq.vector); ++ *svm = to_svm(vcpu); ++ vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page)); ++ vcpu_info->vector = irq.vector; ++ ++ return 0; ++} ++ ++/* ++ * svm_update_pi_irte - set IRTE for Posted-Interrupts ++ * ++ * @kvm: kvm ++ * @host_irq: host irq of the interrupt ++ * @guest_irq: gsi of the interrupt ++ * @set: set or unset PI ++ * returns 0 on success, < 0 on failure ++ */ ++static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ struct kvm_kernel_irq_routing_entry *e; ++ struct kvm_irq_routing_table *irq_rt; ++ int idx, ret = -EINVAL; ++ ++ if (!kvm_arch_has_assigned_device(kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP)) ++ return 0; ++ ++ pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", ++ __func__, host_irq, guest_irq, set); ++ ++ idx = srcu_read_lock(&kvm->irq_srcu); ++ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); ++ WARN_ON(guest_irq >= irq_rt->nr_rt_entries); ++ ++ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { ++ struct vcpu_data vcpu_info; ++ struct vcpu_svm *svm = NULL; ++ ++ if (e->type != KVM_IRQ_ROUTING_MSI) ++ continue; ++ ++ /** ++ * Here, we setup with legacy mode in the following cases: ++ * 1. When cannot target interrupt to a specific vcpu. ++ * 2. Unsetting posted interrupt. ++ * 3. APIC virtialization is disabled for the vcpu. ++ */ ++ if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set && ++ kvm_vcpu_apicv_active(&svm->vcpu)) { ++ struct amd_iommu_pi_data pi; ++ ++ /* Try to enable guest_mode in IRTE */ ++ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & ++ AVIC_HPA_MASK); ++ pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, ++ svm->vcpu.vcpu_id); ++ pi.is_guest_mode = true; ++ pi.vcpu_data = &vcpu_info; ++ ret = irq_set_vcpu_affinity(host_irq, &pi); ++ ++ /** ++ * Here, we successfully setting up vcpu affinity in ++ * IOMMU guest mode. Now, we need to store the posted ++ * interrupt information in a per-vcpu ir_list so that ++ * we can reference to them directly when we update vcpu ++ * scheduling information in IOMMU irte. ++ */ ++ if (!ret && pi.is_guest_mode) ++ svm_ir_list_add(svm, &pi); ++ } else { ++ /* Use legacy mode in IRTE */ ++ struct amd_iommu_pi_data pi; ++ ++ /** ++ * Here, pi is used to: ++ * - Tell IOMMU to use legacy mode for this interrupt. ++ * - Retrieve ga_tag of prior interrupt remapping data. ++ */ ++ pi.is_guest_mode = false; ++ ret = irq_set_vcpu_affinity(host_irq, &pi); ++ ++ /** ++ * Check if the posted interrupt was previously ++ * setup with the guest_mode by checking if the ga_tag ++ * was cached. If so, we need to clean up the per-vcpu ++ * ir_list. 
++ */ ++ if (!ret && pi.prev_ga_tag) { ++ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); ++ struct kvm_vcpu *vcpu; ++ ++ vcpu = kvm_get_vcpu_by_id(kvm, id); ++ if (vcpu) ++ svm_ir_list_del(to_svm(vcpu), &pi); ++ } ++ } ++ ++ if (!ret && svm) { ++ trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id, ++ e->gsi, vcpu_info.vector, ++ vcpu_info.pi_desc_addr, set); ++ } ++ ++ if (ret < 0) { ++ pr_err("%s: failed to update PI IRTE\n", __func__); ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ srcu_read_unlock(&kvm->irq_srcu, idx); ++ return ret; ++} ++ ++static int svm_nmi_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ int ret; ++ ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && ++ !(svm->vcpu.arch.hflags & HF_NMI_MASK); ++ ret = ret && gif_set(svm) && nested_svm_nmi(svm); ++ ++ return ret; ++} ++ ++static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); ++} ++ ++static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (masked) { ++ svm->vcpu.arch.hflags |= HF_NMI_MASK; ++ set_intercept(svm, INTERCEPT_IRET); ++ } else { ++ svm->vcpu.arch.hflags &= ~HF_NMI_MASK; ++ clr_intercept(svm, INTERCEPT_IRET); ++ } ++} ++ ++static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *vmcb = svm->vmcb; ++ int ret; ++ ++ if (!gif_set(svm) || ++ (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) ++ return 0; ++ ++ ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); ++ ++ if (is_guest_mode(vcpu)) ++ return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); ++ ++ return ret; ++} ++ ++static void enable_irq_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* ++ * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes ++ * 1, because that's a separate STGI/VMRUN intercept. The next time we ++ * get that intercept, this function will be called again though and ++ * we'll get the vintr intercept. However, if the vGIF feature is ++ * enabled, the STGI interception will not occur. Enable the irq ++ * window under the assumption that the hardware will set the GIF. ++ */ ++ if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { ++ svm_set_vintr(svm); ++ svm_inject_irq(svm, 0x0); ++ } ++} ++ ++static void enable_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) ++ == HF_NMI_MASK) ++ return; /* IRET will cause a vm exit */ ++ ++ if (!gif_set(svm)) { ++ if (vgif_enabled(svm)) ++ set_intercept(svm, INTERCEPT_STGI); ++ return; /* STGI will cause a vm exit */ ++ } ++ ++ if (svm->nested.exit_required) ++ return; /* we're not going to run the guest yet */ ++ ++ /* ++ * Something prevents NMI from been injected. 
Single step over possible ++ * problem (IRET or exception injection or interrupt shadow) ++ */ ++ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu); ++ svm->nmi_singlestep = true; ++ svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); ++} ++ ++static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) ++{ ++ return 0; ++} ++ ++static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) ++{ ++ return 0; ++} ++ ++static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; ++ else ++ svm->asid_generation--; ++} ++ ++static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ invlpga(gva, svm->vmcb->control.asid); ++} ++ ++static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) ++{ ++} ++ ++static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (svm_nested_virtualize_tpr(vcpu)) ++ return; ++ ++ if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { ++ int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; ++ kvm_set_cr8(vcpu, cr8); ++ } ++} ++ ++static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ u64 cr8; ++ ++ if (svm_nested_virtualize_tpr(vcpu) || ++ kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ cr8 = kvm_get_cr8(vcpu); ++ svm->vmcb->control.int_ctl &= ~V_TPR_MASK; ++ svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; ++} ++ ++static void svm_complete_interrupts(struct vcpu_svm *svm) ++{ ++ u8 vector; ++ int type; ++ u32 exitintinfo = svm->vmcb->control.exit_int_info; ++ unsigned int3_injected = svm->int3_injected; ++ ++ svm->int3_injected = 0; ++ ++ /* ++ * If we've made progress since setting HF_IRET_MASK, we've ++ * executed an IRET and can allow NMI injection. ++ */ ++ if ((svm->vcpu.arch.hflags & HF_IRET_MASK) ++ && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { ++ svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ } ++ ++ svm->vcpu.arch.nmi_injected = false; ++ kvm_clear_exception_queue(&svm->vcpu); ++ kvm_clear_interrupt_queue(&svm->vcpu); ++ ++ if (!(exitintinfo & SVM_EXITINTINFO_VALID)) ++ return; ++ ++ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); ++ ++ vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; ++ type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; ++ ++ switch (type) { ++ case SVM_EXITINTINFO_TYPE_NMI: ++ svm->vcpu.arch.nmi_injected = true; ++ break; ++ case SVM_EXITINTINFO_TYPE_EXEPT: ++ /* ++ * In case of software exceptions, do not reinject the vector, ++ * but re-execute the instruction instead. Rewind RIP first ++ * if we emulated INT3 before. 
++ */ ++ if (kvm_exception_is_soft(vector)) { ++ if (vector == BP_VECTOR && int3_injected && ++ kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) ++ kvm_rip_write(&svm->vcpu, ++ kvm_rip_read(&svm->vcpu) - ++ int3_injected); ++ break; ++ } ++ if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { ++ u32 err = svm->vmcb->control.exit_int_info_err; ++ kvm_requeue_exception_e(&svm->vcpu, vector, err); ++ ++ } else ++ kvm_requeue_exception(&svm->vcpu, vector); ++ break; ++ case SVM_EXITINTINFO_TYPE_INTR: ++ kvm_queue_interrupt(&svm->vcpu, vector, false); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void svm_cancel_injection(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb_control_area *control = &svm->vmcb->control; ++ ++ control->exit_int_info = control->event_inj; ++ control->exit_int_info_err = control->event_inj_err; ++ control->event_inj = 0; ++ svm_complete_interrupts(svm); ++} ++ ++static void svm_vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; ++ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; ++ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; ++ ++ /* ++ * A vmexit emulation is required before the vcpu can be executed ++ * again. ++ */ ++ if (unlikely(svm->nested.exit_required)) ++ return; ++ ++ /* ++ * Disable singlestep if we're injecting an interrupt/exception. ++ * We don't want our modified rflags to be pushed on the stack where ++ * we might not be able to easily reset them if we disabled NMI ++ * singlestep later. ++ */ ++ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { ++ /* ++ * Event injection happens before external interrupts cause a ++ * vmexit and interrupts are disabled here, so smp_send_reschedule ++ * is enough to force an immediate vmexit. ++ */ ++ disable_nmi_singlestep(svm); ++ smp_send_reschedule(vcpu->cpu); ++ } ++ ++ pre_svm_run(svm); ++ ++ sync_lapic_to_cr8(vcpu); ++ ++ svm->vmcb->save.cr2 = vcpu->arch.cr2; ++ ++ clgi(); ++ kvm_load_guest_xcr0(vcpu); ++ ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. 
++ */ ++ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); ++ ++ local_irq_enable(); ++ ++ asm volatile ( ++ "push %%" _ASM_BP "; \n\t" ++ "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" ++ "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" ++ "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" ++ "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" ++ "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" ++ "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %c[r8](%[svm]), %%r8 \n\t" ++ "mov %c[r9](%[svm]), %%r9 \n\t" ++ "mov %c[r10](%[svm]), %%r10 \n\t" ++ "mov %c[r11](%[svm]), %%r11 \n\t" ++ "mov %c[r12](%[svm]), %%r12 \n\t" ++ "mov %c[r13](%[svm]), %%r13 \n\t" ++ "mov %c[r14](%[svm]), %%r14 \n\t" ++ "mov %c[r15](%[svm]), %%r15 \n\t" ++#endif ++ ++ /* Enter guest mode */ ++ "push %%" _ASM_AX " \n\t" ++ "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" ++ __ex(SVM_VMLOAD) "\n\t" ++ __ex(SVM_VMRUN) "\n\t" ++ __ex(SVM_VMSAVE) "\n\t" ++ "pop %%" _ASM_AX " \n\t" ++ ++ /* Save guest registers, load host registers */ ++ "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" ++ "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" ++ "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" ++ "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" ++ "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" ++ "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %%r8, %c[r8](%[svm]) \n\t" ++ "mov %%r9, %c[r9](%[svm]) \n\t" ++ "mov %%r10, %c[r10](%[svm]) \n\t" ++ "mov %%r11, %c[r11](%[svm]) \n\t" ++ "mov %%r12, %c[r12](%[svm]) \n\t" ++ "mov %%r13, %c[r13](%[svm]) \n\t" ++ "mov %%r14, %c[r14](%[svm]) \n\t" ++ "mov %%r15, %c[r15](%[svm]) \n\t" ++#endif ++ /* ++ * Clear host registers marked as clobbered to prevent ++ * speculative use. ++ */ ++ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" ++ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" ++ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" ++ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" ++ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" ++#ifdef CONFIG_X86_64 ++ "xor %%r8, %%r8 \n\t" ++ "xor %%r9, %%r9 \n\t" ++ "xor %%r10, %%r10 \n\t" ++ "xor %%r11, %%r11 \n\t" ++ "xor %%r12, %%r12 \n\t" ++ "xor %%r13, %%r13 \n\t" ++ "xor %%r14, %%r14 \n\t" ++ "xor %%r15, %%r15 \n\t" ++#endif ++ "pop %%" _ASM_BP ++ : ++ : [svm]"a"(svm), ++ [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), ++ [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])), ++ [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])), ++ [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])), ++ [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])), ++ [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])), ++ [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP])) ++#ifdef CONFIG_X86_64 ++ , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])), ++ [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])), ++ [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])), ++ [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])), ++ [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])), ++ [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])), ++ [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])), ++ [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) ++#endif ++ : "cc", "memory" ++#ifdef CONFIG_X86_64 ++ , "rbx", "rcx", "rdx", "rsi", "rdi" ++ , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" ++#else ++ , "ebx", "ecx", "edx", "esi", "edi" ++#endif ++ ); ++ ++ /* Eliminate branch target predictions from guest mode */ ++ 
vmexit_fill_RSB(); ++ ++#ifdef CONFIG_X86_64 ++ wrmsrl(MSR_GS_BASE, svm->host.gs_base); ++#else ++ loadsegment(fs, svm->host.fs); ++#ifndef CONFIG_X86_32_LAZY_GS ++ loadsegment(gs, svm->host.gs); ++#endif ++#endif ++ ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) ++ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); ++ ++ reload_tss(vcpu); ++ ++ local_irq_disable(); ++ ++ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); ++ ++ vcpu->arch.cr2 = svm->vmcb->save.cr2; ++ vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; ++ vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; ++ vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; ++ ++ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) ++ kvm_before_interrupt(&svm->vcpu); ++ ++ kvm_put_guest_xcr0(vcpu); ++ stgi(); ++ ++ /* Any pending NMI will happen here */ ++ ++ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) ++ kvm_after_interrupt(&svm->vcpu); ++ ++ sync_cr8_to_lapic(vcpu); ++ ++ svm->next_rip = 0; ++ ++ svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; ++ ++ /* if exit due to PF check for async PF */ ++ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) ++ svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); ++ ++ if (npt_enabled) { ++ vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); ++ vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); ++ } ++ ++ /* ++ * We need to handle MC intercepts here before the vcpu has a chance to ++ * change the physical cpu ++ */ ++ if (unlikely(svm->vmcb->control.exit_code == ++ SVM_EXIT_EXCP_BASE + MC_VECTOR)) ++ svm_handle_mce(svm); ++ ++ mark_all_clean(svm->vmcb); ++} ++STACK_FRAME_NON_STANDARD(svm_vcpu_run); ++ ++static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->save.cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_CR); ++} ++ ++static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->vmcb->control.nested_cr3 = __sme_set(root); ++ mark_dirty(svm->vmcb, VMCB_NPT); ++ ++ /* Also sync guest cr3 here in case we live migrate */ ++ svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); ++ mark_dirty(svm->vmcb, VMCB_CR); ++} ++ ++static int is_disabled(void) ++{ ++ u64 vm_cr; ++ ++ rdmsrl(MSR_VM_CR, vm_cr); ++ if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) ++ return 1; ++ ++ return 0; ++} ++ ++static void ++svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) ++{ ++ /* ++ * Patch in the VMMCALL instruction: ++ */ ++ hypercall[0] = 0x0f; ++ hypercall[1] = 0x01; ++ hypercall[2] = 0xd9; ++} ++ ++static void svm_check_processor_compat(void *rtn) ++{ ++ *(int *)rtn = 0; ++} ++ ++static bool svm_cpu_has_accelerated_tpr(void) ++{ ++ return false; ++} ++ ++static bool svm_has_emulated_msr(int index) ++{ ++ switch (index) { ++ case MSR_IA32_MCG_EXT_CTL: ++ return false; ++ default: ++ break; ++ } ++ ++ return true; ++} ++ 
++static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) ++{ ++ return 0; ++} ++ ++static void svm_cpuid_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* Update nrips enabled cache */ ++ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); ++ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); ++} ++ ++static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++ switch (func) { ++ case 0x1: ++ if (avic) ++ entry->ecx &= ~bit(X86_FEATURE_X2APIC); ++ break; ++ case 0x80000001: ++ if (nested) ++ entry->ecx |= (1 << 2); /* Set SVM bit */ ++ break; ++ case 0x8000000A: ++ entry->eax = 1; /* SVM revision 1 */ ++ entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper ++ ASID emulation to nested SVM */ ++ entry->ecx = 0; /* Reserved */ ++ entry->edx = 0; /* Per default do not support any ++ additional features */ ++ ++ /* Support next_rip if host supports it */ ++ if (boot_cpu_has(X86_FEATURE_NRIPS)) ++ entry->edx |= SVM_FEATURE_NRIP; ++ ++ /* Support NPT for the guest if enabled */ ++ if (npt_enabled) ++ entry->edx |= SVM_FEATURE_NPT; ++ ++ break; ++ case 0x8000001F: ++ /* Support memory encryption cpuid if host supports it */ ++ if (boot_cpu_has(X86_FEATURE_SEV)) ++ cpuid(0x8000001f, &entry->eax, &entry->ebx, ++ &entry->ecx, &entry->edx); ++ ++ } ++} ++ ++static int svm_get_lpage_level(void) ++{ ++ return PT_PDPE_LEVEL; ++} ++ ++static bool svm_rdtscp_supported(void) ++{ ++ return boot_cpu_has(X86_FEATURE_RDTSCP); ++} ++ ++static bool svm_invpcid_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_mpx_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_xsaves_supported(void) ++{ ++ return false; ++} ++ ++static bool svm_umip_emulated(void) ++{ ++ return false; ++} ++ ++static bool svm_has_wbinvd_exit(void) ++{ ++ return true; ++} ++ ++#define PRE_EX(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_PRE_EXCEPT, } ++#define POST_EX(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_POST_EXCEPT, } ++#define POST_MEM(exit) { .exit_code = (exit), \ ++ .stage = X86_ICPT_POST_MEMACCESS, } ++ ++static const struct __x86_intercept { ++ u32 exit_code; ++ enum x86_intercept_stage stage; ++} x86_intercept_map[] = { ++ [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), ++ [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), ++ [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), ++ [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), ++ [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), ++ [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), ++ [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), ++ [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), ++ [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), ++ [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), ++ [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), ++ [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), ++ [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), ++ [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), ++ [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), ++ [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), ++ [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), ++ [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), ++ [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), ++ [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), ++ [x86_intercept_invlpga] = 
POST_EX(SVM_EXIT_INVLPGA), ++ [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), ++ [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), ++ [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), ++ [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), ++ [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), ++ [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), ++ [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), ++ [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), ++ [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), ++ [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), ++ [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), ++ [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), ++ [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), ++ [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), ++ [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), ++ [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), ++ [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), ++ [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), ++ [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), ++ [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), ++ [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), ++}; ++ ++#undef PRE_EX ++#undef POST_EX ++#undef POST_MEM ++ ++static int svm_check_intercept(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int vmexit, ret = X86EMUL_CONTINUE; ++ struct __x86_intercept icpt_info; ++ struct vmcb *vmcb = svm->vmcb; ++ ++ if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) ++ goto out; ++ ++ icpt_info = x86_intercept_map[info->intercept]; ++ ++ if (stage != icpt_info.stage) ++ goto out; ++ ++ switch (icpt_info.exit_code) { ++ case SVM_EXIT_READ_CR0: ++ if (info->intercept == x86_intercept_cr_read) ++ icpt_info.exit_code += info->modrm_reg; ++ break; ++ case SVM_EXIT_WRITE_CR0: { ++ unsigned long cr0, val; ++ u64 intercept; ++ ++ if (info->intercept == x86_intercept_cr_write) ++ icpt_info.exit_code += info->modrm_reg; ++ ++ if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || ++ info->intercept == x86_intercept_clts) ++ break; ++ ++ intercept = svm->nested.intercept; ++ ++ if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) ++ break; ++ ++ cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; ++ val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; ++ ++ if (info->intercept == x86_intercept_lmsw) { ++ cr0 &= 0xfUL; ++ val &= 0xfUL; ++ /* lmsw can't clear PE - catch this here */ ++ if (cr0 & X86_CR0_PE) ++ val |= X86_CR0_PE; ++ } ++ ++ if (cr0 ^ val) ++ icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; ++ ++ break; ++ } ++ case SVM_EXIT_READ_DR0: ++ case SVM_EXIT_WRITE_DR0: ++ icpt_info.exit_code += info->modrm_reg; ++ break; ++ case SVM_EXIT_MSR: ++ if (info->intercept == x86_intercept_wrmsr) ++ vmcb->control.exit_info_1 = 1; ++ else ++ vmcb->control.exit_info_1 = 0; ++ break; ++ case SVM_EXIT_PAUSE: ++ /* ++ * We get this for NOP only, but pause ++ * is rep not, check this here ++ */ ++ if (info->rep_prefix != REPE_PREFIX) ++ goto out; ++ break; ++ case SVM_EXIT_IOIO: { ++ u64 exit_info; ++ u32 bytes; ++ ++ if (info->intercept == x86_intercept_in || ++ info->intercept == x86_intercept_ins) { ++ exit_info = ((info->src_val & 0xffff) << 16) | ++ SVM_IOIO_TYPE_MASK; ++ bytes = info->dst_bytes; ++ } else { ++ exit_info = (info->dst_val & 0xffff) << 16; ++ bytes = info->src_bytes; ++ } ++ ++ if (info->intercept == x86_intercept_outs || ++ info->intercept == x86_intercept_ins) ++ exit_info |= SVM_IOIO_STR_MASK; ++ 
++ if (info->rep_prefix) ++ exit_info |= SVM_IOIO_REP_MASK; ++ ++ bytes = min(bytes, 4u); ++ ++ exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; ++ ++ exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); ++ ++ vmcb->control.exit_info_1 = exit_info; ++ vmcb->control.exit_info_2 = info->next_rip; ++ ++ break; ++ } ++ default: ++ break; ++ } ++ ++ /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ ++ if (static_cpu_has(X86_FEATURE_NRIPS)) ++ vmcb->control.next_rip = info->next_rip; ++ vmcb->control.exit_code = icpt_info.exit_code; ++ vmexit = nested_svm_exit_handled(svm); ++ ++ ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED ++ : X86EMUL_CONTINUE; ++ ++out: ++ return ret; ++} ++ ++static void svm_handle_external_intr(struct kvm_vcpu *vcpu) ++{ ++ local_irq_enable(); ++ /* ++ * We must have an instruction with interrupts enabled, so ++ * the timer interrupt isn't delayed by the interrupt shadow. ++ */ ++ asm("nop"); ++ local_irq_disable(); ++} ++ ++static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ if (pause_filter_thresh) ++ shrink_ple_window(vcpu); ++} ++ ++static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) ++{ ++ if (avic_handle_apic_id_update(vcpu) != 0) ++ return; ++ if (avic_handle_dfr_update(vcpu) != 0) ++ return; ++ avic_handle_ldr_update(vcpu); ++} ++ ++static void svm_setup_mce(struct kvm_vcpu *vcpu) ++{ ++ /* [63:9] are reserved. */ ++ vcpu->arch.mcg_cap &= 0x1ff; ++} ++ ++static int svm_smi_allowed(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ /* Per APM Vol.2 15.22.2 "Response to SMI" */ ++ if (!gif_set(svm)) ++ return 0; ++ ++ if (is_guest_mode(&svm->vcpu) && ++ svm->nested.intercept & (1ULL << INTERCEPT_SMI)) { ++ /* TODO: Might need to set exit_info_1 and exit_info_2 here */ ++ svm->vmcb->control.exit_code = SVM_EXIT_SMI; ++ svm->nested.exit_required = true; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ int ret; ++ ++ if (is_guest_mode(vcpu)) { ++ /* FED8h - SVM Guest */ ++ put_smstate(u64, smstate, 0x7ed8, 1); ++ /* FEE0h - SVM Guest VMCB Physical Address */ ++ put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb); ++ ++ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; ++ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; ++ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; ++ ++ ret = nested_svm_vmexit(svm); ++ if (ret) ++ return ret; ++ } ++ return 0; ++} ++ ++static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ struct vmcb *nested_vmcb; ++ struct page *page; ++ struct { ++ u64 guest; ++ u64 vmcb; ++ } svm_state_save; ++ int ret; ++ ++ ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save, ++ sizeof(svm_state_save)); ++ if (ret) ++ return ret; ++ ++ if (svm_state_save.guest) { ++ vcpu->arch.hflags &= ~HF_SMM_MASK; ++ nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page); ++ if (nested_vmcb) ++ enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page); ++ else ++ ret = 1; ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ } ++ return ret; ++} ++ ++static int enable_smi_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!gif_set(svm)) { ++ if (vgif_enabled(svm)) ++ set_intercept(svm, INTERCEPT_STGI); ++ /* STGI will cause a vm exit */ ++ return 1; ++ } ++ return 0; ++} ++ ++static int sev_asid_new(void) ++{ ++ int pos; ++ ++ /* ++ * SEV-enabled guest must use asid from 
min_sev_asid to max_sev_asid. ++ */ ++ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); ++ if (pos >= max_sev_asid) ++ return -EBUSY; ++ ++ set_bit(pos, sev_asid_bitmap); ++ return pos + 1; ++} ++ ++static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ int asid, ret; ++ ++ ret = -EBUSY; ++ if (unlikely(sev->active)) ++ return ret; ++ ++ asid = sev_asid_new(); ++ if (asid < 0) ++ return ret; ++ ++ ret = sev_platform_init(&argp->error); ++ if (ret) ++ goto e_free; ++ ++ sev->active = true; ++ sev->asid = asid; ++ INIT_LIST_HEAD(&sev->regions_list); ++ ++ return 0; ++ ++e_free: ++ __sev_asid_free(asid); ++ return ret; ++} ++ ++static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) ++{ ++ struct sev_data_activate *data; ++ int asid = sev_get_asid(kvm); ++ int ret; ++ ++ wbinvd_on_all_cpus(); ++ ++ ret = sev_guest_df_flush(error); ++ if (ret) ++ return ret; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ /* activate ASID on the given handle */ ++ data->handle = handle; ++ data->asid = asid; ++ ret = sev_guest_activate(data, error); ++ kfree(data); ++ ++ return ret; ++} ++ ++static int __sev_issue_cmd(int fd, int id, void *data, int *error) ++{ ++ struct fd f; ++ int ret; ++ ++ f = fdget(fd); ++ if (!f.file) ++ return -EBADF; ++ ++ ret = sev_issue_cmd_external_user(f.file, id, data, error); ++ ++ fdput(f); ++ return ret; ++} ++ ++static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ ++ return __sev_issue_cmd(sev->fd, id, data, error); ++} ++ ++static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_start *start; ++ struct kvm_sev_launch_start params; ++ void *dh_blob, *session_blob; ++ int *error = &argp->error; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ start = kzalloc(sizeof(*start), GFP_KERNEL); ++ if (!start) ++ return -ENOMEM; ++ ++ dh_blob = NULL; ++ if (params.dh_uaddr) { ++ dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len); ++ if (IS_ERR(dh_blob)) { ++ ret = PTR_ERR(dh_blob); ++ goto e_free; ++ } ++ ++ start->dh_cert_address = __sme_set(__pa(dh_blob)); ++ start->dh_cert_len = params.dh_len; ++ } ++ ++ session_blob = NULL; ++ if (params.session_uaddr) { ++ session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len); ++ if (IS_ERR(session_blob)) { ++ ret = PTR_ERR(session_blob); ++ goto e_free_dh; ++ } ++ ++ start->session_address = __sme_set(__pa(session_blob)); ++ start->session_len = params.session_len; ++ } ++ ++ start->handle = params.handle; ++ start->policy = params.policy; ++ ++ /* create memory encryption context */ ++ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error); ++ if (ret) ++ goto e_free_session; ++ ++ /* Bind ASID to this guest */ ++ ret = sev_bind_asid(kvm, start->handle, error); ++ if (ret) ++ goto e_free_session; ++ ++ /* return handle to userspace */ ++ params.handle = start->handle; ++ if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { ++ sev_unbind_asid(kvm, start->handle); ++ ret = -EFAULT; ++ goto e_free_session; ++ } ++ ++ sev->handle = start->handle; ++ sev->fd = argp->sev_fd; ++ ++e_free_session: ++ 
kfree(session_blob); ++e_free_dh: ++ kfree(dh_blob); ++e_free: ++ kfree(start); ++ return ret; ++} ++ ++static unsigned long get_num_contig_pages(unsigned long idx, ++ struct page **inpages, unsigned long npages) ++{ ++ unsigned long paddr, next_paddr; ++ unsigned long i = idx + 1, pages = 1; ++ ++ /* find the number of contiguous pages starting from idx */ ++ paddr = __sme_page_pa(inpages[idx]); ++ while (i < npages) { ++ next_paddr = __sme_page_pa(inpages[i++]); ++ if ((paddr + PAGE_SIZE) == next_paddr) { ++ pages++; ++ paddr = next_paddr; ++ continue; ++ } ++ break; ++ } ++ ++ return pages; ++} ++ ++static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct kvm_sev_launch_update_data params; ++ struct sev_data_launch_update_data *data; ++ struct page **inpages; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ vaddr = params.uaddr; ++ size = params.len; ++ vaddr_end = vaddr + size; ++ ++ /* Lock the user memory. */ ++ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1); ++ if (!inpages) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ /* ++ * The LAUNCH_UPDATE command will perform in-place encryption of the ++ * memory content (i.e it will write the same memory region with C=1). ++ * It's possible that the cache may contain the data with C=0, i.e., ++ * unencrypted so invalidate it first. ++ */ ++ sev_clflush_pages(inpages, npages); ++ ++ for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { ++ int offset, len; ++ ++ /* ++ * If the user buffer is not page-aligned, calculate the offset ++ * within the page. ++ */ ++ offset = vaddr & (PAGE_SIZE - 1); ++ ++ /* Calculate the number of pages that can be encrypted in one go. 
*/ ++ pages = get_num_contig_pages(i, inpages, npages); ++ ++ len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size); ++ ++ data->handle = sev->handle; ++ data->len = len; ++ data->address = __sme_page_pa(inpages[i]) + offset; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error); ++ if (ret) ++ goto e_unpin; ++ ++ size -= len; ++ next_vaddr = vaddr + len; ++ } ++ ++e_unpin: ++ /* content of memory is updated, mark pages dirty */ ++ for (i = 0; i < npages; i++) { ++ set_page_dirty_lock(inpages[i]); ++ mark_page_accessed(inpages[i]); ++ } ++ /* unlock the user pages */ ++ sev_unpin_memory(kvm, inpages, npages); ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ void __user *measure = (void __user *)(uintptr_t)argp->data; ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_measure *data; ++ struct kvm_sev_launch_measure params; ++ void __user *p = NULL; ++ void *blob = NULL; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, measure, sizeof(params))) ++ return -EFAULT; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ /* User wants to query the blob length */ ++ if (!params.len) ++ goto cmd; ++ ++ p = (void __user *)(uintptr_t)params.uaddr; ++ if (p) { ++ if (params.len > SEV_FW_BLOB_MAX_SIZE) { ++ ret = -EINVAL; ++ goto e_free; ++ } ++ ++ ret = -ENOMEM; ++ blob = kmalloc(params.len, GFP_KERNEL); ++ if (!blob) ++ goto e_free; ++ ++ data->address = __psp_pa(blob); ++ data->len = params.len; ++ } ++ ++cmd: ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error); ++ ++ /* ++ * If we query the session length, FW responded with expected data. 
++ */ ++ if (!params.len) ++ goto done; ++ ++ if (ret) ++ goto e_free_blob; ++ ++ if (blob) { ++ if (copy_to_user(p, blob, params.len)) ++ ret = -EFAULT; ++ } ++ ++done: ++ params.len = data->len; ++ if (copy_to_user(measure, ¶ms, sizeof(params))) ++ ret = -EFAULT; ++e_free_blob: ++ kfree(blob); ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_finish *data; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error); ++ ++ kfree(data); ++ return ret; ++} ++ ++static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct kvm_sev_guest_status params; ++ struct sev_data_guest_status *data; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error); ++ if (ret) ++ goto e_free; ++ ++ params.policy = data->policy; ++ params.state = data->state; ++ params.handle = data->handle; ++ ++ if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) ++ ret = -EFAULT; ++e_free: ++ kfree(data); ++ return ret; ++} ++ ++static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, ++ unsigned long dst, int size, ++ int *error, bool enc) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_dbg *data; ++ int ret; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ ++ data->handle = sev->handle; ++ data->dst_addr = dst; ++ data->src_addr = src; ++ data->len = size; ++ ++ ret = sev_issue_cmd(kvm, ++ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT, ++ data, error); ++ kfree(data); ++ return ret; ++} ++ ++static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, ++ unsigned long dst_paddr, int sz, int *err) ++{ ++ int offset; ++ ++ /* ++ * Its safe to read more than we are asked, caller should ensure that ++ * destination has enough space. 
++ */ ++ src_paddr = round_down(src_paddr, 16); ++ offset = src_paddr & 15; ++ sz = round_up(sz + offset, 16); ++ ++ return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); ++} ++ ++static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, ++ unsigned long __user dst_uaddr, ++ unsigned long dst_paddr, ++ int size, int *err) ++{ ++ struct page *tpage = NULL; ++ int ret, offset; ++ ++ /* if inputs are not 16-byte then use intermediate buffer */ ++ if (!IS_ALIGNED(dst_paddr, 16) || ++ !IS_ALIGNED(paddr, 16) || ++ !IS_ALIGNED(size, 16)) { ++ tpage = (void *)alloc_page(GFP_KERNEL); ++ if (!tpage) ++ return -ENOMEM; ++ ++ dst_paddr = __sme_page_pa(tpage); ++ } ++ ++ ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err); ++ if (ret) ++ goto e_free; ++ ++ if (tpage) { ++ offset = paddr & 15; ++ if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, ++ page_address(tpage) + offset, size)) ++ ret = -EFAULT; ++ } ++ ++e_free: ++ if (tpage) ++ __free_page(tpage); ++ ++ return ret; ++} ++ ++static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, ++ unsigned long __user vaddr, ++ unsigned long dst_paddr, ++ unsigned long __user dst_vaddr, ++ int size, int *error) ++{ ++ struct page *src_tpage = NULL; ++ struct page *dst_tpage = NULL; ++ int ret, len = size; ++ ++ /* If source buffer is not aligned then use an intermediate buffer */ ++ if (!IS_ALIGNED(vaddr, 16)) { ++ src_tpage = alloc_page(GFP_KERNEL); ++ if (!src_tpage) ++ return -ENOMEM; ++ ++ if (copy_from_user(page_address(src_tpage), ++ (void __user *)(uintptr_t)vaddr, size)) { ++ __free_page(src_tpage); ++ return -EFAULT; ++ } ++ ++ paddr = __sme_page_pa(src_tpage); ++ } ++ ++ /* ++ * If destination buffer or length is not aligned then do read-modify-write: ++ * - decrypt destination in an intermediate buffer ++ * - copy the source buffer in an intermediate buffer ++ * - use the intermediate buffer as source buffer ++ */ ++ if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { ++ int dst_offset; ++ ++ dst_tpage = alloc_page(GFP_KERNEL); ++ if (!dst_tpage) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ ret = __sev_dbg_decrypt(kvm, dst_paddr, ++ __sme_page_pa(dst_tpage), size, error); ++ if (ret) ++ goto e_free; ++ ++ /* ++ * If source is kernel buffer then use memcpy() otherwise ++ * copy_from_user(). 
++ */ ++ dst_offset = dst_paddr & 15; ++ ++ if (src_tpage) ++ memcpy(page_address(dst_tpage) + dst_offset, ++ page_address(src_tpage), size); ++ else { ++ if (copy_from_user(page_address(dst_tpage) + dst_offset, ++ (void __user *)(uintptr_t)vaddr, size)) { ++ ret = -EFAULT; ++ goto e_free; ++ } ++ } ++ ++ paddr = __sme_page_pa(dst_tpage); ++ dst_paddr = round_down(dst_paddr, 16); ++ len = round_up(size, 16); ++ } ++ ++ ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true); ++ ++e_free: ++ if (src_tpage) ++ __free_page(src_tpage); ++ if (dst_tpage) ++ __free_page(dst_tpage); ++ return ret; ++} ++ ++static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) ++{ ++ unsigned long vaddr, vaddr_end, next_vaddr; ++ unsigned long dst_vaddr; ++ struct page **src_p, **dst_p; ++ struct kvm_sev_dbg debug; ++ unsigned long n; ++ unsigned int size; ++ int ret; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) ++ return -EFAULT; ++ ++ if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) ++ return -EINVAL; ++ if (!debug.dst_uaddr) ++ return -EINVAL; ++ ++ vaddr = debug.src_uaddr; ++ size = debug.len; ++ vaddr_end = vaddr + size; ++ dst_vaddr = debug.dst_uaddr; ++ ++ for (; vaddr < vaddr_end; vaddr = next_vaddr) { ++ int len, s_off, d_off; ++ ++ /* lock userspace source and destination page */ ++ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0); ++ if (!src_p) ++ return -EFAULT; ++ ++ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1); ++ if (!dst_p) { ++ sev_unpin_memory(kvm, src_p, n); ++ return -EFAULT; ++ } ++ ++ /* ++ * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the ++ * memory content (i.e it will write the same memory region with C=1). ++ * It's possible that the cache may contain the data with C=0, i.e., ++ * unencrypted so invalidate it first. ++ */ ++ sev_clflush_pages(src_p, 1); ++ sev_clflush_pages(dst_p, 1); ++ ++ /* ++ * Since user buffer may not be page aligned, calculate the ++ * offset within the page. ++ */ ++ s_off = vaddr & ~PAGE_MASK; ++ d_off = dst_vaddr & ~PAGE_MASK; ++ len = min_t(size_t, (PAGE_SIZE - s_off), size); ++ ++ if (dec) ++ ret = __sev_dbg_decrypt_user(kvm, ++ __sme_page_pa(src_p[0]) + s_off, ++ dst_vaddr, ++ __sme_page_pa(dst_p[0]) + d_off, ++ len, &argp->error); ++ else ++ ret = __sev_dbg_encrypt_user(kvm, ++ __sme_page_pa(src_p[0]) + s_off, ++ vaddr, ++ __sme_page_pa(dst_p[0]) + d_off, ++ dst_vaddr, ++ len, &argp->error); ++ ++ sev_unpin_memory(kvm, src_p, n); ++ sev_unpin_memory(kvm, dst_p, n); ++ ++ if (ret) ++ goto err; ++ ++ next_vaddr = vaddr + len; ++ dst_vaddr = dst_vaddr + len; ++ size -= len; ++ } ++err: ++ return ret; ++} ++ ++static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct sev_data_launch_secret *data; ++ struct kvm_sev_launch_secret params; ++ struct page **pages; ++ void *blob, *hdr; ++ unsigned long n; ++ int ret, offset; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) ++ return -EFAULT; ++ ++ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); ++ if (!pages) ++ return -ENOMEM; ++ ++ /* ++ * The secret must be copied into contiguous memory region, lets verify ++ * that userspace memory pages are contiguous before we issue command. 
++ */ ++ if (get_num_contig_pages(0, pages, n) != n) { ++ ret = -EINVAL; ++ goto e_unpin_memory; ++ } ++ ++ ret = -ENOMEM; ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ goto e_unpin_memory; ++ ++ offset = params.guest_uaddr & (PAGE_SIZE - 1); ++ data->guest_address = __sme_page_pa(pages[0]) + offset; ++ data->guest_len = params.guest_len; ++ ++ blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); ++ if (IS_ERR(blob)) { ++ ret = PTR_ERR(blob); ++ goto e_free; ++ } ++ ++ data->trans_address = __psp_pa(blob); ++ data->trans_len = params.trans_len; ++ ++ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); ++ if (IS_ERR(hdr)) { ++ ret = PTR_ERR(hdr); ++ goto e_free_blob; ++ } ++ data->hdr_address = __psp_pa(hdr); ++ data->hdr_len = params.hdr_len; ++ ++ data->handle = sev->handle; ++ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); ++ ++ kfree(hdr); ++ ++e_free_blob: ++ kfree(blob); ++e_free: ++ kfree(data); ++e_unpin_memory: ++ sev_unpin_memory(kvm, pages, n); ++ return ret; ++} ++ ++static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) ++{ ++ struct kvm_sev_cmd sev_cmd; ++ int r; ++ ++ if (!svm_sev_enabled()) ++ return -ENOTTY; ++ ++ if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) ++ return -EFAULT; ++ ++ mutex_lock(&kvm->lock); ++ ++ switch (sev_cmd.id) { ++ case KVM_SEV_INIT: ++ r = sev_guest_init(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_START: ++ r = sev_launch_start(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_UPDATE_DATA: ++ r = sev_launch_update_data(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_MEASURE: ++ r = sev_launch_measure(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_LAUNCH_FINISH: ++ r = sev_launch_finish(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_GUEST_STATUS: ++ r = sev_guest_status(kvm, &sev_cmd); ++ break; ++ case KVM_SEV_DBG_DECRYPT: ++ r = sev_dbg_crypt(kvm, &sev_cmd, true); ++ break; ++ case KVM_SEV_DBG_ENCRYPT: ++ r = sev_dbg_crypt(kvm, &sev_cmd, false); ++ break; ++ case KVM_SEV_LAUNCH_SECRET: ++ r = sev_launch_secret(kvm, &sev_cmd); ++ break; ++ default: ++ r = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd))) ++ r = -EFAULT; ++ ++out: ++ mutex_unlock(&kvm->lock); ++ return r; ++} ++ ++static int svm_register_enc_region(struct kvm *kvm, ++ struct kvm_enc_region *range) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct enc_region *region; ++ int ret = 0; ++ ++ if (!sev_guest(kvm)) ++ return -ENOTTY; ++ ++ if (range->addr > ULONG_MAX || range->size > ULONG_MAX) ++ return -EINVAL; ++ ++ region = kzalloc(sizeof(*region), GFP_KERNEL); ++ if (!region) ++ return -ENOMEM; ++ ++ region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1); ++ if (!region->pages) { ++ ret = -ENOMEM; ++ goto e_free; ++ } ++ ++ /* ++ * The guest may change the memory encryption attribute from C=0 -> C=1 ++ * or vice versa for this memory range. Lets make sure caches are ++ * flushed to ensure that guest data gets written into memory with ++ * correct C-bit. 
++ */ ++ sev_clflush_pages(region->pages, region->npages); ++ ++ region->uaddr = range->addr; ++ region->size = range->size; ++ ++ mutex_lock(&kvm->lock); ++ list_add_tail(®ion->list, &sev->regions_list); ++ mutex_unlock(&kvm->lock); ++ ++ return ret; ++ ++e_free: ++ kfree(region); ++ return ret; ++} ++ ++static struct enc_region * ++find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) ++{ ++ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; ++ struct list_head *head = &sev->regions_list; ++ struct enc_region *i; ++ ++ list_for_each_entry(i, head, list) { ++ if (i->uaddr == range->addr && ++ i->size == range->size) ++ return i; ++ } ++ ++ return NULL; ++} ++ ++ ++static int svm_unregister_enc_region(struct kvm *kvm, ++ struct kvm_enc_region *range) ++{ ++ struct enc_region *region; ++ int ret; ++ ++ mutex_lock(&kvm->lock); ++ ++ if (!sev_guest(kvm)) { ++ ret = -ENOTTY; ++ goto failed; ++ } ++ ++ region = find_enc_region(kvm, range); ++ if (!region) { ++ ret = -EINVAL; ++ goto failed; ++ } ++ ++ __unregister_enc_region_locked(kvm, region); ++ ++ mutex_unlock(&kvm->lock); ++ return 0; ++ ++failed: ++ mutex_unlock(&kvm->lock); ++ return ret; ++} ++ ++static struct kvm_x86_ops svm_x86_ops __ro_after_init = { ++ .cpu_has_kvm_support = has_svm, ++ .disabled_by_bios = is_disabled, ++ .hardware_setup = svm_hardware_setup, ++ .hardware_unsetup = svm_hardware_unsetup, ++ .check_processor_compatibility = svm_check_processor_compat, ++ .hardware_enable = svm_hardware_enable, ++ .hardware_disable = svm_hardware_disable, ++ .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, ++ .has_emulated_msr = svm_has_emulated_msr, ++ ++ .vcpu_create = svm_create_vcpu, ++ .vcpu_free = svm_free_vcpu, ++ .vcpu_reset = svm_vcpu_reset, ++ ++ .vm_alloc = svm_vm_alloc, ++ .vm_free = svm_vm_free, ++ .vm_init = avic_vm_init, ++ .vm_destroy = svm_vm_destroy, ++ ++ .prepare_guest_switch = svm_prepare_guest_switch, ++ .vcpu_load = svm_vcpu_load, ++ .vcpu_put = svm_vcpu_put, ++ .vcpu_blocking = svm_vcpu_blocking, ++ .vcpu_unblocking = svm_vcpu_unblocking, ++ ++ .update_bp_intercept = update_bp_intercept, ++ .get_msr_feature = svm_get_msr_feature, ++ .get_msr = svm_get_msr, ++ .set_msr = svm_set_msr, ++ .get_segment_base = svm_get_segment_base, ++ .get_segment = svm_get_segment, ++ .set_segment = svm_set_segment, ++ .get_cpl = svm_get_cpl, ++ .get_cs_db_l_bits = kvm_get_cs_db_l_bits, ++ .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, ++ .decache_cr3 = svm_decache_cr3, ++ .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, ++ .set_cr0 = svm_set_cr0, ++ .set_cr3 = svm_set_cr3, ++ .set_cr4 = svm_set_cr4, ++ .set_efer = svm_set_efer, ++ .get_idt = svm_get_idt, ++ .set_idt = svm_set_idt, ++ .get_gdt = svm_get_gdt, ++ .set_gdt = svm_set_gdt, ++ .get_dr6 = svm_get_dr6, ++ .set_dr6 = svm_set_dr6, ++ .set_dr7 = svm_set_dr7, ++ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, ++ .cache_reg = svm_cache_reg, ++ .get_rflags = svm_get_rflags, ++ .set_rflags = svm_set_rflags, ++ ++ .tlb_flush = svm_flush_tlb, ++ .tlb_flush_gva = svm_flush_tlb_gva, ++ ++ .run = svm_vcpu_run, ++ .handle_exit = handle_exit, ++ .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = svm_set_interrupt_shadow, ++ .get_interrupt_shadow = svm_get_interrupt_shadow, ++ .patch_hypercall = svm_patch_hypercall, ++ .set_irq = svm_set_irq, ++ .set_nmi = svm_inject_nmi, ++ .queue_exception = svm_queue_exception, ++ .cancel_injection = svm_cancel_injection, ++ .interrupt_allowed = svm_interrupt_allowed, ++ .nmi_allowed = 
svm_nmi_allowed, ++ .get_nmi_mask = svm_get_nmi_mask, ++ .set_nmi_mask = svm_set_nmi_mask, ++ .enable_nmi_window = enable_nmi_window, ++ .enable_irq_window = enable_irq_window, ++ .update_cr8_intercept = update_cr8_intercept, ++ .set_virtual_apic_mode = svm_set_virtual_apic_mode, ++ .get_enable_apicv = svm_get_enable_apicv, ++ .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, ++ .load_eoi_exitmap = svm_load_eoi_exitmap, ++ .hwapic_irr_update = svm_hwapic_irr_update, ++ .hwapic_isr_update = svm_hwapic_isr_update, ++ .sync_pir_to_irr = kvm_lapic_find_highest_irr, ++ .apicv_post_state_restore = avic_post_state_restore, ++ ++ .set_tss_addr = svm_set_tss_addr, ++ .set_identity_map_addr = svm_set_identity_map_addr, ++ .get_tdp_level = get_npt_level, ++ .get_mt_mask = svm_get_mt_mask, ++ ++ .get_exit_info = svm_get_exit_info, ++ ++ .get_lpage_level = svm_get_lpage_level, ++ ++ .cpuid_update = svm_cpuid_update, ++ ++ .rdtscp_supported = svm_rdtscp_supported, ++ .invpcid_supported = svm_invpcid_supported, ++ .mpx_supported = svm_mpx_supported, ++ .xsaves_supported = svm_xsaves_supported, ++ .umip_emulated = svm_umip_emulated, ++ ++ .set_supported_cpuid = svm_set_supported_cpuid, ++ ++ .has_wbinvd_exit = svm_has_wbinvd_exit, ++ ++ .read_l1_tsc_offset = svm_read_l1_tsc_offset, ++ .write_l1_tsc_offset = svm_write_l1_tsc_offset, ++ ++ .set_tdp_cr3 = set_tdp_cr3, ++ ++ .check_intercept = svm_check_intercept, ++ .handle_external_intr = svm_handle_external_intr, ++ ++ .request_immediate_exit = __kvm_request_immediate_exit, ++ ++ .sched_in = svm_sched_in, ++ ++ .pmu_ops = &amd_pmu_ops, ++ .deliver_posted_interrupt = svm_deliver_avic_intr, ++ .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, ++ .update_pi_irte = svm_update_pi_irte, ++ .setup_mce = svm_setup_mce, ++ ++ .smi_allowed = svm_smi_allowed, ++ .pre_enter_smm = svm_pre_enter_smm, ++ .pre_leave_smm = svm_pre_leave_smm, ++ .enable_smi_window = enable_smi_window, ++ ++ .mem_enc_op = svm_mem_enc_op, ++ .mem_enc_reg_region = svm_register_enc_region, ++ .mem_enc_unreg_region = svm_unregister_enc_region, ++}; ++ ++static int __init svm_init(void) ++{ ++ return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), ++ __alignof__(struct vcpu_svm), THIS_MODULE); ++} ++ ++static void __exit svm_exit(void) ++{ ++ kvm_exit(); ++} ++ ++module_init(svm_init) ++module_exit(svm_exit) +diff -uprN kernel/arch/x86/kvm/vmx.c kernel_new/arch/x86/kvm/vmx.c +--- kernel/arch/x86/kvm/vmx.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/vmx.c 2021-04-01 18:28:07.658863284 +0800 +@@ -2986,19 +2986,23 @@ static void vmx_prepare_switch_to_host(s + #ifdef CONFIG_X86_64 + static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) + { +- preempt_disable(); ++ unsigned long flags; ++ ++ flags = hard_preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +- preempt_enable(); ++ hard_preempt_enable(flags); + return vmx->msr_guest_kernel_gs_base; + } + + static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) + { +- preempt_disable(); ++ unsigned long flags; ++ ++ flags = hard_preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); +- preempt_enable(); ++ hard_preempt_enable(flags); + vmx->msr_guest_kernel_gs_base = data; + } + #endif +@@ -3392,6 +3396,7 @@ static void setup_msrs(struct vcpu_vmx * + { + int save_nmsrs, index; + ++ hard_cond_local_irq_disable(); + save_nmsrs = 0; + #ifdef CONFIG_X86_64 + if (is_long_mode(&vmx->vcpu)) { +@@ 
-3422,6 +3427,7 @@ static void setup_msrs(struct vcpu_vmx * + + vmx->save_nmsrs = save_nmsrs; + vmx->guest_msrs_dirty = true; ++ hard_cond_local_irq_enable(); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(&vmx->vcpu); +@@ -4329,9 +4335,22 @@ static int vmx_set_msr(struct kvm_vcpu * + u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { ++ unsigned long flags; ++ + preempt_disable(); ++ flags = hard_cond_local_irq_save(); ++ /* ++ * This may be called without a ipipe notifier ++ * registered, i.e. outside of vcpu_run. In ++ * that case, shared MSRs may be set to guest ++ * state while I-pipe will have no chance to ++ * restore them when interrupting afterwards. ++ * Therefore register the notifier. ++ */ ++ __ipipe_enter_vm(&vcpu->ipipe_notifier); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); ++ hard_cond_local_irq_restore(flags); + preempt_enable(); + if (ret) + msr->data = old_msr_data; +@@ -11113,7 +11132,9 @@ static struct kvm_vcpu *vmx_create_vcpu( + vmx_vcpu_load(&vmx->vcpu, cpu); + vmx->vcpu.cpu = cpu; + vmx_vcpu_setup(vmx); ++ hard_cond_local_irq_disable(); + vmx_vcpu_put(&vmx->vcpu); ++ hard_cond_local_irq_enable(); + put_cpu(); + if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { + err = alloc_apic_access_page(kvm); +diff -uprN kernel/arch/x86/kvm/vmx.c.orig kernel_new/arch/x86/kvm/vmx.c.orig +--- kernel/arch/x86/kvm/vmx.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/vmx.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,14627 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * This module enables machines with Intel VT-x extensions to run virtual ++ * machines without emulation or binary translation. ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. ++ * ++ * Authors: ++ * Avi Kivity ++ * Yaniv Kamay ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "irq.h" ++#include "mmu.h" ++#include "cpuid.h" ++#include "lapic.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "kvm_cache_regs.h" ++#include "x86.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "trace.h" ++#include "pmu.h" ++#include "vmx_evmcs.h" ++ ++#define __ex(x) __kvm_handle_fault_on_reboot(x) ++#define __ex_clear(x, reg) \ ++ ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg) ++ ++MODULE_AUTHOR("Qumranet"); ++MODULE_LICENSE("GPL"); ++ ++static const struct x86_cpu_id vmx_cpu_id[] = { ++ X86_FEATURE_MATCH(X86_FEATURE_VMX), ++ {} ++}; ++MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); ++ ++static bool __read_mostly enable_vpid = 1; ++module_param_named(vpid, enable_vpid, bool, 0444); ++ ++static bool __read_mostly enable_vnmi = 1; ++module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); ++ ++static bool __read_mostly flexpriority_enabled = 1; ++module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); ++ ++static bool __read_mostly enable_ept = 1; ++module_param_named(ept, enable_ept, bool, S_IRUGO); ++ ++static bool __read_mostly enable_unrestricted_guest = 1; ++module_param_named(unrestricted_guest, ++ enable_unrestricted_guest, bool, S_IRUGO); ++ ++static bool __read_mostly enable_ept_ad_bits = 1; ++module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); ++ ++static bool __read_mostly emulate_invalid_guest_state = true; ++module_param(emulate_invalid_guest_state, bool, S_IRUGO); ++ ++static bool __read_mostly fasteoi = 1; ++module_param(fasteoi, bool, S_IRUGO); ++ ++static bool __read_mostly enable_apicv = 1; ++module_param(enable_apicv, bool, S_IRUGO); ++ ++static bool __read_mostly enable_shadow_vmcs = 1; ++module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); ++/* ++ * If nested=1, nested virtualization is supported, i.e., guests may use ++ * VMX and be a hypervisor for its own guests. If nested=0, guests may not ++ * use VMX instructions. ++ */ ++static bool __read_mostly nested = 0; ++module_param(nested, bool, S_IRUGO); ++ ++static u64 __read_mostly host_xss; ++ ++static bool __read_mostly enable_pml = 1; ++module_param_named(pml, enable_pml, bool, S_IRUGO); ++ ++#define MSR_TYPE_R 1 ++#define MSR_TYPE_W 2 ++#define MSR_TYPE_RW 3 ++ ++#define MSR_BITMAP_MODE_X2APIC 1 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2 ++ ++#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL ++ ++/* Guest_tsc -> host_tsc conversion requires 64-bit division. 
*/ ++static int __read_mostly cpu_preemption_timer_multi; ++static bool __read_mostly enable_preemption_timer = 1; ++#ifdef CONFIG_X86_64 ++module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); ++#endif ++ ++#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) ++#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE ++#define KVM_VM_CR0_ALWAYS_ON \ ++ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ ++ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) ++#define KVM_CR4_GUEST_OWNED_BITS \ ++ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ ++ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) ++ ++#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE ++#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) ++#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) ++ ++#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) ++ ++#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 ++ ++/* ++ * Hyper-V requires all of these, so mark them as supported even though ++ * they are just treated the same as all-context. ++ */ ++#define VMX_VPID_EXTENT_SUPPORTED_MASK \ ++ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \ ++ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT) ++ ++/* ++ * These 2 parameters are used to config the controls for Pause-Loop Exiting: ++ * ple_gap: upper bound on the amount of time between two successive ++ * executions of PAUSE in a loop. Also indicate if ple enabled. ++ * According to test, this time is usually smaller than 128 cycles. ++ * ple_window: upper bound on the amount of time a guest is allowed to execute ++ * in a PAUSE loop. Tests indicate that most spinlocks are held for ++ * less than 2^12 cycles ++ * Time is measured based on a counter that runs at the same rate as the TSC, ++ * refer SDM volume 3b section 21.6.13 & 22.1.3. ++ */ ++static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; ++module_param(ple_gap, uint, 0444); ++ ++static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; ++module_param(ple_window, uint, 0444); ++ ++/* Default doubles per-vcpu window every exit. */ ++static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; ++module_param(ple_window_grow, uint, 0444); ++ ++/* Default resets per-vcpu window every exit to ple_window. */ ++static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; ++module_param(ple_window_shrink, uint, 0444); ++ ++/* Default is to compute the maximum so we can never overflow. 
*/ ++static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; ++module_param(ple_window_max, uint, 0444); ++ ++extern const ulong vmx_return; ++ ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); ++static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); ++static DEFINE_MUTEX(vmx_l1d_flush_mutex); ++ ++/* Storage for pre module init parameter parsing */ ++static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO; ++ ++static const struct { ++ const char *option; ++ bool for_parse; ++} vmentry_l1d_param[] = { ++ [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, ++ [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, ++ [VMENTER_L1D_FLUSH_COND] = {"cond", true}, ++ [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, ++ [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, ++ [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, ++}; ++ ++#define L1D_CACHE_ORDER 4 ++static void *vmx_l1d_flush_pages; ++ ++static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) ++{ ++ struct page *page; ++ unsigned int i; ++ ++ if (!enable_ept) { ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED; ++ return 0; ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); ++ if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; ++ return 0; ++ } ++ } ++ ++ /* If set to auto use the default l1tf mitigation method */ ++ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { ++ switch (l1tf_mitigation) { ++ case L1TF_MITIGATION_OFF: ++ l1tf = VMENTER_L1D_FLUSH_NEVER; ++ break; ++ case L1TF_MITIGATION_FLUSH_NOWARN: ++ case L1TF_MITIGATION_FLUSH: ++ case L1TF_MITIGATION_FLUSH_NOSMT: ++ l1tf = VMENTER_L1D_FLUSH_COND; ++ break; ++ case L1TF_MITIGATION_FULL: ++ case L1TF_MITIGATION_FULL_FORCE: ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS; ++ break; ++ } ++ } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) { ++ l1tf = VMENTER_L1D_FLUSH_ALWAYS; ++ } ++ ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && ++ !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { ++ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); ++ if (!page) ++ return -ENOMEM; ++ vmx_l1d_flush_pages = page_address(page); ++ ++ /* ++ * Initialize each page with a different pattern in ++ * order to protect against KSM in the nested ++ * virtualization case. ++ */ ++ for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) { ++ memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1, ++ PAGE_SIZE); ++ } ++ } ++ ++ l1tf_vmx_mitigation = l1tf; ++ ++ if (l1tf != VMENTER_L1D_FLUSH_NEVER) ++ static_branch_enable(&vmx_l1d_should_flush); ++ else ++ static_branch_disable(&vmx_l1d_should_flush); ++ ++ if (l1tf == VMENTER_L1D_FLUSH_COND) ++ static_branch_enable(&vmx_l1d_flush_cond); ++ else ++ static_branch_disable(&vmx_l1d_flush_cond); ++ return 0; ++} ++ ++static int vmentry_l1d_flush_parse(const char *s) ++{ ++ unsigned int i; ++ ++ if (s) { ++ for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { ++ if (vmentry_l1d_param[i].for_parse && ++ sysfs_streq(s, vmentry_l1d_param[i].option)) ++ return i; ++ } ++ } ++ return -EINVAL; ++} ++ ++static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) ++{ ++ int l1tf, ret; ++ ++ l1tf = vmentry_l1d_flush_parse(s); ++ if (l1tf < 0) ++ return l1tf; ++ ++ if (!boot_cpu_has(X86_BUG_L1TF)) ++ return 0; ++ ++ /* ++ * Has vmx_init() run already? If not then this is the pre init ++ * parameter parsing. 
In that case just store the value and let ++ * vmx_init() do the proper setup after enable_ept has been ++ * established. ++ */ ++ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) { ++ vmentry_l1d_flush_param = l1tf; ++ return 0; ++ } ++ ++ mutex_lock(&vmx_l1d_flush_mutex); ++ ret = vmx_setup_l1d_flush(l1tf); ++ mutex_unlock(&vmx_l1d_flush_mutex); ++ return ret; ++} ++ ++static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) ++{ ++ if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) ++ return sprintf(s, "???\n"); ++ ++ return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); ++} ++ ++static const struct kernel_param_ops vmentry_l1d_flush_ops = { ++ .set = vmentry_l1d_flush_set, ++ .get = vmentry_l1d_flush_get, ++}; ++module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); ++ ++enum ept_pointers_status { ++ EPT_POINTERS_CHECK = 0, ++ EPT_POINTERS_MATCH = 1, ++ EPT_POINTERS_MISMATCH = 2 ++}; ++ ++struct kvm_vmx { ++ struct kvm kvm; ++ ++ unsigned int tss_addr; ++ bool ept_identity_pagetable_done; ++ gpa_t ept_identity_map_addr; ++ ++ enum ept_pointers_status ept_pointers_match; ++ spinlock_t ept_pointer_lock; ++}; ++ ++#define NR_AUTOLOAD_MSRS 8 ++ ++struct vmcs_hdr { ++ u32 revision_id:31; ++ u32 shadow_vmcs:1; ++}; ++ ++struct vmcs { ++ struct vmcs_hdr hdr; ++ u32 abort; ++ char data[0]; ++}; ++ ++/* ++ * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT ++ * and whose values change infrequently, but are not constant. I.e. this is ++ * used as a write-through cache of the corresponding VMCS fields. ++ */ ++struct vmcs_host_state { ++ unsigned long cr3; /* May not match real cr3 */ ++ unsigned long cr4; /* May not match real cr4 */ ++ unsigned long gs_base; ++ unsigned long fs_base; ++ ++ u16 fs_sel, gs_sel, ldt_sel; ++#ifdef CONFIG_X86_64 ++ u16 ds_sel, es_sel; ++#endif ++}; ++ ++/* ++ * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also ++ * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs ++ * loaded on this CPU (so we can clear them if the CPU goes down). ++ */ ++struct loaded_vmcs { ++ struct vmcs *vmcs; ++ struct vmcs *shadow_vmcs; ++ int cpu; ++ bool launched; ++ bool nmi_known_unmasked; ++ bool hv_timer_armed; ++ /* Support for vnmi-less CPUs */ ++ int soft_vnmi_blocked; ++ ktime_t entry_time; ++ s64 vnmi_blocked_time; ++ unsigned long *msr_bitmap; ++ struct list_head loaded_vmcss_on_cpu_link; ++ struct vmcs_host_state host_state; ++}; ++ ++struct shared_msr_entry { ++ unsigned index; ++ u64 data; ++ u64 mask; ++}; ++ ++/* ++ * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a ++ * single nested guest (L2), hence the name vmcs12. Any VMX implementation has ++ * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is ++ * stored in guest memory specified by VMPTRLD, but is opaque to the guest, ++ * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. ++ * More than one of these structures may exist, if L1 runs multiple L2 guests. ++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the ++ * underlying hardware which will be used to run L2. ++ * This structure is packed to ensure that its layout is identical across ++ * machines (necessary for live migration). ++ * ++ * IMPORTANT: Changing the layout of existing fields in this structure ++ * will break save/restore compatibility with older kvm releases. 
When ++ * adding new fields, either use space in the reserved padding* arrays ++ * or add the new fields to the end of the structure. ++ */ ++typedef u64 natural_width; ++struct __packed vmcs12 { ++ /* According to the Intel spec, a VMCS region must start with the ++ * following two fields. Then follow implementation-specific data. ++ */ ++ struct vmcs_hdr hdr; ++ u32 abort; ++ ++ u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */ ++ u32 padding[7]; /* room for future expansion */ ++ ++ u64 io_bitmap_a; ++ u64 io_bitmap_b; ++ u64 msr_bitmap; ++ u64 vm_exit_msr_store_addr; ++ u64 vm_exit_msr_load_addr; ++ u64 vm_entry_msr_load_addr; ++ u64 tsc_offset; ++ u64 virtual_apic_page_addr; ++ u64 apic_access_addr; ++ u64 posted_intr_desc_addr; ++ u64 ept_pointer; ++ u64 eoi_exit_bitmap0; ++ u64 eoi_exit_bitmap1; ++ u64 eoi_exit_bitmap2; ++ u64 eoi_exit_bitmap3; ++ u64 xss_exit_bitmap; ++ u64 guest_physical_address; ++ u64 vmcs_link_pointer; ++ u64 guest_ia32_debugctl; ++ u64 guest_ia32_pat; ++ u64 guest_ia32_efer; ++ u64 guest_ia32_perf_global_ctrl; ++ u64 guest_pdptr0; ++ u64 guest_pdptr1; ++ u64 guest_pdptr2; ++ u64 guest_pdptr3; ++ u64 guest_bndcfgs; ++ u64 host_ia32_pat; ++ u64 host_ia32_efer; ++ u64 host_ia32_perf_global_ctrl; ++ u64 vmread_bitmap; ++ u64 vmwrite_bitmap; ++ u64 vm_function_control; ++ u64 eptp_list_address; ++ u64 pml_address; ++ u64 padding64[3]; /* room for future expansion */ ++ /* ++ * To allow migration of L1 (complete with its L2 guests) between ++ * machines of different natural widths (32 or 64 bit), we cannot have ++ * unsigned long fields with no explict size. We use u64 (aliased ++ * natural_width) instead. Luckily, x86 is little-endian. ++ */ ++ natural_width cr0_guest_host_mask; ++ natural_width cr4_guest_host_mask; ++ natural_width cr0_read_shadow; ++ natural_width cr4_read_shadow; ++ natural_width cr3_target_value0; ++ natural_width cr3_target_value1; ++ natural_width cr3_target_value2; ++ natural_width cr3_target_value3; ++ natural_width exit_qualification; ++ natural_width guest_linear_address; ++ natural_width guest_cr0; ++ natural_width guest_cr3; ++ natural_width guest_cr4; ++ natural_width guest_es_base; ++ natural_width guest_cs_base; ++ natural_width guest_ss_base; ++ natural_width guest_ds_base; ++ natural_width guest_fs_base; ++ natural_width guest_gs_base; ++ natural_width guest_ldtr_base; ++ natural_width guest_tr_base; ++ natural_width guest_gdtr_base; ++ natural_width guest_idtr_base; ++ natural_width guest_dr7; ++ natural_width guest_rsp; ++ natural_width guest_rip; ++ natural_width guest_rflags; ++ natural_width guest_pending_dbg_exceptions; ++ natural_width guest_sysenter_esp; ++ natural_width guest_sysenter_eip; ++ natural_width host_cr0; ++ natural_width host_cr3; ++ natural_width host_cr4; ++ natural_width host_fs_base; ++ natural_width host_gs_base; ++ natural_width host_tr_base; ++ natural_width host_gdtr_base; ++ natural_width host_idtr_base; ++ natural_width host_ia32_sysenter_esp; ++ natural_width host_ia32_sysenter_eip; ++ natural_width host_rsp; ++ natural_width host_rip; ++ natural_width paddingl[8]; /* room for future expansion */ ++ u32 pin_based_vm_exec_control; ++ u32 cpu_based_vm_exec_control; ++ u32 exception_bitmap; ++ u32 page_fault_error_code_mask; ++ u32 page_fault_error_code_match; ++ u32 cr3_target_count; ++ u32 vm_exit_controls; ++ u32 vm_exit_msr_store_count; ++ u32 vm_exit_msr_load_count; ++ u32 vm_entry_controls; ++ u32 vm_entry_msr_load_count; ++ u32 vm_entry_intr_info_field; ++ u32 
vm_entry_exception_error_code; ++ u32 vm_entry_instruction_len; ++ u32 tpr_threshold; ++ u32 secondary_vm_exec_control; ++ u32 vm_instruction_error; ++ u32 vm_exit_reason; ++ u32 vm_exit_intr_info; ++ u32 vm_exit_intr_error_code; ++ u32 idt_vectoring_info_field; ++ u32 idt_vectoring_error_code; ++ u32 vm_exit_instruction_len; ++ u32 vmx_instruction_info; ++ u32 guest_es_limit; ++ u32 guest_cs_limit; ++ u32 guest_ss_limit; ++ u32 guest_ds_limit; ++ u32 guest_fs_limit; ++ u32 guest_gs_limit; ++ u32 guest_ldtr_limit; ++ u32 guest_tr_limit; ++ u32 guest_gdtr_limit; ++ u32 guest_idtr_limit; ++ u32 guest_es_ar_bytes; ++ u32 guest_cs_ar_bytes; ++ u32 guest_ss_ar_bytes; ++ u32 guest_ds_ar_bytes; ++ u32 guest_fs_ar_bytes; ++ u32 guest_gs_ar_bytes; ++ u32 guest_ldtr_ar_bytes; ++ u32 guest_tr_ar_bytes; ++ u32 guest_interruptibility_info; ++ u32 guest_activity_state; ++ u32 guest_sysenter_cs; ++ u32 host_ia32_sysenter_cs; ++ u32 vmx_preemption_timer_value; ++ u32 padding32[7]; /* room for future expansion */ ++ u16 virtual_processor_id; ++ u16 posted_intr_nv; ++ u16 guest_es_selector; ++ u16 guest_cs_selector; ++ u16 guest_ss_selector; ++ u16 guest_ds_selector; ++ u16 guest_fs_selector; ++ u16 guest_gs_selector; ++ u16 guest_ldtr_selector; ++ u16 guest_tr_selector; ++ u16 guest_intr_status; ++ u16 host_es_selector; ++ u16 host_cs_selector; ++ u16 host_ss_selector; ++ u16 host_ds_selector; ++ u16 host_fs_selector; ++ u16 host_gs_selector; ++ u16 host_tr_selector; ++ u16 guest_pml_index; ++}; ++ ++/* ++ * For save/restore compatibility, the vmcs12 field offsets must not change. ++ */ ++#define CHECK_OFFSET(field, loc) \ ++ BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \ ++ "Offset of " #field " in struct vmcs12 has changed.") ++ ++static inline void vmx_check_vmcs12_offsets(void) { ++ CHECK_OFFSET(hdr, 0); ++ CHECK_OFFSET(abort, 4); ++ CHECK_OFFSET(launch_state, 8); ++ CHECK_OFFSET(io_bitmap_a, 40); ++ CHECK_OFFSET(io_bitmap_b, 48); ++ CHECK_OFFSET(msr_bitmap, 56); ++ CHECK_OFFSET(vm_exit_msr_store_addr, 64); ++ CHECK_OFFSET(vm_exit_msr_load_addr, 72); ++ CHECK_OFFSET(vm_entry_msr_load_addr, 80); ++ CHECK_OFFSET(tsc_offset, 88); ++ CHECK_OFFSET(virtual_apic_page_addr, 96); ++ CHECK_OFFSET(apic_access_addr, 104); ++ CHECK_OFFSET(posted_intr_desc_addr, 112); ++ CHECK_OFFSET(ept_pointer, 120); ++ CHECK_OFFSET(eoi_exit_bitmap0, 128); ++ CHECK_OFFSET(eoi_exit_bitmap1, 136); ++ CHECK_OFFSET(eoi_exit_bitmap2, 144); ++ CHECK_OFFSET(eoi_exit_bitmap3, 152); ++ CHECK_OFFSET(xss_exit_bitmap, 160); ++ CHECK_OFFSET(guest_physical_address, 168); ++ CHECK_OFFSET(vmcs_link_pointer, 176); ++ CHECK_OFFSET(guest_ia32_debugctl, 184); ++ CHECK_OFFSET(guest_ia32_pat, 192); ++ CHECK_OFFSET(guest_ia32_efer, 200); ++ CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208); ++ CHECK_OFFSET(guest_pdptr0, 216); ++ CHECK_OFFSET(guest_pdptr1, 224); ++ CHECK_OFFSET(guest_pdptr2, 232); ++ CHECK_OFFSET(guest_pdptr3, 240); ++ CHECK_OFFSET(guest_bndcfgs, 248); ++ CHECK_OFFSET(host_ia32_pat, 256); ++ CHECK_OFFSET(host_ia32_efer, 264); ++ CHECK_OFFSET(host_ia32_perf_global_ctrl, 272); ++ CHECK_OFFSET(vmread_bitmap, 280); ++ CHECK_OFFSET(vmwrite_bitmap, 288); ++ CHECK_OFFSET(vm_function_control, 296); ++ CHECK_OFFSET(eptp_list_address, 304); ++ CHECK_OFFSET(pml_address, 312); ++ CHECK_OFFSET(cr0_guest_host_mask, 344); ++ CHECK_OFFSET(cr4_guest_host_mask, 352); ++ CHECK_OFFSET(cr0_read_shadow, 360); ++ CHECK_OFFSET(cr4_read_shadow, 368); ++ CHECK_OFFSET(cr3_target_value0, 376); ++ CHECK_OFFSET(cr3_target_value1, 384); ++ 
CHECK_OFFSET(cr3_target_value2, 392); ++ CHECK_OFFSET(cr3_target_value3, 400); ++ CHECK_OFFSET(exit_qualification, 408); ++ CHECK_OFFSET(guest_linear_address, 416); ++ CHECK_OFFSET(guest_cr0, 424); ++ CHECK_OFFSET(guest_cr3, 432); ++ CHECK_OFFSET(guest_cr4, 440); ++ CHECK_OFFSET(guest_es_base, 448); ++ CHECK_OFFSET(guest_cs_base, 456); ++ CHECK_OFFSET(guest_ss_base, 464); ++ CHECK_OFFSET(guest_ds_base, 472); ++ CHECK_OFFSET(guest_fs_base, 480); ++ CHECK_OFFSET(guest_gs_base, 488); ++ CHECK_OFFSET(guest_ldtr_base, 496); ++ CHECK_OFFSET(guest_tr_base, 504); ++ CHECK_OFFSET(guest_gdtr_base, 512); ++ CHECK_OFFSET(guest_idtr_base, 520); ++ CHECK_OFFSET(guest_dr7, 528); ++ CHECK_OFFSET(guest_rsp, 536); ++ CHECK_OFFSET(guest_rip, 544); ++ CHECK_OFFSET(guest_rflags, 552); ++ CHECK_OFFSET(guest_pending_dbg_exceptions, 560); ++ CHECK_OFFSET(guest_sysenter_esp, 568); ++ CHECK_OFFSET(guest_sysenter_eip, 576); ++ CHECK_OFFSET(host_cr0, 584); ++ CHECK_OFFSET(host_cr3, 592); ++ CHECK_OFFSET(host_cr4, 600); ++ CHECK_OFFSET(host_fs_base, 608); ++ CHECK_OFFSET(host_gs_base, 616); ++ CHECK_OFFSET(host_tr_base, 624); ++ CHECK_OFFSET(host_gdtr_base, 632); ++ CHECK_OFFSET(host_idtr_base, 640); ++ CHECK_OFFSET(host_ia32_sysenter_esp, 648); ++ CHECK_OFFSET(host_ia32_sysenter_eip, 656); ++ CHECK_OFFSET(host_rsp, 664); ++ CHECK_OFFSET(host_rip, 672); ++ CHECK_OFFSET(pin_based_vm_exec_control, 744); ++ CHECK_OFFSET(cpu_based_vm_exec_control, 748); ++ CHECK_OFFSET(exception_bitmap, 752); ++ CHECK_OFFSET(page_fault_error_code_mask, 756); ++ CHECK_OFFSET(page_fault_error_code_match, 760); ++ CHECK_OFFSET(cr3_target_count, 764); ++ CHECK_OFFSET(vm_exit_controls, 768); ++ CHECK_OFFSET(vm_exit_msr_store_count, 772); ++ CHECK_OFFSET(vm_exit_msr_load_count, 776); ++ CHECK_OFFSET(vm_entry_controls, 780); ++ CHECK_OFFSET(vm_entry_msr_load_count, 784); ++ CHECK_OFFSET(vm_entry_intr_info_field, 788); ++ CHECK_OFFSET(vm_entry_exception_error_code, 792); ++ CHECK_OFFSET(vm_entry_instruction_len, 796); ++ CHECK_OFFSET(tpr_threshold, 800); ++ CHECK_OFFSET(secondary_vm_exec_control, 804); ++ CHECK_OFFSET(vm_instruction_error, 808); ++ CHECK_OFFSET(vm_exit_reason, 812); ++ CHECK_OFFSET(vm_exit_intr_info, 816); ++ CHECK_OFFSET(vm_exit_intr_error_code, 820); ++ CHECK_OFFSET(idt_vectoring_info_field, 824); ++ CHECK_OFFSET(idt_vectoring_error_code, 828); ++ CHECK_OFFSET(vm_exit_instruction_len, 832); ++ CHECK_OFFSET(vmx_instruction_info, 836); ++ CHECK_OFFSET(guest_es_limit, 840); ++ CHECK_OFFSET(guest_cs_limit, 844); ++ CHECK_OFFSET(guest_ss_limit, 848); ++ CHECK_OFFSET(guest_ds_limit, 852); ++ CHECK_OFFSET(guest_fs_limit, 856); ++ CHECK_OFFSET(guest_gs_limit, 860); ++ CHECK_OFFSET(guest_ldtr_limit, 864); ++ CHECK_OFFSET(guest_tr_limit, 868); ++ CHECK_OFFSET(guest_gdtr_limit, 872); ++ CHECK_OFFSET(guest_idtr_limit, 876); ++ CHECK_OFFSET(guest_es_ar_bytes, 880); ++ CHECK_OFFSET(guest_cs_ar_bytes, 884); ++ CHECK_OFFSET(guest_ss_ar_bytes, 888); ++ CHECK_OFFSET(guest_ds_ar_bytes, 892); ++ CHECK_OFFSET(guest_fs_ar_bytes, 896); ++ CHECK_OFFSET(guest_gs_ar_bytes, 900); ++ CHECK_OFFSET(guest_ldtr_ar_bytes, 904); ++ CHECK_OFFSET(guest_tr_ar_bytes, 908); ++ CHECK_OFFSET(guest_interruptibility_info, 912); ++ CHECK_OFFSET(guest_activity_state, 916); ++ CHECK_OFFSET(guest_sysenter_cs, 920); ++ CHECK_OFFSET(host_ia32_sysenter_cs, 924); ++ CHECK_OFFSET(vmx_preemption_timer_value, 928); ++ CHECK_OFFSET(virtual_processor_id, 960); ++ CHECK_OFFSET(posted_intr_nv, 962); ++ CHECK_OFFSET(guest_es_selector, 964); ++ CHECK_OFFSET(guest_cs_selector, 966); ++ 
CHECK_OFFSET(guest_ss_selector, 968); ++ CHECK_OFFSET(guest_ds_selector, 970); ++ CHECK_OFFSET(guest_fs_selector, 972); ++ CHECK_OFFSET(guest_gs_selector, 974); ++ CHECK_OFFSET(guest_ldtr_selector, 976); ++ CHECK_OFFSET(guest_tr_selector, 978); ++ CHECK_OFFSET(guest_intr_status, 980); ++ CHECK_OFFSET(host_es_selector, 982); ++ CHECK_OFFSET(host_cs_selector, 984); ++ CHECK_OFFSET(host_ss_selector, 986); ++ CHECK_OFFSET(host_ds_selector, 988); ++ CHECK_OFFSET(host_fs_selector, 990); ++ CHECK_OFFSET(host_gs_selector, 992); ++ CHECK_OFFSET(host_tr_selector, 994); ++ CHECK_OFFSET(guest_pml_index, 996); ++} ++ ++/* ++ * VMCS12_REVISION is an arbitrary id that should be changed if the content or ++ * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and ++ * VMPTRLD verifies that the VMCS region that L1 is loading contains this id. ++ * ++ * IMPORTANT: Changing this value will break save/restore compatibility with ++ * older kvm releases. ++ */ ++#define VMCS12_REVISION 0x11e57ed0 ++ ++/* ++ * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region ++ * and any VMCS region. Although only sizeof(struct vmcs12) are used by the ++ * current implementation, 4K are reserved to avoid future complications. ++ */ ++#define VMCS12_SIZE 0x1000 ++ ++/* ++ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any ++ * supported VMCS12 field encoding. ++ */ ++#define VMCS12_MAX_FIELD_INDEX 0x17 ++ ++struct nested_vmx_msrs { ++ /* ++ * We only store the "true" versions of the VMX capability MSRs. We ++ * generate the "non-true" versions by setting the must-be-1 bits ++ * according to the SDM. ++ */ ++ u32 procbased_ctls_low; ++ u32 procbased_ctls_high; ++ u32 secondary_ctls_low; ++ u32 secondary_ctls_high; ++ u32 pinbased_ctls_low; ++ u32 pinbased_ctls_high; ++ u32 exit_ctls_low; ++ u32 exit_ctls_high; ++ u32 entry_ctls_low; ++ u32 entry_ctls_high; ++ u32 misc_low; ++ u32 misc_high; ++ u32 ept_caps; ++ u32 vpid_caps; ++ u64 basic; ++ u64 cr0_fixed0; ++ u64 cr0_fixed1; ++ u64 cr4_fixed0; ++ u64 cr4_fixed1; ++ u64 vmcs_enum; ++ u64 vmfunc_controls; ++}; ++ ++/* ++ * The nested_vmx structure is part of vcpu_vmx, and holds information we need ++ * for correct emulation of VMX (i.e., nested VMX) on this vcpu. ++ */ ++struct nested_vmx { ++ /* Has the level1 guest done vmxon? */ ++ bool vmxon; ++ gpa_t vmxon_ptr; ++ bool pml_full; ++ ++ /* The guest-physical address of the current VMCS L1 keeps for L2 */ ++ gpa_t current_vmptr; ++ /* ++ * Cache of the guest's VMCS, existing outside of guest memory. ++ * Loaded from guest memory during VMPTRLD. Flushed to guest ++ * memory during VMCLEAR and VMPTRLD. ++ */ ++ struct vmcs12 *cached_vmcs12; ++ /* ++ * Cache of the guest's shadow VMCS, existing outside of guest ++ * memory. Loaded from guest memory during VM entry. Flushed ++ * to guest memory during VM exit. ++ */ ++ struct vmcs12 *cached_shadow_vmcs12; ++ /* ++ * Indicates if the shadow vmcs must be updated with the ++ * data hold by vmcs12 ++ */ ++ bool sync_shadow_vmcs; ++ bool dirty_vmcs12; ++ ++ bool change_vmcs01_virtual_apic_mode; ++ ++ /* L2 must run next, and mustn't decide to exit to L1. */ ++ bool nested_run_pending; ++ ++ struct loaded_vmcs vmcs02; ++ ++ /* ++ * Guest pages referred to in the vmcs02 with host-physical ++ * pointers, so we must keep them pinned while L2 runs. 
++ */ ++ struct page *apic_access_page; ++ struct page *virtual_apic_page; ++ struct page *pi_desc_page; ++ struct pi_desc *pi_desc; ++ bool pi_pending; ++ u16 posted_intr_nv; ++ ++ struct hrtimer preemption_timer; ++ bool preemption_timer_expired; ++ ++ /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ ++ u64 vmcs01_debugctl; ++ u64 vmcs01_guest_bndcfgs; ++ ++ u16 vpid02; ++ u16 last_vpid; ++ ++ struct nested_vmx_msrs msrs; ++ ++ /* SMM related state */ ++ struct { ++ /* in VMX operation on SMM entry? */ ++ bool vmxon; ++ /* in guest mode on SMM entry? */ ++ bool guest_mode; ++ } smm; ++}; ++ ++#define POSTED_INTR_ON 0 ++#define POSTED_INTR_SN 1 ++ ++/* Posted-Interrupt Descriptor */ ++struct pi_desc { ++ u32 pir[8]; /* Posted interrupt requested */ ++ union { ++ struct { ++ /* bit 256 - Outstanding Notification */ ++ u16 on : 1, ++ /* bit 257 - Suppress Notification */ ++ sn : 1, ++ /* bit 271:258 - Reserved */ ++ rsvd_1 : 14; ++ /* bit 279:272 - Notification Vector */ ++ u8 nv; ++ /* bit 287:280 - Reserved */ ++ u8 rsvd_2; ++ /* bit 319:288 - Notification Destination */ ++ u32 ndst; ++ }; ++ u64 control; ++ }; ++ u32 rsvd[6]; ++} __aligned(64); ++ ++static bool pi_test_and_set_on(struct pi_desc *pi_desc) ++{ ++ return test_and_set_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static bool pi_test_and_clear_on(struct pi_desc *pi_desc) ++{ ++ return test_and_clear_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) ++{ ++ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); ++} ++ ++static inline void pi_clear_sn(struct pi_desc *pi_desc) ++{ ++ return clear_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline void pi_set_sn(struct pi_desc *pi_desc) ++{ ++ return set_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline void pi_clear_on(struct pi_desc *pi_desc) ++{ ++ clear_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline int pi_test_on(struct pi_desc *pi_desc) ++{ ++ return test_bit(POSTED_INTR_ON, ++ (unsigned long *)&pi_desc->control); ++} ++ ++static inline int pi_test_sn(struct pi_desc *pi_desc) ++{ ++ return test_bit(POSTED_INTR_SN, ++ (unsigned long *)&pi_desc->control); ++} ++ ++struct vmx_msrs { ++ unsigned int nr; ++ struct vmx_msr_entry val[NR_AUTOLOAD_MSRS]; ++}; ++ ++struct vcpu_vmx { ++ struct kvm_vcpu vcpu; ++ unsigned long host_rsp; ++ u8 fail; ++ u8 msr_bitmap_mode; ++ u32 exit_intr_info; ++ u32 idt_vectoring_info; ++ ulong rflags; ++ struct shared_msr_entry *guest_msrs; ++ int nmsrs; ++ int save_nmsrs; ++ bool guest_msrs_dirty; ++ unsigned long host_idt_base; ++#ifdef CONFIG_X86_64 ++ u64 msr_host_kernel_gs_base; ++ u64 msr_guest_kernel_gs_base; ++#endif ++ ++ u64 spec_ctrl; ++ ++ u32 vm_entry_controls_shadow; ++ u32 vm_exit_controls_shadow; ++ u32 secondary_exec_control; ++ ++ /* ++ * loaded_vmcs points to the VMCS currently used in this vcpu. For a ++ * non-nested (L1) guest, it always points to vmcs01. For a nested ++ * guest (L2), it points to a different VMCS. loaded_cpu_state points ++ * to the VMCS whose state is loaded into the CPU registers that only ++ * need to be switched when transitioning to/from the kernel; a NULL ++ * value indicates that host state is loaded. 
++ */ ++ struct loaded_vmcs vmcs01; ++ struct loaded_vmcs *loaded_vmcs; ++ struct loaded_vmcs *loaded_cpu_state; ++ bool __launched; /* temporary, used in vmx_vcpu_run */ ++ struct msr_autoload { ++ struct vmx_msrs guest; ++ struct vmx_msrs host; ++ } msr_autoload; ++ ++ struct { ++ int vm86_active; ++ ulong save_rflags; ++ struct kvm_segment segs[8]; ++ } rmode; ++ struct { ++ u32 bitmask; /* 4 bits per segment (1 bit per field) */ ++ struct kvm_save_segment { ++ u16 selector; ++ unsigned long base; ++ u32 limit; ++ u32 ar; ++ } seg[8]; ++ } segment_cache; ++ int vpid; ++ bool emulation_required; ++ ++ u32 exit_reason; ++ ++ /* Posted interrupt descriptor */ ++ struct pi_desc pi_desc; ++ ++ /* Support for a guest hypervisor (nested VMX) */ ++ struct nested_vmx nested; ++ ++ /* Dynamic PLE window. */ ++ int ple_window; ++ bool ple_window_dirty; ++ ++ bool req_immediate_exit; ++ ++ /* Support for PML */ ++#define PML_ENTITY_NUM 512 ++ struct page *pml_pg; ++ ++ /* apic deadline value in host tsc */ ++ u64 hv_deadline_tsc; ++ ++ u64 current_tsc_ratio; ++ ++ u32 host_pkru; ++ ++ unsigned long host_debugctlmsr; ++ ++ /* ++ * Only bits masked by msr_ia32_feature_control_valid_bits can be set in ++ * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included ++ * in msr_ia32_feature_control_valid_bits. ++ */ ++ u64 msr_ia32_feature_control; ++ u64 msr_ia32_feature_control_valid_bits; ++ u64 ept_pointer; ++}; ++ ++enum segment_cache_field { ++ SEG_FIELD_SEL = 0, ++ SEG_FIELD_BASE = 1, ++ SEG_FIELD_LIMIT = 2, ++ SEG_FIELD_AR = 3, ++ ++ SEG_FIELD_NR = 4 ++}; ++ ++static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) ++{ ++ return container_of(kvm, struct kvm_vmx, kvm); ++} ++ ++static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ++{ ++ return container_of(vcpu, struct vcpu_vmx, vcpu); ++} ++ ++static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) ++{ ++ return &(to_vmx(vcpu)->pi_desc); ++} ++ ++#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) ++#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) ++#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name) ++#define FIELD64(number, name) \ ++ FIELD(number, name), \ ++ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) ++ ++ ++static u16 shadow_read_only_fields[] = { ++#define SHADOW_FIELD_RO(x) x, ++#include "vmx_shadow_fields.h" ++}; ++static int max_shadow_read_only_fields = ++ ARRAY_SIZE(shadow_read_only_fields); ++ ++static u16 shadow_read_write_fields[] = { ++#define SHADOW_FIELD_RW(x) x, ++#include "vmx_shadow_fields.h" ++}; ++static int max_shadow_read_write_fields = ++ ARRAY_SIZE(shadow_read_write_fields); ++ ++static const unsigned short vmcs_field_to_offset_table[] = { ++ FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), ++ FIELD(POSTED_INTR_NV, posted_intr_nv), ++ FIELD(GUEST_ES_SELECTOR, guest_es_selector), ++ FIELD(GUEST_CS_SELECTOR, guest_cs_selector), ++ FIELD(GUEST_SS_SELECTOR, guest_ss_selector), ++ FIELD(GUEST_DS_SELECTOR, guest_ds_selector), ++ FIELD(GUEST_FS_SELECTOR, guest_fs_selector), ++ FIELD(GUEST_GS_SELECTOR, guest_gs_selector), ++ FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), ++ FIELD(GUEST_TR_SELECTOR, guest_tr_selector), ++ FIELD(GUEST_INTR_STATUS, guest_intr_status), ++ FIELD(GUEST_PML_INDEX, guest_pml_index), ++ FIELD(HOST_ES_SELECTOR, host_es_selector), ++ FIELD(HOST_CS_SELECTOR, host_cs_selector), ++ FIELD(HOST_SS_SELECTOR, host_ss_selector), ++ FIELD(HOST_DS_SELECTOR, host_ds_selector), ++ FIELD(HOST_FS_SELECTOR, host_fs_selector), 
++ FIELD(HOST_GS_SELECTOR, host_gs_selector), ++ FIELD(HOST_TR_SELECTOR, host_tr_selector), ++ FIELD64(IO_BITMAP_A, io_bitmap_a), ++ FIELD64(IO_BITMAP_B, io_bitmap_b), ++ FIELD64(MSR_BITMAP, msr_bitmap), ++ FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr), ++ FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr), ++ FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr), ++ FIELD64(PML_ADDRESS, pml_address), ++ FIELD64(TSC_OFFSET, tsc_offset), ++ FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), ++ FIELD64(APIC_ACCESS_ADDR, apic_access_addr), ++ FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), ++ FIELD64(VM_FUNCTION_CONTROL, vm_function_control), ++ FIELD64(EPT_POINTER, ept_pointer), ++ FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), ++ FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), ++ FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), ++ FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), ++ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), ++ FIELD64(VMREAD_BITMAP, vmread_bitmap), ++ FIELD64(VMWRITE_BITMAP, vmwrite_bitmap), ++ FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), ++ FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), ++ FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), ++ FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), ++ FIELD64(GUEST_IA32_PAT, guest_ia32_pat), ++ FIELD64(GUEST_IA32_EFER, guest_ia32_efer), ++ FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl), ++ FIELD64(GUEST_PDPTR0, guest_pdptr0), ++ FIELD64(GUEST_PDPTR1, guest_pdptr1), ++ FIELD64(GUEST_PDPTR2, guest_pdptr2), ++ FIELD64(GUEST_PDPTR3, guest_pdptr3), ++ FIELD64(GUEST_BNDCFGS, guest_bndcfgs), ++ FIELD64(HOST_IA32_PAT, host_ia32_pat), ++ FIELD64(HOST_IA32_EFER, host_ia32_efer), ++ FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), ++ FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control), ++ FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control), ++ FIELD(EXCEPTION_BITMAP, exception_bitmap), ++ FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask), ++ FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match), ++ FIELD(CR3_TARGET_COUNT, cr3_target_count), ++ FIELD(VM_EXIT_CONTROLS, vm_exit_controls), ++ FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count), ++ FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count), ++ FIELD(VM_ENTRY_CONTROLS, vm_entry_controls), ++ FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count), ++ FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field), ++ FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code), ++ FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len), ++ FIELD(TPR_THRESHOLD, tpr_threshold), ++ FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control), ++ FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error), ++ FIELD(VM_EXIT_REASON, vm_exit_reason), ++ FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info), ++ FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code), ++ FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field), ++ FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code), ++ FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len), ++ FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info), ++ FIELD(GUEST_ES_LIMIT, guest_es_limit), ++ FIELD(GUEST_CS_LIMIT, guest_cs_limit), ++ FIELD(GUEST_SS_LIMIT, guest_ss_limit), ++ FIELD(GUEST_DS_LIMIT, guest_ds_limit), ++ FIELD(GUEST_FS_LIMIT, guest_fs_limit), ++ FIELD(GUEST_GS_LIMIT, guest_gs_limit), ++ FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit), ++ FIELD(GUEST_TR_LIMIT, guest_tr_limit), ++ FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit), ++ FIELD(GUEST_IDTR_LIMIT, 
guest_idtr_limit), ++ FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes), ++ FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes), ++ FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes), ++ FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes), ++ FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes), ++ FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes), ++ FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes), ++ FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes), ++ FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info), ++ FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), ++ FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), ++ FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), ++ FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), ++ FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), ++ FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), ++ FIELD(CR0_READ_SHADOW, cr0_read_shadow), ++ FIELD(CR4_READ_SHADOW, cr4_read_shadow), ++ FIELD(CR3_TARGET_VALUE0, cr3_target_value0), ++ FIELD(CR3_TARGET_VALUE1, cr3_target_value1), ++ FIELD(CR3_TARGET_VALUE2, cr3_target_value2), ++ FIELD(CR3_TARGET_VALUE3, cr3_target_value3), ++ FIELD(EXIT_QUALIFICATION, exit_qualification), ++ FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address), ++ FIELD(GUEST_CR0, guest_cr0), ++ FIELD(GUEST_CR3, guest_cr3), ++ FIELD(GUEST_CR4, guest_cr4), ++ FIELD(GUEST_ES_BASE, guest_es_base), ++ FIELD(GUEST_CS_BASE, guest_cs_base), ++ FIELD(GUEST_SS_BASE, guest_ss_base), ++ FIELD(GUEST_DS_BASE, guest_ds_base), ++ FIELD(GUEST_FS_BASE, guest_fs_base), ++ FIELD(GUEST_GS_BASE, guest_gs_base), ++ FIELD(GUEST_LDTR_BASE, guest_ldtr_base), ++ FIELD(GUEST_TR_BASE, guest_tr_base), ++ FIELD(GUEST_GDTR_BASE, guest_gdtr_base), ++ FIELD(GUEST_IDTR_BASE, guest_idtr_base), ++ FIELD(GUEST_DR7, guest_dr7), ++ FIELD(GUEST_RSP, guest_rsp), ++ FIELD(GUEST_RIP, guest_rip), ++ FIELD(GUEST_RFLAGS, guest_rflags), ++ FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), ++ FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), ++ FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), ++ FIELD(HOST_CR0, host_cr0), ++ FIELD(HOST_CR3, host_cr3), ++ FIELD(HOST_CR4, host_cr4), ++ FIELD(HOST_FS_BASE, host_fs_base), ++ FIELD(HOST_GS_BASE, host_gs_base), ++ FIELD(HOST_TR_BASE, host_tr_base), ++ FIELD(HOST_GDTR_BASE, host_gdtr_base), ++ FIELD(HOST_IDTR_BASE, host_idtr_base), ++ FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp), ++ FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), ++ FIELD(HOST_RSP, host_rsp), ++ FIELD(HOST_RIP, host_rip), ++}; ++ ++static inline short vmcs_field_to_offset(unsigned long field) ++{ ++ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); ++ unsigned short offset; ++ unsigned index; ++ ++ if (field >> 15) ++ return -ENOENT; ++ ++ index = ROL16(field, 6); ++ if (index >= size) ++ return -ENOENT; ++ ++ index = array_index_nospec(index, size); ++ offset = vmcs_field_to_offset_table[index]; ++ if (offset == 0) ++ return -ENOENT; ++ return offset; ++} ++ ++static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.cached_vmcs12; ++} ++ ++static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.cached_shadow_vmcs12; ++} ++ ++static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); ++static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); ++static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); ++static bool vmx_xsaves_supported(void); ++static void vmx_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg); ++static void vmx_get_segment(struct 
kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg); ++static bool guest_state_valid(struct kvm_vcpu *vcpu); ++static u32 vmx_segment_access_rights(struct kvm_segment *var); ++static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); ++static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); ++static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); ++static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, ++ u16 error_code); ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); ++static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type); ++ ++static DEFINE_PER_CPU(struct vmcs *, vmxarea); ++static DEFINE_PER_CPU(struct vmcs *, current_vmcs); ++/* ++ * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed ++ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. ++ */ ++static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); ++ ++/* ++ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we ++ * can find which vCPU should be waken up. ++ */ ++static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); ++static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); ++ ++enum { ++ VMX_VMREAD_BITMAP, ++ VMX_VMWRITE_BITMAP, ++ VMX_BITMAP_NR ++}; ++ ++static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; ++ ++#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) ++#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) ++ ++static bool cpu_has_load_ia32_efer; ++static bool cpu_has_load_perf_global_ctrl; ++ ++static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); ++static DEFINE_SPINLOCK(vmx_vpid_lock); ++ ++static struct vmcs_config { ++ int size; ++ int order; ++ u32 basic_cap; ++ u32 revision_id; ++ u32 pin_based_exec_ctrl; ++ u32 cpu_based_exec_ctrl; ++ u32 cpu_based_2nd_exec_ctrl; ++ u32 vmexit_ctrl; ++ u32 vmentry_ctrl; ++ struct nested_vmx_msrs nested; ++} vmcs_config; ++ ++static struct vmx_capability { ++ u32 ept; ++ u32 vpid; ++} vmx_capability; ++ ++#define VMX_SEGMENT_FIELD(seg) \ ++ [VCPU_SREG_##seg] = { \ ++ .selector = GUEST_##seg##_SELECTOR, \ ++ .base = GUEST_##seg##_BASE, \ ++ .limit = GUEST_##seg##_LIMIT, \ ++ .ar_bytes = GUEST_##seg##_AR_BYTES, \ ++ } ++ ++static const struct kvm_vmx_segment_field { ++ unsigned selector; ++ unsigned base; ++ unsigned limit; ++ unsigned ar_bytes; ++} kvm_vmx_segment_fields[] = { ++ VMX_SEGMENT_FIELD(CS), ++ VMX_SEGMENT_FIELD(DS), ++ VMX_SEGMENT_FIELD(ES), ++ VMX_SEGMENT_FIELD(FS), ++ VMX_SEGMENT_FIELD(GS), ++ VMX_SEGMENT_FIELD(SS), ++ VMX_SEGMENT_FIELD(TR), ++ VMX_SEGMENT_FIELD(LDTR), ++}; ++ ++static u64 host_efer; ++ ++static void ept_save_pdptrs(struct kvm_vcpu *vcpu); ++ ++/* ++ * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it ++ * away by decrementing the array size. 
++ */ ++static const u32 vmx_msr_index[] = { ++#ifdef CONFIG_X86_64 ++ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, ++#endif ++ MSR_EFER, MSR_TSC_AUX, MSR_STAR, ++}; ++ ++DEFINE_STATIC_KEY_FALSE(enable_evmcs); ++ ++#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs)) ++ ++#define KVM_EVMCS_VERSION 1 ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++static bool __read_mostly enlightened_vmcs = true; ++module_param(enlightened_vmcs, bool, 0444); ++ ++static inline void evmcs_write64(unsigned long field, u64 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u64 *)((char *)current_evmcs + offset) = value; ++ ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline void evmcs_write32(unsigned long field, u32 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u32 *)((char *)current_evmcs + offset) = value; ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline void evmcs_write16(unsigned long field, u16 value) ++{ ++ u16 clean_field; ++ int offset = get_evmcs_offset(field, &clean_field); ++ ++ if (offset < 0) ++ return; ++ ++ *(u16 *)((char *)current_evmcs + offset) = value; ++ current_evmcs->hv_clean_fields &= ~clean_field; ++} ++ ++static inline u64 evmcs_read64(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u64 *)((char *)current_evmcs + offset); ++} ++ ++static inline u32 evmcs_read32(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u32 *)((char *)current_evmcs + offset); ++} ++ ++static inline u16 evmcs_read16(unsigned long field) ++{ ++ int offset = get_evmcs_offset(field, NULL); ++ ++ if (offset < 0) ++ return 0; ++ ++ return *(u16 *)((char *)current_evmcs + offset); ++} ++ ++static inline void evmcs_touch_msr_bitmap(void) ++{ ++ if (unlikely(!current_evmcs)) ++ return; ++ ++ if (current_evmcs->hv_enlightenments_control.msr_bitmap) ++ current_evmcs->hv_clean_fields &= ++ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; ++} ++ ++static void evmcs_load(u64 phys_addr) ++{ ++ struct hv_vp_assist_page *vp_ap = ++ hv_get_vp_assist_page(smp_processor_id()); ++ ++ vp_ap->current_nested_vmcs = phys_addr; ++ vp_ap->enlighten_vmentry = 1; ++} ++ ++static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) ++{ ++ /* ++ * Enlightened VMCSv1 doesn't support these: ++ * ++ * POSTED_INTR_NV = 0x00000002, ++ * GUEST_INTR_STATUS = 0x00000810, ++ * APIC_ACCESS_ADDR = 0x00002014, ++ * POSTED_INTR_DESC_ADDR = 0x00002016, ++ * EOI_EXIT_BITMAP0 = 0x0000201c, ++ * EOI_EXIT_BITMAP1 = 0x0000201e, ++ * EOI_EXIT_BITMAP2 = 0x00002020, ++ * EOI_EXIT_BITMAP3 = 0x00002022, ++ */ ++ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ++ ~SECONDARY_EXEC_APIC_REGISTER_VIRT; ++ ++ /* ++ * GUEST_PML_INDEX = 0x00000812, ++ * PML_ADDRESS = 0x0000200e, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; ++ ++ /* VM_FUNCTION_CONTROL = 0x00002018, */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; ++ ++ /* ++ * EPTP_LIST_ADDRESS = 0x00002024, ++ * VMREAD_BITMAP = 0x00002026, ++ * VMWRITE_BITMAP = 0x00002028, ++ */ ++ 
vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ /* ++ * TSC_MULTIPLIER = 0x00002032, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; ++ ++ /* ++ * PLE_GAP = 0x00004020, ++ * PLE_WINDOW = 0x00004022, ++ */ ++ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++ ++ /* ++ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, ++ */ ++ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ ++ /* ++ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, ++ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, ++ */ ++ vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; ++ vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; ++ ++ /* ++ * Currently unsupported in KVM: ++ * GUEST_IA32_RTIT_CTL = 0x00002814, ++ */ ++} ++ ++/* check_ept_pointer() should be under protection of ept_pointer_lock. */ ++static void check_ept_pointer_match(struct kvm *kvm) ++{ ++ struct kvm_vcpu *vcpu; ++ u64 tmp_eptp = INVALID_PAGE; ++ int i; ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (!VALID_PAGE(tmp_eptp)) { ++ tmp_eptp = to_vmx(vcpu)->ept_pointer; ++ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { ++ to_kvm_vmx(kvm)->ept_pointers_match ++ = EPT_POINTERS_MISMATCH; ++ return; ++ } ++ } ++ ++ to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; ++} ++ ++static int vmx_hv_remote_flush_tlb(struct kvm *kvm) ++{ ++ int ret; ++ ++ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ ++ if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) ++ check_ept_pointer_match(kvm); ++ ++ if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { ++ ret = -ENOTSUPP; ++ goto out; ++ } ++ ++ /* ++ * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the ++ * base of EPT PML4 table, strip off EPT configuration information. 
++ */ ++ ret = hyperv_flush_guest_mapping( ++ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); ++ ++out: ++ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ return ret; ++} ++#else /* !IS_ENABLED(CONFIG_HYPERV) */ ++static inline void evmcs_write64(unsigned long field, u64 value) {} ++static inline void evmcs_write32(unsigned long field, u32 value) {} ++static inline void evmcs_write16(unsigned long field, u16 value) {} ++static inline u64 evmcs_read64(unsigned long field) { return 0; } ++static inline u32 evmcs_read32(unsigned long field) { return 0; } ++static inline u16 evmcs_read16(unsigned long field) { return 0; } ++static inline void evmcs_load(u64 phys_addr) {} ++static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} ++static inline void evmcs_touch_msr_bitmap(void) {} ++#endif /* IS_ENABLED(CONFIG_HYPERV) */ ++ ++static inline bool is_exception_n(u32 intr_info, u8 vector) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | ++ INTR_INFO_VALID_MASK)) == ++ (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool is_debug(u32 intr_info) ++{ ++ return is_exception_n(intr_info, DB_VECTOR); ++} ++ ++static inline bool is_breakpoint(u32 intr_info) ++{ ++ return is_exception_n(intr_info, BP_VECTOR); ++} ++ ++static inline bool is_page_fault(u32 intr_info) ++{ ++ return is_exception_n(intr_info, PF_VECTOR); ++} ++ ++static inline bool is_no_device(u32 intr_info) ++{ ++ return is_exception_n(intr_info, NM_VECTOR); ++} ++ ++static inline bool is_invalid_opcode(u32 intr_info) ++{ ++ return is_exception_n(intr_info, UD_VECTOR); ++} ++ ++static inline bool is_gp_fault(u32 intr_info) ++{ ++ return is_exception_n(intr_info, GP_VECTOR); ++} ++ ++static inline bool is_external_interrupt(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool is_machine_check(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | ++ INTR_INFO_VALID_MASK)) == ++ (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); ++} ++ ++/* Undocumented: icebp/int1 */ ++static inline bool is_icebp(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); ++} ++ ++static inline bool cpu_has_vmx_msr_bitmap(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; ++} ++ ++static inline bool cpu_has_vmx_tpr_shadow(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; ++} ++ ++static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu) ++{ ++ return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu); ++} ++ ++static inline bool cpu_has_secondary_exec_ctrls(void) ++{ ++ return vmcs_config.cpu_based_exec_ctrl & ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++} ++ ++static inline bool cpu_has_vmx_virtualize_apic_accesses(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++} ++ ++static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++} ++ ++static inline bool cpu_has_vmx_apic_register_virt(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_APIC_REGISTER_VIRT; ++} ++ ++static inline bool cpu_has_vmx_virtual_intr_delivery(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & 
++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; ++} ++ ++static inline bool cpu_has_vmx_encls_vmexit(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENCLS_EXITING; ++} ++ ++/* ++ * Comment's format: document - errata name - stepping - processor name. ++ * Refer from ++ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp ++ */ ++static u32 vmx_preemption_cpu_tfms[] = { ++/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */ ++0x000206E6, ++/* 323056.pdf - AAX65 - C2 - Xeon L3406 */ ++/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */ ++/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */ ++0x00020652, ++/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */ ++0x00020655, ++/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */ ++/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */ ++/* ++ * 320767.pdf - AAP86 - B1 - ++ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile ++ */ ++0x000106E5, ++/* 321333.pdf - AAM126 - C0 - Xeon 3500 */ ++0x000106A0, ++/* 321333.pdf - AAM126 - C1 - Xeon 3500 */ ++0x000106A1, ++/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */ ++0x000106A4, ++ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */ ++ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */ ++ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */ ++0x000106A5, ++}; ++ ++static inline bool cpu_has_broken_vmx_preemption_timer(void) ++{ ++ u32 eax = cpuid_eax(0x00000001), i; ++ ++ /* Clear the reserved bits */ ++ eax &= ~(0x3U << 14 | 0xfU << 28); ++ for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++) ++ if (eax == vmx_preemption_cpu_tfms[i]) ++ return true; ++ ++ return false; ++} ++ ++static inline bool cpu_has_vmx_preemption_timer(void) ++{ ++ return vmcs_config.pin_based_exec_ctrl & ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++} ++ ++static inline bool cpu_has_vmx_posted_intr(void) ++{ ++ return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && ++ vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; ++} ++ ++static inline bool cpu_has_vmx_apicv(void) ++{ ++ return cpu_has_vmx_apic_register_virt() && ++ cpu_has_vmx_virtual_intr_delivery() && ++ cpu_has_vmx_posted_intr(); ++} ++ ++static inline bool cpu_has_vmx_flexpriority(void) ++{ ++ return cpu_has_vmx_tpr_shadow() && ++ cpu_has_vmx_virtualize_apic_accesses(); ++} ++ ++static inline bool cpu_has_vmx_ept_execute_only(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_2m_page(void) ++{ ++ return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_1g_page(void) ++{ ++ return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_4levels(void) ++{ ++ return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_mt_wb(void) ++{ ++ return vmx_capability.ept & VMX_EPTP_WB_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_5levels(void) ++{ ++ return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept_ad_bits(void) ++{ ++ return vmx_capability.ept & VMX_EPT_AD_BIT; ++} ++ ++static inline bool cpu_has_vmx_invept_context(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invept_global(void) ++{ ++ return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid_individual_addr(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT; ++} ++ ++static 
inline bool cpu_has_vmx_invvpid_single(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid_global(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; ++} ++ ++static inline bool cpu_has_vmx_invvpid(void) ++{ ++ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; ++} ++ ++static inline bool cpu_has_vmx_ept(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_EPT; ++} ++ ++static inline bool cpu_has_vmx_unrestricted_guest(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_UNRESTRICTED_GUEST; ++} ++ ++static inline bool cpu_has_vmx_ple(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++} ++ ++static inline bool cpu_has_vmx_basic_inout(void) ++{ ++ return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); ++} ++ ++static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) ++{ ++ return flexpriority_enabled && lapic_in_kernel(vcpu); ++} ++ ++static inline bool cpu_has_vmx_vpid(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_VPID; ++} ++ ++static inline bool cpu_has_vmx_rdtscp(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDTSCP; ++} ++ ++static inline bool cpu_has_vmx_invpcid(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_INVPCID; ++} ++ ++static inline bool cpu_has_virtual_nmis(void) ++{ ++ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; ++} ++ ++static inline bool cpu_has_vmx_wbinvd_exit(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_WBINVD_EXITING; ++} ++ ++static inline bool cpu_has_vmx_shadow_vmcs(void) ++{ ++ u64 vmx_msr; ++ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); ++ /* check if the cpu supports writing r/o exit information fields */ ++ if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) ++ return false; ++ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_SHADOW_VMCS; ++} ++ ++static inline bool cpu_has_vmx_pml(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; ++} ++ ++static inline bool cpu_has_vmx_tsc_scaling(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_TSC_SCALING; ++} ++ ++static inline bool cpu_has_vmx_vmfunc(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_ENABLE_VMFUNC; ++} ++ ++static bool vmx_umip_emulated(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_DESC; ++} ++ ++static inline bool report_flexpriority(void) ++{ ++ return flexpriority_enabled; ++} ++ ++static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) ++{ ++ return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); ++} ++ ++/* ++ * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE ++ * to modify any valid field of the VMCS, or are the VM-exit ++ * information fields read-only? 
++ */ ++static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.misc_low & ++ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; ++} ++ ++static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS; ++} ++ ++static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.procbased_ctls_high & ++ CPU_BASED_MONITOR_TRAP_FLAG; ++} ++ ++static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu) ++{ ++ return to_vmx(vcpu)->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_SHADOW_VMCS; ++} ++ ++static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) ++{ ++ return vmcs12->cpu_based_vm_exec_control & bit; ++} ++ ++static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) ++{ ++ return (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && ++ (vmcs12->secondary_vm_exec_control & bit); ++} ++ ++static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++} ++ ++static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING; ++} ++ ++static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; ++} ++ ++static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); ++} ++ ++static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); ++} ++ ++static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML); ++} ++ ++static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); ++} ++ ++static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID); ++} ++ ++static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); ++} ++ ++static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++} ++ ++static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) ++{ ++ return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; ++} ++ ++static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); ++} ++ ++static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has_vmfunc(vmcs12) && ++ (vmcs12->vm_function_control & ++ VMX_VMFUNC_EPTP_SWITCHING); ++} ++ ++static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12) ++{ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS); ++} ++ ++static inline bool is_nmi(u32 intr_info) ++{ ++ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) ++ == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); ++} ++ ++static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, ++ u32 exit_intr_info, ++ unsigned long exit_qualification); ++static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, ++ struct 
vmcs12 *vmcs12, ++ u32 reason, unsigned long qualification); ++ ++static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) ++{ ++ int i; ++ ++ for (i = 0; i < vmx->nmsrs; ++i) ++ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) ++ return i; ++ return -1; ++} ++ ++static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) ++{ ++ struct { ++ u64 vpid : 16; ++ u64 rsvd : 48; ++ u64 gva; ++ } operand = { vpid, 0, gva }; ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&operand), "c"(ext) ++ : "memory"); ++ BUG_ON(error); ++} ++ ++static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) ++{ ++ struct { ++ u64 eptp, gpa; ++ } operand = {eptp, gpa}; ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na) ++ : CC_OUT(na) (error) : "a" (&operand), "c" (ext) ++ : "memory"); ++ BUG_ON(error); ++} ++ ++static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) ++{ ++ int i; ++ ++ i = __find_msr_index(vmx, msr); ++ if (i >= 0) ++ return &vmx->guest_msrs[i]; ++ return NULL; ++} ++ ++static void vmcs_clear(struct vmcs *vmcs) ++{ ++ u64 phys_addr = __pa(vmcs); ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) ++ : "memory"); ++ if (unlikely(error)) ++ printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", ++ vmcs, phys_addr); ++} ++ ++static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) ++{ ++ vmcs_clear(loaded_vmcs->vmcs); ++ if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) ++ vmcs_clear(loaded_vmcs->shadow_vmcs); ++ loaded_vmcs->cpu = -1; ++ loaded_vmcs->launched = 0; ++} ++ ++static void vmcs_load(struct vmcs *vmcs) ++{ ++ u64 phys_addr = __pa(vmcs); ++ bool error; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_load(phys_addr); ++ ++ asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) ++ : "memory"); ++ if (unlikely(error)) ++ printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", ++ vmcs, phys_addr); ++} ++ ++#ifdef CONFIG_KEXEC_CORE ++/* ++ * This bitmap is used to indicate whether the vmclear ++ * operation is enabled on all cpus. All disabled by ++ * default. 
++ */ ++static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; ++ ++static inline void crash_enable_local_vmclear(int cpu) ++{ ++ cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static inline void crash_disable_local_vmclear(int cpu) ++{ ++ cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static inline int crash_local_vmclear_enabled(int cpu) ++{ ++ return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); ++} ++ ++static void crash_vmclear_local_loaded_vmcss(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ struct loaded_vmcs *v; ++ ++ if (!crash_local_vmclear_enabled(cpu)) ++ return; ++ ++ list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), ++ loaded_vmcss_on_cpu_link) ++ vmcs_clear(v->vmcs); ++} ++#else ++static inline void crash_enable_local_vmclear(int cpu) { } ++static inline void crash_disable_local_vmclear(int cpu) { } ++#endif /* CONFIG_KEXEC_CORE */ ++ ++static void __loaded_vmcs_clear(void *arg) ++{ ++ struct loaded_vmcs *loaded_vmcs = arg; ++ int cpu = raw_smp_processor_id(); ++ ++ if (loaded_vmcs->cpu != cpu) ++ return; /* vcpu migration can race with cpu offline */ ++ if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) ++ per_cpu(current_vmcs, cpu) = NULL; ++ crash_disable_local_vmclear(cpu); ++ list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); ++ ++ /* ++ * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link ++ * is before setting loaded_vmcs->vcpu to -1 which is done in ++ * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist ++ * then adds the vmcs into percpu list before it is deleted. ++ */ ++ smp_wmb(); ++ ++ loaded_vmcs_init(loaded_vmcs); ++ crash_enable_local_vmclear(cpu); ++} ++ ++static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) ++{ ++ int cpu = loaded_vmcs->cpu; ++ ++ if (cpu != -1) ++ smp_call_function_single(cpu, ++ __loaded_vmcs_clear, loaded_vmcs, 1); ++} ++ ++static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) ++{ ++ if (vpid == 0) ++ return true; ++ ++ if (cpu_has_vmx_invvpid_individual_addr()) { ++ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); ++ return true; ++ } ++ ++ return false; ++} ++ ++static inline void vpid_sync_vcpu_single(int vpid) ++{ ++ if (vpid == 0) ++ return; ++ ++ if (cpu_has_vmx_invvpid_single()) ++ __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); ++} ++ ++static inline void vpid_sync_vcpu_global(void) ++{ ++ if (cpu_has_vmx_invvpid_global()) ++ __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); ++} ++ ++static inline void vpid_sync_context(int vpid) ++{ ++ if (cpu_has_vmx_invvpid_single()) ++ vpid_sync_vcpu_single(vpid); ++ else ++ vpid_sync_vcpu_global(); ++} ++ ++static inline void ept_sync_global(void) ++{ ++ __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); ++} ++ ++static inline void ept_sync_context(u64 eptp) ++{ ++ if (cpu_has_vmx_invept_context()) ++ __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); ++ else ++ ept_sync_global(); ++} ++ ++static __always_inline void vmcs_check16(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, ++ "16-bit accessor invalid for 64-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "16-bit accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "16-bit accessor invalid for 32-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "16-bit accessor invalid for natural width field"); ++} ++ ++static 
__always_inline void vmcs_check32(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "32-bit accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "32-bit accessor invalid for natural width field"); ++} ++ ++static __always_inline void vmcs_check64(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "64-bit accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "64-bit accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "64-bit accessor invalid for 32-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, ++ "64-bit accessor invalid for natural width field"); ++} ++ ++static __always_inline void vmcs_checkl(unsigned long field) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, ++ "Natural width accessor invalid for 16-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, ++ "Natural width accessor invalid for 64-bit field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, ++ "Natural width accessor invalid for 64-bit high field"); ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, ++ "Natural width accessor invalid for 32-bit field"); ++} ++ ++static __always_inline unsigned long __vmcs_readl(unsigned long field) ++{ ++ unsigned long value; ++ ++ asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0") ++ : "=a"(value) : "d"(field) : "cc"); ++ return value; ++} ++ ++static __always_inline u16 vmcs_read16(unsigned long field) ++{ ++ vmcs_check16(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read16(field); ++ return __vmcs_readl(field); ++} ++ ++static __always_inline u32 vmcs_read32(unsigned long field) ++{ ++ vmcs_check32(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read32(field); ++ return __vmcs_readl(field); ++} ++ ++static __always_inline u64 vmcs_read64(unsigned long field) ++{ ++ vmcs_check64(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read64(field); ++#ifdef CONFIG_X86_64 ++ return __vmcs_readl(field); ++#else ++ return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); ++#endif ++} ++ ++static __always_inline unsigned long vmcs_readl(unsigned long field) ++{ ++ vmcs_checkl(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_read64(field); ++ return __vmcs_readl(field); ++} ++ ++static noinline void vmwrite_error(unsigned long field, unsigned long value) ++{ ++ printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", ++ field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); ++ dump_stack(); ++} ++ ++static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) ++{ ++ bool error; ++ ++ asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na) ++ : CC_OUT(na) (error) : "a"(value), "d"(field)); ++ if (unlikely(error)) ++ vmwrite_error(field, value); ++} ++ ++static __always_inline void vmcs_write16(unsigned long field, u16 value) ++{ ++ vmcs_check16(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write16(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_write32(unsigned long field, u32 value) ++{ ++ vmcs_check32(field); ++ if 
(static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_write64(unsigned long field, u64 value) ++{ ++ vmcs_check64(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write64(field, value); ++ ++ __vmcs_writel(field, value); ++#ifndef CONFIG_X86_64 ++ asm volatile (""); ++ __vmcs_writel(field+1, value >> 32); ++#endif ++} ++ ++static __always_inline void vmcs_writel(unsigned long field, unsigned long value) ++{ ++ vmcs_checkl(field); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write64(field, value); ++ ++ __vmcs_writel(field, value); ++} ++ ++static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, ++ "vmcs_clear_bits does not support 64-bit fields"); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, evmcs_read32(field) & ~mask); ++ ++ __vmcs_writel(field, __vmcs_readl(field) & ~mask); ++} ++ ++static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) ++{ ++ BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, ++ "vmcs_set_bits does not support 64-bit fields"); ++ if (static_branch_unlikely(&enable_evmcs)) ++ return evmcs_write32(field, evmcs_read32(field) | mask); ++ ++ __vmcs_writel(field, __vmcs_readl(field) | mask); ++} ++ ++static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx) ++{ ++ vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS); ++} ++ ++static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VM_ENTRY_CONTROLS, val); ++ vmx->vm_entry_controls_shadow = val; ++} ++ ++static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) ++{ ++ if (vmx->vm_entry_controls_shadow != val) ++ vm_entry_controls_init(vmx, val); ++} ++ ++static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) ++{ ++ return vmx->vm_entry_controls_shadow; ++} ++ ++ ++static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); ++} ++ ++static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); ++} ++ ++static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx) ++{ ++ vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS); ++} ++ ++static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VM_EXIT_CONTROLS, val); ++ vmx->vm_exit_controls_shadow = val; ++} ++ ++static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) ++{ ++ if (vmx->vm_exit_controls_shadow != val) ++ vm_exit_controls_init(vmx, val); ++} ++ ++static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) ++{ ++ return vmx->vm_exit_controls_shadow; ++} ++ ++ ++static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); ++} ++ ++static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) ++{ ++ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); ++} ++ ++static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) ++{ ++ vmx->segment_cache.bitmask = 0; ++} ++ ++static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, ++ unsigned field) ++{ ++ bool ret; ++ u32 mask = 1 << (seg * SEG_FIELD_NR + field); ++ ++ if 
(!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { ++ vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); ++ vmx->segment_cache.bitmask = 0; ++ } ++ ret = vmx->segment_cache.bitmask & mask; ++ vmx->segment_cache.bitmask |= mask; ++ return ret; ++} ++ ++static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u16 *p = &vmx->segment_cache.seg[seg].selector; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) ++ *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); ++ return *p; ++} ++ ++static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ ulong *p = &vmx->segment_cache.seg[seg].base; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) ++ *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); ++ return *p; ++} ++ ++static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u32 *p = &vmx->segment_cache.seg[seg].limit; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); ++ return *p; ++} ++ ++static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) ++{ ++ u32 *p = &vmx->segment_cache.seg[seg].ar; ++ ++ if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) ++ *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); ++ return *p; ++} ++ ++static void update_exception_bitmap(struct kvm_vcpu *vcpu) ++{ ++ u32 eb; ++ ++ eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | ++ (1u << DB_VECTOR) | (1u << AC_VECTOR); ++ /* ++ * Guest access to VMware backdoor ports could legitimately ++ * trigger #GP because of TSS I/O permission bitmap. ++ * We intercept those #GP and allow access to them anyway ++ * as VMware does. ++ */ ++ if (enable_vmware_backdoor) ++ eb |= (1u << GP_VECTOR); ++ if ((vcpu->guest_debug & ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == ++ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ++ eb |= 1u << BP_VECTOR; ++ if (to_vmx(vcpu)->rmode.vm86_active) ++ eb = ~0; ++ if (enable_ept) ++ eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ ++ ++ /* When we are running a nested L2 guest and L1 specified for it a ++ * certain exception bitmap, we must trap the same exceptions and pass ++ * them to L1. When running L2, we will only handle the exceptions ++ * specified above if L1 did not want them. ++ */ ++ if (is_guest_mode(vcpu)) ++ eb |= get_vmcs12(vcpu)->exception_bitmap; ++ ++ vmcs_write32(EXCEPTION_BITMAP, eb); ++} ++ ++/* ++ * Check if MSR is intercepted for currently loaded MSR bitmap. ++ */ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++/* ++ * Check if MSR is intercepted for L01 MSR bitmap. 
++ */ ++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, ++ unsigned long entry, unsigned long exit) ++{ ++ vm_entry_controls_clearbit(vmx, entry); ++ vm_exit_controls_clearbit(vmx, exit); ++} ++ ++static int find_msr(struct vmx_msrs *m, unsigned int msr) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < m->nr; ++i) { ++ if (m->val[i].index == msr) ++ return i; ++ } ++ return -ENOENT; ++} ++ ++static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) ++{ ++ int i; ++ struct msr_autoload *m = &vmx->msr_autoload; ++ ++ switch (msr) { ++ case MSR_EFER: ++ if (cpu_has_load_ia32_efer) { ++ clear_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_EFER, ++ VM_EXIT_LOAD_IA32_EFER); ++ return; ++ } ++ break; ++ case MSR_CORE_PERF_GLOBAL_CTRL: ++ if (cpu_has_load_perf_global_ctrl) { ++ clear_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); ++ return; ++ } ++ break; ++ } ++ i = find_msr(&m->guest, msr); ++ if (i < 0) ++ goto skip_guest; ++ --m->guest.nr; ++ m->guest.val[i] = m->guest.val[m->guest.nr]; ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); ++ ++skip_guest: ++ i = find_msr(&m->host, msr); ++ if (i < 0) ++ return; ++ ++ --m->host.nr; ++ m->host.val[i] = m->host.val[m->host.nr]; ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); ++} ++ ++static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, ++ unsigned long entry, unsigned long exit, ++ unsigned long guest_val_vmcs, unsigned long host_val_vmcs, ++ u64 guest_val, u64 host_val) ++{ ++ vmcs_write64(guest_val_vmcs, guest_val); ++ vmcs_write64(host_val_vmcs, host_val); ++ vm_entry_controls_setbit(vmx, entry); ++ vm_exit_controls_setbit(vmx, exit); ++} ++ ++static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, ++ u64 guest_val, u64 host_val, bool entry_only) ++{ ++ int i, j = 0; ++ struct msr_autoload *m = &vmx->msr_autoload; ++ ++ switch (msr) { ++ case MSR_EFER: ++ if (cpu_has_load_ia32_efer) { ++ add_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_EFER, ++ VM_EXIT_LOAD_IA32_EFER, ++ GUEST_IA32_EFER, ++ HOST_IA32_EFER, ++ guest_val, host_val); ++ return; ++ } ++ break; ++ case MSR_CORE_PERF_GLOBAL_CTRL: ++ if (cpu_has_load_perf_global_ctrl) { ++ add_atomic_switch_msr_special(vmx, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, ++ GUEST_IA32_PERF_GLOBAL_CTRL, ++ HOST_IA32_PERF_GLOBAL_CTRL, ++ guest_val, host_val); ++ return; ++ } ++ break; ++ case MSR_IA32_PEBS_ENABLE: ++ /* PEBS needs a quiescent period after being disabled (to write ++ * a record). Disabling PEBS through VMX MSR swapping doesn't ++ * provide that period, so a CPU could write host's record into ++ * guest's memory. ++ */ ++ wrmsrl(MSR_IA32_PEBS_ENABLE, 0); ++ } ++ ++ i = find_msr(&m->guest, msr); ++ if (!entry_only) ++ j = find_msr(&m->host, msr); ++ ++ if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) || ++ (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) { ++ printk_once(KERN_WARNING "Not enough msr switch entries. 
" ++ "Can't add msr %x\n", msr); ++ return; ++ } ++ if (i < 0) { ++ i = m->guest.nr++; ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr); ++ } ++ m->guest.val[i].index = msr; ++ m->guest.val[i].value = guest_val; ++ ++ if (entry_only) ++ return; ++ ++ if (j < 0) { ++ j = m->host.nr++; ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr); ++ } ++ m->host.val[j].index = msr; ++ m->host.val[j].value = host_val; ++} ++ ++static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) ++{ ++ u64 guest_efer = vmx->vcpu.arch.efer; ++ u64 ignore_bits = 0; ++ ++ /* Shadow paging assumes NX to be available. */ ++ if (!enable_ept) ++ guest_efer |= EFER_NX; ++ ++ /* ++ * LMA and LME handled by hardware; SCE meaningless outside long mode. ++ */ ++ ignore_bits |= EFER_SCE; ++#ifdef CONFIG_X86_64 ++ ignore_bits |= EFER_LMA | EFER_LME; ++ /* SCE is meaningful only in long mode on Intel */ ++ if (guest_efer & EFER_LMA) ++ ignore_bits &= ~(u64)EFER_SCE; ++#endif ++ ++ clear_atomic_switch_msr(vmx, MSR_EFER); ++ ++ /* ++ * On EPT, we can't emulate NX, so we must switch EFER atomically. ++ * On CPUs that support "load IA32_EFER", always switch EFER ++ * atomically, since it's faster than switching it manually. ++ */ ++ if (cpu_has_load_ia32_efer || ++ (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { ++ if (!(guest_efer & EFER_LMA)) ++ guest_efer &= ~EFER_LME; ++ if (guest_efer != host_efer) ++ add_atomic_switch_msr(vmx, MSR_EFER, ++ guest_efer, host_efer, false); ++ return false; ++ } else { ++ guest_efer &= ~ignore_bits; ++ guest_efer |= host_efer & ignore_bits; ++ ++ vmx->guest_msrs[efer_offset].data = guest_efer; ++ vmx->guest_msrs[efer_offset].mask = ~ignore_bits; ++ ++ return true; ++ } ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * On 32-bit kernels, VM exits still load the FS and GS bases from the ++ * VMCS rather than the segment table. KVM uses this helper to figure ++ * out the current bases to poke them into the VMCS before entry. ++ */ ++static unsigned long segment_base(u16 selector) ++{ ++ struct desc_struct *table; ++ unsigned long v; ++ ++ if (!(selector & ~SEGMENT_RPL_MASK)) ++ return 0; ++ ++ table = get_current_gdt_ro(); ++ ++ if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { ++ u16 ldt_selector = kvm_read_ldt(); ++ ++ if (!(ldt_selector & ~SEGMENT_RPL_MASK)) ++ return 0; ++ ++ table = (struct desc_struct *)segment_base(ldt_selector); ++ } ++ v = get_desc_base(&table[selector >> 3]); ++ return v; ++} ++#endif ++ ++static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs_host_state *host_state; ++#ifdef CONFIG_X86_64 ++ int cpu = raw_smp_processor_id(); ++#endif ++ unsigned long fs_base, gs_base; ++ u16 fs_sel, gs_sel; ++ int i; ++ ++ vmx->req_immediate_exit = false; ++ ++ /* ++ * Note that guest MSRs to be saved/restored can also be changed ++ * when guest state is loaded. This happens when guest transitions ++ * to/from long-mode by setting MSR_EFER.LMA. ++ */ ++ if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { ++ vmx->guest_msrs_dirty = false; ++ for (i = 0; i < vmx->save_nmsrs; ++i) ++ kvm_set_shared_msr(vmx->guest_msrs[i].index, ++ vmx->guest_msrs[i].data, ++ vmx->guest_msrs[i].mask); ++ ++ } ++ ++ if (vmx->loaded_cpu_state) ++ return; ++ ++ vmx->loaded_cpu_state = vmx->loaded_vmcs; ++ host_state = &vmx->loaded_cpu_state->host_state; ++ ++ /* ++ * Set host fs and gs selectors. Unfortunately, 22.2.3 does not ++ * allow segment selectors with cpl > 0 or ti == 1. 
++ */ ++ host_state->ldt_sel = kvm_read_ldt(); ++ ++#ifdef CONFIG_X86_64 ++ savesegment(ds, host_state->ds_sel); ++ savesegment(es, host_state->es_sel); ++ ++ gs_base = cpu_kernelmode_gs_base(cpu); ++ if (likely(is_64bit_mm(current->mm))) { ++ save_fsgs_for_kvm(); ++ fs_sel = current->thread.fsindex; ++ gs_sel = current->thread.gsindex; ++ fs_base = current->thread.fsbase; ++ vmx->msr_host_kernel_gs_base = current->thread.gsbase; ++ } else { ++ savesegment(fs, fs_sel); ++ savesegment(gs, gs_sel); ++ fs_base = read_msr(MSR_FS_BASE); ++ vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); ++ } ++ ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++#else ++ savesegment(fs, fs_sel); ++ savesegment(gs, gs_sel); ++ fs_base = segment_base(fs_sel); ++ gs_base = segment_base(gs_sel); ++#endif ++ ++ if (unlikely(fs_sel != host_state->fs_sel)) { ++ if (!(fs_sel & 7)) ++ vmcs_write16(HOST_FS_SELECTOR, fs_sel); ++ else ++ vmcs_write16(HOST_FS_SELECTOR, 0); ++ host_state->fs_sel = fs_sel; ++ } ++ if (unlikely(gs_sel != host_state->gs_sel)) { ++ if (!(gs_sel & 7)) ++ vmcs_write16(HOST_GS_SELECTOR, gs_sel); ++ else ++ vmcs_write16(HOST_GS_SELECTOR, 0); ++ host_state->gs_sel = gs_sel; ++ } ++ if (unlikely(fs_base != host_state->fs_base)) { ++ vmcs_writel(HOST_FS_BASE, fs_base); ++ host_state->fs_base = fs_base; ++ } ++ if (unlikely(gs_base != host_state->gs_base)) { ++ vmcs_writel(HOST_GS_BASE, gs_base); ++ host_state->gs_base = gs_base; ++ } ++} ++ ++static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) ++{ ++ struct vmcs_host_state *host_state; ++ ++ if (!vmx->loaded_cpu_state) ++ return; ++ ++ WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); ++ host_state = &vmx->loaded_cpu_state->host_state; ++ ++ ++vmx->vcpu.stat.host_state_reload; ++ vmx->loaded_cpu_state = NULL; ++ ++#ifdef CONFIG_X86_64 ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++#endif ++ if (host_state->ldt_sel || (host_state->gs_sel & 7)) { ++ kvm_load_ldt(host_state->ldt_sel); ++#ifdef CONFIG_X86_64 ++ load_gs_index(host_state->gs_sel); ++#else ++ loadsegment(gs, host_state->gs_sel); ++#endif ++ } ++ if (host_state->fs_sel & 7) ++ loadsegment(fs, host_state->fs_sel); ++#ifdef CONFIG_X86_64 ++ if (unlikely(host_state->ds_sel | host_state->es_sel)) { ++ loadsegment(ds, host_state->ds_sel); ++ loadsegment(es, host_state->es_sel); ++ } ++#endif ++ invalidate_tss_limit(); ++#ifdef CONFIG_X86_64 ++ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); ++#endif ++ load_fixmap_gdt(raw_smp_processor_id()); ++} ++ ++#ifdef CONFIG_X86_64 ++static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) ++{ ++ preempt_disable(); ++ if (vmx->loaded_cpu_state) ++ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); ++ preempt_enable(); ++ return vmx->msr_guest_kernel_gs_base; ++} ++ ++static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) ++{ ++ preempt_disable(); ++ if (vmx->loaded_cpu_state) ++ wrmsrl(MSR_KERNEL_GS_BASE, data); ++ preempt_enable(); ++ vmx->msr_guest_kernel_gs_base = data; ++} ++#endif ++ ++static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ struct pi_desc old, new; ++ unsigned int dest; ++ ++ /* ++ * In case of hot-plug or hot-unplug, we may have to undo ++ * vmx_vcpu_pi_put even if there is no assigned device. And we ++ * always keep PI.NDST up to date for simplicity: it makes the ++ * code easier, and CPU migration is not a fast path. 
++ */ ++ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) ++ return; ++ ++ /* ++ * First handle the simple case where no cmpxchg is necessary; just ++ * allow posting non-urgent interrupts. ++ * ++ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change ++ * PI.NDST: pi_post_block will do it for us and the wakeup_handler ++ * expects the VCPU to be on the blocked_vcpu_list that matches ++ * PI.NDST. ++ */ ++ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || ++ vcpu->cpu == cpu) { ++ pi_clear_sn(pi_desc); ++ return; ++ } ++ ++ /* The full case. */ ++ do { ++ old.control = new.control = pi_desc->control; ++ ++ dest = cpu_physical_id(cpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ new.sn = 0; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++} ++ ++static void decache_tsc_multiplier(struct vcpu_vmx *vmx) ++{ ++ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; ++ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); ++} ++ ++/* ++ * Switches to specified vcpu, until a matching vcpu_put(), but assumes ++ * vcpu mutex is already taken. ++ */ ++static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ bool already_loaded = vmx->loaded_vmcs->cpu == cpu; ++ ++ if (!already_loaded) { ++ loaded_vmcs_clear(vmx->loaded_vmcs); ++ local_irq_disable(); ++ crash_disable_local_vmclear(cpu); ++ ++ /* ++ * Read loaded_vmcs->cpu should be before fetching ++ * loaded_vmcs->loaded_vmcss_on_cpu_link. ++ * See the comments in __loaded_vmcs_clear(). ++ */ ++ smp_rmb(); ++ ++ list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, ++ &per_cpu(loaded_vmcss_on_cpu, cpu)); ++ crash_enable_local_vmclear(cpu); ++ local_irq_enable(); ++ } ++ ++ if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { ++ per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++ indirect_branch_prediction_barrier(); ++ } ++ ++ if (!already_loaded) { ++ void *gdt = get_current_gdt_ro(); ++ unsigned long sysenter_esp; ++ ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ ++ /* ++ * Linux uses per-cpu TSS and GDT, so set these when switching ++ * processors. See 22.2.4. ++ */ ++ vmcs_writel(HOST_TR_BASE, ++ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); ++ vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ ++ ++ /* ++ * VM exits change the host TR limit to 0x67 after a VM ++ * exit. This is okay, since 0x67 covers everything except ++ * the IO bitmap and have have code to handle the IO bitmap ++ * being lost after a VM exit. 
++ */ ++ BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); ++ ++ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); ++ vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ ++ ++ vmx->loaded_vmcs->cpu = cpu; ++ } ++ ++ /* Setup TSC multiplier */ ++ if (kvm_has_tsc_control && ++ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) ++ decache_tsc_multiplier(vmx); ++ ++ vmx_vcpu_pi_load(vcpu, cpu); ++ vmx->host_pkru = read_pkru(); ++ vmx->host_debugctlmsr = get_debugctlmsr(); ++} ++ ++static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ /* Set SN when the vCPU is preempted */ ++ if (vcpu->preempted) ++ pi_set_sn(pi_desc); ++} ++ ++static void vmx_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ vmx_vcpu_pi_put(vcpu); ++ ++ vmx_prepare_switch_to_host(to_vmx(vcpu)); ++} ++ ++static bool emulation_required(struct kvm_vcpu *vcpu) ++{ ++ return emulate_invalid_guest_state && !guest_state_valid(vcpu); ++} ++ ++static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); ++ ++/* ++ * Return the cr0 value that a nested guest would read. This is a combination ++ * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by ++ * its hypervisor (cr0_read_shadow). ++ */ ++static inline unsigned long nested_read_cr0(struct vmcs12 *fields) ++{ ++ return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) | ++ (fields->cr0_read_shadow & fields->cr0_guest_host_mask); ++} ++static inline unsigned long nested_read_cr4(struct vmcs12 *fields) ++{ ++ return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) | ++ (fields->cr4_read_shadow & fields->cr4_guest_host_mask); ++} ++ ++static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags, save_rflags; ++ ++ if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { ++ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); ++ rflags = vmcs_readl(GUEST_RFLAGS); ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; ++ save_rflags = to_vmx(vcpu)->rmode.save_rflags; ++ rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; ++ } ++ to_vmx(vcpu)->rflags = rflags; ++ } ++ return to_vmx(vcpu)->rflags; ++} ++ ++static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ unsigned long old_rflags = vmx_get_rflags(vcpu); ++ ++ __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); ++ to_vmx(vcpu)->rflags = rflags; ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ to_vmx(vcpu)->rmode.save_rflags = rflags; ++ rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; ++ } ++ vmcs_writel(GUEST_RFLAGS, rflags); ++ ++ if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) ++ to_vmx(vcpu)->emulation_required = emulation_required(vcpu); ++} ++ ++static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) ++{ ++ u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ int ret = 0; ++ ++ if (interruptibility & GUEST_INTR_STATE_STI) ++ ret |= KVM_X86_SHADOW_INT_STI; ++ if (interruptibility & GUEST_INTR_STATE_MOV_SS) ++ ret |= KVM_X86_SHADOW_INT_MOV_SS; ++ ++ return ret; ++} ++ ++static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ++{ ++ u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ u32 interruptibility = interruptibility_old; ++ ++ interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); ++ ++ if (mask & KVM_X86_SHADOW_INT_MOV_SS) 
++ interruptibility |= GUEST_INTR_STATE_MOV_SS; ++ else if (mask & KVM_X86_SHADOW_INT_STI) ++ interruptibility |= GUEST_INTR_STATE_STI; ++ ++ if ((interruptibility != interruptibility_old)) ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); ++} ++ ++static void skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rip; ++ ++ rip = kvm_rip_read(vcpu); ++ rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ kvm_rip_write(vcpu, rip); ++ ++ /* skipping an emulated instruction also counts */ ++ vmx_set_interrupt_shadow(vcpu, 0); ++} ++ ++static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, ++ unsigned long exit_qual) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned int nr = vcpu->arch.exception.nr; ++ u32 intr_info = nr | INTR_INFO_VALID_MASK; ++ ++ if (vcpu->arch.exception.has_error_code) { ++ vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; ++ intr_info |= INTR_INFO_DELIVER_CODE_MASK; ++ } ++ ++ if (kvm_exception_is_soft(nr)) ++ intr_info |= INTR_TYPE_SOFT_EXCEPTION; ++ else ++ intr_info |= INTR_TYPE_HARD_EXCEPTION; ++ ++ if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && ++ vmx_get_nmi_mask(vcpu)) ++ intr_info |= INTR_INFO_UNBLOCK_NMI; ++ ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); ++} ++ ++/* ++ * KVM wants to inject page-faults which it got to the guest. This function ++ * checks whether in a nested guest, we need to inject them to L1 or L2. ++ */ ++static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned int nr = vcpu->arch.exception.nr; ++ ++ if (nr == PF_VECTOR) { ++ if (vcpu->arch.exception.nested_apf) { ++ *exit_qual = vcpu->arch.apf.nested_apf_token; ++ return 1; ++ } ++ /* ++ * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. ++ * The fix is to add the ancillary datum (CR2 or DR6) to structs ++ * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 ++ * can be written only when inject_pending_event runs. This should be ++ * conditional on a new capability---if the capability is disabled, ++ * kvm_multiple_exception would write the ancillary information to ++ * CR2 or DR6, for backwards ABI-compatibility. ++ */ ++ if (nested_vmx_is_page_fault_vmexit(vmcs12, ++ vcpu->arch.exception.error_code)) { ++ *exit_qual = vcpu->arch.cr2; ++ return 1; ++ } ++ } else { ++ if (vmcs12->exception_bitmap & (1u << nr)) { ++ if (nr == DB_VECTOR) { ++ *exit_qual = vcpu->arch.dr6; ++ *exit_qual &= ~(DR6_FIXED_1 | DR6_BT); ++ *exit_qual ^= DR6_RTM; ++ } else { ++ *exit_qual = 0; ++ } ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static void vmx_clear_hlt(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Ensure that we clear the HLT state in the VMCS. We don't need to ++ * explicitly skip the instruction because if the HLT state is set, ++ * then the instruction is already executing and RIP has already been ++ * advanced. 
++ */ ++ if (kvm_hlt_in_guest(vcpu->kvm) && ++ vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) ++ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); ++} ++ ++static void vmx_queue_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned nr = vcpu->arch.exception.nr; ++ bool has_error_code = vcpu->arch.exception.has_error_code; ++ u32 error_code = vcpu->arch.exception.error_code; ++ u32 intr_info = nr | INTR_INFO_VALID_MASK; ++ ++ if (has_error_code) { ++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); ++ intr_info |= INTR_INFO_DELIVER_CODE_MASK; ++ } ++ ++ if (vmx->rmode.vm86_active) { ++ int inc_eip = 0; ++ if (kvm_exception_is_soft(nr)) ++ inc_eip = vcpu->arch.event_exit_inst_len; ++ if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ ++ WARN_ON_ONCE(vmx->emulation_required); ++ ++ if (kvm_exception_is_soft(nr)) { ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmx->vcpu.arch.event_exit_inst_len); ++ intr_info |= INTR_TYPE_SOFT_EXCEPTION; ++ } else ++ intr_info |= INTR_TYPE_HARD_EXCEPTION; ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static bool vmx_rdtscp_supported(void) ++{ ++ return cpu_has_vmx_rdtscp(); ++} ++ ++static bool vmx_invpcid_supported(void) ++{ ++ return cpu_has_vmx_invpcid(); ++} ++ ++/* ++ * Swap MSR entry in host/guest MSR entry array. ++ */ ++static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) ++{ ++ struct shared_msr_entry tmp; ++ ++ tmp = vmx->guest_msrs[to]; ++ vmx->guest_msrs[to] = vmx->guest_msrs[from]; ++ vmx->guest_msrs[from] = tmp; ++} ++ ++/* ++ * Set up the vmcs to automatically save and restore system ++ * msrs. Don't touch the 64-bit msrs if the guest is in legacy ++ * mode, as fiddling with msrs is very expensive. ++ */ ++static void setup_msrs(struct vcpu_vmx *vmx) ++{ ++ int save_nmsrs, index; ++ ++ save_nmsrs = 0; ++#ifdef CONFIG_X86_64 ++ if (is_long_mode(&vmx->vcpu)) { ++ index = __find_msr_index(vmx, MSR_SYSCALL_MASK); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_LSTAR); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_CSTAR); ++ if (index >= 0) ++ move_msr_up(vmx, index, save_nmsrs++); ++ /* ++ * MSR_STAR is only needed on long mode guests, and only ++ * if efer.sce is enabled. 
++ */ ++ index = __find_msr_index(vmx, MSR_STAR); ++ if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ } ++#endif ++ index = __find_msr_index(vmx, MSR_EFER); ++ if (index >= 0 && update_transition_efer(vmx, index)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ index = __find_msr_index(vmx, MSR_TSC_AUX); ++ if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) ++ move_msr_up(vmx, index, save_nmsrs++); ++ ++ vmx->save_nmsrs = save_nmsrs; ++ vmx->guest_msrs_dirty = true; ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(&vmx->vcpu); ++} ++ ++static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) ++ return vcpu->arch.tsc_offset - vmcs12->tsc_offset; ++ ++ return vcpu->arch.tsc_offset; ++} ++ ++static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ u64 active_offset = offset; ++ if (is_guest_mode(vcpu)) { ++ /* ++ * We're here if L1 chose not to trap WRMSR to TSC. According ++ * to the spec, this should set L1's TSC; The offset that L1 ++ * set for L2 remains unchanged, and still needs to be added ++ * to the newly set TSC to get L2's TSC. ++ */ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) ++ active_offset += vmcs12->tsc_offset; ++ } else { ++ trace_kvm_write_tsc_offset(vcpu->vcpu_id, ++ vmcs_read64(TSC_OFFSET), offset); ++ } ++ ++ vmcs_write64(TSC_OFFSET, active_offset); ++ return active_offset; ++} ++ ++/* ++ * nested_vmx_allowed() checks whether a guest should be allowed to use VMX ++ * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for ++ * all guests if the "nested" module option is off, and can also be disabled ++ * for a single guest by disabling its VMX cpuid bit. ++ */ ++static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) ++{ ++ return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); ++} ++ ++/* ++ * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be ++ * returned for the various VMX controls MSRs when nested VMX is enabled. ++ * The same values should also be used to verify that vmcs12 control fields are ++ * valid during nested entry from L1 to L2. ++ * Each of these control msrs has a low and high 32-bit half: A low bit is on ++ * if the corresponding bit in the (32-bit) control field *must* be on, and a ++ * bit in the high half is on if the corresponding bit in the control field ++ * may be on. See also vmx_control_verify(). ++ */ ++static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) ++{ ++ if (!nested) { ++ memset(msrs, 0, sizeof(*msrs)); ++ return; ++ } ++ ++ /* ++ * Note that as a general rule, the high half of the MSRs (bits in ++ * the control fields which may be 1) should be initialized by the ++ * intersection of the underlying hardware's MSR (i.e., features which ++ * can be supported) and the list of features we want to expose - ++ * because they are known to be properly supported in our code. ++ * Also, usually, the low half of the MSRs (bits which must be 1) can ++ * be set to 0, meaning that L1 may turn off any of these bits. 
The ++ * reason is that if one of these bits is necessary, it will appear ++ * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control ++ * fields of vmcs01 and vmcs02, will turn these bits off - and ++ * nested_vmx_exit_reflected() will not pass related exits to L1. ++ * These rules have exceptions below. ++ */ ++ ++ /* pin-based controls */ ++ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ++ msrs->pinbased_ctls_low, ++ msrs->pinbased_ctls_high); ++ msrs->pinbased_ctls_low |= ++ PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->pinbased_ctls_high &= ++ PIN_BASED_EXT_INTR_MASK | ++ PIN_BASED_NMI_EXITING | ++ PIN_BASED_VIRTUAL_NMIS | ++ (apicv ? PIN_BASED_POSTED_INTR : 0); ++ msrs->pinbased_ctls_high |= ++ PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++ ++ /* exit controls */ ++ rdmsr(MSR_IA32_VMX_EXIT_CTLS, ++ msrs->exit_ctls_low, ++ msrs->exit_ctls_high); ++ msrs->exit_ctls_low = ++ VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; ++ ++ msrs->exit_ctls_high &= ++#ifdef CONFIG_X86_64 ++ VM_EXIT_HOST_ADDR_SPACE_SIZE | ++#endif ++ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; ++ msrs->exit_ctls_high |= ++ VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | ++ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | ++ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; ++ ++ /* We support free control of debug control saving. */ ++ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; ++ ++ /* entry controls */ ++ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, ++ msrs->entry_ctls_low, ++ msrs->entry_ctls_high); ++ msrs->entry_ctls_low = ++ VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->entry_ctls_high &= ++#ifdef CONFIG_X86_64 ++ VM_ENTRY_IA32E_MODE | ++#endif ++ VM_ENTRY_LOAD_IA32_PAT; ++ msrs->entry_ctls_high |= ++ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); ++ ++ /* We support free control of debug control loading. */ ++ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; ++ ++ /* cpu-based controls */ ++ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ++ msrs->procbased_ctls_low, ++ msrs->procbased_ctls_high); ++ msrs->procbased_ctls_low = ++ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ msrs->procbased_ctls_high &= ++ CPU_BASED_VIRTUAL_INTR_PENDING | ++ CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | ++ CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | ++ CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++#ifdef CONFIG_X86_64 ++ CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | ++#endif ++ CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | ++ CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | ++ CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | ++ CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | ++ CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++ /* ++ * We can allow some features even when not supported by the ++ * hardware. For example, L1 can specify an MSR bitmap - and we ++ * can use it to avoid exits to L1 - even when L0 runs L2 ++ * without MSR bitmaps. ++ */ ++ msrs->procbased_ctls_high |= ++ CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | ++ CPU_BASED_USE_MSR_BITMAPS; ++ ++ /* We support free control of CR3 access interception. */ ++ msrs->procbased_ctls_low &= ++ ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); ++ ++ /* ++ * secondary cpu-based controls. Do not include those that ++ * depend on CPUID bits, they are added later by vmx_cpuid_update. 
++ */ ++ if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ++ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, ++ msrs->secondary_ctls_low, ++ msrs->secondary_ctls_high); ++ ++ msrs->secondary_ctls_low = 0; ++ msrs->secondary_ctls_high &= ++ SECONDARY_EXEC_DESC | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_WBINVD_EXITING; ++ ++ /* ++ * We can emulate "VMCS shadowing," even if the hardware ++ * doesn't support it. ++ */ ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (enable_ept) { ++ /* nested EPT: emulate EPT also to L1 */ ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_EPT; ++ msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | ++ VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; ++ if (cpu_has_vmx_ept_execute_only()) ++ msrs->ept_caps |= ++ VMX_EPT_EXECUTE_ONLY_BIT; ++ msrs->ept_caps &= vmx_capability.ept; ++ msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | ++ VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | ++ VMX_EPT_1GB_PAGE_BIT; ++ if (enable_ept_ad_bits) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_PML; ++ msrs->ept_caps |= VMX_EPT_AD_BIT; ++ } ++ } ++ ++ if (cpu_has_vmx_vmfunc()) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_VMFUNC; ++ /* ++ * Advertise EPTP switching unconditionally ++ * since we emulate it ++ */ ++ if (enable_ept) ++ msrs->vmfunc_controls = ++ VMX_VMFUNC_EPTP_SWITCHING; ++ } ++ ++ /* ++ * Old versions of KVM use the single-context version without ++ * checking for support, so declare that it is supported even ++ * though it is treated as global context. The alternative is ++ * not failing the single-context invvpid, and it is worse. ++ */ ++ if (enable_vpid) { ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_VPID; ++ msrs->vpid_caps = VMX_VPID_INVVPID_BIT | ++ VMX_VPID_EXTENT_SUPPORTED_MASK; ++ } ++ ++ if (enable_unrestricted_guest) ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_UNRESTRICTED_GUEST; ++ ++ if (flexpriority_enabled) ++ msrs->secondary_ctls_high |= ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ ++ /* miscellaneous data */ ++ rdmsr(MSR_IA32_VMX_MISC, ++ msrs->misc_low, ++ msrs->misc_high); ++ msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; ++ msrs->misc_low |= ++ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | ++ VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | ++ VMX_MISC_ACTIVITY_HLT; ++ msrs->misc_high = 0; ++ ++ /* ++ * This MSR reports some information about VMX support. We ++ * should return information about the VMX we emulate for the ++ * guest, and the VMCS structure we give it - not about the ++ * VMX support of the underlying hardware. ++ */ ++ msrs->basic = ++ VMCS12_REVISION | ++ VMX_BASIC_TRUE_CTLS | ++ ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | ++ (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); ++ ++ if (cpu_has_vmx_basic_inout()) ++ msrs->basic |= VMX_BASIC_INOUT; ++ ++ /* ++ * These MSRs specify bits which the guest must keep fixed on ++ * while L1 is in VMXON mode (in L1's root mode, or running an L2). ++ * We picked the standard core2 setting. ++ */ ++#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) ++#define VMXON_CR4_ALWAYSON X86_CR4_VMXE ++ msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; ++ msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; ++ ++ /* These MSRs specify bits which the guest must keep fixed off. 
*/ ++ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); ++ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); ++ ++ /* highest index: VMX_PREEMPTION_TIMER_VALUE */ ++ msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; ++} ++ ++/* ++ * if fixed0[i] == 1: val[i] must be 1 ++ * if fixed1[i] == 0: val[i] must be 0 ++ */ ++static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1) ++{ ++ return ((val & fixed1) | fixed0) == val; ++} ++ ++static inline bool vmx_control_verify(u32 control, u32 low, u32 high) ++{ ++ return fixed_bits_valid(control, low, high); ++} ++ ++static inline u64 vmx_control_msr(u32 low, u32 high) ++{ ++ return low | ((u64)high << 32); ++} ++ ++static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) ++{ ++ superset &= mask; ++ subset &= mask; ++ ++ return (superset | subset) == superset; ++} ++ ++static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) ++{ ++ const u64 feature_and_reserved = ++ /* feature (except bit 48; see below) */ ++ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | ++ /* reserved */ ++ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); ++ u64 vmx_basic = vmx->nested.msrs.basic; ++ ++ if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) ++ return -EINVAL; ++ ++ /* ++ * KVM does not emulate a version of VMX that constrains physical ++ * addresses of VMX structures (e.g. VMCS) to 32-bits. ++ */ ++ if (data & BIT_ULL(48)) ++ return -EINVAL; ++ ++ if (vmx_basic_vmcs_revision_id(vmx_basic) != ++ vmx_basic_vmcs_revision_id(data)) ++ return -EINVAL; ++ ++ if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.basic = data; ++ return 0; ++} ++ ++static int ++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u64 supported; ++ u32 *lowp, *highp; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ lowp = &vmx->nested.msrs.pinbased_ctls_low; ++ highp = &vmx->nested.msrs.pinbased_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ lowp = &vmx->nested.msrs.procbased_ctls_low; ++ highp = &vmx->nested.msrs.procbased_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ lowp = &vmx->nested.msrs.exit_ctls_low; ++ highp = &vmx->nested.msrs.exit_ctls_high; ++ break; ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ lowp = &vmx->nested.msrs.entry_ctls_low; ++ highp = &vmx->nested.msrs.entry_ctls_high; ++ break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ lowp = &vmx->nested.msrs.secondary_ctls_low; ++ highp = &vmx->nested.msrs.secondary_ctls_high; ++ break; ++ default: ++ BUG(); ++ } ++ ++ supported = vmx_control_msr(*lowp, *highp); ++ ++ /* Check must-be-1 bits are still 1. */ ++ if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0))) ++ return -EINVAL; ++ ++ /* Check must-be-0 bits are still 0. 
*/ ++ if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) ++ return -EINVAL; ++ ++ *lowp = data; ++ *highp = data >> 32; ++ return 0; ++} ++ ++static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) ++{ ++ const u64 feature_and_reserved_bits = ++ /* feature */ ++ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | ++ BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | ++ /* reserved */ ++ GENMASK_ULL(13, 9) | BIT_ULL(31); ++ u64 vmx_misc; ++ ++ vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, ++ vmx->nested.msrs.misc_high); ++ ++ if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) ++ return -EINVAL; ++ ++ if ((vmx->nested.msrs.pinbased_ctls_high & ++ PIN_BASED_VMX_PREEMPTION_TIMER) && ++ vmx_misc_preemption_timer_rate(data) != ++ vmx_misc_preemption_timer_rate(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc)) ++ return -EINVAL; ++ ++ if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.misc_low = data; ++ vmx->nested.msrs.misc_high = data >> 32; ++ ++ /* ++ * If L1 has read-only VM-exit information fields, use the ++ * less permissive vmx_vmwrite_bitmap to specify write ++ * permissions for the shadow VMCS. ++ */ ++ if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) ++ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); ++ ++ return 0; ++} ++ ++static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) ++{ ++ u64 vmx_ept_vpid_cap; ++ ++ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, ++ vmx->nested.msrs.vpid_caps); ++ ++ /* Every bit is either reserved or a feature bit. */ ++ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) ++ return -EINVAL; ++ ++ vmx->nested.msrs.ept_caps = data; ++ vmx->nested.msrs.vpid_caps = data >> 32; ++ return 0; ++} ++ ++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u64 *msr; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_CR0_FIXED0: ++ msr = &vmx->nested.msrs.cr0_fixed0; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED0: ++ msr = &vmx->nested.msrs.cr4_fixed0; ++ break; ++ default: ++ BUG(); ++ } ++ ++ /* ++ * 1 bits (which indicates bits which "must-be-1" during VMX operation) ++ * must be 1 in the restored value. ++ */ ++ if (!is_bitwise_subset(data, *msr, -1ULL)) ++ return -EINVAL; ++ ++ *msr = data; ++ return 0; ++} ++ ++/* ++ * Called when userspace is restoring VMX MSRs. ++ * ++ * Returns 0 on success, non-0 otherwise. ++ */ ++static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * Don't allow changes to the VMX capability MSRs while the vCPU ++ * is in VMX operation. ++ */ ++ if (vmx->nested.vmxon) ++ return -EBUSY; ++ ++ switch (msr_index) { ++ case MSR_IA32_VMX_BASIC: ++ return vmx_restore_vmx_basic(vmx, data); ++ case MSR_IA32_VMX_PINBASED_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS: ++ case MSR_IA32_VMX_EXIT_CTLS: ++ case MSR_IA32_VMX_ENTRY_CTLS: ++ /* ++ * The "non-true" VMX capability MSRs are generated from the ++ * "true" MSRs, so we do not support restoring them directly. ++ * ++ * If userspace wants to emulate VMX_BASIC[55]=0, userspace ++ * should restore the "true" MSRs with the must-be-1 bits ++ * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND ++ * DEFAULT SETTINGS". 
++ */ ++ return -EINVAL; ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ return vmx_restore_control_msr(vmx, msr_index, data); ++ case MSR_IA32_VMX_MISC: ++ return vmx_restore_vmx_misc(vmx, data); ++ case MSR_IA32_VMX_CR0_FIXED0: ++ case MSR_IA32_VMX_CR4_FIXED0: ++ return vmx_restore_fixed0_msr(vmx, msr_index, data); ++ case MSR_IA32_VMX_CR0_FIXED1: ++ case MSR_IA32_VMX_CR4_FIXED1: ++ /* ++ * These MSRs are generated based on the vCPU's CPUID, so we ++ * do not support restoring them directly. ++ */ ++ return -EINVAL; ++ case MSR_IA32_VMX_EPT_VPID_CAP: ++ return vmx_restore_vmx_ept_vpid_cap(vmx, data); ++ case MSR_IA32_VMX_VMCS_ENUM: ++ vmx->nested.msrs.vmcs_enum = data; ++ return 0; ++ default: ++ /* ++ * The rest of the VMX capability MSRs do not support restore. ++ */ ++ return -EINVAL; ++ } ++} ++ ++/* Returns 0 on success, non-0 otherwise. */ ++static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) ++{ ++ switch (msr_index) { ++ case MSR_IA32_VMX_BASIC: ++ *pdata = msrs->basic; ++ break; ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ case MSR_IA32_VMX_PINBASED_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->pinbased_ctls_low, ++ msrs->pinbased_ctls_high); ++ if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) ++ *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ case MSR_IA32_VMX_PROCBASED_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->procbased_ctls_low, ++ msrs->procbased_ctls_high); ++ if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) ++ *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ case MSR_IA32_VMX_EXIT_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->exit_ctls_low, ++ msrs->exit_ctls_high); ++ if (msr_index == MSR_IA32_VMX_EXIT_CTLS) ++ *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ case MSR_IA32_VMX_ENTRY_CTLS: ++ *pdata = vmx_control_msr( ++ msrs->entry_ctls_low, ++ msrs->entry_ctls_high); ++ if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) ++ *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; ++ break; ++ case MSR_IA32_VMX_MISC: ++ *pdata = vmx_control_msr( ++ msrs->misc_low, ++ msrs->misc_high); ++ break; ++ case MSR_IA32_VMX_CR0_FIXED0: ++ *pdata = msrs->cr0_fixed0; ++ break; ++ case MSR_IA32_VMX_CR0_FIXED1: ++ *pdata = msrs->cr0_fixed1; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED0: ++ *pdata = msrs->cr4_fixed0; ++ break; ++ case MSR_IA32_VMX_CR4_FIXED1: ++ *pdata = msrs->cr4_fixed1; ++ break; ++ case MSR_IA32_VMX_VMCS_ENUM: ++ *pdata = msrs->vmcs_enum; ++ break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ *pdata = vmx_control_msr( ++ msrs->secondary_ctls_low, ++ msrs->secondary_ctls_high); ++ break; ++ case MSR_IA32_VMX_EPT_VPID_CAP: ++ *pdata = msrs->ept_caps | ++ ((u64)msrs->vpid_caps << 32); ++ break; ++ case MSR_IA32_VMX_VMFUNC: ++ *pdata = msrs->vmfunc_controls; ++ break; ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, ++ uint64_t val) ++{ ++ uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; ++ ++ return !(val & ~valid_bits); ++} ++ ++static int vmx_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ switch (msr->index) { ++ case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: ++ if (!nested) ++ return 1; ++ return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); ++ default: ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Reads an msr value (of 'msr_index') into 'pdata'. ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. ++ */ ++static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr; ++ ++ switch (msr_info->index) { ++#ifdef CONFIG_X86_64 ++ case MSR_FS_BASE: ++ msr_info->data = vmcs_readl(GUEST_FS_BASE); ++ break; ++ case MSR_GS_BASE: ++ msr_info->data = vmcs_readl(GUEST_GS_BASE); ++ break; ++ case MSR_KERNEL_GS_BASE: ++ msr_info->data = vmx_read_guest_kernel_gs_base(vmx); ++ break; ++#endif ++ case MSR_EFER: ++ return kvm_get_msr_common(vcpu, msr_info); ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ msr_info->data = to_vmx(vcpu)->spec_ctrl; ++ break; ++ case MSR_IA32_SYSENTER_CS: ++ msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); ++ break; ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) ++ return 1; ++ msr_info->data = vmcs_read64(GUEST_BNDCFGS); ++ break; ++ case MSR_IA32_MCG_EXT_CTL: ++ if (!msr_info->host_initiated && ++ !(vmx->msr_ia32_feature_control & ++ FEATURE_CONTROL_LMCE)) ++ return 1; ++ msr_info->data = vcpu->arch.mcg_ext_ctl; ++ break; ++ case MSR_IA32_FEATURE_CONTROL: ++ msr_info->data = vmx->msr_ia32_feature_control; ++ break; ++ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: ++ if (!nested_vmx_allowed(vcpu)) ++ return 1; ++ return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, ++ &msr_info->data); ++ case MSR_IA32_XSS: ++ if (!vmx_xsaves_supported() || ++ (!msr_info->host_initiated && ++ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) ++ return 1; ++ msr_info->data = vcpu->arch.ia32_xss; ++ break; ++ case MSR_TSC_AUX: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) ++ return 1; ++ /* Otherwise falls through */ ++ default: ++ msr = find_msr_entry(vmx, msr_info->index); ++ if (msr) { ++ msr_info->data = msr->data; ++ break; ++ } ++ return kvm_get_msr_common(vcpu, msr_info); ++ } ++ ++ return 0; ++} ++ ++static void vmx_leave_nested(struct kvm_vcpu *vcpu); ++ ++/* ++ * Writes msr value into into the appropriate "register". ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. 
++ */ ++static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr; ++ int ret = 0; ++ u32 msr_index = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr_index) { ++ case MSR_EFER: ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++#ifdef CONFIG_X86_64 ++ case MSR_FS_BASE: ++ vmx_segment_cache_clear(vmx); ++ vmcs_writel(GUEST_FS_BASE, data); ++ break; ++ case MSR_GS_BASE: ++ vmx_segment_cache_clear(vmx); ++ vmcs_writel(GUEST_GS_BASE, data); ++ break; ++ case MSR_KERNEL_GS_BASE: ++ vmx_write_guest_kernel_gs_base(vmx, data); ++ break; ++#endif ++ case MSR_IA32_SYSENTER_CS: ++ vmcs_write32(GUEST_SYSENTER_CS, data); ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ vmcs_writel(GUEST_SYSENTER_EIP, data); ++ break; ++ case MSR_IA32_SYSENTER_ESP: ++ vmcs_writel(GUEST_SYSENTER_ESP, data); ++ break; ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) ++ return 1; ++ if (is_noncanonical_address(data & PAGE_MASK, vcpu) || ++ (data & MSR_IA32_BNDCFGS_RSVD)) ++ return 1; ++ vmcs_write64(GUEST_BNDCFGS, data); ++ break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) ++ return 1; ++ ++ vmx->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. We update the vmcs01 here for L1 as well ++ * since it will end up touching the MSR anyway now. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_RW); ++ break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. 
++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ break; ++ case MSR_IA32_CR_PAT: ++ if (!kvm_pat_valid(data)) ++ return 1; ++ ++ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, data); ++ vcpu->arch.pat = data; ++ break; ++ } ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ break; ++ case MSR_IA32_MCG_EXT_CTL: ++ if ((!msr_info->host_initiated && ++ !(to_vmx(vcpu)->msr_ia32_feature_control & ++ FEATURE_CONTROL_LMCE)) || ++ (data & ~MCG_EXT_CTL_LMCE_EN)) ++ return 1; ++ vcpu->arch.mcg_ext_ctl = data; ++ break; ++ case MSR_IA32_FEATURE_CONTROL: ++ if (!vmx_feature_control_msr_valid(vcpu, data) || ++ (to_vmx(vcpu)->msr_ia32_feature_control & ++ FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) ++ return 1; ++ vmx->msr_ia32_feature_control = data; ++ if (msr_info->host_initiated && data == 0) ++ vmx_leave_nested(vcpu); ++ break; ++ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: ++ if (!msr_info->host_initiated) ++ return 1; /* they are read-only */ ++ if (!nested_vmx_allowed(vcpu)) ++ return 1; ++ return vmx_set_vmx_msr(vcpu, msr_index, data); ++ case MSR_IA32_XSS: ++ if (!vmx_xsaves_supported() || ++ (!msr_info->host_initiated && ++ !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) ++ return 1; ++ /* ++ * The only supported bit as of Skylake is bit 8, but ++ * it is not supported on KVM. ++ */ ++ if (data != 0) ++ return 1; ++ vcpu->arch.ia32_xss = data; ++ if (vcpu->arch.ia32_xss != host_xss) ++ add_atomic_switch_msr(vmx, MSR_IA32_XSS, ++ vcpu->arch.ia32_xss, host_xss, false); ++ else ++ clear_atomic_switch_msr(vmx, MSR_IA32_XSS); ++ break; ++ case MSR_TSC_AUX: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) ++ return 1; ++ /* Check reserved bit, higher 32 bits should be zero */ ++ if ((data >> 32) != 0) ++ return 1; ++ /* Otherwise falls through */ ++ default: ++ msr = find_msr_entry(vmx, msr_index); ++ if (msr) { ++ u64 old_msr_data = msr->data; ++ msr->data = data; ++ if (msr - vmx->guest_msrs < vmx->save_nmsrs) { ++ preempt_disable(); ++ ret = kvm_set_shared_msr(msr->index, msr->data, ++ msr->mask); ++ preempt_enable(); ++ if (ret) ++ msr->data = old_msr_data; ++ } ++ break; ++ } ++ ret = kvm_set_msr_common(vcpu, msr_info); ++ } ++ ++ return ret; ++} ++ ++static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) ++{ ++ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); ++ switch (reg) { ++ case VCPU_REGS_RSP: ++ vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); ++ break; ++ case VCPU_REGS_RIP: ++ vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); ++ break; ++ case VCPU_EXREG_PDPTR: ++ if (enable_ept) ++ ept_save_pdptrs(vcpu); ++ break; ++ default: ++ break; ++ } ++} ++ ++static __init int cpu_has_kvm_support(void) ++{ ++ return cpu_has_vmx(); ++} ++ ++static __init int vmx_disabled_by_bios(void) ++{ ++ u64 msr; ++ ++ rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); ++ if (msr & FEATURE_CONTROL_LOCKED) { ++ /* launched w/ TXT and VMX disabled */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) ++ && tboot_enabled()) ++ return 1; ++ /* launched w/o TXT and VMX only enabled w/ TXT */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) ++ && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) ++ && !tboot_enabled()) { ++ printk(KERN_WARNING "kvm: disable TXT in the BIOS or " ++ "activate TXT before enabling 
KVM\n"); ++ return 1; ++ } ++ /* launched w/o TXT and VMX disabled */ ++ if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) ++ && !tboot_enabled()) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static void kvm_cpu_vmxon(u64 addr) ++{ ++ cr4_set_bits(X86_CR4_VMXE); ++ intel_pt_handle_vmx(1); ++ ++ asm volatile (ASM_VMX_VMXON_RAX ++ : : "a"(&addr), "m"(addr) ++ : "memory", "cc"); ++} ++ ++static int hardware_enable(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); ++ u64 old, test_bits; ++ ++ if (cr4_read_shadow() & X86_CR4_VMXE) ++ return -EBUSY; ++ ++ /* ++ * This can happen if we hot-added a CPU but failed to allocate ++ * VP assist page for it. ++ */ ++ if (static_branch_unlikely(&enable_evmcs) && ++ !hv_get_vp_assist_page(cpu)) ++ return -EFAULT; ++ ++ INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); ++ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); ++ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++ ++ /* ++ * Now we can enable the vmclear operation in kdump ++ * since the loaded_vmcss_on_cpu list on this cpu ++ * has been initialized. ++ * ++ * Though the cpu is not in VMX operation now, there ++ * is no problem to enable the vmclear operation ++ * for the loaded_vmcss_on_cpu list is empty! ++ */ ++ crash_enable_local_vmclear(cpu); ++ ++ rdmsrl(MSR_IA32_FEATURE_CONTROL, old); ++ ++ test_bits = FEATURE_CONTROL_LOCKED; ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ if (tboot_enabled()) ++ test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; ++ ++ if ((old & test_bits) != test_bits) { ++ /* enable and lock */ ++ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); ++ } ++ kvm_cpu_vmxon(phys_addr); ++ if (enable_ept) ++ ept_sync_global(); ++ ++ return 0; ++} ++ ++static void vmclear_local_loaded_vmcss(void) ++{ ++ int cpu = raw_smp_processor_id(); ++ struct loaded_vmcs *v, *n; ++ ++ list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), ++ loaded_vmcss_on_cpu_link) ++ __loaded_vmcs_clear(v); ++} ++ ++ ++/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() ++ * tricks. ++ */ ++static void kvm_cpu_vmxoff(void) ++{ ++ asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); ++ ++ intel_pt_handle_vmx(0); ++ cr4_clear_bits(X86_CR4_VMXE); ++} ++ ++static void hardware_disable(void) ++{ ++ vmclear_local_loaded_vmcss(); ++ kvm_cpu_vmxoff(); ++} ++ ++static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ++ u32 msr, u32 *result) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ u32 ctl = ctl_min | ctl_opt; ++ ++ rdmsr(msr, vmx_msr_low, vmx_msr_high); ++ ++ ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ ++ ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ ++ ++ /* Ensure minimum (required) set of control bits are supported. 
*/ ++ if (ctl_min & ~ctl) ++ return -EIO; ++ ++ *result = ctl; ++ return 0; ++} ++ ++static __init bool allow_1_setting(u32 msr, u32 ctl) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ ++ rdmsr(msr, vmx_msr_low, vmx_msr_high); ++ return vmx_msr_high & ctl; ++} ++ ++static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ++{ ++ u32 vmx_msr_low, vmx_msr_high; ++ u32 min, opt, min2, opt2; ++ u32 _pin_based_exec_control = 0; ++ u32 _cpu_based_exec_control = 0; ++ u32 _cpu_based_2nd_exec_control = 0; ++ u32 _vmexit_control = 0; ++ u32 _vmentry_control = 0; ++ ++ memset(vmcs_conf, 0, sizeof(*vmcs_conf)); ++ min = CPU_BASED_HLT_EXITING | ++#ifdef CONFIG_X86_64 ++ CPU_BASED_CR8_LOAD_EXITING | ++ CPU_BASED_CR8_STORE_EXITING | ++#endif ++ CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_UNCOND_IO_EXITING | ++ CPU_BASED_MOV_DR_EXITING | ++ CPU_BASED_USE_TSC_OFFSETING | ++ CPU_BASED_MWAIT_EXITING | ++ CPU_BASED_MONITOR_EXITING | ++ CPU_BASED_INVLPG_EXITING | ++ CPU_BASED_RDPMC_EXITING; ++ ++ opt = CPU_BASED_TPR_SHADOW | ++ CPU_BASED_USE_MSR_BITMAPS | ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, ++ &_cpu_based_exec_control) < 0) ++ return -EIO; ++#ifdef CONFIG_X86_64 ++ if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) ++ _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & ++ ~CPU_BASED_CR8_STORE_EXITING; ++#endif ++ if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { ++ min2 = 0; ++ opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_WBINVD_EXITING | ++ SECONDARY_EXEC_ENABLE_VPID | ++ SECONDARY_EXEC_ENABLE_EPT | ++ SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING | ++ SECONDARY_EXEC_DESC | ++ SECONDARY_EXEC_RDTSCP | ++ SECONDARY_EXEC_ENABLE_INVPCID | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_SHADOW_VMCS | ++ SECONDARY_EXEC_XSAVES | ++ SECONDARY_EXEC_RDSEED_EXITING | ++ SECONDARY_EXEC_RDRAND_EXITING | ++ SECONDARY_EXEC_ENABLE_PML | ++ SECONDARY_EXEC_TSC_SCALING | ++ SECONDARY_EXEC_ENABLE_VMFUNC | ++ SECONDARY_EXEC_ENCLS_EXITING; ++ if (adjust_vmx_controls(min2, opt2, ++ MSR_IA32_VMX_PROCBASED_CTLS2, ++ &_cpu_based_2nd_exec_control) < 0) ++ return -EIO; ++ } ++#ifndef CONFIG_X86_64 ++ if (!(_cpu_based_2nd_exec_control & ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) ++ _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; ++#endif ++ ++ if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) ++ _cpu_based_2nd_exec_control &= ~( ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ ++ rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, ++ &vmx_capability.ept, &vmx_capability.vpid); ++ ++ if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { ++ /* CR3 accesses and invlpg don't need to cause VM Exits when EPT ++ enabled */ ++ _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_INVLPG_EXITING); ++ } else if (vmx_capability.ept) { ++ vmx_capability.ept = 0; ++ pr_warn_once("EPT CAP should not exist if not support " ++ "1-setting enable EPT VM-execution control\n"); ++ } ++ if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && ++ vmx_capability.vpid) { ++ vmx_capability.vpid = 0; ++ pr_warn_once("VPID CAP should not exist if not support " ++ "1-setting enable VPID VM-execution control\n"); ++ } ++ ++ min = 
VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT; ++#ifdef CONFIG_X86_64 ++ min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; ++#endif ++ opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | ++ VM_EXIT_CLEAR_BNDCFGS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, ++ &_vmexit_control) < 0) ++ return -EIO; ++ ++ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; ++ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | ++ PIN_BASED_VMX_PREEMPTION_TIMER; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, ++ &_pin_based_exec_control) < 0) ++ return -EIO; ++ ++ if (cpu_has_broken_vmx_preemption_timer()) ++ _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ if (!(_cpu_based_2nd_exec_control & ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)) ++ _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; ++ ++ min = VM_ENTRY_LOAD_DEBUG_CONTROLS; ++ opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; ++ if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, ++ &_vmentry_control) < 0) ++ return -EIO; ++ ++ rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); ++ ++ /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ ++ if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) ++ return -EIO; ++ ++#ifdef CONFIG_X86_64 ++ /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ ++ if (vmx_msr_high & (1u<<16)) ++ return -EIO; ++#endif ++ ++ /* Require Write-Back (WB) memory type for VMCS accesses. */ ++ if (((vmx_msr_high >> 18) & 15) != 6) ++ return -EIO; ++ ++ vmcs_conf->size = vmx_msr_high & 0x1fff; ++ vmcs_conf->order = get_order(vmcs_conf->size); ++ vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; ++ ++ vmcs_conf->revision_id = vmx_msr_low; ++ ++ vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; ++ vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; ++ vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; ++ vmcs_conf->vmexit_ctrl = _vmexit_control; ++ vmcs_conf->vmentry_ctrl = _vmentry_control; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_sanitize_exec_ctrls(vmcs_conf); ++ ++ cpu_has_load_ia32_efer = ++ allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, ++ VM_ENTRY_LOAD_IA32_EFER) ++ && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, ++ VM_EXIT_LOAD_IA32_EFER); ++ ++ cpu_has_load_perf_global_ctrl = ++ allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, ++ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) ++ && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, ++ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); ++ ++ /* ++ * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL ++ * but due to errata below it can't be used. Workaround is to use ++ * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL. ++ * ++ * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32] ++ * ++ * AAK155 (model 26) ++ * AAP115 (model 30) ++ * AAT100 (model 37) ++ * BC86,AAY89,BD102 (model 44) ++ * BA97 (model 46) ++ * ++ */ ++ if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) { ++ switch (boot_cpu_data.x86_model) { ++ case 26: ++ case 30: ++ case 37: ++ case 44: ++ case 46: ++ cpu_has_load_perf_global_ctrl = false; ++ printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " ++ "does not work properly. 
Using workaround\n"); ++ break; ++ default: ++ break; ++ } ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ rdmsrl(MSR_IA32_XSS, host_xss); ++ ++ return 0; ++} ++ ++static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) ++{ ++ int node = cpu_to_node(cpu); ++ struct page *pages; ++ struct vmcs *vmcs; ++ ++ pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); ++ if (!pages) ++ return NULL; ++ vmcs = page_address(pages); ++ memset(vmcs, 0, vmcs_config.size); ++ ++ /* KVM supports Enlightened VMCS v1 only */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ vmcs->hdr.revision_id = KVM_EVMCS_VERSION; ++ else ++ vmcs->hdr.revision_id = vmcs_config.revision_id; ++ ++ if (shadow) ++ vmcs->hdr.shadow_vmcs = 1; ++ return vmcs; ++} ++ ++static void free_vmcs(struct vmcs *vmcs) ++{ ++ free_pages((unsigned long)vmcs, vmcs_config.order); ++} ++ ++/* ++ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded ++ */ ++static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ if (!loaded_vmcs->vmcs) ++ return; ++ loaded_vmcs_clear(loaded_vmcs); ++ free_vmcs(loaded_vmcs->vmcs); ++ loaded_vmcs->vmcs = NULL; ++ if (loaded_vmcs->msr_bitmap) ++ free_page((unsigned long)loaded_vmcs->msr_bitmap); ++ WARN_ON(loaded_vmcs->shadow_vmcs != NULL); ++} ++ ++static struct vmcs *alloc_vmcs(bool shadow) ++{ ++ return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); ++} ++ ++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ loaded_vmcs->vmcs = alloc_vmcs(false); ++ if (!loaded_vmcs->vmcs) ++ return -ENOMEM; ++ ++ loaded_vmcs->shadow_vmcs = NULL; ++ loaded_vmcs_init(loaded_vmcs); ++ ++ if (cpu_has_vmx_msr_bitmap()) { ++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!loaded_vmcs->msr_bitmap) ++ goto out_vmcs; ++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); ++ ++ if (IS_ENABLED(CONFIG_HYPERV) && ++ static_branch_unlikely(&enable_evmcs) && ++ (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) { ++ struct hv_enlightened_vmcs *evmcs = ++ (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs; ++ ++ evmcs->hv_enlightenments_control.msr_bitmap = 1; ++ } ++ } ++ ++ memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); ++ ++ return 0; ++ ++out_vmcs: ++ free_loaded_vmcs(loaded_vmcs); ++ return -ENOMEM; ++} ++ ++static void free_kvm_area(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ free_vmcs(per_cpu(vmxarea, cpu)); ++ per_cpu(vmxarea, cpu) = NULL; ++ } ++} ++ ++enum vmcs_field_width { ++ VMCS_FIELD_WIDTH_U16 = 0, ++ VMCS_FIELD_WIDTH_U64 = 1, ++ VMCS_FIELD_WIDTH_U32 = 2, ++ VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3 ++}; ++ ++static inline int vmcs_field_width(unsigned long field) ++{ ++ if (0x1 & field) /* the *_HIGH fields are all 32 bit */ ++ return VMCS_FIELD_WIDTH_U32; ++ return (field >> 13) & 0x3 ; ++} ++ ++static inline int vmcs_field_readonly(unsigned long field) ++{ ++ return (((field >> 10) & 0x3) == 1); ++} ++ ++static void init_vmcs_shadow_fields(void) ++{ ++ int i, j; ++ ++ for (i = j = 0; i < max_shadow_read_only_fields; i++) { ++ u16 field = shadow_read_only_fields[i]; ++ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && ++ (i + 1 == max_shadow_read_only_fields || ++ shadow_read_only_fields[i + 1] != field + 1)) ++ pr_err("Missing field from shadow_read_only_field %x\n", ++ field + 1); ++ ++ clear_bit(field, vmx_vmread_bitmap); ++#ifdef CONFIG_X86_64 ++ if (field & 1) ++ continue; ++#endif ++ if (j < i) ++ shadow_read_only_fields[j] = field; ++ j++; ++ } ++ max_shadow_read_only_fields = j; ++ 
++ for (i = j = 0; i < max_shadow_read_write_fields; i++) { ++ u16 field = shadow_read_write_fields[i]; ++ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && ++ (i + 1 == max_shadow_read_write_fields || ++ shadow_read_write_fields[i + 1] != field + 1)) ++ pr_err("Missing field from shadow_read_write_field %x\n", ++ field + 1); ++ ++ /* ++ * PML and the preemption timer can be emulated, but the ++ * processor cannot vmwrite to fields that don't exist ++ * on bare metal. ++ */ ++ switch (field) { ++ case GUEST_PML_INDEX: ++ if (!cpu_has_vmx_pml()) ++ continue; ++ break; ++ case VMX_PREEMPTION_TIMER_VALUE: ++ if (!cpu_has_vmx_preemption_timer()) ++ continue; ++ break; ++ case GUEST_INTR_STATUS: ++ if (!cpu_has_vmx_apicv()) ++ continue; ++ break; ++ default: ++ break; ++ } ++ ++ clear_bit(field, vmx_vmwrite_bitmap); ++ clear_bit(field, vmx_vmread_bitmap); ++#ifdef CONFIG_X86_64 ++ if (field & 1) ++ continue; ++#endif ++ if (j < i) ++ shadow_read_write_fields[j] = field; ++ j++; ++ } ++ max_shadow_read_write_fields = j; ++} ++ ++static __init int alloc_kvm_area(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct vmcs *vmcs; ++ ++ vmcs = alloc_vmcs_cpu(false, cpu); ++ if (!vmcs) { ++ free_kvm_area(); ++ return -ENOMEM; ++ } ++ ++ /* ++ * When eVMCS is enabled, alloc_vmcs_cpu() sets ++ * vmcs->revision_id to KVM_EVMCS_VERSION instead of ++ * revision_id reported by MSR_IA32_VMX_BASIC. ++ * ++ * However, even though not explictly documented by ++ * TLFS, VMXArea passed as VMXON argument should ++ * still be marked with revision_id reported by ++ * physical CPU. ++ */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ vmcs->hdr.revision_id = vmcs_config.revision_id; ++ ++ per_cpu(vmxarea, cpu) = vmcs; ++ } ++ return 0; ++} ++ ++static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, ++ struct kvm_segment *save) ++{ ++ if (!emulate_invalid_guest_state) { ++ /* ++ * CS and SS RPL should be equal during guest entry according ++ * to VMX spec, but in reality it is not always so. Since vcpu ++ * is in the middle of the transition from real mode to ++ * protected mode it is safe to assume that RPL 0 is a good ++ * default value. ++ */ ++ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) ++ save->selector &= ~SEGMENT_RPL_MASK; ++ save->dpl = save->selector & SEGMENT_RPL_MASK; ++ save->s = 1; ++ } ++ vmx_set_segment(vcpu, save, seg); ++} ++ ++static void enter_pmode(struct kvm_vcpu *vcpu) ++{ ++ unsigned long flags; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * Update real mode segment cache. It may be not up-to-date if sement ++ * register was written while vcpu was in a guest mode. 
++ */ ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); ++ ++ vmx->rmode.vm86_active = 0; ++ ++ vmx_segment_cache_clear(vmx); ++ ++ vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); ++ ++ flags = vmcs_readl(GUEST_RFLAGS); ++ flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; ++ flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; ++ vmcs_writel(GUEST_RFLAGS, flags); ++ ++ vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | ++ (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); ++ ++ update_exception_bitmap(vcpu); ++ ++ fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); ++ fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); ++ fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); ++} ++ ++static void fix_rmode_seg(int seg, struct kvm_segment *save) ++{ ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ struct kvm_segment var = *save; ++ ++ var.dpl = 0x3; ++ if (seg == VCPU_SREG_CS) ++ var.type = 0x3; ++ ++ if (!emulate_invalid_guest_state) { ++ var.selector = var.base >> 4; ++ var.base = var.base & 0xffff0; ++ var.limit = 0xffff; ++ var.g = 0; ++ var.db = 0; ++ var.present = 1; ++ var.s = 1; ++ var.l = 0; ++ var.unusable = 0; ++ var.type = 0x3; ++ var.avl = 0; ++ if (save->base & 0xf) ++ printk_once(KERN_WARNING "kvm: segment base is not " ++ "paragraph aligned when entering " ++ "protected mode (seg=%d)", seg); ++ } ++ ++ vmcs_write16(sf->selector, var.selector); ++ vmcs_writel(sf->base, var.base); ++ vmcs_write32(sf->limit, var.limit); ++ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); ++} ++ ++static void enter_rmode(struct kvm_vcpu *vcpu) ++{ ++ unsigned long flags; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); ++ ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); ++ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); ++ ++ vmx->rmode.vm86_active = 1; ++ ++ /* ++ * Very old userspace does not call KVM_SET_TSS_ADDR before entering ++ * vcpu. Warn the user that an update is overdue. 
++ */ ++ if (!kvm_vmx->tss_addr) ++ printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " ++ "called before entering vcpu\n"); ++ ++ vmx_segment_cache_clear(vmx); ++ ++ vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); ++ vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); ++ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); ++ ++ flags = vmcs_readl(GUEST_RFLAGS); ++ vmx->rmode.save_rflags = flags; ++ ++ flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; ++ ++ vmcs_writel(GUEST_RFLAGS, flags); ++ vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); ++ update_exception_bitmap(vcpu); ++ ++ fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); ++ fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); ++ fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); ++ fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); ++ fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); ++ fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); ++ ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); ++ ++ if (!msr) ++ return; ++ ++ vcpu->arch.efer = efer; ++ if (efer & EFER_LMA) { ++ vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ msr->data = efer; ++ } else { ++ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ ++ msr->data = efer & ~EFER_LME; ++ } ++ setup_msrs(vmx); ++} ++ ++#ifdef CONFIG_X86_64 ++ ++static void enter_lmode(struct kvm_vcpu *vcpu) ++{ ++ u32 guest_tr_ar; ++ ++ vmx_segment_cache_clear(to_vmx(vcpu)); ++ ++ guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); ++ if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { ++ pr_debug_ratelimited("%s: tss fixup for long mode. \n", ++ __func__); ++ vmcs_write32(GUEST_TR_AR_BYTES, ++ (guest_tr_ar & ~VMX_AR_TYPE_MASK) ++ | VMX_AR_TYPE_BUSY_64_TSS); ++ } ++ vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); ++} ++ ++static void exit_lmode(struct kvm_vcpu *vcpu) ++{ ++ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ++ vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); ++} ++ ++#endif ++ ++static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, ++ bool invalidate_gpa) ++{ ++ if (enable_ept && (invalidate_gpa || !enable_vpid)) { ++ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) ++ return; ++ ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); ++ } else { ++ vpid_sync_context(vpid); ++ } ++} ++ ++static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); ++} ++ ++static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) ++{ ++ int vpid = to_vmx(vcpu)->vpid; ++ ++ if (!vpid_sync_vcpu_addr(vpid, addr)) ++ vpid_sync_context(vpid); ++ ++ /* ++ * If VPIDs are not supported or enabled, then the above is a no-op. ++ * But we don't really need a TLB flush in that case anyway, because ++ * each VM entry/exit includes an implicit flush when VPID is 0. 
++ */ ++} ++ ++static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) ++{ ++ ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; ++ ++ vcpu->arch.cr0 &= ~cr0_guest_owned_bits; ++ vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; ++} ++ ++static void vmx_decache_cr3(struct kvm_vcpu *vcpu) ++{ ++ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) ++ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++} ++ ++static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) ++{ ++ ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; ++ ++ vcpu->arch.cr4 &= ~cr4_guest_owned_bits; ++ vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; ++} ++ ++static void ept_load_pdptrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (!test_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty)) ++ return; ++ ++ if (is_pae_paging(vcpu)) { ++ vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); ++ vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); ++ vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); ++ vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); ++ } ++} ++ ++static void ept_save_pdptrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (is_pae_paging(vcpu)) { ++ mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); ++ mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); ++ mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); ++ mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); ++ } ++ ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty); ++} ++ ++static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_UNRESTRICTED_GUEST && ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) ++ fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; ++ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; ++ ++ return fixed_bits_valid(val, fixed0, fixed1); ++} ++ ++/* No difference in the restrictions on guest and host CR4 in VMX operation. 
*/ ++#define nested_guest_cr4_valid nested_cr4_valid ++#define nested_host_cr4_valid nested_cr4_valid ++ ++static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); ++ ++static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, ++ unsigned long cr0, ++ struct kvm_vcpu *vcpu) ++{ ++ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) ++ vmx_decache_cr3(vcpu); ++ if (!(cr0 & X86_CR0_PG)) { ++ /* From paging/starting to nonpaging */ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, ++ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | ++ (CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING)); ++ vcpu->arch.cr0 = cr0; ++ vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); ++ } else if (!is_paging(vcpu)) { ++ /* From nonpaging to paging */ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, ++ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ++ ~(CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_CR3_STORE_EXITING)); ++ vcpu->arch.cr0 = cr0; ++ vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); ++ } ++ ++ if (!(cr0 & X86_CR0_WP)) ++ *hw_cr0 &= ~X86_CR0_WP; ++} ++ ++static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long hw_cr0; ++ ++ hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); ++ if (enable_unrestricted_guest) ++ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; ++ else { ++ hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; ++ ++ if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) ++ enter_pmode(vcpu); ++ ++ if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) ++ enter_rmode(vcpu); ++ } ++ ++#ifdef CONFIG_X86_64 ++ if (vcpu->arch.efer & EFER_LME) { ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) ++ enter_lmode(vcpu); ++ if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) ++ exit_lmode(vcpu); ++ } ++#endif ++ ++ if (enable_ept && !enable_unrestricted_guest) ++ ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); ++ ++ vmcs_writel(CR0_READ_SHADOW, cr0); ++ vmcs_writel(GUEST_CR0, hw_cr0); ++ vcpu->arch.cr0 = cr0; ++ ++ /* depends on vcpu->arch.cr0 to be set to a new value */ ++ vmx->emulation_required = emulation_required(vcpu); ++} ++ ++static int get_ept_level(struct kvm_vcpu *vcpu) ++{ ++ if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) ++ return 5; ++ return 4; ++} ++ ++static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) ++{ ++ u64 eptp = VMX_EPTP_MT_WB; ++ ++ eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; ++ ++ if (enable_ept_ad_bits && ++ (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) ++ eptp |= VMX_EPTP_AD_ENABLE_BIT; ++ eptp |= (root_hpa & PAGE_MASK); ++ ++ return eptp; ++} ++ ++static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ unsigned long guest_cr3; ++ u64 eptp; ++ ++ guest_cr3 = cr3; ++ if (enable_ept) { ++ eptp = construct_eptp(vcpu, cr3); ++ vmcs_write64(EPT_POINTER, eptp); ++ ++ if (kvm_x86_ops->tlb_remote_flush) { ++ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ to_vmx(vcpu)->ept_pointer = eptp; ++ to_kvm_vmx(kvm)->ept_pointers_match ++ = EPT_POINTERS_CHECK; ++ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ } ++ ++ if (enable_unrestricted_guest || is_paging(vcpu) || ++ is_guest_mode(vcpu)) ++ guest_cr3 = kvm_read_cr3(vcpu); ++ else ++ guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; ++ ept_load_pdptrs(vcpu); ++ } ++ ++ vmcs_writel(GUEST_CR3, guest_cr3); ++} ++ ++static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ /* ++ * Pass through host's Machine Check Enable value to hw_cr4, which ++ * is in force while we are in guest mode. 
Do not let guests control ++ * this bit, even if host CR4.MCE == 0. ++ */ ++ unsigned long hw_cr4; ++ ++ hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); ++ if (enable_unrestricted_guest) ++ hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; ++ else if (to_vmx(vcpu)->rmode.vm86_active) ++ hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; ++ else ++ hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; ++ ++ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { ++ if (cr4 & X86_CR4_UMIP) { ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_DESC); ++ hw_cr4 &= ~X86_CR4_UMIP; ++ } else if (!is_guest_mode(vcpu) || ++ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_DESC); ++ } ++ ++ if (cr4 & X86_CR4_VMXE) { ++ /* ++ * To use VMXON (and later other VMX instructions), a guest ++ * must first be able to turn on cr4.VMXE (see handle_vmon()). ++ * So basically the check on whether to allow nested VMX ++ * is here. We operate under the default treatment of SMM, ++ * so VMX cannot be enabled under SMM. ++ */ ++ if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) ++ return 1; ++ } ++ ++ if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) ++ return 1; ++ ++ vcpu->arch.cr4 = cr4; ++ ++ if (!enable_unrestricted_guest) { ++ if (enable_ept) { ++ if (!is_paging(vcpu)) { ++ hw_cr4 &= ~X86_CR4_PAE; ++ hw_cr4 |= X86_CR4_PSE; ++ } else if (!(cr4 & X86_CR4_PAE)) { ++ hw_cr4 &= ~X86_CR4_PAE; ++ } ++ } ++ ++ /* ++ * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in ++ * hardware. To emulate this behavior, SMEP/SMAP/PKU needs ++ * to be manually disabled when guest switches to non-paging ++ * mode. ++ * ++ * If !enable_unrestricted_guest, the CPU is always running ++ * with CR0.PG=1 and CR4 needs to be modified. ++ * If enable_unrestricted_guest, the CPU automatically ++ * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. ++ */ ++ if (!is_paging(vcpu)) ++ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); ++ } ++ ++ vmcs_writel(CR4_READ_SHADOW, cr4); ++ vmcs_writel(GUEST_CR4, hw_cr4); ++ return 0; ++} ++ ++static void vmx_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 ar; ++ ++ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { ++ *var = vmx->rmode.segs[seg]; ++ if (seg == VCPU_SREG_TR ++ || var->selector == vmx_read_guest_seg_selector(vmx, seg)) ++ return; ++ var->base = vmx_read_guest_seg_base(vmx, seg); ++ var->selector = vmx_read_guest_seg_selector(vmx, seg); ++ return; ++ } ++ var->base = vmx_read_guest_seg_base(vmx, seg); ++ var->limit = vmx_read_guest_seg_limit(vmx, seg); ++ var->selector = vmx_read_guest_seg_selector(vmx, seg); ++ ar = vmx_read_guest_seg_ar(vmx, seg); ++ var->unusable = (ar >> 16) & 1; ++ var->type = ar & 15; ++ var->s = (ar >> 4) & 1; ++ var->dpl = (ar >> 5) & 3; ++ /* ++ * Some userspaces do not preserve unusable property. Since usable ++ * segment has to be present according to VMX spec we can use present ++ * property to amend userspace bug by making unusable segment always ++ * nonpresent. vmx_segment_access_rights() already marks nonpresent ++ * segment as unusable. 
++ */ ++ var->present = !var->unusable; ++ var->avl = (ar >> 12) & 1; ++ var->l = (ar >> 13) & 1; ++ var->db = (ar >> 14) & 1; ++ var->g = (ar >> 15) & 1; ++} ++ ++static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment s; ++ ++ if (to_vmx(vcpu)->rmode.vm86_active) { ++ vmx_get_segment(vcpu, &s, seg); ++ return s.base; ++ } ++ return vmx_read_guest_seg_base(to_vmx(vcpu), seg); ++} ++ ++static int vmx_get_cpl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (unlikely(vmx->rmode.vm86_active)) ++ return 0; ++ else { ++ int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); ++ return VMX_AR_DPL(ar); ++ } ++} ++ ++static u32 vmx_segment_access_rights(struct kvm_segment *var) ++{ ++ u32 ar; ++ ++ if (var->unusable || !var->present) ++ ar = 1 << 16; ++ else { ++ ar = var->type & 15; ++ ar |= (var->s & 1) << 4; ++ ar |= (var->dpl & 3) << 5; ++ ar |= (var->present & 1) << 7; ++ ar |= (var->avl & 1) << 12; ++ ar |= (var->l & 1) << 13; ++ ar |= (var->db & 1) << 14; ++ ar |= (var->g & 1) << 15; ++ } ++ ++ return ar; ++} ++ ++static void vmx_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ ++ vmx_segment_cache_clear(vmx); ++ ++ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { ++ vmx->rmode.segs[seg] = *var; ++ if (seg == VCPU_SREG_TR) ++ vmcs_write16(sf->selector, var->selector); ++ else if (var->s) ++ fix_rmode_seg(seg, &vmx->rmode.segs[seg]); ++ goto out; ++ } ++ ++ vmcs_writel(sf->base, var->base); ++ vmcs_write32(sf->limit, var->limit); ++ vmcs_write16(sf->selector, var->selector); ++ ++ /* ++ * Fix the "Accessed" bit in AR field of segment registers for older ++ * qemu binaries. ++ * IA32 arch specifies that at the time of processor reset the ++ * "Accessed" bit in the AR field of segment registers is 1. And qemu ++ * is setting it to 0 in the userland code. This causes invalid guest ++ * state vmexit when "unrestricted guest" mode is turned on. ++ * Fix for this setup issue in cpu_reset is being pushed in the qemu ++ * tree. Newer qemu binaries with that qemu fix would not need this ++ * kvm hack. 
++ */ ++ if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) ++ var->type |= 0x1; /* Accessed */ ++ ++ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); ++ ++out: ++ vmx->emulation_required = emulation_required(vcpu); ++} ++ ++static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) ++{ ++ u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); ++ ++ *db = (ar >> 14) & 1; ++ *l = (ar >> 13) & 1; ++} ++ ++static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ dt->size = vmcs_read32(GUEST_IDTR_LIMIT); ++ dt->address = vmcs_readl(GUEST_IDTR_BASE); ++} ++ ++static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ vmcs_write32(GUEST_IDTR_LIMIT, dt->size); ++ vmcs_writel(GUEST_IDTR_BASE, dt->address); ++} ++ ++static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ dt->size = vmcs_read32(GUEST_GDTR_LIMIT); ++ dt->address = vmcs_readl(GUEST_GDTR_BASE); ++} ++ ++static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) ++{ ++ vmcs_write32(GUEST_GDTR_LIMIT, dt->size); ++ vmcs_writel(GUEST_GDTR_BASE, dt->address); ++} ++ ++static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment var; ++ u32 ar; ++ ++ vmx_get_segment(vcpu, &var, seg); ++ var.dpl = 0x3; ++ if (seg == VCPU_SREG_CS) ++ var.type = 0x3; ++ ar = vmx_segment_access_rights(&var); ++ ++ if (var.base != (var.selector << 4)) ++ return false; ++ if (var.limit != 0xffff) ++ return false; ++ if (ar != 0xf3) ++ return false; ++ ++ return true; ++} ++ ++static bool code_segment_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs; ++ unsigned int cs_rpl; ++ ++ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ cs_rpl = cs.selector & SEGMENT_RPL_MASK; ++ ++ if (cs.unusable) ++ return false; ++ if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) ++ return false; ++ if (!cs.s) ++ return false; ++ if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { ++ if (cs.dpl > cs_rpl) ++ return false; ++ } else { ++ if (cs.dpl != cs_rpl) ++ return false; ++ } ++ if (!cs.present) ++ return false; ++ ++ /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ ++ return true; ++} ++ ++static bool stack_segment_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment ss; ++ unsigned int ss_rpl; ++ ++ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ++ ss_rpl = ss.selector & SEGMENT_RPL_MASK; ++ ++ if (ss.unusable) ++ return true; ++ if (ss.type != 3 && ss.type != 7) ++ return false; ++ if (!ss.s) ++ return false; ++ if (ss.dpl != ss_rpl) /* DPL != RPL */ ++ return false; ++ if (!ss.present) ++ return false; ++ ++ return true; ++} ++ ++static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) ++{ ++ struct kvm_segment var; ++ unsigned int rpl; ++ ++ vmx_get_segment(vcpu, &var, seg); ++ rpl = var.selector & SEGMENT_RPL_MASK; ++ ++ if (var.unusable) ++ return true; ++ if (!var.s) ++ return false; ++ if (!var.present) ++ return false; ++ if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { ++ if (var.dpl < rpl) /* DPL < RPL */ ++ return false; ++ } ++ ++ /* TODO: Add other members to kvm_segment_field to allow checking for other access ++ * rights flags ++ */ ++ return true; ++} ++ ++static bool tr_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment tr; ++ ++ vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); ++ ++ if (tr.unusable) ++ return false; ++ if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ ++ return false; ++ if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in 
IA32e mode */ ++ return false; ++ if (!tr.present) ++ return false; ++ ++ return true; ++} ++ ++static bool ldtr_valid(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment ldtr; ++ ++ vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); ++ ++ if (ldtr.unusable) ++ return true; ++ if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ ++ return false; ++ if (ldtr.type != 2) ++ return false; ++ if (!ldtr.present) ++ return false; ++ ++ return true; ++} ++ ++static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs, ss; ++ ++ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ++ ++ return ((cs.selector & SEGMENT_RPL_MASK) == ++ (ss.selector & SEGMENT_RPL_MASK)); ++} ++ ++static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, ++ unsigned int port, int size); ++static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ unsigned long exit_qualification; ++ unsigned short port; ++ int size; ++ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ port = exit_qualification >> 16; ++ size = (exit_qualification & 7) + 1; ++ ++ return nested_vmx_check_io_bitmaps(vcpu, port, size); ++} ++ ++/* ++ * Check if guest state is valid. Returns true if valid, false if ++ * not. ++ * We assume that registers are always usable ++ */ ++static bool guest_state_valid(struct kvm_vcpu *vcpu) ++{ ++ if (enable_unrestricted_guest) ++ return true; ++ ++ /* real mode guest state checks */ ++ if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) ++ return false; ++ if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) ++ return false; ++ } else { ++ /* protected mode guest state checks */ ++ if (!cs_ss_rpl_check(vcpu)) ++ return false; ++ if (!code_segment_valid(vcpu)) ++ return false; ++ if (!stack_segment_valid(vcpu)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_DS)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_ES)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_FS)) ++ return false; ++ if (!data_segment_valid(vcpu, VCPU_SREG_GS)) ++ return false; ++ if (!tr_valid(vcpu)) ++ return false; ++ if (!ldtr_valid(vcpu)) ++ return false; ++ } ++ /* TODO: ++ * - Add checks on RIP ++ * - Add checks on RFLAGS ++ */ ++ ++ return true; ++} ++ ++static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) ++{ ++ return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); ++} ++ ++static int init_rmode_tss(struct kvm *kvm) ++{ ++ gfn_t fn; ++ u16 data = 0; ++ int idx, r; ++ ++ idx = srcu_read_lock(&kvm->srcu); ++ fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; ++ r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; ++ r = kvm_write_guest_page(kvm, fn++, &data, ++ TSS_IOPB_BASE_OFFSET, sizeof(u16)); ++ if (r < 0) ++ goto out; ++ r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ data = ~0; ++ r = kvm_write_guest_page(kvm, fn, &data, ++ RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, ++ 
sizeof(u8)); ++out: ++ srcu_read_unlock(&kvm->srcu, idx); ++ return r; ++} ++ ++static int init_rmode_identity_map(struct kvm *kvm) ++{ ++ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); ++ int i, idx, r = 0; ++ kvm_pfn_t identity_map_pfn; ++ u32 tmp; ++ ++ /* Protect kvm_vmx->ept_identity_pagetable_done. */ ++ mutex_lock(&kvm->slots_lock); ++ ++ if (likely(kvm_vmx->ept_identity_pagetable_done)) ++ goto out2; ++ ++ if (!kvm_vmx->ept_identity_map_addr) ++ kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; ++ identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; ++ ++ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, ++ kvm_vmx->ept_identity_map_addr, PAGE_SIZE); ++ if (r < 0) ++ goto out2; ++ ++ idx = srcu_read_lock(&kvm->srcu); ++ r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); ++ if (r < 0) ++ goto out; ++ /* Set up identity-mapping pagetable for EPT in real mode */ ++ for (i = 0; i < PT32_ENT_PER_PAGE; i++) { ++ tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | ++ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); ++ r = kvm_write_guest_page(kvm, identity_map_pfn, ++ &tmp, i * sizeof(tmp), sizeof(tmp)); ++ if (r < 0) ++ goto out; ++ } ++ kvm_vmx->ept_identity_pagetable_done = true; ++ ++out: ++ srcu_read_unlock(&kvm->srcu, idx); ++ ++out2: ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++static void seg_setup(int seg) ++{ ++ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; ++ unsigned int ar; ++ ++ vmcs_write16(sf->selector, 0); ++ vmcs_writel(sf->base, 0); ++ vmcs_write32(sf->limit, 0xffff); ++ ar = 0x93; ++ if (seg == VCPU_SREG_CS) ++ ar |= 0x08; /* code segment */ ++ ++ vmcs_write32(sf->ar_bytes, ar); ++} ++ ++static int alloc_apic_access_page(struct kvm *kvm) ++{ ++ struct page *page; ++ int r = 0; ++ ++ mutex_lock(&kvm->slots_lock); ++ if (kvm->arch.apic_access_page_done) ++ goto out; ++ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, ++ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); ++ if (r) ++ goto out; ++ ++ page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (is_error_page(page)) { ++ r = -EFAULT; ++ goto out; ++ } ++ ++ /* ++ * Do not pin the page in memory, so that memory hot-unplug ++ * is able to migrate it. ++ */ ++ put_page(page); ++ kvm->arch.apic_access_page_done = true; ++out: ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++static int allocate_vpid(void) ++{ ++ int vpid; ++ ++ if (!enable_vpid) ++ return 0; ++ spin_lock(&vmx_vpid_lock); ++ vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); ++ if (vpid < VMX_NR_VPIDS) ++ __set_bit(vpid, vmx_vpid_bitmap); ++ else ++ vpid = 0; ++ spin_unlock(&vmx_vpid_lock); ++ return vpid; ++} ++ ++static void free_vpid(int vpid) ++{ ++ if (!enable_vpid || vpid == 0) ++ return; ++ spin_lock(&vmx_vpid_lock); ++ __clear_bit(vpid, vmx_vpid_bitmap); ++ spin_unlock(&vmx_vpid_lock); ++} ++ ++static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_touch_msr_bitmap(); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R) ++ /* read-low */ ++ __clear_bit(msr, msr_bitmap + 0x000 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-low */ ++ __clear_bit(msr, msr_bitmap + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R) ++ /* read-high */ ++ __clear_bit(msr, msr_bitmap + 0x400 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-high */ ++ __clear_bit(msr, msr_bitmap + 0xc00 / f); ++ ++ } ++} ++ ++static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return; ++ ++ if (static_branch_unlikely(&enable_evmcs)) ++ evmcs_touch_msr_bitmap(); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. ++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R) ++ /* read-low */ ++ __set_bit(msr, msr_bitmap + 0x000 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-low */ ++ __set_bit(msr, msr_bitmap + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R) ++ /* read-high */ ++ __set_bit(msr, msr_bitmap + 0x400 / f); ++ ++ if (type & MSR_TYPE_W) ++ /* write-high */ ++ __set_bit(msr, msr_bitmap + 0xc00 / f); ++ ++ } ++} ++ ++static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type, bool value) ++{ ++ if (value) ++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); ++ else ++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); ++} ++ ++/* ++ * If a msr is allowed by L0, we should check whether it is allowed by L1. ++ * The corresponding bit will be cleared unless both of L0 and L1 allow it. ++ */ ++static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, ++ unsigned long *msr_bitmap_nested, ++ u32 msr, int type) ++{ ++ int f = sizeof(unsigned long); ++ ++ /* ++ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals ++ * have the write-low and read-high bitmap offsets the wrong way round. ++ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
++ */ ++ if (msr <= 0x1fff) { ++ if (type & MSR_TYPE_R && ++ !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) ++ /* read-low */ ++ __clear_bit(msr, msr_bitmap_nested + 0x000 / f); ++ ++ if (type & MSR_TYPE_W && ++ !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) ++ /* write-low */ ++ __clear_bit(msr, msr_bitmap_nested + 0x800 / f); ++ ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ if (type & MSR_TYPE_R && ++ !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) ++ /* read-high */ ++ __clear_bit(msr, msr_bitmap_nested + 0x400 / f); ++ ++ if (type & MSR_TYPE_W && ++ !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) ++ /* write-high */ ++ __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); ++ ++ } ++} ++ ++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) ++{ ++ u8 mode = 0; ++ ++ if (cpu_has_secondary_exec_ctrls() && ++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { ++ mode |= MSR_BITMAP_MODE_X2APIC; ++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) ++ mode |= MSR_BITMAP_MODE_X2APIC_APICV; ++ } ++ ++ return mode; ++} ++ ++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) ++ ++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, ++ u8 mode) ++{ ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++ ++ if (mode & MSR_BITMAP_MODE_X2APIC) { ++ /* ++ * TPR reads and writes can be virtualized even if virtual interrupt ++ * delivery is not in use. ++ */ ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); ++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { ++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); ++ } ++ } ++} ++ ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; ++ u8 mode = vmx_msr_bitmap_mode(vcpu); ++ u8 changed = mode ^ vmx->msr_bitmap_mode; ++ ++ if (!changed) ++ return; ++ ++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) ++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); ++ ++ vmx->msr_bitmap_mode = mode; ++} ++ ++static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) ++{ ++ return enable_apicv; ++} ++ ++static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gfn_t gfn; ++ ++ /* ++ * Don't need to mark the APIC access page dirty; it is never ++ * written to by the CPU during APIC virtualization. 
++ */ ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++} ++ ++ ++static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int max_irr; ++ void *vapic_page; ++ u16 status; ++ ++ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) ++ return; ++ ++ vmx->nested.pi_pending = false; ++ if (!pi_test_and_clear_on(vmx->nested.pi_desc)) ++ return; ++ ++ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (max_irr != 256) { ++ vapic_page = kmap(vmx->nested.virtual_apic_page); ++ __kvm_apic_update_irr(vmx->nested.pi_desc->pir, ++ vapic_page, &max_irr); ++ kunmap(vmx->nested.virtual_apic_page); ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ if ((u8)max_irr > ((u8)status & 0xff)) { ++ status &= ~0xff; ++ status |= (u8)max_irr; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++ } ++ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++} ++ ++static u8 vmx_get_rvi(void) ++{ ++ return vmcs_read16(GUEST_INTR_STATUS) & 0xff; ++} ++ ++static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ void *vapic_page; ++ u32 vppr; ++ int rvi; ++ ++ if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || ++ !nested_cpu_has_vid(get_vmcs12(vcpu)) || ++ WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) ++ return false; ++ ++ rvi = vmx_get_rvi(); ++ ++ vapic_page = kmap(vmx->nested.virtual_apic_page); ++ vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); ++ kunmap(vmx->nested.virtual_apic_page); ++ ++ return ((rvi & 0xf0) > (vppr & 0xf0)); ++} ++ ++static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, ++ bool nested) ++{ ++#ifdef CONFIG_SMP ++ int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; ++ ++ if (vcpu->mode == IN_GUEST_MODE) { ++ /* ++ * The vector of interrupt to be delivered to vcpu had ++ * been set in PIR before this function. ++ * ++ * Following cases will be reached in this block, and ++ * we always send a notification event in all cases as ++ * explained below. ++ * ++ * Case 1: vcpu keeps in non-root mode. Sending a ++ * notification event posts the interrupt to vcpu. ++ * ++ * Case 2: vcpu exits to root mode and is still ++ * runnable. PIR will be synced to vIRR before the ++ * next vcpu entry. Sending a notification event in ++ * this case has no effect, as vcpu is not in root ++ * mode. ++ * ++ * Case 3: vcpu exits to root mode and is blocked. ++ * vcpu_block() has already synced PIR to vIRR and ++ * never blocks vcpu if vIRR is not cleared. Therefore, ++ * a blocked vcpu here does not wait for any requested ++ * interrupts in PIR, and sending a notification event ++ * which has no effect is safe here. ++ */ ++ ++ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); ++ return true; ++ } ++#endif ++ return false; ++} ++ ++static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, ++ int vector) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ vector == vmx->nested.posted_intr_nv) { ++ /* ++ * If a posted intr is not recognized by hardware, ++ * we will accomplish it in the next vmentry. ++ */ ++ vmx->nested.pi_pending = true; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ /* the PIR and ON have been set by L1. 
*/ ++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) ++ kvm_vcpu_kick(vcpu); ++ return 0; ++ } ++ return -1; ++} ++/* ++ * Send interrupt to vcpu via posted interrupt way. ++ * 1. If target vcpu is running(non-root mode), send posted interrupt ++ * notification to vcpu and hardware will sync PIR to vIRR atomically. ++ * 2. If target vcpu isn't running(root mode), kick it to pick up the ++ * interrupt from PIR in next vmentry. ++ */ ++static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int r; ++ ++ r = vmx_deliver_nested_posted_interrupt(vcpu, vector); ++ if (!r) ++ return; ++ ++ if (pi_test_and_set_pir(vector, &vmx->pi_desc)) ++ return; ++ ++ /* If a previous notification has sent the IPI, nothing to do. */ ++ if (pi_test_and_set_on(&vmx->pi_desc)) ++ return; ++ ++ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) ++ kvm_vcpu_kick(vcpu); ++} ++ ++/* ++ * Set up the vmcs's constant host-state fields, i.e., host-state fields that ++ * will not change in the lifetime of the guest. ++ * Note that host-state that does change is set elsewhere. E.g., host-state ++ * that is set differently for each CPU is set in vmx_vcpu_load(), not here. ++ */ ++static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) ++{ ++ u32 low32, high32; ++ unsigned long tmpl; ++ struct desc_ptr dt; ++ unsigned long cr0, cr3, cr4; ++ ++ cr0 = read_cr0(); ++ WARN_ON(cr0 & X86_CR0_TS); ++ vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ ++ ++ /* ++ * Save the most likely value for this task's CR3 in the VMCS. ++ * We can't use __get_current_cr3_fast() because we're not atomic. ++ */ ++ cr3 = __read_cr3(); ++ vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ ++ vmx->loaded_vmcs->host_state.cr3 = cr3; ++ ++ /* Save the most likely value for this task's CR4 in the VMCS. */ ++ cr4 = cr4_read_shadow(); ++ vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ ++ vmx->loaded_vmcs->host_state.cr4 = cr4; ++ ++ vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ ++#ifdef CONFIG_X86_64 ++ /* ++ * Load null selectors, so we can avoid reloading them in ++ * vmx_prepare_switch_to_host(), in case userspace uses ++ * the null selectors too (the expected case). 
++ */ ++ vmcs_write16(HOST_DS_SELECTOR, 0); ++ vmcs_write16(HOST_ES_SELECTOR, 0); ++#else ++ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++#endif ++ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ ++ vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ ++ ++ store_idt(&dt); ++ vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ ++ vmx->host_idt_base = dt.address; ++ ++ vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ ++ ++ rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); ++ vmcs_write32(HOST_IA32_SYSENTER_CS, low32); ++ rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); ++ vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ ++ ++ if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { ++ rdmsr(MSR_IA32_CR_PAT, low32, high32); ++ vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); ++ } ++} ++ ++static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) ++{ ++ vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; ++ if (enable_ept) ++ vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; ++ if (is_guest_mode(&vmx->vcpu)) ++ vmx->vcpu.arch.cr4_guest_owned_bits &= ++ ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; ++ vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); ++} ++ ++static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) ++{ ++ u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; ++ ++ if (!kvm_vcpu_apicv_active(&vmx->vcpu)) ++ pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; ++ ++ if (!enable_vnmi) ++ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; ++ ++ /* Enable the preemption timer dynamically */ ++ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ return pin_based_exec_ctrl; ++} ++ ++static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); ++ if (cpu_has_secondary_exec_ctrls()) { ++ if (kvm_vcpu_apicv_active(vcpu)) ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ else ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ } ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++} ++ ++static u32 vmx_exec_control(struct vcpu_vmx *vmx) ++{ ++ u32 exec_control = vmcs_config.cpu_based_exec_ctrl; ++ ++ if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) ++ exec_control &= ~CPU_BASED_MOV_DR_EXITING; ++ ++ if (!cpu_need_tpr_shadow(&vmx->vcpu)) { ++ exec_control &= ~CPU_BASED_TPR_SHADOW; ++#ifdef CONFIG_X86_64 ++ exec_control |= CPU_BASED_CR8_STORE_EXITING | ++ CPU_BASED_CR8_LOAD_EXITING; ++#endif ++ } ++ if (!enable_ept) ++ exec_control |= CPU_BASED_CR3_STORE_EXITING | ++ CPU_BASED_CR3_LOAD_EXITING | ++ CPU_BASED_INVLPG_EXITING; ++ if (kvm_mwait_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~(CPU_BASED_MWAIT_EXITING | ++ CPU_BASED_MONITOR_EXITING); ++ if (kvm_hlt_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~CPU_BASED_HLT_EXITING; ++ return exec_control; ++} ++ ++static bool vmx_rdrand_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDRAND_EXITING; ++} ++ ++static bool vmx_rdseed_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_RDSEED_EXITING; ++} ++ ++static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) ++{ ++ struct kvm_vcpu *vcpu = &vmx->vcpu; ++ ++ u32 exec_control = 
vmcs_config.cpu_based_2nd_exec_ctrl; ++ ++ if (!cpu_need_virtualize_apic_accesses(vcpu)) ++ exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ if (vmx->vpid == 0) ++ exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; ++ if (!enable_ept) { ++ exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; ++ enable_unrestricted_guest = 0; ++ } ++ if (!enable_unrestricted_guest) ++ exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; ++ if (kvm_pause_in_guest(vmx->vcpu.kvm)) ++ exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); ++ exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++ ++ /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, ++ * in vmx_set_cr4. */ ++ exec_control &= ~SECONDARY_EXEC_DESC; ++ ++ /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD ++ (handle_vmptrld). ++ We can NOT enable shadow_vmcs here because we don't have yet ++ a current VMCS12 ++ */ ++ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (!enable_pml) ++ exec_control &= ~SECONDARY_EXEC_ENABLE_PML; ++ ++ if (vmx_xsaves_supported()) { ++ /* Exposing XSAVES only when XSAVE is exposed */ ++ bool xsaves_enabled = ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && ++ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); ++ ++ if (!xsaves_enabled) ++ exec_control &= ~SECONDARY_EXEC_XSAVES; ++ ++ if (nested) { ++ if (xsaves_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_XSAVES; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_XSAVES; ++ } ++ } ++ ++ if (vmx_rdtscp_supported()) { ++ bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); ++ if (!rdtscp_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDTSCP; ++ ++ if (nested) { ++ if (rdtscp_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDTSCP; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDTSCP; ++ } ++ } ++ ++ if (vmx_invpcid_supported()) { ++ /* Exposing INVPCID only when PCID is exposed */ ++ bool invpcid_enabled = ++ guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && ++ guest_cpuid_has(vcpu, X86_FEATURE_PCID); ++ ++ if (!invpcid_enabled) { ++ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; ++ guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); ++ } ++ ++ if (nested) { ++ if (invpcid_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_ENABLE_INVPCID; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_ENABLE_INVPCID; ++ } ++ } ++ ++ if (vmx_rdrand_supported()) { ++ bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); ++ if (rdrand_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING; ++ ++ if (nested) { ++ if (rdrand_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDRAND_EXITING; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDRAND_EXITING; ++ } ++ } ++ ++ if (vmx_rdseed_supported()) { ++ bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); ++ if (rdseed_enabled) ++ exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING; ++ ++ if (nested) { ++ if (rdseed_enabled) ++ vmx->nested.msrs.secondary_ctls_high |= ++ SECONDARY_EXEC_RDSEED_EXITING; ++ else ++ vmx->nested.msrs.secondary_ctls_high &= ++ ~SECONDARY_EXEC_RDSEED_EXITING; ++ } ++ } ++ ++ vmx->secondary_exec_control = exec_control; ++} ++ ++static void ept_set_mmio_spte_mask(void) ++{ ++ /* ++ * EPT Misconfigurations can be generated if the value of bits 2:0 ++ * of an EPT paging-structure 
entry is 110b (write/execute). ++ */ ++ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, ++ VMX_EPT_MISCONFIG_WX_VALUE); ++} ++ ++#define VMX_XSS_EXIT_BITMAP 0 ++/* ++ * Sets up the vmcs for emulated real mode. ++ */ ++static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++{ ++ int i; ++ ++ if (enable_shadow_vmcs) { ++ /* ++ * At vCPU creation, "VMWRITE to any supported field ++ * in the VMCS" is supported, so use the more ++ * permissive vmx_vmread_bitmap to specify both read ++ * and write permissions for the shadow VMCS. ++ */ ++ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); ++ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap)); ++ } ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); ++ ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ ++ ++ /* Control */ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); ++ vmx->hv_deadline_tsc = -1; ++ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ vmx_compute_secondary_exec_control(vmx); ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, ++ vmx->secondary_exec_control); ++ } ++ ++ if (kvm_vcpu_apicv_active(&vmx->vcpu)) { ++ vmcs_write64(EOI_EXIT_BITMAP0, 0); ++ vmcs_write64(EOI_EXIT_BITMAP1, 0); ++ vmcs_write64(EOI_EXIT_BITMAP2, 0); ++ vmcs_write64(EOI_EXIT_BITMAP3, 0); ++ ++ vmcs_write16(GUEST_INTR_STATUS, 0); ++ ++ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); ++ vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); ++ } ++ ++ if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { ++ vmcs_write32(PLE_GAP, ple_gap); ++ vmx->ple_window = ple_window; ++ vmx->ple_window_dirty = true; ++ } ++ ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); ++ vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ ++ ++ vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ ++ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ ++ vmx_set_constant_host_state(vmx); ++ vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ ++ vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ ++ ++ if (cpu_has_vmx_vmfunc()) ++ vmcs_write64(VM_FUNCTION_CONTROL, 0); ++ ++ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); ++ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); ++ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); ++ ++ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) ++ vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); ++ ++ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { ++ u32 index = vmx_msr_index[i]; ++ u32 data_low, data_high; ++ int j = vmx->nmsrs; ++ ++ if (rdmsr_safe(index, &data_low, &data_high) < 0) ++ continue; ++ if (wrmsr_safe(index, data_low, data_high) < 0) ++ continue; ++ vmx->guest_msrs[j].index = i; ++ vmx->guest_msrs[j].data = 0; ++ vmx->guest_msrs[j].mask = -1ull; ++ ++vmx->nmsrs; ++ } ++ ++ vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); ++ ++ /* 22.2.1, 20.8.1 */ ++ vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); ++ ++ vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); ++ ++ set_cr4_guest_host_mask(vmx); ++ ++ if (vmx_xsaves_supported()) ++ vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); ++ ++ if (enable_pml) { ++ ASSERT(vmx->pml_pg); ++ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++ } ++ ++ if (cpu_has_vmx_encls_vmexit()) ++ vmcs_write64(ENCLS_EXITING_BITMAP, 
-1ull); ++} ++ ++static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct msr_data apic_base_msr; ++ u64 cr0; ++ ++ vmx->rmode.vm86_active = 0; ++ vmx->spec_ctrl = 0; ++ ++ vcpu->arch.microcode_version = 0x100000000ULL; ++ vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); ++ kvm_set_cr8(vcpu, 0); ++ ++ if (!init_event) { ++ apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | ++ MSR_IA32_APICBASE_ENABLE; ++ if (kvm_vcpu_is_reset_bsp(vcpu)) ++ apic_base_msr.data |= MSR_IA32_APICBASE_BSP; ++ apic_base_msr.host_initiated = true; ++ kvm_set_apic_base(vcpu, &apic_base_msr); ++ } ++ ++ vmx_segment_cache_clear(vmx); ++ ++ seg_setup(VCPU_SREG_CS); ++ vmcs_write16(GUEST_CS_SELECTOR, 0xf000); ++ vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); ++ ++ seg_setup(VCPU_SREG_DS); ++ seg_setup(VCPU_SREG_ES); ++ seg_setup(VCPU_SREG_FS); ++ seg_setup(VCPU_SREG_GS); ++ seg_setup(VCPU_SREG_SS); ++ ++ vmcs_write16(GUEST_TR_SELECTOR, 0); ++ vmcs_writel(GUEST_TR_BASE, 0); ++ vmcs_write32(GUEST_TR_LIMIT, 0xffff); ++ vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); ++ ++ vmcs_write16(GUEST_LDTR_SELECTOR, 0); ++ vmcs_writel(GUEST_LDTR_BASE, 0); ++ vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); ++ vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); ++ ++ if (!init_event) { ++ vmcs_write32(GUEST_SYSENTER_CS, 0); ++ vmcs_writel(GUEST_SYSENTER_ESP, 0); ++ vmcs_writel(GUEST_SYSENTER_EIP, 0); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); ++ } ++ ++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ kvm_rip_write(vcpu, 0xfff0); ++ ++ vmcs_writel(GUEST_GDTR_BASE, 0); ++ vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); ++ ++ vmcs_writel(GUEST_IDTR_BASE, 0); ++ vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); ++ ++ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); ++ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); ++ if (kvm_mpx_supported()) ++ vmcs_write64(GUEST_BNDCFGS, 0); ++ ++ setup_msrs(vmx); ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ ++ ++ if (cpu_has_vmx_tpr_shadow() && !init_event) { ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); ++ if (cpu_need_tpr_shadow(vcpu)) ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, ++ __pa(vcpu->arch.apic->regs)); ++ vmcs_write32(TPR_THRESHOLD, 0); ++ } ++ ++ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); ++ ++ if (vmx->vpid != 0) ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); ++ ++ cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; ++ vmx->vcpu.arch.cr0 = cr0; ++ vmx_set_cr0(vcpu, cr0); /* enter rmode */ ++ vmx_set_cr4(vcpu, 0); ++ vmx_set_efer(vcpu, 0); ++ ++ update_exception_bitmap(vcpu); ++ ++ vpid_sync_context(vmx->vpid); ++ if (init_event) ++ vmx_clear_hlt(vcpu); ++} ++ ++/* ++ * In nested virtualization, check if L1 asked to exit on external interrupts. ++ * For most existing hypervisors, this will always return true. 
++ */ ++static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) ++{ ++ return get_vmcs12(vcpu)->pin_based_vm_exec_control & ++ PIN_BASED_EXT_INTR_MASK; ++} ++ ++/* ++ * In nested virtualization, check if L1 has set ++ * VM_EXIT_ACK_INTR_ON_EXIT ++ */ ++static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) ++{ ++ return get_vmcs12(vcpu)->vm_exit_controls & ++ VM_EXIT_ACK_INTR_ON_EXIT; ++} ++ ++static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) ++{ ++ return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); ++} ++ ++static void enable_irq_window(struct kvm_vcpu *vcpu) ++{ ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_INTR_PENDING); ++} ++ ++static void enable_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ if (!enable_vnmi || ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { ++ enable_irq_window(vcpu); ++ return; ++ } ++ ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_NMI_PENDING); ++} ++ ++static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ uint32_t intr; ++ int irq = vcpu->arch.interrupt.nr; ++ ++ trace_kvm_inj_virq(irq); ++ ++ ++vcpu->stat.irq_injections; ++ if (vmx->rmode.vm86_active) { ++ int inc_eip = 0; ++ if (vcpu->arch.interrupt.soft) ++ inc_eip = vcpu->arch.event_exit_inst_len; ++ if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ intr = irq | INTR_INFO_VALID_MASK; ++ if (vcpu->arch.interrupt.soft) { ++ intr |= INTR_TYPE_SOFT_INTR; ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmx->vcpu.arch.event_exit_inst_len); ++ } else ++ intr |= INTR_TYPE_EXT_INTR; ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!enable_vnmi) { ++ /* ++ * Tracking the NMI-blocked state in software is built upon ++ * finding the next open IRQ window. This, in turn, depends on ++ * well-behaving guests: They have to keep IRQs disabled at ++ * least as long as the NMI handler runs. Otherwise we may ++ * cause NMI nesting, maybe breaking the guest. But as this is ++ * highly unlikely, we can live with the residual risk. 
++ */ ++ vmx->loaded_vmcs->soft_vnmi_blocked = 1; ++ vmx->loaded_vmcs->vnmi_blocked_time = 0; ++ } ++ ++ ++vcpu->stat.nmi_injections; ++ vmx->loaded_vmcs->nmi_known_unmasked = false; ++ ++ if (vmx->rmode.vm86_active) { ++ if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, ++ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); ++ ++ vmx_clear_hlt(vcpu); ++} ++ ++static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ bool masked; ++ ++ if (!enable_vnmi) ++ return vmx->loaded_vmcs->soft_vnmi_blocked; ++ if (vmx->loaded_vmcs->nmi_known_unmasked) ++ return false; ++ masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; ++ vmx->loaded_vmcs->nmi_known_unmasked = !masked; ++ return masked; ++} ++ ++static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!enable_vnmi) { ++ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { ++ vmx->loaded_vmcs->soft_vnmi_blocked = masked; ++ vmx->loaded_vmcs->vnmi_blocked_time = 0; ++ } ++ } else { ++ vmx->loaded_vmcs->nmi_known_unmasked = !masked; ++ if (masked) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ else ++ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ } ++} ++ ++static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) ++{ ++ if (to_vmx(vcpu)->nested.nested_run_pending) ++ return 0; ++ ++ if (!enable_vnmi && ++ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) ++ return 0; ++ ++ return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & ++ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI ++ | GUEST_INTR_STATE_NMI)); ++} ++ ++static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ return (!to_vmx(vcpu)->nested.nested_run_pending && ++ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && ++ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & ++ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); ++} ++ ++static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) ++{ ++ int ret; ++ ++ if (enable_unrestricted_guest) ++ return 0; ++ ++ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, ++ PAGE_SIZE * 3); ++ if (ret) ++ return ret; ++ to_kvm_vmx(kvm)->tss_addr = addr; ++ return init_rmode_tss(kvm); ++} ++ ++static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) ++{ ++ to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; ++ return 0; ++} ++ ++static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) ++{ ++ switch (vec) { ++ case BP_VECTOR: ++ /* ++ * Update instruction length as we may reinject the exception ++ * from user space while in guest debugging mode. ++ */ ++ to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ return false; ++ /* fall through */ ++ case DB_VECTOR: ++ if (vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) ++ return false; ++ /* fall through */ ++ case DE_VECTOR: ++ case OF_VECTOR: ++ case BR_VECTOR: ++ case UD_VECTOR: ++ case DF_VECTOR: ++ case SS_VECTOR: ++ case GP_VECTOR: ++ case MF_VECTOR: ++ return true; ++ break; ++ } ++ return false; ++} ++ ++static int handle_rmode_exception(struct kvm_vcpu *vcpu, ++ int vec, u32 err_code) ++{ ++ /* ++ * Instruction with address size override prefix opcode 0x67 ++ * Cause the #SS fault with 0 error code in VM86 mode. 
++ */ ++ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { ++ if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { ++ if (vcpu->arch.halt_request) { ++ vcpu->arch.halt_request = 0; ++ return kvm_vcpu_halt(vcpu); ++ } ++ return 1; ++ } ++ return 0; ++ } ++ ++ /* ++ * Forward all other exceptions that are valid in real mode. ++ * FIXME: Breaks guest debugging in real mode, needs to be fixed with ++ * the required debugging infrastructure rework. ++ */ ++ kvm_queue_exception(vcpu, vec); ++ return 1; ++} ++ ++/* ++ * Trigger machine check on the host. We assume all the MSRs are already set up ++ * by the CPU and that we still run on the same CPU as the MCE occurred on. ++ * We pass a fake environment to the machine check handler because we want ++ * the guest to be always treated like user space, no matter what context ++ * it used internally. ++ */ ++static void kvm_machine_check(void) ++{ ++#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) ++ struct pt_regs regs = { ++ .cs = 3, /* Fake ring 3 no matter what the guest ran on */ ++ .flags = X86_EFLAGS_IF, ++ }; ++ ++ do_machine_check(®s, 0); ++#endif ++} ++ ++static int handle_machine_check(struct kvm_vcpu *vcpu) ++{ ++ /* already handled by vcpu_run */ ++ return 1; ++} ++ ++static int handle_exception(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_run *kvm_run = vcpu->run; ++ u32 intr_info, ex_no, error_code; ++ unsigned long cr2, rip, dr6; ++ u32 vect_info; ++ enum emulation_result er; ++ ++ vect_info = vmx->idt_vectoring_info; ++ intr_info = vmx->exit_intr_info; ++ ++ if (is_machine_check(intr_info)) ++ return handle_machine_check(vcpu); ++ ++ if (is_nmi(intr_info)) ++ return 1; /* already handled by vmx_vcpu_run() */ ++ ++ if (is_invalid_opcode(intr_info)) ++ return handle_ud(vcpu); ++ ++ error_code = 0; ++ if (intr_info & INTR_INFO_DELIVER_CODE_MASK) ++ error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); ++ ++ if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { ++ WARN_ON_ONCE(!enable_vmware_backdoor); ++ er = kvm_emulate_instruction(vcpu, ++ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ else if (er != EMULATE_DONE) ++ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); ++ return 1; ++ } ++ ++ /* ++ * The #PF with PFEC.RSVD = 1 indicates the guest is accessing ++ * MMIO, it is better to report an internal error. ++ * See the comments in vmx_handle_exit. 
++ */ ++ if ((vect_info & VECTORING_INFO_VALID_MASK) && ++ !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; ++ vcpu->run->internal.ndata = 3; ++ vcpu->run->internal.data[0] = vect_info; ++ vcpu->run->internal.data[1] = intr_info; ++ vcpu->run->internal.data[2] = error_code; ++ return 0; ++ } ++ ++ if (is_page_fault(intr_info)) { ++ cr2 = vmcs_readl(EXIT_QUALIFICATION); ++ /* EPT won't cause page fault directly */ ++ WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); ++ return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); ++ } ++ ++ ex_no = intr_info & INTR_INFO_VECTOR_MASK; ++ ++ if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) ++ return handle_rmode_exception(vcpu, ex_no, error_code); ++ ++ switch (ex_no) { ++ case AC_VECTOR: ++ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); ++ return 1; ++ case DB_VECTOR: ++ dr6 = vmcs_readl(EXIT_QUALIFICATION); ++ if (!(vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= dr6 | DR6_RTM; ++ if (is_icebp(intr_info)) ++ skip_emulated_instruction(vcpu); ++ ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ return 1; ++ } ++ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; ++ kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); ++ /* fall through */ ++ case BP_VECTOR: ++ /* ++ * Update instruction length as we may reinject #BP from ++ * user space while in guest debugging mode. Reading it for ++ * #DB as well causes no harm, it is not used in that case. ++ */ ++ vmx->vcpu.arch.event_exit_inst_len = ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ rip = kvm_rip_read(vcpu); ++ kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; ++ kvm_run->debug.arch.exception = ex_no; ++ break; ++ default: ++ kvm_run->exit_reason = KVM_EXIT_EXCEPTION; ++ kvm_run->ex.exception = ex_no; ++ kvm_run->ex.error_code = error_code; ++ break; ++ } ++ return 0; ++} ++ ++static int handle_external_interrupt(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.irq_exits; ++ return 1; ++} ++ ++static int handle_triple_fault(struct kvm_vcpu *vcpu) ++{ ++ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; ++ vcpu->mmio_needed = 0; ++ return 0; ++} ++ ++static int handle_io(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ int size, in, string; ++ unsigned port; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ string = (exit_qualification & 16) != 0; ++ ++ ++vcpu->stat.io_exits; ++ ++ if (string) ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++ ++ port = exit_qualification >> 16; ++ size = (exit_qualification & 7) + 1; ++ in = (exit_qualification & 8) != 0; ++ ++ return kvm_fast_pio(vcpu, size, port, in); ++} ++ ++static void ++vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) ++{ ++ /* ++ * Patch in the VMCALL instruction: ++ */ ++ hypercall[0] = 0x0f; ++ hypercall[1] = 0x01; ++ hypercall[2] = 0xc1; ++} ++ ++/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ ++static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ if (is_guest_mode(vcpu)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned long orig_val = val; ++ ++ /* ++ * We get here when L2 changed cr0 in a way that did not change ++ * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), ++ * but did change L0 shadowed bits. 
So we first calculate the ++ * effective cr0 value that L1 would like to write into the ++ * hardware. It consists of the L2-owned bits from the new ++ * value combined with the L1-owned bits from L1's guest_cr0. ++ */ ++ val = (val & ~vmcs12->cr0_guest_host_mask) | ++ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); ++ ++ if (!nested_guest_cr0_valid(vcpu, val)) ++ return 1; ++ ++ if (kvm_set_cr0(vcpu, val)) ++ return 1; ++ vmcs_writel(CR0_READ_SHADOW, orig_val); ++ return 0; ++ } else { ++ if (to_vmx(vcpu)->nested.vmxon && ++ !nested_host_cr0_valid(vcpu, val)) ++ return 1; ++ ++ return kvm_set_cr0(vcpu, val); ++ } ++} ++ ++static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ if (is_guest_mode(vcpu)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned long orig_val = val; ++ ++ /* analogously to handle_set_cr0 */ ++ val = (val & ~vmcs12->cr4_guest_host_mask) | ++ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); ++ if (kvm_set_cr4(vcpu, val)) ++ return 1; ++ vmcs_writel(CR4_READ_SHADOW, orig_val); ++ return 0; ++ } else ++ return kvm_set_cr4(vcpu, val); ++} ++ ++static int handle_desc(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_cr(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification, val; ++ int cr; ++ int reg; ++ int err; ++ int ret; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ cr = exit_qualification & 15; ++ reg = (exit_qualification >> 8) & 15; ++ switch ((exit_qualification >> 4) & 3) { ++ case 0: /* mov to cr */ ++ val = kvm_register_readl(vcpu, reg); ++ trace_kvm_cr_write(cr, val); ++ switch (cr) { ++ case 0: ++ err = handle_set_cr0(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 3: ++ WARN_ON_ONCE(enable_unrestricted_guest); ++ err = kvm_set_cr3(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 4: ++ err = handle_set_cr4(vcpu, val); ++ return kvm_complete_insn_gp(vcpu, err); ++ case 8: { ++ u8 cr8_prev = kvm_get_cr8(vcpu); ++ u8 cr8 = (u8)val; ++ err = kvm_set_cr8(vcpu, cr8); ++ ret = kvm_complete_insn_gp(vcpu, err); ++ if (lapic_in_kernel(vcpu)) ++ return ret; ++ if (cr8_prev <= cr8) ++ return ret; ++ /* ++ * TODO: we might be squashing a ++ * KVM_GUESTDBG_SINGLESTEP-triggered ++ * KVM_EXIT_DEBUG here. 
++ */ ++ vcpu->run->exit_reason = KVM_EXIT_SET_TPR; ++ return 0; ++ } ++ } ++ break; ++ case 2: /* clts */ ++ WARN_ONCE(1, "Guest should always own CR0.TS"); ++ vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); ++ trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); ++ return kvm_skip_emulated_instruction(vcpu); ++ case 1: /*mov from cr*/ ++ switch (cr) { ++ case 3: ++ WARN_ON_ONCE(enable_unrestricted_guest); ++ val = kvm_read_cr3(vcpu); ++ kvm_register_write(vcpu, reg, val); ++ trace_kvm_cr_read(cr, val); ++ return kvm_skip_emulated_instruction(vcpu); ++ case 8: ++ val = kvm_get_cr8(vcpu); ++ kvm_register_write(vcpu, reg, val); ++ trace_kvm_cr_read(cr, val); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ break; ++ case 3: /* lmsw */ ++ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; ++ trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); ++ kvm_lmsw(vcpu, val); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++ default: ++ break; ++ } ++ vcpu->run->exit_reason = 0; ++ vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", ++ (int)(exit_qualification >> 4) & 3, cr); ++ return 0; ++} ++ ++static int handle_dr(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ int dr, dr7, reg; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ dr = exit_qualification & DEBUG_REG_ACCESS_NUM; ++ ++ /* First, if DR does not exist, trigger UD */ ++ if (!kvm_require_dr(vcpu, dr)) ++ return 1; ++ ++ /* Do not handle if the CPL > 0, will trigger GP on re-entry */ ++ if (!kvm_require_cpl(vcpu, 0)) ++ return 1; ++ dr7 = vmcs_readl(GUEST_DR7); ++ if (dr7 & DR7_GD) { ++ /* ++ * As the vm-exit takes precedence over the debug trap, we ++ * need to emulate the latter, either for the host or the ++ * guest debugging itself. ++ */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { ++ vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; ++ vcpu->run->debug.arch.dr7 = dr7; ++ vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); ++ vcpu->run->debug.arch.exception = DB_VECTOR; ++ vcpu->run->exit_reason = KVM_EXIT_DEBUG; ++ return 0; ++ } else { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= DR6_BD | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ return 1; ++ } ++ } ++ ++ if (vcpu->guest_debug == 0) { ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_MOV_DR_EXITING); ++ ++ /* ++ * No more DR vmexits; force a reload of the debug registers ++ * and reenter on this instruction. The next vmexit will ++ * retrieve the full state of the debug registers. 
++ */ ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; ++ return 1; ++ } ++ ++ reg = DEBUG_REG_ACCESS_REG(exit_qualification); ++ if (exit_qualification & TYPE_MOV_FROM_DR) { ++ unsigned long val; ++ ++ if (kvm_get_dr(vcpu, dr, &val)) ++ return 1; ++ kvm_register_write(vcpu, reg, val); ++ } else ++ if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) ++ return 1; ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.dr6; ++} ++ ++static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++} ++ ++static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) ++{ ++ get_debugreg(vcpu->arch.db[0], 0); ++ get_debugreg(vcpu->arch.db[1], 1); ++ get_debugreg(vcpu->arch.db[2], 2); ++ get_debugreg(vcpu->arch.db[3], 3); ++ get_debugreg(vcpu->arch.dr6, 6); ++ vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); ++ ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); ++} ++ ++static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ vmcs_writel(GUEST_DR7, val); ++} ++ ++static int handle_cpuid(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_cpuid(vcpu); ++} ++ ++static int handle_rdmsr(struct kvm_vcpu *vcpu) ++{ ++ u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; ++ struct msr_data msr_info; ++ ++ msr_info.index = ecx; ++ msr_info.host_initiated = false; ++ if (vmx_get_msr(vcpu, &msr_info)) { ++ trace_kvm_msr_read_ex(ecx); ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ trace_kvm_msr_read(ecx, msr_info.data); ++ ++ /* FIXME: handling of bits 32:63 of rax, rdx */ ++ vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; ++ vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_wrmsr(struct kvm_vcpu *vcpu) ++{ ++ struct msr_data msr; ++ u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; ++ u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) ++ | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); ++ ++ msr.data = data; ++ msr.index = ecx; ++ msr.host_initiated = false; ++ if (kvm_set_msr(vcpu, &msr) != 0) { ++ trace_kvm_msr_write_ex(ecx, data); ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ trace_kvm_msr_write(ecx, data); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) ++{ ++ kvm_apic_update_ppr(vcpu); ++ return 1; ++} ++ ++static int handle_interrupt_window(struct kvm_vcpu *vcpu) ++{ ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_INTR_PENDING); ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ++vcpu->stat.irq_window_exits; ++ return 1; ++} ++ ++static int handle_halt(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_halt(vcpu); ++} ++ ++static int handle_vmcall(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_hypercall(vcpu); ++} ++ ++static int handle_invd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_invlpg(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ kvm_mmu_invlpg(vcpu, exit_qualification); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_rdpmc(struct kvm_vcpu *vcpu) ++{ ++ int err; ++ ++ err = kvm_rdpmc(vcpu); ++ return kvm_complete_insn_gp(vcpu, err); ++} ++ ++static int handle_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_emulate_wbinvd(vcpu); ++} ++ ++static int handle_xsetbv(struct kvm_vcpu *vcpu) ++{ ++ u64 new_bv 
= kvm_read_edx_eax(vcpu); ++ u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ ++ if (kvm_set_xcr(vcpu, index, new_bv) == 0) ++ return kvm_skip_emulated_instruction(vcpu); ++ return 1; ++} ++ ++static int handle_xsaves(struct kvm_vcpu *vcpu) ++{ ++ kvm_skip_emulated_instruction(vcpu); ++ WARN(1, "this should never happen\n"); ++ return 1; ++} ++ ++static int handle_xrstors(struct kvm_vcpu *vcpu) ++{ ++ kvm_skip_emulated_instruction(vcpu); ++ WARN(1, "this should never happen\n"); ++ return 1; ++} ++ ++static int handle_apic_access(struct kvm_vcpu *vcpu) ++{ ++ if (likely(fasteoi)) { ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int access_type, offset; ++ ++ access_type = exit_qualification & APIC_ACCESS_TYPE; ++ offset = exit_qualification & APIC_ACCESS_OFFSET; ++ /* ++ * Sane guest uses MOV to write EOI, with written value ++ * not cared. So make a short-circuit here by avoiding ++ * heavy instruction emulation. ++ */ ++ if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) && ++ (offset == APIC_EOI)) { ++ kvm_lapic_set_eoi(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ } ++ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; ++} ++ ++static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int vector = exit_qualification & 0xff; ++ ++ /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ ++ kvm_apic_set_eoi_accelerated(vcpu, vector); ++ return 1; ++} ++ ++static int handle_apic_write(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 offset = exit_qualification & 0xfff; ++ ++ /* APIC-write VM exit is trap-like and thus no need to adjust IP */ ++ kvm_apic_write_nodecode(vcpu, offset); ++ return 1; ++} ++ ++static int handle_task_switch(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qualification; ++ bool has_error_code = false; ++ u32 error_code = 0; ++ u16 tss_selector; ++ int reason, type, idt_v, idt_index; ++ ++ idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); ++ idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); ++ type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ reason = (u32)exit_qualification >> 30; ++ if (reason == TASK_SWITCH_GATE && idt_v) { ++ switch (type) { ++ case INTR_TYPE_NMI_INTR: ++ vcpu->arch.nmi_injected = false; ++ vmx_set_nmi_mask(vcpu, true); ++ break; ++ case INTR_TYPE_EXT_INTR: ++ case INTR_TYPE_SOFT_INTR: ++ kvm_clear_interrupt_queue(vcpu); ++ break; ++ case INTR_TYPE_HARD_EXCEPTION: ++ if (vmx->idt_vectoring_info & ++ VECTORING_INFO_DELIVER_CODE_MASK) { ++ has_error_code = true; ++ error_code = ++ vmcs_read32(IDT_VECTORING_ERROR_CODE); ++ } ++ /* fall through */ ++ case INTR_TYPE_SOFT_EXCEPTION: ++ kvm_clear_exception_queue(vcpu); ++ break; ++ default: ++ break; ++ } ++ } ++ tss_selector = exit_qualification; ++ ++ if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && ++ type != INTR_TYPE_EXT_INTR && ++ type != INTR_TYPE_NMI_INTR)) ++ skip_emulated_instruction(vcpu); ++ ++ if (kvm_task_switch(vcpu, tss_selector, ++ type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, reason, ++ has_error_code, error_code) == EMULATE_FAIL) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ return 0; ++ } ++ ++ /* ++ * TODO: What about debug traps on tss switch? ++ * Are we supposed to inject them and update dr6? ++ */ ++ ++ return 1; ++} ++ ++static int handle_ept_violation(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ gpa_t gpa; ++ u64 error_code; ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ /* ++ * EPT violation happened while executing iret from NMI, ++ * "blocked by NMI" bit has to be set before next VM entry. ++ * There are errata that may cause this bit to not be set: ++ * AAK134, BY25. ++ */ ++ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && ++ enable_vnmi && ++ (exit_qualification & INTR_INFO_UNBLOCK_NMI)) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); ++ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ trace_kvm_page_fault(gpa, exit_qualification); ++ ++ /* Is it a read fault? */ ++ error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) ++ ? PFERR_USER_MASK : 0; ++ /* Is it a write fault? */ ++ error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) ++ ? PFERR_WRITE_MASK : 0; ++ /* Is it a fetch fault? */ ++ error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) ++ ? PFERR_FETCH_MASK : 0; ++ /* ept page table entry is present? */ ++ error_code |= (exit_qualification & ++ (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | ++ EPT_VIOLATION_EXECUTABLE)) ++ ? PFERR_PRESENT_MASK : 0; ++ ++ error_code |= (exit_qualification & 0x100) != 0 ? ++ PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; ++ ++ vcpu->arch.exit_qualification = exit_qualification; ++ return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); ++} ++ ++static int handle_ept_misconfig(struct kvm_vcpu *vcpu) ++{ ++ gpa_t gpa; ++ ++ /* ++ * A nested guest cannot optimize MMIO vmexits, because we have an ++ * nGPA here instead of the required GPA. ++ */ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ if (!is_guest_mode(vcpu) && ++ !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { ++ trace_kvm_fast_mmio(gpa); ++ /* ++ * Doing kvm_skip_emulated_instruction() depends on undefined ++ * behavior: Intel's manual doesn't mandate ++ * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG ++ * occurs and while on real hardware it was observed to be set, ++ * other hypervisors (namely Hyper-V) don't set it, we end up ++ * advancing IP with some random value. Disable fast mmio when ++ * running nested and keep it for real hardware in hope that ++ * VM_EXIT_INSTRUCTION_LEN will always be set correctly. 
++ */ ++ if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) ++ return kvm_skip_emulated_instruction(vcpu); ++ else ++ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == ++ EMULATE_DONE; ++ } ++ ++ return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); ++} ++ ++static int handle_nmi_window(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON_ONCE(!enable_vnmi); ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_VIRTUAL_NMI_PENDING); ++ ++vcpu->stat.nmi_window_exits; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ return 1; ++} ++ ++static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ enum emulation_result err = EMULATE_DONE; ++ int ret = 1; ++ u32 cpu_exec_ctrl; ++ bool intr_window_requested; ++ unsigned count = 130; ++ ++ /* ++ * We should never reach the point where we are emulating L2 ++ * due to invalid guest state as that means we incorrectly ++ * allowed a nested VMEntry with an invalid vmcs12. ++ */ ++ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); ++ ++ cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); ++ intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; ++ ++ while (vmx->emulation_required && count-- != 0) { ++ if (intr_window_requested && vmx_interrupt_allowed(vcpu)) ++ return handle_interrupt_window(&vmx->vcpu); ++ ++ if (kvm_test_request(KVM_REQ_EVENT, vcpu)) ++ return 1; ++ ++ err = kvm_emulate_instruction(vcpu, 0); ++ ++ if (err == EMULATE_USER_EXIT) { ++ ++vcpu->stat.mmio_exits; ++ ret = 0; ++ goto out; ++ } ++ ++ if (err != EMULATE_DONE) ++ goto emulation_error; ++ ++ if (vmx->emulation_required && !vmx->rmode.vm86_active && ++ vcpu->arch.exception.pending) ++ goto emulation_error; ++ ++ if (vcpu->arch.halt_request) { ++ vcpu->arch.halt_request = 0; ++ ret = kvm_vcpu_halt(vcpu); ++ goto out; ++ } ++ ++ if (signal_pending(current)) ++ goto out; ++ if (need_resched()) ++ schedule(); ++ } ++ ++out: ++ return ret; ++ ++emulation_error: ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ return 0; ++} ++ ++static void grow_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int old = vmx->ple_window; ++ ++ vmx->ple_window = __grow_ple_window(old, ple_window, ++ ple_window_grow, ++ ple_window_max); ++ ++ if (vmx->ple_window != old) ++ vmx->ple_window_dirty = true; ++ ++ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); ++} ++ ++static void shrink_ple_window(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int old = vmx->ple_window; ++ ++ vmx->ple_window = __shrink_ple_window(old, ple_window, ++ ple_window_shrink, ++ ple_window); ++ ++ if (vmx->ple_window != old) ++ vmx->ple_window_dirty = true; ++ ++ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); ++} ++ ++/* ++ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. ++ */ ++static void wakeup_handler(void) ++{ ++ struct kvm_vcpu *vcpu; ++ int cpu = smp_processor_id(); ++ ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), ++ blocked_vcpu_list) { ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (pi_test_on(pi_desc) == 1) ++ kvm_vcpu_kick(vcpu); ++ } ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); ++} ++ ++static void vmx_enable_tdp(void) ++{ ++ kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, ++ enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, ++ enable_ept_ad_bits ? 
VMX_EPT_DIRTY_BIT : 0ull, ++ 0ull, VMX_EPT_EXECUTABLE_MASK, ++ cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, ++ VMX_EPT_RWX_MASK, 0ull); ++ ++ ept_set_mmio_spte_mask(); ++ kvm_enable_tdp(); ++} ++ ++static __init int hardware_setup(void) ++{ ++ unsigned long host_bndcfgs; ++ int r = -ENOMEM, i; ++ ++ rdmsrl_safe(MSR_EFER, &host_efer); ++ ++ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) ++ kvm_define_shared_msr(i, vmx_msr_index[i]); ++ ++ for (i = 0; i < VMX_BITMAP_NR; i++) { ++ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!vmx_bitmap[i]) ++ goto out; ++ } ++ ++ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); ++ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); ++ ++ if (setup_vmcs_config(&vmcs_config) < 0) { ++ r = -EIO; ++ goto out; ++ } ++ ++ if (boot_cpu_has(X86_FEATURE_NX)) ++ kvm_enable_efer_bits(EFER_NX); ++ ++ if (boot_cpu_has(X86_FEATURE_MPX)) { ++ rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); ++ WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); ++ } ++ ++ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || ++ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) ++ enable_vpid = 0; ++ ++ if (!cpu_has_vmx_ept() || ++ !cpu_has_vmx_ept_4levels() || ++ !cpu_has_vmx_ept_mt_wb() || ++ !cpu_has_vmx_invept_global()) ++ enable_ept = 0; ++ ++ if (!cpu_has_vmx_ept_ad_bits() || !enable_ept) ++ enable_ept_ad_bits = 0; ++ ++ if (!cpu_has_vmx_unrestricted_guest() || !enable_ept) ++ enable_unrestricted_guest = 0; ++ ++ if (!cpu_has_vmx_flexpriority()) ++ flexpriority_enabled = 0; ++ ++ if (!cpu_has_virtual_nmis()) ++ enable_vnmi = 0; ++ ++ /* ++ * set_apic_access_page_addr() is used to reload apic access ++ * page upon invalidation. No need to do anything if not ++ * using the APIC_ACCESS_ADDR VMCS field. ++ */ ++ if (!flexpriority_enabled) ++ kvm_x86_ops->set_apic_access_page_addr = NULL; ++ ++ if (!cpu_has_vmx_tpr_shadow()) ++ kvm_x86_ops->update_cr8_intercept = NULL; ++ ++ if (enable_ept && !cpu_has_vmx_ept_2m_page()) ++ kvm_disable_largepages(); ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH ++ && enable_ept) ++ kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb; ++#endif ++ ++ if (!cpu_has_vmx_ple()) { ++ ple_gap = 0; ++ ple_window = 0; ++ ple_window_grow = 0; ++ ple_window_max = 0; ++ ple_window_shrink = 0; ++ } ++ ++ if (!cpu_has_vmx_apicv()) { ++ enable_apicv = 0; ++ kvm_x86_ops->sync_pir_to_irr = NULL; ++ } ++ ++ if (cpu_has_vmx_tsc_scaling()) { ++ kvm_has_tsc_control = true; ++ kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; ++ kvm_tsc_scaling_ratio_frac_bits = 48; ++ } ++ ++ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ ++ ++ if (enable_ept) ++ vmx_enable_tdp(); ++ else ++ kvm_disable_tdp(); ++ ++ if (!nested) { ++ kvm_x86_ops->get_nested_state = NULL; ++ kvm_x86_ops->set_nested_state = NULL; ++ } ++ ++ /* ++ * Only enable PML when hardware supports PML feature, and both EPT ++ * and EPT A/D bit features are enabled -- PML depends on them to work. 
++ */ ++ if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) ++ enable_pml = 0; ++ ++ if (!enable_pml) { ++ kvm_x86_ops->slot_enable_log_dirty = NULL; ++ kvm_x86_ops->slot_disable_log_dirty = NULL; ++ kvm_x86_ops->flush_log_dirty = NULL; ++ kvm_x86_ops->enable_log_dirty_pt_masked = NULL; ++ } ++ ++ if (!cpu_has_vmx_preemption_timer()) ++ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; ++ ++ if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { ++ u64 vmx_msr; ++ ++ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); ++ cpu_preemption_timer_multi = ++ vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; ++ } else { ++ kvm_x86_ops->set_hv_timer = NULL; ++ kvm_x86_ops->cancel_hv_timer = NULL; ++ } ++ ++ if (!cpu_has_vmx_shadow_vmcs()) ++ enable_shadow_vmcs = 0; ++ if (enable_shadow_vmcs) ++ init_vmcs_shadow_fields(); ++ ++ kvm_set_posted_intr_wakeup_handler(wakeup_handler); ++ nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv); ++ ++ kvm_mce_cap_supported |= MCG_LMCE_P; ++ ++ r = alloc_kvm_area(); ++ if (r) ++ goto out; ++ return 0; ++ ++out: ++ for (i = 0; i < VMX_BITMAP_NR; i++) ++ free_page((unsigned long)vmx_bitmap[i]); ++ ++ return r; ++} ++ ++static __exit void hardware_unsetup(void) ++{ ++ int i; ++ ++ for (i = 0; i < VMX_BITMAP_NR; i++) ++ free_page((unsigned long)vmx_bitmap[i]); ++ ++ free_kvm_area(); ++} ++ ++/* ++ * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE ++ * exiting, so only get here on cpu with PAUSE-Loop-Exiting. ++ */ ++static int handle_pause(struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_pause_in_guest(vcpu->kvm)) ++ grow_ple_window(vcpu); ++ ++ /* ++ * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" ++ * VM-execution control is ignored if CPL > 0. OTOH, KVM ++ * never set PAUSE_EXITING and just set PLE if supported, ++ * so the vcpu must be CPL=0 if it gets a PAUSE exit. ++ */ ++ kvm_vcpu_on_spin(vcpu, true); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_nop(struct kvm_vcpu *vcpu) ++{ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_mwait(struct kvm_vcpu *vcpu) ++{ ++ printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); ++ return handle_nop(vcpu); ++} ++ ++static int handle_invalid_op(struct kvm_vcpu *vcpu) ++{ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++ ++static int handle_monitor_trap(struct kvm_vcpu *vcpu) ++{ ++ return 1; ++} ++ ++static int handle_monitor(struct kvm_vcpu *vcpu) ++{ ++ printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); ++ return handle_nop(vcpu); ++} ++ ++/* ++ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), ++ * set the success or error code of an emulated VMX instruction, as specified ++ * by Vol 2B, VMX Instruction Reference, "Conventions". ++ */ ++static void nested_vmx_succeed(struct kvm_vcpu *vcpu) ++{ ++ vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | ++ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); ++} ++ ++static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) ++{ ++ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | ++ X86_EFLAGS_SF | X86_EFLAGS_OF)) ++ | X86_EFLAGS_CF); ++} ++ ++static void nested_vmx_failValid(struct kvm_vcpu *vcpu, ++ u32 vm_instruction_error) ++{ ++ if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { ++ /* ++ * failValid writes the error number to the current VMCS, which ++ * can't be done there isn't a current VMCS. 
++ */ ++ nested_vmx_failInvalid(vcpu); ++ return; ++ } ++ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) ++ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | ++ X86_EFLAGS_SF | X86_EFLAGS_OF)) ++ | X86_EFLAGS_ZF); ++ get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; ++ /* ++ * We don't need to force a shadow sync because ++ * VM_INSTRUCTION_ERROR is not shadowed ++ */ ++} ++ ++static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) ++{ ++ /* TODO: not to reset guest simply here. */ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); ++} ++ ++static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) ++{ ++ struct vcpu_vmx *vmx = ++ container_of(timer, struct vcpu_vmx, nested.preemption_timer); ++ ++ vmx->nested.preemption_timer_expired = true; ++ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); ++ kvm_vcpu_kick(&vmx->vcpu); ++ ++ return HRTIMER_NORESTART; ++} ++ ++/* ++ * Decode the memory-address operand of a vmx instruction, as recorded on an ++ * exit caused by such an instruction (run by a guest hypervisor). ++ * On success, returns 0. When the operand is invalid, returns 1 and throws ++ * #UD or #GP. ++ */ ++static int get_vmx_mem_address(struct kvm_vcpu *vcpu, ++ unsigned long exit_qualification, ++ u32 vmx_instruction_info, bool wr, gva_t *ret) ++{ ++ gva_t off; ++ bool exn; ++ struct kvm_segment s; ++ ++ /* ++ * According to Vol. 3B, "Information for VM Exits Due to Instruction ++ * Execution", on an exit, vmx_instruction_info holds most of the ++ * addressing components of the operand. Only the displacement part ++ * is put in exit_qualification (see 3B, "Basic VM-Exit Information"). ++ * For how an actual address is calculated from all these components, ++ * refer to Vol. 1, "Operand Addressing". ++ */ ++ int scaling = vmx_instruction_info & 3; ++ int addr_size = (vmx_instruction_info >> 7) & 7; ++ bool is_reg = vmx_instruction_info & (1u << 10); ++ int seg_reg = (vmx_instruction_info >> 15) & 7; ++ int index_reg = (vmx_instruction_info >> 18) & 0xf; ++ bool index_is_valid = !(vmx_instruction_info & (1u << 22)); ++ int base_reg = (vmx_instruction_info >> 23) & 0xf; ++ bool base_is_valid = !(vmx_instruction_info & (1u << 27)); ++ ++ if (is_reg) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ /* Addr = segment_base + offset */ ++ /* offset = base + [index * scale] + displacement */ ++ off = exit_qualification; /* holds the displacement */ ++ if (addr_size == 1) ++ off = (gva_t)sign_extend64(off, 31); ++ else if (addr_size == 0) ++ off = (gva_t)sign_extend64(off, 15); ++ if (base_is_valid) ++ off += kvm_register_read(vcpu, base_reg); ++ if (index_is_valid) ++ off += kvm_register_read(vcpu, index_reg)< s.limit); ++ } ++ if (exn) { ++ kvm_queue_exception_e(vcpu, ++ seg_reg == VCPU_SREG_SS ? ++ SS_VECTOR : GP_VECTOR, ++ 0); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) ++{ ++ gva_t gva; ++ struct x86_exception e; ++ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) ++ return 1; ++ ++ if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Allocate a shadow VMCS and associate it with the currently loaded ++ * VMCS, unless such a shadow VMCS already exists. 
The newly allocated ++ * VMCS is also VMCLEARed, so that it is ready for use. ++ */ ++static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; ++ ++ /* ++ * We should allocate a shadow vmcs for vmcs01 only when L1 ++ * executes VMXON and free it when L1 executes VMXOFF. ++ * As it is invalid to execute VMXON twice, we shouldn't reach ++ * here when vmcs01 already have an allocated shadow vmcs. ++ */ ++ WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); ++ ++ if (!loaded_vmcs->shadow_vmcs) { ++ loaded_vmcs->shadow_vmcs = alloc_vmcs(true); ++ if (loaded_vmcs->shadow_vmcs) ++ vmcs_clear(loaded_vmcs->shadow_vmcs); ++ } ++ return loaded_vmcs->shadow_vmcs; ++} ++ ++static int enter_vmx_operation(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int r; ++ ++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); ++ if (r < 0) ++ goto out_vmcs02; ++ ++ vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); ++ if (!vmx->nested.cached_vmcs12) ++ goto out_cached_vmcs12; ++ ++ vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); ++ if (!vmx->nested.cached_shadow_vmcs12) ++ goto out_cached_shadow_vmcs12; ++ ++ if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) ++ goto out_shadow_vmcs; ++ ++ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL_PINNED); ++ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; ++ ++ vmx->nested.vpid02 = allocate_vpid(); ++ ++ vmx->nested.vmxon = true; ++ return 0; ++ ++out_shadow_vmcs: ++ kfree(vmx->nested.cached_shadow_vmcs12); ++ ++out_cached_shadow_vmcs12: ++ kfree(vmx->nested.cached_vmcs12); ++ ++out_cached_vmcs12: ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++ ++out_vmcs02: ++ return -ENOMEM; ++} ++ ++/* ++ * Emulate the VMXON instruction. ++ * Currently, we just remember that VMX is active, and do not save or even ++ * inspect the argument to VMXON (the so-called "VMXON pointer") because we ++ * do not currently need to store anything in that guest-allocated memory ++ * region. Consequently, VMCLEAR and VMPTRLD also do not verify that the their ++ * argument is different from the VMXON pointer (which the spec says they do). ++ */ ++static int handle_vmon(struct kvm_vcpu *vcpu) ++{ ++ int ret; ++ gpa_t vmptr; ++ struct page *page; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED ++ | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ ++ /* ++ * The Intel VMX Instruction Reference lists a bunch of bits that are ++ * prerequisite to running VMXON, most notably cr4.VMXE must be set to ++ * 1 (see vmx_set_cr4() for when we allow the guest to set this). ++ * Otherwise, we should fail with #UD. But most faulting conditions ++ * have already been checked by hardware, prior to the VM-exit for ++ * VMXON. We do test guest cr4.VMXE because processor CR4 always has ++ * that bit set to 1 in non-root mode. ++ */ ++ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ /* CPL=0 must be checked manually. 
*/ ++ if (vmx_get_cpl(vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (vmx->nested.vmxon) { ++ nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) ++ != VMXON_NEEDED_FEATURES) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ /* ++ * SDM 3: 24.11.5 ++ * The first 4 bytes of VMXON region contain the supported ++ * VMCS revision identifier ++ * ++ * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; ++ * which replaces physical address width with 32 ++ */ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmptr); ++ if (is_error_page(page)) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ if (*(u32 *)kmap(page) != VMCS12_REVISION) { ++ kunmap(page); ++ kvm_release_page_clean(page); ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ vmx->nested.vmxon_ptr = vmptr; ++ ret = enter_vmx_operation(vcpu); ++ if (ret) ++ return ret; ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* ++ * Intel's VMX Instruction Reference specifies a common set of prerequisites ++ * for running VMX instructions (except VMXON, whose prerequisites are ++ * slightly different). It also specifies what exception to inject otherwise. ++ * Note that many of these exceptions have priority over VM exits, so they ++ * don't have to be checked again here. ++ */ ++static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) ++{ ++ if (!to_vmx(vcpu)->nested.vmxon) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 0; ++ } ++ ++ if (vmx_get_cpl(vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) ++{ ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); ++ vmx->nested.sync_shadow_vmcs = false; ++} ++ ++static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) ++{ ++ if (vmx->nested.current_vmptr == -1ull) ++ return; ++ ++ if (enable_shadow_vmcs) { ++ /* copy to memory all shadowed fields in case ++ they were modified */ ++ copy_shadow_to_vmcs12(vmx); ++ vmx_disable_shadow_vmcs(vmx); ++ } ++ vmx->nested.posted_intr_nv = -1; ++ ++ /* Flush VMCS12 to guest memory */ ++ kvm_vcpu_write_guest_page(&vmx->vcpu, ++ vmx->nested.current_vmptr >> PAGE_SHIFT, ++ vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); ++ ++ vmx->nested.current_vmptr = -1ull; ++} ++ ++/* ++ * Free whatever needs to be freed from vmx->nested when L1 goes down, or ++ * just stops using VMX. 
++ */ ++static void free_nested(struct vcpu_vmx *vmx) ++{ ++ if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) ++ return; ++ ++ kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, &vmx->vcpu); ++ ++ hrtimer_cancel(&vmx->nested.preemption_timer); ++ vmx->nested.vmxon = false; ++ vmx->nested.smm.vmxon = false; ++ free_vpid(vmx->nested.vpid02); ++ vmx->nested.posted_intr_nv = -1; ++ vmx->nested.current_vmptr = -1ull; ++ if (enable_shadow_vmcs) { ++ vmx_disable_shadow_vmcs(vmx); ++ vmcs_clear(vmx->vmcs01.shadow_vmcs); ++ free_vmcs(vmx->vmcs01.shadow_vmcs); ++ vmx->vmcs01.shadow_vmcs = NULL; ++ } ++ kfree(vmx->nested.cached_vmcs12); ++ kfree(vmx->nested.cached_shadow_vmcs12); ++ /* Unpin physical memory we referred to in the vmcs02 */ ++ if (vmx->nested.apic_access_page) { ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ if (vmx->nested.virtual_apic_page) { ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ if (vmx->nested.pi_desc_page) { ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ } ++ ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++} ++ ++/* Emulate the VMXOFF instruction */ ++static int handle_vmoff(struct kvm_vcpu *vcpu) ++{ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ free_nested(to_vmx(vcpu)); ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the VMCLEAR instruction */ ++static int handle_vmclear(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 zero = 0; ++ gpa_t vmptr; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.vmxon_ptr) { ++ nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.current_vmptr) ++ nested_release_vmcs12(vmx); ++ ++ kvm_vcpu_write_guest(vcpu, ++ vmptr + offsetof(struct vmcs12, launch_state), ++ &zero, sizeof(zero)); ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); ++ ++/* Emulate the VMLAUNCH instruction */ ++static int handle_vmlaunch(struct kvm_vcpu *vcpu) ++{ ++ return nested_vmx_run(vcpu, true); ++} ++ ++/* Emulate the VMRESUME instruction */ ++static int handle_vmresume(struct kvm_vcpu *vcpu) ++{ ++ ++ return nested_vmx_run(vcpu, false); ++} ++ ++/* ++ * Read a vmcs12 field. Since these can have varying lengths and we return ++ * one type, we chose the biggest type (u64) and zero-extend the return value ++ * to that size. Note that the caller, handle_vmread, might need to use only ++ * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of ++ * 64-bit fields are to be returned). 
++ */ ++static inline int vmcs12_read_any(struct vmcs12 *vmcs12, ++ unsigned long field, u64 *ret) ++{ ++ short offset = vmcs_field_to_offset(field); ++ char *p; ++ ++ if (offset < 0) ++ return offset; ++ ++ p = (char *)vmcs12 + offset; ++ ++ switch (vmcs_field_width(field)) { ++ case VMCS_FIELD_WIDTH_NATURAL_WIDTH: ++ *ret = *((natural_width *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U16: ++ *ret = *((u16 *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U32: ++ *ret = *((u32 *)p); ++ return 0; ++ case VMCS_FIELD_WIDTH_U64: ++ *ret = *((u64 *)p); ++ return 0; ++ default: ++ WARN_ON(1); ++ return -ENOENT; ++ } ++} ++ ++ ++static inline int vmcs12_write_any(struct vmcs12 *vmcs12, ++ unsigned long field, u64 field_value){ ++ short offset = vmcs_field_to_offset(field); ++ char *p = (char *)vmcs12 + offset; ++ if (offset < 0) ++ return offset; ++ ++ switch (vmcs_field_width(field)) { ++ case VMCS_FIELD_WIDTH_U16: ++ *(u16 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_U32: ++ *(u32 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_U64: ++ *(u64 *)p = field_value; ++ return 0; ++ case VMCS_FIELD_WIDTH_NATURAL_WIDTH: ++ *(natural_width *)p = field_value; ++ return 0; ++ default: ++ WARN_ON(1); ++ return -ENOENT; ++ } ++ ++} ++ ++/* ++ * Copy the writable VMCS shadow fields back to the VMCS12, in case ++ * they have been modified by the L1 guest. Note that the "read-only" ++ * VM-exit information fields are actually writable if the vCPU is ++ * configured to support "VMWRITE to any supported field in the VMCS." ++ */ ++static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) ++{ ++ const u16 *fields[] = { ++ shadow_read_write_fields, ++ shadow_read_only_fields ++ }; ++ const int max_fields[] = { ++ max_shadow_read_write_fields, ++ max_shadow_read_only_fields ++ }; ++ int i, q; ++ unsigned long field; ++ u64 field_value; ++ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; ++ ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ ++ preempt_disable(); ++ ++ vmcs_load(shadow_vmcs); ++ ++ for (q = 0; q < ARRAY_SIZE(fields); q++) { ++ for (i = 0; i < max_fields[q]; i++) { ++ field = fields[q][i]; ++ field_value = __vmcs_readl(field); ++ vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value); ++ } ++ /* ++ * Skip the VM-exit information fields if they are read-only. ++ */ ++ if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) ++ break; ++ } ++ ++ vmcs_clear(shadow_vmcs); ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++ ++ preempt_enable(); ++} ++ ++static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) ++{ ++ const u16 *fields[] = { ++ shadow_read_write_fields, ++ shadow_read_only_fields ++ }; ++ const int max_fields[] = { ++ max_shadow_read_write_fields, ++ max_shadow_read_only_fields ++ }; ++ int i, q; ++ unsigned long field; ++ u64 field_value = 0; ++ struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; ++ ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ ++ vmcs_load(shadow_vmcs); ++ ++ for (q = 0; q < ARRAY_SIZE(fields); q++) { ++ for (i = 0; i < max_fields[q]; i++) { ++ field = fields[q][i]; ++ vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value); ++ __vmcs_writel(field, field_value); ++ } ++ } ++ ++ vmcs_clear(shadow_vmcs); ++ vmcs_load(vmx->loaded_vmcs->vmcs); ++} ++ ++/* ++ * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was ++ * used before) all generate the same failure when it is missing. 
++ */ ++static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ if (vmx->nested.current_vmptr == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return 0; ++ } ++ return 1; ++} ++ ++static int handle_vmread(struct kvm_vcpu *vcpu) ++{ ++ unsigned long field; ++ u64 field_value; ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ gva_t gva = 0; ++ struct vmcs12 *vmcs12; ++ struct x86_exception e; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ if (!is_guest_mode(vcpu)) ++ vmcs12 = get_vmcs12(vcpu); ++ else { ++ /* ++ * When vmcs->vmcs_link_pointer is -1ull, any VMREAD ++ * to shadowed-field sets the ALU flags for VMfailInvalid. ++ */ ++ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ vmcs12 = get_shadow_vmcs12(vcpu); ++ } ++ ++ /* Decode instruction info and find the field to read */ ++ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ /* Read the field, zero-extended to a u64 field_value */ ++ if (vmcs12_read_any(vmcs12, field, &field_value) < 0) { ++ nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ /* ++ * Now copy part of this value to register or memory, as requested. ++ * Note that the number of bits actually copied is 32 or 64 depending ++ * on the guest's mode (32 or 64 bit), not on the given field's length. ++ */ ++ if (vmx_instruction_info & (1u << 10)) { ++ kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), ++ field_value); ++ } else { ++ if (get_vmx_mem_address(vcpu, exit_qualification, ++ vmx_instruction_info, true, &gva)) ++ return 1; ++ /* _system ok, nested_vmx_check_permission has verified cpl=0 */ ++ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, ++ (is_long_mode(vcpu) ? 8 : 4), ++ &e)) ++ kvm_inject_page_fault(vcpu, &e); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++ ++static int handle_vmwrite(struct kvm_vcpu *vcpu) ++{ ++ unsigned long field; ++ gva_t gva; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ ++ /* The value to write might be 32 or 64 bits, depending on L1's long ++ * mode, and eventually we need to write that into a field of several ++ * possible lengths. The code below first zero-extends the value to 64 ++ * bit (field_value), and then copies only the appropriate number of ++ * bits into the vmcs12 field. ++ */ ++ u64 field_value = 0; ++ struct x86_exception e; ++ struct vmcs12 *vmcs12; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ if (vmx_instruction_info & (1u << 10)) ++ field_value = kvm_register_readl(vcpu, ++ (((vmx_instruction_info) >> 3) & 0xf)); ++ else { ++ if (get_vmx_mem_address(vcpu, exit_qualification, ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &field_value, ++ (is_64_bit_mode(vcpu) ? 
8 : 4), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ } ++ ++ ++ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ /* ++ * If the vCPU supports "VMWRITE to any supported field in the ++ * VMCS," then the "read-only" fields are actually read/write. ++ */ ++ if (vmcs_field_readonly(field) && ++ !nested_cpu_has_vmwrite_any_field(vcpu)) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (!is_guest_mode(vcpu)) ++ vmcs12 = get_vmcs12(vcpu); ++ else { ++ /* ++ * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE ++ * to shadowed-field sets the ALU flags for VMfailInvalid. ++ */ ++ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ vmcs12 = get_shadow_vmcs12(vcpu); ++ ++ } ++ ++ if (vmcs12_write_any(vmcs12, field, field_value) < 0) { ++ nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* ++ * Do not track vmcs12 dirty-state if in guest-mode ++ * as we actually dirty shadow vmcs12 instead of vmcs12. ++ */ ++ if (!is_guest_mode(vcpu)) { ++ switch (field) { ++#define SHADOW_FIELD_RW(x) case x: ++#include "vmx_shadow_fields.h" ++ /* ++ * The fields that can be updated by L1 without a vmexit are ++ * always updated in the vmcs02, the others go down the slow ++ * path of prepare_vmcs02. ++ */ ++ break; ++ default: ++ vmx->nested.dirty_vmcs12 = true; ++ break; ++ } ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) ++{ ++ vmx->nested.current_vmptr = vmptr; ++ if (enable_shadow_vmcs) { ++ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_SHADOW_VMCS); ++ vmcs_write64(VMCS_LINK_POINTER, ++ __pa(vmx->vmcs01.shadow_vmcs)); ++ vmx->nested.sync_shadow_vmcs = true; ++ } ++ vmx->nested.dirty_vmcs12 = true; ++} ++ ++/* Emulate the VMPTRLD instruction */ ++static int handle_vmptrld(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ gpa_t vmptr; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (nested_vmx_get_vmptr(vcpu, &vmptr)) ++ return 1; ++ ++ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { ++ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmptr == vmx->nested.vmxon_ptr) { ++ nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ if (vmx->nested.current_vmptr != vmptr) { ++ struct vmcs12 *new_vmcs12; ++ struct page *page; ++ page = kvm_vcpu_gpa_to_page(vcpu, vmptr); ++ if (is_error_page(page)) { ++ nested_vmx_failInvalid(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ new_vmcs12 = kmap(page); ++ if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || ++ (new_vmcs12->hdr.shadow_vmcs && ++ !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { ++ kunmap(page); ++ kvm_release_page_clean(page); ++ nested_vmx_failValid(vcpu, ++ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ nested_release_vmcs12(vmx); ++ /* ++ * Load VMCS12 from guest memory since it is not already ++ * cached. 
++ */ ++ memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ set_current_vmptr(vmx, vmptr); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the VMPTRST instruction */ ++static int handle_vmptrst(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); ++ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; ++ struct x86_exception e; ++ gva_t gva; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) ++ return 1; ++ /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ ++ if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, ++ sizeof(gpa_t), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ nested_vmx_succeed(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* Emulate the INVEPT instruction */ ++static int handle_invept(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 vmx_instruction_info, types; ++ unsigned long type; ++ gva_t gva; ++ struct x86_exception e; ++ struct { ++ u64 eptp, gpa; ++ } operand; ++ ++ if (!(vmx->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_ENABLE_EPT) || ++ !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; ++ ++ if (type >= 32 || !(types & (1 << type))) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* According to the Intel VMX instruction reference, the memory ++ * operand is read even if it isn't needed (e.g., for type==global) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ switch (type) { ++ case VMX_EPT_EXTENT_GLOBAL: ++ /* ++ * TODO: track mappings and invalidate ++ * single context requests appropriately ++ */ ++ case VMX_EPT_EXTENT_CONTEXT: ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ nested_vmx_succeed(vcpu); ++ break; ++ default: ++ BUG_ON(1); ++ break; ++ } ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_invvpid(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 vmx_instruction_info; ++ unsigned long type, types; ++ gva_t gva; ++ struct x86_exception e; ++ struct { ++ u64 vpid; ++ u64 gla; ++ } operand; ++ ++ if (!(vmx->nested.msrs.secondary_ctls_high & ++ SECONDARY_EXEC_ENABLE_VPID) || ++ !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ types = (vmx->nested.msrs.vpid_caps & ++ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; ++ ++ if (type >= 32 || !(types & (1 << type))) { ++ nested_vmx_failValid(vcpu, ++ 
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ /* according to the intel vmx instruction reference, the memory ++ * operand is read even if it isn't needed (e.g., for type==global) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ if (operand.vpid >> 16) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ switch (type) { ++ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: ++ if (!operand.vpid || ++ is_noncanonical_address(operand.gla, vcpu)) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ if (cpu_has_vmx_invvpid_individual_addr() && ++ vmx->nested.vpid02) { ++ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, ++ vmx->nested.vpid02, operand.gla); ++ } else ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ case VMX_VPID_EXTENT_SINGLE_CONTEXT: ++ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: ++ if (!operand.vpid) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ case VMX_VPID_EXTENT_ALL_CONTEXT: ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ break; ++ default: ++ WARN_ON_ONCE(1); ++ return kvm_skip_emulated_instruction(vcpu); ++ } ++ ++ nested_vmx_succeed(vcpu); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int handle_invpcid(struct kvm_vcpu *vcpu) ++{ ++ u32 vmx_instruction_info; ++ unsigned long type; ++ bool pcid_enabled; ++ gva_t gva; ++ struct x86_exception e; ++ unsigned i; ++ unsigned long roots_to_free = 0; ++ struct { ++ u64 pcid; ++ u64 gla; ++ } operand; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); ++ ++ if (type > 3) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ /* According to the Intel instruction reference, the memory operand ++ * is read even if it isn't needed (e.g., for type==all) ++ */ ++ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), ++ vmx_instruction_info, false, &gva)) ++ return 1; ++ ++ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { ++ kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } ++ ++ if (operand.pcid >> 12 != 0) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); ++ ++ switch (type) { ++ case INVPCID_TYPE_INDIV_ADDR: ++ if ((!pcid_enabled && (operand.pcid != 0)) || ++ is_noncanonical_address(operand.gla, vcpu)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ case INVPCID_TYPE_SINGLE_CTXT: ++ if (!pcid_enabled && (operand.pcid != 0)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ if (kvm_get_active_pcid(vcpu) == operand.pcid) { ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ } ++ ++ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) ++ if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3) ++ == operand.pcid) ++ roots_to_free |= 
KVM_MMU_ROOT_PREVIOUS(i); ++ ++ kvm_mmu_free_roots(vcpu, roots_to_free); ++ /* ++ * If neither the current cr3 nor any of the prev_roots use the ++ * given PCID, then nothing needs to be done here because a ++ * resync will happen anyway before switching to any other CR3. ++ */ ++ ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ case INVPCID_TYPE_ALL_NON_GLOBAL: ++ /* ++ * Currently, KVM doesn't mark global entries in the shadow ++ * page tables, so a non-global flush just degenerates to a ++ * global flush. If needed, we could optimize this later by ++ * keeping track of global entries in shadow page tables. ++ */ ++ ++ /* fall-through */ ++ case INVPCID_TYPE_ALL_INCL_GLOBAL: ++ kvm_mmu_unload(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ default: ++ BUG(); /* We have already checked above that type <= 3 */ ++ } ++} ++ ++static int handle_pml_full(struct kvm_vcpu *vcpu) ++{ ++ unsigned long exit_qualification; ++ ++ trace_kvm_pml_full(vcpu->vcpu_id); ++ ++ exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ ++ /* ++ * PML buffer FULL happened while executing iret from NMI, ++ * "blocked by NMI" bit has to be set before next VM entry. ++ */ ++ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && ++ enable_vnmi && ++ (exit_qualification & INTR_INFO_UNBLOCK_NMI)) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ ++ /* ++ * PML buffer already flushed at beginning of VMEXIT. Nothing to do ++ * here.., and there's no userspace involvement needed for PML. ++ */ ++ return 1; ++} ++ ++static int handle_preemption_timer(struct kvm_vcpu *vcpu) ++{ ++ if (!to_vmx(vcpu)->req_immediate_exit) ++ kvm_lapic_expired_hv_timer(vcpu); ++ return 1; ++} ++ ++static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int maxphyaddr = cpuid_maxphyaddr(vcpu); ++ ++ /* Check for memory type validity */ ++ switch (address & VMX_EPTP_MT_MASK) { ++ case VMX_EPTP_MT_UC: ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) ++ return false; ++ break; ++ case VMX_EPTP_MT_WB: ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) ++ return false; ++ break; ++ default: ++ return false; ++ } ++ ++ /* only 4 levels page-walk length are valid */ ++ if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) ++ return false; ++ ++ /* Reserved bits should not be set */ ++ if (address >> maxphyaddr || ((address >> 7) & 0x1f)) ++ return false; ++ ++ /* AD, if set, should be supported */ ++ if (address & VMX_EPTP_AD_ENABLE_BIT) { ++ if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) ++ return false; ++ } ++ ++ return true; ++} ++ ++static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; ++ u64 address; ++ bool accessed_dirty; ++ struct kvm_mmu *mmu = vcpu->arch.walk_mmu; ++ ++ if (!nested_cpu_has_eptp_switching(vmcs12) || ++ !nested_cpu_has_ept(vmcs12)) ++ return 1; ++ ++ if (index >= VMFUNC_EPTP_ENTRIES) ++ return 1; ++ ++ ++ if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, ++ &address, index * 8, 8)) ++ return 1; ++ ++ accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); ++ ++ /* ++ * If the (L2) guest does a vmfunc to the currently ++ * active ept pointer, we don't have to do anything else ++ */ ++ if (vmcs12->ept_pointer != address) { ++ if (!valid_ept_address(vcpu, address)) ++ return 1; ++ ++ kvm_mmu_unload(vcpu); ++ mmu->ept_ad = accessed_dirty; ++ mmu->base_role.ad_disabled = !accessed_dirty; ++ 
vmcs12->ept_pointer = address; ++ /* ++ * TODO: Check what's the correct approach in case ++ * mmu reload fails. Currently, we just let the next ++ * reload potentially fail ++ */ ++ kvm_mmu_reload(vcpu); ++ } ++ ++ return 0; ++} ++ ++static int handle_vmfunc(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12; ++ u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; ++ ++ /* ++ * VMFUNC is only supported for nested guests, but we always enable the ++ * secondary control for simplicity; for non-nested mode, fake that we ++ * didn't by injecting #UD. ++ */ ++ if (!is_guest_mode(vcpu)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ vmcs12 = get_vmcs12(vcpu); ++ if ((vmcs12->vm_function_control & (1 << function)) == 0) ++ goto fail; ++ ++ switch (function) { ++ case 0: ++ if (nested_vmx_eptp_switching(vcpu, vmcs12)) ++ goto fail; ++ break; ++ default: ++ goto fail; ++ } ++ return kvm_skip_emulated_instruction(vcpu); ++ ++fail: ++ nested_vmx_vmexit(vcpu, vmx->exit_reason, ++ vmcs_read32(VM_EXIT_INTR_INFO), ++ vmcs_readl(EXIT_QUALIFICATION)); ++ return 1; ++} ++ ++static int handle_encls(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * SGX virtualization is not yet supported. There is no software ++ * enable bit for SGX, so we have to trap ENCLS and inject a #UD ++ * to prevent the guest from executing ENCLS. ++ */ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++ ++/* ++ * The exit handlers return 1 if the exit was handled fully and guest execution ++ * may resume. Otherwise they set the kvm_run parameter to indicate what needs ++ * to be done to userspace and return 0. ++ */ ++static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { ++ [EXIT_REASON_EXCEPTION_NMI] = handle_exception, ++ [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, ++ [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, ++ [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, ++ [EXIT_REASON_IO_INSTRUCTION] = handle_io, ++ [EXIT_REASON_CR_ACCESS] = handle_cr, ++ [EXIT_REASON_DR_ACCESS] = handle_dr, ++ [EXIT_REASON_CPUID] = handle_cpuid, ++ [EXIT_REASON_MSR_READ] = handle_rdmsr, ++ [EXIT_REASON_MSR_WRITE] = handle_wrmsr, ++ [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, ++ [EXIT_REASON_HLT] = handle_halt, ++ [EXIT_REASON_INVD] = handle_invd, ++ [EXIT_REASON_INVLPG] = handle_invlpg, ++ [EXIT_REASON_RDPMC] = handle_rdpmc, ++ [EXIT_REASON_VMCALL] = handle_vmcall, ++ [EXIT_REASON_VMCLEAR] = handle_vmclear, ++ [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, ++ [EXIT_REASON_VMPTRLD] = handle_vmptrld, ++ [EXIT_REASON_VMPTRST] = handle_vmptrst, ++ [EXIT_REASON_VMREAD] = handle_vmread, ++ [EXIT_REASON_VMRESUME] = handle_vmresume, ++ [EXIT_REASON_VMWRITE] = handle_vmwrite, ++ [EXIT_REASON_VMOFF] = handle_vmoff, ++ [EXIT_REASON_VMON] = handle_vmon, ++ [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, ++ [EXIT_REASON_APIC_ACCESS] = handle_apic_access, ++ [EXIT_REASON_APIC_WRITE] = handle_apic_write, ++ [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, ++ [EXIT_REASON_WBINVD] = handle_wbinvd, ++ [EXIT_REASON_XSETBV] = handle_xsetbv, ++ [EXIT_REASON_TASK_SWITCH] = handle_task_switch, ++ [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, ++ [EXIT_REASON_GDTR_IDTR] = handle_desc, ++ [EXIT_REASON_LDTR_TR] = handle_desc, ++ [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, ++ [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, ++ [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, ++ [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, ++ 
[EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, ++ [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, ++ [EXIT_REASON_INVEPT] = handle_invept, ++ [EXIT_REASON_INVVPID] = handle_invvpid, ++ [EXIT_REASON_RDRAND] = handle_invalid_op, ++ [EXIT_REASON_RDSEED] = handle_invalid_op, ++ [EXIT_REASON_XSAVES] = handle_xsaves, ++ [EXIT_REASON_XRSTORS] = handle_xrstors, ++ [EXIT_REASON_PML_FULL] = handle_pml_full, ++ [EXIT_REASON_INVPCID] = handle_invpcid, ++ [EXIT_REASON_VMFUNC] = handle_vmfunc, ++ [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, ++ [EXIT_REASON_ENCLS] = handle_encls, ++}; ++ ++static const int kvm_vmx_max_exit_handlers = ++ ARRAY_SIZE(kvm_vmx_exit_handlers); ++ ++/* ++ * Return true if an IO instruction with the specified port and size should cause ++ * a VM-exit into L1. ++ */ ++bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, ++ int size) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gpa_t bitmap, last_bitmap; ++ u8 b; ++ ++ last_bitmap = (gpa_t)-1; ++ b = -1; ++ ++ while (size > 0) { ++ if (port < 0x8000) ++ bitmap = vmcs12->io_bitmap_a; ++ else if (port < 0x10000) ++ bitmap = vmcs12->io_bitmap_b; ++ else ++ return true; ++ bitmap += (port & 0x7fff) / 8; ++ ++ if (last_bitmap != bitmap) ++ if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) ++ return true; ++ if (b & (1 << (port & 7))) ++ return true; ++ ++ port++; ++ size--; ++ last_bitmap = bitmap; ++ } ++ ++ return false; ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle an MSR access access, ++ * rather than handle it ourselves in L0. I.e., check whether L1 expressed ++ * disinterest in the current event (read or write a specific MSR) by using an ++ * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. ++ */ ++static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, u32 exit_reason) ++{ ++ u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; ++ gpa_t bitmap; ++ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return true; ++ ++ /* ++ * The MSR_BITMAP page is divided into four 1024-byte bitmaps, ++ * for the four combinations of read/write and low/high MSR numbers. ++ * First we need to figure out which of the four to use: ++ */ ++ bitmap = vmcs12->msr_bitmap; ++ if (exit_reason == EXIT_REASON_MSR_WRITE) ++ bitmap += 2048; ++ if (msr_index >= 0xc0000000) { ++ msr_index -= 0xc0000000; ++ bitmap += 1024; ++ } ++ ++ /* Then read the msr_index'th bit from this bitmap: */ ++ if (msr_index < 1024*8) { ++ unsigned char b; ++ if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) ++ return true; ++ return 1 & (b >> (msr_index & 7)); ++ } else ++ return true; /* let L1 handle the wrong parameter */ ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle a CR access exit, ++ * rather than handle it ourselves in L0. I.e., check if L1 wanted to ++ * intercept (via guest_host_mask etc.) the current event. 
++ */ ++static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); ++ int cr = exit_qualification & 15; ++ int reg; ++ unsigned long val; ++ ++ switch ((exit_qualification >> 4) & 3) { ++ case 0: /* mov to cr */ ++ reg = (exit_qualification >> 8) & 15; ++ val = kvm_register_readl(vcpu, reg); ++ switch (cr) { ++ case 0: ++ if (vmcs12->cr0_guest_host_mask & ++ (val ^ vmcs12->cr0_read_shadow)) ++ return true; ++ break; ++ case 3: ++ if ((vmcs12->cr3_target_count >= 1 && ++ vmcs12->cr3_target_value0 == val) || ++ (vmcs12->cr3_target_count >= 2 && ++ vmcs12->cr3_target_value1 == val) || ++ (vmcs12->cr3_target_count >= 3 && ++ vmcs12->cr3_target_value2 == val) || ++ (vmcs12->cr3_target_count >= 4 && ++ vmcs12->cr3_target_value3 == val)) ++ return false; ++ if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) ++ return true; ++ break; ++ case 4: ++ if (vmcs12->cr4_guest_host_mask & ++ (vmcs12->cr4_read_shadow ^ val)) ++ return true; ++ break; ++ case 8: ++ if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) ++ return true; ++ break; ++ } ++ break; ++ case 2: /* clts */ ++ if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && ++ (vmcs12->cr0_read_shadow & X86_CR0_TS)) ++ return true; ++ break; ++ case 1: /* mov from cr */ ++ switch (cr) { ++ case 3: ++ if (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_CR3_STORE_EXITING) ++ return true; ++ break; ++ case 8: ++ if (vmcs12->cpu_based_vm_exec_control & ++ CPU_BASED_CR8_STORE_EXITING) ++ return true; ++ break; ++ } ++ break; ++ case 3: /* lmsw */ ++ /* ++ * lmsw can change bits 1..3 of cr0, and only set bit 0 of ++ * cr0. Other attempted changes are ignored, with no exit. ++ */ ++ val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; ++ if (vmcs12->cr0_guest_host_mask & 0xe & ++ (val ^ vmcs12->cr0_read_shadow)) ++ return true; ++ if ((vmcs12->cr0_guest_host_mask & 0x1) && ++ !(vmcs12->cr0_read_shadow & 0x1) && ++ (val & 0x1)) ++ return true; ++ break; ++ } ++ return false; ++} ++ ++static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, gpa_t bitmap) ++{ ++ u32 vmx_instruction_info; ++ unsigned long field; ++ u8 b; ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12)) ++ return true; ++ ++ /* Decode instruction info and find the field to access */ ++ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); ++ ++ /* Out-of-range fields always cause a VM exit from L2 to L1 */ ++ if (field >> 15) ++ return true; ++ ++ if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1)) ++ return true; ++ ++ return 1 & (b >> (field & 7)); ++} ++ ++/* ++ * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we ++ * should handle it ourselves in L0 (and then continue L2). Only call this ++ * when in is_guest_mode (L2). ++ */ ++static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) ++{ ++ u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (vmx->nested.nested_run_pending) ++ return false; ++ ++ if (unlikely(vmx->fail)) { ++ pr_info_ratelimited("%s failed vm entry %x\n", __func__, ++ vmcs_read32(VM_INSTRUCTION_ERROR)); ++ return true; ++ } ++ ++ /* ++ * The host physical addresses of some pages of guest memory ++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC ++ * Page). 
The CPU may write to these pages via their host ++ * physical address while L2 is running, bypassing any ++ * address-translation-based dirty tracking (e.g. EPT write ++ * protection). ++ * ++ * Mark them dirty on every exit from L2 to prevent them from ++ * getting out of sync with dirty tracking. ++ */ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++ ++ trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, ++ vmcs_readl(EXIT_QUALIFICATION), ++ vmx->idt_vectoring_info, ++ intr_info, ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE), ++ KVM_ISA_VMX); ++ ++ switch (exit_reason) { ++ case EXIT_REASON_EXCEPTION_NMI: ++ if (is_nmi(intr_info)) ++ return false; ++ else if (is_page_fault(intr_info)) ++ return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; ++ else if (is_no_device(intr_info) && ++ !(vmcs12->guest_cr0 & X86_CR0_TS)) ++ return false; ++ else if (is_debug(intr_info) && ++ vcpu->guest_debug & ++ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) ++ return false; ++ else if (is_breakpoint(intr_info) && ++ vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) ++ return false; ++ return vmcs12->exception_bitmap & ++ (1u << (intr_info & INTR_INFO_VECTOR_MASK)); ++ case EXIT_REASON_EXTERNAL_INTERRUPT: ++ return false; ++ case EXIT_REASON_TRIPLE_FAULT: ++ return true; ++ case EXIT_REASON_PENDING_INTERRUPT: ++ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); ++ case EXIT_REASON_NMI_WINDOW: ++ return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); ++ case EXIT_REASON_TASK_SWITCH: ++ return true; ++ case EXIT_REASON_CPUID: ++ return true; ++ case EXIT_REASON_HLT: ++ return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); ++ case EXIT_REASON_INVD: ++ return true; ++ case EXIT_REASON_INVLPG: ++ return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); ++ case EXIT_REASON_RDPMC: ++ return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); ++ case EXIT_REASON_RDRAND: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING); ++ case EXIT_REASON_RDSEED: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); ++ case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: ++ return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); ++ case EXIT_REASON_VMREAD: ++ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, ++ vmcs12->vmread_bitmap); ++ case EXIT_REASON_VMWRITE: ++ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, ++ vmcs12->vmwrite_bitmap); ++ case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: ++ case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: ++ case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME: ++ case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: ++ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: ++ /* ++ * VMX instructions trap unconditionally. This allows L1 to ++ * emulate them for its L2 guest, i.e., allows 3-level nesting! 
++ */ ++ return true; ++ case EXIT_REASON_CR_ACCESS: ++ return nested_vmx_exit_handled_cr(vcpu, vmcs12); ++ case EXIT_REASON_DR_ACCESS: ++ return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); ++ case EXIT_REASON_IO_INSTRUCTION: ++ return nested_vmx_exit_handled_io(vcpu, vmcs12); ++ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); ++ case EXIT_REASON_MSR_READ: ++ case EXIT_REASON_MSR_WRITE: ++ return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); ++ case EXIT_REASON_INVALID_STATE: ++ return true; ++ case EXIT_REASON_MWAIT_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); ++ case EXIT_REASON_MONITOR_TRAP_FLAG: ++ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); ++ case EXIT_REASON_MONITOR_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); ++ case EXIT_REASON_PAUSE_INSTRUCTION: ++ return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) || ++ nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_PAUSE_LOOP_EXITING); ++ case EXIT_REASON_MCE_DURING_VMENTRY: ++ return false; ++ case EXIT_REASON_TPR_BELOW_THRESHOLD: ++ return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); ++ case EXIT_REASON_APIC_ACCESS: ++ case EXIT_REASON_APIC_WRITE: ++ case EXIT_REASON_EOI_INDUCED: ++ /* ++ * The controls for "virtualize APIC accesses," "APIC- ++ * register virtualization," and "virtual-interrupt ++ * delivery" only come from vmcs12. ++ */ ++ return true; ++ case EXIT_REASON_EPT_VIOLATION: ++ /* ++ * L0 always deals with the EPT violation. If nested EPT is ++ * used, and the nested mmu code discovers that the address is ++ * missing in the guest EPT table (EPT12), the EPT violation ++ * will be injected with nested_ept_inject_page_fault() ++ */ ++ return false; ++ case EXIT_REASON_EPT_MISCONFIG: ++ /* ++ * L2 never uses directly L1's EPT, but rather L0's own EPT ++ * table (shadow on EPT) or a merged EPT table that L0 built ++ * (EPT on EPT). So any problems with the structure of the ++ * table is L0's fault. ++ */ ++ return false; ++ case EXIT_REASON_INVPCID: ++ return ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && ++ nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); ++ case EXIT_REASON_WBINVD: ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); ++ case EXIT_REASON_XSETBV: ++ return true; ++ case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: ++ /* ++ * This should never happen, since it is not possible to ++ * set XSS to a non-zero value---neither in L1 nor in L2. ++ * If if it were, XSS would have to be checked against ++ * the XSS exit bitmap in vmcs12. ++ */ ++ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); ++ case EXIT_REASON_PREEMPTION_TIMER: ++ return false; ++ case EXIT_REASON_PML_FULL: ++ /* We emulate PML support to L1. */ ++ return false; ++ case EXIT_REASON_VMFUNC: ++ /* VM functions are emulated through L2->L0 vmexits. */ ++ return false; ++ case EXIT_REASON_ENCLS: ++ /* SGX is never exposed to L1 */ ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) ++{ ++ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ /* ++ * At this point, the exit interruption info in exit_intr_info ++ * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT ++ * we need to query the in-kernel LAPIC. 
++ */ ++ WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); ++ if ((exit_intr_info & ++ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == ++ (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ vmcs12->vm_exit_intr_error_code = ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE); ++ } ++ ++ nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, ++ vmcs_readl(EXIT_QUALIFICATION)); ++ return 1; ++} ++ ++static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ++{ ++ *info1 = vmcs_readl(EXIT_QUALIFICATION); ++ *info2 = vmcs_read32(VM_EXIT_INTR_INFO); ++} ++ ++static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) ++{ ++ if (vmx->pml_pg) { ++ __free_page(vmx->pml_pg); ++ vmx->pml_pg = NULL; ++ } ++} ++ ++static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u64 *pml_buf; ++ u16 pml_idx; ++ ++ pml_idx = vmcs_read16(GUEST_PML_INDEX); ++ ++ /* Do nothing if PML buffer is empty */ ++ if (pml_idx == (PML_ENTITY_NUM - 1)) ++ return; ++ ++ /* PML index always points to next available PML buffer entity */ ++ if (pml_idx >= PML_ENTITY_NUM) ++ pml_idx = 0; ++ else ++ pml_idx++; ++ ++ pml_buf = page_address(vmx->pml_pg); ++ for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { ++ u64 gpa; ++ ++ gpa = pml_buf[pml_idx]; ++ WARN_ON(gpa & (PAGE_SIZE - 1)); ++ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); ++ } ++ ++ /* reset PML index */ ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++} ++ ++/* ++ * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap. ++ * Called before reporting dirty_bitmap to userspace. ++ */ ++static void kvm_flush_pml_buffers(struct kvm *kvm) ++{ ++ int i; ++ struct kvm_vcpu *vcpu; ++ /* ++ * We only need to kick vcpu out of guest mode here, as PML buffer ++ * is flushed at beginning of all VMEXITs, and it's obvious that only ++ * vcpus running in guest are possible to have unflushed GPAs in PML ++ * buffer. 
++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_vcpu_kick(vcpu); ++} ++ ++static void vmx_dump_sel(char *name, uint32_t sel) ++{ ++ pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", ++ name, vmcs_read16(sel), ++ vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), ++ vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), ++ vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); ++} ++ ++static void vmx_dump_dtsel(char *name, uint32_t limit) ++{ ++ pr_err("%s limit=0x%08x, base=0x%016lx\n", ++ name, vmcs_read32(limit), ++ vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); ++} ++ ++static void dump_vmcs(void) ++{ ++ u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); ++ u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); ++ u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); ++ u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); ++ u32 secondary_exec_control = 0; ++ unsigned long cr4 = vmcs_readl(GUEST_CR4); ++ u64 efer = vmcs_read64(GUEST_IA32_EFER); ++ int i, n; ++ ++ if (cpu_has_secondary_exec_ctrls()) ++ secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ ++ pr_err("*** Guest State ***\n"); ++ pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", ++ vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), ++ vmcs_readl(CR0_GUEST_HOST_MASK)); ++ pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", ++ cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); ++ pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); ++ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && ++ (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) ++ { ++ pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", ++ vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); ++ pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", ++ vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); ++ } ++ pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", ++ vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); ++ pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", ++ vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); ++ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", ++ vmcs_readl(GUEST_SYSENTER_ESP), ++ vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); ++ vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); ++ vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); ++ vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); ++ vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); ++ vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); ++ vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); ++ vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); ++ vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); ++ vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); ++ vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); ++ if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || ++ (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) ++ pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", ++ efer, vmcs_read64(GUEST_IA32_PAT)); ++ pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", ++ vmcs_read64(GUEST_IA32_DEBUGCTL), ++ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); ++ if (cpu_has_load_perf_global_ctrl && ++ vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) ++ pr_err("PerfGlobCtl = 0x%016llx\n", ++ vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); ++ if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) ++ pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); ++ pr_err("Interruptibility = %08x ActivityState = %08x\n", ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), ++ vmcs_read32(GUEST_ACTIVITY_STATE)); ++ if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ++ 
pr_err("InterruptStatus = %04x\n", ++ vmcs_read16(GUEST_INTR_STATUS)); ++ ++ pr_err("*** Host State ***\n"); ++ pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", ++ vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); ++ pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", ++ vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), ++ vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), ++ vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), ++ vmcs_read16(HOST_TR_SELECTOR)); ++ pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", ++ vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), ++ vmcs_readl(HOST_TR_BASE)); ++ pr_err("GDTBase=%016lx IDTBase=%016lx\n", ++ vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); ++ pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", ++ vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), ++ vmcs_readl(HOST_CR4)); ++ pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", ++ vmcs_readl(HOST_IA32_SYSENTER_ESP), ++ vmcs_read32(HOST_IA32_SYSENTER_CS), ++ vmcs_readl(HOST_IA32_SYSENTER_EIP)); ++ if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) ++ pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", ++ vmcs_read64(HOST_IA32_EFER), ++ vmcs_read64(HOST_IA32_PAT)); ++ if (cpu_has_load_perf_global_ctrl && ++ vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) ++ pr_err("PerfGlobCtl = 0x%016llx\n", ++ vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); ++ ++ pr_err("*** Control State ***\n"); ++ pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", ++ pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); ++ pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); ++ pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", ++ vmcs_read32(EXCEPTION_BITMAP), ++ vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), ++ vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); ++ pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", ++ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), ++ vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), ++ vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); ++ pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", ++ vmcs_read32(VM_EXIT_INTR_INFO), ++ vmcs_read32(VM_EXIT_INTR_ERROR_CODE), ++ vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); ++ pr_err(" reason=%08x qualification=%016lx\n", ++ vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); ++ pr_err("IDTVectoring: info=%08x errcode=%08x\n", ++ vmcs_read32(IDT_VECTORING_INFO_FIELD), ++ vmcs_read32(IDT_VECTORING_ERROR_CODE)); ++ pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); ++ if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) ++ pr_err("TSC Multiplier = 0x%016llx\n", ++ vmcs_read64(TSC_MULTIPLIER)); ++ if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) ++ pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); ++ if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) ++ pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); ++ if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) ++ pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); ++ n = vmcs_read32(CR3_TARGET_COUNT); ++ for (i = 0; i + 1 < n; i += 4) ++ pr_err("CR3 target%u=%016lx target%u=%016lx\n", ++ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), ++ i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); ++ if (i < n) ++ pr_err("CR3 target%u=%016lx\n", ++ i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); ++ if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) ++ pr_err("PLE Gap=%08x Window=%08x\n", ++ vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); ++ if (secondary_exec_control & 
SECONDARY_EXEC_ENABLE_VPID) ++ pr_err("Virtual processor ID = 0x%04x\n", ++ vmcs_read16(VIRTUAL_PROCESSOR_ID)); ++} ++ ++/* ++ * The guest has exited. See if we can fix it or if we need userspace ++ * assistance. ++ */ ++static int vmx_handle_exit(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exit_reason = vmx->exit_reason; ++ u32 vectoring_info = vmx->idt_vectoring_info; ++ ++ trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); ++ ++ /* ++ * Flush logged GPAs PML buffer, this will make dirty_bitmap more ++ * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before ++ * querying dirty_bitmap, we only need to kick all vcpus out of guest ++ * mode as if vcpus is in root mode, the PML buffer must has been ++ * flushed already. ++ */ ++ if (enable_pml) ++ vmx_flush_pml_buffer(vcpu); ++ ++ /* If guest state is invalid, start emulating */ ++ if (vmx->emulation_required) ++ return handle_invalid_guest_state(vcpu); ++ ++ if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) ++ return nested_vmx_reflect_vmexit(vcpu, exit_reason); ++ ++ if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { ++ dump_vmcs(); ++ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ vcpu->run->fail_entry.hardware_entry_failure_reason ++ = exit_reason; ++ return 0; ++ } ++ ++ if (unlikely(vmx->fail)) { ++ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; ++ vcpu->run->fail_entry.hardware_entry_failure_reason ++ = vmcs_read32(VM_INSTRUCTION_ERROR); ++ return 0; ++ } ++ ++ /* ++ * Note: ++ * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by ++ * delivery event since it indicates guest is accessing MMIO. ++ * The vm-exit can be triggered again after return to guest that ++ * will cause infinite loop. ++ */ ++ if ((vectoring_info & VECTORING_INFO_VALID_MASK) && ++ (exit_reason != EXIT_REASON_EXCEPTION_NMI && ++ exit_reason != EXIT_REASON_EPT_VIOLATION && ++ exit_reason != EXIT_REASON_PML_FULL && ++ exit_reason != EXIT_REASON_TASK_SWITCH)) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; ++ vcpu->run->internal.ndata = 3; ++ vcpu->run->internal.data[0] = vectoring_info; ++ vcpu->run->internal.data[1] = exit_reason; ++ vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; ++ if (exit_reason == EXIT_REASON_EPT_MISCONFIG) { ++ vcpu->run->internal.ndata++; ++ vcpu->run->internal.data[3] = ++ vmcs_read64(GUEST_PHYSICAL_ADDRESS); ++ } ++ return 0; ++ } ++ ++ if (unlikely(!enable_vnmi && ++ vmx->loaded_vmcs->soft_vnmi_blocked)) { ++ if (vmx_interrupt_allowed(vcpu)) { ++ vmx->loaded_vmcs->soft_vnmi_blocked = 0; ++ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && ++ vcpu->arch.nmi_pending) { ++ /* ++ * This CPU don't support us in finding the end of an ++ * NMI-blocked window if the guest runs with IRQs ++ * disabled. So we pull the trigger after 1 s of ++ * futile waiting, but inform the user about this. 
++ */ ++ printk(KERN_WARNING "%s: Breaking out of NMI-blocked " ++ "state on VCPU %d after 1 s timeout\n", ++ __func__, vcpu->vcpu_id); ++ vmx->loaded_vmcs->soft_vnmi_blocked = 0; ++ } ++ } ++ ++ if (exit_reason < kvm_vmx_max_exit_handlers ++ && kvm_vmx_exit_handlers[exit_reason]) ++ return kvm_vmx_exit_handlers[exit_reason](vcpu); ++ else { ++ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", ++ exit_reason); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++} ++ ++/* ++ * Software based L1D cache flush which is used when microcode providing ++ * the cache control MSR is not loaded. ++ * ++ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to ++ * flush it is required to read in 64 KiB because the replacement algorithm ++ * is not exactly LRU. This could be sized at runtime via topology ++ * information but as all relevant affected CPUs have 32KiB L1D cache size ++ * there is no point in doing so. ++ */ ++static void vmx_l1d_flush(struct kvm_vcpu *vcpu) ++{ ++ int size = PAGE_SIZE << L1D_CACHE_ORDER; ++ ++ /* ++ * This code is only executed when the the flush mode is 'cond' or ++ * 'always' ++ */ ++ if (static_branch_likely(&vmx_l1d_flush_cond)) { ++ bool flush_l1d; ++ ++ /* ++ * Clear the per-vcpu flush bit, it gets set again ++ * either from vcpu_run() or from one of the unsafe ++ * VMEXIT handlers. ++ */ ++ flush_l1d = vcpu->arch.l1tf_flush_l1d; ++ vcpu->arch.l1tf_flush_l1d = false; ++ ++ /* ++ * Clear the per-cpu flush bit, it gets set again from ++ * the interrupt handlers. ++ */ ++ flush_l1d |= kvm_get_cpu_l1tf_flush_l1d(); ++ kvm_clear_cpu_l1tf_flush_l1d(); ++ ++ if (!flush_l1d) ++ return; ++ } ++ ++ vcpu->stat.l1d_flush++; ++ ++ if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { ++ wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); ++ return; ++ } ++ ++ asm volatile( ++ /* First ensure the pages are in the TLB */ ++ "xorl %%eax, %%eax\n" ++ ".Lpopulate_tlb:\n\t" ++ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" ++ "addl $4096, %%eax\n\t" ++ "cmpl %%eax, %[size]\n\t" ++ "jne .Lpopulate_tlb\n\t" ++ "xorl %%eax, %%eax\n\t" ++ "cpuid\n\t" ++ /* Now fill the cache */ ++ "xorl %%eax, %%eax\n" ++ ".Lfill_cache:\n" ++ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t" ++ "addl $64, %%eax\n\t" ++ "cmpl %%eax, %[size]\n\t" ++ "jne .Lfill_cache\n\t" ++ "lfence\n" ++ :: [flush_pages] "r" (vmx_l1d_flush_pages), ++ [size] "r" (size) ++ : "eax", "ebx", "ecx", "edx"); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ if (is_guest_mode(vcpu) && ++ nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return; ++ ++ if (irr == -1 || tpr < irr) { ++ vmcs_write32(TPR_THRESHOLD, 0); ++ return; ++ } ++ ++ vmcs_write32(TPR_THRESHOLD, irr); ++} ++ ++static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ u32 sec_exec_control; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (!flexpriority_enabled && ++ !cpu_has_vmx_virtualize_x2apic_mode()) ++ return; ++ ++ /* Postpone execution until vmcs01 is the current VMCS. 
*/ ++ if (is_guest_mode(vcpu)) { ++ to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; ++ return; ++ } ++ ++ sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); ++ ++ switch (kvm_get_apic_mode(vcpu)) { ++ case LAPIC_MODE_INVALID: ++ WARN_ONCE(true, "Invalid local APIC state"); ++ case LAPIC_MODE_DISABLED: ++ break; ++ case LAPIC_MODE_XAPIC: ++ if (flexpriority_enabled) { ++ sec_exec_control |= ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; ++ vmx_flush_tlb(vcpu, true); ++ } ++ break; ++ case LAPIC_MODE_X2APIC: ++ if (cpu_has_vmx_virtualize_x2apic_mode()) ++ sec_exec_control |= ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ++ break; ++ } ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); ++ ++ vmx_update_msr_bitmap(vcpu); ++} ++ ++static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) ++{ ++ if (!is_guest_mode(vcpu)) { ++ vmcs_write64(APIC_ACCESS_ADDR, hpa); ++ vmx_flush_tlb(vcpu, true); ++ } ++} ++ ++static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) ++{ ++ u16 status; ++ u8 old; ++ ++ if (max_isr == -1) ++ max_isr = 0; ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ old = status >> 8; ++ if (max_isr != old) { ++ status &= 0xff; ++ status |= max_isr << 8; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++} ++ ++static void vmx_set_rvi(int vector) ++{ ++ u16 status; ++ u8 old; ++ ++ if (vector == -1) ++ vector = 0; ++ ++ status = vmcs_read16(GUEST_INTR_STATUS); ++ old = (u8)status & 0xff; ++ if ((u8)vector != old) { ++ status &= ~0xff; ++ status |= (u8)vector; ++ vmcs_write16(GUEST_INTR_STATUS, status); ++ } ++} ++ ++static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) ++{ ++ /* ++ * When running L2, updating RVI is only relevant when ++ * vmcs12 virtual-interrupt-delivery enabled. ++ * However, it can be enabled only when L1 also ++ * intercepts external-interrupts and in that case ++ * we should not update vmcs02 RVI but instead intercept ++ * interrupt. Therefore, do nothing when running L2. ++ */ ++ if (!is_guest_mode(vcpu)) ++ vmx_set_rvi(max_irr); ++} ++ ++static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int max_irr; ++ bool max_irr_updated; ++ ++ WARN_ON(!vcpu->arch.apicv_active); ++ if (pi_test_on(&vmx->pi_desc)) { ++ pi_clear_on(&vmx->pi_desc); ++ /* ++ * IOMMU can write to PIR.ON, so the barrier matters even on UP. ++ * But on x86 this is just a compiler barrier anyway. ++ */ ++ smp_mb__after_atomic(); ++ max_irr_updated = ++ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); ++ ++ /* ++ * If we are running L2 and L1 has a new pending interrupt ++ * which can be injected, we should re-evaluate ++ * what should be done with this new L1 interrupt. ++ * If L1 intercepts external-interrupts, we should ++ * exit from L2 to L1. Otherwise, interrupt should be ++ * delivered directly to L2. 
++ */ ++ if (is_guest_mode(vcpu) && max_irr_updated) { ++ if (nested_exit_on_intr(vcpu)) ++ kvm_vcpu_exiting_guest_mode(vcpu); ++ else ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ } else { ++ max_irr = kvm_lapic_find_highest_irr(vcpu); ++ } ++ vmx_hwapic_irr_update(vcpu, max_irr); ++ return max_irr; ++} ++ ++static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) ++{ ++ u8 rvi = vmx_get_rvi(); ++ u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); ++ ++ return ((rvi & 0xf0) > (vppr & 0xf0)); ++} ++ ++static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return pi_test_on(vcpu_to_pi_desc(vcpu)); ++} ++ ++static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) ++{ ++ if (!kvm_vcpu_apicv_active(vcpu)) ++ return; ++ ++ vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); ++ vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); ++ vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); ++ vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); ++} ++ ++static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ pi_clear_on(&vmx->pi_desc); ++ memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); ++} ++ ++static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) ++{ ++ if (vmx->exit_reason != EXIT_REASON_EXCEPTION_NMI) ++ return; ++ ++ vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ /* if exit due to PF check for async PF */ ++ if (is_page_fault(vmx->exit_intr_info)) ++ vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); ++ ++ /* Handle machine checks before interrupts are enabled */ ++ if (is_machine_check(vmx->exit_intr_info)) ++ kvm_machine_check(); ++ ++ /* We need to handle NMIs before interrupts are enabled */ ++ if (is_nmi(vmx->exit_intr_info)) { ++ kvm_before_interrupt(&vmx->vcpu); ++ asm("int $2"); ++ kvm_after_interrupt(&vmx->vcpu); ++ } ++} ++ ++static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) ++{ ++ u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ ++ if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) ++ == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { ++ unsigned int vector; ++ unsigned long entry; ++ gate_desc *desc; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++#ifdef CONFIG_X86_64 ++ unsigned long tmp; ++#endif ++ ++ vector = exit_intr_info & INTR_INFO_VECTOR_MASK; ++ desc = (gate_desc *)vmx->host_idt_base + vector; ++ entry = gate_offset(desc); ++ asm volatile( ++#ifdef CONFIG_X86_64 ++ "mov %%" _ASM_SP ", %[sp]\n\t" ++ "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" ++ "push $%c[ss]\n\t" ++ "push %[sp]\n\t" ++#endif ++ "pushf\n\t" ++ __ASM_SIZE(push) " $%c[cs]\n\t" ++ CALL_NOSPEC ++ : ++#ifdef CONFIG_X86_64 ++ [sp]"=&r"(tmp), ++#endif ++ ASM_CALL_CONSTRAINT ++ : ++ THUNK_TARGET(entry), ++ [ss]"i"(__KERNEL_DS), ++ [cs]"i"(__KERNEL_CS) ++ ); ++ } ++} ++STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); ++ ++static bool vmx_has_emulated_msr(int index) ++{ ++ switch (index) { ++ case MSR_IA32_SMBASE: ++ /* ++ * We cannot do SMM unless we can run the guest in big ++ * real mode. ++ */ ++ return enable_unrestricted_guest || emulate_invalid_guest_state; ++ case MSR_AMD64_VIRT_SPEC_CTRL: ++ /* This is AMD only. 
*/ ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static bool vmx_mpx_supported(void) ++{ ++ return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && ++ (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); ++} ++ ++static bool vmx_xsaves_supported(void) ++{ ++ return vmcs_config.cpu_based_2nd_exec_ctrl & ++ SECONDARY_EXEC_XSAVES; ++} ++ ++static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ++{ ++ u32 exit_intr_info; ++ bool unblock_nmi; ++ u8 vector; ++ bool idtv_info_valid; ++ ++ idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; ++ ++ if (enable_vnmi) { ++ if (vmx->loaded_vmcs->nmi_known_unmasked) ++ return; ++ /* ++ * Can't use vmx->exit_intr_info since we're not sure what ++ * the exit reason is. ++ */ ++ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); ++ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; ++ vector = exit_intr_info & INTR_INFO_VECTOR_MASK; ++ /* ++ * SDM 3: 27.7.1.2 (September 2008) ++ * Re-set bit "block by NMI" before VM entry if vmexit caused by ++ * a guest IRET fault. ++ * SDM 3: 23.2.2 (September 2008) ++ * Bit 12 is undefined in any of the following cases: ++ * If the VM exit sets the valid bit in the IDT-vectoring ++ * information field. ++ * If the VM exit is due to a double fault. ++ */ ++ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && ++ vector != DF_VECTOR && !idtv_info_valid) ++ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, ++ GUEST_INTR_STATE_NMI); ++ else ++ vmx->loaded_vmcs->nmi_known_unmasked = ++ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) ++ & GUEST_INTR_STATE_NMI); ++ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked)) ++ vmx->loaded_vmcs->vnmi_blocked_time += ++ ktime_to_ns(ktime_sub(ktime_get(), ++ vmx->loaded_vmcs->entry_time)); ++} ++ ++static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, ++ u32 idt_vectoring_info, ++ int instr_len_field, ++ int error_code_field) ++{ ++ u8 vector; ++ int type; ++ bool idtv_info_valid; ++ ++ idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; ++ ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_exception_queue(vcpu); ++ kvm_clear_interrupt_queue(vcpu); ++ ++ if (!idtv_info_valid) ++ return; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; ++ type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; ++ ++ switch (type) { ++ case INTR_TYPE_NMI_INTR: ++ vcpu->arch.nmi_injected = true; ++ /* ++ * SDM 3: 27.7.1.2 (September 2008) ++ * Clear bit "block by NMI" before VM entry if a NMI ++ * delivery faulted. 
++ */ ++ vmx_set_nmi_mask(vcpu, false); ++ break; ++ case INTR_TYPE_SOFT_EXCEPTION: ++ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ++ /* fall through */ ++ case INTR_TYPE_HARD_EXCEPTION: ++ if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { ++ u32 err = vmcs_read32(error_code_field); ++ kvm_requeue_exception_e(vcpu, vector, err); ++ } else ++ kvm_requeue_exception(vcpu, vector); ++ break; ++ case INTR_TYPE_SOFT_INTR: ++ vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ++ /* fall through */ ++ case INTR_TYPE_EXT_INTR: ++ kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void vmx_complete_interrupts(struct vcpu_vmx *vmx) ++{ ++ __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, ++ VM_EXIT_INSTRUCTION_LEN, ++ IDT_VECTORING_ERROR_CODE); ++} ++ ++static void vmx_cancel_injection(struct kvm_vcpu *vcpu) ++{ ++ __vmx_complete_interrupts(vcpu, ++ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), ++ VM_ENTRY_INSTRUCTION_LEN, ++ VM_ENTRY_EXCEPTION_ERROR_CODE); ++ ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); ++} ++ ++static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) ++{ ++ int i, nr_msrs; ++ struct perf_guest_switch_msr *msrs; ++ ++ msrs = perf_guest_get_msrs(&nr_msrs); ++ ++ if (!msrs) ++ return; ++ ++ for (i = 0; i < nr_msrs; i++) ++ if (msrs[i].host == msrs[i].guest) ++ clear_atomic_switch_msr(vmx, msrs[i].msr); ++ else ++ add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, ++ msrs[i].host, false); ++} ++ ++static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) ++{ ++ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); ++ if (!vmx->loaded_vmcs->hv_timer_armed) ++ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, ++ PIN_BASED_VMX_PREEMPTION_TIMER); ++ vmx->loaded_vmcs->hv_timer_armed = true; ++} ++ ++static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u64 tscl; ++ u32 delta_tsc; ++ ++ if (vmx->req_immediate_exit) { ++ vmx_arm_hv_timer(vmx, 0); ++ return; ++ } ++ ++ if (vmx->hv_deadline_tsc != -1) { ++ tscl = rdtsc(); ++ if (vmx->hv_deadline_tsc > tscl) ++ /* set_hv_timer ensures the delta fits in 32-bits */ ++ delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> ++ cpu_preemption_timer_multi); ++ else ++ delta_tsc = 0; ++ ++ vmx_arm_hv_timer(vmx, delta_tsc); ++ return; ++ } ++ ++ if (vmx->loaded_vmcs->hv_timer_armed) ++ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, ++ PIN_BASED_VMX_PREEMPTION_TIMER); ++ vmx->loaded_vmcs->hv_timer_armed = false; ++} ++ ++static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long cr3, cr4, evmcs_rsp; ++ ++ /* Record the guest's net vcpu time for enforced NMI injections. 
*/ ++ if (unlikely(!enable_vnmi && ++ vmx->loaded_vmcs->soft_vnmi_blocked)) ++ vmx->loaded_vmcs->entry_time = ktime_get(); ++ ++ /* Don't enter VMX if guest state is invalid, let the exit handler ++ start emulation until we arrive back to a valid state */ ++ if (vmx->emulation_required) ++ return; ++ ++ if (vmx->ple_window_dirty) { ++ vmx->ple_window_dirty = false; ++ vmcs_write32(PLE_WINDOW, vmx->ple_window); ++ } ++ ++ if (vmx->nested.sync_shadow_vmcs) { ++ copy_vmcs12_to_shadow(vmx); ++ vmx->nested.sync_shadow_vmcs = false; ++ } ++ ++ if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) ++ vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); ++ if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) ++ vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); ++ ++ cr3 = __get_current_cr3_fast(); ++ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { ++ vmcs_writel(HOST_CR3, cr3); ++ vmx->loaded_vmcs->host_state.cr3 = cr3; ++ } ++ ++ cr4 = cr4_read_shadow(); ++ if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { ++ vmcs_writel(HOST_CR4, cr4); ++ vmx->loaded_vmcs->host_state.cr4 = cr4; ++ } ++ ++ /* When single-stepping over STI and MOV SS, we must clear the ++ * corresponding interruptibility bits in the guest state. Otherwise ++ * vmentry fails as it then expects bit 14 (BS) in pending debug ++ * exceptions being set, but that's not correct for the guest debugging ++ * case. */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ vmx_set_interrupt_shadow(vcpu, 0); ++ ++ kvm_load_guest_xcr0(vcpu); ++ ++ if (static_cpu_has(X86_FEATURE_PKU) && ++ kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && ++ vcpu->arch.pkru != vmx->host_pkru) ++ __write_pkru(vcpu->arch.pkru); ++ ++ atomic_switch_perf_msrs(vmx); ++ ++ vmx_update_hv_timer(vcpu); ++ ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); ++ ++ vmx->__launched = vmx->loaded_vmcs->launched; ++ ++ evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? ++ (unsigned long)¤t_evmcs->host_rsp : 0; ++ ++ /* L1D Flush includes CPU buffer clear to mitigate MDS */ ++ if (static_branch_unlikely(&vmx_l1d_should_flush)) ++ vmx_l1d_flush(vcpu); ++ else if (static_branch_unlikely(&mds_user_clear)) ++ mds_clear_cpu_buffers(); ++ ++ asm( ++ /* Store host registers */ ++ "push %%" _ASM_DX "; push %%" _ASM_BP ";" ++ "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ ++ "push %%" _ASM_CX " \n\t" ++ "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" ++ "je 1f \n\t" ++ "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" ++ /* Avoid VMWRITE when Enlightened VMCS is in use */ ++ "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" ++ "jz 2f \n\t" ++ "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" ++ "jmp 1f \n\t" ++ "2: \n\t" ++ __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" ++ "1: \n\t" ++ /* Reload cr2 if changed */ ++ "mov %c[cr2](%0), %%" _ASM_AX " \n\t" ++ "mov %%cr2, %%" _ASM_DX " \n\t" ++ "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" ++ "je 3f \n\t" ++ "mov %%" _ASM_AX", %%cr2 \n\t" ++ "3: \n\t" ++ /* Check if vmlaunch of vmresume is needed */ ++ "cmpb $0, %c[launched](%0) \n\t" ++ /* Load guest registers. Don't clobber flags. 
*/ ++ "mov %c[rax](%0), %%" _ASM_AX " \n\t" ++ "mov %c[rbx](%0), %%" _ASM_BX " \n\t" ++ "mov %c[rdx](%0), %%" _ASM_DX " \n\t" ++ "mov %c[rsi](%0), %%" _ASM_SI " \n\t" ++ "mov %c[rdi](%0), %%" _ASM_DI " \n\t" ++ "mov %c[rbp](%0), %%" _ASM_BP " \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %c[r8](%0), %%r8 \n\t" ++ "mov %c[r9](%0), %%r9 \n\t" ++ "mov %c[r10](%0), %%r10 \n\t" ++ "mov %c[r11](%0), %%r11 \n\t" ++ "mov %c[r12](%0), %%r12 \n\t" ++ "mov %c[r13](%0), %%r13 \n\t" ++ "mov %c[r14](%0), %%r14 \n\t" ++ "mov %c[r15](%0), %%r15 \n\t" ++#endif ++ "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ ++ ++ /* Enter guest mode */ ++ "jne 1f \n\t" ++ __ex(ASM_VMX_VMLAUNCH) "\n\t" ++ "jmp 2f \n\t" ++ "1: " __ex(ASM_VMX_VMRESUME) "\n\t" ++ "2: " ++ /* Save guest registers, load host registers, keep flags */ ++ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" ++ "pop %0 \n\t" ++ "setbe %c[fail](%0)\n\t" ++ "mov %%" _ASM_AX ", %c[rax](%0) \n\t" ++ "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" ++ __ASM_SIZE(pop) " %c[rcx](%0) \n\t" ++ "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" ++ "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" ++ "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" ++ "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" ++#ifdef CONFIG_X86_64 ++ "mov %%r8, %c[r8](%0) \n\t" ++ "mov %%r9, %c[r9](%0) \n\t" ++ "mov %%r10, %c[r10](%0) \n\t" ++ "mov %%r11, %c[r11](%0) \n\t" ++ "mov %%r12, %c[r12](%0) \n\t" ++ "mov %%r13, %c[r13](%0) \n\t" ++ "mov %%r14, %c[r14](%0) \n\t" ++ "mov %%r15, %c[r15](%0) \n\t" ++ "xor %%r8d, %%r8d \n\t" ++ "xor %%r9d, %%r9d \n\t" ++ "xor %%r10d, %%r10d \n\t" ++ "xor %%r11d, %%r11d \n\t" ++ "xor %%r12d, %%r12d \n\t" ++ "xor %%r13d, %%r13d \n\t" ++ "xor %%r14d, %%r14d \n\t" ++ "xor %%r15d, %%r15d \n\t" ++#endif ++ "mov %%cr2, %%" _ASM_AX " \n\t" ++ "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" ++ ++ "xor %%eax, %%eax \n\t" ++ "xor %%ebx, %%ebx \n\t" ++ "xor %%esi, %%esi \n\t" ++ "xor %%edi, %%edi \n\t" ++ "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" ++ ".pushsection .rodata \n\t" ++ ".global vmx_return \n\t" ++ "vmx_return: " _ASM_PTR " 2b \n\t" ++ ".popsection" ++ : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), ++ [launched]"i"(offsetof(struct vcpu_vmx, __launched)), ++ [fail]"i"(offsetof(struct vcpu_vmx, fail)), ++ [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), ++ [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), ++ [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), ++ [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), ++ [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])), ++ [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])), ++ [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])), ++ [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])), ++#ifdef CONFIG_X86_64 ++ [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])), ++ [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])), ++ [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])), ++ [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])), ++ [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])), ++ [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])), ++ [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), ++ [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), ++#endif ++ [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), ++ [wordsize]"i"(sizeof(ulong)) ++ : "cc", "memory" ++#ifdef CONFIG_X86_64 ++ , "rax", "rbx", "rdi" ++ , "r8", "r9", "r10", 
"r11", "r12", "r13", "r14", "r15" ++#else ++ , "eax", "ebx", "edi" ++#endif ++ ); ++ ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) ++ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); ++ ++ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); ++ ++ /* Eliminate branch target predictions from guest mode */ ++ vmexit_fill_RSB(); ++ ++ /* All fields are clean at this point */ ++ if (static_branch_unlikely(&enable_evmcs)) ++ current_evmcs->hv_clean_fields |= ++ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; ++ ++ /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ ++ if (vmx->host_debugctlmsr) ++ update_debugctlmsr(vmx->host_debugctlmsr); ++ ++#ifndef CONFIG_X86_64 ++ /* ++ * The sysexit path does not restore ds/es, so we must set them to ++ * a reasonable value ourselves. ++ * ++ * We can't defer this to vmx_prepare_switch_to_host() since that ++ * function may be executed in interrupt context, which saves and ++ * restore segments around it, nullifying its effect. ++ */ ++ loadsegment(ds, __USER_DS); ++ loadsegment(es, __USER_DS); ++#endif ++ ++ vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) ++ | (1 << VCPU_EXREG_RFLAGS) ++ | (1 << VCPU_EXREG_PDPTR) ++ | (1 << VCPU_EXREG_SEGMENTS) ++ | (1 << VCPU_EXREG_CR3)); ++ vcpu->arch.regs_dirty = 0; ++ ++ /* ++ * eager fpu is enabled if PKEY is supported and CR4 is switched ++ * back on host, so it is safe to read guest PKRU from current ++ * XSAVE. ++ */ ++ if (static_cpu_has(X86_FEATURE_PKU) && ++ kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { ++ vcpu->arch.pkru = __read_pkru(); ++ if (vcpu->arch.pkru != vmx->host_pkru) ++ __write_pkru(vmx->host_pkru); ++ } ++ ++ kvm_put_guest_xcr0(vcpu); ++ ++ vmx->nested.nested_run_pending = 0; ++ vmx->idt_vectoring_info = 0; ++ ++ vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); ++ if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) ++ kvm_machine_check(); ++ ++ if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) ++ return; ++ ++ vmx->loaded_vmcs->launched = 1; ++ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); ++ ++ vmx_complete_atomic_exit(vmx); ++ vmx_recover_nmi_blocking(vmx); ++ vmx_complete_interrupts(vmx); ++} ++STACK_FRAME_NON_STANDARD(vmx_vcpu_run); ++ ++static struct kvm *vmx_vm_alloc(void) ++{ ++ struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); ++ return &kvm_vmx->kvm; ++} ++ ++static void vmx_vm_free(struct kvm *kvm) ++{ ++ vfree(to_kvm_vmx(kvm)); ++} ++ ++static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int cpu; ++ ++ if (vmx->loaded_vmcs == vmcs) ++ return; ++ ++ cpu = get_cpu(); ++ vmx_vcpu_put(vcpu); ++ vmx->loaded_vmcs = vmcs; ++ vmx_vcpu_load(vcpu, cpu); ++ put_cpu(); ++} ++ ++/* ++ * Ensure that the current vmcs of the logical processor is the ++ * vmcs01 of the vcpu before calling free_nested(). 
++ */ ++static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vcpu_load(vcpu); ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ free_nested(vmx); ++ vcpu_put(vcpu); ++} ++ ++static void vmx_free_vcpu(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (enable_pml) ++ vmx_destroy_pml_buffer(vmx); ++ free_vpid(vmx->vpid); ++ leave_guest_mode(vcpu); ++ vmx_free_vcpu_nested(vcpu); ++ free_loaded_vmcs(vmx->loaded_vmcs); ++ kfree(vmx->guest_msrs); ++ kvm_vcpu_uninit(vcpu); ++ kmem_cache_free(kvm_vcpu_cache, vmx); ++} ++ ++static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) ++{ ++ int err; ++ struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ unsigned long *msr_bitmap; ++ int cpu; ++ ++ if (!vmx) ++ return ERR_PTR(-ENOMEM); ++ ++ vmx->vpid = allocate_vpid(); ++ ++ err = kvm_vcpu_init(&vmx->vcpu, kvm, id); ++ if (err) ++ goto free_vcpu; ++ ++ err = -ENOMEM; ++ ++ /* ++ * If PML is turned on, failure on enabling PML just results in failure ++ * of creating the vcpu, therefore we can simplify PML logic (by ++ * avoiding dealing with cases, such as enabling PML partially on vcpus ++ * for the guest, etc. ++ */ ++ if (enable_pml) { ++ vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!vmx->pml_pg) ++ goto uninit_vcpu; ++ } ++ ++ vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) ++ > PAGE_SIZE); ++ ++ if (!vmx->guest_msrs) ++ goto free_pml; ++ ++ err = alloc_loaded_vmcs(&vmx->vmcs01); ++ if (err < 0) ++ goto free_msrs; ++ ++ msr_bitmap = vmx->vmcs01.msr_bitmap; ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); ++ vmx->msr_bitmap_mode = 0; ++ ++ vmx->loaded_vmcs = &vmx->vmcs01; ++ cpu = get_cpu(); ++ vmx_vcpu_load(&vmx->vcpu, cpu); ++ vmx->vcpu.cpu = cpu; ++ vmx_vcpu_setup(vmx); ++ vmx_vcpu_put(&vmx->vcpu); ++ put_cpu(); ++ if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { ++ err = alloc_apic_access_page(kvm); ++ if (err) ++ goto free_vmcs; ++ } ++ ++ if (enable_ept && !enable_unrestricted_guest) { ++ err = init_rmode_identity_map(kvm); ++ if (err) ++ goto free_vmcs; ++ } ++ ++ if (nested) ++ nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, ++ kvm_vcpu_apicv_active(&vmx->vcpu)); ++ ++ vmx->nested.posted_intr_nv = -1; ++ vmx->nested.current_vmptr = -1ull; ++ ++ vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; ++ ++ /* ++ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR ++ * or POSTED_INTR_WAKEUP_VECTOR. ++ */ ++ vmx->pi_desc.nv = POSTED_INTR_VECTOR; ++ vmx->pi_desc.sn = 1; ++ ++ return &vmx->vcpu; ++ ++free_vmcs: ++ free_loaded_vmcs(vmx->loaded_vmcs); ++free_msrs: ++ kfree(vmx->guest_msrs); ++free_pml: ++ vmx_destroy_pml_buffer(vmx); ++uninit_vcpu: ++ kvm_vcpu_uninit(&vmx->vcpu); ++free_vcpu: ++ free_vpid(vmx->vpid); ++ kmem_cache_free(kvm_vcpu_cache, vmx); ++ return ERR_PTR(err); ++} ++ ++#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. 
See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" ++#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" ++ ++static int vmx_vm_init(struct kvm *kvm) ++{ ++ spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); ++ ++ if (!ple_gap) ++ kvm->arch.pause_in_guest = true; ++ ++ if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) { ++ switch (l1tf_mitigation) { ++ case L1TF_MITIGATION_OFF: ++ case L1TF_MITIGATION_FLUSH_NOWARN: ++ /* 'I explicitly don't care' is set */ ++ break; ++ case L1TF_MITIGATION_FLUSH: ++ case L1TF_MITIGATION_FLUSH_NOSMT: ++ case L1TF_MITIGATION_FULL: ++ /* ++ * Warn upon starting the first VM in a potentially ++ * insecure environment. ++ */ ++ if (sched_smt_active()) ++ pr_warn_once(L1TF_MSG_SMT); ++ if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER) ++ pr_warn_once(L1TF_MSG_L1D); ++ break; ++ case L1TF_MITIGATION_FULL_FORCE: ++ /* Flush is enforced */ ++ break; ++ } ++ } ++ return 0; ++} ++ ++static void __init vmx_check_processor_compat(void *rtn) ++{ ++ struct vmcs_config vmcs_conf; ++ ++ *(int *)rtn = 0; ++ if (setup_vmcs_config(&vmcs_conf) < 0) ++ *(int *)rtn = -EIO; ++ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv); ++ if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { ++ printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", ++ smp_processor_id()); ++ *(int *)rtn = -EIO; ++ } ++} ++ ++static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) ++{ ++ u8 cache; ++ u64 ipat = 0; ++ ++ /* For VT-d and EPT combination ++ * 1. MMIO: always map as UC ++ * 2. EPT with VT-d: ++ * a. VT-d without snooping control feature: can't guarantee the ++ * result, try to trust guest. ++ * b. VT-d with snooping control feature: snooping control feature of ++ * VT-d engine can guarantee the cache correctness. Just set it ++ * to WB to keep consistent with host. So the same as item 3. ++ * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep ++ * consistent with host MTRR ++ */ ++ if (is_mmio) { ++ cache = MTRR_TYPE_UNCACHABLE; ++ goto exit; ++ } ++ ++ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ++ ipat = VMX_EPT_IPAT_BIT; ++ cache = MTRR_TYPE_WRBACK; ++ goto exit; ++ } ++ ++ if (kvm_read_cr0(vcpu) & X86_CR0_CD) { ++ ipat = VMX_EPT_IPAT_BIT; ++ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ cache = MTRR_TYPE_WRBACK; ++ else ++ cache = MTRR_TYPE_UNCACHABLE; ++ goto exit; ++ } ++ ++ cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); ++ ++exit: ++ return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; ++} ++ ++static int vmx_get_lpage_level(void) ++{ ++ if (enable_ept && !cpu_has_vmx_ept_1g_page()) ++ return PT_DIRECTORY_LEVEL; ++ else ++ /* For shadow and EPT supported 1GB page */ ++ return PT_PDPE_LEVEL; ++} ++ ++static void vmcs_set_secondary_exec_control(u32 new_ctl) ++{ ++ /* ++ * These bits in the secondary execution controls field ++ * are dynamic, the others are mostly based on the hypervisor ++ * architecture and the guest's CPUID. Do not touch the ++ * dynamic bits. 
++ */ ++ u32 mask = ++ SECONDARY_EXEC_SHADOW_VMCS | ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_DESC; ++ ++ u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); ++ ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, ++ (new_ctl & ~mask) | (cur_ctl & mask)); ++} ++ ++/* ++ * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits ++ * (indicating "allowed-1") if they are supported in the guest's CPUID. ++ */ ++static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct kvm_cpuid_entry2 *entry; ++ ++ vmx->nested.msrs.cr0_fixed1 = 0xffffffff; ++ vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; ++ ++#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ ++ if (entry && (entry->_reg & (_cpuid_mask))) \ ++ vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ ++} while (0) ++ ++ entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); ++ cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME)); ++ cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME)); ++ cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC)); ++ cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE)); ++ cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE)); ++ cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE)); ++ cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE)); ++ cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE)); ++ cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR)); ++ cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM)); ++ cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX)); ++ cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX)); ++ cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID)); ++ cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE)); ++ ++ entry = kvm_find_cpuid_entry(vcpu, 0x7, 0); ++ cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE)); ++ cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); ++ cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); ++ cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); ++ cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); ++ ++#undef cr4_fixed1_update ++} ++ ++static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (kvm_mpx_supported()) { ++ bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); ++ ++ if (mpx_enabled) { ++ vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; ++ vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; ++ } else { ++ vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; ++ vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; ++ } ++ } ++} ++ ++static void vmx_cpuid_update(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ vmx_compute_secondary_exec_control(vmx); ++ vmcs_set_secondary_exec_control(vmx->secondary_exec_control); ++ } ++ ++ if (nested_vmx_allowed(vcpu)) ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= ++ FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ else ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ++ ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ ++ if (nested_vmx_allowed(vcpu)) { ++ nested_vmx_cr_fixed1_bits_update(vcpu); ++ nested_vmx_entry_exit_ctls_update(vcpu); ++ } ++} ++ ++static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ++{ ++ if (func == 1 && nested) ++ entry->ecx |= 
bit(X86_FEATURE_VMX); ++} ++ ++static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exit_reason; ++ unsigned long exit_qualification = vcpu->arch.exit_qualification; ++ ++ if (vmx->nested.pml_full) { ++ exit_reason = EXIT_REASON_PML_FULL; ++ vmx->nested.pml_full = false; ++ exit_qualification &= INTR_INFO_UNBLOCK_NMI; ++ } else if (fault->error_code & PFERR_RSVD_MASK) ++ exit_reason = EXIT_REASON_EPT_MISCONFIG; ++ else ++ exit_reason = EXIT_REASON_EPT_VIOLATION; ++ ++ nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification); ++ vmcs12->guest_physical_address = fault->address; ++} ++ ++static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) ++{ ++ return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; ++} ++ ++/* Callbacks for nested_ept_init_mmu_context: */ ++ ++static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) ++{ ++ /* return the page table to be shadowed - in our case, EPT12 */ ++ return get_vmcs12(vcpu)->ept_pointer; ++} ++ ++static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ WARN_ON(mmu_is_nested(vcpu)); ++ if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) ++ return 1; ++ ++ kvm_init_shadow_ept_mmu(vcpu, ++ to_vmx(vcpu)->nested.msrs.ept_caps & ++ VMX_EPT_EXECUTE_ONLY_BIT, ++ nested_ept_ad_enabled(vcpu), ++ nested_ept_get_cr3(vcpu)); ++ vcpu->arch.mmu.set_cr3 = vmx_set_cr3; ++ vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; ++ vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; ++ ++ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; ++ return 0; ++} ++ ++static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.walk_mmu = &vcpu->arch.mmu; ++} ++ ++static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, ++ u16 error_code) ++{ ++ bool inequality, bit; ++ ++ bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0; ++ inequality = ++ (error_code & vmcs12->page_fault_error_code_mask) != ++ vmcs12->page_fault_error_code_match; ++ return inequality ^ bit; ++} ++ ++static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, ++ struct x86_exception *fault) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ WARN_ON(!is_guest_mode(vcpu)); ++ ++ if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && ++ !to_vmx(vcpu)->nested.nested_run_pending) { ++ vmcs12->vm_exit_intr_error_code = fault->error_code; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, ++ PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | ++ INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, ++ fault->address); ++ } else { ++ kvm_inject_page_fault(vcpu, fault); ++ } ++} ++ ++static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12); ++ ++static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct page *page; ++ u64 hpa; ++ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ /* ++ * Translate L1 physical address to host physical ++ * address for vmcs02. Keep the page pinned, so this ++ * physical address remains valid. We keep a reference ++ * to it so we can release it later. 
++ */ ++ if (vmx->nested.apic_access_page) { /* shouldn't happen */ ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); ++ /* ++ * If translation failed, no matter: This feature asks ++ * to exit when accessing the given address, and if it ++ * can never be accessed, this feature won't do ++ * anything anyway. ++ */ ++ if (!is_error_page(page)) { ++ vmx->nested.apic_access_page = page; ++ hpa = page_to_phys(vmx->nested.apic_access_page); ++ vmcs_write64(APIC_ACCESS_ADDR, hpa); ++ } else { ++ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); ++ } ++ } ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); ++ ++ /* ++ * If translation failed, VM entry will fail because ++ * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. ++ * Failing the vm entry is _not_ what the processor ++ * does but it's basically the only possibility we ++ * have. We could still enter the guest if CR8 load ++ * exits are enabled, CR8 store exits are enabled, and ++ * virtualize APIC access is disabled; in this case ++ * the processor would never use the TPR shadow and we ++ * could simply clear the bit from the execution ++ * control. But such a configuration is useless, so ++ * let's keep the code simple. ++ */ ++ if (!is_error_page(page)) { ++ vmx->nested.virtual_apic_page = page; ++ hpa = page_to_phys(vmx->nested.virtual_apic_page); ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); ++ } ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ if (vmx->nested.pi_desc_page) { /* shouldn't happen */ ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); ++ } ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); ++ if (is_error_page(page)) ++ return; ++ vmx->nested.pi_desc_page = page; ++ vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc = ++ (struct pi_desc *)((void *)vmx->nested.pi_desc + ++ (unsigned long)(vmcs12->posted_intr_desc_addr & ++ (PAGE_SIZE - 1))); ++ vmcs_write64(POSTED_INTR_DESC_ADDR, ++ page_to_phys(vmx->nested.pi_desc_page) + ++ (unsigned long)(vmcs12->posted_intr_desc_addr & ++ (PAGE_SIZE - 1))); ++ } ++ if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) ++ vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_USE_MSR_BITMAPS); ++ else ++ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, ++ CPU_BASED_USE_MSR_BITMAPS); ++} ++ ++static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) ++{ ++ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ /* ++ * A timer value of zero is architecturally guaranteed to cause ++ * a VMExit prior to executing any instructions in the guest. 
++ */ ++ if (preemption_timeout == 0) { ++ vmx_preemption_timer_fn(&vmx->nested.preemption_timer); ++ return; ++ } ++ ++ if (vcpu->arch.virtual_tsc_khz == 0) ++ return; ++ ++ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; ++ preemption_timeout *= 1000000; ++ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); ++ hrtimer_start(&vmx->nested.preemption_timer, ++ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); ++} ++ ++static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) || ++ !page_address_valid(vcpu, vmcs12->io_bitmap_b)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->msr_bitmap)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) { ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ ++ msr_bitmap[word] = ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++} ++ ++/* ++ * Merge L0's and L1's MSR bitmap, return false to indicate that ++ * we do not use the hardware. ++ */ ++static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ int msr; ++ struct page *page; ++ unsigned long *msr_bitmap_l1; ++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; ++ /* ++ * pred_cmd & spec_ctrl are trying to verify two things: ++ * ++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This ++ * ensures that we do not accidentally generate an L02 MSR bitmap ++ * from the L12 MSR bitmap that is too permissive. ++ * 2. That L1 or L2s have actually used the MSR. This avoids ++ * unnecessarily merging of the bitmap if the MSR is unused. This ++ * works properly because we only update the L01 MSR bitmap lazily. ++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only ++ * updated to reflect this when L1 (or its L2s) actually write to ++ * the MSR. ++ */ ++ bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); ++ bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); ++ ++ /* Nothing to do if the MSR bitmap is not in use. */ ++ if (!cpu_has_vmx_msr_bitmap() || ++ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) ++ return false; ++ ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !pred_cmd && !spec_ctrl) ++ return false; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); ++ if (is_error_page(page)) ++ return false; ++ ++ msr_bitmap_l1 = (unsigned long *)kmap(page); ++ ++ /* ++ * To keep the control flow simple, pay eight 8-byte writes (sixteen ++ * 4-byte writes on 32-bit systems) up front to enable intercepts for ++ * the x2APIC MSR range and selectively disable them below. 
++ */ ++ enable_x2apic_msr_intercepts(msr_bitmap_l0); ++ ++ if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { ++ if (nested_cpu_has_apic_reg_virt(vmcs12)) { ++ /* ++ * L0 need not intercept reads for MSRs between 0x800 ++ * and 0x8ff, it just lets the processor take the value ++ * from the virtual-APIC page; take those 256 bits ++ * directly from the L1 bitmap. ++ */ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ ++ msr_bitmap_l0[word] = msr_bitmap_l1[word]; ++ } ++ } ++ ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_TASKPRI), ++ MSR_TYPE_R | MSR_TYPE_W); ++ ++ if (nested_cpu_has_vid(vmcs12)) { ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_EOI), ++ MSR_TYPE_W); ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ X2APIC_MSR(APIC_SELF_IPI), ++ MSR_TYPE_W); ++ } ++ } ++ ++ if (spec_ctrl) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_R | MSR_TYPE_W); ++ ++ if (pred_cmd) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ ++ kunmap(page); ++ kvm_release_page_clean(page); ++ ++ return true; ++} ++ ++static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct vmcs12 *shadow; ++ struct page *page; ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12) || ++ vmcs12->vmcs_link_pointer == -1ull) ++ return; ++ ++ shadow = get_shadow_vmcs12(vcpu); ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); ++ ++ memcpy(shadow, kmap(page), VMCS12_SIZE); ++ ++ kunmap(page); ++ kvm_release_page_clean(page); ++} ++ ++static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12) || ++ vmcs12->vmcs_link_pointer == -1ull) ++ return; ++ ++ kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, ++ get_shadow_vmcs12(vcpu), VMCS12_SIZE); ++} ++ ++static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && ++ !page_address_valid(vcpu, vmcs12->apic_access_addr)) ++ return -EINVAL; ++ else ++ return 0; ++} ++ ++static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !nested_cpu_has_apic_reg_virt(vmcs12) && ++ !nested_cpu_has_vid(vmcs12) && ++ !nested_cpu_has_posted_intr(vmcs12)) ++ return 0; ++ ++ /* ++ * If virtualize x2apic mode is enabled, ++ * virtualize apic access must be disabled. ++ */ ++ if (nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) ++ return -EINVAL; ++ ++ /* ++ * If virtual interrupt delivery is enabled, ++ * we must exit on external interrupts. ++ */ ++ if (nested_cpu_has_vid(vmcs12) && ++ !nested_exit_on_intr(vcpu)) ++ return -EINVAL; ++ ++ /* ++ * bits 15:8 should be zero in posted_intr_nv, ++ * the descriptor address has been already checked ++ * in nested_get_vmcs12_pages. ++ * ++ * bits 5:0 of posted_intr_desc_addr should be zero. 
++ */ ++ if (nested_cpu_has_posted_intr(vmcs12) && ++ (!nested_cpu_has_vid(vmcs12) || ++ !nested_exit_intr_ack_set(vcpu) || ++ (vmcs12->posted_intr_nv & 0xff00) || ++ (vmcs12->posted_intr_desc_addr & 0x3f) || ++ (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) ++ return -EINVAL; ++ ++ /* tpr shadow is needed by all apicv features. */ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, ++ unsigned long count_field, ++ unsigned long addr_field) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ int maxphyaddr; ++ u64 count, addr; ++ ++ if (vmcs12_read_any(vmcs12, count_field, &count) || ++ vmcs12_read_any(vmcs12, addr_field, &addr)) { ++ WARN_ON(1); ++ return -EINVAL; ++ } ++ if (count == 0) ++ return 0; ++ maxphyaddr = cpuid_maxphyaddr(vcpu); ++ if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || ++ (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { ++ pr_debug_ratelimited( ++ "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)", ++ addr_field, maxphyaddr, count, addr); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (vmcs12->vm_exit_msr_load_count == 0 && ++ vmcs12->vm_exit_msr_store_count == 0 && ++ vmcs12->vm_entry_msr_load_count == 0) ++ return 0; /* Fast path */ ++ if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, ++ VM_EXIT_MSR_LOAD_ADDR) || ++ nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, ++ VM_EXIT_MSR_STORE_ADDR) || ++ nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, ++ VM_ENTRY_MSR_LOAD_ADDR)) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u64 address = vmcs12->pml_address; ++ int maxphyaddr = cpuid_maxphyaddr(vcpu); ++ ++ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) { ++ if (!nested_cpu_has_ept(vmcs12) || ++ !IS_ALIGNED(address, 4096) || ++ address >> maxphyaddr) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_shadow_vmcs(vmcs12)) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) || ++ !page_address_valid(vcpu, vmcs12->vmwrite_bitmap)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ /* x2APIC MSR accesses are not allowed */ ++ if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) ++ return -EINVAL; ++ if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ ++ e->index == MSR_IA32_UCODE_REV) ++ return -EINVAL; ++ if (e->reserved != 0) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ if (e->index == MSR_FS_BASE || ++ e->index == MSR_GS_BASE || ++ e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */ ++ nested_vmx_msr_check_common(vcpu, e)) ++ return -EINVAL; ++ return 0; ++} ++ ++static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, ++ struct vmx_msr_entry *e) ++{ ++ if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */ ++ nested_vmx_msr_check_common(vcpu, e)) ++ return -EINVAL; ++ return 0; ++} ++ ++/* ++ * Load guest's/host's msr at nested entry/exit. ++ * return 0 for success, entry index for failure. 
++ */ ++static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) ++{ ++ u32 i; ++ struct vmx_msr_entry e; ++ struct msr_data msr; ++ ++ msr.host_initiated = false; ++ for (i = 0; i < count; i++) { ++ if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), ++ &e, sizeof(e))) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR entry (%u, 0x%08llx)\n", ++ __func__, i, gpa + i * sizeof(e)); ++ goto fail; ++ } ++ if (nested_vmx_load_msr_check(vcpu, &e)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, i, e.index, e.reserved); ++ goto fail; ++ } ++ msr.index = e.index; ++ msr.data = e.value; ++ if (kvm_set_msr(vcpu, &msr)) { ++ pr_debug_ratelimited( ++ "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", ++ __func__, i, e.index, e.value); ++ goto fail; ++ } ++ } ++ return 0; ++fail: ++ return i + 1; ++} ++ ++static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) ++{ ++ u32 i; ++ struct vmx_msr_entry e; ++ ++ for (i = 0; i < count; i++) { ++ struct msr_data msr_info; ++ if (kvm_vcpu_read_guest(vcpu, ++ gpa + i * sizeof(e), ++ &e, 2 * sizeof(u32))) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR entry (%u, 0x%08llx)\n", ++ __func__, i, gpa + i * sizeof(e)); ++ return -EINVAL; ++ } ++ if (nested_vmx_store_msr_check(vcpu, &e)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, i, e.index, e.reserved); ++ return -EINVAL; ++ } ++ msr_info.host_initiated = false; ++ msr_info.index = e.index; ++ if (kvm_get_msr(vcpu, &msr_info)) { ++ pr_debug_ratelimited( ++ "%s cannot read MSR (%u, 0x%x)\n", ++ __func__, i, e.index); ++ return -EINVAL; ++ } ++ if (kvm_vcpu_write_guest(vcpu, ++ gpa + i * sizeof(e) + ++ offsetof(struct vmx_msr_entry, value), ++ &msr_info.data, sizeof(msr_info.data))) { ++ pr_debug_ratelimited( ++ "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", ++ __func__, i, e.index, msr_info.data); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) ++{ ++ unsigned long invalid_mask; ++ ++ invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu); ++ return (val & invalid_mask) == 0; ++} ++ ++/* ++ * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are ++ * emulating VM entry into a guest with EPT enabled. ++ * Returns 0 on success, 1 on failure. Invalid state exit qualification code ++ * is assigned to entry_failure_code on failure. ++ */ ++static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, ++ u32 *entry_failure_code) ++{ ++ if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { ++ if (!nested_cr3_valid(vcpu, cr3)) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ ++ /* ++ * If PAE paging and EPT are both on, CR3 is not used by the CPU and ++ * must not be dereferenced. 
++ */ ++ if (is_pae_paging(vcpu) && !nested_ept) { ++ if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { ++ *entry_failure_code = ENTRY_FAIL_PDPTE; ++ return 1; ++ } ++ } ++ } ++ ++ if (!nested_ept) ++ kvm_mmu_new_cr3(vcpu, cr3, false); ++ ++ vcpu->arch.cr3 = cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ kvm_init_mmu(vcpu, false); ++ ++ return 0; ++} ++ ++static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); ++ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); ++ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); ++ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); ++ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); ++ vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); ++ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); ++ vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); ++ vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); ++ vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); ++ vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); ++ vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); ++ vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); ++ vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); ++ vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); ++ vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); ++ vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); ++ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); ++ vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); ++ vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); ++ vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); ++ vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); ++ vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); ++ vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); ++ vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); ++ vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); ++ vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); ++ vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); ++ vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); ++ vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); ++ vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); ++ ++ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); ++ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, ++ vmcs12->guest_pending_dbg_exceptions); ++ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); ++ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); ++ ++ if (nested_cpu_has_xsaves(vmcs12)) ++ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); ++ vmcs_write64(VMCS_LINK_POINTER, -1ull); ++ ++ if (cpu_has_vmx_posted_intr()) ++ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); ++ ++ /* ++ * Whether page-faults are trapped is determined by a combination of ++ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. ++ * If enable_ept, L0 doesn't care about page faults and we should ++ * set all of these to L1's desires. However, if !enable_ept, L0 does ++ * care about (at least some) page faults, and because it is not easy ++ * (if at all possible?) to merge L0 and L1's desires, we simply ask ++ * to exit on each and every L2 page fault. This is done by setting ++ * MASK=MATCH=0 and (see below) EB.PF=1. 
++ * Note that below we don't need special code to set EB.PF beyond the ++ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, ++ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when ++ * !enable_ept, EB.PF is 1, so the "or" will always be 1. ++ */ ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, ++ enable_ept ? vmcs12->page_fault_error_code_mask : 0); ++ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, ++ enable_ept ? vmcs12->page_fault_error_code_match : 0); ++ ++ /* All VMFUNCs are currently emulated through L0 vmexits. */ ++ if (cpu_has_vmx_vmfunc()) ++ vmcs_write64(VM_FUNCTION_CONTROL, 0); ++ ++ if (cpu_has_vmx_apicv()) { ++ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); ++ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); ++ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); ++ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); ++ } ++ ++ /* ++ * Set host-state according to L0's settings (vmcs12 is irrelevant here) ++ * Some constant fields are set here by vmx_set_constant_host_state(). ++ * Other fields are different per CPU, and will be set later when ++ * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest() ++ * is called. ++ */ ++ vmx_set_constant_host_state(vmx); ++ ++ /* ++ * Set the MSR load/store lists to match L0's settings. ++ */ ++ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); ++ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); ++ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); ++ ++ set_cr4_guest_host_mask(vmx); ++ ++ if (kvm_mpx_supported()) { ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) ++ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); ++ else ++ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); ++ } ++ ++ if (enable_vpid) { ++ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); ++ else ++ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); ++ } ++ ++ /* ++ * L1 may access the L2's PDPTR, so save them to construct vmcs12 ++ */ ++ if (enable_ept) { ++ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); ++ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); ++ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); ++ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); ++ } ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); ++} ++ ++/* ++ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested ++ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it ++ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 ++ * guest in a way that will both be appropriate to L1's requests, and our ++ * needs. In addition to modifying the active vmcs (which is vmcs02), this ++ * function also has additional necessary side-effects, like setting various ++ * vcpu->arch fields. ++ * Returns 0 on success, 1 on failure. Invalid state exit qualification code ++ * is assigned to entry_failure_code on failure. 
++ */ ++static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 *entry_failure_code) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 exec_control, vmcs12_exec_ctrl; ++ ++ if (vmx->nested.dirty_vmcs12) { ++ prepare_vmcs02_full(vcpu, vmcs12); ++ vmx->nested.dirty_vmcs12 = false; ++ } ++ ++ /* ++ * First, the fields that are shadowed. This must be kept in sync ++ * with vmx_shadow_fields.h. ++ */ ++ ++ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); ++ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); ++ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); ++ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); ++ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { ++ kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); ++ } else { ++ kvm_set_dr(vcpu, 7, vcpu->arch.dr7); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); ++ } ++ if (vmx->nested.nested_run_pending) { ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, ++ vmcs12->vm_entry_intr_info_field); ++ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, ++ vmcs12->vm_entry_exception_error_code); ++ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, ++ vmcs12->vm_entry_instruction_len); ++ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, ++ vmcs12->guest_interruptibility_info); ++ vmx->loaded_vmcs->nmi_known_unmasked = ++ !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); ++ } else { ++ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); ++ } ++ vmx_set_rflags(vcpu, vmcs12->guest_rflags); ++ ++ exec_control = vmcs12->pin_based_vm_exec_control; ++ ++ /* Preemption timer setting is computed directly in vmx_vcpu_run. */ ++ exec_control |= vmcs_config.pin_based_exec_ctrl; ++ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; ++ vmx->loaded_vmcs->hv_timer_armed = false; ++ ++ /* Posted interrupts setting is only taken from vmcs12. */ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; ++ vmx->nested.pi_pending = false; ++ } else { ++ exec_control &= ~PIN_BASED_POSTED_INTR; ++ } ++ ++ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); ++ ++ vmx->nested.preemption_timer_expired = false; ++ if (nested_cpu_has_preemption_timer(vmcs12)) ++ vmx_start_preemption_timer(vcpu); ++ ++ if (cpu_has_secondary_exec_ctrls()) { ++ exec_control = vmx->secondary_exec_control; ++ ++ /* Take the following fields only from vmcs12 */ ++ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ SECONDARY_EXEC_ENABLE_INVPCID | ++ SECONDARY_EXEC_RDTSCP | ++ SECONDARY_EXEC_XSAVES | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_ENABLE_VMFUNC); ++ if (nested_cpu_has(vmcs12, ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { ++ vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & ++ ~SECONDARY_EXEC_ENABLE_PML; ++ exec_control |= vmcs12_exec_ctrl; ++ } ++ ++ /* VMCS shadowing for L2 is emulated for now */ ++ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; ++ ++ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ++ vmcs_write16(GUEST_INTR_STATUS, ++ vmcs12->guest_intr_status); ++ ++ /* ++ * Write an illegal value to APIC_ACCESS_ADDR. Later, ++ * nested_get_vmcs12_pages will either fix it up or ++ * remove the VM execution control. 
++ */ ++ if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) ++ vmcs_write64(APIC_ACCESS_ADDR, -1ull); ++ ++ if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) ++ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); ++ ++ vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); ++ } ++ ++ /* ++ * HOST_RSP is normally set correctly in vmx_vcpu_run() just before ++ * entry, but only if the current (host) sp changed from the value ++ * we wrote last (vmx->host_rsp). This cache is no longer relevant ++ * if we switch vmcs, and rather than hold a separate cache per vmcs, ++ * here we just force the write to happen on entry. ++ */ ++ vmx->host_rsp = 0; ++ ++ exec_control = vmx_exec_control(vmx); /* L0's desires */ ++ exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; ++ exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; ++ exec_control &= ~CPU_BASED_TPR_SHADOW; ++ exec_control |= vmcs12->cpu_based_vm_exec_control; ++ ++ /* ++ * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if ++ * nested_get_vmcs12_pages can't fix it up, the illegal value ++ * will result in a VM entry failure. ++ */ ++ if (exec_control & CPU_BASED_TPR_SHADOW) { ++ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); ++ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); ++ } else { ++#ifdef CONFIG_X86_64 ++ exec_control |= CPU_BASED_CR8_LOAD_EXITING | ++ CPU_BASED_CR8_STORE_EXITING; ++#endif ++ } ++ ++ /* ++ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed ++ * for I/O port accesses. ++ */ ++ exec_control &= ~CPU_BASED_USE_IO_BITMAPS; ++ exec_control |= CPU_BASED_UNCOND_IO_EXITING; ++ ++ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); ++ ++ /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the ++ * bitwise-or of what L1 wants to trap for L2, and what we want to ++ * trap. Note that CR0.TS also needs updating - we do this later. ++ */ ++ update_exception_bitmap(vcpu); ++ vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; ++ vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); ++ ++ /* L2->L1 exit controls are emulated - the hardware exit is to L0 so ++ * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER ++ * bits are further modified by vmx_set_efer() below. ++ */ ++ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); ++ ++ /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are ++ * emulated by vmx_set_efer(), below. ++ */ ++ vm_entry_controls_init(vmx, ++ (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & ++ ~VM_ENTRY_IA32E_MODE) | ++ (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { ++ vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); ++ vcpu->arch.pat = vmcs12->guest_ia32_pat; ++ } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); ++ } ++ ++ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); ++ ++ if (kvm_has_tsc_control) ++ decache_tsc_multiplier(vmx); ++ ++ if (enable_vpid) { ++ /* ++ * There is no direct mapping between vpid02 and vpid12, the ++ * vpid02 is per-vCPU for L0 and reused while the value of ++ * vpid12 is changed w/ one invvpid during nested vmentry. ++ * The vpid12 is allocated by L1 for L2, so it will not ++ * influence global bitmap(for vpid01 and vpid02 allocation) ++ * even if spawn a lot of nested vCPUs. 
++ */ ++ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { ++ if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { ++ vmx->nested.last_vpid = vmcs12->virtual_processor_id; ++ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); ++ } ++ } else { ++ vmx_flush_tlb(vcpu, true); ++ } ++ } ++ ++ if (enable_pml) { ++ /* ++ * Conceptually we want to copy the PML address and index from ++ * vmcs01 here, and then back to vmcs01 on nested vmexit. But, ++ * since we always flush the log on each vmexit, this happens ++ * to be equivalent to simply resetting the fields in vmcs02. ++ */ ++ ASSERT(vmx->pml_pg); ++ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); ++ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); ++ } ++ ++ if (nested_cpu_has_ept(vmcs12)) { ++ if (nested_ept_init_mmu_context(vcpu)) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ } else if (nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ /* ++ * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those ++ * bits which we consider mandatory enabled. ++ * The CR0_READ_SHADOW is what L2 should have expected to read given ++ * the specifications by L1; It's not enough to take ++ * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we ++ * have more bits than L1 expected. ++ */ ++ vmx_set_cr0(vcpu, vmcs12->guest_cr0); ++ vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); ++ ++ vmx_set_cr4(vcpu, vmcs12->guest_cr4); ++ vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); ++ ++ if (vmx->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) ++ vcpu->arch.efer = vmcs12->guest_ia32_efer; ++ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ++ vcpu->arch.efer |= (EFER_LMA | EFER_LME); ++ else ++ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); ++ /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ ++ vmx_set_efer(vcpu, vcpu->arch.efer); ++ ++ /* ++ * Guest state is invalid and unrestricted guest is disabled, ++ * which means L1 attempted VMEntry to L2 with invalid state. ++ * Fail the VMEntry. ++ */ ++ if (vmx->emulation_required) { ++ *entry_failure_code = ENTRY_FAIL_DEFAULT; ++ return 1; ++ } ++ ++ /* Shadow page tables on either EPT or shadow page tables. 
*/ ++ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), ++ entry_failure_code)) ++ return 1; ++ ++ if (!enable_ept) ++ vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; ++ ++ kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); ++ return 0; ++} ++ ++static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) ++{ ++ if (!nested_cpu_has_nmi_exiting(vmcs12) && ++ nested_cpu_has_virtual_nmis(vmcs12)) ++ return -EINVAL; ++ ++ if (!nested_cpu_has_virtual_nmis(vmcs12) && ++ nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && ++ vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_apic_access_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_pml_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, ++ vmx->nested.msrs.procbased_ctls_low, ++ vmx->nested.msrs.procbased_ctls_high) || ++ (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && ++ !vmx_control_verify(vmcs12->secondary_vm_exec_control, ++ vmx->nested.msrs.secondary_ctls_low, ++ vmx->nested.msrs.secondary_ctls_high)) || ++ !vmx_control_verify(vmcs12->pin_based_vm_exec_control, ++ vmx->nested.msrs.pinbased_ctls_low, ++ vmx->nested.msrs.pinbased_ctls_high) || ++ !vmx_control_verify(vmcs12->vm_exit_controls, ++ vmx->nested.msrs.exit_ctls_low, ++ vmx->nested.msrs.exit_ctls_high) || ++ !vmx_control_verify(vmcs12->vm_entry_controls, ++ vmx->nested.msrs.entry_ctls_low, ++ vmx->nested.msrs.entry_ctls_high)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_vmx_check_nmi_controls(vmcs12)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_vmfunc(vmcs12)) { ++ if (vmcs12->vm_function_control & ++ ~vmx->nested.msrs.vmfunc_controls) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (nested_cpu_has_eptp_switching(vmcs12)) { ++ if (!nested_cpu_has_ept(vmcs12) || ++ !page_address_valid(vcpu, vmcs12->eptp_list_address)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ } ++ } ++ ++ if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || ++ !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || ++ !nested_cr3_valid(vcpu, vmcs12->host_cr3)) ++ return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; ++ ++ /* ++ * 
From the Intel SDM, volume 3: ++ * Fields relevant to VM-entry event injection must be set properly. ++ * These fields are the VM-entry interruption-information field, the ++ * VM-entry exception error code, and the VM-entry instruction length. ++ */ ++ if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { ++ u32 intr_info = vmcs12->vm_entry_intr_info_field; ++ u8 vector = intr_info & INTR_INFO_VECTOR_MASK; ++ u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; ++ bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; ++ bool should_have_error_code; ++ bool urg = nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_UNRESTRICTED_GUEST); ++ bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; ++ ++ /* VM-entry interruption-info field: interruption type */ ++ if (intr_type == INTR_TYPE_RESERVED || ++ (intr_type == INTR_TYPE_OTHER_EVENT && ++ !nested_cpu_supports_monitor_trap_flag(vcpu))) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: vector */ ++ if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || ++ (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || ++ (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: deliver error code */ ++ should_have_error_code = ++ intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && ++ x86_exception_has_error_code(vector); ++ if (has_error_code != should_have_error_code) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry exception error code */ ++ if (has_error_code && ++ vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry interruption-info field: reserved bits */ ++ if (intr_info & INTR_INFO_RESVD_BITS_MASK) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ ++ /* VM-entry instruction length */ ++ switch (intr_type) { ++ case INTR_TYPE_SOFT_EXCEPTION: ++ case INTR_TYPE_SOFT_INTR: ++ case INTR_TYPE_PRIV_SW_EXCEPTION: ++ if ((vmcs12->vm_entry_instruction_len > 15) || ++ (vmcs12->vm_entry_instruction_len == 0 && ++ !nested_cpu_has_zero_length_injection(vcpu))) ++ return VMXERR_ENTRY_INVALID_CONTROL_FIELD; ++ } ++ } ++ ++ return 0; ++} ++ ++static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ int r; ++ struct page *page; ++ struct vmcs12 *shadow; ++ ++ if (vmcs12->vmcs_link_pointer == -1ull) ++ return 0; ++ ++ if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) ++ return -EINVAL; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); ++ if (is_error_page(page)) ++ return -EINVAL; ++ ++ r = 0; ++ shadow = kmap(page); ++ if (shadow->hdr.revision_id != VMCS12_REVISION || ++ shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) ++ r = -EINVAL; ++ kunmap(page); ++ kvm_release_page_clean(page); ++ return r; ++} ++ ++static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 *exit_qual) ++{ ++ bool ia32e; ++ ++ *exit_qual = ENTRY_FAIL_DEFAULT; ++ ++ if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || ++ !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) ++ return 1; ++ ++ if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { ++ *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; ++ return 1; ++ } ++ ++ /* ++ * If the load IA32_EFER VM-entry control is 1, the following checks ++ * are performed on the field for the IA32_EFER MSR: ++ * - Bits reserved in the IA32_EFER MSR must be 0. 
++ * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of ++ * the IA-32e mode guest VM-exit control. It must also be identical ++ * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to ++ * CR0.PG) is 1. ++ */ ++ if (to_vmx(vcpu)->nested.nested_run_pending && ++ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { ++ ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; ++ if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || ++ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || ++ ((vmcs12->guest_cr0 & X86_CR0_PG) && ++ ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) ++ return 1; ++ } ++ ++ /* ++ * If the load IA32_EFER VM-exit control is 1, bits reserved in the ++ * IA32_EFER MSR must be 0 in the field for that register. In addition, ++ * the values of the LMA and LME bits in the field must each be that of ++ * the host address-space size VM-exit control. ++ */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { ++ ia32e = (vmcs12->vm_exit_controls & ++ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; ++ if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || ++ ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || ++ ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) ++ return 1; ++ } ++ ++ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && ++ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || ++ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * If exit_qual is NULL, this is being called from state restore (either RSM ++ * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. ++ */ ++static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ bool from_vmentry = !!exit_qual; ++ u32 dummy_exit_qual; ++ bool evaluate_pending_interrupts; ++ int r = 0; ++ ++ evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ++ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); ++ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) ++ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); ++ ++ enter_guest_mode(vcpu); ++ ++ if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) ++ vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); ++ if (kvm_mpx_supported() && ++ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) ++ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); ++ ++ vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); ++ vmx_segment_cache_clear(vmx); ++ ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset += vmcs12->tsc_offset; ++ ++ r = EXIT_REASON_INVALID_STATE; ++ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual)) ++ goto fail; ++ ++ if (from_vmentry) { ++ nested_get_vmcs12_pages(vcpu); ++ ++ r = EXIT_REASON_MSR_LOAD_FAIL; ++ *exit_qual = nested_vmx_load_msr(vcpu, ++ vmcs12->vm_entry_msr_load_addr, ++ vmcs12->vm_entry_msr_load_count); ++ if (*exit_qual) ++ goto fail; ++ } else { ++ /* ++ * The MMU is not initialized to point at the right entities yet and ++ * "get pages" would need to read data from the guest (i.e. we will ++ * need to perform gpa to hpa translation). Request a call ++ * to nested_get_vmcs12_pages before the next VM-entry. The MSRs ++ * have already been set at vmentry time and should not be reset. 
++ */ ++ kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); ++ } ++ ++ /* ++ * If L1 had a pending IRQ/NMI until it executed ++ * VMLAUNCH/VMRESUME which wasn't delivered because it was ++ * disallowed (e.g. interrupts disabled), L0 needs to ++ * evaluate if this pending event should cause an exit from L2 ++ * to L1 or delivered directly to L2 (e.g. In case L1 don't ++ * intercept EXTERNAL_INTERRUPT). ++ * ++ * Usually this would be handled by the processor noticing an ++ * IRQ/NMI window request, or checking RVI during evaluation of ++ * pending virtual interrupts. However, this setting was done ++ * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 ++ * to perform pending event evaluation by requesting a KVM_REQ_EVENT. ++ */ ++ if (unlikely(evaluate_pending_interrupts)) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ /* ++ * Note no nested_vmx_succeed or nested_vmx_fail here. At this point ++ * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet ++ * returned as far as L1 is concerned. It will only return (and set ++ * the success flag) when L2 exits (see nested_vmx_vmexit()). ++ */ ++ return 0; ++ ++fail: ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset -= vmcs12->tsc_offset; ++ leave_guest_mode(vcpu); ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ return r; ++} ++ ++/* ++ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 ++ * for running an L2 nested guest. ++ */ ++static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) ++{ ++ struct vmcs12 *vmcs12; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); ++ u32 exit_qual; ++ int ret; ++ ++ if (!nested_vmx_check_permission(vcpu)) ++ return 1; ++ ++ if (!nested_vmx_check_vmcs12(vcpu)) ++ goto out; ++ ++ vmcs12 = get_vmcs12(vcpu); ++ ++ /* ++ * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact ++ * that there *is* a valid VMCS pointer, RFLAGS.CF is set ++ * rather than RFLAGS.ZF, and no error number is stored to the ++ * VM-instruction error field. ++ */ ++ if (vmcs12->hdr.shadow_vmcs) { ++ nested_vmx_failInvalid(vcpu); ++ goto out; ++ } ++ ++ if (enable_shadow_vmcs) ++ copy_shadow_to_vmcs12(vmx); ++ ++ /* ++ * The nested entry process starts with enforcing various prerequisites ++ * on vmcs12 as required by the Intel SDM, and act appropriately when ++ * they fail: As the SDM explains, some conditions should cause the ++ * instruction to fail, while others will cause the instruction to seem ++ * to succeed, but return an EXIT_REASON_INVALID_STATE. ++ * To speed up the normal (success) code path, we should avoid checking ++ * for misconfigurations which will anyway be caught by the processor ++ * when using the merged vmcs02. ++ */ ++ if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) { ++ nested_vmx_failValid(vcpu, ++ VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); ++ goto out; ++ } ++ ++ if (vmcs12->launch_state == launch) { ++ nested_vmx_failValid(vcpu, ++ launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS ++ : VMXERR_VMRESUME_NONLAUNCHED_VMCS); ++ goto out; ++ } ++ ++ ret = check_vmentry_prereqs(vcpu, vmcs12); ++ if (ret) { ++ nested_vmx_failValid(vcpu, ret); ++ goto out; ++ } ++ ++ /* ++ * After this point, the trap flag no longer triggers a singlestep trap ++ * on the vm entry instructions; don't call kvm_skip_emulated_instruction. ++ * This is not 100% correct; for performance reasons, we delegate most ++ * of the checks on host state to the processor. If those fail, ++ * the singlestep trap is missed. 
++ */ ++ skip_emulated_instruction(vcpu); ++ ++ ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); ++ if (ret) { ++ nested_vmx_entry_failure(vcpu, vmcs12, ++ EXIT_REASON_INVALID_STATE, exit_qual); ++ return 1; ++ } ++ ++ /* ++ * We're finally done with prerequisite checking, and can start with ++ * the nested entry. ++ */ ++ ++ vmx->nested.nested_run_pending = 1; ++ ret = enter_vmx_non_root_mode(vcpu, &exit_qual); ++ if (ret) { ++ nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual); ++ vmx->nested.nested_run_pending = 0; ++ return 1; ++ } ++ ++ /* Hide L1D cache contents from the nested guest. */ ++ vmx->vcpu.arch.l1tf_flush_l1d = true; ++ ++ /* ++ * Must happen outside of enter_vmx_non_root_mode() as it will ++ * also be used as part of restoring nVMX state for ++ * snapshot restore (migration). ++ * ++ * In this flow, it is assumed that vmcs12 cache was ++ * trasferred as part of captured nVMX state and should ++ * therefore not be read from guest memory (which may not ++ * exist on destination host yet). ++ */ ++ nested_cache_shadow_vmcs12(vcpu, vmcs12); ++ ++ /* ++ * If we're entering a halted L2 vcpu and the L2 vcpu won't be ++ * awakened by event injection or by an NMI-window VM-exit or ++ * by an interrupt-window VM-exit, halt the vcpu. ++ */ ++ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && ++ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) && ++ !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) && ++ !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) && ++ (vmcs12->guest_rflags & X86_EFLAGS_IF))) { ++ vmx->nested.nested_run_pending = 0; ++ return kvm_vcpu_halt(vcpu); ++ } ++ return 1; ++ ++out: ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++/* ++ * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date ++ * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK). ++ * This function returns the new value we should put in vmcs12.guest_cr0. ++ * It's not enough to just return the vmcs02 GUEST_CR0. Rather, ++ * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now ++ * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0 ++ * didn't trap the bit, because if L1 did, so would L0). ++ * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have ++ * been modified by L2, and L1 knows it. So just leave the old value of ++ * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0 ++ * isn't relevant, because if L0 traps this bit it can set it to anything. ++ * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have ++ * changed these bits, and therefore they need to be updated, but L0 ++ * didn't necessarily allow them to be changed in GUEST_CR0 - and rather ++ * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there. 
++ */ ++static inline unsigned long ++vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ return ++ /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) | ++ /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) | ++ /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask | ++ vcpu->arch.cr0_guest_owned_bits)); ++} ++ ++static inline unsigned long ++vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ return ++ /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) | ++ /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) | ++ /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask | ++ vcpu->arch.cr4_guest_owned_bits)); ++} ++ ++static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ u32 idt_vectoring; ++ unsigned int nr; ++ ++ if (vcpu->arch.exception.injected) { ++ nr = vcpu->arch.exception.nr; ++ idt_vectoring = nr | VECTORING_INFO_VALID_MASK; ++ ++ if (kvm_exception_is_soft(nr)) { ++ vmcs12->vm_exit_instruction_len = ++ vcpu->arch.event_exit_inst_len; ++ idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; ++ } else ++ idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; ++ ++ if (vcpu->arch.exception.has_error_code) { ++ idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; ++ vmcs12->idt_vectoring_error_code = ++ vcpu->arch.exception.error_code; ++ } ++ ++ vmcs12->idt_vectoring_info_field = idt_vectoring; ++ } else if (vcpu->arch.nmi_injected) { ++ vmcs12->idt_vectoring_info_field = ++ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; ++ } else if (vcpu->arch.interrupt.injected) { ++ nr = vcpu->arch.interrupt.nr; ++ idt_vectoring = nr | VECTORING_INFO_VALID_MASK; ++ ++ if (vcpu->arch.interrupt.soft) { ++ idt_vectoring |= INTR_TYPE_SOFT_INTR; ++ vmcs12->vm_entry_instruction_len = ++ vcpu->arch.event_exit_inst_len; ++ } else ++ idt_vectoring |= INTR_TYPE_EXT_INTR; ++ ++ vmcs12->idt_vectoring_info_field = idt_vectoring; ++ } ++} ++ ++static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long exit_qual; ++ bool block_nested_events = ++ vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); ++ ++ if (vcpu->arch.exception.pending && ++ nested_vmx_check_exception(vcpu, &exit_qual)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_inject_exception_vmexit(vcpu, exit_qual); ++ return 0; ++ } ++ ++ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && ++ vmx->nested.preemption_timer_expired) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); ++ return 0; ++ } ++ ++ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, ++ NMI_VECTOR | INTR_TYPE_NMI_INTR | ++ INTR_INFO_VALID_MASK, 0); ++ /* ++ * The NMI-triggered VM exit counts as injection: ++ * clear this one and block further NMIs. 
++ */ ++ vcpu->arch.nmi_pending = 0; ++ vmx_set_nmi_mask(vcpu, true); ++ return 0; ++ } ++ ++ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && ++ nested_exit_on_intr(vcpu)) { ++ if (block_nested_events) ++ return -EBUSY; ++ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); ++ return 0; ++ } ++ ++ vmx_complete_nested_posted_interrupt(vcpu); ++ return 0; ++} ++ ++static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) ++{ ++ to_vmx(vcpu)->req_immediate_exit = true; ++} ++ ++static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) ++{ ++ ktime_t remaining = ++ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer); ++ u64 value; ++ ++ if (ktime_to_ns(remaining) <= 0) ++ return 0; ++ ++ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz; ++ do_div(value, 1000000); ++ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; ++} ++ ++/* ++ * Update the guest state fields of vmcs12 to reflect changes that ++ * occurred while L2 was running. (The "IA-32e mode guest" bit of the ++ * VM-entry controls is also updated, since this is really a guest ++ * state bit.) ++ */ ++static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) ++{ ++ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); ++ vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); ++ ++ vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); ++ vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); ++ vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); ++ ++ vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); ++ vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); ++ vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); ++ vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); ++ vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); ++ vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); ++ vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); ++ vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); ++ vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT); ++ vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT); ++ vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT); ++ vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT); ++ vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT); ++ vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT); ++ vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT); ++ vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT); ++ vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); ++ vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); ++ vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); ++ vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); ++ vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); ++ vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); ++ vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); ++ vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); ++ vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES); ++ vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES); ++ vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE); ++ vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE); ++ vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE); ++ vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE); ++ vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE); ++ vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE); ++ vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); ++ vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); ++ vmcs12->guest_gdtr_base 
= vmcs_readl(GUEST_GDTR_BASE); ++ vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); ++ ++ vmcs12->guest_interruptibility_info = ++ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); ++ vmcs12->guest_pending_dbg_exceptions = ++ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); ++ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) ++ vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; ++ else ++ vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; ++ ++ if (nested_cpu_has_preemption_timer(vmcs12)) { ++ if (vmcs12->vm_exit_controls & ++ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) ++ vmcs12->vmx_preemption_timer_value = ++ vmx_get_preemption_timer_value(vcpu); ++ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); ++ } ++ ++ /* ++ * In some cases (usually, nested EPT), L2 is allowed to change its ++ * own CR3 without exiting. If it has changed it, we must keep it. ++ * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined ++ * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. ++ * ++ * Additionally, restore L2's PDPTR to vmcs12. ++ */ ++ if (enable_ept) { ++ vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); ++ vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); ++ vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); ++ vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); ++ vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); ++ } ++ ++ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); ++ ++ if (nested_cpu_has_vid(vmcs12)) ++ vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); ++ ++ vmcs12->vm_entry_controls = ++ (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | ++ (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { ++ kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); ++ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); ++ } ++ ++ /* TODO: These cannot have changed unless we have MSR bitmaps and ++ * the relevant bit asks not to trap the change */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) ++ vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); ++ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) ++ vmcs12->guest_ia32_efer = vcpu->arch.efer; ++ vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); ++ vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); ++ vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); ++ if (kvm_mpx_supported()) ++ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); ++} ++ ++/* ++ * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits ++ * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), ++ * and this function updates it to reflect the changes to the guest state while ++ * L2 was running (and perhaps made some exits which were handled directly by L0 ++ * without going back to L1), and to reflect the exit reason. ++ * Note that we do not have to copy here all VMCS fields, just those that ++ * could have changed by the L2 guest or the exit - i.e., the guest-state and ++ * exit-information fields only. Other fields are modified by L1 with VMWRITE, ++ * which already writes to vmcs12 directly. 
++ */ ++static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ++ u32 exit_reason, u32 exit_intr_info, ++ unsigned long exit_qualification) ++{ ++ /* update guest state fields: */ ++ sync_vmcs12(vcpu, vmcs12); ++ ++ /* update exit information fields: */ ++ ++ vmcs12->vm_exit_reason = exit_reason; ++ vmcs12->exit_qualification = exit_qualification; ++ vmcs12->vm_exit_intr_info = exit_intr_info; ++ ++ vmcs12->idt_vectoring_info_field = 0; ++ vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); ++ vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ++ ++ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { ++ vmcs12->launch_state = 1; ++ ++ /* vm_entry_intr_info_field is cleared on exit. Emulate this ++ * instead of reading the real value. */ ++ vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; ++ ++ /* ++ * Transfer the event that L0 or L1 may wanted to inject into ++ * L2 to IDT_VECTORING_INFO_FIELD. ++ */ ++ vmcs12_save_pending_event(vcpu, vmcs12); ++ } ++ ++ /* ++ * Drop what we picked up for L2 via vmx_complete_interrupts. It is ++ * preserved above and would only end up incorrectly in L1. ++ */ ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_exception_queue(vcpu); ++ kvm_clear_interrupt_queue(vcpu); ++} ++ ++/* ++ * A part of what we need to when the nested L2 guest exits and we want to ++ * run its L1 parent, is to reset L1's guest state to the host state specified ++ * in vmcs12. ++ * This function is to be called not only on normal nested exit, but also on ++ * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry ++ * Failures During or After Loading Guest State"). ++ * This function should be called when the active VMCS is L1's (vmcs01). ++ */ ++static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12) ++{ ++ struct kvm_segment seg; ++ u32 entry_failure_code; ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) ++ vcpu->arch.efer = vmcs12->host_ia32_efer; ++ else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ++ vcpu->arch.efer |= (EFER_LMA | EFER_LME); ++ else ++ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); ++ vmx_set_efer(vcpu, vcpu->arch.efer); ++ ++ kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); ++ vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ /* ++ * Note that calling vmx_set_cr0 is important, even if cr0 hasn't ++ * actually changed, because vmx_set_cr0 refers to efer set above. ++ * ++ * CR0_GUEST_HOST_MASK is already set in the original vmcs01 ++ * (KVM doesn't change it); ++ */ ++ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmx_set_cr0(vcpu, vmcs12->host_cr0); ++ ++ /* Same as above - no reason to call set_cr4_guest_host_mask(). */ ++ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); ++ vmx_set_cr4(vcpu, vmcs12->host_cr4); ++ ++ nested_ept_uninit_mmu_context(vcpu); ++ ++ /* ++ * Only PDPTE load can fail as the value of cr3 was checked on entry and ++ * couldn't have changed. ++ */ ++ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); ++ ++ if (!enable_ept) ++ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; ++ ++ /* ++ * If vmcs01 don't use VPID, CPU flushes TLB on every ++ * VMEntry/VMExit. Thus, no need to flush TLB. 
++ * ++ * If vmcs12 uses VPID, TLB entries populated by L2 are ++ * tagged with vmx->nested.vpid02 while L1 entries are tagged ++ * with vmx->vpid. Thus, no need to flush TLB. ++ * ++ * Therefore, flush TLB only in case vmcs01 uses VPID and ++ * vmcs12 don't use VPID as in this case L1 & L2 TLB entries ++ * are both tagged with vmx->vpid. ++ */ ++ if (enable_vpid && ++ !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); ++ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); ++ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); ++ vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); ++ vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); ++ vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF); ++ vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF); ++ ++ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ ++ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) ++ vmcs_write64(GUEST_BNDCFGS, 0); ++ ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { ++ vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); ++ vcpu->arch.pat = vmcs12->host_ia32_pat; ++ } ++ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) ++ vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, ++ vmcs12->host_ia32_perf_global_ctrl); ++ ++ /* Set L1 segment info according to Intel SDM ++ 27.5.2 Loading Host Segment and Descriptor-Table Registers */ ++ seg = (struct kvm_segment) { ++ .base = 0, ++ .limit = 0xFFFFFFFF, ++ .selector = vmcs12->host_cs_selector, ++ .type = 11, ++ .present = 1, ++ .s = 1, ++ .g = 1 ++ }; ++ if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ++ seg.l = 1; ++ else ++ seg.db = 1; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); ++ seg = (struct kvm_segment) { ++ .base = 0, ++ .limit = 0xFFFFFFFF, ++ .type = 3, ++ .present = 1, ++ .s = 1, ++ .db = 1, ++ .g = 1 ++ }; ++ seg.selector = vmcs12->host_ds_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); ++ seg.selector = vmcs12->host_es_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); ++ seg.selector = vmcs12->host_ss_selector; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); ++ seg.selector = vmcs12->host_fs_selector; ++ seg.base = vmcs12->host_fs_base; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); ++ seg.selector = vmcs12->host_gs_selector; ++ seg.base = vmcs12->host_gs_base; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); ++ seg = (struct kvm_segment) { ++ .base = vmcs12->host_tr_base, ++ .limit = 0x67, ++ .selector = vmcs12->host_tr_selector, ++ .type = 11, ++ .present = 1 ++ }; ++ vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); ++ ++ kvm_set_dr(vcpu, 7, 0x400); ++ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++ ++ if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, ++ vmcs12->vm_exit_msr_load_count)) ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); ++} ++ ++static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) ++{ ++ struct shared_msr_entry *efer_msr; ++ unsigned int i; ++ ++ if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER) ++ return vmcs_read64(GUEST_IA32_EFER); ++ ++ if (cpu_has_load_ia32_efer) ++ return host_efer; ++ ++ for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) { ++ if (vmx->msr_autoload.guest.val[i].index == MSR_EFER) ++ return vmx->msr_autoload.guest.val[i].value; ++ } ++ ++ efer_msr = find_msr_entry(vmx, MSR_EFER); ++ if (efer_msr) ++ return efer_msr->data; ++ ++ return host_efer; 
++} ++ ++static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmx_msr_entry g, h; ++ struct msr_data msr; ++ gpa_t gpa; ++ u32 i, j; ++ ++ vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT); ++ ++ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { ++ /* ++ * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set ++ * as vmcs01.GUEST_DR7 contains a userspace defined value ++ * and vcpu->arch.dr7 is not squirreled away before the ++ * nested VMENTER (not worth adding a variable in nested_vmx). ++ */ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ kvm_set_dr(vcpu, 7, DR7_FIXED_1); ++ else ++ WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); ++ } ++ ++ /* ++ * Note that calling vmx_set_{efer,cr0,cr4} is important as they ++ * handle a variety of side effects to KVM's software model. ++ */ ++ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); ++ ++ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; ++ vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); ++ ++ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); ++ vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); ++ ++ nested_ept_uninit_mmu_context(vcpu); ++ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ /* ++ * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs ++ * from vmcs01 (if necessary). The PDPTRs are not loaded on ++ * VMFail, like everything else we just need to ensure our ++ * software model is up-to-date. ++ */ ++ ept_save_pdptrs(vcpu); ++ ++ kvm_mmu_reset_context(vcpu); ++ ++ if (cpu_has_vmx_msr_bitmap()) ++ vmx_update_msr_bitmap(vcpu); ++ ++ /* ++ * This nasty bit of open coding is a compromise between blindly ++ * loading L1's MSRs using the exit load lists (incorrect emulation ++ * of VMFail), leaving the nested VM's MSRs in the software model ++ * (incorrect behavior) and snapshotting the modified MSRs (too ++ * expensive since the lists are unbound by hardware). For each ++ * MSR that was (prematurely) loaded from the nested VMEntry load ++ * list, reload it from the exit load list if it exists and differs ++ * from the guest value. The intent is to stuff host state as ++ * silently as possible, not to fully process the exit load list. 
++ */ ++ msr.host_initiated = false; ++ for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { ++ gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); ++ if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { ++ pr_debug_ratelimited( ++ "%s read MSR index failed (%u, 0x%08llx)\n", ++ __func__, i, gpa); ++ goto vmabort; ++ } ++ ++ for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) { ++ gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h)); ++ if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) { ++ pr_debug_ratelimited( ++ "%s read MSR failed (%u, 0x%08llx)\n", ++ __func__, j, gpa); ++ goto vmabort; ++ } ++ if (h.index != g.index) ++ continue; ++ if (h.value == g.value) ++ break; ++ ++ if (nested_vmx_load_msr_check(vcpu, &h)) { ++ pr_debug_ratelimited( ++ "%s check failed (%u, 0x%x, 0x%x)\n", ++ __func__, j, h.index, h.reserved); ++ goto vmabort; ++ } ++ ++ msr.index = h.index; ++ msr.data = h.value; ++ if (kvm_set_msr(vcpu, &msr)) { ++ pr_debug_ratelimited( ++ "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", ++ __func__, j, h.index, h.value); ++ goto vmabort; ++ } ++ } ++ } ++ ++ return; ++ ++vmabort: ++ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); ++} ++ ++/* ++ * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 ++ * and modify vmcs12 to make it see what it would expect to see there if ++ * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) ++ */ ++static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, ++ u32 exit_intr_info, ++ unsigned long exit_qualification) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ ++ /* trying to cancel vmlaunch/vmresume is a bug */ ++ WARN_ON_ONCE(vmx->nested.nested_run_pending); ++ ++ /* ++ * The only expected VM-instruction error is "VM entry with ++ * invalid control field(s)." Anything else indicates a ++ * problem with L0. ++ */ ++ WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) != ++ VMXERR_ENTRY_INVALID_CONTROL_FIELD)); ++ ++ leave_guest_mode(vcpu); ++ ++ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) ++ vcpu->arch.tsc_offset -= vmcs12->tsc_offset; ++ ++ if (likely(!vmx->fail)) { ++ if (exit_reason == -1) ++ sync_vmcs12(vcpu, vmcs12); ++ else ++ prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, ++ exit_qualification); ++ ++ /* ++ * Must happen outside of sync_vmcs12() as it will ++ * also be used to capture vmcs12 cache as part of ++ * capturing nVMX state for snapshot (migration). ++ * ++ * Otherwise, this flush will dirty guest memory at a ++ * point it is already assumed by user-space to be ++ * immutable. 
++ */ ++ nested_flush_cached_shadow_vmcs12(vcpu, vmcs12); ++ ++ if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, ++ vmcs12->vm_exit_msr_store_count)) ++ nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); ++ } ++ ++ vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ vm_entry_controls_reset_shadow(vmx); ++ vm_exit_controls_reset_shadow(vmx); ++ vmx_segment_cache_clear(vmx); ++ ++ /* Update any VMCS fields that might have changed while L2 ran */ ++ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); ++ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); ++ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); ++ ++ if (kvm_has_tsc_control) ++ decache_tsc_multiplier(vmx); ++ ++ if (vmx->nested.change_vmcs01_virtual_apic_mode) { ++ vmx->nested.change_vmcs01_virtual_apic_mode = false; ++ vmx_set_virtual_apic_mode(vcpu); ++ } else if (!nested_cpu_has_ept(vmcs12) && ++ nested_cpu_has2(vmcs12, ++ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { ++ vmx_flush_tlb(vcpu, true); ++ } ++ ++ /* This is needed for same reason as it was needed in prepare_vmcs02 */ ++ vmx->host_rsp = 0; ++ ++ /* Unpin physical memory we referred to in vmcs02 */ ++ if (vmx->nested.apic_access_page) { ++ kvm_release_page_dirty(vmx->nested.apic_access_page); ++ vmx->nested.apic_access_page = NULL; ++ } ++ if (vmx->nested.virtual_apic_page) { ++ kvm_release_page_dirty(vmx->nested.virtual_apic_page); ++ vmx->nested.virtual_apic_page = NULL; ++ } ++ if (vmx->nested.pi_desc_page) { ++ kunmap(vmx->nested.pi_desc_page); ++ kvm_release_page_dirty(vmx->nested.pi_desc_page); ++ vmx->nested.pi_desc_page = NULL; ++ vmx->nested.pi_desc = NULL; ++ } ++ ++ /* ++ * We are now running in L2, mmu_notifier will force to reload the ++ * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. ++ */ ++ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); ++ ++ if (enable_shadow_vmcs && exit_reason != -1) ++ vmx->nested.sync_shadow_vmcs = true; ++ ++ /* in case we halted in L2 */ ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ ++ if (likely(!vmx->fail)) { ++ /* ++ * TODO: SDM says that with acknowledge interrupt on ++ * exit, bit 31 of the VM-exit interrupt information ++ * (valid interrupt) is always set to 1 on ++ * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't ++ * need kvm_cpu_has_interrupt(). See the commit ++ * message for details. ++ */ ++ if (nested_exit_intr_ack_set(vcpu) && ++ exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && ++ kvm_cpu_has_interrupt(vcpu)) { ++ int irq = kvm_cpu_get_interrupt(vcpu); ++ WARN_ON(irq < 0); ++ vmcs12->vm_exit_intr_info = irq | ++ INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; ++ } ++ ++ if (exit_reason != -1) ++ trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, ++ vmcs12->exit_qualification, ++ vmcs12->idt_vectoring_info_field, ++ vmcs12->vm_exit_intr_info, ++ vmcs12->vm_exit_intr_error_code, ++ KVM_ISA_VMX); ++ ++ load_vmcs12_host_state(vcpu, vmcs12); ++ ++ return; ++ } ++ ++ /* ++ * After an early L2 VM-entry failure, we're now back ++ * in L1 which thinks it just finished a VMLAUNCH or ++ * VMRESUME instruction, so we need to set the failure ++ * flag and the VM-instruction error field of the VMCS ++ * accordingly. ++ */ ++ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); ++ ++ /* ++ * Restore L1's host state to KVM's software model. We're here ++ * because a consistency check was caught by hardware, which ++ * means some amount of guest state has been propagated to KVM's ++ * model and needs to be unwound to the host's state. 
++ */ ++ nested_vmx_restore_host_state(vcpu); ++ ++ /* ++ * The emulated instruction was already skipped in ++ * nested_vmx_run, but the updated RIP was never ++ * written back to the vmcs01. ++ */ ++ skip_emulated_instruction(vcpu); ++ vmx->fail = 0; ++} ++ ++/* ++ * Forcibly leave nested mode in order to be able to reset the VCPU later on. ++ */ ++static void vmx_leave_nested(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu)) { ++ to_vmx(vcpu)->nested.nested_run_pending = 0; ++ nested_vmx_vmexit(vcpu, -1, 0, 0); ++ } ++ free_nested(to_vmx(vcpu)); ++} ++ ++/* ++ * L1's failure to enter L2 is a subset of a normal exit, as explained in ++ * 23.7 "VM-entry failures during or after loading guest state" (this also ++ * lists the acceptable exit-reason and exit-qualification parameters). ++ * It should only be called before L2 actually succeeded to run, and when ++ * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss). ++ */ ++static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, ++ struct vmcs12 *vmcs12, ++ u32 reason, unsigned long qualification) ++{ ++ load_vmcs12_host_state(vcpu, vmcs12); ++ vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; ++ vmcs12->exit_qualification = qualification; ++ nested_vmx_succeed(vcpu); ++ if (enable_shadow_vmcs) ++ to_vmx(vcpu)->nested.sync_shadow_vmcs = true; ++} ++ ++static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ unsigned short port; ++ bool intercept; ++ int size; ++ ++ if (info->intercept == x86_intercept_in || ++ info->intercept == x86_intercept_ins) { ++ port = info->src_val; ++ size = info->dst_bytes; ++ } else { ++ port = info->dst_val; ++ size = info->src_bytes; ++ } ++ ++ /* ++ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction ++ * VM-exits depend on the 'unconditional IO exiting' VM-execution ++ * control. ++ * ++ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. ++ */ ++ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) ++ intercept = nested_cpu_has(vmcs12, ++ CPU_BASED_UNCOND_IO_EXITING); ++ else ++ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); ++ ++ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; ++} ++ ++static int vmx_check_intercept(struct kvm_vcpu *vcpu, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ ++ switch (info->intercept) { ++ /* ++ * RDPID causes #UD if disabled through secondary execution controls. ++ * Because it is marked as EmulateOnUD, we need to intercept it here. ++ */ ++ case x86_intercept_rdtscp: ++ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { ++ ctxt->exception.vector = UD_VECTOR; ++ ctxt->exception.error_code_valid = false; ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ break; ++ ++ case x86_intercept_in: ++ case x86_intercept_ins: ++ case x86_intercept_out: ++ case x86_intercept_outs: ++ return vmx_check_intercept_io(vcpu, info); ++ ++ /* TODO: check more intercepts... 
*/ ++ default: ++ break; ++ } ++ ++ return X86EMUL_UNHANDLEABLE; ++} ++ ++#ifdef CONFIG_X86_64 ++/* (a << shift) / divisor, return 1 if overflow otherwise 0 */ ++static inline int u64_shl_div_u64(u64 a, unsigned int shift, ++ u64 divisor, u64 *result) ++{ ++ u64 low = a << shift, high = a >> (64 - shift); ++ ++ /* To avoid the overflow on divq */ ++ if (high >= divisor) ++ return 1; ++ ++ /* Low hold the result, high hold rem which is discarded */ ++ asm("divq %2\n\t" : "=a" (low), "=d" (high) : ++ "rm" (divisor), "0" (low), "1" (high)); ++ *result = low; ++ ++ return 0; ++} ++ ++static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) ++{ ++ struct vcpu_vmx *vmx; ++ u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; ++ ++ if (kvm_mwait_in_guest(vcpu->kvm)) ++ return -EOPNOTSUPP; ++ ++ vmx = to_vmx(vcpu); ++ tscl = rdtsc(); ++ guest_tscl = kvm_read_l1_tsc(vcpu, tscl); ++ delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; ++ lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns); ++ ++ if (delta_tsc > lapic_timer_advance_cycles) ++ delta_tsc -= lapic_timer_advance_cycles; ++ else ++ delta_tsc = 0; ++ ++ /* Convert to host delta tsc if tsc scaling is enabled */ ++ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && ++ u64_shl_div_u64(delta_tsc, ++ kvm_tsc_scaling_ratio_frac_bits, ++ vcpu->arch.tsc_scaling_ratio, ++ &delta_tsc)) ++ return -ERANGE; ++ ++ /* ++ * If the delta tsc can't fit in the 32 bit after the multi shift, ++ * we can't use the preemption timer. ++ * It's possible that it fits on later vmentries, but checking ++ * on every vmentry is costly so we just use an hrtimer. ++ */ ++ if (delta_tsc >> (cpu_preemption_timer_multi + 32)) ++ return -ERANGE; ++ ++ vmx->hv_deadline_tsc = tscl + delta_tsc; ++ return delta_tsc == 0; ++} ++ ++static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) ++{ ++ to_vmx(vcpu)->hv_deadline_tsc = -1; ++} ++#endif ++ ++static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ if (!kvm_pause_in_guest(vcpu->kvm)) ++ shrink_ple_window(vcpu); ++} ++ ++static void vmx_slot_enable_log_dirty(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_mmu_slot_leaf_clear_dirty(kvm, slot); ++ kvm_mmu_slot_largepage_remove_write_access(kvm, slot); ++} ++ ++static void vmx_slot_disable_log_dirty(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_mmu_slot_set_dirty(kvm, slot); ++} ++ ++static void vmx_flush_log_dirty(struct kvm *kvm) ++{ ++ kvm_flush_pml_buffers(kvm); ++} ++ ++static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12; ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ gpa_t gpa; ++ struct page *page = NULL; ++ u64 *pml_address; ++ ++ if (is_guest_mode(vcpu)) { ++ WARN_ON_ONCE(vmx->nested.pml_full); ++ ++ /* ++ * Check if PML is enabled for the nested guest. ++ * Whether eptp bit 6 is set is already checked ++ * as part of A/D emulation. 
++ */ ++ vmcs12 = get_vmcs12(vcpu); ++ if (!nested_cpu_has_pml(vmcs12)) ++ return 0; ++ ++ if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { ++ vmx->nested.pml_full = true; ++ return 1; ++ } ++ ++ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; ++ ++ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); ++ if (is_error_page(page)) ++ return 0; ++ ++ pml_address = kmap(page); ++ pml_address[vmcs12->guest_pml_index--] = gpa; ++ kunmap(page); ++ kvm_release_page_clean(page); ++ } ++ ++ return 0; ++} ++ ++static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, ++ struct kvm_memory_slot *memslot, ++ gfn_t offset, unsigned long mask) ++{ ++ kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); ++} ++ ++static void __pi_post_block(struct kvm_vcpu *vcpu) ++{ ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ struct pi_desc old, new; ++ unsigned int dest; ++ ++ do { ++ old.control = new.control = pi_desc->control; ++ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, ++ "Wakeup handler not enabled while the VCPU is blocked\n"); ++ ++ dest = cpu_physical_id(vcpu->cpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ /* set 'NV' to 'notification vector' */ ++ new.nv = POSTED_INTR_VECTOR; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++ ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ list_del(&vcpu->blocked_vcpu_list); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ vcpu->pre_pcpu = -1; ++ } ++} ++ ++/* ++ * This routine does the following things for vCPU which is going ++ * to be blocked if VT-d PI is enabled. ++ * - Store the vCPU to the wakeup list, so when interrupts happen ++ * we can find the right vCPU to wake up. ++ * - Change the Posted-interrupt descriptor as below: ++ * 'NDST' <-- vcpu->pre_pcpu ++ * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR ++ * - If 'ON' is set during this process, which means at least one ++ * interrupt is posted for this vCPU, we cannot block it, in ++ * this case, return 1, otherwise, return 0. ++ * ++ */ ++static int pi_pre_block(struct kvm_vcpu *vcpu) ++{ ++ unsigned int dest; ++ struct pi_desc old, new; ++ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ++ ++ if (!kvm_arch_has_assigned_device(vcpu->kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(vcpu)) ++ return 0; ++ ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); ++ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { ++ vcpu->pre_pcpu = vcpu->cpu; ++ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ list_add_tail(&vcpu->blocked_vcpu_list, ++ &per_cpu(blocked_vcpu_on_cpu, ++ vcpu->pre_pcpu)); ++ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); ++ } ++ ++ do { ++ old.control = new.control = pi_desc->control; ++ ++ WARN((pi_desc->sn == 1), ++ "Warning: SN field of posted-interrupts " ++ "is set before blocking\n"); ++ ++ /* ++ * Since vCPU can be preempted during this process, ++ * vcpu->cpu could be different with pre_pcpu, we ++ * need to set pre_pcpu as the destination of wakeup ++ * notification event, then we can find the right vCPU ++ * to wakeup in wakeup handler if interrupts happen ++ * when the vCPU is in blocked state. 
++ */ ++ dest = cpu_physical_id(vcpu->pre_pcpu); ++ ++ if (x2apic_enabled()) ++ new.ndst = dest; ++ else ++ new.ndst = (dest << 8) & 0xFF00; ++ ++ /* set 'NV' to 'wakeup vector' */ ++ new.nv = POSTED_INTR_WAKEUP_VECTOR; ++ } while (cmpxchg64(&pi_desc->control, old.control, ++ new.control) != old.control); ++ ++ /* We should not block the vCPU if an interrupt is posted for it. */ ++ if (pi_test_on(pi_desc) == 1) ++ __pi_post_block(vcpu); ++ ++ local_irq_enable(); ++ return (vcpu->pre_pcpu == -1); ++} ++ ++static int vmx_pre_block(struct kvm_vcpu *vcpu) ++{ ++ if (pi_pre_block(vcpu)) ++ return 1; ++ ++ if (kvm_lapic_hv_timer_in_use(vcpu)) ++ kvm_lapic_switch_to_sw_timer(vcpu); ++ ++ return 0; ++} ++ ++static void pi_post_block(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->pre_pcpu == -1) ++ return; ++ ++ WARN_ON(irqs_disabled()); ++ local_irq_disable(); ++ __pi_post_block(vcpu); ++ local_irq_enable(); ++} ++ ++static void vmx_post_block(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_x86_ops->set_hv_timer) ++ kvm_lapic_switch_to_hv_timer(vcpu); ++ ++ pi_post_block(vcpu); ++} ++ ++/* ++ * vmx_update_pi_irte - set IRTE for Posted-Interrupts ++ * ++ * @kvm: kvm ++ * @host_irq: host irq of the interrupt ++ * @guest_irq: gsi of the interrupt ++ * @set: set or unset PI ++ * returns 0 on success, < 0 on failure ++ */ ++static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ struct kvm_kernel_irq_routing_entry *e; ++ struct kvm_irq_routing_table *irq_rt; ++ struct kvm_lapic_irq irq; ++ struct kvm_vcpu *vcpu; ++ struct vcpu_data vcpu_info; ++ int idx, ret = 0; ++ ++ if (!kvm_arch_has_assigned_device(kvm) || ++ !irq_remapping_cap(IRQ_POSTING_CAP) || ++ !kvm_vcpu_apicv_active(kvm->vcpus[0])) ++ return 0; ++ ++ idx = srcu_read_lock(&kvm->irq_srcu); ++ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); ++ if (guest_irq >= irq_rt->nr_rt_entries || ++ hlist_empty(&irq_rt->map[guest_irq])) { ++ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", ++ guest_irq, irq_rt->nr_rt_entries); ++ goto out; ++ } ++ ++ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { ++ if (e->type != KVM_IRQ_ROUTING_MSI) ++ continue; ++ /* ++ * VT-d PI cannot support posting multicast/broadcast ++ * interrupts to a vCPU, we still use interrupt remapping ++ * for these kind of interrupts. ++ * ++ * For lowest-priority interrupts, we only support ++ * those with single CPU as the destination, e.g. user ++ * configures the interrupts via /proc/irq or uses ++ * irqbalance to make the interrupts single-CPU. ++ * ++ * We will support full lowest-priority interrupt later. ++ */ ++ ++ kvm_set_msi_irq(kvm, e, &irq); ++ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { ++ /* ++ * Make sure the IRTE is in remapped mode if ++ * we don't handle it in posted mode. 
++ */ ++ ret = irq_set_vcpu_affinity(host_irq, NULL); ++ if (ret < 0) { ++ printk(KERN_INFO ++ "failed to back to remapped mode, irq: %u\n", ++ host_irq); ++ goto out; ++ } ++ ++ continue; ++ } ++ ++ vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); ++ vcpu_info.vector = irq.vector; ++ ++ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, ++ vcpu_info.vector, vcpu_info.pi_desc_addr, set); ++ ++ if (set) ++ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); ++ else ++ ret = irq_set_vcpu_affinity(host_irq, NULL); ++ ++ if (ret < 0) { ++ printk(KERN_INFO "%s: failed to update PI IRTE\n", ++ __func__); ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ srcu_read_unlock(&kvm->irq_srcu, idx); ++ return ret; ++} ++ ++static void vmx_setup_mce(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->arch.mcg_cap & MCG_LMCE_P) ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= ++ FEATURE_CONTROL_LMCE; ++ else ++ to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ++ ~FEATURE_CONTROL_LMCE; ++} ++ ++static int vmx_smi_allowed(struct kvm_vcpu *vcpu) ++{ ++ /* we need a nested vmexit to enter SMM, postpone if run is pending */ ++ if (to_vmx(vcpu)->nested.nested_run_pending) ++ return 0; ++ return 1; ++} ++ ++static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ ++ vmx->nested.smm.guest_mode = is_guest_mode(vcpu); ++ if (vmx->nested.smm.guest_mode) ++ nested_vmx_vmexit(vcpu, -1, 0, 0); ++ ++ vmx->nested.smm.vmxon = vmx->nested.vmxon; ++ vmx->nested.vmxon = false; ++ vmx_clear_hlt(vcpu); ++ return 0; ++} ++ ++static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ int ret; ++ ++ if (vmx->nested.smm.vmxon) { ++ vmx->nested.vmxon = true; ++ vmx->nested.smm.vmxon = false; ++ } ++ ++ if (vmx->nested.smm.guest_mode) { ++ vcpu->arch.hflags &= ~HF_SMM_MASK; ++ ret = enter_vmx_non_root_mode(vcpu, NULL); ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ if (ret) ++ return ret; ++ ++ vmx->nested.smm.guest_mode = false; ++ } ++ return 0; ++} ++ ++static int enable_smi_window(struct kvm_vcpu *vcpu) ++{ ++ return 0; ++} ++ ++static int vmx_get_nested_state(struct kvm_vcpu *vcpu, ++ struct kvm_nested_state __user *user_kvm_nested_state, ++ u32 user_data_size) ++{ ++ struct vcpu_vmx *vmx; ++ struct vmcs12 *vmcs12; ++ struct kvm_nested_state kvm_state = { ++ .flags = 0, ++ .format = 0, ++ .size = sizeof(kvm_state), ++ .vmx.vmxon_pa = -1ull, ++ .vmx.vmcs_pa = -1ull, ++ }; ++ ++ if (!vcpu) ++ return kvm_state.size + 2 * VMCS12_SIZE; ++ ++ vmx = to_vmx(vcpu); ++ vmcs12 = get_vmcs12(vcpu); ++ if (nested_vmx_allowed(vcpu) && ++ (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { ++ kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr; ++ kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr; ++ ++ if (vmx->nested.current_vmptr != -1ull) { ++ kvm_state.size += VMCS12_SIZE; ++ ++ if (is_guest_mode(vcpu) && ++ nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) ++ kvm_state.size += VMCS12_SIZE; ++ } ++ ++ if (vmx->nested.smm.vmxon) ++ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; ++ ++ if (vmx->nested.smm.guest_mode) ++ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; ++ ++ if (is_guest_mode(vcpu)) { ++ kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; ++ ++ if (vmx->nested.nested_run_pending) ++ kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; ++ } ++ } ++ ++ if (user_data_size < kvm_state.size) ++ goto out; ++ ++ if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) ++ return 
-EFAULT; ++ ++ if (vmx->nested.current_vmptr == -1ull) ++ goto out; ++ ++ /* ++ * When running L2, the authoritative vmcs12 state is in the ++ * vmcs02. When running L1, the authoritative vmcs12 state is ++ * in the shadow vmcs linked to vmcs01, unless ++ * sync_shadow_vmcs is set, in which case, the authoritative ++ * vmcs12 state is in the vmcs12 already. ++ */ ++ if (is_guest_mode(vcpu)) ++ sync_vmcs12(vcpu, vmcs12); ++ else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) ++ copy_shadow_to_vmcs12(vmx); ++ ++ /* ++ * Copy over the full allocated size of vmcs12 rather than just the size ++ * of the struct. ++ */ ++ if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE)) ++ return -EFAULT; ++ ++ if (nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) { ++ if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, ++ get_shadow_vmcs12(vcpu), VMCS12_SIZE)) ++ return -EFAULT; ++ } ++ ++out: ++ return kvm_state.size; ++} ++ ++static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ++ struct kvm_nested_state __user *user_kvm_nested_state, ++ struct kvm_nested_state *kvm_state) ++{ ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ struct vmcs12 *vmcs12; ++ u32 exit_qual; ++ int ret; ++ ++ if (kvm_state->format != 0) ++ return -EINVAL; ++ ++ if (!nested_vmx_allowed(vcpu)) ++ return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; ++ ++ if (kvm_state->vmx.vmxon_pa == -1ull) { ++ if (kvm_state->vmx.smm.flags) ++ return -EINVAL; ++ ++ if (kvm_state->vmx.vmcs_pa != -1ull) ++ return -EINVAL; ++ ++ vmx_leave_nested(vcpu); ++ return 0; ++ } ++ ++ if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa)) ++ return -EINVAL; ++ ++ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && ++ (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) ++ return -EINVAL; ++ ++ if (kvm_state->vmx.smm.flags & ++ ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) ++ return -EINVAL; ++ ++ /* ++ * SMM temporarily disables VMX, so we cannot be in guest mode, ++ * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags ++ * must be zero. ++ */ ++ if (is_smm(vcpu) ? 
kvm_state->flags : kvm_state->vmx.smm.flags) ++ return -EINVAL; ++ ++ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && ++ !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) ++ return -EINVAL; ++ ++ vmx_leave_nested(vcpu); ++ if (kvm_state->vmx.vmxon_pa == -1ull) ++ return 0; ++ ++ vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; ++ ret = enter_vmx_operation(vcpu); ++ if (ret) ++ return ret; ++ ++ /* Empty 'VMXON' state is permitted */ ++ if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) ++ return 0; ++ ++ if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || ++ !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) ++ return -EINVAL; ++ ++ set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); ++ ++ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { ++ vmx->nested.smm.vmxon = true; ++ vmx->nested.vmxon = false; ++ ++ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) ++ vmx->nested.smm.guest_mode = true; ++ } ++ ++ vmcs12 = get_vmcs12(vcpu); ++ if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12))) ++ return -EFAULT; ++ ++ if (vmcs12->hdr.revision_id != VMCS12_REVISION) ++ return -EINVAL; ++ ++ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) ++ return 0; ++ ++ vmx->nested.nested_run_pending = ++ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); ++ ++ if (nested_cpu_has_shadow_vmcs(vmcs12) && ++ vmcs12->vmcs_link_pointer != -1ull) { ++ struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); ++ if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) ++ return -EINVAL; ++ ++ if (copy_from_user(shadow_vmcs12, ++ user_kvm_nested_state->data + VMCS12_SIZE, ++ sizeof(*vmcs12))) ++ return -EFAULT; ++ ++ if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || ++ !shadow_vmcs12->hdr.shadow_vmcs) ++ return -EINVAL; ++ } ++ ++ if (check_vmentry_prereqs(vcpu, vmcs12) || ++ check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) ++ return -EINVAL; ++ ++ vmx->nested.dirty_vmcs12 = true; ++ ret = enter_vmx_non_root_mode(vcpu, NULL); ++ if (ret) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { ++ .cpu_has_kvm_support = cpu_has_kvm_support, ++ .disabled_by_bios = vmx_disabled_by_bios, ++ .hardware_setup = hardware_setup, ++ .hardware_unsetup = hardware_unsetup, ++ .check_processor_compatibility = vmx_check_processor_compat, ++ .hardware_enable = hardware_enable, ++ .hardware_disable = hardware_disable, ++ .cpu_has_accelerated_tpr = report_flexpriority, ++ .has_emulated_msr = vmx_has_emulated_msr, ++ ++ .vm_init = vmx_vm_init, ++ .vm_alloc = vmx_vm_alloc, ++ .vm_free = vmx_vm_free, ++ ++ .vcpu_create = vmx_create_vcpu, ++ .vcpu_free = vmx_free_vcpu, ++ .vcpu_reset = vmx_vcpu_reset, ++ ++ .prepare_guest_switch = vmx_prepare_switch_to_guest, ++ .vcpu_load = vmx_vcpu_load, ++ .vcpu_put = vmx_vcpu_put, ++ ++ .update_bp_intercept = update_exception_bitmap, ++ .get_msr_feature = vmx_get_msr_feature, ++ .get_msr = vmx_get_msr, ++ .set_msr = vmx_set_msr, ++ .get_segment_base = vmx_get_segment_base, ++ .get_segment = vmx_get_segment, ++ .set_segment = vmx_set_segment, ++ .get_cpl = vmx_get_cpl, ++ .get_cs_db_l_bits = vmx_get_cs_db_l_bits, ++ .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, ++ .decache_cr3 = vmx_decache_cr3, ++ .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, ++ .set_cr0 = vmx_set_cr0, ++ .set_cr3 = vmx_set_cr3, ++ .set_cr4 = vmx_set_cr4, ++ .set_efer = vmx_set_efer, ++ .get_idt = vmx_get_idt, ++ .set_idt = vmx_set_idt, ++ .get_gdt = vmx_get_gdt, ++ .set_gdt 
= vmx_set_gdt, ++ .get_dr6 = vmx_get_dr6, ++ .set_dr6 = vmx_set_dr6, ++ .set_dr7 = vmx_set_dr7, ++ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, ++ .cache_reg = vmx_cache_reg, ++ .get_rflags = vmx_get_rflags, ++ .set_rflags = vmx_set_rflags, ++ ++ .tlb_flush = vmx_flush_tlb, ++ .tlb_flush_gva = vmx_flush_tlb_gva, ++ ++ .run = vmx_vcpu_run, ++ .handle_exit = vmx_handle_exit, ++ .skip_emulated_instruction = skip_emulated_instruction, ++ .set_interrupt_shadow = vmx_set_interrupt_shadow, ++ .get_interrupt_shadow = vmx_get_interrupt_shadow, ++ .patch_hypercall = vmx_patch_hypercall, ++ .set_irq = vmx_inject_irq, ++ .set_nmi = vmx_inject_nmi, ++ .queue_exception = vmx_queue_exception, ++ .cancel_injection = vmx_cancel_injection, ++ .interrupt_allowed = vmx_interrupt_allowed, ++ .nmi_allowed = vmx_nmi_allowed, ++ .get_nmi_mask = vmx_get_nmi_mask, ++ .set_nmi_mask = vmx_set_nmi_mask, ++ .enable_nmi_window = enable_nmi_window, ++ .enable_irq_window = enable_irq_window, ++ .update_cr8_intercept = update_cr8_intercept, ++ .set_virtual_apic_mode = vmx_set_virtual_apic_mode, ++ .set_apic_access_page_addr = vmx_set_apic_access_page_addr, ++ .get_enable_apicv = vmx_get_enable_apicv, ++ .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, ++ .load_eoi_exitmap = vmx_load_eoi_exitmap, ++ .apicv_post_state_restore = vmx_apicv_post_state_restore, ++ .hwapic_irr_update = vmx_hwapic_irr_update, ++ .hwapic_isr_update = vmx_hwapic_isr_update, ++ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, ++ .sync_pir_to_irr = vmx_sync_pir_to_irr, ++ .deliver_posted_interrupt = vmx_deliver_posted_interrupt, ++ .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, ++ ++ .set_tss_addr = vmx_set_tss_addr, ++ .set_identity_map_addr = vmx_set_identity_map_addr, ++ .get_tdp_level = get_ept_level, ++ .get_mt_mask = vmx_get_mt_mask, ++ ++ .get_exit_info = vmx_get_exit_info, ++ ++ .get_lpage_level = vmx_get_lpage_level, ++ ++ .cpuid_update = vmx_cpuid_update, ++ ++ .rdtscp_supported = vmx_rdtscp_supported, ++ .invpcid_supported = vmx_invpcid_supported, ++ ++ .set_supported_cpuid = vmx_set_supported_cpuid, ++ ++ .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, ++ ++ .read_l1_tsc_offset = vmx_read_l1_tsc_offset, ++ .write_l1_tsc_offset = vmx_write_l1_tsc_offset, ++ ++ .set_tdp_cr3 = vmx_set_cr3, ++ ++ .check_intercept = vmx_check_intercept, ++ .handle_external_intr = vmx_handle_external_intr, ++ .mpx_supported = vmx_mpx_supported, ++ .xsaves_supported = vmx_xsaves_supported, ++ .umip_emulated = vmx_umip_emulated, ++ ++ .check_nested_events = vmx_check_nested_events, ++ .request_immediate_exit = vmx_request_immediate_exit, ++ ++ .sched_in = vmx_sched_in, ++ ++ .slot_enable_log_dirty = vmx_slot_enable_log_dirty, ++ .slot_disable_log_dirty = vmx_slot_disable_log_dirty, ++ .flush_log_dirty = vmx_flush_log_dirty, ++ .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, ++ .write_log_dirty = vmx_write_pml_buffer, ++ ++ .pre_block = vmx_pre_block, ++ .post_block = vmx_post_block, ++ ++ .pmu_ops = &intel_pmu_ops, ++ ++ .update_pi_irte = vmx_update_pi_irte, ++ ++#ifdef CONFIG_X86_64 ++ .set_hv_timer = vmx_set_hv_timer, ++ .cancel_hv_timer = vmx_cancel_hv_timer, ++#endif ++ ++ .setup_mce = vmx_setup_mce, ++ ++ .get_nested_state = vmx_get_nested_state, ++ .set_nested_state = vmx_set_nested_state, ++ .get_vmcs12_pages = nested_get_vmcs12_pages, ++ ++ .smi_allowed = vmx_smi_allowed, ++ .pre_enter_smm = vmx_pre_enter_smm, ++ .pre_leave_smm = vmx_pre_leave_smm, ++ .enable_smi_window = enable_smi_window, ++}; ++ 
++static void vmx_cleanup_l1d_flush(void) ++{ ++ if (vmx_l1d_flush_pages) { ++ free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER); ++ vmx_l1d_flush_pages = NULL; ++ } ++ /* Restore state so sysfs ignores VMX */ ++ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; ++} ++ ++static void vmx_exit(void) ++{ ++#ifdef CONFIG_KEXEC_CORE ++ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); ++ synchronize_rcu(); ++#endif ++ ++ kvm_exit(); ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ if (static_branch_unlikely(&enable_evmcs)) { ++ int cpu; ++ struct hv_vp_assist_page *vp_ap; ++ /* ++ * Reset everything to support using non-enlightened VMCS ++ * access later (e.g. when we reload the module with ++ * enlightened_vmcs=0) ++ */ ++ for_each_online_cpu(cpu) { ++ vp_ap = hv_get_vp_assist_page(cpu); ++ ++ if (!vp_ap) ++ continue; ++ ++ vp_ap->current_nested_vmcs = 0; ++ vp_ap->enlighten_vmentry = 0; ++ } ++ ++ static_branch_disable(&enable_evmcs); ++ } ++#endif ++ vmx_cleanup_l1d_flush(); ++} ++module_exit(vmx_exit); ++ ++static int __init vmx_init(void) ++{ ++ int r; ++ ++#if IS_ENABLED(CONFIG_HYPERV) ++ /* ++ * Enlightened VMCS usage should be recommended and the host needs ++ * to support eVMCS v1 or above. We can also disable eVMCS support ++ * with module parameter. ++ */ ++ if (enlightened_vmcs && ++ ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && ++ (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= ++ KVM_EVMCS_VERSION) { ++ int cpu; ++ ++ /* Check that we have assist pages on all online CPUs */ ++ for_each_online_cpu(cpu) { ++ if (!hv_get_vp_assist_page(cpu)) { ++ enlightened_vmcs = false; ++ break; ++ } ++ } ++ ++ if (enlightened_vmcs) { ++ pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); ++ static_branch_enable(&enable_evmcs); ++ } ++ } else { ++ enlightened_vmcs = false; ++ } ++#endif ++ ++ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), ++ __alignof__(struct vcpu_vmx), THIS_MODULE); ++ if (r) ++ return r; ++ ++ /* ++ * Must be called after kvm_init() so enable_ept is properly set ++ * up. Hand the parameter mitigation value in which was stored in ++ * the pre module init parser. If no parameter was given, it will ++ * contain 'auto' which will be turned into the default 'cond' ++ * mitigation mode. 
++ */ ++ if (boot_cpu_has(X86_BUG_L1TF)) { ++ r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); ++ if (r) { ++ vmx_exit(); ++ return r; ++ } ++ } ++ ++#ifdef CONFIG_KEXEC_CORE ++ rcu_assign_pointer(crash_vmclear_loaded_vmcss, ++ crash_vmclear_local_loaded_vmcss); ++#endif ++ vmx_check_vmcs12_offsets(); ++ ++ return 0; ++} ++module_init(vmx_init); +diff -uprN kernel/arch/x86/kvm/x86.c kernel_new/arch/x86/kvm/x86.c +--- kernel/arch/x86/kvm/x86.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/kvm/x86.c 2021-04-01 18:28:07.659863283 +0800 +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -160,6 +161,7 @@ struct kvm_shared_msrs_global { + struct kvm_shared_msrs { + struct user_return_notifier urn; + bool registered; ++ bool dirty; + struct kvm_shared_msr_values { + u64 host; + u64 curr; +@@ -228,12 +230,31 @@ static inline void kvm_async_pf_hash_res + vcpu->arch.apf.gfns[i] = ~0; + } + ++static void kvm_restore_shared_msrs(struct kvm_shared_msrs *locals) ++{ ++ struct kvm_shared_msr_values *values; ++ unsigned long flags; ++ unsigned int slot; ++ ++ flags = hard_cond_local_irq_save(); ++ if (locals->dirty) { ++ for (slot = 0; slot < shared_msrs_global.nr; ++slot) { ++ values = &locals->values[slot]; ++ if (values->host != values->curr) { ++ wrmsrl(shared_msrs_global.msrs[slot], ++ values->host); ++ values->curr = values->host; ++ } ++ } ++ locals->dirty = false; ++ } ++ hard_cond_local_irq_restore(flags); ++} ++ + static void kvm_on_user_return(struct user_return_notifier *urn) + { +- unsigned slot; + struct kvm_shared_msrs *locals + = container_of(urn, struct kvm_shared_msrs, urn); +- struct kvm_shared_msr_values *values; + unsigned long flags; + + /* +@@ -246,13 +267,8 @@ static void kvm_on_user_return(struct us + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); +- for (slot = 0; slot < shared_msrs_global.nr; ++slot) { +- values = &locals->values[slot]; +- if (values->host != values->curr) { +- wrmsrl(shared_msrs_global.msrs[slot], values->host); +- values->curr = values->host; +- } +- } ++ kvm_restore_shared_msrs(locals); ++ __ipipe_exit_vm(); + } + + static void shared_msr_update(unsigned slot, u32 msr) +@@ -302,6 +318,7 @@ int kvm_set_shared_msr(unsigned slot, u6 + if (err) + return 1; + ++ smsr->dirty = true; + smsr->values[slot].curr = value; + if (!smsr->registered) { + smsr->urn.on_user_return = kvm_on_user_return; +@@ -3252,11 +3269,25 @@ static void kvm_steal_time_set_preempted + + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) + { ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ unsigned long flags; + int idx; + + if (vcpu->preempted) + vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); + ++ flags = hard_cond_local_irq_save(); ++ ++ /* ++ * Do not update steal time accounting while running over the head ++ * domain as this may introduce high latencies and will also issue ++ * context violation reports. The code will be executed when kvm does ++ * the regular kvm_arch_vcpu_put, after returning from the head domain. ++ */ ++ if (!ipipe_root_p) ++ goto skip_steal_time_update; ++ + /* + * Disable page faults because we're in atomic context here. 
+ * kvm_write_guest_offset_cached() would call might_fault() +@@ -3274,6 +3305,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + kvm_steal_time_set_preempted(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + pagefault_enable(); ++skip_steal_time_update: + kvm_x86_ops->vcpu_put(vcpu); + vcpu->arch.last_host_tsc = rdtsc(); + /* +@@ -3282,7 +3314,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu * + * guest. do_debug expects dr6 to be cleared after it runs, do the same. + */ + set_debugreg(0, 6); ++ ++#ifdef CONFIG_IPIPE ++ vcpu->ipipe_put_vcpu = false; ++ if (!smsr->dirty) ++ __ipipe_exit_vm(); ++#endif ++ ++ hard_cond_local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_handle_vm_preemption(struct ipipe_vm_notifier *nfy) ++{ ++ unsigned int cpu = raw_smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ struct kvm_vcpu *vcpu; ++ ++ vcpu = container_of(nfy, struct kvm_vcpu, ipipe_notifier); ++ ++ /* ++ * We may leave kvm_arch_vcpu_put with the ipipe notifier still ++ * registered in case shared MSRs are still active. If a VM preemption ++ * hits us after that point but before the user return notifier fired, ++ * we may run kvm_arch_vcpu_put again from here. Do not rely on this ++ * being harmless and rather use a flag to decide if the run is needed. ++ */ ++ if (vcpu->ipipe_put_vcpu) ++ kvm_arch_vcpu_put(vcpu); ++ ++ kvm_restore_shared_msrs(smsr); ++ __ipipe_exit_vm(); + } ++EXPORT_SYMBOL_GPL(__ipipe_handle_vm_preemption); ++ ++#endif + + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +@@ -7682,6 +7749,13 @@ static int vcpu_enter_guest(struct kvm_v + } + + preempt_disable(); ++ local_irq_disable(); ++ hard_cond_local_irq_disable(); ++ ++#ifdef CONFIG_IPIPE ++ __ipipe_enter_vm(&vcpu->ipipe_notifier); ++ vcpu->ipipe_put_vcpu = true; ++#endif + + kvm_x86_ops->prepare_guest_switch(vcpu); + +@@ -7690,7 +7764,6 @@ static int vcpu_enter_guest(struct kvm_v + * IPI are then delayed after guest entry, which ensures that they + * result in virtual interrupt delivery. + */ +- local_irq_disable(); + vcpu->mode = IN_GUEST_MODE; + + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); +@@ -7720,6 +7793,7 @@ static int vcpu_enter_guest(struct kvm_v + || need_resched() || signal_pending(current)) { + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); ++ hard_cond_local_irq_enable(); + local_irq_enable(); + preempt_enable(); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); +@@ -7787,6 +7861,7 @@ static int vcpu_enter_guest(struct kvm_v + + guest_exit_irqoff(); + ++ hard_cond_local_irq_enable(); + local_irq_enable(); + preempt_enable(); + +@@ -8608,6 +8683,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st + "guest TSC will not be reliable\n"); + + vcpu = kvm_x86_ops->vcpu_create(kvm, id); ++#ifdef CONFIG_IPIPE ++ vcpu->ipipe_notifier.handler = __ipipe_handle_vm_preemption; ++#endif + + return vcpu; + } +diff -uprN kernel/arch/x86/kvm/x86.c.orig kernel_new/arch/x86/kvm/x86.c.orig +--- kernel/arch/x86/kvm/x86.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/kvm/x86.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,9763 @@ ++/* ++ * Kernel-based Virtual Machine driver for Linux ++ * ++ * derived from drivers/kvm/kvm_main.c ++ * ++ * Copyright (C) 2006 Qumranet, Inc. ++ * Copyright (C) 2008 Qumranet, Inc. ++ * Copyright IBM Corporation, 2008 ++ * Copyright 2010 Red Hat, Inc. and/or its affiliates. 
++ * ++ * Authors: ++ * Avi Kivity ++ * Yaniv Kamay ++ * Amit Shah ++ * Ben-Ami Yassour ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include ++#include "irq.h" ++#include "mmu.h" ++#include "i8254.h" ++#include "tss.h" ++#include "kvm_cache_regs.h" ++#include "x86.h" ++#include "cpuid.h" ++#include "pmu.h" ++#include "hyperv.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include /* Ugh! */ ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include "trace.h" ++ ++#define MAX_IO_MSRS 256 ++#define KVM_MAX_MCE_BANKS 32 ++u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; ++EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); ++ ++#define emul_to_vcpu(ctxt) \ ++ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) ++ ++/* EFER defaults: ++ * - enable syscall per default because its emulated by KVM ++ * - enable LME and LMA per default on 64 bit KVM ++ */ ++#ifdef CONFIG_X86_64 ++static ++u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); ++#else ++static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); ++#endif ++ ++#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ ++#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ ++ ++#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ ++ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu); ++static void process_nmi(struct kvm_vcpu *vcpu); ++static void enter_smm(struct kvm_vcpu *vcpu); ++static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); ++static void store_regs(struct kvm_vcpu *vcpu); ++static int sync_regs(struct kvm_vcpu *vcpu); ++ ++struct kvm_x86_ops *kvm_x86_ops __read_mostly; ++EXPORT_SYMBOL_GPL(kvm_x86_ops); ++ ++static bool __read_mostly ignore_msrs = 0; ++module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); ++ ++static bool __read_mostly report_ignored_msrs = true; ++module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); ++ ++unsigned int min_timer_period_us = 200; ++module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); ++ ++static bool __read_mostly kvmclock_periodic_sync = true; ++module_param(kvmclock_periodic_sync, bool, S_IRUGO); ++ ++bool __read_mostly kvm_has_tsc_control; ++EXPORT_SYMBOL_GPL(kvm_has_tsc_control); ++u32 __read_mostly kvm_max_guest_tsc_khz; ++EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); ++u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; ++EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); ++u64 __read_mostly kvm_max_tsc_scaling_ratio; ++EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); ++u64 __read_mostly kvm_default_tsc_scaling_ratio; ++EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio); ++ ++/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ ++static u32 __read_mostly tsc_tolerance_ppm = 250; ++module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); ++ ++/* lapic timer advance (tscdeadline mode only) in nanoseconds */ ++unsigned int __read_mostly lapic_timer_advance_ns = 0; ++module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); 
++EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); ++ ++static bool __read_mostly vector_hashing = true; ++module_param(vector_hashing, bool, S_IRUGO); ++ ++bool __read_mostly enable_vmware_backdoor = false; ++module_param(enable_vmware_backdoor, bool, S_IRUGO); ++EXPORT_SYMBOL_GPL(enable_vmware_backdoor); ++ ++static bool __read_mostly force_emulation_prefix = false; ++module_param(force_emulation_prefix, bool, S_IRUGO); ++ ++#define KVM_NR_SHARED_MSRS 16 ++ ++struct kvm_shared_msrs_global { ++ int nr; ++ u32 msrs[KVM_NR_SHARED_MSRS]; ++}; ++ ++struct kvm_shared_msrs { ++ struct user_return_notifier urn; ++ bool registered; ++ struct kvm_shared_msr_values { ++ u64 host; ++ u64 curr; ++ } values[KVM_NR_SHARED_MSRS]; ++}; ++ ++static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; ++static struct kvm_shared_msrs __percpu *shared_msrs; ++ ++struct kvm_stats_debugfs_item debugfs_entries[] = { ++ { "pf_fixed", VCPU_STAT(pf_fixed) }, ++ { "pf_guest", VCPU_STAT(pf_guest) }, ++ { "tlb_flush", VCPU_STAT(tlb_flush) }, ++ { "invlpg", VCPU_STAT(invlpg) }, ++ { "exits", VCPU_STAT(exits) }, ++ { "io_exits", VCPU_STAT(io_exits) }, ++ { "mmio_exits", VCPU_STAT(mmio_exits) }, ++ { "signal_exits", VCPU_STAT(signal_exits) }, ++ { "irq_window", VCPU_STAT(irq_window_exits) }, ++ { "nmi_window", VCPU_STAT(nmi_window_exits) }, ++ { "halt_exits", VCPU_STAT(halt_exits) }, ++ { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, ++ { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, ++ { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, ++ { "halt_wakeup", VCPU_STAT(halt_wakeup) }, ++ { "hypercalls", VCPU_STAT(hypercalls) }, ++ { "request_irq", VCPU_STAT(request_irq_exits) }, ++ { "irq_exits", VCPU_STAT(irq_exits) }, ++ { "host_state_reload", VCPU_STAT(host_state_reload) }, ++ { "fpu_reload", VCPU_STAT(fpu_reload) }, ++ { "insn_emulation", VCPU_STAT(insn_emulation) }, ++ { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, ++ { "irq_injections", VCPU_STAT(irq_injections) }, ++ { "nmi_injections", VCPU_STAT(nmi_injections) }, ++ { "req_event", VCPU_STAT(req_event) }, ++ { "l1d_flush", VCPU_STAT(l1d_flush) }, ++ { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, ++ { "mmu_pte_write", VM_STAT(mmu_pte_write) }, ++ { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, ++ { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, ++ { "mmu_flooded", VM_STAT(mmu_flooded) }, ++ { "mmu_recycled", VM_STAT(mmu_recycled) }, ++ { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, ++ { "mmu_unsync", VM_STAT(mmu_unsync) }, ++ { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, ++ { "largepages", VM_STAT(lpages, .mode = 0444) }, ++ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, ++ { "max_mmu_page_hash_collisions", ++ VM_STAT(max_mmu_page_hash_collisions) }, ++ { NULL } ++}; ++ ++/* debugfs entries of Detail For vcpu stat EXtension */ ++struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = { ++ { NULL } ++}; ++ ++u64 __read_mostly host_xcr0; ++ ++static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); ++ ++static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) ++{ ++ int i; ++ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) ++ vcpu->arch.apf.gfns[i] = ~0; ++} ++ ++static void kvm_on_user_return(struct user_return_notifier *urn) ++{ ++ unsigned slot; ++ struct kvm_shared_msrs *locals ++ = container_of(urn, struct kvm_shared_msrs, urn); ++ struct kvm_shared_msr_values *values; ++ unsigned long flags; ++ ++ /* ++ * Disabling irqs at this point since the following 
code could be ++ * interrupted and executed through kvm_arch_hardware_disable() ++ */ ++ local_irq_save(flags); ++ if (locals->registered) { ++ locals->registered = false; ++ user_return_notifier_unregister(urn); ++ } ++ local_irq_restore(flags); ++ for (slot = 0; slot < shared_msrs_global.nr; ++slot) { ++ values = &locals->values[slot]; ++ if (values->host != values->curr) { ++ wrmsrl(shared_msrs_global.msrs[slot], values->host); ++ values->curr = values->host; ++ } ++ } ++} ++ ++static void shared_msr_update(unsigned slot, u32 msr) ++{ ++ u64 value; ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ ++ /* only read, and nobody should modify it at this time, ++ * so don't need lock */ ++ if (slot >= shared_msrs_global.nr) { ++ printk(KERN_ERR "kvm: invalid MSR slot!"); ++ return; ++ } ++ rdmsrl_safe(msr, &value); ++ smsr->values[slot].host = value; ++ smsr->values[slot].curr = value; ++} ++ ++void kvm_define_shared_msr(unsigned slot, u32 msr) ++{ ++ BUG_ON(slot >= KVM_NR_SHARED_MSRS); ++ shared_msrs_global.msrs[slot] = msr; ++ if (slot >= shared_msrs_global.nr) ++ shared_msrs_global.nr = slot + 1; ++} ++EXPORT_SYMBOL_GPL(kvm_define_shared_msr); ++ ++static void kvm_shared_msr_cpu_online(void) ++{ ++ unsigned i; ++ ++ for (i = 0; i < shared_msrs_global.nr; ++i) ++ shared_msr_update(i, shared_msrs_global.msrs[i]); ++} ++ ++int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ int err; ++ ++ value = (value & mask) | (smsr->values[slot].host & ~mask); ++ if (value == smsr->values[slot].curr) ++ return 0; ++ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); ++ if (err) ++ return 1; ++ ++ smsr->values[slot].curr = value; ++ if (!smsr->registered) { ++ smsr->urn.on_user_return = kvm_on_user_return; ++ user_return_notifier_register(&smsr->urn); ++ smsr->registered = true; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_shared_msr); ++ ++static void drop_user_return_notifiers(void) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ ++ if (smsr->registered) ++ kvm_on_user_return(&smsr->urn); ++} ++ ++u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.apic_base; ++} ++EXPORT_SYMBOL_GPL(kvm_get_apic_base); ++ ++enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) ++{ ++ return kvm_apic_mode(kvm_get_apic_base(vcpu)); ++} ++EXPORT_SYMBOL_GPL(kvm_get_apic_mode); ++ ++int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); ++ enum lapic_mode new_mode = kvm_apic_mode(msr_info->data); ++ u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | ++ (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); ++ ++ if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) ++ return 1; ++ if (!msr_info->host_initiated) { ++ if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) ++ return 1; ++ if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) ++ return 1; ++ } ++ ++ kvm_lapic_set_base(vcpu, msr_info->data); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_apic_base); ++ ++asmlinkage __visible void kvm_spurious_fault(void) ++{ ++ /* Fault while not rebooting. We want the trace. 
*/ ++ BUG(); ++} ++EXPORT_SYMBOL_GPL(kvm_spurious_fault); ++ ++#define EXCPT_BENIGN 0 ++#define EXCPT_CONTRIBUTORY 1 ++#define EXCPT_PF 2 ++ ++static int exception_class(int vector) ++{ ++ switch (vector) { ++ case PF_VECTOR: ++ return EXCPT_PF; ++ case DE_VECTOR: ++ case TS_VECTOR: ++ case NP_VECTOR: ++ case SS_VECTOR: ++ case GP_VECTOR: ++ return EXCPT_CONTRIBUTORY; ++ default: ++ break; ++ } ++ return EXCPT_BENIGN; ++} ++ ++#define EXCPT_FAULT 0 ++#define EXCPT_TRAP 1 ++#define EXCPT_ABORT 2 ++#define EXCPT_INTERRUPT 3 ++ ++static int exception_type(int vector) ++{ ++ unsigned int mask; ++ ++ if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) ++ return EXCPT_INTERRUPT; ++ ++ mask = 1 << vector; ++ ++ /* #DB is trap, as instruction watchpoints are handled elsewhere */ ++ if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) ++ return EXCPT_TRAP; ++ ++ if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) ++ return EXCPT_ABORT; ++ ++ /* Reserved exceptions will result in fault */ ++ return EXCPT_FAULT; ++} ++ ++static void kvm_multiple_exception(struct kvm_vcpu *vcpu, ++ unsigned nr, bool has_error, u32 error_code, ++ bool reinject) ++{ ++ u32 prev_nr; ++ int class1, class2; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { ++ queue: ++ if (has_error && !is_protmode(vcpu)) ++ has_error = false; ++ if (reinject) { ++ /* ++ * On vmentry, vcpu->arch.exception.pending is only ++ * true if an event injection was blocked by ++ * nested_run_pending. In that case, however, ++ * vcpu_enter_guest requests an immediate exit, ++ * and the guest shouldn't proceed far enough to ++ * need reinjection. ++ */ ++ WARN_ON_ONCE(vcpu->arch.exception.pending); ++ vcpu->arch.exception.injected = true; ++ } else { ++ vcpu->arch.exception.pending = true; ++ vcpu->arch.exception.injected = false; ++ } ++ vcpu->arch.exception.has_error_code = has_error; ++ vcpu->arch.exception.nr = nr; ++ vcpu->arch.exception.error_code = error_code; ++ return; ++ } ++ ++ /* to check exception */ ++ prev_nr = vcpu->arch.exception.nr; ++ if (prev_nr == DF_VECTOR) { ++ /* triple fault -> shutdown */ ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return; ++ } ++ class1 = exception_class(prev_nr); ++ class2 = exception_class(nr); ++ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ++ || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { ++ /* ++ * Generate double fault per SDM Table 5-5. Set ++ * exception.pending = true so that the double fault ++ * can trigger a nested vmexit. 
++ */ ++ vcpu->arch.exception.pending = true; ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.has_error_code = true; ++ vcpu->arch.exception.nr = DF_VECTOR; ++ vcpu->arch.exception.error_code = 0; ++ } else ++ /* replace previous exception with a new one in a hope ++ that instruction re-execution will regenerate lost ++ exception */ ++ goto queue; ++} ++ ++void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) ++{ ++ kvm_multiple_exception(vcpu, nr, false, 0, false); ++} ++EXPORT_SYMBOL_GPL(kvm_queue_exception); ++ ++void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) ++{ ++ kvm_multiple_exception(vcpu, nr, false, 0, true); ++} ++EXPORT_SYMBOL_GPL(kvm_requeue_exception); ++ ++int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) ++{ ++ if (err) ++ kvm_inject_gp(vcpu, 0); ++ else ++ return kvm_skip_emulated_instruction(vcpu); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); ++ ++void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) ++{ ++ ++vcpu->stat.pf_guest; ++ vcpu->arch.exception.nested_apf = ++ is_guest_mode(vcpu) && fault->async_page_fault; ++ if (vcpu->arch.exception.nested_apf) ++ vcpu->arch.apf.nested_apf_token = fault->address; ++ else ++ vcpu->arch.cr2 = fault->address; ++ kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); ++} ++EXPORT_SYMBOL_GPL(kvm_inject_page_fault); ++ ++static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) ++{ ++ if (mmu_is_nested(vcpu) && !fault->nested_page_fault) ++ vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); ++ else ++ vcpu->arch.mmu.inject_page_fault(vcpu, fault); ++ ++ return fault->nested_page_fault; ++} ++ ++void kvm_inject_nmi(struct kvm_vcpu *vcpu) ++{ ++ atomic_inc(&vcpu->arch.nmi_queued); ++ kvm_make_request(KVM_REQ_NMI, vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_inject_nmi); ++ ++void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) ++{ ++ kvm_multiple_exception(vcpu, nr, true, error_code, false); ++} ++EXPORT_SYMBOL_GPL(kvm_queue_exception_e); ++ ++void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) ++{ ++ kvm_multiple_exception(vcpu, nr, true, error_code, true); ++} ++EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); ++ ++/* ++ * Checks if cpl <= required_cpl; if true, return true. Otherwise queue ++ * a #GP and return false. ++ */ ++bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) ++{ ++ if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl) ++ return true; ++ kvm_queue_exception_e(vcpu, GP_VECTOR, 0); ++ return false; ++} ++EXPORT_SYMBOL_GPL(kvm_require_cpl); ++ ++bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) ++{ ++ if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE)) ++ return true; ++ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return false; ++} ++EXPORT_SYMBOL_GPL(kvm_require_dr); ++ ++/* ++ * This function will be used to read from the physical memory of the currently ++ * running guest. The difference to kvm_vcpu_read_guest_page is that this function ++ * can read from guest physical or from the guest's guest physical memory. 
++ */ ++int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ++ gfn_t ngfn, void *data, int offset, int len, ++ u32 access) ++{ ++ struct x86_exception exception; ++ gfn_t real_gfn; ++ gpa_t ngpa; ++ ++ ngpa = gfn_to_gpa(ngfn); ++ real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); ++ if (real_gfn == UNMAPPED_GVA) ++ return -EFAULT; ++ ++ real_gfn = gpa_to_gfn(real_gfn); ++ ++ return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len); ++} ++EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); ++ ++static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, ++ void *data, int offset, int len, u32 access) ++{ ++ return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, ++ data, offset, len, access); ++} ++ ++static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) ++{ ++ return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) | ++ rsvd_bits(1, 2); ++} ++ ++/* ++ * Load the pae pdptrs. Return 1 if they are all valid, 0 otherwise. ++ */ ++int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) ++{ ++ gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; ++ unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; ++ int i; ++ int ret; ++ u64 pdpte[ARRAY_SIZE(mmu->pdptrs)]; ++ ++ ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte, ++ offset * sizeof(u64), sizeof(pdpte), ++ PFERR_USER_MASK|PFERR_WRITE_MASK); ++ if (ret < 0) { ++ ret = 0; ++ goto out; ++ } ++ for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { ++ if ((pdpte[i] & PT_PRESENT_MASK) && ++ (pdpte[i] & pdptr_rsvd_bits(vcpu))) { ++ ret = 0; ++ goto out; ++ } ++ } ++ ret = 1; ++ ++ memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail); ++ __set_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_dirty); ++out: ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(load_pdptrs); ++ ++bool pdptrs_changed(struct kvm_vcpu *vcpu) ++{ ++ u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; ++ bool changed = true; ++ int offset; ++ gfn_t gfn; ++ int r; ++ ++ if (!is_pae_paging(vcpu)) ++ return false; ++ ++ if (!test_bit(VCPU_EXREG_PDPTR, ++ (unsigned long *)&vcpu->arch.regs_avail)) ++ return true; ++ ++ gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; ++ offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1); ++ r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), ++ PFERR_USER_MASK | PFERR_WRITE_MASK); ++ if (r < 0) ++ goto out; ++ changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; ++out: ++ ++ return changed; ++} ++EXPORT_SYMBOL_GPL(pdptrs_changed); ++ ++int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ++{ ++ unsigned long old_cr0 = kvm_read_cr0(vcpu); ++ unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; ++ ++ cr0 |= X86_CR0_ET; ++ ++#ifdef CONFIG_X86_64 ++ if (cr0 & 0xffffffff00000000UL) ++ return 1; ++#endif ++ ++ cr0 &= ~CR0_RESERVED_BITS; ++ ++ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) ++ return 1; ++ ++ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) ++ return 1; ++ ++ if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { ++#ifdef CONFIG_X86_64 ++ if ((vcpu->arch.efer & EFER_LME)) { ++ int cs_db, cs_l; ++ ++ if (!is_pae(vcpu)) ++ return 1; ++ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ++ if (cs_l) ++ return 1; ++ } else ++#endif ++ if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, ++ kvm_read_cr3(vcpu))) ++ return 1; ++ } ++ ++ if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) ++ return 1; ++ ++ kvm_x86_ops->set_cr0(vcpu, 
cr0); ++ ++ if ((cr0 ^ old_cr0) & X86_CR0_PG) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ } ++ ++ if ((cr0 ^ old_cr0) & update_bits) ++ kvm_mmu_reset_context(vcpu); ++ ++ if (((cr0 ^ old_cr0) & X86_CR0_CD) && ++ kvm_arch_has_noncoherent_dma(vcpu->kvm) && ++ !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) ++ kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr0); ++ ++void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) ++{ ++ (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); ++} ++EXPORT_SYMBOL_GPL(kvm_lmsw); ++ ++void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) ++{ ++ if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && ++ !vcpu->guest_xcr0_loaded) { ++ /* kvm_set_xcr() also depends on this */ ++ if (vcpu->arch.xcr0 != host_xcr0) ++ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); ++ vcpu->guest_xcr0_loaded = 1; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); ++ ++void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->guest_xcr0_loaded) { ++ if (vcpu->arch.xcr0 != host_xcr0) ++ xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); ++ vcpu->guest_xcr0_loaded = 0; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); ++ ++static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) ++{ ++ u64 xcr0 = xcr; ++ u64 old_xcr0 = vcpu->arch.xcr0; ++ u64 valid_bits; ++ ++ /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ ++ if (index != XCR_XFEATURE_ENABLED_MASK) ++ return 1; ++ if (!(xcr0 & XFEATURE_MASK_FP)) ++ return 1; ++ if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE)) ++ return 1; ++ ++ /* ++ * Do not allow the guest to set bits that we do not support ++ * saving. However, xcr0 bit 0 is always set, even if the ++ * emulated CPU does not support XSAVE (see fx_init). 
++ */ ++ valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; ++ if (xcr0 & ~valid_bits) ++ return 1; ++ ++ if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) != ++ (!(xcr0 & XFEATURE_MASK_BNDCSR))) ++ return 1; ++ ++ if (xcr0 & XFEATURE_MASK_AVX512) { ++ if (!(xcr0 & XFEATURE_MASK_YMM)) ++ return 1; ++ if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) ++ return 1; ++ } ++ vcpu->arch.xcr0 = xcr0; ++ ++ if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) ++ kvm_update_cpuid(vcpu); ++ return 0; ++} ++ ++int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) ++{ ++ if (kvm_x86_ops->get_cpl(vcpu) != 0 || ++ __kvm_set_xcr(vcpu, index, xcr)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_xcr); ++ ++static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ if (cr4 & CR4_RESERVED_BITS) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) ++ return -EINVAL; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ unsigned long old_cr4 = kvm_read_cr4(vcpu); ++ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | ++ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; ++ ++ if (kvm_valid_cr4(vcpu, cr4)) ++ return 1; ++ ++ if (is_long_mode(vcpu)) { ++ if (!(cr4 & X86_CR4_PAE)) ++ return 1; ++ } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) ++ && ((cr4 ^ old_cr4) & pdptr_bits) ++ && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, ++ kvm_read_cr3(vcpu))) ++ return 1; ++ ++ if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) ++ return 1; ++ ++ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ ++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) ++ return 1; ++ } ++ ++ if (kvm_x86_ops->set_cr4(vcpu, cr4)) ++ return 1; ++ ++ if (((cr4 ^ old_cr4) & pdptr_bits) || ++ (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) ++ kvm_mmu_reset_context(vcpu); ++ ++ if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) ++ kvm_update_cpuid(vcpu); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr4); ++ ++int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ++{ ++ bool skip_tlb_flush = false; ++#ifdef CONFIG_X86_64 ++ bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); ++ ++ if (pcid_enabled) { ++ skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH; ++ cr3 &= ~X86_CR3_PCID_NOFLUSH; ++ } ++#endif ++ ++ if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { ++ if (!skip_tlb_flush) { ++ kvm_mmu_sync_roots(vcpu); ++ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++ } ++ return 0; ++ } ++ ++ if (is_long_mode(vcpu) && ++ (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) ++ return 1; ++ else if (is_pae_paging(vcpu) && ++ !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) ++ return 1; ++ ++ kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); ++ vcpu->arch.cr3 = cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ return 
0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr3); ++ ++int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) ++{ ++ if (cr8 & CR8_RESERVED_BITS) ++ return 1; ++ if (lapic_in_kernel(vcpu)) ++ kvm_lapic_set_tpr(vcpu, cr8); ++ else ++ vcpu->arch.cr8 = cr8; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_cr8); ++ ++unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) ++{ ++ if (lapic_in_kernel(vcpu)) ++ return kvm_lapic_get_cr8(vcpu); ++ else ++ return vcpu->arch.cr8; ++} ++EXPORT_SYMBOL_GPL(kvm_get_cr8); ++ ++static void kvm_update_dr0123(struct kvm_vcpu *vcpu) ++{ ++ int i; ++ ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { ++ for (i = 0; i < KVM_NR_DB_REGS; i++) ++ vcpu->arch.eff_db[i] = vcpu->arch.db[i]; ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; ++ } ++} ++ ++static void kvm_update_dr6(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) ++ kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); ++} ++ ++static void kvm_update_dr7(struct kvm_vcpu *vcpu) ++{ ++ unsigned long dr7; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ dr7 = vcpu->arch.guest_debug_dr7; ++ else ++ dr7 = vcpu->arch.dr7; ++ kvm_x86_ops->set_dr7(vcpu, dr7); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED; ++ if (dr7 & DR7_BP_EN_MASK) ++ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; ++} ++ ++static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) ++{ ++ u64 fixed = DR6_FIXED_1; ++ ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) ++ fixed |= DR6_RTM; ++ return fixed; ++} ++ ++static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) ++{ ++ switch (dr) { ++ case 0 ... 3: ++ vcpu->arch.db[dr] = val; ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) ++ vcpu->arch.eff_db[dr] = val; ++ break; ++ case 4: ++ /* fall through */ ++ case 6: ++ if (val & 0xffffffff00000000ULL) ++ return -1; /* #GP */ ++ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); ++ kvm_update_dr6(vcpu); ++ break; ++ case 5: ++ /* fall through */ ++ default: /* 7 */ ++ if (val & 0xffffffff00000000ULL) ++ return -1; /* #GP */ ++ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; ++ kvm_update_dr7(vcpu); ++ break; ++ } ++ ++ return 0; ++} ++ ++int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) ++{ ++ if (__kvm_set_dr(vcpu, dr, val)) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_dr); ++ ++int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) ++{ ++ switch (dr) { ++ case 0 ... 3: ++ *val = vcpu->arch.db[dr]; ++ break; ++ case 4: ++ /* fall through */ ++ case 6: ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) ++ *val = vcpu->arch.dr6; ++ else ++ *val = kvm_x86_ops->get_dr6(vcpu); ++ break; ++ case 5: ++ /* fall through */ ++ default: /* 7 */ ++ *val = vcpu->arch.dr7; ++ break; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_get_dr); ++ ++bool kvm_rdpmc(struct kvm_vcpu *vcpu) ++{ ++ u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ u64 data; ++ int err; ++ ++ err = kvm_pmu_rdpmc(vcpu, ecx, &data); ++ if (err) ++ return err; ++ kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32); ++ return err; ++} ++EXPORT_SYMBOL_GPL(kvm_rdpmc); ++ ++/* ++ * List of msr numbers which we expose to userspace through KVM_GET_MSRS ++ * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. ++ * ++ * This list is modified at module load time to reflect the ++ * capabilities of the host cpu. This capabilities test skips MSRs that are ++ * kvm-specific. 
Those are put in emulated_msrs; filtering of emulated_msrs ++ * may depend on host virtualization features rather than host cpu features. ++ */ ++ ++static u32 msrs_to_save[] = { ++ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, ++ MSR_STAR, ++#ifdef CONFIG_X86_64 ++ MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, ++#endif ++ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, ++ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, ++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES ++}; ++ ++static unsigned num_msrs_to_save; ++ ++static u32 emulated_msrs[] = { ++ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, ++ MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, ++ HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, ++ HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, ++ HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, ++ HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, ++ HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, ++ HV_X64_MSR_RESET, ++ HV_X64_MSR_VP_INDEX, ++ HV_X64_MSR_VP_RUNTIME, ++ HV_X64_MSR_SCONTROL, ++ HV_X64_MSR_STIMER0_CONFIG, ++ HV_X64_MSR_VP_ASSIST_PAGE, ++ HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, ++ HV_X64_MSR_TSC_EMULATION_STATUS, ++ ++ MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, ++ MSR_KVM_PV_EOI_EN, ++ ++ MSR_IA32_TSC_ADJUST, ++ MSR_IA32_TSCDEADLINE, ++ MSR_IA32_MISC_ENABLE, ++ MSR_IA32_MCG_STATUS, ++ MSR_IA32_MCG_CTL, ++ MSR_IA32_MCG_EXT_CTL, ++ MSR_IA32_SMBASE, ++ MSR_SMI_COUNT, ++ MSR_PLATFORM_INFO, ++ MSR_MISC_FEATURES_ENABLES, ++ MSR_AMD64_VIRT_SPEC_CTRL, ++}; ++ ++static unsigned num_emulated_msrs; ++ ++/* ++ * List of msr numbers which are used to expose MSR-based features that ++ * can be used by a hypervisor to validate requested CPU features. ++ */ ++static u32 msr_based_features[] = { ++ MSR_IA32_VMX_BASIC, ++ MSR_IA32_VMX_TRUE_PINBASED_CTLS, ++ MSR_IA32_VMX_PINBASED_CTLS, ++ MSR_IA32_VMX_TRUE_PROCBASED_CTLS, ++ MSR_IA32_VMX_PROCBASED_CTLS, ++ MSR_IA32_VMX_TRUE_EXIT_CTLS, ++ MSR_IA32_VMX_EXIT_CTLS, ++ MSR_IA32_VMX_TRUE_ENTRY_CTLS, ++ MSR_IA32_VMX_ENTRY_CTLS, ++ MSR_IA32_VMX_MISC, ++ MSR_IA32_VMX_CR0_FIXED0, ++ MSR_IA32_VMX_CR0_FIXED1, ++ MSR_IA32_VMX_CR4_FIXED0, ++ MSR_IA32_VMX_CR4_FIXED1, ++ MSR_IA32_VMX_VMCS_ENUM, ++ MSR_IA32_VMX_PROCBASED_CTLS2, ++ MSR_IA32_VMX_EPT_VPID_CAP, ++ MSR_IA32_VMX_VMFUNC, ++ ++ MSR_F10H_DECFG, ++ MSR_IA32_UCODE_REV, ++ MSR_IA32_ARCH_CAPABILITIES, ++}; ++ ++static unsigned int num_msr_based_features; ++ ++u64 kvm_get_arch_capabilities(void) ++{ ++ u64 data; ++ ++ rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); ++ ++ /* ++ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that ++ * the nested hypervisor runs with NX huge pages. If it is not, ++ * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other ++ * L1 guests, so it need not worry about its own (L2) guests. ++ */ ++ data |= ARCH_CAP_PSCHANGE_MC_NO; ++ ++ /* ++ * If we're doing cache flushes (either "always" or "cond") ++ * we will do one whenever the guest does a vmlaunch/vmresume. ++ * If an outer hypervisor is doing the cache flush for us ++ * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that ++ * capability to the guest too, and if EPT is disabled we're not ++ * vulnerable. Overall, only VMENTER_L1D_FLUSH_NEVER will ++ * require a nested hypervisor to do a flush of its own. 
++ */ ++ if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) ++ data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; ++ ++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ data |= ARCH_CAP_RDCL_NO; ++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ data |= ARCH_CAP_SSB_NO; ++ if (!boot_cpu_has_bug(X86_BUG_MDS)) ++ data |= ARCH_CAP_MDS_NO; ++ ++ /* ++ * On TAA affected systems, export MDS_NO=0 when: ++ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. ++ * - Updated microcode is present. This is detected by ++ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures ++ * that VERW clears CPU buffers. ++ * ++ * When MDS_NO=0 is exported, guests deploy clear CPU buffer ++ * mitigation and don't complain: ++ * ++ * "Vulnerable: Clear CPU buffers attempted, no microcode" ++ * ++ * If TSX is disabled on the system, guests are also mitigated against ++ * TAA and clear CPU buffer mitigation is not required for guests. ++ */ ++ if (!boot_cpu_has(X86_FEATURE_RTM)) ++ data &= ~ARCH_CAP_TAA_NO; ++ else if (!boot_cpu_has_bug(X86_BUG_TAA)) ++ data |= ARCH_CAP_TAA_NO; ++ else if (data & ARCH_CAP_TSX_CTRL_MSR) ++ data &= ~ARCH_CAP_MDS_NO; ++ ++ /* KVM does not emulate MSR_IA32_TSX_CTRL. */ ++ data &= ~ARCH_CAP_TSX_CTRL_MSR; ++ return data; ++} ++ ++EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); ++ ++static int kvm_get_msr_feature(struct kvm_msr_entry *msr) ++{ ++ switch (msr->index) { ++ case MSR_IA32_ARCH_CAPABILITIES: ++ msr->data = kvm_get_arch_capabilities(); ++ break; ++ case MSR_IA32_UCODE_REV: ++ rdmsrl_safe(msr->index, &msr->data); ++ break; ++ default: ++ if (kvm_x86_ops->get_msr_feature(msr)) ++ return 1; ++ } ++ return 0; ++} ++ ++static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct kvm_msr_entry msr; ++ int r; ++ ++ msr.index = index; ++ r = kvm_get_msr_feature(&msr); ++ if (r) ++ return r; ++ ++ *data = msr.data; ++ ++ return 0; ++} ++ ++static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) ++ return false; ++ ++ if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) ++ return false; ++ ++ return true; ++ ++} ++bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) ++{ ++ if (efer & efer_reserved_bits) ++ return false; ++ ++ return __kvm_valid_efer(vcpu, efer); ++} ++EXPORT_SYMBOL_GPL(kvm_valid_efer); ++ ++static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ u64 old_efer = vcpu->arch.efer; ++ u64 efer = msr_info->data; ++ ++ if (efer & efer_reserved_bits) ++ return 1; ++ ++ if (!msr_info->host_initiated) { ++ if (!__kvm_valid_efer(vcpu, efer)) ++ return 1; ++ ++ if (is_paging(vcpu) && ++ (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) ++ return 1; ++ } ++ ++ efer &= ~EFER_LMA; ++ efer |= vcpu->arch.efer & EFER_LMA; ++ ++ kvm_x86_ops->set_efer(vcpu, efer); ++ ++ /* Update reserved bits */ ++ if ((efer ^ old_efer) & EFER_NX) ++ kvm_mmu_reset_context(vcpu); ++ ++ return 0; ++} ++ ++void kvm_enable_efer_bits(u64 mask) ++{ ++ efer_reserved_bits &= ~mask; ++} ++EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); ++ ++/* ++ * Writes msr value into into the appropriate "register". ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. 
++ */ ++int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ switch (msr->index) { ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_KERNEL_GS_BASE: ++ case MSR_CSTAR: ++ case MSR_LSTAR: ++ if (is_noncanonical_address(msr->data, vcpu)) ++ return 1; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ case MSR_IA32_SYSENTER_ESP: ++ /* ++ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if ++ * non-canonical address is written on Intel but not on ++ * AMD (which ignores the top 32-bits, because it does ++ * not implement 64-bit SYSENTER). ++ * ++ * 64-bit code should hence be able to write a non-canonical ++ * value on AMD. Making the address canonical ensures that ++ * vmentry does not fail on Intel after writing a non-canonical ++ * value, and that something deterministic happens if the guest ++ * invokes 64-bit SYSENTER. ++ */ ++ msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); ++ } ++ return kvm_x86_ops->set_msr(vcpu, msr); ++} ++EXPORT_SYMBOL_GPL(kvm_set_msr); ++ ++/* ++ * Adapt set_msr() to msr_io()'s calling convention ++ */ ++static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct msr_data msr; ++ int r; ++ ++ msr.index = index; ++ msr.host_initiated = true; ++ r = kvm_get_msr(vcpu, &msr); ++ if (r) ++ return r; ++ ++ *data = msr.data; ++ return 0; ++} ++ ++static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) ++{ ++ struct msr_data msr; ++ ++ msr.data = *data; ++ msr.index = index; ++ msr.host_initiated = true; ++ return kvm_set_msr(vcpu, &msr); ++} ++ ++#ifdef CONFIG_X86_64 ++struct pvclock_gtod_data { ++ seqcount_t seq; ++ ++ struct { /* extract of a clocksource struct */ ++ int vclock_mode; ++ u64 cycle_last; ++ u64 mask; ++ u32 mult; ++ u32 shift; ++ } clock; ++ ++ u64 boot_ns; ++ u64 nsec_base; ++ u64 wall_time_sec; ++}; ++ ++static struct pvclock_gtod_data pvclock_gtod_data; ++ ++static void update_pvclock_gtod(struct timekeeper *tk) ++{ ++ struct pvclock_gtod_data *vdata = &pvclock_gtod_data; ++ u64 boot_ns; ++ ++ boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); ++ ++ write_seqcount_begin(&vdata->seq); ++ ++ /* copy pvclock gtod data */ ++ vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; ++ vdata->clock.cycle_last = tk->tkr_mono.cycle_last; ++ vdata->clock.mask = tk->tkr_mono.mask; ++ vdata->clock.mult = tk->tkr_mono.mult; ++ vdata->clock.shift = tk->tkr_mono.shift; ++ ++ vdata->boot_ns = boot_ns; ++ vdata->nsec_base = tk->tkr_mono.xtime_nsec; ++ ++ vdata->wall_time_sec = tk->xtime_sec; ++ ++ write_seqcount_end(&vdata->seq); ++} ++#endif ++ ++void kvm_set_pending_timer(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Note: KVM_REQ_PENDING_TIMER is implicitly checked in ++ * vcpu_enter_guest. This function is only called from ++ * the physical CPU that is running vcpu. ++ */ ++ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); ++} ++ ++static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++{ ++ int version; ++ int r; ++ struct pvclock_wall_clock wc; ++ struct timespec64 boot; ++ ++ if (!wall_clock) ++ return; ++ ++ r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); ++ if (r) ++ return; ++ ++ if (version & 1) ++ ++version; /* first time write, random junk */ ++ ++ ++version; ++ ++ if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) ++ return; ++ ++ /* ++ * The guest calculates current wall clock time by adding ++ * system time (updated by kvm_guest_time_update below) to the ++ * wall clock specified here. 
guest system time equals host ++ * system time for us, thus we must fill in host boot time here. ++ */ ++ getboottime64(&boot); ++ ++ if (kvm->arch.kvmclock_offset) { ++ struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); ++ boot = timespec64_sub(boot, ts); ++ } ++ wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ ++ wc.nsec = boot.tv_nsec; ++ wc.version = version; ++ ++ kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); ++ ++ version++; ++ kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); ++} ++ ++static uint32_t div_frac(uint32_t dividend, uint32_t divisor) ++{ ++ do_shl32_div32(dividend, divisor); ++ return dividend; ++} ++ ++static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, ++ s8 *pshift, u32 *pmultiplier) ++{ ++ uint64_t scaled64; ++ int32_t shift = 0; ++ uint64_t tps64; ++ uint32_t tps32; ++ ++ tps64 = base_hz; ++ scaled64 = scaled_hz; ++ while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { ++ tps64 >>= 1; ++ shift--; ++ } ++ ++ tps32 = (uint32_t)tps64; ++ while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) { ++ if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000) ++ scaled64 >>= 1; ++ else ++ tps32 <<= 1; ++ shift++; ++ } ++ ++ *pshift = shift; ++ *pmultiplier = div_frac(scaled64, tps32); ++ ++ pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", ++ __func__, base_hz, scaled_hz, shift, *pmultiplier); ++} ++ ++#ifdef CONFIG_X86_64 ++static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); ++#endif ++ ++static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); ++static unsigned long max_tsc_khz; ++ ++static u32 adjust_tsc_khz(u32 khz, s32 ppm) ++{ ++ u64 v = (u64)khz * (1000000 + ppm); ++ do_div(v, 1000000); ++ return v; ++} ++ ++static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) ++{ ++ u64 ratio; ++ ++ /* Guest TSC same frequency as host TSC? */ ++ if (!scale) { ++ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; ++ return 0; ++ } ++ ++ /* TSC scaling supported? */ ++ if (!kvm_has_tsc_control) { ++ if (user_tsc_khz > tsc_khz) { ++ vcpu->arch.tsc_catchup = 1; ++ vcpu->arch.tsc_always_catchup = 1; ++ return 0; ++ } else { ++ pr_warn_ratelimited("user requested TSC rate below hardware speed\n"); ++ return -1; ++ } ++ } ++ ++ /* TSC scaling required - calculate ratio */ ++ ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, ++ user_tsc_khz, tsc_khz); ++ ++ if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { ++ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", ++ user_tsc_khz); ++ return -1; ++ } ++ ++ vcpu->arch.tsc_scaling_ratio = ratio; ++ return 0; ++} ++ ++static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) ++{ ++ u32 thresh_lo, thresh_hi; ++ int use_scaling = 0; ++ ++ /* tsc_khz can be zero if TSC calibration fails */ ++ if (user_tsc_khz == 0) { ++ /* set tsc_scaling_ratio to a safe value */ ++ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; ++ return -1; ++ } ++ ++ /* Compute a scale to convert nanoseconds in TSC cycles */ ++ kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, ++ &vcpu->arch.virtual_tsc_shift, ++ &vcpu->arch.virtual_tsc_mult); ++ vcpu->arch.virtual_tsc_khz = user_tsc_khz; ++ ++ /* ++ * Compute the variation in TSC rate which is acceptable ++ * within the range of tolerance and decide if the ++ * rate being applied is within that bounds of the hardware ++ * rate. If so, no scaling or compensation need be done. 
++ */ ++ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); ++ thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); ++ if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { ++ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); ++ use_scaling = 1; ++ } ++ return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); ++} ++ ++static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) ++{ ++ u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec, ++ vcpu->arch.virtual_tsc_mult, ++ vcpu->arch.virtual_tsc_shift); ++ tsc += vcpu->arch.this_tsc_write; ++ return tsc; ++} ++ ++static inline int gtod_is_based_on_tsc(int mode) ++{ ++ return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK; ++} ++ ++static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) ++{ ++#ifdef CONFIG_X86_64 ++ bool vcpus_matched; ++ struct kvm_arch *ka = &vcpu->kvm->arch; ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ ++ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == ++ atomic_read(&vcpu->kvm->online_vcpus)); ++ ++ /* ++ * Once the masterclock is enabled, always perform request in ++ * order to update it. ++ * ++ * In order to enable masterclock, the host clocksource must be TSC ++ * and the vcpus need to have matched TSCs. When that happens, ++ * perform request to enable masterclock. ++ */ ++ if (ka->use_master_clock || ++ (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched)) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ ++ trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, ++ atomic_read(&vcpu->kvm->online_vcpus), ++ ka->use_master_clock, gtod->clock.vclock_mode); ++#endif ++} ++ ++static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) ++{ ++ u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; ++} ++ ++/* ++ * Multiply tsc by a fixed point number represented by ratio. ++ * ++ * The most significant 64-N bits (mult) of ratio represent the ++ * integral part of the fixed point number; the remaining N bits ++ * (frac) represent the fractional part, ie. ratio represents a fixed ++ * point number (mult + frac * 2^(-N)). ++ * ++ * N equals to kvm_tsc_scaling_ratio_frac_bits. ++ */ ++static inline u64 __scale_tsc(u64 ratio, u64 tsc) ++{ ++ return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); ++} ++ ++u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) ++{ ++ u64 _tsc = tsc; ++ u64 ratio = vcpu->arch.tsc_scaling_ratio; ++ ++ if (ratio != kvm_default_tsc_scaling_ratio) ++ _tsc = __scale_tsc(ratio, tsc); ++ ++ return _tsc; ++} ++EXPORT_SYMBOL_GPL(kvm_scale_tsc); ++ ++static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) ++{ ++ u64 tsc; ++ ++ tsc = kvm_scale_tsc(vcpu, rdtsc()); ++ ++ return target_tsc - tsc; ++} ++ ++u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) ++{ ++ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ ++ return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); ++} ++EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); ++ ++static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ++{ ++ vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset); ++} ++ ++static inline bool kvm_check_tsc_unstable(void) ++{ ++#ifdef CONFIG_X86_64 ++ /* ++ * TSC is marked unstable when we're running on Hyper-V, ++ * 'TSC page' clocksource is good. 
++ */ ++ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK) ++ return false; ++#endif ++ return check_tsc_unstable(); ++} ++ ++void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ u64 offset, ns, elapsed; ++ unsigned long flags; ++ bool matched; ++ bool already_matched; ++ u64 data = msr->data; ++ bool synchronizing = false; ++ ++ raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); ++ offset = kvm_compute_tsc_offset(vcpu, data); ++ ns = ktime_get_boot_ns(); ++ elapsed = ns - kvm->arch.last_tsc_nsec; ++ ++ if (vcpu->arch.virtual_tsc_khz) { ++ if (data == 0 && msr->host_initiated) { ++ /* ++ * detection of vcpu initialization -- need to sync ++ * with other vCPUs. This particularly helps to keep ++ * kvm_clock stable after CPU hotplug ++ */ ++ synchronizing = true; ++ } else { ++ u64 tsc_exp = kvm->arch.last_tsc_write + ++ nsec_to_cycles(vcpu, elapsed); ++ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL; ++ /* ++ * Special case: TSC write with a small delta (1 second) ++ * of virtual cycle time against real time is ++ * interpreted as an attempt to synchronize the CPU. ++ */ ++ synchronizing = data < tsc_exp + tsc_hz && ++ data + tsc_hz > tsc_exp; ++ } ++ } ++ ++ /* ++ * For a reliable TSC, we can match TSC offsets, and for an unstable ++ * TSC, we add elapsed time in this computation. We could let the ++ * compensation code attempt to catch up if we fall behind, but ++ * it's better to try to match offsets from the beginning. ++ */ ++ if (synchronizing && ++ vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { ++ if (!kvm_check_tsc_unstable()) { ++ offset = kvm->arch.cur_tsc_offset; ++ pr_debug("kvm: matched tsc offset for %llu\n", data); ++ } else { ++ u64 delta = nsec_to_cycles(vcpu, elapsed); ++ data += delta; ++ offset = kvm_compute_tsc_offset(vcpu, data); ++ pr_debug("kvm: adjusted tsc offset by %llu\n", delta); ++ } ++ matched = true; ++ already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); ++ } else { ++ /* ++ * We split periods of matched TSC writes into generations. ++ * For each generation, we track the original measured ++ * nanosecond time, offset, and write, so if TSCs are in ++ * sync, we can match exact offset, and if not, we can match ++ * exact software computation in compute_guest_tsc() ++ * ++ * These values are tracked in kvm->arch.cur_xxx variables. ++ */ ++ kvm->arch.cur_tsc_generation++; ++ kvm->arch.cur_tsc_nsec = ns; ++ kvm->arch.cur_tsc_write = data; ++ kvm->arch.cur_tsc_offset = offset; ++ matched = false; ++ pr_debug("kvm: new tsc generation %llu, clock %llu\n", ++ kvm->arch.cur_tsc_generation, data); ++ } ++ ++ /* ++ * We also track th most recent recorded KHZ, write and time to ++ * allow the matching interval to be extended at each write. 
++ */ ++ kvm->arch.last_tsc_nsec = ns; ++ kvm->arch.last_tsc_write = data; ++ kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; ++ ++ vcpu->arch.last_guest_tsc = data; ++ ++ /* Keep track of which generation this VCPU has synchronized to */ ++ vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation; ++ vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; ++ vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; ++ ++ if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) ++ update_ia32_tsc_adjust_msr(vcpu, offset); ++ ++ kvm_vcpu_write_tsc_offset(vcpu, offset); ++ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); ++ ++ spin_lock(&kvm->arch.pvclock_gtod_sync_lock); ++ if (!matched) { ++ kvm->arch.nr_vcpus_matched_tsc = 0; ++ } else if (!already_matched) { ++ kvm->arch.nr_vcpus_matched_tsc++; ++ } ++ ++ kvm_track_tsc_matching(vcpu); ++ spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); ++} ++ ++EXPORT_SYMBOL_GPL(kvm_write_tsc); ++ ++static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, ++ s64 adjustment) ++{ ++ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); ++ kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); ++} ++ ++static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) ++{ ++ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) ++ WARN_ON(adjustment < 0); ++ adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); ++ adjust_tsc_offset_guest(vcpu, adjustment); ++} ++ ++#ifdef CONFIG_X86_64 ++ ++static u64 read_tsc(void) ++{ ++ u64 ret = (u64)rdtsc_ordered(); ++ u64 last = pvclock_gtod_data.clock.cycle_last; ++ ++ if (likely(ret >= last)) ++ return ret; ++ ++ /* ++ * GCC likes to generate cmov here, but this branch is extremely ++ * predictable (it's just a function of time and the likely is ++ * very likely) and there's a data dependence, so force GCC ++ * to generate a branch instead. I don't barrier() because ++ * we don't actually need a barrier, and if this function ++ * ever gets inlined it will generate worse code. 
++ */ ++ asm volatile (""); ++ return last; ++} ++ ++static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) ++{ ++ long v; ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ u64 tsc_pg_val; ++ ++ switch (gtod->clock.vclock_mode) { ++ case VCLOCK_HVCLOCK: ++ tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), ++ tsc_timestamp); ++ if (tsc_pg_val != U64_MAX) { ++ /* TSC page valid */ ++ *mode = VCLOCK_HVCLOCK; ++ v = (tsc_pg_val - gtod->clock.cycle_last) & ++ gtod->clock.mask; ++ } else { ++ /* TSC page invalid */ ++ *mode = VCLOCK_NONE; ++ } ++ break; ++ case VCLOCK_TSC: ++ *mode = VCLOCK_TSC; ++ *tsc_timestamp = read_tsc(); ++ v = (*tsc_timestamp - gtod->clock.cycle_last) & ++ gtod->clock.mask; ++ break; ++ default: ++ *mode = VCLOCK_NONE; ++ } ++ ++ if (*mode == VCLOCK_NONE) ++ *tsc_timestamp = v = 0; ++ ++ return v * gtod->clock.mult; ++} ++ ++static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ unsigned long seq; ++ int mode; ++ u64 ns; ++ ++ do { ++ seq = read_seqcount_begin(>od->seq); ++ ns = gtod->nsec_base; ++ ns += vgettsc(tsc_timestamp, &mode); ++ ns >>= gtod->clock.shift; ++ ns += gtod->boot_ns; ++ } while (unlikely(read_seqcount_retry(>od->seq, seq))); ++ *t = ns; ++ ++ return mode; ++} ++ ++static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ unsigned long seq; ++ int mode; ++ u64 ns; ++ ++ do { ++ seq = read_seqcount_begin(>od->seq); ++ ts->tv_sec = gtod->wall_time_sec; ++ ns = gtod->nsec_base; ++ ns += vgettsc(tsc_timestamp, &mode); ++ ns >>= gtod->clock.shift; ++ } while (unlikely(read_seqcount_retry(>od->seq, seq))); ++ ++ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ++ ts->tv_nsec = ns; ++ ++ return mode; ++} ++ ++/* returns true if host is using TSC based clocksource */ ++static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) ++{ ++ /* checked again under seqlock below */ ++ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) ++ return false; ++ ++ return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, ++ tsc_timestamp)); ++} ++ ++/* returns true if host is using TSC based clocksource */ ++static bool kvm_get_walltime_and_clockread(struct timespec64 *ts, ++ u64 *tsc_timestamp) ++{ ++ /* checked again under seqlock below */ ++ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) ++ return false; ++ ++ return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp)); ++} ++#endif ++ ++/* ++ * ++ * Assuming a stable TSC across physical CPUS, and a stable TSC ++ * across virtual CPUs, the following condition is possible. ++ * Each numbered line represents an event visible to both ++ * CPUs at the next numbered event. ++ * ++ * "timespecX" represents host monotonic time. "tscX" represents ++ * RDTSC value. ++ * ++ * VCPU0 on CPU0 | VCPU1 on CPU1 ++ * ++ * 1. read timespec0,tsc0 ++ * 2. | timespec1 = timespec0 + N ++ * | tsc1 = tsc0 + M ++ * 3. transition to guest | transition to guest ++ * 4. ret0 = timespec0 + (rdtsc - tsc0) | ++ * 5. | ret1 = timespec1 + (rdtsc - tsc1) ++ * | ret1 = timespec0 + N + (rdtsc - (tsc0 + M)) ++ * ++ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity: ++ * ++ * - ret0 < ret1 ++ * - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M)) ++ * ... ++ * - 0 < N - M => M < N ++ * ++ * That is, when timespec0 != timespec1, M < N. 
Unfortunately that is not ++ * always the case (the difference between two distinct xtime instances ++ * might be smaller then the difference between corresponding TSC reads, ++ * when updating guest vcpus pvclock areas). ++ * ++ * To avoid that problem, do not allow visibility of distinct ++ * system_timestamp/tsc_timestamp values simultaneously: use a master ++ * copy of host monotonic time values. Update that master copy ++ * in lockstep. ++ * ++ * Rely on synchronization of host TSCs and guest TSCs for monotonicity. ++ * ++ */ ++ ++static void pvclock_update_vm_gtod_copy(struct kvm *kvm) ++{ ++#ifdef CONFIG_X86_64 ++ struct kvm_arch *ka = &kvm->arch; ++ int vclock_mode; ++ bool host_tsc_clocksource, vcpus_matched; ++ ++ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == ++ atomic_read(&kvm->online_vcpus)); ++ ++ /* ++ * If the host uses TSC clock, then passthrough TSC as stable ++ * to the guest. ++ */ ++ host_tsc_clocksource = kvm_get_time_and_clockread( ++ &ka->master_kernel_ns, ++ &ka->master_cycle_now); ++ ++ ka->use_master_clock = host_tsc_clocksource && vcpus_matched ++ && !ka->backwards_tsc_observed ++ && !ka->boot_vcpu_runs_old_kvmclock; ++ ++ if (ka->use_master_clock) ++ atomic_set(&kvm_guest_has_master_clock, 1); ++ ++ vclock_mode = pvclock_gtod_data.clock.vclock_mode; ++ trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode, ++ vcpus_matched); ++#endif ++} ++ ++void kvm_make_mclock_inprogress_request(struct kvm *kvm) ++{ ++ kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); ++} ++ ++static void kvm_gen_update_masterclock(struct kvm *kvm) ++{ ++#ifdef CONFIG_X86_64 ++ int i; ++ struct kvm_vcpu *vcpu; ++ struct kvm_arch *ka = &kvm->arch; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ kvm_make_mclock_inprogress_request(kvm); ++ /* no guest entries from this point */ ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ /* guest entries allowed */ ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); ++ ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++#endif ++} ++ ++u64 get_kvmclock_ns(struct kvm *kvm) ++{ ++ struct kvm_arch *ka = &kvm->arch; ++ struct pvclock_vcpu_time_info hv_clock; ++ u64 ret; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ if (!ka->use_master_clock) { ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ return ktime_get_boot_ns() + ka->kvmclock_offset; ++ } ++ ++ hv_clock.tsc_timestamp = ka->master_cycle_now; ++ hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ ++ /* both __this_cpu_read() and rdtsc() should be on the same cpu */ ++ get_cpu(); ++ ++ if (__this_cpu_read(cpu_tsc_khz)) { ++ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, ++ &hv_clock.tsc_shift, ++ &hv_clock.tsc_to_system_mul); ++ ret = __pvclock_read_cycles(&hv_clock, rdtsc()); ++ } else ++ ret = ktime_get_boot_ns() + ka->kvmclock_offset; ++ ++ put_cpu(); ++ ++ return ret; ++} ++ ++static void kvm_setup_pvclock_page(struct kvm_vcpu *v) ++{ ++ struct kvm_vcpu_arch *vcpu = &v->arch; ++ struct pvclock_vcpu_time_info guest_hv_clock; ++ ++ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, ++ &guest_hv_clock, sizeof(guest_hv_clock)))) ++ return; ++ ++ /* This VCPU is paused, but it's legal for a guest to read another ++ * VCPU's kvmclock, so we really have to follow the specification where ++ * it says that version is odd if data is being modified, and even after ++ * it is 
consistent. ++ * ++ * Version field updates must be kept separate. This is because ++ * kvm_write_guest_cached might use a "rep movs" instruction, and ++ * writes within a string instruction are weakly ordered. So there ++ * are three writes overall. ++ * ++ * As a small optimization, only write the version field in the first ++ * and third write. The vcpu->pv_time cache is still valid, because the ++ * version field is the first in the struct. ++ */ ++ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); ++ ++ if (guest_hv_clock.version & 1) ++ ++guest_hv_clock.version; /* first time write, random junk */ ++ ++ vcpu->hv_clock.version = guest_hv_clock.version + 1; ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock.version)); ++ ++ smp_wmb(); ++ ++ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ ++ vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); ++ ++ if (vcpu->pvclock_set_guest_stopped_request) { ++ vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED; ++ vcpu->pvclock_set_guest_stopped_request = false; ++ } ++ ++ trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); ++ ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock)); ++ ++ smp_wmb(); ++ ++ vcpu->hv_clock.version++; ++ kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ++ &vcpu->hv_clock, ++ sizeof(vcpu->hv_clock.version)); ++} ++ ++static int kvm_guest_time_update(struct kvm_vcpu *v) ++{ ++ unsigned long flags, tgt_tsc_khz; ++ struct kvm_vcpu_arch *vcpu = &v->arch; ++ struct kvm_arch *ka = &v->kvm->arch; ++ s64 kernel_ns; ++ u64 tsc_timestamp, host_tsc; ++ u8 pvclock_flags; ++ bool use_master_clock; ++ ++ kernel_ns = 0; ++ host_tsc = 0; ++ ++ /* ++ * If the host uses TSC clock, then passthrough TSC as stable ++ * to the guest. ++ */ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ use_master_clock = ka->use_master_clock; ++ if (use_master_clock) { ++ host_tsc = ka->master_cycle_now; ++ kernel_ns = ka->master_kernel_ns; ++ } ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ ++ /* Keep irq disabled to prevent changes to the clock */ ++ local_irq_save(flags); ++ tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); ++ if (unlikely(tgt_tsc_khz == 0)) { ++ local_irq_restore(flags); ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); ++ return 1; ++ } ++ if (!use_master_clock) { ++ host_tsc = rdtsc(); ++ kernel_ns = ktime_get_boot_ns(); ++ } ++ ++ tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); ++ ++ /* ++ * We may have to catch up the TSC to match elapsed wall clock ++ * time for two reasons, even if kvmclock is used. ++ * 1) CPU could have been running below the maximum TSC rate ++ * 2) Broken TSC compensation resets the base at each VCPU ++ * entry to avoid unknown leaps of TSC even when running ++ * again on the same CPU. This may cause apparent elapsed ++ * time to disappear, and the guest to stand still or run ++ * very slowly. 
++ */ ++ if (vcpu->tsc_catchup) { ++ u64 tsc = compute_guest_tsc(v, kernel_ns); ++ if (tsc > tsc_timestamp) { ++ adjust_tsc_offset_guest(v, tsc - tsc_timestamp); ++ tsc_timestamp = tsc; ++ } ++ } ++ ++ local_irq_restore(flags); ++ ++ /* With all the info we got, fill in the values */ ++ ++ if (kvm_has_tsc_control) ++ tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); ++ ++ if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { ++ kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, ++ &vcpu->hv_clock.tsc_shift, ++ &vcpu->hv_clock.tsc_to_system_mul); ++ vcpu->hw_tsc_khz = tgt_tsc_khz; ++ } ++ ++ vcpu->hv_clock.tsc_timestamp = tsc_timestamp; ++ vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; ++ vcpu->last_guest_tsc = tsc_timestamp; ++ ++ /* If the host uses TSC clocksource, then it is stable */ ++ pvclock_flags = 0; ++ if (use_master_clock) ++ pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; ++ ++ vcpu->hv_clock.flags = pvclock_flags; ++ ++ if (vcpu->pv_time_enabled) ++ kvm_setup_pvclock_page(v); ++ if (v == kvm_get_vcpu(v->kvm, 0)) ++ kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock); ++ return 0; ++} ++ ++/* ++ * kvmclock updates which are isolated to a given vcpu, such as ++ * vcpu->cpu migration, should not allow system_timestamp from ++ * the rest of the vcpus to remain static. Otherwise ntp frequency ++ * correction applies to one vcpu's system_timestamp but not ++ * the others. ++ * ++ * So in those cases, request a kvmclock update for all vcpus. ++ * We need to rate-limit these requests though, as they can ++ * considerably slow guests that have a large number of vcpus. ++ * The time for a remote vcpu to update its kvmclock is bound ++ * by the delay we use to rate-limit the updates. ++ */ ++ ++#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100) ++ ++static void kvmclock_update_fn(struct work_struct *work) ++{ ++ int i; ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct kvm_arch *ka = container_of(dwork, struct kvm_arch, ++ kvmclock_update_work); ++ struct kvm *kvm = container_of(ka, struct kvm, arch); ++ struct kvm_vcpu *vcpu; ++ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ kvm_vcpu_kick(vcpu); ++ } ++} ++ ++static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) ++{ ++ struct kvm *kvm = v->kvm; ++ ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); ++ schedule_delayed_work(&kvm->arch.kvmclock_update_work, ++ KVMCLOCK_UPDATE_DELAY); ++} ++ ++#define KVMCLOCK_SYNC_PERIOD (300 * HZ) ++ ++static void kvmclock_sync_fn(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct kvm_arch *ka = container_of(dwork, struct kvm_arch, ++ kvmclock_sync_work); ++ struct kvm *kvm = container_of(ka, struct kvm, arch); ++ ++ if (!kvmclock_periodic_sync) ++ return; ++ ++ schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); ++ schedule_delayed_work(&kvm->arch.kvmclock_sync_work, ++ KVMCLOCK_SYNC_PERIOD); ++} ++ ++static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ u32 msr = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr) { ++ case MSR_IA32_MCG_STATUS: ++ vcpu->arch.mcg_status = data; ++ break; ++ case MSR_IA32_MCG_CTL: ++ if (!(mcg_cap & MCG_CTL_P) && ++ (data || !msr_info->host_initiated)) ++ return 1; ++ if (data != 0 && data != ~(u64)0) ++ return 1; ++ vcpu->arch.mcg_ctl = data; ++ break; ++ default: ++ if (msr >= MSR_IA32_MC0_CTL && ++ msr < MSR_IA32_MCx_CTL(bank_num)) { ++ u32 
offset = msr - MSR_IA32_MC0_CTL; ++ /* only 0 or all 1s can be written to IA32_MCi_CTL ++ * some Linux kernels though clear bit 10 in bank 4 to ++ * workaround a BIOS/GART TBL issue on AMD K8s, ignore ++ * this to avoid an uncatched #GP in the guest ++ */ ++ if ((offset & 0x3) == 0 && ++ data != 0 && (data | (1 << 10)) != ~(u64)0) ++ return -1; ++ if (!msr_info->host_initiated && ++ (offset & 0x3) == 1 && data != 0) ++ return -1; ++ vcpu->arch.mce_banks[offset] = data; ++ break; ++ } ++ return 1; ++ } ++ return 0; ++} ++ ++static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ int lm = is_long_mode(vcpu); ++ u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 ++ : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; ++ u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 ++ : kvm->arch.xen_hvm_config.blob_size_32; ++ u32 page_num = data & ~PAGE_MASK; ++ u64 page_addr = data & PAGE_MASK; ++ u8 *page; ++ int r; ++ ++ r = -E2BIG; ++ if (page_num >= blob_size) ++ goto out; ++ r = -ENOMEM; ++ page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE); ++ if (IS_ERR(page)) { ++ r = PTR_ERR(page); ++ goto out; ++ } ++ if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) ++ goto out_free; ++ r = 0; ++out_free: ++ kfree(page); ++out: ++ return r; ++} ++ ++static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) ++{ ++ gpa_t gpa = data & ~0x3f; ++ ++ /* Bits 3:5 are reserved, Should be zero */ ++ if (data & 0x38) ++ return 1; ++ ++ vcpu->arch.apf.msr_val = data; ++ ++ if (!(data & KVM_ASYNC_PF_ENABLED)) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ return 0; ++ } ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, ++ sizeof(u32))) ++ return 1; ++ ++ vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); ++ vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; ++ kvm_async_pf_wakeup_all(vcpu); ++ return 0; ++} ++ ++static void kvmclock_reset(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pv_time_enabled = false; ++} ++ ++static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) ++{ ++ ++vcpu->stat.tlb_flush; ++ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa); ++} ++ ++static void record_steal_time(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) ++ return; ++ ++ if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) ++ return; ++ ++ /* ++ * Doing a TLB flush here, on the guest's behalf, can avoid ++ * expensive IPIs. 
++ */ ++ if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) ++ kvm_vcpu_flush_tlb(vcpu, false); ++ ++ if (vcpu->arch.st.steal.version & 1) ++ vcpu->arch.st.steal.version += 1; /* first time write, random junk */ ++ ++ vcpu->arch.st.steal.version += 1; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ ++ smp_wmb(); ++ ++ vcpu->arch.st.steal.steal += current->sched_info.run_delay - ++ vcpu->arch.st.last_steal; ++ vcpu->arch.st.last_steal = current->sched_info.run_delay; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++ ++ smp_wmb(); ++ ++ vcpu->arch.st.steal.version += 1; ++ ++ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); ++} ++ ++int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ bool pr = false; ++ u32 msr = msr_info->index; ++ u64 data = msr_info->data; ++ ++ switch (msr) { ++ case MSR_AMD64_NB_CFG: ++ case MSR_IA32_UCODE_WRITE: ++ case MSR_VM_HSAVE_PA: ++ case MSR_AMD64_PATCH_LOADER: ++ case MSR_AMD64_BU_CFG2: ++ case MSR_AMD64_DC_CFG: ++ case MSR_F15H_EX_CFG: ++ break; ++ ++ case MSR_IA32_UCODE_REV: ++ if (msr_info->host_initiated) ++ vcpu->arch.microcode_version = data; ++ break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.arch_capabilities = data; ++ break; ++ case MSR_EFER: ++ return set_efer(vcpu, msr_info); ++ case MSR_K7_HWCR: ++ data &= ~(u64)0x40; /* ignore flush filter disable */ ++ data &= ~(u64)0x100; /* ignore ignne emulation enable */ ++ data &= ~(u64)0x8; /* ignore TLB cache disable */ ++ data &= ~(u64)0x40000; /* ignore Mc status write enable */ ++ if (data != 0) { ++ vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", ++ data); ++ return 1; ++ } ++ break; ++ case MSR_FAM10H_MMIO_CONF_BASE: ++ if (data != 0) { ++ vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " ++ "0x%llx\n", data); ++ return 1; ++ } ++ break; ++ case MSR_IA32_DEBUGCTLMSR: ++ if (!data) { ++ /* We support the non-activated case already */ ++ break; ++ } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { ++ /* Values other than LBR and BTF are vendor-specific, ++ thus reserved and should throw a #GP */ ++ return 1; ++ } ++ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", ++ __func__, data); ++ break; ++ case 0x200 ... 0x2ff: ++ return kvm_mtrr_set_msr(vcpu, msr, data); ++ case MSR_IA32_APICBASE: ++ return kvm_set_apic_base(vcpu, msr_info); ++ case APIC_BASE_MSR ... 
APIC_BASE_MSR + 0x3ff: ++ return kvm_x2apic_msr_write(vcpu, msr, data); ++ case MSR_IA32_TSCDEADLINE: ++ kvm_set_lapic_tscdeadline_msr(vcpu, data); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { ++ if (!msr_info->host_initiated) { ++ s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; ++ adjust_tsc_offset_guest(vcpu, adj); ++ } ++ vcpu->arch.ia32_tsc_adjust_msr = data; ++ } ++ break; ++ case MSR_IA32_MISC_ENABLE: ++ vcpu->arch.ia32_misc_enable_msr = data; ++ break; ++ case MSR_IA32_SMBASE: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.smbase = data; ++ break; ++ case MSR_IA32_TSC: ++ kvm_write_tsc(vcpu, msr_info); ++ break; ++ case MSR_SMI_COUNT: ++ if (!msr_info->host_initiated) ++ return 1; ++ vcpu->arch.smi_count = data; ++ break; ++ case MSR_KVM_WALL_CLOCK_NEW: ++ case MSR_KVM_WALL_CLOCK: ++ vcpu->kvm->arch.wall_clock = data; ++ kvm_write_wall_clock(vcpu->kvm, data); ++ break; ++ case MSR_KVM_SYSTEM_TIME_NEW: ++ case MSR_KVM_SYSTEM_TIME: { ++ struct kvm_arch *ka = &vcpu->kvm->arch; ++ ++ kvmclock_reset(vcpu); ++ ++ if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) { ++ bool tmp = (msr == MSR_KVM_SYSTEM_TIME); ++ ++ if (ka->boot_vcpu_runs_old_kvmclock != tmp) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ ++ ka->boot_vcpu_runs_old_kvmclock = tmp; ++ } ++ ++ vcpu->arch.time = data; ++ kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); ++ ++ /* we verify if the enable bit is set... */ ++ if (!(data & 1)) ++ break; ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ++ &vcpu->arch.pv_time, data & ~1ULL, ++ sizeof(struct pvclock_vcpu_time_info))) ++ vcpu->arch.pv_time_enabled = false; ++ else ++ vcpu->arch.pv_time_enabled = true; ++ ++ break; ++ } ++ case MSR_KVM_ASYNC_PF_EN: ++ if (kvm_pv_enable_async_pf(vcpu, data)) ++ return 1; ++ break; ++ case MSR_KVM_STEAL_TIME: ++ ++ if (unlikely(!sched_info_on())) ++ return 1; ++ ++ if (data & KVM_STEAL_RESERVED_MASK) ++ return 1; ++ ++ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, ++ data & KVM_STEAL_VALID_BITS, ++ sizeof(struct kvm_steal_time))) ++ return 1; ++ ++ vcpu->arch.st.msr_val = data; ++ ++ if (!(data & KVM_MSR_ENABLED)) ++ break; ++ ++ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); ++ ++ break; ++ case MSR_KVM_PV_EOI_EN: ++ if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8))) ++ return 1; ++ break; ++ ++ case MSR_IA32_MCG_CTL: ++ case MSR_IA32_MCG_STATUS: ++ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: ++ return set_msr_mce(vcpu, msr_info); ++ ++ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: ++ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: ++ pr = true; /* fall through */ ++ case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: ++ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: ++ if (kvm_pmu_is_valid_msr(vcpu, msr)) ++ return kvm_pmu_set_msr(vcpu, msr_info); ++ ++ if (pr || data != 0) ++ vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " ++ "0x%x data 0x%llx\n", msr, data); ++ break; ++ case MSR_K7_CLK_CTL: ++ /* ++ * Ignore all writes to this no longer documented MSR. ++ * Writes are only relevant for old K7 processors, ++ * all pre-dating SVM, but a recommended workaround from ++ * AMD for these chips. It is possible to specify the ++ * affected processor models on the command line, hence ++ * the need to ignore the workaround. ++ */ ++ break; ++ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: ++ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: ++ case HV_X64_MSR_CRASH_CTL: ++ case HV_X64_MSR_STIMER0_CONFIG ... 
HV_X64_MSR_STIMER3_COUNT: ++ case HV_X64_MSR_REENLIGHTENMENT_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_STATUS: ++ return kvm_hv_set_msr_common(vcpu, msr, data, ++ msr_info->host_initiated); ++ case MSR_IA32_BBL_CR_CTL3: ++ /* Drop writes to this legacy MSR -- see rdmsr ++ * counterpart for further detail. ++ */ ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ break; ++ case MSR_AMD64_OSVW_ID_LENGTH: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ vcpu->arch.osvw.length = data; ++ break; ++ case MSR_AMD64_OSVW_STATUS: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ vcpu->arch.osvw.status = data; ++ break; ++ case MSR_PLATFORM_INFO: ++ if (!msr_info->host_initiated || ++ (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && ++ cpuid_fault_enabled(vcpu))) ++ return 1; ++ vcpu->arch.msr_platform_info = data; ++ break; ++ case MSR_MISC_FEATURES_ENABLES: ++ if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT || ++ (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && ++ !supports_cpuid_fault(vcpu))) ++ return 1; ++ vcpu->arch.msr_misc_features_enables = data; ++ break; ++ default: ++ if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) ++ return xen_hvm_config(vcpu, data); ++ if (kvm_pmu_is_valid_msr(vcpu, msr)) ++ return kvm_pmu_set_msr(vcpu, msr_info); ++ if (!ignore_msrs) { ++ vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ return 1; ++ } else { ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, ++ "ignored wrmsr: 0x%x data 0x%llx\n", ++ msr, data); ++ break; ++ } ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_set_msr_common); ++ ++ ++/* ++ * Reads an msr value (of 'msr_index') into 'pdata'. ++ * Returns 0 on success, non-0 otherwise. ++ * Assumes vcpu_load() was already called. ++ */ ++int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) ++{ ++ return kvm_x86_ops->get_msr(vcpu, msr); ++} ++EXPORT_SYMBOL_GPL(kvm_get_msr); ++ ++static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) ++{ ++ u64 data; ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ ++ switch (msr) { ++ case MSR_IA32_P5_MC_ADDR: ++ case MSR_IA32_P5_MC_TYPE: ++ data = 0; ++ break; ++ case MSR_IA32_MCG_CAP: ++ data = vcpu->arch.mcg_cap; ++ break; ++ case MSR_IA32_MCG_CTL: ++ if (!(mcg_cap & MCG_CTL_P) && !host) ++ return 1; ++ data = vcpu->arch.mcg_ctl; ++ break; ++ case MSR_IA32_MCG_STATUS: ++ data = vcpu->arch.mcg_status; ++ break; ++ default: ++ if (msr >= MSR_IA32_MC0_CTL && ++ msr < MSR_IA32_MCx_CTL(bank_num)) { ++ u32 offset = msr - MSR_IA32_MC0_CTL; ++ data = vcpu->arch.mce_banks[offset]; ++ break; ++ } ++ return 1; ++ } ++ *pdata = data; ++ return 0; ++} ++ ++int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ++{ ++ switch (msr_info->index) { ++ case MSR_IA32_PLATFORM_ID: ++ case MSR_IA32_EBL_CR_POWERON: ++ case MSR_IA32_DEBUGCTLMSR: ++ case MSR_IA32_LASTBRANCHFROMIP: ++ case MSR_IA32_LASTBRANCHTOIP: ++ case MSR_IA32_LASTINTFROMIP: ++ case MSR_IA32_LASTINTTOIP: ++ case MSR_K8_SYSCFG: ++ case MSR_K8_TSEG_ADDR: ++ case MSR_K8_TSEG_MASK: ++ case MSR_K7_HWCR: ++ case MSR_VM_HSAVE_PA: ++ case MSR_K8_INT_PENDING_MSG: ++ case MSR_AMD64_NB_CFG: ++ case MSR_FAM10H_MMIO_CONF_BASE: ++ case MSR_AMD64_BU_CFG2: ++ case MSR_IA32_PERF_CTL: ++ case MSR_AMD64_DC_CFG: ++ case MSR_F15H_EX_CFG: ++ msr_info->data = 0; ++ break; ++ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: ++ case MSR_K7_EVNTSEL0 ... 
MSR_K7_EVNTSEL3: ++ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: ++ case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: ++ case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: ++ if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) ++ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); ++ msr_info->data = 0; ++ break; ++ case MSR_IA32_UCODE_REV: ++ msr_info->data = vcpu->arch.microcode_version; ++ break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) ++ return 1; ++ msr_info->data = vcpu->arch.arch_capabilities; ++ break; ++ case MSR_IA32_TSC: ++ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; ++ break; ++ case MSR_MTRRcap: ++ case 0x200 ... 0x2ff: ++ return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); ++ case 0xcd: /* fsb frequency */ ++ msr_info->data = 3; ++ break; ++ /* ++ * MSR_EBC_FREQUENCY_ID ++ * Conservative value valid for even the basic CPU models. ++ * Models 0,1: 000 in bits 23:21 indicating a bus speed of ++ * 100MHz, model 2 000 in bits 18:16 indicating 100MHz, ++ * and 266MHz for model 3, or 4. Set Core Clock ++ * Frequency to System Bus Frequency Ratio to 1 (bits ++ * 31:24) even though these are only valid for CPU ++ * models > 2, however guests may end up dividing or ++ * multiplying by zero otherwise. ++ */ ++ case MSR_EBC_FREQUENCY_ID: ++ msr_info->data = 1 << 24; ++ break; ++ case MSR_IA32_APICBASE: ++ msr_info->data = kvm_get_apic_base(vcpu); ++ break; ++ case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: ++ return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); ++ break; ++ case MSR_IA32_TSCDEADLINE: ++ msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); ++ break; ++ case MSR_IA32_TSC_ADJUST: ++ msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr; ++ break; ++ case MSR_IA32_MISC_ENABLE: ++ msr_info->data = vcpu->arch.ia32_misc_enable_msr; ++ break; ++ case MSR_IA32_SMBASE: ++ if (!msr_info->host_initiated) ++ return 1; ++ msr_info->data = vcpu->arch.smbase; ++ break; ++ case MSR_SMI_COUNT: ++ msr_info->data = vcpu->arch.smi_count; ++ break; ++ case MSR_IA32_PERF_STATUS: ++ /* TSC increment by tick */ ++ msr_info->data = 1000ULL; ++ /* CPU multiplier */ ++ msr_info->data |= (((uint64_t)4ULL) << 40); ++ break; ++ case MSR_EFER: ++ msr_info->data = vcpu->arch.efer; ++ break; ++ case MSR_KVM_WALL_CLOCK: ++ case MSR_KVM_WALL_CLOCK_NEW: ++ msr_info->data = vcpu->kvm->arch.wall_clock; ++ break; ++ case MSR_KVM_SYSTEM_TIME: ++ case MSR_KVM_SYSTEM_TIME_NEW: ++ msr_info->data = vcpu->arch.time; ++ break; ++ case MSR_KVM_ASYNC_PF_EN: ++ msr_info->data = vcpu->arch.apf.msr_val; ++ break; ++ case MSR_KVM_STEAL_TIME: ++ msr_info->data = vcpu->arch.st.msr_val; ++ break; ++ case MSR_KVM_PV_EOI_EN: ++ msr_info->data = vcpu->arch.pv_eoi.msr_val; ++ break; ++ case MSR_IA32_P5_MC_ADDR: ++ case MSR_IA32_P5_MC_TYPE: ++ case MSR_IA32_MCG_CAP: ++ case MSR_IA32_MCG_CTL: ++ case MSR_IA32_MCG_STATUS: ++ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: ++ return get_msr_mce(vcpu, msr_info->index, &msr_info->data, ++ msr_info->host_initiated); ++ case MSR_K7_CLK_CTL: ++ /* ++ * Provide expected ramp-up count for K7. All other ++ * are set to zero, indicating minimum divisors for ++ * every field. ++ * ++ * This prevents guest kernels on AMD host with CPU ++ * type 6, model 8 and higher from exploding due to ++ * the rdmsr failing. ++ */ ++ msr_info->data = 0x20000000; ++ break; ++ case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: ++ case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: ++ case HV_X64_MSR_CRASH_CTL: ++ case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: ++ case HV_X64_MSR_REENLIGHTENMENT_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_CONTROL: ++ case HV_X64_MSR_TSC_EMULATION_STATUS: ++ return kvm_hv_get_msr_common(vcpu, ++ msr_info->index, &msr_info->data, ++ msr_info->host_initiated); ++ break; ++ case MSR_IA32_BBL_CR_CTL3: ++ /* This legacy MSR exists but isn't fully documented in current ++ * silicon. It is however accessed by winxp in very narrow ++ * scenarios where it sets bit #19, itself documented as ++ * a "reserved" bit. Best effort attempt to source coherent ++ * read data here should the balance of the register be ++ * interpreted by the guest: ++ * ++ * L2 cache control register 3: 64GB range, 256KB size, ++ * enabled, latency 0x1, configured ++ */ ++ msr_info->data = 0xbe702111; ++ break; ++ case MSR_AMD64_OSVW_ID_LENGTH: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ msr_info->data = vcpu->arch.osvw.length; ++ break; ++ case MSR_AMD64_OSVW_STATUS: ++ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ++ return 1; ++ msr_info->data = vcpu->arch.osvw.status; ++ break; ++ case MSR_PLATFORM_INFO: ++ if (!msr_info->host_initiated && ++ !vcpu->kvm->arch.guest_can_read_msr_platform_info) ++ return 1; ++ msr_info->data = vcpu->arch.msr_platform_info; ++ break; ++ case MSR_MISC_FEATURES_ENABLES: ++ msr_info->data = vcpu->arch.msr_misc_features_enables; ++ break; ++ default: ++ if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) ++ return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); ++ if (!ignore_msrs) { ++ vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n", ++ msr_info->index); ++ return 1; ++ } else { ++ if (report_ignored_msrs) ++ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", ++ msr_info->index); ++ msr_info->data = 0; ++ } ++ break; ++ } ++ return 0; ++} ++EXPORT_SYMBOL_GPL(kvm_get_msr_common); ++ ++/* ++ * Read or write a bunch of msrs. All parameters are kernel addresses. ++ * ++ * @return number of msrs set successfully. ++ */ ++static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, ++ struct kvm_msr_entry *entries, ++ int (*do_msr)(struct kvm_vcpu *vcpu, ++ unsigned index, u64 *data)) ++{ ++ int i; ++ ++ for (i = 0; i < msrs->nmsrs; ++i) ++ if (do_msr(vcpu, entries[i].index, &entries[i].data)) ++ break; ++ ++ return i; ++} ++ ++/* ++ * Read or write a bunch of msrs. Parameters are user addresses. ++ * ++ * @return number of msrs set successfully. 
++ */ ++static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, ++ int (*do_msr)(struct kvm_vcpu *vcpu, ++ unsigned index, u64 *data), ++ int writeback) ++{ ++ struct kvm_msrs msrs; ++ struct kvm_msr_entry *entries; ++ int r, n; ++ unsigned size; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msrs, user_msrs, sizeof msrs)) ++ goto out; ++ ++ r = -E2BIG; ++ if (msrs.nmsrs >= MAX_IO_MSRS) ++ goto out; ++ ++ size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; ++ entries = memdup_user(user_msrs->entries, size); ++ if (IS_ERR(entries)) { ++ r = PTR_ERR(entries); ++ goto out; ++ } ++ ++ r = n = __msr_io(vcpu, &msrs, entries, do_msr); ++ if (r < 0) ++ goto out_free; ++ ++ r = -EFAULT; ++ if (writeback && copy_to_user(user_msrs->entries, entries, size)) ++ goto out_free; ++ ++ r = n; ++ ++out_free: ++ kfree(entries); ++out: ++ return r; ++} ++ ++static inline bool kvm_can_mwait_in_guest(void) ++{ ++ return boot_cpu_has(X86_FEATURE_MWAIT) && ++ !boot_cpu_has_bug(X86_BUG_MONITOR) && ++ boot_cpu_has(X86_FEATURE_ARAT); ++} ++ ++int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ++{ ++ int r = 0; ++ ++ switch (ext) { ++ case KVM_CAP_IRQCHIP: ++ case KVM_CAP_HLT: ++ case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: ++ case KVM_CAP_SET_TSS_ADDR: ++ case KVM_CAP_EXT_CPUID: ++ case KVM_CAP_EXT_EMUL_CPUID: ++ case KVM_CAP_CLOCKSOURCE: ++ case KVM_CAP_PIT: ++ case KVM_CAP_NOP_IO_DELAY: ++ case KVM_CAP_MP_STATE: ++ case KVM_CAP_SYNC_MMU: ++ case KVM_CAP_USER_NMI: ++ case KVM_CAP_REINJECT_CONTROL: ++ case KVM_CAP_IRQ_INJECT_STATUS: ++ case KVM_CAP_IOEVENTFD: ++ case KVM_CAP_IOEVENTFD_NO_LENGTH: ++ case KVM_CAP_PIT2: ++ case KVM_CAP_PIT_STATE2: ++ case KVM_CAP_SET_IDENTITY_MAP_ADDR: ++ case KVM_CAP_XEN_HVM: ++ case KVM_CAP_VCPU_EVENTS: ++ case KVM_CAP_HYPERV: ++ case KVM_CAP_HYPERV_VAPIC: ++ case KVM_CAP_HYPERV_SPIN: ++ case KVM_CAP_HYPERV_SYNIC: ++ case KVM_CAP_HYPERV_SYNIC2: ++ case KVM_CAP_HYPERV_VP_INDEX: ++ case KVM_CAP_HYPERV_EVENTFD: ++ case KVM_CAP_HYPERV_TLBFLUSH: ++ case KVM_CAP_PCI_SEGMENT: ++ case KVM_CAP_DEBUGREGS: ++ case KVM_CAP_X86_ROBUST_SINGLESTEP: ++ case KVM_CAP_XSAVE: ++ case KVM_CAP_ASYNC_PF: ++ case KVM_CAP_GET_TSC_KHZ: ++ case KVM_CAP_KVMCLOCK_CTRL: ++ case KVM_CAP_READONLY_MEM: ++ case KVM_CAP_HYPERV_TIME: ++ case KVM_CAP_IOAPIC_POLARITY_IGNORED: ++ case KVM_CAP_TSC_DEADLINE_TIMER: ++ case KVM_CAP_ENABLE_CAP_VM: ++ case KVM_CAP_DISABLE_QUIRKS: ++ case KVM_CAP_SET_BOOT_CPU_ID: ++ case KVM_CAP_SPLIT_IRQCHIP: ++ case KVM_CAP_IMMEDIATE_EXIT: ++ case KVM_CAP_GET_MSR_FEATURES: ++ case KVM_CAP_MSR_PLATFORM_INFO: ++ r = 1; ++ break; ++ case KVM_CAP_SYNC_REGS: ++ r = KVM_SYNC_X86_VALID_FIELDS; ++ break; ++ case KVM_CAP_ADJUST_CLOCK: ++ r = KVM_CLOCK_TSC_STABLE; ++ break; ++ case KVM_CAP_X86_DISABLE_EXITS: ++ r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; ++ if(kvm_can_mwait_in_guest()) ++ r |= KVM_X86_DISABLE_EXITS_MWAIT; ++ break; ++ case KVM_CAP_X86_SMM: ++ /* SMBASE is usually relocated above 1M on modern chipsets, ++ * and SMM handlers might indeed rely on 4G segment limits, ++ * so do not report SMM to be available if real mode is ++ * emulated via vm86 mode. Still, do not go to great lengths ++ * to avoid userspace's usage of the feature, because it is a ++ * fringe case that is not enabled except via specific settings ++ * of the module parameters. 
++ */ ++ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); ++ break; ++ case KVM_CAP_VAPIC: ++ r = !kvm_x86_ops->cpu_has_accelerated_tpr(); ++ break; ++ case KVM_CAP_NR_VCPUS: ++ r = KVM_SOFT_MAX_VCPUS; ++ break; ++ case KVM_CAP_MAX_VCPUS: ++ r = KVM_MAX_VCPUS; ++ break; ++ case KVM_CAP_MAX_VCPU_ID: ++ r = KVM_MAX_VCPU_ID; ++ break; ++ case KVM_CAP_NR_MEMSLOTS: ++ r = KVM_USER_MEM_SLOTS; ++ break; ++ case KVM_CAP_PV_MMU: /* obsolete */ ++ r = 0; ++ break; ++ case KVM_CAP_MCE: ++ r = KVM_MAX_MCE_BANKS; ++ break; ++ case KVM_CAP_XCRS: ++ r = boot_cpu_has(X86_FEATURE_XSAVE); ++ break; ++ case KVM_CAP_TSC_CONTROL: ++ r = kvm_has_tsc_control; ++ break; ++ case KVM_CAP_X2APIC_API: ++ r = KVM_X2APIC_API_VALID_FLAGS; ++ break; ++ case KVM_CAP_NESTED_STATE: ++ r = kvm_x86_ops->get_nested_state ? ++ kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; ++ break; ++ default: ++ break; ++ } ++ return r; ++ ++} ++ ++long kvm_arch_dev_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ long r; ++ ++ switch (ioctl) { ++ case KVM_GET_MSR_INDEX_LIST: { ++ struct kvm_msr_list __user *user_msr_list = argp; ++ struct kvm_msr_list msr_list; ++ unsigned n; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) ++ goto out; ++ n = msr_list.nmsrs; ++ msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs; ++ if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) ++ goto out; ++ r = -E2BIG; ++ if (n < msr_list.nmsrs) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(user_msr_list->indices, &msrs_to_save, ++ num_msrs_to_save * sizeof(u32))) ++ goto out; ++ if (copy_to_user(user_msr_list->indices + num_msrs_to_save, ++ &emulated_msrs, ++ num_emulated_msrs * sizeof(u32))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_SUPPORTED_CPUID: ++ case KVM_GET_EMULATED_CPUID: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ ++ r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, ++ ioctl); ++ if (r) ++ goto out; ++ ++ r = -EFAULT; ++ if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_X86_GET_MCE_CAP_SUPPORTED: { ++ r = -EFAULT; ++ if (copy_to_user(argp, &kvm_mce_cap_supported, ++ sizeof(kvm_mce_cap_supported))) ++ goto out; ++ r = 0; ++ break; ++ case KVM_GET_MSR_FEATURE_INDEX_LIST: { ++ struct kvm_msr_list __user *user_msr_list = argp; ++ struct kvm_msr_list msr_list; ++ unsigned int n; ++ ++ r = -EFAULT; ++ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) ++ goto out; ++ n = msr_list.nmsrs; ++ msr_list.nmsrs = num_msr_based_features; ++ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) ++ goto out; ++ r = -E2BIG; ++ if (n < msr_list.nmsrs) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(user_msr_list->indices, &msr_based_features, ++ num_msr_based_features * sizeof(u32))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_MSRS: ++ r = msr_io(NULL, argp, do_get_msr_feature, 1); ++ break; ++ } ++ default: ++ r = -EINVAL; ++ } ++out: ++ return r; ++} ++ ++static void wbinvd_ipi(void *garbage) ++{ ++ wbinvd(); ++} ++ ++static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ return kvm_arch_has_noncoherent_dma(vcpu->kvm); ++} ++ ++void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ++{ ++ /* Address WBINVD may be executed by guest */ ++ if (need_emulate_wbinvd(vcpu)) { ++ if (kvm_x86_ops->has_wbinvd_exit()) ++ 
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); ++ else if (vcpu->cpu != -1 && vcpu->cpu != cpu) ++ smp_call_function_single(vcpu->cpu, ++ wbinvd_ipi, NULL, 1); ++ } ++ ++ kvm_x86_ops->vcpu_load(vcpu, cpu); ++ ++ /* Apply any externally detected TSC adjustments (due to suspend) */ ++ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { ++ adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); ++ vcpu->arch.tsc_offset_adjustment = 0; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ } ++ ++ if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { ++ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : ++ rdtsc() - vcpu->arch.last_host_tsc; ++ if (tsc_delta < 0) ++ mark_tsc_unstable("KVM discovered backwards TSC"); ++ ++ if (kvm_check_tsc_unstable()) { ++ u64 offset = kvm_compute_tsc_offset(vcpu, ++ vcpu->arch.last_guest_tsc); ++ kvm_vcpu_write_tsc_offset(vcpu, offset); ++ vcpu->arch.tsc_catchup = 1; ++ } ++ ++ if (kvm_lapic_hv_timer_in_use(vcpu)) ++ kvm_lapic_restart_hv_timer(vcpu); ++ ++ /* ++ * On a host with synchronized TSC, there is no need to update ++ * kvmclock on vcpu->cpu migration ++ */ ++ if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) ++ kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); ++ if (vcpu->cpu != cpu) ++ kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu); ++ vcpu->cpu = cpu; ++ } ++ ++ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); ++} ++ ++static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) ++ return; ++ ++ if (vcpu->arch.st.steal.preempted) ++ return; ++ ++ vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; ++ ++ kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, ++ &vcpu->arch.st.steal.preempted, ++ offsetof(struct kvm_steal_time, preempted), ++ sizeof(vcpu->arch.st.steal.preempted)); ++} ++ ++void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) ++{ ++ int idx; ++ ++ if (vcpu->preempted) ++ vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); ++ ++ /* ++ * Disable page faults because we're in atomic context here. ++ * kvm_write_guest_offset_cached() would call might_fault() ++ * that relies on pagefault_disable() to tell if there's a ++ * bug. NOTE: the write to guest memory may not go through if ++ * during postcopy live migration or if there's heavy guest ++ * paging. ++ */ ++ pagefault_disable(); ++ /* ++ * kvm_memslots() will be called by ++ * kvm_write_guest_offset_cached() so take the srcu lock. ++ */ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ kvm_steal_time_set_preempted(vcpu); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ pagefault_enable(); ++ kvm_x86_ops->vcpu_put(vcpu); ++ vcpu->arch.last_host_tsc = rdtsc(); ++ /* ++ * If userspace has set any breakpoints or watchpoints, dr6 is restored ++ * on every vmexit, but if not, we might have a stale dr6 from the ++ * guest. do_debug expects dr6 to be cleared after it runs, do the same. 
++ */ ++ set_debugreg(0, 6); ++} ++ ++static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, ++ struct kvm_lapic_state *s) ++{ ++ if (vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ ++ return kvm_apic_get_state(vcpu, s); ++} ++ ++static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, ++ struct kvm_lapic_state *s) ++{ ++ int r; ++ ++ r = kvm_apic_set_state(vcpu, s); ++ if (r) ++ return r; ++ update_cr8_intercept(vcpu); ++ ++ return 0; ++} ++ ++static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) ++{ ++ return (!lapic_in_kernel(vcpu) || ++ kvm_apic_accept_pic_intr(vcpu)); ++} ++ ++/* ++ * if userspace requested an interrupt window, check that the ++ * interrupt window is open. ++ * ++ * No need to exit to userspace if we already have an interrupt queued. ++ */ ++static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) ++{ ++ return kvm_arch_interrupt_allowed(vcpu) && ++ !kvm_cpu_has_interrupt(vcpu) && ++ !kvm_event_needs_reinjection(vcpu) && ++ kvm_cpu_accept_dm_intr(vcpu); ++} ++ ++static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, ++ struct kvm_interrupt *irq) ++{ ++ if (irq->irq >= KVM_NR_INTERRUPTS) ++ return -EINVAL; ++ ++ if (!irqchip_in_kernel(vcpu->kvm)) { ++ kvm_queue_interrupt(vcpu, irq->irq, false); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return 0; ++ } ++ ++ /* ++ * With in-kernel LAPIC, we only use this to inject EXTINT, so ++ * fail for in-kernel 8259. ++ */ ++ if (pic_in_kernel(vcpu->kvm)) ++ return -ENXIO; ++ ++ if (vcpu->arch.pending_external_vector != -1) ++ return -EEXIST; ++ ++ vcpu->arch.pending_external_vector = irq->irq; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) ++{ ++ kvm_inject_nmi(vcpu); ++ ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) ++{ ++ kvm_make_request(KVM_REQ_SMI, vcpu); ++ ++ return 0; ++} ++ ++static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, ++ struct kvm_tpr_access_ctl *tac) ++{ ++ if (tac->flags) ++ return -EINVAL; ++ vcpu->arch.tpr_access_reporting = !!tac->enabled; ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, ++ u64 mcg_cap) ++{ ++ int r; ++ unsigned bank_num = mcg_cap & 0xff, bank; ++ ++ r = -EINVAL; ++ if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) ++ goto out; ++ if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) ++ goto out; ++ r = 0; ++ vcpu->arch.mcg_cap = mcg_cap; ++ /* Init IA32_MCG_CTL to all 1s */ ++ if (mcg_cap & MCG_CTL_P) ++ vcpu->arch.mcg_ctl = ~(u64)0; ++ /* Init IA32_MCi_CTL to all 1s */ ++ for (bank = 0; bank < bank_num; bank++) ++ vcpu->arch.mce_banks[bank*4] = ~(u64)0; ++ ++ if (kvm_x86_ops->setup_mce) ++ kvm_x86_ops->setup_mce(vcpu); ++out: ++ return r; ++} ++ ++static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, ++ struct kvm_x86_mce *mce) ++{ ++ u64 mcg_cap = vcpu->arch.mcg_cap; ++ unsigned bank_num = mcg_cap & 0xff; ++ u64 *banks = vcpu->arch.mce_banks; ++ ++ if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) ++ return -EINVAL; ++ /* ++ * if IA32_MCG_CTL is not all 1s, the uncorrected error ++ * reporting is disabled ++ */ ++ if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && ++ vcpu->arch.mcg_ctl != ~(u64)0) ++ return 0; ++ banks += 4 * mce->bank; ++ /* ++ * if IA32_MCi_CTL is not all 1s, the uncorrected error ++ * reporting is disabled for the bank ++ */ ++ if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) ++ return 0; ++ if (mce->status & MCI_STATUS_UC) { ++ if 
((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || ++ !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); ++ return 0; ++ } ++ if (banks[1] & MCI_STATUS_VAL) ++ mce->status |= MCI_STATUS_OVER; ++ banks[2] = mce->addr; ++ banks[3] = mce->misc; ++ vcpu->arch.mcg_status = mce->mcg_status; ++ banks[1] = mce->status; ++ kvm_queue_exception(vcpu, MC_VECTOR); ++ } else if (!(banks[1] & MCI_STATUS_VAL) ++ || !(banks[1] & MCI_STATUS_UC)) { ++ if (banks[1] & MCI_STATUS_VAL) ++ mce->status |= MCI_STATUS_OVER; ++ banks[2] = mce->addr; ++ banks[3] = mce->misc; ++ banks[1] = mce->status; ++ } else ++ banks[1] |= MCI_STATUS_OVER; ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ++ struct kvm_vcpu_events *events) ++{ ++ process_nmi(vcpu); ++ /* ++ * FIXME: pass injected and pending separately. This is only ++ * needed for nested virtualization, whose state cannot be ++ * migrated yet. For now we can combine them. ++ */ ++ events->exception.injected = ++ (vcpu->arch.exception.pending || ++ vcpu->arch.exception.injected) && ++ !kvm_exception_is_soft(vcpu->arch.exception.nr); ++ events->exception.nr = vcpu->arch.exception.nr; ++ events->exception.has_error_code = vcpu->arch.exception.has_error_code; ++ events->exception.pad = 0; ++ events->exception.error_code = vcpu->arch.exception.error_code; ++ ++ events->interrupt.injected = ++ vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; ++ events->interrupt.nr = vcpu->arch.interrupt.nr; ++ events->interrupt.soft = 0; ++ events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); ++ ++ events->nmi.injected = vcpu->arch.nmi_injected; ++ events->nmi.pending = vcpu->arch.nmi_pending != 0; ++ events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); ++ events->nmi.pad = 0; ++ ++ events->sipi_vector = 0; /* never valid when reporting to user space */ ++ ++ events->smi.smm = is_smm(vcpu); ++ events->smi.pending = vcpu->arch.smi_pending; ++ events->smi.smm_inside_nmi = ++ !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK); ++ events->smi.latched_init = kvm_lapic_latched_init(vcpu); ++ ++ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING ++ | KVM_VCPUEVENT_VALID_SHADOW ++ | KVM_VCPUEVENT_VALID_SMM); ++ memset(&events->reserved, 0, sizeof(events->reserved)); ++} ++ ++static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); ++ ++static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, ++ struct kvm_vcpu_events *events) ++{ ++ if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING ++ | KVM_VCPUEVENT_VALID_SIPI_VECTOR ++ | KVM_VCPUEVENT_VALID_SHADOW ++ | KVM_VCPUEVENT_VALID_SMM)) ++ return -EINVAL; ++ ++ if (events->exception.injected && ++ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || ++ is_guest_mode(vcpu))) ++ return -EINVAL; ++ ++ /* INITs are latched while in SMM */ ++ if (events->flags & KVM_VCPUEVENT_VALID_SMM && ++ (events->smi.smm || events->smi.pending) && ++ vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) ++ return -EINVAL; ++ ++ process_nmi(vcpu); ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.pending = events->exception.injected; ++ vcpu->arch.exception.nr = events->exception.nr; ++ vcpu->arch.exception.has_error_code = events->exception.has_error_code; ++ vcpu->arch.exception.error_code = events->exception.error_code; ++ ++ vcpu->arch.interrupt.injected = events->interrupt.injected; ++ vcpu->arch.interrupt.nr = events->interrupt.nr; ++ vcpu->arch.interrupt.soft = events->interrupt.soft; ++ if (events->flags & 
KVM_VCPUEVENT_VALID_SHADOW) ++ kvm_x86_ops->set_interrupt_shadow(vcpu, ++ events->interrupt.shadow); ++ ++ vcpu->arch.nmi_injected = events->nmi.injected; ++ if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) ++ vcpu->arch.nmi_pending = events->nmi.pending; ++ kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); ++ ++ if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && ++ lapic_in_kernel(vcpu)) ++ vcpu->arch.apic->sipi_vector = events->sipi_vector; ++ ++ if (events->flags & KVM_VCPUEVENT_VALID_SMM) { ++ u32 hflags = vcpu->arch.hflags; ++ if (events->smi.smm) ++ hflags |= HF_SMM_MASK; ++ else ++ hflags &= ~HF_SMM_MASK; ++ kvm_set_hflags(vcpu, hflags); ++ ++ vcpu->arch.smi_pending = events->smi.pending; ++ ++ if (events->smi.smm) { ++ if (events->smi.smm_inside_nmi) ++ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; ++ else ++ vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; ++ if (lapic_in_kernel(vcpu)) { ++ if (events->smi.latched_init) ++ set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); ++ else ++ clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); ++ } ++ } ++ } ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, ++ struct kvm_debugregs *dbgregs) ++{ ++ unsigned long val; ++ ++ memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); ++ kvm_get_dr(vcpu, 6, &val); ++ dbgregs->dr6 = val; ++ dbgregs->dr7 = vcpu->arch.dr7; ++ dbgregs->flags = 0; ++ memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); ++} ++ ++static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, ++ struct kvm_debugregs *dbgregs) ++{ ++ if (dbgregs->flags) ++ return -EINVAL; ++ ++ if (dbgregs->dr6 & ~0xffffffffull) ++ return -EINVAL; ++ if (dbgregs->dr7 & ~0xffffffffull) ++ return -EINVAL; ++ ++ memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); ++ kvm_update_dr0123(vcpu); ++ vcpu->arch.dr6 = dbgregs->dr6; ++ kvm_update_dr6(vcpu); ++ vcpu->arch.dr7 = dbgregs->dr7; ++ kvm_update_dr7(vcpu); ++ ++ return 0; ++} ++ ++#define XSTATE_COMPACTION_ENABLED (1ULL << 63) ++ ++static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) ++{ ++ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; ++ u64 xstate_bv = xsave->header.xfeatures; ++ u64 valid; ++ ++ /* ++ * Copy legacy XSAVE area, to avoid complications with CPUID ++ * leaves 0 and 1 in the loop below. ++ */ ++ memcpy(dest, xsave, XSAVE_HDR_OFFSET); ++ ++ /* Set XSTATE_BV */ ++ xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; ++ *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; ++ ++ /* ++ * Copy each region from the possibly compacted offset to the ++ * non-compacted offset. ++ */ ++ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; ++ while (valid) { ++ u64 feature = valid & -valid; ++ int index = fls64(feature) - 1; ++ void *src = get_xsave_addr(xsave, feature); ++ ++ if (src) { ++ u32 size, offset, ecx, edx; ++ cpuid_count(XSTATE_CPUID, index, ++ &size, &offset, &ecx, &edx); ++ if (feature == XFEATURE_MASK_PKRU) ++ memcpy(dest + offset, &vcpu->arch.pkru, ++ sizeof(vcpu->arch.pkru)); ++ else ++ memcpy(dest + offset, src, size); ++ ++ } ++ ++ valid -= feature; ++ } ++} ++ ++static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) ++{ ++ struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; ++ u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); ++ u64 valid; ++ ++ /* ++ * Copy legacy XSAVE area, to avoid complications with CPUID ++ * leaves 0 and 1 in the loop below. 
++ */ ++ memcpy(xsave, src, XSAVE_HDR_OFFSET); ++ ++ /* Set XSTATE_BV and possibly XCOMP_BV. */ ++ xsave->header.xfeatures = xstate_bv; ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; ++ ++ /* ++ * Copy each region from the non-compacted offset to the ++ * possibly compacted offset. ++ */ ++ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; ++ while (valid) { ++ u64 feature = valid & -valid; ++ int index = fls64(feature) - 1; ++ void *dest = get_xsave_addr(xsave, feature); ++ ++ if (dest) { ++ u32 size, offset, ecx, edx; ++ cpuid_count(XSTATE_CPUID, index, ++ &size, &offset, &ecx, &edx); ++ if (feature == XFEATURE_MASK_PKRU) ++ memcpy(&vcpu->arch.pkru, src + offset, ++ sizeof(vcpu->arch.pkru)); ++ else ++ memcpy(dest, src + offset, size); ++ } ++ ++ valid -= feature; ++ } ++} ++ ++static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) ++{ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) { ++ memset(guest_xsave, 0, sizeof(struct kvm_xsave)); ++ fill_xsave((u8 *) guest_xsave->region, vcpu); ++ } else { ++ memcpy(guest_xsave->region, ++ &vcpu->arch.guest_fpu.state.fxsave, ++ sizeof(struct fxregs_state)); ++ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = ++ XFEATURE_MASK_FPSSE; ++ } ++} ++ ++#define XSAVE_MXCSR_OFFSET 24 ++ ++static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, ++ struct kvm_xsave *guest_xsave) ++{ ++ u64 xstate_bv = ++ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; ++ u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) { ++ /* ++ * Here we allow setting states that are not present in ++ * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility ++ * with old userspace. ++ */ ++ if (xstate_bv & ~kvm_supported_xcr0() || ++ mxcsr & ~mxcsr_feature_mask) ++ return -EINVAL; ++ load_xsave(vcpu, (u8 *)guest_xsave->region); ++ } else { ++ if (xstate_bv & ~XFEATURE_MASK_FPSSE || ++ mxcsr & ~mxcsr_feature_mask) ++ return -EINVAL; ++ memcpy(&vcpu->arch.guest_fpu.state.fxsave, ++ guest_xsave->region, sizeof(struct fxregs_state)); ++ } ++ return 0; ++} ++ ++static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, ++ struct kvm_xcrs *guest_xcrs) ++{ ++ if (!boot_cpu_has(X86_FEATURE_XSAVE)) { ++ guest_xcrs->nr_xcrs = 0; ++ return; ++ } ++ ++ guest_xcrs->nr_xcrs = 1; ++ guest_xcrs->flags = 0; ++ guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; ++ guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; ++} ++ ++static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, ++ struct kvm_xcrs *guest_xcrs) ++{ ++ int i, r = 0; ++ ++ if (!boot_cpu_has(X86_FEATURE_XSAVE)) ++ return -EINVAL; ++ ++ if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) ++ return -EINVAL; ++ ++ for (i = 0; i < guest_xcrs->nr_xcrs; i++) ++ /* Only support XCR0 currently */ ++ if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { ++ r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, ++ guest_xcrs->xcrs[i].value); ++ break; ++ } ++ if (r) ++ r = -EINVAL; ++ return r; ++} ++ ++/* ++ * kvm_set_guest_paused() indicates to the guest kernel that it has been ++ * stopped by the hypervisor. This function will be called from the host only. ++ * EINVAL is returned when the host attempts to set the flag for a guest that ++ * does not support pv clocks. 
++ */ ++static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) ++{ ++ if (!vcpu->arch.pv_time_enabled) ++ return -EINVAL; ++ vcpu->arch.pvclock_set_guest_stopped_request = true; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ return 0; ++} ++ ++static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, ++ struct kvm_enable_cap *cap) ++{ ++ if (cap->flags) ++ return -EINVAL; ++ ++ switch (cap->cap) { ++ case KVM_CAP_HYPERV_SYNIC2: ++ if (cap->args[0]) ++ return -EINVAL; ++ case KVM_CAP_HYPERV_SYNIC: ++ if (!irqchip_in_kernel(vcpu->kvm)) ++ return -EINVAL; ++ return kvm_hv_activate_synic(vcpu, cap->cap == ++ KVM_CAP_HYPERV_SYNIC2); ++ default: ++ return -EINVAL; ++ } ++} ++ ++long kvm_arch_vcpu_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ struct kvm_vcpu *vcpu = filp->private_data; ++ void __user *argp = (void __user *)arg; ++ int r; ++ union { ++ struct kvm_lapic_state *lapic; ++ struct kvm_xsave *xsave; ++ struct kvm_xcrs *xcrs; ++ void *buffer; ++ } u; ++ ++ vcpu_load(vcpu); ++ ++ u.buffer = NULL; ++ switch (ioctl) { ++ case KVM_GET_LAPIC: { ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); ++ ++ r = -ENOMEM; ++ if (!u.lapic) ++ goto out; ++ r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_LAPIC: { ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ u.lapic = memdup_user(argp, sizeof(*u.lapic)); ++ if (IS_ERR(u.lapic)) { ++ r = PTR_ERR(u.lapic); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); ++ break; ++ } ++ case KVM_INTERRUPT: { ++ struct kvm_interrupt irq; ++ ++ r = -EFAULT; ++ if (copy_from_user(&irq, argp, sizeof irq)) ++ goto out; ++ r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); ++ break; ++ } ++ case KVM_NMI: { ++ r = kvm_vcpu_ioctl_nmi(vcpu); ++ break; ++ } ++ case KVM_SMI: { ++ r = kvm_vcpu_ioctl_smi(vcpu); ++ break; ++ } ++ case KVM_SET_CPUID: { ++ struct kvm_cpuid __user *cpuid_arg = argp; ++ struct kvm_cpuid cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); ++ break; ++ } ++ case KVM_SET_CPUID2: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, ++ cpuid_arg->entries); ++ break; ++ } ++ case KVM_GET_CPUID2: { ++ struct kvm_cpuid2 __user *cpuid_arg = argp; ++ struct kvm_cpuid2 cpuid; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) ++ goto out; ++ r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, ++ cpuid_arg->entries); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_GET_MSRS: { ++ int idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = msr_io(vcpu, argp, do_get_msr, 1); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_SET_MSRS: { ++ int idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = msr_io(vcpu, argp, do_set_msr, 0); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_TPR_ACCESS_REPORTING: { ++ struct kvm_tpr_access_ctl tac; ++ ++ r = -EFAULT; ++ if (copy_from_user(&tac, argp, sizeof tac)) ++ goto out; ++ r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac); ++ if (r) 
++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &tac, sizeof tac)) ++ goto out; ++ r = 0; ++ break; ++ }; ++ case KVM_SET_VAPIC_ADDR: { ++ struct kvm_vapic_addr va; ++ int idx; ++ ++ r = -EINVAL; ++ if (!lapic_in_kernel(vcpu)) ++ goto out; ++ r = -EFAULT; ++ if (copy_from_user(&va, argp, sizeof va)) ++ goto out; ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ case KVM_X86_SETUP_MCE: { ++ u64 mcg_cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) ++ goto out; ++ r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); ++ break; ++ } ++ case KVM_X86_SET_MCE: { ++ struct kvm_x86_mce mce; ++ ++ r = -EFAULT; ++ if (copy_from_user(&mce, argp, sizeof mce)) ++ goto out; ++ r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); ++ break; ++ } ++ case KVM_GET_VCPU_EVENTS: { ++ struct kvm_vcpu_events events; ++ ++ kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_VCPU_EVENTS: { ++ struct kvm_vcpu_events events; ++ ++ r = -EFAULT; ++ if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) ++ break; ++ ++ r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); ++ break; ++ } ++ case KVM_GET_DEBUGREGS: { ++ struct kvm_debugregs dbgregs; ++ ++ kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &dbgregs, ++ sizeof(struct kvm_debugregs))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_DEBUGREGS: { ++ struct kvm_debugregs dbgregs; ++ ++ r = -EFAULT; ++ if (copy_from_user(&dbgregs, argp, ++ sizeof(struct kvm_debugregs))) ++ break; ++ ++ r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); ++ break; ++ } ++ case KVM_GET_XSAVE: { ++ u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); ++ r = -ENOMEM; ++ if (!u.xsave) ++ break; ++ ++ kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_XSAVE: { ++ u.xsave = memdup_user(argp, sizeof(*u.xsave)); ++ if (IS_ERR(u.xsave)) { ++ r = PTR_ERR(u.xsave); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); ++ break; ++ } ++ case KVM_GET_XCRS: { ++ u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); ++ r = -ENOMEM; ++ if (!u.xcrs) ++ break; ++ ++ kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, u.xcrs, ++ sizeof(struct kvm_xcrs))) ++ break; ++ r = 0; ++ break; ++ } ++ case KVM_SET_XCRS: { ++ u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); ++ if (IS_ERR(u.xcrs)) { ++ r = PTR_ERR(u.xcrs); ++ goto out_nofree; ++ } ++ ++ r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); ++ break; ++ } ++ case KVM_SET_TSC_KHZ: { ++ u32 user_tsc_khz; ++ ++ r = -EINVAL; ++ user_tsc_khz = (u32)arg; ++ ++ if (user_tsc_khz >= kvm_max_guest_tsc_khz) ++ goto out; ++ ++ if (user_tsc_khz == 0) ++ user_tsc_khz = tsc_khz; ++ ++ if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) ++ r = 0; ++ ++ goto out; ++ } ++ case KVM_GET_TSC_KHZ: { ++ r = vcpu->arch.virtual_tsc_khz; ++ goto out; ++ } ++ case KVM_KVMCLOCK_CTRL: { ++ r = kvm_set_guest_paused(vcpu); ++ goto out; ++ } ++ case KVM_ENABLE_CAP: { ++ struct kvm_enable_cap cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cap, argp, sizeof(cap))) ++ goto out; ++ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); ++ break; ++ } ++ case KVM_GET_NESTED_STATE: { ++ struct 
kvm_nested_state __user *user_kvm_nested_state = argp; ++ u32 user_data_size; ++ ++ r = -EINVAL; ++ if (!kvm_x86_ops->get_nested_state) ++ break; ++ ++ BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); ++ r = -EFAULT; ++ if (get_user(user_data_size, &user_kvm_nested_state->size)) ++ break; ++ ++ r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, ++ user_data_size); ++ if (r < 0) ++ break; ++ ++ if (r > user_data_size) { ++ if (put_user(r, &user_kvm_nested_state->size)) ++ r = -EFAULT; ++ else ++ r = -E2BIG; ++ break; ++ } ++ ++ r = 0; ++ break; ++ } ++ case KVM_SET_NESTED_STATE: { ++ struct kvm_nested_state __user *user_kvm_nested_state = argp; ++ struct kvm_nested_state kvm_state; ++ int idx; ++ ++ r = -EINVAL; ++ if (!kvm_x86_ops->set_nested_state) ++ break; ++ ++ r = -EFAULT; ++ if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) ++ break; ++ ++ r = -EINVAL; ++ if (kvm_state.size < sizeof(kvm_state)) ++ break; ++ ++ if (kvm_state.flags & ++ ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) ++ break; ++ ++ /* nested_run_pending implies guest_mode. */ ++ if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) ++ break; ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ break; ++ } ++ default: ++ r = -EINVAL; ++ } ++out: ++ kfree(u.buffer); ++out_nofree: ++ vcpu_put(vcpu); ++ return r; ++} ++ ++vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) ++{ ++ return VM_FAULT_SIGBUS; ++} ++ ++static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) ++{ ++ int ret; ++ ++ if (addr > (unsigned int)(-3 * PAGE_SIZE)) ++ return -EINVAL; ++ ret = kvm_x86_ops->set_tss_addr(kvm, addr); ++ return ret; ++} ++ ++static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, ++ u64 ident_addr) ++{ ++ return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr); ++} ++ ++static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, ++ unsigned long kvm_nr_mmu_pages) ++{ ++ if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) ++ return -EINVAL; ++ ++ mutex_lock(&kvm->slots_lock); ++ ++ kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); ++ kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; ++ ++ mutex_unlock(&kvm->slots_lock); ++ return 0; ++} ++ ++static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) ++{ ++ return kvm->arch.n_max_mmu_pages; ++} ++ ++static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ++{ ++ struct kvm_pic *pic = kvm->arch.vpic; ++ int r; ++ ++ r = 0; ++ switch (chip->chip_id) { ++ case KVM_IRQCHIP_PIC_MASTER: ++ memcpy(&chip->chip.pic, &pic->pics[0], ++ sizeof(struct kvm_pic_state)); ++ break; ++ case KVM_IRQCHIP_PIC_SLAVE: ++ memcpy(&chip->chip.pic, &pic->pics[1], ++ sizeof(struct kvm_pic_state)); ++ break; ++ case KVM_IRQCHIP_IOAPIC: ++ kvm_get_ioapic(kvm, &chip->chip.ioapic); ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ return r; ++} ++ ++static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) ++{ ++ struct kvm_pic *pic = kvm->arch.vpic; ++ int r; ++ ++ r = 0; ++ switch (chip->chip_id) { ++ case KVM_IRQCHIP_PIC_MASTER: ++ spin_lock(&pic->lock); ++ memcpy(&pic->pics[0], &chip->chip.pic, ++ sizeof(struct kvm_pic_state)); ++ spin_unlock(&pic->lock); ++ break; ++ case KVM_IRQCHIP_PIC_SLAVE: ++ spin_lock(&pic->lock); ++ memcpy(&pic->pics[1], &chip->chip.pic, ++ sizeof(struct kvm_pic_state)); ++ 
spin_unlock(&pic->lock); ++ break; ++ case KVM_IRQCHIP_IOAPIC: ++ kvm_set_ioapic(kvm, &chip->chip.ioapic); ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ kvm_pic_update_irq(pic); ++ return r; ++} ++ ++static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) ++{ ++ struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state; ++ ++ BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels)); ++ ++ mutex_lock(&kps->lock); ++ memcpy(ps, &kps->channels, sizeof(*ps)); ++ mutex_unlock(&kps->lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) ++{ ++ int i; ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ mutex_lock(&pit->pit_state.lock); ++ memcpy(&pit->pit_state.channels, ps, sizeof(*ps)); ++ for (i = 0; i < 3; i++) ++ kvm_pit_load_count(pit, i, ps->channels[i].count, 0); ++ mutex_unlock(&pit->pit_state.lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) ++{ ++ mutex_lock(&kvm->arch.vpit->pit_state.lock); ++ memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, ++ sizeof(ps->channels)); ++ ps->flags = kvm->arch.vpit->pit_state.flags; ++ mutex_unlock(&kvm->arch.vpit->pit_state.lock); ++ memset(&ps->reserved, 0, sizeof(ps->reserved)); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) ++{ ++ int start = 0; ++ int i; ++ u32 prev_legacy, cur_legacy; ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ mutex_lock(&pit->pit_state.lock); ++ prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; ++ cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; ++ if (!prev_legacy && cur_legacy) ++ start = 1; ++ memcpy(&pit->pit_state.channels, &ps->channels, ++ sizeof(pit->pit_state.channels)); ++ pit->pit_state.flags = ps->flags; ++ for (i = 0; i < 3; i++) ++ kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count, ++ start && i == 0); ++ mutex_unlock(&pit->pit_state.lock); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_reinject(struct kvm *kvm, ++ struct kvm_reinject_control *control) ++{ ++ struct kvm_pit *pit = kvm->arch.vpit; ++ ++ if (!pit) ++ return -ENXIO; ++ ++ /* pit->pit_state.lock was overloaded to prevent userspace from getting ++ * an inconsistent state after running multiple KVM_REINJECT_CONTROL ++ * ioctls in parallel. Use a separate lock if that ioctl isn't rare. ++ */ ++ mutex_lock(&pit->pit_state.lock); ++ kvm_pit_set_reinject(pit, control->pit_reinject); ++ mutex_unlock(&pit->pit_state.lock); ++ ++ return 0; ++} ++ ++/** ++ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot ++ * @kvm: kvm instance ++ * @log: slot id and address to which we copy the log ++ * ++ * Steps 1-4 below provide general overview of dirty page logging. See ++ * kvm_get_dirty_log_protect() function description for additional details. ++ * ++ * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we ++ * always flush the TLB (step 4) even if previous step failed and the dirty ++ * bitmap may be corrupt. Regardless of previous outcome the KVM logging API ++ * does not preclude user space subsequent dirty log read. Flushing TLB ensures ++ * writes will be marked dirty for next log read. ++ * ++ * 1. Take a snapshot of the bit and clear it if needed. ++ * 2. Write protect the corresponding page. ++ * 3. Copy the snapshot to the userspace. ++ * 4. Flush TLB's if needed. 
++ */ ++int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) ++{ ++ bool is_dirty = false; ++ int r; ++ ++ mutex_lock(&kvm->slots_lock); ++ ++ /* ++ * Flush potentially hardware-cached dirty pages to dirty_bitmap. ++ */ ++ if (kvm_x86_ops->flush_log_dirty) ++ kvm_x86_ops->flush_log_dirty(kvm); ++ ++ r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); ++ ++ /* ++ * All the TLBs can be flushed out of mmu lock, see the comments in ++ * kvm_mmu_slot_remove_write_access(). ++ */ ++ lockdep_assert_held(&kvm->slots_lock); ++ if (is_dirty) ++ kvm_flush_remote_tlbs(kvm); ++ ++ mutex_unlock(&kvm->slots_lock); ++ return r; ++} ++ ++int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, ++ bool line_status) ++{ ++ if (!irqchip_in_kernel(kvm)) ++ return -ENXIO; ++ ++ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, ++ irq_event->irq, irq_event->level, ++ line_status); ++ return 0; ++} ++ ++static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, ++ struct kvm_enable_cap *cap) ++{ ++ int r; ++ ++ if (cap->flags) ++ return -EINVAL; ++ ++ switch (cap->cap) { ++ case KVM_CAP_DISABLE_QUIRKS: ++ kvm->arch.disabled_quirks = cap->args[0]; ++ r = 0; ++ break; ++ case KVM_CAP_SPLIT_IRQCHIP: { ++ mutex_lock(&kvm->lock); ++ r = -EINVAL; ++ if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) ++ goto split_irqchip_unlock; ++ r = -EEXIST; ++ if (irqchip_in_kernel(kvm)) ++ goto split_irqchip_unlock; ++ if (kvm->created_vcpus) ++ goto split_irqchip_unlock; ++ r = kvm_setup_empty_irq_routing(kvm); ++ if (r) ++ goto split_irqchip_unlock; ++ /* Pairs with irqchip_in_kernel. */ ++ smp_wmb(); ++ kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; ++ kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; ++ r = 0; ++split_irqchip_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_CAP_X2APIC_API: ++ r = -EINVAL; ++ if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS) ++ break; ++ ++ if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS) ++ kvm->arch.x2apic_format = true; ++ if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) ++ kvm->arch.x2apic_broadcast_quirk_disabled = true; ++ ++ r = 0; ++ break; ++ case KVM_CAP_X86_DISABLE_EXITS: ++ r = -EINVAL; ++ if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) ++ break; ++ ++ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && ++ kvm_can_mwait_in_guest()) ++ kvm->arch.mwait_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) ++ kvm->arch.hlt_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) ++ kvm->arch.pause_in_guest = true; ++ r = 0; ++ break; ++ case KVM_CAP_MSR_PLATFORM_INFO: ++ kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; ++ r = 0; ++ break; ++ default: ++ r = -EINVAL; ++ break; ++ } ++ return r; ++} ++ ++long kvm_arch_vm_ioctl(struct file *filp, ++ unsigned int ioctl, unsigned long arg) ++{ ++ struct kvm *kvm = filp->private_data; ++ void __user *argp = (void __user *)arg; ++ int r = -ENOTTY; ++ /* ++ * This union makes it completely explicit to gcc-3.x ++ * that these two variables' stack usage should be ++ * combined, not added together. 
++ */ ++ union { ++ struct kvm_pit_state ps; ++ struct kvm_pit_state2 ps2; ++ struct kvm_pit_config pit_config; ++ } u; ++ ++ switch (ioctl) { ++ case KVM_SET_TSS_ADDR: ++ r = kvm_vm_ioctl_set_tss_addr(kvm, arg); ++ break; ++ case KVM_SET_IDENTITY_MAP_ADDR: { ++ u64 ident_addr; ++ ++ mutex_lock(&kvm->lock); ++ r = -EINVAL; ++ if (kvm->created_vcpus) ++ goto set_identity_unlock; ++ r = -EFAULT; ++ if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) ++ goto set_identity_unlock; ++ r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); ++set_identity_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_SET_NR_MMU_PAGES: ++ r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); ++ break; ++ case KVM_GET_NR_MMU_PAGES: ++ r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); ++ break; ++ case KVM_CREATE_IRQCHIP: { ++ mutex_lock(&kvm->lock); ++ ++ r = -EEXIST; ++ if (irqchip_in_kernel(kvm)) ++ goto create_irqchip_unlock; ++ ++ r = -EINVAL; ++ if (kvm->created_vcpus) ++ goto create_irqchip_unlock; ++ ++ r = kvm_pic_init(kvm); ++ if (r) ++ goto create_irqchip_unlock; ++ ++ r = kvm_ioapic_init(kvm); ++ if (r) { ++ kvm_pic_destroy(kvm); ++ goto create_irqchip_unlock; ++ } ++ ++ r = kvm_setup_default_irq_routing(kvm); ++ if (r) { ++ kvm_ioapic_destroy(kvm); ++ kvm_pic_destroy(kvm); ++ goto create_irqchip_unlock; ++ } ++ /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ ++ smp_wmb(); ++ kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; ++ create_irqchip_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ } ++ case KVM_CREATE_PIT: ++ u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; ++ goto create_pit; ++ case KVM_CREATE_PIT2: ++ r = -EFAULT; ++ if (copy_from_user(&u.pit_config, argp, ++ sizeof(struct kvm_pit_config))) ++ goto out; ++ create_pit: ++ mutex_lock(&kvm->lock); ++ r = -EEXIST; ++ if (kvm->arch.vpit) ++ goto create_pit_unlock; ++ r = -ENOMEM; ++ kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); ++ if (kvm->arch.vpit) ++ r = 0; ++ create_pit_unlock: ++ mutex_unlock(&kvm->lock); ++ break; ++ case KVM_GET_IRQCHIP: { ++ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ ++ struct kvm_irqchip *chip; ++ ++ chip = memdup_user(argp, sizeof(*chip)); ++ if (IS_ERR(chip)) { ++ r = PTR_ERR(chip); ++ goto out; ++ } ++ ++ r = -ENXIO; ++ if (!irqchip_kernel(kvm)) ++ goto get_irqchip_out; ++ r = kvm_vm_ioctl_get_irqchip(kvm, chip); ++ if (r) ++ goto get_irqchip_out; ++ r = -EFAULT; ++ if (copy_to_user(argp, chip, sizeof *chip)) ++ goto get_irqchip_out; ++ r = 0; ++ get_irqchip_out: ++ kfree(chip); ++ break; ++ } ++ case KVM_SET_IRQCHIP: { ++ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ ++ struct kvm_irqchip *chip; ++ ++ chip = memdup_user(argp, sizeof(*chip)); ++ if (IS_ERR(chip)) { ++ r = PTR_ERR(chip); ++ goto out; ++ } ++ ++ r = -ENXIO; ++ if (!irqchip_kernel(kvm)) ++ goto set_irqchip_out; ++ r = kvm_vm_ioctl_set_irqchip(kvm, chip); ++ if (r) ++ goto set_irqchip_out; ++ r = 0; ++ set_irqchip_out: ++ kfree(chip); ++ break; ++ } ++ case KVM_GET_PIT: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_get_pit(kvm, &u.ps); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_PIT: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps, argp, sizeof u.ps)) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_set_pit(kvm, &u.ps); ++ break; ++ } ++ case KVM_GET_PIT2: { ++ r = 
-ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2); ++ if (r) ++ goto out; ++ r = -EFAULT; ++ if (copy_to_user(argp, &u.ps2, sizeof(u.ps2))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_SET_PIT2: { ++ r = -EFAULT; ++ if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) ++ goto out; ++ r = -ENXIO; ++ if (!kvm->arch.vpit) ++ goto out; ++ r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); ++ break; ++ } ++ case KVM_REINJECT_CONTROL: { ++ struct kvm_reinject_control control; ++ r = -EFAULT; ++ if (copy_from_user(&control, argp, sizeof(control))) ++ goto out; ++ r = kvm_vm_ioctl_reinject(kvm, &control); ++ break; ++ } ++ case KVM_SET_BOOT_CPU_ID: ++ r = 0; ++ mutex_lock(&kvm->lock); ++ if (kvm->created_vcpus) ++ r = -EBUSY; ++ else ++ kvm->arch.bsp_vcpu_id = arg; ++ mutex_unlock(&kvm->lock); ++ break; ++ case KVM_XEN_HVM_CONFIG: { ++ struct kvm_xen_hvm_config xhc; ++ r = -EFAULT; ++ if (copy_from_user(&xhc, argp, sizeof(xhc))) ++ goto out; ++ r = -EINVAL; ++ if (xhc.flags) ++ goto out; ++ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); ++ r = 0; ++ break; ++ } ++ case KVM_SET_CLOCK: { ++ struct kvm_clock_data user_ns; ++ u64 now_ns; ++ ++ r = -EFAULT; ++ if (copy_from_user(&user_ns, argp, sizeof(user_ns))) ++ goto out; ++ ++ r = -EINVAL; ++ if (user_ns.flags) ++ goto out; ++ ++ r = 0; ++ /* ++ * TODO: userspace has to take care of races with VCPU_RUN, so ++ * kvm_gen_update_masterclock() can be cut down to locked ++ * pvclock_update_vm_gtod_copy(). ++ */ ++ kvm_gen_update_masterclock(kvm); ++ now_ns = get_kvmclock_ns(kvm); ++ kvm->arch.kvmclock_offset += user_ns.clock - now_ns; ++ kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); ++ break; ++ } ++ case KVM_GET_CLOCK: { ++ struct kvm_clock_data user_ns; ++ u64 now_ns; ++ ++ now_ns = get_kvmclock_ns(kvm); ++ user_ns.clock = now_ns; ++ user_ns.flags = kvm->arch.use_master_clock ? 
KVM_CLOCK_TSC_STABLE : 0; ++ memset(&user_ns.pad, 0, sizeof(user_ns.pad)); ++ ++ r = -EFAULT; ++ if (copy_to_user(argp, &user_ns, sizeof(user_ns))) ++ goto out; ++ r = 0; ++ break; ++ } ++ case KVM_ENABLE_CAP: { ++ struct kvm_enable_cap cap; ++ ++ r = -EFAULT; ++ if (copy_from_user(&cap, argp, sizeof(cap))) ++ goto out; ++ r = kvm_vm_ioctl_enable_cap(kvm, &cap); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_OP: { ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_op) ++ r = kvm_x86_ops->mem_enc_op(kvm, argp); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_REG_REGION: { ++ struct kvm_enc_region region; ++ ++ r = -EFAULT; ++ if (copy_from_user(®ion, argp, sizeof(region))) ++ goto out; ++ ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_reg_region) ++ r = kvm_x86_ops->mem_enc_reg_region(kvm, ®ion); ++ break; ++ } ++ case KVM_MEMORY_ENCRYPT_UNREG_REGION: { ++ struct kvm_enc_region region; ++ ++ r = -EFAULT; ++ if (copy_from_user(®ion, argp, sizeof(region))) ++ goto out; ++ ++ r = -ENOTTY; ++ if (kvm_x86_ops->mem_enc_unreg_region) ++ r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); ++ break; ++ } ++ case KVM_HYPERV_EVENTFD: { ++ struct kvm_hyperv_eventfd hvevfd; ++ ++ r = -EFAULT; ++ if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) ++ goto out; ++ r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); ++ break; ++ } ++ default: ++ r = -ENOTTY; ++ } ++out: ++ return r; ++} ++ ++static void kvm_init_msr_list(void) ++{ ++ u32 dummy[2]; ++ unsigned i, j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { ++ if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) ++ continue; ++ ++ /* ++ * Even MSRs that are valid in the host may not be exposed ++ * to the guests in some cases. ++ */ ++ switch (msrs_to_save[i]) { ++ case MSR_IA32_BNDCFGS: ++ if (!kvm_mpx_supported()) ++ continue; ++ break; ++ case MSR_TSC_AUX: ++ if (!kvm_x86_ops->rdtscp_supported()) ++ continue; ++ break; ++ default: ++ break; ++ } ++ ++ if (j < i) ++ msrs_to_save[j] = msrs_to_save[i]; ++ j++; ++ } ++ num_msrs_to_save = j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { ++ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) ++ continue; ++ ++ if (j < i) ++ emulated_msrs[j] = emulated_msrs[i]; ++ j++; ++ } ++ num_emulated_msrs = j; ++ ++ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { ++ struct kvm_msr_entry msr; ++ ++ msr.index = msr_based_features[i]; ++ if (kvm_get_msr_feature(&msr)) ++ continue; ++ ++ if (j < i) ++ msr_based_features[j] = msr_based_features[i]; ++ j++; ++ } ++ num_msr_based_features = j; ++} ++ ++static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, ++ const void *v) ++{ ++ int handled = 0; ++ int n; ++ ++ do { ++ n = min(len, 8); ++ if (!(lapic_in_kernel(vcpu) && ++ !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) ++ && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) ++ break; ++ handled += n; ++ addr += n; ++ len -= n; ++ v += n; ++ } while (len); ++ ++ return handled; ++} ++ ++static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) ++{ ++ int handled = 0; ++ int n; ++ ++ do { ++ n = min(len, 8); ++ if (!(lapic_in_kernel(vcpu) && ++ !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, ++ addr, n, v)) ++ && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) ++ break; ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); ++ handled += n; ++ addr += n; ++ len -= n; ++ v += n; ++ } while (len); ++ ++ return handled; ++} ++ ++static void kvm_set_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ kvm_x86_ops->set_segment(vcpu, var, seg); ++} 
++ ++void kvm_get_segment(struct kvm_vcpu *vcpu, ++ struct kvm_segment *var, int seg) ++{ ++ kvm_x86_ops->get_segment(vcpu, var, seg); ++} ++ ++gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, ++ struct x86_exception *exception) ++{ ++ gpa_t t_gpa; ++ ++ BUG_ON(!mmu_is_nested(vcpu)); ++ ++ /* NPT walks are always user-walks */ ++ access |= PFERR_USER_MASK; ++ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); ++ ++ return t_gpa; ++} ++ ++gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ access |= PFERR_FETCH_MASK; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ access |= PFERR_WRITE_MASK; ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++} ++ ++/* uses this to access any guest's mapped memory without checking CPL */ ++gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, ++ struct x86_exception *exception) ++{ ++ return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception); ++} ++ ++static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, ++ struct kvm_vcpu *vcpu, u32 access, ++ struct x86_exception *exception) ++{ ++ void *data = val; ++ int r = X86EMUL_CONTINUE; ++ ++ while (bytes) { ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, ++ exception); ++ unsigned offset = addr & (PAGE_SIZE-1); ++ unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); ++ int ret; ++ ++ if (gpa == UNMAPPED_GVA) ++ return X86EMUL_PROPAGATE_FAULT; ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data, ++ offset, toread); ++ if (ret < 0) { ++ r = X86EMUL_IO_NEEDED; ++ goto out; ++ } ++ ++ bytes -= toread; ++ data += toread; ++ addr += toread; ++ } ++out: ++ return r; ++} ++ ++/* used for instruction fetching */ ++static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; ++ unsigned offset; ++ int ret; ++ ++ /* Inline kvm_read_guest_virt_helper for speed. */ ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK, ++ exception); ++ if (unlikely(gpa == UNMAPPED_GVA)) ++ return X86EMUL_PROPAGATE_FAULT; ++ ++ offset = addr & (PAGE_SIZE-1); ++ if (WARN_ON(offset + bytes > PAGE_SIZE)) ++ bytes = (unsigned)PAGE_SIZE - offset; ++ ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val, ++ offset, bytes); ++ if (unlikely(ret < 0)) ++ return X86EMUL_IO_NEEDED; ++ ++ return X86EMUL_CONTINUE; ++} ++ ++int kvm_read_guest_virt(struct kvm_vcpu *vcpu, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; ++ ++ /* ++ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED ++ * is returned, but our callers are not ready for that and they blindly ++ * call kvm_inject_page_fault. Ensure that they at least do not leak ++ * uninitialized kernel stack memory into cr2 and error code. ++ */ ++ memset(exception, 0, sizeof(*exception)); ++ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, ++ exception); ++} ++EXPORT_SYMBOL_GPL(kvm_read_guest_virt); ++ ++static int emulator_read_std(struct x86_emulate_ctxt *ctxt, ++ gva_t addr, void *val, unsigned int bytes, ++ struct x86_exception *exception, bool system) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = 0; ++ ++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) ++ access |= PFERR_USER_MASK; ++ ++ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); ++} ++ ++static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, void *val, unsigned int bytes) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); ++ ++ return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; ++} ++ ++static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, ++ struct kvm_vcpu *vcpu, u32 access, ++ struct x86_exception *exception) ++{ ++ void *data = val; ++ int r = X86EMUL_CONTINUE; ++ ++ while (bytes) { ++ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, ++ access, ++ exception); ++ unsigned offset = addr & (PAGE_SIZE-1); ++ unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); ++ int ret; ++ ++ if (gpa == UNMAPPED_GVA) ++ return X86EMUL_PROPAGATE_FAULT; ++ ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite); ++ if (ret < 0) { ++ r = X86EMUL_IO_NEEDED; ++ goto out; ++ } ++ ++ bytes -= towrite; ++ data += towrite; ++ addr += towrite; ++ } ++out: ++ return r; ++} ++ ++static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception, ++ bool system) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ u32 access = PFERR_WRITE_MASK; ++ ++ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3) ++ access |= PFERR_USER_MASK; ++ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ access, exception); ++} ++ ++int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, ++ unsigned int bytes, struct x86_exception *exception) ++{ ++ /* kvm_write_guest_virt_system can pull in tons of pages. 
*/ ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, ++ PFERR_WRITE_MASK, exception); ++} ++EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); ++ ++int handle_ud(struct kvm_vcpu *vcpu) ++{ ++ int emul_type = EMULTYPE_TRAP_UD; ++ enum emulation_result er; ++ char sig[5]; /* ud2; .ascii "kvm" */ ++ struct x86_exception e; ++ ++ if (force_emulation_prefix && ++ kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu), ++ sig, sizeof(sig), &e) == 0 && ++ memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { ++ kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); ++ emul_type = 0; ++ } ++ ++ er = kvm_emulate_instruction(vcpu, emul_type); ++ if (er == EMULATE_USER_EXIT) ++ return 0; ++ if (er != EMULATE_DONE) ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++EXPORT_SYMBOL_GPL(handle_ud); ++ ++static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, ++ gpa_t gpa, bool write) ++{ ++ /* For APIC access vmexit */ ++ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ++ return 1; ++ ++ if (vcpu_match_mmio_gpa(vcpu, gpa)) { ++ trace_vcpu_match_mmio(gva, gpa, write, true); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, ++ gpa_t *gpa, struct x86_exception *exception, ++ bool write) ++{ ++ u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) ++ | (write ? PFERR_WRITE_MASK : 0); ++ ++ /* ++ * currently PKRU is only applied to ept enabled guest so ++ * there is no pkey in EPT page table for L1 guest or EPT ++ * shadow page table for L2 guest. ++ */ ++ if (vcpu_match_mmio_gva(vcpu, gva) ++ && !permission_fault(vcpu, vcpu->arch.walk_mmu, ++ vcpu->arch.access, 0, access)) { ++ *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | ++ (gva & (PAGE_SIZE - 1)); ++ trace_vcpu_match_mmio(gva, *gpa, write, false); ++ return 1; ++ } ++ ++ *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); ++ ++ if (*gpa == UNMAPPED_GVA) ++ return -1; ++ ++ return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write); ++} ++ ++int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ++ const void *val, int bytes) ++{ ++ int ret; ++ ++ ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); ++ if (ret < 0) ++ return 0; ++ kvm_page_track_write(vcpu, gpa, val, bytes); ++ return 1; ++} ++ ++struct read_write_emulator_ops { ++ int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, ++ int bytes); ++ int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes); ++ int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ int bytes, void *val); ++ int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes); ++ bool write; ++}; ++ ++static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) ++{ ++ if (vcpu->mmio_read_completed) { ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, ++ vcpu->mmio_fragments[0].gpa, val); ++ vcpu->mmio_read_completed = 0; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes); ++} ++ ++static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ return emulator_write_phys(vcpu, gpa, val, bytes); ++} ++ ++static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) ++{ ++ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); ++ return vcpu_mmio_write(vcpu, gpa, bytes, val); ++} ++ ++static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, ++ 
void *val, int bytes) ++{ ++ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); ++ return X86EMUL_IO_NEEDED; ++} ++ ++static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, ++ void *val, int bytes) ++{ ++ struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0]; ++ ++ memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); ++ return X86EMUL_CONTINUE; ++} ++ ++static const struct read_write_emulator_ops read_emultor = { ++ .read_write_prepare = read_prepare, ++ .read_write_emulate = read_emulate, ++ .read_write_mmio = vcpu_mmio_read, ++ .read_write_exit_mmio = read_exit_mmio, ++}; ++ ++static const struct read_write_emulator_ops write_emultor = { ++ .read_write_emulate = write_emulate, ++ .read_write_mmio = write_mmio, ++ .read_write_exit_mmio = write_exit_mmio, ++ .write = true, ++}; ++ ++static int emulator_read_write_onepage(unsigned long addr, void *val, ++ unsigned int bytes, ++ struct x86_exception *exception, ++ struct kvm_vcpu *vcpu, ++ const struct read_write_emulator_ops *ops) ++{ ++ gpa_t gpa; ++ int handled, ret; ++ bool write = ops->write; ++ struct kvm_mmio_fragment *frag; ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ ++ /* ++ * If the exit was due to a NPF we may already have a GPA. ++ * If the GPA is present, use it to avoid the GVA to GPA table walk. ++ * Note, this cannot be used on string operations since string ++ * operation using rep will only have the initial GPA from the NPF ++ * occurred. ++ */ ++ if (vcpu->arch.gpa_available && ++ emulator_can_use_gpa(ctxt) && ++ (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { ++ gpa = vcpu->arch.gpa_val; ++ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); ++ } else { ++ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); ++ if (ret < 0) ++ return X86EMUL_PROPAGATE_FAULT; ++ } ++ ++ if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) ++ return X86EMUL_CONTINUE; ++ ++ /* ++ * Is this MMIO handled locally? ++ */ ++ handled = ops->read_write_mmio(vcpu, gpa, bytes, val); ++ if (handled == bytes) ++ return X86EMUL_CONTINUE; ++ ++ gpa += handled; ++ bytes -= handled; ++ val += handled; ++ ++ WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS); ++ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; ++ frag->gpa = gpa; ++ frag->data = val; ++ frag->len = bytes; ++ return X86EMUL_CONTINUE; ++} ++ ++static int emulator_read_write(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ void *val, unsigned int bytes, ++ struct x86_exception *exception, ++ const struct read_write_emulator_ops *ops) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ gpa_t gpa; ++ int rc; ++ ++ if (ops->read_write_prepare && ++ ops->read_write_prepare(vcpu, val, bytes)) ++ return X86EMUL_CONTINUE; ++ ++ vcpu->mmio_nr_fragments = 0; ++ ++ /* Crossing a page boundary? 
*/ ++ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { ++ int now; ++ ++ now = -addr & ~PAGE_MASK; ++ rc = emulator_read_write_onepage(addr, val, now, exception, ++ vcpu, ops); ++ ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ addr += now; ++ if (ctxt->mode != X86EMUL_MODE_PROT64) ++ addr = (u32)addr; ++ val += now; ++ bytes -= now; ++ } ++ ++ rc = emulator_read_write_onepage(addr, val, bytes, exception, ++ vcpu, ops); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ if (!vcpu->mmio_nr_fragments) ++ return rc; ++ ++ gpa = vcpu->mmio_fragments[0].gpa; ++ ++ vcpu->mmio_needed = 1; ++ vcpu->mmio_cur_fragment = 0; ++ ++ vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len); ++ vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write; ++ vcpu->run->exit_reason = KVM_EXIT_MMIO; ++ vcpu->run->mmio.phys_addr = gpa; ++ ++ return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); ++} ++ ++static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ void *val, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ return emulator_read_write(ctxt, addr, val, bytes, ++ exception, &read_emultor); ++} ++ ++static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ const void *val, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ return emulator_read_write(ctxt, addr, (void *)val, bytes, ++ exception, &write_emultor); ++} ++ ++#define CMPXCHG_TYPE(t, ptr, old, new) \ ++ (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) ++ ++#ifdef CONFIG_X86_64 ++# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) ++#else ++# define CMPXCHG64(ptr, old, new) \ ++ (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) ++#endif ++ ++static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, ++ unsigned long addr, ++ const void *old, ++ const void *new, ++ unsigned int bytes, ++ struct x86_exception *exception) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ gpa_t gpa; ++ struct page *page; ++ char *kaddr; ++ bool exchanged; ++ ++ /* guests cmpxchg8b have to be emulated atomically */ ++ if (bytes > 8 || (bytes & (bytes - 1))) ++ goto emul_write; ++ ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); ++ ++ if (gpa == UNMAPPED_GVA || ++ (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ++ goto emul_write; ++ ++ if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) ++ goto emul_write; ++ ++ page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ goto emul_write; ++ ++ kaddr = kmap_atomic(page); ++ kaddr += offset_in_page(gpa); ++ switch (bytes) { ++ case 1: ++ exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); ++ break; ++ case 2: ++ exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); ++ break; ++ case 4: ++ exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); ++ break; ++ case 8: ++ exchanged = CMPXCHG64(kaddr, old, new); ++ break; ++ default: ++ BUG(); ++ } ++ kunmap_atomic(kaddr); ++ kvm_release_page_dirty(page); ++ ++ if (!exchanged) ++ return X86EMUL_CMPXCHG_FAILED; ++ ++ kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); ++ kvm_page_track_write(vcpu, gpa, new, bytes); ++ ++ return X86EMUL_CONTINUE; ++ ++emul_write: ++ printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); ++ ++ return emulator_write_emulated(ctxt, addr, new, bytes, exception); ++} ++ ++static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) ++{ ++ int r = 0, i; ++ ++ for (i = 0; i < vcpu->arch.pio.count; i++) { ++ if (vcpu->arch.pio.in) ++ r = kvm_io_bus_read(vcpu, 
KVM_PIO_BUS, vcpu->arch.pio.port, ++ vcpu->arch.pio.size, pd); ++ else ++ r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, ++ vcpu->arch.pio.port, vcpu->arch.pio.size, ++ pd); ++ if (r) ++ break; ++ pd += vcpu->arch.pio.size; ++ } ++ return r; ++} ++ ++static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, ++ unsigned short port, void *val, ++ unsigned int count, bool in) ++{ ++ vcpu->arch.pio.port = port; ++ vcpu->arch.pio.in = in; ++ vcpu->arch.pio.count = count; ++ vcpu->arch.pio.size = size; ++ ++ if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ vcpu->run->exit_reason = KVM_EXIT_IO; ++ vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; ++ vcpu->run->io.size = size; ++ vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; ++ vcpu->run->io.count = count; ++ vcpu->run->io.port = port; ++ ++ return 0; ++} ++ ++static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, ++ int size, unsigned short port, void *val, ++ unsigned int count) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int ret; ++ ++ if (vcpu->arch.pio.count) ++ goto data_avail; ++ ++ memset(vcpu->arch.pio_data, 0, size * count); ++ ++ ret = emulator_pio_in_out(vcpu, size, port, val, count, true); ++ if (ret) { ++data_avail: ++ memcpy(val, vcpu->arch.pio_data, size * count); ++ trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data); ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, ++ int size, unsigned short port, ++ const void *val, unsigned int count) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ memcpy(vcpu->arch.pio_data, val, size * count); ++ trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); ++ return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); ++} ++ ++static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) ++{ ++ return kvm_x86_ops->get_segment_base(vcpu, seg); ++} ++ ++static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) ++{ ++ kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); ++} ++ ++static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) ++{ ++ if (!need_emulate_wbinvd(vcpu)) ++ return X86EMUL_CONTINUE; ++ ++ if (kvm_x86_ops->has_wbinvd_exit()) { ++ int cpu = get_cpu(); ++ ++ cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); ++ smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, ++ wbinvd_ipi, NULL, 1); ++ put_cpu(); ++ cpumask_clear(vcpu->arch.wbinvd_dirty_mask); ++ } else ++ wbinvd(); ++ return X86EMUL_CONTINUE; ++} ++ ++int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) ++{ ++ kvm_emulate_wbinvd_noskip(vcpu); ++ return kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); ++ ++ ++ ++static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) ++{ ++ kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); ++} ++ ++static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, ++ unsigned long *dest) ++{ ++ return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); ++} ++ ++static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, ++ unsigned long value) ++{ ++ ++ return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); ++} ++ ++static u64 mk_cr_64(u64 curr_cr, u32 new_val) ++{ ++ return (curr_cr & ~((1ULL << 32) - 1)) | new_val; ++} ++ ++static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ unsigned long value; ++ ++ switch (cr) { ++ case 0: ++ value = 
kvm_read_cr0(vcpu); ++ break; ++ case 2: ++ value = vcpu->arch.cr2; ++ break; ++ case 3: ++ value = kvm_read_cr3(vcpu); ++ break; ++ case 4: ++ value = kvm_read_cr4(vcpu); ++ break; ++ case 8: ++ value = kvm_get_cr8(vcpu); ++ break; ++ default: ++ kvm_err("%s: unexpected cr %u\n", __func__, cr); ++ return 0; ++ } ++ ++ return value; ++} ++ ++static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ int res = 0; ++ ++ switch (cr) { ++ case 0: ++ res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); ++ break; ++ case 2: ++ vcpu->arch.cr2 = val; ++ break; ++ case 3: ++ res = kvm_set_cr3(vcpu, val); ++ break; ++ case 4: ++ res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); ++ break; ++ case 8: ++ res = kvm_set_cr8(vcpu, val); ++ break; ++ default: ++ kvm_err("%s: unexpected cr %u\n", __func__, cr); ++ res = -1; ++ } ++ ++ return res; ++} ++ ++static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) ++{ ++ return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); ++} ++ ++static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); ++} ++ ++static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) ++{ ++ kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); ++} ++ ++static unsigned long emulator_get_cached_segment_base( ++ struct x86_emulate_ctxt *ctxt, int seg) ++{ ++ return get_segment_base(emul_to_vcpu(ctxt), seg); ++} ++ ++static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, ++ struct desc_struct *desc, u32 *base3, ++ int seg) ++{ ++ struct kvm_segment var; ++ ++ kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); ++ *selector = var.selector; ++ ++ if (var.unusable) { ++ memset(desc, 0, sizeof(*desc)); ++ if (base3) ++ *base3 = 0; ++ return false; ++ } ++ ++ if (var.g) ++ var.limit >>= 12; ++ set_desc_limit(desc, var.limit); ++ set_desc_base(desc, (unsigned long)var.base); ++#ifdef CONFIG_X86_64 ++ if (base3) ++ *base3 = var.base >> 32; ++#endif ++ desc->type = var.type; ++ desc->s = var.s; ++ desc->dpl = var.dpl; ++ desc->p = var.present; ++ desc->avl = var.avl; ++ desc->l = var.l; ++ desc->d = var.db; ++ desc->g = var.g; ++ ++ return true; ++} ++ ++static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, ++ struct desc_struct *desc, u32 base3, ++ int seg) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ struct kvm_segment var; ++ ++ var.selector = selector; ++ var.base = get_desc_base(desc); ++#ifdef CONFIG_X86_64 ++ var.base |= ((u64)base3) << 32; ++#endif ++ var.limit = get_desc_limit(desc); ++ if (desc->g) ++ var.limit = (var.limit << 12) | 0xfff; ++ var.type = desc->type; ++ var.dpl = desc->dpl; ++ var.db = desc->d; ++ var.s = desc->s; ++ var.l = desc->l; ++ var.g = desc->g; ++ var.avl = desc->avl; ++ var.present = desc->p; ++ var.unusable = !var.present; ++ var.padding = 0; ++ ++ kvm_set_segment(vcpu, &var, seg); ++ return; ++} ++ ++static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, ++ u32 msr_index, u64 *pdata) ++{ ++ struct msr_data msr; ++ int r; ++ ++ msr.index = msr_index; ++ msr.host_initiated = false; ++ r = kvm_get_msr(emul_to_vcpu(ctxt), &msr); ++ if (r) ++ return r; ++ ++ 
*pdata = msr.data; ++ return 0; ++} ++ ++static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, ++ u32 msr_index, u64 data) ++{ ++ struct msr_data msr; ++ ++ msr.data = data; ++ msr.index = msr_index; ++ msr.host_initiated = false; ++ return kvm_set_msr(emul_to_vcpu(ctxt), &msr); ++} ++ ++static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ return vcpu->arch.smbase; ++} ++ ++static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ ++ vcpu->arch.smbase = smbase; ++} ++ ++static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, ++ u32 pmc) ++{ ++ return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc); ++} ++ ++static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, ++ u32 pmc, u64 *pdata) ++{ ++ return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata); ++} ++ ++static void emulator_halt(struct x86_emulate_ctxt *ctxt) ++{ ++ emul_to_vcpu(ctxt)->arch.halt_request = 1; ++} ++ ++static int emulator_intercept(struct x86_emulate_ctxt *ctxt, ++ struct x86_instruction_info *info, ++ enum x86_intercept_stage stage) ++{ ++ return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); ++} ++ ++static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, ++ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) ++{ ++ return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); ++} ++ ++static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) ++{ ++ return kvm_register_read(emul_to_vcpu(ctxt), reg); ++} ++ ++static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) ++{ ++ kvm_register_write(emul_to_vcpu(ctxt), reg, val); ++} ++ ++static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) ++{ ++ kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked); ++} ++ ++static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) ++{ ++ return emul_to_vcpu(ctxt)->arch.hflags; ++} ++ ++static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) ++{ ++ kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); ++} ++ ++static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) ++{ ++ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); ++} ++ ++static const struct x86_emulate_ops emulate_ops = { ++ .read_gpr = emulator_read_gpr, ++ .write_gpr = emulator_write_gpr, ++ .read_std = emulator_read_std, ++ .write_std = emulator_write_std, ++ .read_phys = kvm_read_guest_phys_system, ++ .fetch = kvm_fetch_guest_virt, ++ .read_emulated = emulator_read_emulated, ++ .write_emulated = emulator_write_emulated, ++ .cmpxchg_emulated = emulator_cmpxchg_emulated, ++ .invlpg = emulator_invlpg, ++ .pio_in_emulated = emulator_pio_in_emulated, ++ .pio_out_emulated = emulator_pio_out_emulated, ++ .get_segment = emulator_get_segment, ++ .set_segment = emulator_set_segment, ++ .get_cached_segment_base = emulator_get_cached_segment_base, ++ .get_gdt = emulator_get_gdt, ++ .get_idt = emulator_get_idt, ++ .set_gdt = emulator_set_gdt, ++ .set_idt = emulator_set_idt, ++ .get_cr = emulator_get_cr, ++ .set_cr = emulator_set_cr, ++ .cpl = emulator_get_cpl, ++ .get_dr = emulator_get_dr, ++ .set_dr = emulator_set_dr, ++ .get_smbase = emulator_get_smbase, ++ .set_smbase = emulator_set_smbase, ++ .set_msr = emulator_set_msr, ++ .get_msr = emulator_get_msr, ++ .check_pmc = emulator_check_pmc, ++ .read_pmc = emulator_read_pmc, ++ .halt = emulator_halt, ++ .wbinvd = 
emulator_wbinvd, ++ .fix_hypercall = emulator_fix_hypercall, ++ .intercept = emulator_intercept, ++ .get_cpuid = emulator_get_cpuid, ++ .set_nmi_mask = emulator_set_nmi_mask, ++ .get_hflags = emulator_get_hflags, ++ .set_hflags = emulator_set_hflags, ++ .pre_leave_smm = emulator_pre_leave_smm, ++}; ++ ++static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) ++{ ++ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); ++ /* ++ * an sti; sti; sequence only disable interrupts for the first ++ * instruction. So, if the last instruction, be it emulated or ++ * not, left the system with the INT_STI flag enabled, it ++ * means that the last instruction is an sti. We should not ++ * leave the flag on in this case. The same goes for mov ss ++ */ ++ if (int_shadow & mask) ++ mask = 0; ++ if (unlikely(int_shadow || mask)) { ++ kvm_x86_ops->set_interrupt_shadow(vcpu, mask); ++ if (!mask) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++} ++ ++static bool inject_emulated_exception(struct kvm_vcpu *vcpu) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ if (ctxt->exception.vector == PF_VECTOR) ++ return kvm_propagate_fault(vcpu, &ctxt->exception); ++ ++ if (ctxt->exception.error_code_valid) ++ kvm_queue_exception_e(vcpu, ctxt->exception.vector, ++ ctxt->exception.error_code); ++ else ++ kvm_queue_exception(vcpu, ctxt->exception.vector); ++ return false; ++} ++ ++static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int cs_db, cs_l; ++ ++ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ++ ++ ctxt->eflags = kvm_get_rflags(vcpu); ++ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; ++ ++ ctxt->eip = kvm_rip_read(vcpu); ++ ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : ++ (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : ++ (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 : ++ cs_db ? 
X86EMUL_MODE_PROT32 : ++ X86EMUL_MODE_PROT16; ++ BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); ++ BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); ++ BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); ++ ++ init_decode_cache(ctxt); ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = false; ++} ++ ++int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int ret; ++ ++ init_emulate_ctxt(vcpu); ++ ++ ctxt->op_bytes = 2; ++ ctxt->ad_bytes = 2; ++ ctxt->_eip = ctxt->eip + inc_eip; ++ ret = emulate_int_real(ctxt, irq); ++ ++ if (ret != X86EMUL_CONTINUE) ++ return EMULATE_FAIL; ++ ++ ctxt->eip = ctxt->_eip; ++ kvm_rip_write(vcpu, ctxt->eip); ++ kvm_set_rflags(vcpu, ctxt->eflags); ++ ++ return EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); ++ ++static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) ++{ ++ int r = EMULATE_DONE; ++ ++ ++vcpu->stat.insn_emulation_fail; ++ trace_kvm_emulate_insn_failed(vcpu); ++ ++ if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) ++ return EMULATE_FAIL; ++ ++ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; ++ vcpu->run->internal.ndata = 0; ++ r = EMULATE_USER_EXIT; ++ } ++ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ ++ return r; ++} ++ ++static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, ++ bool write_fault_to_shadow_pgtable, ++ int emulation_type) ++{ ++ gpa_t gpa = cr2; ++ kvm_pfn_t pfn; ++ ++ if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) ++ return false; ++ ++ if (WARN_ON_ONCE(is_guest_mode(vcpu))) ++ return false; ++ ++ if (!vcpu->arch.mmu.direct_map) { ++ /* ++ * Write permission should be allowed since only ++ * write access need to be emulated. ++ */ ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ ++ /* ++ * If the mapping is invalid in guest, let cpu retry ++ * it to generate fault. ++ */ ++ if (gpa == UNMAPPED_GVA) ++ return true; ++ } ++ ++ /* ++ * Do not retry the unhandleable instruction if it faults on the ++ * readonly host memory, otherwise it will goto a infinite loop: ++ * retry instruction -> write #PF -> emulation fail -> retry ++ * instruction -> ... ++ */ ++ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ /* ++ * If the instruction failed on the error pfn, it can not be fixed, ++ * report the error to userspace. ++ */ ++ if (is_error_noslot_pfn(pfn)) ++ return false; ++ ++ kvm_release_pfn_clean(pfn); ++ ++ /* The instructions are well-emulated on direct mmu. */ ++ if (vcpu->arch.mmu.direct_map) { ++ unsigned int indirect_shadow_pages; ++ ++ spin_lock(&vcpu->kvm->mmu_lock); ++ indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; ++ spin_unlock(&vcpu->kvm->mmu_lock); ++ ++ if (indirect_shadow_pages) ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ return true; ++ } ++ ++ /* ++ * if emulation was due to access to shadowed page table ++ * and it failed try to unshadow page and re-enter the ++ * guest to let CPU execute the instruction. ++ */ ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ /* ++ * If the access faults on its page table, it can not ++ * be fixed by unprotecting shadow page and it should ++ * be reported to userspace. 
++ */ ++ return !write_fault_to_shadow_pgtable; ++} ++ ++static bool retry_instruction(struct x86_emulate_ctxt *ctxt, ++ unsigned long cr2, int emulation_type) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ unsigned long last_retry_eip, last_retry_addr, gpa = cr2; ++ ++ last_retry_eip = vcpu->arch.last_retry_eip; ++ last_retry_addr = vcpu->arch.last_retry_addr; ++ ++ /* ++ * If the emulation is caused by #PF and it is non-page_table ++ * writing instruction, it means the VM-EXIT is caused by shadow ++ * page protected, we can zap the shadow page and retry this ++ * instruction directly. ++ * ++ * Note: if the guest uses a non-page-table modifying instruction ++ * on the PDE that points to the instruction, then we will unmap ++ * the instruction and go to an infinite loop. So, we cache the ++ * last retried eip and the last fault address, if we meet the eip ++ * and the address again, we can break out of the potential infinite ++ * loop. ++ */ ++ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; ++ ++ if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) ++ return false; ++ ++ if (WARN_ON_ONCE(is_guest_mode(vcpu))) ++ return false; ++ ++ if (x86_page_table_writing_insn(ctxt)) ++ return false; ++ ++ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) ++ return false; ++ ++ vcpu->arch.last_retry_eip = ctxt->eip; ++ vcpu->arch.last_retry_addr = cr2; ++ ++ if (!vcpu->arch.mmu.direct_map) ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); ++ ++ return true; ++} ++ ++static int complete_emulated_mmio(struct kvm_vcpu *vcpu); ++static int complete_emulated_pio(struct kvm_vcpu *vcpu); ++ ++static void kvm_smm_changed(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.hflags & HF_SMM_MASK)) { ++ /* This is a good place to trace that we are exiting SMM. */ ++ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false); ++ ++ /* Process a latched INIT or SMI, if any. */ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } ++ ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) ++{ ++ unsigned changed = vcpu->arch.hflags ^ emul_flags; ++ ++ vcpu->arch.hflags = emul_flags; ++ ++ if (changed & HF_SMM_MASK) ++ kvm_smm_changed(vcpu); ++} ++ ++static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, ++ unsigned long *db) ++{ ++ u32 dr6 = 0; ++ int i; ++ u32 enable, rwlen; ++ ++ enable = dr7; ++ rwlen = dr7 >> 16; ++ for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) ++ if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) ++ dr6 |= (1 << i); ++ return dr6; ++} ++ ++static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) ++{ ++ struct kvm_run *kvm_run = vcpu->run; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { ++ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; ++ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ *r = EMULATE_USER_EXIT; ++ } else { ++ /* ++ * "Certain debug exceptions may clear bit 0-3. The ++ * remaining contents of the DR6 register are never ++ * cleared by the processor". 
++ */ ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= DR6_BS | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ } ++} ++ ++int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); ++ int r = EMULATE_DONE; ++ ++ kvm_x86_ops->skip_emulated_instruction(vcpu); ++ ++ /* ++ * rflags is the old, "raw" value of the flags. The new value has ++ * not been saved yet. ++ * ++ * This is correct even for TF set by the guest, because "the ++ * processor will not generate this exception after the instruction ++ * that sets the TF flag". ++ */ ++ if (unlikely(rflags & X86_EFLAGS_TF)) ++ kvm_vcpu_do_singlestep(vcpu, &r); ++ return r == EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); ++ ++static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) ++{ ++ if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && ++ (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { ++ struct kvm_run *kvm_run = vcpu->run; ++ unsigned long eip = kvm_get_linear_rip(vcpu); ++ u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, ++ vcpu->arch.guest_debug_dr7, ++ vcpu->arch.eff_db); ++ ++ if (dr6 != 0) { ++ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; ++ kvm_run->debug.arch.pc = eip; ++ kvm_run->debug.arch.exception = DB_VECTOR; ++ kvm_run->exit_reason = KVM_EXIT_DEBUG; ++ *r = EMULATE_USER_EXIT; ++ return true; ++ } ++ } ++ ++ if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && ++ !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { ++ unsigned long eip = kvm_get_linear_rip(vcpu); ++ u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, ++ vcpu->arch.dr7, ++ vcpu->arch.db); ++ ++ if (dr6 != 0) { ++ vcpu->arch.dr6 &= ~15; ++ vcpu->arch.dr6 |= dr6 | DR6_RTM; ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ *r = EMULATE_DONE; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) ++{ ++ switch (ctxt->opcode_len) { ++ case 1: ++ switch (ctxt->b) { ++ case 0xe4: /* IN */ ++ case 0xe5: ++ case 0xec: ++ case 0xed: ++ case 0xe6: /* OUT */ ++ case 0xe7: ++ case 0xee: ++ case 0xef: ++ case 0x6c: /* INS */ ++ case 0x6d: ++ case 0x6e: /* OUTS */ ++ case 0x6f: ++ return true; ++ } ++ break; ++ case 2: ++ switch (ctxt->b) { ++ case 0x33: /* RDPMC */ ++ return true; ++ } ++ break; ++ } ++ ++ return false; ++} ++ ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, ++ unsigned long cr2, ++ int emulation_type, ++ void *insn, ++ int insn_len) ++{ ++ int r; ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ bool writeback = true; ++ bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; ++ ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ /* ++ * Clear write_fault_to_shadow_pgtable here to ensure it is ++ * never reused. ++ */ ++ vcpu->arch.write_fault_to_shadow_pgtable = false; ++ kvm_clear_exception_queue(vcpu); ++ ++ if (!(emulation_type & EMULTYPE_NO_DECODE)) { ++ init_emulate_ctxt(vcpu); ++ ++ /* ++ * We will reenter on the same instruction since ++ * we do not set complete_userspace_io. This does not ++ * handle watchpoints yet, those would be handled in ++ * the emulate_ops. 
++ */ ++ if (!(emulation_type & EMULTYPE_SKIP) && ++ kvm_vcpu_check_breakpoint(vcpu, &r)) ++ return r; ++ ++ ctxt->interruptibility = 0; ++ ctxt->have_exception = false; ++ ctxt->exception.vector = -1; ++ ctxt->perm_ok = false; ++ ++ ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; ++ ++ r = x86_decode_insn(ctxt, insn, insn_len); ++ ++ trace_kvm_emulate_insn_start(vcpu); ++ ++vcpu->stat.insn_emulation; ++ if (r != EMULATION_OK) { ++ if (emulation_type & EMULTYPE_TRAP_UD) ++ return EMULATE_FAIL; ++ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ emulation_type)) ++ return EMULATE_DONE; ++ if (ctxt->have_exception) { ++ /* ++ * #UD should result in just EMULATION_FAILED, and trap-like ++ * exception should not be encountered during decode. ++ */ ++ WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || ++ exception_type(ctxt->exception.vector) == EXCPT_TRAP); ++ inject_emulated_exception(vcpu); ++ return EMULATE_DONE; ++ } ++ if (emulation_type & EMULTYPE_SKIP) ++ return EMULATE_FAIL; ++ return handle_emulation_failure(vcpu, emulation_type); ++ } ++ } ++ ++ if ((emulation_type & EMULTYPE_VMWARE) && ++ !is_vmware_backdoor_opcode(ctxt)) ++ return EMULATE_FAIL; ++ ++ if (emulation_type & EMULTYPE_SKIP) { ++ kvm_rip_write(vcpu, ctxt->_eip); ++ if (ctxt->eflags & X86_EFLAGS_RF) ++ kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); ++ return EMULATE_DONE; ++ } ++ ++ if (retry_instruction(ctxt, cr2, emulation_type)) ++ return EMULATE_DONE; ++ ++ /* this is needed for vmware backdoor interface to work since it ++ changes registers values during IO operation */ ++ if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = false; ++ emulator_invalidate_register_cache(ctxt); ++ } ++ ++restart: ++ /* Save the faulting GPA (cr2) in the address field */ ++ ctxt->exception.address = cr2; ++ ++ r = x86_emulate_insn(ctxt); ++ ++ if (r == EMULATION_INTERCEPTED) ++ return EMULATE_DONE; ++ ++ if (r == EMULATION_FAILED) { ++ if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ emulation_type)) ++ return EMULATE_DONE; ++ ++ return handle_emulation_failure(vcpu, emulation_type); ++ } ++ ++ if (ctxt->have_exception) { ++ r = EMULATE_DONE; ++ if (inject_emulated_exception(vcpu)) ++ return r; ++ } else if (vcpu->arch.pio.count) { ++ if (!vcpu->arch.pio.in) { ++ /* FIXME: return into emulator if single-stepping. */ ++ vcpu->arch.pio.count = 0; ++ } else { ++ writeback = false; ++ vcpu->arch.complete_userspace_io = complete_emulated_pio; ++ } ++ r = EMULATE_USER_EXIT; ++ } else if (vcpu->mmio_needed) { ++ if (!vcpu->mmio_is_write) ++ writeback = false; ++ r = EMULATE_USER_EXIT; ++ vcpu->arch.complete_userspace_io = complete_emulated_mmio; ++ } else if (r == EMULATION_RESTART) ++ goto restart; ++ else ++ r = EMULATE_DONE; ++ ++ if (writeback) { ++ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); ++ toggle_interruptibility(vcpu, ctxt->interruptibility); ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ if (!ctxt->have_exception || ++ exception_type(ctxt->exception.vector) == EXCPT_TRAP) { ++ kvm_rip_write(vcpu, ctxt->eip); ++ if (r == EMULATE_DONE && ctxt->tf) ++ kvm_vcpu_do_singlestep(vcpu, &r); ++ __kvm_set_rflags(vcpu, ctxt->eflags); ++ } ++ ++ /* ++ * For STI, interrupts are shadowed; so KVM_REQ_EVENT will ++ * do nothing, and it will be requested again as soon as ++ * the shadow expires. But we still need to check here, ++ * because POPF has no interrupt shadow. 
++ */ ++ if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF)) ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ } else ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = true; ++ ++ return r; ++} ++ ++int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) ++{ ++ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_instruction); ++ ++int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, ++ void *insn, int insn_len) ++{ ++ return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); ++ ++static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pio.count = 0; ++ return 1; ++} ++ ++static int complete_fast_pio_out(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.pio.count = 0; ++ ++ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) ++ return 1; ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, ++ unsigned short port) ++{ ++ unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, ++ size, port, &val, 1); ++ if (ret) ++ return ret; ++ ++ /* ++ * Workaround userspace that relies on old KVM behavior of %rip being ++ * incremented prior to exiting to userspace to handle "OUT 0x7e". ++ */ ++ if (port == 0x7e && ++ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) { ++ vcpu->arch.complete_userspace_io = ++ complete_fast_pio_out_port_0x7e; ++ kvm_skip_emulated_instruction(vcpu); ++ } else { ++ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); ++ vcpu->arch.complete_userspace_io = complete_fast_pio_out; ++ } ++ return 0; ++} ++ ++static int complete_fast_pio_in(struct kvm_vcpu *vcpu) ++{ ++ unsigned long val; ++ ++ /* We should only ever be called with arch.pio.count equal to 1 */ ++ BUG_ON(vcpu->arch.pio.count != 1); ++ ++ if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { ++ vcpu->arch.pio.count = 0; ++ return 1; ++ } ++ ++ /* For size less than 4 we merge, else we zero extend */ ++ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) ++ : 0; ++ ++ /* ++ * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform ++ * the copy and tracing ++ */ ++ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size, ++ vcpu->arch.pio.port, &val, 1); ++ kvm_register_write(vcpu, VCPU_REGS_RAX, val); ++ ++ return kvm_skip_emulated_instruction(vcpu); ++} ++ ++static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, ++ unsigned short port) ++{ ++ unsigned long val; ++ int ret; ++ ++ /* For size less than 4 we merge, else we zero extend */ ++ val = (size < 4) ? 
kvm_register_read(vcpu, VCPU_REGS_RAX) : 0; ++ ++ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port, ++ &val, 1); ++ if (ret) { ++ kvm_register_write(vcpu, VCPU_REGS_RAX, val); ++ return ret; ++ } ++ ++ vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); ++ vcpu->arch.complete_userspace_io = complete_fast_pio_in; ++ ++ return 0; ++} ++ ++int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) ++{ ++ int ret; ++ ++ if (in) ++ ret = kvm_fast_pio_in(vcpu, size, port); ++ else ++ ret = kvm_fast_pio_out(vcpu, size, port); ++ return ret && kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_fast_pio); ++ ++static int kvmclock_cpu_down_prep(unsigned int cpu) ++{ ++ __this_cpu_write(cpu_tsc_khz, 0); ++ return 0; ++} ++ ++static void tsc_khz_changed(void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ unsigned long khz = 0; ++ ++ if (data) ++ khz = freq->new; ++ else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) ++ khz = cpufreq_quick_get(raw_smp_processor_id()); ++ if (!khz) ++ khz = tsc_khz; ++ __this_cpu_write(cpu_tsc_khz, khz); ++} ++ ++#ifdef CONFIG_X86_64 ++static void kvm_hyperv_tsc_notifier(void) ++{ ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int cpu; ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ kvm_make_mclock_inprogress_request(kvm); ++ ++ hyperv_stop_tsc_emulation(); ++ ++ /* TSC frequency always matches when on Hyper-V */ ++ for_each_present_cpu(cpu) ++ per_cpu(cpu_tsc_khz, cpu) = tsc_khz; ++ kvm_max_guest_tsc_khz = tsc_khz; ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ struct kvm_arch *ka = &kvm->arch; ++ ++ spin_lock(&ka->pvclock_gtod_sync_lock); ++ ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm_for_each_vcpu(cpu, vcpu, kvm) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ kvm_for_each_vcpu(cpu, vcpu, kvm) ++ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); ++ ++ spin_unlock(&ka->pvclock_gtod_sync_lock); ++ } ++ mutex_unlock(&kvm_lock); ++} ++#endif ++ ++static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, ++ void *data) ++{ ++ struct cpufreq_freqs *freq = data; ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int i, send_ipi = 0; ++ ++ /* ++ * We allow guests to temporarily run on slowing clocks, ++ * provided we notify them after, or to run on accelerating ++ * clocks, provided we notify them before. Thus time never ++ * goes backwards. ++ * ++ * However, we have a problem. We can't atomically update ++ * the frequency of a given CPU from this function; it is ++ * merely a notifier, which can be called from any CPU. ++ * Changing the TSC frequency at arbitrary points in time ++ * requires a recomputation of local variables related to ++ * the TSC for each VCPU. We must flag these local variables ++ * to be updated and be sure the update takes place with the ++ * new frequency before any guests proceed. ++ * ++ * Unfortunately, the combination of hotplug CPU and frequency ++ * change creates an intractable locking scenario; the order ++ * of when these callouts happen is undefined with respect to ++ * CPU hotplug, and they can race with each other. As such, ++ * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is ++ * undefined; you can actually have a CPU frequency change take ++ * place in between the computation of X and the setting of the ++ * variable. 
To protect against this problem, all updates of ++ * the per_cpu tsc_khz variable are done in an interrupt ++ * protected IPI, and all callers wishing to update the value ++ * must wait for a synchronous IPI to complete (which is trivial ++ * if the caller is on the CPU already). This establishes the ++ * necessary total order on variable updates. ++ * ++ * Note that because a guest time update may take place ++ * anytime after the setting of the VCPU's request bit, the ++ * correct TSC value must be set before the request. However, ++ * to ensure the update actually makes it to any guest which ++ * starts running in hardware virtualization between the set ++ * and the acquisition of the spinlock, we must also ping the ++ * CPU after setting the request bit. ++ * ++ */ ++ ++ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) ++ return 0; ++ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) ++ return 0; ++ ++ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (vcpu->cpu != freq->cpu) ++ continue; ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ if (vcpu->cpu != raw_smp_processor_id()) ++ send_ipi = 1; ++ } ++ } ++ mutex_unlock(&kvm_lock); ++ ++ if (freq->old < freq->new && send_ipi) { ++ /* ++ * We upscale the frequency. Must make the guest ++ * doesn't see old kvmclock values while running with ++ * the new frequency, otherwise we risk the guest sees ++ * time go backwards. ++ * ++ * In case we update the frequency for another cpu ++ * (which might be in guest context) send an interrupt ++ * to kick the cpu out of guest context. Next time ++ * guest context is entered kvmclock will be updated, ++ * so the guest will not see stale values. 
++ */ ++ smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); ++ } ++ return 0; ++} ++ ++static struct notifier_block kvmclock_cpufreq_notifier_block = { ++ .notifier_call = kvmclock_cpufreq_notifier ++}; ++ ++static int kvmclock_cpu_online(unsigned int cpu) ++{ ++ tsc_khz_changed(NULL); ++ return 0; ++} ++ ++static void kvm_timer_init(void) ++{ ++ max_tsc_khz = tsc_khz; ++ ++ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { ++#ifdef CONFIG_CPU_FREQ ++ struct cpufreq_policy policy; ++ int cpu; ++ ++ memset(&policy, 0, sizeof(policy)); ++ cpu = get_cpu(); ++ cpufreq_get_policy(&policy, cpu); ++ if (policy.cpuinfo.max_freq) ++ max_tsc_khz = policy.cpuinfo.max_freq; ++ put_cpu(); ++#endif ++ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ } ++ pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); ++ ++ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", ++ kvmclock_cpu_online, kvmclock_cpu_down_prep); ++} ++ ++DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); ++EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); ++ ++int kvm_is_in_guest(void) ++{ ++ return __this_cpu_read(current_vcpu) != NULL; ++} ++ ++static int kvm_is_user_mode(void) ++{ ++ int user_mode = 3; ++ ++ if (__this_cpu_read(current_vcpu)) ++ user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu)); ++ ++ return user_mode != 0; ++} ++ ++static unsigned long kvm_get_guest_ip(void) ++{ ++ unsigned long ip = 0; ++ ++ if (__this_cpu_read(current_vcpu)) ++ ip = kvm_rip_read(__this_cpu_read(current_vcpu)); ++ ++ return ip; ++} ++ ++static struct perf_guest_info_callbacks kvm_guest_cbs = { ++ .is_in_guest = kvm_is_in_guest, ++ .is_user_mode = kvm_is_user_mode, ++ .get_guest_ip = kvm_get_guest_ip, ++}; ++ ++static void kvm_set_mmio_spte_mask(void) ++{ ++ u64 mask; ++ int maxphyaddr = boot_cpu_data.x86_phys_bits; ++ ++ /* ++ * Set the reserved bits and the present bit of an paging-structure ++ * entry to generate page fault with PFER.RSV = 1. ++ */ ++ ++ /* ++ * Mask the uppermost physical address bit, which would be reserved as ++ * long as the supported physical address width is less than 52. ++ */ ++ mask = 1ull << 51; ++ ++ /* Set the present bit. */ ++ mask |= 1ull; ++ ++ /* ++ * If reserved bit is not supported, clear the present bit to disable ++ * mmio page fault. ++ */ ++ if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) ++ mask &= ~1ull; ++ ++ kvm_mmu_set_mmio_spte_mask(mask, mask); ++} ++ ++#ifdef CONFIG_X86_64 ++static void pvclock_gtod_update_fn(struct work_struct *work) ++{ ++ struct kvm *kvm; ++ ++ struct kvm_vcpu *vcpu; ++ int i; ++ ++ mutex_lock(&kvm_lock); ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ atomic_set(&kvm_guest_has_master_clock, 0); ++ mutex_unlock(&kvm_lock); ++} ++ ++static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); ++ ++/* ++ * Notification about pvclock gtod data update. ++ */ ++static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, ++ void *priv) ++{ ++ struct pvclock_gtod_data *gtod = &pvclock_gtod_data; ++ struct timekeeper *tk = priv; ++ ++ update_pvclock_gtod(tk); ++ ++ /* disable master clock if host does not trust, or does not ++ * use, TSC based clocksource. 
++ */ ++ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && ++ atomic_read(&kvm_guest_has_master_clock) != 0) ++ queue_work(system_long_wq, &pvclock_gtod_work); ++ ++ return 0; ++} ++ ++static struct notifier_block pvclock_gtod_notifier = { ++ .notifier_call = pvclock_gtod_notify, ++}; ++#endif ++ ++int kvm_arch_init(void *opaque) ++{ ++ int r; ++ struct kvm_x86_ops *ops = opaque; ++ ++ if (kvm_x86_ops) { ++ printk(KERN_ERR "kvm: already loaded the other module\n"); ++ r = -EEXIST; ++ goto out; ++ } ++ ++ if (!ops->cpu_has_kvm_support()) { ++ printk(KERN_ERR "kvm: no hardware support\n"); ++ r = -EOPNOTSUPP; ++ goto out; ++ } ++ if (ops->disabled_by_bios()) { ++ printk(KERN_ERR "kvm: disabled by bios\n"); ++ r = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ r = -ENOMEM; ++ shared_msrs = alloc_percpu(struct kvm_shared_msrs); ++ if (!shared_msrs) { ++ printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); ++ goto out; ++ } ++ ++ r = kvm_mmu_module_init(); ++ if (r) ++ goto out_free_percpu; ++ ++ kvm_set_mmio_spte_mask(); ++ ++ kvm_x86_ops = ops; ++ ++ kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, ++ PT_DIRTY_MASK, PT64_NX_MASK, 0, ++ PT_PRESENT_MASK, 0, sme_me_mask); ++ kvm_timer_init(); ++ ++ perf_register_guest_info_callbacks(&kvm_guest_cbs); ++ ++ if (boot_cpu_has(X86_FEATURE_XSAVE)) ++ host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); ++ ++ kvm_lapic_init(); ++#ifdef CONFIG_X86_64 ++ pvclock_gtod_register_notifier(&pvclock_gtod_notifier); ++ ++ if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) ++ set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); ++#endif ++ ++ return 0; ++ ++out_free_percpu: ++ free_percpu(shared_msrs); ++out: ++ return r; ++} ++ ++void kvm_arch_exit(void) ++{ ++#ifdef CONFIG_X86_64 ++ if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) ++ clear_hv_tscchange_cb(); ++#endif ++ kvm_lapic_exit(); ++ perf_unregister_guest_info_callbacks(&kvm_guest_cbs); ++ ++ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) ++ cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, ++ CPUFREQ_TRANSITION_NOTIFIER); ++ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); ++#ifdef CONFIG_X86_64 ++ pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); ++#endif ++ kvm_x86_ops = NULL; ++ kvm_mmu_module_exit(); ++ free_percpu(shared_msrs); ++} ++ ++int kvm_vcpu_halt(struct kvm_vcpu *vcpu) ++{ ++ ++vcpu->stat.halt_exits; ++ if (lapic_in_kernel(vcpu)) { ++ vcpu->arch.mp_state = KVM_MP_STATE_HALTED; ++ return 1; ++ } else { ++ vcpu->run->exit_reason = KVM_EXIT_HLT; ++ return 0; ++ } ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_halt); ++ ++int kvm_emulate_halt(struct kvm_vcpu *vcpu) ++{ ++ int ret = kvm_skip_emulated_instruction(vcpu); ++ /* ++ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered ++ * KVM_EXIT_DEBUG here. 
++ */ ++ return kvm_vcpu_halt(vcpu) && ret; ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_halt); ++ ++#ifdef CONFIG_X86_64 ++static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, ++ unsigned long clock_type) ++{ ++ struct kvm_clock_pairing clock_pairing; ++ struct timespec64 ts; ++ u64 cycle; ++ int ret; ++ ++ if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) ++ return -KVM_EOPNOTSUPP; ++ ++ if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) ++ return -KVM_EOPNOTSUPP; ++ ++ clock_pairing.sec = ts.tv_sec; ++ clock_pairing.nsec = ts.tv_nsec; ++ clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); ++ clock_pairing.flags = 0; ++ memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ++ ++ ret = 0; ++ if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, ++ sizeof(struct kvm_clock_pairing))) ++ ret = -KVM_EFAULT; ++ ++ return ret; ++} ++#endif ++ ++/* ++ * kvm_pv_kick_cpu_op: Kick a vcpu. ++ * ++ * @apicid - apicid of vcpu to be kicked. ++ */ ++static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) ++{ ++ struct kvm_lapic_irq lapic_irq; ++ ++ lapic_irq.shorthand = 0; ++ lapic_irq.dest_mode = 0; ++ lapic_irq.level = 0; ++ lapic_irq.dest_id = apicid; ++ lapic_irq.msi_redir_hint = false; ++ ++ lapic_irq.delivery_mode = APIC_DM_REMRD; ++ kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); ++} ++ ++void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.apicv_active = false; ++ kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); ++} ++ ++int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ++{ ++ unsigned long nr, a0, a1, a2, a3, ret; ++ int op_64_bit; ++ ++ if (kvm_hv_hypercall_enabled(vcpu->kvm)) ++ return kvm_hv_hypercall(vcpu); ++ ++ nr = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); ++ a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); ++ a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); ++ ++ trace_kvm_hypercall(nr, a0, a1, a2, a3); ++ ++ op_64_bit = is_64_bit_mode(vcpu); ++ if (!op_64_bit) { ++ nr &= 0xFFFFFFFF; ++ a0 &= 0xFFFFFFFF; ++ a1 &= 0xFFFFFFFF; ++ a2 &= 0xFFFFFFFF; ++ a3 &= 0xFFFFFFFF; ++ } ++ ++ if (kvm_x86_ops->get_cpl(vcpu) != 0) { ++ ret = -KVM_EPERM; ++ goto out; ++ } ++ ++ switch (nr) { ++ case KVM_HC_VAPIC_POLL_IRQ: ++ ret = 0; ++ break; ++ case KVM_HC_KICK_CPU: ++ kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); ++ ret = 0; ++ break; ++#ifdef CONFIG_X86_64 ++ case KVM_HC_CLOCK_PAIRING: ++ ret = kvm_pv_clock_pairing(vcpu, a0, a1); ++ break; ++#endif ++ case KVM_HC_SEND_IPI: ++ ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); ++ break; ++ default: ++ ret = -KVM_ENOSYS; ++ break; ++ } ++out: ++ if (!op_64_bit) ++ ret = (u32)ret; ++ kvm_register_write(vcpu, VCPU_REGS_RAX, ret); ++ ++ ++vcpu->stat.hypercalls; ++ return kvm_skip_emulated_instruction(vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); ++ ++static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) ++{ ++ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); ++ char instruction[3]; ++ unsigned long rip = kvm_rip_read(vcpu); ++ ++ kvm_x86_ops->patch_hypercall(vcpu, instruction); ++ ++ return emulator_write_emulated(ctxt, rip, instruction, 3, ++ &ctxt->exception); ++} ++ ++static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->run->request_interrupt_window && ++ likely(!pic_in_kernel(vcpu->kvm)); ++} ++ ++static void post_kvm_run_save(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_run *kvm_run = vcpu->run; ++ ++ kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; ++ 
kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; ++ kvm_run->cr8 = kvm_get_cr8(vcpu); ++ kvm_run->apic_base = kvm_get_apic_base(vcpu); ++ kvm_run->ready_for_interrupt_injection = ++ pic_in_kernel(vcpu->kvm) || ++ kvm_vcpu_ready_for_interrupt_injection(vcpu); ++} ++ ++static void update_cr8_intercept(struct kvm_vcpu *vcpu) ++{ ++ int max_irr, tpr; ++ ++ if (!kvm_x86_ops->update_cr8_intercept) ++ return; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (vcpu->arch.apicv_active) ++ return; ++ ++ if (!vcpu->arch.apic->vapic_addr) ++ max_irr = kvm_lapic_find_highest_irr(vcpu); ++ else ++ max_irr = -1; ++ ++ if (max_irr != -1) ++ max_irr >>= 4; ++ ++ tpr = kvm_lapic_get_cr8(vcpu); ++ ++ kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); ++} ++ ++static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) ++{ ++ int r; ++ ++ /* try to reinject previous events if any */ ++ ++ if (vcpu->arch.exception.injected) ++ kvm_x86_ops->queue_exception(vcpu); ++ /* ++ * Do not inject an NMI or interrupt if there is a pending ++ * exception. Exceptions and interrupts are recognized at ++ * instruction boundaries, i.e. the start of an instruction. ++ * Trap-like exceptions, e.g. #DB, have higher priority than ++ * NMIs and interrupts, i.e. traps are recognized before an ++ * NMI/interrupt that's pending on the same instruction. ++ * Fault-like exceptions, e.g. #GP and #PF, are the lowest ++ * priority, but are only generated (pended) during instruction ++ * execution, i.e. a pending fault-like exception means the ++ * fault occurred on the *previous* instruction and must be ++ * serviced prior to recognizing any new events in order to ++ * fully complete the previous instruction. ++ */ ++ else if (!vcpu->arch.exception.pending) { ++ if (vcpu->arch.nmi_injected) ++ kvm_x86_ops->set_nmi(vcpu); ++ else if (vcpu->arch.interrupt.injected) ++ kvm_x86_ops->set_irq(vcpu); ++ } ++ ++ /* ++ * Call check_nested_events() even if we reinjected a previous event ++ * in order for caller to determine if it should require immediate-exit ++ * from L2 to L1 due to pending L1 events which require exit ++ * from L2 to L1. 
++ */ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { ++ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ if (r != 0) ++ return r; ++ } ++ ++ /* try to inject new event if pending */ ++ if (vcpu->arch.exception.pending) { ++ trace_kvm_inj_exception(vcpu->arch.exception.nr, ++ vcpu->arch.exception.has_error_code, ++ vcpu->arch.exception.error_code); ++ ++ WARN_ON_ONCE(vcpu->arch.exception.injected); ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.injected = true; ++ ++ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) ++ __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | ++ X86_EFLAGS_RF); ++ ++ if (vcpu->arch.exception.nr == DB_VECTOR && ++ (vcpu->arch.dr7 & DR7_GD)) { ++ vcpu->arch.dr7 &= ~DR7_GD; ++ kvm_update_dr7(vcpu); ++ } ++ ++ kvm_x86_ops->queue_exception(vcpu); ++ } ++ ++ /* Don't consider new event if we re-injected an event */ ++ if (kvm_event_needs_reinjection(vcpu)) ++ return 0; ++ ++ if (vcpu->arch.smi_pending && !is_smm(vcpu) && ++ kvm_x86_ops->smi_allowed(vcpu)) { ++ vcpu->arch.smi_pending = false; ++ ++vcpu->arch.smi_count; ++ enter_smm(vcpu); ++ } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { ++ --vcpu->arch.nmi_pending; ++ vcpu->arch.nmi_injected = true; ++ kvm_x86_ops->set_nmi(vcpu); ++ } else if (kvm_cpu_has_injectable_intr(vcpu)) { ++ /* ++ * Because interrupts can be injected asynchronously, we are ++ * calling check_nested_events again here to avoid a race condition. ++ * See https://lkml.org/lkml/2014/7/2/60 for discussion about this ++ * proposal and current concerns. Perhaps we should be setting ++ * KVM_REQ_EVENT only on certain events and not unconditionally? ++ */ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { ++ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ if (r != 0) ++ return r; ++ } ++ if (kvm_x86_ops->interrupt_allowed(vcpu)) { ++ kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), ++ false); ++ kvm_x86_ops->set_irq(vcpu); ++ } ++ } ++ ++ return 0; ++} ++ ++static void process_nmi(struct kvm_vcpu *vcpu) ++{ ++ unsigned limit = 2; ++ ++ /* ++ * x86 is limited to one NMI running, and one NMI pending after it. ++ * If an NMI is already in progress, limit further NMIs to just one. ++ * Otherwise, allow two (and we'll inject the first one immediately). 
++ */ ++ if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected) ++ limit = 1; ++ ++ vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0); ++ vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) ++{ ++ u32 flags = 0; ++ flags |= seg->g << 23; ++ flags |= seg->db << 22; ++ flags |= seg->l << 21; ++ flags |= seg->avl << 20; ++ flags |= seg->present << 15; ++ flags |= seg->dpl << 13; ++ flags |= seg->s << 12; ++ flags |= seg->type << 8; ++ return flags; ++} ++ ++static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) ++{ ++ struct kvm_segment seg; ++ int offset; ++ ++ kvm_get_segment(vcpu, &seg, n); ++ put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); ++ ++ if (n < 3) ++ offset = 0x7f84 + n * 12; ++ else ++ offset = 0x7f2c + (n - 3) * 12; ++ ++ put_smstate(u32, buf, offset + 8, seg.base); ++ put_smstate(u32, buf, offset + 4, seg.limit); ++ put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); ++} ++ ++#ifdef CONFIG_X86_64 ++static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) ++{ ++ struct kvm_segment seg; ++ int offset; ++ u16 flags; ++ ++ kvm_get_segment(vcpu, &seg, n); ++ offset = 0x7e00 + n * 16; ++ ++ flags = enter_smm_get_segment_flags(&seg) >> 8; ++ put_smstate(u16, buf, offset, seg.selector); ++ put_smstate(u16, buf, offset + 2, flags); ++ put_smstate(u32, buf, offset + 4, seg.limit); ++ put_smstate(u64, buf, offset + 8, seg.base); ++} ++#endif ++ ++static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) ++{ ++ struct desc_ptr dt; ++ struct kvm_segment seg; ++ unsigned long val; ++ int i; ++ ++ put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); ++ put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); ++ put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); ++ put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); ++ ++ for (i = 0; i < 8; i++) ++ put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i)); ++ ++ kvm_get_dr(vcpu, 6, &val); ++ put_smstate(u32, buf, 0x7fcc, (u32)val); ++ kvm_get_dr(vcpu, 7, &val); ++ put_smstate(u32, buf, 0x7fc8, (u32)val); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); ++ put_smstate(u32, buf, 0x7fc4, seg.selector); ++ put_smstate(u32, buf, 0x7f64, seg.base); ++ put_smstate(u32, buf, 0x7f60, seg.limit); ++ put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); ++ put_smstate(u32, buf, 0x7fc0, seg.selector); ++ put_smstate(u32, buf, 0x7f80, seg.base); ++ put_smstate(u32, buf, 0x7f7c, seg.limit); ++ put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); ++ ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7f74, dt.address); ++ put_smstate(u32, buf, 0x7f70, dt.size); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7f58, dt.address); ++ put_smstate(u32, buf, 0x7f54, dt.size); ++ ++ for (i = 0; i < 6; i++) ++ enter_smm_save_seg_32(vcpu, buf, i); ++ ++ put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); ++ ++ /* revision id */ ++ put_smstate(u32, buf, 0x7efc, 0x00020000); ++ put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); ++} ++ ++#ifdef CONFIG_X86_64 ++static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) ++{ ++ struct desc_ptr dt; ++ struct kvm_segment seg; ++ unsigned long val; ++ int i; ++ ++ for (i = 0; i < 16; i++) ++ put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i)); ++ ++ put_smstate(u64, buf, 0x7f78, 
kvm_rip_read(vcpu)); ++ put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); ++ ++ kvm_get_dr(vcpu, 6, &val); ++ put_smstate(u64, buf, 0x7f68, val); ++ kvm_get_dr(vcpu, 7, &val); ++ put_smstate(u64, buf, 0x7f60, val); ++ ++ put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); ++ put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); ++ put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); ++ ++ put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); ++ ++ /* revision id */ ++ put_smstate(u32, buf, 0x7efc, 0x00020064); ++ ++ put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); ++ put_smstate(u16, buf, 0x7e90, seg.selector); ++ put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); ++ put_smstate(u32, buf, 0x7e94, seg.limit); ++ put_smstate(u64, buf, 0x7e98, seg.base); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7e84, dt.size); ++ put_smstate(u64, buf, 0x7e88, dt.address); ++ ++ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); ++ put_smstate(u16, buf, 0x7e70, seg.selector); ++ put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); ++ put_smstate(u32, buf, 0x7e74, seg.limit); ++ put_smstate(u64, buf, 0x7e78, seg.base); ++ ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ put_smstate(u32, buf, 0x7e64, dt.size); ++ put_smstate(u64, buf, 0x7e68, dt.address); ++ ++ for (i = 0; i < 6; i++) ++ enter_smm_save_seg_64(vcpu, buf, i); ++} ++#endif ++ ++static void enter_smm(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_segment cs, ds; ++ struct desc_ptr dt; ++ char buf[512]; ++ u32 cr0; ++ ++ trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); ++ memset(buf, 0, 512); ++#ifdef CONFIG_X86_64 ++ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) ++ enter_smm_save_state_64(vcpu, buf); ++ else ++#endif ++ enter_smm_save_state_32(vcpu, buf); ++ ++ /* ++ * Give pre_enter_smm() a chance to make ISA-specific changes to the ++ * vCPU state (e.g. leave guest mode) after we've saved the state into ++ * the SMM state-save area. ++ */ ++ kvm_x86_ops->pre_enter_smm(vcpu, buf); ++ ++ vcpu->arch.hflags |= HF_SMM_MASK; ++ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); ++ ++ if (kvm_x86_ops->get_nmi_mask(vcpu)) ++ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; ++ else ++ kvm_x86_ops->set_nmi_mask(vcpu, true); ++ ++ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); ++ kvm_rip_write(vcpu, 0x8000); ++ ++ cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); ++ kvm_x86_ops->set_cr0(vcpu, cr0); ++ vcpu->arch.cr0 = cr0; ++ ++ kvm_x86_ops->set_cr4(vcpu, 0); ++ ++ /* Undocumented: IDT limit is set to zero on entry to SMM. 
*/ ++ dt.address = dt.size = 0; ++ kvm_x86_ops->set_idt(vcpu, &dt); ++ ++ __kvm_set_dr(vcpu, 7, DR7_FIXED_1); ++ ++ cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; ++ cs.base = vcpu->arch.smbase; ++ ++ ds.selector = 0; ++ ds.base = 0; ++ ++ cs.limit = ds.limit = 0xffffffff; ++ cs.type = ds.type = 0x3; ++ cs.dpl = ds.dpl = 0; ++ cs.db = ds.db = 0; ++ cs.s = ds.s = 1; ++ cs.l = ds.l = 0; ++ cs.g = ds.g = 1; ++ cs.avl = ds.avl = 0; ++ cs.present = ds.present = 1; ++ cs.unusable = ds.unusable = 0; ++ cs.padding = ds.padding = 0; ++ ++ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); ++ kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); ++ ++#ifdef CONFIG_X86_64 ++ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) ++ kvm_x86_ops->set_efer(vcpu, 0); ++#endif ++ ++ kvm_update_cpuid(vcpu); ++ kvm_mmu_reset_context(vcpu); ++} ++ ++static void process_smi(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.smi_pending = true; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++void kvm_make_scan_ioapic_request(struct kvm *kvm) ++{ ++ kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); ++} ++ ++static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_apic_present(vcpu)) ++ return; ++ ++ bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); ++ ++ if (irqchip_split(vcpu->kvm)) ++ kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); ++ else { ++ if (vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ if (ioapic_in_kernel(vcpu->kvm)) ++ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); ++ } ++ ++ if (is_guest_mode(vcpu)) ++ vcpu->arch.load_eoi_exitmap_pending = true; ++ else ++ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); ++} ++ ++static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) ++{ ++ u64 eoi_exit_bitmap[4]; ++ ++ if (!kvm_apic_hw_enabled(vcpu->arch.apic)) ++ return; ++ ++ bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, ++ vcpu_to_synic(vcpu)->vec_bitmap, 256); ++ kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); ++} ++ ++int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, ++ unsigned long start, unsigned long end, ++ bool blockable) ++{ ++ unsigned long apic_address; ++ ++ /* ++ * The physical address of apic access page is stored in the VMCS. ++ * Update it when it becomes invalid. ++ */ ++ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (start <= apic_address && apic_address < end) ++ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); ++ ++ return 0; ++} ++ ++void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) ++{ ++ struct page *page = NULL; ++ ++ if (!lapic_in_kernel(vcpu)) ++ return; ++ ++ if (!kvm_x86_ops->set_apic_access_page_addr) ++ return; ++ ++ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); ++ if (is_error_page(page)) ++ return; ++ kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); ++ ++ /* ++ * Do not pin apic access page in memory, the MMU notifier ++ * will call us again if it is migrated or swapped out. ++ */ ++ put_page(page); ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); ++ ++void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) ++{ ++ smp_send_reschedule(vcpu->cpu); ++} ++EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); ++ ++/* ++ * Returns 1 to let vcpu_run() continue the guest execution loop without ++ * exiting to the userspace. 
Otherwise, the value will be returned to the ++ * userspace. ++ */ ++static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ bool req_int_win = ++ dm_request_for_irq_injection(vcpu) && ++ kvm_cpu_accept_dm_intr(vcpu); ++ ++ bool req_immediate_exit = false; ++ ++ if (kvm_request_pending(vcpu)) { ++ if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) ++ kvm_x86_ops->get_vmcs12_pages(vcpu); ++ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) ++ kvm_mmu_unload(vcpu); ++ if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) ++ __kvm_migrate_timers(vcpu); ++ if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu)) ++ kvm_gen_update_masterclock(vcpu->kvm); ++ if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu)) ++ kvm_gen_kvmclock_update(vcpu); ++ if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { ++ r = kvm_guest_time_update(vcpu); ++ if (unlikely(r)) ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) ++ kvm_mmu_sync_roots(vcpu); ++ if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu)) ++ kvm_mmu_load_cr3(vcpu); ++ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) ++ kvm_vcpu_flush_tlb(vcpu, true); ++ if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; ++ vcpu->mmio_needed = 0; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { ++ /* Page is swapped out. Do synthetic halt */ ++ vcpu->arch.apf.halted = true; ++ r = 1; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) ++ record_steal_time(vcpu); ++ if (kvm_check_request(KVM_REQ_SMI, vcpu)) ++ process_smi(vcpu); ++ if (kvm_check_request(KVM_REQ_NMI, vcpu)) ++ process_nmi(vcpu); ++ if (kvm_check_request(KVM_REQ_PMU, vcpu)) ++ kvm_pmu_handle_event(vcpu); ++ if (kvm_check_request(KVM_REQ_PMI, vcpu)) ++ kvm_pmu_deliver_pmi(vcpu); ++ if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { ++ BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); ++ if (test_bit(vcpu->arch.pending_ioapic_eoi, ++ vcpu->arch.ioapic_handled_vectors)) { ++ vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; ++ vcpu->run->eoi.vector = ++ vcpu->arch.pending_ioapic_eoi; ++ r = 0; ++ goto out; ++ } ++ } ++ if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) ++ vcpu_scan_ioapic(vcpu); ++ if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) ++ vcpu_load_eoi_exitmap(vcpu); ++ if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) ++ kvm_vcpu_reload_apic_access_page(vcpu); ++ if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; ++ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; ++ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; ++ r = 0; ++ goto out; ++ } ++ if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { ++ vcpu->run->exit_reason = KVM_EXIT_HYPERV; ++ vcpu->run->hyperv = vcpu->arch.hyperv.exit; ++ r = 0; ++ goto out; ++ } ++ ++ /* ++ * KVM_REQ_HV_STIMER has to be processed after ++ * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers ++ * depend on the guest clock being up-to-date ++ */ ++ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) ++ kvm_hv_process_stimers(vcpu); ++ } ++ ++ if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { ++ ++vcpu->stat.req_event; ++ kvm_apic_accept_events(vcpu); ++ if (vcpu->arch.mp_state == 
KVM_MP_STATE_INIT_RECEIVED) { ++ r = 1; ++ goto out; ++ } ++ ++ if (inject_pending_event(vcpu, req_int_win) != 0) ++ req_immediate_exit = true; ++ else { ++ /* Enable SMI/NMI/IRQ window open exits if needed. ++ * ++ * SMIs have three cases: ++ * 1) They can be nested, and then there is nothing to ++ * do here because RSM will cause a vmexit anyway. ++ * 2) There is an ISA-specific reason why SMI cannot be ++ * injected, and the moment when this changes can be ++ * intercepted. ++ * 3) Or the SMI can be pending because ++ * inject_pending_event has completed the injection ++ * of an IRQ or NMI from the previous vmexit, and ++ * then we request an immediate exit to inject the ++ * SMI. ++ */ ++ if (vcpu->arch.smi_pending && !is_smm(vcpu)) ++ if (!kvm_x86_ops->enable_smi_window(vcpu)) ++ req_immediate_exit = true; ++ if (vcpu->arch.nmi_pending) ++ kvm_x86_ops->enable_nmi_window(vcpu); ++ if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) ++ kvm_x86_ops->enable_irq_window(vcpu); ++ WARN_ON(vcpu->arch.exception.pending); ++ } ++ ++ if (kvm_lapic_enabled(vcpu)) { ++ update_cr8_intercept(vcpu); ++ kvm_lapic_sync_to_vapic(vcpu); ++ } ++ } ++ ++ r = kvm_mmu_reload(vcpu); ++ if (unlikely(r)) { ++ goto cancel_injection; ++ } ++ ++ preempt_disable(); ++ ++ kvm_x86_ops->prepare_guest_switch(vcpu); ++ ++ /* ++ * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt ++ * IPI are then delayed after guest entry, which ensures that they ++ * result in virtual interrupt delivery. ++ */ ++ local_irq_disable(); ++ vcpu->mode = IN_GUEST_MODE; ++ ++ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); ++ ++ /* ++ * 1) We should set ->mode before checking ->requests. Please see ++ * the comment in kvm_vcpu_exiting_guest_mode(). ++ * ++ * 2) For APICv, we should set ->mode before checking PIR.ON. This ++ * pairs with the memory barrier implicit in pi_test_and_set_on ++ * (see vmx_deliver_posted_interrupt). ++ * ++ * 3) This also orders the write to mode from any reads to the page ++ * tables done while the VCPU is running. Please see the comment ++ * in kvm_flush_remote_tlbs. ++ */ ++ smp_mb__after_srcu_read_unlock(); ++ ++ /* ++ * This handles the case where a posted interrupt was ++ * notified with kvm_vcpu_kick. ++ */ ++ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) ++ kvm_x86_ops->sync_pir_to_irr(vcpu); ++ ++ if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ++ || need_resched() || signal_pending(current)) { ++ vcpu->mode = OUTSIDE_GUEST_MODE; ++ smp_wmb(); ++ local_irq_enable(); ++ preempt_enable(); ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = 1; ++ goto cancel_injection; ++ } ++ ++ if (req_immediate_exit) { ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ kvm_x86_ops->request_immediate_exit(vcpu); ++ } ++ ++ trace_kvm_entry(vcpu->vcpu_id); ++ if (lapic_timer_advance_ns) ++ wait_lapic_expire(vcpu); ++ guest_enter_irqoff(); ++ ++ if (unlikely(vcpu->arch.switch_db_regs)) { ++ set_debugreg(0, 7); ++ set_debugreg(vcpu->arch.eff_db[0], 0); ++ set_debugreg(vcpu->arch.eff_db[1], 1); ++ set_debugreg(vcpu->arch.eff_db[2], 2); ++ set_debugreg(vcpu->arch.eff_db[3], 3); ++ set_debugreg(vcpu->arch.dr6, 6); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; ++ } ++ ++ kvm_x86_ops->run(vcpu); ++ ++ /* ++ * Do this here before restoring debug registers on the host. And ++ * since we do this before handling the vmexit, a DR access vmexit ++ * can (a) read the correct value of the debug registers, (b) set ++ * KVM_DEBUGREG_WONT_EXIT again. 
++ */ ++ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { ++ WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); ++ kvm_x86_ops->sync_dirty_debug_regs(vcpu); ++ kvm_update_dr0123(vcpu); ++ kvm_update_dr6(vcpu); ++ kvm_update_dr7(vcpu); ++ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; ++ } ++ ++ /* ++ * If the guest has used debug registers, at least dr7 ++ * will be disabled while returning to the host. ++ * If we don't have active breakpoints in the host, we don't ++ * care about the messed up debug address registers. But if ++ * we have some of them active, restore the old state. ++ */ ++ if (hw_breakpoint_active()) ++ hw_breakpoint_restore(); ++ ++ vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); ++ ++ vcpu->mode = OUTSIDE_GUEST_MODE; ++ smp_wmb(); ++ ++ kvm_before_interrupt(vcpu); ++ kvm_x86_ops->handle_external_intr(vcpu); ++ kvm_after_interrupt(vcpu); ++ ++ ++vcpu->stat.exits; ++ ++ guest_exit_irqoff(); ++ ++ local_irq_enable(); ++ preempt_enable(); ++ ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ ++ /* ++ * Profile KVM exit RIPs: ++ */ ++ if (unlikely(prof_on == KVM_PROFILING)) { ++ unsigned long rip = kvm_rip_read(vcpu); ++ profile_hit(KVM_PROFILING, (void *)rip); ++ } ++ ++ if (unlikely(vcpu->arch.tsc_always_catchup)) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ ++ if (vcpu->arch.apic_attention) ++ kvm_lapic_sync_from_vapic(vcpu); ++ ++ vcpu->arch.gpa_available = false; ++ r = kvm_x86_ops->handle_exit(vcpu); ++ return r; ++ ++cancel_injection: ++ kvm_x86_ops->cancel_injection(vcpu); ++ if (unlikely(vcpu->arch.apic_attention)) ++ kvm_lapic_sync_from_vapic(vcpu); ++out: ++ return r; ++} ++ ++static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) ++{ ++ if (!kvm_arch_vcpu_runnable(vcpu) && ++ (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ kvm_vcpu_block(vcpu); ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ ++ if (kvm_x86_ops->post_block) ++ kvm_x86_ops->post_block(vcpu); ++ ++ if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) ++ return 1; ++ } ++ ++ kvm_apic_accept_events(vcpu); ++ switch(vcpu->arch.mp_state) { ++ case KVM_MP_STATE_HALTED: ++ vcpu->arch.pv.pv_unhalted = false; ++ vcpu->arch.mp_state = ++ KVM_MP_STATE_RUNNABLE; ++ case KVM_MP_STATE_RUNNABLE: ++ vcpu->arch.apf.halted = false; ++ break; ++ case KVM_MP_STATE_INIT_RECEIVED: ++ break; ++ default: ++ return -EINTR; ++ break; ++ } ++ return 1; ++} ++ ++static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) ++{ ++ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) ++ kvm_x86_ops->check_nested_events(vcpu, false); ++ ++ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && ++ !vcpu->arch.apf.halted); ++} ++ ++static int vcpu_run(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ struct kvm *kvm = vcpu->kvm; ++ ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ vcpu->arch.l1tf_flush_l1d = true; ++ ++ for (;;) { ++ if (kvm_vcpu_running(vcpu)) { ++ r = vcpu_enter_guest(vcpu); ++ } else { ++ r = vcpu_block(kvm, vcpu); ++ } ++ ++ if (r <= 0) ++ break; ++ ++ kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); ++ if (kvm_cpu_has_pending_timer(vcpu)) ++ kvm_inject_pending_timer_irqs(vcpu); ++ ++ if (dm_request_for_irq_injection(vcpu) && ++ kvm_vcpu_ready_for_interrupt_injection(vcpu)) { ++ r = 0; ++ vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++ ++vcpu->stat.request_irq_exits; ++ break; ++ } ++ ++ kvm_check_async_pf_completion(vcpu); ++ ++ if (signal_pending(current)) { ++ r = -EINTR; ++ 
vcpu->run->exit_reason = KVM_EXIT_INTR; ++ ++vcpu->stat.signal_exits; ++ break; ++ } ++ if (need_resched()) { ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ cond_resched(); ++ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ++ } ++ } ++ ++ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); ++ ++ return r; ++} ++ ++static inline int complete_emulated_io(struct kvm_vcpu *vcpu) ++{ ++ int r; ++ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); ++ r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); ++ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); ++ if (r != EMULATE_DONE) ++ return 0; ++ return 1; ++} ++ ++static int complete_emulated_pio(struct kvm_vcpu *vcpu) ++{ ++ BUG_ON(!vcpu->arch.pio.count); ++ ++ return complete_emulated_io(vcpu); ++} ++ ++/* ++ * Implements the following, as a state machine: ++ * ++ * read: ++ * for each fragment ++ * for each mmio piece in the fragment ++ * write gpa, len ++ * exit ++ * copy data ++ * execute insn ++ * ++ * write: ++ * for each fragment ++ * for each mmio piece in the fragment ++ * write gpa, len ++ * copy data ++ * exit ++ */ ++static int complete_emulated_mmio(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_run *run = vcpu->run; ++ struct kvm_mmio_fragment *frag; ++ unsigned len; ++ ++ BUG_ON(!vcpu->mmio_needed); ++ ++ /* Complete previous fragment */ ++ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; ++ len = min(8u, frag->len); ++ if (!vcpu->mmio_is_write) ++ memcpy(frag->data, run->mmio.data, len); ++ ++ if (frag->len <= 8) { ++ /* Switch to the next fragment. */ ++ frag++; ++ vcpu->mmio_cur_fragment++; ++ } else { ++ /* Go forward to the next mmio piece. */ ++ frag->data += len; ++ frag->gpa += len; ++ frag->len -= len; ++ } ++ ++ if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { ++ vcpu->mmio_needed = 0; ++ ++ /* FIXME: return into emulator if single-stepping. */ ++ if (vcpu->mmio_is_write) ++ return 1; ++ vcpu->mmio_read_completed = 1; ++ return complete_emulated_io(vcpu); ++ } ++ ++ run->exit_reason = KVM_EXIT_MMIO; ++ run->mmio.phys_addr = frag->gpa; ++ if (vcpu->mmio_is_write) ++ memcpy(run->mmio.data, frag->data, min(8u, frag->len)); ++ run->mmio.len = min(8u, frag->len); ++ run->mmio.is_write = vcpu->mmio_is_write; ++ vcpu->arch.complete_userspace_io = complete_emulated_mmio; ++ return 0; ++} ++ ++/* Swap (qemu) user FPU context for the guest FPU context. */ ++static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) ++{ ++ preempt_disable(); ++ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); ++ /* PKRU is separately restored in kvm_x86_ops->run. */ ++ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ++ ~XFEATURE_MASK_PKRU); ++ preempt_enable(); ++ trace_kvm_fpu(1); ++} ++ ++/* When vcpu_run ends, restore user space FPU context. 
*/ ++static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) ++{ ++ preempt_disable(); ++ copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); ++ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); ++ preempt_enable(); ++ ++vcpu->stat.fpu_reload; ++ trace_kvm_fpu(0); ++} ++ ++int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ++{ ++ int r; ++ ++ vcpu_load(vcpu); ++ kvm_sigset_activate(vcpu); ++ kvm_load_guest_fpu(vcpu); ++ ++ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { ++ if (kvm_run->immediate_exit) { ++ r = -EINTR; ++ goto out; ++ } ++ kvm_vcpu_block(vcpu); ++ kvm_apic_accept_events(vcpu); ++ kvm_clear_request(KVM_REQ_UNHALT, vcpu); ++ r = -EAGAIN; ++ if (signal_pending(current)) { ++ r = -EINTR; ++ vcpu->run->exit_reason = KVM_EXIT_INTR; ++ ++vcpu->stat.signal_exits; ++ } ++ goto out; ++ } ++ ++ if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { ++ r = -EINVAL; ++ goto out; ++ } ++ ++ if (vcpu->run->kvm_dirty_regs) { ++ r = sync_regs(vcpu); ++ if (r != 0) ++ goto out; ++ } ++ ++ /* re-sync apic's tpr */ ++ if (!lapic_in_kernel(vcpu)) { ++ if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { ++ r = -EINVAL; ++ goto out; ++ } ++ } ++ ++ if (unlikely(vcpu->arch.complete_userspace_io)) { ++ int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; ++ vcpu->arch.complete_userspace_io = NULL; ++ r = cui(vcpu); ++ if (r <= 0) ++ goto out; ++ } else ++ WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); ++ ++ if (kvm_run->immediate_exit) ++ r = -EINTR; ++ else ++ r = vcpu_run(vcpu); ++ ++out: ++ kvm_put_guest_fpu(vcpu); ++ if (vcpu->run->kvm_valid_regs) ++ store_regs(vcpu); ++ post_kvm_run_save(vcpu); ++ kvm_sigset_deactivate(vcpu); ++ ++ vcpu_put(vcpu); ++ return r; ++} ++ ++static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { ++ /* ++ * We are here if userspace calls get_regs() in the middle of ++ * instruction emulation. Registers state needs to be copied ++ * back from emulation context to vcpu. 
Userspace shouldn't do ++ * that usually, but some bad designed PV devices (vmware ++ * backdoor interface) need this to work ++ */ ++ emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ } ++ regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); ++ regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); ++ regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); ++ regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); ++ regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); ++ regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); ++ regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); ++ regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); ++#ifdef CONFIG_X86_64 ++ regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); ++ regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); ++ regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); ++ regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); ++ regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); ++ regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); ++ regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); ++ regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); ++#endif ++ ++ regs->rip = kvm_rip_read(vcpu); ++ regs->rflags = kvm_get_rflags(vcpu); ++} ++ ++int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu_load(vcpu); ++ __get_regs(vcpu, regs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu->arch.emulate_regs_need_sync_from_vcpu = true; ++ vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ ++ kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); ++ kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); ++ kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); ++ kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); ++ kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); ++ kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); ++ kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); ++ kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); ++#ifdef CONFIG_X86_64 ++ kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); ++ kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); ++ kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); ++ kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); ++ kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); ++ kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); ++ kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); ++ kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); ++#endif ++ ++ kvm_rip_write(vcpu, regs->rip); ++ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); ++ ++ vcpu->arch.exception.pending = false; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++ ++int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ++{ ++ vcpu_load(vcpu); ++ __set_regs(vcpu, regs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) ++{ ++ struct kvm_segment cs; ++ ++ kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ *db = cs.db; ++ *l = cs.l; ++} ++EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); ++ ++static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ struct desc_ptr dt; ++ ++ kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); ++ kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ++ kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); ++ kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); ++ kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); ++ kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); ++ ++ kvm_get_segment(vcpu, 
&sregs->tr, VCPU_SREG_TR); ++ kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); ++ ++ kvm_x86_ops->get_idt(vcpu, &dt); ++ sregs->idt.limit = dt.size; ++ sregs->idt.base = dt.address; ++ kvm_x86_ops->get_gdt(vcpu, &dt); ++ sregs->gdt.limit = dt.size; ++ sregs->gdt.base = dt.address; ++ ++ sregs->cr0 = kvm_read_cr0(vcpu); ++ sregs->cr2 = vcpu->arch.cr2; ++ sregs->cr3 = kvm_read_cr3(vcpu); ++ sregs->cr4 = kvm_read_cr4(vcpu); ++ sregs->cr8 = kvm_get_cr8(vcpu); ++ sregs->efer = vcpu->arch.efer; ++ sregs->apic_base = kvm_get_apic_base(vcpu); ++ ++ memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); ++ ++ if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) ++ set_bit(vcpu->arch.interrupt.nr, ++ (unsigned long *)sregs->interrupt_bitmap); ++} ++ ++int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, ++ struct kvm_sregs *sregs) ++{ ++ vcpu_load(vcpu); ++ __get_sregs(vcpu, sregs); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, ++ struct kvm_mp_state *mp_state) ++{ ++ vcpu_load(vcpu); ++ ++ kvm_apic_accept_events(vcpu); ++ if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && ++ vcpu->arch.pv.pv_unhalted) ++ mp_state->mp_state = KVM_MP_STATE_RUNNABLE; ++ else ++ mp_state->mp_state = vcpu->arch.mp_state; ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ++ struct kvm_mp_state *mp_state) ++{ ++ int ret = -EINVAL; ++ ++ vcpu_load(vcpu); ++ ++ if (!lapic_in_kernel(vcpu) && ++ mp_state->mp_state != KVM_MP_STATE_RUNNABLE) ++ goto out; ++ ++ /* INITs are latched while in SMM */ ++ if ((is_smm(vcpu) || vcpu->arch.smi_pending) && ++ (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || ++ mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) ++ goto out; ++ ++ if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { ++ vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; ++ set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); ++ } else ++ vcpu->arch.mp_state = mp_state->mp_state; ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ret = 0; ++out: ++ vcpu_put(vcpu); ++ return ret; ++} ++ ++int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, ++ int reason, bool has_error_code, u32 error_code) ++{ ++ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; ++ int ret; ++ ++ init_emulate_ctxt(vcpu); ++ ++ ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, ++ has_error_code, error_code); ++ ++ if (ret) ++ return EMULATE_FAIL; ++ ++ kvm_rip_write(vcpu, ctxt->eip); ++ kvm_set_rflags(vcpu, ctxt->eflags); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ return EMULATE_DONE; ++} ++EXPORT_SYMBOL_GPL(kvm_task_switch); ++ ++static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { ++ /* ++ * When EFER.LME and CR0.PG are set, the processor is in ++ * 64-bit mode (though maybe in a 32-bit code segment). ++ * CR4.PAE and EFER.LMA must be set. ++ */ ++ if (!(sregs->cr4 & X86_CR4_PAE) ++ || !(sregs->efer & EFER_LMA)) ++ return -EINVAL; ++ } else { ++ /* ++ * Not in 64-bit mode: EFER.LMA is clear and the code ++ * segment cannot be 64-bit. 
++ */ ++ if (sregs->efer & EFER_LMA || sregs->cs.l) ++ return -EINVAL; ++ } ++ ++ return kvm_valid_cr4(vcpu, sregs->cr4); ++} ++ ++static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) ++{ ++ struct msr_data apic_base_msr; ++ int mmu_reset_needed = 0; ++ int cpuid_update_needed = 0; ++ int pending_vec, max_bits, idx; ++ struct desc_ptr dt; ++ int ret = -EINVAL; ++ ++ if (kvm_valid_sregs(vcpu, sregs)) ++ goto out; ++ ++ apic_base_msr.data = sregs->apic_base; ++ apic_base_msr.host_initiated = true; ++ if (kvm_set_apic_base(vcpu, &apic_base_msr)) ++ goto out; ++ ++ dt.size = sregs->idt.limit; ++ dt.address = sregs->idt.base; ++ kvm_x86_ops->set_idt(vcpu, &dt); ++ dt.size = sregs->gdt.limit; ++ dt.address = sregs->gdt.base; ++ kvm_x86_ops->set_gdt(vcpu, &dt); ++ ++ vcpu->arch.cr2 = sregs->cr2; ++ mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; ++ vcpu->arch.cr3 = sregs->cr3; ++ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); ++ ++ kvm_set_cr8(vcpu, sregs->cr8); ++ ++ mmu_reset_needed |= vcpu->arch.efer != sregs->efer; ++ kvm_x86_ops->set_efer(vcpu, sregs->efer); ++ ++ mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; ++ kvm_x86_ops->set_cr0(vcpu, sregs->cr0); ++ vcpu->arch.cr0 = sregs->cr0; ++ ++ mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; ++ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & ++ (X86_CR4_OSXSAVE | X86_CR4_PKE)); ++ kvm_x86_ops->set_cr4(vcpu, sregs->cr4); ++ if (cpuid_update_needed) ++ kvm_update_cpuid(vcpu); ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ if (is_pae_paging(vcpu)) { ++ load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); ++ mmu_reset_needed = 1; ++ } ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ ++ if (mmu_reset_needed) ++ kvm_mmu_reset_context(vcpu); ++ ++ max_bits = KVM_NR_INTERRUPTS; ++ pending_vec = find_first_bit( ++ (const unsigned long *)sregs->interrupt_bitmap, max_bits); ++ if (pending_vec < max_bits) { ++ kvm_queue_interrupt(vcpu, pending_vec, false); ++ pr_debug("Set back pending irq %d\n", pending_vec); ++ } ++ ++ kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); ++ kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ++ kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); ++ kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); ++ kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); ++ kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); ++ ++ kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); ++ kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); ++ ++ update_cr8_intercept(vcpu); ++ ++ /* Older userspace won't unhalt the vcpu on reset. 
*/ ++ if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && ++ sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && ++ !is_protmode(vcpu)) ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ++ struct kvm_sregs *sregs) ++{ ++ int ret; ++ ++ vcpu_load(vcpu); ++ ret = __set_sregs(vcpu, sregs); ++ vcpu_put(vcpu); ++ return ret; ++} ++ ++int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, ++ struct kvm_guest_debug *dbg) ++{ ++ unsigned long rflags; ++ int i, r; ++ ++ vcpu_load(vcpu); ++ ++ if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { ++ r = -EBUSY; ++ if (vcpu->arch.exception.pending) ++ goto out; ++ if (dbg->control & KVM_GUESTDBG_INJECT_DB) ++ kvm_queue_exception(vcpu, DB_VECTOR); ++ else ++ kvm_queue_exception(vcpu, BP_VECTOR); ++ } ++ ++ /* ++ * Read rflags as long as potentially injected trace flags are still ++ * filtered out. ++ */ ++ rflags = kvm_get_rflags(vcpu); ++ ++ vcpu->guest_debug = dbg->control; ++ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) ++ vcpu->guest_debug = 0; ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { ++ for (i = 0; i < KVM_NR_DB_REGS; ++i) ++ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; ++ vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7]; ++ } else { ++ for (i = 0; i < KVM_NR_DB_REGS; i++) ++ vcpu->arch.eff_db[i] = vcpu->arch.db[i]; ++ } ++ kvm_update_dr7(vcpu); ++ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + ++ get_segment_base(vcpu, VCPU_SREG_CS); ++ ++ /* ++ * Trigger an rflags update that will inject or remove the trace ++ * flags. ++ */ ++ kvm_set_rflags(vcpu, rflags); ++ ++ kvm_x86_ops->update_bp_intercept(vcpu); ++ ++ r = 0; ++ ++out: ++ vcpu_put(vcpu); ++ return r; ++} ++ ++/* ++ * Translate a guest virtual address to a guest physical address. 
++ */ ++int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, ++ struct kvm_translation *tr) ++{ ++ unsigned long vaddr = tr->linear_address; ++ gpa_t gpa; ++ int idx; ++ ++ vcpu_load(vcpu); ++ ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ tr->physical_address = gpa; ++ tr->valid = gpa != UNMAPPED_GVA; ++ tr->writeable = 1; ++ tr->usermode = 0; ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ++{ ++ struct fxregs_state *fxsave; ++ ++ vcpu_load(vcpu); ++ ++ fxsave = &vcpu->arch.guest_fpu.state.fxsave; ++ memcpy(fpu->fpr, fxsave->st_space, 128); ++ fpu->fcw = fxsave->cwd; ++ fpu->fsw = fxsave->swd; ++ fpu->ftwx = fxsave->twd; ++ fpu->last_opcode = fxsave->fop; ++ fpu->last_ip = fxsave->rip; ++ fpu->last_dp = fxsave->rdp; ++ memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ++{ ++ struct fxregs_state *fxsave; ++ ++ vcpu_load(vcpu); ++ ++ fxsave = &vcpu->arch.guest_fpu.state.fxsave; ++ ++ memcpy(fxsave->st_space, fpu->fpr, 128); ++ fxsave->cwd = fpu->fcw; ++ fxsave->swd = fpu->fsw; ++ fxsave->twd = fpu->ftwx; ++ fxsave->fop = fpu->last_opcode; ++ fxsave->rip = fpu->last_ip; ++ fxsave->rdp = fpu->last_dp; ++ memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); ++ ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++static void store_regs(struct kvm_vcpu *vcpu) ++{ ++ BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) ++ __get_regs(vcpu, &vcpu->run->s.regs.regs); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) ++ __get_sregs(vcpu, &vcpu->run->s.regs.sregs); ++ ++ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) ++ kvm_vcpu_ioctl_x86_get_vcpu_events( ++ vcpu, &vcpu->run->s.regs.events); ++} ++ ++static int sync_regs(struct kvm_vcpu *vcpu) ++{ ++ if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) ++ return -EINVAL; ++ ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { ++ __set_regs(vcpu, &vcpu->run->s.regs.regs); ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; ++ } ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { ++ if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) ++ return -EINVAL; ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; ++ } ++ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { ++ if (kvm_vcpu_ioctl_x86_set_vcpu_events( ++ vcpu, &vcpu->run->s.regs.events)) ++ return -EINVAL; ++ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; ++ } ++ ++ return 0; ++} ++ ++static void fx_init(struct kvm_vcpu *vcpu) ++{ ++ fpstate_init(&vcpu->arch.guest_fpu.state); ++ if (boot_cpu_has(X86_FEATURE_XSAVES)) ++ vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = ++ host_xcr0 | XSTATE_COMPACTION_ENABLED; ++ ++ /* ++ * Ensure guest xcr0 is valid for loading ++ */ ++ vcpu->arch.xcr0 = XFEATURE_MASK_FP; ++ ++ vcpu->arch.cr0 |= X86_CR0_ET; ++} ++ ++void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) ++{ ++ void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; ++ ++ kvmclock_reset(vcpu); ++ ++ kvm_x86_ops->vcpu_free(vcpu); ++ free_cpumask_var(wbinvd_dirty_mask); ++} ++ ++struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, ++ unsigned int id) ++{ ++ struct kvm_vcpu *vcpu; ++ ++ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) ++ printk_once(KERN_WARNING ++ "kvm: SMP vm created 
on host with unstable TSC; " ++ "guest TSC will not be reliable\n"); ++ ++ vcpu = kvm_x86_ops->vcpu_create(kvm, id); ++ ++ return vcpu; ++} ++ ++int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); ++ kvm_vcpu_mtrr_init(vcpu); ++ vcpu_load(vcpu); ++ kvm_vcpu_reset(vcpu, false); ++ kvm_mmu_setup(vcpu); ++ vcpu_put(vcpu); ++ return 0; ++} ++ ++void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) ++{ ++ struct msr_data msr; ++ struct kvm *kvm = vcpu->kvm; ++ ++ kvm_hv_vcpu_postcreate(vcpu); ++ ++ if (mutex_lock_killable(&vcpu->mutex)) ++ return; ++ vcpu_load(vcpu); ++ msr.data = 0x0; ++ msr.index = MSR_IA32_TSC; ++ msr.host_initiated = true; ++ kvm_write_tsc(vcpu, &msr); ++ vcpu_put(vcpu); ++ mutex_unlock(&vcpu->mutex); ++ ++ if (!kvmclock_periodic_sync) ++ return; ++ ++ schedule_delayed_work(&kvm->arch.kvmclock_sync_work, ++ KVMCLOCK_SYNC_PERIOD); ++} ++ ++void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ++{ ++ vcpu->arch.apf.msr_val = 0; ++ ++ vcpu_load(vcpu); ++ kvm_mmu_unload(vcpu); ++ vcpu_put(vcpu); ++ ++ kvm_x86_ops->vcpu_free(vcpu); ++} ++ ++void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ++{ ++ kvm_lapic_reset(vcpu, init_event); ++ ++ vcpu->arch.hflags = 0; ++ ++ vcpu->arch.smi_pending = 0; ++ vcpu->arch.smi_count = 0; ++ atomic_set(&vcpu->arch.nmi_queued, 0); ++ vcpu->arch.nmi_pending = 0; ++ vcpu->arch.nmi_injected = false; ++ kvm_clear_interrupt_queue(vcpu); ++ kvm_clear_exception_queue(vcpu); ++ vcpu->arch.exception.pending = false; ++ ++ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); ++ kvm_update_dr0123(vcpu); ++ vcpu->arch.dr6 = DR6_INIT; ++ kvm_update_dr6(vcpu); ++ vcpu->arch.dr7 = DR7_FIXED_1; ++ kvm_update_dr7(vcpu); ++ ++ vcpu->arch.cr2 = 0; ++ ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++ vcpu->arch.apf.msr_val = 0; ++ vcpu->arch.st.msr_val = 0; ++ ++ kvmclock_reset(vcpu); ++ ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_async_pf_hash_reset(vcpu); ++ vcpu->arch.apf.halted = false; ++ ++ if (kvm_mpx_supported()) { ++ void *mpx_state_buffer; ++ ++ /* ++ * To avoid have the INIT path from kvm_apic_has_events() that be ++ * called with loaded FPU and does not let userspace fix the state. 
++ */ ++ if (init_event) ++ kvm_put_guest_fpu(vcpu); ++ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, ++ XFEATURE_MASK_BNDREGS); ++ if (mpx_state_buffer) ++ memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); ++ mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, ++ XFEATURE_MASK_BNDCSR); ++ if (mpx_state_buffer) ++ memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); ++ if (init_event) ++ kvm_load_guest_fpu(vcpu); ++ } ++ ++ if (!init_event) { ++ kvm_pmu_reset(vcpu); ++ vcpu->arch.smbase = 0x30000; ++ ++ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; ++ vcpu->arch.msr_misc_features_enables = 0; ++ ++ vcpu->arch.xcr0 = XFEATURE_MASK_FP; ++ } ++ ++ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); ++ vcpu->arch.regs_avail = ~0; ++ vcpu->arch.regs_dirty = ~0; ++ ++ vcpu->arch.ia32_xss = 0; ++ ++ kvm_x86_ops->vcpu_reset(vcpu, init_event); ++} ++ ++void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) ++{ ++ struct kvm_segment cs; ++ ++ kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); ++ cs.selector = vector << 8; ++ cs.base = vector << 12; ++ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); ++ kvm_rip_write(vcpu, 0); ++} ++ ++int kvm_arch_hardware_enable(void) ++{ ++ struct kvm *kvm; ++ struct kvm_vcpu *vcpu; ++ int i; ++ int ret; ++ u64 local_tsc; ++ u64 max_tsc = 0; ++ bool stable, backwards_tsc = false; ++ ++ kvm_shared_msr_cpu_online(); ++ ret = kvm_x86_ops->hardware_enable(); ++ if (ret != 0) ++ return ret; ++ ++ local_tsc = rdtsc(); ++ stable = !kvm_check_tsc_unstable(); ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ if (!stable && vcpu->cpu == smp_processor_id()) ++ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); ++ if (stable && vcpu->arch.last_host_tsc > local_tsc) { ++ backwards_tsc = true; ++ if (vcpu->arch.last_host_tsc > max_tsc) ++ max_tsc = vcpu->arch.last_host_tsc; ++ } ++ } ++ } ++ ++ /* ++ * Sometimes, even reliable TSCs go backwards. This happens on ++ * platforms that reset TSC during suspend or hibernate actions, but ++ * maintain synchronization. We must compensate. Fortunately, we can ++ * detect that condition here, which happens early in CPU bringup, ++ * before any KVM threads can be running. Unfortunately, we can't ++ * bring the TSCs fully up to date with real time, as we aren't yet far ++ * enough into CPU bringup that we know how much real time has actually ++ * elapsed; our helper function, ktime_get_boot_ns() will be using boot ++ * variables that haven't been updated yet. ++ * ++ * So we simply find the maximum observed TSC above, then record the ++ * adjustment to TSC in each VCPU. When the VCPU later gets loaded, ++ * the adjustment will be applied. Note that we accumulate ++ * adjustments, in case multiple suspend cycles happen before some VCPU ++ * gets a chance to run again. In the event that no KVM threads get a ++ * chance to run, we will miss the entire elapsed period, as we'll have ++ * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may ++ * loose cycle time. This isn't too big a deal, since the loss will be ++ * uniform across all VCPUs (not to mention the scenario is extremely ++ * unlikely). It is possible that a second hibernate recovery happens ++ * much faster than a first, causing the observed TSC here to be ++ * smaller; this would require additional padding adjustment, which is ++ * why we set last_host_tsc to the local tsc observed here. ++ * ++ * N.B. 
- this code below runs only on platforms with reliable TSC, ++ * as that is the only way backwards_tsc is set above. Also note ++ * that this runs for ALL vcpus, which is not a bug; all VCPUs should ++ * have the same delta_cyc adjustment applied if backwards_tsc ++ * is detected. Note further, this adjustment is only done once, ++ * as we reset last_host_tsc on all VCPUs to stop this from being ++ * called multiple times (one for each physical CPU bringup). ++ * ++ * Platforms with unreliable TSCs don't have to deal with this, they ++ * will be compensated by the logic in vcpu_load, which sets the TSC to ++ * catchup mode. This will catchup all VCPUs to real time, but cannot ++ * guarantee that they stay in perfect synchronization. ++ */ ++ if (backwards_tsc) { ++ u64 delta_cyc = max_tsc - local_tsc; ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ kvm->arch.backwards_tsc_observed = true; ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ vcpu->arch.tsc_offset_adjustment += delta_cyc; ++ vcpu->arch.last_host_tsc = local_tsc; ++ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ++ } ++ ++ /* ++ * We have to disable TSC offset matching.. if you were ++ * booting a VM while issuing an S4 host suspend.... ++ * you may have some problem. Solving this issue is ++ * left as an exercise to the reader. ++ */ ++ kvm->arch.last_tsc_nsec = 0; ++ kvm->arch.last_tsc_write = 0; ++ } ++ ++ } ++ return 0; ++} ++ ++void kvm_arch_hardware_disable(void) ++{ ++ kvm_x86_ops->hardware_disable(); ++ drop_user_return_notifiers(); ++} ++ ++int kvm_arch_hardware_setup(void) ++{ ++ int r; ++ ++ r = kvm_x86_ops->hardware_setup(); ++ if (r != 0) ++ return r; ++ ++ if (kvm_has_tsc_control) { ++ /* ++ * Make sure the user can only configure tsc_khz values that ++ * fit into a signed integer. ++ * A min value is not calculated because it will always ++ * be 1 on all machines. 
++ */ ++ u64 max = min(0x7fffffffULL, ++ __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); ++ kvm_max_guest_tsc_khz = max; ++ ++ kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; ++ } ++ ++ kvm_init_msr_list(); ++ return 0; ++} ++ ++void kvm_arch_hardware_unsetup(void) ++{ ++ kvm_x86_ops->hardware_unsetup(); ++} ++ ++void kvm_arch_check_processor_compat(void *rtn) ++{ ++ kvm_x86_ops->check_processor_compatibility(rtn); ++} ++ ++bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; ++} ++EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); ++ ++bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) ++{ ++ return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; ++} ++ ++struct static_key kvm_no_apic_vcpu __read_mostly; ++EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); ++ ++int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) ++{ ++ struct page *page; ++ int r; ++ ++ vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); ++ vcpu->arch.emulate_ctxt.ops = &emulate_ops; ++ if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++ else ++ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; ++ ++ page = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!page) { ++ r = -ENOMEM; ++ goto fail; ++ } ++ vcpu->arch.pio_data = page_address(page); ++ ++ kvm_set_tsc_khz(vcpu, max_tsc_khz); ++ ++ r = kvm_mmu_create(vcpu); ++ if (r < 0) ++ goto fail_free_pio_data; ++ ++ if (irqchip_in_kernel(vcpu->kvm)) { ++ r = kvm_create_lapic(vcpu); ++ if (r < 0) ++ goto fail_mmu_destroy; ++ } else ++ static_key_slow_inc(&kvm_no_apic_vcpu); ++ ++ vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, ++ GFP_KERNEL); ++ if (!vcpu->arch.mce_banks) { ++ r = -ENOMEM; ++ goto fail_free_lapic; ++ } ++ vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; ++ ++ if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { ++ r = -ENOMEM; ++ goto fail_free_mce_banks; ++ } ++ ++ fx_init(vcpu); ++ ++ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; ++ ++ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); ++ ++ vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; ++ ++ kvm_async_pf_hash_reset(vcpu); ++ kvm_pmu_init(vcpu); ++ ++ vcpu->arch.pending_external_vector = -1; ++ vcpu->arch.preempted_in_kernel = false; ++ ++ kvm_hv_vcpu_init(vcpu); ++ ++ return 0; ++ ++fail_free_mce_banks: ++ kfree(vcpu->arch.mce_banks); ++fail_free_lapic: ++ kvm_free_lapic(vcpu); ++fail_mmu_destroy: ++ kvm_mmu_destroy(vcpu); ++fail_free_pio_data: ++ free_page((unsigned long)vcpu->arch.pio_data); ++fail: ++ return r; ++} ++ ++void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) ++{ ++ int idx; ++ ++ kvm_hv_vcpu_uninit(vcpu); ++ kvm_pmu_destroy(vcpu); ++ kfree(vcpu->arch.mce_banks); ++ kvm_free_lapic(vcpu); ++ idx = srcu_read_lock(&vcpu->kvm->srcu); ++ kvm_mmu_destroy(vcpu); ++ srcu_read_unlock(&vcpu->kvm->srcu, idx); ++ free_page((unsigned long)vcpu->arch.pio_data); ++ if (!lapic_in_kernel(vcpu)) ++ static_key_slow_dec(&kvm_no_apic_vcpu); ++} ++ ++void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) ++{ ++ vcpu->arch.l1tf_flush_l1d = true; ++ kvm_x86_ops->sched_in(vcpu, cpu); ++} ++ ++int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ++{ ++ if (type) ++ return -EINVAL; ++ ++ INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); ++ INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); ++ INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); ++ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); ++ INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); ++ 
atomic_set(&kvm->arch.noncoherent_dma_count, 0); ++ ++ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ ++ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); ++ /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ ++ set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, ++ &kvm->arch.irq_sources_bitmap); ++ ++ raw_spin_lock_init(&kvm->arch.tsc_write_lock); ++ mutex_init(&kvm->arch.apic_map_lock); ++ spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); ++ ++ kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); ++ pvclock_update_vm_gtod_copy(kvm); ++ ++ kvm->arch.guest_can_read_msr_platform_info = true; ++ ++ INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); ++ INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); ++ ++ kvm_hv_init_vm(kvm); ++ kvm_page_track_init(kvm); ++ kvm_mmu_init_vm(kvm); ++ ++ if (kvm_x86_ops->vm_init) ++ return kvm_x86_ops->vm_init(kvm); ++ ++ return 0; ++} ++ ++int kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return kvm_mmu_post_init_vm(kvm); ++} ++ ++static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) ++{ ++ vcpu_load(vcpu); ++ kvm_mmu_unload(vcpu); ++ vcpu_put(vcpu); ++} ++ ++static void kvm_free_vcpus(struct kvm *kvm) ++{ ++ unsigned int i; ++ struct kvm_vcpu *vcpu; ++ ++ /* ++ * Unpin any mmu pages first. ++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ kvm_clear_async_pf_completion_queue(vcpu); ++ kvm_unload_vcpu_mmu(vcpu); ++ } ++ kvm_for_each_vcpu(i, vcpu, kvm) ++ kvm_arch_vcpu_free(vcpu); ++ ++ mutex_lock(&kvm->lock); ++ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) ++ kvm->vcpus[i] = NULL; ++ ++ atomic_set(&kvm->online_vcpus, 0); ++ mutex_unlock(&kvm->lock); ++} ++ ++void kvm_arch_sync_events(struct kvm *kvm) ++{ ++ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); ++ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); ++ kvm_free_pit(kvm); ++} ++ ++int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) ++{ ++ int i, r; ++ unsigned long hva; ++ struct kvm_memslots *slots = kvm_memslots(kvm); ++ struct kvm_memory_slot *slot, old; ++ ++ /* Called with kvm->slots_lock held. */ ++ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) ++ return -EINVAL; ++ ++ slot = id_to_memslot(slots, id); ++ if (size) { ++ if (slot->npages) ++ return -EEXIST; ++ ++ /* ++ * MAP_SHARED to prevent internal slot pages from being moved ++ * by fork()/COW. 
++ */ ++ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, 0); ++ if (IS_ERR((void *)hva)) ++ return PTR_ERR((void *)hva); ++ } else { ++ if (!slot->npages) ++ return 0; ++ ++ hva = 0; ++ } ++ ++ old = *slot; ++ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { ++ struct kvm_userspace_memory_region m; ++ ++ m.slot = id | (i << 16); ++ m.flags = 0; ++ m.guest_phys_addr = gpa; ++ m.userspace_addr = hva; ++ m.memory_size = size; ++ r = __kvm_set_memory_region(kvm, &m); ++ if (r < 0) ++ return r; ++ } ++ ++ if (!size) ++ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(__x86_set_memory_region); ++ ++int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) ++{ ++ int r; ++ ++ mutex_lock(&kvm->slots_lock); ++ r = __x86_set_memory_region(kvm, id, gpa, size); ++ mutex_unlock(&kvm->slots_lock); ++ ++ return r; ++} ++EXPORT_SYMBOL_GPL(x86_set_memory_region); ++ ++void kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++ kvm_mmu_pre_destroy_vm(kvm); ++} ++ ++void kvm_arch_destroy_vm(struct kvm *kvm) ++{ ++ if (current->mm == kvm->mm) { ++ /* ++ * Free memory regions allocated on behalf of userspace, ++ * unless the the memory map has changed due to process exit ++ * or fd copying. ++ */ ++ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); ++ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); ++ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); ++ } ++ if (kvm_x86_ops->vm_destroy) ++ kvm_x86_ops->vm_destroy(kvm); ++ kvm_pic_destroy(kvm); ++ kvm_ioapic_destroy(kvm); ++ kvm_free_vcpus(kvm); ++ kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); ++ kvm_mmu_uninit_vm(kvm); ++ kvm_page_track_cleanup(kvm); ++ kvm_hv_destroy_vm(kvm); ++} ++ ++void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, ++ struct kvm_memory_slot *dont) ++{ ++ int i; ++ ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { ++ kvfree(free->arch.rmap[i]); ++ free->arch.rmap[i] = NULL; ++ } ++ if (i == 0) ++ continue; ++ ++ if (!dont || free->arch.lpage_info[i - 1] != ++ dont->arch.lpage_info[i - 1]) { ++ kvfree(free->arch.lpage_info[i - 1]); ++ free->arch.lpage_info[i - 1] = NULL; ++ } ++ } ++ ++ kvm_page_track_free_memslot(free, dont); ++} ++ ++int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, ++ unsigned long npages) ++{ ++ int i; ++ ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ struct kvm_lpage_info *linfo; ++ unsigned long ugfn; ++ int lpages; ++ int level = i + 1; ++ ++ lpages = gfn_to_index(slot->base_gfn + npages - 1, ++ slot->base_gfn, level) + 1; ++ ++ slot->arch.rmap[i] = ++ kvcalloc(lpages, sizeof(*slot->arch.rmap[i]), ++ GFP_KERNEL); ++ if (!slot->arch.rmap[i]) ++ goto out_free; ++ if (i == 0) ++ continue; ++ ++ linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL); ++ if (!linfo) ++ goto out_free; ++ ++ slot->arch.lpage_info[i - 1] = linfo; ++ ++ if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) ++ linfo[0].disallow_lpage = 1; ++ if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) ++ linfo[lpages - 1].disallow_lpage = 1; ++ ugfn = slot->userspace_addr >> PAGE_SHIFT; ++ /* ++ * If the gfn and userspace address are not aligned wrt each ++ * other, or if explicitly asked to, disable large page ++ * support for this slot ++ */ ++ if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || ++ !kvm_largepages_enabled()) { ++ unsigned long j; ++ ++ for (j = 0; j < lpages; ++j) ++ 
linfo[j].disallow_lpage = 1; ++ } ++ } ++ ++ if (kvm_page_track_create_memslot(slot, npages)) ++ goto out_free; ++ ++ return 0; ++ ++out_free: ++ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { ++ kvfree(slot->arch.rmap[i]); ++ slot->arch.rmap[i] = NULL; ++ if (i == 0) ++ continue; ++ ++ kvfree(slot->arch.lpage_info[i - 1]); ++ slot->arch.lpage_info[i - 1] = NULL; ++ } ++ return -ENOMEM; ++} ++ ++void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) ++{ ++ /* ++ * memslots->generation has been incremented. ++ * mmio generation may have reached its maximum value. ++ */ ++ kvm_mmu_invalidate_mmio_sptes(kvm, gen); ++} ++ ++int kvm_arch_prepare_memory_region(struct kvm *kvm, ++ struct kvm_memory_slot *memslot, ++ const struct kvm_userspace_memory_region *mem, ++ enum kvm_mr_change change) ++{ ++ return 0; ++} ++ ++static void kvm_mmu_slot_apply_flags(struct kvm *kvm, ++ struct kvm_memory_slot *new) ++{ ++ /* Still write protect RO slot */ ++ if (new->flags & KVM_MEM_READONLY) { ++ kvm_mmu_slot_remove_write_access(kvm, new); ++ return; ++ } ++ ++ /* ++ * Call kvm_x86_ops dirty logging hooks when they are valid. ++ * ++ * kvm_x86_ops->slot_disable_log_dirty is called when: ++ * ++ * - KVM_MR_CREATE with dirty logging is disabled ++ * - KVM_MR_FLAGS_ONLY with dirty logging is disabled in new flag ++ * ++ * The reason is, in case of PML, we need to set D-bit for any slots ++ * with dirty logging disabled in order to eliminate unnecessary GPA ++ * logging in PML buffer (and potential PML buffer full VMEXT). This ++ * guarantees leaving PML enabled during guest's lifetime won't have ++ * any additonal overhead from PML when guest is running with dirty ++ * logging disabled for memory slots. ++ * ++ * kvm_x86_ops->slot_enable_log_dirty is called when switching new slot ++ * to dirty logging mode. ++ * ++ * If kvm_x86_ops dirty logging hooks are invalid, use write protect. ++ * ++ * In case of write protect: ++ * ++ * Write protect all pages for dirty logging. ++ * ++ * All the sptes including the large sptes which point to this ++ * slot are set to readonly. We can not create any new large ++ * spte on this slot until the end of the logging. ++ * ++ * See the comments in fast_page_fault(). ++ */ ++ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ if (kvm_x86_ops->slot_enable_log_dirty) ++ kvm_x86_ops->slot_enable_log_dirty(kvm, new); ++ else ++ kvm_mmu_slot_remove_write_access(kvm, new); ++ } else { ++ if (kvm_x86_ops->slot_disable_log_dirty) ++ kvm_x86_ops->slot_disable_log_dirty(kvm, new); ++ } ++} ++ ++void kvm_arch_commit_memory_region(struct kvm *kvm, ++ const struct kvm_userspace_memory_region *mem, ++ const struct kvm_memory_slot *old, ++ const struct kvm_memory_slot *new, ++ enum kvm_mr_change change) ++{ ++ int nr_mmu_pages = 0; ++ ++ if (!kvm->arch.n_requested_mmu_pages) ++ nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); ++ ++ if (nr_mmu_pages) ++ kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); ++ ++ /* ++ * Dirty logging tracks sptes in 4k granularity, meaning that large ++ * sptes have to be split. If live migration is successful, the guest ++ * in the source machine will be destroyed and large sptes will be ++ * created in the destination. However, if the guest continues to run ++ * in the source machine (for example if live migration fails), small ++ * sptes will remain around and cause bad performance. ++ * ++ * Scan sptes if dirty logging has been stopped, dropping those ++ * which can be collapsed into a single large-page spte. Later ++ * page faults will create the large-page sptes. 
++ */ ++ if ((change != KVM_MR_DELETE) && ++ (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && ++ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) ++ kvm_mmu_zap_collapsible_sptes(kvm, new); ++ ++ /* ++ * Set up write protection and/or dirty logging for the new slot. ++ * ++ * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have ++ * been zapped so no dirty logging staff is needed for old slot. For ++ * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the ++ * new and it's also covered when dealing with the new slot. ++ * ++ * FIXME: const-ify all uses of struct kvm_memory_slot. ++ */ ++ if (change != KVM_MR_DELETE) ++ kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); ++} ++ ++void kvm_arch_flush_shadow_all(struct kvm *kvm) ++{ ++ kvm_mmu_invalidate_zap_all_pages(kvm); ++} ++ ++void kvm_arch_flush_shadow_memslot(struct kvm *kvm, ++ struct kvm_memory_slot *slot) ++{ ++ kvm_page_track_flush_slot(kvm, slot); ++} ++ ++static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) ++{ ++ return (is_guest_mode(vcpu) && ++ kvm_x86_ops->guest_apic_has_interrupt && ++ kvm_x86_ops->guest_apic_has_interrupt(vcpu)); ++} ++ ++static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) ++{ ++ if (!list_empty_careful(&vcpu->async_pf.done)) ++ return true; ++ ++ if (kvm_apic_has_events(vcpu)) ++ return true; ++ ++ if (vcpu->arch.pv.pv_unhalted) ++ return true; ++ ++ if (vcpu->arch.exception.pending) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_NMI, vcpu) || ++ (vcpu->arch.nmi_pending && ++ kvm_x86_ops->nmi_allowed(vcpu))) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_SMI, vcpu) || ++ (vcpu->arch.smi_pending && !is_smm(vcpu))) ++ return true; ++ ++ if (kvm_arch_interrupt_allowed(vcpu) && ++ (kvm_cpu_has_interrupt(vcpu) || ++ kvm_guest_apic_has_interrupt(vcpu))) ++ return true; ++ ++ if (kvm_hv_has_stimer_pending(vcpu)) ++ return true; ++ ++ return false; ++} ++ ++int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) ++{ ++ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); ++} ++ ++bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) ++{ ++ if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) ++ return true; ++ ++ if (kvm_test_request(KVM_REQ_NMI, vcpu) || ++ kvm_test_request(KVM_REQ_SMI, vcpu) || ++ kvm_test_request(KVM_REQ_EVENT, vcpu)) ++ return true; ++ ++ if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) ++ return true; ++ ++ return false; ++} ++ ++bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.preempted_in_kernel; ++} ++ ++int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) ++{ ++ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; ++} ++ ++int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) ++{ ++ return kvm_x86_ops->interrupt_allowed(vcpu); ++} ++ ++unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) ++{ ++ if (is_64_bit_mode(vcpu)) ++ return kvm_rip_read(vcpu); ++ return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + ++ kvm_rip_read(vcpu)); ++} ++EXPORT_SYMBOL_GPL(kvm_get_linear_rip); ++ ++bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) ++{ ++ return kvm_get_linear_rip(vcpu) == linear_rip; ++} ++EXPORT_SYMBOL_GPL(kvm_is_linear_rip); ++ ++unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) ++{ ++ unsigned long rflags; ++ ++ rflags = kvm_x86_ops->get_rflags(vcpu); ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) ++ rflags &= ~X86_EFLAGS_TF; ++ return rflags; ++} ++EXPORT_SYMBOL_GPL(kvm_get_rflags); ++ ++static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned 
long rflags) ++{ ++ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && ++ kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) ++ rflags |= X86_EFLAGS_TF; ++ kvm_x86_ops->set_rflags(vcpu, rflags); ++} ++ ++void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) ++{ ++ __kvm_set_rflags(vcpu, rflags); ++ kvm_make_request(KVM_REQ_EVENT, vcpu); ++} ++EXPORT_SYMBOL_GPL(kvm_set_rflags); ++ ++void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) ++{ ++ int r; ++ ++ if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || ++ work->wakeup_all) ++ return; ++ ++ r = kvm_mmu_reload(vcpu); ++ if (unlikely(r)) ++ return; ++ ++ if (!vcpu->arch.mmu.direct_map && ++ work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) ++ return; ++ ++ vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); ++} ++ ++static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) ++{ ++ return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); ++} ++ ++static inline u32 kvm_async_pf_next_probe(u32 key) ++{ ++ return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); ++} ++ ++static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ u32 key = kvm_async_pf_hash_fn(gfn); ++ ++ while (vcpu->arch.apf.gfns[key] != ~0) ++ key = kvm_async_pf_next_probe(key); ++ ++ vcpu->arch.apf.gfns[key] = gfn; ++} ++ ++static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ int i; ++ u32 key = kvm_async_pf_hash_fn(gfn); ++ ++ for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && ++ (vcpu->arch.apf.gfns[key] != gfn && ++ vcpu->arch.apf.gfns[key] != ~0); i++) ++ key = kvm_async_pf_next_probe(key); ++ ++ return key; ++} ++ ++bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; ++} ++ ++static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) ++{ ++ u32 i, j, k; ++ ++ i = j = kvm_async_pf_gfn_slot(vcpu, gfn); ++ while (true) { ++ vcpu->arch.apf.gfns[i] = ~0; ++ do { ++ j = kvm_async_pf_next_probe(j); ++ if (vcpu->arch.apf.gfns[j] == ~0) ++ return; ++ k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); ++ /* ++ * k lies cyclically in ]i,j] ++ * | i.k.j | ++ * |....j i.k.| or |.k..j i...| ++ */ ++ } while ((i <= j) ? 
(i < k && k <= j) : (i < k || k <= j)); ++ vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; ++ i = j; ++ } ++} ++ ++static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) ++{ ++ ++ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, ++ sizeof(val)); ++} ++ ++static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) ++{ ++ ++ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val, ++ sizeof(u32)); ++} ++ ++void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, ++ struct kvm_async_pf *work) ++{ ++ struct x86_exception fault; ++ ++ trace_kvm_async_pf_not_present(work->arch.token, work->gva); ++ kvm_add_async_pf_gfn(vcpu, work->arch.gfn); ++ ++ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || ++ (vcpu->arch.apf.send_user_only && ++ kvm_x86_ops->get_cpl(vcpu) == 0)) ++ kvm_make_request(KVM_REQ_APF_HALT, vcpu); ++ else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { ++ fault.vector = PF_VECTOR; ++ fault.error_code_valid = true; ++ fault.error_code = 0; ++ fault.nested_page_fault = false; ++ fault.address = work->arch.token; ++ fault.async_page_fault = true; ++ kvm_inject_page_fault(vcpu, &fault); ++ } ++} ++ ++void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, ++ struct kvm_async_pf *work) ++{ ++ struct x86_exception fault; ++ u32 val; ++ ++ if (work->wakeup_all) ++ work->arch.token = ~0; /* broadcast wakeup */ ++ else ++ kvm_del_async_pf_gfn(vcpu, work->arch.gfn); ++ trace_kvm_async_pf_ready(work->arch.token, work->gva); ++ ++ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && ++ !apf_get_user(vcpu, &val)) { ++ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT && ++ vcpu->arch.exception.pending && ++ vcpu->arch.exception.nr == PF_VECTOR && ++ !apf_put_user(vcpu, 0)) { ++ vcpu->arch.exception.injected = false; ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.nr = 0; ++ vcpu->arch.exception.has_error_code = false; ++ vcpu->arch.exception.error_code = 0; ++ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { ++ fault.vector = PF_VECTOR; ++ fault.error_code_valid = true; ++ fault.error_code = 0; ++ fault.nested_page_fault = false; ++ fault.address = work->arch.token; ++ fault.async_page_fault = true; ++ kvm_inject_page_fault(vcpu, &fault); ++ } ++ } ++ vcpu->arch.apf.halted = false; ++ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) ++{ ++ if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) ++ return true; ++ else ++ return kvm_can_do_async_pf(vcpu); ++} ++ ++void kvm_arch_start_assignment(struct kvm *kvm) ++{ ++ atomic_inc(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); ++ ++void kvm_arch_end_assignment(struct kvm *kvm) ++{ ++ atomic_dec(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); ++ ++bool kvm_arch_has_assigned_device(struct kvm *kvm) ++{ ++ return atomic_read(&kvm->arch.assigned_device_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); ++ ++void kvm_arch_register_noncoherent_dma(struct kvm *kvm) ++{ ++ atomic_inc(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); ++ ++void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) ++{ ++ atomic_dec(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); ++ ++bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) ++{ ++ return atomic_read(&kvm->arch.noncoherent_dma_count); ++} ++EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); ++ ++bool 
kvm_arch_has_irq_bypass(void) ++{ ++ return kvm_x86_ops->update_pi_irte != NULL; ++} ++ ++int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, ++ struct irq_bypass_producer *prod) ++{ ++ struct kvm_kernel_irqfd *irqfd = ++ container_of(cons, struct kvm_kernel_irqfd, consumer); ++ ++ irqfd->producer = prod; ++ ++ return kvm_x86_ops->update_pi_irte(irqfd->kvm, ++ prod->irq, irqfd->gsi, 1); ++} ++ ++void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, ++ struct irq_bypass_producer *prod) ++{ ++ int ret; ++ struct kvm_kernel_irqfd *irqfd = ++ container_of(cons, struct kvm_kernel_irqfd, consumer); ++ ++ WARN_ON(irqfd->producer != prod); ++ irqfd->producer = NULL; ++ ++ /* ++ * When producer of consumer is unregistered, we change back to ++ * remapped mode, so we can re-use the current implementation ++ * when the irq is masked/disabled or the consumer side (KVM ++ * int this case doesn't want to receive the interrupts. ++ */ ++ ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); ++ if (ret) ++ printk(KERN_INFO "irq bypass consumer (token %p) unregistration" ++ " fails: %d\n", irqfd->consumer.token, ret); ++} ++ ++int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, ++ uint32_t guest_irq, bool set) ++{ ++ if (!kvm_x86_ops->update_pi_irte) ++ return -EINVAL; ++ ++ return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); ++} ++ ++bool kvm_vector_hashing_enabled(void) ++{ ++ return vector_hashing; ++} ++EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); ++ ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); ++EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); +diff -uprN kernel/arch/x86/lib/mmx_32.c kernel_new/arch/x86/lib/mmx_32.c +--- kernel/arch/x86/lib/mmx_32.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/lib/mmx_32.c 2021-04-01 18:28:07.659863283 +0800 +@@ -31,7 +31,7 @@ void *_mmx_memcpy(void *to, const void * + void *p; + int i; + +- if (unlikely(in_interrupt())) ++ if (unlikely(!ipipe_root_p || in_interrupt())) + return __memcpy(to, from, len); + + p = to; +diff -uprN kernel/arch/x86/lib/usercopy.c kernel_new/arch/x86/lib/usercopy.c +--- kernel/arch/x86/lib/usercopy.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/lib/usercopy.c 2021-04-01 18:28:07.659863283 +0800 +@@ -5,6 +5,7 @@ + */ + + #include ++#include + #include + + #include +@@ -18,7 +19,7 @@ copy_from_user_nmi(void *to, const void + { + unsigned long ret; + +- if (__range_not_ok(from, n, TASK_SIZE)) ++ if (!ipipe_root_p || __range_not_ok(from, n, TASK_SIZE)) + return n; + + if (!nmi_uaccess_okay()) +diff -uprN kernel/arch/x86/mm/fault.c kernel_new/arch/x86/mm/fault.c +--- 
kernel/arch/x86/mm/fault.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/mm/fault.c 2021-04-01 18:28:07.659863283 +0800 +@@ -1235,6 +1235,12 @@ __do_page_fault(struct pt_regs *regs, un + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + u32 pkey; + ++#ifdef CONFIG_IPIPE ++ if (ipipe_root_domain != ipipe_head_domain) { ++ trace_hardirqs_on(); ++ hard_local_irq_enable(); ++ } ++#endif + tsk = current; + mm = tsk->mm; + +@@ -1488,3 +1494,50 @@ do_page_fault(struct pt_regs *regs, unsi + exception_exit(prev_state); + } + NOKPROBE_SYMBOL(do_page_fault); ++ ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_pin_mapping_globally(unsigned long start, unsigned long end) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long next, addr = start; ++ ++ do { ++ unsigned long flags; ++ struct page *page; ++ ++ next = pgd_addr_end(addr, end); ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) ++ vmalloc_sync_one(page_address(page), addr); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ ++ } while (addr = next, addr != end); ++#else ++ unsigned long next, addr = start; ++ pgd_t *pgd, *pgd_ref; ++ struct page *page; ++ ++ if (!(start >= VMALLOC_START && start < VMALLOC_END)) ++ return; ++ ++ do { ++ next = pgd_addr_end(addr, end); ++ pgd_ref = pgd_offset_k(addr); ++ if (pgd_none(*pgd_ref)) ++ continue; ++ spin_lock(&pgd_lock); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd = page_address(page) + pgd_index(addr); ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ } ++ spin_unlock(&pgd_lock); ++ addr = next; ++ } while (addr != end); ++ ++ arch_flush_lazy_mmu_mode(); ++#endif ++} ++ ++#endif /* CONFIG_IPIPE */ +diff -uprN kernel/arch/x86/mm/fault.c.orig kernel_new/arch/x86/mm/fault.c.orig +--- kernel/arch/x86/mm/fault.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/arch/x86/mm/fault.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1490 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1995 Linus Torvalds ++ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. ++ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar ++ */ ++#include /* test_thread_flag(), ... */ ++#include /* task_stack_*(), ... */ ++#include /* oops_begin/end, ... */ ++#include /* search_exception_tables */ ++#include /* max_low_pfn */ ++#include /* NOKPROBE_SYMBOL, ... */ ++#include /* kmmio_handler, ... */ ++#include /* perf_sw_event */ ++#include /* hstate_index_to_shift */ ++#include /* prefetchw */ ++#include /* exception_enter(), ... */ ++#include /* faulthandler_disabled() */ ++#include ++ ++#include /* boot_cpu_has, ... */ ++#include /* dotraplinkage, ... */ ++#include /* pgd_*(), ... 
*/ ++#include /* VSYSCALL_ADDR */ ++#include /* emulate_vsyscall */ ++#include /* struct vm86 */ ++#include /* vma_pkey() */ ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* ++ * Returns 0 if mmiotrace is disabled, or if the fault is not ++ * handled by mmiotrace: ++ */ ++static nokprobe_inline int ++kmmio_fault(struct pt_regs *regs, unsigned long addr) ++{ ++ if (unlikely(is_kmmio_active())) ++ if (kmmio_handler(regs, addr) == 1) ++ return -1; ++ return 0; ++} ++ ++static nokprobe_inline int kprobes_fault(struct pt_regs *regs) ++{ ++ int ret = 0; ++ ++ /* kprobe_running() needs smp_processor_id() */ ++ if (kprobes_built_in() && !user_mode(regs)) { ++ preempt_disable(); ++ if (kprobe_running() && kprobe_fault_handler(regs, 14)) ++ ret = 1; ++ preempt_enable(); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Prefetch quirks: ++ * ++ * 32-bit mode: ++ * ++ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * 64-bit mode: ++ * ++ * Sometimes the CPU reports invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * Opcode checker based on code by Richard Brunner. ++ */ ++static inline int ++check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, ++ unsigned char opcode, int *prefetch) ++{ ++ unsigned char instr_hi = opcode & 0xf0; ++ unsigned char instr_lo = opcode & 0x0f; ++ ++ switch (instr_hi) { ++ case 0x20: ++ case 0x30: ++ /* ++ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. ++ * In X86_64 long mode, the CPU will signal invalid ++ * opcode if some of these prefixes are present so ++ * X86_64 will never get here anyway ++ */ ++ return ((instr_lo & 7) == 0x6); ++#ifdef CONFIG_X86_64 ++ case 0x40: ++ /* ++ * In AMD64 long mode 0x40..0x4F are valid REX prefixes ++ * Need to figure out under what instruction mode the ++ * instruction was issued. Could check the LDT for lm, ++ * but for now it's good enough to assume that long ++ * mode only uses well known segments or kernel. ++ */ ++ return (!user_mode(regs) || user_64bit_mode(regs)); ++#endif ++ case 0x60: ++ /* 0x64 thru 0x67 are valid prefixes in all modes. */ ++ return (instr_lo & 0xC) == 0x4; ++ case 0xF0: ++ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ ++ return !instr_lo || (instr_lo>>1) == 1; ++ case 0x00: ++ /* Prefetch instruction is 0x0F0D or 0x0F18 */ ++ if (probe_kernel_address(instr, opcode)) ++ return 0; ++ ++ *prefetch = (instr_lo == 0xF) && ++ (opcode == 0x0D || opcode == 0x18); ++ return 0; ++ default: ++ return 0; ++ } ++} ++ ++static int ++is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) ++{ ++ unsigned char *max_instr; ++ unsigned char *instr; ++ int prefetch = 0; ++ ++ /* ++ * If it was a exec (instruction fetch) fault on NX page, then ++ * do not ignore the fault: ++ */ ++ if (error_code & X86_PF_INSTR) ++ return 0; ++ ++ instr = (void *)convert_ip_to_linear(current, regs); ++ max_instr = instr + 15; ++ ++ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) ++ return 0; ++ ++ while (instr < max_instr) { ++ unsigned char opcode; ++ ++ if (probe_kernel_address(instr, opcode)) ++ break; ++ ++ instr++; ++ ++ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) ++ break; ++ } ++ return prefetch; ++} ++ ++/* ++ * A protection key fault means that the PKRU value did not allow ++ * access to some PTE. 
Userspace can figure out what PKRU was ++ * from the XSAVE state, and this function fills out a field in ++ * siginfo so userspace can discover which protection key was set ++ * on the PTE. ++ * ++ * If we get here, we know that the hardware signaled a X86_PF_PK ++ * fault and that there was a VMA once we got in the fault ++ * handler. It does *not* guarantee that the VMA we find here ++ * was the one that we faulted on. ++ * ++ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); ++ * 2. T1 : set PKRU to deny access to pkey=4, touches page ++ * 3. T1 : faults... ++ * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); ++ * 5. T1 : enters fault handler, takes mmap_sem, etc... ++ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really ++ * faulted on a pte with its pkey=4. ++ */ ++static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, ++ u32 *pkey) ++{ ++ /* This is effectively an #ifdef */ ++ if (!boot_cpu_has(X86_FEATURE_OSPKE)) ++ return; ++ ++ /* Fault not from Protection Keys: nothing to do */ ++ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) ++ return; ++ /* ++ * force_sig_info_fault() is called from a number of ++ * contexts, some of which have a VMA and some of which ++ * do not. The X86_PF_PK handing happens after we have a ++ * valid VMA, so we should never reach this without a ++ * valid VMA. ++ */ ++ if (!pkey) { ++ WARN_ONCE(1, "PKU fault with no VMA passed in"); ++ info->si_pkey = 0; ++ return; ++ } ++ /* ++ * si_pkey should be thought of as a strong hint, but not ++ * absolutely guranteed to be 100% accurate because of ++ * the race explained above. ++ */ ++ info->si_pkey = *pkey; ++} ++ ++static void ++force_sig_info_fault(int si_signo, int si_code, unsigned long address, ++ struct task_struct *tsk, u32 *pkey, int fault) ++{ ++ unsigned lsb = 0; ++ siginfo_t info; ++ ++ clear_siginfo(&info); ++ info.si_signo = si_signo; ++ info.si_errno = 0; ++ info.si_code = si_code; ++ info.si_addr = (void __user *)address; ++ if (fault & VM_FAULT_HWPOISON_LARGE) ++ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); ++ if (fault & VM_FAULT_HWPOISON) ++ lsb = PAGE_SHIFT; ++ info.si_addr_lsb = lsb; ++ ++ fill_sig_info_pkey(si_signo, si_code, &info, pkey); ++ ++ force_sig_info(si_signo, &info, tsk); ++} ++ ++DEFINE_SPINLOCK(pgd_lock); ++LIST_HEAD(pgd_list); ++ ++#ifdef CONFIG_X86_32 ++static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) ++{ ++ unsigned index = pgd_index(address); ++ pgd_t *pgd_k; ++ p4d_t *p4d, *p4d_k; ++ pud_t *pud, *pud_k; ++ pmd_t *pmd, *pmd_k; ++ ++ pgd += index; ++ pgd_k = init_mm.pgd + index; ++ ++ if (!pgd_present(*pgd_k)) ++ return NULL; ++ ++ /* ++ * set_pgd(pgd, *pgd_k); here would be useless on PAE ++ * and redundant with the set_pmd() on non-PAE. As would ++ * set_p4d/set_pud. 
++ */ ++ p4d = p4d_offset(pgd, address); ++ p4d_k = p4d_offset(pgd_k, address); ++ if (!p4d_present(*p4d_k)) ++ return NULL; ++ ++ pud = pud_offset(p4d, address); ++ pud_k = pud_offset(p4d_k, address); ++ if (!pud_present(*pud_k)) ++ return NULL; ++ ++ pmd = pmd_offset(pud, address); ++ pmd_k = pmd_offset(pud_k, address); ++ ++ if (pmd_present(*pmd) != pmd_present(*pmd_k)) ++ set_pmd(pmd, *pmd_k); ++ ++ if (!pmd_present(*pmd_k)) ++ return NULL; ++ else ++ BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); ++ ++ return pmd_k; ++} ++ ++static void vmalloc_sync(void) ++{ ++ unsigned long address; ++ ++ if (SHARED_KERNEL_PMD) ++ return; ++ ++ for (address = VMALLOC_START & PMD_MASK; ++ address >= TASK_SIZE_MAX && address < VMALLOC_END; ++ address += PMD_SIZE) { ++ struct page *page; ++ ++ spin_lock(&pgd_lock); ++ list_for_each_entry(page, &pgd_list, lru) { ++ spinlock_t *pgt_lock; ++ ++ /* the pgt_lock only for Xen */ ++ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; ++ ++ spin_lock(pgt_lock); ++ vmalloc_sync_one(page_address(page), address); ++ spin_unlock(pgt_lock); ++ } ++ spin_unlock(&pgd_lock); ++ } ++} ++ ++void vmalloc_sync_mappings(void) ++{ ++ vmalloc_sync(); ++} ++ ++void vmalloc_sync_unmappings(void) ++{ ++ vmalloc_sync(); ++} ++ ++/* ++ * 32-bit: ++ * ++ * Handle a fault on the vmalloc or module mapping area ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ unsigned long pgd_paddr; ++ pmd_t *pmd_k; ++ pte_t *pte_k; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Synchronize this task's top level page-table ++ * with the 'reference' page table. ++ * ++ * Do _not_ use "current" here. We might be inside ++ * an interrupt in the middle of a task switch.. ++ */ ++ pgd_paddr = read_cr3_pa(); ++ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); ++ if (!pmd_k) ++ return -1; ++ ++ if (pmd_large(*pmd_k)) ++ return 0; ++ ++ pte_k = pte_offset_kernel(pmd_k, address); ++ if (!pte_present(*pte_k)) ++ return -1; ++ ++ return 0; ++} ++NOKPROBE_SYMBOL(vmalloc_fault); ++ ++/* ++ * Did it hit the DOS screen memory VA from vm86 mode? ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++#ifdef CONFIG_VM86 ++ unsigned long bit; ++ ++ if (!v8086_mode(regs) || !tsk->thread.vm86) ++ return; ++ ++ bit = (address - 0xA0000) >> PAGE_SHIFT; ++ if (bit < 32) ++ tsk->thread.vm86->screen_bitmap |= 1 << bit; ++#endif ++} ++ ++static bool low_pfn(unsigned long pfn) ++{ ++ return pfn < max_low_pfn; ++} ++ ++static void dump_pagetable(unsigned long address) ++{ ++ pgd_t *base = __va(read_cr3_pa()); ++ pgd_t *pgd = &base[pgd_index(address)]; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++#ifdef CONFIG_X86_PAE ++ pr_info("*pdpt = %016Lx ", pgd_val(*pgd)); ++ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) ++ goto out; ++#define pr_pde pr_cont ++#else ++#define pr_pde pr_info ++#endif ++ p4d = p4d_offset(pgd, address); ++ pud = pud_offset(p4d, address); ++ pmd = pmd_offset(pud, address); ++ pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); ++#undef pr_pde ++ ++ /* ++ * We must not directly access the pte in the highpte ++ * case if the page table is located in highmem. 
++ * And let's rather not kmap-atomic the pte, just in case ++ * it's allocated already: ++ */ ++ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; ++ ++ pte = pte_offset_kernel(pmd, address); ++ pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); ++out: ++ pr_cont("\n"); ++} ++ ++#else /* CONFIG_X86_64: */ ++ ++void vmalloc_sync_mappings(void) ++{ ++ /* ++ * 64-bit mappings might allocate new p4d/pud pages ++ * that need to be propagated to all tasks' PGDs. ++ */ ++ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); ++} ++ ++void vmalloc_sync_unmappings(void) ++{ ++ /* ++ * Unmappings never allocate or free p4d/pud pages. ++ * No work is required here. ++ */ ++} ++ ++/* ++ * 64-bit: ++ * ++ * Handle a fault on the vmalloc area ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ pgd_t *pgd, *pgd_k; ++ p4d_t *p4d, *p4d_k; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Copy kernel mappings over when needed. This can also ++ * happen within a race in page table update. In the later ++ * case just flush: ++ */ ++ pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); ++ pgd_k = pgd_offset_k(address); ++ if (pgd_none(*pgd_k)) ++ return -1; ++ ++ if (pgtable_l5_enabled()) { ++ if (pgd_none(*pgd)) { ++ set_pgd(pgd, *pgd_k); ++ arch_flush_lazy_mmu_mode(); ++ } else { ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); ++ } ++ } ++ ++ /* With 4-level paging, copying happens on the p4d level. */ ++ p4d = p4d_offset(pgd, address); ++ p4d_k = p4d_offset(pgd_k, address); ++ if (p4d_none(*p4d_k)) ++ return -1; ++ ++ if (p4d_none(*p4d) && !pgtable_l5_enabled()) { ++ set_p4d(p4d, *p4d_k); ++ arch_flush_lazy_mmu_mode(); ++ } else { ++ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); ++ } ++ ++ BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); ++ ++ pud = pud_offset(p4d, address); ++ if (pud_none(*pud)) ++ return -1; ++ ++ if (pud_large(*pud)) ++ return 0; ++ ++ pmd = pmd_offset(pud, address); ++ if (pmd_none(*pmd)) ++ return -1; ++ ++ if (pmd_large(*pmd)) ++ return 0; ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (!pte_present(*pte)) ++ return -1; ++ ++ return 0; ++} ++NOKPROBE_SYMBOL(vmalloc_fault); ++ ++#ifdef CONFIG_CPU_SUP_AMD ++static const char errata93_warning[] = ++KERN_ERR ++"******* Your BIOS seems to not contain a fix for K8 errata #93\n" ++"******* Working around it, but it may cause SEGVs or burn power.\n" ++"******* Please consider a BIOS update.\n" ++"******* Disabling USB legacy in the BIOS may also help.\n"; ++#endif ++ ++/* ++ * No vm86 mode in 64-bit mode: ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++} ++ ++static int bad_address(void *p) ++{ ++ unsigned long dummy; ++ ++ return probe_kernel_address((unsigned long *)p, dummy); ++} ++ ++static void dump_pagetable(unsigned long address) ++{ ++ pgd_t *base = __va(read_cr3_pa()); ++ pgd_t *pgd = base + pgd_index(address); ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ if (bad_address(pgd)) ++ goto bad; ++ ++ pr_info("PGD %lx ", pgd_val(*pgd)); ++ ++ if (!pgd_present(*pgd)) ++ goto out; ++ ++ p4d = p4d_offset(pgd, address); ++ if (bad_address(p4d)) ++ goto bad; ++ ++ pr_cont("P4D %lx ", p4d_val(*p4d)); ++ if (!p4d_present(*p4d) || p4d_large(*p4d)) ++ goto out; ++ ++ pud = pud_offset(p4d, address); ++ if (bad_address(pud)) ++ goto bad; ++ ++ 
pr_cont("PUD %lx ", pud_val(*pud)); ++ if (!pud_present(*pud) || pud_large(*pud)) ++ goto out; ++ ++ pmd = pmd_offset(pud, address); ++ if (bad_address(pmd)) ++ goto bad; ++ ++ pr_cont("PMD %lx ", pmd_val(*pmd)); ++ if (!pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (bad_address(pte)) ++ goto bad; ++ ++ pr_cont("PTE %lx", pte_val(*pte)); ++out: ++ pr_cont("\n"); ++ return; ++bad: ++ pr_info("BAD\n"); ++} ++ ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Workaround for K8 erratum #93 & buggy BIOS. ++ * ++ * BIOS SMM functions are required to use a specific workaround ++ * to avoid corruption of the 64bit RIP register on C stepping K8. ++ * ++ * A lot of BIOS that didn't get tested properly miss this. ++ * ++ * The OS sees this as a page fault with the upper 32bits of RIP cleared. ++ * Try to work around it here. ++ * ++ * Note we only handle faults in kernel here. ++ * Does nothing on 32-bit. ++ */ ++static int is_errata93(struct pt_regs *regs, unsigned long address) ++{ ++#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ++ || boot_cpu_data.x86 != 0xf) ++ return 0; ++ ++ if (address != regs->ip) ++ return 0; ++ ++ if ((address >> 32) != 0) ++ return 0; ++ ++ address |= 0xffffffffUL << 32; ++ if ((address >= (u64)_stext && address <= (u64)_etext) || ++ (address >= MODULES_VADDR && address <= MODULES_END)) { ++ printk_once(errata93_warning); ++ regs->ip = address; ++ return 1; ++ } ++#endif ++ return 0; ++} ++ ++/* ++ * Work around K8 erratum #100 K8 in compat mode occasionally jumps ++ * to illegal addresses >4GB. ++ * ++ * We catch this in the page fault handler because these addresses ++ * are not reachable. Just detect this case and return. Any code ++ * segment in LDT is compatibility mode. ++ */ ++static int is_errata100(struct pt_regs *regs, unsigned long address) ++{ ++#ifdef CONFIG_X86_64 ++ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) ++ return 1; ++#endif ++ return 0; ++} ++ ++static int is_f00f_bug(struct pt_regs *regs, unsigned long address) ++{ ++#ifdef CONFIG_X86_F00F_BUG ++ unsigned long nr; ++ ++ /* ++ * Pentium F0 0F C7 C8 bug workaround: ++ */ ++ if (boot_cpu_has_bug(X86_BUG_F00F)) { ++ nr = (address - idt_descr.address) >> 3; ++ ++ if (nr == 6) { ++ do_invalid_op(regs, 0); ++ return 1; ++ } ++ } ++#endif ++ return 0; ++} ++ ++static void ++show_fault_oops(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ if (!oops_may_print()) ++ return; ++ ++ if (error_code & X86_PF_INSTR) { ++ unsigned int level; ++ pgd_t *pgd; ++ pte_t *pte; ++ ++ pgd = __va(read_cr3_pa()); ++ pgd += pgd_index(address); ++ ++ pte = lookup_address_in_pgd(pgd, address, &level); ++ ++ if (pte && pte_present(*pte) && !pte_exec(*pte)) ++ pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", ++ from_kuid(&init_user_ns, current_uid())); ++ if (pte && pte_present(*pte) && pte_exec(*pte) && ++ (pgd_flags(*pgd) & _PAGE_USER) && ++ (__read_cr4() & X86_CR4_SMEP)) ++ pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", ++ from_kuid(&init_user_ns, current_uid())); ++ } ++ ++ pr_alert("BUG: unable to handle kernel %s at %px\n", ++ address < PAGE_SIZE ? 
"NULL pointer dereference" : "paging request", ++ (void *)address); ++ ++ dump_pagetable(address); ++} ++ ++static noinline void ++pgtable_bad(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct task_struct *tsk; ++ unsigned long flags; ++ int sig; ++ ++ flags = oops_begin(); ++ tsk = current; ++ sig = SIGKILL; ++ ++ printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", ++ tsk->comm, address); ++ dump_pagetable(address); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code; ++ ++ if (__die("Bad pagetable", regs, error_code)) ++ sig = 0; ++ ++ oops_end(flags, regs, sig); ++} ++ ++static noinline void ++no_context(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int signal, int si_code) ++{ ++ struct task_struct *tsk = current; ++ unsigned long flags; ++ int sig; ++ ++ /* Are we prepared to handle this kernel fault? */ ++ if (fixup_exception(regs, X86_TRAP_PF)) { ++ /* ++ * Any interrupt that takes a fault gets the fixup. This makes ++ * the below recursive fault logic only apply to a faults from ++ * task context. ++ */ ++ if (in_interrupt()) ++ return; ++ ++ /* ++ * Per the above we're !in_interrupt(), aka. task context. ++ * ++ * In this case we need to make sure we're not recursively ++ * faulting through the emulate_vsyscall() logic. ++ */ ++ if (current->thread.sig_on_uaccess_err && signal) { ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code | X86_PF_USER; ++ tsk->thread.cr2 = address; ++ ++ /* XXX: hwpoison faults will set the wrong code. */ ++ force_sig_info_fault(signal, si_code, address, ++ tsk, NULL, 0); ++ } ++ ++ /* ++ * Barring that, we can do the fixup and be happy. ++ */ ++ return; ++ } ++ ++#ifdef CONFIG_VMAP_STACK ++ /* ++ * Stack overflow? During boot, we can fault near the initial ++ * stack in the direct map, but that's not an overflow -- check ++ * that we're in vmalloc space to avoid this. ++ */ ++ if (is_vmalloc_addr((void *)address) && ++ (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || ++ address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { ++ unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); ++ /* ++ * We're likely to be running with very little stack space ++ * left. It's plausible that we'd hit this condition but ++ * double-fault even before we get this far, in which case ++ * we're fine: the double-fault handler will deal with it. ++ * ++ * We don't want to make it all the way into the oops code ++ * and then double-fault, though, because we're likely to ++ * break the console driver and lose most of the stack dump. ++ */ ++ asm volatile ("movq %[stack], %%rsp\n\t" ++ "call handle_stack_overflow\n\t" ++ "1: jmp 1b" ++ : ASM_CALL_CONSTRAINT ++ : "D" ("kernel stack overflow (page fault)"), ++ "S" (regs), "d" (address), ++ [stack] "rm" (stack)); ++ unreachable(); ++ } ++#endif ++ ++ /* ++ * 32-bit: ++ * ++ * Valid to do another page fault here, because if this fault ++ * had been triggered by is_prefetch fixup_exception would have ++ * handled it. ++ * ++ * 64-bit: ++ * ++ * Hall of shame of CPU/BIOS bugs. ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata93(regs, address)) ++ return; ++ ++ /* ++ * Oops. The kernel tried to access some bad page. 
We'll have to ++ * terminate things with extreme prejudice: ++ */ ++ flags = oops_begin(); ++ ++ show_fault_oops(regs, error_code, address); ++ ++ if (task_stack_end_corrupted(tsk)) ++ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ tsk->thread.error_code = error_code; ++ ++ sig = SIGKILL; ++ if (__die("Oops", regs, error_code)) ++ sig = 0; ++ ++ /* Executive summary in case the body of the oops scrolled away */ ++ printk(KERN_DEFAULT "CR2: %016lx\n", address); ++ ++ oops_end(flags, regs, sig); ++} ++ ++/* ++ * Print out info about fatal segfaults, if the show_unhandled_signals ++ * sysctl is set: ++ */ ++static inline void ++show_signal_msg(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct task_struct *tsk) ++{ ++ const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; ++ ++ if (!unhandled_signal(tsk, SIGSEGV)) ++ return; ++ ++ if (!printk_ratelimit()) ++ return; ++ ++ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", ++ loglvl, tsk->comm, task_pid_nr(tsk), address, ++ (void *)regs->ip, (void *)regs->sp, error_code); ++ ++ print_vma_addr(KERN_CONT " in ", regs->ip); ++ ++ printk(KERN_CONT "\n"); ++ ++ show_opcodes(regs, loglvl); ++} ++ ++static void ++__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey, int si_code) ++{ ++ struct task_struct *tsk = current; ++ ++ /* User mode accesses just cause a SIGSEGV */ ++ if (error_code & X86_PF_USER) { ++ /* ++ * It's possible to have interrupts off here: ++ */ ++ local_irq_enable(); ++ ++ /* ++ * Valid to do another page fault here because this one came ++ * from user space: ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata100(regs, address)) ++ return; ++ ++#ifdef CONFIG_X86_64 ++ /* ++ * Instruction fetch faults in the vsyscall page might need ++ * emulation. ++ */ ++ if (unlikely((error_code & X86_PF_INSTR) && ++ ((address & ~0xfff) == VSYSCALL_ADDR))) { ++ if (emulate_vsyscall(regs, address)) ++ return; ++ } ++#endif ++ ++ /* ++ * To avoid leaking information about the kernel page table ++ * layout, pretend that user-mode accesses to kernel addresses ++ * are always protection faults. ++ */ ++ if (address >= TASK_SIZE_MAX) ++ error_code |= X86_PF_PROT; ++ ++ if (likely(show_unhandled_signals)) ++ show_signal_msg(regs, error_code, address, tsk); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ ++ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); ++ ++ return; ++ } ++ ++ if (is_f00f_bug(regs, address)) ++ return; ++ ++ no_context(regs, error_code, address, SIGSEGV, si_code); ++} ++ ++static noinline void ++bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey) ++{ ++ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); ++} ++ ++static void ++__bad_area(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct vm_area_struct *vma, int si_code) ++{ ++ struct mm_struct *mm = current->mm; ++ u32 pkey; ++ ++ if (vma) ++ pkey = vma_pkey(vma); ++ ++ /* ++ * Something tried to access memory that isn't in our memory map.. ++ * Fix it, but check if it's kernel or user first.. ++ */ ++ up_read(&mm->mmap_sem); ++ ++ __bad_area_nosemaphore(regs, error_code, address, ++ (vma) ? 
&pkey : NULL, si_code); ++} ++ ++static noinline void ++bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); ++} ++ ++static inline bool bad_area_access_from_pkeys(unsigned long error_code, ++ struct vm_area_struct *vma) ++{ ++ /* This code is always called on the current mm */ ++ bool foreign = false; ++ ++ if (!boot_cpu_has(X86_FEATURE_OSPKE)) ++ return false; ++ if (error_code & X86_PF_PK) ++ return true; ++ /* this checks permission keys on the VMA: */ ++ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), ++ (error_code & X86_PF_INSTR), foreign)) ++ return true; ++ return false; ++} ++ ++static noinline void ++bad_area_access_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct vm_area_struct *vma) ++{ ++ /* ++ * This OSPKE check is not strictly necessary at runtime. ++ * But, doing it this way allows compiler optimizations ++ * if pkeys are compiled out. ++ */ ++ if (bad_area_access_from_pkeys(error_code, vma)) ++ __bad_area(regs, error_code, address, vma, SEGV_PKUERR); ++ else ++ __bad_area(regs, error_code, address, vma, SEGV_ACCERR); ++} ++ ++static void ++do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, ++ u32 *pkey, unsigned int fault) ++{ ++ struct task_struct *tsk = current; ++ int code = BUS_ADRERR; ++ ++ /* Kernel mode? Handle exceptions or die: */ ++ if (!(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); ++ return; ++ } ++ ++ /* User-space => ok to do another page fault: */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_nr = X86_TRAP_PF; ++ ++#ifdef CONFIG_MEMORY_FAILURE ++ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { ++ printk(KERN_ERR ++ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", ++ tsk->comm, tsk->pid, address); ++ code = BUS_MCEERR_AR; ++ } ++#endif ++ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); ++} ++ ++static noinline void ++mm_fault_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, u32 *pkey, vm_fault_t fault) ++{ ++ if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, 0, 0); ++ return; ++ } ++ ++ if (fault & VM_FAULT_OOM) { ++ /* Kernel mode? Handle exceptions or die: */ ++ if (!(error_code & X86_PF_USER)) { ++ no_context(regs, error_code, address, ++ SIGSEGV, SEGV_MAPERR); ++ return; ++ } ++ ++ /* ++ * We ran out of memory, call the OOM killer, and return the ++ * userspace (which will retry the fault, or kill us if we got ++ * oom-killed): ++ */ ++ pagefault_out_of_memory(); ++ } else { ++ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| ++ VM_FAULT_HWPOISON_LARGE)) ++ do_sigbus(regs, error_code, address, pkey, fault); ++ else if (fault & VM_FAULT_SIGSEGV) ++ bad_area_nosemaphore(regs, error_code, address, pkey); ++ else ++ BUG(); ++ } ++} ++ ++static int spurious_fault_check(unsigned long error_code, pte_t *pte) ++{ ++ if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) ++ return 0; ++ ++ if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) ++ return 0; ++ /* ++ * Note: We do not do lazy flushing on protection key ++ * changes, so no spurious fault will ever set X86_PF_PK. ++ */ ++ if ((error_code & X86_PF_PK)) ++ return 1; ++ ++ return 1; ++} ++ ++/* ++ * Handle a spurious fault caused by a stale TLB entry. 
++ * ++ * This allows us to lazily refresh the TLB when increasing the ++ * permissions of a kernel page (RO -> RW or NX -> X). Doing it ++ * eagerly is very expensive since that implies doing a full ++ * cross-processor TLB flush, even if no stale TLB entries exist ++ * on other processors. ++ * ++ * Spurious faults may only occur if the TLB contains an entry with ++ * fewer permission than the page table entry. Non-present (P = 0) ++ * and reserved bit (R = 1) faults are never spurious. ++ * ++ * There are no security implications to leaving a stale TLB when ++ * increasing the permissions on a page. ++ * ++ * Returns non-zero if a spurious fault was handled, zero otherwise. ++ * ++ * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3 ++ * (Optional Invalidation). ++ */ ++static noinline int ++spurious_fault(unsigned long error_code, unsigned long address) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ int ret; ++ ++ /* ++ * Only writes to RO or instruction fetches from NX may cause ++ * spurious faults. ++ * ++ * These could be from user or supervisor accesses but the TLB ++ * is only lazily flushed after a kernel mapping protection ++ * change, so user accesses are not expected to cause spurious ++ * faults. ++ */ ++ if (error_code != (X86_PF_WRITE | X86_PF_PROT) && ++ error_code != (X86_PF_INSTR | X86_PF_PROT)) ++ return 0; ++ ++ pgd = init_mm.pgd + pgd_index(address); ++ if (!pgd_present(*pgd)) ++ return 0; ++ ++ p4d = p4d_offset(pgd, address); ++ if (!p4d_present(*p4d)) ++ return 0; ++ ++ if (p4d_large(*p4d)) ++ return spurious_fault_check(error_code, (pte_t *) p4d); ++ ++ pud = pud_offset(p4d, address); ++ if (!pud_present(*pud)) ++ return 0; ++ ++ if (pud_large(*pud)) ++ return spurious_fault_check(error_code, (pte_t *) pud); ++ ++ pmd = pmd_offset(pud, address); ++ if (!pmd_present(*pmd)) ++ return 0; ++ ++ if (pmd_large(*pmd)) ++ return spurious_fault_check(error_code, (pte_t *) pmd); ++ ++ pte = pte_offset_kernel(pmd, address); ++ if (!pte_present(*pte)) ++ return 0; ++ ++ ret = spurious_fault_check(error_code, pte); ++ if (!ret) ++ return 0; ++ ++ /* ++ * Make sure we have permissions in PMD. ++ * If not, then there's a bug in the page tables: ++ */ ++ ret = spurious_fault_check(error_code, (pte_t *) pmd); ++ WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); ++ ++ return ret; ++} ++NOKPROBE_SYMBOL(spurious_fault); ++ ++int show_unhandled_signals = 1; ++ ++static inline int ++access_error(unsigned long error_code, struct vm_area_struct *vma) ++{ ++ /* This is only called for the current mm, so: */ ++ bool foreign = false; ++ ++ /* ++ * Read or write was blocked by protection keys. This is ++ * always an unconditional error and can never result in ++ * a follow-up action to resolve the fault, like a COW. ++ */ ++ if (error_code & X86_PF_PK) ++ return 1; ++ ++ /* ++ * Make sure to check the VMA so that we do not perform ++ * faults just to hit a X86_PF_PK as soon as we fill in a ++ * page. 
++ */ ++ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), ++ (error_code & X86_PF_INSTR), foreign)) ++ return 1; ++ ++ if (error_code & X86_PF_WRITE) { ++ /* write, present and write, not present: */ ++ if (unlikely(!(vma->vm_flags & VM_WRITE))) ++ return 1; ++ return 0; ++ } ++ ++ /* read, present: */ ++ if (unlikely(error_code & X86_PF_PROT)) ++ return 1; ++ ++ /* read, not present: */ ++ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) ++ return 1; ++ ++ return 0; ++} ++ ++static int fault_in_kernel_space(unsigned long address) ++{ ++ return address >= TASK_SIZE_MAX; ++} ++ ++static inline bool smap_violation(int error_code, struct pt_regs *regs) ++{ ++ if (!IS_ENABLED(CONFIG_X86_SMAP)) ++ return false; ++ ++ if (!static_cpu_has(X86_FEATURE_SMAP)) ++ return false; ++ ++ if (error_code & X86_PF_USER) ++ return false; ++ ++ if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * This routine handles page faults. It determines the address, ++ * and the problem, and then passes it off to one of the appropriate ++ * routines. ++ */ ++static noinline void ++__do_page_fault(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct vm_area_struct *vma; ++ struct task_struct *tsk; ++ struct mm_struct *mm; ++ vm_fault_t fault, major = 0; ++ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; ++ u32 pkey; ++ ++ tsk = current; ++ mm = tsk->mm; ++ ++ prefetchw(&mm->mmap_sem); ++ ++ if (unlikely(kmmio_fault(regs, address))) ++ return; ++ ++ /* ++ * We fault-in kernel-space virtual memory on-demand. The ++ * 'reference' page table is init_mm.pgd. ++ * ++ * NOTE! We MUST NOT take any locks for this case. We may ++ * be in an interrupt or a critical region, and should ++ * only copy the information from the master page table, ++ * nothing more. ++ * ++ * This verifies that the fault happens in kernel space ++ * (error_code & 4) == 0, and that the fault was not a ++ * protection error (error_code & 9) == 0. ++ */ ++ if (unlikely(fault_in_kernel_space(address))) { ++ if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { ++ if (vmalloc_fault(address) >= 0) ++ return; ++ } ++ ++ /* Can handle a stale RO->RW TLB: */ ++ if (spurious_fault(error_code, address)) ++ return; ++ ++ /* kprobes don't want to hook the spurious faults: */ ++ if (kprobes_fault(regs)) ++ return; ++ /* ++ * Don't take the mm semaphore here. If we fixup a prefetch ++ * fault we could otherwise deadlock: ++ */ ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ ++ return; ++ } ++ ++ /* kprobes don't want to hook the spurious faults: */ ++ if (unlikely(kprobes_fault(regs))) ++ return; ++ ++ if (unlikely(error_code & X86_PF_RSVD)) ++ pgtable_bad(regs, error_code, address); ++ ++ if (unlikely(smap_violation(error_code, regs))) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++ ++ /* ++ * If we're in an interrupt, have no user context or are running ++ * in a region with pagefaults disabled then we must not take the fault ++ */ ++ if (unlikely(faulthandler_disabled() || !mm)) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++ ++ /* ++ * It's safe to allow irq's after cr2 has been saved and the ++ * vmalloc fault has been handled. 
++ * ++ * User-mode registers count as a user access even for any ++ * potential system fault or CPU buglet: ++ */ ++ if (user_mode(regs)) { ++ local_irq_enable(); ++ error_code |= X86_PF_USER; ++ flags |= FAULT_FLAG_USER; ++ } else { ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++ } ++ ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); ++ ++ if (error_code & X86_PF_WRITE) ++ flags |= FAULT_FLAG_WRITE; ++ if (error_code & X86_PF_INSTR) ++ flags |= FAULT_FLAG_INSTRUCTION; ++ ++ /* ++ * When running in the kernel we expect faults to occur only to ++ * addresses in user space. All other faults represent errors in ++ * the kernel and should generate an OOPS. Unfortunately, in the ++ * case of an erroneous fault occurring in a code path which already ++ * holds mmap_sem we will deadlock attempting to validate the fault ++ * against the address space. Luckily the kernel only validly ++ * references user space from well defined areas of code, which are ++ * listed in the exceptions table. ++ * ++ * As the vast majority of faults will be valid we will only perform ++ * the source reference check when there is a possibility of a ++ * deadlock. Attempt to lock the address space, if we cannot we then ++ * validate the source. If this is invalid we can skip the address ++ * space check, thus avoiding the deadlock: ++ */ ++ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { ++ if (!(error_code & X86_PF_USER) && ++ !search_exception_tables(regs->ip)) { ++ bad_area_nosemaphore(regs, error_code, address, NULL); ++ return; ++ } ++retry: ++ down_read(&mm->mmap_sem); ++ } else { ++ /* ++ * The above down_read_trylock() might have succeeded in ++ * which case we'll have missed the might_sleep() from ++ * down_read(): ++ */ ++ might_sleep(); ++ } ++ ++ vma = find_vma(mm, address); ++ if (unlikely(!vma)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (likely(vma->vm_start <= address)) ++ goto good_area; ++ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (error_code & X86_PF_USER) { ++ /* ++ * Accessing the stack below %sp is always a bug. ++ * The large cushion allows instructions like enter ++ * and pusha to work. ("enter $65535, $31" pushes ++ * 32 pointers and then decrements %sp by 65535.) ++ */ ++ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ } ++ if (unlikely(expand_stack(vma, address))) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ ++ /* ++ * Ok, we have a good vm_area for this memory access, so ++ * we can handle it.. ++ */ ++good_area: ++ if (unlikely(access_error(error_code, vma))) { ++ bad_area_access_error(regs, error_code, address, vma); ++ return; ++ } ++ ++ /* ++ * If for any reason at all we couldn't handle the fault, ++ * make sure we exit gracefully rather than endlessly redo ++ * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if ++ * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. ++ * ++ * Note that handle_userfault() may also release and reacquire mmap_sem ++ * (and not return with VM_FAULT_RETRY), when returning to userland to ++ * repeat the page fault later with a VM_FAULT_NOPAGE retval ++ * (potentially after handling any pending signal during the return to ++ * userland). The return to userland is identified whenever ++ * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. 
++ * Thus we have to be careful about not touching vma after handling the ++ * fault, so we read the pkey beforehand. ++ */ ++ pkey = vma_pkey(vma); ++ fault = handle_mm_fault(vma, address, flags); ++ major |= fault & VM_FAULT_MAJOR; ++ ++ /* ++ * If we need to retry the mmap_sem has already been released, ++ * and if there is a fatal signal pending there is no guarantee ++ * that we made any progress. Handle this case first. ++ */ ++ if (unlikely(fault & VM_FAULT_RETRY)) { ++ /* Retry at most once */ ++ if (flags & FAULT_FLAG_ALLOW_RETRY) { ++ flags &= ~FAULT_FLAG_ALLOW_RETRY; ++ flags |= FAULT_FLAG_TRIED; ++ if (!fatal_signal_pending(tsk)) ++ goto retry; ++ } ++ ++ /* User mode? Just return to handle the fatal exception */ ++ if (flags & FAULT_FLAG_USER) ++ return; ++ ++ /* Not returning to user mode? Handle exceptions or die: */ ++ no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); ++ return; ++ } ++ ++ up_read(&mm->mmap_sem); ++ if (unlikely(fault & VM_FAULT_ERROR)) { ++ mm_fault_error(regs, error_code, address, &pkey, fault); ++ return; ++ } ++ ++ /* ++ * Major/minor page fault accounting. If any of the events ++ * returned VM_FAULT_MAJOR, we account it as a major fault. ++ */ ++ if (major) { ++ tsk->maj_flt++; ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); ++ } else { ++ tsk->min_flt++; ++ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); ++ } ++ ++ check_v8086_mode(regs, address, tsk); ++} ++NOKPROBE_SYMBOL(__do_page_fault); ++ ++static nokprobe_inline void ++trace_page_fault_entries(unsigned long address, struct pt_regs *regs, ++ unsigned long error_code) ++{ ++ if (user_mode(regs)) ++ trace_page_fault_user(address, regs, error_code); ++ else ++ trace_page_fault_kernel(address, regs, error_code); ++} ++ ++/* ++ * We must have this function blacklisted from kprobes, tagged with notrace ++ * and call read_cr2() before calling anything else. To avoid calling any ++ * kind of tracing machinery before we've observed the CR2 value. ++ * ++ * exception_{enter,exit}() contains all sorts of tracepoints. ++ */ ++dotraplinkage void notrace ++do_page_fault(struct pt_regs *regs, unsigned long error_code) ++{ ++ unsigned long address = read_cr2(); /* Get the faulting address */ ++ enum ctx_state prev_state; ++ ++ prev_state = exception_enter(); ++ if (trace_pagefault_enabled()) ++ trace_page_fault_entries(address, regs, error_code); ++ ++ __do_page_fault(regs, error_code, address); ++ exception_exit(prev_state); ++} ++NOKPROBE_SYMBOL(do_page_fault); +diff -uprN kernel/arch/x86/mm/tlb.c kernel_new/arch/x86/mm/tlb.c +--- kernel/arch/x86/mm/tlb.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/arch/x86/mm/tlb.c 2021-04-01 18:28:07.660863282 +0800 +@@ -153,9 +153,9 @@ void switch_mm(struct mm_struct *prev, s + { + unsigned long flags; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + switch_mm_irqs_off(prev, next, tsk); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static void sync_current_stack_to_mm(struct mm_struct *mm) +@@ -274,7 +274,7 @@ void switch_mm_irqs_off(struct mm_struct + { + struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); + u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); +- unsigned cpu = smp_processor_id(); ++ unsigned cpu = raw_smp_processor_id(); + u64 next_tlb_gen; + + /* +@@ -286,8 +286,11 @@ void switch_mm_irqs_off(struct mm_struct + * NB: leave_mm() calls us with prev == NULL and tsk == NULL. 
+ */ + ++ WARN_ON_ONCE(IS_ENABLED(CONFIG_IPIPE_DEBUG_INTERNAL) && ++ !hard_irqs_disabled()); ++ + /* We don't want flush_tlb_func_* to run concurrently with us. */ +- if (IS_ENABLED(CONFIG_PROVE_LOCKING)) ++ if (!IS_ENABLED(CONFIG_IPIPE) && IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(!irqs_disabled()); + + /* +@@ -413,7 +416,7 @@ void switch_mm_irqs_off(struct mm_struct + this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); + } + +- load_mm_cr4(next); ++ load_mm_cr4_irqsoff(next); + switch_ldt(real_prev, next); + } + +@@ -519,6 +522,7 @@ static void flush_tlb_func_common(const + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); + u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen); ++ unsigned long flags; + + /* This code cannot presently handle being reentered. */ + VM_WARN_ON(!irqs_disabled()); +@@ -536,7 +540,9 @@ static void flush_tlb_func_common(const + * garbage into our TLB. Since switching to init_mm is barely + * slower than a minimal flush, just switch to init_mm. + */ ++ flags = hard_cond_local_irq_save(); + switch_mm_irqs_off(NULL, &init_mm, NULL); ++ hard_cond_local_irq_restore(flags); + return; + } + +diff -uprN kernel/Documentation/ipipe.rst kernel_new/Documentation/ipipe.rst +--- kernel/Documentation/ipipe.rst 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/Documentation/ipipe.rst 2021-04-01 18:28:07.660863282 +0800 +@@ -0,0 +1,924 @@ ++.. include:: ++ ++=================================== ++The Interrupt Pipeline (aka I-pipe) ++=================================== ++ ++:Copyright: |copy| 2018: Philippe Gerum ++ ++Purpose ++======= ++ ++Using Linux as a host for lightweight software cores specialized in ++delivering very short and bounded response times has been a popular ++way of supporting real-time applications in the embedded space over ++the years. ++ ++This design - known as the *dual kernel* approach - introduces a small ++real-time infrastructure which schedules time-critical activities ++independently from the main kernel. Application threads co-managed by ++this infrastructure still benefit from the ancillary kernel services ++such as virtual memory management, and can also leverage the rich GPOS ++feature set Linux provides such as networking, data storage or GUIs. ++ ++Although the real-time infrastructure has to present specific driver ++stack and API implementations to applications, there are nonetheless ++significant upsides to keeping the real-time core separate from the ++GPOS infrastructure: ++ ++- because the two kernels are independent, real-time activities are ++ not serialized with GPOS operations internally, removing potential ++ delays which might be induced by the non time-critical ++ work. Likewise, there is no requirement for keeping the GPOS ++ operations fine-grained and highly preemptible at any time, which ++ would otherwise induce noticeable overhead on low-end hardware, due ++ to the requirement for pervasive task priority inheritance and IRQ ++ threading. ++ ++- the functional isolation of the real-time infrastructure from the ++ rest of the kernel code restricts common bug hunting to the scope of ++ the smaller kernel, excluding most interactions with the very large ++ GPOS kernel base. 
++ ++- with a dedicated infrastructure providing a specific, well-defined ++ set of real-time services, applications can unambiguously figure out ++ which API calls are available for supporting time-critical work, ++ excluding all the rest as being potentially non-deterministic with ++ respect to response time. ++ ++To support such a *dual kernel system*, we need the kernel to exhibit ++a high-priority execution context, for running out-of-band real-time ++duties concurrently to the regular operations. ++ ++.. NOTE:: The I-pipe only introduces the basic mechanisms for hosting ++such a real-time core, enabling the common programming model for its ++applications in user-space. It does *not* implement the real-time core ++per se, which should be provided by a separate kernel component. ++ ++The issue of interrupt response time ++==================================== ++ ++The real-time core has to act upon device interrupts with no delay, ++regardless of the regular kernel operations which may be ongoing when ++the interrupt is received by the CPU. ++ ++However, to protect from deadlocks and maintain data integrity, Linux ++normally hard disables interrupts around any critical section of code ++which must not be preempted by interrupt handlers on the same CPU, ++enforcing a strictly serialized execution among those contexts. ++ ++The unpredictable delay this may cause before external events can be ++handled is a major roadblock for kernel components requiring ++predictable and very short response times to external events, in the ++range of a few microseconds. ++ ++Therefore, there is a basic requirement for prioritizing interrupt ++masking and delivery between the real-time core and GPOS operations, ++while maintaining consistent internal serialization for the kernel. ++ ++To address this issue, the I-pipe implements a mechanism called ++*interrupt pipelining* turns all device IRQs into NMIs, only to run ++NMI-safe interrupt handlers from the perspective of the regular kernel ++activities. ++ ++Two-stage IRQ pipeline ++====================== ++ ++.. _pipeline ++Interrupt pipelining is a lightweight approach based on the ++introduction of a separate, high-priority execution stage for running ++out-of-band interrupt handlers immediately upon IRQ receipt, which ++cannot be delayed by the in-band, regular kernel work even if the ++latter serializes the execution by - seemingly - disabling interrupts. ++ ++IRQs which have no handlers in the high priority stage may be deferred ++on the receiving CPU until the out-of-band activity has quiesced on ++that CPU. Eventually, the preempted in-band code can resume normally, ++which may involve handling the deferred interrupts. ++ ++In other words, interrupts are flowing down from the out-of-band to ++the in-band interrupt stages, which form a two-stage pipeline for ++prioritizing interrupt delivery. ++ ++The runtime context of the out-of-band interrupt handlers is known as ++the *head stage* of the pipeline, as opposed to the in-band kernel ++activities sitting on the *root stage*:: ++ ++ Out-of-band In-band ++ IRQ handlers() IRQ handlers() ++ __________ _______________________ ______ ++ . / / . . / / . ++ . / / . . / / . ++ . / / . . / / . ++ ___/ /______________________/ / . ++ [IRQ] -----> _______________________________/ . ++ . . . . ++ . Head . . Root . ++ . Stage . . Stage . 
++ _____________________________________________ ++ ++ ++A software core may base its own activities on the head stage, ++interposing on specific IRQ events, for delivering real-time ++capabilities to a particular set of applications. Meanwhile, the ++regular kernel operations keep going over the root stage unaffected, ++only delayed by short preemption times for running the out-of-band ++work. ++ ++.. NOTE:: Interrupt pipelining is a partial implementation of [#f2]_, ++ in which an interrupt *stage* is a limited form of an ++ operating system *domain*. ++ ++Virtual interrupt flag ++---------------------- ++ ++.. _flag: ++As hinted earlier, predictable response time of out-of-band handlers ++to IRQ receipts requires the in-band kernel work not to be allowed to ++delay them by masking interrupts in the CPU. ++ ++However, critical sections delimited this way by the in-band code must ++still be enforced for the *root stage*, so that system integrity is ++not at risk. This means that although out-of-band IRQ handlers may run ++at any time while the *head stage* is accepting interrupts, in-band ++IRQ handlers should be allowed to run only when the root stage is ++accepting interrupts too. ++ ++So we need to decouple the interrupt masking and delivery logic which ++applies to the head stage from the one in effect on the root stage, by ++implementing a dual interrupt control mechanism. ++ ++To this end, a software logic managing a virtual interrupt flag (aka ++*IPIPE_STALL_FLAG*) is introduced by the interrupt pipeline between ++the hardware and the generic IRQ management layer. This logic can mask ++IRQs from the perspective of the regular kernel work when ++:c:func:`local_irq_save`, :c:func:`local_irq_disable` or any ++lock-controlled masking operations like :c:func:`spin_lock_irqsave` is ++called, while still accepting IRQs from the CPU for immediate delivery ++to out-of-band handlers. ++ ++The head stage protects from interrupts by disabling them in the CPU's ++status register, while the root stage disables interrupts only ++virtually. A stage for which interrupts are disabled is said to be ++*stalled*. Conversely, *unstalling* a stage means re-enabling ++interrupts for it. ++ ++Obviously, stalling the head stage implicitly means disabling ++further IRQ receipts for the root stage too. ++ ++Interrupt deferral for the *root stage* ++--------------------------------------- ++ ++.. _deferral: ++.. _deferred: ++When the root stage is stalled by setting the virtual interrupt flag, ++the occurrence of any incoming IRQ which was not delivered to the ++*head stage* is recorded into a per-CPU log, postponing its actual ++delivery to the root stage. ++ ++The delivery of the interrupt event to the corresponding in-band IRQ ++handler is deferred until the in-band kernel code clears the virtual ++interrupt flag by calling :c:func:`local_irq_enable` or any of its ++variants, which unstalls the root stage. When this happens, the ++interrupt state is resynchronized by playing the log, firing the ++in-band handlers for which an IRQ was set pending. ++ ++:: ++ /* Both stages unstalled on entry */ ++ local_irq_save(flags); ++ ++ (pipeline logs IRQx event) ++ ... ++ local_irq_restore(flags); ++ (pipeline plays IRQx event) ++ handle_IRQx_interrupt(); ++ ++If the root stage is unstalled at the time of the IRQ receipt, the ++in-band handler is immediately invoked, just like with the ++non-pipelined IRQ model. ++ ++.. 
NOTE:: The principle of deferring interrupt delivery based on a ++ software flag coupled to an event log has been originally ++ described as "Optimistic interrupt protection" in [#f1]_. ++ ++Device interrupts virtually turned into NMIs ++-------------------------------------------- ++ ++From the standpoint of the in-band kernel code (i.e. the one running ++over the *root* interrupt stage) , the interrupt pipelining logic ++virtually turns all device IRQs into NMIs, for running out-of-band ++handlers. ++ ++.. _re-entry: ++For this reason, out-of-band code may generally **NOT** re-enter ++in-band code, for preventing creepy situations like this one:: ++ ++ /* in-band context */ ++ spin_lock_irqsave(&lock, flags); ++ ++ handle_oob_event(); ++ /* attempted re-entry to in-band from out-of-band. */ ++ in_band_routine(); ++ spin_lock_irqsave(&lock, flags); ++ ++ ... ++ ... ++ ... ++ ... ++ spin_unlock irqrestore(&lock, flags); ++ ++Even in absence of any attempt to get a spinlock recursively, the ++outer in-band code in the example above is entitled to assume that no ++access race can occur on the current CPU while interrupts are ++masked. Re-entering in-band code from an out-of-band handler would ++invalidate this assumption. ++ ++In rare cases, we may need to fix up the in-band kernel routines in ++order to allow out-of-band handlers to call them. Typically, atomic_ ++helpers are such routines, which serialize in-band and out-of-band ++callers. ++ ++Virtual/Synthetic interrupt vectors ++----------------------------------- ++ ++.. _synthetic: ++.. _virtual: ++The pipeline introduces an additional type of interrupts, which are ++purely software-originated, with no hardware involvement. These IRQs ++can be triggered by any kernel code. So-called virtual IRQs are ++inherently per-CPU events. ++ ++Because the common pipeline flow_ applies to virtual interrupts, it ++is possible to attach them to out-of-band and/or in-band handlers, ++just like device interrupts. ++ ++.. NOTE:: virtual interrupts and regular softirqs differ in essence: ++ the latter only exist in the in-band context, and therefore ++ cannot trigger out-of-band activities. ++ ++Virtual interrupt vectors are allocated by a call to ++:c:func:`ipipe_alloc_virq`, and conversely released with ++:c:func:`ipipe_free_virq`. ++ ++For instance, a virtual interrupt can be used for triggering an ++in-band activity on the root stage from the head stage as follows:: ++ ++ #include ++ ++ static void virq_handler(unsigned int virq, void *cookie) ++ { ++ do_in_band_work(); ++ } ++ ++ void install_virq(void) ++ { ++ unsigned int virq; ++ ... ++ virq = ipipe_alloc_virq(); ++ ... ++ ipipe_request_irq(ipipe_root_domain, virq, virq_handler, ++ handler_arg, NULL); ++ } ++ ++An out-of-band handler can schedule the execution of ++:c:func:`virq_handler` like this:: ++ ++ ipipe_post_irq_root(virq); ++ ++Conversely, a virtual interrupt can be handled from the out-of-band ++context:: ++ ++ static void virq_oob_handler(unsigned int virq, void *cookie) ++ { ++ do_oob_work(); ++ } ++ ++ void install_virq(void) ++ { ++ unsigned int virq; ++ ... ++ virq = ipipe_alloc_virq(); ++ ... ++ ipipe_request_irq(ipipe_head_domain, virq, virq_oob_handler, ++ handler_arg, NULL); ++ } ++ ++Any in-band code can trigger the immediate execution of ++:c:func:`virq_oob_handler` on the head stage as follows:: ++ ++ ipipe_post_irq_head(virq); ++ ++Pipelined interrupt flow ++------------------------ ++ ++.. 
_flow: ++When interrupt pipelining is enabled, IRQs are first delivered to the ++pipeline entry point via a call to the generic ++:c:func:`__ipipe_dispatch_irq` routine. Before this happens, the event ++has been propagated through the arch-specific code for handling an IRQ:: ++ ++ asm_irq_entry ++ -> irqchip_handle_irq() ++ -> ipipe_handle_domain_irq() ++ -> __ipipe_grab_irq() ++ -> __ipipe_dispatch_irq() ++ -> irq_flow_handler() ++ ++ ++Contrary to the non-pipelined model, the generic IRQ flow handler does ++*not* call the in-band interrupt handler immediately, but only runs ++the irqchip-specific handler for acknowledging the incoming IRQ event ++in the hardware. ++ ++.. _Holding interrupt lines: ++If the interrupt is either of the *level-triggered*, *fasteoi* or ++*percpu* type, the irqchip is given a chance to hold the interrupt ++line, typically by masking it, until either of the out-of-band or ++in-band handler have run. This addresses the following scenario, which ++happens for a similar reason while an IRQ thread waits for being ++scheduled in, requiring the same kind of provision:: ++ ++ /* root stage stalled on entry */ ++ asm_irq_entry ++ ... ++ -> __ipipe_dispatch_irq() ++ ... ++ ++ asm_irq_exit ++ /* ++ * CPU allowed to accept interrupts again with IRQ cause not ++ * acknowledged in device yet => **IRQ storm**. ++ */ ++ asm_irq_entry ++ ... ++ asm_irq_exit ++ asm_irq_entry ++ ... ++ asm_irq_exit ++ ++IRQ delivery logic ++------------------ ++ ++If an out-of-band handler exists for the interrupt received, ++:c:func:`__ipipe_dispatch_irq` invokes it immediately, after switching ++the execution context to the head stage if not current yet. ++ ++Otherwise, if the execution context is currently over the root stage ++and unstalled, the pipeline core delivers it immediately to the ++in-band handler. ++ ++In all other cases, the interrupt is only set pending into the per-CPU ++log, then the interrupt frame is left. ++ ++Alternate scheduling ++==================== ++ ++The I-pipe promotes the idea that a *dual kernel* system should keep ++the functional overlap between the kernel and the real-time core ++minimal. To this end, a real-time thread should be merely seen as a ++regular task with additional scheduling capabilities guaranteeing very ++low response times. ++ ++To support such idea, the I-pipe enables kthreads and regular user ++tasks to run alternatively in the out-of-band execution context ++introduced by the interrupt pipeline_ (aka *head* stage), or the ++common in-band kernel context for GPOS operations (aka *root* stage). ++ ++As a result, real-time core applications in user-space benefit from ++the common Linux programming model - including virtual memory ++protection -, and still have access to the regular Linux services for ++carrying out non time-critical work. ++ ++Task migration to the head stage ++-------------------------------- ++ ++Low latency response time to events can be achieved when Linux tasks ++wait for them from the out-of-band execution context. The real-time ++core is responsible for switching a task to such a context as part of ++its task management rules; the I-pipe facilitates this migration with ++dedicated services. ++ ++The migration process of a task from the GPOS/in-band context to the ++high-priority, out-of-band context is as follows: ++ ++1. :c:func:`__ipipe_migrate_head` is invoked from the migrating task ++ context, with the same prerequisites than for calling ++ :c:func:`schedule` (preemption enabled, interrupts on). ++ ++.. 
_`in-band sleep operation`: ++2. the caller is put to interruptible sleep state (S). ++ ++3. before resuming in-band operations, the next task picked by the ++ (regular kernel) scheduler on the same CPU for replacing the ++ migrating task fires :c:func:`ipipe_migration_hook` which the ++ real-time core should override (*__weak* binding). Before the call, ++ the head stage is stalled, interrupts are disabled in the CPU. The ++ root execution stage is still current though. ++ ++4. the real-time core's implementation of ++ :c:func:`ipipe_migration_hook` is passed a pointer to the ++ task_struct descriptor of the migrating task. This routine is expected ++ to perform the necessary steps for taking control over the task on ++ behalf of the real-time core, re-scheduling its code appropriately ++ over the head stage. This typically involves resuming it from the ++ `out-of-band suspended state`_ applied during the converse migration ++ path. ++ ++5. at some point later, when the migrated task is picked by the ++ real-time scheduler, it resumes execution on the head stage with ++ the register file previously saved by the kernel scheduler in ++ :c:func:`switch_to` at step 1. ++ ++Task migration to the root stage ++-------------------------------- ++ ++Sometimes, a real-time thread may want to leave the out-of-band ++context, continuing execution from the in-band context instead, so as ++to: ++ ++- run non time-critical (in-band) work involving regular system calls ++ handled by the kernel, ++ ++- recover from CPU exceptions, such as handling major memory access ++ faults, for which there is no point in caring for response time, and ++ therefore makes no sense to duplicate in the real-time core anyway. ++ ++.. NOTE: The discussion about exception_ handling covers the last ++ point in details. ++ ++The migration process of a task from the high-priority, out-of-band ++context to the GPOS/in-band context is as follows:: ++ ++1. the real-time core schedules an in-band handler for execution which ++ should call :c:func:`wake_up_process` to unblock the migrating task ++ from the standpoint of the kernel scheduler. This is the ++ counterpart of the :ref:`in-band sleep operation ` from the converse migration path. A virtual_ IRQ can be ++ used for scheduling such event from the out-of-band context. ++ ++.. _`out-of-band suspended state`: ++2. the real-time core suspends execution of the current task from its ++ own standpoint. The real-time scheduler is assumed to be using the ++ common :c:func:`switch_to` routine for switching task contexts. ++ ++3. at some point later, the out-of-band context is exited by the ++ current CPU when no more high-priority work is left, causing the ++ preempted in-band kernel code to resume execution on the root ++ stage. The handler scheduled at step 1 eventually runs, waking up ++ the migrating task from the standpoint of the kernel. ++ ++4. the migrating task resumes from the tail scheduling code of the ++ real-time scheduler, where it suspended in step 2. Noticing the ++ migration, the real-time core eventually calls ++ :c:func:`__ipipe_reenter_root` for finalizing the transition of the ++ incoming task to the root stage. ++ ++Binding to the real-time core ++----------------------------- ++ ++.. _binding: ++The I-pipe facilitates fine-grained per-thread management from the ++real-time core, as opposed to per-process. 
For this reason, the ++real-time core should at least implement a mechanism for turning a ++regular task into a real-time thread with extended capabilities, ++binding it to the core. ++ ++The real-time core should inform the kernel about its intent to ++receive notifications about that task, by calling ++:c:func::`ipipe_enable_notifier` when such task is current. ++ ++For this reason, the binding operation is usually carried out by a ++dedicated system call exposed by the real-time core, which a regular ++task would invoke. ++ ++.. NOTE:: Whether there should be distinct procedures for binding ++ processes *and* threads to the real-time core, or only a ++ thread binding procedure is up to the real-time core ++ implementation. ++ ++Notifications ++------------- ++ ++Exception handling ++~~~~~~~~~~~~~~~~~~ ++ ++.. _exception ++If a processor exception is raised while the CPU is busy running a ++real-time thread in the out-of-band context (e.g. due to some invalid ++memory access, bad instruction, FPU or alignment error etc), the task ++may have to leave such context immediately if the fault handler is not ++protected against out-of-band interrupts, and therefore cannot be ++properly serialized with out-of-band code. ++ ++The I-pipe notifies the real-time core about incoming exceptions early ++from the low-level fault handlers, but only when some out-of-band code ++was running when the exception was taken. The real-time core may then ++take action, such as reconciling the current task's execution context ++with the kernel's expectations before the task may traverse the ++regular fault handling code. ++ ++.. HINT:: Enabling debuggers to trace real-time thread involves ++ dealing with debug traps the former may poke into the ++ debuggee's code for breakpointing duties. ++ ++The notification is issued by a call to :c:func:`__ipipe_notify_trap` ++which in turn invokes the :c:func:`ipipe_trap_hook` routine the ++real-time core should override for receiving those events (*__weak* ++binding). Interrupts are **disabled** in the CPU when ++:c:func:`ipipe_trap_hook` is called.:: ++ ++ /* out-of-band code running */ ++ *bad_pointer = 42; ++ [ACCESS EXCEPTION] ++ /* low-level fault handler in arch//mm */ ++ -> do_page_fault() ++ -> __ipipe_notify_trap(...) ++ /* real-time core */ ++ -> ipipe_trap_hook(...) ++ -> forced task migration to root stage ++ ... ++ -> handle_mm_fault() ++ ++.. NOTE:: handling minor memory access faults only requiring quick PTE ++ fixups should not involve switching the current task to the ++ in-band context though. Instead, the fixup code should be ++ made atomic_ for serializing accesses from any context. ++ ++System calls ++~~~~~~~~~~~~ ++ ++A real-time core interfaced with the kernel via the I-pipe may ++introduce its own set of system calls. From the standpoint of the ++kernel, this is a foreign set of calls, which can be distinguished ++unambiguously from regular ones based on an arch-specific marker. ++ ++.. HINT:: Syscall numbers from this set might have a different base, ++ and/or some high-order bit set which regular syscall numbers ++ would not have. ++ ++If a task bound to the real-time core issues any system call, ++regardless of which of the kernel or real-time core should handle it, ++the latter must be given the opportunity to: ++ ++- perform the service directly, possibly switching the caller to ++ out-of-band context first would the request require it. 
++ ++- pass the request downward to the normal system call path on the root ++ stage, possibly switching the caller to in-band context if needed. ++ ++If a regular task (i.e. *not* known from the real-time core [yet]) ++issues any foreign system call, the real-time core is given a chance ++to handle it. This way, a foreign system call which would initially ++bind a regular task to the real-time core would be delivered to the ++real-time core as expected (see binding_). ++ ++The I-pipe intercepts system calls early in the kernel entry code, ++delivering them to the proper handler according to the following ++logic:: ++ ++ is_foreign(syscall_nr)? ++ Y: is_bound(task) ++ Y: -> ipipe_fastcall_hook() ++ N: -> ipipe_syscall_hook() ++ N: is_bound(task) ++ Y: -> ipipe_syscall_hook() ++ N: -> normal syscall handling ++ ++:c:func:`ipipe_fastcall_hook` is the fast path for handling foreign ++system calls from tasks already running in out-of-band context. ++ ++:c:func:`ipipe_syscall_hook` is a slower path for handling requests ++which might require the caller to switch to the out-of-band context ++first before proceeding. ++ ++Kernel events ++~~~~~~~~~~~~~ ++ ++The last set of notifications involves pure kernel events which the ++real-time core may need to know about, as they may affect its own task ++management. Except for IPIPE_KEVT_CLEANUP which is called for *any* ++exiting user-space task, all other notifications are only issued for ++tasks bound to the real-time core (which may involve kthreads). ++ ++The notification is issued by a call to :c:func:`__ipipe_notify_kevent` ++which in turn invokes the :c:func:`ipipe_kevent_hook` routine the ++real-time core should override for receiving those events (*__weak* ++binding). Interrupts are **enabled** in the CPU when ++:c:func:`ipipe_kevent_hook` is called. ++ ++The notification hook is given the event type code, and a single ++pointer argument which relates to the event type. ++ ++The following events are defined (include/linux/ipipe_domain.h): ++ ++- IPIPE_KEVT_SCHEDULE(struct task_struct *next) ++ ++ sent in preparation of a context switch, right before the memory ++ context is switched to *next*. ++ ++- IPIPE_KEVT_SIGWAKE(struct task_struct *target) ++ ++ sent when *target* is about to receive a signal. The real-time core ++ may decide to schedule a transition of the recipient to the root ++ stage in order to have it handle that signal asap, which is commonly ++ required for keeping the kernel sane. This notification is always ++ sent from the context of the issuer. ++ ++- IPIPE_KEVT_SETAFFINITY(struct ipipe_migration_data *p) ++ ++ sent when p->task is about to move to CPU p->dest_cpu. ++ ++- IPIPE_KEVT_EXIT(struct task_struct *current) ++ ++ sent from :c:func:`do_exit` before the current task has dropped the ++ files and mappings it owns. ++ ++- IPIPE_KEVT_CLEANUP(struct mm_struct *mm) ++ ++ sent before *mm* is entirely dropped, before the mappings are ++ exited. Per-process resources which might be maintained by the ++ real-time core could be released there, as all threads have exited. ++ ++ ..NOTE:: IPIPE_KEVT_SETSCHED is deprecated, and should not be used. 
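
A minimal sketch of the kernel event hook described above may help; it is not drawn from the patch itself. It assumes the prototype implied by the text (an integer event code plus a single ``void *`` argument, delivered through the *__weak* :c:func:`ipipe_kevent_hook` binding) and uses only the IPIPE_KEVT_* codes listed above::

    /*
     * Illustrative sketch only: a real-time core overriding the __weak
     * ipipe_kevent_hook binding. The prototype is an assumption drawn
     * from the description (event type code + one pointer argument).
     */
    #include <linux/ipipe_domain.h>
    #include <linux/sched.h>

    int ipipe_kevent_hook(int kevent, void *data)
    {
        switch (kevent) {
        case IPIPE_KEVT_SCHEDULE:
            /* data is struct task_struct *next: a context switch to
             * that task is about to happen on this CPU. */
            break;
        case IPIPE_KEVT_SIGWAKE:
            /* data is struct task_struct *target: a signal is about
             * to be delivered; the core may move the target back to
             * the root stage so it can handle the signal promptly. */
            break;
        case IPIPE_KEVT_SETAFFINITY:
            /* data is struct ipipe_migration_data *p: follow the task
             * to p->dest_cpu in the core's own scheduler. */
            break;
        case IPIPE_KEVT_EXIT:
            /* current is exiting: release per-thread core resources. */
            break;
        case IPIPE_KEVT_CLEANUP:
            /* data is struct mm_struct *mm: drop per-process state
             * before the mm is finally dropped. */
            break;
        default:
            break;
        }
        return 0;
    }

As noted above, interrupts are enabled in the CPU when this hook runs, so it may take regular in-band locks if needed.
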
++ ++Prerequisites ++============= ++ ++The interrupt pipeline requires the following features to be available ++from the target kernel: ++ ++- Generic IRQ handling ++- Clock event abstraction ++ ++Implementation ++============== ++ ++The following kernel areas are involved in interrupt pipelining: ++ ++- Generic IRQ core ++ ++ * IRQ flow handlers ++ ++ Generic flow handlers acknowledge the incoming IRQ event in the ++ hardware by calling the appropriate irqchip-specific ++ handler. However, the generic flow_ handlers do not immediately ++ invoke the in-band interrupt handlers, but leave this decision to ++ the pipeline core which calls them, according to the pipelined ++ delivery logic. ++ ++- Arch-specific bits ++ ++ * CPU interrupt mask handling ++ ++ The architecture-specific code which manipulates the interrupt ++ flag in the CPU's state register ++ (i.e. arch//include/asm/irqflags.h) is split between real ++ and virtual interrupt control: ++ ++ + the *hard_local_irq* level helpers affect the hardware state in ++ the CPU. ++ ++ + the *arch_* level helpers affect the virtual interrupt flag_ ++ implemented by the pipeline core for controlling the root stage ++ protection against interrupts. ++ ++ This means that generic helpers from such as ++ :c:func:`local_irq_disable` and :c:func:`local_irq_enable` ++ actually refer to the virtual protection scheme when interrupts ++ are pipelined, implementing interrupt deferral_ for the protected ++ in-band code running over the root stage. ++ ++ * Assembly-level IRQ, exception paths ++ ++ Since interrupts are only virtually masked by the in-band code, ++ IRQs can still be taken by the CPU although they should not be ++ visible from the root stage when they happen in the following ++ situations: ++ ++ + when the virtual protection flag_ is raised, meaning the root ++ stage does not accept IRQs, in which case interrupt _deferral ++ happens. ++ ++ + when the CPU runs out-of-band code, regardless of the state of ++ the virtual protection flag. ++ ++ In both cases, the low-level assembly code handling incoming IRQs ++ takes a fast exit path unwinding the interrupt frame early, ++ instead of running the common in-band epilogue which checks for ++ task rescheduling opportunities and pending signals. ++ ++ Likewise, the low-level fault/exception handling code also takes a ++ fast exit path under the same circumstances. Typically, an ++ out-of-band handler causing a minor page fault should benefit from ++ a lightweight PTE fixup performed by the high-level fault handler, ++ but is not allowed to traverse the rescheduling logic upon return ++ from exception. ++ ++- Scheduler core ++ ++ * CPUIDLE support ++ ++ The logic of the CPUIDLE framework has to account for those ++ specific issues the interrupt pipelining introduces: ++ ++ - the kernel might be idle in the sense that no in-band activity ++ is scheduled yet, and planning to shut down the timer device ++ suffering the C3STOP (mis)feature. However, at the same time, ++ some out-of-band code might wait for a tick event already ++ programmed in the timer hardware controlled by some out-of-band ++ code via the timer_ interposition mechanism. ++ ++ - switching the CPU to a power saving state may incur a ++ significant latency, particularly for waking it up before it can ++ handle an incoming IRQ, which is at odds with the purpose of ++ interrupt pipelining. 
++ ++ Obviously, we don't want the CPUIDLE logic to turn off the ++ hardware timer when C3STOP is in effect for the timer device, ++ which would cause the pending out-of-band event to be ++ lost. ++ ++ Likewise, the wake up latency induced by entering a sleep state on ++ a particular hardware may not always be acceptable. ++ ++ Since the in-band kernel code does not know about the out-of-band ++ code plans by design, CPUIDLE calls :c:func:`ipipe_cpuidle_control` ++ to figure out whether the out-of-band system is fine with entering ++ the idle state as well. This routine should be overriden by the ++ out-of-band code for receiving such notification (*__weak* ++ binding). ++ ++ If this hook returns a boolean *true* value, CPUIDLE proceeds as ++ normally. Otherwise, the CPU is simply denied from entering the ++ idle state, leaving the timer hardware enabled. ++ ++ ..CAUTION:: If some out-of-band code waiting for an external event ++ cannot bear with the latency that might be induced by the default ++ architecture-specific CPU idling code, then CPUIDLE is not usable ++ and should be disabled at build time. ++ ++ * Kernel preemption control (PREEMPT) ++ ++ :c:func:`__preempt_schedule_irq` reconciles the virtual interrupt ++ state - which has not been touched by the assembly level code upon ++ kernel entry - with basic assumptions made by the scheduler core, ++ such as entering with interrupts disabled. It should be called by ++ the arch-specific assembly code in replacement of ++ :c:func:`preempt_schedule_irq`, from the call site dealing with ++ kernel preemption upon return from IRQ or system call. ++ ++- Timer management ++ ++ * Timer interposition ++ ++.. _timer: ++ The timer interposition mechanism is designed for handing over ++ control of the hardware tick device in use by the kernel to an ++ out-of-band timing logic. Typically, a real-time co-kernel would ++ make good use of this feature, for grabbing control over the timer ++ hardware. ++ ++ Once some out-of-band logic has grabbed control over the timer ++ device by calling :c:func:`ipipe_select_timers`, it can install ++ its own out-of-band handlers using :c:func:`ipipe_timer_start`. ++ From that point, it must carry out the timing requests from the ++ in-band timer core (e.g. hrtimers) in addition to its own timing ++ duties. ++ ++ In other words, once the interposition is set up, the ++ functionality of the tick device is shared between the in-band and ++ out-of-band contexts, with only the latter actually programming ++ the hardware. ++ ++ This mechanism is based on the clock event abstraction (`struct ++ clock_event_device`). Clock event devices which may be controlled ++ by this way need their drivers to be specifically adapted for such ++ use: ++ ++ + the interrupt handler receiving tick IRQs must be check with ++ :c:func:`clockevent_ipipe_stolen` whether they actually control ++ the hardware. A non-zero return from this routine means that it ++ does not, and therefore should skip the timer acknowledge ++ code, which would have run earlier in that case. ++ ++- Generic locking & atomic ++ ++ * Generic atomic ops ++ ++.. _atomic: ++ The effect of virtualizing interrupt protection must be reversed ++ for atomic helpers in and ++ , so that no interrupt can preempt ++ their execution, regardless of the stage their caller live ++ on. ++ ++ This is required to keep those helpers usable on data which ++ might be accessed concurrently from both stages. 
++ ++ The usual way to revert such virtualization consists of delimiting ++ the protected section with :c:func:`hard_local_irq_save`, ++ :c:func:`hard_local_irq_restore` calls, in replacement for ++ :c:func:`local_irq_save`, :c:func:`local_irq_restore` ++ respectively. ++ ++ * Hard spinlocks ++ ++ The pipeline core introduces one more spinlock type: ++ ++ + *hard* spinlocks manipulate the CPU interrupt mask, and don't ++ affect the kernel preemption state in locking/unlocking ++ operations. ++ ++ This type of spinlock is useful for implementing a critical ++ section to serialize concurrent accesses from both in-band and ++ out-of-band contexts, i.e. from root and head stages. Obviously, ++ sleeping into a critical section protected by a hard spinlock ++ would be a very bad idea. ++ ++ In other words, hard spinlocks are not subject to virtual ++ interrupt masking, therefore can be used to serialize with ++ out-of-band activities, including from the in-band kernel ++ code. At any rate, those sections ought to be quite short, for ++ keeping latency low. ++ ++- Drivers ++ ++ * IRQ chip drivers ++ ++ .. _irqchip: ++ irqchip drivers need to be specifically adapted for supporting the ++ pipelined interrupt model. The irqchip descriptor gains additional ++ handlers: ++ ++ + irq_chip.irq_hold is an optional handler called by the pipeline ++ core upon events from *level-triggered*, *fasteoi* and *percpu* ++ types. See Holding_ interrupt lines. ++ ++ When specified in the descriptor, irq_chip.irq_hold should ++ perform as follows, depending on the hardware acknowledge logic: ++ ++ + level -> mask[+ack] ++ + percpu -> mask[+ack][+eoi] ++ + fasteoi -> mask+eoi ++ ++ .. CAUTION:: proper acknowledge and/or EOI is important when ++ holding a line, as those operations may also ++ decrease the current interrupt priority level for ++ the CPU, allowing same or lower priority ++ out-of-band interrupts to be taken while the ++ initial IRQ might be deferred_ for the root stage. ++ ++ + irq_chip.irq_release is the converse operation to ++ irq_chip.irq_hold, releasing an interrupt line from the held ++ state. ++ ++ The :c:func:`ipipe_end_irq` routine invokes the available ++ handler for releasing the interrupt line. The pipeline core ++ calls :c:func:`irq_release` automatically for each IRQ which has ++ been accepted by an in-band handler (`IRQ_HANDLED` status). This ++ routine should be called explicitly by out-of-band handlers ++ before returning to their caller. ++ ++ `IRQCHIP_PIPELINE_SAFE` must be added to `struct irqchip::flags` ++ member of a pipeline-aware irqchip driver. ++ ++ .. NOTE:: :c:func:`irq_set_chip` will complain loudly with a ++ kernel warning whenever the irqchip descriptor passed ++ does not bear the `IRQCHIP_PIPELINE_SAFE` flag and ++ CONFIG_IPIPE is enabled. ++ ++- Misc ++ ++ * :c:func:`printk` ++ ++ :c:func:`printk` may be called by out-of-band code safely, without ++ encurring extra latency. The output is delayed until the in-band ++ code resumes, and the console driver(s) can handle it. ++ ++ * Tracing core ++ ++ Tracepoints can be traversed by out-of-band code safely. Dynamic ++ tracing is available to a kernel running the pipelined interrupt ++ model too. ++ ++Terminology ++=========== ++ ++.. 
_terminology: ++====================== ======================================================= ++ Term Definition ++====================== ======================================================= ++Head stage high-priority execution context trigged by out-of-band IRQs ++Root stage regular kernel context performing GPOS work ++Out-of-band code code running over the head stage ++In-band code code running over the root stage ++Scheduler the regular, Linux kernel scheduler ++Real-time scheduler the out-of-band task scheduling logic implemented on top of the I-pipe ++ ++Resources ++========= ++ ++.. [#f1] Stodolsky, Chen & Bershad; "Fast Interrupt Priority Management in Operating System Kernels" ++ https://www.usenix.org/legacy/publications/library/proceedings/micro93/full_papers/stodolsky.txt ++.. [#f2] Yaghmour, Karim; "ADEOS - Adaptive Domain Environment for Operating Systems" ++ https://www.opersys.com/ftp/pub/Adeos/adeos.pdf +diff -uprN kernel/drivers/base/core.c kernel_new/drivers/base/core.c +--- kernel/drivers/base/core.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/base/core.c 2021-04-01 18:28:07.660863282 +0800 +@@ -3267,6 +3267,17 @@ EXPORT_SYMBOL(dev_printk_emit); + static void __dev_printk(const char *level, const struct device *dev, + struct va_format *vaf) + { ++#ifdef CONFIG_IPIPE ++ /* ++ * Console logging only if hard locked, or over the head ++ * stage. ++ */ ++ if (hard_irqs_disabled() || !ipipe_root_p) { ++ __ipipe_log_printk(vaf->fmt, *vaf->va); ++ return; ++ } ++#endif ++ + if (dev) + dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", + dev_driver_string(dev), dev_name(dev), vaf); +diff -uprN kernel/drivers/base/core.c.orig kernel_new/drivers/base/core.c.orig +--- kernel/drivers/base/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/base/core.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,3392 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * drivers/base/core.c - core driver model code (device registration, etc) ++ * ++ * Copyright (c) 2002-3 Patrick Mochel ++ * Copyright (c) 2002-3 Open Source Development Labs ++ * Copyright (c) 2006 Greg Kroah-Hartman ++ * Copyright (c) 2006 Novell, Inc. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "base.h" ++#include "power/power.h" ++ ++#ifdef CONFIG_SYSFS_DEPRECATED ++#ifdef CONFIG_SYSFS_DEPRECATED_V2 ++long sysfs_deprecated = 1; ++#else ++long sysfs_deprecated = 0; ++#endif ++static int __init sysfs_deprecated_setup(char *arg) ++{ ++ return kstrtol(arg, 10, &sysfs_deprecated); ++} ++early_param("sysfs.deprecated", sysfs_deprecated_setup); ++#endif ++ ++/* Device links support. 
*/ ++ ++#ifdef CONFIG_SRCU ++static DEFINE_MUTEX(device_links_lock); ++DEFINE_STATIC_SRCU(device_links_srcu); ++ ++static inline void device_links_write_lock(void) ++{ ++ mutex_lock(&device_links_lock); ++} ++ ++static inline void device_links_write_unlock(void) ++{ ++ mutex_unlock(&device_links_lock); ++} ++ ++int device_links_read_lock(void) ++{ ++ return srcu_read_lock(&device_links_srcu); ++} ++ ++void device_links_read_unlock(int idx) ++{ ++ srcu_read_unlock(&device_links_srcu, idx); ++} ++#else /* !CONFIG_SRCU */ ++static DECLARE_RWSEM(device_links_lock); ++ ++static inline void device_links_write_lock(void) ++{ ++ down_write(&device_links_lock); ++} ++ ++static inline void device_links_write_unlock(void) ++{ ++ up_write(&device_links_lock); ++} ++ ++int device_links_read_lock(void) ++{ ++ down_read(&device_links_lock); ++ return 0; ++} ++ ++void device_links_read_unlock(int not_used) ++{ ++ up_read(&device_links_lock); ++} ++#endif /* !CONFIG_SRCU */ ++ ++/** ++ * device_is_dependent - Check if one device depends on another one ++ * @dev: Device to check dependencies for. ++ * @target: Device to check against. ++ * ++ * Check if @target depends on @dev or any device dependent on it (its child or ++ * its consumer etc). Return 1 if that is the case or 0 otherwise. ++ */ ++static int device_is_dependent(struct device *dev, void *target) ++{ ++ struct device_link *link; ++ int ret; ++ ++ if (dev == target) ++ return 1; ++ ++ ret = device_for_each_child(dev, target, device_is_dependent); ++ if (ret) ++ return ret; ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (link->consumer == target) ++ return 1; ++ ++ ret = device_is_dependent(link->consumer, target); ++ if (ret) ++ break; ++ } ++ return ret; ++} ++ ++static void device_link_init_status(struct device_link *link, ++ struct device *consumer, ++ struct device *supplier) ++{ ++ switch (supplier->links.status) { ++ case DL_DEV_PROBING: ++ switch (consumer->links.status) { ++ case DL_DEV_PROBING: ++ /* ++ * A consumer driver can create a link to a supplier ++ * that has not completed its probing yet as long as it ++ * knows that the supplier is already functional (for ++ * example, it has just acquired some resources from the ++ * supplier). ++ */ ++ link->status = DL_STATE_CONSUMER_PROBE; ++ break; ++ default: ++ link->status = DL_STATE_DORMANT; ++ break; ++ } ++ break; ++ case DL_DEV_DRIVER_BOUND: ++ switch (consumer->links.status) { ++ case DL_DEV_PROBING: ++ link->status = DL_STATE_CONSUMER_PROBE; ++ break; ++ case DL_DEV_DRIVER_BOUND: ++ link->status = DL_STATE_ACTIVE; ++ break; ++ default: ++ link->status = DL_STATE_AVAILABLE; ++ break; ++ } ++ break; ++ case DL_DEV_UNBINDING: ++ link->status = DL_STATE_SUPPLIER_UNBIND; ++ break; ++ default: ++ link->status = DL_STATE_DORMANT; ++ break; ++ } ++} ++ ++static int device_reorder_to_tail(struct device *dev, void *not_used) ++{ ++ struct device_link *link; ++ ++ /* ++ * Devices that have not been registered yet will be put to the ends ++ * of the lists during the registration, so skip them here. 
++ */ ++ if (device_is_registered(dev)) ++ devices_kset_move_last(dev); ++ ++ if (device_pm_initialized(dev)) ++ device_pm_move_last(dev); ++ ++ device_for_each_child(dev, NULL, device_reorder_to_tail); ++ list_for_each_entry(link, &dev->links.consumers, s_node) ++ device_reorder_to_tail(link->consumer, NULL); ++ ++ return 0; ++} ++ ++/** ++ * device_pm_move_to_tail - Move set of devices to the end of device lists ++ * @dev: Device to move ++ * ++ * This is a device_reorder_to_tail() wrapper taking the requisite locks. ++ * ++ * It moves the @dev along with all of its children and all of its consumers ++ * to the ends of the device_kset and dpm_list, recursively. ++ */ ++void device_pm_move_to_tail(struct device *dev) ++{ ++ int idx; ++ ++ idx = device_links_read_lock(); ++ device_pm_lock(); ++ device_reorder_to_tail(dev, NULL); ++ device_pm_unlock(); ++ device_links_read_unlock(idx); ++} ++ ++#define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ ++ DL_FLAG_AUTOREMOVE_SUPPLIER | \ ++ DL_FLAG_AUTOPROBE_CONSUMER) ++ ++#define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ ++ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) ++ ++/** ++ * device_link_add - Create a link between two devices. ++ * @consumer: Consumer end of the link. ++ * @supplier: Supplier end of the link. ++ * @flags: Link flags. ++ * ++ * The caller is responsible for the proper synchronization of the link creation ++ * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the ++ * runtime PM framework to take the link into account. Second, if the ++ * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will ++ * be forced into the active metastate and reference-counted upon the creation ++ * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be ++ * ignored. ++ * ++ * If DL_FLAG_STATELESS is set in @flags, the caller of this function is ++ * expected to release the link returned by it directly with the help of either ++ * device_link_del() or device_link_remove(). ++ * ++ * If that flag is not set, however, the caller of this function is handing the ++ * management of the link over to the driver core entirely and its return value ++ * can only be used to check whether or not the link is present. In that case, ++ * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link ++ * flags can be used to indicate to the driver core when the link can be safely ++ * deleted. Namely, setting one of them in @flags indicates to the driver core ++ * that the link is not going to be used (by the given caller of this function) ++ * after unbinding the consumer or supplier driver, respectively, from its ++ * device, so the link can be deleted at that point. If none of them is set, ++ * the link will be maintained until one of the devices pointed to by it (either ++ * the consumer or the supplier) is unregistered. ++ * ++ * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and ++ * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent ++ * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can ++ * be used to request the driver core to automaticall probe for a consmer ++ * driver after successfully binding a driver to the supplier device. ++ * ++ * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, ++ * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at ++ * the same time is invalid and will cause NULL to be returned upfront. 
++ * However, if a device link between the given @consumer and @supplier pair ++ * exists already when this function is called for them, the existing link will ++ * be returned regardless of its current type and status (the link's flags may ++ * be modified then). The caller of this function is then expected to treat ++ * the link as though it has just been created, so (in particular) if ++ * DL_FLAG_STATELESS was passed in @flags, the link needs to be released ++ * explicitly when not needed any more (as stated above). ++ * ++ * A side effect of the link creation is re-ordering of dpm_list and the ++ * devices_kset list by moving the consumer device and all devices depending ++ * on it to the ends of these lists (that does not happen to devices that have ++ * not been registered when this function is called). ++ * ++ * The supplier device is required to be registered when this function is called ++ * and NULL will be returned if that is not the case. The consumer device need ++ * not be registered, however. ++ */ ++struct device_link *device_link_add(struct device *consumer, ++ struct device *supplier, u32 flags) ++{ ++ struct device_link *link; ++ ++ if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || ++ (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || ++ (flags & DL_FLAG_AUTOPROBE_CONSUMER && ++ flags & (DL_FLAG_AUTOREMOVE_CONSUMER | ++ DL_FLAG_AUTOREMOVE_SUPPLIER))) ++ return NULL; ++ ++ if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { ++ if (pm_runtime_get_sync(supplier) < 0) { ++ pm_runtime_put_noidle(supplier); ++ return NULL; ++ } ++ } ++ ++ if (!(flags & DL_FLAG_STATELESS)) ++ flags |= DL_FLAG_MANAGED; ++ ++ device_links_write_lock(); ++ device_pm_lock(); ++ ++ /* ++ * If the supplier has not been fully registered yet or there is a ++ * reverse dependency between the consumer and the supplier already in ++ * the graph, return NULL. ++ */ ++ if (!device_pm_initialized(supplier) ++ || device_is_dependent(consumer, supplier)) { ++ link = NULL; ++ goto out; ++ } ++ ++ /* ++ * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed ++ * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both ++ * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. ++ */ ++ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) ++ flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; ++ ++ list_for_each_entry(link, &supplier->links.consumers, s_node) { ++ if (link->consumer != consumer) ++ continue; ++ ++ if (flags & DL_FLAG_PM_RUNTIME) { ++ if (!(link->flags & DL_FLAG_PM_RUNTIME)) { ++ pm_runtime_new_link(consumer); ++ link->flags |= DL_FLAG_PM_RUNTIME; ++ } ++ if (flags & DL_FLAG_RPM_ACTIVE) ++ refcount_inc(&link->rpm_active); ++ } ++ ++ if (flags & DL_FLAG_STATELESS) { ++ link->flags |= DL_FLAG_STATELESS; ++ kref_get(&link->kref); ++ goto out; ++ } ++ ++ /* ++ * If the life time of the link following from the new flags is ++ * longer than indicated by the flags of the existing link, ++ * update the existing link to stay around longer. 
++ */ ++ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { ++ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { ++ link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; ++ link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; ++ } ++ } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { ++ link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | ++ DL_FLAG_AUTOREMOVE_SUPPLIER); ++ } ++ if (!(link->flags & DL_FLAG_MANAGED)) { ++ kref_get(&link->kref); ++ link->flags |= DL_FLAG_MANAGED; ++ device_link_init_status(link, consumer, supplier); ++ } ++ goto out; ++ } ++ ++ link = kzalloc(sizeof(*link), GFP_KERNEL); ++ if (!link) ++ goto out; ++ ++ refcount_set(&link->rpm_active, 1); ++ ++ if (flags & DL_FLAG_PM_RUNTIME) { ++ if (flags & DL_FLAG_RPM_ACTIVE) ++ refcount_inc(&link->rpm_active); ++ ++ pm_runtime_new_link(consumer); ++ } ++ ++ get_device(supplier); ++ link->supplier = supplier; ++ INIT_LIST_HEAD(&link->s_node); ++ get_device(consumer); ++ link->consumer = consumer; ++ INIT_LIST_HEAD(&link->c_node); ++ link->flags = flags; ++ kref_init(&link->kref); ++ ++ /* Determine the initial link state. */ ++ if (flags & DL_FLAG_STATELESS) ++ link->status = DL_STATE_NONE; ++ else ++ device_link_init_status(link, consumer, supplier); ++ ++ /* ++ * Some callers expect the link creation during consumer driver probe to ++ * resume the supplier even without DL_FLAG_RPM_ACTIVE. ++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE && ++ flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_resume(supplier); ++ ++ /* ++ * Move the consumer and all of the devices depending on it to the end ++ * of dpm_list and the devices_kset list. ++ * ++ * It is necessary to hold dpm_list locked throughout all that or else ++ * we may end up suspending with a wrong ordering of it. ++ */ ++ device_reorder_to_tail(consumer, NULL); ++ ++ list_add_tail_rcu(&link->s_node, &supplier->links.consumers); ++ list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); ++ ++ dev_info(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); ++ ++ out: ++ device_pm_unlock(); ++ device_links_write_unlock(); ++ ++ if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) ++ pm_runtime_put(supplier); ++ ++ return link; ++} ++EXPORT_SYMBOL_GPL(device_link_add); ++ ++static void device_link_free(struct device_link *link) ++{ ++ while (refcount_dec_not_one(&link->rpm_active)) ++ pm_runtime_put(link->supplier); ++ ++ put_device(link->consumer); ++ put_device(link->supplier); ++ kfree(link); ++} ++ ++#ifdef CONFIG_SRCU ++static void __device_link_free_srcu(struct rcu_head *rhead) ++{ ++ device_link_free(container_of(rhead, struct device_link, rcu_head)); ++} ++ ++static void __device_link_del(struct kref *kref) ++{ ++ struct device_link *link = container_of(kref, struct device_link, kref); ++ ++ dev_info(link->consumer, "Dropping the link to %s\n", ++ dev_name(link->supplier)); ++ ++ if (link->flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_drop_link(link->consumer); ++ ++ list_del_rcu(&link->s_node); ++ list_del_rcu(&link->c_node); ++ call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); ++} ++#else /* !CONFIG_SRCU */ ++static void __device_link_del(struct kref *kref) ++{ ++ struct device_link *link = container_of(kref, struct device_link, kref); ++ ++ dev_info(link->consumer, "Dropping the link to %s\n", ++ dev_name(link->supplier)); ++ ++ if (link->flags & DL_FLAG_PM_RUNTIME) ++ pm_runtime_drop_link(link->consumer); ++ ++ list_del(&link->s_node); ++ list_del(&link->c_node); ++ device_link_free(link); ++} ++#endif /* !CONFIG_SRCU */ ++ ++static void 
device_link_put_kref(struct device_link *link) ++{ ++ if (link->flags & DL_FLAG_STATELESS) ++ kref_put(&link->kref, __device_link_del); ++ else ++ WARN(1, "Unable to drop a managed device link reference\n"); ++} ++ ++/** ++ * device_link_del - Delete a stateless link between two devices. ++ * @link: Device link to delete. ++ * ++ * The caller must ensure proper synchronization of this function with runtime ++ * PM. If the link was added multiple times, it needs to be deleted as often. ++ * Care is required for hotplugged devices: Their links are purged on removal ++ * and calling device_link_del() is then no longer allowed. ++ */ ++void device_link_del(struct device_link *link) ++{ ++ device_links_write_lock(); ++ device_pm_lock(); ++ device_link_put_kref(link); ++ device_pm_unlock(); ++ device_links_write_unlock(); ++} ++EXPORT_SYMBOL_GPL(device_link_del); ++ ++/** ++ * device_link_remove - Delete a stateless link between two devices. ++ * @consumer: Consumer end of the link. ++ * @supplier: Supplier end of the link. ++ * ++ * The caller must ensure proper synchronization of this function with runtime ++ * PM. ++ */ ++void device_link_remove(void *consumer, struct device *supplier) ++{ ++ struct device_link *link; ++ ++ if (WARN_ON(consumer == supplier)) ++ return; ++ ++ device_links_write_lock(); ++ device_pm_lock(); ++ ++ list_for_each_entry(link, &supplier->links.consumers, s_node) { ++ if (link->consumer == consumer) { ++ device_link_put_kref(link); ++ break; ++ } ++ } ++ ++ device_pm_unlock(); ++ device_links_write_unlock(); ++} ++EXPORT_SYMBOL_GPL(device_link_remove); ++ ++static void device_links_missing_supplier(struct device *dev) ++{ ++ struct device_link *link; ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) ++ if (link->status == DL_STATE_CONSUMER_PROBE) ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++} ++ ++/** ++ * device_links_check_suppliers - Check presence of supplier drivers. ++ * @dev: Consumer device. ++ * ++ * Check links from this device to any suppliers. Walk the list of the device's ++ * links to suppliers and see if all of them are available. If not, simply ++ * return -EPROBE_DEFER. ++ * ++ * We need to guarantee that the supplier will not go away after the check has ++ * been positive here. It only can go away in __device_release_driver() and ++ * that function checks the device's links to consumers. This means we need to ++ * mark the link as "consumer probe in progress" to make the supplier removal ++ * wait for us to complete (or bad things may happen). ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++int device_links_check_suppliers(struct device *dev) ++{ ++ struct device_link *link; ++ int ret = 0; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->status != DL_STATE_AVAILABLE) { ++ device_links_missing_supplier(dev); ++ ret = -EPROBE_DEFER; ++ break; ++ } ++ WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); ++ } ++ dev->links.status = DL_DEV_PROBING; ++ ++ device_links_write_unlock(); ++ return ret; ++} ++ ++/** ++ * device_links_driver_bound - Update device links after probing its driver. ++ * @dev: Device to update the links for. ++ * ++ * The probe has been successful, so update links from this device to any ++ * consumers by changing their status to "available". ++ * ++ * Also change the status of @dev's links to suppliers to "active". 
++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_driver_bound(struct device *dev) ++{ ++ struct device_link *link; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ /* ++ * Links created during consumer probe may be in the "consumer ++ * probe" state to start with if the supplier is still probing ++ * when they are created and they may become "active" if the ++ * consumer probe returns first. Skip them here. ++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ continue; ++ ++ WARN_ON(link->status != DL_STATE_DORMANT); ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++ ++ if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) ++ driver_deferred_probe_add(link->consumer); ++ } ++ ++ list_for_each_entry(link, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); ++ WRITE_ONCE(link->status, DL_STATE_ACTIVE); ++ } ++ ++ dev->links.status = DL_DEV_DRIVER_BOUND; ++ ++ device_links_write_unlock(); ++} ++ ++static void device_link_drop_managed(struct device_link *link) ++{ ++ link->flags &= ~DL_FLAG_MANAGED; ++ WRITE_ONCE(link->status, DL_STATE_NONE); ++ kref_put(&link->kref, __device_link_del); ++} ++ ++/** ++ * __device_links_no_driver - Update links of a device without a driver. ++ * @dev: Device without a drvier. ++ * ++ * Delete all non-persistent links from this device to any suppliers. ++ * ++ * Persistent links stay around, but their status is changed to "available", ++ * unless they already are in the "supplier unbind in progress" state in which ++ * case they need not be updated. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++static void __device_links_no_driver(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) ++ device_link_drop_managed(link); ++ else if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ WRITE_ONCE(link->status, DL_STATE_AVAILABLE); ++ } ++ ++ dev->links.status = DL_DEV_NO_DRIVER; ++} ++ ++/** ++ * device_links_no_driver - Update links after failing driver probe. ++ * @dev: Device whose driver has just failed to probe. ++ * ++ * Clean up leftover links to consumers for @dev and invoke ++ * %__device_links_no_driver() to update links to suppliers for it as ++ * appropriate. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_no_driver(struct device *dev) ++{ ++ struct device_link *link; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ /* ++ * The probe has failed, so if the status of the link is ++ * "consumer probe" or "active", it must have been added by ++ * a probing consumer while this device was still probing. ++ * Change its state to "dormant", as it represents a valid ++ * relationship, but it is not functionally meaningful. 
++ */ ++ if (link->status == DL_STATE_CONSUMER_PROBE || ++ link->status == DL_STATE_ACTIVE) ++ WRITE_ONCE(link->status, DL_STATE_DORMANT); ++ } ++ ++ __device_links_no_driver(dev); ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_driver_cleanup - Update links after driver removal. ++ * @dev: Device whose driver has just gone away. ++ * ++ * Update links to consumers for @dev by changing their status to "dormant" and ++ * invoke %__device_links_no_driver() to update links to suppliers for it as ++ * appropriate. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++void device_links_driver_cleanup(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); ++ WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); ++ ++ /* ++ * autoremove the links between this @dev and its consumer ++ * devices that are not active, i.e. where the link state ++ * has moved to DL_STATE_SUPPLIER_UNBIND. ++ */ ++ if (link->status == DL_STATE_SUPPLIER_UNBIND && ++ link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) ++ device_link_drop_managed(link); ++ ++ WRITE_ONCE(link->status, DL_STATE_DORMANT); ++ } ++ ++ __device_links_no_driver(dev); ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_busy - Check if there are any busy links to consumers. ++ * @dev: Device to check. ++ * ++ * Check each consumer of the device and return 'true' if its link's status ++ * is one of "consumer probe" or "active" (meaning that the given consumer is ++ * probing right now or its driver is present). Otherwise, change the link ++ * state to "supplier unbind" to prevent the consumer from being probed ++ * successfully going forward. ++ * ++ * Return 'false' if there are no probing or active consumers. ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. ++ */ ++bool device_links_busy(struct device *dev) ++{ ++ struct device_link *link; ++ bool ret = false; ++ ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ if (link->status == DL_STATE_CONSUMER_PROBE ++ || link->status == DL_STATE_ACTIVE) { ++ ret = true; ++ break; ++ } ++ WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); ++ } ++ ++ dev->links.status = DL_DEV_UNBINDING; ++ ++ device_links_write_unlock(); ++ return ret; ++} ++ ++/** ++ * device_links_unbind_consumers - Force unbind consumers of the given device. ++ * @dev: Device to unbind the consumers of. ++ * ++ * Walk the list of links to consumers for @dev and if any of them is in the ++ * "consumer probe" state, wait for all device probes in progress to complete ++ * and start over. ++ * ++ * If that's not the case, change the status of the link to "supplier unbind" ++ * and check if the link was in the "active" state. If so, force the consumer ++ * driver to unbind and start over (the consumer will not re-probe as we have ++ * changed the state of the link already). ++ * ++ * Links without the DL_FLAG_MANAGED flag set are ignored. 
++ */ ++void device_links_unbind_consumers(struct device *dev) ++{ ++ struct device_link *link; ++ ++ start: ++ device_links_write_lock(); ++ ++ list_for_each_entry(link, &dev->links.consumers, s_node) { ++ enum device_link_state status; ++ ++ if (!(link->flags & DL_FLAG_MANAGED)) ++ continue; ++ ++ status = link->status; ++ if (status == DL_STATE_CONSUMER_PROBE) { ++ device_links_write_unlock(); ++ ++ wait_for_device_probe(); ++ goto start; ++ } ++ WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); ++ if (status == DL_STATE_ACTIVE) { ++ struct device *consumer = link->consumer; ++ ++ get_device(consumer); ++ ++ device_links_write_unlock(); ++ ++ device_release_driver_internal(consumer, NULL, ++ consumer->parent); ++ put_device(consumer); ++ goto start; ++ } ++ } ++ ++ device_links_write_unlock(); ++} ++ ++/** ++ * device_links_purge - Delete existing links to other devices. ++ * @dev: Target device. ++ */ ++static void device_links_purge(struct device *dev) ++{ ++ struct device_link *link, *ln; ++ ++ /* ++ * Delete all of the remaining links from this device to any other ++ * devices (either consumers or suppliers). ++ */ ++ device_links_write_lock(); ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { ++ WARN_ON(link->status == DL_STATE_ACTIVE); ++ __device_link_del(&link->kref); ++ } ++ ++ list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { ++ WARN_ON(link->status != DL_STATE_DORMANT && ++ link->status != DL_STATE_NONE); ++ __device_link_del(&link->kref); ++ } ++ ++ device_links_write_unlock(); ++} ++ ++/* Device links support end. */ ++ ++int (*platform_notify)(struct device *dev) = NULL; ++int (*platform_notify_remove)(struct device *dev) = NULL; ++static struct kobject *dev_kobj; ++struct kobject *sysfs_dev_char_kobj; ++struct kobject *sysfs_dev_block_kobj; ++ ++static DEFINE_MUTEX(device_hotplug_lock); ++ ++void lock_device_hotplug(void) ++{ ++ mutex_lock(&device_hotplug_lock); ++} ++ ++void unlock_device_hotplug(void) ++{ ++ mutex_unlock(&device_hotplug_lock); ++} ++ ++int lock_device_hotplug_sysfs(void) ++{ ++ if (mutex_trylock(&device_hotplug_lock)) ++ return 0; ++ ++ /* Avoid busy looping (5 ms of sleep should do). */ ++ msleep(5); ++ return restart_syscall(); ++} ++ ++#ifdef CONFIG_BLOCK ++static inline int device_is_not_partition(struct device *dev) ++{ ++ return !(dev->type == &part_type); ++} ++#else ++static inline int device_is_not_partition(struct device *dev) ++{ ++ return 1; ++} ++#endif ++ ++/** ++ * dev_driver_string - Return a device's driver name, if at all possible ++ * @dev: struct device to get the name of ++ * ++ * Will return the device's driver's name if it is bound to a device. If ++ * the device is not bound to a driver, it will return the name of the bus ++ * it is attached to. If it is not attached to a bus either, an empty ++ * string will be returned. ++ */ ++const char *dev_driver_string(const struct device *dev) ++{ ++ struct device_driver *drv; ++ ++ /* dev->driver can change to NULL underneath us because of unbinding, ++ * so be careful about accessing it. dev->bus and dev->class should ++ * never change once they are set, so they don't need special care. ++ */ ++ drv = READ_ONCE(dev->driver); ++ return drv ? drv->name : ++ (dev->bus ? dev->bus->name : ++ (dev->class ? 
dev->class->name : "")); ++} ++EXPORT_SYMBOL(dev_driver_string); ++ ++#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) ++ ++static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, ++ char *buf) ++{ ++ struct device_attribute *dev_attr = to_dev_attr(attr); ++ struct device *dev = kobj_to_dev(kobj); ++ ssize_t ret = -EIO; ++ ++ if (dev_attr->show) ++ ret = dev_attr->show(dev, dev_attr, buf); ++ if (ret >= (ssize_t)PAGE_SIZE) { ++ printk("dev_attr_show: %pS returned bad count\n", ++ dev_attr->show); ++ } ++ return ret; ++} ++ ++static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct device_attribute *dev_attr = to_dev_attr(attr); ++ struct device *dev = kobj_to_dev(kobj); ++ ssize_t ret = -EIO; ++ ++ if (dev_attr->store) ++ ret = dev_attr->store(dev, dev_attr, buf, count); ++ return ret; ++} ++ ++static const struct sysfs_ops dev_sysfs_ops = { ++ .show = dev_attr_show, ++ .store = dev_attr_store, ++}; ++ ++#define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ++ ++ssize_t device_store_ulong(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ char *end; ++ unsigned long new = simple_strtoul(buf, &end, 0); ++ if (end == buf) ++ return -EINVAL; ++ *(unsigned long *)(ea->var) = new; ++ /* Always return full write size even if we didn't consume all */ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_ulong); ++ ++ssize_t device_show_ulong(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_ulong); ++ ++ssize_t device_store_int(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ char *end; ++ long new = simple_strtol(buf, &end, 0); ++ if (end == buf || new > INT_MAX || new < INT_MIN) ++ return -EINVAL; ++ *(int *)(ea->var) = new; ++ /* Always return full write size even if we didn't consume all */ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_int); ++ ++ssize_t device_show_int(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_int); ++ ++ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t size) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ if (strtobool(buf, ea->var) < 0) ++ return -EINVAL; ++ ++ return size; ++} ++EXPORT_SYMBOL_GPL(device_store_bool); ++ ++ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct dev_ext_attribute *ea = to_ext_attr(attr); ++ ++ return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var)); ++} ++EXPORT_SYMBOL_GPL(device_show_bool); ++ ++/** ++ * device_release - free device structure. ++ * @kobj: device's kobject. ++ * ++ * This is called once the reference count for the object ++ * reaches 0. We forward the call to the device's release ++ * method, which should handle actually freeing the structure. 
++ */ ++static void device_release(struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ struct device_private *p = dev->p; ++ ++ /* ++ * Some platform devices are driven without driver attached ++ * and managed resources may have been acquired. Make sure ++ * all resources are released. ++ * ++ * Drivers still can add resources into device after device ++ * is deleted but alive, so release devres here to avoid ++ * possible memory leak. ++ */ ++ devres_release_all(dev); ++ ++ if (dev->release) ++ dev->release(dev); ++ else if (dev->type && dev->type->release) ++ dev->type->release(dev); ++ else if (dev->class && dev->class->dev_release) ++ dev->class->dev_release(dev); ++ else ++ WARN(1, KERN_ERR "Device '%s' does not have a release() " ++ "function, it is broken and must be fixed.\n", ++ dev_name(dev)); ++ kfree(p); ++} ++ ++static const void *device_namespace(struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ const void *ns = NULL; ++ ++ if (dev->class && dev->class->ns_type) ++ ns = dev->class->namespace(dev); ++ ++ return ns; ++} ++ ++static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ ++ if (dev->class && dev->class->get_ownership) ++ dev->class->get_ownership(dev, uid, gid); ++} ++ ++static struct kobj_type device_ktype = { ++ .release = device_release, ++ .sysfs_ops = &dev_sysfs_ops, ++ .namespace = device_namespace, ++ .get_ownership = device_get_ownership, ++}; ++ ++ ++static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) ++{ ++ struct kobj_type *ktype = get_ktype(kobj); ++ ++ if (ktype == &device_ktype) { ++ struct device *dev = kobj_to_dev(kobj); ++ if (dev->bus) ++ return 1; ++ if (dev->class) ++ return 1; ++ } ++ return 0; ++} ++ ++static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ ++ if (dev->bus) ++ return dev->bus->name; ++ if (dev->class) ++ return dev->class->name; ++ return NULL; ++} ++ ++static int dev_uevent(struct kset *kset, struct kobject *kobj, ++ struct kobj_uevent_env *env) ++{ ++ struct device *dev = kobj_to_dev(kobj); ++ int retval = 0; ++ ++ /* add device node properties if present */ ++ if (MAJOR(dev->devt)) { ++ const char *tmp; ++ const char *name; ++ umode_t mode = 0; ++ kuid_t uid = GLOBAL_ROOT_UID; ++ kgid_t gid = GLOBAL_ROOT_GID; ++ ++ add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); ++ add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); ++ name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); ++ if (name) { ++ add_uevent_var(env, "DEVNAME=%s", name); ++ if (mode) ++ add_uevent_var(env, "DEVMODE=%#o", mode & 0777); ++ if (!uid_eq(uid, GLOBAL_ROOT_UID)) ++ add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); ++ if (!gid_eq(gid, GLOBAL_ROOT_GID)) ++ add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); ++ kfree(tmp); ++ } ++ } ++ ++ if (dev->type && dev->type->name) ++ add_uevent_var(env, "DEVTYPE=%s", dev->type->name); ++ ++ if (dev->driver) ++ add_uevent_var(env, "DRIVER=%s", dev->driver->name); ++ ++ /* Add common DT information about the device */ ++ of_device_uevent(dev, env); ++ ++ /* have the bus specific function add its stuff */ ++ if (dev->bus && dev->bus->uevent) { ++ retval = dev->bus->uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: bus uevent() returned %d\n", ++ dev_name(dev), __func__, retval); ++ } ++ ++ /* have the class specific function add its stuff */ ++ if (dev->class && 
dev->class->dev_uevent) { ++ retval = dev->class->dev_uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: class uevent() " ++ "returned %d\n", dev_name(dev), ++ __func__, retval); ++ } ++ ++ /* have the device type specific function add its stuff */ ++ if (dev->type && dev->type->uevent) { ++ retval = dev->type->uevent(dev, env); ++ if (retval) ++ pr_debug("device: '%s': %s: dev_type uevent() " ++ "returned %d\n", dev_name(dev), ++ __func__, retval); ++ } ++ ++ return retval; ++} ++ ++static const struct kset_uevent_ops device_uevent_ops = { ++ .filter = dev_uevent_filter, ++ .name = dev_uevent_name, ++ .uevent = dev_uevent, ++}; ++ ++static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct kobject *top_kobj; ++ struct kset *kset; ++ struct kobj_uevent_env *env = NULL; ++ int i; ++ size_t count = 0; ++ int retval; ++ ++ /* search the kset, the device belongs to */ ++ top_kobj = &dev->kobj; ++ while (!top_kobj->kset && top_kobj->parent) ++ top_kobj = top_kobj->parent; ++ if (!top_kobj->kset) ++ goto out; ++ ++ kset = top_kobj->kset; ++ if (!kset->uevent_ops || !kset->uevent_ops->uevent) ++ goto out; ++ ++ /* respect filter */ ++ if (kset->uevent_ops && kset->uevent_ops->filter) ++ if (!kset->uevent_ops->filter(kset, &dev->kobj)) ++ goto out; ++ ++ env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); ++ if (!env) ++ return -ENOMEM; ++ ++ /* let the kset specific function add its keys */ ++ retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); ++ if (retval) ++ goto out; ++ ++ /* copy keys to file */ ++ for (i = 0; i < env->envp_idx; i++) ++ count += sprintf(&buf[count], "%s\n", env->envp[i]); ++out: ++ kfree(env); ++ return count; ++} ++ ++static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ int rc; ++ ++ rc = kobject_synth_uevent(&dev->kobj, buf, count); ++ ++ if (rc) { ++ dev_err(dev, "uevent: failed to send synthetic uevent\n"); ++ return rc; ++ } ++ ++ return count; ++} ++static DEVICE_ATTR_RW(uevent); ++ ++static ssize_t online_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ bool val; ++ ++ device_lock(dev); ++ val = !dev->offline; ++ device_unlock(dev); ++ return sprintf(buf, "%u\n", val); ++} ++ ++static ssize_t online_store(struct device *dev, struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ bool val; ++ int ret; ++ ++ ret = strtobool(buf, &val); ++ if (ret < 0) ++ return ret; ++ ++ ret = lock_device_hotplug_sysfs(); ++ if (ret) ++ return ret; ++ ++ ret = val ? device_online(dev) : device_offline(dev); ++ unlock_device_hotplug(); ++ return ret < 0 ? 
ret : count; ++} ++static DEVICE_ATTR_RW(online); ++ ++int device_add_groups(struct device *dev, const struct attribute_group **groups) ++{ ++ return sysfs_create_groups(&dev->kobj, groups); ++} ++EXPORT_SYMBOL_GPL(device_add_groups); ++ ++void device_remove_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ sysfs_remove_groups(&dev->kobj, groups); ++} ++EXPORT_SYMBOL_GPL(device_remove_groups); ++ ++union device_attr_group_devres { ++ const struct attribute_group *group; ++ const struct attribute_group **groups; ++}; ++ ++static int devm_attr_group_match(struct device *dev, void *res, void *data) ++{ ++ return ((union device_attr_group_devres *)res)->group == data; ++} ++ ++static void devm_attr_group_remove(struct device *dev, void *res) ++{ ++ union device_attr_group_devres *devres = res; ++ const struct attribute_group *group = devres->group; ++ ++ dev_dbg(dev, "%s: removing group %p\n", __func__, group); ++ sysfs_remove_group(&dev->kobj, group); ++} ++ ++static void devm_attr_groups_remove(struct device *dev, void *res) ++{ ++ union device_attr_group_devres *devres = res; ++ const struct attribute_group **groups = devres->groups; ++ ++ dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); ++ sysfs_remove_groups(&dev->kobj, groups); ++} ++ ++/** ++ * devm_device_add_group - given a device, create a managed attribute group ++ * @dev: The device to create the group for ++ * @grp: The attribute group to create ++ * ++ * This function creates a group for the first time. It will explicitly ++ * warn and error if any of the attribute files being created already exist. ++ * ++ * Returns 0 on success or error code on failure. ++ */ ++int devm_device_add_group(struct device *dev, const struct attribute_group *grp) ++{ ++ union device_attr_group_devres *devres; ++ int error; ++ ++ devres = devres_alloc(devm_attr_group_remove, ++ sizeof(*devres), GFP_KERNEL); ++ if (!devres) ++ return -ENOMEM; ++ ++ error = sysfs_create_group(&dev->kobj, grp); ++ if (error) { ++ devres_free(devres); ++ return error; ++ } ++ ++ devres->group = grp; ++ devres_add(dev, devres); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(devm_device_add_group); ++ ++/** ++ * devm_device_remove_group: remove a managed group from a device ++ * @dev: device to remove the group from ++ * @grp: group to remove ++ * ++ * This function removes a group of attributes from a device. The attributes ++ * previously have to have been created for this group, otherwise it will fail. ++ */ ++void devm_device_remove_group(struct device *dev, ++ const struct attribute_group *grp) ++{ ++ WARN_ON(devres_release(dev, devm_attr_group_remove, ++ devm_attr_group_match, ++ /* cast away const */ (void *)grp)); ++} ++EXPORT_SYMBOL_GPL(devm_device_remove_group); ++ ++/** ++ * devm_device_add_groups - create a bunch of managed attribute groups ++ * @dev: The device to create the group for ++ * @groups: The attribute groups to create, NULL terminated ++ * ++ * This function creates a bunch of managed attribute groups. If an error ++ * occurs when creating a group, all previously created groups will be ++ * removed, unwinding everything back to the original state when this ++ * function was called. It will explicitly warn and error if any of the ++ * attribute files being created already exist. ++ * ++ * Returns 0 on success or error code from sysfs_create_group on failure. 
++ */ ++int devm_device_add_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ union device_attr_group_devres *devres; ++ int error; ++ ++ devres = devres_alloc(devm_attr_groups_remove, ++ sizeof(*devres), GFP_KERNEL); ++ if (!devres) ++ return -ENOMEM; ++ ++ error = sysfs_create_groups(&dev->kobj, groups); ++ if (error) { ++ devres_free(devres); ++ return error; ++ } ++ ++ devres->groups = groups; ++ devres_add(dev, devres); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(devm_device_add_groups); ++ ++/** ++ * devm_device_remove_groups - remove a list of managed groups ++ * ++ * @dev: The device for the groups to be removed from ++ * @groups: NULL terminated list of groups to be removed ++ * ++ * If groups is not NULL, remove the specified groups from the device. ++ */ ++void devm_device_remove_groups(struct device *dev, ++ const struct attribute_group **groups) ++{ ++ WARN_ON(devres_release(dev, devm_attr_groups_remove, ++ devm_attr_group_match, ++ /* cast away const */ (void *)groups)); ++} ++EXPORT_SYMBOL_GPL(devm_device_remove_groups); ++ ++static int device_add_attrs(struct device *dev) ++{ ++ struct class *class = dev->class; ++ const struct device_type *type = dev->type; ++ int error; ++ ++ if (class) { ++ error = device_add_groups(dev, class->dev_groups); ++ if (error) ++ return error; ++ } ++ ++ if (type) { ++ error = device_add_groups(dev, type->groups); ++ if (error) ++ goto err_remove_class_groups; ++ } ++ ++ error = device_add_groups(dev, dev->groups); ++ if (error) ++ goto err_remove_type_groups; ++ ++ if (device_supports_offline(dev) && !dev->offline_disabled) { ++ error = device_create_file(dev, &dev_attr_online); ++ if (error) ++ goto err_remove_dev_groups; ++ } ++ ++ return 0; ++ ++ err_remove_dev_groups: ++ device_remove_groups(dev, dev->groups); ++ err_remove_type_groups: ++ if (type) ++ device_remove_groups(dev, type->groups); ++ err_remove_class_groups: ++ if (class) ++ device_remove_groups(dev, class->dev_groups); ++ ++ return error; ++} ++ ++static void device_remove_attrs(struct device *dev) ++{ ++ struct class *class = dev->class; ++ const struct device_type *type = dev->type; ++ ++ device_remove_file(dev, &dev_attr_online); ++ device_remove_groups(dev, dev->groups); ++ ++ if (type) ++ device_remove_groups(dev, type->groups); ++ ++ if (class) ++ device_remove_groups(dev, class->dev_groups); ++} ++ ++static ssize_t dev_show(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ return print_dev_t(buf, dev->devt); ++} ++static DEVICE_ATTR_RO(dev); ++ ++/* /sys/devices/ */ ++struct kset *devices_kset; ++ ++/** ++ * devices_kset_move_before - Move device in the devices_kset's list. ++ * @deva: Device to move. ++ * @devb: Device @deva should come before. ++ */ ++static void devices_kset_move_before(struct device *deva, struct device *devb) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s before %s\n", ++ dev_name(deva), dev_name(devb)); ++ spin_lock(&devices_kset->list_lock); ++ list_move_tail(&deva->kobj.entry, &devb->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * devices_kset_move_after - Move device in the devices_kset's list. ++ * @deva: Device to move ++ * @devb: Device @deva should come after. 
++ */ ++static void devices_kset_move_after(struct device *deva, struct device *devb) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s after %s\n", ++ dev_name(deva), dev_name(devb)); ++ spin_lock(&devices_kset->list_lock); ++ list_move(&deva->kobj.entry, &devb->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * devices_kset_move_last - move the device to the end of devices_kset's list. ++ * @dev: device to move ++ */ ++void devices_kset_move_last(struct device *dev) ++{ ++ if (!devices_kset) ++ return; ++ pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); ++ spin_lock(&devices_kset->list_lock); ++ list_move_tail(&dev->kobj.entry, &devices_kset->list); ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/** ++ * device_create_file - create sysfs attribute file for device. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ */ ++int device_create_file(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ int error = 0; ++ ++ if (dev) { ++ WARN(((attr->attr.mode & S_IWUGO) && !attr->store), ++ "Attribute %s: write permission without 'store'\n", ++ attr->attr.name); ++ WARN(((attr->attr.mode & S_IRUGO) && !attr->show), ++ "Attribute %s: read permission without 'show'\n", ++ attr->attr.name); ++ error = sysfs_create_file(&dev->kobj, &attr->attr); ++ } ++ ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_create_file); ++ ++/** ++ * device_remove_file - remove sysfs attribute file. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ */ ++void device_remove_file(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ if (dev) ++ sysfs_remove_file(&dev->kobj, &attr->attr); ++} ++EXPORT_SYMBOL_GPL(device_remove_file); ++ ++/** ++ * device_remove_file_self - remove sysfs attribute file from its own method. ++ * @dev: device. ++ * @attr: device attribute descriptor. ++ * ++ * See kernfs_remove_self() for details. ++ */ ++bool device_remove_file_self(struct device *dev, ++ const struct device_attribute *attr) ++{ ++ if (dev) ++ return sysfs_remove_file_self(&dev->kobj, &attr->attr); ++ else ++ return false; ++} ++EXPORT_SYMBOL_GPL(device_remove_file_self); ++ ++/** ++ * device_create_bin_file - create sysfs binary attribute file for device. ++ * @dev: device. ++ * @attr: device binary attribute descriptor. ++ */ ++int device_create_bin_file(struct device *dev, ++ const struct bin_attribute *attr) ++{ ++ int error = -EINVAL; ++ if (dev) ++ error = sysfs_create_bin_file(&dev->kobj, attr); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_create_bin_file); ++ ++/** ++ * device_remove_bin_file - remove sysfs binary attribute file ++ * @dev: device. ++ * @attr: device binary attribute descriptor. ++ */ ++void device_remove_bin_file(struct device *dev, ++ const struct bin_attribute *attr) ++{ ++ if (dev) ++ sysfs_remove_bin_file(&dev->kobj, attr); ++} ++EXPORT_SYMBOL_GPL(device_remove_bin_file); ++ ++static void klist_children_get(struct klist_node *n) ++{ ++ struct device_private *p = to_device_private_parent(n); ++ struct device *dev = p->device; ++ ++ get_device(dev); ++} ++ ++static void klist_children_put(struct klist_node *n) ++{ ++ struct device_private *p = to_device_private_parent(n); ++ struct device *dev = p->device; ++ ++ put_device(dev); ++} ++ ++/** ++ * device_initialize - init device structure. ++ * @dev: device. ++ * ++ * This prepares the device for use by other layers by initializing ++ * its fields. 
++ * It is the first half of device_register(), if called by ++ * that function, though it can also be called separately, so one ++ * may use @dev's fields. In particular, get_device()/put_device() ++ * may be used for reference counting of @dev after calling this ++ * function. ++ * ++ * All fields in @dev must be initialized by the caller to 0, except ++ * for those explicitly set to some other value. The simplest ++ * approach is to use kzalloc() to allocate the structure containing ++ * @dev. ++ * ++ * NOTE: Use put_device() to give up your reference instead of freeing ++ * @dev directly once you have called this function. ++ */ ++void device_initialize(struct device *dev) ++{ ++ dev->kobj.kset = devices_kset; ++ kobject_init(&dev->kobj, &device_ktype); ++ INIT_LIST_HEAD(&dev->dma_pools); ++ mutex_init(&dev->mutex); ++ lockdep_set_novalidate_class(&dev->mutex); ++ spin_lock_init(&dev->devres_lock); ++ INIT_LIST_HEAD(&dev->devres_head); ++ device_pm_init(dev); ++ set_dev_node(dev, -1); ++#ifdef CONFIG_GENERIC_MSI_IRQ ++ INIT_LIST_HEAD(&dev->msi_list); ++#endif ++ INIT_LIST_HEAD(&dev->links.consumers); ++ INIT_LIST_HEAD(&dev->links.suppliers); ++ dev->links.status = DL_DEV_NO_DRIVER; ++} ++EXPORT_SYMBOL_GPL(device_initialize); ++ ++struct kobject *virtual_device_parent(struct device *dev) ++{ ++ static struct kobject *virtual_dir = NULL; ++ ++ if (!virtual_dir) ++ virtual_dir = kobject_create_and_add("virtual", ++ &devices_kset->kobj); ++ ++ return virtual_dir; ++} ++ ++struct class_dir { ++ struct kobject kobj; ++ struct class *class; ++}; ++ ++#define to_class_dir(obj) container_of(obj, struct class_dir, kobj) ++ ++static void class_dir_release(struct kobject *kobj) ++{ ++ struct class_dir *dir = to_class_dir(kobj); ++ kfree(dir); ++} ++ ++static const ++struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj) ++{ ++ struct class_dir *dir = to_class_dir(kobj); ++ return dir->class->ns_type; ++} ++ ++static struct kobj_type class_dir_ktype = { ++ .release = class_dir_release, ++ .sysfs_ops = &kobj_sysfs_ops, ++ .child_ns_type = class_dir_child_ns_type ++}; ++ ++static struct kobject * ++class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) ++{ ++ struct class_dir *dir; ++ int retval; ++ ++ dir = kzalloc(sizeof(*dir), GFP_KERNEL); ++ if (!dir) ++ return ERR_PTR(-ENOMEM); ++ ++ dir->class = class; ++ kobject_init(&dir->kobj, &class_dir_ktype); ++ ++ dir->kobj.kset = &class->p->glue_dirs; ++ ++ retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name); ++ if (retval < 0) { ++ kobject_put(&dir->kobj); ++ return ERR_PTR(retval); ++ } ++ return &dir->kobj; ++} ++ ++static DEFINE_MUTEX(gdp_mutex); ++ ++static struct kobject *get_device_parent(struct device *dev, ++ struct device *parent) ++{ ++ if (dev->class) { ++ struct kobject *kobj = NULL; ++ struct kobject *parent_kobj; ++ struct kobject *k; ++ ++#ifdef CONFIG_BLOCK ++ /* block disks show up in /sys/block */ ++ if (sysfs_deprecated && dev->class == &block_class) { ++ if (parent && parent->class == &block_class) ++ return &parent->kobj; ++ return &block_class.p->subsys.kobj; ++ } ++#endif ++ ++ /* ++ * If we have no parent, we live in "virtual". ++ * Class-devices with a non class-device as parent, live ++ * in a "glue" directory to prevent namespace collisions. 
++ */ ++ if (parent == NULL) ++ parent_kobj = virtual_device_parent(dev); ++ else if (parent->class && !dev->class->ns_type) ++ return &parent->kobj; ++ else ++ parent_kobj = &parent->kobj; ++ ++ mutex_lock(&gdp_mutex); ++ ++ /* find our class-directory at the parent and reference it */ ++ spin_lock(&dev->class->p->glue_dirs.list_lock); ++ list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry) ++ if (k->parent == parent_kobj) { ++ kobj = kobject_get(k); ++ break; ++ } ++ spin_unlock(&dev->class->p->glue_dirs.list_lock); ++ if (kobj) { ++ mutex_unlock(&gdp_mutex); ++ return kobj; ++ } ++ ++ /* or create a new class-directory at the parent device */ ++ k = class_dir_create_and_add(dev->class, parent_kobj); ++ /* do not emit an uevent for this simple "glue" directory */ ++ mutex_unlock(&gdp_mutex); ++ return k; ++ } ++ ++ /* subsystems can specify a default root directory for their devices */ ++ if (!parent && dev->bus && dev->bus->dev_root) ++ return &dev->bus->dev_root->kobj; ++ ++ if (parent) ++ return &parent->kobj; ++ return NULL; ++} ++ ++static inline bool live_in_glue_dir(struct kobject *kobj, ++ struct device *dev) ++{ ++ if (!kobj || !dev->class || ++ kobj->kset != &dev->class->p->glue_dirs) ++ return false; ++ return true; ++} ++ ++static inline struct kobject *get_glue_dir(struct device *dev) ++{ ++ return dev->kobj.parent; ++} ++ ++/* ++ * make sure cleaning up dir as the last step, we need to make ++ * sure .release handler of kobject is run with holding the ++ * global lock ++ */ ++static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) ++{ ++ unsigned int ref; ++ ++ /* see if we live in a "glue" directory */ ++ if (!live_in_glue_dir(glue_dir, dev)) ++ return; ++ ++ mutex_lock(&gdp_mutex); ++ /** ++ * There is a race condition between removing glue directory ++ * and adding a new device under the glue directory. ++ * ++ * CPU1: CPU2: ++ * ++ * device_add() ++ * get_device_parent() ++ * class_dir_create_and_add() ++ * kobject_add_internal() ++ * create_dir() // create glue_dir ++ * ++ * device_add() ++ * get_device_parent() ++ * kobject_get() // get glue_dir ++ * ++ * device_del() ++ * cleanup_glue_dir() ++ * kobject_del(glue_dir) ++ * ++ * kobject_add() ++ * kobject_add_internal() ++ * create_dir() // in glue_dir ++ * sysfs_create_dir_ns() ++ * kernfs_create_dir_ns(sd) ++ * ++ * sysfs_remove_dir() // glue_dir->sd=NULL ++ * sysfs_put() // free glue_dir->sd ++ * ++ * // sd is freed ++ * kernfs_new_node(sd) ++ * kernfs_get(glue_dir) ++ * kernfs_add_one() ++ * kernfs_put() ++ * ++ * Before CPU1 remove last child device under glue dir, if CPU2 add ++ * a new device under glue dir, the glue_dir kobject reference count ++ * will be increase to 2 in kobject_get(k). And CPU2 has been called ++ * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() ++ * and sysfs_put(). This result in glue_dir->sd is freed. ++ * ++ * Then the CPU2 will see a stale "empty" but still potentially used ++ * glue dir around in kernfs_new_node(). ++ * ++ * In order to avoid this happening, we also should make sure that ++ * kernfs_node for glue_dir is released in CPU1 only when refcount ++ * for glue_dir kobj is 1. 
++ */ ++ ref = kref_read(&glue_dir->kref); ++ if (!kobject_has_children(glue_dir) && !--ref) ++ kobject_del(glue_dir); ++ kobject_put(glue_dir); ++ mutex_unlock(&gdp_mutex); ++} ++ ++static int device_add_class_symlinks(struct device *dev) ++{ ++ struct device_node *of_node = dev_of_node(dev); ++ int error; ++ ++ if (of_node) { ++ error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); ++ if (error) ++ dev_warn(dev, "Error %d creating of_node link\n",error); ++ /* An error here doesn't warrant bringing down the device */ ++ } ++ ++ if (!dev->class) ++ return 0; ++ ++ error = sysfs_create_link(&dev->kobj, ++ &dev->class->p->subsys.kobj, ++ "subsystem"); ++ if (error) ++ goto out_devnode; ++ ++ if (dev->parent && device_is_not_partition(dev)) { ++ error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, ++ "device"); ++ if (error) ++ goto out_subsys; ++ } ++ ++#ifdef CONFIG_BLOCK ++ /* /sys/block has directories and does not need symlinks */ ++ if (sysfs_deprecated && dev->class == &block_class) ++ return 0; ++#endif ++ ++ /* link in the class directory pointing to the device */ ++ error = sysfs_create_link(&dev->class->p->subsys.kobj, ++ &dev->kobj, dev_name(dev)); ++ if (error) ++ goto out_device; ++ ++ return 0; ++ ++out_device: ++ sysfs_remove_link(&dev->kobj, "device"); ++ ++out_subsys: ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++out_devnode: ++ sysfs_remove_link(&dev->kobj, "of_node"); ++ return error; ++} ++ ++static void device_remove_class_symlinks(struct device *dev) ++{ ++ if (dev_of_node(dev)) ++ sysfs_remove_link(&dev->kobj, "of_node"); ++ ++ if (!dev->class) ++ return; ++ ++ if (dev->parent && device_is_not_partition(dev)) ++ sysfs_remove_link(&dev->kobj, "device"); ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++#ifdef CONFIG_BLOCK ++ if (sysfs_deprecated && dev->class == &block_class) ++ return; ++#endif ++ sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev)); ++} ++ ++/** ++ * dev_set_name - set a device name ++ * @dev: device ++ * @fmt: format string for the device's name ++ */ ++int dev_set_name(struct device *dev, const char *fmt, ...) ++{ ++ va_list vargs; ++ int err; ++ ++ va_start(vargs, fmt); ++ err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); ++ va_end(vargs); ++ return err; ++} ++EXPORT_SYMBOL_GPL(dev_set_name); ++ ++/** ++ * device_to_dev_kobj - select a /sys/dev/ directory for the device ++ * @dev: device ++ * ++ * By default we select char/ for new entries. Setting class->dev_obj ++ * to NULL prevents an entry from being created. class->dev_kobj must ++ * be set (or cleared) before any devices are registered to the class ++ * otherwise device_create_sys_dev_entry() and ++ * device_remove_sys_dev_entry() will disagree about the presence of ++ * the link. 
++ */ ++static struct kobject *device_to_dev_kobj(struct device *dev) ++{ ++ struct kobject *kobj; ++ ++ if (dev->class) ++ kobj = dev->class->dev_kobj; ++ else ++ kobj = sysfs_dev_char_kobj; ++ ++ return kobj; ++} ++ ++static int device_create_sys_dev_entry(struct device *dev) ++{ ++ struct kobject *kobj = device_to_dev_kobj(dev); ++ int error = 0; ++ char devt_str[15]; ++ ++ if (kobj) { ++ format_dev_t(devt_str, dev->devt); ++ error = sysfs_create_link(kobj, &dev->kobj, devt_str); ++ } ++ ++ return error; ++} ++ ++static void device_remove_sys_dev_entry(struct device *dev) ++{ ++ struct kobject *kobj = device_to_dev_kobj(dev); ++ char devt_str[15]; ++ ++ if (kobj) { ++ format_dev_t(devt_str, dev->devt); ++ sysfs_remove_link(kobj, devt_str); ++ } ++} ++ ++static int device_private_init(struct device *dev) ++{ ++ dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); ++ if (!dev->p) ++ return -ENOMEM; ++ dev->p->device = dev; ++ klist_init(&dev->p->klist_children, klist_children_get, ++ klist_children_put); ++ INIT_LIST_HEAD(&dev->p->deferred_probe); ++ return 0; ++} ++ ++/** ++ * device_add - add device to device hierarchy. ++ * @dev: device. ++ * ++ * This is part 2 of device_register(), though may be called ++ * separately _iff_ device_initialize() has been called separately. ++ * ++ * This adds @dev to the kobject hierarchy via kobject_add(), adds it ++ * to the global and sibling lists for the device, then ++ * adds it to the other relevant subsystems of the driver model. ++ * ++ * Do not call this routine or device_register() more than once for ++ * any device structure. The driver model core is not designed to work ++ * with devices that get unregistered and then spring back to life. ++ * (Among other things, it's very hard to guarantee that all references ++ * to the previous incarnation of @dev have been dropped.) Allocate ++ * and register a fresh new struct device instead. ++ * ++ * NOTE: _Never_ directly free @dev after calling this function, even ++ * if it returned an error! Always use put_device() to give up your ++ * reference instead. ++ */ ++int device_add(struct device *dev) ++{ ++ struct device *parent; ++ struct kobject *kobj; ++ struct class_interface *class_intf; ++ int error = -EINVAL; ++ struct kobject *glue_dir = NULL; ++ ++ dev = get_device(dev); ++ if (!dev) ++ goto done; ++ ++ if (!dev->p) { ++ error = device_private_init(dev); ++ if (error) ++ goto done; ++ } ++ ++ /* ++ * for statically allocated devices, which should all be converted ++ * some day, we need to initialize the name. We prevent reading back ++ * the name, and force the use of dev_name() ++ */ ++ if (dev->init_name) { ++ dev_set_name(dev, "%s", dev->init_name); ++ dev->init_name = NULL; ++ } ++ ++ /* subsystems can specify simple device enumeration */ ++ if (!dev_name(dev) && dev->bus && dev->bus->dev_name) ++ dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); ++ ++ if (!dev_name(dev)) { ++ error = -EINVAL; ++ goto name_error; ++ } ++ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ ++ parent = get_device(dev->parent); ++ kobj = get_device_parent(dev, parent); ++ if (IS_ERR(kobj)) { ++ error = PTR_ERR(kobj); ++ goto parent_error; ++ } ++ if (kobj) ++ dev->kobj.parent = kobj; ++ ++ /* use parent numa_node */ ++ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) ++ set_dev_node(dev, dev_to_node(parent)); ++ ++ /* first, register with generic layer. 
*/ ++ /* we require the name to be set before, and pass NULL */ ++ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); ++ if (error) { ++ glue_dir = get_glue_dir(dev); ++ goto Error; ++ } ++ ++ /* notify platform of device entry */ ++ if (platform_notify) ++ platform_notify(dev); ++ ++ error = device_create_file(dev, &dev_attr_uevent); ++ if (error) ++ goto attrError; ++ ++ error = device_add_class_symlinks(dev); ++ if (error) ++ goto SymlinkError; ++ error = device_add_attrs(dev); ++ if (error) ++ goto AttrsError; ++ error = bus_add_device(dev); ++ if (error) ++ goto BusError; ++ error = dpm_sysfs_add(dev); ++ if (error) ++ goto DPMError; ++ device_pm_add(dev); ++ ++ if (MAJOR(dev->devt)) { ++ error = device_create_file(dev, &dev_attr_dev); ++ if (error) ++ goto DevAttrError; ++ ++ error = device_create_sys_dev_entry(dev); ++ if (error) ++ goto SysEntryError; ++ ++ devtmpfs_create_node(dev); ++ } ++ ++ /* Notify clients of device addition. This call must come ++ * after dpm_sysfs_add() and before kobject_uevent(). ++ */ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_ADD_DEVICE, dev); ++ ++ kobject_uevent(&dev->kobj, KOBJ_ADD); ++ bus_probe_device(dev); ++ if (parent) ++ klist_add_tail(&dev->p->knode_parent, ++ &parent->p->klist_children); ++ ++ if (dev->class) { ++ mutex_lock(&dev->class->p->mutex); ++ /* tie the class to the device */ ++ klist_add_tail(&dev->knode_class, ++ &dev->class->p->klist_devices); ++ ++ /* notify any interfaces that the device is here */ ++ list_for_each_entry(class_intf, ++ &dev->class->p->interfaces, node) ++ if (class_intf->add_dev) ++ class_intf->add_dev(dev, class_intf); ++ mutex_unlock(&dev->class->p->mutex); ++ } ++done: ++ put_device(dev); ++ return error; ++ SysEntryError: ++ if (MAJOR(dev->devt)) ++ device_remove_file(dev, &dev_attr_dev); ++ DevAttrError: ++ device_pm_remove(dev); ++ dpm_sysfs_remove(dev); ++ DPMError: ++ bus_remove_device(dev); ++ BusError: ++ device_remove_attrs(dev); ++ AttrsError: ++ device_remove_class_symlinks(dev); ++ SymlinkError: ++ device_remove_file(dev, &dev_attr_uevent); ++ attrError: ++ kobject_uevent(&dev->kobj, KOBJ_REMOVE); ++ glue_dir = get_glue_dir(dev); ++ kobject_del(&dev->kobj); ++ Error: ++ cleanup_glue_dir(dev, glue_dir); ++parent_error: ++ put_device(parent); ++name_error: ++ kfree(dev->p); ++ dev->p = NULL; ++ goto done; ++} ++EXPORT_SYMBOL_GPL(device_add); ++ ++/** ++ * device_register - register a device with the system. ++ * @dev: pointer to the device structure ++ * ++ * This happens in two clean steps - initialize the device ++ * and add it to the system. The two steps can be called ++ * separately, but this is the easiest and most common. ++ * I.e. you should only call the two helpers separately if ++ * have a clearly defined need to use and refcount the device ++ * before it is added to the hierarchy. ++ * ++ * For more information, see the kerneldoc for device_initialize() ++ * and device_add(). ++ * ++ * NOTE: _Never_ directly free @dev after calling this function, even ++ * if it returned an error! Always use put_device() to give up the ++ * reference initialized in this function instead. ++ */ ++int device_register(struct device *dev) ++{ ++ device_initialize(dev); ++ return device_add(dev); ++} ++EXPORT_SYMBOL_GPL(device_register); ++ ++/** ++ * get_device - increment reference count for device. ++ * @dev: device. 
++ * ++ * This simply forwards the call to kobject_get(), though ++ * we do take care to provide for the case that we get a NULL ++ * pointer passed in. ++ */ ++struct device *get_device(struct device *dev) ++{ ++ return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; ++} ++EXPORT_SYMBOL_GPL(get_device); ++ ++/** ++ * put_device - decrement reference count. ++ * @dev: device in question. ++ */ ++void put_device(struct device *dev) ++{ ++ /* might_sleep(); */ ++ if (dev) ++ kobject_put(&dev->kobj); ++} ++EXPORT_SYMBOL_GPL(put_device); ++ ++bool kill_device(struct device *dev) ++{ ++ /* ++ * Require the device lock and set the "dead" flag to guarantee that ++ * the update behavior is consistent with the other bitfields near ++ * it and that we cannot have an asynchronous probe routine trying ++ * to run while we are tearing out the bus/class/sysfs from ++ * underneath the device. ++ */ ++ lockdep_assert_held(&dev->mutex); ++ ++ if (dev->p->dead) ++ return false; ++ dev->p->dead = true; ++ return true; ++} ++EXPORT_SYMBOL_GPL(kill_device); ++ ++/** ++ * device_del - delete device from system. ++ * @dev: device. ++ * ++ * This is the first part of the device unregistration ++ * sequence. This removes the device from the lists we control ++ * from here, has it removed from the other driver model ++ * subsystems it was added to in device_add(), and removes it ++ * from the kobject hierarchy. ++ * ++ * NOTE: this should be called manually _iff_ device_add() was ++ * also called manually. ++ */ ++void device_del(struct device *dev) ++{ ++ struct device *parent = dev->parent; ++ struct kobject *glue_dir = NULL; ++ struct class_interface *class_intf; ++ ++ device_lock(dev); ++ kill_device(dev); ++ device_unlock(dev); ++ ++ /* Notify clients of device removal. This call must come ++ * before dpm_sysfs_remove(). ++ */ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_DEL_DEVICE, dev); ++ ++ dpm_sysfs_remove(dev); ++ if (parent) ++ klist_del(&dev->p->knode_parent); ++ if (MAJOR(dev->devt)) { ++ devtmpfs_delete_node(dev); ++ device_remove_sys_dev_entry(dev); ++ device_remove_file(dev, &dev_attr_dev); ++ } ++ if (dev->class) { ++ device_remove_class_symlinks(dev); ++ ++ mutex_lock(&dev->class->p->mutex); ++ /* notify any interfaces that the device is now gone */ ++ list_for_each_entry(class_intf, ++ &dev->class->p->interfaces, node) ++ if (class_intf->remove_dev) ++ class_intf->remove_dev(dev, class_intf); ++ /* remove the device from the class list */ ++ klist_del(&dev->knode_class); ++ mutex_unlock(&dev->class->p->mutex); ++ } ++ device_remove_file(dev, &dev_attr_uevent); ++ device_remove_attrs(dev); ++ bus_remove_device(dev); ++ device_pm_remove(dev); ++ driver_deferred_probe_del(dev); ++ device_remove_properties(dev); ++ device_links_purge(dev); ++ ++ /* Notify the platform of the removal, in case they ++ * need to do anything... ++ */ ++ if (platform_notify_remove) ++ platform_notify_remove(dev); ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->p->bus_notifier, ++ BUS_NOTIFY_REMOVED_DEVICE, dev); ++ kobject_uevent(&dev->kobj, KOBJ_REMOVE); ++ glue_dir = get_glue_dir(dev); ++ kobject_del(&dev->kobj); ++ cleanup_glue_dir(dev, glue_dir); ++ put_device(parent); ++} ++EXPORT_SYMBOL_GPL(device_del); ++ ++/** ++ * device_unregister - unregister device from system. ++ * @dev: device going away. ++ * ++ * We do this in two parts, like we do device_register(). 
First, ++ * we remove it from all the subsystems with device_del(), then ++ * we decrement the reference count via put_device(). If that ++ * is the final reference count, the device will be cleaned up ++ * via device_release() above. Otherwise, the structure will ++ * stick around until the final reference to the device is dropped. ++ */ ++void device_unregister(struct device *dev) ++{ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ device_del(dev); ++ put_device(dev); ++} ++EXPORT_SYMBOL_GPL(device_unregister); ++ ++static struct device *prev_device(struct klist_iter *i) ++{ ++ struct klist_node *n = klist_prev(i); ++ struct device *dev = NULL; ++ struct device_private *p; ++ ++ if (n) { ++ p = to_device_private_parent(n); ++ dev = p->device; ++ } ++ return dev; ++} ++ ++static struct device *next_device(struct klist_iter *i) ++{ ++ struct klist_node *n = klist_next(i); ++ struct device *dev = NULL; ++ struct device_private *p; ++ ++ if (n) { ++ p = to_device_private_parent(n); ++ dev = p->device; ++ } ++ return dev; ++} ++ ++/** ++ * device_get_devnode - path of device node file ++ * @dev: device ++ * @mode: returned file access mode ++ * @uid: returned file owner ++ * @gid: returned file group ++ * @tmp: possibly allocated string ++ * ++ * Return the relative path of a possible device node. ++ * Non-default names may need to allocate a memory to compose ++ * a name. This memory is returned in tmp and needs to be ++ * freed by the caller. ++ */ ++const char *device_get_devnode(struct device *dev, ++ umode_t *mode, kuid_t *uid, kgid_t *gid, ++ const char **tmp) ++{ ++ char *s; ++ ++ *tmp = NULL; ++ ++ /* the device type may provide a specific name */ ++ if (dev->type && dev->type->devnode) ++ *tmp = dev->type->devnode(dev, mode, uid, gid); ++ if (*tmp) ++ return *tmp; ++ ++ /* the class may provide a specific name */ ++ if (dev->class && dev->class->devnode) ++ *tmp = dev->class->devnode(dev, mode); ++ if (*tmp) ++ return *tmp; ++ ++ /* return name without allocation, tmp == NULL */ ++ if (strchr(dev_name(dev), '!') == NULL) ++ return dev_name(dev); ++ ++ /* replace '!' in the name with '/' */ ++ s = kstrdup(dev_name(dev), GFP_KERNEL); ++ if (!s) ++ return NULL; ++ strreplace(s, '!', '/'); ++ return *tmp = s; ++} ++ ++/** ++ * device_for_each_child - device child iterator. ++ * @parent: parent struct device. ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, and call @fn for each, ++ * passing it @data. ++ * ++ * We check the return of @fn each time. If it returns anything ++ * other than 0, we break out and return that value. ++ */ ++int device_for_each_child(struct device *parent, void *data, ++ int (*fn)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while (!error && (child = next_device(&i))) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child); ++ ++/** ++ * device_for_each_child_reverse - device child iterator in reversed order. ++ * @parent: parent struct device. ++ * @fn: function to be called for each device. ++ * @data: data for the callback. ++ * ++ * Iterate over @parent's child devices, and call @fn for each, ++ * passing it @data. ++ * ++ * We check the return of @fn each time. If it returns anything ++ * other than 0, we break out and return that value. 
++ */ ++int device_for_each_child_reverse(struct device *parent, void *data, ++ int (*fn)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ int error = 0; ++ ++ if (!parent->p) ++ return 0; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while ((child = prev_device(&i)) && !error) ++ error = fn(child, data); ++ klist_iter_exit(&i); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_for_each_child_reverse); ++ ++/** ++ * device_find_child - device iterator for locating a particular device. ++ * @parent: parent struct device ++ * @match: Callback function to check device ++ * @data: Data to pass to match function ++ * ++ * This is similar to the device_for_each_child() function above, but it ++ * returns a reference to a device that is 'found' for later use, as ++ * determined by the @match callback. ++ * ++ * The callback should return 0 if the device doesn't match and non-zero ++ * if it does. If the callback returns non-zero and a reference to the ++ * current device can be obtained, this function will return to the caller ++ * and not iterate over any more devices. ++ * ++ * NOTE: you will need to drop the reference with put_device() after use. ++ */ ++struct device *device_find_child(struct device *parent, void *data, ++ int (*match)(struct device *dev, void *data)) ++{ ++ struct klist_iter i; ++ struct device *child; ++ ++ if (!parent) ++ return NULL; ++ ++ klist_iter_init(&parent->p->klist_children, &i); ++ while ((child = next_device(&i))) ++ if (match(child, data) && get_device(child)) ++ break; ++ klist_iter_exit(&i); ++ return child; ++} ++EXPORT_SYMBOL_GPL(device_find_child); ++ ++int __init devices_init(void) ++{ ++ devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); ++ if (!devices_kset) ++ return -ENOMEM; ++ dev_kobj = kobject_create_and_add("dev", NULL); ++ if (!dev_kobj) ++ goto dev_kobj_err; ++ sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); ++ if (!sysfs_dev_block_kobj) ++ goto block_kobj_err; ++ sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); ++ if (!sysfs_dev_char_kobj) ++ goto char_kobj_err; ++ ++ return 0; ++ ++ char_kobj_err: ++ kobject_put(sysfs_dev_block_kobj); ++ block_kobj_err: ++ kobject_put(dev_kobj); ++ dev_kobj_err: ++ kset_unregister(devices_kset); ++ return -ENOMEM; ++} ++ ++static int device_check_offline(struct device *dev, void *not_used) ++{ ++ int ret; ++ ++ ret = device_for_each_child(dev, NULL, device_check_offline); ++ if (ret) ++ return ret; ++ ++ return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; ++} ++ ++/** ++ * device_offline - Prepare the device for hot-removal. ++ * @dev: Device to be put offline. ++ * ++ * Execute the device bus type's .offline() callback, if present, to prepare ++ * the device for a subsequent hot-removal. If that succeeds, the device must ++ * not be used until either it is removed or its bus type's .online() callback ++ * is executed. ++ * ++ * Call under device_hotplug_lock. 
++ */ ++int device_offline(struct device *dev) ++{ ++ int ret; ++ ++ if (dev->offline_disabled) ++ return -EPERM; ++ ++ ret = device_for_each_child(dev, NULL, device_check_offline); ++ if (ret) ++ return ret; ++ ++ device_lock(dev); ++ if (device_supports_offline(dev)) { ++ if (dev->offline) { ++ ret = 1; ++ } else { ++ ret = dev->bus->offline(dev); ++ if (!ret) { ++ kobject_uevent(&dev->kobj, KOBJ_OFFLINE); ++ dev->offline = true; ++ } ++ } ++ } ++ device_unlock(dev); ++ ++ return ret; ++} ++ ++/** ++ * device_online - Put the device back online after successful device_offline(). ++ * @dev: Device to be put back online. ++ * ++ * If device_offline() has been successfully executed for @dev, but the device ++ * has not been removed subsequently, execute its bus type's .online() callback ++ * to indicate that the device can be used again. ++ * ++ * Call under device_hotplug_lock. ++ */ ++int device_online(struct device *dev) ++{ ++ int ret = 0; ++ ++ device_lock(dev); ++ if (device_supports_offline(dev)) { ++ if (dev->offline) { ++ ret = dev->bus->online(dev); ++ if (!ret) { ++ kobject_uevent(&dev->kobj, KOBJ_ONLINE); ++ dev->offline = false; ++ } ++ } else { ++ ret = 1; ++ } ++ } ++ device_unlock(dev); ++ ++ return ret; ++} ++ ++struct root_device { ++ struct device dev; ++ struct module *owner; ++}; ++ ++static inline struct root_device *to_root_device(struct device *d) ++{ ++ return container_of(d, struct root_device, dev); ++} ++ ++static void root_device_release(struct device *dev) ++{ ++ kfree(to_root_device(dev)); ++} ++ ++/** ++ * __root_device_register - allocate and register a root device ++ * @name: root device name ++ * @owner: owner module of the root device, usually THIS_MODULE ++ * ++ * This function allocates a root device and registers it ++ * using device_register(). In order to free the returned ++ * device, use root_device_unregister(). ++ * ++ * Root devices are dummy devices which allow other devices ++ * to be grouped under /sys/devices. Use this function to ++ * allocate a root device and then use it as the parent of ++ * any device which should appear under /sys/devices/{name} ++ * ++ * The /sys/devices/{name} directory will also contain a ++ * 'module' symlink which points to the @owner directory ++ * in sysfs. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: You probably want to use root_device_register(). ++ */ ++struct device *__root_device_register(const char *name, struct module *owner) ++{ ++ struct root_device *root; ++ int err = -ENOMEM; ++ ++ root = kzalloc(sizeof(struct root_device), GFP_KERNEL); ++ if (!root) ++ return ERR_PTR(err); ++ ++ err = dev_set_name(&root->dev, "%s", name); ++ if (err) { ++ kfree(root); ++ return ERR_PTR(err); ++ } ++ ++ root->dev.release = root_device_release; ++ ++ err = device_register(&root->dev); ++ if (err) { ++ put_device(&root->dev); ++ return ERR_PTR(err); ++ } ++ ++#ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ ++ if (owner) { ++ struct module_kobject *mk = &owner->mkobj; ++ ++ err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); ++ if (err) { ++ device_unregister(&root->dev); ++ return ERR_PTR(err); ++ } ++ root->owner = owner; ++ } ++#endif ++ ++ return &root->dev; ++} ++EXPORT_SYMBOL_GPL(__root_device_register); ++ ++/** ++ * root_device_unregister - unregister and free a root device ++ * @dev: device going away ++ * ++ * This function unregisters and cleans up a device that was created by ++ * root_device_register(). 
++ */ ++void root_device_unregister(struct device *dev) ++{ ++ struct root_device *root = to_root_device(dev); ++ ++ if (root->owner) ++ sysfs_remove_link(&root->dev.kobj, "module"); ++ ++ device_unregister(dev); ++} ++EXPORT_SYMBOL_GPL(root_device_unregister); ++ ++ ++static void device_create_release(struct device *dev) ++{ ++ pr_debug("device: '%s': %s\n", dev_name(dev), __func__); ++ kfree(dev); ++} ++ ++static __printf(6, 0) struct device * ++device_create_groups_vargs(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, ++ const struct attribute_group **groups, ++ const char *fmt, va_list args) ++{ ++ struct device *dev = NULL; ++ int retval = -ENODEV; ++ ++ if (class == NULL || IS_ERR(class)) ++ goto error; ++ ++ dev = kzalloc(sizeof(*dev), GFP_KERNEL); ++ if (!dev) { ++ retval = -ENOMEM; ++ goto error; ++ } ++ ++ device_initialize(dev); ++ dev->devt = devt; ++ dev->class = class; ++ dev->parent = parent; ++ dev->groups = groups; ++ dev->release = device_create_release; ++ dev_set_drvdata(dev, drvdata); ++ ++ retval = kobject_set_name_vargs(&dev->kobj, fmt, args); ++ if (retval) ++ goto error; ++ ++ retval = device_add(dev); ++ if (retval) ++ goto error; ++ ++ return dev; ++ ++error: ++ put_device(dev); ++ return ERR_PTR(retval); ++} ++ ++/** ++ * device_create_vargs - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @fmt: string for the device's name ++ * @args: va_list for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. ++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create_vargs(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, const char *fmt, ++ va_list args) ++{ ++ return device_create_groups_vargs(class, parent, devt, drvdata, NULL, ++ fmt, args); ++} ++EXPORT_SYMBOL_GPL(device_create_vargs); ++ ++/** ++ * device_create - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @fmt: string for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. 
++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create(struct class *class, struct device *parent, ++ dev_t devt, void *drvdata, const char *fmt, ...) ++{ ++ va_list vargs; ++ struct device *dev; ++ ++ va_start(vargs, fmt); ++ dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs); ++ va_end(vargs); ++ return dev; ++} ++EXPORT_SYMBOL_GPL(device_create); ++ ++/** ++ * device_create_with_groups - creates a device and registers it with sysfs ++ * @class: pointer to the struct class that this device should be registered to ++ * @parent: pointer to the parent struct device of this new device, if any ++ * @devt: the dev_t for the char device to be added ++ * @drvdata: the data to be added to the device for callbacks ++ * @groups: NULL-terminated list of attribute groups to be created ++ * @fmt: string for the device's name ++ * ++ * This function can be used by char device classes. A struct device ++ * will be created in sysfs, registered to the specified class. ++ * Additional attributes specified in the groups parameter will also ++ * be created automatically. ++ * ++ * A "dev" file will be created, showing the dev_t for the device, if ++ * the dev_t is not 0,0. ++ * If a pointer to a parent struct device is passed in, the newly created ++ * struct device will be a child of that device in sysfs. ++ * The pointer to the struct device will be returned from the call. ++ * Any further sysfs files that might be required can be created using this ++ * pointer. ++ * ++ * Returns &struct device pointer on success, or ERR_PTR() on error. ++ * ++ * Note: the struct class passed to this function must have previously ++ * been created with a call to class_create(). ++ */ ++struct device *device_create_with_groups(struct class *class, ++ struct device *parent, dev_t devt, ++ void *drvdata, ++ const struct attribute_group **groups, ++ const char *fmt, ...) ++{ ++ va_list vargs; ++ struct device *dev; ++ ++ va_start(vargs, fmt); ++ dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, ++ fmt, vargs); ++ va_end(vargs); ++ return dev; ++} ++EXPORT_SYMBOL_GPL(device_create_with_groups); ++ ++static int __match_devt(struct device *dev, const void *data) ++{ ++ const dev_t *devt = data; ++ ++ return dev->devt == *devt; ++} ++ ++/** ++ * device_destroy - removes a device that was created with device_create() ++ * @class: pointer to the struct class that this device was registered with ++ * @devt: the dev_t of the device that was previously registered ++ * ++ * This call unregisters and cleans up a device that was created with a ++ * call to device_create(). 
++ */ ++void device_destroy(struct class *class, dev_t devt) ++{ ++ struct device *dev; ++ ++ dev = class_find_device(class, NULL, &devt, __match_devt); ++ if (dev) { ++ put_device(dev); ++ device_unregister(dev); ++ } ++} ++EXPORT_SYMBOL_GPL(device_destroy); ++ ++/** ++ * device_rename - renames a device ++ * @dev: the pointer to the struct device to be renamed ++ * @new_name: the new name of the device ++ * ++ * It is the responsibility of the caller to provide mutual ++ * exclusion between two different calls of device_rename ++ * on the same device to ensure that new_name is valid and ++ * won't conflict with other devices. ++ * ++ * Note: Don't call this function. Currently, the networking layer calls this ++ * function, but that will change. The following text from Kay Sievers offers ++ * some insight: ++ * ++ * Renaming devices is racy at many levels, symlinks and other stuff are not ++ * replaced atomically, and you get a "move" uevent, but it's not easy to ++ * connect the event to the old and new device. Device nodes are not renamed at ++ * all, there isn't even support for that in the kernel now. ++ * ++ * In the meantime, during renaming, your target name might be taken by another ++ * driver, creating conflicts. Or the old name is taken directly after you ++ * renamed it -- then you get events for the same DEVPATH, before you even see ++ * the "move" event. It's just a mess, and nothing new should ever rely on ++ * kernel device renaming. Besides that, it's not even implemented now for ++ * other things than (driver-core wise very simple) network devices. ++ * ++ * We are currently about to change network renaming in udev to completely ++ * disallow renaming of devices in the same namespace as the kernel uses, ++ * because we can't solve the problems properly, that arise with swapping names ++ * of multiple interfaces without races. Means, renaming of eth[0-9]* will only ++ * be allowed to some other name than eth[0-9]*, for the aforementioned ++ * reasons. ++ * ++ * Make up a "real" name in the driver before you register anything, or add ++ * some other attributes for userspace to find the device, or use udev to add ++ * symlinks -- but never rename kernel devices later, it's a complete mess. We ++ * don't even want to get into that and try to implement the missing pieces in ++ * the core. We really have other pieces to fix in the driver core mess. 
:) ++ */ ++int device_rename(struct device *dev, const char *new_name) ++{ ++ struct kobject *kobj = &dev->kobj; ++ char *old_device_name = NULL; ++ int error; ++ ++ dev = get_device(dev); ++ if (!dev) ++ return -EINVAL; ++ ++ dev_dbg(dev, "renaming to %s\n", new_name); ++ ++ old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); ++ if (!old_device_name) { ++ error = -ENOMEM; ++ goto out; ++ } ++ ++ if (dev->class) { ++ error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj, ++ kobj, old_device_name, ++ new_name, kobject_namespace(kobj)); ++ if (error) ++ goto out; ++ } ++ ++ error = kobject_rename(kobj, new_name); ++ if (error) ++ goto out; ++ ++out: ++ put_device(dev); ++ ++ kfree(old_device_name); ++ ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_rename); ++ ++static int device_move_class_links(struct device *dev, ++ struct device *old_parent, ++ struct device *new_parent) ++{ ++ int error = 0; ++ ++ if (old_parent) ++ sysfs_remove_link(&dev->kobj, "device"); ++ if (new_parent) ++ error = sysfs_create_link(&dev->kobj, &new_parent->kobj, ++ "device"); ++ return error; ++} ++ ++/** ++ * device_move - moves a device to a new parent ++ * @dev: the pointer to the struct device to be moved ++ * @new_parent: the new parent of the device (can be NULL) ++ * @dpm_order: how to reorder the dpm_list ++ */ ++int device_move(struct device *dev, struct device *new_parent, ++ enum dpm_order dpm_order) ++{ ++ int error; ++ struct device *old_parent; ++ struct kobject *new_parent_kobj; ++ ++ dev = get_device(dev); ++ if (!dev) ++ return -EINVAL; ++ ++ device_pm_lock(); ++ new_parent = get_device(new_parent); ++ new_parent_kobj = get_device_parent(dev, new_parent); ++ if (IS_ERR(new_parent_kobj)) { ++ error = PTR_ERR(new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ ++ pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), ++ __func__, new_parent ? dev_name(new_parent) : ""); ++ error = kobject_move(&dev->kobj, new_parent_kobj); ++ if (error) { ++ cleanup_glue_dir(dev, new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ old_parent = dev->parent; ++ dev->parent = new_parent; ++ if (old_parent) ++ klist_remove(&dev->p->knode_parent); ++ if (new_parent) { ++ klist_add_tail(&dev->p->knode_parent, ++ &new_parent->p->klist_children); ++ set_dev_node(dev, dev_to_node(new_parent)); ++ } ++ ++ if (dev->class) { ++ error = device_move_class_links(dev, old_parent, new_parent); ++ if (error) { ++ /* We ignore errors on cleanup since we're hosed anyway... 
*/ ++ device_move_class_links(dev, new_parent, old_parent); ++ if (!kobject_move(&dev->kobj, &old_parent->kobj)) { ++ if (new_parent) ++ klist_remove(&dev->p->knode_parent); ++ dev->parent = old_parent; ++ if (old_parent) { ++ klist_add_tail(&dev->p->knode_parent, ++ &old_parent->p->klist_children); ++ set_dev_node(dev, dev_to_node(old_parent)); ++ } ++ } ++ cleanup_glue_dir(dev, new_parent_kobj); ++ put_device(new_parent); ++ goto out; ++ } ++ } ++ switch (dpm_order) { ++ case DPM_ORDER_NONE: ++ break; ++ case DPM_ORDER_DEV_AFTER_PARENT: ++ device_pm_move_after(dev, new_parent); ++ devices_kset_move_after(dev, new_parent); ++ break; ++ case DPM_ORDER_PARENT_BEFORE_DEV: ++ device_pm_move_before(new_parent, dev); ++ devices_kset_move_before(new_parent, dev); ++ break; ++ case DPM_ORDER_DEV_LAST: ++ device_pm_move_last(dev); ++ devices_kset_move_last(dev); ++ break; ++ } ++ ++ put_device(old_parent); ++out: ++ device_pm_unlock(); ++ put_device(dev); ++ return error; ++} ++EXPORT_SYMBOL_GPL(device_move); ++ ++/** ++ * device_shutdown - call ->shutdown() on each device to shutdown. ++ */ ++void device_shutdown(void) ++{ ++ struct device *dev, *parent; ++ ++ wait_for_device_probe(); ++ device_block_probing(); ++ ++ cpufreq_suspend(); ++ ++ spin_lock(&devices_kset->list_lock); ++ /* ++ * Walk the devices list backward, shutting down each in turn. ++ * Beware that device unplug events may also start pulling ++ * devices offline, even as the system is shutting down. ++ */ ++ while (!list_empty(&devices_kset->list)) { ++ dev = list_entry(devices_kset->list.prev, struct device, ++ kobj.entry); ++ ++ /* ++ * hold reference count of device's parent to ++ * prevent it from being freed because parent's ++ * lock is to be held ++ */ ++ parent = get_device(dev->parent); ++ get_device(dev); ++ /* ++ * Make sure the device is off the kset list, in the ++ * event that dev->*->shutdown() doesn't remove it. 
++ */ ++ list_del_init(&dev->kobj.entry); ++ spin_unlock(&devices_kset->list_lock); ++ ++ /* hold lock to avoid race with probe/release */ ++ if (parent) ++ device_lock(parent); ++ device_lock(dev); ++ ++ /* Don't allow any more runtime suspends */ ++ pm_runtime_get_noresume(dev); ++ pm_runtime_barrier(dev); ++ ++ if (dev->class && dev->class->shutdown_pre) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown_pre\n"); ++ dev->class->shutdown_pre(dev); ++ } ++ if (dev->bus && dev->bus->shutdown) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown\n"); ++ dev->bus->shutdown(dev); ++ } else if (dev->driver && dev->driver->shutdown) { ++ if (initcall_debug) ++ dev_info(dev, "shutdown\n"); ++ dev->driver->shutdown(dev); ++ } ++ ++ device_unlock(dev); ++ if (parent) ++ device_unlock(parent); ++ ++ put_device(dev); ++ put_device(parent); ++ ++ spin_lock(&devices_kset->list_lock); ++ } ++ spin_unlock(&devices_kset->list_lock); ++} ++ ++/* ++ * Device logging functions ++ */ ++ ++#ifdef CONFIG_PRINTK ++static int ++create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) ++{ ++ const char *subsys; ++ size_t pos = 0; ++ ++ if (dev->class) ++ subsys = dev->class->name; ++ else if (dev->bus) ++ subsys = dev->bus->name; ++ else ++ return 0; ++ ++ pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); ++ if (pos >= hdrlen) ++ goto overflow; ++ ++ /* ++ * Add device identifier DEVICE=: ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ */ ++ if (MAJOR(dev->devt)) { ++ char c; ++ ++ if (strcmp(subsys, "block") == 0) ++ c = 'b'; ++ else ++ c = 'c'; ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=%c%u:%u", ++ c, MAJOR(dev->devt), MINOR(dev->devt)); ++ } else if (strcmp(subsys, "net") == 0) { ++ struct net_device *net = to_net_dev(dev); ++ ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=n%u", net->ifindex); ++ } else { ++ pos++; ++ pos += snprintf(hdr + pos, hdrlen - pos, ++ "DEVICE=+%s:%s", subsys, dev_name(dev)); ++ } ++ ++ if (pos >= hdrlen) ++ goto overflow; ++ ++ return pos; ++ ++overflow: ++ dev_WARN(dev, "device/subsystem name too long"); ++ return 0; ++} ++ ++int dev_vprintk_emit(int level, const struct device *dev, ++ const char *fmt, va_list args) ++{ ++ char hdr[128]; ++ size_t hdrlen; ++ ++ hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); ++ ++ return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); ++} ++EXPORT_SYMBOL(dev_vprintk_emit); ++ ++int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ ++ r = dev_vprintk_emit(level, dev, fmt, args); ++ ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(dev_printk_emit); ++ ++static void __dev_printk(const char *level, const struct device *dev, ++ struct va_format *vaf) ++{ ++ if (dev) ++ dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", ++ dev_driver_string(dev), dev_name(dev), vaf); ++ else ++ printk("%s(NULL device *): %pV", level, vaf); ++} ++ ++void dev_printk(const char *level, const struct device *dev, ++ const char *fmt, ...) ++{ ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ __dev_printk(level, dev, &vaf); ++ ++ va_end(args); ++} ++EXPORT_SYMBOL(dev_printk); ++ ++#define define_dev_printk_level(func, kern_level) \ ++void func(const struct device *dev, const char *fmt, ...) 
\ ++{ \ ++ struct va_format vaf; \ ++ va_list args; \ ++ \ ++ va_start(args, fmt); \ ++ \ ++ vaf.fmt = fmt; \ ++ vaf.va = &args; \ ++ \ ++ __dev_printk(kern_level, dev, &vaf); \ ++ \ ++ va_end(args); \ ++} \ ++EXPORT_SYMBOL(func); ++ ++define_dev_printk_level(_dev_emerg, KERN_EMERG); ++define_dev_printk_level(_dev_alert, KERN_ALERT); ++define_dev_printk_level(_dev_crit, KERN_CRIT); ++define_dev_printk_level(_dev_err, KERN_ERR); ++define_dev_printk_level(_dev_warn, KERN_WARNING); ++define_dev_printk_level(_dev_notice, KERN_NOTICE); ++define_dev_printk_level(_dev_info, KERN_INFO); ++ ++#endif ++ ++static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) ++{ ++ return fwnode && !IS_ERR(fwnode->secondary); ++} ++ ++/** ++ * set_primary_fwnode - Change the primary firmware node of a given device. ++ * @dev: Device to handle. ++ * @fwnode: New primary firmware node of the device. ++ * ++ * Set the device's firmware node pointer to @fwnode, but if a secondary ++ * firmware node of the device is present, preserve it. ++ */ ++void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) ++{ ++ struct fwnode_handle *fn = dev->fwnode; ++ ++ if (fwnode) { ++ if (fwnode_is_primary(fn)) ++ fn = fn->secondary; ++ ++ if (fn) { ++ WARN_ON(fwnode->secondary); ++ fwnode->secondary = fn; ++ } ++ dev->fwnode = fwnode; ++ } else { ++ if (fwnode_is_primary(fn)) { ++ dev->fwnode = fn->secondary; ++ fn->secondary = NULL; ++ } else { ++ dev->fwnode = NULL; ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(set_primary_fwnode); ++ ++/** ++ * set_secondary_fwnode - Change the secondary firmware node of a given device. ++ * @dev: Device to handle. ++ * @fwnode: New secondary firmware node of the device. ++ * ++ * If a primary firmware node of the device is present, set its secondary ++ * pointer to @fwnode. Otherwise, set the device's firmware node pointer to ++ * @fwnode. ++ */ ++void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) ++{ ++ if (fwnode) ++ fwnode->secondary = ERR_PTR(-ENODEV); ++ ++ if (fwnode_is_primary(dev->fwnode)) ++ dev->fwnode->secondary = fwnode; ++ else ++ dev->fwnode = fwnode; ++} ++ ++/** ++ * device_set_of_node_from_dev - reuse device-tree node of another device ++ * @dev: device whose device-tree node is being set ++ * @dev2: device whose device-tree node is being reused ++ * ++ * Takes another reference to the new device-tree node after first dropping ++ * any reference held to the old node. 
++ */ ++void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) ++{ ++ of_node_put(dev->of_node); ++ dev->of_node = of_node_get(dev2->of_node); ++ dev->of_node_reused = true; ++} ++EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); +diff -uprN kernel/drivers/base/regmap/regmap-irq.c kernel_new/drivers/base/regmap/regmap-irq.c +--- kernel/drivers/base/regmap/regmap-irq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/base/regmap/regmap-irq.c 2021-04-01 18:28:07.660863282 +0800 +@@ -197,8 +197,11 @@ static void regmap_irq_enable(struct irq + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = d->map; + const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~irq_data->mask; ++ hard_cond_local_irq_restore(flags); + } + + static void regmap_irq_disable(struct irq_data *data) +@@ -206,8 +209,11 @@ static void regmap_irq_disable(struct ir + struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); + struct regmap *map = d->map; + const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq); ++ unsigned long flags; + ++ flags = hard_cond_local_irq_save(); + d->mask_buf[irq_data->reg_offset / map->reg_stride] |= irq_data->mask; ++ hard_cond_local_irq_restore(flags); + } + + static int regmap_irq_set_type(struct irq_data *data, unsigned int type) +@@ -270,6 +276,7 @@ static const struct irq_chip regmap_irq_ + .irq_enable = regmap_irq_enable, + .irq_set_type = regmap_irq_set_type, + .irq_set_wake = regmap_irq_set_wake, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static irqreturn_t regmap_irq_thread(int irq, void *d) +diff -uprN kernel/drivers/clocksource/arm_arch_timer.c kernel_new/drivers/clocksource/arm_arch_timer.c +--- kernel/drivers/clocksource/arm_arch_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_arch_timer.c 2021-04-01 18:28:07.660863282 +0800 +@@ -20,6 +20,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -633,8 +635,7 @@ static bool arch_timer_this_cpu_has_cntv + #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) + #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ + +-static __always_inline irqreturn_t timer_handler(const int access, +- struct clock_event_device *evt) ++static int arch_timer_ack(const int access, struct clock_event_device *evt) + { + unsigned long ctrl; + +@@ -642,6 +643,52 @@ static __always_inline irqreturn_t timer + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + ctrl |= ARCH_TIMER_CTRL_IT_MASK; + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); ++ return 1; ++ } ++ return 0; ++} ++ ++#ifdef CONFIG_IPIPE ++static DEFINE_PER_CPU(struct ipipe_timer, arch_itimer); ++static struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_ARCH, ++ .u = { ++ { ++ .mask = 0xffffffffffffffff, ++ }, ++ }, ++}; ++ ++static void arch_itimer_ack_phys(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); ++ arch_timer_ack(ARCH_TIMER_PHYS_ACCESS, evt); ++} ++ ++static void arch_itimer_ack_virt(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); ++ arch_timer_ack(ARCH_TIMER_VIRT_ACCESS, evt); ++} ++#endif /* CONFIG_IPIPE */ ++ ++static inline irqreturn_t timer_handler(int irq, const int access, ++ struct clock_event_device *evt) ++{ ++ if (clockevent_ipipe_stolen(evt)) ++ goto stolen; ++ ++ if 
(arch_timer_ack(access, evt)) { ++#ifdef CONFIG_IPIPE ++ struct ipipe_timer *itimer = raw_cpu_ptr(&arch_itimer); ++ if (itimer->irq != irq) ++ itimer->irq = irq; ++#endif /* CONFIG_IPIPE */ ++ stolen: ++ /* ++ * This is a 64bit clock source, no need for TSC ++ * update. ++ */ + evt->event_handler(evt); + return IRQ_HANDLED; + } +@@ -653,28 +700,28 @@ static irqreturn_t arch_timer_handler_vi + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_VIRT_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_PHYS_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_MEM_PHYS_ACCESS, evt); + } + + static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) + { + struct clock_event_device *evt = dev_id; + +- return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); ++ return timer_handler(irq, ARCH_TIMER_MEM_VIRT_ACCESS, evt); + } + + static __always_inline int timer_shutdown(const int access, +@@ -788,6 +835,17 @@ static void __arch_timer_setup(unsigned + } + + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); ++#ifdef CONFIG_IPIPE ++ clk->ipipe_timer = raw_cpu_ptr(&arch_itimer); ++ if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { ++ clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ clk->ipipe_timer->ack = arch_itimer_ack_virt; ++ } else { ++ clk->ipipe_timer->irq = arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI]; ++ clk->ipipe_timer->ack = arch_itimer_ack_phys; ++ } ++ clk->ipipe_timer->freq = arch_timer_rate; ++#endif + } else { + clk->features |= CLOCK_EVT_FEAT_DYNIRQ; + clk->name = "arch_mem_timer"; +@@ -862,6 +920,9 @@ static void arch_counter_set_user_access + else + cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; + ++#ifdef CONFIG_IPIPE ++ cntkctl |= ARCH_TIMER_USR_PCT_ACCESS_EN; ++#endif + arch_timer_set_cntkctl(cntkctl); + } + +@@ -997,6 +1058,10 @@ static void __init arch_counter_register + arch_timer_read_counter = arch_counter_get_cntvct_mem; + } + ++#ifdef CONFIG_IPIPE ++ tsc_info.freq = arch_timer_rate; ++ __ipipe_tsc_register(&tsc_info); ++#endif /* CONFIG_IPIPE */ + if (!arch_counter_suspend_stop) + clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + start_count = arch_timer_read_counter(); +diff -uprN kernel/drivers/clocksource/arm_arch_timer.c.orig kernel_new/drivers/clocksource/arm_arch_timer.c.orig +--- kernel/drivers/clocksource/arm_arch_timer.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_arch_timer.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,1631 @@ ++/* ++ * linux/drivers/clocksource/arm_arch_timer.c ++ * ++ * Copyright (C) 2011 ARM Ltd. ++ * All Rights Reserved ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#define pr_fmt(fmt) "arm_arch_timer: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#undef pr_fmt ++#define pr_fmt(fmt) "arch_timer: " fmt ++ ++#define CNTTIDR 0x08 ++#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4)) ++ ++#define CNTACR(n) (0x40 + ((n) * 4)) ++#define CNTACR_RPCT BIT(0) ++#define CNTACR_RVCT BIT(1) ++#define CNTACR_RFRQ BIT(2) ++#define CNTACR_RVOFF BIT(3) ++#define CNTACR_RWVT BIT(4) ++#define CNTACR_RWPT BIT(5) ++ ++#define CNTVCT_LO 0x08 ++#define CNTVCT_HI 0x0c ++#define CNTFRQ 0x10 ++#define CNTP_TVAL 0x28 ++#define CNTP_CTL 0x2c ++#define CNTV_TVAL 0x38 ++#define CNTV_CTL 0x3c ++ ++static unsigned arch_timers_present __initdata; ++ ++static void __iomem *arch_counter_base; ++ ++struct arch_timer { ++ void __iomem *base; ++ struct clock_event_device evt; ++}; ++ ++#define to_arch_timer(e) container_of(e, struct arch_timer, evt) ++ ++static u32 arch_timer_rate; ++static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI]; ++ ++static struct clock_event_device __percpu *arch_timer_evt; ++ ++static enum arch_timer_ppi_nr arch_timer_uses_ppi = ARCH_TIMER_VIRT_PPI; ++static bool arch_timer_c3stop; ++static bool arch_timer_mem_use_virtual; ++static bool arch_counter_suspend_stop; ++static bool vdso_default = true; ++static bool vdso_fix; ++ ++static cpumask_t evtstrm_available = CPU_MASK_NONE; ++static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM); ++ ++static int __init early_evtstrm_cfg(char *buf) ++{ ++ return strtobool(buf, &evtstrm_enable); ++} ++early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); ++ ++/* ++ * Architected system timer support. 
++ */ ++ ++static __always_inline ++void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, ++ struct clock_event_device *clk) ++{ ++ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ writel_relaxed(val, timer->base + CNTP_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ writel_relaxed(val, timer->base + CNTP_TVAL); ++ break; ++ } ++ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ writel_relaxed(val, timer->base + CNTV_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ writel_relaxed(val, timer->base + CNTV_TVAL); ++ break; ++ } ++ } else { ++ arch_timer_reg_write_cp15(access, reg, val); ++ } ++} ++ ++static __always_inline ++u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, ++ struct clock_event_device *clk) ++{ ++ u32 val; ++ ++ if (access == ARCH_TIMER_MEM_PHYS_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ val = readl_relaxed(timer->base + CNTP_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ val = readl_relaxed(timer->base + CNTP_TVAL); ++ break; ++ } ++ } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { ++ struct arch_timer *timer = to_arch_timer(clk); ++ switch (reg) { ++ case ARCH_TIMER_REG_CTRL: ++ val = readl_relaxed(timer->base + CNTV_CTL); ++ break; ++ case ARCH_TIMER_REG_TVAL: ++ val = readl_relaxed(timer->base + CNTV_TVAL); ++ break; ++ } ++ } else { ++ val = arch_timer_reg_read_cp15(access, reg); ++ } ++ ++ return val; ++} ++ ++/* ++ * Default to cp15 based access because arm64 uses this function for ++ * sched_clock() before DT is probed and the cp15 method is guaranteed ++ * to exist on arm64. arm doesn't use this before DT is probed so even ++ * if we don't have the cp15 accessors we won't have a problem. ++ */ ++u64 (*arch_timer_read_counter)(void) = arch_counter_get_cntvct; ++EXPORT_SYMBOL_GPL(arch_timer_read_counter); ++ ++static u64 arch_counter_read(struct clocksource *cs) ++{ ++ return arch_timer_read_counter(); ++} ++ ++static u64 arch_counter_read_cc(const struct cyclecounter *cc) ++{ ++ return arch_timer_read_counter(); ++} ++ ++static struct clocksource clocksource_counter = { ++ .name = "arch_sys_counter", ++ .rating = 400, ++ .read = arch_counter_read, ++ .mask = CLOCKSOURCE_MASK(56), ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, ++}; ++ ++static struct cyclecounter cyclecounter __ro_after_init = { ++ .read = arch_counter_read_cc, ++ .mask = CLOCKSOURCE_MASK(56), ++}; ++ ++struct ate_acpi_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++#ifdef CONFIG_FSL_ERRATUM_A008585 ++/* ++ * The number of retries is an arbitrary value well beyond the highest number ++ * of iterations the loop has been observed to take. 
++ */ ++#define __fsl_a008585_read_reg(reg) ({ \ ++ u64 _old, _new; \ ++ int _retries = 200; \ ++ \ ++ do { \ ++ _old = read_sysreg(reg); \ ++ _new = read_sysreg(reg); \ ++ _retries--; \ ++ } while (unlikely(_old != _new) && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _new; \ ++}) ++ ++static u32 notrace fsl_a008585_read_cntp_tval_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntp_tval_el0); ++} ++ ++static u32 notrace fsl_a008585_read_cntv_tval_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntv_tval_el0); ++} ++ ++static u64 notrace fsl_a008585_read_cntpct_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntpct_el0); ++} ++ ++static u64 notrace fsl_a008585_read_cntvct_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntvct_el0); ++} ++#endif ++ ++#ifdef CONFIG_HISILICON_ERRATUM_161010101 ++/* ++ * Verify whether the value of the second read is larger than the first by ++ * less than 32 is the only way to confirm the value is correct, so clear the ++ * lower 5 bits to check whether the difference is greater than 32 or not. ++ * Theoretically the erratum should not occur more than twice in succession ++ * when reading the system counter, but it is possible that some interrupts ++ * may lead to more than twice read errors, triggering the warning, so setting ++ * the number of retries far beyond the number of iterations the loop has been ++ * observed to take. ++ */ ++#define __hisi_161010101_read_reg(reg) ({ \ ++ u64 _old, _new; \ ++ int _retries = 50; \ ++ \ ++ do { \ ++ _old = read_sysreg(reg); \ ++ _new = read_sysreg(reg); \ ++ _retries--; \ ++ } while (unlikely((_new - _old) >> 5) && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _new; \ ++}) ++ ++static u32 notrace hisi_161010101_read_cntp_tval_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntp_tval_el0); ++} ++ ++static u32 notrace hisi_161010101_read_cntv_tval_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntv_tval_el0); ++} ++ ++static u64 notrace hisi_161010101_read_cntpct_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntpct_el0); ++} ++ ++static u64 notrace hisi_161010101_read_cntvct_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntvct_el0); ++} ++ ++static struct ate_acpi_oem_info hisi_161010101_oem_info[] = { ++ /* ++ * Note that trailing spaces are required to properly match ++ * the OEM table information. ++ */ ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP05 ", ++ .oem_revision = 0, ++ }, ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP06 ", ++ .oem_revision = 0, ++ }, ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP07 ", ++ .oem_revision = 0, ++ }, ++ { /* Sentinel indicating the end of the OEM array */ }, ++}; ++#endif ++ ++#ifdef CONFIG_ARM64_ERRATUM_858921 ++static u64 notrace arm64_858921_read_cntpct_el0(void) ++{ ++ u64 old, new; ++ ++ old = read_sysreg(cntpct_el0); ++ new = read_sysreg(cntpct_el0); ++ return (((old ^ new) >> 32) & 1) ? old : new; ++} ++ ++static u64 notrace arm64_858921_read_cntvct_el0(void) ++{ ++ u64 old, new; ++ ++ old = read_sysreg(cntvct_el0); ++ new = read_sysreg(cntvct_el0); ++ return (((old ^ new) >> 32) & 1) ? old : new; ++} ++#endif ++ ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++/* ++ * The low bits of the counter registers are indeterminate while bit 10 or ++ * greater is rolling over. Since the counter value can jump both backward ++ * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values ++ * with all ones or all zeros in the low bits. Bound the loop by the maximum ++ * number of CPU cycles in 3 consecutive 24 MHz counter periods. 
++ */ ++#define __sun50i_a64_read_reg(reg) ({ \ ++ u64 _val; \ ++ int _retries = 150; \ ++ \ ++ do { \ ++ _val = read_sysreg(reg); \ ++ _retries--; \ ++ } while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _val; \ ++}) ++ ++static u64 notrace sun50i_a64_read_cntpct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntpct_el0); ++} ++ ++static u64 notrace sun50i_a64_read_cntvct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntvct_el0); ++} ++ ++static u32 notrace sun50i_a64_read_cntp_tval_el0(void) ++{ ++ return read_sysreg(cntp_cval_el0) - sun50i_a64_read_cntpct_el0(); ++} ++ ++static u32 notrace sun50i_a64_read_cntv_tval_el0(void) ++{ ++ return read_sysreg(cntv_cval_el0) - sun50i_a64_read_cntvct_el0(); ++} ++#endif ++ ++#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND ++DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); ++EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); ++ ++DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); ++EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); ++ ++static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ u64 cval; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl |= ARCH_TIMER_CTRL_ENABLE; ++ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; ++ ++ if (access == ARCH_TIMER_PHYS_ACCESS) { ++ cval = evt + arch_counter_get_cntpct(); ++ write_sysreg(cval, cntp_cval_el0); ++ } else { ++ cval = evt + arch_counter_get_cntvct(); ++ write_sysreg(cval, cntv_cval_el0); ++ } ++ ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++} ++ ++static __maybe_unused int erratum_set_next_event_tval_virt(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ erratum_set_next_event_tval_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static __maybe_unused int erratum_set_next_event_tval_phys(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ erratum_set_next_event_tval_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static const struct arch_timer_erratum_workaround ool_workarounds[] = { ++#ifdef CONFIG_FSL_ERRATUM_A008585 ++ { ++ .match_type = ate_match_dt, ++ .id = "fsl,erratum-a008585", ++ .desc = "Freescale erratum a005858", ++ .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, ++ .read_cntpct_el0 = fsl_a008585_read_cntpct_el0, ++ .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++#ifdef CONFIG_HISILICON_ERRATUM_161010101 ++ { ++ .match_type = ate_match_dt, ++ .id = "hisilicon,erratum-161010101", ++ .desc = "HiSilicon erratum 161010101", ++ .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, ++ .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++ { ++ .match_type = ate_match_acpi_oem_info, ++ .id = hisi_161010101_oem_info, ++ .desc = "HiSilicon erratum 161010101", ++ .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, ++ .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, ++ 
.set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++#ifdef CONFIG_ARM64_ERRATUM_858921 ++ { ++ .match_type = ate_match_local_cap_id, ++ .id = (void *)ARM64_WORKAROUND_858921, ++ .desc = "ARM erratum 858921", ++ .read_cntpct_el0 = arm64_858921_read_cntpct_el0, ++ .read_cntvct_el0 = arm64_858921_read_cntvct_el0, ++ }, ++#endif ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++ { ++ .match_type = ate_match_dt, ++ .id = "allwinner,erratum-unknown1", ++ .desc = "Allwinner erratum UNKNOWN1", ++ .read_cntp_tval_el0 = sun50i_a64_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = sun50i_a64_read_cntv_tval_el0, ++ .read_cntpct_el0 = sun50i_a64_read_cntpct_el0, ++ .read_cntvct_el0 = sun50i_a64_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif ++}; ++ ++typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, ++ const void *); ++ ++static ++bool arch_timer_check_dt_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ const struct device_node *np = arg; ++ ++ return of_property_read_bool(np, wa->id); ++} ++ ++static ++bool arch_timer_check_local_cap_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ return this_cpu_has_cap((uintptr_t)wa->id); ++} ++ ++ ++static ++bool arch_timer_check_acpi_oem_erratum(const struct arch_timer_erratum_workaround *wa, ++ const void *arg) ++{ ++ static const struct ate_acpi_oem_info empty_oem_info = {}; ++ const struct ate_acpi_oem_info *info = wa->id; ++ const struct acpi_table_header *table = arg; ++ ++ /* Iterate over the ACPI OEM info array, looking for a match */ ++ while (memcmp(info, &empty_oem_info, sizeof(*info))) { ++ if (!memcmp(info->oem_id, table->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(info->oem_table_id, table->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ info->oem_revision == table->oem_revision) ++ return true; ++ ++ info++; ++ } ++ ++ return false; ++} ++ ++static const struct arch_timer_erratum_workaround * ++arch_timer_iterate_errata(enum arch_timer_erratum_match_type type, ++ ate_match_fn_t match_fn, ++ void *arg) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) { ++ if (ool_workarounds[i].match_type != type) ++ continue; ++ ++ if (match_fn(&ool_workarounds[i], arg)) ++ return &ool_workarounds[i]; ++ } ++ ++ return NULL; ++} ++ ++static ++void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa, ++ bool local) ++{ ++ int i; ++ ++ if (local) { ++ __this_cpu_write(timer_unstable_counter_workaround, wa); ++ } else { ++ for_each_possible_cpu(i) ++ per_cpu(timer_unstable_counter_workaround, i) = wa; ++ } ++ ++ /* ++ * Use the locked version, as we're called from the CPU ++ * hotplug framework. Otherwise, we end-up in deadlock-land. ++ */ ++ static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); ++ ++ /* ++ * Don't use the vdso fastpath if errata require using the ++ * out-of-line counter accessor. We may change our mind pretty ++ * late in the game (with a per-CPU erratum, for example), so ++ * change both the default value and the vdso itself. 
++ */ ++ if (wa->read_cntvct_el0) { ++ clocksource_counter.archdata.vdso_direct = true; ++ vdso_default = true; ++ vdso_fix = true; ++ } ++} ++ ++static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, ++ void *arg) ++{ ++ const struct arch_timer_erratum_workaround *wa; ++ ate_match_fn_t match_fn = NULL; ++ bool local = false; ++ ++ switch (type) { ++ case ate_match_dt: ++ match_fn = arch_timer_check_dt_erratum; ++ break; ++ case ate_match_local_cap_id: ++ match_fn = arch_timer_check_local_cap_erratum; ++ local = true; ++ break; ++ case ate_match_acpi_oem_info: ++ match_fn = arch_timer_check_acpi_oem_erratum; ++ break; ++ default: ++ WARN_ON(1); ++ return; ++ } ++ ++ wa = arch_timer_iterate_errata(type, match_fn, arg); ++ if (!wa) ++ return; ++ ++ if (needs_unstable_timer_counter_workaround()) { ++ const struct arch_timer_erratum_workaround *__wa; ++ __wa = __this_cpu_read(timer_unstable_counter_workaround); ++ if (__wa && wa != __wa) ++ pr_warn("Can't enable workaround for %s (clashes with %s\n)", ++ wa->desc, __wa->desc); ++ ++ if (__wa) ++ return; ++ } ++ ++ arch_timer_enable_workaround(wa, local); ++ pr_info("Enabling %s workaround for %s\n", ++ local ? "local" : "global", wa->desc); ++} ++ ++#define erratum_handler(fn, r, ...) \ ++({ \ ++ bool __val; \ ++ if (needs_unstable_timer_counter_workaround()) { \ ++ const struct arch_timer_erratum_workaround *__wa; \ ++ __wa = __this_cpu_read(timer_unstable_counter_workaround); \ ++ if (__wa && __wa->fn) { \ ++ r = __wa->fn(__VA_ARGS__); \ ++ __val = true; \ ++ } else { \ ++ __val = false; \ ++ } \ ++ } else { \ ++ __val = false; \ ++ } \ ++ __val; \ ++}) ++ ++static bool arch_timer_this_cpu_has_cntvct_wa(void) ++{ ++ const struct arch_timer_erratum_workaround *wa; ++ ++ wa = __this_cpu_read(timer_unstable_counter_workaround); ++ return wa && wa->read_cntvct_el0; ++} ++#else ++#define arch_timer_check_ool_workaround(t,a) do { } while(0) ++#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) ++#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) ++#define erratum_handler(fn, r, ...) 
({false;}) ++#define arch_timer_this_cpu_has_cntvct_wa() ({false;}) ++#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ ++ ++static __always_inline irqreturn_t timer_handler(const int access, ++ struct clock_event_device *evt) ++{ ++ unsigned long ctrl; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, evt); ++ if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { ++ ctrl |= ARCH_TIMER_CTRL_IT_MASK; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, evt); ++ evt->event_handler(evt); ++ return IRQ_HANDLED; ++ } ++ ++ return IRQ_NONE; ++} ++ ++static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_phys_mem(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_MEM_PHYS_ACCESS, evt); ++} ++ ++static irqreturn_t arch_timer_handler_virt_mem(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ return timer_handler(ARCH_TIMER_MEM_VIRT_ACCESS, evt); ++} ++ ++static __always_inline int timer_shutdown(const int access, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl &= ~ARCH_TIMER_CTRL_ENABLE; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++ ++ return 0; ++} ++ ++static int arch_timer_shutdown_virt(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_VIRT_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_phys(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_PHYS_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_virt_mem(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_MEM_VIRT_ACCESS, clk); ++} ++ ++static int arch_timer_shutdown_phys_mem(struct clock_event_device *clk) ++{ ++ return timer_shutdown(ARCH_TIMER_MEM_PHYS_ACCESS, clk); ++} ++ ++static __always_inline void set_next_event(const int access, unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ unsigned long ctrl; ++ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ctrl |= ARCH_TIMER_CTRL_ENABLE; ++ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt, clk); ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++} ++ ++static int arch_timer_set_next_event_virt(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ int ret; ++ ++ if (erratum_handler(set_next_event_virt, ret, evt, clk)) ++ return ret; ++ ++ set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_phys(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ int ret; ++ ++ if (erratum_handler(set_next_event_phys, ret, evt, clk)) ++ return ret; ++ ++ set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_virt_mem(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ set_next_event(ARCH_TIMER_MEM_VIRT_ACCESS, evt, clk); ++ return 0; ++} ++ ++static int arch_timer_set_next_event_phys_mem(unsigned long evt, ++ struct clock_event_device *clk) ++{ ++ set_next_event(ARCH_TIMER_MEM_PHYS_ACCESS, evt, clk); ++ return 0; ++} ++ ++static void __arch_timer_setup(unsigned type, ++ struct 
clock_event_device *clk) ++{ ++ clk->features = CLOCK_EVT_FEAT_ONESHOT; ++ ++ if (type == ARCH_TIMER_TYPE_CP15) { ++ if (arch_timer_c3stop) ++ clk->features |= CLOCK_EVT_FEAT_C3STOP; ++ clk->name = "arch_sys_timer"; ++ clk->rating = 450; ++ clk->cpumask = cpumask_of(smp_processor_id()); ++ clk->irq = arch_timer_ppi[arch_timer_uses_ppi]; ++ switch (arch_timer_uses_ppi) { ++ case ARCH_TIMER_VIRT_PPI: ++ clk->set_state_shutdown = arch_timer_shutdown_virt; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; ++ clk->set_next_event = arch_timer_set_next_event_virt; ++ break; ++ case ARCH_TIMER_PHYS_SECURE_PPI: ++ case ARCH_TIMER_PHYS_NONSECURE_PPI: ++ case ARCH_TIMER_HYP_PPI: ++ clk->set_state_shutdown = arch_timer_shutdown_phys; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; ++ clk->set_next_event = arch_timer_set_next_event_phys; ++ break; ++ default: ++ BUG(); ++ } ++ ++ arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); ++ } else { ++ clk->features |= CLOCK_EVT_FEAT_DYNIRQ; ++ clk->name = "arch_mem_timer"; ++ clk->rating = 400; ++ clk->cpumask = cpu_possible_mask; ++ if (arch_timer_mem_use_virtual) { ++ clk->set_state_shutdown = arch_timer_shutdown_virt_mem; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem; ++ clk->set_next_event = ++ arch_timer_set_next_event_virt_mem; ++ } else { ++ clk->set_state_shutdown = arch_timer_shutdown_phys_mem; ++ clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem; ++ clk->set_next_event = ++ arch_timer_set_next_event_phys_mem; ++ } ++ } ++ ++ clk->set_state_shutdown(clk); ++ ++ clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff); ++} ++ ++static void arch_timer_evtstrm_enable(int divider) ++{ ++ u32 cntkctl = arch_timer_get_cntkctl(); ++ ++ cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; ++ /* Set the divider and enable virtual event stream */ ++ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) ++ | ARCH_TIMER_VIRT_EVT_EN; ++ arch_timer_set_cntkctl(cntkctl); ++ elf_hwcap |= HWCAP_EVTSTRM; ++#ifdef CONFIG_AARCH32_EL0 ++ a32_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; ++#endif ++ cpumask_set_cpu(smp_processor_id(), &evtstrm_available); ++} ++ ++static void arch_timer_configure_evtstream(void) ++{ ++ int evt_stream_div, pos; ++ ++ /* Find the closest power of two to the divisor */ ++ evt_stream_div = arch_timer_rate / ARCH_TIMER_EVT_STREAM_FREQ; ++ pos = fls(evt_stream_div); ++ if (pos > 1 && !(evt_stream_div & (1 << (pos - 2)))) ++ pos--; ++ /* enable event stream */ ++ arch_timer_evtstrm_enable(min(pos, 15)); ++} ++ ++static void arch_counter_set_user_access(void) ++{ ++ u32 cntkctl = arch_timer_get_cntkctl(); ++ ++ /* Disable user access to the timers and both counters */ ++ /* Also disable virtual event stream */ ++ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN ++ | ARCH_TIMER_USR_VT_ACCESS_EN ++ | ARCH_TIMER_USR_VCT_ACCESS_EN ++ | ARCH_TIMER_VIRT_EVT_EN ++ | ARCH_TIMER_USR_PCT_ACCESS_EN); ++ ++ /* ++ * Enable user access to the virtual counter if it doesn't ++ * need to be workaround. The vdso may have been already ++ * disabled though. 
++ */ ++ if (arch_timer_this_cpu_has_cntvct_wa() && !vdso_fix) ++ pr_info("CPU%d: Trapping CNTVCT access\n", smp_processor_id()); ++ else ++ cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN; ++ ++ arch_timer_set_cntkctl(cntkctl); ++} ++ ++static bool arch_timer_has_nonsecure_ppi(void) ++{ ++ return (arch_timer_uses_ppi == ARCH_TIMER_PHYS_SECURE_PPI && ++ arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++} ++ ++static u32 check_ppi_trigger(int irq) ++{ ++ u32 flags = irq_get_trigger_type(irq); ++ ++ if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) { ++ pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq); ++ pr_warn("WARNING: Please fix your firmware\n"); ++ flags = IRQF_TRIGGER_LOW; ++ } ++ ++ return flags; ++} ++ ++static int arch_timer_starting_cpu(unsigned int cpu) ++{ ++ struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); ++ u32 flags; ++ ++ __arch_timer_setup(ARCH_TIMER_TYPE_CP15, clk); ++ ++ flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]); ++ enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags); ++ ++ if (arch_timer_has_nonsecure_ppi()) { ++ flags = check_ppi_trigger(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++ enable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], ++ flags); ++ } ++ ++ arch_counter_set_user_access(); ++ if (evtstrm_enable) ++ arch_timer_configure_evtstream(); ++ ++ return 0; ++} ++ ++/* ++ * For historical reasons, when probing with DT we use whichever (non-zero) ++ * rate was probed first, and don't verify that others match. If the first node ++ * probed has a clock-frequency property, this overrides the HW register. ++ */ ++static void arch_timer_of_configure_rate(u32 rate, struct device_node *np) ++{ ++ /* Who has more than one independent system counter? */ ++ if (arch_timer_rate) ++ return; ++ ++ if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) ++ arch_timer_rate = rate; ++ ++ /* Check the timer frequency. */ ++ if (arch_timer_rate == 0) ++ pr_warn("frequency not available\n"); ++} ++ ++static void arch_timer_banner(unsigned type) ++{ ++ pr_info("%s%s%s timer(s) running at %lu.%02luMHz (%s%s%s).\n", ++ type & ARCH_TIMER_TYPE_CP15 ? "cp15" : "", ++ type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? ++ " and " : "", ++ type & ARCH_TIMER_TYPE_MEM ? "mmio" : "", ++ (unsigned long)arch_timer_rate / 1000000, ++ (unsigned long)(arch_timer_rate / 10000) % 100, ++ type & ARCH_TIMER_TYPE_CP15 ? ++ (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ? "virt" : "phys" : ++ "", ++ type == (ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM) ? "/" : "", ++ type & ARCH_TIMER_TYPE_MEM ? ++ arch_timer_mem_use_virtual ? "virt" : "phys" : ++ ""); ++} ++ ++u32 arch_timer_get_rate(void) ++{ ++ return arch_timer_rate; ++} ++ ++bool arch_timer_evtstrm_available(void) ++{ ++ /* ++ * We might get called from a preemptible context. This is fine ++ * because availability of the event stream should be always the same ++ * for a preemptible context and context where we might resume a task. 
++ */ ++ return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available); ++} ++ ++static u64 arch_counter_get_cntvct_mem(void) ++{ ++ u32 vct_lo, vct_hi, tmp_hi; ++ ++ do { ++ vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); ++ vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO); ++ tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); ++ } while (vct_hi != tmp_hi); ++ ++ return ((u64) vct_hi << 32) | vct_lo; ++} ++ ++static struct arch_timer_kvm_info arch_timer_kvm_info; ++ ++struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) ++{ ++ return &arch_timer_kvm_info; ++} ++ ++static void __init arch_counter_register(unsigned type) ++{ ++ u64 start_count; ++ ++ /* Register the CP15 based counter if we have one */ ++ if (type & ARCH_TIMER_TYPE_CP15) { ++ if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || ++ arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) ++ arch_timer_read_counter = arch_counter_get_cntvct; ++ else ++ arch_timer_read_counter = arch_counter_get_cntpct; ++ ++ clocksource_counter.archdata.vdso_direct = vdso_default; ++ clocksource_counter.archdata.vdso_fix = vdso_fix; ++ } else { ++ arch_timer_read_counter = arch_counter_get_cntvct_mem; ++ } ++ ++ if (!arch_counter_suspend_stop) ++ clocksource_counter.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; ++ start_count = arch_timer_read_counter(); ++ clocksource_register_hz(&clocksource_counter, arch_timer_rate); ++ cyclecounter.mult = clocksource_counter.mult; ++ cyclecounter.shift = clocksource_counter.shift; ++ timecounter_init(&arch_timer_kvm_info.timecounter, ++ &cyclecounter, start_count); ++ ++ /* 56 bits minimum, so we assume worst case rollover */ ++ sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); ++} ++ ++static void arch_timer_stop(struct clock_event_device *clk) ++{ ++ pr_debug("disable IRQ%d cpu #%d\n", clk->irq, smp_processor_id()); ++ ++ disable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi]); ++ if (arch_timer_has_nonsecure_ppi()) ++ disable_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]); ++ ++ clk->set_state_shutdown(clk); ++} ++ ++static int arch_timer_dying_cpu(unsigned int cpu) ++{ ++ struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); ++ ++ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); ++ ++ arch_timer_stop(clk); ++ return 0; ++} ++ ++#ifdef CONFIG_CPU_PM ++static DEFINE_PER_CPU(unsigned long, saved_cntkctl); ++static int arch_timer_cpu_pm_notify(struct notifier_block *self, ++ unsigned long action, void *hcpu) ++{ ++ if (action == CPU_PM_ENTER) { ++ __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl()); ++ ++ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); ++ } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { ++ arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); ++ ++ if (elf_hwcap & HWCAP_EVTSTRM) ++ cpumask_set_cpu(smp_processor_id(), &evtstrm_available); ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block arch_timer_cpu_pm_notifier = { ++ .notifier_call = arch_timer_cpu_pm_notify, ++}; ++ ++static int __init arch_timer_cpu_pm_init(void) ++{ ++ return cpu_pm_register_notifier(&arch_timer_cpu_pm_notifier); ++} ++ ++static void __init arch_timer_cpu_pm_deinit(void) ++{ ++ WARN_ON(cpu_pm_unregister_notifier(&arch_timer_cpu_pm_notifier)); ++} ++ ++#else ++static int __init arch_timer_cpu_pm_init(void) ++{ ++ return 0; ++} ++ ++static void __init arch_timer_cpu_pm_deinit(void) ++{ ++} ++#endif ++ ++static int __init arch_timer_register(void) ++{ ++ int err; ++ int ppi; ++ ++ arch_timer_evt = 
alloc_percpu(struct clock_event_device); ++ if (!arch_timer_evt) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ ppi = arch_timer_ppi[arch_timer_uses_ppi]; ++ switch (arch_timer_uses_ppi) { ++ case ARCH_TIMER_VIRT_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_virt, ++ "arch_timer", arch_timer_evt); ++ break; ++ case ARCH_TIMER_PHYS_SECURE_PPI: ++ case ARCH_TIMER_PHYS_NONSECURE_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ if (!err && arch_timer_has_nonsecure_ppi()) { ++ ppi = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI]; ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ if (err) ++ free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_SECURE_PPI], ++ arch_timer_evt); ++ } ++ break; ++ case ARCH_TIMER_HYP_PPI: ++ err = request_percpu_irq(ppi, arch_timer_handler_phys, ++ "arch_timer", arch_timer_evt); ++ break; ++ default: ++ BUG(); ++ } ++ ++ if (err) { ++ pr_err("can't register interrupt %d (%d)\n", ppi, err); ++ goto out_free; ++ } ++ ++ err = arch_timer_cpu_pm_init(); ++ if (err) ++ goto out_unreg_notify; ++ ++ /* Register and immediately configure the timer on the boot CPU */ ++ err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING, ++ "clockevents/arm/arch_timer:starting", ++ arch_timer_starting_cpu, arch_timer_dying_cpu); ++ if (err) ++ goto out_unreg_cpupm; ++ return 0; ++ ++out_unreg_cpupm: ++ arch_timer_cpu_pm_deinit(); ++ ++out_unreg_notify: ++ free_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], arch_timer_evt); ++ if (arch_timer_has_nonsecure_ppi()) ++ free_percpu_irq(arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI], ++ arch_timer_evt); ++ ++out_free: ++ free_percpu(arch_timer_evt); ++out: ++ return err; ++} ++ ++static int __init arch_timer_mem_register(void __iomem *base, unsigned int irq) ++{ ++ int ret; ++ irq_handler_t func; ++ struct arch_timer *t; ++ ++ t = kzalloc(sizeof(*t), GFP_KERNEL); ++ if (!t) ++ return -ENOMEM; ++ ++ t->base = base; ++ t->evt.irq = irq; ++ __arch_timer_setup(ARCH_TIMER_TYPE_MEM, &t->evt); ++ ++ if (arch_timer_mem_use_virtual) ++ func = arch_timer_handler_virt_mem; ++ else ++ func = arch_timer_handler_phys_mem; ++ ++ ret = request_irq(irq, func, IRQF_TIMER, "arch_mem_timer", &t->evt); ++ if (ret) { ++ pr_err("Failed to request mem timer irq\n"); ++ kfree(t); ++ } ++ ++ return ret; ++} ++ ++static const struct of_device_id arch_timer_of_match[] __initconst = { ++ { .compatible = "arm,armv7-timer", }, ++ { .compatible = "arm,armv8-timer", }, ++ {}, ++}; ++ ++static const struct of_device_id arch_timer_mem_of_match[] __initconst = { ++ { .compatible = "arm,armv7-timer-mem", }, ++ {}, ++}; ++ ++static bool __init arch_timer_needs_of_probing(void) ++{ ++ struct device_node *dn; ++ bool needs_probing = false; ++ unsigned int mask = ARCH_TIMER_TYPE_CP15 | ARCH_TIMER_TYPE_MEM; ++ ++ /* We have two timers, and both device-tree nodes are probed. */ ++ if ((arch_timers_present & mask) == mask) ++ return false; ++ ++ /* ++ * Only one type of timer is probed, ++ * check if we have another type of timer node in device-tree. 
++ */ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) ++ dn = of_find_matching_node(NULL, arch_timer_mem_of_match); ++ else ++ dn = of_find_matching_node(NULL, arch_timer_of_match); ++ ++ if (dn && of_device_is_available(dn)) ++ needs_probing = true; ++ ++ of_node_put(dn); ++ ++ return needs_probing; ++} ++ ++static int __init arch_timer_common_init(void) ++{ ++ arch_timer_banner(arch_timers_present); ++ arch_counter_register(arch_timers_present); ++ return arch_timer_arch_init(); ++} ++ ++/** ++ * arch_timer_select_ppi() - Select suitable PPI for the current system. ++ * ++ * If HYP mode is available, we know that the physical timer ++ * has been configured to be accessible from PL1. Use it, so ++ * that a guest can use the virtual timer instead. ++ * ++ * On ARMv8.1 with VH extensions, the kernel runs in HYP. VHE ++ * accesses to CNTP_*_EL1 registers are silently redirected to ++ * their CNTHP_*_EL2 counterparts, and use a different PPI ++ * number. ++ * ++ * If no interrupt provided for virtual timer, we'll have to ++ * stick to the physical timer. It'd better be accessible... ++ * For arm64 we never use the secure interrupt. ++ * ++ * Return: a suitable PPI type for the current system. ++ */ ++static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void) ++{ ++ if (is_kernel_in_hyp_mode()) ++ return ARCH_TIMER_HYP_PPI; ++ ++ if (!is_hyp_mode_available() && arch_timer_ppi[ARCH_TIMER_VIRT_PPI]) ++ return ARCH_TIMER_VIRT_PPI; ++ ++ if (IS_ENABLED(CONFIG_ARM64)) ++ return ARCH_TIMER_PHYS_NONSECURE_PPI; ++ ++ return ARCH_TIMER_PHYS_SECURE_PPI; ++} ++ ++static int __init arch_timer_of_init(struct device_node *np) ++{ ++ int i, ret; ++ u32 rate; ++ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { ++ pr_warn("multiple nodes in dt, skipping\n"); ++ return 0; ++ } ++ ++ arch_timers_present |= ARCH_TIMER_TYPE_CP15; ++ for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) ++ arch_timer_ppi[i] = irq_of_parse_and_map(np, i); ++ ++ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ ++ rate = arch_timer_get_cntfrq(); ++ arch_timer_of_configure_rate(rate, np); ++ ++ arch_timer_c3stop = !of_property_read_bool(np, "always-on"); ++ ++ /* Check for globally applicable workarounds */ ++ arch_timer_check_ool_workaround(ate_match_dt, np); ++ ++ /* ++ * If we cannot rely on firmware initializing the timer registers then ++ * we should use the physical timers instead. ++ */ ++ if (IS_ENABLED(CONFIG_ARM) && ++ of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) ++ arch_timer_uses_ppi = ARCH_TIMER_PHYS_SECURE_PPI; ++ else ++ arch_timer_uses_ppi = arch_timer_select_ppi(); ++ ++ if (!arch_timer_ppi[arch_timer_uses_ppi]) { ++ pr_err("No interrupt available, giving up\n"); ++ return -EINVAL; ++ } ++ ++ /* On some systems, the counter stops ticking when in suspend. 
*/ ++ arch_counter_suspend_stop = of_property_read_bool(np, ++ "arm,no-tick-in-suspend"); ++ ++ ret = arch_timer_register(); ++ if (ret) ++ return ret; ++ ++ if (arch_timer_needs_of_probing()) ++ return 0; ++ ++ return arch_timer_common_init(); ++} ++TIMER_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); ++TIMER_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); ++ ++static u32 __init ++arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame) ++{ ++ void __iomem *base; ++ u32 rate; ++ ++ base = ioremap(frame->cntbase, frame->size); ++ if (!base) { ++ pr_err("Unable to map frame @ %pa\n", &frame->cntbase); ++ return 0; ++ } ++ ++ rate = readl_relaxed(base + CNTFRQ); ++ ++ iounmap(base); ++ ++ return rate; ++} ++ ++static struct arch_timer_mem_frame * __init ++arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem) ++{ ++ struct arch_timer_mem_frame *frame, *best_frame = NULL; ++ void __iomem *cntctlbase; ++ u32 cnttidr; ++ int i; ++ ++ cntctlbase = ioremap(timer_mem->cntctlbase, timer_mem->size); ++ if (!cntctlbase) { ++ pr_err("Can't map CNTCTLBase @ %pa\n", ++ &timer_mem->cntctlbase); ++ return NULL; ++ } ++ ++ cnttidr = readl_relaxed(cntctlbase + CNTTIDR); ++ ++ /* ++ * Try to find a virtual capable frame. Otherwise fall back to a ++ * physical capable frame. ++ */ ++ for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { ++ u32 cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT | ++ CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT; ++ ++ frame = &timer_mem->frame[i]; ++ if (!frame->valid) ++ continue; ++ ++ /* Try enabling everything, and see what sticks */ ++ writel_relaxed(cntacr, cntctlbase + CNTACR(i)); ++ cntacr = readl_relaxed(cntctlbase + CNTACR(i)); ++ ++ if ((cnttidr & CNTTIDR_VIRT(i)) && ++ !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) { ++ best_frame = frame; ++ arch_timer_mem_use_virtual = true; ++ break; ++ } ++ ++ if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT)) ++ continue; ++ ++ best_frame = frame; ++ } ++ ++ iounmap(cntctlbase); ++ ++ return best_frame; ++} ++ ++static int __init ++arch_timer_mem_frame_register(struct arch_timer_mem_frame *frame) ++{ ++ void __iomem *base; ++ int ret, irq = 0; ++ ++ if (arch_timer_mem_use_virtual) ++ irq = frame->virt_irq; ++ else ++ irq = frame->phys_irq; ++ ++ if (!irq) { ++ pr_err("Frame missing %s irq.\n", ++ arch_timer_mem_use_virtual ? 
"virt" : "phys"); ++ return -EINVAL; ++ } ++ ++ if (!request_mem_region(frame->cntbase, frame->size, ++ "arch_mem_timer")) ++ return -EBUSY; ++ ++ base = ioremap(frame->cntbase, frame->size); ++ if (!base) { ++ pr_err("Can't map frame's registers\n"); ++ return -ENXIO; ++ } ++ ++ ret = arch_timer_mem_register(base, irq); ++ if (ret) { ++ iounmap(base); ++ return ret; ++ } ++ ++ arch_counter_base = base; ++ arch_timers_present |= ARCH_TIMER_TYPE_MEM; ++ ++ return 0; ++} ++ ++static int __init arch_timer_mem_of_init(struct device_node *np) ++{ ++ struct arch_timer_mem *timer_mem; ++ struct arch_timer_mem_frame *frame; ++ struct device_node *frame_node; ++ struct resource res; ++ int ret = -EINVAL; ++ u32 rate; ++ ++ timer_mem = kzalloc(sizeof(*timer_mem), GFP_KERNEL); ++ if (!timer_mem) ++ return -ENOMEM; ++ ++ if (of_address_to_resource(np, 0, &res)) ++ goto out; ++ timer_mem->cntctlbase = res.start; ++ timer_mem->size = resource_size(&res); ++ ++ for_each_available_child_of_node(np, frame_node) { ++ u32 n; ++ struct arch_timer_mem_frame *frame; ++ ++ if (of_property_read_u32(frame_node, "frame-number", &n)) { ++ pr_err(FW_BUG "Missing frame-number.\n"); ++ of_node_put(frame_node); ++ goto out; ++ } ++ if (n >= ARCH_TIMER_MEM_MAX_FRAMES) { ++ pr_err(FW_BUG "Wrong frame-number, only 0-%u are permitted.\n", ++ ARCH_TIMER_MEM_MAX_FRAMES - 1); ++ of_node_put(frame_node); ++ goto out; ++ } ++ frame = &timer_mem->frame[n]; ++ ++ if (frame->valid) { ++ pr_err(FW_BUG "Duplicated frame-number.\n"); ++ of_node_put(frame_node); ++ goto out; ++ } ++ ++ if (of_address_to_resource(frame_node, 0, &res)) { ++ of_node_put(frame_node); ++ goto out; ++ } ++ frame->cntbase = res.start; ++ frame->size = resource_size(&res); ++ ++ frame->virt_irq = irq_of_parse_and_map(frame_node, ++ ARCH_TIMER_VIRT_SPI); ++ frame->phys_irq = irq_of_parse_and_map(frame_node, ++ ARCH_TIMER_PHYS_SPI); ++ ++ frame->valid = true; ++ } ++ ++ frame = arch_timer_mem_find_best_frame(timer_mem); ++ if (!frame) { ++ pr_err("Unable to find a suitable frame in timer @ %pa\n", ++ &timer_mem->cntctlbase); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ rate = arch_timer_mem_frame_get_cntfrq(frame); ++ arch_timer_of_configure_rate(rate, np); ++ ++ ret = arch_timer_mem_frame_register(frame); ++ if (!ret && !arch_timer_needs_of_probing()) ++ ret = arch_timer_common_init(); ++out: ++ kfree(timer_mem); ++ return ret; ++} ++TIMER_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", ++ arch_timer_mem_of_init); ++ ++#ifdef CONFIG_ACPI_GTDT ++static int __init ++arch_timer_mem_verify_cntfrq(struct arch_timer_mem *timer_mem) ++{ ++ struct arch_timer_mem_frame *frame; ++ u32 rate; ++ int i; ++ ++ for (i = 0; i < ARCH_TIMER_MEM_MAX_FRAMES; i++) { ++ frame = &timer_mem->frame[i]; ++ ++ if (!frame->valid) ++ continue; ++ ++ rate = arch_timer_mem_frame_get_cntfrq(frame); ++ if (rate == arch_timer_rate) ++ continue; ++ ++ pr_err(FW_BUG "CNTFRQ mismatch: frame @ %pa: (0x%08lx), CPU: (0x%08lx)\n", ++ &frame->cntbase, ++ (unsigned long)rate, (unsigned long)arch_timer_rate); ++ ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int __init arch_timer_mem_acpi_init(int platform_timer_count) ++{ ++ struct arch_timer_mem *timers, *timer; ++ struct arch_timer_mem_frame *frame, *best_frame = NULL; ++ int timer_count, i, ret = 0; ++ ++ timers = kcalloc(platform_timer_count, sizeof(*timers), ++ GFP_KERNEL); ++ if (!timers) ++ return -ENOMEM; ++ ++ ret = acpi_arch_timer_mem_init(timers, &timer_count); ++ if (ret || !timer_count) ++ goto out; ++ ++ /* ++ * While 
unlikely, it's theoretically possible that none of the frames ++ * in a timer expose the combination of feature we want. ++ */ ++ for (i = 0; i < timer_count; i++) { ++ timer = &timers[i]; ++ ++ frame = arch_timer_mem_find_best_frame(timer); ++ if (!best_frame) ++ best_frame = frame; ++ ++ ret = arch_timer_mem_verify_cntfrq(timer); ++ if (ret) { ++ pr_err("Disabling MMIO timers due to CNTFRQ mismatch\n"); ++ goto out; ++ } ++ ++ if (!best_frame) /* implies !frame */ ++ /* ++ * Only complain about missing suitable frames if we ++ * haven't already found one in a previous iteration. ++ */ ++ pr_err("Unable to find a suitable frame in timer @ %pa\n", ++ &timer->cntctlbase); ++ } ++ ++ if (best_frame) ++ ret = arch_timer_mem_frame_register(best_frame); ++out: ++ kfree(timers); ++ return ret; ++} ++ ++/* Initialize per-processor generic timer and memory-mapped timer(if present) */ ++static int __init arch_timer_acpi_init(struct acpi_table_header *table) ++{ ++ int ret, platform_timer_count; ++ ++ if (arch_timers_present & ARCH_TIMER_TYPE_CP15) { ++ pr_warn("already initialized, skipping\n"); ++ return -EINVAL; ++ } ++ ++ arch_timers_present |= ARCH_TIMER_TYPE_CP15; ++ ++ ret = acpi_gtdt_init(table, &platform_timer_count); ++ if (ret) { ++ pr_err("Failed to init GTDT table.\n"); ++ return ret; ++ } ++ ++ arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_PHYS_NONSECURE_PPI); ++ ++ arch_timer_ppi[ARCH_TIMER_VIRT_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_VIRT_PPI); ++ ++ arch_timer_ppi[ARCH_TIMER_HYP_PPI] = ++ acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI); ++ ++ arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI]; ++ ++ /* ++ * When probing via ACPI, we have no mechanism to override the sysreg ++ * CNTFRQ value. This *must* be correct. ++ */ ++ arch_timer_rate = arch_timer_get_cntfrq(); ++ if (!arch_timer_rate) { ++ pr_err(FW_BUG "frequency not available.\n"); ++ return -EINVAL; ++ } ++ ++ arch_timer_uses_ppi = arch_timer_select_ppi(); ++ if (!arch_timer_ppi[arch_timer_uses_ppi]) { ++ pr_err("No interrupt available, giving up\n"); ++ return -EINVAL; ++ } ++ ++ /* Always-on capability */ ++ arch_timer_c3stop = acpi_gtdt_c3stop(arch_timer_uses_ppi); ++ ++ /* Check for globally applicable workarounds */ ++ arch_timer_check_ool_workaround(ate_match_acpi_oem_info, table); ++ ++ ret = arch_timer_register(); ++ if (ret) ++ return ret; ++ ++ if (platform_timer_count && ++ arch_timer_mem_acpi_init(platform_timer_count)) ++ pr_err("Failed to initialize memory-mapped timer.\n"); ++ ++ return arch_timer_common_init(); ++} ++TIMER_ACPI_DECLARE(arch_timer, ACPI_SIG_GTDT, arch_timer_acpi_init); ++#endif +diff -uprN kernel/drivers/clocksource/arm_global_timer.c kernel_new/drivers/clocksource/arm_global_timer.c +--- kernel/drivers/clocksource/arm_global_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/arm_global_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #include + +@@ -49,10 +50,69 @@ + * the units for all operations. 
+ */ + static void __iomem *gt_base; ++static unsigned long gt_pbase; ++static struct clk *gt_clk; + static unsigned long gt_clk_rate; + static int gt_ppi; + static struct clock_event_device __percpu *gt_evt; + ++#ifdef CONFIG_IPIPE ++ ++static struct clocksource gt_clocksource; ++ ++static int gt_clockevent_ack(struct clock_event_device *evt); ++ ++static DEFINE_PER_CPU(struct ipipe_timer, gt_itimer); ++ ++static unsigned int refresh_gt_freq(void) ++{ ++ gt_clk_rate = clk_get_rate(gt_clk); ++ ++ __clocksource_update_freq_hz(>_clocksource, gt_clk_rate); ++ ++ return gt_clk_rate; ++} ++ ++static inline void gt_ipipe_cs_setup(void) ++{ ++ struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING, ++ .freq = gt_clk_rate, ++ .counter_vaddr = (unsigned long)gt_base, ++ .u = { ++ { ++ .counter_paddr = gt_pbase, ++ .mask = 0xffffffff, ++ } ++ }, ++ .refresh_freq = refresh_gt_freq, ++ }; ++ ++ __ipipe_tsc_register(&tsc_info); ++} ++ ++static void gt_itimer_ack(void) ++{ ++ struct clock_event_device *evt = this_cpu_ptr(gt_evt); ++ gt_clockevent_ack(evt); ++} ++ ++static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) ++{ ++ evt->ipipe_timer = this_cpu_ptr(>_itimer); ++ evt->ipipe_timer->irq = evt->irq; ++ evt->ipipe_timer->ack = gt_itimer_ack; ++ evt->ipipe_timer->freq = gt_clk_rate; ++} ++ ++#else ++ ++static inline void gt_ipipe_cs_setup(void) { } ++ ++static inline void gt_ipipe_evt_setup(struct clock_event_device *evt) { } ++ ++#endif /* CONFIG_IPIPE */ ++ + /* + * To get the value from the Global Timer Counter register proceed as follows: + * 1. Read the upper 32-bit timer counter register +@@ -137,13 +197,11 @@ static int gt_clockevent_set_next_event( + return 0; + } + +-static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) ++static int gt_clockevent_ack(struct clock_event_device *evt) + { +- struct clock_event_device *evt = dev_id; +- + if (!(readl_relaxed(gt_base + GT_INT_STATUS) & + GT_INT_STATUS_EVENT_FLAG)) +- return IRQ_NONE; ++ return IS_ENABLED(CONFIG_IPIPE); + + /** + * ERRATA 740657( Global Timer can send 2 interrupts for +@@ -156,10 +214,23 @@ static irqreturn_t gt_clockevent_interru + * the Global Timer flag _after_ having incremented + * the Comparator register value to a higher value. 
+ */ +- if (clockevent_state_oneshot(evt)) ++ if (clockevent_ipipe_stolen(evt) || clockevent_state_oneshot(evt)) + gt_compare_set(ULONG_MAX, 0); + + writel_relaxed(GT_INT_STATUS_EVENT_FLAG, gt_base + GT_INT_STATUS); ++ ++ return 1; ++} ++ ++static irqreturn_t gt_clockevent_interrupt(int irq, void *dev_id) ++{ ++ struct clock_event_device *evt = dev_id; ++ ++ if (!clockevent_ipipe_stolen(evt)) { ++ if (!gt_clockevent_ack(evt)) ++ return IRQ_NONE; ++ } ++ + evt->event_handler(evt); + + return IRQ_HANDLED; +@@ -180,6 +251,7 @@ static int gt_starting_cpu(unsigned int + clk->cpumask = cpumask_of(cpu); + clk->rating = 300; + clk->irq = gt_ppi; ++ gt_ipipe_evt_setup(clk); + clockevents_config_and_register(clk, gt_clk_rate, + 1, 0xffffffff); + enable_percpu_irq(clk->irq, IRQ_TYPE_NONE); +@@ -252,13 +324,14 @@ static int __init gt_clocksource_init(vo + #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK + sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); + #endif ++ gt_ipipe_cs_setup(); + return clocksource_register_hz(>_clocksource, gt_clk_rate); + } + + static int __init global_timer_of_register(struct device_node *np) + { +- struct clk *gt_clk; + int err = 0; ++ struct resource res; + + /* + * In A9 r2p0 the comparators for each processor with the global timer +@@ -283,6 +356,11 @@ static int __init global_timer_of_regist + return -ENXIO; + } + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ ++ gt_pbase = res.start; ++ + gt_clk = of_clk_get(np, 0); + if (!IS_ERR(gt_clk)) { + err = clk_prepare_enable(gt_clk); +diff -uprN kernel/drivers/clocksource/bcm2835_timer.c kernel_new/drivers/clocksource/bcm2835_timer.c +--- kernel/drivers/clocksource/bcm2835_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/bcm2835_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -29,6 +29,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + +@@ -39,6 +42,7 @@ + #define MAX_TIMER 3 + #define DEFAULT_TIMER 3 + ++ + struct bcm2835_timer { + void __iomem *control; + void __iomem *compare; +@@ -46,9 +50,53 @@ struct bcm2835_timer { + struct clock_event_device evt; + struct irqaction act; + }; +- + static void __iomem *system_clock __read_mostly; + ++#ifdef CONFIG_IPIPE ++ ++static void __iomem *t_base; ++static unsigned long t_pbase; ++ ++static inline void bcm2835_ipipe_cs_setup(unsigned int freq) ++{ ++ struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING, ++ .freq = freq, ++ .counter_vaddr = (unsigned long)t_base + 0x04, ++ .u = { ++ { ++ .counter_paddr = t_pbase + 0x04, ++ .mask = 0xffffffff, ++ } ++ }, ++ }; ++ ++ __ipipe_tsc_register(&tsc_info); ++} ++ ++static struct ipipe_timer bcm2835_itimer; ++ ++static void bcm2835_itimer_ack(void) ++{ ++ struct bcm2835_timer *timer = container_of(bcm2835_itimer.host_timer, ++ struct bcm2835_timer, evt); ++ writel(timer->match_mask, timer->control); ++} ++ ++static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt, ++ int freq) ++{ ++ evt->ipipe_timer = &bcm2835_itimer; ++ evt->ipipe_timer->irq = evt->irq; ++ evt->ipipe_timer->ack = bcm2835_itimer_ack; ++ evt->ipipe_timer->freq = freq; ++} ++ ++#else ++static inline void bcm2835_ipipe_cs_setup(void) { } ++static inline void bcm2835_ipipe_evt_setup(struct clock_event_device *evt) { } ++#endif /* CONFIG_IPIPE */ ++ + static u64 notrace bcm2835_sched_read(void) + { + return readl_relaxed(system_clock); +@@ -59,8 +107,7 @@ static int bcm2835_time_set_next_event(u + { + struct bcm2835_timer *timer = 
container_of(evt_dev, + struct bcm2835_timer, evt); +- writel_relaxed(readl_relaxed(system_clock) + event, +- timer->compare); ++ writel_relaxed(readl_relaxed(system_clock) + event, timer->compare); + return 0; + } + +@@ -68,9 +115,13 @@ static irqreturn_t bcm2835_time_interrup + { + struct bcm2835_timer *timer = dev_id; + void (*event_handler)(struct clock_event_device *); ++ ++ if (clockevent_ipipe_stolen(&timer->evt)) { ++ goto handle; ++ } + if (readl_relaxed(timer->control) & timer->match_mask) { + writel_relaxed(timer->match_mask, timer->control); +- ++ handle: + event_handler = READ_ONCE(timer->evt.event_handler); + if (event_handler) + event_handler(&timer->evt); +@@ -93,6 +144,17 @@ static int __init bcm2835_timer_init(str + return -ENXIO; + } + ++ if (IS_ENABLED(CONFIG_IPIPE)) { ++ struct resource res; ++ int ret; ++ ++ ret = of_address_to_resource(node, 0, &res); ++ if (ret) ++ res.start = 0; ++ t_base = base; ++ t_pbase = res.start; ++ } ++ + ret = of_property_read_u32(node, "clock-frequency", &freq); + if (ret) { + pr_err("Can't read clock-frequency\n"); +@@ -127,11 +189,22 @@ static int __init bcm2835_timer_init(str + timer->evt.set_next_event = bcm2835_time_set_next_event; + timer->evt.cpumask = cpumask_of(0); + timer->act.name = node->name; +- timer->act.flags = IRQF_TIMER | IRQF_SHARED; ++ timer->act.flags = IRQF_TIMER; + timer->act.dev_id = timer; + timer->act.handler = bcm2835_time_interrupt; + +- ret = setup_irq(irq, &timer->act); ++ if (IS_ENABLED(CONFIG_IPIPE)) { ++ bcm2835_ipipe_cs_setup(freq); ++ bcm2835_ipipe_evt_setup(&timer->evt, freq); ++ timer->evt.ipipe_timer = &bcm2835_itimer; ++ timer->evt.ipipe_timer->irq = irq; ++ timer->evt.ipipe_timer->ack = bcm2835_itimer_ack; ++ timer->evt.ipipe_timer->freq = freq; ++ } else { ++ timer->act.flags |= IRQF_SHARED; ++ } ++ ++ ret = setup_irq(irq, &timer->act); + if (ret) { + pr_err("Can't set up timer IRQ\n"); + goto err_iounmap; +diff -uprN kernel/drivers/clocksource/dw_apb_timer.c kernel_new/drivers/clocksource/dw_apb_timer.c +--- kernel/drivers/clocksource/dw_apb_timer.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/dw_apb_timer.c 2021-04-01 18:28:07.661863280 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -384,7 +385,7 @@ static void apbt_restart_clocksource(str + */ + struct dw_apb_clocksource * + dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, +- unsigned long freq) ++ unsigned long phys, unsigned long freq) + { + struct dw_apb_clocksource *dw_cs = kzalloc(sizeof(*dw_cs), GFP_KERNEL); + +@@ -399,10 +400,22 @@ dw_apb_clocksource_init(unsigned rating, + dw_cs->cs.mask = CLOCKSOURCE_MASK(32); + dw_cs->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; + dw_cs->cs.resume = apbt_restart_clocksource; ++ dw_cs->phys = phys; + + return dw_cs; + } + ++#ifdef CONFIG_IPIPE ++static struct __ipipe_tscinfo apb_tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, ++ .u = { ++ .dec = { ++ .mask = 0xffffffffU, ++ }, ++ }, ++}; ++#endif ++ + /** + * dw_apb_clocksource_register() - register the APB clocksource. 
+ * +@@ -411,6 +424,12 @@ dw_apb_clocksource_init(unsigned rating, + void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs) + { + clocksource_register_hz(&dw_cs->cs, dw_cs->timer.freq); ++#ifdef CONFIG_IPIPE ++ apb_tsc_info.u.dec.counter = (void *)(dw_cs->phys + APBTMR_N_CURRENT_VALUE); ++ apb_tsc_info.counter_vaddr = (unsigned long)dw_cs->timer.base + APBTMR_N_CURRENT_VALUE; ++ apb_tsc_info.freq = dw_cs->timer.freq; ++ __ipipe_tsc_register(&apb_tsc_info); ++#endif + } + + /** +diff -uprN kernel/drivers/clocksource/dw_apb_timer_of.c kernel_new/drivers/clocksource/dw_apb_timer_of.c +--- kernel/drivers/clocksource/dw_apb_timer_of.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/dw_apb_timer_of.c 2021-04-01 18:28:07.661863280 +0800 +@@ -25,16 +25,20 @@ + #include + + static void __init timer_get_base_and_rate(struct device_node *np, +- void __iomem **base, u32 *rate) ++ void __iomem **base, unsigned long *phys, ++ u32 *rate) + { + struct clk *timer_clk; ++ struct resource res; + struct clk *pclk; + + *base = of_iomap(np, 0); + +- if (!*base) ++ if (!*base || of_address_to_resource(np, 0, &res)) + panic("Unable to map regs for %s", np->name); + ++ *phys = res.start; ++ + /* + * Not all implementations use a periphal clock, so don't panic + * if it's not present +@@ -64,13 +68,14 @@ static void __init add_clockevent(struct + { + void __iomem *iobase; + struct dw_apb_clock_event_device *ced; ++ unsigned long phys; + u32 irq, rate; + + irq = irq_of_parse_and_map(event_timer, 0); + if (irq == 0) + panic("No IRQ for clock event timer"); + +- timer_get_base_and_rate(event_timer, &iobase, &rate); ++ timer_get_base_and_rate(event_timer, &iobase, &phys, &rate); + + ced = dw_apb_clockevent_init(0, event_timer->name, 300, iobase, irq, + rate); +@@ -87,11 +92,12 @@ static void __init add_clocksource(struc + { + void __iomem *iobase; + struct dw_apb_clocksource *cs; ++ unsigned long phys; + u32 rate; + +- timer_get_base_and_rate(source_timer, &iobase, &rate); ++ timer_get_base_and_rate(source_timer, &iobase, &phys, &rate); + +- cs = dw_apb_clocksource_init(300, source_timer->name, iobase, rate); ++ cs = dw_apb_clocksource_init(300, source_timer->name, iobase, phys, rate); + if (!cs) + panic("Unable to initialise clocksource device"); + +@@ -120,11 +126,12 @@ static const struct of_device_id sptimer + static void __init init_sched_clock(void) + { + struct device_node *sched_timer; ++ unsigned long phys; + + sched_timer = of_find_matching_node(NULL, sptimer_ids); + if (sched_timer) { + timer_get_base_and_rate(sched_timer, &sched_io_base, +- &sched_rate); ++ &phys, &sched_rate); + of_node_put(sched_timer); + } + +diff -uprN kernel/drivers/clocksource/timer-imx-gpt.c kernel_new/drivers/clocksource/timer-imx-gpt.c +--- kernel/drivers/clocksource/timer-imx-gpt.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/timer-imx-gpt.c 2021-04-01 18:28:07.661863280 +0800 +@@ -16,6 +16,8 @@ + #include + #include + #include ++#include ++#include + #include + + /* +@@ -61,6 +63,9 @@ + + struct imx_timer { + enum imx_gpt_type type; ++#ifdef CONFIG_IPIPE ++ unsigned long pbase; ++#endif + void __iomem *base; + int irq; + struct clk *clk_per; +@@ -265,6 +270,30 @@ static int mxc_set_oneshot(struct clock_ + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static struct imx_timer *global_imx_timer; ++ ++static void mxc_timer_ack(void) ++{ ++ global_imx_timer->gpt->gpt_irq_acknowledge(global_imx_timer); ++} ++ ++static struct __ipipe_tscinfo tsc_info = { ++ .type = 
IPIPE_TSC_TYPE_FREERUNNING, ++ .u = { ++ { ++ .mask = 0xffffffff, ++ }, ++ }, ++}; ++ ++static struct ipipe_timer mxc_itimer = { ++ .ack = mxc_timer_ack, ++}; ++ ++#endif ++ + /* + * IRQ handler for the timer + */ +@@ -276,7 +305,8 @@ static irqreturn_t mxc_timer_interrupt(i + + tstat = readl_relaxed(imxtm->base + imxtm->gpt->reg_tstat); + +- imxtm->gpt->gpt_irq_acknowledge(imxtm); ++ if (!clockevent_ipipe_stolen(ced)) ++ imxtm->gpt->gpt_irq_acknowledge(imxtm); + + ced->event_handler(ced); + +@@ -297,6 +327,9 @@ static int __init mxc_clockevent_init(st + ced->rating = 200; + ced->cpumask = cpumask_of(0); + ced->irq = imxtm->irq; ++#ifdef CONFIG_IPIPE ++ ced->ipipe_timer = &mxc_itimer; ++#endif + clockevents_config_and_register(ced, clk_get_rate(imxtm->clk_per), + 0xff, 0xfffffffe); + +@@ -436,6 +469,17 @@ static int __init _mxc_timer_init(struct + if (ret) + return ret; + ++#ifdef CONFIG_IPIPE ++ tsc_info.u.counter_paddr = imxtm->pbase + imxtm->gpt->reg_tcn; ++ tsc_info.counter_vaddr = (unsigned long)imxtm->base + imxtm->gpt->reg_tcn; ++ tsc_info.freq = clk_get_rate(imxtm->clk_per); ++ __ipipe_tsc_register(&tsc_info); ++ mxc_itimer.irq = imxtm->irq; ++ mxc_itimer.freq = clk_get_rate(imxtm->clk_per); ++ mxc_itimer.min_delay_ticks = ipipe_timer_ns2ticks(&mxc_itimer, 2000); ++ global_imx_timer = imxtm; ++#endif /* CONFIG_IPIPE */ ++ + return mxc_clockevent_init(imxtm); + } + +@@ -451,6 +495,9 @@ void __init mxc_timer_init(unsigned long + + imxtm->base = ioremap(pbase, SZ_4K); + BUG_ON(!imxtm->base); ++#ifdef CONFIG_IPIPE ++ imxtm->pbase = pbase; ++#endif + + imxtm->type = type; + imxtm->irq = irq; +@@ -462,6 +509,7 @@ static int __init mxc_timer_init_dt(stru + { + struct imx_timer *imxtm; + static int initialized; ++ struct resource res; + int ret; + + /* Support one instance only */ +@@ -480,6 +528,13 @@ static int __init mxc_timer_init_dt(stru + if (imxtm->irq <= 0) + return -EINVAL; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ ++#ifdef CONFIG_IPIPE ++ imxtm->pbase = res.start; ++#endif ++ + imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); + + /* Try osc_per first, and fall back to per otherwise */ +diff -uprN kernel/drivers/clocksource/timer-sp804.c kernel_new/drivers/clocksource/timer-sp804.c +--- kernel/drivers/clocksource/timer-sp804.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/clocksource/timer-sp804.c 2021-04-01 18:28:07.661863280 +0800 +@@ -30,11 +30,25 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + + #include "timer-sp.h" + ++#ifdef CONFIG_IPIPE ++static struct __ipipe_tscinfo tsc_info = { ++ .type = IPIPE_TSC_TYPE_FREERUNNING_COUNTDOWN, ++ .u = { ++ { ++ .mask = 0xffffffff, ++ }, ++ }, ++}; ++#endif /* CONFIG_IPIPE */ ++ + static long __init sp804_get_clock_rate(struct clk *clk) + { + long rate; +@@ -79,6 +93,7 @@ void __init sp804_timer_disable(void __i + } + + int __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, ++ unsigned long phys, + const char *name, + struct clk *clk, + int use_sched_clock) +@@ -113,6 +128,12 @@ int __init __sp804_clocksource_and_sche + sched_clock_register(sp804_read, 32, rate); + } + ++#ifdef CONFIG_IPIPE ++ tsc_info.freq = rate; ++ tsc_info.counter_vaddr = (unsigned long)base + TIMER_VALUE; ++ tsc_info.u.counter_paddr = phys + TIMER_VALUE; ++ __ipipe_tsc_register(&tsc_info); ++#endif + return 0; + } + +@@ -227,6 +248,7 @@ static int __init sp804_of_init(struct d + u32 irq_num = 0; + struct clk *clk1, *clk2; + const char *name = of_get_property(np, "compatible", 
NULL); ++ struct resource res; + + base = of_iomap(np, 0); + if (!base) +@@ -260,6 +282,9 @@ static int __init sp804_of_init(struct d + if (irq <= 0) + goto err; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ + of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); + if (irq_num == 2) { + +@@ -267,7 +292,7 @@ static int __init sp804_of_init(struct d + if (ret) + goto err; + +- ret = __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); ++ ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk1, 1); + if (ret) + goto err; + } else { +@@ -277,7 +302,7 @@ static int __init sp804_of_init(struct d + goto err; + + ret =__sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, +- name, clk2, 1); ++ res.start, name, clk2, 1); + if (ret) + goto err; + } +@@ -297,6 +322,7 @@ static int __init integrator_cp_of_init( + int irq, ret = -EINVAL; + const char *name = of_get_property(np, "compatible", NULL); + struct clk *clk; ++ struct resource res; + + base = of_iomap(np, 0); + if (!base) { +@@ -316,8 +342,11 @@ static int __init integrator_cp_of_init( + if (init_count == 2 || !of_device_is_available(np)) + goto err; + ++ if (of_address_to_resource(np, 0, &res)) ++ res.start = 0; ++ + if (!init_count) { +- ret = __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); ++ ret = __sp804_clocksource_and_sched_clock_init(base, res.start, name, clk, 0); + if (ret) + goto err; + } else { +diff -uprN kernel/drivers/cpuidle/cpuidle.c kernel_new/drivers/cpuidle/cpuidle.c +--- kernel/drivers/cpuidle/cpuidle.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/cpuidle/cpuidle.c 2021-04-01 18:28:07.661863280 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -206,6 +207,19 @@ int cpuidle_enter_state(struct cpuidle_d + s64 diff; + + /* ++ * A co-kernel running on the head stage of the IRQ pipeline ++ * may deny switching to a deeper C-state. If so, call the ++ * default idle routine instead. If the co-kernel cannot bear ++ * with the latency induced by the default idling operation, ++ * then CPUIDLE is not usable and should be disabled at build ++ * time. ++ */ ++ if (!ipipe_enter_cpuidle(dev, target_state)) { ++ default_idle_call(); ++ return -EBUSY; ++ } ++ ++ /* + * Tell the time framework to switch to a broadcast timer because our + * local timer will be shut down. If a local timer is used from another + * CPU as a broadcast timer, this call may fail if it is not available. +@@ -229,6 +243,7 @@ int cpuidle_enter_state(struct cpuidle_d + + stop_critical_timings(); + entered_state = target_state->enter(dev, drv, index); ++ hard_cond_local_irq_enable(); + start_critical_timings(); + + sched_clock_idle_wakeup_event(); +diff -uprN kernel/drivers/cpuidle/cpuidle.c.orig kernel_new/drivers/cpuidle/cpuidle.c.orig +--- kernel/drivers/cpuidle/cpuidle.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/cpuidle/cpuidle.c.orig 2020-12-21 21:59:17.000000000 +0800 +@@ -0,0 +1,706 @@ ++/* ++ * cpuidle.c - core cpuidle infrastructure ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); ++DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev); ++ ++DEFINE_MUTEX(cpuidle_lock); ++LIST_HEAD(cpuidle_detected_devices); ++ ++static int enabled_devices; ++static int off __read_mostly; ++static int initialized __read_mostly; ++ ++int cpuidle_disabled(void) ++{ ++ return off; ++} ++void disable_cpuidle(void) ++{ ++ off = 1; ++} ++ ++bool cpuidle_not_available(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev) ++{ ++ return off || !initialized || !drv || !dev || !dev->enabled; ++} ++ ++/** ++ * cpuidle_play_dead - cpu off-lining ++ * ++ * Returns in case of an error or no driver ++ */ ++int cpuidle_play_dead(void) ++{ ++ struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ int i; ++ ++ if (!drv) ++ return -ENODEV; ++ ++ /* Find lowest-power state that supports long-term idle */ ++ for (i = drv->state_count - 1; i >= 0; i--) ++ if (drv->states[i].enter_dead) ++ return drv->states[i].enter_dead(dev, i); ++ ++ return -ENODEV; ++} ++ ++static int find_deepest_state(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev, ++ unsigned int max_latency, ++ unsigned int forbidden_flags, ++ bool s2idle) ++{ ++ unsigned int latency_req = 0; ++ int i, ret = 0; ++ ++ for (i = 1; i < drv->state_count; i++) { ++ struct cpuidle_state *s = &drv->states[i]; ++ struct cpuidle_state_usage *su = &dev->states_usage[i]; ++ ++ if (s->disabled || su->disable || s->exit_latency <= latency_req ++ || s->exit_latency > max_latency ++ || (s->flags & forbidden_flags) ++ || (s2idle && !s->enter_s2idle)) ++ continue; ++ ++ latency_req = s->exit_latency; ++ ret = i; ++ } ++ return ret; ++} ++ ++/** ++ * cpuidle_use_deepest_state - Set/clear governor override flag. ++ * @enable: New value of the flag. ++ * ++ * Set/unset the current CPU to use the deepest idle state (override governors ++ * going forward if set). ++ */ ++void cpuidle_use_deepest_state(bool enable) ++{ ++ struct cpuidle_device *dev; ++ ++ preempt_disable(); ++ dev = cpuidle_get_device(); ++ if (dev) ++ dev->use_deepest_state = enable; ++ preempt_enable(); ++} ++ ++/** ++ * cpuidle_find_deepest_state - Find the deepest available idle state. ++ * @drv: cpuidle driver for the given CPU. ++ * @dev: cpuidle device for the given CPU. ++ */ ++int cpuidle_find_deepest_state(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev) ++{ ++ return find_deepest_state(drv, dev, UINT_MAX, 0, false); ++} ++ ++#ifdef CONFIG_SUSPEND ++static void enter_s2idle_proper(struct cpuidle_driver *drv, ++ struct cpuidle_device *dev, int index) ++{ ++ ktime_t time_start, time_end; ++ ++ time_start = ns_to_ktime(local_clock()); ++ ++ /* ++ * trace_suspend_resume() called by tick_freeze() for the last CPU ++ * executing it contains RCU usage regarded as invalid in the idle ++ * context, so tell RCU about that. ++ */ ++ RCU_NONIDLE(tick_freeze()); ++ /* ++ * The state used here cannot be a "coupled" one, because the "coupled" ++ * cpuidle mechanism enables interrupts and doing that with timekeeping ++ * suspended is generally unsafe. 
++ */ ++ stop_critical_timings(); ++ drv->states[index].enter_s2idle(dev, drv, index); ++ if (WARN_ON_ONCE(!irqs_disabled())) ++ local_irq_disable(); ++ /* ++ * timekeeping_resume() that will be called by tick_unfreeze() for the ++ * first CPU executing it calls functions containing RCU read-side ++ * critical sections, so tell RCU about that. ++ */ ++ RCU_NONIDLE(tick_unfreeze()); ++ start_critical_timings(); ++ ++ time_end = ns_to_ktime(local_clock()); ++ ++ dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start); ++ dev->states_usage[index].s2idle_usage++; ++} ++ ++/** ++ * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle. ++ * @drv: cpuidle driver for the given CPU. ++ * @dev: cpuidle device for the given CPU. ++ * ++ * If there are states with the ->enter_s2idle callback, find the deepest of ++ * them and enter it with frozen tick. ++ */ ++int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) ++{ ++ int index; ++ ++ /* ++ * Find the deepest state with ->enter_s2idle present, which guarantees ++ * that interrupts won't be enabled when it exits and allows the tick to ++ * be frozen safely. ++ */ ++ index = find_deepest_state(drv, dev, UINT_MAX, 0, true); ++ if (index > 0) ++ enter_s2idle_proper(drv, dev, index); ++ ++ return index; ++} ++#endif /* CONFIG_SUSPEND */ ++ ++/** ++ * cpuidle_enter_state - enter the state and update stats ++ * @dev: cpuidle device for this cpu ++ * @drv: cpuidle driver for this cpu ++ * @index: index into the states table in @drv of the state to enter ++ */ ++int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, ++ int index) ++{ ++ int entered_state; ++ ++ struct cpuidle_state *target_state = &drv->states[index]; ++ bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); ++ ktime_t time_start, time_end; ++ s64 diff; ++ ++ /* ++ * Tell the time framework to switch to a broadcast timer because our ++ * local timer will be shut down. If a local timer is used from another ++ * CPU as a broadcast timer, this call may fail if it is not available. ++ */ ++ if (broadcast && tick_broadcast_enter()) { ++ index = find_deepest_state(drv, dev, target_state->exit_latency, ++ CPUIDLE_FLAG_TIMER_STOP, false); ++ if (index < 0) { ++ default_idle_call(); ++ return -EBUSY; ++ } ++ target_state = &drv->states[index]; ++ broadcast = false; ++ } ++ ++ /* Take note of the planned idle state. */ ++ sched_idle_set_state(target_state); ++ ++ trace_cpu_idle_rcuidle(index, dev->cpu); ++ time_start = ns_to_ktime(local_clock()); ++ ++ stop_critical_timings(); ++ entered_state = target_state->enter(dev, drv, index); ++ start_critical_timings(); ++ ++ sched_clock_idle_wakeup_event(); ++ time_end = ns_to_ktime(local_clock()); ++ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); ++ ++ /* The cpu is no longer idle or about to enter idle. */ ++ sched_idle_set_state(NULL); ++ ++ if (broadcast) { ++ if (WARN_ON_ONCE(!irqs_disabled())) ++ local_irq_disable(); ++ ++ tick_broadcast_exit(); ++ } ++ ++ if (!cpuidle_state_is_coupled(drv, index)) ++ local_irq_enable(); ++ ++ diff = ktime_us_delta(time_end, time_start); ++ if (diff > INT_MAX) ++ diff = INT_MAX; ++ ++ dev->last_residency = (int) diff; ++ ++ if (entered_state >= 0) { ++ /* Update cpuidle counters */ ++ /* This can be moved to within driver enter routine ++ * but that results in multiple copies of same code. 
++ */ ++ dev->states_usage[entered_state].time += dev->last_residency; ++ dev->states_usage[entered_state].usage++; ++ } else { ++ dev->last_residency = 0; ++ } ++ ++ return entered_state; ++} ++ ++/** ++ * cpuidle_select - ask the cpuidle framework to choose an idle state ++ * ++ * @drv: the cpuidle driver ++ * @dev: the cpuidle device ++ * @stop_tick: indication on whether or not to stop the tick ++ * ++ * Returns the index of the idle state. The return value must not be negative. ++ * ++ * The memory location pointed to by @stop_tick is expected to be written the ++ * 'false' boolean value if the scheduler tick should not be stopped before ++ * entering the returned state. ++ */ ++int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, ++ bool *stop_tick) ++{ ++ return cpuidle_curr_governor->select(drv, dev, stop_tick); ++} ++ ++/** ++ * cpuidle_enter - enter into the specified idle state ++ * ++ * @drv: the cpuidle driver tied with the cpu ++ * @dev: the cpuidle device ++ * @index: the index in the idle state table ++ * ++ * Returns the index in the idle state, < 0 in case of error. ++ * The error code depends on the backend driver ++ */ ++int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, ++ int index) ++{ ++ if (cpuidle_state_is_coupled(drv, index)) ++ return cpuidle_enter_state_coupled(dev, drv, index); ++ return cpuidle_enter_state(dev, drv, index); ++} ++ ++/** ++ * cpuidle_reflect - tell the underlying governor what was the state ++ * we were in ++ * ++ * @dev : the cpuidle device ++ * @index: the index in the idle state table ++ * ++ */ ++void cpuidle_reflect(struct cpuidle_device *dev, int index) ++{ ++ if (cpuidle_curr_governor->reflect && index >= 0) ++ cpuidle_curr_governor->reflect(dev, index); ++} ++ ++/** ++ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler ++ */ ++void cpuidle_install_idle_handler(void) ++{ ++ if (enabled_devices) { ++ /* Make sure all changes finished before we switch to new idle */ ++ smp_wmb(); ++ initialized = 1; ++ } ++} ++ ++/** ++ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler ++ */ ++void cpuidle_uninstall_idle_handler(void) ++{ ++ if (enabled_devices) { ++ initialized = 0; ++ wake_up_all_idle_cpus(); ++ } ++ ++ /* ++ * Make sure external observers (such as the scheduler) ++ * are done looking at pointed idle states. ++ */ ++ synchronize_rcu(); ++} ++ ++/** ++ * cpuidle_pause_and_lock - temporarily disables CPUIDLE ++ */ ++void cpuidle_pause_and_lock(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_uninstall_idle_handler(); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock); ++ ++/** ++ * cpuidle_resume_and_unlock - resumes CPUIDLE operation ++ */ ++void cpuidle_resume_and_unlock(void) ++{ ++ cpuidle_install_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); ++ ++/* Currently used in suspend/resume path to suspend cpuidle */ ++void cpuidle_pause(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_uninstall_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++/* Currently used in suspend/resume path to resume cpuidle */ ++void cpuidle_resume(void) ++{ ++ mutex_lock(&cpuidle_lock); ++ cpuidle_install_idle_handler(); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++/** ++ * cpuidle_enable_device - enables idle PM for a CPU ++ * @dev: the CPU ++ * ++ * This function must be called between cpuidle_pause_and_lock and ++ * cpuidle_resume_and_unlock when used externally. 
++ */ ++int cpuidle_enable_device(struct cpuidle_device *dev) ++{ ++ int ret; ++ struct cpuidle_driver *drv; ++ ++ if (!dev) ++ return -EINVAL; ++ ++ if (dev->enabled) ++ return 0; ++ ++ if (!cpuidle_curr_governor) ++ return -EIO; ++ ++ drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!drv) ++ return -EIO; ++ ++ if (!dev->registered) ++ return -EINVAL; ++ ++ ret = cpuidle_add_device_sysfs(dev); ++ if (ret) ++ return ret; ++ ++ if (cpuidle_curr_governor->enable) { ++ ret = cpuidle_curr_governor->enable(drv, dev); ++ if (ret) ++ goto fail_sysfs; ++ } ++ ++ smp_wmb(); ++ ++ dev->enabled = 1; ++ ++ enabled_devices++; ++ return 0; ++ ++fail_sysfs: ++ cpuidle_remove_device_sysfs(dev); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_enable_device); ++ ++/** ++ * cpuidle_disable_device - disables idle PM for a CPU ++ * @dev: the CPU ++ * ++ * This function must be called between cpuidle_pause_and_lock and ++ * cpuidle_resume_and_unlock when used externally. ++ */ ++void cpuidle_disable_device(struct cpuidle_device *dev) ++{ ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!dev || !dev->enabled) ++ return; ++ ++ if (!drv || !cpuidle_curr_governor) ++ return; ++ ++ dev->enabled = 0; ++ ++ if (cpuidle_curr_governor->disable) ++ cpuidle_curr_governor->disable(drv, dev); ++ ++ cpuidle_remove_device_sysfs(dev); ++ enabled_devices--; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_disable_device); ++ ++static void __cpuidle_unregister_device(struct cpuidle_device *dev) ++{ ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ list_del(&dev->device_list); ++ per_cpu(cpuidle_devices, dev->cpu) = NULL; ++ module_put(drv->owner); ++ ++ dev->registered = 0; ++} ++ ++static void __cpuidle_device_init(struct cpuidle_device *dev) ++{ ++ memset(dev->states_usage, 0, sizeof(dev->states_usage)); ++ dev->last_residency = 0; ++} ++ ++/** ++ * __cpuidle_register_device - internal register function called before register ++ * and enable routines ++ * @dev: the cpu ++ * ++ * cpuidle_lock mutex must be held before this is called ++ */ ++static int __cpuidle_register_device(struct cpuidle_device *dev) ++{ ++ int ret; ++ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); ++ ++ if (!try_module_get(drv->owner)) ++ return -EINVAL; ++ ++ per_cpu(cpuidle_devices, dev->cpu) = dev; ++ list_add(&dev->device_list, &cpuidle_detected_devices); ++ ++ ret = cpuidle_coupled_register_device(dev); ++ if (ret) ++ __cpuidle_unregister_device(dev); ++ else ++ dev->registered = 1; ++ ++ return ret; ++} ++ ++/** ++ * cpuidle_register_device - registers a CPU's idle PM feature ++ * @dev: the cpu ++ */ ++int cpuidle_register_device(struct cpuidle_device *dev) ++{ ++ int ret = -EBUSY; ++ ++ if (!dev) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ ++ if (dev->registered) ++ goto out_unlock; ++ ++ __cpuidle_device_init(dev); ++ ++ ret = __cpuidle_register_device(dev); ++ if (ret) ++ goto out_unlock; ++ ++ ret = cpuidle_add_sysfs(dev); ++ if (ret) ++ goto out_unregister; ++ ++ ret = cpuidle_enable_device(dev); ++ if (ret) ++ goto out_sysfs; ++ ++ cpuidle_install_idle_handler(); ++ ++out_unlock: ++ mutex_unlock(&cpuidle_lock); ++ ++ return ret; ++ ++out_sysfs: ++ cpuidle_remove_sysfs(dev); ++out_unregister: ++ __cpuidle_unregister_device(dev); ++ goto out_unlock; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_register_device); ++ ++/** ++ * cpuidle_unregister_device - unregisters a CPU's idle PM feature ++ * @dev: the cpu ++ */ ++void cpuidle_unregister_device(struct cpuidle_device *dev) ++{ ++ if (!dev || dev->registered == 
0) ++ return; ++ ++ cpuidle_pause_and_lock(); ++ ++ cpuidle_disable_device(dev); ++ ++ cpuidle_remove_sysfs(dev); ++ ++ __cpuidle_unregister_device(dev); ++ ++ cpuidle_coupled_unregister_device(dev); ++ ++ cpuidle_resume_and_unlock(); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_unregister_device); ++ ++/** ++ * cpuidle_unregister: unregister a driver and the devices. This function ++ * can be used only if the driver has been previously registered through ++ * the cpuidle_register function. ++ * ++ * @drv: a valid pointer to a struct cpuidle_driver ++ */ ++void cpuidle_unregister(struct cpuidle_driver *drv) ++{ ++ int cpu; ++ struct cpuidle_device *device; ++ ++ for_each_cpu(cpu, drv->cpumask) { ++ device = &per_cpu(cpuidle_dev, cpu); ++ cpuidle_unregister_device(device); ++ } ++ ++ cpuidle_unregister_driver(drv); ++} ++EXPORT_SYMBOL_GPL(cpuidle_unregister); ++ ++/** ++ * cpuidle_register: registers the driver and the cpu devices with the ++ * coupled_cpus passed as parameter. This function is used for all common ++ * initialization pattern there are in the arch specific drivers. The ++ * devices is globally defined in this file. ++ * ++ * @drv : a valid pointer to a struct cpuidle_driver ++ * @coupled_cpus: a cpumask for the coupled states ++ * ++ * Returns 0 on success, < 0 otherwise ++ */ ++int cpuidle_register(struct cpuidle_driver *drv, ++ const struct cpumask *const coupled_cpus) ++{ ++ int ret, cpu; ++ struct cpuidle_device *device; ++ ++ ret = cpuidle_register_driver(drv); ++ if (ret) { ++ pr_err("failed to register cpuidle driver\n"); ++ return ret; ++ } ++ ++ for_each_cpu(cpu, drv->cpumask) { ++ device = &per_cpu(cpuidle_dev, cpu); ++ device->cpu = cpu; ++ ++#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED ++ /* ++ * On multiplatform for ARM, the coupled idle states could be ++ * enabled in the kernel even if the cpuidle driver does not ++ * use it. Note, coupled_cpus is a struct copy. ++ */ ++ if (coupled_cpus) ++ device->coupled_cpus = *coupled_cpus; ++#endif ++ ret = cpuidle_register_device(device); ++ if (!ret) ++ continue; ++ ++ pr_err("Failed to register cpuidle device for cpu%d\n", cpu); ++ ++ cpuidle_unregister(drv); ++ break; ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(cpuidle_register); ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * This function gets called when a part of the kernel has a new latency ++ * requirement. This means we need to get all processors out of their C-state, ++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that ++ * wakes them all right up. 
++ */ ++static int cpuidle_latency_notify(struct notifier_block *b, ++ unsigned long l, void *v) ++{ ++ wake_up_all_idle_cpus(); ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block cpuidle_latency_notifier = { ++ .notifier_call = cpuidle_latency_notify, ++}; ++ ++static inline void latency_notifier_init(struct notifier_block *n) ++{ ++ pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n); ++} ++ ++#else /* CONFIG_SMP */ ++ ++#define latency_notifier_init(x) do { } while (0) ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * cpuidle_init - core initializer ++ */ ++static int __init cpuidle_init(void) ++{ ++ int ret; ++ ++ if (cpuidle_disabled()) ++ return -ENODEV; ++ ++ ret = cpuidle_add_interface(cpu_subsys.dev_root); ++ if (ret) ++ return ret; ++ ++ latency_notifier_init(&cpuidle_latency_notifier); ++ ++ return 0; ++} ++ ++module_param(off, int, 0444); ++core_initcall(cpuidle_init); +diff -uprN kernel/drivers/gpio/gpio-davinci.c kernel_new/drivers/gpio/gpio-davinci.c +--- kernel/drivers/gpio/gpio-davinci.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-davinci.c 2021-04-01 18:28:07.662863279 +0800 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + struct davinci_gpio_regs { + u32 dir; +@@ -327,7 +328,7 @@ static struct irq_chip gpio_irqchip = { + .irq_enable = gpio_irq_enable, + .irq_disable = gpio_irq_disable, + .irq_set_type = gpio_irq_type, +- .flags = IRQCHIP_SET_TYPE_MASKED, ++ .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_PIPELINE_SAFE, + }; + + static void gpio_irq_handler(struct irq_desc *desc) +@@ -370,7 +371,7 @@ static void gpio_irq_handler(struct irq_ + */ + hw_irq = (bank_num / 2) * 32 + bit; + +- generic_handle_irq( ++ ipipe_handle_demuxed_irq( + irq_find_mapping(d->irq_domain, hw_irq)); + } + } +diff -uprN kernel/drivers/gpio/gpio-mvebu.c kernel_new/drivers/gpio/gpio-mvebu.c +--- kernel/drivers/gpio/gpio-mvebu.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-mvebu.c 2021-04-01 18:28:07.662863279 +0800 +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + /* + * GPIO unit register offsets. 
+@@ -391,10 +392,11 @@ static void mvebu_gpio_irq_ack(struct ir + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct mvebu_gpio_chip *mvchip = gc->private; + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + mvebu_gpio_write_edge_cause(mvchip, ~mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_edge_irq_mask(struct irq_data *d) +@@ -403,11 +405,12 @@ static void mvebu_gpio_edge_irq_mask(str + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv &= ~mask; + mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_edge_irq_unmask(struct irq_data *d) +@@ -416,11 +419,12 @@ static void mvebu_gpio_edge_irq_unmask(s + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv |= mask; + mvebu_gpio_write_edge_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_level_irq_mask(struct irq_data *d) +@@ -429,11 +433,12 @@ static void mvebu_gpio_level_irq_mask(st + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv &= ~mask; + mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void mvebu_gpio_level_irq_unmask(struct irq_data *d) +@@ -442,11 +447,12 @@ static void mvebu_gpio_level_irq_unmask( + struct mvebu_gpio_chip *mvchip = gc->private; + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + ct->mask_cache_priv |= mask; + mvebu_gpio_write_level_mask(mvchip, ct->mask_cache_priv); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /***************************************************************************** +@@ -580,7 +586,7 @@ static void mvebu_gpio_irq_handler(struc + polarity); + } + +- generic_handle_irq(irq); ++ ipipe_handle_demuxed_irq(irq); + } + + chained_irq_exit(chip, desc); +@@ -1224,6 +1230,7 @@ static int mvebu_gpio_probe(struct platf + ct->chip.irq_unmask = mvebu_gpio_level_irq_unmask; + ct->chip.irq_set_type = mvebu_gpio_irq_set_type; + ct->chip.name = mvchip->chip.label; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + + ct = &gc->chip_types[1]; + ct->type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; +@@ -1233,6 +1240,7 @@ static int mvebu_gpio_probe(struct platf + ct->chip.irq_set_type = mvebu_gpio_irq_set_type; + ct->handler = handle_edge_irq; + ct->chip.name = mvchip->chip.label; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + + /* + * Setup the interrupt handlers. 
Each chip can have up to 4 +diff -uprN kernel/drivers/gpio/gpio-mxc.c kernel_new/drivers/gpio/gpio-mxc.c +--- kernel/drivers/gpio/gpio-mxc.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-mxc.c 2021-04-01 18:28:07.662863279 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + enum mxc_gpio_hwtype { + IMX1_GPIO, /* runs on i.mx1 */ +@@ -266,7 +267,7 @@ static void mxc_gpio_irq_handler(struct + if (port->both_edges & (1 << irqoffset)) + mxc_flip_edge(port, irqoffset); + +- generic_handle_irq(irq_find_mapping(port->domain, irqoffset)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(port->domain, irqoffset)); + + irq_stat &= ~(1 << irqoffset); + } +@@ -359,7 +360,7 @@ static int mxc_gpio_init_gc(struct mxc_g + ct->chip.irq_unmask = irq_gc_mask_set_bit; + ct->chip.irq_set_type = gpio_set_irq_type; + ct->chip.irq_set_wake = gpio_set_wake_irq; +- ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND; ++ ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; + ct->regs.ack = GPIO_ISR; + ct->regs.mask = GPIO_IMR; + +diff -uprN kernel/drivers/gpio/gpio-omap.c kernel_new/drivers/gpio/gpio-omap.c +--- kernel/drivers/gpio/gpio-omap.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-omap.c 2021-04-01 18:28:07.662863279 +0800 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + + #define OFF_MODE 1 +@@ -58,7 +59,11 @@ struct gpio_bank { + u32 saved_datain; + u32 level_mask; + u32 toggle_mask; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + raw_spinlock_t wa_lock; + struct gpio_chip chip; + struct clk *dbck; +@@ -737,20 +742,17 @@ static void omap_gpio_free(struct gpio_c + * line's interrupt handler has been run, we may miss some nested + * interrupts. 
+ */ +-static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) ++static void __omap_gpio_irq_handler(struct gpio_bank *bank) + { + void __iomem *isr_reg = NULL; + u32 enabled, isr, level_mask; + unsigned int bit; +- struct gpio_bank *bank = gpiobank; + unsigned long wa_lock_flags; + unsigned long lock_flags; + + isr_reg = bank->base + bank->regs->irqstatus; + if (WARN_ON(!isr_reg)) +- goto exit; +- +- pm_runtime_get_sync(bank->chip.parent); ++ return; + + while (1) { + raw_spin_lock_irqsave(&bank->lock, lock_flags); +@@ -793,18 +795,38 @@ static irqreturn_t omap_gpio_irq_handler + + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags); + +- generic_handle_irq(irq_find_mapping(bank->chip.irq.domain, ++ ipipe_handle_demuxed_irq(irq_find_mapping(bank->chip.irq.domain, + bit)); + + raw_spin_unlock_irqrestore(&bank->wa_lock, + wa_lock_flags); + } + } +-exit: ++} ++ ++#ifdef CONFIG_IPIPE ++ ++static void omap_gpio_irq_handler(struct irq_desc *d) ++{ ++ struct gpio_bank *bank = irq_desc_get_handler_data(d); ++ __omap_gpio_irq_handler(bank); ++} ++ ++#else ++ ++static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) ++{ ++ struct gpio_bank *bank = gpiobank; ++ ++ pm_runtime_get_sync(bank->chip.parent); ++ __omap_gpio_irq_handler(bank); + pm_runtime_put(bank->chip.parent); ++ + return IRQ_HANDLED; + } + ++#endif ++ + static unsigned int omap_gpio_irq_startup(struct irq_data *d) + { + struct gpio_bank *bank = omap_irq_data_get_bank(d); +@@ -886,6 +908,19 @@ static void omap_gpio_mask_irq(struct ir + raw_spin_unlock_irqrestore(&bank->lock, flags); + } + ++static void omap_gpio_mask_ack_irq(struct irq_data *d) ++{ ++ struct gpio_bank *bank = omap_irq_data_get_bank(d); ++ unsigned offset = d->hwirq; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&bank->lock, flags); ++ omap_set_gpio_irqenable(bank, offset, 0); ++ omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE); ++ omap_clear_gpio_irqstatus(bank, offset); ++ raw_spin_unlock_irqrestore(&bank->lock, flags); ++} ++ + static void omap_gpio_unmask_irq(struct irq_data *d) + { + struct gpio_bank *bank = omap_irq_data_get_bank(d); +@@ -1218,11 +1253,16 @@ static int omap_gpio_chip_init(struct gp + return ret; + } + ++#ifdef CONFIG_IPIPE ++ irq_set_chained_handler_and_data(bank->irq, ++ omap_gpio_irq_handler, bank); ++#else + ret = devm_request_irq(bank->chip.parent, bank->irq, + omap_gpio_irq_handler, + 0, dev_name(bank->chip.parent), bank); + if (ret) + gpiochip_remove(&bank->chip); ++#endif + + if (!bank->is_mpuio) + gpio += bank->width; +@@ -1261,13 +1301,14 @@ static int omap_gpio_probe(struct platfo + irqc->irq_shutdown = omap_gpio_irq_shutdown, + irqc->irq_ack = omap_gpio_ack_irq, + irqc->irq_mask = omap_gpio_mask_irq, ++ irqc->irq_mask_ack = omap_gpio_mask_ack_irq, + irqc->irq_unmask = omap_gpio_unmask_irq, + irqc->irq_set_type = omap_gpio_irq_type, + irqc->irq_set_wake = omap_gpio_wake_enable, + irqc->irq_bus_lock = omap_gpio_irq_bus_lock, + irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, + irqc->name = dev_name(&pdev->dev); +- irqc->flags = IRQCHIP_MASK_ON_SUSPEND; ++ irqc->flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE; + + bank->irq = platform_get_irq(pdev, 0); + if (bank->irq <= 0) { +diff -uprN kernel/drivers/gpio/gpio-pl061.c kernel_new/drivers/gpio/gpio-pl061.c +--- kernel/drivers/gpio/gpio-pl061.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-pl061.c 2021-04-01 18:28:07.662863279 +0800 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #define GPIODIR 0x400 + 
#define GPIOIS 0x404 +@@ -50,7 +51,11 @@ struct pl061_context_save_regs { + #endif + + struct pl061 { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + + void __iomem *base; + struct gpio_chip gc; +@@ -222,8 +227,8 @@ static void pl061_irq_handler(struct irq + pending = readb(pl061->base + GPIOMIS); + if (pending) { + for_each_set_bit(offset, &pending, PL061_GPIO_NR) +- generic_handle_irq(irq_find_mapping(gc->irq.domain, +- offset)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(gc->irq.domain, ++ offset)); + } + + chained_irq_exit(irqchip, desc); +@@ -234,6 +239,22 @@ static void pl061_irq_mask(struct irq_da + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct pl061 *pl061 = gpiochip_get_data(gc); + u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); ++ unsigned long flags; ++ u8 gpioie; ++ ++ raw_spin_lock_irqsave(&pl061->lock, flags); ++ gpioie = readb(pl061->base + GPIOIE) & ~mask; ++ writeb(gpioie, pl061->base + GPIOIE); ++ ipipe_lock_irq(d->irq); ++ raw_spin_unlock_irqrestore(&pl061->lock, flags); ++} ++ ++#ifdef CONFIG_IPIPE ++static void pl061_irq_mask_ack(struct irq_data *d) ++{ ++ struct gpio_chip *gc = irq_data_get_irq_chip_data(d); ++ struct pl061 *pl061 = gpiochip_get_data(gc); ++ u8 mask = BIT(irqd_to_hwirq(d) % PL061_GPIO_NR); + u8 gpioie; + + raw_spin_lock(&pl061->lock); +@@ -241,6 +262,7 @@ static void pl061_irq_mask(struct irq_da + writeb(gpioie, pl061->base + GPIOIE); + raw_spin_unlock(&pl061->lock); + } ++#endif + + static void pl061_irq_unmask(struct irq_data *d) + { +@@ -326,6 +348,10 @@ static int pl061_probe(struct amba_devic + pl061->irq_chip.irq_unmask = pl061_irq_unmask; + pl061->irq_chip.irq_set_type = pl061_irq_type; + pl061->irq_chip.irq_set_wake = pl061_irq_set_wake; ++#ifdef CONFIG_IPIPE ++ pl061->irq_chip.irq_mask_ack = pl061_irq_mask_ack; ++ pl061->irq_chip.flags = IRQCHIP_PIPELINE_SAFE; ++#endif + + writeb(0, pl061->base + GPIOIE); /* disable irqs */ + irq = adev->irq[0]; +diff -uprN kernel/drivers/gpio/gpio-zynq.c kernel_new/drivers/gpio/gpio-zynq.c +--- kernel/drivers/gpio/gpio-zynq.c 2020-12-21 21:59:17.000000000 +0800 ++++ kernel_new/drivers/gpio/gpio-zynq.c 2021-04-01 18:28:07.662863279 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -130,6 +131,8 @@ struct zynq_gpio { + struct gpio_regs context; + }; + ++static IPIPE_DEFINE_RAW_SPINLOCK(zynq_gpio_lock); ++ + /** + * struct zynq_platform_data - zynq gpio platform data structure + * @label: string to store in gpio->label +@@ -302,6 +305,7 @@ static int zynq_gpio_dir_in(struct gpio_ + u32 reg; + unsigned int bank_num, bank_pin_num; + struct zynq_gpio *gpio = gpiochip_get_data(chip); ++ unsigned long flags; + + zynq_gpio_get_bank_pin(pin, &bank_num, &bank_pin_num, gpio); + +@@ -313,10 +317,12 @@ static int zynq_gpio_dir_in(struct gpio_ + (bank_pin_num == 7 || bank_pin_num == 8)) + return -EINVAL; + ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + /* clear the bit in direction mode reg to set the pin as input */ + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); + reg &= ~BIT(bank_pin_num); + writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + + return 0; + } +@@ -339,9 +345,11 @@ static int zynq_gpio_dir_out(struct gpio + u32 reg; + unsigned int bank_num, bank_pin_num; + struct zynq_gpio *gpio = gpiochip_get_data(chip); ++ unsigned long flags; + + zynq_gpio_get_bank_pin(pin, &bank_num, 
&bank_pin_num, gpio); + ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + /* set the GPIO pin as output */ + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); + reg |= BIT(bank_pin_num); +@@ -351,6 +359,7 @@ static int zynq_gpio_dir_out(struct gpio + reg = readl_relaxed(gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); + reg |= BIT(bank_pin_num); + writel_relaxed(reg, gpio->base_addr + ZYNQ_GPIO_OUTEN_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + + /* set the state of the pin */ + zynq_gpio_set_value(chip, pin, state); +@@ -370,11 +379,15 @@ static void zynq_gpio_irq_mask(struct ir + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ unsigned long flags; + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); ++ ipipe_lock_irq(irq_data->irq); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + } + + /** +@@ -391,11 +404,15 @@ static void zynq_gpio_irq_unmask(struct + unsigned int device_pin_num, bank_num, bank_pin_num; + struct zynq_gpio *gpio = + gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ unsigned long flags; + + device_pin_num = irq_data->hwirq; + zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock_irqsave(&zynq_gpio_lock, flags); + writel_relaxed(BIT(bank_pin_num), + gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); ++ ipipe_unlock_irq(irq_data->irq); ++ raw_spin_unlock_irqrestore(&zynq_gpio_lock, flags); + } + + /** +@@ -533,28 +550,68 @@ static int zynq_gpio_set_wake(struct irq + return 0; + } + ++#ifdef CONFIG_IPIPE ++ ++static void zynq_gpio_hold_irq(struct irq_data *irq_data) ++{ ++ unsigned int device_pin_num, bank_num, bank_pin_num; ++ struct zynq_gpio *gpio = ++ gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ ++ device_pin_num = irq_data->hwirq; ++ zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ raw_spin_lock(&zynq_gpio_lock); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTSTS_OFFSET(bank_num)); ++ raw_spin_unlock(&zynq_gpio_lock); ++} ++ ++static void zynq_gpio_release_irq(struct irq_data *irq_data) ++{ ++ unsigned int device_pin_num, bank_num, bank_pin_num; ++ struct zynq_gpio *gpio = ++ gpiochip_get_data(irq_data_get_irq_chip_data(irq_data)); ++ ++ device_pin_num = irq_data->hwirq; ++ zynq_gpio_get_bank_pin(device_pin_num, &bank_num, &bank_pin_num, gpio); ++ writel_relaxed(BIT(bank_pin_num), ++ gpio->base_addr + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + /* irq chip descriptor */ + static struct irq_chip zynq_gpio_level_irqchip = { +- .name = DRIVER_NAME, ++ .name = DRIVER_NAME "-level", + .irq_enable = zynq_gpio_irq_enable, + .irq_eoi = zynq_gpio_irq_ack, ++#ifdef CONFIG_IPIPE ++ .irq_hold = zynq_gpio_hold_irq, ++ .irq_release = zynq_gpio_release_irq, ++#endif + .irq_mask = zynq_gpio_irq_mask, + .irq_unmask = zynq_gpio_irq_unmask, + .irq_set_type = zynq_gpio_set_irq_type, + .irq_set_wake = zynq_gpio_set_wake, + .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | +- IRQCHIP_MASK_ON_SUSPEND, ++ IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, + }; + + static struct irq_chip 
zynq_gpio_edge_irqchip = { +- .name = DRIVER_NAME, ++ .name = DRIVER_NAME "-edge", + .irq_enable = zynq_gpio_irq_enable, ++#ifdef CONFIG_IPIPE ++ .irq_mask_ack = zynq_gpio_hold_irq, ++#else + .irq_ack = zynq_gpio_irq_ack, ++#endif + .irq_mask = zynq_gpio_irq_mask, + .irq_unmask = zynq_gpio_irq_unmask, + .irq_set_type = zynq_gpio_set_irq_type, + .irq_set_wake = zynq_gpio_set_wake, +- .flags = IRQCHIP_MASK_ON_SUSPEND, ++ .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_PIPELINE_SAFE, + }; + + static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio, +@@ -572,7 +629,7 @@ static void zynq_gpio_handle_bank_irq(st + unsigned int gpio_irq; + + gpio_irq = irq_find_mapping(irqdomain, offset + bank_offset); +- generic_handle_irq(gpio_irq); ++ ipipe_handle_demuxed_irq(gpio_irq); + } + } + +diff -uprN kernel/drivers/gpu/ipu-v3/ipu-common.c kernel_new/drivers/gpu/ipu-v3/ipu-common.c +--- kernel/drivers/gpu/ipu-v3/ipu-common.c 2020-12-21 21:59:18.000000000 +0800 ++++ kernel_new/drivers/gpu/ipu-v3/ipu-common.c 2021-04-01 18:28:07.663863278 +0800 +@@ -1084,7 +1084,7 @@ static void ipu_irq_handle(struct ipu_so + irq = irq_linear_revmap(ipu->domain, + regs[i] * 32 + bit); + if (irq) +- generic_handle_irq(irq); ++ ipipe_handle_demuxed_irq(irq); + } + } + } +@@ -1308,6 +1308,7 @@ static int ipu_irq_init(struct ipu_soc * + ct->chip.irq_ack = irq_gc_ack_set_bit; + ct->chip.irq_mask = irq_gc_mask_clr_bit; + ct->chip.irq_unmask = irq_gc_mask_set_bit; ++ ct->chip.flags = IRQCHIP_PIPELINE_SAFE; + ct->regs.ack = IPU_INT_STAT(i / 32); + ct->regs.mask = IPU_INT_CTRL(i / 32); + } +diff -uprN kernel/drivers/gpu/ipu-v3/ipu-prv.h kernel_new/drivers/gpu/ipu-v3/ipu-prv.h +--- kernel/drivers/gpu/ipu-v3/ipu-prv.h 2020-12-21 21:59:18.000000000 +0800 ++++ kernel_new/drivers/gpu/ipu-v3/ipu-prv.h 2021-04-01 18:28:07.663863278 +0800 +@@ -179,7 +179,7 @@ struct ipu_soc { + struct device *dev; + const struct ipu_devtype *devtype; + enum ipuv3_type ipu_type; +- spinlock_t lock; ++ ipipe_spinlock_t lock; + struct mutex channel_lock; + struct list_head channels; + +diff -uprN kernel/drivers/irqchip/irq-atmel-aic5.c kernel_new/drivers/irqchip/irq-atmel-aic5.c +--- kernel/drivers/irqchip/irq-atmel-aic5.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-atmel-aic5.c 2021-04-01 18:28:07.663863278 +0800 +@@ -80,7 +80,7 @@ aic5_handle(struct pt_regs *regs) + if (!irqstat) + irq_reg_writel(bgc, 0, AT91_AIC5_EOICR); + else +- handle_domain_irq(aic5_domain, irqnr, regs); ++ ipipe_handle_domain_irq(aic5_domain, irqnr, regs); + } + + static void aic5_mask(struct irq_data *d) +@@ -88,16 +88,18 @@ static void aic5_mask(struct irq_data *d + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + + /* + * Disable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. 
+ */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); ++ ipipe_lock_irq(d->irq); + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IDCR); + gc->mask_cache &= ~d->mask; +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_unmask(struct irq_data *d) +@@ -105,28 +107,59 @@ static void aic5_unmask(struct irq_data + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + + /* + * Enable interrupt on AIC5. We always take the lock of the + * first irq chip as all chips share the same registers. + */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(gc, 1, AT91_AIC5_IECR); + gc->mask_cache |= d->mask; +- irq_gc_unlock(bgc); ++ ipipe_unlock_irq(d->irq); ++ irq_gc_unlock(bgc, flags); ++} ++ ++#ifdef CONFIG_IPIPE ++ ++static void aic5_hold(struct irq_data *d) ++{ ++ struct irq_domain *domain = d->domain; ++ struct irq_domain_chip_generic *dgc = domain->gc; ++ struct irq_chip_generic *gc = dgc->gc[0]; ++ ++ irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); ++ irq_reg_writel(gc, 1, AT91_AIC5_IDCR); ++ irq_reg_writel(gc, 0, AT91_AIC5_EOICR); ++} ++ ++static void aic5_release(struct irq_data *d) ++{ ++ struct irq_domain *domain = d->domain; ++ struct irq_domain_chip_generic *dgc = domain->gc; ++ struct irq_chip_generic *gc = dgc->gc[0]; ++ unsigned long flags; ++ ++ flags = irq_gc_lock(gc); ++ irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR); ++ irq_reg_writel(gc, 1, AT91_AIC5_IECR); ++ irq_gc_unlock(gc, flags); + } + ++#endif ++ + static int aic5_retrigger(struct irq_data *d) + { + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); ++ unsigned long flags; + + /* Enable interrupt on AIC5 */ +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); + irq_reg_writel(bgc, 1, AT91_AIC5_ISCR); +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + + return 0; + } +@@ -135,16 +168,17 @@ static int aic5_set_type(struct irq_data + { + struct irq_domain *domain = d->domain; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); ++ unsigned long flags; + unsigned int smr; + int ret; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + irq_reg_writel(bgc, d->hwirq, AT91_AIC5_SSR); + smr = irq_reg_readl(bgc, AT91_AIC5_SMR); + ret = aic_common_set_type(d, type, &smr); + if (!ret) + irq_reg_writel(bgc, smr, AT91_AIC5_SMR); +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + + return ret; + } +@@ -160,6 +194,7 @@ static void aic5_suspend(struct irq_data + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + int i; + u32 mask; ++ unsigned long flags; + + if (smr_cache) + for (i = 0; i < domain->revmap_size; i++) { +@@ -167,7 +202,7 @@ static void aic5_suspend(struct irq_data + smr_cache[i] = irq_reg_readl(bgc, AT91_AIC5_SMR); + } + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + for (i = 0; i < dgc->irqs_per_chip; i++) { + mask = 1 << i; + if ((mask & gc->mask_cache) == (mask & gc->wake_active)) +@@ -179,7 +214,7 @@ static void aic5_suspend(struct irq_data + else + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_resume(struct irq_data *d) +@@ -190,8 +225,9 @@ static void aic5_resume(struct irq_data + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + 
int i; + u32 mask; ++ unsigned long flags; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + + if (smr_cache) { + irq_reg_writel(bgc, 0xffffffff, AT91_AIC5_SPU); +@@ -215,7 +251,7 @@ static void aic5_resume(struct irq_data + else + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + + static void aic5_pm_shutdown(struct irq_data *d) +@@ -224,15 +260,16 @@ static void aic5_pm_shutdown(struct irq_ + struct irq_domain_chip_generic *dgc = domain->gc; + struct irq_chip_generic *bgc = irq_get_domain_generic_chip(domain, 0); + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + int i; + +- irq_gc_lock(bgc); ++ flags = irq_gc_lock(bgc); + for (i = 0; i < dgc->irqs_per_chip; i++) { + irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR); + irq_reg_writel(bgc, 1, AT91_AIC5_IDCR); + irq_reg_writel(bgc, 1, AT91_AIC5_ICCR); + } +- irq_gc_unlock(bgc); ++ irq_gc_unlock(bgc, flags); + } + #else + #define aic5_suspend NULL +@@ -349,6 +386,11 @@ static int __init aic5_of_init(struct de + gc->chip_types[0].chip.irq_suspend = aic5_suspend; + gc->chip_types[0].chip.irq_resume = aic5_resume; + gc->chip_types[0].chip.irq_pm_shutdown = aic5_pm_shutdown; ++#ifdef CONFIG_IPIPE ++ gc->chip_types[0].chip.irq_hold = aic5_hold; ++ gc->chip_types[0].chip.irq_release = aic5_release; ++ gc->chip_types[0].chip.flags = IRQCHIP_PIPELINE_SAFE; ++#endif + } + + aic5_hw_init(domain); +diff -uprN kernel/drivers/irqchip/irq-atmel-aic.c kernel_new/drivers/irqchip/irq-atmel-aic.c +--- kernel/drivers/irqchip/irq-atmel-aic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-atmel-aic.c 1970-01-01 08:00:00.000000000 +0800 +@@ -1,274 +0,0 @@ +-/* +- * Atmel AT91 AIC (Advanced Interrupt Controller) driver +- * +- * Copyright (C) 2004 SAN People +- * Copyright (C) 2004 ATMEL +- * Copyright (C) Rick Bronson +- * Copyright (C) 2014 Free Electrons +- * +- * Author: Boris BREZILLON +- * +- * This file is licensed under the terms of the GNU General Public +- * License version 2. This program is licensed "as is" without any +- * warranty of any kind, whether express or implied. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include "irq-atmel-aic-common.h" +- +-/* Number of irq lines managed by AIC */ +-#define NR_AIC_IRQS 32 +- +-#define AT91_AIC_SMR(n) ((n) * 4) +- +-#define AT91_AIC_SVR(n) (0x80 + ((n) * 4)) +-#define AT91_AIC_IVR 0x100 +-#define AT91_AIC_FVR 0x104 +-#define AT91_AIC_ISR 0x108 +- +-#define AT91_AIC_IPR 0x10c +-#define AT91_AIC_IMR 0x110 +-#define AT91_AIC_CISR 0x114 +- +-#define AT91_AIC_IECR 0x120 +-#define AT91_AIC_IDCR 0x124 +-#define AT91_AIC_ICCR 0x128 +-#define AT91_AIC_ISCR 0x12c +-#define AT91_AIC_EOICR 0x130 +-#define AT91_AIC_SPU 0x134 +-#define AT91_AIC_DCR 0x138 +- +-static struct irq_domain *aic_domain; +- +-static asmlinkage void __exception_irq_entry +-aic_handle(struct pt_regs *regs) +-{ +- struct irq_domain_chip_generic *dgc = aic_domain->gc; +- struct irq_chip_generic *gc = dgc->gc[0]; +- u32 irqnr; +- u32 irqstat; +- +- irqnr = irq_reg_readl(gc, AT91_AIC_IVR); +- irqstat = irq_reg_readl(gc, AT91_AIC_ISR); +- +- if (!irqstat) +- irq_reg_writel(gc, 0, AT91_AIC_EOICR); +- else +- handle_domain_irq(aic_domain, irqnr, regs); +-} +- +-static int aic_retrigger(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- /* Enable interrupt on AIC5 */ +- irq_gc_lock(gc); +- irq_reg_writel(gc, d->mask, AT91_AIC_ISCR); +- irq_gc_unlock(gc); +- +- return 0; +-} +- +-static int aic_set_type(struct irq_data *d, unsigned type) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- unsigned int smr; +- int ret; +- +- smr = irq_reg_readl(gc, AT91_AIC_SMR(d->hwirq)); +- ret = aic_common_set_type(d, type, &smr); +- if (ret) +- return ret; +- +- irq_reg_writel(gc, smr, AT91_AIC_SMR(d->hwirq)); +- +- return 0; +-} +- +-#ifdef CONFIG_PM +-static void aic_suspend(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IDCR); +- irq_reg_writel(gc, gc->wake_active, AT91_AIC_IECR); +- irq_gc_unlock(gc); +-} +- +-static void aic_resume(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, gc->wake_active, AT91_AIC_IDCR); +- irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IECR); +- irq_gc_unlock(gc); +-} +- +-static void aic_pm_shutdown(struct irq_data *d) +-{ +- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); +- +- irq_gc_lock(gc); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); +- irq_gc_unlock(gc); +-} +-#else +-#define aic_suspend NULL +-#define aic_resume NULL +-#define aic_pm_shutdown NULL +-#endif /* CONFIG_PM */ +- +-static void __init aic_hw_init(struct irq_domain *domain) +-{ +- struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0); +- int i; +- +- /* +- * Perform 8 End Of Interrupt Command to make sure AIC +- * will not Lock out nIRQ +- */ +- for (i = 0; i < 8; i++) +- irq_reg_writel(gc, 0, AT91_AIC_EOICR); +- +- /* +- * Spurious Interrupt ID in Spurious Vector Register. 
+- * When there is no current interrupt, the IRQ Vector Register +- * reads the value stored in AIC_SPU +- */ +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_SPU); +- +- /* No debugging in AIC: Debug (Protect) Control Register */ +- irq_reg_writel(gc, 0, AT91_AIC_DCR); +- +- /* Disable and clear all interrupts initially */ +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR); +- irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR); +- +- for (i = 0; i < 32; i++) +- irq_reg_writel(gc, i, AT91_AIC_SVR(i)); +-} +- +-static int aic_irq_domain_xlate(struct irq_domain *d, +- struct device_node *ctrlr, +- const u32 *intspec, unsigned int intsize, +- irq_hw_number_t *out_hwirq, +- unsigned int *out_type) +-{ +- struct irq_domain_chip_generic *dgc = d->gc; +- struct irq_chip_generic *gc; +- unsigned long flags; +- unsigned smr; +- int idx; +- int ret; +- +- if (!dgc) +- return -EINVAL; +- +- ret = aic_common_irq_domain_xlate(d, ctrlr, intspec, intsize, +- out_hwirq, out_type); +- if (ret) +- return ret; +- +- idx = intspec[0] / dgc->irqs_per_chip; +- if (idx >= dgc->num_chips) +- return -EINVAL; +- +- gc = dgc->gc[idx]; +- +- irq_gc_lock_irqsave(gc, flags); +- smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); +- aic_common_set_priority(intspec[2], &smr); +- irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); +- irq_gc_unlock_irqrestore(gc, flags); +- +- return ret; +-} +- +-static const struct irq_domain_ops aic_irq_ops = { +- .map = irq_map_generic_chip, +- .xlate = aic_irq_domain_xlate, +-}; +- +-static void __init at91rm9200_aic_irq_fixup(void) +-{ +- aic_common_rtc_irq_fixup(); +-} +- +-static void __init at91sam9260_aic_irq_fixup(void) +-{ +- aic_common_rtt_irq_fixup(); +-} +- +-static void __init at91sam9g45_aic_irq_fixup(void) +-{ +- aic_common_rtc_irq_fixup(); +- aic_common_rtt_irq_fixup(); +-} +- +-static const struct of_device_id aic_irq_fixups[] __initconst = { +- { .compatible = "atmel,at91rm9200", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9g45", .data = at91sam9g45_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9n12", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9rl", .data = at91sam9g45_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9x5", .data = at91rm9200_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9260", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9261", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9263", .data = at91sam9260_aic_irq_fixup }, +- { .compatible = "atmel,at91sam9g20", .data = at91sam9260_aic_irq_fixup }, +- { /* sentinel */ }, +-}; +- +-static int __init aic_of_init(struct device_node *node, +- struct device_node *parent) +-{ +- struct irq_chip_generic *gc; +- struct irq_domain *domain; +- +- if (aic_domain) +- return -EEXIST; +- +- domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic", +- NR_AIC_IRQS, aic_irq_fixups); +- if (IS_ERR(domain)) +- return PTR_ERR(domain); +- +- aic_domain = domain; +- gc = irq_get_domain_generic_chip(domain, 0); +- +- gc->chip_types[0].regs.eoi = AT91_AIC_EOICR; +- gc->chip_types[0].regs.enable = AT91_AIC_IECR; +- gc->chip_types[0].regs.disable = AT91_AIC_IDCR; +- gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; +- gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; +- gc->chip_types[0].chip.irq_retrigger = aic_retrigger; +- gc->chip_types[0].chip.irq_set_type = aic_set_type; +- gc->chip_types[0].chip.irq_suspend = aic_suspend; +- gc->chip_types[0].chip.irq_resume = aic_resume; +- 
gc->chip_types[0].chip.irq_pm_shutdown = aic_pm_shutdown; +- +- aic_hw_init(domain); +- set_handle_irq(aic_handle); +- +- return 0; +-} +-IRQCHIP_DECLARE(at91rm9200_aic, "atmel,at91rm9200-aic", aic_of_init); +diff -uprN kernel/drivers/irqchip/irq-bcm2835.c kernel_new/drivers/irqchip/irq-bcm2835.c +--- kernel/drivers/irqchip/irq-bcm2835.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm2835.c 2021-04-01 18:28:07.663863278 +0800 +@@ -110,7 +110,12 @@ static void armctrl_unmask_irq(struct ir + static struct irq_chip armctrl_chip = { + .name = "ARMCTRL-level", + .irq_mask = armctrl_mask_irq, +- .irq_unmask = armctrl_unmask_irq ++ .irq_unmask = armctrl_unmask_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = armctrl_mask_irq, ++ .irq_release = armctrl_unmask_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int armctrl_xlate(struct irq_domain *d, struct device_node *ctrlr, +@@ -240,7 +245,7 @@ static void __exception_irq_entry bcm283 + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) +- handle_domain_irq(intc.domain, hwirq, regs); ++ ipipe_handle_domain_irq(intc.domain, hwirq, regs); + } + + static void bcm2836_chained_handle_irq(struct irq_desc *desc) +@@ -248,7 +253,7 @@ static void bcm2836_chained_handle_irq(s + u32 hwirq; + + while ((hwirq = get_next_armctrl_hwirq()) != ~0) +- generic_handle_irq(irq_linear_revmap(intc.domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_linear_revmap(intc.domain, hwirq)); + } + + IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic", +diff -uprN kernel/drivers/irqchip/irq-bcm2836.c kernel_new/drivers/irqchip/irq-bcm2836.c +--- kernel/drivers/irqchip/irq-bcm2836.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm2836.c 2021-04-01 18:28:07.663863278 +0800 +@@ -48,40 +48,68 @@ static void bcm2836_arm_irqchip_unmask_p + writel(readl(reg) | BIT(bit), reg); + } + +-static void bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) ++static void __bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) + { + bcm2836_arm_irqchip_mask_per_cpu_irq(LOCAL_TIMER_INT_CONTROL0, + d->hwirq - LOCAL_IRQ_CNTPSIRQ, +- smp_processor_id()); ++ raw_smp_processor_id()); + } + +-static void bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) ++static void bcm2836_arm_irqchip_mask_timer_irq(struct irq_data *d) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ __bcm2836_arm_irqchip_mask_timer_irq(d); ++ hard_local_irq_restore(flags); ++} ++ ++static void __bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) + { + bcm2836_arm_irqchip_unmask_per_cpu_irq(LOCAL_TIMER_INT_CONTROL0, + d->hwirq - LOCAL_IRQ_CNTPSIRQ, +- smp_processor_id()); ++ raw_smp_processor_id()); ++} ++ ++static void bcm2836_arm_irqchip_unmask_timer_irq(struct irq_data *d) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ __bcm2836_arm_irqchip_unmask_timer_irq(d); ++ hard_local_irq_restore(flags); + } + + static struct irq_chip bcm2836_arm_irqchip_timer = { + .name = "bcm2836-timer", + .irq_mask = bcm2836_arm_irqchip_mask_timer_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_timer_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = __bcm2836_arm_irqchip_mask_timer_irq, ++ .irq_release = __bcm2836_arm_irqchip_unmask_timer_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void bcm2836_arm_irqchip_mask_pmu_irq(struct irq_data *d) + { +- writel(1 << smp_processor_id(), intc.base + LOCAL_PM_ROUTING_CLR); ++ writel(1 << raw_smp_processor_id(), intc.base + LOCAL_PM_ROUTING_CLR); + } + + static void 
bcm2836_arm_irqchip_unmask_pmu_irq(struct irq_data *d) + { +- writel(1 << smp_processor_id(), intc.base + LOCAL_PM_ROUTING_SET); ++ writel(1 << raw_smp_processor_id(), intc.base + LOCAL_PM_ROUTING_SET); + } + + static struct irq_chip bcm2836_arm_irqchip_pmu = { + .name = "bcm2836-pmu", + .irq_mask = bcm2836_arm_irqchip_mask_pmu_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_pmu_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2836_arm_irqchip_mask_pmu_irq, ++ .irq_release = bcm2836_arm_irqchip_unmask_pmu_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void bcm2836_arm_irqchip_mask_gpu_irq(struct irq_data *d) +@@ -96,6 +124,11 @@ static struct irq_chip bcm2836_arm_irqch + .name = "bcm2836-gpu", + .irq_mask = bcm2836_arm_irqchip_mask_gpu_irq, + .irq_unmask = bcm2836_arm_irqchip_unmask_gpu_irq, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2836_arm_irqchip_mask_gpu_irq, ++ .irq_release = bcm2836_arm_irqchip_unmask_gpu_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int bcm2836_map(struct irq_domain *d, unsigned int irq, +@@ -132,7 +165,7 @@ static int bcm2836_map(struct irq_domain + static void + __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs) + { +- int cpu = smp_processor_id(); ++ int cpu = raw_smp_processor_id(); + u32 stat; + + stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu); +@@ -144,12 +177,12 @@ __exception_irq_entry bcm2836_arm_irqchi + u32 ipi = ffs(mbox_val) - 1; + + writel(1 << ipi, mailbox0); +- handle_IPI(ipi, regs); ++ ipipe_handle_multi_ipi(ipi, regs); + #endif + } else if (stat) { + u32 hwirq = ffs(stat) - 1; + +- handle_domain_irq(intc.domain, hwirq, regs); ++ ipipe_handle_domain_irq(intc.domain, hwirq, regs); + } + } + +diff -uprN kernel/drivers/irqchip/irq-bcm7120-l2.c kernel_new/drivers/irqchip/irq-bcm7120-l2.c +--- kernel/drivers/irqchip/irq-bcm7120-l2.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-bcm7120-l2.c 2021-04-01 18:28:07.663863278 +0800 +@@ -61,6 +61,7 @@ static void bcm7120_l2_intc_irq_handle(s + struct bcm7120_l2_intc_data *b = data->b; + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int idx; ++ unsigned long flags; + + chained_irq_enter(chip, desc); + +@@ -71,11 +72,11 @@ static void bcm7120_l2_intc_irq_handle(s + unsigned long pending; + int hwirq; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + pending = irq_reg_readl(gc, b->stat_offset[idx]) & + gc->mask_cache & + data->irq_map_mask[idx]; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + + for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { + generic_handle_irq(irq_find_mapping(b->domain, +@@ -90,22 +91,24 @@ static void bcm7120_l2_intc_suspend(stru + { + struct bcm7120_l2_intc_data *b = gc->private; + struct irq_chip_type *ct = gc->chip_types; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + if (b->can_wake) + irq_reg_writel(gc, gc->mask_cache | gc->wake_active, + ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static void bcm7120_l2_intc_resume(struct irq_chip_generic *gc) + { + struct irq_chip_type *ct = gc->chip_types; ++ unsigned long flags; + + /* Restore the saved mask */ +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, gc->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static int bcm7120_l2_intc_init_one(struct device_node *dn, +diff -uprN kernel/drivers/irqchip/irq-brcmstb-l2.c kernel_new/drivers/irqchip/irq-brcmstb-l2.c +--- kernel/drivers/irqchip/irq-brcmstb-l2.c 
2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-brcmstb-l2.c 2021-04-01 18:28:07.663863278 +0800 +@@ -131,7 +131,7 @@ static void brcmstb_l2_intc_suspend(stru + struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; + +- irq_gc_lock_irqsave(gc, flags); ++ flags = irq_gc_lock(gc); + /* Save the current mask */ + b->saved_mask = irq_reg_readl(gc, ct->regs.mask); + +@@ -140,7 +140,7 @@ static void brcmstb_l2_intc_suspend(stru + irq_reg_writel(gc, ~gc->wake_active, ct->regs.disable); + irq_reg_writel(gc, gc->wake_active, ct->regs.enable); + } +- irq_gc_unlock_irqrestore(gc, flags); ++ irq_gc_unlock(gc, flags); + } + + static void brcmstb_l2_intc_resume(struct irq_data *d) +@@ -150,7 +150,7 @@ static void brcmstb_l2_intc_resume(struc + struct brcmstb_l2_intc_data *b = gc->private; + unsigned long flags; + +- irq_gc_lock_irqsave(gc, flags); ++ flags = irq_gc_lock(gc); + if (ct->chip.irq_ack) { + /* Clear unmasked non-wakeup interrupts */ + irq_reg_writel(gc, ~b->saved_mask & ~gc->wake_active, +@@ -160,7 +160,7 @@ static void brcmstb_l2_intc_resume(struc + /* Restore the saved mask */ + irq_reg_writel(gc, b->saved_mask, ct->regs.disable); + irq_reg_writel(gc, ~b->saved_mask, ct->regs.enable); +- irq_gc_unlock_irqrestore(gc, flags); ++ irq_gc_unlock(gc, flags); + } + + static int __init brcmstb_l2_intc_of_init(struct device_node *np, +diff -uprN kernel/drivers/irqchip/irq-crossbar.c kernel_new/drivers/irqchip/irq-crossbar.c +--- kernel/drivers/irqchip/irq-crossbar.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-crossbar.c 2021-04-01 18:28:07.663863278 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define IRQ_FREE -1 + #define IRQ_RESERVED -2 +@@ -69,10 +70,15 @@ static struct irq_chip crossbar_chip = { + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, + .flags = IRQCHIP_MASK_ON_SUSPEND | +- IRQCHIP_SKIP_SET_WAKE, ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE, + #ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, + #endif ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif + }; + + static int allocate_gic_irq(struct irq_domain *domain, unsigned virq, +diff -uprN kernel/drivers/irqchip/irq-dw-apb-ictl.c kernel_new/drivers/irqchip/irq-dw-apb-ictl.c +--- kernel/drivers/irqchip/irq-dw-apb-ictl.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-dw-apb-ictl.c 2021-04-01 18:28:07.664863277 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #define APB_INT_ENABLE_L 0x00 + #define APB_INT_ENABLE_H 0x04 +@@ -42,7 +43,7 @@ static void dw_apb_ictl_handler(struct i + u32 hwirq = ffs(stat) - 1; + u32 virq = irq_find_mapping(d, gc->irq_base + hwirq); + +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + stat &= ~(1 << hwirq); + } + } +@@ -55,11 +56,12 @@ static void dw_apb_ictl_resume(struct ir + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + writel_relaxed(~0, gc->reg_base + ct->regs.enable); + writel_relaxed(*ct->mask_cache, gc->reg_base + ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + #else + #define dw_apb_ictl_resume NULL +@@ -144,6 +146,7 @@ static int __init dw_apb_ictl_init(struc + gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; + 
gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_resume = dw_apb_ictl_resume; ++ gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; + } + + irq_set_chained_handler_and_data(irq, dw_apb_ictl_handler, domain); +diff -uprN kernel/drivers/irqchip/irq-gic.c kernel_new/drivers/irqchip/irq-gic.c +--- kernel/drivers/irqchip/irq-gic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic.c 2021-04-01 18:28:07.664863277 +0800 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -91,9 +92,17 @@ struct gic_chip_data { + #endif + }; + ++#ifdef CONFIG_IPIPE ++#define pipeline_lock(__flags) do { (__flags) = hard_local_irq_save(); } while (0) ++#define pipeline_unlock(__flags) hard_local_irq_restore(__flags) ++#else ++#define pipeline_lock(__flags) do { (void)__flags; } while (0) ++#define pipeline_unlock(__flags) do { (void)__flags; } while (0) ++#endif ++ + #ifdef CONFIG_BL_SWITCHER + +-static DEFINE_RAW_SPINLOCK(cpu_map_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(cpu_map_lock); + + #define gic_lock_irqsave(f) \ + raw_spin_lock_irqsave(&cpu_map_lock, (f)) +@@ -204,7 +213,12 @@ static int gic_peek_irq(struct irq_data + + static void gic_mask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ pipeline_lock(flags); ++ ipipe_lock_irq(d->irq); + gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); ++ pipeline_unlock(flags); + } + + static void gic_eoimode1_mask_irq(struct irq_data *d) +@@ -224,7 +238,12 @@ static void gic_eoimode1_mask_irq(struct + + static void gic_unmask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ pipeline_lock(flags); + gic_poke_irq(d, GIC_DIST_ENABLE_SET); ++ ipipe_unlock_irq(d->irq); ++ pipeline_unlock(flags); + } + + static void gic_eoi_irq(struct irq_data *d) +@@ -241,6 +260,27 @@ static void gic_eoimode1_eoi_irq(struct + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE); + } + ++#ifdef CONFIG_IPIPE ++static void gic_hold_irq(struct irq_data *d) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(d); ++ ++ gic_poke_irq(d, GIC_DIST_ENABLE_CLEAR); ++ ++ if (chip->irq_eoi == gic_eoimode1_eoi_irq) { ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR); ++ gic_eoimode1_eoi_irq(d); ++ } else ++ gic_eoi_irq(d); ++} ++ ++static void gic_release_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GIC_DIST_ENABLE_SET); ++} ++#endif /* CONFIG_IPIPE */ ++ + static int gic_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) + { +@@ -364,7 +404,7 @@ static void __exception_irq_entry gic_ha + if (static_branch_likely(&supports_deactivate_key)) + writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + isb(); +- handle_domain_irq(gic->domain, irqnr, regs); ++ ipipe_handle_domain_irq(gic->domain, irqnr, regs); + continue; + } + if (irqnr < 16) { +@@ -380,7 +420,7 @@ static void __exception_irq_entry gic_ha + * Pairs with the write barrier in gic_raise_softirq + */ + smp_rmb(); +- handle_IPI(irqnr, regs); ++ ipipe_handle_multi_ipi(irqnr, regs); + #endif + continue; + } +@@ -408,7 +448,7 @@ static void gic_handle_cascade_irq(struc + handle_bad_irq(desc); + } else { + isb(); +- generic_handle_irq(cascade_irq); ++ ipipe_handle_demuxed_irq(cascade_irq); + } + + out: +@@ -420,11 +460,16 @@ static const struct irq_chip gic_chip = + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + 
.irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | +- IRQCHIP_MASK_ON_SUSPEND, ++ IRQCHIP_MASK_ON_SUSPEND | ++ IRQCHIP_PIPELINE_SAFE, + }; + + void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) +@@ -482,7 +527,6 @@ static void gic_cpu_if_up(struct gic_chi + writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); + } + +- + static void gic_dist_init(struct gic_chip_data *gic) + { + unsigned int i; +diff -uprN kernel/drivers/irqchip/irq-gic-v2m.c kernel_new/drivers/irqchip/irq-gic-v2m.c +--- kernel/drivers/irqchip/irq-gic-v2m.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v2m.c 2021-04-01 18:28:07.664863277 +0800 +@@ -74,14 +74,22 @@ struct v2m_data { + + static void gicv2m_mask_msi_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); ++ hard_cond_local_irq_restore(flags); + } + + static void gicv2m_unmask_msi_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); ++ hard_cond_local_irq_restore(flags); + } + + static struct irq_chip gicv2m_msi_irq_chip = { +@@ -90,6 +98,11 @@ static struct irq_chip gicv2m_msi_irq_ch + .irq_unmask = gicv2m_unmask_msi_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_write_msi_msg = pci_msi_domain_write_msg, ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info gicv2m_msi_domain_info = { +@@ -120,6 +133,11 @@ static struct irq_chip gicv2m_irq_chip = + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = gicv2m_compose_msi_msg, ++#ifdef CONFIG_IPIPE ++ .irq_hold = irq_chip_hold_parent, ++ .irq_release = irq_chip_release_parent, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain, +@@ -236,6 +254,7 @@ static bool is_msi_spi_valid(u32 base, u + + static struct irq_chip gicv2m_pmsi_irq_chip = { + .name = "pMSI", ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_ops gicv2m_pmsi_ops = { +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c kernel_new/drivers/irqchip/irq-gic-v3.c +--- kernel/drivers/irqchip/irq-gic-v3.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c 2021-04-02 09:09:37.322234347 +0800 +@@ -243,7 +243,12 @@ static void gic_poke_irq(struct irq_data + + static void gic_mask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); ++ ipipe_lock_irq(d->irq); + gic_poke_irq(d, GICD_ICENABLER); ++ hard_cond_local_irq_restore(flags); + } + + static void gic_eoimode1_mask_irq(struct irq_data *d) +@@ -263,7 +268,12 @@ static void gic_eoimode1_mask_irq(struct + + static void gic_unmask_irq(struct irq_data *d) + { ++ unsigned long flags; ++ ++ flags = hard_cond_local_irq_save(); + gic_poke_irq(d, GICD_ISENABLER); ++ ipipe_unlock_irq(d->irq); ++ hard_cond_local_irq_restore(flags); + } + + static inline bool gic_supports_nmi(void) +@@ -421,6 +431,27 @@ static void gic_eoimode1_eoi_irq(struct + gic_write_dir(gic_irq(d)); + } + ++#ifdef CONFIG_IPIPE ++static void gic_hold_irq(struct irq_data *d) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(d); ++ ++ gic_poke_irq(d, 
GICD_ICENABLER); ++ ++ if (chip->irq_eoi == gic_eoimode1_eoi_irq) { ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GICD_ICACTIVER); ++ gic_eoimode1_eoi_irq(d); ++ } else ++ gic_eoi_irq(d); ++} ++ ++static void gic_release_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ISENABLER); ++} ++#endif /* CONFIG_IPIPE */ ++ + static int gic_set_type(struct irq_data *d, unsigned int type) + { + unsigned int irq = gic_irq(d); +@@ -537,7 +568,7 @@ static asmlinkage void __exception_irq_e + else + isb(); + +- err = handle_domain_irq(gic_data.domain, irqnr, regs); ++ err = ipipe_handle_domain_irq(gic_data.domain, irqnr, regs); + if (err) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); +@@ -556,7 +587,7 @@ static asmlinkage void __exception_irq_e + * that any shared data read by handle_IPI will + * be read after the ACK. + */ +- handle_IPI(irqnr, regs); ++ ipipe_handle_multi_ipi(irqnr, regs); + #else + WARN_ONCE(true, "Unexpected SGI received!\n"); + #endif +@@ -1270,6 +1301,10 @@ static struct irq_chip gic_chip = { + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + .irq_set_affinity = gic_set_affinity, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, +@@ -1277,6 +1312,7 @@ static struct irq_chip gic_chip = { + .irq_nmi_teardown = gic_irq_nmi_teardown, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE | + IRQCHIP_MASK_ON_SUSPEND, + }; + +@@ -1286,6 +1322,10 @@ static struct irq_chip gic_eoimode1_chip + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoimode1_eoi_irq, + .irq_set_type = gic_set_type, ++#ifdef CONFIG_IPIPE ++ .irq_hold = gic_hold_irq, ++ .irq_release = gic_release_irq, ++#endif + .irq_set_affinity = gic_set_affinity, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, +@@ -1294,6 +1334,7 @@ static struct irq_chip gic_eoimode1_chip + .irq_nmi_teardown = gic_irq_nmi_teardown, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_PIPELINE_SAFE | + IRQCHIP_MASK_ON_SUSPEND, + }; + +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c.orig kernel_new/drivers/irqchip/irq-gic-v3.c.orig +--- kernel/drivers/irqchip/irq-gic-v3.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c.orig 2020-12-21 21:59:19.000000000 +0800 +@@ -0,0 +1,2118 @@ ++/* ++ * Copyright (C) 2013-2017 ARM Limited, All Rights Reserved. ++ * Author: Marc Zyngier ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++ ++#define pr_fmt(fmt) "GICv3: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "irq-gic-common.h" ++ ++#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) ++ ++struct redist_region { ++ void __iomem *redist_base; ++ phys_addr_t phys_base; ++ bool single_redist; ++}; ++ ++struct gic_chip_data { ++ struct fwnode_handle *fwnode; ++ void __iomem *dist_base; ++ struct redist_region *redist_regions; ++ struct rdists rdists; ++ struct irq_domain *domain; ++ u64 redist_stride; ++ u32 nr_redist_regions; ++ bool has_rss; ++ unsigned int irq_nr; ++ struct partition_desc *ppi_descs[16]; ++}; ++ ++static struct gic_chip_data gic_data __read_mostly; ++static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); ++ ++/* ++ * The behaviours of RPR and PMR registers differ depending on the value of ++ * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the ++ * distributor and redistributors depends on whether security is enabled in the ++ * GIC. ++ * ++ * When security is enabled, non-secure priority values from the (re)distributor ++ * are presented to the GIC CPUIF as follow: ++ * (GIC_(R)DIST_PRI[irq] >> 1) | 0x80; ++ * ++ * If SCR_EL3.FIQ == 1, the values writen to/read from PMR and RPR at non-secure ++ * EL1 are subject to a similar operation thus matching the priorities presented ++ * from the (re)distributor when security is enabled. ++ * ++ * see GICv3/GICv4 Architecture Specification (IHI0069D): ++ * - section 4.8.1 Non-secure accesses to register fields for Secure interrupt ++ * priorities. ++ * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 ++ * interrupt. ++ * ++ * For now, we only support pseudo-NMIs if we have non-secure view of ++ * priorities. ++ */ ++static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); ++ ++/* ++ * Global static key controlling whether an update to PMR allowing more ++ * interrupts requires to be propagated to the redistributor (DSB SY). ++ * And this needs to be exported for modules to be able to enable ++ * interrupts... ++ */ ++DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); ++EXPORT_SYMBOL(gic_pmr_sync); ++ ++/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ ++static refcount_t ppi_nmi_refs[16]; ++ ++static struct gic_kvm_info gic_v3_kvm_info; ++static DEFINE_PER_CPU(bool, has_rss); ++ ++#define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) ++#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) ++#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) ++#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) ++ ++/* Our default, arbitrary priority value. Linux only uses one anyway. */ ++#define DEFAULT_PMR_VALUE 0xf0 ++ ++static inline unsigned int gic_irq(struct irq_data *d) ++{ ++ return d->hwirq; ++} ++ ++static inline int gic_irq_in_rdist(struct irq_data *d) ++{ ++ return gic_irq(d) < 32; ++} ++ ++static inline void __iomem *gic_dist_base(struct irq_data *d) ++{ ++ if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ ++ return gic_data_rdist_sgi_base(); ++ ++ if (d->hwirq <= 1023) /* SPI -> dist_base */ ++ return gic_data.dist_base; ++ ++ return NULL; ++} ++ ++static void gic_do_wait_for_rwp(void __iomem *base) ++{ ++ u32 count = 1000000; /* 1s! 
*/ ++ ++ while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { ++ count--; ++ if (!count) { ++ pr_err_ratelimited("RWP timeout, gone fishing\n"); ++ return; ++ } ++ cpu_relax(); ++ udelay(1); ++ }; ++} ++ ++/* Wait for completion of a distributor change */ ++static void gic_dist_wait_for_rwp(void) ++{ ++ gic_do_wait_for_rwp(gic_data.dist_base); ++} ++ ++/* Wait for completion of a redistributor change */ ++static void gic_redist_wait_for_rwp(void) ++{ ++ gic_do_wait_for_rwp(gic_data_rdist_rd_base()); ++} ++ ++#ifdef CONFIG_ARM64 ++ ++static u64 __maybe_unused gic_read_iar(void) ++{ ++ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) ++ return gic_read_iar_cavium_thunderx(); ++ else ++ return gic_read_iar_common(); ++} ++#endif ++ ++static void gic_enable_redist(bool enable) ++{ ++ void __iomem *rbase; ++ u32 count = 1000000; /* 1s! */ ++ u32 val; ++ ++ rbase = gic_data_rdist_rd_base(); ++ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable) ++ /* Wake up this CPU redistributor */ ++ val &= ~GICR_WAKER_ProcessorSleep; ++ else ++ val |= GICR_WAKER_ProcessorSleep; ++ writel_relaxed(val, rbase + GICR_WAKER); ++ ++ if (!enable) { /* Check that GICR_WAKER is writeable */ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (!(val & GICR_WAKER_ProcessorSleep)) ++ return; /* No PM support in this redistributor */ ++ } ++ ++ while (--count) { ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) ++ break; ++ cpu_relax(); ++ udelay(1); ++ }; ++ if (!count) ++ pr_err_ratelimited("redistributor failed to %s...\n", ++ enable ? "wakeup" : "sleep"); ++} ++ ++/* ++ * Routines to disable, enable, EOI and route interrupts ++ */ ++static int gic_peek_irq(struct irq_data *d, u32 offset) ++{ ++ u32 mask = 1 << (gic_irq(d) % 32); ++ void __iomem *base; ++ ++ if (gic_irq_in_rdist(d)) ++ base = gic_data_rdist_sgi_base(); ++ else ++ base = gic_data.dist_base; ++ ++ return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); ++} ++ ++static void gic_poke_irq(struct irq_data *d, u32 offset) ++{ ++ u32 mask = 1 << (gic_irq(d) % 32); ++ void (*rwp_wait)(void); ++ void __iomem *base; ++ ++ if (gic_irq_in_rdist(d)) { ++ base = gic_data_rdist_sgi_base(); ++ rwp_wait = gic_redist_wait_for_rwp; ++ } else { ++ base = gic_data.dist_base; ++ rwp_wait = gic_dist_wait_for_rwp; ++ } ++ ++ writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4); ++ rwp_wait(); ++} ++ ++static void gic_mask_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ICENABLER); ++} ++ ++static void gic_eoimode1_mask_irq(struct irq_data *d) ++{ ++ gic_mask_irq(d); ++ /* ++ * When masking a forwarded interrupt, make sure it is ++ * deactivated as well. ++ * ++ * This ensures that an interrupt that is getting ++ * disabled/masked will not get "stuck", because there is ++ * noone to deactivate it (guest is being terminated). 
++ */ ++ if (irqd_is_forwarded_to_vcpu(d)) ++ gic_poke_irq(d, GICD_ICACTIVER); ++} ++ ++static void gic_unmask_irq(struct irq_data *d) ++{ ++ gic_poke_irq(d, GICD_ISENABLER); ++} ++ ++static inline bool gic_supports_nmi(void) ++{ ++ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && ++ static_branch_likely(&supports_pseudo_nmis); ++} ++ ++bool gic_supports_pseudo_nmis(void) ++{ ++ return gic_supports_nmi(); ++} ++ ++static int gic_irq_set_irqchip_state(struct irq_data *d, ++ enum irqchip_irq_state which, bool val) ++{ ++ u32 reg; ++ ++ if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ ++ return -EINVAL; ++ ++ switch (which) { ++ case IRQCHIP_STATE_PENDING: ++ reg = val ? GICD_ISPENDR : GICD_ICPENDR; ++ break; ++ ++ case IRQCHIP_STATE_ACTIVE: ++ reg = val ? GICD_ISACTIVER : GICD_ICACTIVER; ++ break; ++ ++ case IRQCHIP_STATE_MASKED: ++ reg = val ? GICD_ICENABLER : GICD_ISENABLER; ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ gic_poke_irq(d, reg); ++ return 0; ++} ++ ++static int gic_irq_get_irqchip_state(struct irq_data *d, ++ enum irqchip_irq_state which, bool *val) ++{ ++ if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */ ++ return -EINVAL; ++ ++ switch (which) { ++ case IRQCHIP_STATE_PENDING: ++ *val = gic_peek_irq(d, GICD_ISPENDR); ++ break; ++ ++ case IRQCHIP_STATE_ACTIVE: ++ *val = gic_peek_irq(d, GICD_ISACTIVER); ++ break; ++ ++ case IRQCHIP_STATE_MASKED: ++ *val = !gic_peek_irq(d, GICD_ISENABLER); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void gic_irq_set_prio(struct irq_data *d, u8 prio) ++{ ++ void __iomem *base = gic_dist_base(d); ++ ++ writeb_relaxed(prio, base + GICD_IPRIORITYR + gic_irq(d)); ++} ++ ++static int gic_irq_nmi_setup(struct irq_data *d) ++{ ++ struct irq_desc *desc = irq_to_desc(d->irq); ++ ++ if (!gic_supports_nmi()) ++ return -EINVAL; ++ ++ if (gic_peek_irq(d, GICD_ISENABLER)) { ++ pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); ++ return -EINVAL; ++ } ++ ++ /* ++ * A secondary irq_chip should be in charge of LPI request, ++ * it should not be possible to get there ++ */ ++ if (WARN_ON(gic_irq(d) >= 8192)) ++ return -EINVAL; ++ ++ /* desc lock should already be held */ ++ if (gic_irq(d) < 32) { ++ /* Setting up PPI as NMI, only switch handler for first NMI */ ++ if (!refcount_inc_not_zero(&ppi_nmi_refs[gic_irq(d) - 16])) { ++ refcount_set(&ppi_nmi_refs[gic_irq(d) - 16], 1); ++ desc->handle_irq = handle_percpu_devid_fasteoi_nmi; ++ } ++ } else { ++ desc->handle_irq = handle_fasteoi_nmi; ++ } ++ ++ gic_irq_set_prio(d, GICD_INT_NMI_PRI); ++ ++ return 0; ++} ++ ++static void gic_irq_nmi_teardown(struct irq_data *d) ++{ ++ struct irq_desc *desc = irq_to_desc(d->irq); ++ ++ if (WARN_ON(!gic_supports_nmi())) ++ return; ++ ++ if (gic_peek_irq(d, GICD_ISENABLER)) { ++ pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); ++ return; ++ } ++ ++ /* ++ * A secondary irq_chip should be in charge of LPI request, ++ * it should not be possible to get there ++ */ ++ if (WARN_ON(gic_irq(d) >= 8192)) ++ return; ++ ++ /* desc lock should already be held */ ++ if (gic_irq(d) < 32) { ++ /* Tearing down NMI, only switch handler for last NMI */ ++ if (refcount_dec_and_test(&ppi_nmi_refs[gic_irq(d) - 16])) ++ desc->handle_irq = handle_percpu_devid_irq; ++ } else { ++ desc->handle_irq = handle_fasteoi_irq; ++ } ++ ++ gic_irq_set_prio(d, GICD_INT_DEF_PRI); ++} ++ ++static void gic_eoi_irq(struct irq_data *d) ++{ ++ gic_write_eoir(gic_irq(d)); ++} ++ ++static void gic_eoimode1_eoi_irq(struct irq_data *d) ++{ ++ /* ++ * No 
need to deactivate an LPI, or an interrupt that ++ * is is getting forwarded to a vcpu. ++ */ ++ if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) ++ return; ++ gic_write_dir(gic_irq(d)); ++} ++ ++static int gic_set_type(struct irq_data *d, unsigned int type) ++{ ++ unsigned int irq = gic_irq(d); ++ void (*rwp_wait)(void); ++ void __iomem *base; ++ ++ /* Interrupt configuration for SGIs can't be changed */ ++ if (irq < 16) ++ return -EINVAL; ++ ++ /* SPIs have restrictions on the supported types */ ++ if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH && ++ type != IRQ_TYPE_EDGE_RISING) ++ return -EINVAL; ++ ++ if (gic_irq_in_rdist(d)) { ++ base = gic_data_rdist_sgi_base(); ++ rwp_wait = gic_redist_wait_for_rwp; ++ } else { ++ base = gic_data.dist_base; ++ rwp_wait = gic_dist_wait_for_rwp; ++ } ++ ++ return gic_configure_irq(irq, type, base, rwp_wait); ++} ++ ++static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) ++{ ++ if (vcpu) ++ irqd_set_forwarded_to_vcpu(d); ++ else ++ irqd_clr_forwarded_to_vcpu(d); ++ return 0; ++} ++ ++static u64 gic_mpidr_to_affinity(unsigned long mpidr) ++{ ++ u64 aff; ++ ++ aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ return aff; ++} ++ ++static void gic_deactivate_unhandled(u32 irqnr) ++{ ++ if (static_branch_likely(&supports_deactivate_key)) { ++ if (irqnr < 8192) ++ gic_write_dir(irqnr); ++ } else { ++ gic_write_eoir(irqnr); ++ } ++} ++ ++static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) ++{ ++ bool irqs_enabled = interrupts_enabled(regs); ++ int err; ++ ++ if (unlikely(irqnr < 16)) { ++ gic_write_eoir(irqnr); ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_dir(irqnr); ++#ifdef CONFIG_SMP ++ handle_IPI(irqnr, regs); ++#endif ++ return; ++ } ++ ++ if (irqs_enabled) ++ nmi_enter(); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_eoir(irqnr); ++ /* ++ * Leave the PSR.I bit set to prevent other NMIs to be ++ * received while handling this one. ++ * PSR.I will be restored when we ERET to the ++ * interrupted context. ++ */ ++ err = handle_domain_nmi(gic_data.domain, irqnr, regs); ++ if (err) ++ gic_deactivate_unhandled(irqnr); ++ ++ if (irqs_enabled) ++ nmi_exit(); ++} ++ ++static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) ++{ ++ u32 irqnr; ++ ++ irqnr = gic_read_iar(); ++ ++ if (gic_supports_nmi() && ++ unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) { ++ gic_handle_nmi(irqnr, regs); ++ return; ++ } ++ ++ if (gic_prio_masking_enabled()) { ++ gic_pmr_mask_irqs(); ++ gic_arch_enable_irqs(); ++ } ++ ++ if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) { ++ int err; ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_eoir(irqnr); ++ else ++ isb(); ++ ++ err = handle_domain_irq(gic_data.domain, irqnr, regs); ++ if (err) { ++ WARN_ONCE(true, "Unexpected interrupt received!\n"); ++ gic_deactivate_unhandled(irqnr); ++ } ++ return; ++ } ++ if (irqnr < 16) { ++ gic_write_eoir(irqnr); ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_write_dir(irqnr); ++#ifdef CONFIG_SMP ++ /* ++ * Unlike GICv2, we don't need an smp_rmb() here. ++ * The control dependency from gic_read_iar to ++ * the ISB in gic_write_eoir is enough to ensure ++ * that any shared data read by handle_IPI will ++ * be read after the ACK. 
++ */ ++ handle_IPI(irqnr, regs); ++#else ++ WARN_ONCE(true, "Unexpected SGI received!\n"); ++#endif ++ } ++} ++ ++static u32 gic_get_pribits(void) ++{ ++ u32 pribits; ++ ++ pribits = gic_read_ctlr(); ++ pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; ++ pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; ++ pribits++; ++ ++ return pribits; ++} ++ ++static bool gic_has_group0(void) ++{ ++ u32 val; ++ u32 old_pmr; ++ ++ old_pmr = gic_read_pmr(); ++ ++ /* ++ * Let's find out if Group0 is under control of EL3 or not by ++ * setting the highest possible, non-zero priority in PMR. ++ * ++ * If SCR_EL3.FIQ is set, the priority gets shifted down in ++ * order for the CPU interface to set bit 7, and keep the ++ * actual priority in the non-secure range. In the process, it ++ * looses the least significant bit and the actual priority ++ * becomes 0x80. Reading it back returns 0, indicating that ++ * we're don't have access to Group0. ++ */ ++ gic_write_pmr(BIT(8 - gic_get_pribits())); ++ val = gic_read_pmr(); ++ ++ gic_write_pmr(old_pmr); ++ ++ return val != 0; ++} ++ ++static void __init gic_dist_init(void) ++{ ++ unsigned int i; ++ u64 affinity; ++ void __iomem *base = gic_data.dist_base; ++ ++ /* Disable the distributor */ ++ writel_relaxed(0, base + GICD_CTLR); ++ gic_dist_wait_for_rwp(); ++ ++ /* ++ * Configure SPIs as non-secure Group-1. This will only matter ++ * if the GIC only has a single security state. This will not ++ * do the right thing if the kernel is running in secure mode, ++ * but that's not the intended use case anyway. ++ */ ++ for (i = 32; i < gic_data.irq_nr; i += 32) ++ writel_relaxed(~0, base + GICD_IGROUPR + i / 8); ++ ++ gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp); ++ ++ /* Enable distributor with ARE, Group1 */ ++ writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1, ++ base + GICD_CTLR); ++ ++ /* ++ * Set all global interrupts to the boot CPU only. ARE must be ++ * enabled. ++ */ ++ affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); ++ for (i = 32; i < gic_data.irq_nr; i++) ++ gic_write_irouter(affinity, base + GICD_IROUTER + i * 8); ++} ++ ++static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) ++{ ++ int ret = -ENODEV; ++ int i; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ void __iomem *ptr = gic_data.redist_regions[i].redist_base; ++ u64 typer; ++ u32 reg; ++ ++ reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; ++ if (reg != GIC_PIDR2_ARCH_GICv3 && ++ reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */ ++ pr_warn("No redistributor present @%p\n", ptr); ++ break; ++ } ++ ++ do { ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ ret = fn(gic_data.redist_regions + i, ptr); ++ if (!ret) ++ return 0; ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ ptr += gic_data.redist_stride; ++ } else { ++ ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ ++ if (typer & GICR_TYPER_VLPIS) ++ ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */ ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++ ++ return ret ? -ENODEV : 0; ++} ++ ++static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) ++{ ++ unsigned long mpidr = cpu_logical_map(smp_processor_id()); ++ u64 typer; ++ u32 aff; ++ ++ /* ++ * Convert affinity to a 32bit value that can be matched to ++ * GICR_TYPER bits [63:32]. 
++ */ ++ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ if ((typer >> 32) == aff) { ++ u64 offset = ptr - region->redist_base; ++ gic_data_rdist_rd_base() = ptr; ++ gic_data_rdist()->phys_base = region->phys_base + offset; ++ ++ pr_info("CPU%d: found redistributor %lx region %d:%pa\n", ++ smp_processor_id(), mpidr, ++ (int)(region - gic_data.redist_regions), ++ &gic_data_rdist()->phys_base); ++ return 0; ++ } ++ ++ /* Try next one */ ++ return 1; ++} ++ ++static int gic_populate_rdist(void) ++{ ++ if (gic_iterate_rdists(__gic_populate_rdist) == 0) ++ return 0; ++ ++ /* We couldn't even deal with ourselves... */ ++ WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n", ++ smp_processor_id(), ++ (unsigned long)cpu_logical_map(smp_processor_id())); ++ return -ENODEV; ++} ++ ++static int __gic_update_vlpi_properties(struct redist_region *region, ++ void __iomem *ptr) ++{ ++ u64 typer = gic_read_typer(ptr + GICR_TYPER); ++ gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); ++ gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS); ++ ++ return 1; ++} ++ ++static void gic_update_vlpi_properties(void) ++{ ++ gic_iterate_rdists(__gic_update_vlpi_properties); ++ pr_info("%sVLPI support, %sdirect LPI support\n", ++ !gic_data.rdists.has_vlpis ? "no " : "", ++ !gic_data.rdists.has_direct_lpi ? "no " : ""); ++} ++ ++/* Check whether it's single security state view */ ++static inline bool gic_dist_security_disabled(void) ++{ ++ return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; ++} ++ ++static void gic_cpu_sys_reg_init(void) ++{ ++ int i, cpu = smp_processor_id(); ++ u64 mpidr = cpu_logical_map(cpu); ++ u64 need_rss = MPIDR_RS(mpidr); ++ bool group0; ++ u32 pribits; ++ ++ /* ++ * Need to check that the SRE bit has actually been set. If ++ * not, it means that SRE is disabled at EL2. We're going to ++ * die painfully, and there is nothing we can do about it. ++ * ++ * Kindly inform the luser. ++ */ ++ if (!gic_enable_sre()) ++ pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); ++ ++ pribits = gic_get_pribits(); ++ ++ group0 = gic_has_group0(); ++ ++ /* Set priority mask register */ ++ if (!gic_prio_masking_enabled()) { ++ write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); ++ } else { ++ /* ++ * Mismatch configuration with boot CPU, the system is likely ++ * to die as interrupt masking will not work properly on all ++ * CPUs ++ */ ++ WARN_ON(gic_supports_nmi() && group0 && ++ !gic_dist_security_disabled()); ++ } ++ ++ /* ++ * Some firmwares hand over to the kernel with the BPR changed from ++ * its reset value (and with a value large enough to prevent ++ * any pre-emptive interrupts from working at all). Writing a zero ++ * to BPR restores is reset value. 
++ */ ++ gic_write_bpr1(0); ++ ++ if (static_branch_likely(&supports_deactivate_key)) { ++ /* EOI drops priority only (mode 1) */ ++ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop); ++ } else { ++ /* EOI deactivates interrupt too (mode 0) */ ++ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); ++ } ++ ++ /* Always whack Group0 before Group1 */ ++ if (group0) { ++ switch(pribits) { ++ case 8: ++ case 7: ++ write_gicreg(0, ICC_AP0R3_EL1); ++ write_gicreg(0, ICC_AP0R2_EL1); ++ case 6: ++ write_gicreg(0, ICC_AP0R1_EL1); ++ case 5: ++ case 4: ++ write_gicreg(0, ICC_AP0R0_EL1); ++ } ++ ++ isb(); ++ } ++ ++ switch(pribits) { ++ case 8: ++ case 7: ++ write_gicreg(0, ICC_AP1R3_EL1); ++ write_gicreg(0, ICC_AP1R2_EL1); ++ case 6: ++ write_gicreg(0, ICC_AP1R1_EL1); ++ case 5: ++ case 4: ++ write_gicreg(0, ICC_AP1R0_EL1); ++ } ++ ++ isb(); ++ ++ /* ... and let's hit the road... */ ++ gic_write_grpen1(1); ++ ++ /* Keep the RSS capability status in per_cpu variable */ ++ per_cpu(has_rss, cpu) = !!(gic_read_ctlr() & ICC_CTLR_EL1_RSS); ++ ++ /* Check all the CPUs have capable of sending SGIs to other CPUs */ ++ for_each_online_cpu(i) { ++ bool have_rss = per_cpu(has_rss, i) && per_cpu(has_rss, cpu); ++ ++ need_rss |= MPIDR_RS(cpu_logical_map(i)); ++ if (need_rss && (!have_rss)) ++ pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n", ++ cpu, (unsigned long)mpidr, ++ i, (unsigned long)cpu_logical_map(i)); ++ } ++ ++ /** ++ * GIC spec says, when ICC_CTLR_EL1.RSS==1 and GICD_TYPER.RSS==0, ++ * writing ICC_ASGI1R_EL1 register with RS != 0 is a CONSTRAINED ++ * UNPREDICTABLE choice of : ++ * - The write is ignored. ++ * - The RS field is treated as 0. ++ */ ++ if (need_rss && (!gic_data.has_rss)) ++ pr_crit_once("RSS is required but GICD doesn't support it\n"); ++} ++ ++static bool gicv3_nolpi; ++ ++static int __init gicv3_nolpi_cfg(char *buf) ++{ ++ return strtobool(buf, &gicv3_nolpi); ++} ++early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg); ++ ++static int gic_dist_supports_lpis(void) ++{ ++ return (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && ++ !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) && ++ !gicv3_nolpi); ++} ++ ++static void gic_cpu_init(void) ++{ ++ void __iomem *rbase; ++ ++ /* Register ourselves with the rest of the world */ ++ if (gic_populate_rdist()) ++ return; ++ ++ gic_enable_redist(true); ++ ++ rbase = gic_data_rdist_sgi_base(); ++ ++ /* Configure SGIs/PPIs as non-secure Group-1 */ ++ writel_relaxed(~0, rbase + GICR_IGROUPR0); ++ ++ gic_cpu_config(rbase, gic_redist_wait_for_rwp); ++ ++ if (gic_supports_nmi()) ++ ipi_set_nmi_prio(rbase, GICD_INT_NMI_PRI); ++ ++ /* initialise system registers */ ++ gic_cpu_sys_reg_init(); ++} ++ ++#ifdef CONFIG_ASCEND_INIT_ALL_GICR ++struct workaround_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++static struct workaround_oem_info gicr_wkrd_info[] = { ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x300, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x301, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x400, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x401, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x402, ++ } ++}; ++ ++static void gic_check_hisi_workaround(void) ++{ ++ struct acpi_table_header *tbl; ++ acpi_status status = AE_OK; ++ int i; ++ ++ status = acpi_get_table(ACPI_SIG_MADT, 0, &tbl); ++ if 
(ACPI_FAILURE(status) || !tbl) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(gicr_wkrd_info); i++) { ++ if (!memcmp(gicr_wkrd_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(gicr_wkrd_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ gicr_wkrd_info[i].oem_revision == tbl->oem_revision) { ++ its_enable_init_all_gicr(); ++ break; ++ } ++ } ++ ++ acpi_put_table(tbl); ++} ++ ++static void gic_compute_nr_gicr(void) ++{ ++ int i; ++ int sum = 0; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ u64 typer; ++ void __iomem *ptr = gic_data.redist_regions[i].redist_base; ++ ++ do { ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ sum++; ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ ptr += gic_data.redist_stride; ++ } else { ++ ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ ++ if (typer & GICR_TYPER_VLPIS) ++ /* Skip VLPI_base + reserved page */ ++ ptr += SZ_64K * 2; ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++ ++ its_set_gicr_nr(sum); ++} ++ ++static void gic_enable_redist_others(void __iomem *rbase, bool enable) ++{ ++ u32 count = 1000000; /* 1s! */ ++ u32 val; ++ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable) ++ /* Wake up this CPU redistributor */ ++ val &= ~GICR_WAKER_ProcessorSleep; ++ else ++ val |= GICR_WAKER_ProcessorSleep; ++ writel_relaxed(val, rbase + GICR_WAKER); ++ ++ if (!enable) { /* Check that GICR_WAKER is writeable */ ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (!(val & GICR_WAKER_ProcessorSleep)) ++ return; /* No PM support in this redistributor */ ++ } ++ ++ while (--count) { ++ val = readl_relaxed(rbase + GICR_WAKER); ++ if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) ++ break; ++ cpu_relax(); ++ udelay(1); ++ }; ++ if (!count) ++ pr_err_ratelimited("redistributor failed to %s...\n", ++ enable ? "wakeup" : "sleep"); ++} ++ ++static int gic_rdist_cpu(void __iomem *ptr, unsigned int cpu) ++{ ++ unsigned long mpidr = cpu_logical_map(cpu); ++ u64 typer; ++ u32 aff; ++ ++ /* ++ * Convert affinity to a 32bit value that can be matched to ++ * GICR_TYPER bits [63:32]. 
++ */ ++ aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | ++ MPIDR_AFFINITY_LEVEL(mpidr, 0)); ++ ++ typer = gic_read_typer(ptr + GICR_TYPER); ++ if ((typer >> 32) == aff) ++ return 0; ++ ++ return 1; ++} ++ ++static int gic_rdist_cpus(void __iomem *ptr) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < nr_cpu_ids; i++) { ++ if (gic_rdist_cpu(ptr, i) == 0) ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static void gic_cpu_init_others(void) ++{ ++ int i, cpu = nr_cpu_ids; ++ int gicr_nr = its_gicr_nr(); ++ ++ if (!its_init_all_gicr()) ++ return; ++ ++ for (i = 0; i < gic_data.nr_redist_regions; i++) { ++ u64 typer; ++ void __iomem *redist_base = ++ gic_data.redist_regions[i].redist_base; ++ phys_addr_t phys_base = gic_data.redist_regions[i].phys_base; ++ ++ do { ++ typer = gic_read_typer(redist_base + GICR_TYPER); ++ ++ if (gic_rdist_cpus(redist_base) == 1) { ++ if (cpu >= gicr_nr) { ++ pr_err("CPU over GICR number.\n"); ++ break; ++ } ++ gic_enable_redist_others(redist_base, true); ++ ++ if (gic_dist_supports_lpis()) ++ its_cpu_init_others(redist_base, phys_base, cpu); ++ cpu++; ++ } ++ ++ if (gic_data.redist_regions[i].single_redist) ++ break; ++ ++ if (gic_data.redist_stride) { ++ redist_base += gic_data.redist_stride; ++ phys_base += gic_data.redist_stride; ++ } else { ++ /* Skip RD_base + SGI_base */ ++ redist_base += SZ_64K * 2; ++ phys_base += SZ_64K * 2; ++ if (typer & GICR_TYPER_VLPIS) { ++ /* Skip VLPI_base + reserved page */ ++ redist_base += SZ_64K * 2; ++ phys_base += SZ_64K * 2; ++ } ++ } ++ } while (!(typer & GICR_TYPER_LAST)); ++ } ++} ++#else ++static inline void gic_check_hisi_workaround(void) {} ++ ++static inline void gic_compute_nr_gicr(void) {} ++ ++static inline void gic_cpu_init_others(void) {} ++#endif ++ ++#ifdef CONFIG_SMP ++ ++#define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) ++#define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) ++ ++static int gic_starting_cpu(unsigned int cpu) ++{ ++ gic_cpu_init(); ++ ++ if (gic_dist_supports_lpis()) ++ its_cpu_init(); ++ ++ return 0; ++} ++ ++static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, ++ unsigned long cluster_id) ++{ ++ int next_cpu, cpu = *base_cpu; ++ unsigned long mpidr = cpu_logical_map(cpu); ++ u16 tlist = 0; ++ ++ while (cpu < nr_cpu_ids) { ++ tlist |= 1 << (mpidr & 0xf); ++ ++ next_cpu = cpumask_next(cpu, mask); ++ if (next_cpu >= nr_cpu_ids) ++ goto out; ++ cpu = next_cpu; ++ ++ mpidr = cpu_logical_map(cpu); ++ ++ if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) { ++ cpu--; ++ goto out; ++ } ++ } ++out: ++ *base_cpu = cpu; ++ return tlist; ++} ++ ++#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ ++ (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ ++ << ICC_SGI1R_AFFINITY_## level ##_SHIFT) ++ ++static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) ++{ ++ u64 val; ++ ++ val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | ++ MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | ++ irq << ICC_SGI1R_SGI_ID_SHIFT | ++ MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | ++ MPIDR_TO_SGI_RS(cluster_id) | ++ tlist << ICC_SGI1R_TARGET_LIST_SHIFT); ++ ++ pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); ++ gic_write_sgi1r(val); ++} ++ ++static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) ++{ ++ int cpu; ++ ++ if (WARN_ON(irq >= 16)) ++ return; ++ ++ /* ++ * Ensure that stores to Normal memory are visible to the ++ * other CPUs before issuing the IPI. 
++ */ ++ wmb(); ++ ++ for_each_cpu(cpu, mask) { ++ u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); ++ u16 tlist; ++ ++ tlist = gic_compute_target_list(&cpu, mask, cluster_id); ++ gic_send_sgi(cluster_id, tlist, irq); ++ } ++ ++ /* Force the above writes to ICC_SGI1R_EL1 to be executed */ ++ isb(); ++} ++ ++static void gic_smp_init(void) ++{ ++ set_smp_cross_call(gic_raise_softirq); ++ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, ++ "irqchip/arm/gicv3:starting", ++ gic_starting_cpu, NULL); ++} ++ ++static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, ++ bool force) ++{ ++ unsigned int cpu; ++ void __iomem *reg; ++ int enabled; ++ u64 val; ++ ++ if (force) ++ cpu = cpumask_first(mask_val); ++ else ++ cpu = cpumask_any_and(mask_val, cpu_online_mask); ++ ++ if (cpu >= nr_cpu_ids) ++ return -EINVAL; ++ ++ if (gic_irq_in_rdist(d)) ++ return -EINVAL; ++ ++ /* If interrupt was enabled, disable it first */ ++ enabled = gic_peek_irq(d, GICD_ISENABLER); ++ if (enabled) ++ gic_mask_irq(d); ++ ++ reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8); ++ val = gic_mpidr_to_affinity(cpu_logical_map(cpu)); ++ ++ gic_write_irouter(val, reg); ++ ++ /* ++ * If the interrupt was enabled, enabled it again. Otherwise, ++ * just wait for the distributor to have digested our changes. ++ */ ++ if (enabled) ++ gic_unmask_irq(d); ++ else ++ gic_dist_wait_for_rwp(); ++ ++ irq_data_update_effective_affinity(d, cpumask_of(cpu)); ++ ++ return IRQ_SET_MASK_OK_DONE; ++} ++#else ++#define gic_set_affinity NULL ++#define gic_smp_init() do { } while(0) ++#endif ++ ++#ifdef CONFIG_CPU_PM ++static int gic_cpu_pm_notifier(struct notifier_block *self, ++ unsigned long cmd, void *v) ++{ ++ if (cmd == CPU_PM_EXIT) { ++ if (gic_dist_security_disabled()) ++ gic_enable_redist(true); ++ gic_cpu_sys_reg_init(); ++ } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { ++ gic_write_grpen1(0); ++ gic_enable_redist(false); ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block gic_cpu_pm_notifier_block = { ++ .notifier_call = gic_cpu_pm_notifier, ++}; ++ ++static void gic_cpu_pm_init(void) ++{ ++ cpu_pm_register_notifier(&gic_cpu_pm_notifier_block); ++} ++ ++#else ++static inline void gic_cpu_pm_init(void) { } ++#endif /* CONFIG_CPU_PM */ ++ ++static struct irq_chip gic_chip = { ++ .name = "GICv3", ++ .irq_mask = gic_mask_irq, ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoi_irq, ++ .irq_set_type = gic_set_type, ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_nmi_setup = gic_irq_nmi_setup, ++ .irq_nmi_teardown = gic_irq_nmi_teardown, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_MASK_ON_SUSPEND, ++}; ++ ++static struct irq_chip gic_eoimode1_chip = { ++ .name = "GICv3", ++ .irq_mask = gic_eoimode1_mask_irq, ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoimode1_eoi_irq, ++ .irq_set_type = gic_set_type, ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, ++ .irq_nmi_setup = gic_irq_nmi_setup, ++ .irq_nmi_teardown = gic_irq_nmi_teardown, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | ++ IRQCHIP_MASK_ON_SUSPEND, ++}; ++ ++#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) ++ ++static int gic_irq_domain_map(struct irq_domain *d, 
unsigned int irq, ++ irq_hw_number_t hw) ++{ ++ struct irq_chip *chip = &gic_chip; ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ chip = &gic_eoimode1_chip; ++ ++ /* SGIs are private to the core kernel */ ++ if (hw < 16) ++ return -EPERM; ++ /* Nothing here */ ++ if (hw >= gic_data.irq_nr && hw < 8192) ++ return -EPERM; ++ /* Off limits */ ++ if (hw >= GIC_ID_NR) ++ return -EPERM; ++ ++ /* PPIs */ ++ if (hw < 32) { ++ irq_set_percpu_devid(irq); ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_percpu_devid_irq, NULL, NULL); ++ irq_set_status_flags(irq, IRQ_NOAUTOEN); ++ } ++ /* SPIs */ ++ if (hw >= 32 && hw < gic_data.irq_nr) { ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_fasteoi_irq, NULL, NULL); ++ irq_set_probe(irq); ++ irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); ++ } ++ /* LPIs */ ++ if (hw >= 8192 && hw < GIC_ID_NR) { ++ if (!gic_dist_supports_lpis()) ++ return -EPERM; ++ irq_domain_set_info(d, irq, hw, chip, d->host_data, ++ handle_fasteoi_irq, NULL, NULL); ++ } ++ ++ return 0; ++} ++ ++#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) ++ ++static int gic_irq_domain_translate(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ unsigned long *hwirq, ++ unsigned int *type) ++{ ++ if (is_of_node(fwspec->fwnode)) { ++ if (fwspec->param_count < 3) ++ return -EINVAL; ++ ++ switch (fwspec->param[0]) { ++ case 0: /* SPI */ ++ *hwirq = fwspec->param[1] + 32; ++ break; ++ case 1: /* PPI */ ++ case GIC_IRQ_TYPE_PARTITION: ++ *hwirq = fwspec->param[1] + 16; ++ break; ++ case GIC_IRQ_TYPE_LPI: /* LPI */ ++ *hwirq = fwspec->param[1]; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; ++ ++ /* ++ * Make it clear that broken DTs are... broken. ++ * Partitionned PPIs are an unfortunate exception. ++ */ ++ WARN_ON(*type == IRQ_TYPE_NONE && ++ fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); ++ return 0; ++ } ++ ++ if (is_fwnode_irqchip(fwspec->fwnode)) { ++ if(fwspec->param_count != 2) ++ return -EINVAL; ++ ++ *hwirq = fwspec->param[0]; ++ *type = fwspec->param[1]; ++ ++ WARN_ON(*type == IRQ_TYPE_NONE); ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++ ++static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs, void *arg) ++{ ++ int i, ret; ++ irq_hw_number_t hwirq; ++ unsigned int type = IRQ_TYPE_NONE; ++ struct irq_fwspec *fwspec = arg; ++ ++ ret = gic_irq_domain_translate(domain, fwspec, &hwirq, &type); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ ret = gic_irq_domain_map(domain, virq + i, hwirq + i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq, ++ unsigned int nr_irqs) ++{ ++ int i; ++ ++ for (i = 0; i < nr_irqs; i++) { ++ struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); ++ irq_set_handler(virq + i, NULL); ++ irq_domain_reset_irq_data(d); ++ } ++} ++ ++static int gic_irq_domain_select(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ enum irq_domain_bus_token bus_token) ++{ ++ /* Not for us */ ++ if (fwspec->fwnode != d->fwnode) ++ return 0; ++ ++ /* If this is not DT, then we have a single domain */ ++ if (!is_of_node(fwspec->fwnode)) ++ return 1; ++ ++ /* ++ * If this is a PPI and we have a 4th (non-null) parameter, ++ * then we need to match the partition domain. 
++ */ ++ if (fwspec->param_count >= 4 && ++ fwspec->param[0] == 1 && fwspec->param[3] != 0) ++ return d == partition_get_domain(gic_data.ppi_descs[fwspec->param[1]]); ++ ++ return d == gic_data.domain; ++} ++ ++static const struct irq_domain_ops gic_irq_domain_ops = { ++ .translate = gic_irq_domain_translate, ++ .alloc = gic_irq_domain_alloc, ++ .free = gic_irq_domain_free, ++ .select = gic_irq_domain_select, ++}; ++ ++static int partition_domain_translate(struct irq_domain *d, ++ struct irq_fwspec *fwspec, ++ unsigned long *hwirq, ++ unsigned int *type) ++{ ++ struct device_node *np; ++ int ret; ++ ++ np = of_find_node_by_phandle(fwspec->param[3]); ++ if (WARN_ON(!np)) ++ return -EINVAL; ++ ++ ret = partition_translate_id(gic_data.ppi_descs[fwspec->param[1]], ++ of_node_to_fwnode(np)); ++ if (ret < 0) ++ return ret; ++ ++ *hwirq = ret; ++ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; ++ ++ return 0; ++} ++ ++static const struct irq_domain_ops partition_domain_ops = { ++ .translate = partition_domain_translate, ++ .select = gic_irq_domain_select, ++}; ++ ++static void gic_enable_nmi_support(void) ++{ ++ int i; ++ ++ for (i = 0; i < 16; i++) ++ refcount_set(&ppi_nmi_refs[i], 0); ++ ++ /* ++ * Linux itself doesn't use 1:N distribution, so has no need to ++ * set PMHE. The only reason to have it set is if EL3 requires it ++ * (and we can't change it). ++ */ ++ if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) ++ static_branch_enable(&gic_pmr_sync); ++ ++ pr_info("%s ICC_PMR_EL1 synchronisation\n", ++ static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing"); ++ ++ static_branch_enable(&supports_pseudo_nmis); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; ++ else ++ gic_chip.flags |= IRQCHIP_SUPPORTS_NMI; ++} ++ ++static int __init gic_init_bases(void __iomem *dist_base, ++ struct redist_region *rdist_regs, ++ u32 nr_redist_regions, ++ u64 redist_stride, ++ struct fwnode_handle *handle) ++{ ++ u32 typer; ++ int gic_irqs; ++ int err; ++ ++ if (!is_hyp_mode_available()) ++ static_branch_disable(&supports_deactivate_key); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ pr_info("GIC: Using split EOI/Deactivate mode\n"); ++ ++ gic_data.fwnode = handle; ++ gic_data.dist_base = dist_base; ++ gic_data.redist_regions = rdist_regs; ++ gic_data.nr_redist_regions = nr_redist_regions; ++ gic_data.redist_stride = redist_stride; ++ ++ /* ++ * Find out how many interrupts are supported. ++ * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) ++ */ ++ typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); ++ gic_data.rdists.gicd_typer = typer; ++ gic_irqs = GICD_TYPER_IRQS(typer); ++ if (gic_irqs > 1020) ++ gic_irqs = 1020; ++ gic_data.irq_nr = gic_irqs; ++ ++ gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, ++ &gic_data); ++ irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); ++ gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); ++ gic_data.rdists.has_vlpis = true; ++ gic_data.rdists.has_direct_lpi = true; ++ gic_check_hisi_workaround(); ++ gic_compute_nr_gicr(); ++ ++ if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ gic_data.has_rss = !!(typer & GICD_TYPER_RSS); ++ pr_info("Distributor has %sRange Selector support\n", ++ gic_data.has_rss ? 
"" : "no "); ++ ++ if (typer & GICD_TYPER_MBIS) { ++ err = mbi_init(handle, gic_data.domain); ++ if (err) ++ pr_err("Failed to initialize MBIs\n"); ++ } ++ ++ set_handle_irq(gic_handle_irq); ++ ++ gic_update_vlpi_properties(); ++ ++ /* ++ * NMI backtrace DFX need check nmi support, this should be ++ * called before enable NMI backtrace DFX. ++ */ ++ if (gic_prio_masking_enabled()) { ++ if (!gic_has_group0() || gic_dist_security_disabled()) ++ gic_enable_nmi_support(); ++ else ++ pr_warn("SCR_EL3.FIQ is cleared, cannot enable use of pseudo-NMIs\n"); ++ } ++ ++ gic_smp_init(); ++ gic_dist_init(); ++ gic_cpu_init(); ++ gic_cpu_pm_init(); ++ ++ if (gic_dist_supports_lpis()) { ++ its_init(handle, &gic_data.rdists, gic_data.domain); ++ its_cpu_init(); ++ } ++ ++ gic_cpu_init_others(); ++ ++ return 0; ++ ++out_free: ++ if (gic_data.domain) ++ irq_domain_remove(gic_data.domain); ++ free_percpu(gic_data.rdists.rdist); ++ return err; ++} ++ ++static int __init gic_validate_dist_version(void __iomem *dist_base) ++{ ++ u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; ++ ++ if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) ++ return -ENODEV; ++ ++ return 0; ++} ++ ++/* Create all possible partitions at boot time */ ++static void __init gic_populate_ppi_partitions(struct device_node *gic_node) ++{ ++ struct device_node *parts_node, *child_part; ++ int part_idx = 0, i; ++ int nr_parts; ++ struct partition_affinity *parts; ++ ++ parts_node = of_get_child_by_name(gic_node, "ppi-partitions"); ++ if (!parts_node) ++ return; ++ ++ nr_parts = of_get_child_count(parts_node); ++ ++ if (!nr_parts) ++ goto out_put_node; ++ ++ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); ++ if (WARN_ON(!parts)) ++ goto out_put_node; ++ ++ for_each_child_of_node(parts_node, child_part) { ++ struct partition_affinity *part; ++ int n; ++ ++ part = &parts[part_idx]; ++ ++ part->partition_id = of_node_to_fwnode(child_part); ++ ++ pr_info("GIC: PPI partition %s[%d] { ", ++ child_part->name, part_idx); ++ ++ n = of_property_count_elems_of_size(child_part, "affinity", ++ sizeof(u32)); ++ WARN_ON(n <= 0); ++ ++ for (i = 0; i < n; i++) { ++ int err, cpu; ++ u32 cpu_phandle; ++ struct device_node *cpu_node; ++ ++ err = of_property_read_u32_index(child_part, "affinity", ++ i, &cpu_phandle); ++ if (WARN_ON(err)) ++ continue; ++ ++ cpu_node = of_find_node_by_phandle(cpu_phandle); ++ if (WARN_ON(!cpu_node)) ++ continue; ++ ++ cpu = of_cpu_node_to_id(cpu_node); ++ if (WARN_ON(cpu < 0)) ++ continue; ++ ++ pr_cont("%pOF[%d] ", cpu_node, cpu); ++ ++ cpumask_set_cpu(cpu, &part->mask); ++ } ++ ++ pr_cont("}\n"); ++ part_idx++; ++ } ++ ++ for (i = 0; i < 16; i++) { ++ unsigned int irq; ++ struct partition_desc *desc; ++ struct irq_fwspec ppi_fwspec = { ++ .fwnode = gic_data.fwnode, ++ .param_count = 3, ++ .param = { ++ [0] = GIC_IRQ_TYPE_PARTITION, ++ [1] = i, ++ [2] = IRQ_TYPE_NONE, ++ }, ++ }; ++ ++ irq = irq_create_fwspec_mapping(&ppi_fwspec); ++ if (WARN_ON(!irq)) ++ continue; ++ desc = partition_create_desc(gic_data.fwnode, parts, nr_parts, ++ irq, &partition_domain_ops); ++ if (WARN_ON(!desc)) ++ continue; ++ ++ gic_data.ppi_descs[i] = desc; ++ } ++ ++out_put_node: ++ of_node_put(parts_node); ++} ++ ++static void __init gic_of_setup_kvm_info(struct device_node *node) ++{ ++ int ret; ++ struct resource r; ++ u32 gicv_idx; ++ ++ gic_v3_kvm_info.type = GIC_V3; ++ ++ gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); ++ if (!gic_v3_kvm_info.maint_irq) ++ return; ++ ++ if (of_property_read_u32(node, 
"#redistributor-regions", ++ &gicv_idx)) ++ gicv_idx = 1; ++ ++ gicv_idx += 3; /* Also skip GICD, GICC, GICH */ ++ ret = of_address_to_resource(node, gicv_idx, &r); ++ if (!ret) ++ gic_v3_kvm_info.vcpu = r; ++ ++ gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; ++ gic_set_kvm_info(&gic_v3_kvm_info); ++} ++ ++static int __init gic_of_init(struct device_node *node, struct device_node *parent) ++{ ++ void __iomem *dist_base; ++ struct redist_region *rdist_regs; ++ u64 redist_stride; ++ u32 nr_redist_regions; ++ int err, i; ++ ++ dist_base = of_iomap(node, 0); ++ if (!dist_base) { ++ pr_err("%pOF: unable to map gic dist registers\n", node); ++ return -ENXIO; ++ } ++ ++ err = gic_validate_dist_version(dist_base); ++ if (err) { ++ pr_err("%pOF: no distributor detected, giving up\n", node); ++ goto out_unmap_dist; ++ } ++ ++ if (of_property_read_u32(node, "#redistributor-regions", &nr_redist_regions)) ++ nr_redist_regions = 1; ++ ++ rdist_regs = kcalloc(nr_redist_regions, sizeof(*rdist_regs), ++ GFP_KERNEL); ++ if (!rdist_regs) { ++ err = -ENOMEM; ++ goto out_unmap_dist; ++ } ++ ++ for (i = 0; i < nr_redist_regions; i++) { ++ struct resource res; ++ int ret; ++ ++ ret = of_address_to_resource(node, 1 + i, &res); ++ rdist_regs[i].redist_base = of_iomap(node, 1 + i); ++ if (ret || !rdist_regs[i].redist_base) { ++ pr_err("%pOF: couldn't map region %d\n", node, i); ++ err = -ENODEV; ++ goto out_unmap_rdist; ++ } ++ rdist_regs[i].phys_base = res.start; ++ } ++ ++ if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) ++ redist_stride = 0; ++ ++ err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, ++ redist_stride, &node->fwnode); ++ if (err) ++ goto out_unmap_rdist; ++ ++ gic_populate_ppi_partitions(node); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_of_setup_kvm_info(node); ++ return 0; ++ ++out_unmap_rdist: ++ for (i = 0; i < nr_redist_regions; i++) ++ if (rdist_regs[i].redist_base) ++ iounmap(rdist_regs[i].redist_base); ++ kfree(rdist_regs); ++out_unmap_dist: ++ iounmap(dist_base); ++ return err; ++} ++ ++IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); ++ ++#ifdef CONFIG_ACPI ++static struct ++{ ++ void __iomem *dist_base; ++ struct redist_region *redist_regs; ++ u32 nr_redist_regions; ++ bool single_redist; ++ int enabled_rdists; ++ u32 maint_irq; ++ int maint_irq_mode; ++ phys_addr_t vcpu_base; ++} acpi_data __initdata; ++ ++static void __init ++gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) ++{ ++ static int count = 0; ++ ++ acpi_data.redist_regs[count].phys_base = phys_base; ++ acpi_data.redist_regs[count].redist_base = redist_base; ++ acpi_data.redist_regs[count].single_redist = acpi_data.single_redist; ++ count++; ++} ++ ++static int __init ++gic_acpi_parse_madt_redist(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_redistributor *redist = ++ (struct acpi_madt_generic_redistributor *)header; ++ void __iomem *redist_base; ++ ++ redist_base = ioremap(redist->base_address, redist->length); ++ if (!redist_base) { ++ pr_err("Couldn't map GICR region @%llx\n", redist->base_address); ++ return -ENOMEM; ++ } ++ ++ gic_acpi_register_redist(redist->base_address, redist_base); ++ return 0; ++} ++ ++static int __init ++gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & 
GIC_PIDR2_ARCH_MASK; ++ u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; ++ void __iomem *redist_base; ++ ++ /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ redist_base = ioremap(gicc->gicr_base_address, size); ++ if (!redist_base) ++ return -ENOMEM; ++ ++ gic_acpi_register_redist(gicc->gicr_base_address, redist_base); ++ return 0; ++} ++ ++static int __init gic_acpi_collect_gicr_base(void) ++{ ++ acpi_tbl_entry_handler redist_parser; ++ enum acpi_madt_type type; ++ ++ if (acpi_data.single_redist) { ++ type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; ++ redist_parser = gic_acpi_parse_madt_gicc; ++ } else { ++ type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR; ++ redist_parser = gic_acpi_parse_madt_redist; ++ } ++ ++ /* Collect redistributor base addresses in GICR entries */ ++ if (acpi_table_parse_madt(type, redist_parser, 0) > 0) ++ return 0; ++ ++ pr_info("No valid GICR entries exist\n"); ++ return -ENODEV; ++} ++ ++static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ /* Subtable presence means that redist exists, that's it */ ++ return 0; ++} ++ ++static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ ++ /* ++ * If GICC is enabled and has valid gicr base address, then it means ++ * GICR base is presented via GICC ++ */ ++ if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { ++ acpi_data.enabled_rdists++; ++ return 0; ++ } ++ ++ /* ++ * It's perfectly valid firmware can pass disabled GICC entry, driver ++ * should not treat as errors, skip the entry instead of probe fail. ++ */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ return -ENODEV; ++} ++ ++static int __init gic_acpi_count_gicr_regions(void) ++{ ++ int count; ++ ++ /* ++ * Count how many redistributor regions we have. It is not allowed ++ * to mix redistributor description, GICR and GICC subtables have to be ++ * mutually exclusive. ++ */ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, ++ gic_acpi_match_gicr, 0); ++ if (count > 0) { ++ acpi_data.single_redist = false; ++ return count; ++ } ++ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, ++ gic_acpi_match_gicc, 0); ++ if (count > 0) { ++ acpi_data.single_redist = true; ++ count = acpi_data.enabled_rdists; ++ } ++ ++ return count; ++} ++ ++static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header, ++ struct acpi_probe_entry *ape) ++{ ++ struct acpi_madt_generic_distributor *dist; ++ int count; ++ ++ dist = (struct acpi_madt_generic_distributor *)header; ++ if (dist->version != ape->driver_data) ++ return false; ++ ++ /* We need to do that exercise anyway, the sooner the better */ ++ count = gic_acpi_count_gicr_regions(); ++ if (count <= 0) ++ return false; ++ ++ acpi_data.nr_redist_regions = count; ++ return true; ++} ++ ++static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header, ++ const unsigned long end) ++{ ++ struct acpi_madt_generic_interrupt *gicc = ++ (struct acpi_madt_generic_interrupt *)header; ++ int maint_irq_mode; ++ static int first_madt = true; ++ ++ /* Skip unusable CPUs */ ++ if (!(gicc->flags & ACPI_MADT_ENABLED)) ++ return 0; ++ ++ maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 
++ ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; ++ ++ if (first_madt) { ++ first_madt = false; ++ ++ acpi_data.maint_irq = gicc->vgic_interrupt; ++ acpi_data.maint_irq_mode = maint_irq_mode; ++ acpi_data.vcpu_base = gicc->gicv_base_address; ++ ++ return 0; ++ } ++ ++ /* ++ * The maintenance interrupt and GICV should be the same for every CPU ++ */ ++ if ((acpi_data.maint_irq != gicc->vgic_interrupt) || ++ (acpi_data.maint_irq_mode != maint_irq_mode) || ++ (acpi_data.vcpu_base != gicc->gicv_base_address)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static bool __init gic_acpi_collect_virt_info(void) ++{ ++ int count; ++ ++ count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, ++ gic_acpi_parse_virt_madt_gicc, 0); ++ ++ return (count > 0); ++} ++ ++#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) ++#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) ++#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) ++ ++static void __init gic_acpi_setup_kvm_info(void) ++{ ++ int irq; ++ ++ if (!gic_acpi_collect_virt_info()) { ++ pr_warn("Unable to get hardware information used for virtualization\n"); ++ return; ++ } ++ ++ gic_v3_kvm_info.type = GIC_V3; ++ ++ irq = acpi_register_gsi(NULL, acpi_data.maint_irq, ++ acpi_data.maint_irq_mode, ++ ACPI_ACTIVE_HIGH); ++ if (irq <= 0) ++ return; ++ ++ gic_v3_kvm_info.maint_irq = irq; ++ ++ if (acpi_data.vcpu_base) { ++ struct resource *vcpu = &gic_v3_kvm_info.vcpu; ++ ++ vcpu->flags = IORESOURCE_MEM; ++ vcpu->start = acpi_data.vcpu_base; ++ vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; ++ } ++ ++ gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; ++ gic_set_kvm_info(&gic_v3_kvm_info); ++} ++ ++static int __init ++gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) ++{ ++ struct acpi_madt_generic_distributor *dist; ++ struct fwnode_handle *domain_handle; ++ size_t size; ++ int i, err; ++ ++ /* Get distributor base address */ ++ dist = (struct acpi_madt_generic_distributor *)header; ++ acpi_data.dist_base = ioremap(dist->base_address, ++ ACPI_GICV3_DIST_MEM_SIZE); ++ if (!acpi_data.dist_base) { ++ pr_err("Unable to map GICD registers\n"); ++ return -ENOMEM; ++ } ++ ++ err = gic_validate_dist_version(acpi_data.dist_base); ++ if (err) { ++ pr_err("No distributor detected at @%p, giving up\n", ++ acpi_data.dist_base); ++ goto out_dist_unmap; ++ } ++ ++ size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions; ++ acpi_data.redist_regs = kzalloc(size, GFP_KERNEL); ++ if (!acpi_data.redist_regs) { ++ err = -ENOMEM; ++ goto out_dist_unmap; ++ } ++ ++ err = gic_acpi_collect_gicr_base(); ++ if (err) ++ goto out_redist_unmap; ++ ++ domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base); ++ if (!domain_handle) { ++ err = -ENOMEM; ++ goto out_redist_unmap; ++ } ++ ++ err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs, ++ acpi_data.nr_redist_regions, 0, domain_handle); ++ if (err) ++ goto out_fwhandle_free; ++ ++ acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); ++ ++ if (static_branch_likely(&supports_deactivate_key)) ++ gic_acpi_setup_kvm_info(); ++ ++ return 0; ++ ++out_fwhandle_free: ++ irq_domain_free_fwnode(domain_handle); ++out_redist_unmap: ++ for (i = 0; i < acpi_data.nr_redist_regions; i++) ++ if (acpi_data.redist_regs[i].redist_base) ++ iounmap(acpi_data.redist_regs[i].redist_base); ++ kfree(acpi_data.redist_regs); ++out_dist_unmap: ++ iounmap(acpi_data.dist_base); ++ return err; ++} ++IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V3, ++ 
gic_acpi_init); ++IRQCHIP_ACPI_DECLARE(gic_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V4, ++ gic_acpi_init); ++IRQCHIP_ACPI_DECLARE(gic_v3_or_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, ++ acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_NONE, ++ gic_acpi_init); ++#endif +diff -uprN kernel/drivers/irqchip/irq-gic-v3.c.rej kernel_new/drivers/irqchip/irq-gic-v3.c.rej +--- kernel/drivers/irqchip/irq-gic-v3.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-gic-v3.c.rej 2021-04-01 18:28:07.664863277 +0800 +@@ -0,0 +1,55 @@ ++--- drivers/irqchip/irq-gic-v3.c 2019-12-18 03:36:04.000000000 +0800 +++++ drivers/irqchip/irq-gic-v3.c 2021-03-22 09:21:43.205415349 +0800 ++@@ -390,7 +421,7 @@ static asmlinkage void __exception_irq_e ++ else ++ isb(); ++ ++- err = handle_domain_irq(gic_data.domain, irqnr, regs); +++ err = ipipe_handle_domain_irq(gic_data.domain, irqnr, regs); ++ if (err) { ++ WARN_ONCE(true, "Unexpected interrupt received!\n"); ++ if (static_branch_likely(&supports_deactivate_key)) { ++@@ -414,7 +445,7 @@ static asmlinkage void __exception_irq_e ++ * that any shared data read by handle_IPI will ++ * be read after the ACK. ++ */ ++- handle_IPI(irqnr, regs); +++ ipipe_handle_multi_ipi(irqnr, regs); ++ #else ++ WARN_ONCE(true, "Unexpected SGI received!\n"); ++ #endif ++@@ -889,11 +920,16 @@ static struct irq_chip gic_chip = { ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoi_irq, ++ .irq_set_type = gic_set_type, +++#ifdef CONFIG_IPIPE +++ .irq_hold = gic_hold_irq, +++ .irq_release = gic_release_irq, +++#endif ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | +++ IRQCHIP_PIPELINE_SAFE | ++ IRQCHIP_MASK_ON_SUSPEND, ++ }; ++ ++@@ -903,12 +939,17 @@ static struct irq_chip gic_eoimode1_chip ++ .irq_unmask = gic_unmask_irq, ++ .irq_eoi = gic_eoimode1_eoi_irq, ++ .irq_set_type = gic_set_type, +++#ifdef CONFIG_IPIPE +++ .irq_hold = gic_hold_irq, +++ .irq_release = gic_release_irq, +++#endif ++ .irq_set_affinity = gic_set_affinity, ++ .irq_get_irqchip_state = gic_irq_get_irqchip_state, ++ .irq_set_irqchip_state = gic_irq_set_irqchip_state, ++ .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, ++ .flags = IRQCHIP_SET_TYPE_MASKED | ++ IRQCHIP_SKIP_SET_WAKE | +++ IRQCHIP_PIPELINE_SAFE | ++ IRQCHIP_MASK_ON_SUSPEND, ++ }; ++ +diff -uprN kernel/drivers/irqchip/irq-imx-gpcv2.c kernel_new/drivers/irqchip/irq-imx-gpcv2.c +--- kernel/drivers/irqchip/irq-imx-gpcv2.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-imx-gpcv2.c 2021-04-01 18:28:07.664863277 +0800 +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + + #define IMR_NUM 4 +@@ -19,7 +20,11 @@ + #define GPC_IMR1_CORE1 0x40 + + struct gpcv2_irqchip_data { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t rlock; ++#else + struct raw_spinlock rlock; ++#endif + void __iomem *gpc_base; + u32 wakeup_sources[IMR_NUM]; + u32 saved_irq_mask[IMR_NUM]; +@@ -31,6 +36,7 @@ static struct gpcv2_irqchip_data *imx_gp + static int gpcv2_wakeup_source_save(void) + { + struct gpcv2_irqchip_data *cd; ++ unsigned long flags; + void __iomem *reg; + int i; + +@@ -40,8 +46,10 @@ static int gpcv2_wakeup_source_save(void + + for (i = 0; i < IMR_NUM; i++) { + reg = cd->gpc_base + cd->cpu2wakeup + i * 4; ++ flags = hard_cond_local_irq_save(); + cd->saved_irq_mask[i] = readl_relaxed(reg); + 
writel_relaxed(cd->wakeup_sources[i], reg); ++ hard_cond_local_irq_restore(flags); + } + + return 0; +@@ -50,6 +58,7 @@ static int gpcv2_wakeup_source_save(void + static void gpcv2_wakeup_source_restore(void) + { + struct gpcv2_irqchip_data *cd; ++ unsigned long flags; + void __iomem *reg; + int i; + +@@ -58,8 +67,10 @@ static void gpcv2_wakeup_source_restore( + return; + + for (i = 0; i < IMR_NUM; i++) { ++ flags = hard_cond_local_irq_save(); + reg = cd->gpc_base + cd->cpu2wakeup + i * 4; + writel_relaxed(cd->saved_irq_mask[i], reg); ++ hard_cond_local_irq_restore(flags); + } + } + +@@ -92,38 +103,77 @@ static int imx_gpcv2_irq_set_wake(struct + return 0; + } + +-static void imx_gpcv2_irq_unmask(struct irq_data *d) ++static void __imx_gpcv2_irq_unmask(struct irq_data *d) + { + struct gpcv2_irqchip_data *cd = d->chip_data; + void __iomem *reg; + u32 val; + +- raw_spin_lock(&cd->rlock); + reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; + val = readl_relaxed(reg); + val &= ~(1 << d->hwirq % 32); + writel_relaxed(val, reg); +- raw_spin_unlock(&cd->rlock); ++} ++ ++static void imx_gpcv2_irq_unmask(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; + ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_unmask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); + irq_chip_unmask_parent(d); + } + +-static void imx_gpcv2_irq_mask(struct irq_data *d) ++static void __imx_gpcv2_irq_mask(struct irq_data *d) + { + struct gpcv2_irqchip_data *cd = d->chip_data; + void __iomem *reg; + u32 val; + +- raw_spin_lock(&cd->rlock); + reg = cd->gpc_base + cd->cpu2wakeup + d->hwirq / 32 * 4; + val = readl_relaxed(reg); + val |= 1 << (d->hwirq % 32); + writel_relaxed(val, reg); +- raw_spin_unlock(&cd->rlock); ++} + ++static void imx_gpcv2_irq_mask(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_mask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); + irq_chip_mask_parent(d); + } + ++#ifdef CONFIG_IPIPE ++ ++static void imx_gpc_hold_irq(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ ++ raw_spin_lock(&cd->rlock); ++ __imx_gpcv2_irq_mask(d); ++ raw_spin_unlock(&cd->rlock); ++ irq_chip_hold_parent(d); ++} ++ ++static void imx_gpc_release_irq(struct irq_data *d) ++{ ++ struct gpcv2_irqchip_data *cd = d->chip_data; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&cd->rlock, flags); ++ __imx_gpcv2_irq_unmask(d); ++ raw_spin_unlock_irqrestore(&cd->rlock, flags); ++ irq_chip_release_parent(d); ++} ++ ++#endif /* CONFIG_IPIPE */ ++ + static struct irq_chip gpcv2_irqchip_data_chip = { + .name = "GPCv2", + .irq_eoi = irq_chip_eoi_parent, +@@ -135,6 +185,11 @@ static struct irq_chip gpcv2_irqchip_dat + #ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, + #endif ++#ifdef CONFIG_IPIPE ++ .irq_hold = imx_gpc_hold_irq, ++ .irq_release = imx_gpc_release_irq, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int imx_gpcv2_domain_translate(struct irq_domain *d, +diff -uprN kernel/drivers/irqchip/irq-omap-intc.c kernel_new/drivers/irqchip/irq-omap-intc.c +--- kernel/drivers/irqchip/irq-omap-intc.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-omap-intc.c 2021-04-01 18:28:07.664863277 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -39,6 +40,7 @@ + #define INTC_MIR_CLEAR0 0x0088 + #define INTC_MIR_SET0 0x008c + 
#define INTC_PENDING_IRQ0 0x0098 ++#define INTC_PRIO 0x0100 + #define INTC_PENDING_IRQ1 0x00b8 + #define INTC_PENDING_IRQ2 0x00d8 + #define INTC_PENDING_IRQ3 0x00f8 +@@ -49,6 +51,12 @@ + #define INTCPS_NR_ILR_REGS 128 + #define INTCPS_NR_MIR_REGS 4 + ++#if !defined(MULTI_OMAP1) && !defined(MULTI_OMAP2) ++#define inline_single inline ++#else ++#define inline_single ++#endif ++ + #define INTC_IDLE_FUNCIDLE (1 << 0) + #define INTC_IDLE_TURBO (1 << 1) + +@@ -69,12 +77,12 @@ static void __iomem *omap_irq_base; + static int omap_nr_pending; + static int omap_nr_irqs; + +-static void intc_writel(u32 reg, u32 val) ++static inline_single void intc_writel(u32 reg, u32 val) + { + writel_relaxed(val, omap_irq_base + reg); + } + +-static u32 intc_readl(u32 reg) ++static inline_single u32 intc_readl(u32 reg) + { + return readl_relaxed(omap_irq_base + reg); + } +@@ -137,9 +145,10 @@ void omap3_intc_resume_idle(void) + } + + /* XXX: FIQ and additional INTC support (only MPU at the moment) */ +-static void omap_ack_irq(struct irq_data *d) ++static inline_single void omap_ack_irq(struct irq_data *d) + { + intc_writel(INTC_CONTROL, 0x1); ++ dsb(); + } + + static void omap_mask_ack_irq(struct irq_data *d) +@@ -164,8 +173,14 @@ static void __init omap_irq_soft_reset(v + while (!(intc_readl(INTC_SYSSTATUS) & 0x1)) + /* Wait for reset to complete */; + ++#ifndef CONFIG_IPIPE + /* Enable autoidle */ + intc_writel(INTC_SYSCONFIG, 1 << 0); ++#else /* CONFIG_IPIPE */ ++ /* Disable autoidle */ ++ intc_writel(INTC_SYSCONFIG, 0); ++ intc_writel(INTC_IDLE, 0x1); ++#endif /* CONFIG_IPIPE */ + } + + int omap_irq_pending(void) +@@ -211,7 +226,7 @@ static int __init omap_alloc_gc_of(struc + ct->chip.irq_mask = irq_gc_mask_disable_reg; + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; + +- ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; ++ ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; + + ct->regs.enable = INTC_MIR_CLEAR0 + 32 * i; + ct->regs.disable = INTC_MIR_SET0 + 32 * i; +@@ -231,8 +246,11 @@ static void __init omap_alloc_gc_legacy( + ct = gc->chip_types; + ct->chip.irq_ack = omap_mask_ack_irq; + ct->chip.irq_mask = irq_gc_mask_disable_reg; ++#ifdef CONFIG_IPIPE ++ ct->chip.irq_mask_ack = omap_mask_ack_irq; ++#endif + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; +- ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE; ++ ct->chip.flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE; + + ct->regs.enable = INTC_MIR_CLEAR0; + ct->regs.disable = INTC_MIR_SET0; +@@ -357,7 +375,7 @@ omap_intc_handle_irq(struct pt_regs *reg + } + + irqnr &= ACTIVEIRQ_MASK; +- handle_domain_irq(domain, irqnr, regs); ++ ipipe_handle_domain_irq(domain, irqnr, regs); + } + + static int __init intc_of_init(struct device_node *node, +@@ -387,6 +405,28 @@ static int __init intc_of_init(struct de + return 0; + } + ++#if defined(CONFIG_IPIPE) && defined(CONFIG_ARCH_OMAP2PLUS) ++#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) ++void omap3_intc_mute(void) ++{ ++ intc_writel(INTC_THRESHOLD, 0x1); ++ intc_writel(INTC_CONTROL, 0x1); ++} ++ ++void omap3_intc_unmute(void) ++{ ++ intc_writel(INTC_THRESHOLD, 0xff); ++} ++ ++void omap3_intc_set_irq_prio(int irq, int hi) ++{ ++ if (irq >= INTCPS_NR_MIR_REGS * 32) ++ return; ++ intc_writel(INTC_PRIO + 4 * irq, hi ? 
0 : 0xfc); ++} ++#endif /* CONFIG_ARCH_OMAP3 */ ++#endif /* CONFIG_IPIPE && ARCH_OMAP2PLUS */ ++ + IRQCHIP_DECLARE(omap2_intc, "ti,omap2-intc", intc_of_init); + IRQCHIP_DECLARE(omap3_intc, "ti,omap3-intc", intc_of_init); + IRQCHIP_DECLARE(dm814x_intc, "ti,dm814-intc", intc_of_init); +diff -uprN kernel/drivers/irqchip/irq-sunxi-nmi.c kernel_new/drivers/irqchip/irq-sunxi-nmi.c +--- kernel/drivers/irqchip/irq-sunxi-nmi.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-sunxi-nmi.c 2021-04-01 18:28:07.665863276 +0800 +@@ -115,8 +115,9 @@ static int sunxi_sc_nmi_set_type(struct + u32 ctrl_off = ct->regs.type; + unsigned int src_type; + unsigned int i; ++ unsigned long flags; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + + switch (flow_type & IRQF_TRIGGER_MASK) { + case IRQ_TYPE_EDGE_FALLING: +@@ -133,7 +134,7 @@ static int sunxi_sc_nmi_set_type(struct + src_type = SUNXI_SRC_TYPE_LEVEL_LOW; + break; + default: +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + pr_err("Cannot assign multiple trigger modes to IRQ %d.\n", + data->irq); + return -EBADR; +@@ -151,7 +152,7 @@ static int sunxi_sc_nmi_set_type(struct + src_type_reg |= src_type; + sunxi_sc_nmi_write(gc, ctrl_off, src_type_reg); + +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + + return IRQ_SET_MASK_OK; + } +@@ -200,7 +201,7 @@ static int __init sunxi_sc_nmi_irq_init( + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; + gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; +- gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; ++ gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE; + gc->chip_types[0].regs.ack = reg_offs->pend; + gc->chip_types[0].regs.mask = reg_offs->enable; + gc->chip_types[0].regs.type = reg_offs->ctrl; +@@ -211,6 +212,7 @@ static int __init sunxi_sc_nmi_irq_init( + gc->chip_types[1].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[1].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[1].chip.irq_set_type = sunxi_sc_nmi_set_type; ++ gc->chip_types[1].chip.flags = IRQCHIP_PIPELINE_SAFE; + gc->chip_types[1].regs.ack = reg_offs->pend; + gc->chip_types[1].regs.mask = reg_offs->enable; + gc->chip_types[1].regs.type = reg_offs->ctrl; +diff -uprN kernel/drivers/irqchip/irq-versatile-fpga.c kernel_new/drivers/irqchip/irq-versatile-fpga.c +--- kernel/drivers/irqchip/irq-versatile-fpga.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-versatile-fpga.c 2021-04-01 18:28:07.665863276 +0800 +@@ -80,7 +80,7 @@ static void fpga_irq_handle(struct irq_d + unsigned int irq = ffs(status) - 1; + + status &= ~(1 << irq); +- generic_handle_irq(irq_find_mapping(f->domain, irq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(f->domain, irq)); + } while (status); + } + +@@ -97,7 +97,7 @@ static int handle_one_fpga(struct fpga_i + + while ((status = readl(f->base + IRQ_STATUS))) { + irq = ffs(status) - 1; +- handle_domain_irq(f->domain, irq, regs); ++ ipipe_handle_domain_irq(f->domain, irq, regs); + handled = 1; + } + +@@ -153,7 +153,11 @@ void __init fpga_irq_init(void __iomem * + f->chip.name = name; + f->chip.irq_ack = fpga_irq_mask; + f->chip.irq_mask = fpga_irq_mask; ++#ifdef CONFIG_IPIPE ++ f->chip.irq_mask_ack = fpga_irq_mask; ++#endif + f->chip.irq_unmask = fpga_irq_unmask; ++ f->chip.flags = IRQCHIP_PIPELINE_SAFE; + f->valid = valid; + + if (parent_irq != -1) { +diff -uprN kernel/drivers/irqchip/irq-vic.c 
kernel_new/drivers/irqchip/irq-vic.c +--- kernel/drivers/irqchip/irq-vic.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/irqchip/irq-vic.c 2021-04-01 18:28:07.665863276 +0800 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -218,7 +219,7 @@ static int handle_one_vic(struct vic_dev + + while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { + irq = ffs(stat) - 1; +- handle_domain_irq(vic->domain, irq, regs); ++ ipipe_handle_domain_irq(vic->domain, irq, regs); + handled = 1; + } + +@@ -235,7 +236,7 @@ static void vic_handle_irq_cascaded(stru + + while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) { + hwirq = ffs(stat) - 1; +- generic_handle_irq(irq_find_mapping(vic->domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(vic->domain, hwirq)); + } + + chained_irq_exit(host_chip, desc); +@@ -339,7 +340,7 @@ static void vic_unmask_irq(struct irq_da + #if defined(CONFIG_PM) + static struct vic_device *vic_from_irq(unsigned int irq) + { +- struct vic_device *v = vic_devices; ++ struct vic_device *v = vic_devices; + unsigned int base_irq = irq & ~31; + int id; + +@@ -378,8 +379,12 @@ static struct irq_chip vic_chip = { + .name = "VIC", + .irq_ack = vic_ack_irq, + .irq_mask = vic_mask_irq, ++#ifdef CONFIG_IPIPE ++ .irq_mask_ack = vic_ack_irq, ++#endif /* CONFIG_IPIPE */ + .irq_unmask = vic_unmask_irq, + .irq_set_wake = vic_set_wake, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static void __init vic_disable(void __iomem *base) +diff -uprN kernel/drivers/memory/omap-gpmc.c kernel_new/drivers/memory/omap-gpmc.c +--- kernel/drivers/memory/omap-gpmc.c 2020-12-21 21:59:19.000000000 +0800 ++++ kernel_new/drivers/memory/omap-gpmc.c 2021-04-01 18:28:07.665863276 +0800 +@@ -1261,12 +1261,15 @@ int gpmc_get_client_irq(unsigned irq_con + + static int gpmc_irq_endis(unsigned long hwirq, bool endis) + { ++ unsigned long flags; + u32 regval; + + /* bits GPMC_NR_NAND_IRQS to 8 are reserved */ + if (hwirq >= GPMC_NR_NAND_IRQS) + hwirq += 8 - GPMC_NR_NAND_IRQS; + ++ flags = hard_local_irq_save(); ++ + regval = gpmc_read_reg(GPMC_IRQENABLE); + if (endis) + regval |= BIT(hwirq); +@@ -1274,6 +1277,8 @@ static int gpmc_irq_endis(unsigned long + regval &= ~BIT(hwirq); + gpmc_write_reg(GPMC_IRQENABLE, regval); + ++ hard_local_irq_restore(flags); ++ + return 0; + } + +@@ -1299,6 +1304,7 @@ static void gpmc_irq_unmask(struct irq_d + + static void gpmc_irq_edge_config(unsigned long hwirq, bool rising_edge) + { ++ unsigned long flags; + u32 regval; + + /* NAND IRQs polarity is not configurable */ +@@ -1308,6 +1314,8 @@ static void gpmc_irq_edge_config(unsigne + /* WAITPIN starts at BIT 8 */ + hwirq += 8 - GPMC_NR_NAND_IRQS; + ++ flags = hard_local_irq_save(); ++ + regval = gpmc_read_reg(GPMC_CONFIG); + if (rising_edge) + regval &= ~BIT(hwirq); +@@ -1315,6 +1323,8 @@ static void gpmc_irq_edge_config(unsigne + regval |= BIT(hwirq); + + gpmc_write_reg(GPMC_CONFIG, regval); ++ ++ hard_local_irq_restore(flags); + } + + static void gpmc_irq_ack(struct irq_data *d) +@@ -1394,7 +1404,7 @@ static irqreturn_t gpmc_handle_irq(int i + hwirq, virq); + } + +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + } + } + +@@ -1422,6 +1432,7 @@ static int gpmc_setup_irq(struct gpmc_de + gpmc->irq_chip.irq_mask = gpmc_irq_mask; + gpmc->irq_chip.irq_unmask = gpmc_irq_unmask; + gpmc->irq_chip.irq_set_type = gpmc_irq_set_type; ++ gpmc->irq_chip.flags |= IRQCHIP_PIPELINE_SAFE; + + gpmc_irq_domain = irq_domain_add_linear(gpmc->dev->of_node, + gpmc->nirqs, +diff 
-uprN kernel/drivers/pci/controller/dwc/pcie-designware-host.c kernel_new/drivers/pci/controller/dwc/pcie-designware-host.c +--- kernel/drivers/pci/controller/dwc/pcie-designware-host.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pci/controller/dwc/pcie-designware-host.c 2021-04-01 18:28:07.665863276 +0800 +@@ -66,6 +66,7 @@ static struct irq_chip dw_pcie_msi_irq_c + .irq_ack = dw_msi_ack_irq, + .irq_mask = dw_msi_mask_irq, + .irq_unmask = dw_msi_unmask_irq, ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static struct msi_domain_info dw_pcie_msi_domain_info = { +diff -uprN kernel/drivers/pci/controller/pcie-altera.c kernel_new/drivers/pci/controller/pcie-altera.c +--- kernel/drivers/pci/controller/pcie-altera.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pci/controller/pcie-altera.c 2021-04-01 18:28:07.665863276 +0800 +@@ -477,7 +477,7 @@ static void altera_pcie_isr(struct irq_d + + virq = irq_find_mapping(pcie->irq_domain, bit); + if (virq) +- generic_handle_irq(virq); ++ ipipe_handle_demuxed_irq(virq); + else + dev_err(dev, "unexpected IRQ, INT%d\n", bit); + } +diff -uprN kernel/drivers/pinctrl/bcm/pinctrl-bcm2835.c kernel_new/drivers/pinctrl/bcm/pinctrl-bcm2835.c +--- kernel/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/bcm/pinctrl-bcm2835.c 2021-04-01 18:28:07.665863276 +0800 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -88,7 +89,11 @@ struct bcm2835_pinctrl { + struct gpio_chip gpio_chip; + struct pinctrl_gpio_range gpio_range; + ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t irq_lock[BCM2835_NUM_BANKS]; ++#else + raw_spinlock_t irq_lock[BCM2835_NUM_BANKS]; ++#endif + }; + + /* pins are just named GPIO0..GPIO53 */ +@@ -367,7 +372,7 @@ static void bcm2835_gpio_irq_handle_bank + events &= pc->enabled_irq_map[bank]; + for_each_set_bit(offset, &events, 32) { + gpio = (32 * bank) + offset; +- generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irq.domain, ++ ipipe_handle_demuxed_irq(irq_linear_revmap(pc->gpio_chip.irq.domain, + gpio)); + } + } +@@ -462,6 +467,7 @@ static void bcm2835_gpio_irq_enable(stru + raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); + set_bit(offset, &pc->enabled_irq_map[bank]); + bcm2835_gpio_irq_config(pc, gpio, true); ++ ipipe_unlock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + } + +@@ -479,6 +485,7 @@ static void bcm2835_gpio_irq_disable(str + /* Clear events that were latched prior to clearing event sources */ + bcm2835_gpio_set_bit(pc, GPEDS0, gpio); + clear_bit(offset, &pc->enabled_irq_map[bank]); ++ ipipe_lock_irq(data->irq); + raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); + } + +@@ -608,6 +615,39 @@ static void bcm2835_gpio_irq_ack(struct + bcm2835_gpio_set_bit(pc, GPEDS0, gpio); + } + ++#ifdef CONFIG_IPIPE ++ ++static void bcm2835_gpio_irq_hold(struct irq_data *data) ++{ ++ struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); ++ unsigned gpio = irqd_to_hwirq(data); ++ unsigned offset = GPIO_REG_SHIFT(gpio); ++ unsigned bank = GPIO_REG_OFFSET(gpio); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); ++ bcm2835_gpio_irq_config(pc, gpio, false); ++ bcm2835_gpio_set_bit(pc, GPEDS0, gpio); ++ clear_bit(offset, &pc->enabled_irq_map[bank]); ++ raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); ++} ++ ++static void bcm2835_gpio_irq_release(struct irq_data *data) ++{ ++ struct bcm2835_pinctrl *pc = irq_data_get_irq_chip_data(data); ++ unsigned 
gpio = irqd_to_hwirq(data); ++ unsigned offset = GPIO_REG_SHIFT(gpio); ++ unsigned bank = GPIO_REG_OFFSET(gpio); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&pc->irq_lock[bank], flags); ++ set_bit(offset, &pc->enabled_irq_map[bank]); ++ bcm2835_gpio_irq_config(pc, gpio, true); ++ raw_spin_unlock_irqrestore(&pc->irq_lock[bank], flags); ++} ++ ++#endif ++ + static struct irq_chip bcm2835_gpio_irq_chip = { + .name = MODULE_NAME, + .irq_enable = bcm2835_gpio_irq_enable, +@@ -616,6 +656,11 @@ static struct irq_chip bcm2835_gpio_irq_ + .irq_ack = bcm2835_gpio_irq_ack, + .irq_mask = bcm2835_gpio_irq_disable, + .irq_unmask = bcm2835_gpio_irq_enable, ++#ifdef CONFIG_IPIPE ++ .irq_hold = bcm2835_gpio_irq_hold, ++ .irq_release = bcm2835_gpio_irq_release, ++#endif ++ .flags = IRQCHIP_PIPELINE_SAFE, + }; + + static int bcm2835_pctl_get_groups_count(struct pinctrl_dev *pctldev) +diff -uprN kernel/drivers/pinctrl/pinctrl-rockchip.c kernel_new/drivers/pinctrl/pinctrl-rockchip.c +--- kernel/drivers/pinctrl/pinctrl-rockchip.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/pinctrl-rockchip.c 2021-04-01 18:28:07.796863130 +0800 +@@ -2868,7 +2868,7 @@ static int rockchip_irq_set_type(struct + u32 polarity; + u32 level; + u32 data; +- unsigned long flags; ++ unsigned long flags, flags2; + int ret; + + /* make sure the pin is configured as gpio input */ +@@ -2891,7 +2891,7 @@ static int rockchip_irq_set_type(struct + irq_set_handler_locked(d, handle_level_irq); + + raw_spin_lock_irqsave(&bank->slock, flags); +- irq_gc_lock(gc); ++ flags2 = irq_gc_lock(gc); + + level = readl_relaxed(gc->reg_base + GPIO_INTTYPE_LEVEL); + polarity = readl_relaxed(gc->reg_base + GPIO_INT_POLARITY); +@@ -2932,7 +2932,7 @@ static int rockchip_irq_set_type(struct + polarity &= ~mask; + break; + default: +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags2); + raw_spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); + return -EINVAL; +@@ -2941,7 +2941,7 @@ static int rockchip_irq_set_type(struct + writel_relaxed(level, gc->reg_base + GPIO_INTTYPE_LEVEL); + writel_relaxed(polarity, gc->reg_base + GPIO_INT_POLARITY); + +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags2); + raw_spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); + +diff -uprN kernel/drivers/pinctrl/pinctrl-single.c kernel_new/drivers/pinctrl/pinctrl-single.c +--- kernel/drivers/pinctrl/pinctrl-single.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/pinctrl-single.c 2021-04-01 18:28:07.797863129 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -185,7 +186,11 @@ struct pcs_device { + #define PCS_FEAT_PINCONF (1 << 0) + struct property *missing_nr_pinctrl_cells; + struct pcs_soc_data socdata; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else /* !IPIPE */ + raw_spinlock_t lock; ++#endif /* !IPIPE */ + struct mutex mutex; + unsigned width; + unsigned fmask; +@@ -1460,7 +1465,7 @@ static int pcs_irq_handle(struct pcs_soc + mask = pcs->read(pcswi->reg); + raw_spin_unlock(&pcs->lock); + if (mask & pcs_soc->irq_status_mask) { +- generic_handle_irq(irq_find_mapping(pcs->domain, ++ ipipe_handle_demuxed_irq(irq_find_mapping(pcs->domain, + pcswi->hwirq)); + count++; + } +@@ -1480,8 +1485,14 @@ static int pcs_irq_handle(struct pcs_soc + static irqreturn_t pcs_irq_handler(int irq, void *d) + { + struct pcs_soc_data *pcs_soc = d; ++ unsigned long flags; ++ irqreturn_t ret; + +- return pcs_irq_handle(pcs_soc) ? 
IRQ_HANDLED : IRQ_NONE; ++ flags = hard_cond_local_irq_save(); ++ ret = pcs_irq_handle(pcs_soc) ? IRQ_HANDLED : IRQ_NONE; ++ hard_cond_local_irq_restore(flags); ++ ++ return ret; + } + + /** +diff -uprN kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.c kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.c +--- kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.c 2021-04-01 18:28:07.797863129 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -937,14 +938,33 @@ static struct irq_chip sunxi_pinctrl_edg + .irq_request_resources = sunxi_pinctrl_irq_request_resources, + .irq_release_resources = sunxi_pinctrl_irq_release_resources, + .irq_set_type = sunxi_pinctrl_irq_set_type, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + ++#ifdef CONFIG_IPIPE ++ ++static void sunxi_pinctrl_irq_hold(struct irq_data *d) ++{ ++ sunxi_pinctrl_irq_mask(d); ++ sunxi_pinctrl_irq_ack(d); ++} ++ ++static void sunxi_pinctrl_irq_release(struct irq_data *d) ++{ ++ sunxi_pinctrl_irq_unmask(d); ++} ++ ++#endif ++ + static struct irq_chip sunxi_pinctrl_level_irq_chip = { + .name = "sunxi_pio_level", + .irq_eoi = sunxi_pinctrl_irq_ack, + .irq_mask = sunxi_pinctrl_irq_mask, + .irq_unmask = sunxi_pinctrl_irq_unmask, ++#ifdef CONFIG_IPIPE ++ .irq_hold = sunxi_pinctrl_irq_hold, ++ .irq_release = sunxi_pinctrl_irq_release, ++#endif + /* Define irq_enable / disable to avoid spurious irqs for drivers + * using these to suppress irqs while they clear the irq source */ + .irq_enable = sunxi_pinctrl_irq_ack_unmask, +@@ -953,7 +973,7 @@ static struct irq_chip sunxi_pinctrl_lev + .irq_release_resources = sunxi_pinctrl_irq_release_resources, + .irq_set_type = sunxi_pinctrl_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_EOI_THREADED | +- IRQCHIP_EOI_IF_HANDLED, ++ IRQCHIP_EOI_IF_HANDLED | IRQCHIP_PIPELINE_SAFE, + }; + + static int sunxi_pinctrl_irq_of_xlate(struct irq_domain *d, +@@ -1011,7 +1031,7 @@ static void sunxi_pinctrl_irq_handler(st + for_each_set_bit(irqoffset, &val, IRQ_PER_BANK) { + int pin_irq = irq_find_mapping(pctl->domain, + bank * IRQ_PER_BANK + irqoffset); +- generic_handle_irq(pin_irq); ++ ipipe_handle_demuxed_irq(pin_irq); + } + chained_irq_exit(chip, desc); + } +diff -uprN kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.h kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.h +--- kernel/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/pinctrl/sunxi/pinctrl-sunxi.h 2021-04-01 18:28:07.797863129 +0800 +@@ -138,7 +138,11 @@ struct sunxi_pinctrl { + unsigned ngroups; + int *irq; + unsigned *irq_array; ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + struct pinctrl_dev *pctl_dev; + unsigned long variant; + }; +diff -uprN kernel/drivers/soc/dove/pmu.c kernel_new/drivers/soc/dove/pmu.c +--- kernel/drivers/soc/dove/pmu.c 2020-12-21 21:59:20.000000000 +0800 ++++ kernel_new/drivers/soc/dove/pmu.c 2021-04-01 18:28:07.797863129 +0800 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #define NR_PMU_IRQS 7 + +@@ -231,6 +232,7 @@ static void pmu_irq_handler(struct irq_d + void __iomem *base = gc->reg_base; + u32 stat = readl_relaxed(base + PMC_IRQ_CAUSE) & gc->mask_cache; + u32 done = ~0; ++ unsigned long flags; + + if (stat == 0) { + handle_bad_irq(desc); +@@ -243,7 +245,7 @@ static void pmu_irq_handler(struct irq_d + stat &= ~(1 << hwirq); + done &= 
~(1 << hwirq); + +- generic_handle_irq(irq_find_mapping(domain, hwirq)); ++ ipipe_handle_demuxed_irq(irq_find_mapping(domain, hwirq)); + } + + /* +@@ -257,10 +259,10 @@ static void pmu_irq_handler(struct irq_d + * So, let's structure the code so that the window is as small as + * possible. + */ +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + done &= readl_relaxed(base + PMC_IRQ_CAUSE); + writel_relaxed(done, base + PMC_IRQ_CAUSE); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq) +@@ -296,6 +298,7 @@ static int __init dove_init_pmu_irq(stru + gc->chip_types[0].regs.mask = PMC_IRQ_MASK; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; ++ gc->chip_types[0].chip.flags |= IRQCHIP_PIPELINE_SAFE; + + pmu->irq_domain = domain; + pmu->irq_gc = gc; +diff -uprN kernel/drivers/tty/serial/8250/8250_core.c kernel_new/drivers/tty/serial/8250/8250_core.c +--- kernel/drivers/tty/serial/8250/8250_core.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/8250/8250_core.c 2021-04-01 18:28:07.797863129 +0800 +@@ -590,6 +590,48 @@ static void univ8250_console_write(struc + serial8250_console_write(up, s, count); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++static void raw_write_char(struct uart_8250_port *up, int c) ++{ ++ unsigned int status, tmout = 10000; ++ ++ for (;;) { ++ status = serial_in(up, UART_LSR); ++ up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; ++ if ((status & UART_LSR_THRE) == UART_LSR_THRE) ++ break; ++ if (--tmout == 0) ++ break; ++ cpu_relax(); ++ } ++ serial_port_out(&up->port, UART_TX, c); ++} ++ ++static void univ8250_console_write_raw(struct console *co, const char *s, ++ unsigned int count) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; ++ unsigned int ier; ++ ++ ier = serial_in(up, UART_IER); ++ ++ if (up->capabilities & UART_CAP_UUE) ++ serial_out(up, UART_IER, UART_IER_UUE); ++ else ++ serial_out(up, UART_IER, 0); ++ ++ while (count-- > 0) { ++ if (*s == '\n') ++ raw_write_char(up, '\r'); ++ raw_write_char(up, *s++); ++ } ++ ++ serial_out(up, UART_IER, ier); ++} ++ ++#endif ++ + static int univ8250_console_setup(struct console *co, char *options) + { + struct uart_port *port; +@@ -671,7 +713,12 @@ static struct console univ8250_console = + .device = uart_console_device, + .setup = univ8250_console_setup, + .match = univ8250_console_match, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = univ8250_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_RAW, ++#else + .flags = CON_PRINTBUFFER | CON_ANYTIME, ++#endif + .index = -1, + .data = &serial8250_reg, + }; +diff -uprN kernel/drivers/tty/serial/amba-pl011.c kernel_new/drivers/tty/serial/amba-pl011.c +--- kernel/drivers/tty/serial/amba-pl011.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/amba-pl011.c 2021-04-01 18:28:07.797863129 +0800 +@@ -2270,6 +2270,42 @@ static void pl011_console_putchar(struct + pl011_write(ch, uap, REG_DR); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++#define pl011_clk_setup(clk) clk_prepare_enable(clk) ++#define pl011_clk_enable(clk) do { } while (0) ++#define pl011_clk_disable(clk) do { } while (0) ++ ++static void ++pl011_console_write_raw(struct console *co, const char *s, unsigned int count) ++{ ++ struct uart_amba_port *uap = amba_ports[co->index]; ++ unsigned int old_cr, new_cr, status; ++ ++ old_cr = readw(uap->port.membase + UART011_CR); ++ new_cr = old_cr & ~UART011_CR_CTSEN; ++ new_cr |= 
UART01x_CR_UARTEN | UART011_CR_TXE; ++ writew(new_cr, uap->port.membase + UART011_CR); ++ ++ while (count-- > 0) { ++ if (*s == '\n') ++ pl011_console_putchar(&uap->port, '\r'); ++ pl011_console_putchar(&uap->port, *s++); ++ } ++ do ++ status = readw(uap->port.membase + UART01x_FR); ++ while (status & UART01x_FR_BUSY); ++ writew(old_cr, uap->port.membase + UART011_CR); ++} ++ ++#else /* !CONFIG_RAW_PRINTK */ ++ ++#define pl011_clk_setup(clk) clk_prepare(clk) ++#define pl011_clk_enable(clk) clk_enable(clk) ++#define pl011_clk_disable(clk) clk_disable(clk) ++ ++#endif /* !CONFIG_RAW_PRINTK */ ++ + static void + pl011_console_write(struct console *co, const char *s, unsigned int count) + { +@@ -2278,7 +2314,7 @@ pl011_console_write(struct console *co, + unsigned long flags; + int locked = 1; + +- clk_enable(uap->clk); ++ pl011_clk_enable(uap->clk); + + local_irq_save(flags); + if (uap->port.sysrq) +@@ -2315,7 +2351,7 @@ pl011_console_write(struct console *co, + spin_unlock(&uap->port.lock); + local_irq_restore(flags); + +- clk_disable(uap->clk); ++ pl011_clk_disable(uap->clk); + } + + static void pl011_console_get_options(struct uart_amba_port *uap, int *baud, +@@ -2375,7 +2411,7 @@ static int pl011_console_setup(struct co + /* Allow pins to be muxed in and configured */ + pinctrl_pm_select_default_state(uap->port.dev); + +- ret = clk_prepare(uap->clk); ++ ret = pl011_clk_setup(uap->clk); + if (ret) + return ret; + +@@ -2469,7 +2505,12 @@ static struct console amba_console = { + .device = uart_console_device, + .setup = pl011_console_setup, + .match = pl011_console_match, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = pl011_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_RAW | CON_ANYTIME, ++#else + .flags = CON_PRINTBUFFER | CON_ANYTIME, ++#endif + .index = -1, + .data = &amba_reg, + }; +diff -uprN kernel/drivers/tty/serial/amba-pl011.c.orig kernel_new/drivers/tty/serial/amba-pl011.c.orig +--- kernel/drivers/tty/serial/amba-pl011.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/drivers/tty/serial/amba-pl011.c.orig 2020-12-21 21:59:21.000000000 +0800 +@@ -0,0 +1,2910 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Driver for AMBA serial ports ++ * ++ * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. ++ * ++ * Copyright 1999 ARM Limited ++ * Copyright (C) 2000 Deep Blue Solutions Ltd. ++ * Copyright (C) 2010 ST-Ericsson SA ++ * ++ * This is a generic driver for ARM AMBA-type serial ports. They ++ * have a lot of 16550-like features, but are not register compatible. ++ * Note that although they do have CTS, DCD and DSR inputs, they do ++ * not have an RI input, nor do they have DTR or RTS outputs. If ++ * required, these have to be supplied via some other means (eg, GPIO) ++ * and hooked into this driver. 
++ */ ++ ++ ++#if defined(CONFIG_SERIAL_AMBA_PL011_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) ++#define SUPPORT_SYSRQ ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "amba-pl011.h" ++ ++#define UART_NR 14 ++ ++#define SERIAL_AMBA_MAJOR 204 ++#define SERIAL_AMBA_MINOR 64 ++#define SERIAL_AMBA_NR UART_NR ++ ++#define AMBA_ISR_PASS_LIMIT 256 ++ ++#define UART_DR_ERROR (UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE) ++#define UART_DUMMY_DR_RX (1 << 16) ++ ++static u16 pl011_std_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = UART01x_DR, ++ [REG_FR] = UART01x_FR, ++ [REG_LCRH_RX] = UART011_LCRH, ++ [REG_LCRH_TX] = UART011_LCRH, ++ [REG_IBRD] = UART011_IBRD, ++ [REG_FBRD] = UART011_FBRD, ++ [REG_CR] = UART011_CR, ++ [REG_IFLS] = UART011_IFLS, ++ [REG_IMSC] = UART011_IMSC, ++ [REG_RIS] = UART011_RIS, ++ [REG_MIS] = UART011_MIS, ++ [REG_ICR] = UART011_ICR, ++ [REG_DMACR] = UART011_DMACR, ++}; ++ ++/* There is by now at least one vendor with differing details, so handle it */ ++struct vendor_data { ++ const u16 *reg_offset; ++ unsigned int ifls; ++ unsigned int fr_busy; ++ unsigned int fr_dsr; ++ unsigned int fr_cts; ++ unsigned int fr_ri; ++ unsigned int inv_fr; ++ bool access_32b; ++ bool oversampling; ++ bool dma_threshold; ++ bool cts_event_workaround; ++ bool always_enabled; ++ bool fixed_options; ++ ++ unsigned int (*get_fifosize)(struct amba_device *dev); ++}; ++ ++static unsigned int get_fifosize_arm(struct amba_device *dev) ++{ ++ return amba_rev(dev) < 3 ? 16 : 32; ++} ++ ++static struct vendor_data vendor_arm = { ++ .reg_offset = pl011_std_offsets, ++ .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = false, ++ .fixed_options = false, ++ .get_fifosize = get_fifosize_arm, ++}; ++ ++static const struct vendor_data vendor_sbsa = { ++ .reg_offset = pl011_std_offsets, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .access_32b = true, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = true, ++ .fixed_options = true, ++}; ++ ++#ifdef CONFIG_ACPI_SPCR_TABLE ++static const struct vendor_data vendor_qdt_qdf2400_e44 = { ++ .reg_offset = pl011_std_offsets, ++ .fr_busy = UART011_FR_TXFE, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .inv_fr = UART011_FR_TXFE, ++ .access_32b = true, ++ .oversampling = false, ++ .dma_threshold = false, ++ .cts_event_workaround = false, ++ .always_enabled = true, ++ .fixed_options = true, ++}; ++#endif ++ ++static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = UART01x_DR, ++ [REG_ST_DMAWM] = ST_UART011_DMAWM, ++ [REG_ST_TIMEOUT] = ST_UART011_TIMEOUT, ++ [REG_FR] = UART01x_FR, ++ [REG_LCRH_RX] = ST_UART011_LCRH_RX, ++ [REG_LCRH_TX] = ST_UART011_LCRH_TX, ++ [REG_IBRD] = UART011_IBRD, ++ [REG_FBRD] = UART011_FBRD, ++ [REG_CR] = UART011_CR, ++ [REG_IFLS] = UART011_IFLS, ++ [REG_IMSC] = UART011_IMSC, ++ [REG_RIS] = UART011_RIS, ++ [REG_MIS] = UART011_MIS, ++ [REG_ICR] = UART011_ICR, ++ [REG_DMACR] = UART011_DMACR, ++ [REG_ST_XFCR] = 
ST_UART011_XFCR, ++ [REG_ST_XON1] = ST_UART011_XON1, ++ [REG_ST_XON2] = ST_UART011_XON2, ++ [REG_ST_XOFF1] = ST_UART011_XOFF1, ++ [REG_ST_XOFF2] = ST_UART011_XOFF2, ++ [REG_ST_ITCR] = ST_UART011_ITCR, ++ [REG_ST_ITIP] = ST_UART011_ITIP, ++ [REG_ST_ABCR] = ST_UART011_ABCR, ++ [REG_ST_ABIMSC] = ST_UART011_ABIMSC, ++}; ++ ++static unsigned int get_fifosize_st(struct amba_device *dev) ++{ ++ return 64; ++} ++ ++static struct vendor_data vendor_st = { ++ .reg_offset = pl011_st_offsets, ++ .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, ++ .fr_busy = UART01x_FR_BUSY, ++ .fr_dsr = UART01x_FR_DSR, ++ .fr_cts = UART01x_FR_CTS, ++ .fr_ri = UART011_FR_RI, ++ .oversampling = true, ++ .dma_threshold = true, ++ .cts_event_workaround = true, ++ .always_enabled = false, ++ .fixed_options = false, ++ .get_fifosize = get_fifosize_st, ++}; ++ ++static const u16 pl011_zte_offsets[REG_ARRAY_SIZE] = { ++ [REG_DR] = ZX_UART011_DR, ++ [REG_FR] = ZX_UART011_FR, ++ [REG_LCRH_RX] = ZX_UART011_LCRH, ++ [REG_LCRH_TX] = ZX_UART011_LCRH, ++ [REG_IBRD] = ZX_UART011_IBRD, ++ [REG_FBRD] = ZX_UART011_FBRD, ++ [REG_CR] = ZX_UART011_CR, ++ [REG_IFLS] = ZX_UART011_IFLS, ++ [REG_IMSC] = ZX_UART011_IMSC, ++ [REG_RIS] = ZX_UART011_RIS, ++ [REG_MIS] = ZX_UART011_MIS, ++ [REG_ICR] = ZX_UART011_ICR, ++ [REG_DMACR] = ZX_UART011_DMACR, ++}; ++ ++static unsigned int get_fifosize_zte(struct amba_device *dev) ++{ ++ return 16; ++} ++ ++static struct vendor_data vendor_zte = { ++ .reg_offset = pl011_zte_offsets, ++ .access_32b = true, ++ .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, ++ .fr_busy = ZX_UART01x_FR_BUSY, ++ .fr_dsr = ZX_UART01x_FR_DSR, ++ .fr_cts = ZX_UART01x_FR_CTS, ++ .fr_ri = ZX_UART011_FR_RI, ++ .get_fifosize = get_fifosize_zte, ++}; ++ ++/* Deals with DMA transactions */ ++ ++struct pl011_sgbuf { ++ struct scatterlist sg; ++ char *buf; ++}; ++ ++struct pl011_dmarx_data { ++ struct dma_chan *chan; ++ struct completion complete; ++ bool use_buf_b; ++ struct pl011_sgbuf sgbuf_a; ++ struct pl011_sgbuf sgbuf_b; ++ dma_cookie_t cookie; ++ bool running; ++ struct timer_list timer; ++ unsigned int last_residue; ++ unsigned long last_jiffies; ++ bool auto_poll_rate; ++ unsigned int poll_rate; ++ unsigned int poll_timeout; ++}; ++ ++struct pl011_dmatx_data { ++ struct dma_chan *chan; ++ struct scatterlist sg; ++ char *buf; ++ bool queued; ++}; ++ ++/* ++ * We wrap our port structure around the generic uart_port. ++ */ ++struct uart_amba_port { ++ struct uart_port port; ++ const u16 *reg_offset; ++ struct clk *clk; ++ const struct vendor_data *vendor; ++ unsigned int dmacr; /* dma control reg */ ++ unsigned int im; /* interrupt mask */ ++ unsigned int old_status; ++ unsigned int fifosize; /* vendor-specific */ ++ unsigned int old_cr; /* state during shutdown */ ++ unsigned int fixed_baud; /* vendor-set fixed baud rate */ ++ char type[12]; ++#ifdef CONFIG_DMA_ENGINE ++ /* DMA stuff */ ++ bool using_tx_dma; ++ bool using_rx_dma; ++ struct pl011_dmarx_data dmarx; ++ struct pl011_dmatx_data dmatx; ++ bool dma_probed; ++#endif ++}; ++ ++static unsigned int pl011_reg_to_offset(const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ return uap->reg_offset[reg]; ++} ++ ++static unsigned int pl011_read(const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); ++ ++ return (uap->port.iotype == UPIO_MEM32) ? 
++ readl_relaxed(addr) : readw_relaxed(addr); ++} ++ ++static void pl011_write(unsigned int val, const struct uart_amba_port *uap, ++ unsigned int reg) ++{ ++ void __iomem *addr = uap->port.membase + pl011_reg_to_offset(uap, reg); ++ ++ if (uap->port.iotype == UPIO_MEM32) ++ writel_relaxed(val, addr); ++ else ++ writew_relaxed(val, addr); ++} ++ ++/* ++ * Reads up to 256 characters from the FIFO or until it's empty and ++ * inserts them into the TTY layer. Returns the number of characters ++ * read from the FIFO. ++ */ ++static int pl011_fifo_to_tty(struct uart_amba_port *uap) ++{ ++ u16 status; ++ unsigned int ch, flag, fifotaken; ++ ++ for (fifotaken = 0; fifotaken != 256; fifotaken++) { ++ status = pl011_read(uap, REG_FR); ++ if (status & UART01x_FR_RXFE) ++ break; ++ ++ /* Take chars from the FIFO and update status */ ++ ch = pl011_read(uap, REG_DR) | UART_DUMMY_DR_RX; ++ flag = TTY_NORMAL; ++ uap->port.icount.rx++; ++ ++ if (unlikely(ch & UART_DR_ERROR)) { ++ if (ch & UART011_DR_BE) { ++ ch &= ~(UART011_DR_FE | UART011_DR_PE); ++ uap->port.icount.brk++; ++ if (uart_handle_break(&uap->port)) ++ continue; ++ } else if (ch & UART011_DR_PE) ++ uap->port.icount.parity++; ++ else if (ch & UART011_DR_FE) ++ uap->port.icount.frame++; ++ if (ch & UART011_DR_OE) ++ uap->port.icount.overrun++; ++ ++ ch &= uap->port.read_status_mask; ++ ++ if (ch & UART011_DR_BE) ++ flag = TTY_BREAK; ++ else if (ch & UART011_DR_PE) ++ flag = TTY_PARITY; ++ else if (ch & UART011_DR_FE) ++ flag = TTY_FRAME; ++ } ++ ++ if (uart_handle_sysrq_char(&uap->port, ch & 255)) ++ continue; ++ ++ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); ++ } ++ ++ return fifotaken; ++} ++ ++ ++/* ++ * All the DMA operation mode stuff goes inside this ifdef. ++ * This assumes that you have a generic DMA device interface, ++ * no custom DMA interfaces are supported. 
++ */ ++#ifdef CONFIG_DMA_ENGINE ++ ++#define PL011_DMA_BUFFER_SIZE PAGE_SIZE ++ ++static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, ++ enum dma_data_direction dir) ++{ ++ dma_addr_t dma_addr; ++ ++ sg->buf = dma_alloc_coherent(chan->device->dev, ++ PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); ++ if (!sg->buf) ++ return -ENOMEM; ++ ++ sg_init_table(&sg->sg, 1); ++ sg_set_page(&sg->sg, phys_to_page(dma_addr), ++ PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); ++ sg_dma_address(&sg->sg) = dma_addr; ++ sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; ++ ++ return 0; ++} ++ ++static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, ++ enum dma_data_direction dir) ++{ ++ if (sg->buf) { ++ dma_free_coherent(chan->device->dev, ++ PL011_DMA_BUFFER_SIZE, sg->buf, ++ sg_dma_address(&sg->sg)); ++ } ++} ++ ++static void pl011_dma_probe(struct uart_amba_port *uap) ++{ ++ /* DMA is the sole user of the platform data right now */ ++ struct amba_pl011_data *plat = dev_get_platdata(uap->port.dev); ++ struct device *dev = uap->port.dev; ++ struct dma_slave_config tx_conf = { ++ .dst_addr = uap->port.mapbase + ++ pl011_reg_to_offset(uap, REG_DR), ++ .dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, ++ .direction = DMA_MEM_TO_DEV, ++ .dst_maxburst = uap->fifosize >> 1, ++ .device_fc = false, ++ }; ++ struct dma_chan *chan; ++ dma_cap_mask_t mask; ++ ++ uap->dma_probed = true; ++ chan = dma_request_slave_channel_reason(dev, "tx"); ++ if (IS_ERR(chan)) { ++ if (PTR_ERR(chan) == -EPROBE_DEFER) { ++ uap->dma_probed = false; ++ return; ++ } ++ ++ /* We need platform data */ ++ if (!plat || !plat->dma_filter) { ++ dev_info(uap->port.dev, "no DMA platform data\n"); ++ return; ++ } ++ ++ /* Try to acquire a generic DMA engine slave TX channel */ ++ dma_cap_zero(mask); ++ dma_cap_set(DMA_SLAVE, mask); ++ ++ chan = dma_request_channel(mask, plat->dma_filter, ++ plat->dma_tx_param); ++ if (!chan) { ++ dev_err(uap->port.dev, "no TX DMA channel!\n"); ++ return; ++ } ++ } ++ ++ dmaengine_slave_config(chan, &tx_conf); ++ uap->dmatx.chan = chan; ++ ++ dev_info(uap->port.dev, "DMA channel TX %s\n", ++ dma_chan_name(uap->dmatx.chan)); ++ ++ /* Optionally make use of an RX channel as well */ ++ chan = dma_request_slave_channel(dev, "rx"); ++ ++ if (!chan && plat && plat->dma_rx_param) { ++ chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param); ++ ++ if (!chan) { ++ dev_err(uap->port.dev, "no RX DMA channel!\n"); ++ return; ++ } ++ } ++ ++ if (chan) { ++ struct dma_slave_config rx_conf = { ++ .src_addr = uap->port.mapbase + ++ pl011_reg_to_offset(uap, REG_DR), ++ .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE, ++ .direction = DMA_DEV_TO_MEM, ++ .src_maxburst = uap->fifosize >> 2, ++ .device_fc = false, ++ }; ++ struct dma_slave_caps caps; ++ ++ /* ++ * Some DMA controllers provide information on their capabilities. ++ * If the controller does, check for suitable residue processing ++ * otherwise assime all is well. ++ */ ++ if (0 == dma_get_slave_caps(chan, &caps)) { ++ if (caps.residue_granularity == ++ DMA_RESIDUE_GRANULARITY_DESCRIPTOR) { ++ dma_release_channel(chan); ++ dev_info(uap->port.dev, ++ "RX DMA disabled - no residue processing\n"); ++ return; ++ } ++ } ++ dmaengine_slave_config(chan, &rx_conf); ++ uap->dmarx.chan = chan; ++ ++ uap->dmarx.auto_poll_rate = false; ++ if (plat && plat->dma_rx_poll_enable) { ++ /* Set poll rate if specified. 
*/ ++ if (plat->dma_rx_poll_rate) { ++ uap->dmarx.auto_poll_rate = false; ++ uap->dmarx.poll_rate = plat->dma_rx_poll_rate; ++ } else { ++ /* ++ * 100 ms defaults to poll rate if not ++ * specified. This will be adjusted with ++ * the baud rate at set_termios. ++ */ ++ uap->dmarx.auto_poll_rate = true; ++ uap->dmarx.poll_rate = 100; ++ } ++ /* 3 secs defaults poll_timeout if not specified. */ ++ if (plat->dma_rx_poll_timeout) ++ uap->dmarx.poll_timeout = ++ plat->dma_rx_poll_timeout; ++ else ++ uap->dmarx.poll_timeout = 3000; ++ } else if (!plat && dev->of_node) { ++ uap->dmarx.auto_poll_rate = of_property_read_bool( ++ dev->of_node, "auto-poll"); ++ if (uap->dmarx.auto_poll_rate) { ++ u32 x; ++ ++ if (0 == of_property_read_u32(dev->of_node, ++ "poll-rate-ms", &x)) ++ uap->dmarx.poll_rate = x; ++ else ++ uap->dmarx.poll_rate = 100; ++ if (0 == of_property_read_u32(dev->of_node, ++ "poll-timeout-ms", &x)) ++ uap->dmarx.poll_timeout = x; ++ else ++ uap->dmarx.poll_timeout = 3000; ++ } ++ } ++ dev_info(uap->port.dev, "DMA channel RX %s\n", ++ dma_chan_name(uap->dmarx.chan)); ++ } ++} ++ ++static void pl011_dma_remove(struct uart_amba_port *uap) ++{ ++ if (uap->dmatx.chan) ++ dma_release_channel(uap->dmatx.chan); ++ if (uap->dmarx.chan) ++ dma_release_channel(uap->dmarx.chan); ++} ++ ++/* Forward declare these for the refill routine */ ++static int pl011_dma_tx_refill(struct uart_amba_port *uap); ++static void pl011_start_tx_pio(struct uart_amba_port *uap); ++ ++/* ++ * The current DMA TX buffer has been sent. ++ * Try to queue up another DMA buffer. ++ */ ++static void pl011_dma_tx_callback(void *data) ++{ ++ struct uart_amba_port *uap = data; ++ struct pl011_dmatx_data *dmatx = &uap->dmatx; ++ unsigned long flags; ++ u16 dmacr; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ if (uap->dmatx.queued) ++ dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, ++ DMA_TO_DEVICE); ++ ++ dmacr = uap->dmacr; ++ uap->dmacr = dmacr & ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ /* ++ * If TX DMA was disabled, it means that we've stopped the DMA for ++ * some reason (eg, XOFF received, or we want to send an X-char.) ++ * ++ * Note: we need to be careful here of a potential race between DMA ++ * and the rest of the driver - if the driver disables TX DMA while ++ * a TX buffer completing, we must update the tx queued status to ++ * get further refills (hence we check dmacr). ++ */ ++ if (!(dmacr & UART011_TXDMAE) || uart_tx_stopped(&uap->port) || ++ uart_circ_empty(&uap->port.state->xmit)) { ++ uap->dmatx.queued = false; ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ return; ++ } ++ ++ if (pl011_dma_tx_refill(uap) <= 0) ++ /* ++ * We didn't queue a DMA buffer for some reason, but we ++ * have data pending to be sent. Re-enable the TX IRQ. ++ */ ++ pl011_start_tx_pio(uap); ++ ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++} ++ ++/* ++ * Try to refill the TX DMA buffer. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * 1 if we queued up a TX DMA buffer. 
++ * 0 if we didn't want to handle this by DMA ++ * <0 on error ++ */ ++static int pl011_dma_tx_refill(struct uart_amba_port *uap) ++{ ++ struct pl011_dmatx_data *dmatx = &uap->dmatx; ++ struct dma_chan *chan = dmatx->chan; ++ struct dma_device *dma_dev = chan->device; ++ struct dma_async_tx_descriptor *desc; ++ struct circ_buf *xmit = &uap->port.state->xmit; ++ unsigned int count; ++ ++ /* ++ * Try to avoid the overhead involved in using DMA if the ++ * transaction fits in the first half of the FIFO, by using ++ * the standard interrupt handling. This ensures that we ++ * issue a uart_write_wakeup() at the appropriate time. ++ */ ++ count = uart_circ_chars_pending(xmit); ++ if (count < (uap->fifosize >> 1)) { ++ uap->dmatx.queued = false; ++ return 0; ++ } ++ ++ /* ++ * Bodge: don't send the last character by DMA, as this ++ * will prevent XON from notifying us to restart DMA. ++ */ ++ count -= 1; ++ ++ /* Else proceed to copy the TX chars to the DMA buffer and fire DMA */ ++ if (count > PL011_DMA_BUFFER_SIZE) ++ count = PL011_DMA_BUFFER_SIZE; ++ ++ if (xmit->tail < xmit->head) ++ memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], count); ++ else { ++ size_t first = UART_XMIT_SIZE - xmit->tail; ++ size_t second; ++ ++ if (first > count) ++ first = count; ++ second = count - first; ++ ++ memcpy(&dmatx->buf[0], &xmit->buf[xmit->tail], first); ++ if (second) ++ memcpy(&dmatx->buf[first], &xmit->buf[0], second); ++ } ++ ++ dmatx->sg.length = count; ++ ++ if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { ++ uap->dmatx.queued = false; ++ dev_dbg(uap->port.dev, "unable to map TX DMA\n"); ++ return -EBUSY; ++ } ++ ++ desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, ++ DMA_PREP_INTERRUPT | DMA_CTRL_ACK); ++ if (!desc) { ++ dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ /* ++ * If DMA cannot be used right now, we complete this ++ * transaction via IRQ and let the TTY layer retry. ++ */ ++ dev_dbg(uap->port.dev, "TX DMA busy\n"); ++ return -EBUSY; ++ } ++ ++ /* Some data to go along to the callback */ ++ desc->callback = pl011_dma_tx_callback; ++ desc->callback_param = uap; ++ ++ /* All errors should happen at prepare time */ ++ dmaengine_submit(desc); ++ ++ /* Fire the DMA transaction */ ++ dma_dev->device_issue_pending(chan); ++ ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmatx.queued = true; ++ ++ /* ++ * Now we know that DMA will fire, so advance the ring buffer ++ * with the stuff we just dispatched. ++ */ ++ xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); ++ uap->port.icount.tx += count; ++ ++ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) ++ uart_write_wakeup(&uap->port); ++ ++ return 1; ++} ++ ++/* ++ * We received a transmit interrupt without a pending X-char but with ++ * pending characters. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * false if we want to use PIO to transmit ++ * true if we queued a DMA buffer ++ */ ++static bool pl011_dma_tx_irq(struct uart_amba_port *uap) ++{ ++ if (!uap->using_tx_dma) ++ return false; ++ ++ /* ++ * If we already have a TX buffer queued, but received a ++ * TX interrupt, it will be because we've just sent an X-char. ++ * Ensure the TX DMA is enabled and the TX IRQ is disabled. 
++ */ ++ if (uap->dmatx.queued) { ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ return true; ++ } ++ ++ /* ++ * We don't have a TX buffer queued, so try to queue one. ++ * If we successfully queued a buffer, mask the TX IRQ. ++ */ ++ if (pl011_dma_tx_refill(uap) > 0) { ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ return true; ++ } ++ return false; ++} ++ ++/* ++ * Stop the DMA transmit (eg, due to received XOFF). ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) ++{ ++ if (uap->dmatx.queued) { ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++} ++ ++/* ++ * Try to start a DMA transmit, or in the case of an XON/OFF ++ * character queued for send, try to get that character out ASAP. ++ * Locking: called with port lock held and IRQs disabled. ++ * Returns: ++ * false if we want the TX IRQ to be enabled ++ * true if we have a buffer queued ++ */ ++static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) ++{ ++ u16 dmacr; ++ ++ if (!uap->using_tx_dma) ++ return false; ++ ++ if (!uap->port.x_char) { ++ /* no X-char, try to push chars out in DMA mode */ ++ bool ret = true; ++ ++ if (!uap->dmatx.queued) { ++ if (pl011_dma_tx_refill(uap) > 0) { ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } else ++ ret = false; ++ } else if (!(uap->dmacr & UART011_TXDMAE)) { ++ uap->dmacr |= UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++ return ret; ++ } ++ ++ /* ++ * We have an X-char to send. Disable DMA to prevent it loading ++ * the TX fifo, and then see if we can stuff it into the FIFO. ++ */ ++ dmacr = uap->dmacr; ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ if (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) { ++ /* ++ * No space in the FIFO, so enable the transmit interrupt ++ * so we know when there is space. Note that once we've ++ * loaded the character, we should just re-enable DMA. ++ */ ++ return false; ++ } ++ ++ pl011_write(uap->port.x_char, uap, REG_DR); ++ uap->port.icount.tx++; ++ uap->port.x_char = 0; ++ ++ /* Success - restore the DMA state */ ++ uap->dmacr = dmacr; ++ pl011_write(dmacr, uap, REG_DMACR); ++ ++ return true; ++} ++ ++/* ++ * Flush the transmit buffer. ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static void pl011_dma_flush_buffer(struct uart_port *port) ++__releases(&uap->port.lock) ++__acquires(&uap->port.lock) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ if (!uap->using_tx_dma) ++ return; ++ ++ dmaengine_terminate_async(uap->dmatx.chan); ++ ++ if (uap->dmatx.queued) { ++ dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, ++ DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ uap->dmacr &= ~UART011_TXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ } ++} ++ ++static void pl011_dma_rx_callback(void *data); ++ ++static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) ++{ ++ struct dma_chan *rxchan = uap->dmarx.chan; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_async_tx_descriptor *desc; ++ struct pl011_sgbuf *sgbuf; ++ ++ if (!rxchan) ++ return -EIO; ++ ++ /* Start the RX DMA job */ ++ sgbuf = uap->dmarx.use_buf_b ? 
++ &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, ++ DMA_DEV_TO_MEM, ++ DMA_PREP_INTERRUPT | DMA_CTRL_ACK); ++ /* ++ * If the DMA engine is busy and cannot prepare a ++ * channel, no big deal, the driver will fall back ++ * to interrupt mode as a result of this error code. ++ */ ++ if (!desc) { ++ uap->dmarx.running = false; ++ dmaengine_terminate_all(rxchan); ++ return -EBUSY; ++ } ++ ++ /* Some data to go along to the callback */ ++ desc->callback = pl011_dma_rx_callback; ++ desc->callback_param = uap; ++ dmarx->cookie = dmaengine_submit(desc); ++ dma_async_issue_pending(rxchan); ++ ++ uap->dmacr |= UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmarx.running = true; ++ ++ uap->im &= ~UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ return 0; ++} ++ ++/* ++ * This is called when either the DMA job is complete, or ++ * the FIFO timeout interrupt occurred. This must be called ++ * with the port spinlock uap->port.lock held. ++ */ ++static void pl011_dma_rx_chars(struct uart_amba_port *uap, ++ u32 pending, bool use_buf_b, ++ bool readfifo) ++{ ++ struct tty_port *port = &uap->port.state->port; ++ struct pl011_sgbuf *sgbuf = use_buf_b ? ++ &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ int dma_count = 0; ++ u32 fifotaken = 0; /* only used for vdbg() */ ++ ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ int dmataken = 0; ++ ++ if (uap->dmarx.poll_rate) { ++ /* The data can be taken by polling */ ++ dmataken = sgbuf->sg.length - dmarx->last_residue; ++ /* Recalculate the pending size */ ++ if (pending >= dmataken) ++ pending -= dmataken; ++ } ++ ++ /* Pick the remain data from the DMA */ ++ if (pending) { ++ ++ /* ++ * First take all chars in the DMA pipe, then look in the FIFO. ++ * Note that tty_insert_flip_buf() tries to take as many chars ++ * as it can. ++ */ ++ dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, ++ pending); ++ ++ uap->port.icount.rx += dma_count; ++ if (dma_count < pending) ++ dev_warn(uap->port.dev, ++ "couldn't insert all characters (TTY is full?)\n"); ++ } ++ ++ /* Reset the last_residue for Rx DMA poll */ ++ if (uap->dmarx.poll_rate) ++ dmarx->last_residue = sgbuf->sg.length; ++ ++ /* ++ * Only continue with trying to read the FIFO if all DMA chars have ++ * been taken first. ++ */ ++ if (dma_count == pending && readfifo) { ++ /* Clear any error flags */ ++ pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | ++ UART011_FEIS, uap, REG_ICR); ++ ++ /* ++ * If we read all the DMA'd characters, and we had an ++ * incomplete buffer, that could be due to an rx error, or ++ * maybe we just timed out. Read any pending chars and check ++ * the error status. ++ * ++ * Error conditions will only occur in the FIFO, these will ++ * trigger an immediate interrupt and stop the DMA job, so we ++ * will always find the error in the FIFO, never in the DMA ++ * buffer. ++ */ ++ fifotaken = pl011_fifo_to_tty(uap); ++ } ++ ++ spin_unlock(&uap->port.lock); ++ dev_vdbg(uap->port.dev, ++ "Took %d chars from DMA buffer and %d chars from the FIFO\n", ++ dma_count, fifotaken); ++ tty_flip_buffer_push(port); ++ spin_lock(&uap->port.lock); ++} ++ ++static void pl011_dma_rx_irq(struct uart_amba_port *uap) ++{ ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = dmarx->chan; ++ struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? 
++ &dmarx->sgbuf_b : &dmarx->sgbuf_a; ++ size_t pending; ++ struct dma_tx_state state; ++ enum dma_status dmastat; ++ ++ /* ++ * Pause the transfer so we can trust the current counter, ++ * do this before we pause the PL011 block, else we may ++ * overflow the FIFO. ++ */ ++ if (dmaengine_pause(rxchan)) ++ dev_err(uap->port.dev, "unable to pause DMA transfer\n"); ++ dmastat = rxchan->device->device_tx_status(rxchan, ++ dmarx->cookie, &state); ++ if (dmastat != DMA_PAUSED) ++ dev_err(uap->port.dev, "unable to pause DMA transfer\n"); ++ ++ /* Disable RX DMA - incoming data will wait in the FIFO */ ++ uap->dmacr &= ~UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ uap->dmarx.running = false; ++ ++ pending = sgbuf->sg.length - state.residue; ++ BUG_ON(pending > PL011_DMA_BUFFER_SIZE); ++ /* Then we terminate the transfer - we now know our residue */ ++ dmaengine_terminate_all(rxchan); ++ ++ /* ++ * This will take the chars we have so far and insert ++ * into the framework. ++ */ ++ pl011_dma_rx_chars(uap, pending, dmarx->use_buf_b, true); ++ ++ /* Switch buffer & re-trigger DMA job */ ++ dmarx->use_buf_b = !dmarx->use_buf_b; ++ if (pl011_dma_rx_trigger_dma(uap)) { ++ dev_dbg(uap->port.dev, "could not retrigger RX DMA job " ++ "fall back to interrupt mode\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++static void pl011_dma_rx_callback(void *data) ++{ ++ struct uart_amba_port *uap = data; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = dmarx->chan; ++ bool lastbuf = dmarx->use_buf_b; ++ struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? ++ &dmarx->sgbuf_b : &dmarx->sgbuf_a; ++ size_t pending; ++ struct dma_tx_state state; ++ int ret; ++ ++ /* ++ * This completion interrupt occurs typically when the ++ * RX buffer is totally stuffed but no timeout has yet ++ * occurred. When that happens, we just want the RX ++ * routine to flush out the secondary DMA buffer while ++ * we immediately trigger the next DMA job. ++ */ ++ spin_lock_irq(&uap->port.lock); ++ /* ++ * Rx data can be taken by the UART interrupts during ++ * the DMA irq handler. So we check the residue here. ++ */ ++ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); ++ pending = sgbuf->sg.length - state.residue; ++ BUG_ON(pending > PL011_DMA_BUFFER_SIZE); ++ /* Then we terminate the transfer - we now know our residue */ ++ dmaengine_terminate_all(rxchan); ++ ++ uap->dmarx.running = false; ++ dmarx->use_buf_b = !lastbuf; ++ ret = pl011_dma_rx_trigger_dma(uap); ++ ++ pl011_dma_rx_chars(uap, pending, lastbuf, false); ++ spin_unlock_irq(&uap->port.lock); ++ /* ++ * Do this check after we picked the DMA chars so we don't ++ * get some IRQ immediately from RX. ++ */ ++ if (ret) { ++ dev_dbg(uap->port.dev, "could not retrigger RX DMA job " ++ "fall back to interrupt mode\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++/* ++ * Stop accepting received characters, when we're shutting down or ++ * suspending this port. ++ * Locking: called with port lock held and IRQs disabled. ++ */ ++static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) ++{ ++ /* FIXME. Just disable the DMA enable */ ++ uap->dmacr &= ~UART011_RXDMAE; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++} ++ ++/* ++ * Timer handler for Rx DMA polling. ++ * Every polling, It checks the residue in the dma buffer and transfer ++ * data to the tty. Also, last_residue is updated for the next polling. 
++ */ ++static void pl011_dma_rx_poll(struct timer_list *t) ++{ ++ struct uart_amba_port *uap = from_timer(uap, t, dmarx.timer); ++ struct tty_port *port = &uap->port.state->port; ++ struct pl011_dmarx_data *dmarx = &uap->dmarx; ++ struct dma_chan *rxchan = uap->dmarx.chan; ++ unsigned long flags = 0; ++ unsigned int dmataken = 0; ++ unsigned int size = 0; ++ struct pl011_sgbuf *sgbuf; ++ int dma_count; ++ struct dma_tx_state state; ++ ++ sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; ++ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); ++ if (likely(state.residue < dmarx->last_residue)) { ++ dmataken = sgbuf->sg.length - dmarx->last_residue; ++ size = dmarx->last_residue - state.residue; ++ dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, ++ size); ++ if (dma_count == size) ++ dmarx->last_residue = state.residue; ++ dmarx->last_jiffies = jiffies; ++ } ++ tty_flip_buffer_push(port); ++ ++ /* ++ * If no data is received in poll_timeout, the driver will fall back ++ * to interrupt mode. We will retrigger DMA at the first interrupt. ++ */ ++ if (jiffies_to_msecs(jiffies - dmarx->last_jiffies) ++ > uap->dmarx.poll_timeout) { ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ pl011_dma_rx_stop(uap); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ ++ uap->dmarx.running = false; ++ dmaengine_terminate_all(rxchan); ++ del_timer(&uap->dmarx.timer); ++ } else { ++ mod_timer(&uap->dmarx.timer, ++ jiffies + msecs_to_jiffies(uap->dmarx.poll_rate)); ++ } ++} ++ ++static void pl011_dma_startup(struct uart_amba_port *uap) ++{ ++ int ret; ++ ++ if (!uap->dma_probed) ++ pl011_dma_probe(uap); ++ ++ if (!uap->dmatx.chan) ++ return; ++ ++ uap->dmatx.buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL | __GFP_DMA); ++ if (!uap->dmatx.buf) { ++ dev_err(uap->port.dev, "no memory for DMA TX buffer\n"); ++ uap->port.fifosize = uap->fifosize; ++ return; ++ } ++ ++ sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); ++ ++ /* The DMA buffer is now the FIFO the TTY subsystem can use */ ++ uap->port.fifosize = PL011_DMA_BUFFER_SIZE; ++ uap->using_tx_dma = true; ++ ++ if (!uap->dmarx.chan) ++ goto skip_rx; ++ ++ /* Allocate and map DMA RX buffers */ ++ ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, ++ DMA_FROM_DEVICE); ++ if (ret) { ++ dev_err(uap->port.dev, "failed to init DMA %s: %d\n", ++ "RX buffer A", ret); ++ goto skip_rx; ++ } ++ ++ ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, ++ DMA_FROM_DEVICE); ++ if (ret) { ++ dev_err(uap->port.dev, "failed to init DMA %s: %d\n", ++ "RX buffer B", ret); ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, ++ DMA_FROM_DEVICE); ++ goto skip_rx; ++ } ++ ++ uap->using_rx_dma = true; ++ ++skip_rx: ++ /* Turn on DMA error (RX/TX will be enabled on demand) */ ++ uap->dmacr |= UART011_DMAONERR; ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ ++ /* ++ * ST Micro variants has some specific dma burst threshold ++ * compensation. Set this to 16 bytes, so burst will only ++ * be issued above/below 16 bytes. 
++ */ ++ if (uap->vendor->dma_threshold) ++ pl011_write(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16, ++ uap, REG_ST_DMAWM); ++ ++ if (uap->using_rx_dma) { ++ if (pl011_dma_rx_trigger_dma(uap)) ++ dev_dbg(uap->port.dev, "could not trigger initial " ++ "RX DMA job, fall back to interrupt mode\n"); ++ if (uap->dmarx.poll_rate) { ++ timer_setup(&uap->dmarx.timer, pl011_dma_rx_poll, 0); ++ mod_timer(&uap->dmarx.timer, ++ jiffies + ++ msecs_to_jiffies(uap->dmarx.poll_rate)); ++ uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; ++ uap->dmarx.last_jiffies = jiffies; ++ } ++ } ++} ++ ++static void pl011_dma_shutdown(struct uart_amba_port *uap) ++{ ++ if (!(uap->using_tx_dma || uap->using_rx_dma)) ++ return; ++ ++ /* Disable RX and TX DMA */ ++ while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy) ++ cpu_relax(); ++ ++ spin_lock_irq(&uap->port.lock); ++ uap->dmacr &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE); ++ pl011_write(uap->dmacr, uap, REG_DMACR); ++ spin_unlock_irq(&uap->port.lock); ++ ++ if (uap->using_tx_dma) { ++ /* In theory, this should already be done by pl011_dma_flush_buffer */ ++ dmaengine_terminate_all(uap->dmatx.chan); ++ if (uap->dmatx.queued) { ++ dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, ++ DMA_TO_DEVICE); ++ uap->dmatx.queued = false; ++ } ++ ++ kfree(uap->dmatx.buf); ++ uap->using_tx_dma = false; ++ } ++ ++ if (uap->using_rx_dma) { ++ dmaengine_terminate_all(uap->dmarx.chan); ++ /* Clean up the RX DMA */ ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); ++ pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); ++ if (uap->dmarx.poll_rate) ++ del_timer_sync(&uap->dmarx.timer); ++ uap->using_rx_dma = false; ++ } ++} ++ ++static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) ++{ ++ return uap->using_rx_dma; ++} ++ ++static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) ++{ ++ return uap->using_rx_dma && uap->dmarx.running; ++} ++ ++#else ++/* Blank functions if the DMA engine is not available */ ++static inline void pl011_dma_probe(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_remove(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_startup(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_shutdown(struct uart_amba_port *uap) ++{ ++} ++ ++static inline bool pl011_dma_tx_irq(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline void pl011_dma_tx_stop(struct uart_amba_port *uap) ++{ ++} ++ ++static inline bool pl011_dma_tx_start(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline void pl011_dma_rx_irq(struct uart_amba_port *uap) ++{ ++} ++ ++static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) ++{ ++} ++ ++static inline int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) ++{ ++ return -EIO; ++} ++ ++static inline bool pl011_dma_rx_available(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) ++{ ++ return false; ++} ++ ++#define pl011_dma_flush_buffer NULL ++#endif ++ ++static void pl011_stop_tx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im &= ~UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ pl011_dma_tx_stop(uap); ++} ++ ++static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq); ++ ++/* Start TX with programmed I/O only (no DMA) */ ++static void pl011_start_tx_pio(struct 
uart_amba_port *uap) ++{ ++ if (pl011_tx_chars(uap, false)) { ++ uap->im |= UART011_TXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++} ++ ++static void pl011_start_tx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ if (!pl011_dma_tx_start(uap)) ++ pl011_start_tx_pio(uap); ++} ++ ++static void pl011_stop_rx(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im &= ~(UART011_RXIM|UART011_RTIM|UART011_FEIM| ++ UART011_PEIM|UART011_BEIM|UART011_OEIM); ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ pl011_dma_rx_stop(uap); ++} ++ ++static void pl011_enable_ms(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ uap->im |= UART011_RIMIM|UART011_CTSMIM|UART011_DCDMIM|UART011_DSRMIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++} ++ ++static void pl011_rx_chars(struct uart_amba_port *uap) ++__releases(&uap->port.lock) ++__acquires(&uap->port.lock) ++{ ++ pl011_fifo_to_tty(uap); ++ ++ spin_unlock(&uap->port.lock); ++ tty_flip_buffer_push(&uap->port.state->port); ++ /* ++ * If we were temporarily out of DMA mode for a while, ++ * attempt to switch back to DMA mode again. ++ */ ++ if (pl011_dma_rx_available(uap)) { ++ if (pl011_dma_rx_trigger_dma(uap)) { ++ dev_dbg(uap->port.dev, "could not trigger RX DMA job " ++ "fall back to interrupt mode again\n"); ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ } else { ++#ifdef CONFIG_DMA_ENGINE ++ /* Start Rx DMA poll */ ++ if (uap->dmarx.poll_rate) { ++ uap->dmarx.last_jiffies = jiffies; ++ uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE; ++ mod_timer(&uap->dmarx.timer, ++ jiffies + ++ msecs_to_jiffies(uap->dmarx.poll_rate)); ++ } ++#endif ++ } ++ } ++ spin_lock(&uap->port.lock); ++} ++ ++static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, ++ bool from_irq) ++{ ++ if (unlikely(!from_irq) && ++ pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ return false; /* unable to transmit character */ ++ ++ pl011_write(c, uap, REG_DR); ++ uap->port.icount.tx++; ++ ++ return true; ++} ++ ++/* Returns true if tx interrupts have to be (kept) enabled */ ++static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) ++{ ++ struct circ_buf *xmit = &uap->port.state->xmit; ++ int count = uap->fifosize >> 1; ++ ++ if (uap->port.x_char) { ++ if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) ++ return true; ++ uap->port.x_char = 0; ++ --count; ++ } ++ if (uart_circ_empty(xmit) || uart_tx_stopped(&uap->port)) { ++ pl011_stop_tx(&uap->port); ++ return false; ++ } ++ ++ /* If we are using DMA mode, try to send some characters. 
*/ ++ if (pl011_dma_tx_irq(uap)) ++ return true; ++ ++ do { ++ if (likely(from_irq) && count-- == 0) ++ break; ++ ++ if (!pl011_tx_char(uap, xmit->buf[xmit->tail], from_irq)) ++ break; ++ ++ xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); ++ } while (!uart_circ_empty(xmit)); ++ ++ if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) ++ uart_write_wakeup(&uap->port); ++ ++ if (uart_circ_empty(xmit)) { ++ pl011_stop_tx(&uap->port); ++ return false; ++ } ++ return true; ++} ++ ++static void pl011_modem_status(struct uart_amba_port *uap) ++{ ++ unsigned int status, delta; ++ ++ status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; ++ ++ delta = status ^ uap->old_status; ++ uap->old_status = status; ++ ++ if (!delta) ++ return; ++ ++ if (delta & UART01x_FR_DCD) ++ uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD); ++ ++ if (delta & uap->vendor->fr_dsr) ++ uap->port.icount.dsr++; ++ ++ if (delta & uap->vendor->fr_cts) ++ uart_handle_cts_change(&uap->port, ++ status & uap->vendor->fr_cts); ++ ++ wake_up_interruptible(&uap->port.state->port.delta_msr_wait); ++} ++ ++static void check_apply_cts_event_workaround(struct uart_amba_port *uap) ++{ ++ unsigned int dummy_read; ++ ++ if (!uap->vendor->cts_event_workaround) ++ return; ++ ++ /* workaround to make sure that all bits are unlocked.. */ ++ pl011_write(0x00, uap, REG_ICR); ++ ++ /* ++ * WA: introduce 26ns(1 uart clk) delay before W1C; ++ * single apb access will incur 2 pclk(133.12Mhz) delay, ++ * so add 2 dummy reads ++ */ ++ dummy_read = pl011_read(uap, REG_ICR); ++ dummy_read = pl011_read(uap, REG_ICR); ++} ++ ++#ifdef CONFIG_SERIAL_ATTACHED_MBIGEN ++struct workaround_oem_info { ++ char oem_id[ACPI_OEM_ID_SIZE + 1]; ++ char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; ++ u32 oem_revision; ++}; ++ ++static bool pl011_enable_hisi_wkrd; ++static struct workaround_oem_info pl011_wkrd_info[] = { ++ { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x300, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x301, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x400, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x401, ++ }, { ++ .oem_id = "HISI ", ++ .oem_table_id = "HIP08 ", ++ .oem_revision = 0x402, ++ } ++}; ++ ++static void pl011_check_hisi_workaround(void) ++{ ++ struct acpi_table_header *tbl; ++ acpi_status status = AE_OK; ++ int i; ++ ++ status = acpi_get_table(ACPI_SIG_MADT, 0, &tbl); ++ if (ACPI_FAILURE(status) || !tbl) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(pl011_wkrd_info); i++) { ++ if (!memcmp(pl011_wkrd_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && ++ !memcmp(pl011_wkrd_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && ++ pl011_wkrd_info[i].oem_revision == tbl->oem_revision) { ++ pl011_enable_hisi_wkrd = true; ++ break; ++ } ++ } ++ ++ acpi_put_table(tbl); ++} ++ ++#else ++#define pl011_enable_hisi_wkrd 0 ++static inline void pl011_check_hisi_workaround(void){ } ++#endif ++ ++static irqreturn_t pl011_int(int irq, void *dev_id) ++{ ++ struct uart_amba_port *uap = dev_id; ++ unsigned long flags; ++ unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT; ++ int handled = 0; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ status = pl011_read(uap, REG_RIS) & uap->im; ++ if (status) { ++ do { ++ check_apply_cts_event_workaround(uap); ++ ++ pl011_write(status & ~(UART011_TXIS|UART011_RTIS| ++ UART011_RXIS), ++ uap, REG_ICR); ++ ++ if (status & (UART011_RTIS|UART011_RXIS)) { ++ if 
(pl011_dma_rx_running(uap)) ++ pl011_dma_rx_irq(uap); ++ else ++ pl011_rx_chars(uap); ++ } ++ if (status & (UART011_DSRMIS|UART011_DCDMIS| ++ UART011_CTSMIS|UART011_RIMIS)) ++ pl011_modem_status(uap); ++ if (status & UART011_TXIS) ++ pl011_tx_chars(uap, true); ++ ++ if (pass_counter-- == 0) ++ break; ++ ++ status = pl011_read(uap, REG_RIS) & uap->im; ++ } while (status != 0); ++ handled = 1; ++ } ++ ++ if (pl011_enable_hisi_wkrd) { ++ pl011_write(0, uap, REG_IMSC); ++ pl011_write(uap->im, uap, REG_IMSC); ++ } ++ ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++ ++ return IRQ_RETVAL(handled); ++} ++ ++static unsigned int pl011_tx_empty(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ /* Allow feature register bits to be inverted to work around errata */ ++ unsigned int status = pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr; ++ ++ return status & (uap->vendor->fr_busy | UART01x_FR_TXFF) ? ++ 0 : TIOCSER_TEMT; ++} ++ ++static unsigned int pl011_get_mctrl(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int result = 0; ++ unsigned int status = pl011_read(uap, REG_FR); ++ ++#define TIOCMBIT(uartbit, tiocmbit) \ ++ if (status & uartbit) \ ++ result |= tiocmbit ++ ++ TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR); ++ TIOCMBIT(uap->vendor->fr_dsr, TIOCM_DSR); ++ TIOCMBIT(uap->vendor->fr_cts, TIOCM_CTS); ++ TIOCMBIT(uap->vendor->fr_ri, TIOCM_RNG); ++#undef TIOCMBIT ++ return result; ++} ++ ++static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int cr; ++ ++ cr = pl011_read(uap, REG_CR); ++ ++#define TIOCMBIT(tiocmbit, uartbit) \ ++ if (mctrl & tiocmbit) \ ++ cr |= uartbit; \ ++ else \ ++ cr &= ~uartbit ++ ++ TIOCMBIT(TIOCM_RTS, UART011_CR_RTS); ++ TIOCMBIT(TIOCM_DTR, UART011_CR_DTR); ++ TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1); ++ TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2); ++ TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE); ++ ++ if (port->status & UPSTAT_AUTORTS) { ++ /* We need to disable auto-RTS if we want to turn RTS off */ ++ TIOCMBIT(TIOCM_RTS, UART011_CR_RTSEN); ++ } ++#undef TIOCMBIT ++ ++ pl011_write(cr, uap, REG_CR); ++} ++ ++static void pl011_break_ctl(struct uart_port *port, int break_state) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned long flags; ++ unsigned int lcr_h; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ lcr_h = pl011_read(uap, REG_LCRH_TX); ++ if (break_state == -1) ++ lcr_h |= UART01x_LCRH_BRK; ++ else ++ lcr_h &= ~UART01x_LCRH_BRK; ++ pl011_write(lcr_h, uap, REG_LCRH_TX); ++ spin_unlock_irqrestore(&uap->port.lock, flags); ++} ++ ++#ifdef CONFIG_CONSOLE_POLL ++ ++static void pl011_quiesce_irqs(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_write(pl011_read(uap, REG_MIS), uap, REG_ICR); ++ /* ++ * There is no way to clear TXIM as this is "ready to transmit IRQ", so ++ * we simply mask it. start_tx() will unmask it. ++ * ++ * Note we can race with start_tx(), and if the race happens, the ++ * polling user might get another interrupt just after we clear it. ++ * But it should be OK and can happen even w/o the race, e.g. ++ * controller immediately got some new data and raised the IRQ. 
++ * ++ * And whoever uses polling routines assumes that it manages the device ++ * (including tx queue), so we're also fine with start_tx()'s caller ++ * side. ++ */ ++ pl011_write(pl011_read(uap, REG_IMSC) & ~UART011_TXIM, uap, ++ REG_IMSC); ++} ++ ++static int pl011_get_poll_char(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int status; ++ ++ /* ++ * The caller might need IRQs lowered, e.g. if used with KDB NMI ++ * debugger. ++ */ ++ pl011_quiesce_irqs(port); ++ ++ status = pl011_read(uap, REG_FR); ++ if (status & UART01x_FR_RXFE) ++ return NO_POLL_CHAR; ++ ++ return pl011_read(uap, REG_DR); ++} ++ ++static void pl011_put_poll_char(struct uart_port *port, ++ unsigned char ch) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ ++ pl011_write(ch, uap, REG_DR); ++} ++ ++#endif /* CONFIG_CONSOLE_POLL */ ++ ++static int pl011_hwinit(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ int retval; ++ ++ /* Optionaly enable pins to be muxed in and configured */ ++ pinctrl_pm_select_default_state(port->dev); ++ ++ /* ++ * Try to enable the clock producer. ++ */ ++ retval = clk_prepare_enable(uap->clk); ++ if (retval) ++ return retval; ++ ++ uap->port.uartclk = clk_get_rate(uap->clk); ++ ++ /* Clear pending error and receive interrupts */ ++ pl011_write(UART011_OEIS | UART011_BEIS | UART011_PEIS | ++ UART011_FEIS | UART011_RTIS | UART011_RXIS, ++ uap, REG_ICR); ++ ++ /* ++ * Save interrupts enable mask, and enable RX interrupts in case if ++ * the interrupt is used for NMI entry. ++ */ ++ uap->im = pl011_read(uap, REG_IMSC); ++ pl011_write(UART011_RTIM | UART011_RXIM, uap, REG_IMSC); ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->init) ++ plat->init(); ++ } ++ ++ pl011_check_hisi_workaround(); ++ return 0; ++} ++ ++static bool pl011_split_lcrh(const struct uart_amba_port *uap) ++{ ++ return pl011_reg_to_offset(uap, REG_LCRH_RX) != ++ pl011_reg_to_offset(uap, REG_LCRH_TX); ++} ++ ++static void pl011_write_lcr_h(struct uart_amba_port *uap, unsigned int lcr_h) ++{ ++ pl011_write(lcr_h, uap, REG_LCRH_RX); ++ if (pl011_split_lcrh(uap)) { ++ int i; ++ /* ++ * Wait 10 PCLKs before writing LCRH_TX register, ++ * to get this delay write read only register 10 times ++ */ ++ for (i = 0; i < 10; ++i) ++ pl011_write(0xff, uap, REG_MIS); ++ pl011_write(lcr_h, uap, REG_LCRH_TX); ++ } ++} ++ ++static int pl011_allocate_irq(struct uart_amba_port *uap) ++{ ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ return request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap); ++} ++ ++/* ++ * Enable interrupts, only timeouts when using DMA ++ * if initial RX DMA job failed, start in interrupt mode ++ * as well. ++ */ ++static void pl011_enable_interrupts(struct uart_amba_port *uap) ++{ ++ unsigned int i; ++ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* Clear out any spuriously appearing RX interrupts */ ++ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); ++ ++ /* ++ * RXIS is asserted only when the RX FIFO transitions from below ++ * to above the trigger threshold. If the RX FIFO is already ++ * full to the threshold this can't happen and RXIS will now be ++ * stuck off. 
Drain the RX FIFO explicitly to fix this: ++ */ ++ for (i = 0; i < uap->fifosize * 2; ++i) { ++ if (pl011_read(uap, REG_FR) & UART01x_FR_RXFE) ++ break; ++ ++ pl011_read(uap, REG_DR); ++ } ++ ++ uap->im = UART011_RTIM; ++ if (!pl011_dma_rx_running(uap)) ++ uap->im |= UART011_RXIM; ++ pl011_write(uap->im, uap, REG_IMSC); ++ spin_unlock_irq(&uap->port.lock); ++} ++ ++static int pl011_startup(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int cr; ++ int retval; ++ ++ retval = pl011_hwinit(port); ++ if (retval) ++ goto clk_dis; ++ ++ retval = pl011_allocate_irq(uap); ++ if (retval) ++ goto clk_dis; ++ ++ pl011_write(uap->vendor->ifls, uap, REG_IFLS); ++ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* restore RTS and DTR */ ++ cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR); ++ cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE; ++ pl011_write(cr, uap, REG_CR); ++ ++ spin_unlock_irq(&uap->port.lock); ++ ++ /* ++ * initialise the old status of the modem signals ++ */ ++ uap->old_status = pl011_read(uap, REG_FR) & UART01x_FR_MODEM_ANY; ++ ++ /* Startup DMA */ ++ pl011_dma_startup(uap); ++ ++ pl011_enable_interrupts(uap); ++ ++ return 0; ++ ++ clk_dis: ++ clk_disable_unprepare(uap->clk); ++ return retval; ++} ++ ++static int sbsa_uart_startup(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ int retval; ++ ++ retval = pl011_hwinit(port); ++ if (retval) ++ return retval; ++ ++ retval = pl011_allocate_irq(uap); ++ if (retval) ++ return retval; ++ ++ /* The SBSA UART does not support any modem status lines. */ ++ uap->old_status = 0; ++ ++ pl011_enable_interrupts(uap); ++ ++ return 0; ++} ++ ++static void pl011_shutdown_channel(struct uart_amba_port *uap, ++ unsigned int lcrh) ++{ ++ unsigned long val; ++ ++ val = pl011_read(uap, lcrh); ++ val &= ~(UART01x_LCRH_BRK | UART01x_LCRH_FEN); ++ pl011_write(val, uap, lcrh); ++} ++ ++/* ++ * disable the port. It should not disable RTS and DTR. ++ * Also RTS and DTR state should be preserved to restore ++ * it during startup(). 
++ */ ++static void pl011_disable_uart(struct uart_amba_port *uap) ++{ ++ unsigned int cr; ++ ++ uap->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS); ++ spin_lock_irq(&uap->port.lock); ++ cr = pl011_read(uap, REG_CR); ++ uap->old_cr = cr; ++ cr &= UART011_CR_RTS | UART011_CR_DTR; ++ cr |= UART01x_CR_UARTEN | UART011_CR_TXE; ++ pl011_write(cr, uap, REG_CR); ++ spin_unlock_irq(&uap->port.lock); ++ ++ /* ++ * disable break condition and fifos ++ */ ++ pl011_shutdown_channel(uap, REG_LCRH_RX); ++ if (pl011_split_lcrh(uap)) ++ pl011_shutdown_channel(uap, REG_LCRH_TX); ++} ++ ++static void pl011_disable_interrupts(struct uart_amba_port *uap) ++{ ++ spin_lock_irq(&uap->port.lock); ++ ++ /* mask all interrupts and clear all pending ones */ ++ uap->im = 0; ++ pl011_write(uap->im, uap, REG_IMSC); ++ pl011_write(0xffff, uap, REG_ICR); ++ ++ spin_unlock_irq(&uap->port.lock); ++} ++ ++static void pl011_shutdown(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_disable_interrupts(uap); ++ ++ pl011_dma_shutdown(uap); ++ ++ free_irq(uap->port.irq, uap); ++ ++ pl011_disable_uart(uap); ++ ++ /* ++ * Shut down the clock producer ++ */ ++ clk_disable_unprepare(uap->clk); ++ /* Optionally let pins go into sleep states */ ++ pinctrl_pm_select_sleep_state(port->dev); ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->exit) ++ plat->exit(); ++ } ++ ++ if (uap->port.ops->flush_buffer) ++ uap->port.ops->flush_buffer(port); ++} ++ ++static void sbsa_uart_shutdown(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ pl011_disable_interrupts(uap); ++ ++ free_irq(uap->port.irq, uap); ++ ++ if (uap->port.ops->flush_buffer) ++ uap->port.ops->flush_buffer(port); ++} ++ ++static void ++pl011_setup_status_masks(struct uart_port *port, struct ktermios *termios) ++{ ++ port->read_status_mask = UART011_DR_OE | 255; ++ if (termios->c_iflag & INPCK) ++ port->read_status_mask |= UART011_DR_FE | UART011_DR_PE; ++ if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) ++ port->read_status_mask |= UART011_DR_BE; ++ ++ /* ++ * Characters to ignore ++ */ ++ port->ignore_status_mask = 0; ++ if (termios->c_iflag & IGNPAR) ++ port->ignore_status_mask |= UART011_DR_FE | UART011_DR_PE; ++ if (termios->c_iflag & IGNBRK) { ++ port->ignore_status_mask |= UART011_DR_BE; ++ /* ++ * If we're ignoring parity and break indicators, ++ * ignore overruns too (for real raw support). ++ */ ++ if (termios->c_iflag & IGNPAR) ++ port->ignore_status_mask |= UART011_DR_OE; ++ } ++ ++ /* ++ * Ignore all characters if CREAD is not set. ++ */ ++ if ((termios->c_cflag & CREAD) == 0) ++ port->ignore_status_mask |= UART_DUMMY_DR_RX; ++} ++ ++static void ++pl011_set_termios(struct uart_port *port, struct ktermios *termios, ++ struct ktermios *old) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned int lcr_h, old_cr; ++ unsigned long flags; ++ unsigned int baud, quot, clkdiv; ++ ++ if (uap->vendor->oversampling) ++ clkdiv = 8; ++ else ++ clkdiv = 16; ++ ++ /* ++ * Ask the core to calculate the divisor for us. ++ */ ++ baud = uart_get_baud_rate(port, termios, old, 0, ++ port->uartclk / clkdiv); ++#ifdef CONFIG_DMA_ENGINE ++ /* ++ * Adjust RX DMA polling rate with baud rate if not specified. 
++ */ ++ if (uap->dmarx.auto_poll_rate) ++ uap->dmarx.poll_rate = DIV_ROUND_UP(10000000, baud); ++#endif ++ ++ if (baud > port->uartclk/16) ++ quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud); ++ else ++ quot = DIV_ROUND_CLOSEST(port->uartclk * 4, baud); ++ ++ switch (termios->c_cflag & CSIZE) { ++ case CS5: ++ lcr_h = UART01x_LCRH_WLEN_5; ++ break; ++ case CS6: ++ lcr_h = UART01x_LCRH_WLEN_6; ++ break; ++ case CS7: ++ lcr_h = UART01x_LCRH_WLEN_7; ++ break; ++ default: // CS8 ++ lcr_h = UART01x_LCRH_WLEN_8; ++ break; ++ } ++ if (termios->c_cflag & CSTOPB) ++ lcr_h |= UART01x_LCRH_STP2; ++ if (termios->c_cflag & PARENB) { ++ lcr_h |= UART01x_LCRH_PEN; ++ if (!(termios->c_cflag & PARODD)) ++ lcr_h |= UART01x_LCRH_EPS; ++ if (termios->c_cflag & CMSPAR) ++ lcr_h |= UART011_LCRH_SPS; ++ } ++ if (uap->fifosize > 1) ++ lcr_h |= UART01x_LCRH_FEN; ++ ++ spin_lock_irqsave(&port->lock, flags); ++ ++ /* ++ * Update the per-port timeout. ++ */ ++ uart_update_timeout(port, termios->c_cflag, baud); ++ ++ pl011_setup_status_masks(port, termios); ++ ++ if (UART_ENABLE_MS(port, termios->c_cflag)) ++ pl011_enable_ms(port); ++ ++ /* first, disable everything */ ++ old_cr = pl011_read(uap, REG_CR); ++ pl011_write(0, uap, REG_CR); ++ ++ if (termios->c_cflag & CRTSCTS) { ++ if (old_cr & UART011_CR_RTS) ++ old_cr |= UART011_CR_RTSEN; ++ ++ old_cr |= UART011_CR_CTSEN; ++ port->status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; ++ } else { ++ old_cr &= ~(UART011_CR_CTSEN | UART011_CR_RTSEN); ++ port->status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS); ++ } ++ ++ if (uap->vendor->oversampling) { ++ if (baud > port->uartclk / 16) ++ old_cr |= ST_UART011_CR_OVSFACT; ++ else ++ old_cr &= ~ST_UART011_CR_OVSFACT; ++ } ++ ++ /* ++ * Workaround for the ST Micro oversampling variants to ++ * increase the bitrate slightly, by lowering the divisor, ++ * to avoid delayed sampling of start bit at high speeds, ++ * else we see data corruption. ++ */ ++ if (uap->vendor->oversampling) { ++ if ((baud >= 3000000) && (baud < 3250000) && (quot > 1)) ++ quot -= 1; ++ else if ((baud > 3250000) && (quot > 2)) ++ quot -= 2; ++ } ++ /* Set baud rate */ ++ pl011_write(quot & 0x3f, uap, REG_FBRD); ++ pl011_write(quot >> 6, uap, REG_IBRD); ++ ++ /* ++ * ----------v----------v----------v----------v----- ++ * NOTE: REG_LCRH_TX and REG_LCRH_RX MUST BE WRITTEN AFTER ++ * REG_FBRD & REG_IBRD. ++ * ----------^----------^----------^----------^----- ++ */ ++ pl011_write_lcr_h(uap, lcr_h); ++ pl011_write(old_cr, uap, REG_CR); ++ ++ spin_unlock_irqrestore(&port->lock, flags); ++} ++ ++static void ++sbsa_uart_set_termios(struct uart_port *port, struct ktermios *termios, ++ struct ktermios *old) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ unsigned long flags; ++ ++ tty_termios_encode_baud_rate(termios, uap->fixed_baud, uap->fixed_baud); ++ ++ /* The SBSA UART only supports 8n1 without hardware flow control. */ ++ termios->c_cflag &= ~(CSIZE | CSTOPB | PARENB | PARODD); ++ termios->c_cflag &= ~(CMSPAR | CRTSCTS); ++ termios->c_cflag |= CS8 | CLOCAL; ++ ++ spin_lock_irqsave(&port->lock, flags); ++ uart_update_timeout(port, CS8, uap->fixed_baud); ++ pl011_setup_status_masks(port, termios); ++ spin_unlock_irqrestore(&port->lock, flags); ++} ++ ++static const char *pl011_type(struct uart_port *port) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ return uap->port.type == PORT_AMBA ? 
uap->type : NULL; ++} ++ ++/* ++ * Release the memory region(s) being used by 'port' ++ */ ++static void pl011_release_port(struct uart_port *port) ++{ ++ release_mem_region(port->mapbase, SZ_4K); ++} ++ ++/* ++ * Request the memory region(s) being used by 'port' ++ */ ++static int pl011_request_port(struct uart_port *port) ++{ ++ return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") ++ != NULL ? 0 : -EBUSY; ++} ++ ++/* ++ * Configure/autoconfigure the port. ++ */ ++static void pl011_config_port(struct uart_port *port, int flags) ++{ ++ if (flags & UART_CONFIG_TYPE) { ++ port->type = PORT_AMBA; ++ pl011_request_port(port); ++ } ++} ++ ++/* ++ * verify the new serial_struct (for TIOCSSERIAL). ++ */ ++static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ++{ ++ int ret = 0; ++ if (ser->type != PORT_UNKNOWN && ser->type != PORT_AMBA) ++ ret = -EINVAL; ++ if (ser->irq < 0 || ser->irq >= nr_irqs) ++ ret = -EINVAL; ++ if (ser->baud_base < 9600) ++ ret = -EINVAL; ++ return ret; ++} ++ ++static const struct uart_ops amba_pl011_pops = { ++ .tx_empty = pl011_tx_empty, ++ .set_mctrl = pl011_set_mctrl, ++ .get_mctrl = pl011_get_mctrl, ++ .stop_tx = pl011_stop_tx, ++ .start_tx = pl011_start_tx, ++ .stop_rx = pl011_stop_rx, ++ .enable_ms = pl011_enable_ms, ++ .break_ctl = pl011_break_ctl, ++ .startup = pl011_startup, ++ .shutdown = pl011_shutdown, ++ .flush_buffer = pl011_dma_flush_buffer, ++ .set_termios = pl011_set_termios, ++ .type = pl011_type, ++ .release_port = pl011_release_port, ++ .request_port = pl011_request_port, ++ .config_port = pl011_config_port, ++ .verify_port = pl011_verify_port, ++#ifdef CONFIG_CONSOLE_POLL ++ .poll_init = pl011_hwinit, ++ .poll_get_char = pl011_get_poll_char, ++ .poll_put_char = pl011_put_poll_char, ++#endif ++}; ++ ++static void sbsa_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) ++{ ++} ++ ++static unsigned int sbsa_uart_get_mctrl(struct uart_port *port) ++{ ++ return 0; ++} ++ ++static const struct uart_ops sbsa_uart_pops = { ++ .tx_empty = pl011_tx_empty, ++ .set_mctrl = sbsa_uart_set_mctrl, ++ .get_mctrl = sbsa_uart_get_mctrl, ++ .stop_tx = pl011_stop_tx, ++ .start_tx = pl011_start_tx, ++ .stop_rx = pl011_stop_rx, ++ .startup = sbsa_uart_startup, ++ .shutdown = sbsa_uart_shutdown, ++ .set_termios = sbsa_uart_set_termios, ++ .type = pl011_type, ++ .release_port = pl011_release_port, ++ .request_port = pl011_request_port, ++ .config_port = pl011_config_port, ++ .verify_port = pl011_verify_port, ++#ifdef CONFIG_CONSOLE_POLL ++ .poll_init = pl011_hwinit, ++ .poll_get_char = pl011_get_poll_char, ++ .poll_put_char = pl011_put_poll_char, ++#endif ++}; ++ ++static struct uart_amba_port *amba_ports[UART_NR]; ++ ++#ifdef CONFIG_SERIAL_AMBA_PL011_CONSOLE ++ ++static void pl011_console_putchar(struct uart_port *port, int ch) ++{ ++ struct uart_amba_port *uap = ++ container_of(port, struct uart_amba_port, port); ++ ++ while (pl011_read(uap, REG_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ pl011_write(ch, uap, REG_DR); ++} ++ ++static void ++pl011_console_write(struct console *co, const char *s, unsigned int count) ++{ ++ struct uart_amba_port *uap = amba_ports[co->index]; ++ unsigned int old_cr = 0, new_cr; ++ unsigned long flags; ++ int locked = 1; ++ ++ clk_enable(uap->clk); ++ ++ local_irq_save(flags); ++ if (uap->port.sysrq) ++ locked = 0; ++ else if (oops_in_progress) ++ locked = spin_trylock(&uap->port.lock); ++ else ++ spin_lock(&uap->port.lock); ++ ++ /* ++ * First save the CR then disable the interrupts ++ */ ++ if 
(!uap->vendor->always_enabled) { ++ old_cr = pl011_read(uap, REG_CR); ++ new_cr = old_cr & ~UART011_CR_CTSEN; ++ new_cr |= UART01x_CR_UARTEN | UART011_CR_TXE; ++ pl011_write(new_cr, uap, REG_CR); ++ } ++ ++ uart_console_write(&uap->port, s, count, pl011_console_putchar); ++ ++ /* ++ * Finally, wait for transmitter to become empty and restore the ++ * TCR. Allow feature register bits to be inverted to work around ++ * errata. ++ */ ++ while ((pl011_read(uap, REG_FR) ^ uap->vendor->inv_fr) ++ & uap->vendor->fr_busy) ++ cpu_relax(); ++ if (!uap->vendor->always_enabled) ++ pl011_write(old_cr, uap, REG_CR); ++ ++ if (locked) ++ spin_unlock(&uap->port.lock); ++ local_irq_restore(flags); ++ ++ clk_disable(uap->clk); ++} ++ ++static void pl011_console_get_options(struct uart_amba_port *uap, int *baud, ++ int *parity, int *bits) ++{ ++ if (pl011_read(uap, REG_CR) & UART01x_CR_UARTEN) { ++ unsigned int lcr_h, ibrd, fbrd; ++ ++ lcr_h = pl011_read(uap, REG_LCRH_TX); ++ ++ *parity = 'n'; ++ if (lcr_h & UART01x_LCRH_PEN) { ++ if (lcr_h & UART01x_LCRH_EPS) ++ *parity = 'e'; ++ else ++ *parity = 'o'; ++ } ++ ++ if ((lcr_h & 0x60) == UART01x_LCRH_WLEN_7) ++ *bits = 7; ++ else ++ *bits = 8; ++ ++ ibrd = pl011_read(uap, REG_IBRD); ++ fbrd = pl011_read(uap, REG_FBRD); ++ ++ *baud = uap->port.uartclk * 4 / (64 * ibrd + fbrd); ++ ++ if (uap->vendor->oversampling) { ++ if (pl011_read(uap, REG_CR) ++ & ST_UART011_CR_OVSFACT) ++ *baud *= 2; ++ } ++ } ++} ++ ++static int pl011_console_setup(struct console *co, char *options) ++{ ++ struct uart_amba_port *uap; ++ int baud = 38400; ++ int bits = 8; ++ int parity = 'n'; ++ int flow = 'n'; ++ int ret; ++ ++ /* ++ * Check whether an invalid uart number has been specified, and ++ * if so, search for the first available port that does have ++ * console support. ++ */ ++ if (co->index >= UART_NR) ++ co->index = 0; ++ uap = amba_ports[co->index]; ++ if (!uap) ++ return -ENODEV; ++ ++ /* Allow pins to be muxed in and configured */ ++ pinctrl_pm_select_default_state(uap->port.dev); ++ ++ ret = clk_prepare(uap->clk); ++ if (ret) ++ return ret; ++ ++ if (dev_get_platdata(uap->port.dev)) { ++ struct amba_pl011_data *plat; ++ ++ plat = dev_get_platdata(uap->port.dev); ++ if (plat->init) ++ plat->init(); ++ } ++ ++ uap->port.uartclk = clk_get_rate(uap->clk); ++ ++ if (uap->vendor->fixed_options) { ++ baud = uap->fixed_baud; ++ } else { ++ if (options) ++ uart_parse_options(options, ++ &baud, &parity, &bits, &flow); ++ else ++ pl011_console_get_options(uap, &baud, &parity, &bits); ++ } ++ ++ return uart_set_options(&uap->port, co, baud, parity, bits, flow); ++} ++ ++/** ++ * pl011_console_match - non-standard console matching ++ * @co: registering console ++ * @name: name from console command line ++ * @idx: index from console command line ++ * @options: ptr to option string from console command line ++ * ++ * Only attempts to match console command lines of the form: ++ * console=pl011,mmio|mmio32,[,] ++ * console=pl011,0x[,] ++ * This form is used to register an initial earlycon boot console and ++ * replace it with the amba_console at pl011 driver init. ++ * ++ * Performs console setup for a match (as required by interface) ++ * If no are specified, then assume the h/w is already setup. 
++ * ++ * Returns 0 if console matches; otherwise non-zero to use default matching ++ */ ++static int pl011_console_match(struct console *co, char *name, int idx, ++ char *options) ++{ ++ unsigned char iotype; ++ resource_size_t addr; ++ int i; ++ ++ /* ++ * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum ++ * have a distinct console name, so make sure we check for that. ++ * The actual implementation of the erratum occurs in the probe ++ * function. ++ */ ++ if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) ++ return -ENODEV; ++ ++ if (uart_parse_earlycon(options, &iotype, &addr, &options)) ++ return -ENODEV; ++ ++ if (iotype != UPIO_MEM && iotype != UPIO_MEM32) ++ return -ENODEV; ++ ++ /* try to match the port specified on the command line */ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { ++ struct uart_port *port; ++ ++ if (!amba_ports[i]) ++ continue; ++ ++ port = &amba_ports[i]->port; ++ ++ if (port->mapbase != addr) ++ continue; ++ ++ co->index = i; ++ port->cons = co; ++ return pl011_console_setup(co, options); ++ } ++ ++ return -ENODEV; ++} ++ ++static struct uart_driver amba_reg; ++static struct console amba_console = { ++ .name = "ttyAMA", ++ .write = pl011_console_write, ++ .device = uart_console_device, ++ .setup = pl011_console_setup, ++ .match = pl011_console_match, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME, ++ .index = -1, ++ .data = &amba_reg, ++}; ++ ++#define AMBA_CONSOLE (&amba_console) ++ ++static void qdf2400_e44_putc(struct uart_port *port, int c) ++{ ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ writel(c, port->membase + UART01x_DR); ++ while (!(readl(port->membase + UART01x_FR) & UART011_FR_TXFE)) ++ cpu_relax(); ++} ++ ++static void qdf2400_e44_early_write(struct console *con, const char *s, unsigned n) ++{ ++ struct earlycon_device *dev = con->data; ++ ++ uart_console_write(&dev->port, s, n, qdf2400_e44_putc); ++} ++ ++static void pl011_putc(struct uart_port *port, int c) ++{ ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF) ++ cpu_relax(); ++ if (port->iotype == UPIO_MEM32) ++ writel(c, port->membase + UART01x_DR); ++ else ++ writeb(c, port->membase + UART01x_DR); ++ while (readl(port->membase + UART01x_FR) & UART01x_FR_BUSY) ++ cpu_relax(); ++} ++ ++static void pl011_early_write(struct console *con, const char *s, unsigned n) ++{ ++ struct earlycon_device *dev = con->data; ++ ++ uart_console_write(&dev->port, s, n, pl011_putc); ++} ++ ++/* ++ * On non-ACPI systems, earlycon is enabled by specifying ++ * "earlycon=pl011,
" on the kernel command line. ++ * ++ * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table, ++ * by specifying only "earlycon" on the command line. Because it requires ++ * SPCR, the console starts after ACPI is parsed, which is later than a ++ * traditional early console. ++ * ++ * To get the traditional early console that starts before ACPI is parsed, ++ * specify the full "earlycon=pl011,
" option. ++ */ ++static int __init pl011_early_console_setup(struct earlycon_device *device, ++ const char *opt) ++{ ++ if (!device->port.membase) ++ return -ENODEV; ++ ++ device->con->write = pl011_early_write; ++ ++ return 0; ++} ++OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); ++OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); ++ ++/* ++ * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by ++ * Erratum 44, traditional earlycon can be enabled by specifying ++ * "earlycon=qdf2400_e44,
". Any options are ignored. ++ * ++ * Alternatively, you can just specify "earlycon", and the early console ++ * will be enabled with the information from the SPCR table. In this ++ * case, the SPCR code will detect the need for the E44 work-around, ++ * and set the console name to "qdf2400_e44". ++ */ ++static int __init ++qdf2400_e44_early_console_setup(struct earlycon_device *device, ++ const char *opt) ++{ ++ if (!device->port.membase) ++ return -ENODEV; ++ ++ device->con->write = qdf2400_e44_early_write; ++ return 0; ++} ++EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); ++ ++#else ++#define AMBA_CONSOLE NULL ++#endif ++ ++static struct uart_driver amba_reg = { ++ .owner = THIS_MODULE, ++ .driver_name = "ttyAMA", ++ .dev_name = "ttyAMA", ++ .major = SERIAL_AMBA_MAJOR, ++ .minor = SERIAL_AMBA_MINOR, ++ .nr = UART_NR, ++ .cons = AMBA_CONSOLE, ++}; ++ ++static int pl011_probe_dt_alias(int index, struct device *dev) ++{ ++ struct device_node *np; ++ static bool seen_dev_with_alias = false; ++ static bool seen_dev_without_alias = false; ++ int ret = index; ++ ++ if (!IS_ENABLED(CONFIG_OF)) ++ return ret; ++ ++ np = dev->of_node; ++ if (!np) ++ return ret; ++ ++ ret = of_alias_get_id(np, "serial"); ++ if (ret < 0) { ++ seen_dev_without_alias = true; ++ ret = index; ++ } else { ++ seen_dev_with_alias = true; ++ if (ret >= ARRAY_SIZE(amba_ports) || amba_ports[ret] != NULL) { ++ dev_warn(dev, "requested serial port %d not available.\n", ret); ++ ret = index; ++ } ++ } ++ ++ if (seen_dev_with_alias && seen_dev_without_alias) ++ dev_warn(dev, "aliased and non-aliased serial devices found in device tree. Serial port enumeration may be unpredictable.\n"); ++ ++ return ret; ++} ++ ++/* unregisters the driver also if no more ports are left */ ++static void pl011_unregister_port(struct uart_amba_port *uap) ++{ ++ int i; ++ bool busy = false; ++ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) { ++ if (amba_ports[i] == uap) ++ amba_ports[i] = NULL; ++ else if (amba_ports[i]) ++ busy = true; ++ } ++ pl011_dma_remove(uap); ++ if (!busy) ++ uart_unregister_driver(&amba_reg); ++} ++ ++static int pl011_find_free_port(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) ++ if (amba_ports[i] == NULL) ++ return i; ++ ++ return -EBUSY; ++} ++ ++static int pl011_setup_port(struct device *dev, struct uart_amba_port *uap, ++ struct resource *mmiobase, int index) ++{ ++ void __iomem *base; ++ ++ base = devm_ioremap_resource(dev, mmiobase); ++ if (IS_ERR(base)) ++ return PTR_ERR(base); ++ ++ index = pl011_probe_dt_alias(index, dev); ++ ++ uap->old_cr = 0; ++ uap->port.dev = dev; ++ uap->port.mapbase = mmiobase->start; ++ uap->port.membase = base; ++ uap->port.fifosize = uap->fifosize; ++ uap->port.flags = UPF_BOOT_AUTOCONF; ++ uap->port.line = index; ++ spin_lock_init(&uap->port.lock); ++ ++ amba_ports[index] = uap; ++ ++ return 0; ++} ++ ++static int pl011_register_port(struct uart_amba_port *uap) ++{ ++ int ret, i; ++ ++ /* Ensure interrupts from this UART are masked and cleared */ ++ pl011_write(0, uap, REG_IMSC); ++ pl011_write(0xffff, uap, REG_ICR); ++ ++ if (!amba_reg.state) { ++ ret = uart_register_driver(&amba_reg); ++ if (ret < 0) { ++ dev_err(uap->port.dev, ++ "Failed to register AMBA-PL011 driver\n"); ++ for (i = 0; i < ARRAY_SIZE(amba_ports); i++) ++ if (amba_ports[i] == uap) ++ amba_ports[i] = NULL; ++ return ret; ++ } ++ } ++ ++ ret = uart_add_one_port(&amba_reg, &uap->port); ++ if (ret) ++ pl011_unregister_port(uap); ++ ++ return ret; ++} ++ ++static int 
pl011_probe(struct amba_device *dev, const struct amba_id *id) ++{ ++ struct uart_amba_port *uap; ++ struct vendor_data *vendor = id->data; ++ int portnr, ret; ++ ++ portnr = pl011_find_free_port(); ++ if (portnr < 0) ++ return portnr; ++ ++ uap = devm_kzalloc(&dev->dev, sizeof(struct uart_amba_port), ++ GFP_KERNEL); ++ if (!uap) ++ return -ENOMEM; ++ ++ uap->clk = devm_clk_get(&dev->dev, NULL); ++ if (IS_ERR(uap->clk)) ++ return PTR_ERR(uap->clk); ++ ++ uap->reg_offset = vendor->reg_offset; ++ uap->vendor = vendor; ++ uap->fifosize = vendor->get_fifosize(dev); ++ uap->port.iotype = vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; ++ uap->port.irq = dev->irq[0]; ++ uap->port.ops = &amba_pl011_pops; ++ ++ snprintf(uap->type, sizeof(uap->type), "PL011 rev%u", amba_rev(dev)); ++ ++ ret = pl011_setup_port(&dev->dev, uap, &dev->res, portnr); ++ if (ret) ++ return ret; ++ ++ amba_set_drvdata(dev, uap); ++ ++ return pl011_register_port(uap); ++} ++ ++static int pl011_remove(struct amba_device *dev) ++{ ++ struct uart_amba_port *uap = amba_get_drvdata(dev); ++ ++ uart_remove_one_port(&amba_reg, &uap->port); ++ pl011_unregister_port(uap); ++ return 0; ++} ++ ++#ifdef CONFIG_PM_SLEEP ++static int pl011_suspend(struct device *dev) ++{ ++ struct uart_amba_port *uap = dev_get_drvdata(dev); ++ ++ if (!uap) ++ return -EINVAL; ++ ++ return uart_suspend_port(&amba_reg, &uap->port); ++} ++ ++static int pl011_resume(struct device *dev) ++{ ++ struct uart_amba_port *uap = dev_get_drvdata(dev); ++ ++ if (!uap) ++ return -EINVAL; ++ ++ return uart_resume_port(&amba_reg, &uap->port); ++} ++#endif ++ ++static SIMPLE_DEV_PM_OPS(pl011_dev_pm_ops, pl011_suspend, pl011_resume); ++ ++static int sbsa_uart_probe(struct platform_device *pdev) ++{ ++ struct uart_amba_port *uap; ++ struct resource *r; ++ int portnr, ret; ++ int baudrate; ++ ++ /* ++ * Check the mandatory baud rate parameter in the DT node early ++ * so that we can easily exit with the error. ++ */ ++ if (pdev->dev.of_node) { ++ struct device_node *np = pdev->dev.of_node; ++ ++ ret = of_property_read_u32(np, "current-speed", &baudrate); ++ if (ret) ++ return ret; ++ } else { ++ baudrate = 115200; ++ } ++ ++ portnr = pl011_find_free_port(); ++ if (portnr < 0) ++ return portnr; ++ ++ uap = devm_kzalloc(&pdev->dev, sizeof(struct uart_amba_port), ++ GFP_KERNEL); ++ if (!uap) ++ return -ENOMEM; ++ ++ ret = platform_get_irq(pdev, 0); ++ if (ret < 0) { ++ if (ret != -EPROBE_DEFER) ++ dev_err(&pdev->dev, "cannot obtain irq\n"); ++ return ret; ++ } ++ uap->port.irq = ret; ++ ++#ifdef CONFIG_ACPI_SPCR_TABLE ++ if (qdf2400_e44_present) { ++ dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); ++ uap->vendor = &vendor_qdt_qdf2400_e44; ++ } else ++#endif ++ uap->vendor = &vendor_sbsa; ++ ++ uap->reg_offset = uap->vendor->reg_offset; ++ uap->fifosize = 32; ++ uap->port.iotype = uap->vendor->access_32b ? 
UPIO_MEM32 : UPIO_MEM; ++ uap->port.ops = &sbsa_uart_pops; ++ uap->fixed_baud = baudrate; ++ ++ snprintf(uap->type, sizeof(uap->type), "SBSA"); ++ ++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ ++ ret = pl011_setup_port(&pdev->dev, uap, r, portnr); ++ if (ret) ++ return ret; ++ ++ platform_set_drvdata(pdev, uap); ++ ++ return pl011_register_port(uap); ++} ++ ++static int sbsa_uart_remove(struct platform_device *pdev) ++{ ++ struct uart_amba_port *uap = platform_get_drvdata(pdev); ++ ++ uart_remove_one_port(&amba_reg, &uap->port); ++ pl011_unregister_port(uap); ++ return 0; ++} ++ ++static const struct of_device_id sbsa_uart_of_match[] = { ++ { .compatible = "arm,sbsa-uart", }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); ++ ++static const struct acpi_device_id sbsa_uart_acpi_match[] = { ++ { "ARMH0011", 0 }, ++ {}, ++}; ++MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); ++ ++static struct platform_driver arm_sbsa_uart_platform_driver = { ++ .probe = sbsa_uart_probe, ++ .remove = sbsa_uart_remove, ++ .driver = { ++ .name = "sbsa-uart", ++ .of_match_table = of_match_ptr(sbsa_uart_of_match), ++ .acpi_match_table = ACPI_PTR(sbsa_uart_acpi_match), ++ .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), ++ }, ++}; ++ ++static const struct amba_id pl011_ids[] = { ++ { ++ .id = 0x00041011, ++ .mask = 0x000fffff, ++ .data = &vendor_arm, ++ }, ++ { ++ .id = 0x00380802, ++ .mask = 0x00ffffff, ++ .data = &vendor_st, ++ }, ++ { ++ .id = AMBA_LINUX_ID(0x00, 0x1, 0xffe), ++ .mask = 0x00ffffff, ++ .data = &vendor_zte, ++ }, ++ { 0, 0 }, ++}; ++ ++MODULE_DEVICE_TABLE(amba, pl011_ids); ++ ++static struct amba_driver pl011_driver = { ++ .drv = { ++ .name = "uart-pl011", ++ .pm = &pl011_dev_pm_ops, ++ .suppress_bind_attrs = IS_BUILTIN(CONFIG_SERIAL_AMBA_PL011), ++ }, ++ .id_table = pl011_ids, ++ .probe = pl011_probe, ++ .remove = pl011_remove, ++}; ++ ++static int __init pl011_init(void) ++{ ++ printk(KERN_INFO "Serial: AMBA PL011 UART driver\n"); ++ ++ if (platform_driver_register(&arm_sbsa_uart_platform_driver)) ++ pr_warn("could not register SBSA UART platform driver\n"); ++ return amba_driver_register(&pl011_driver); ++} ++ ++static void __exit pl011_exit(void) ++{ ++ platform_driver_unregister(&arm_sbsa_uart_platform_driver); ++ amba_driver_unregister(&pl011_driver); ++} ++ ++/* ++ * While this can be a module, if builtin it's most likely the console ++ * So let's leave module_exit but move module_init to an earlier place ++ */ ++arch_initcall(pl011_init); ++module_exit(pl011_exit); ++ ++MODULE_AUTHOR("ARM Ltd/Deep Blue Solutions Ltd"); ++MODULE_DESCRIPTION("ARM AMBA serial port driver"); ++MODULE_LICENSE("GPL"); +diff -uprN kernel/drivers/tty/serial/xilinx_uartps.c kernel_new/drivers/tty/serial/xilinx_uartps.c +--- kernel/drivers/tty/serial/xilinx_uartps.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/drivers/tty/serial/xilinx_uartps.c 2021-04-01 18:28:07.798863128 +0800 +@@ -1216,6 +1216,34 @@ static void cdns_uart_console_write(stru + spin_unlock_irqrestore(&port->lock, flags); + } + ++#ifdef CONFIG_RAW_PRINTK ++ ++static void cdns_uart_console_write_raw(struct console *co, const char *s, ++ unsigned int count) ++{ ++ struct uart_port *port = &cdns_uart_port[co->index]; ++ unsigned int imr, ctrl; ++ ++ imr = readl(port->membase + CDNS_UART_IMR); ++ writel(imr, port->membase + CDNS_UART_IDR); ++ ++ ctrl = readl(port->membase + CDNS_UART_CR); ++ ctrl &= ~CDNS_UART_CR_TX_DIS; ++ ctrl |= CDNS_UART_CR_TX_EN; ++ writel(ctrl, port->membase + CDNS_UART_CR); ++ ++ while 
(count-- > 0) { ++ if (*s == '\n') ++ writel('\r', port->membase + CDNS_UART_FIFO); ++ writel(*s++, port->membase + CDNS_UART_FIFO); ++ } ++ ++ writel(ctrl, port->membase + CDNS_UART_CR); ++ writel(imr, port->membase + CDNS_UART_IER); ++} ++ ++#endif ++ + /** + * cdns_uart_console_setup - Initialize the uart to default config + * @co: Console handle +@@ -1251,7 +1279,12 @@ static struct console cdns_uart_console + .write = cdns_uart_console_write, + .device = uart_console_device, + .setup = cdns_uart_console_setup, ++#ifdef CONFIG_RAW_PRINTK ++ .write_raw = cdns_uart_console_write_raw, ++ .flags = CON_PRINTBUFFER | CON_RAW, ++#else + .flags = CON_PRINTBUFFER, ++#endif + .index = -1, /* Specified on the cmdline (e.g. console=ttyPS ) */ + .data = &cdns_uart_uart_driver, + }; +diff -uprN kernel/fs/exec.c kernel_new/fs/exec.c +--- kernel/fs/exec.c 2020-12-21 21:59:21.000000000 +0800 ++++ kernel_new/fs/exec.c 2021-04-01 18:28:07.798863128 +0800 +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1007,6 +1008,7 @@ static int exec_mmap(struct mm_struct *m + { + struct task_struct *tsk; + struct mm_struct *old_mm, *active_mm; ++ unsigned long flags; + + /* Notify parent that we're no longer interested in the old VM */ + tsk = current; +@@ -1031,8 +1033,10 @@ static int exec_mmap(struct mm_struct *m + active_mm = tsk->active_mm; + membarrier_exec_mmap(mm); + tsk->mm = mm; ++ ipipe_mm_switch_protect(flags); + tsk->active_mm = mm; + activate_mm(active_mm, mm); ++ ipipe_mm_switch_unprotect(flags); + tsk->mm->vmacache_seqnum = 0; + vmacache_flush(tsk); + task_unlock(tsk); +diff -uprN kernel/fs/exec.c.orig kernel_new/fs/exec.c.orig +--- kernel/fs/exec.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/fs/exec.c.orig 2020-12-21 21:59:21.000000000 +0800 +@@ -0,0 +1,2001 @@ ++/* ++ * linux/fs/exec.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * #!-checking implemented by tytso. ++ */ ++/* ++ * Demand-loading implemented 01.12.91 - no need to read anything but ++ * the header into memory. The inode of the executable is put into ++ * "current->executable", and page faults do the actual loading. Clean. ++ * ++ * Once more I can proudly say that linux stood up to being changed: it ++ * was less than 2 hours work to get demand-loading completely implemented. ++ * ++ * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, ++ * current->executable is only used by the procfs. This allows a dispatch ++ * table to check for several different types of binary formats. We keep ++ * trying until we recognize the file or we run out of supported binary ++ * formats. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include "internal.h" ++ ++#include ++ ++int suid_dumpable = 0; ++ ++static LIST_HEAD(formats); ++static DEFINE_RWLOCK(binfmt_lock); ++ ++void __register_binfmt(struct linux_binfmt * fmt, int insert) ++{ ++ BUG_ON(!fmt); ++ if (WARN_ON(!fmt->load_binary)) ++ return; ++ write_lock(&binfmt_lock); ++ insert ? 
list_add(&fmt->lh, &formats) : ++ list_add_tail(&fmt->lh, &formats); ++ write_unlock(&binfmt_lock); ++} ++ ++EXPORT_SYMBOL(__register_binfmt); ++ ++void unregister_binfmt(struct linux_binfmt * fmt) ++{ ++ write_lock(&binfmt_lock); ++ list_del(&fmt->lh); ++ write_unlock(&binfmt_lock); ++} ++ ++EXPORT_SYMBOL(unregister_binfmt); ++ ++static inline void put_binfmt(struct linux_binfmt * fmt) ++{ ++ module_put(fmt->module); ++} ++ ++bool path_noexec(const struct path *path) ++{ ++ return (path->mnt->mnt_flags & MNT_NOEXEC) || ++ (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); ++} ++ ++#ifdef CONFIG_USELIB ++/* ++ * Note that a shared library must be both readable and executable due to ++ * security reasons. ++ * ++ * Also note that we take the address to load from from the file itself. ++ */ ++SYSCALL_DEFINE1(uselib, const char __user *, library) ++{ ++ struct linux_binfmt *fmt; ++ struct file *file; ++ struct filename *tmp = getname(library); ++ int error = PTR_ERR(tmp); ++ static const struct open_flags uselib_flags = { ++ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, ++ .acc_mode = MAY_READ | MAY_EXEC, ++ .intent = LOOKUP_OPEN, ++ .lookup_flags = LOOKUP_FOLLOW, ++ }; ++ ++ if (IS_ERR(tmp)) ++ goto out; ++ ++ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); ++ putname(tmp); ++ error = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ ++ error = -EINVAL; ++ if (!S_ISREG(file_inode(file)->i_mode)) ++ goto exit; ++ ++ error = -EACCES; ++ if (path_noexec(&file->f_path)) ++ goto exit; ++ ++ fsnotify_open(file); ++ ++ error = -ENOEXEC; ++ ++ read_lock(&binfmt_lock); ++ list_for_each_entry(fmt, &formats, lh) { ++ if (!fmt->load_shlib) ++ continue; ++ if (!try_module_get(fmt->module)) ++ continue; ++ read_unlock(&binfmt_lock); ++ error = fmt->load_shlib(file); ++ read_lock(&binfmt_lock); ++ put_binfmt(fmt); ++ if (error != -ENOEXEC) ++ break; ++ } ++ read_unlock(&binfmt_lock); ++exit: ++ fput(file); ++out: ++ return error; ++} ++#endif /* #ifdef CONFIG_USELIB */ ++ ++#ifdef CONFIG_MMU ++/* ++ * The nascent bprm->mm is not visible until exec_mmap() but it can ++ * use a lot of memory, account these pages in current->mm temporary ++ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we ++ * change the counter back via acct_arg_size(0). ++ */ ++static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) ++{ ++ struct mm_struct *mm = current->mm; ++ long diff = (long)(pages - bprm->vma_pages); ++ ++ if (!mm || !diff) ++ return; ++ ++ bprm->vma_pages = pages; ++ add_mm_counter(mm, MM_ANONPAGES, diff); ++} ++ ++static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ int write) ++{ ++ struct page *page; ++ int ret; ++ unsigned int gup_flags = FOLL_FORCE; ++ ++#ifdef CONFIG_STACK_GROWSUP ++ if (write) { ++ ret = expand_downwards(bprm->vma, pos); ++ if (ret < 0) ++ return NULL; ++ } ++#endif ++ ++ if (write) ++ gup_flags |= FOLL_WRITE; ++ ++ /* ++ * We are doing an exec(). 'current' is the process ++ * doing the exec and bprm->mm is the new process's mm. ++ */ ++ ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags, ++ &page, NULL, NULL); ++ if (ret <= 0) ++ return NULL; ++ ++ if (write) { ++ unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; ++ unsigned long ptr_size, limit; ++ ++ /* ++ * Since the stack will hold pointers to the strings, we ++ * must account for them as well. 
++ * ++ * The size calculation is the entire vma while each arg page is ++ * built, so each time we get here it's calculating how far it ++ * is currently (rather than each call being just the newly ++ * added size from the arg page). As a result, we need to ++ * always add the entire size of the pointers, so that on the ++ * last call to get_arg_page() we'll actually have the entire ++ * correct size. ++ */ ++ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); ++ if (ptr_size > ULONG_MAX - size) ++ goto fail; ++ size += ptr_size; ++ ++ acct_arg_size(bprm, size / PAGE_SIZE); ++ ++ /* ++ * We've historically supported up to 32 pages (ARG_MAX) ++ * of argument strings even with small stacks ++ */ ++ if (size <= ARG_MAX) ++ return page; ++ ++ /* ++ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM ++ * (whichever is smaller) for the argv+env strings. ++ * This ensures that: ++ * - the remaining binfmt code will not run out of stack space, ++ * - the program will have a reasonable amount of stack left ++ * to work from. ++ */ ++ limit = _STK_LIM / 4 * 3; ++ limit = min(limit, bprm->rlim_stack.rlim_cur / 4); ++ if (size > limit) ++ goto fail; ++ } ++ ++ return page; ++ ++fail: ++ put_page(page); ++ return NULL; ++} ++ ++static void put_arg_page(struct page *page) ++{ ++ put_page(page); ++} ++ ++static void free_arg_pages(struct linux_binprm *bprm) ++{ ++} ++ ++static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ struct page *page) ++{ ++ flush_cache_page(bprm->vma, pos, page_to_pfn(page)); ++} ++ ++static int __bprm_mm_init(struct linux_binprm *bprm) ++{ ++ int err; ++ struct vm_area_struct *vma = NULL; ++ struct mm_struct *mm = bprm->mm; ++ ++ bprm->vma = vma = vm_area_alloc(mm); ++ if (!vma) ++ return -ENOMEM; ++ vma_set_anonymous(vma); ++ ++ if (down_write_killable(&mm->mmap_sem)) { ++ err = -EINTR; ++ goto err_free; ++ } ++ ++ /* ++ * Place the stack at the largest stack address the architecture ++ * supports. Later, we'll move this to an appropriate place. We don't ++ * use STACK_TOP because that can depend on attributes which aren't ++ * configured yet. 
++ */ ++ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); ++ vma->vm_end = STACK_TOP_MAX; ++ vma->vm_start = vma->vm_end - PAGE_SIZE; ++ vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; ++ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); ++ ++ err = insert_vm_struct(mm, vma); ++ if (err) ++ goto err; ++ ++ mm->stack_vm = mm->total_vm = 1; ++ arch_bprm_mm_init(mm, vma); ++ up_write(&mm->mmap_sem); ++ bprm->p = vma->vm_end - sizeof(void *); ++ return 0; ++err: ++ up_write(&mm->mmap_sem); ++err_free: ++ bprm->vma = NULL; ++ vm_area_free(vma); ++ return err; ++} ++ ++static bool valid_arg_len(struct linux_binprm *bprm, long len) ++{ ++ return len <= MAX_ARG_STRLEN; ++} ++ ++#else ++ ++static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) ++{ ++} ++ ++static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ int write) ++{ ++ struct page *page; ++ ++ page = bprm->page[pos / PAGE_SIZE]; ++ if (!page && write) { ++ page = alloc_page(GFP_HIGHUSER|__GFP_ZERO); ++ if (!page) ++ return NULL; ++ bprm->page[pos / PAGE_SIZE] = page; ++ } ++ ++ return page; ++} ++ ++static void put_arg_page(struct page *page) ++{ ++} ++ ++static void free_arg_page(struct linux_binprm *bprm, int i) ++{ ++ if (bprm->page[i]) { ++ __free_page(bprm->page[i]); ++ bprm->page[i] = NULL; ++ } ++} ++ ++static void free_arg_pages(struct linux_binprm *bprm) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_ARG_PAGES; i++) ++ free_arg_page(bprm, i); ++} ++ ++static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, ++ struct page *page) ++{ ++} ++ ++static int __bprm_mm_init(struct linux_binprm *bprm) ++{ ++ bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); ++ return 0; ++} ++ ++static bool valid_arg_len(struct linux_binprm *bprm, long len) ++{ ++ return len <= bprm->p; ++} ++ ++#endif /* CONFIG_MMU */ ++ ++/* ++ * Create a new mm_struct and populate it with a temporary stack ++ * vm_area_struct. We don't have enough context at this point to set the stack ++ * flags, permissions, and offset, so we use temporary values. We'll update ++ * them later in setup_arg_pages(). ++ */ ++static int bprm_mm_init(struct linux_binprm *bprm) ++{ ++ int err; ++ struct mm_struct *mm = NULL; ++ ++ bprm->mm = mm = mm_alloc(); ++ err = -ENOMEM; ++ if (!mm) ++ goto err; ++ ++ /* Save current stack limit for all calculations made during exec. */ ++ task_lock(current->group_leader); ++ bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; ++ task_unlock(current->group_leader); ++ ++ err = __bprm_mm_init(bprm); ++ if (err) ++ goto err; ++ ++ return 0; ++ ++err: ++ if (mm) { ++ bprm->mm = NULL; ++ mmdrop(mm); ++ } ++ ++ return err; ++} ++ ++struct user_arg_ptr { ++#ifdef CONFIG_COMPAT ++ bool is_compat; ++#endif ++ union { ++ const char __user *const __user *native; ++#ifdef CONFIG_COMPAT ++ const compat_uptr_t __user *compat; ++#endif ++ } ptr; ++}; ++ ++static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) ++{ ++ const char __user *native; ++ ++#ifdef CONFIG_COMPAT ++ if (unlikely(argv.is_compat)) { ++ compat_uptr_t compat; ++ ++ if (get_user(compat, argv.ptr.compat + nr)) ++ return ERR_PTR(-EFAULT); ++ ++ return compat_ptr(compat); ++ } ++#endif ++ ++ if (get_user(native, argv.ptr.native + nr)) ++ return ERR_PTR(-EFAULT); ++ ++ return native; ++} ++ ++/* ++ * count() counts the number of strings in array ARGV. 
++ */ ++static int count(struct user_arg_ptr argv, int max) ++{ ++ int i = 0; ++ ++ if (argv.ptr.native != NULL) { ++ for (;;) { ++ const char __user *p = get_user_arg_ptr(argv, i); ++ ++ if (!p) ++ break; ++ ++ if (IS_ERR(p)) ++ return -EFAULT; ++ ++ if (i >= max) ++ return -E2BIG; ++ ++i; ++ ++ if (fatal_signal_pending(current)) ++ return -ERESTARTNOHAND; ++ cond_resched(); ++ } ++ } ++ return i; ++} ++ ++/* ++ * 'copy_strings()' copies argument/environment strings from the old ++ * processes's memory to the new process's stack. The call to get_user_pages() ++ * ensures the destination page is created and not swapped out. ++ */ ++static int copy_strings(int argc, struct user_arg_ptr argv, ++ struct linux_binprm *bprm) ++{ ++ struct page *kmapped_page = NULL; ++ char *kaddr = NULL; ++ unsigned long kpos = 0; ++ int ret; ++ ++ while (argc-- > 0) { ++ const char __user *str; ++ int len; ++ unsigned long pos; ++ ++ ret = -EFAULT; ++ str = get_user_arg_ptr(argv, argc); ++ if (IS_ERR(str)) ++ goto out; ++ ++ len = strnlen_user(str, MAX_ARG_STRLEN); ++ if (!len) ++ goto out; ++ ++ ret = -E2BIG; ++ if (!valid_arg_len(bprm, len)) ++ goto out; ++ ++ /* We're going to work our way backwords. */ ++ pos = bprm->p; ++ str += len; ++ bprm->p -= len; ++ ++ while (len > 0) { ++ int offset, bytes_to_copy; ++ ++ if (fatal_signal_pending(current)) { ++ ret = -ERESTARTNOHAND; ++ goto out; ++ } ++ cond_resched(); ++ ++ offset = pos % PAGE_SIZE; ++ if (offset == 0) ++ offset = PAGE_SIZE; ++ ++ bytes_to_copy = offset; ++ if (bytes_to_copy > len) ++ bytes_to_copy = len; ++ ++ offset -= bytes_to_copy; ++ pos -= bytes_to_copy; ++ str -= bytes_to_copy; ++ len -= bytes_to_copy; ++ ++ if (!kmapped_page || kpos != (pos & PAGE_MASK)) { ++ struct page *page; ++ ++ page = get_arg_page(bprm, pos, 1); ++ if (!page) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ if (kmapped_page) { ++ flush_kernel_dcache_page(kmapped_page); ++ kunmap(kmapped_page); ++ put_arg_page(kmapped_page); ++ } ++ kmapped_page = page; ++ kaddr = kmap(kmapped_page); ++ kpos = pos & PAGE_MASK; ++ flush_arg_page(bprm, kpos, kmapped_page); ++ } ++ if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ } ++ } ++ ret = 0; ++out: ++ if (kmapped_page) { ++ flush_kernel_dcache_page(kmapped_page); ++ kunmap(kmapped_page); ++ put_arg_page(kmapped_page); ++ } ++ return ret; ++} ++ ++/* ++ * Like copy_strings, but get argv and its values from kernel memory. ++ */ ++int copy_strings_kernel(int argc, const char *const *__argv, ++ struct linux_binprm *bprm) ++{ ++ int r; ++ mm_segment_t oldfs = get_fs(); ++ struct user_arg_ptr argv = { ++ .ptr.native = (const char __user *const __user *)__argv, ++ }; ++ ++ set_fs(KERNEL_DS); ++ r = copy_strings(argc, argv, bprm); ++ set_fs(oldfs); ++ ++ return r; ++} ++EXPORT_SYMBOL(copy_strings_kernel); ++ ++#ifdef CONFIG_MMU ++ ++/* ++ * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once ++ * the binfmt code determines where the new stack should reside, we shift it to ++ * its final location. The process proceeds as follows: ++ * ++ * 1) Use shift to calculate the new vma endpoints. ++ * 2) Extend vma to cover both the old and new ranges. This ensures the ++ * arguments passed to subsequent functions are consistent. ++ * 3) Move vma's page tables to the new range. ++ * 4) Free up any cleared pgd range. ++ * 5) Shrink the vma to cover only the new range. 
++ */ ++static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long old_start = vma->vm_start; ++ unsigned long old_end = vma->vm_end; ++ unsigned long length = old_end - old_start; ++ unsigned long new_start = old_start - shift; ++ unsigned long new_end = old_end - shift; ++ struct mmu_gather tlb; ++ ++ BUG_ON(new_start > new_end); ++ ++ /* ++ * ensure there are no vmas between where we want to go ++ * and where we are ++ */ ++ if (vma != find_vma(mm, new_start)) ++ return -EFAULT; ++ ++ /* ++ * cover the whole range: [new_start, old_end) ++ */ ++ if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL)) ++ return -ENOMEM; ++ ++ /* ++ * move the page tables downwards, on failure we rely on ++ * process cleanup to remove whatever mess we made. ++ */ ++ if (length != move_page_tables(vma, old_start, ++ vma, new_start, length, false)) ++ return -ENOMEM; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, old_start, old_end); ++ if (new_end > old_start) { ++ /* ++ * when the old and new regions overlap clear from new_end. ++ */ ++ free_pgd_range(&tlb, new_end, old_end, new_end, ++ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); ++ } else { ++ /* ++ * otherwise, clean from old_start; this is done to not touch ++ * the address space in [new_end, old_start) some architectures ++ * have constraints on va-space that make this illegal (IA64) - ++ * for the others its just a little faster. ++ */ ++ free_pgd_range(&tlb, old_start, old_end, new_end, ++ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); ++ } ++ tlb_finish_mmu(&tlb, old_start, old_end); ++ ++ /* ++ * Shrink the vma to just the new range. Always succeeds. ++ */ ++ vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL); ++ ++ return 0; ++} ++ ++/* ++ * Finalizes the stack vm_area_struct. The flags and permissions are updated, ++ * the stack is optionally relocated, and some extra space is added. ++ */ ++int setup_arg_pages(struct linux_binprm *bprm, ++ unsigned long stack_top, ++ int executable_stack) ++{ ++ unsigned long ret; ++ unsigned long stack_shift; ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma = bprm->vma; ++ struct vm_area_struct *prev = NULL; ++ unsigned long vm_flags; ++ unsigned long stack_base; ++ unsigned long stack_size; ++ unsigned long stack_expand; ++ unsigned long rlim_stack; ++ ++#ifdef CONFIG_STACK_GROWSUP ++ /* Limit stack size */ ++ stack_base = bprm->rlim_stack.rlim_max; ++ if (stack_base > STACK_SIZE_MAX) ++ stack_base = STACK_SIZE_MAX; ++ ++ /* Add space for stack randomization. */ ++ stack_base += (STACK_RND_MASK << PAGE_SHIFT); ++ ++ /* Make sure we didn't let the argument array grow too large. 
*/ ++ if (vma->vm_end - vma->vm_start > stack_base) ++ return -ENOMEM; ++ ++ stack_base = PAGE_ALIGN(stack_top - stack_base); ++ ++ stack_shift = vma->vm_start - stack_base; ++ mm->arg_start = bprm->p - stack_shift; ++ bprm->p = vma->vm_end - stack_shift; ++#else ++ stack_top = arch_align_stack(stack_top); ++ stack_top = PAGE_ALIGN(stack_top); ++ ++ if (unlikely(stack_top < mmap_min_addr) || ++ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) ++ return -ENOMEM; ++ ++ stack_shift = vma->vm_end - stack_top; ++ ++ bprm->p -= stack_shift; ++ mm->arg_start = bprm->p; ++#endif ++ ++ if (bprm->loader) ++ bprm->loader -= stack_shift; ++ bprm->exec -= stack_shift; ++ ++ if (down_write_killable(&mm->mmap_sem)) ++ return -EINTR; ++ ++ vm_flags = VM_STACK_FLAGS; ++ ++ /* ++ * Adjust stack execute permissions; explicitly enable for ++ * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone ++ * (arch default) otherwise. ++ */ ++ if (unlikely(executable_stack == EXSTACK_ENABLE_X)) ++ vm_flags |= VM_EXEC; ++ else if (executable_stack == EXSTACK_DISABLE_X) ++ vm_flags &= ~VM_EXEC; ++ vm_flags |= mm->def_flags; ++ vm_flags |= VM_STACK_INCOMPLETE_SETUP; ++ ++ ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end, ++ vm_flags); ++ if (ret) ++ goto out_unlock; ++ BUG_ON(prev != vma); ++ ++ /* Move stack pages down in memory. */ ++ if (stack_shift) { ++ ret = shift_arg_pages(vma, stack_shift); ++ if (ret) ++ goto out_unlock; ++ } ++ ++ /* mprotect_fixup is overkill to remove the temporary stack flags */ ++ vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP; ++ ++ stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ ++ stack_size = vma->vm_end - vma->vm_start; ++ /* ++ * Align this down to a page boundary as expand_stack ++ * will align it up. ++ */ ++ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; ++#ifdef CONFIG_STACK_GROWSUP ++ if (stack_size + stack_expand > rlim_stack) ++ stack_base = vma->vm_start + rlim_stack; ++ else ++ stack_base = vma->vm_end + stack_expand; ++#else ++ if (stack_size + stack_expand > rlim_stack) ++ stack_base = vma->vm_end - rlim_stack; ++ else ++ stack_base = vma->vm_start - stack_expand; ++#endif ++ current->mm->start_stack = bprm->p; ++ ret = expand_stack(vma, stack_base); ++ if (ret) ++ ret = -EFAULT; ++ ++out_unlock: ++ up_write(&mm->mmap_sem); ++ return ret; ++} ++EXPORT_SYMBOL(setup_arg_pages); ++ ++#else ++ ++/* ++ * Transfer the program arguments and environment from the holding pages ++ * onto the stack. The provided stack pointer is adjusted accordingly. ++ */ ++int transfer_args_to_stack(struct linux_binprm *bprm, ++ unsigned long *sp_location) ++{ ++ unsigned long index, stop, sp; ++ int ret = 0; ++ ++ stop = bprm->p >> PAGE_SHIFT; ++ sp = *sp_location; ++ ++ for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { ++ unsigned int offset = index == stop ? 
bprm->p & ~PAGE_MASK : 0; ++ char *src = kmap(bprm->page[index]) + offset; ++ sp -= PAGE_SIZE - offset; ++ if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ++ ret = -EFAULT; ++ kunmap(bprm->page[index]); ++ if (ret) ++ goto out; ++ } ++ ++ *sp_location = sp; ++ ++out: ++ return ret; ++} ++EXPORT_SYMBOL(transfer_args_to_stack); ++ ++#endif /* CONFIG_MMU */ ++ ++static struct file *do_open_execat(int fd, struct filename *name, int flags) ++{ ++ struct file *file; ++ int err; ++ struct open_flags open_exec_flags = { ++ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, ++ .acc_mode = MAY_EXEC, ++ .intent = LOOKUP_OPEN, ++ .lookup_flags = LOOKUP_FOLLOW, ++ }; ++ ++ if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) ++ return ERR_PTR(-EINVAL); ++ if (flags & AT_SYMLINK_NOFOLLOW) ++ open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; ++ if (flags & AT_EMPTY_PATH) ++ open_exec_flags.lookup_flags |= LOOKUP_EMPTY; ++ ++ file = do_filp_open(fd, name, &open_exec_flags); ++ if (IS_ERR(file)) ++ goto out; ++ ++ err = -EACCES; ++ if (!S_ISREG(file_inode(file)->i_mode)) ++ goto exit; ++ ++ if (path_noexec(&file->f_path)) ++ goto exit; ++ ++ err = deny_write_access(file); ++ if (err) ++ goto exit; ++ ++ if (name->name[0] != '\0') ++ fsnotify_open(file); ++ ++out: ++ return file; ++ ++exit: ++ fput(file); ++ return ERR_PTR(err); ++} ++ ++struct file *open_exec(const char *name) ++{ ++ struct filename *filename = getname_kernel(name); ++ struct file *f = ERR_CAST(filename); ++ ++ if (!IS_ERR(filename)) { ++ f = do_open_execat(AT_FDCWD, filename, 0); ++ putname(filename); ++ } ++ return f; ++} ++EXPORT_SYMBOL(open_exec); ++ ++int kernel_read_file(struct file *file, void **buf, loff_t *size, ++ loff_t max_size, enum kernel_read_file_id id) ++{ ++ loff_t i_size, pos; ++ ssize_t bytes = 0; ++ int ret; ++ ++ if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0) ++ return -EINVAL; ++ ++ ret = deny_write_access(file); ++ if (ret) ++ return ret; ++ ++ ret = security_kernel_read_file(file, id); ++ if (ret) ++ goto out; ++ ++ i_size = i_size_read(file_inode(file)); ++ if (max_size > 0 && i_size > max_size) { ++ ret = -EFBIG; ++ goto out; ++ } ++ if (i_size <= 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (id != READING_FIRMWARE_PREALLOC_BUFFER) ++ *buf = vmalloc(i_size); ++ if (!*buf) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ pos = 0; ++ while (pos < i_size) { ++ bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); ++ if (bytes < 0) { ++ ret = bytes; ++ goto out_free; ++ } ++ ++ if (bytes == 0) ++ break; ++ } ++ ++ if (pos != i_size) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ ret = security_kernel_post_read_file(file, *buf, i_size, id); ++ if (!ret) ++ *size = pos; ++ ++out_free: ++ if (ret < 0) { ++ if (id != READING_FIRMWARE_PREALLOC_BUFFER) { ++ vfree(*buf); ++ *buf = NULL; ++ } ++ } ++ ++out: ++ allow_write_access(file); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file); ++ ++int kernel_read_file_from_path(const char *path, void **buf, loff_t *size, ++ loff_t max_size, enum kernel_read_file_id id) ++{ ++ struct file *file; ++ int ret; ++ ++ if (!path || !*path) ++ return -EINVAL; ++ ++ file = filp_open(path, O_RDONLY, 0); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ ++ ret = kernel_read_file(file, buf, size, max_size, id); ++ fput(file); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file_from_path); ++ ++int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size, ++ enum kernel_read_file_id id) ++{ ++ struct fd f = fdget(fd); ++ int ret = 
-EBADF; ++ ++ if (!f.file) ++ goto out; ++ ++ ret = kernel_read_file(f.file, buf, size, max_size, id); ++out: ++ fdput(f); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kernel_read_file_from_fd); ++ ++ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) ++{ ++ ssize_t res = vfs_read(file, (void __user *)addr, len, &pos); ++ if (res > 0) ++ flush_icache_range(addr, addr + len); ++ return res; ++} ++EXPORT_SYMBOL(read_code); ++ ++static int exec_mmap(struct mm_struct *mm) ++{ ++ struct task_struct *tsk; ++ struct mm_struct *old_mm, *active_mm; ++ ++ /* Notify parent that we're no longer interested in the old VM */ ++ tsk = current; ++ old_mm = current->mm; ++ mm_release(tsk, old_mm); ++ ++ if (old_mm) { ++ sync_mm_rss(old_mm); ++ /* ++ * Make sure that if there is a core dump in progress ++ * for the old mm, we get out and die instead of going ++ * through with the exec. We must hold mmap_sem around ++ * checking core_state and changing tsk->mm. ++ */ ++ down_read(&old_mm->mmap_sem); ++ if (unlikely(old_mm->core_state)) { ++ up_read(&old_mm->mmap_sem); ++ return -EINTR; ++ } ++ } ++ task_lock(tsk); ++ active_mm = tsk->active_mm; ++ membarrier_exec_mmap(mm); ++ tsk->mm = mm; ++ tsk->active_mm = mm; ++ activate_mm(active_mm, mm); ++ tsk->mm->vmacache_seqnum = 0; ++ vmacache_flush(tsk); ++ task_unlock(tsk); ++ if (old_mm) { ++ up_read(&old_mm->mmap_sem); ++ BUG_ON(active_mm != old_mm); ++ setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); ++ mm_update_next_owner(old_mm); ++ mmput(old_mm); ++ return 0; ++ } ++ mmdrop(active_mm); ++ return 0; ++} ++ ++/* ++ * This function makes sure the current process has its own signal table, ++ * so that flush_signal_handlers can later reset the handlers without ++ * disturbing other processes. (Other processes might share the signal ++ * table via the CLONE_SIGHAND option to clone().) ++ */ ++static int de_thread(struct task_struct *tsk) ++{ ++ struct signal_struct *sig = tsk->signal; ++ struct sighand_struct *oldsighand = tsk->sighand; ++ spinlock_t *lock = &oldsighand->siglock; ++ ++ if (thread_group_empty(tsk)) ++ goto no_thread_group; ++ ++ /* ++ * Kill all other threads in the thread group. ++ */ ++ spin_lock_irq(lock); ++ if (signal_group_exit(sig)) { ++ /* ++ * Another group action in progress, just ++ * return so that the signal is processed. 
++ */ ++ spin_unlock_irq(lock); ++ return -EAGAIN; ++ } ++ ++ sig->group_exit_task = tsk; ++ sig->notify_count = zap_other_threads(tsk); ++ if (!thread_group_leader(tsk)) ++ sig->notify_count--; ++ ++ while (sig->notify_count) { ++ __set_current_state(TASK_KILLABLE); ++ spin_unlock_irq(lock); ++ schedule(); ++ if (unlikely(__fatal_signal_pending(tsk))) ++ goto killed; ++ spin_lock_irq(lock); ++ } ++ spin_unlock_irq(lock); ++ ++ /* ++ * At this point all other threads have exited, all we have to ++ * do is to wait for the thread group leader to become inactive, ++ * and to assume its PID: ++ */ ++ if (!thread_group_leader(tsk)) { ++ struct task_struct *leader = tsk->group_leader; ++ ++ for (;;) { ++ cgroup_threadgroup_change_begin(tsk); ++ write_lock_irq(&tasklist_lock); ++ /* ++ * Do this under tasklist_lock to ensure that ++ * exit_notify() can't miss ->group_exit_task ++ */ ++ sig->notify_count = -1; ++ if (likely(leader->exit_state)) ++ break; ++ __set_current_state(TASK_KILLABLE); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_threadgroup_change_end(tsk); ++ schedule(); ++ if (unlikely(__fatal_signal_pending(tsk))) ++ goto killed; ++ } ++ ++ /* ++ * The only record we have of the real-time age of a ++ * process, regardless of execs it's done, is start_time. ++ * All the past CPU time is accumulated in signal_struct ++ * from sister threads now dead. But in this non-leader ++ * exec, nothing survives from the original leader thread, ++ * whose birth marks the true age of this process now. ++ * When we take on its identity by switching to its PID, we ++ * also take its birthdate (always earlier than our own). ++ */ ++ tsk->start_time = leader->start_time; ++ tsk->real_start_time = leader->real_start_time; ++ ++ BUG_ON(!same_thread_group(leader, tsk)); ++ BUG_ON(has_group_leader_pid(tsk)); ++ /* ++ * An exec() starts a new thread group with the ++ * TGID of the previous thread group. Rehash the ++ * two threads with a switched PID, and release ++ * the former thread group leader: ++ */ ++ ++ /* Become a process group leader with the old leader's pid. ++ * The old leader becomes a thread of the this thread group. ++ * Note: The old leader also uses this pid until release_task ++ * is called. Odd but simple and correct. ++ */ ++ tsk->pid = leader->pid; ++ change_pid(tsk, PIDTYPE_PID, task_pid(leader)); ++ transfer_pid(leader, tsk, PIDTYPE_TGID); ++ transfer_pid(leader, tsk, PIDTYPE_PGID); ++ transfer_pid(leader, tsk, PIDTYPE_SID); ++ ++ list_replace_rcu(&leader->tasks, &tsk->tasks); ++ list_replace_init(&leader->sibling, &tsk->sibling); ++ ++ tsk->group_leader = tsk; ++ leader->group_leader = tsk; ++ ++ tsk->exit_signal = SIGCHLD; ++ leader->exit_signal = -1; ++ ++ BUG_ON(leader->exit_state != EXIT_ZOMBIE); ++ leader->exit_state = EXIT_DEAD; ++ ++ /* ++ * We are going to release_task()->ptrace_unlink() silently, ++ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees ++ * the tracer wont't block again waiting for this thread. 
++ */ ++ if (unlikely(leader->ptrace)) ++ __wake_up_parent(leader, leader->parent); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_threadgroup_change_end(tsk); ++ ++ release_task(leader); ++ } ++ ++ sig->group_exit_task = NULL; ++ sig->notify_count = 0; ++ ++no_thread_group: ++ /* we have changed execution domain */ ++ tsk->exit_signal = SIGCHLD; ++ ++#ifdef CONFIG_POSIX_TIMERS ++ exit_itimers(sig); ++ flush_itimer_signals(); ++#endif ++ ++ if (atomic_read(&oldsighand->count) != 1) { ++ struct sighand_struct *newsighand; ++ /* ++ * This ->sighand is shared with the CLONE_SIGHAND ++ * but not CLONE_THREAD task, switch to the new one. ++ */ ++ newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); ++ if (!newsighand) ++ return -ENOMEM; ++ ++ atomic_set(&newsighand->count, 1); ++ memcpy(newsighand->action, oldsighand->action, ++ sizeof(newsighand->action)); ++ ++ write_lock_irq(&tasklist_lock); ++ spin_lock(&oldsighand->siglock); ++ rcu_assign_pointer(tsk->sighand, newsighand); ++ spin_unlock(&oldsighand->siglock); ++ write_unlock_irq(&tasklist_lock); ++ ++ __cleanup_sighand(oldsighand); ++ } ++ ++ BUG_ON(!thread_group_leader(tsk)); ++ return 0; ++ ++killed: ++ /* protects against exit_notify() and __exit_signal() */ ++ read_lock(&tasklist_lock); ++ sig->group_exit_task = NULL; ++ sig->notify_count = 0; ++ read_unlock(&tasklist_lock); ++ return -EAGAIN; ++} ++ ++char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) ++{ ++ task_lock(tsk); ++ strncpy(buf, tsk->comm, buf_size); ++ task_unlock(tsk); ++ return buf; ++} ++EXPORT_SYMBOL_GPL(__get_task_comm); ++ ++/* ++ * These functions flushes out all traces of the currently running executable ++ * so that a new one can be started ++ */ ++ ++void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) ++{ ++ task_lock(tsk); ++ trace_task_rename(tsk, buf); ++ strlcpy(tsk->comm, buf, sizeof(tsk->comm)); ++ task_unlock(tsk); ++ perf_event_comm(tsk, exec); ++} ++ ++/* ++ * Calling this is the point of no return. None of the failures will be ++ * seen by userspace since either the process is already taking a fatal ++ * signal (via de_thread() or coredump), or will have SEGV raised ++ * (after exec_mmap()) by search_binary_handlers (see below). ++ */ ++int flush_old_exec(struct linux_binprm * bprm) ++{ ++ int retval; ++ ++ /* ++ * Make sure we have a private signal table and that ++ * we are unassociated from the previous thread group. ++ */ ++ retval = de_thread(current); ++ if (retval) ++ goto out; ++ ++ /* ++ * Must be called _before_ exec_mmap() as bprm->mm is ++ * not visibile until then. This also enables the update ++ * to be lockless. ++ */ ++ set_mm_exe_file(bprm->mm, bprm->file); ++ ++ would_dump(bprm, bprm->file); ++ ++ /* ++ * Release all of the old mmap stuff ++ */ ++ acct_arg_size(bprm, 0); ++ retval = exec_mmap(bprm->mm); ++ if (retval) ++ goto out; ++ ++ /* ++ * After clearing bprm->mm (to mark that current is using the ++ * prepared mm now), we have nothing left of the original ++ * process. If anything from here on returns an error, the check ++ * in search_binary_handler() will SEGV current. 
++ */ ++ bprm->mm = NULL; ++ ++ set_fs(USER_DS); ++ current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | ++ PF_NOFREEZE | PF_NO_SETAFFINITY); ++ flush_thread(); ++ current->personality &= ~bprm->per_clear; ++ ++ /* ++ * We have to apply CLOEXEC before we change whether the process is ++ * dumpable (in setup_new_exec) to avoid a race with a process in userspace ++ * trying to access the should-be-closed file descriptors of a process ++ * undergoing exec(2). ++ */ ++ do_close_on_exec(current->files); ++ return 0; ++ ++out: ++ return retval; ++} ++EXPORT_SYMBOL(flush_old_exec); ++ ++void would_dump(struct linux_binprm *bprm, struct file *file) ++{ ++ struct inode *inode = file_inode(file); ++ if (inode_permission(inode, MAY_READ) < 0) { ++ struct user_namespace *old, *user_ns; ++ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; ++ ++ /* Ensure mm->user_ns contains the executable */ ++ user_ns = old = bprm->mm->user_ns; ++ while ((user_ns != &init_user_ns) && ++ !privileged_wrt_inode_uidgid(user_ns, inode)) ++ user_ns = user_ns->parent; ++ ++ if (old != user_ns) { ++ bprm->mm->user_ns = get_user_ns(user_ns); ++ put_user_ns(old); ++ } ++ } ++} ++EXPORT_SYMBOL(would_dump); ++ ++void setup_new_exec(struct linux_binprm * bprm) ++{ ++ /* ++ * Once here, prepare_binrpm() will not be called any more, so ++ * the final state of setuid/setgid/fscaps can be merged into the ++ * secureexec flag. ++ */ ++ bprm->secureexec |= bprm->cap_elevated; ++ ++ if (bprm->secureexec) { ++ /* Make sure parent cannot signal privileged process. */ ++ current->pdeath_signal = 0; ++ ++ /* ++ * For secureexec, reset the stack limit to sane default to ++ * avoid bad behavior from the prior rlimits. This has to ++ * happen before arch_pick_mmap_layout(), which examines ++ * RLIMIT_STACK, but after the point of no return to avoid ++ * needing to clean up the change on failure. ++ */ ++ if (bprm->rlim_stack.rlim_cur > _STK_LIM) ++ bprm->rlim_stack.rlim_cur = _STK_LIM; ++ } ++ ++ arch_pick_mmap_layout(current->mm, &bprm->rlim_stack); ++ ++ current->sas_ss_sp = current->sas_ss_size = 0; ++ ++ /* ++ * Figure out dumpability. Note that this checking only of current ++ * is wrong, but userspace depends on it. This should be testing ++ * bprm->secureexec instead. ++ */ ++ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || ++ !(uid_eq(current_euid(), current_uid()) && ++ gid_eq(current_egid(), current_gid()))) ++ set_dumpable(current->mm, suid_dumpable); ++ else ++ set_dumpable(current->mm, SUID_DUMP_USER); ++ ++ arch_setup_new_exec(); ++ perf_event_exec(); ++ __set_task_comm(current, kbasename(bprm->filename), true); ++ ++ /* Set the new mm task size. We have to do that late because it may ++ * depend on TIF_32BIT which is only updated in flush_thread() on ++ * some architectures like powerpc ++ */ ++ current->mm->task_size = TASK_SIZE; ++ ++ /* An exec changes our domain. We are no longer part of the thread ++ group */ ++ WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1); ++ WRITE_ONCE(current->self_exec_id_u64, current->self_exec_id_u64 + 1); ++ flush_signal_handlers(current, 0); ++} ++EXPORT_SYMBOL(setup_new_exec); ++ ++/* Runs immediately before start_thread() takes over. */ ++void finalize_exec(struct linux_binprm *bprm) ++{ ++ /* Store any stack rlimit changes before starting thread. 
*/ ++ task_lock(current->group_leader); ++ current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack; ++ task_unlock(current->group_leader); ++} ++EXPORT_SYMBOL(finalize_exec); ++ ++/* ++ * Prepare credentials and lock ->cred_guard_mutex. ++ * install_exec_creds() commits the new creds and drops the lock. ++ * Or, if exec fails before, free_bprm() should release ->cred and ++ * and unlock. ++ */ ++int prepare_bprm_creds(struct linux_binprm *bprm) ++{ ++ if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) ++ return -ERESTARTNOINTR; ++ ++ bprm->cred = prepare_exec_creds(); ++ if (likely(bprm->cred)) ++ return 0; ++ ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++ return -ENOMEM; ++} ++ ++static void free_bprm(struct linux_binprm *bprm) ++{ ++ free_arg_pages(bprm); ++ if (bprm->cred) { ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++ abort_creds(bprm->cred); ++ } ++ if (bprm->file) { ++ allow_write_access(bprm->file); ++ fput(bprm->file); ++ } ++ /* If a binfmt changed the interp, free it. */ ++ if (bprm->interp != bprm->filename) ++ kfree(bprm->interp); ++ kfree(bprm); ++} ++ ++int bprm_change_interp(const char *interp, struct linux_binprm *bprm) ++{ ++ /* If a binfmt changed the interp, free it first. */ ++ if (bprm->interp != bprm->filename) ++ kfree(bprm->interp); ++ bprm->interp = kstrdup(interp, GFP_KERNEL); ++ if (!bprm->interp) ++ return -ENOMEM; ++ return 0; ++} ++EXPORT_SYMBOL(bprm_change_interp); ++ ++/* ++ * install the new credentials for this executable ++ */ ++void install_exec_creds(struct linux_binprm *bprm) ++{ ++ security_bprm_committing_creds(bprm); ++ ++ commit_creds(bprm->cred); ++ bprm->cred = NULL; ++ ++ /* ++ * Disable monitoring for regular users ++ * when executing setuid binaries. Must ++ * wait until new credentials are committed ++ * by commit_creds() above ++ */ ++ if (get_dumpable(current->mm) != SUID_DUMP_USER) ++ perf_event_exit_task(current); ++ /* ++ * cred_guard_mutex must be held at least to this point to prevent ++ * ptrace_attach() from altering our determination of the task's ++ * credentials; any time after this it may be unlocked. ++ */ ++ security_bprm_committed_creds(bprm); ++ mutex_unlock(¤t->signal->cred_guard_mutex); ++} ++EXPORT_SYMBOL(install_exec_creds); ++ ++/* ++ * determine how safe it is to execute the proposed program ++ * - the caller must hold ->cred_guard_mutex to protect against ++ * PTRACE_ATTACH or seccomp thread-sync ++ */ ++static void check_unsafe_exec(struct linux_binprm *bprm) ++{ ++ struct task_struct *p = current, *t; ++ unsigned n_fs; ++ ++ if (p->ptrace) ++ bprm->unsafe |= LSM_UNSAFE_PTRACE; ++ ++ /* ++ * This isn't strictly necessary, but it makes it harder for LSMs to ++ * mess up. ++ */ ++ if (task_no_new_privs(current)) ++ bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; ++ ++ t = p; ++ n_fs = 1; ++ spin_lock(&p->fs->lock); ++ rcu_read_lock(); ++ while_each_thread(p, t) { ++ if (t->fs == p->fs) ++ n_fs++; ++ } ++ rcu_read_unlock(); ++ ++ if (p->fs->users > n_fs) ++ bprm->unsafe |= LSM_UNSAFE_SHARE; ++ else ++ p->fs->in_exec = 1; ++ spin_unlock(&p->fs->lock); ++} ++ ++static void bprm_fill_uid(struct linux_binprm *bprm) ++{ ++ struct inode *inode; ++ unsigned int mode; ++ kuid_t uid; ++ kgid_t gid; ++ ++ /* ++ * Since this can be called multiple times (via prepare_binprm), ++ * we must clear any previous work done when setting set[ug]id ++ * bits from any earlier bprm->file uses (for example when run ++ * first for a setuid script then again for its interpreter). 
++ */ ++ bprm->cred->euid = current_euid(); ++ bprm->cred->egid = current_egid(); ++ ++ if (!mnt_may_suid(bprm->file->f_path.mnt)) ++ return; ++ ++ if (task_no_new_privs(current)) ++ return; ++ ++ inode = bprm->file->f_path.dentry->d_inode; ++ mode = READ_ONCE(inode->i_mode); ++ if (!(mode & (S_ISUID|S_ISGID))) ++ return; ++ ++ /* Be careful if suid/sgid is set */ ++ inode_lock(inode); ++ ++ /* reload atomically mode/uid/gid now that lock held */ ++ mode = inode->i_mode; ++ uid = inode->i_uid; ++ gid = inode->i_gid; ++ inode_unlock(inode); ++ ++ /* We ignore suid/sgid if there are no mappings for them in the ns */ ++ if (!kuid_has_mapping(bprm->cred->user_ns, uid) || ++ !kgid_has_mapping(bprm->cred->user_ns, gid)) ++ return; ++ ++ if (mode & S_ISUID) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->euid = uid; ++ } ++ ++ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { ++ bprm->per_clear |= PER_CLEAR_ON_SETID; ++ bprm->cred->egid = gid; ++ } ++} ++ ++/* ++ * Fill the binprm structure from the inode. ++ * Check permissions, then read the first BINPRM_BUF_SIZE bytes ++ * ++ * This may be called multiple times for binary chains (scripts for example). ++ */ ++int prepare_binprm(struct linux_binprm *bprm) ++{ ++ int retval; ++ loff_t pos = 0; ++ ++ bprm_fill_uid(bprm); ++ ++ /* fill in binprm security blob */ ++ retval = security_bprm_set_creds(bprm); ++ if (retval) ++ return retval; ++ bprm->called_set_creds = 1; ++ ++ memset(bprm->buf, 0, BINPRM_BUF_SIZE); ++ return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos); ++} ++ ++EXPORT_SYMBOL(prepare_binprm); ++ ++/* ++ * Arguments are '\0' separated strings found at the location bprm->p ++ * points to; chop off the first by relocating brpm->p to right after ++ * the first '\0' encountered. ++ */ ++int remove_arg_zero(struct linux_binprm *bprm) ++{ ++ int ret = 0; ++ unsigned long offset; ++ char *kaddr; ++ struct page *page; ++ ++ if (!bprm->argc) ++ return 0; ++ ++ do { ++ offset = bprm->p & ~PAGE_MASK; ++ page = get_arg_page(bprm, bprm->p, 0); ++ if (!page) { ++ ret = -EFAULT; ++ goto out; ++ } ++ kaddr = kmap_atomic(page); ++ ++ for (; offset < PAGE_SIZE && kaddr[offset]; ++ offset++, bprm->p++) ++ ; ++ ++ kunmap_atomic(kaddr); ++ put_arg_page(page); ++ } while (offset == PAGE_SIZE); ++ ++ bprm->p++; ++ bprm->argc--; ++ ret = 0; ++ ++out: ++ return ret; ++} ++EXPORT_SYMBOL(remove_arg_zero); ++ ++#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) ++/* ++ * cycle the list of binary formats handler, until one recognizes the image ++ */ ++int search_binary_handler(struct linux_binprm *bprm) ++{ ++ bool need_retry = IS_ENABLED(CONFIG_MODULES); ++ struct linux_binfmt *fmt; ++ int retval; ++ ++ /* This allows 4 levels of binfmt rewrites before failing hard. 
*/ ++ if (bprm->recursion_depth > 5) ++ return -ELOOP; ++ ++ retval = security_bprm_check(bprm); ++ if (retval) ++ return retval; ++ ++ retval = -ENOENT; ++ retry: ++ read_lock(&binfmt_lock); ++ list_for_each_entry(fmt, &formats, lh) { ++ if (!try_module_get(fmt->module)) ++ continue; ++ read_unlock(&binfmt_lock); ++ bprm->recursion_depth++; ++ retval = fmt->load_binary(bprm); ++ read_lock(&binfmt_lock); ++ put_binfmt(fmt); ++ bprm->recursion_depth--; ++ if (retval < 0 && !bprm->mm) { ++ /* we got to flush_old_exec() and failed after it */ ++ read_unlock(&binfmt_lock); ++ force_sigsegv(SIGSEGV, current); ++ return retval; ++ } ++ if (retval != -ENOEXEC || !bprm->file) { ++ read_unlock(&binfmt_lock); ++ return retval; ++ } ++ } ++ read_unlock(&binfmt_lock); ++ ++ if (need_retry) { ++ if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && ++ printable(bprm->buf[2]) && printable(bprm->buf[3])) ++ return retval; ++ if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) ++ return retval; ++ need_retry = false; ++ goto retry; ++ } ++ ++ return retval; ++} ++EXPORT_SYMBOL(search_binary_handler); ++ ++static int exec_binprm(struct linux_binprm *bprm) ++{ ++ pid_t old_pid, old_vpid; ++ int ret; ++ ++ /* Need to fetch pid before load_binary changes it */ ++ old_pid = current->pid; ++ rcu_read_lock(); ++ old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); ++ rcu_read_unlock(); ++ ++ ret = search_binary_handler(bprm); ++ if (ret >= 0) { ++ audit_bprm(bprm); ++ trace_sched_process_exec(current, old_pid, bprm); ++ ptrace_event(PTRACE_EVENT_EXEC, old_vpid); ++ proc_exec_connector(current); ++ } ++ ++ return ret; ++} ++ ++/* ++ * sys_execve() executes a new program. ++ */ ++static int __do_execve_file(int fd, struct filename *filename, ++ struct user_arg_ptr argv, ++ struct user_arg_ptr envp, ++ int flags, struct file *file) ++{ ++ char *pathbuf = NULL; ++ struct linux_binprm *bprm; ++ struct files_struct *displaced; ++ int retval; ++ ++ if (IS_ERR(filename)) ++ return PTR_ERR(filename); ++ ++ /* ++ * We move the actual failure in case of RLIMIT_NPROC excess from ++ * set*uid() to execve() because too many poorly written programs ++ * don't check setuid() return code. Here we additionally recheck ++ * whether NPROC limit is still exceeded. ++ */ ++ if ((current->flags & PF_NPROC_EXCEEDED) && ++ atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { ++ retval = -EAGAIN; ++ goto out_ret; ++ } ++ ++ /* We're below the limit (still or again), so we don't want to make ++ * further execve() calls fail. 
*/ ++ current->flags &= ~PF_NPROC_EXCEEDED; ++ ++ retval = unshare_files(&displaced); ++ if (retval) ++ goto out_ret; ++ ++ retval = -ENOMEM; ++ bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); ++ if (!bprm) ++ goto out_files; ++ ++ retval = prepare_bprm_creds(bprm); ++ if (retval) ++ goto out_free; ++ ++ check_unsafe_exec(bprm); ++ current->in_execve = 1; ++ ++ if (!file) ++ file = do_open_execat(fd, filename, flags); ++ retval = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out_unmark; ++ ++ sched_exec(); ++ ++ bprm->file = file; ++ if (!filename) { ++ bprm->filename = "none"; ++ } else if (fd == AT_FDCWD || filename->name[0] == '/') { ++ bprm->filename = filename->name; ++ } else { ++ if (filename->name[0] == '\0') ++ pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); ++ else ++ pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", ++ fd, filename->name); ++ if (!pathbuf) { ++ retval = -ENOMEM; ++ goto out_unmark; ++ } ++ /* ++ * Record that a name derived from an O_CLOEXEC fd will be ++ * inaccessible after exec. Relies on having exclusive access to ++ * current->files (due to unshare_files above). ++ */ ++ if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) ++ bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; ++ bprm->filename = pathbuf; ++ } ++ bprm->interp = bprm->filename; ++ ++ retval = bprm_mm_init(bprm); ++ if (retval) ++ goto out_unmark; ++ ++ bprm->argc = count(argv, MAX_ARG_STRINGS); ++ if ((retval = bprm->argc) < 0) ++ goto out; ++ ++ bprm->envc = count(envp, MAX_ARG_STRINGS); ++ if ((retval = bprm->envc) < 0) ++ goto out; ++ ++ retval = prepare_binprm(bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = copy_strings_kernel(1, &bprm->filename, bprm); ++ if (retval < 0) ++ goto out; ++ ++ bprm->exec = bprm->p; ++ retval = copy_strings(bprm->envc, envp, bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = copy_strings(bprm->argc, argv, bprm); ++ if (retval < 0) ++ goto out; ++ ++ retval = exec_binprm(bprm); ++ if (retval < 0) ++ goto out; ++ ++ /* execve succeeded */ ++ current->fs->in_exec = 0; ++ current->in_execve = 0; ++ rseq_execve(current); ++ acct_update_integrals(current); ++ task_numa_free(current, false); ++ free_bprm(bprm); ++ kfree(pathbuf); ++ if (filename) ++ putname(filename); ++ if (displaced) ++ put_files_struct(displaced); ++ return retval; ++ ++out: ++ if (bprm->mm) { ++ acct_arg_size(bprm, 0); ++ mmput(bprm->mm); ++ } ++ ++out_unmark: ++ current->fs->in_exec = 0; ++ current->in_execve = 0; ++ ++out_free: ++ free_bprm(bprm); ++ kfree(pathbuf); ++ ++out_files: ++ if (displaced) ++ reset_files_struct(displaced); ++out_ret: ++ if (filename) ++ putname(filename); ++ return retval; ++} ++ ++static int do_execveat_common(int fd, struct filename *filename, ++ struct user_arg_ptr argv, ++ struct user_arg_ptr envp, ++ int flags) ++{ ++ return __do_execve_file(fd, filename, argv, envp, flags, NULL); ++} ++ ++int do_execve_file(struct file *file, void *__argv, void *__envp) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ ++ return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file); ++} ++ ++int do_execve(struct filename *filename, ++ const char __user *const __user *__argv, ++ const char __user *const __user *__envp) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); ++} ++ ++int do_execveat(int fd, struct filename *filename, ++ const char __user *const __user 
*__argv, ++ const char __user *const __user *__envp, ++ int flags) ++{ ++ struct user_arg_ptr argv = { .ptr.native = __argv }; ++ struct user_arg_ptr envp = { .ptr.native = __envp }; ++ ++ return do_execveat_common(fd, filename, argv, envp, flags); ++} ++ ++#ifdef CONFIG_COMPAT ++static int compat_do_execve(struct filename *filename, ++ const compat_uptr_t __user *__argv, ++ const compat_uptr_t __user *__envp) ++{ ++ struct user_arg_ptr argv = { ++ .is_compat = true, ++ .ptr.compat = __argv, ++ }; ++ struct user_arg_ptr envp = { ++ .is_compat = true, ++ .ptr.compat = __envp, ++ }; ++ return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); ++} ++ ++static int compat_do_execveat(int fd, struct filename *filename, ++ const compat_uptr_t __user *__argv, ++ const compat_uptr_t __user *__envp, ++ int flags) ++{ ++ struct user_arg_ptr argv = { ++ .is_compat = true, ++ .ptr.compat = __argv, ++ }; ++ struct user_arg_ptr envp = { ++ .is_compat = true, ++ .ptr.compat = __envp, ++ }; ++ return do_execveat_common(fd, filename, argv, envp, flags); ++} ++#endif ++ ++void set_binfmt(struct linux_binfmt *new) ++{ ++ struct mm_struct *mm = current->mm; ++ ++ if (mm->binfmt) ++ module_put(mm->binfmt->module); ++ ++ mm->binfmt = new; ++ if (new) ++ __module_get(new->module); ++} ++EXPORT_SYMBOL(set_binfmt); ++ ++/* ++ * set_dumpable stores three-value SUID_DUMP_* into mm->flags. ++ */ ++void set_dumpable(struct mm_struct *mm, int value) ++{ ++ unsigned long old, new; ++ ++ if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) ++ return; ++ ++ do { ++ old = READ_ONCE(mm->flags); ++ new = (old & ~MMF_DUMPABLE_MASK) | value; ++ } while (cmpxchg(&mm->flags, old, new) != old); ++} ++ ++SYSCALL_DEFINE3(execve, ++ const char __user *, filename, ++ const char __user *const __user *, argv, ++ const char __user *const __user *, envp) ++{ ++ return do_execve(getname(filename), argv, envp); ++} ++ ++SYSCALL_DEFINE5(execveat, ++ int, fd, const char __user *, filename, ++ const char __user *const __user *, argv, ++ const char __user *const __user *, envp, ++ int, flags) ++{ ++ int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; ++ ++ return do_execveat(fd, ++ getname_flags(filename, lookup_flags, NULL), ++ argv, envp, flags); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, ++ const compat_uptr_t __user *, argv, ++ const compat_uptr_t __user *, envp) ++{ ++ return compat_do_execve(getname(filename), argv, envp); ++} ++ ++COMPAT_SYSCALL_DEFINE5(execveat, int, fd, ++ const char __user *, filename, ++ const compat_uptr_t __user *, argv, ++ const compat_uptr_t __user *, envp, ++ int, flags) ++{ ++ int lookup_flags = (flags & AT_EMPTY_PATH) ? 
LOOKUP_EMPTY : 0; ++ ++ return compat_do_execveat(fd, ++ getname_flags(filename, lookup_flags, NULL), ++ argv, envp, flags); ++} ++#endif +diff -uprN kernel/include/asm-generic/atomic.h kernel_new/include/asm-generic/atomic.h +--- kernel/include/asm-generic/atomic.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/atomic.h 2021-04-01 18:28:07.798863128 +0800 +@@ -80,9 +80,9 @@ static inline void atomic_##op(int i, at + { \ + unsigned long flags; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(); \ + v->counter = v->counter c_op i; \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + } + + #define ATOMIC_OP_RETURN(op, c_op) \ +@@ -91,9 +91,9 @@ static inline int atomic_##op##_return(i + unsigned long flags; \ + int ret; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(); \ + ret = (v->counter = v->counter c_op i); \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + \ + return ret; \ + } +@@ -104,10 +104,10 @@ static inline int atomic_fetch_##op(int + unsigned long flags; \ + int ret; \ + \ +- raw_local_irq_save(flags); \ ++ flags = hard_local_irq_save(flags); \ + ret = v->counter; \ + v->counter = v->counter c_op i; \ +- raw_local_irq_restore(flags); \ ++ hard_local_irq_restore(flags); \ + \ + return ret; \ + } +diff -uprN kernel/include/asm-generic/cmpxchg-local.h kernel_new/include/asm-generic/cmpxchg-local.h +--- kernel/include/asm-generic/cmpxchg-local.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/cmpxchg-local.h 2021-04-01 18:28:07.798863128 +0800 +@@ -4,6 +4,7 @@ + + #include + #include ++#include + + extern unsigned long wrong_size_cmpxchg(volatile void *ptr) + __noreturn; +@@ -23,7 +24,7 @@ static inline unsigned long __cmpxchg_lo + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save(); + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) +@@ -44,7 +45,7 @@ static inline unsigned long __cmpxchg_lo + default: + wrong_size_cmpxchg(ptr); + } +- raw_local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return prev; + } + +@@ -57,11 +58,11 @@ static inline u64 __cmpxchg64_local_gene + u64 prev; + unsigned long flags; + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save(); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; +- raw_local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return prev; + } + +diff -uprN kernel/include/asm-generic/ipipe.h kernel_new/include/asm-generic/ipipe.h +--- kernel/include/asm-generic/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/asm-generic/ipipe.h 2021-04-01 18:28:07.798863128 +0800 +@@ -0,0 +1,93 @@ ++/* -*- linux-c -*- ++ * include/asm-generic/ipipe.h ++ * ++ * Copyright (C) 2002-2017 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. 
++ */ ++#ifndef __ASM_GENERIC_IPIPE_H ++#define __ASM_GENERIC_IPIPE_H ++ ++#ifdef CONFIG_IPIPE ++ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ ++ defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) ++void __ipipe_uaccess_might_fault(void); ++#else ++#define __ipipe_uaccess_might_fault() might_fault() ++#endif ++ ++#define hard_cond_local_irq_enable() hard_local_irq_enable() ++#define hard_cond_local_irq_disable() hard_local_irq_disable() ++#define hard_cond_local_irq_save() hard_local_irq_save() ++#define hard_cond_local_irq_restore(flags) hard_local_irq_restore(flags) ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++void ipipe_root_only(void); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++static inline void ipipe_root_only(void) { } ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++void ipipe_stall_root(void); ++ ++void ipipe_unstall_root(void); ++ ++unsigned long ipipe_test_and_stall_root(void); ++ ++unsigned long ipipe_test_root(void); ++ ++void ipipe_restore_root(unsigned long x); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define hard_local_irq_save_notrace() \ ++ ({ \ ++ unsigned long __flags; \ ++ raw_local_irq_save(__flags); \ ++ __flags; \ ++ }) ++ ++#define hard_local_irq_restore_notrace(__flags) \ ++ raw_local_irq_restore(__flags) ++ ++#define hard_local_irq_enable_notrace() \ ++ raw_local_irq_enable() ++ ++#define hard_local_irq_disable_notrace() \ ++ raw_local_irq_disable() ++ ++#define hard_local_irq_save() \ ++ ({ \ ++ unsigned long __flags; \ ++ local_irq_save(__flags); \ ++ __flags; \ ++ }) ++#define hard_local_irq_restore(__flags) local_irq_restore(__flags) ++#define hard_local_irq_enable() local_irq_enable() ++#define hard_local_irq_disable() local_irq_disable() ++#define hard_irqs_disabled() irqs_disabled() ++ ++#define hard_cond_local_irq_enable() do { } while(0) ++#define hard_cond_local_irq_disable() do { } while(0) ++#define hard_cond_local_irq_save() 0 ++#define hard_cond_local_irq_restore(__flags) do { (void)(__flags); } while(0) ++ ++#define __ipipe_uaccess_might_fault() might_fault() ++ ++static inline void ipipe_root_only(void) { } ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) ++#define hard_smp_local_irq_save() hard_local_irq_save() ++#define hard_smp_local_irq_restore(__flags) hard_local_irq_restore(__flags) ++#else /* !CONFIG_SMP */ ++#define hard_smp_local_irq_save() 0 ++#define hard_smp_local_irq_restore(__flags) do { (void)(__flags); } while(0) ++#endif /* CONFIG_SMP */ ++ ++#endif +diff -uprN kernel/include/asm-generic/percpu.h kernel_new/include/asm-generic/percpu.h +--- kernel/include/asm-generic/percpu.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/percpu.h 2021-04-01 18:28:07.798863128 +0800 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_SMP + +@@ -44,11 +45,29 @@ extern unsigned long __per_cpu_offset[NR + #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) + #endif + ++#ifdef CONFIG_IPIPE ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++unsigned long __ipipe_cpu_get_offset(void); ++#define __ipipe_cpu_offset __ipipe_cpu_get_offset() ++#else ++#define __ipipe_cpu_offset __my_cpu_offset ++#endif ++#ifndef __ipipe_raw_cpu_ptr ++#define __ipipe_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __ipipe_cpu_offset) ++#endif ++#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA + extern void 
setup_per_cpu_areas(void); + #endif + +-#endif /* SMP */ ++#else /* !SMP */ ++ ++#define __ipipe_raw_cpu_ptr(ptr) VERIFY_PERCPU_PTR(ptr) ++#define __ipipe_raw_cpu_read(var) (*__ipipe_raw_cpu_ptr(&(var))) ++ ++#endif /* !SMP */ + + #ifndef PER_CPU_BASE_SECTION + #ifdef CONFIG_SMP +@@ -148,9 +167,9 @@ do { \ + #define this_cpu_generic_to_op(pcp, val, op) \ + do { \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + raw_cpu_generic_to_op(pcp, val, op); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + } while (0) + + +@@ -158,9 +177,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_add_return(pcp, val); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -168,9 +187,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_xchg(pcp, nval); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -178,9 +197,9 @@ do { \ + ({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +@@ -188,10 +207,10 @@ do { \ + ({ \ + int __ret; \ + unsigned long __flags; \ +- raw_local_irq_save(__flags); \ ++ __flags = hard_local_irq_save(); \ + __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ +- raw_local_irq_restore(__flags); \ ++ hard_local_irq_restore(__flags); \ + __ret; \ + }) + +diff -uprN kernel/include/asm-generic/switch_to.h kernel_new/include/asm-generic/switch_to.h +--- kernel/include/asm-generic/switch_to.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/asm-generic/switch_to.h 2021-04-01 18:28:07.798863128 +0800 +@@ -21,10 +21,17 @@ + */ + extern struct task_struct *__switch_to(struct task_struct *, + struct task_struct *); +- ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH + #define switch_to(prev, next, last) \ + do { \ ++ hard_cond_local_irq_disable(); \ + ((last) = __switch_to((prev), (next))); \ ++ hard_cond_local_irq_enable(); \ + } while (0) +- ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++#define switch_to(prev, next, last) \ ++ do { \ ++ ((last) = __switch_to((prev), (next))); \ ++ } while (0) ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ + #endif /* __ASM_GENERIC_SWITCH_TO_H */ +diff -uprN kernel/include/clocksource/timer-sp804.h kernel_new/include/clocksource/timer-sp804.h +--- kernel/include/clocksource/timer-sp804.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/clocksource/timer-sp804.h 2021-04-01 18:28:07.799863127 +0800 +@@ -5,20 +5,23 @@ + struct clk; + + int __sp804_clocksource_and_sched_clock_init(void __iomem *, ++ unsigned long phys, + const char *, struct clk *, int); + int __sp804_clockevents_init(void __iomem *, unsigned int, + struct clk *, const char *); + void sp804_timer_disable(void __iomem *); + +-static inline void sp804_clocksource_init(void __iomem *base, const char *name) ++static inline void sp804_clocksource_init(void __iomem *base, unsigned long phys, ++ const char *name) + { +- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 0); ++ 
__sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 0); + } + + static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, ++ unsigned long phys, + const char *name) + { +- __sp804_clocksource_and_sched_clock_init(base, name, NULL, 1); ++ __sp804_clocksource_and_sched_clock_init(base, phys, name, NULL, 1); + } + + static inline void sp804_clockevents_init(void __iomem *base, unsigned int irq, const char *name) +diff -uprN kernel/include/ipipe/setup.h kernel_new/include/ipipe/setup.h +--- kernel/include/ipipe/setup.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/ipipe/setup.h 2021-04-01 18:28:07.799863127 +0800 +@@ -0,0 +1,10 @@ ++#ifndef _IPIPE_SETUP_H ++#define _IPIPE_SETUP_H ++ ++/* ++ * Placeholders for setup hooks defined by client domains. ++ */ ++ ++static inline void __ipipe_early_client_setup(void) { } ++ ++#endif /* !_IPIPE_SETUP_H */ +diff -uprN kernel/include/ipipe/thread_info.h kernel_new/include/ipipe/thread_info.h +--- kernel/include/ipipe/thread_info.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/ipipe/thread_info.h 2021-04-01 18:28:07.799863127 +0800 +@@ -0,0 +1,14 @@ ++#ifndef _IPIPE_THREAD_INFO_H ++#define _IPIPE_THREAD_INFO_H ++ ++/* ++ * Placeholder for private thread information defined by client ++ * domains. ++ */ ++ ++struct ipipe_threadinfo { ++}; ++ ++#define __ipipe_init_threadinfo(__p) do { } while (0) ++ ++#endif /* !_IPIPE_THREAD_INFO_H */ +diff -uprN kernel/include/linux/clockchips.h kernel_new/include/linux/clockchips.h +--- kernel/include/linux/clockchips.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/clockchips.h 2021-04-01 18:28:07.799863127 +0800 +@@ -129,6 +129,15 @@ struct clock_event_device { + const struct cpumask *cpumask; + struct list_head list; + struct module *owner; ++ ++#ifdef CONFIG_IPIPE ++ struct ipipe_timer *ipipe_timer; ++ unsigned ipipe_stolen; ++ ++#define clockevent_ipipe_stolen(evt) ((evt)->ipipe_stolen) ++#else ++#define clockevent_ipipe_stolen(evt) (0) ++#endif /* !CONFIG_IPIPE */ + } ____cacheline_aligned; + + /* Helpers to verify state of a clockevent device */ +diff -uprN kernel/include/linux/console.h kernel_new/include/linux/console.h +--- kernel/include/linux/console.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/console.h 2021-04-01 18:28:07.799863127 +0800 +@@ -141,10 +141,12 @@ static inline int con_debug_leave(void) + #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ + #define CON_BRL (32) /* Used for a braille device */ + #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ ++#define CON_RAW (128) /* Supports raw write mode */ + + struct console { + char name[16]; + void (*write)(struct console *, const char *, unsigned); ++ void (*write_raw)(struct console *, const char *, unsigned); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +diff -uprN kernel/include/linux/dw_apb_timer.h kernel_new/include/linux/dw_apb_timer.h +--- kernel/include/linux/dw_apb_timer.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/dw_apb_timer.h 2021-04-01 18:28:07.799863127 +0800 +@@ -35,6 +35,7 @@ struct dw_apb_clock_event_device { + struct dw_apb_clocksource { + struct dw_apb_timer timer; + struct clocksource cs; ++ unsigned long phys; + }; + + void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); +@@ -47,7 +48,7 @@ dw_apb_clockevent_init(int cpu, const ch + void __iomem *base, int 
irq, unsigned long freq); + struct dw_apb_clocksource * + dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, +- unsigned long freq); ++ unsigned long phys, unsigned long freq); + void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs); + void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs); + u64 dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs); +diff -uprN kernel/include/linux/ftrace.h kernel_new/include/linux/ftrace.h +--- kernel/include/linux/ftrace.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/ftrace.h 2021-04-01 18:28:07.799863127 +0800 +@@ -160,6 +160,7 @@ enum { + FTRACE_OPS_FL_PID = 1 << 13, + FTRACE_OPS_FL_RCU = 1 << 14, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, ++ FTRACE_OPS_FL_IPIPE_EXCLUSIVE = 1 << 17, + }; + + #ifdef CONFIG_DYNAMIC_FTRACE +diff -uprN kernel/include/linux/gpio/driver.h kernel_new/include/linux/gpio/driver.h +--- kernel/include/linux/gpio/driver.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/gpio/driver.h 2021-04-01 18:28:07.799863127 +0800 +@@ -284,7 +284,7 @@ struct gpio_chip { + void __iomem *reg_dir; + bool bgpio_dir_inverted; + int bgpio_bits; +- spinlock_t bgpio_lock; ++ ipipe_spinlock_t bgpio_lock; + unsigned long bgpio_data; + unsigned long bgpio_dir; + #endif +diff -uprN kernel/include/linux/gpio/driver.h.orig kernel_new/include/linux/gpio/driver.h.orig +--- kernel/include/linux/gpio/driver.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/gpio/driver.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,602 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_GPIO_DRIVER_H ++#define __LINUX_GPIO_DRIVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct gpio_desc; ++struct of_phandle_args; ++struct device_node; ++struct seq_file; ++struct gpio_device; ++struct module; ++ ++#ifdef CONFIG_GPIOLIB ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++/** ++ * struct gpio_irq_chip - GPIO interrupt controller ++ */ ++struct gpio_irq_chip { ++ /** ++ * @chip: ++ * ++ * GPIO IRQ chip implementation, provided by GPIO driver. ++ */ ++ struct irq_chip *chip; ++ ++ /** ++ * @domain: ++ * ++ * Interrupt translation domain; responsible for mapping between GPIO ++ * hwirq number and Linux IRQ number. ++ */ ++ struct irq_domain *domain; ++ ++ /** ++ * @domain_ops: ++ * ++ * Table of interrupt domain operations for this IRQ chip. ++ */ ++ const struct irq_domain_ops *domain_ops; ++ ++ /** ++ * @handler: ++ * ++ * The IRQ handler to use (often a predefined IRQ core function) for ++ * GPIO IRQs, provided by GPIO driver. ++ */ ++ irq_flow_handler_t handler; ++ ++ /** ++ * @default_type: ++ * ++ * Default IRQ triggering type applied during GPIO driver ++ * initialization, provided by GPIO driver. ++ */ ++ unsigned int default_type; ++ ++ /** ++ * @lock_key: ++ * ++ * Per GPIO IRQ chip lockdep class for IRQ lock. ++ */ ++ struct lock_class_key *lock_key; ++ ++ /** ++ * @request_key: ++ * ++ * Per GPIO IRQ chip lockdep class for IRQ request. ++ */ ++ struct lock_class_key *request_key; ++ ++ /** ++ * @parent_handler: ++ * ++ * The interrupt handler for the GPIO chip's parent interrupts, may be ++ * NULL if the parent interrupts are nested rather than cascaded. ++ */ ++ irq_flow_handler_t parent_handler; ++ ++ /** ++ * @parent_handler_data: ++ * ++ * Data associated, and passed to, the handler for the parent ++ * interrupt. 
++ */ ++ void *parent_handler_data; ++ ++ /** ++ * @num_parents: ++ * ++ * The number of interrupt parents of a GPIO chip. ++ */ ++ unsigned int num_parents; ++ ++ /** ++ * @parent_irq: ++ * ++ * For use by gpiochip_set_cascaded_irqchip() ++ */ ++ unsigned int parent_irq; ++ ++ /** ++ * @parents: ++ * ++ * A list of interrupt parents of a GPIO chip. This is owned by the ++ * driver, so the core will only reference this list, not modify it. ++ */ ++ unsigned int *parents; ++ ++ /** ++ * @map: ++ * ++ * A list of interrupt parents for each line of a GPIO chip. ++ */ ++ unsigned int *map; ++ ++ /** ++ * @threaded: ++ * ++ * True if set the interrupt handling uses nested threads. ++ */ ++ bool threaded; ++ ++ /** ++ * @need_valid_mask: ++ * ++ * If set core allocates @valid_mask with all bits set to one. ++ */ ++ bool need_valid_mask; ++ ++ /** ++ * @valid_mask: ++ * ++ * If not %NULL holds bitmask of GPIOs which are valid to be included ++ * in IRQ domain of the chip. ++ */ ++ unsigned long *valid_mask; ++ ++ /** ++ * @first: ++ * ++ * Required for static IRQ allocation. If set, irq_domain_add_simple() ++ * will allocate and map all IRQs during initialization. ++ */ ++ unsigned int first; ++}; ++#endif ++ ++/** ++ * struct gpio_chip - abstract a GPIO controller ++ * @label: a functional name for the GPIO device, such as a part ++ * number or the name of the SoC IP-block implementing it. ++ * @gpiodev: the internal state holder, opaque struct ++ * @parent: optional parent device providing the GPIOs ++ * @owner: helps prevent removal of modules exporting active GPIOs ++ * @request: optional hook for chip-specific activation, such as ++ * enabling module power and clock; may sleep ++ * @free: optional hook for chip-specific deactivation, such as ++ * disabling module power and clock; may sleep ++ * @get_direction: returns direction for signal "offset", 0=out, 1=in, ++ * (same as GPIOF_DIR_XXX), or negative error ++ * @direction_input: configures signal "offset" as input, or returns error ++ * @direction_output: configures signal "offset" as output, or returns error ++ * @get: returns value for signal "offset", 0=low, 1=high, or negative error ++ * @get_multiple: reads values for multiple signals defined by "mask" and ++ * stores them in "bits", returns 0 on success or negative error ++ * @set: assigns output value for signal "offset" ++ * @set_multiple: assigns output values for multiple signals defined by "mask" ++ * @set_config: optional hook for all kinds of settings. Uses the same ++ * packed config format as generic pinconf. ++ * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; ++ * implementation may not sleep ++ * @dbg_show: optional routine to show contents in debugfs; default code ++ * will be used when this is omitted, but custom code can show extra ++ * state (such as pullup/pulldown configuration). ++ * @init_valid_mask: optional routine to initialize @valid_mask, to be used if ++ * not all GPIOs are valid. ++ * @base: identifies the first GPIO number handled by this chip; ++ * or, if negative during registration, requests dynamic ID allocation. ++ * DEPRECATION: providing anything non-negative and nailing the base ++ * offset of GPIO chips is deprecated. Please pass -1 as base to ++ * let gpiolib select the chip base in all possible cases. We want to ++ * get rid of the static GPIO number space in the long run. ++ * @ngpio: the number of GPIOs handled by this controller; the last GPIO ++ * handled is (base + ngpio - 1). 
++ * @names: if set, must be an array of strings to use as alternative ++ * names for the GPIOs in this chip. Any entry in the array ++ * may be NULL if there is no alias for the GPIO, however the ++ * array must be @ngpio entries long. A name can include a single printk ++ * format specifier for an unsigned int. It is substituted by the actual ++ * number of the gpio. ++ * @can_sleep: flag must be set iff get()/set() methods sleep, as they ++ * must while accessing GPIO expander chips over I2C or SPI. This ++ * implies that if the chip supports IRQs, these IRQs need to be threaded ++ * as the chip access may sleep when e.g. reading out the IRQ status ++ * registers. ++ * @read_reg: reader function for generic GPIO ++ * @write_reg: writer function for generic GPIO ++ * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing ++ * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the ++ * generic GPIO core. It is for internal housekeeping only. ++ * @reg_dat: data (in) register for generic GPIO ++ * @reg_set: output set register (out=high) for generic GPIO ++ * @reg_clr: output clear register (out=low) for generic GPIO ++ * @reg_dir: direction setting register for generic GPIO ++ * @bgpio_dir_inverted: indicates that the direction register is inverted ++ * (gpiolib private state variable) ++ * @bgpio_bits: number of register bits used for a generic GPIO i.e. ++ * * 8 ++ * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep ++ * shadowed and real data registers writes together. ++ * @bgpio_data: shadowed data register for generic GPIO to clear/set bits ++ * safely. ++ * @bgpio_dir: shadowed direction register for generic GPIO to clear/set ++ * direction safely. ++ * ++ * A gpio_chip can help platforms abstract various sources of GPIOs so ++ * they can all be accessed through a common programing interface. ++ * Example sources would be SOC controllers, FPGAs, multifunction ++ * chips, dedicated GPIO expanders, and so on. ++ * ++ * Each chip controls a number of signals, identified in method calls ++ * by "offset" values in the range 0..(@ngpio - 1). When those signals ++ * are referenced through calls like gpio_get_value(gpio), the offset ++ * is calculated by subtracting @base from the gpio number. 
++ */ ++struct gpio_chip { ++ const char *label; ++ struct gpio_device *gpiodev; ++ struct device *parent; ++ struct module *owner; ++ ++ int (*request)(struct gpio_chip *chip, ++ unsigned offset); ++ void (*free)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*get_direction)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*direction_input)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*direction_output)(struct gpio_chip *chip, ++ unsigned offset, int value); ++ int (*get)(struct gpio_chip *chip, ++ unsigned offset); ++ int (*get_multiple)(struct gpio_chip *chip, ++ unsigned long *mask, ++ unsigned long *bits); ++ void (*set)(struct gpio_chip *chip, ++ unsigned offset, int value); ++ void (*set_multiple)(struct gpio_chip *chip, ++ unsigned long *mask, ++ unsigned long *bits); ++ int (*set_config)(struct gpio_chip *chip, ++ unsigned offset, ++ unsigned long config); ++ int (*to_irq)(struct gpio_chip *chip, ++ unsigned offset); ++ ++ void (*dbg_show)(struct seq_file *s, ++ struct gpio_chip *chip); ++ ++ int (*init_valid_mask)(struct gpio_chip *chip); ++ ++ int base; ++ u16 ngpio; ++ const char *const *names; ++ bool can_sleep; ++ ++#if IS_ENABLED(CONFIG_GPIO_GENERIC) ++ unsigned long (*read_reg)(void __iomem *reg); ++ void (*write_reg)(void __iomem *reg, unsigned long data); ++ bool be_bits; ++ void __iomem *reg_dat; ++ void __iomem *reg_set; ++ void __iomem *reg_clr; ++ void __iomem *reg_dir; ++ bool bgpio_dir_inverted; ++ int bgpio_bits; ++ spinlock_t bgpio_lock; ++ unsigned long bgpio_data; ++ unsigned long bgpio_dir; ++#endif ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++ /* ++ * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib ++ * to handle IRQs for most practical cases. ++ */ ++ ++ /** ++ * @irq: ++ * ++ * Integrates interrupt chip functionality with the GPIO chip. Can be ++ * used to handle IRQs for most practical cases. ++ */ ++ struct gpio_irq_chip irq; ++#endif ++ ++ /** ++ * @need_valid_mask: ++ * ++ * If set core allocates @valid_mask with all its values initialized ++ * with init_valid_mask() or set to one if init_valid_mask() is not ++ * defined ++ */ ++ bool need_valid_mask; ++ ++ /** ++ * @valid_mask: ++ * ++ * If not %NULL holds bitmask of GPIOs which are valid to be used ++ * from the chip. ++ */ ++ unsigned long *valid_mask; ++ ++#if defined(CONFIG_OF_GPIO) ++ /* ++ * If CONFIG_OF is enabled, then all GPIO controllers described in the ++ * device tree automatically may have an OF translation ++ */ ++ ++ /** ++ * @of_node: ++ * ++ * Pointer to a device tree node representing this GPIO controller. ++ */ ++ struct device_node *of_node; ++ ++ /** ++ * @of_gpio_n_cells: ++ * ++ * Number of cells used to form the GPIO specifier. ++ */ ++ unsigned int of_gpio_n_cells; ++ ++ /** ++ * @of_xlate: ++ * ++ * Callback to translate a device tree GPIO specifier into a chip- ++ * relative GPIO number and flags. 
++ */ ++ int (*of_xlate)(struct gpio_chip *gc, ++ const struct of_phandle_args *gpiospec, u32 *flags); ++#endif ++}; ++ ++extern const char *gpiochip_is_requested(struct gpio_chip *chip, ++ unsigned offset); ++ ++/* add/remove chips */ ++extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, ++ struct lock_class_key *lock_key, ++ struct lock_class_key *request_key); ++ ++/** ++ * gpiochip_add_data() - register a gpio_chip ++ * @chip: the chip to register, with chip->base initialized ++ * @data: driver-private data associated with this chip ++ * ++ * Context: potentially before irqs will work ++ * ++ * When gpiochip_add_data() is called very early during boot, so that GPIOs ++ * can be freely used, the chip->parent device must be registered before ++ * the gpio framework's arch_initcall(). Otherwise sysfs initialization ++ * for GPIOs will fail rudely. ++ * ++ * gpiochip_add_data() must only be called after gpiolib initialization, ++ * ie after core_initcall(). ++ * ++ * If chip->base is negative, this requests dynamic assignment of ++ * a range of valid GPIOs. ++ * ++ * Returns: ++ * A negative errno if the chip can't be registered, such as because the ++ * chip->base is invalid or already associated with a different chip. ++ * Otherwise it returns zero as a success code. ++ */ ++#ifdef CONFIG_LOCKDEP ++#define gpiochip_add_data(chip, data) ({ \ ++ static struct lock_class_key lock_key; \ ++ static struct lock_class_key request_key; \ ++ gpiochip_add_data_with_key(chip, data, &lock_key, \ ++ &request_key); \ ++ }) ++#else ++#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) ++#endif ++ ++static inline int gpiochip_add(struct gpio_chip *chip) ++{ ++ return gpiochip_add_data(chip, NULL); ++} ++extern void gpiochip_remove(struct gpio_chip *chip); ++extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip, ++ void *data); ++extern void devm_gpiochip_remove(struct device *dev, struct gpio_chip *chip); ++ ++extern struct gpio_chip *gpiochip_find(void *data, ++ int (*match)(struct gpio_chip *chip, void *data)); ++ ++/* lock/unlock as IRQ */ ++int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset); ++void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset); ++ ++/* Line status inquiry for drivers */ ++bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_open_source(struct gpio_chip *chip, unsigned int offset); ++ ++/* Sleep persistence inquiry for drivers */ ++bool gpiochip_line_is_persistent(struct gpio_chip *chip, unsigned int offset); ++bool gpiochip_line_is_valid(const struct gpio_chip *chip, unsigned int offset); ++ ++/* get driver data */ ++void *gpiochip_get_data(struct gpio_chip *chip); ++ ++struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc); ++ ++struct bgpio_pdata { ++ const char *label; ++ int base; ++ int ngpio; ++}; ++ ++#if IS_ENABLED(CONFIG_GPIO_GENERIC) ++ ++int bgpio_init(struct gpio_chip *gc, struct device *dev, ++ unsigned long sz, void __iomem *dat, void __iomem *set, ++ void __iomem *clr, void __iomem *dirout, void __iomem *dirin, ++ unsigned long flags); ++ ++#define BGPIOF_BIG_ENDIAN BIT(0) ++#define BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ ++#define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ ++#define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) ++#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores 
output value */ ++#define BGPIOF_NO_OUTPUT BIT(5) /* only input */ ++ ++#endif ++ ++#ifdef CONFIG_GPIOLIB_IRQCHIP ++ ++int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, ++ irq_hw_number_t hwirq); ++void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); ++ ++void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int parent_irq, ++ irq_flow_handler_t parent_handler); ++ ++void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int parent_irq); ++ ++int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type, ++ bool threaded, ++ struct lock_class_key *lock_key, ++ struct lock_class_key *request_key); ++ ++bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gpiochip, ++ unsigned int offset); ++ ++#ifdef CONFIG_LOCKDEP ++ ++/* ++ * Lockdep requires that each irqchip instance be created with a ++ * unique key so as to avoid unnecessary warnings. This upfront ++ * boilerplate static inlines provides such a key for each ++ * unique instance. ++ */ ++static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ static struct lock_class_key lock_key; ++ static struct lock_class_key request_key; ++ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, false, ++ &lock_key, &request_key); ++} ++ ++static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ ++ static struct lock_class_key lock_key; ++ static struct lock_class_key request_key; ++ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, true, ++ &lock_key, &request_key); ++} ++#else ++static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, false, NULL, NULL); ++} ++ ++static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, ++ struct irq_chip *irqchip, ++ unsigned int first_irq, ++ irq_flow_handler_t handler, ++ unsigned int type) ++{ ++ return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, ++ handler, type, true, NULL, NULL); ++} ++#endif /* CONFIG_LOCKDEP */ ++ ++#endif /* CONFIG_GPIOLIB_IRQCHIP */ ++ ++int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); ++void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); ++int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset, ++ unsigned long config); ++ ++#ifdef CONFIG_PINCTRL ++ ++/** ++ * struct gpio_pin_range - pin range controlled by a gpio chip ++ * @node: list for maintaining set of pin ranges, used internally ++ * @pctldev: pinctrl device which handles corresponding pins ++ * @range: actual range of pins controlled by a gpio controller ++ */ ++struct gpio_pin_range { ++ struct list_head node; ++ struct pinctrl_dev *pctldev; ++ struct pinctrl_gpio_range range; ++}; ++ ++int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ++ unsigned int gpio_offset, unsigned int pin_offset, ++ unsigned int npins); ++int gpiochip_add_pingroup_range(struct gpio_chip *chip, ++ struct 
pinctrl_dev *pctldev, ++ unsigned int gpio_offset, const char *pin_group); ++void gpiochip_remove_pin_ranges(struct gpio_chip *chip); ++ ++#else ++ ++static inline int ++gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name, ++ unsigned int gpio_offset, unsigned int pin_offset, ++ unsigned int npins) ++{ ++ return 0; ++} ++static inline int ++gpiochip_add_pingroup_range(struct gpio_chip *chip, ++ struct pinctrl_dev *pctldev, ++ unsigned int gpio_offset, const char *pin_group) ++{ ++ return 0; ++} ++ ++static inline void ++gpiochip_remove_pin_ranges(struct gpio_chip *chip) ++{ ++} ++ ++#endif /* CONFIG_PINCTRL */ ++ ++struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, ++ const char *label); ++void gpiochip_free_own_desc(struct gpio_desc *desc); ++ ++#else /* CONFIG_GPIOLIB */ ++ ++static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) ++{ ++ /* GPIO can never have been requested */ ++ WARN_ON(1); ++ return ERR_PTR(-ENODEV); ++} ++ ++#endif /* CONFIG_GPIOLIB */ ++ ++#endif +diff -uprN kernel/include/linux/hardirq.h kernel_new/include/linux/hardirq.h +--- kernel/include/linux/hardirq.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/hardirq.h 2021-04-01 18:28:07.799863127 +0800 +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + + +@@ -67,6 +68,7 @@ extern void irq_exit(void); + + #define nmi_enter() \ + do { \ ++ __ipipe_nmi_enter(); \ + arch_nmi_enter(); \ + printk_nmi_enter(); \ + lockdep_off(); \ +@@ -87,6 +89,7 @@ extern void irq_exit(void); + lockdep_on(); \ + printk_nmi_exit(); \ + arch_nmi_exit(); \ ++ __ipipe_nmi_exit(); \ + } while (0) + + #endif /* LINUX_HARDIRQ_H */ +diff -uprN kernel/include/linux/interrupt.h kernel_new/include/linux/interrupt.h +--- kernel/include/linux/interrupt.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/interrupt.h 2021-04-01 18:28:07.800863126 +0800 +@@ -472,6 +472,23 @@ extern bool force_irqthreads; + #define hard_irq_disable() do { } while(0) + #endif + ++/* ++ * Unlike other virtualized interrupt disabling schemes may assume, we ++ * can't expect local_irq_restore() to turn hard interrupts on when ++ * pipelining. hard_irq_enable() is introduced to be paired with ++ * hard_irq_disable(), for unconditionally turning them on. The only ++ * sane sequence mixing virtual and real disable state manipulation ++ * is: ++ * ++ * 1. local_irq_save/disable ++ * 2. hard_irq_disable ++ * 3. hard_irq_enable ++ * 4. local_irq_restore/enable ++ */ ++#ifndef hard_irq_enable ++#define hard_irq_enable() hard_cond_local_irq_enable() ++#endif ++ + /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high + frequency threaded job scheduling. For almost all the purposes + tasklets are more than enough. F.e. all serial device BHs et +diff -uprN kernel/include/linux/interrupt.h.orig kernel_new/include/linux/interrupt.h.orig +--- kernel/include/linux/interrupt.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/interrupt.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,746 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* interrupt.h */ ++#ifndef _LINUX_INTERRUPT_H ++#define _LINUX_INTERRUPT_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * These correspond to the IORESOURCE_IRQ_* defines in ++ * linux/ioport.h to select the interrupt line behaviour. 
When ++ * requesting an interrupt without specifying a IRQF_TRIGGER, the ++ * setting should be assumed to be "as already configured", which ++ * may be as per machine or firmware initialisation. ++ */ ++#define IRQF_TRIGGER_NONE 0x00000000 ++#define IRQF_TRIGGER_RISING 0x00000001 ++#define IRQF_TRIGGER_FALLING 0x00000002 ++#define IRQF_TRIGGER_HIGH 0x00000004 ++#define IRQF_TRIGGER_LOW 0x00000008 ++#define IRQF_TRIGGER_MASK (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW | \ ++ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING) ++#define IRQF_TRIGGER_PROBE 0x00000010 ++ ++/* ++ * These flags used only by the kernel as part of the ++ * irq handling routines. ++ * ++ * IRQF_SHARED - allow sharing the irq among several devices ++ * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur ++ * IRQF_TIMER - Flag to mark this interrupt as timer interrupt ++ * IRQF_PERCPU - Interrupt is per cpu ++ * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing ++ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is ++ * registered first in an shared interrupt is considered for ++ * performance reasons) ++ * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. ++ * Used by threaded interrupts which need to keep the ++ * irq line disabled until the threaded handler has been run. ++ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee ++ * that this interrupt will wake the system from a suspended ++ * state. See Documentation/power/suspend-and-interrupts.txt ++ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set ++ * IRQF_NO_THREAD - Interrupt cannot be threaded ++ * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device ++ * resume time. ++ * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this ++ * interrupt handler after suspending interrupts. For system ++ * wakeup devices users need to implement wakeup detection in ++ * their interrupt handlers. ++ */ ++#define IRQF_SHARED 0x00000080 ++#define IRQF_PROBE_SHARED 0x00000100 ++#define __IRQF_TIMER 0x00000200 ++#define IRQF_PERCPU 0x00000400 ++#define IRQF_NOBALANCING 0x00000800 ++#define IRQF_IRQPOLL 0x00001000 ++#define IRQF_ONESHOT 0x00002000 ++#define IRQF_NO_SUSPEND 0x00004000 ++#define IRQF_FORCE_RESUME 0x00008000 ++#define IRQF_NO_THREAD 0x00010000 ++#define IRQF_EARLY_RESUME 0x00020000 ++#define IRQF_COND_SUSPEND 0x00040000 ++ ++#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) ++ ++/* ++ * These values can be returned by request_any_context_irq() and ++ * describe the context the interrupt will be run in. 
++ * ++ * IRQC_IS_HARDIRQ - interrupt runs in hardirq context ++ * IRQC_IS_NESTED - interrupt runs in a nested threaded context ++ */ ++enum { ++ IRQC_IS_HARDIRQ = 0, ++ IRQC_IS_NESTED, ++}; ++ ++typedef irqreturn_t (*irq_handler_t)(int, void *); ++ ++/** ++ * struct irqaction - per interrupt action descriptor ++ * @handler: interrupt handler function ++ * @name: name of the device ++ * @dev_id: cookie to identify the device ++ * @percpu_dev_id: cookie to identify the device ++ * @next: pointer to the next irqaction for shared interrupts ++ * @irq: interrupt number ++ * @flags: flags (see IRQF_* above) ++ * @thread_fn: interrupt handler function for threaded interrupts ++ * @thread: thread pointer for threaded interrupts ++ * @secondary: pointer to secondary irqaction (force threading) ++ * @thread_flags: flags related to @thread ++ * @thread_mask: bitmask for keeping track of @thread activity ++ * @dir: pointer to the proc/irq/NN/name entry ++ */ ++struct irqaction { ++ irq_handler_t handler; ++ void *dev_id; ++ void __percpu *percpu_dev_id; ++ struct irqaction *next; ++ irq_handler_t thread_fn; ++ struct task_struct *thread; ++ struct irqaction *secondary; ++ unsigned int irq; ++ unsigned int flags; ++ unsigned long thread_flags; ++ unsigned long thread_mask; ++ const char *name; ++ struct proc_dir_entry *dir; ++} ____cacheline_internodealigned_in_smp; ++ ++extern irqreturn_t no_action(int cpl, void *dev_id); ++ ++/* ++ * If a (PCI) device interrupt is not connected we set dev->irq to ++ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we ++ * can distingiush that case from other error returns. ++ * ++ * 0x80000000 is guaranteed to be outside the available range of interrupts ++ * and easy to distinguish from other possible incorrect values. 
++ */ ++#define IRQ_NOTCONNECTED (1U << 31) ++ ++extern int __must_check ++request_threaded_irq(unsigned int irq, irq_handler_t handler, ++ irq_handler_t thread_fn, ++ unsigned long flags, const char *name, void *dev); ++ ++static inline int __must_check ++request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, ++ const char *name, void *dev) ++{ ++ return request_threaded_irq(irq, handler, NULL, flags, name, dev); ++} ++ ++extern int __must_check ++request_any_context_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *name, void *dev_id); ++ ++extern int __must_check ++__request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *devname, ++ void __percpu *percpu_dev_id); ++ ++extern int __must_check ++request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags, ++ const char *name, void *dev); ++ ++static inline int __must_check ++request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ const char *devname, void __percpu *percpu_dev_id) ++{ ++ return __request_percpu_irq(irq, handler, 0, ++ devname, percpu_dev_id); ++} ++ ++extern int __must_check ++request_percpu_nmi(unsigned int irq, irq_handler_t handler, ++ const char *devname, void __percpu *dev); ++ ++extern const void *free_irq(unsigned int, void *); ++extern void free_percpu_irq(unsigned int, void __percpu *); ++ ++extern const void *free_nmi(unsigned int irq, void *dev_id); ++extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id); ++ ++struct device; ++ ++extern int __must_check ++devm_request_threaded_irq(struct device *dev, unsigned int irq, ++ irq_handler_t handler, irq_handler_t thread_fn, ++ unsigned long irqflags, const char *devname, ++ void *dev_id); ++ ++static inline int __must_check ++devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, ++ unsigned long irqflags, const char *devname, void *dev_id) ++{ ++ return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags, ++ devname, dev_id); ++} ++ ++extern int __must_check ++devm_request_any_context_irq(struct device *dev, unsigned int irq, ++ irq_handler_t handler, unsigned long irqflags, ++ const char *devname, void *dev_id); ++ ++extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); ++ ++/* ++ * On lockdep we dont want to enable hardirqs in hardirq ++ * context. Use local_irq_enable_in_hardirq() to annotate ++ * kernel code that has to do this nevertheless (pretty much ++ * the only valid case is for old/broken hardware that is ++ * insanely slow). ++ * ++ * NOTE: in theory this might break fragile code that relies ++ * on hardirq delivery - in practice we dont seem to have such ++ * places left. So the only effect should be slightly increased ++ * irqs-off latencies. 
++ */ ++#ifdef CONFIG_LOCKDEP ++# define local_irq_enable_in_hardirq() do { } while (0) ++#else ++# define local_irq_enable_in_hardirq() local_irq_enable() ++#endif ++ ++extern void disable_irq_nosync(unsigned int irq); ++extern bool disable_hardirq(unsigned int irq); ++extern void disable_irq(unsigned int irq); ++extern void disable_percpu_irq(unsigned int irq); ++extern void enable_irq(unsigned int irq); ++extern void enable_percpu_irq(unsigned int irq, unsigned int type); ++extern bool irq_percpu_is_enabled(unsigned int irq); ++extern void irq_wake_thread(unsigned int irq, void *dev_id); ++ ++extern void disable_nmi_nosync(unsigned int irq); ++extern void disable_percpu_nmi(unsigned int irq); ++extern void enable_nmi(unsigned int irq); ++extern void enable_percpu_nmi(unsigned int irq, unsigned int type); ++extern int prepare_percpu_nmi(unsigned int irq); ++extern void teardown_percpu_nmi(unsigned int irq); ++ ++/* The following three functions are for the core kernel use only. */ ++extern void suspend_device_irqs(void); ++extern void resume_device_irqs(void); ++ ++/** ++ * struct irq_affinity_notify - context for notification of IRQ affinity changes ++ * @irq: Interrupt to which notification applies ++ * @kref: Reference count, for internal use ++ * @work: Work item, for internal use ++ * @notify: Function to be called on change. This will be ++ * called in process context. ++ * @release: Function to be called on release. This will be ++ * called in process context. Once registered, the ++ * structure must only be freed when this function is ++ * called or later. ++ */ ++struct irq_affinity_notify { ++ unsigned int irq; ++ struct kref kref; ++ struct work_struct work; ++ void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); ++ void (*release)(struct kref *ref); ++}; ++ ++/** ++ * struct irq_affinity - Description for automatic irq affinity assignements ++ * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of ++ * the MSI(-X) vector space ++ * @post_vectors: Don't apply affinity to @post_vectors at end of ++ * the MSI(-X) vector space ++ */ ++struct irq_affinity { ++ int pre_vectors; ++ int post_vectors; ++}; ++ ++#if defined(CONFIG_SMP) ++ ++extern cpumask_var_t irq_default_affinity; ++ ++/* Internal implementation. Use the helpers below */ ++extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, ++ bool force); ++ ++/** ++ * irq_set_affinity - Set the irq affinity of a given irq ++ * @irq: Interrupt to set affinity ++ * @cpumask: cpumask ++ * ++ * Fails if cpumask does not contain an online CPU ++ */ ++static inline int ++irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return __irq_set_affinity(irq, cpumask, false); ++} ++ ++/** ++ * irq_force_affinity - Force the irq affinity of a given irq ++ * @irq: Interrupt to set affinity ++ * @cpumask: cpumask ++ * ++ * Same as irq_set_affinity, but without checking the mask against ++ * online cpus. ++ * ++ * Solely for low level cpu hotplug code, where we need to make per ++ * cpu interrupts affine before the cpu becomes online. 
++ */ ++static inline int ++irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return __irq_set_affinity(irq, cpumask, true); ++} ++ ++extern int irq_can_set_affinity(unsigned int irq); ++extern int irq_select_affinity(unsigned int irq); ++ ++extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); ++ ++extern int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); ++ ++struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); ++int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd); ++ ++#else /* CONFIG_SMP */ ++ ++static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ ++static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) ++{ ++ return 0; ++} ++ ++static inline int irq_can_set_affinity(unsigned int irq) ++{ ++ return 0; ++} ++ ++static inline int irq_select_affinity(unsigned int irq) { return 0; } ++ ++static inline int irq_set_affinity_hint(unsigned int irq, ++ const struct cpumask *m) ++{ ++ return -EINVAL; ++} ++ ++static inline int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) ++{ ++ return 0; ++} ++ ++static inline struct cpumask * ++irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) ++{ ++ return NULL; ++} ++ ++static inline int ++irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) ++{ ++ return maxvec; ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * Special lockdep variants of irq disabling/enabling. ++ * These should be used for locking constructs that ++ * know that a particular irq context which is disabled, ++ * and which is the only irq-context user of a lock, ++ * that it's safe to take the lock in the irq-disabled ++ * section without disabling hardirqs. ++ * ++ * On !CONFIG_LOCKDEP they are equivalent to the normal ++ * irq disable/enable methods. ++ */ ++static inline void disable_irq_nosync_lockdep(unsigned int irq) ++{ ++ disable_irq_nosync(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_disable(); ++#endif ++} ++ ++static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) ++{ ++ disable_irq_nosync(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_save(*flags); ++#endif ++} ++ ++static inline void disable_irq_lockdep(unsigned int irq) ++{ ++ disable_irq(irq); ++#ifdef CONFIG_LOCKDEP ++ local_irq_disable(); ++#endif ++} ++ ++static inline void enable_irq_lockdep(unsigned int irq) ++{ ++#ifdef CONFIG_LOCKDEP ++ local_irq_enable(); ++#endif ++ enable_irq(irq); ++} ++ ++static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) ++{ ++#ifdef CONFIG_LOCKDEP ++ local_irq_restore(*flags); ++#endif ++ enable_irq(irq); ++} ++ ++/* IRQ wakeup (PM) control: */ ++extern int irq_set_irq_wake(unsigned int irq, unsigned int on); ++ ++static inline int enable_irq_wake(unsigned int irq) ++{ ++ return irq_set_irq_wake(irq, 1); ++} ++ ++static inline int disable_irq_wake(unsigned int irq) ++{ ++ return irq_set_irq_wake(irq, 0); ++} ++ ++/* ++ * irq_get_irqchip_state/irq_set_irqchip_state specific flags ++ */ ++enum irqchip_irq_state { ++ IRQCHIP_STATE_PENDING, /* Is interrupt pending? */ ++ IRQCHIP_STATE_ACTIVE, /* Is interrupt in progress? */ ++ IRQCHIP_STATE_MASKED, /* Is interrupt masked? */ ++ IRQCHIP_STATE_LINE_LEVEL, /* Is IRQ line high? 
*/ ++}; ++ ++extern int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool *state); ++extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool state); ++ ++#ifdef CONFIG_IRQ_FORCED_THREADING ++extern bool force_irqthreads; ++#else ++#define force_irqthreads (0) ++#endif ++ ++#ifndef local_softirq_pending ++ ++#ifndef local_softirq_pending_ref ++#define local_softirq_pending_ref irq_stat.__softirq_pending ++#endif ++ ++#define local_softirq_pending() (__this_cpu_read(local_softirq_pending_ref)) ++#define set_softirq_pending(x) (__this_cpu_write(local_softirq_pending_ref, (x))) ++#define or_softirq_pending(x) (__this_cpu_or(local_softirq_pending_ref, (x))) ++ ++#endif /* local_softirq_pending */ ++ ++/* Some architectures might implement lazy enabling/disabling of ++ * interrupts. In some cases, such as stop_machine, we might want ++ * to ensure that after a local_irq_disable(), interrupts have ++ * really been disabled in hardware. Such architectures need to ++ * implement the following hook. ++ */ ++#ifndef hard_irq_disable ++#define hard_irq_disable() do { } while(0) ++#endif ++ ++/* PLEASE, avoid to allocate new softirqs, if you need not _really_ high ++ frequency threaded job scheduling. For almost all the purposes ++ tasklets are more than enough. F.e. all serial device BHs et ++ al. should be converted to tasklets, not to softirqs. ++ */ ++ ++enum ++{ ++ HI_SOFTIRQ=0, ++ TIMER_SOFTIRQ, ++ NET_TX_SOFTIRQ, ++ NET_RX_SOFTIRQ, ++ BLOCK_SOFTIRQ, ++ IRQ_POLL_SOFTIRQ, ++ TASKLET_SOFTIRQ, ++ SCHED_SOFTIRQ, ++ HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the ++ numbering. Sigh! */ ++ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ ++ ++ NR_SOFTIRQS ++}; ++ ++#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) ++ ++/* map softirq index to softirq name. update 'softirq_to_name' in ++ * kernel/softirq.c when adding a new softirq. ++ */ ++extern const char * const softirq_to_name[NR_SOFTIRQS]; ++ ++/* softirq mask and active fields moved to irq_cpustat_t in ++ * asm/hardirq.h to get better cache usage. KAO ++ */ ++ ++struct softirq_action ++{ ++ void (*action)(struct softirq_action *); ++}; ++ ++asmlinkage void do_softirq(void); ++asmlinkage void __do_softirq(void); ++ ++#ifdef __ARCH_HAS_DO_SOFTIRQ ++void do_softirq_own_stack(void); ++#else ++static inline void do_softirq_own_stack(void) ++{ ++ __do_softirq(); ++} ++#endif ++ ++extern void open_softirq(int nr, void (*action)(struct softirq_action *)); ++extern void softirq_init(void); ++extern void __raise_softirq_irqoff(unsigned int nr); ++ ++extern void raise_softirq_irqoff(unsigned int nr); ++extern void raise_softirq(unsigned int nr); ++ ++DECLARE_PER_CPU(struct task_struct *, ksoftirqd); ++ ++static inline struct task_struct *this_cpu_ksoftirqd(void) ++{ ++ return this_cpu_read(ksoftirqd); ++} ++ ++/* Tasklets --- multithreaded analogue of BHs. ++ ++ Main feature differing them of generic softirqs: tasklet ++ is running only on one CPU simultaneously. ++ ++ Main feature differing them of BHs: different tasklets ++ may be run simultaneously on different CPUs. ++ ++ Properties: ++ * If tasklet_schedule() is called, then tasklet is guaranteed ++ to be executed on some cpu at least once after this. ++ * If the tasklet is already scheduled, but its execution is still not ++ started, it will be executed only once. ++ * If this tasklet is already running on another CPU (or schedule is called ++ from tasklet itself), it is rescheduled for later. 
++ * Tasklet is strictly serialized wrt itself, but not ++ wrt another tasklets. If client needs some intertask synchronization, ++ he makes it with spinlocks. ++ */ ++ ++struct tasklet_struct ++{ ++ struct tasklet_struct *next; ++ unsigned long state; ++ atomic_t count; ++ void (*func)(unsigned long); ++ unsigned long data; ++}; ++ ++#define DECLARE_TASKLET(name, func, data) \ ++struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } ++ ++#define DECLARE_TASKLET_DISABLED(name, func, data) \ ++struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } ++ ++ ++enum ++{ ++ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ ++ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ ++}; ++ ++#ifdef CONFIG_SMP ++static inline int tasklet_trylock(struct tasklet_struct *t) ++{ ++ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); ++} ++ ++static inline void tasklet_unlock(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ clear_bit(TASKLET_STATE_RUN, &(t)->state); ++} ++ ++static inline void tasklet_unlock_wait(struct tasklet_struct *t) ++{ ++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } ++} ++#else ++#define tasklet_trylock(t) 1 ++#define tasklet_unlock_wait(t) do { } while (0) ++#define tasklet_unlock(t) do { } while (0) ++#endif ++ ++extern void __tasklet_schedule(struct tasklet_struct *t); ++ ++static inline void tasklet_schedule(struct tasklet_struct *t) ++{ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ __tasklet_schedule(t); ++} ++ ++extern void __tasklet_hi_schedule(struct tasklet_struct *t); ++ ++static inline void tasklet_hi_schedule(struct tasklet_struct *t) ++{ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ __tasklet_hi_schedule(t); ++} ++ ++static inline void tasklet_disable_nosync(struct tasklet_struct *t) ++{ ++ atomic_inc(&t->count); ++ smp_mb__after_atomic(); ++} ++ ++static inline void tasklet_disable(struct tasklet_struct *t) ++{ ++ tasklet_disable_nosync(t); ++ tasklet_unlock_wait(t); ++ smp_mb(); ++} ++ ++static inline void tasklet_enable(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ atomic_dec(&t->count); ++} ++ ++extern void tasklet_kill(struct tasklet_struct *t); ++extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); ++extern void tasklet_init(struct tasklet_struct *t, ++ void (*func)(unsigned long), unsigned long data); ++ ++struct tasklet_hrtimer { ++ struct hrtimer timer; ++ struct tasklet_struct tasklet; ++ enum hrtimer_restart (*function)(struct hrtimer *); ++}; ++ ++extern void ++tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, ++ enum hrtimer_restart (*function)(struct hrtimer *), ++ clockid_t which_clock, enum hrtimer_mode mode); ++ ++static inline ++void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, ++ const enum hrtimer_mode mode) ++{ ++ hrtimer_start(&ttimer->timer, time, mode); ++} ++ ++static inline ++void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) ++{ ++ hrtimer_cancel(&ttimer->timer); ++ tasklet_kill(&ttimer->tasklet); ++} ++ ++/* ++ * Autoprobing for irqs: ++ * ++ * probe_irq_on() and probe_irq_off() provide robust primitives ++ * for accurate IRQ probing during kernel initialization. They are ++ * reasonably simple to use, are not "fooled" by spurious interrupts, ++ * and, unlike other attempts at IRQ probing, they do not get hung on ++ * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). 
++ * ++ * For reasonably foolproof probing, use them as follows: ++ * ++ * 1. clear and/or mask the device's internal interrupt. ++ * 2. sti(); ++ * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs ++ * 4. enable the device and cause it to trigger an interrupt. ++ * 5. wait for the device to interrupt, using non-intrusive polling or a delay. ++ * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple ++ * 7. service the device to clear its pending interrupt. ++ * 8. loop again if paranoia is required. ++ * ++ * probe_irq_on() returns a mask of allocated irq's. ++ * ++ * probe_irq_off() takes the mask as a parameter, ++ * and returns the irq number which occurred, ++ * or zero if none occurred, or a negative irq number ++ * if more than one irq occurred. ++ */ ++ ++#if !defined(CONFIG_GENERIC_IRQ_PROBE) ++static inline unsigned long probe_irq_on(void) ++{ ++ return 0; ++} ++static inline int probe_irq_off(unsigned long val) ++{ ++ return 0; ++} ++static inline unsigned int probe_irq_mask(unsigned long val) ++{ ++ return 0; ++} ++#else ++extern unsigned long probe_irq_on(void); /* returns 0 on failure */ ++extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ ++extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ ++#endif ++ ++#ifdef CONFIG_PROC_FS ++/* Initialize /proc/irq/ */ ++extern void init_irq_proc(void); ++#else ++static inline void init_irq_proc(void) ++{ ++} ++#endif ++ ++#ifdef CONFIG_IRQ_TIMINGS ++void irq_timings_enable(void); ++void irq_timings_disable(void); ++u64 irq_timings_next_event(u64 now); ++#endif ++ ++struct seq_file; ++int show_interrupts(struct seq_file *p, void *v); ++int arch_show_interrupts(struct seq_file *p, int prec); ++ ++extern int early_irq_init(void); ++extern int arch_probe_nr_irqs(void); ++extern int arch_early_irq_init(void); ++ ++/* ++ * We want to know which function is an entrypoint of a hardirq or a softirq. ++ */ ++#define __irq_entry __attribute__((__section__(".irqentry.text"))) ++#define __softirq_entry \ ++ __attribute__((__section__(".softirqentry.text"))) ++ ++#endif +diff -uprN kernel/include/linux/ipipe_debug.h kernel_new/include/linux/ipipe_debug.h +--- kernel/include/linux/ipipe_debug.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_debug.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,100 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_debug.h ++ * ++ * Copyright (C) 2012 Philippe Gerum . ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_DEBUG_H ++#define __LINUX_IPIPE_DEBUG_H ++ ++#include ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++#include ++ ++static inline int ipipe_disable_context_check(void) ++{ ++ return xchg(raw_cpu_ptr(&ipipe_percpu.context_check), 0); ++} ++ ++static inline void ipipe_restore_context_check(int old_state) ++{ ++ __this_cpu_write(ipipe_percpu.context_check, old_state); ++} ++ ++static inline void ipipe_context_check_off(void) ++{ ++ int cpu; ++ for_each_online_cpu(cpu) ++ per_cpu(ipipe_percpu, cpu).context_check = 0; ++} ++ ++static inline void ipipe_save_context_nmi(void) ++{ ++ int state = ipipe_disable_context_check(); ++ __this_cpu_write(ipipe_percpu.context_check_saved, state); ++} ++ ++static inline void ipipe_restore_context_nmi(void) ++{ ++ ipipe_restore_context_check(__this_cpu_read(ipipe_percpu.context_check_saved)); ++} ++ ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++static inline int ipipe_disable_context_check(void) ++{ ++ return 0; ++} ++ ++static inline void ipipe_restore_context_check(int old_state) { } ++ ++static inline void ipipe_context_check_off(void) { } ++ ++static inline void ipipe_save_context_nmi(void) { } ++ ++static inline void ipipe_restore_context_nmi(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ ++#define ipipe_check_irqoff() \ ++ do { \ ++ if (WARN_ON_ONCE(!hard_irqs_disabled())) \ ++ hard_local_irq_disable(); \ ++ } while (0) ++ ++#else /* !CONFIG_IPIPE_DEBUG */ ++ ++static inline void ipipe_check_irqoff(void) { } ++ ++#endif /* !CONFIG_IPIPE_DEBUG */ ++ ++#ifdef CONFIG_IPIPE_DEBUG_INTERNAL ++#define IPIPE_WARN(c) WARN_ON(c) ++#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c) ++#define IPIPE_BUG_ON(c) BUG_ON(c) ++#else ++#define IPIPE_WARN(c) do { (void)(c); } while (0) ++#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0) ++#define IPIPE_BUG_ON(c) do { (void)(c); } while (0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_DEBUG_H */ +diff -uprN kernel/include/linux/ipipe_domain.h kernel_new/include/linux/ipipe_domain.h +--- kernel/include/linux/ipipe_domain.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_domain.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,368 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_domain.h ++ * ++ * Copyright (C) 2007-2012 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_DOMAIN_H ++#define __LINUX_IPIPE_DOMAIN_H ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct task_struct; ++struct mm_struct; ++struct irq_desc; ++struct ipipe_vm_notifier; ++ ++#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) ++/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ ++#define IPIPE_NR_VIRQS BITS_PER_LONG ++/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ ++#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) ++/* Total number of IRQ slots */ ++#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) ++ ++#define IPIPE_IRQ_MAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) ++#define IPIPE_IRQ_1MAPSZ BITS_PER_LONG ++#if IPIPE_IRQ_MAPSZ > BITS_PER_LONG * BITS_PER_LONG ++/* ++ * We need a 4-level mapping, up to 16M IRQs (64bit long, MAXSMP ++ * defines 512K IRQs). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 4 ++#define IPIPE_IRQ_2MAPSZ (BITS_PER_LONG * BITS_PER_LONG) ++#elif IPIPE_IRQ_MAPSZ > BITS_PER_LONG ++/* ++ * 3-level mapping. Up to 256K IRQs (64 bit long). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 3 ++#else ++/* ++ * 2-level mapping is enough. Up to 4K IRQs (64 bit long). ++ */ ++#define __IPIPE_IRQMAP_LEVELS 2 ++#endif ++ ++/* Per-cpu pipeline status */ ++#define IPIPE_STALL_FLAG 0 /* interrupts (virtually) disabled. */ ++#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) ++ ++/* Interrupt control bits */ ++#define IPIPE_HANDLE_FLAG 0 ++#define IPIPE_STICKY_FLAG 1 ++#define IPIPE_LOCK_FLAG 2 ++#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) ++#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) ++#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) ++ ++#define __IPIPE_SYSCALL_P 0 ++#define __IPIPE_TRAP_P 1 ++#define __IPIPE_KEVENT_P 2 ++#define __IPIPE_SYSCALL_E (1 << __IPIPE_SYSCALL_P) ++#define __IPIPE_TRAP_E (1 << __IPIPE_TRAP_P) ++#define __IPIPE_KEVENT_E (1 << __IPIPE_KEVENT_P) ++#define __IPIPE_ALL_E 0x7 ++#define __IPIPE_SYSCALL_R (8 << __IPIPE_SYSCALL_P) ++#define __IPIPE_TRAP_R (8 << __IPIPE_TRAP_P) ++#define __IPIPE_KEVENT_R (8 << __IPIPE_KEVENT_P) ++#define __IPIPE_SHIFT_R 3 ++#define __IPIPE_ALL_R (__IPIPE_ALL_E << __IPIPE_SHIFT_R) ++ ++#define IPIPE_KEVT_SCHEDULE 0 ++#define IPIPE_KEVT_SIGWAKE 1 ++#define IPIPE_KEVT_SETSCHED 2 ++#define IPIPE_KEVT_SETAFFINITY 3 ++#define IPIPE_KEVT_EXIT 4 ++#define IPIPE_KEVT_CLEANUP 5 ++#define IPIPE_KEVT_HOSTRT 6 ++#define IPIPE_KEVT_CLOCKFREQ 7 ++#define IPIPE_KEVT_USERINTRET 8 ++#define IPIPE_KEVT_PTRESUME 9 ++ ++typedef void (*ipipe_irq_ackfn_t)(struct irq_desc *desc); ++ ++typedef void (*ipipe_irq_handler_t)(unsigned int irq, ++ void *cookie); ++ ++struct ipipe_domain { ++ int context_offset; ++ struct ipipe_irqdesc { ++ unsigned long control; ++ ipipe_irq_ackfn_t ackfn; ++ ipipe_irq_handler_t handler; ++ void *cookie; ++ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; ++ const char *name; ++ struct mutex mutex; ++}; ++ ++static inline void * ++__ipipe_irq_cookie(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ return ipd->irqs[irq].cookie; ++} ++ ++static inline ipipe_irq_handler_t ++__ipipe_irq_handler(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ return ipd->irqs[irq].handler; ++} ++ ++extern struct ipipe_domain ipipe_root; ++ ++#define ipipe_root_domain (&ipipe_root) ++ ++extern struct ipipe_domain *ipipe_head_domain; ++ ++struct ipipe_percpu_domain_data { ++ unsigned long status; /* <= Must be first in struct. 
*/ ++ unsigned long irqpend_0map; ++#if __IPIPE_IRQMAP_LEVELS >= 3 ++ unsigned long irqpend_1map[IPIPE_IRQ_1MAPSZ]; ++#if __IPIPE_IRQMAP_LEVELS >= 4 ++ unsigned long irqpend_2map[IPIPE_IRQ_2MAPSZ]; ++#endif ++#endif ++ unsigned long irqpend_map[IPIPE_IRQ_MAPSZ]; ++ unsigned long irqheld_map[IPIPE_IRQ_MAPSZ]; ++ unsigned long irqall[IPIPE_NR_IRQS]; ++ struct ipipe_domain *domain; ++ int coflags; ++}; ++ ++struct ipipe_percpu_data { ++ struct ipipe_percpu_domain_data root; ++ struct ipipe_percpu_domain_data head; ++ struct ipipe_percpu_domain_data *curr; ++ struct pt_regs tick_regs; ++ int hrtimer_irq; ++ struct task_struct *task_hijacked; ++ struct task_struct *rqlock_owner; ++ struct ipipe_vm_notifier *vm_notifier; ++ unsigned long nmi_state; ++ struct mm_struct *active_mm; ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ int context_check; ++ int context_check_saved; ++#endif ++}; ++ ++/* ++ * CAREFUL: all accessors based on __ipipe_raw_cpu_ptr() you may find ++ * in this file should be used only while hw interrupts are off, to ++ * prevent from CPU migration regardless of the running domain. ++ */ ++DECLARE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu); ++ ++static inline struct ipipe_percpu_domain_data * ++__context_of(struct ipipe_percpu_data *p, struct ipipe_domain *ipd) ++{ ++ return (void *)p + ipd->context_offset; ++} ++ ++/** ++ * ipipe_percpu_context - return the address of the pipeline context ++ * data for a domain on a given CPU. ++ * ++ * NOTE: this is the slowest accessor, use it carefully. Prefer ++ * ipipe_this_cpu_context() for requests targeted at the current ++ * CPU. Additionally, if the target domain is known at build time, ++ * consider ipipe_this_cpu_{root, head}_context(). ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_percpu_context(struct ipipe_domain *ipd, int cpu) ++{ ++ return __context_of(&per_cpu(ipipe_percpu, cpu), ipd); ++} ++ ++/** ++ * ipipe_this_cpu_context - return the address of the pipeline context ++ * data for a domain on the current CPU. hw IRQs must be off. ++ * ++ * NOTE: this accessor is a bit faster, but since we don't know which ++ * one of "root" or "head" ipd refers to, we still need to compute the ++ * context address from its offset. ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_context(struct ipipe_domain *ipd) ++{ ++ return __context_of(__ipipe_raw_cpu_ptr(&ipipe_percpu), ipd); ++} ++ ++/** ++ * ipipe_this_cpu_root_context - return the address of the pipeline ++ * context data for the root domain on the current CPU. hw IRQs must ++ * be off. ++ * ++ * NOTE: this accessor is recommended when the domain we refer to is ++ * known at build time to be the root one. ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_root_context(void) ++{ ++ return __ipipe_raw_cpu_ptr(&ipipe_percpu.root); ++} ++ ++/** ++ * ipipe_this_cpu_head_context - return the address of the pipeline ++ * context data for the registered head domain on the current CPU. hw ++ * IRQs must be off. ++ * ++ * NOTE: this accessor is recommended when the domain we refer to is ++ * known at build time to be the registered head domain. This address ++ * is always different from the context data of the root domain in ++ * absence of registered head domain. To get the address of the ++ * context data for the domain leading the pipeline at the time of the ++ * call (which may be root in absence of registered head domain), use ++ * ipipe_this_cpu_leading_context() instead. 
++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_head_context(void) ++{ ++ return __ipipe_raw_cpu_ptr(&ipipe_percpu.head); ++} ++ ++/** ++ * ipipe_this_cpu_leading_context - return the address of the pipeline ++ * context data for the domain leading the pipeline on the current ++ * CPU. hw IRQs must be off. ++ * ++ * NOTE: this accessor is required when either root or a registered ++ * head domain may be the final target of this call, depending on ++ * whether the high priority domain was installed via ++ * ipipe_register_head(). ++ */ ++static inline struct ipipe_percpu_domain_data * ++ipipe_this_cpu_leading_context(void) ++{ ++ return ipipe_this_cpu_context(ipipe_head_domain); ++} ++ ++/** ++ * __ipipe_get_current_context() - return the address of the pipeline ++ * context data of the domain running on the current CPU. hw IRQs must ++ * be off. ++ */ ++static inline struct ipipe_percpu_domain_data *__ipipe_get_current_context(void) ++{ ++ return __ipipe_raw_cpu_read(ipipe_percpu.curr); ++} ++ ++#define __ipipe_current_context __ipipe_get_current_context() ++ ++/** ++ * __ipipe_set_current_context() - switch the current CPU to the ++ * specified domain context. hw IRQs must be off. ++ * ++ * NOTE: this is the only way to change the current domain for the ++ * current CPU. Don't bypass. ++ */ ++static inline ++void __ipipe_set_current_context(struct ipipe_percpu_domain_data *pd) ++{ ++ struct ipipe_percpu_data *p; ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ p->curr = pd; ++} ++ ++/** ++ * __ipipe_set_current_domain() - switch the current CPU to the ++ * specified domain. This is equivalent to calling ++ * __ipipe_set_current_context() with the context data of that ++ * domain. hw IRQs must be off. ++ */ ++static inline void __ipipe_set_current_domain(struct ipipe_domain *ipd) ++{ ++ struct ipipe_percpu_data *p; ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ p->curr = __context_of(p, ipd); ++} ++ ++static inline struct ipipe_percpu_domain_data *ipipe_current_context(void) ++{ ++ struct ipipe_percpu_domain_data *pd; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ pd = __ipipe_get_current_context(); ++ hard_smp_local_irq_restore(flags); ++ ++ return pd; ++} ++ ++static inline struct ipipe_domain *__ipipe_get_current_domain(void) ++{ ++ return __ipipe_get_current_context()->domain; ++} ++ ++#define __ipipe_current_domain __ipipe_get_current_domain() ++ ++/** ++ * __ipipe_get_current_domain() - return the address of the pipeline ++ * domain running on the current CPU. hw IRQs must be off. ++ */ ++static inline struct ipipe_domain *ipipe_get_current_domain(void) ++{ ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ ipd = __ipipe_get_current_domain(); ++ hard_smp_local_irq_restore(flags); ++ ++ return ipd; ++} ++ ++#define ipipe_current_domain ipipe_get_current_domain() ++ ++#define __ipipe_root_p (__ipipe_current_domain == ipipe_root_domain) ++#define ipipe_root_p (ipipe_current_domain == ipipe_root_domain) ++ ++#ifdef CONFIG_SMP ++#define __ipipe_root_status (ipipe_this_cpu_root_context()->status) ++#else ++extern unsigned long __ipipe_root_status; ++#endif ++ ++#define __ipipe_head_status (ipipe_this_cpu_head_context()->status) ++ ++/** ++ * __ipipe_ipending_p() - Whether we have interrupts pending ++ * (i.e. logged) for the given domain context on the current CPU. hw ++ * IRQs must be off. 
++ */ ++static inline int __ipipe_ipending_p(struct ipipe_percpu_domain_data *pd) ++{ ++ return pd->irqpend_0map != 0; ++} ++ ++static inline unsigned long ++__ipipe_cpudata_irq_hits(struct ipipe_domain *ipd, int cpu, unsigned int irq) ++{ ++ return ipipe_percpu_context(ipd, cpu)->irqall[irq]; ++} ++ ++#endif /* CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_DOMAIN_H */ +diff -uprN kernel/include/linux/ipipe.h kernel_new/include/linux/ipipe.h +--- kernel/include/linux/ipipe.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,721 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe.h ++ * ++ * Copyright (C) 2002-2014 Philippe Gerum. ++ * 2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_H ++#define __LINUX_IPIPE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_HAVE_IPIPE_SUPPORT ++#include ++#endif ++ ++struct cpuidle_device; ++struct cpuidle_state; ++struct kvm_vcpu; ++struct ipipe_vm_notifier; ++struct irq_desc; ++struct task_struct; ++struct mm_struct; ++ ++#ifdef CONFIG_IPIPE ++ ++#include ++ ++#define IPIPE_CORE_APIREV CONFIG_IPIPE_CORE_APIREV ++ ++#include ++#include ++#include ++#include ++ ++struct pt_regs; ++struct ipipe_domain; ++ ++struct ipipe_vm_notifier { ++ void (*handler)(struct ipipe_vm_notifier *nfy); ++}; ++ ++static inline int ipipe_virtual_irq_p(unsigned int irq) ++{ ++ return irq >= IPIPE_VIRQ_BASE && irq < IPIPE_NR_IRQS; ++} ++ ++void __ipipe_init_early(void); ++ ++void __ipipe_init(void); ++ ++#ifdef CONFIG_PROC_FS ++void __ipipe_init_proc(void); ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_init_tracer(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++static inline void __ipipe_init_tracer(void) { } ++#endif /* CONFIG_IPIPE_TRACE */ ++#else /* !CONFIG_PROC_FS */ ++static inline void __ipipe_init_proc(void) { } ++#endif /* CONFIG_PROC_FS */ ++ ++void __ipipe_restore_root_nosync(unsigned long x); ++ ++#define IPIPE_IRQF_NOACK 0x1 ++#define IPIPE_IRQF_NOSYNC 0x2 ++ ++void __ipipe_dispatch_irq(unsigned int irq, int flags); ++ ++void __ipipe_do_sync_stage(void); ++ ++void __ipipe_do_sync_pipeline(struct ipipe_domain *top); ++ ++void __ipipe_lock_irq(unsigned int irq); ++ ++void __ipipe_unlock_irq(unsigned int irq); ++ ++void __ipipe_do_critical_sync(unsigned int irq, void *cookie); ++ ++void __ipipe_ack_edge_irq(struct irq_desc *desc); ++ ++void __ipipe_nop_irq(struct irq_desc *desc); ++ ++static inline void __ipipe_idle(void) ++{ ++ ipipe_unstall_root(); ++} ++ ++#ifndef __ipipe_sync_check ++#define __ipipe_sync_check 1 ++#endif ++ ++static inline void __ipipe_sync_stage(void) ++{ ++ if (likely(__ipipe_sync_check)) ++ __ipipe_do_sync_stage(); ++} ++ ++#ifndef __ipipe_run_irqtail 
++#define __ipipe_run_irqtail(irq) do { } while(0) ++#endif ++ ++int __ipipe_log_printk(const char *fmt, va_list args); ++void __ipipe_flush_printk(unsigned int irq, void *cookie); ++ ++#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); }) ++#define __ipipe_put_cpu(flags) hard_preempt_enable(flags) ++ ++int __ipipe_notify_kevent(int event, void *data); ++ ++#define __ipipe_report_sigwake(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SIGWAKE, p); \ ++ } while (0) ++ ++struct ipipe_cpu_migration_data { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++#define __ipipe_report_setaffinity(__p, __dest_cpu) \ ++ do { \ ++ struct ipipe_cpu_migration_data d = { \ ++ .task = (__p), \ ++ .dest_cpu = (__dest_cpu), \ ++ }; \ ++ if (ipipe_notifier_enabled_p(__p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SETAFFINITY, &d); \ ++ } while (0) ++ ++#define __ipipe_report_exit(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_EXIT, p); \ ++ } while (0) ++ ++#define __ipipe_report_setsched(p) \ ++ do { \ ++ if (ipipe_notifier_enabled_p(p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SETSCHED, p); \ ++ } while (0) ++ ++#define __ipipe_report_schedule(prev, next) \ ++do { \ ++ if (ipipe_notifier_enabled_p(next) || \ ++ ipipe_notifier_enabled_p(prev)) { \ ++ __this_cpu_write(ipipe_percpu.rqlock_owner, prev); \ ++ __ipipe_notify_kevent(IPIPE_KEVT_SCHEDULE, next); \ ++ } \ ++} while (0) ++ ++#define __ipipe_report_cleanup(mm) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_CLEANUP, mm) ++ ++#define __ipipe_report_clockfreq_update(freq) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_CLOCKFREQ, &(freq)) ++ ++struct ipipe_ptrace_resume_data { ++ struct task_struct *task; ++ long request; ++}; ++ ++#define __ipipe_report_ptrace_resume(__p, __request) \ ++ do { \ ++ struct ipipe_ptrace_resume_data d = { \ ++ .task = (__p), \ ++ .request = (__request), \ ++ }; \ ++ if (ipipe_notifier_enabled_p(__p)) \ ++ __ipipe_notify_kevent(IPIPE_KEVT_PTRESUME, &d); \ ++ } while (0) ++ ++int __ipipe_notify_syscall(struct pt_regs *regs); ++ ++int __ipipe_notify_trap(int exception, struct pt_regs *regs); ++ ++#define __ipipe_report_trap(exception, regs) \ ++ __ipipe_notify_trap(exception, regs) ++ ++void __ipipe_call_mayday(struct pt_regs *regs); ++ ++int __ipipe_notify_user_intreturn(void); ++ ++#define __ipipe_serial_debug(__fmt, __args...) 
raw_printk(__fmt, ##__args) ++ ++struct ipipe_trap_data { ++ int exception; ++ struct pt_regs *regs; ++}; ++ ++/* ipipe_set_hooks(..., enables) */ ++#define IPIPE_SYSCALL __IPIPE_SYSCALL_E ++#define IPIPE_TRAP __IPIPE_TRAP_E ++#define IPIPE_KEVENT __IPIPE_KEVENT_E ++ ++struct ipipe_sysinfo { ++ int sys_nr_cpus; /* Number of CPUs on board */ ++ int sys_hrtimer_irq; /* hrtimer device IRQ */ ++ u64 sys_hrtimer_freq; /* hrtimer device frequency */ ++ u64 sys_hrclock_freq; /* hrclock device frequency */ ++ u64 sys_cpu_freq; /* CPU frequency (Hz) */ ++ struct ipipe_arch_sysinfo arch; ++}; ++ ++struct ipipe_work_header { ++ size_t size; ++ void (*handler)(struct ipipe_work_header *work); ++}; ++ ++extern unsigned int __ipipe_printk_virq; ++ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq); ++ ++void __ipipe_complete_domain_migration(void); ++ ++int __ipipe_switch_tail(void); ++ ++int __ipipe_migrate_head(void); ++ ++void __ipipe_reenter_root(void); ++ ++void __ipipe_share_current(int flags); ++ ++void __ipipe_arch_share_current(int flags); ++ ++int __ipipe_disable_ondemand_mappings(struct task_struct *p); ++ ++int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma); ++ ++/* ++ * Obsolete - no arch implements PIC muting anymore. Null helpers are ++ * kept for building legacy co-kernel releases. ++ */ ++static inline void ipipe_mute_pic(void) { } ++static inline void ipipe_unmute_pic(void) { } ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ hard_local_irq_enable(); \ ++ __ipipe_report_schedule(current, next); \ ++ } while(0) ++ ++#ifndef ipipe_get_active_mm ++static inline struct mm_struct *ipipe_get_active_mm(void) ++{ ++ return __this_cpu_read(ipipe_percpu.active_mm); ++} ++#define ipipe_get_active_mm ipipe_get_active_mm ++#endif ++ ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++#define prepare_arch_switch(next) \ ++ do { \ ++ __ipipe_report_schedule(current, next); \ ++ hard_local_irq_disable(); \ ++ } while(0) ++ ++#ifndef ipipe_get_active_mm ++#define ipipe_get_active_mm() (current->active_mm) ++#endif ++ ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++static inline bool __ipipe_hrclock_ok(void) ++{ ++ return __ipipe_hrclock_freq != 0; ++} ++ ++static inline void __ipipe_nmi_enter(void) ++{ ++ __this_cpu_write(ipipe_percpu.nmi_state, __ipipe_root_status); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ ipipe_save_context_nmi(); ++} ++ ++static inline void __ipipe_nmi_exit(void) ++{ ++ ipipe_restore_context_nmi(); ++ if (!test_bit(IPIPE_STALL_FLAG, raw_cpu_ptr(&ipipe_percpu.nmi_state))) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++} ++ ++/* KVM-side calls, hw IRQs off. */ ++static inline void __ipipe_enter_vm(struct ipipe_vm_notifier *vmf) ++{ ++ struct ipipe_percpu_data *p; ++ ++ p = raw_cpu_ptr(&ipipe_percpu); ++ p->vm_notifier = vmf; ++ barrier(); ++} ++ ++static inline void __ipipe_exit_vm(void) ++{ ++ struct ipipe_percpu_data *p; ++ ++ p = raw_cpu_ptr(&ipipe_percpu); ++ p->vm_notifier = NULL; ++ barrier(); ++} ++ ++/* Client-side call, hw IRQs off. 
*/ ++void __ipipe_notify_vm_preemption(void); ++ ++static inline void __ipipe_sync_pipeline(struct ipipe_domain *top) ++{ ++ if (__ipipe_current_domain != top) { ++ __ipipe_do_sync_pipeline(top); ++ return; ++ } ++ if (!test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(top)->status)) ++ __ipipe_sync_stage(); ++} ++ ++void ipipe_register_head(struct ipipe_domain *ipd, ++ const char *name); ++ ++void ipipe_unregister_head(struct ipipe_domain *ipd); ++ ++int ipipe_request_irq(struct ipipe_domain *ipd, ++ unsigned int irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t ackfn); ++ ++void ipipe_free_irq(struct ipipe_domain *ipd, ++ unsigned int irq); ++ ++void ipipe_raise_irq(unsigned int irq); ++ ++void ipipe_set_hooks(struct ipipe_domain *ipd, ++ int enables); ++ ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs); ++ ++unsigned int ipipe_alloc_virq(void); ++ ++void ipipe_free_virq(unsigned int virq); ++ ++static inline void ipipe_post_irq_head(unsigned int irq) ++{ ++ __ipipe_set_irq_pending(ipipe_head_domain, irq); ++} ++ ++static inline void ipipe_post_irq_root(unsigned int irq) ++{ ++ __ipipe_set_irq_pending(&ipipe_root, irq); ++} ++ ++static inline void ipipe_stall_head(void) ++{ ++ hard_local_irq_disable(); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++} ++ ++static inline unsigned long ipipe_test_and_stall_head(void) ++{ ++ hard_local_irq_disable(); ++ return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++} ++ ++static inline unsigned long ipipe_test_head(void) ++{ ++ unsigned long flags, ret; ++ ++ flags = hard_smp_local_irq_save(); ++ ret = test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++void ipipe_unstall_head(void); ++ ++void __ipipe_restore_head(unsigned long x); ++ ++static inline void ipipe_restore_head(unsigned long x) ++{ ++ ipipe_check_irqoff(); ++ if ((x ^ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status)) & 1) ++ __ipipe_restore_head(x); ++} ++ ++void __ipipe_post_work_root(struct ipipe_work_header *work); ++ ++#define ipipe_post_work_root(p, header) \ ++ do { \ ++ void header_not_at_start(void); \ ++ if (offsetof(typeof(*(p)), header)) { \ ++ header_not_at_start(); \ ++ } \ ++ __ipipe_post_work_root(&(p)->header); \ ++ } while (0) ++ ++int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); ++ ++unsigned long ipipe_critical_enter(void (*syncfn)(void)); ++ ++void ipipe_critical_exit(unsigned long flags); ++ ++void ipipe_prepare_panic(void); ++ ++#ifdef CONFIG_SMP ++#ifndef ipipe_smp_p ++#define ipipe_smp_p (1) ++#endif ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask); ++void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask); ++#else /* !CONFIG_SMP */ ++#define ipipe_smp_p (0) ++static inline ++int ipipe_set_irq_affinity(unsigned int irq, cpumask_t cpumask) { return 0; } ++static inline void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask) { } ++static inline void ipipe_disable_smp(void) { } ++#endif /* CONFIG_SMP */ ++ ++static inline void ipipe_restore_root_nosync(unsigned long x) ++{ ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ __ipipe_restore_root_nosync(x); ++ hard_smp_local_irq_restore(flags); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) ++ __ipipe_lock_irq(irq); ++} ++ ++/* Must be called hw IRQs off. 
*/ ++static inline void ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) ++ __ipipe_unlock_irq(irq); ++} ++ ++static inline struct ipipe_threadinfo *ipipe_current_threadinfo(void) ++{ ++ return ¤t_thread_info()->ipipe_data; ++} ++ ++#define ipipe_task_threadinfo(p) (&task_thread_info(p)->ipipe_data) ++ ++int ipipe_enable_irq(unsigned int irq); ++ ++static inline void ipipe_disable_irq(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return; ++ ++ chip = irq_desc_get_chip(desc); ++ ++ if (WARN_ON_ONCE(chip->irq_disable == NULL && chip->irq_mask == NULL)) ++ return; ++ ++ if (chip->irq_disable) ++ chip->irq_disable(&desc->irq_data); ++ else ++ chip->irq_mask(&desc->irq_data); ++} ++ ++static inline void ipipe_end_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) ++ desc->ipipe_end(desc); ++} ++ ++static inline int ipipe_chained_irq_p(struct irq_desc *desc) ++{ ++ void __ipipe_chained_irq(struct irq_desc *desc); ++ ++ return desc->handle_irq == __ipipe_chained_irq; ++} ++ ++static inline void ipipe_handle_demuxed_irq(unsigned int cascade_irq) ++{ ++ ipipe_trace_irq_entry(cascade_irq); ++ __ipipe_dispatch_irq(cascade_irq, IPIPE_IRQF_NOSYNC); ++ ipipe_trace_irq_exit(cascade_irq); ++} ++ ++static inline void __ipipe_init_threadflags(struct thread_info *ti) ++{ ++ ti->ipipe_flags = 0; ++} ++ ++static inline ++void ipipe_set_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ set_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++void ipipe_clear_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ clear_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++void ipipe_test_and_clear_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ test_and_clear_bit(flag, &ti->ipipe_flags); ++} ++ ++static inline ++int ipipe_test_ti_thread_flag(struct thread_info *ti, int flag) ++{ ++ return test_bit(flag, &ti->ipipe_flags); ++} ++ ++#define ipipe_set_thread_flag(flag) \ ++ ipipe_set_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_clear_thread_flag(flag) \ ++ ipipe_clear_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_test_and_clear_thread_flag(flag) \ ++ ipipe_test_and_clear_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_test_thread_flag(flag) \ ++ ipipe_test_ti_thread_flag(current_thread_info(), flag) ++ ++#define ipipe_enable_notifier(p) \ ++ ipipe_set_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) ++ ++#define ipipe_disable_notifier(p) \ ++ do { \ ++ struct thread_info *ti = task_thread_info(p); \ ++ ipipe_clear_ti_thread_flag(ti, TIP_NOTIFY); \ ++ ipipe_clear_ti_thread_flag(ti, TIP_MAYDAY); \ ++ } while (0) ++ ++#define ipipe_notifier_enabled_p(p) \ ++ ipipe_test_ti_thread_flag(task_thread_info(p), TIP_NOTIFY) ++ ++#define ipipe_raise_mayday(p) \ ++ do { \ ++ struct thread_info *ti = task_thread_info(p); \ ++ ipipe_check_irqoff(); \ ++ if (ipipe_test_ti_thread_flag(ti, TIP_NOTIFY)) \ ++ ipipe_set_ti_thread_flag(ti, TIP_MAYDAY); \ ++ } while (0) ++ ++#define ipipe_enable_user_intret_notifier() \ ++ ipipe_set_thread_flag(TIP_USERINTRET) ++ ++#define ipipe_disable_user_intret_notifier() \ ++ ipipe_clear_thread_flag(TIP_USERINTRET) ++ ++#define ipipe_user_intret_notifier_enabled(ti) \ ++ ipipe_test_ti_thread_flag(ti, TIP_USERINTRET) ++ ++#ifdef CONFIG_IPIPE_TRACE ++void __ipipe_tracer_hrclock_initialized(void); ++#else /* !CONFIG_IPIPE_TRACE */ ++#define 
__ipipe_tracer_hrclock_initialized() do { } while(0) ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++#define ipipe_mm_switch_protect(__flags) do { (void)(__flags); } while (0) ++#define ipipe_mm_switch_unprotect(__flags) do { (void)(__flags); } while (0) ++#else /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++#define ipipe_mm_switch_protect(__flags) \ ++ do { \ ++ (__flags) = hard_local_irq_save(); \ ++ } while (0) ++#define ipipe_mm_switch_unprotect(__flags) \ ++ do { \ ++ hard_local_irq_restore(__flags); \ ++ } while (0) ++#endif /* !CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH */ ++ ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state); ++ ++#else /* !CONFIG_IPIPE */ ++ ++static inline void __ipipe_init_early(void) { } ++ ++static inline void __ipipe_init(void) { } ++ ++static inline void __ipipe_init_proc(void) { } ++ ++static inline void __ipipe_idle(void) { } ++ ++static inline void __ipipe_report_sigwake(struct task_struct *p) { } ++ ++static inline void __ipipe_report_setaffinity(struct task_struct *p, ++ int dest_cpu) { } ++ ++static inline void __ipipe_report_setsched(struct task_struct *p) { } ++ ++static inline void __ipipe_report_exit(struct task_struct *p) { } ++ ++static inline void __ipipe_report_cleanup(struct mm_struct *mm) { } ++ ++static inline void __ipipe_report_ptrace_resume(struct task_struct *p, ++ long request) { } ++ ++#define __ipipe_report_trap(exception, regs) 0 ++ ++#define hard_preempt_disable() ({ preempt_disable(); 0; }) ++#define hard_preempt_enable(flags) ({ preempt_enable(); (void)(flags); }) ++ ++#define __ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); }) ++#define __ipipe_put_cpu(flags) \ ++ do { \ ++ (void)(flags); \ ++ put_cpu(); \ ++ } while (0) ++ ++#define __ipipe_root_tick_p(regs) 1 ++ ++#define ipipe_handle_domain_irq(__domain, __hwirq, __regs) \ ++ handle_domain_irq(__domain, __hwirq, __regs) ++ ++#define ipipe_handle_demuxed_irq(irq) generic_handle_irq(irq) ++ ++#define __ipipe_enter_vm(vmf) do { } while (0) ++ ++static inline void __ipipe_exit_vm(void) { } ++ ++static inline void __ipipe_notify_vm_preemption(void) { } ++ ++#define __ipipe_notify_user_intreturn() 0 ++ ++#define __ipipe_serial_debug(__fmt, __args...) 
do { } while (0) ++ ++#define __ipipe_root_p 1 ++#define ipipe_root_p 1 ++ ++#define ipipe_mm_switch_protect(__flags) do { (void)(__flags); } while (0) ++#define ipipe_mm_switch_unprotect(__flags) do { (void)(__flags); } while (0) ++ ++static inline void __ipipe_init_threadflags(struct thread_info *ti) { } ++ ++static inline void __ipipe_complete_domain_migration(void) { } ++ ++static inline int __ipipe_switch_tail(void) ++{ ++ return 0; ++} ++ ++static inline void __ipipe_nmi_enter(void) { } ++ ++static inline void __ipipe_nmi_exit(void) { } ++ ++#define ipipe_processor_id() smp_processor_id() ++ ++static inline void ipipe_lock_irq(unsigned int irq) { } ++ ++static inline void ipipe_unlock_irq(unsigned int irq) { } ++ ++static inline ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ return 0; ++} ++ ++static inline ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ return true; ++} ++ ++#define ipipe_user_intret_notifier_enabled(ti) 0 ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#ifdef CONFIG_IPIPE_WANT_PTE_PINNING ++void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end); ++#else ++static inline void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end) ++{ } ++#endif ++ ++#ifndef ipipe_root_nr_syscalls ++#define ipipe_root_nr_syscalls(ti) NR_syscalls ++#endif ++ ++#endif /* !__LINUX_IPIPE_H */ +diff -uprN kernel/include/linux/ipipe_lock.h kernel_new/include/linux/ipipe_lock.h +--- kernel/include/linux/ipipe_lock.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_lock.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,329 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_lock.h ++ * ++ * Copyright (C) 2009 Philippe Gerum. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef __LINUX_IPIPE_LOCK_H ++#define __LINUX_IPIPE_LOCK_H ++ ++#include ++ ++typedef struct { ++ arch_spinlock_t arch_lock; ++} __ipipe_spinlock_t; ++ ++#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock)) ++#define ipipe_spinlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t []) ++ ++#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock)) ++#define std_spinlock_raw_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), raw_spinlock_t []) ++ ++#ifdef CONFIG_PREEMPT_RT_FULL ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINTRYLOCK_IRQ(lock) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ ++ else if (std_spinlock_raw_p(lock)) { \ ++ __ipipe_spin_unlock_debug(flags); \ ++ __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ ++ } else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ ({ \ ++ if (ipipe_spinlock_p(lock)) \ ++ arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else __bad_lock_type(); \ ++ (void)0; \ ++ }) ++ ++#define PICK_SPINOP_RET(op, lock, type) \ ++ ({ \ ++ type __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else { __ret__ = -1; __bad_lock_type(); } \ ++ __ret__; \ ++ }) ++ ++#else /* !CONFIG_PREEMPT_RT_FULL */ ++ ++#define std_spinlock(lock) ((spinlock_t *)(lock)) ++#define std_spinlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t *) || \ ++ __builtin_types_compatible_p(typeof(lock), spinlock_t []) ++ ++#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = 
__real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINTRYLOCK_IRQ(lock) \ ++ ({ \ ++ int __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \ ++ else __bad_lock_type(); \ ++ __ret__; \ ++ }) ++ ++#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ ++ do { \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ ++ else { \ ++ __ipipe_spin_unlock_debug(flags); \ ++ if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \ ++ } \ ++ } while (0) ++ ++#define PICK_SPINOP(op, lock) \ ++ ({ \ ++ if (ipipe_spinlock_p(lock)) \ ++ arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __real_raw_spin##op(&std_spinlock(lock)->rlock); \ ++ else __bad_lock_type(); \ ++ (void)0; \ ++ }) ++ ++#define PICK_SPINOP_RET(op, lock, type) \ ++ ({ \ ++ type __ret__; \ ++ if (ipipe_spinlock_p(lock)) \ ++ __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ ++ else if (std_spinlock_raw_p(lock)) \ ++ __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ ++ else if (std_spinlock_p(lock)) \ ++ __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \ ++ else { __ret__ = -1; __bad_lock_type(); } \ ++ __ret__; \ ++ }) ++ ++#endif /* !CONFIG_PREEMPT_RT_FULL */ ++ ++#define arch_spin_lock_init(lock) \ ++ do { \ ++ IPIPE_DEFINE_SPINLOCK(__lock__); \ ++ *((ipipe_spinlock_t *)lock) = __lock__; \ ++ } while (0) ++ ++#define arch_spin_lock_irq(lock) \ ++ do { \ ++ hard_local_irq_disable(); \ ++ arch_spin_lock(lock); \ ++ } while (0) ++ ++#define arch_spin_unlock_irq(lock) \ ++ do { \ ++ arch_spin_unlock(lock); \ ++ hard_local_irq_enable(); \ ++ } while (0) ++ ++typedef struct { ++ arch_rwlock_t arch_lock; ++} __ipipe_rwlock_t; ++ ++#define ipipe_rwlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *) ++ ++#define std_rwlock_p(lock) \ ++ __builtin_types_compatible_p(typeof(lock), rwlock_t *) ++ ++#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock)) ++#define std_rwlock(lock) ((rwlock_t *)(lock)) ++ ++#define PICK_RWOP(op, lock) \ ++ do { \ ++ if (ipipe_rwlock_p(lock)) \ ++ arch##op(&ipipe_rwlock(lock)->arch_lock); \ ++ else if (std_rwlock_p(lock)) \ ++ _raw##op(std_rwlock(lock)); \ ++ else __bad_lock_type(); \ ++ } while (0) ++ ++extern int __bad_lock_type(void); ++ ++#ifdef CONFIG_IPIPE ++ ++#define ipipe_spinlock_t __ipipe_spinlock_t ++#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED ++#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x ++#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x) ++ ++#define IPIPE_SPIN_LOCK_UNLOCKED \ ++ (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED } ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ spin_lock_irqsave(lock, flags) ++ 
++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock_irqrestore(lock, flags) ++ ++#define raw_spin_lock_irqsave_cond(lock, flags) \ ++ raw_spin_lock_irqsave(lock, flags) ++ ++#define raw_spin_unlock_irqrestore_cond(lock, flags) \ ++ raw_spin_unlock_irqrestore(lock, flags) ++ ++void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock); ++ ++int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock); ++ ++unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock); ++ ++int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, ++ unsigned long *x); ++ ++void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, ++ unsigned long x); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x); ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++void __ipipe_spin_unlock_debug(unsigned long flags); ++#else ++#define __ipipe_spin_unlock_debug(flags) do { } while (0) ++#endif ++ ++#define ipipe_rwlock_t __ipipe_rwlock_t ++#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED ++#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x ++ ++#define IPIPE_RW_LOCK_UNLOCKED \ ++ (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED } ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_spinlock_t spinlock_t ++#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) ++#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x ++#define IPIPE_SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(unknown) ++#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) ++#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x ++ ++#define spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ spin_lock(lock); \ ++ } while(0) ++ ++#define spin_unlock_irqrestore_cond(lock, flags) \ ++ spin_unlock(lock) ++ ++#define raw_spin_lock_irqsave_cond(lock, flags) \ ++ do { \ ++ (void)(flags); \ ++ raw_spin_lock(lock); \ ++ } while(0) ++ ++#define raw_spin_unlock_irqrestore_cond(lock, flags) \ ++ raw_spin_unlock(lock) ++ ++#define __ipipe_spin_lock_irq(lock) do { } while (0) ++#define __ipipe_spin_unlock_irq(lock) do { } while (0) ++#define __ipipe_spin_lock_irqsave(lock) 0 ++#define __ipipe_spin_trylock_irq(lock) 1 ++#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; }) ++#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_irqbegin(lock) spin_unlock(lock) ++#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) ++#define __ipipe_spin_unlock_debug(flags) do { } while (0) ++ ++#define ipipe_rwlock_t rwlock_t ++#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x) ++#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x ++#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#endif /* !__LINUX_IPIPE_LOCK_H */ +diff -uprN kernel/include/linux/ipipe_tickdev.h kernel_new/include/linux/ipipe_tickdev.h +--- kernel/include/linux/ipipe_tickdev.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_tickdev.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,167 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_tickdev.h ++ * ++ * Copyright (C) 2007 Philippe Gerum. 
++ * Copyright (C) 2012 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ */ ++ ++#ifndef __LINUX_IPIPE_TICKDEV_H ++#define __LINUX_IPIPE_TICKDEV_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_IPIPE ++ ++struct clock_event_device; ++ ++struct ipipe_hostrt_data { ++ short live; ++ seqcount_t seqcount; ++ time_t wall_time_sec; ++ u32 wall_time_nsec; ++ struct timespec wall_to_monotonic; ++ u64 cycle_last; ++ u64 mask; ++ u32 mult; ++ u32 shift; ++}; ++ ++enum clock_event_mode { ++ CLOCK_EVT_MODE_PERIODIC, ++ CLOCK_EVT_MODE_ONESHOT, ++ CLOCK_EVT_MODE_UNUSED, ++ CLOCK_EVT_MODE_SHUTDOWN, ++}; ++ ++struct ipipe_timer { ++ int irq; ++ void (*request)(struct ipipe_timer *timer, int steal); ++ int (*set)(unsigned long ticks, void *timer); ++ void (*ack)(void); ++ void (*release)(struct ipipe_timer *timer); ++ ++ /* Only if registering a timer directly */ ++ const char *name; ++ unsigned rating; ++ unsigned long freq; ++ unsigned long min_delay_ticks; ++ unsigned long max_delay_ticks; ++ const struct cpumask *cpumask; ++ ++ /* For internal use */ ++ void *timer_set; /* pointer passed to ->set() callback */ ++ struct clock_event_device *host_timer; ++ struct list_head link; ++ ++ /* Conversions between clock frequency and timer frequency */ ++ unsigned c2t_integ; ++ unsigned c2t_frac; ++ ++ /* For clockevent interception */ ++ u32 real_mult; ++ u32 real_shift; ++ void (*mode_handler)(enum clock_event_mode mode, ++ struct clock_event_device *); ++ int orig_mode; ++ int (*orig_set_state_periodic)(struct clock_event_device *); ++ int (*orig_set_state_oneshot)(struct clock_event_device *); ++ int (*orig_set_state_oneshot_stopped)(struct clock_event_device *); ++ int (*orig_set_state_shutdown)(struct clock_event_device *); ++ int (*orig_set_next_event)(unsigned long evt, ++ struct clock_event_device *cdev); ++ unsigned int (*refresh_freq)(void); ++}; ++ ++#define __ipipe_hrtimer_irq __ipipe_raw_cpu_read(ipipe_percpu.hrtimer_irq) ++ ++extern unsigned long __ipipe_hrtimer_freq; ++ ++/* ++ * Called by clockevents_register_device, to register a piggybacked ++ * ipipe timer, if there is one ++ */ ++void ipipe_host_timer_register(struct clock_event_device *clkevt); ++ ++/* ++ * Called by tick_cleanup_dead_cpu, to drop per-CPU timer devices ++ */ ++void ipipe_host_timer_cleanup(struct clock_event_device *clkevt); ++ ++/* ++ * Register a standalone ipipe timer ++ */ ++void ipipe_timer_register(struct ipipe_timer *timer); ++ ++/* ++ * Chooses the best timer for each cpu. Take over its handling. 
++ */ ++int ipipe_select_timers(const struct cpumask *mask); ++ ++/* ++ * Release the per-cpu timers ++ */ ++void ipipe_timers_release(void); ++ ++/* ++ * Start handling the per-cpu timer irq, and intercepting the linux clockevent ++ * device callbacks. ++ */ ++int ipipe_timer_start(void (*tick_handler)(void), ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ unsigned cpu); ++ ++/* ++ * Stop handling a per-cpu timer ++ */ ++void ipipe_timer_stop(unsigned cpu); ++ ++/* ++ * Program the timer ++ */ ++void ipipe_timer_set(unsigned long delay); ++ ++const char *ipipe_timer_name(void); ++ ++unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns); ++ ++void __ipipe_timer_refresh_freq(unsigned int hrclock_freq); ++ ++#else /* !CONFIG_IPIPE */ ++ ++#define ipipe_host_timer_register(clkevt) do { } while (0) ++ ++#define ipipe_host_timer_cleanup(clkevt) do { } while (0) ++ ++#endif /* !CONFIG_IPIPE */ ++ ++#ifdef CONFIG_IPIPE_HAVE_HOSTRT ++void ipipe_update_hostrt(struct timekeeper *tk); ++#else ++static inline void ++ipipe_update_hostrt(struct timekeeper *tk) {} ++#endif ++ ++#endif /* __LINUX_IPIPE_TICKDEV_H */ +diff -uprN kernel/include/linux/ipipe_trace.h kernel_new/include/linux/ipipe_trace.h +--- kernel/include/linux/ipipe_trace.h 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/ipipe_trace.h 2021-04-01 18:28:07.800863126 +0800 +@@ -0,0 +1,78 @@ ++/* -*- linux-c -*- ++ * include/linux/ipipe_trace.h ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2007 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#ifndef _LINUX_IPIPE_TRACE_H ++#define _LINUX_IPIPE_TRACE_H ++ ++#ifdef CONFIG_IPIPE_TRACE ++ ++#include ++ ++struct pt_regs; ++ ++void ipipe_trace_begin(unsigned long v); ++void ipipe_trace_end(unsigned long v); ++void ipipe_trace_freeze(unsigned long v); ++void ipipe_trace_special(unsigned char special_id, unsigned long v); ++void ipipe_trace_pid(pid_t pid, short prio); ++void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); ++int ipipe_trace_max_reset(void); ++int ipipe_trace_frozen_reset(void); ++void ipipe_trace_irqbegin(int irq, struct pt_regs *regs); ++void ipipe_trace_irqend(int irq, struct pt_regs *regs); ++ ++#else /* !CONFIG_IPIPE_TRACE */ ++ ++#define ipipe_trace_begin(v) do { (void)(v); } while(0) ++#define ipipe_trace_end(v) do { (void)(v); } while(0) ++#define ipipe_trace_freeze(v) do { (void)(v); } while(0) ++#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) ++#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) ++#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) ++#define ipipe_trace_max_reset() ({ 0; }) ++#define ipipe_trace_frozen_reset() ({ 0; }) ++#define ipipe_trace_irqbegin(irq, regs) do { } while(0) ++#define ipipe_trace_irqend(irq, regs) do { } while(0) ++ ++#endif /* !CONFIG_IPIPE_TRACE */ ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++void ipipe_trace_panic_freeze(void); ++void ipipe_trace_panic_dump(void); ++#else ++static inline void ipipe_trace_panic_freeze(void) { } ++static inline void ipipe_trace_panic_dump(void) { } ++#endif ++ ++#ifdef CONFIG_IPIPE_TRACE_IRQSOFF ++#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) ++#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) ++#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) ++#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) ++#else ++#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) ++#define ipipe_trace_irqsoff() do { } while(0) ++#define ipipe_trace_irqson() do { } while(0) ++#endif ++ ++#endif /* !__LINUX_IPIPE_TRACE_H */ +diff -uprN kernel/include/linux/irqchip/arm-gic.h kernel_new/include/linux/irqchip/arm-gic.h +--- kernel/include/linux/irqchip/arm-gic.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h 2021-04-02 09:12:21.831373736 +0800 +@@ -65,6 +65,11 @@ + #define GICD_INT_EN_CLR_X32 0xffffffff + #define GICD_INT_EN_SET_SGI 0x0000ffff + #define GICD_INT_EN_CLR_PPI 0xffff0000 ++#ifndef CONFIG_IPIPE ++#define GICD_INT_DEF_PRI 0xa0 ++#else ++#define GICD_INT_DEF_PRI 0x10 ++#endif + + #define GICD_IIDR_IMPLEMENTER_SHIFT 0 + #define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +diff -uprN kernel/include/linux/irqchip/arm-gic.h.orig kernel_new/include/linux/irqchip/arm-gic.h.orig +--- kernel/include/linux/irqchip/arm-gic.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h.orig 2021-04-01 18:28:07.801863125 +0800 +@@ -0,0 +1,173 @@ ++/* ++ * include/linux/irqchip/arm-gic.h ++ * ++ * Copyright (C) 2002 ARM Limited, All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#ifndef __LINUX_IRQCHIP_ARM_GIC_H ++#define __LINUX_IRQCHIP_ARM_GIC_H ++ ++#define GIC_CPU_CTRL 0x00 ++#define GIC_CPU_PRIMASK 0x04 ++#define GIC_CPU_BINPOINT 0x08 ++#define GIC_CPU_INTACK 0x0c ++#define GIC_CPU_EOI 0x10 ++#define GIC_CPU_RUNNINGPRI 0x14 ++#define GIC_CPU_HIGHPRI 0x18 ++#define GIC_CPU_ALIAS_BINPOINT 0x1c ++#define GIC_CPU_ACTIVEPRIO 0xd0 ++#define GIC_CPU_IDENT 0xfc ++#define GIC_CPU_DEACTIVATE 0x1000 ++ ++#define GICC_ENABLE 0x1 ++#define GICC_INT_PRI_THRESHOLD 0xf0 ++ ++#define GIC_CPU_CTRL_EnableGrp0_SHIFT 0 ++#define GIC_CPU_CTRL_EnableGrp0 (1 << GIC_CPU_CTRL_EnableGrp0_SHIFT) ++#define GIC_CPU_CTRL_EnableGrp1_SHIFT 1 ++#define GIC_CPU_CTRL_EnableGrp1 (1 << GIC_CPU_CTRL_EnableGrp1_SHIFT) ++#define GIC_CPU_CTRL_AckCtl_SHIFT 2 ++#define GIC_CPU_CTRL_AckCtl (1 << GIC_CPU_CTRL_AckCtl_SHIFT) ++#define GIC_CPU_CTRL_FIQEn_SHIFT 3 ++#define GIC_CPU_CTRL_FIQEn (1 << GIC_CPU_CTRL_FIQEn_SHIFT) ++#define GIC_CPU_CTRL_CBPR_SHIFT 4 ++#define GIC_CPU_CTRL_CBPR (1 << GIC_CPU_CTRL_CBPR_SHIFT) ++#define GIC_CPU_CTRL_EOImodeNS_SHIFT 9 ++#define GIC_CPU_CTRL_EOImodeNS (1 << GIC_CPU_CTRL_EOImodeNS_SHIFT) ++ ++#define GICC_IAR_INT_ID_MASK 0x3ff ++#define GICC_INT_SPURIOUS 1023 ++#define GICC_DIS_BYPASS_MASK 0x1e0 ++ ++#define GIC_DIST_CTRL 0x000 ++#define GIC_DIST_CTR 0x004 ++#define GIC_DIST_IIDR 0x008 ++#define GIC_DIST_IGROUP 0x080 ++#define GIC_DIST_ENABLE_SET 0x100 ++#define GIC_DIST_ENABLE_CLEAR 0x180 ++#define GIC_DIST_PENDING_SET 0x200 ++#define GIC_DIST_PENDING_CLEAR 0x280 ++#define GIC_DIST_ACTIVE_SET 0x300 ++#define GIC_DIST_ACTIVE_CLEAR 0x380 ++#define GIC_DIST_PRI 0x400 ++#define GIC_DIST_TARGET 0x800 ++#define GIC_DIST_CONFIG 0xc00 ++#define GIC_DIST_SOFTINT 0xf00 ++#define GIC_DIST_SGI_PENDING_CLEAR 0xf10 ++#define GIC_DIST_SGI_PENDING_SET 0xf20 ++ ++#define GICD_ENABLE 0x1 ++#define GICD_DISABLE 0x0 ++#define GICD_INT_ACTLOW_LVLTRIG 0x0 ++#define GICD_INT_EN_CLR_X32 0xffffffff ++#define GICD_INT_EN_SET_SGI 0x0000ffff ++#define GICD_INT_EN_CLR_PPI 0xffff0000 ++ ++#define GICD_IIDR_IMPLEMENTER_SHIFT 0 ++#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) ++#define GICD_IIDR_REVISION_SHIFT 12 ++#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) ++#define GICD_IIDR_VARIANT_SHIFT 16 ++#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) ++#define GICD_IIDR_PRODUCT_ID_SHIFT 24 ++#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) ++ ++ ++#define GICH_HCR 0x0 ++#define GICH_VTR 0x4 ++#define GICH_VMCR 0x8 ++#define GICH_MISR 0x10 ++#define GICH_EISR0 0x20 ++#define GICH_EISR1 0x24 ++#define GICH_ELRSR0 0x30 ++#define GICH_ELRSR1 0x34 ++#define GICH_APR 0xf0 ++#define GICH_LR0 0x100 ++ ++#define GICH_HCR_EN (1 << 0) ++#define GICH_HCR_UIE (1 << 1) ++#define GICH_HCR_NPIE (1 << 3) ++ ++#define GICH_LR_VIRTUALID (0x3ff << 0) ++#define GICH_LR_PHYSID_CPUID_SHIFT (10) ++#define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) ++#define GICH_LR_PRIORITY_SHIFT 23 ++#define GICH_LR_STATE (3 << 28) ++#define GICH_LR_PENDING_BIT (1 << 28) ++#define GICH_LR_ACTIVE_BIT (1 << 29) ++#define GICH_LR_EOI (1 << 19) ++#define GICH_LR_GROUP1 (1 << 30) ++#define GICH_LR_HW (1 << 31) ++ ++#define GICH_VMCR_ENABLE_GRP0_SHIFT 0 ++#define GICH_VMCR_ENABLE_GRP0_MASK (1 << GICH_VMCR_ENABLE_GRP0_SHIFT) ++#define GICH_VMCR_ENABLE_GRP1_SHIFT 1 ++#define GICH_VMCR_ENABLE_GRP1_MASK (1 << GICH_VMCR_ENABLE_GRP1_SHIFT) ++#define GICH_VMCR_ACK_CTL_SHIFT 2 ++#define GICH_VMCR_ACK_CTL_MASK (1 << GICH_VMCR_ACK_CTL_SHIFT) ++#define 
GICH_VMCR_FIQ_EN_SHIFT 3 ++#define GICH_VMCR_FIQ_EN_MASK (1 << GICH_VMCR_FIQ_EN_SHIFT) ++#define GICH_VMCR_CBPR_SHIFT 4 ++#define GICH_VMCR_CBPR_MASK (1 << GICH_VMCR_CBPR_SHIFT) ++#define GICH_VMCR_EOI_MODE_SHIFT 9 ++#define GICH_VMCR_EOI_MODE_MASK (1 << GICH_VMCR_EOI_MODE_SHIFT) ++ ++#define GICH_VMCR_PRIMASK_SHIFT 27 ++#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) ++#define GICH_VMCR_BINPOINT_SHIFT 21 ++#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT) ++#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18 ++#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT) ++ ++#define GICH_MISR_EOI (1 << 0) ++#define GICH_MISR_U (1 << 1) ++ ++#define GICV_PMR_PRIORITY_SHIFT 3 ++#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++ ++struct device_node; ++struct gic_chip_data; ++ ++void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); ++int gic_cpu_if_down(unsigned int gic_nr); ++void gic_cpu_save(struct gic_chip_data *gic); ++void gic_cpu_restore(struct gic_chip_data *gic); ++void gic_dist_save(struct gic_chip_data *gic); ++void gic_dist_restore(struct gic_chip_data *gic); ++ ++/* ++ * Subdrivers that need some preparatory work can initialize their ++ * chips and call this to register their GICs. ++ */ ++int gic_of_init(struct device_node *node, struct device_node *parent); ++ ++/* ++ * Initialises and registers a non-root or child GIC chip. Memory for ++ * the gic_chip_data structure is dynamically allocated. ++ */ ++int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq); ++ ++/* ++ * Legacy platforms not converted to DT yet must use this to init ++ * their GIC ++ */ ++void gic_init(unsigned int nr, int start, ++ void __iomem *dist , void __iomem *cpu); ++ ++int gicv2m_init(struct fwnode_handle *parent_handle, ++ struct irq_domain *parent); ++ ++void gic_send_sgi(unsigned int cpu_id, unsigned int irq); ++int gic_get_cpu_id(unsigned int cpu); ++void gic_migrate_target(unsigned int new_cpu_id); ++unsigned long gic_get_sgir_physaddr(void); ++ ++#endif /* __ASSEMBLY */ ++#endif +diff -uprN kernel/include/linux/irqchip/arm-gic.h.rej kernel_new/include/linux/irqchip/arm-gic.h.rej +--- kernel/include/linux/irqchip/arm-gic.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqchip/arm-gic.h.rej 2021-04-01 18:28:07.801863125 +0800 +@@ -0,0 +1,14 @@ ++--- include/linux/irqchip/arm-gic.h 2019-12-18 03:36:04.000000000 +0800 +++++ include/linux/irqchip/arm-gic.h 2021-03-22 09:21:43.212415388 +0800 ++@@ -65,7 +65,11 @@ ++ #define GICD_INT_EN_CLR_X32 0xffffffff ++ #define GICD_INT_EN_SET_SGI 0x0000ffff ++ #define GICD_INT_EN_CLR_PPI 0xffff0000 +++#ifndef CONFIG_IPIPE ++ #define GICD_INT_DEF_PRI 0xa0 +++#else +++#define GICD_INT_DEF_PRI 0x10 +++#endif ++ #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ ++ (GICD_INT_DEF_PRI << 16) |\ ++ (GICD_INT_DEF_PRI << 8) |\ +diff -uprN kernel/include/linux/irqdesc.h kernel_new/include/linux/irqdesc.h +--- kernel/include/linux/irqdesc.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqdesc.h 2021-04-01 18:28:07.801863125 +0800 +@@ -56,6 +56,10 @@ struct irq_desc { + struct irq_common_data irq_common_data; + struct irq_data irq_data; + unsigned int __percpu *kstat_irqs; ++#ifdef CONFIG_IPIPE ++ void (*ipipe_ack)(struct irq_desc *desc); ++ void (*ipipe_end)(struct irq_desc *desc); ++#endif /* CONFIG_IPIPE */ + irq_flow_handler_t handle_irq; + #ifdef CONFIG_IRQ_PREFLOW_FASTEOI + irq_preflow_handler_t 
preflow_handler; +@@ -185,6 +189,10 @@ static inline int irq_desc_has_action(st + return desc->action != NULL; + } + ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained); ++ + static inline int irq_has_action(unsigned int irq) + { + return irq_desc_has_action(irq_to_desc(irq)); +diff -uprN kernel/include/linux/irqdesc.h.orig kernel_new/include/linux/irqdesc.h.orig +--- kernel/include/linux/irqdesc.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irqdesc.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_IRQDESC_H ++#define _LINUX_IRQDESC_H ++ ++#include ++#include ++#include ++ ++/* ++ * Core internal functions to deal with irq descriptors ++ */ ++ ++struct irq_affinity_notify; ++struct proc_dir_entry; ++struct module; ++struct irq_desc; ++struct irq_domain; ++struct pt_regs; ++ ++/** ++ * struct irq_desc - interrupt descriptor ++ * @irq_common_data: per irq and chip data passed down to chip functions ++ * @kstat_irqs: irq stats per cpu ++ * @handle_irq: highlevel irq-events handler ++ * @preflow_handler: handler called before the flow handler (currently used by sparc) ++ * @action: the irq action chain ++ * @status: status information ++ * @core_internal_state__do_not_mess_with_it: core internal status information ++ * @depth: disable-depth, for nested irq_disable() calls ++ * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers ++ * @irq_count: stats field to detect stalled irqs ++ * @last_unhandled: aging timer for unhandled count ++ * @irqs_unhandled: stats field for spurious unhandled interrupts ++ * @threads_handled: stats field for deferred spurious detection of threaded handlers ++ * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers ++ * @lock: locking for SMP ++ * @affinity_hint: hint to user space for preferred irq affinity ++ * @affinity_notify: context for notification of affinity changes ++ * @pending_mask: pending rebalanced interrupts ++ * @threads_oneshot: bitfield to handle shared oneshot threads ++ * @threads_active: number of irqaction threads currently running ++ * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers ++ * @nr_actions: number of installed actions on this descriptor ++ * @no_suspend_depth: number of irqactions on a irq descriptor with ++ * IRQF_NO_SUSPEND set ++ * @force_resume_depth: number of irqactions on a irq descriptor with ++ * IRQF_FORCE_RESUME set ++ * @rcu: rcu head for delayed free ++ * @kobj: kobject used to represent this struct in sysfs ++ * @request_mutex: mutex to protect request/free before locking desc->lock ++ * @dir: /proc/irq/ procfs entry ++ * @debugfs_file: dentry for the debugfs file ++ * @name: flow handler name for /proc/interrupts output ++ */ ++struct irq_desc { ++ struct irq_common_data irq_common_data; ++ struct irq_data irq_data; ++ unsigned int __percpu *kstat_irqs; ++ irq_flow_handler_t handle_irq; ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++ irq_preflow_handler_t preflow_handler; ++#endif ++ struct irqaction *action; /* IRQ action list */ ++ unsigned int status_use_accessors; ++ unsigned int core_internal_state__do_not_mess_with_it; ++ unsigned int depth; /* nested irq disables */ ++ unsigned int wake_depth; /* nested wake enables */ ++ unsigned int tot_count; ++ unsigned int irq_count; /* For detecting broken IRQs */ ++ unsigned long last_unhandled; /* Aging timer for unhandled count */ ++ unsigned int 
irqs_unhandled; ++ atomic_t threads_handled; ++ int threads_handled_last; ++ raw_spinlock_t lock; ++ struct cpumask *percpu_enabled; ++ const struct cpumask *percpu_affinity; ++#ifdef CONFIG_SMP ++ const struct cpumask *affinity_hint; ++ struct irq_affinity_notify *affinity_notify; ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ cpumask_var_t pending_mask; ++#endif ++#endif ++ unsigned long threads_oneshot; ++ atomic_t threads_active; ++ wait_queue_head_t wait_for_threads; ++#ifdef CONFIG_PM_SLEEP ++ unsigned int nr_actions; ++ unsigned int no_suspend_depth; ++ unsigned int cond_suspend_depth; ++ unsigned int force_resume_depth; ++#endif ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *dir; ++#endif ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++ struct dentry *debugfs_file; ++ const char *dev_name; ++#endif ++#ifdef CONFIG_SPARSE_IRQ ++ struct rcu_head rcu; ++ struct kobject kobj; ++#endif ++ struct mutex request_mutex; ++ int parent_irq; ++ struct module *owner; ++ const char *name; ++} ____cacheline_internodealigned_in_smp; ++ ++#ifdef CONFIG_SPARSE_IRQ ++extern void irq_lock_sparse(void); ++extern void irq_unlock_sparse(void); ++#else ++static inline void irq_lock_sparse(void) { } ++static inline void irq_unlock_sparse(void) { } ++extern struct irq_desc irq_desc[NR_IRQS]; ++#endif ++ ++static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) ++{ ++ return container_of(data->common, struct irq_desc, irq_common_data); ++} ++ ++static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) ++{ ++ return desc->irq_data.irq; ++} ++ ++static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) ++{ ++ return &desc->irq_data; ++} ++ ++static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc) ++{ ++ return desc->irq_data.chip; ++} ++ ++static inline void *irq_desc_get_chip_data(struct irq_desc *desc) ++{ ++ return desc->irq_data.chip_data; ++} ++ ++static inline void *irq_desc_get_handler_data(struct irq_desc *desc) ++{ ++ return desc->irq_common_data.handler_data; ++} ++ ++/* ++ * Architectures call this to let the generic IRQ layer ++ * handle an interrupt. ++ */ ++static inline void generic_handle_irq_desc(struct irq_desc *desc) ++{ ++ desc->handle_irq(desc); ++} ++ ++int generic_handle_irq(unsigned int irq); ++ ++#ifdef CONFIG_HANDLE_DOMAIN_IRQ ++/* ++ * Convert a HW interrupt number to a logical one using a IRQ domain, ++ * and handle the result interrupt number. Return -EINVAL if ++ * conversion failed. Providing a NULL domain indicates that the ++ * conversion has already been done. 
++ */ ++int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, ++ bool lookup, struct pt_regs *regs); ++ ++static inline int handle_domain_irq(struct irq_domain *domain, ++ unsigned int hwirq, struct pt_regs *regs) ++{ ++ return __handle_domain_irq(domain, hwirq, true, regs); ++} ++ ++#ifdef CONFIG_IRQ_DOMAIN ++int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, ++ struct pt_regs *regs); ++#endif ++#endif ++ ++/* Test to see if a driver has successfully requested an irq */ ++static inline int irq_desc_has_action(struct irq_desc *desc) ++{ ++ return desc->action != NULL; ++} ++ ++static inline int irq_has_action(unsigned int irq) ++{ ++ return irq_desc_has_action(irq_to_desc(irq)); ++} ++ ++/** ++ * irq_set_handler_locked - Set irq handler from a locked region ++ * @data: Pointer to the irq_data structure which identifies the irq ++ * @handler: Flow control handler function for this interrupt ++ * ++ * Sets the handler in the irq descriptor associated to @data. ++ * ++ * Must be called with irq_desc locked and valid parameters. Typical ++ * call site is the irq_set_type() callback. ++ */ ++static inline void irq_set_handler_locked(struct irq_data *data, ++ irq_flow_handler_t handler) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ desc->handle_irq = handler; ++} ++ ++/** ++ * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region ++ * @data: Pointer to the irq_data structure for which the chip is set ++ * @chip: Pointer to the new irq chip ++ * @handler: Flow control handler function for this interrupt ++ * @name: Name of the interrupt ++ * ++ * Replace the irq chip at the proper hierarchy level in @data and ++ * sets the handler and name in the associated irq descriptor. ++ * ++ * Must be called with irq_desc locked and valid parameters. 
++ */ ++static inline void ++irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, ++ irq_flow_handler_t handler, const char *name) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ desc->handle_irq = handler; ++ desc->name = name; ++ data->chip = chip; ++} ++ ++static inline bool irq_balancing_disabled(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; ++} ++ ++static inline bool irq_is_percpu(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_PER_CPU; ++} ++ ++static inline bool irq_is_percpu_devid(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ return desc->status_use_accessors & IRQ_PER_CPU_DEVID; ++} ++ ++static inline void ++irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, ++ struct lock_class_key *request_class) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ lockdep_set_class(&desc->lock, lock_class); ++ lockdep_set_class(&desc->request_mutex, request_class); ++ } ++} ++ ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++static inline void ++__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ desc->preflow_handler = handler; ++} ++#endif ++ ++#endif +diff -uprN kernel/include/linux/irqflags.h kernel_new/include/linux/irqflags.h +--- kernel/include/linux/irqflags.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqflags.h 2021-04-01 18:28:07.801863125 +0800 +@@ -148,6 +148,18 @@ do { \ + + #endif /* CONFIG_TRACE_IRQFLAGS */ + ++#ifdef CONFIG_IPIPE ++#define local_irq_enable_full() local_irq_enable() ++#define local_irq_disable_full() \ ++ do { \ ++ local_irq_disable(); \ ++ hard_local_irq_disable(); \ ++ } while (0) ++#else ++#define local_irq_enable_full() local_irq_enable() ++#define local_irq_disable_full() local_irq_disable() ++#endif ++ + #define local_save_flags(flags) raw_local_save_flags(flags) + + /* +diff -uprN kernel/include/linux/irq.h kernel_new/include/linux/irq.h +--- kernel/include/linux/irq.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irq.h 2021-04-02 09:15:38.162510091 +0800 +@@ -497,6 +497,11 @@ struct irq_chip { + + void (*irq_bus_lock)(struct irq_data *data); + void (*irq_bus_sync_unlock)(struct irq_data *data); ++#ifdef CONFIG_IPIPE ++ void (*irq_move)(struct irq_data *data); ++ void (*irq_hold)(struct irq_data *data); ++ void (*irq_release)(struct irq_data *data); ++#endif /* CONFIG_IPIPE */ + + void (*irq_cpu_online)(struct irq_data *data); + void (*irq_cpu_offline)(struct irq_data *data); +@@ -541,6 +546,7 @@ struct irq_chip { + * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs + * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips ++ * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode + */ + enum { + IRQCHIP_SET_TYPE_MASKED = (1 << 0), +@@ -551,6 +557,7 @@ enum { + IRQCHIP_ONESHOT_SAFE = (1 << 5), + IRQCHIP_EOI_THREADED = (1 << 6), + IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), ++ IRQCHIP_PIPELINE_SAFE = (1 << 7), + IRQCHIP_SUPPORTS_NMI = (1 << 8), + }; + +@@ -647,6 +654,11 @@ extern int irq_chip_retrigger_hierarchy( + extern void irq_chip_mask_parent(struct irq_data *data); + extern void irq_chip_unmask_parent(struct irq_data *data); + extern void 
irq_chip_eoi_parent(struct irq_data *data); ++#ifdef CONFIG_IPIPE ++extern void irq_chip_hold_parent(struct irq_data *data); ++extern void irq_chip_release_parent(struct irq_data *data); ++#endif ++ + extern int irq_chip_set_affinity_parent(struct irq_data *data, + const struct cpumask *dest, + bool force); +@@ -771,7 +783,14 @@ extern int irq_set_irq_type(unsigned int + extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); + extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry); +-extern struct irq_data *irq_get_irq_data(unsigned int irq); ++ ++static inline __attribute__((const)) struct irq_data * ++irq_get_irq_data(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return desc ? &desc->irq_data : NULL; ++} + + static inline struct irq_chip *irq_get_chip(unsigned int irq) + { +@@ -1013,7 +1032,11 @@ struct irq_chip_type { + * different flow mechanisms (level/edge) for it. + */ + struct irq_chip_generic { ++#ifdef CONFIG_IPIPE ++ ipipe_spinlock_t lock; ++#else + raw_spinlock_t lock; ++#endif + void __iomem *reg_base; + u32 (*reg_readl)(void __iomem *addr); + void (*reg_writel)(u32 val, void __iomem *addr); +@@ -1141,18 +1164,28 @@ static inline struct irq_chip_type *irq_ + #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) + + #ifdef CONFIG_SMP +-static inline void irq_gc_lock(struct irq_chip_generic *gc) ++static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) + { +- raw_spin_lock(&gc->lock); ++ unsigned long flags = 0; ++ raw_spin_lock_irqsave_cond(&gc->lock, flags); ++ return flags; + } + +-static inline void irq_gc_unlock(struct irq_chip_generic *gc) ++static inline void ++irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) + { +- raw_spin_unlock(&gc->lock); ++ raw_spin_unlock_irqrestore_cond(&gc->lock, flags); + } + #else +-static inline void irq_gc_lock(struct irq_chip_generic *gc) { } +-static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } ++static inline unsigned long irq_gc_lock(struct irq_chip_generic *gc) ++{ ++ return hard_cond_local_irq_save(); ++} ++static inline void ++irq_gc_unlock(struct irq_chip_generic *gc, unsigned long flags) ++{ ++ hard_cond_local_irq_restore(flags); ++} + #endif + + /* +diff -uprN kernel/include/linux/irq.h.orig kernel_new/include/linux/irq.h.orig +--- kernel/include/linux/irq.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irq.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1235 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_IRQ_H ++#define _LINUX_IRQ_H ++ ++/* ++ * Please do not include this file in generic code. There is currently ++ * no requirement for any architecture to implement anything held ++ * within this file. ++ * ++ * Thanks. --rmk ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++struct seq_file; ++struct module; ++struct msi_msg; ++enum irqchip_irq_state; ++ ++/* ++ * IRQ line status. 
++ * ++ * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h ++ * ++ * IRQ_TYPE_NONE - default, unspecified type ++ * IRQ_TYPE_EDGE_RISING - rising edge triggered ++ * IRQ_TYPE_EDGE_FALLING - falling edge triggered ++ * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered ++ * IRQ_TYPE_LEVEL_HIGH - high level triggered ++ * IRQ_TYPE_LEVEL_LOW - low level triggered ++ * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits ++ * IRQ_TYPE_SENSE_MASK - Mask for all the above bits ++ * IRQ_TYPE_DEFAULT - For use by some PICs to ask irq_set_type ++ * to setup the HW to a sane default (used ++ * by irqdomain map() callbacks to synchronize ++ * the HW state and SW flags for a newly ++ * allocated descriptor). ++ * ++ * IRQ_TYPE_PROBE - Special flag for probing in progress ++ * ++ * Bits which can be modified via irq_set/clear/modify_status_flags() ++ * IRQ_LEVEL - Interrupt is level type. Will be also ++ * updated in the code when the above trigger ++ * bits are modified via irq_set_irq_type() ++ * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect ++ * it from affinity setting ++ * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing ++ * IRQ_NOREQUEST - Interrupt cannot be requested via ++ * request_irq() ++ * IRQ_NOTHREAD - Interrupt cannot be threaded ++ * IRQ_NOAUTOEN - Interrupt is not automatically enabled in ++ * request/setup_irq() ++ * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) ++ * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context ++ * IRQ_NESTED_THREAD - Interrupt nests into another thread ++ * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable ++ * IRQ_IS_POLLED - Always polled by another interrupt. Exclude ++ * it from the spurious interrupt detection ++ * mechanism and from core side polling. ++ * IRQ_DISABLE_UNLAZY - Disable lazy irq disable ++ */ ++enum { ++ IRQ_TYPE_NONE = 0x00000000, ++ IRQ_TYPE_EDGE_RISING = 0x00000001, ++ IRQ_TYPE_EDGE_FALLING = 0x00000002, ++ IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING), ++ IRQ_TYPE_LEVEL_HIGH = 0x00000004, ++ IRQ_TYPE_LEVEL_LOW = 0x00000008, ++ IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH), ++ IRQ_TYPE_SENSE_MASK = 0x0000000f, ++ IRQ_TYPE_DEFAULT = IRQ_TYPE_SENSE_MASK, ++ ++ IRQ_TYPE_PROBE = 0x00000010, ++ ++ IRQ_LEVEL = (1 << 8), ++ IRQ_PER_CPU = (1 << 9), ++ IRQ_NOPROBE = (1 << 10), ++ IRQ_NOREQUEST = (1 << 11), ++ IRQ_NOAUTOEN = (1 << 12), ++ IRQ_NO_BALANCING = (1 << 13), ++ IRQ_MOVE_PCNTXT = (1 << 14), ++ IRQ_NESTED_THREAD = (1 << 15), ++ IRQ_NOTHREAD = (1 << 16), ++ IRQ_PER_CPU_DEVID = (1 << 17), ++ IRQ_IS_POLLED = (1 << 18), ++ IRQ_DISABLE_UNLAZY = (1 << 19), ++}; ++ ++#define IRQF_MODIFY_MASK \ ++ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ ++ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ ++ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ ++ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) ++ ++#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) ++ ++/* ++ * Return value for chip->irq_set_affinity() ++ * ++ * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity ++ * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity ++ * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to ++ * support stacked irqchips, which indicates skipping ++ * all descendent irqchips. 
++ */ ++enum { ++ IRQ_SET_MASK_OK = 0, ++ IRQ_SET_MASK_OK_NOCOPY, ++ IRQ_SET_MASK_OK_DONE, ++}; ++ ++struct msi_desc; ++struct irq_domain; ++ ++/** ++ * struct irq_common_data - per irq data shared by all irqchips ++ * @state_use_accessors: status information for irq chip functions. ++ * Use accessor functions to deal with it ++ * @node: node index useful for balancing ++ * @handler_data: per-IRQ data for the irq_chip methods ++ * @affinity: IRQ affinity on SMP. If this is an IPI ++ * related irq, then this is the mask of the ++ * CPUs to which an IPI can be sent. ++ * @effective_affinity: The effective IRQ affinity on SMP as some irq ++ * chips do not allow multi CPU destinations. ++ * A subset of @affinity. ++ * @msi_desc: MSI descriptor ++ * @ipi_offset: Offset of first IPI target cpu in @affinity. Optional. ++ */ ++struct irq_common_data { ++ unsigned int __private state_use_accessors; ++#ifdef CONFIG_NUMA ++ unsigned int node; ++#endif ++ void *handler_data; ++ struct msi_desc *msi_desc; ++ cpumask_var_t affinity; ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++ cpumask_var_t effective_affinity; ++#endif ++#ifdef CONFIG_GENERIC_IRQ_IPI ++ unsigned int ipi_offset; ++#endif ++}; ++ ++/** ++ * struct irq_data - per irq chip data passed down to chip functions ++ * @mask: precomputed bitmask for accessing the chip registers ++ * @irq: interrupt number ++ * @hwirq: hardware interrupt number, local to the interrupt domain ++ * @common: point to data shared by all irqchips ++ * @chip: low level interrupt hardware access ++ * @domain: Interrupt translation domain; responsible for mapping ++ * between hwirq number and linux irq number. ++ * @parent_data: pointer to parent struct irq_data to support hierarchy ++ * irq_domain ++ * @chip_data: platform-specific per-chip private data for the chip ++ * methods, to allow shared chip implementations ++ */ ++struct irq_data { ++ u32 mask; ++ unsigned int irq; ++ unsigned long hwirq; ++ struct irq_common_data *common; ++ struct irq_chip *chip; ++ struct irq_domain *domain; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ struct irq_data *parent_data; ++#endif ++ void *chip_data; ++}; ++ ++/* ++ * Bit masks for irq_common_data.state_use_accessors ++ * ++ * IRQD_TRIGGER_MASK - Mask for the trigger type bits ++ * IRQD_SETAFFINITY_PENDING - Affinity setting is pending ++ * IRQD_ACTIVATED - Interrupt has already been activated ++ * IRQD_NO_BALANCING - Balancing disabled for this IRQ ++ * IRQD_PER_CPU - Interrupt is per cpu ++ * IRQD_AFFINITY_SET - Interrupt affinity was set ++ * IRQD_LEVEL - Interrupt is level triggered ++ * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup ++ * from suspend ++ * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process ++ * context ++ * IRQD_IRQ_DISABLED - Disabled state of the interrupt ++ * IRQD_IRQ_MASKED - Masked state of the interrupt ++ * IRQD_IRQ_INPROGRESS - In progress state of the interrupt ++ * IRQD_WAKEUP_ARMED - Wakeup mode armed ++ * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU ++ * IRQD_AFFINITY_MANAGED - Affinity is auto-managed by the kernel ++ * IRQD_IRQ_STARTED - Startup state of the interrupt ++ * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity ++ * mask. Applies only to affinity managed irqs. 
++ * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target ++ * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set ++ * IRQD_CAN_RESERVE - Can use reservation mode ++ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change ++ * required ++ * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call ++ * irq_chip::irq_set_affinity() when deactivated. ++ */ ++enum { ++ IRQD_TRIGGER_MASK = 0xf, ++ IRQD_SETAFFINITY_PENDING = (1 << 8), ++ IRQD_ACTIVATED = (1 << 9), ++ IRQD_NO_BALANCING = (1 << 10), ++ IRQD_PER_CPU = (1 << 11), ++ IRQD_AFFINITY_SET = (1 << 12), ++ IRQD_LEVEL = (1 << 13), ++ IRQD_WAKEUP_STATE = (1 << 14), ++ IRQD_MOVE_PCNTXT = (1 << 15), ++ IRQD_IRQ_DISABLED = (1 << 16), ++ IRQD_IRQ_MASKED = (1 << 17), ++ IRQD_IRQ_INPROGRESS = (1 << 18), ++ IRQD_WAKEUP_ARMED = (1 << 19), ++ IRQD_FORWARDED_TO_VCPU = (1 << 20), ++ IRQD_AFFINITY_MANAGED = (1 << 21), ++ IRQD_IRQ_STARTED = (1 << 22), ++ IRQD_MANAGED_SHUTDOWN = (1 << 23), ++ IRQD_SINGLE_TARGET = (1 << 24), ++ IRQD_DEFAULT_TRIGGER_SET = (1 << 25), ++ IRQD_CAN_RESERVE = (1 << 26), ++ IRQD_MSI_NOMASK_QUIRK = (1 << 27), ++ IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), ++}; ++ ++#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) ++ ++static inline bool irqd_is_setaffinity_pending(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline bool irqd_is_per_cpu(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_PER_CPU; ++} ++ ++static inline bool irqd_can_balance(struct irq_data *d) ++{ ++ return !(__irqd_to_state(d) & (IRQD_PER_CPU | IRQD_NO_BALANCING)); ++} ++ ++static inline bool irqd_affinity_was_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_SET; ++} ++ ++static inline void irqd_mark_affinity_was_set(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_AFFINITY_SET; ++} ++ ++static inline bool irqd_trigger_type_was_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_DEFAULT_TRIGGER_SET; ++} ++ ++static inline u32 irqd_get_trigger_type(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_TRIGGER_MASK; ++} ++ ++/* ++ * Must only be called inside irq_chip.irq_set_type() functions or ++ * from the DT/ACPI setup code. ++ */ ++static inline void irqd_set_trigger_type(struct irq_data *d, u32 type) ++{ ++ __irqd_to_state(d) &= ~IRQD_TRIGGER_MASK; ++ __irqd_to_state(d) |= type & IRQD_TRIGGER_MASK; ++ __irqd_to_state(d) |= IRQD_DEFAULT_TRIGGER_SET; ++} ++ ++static inline bool irqd_is_level_type(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_LEVEL; ++} ++ ++/* ++ * Must only be called of irqchip.irq_set_affinity() or low level ++ * hieararchy domain allocation functions. 
++ */ ++static inline void irqd_set_single_target(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_SINGLE_TARGET; ++} ++ ++static inline bool irqd_is_single_target(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_SINGLE_TARGET; ++} ++ ++static inline bool irqd_is_wakeup_set(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_WAKEUP_STATE; ++} ++ ++static inline bool irqd_can_move_in_process_context(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; ++} ++ ++static inline bool irqd_irq_disabled(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_DISABLED; ++} ++ ++static inline bool irqd_irq_masked(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_MASKED; ++} ++ ++static inline bool irqd_irq_inprogress(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_INPROGRESS; ++} ++ ++static inline bool irqd_is_wakeup_armed(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; ++} ++ ++static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; ++} ++ ++static inline bool irqd_affinity_is_managed(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; ++} ++ ++static inline bool irqd_is_activated(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_ACTIVATED; ++} ++ ++static inline void irqd_set_activated(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_ACTIVATED; ++} ++ ++static inline void irqd_clr_activated(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_ACTIVATED; ++} ++ ++static inline bool irqd_is_started(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_IRQ_STARTED; ++} ++ ++static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_set_can_reserve(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_CAN_RESERVE; ++} ++ ++static inline void irqd_clr_can_reserve(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; ++} ++ ++static inline bool irqd_can_reserve(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_CAN_RESERVE; ++} ++ ++static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline bool irqd_msi_nomask_quirk(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; ++} ++ ++static inline void irqd_set_affinity_on_activate(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; ++} ++ ++static inline bool irqd_affinity_on_activate(struct irq_data *d) ++{ ++ return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; ++} ++ ++#undef __irqd_to_state ++ ++static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) ++{ ++ return d->hwirq; ++} ++ ++/** ++ * struct irq_chip - hardware interrupt chip descriptor ++ * ++ * @parent_device: pointer to parent device for irqchip ++ * @name: name for /proc/interrupts ++ * @irq_startup: start up the interrupt (defaults to ->enable if NULL) ++ * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) ++ * 
@irq_enable: enable the interrupt (defaults to chip->unmask if NULL) ++ * @irq_disable: disable the interrupt ++ * @irq_ack: start of a new interrupt ++ * @irq_mask: mask an interrupt source ++ * @irq_mask_ack: ack and mask an interrupt source ++ * @irq_unmask: unmask an interrupt source ++ * @irq_eoi: end of interrupt ++ * @irq_set_affinity: Set the CPU affinity on SMP machines. If the force ++ * argument is true, it tells the driver to ++ * unconditionally apply the affinity setting. Sanity ++ * checks against the supplied affinity mask are not ++ * required. This is used for CPU hotplug where the ++ * target CPU is not yet set in the cpu_online_mask. ++ * @irq_retrigger: resend an IRQ to the CPU ++ * @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ ++ * @irq_set_wake: enable/disable power-management wake-on of an IRQ ++ * @irq_bus_lock: function to lock access to slow bus (i2c) chips ++ * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips ++ * @irq_cpu_online: configure an interrupt source for a secondary CPU ++ * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU ++ * @irq_suspend: function called from core code on suspend once per ++ * chip, when one or more interrupts are installed ++ * @irq_resume: function called from core code on resume once per chip, ++ * when one ore more interrupts are installed ++ * @irq_pm_shutdown: function called from core code on shutdown once per chip ++ * @irq_calc_mask: Optional function to set irq_data.mask for special cases ++ * @irq_print_chip: optional to print special chip info in show_interrupts ++ * @irq_request_resources: optional to request resources before calling ++ * any other callback related to this irq ++ * @irq_release_resources: optional to release resources acquired with ++ * irq_request_resources ++ * @irq_compose_msi_msg: optional to compose message content for MSI ++ * @irq_write_msi_msg: optional to write message content for MSI ++ * @irq_get_irqchip_state: return the internal state of an interrupt ++ * @irq_set_irqchip_state: set the internal state of a interrupt ++ * @irq_set_vcpu_affinity: optional to target a vCPU in a virtual machine ++ * @ipi_send_single: send a single IPI to destination cpus ++ * @ipi_send_mask: send an IPI to destination cpus in cpumask ++ * @irq_nmi_setup: function called from core code before enabling an NMI ++ * @irq_nmi_teardown: function called from core code after disabling an NMI ++ * @flags: chip specific flags ++ */ ++struct irq_chip { ++ struct device *parent_device; ++ const char *name; ++ unsigned int (*irq_startup)(struct irq_data *data); ++ void (*irq_shutdown)(struct irq_data *data); ++ void (*irq_enable)(struct irq_data *data); ++ void (*irq_disable)(struct irq_data *data); ++ ++ void (*irq_ack)(struct irq_data *data); ++ void (*irq_mask)(struct irq_data *data); ++ void (*irq_mask_ack)(struct irq_data *data); ++ void (*irq_unmask)(struct irq_data *data); ++ void (*irq_eoi)(struct irq_data *data); ++ ++ int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force); ++ int (*irq_retrigger)(struct irq_data *data); ++ int (*irq_set_type)(struct irq_data *data, unsigned int flow_type); ++ int (*irq_set_wake)(struct irq_data *data, unsigned int on); ++ ++ void (*irq_bus_lock)(struct irq_data *data); ++ void (*irq_bus_sync_unlock)(struct irq_data *data); ++ ++ void (*irq_cpu_online)(struct irq_data *data); ++ void (*irq_cpu_offline)(struct irq_data *data); ++ ++ void (*irq_suspend)(struct irq_data *data); ++ void 
(*irq_resume)(struct irq_data *data); ++ void (*irq_pm_shutdown)(struct irq_data *data); ++ ++ void (*irq_calc_mask)(struct irq_data *data); ++ ++ void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); ++ int (*irq_request_resources)(struct irq_data *data); ++ void (*irq_release_resources)(struct irq_data *data); ++ ++ void (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg *msg); ++ void (*irq_write_msi_msg)(struct irq_data *data, struct msi_msg *msg); ++ ++ int (*irq_get_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool *state); ++ int (*irq_set_irqchip_state)(struct irq_data *data, enum irqchip_irq_state which, bool state); ++ ++ int (*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info); ++ ++ void (*ipi_send_single)(struct irq_data *data, unsigned int cpu); ++ void (*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest); ++ ++ int (*irq_nmi_setup)(struct irq_data *data); ++ void (*irq_nmi_teardown)(struct irq_data *data); ++ ++ unsigned long flags; ++}; ++ ++/* ++ * irq_chip specific flags ++ * ++ * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type() ++ * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled ++ * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path ++ * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks ++ * when irq enabled ++ * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip ++ * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask ++ * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode ++ * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs ++ * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips ++ */ ++enum { ++ IRQCHIP_SET_TYPE_MASKED = (1 << 0), ++ IRQCHIP_EOI_IF_HANDLED = (1 << 1), ++ IRQCHIP_MASK_ON_SUSPEND = (1 << 2), ++ IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), ++ IRQCHIP_SKIP_SET_WAKE = (1 << 4), ++ IRQCHIP_ONESHOT_SAFE = (1 << 5), ++ IRQCHIP_EOI_THREADED = (1 << 6), ++ IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), ++ IRQCHIP_SUPPORTS_NMI = (1 << 8), ++}; ++ ++#include ++ ++/* ++ * Pick up the arch-dependent methods: ++ */ ++#include ++ ++#ifndef NR_IRQS_LEGACY ++# define NR_IRQS_LEGACY 0 ++#endif ++ ++#ifndef ARCH_IRQ_INIT_FLAGS ++# define ARCH_IRQ_INIT_FLAGS 0 ++#endif ++ ++#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS ++ ++struct irqaction; ++extern int setup_irq(unsigned int irq, struct irqaction *new); ++extern void remove_irq(unsigned int irq, struct irqaction *act); ++extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); ++extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); ++ ++extern void irq_cpu_online(void); ++extern void irq_cpu_offline(void); ++extern int irq_set_affinity_locked(struct irq_data *data, ++ const struct cpumask *cpumask, bool force); ++extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION) ++extern void irq_migrate_all_off_this_cpu(void); ++extern int irq_affinity_online_cpu(unsigned int cpu); ++#else ++# define irq_affinity_online_cpu NULL ++#endif ++ ++#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) ++void __irq_move_irq(struct irq_data *data); ++static inline void irq_move_irq(struct irq_data *data) ++{ ++ if (unlikely(irqd_is_setaffinity_pending(data))) ++ __irq_move_irq(data); ++} ++void irq_move_masked_irq(struct irq_data *data); ++void irq_force_complete_move(struct irq_desc *desc); ++#else ++static 
inline void irq_move_irq(struct irq_data *data) { } ++static inline void irq_move_masked_irq(struct irq_data *data) { } ++static inline void irq_force_complete_move(struct irq_desc *desc) { } ++#endif ++ ++extern int no_irq_affinity; ++ ++#ifdef CONFIG_HARDIRQS_SW_RESEND ++int irq_set_parent(int irq, int parent_irq); ++#else ++static inline int irq_set_parent(int irq, int parent_irq) ++{ ++ return 0; ++} ++#endif ++ ++/* ++ * Built-in IRQ handlers for various IRQ types, ++ * callable via desc->handle_irq() ++ */ ++extern void handle_level_irq(struct irq_desc *desc); ++extern void handle_fasteoi_irq(struct irq_desc *desc); ++extern void handle_edge_irq(struct irq_desc *desc); ++extern void handle_edge_eoi_irq(struct irq_desc *desc); ++extern void handle_simple_irq(struct irq_desc *desc); ++extern void handle_untracked_irq(struct irq_desc *desc); ++extern void handle_percpu_irq(struct irq_desc *desc); ++extern void handle_percpu_devid_irq(struct irq_desc *desc); ++extern void handle_bad_irq(struct irq_desc *desc); ++extern void handle_nested_irq(unsigned int irq); ++ ++extern void handle_fasteoi_nmi(struct irq_desc *desc); ++extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc); ++ ++extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); ++extern int irq_chip_pm_get(struct irq_data *data); ++extern int irq_chip_pm_put(struct irq_data *data); ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++extern void handle_fasteoi_ack_irq(struct irq_desc *desc); ++extern void handle_fasteoi_mask_irq(struct irq_desc *desc); ++extern void irq_chip_enable_parent(struct irq_data *data); ++extern void irq_chip_disable_parent(struct irq_data *data); ++extern void irq_chip_ack_parent(struct irq_data *data); ++extern int irq_chip_retrigger_hierarchy(struct irq_data *data); ++extern void irq_chip_mask_parent(struct irq_data *data); ++extern void irq_chip_unmask_parent(struct irq_data *data); ++extern void irq_chip_eoi_parent(struct irq_data *data); ++extern int irq_chip_set_affinity_parent(struct irq_data *data, ++ const struct cpumask *dest, ++ bool force); ++extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); ++extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, ++ void *vcpu_info); ++extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); ++#endif ++ ++/* Handling of unhandled and spurious interrupts: */ ++extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); ++ ++ ++/* Enable/disable irq debugging output: */ ++extern int noirqdebug_setup(char *str); ++ ++/* Checks whether the interrupt can be requested by request_irq(): */ ++extern int can_request_irq(unsigned int irq, unsigned long irqflags); ++ ++/* Dummy irq-chip implementations: */ ++extern struct irq_chip no_irq_chip; ++extern struct irq_chip dummy_irq_chip; ++ ++extern void ++irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle, const char *name); ++ ++static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle) ++{ ++ irq_set_chip_and_handler_name(irq, chip, handle, NULL); ++} ++ ++extern int irq_set_percpu_devid(unsigned int irq); ++extern int irq_set_percpu_devid_partition(unsigned int irq, ++ const struct cpumask *affinity); ++extern int irq_get_percpu_devid_partition(unsigned int irq, ++ struct cpumask *affinity); ++ ++extern void ++__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, ++ const char *name); ++ 
++static inline void ++irq_set_handler(unsigned int irq, irq_flow_handler_t handle) ++{ ++ __irq_set_handler(irq, handle, 0, NULL); ++} ++ ++/* ++ * Set a highlevel chained flow handler for a given IRQ. ++ * (a chained handler is automatically enabled and set to ++ * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) ++ */ ++static inline void ++irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle) ++{ ++ __irq_set_handler(irq, handle, 1, NULL); ++} ++ ++/* ++ * Set a highlevel chained flow handler and its data for a given IRQ. ++ * (a chained handler is automatically enabled and set to ++ * IRQ_NOREQUEST, IRQ_NOPROBE, and IRQ_NOTHREAD) ++ */ ++void ++irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, ++ void *data); ++ ++void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); ++ ++static inline void irq_set_status_flags(unsigned int irq, unsigned long set) ++{ ++ irq_modify_status(irq, 0, set); ++} ++ ++static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr) ++{ ++ irq_modify_status(irq, clr, 0); ++} ++ ++static inline void irq_set_noprobe(unsigned int irq) ++{ ++ irq_modify_status(irq, 0, IRQ_NOPROBE); ++} ++ ++static inline void irq_set_probe(unsigned int irq) ++{ ++ irq_modify_status(irq, IRQ_NOPROBE, 0); ++} ++ ++static inline void irq_set_nothread(unsigned int irq) ++{ ++ irq_modify_status(irq, 0, IRQ_NOTHREAD); ++} ++ ++static inline void irq_set_thread(unsigned int irq) ++{ ++ irq_modify_status(irq, IRQ_NOTHREAD, 0); ++} ++ ++static inline void irq_set_nested_thread(unsigned int irq, bool nest) ++{ ++ if (nest) ++ irq_set_status_flags(irq, IRQ_NESTED_THREAD); ++ else ++ irq_clear_status_flags(irq, IRQ_NESTED_THREAD); ++} ++ ++static inline void irq_set_percpu_devid_flags(unsigned int irq) ++{ ++ irq_set_status_flags(irq, ++ IRQ_NOAUTOEN | IRQ_PER_CPU | IRQ_NOTHREAD | ++ IRQ_NOPROBE | IRQ_PER_CPU_DEVID); ++} ++ ++/* Set/get chip/data for an IRQ: */ ++extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); ++extern int irq_set_handler_data(unsigned int irq, void *data); ++extern int irq_set_chip_data(unsigned int irq, void *data); ++extern int irq_set_irq_type(unsigned int irq, unsigned int type); ++extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); ++extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, ++ struct msi_desc *entry); ++extern struct irq_data *irq_get_irq_data(unsigned int irq); ++ ++static inline struct irq_chip *irq_get_chip(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->chip : NULL; ++} ++ ++static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d) ++{ ++ return d->chip; ++} ++ ++static inline void *irq_get_chip_data(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->chip_data : NULL; ++} ++ ++static inline void *irq_data_get_irq_chip_data(struct irq_data *d) ++{ ++ return d->chip_data; ++} ++ ++static inline void *irq_get_handler_data(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? d->common->handler_data : NULL; ++} ++ ++static inline void *irq_data_get_irq_handler_data(struct irq_data *d) ++{ ++ return d->common->handler_data; ++} ++ ++static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? 
d->common->msi_desc : NULL; ++} ++ ++static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) ++{ ++ return d->common->msi_desc; ++} ++ ++static inline u32 irq_get_trigger_type(unsigned int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ return d ? irqd_get_trigger_type(d) : 0; ++} ++ ++static inline int irq_common_data_get_node(struct irq_common_data *d) ++{ ++#ifdef CONFIG_NUMA ++ return d->node; ++#else ++ return 0; ++#endif ++} ++ ++static inline int irq_data_get_node(struct irq_data *d) ++{ ++ return irq_common_data_get_node(d->common); ++} ++ ++static inline struct cpumask *irq_get_affinity_mask(int irq) ++{ ++ struct irq_data *d = irq_get_irq_data(irq); ++ ++ return d ? d->common->affinity : NULL; ++} ++ ++static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) ++{ ++ return d->common->affinity; ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++static inline ++struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) ++{ ++ return d->common->effective_affinity; ++} ++static inline void irq_data_update_effective_affinity(struct irq_data *d, ++ const struct cpumask *m) ++{ ++ cpumask_copy(d->common->effective_affinity, m); ++} ++#else ++static inline void irq_data_update_effective_affinity(struct irq_data *d, ++ const struct cpumask *m) ++{ ++} ++static inline ++struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) ++{ ++ return d->common->affinity; ++} ++#endif ++ ++unsigned int arch_dynirq_lower_bound(unsigned int from); ++ ++int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, ++ struct module *owner, const struct cpumask *affinity); ++ ++int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, ++ unsigned int cnt, int node, struct module *owner, ++ const struct cpumask *affinity); ++ ++/* use macros to avoid needing export.h for THIS_MODULE */ ++#define irq_alloc_descs(irq, from, cnt, node) \ ++ __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) ++ ++#define irq_alloc_desc(node) \ ++ irq_alloc_descs(-1, 0, 1, node) ++ ++#define irq_alloc_desc_at(at, node) \ ++ irq_alloc_descs(at, at, 1, node) ++ ++#define irq_alloc_desc_from(from, node) \ ++ irq_alloc_descs(-1, from, 1, node) ++ ++#define irq_alloc_descs_from(from, cnt, node) \ ++ irq_alloc_descs(-1, from, cnt, node) ++ ++#define devm_irq_alloc_descs(dev, irq, from, cnt, node) \ ++ __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) ++ ++#define devm_irq_alloc_desc(dev, node) \ ++ devm_irq_alloc_descs(dev, -1, 0, 1, node) ++ ++#define devm_irq_alloc_desc_at(dev, at, node) \ ++ devm_irq_alloc_descs(dev, at, at, 1, node) ++ ++#define devm_irq_alloc_desc_from(dev, from, node) \ ++ devm_irq_alloc_descs(dev, -1, from, 1, node) ++ ++#define devm_irq_alloc_descs_from(dev, from, cnt, node) \ ++ devm_irq_alloc_descs(dev, -1, from, cnt, node) ++ ++void irq_free_descs(unsigned int irq, unsigned int cnt); ++static inline void irq_free_desc(unsigned int irq) ++{ ++ irq_free_descs(irq, 1); ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ ++unsigned int irq_alloc_hwirqs(int cnt, int node); ++static inline unsigned int irq_alloc_hwirq(int node) ++{ ++ return irq_alloc_hwirqs(1, node); ++} ++void irq_free_hwirqs(unsigned int from, int cnt); ++static inline void irq_free_hwirq(unsigned int irq) ++{ ++ return irq_free_hwirqs(irq, 1); ++} ++int arch_setup_hwirq(unsigned int irq, int node); ++void arch_teardown_hwirq(unsigned int irq); ++#endif ++ ++#ifdef CONFIG_GENERIC_IRQ_LEGACY ++void 
irq_init_desc(unsigned int irq); ++#endif ++ ++/** ++ * struct irq_chip_regs - register offsets for struct irq_gci ++ * @enable: Enable register offset to reg_base ++ * @disable: Disable register offset to reg_base ++ * @mask: Mask register offset to reg_base ++ * @ack: Ack register offset to reg_base ++ * @eoi: Eoi register offset to reg_base ++ * @type: Type configuration register offset to reg_base ++ * @polarity: Polarity configuration register offset to reg_base ++ */ ++struct irq_chip_regs { ++ unsigned long enable; ++ unsigned long disable; ++ unsigned long mask; ++ unsigned long ack; ++ unsigned long eoi; ++ unsigned long type; ++ unsigned long polarity; ++}; ++ ++/** ++ * struct irq_chip_type - Generic interrupt chip instance for a flow type ++ * @chip: The real interrupt chip which provides the callbacks ++ * @regs: Register offsets for this chip ++ * @handler: Flow handler associated with this chip ++ * @type: Chip can handle these flow types ++ * @mask_cache_priv: Cached mask register private to the chip type ++ * @mask_cache: Pointer to cached mask register ++ * ++ * A irq_generic_chip can have several instances of irq_chip_type when ++ * it requires different functions and register offsets for different ++ * flow types. ++ */ ++struct irq_chip_type { ++ struct irq_chip chip; ++ struct irq_chip_regs regs; ++ irq_flow_handler_t handler; ++ u32 type; ++ u32 mask_cache_priv; ++ u32 *mask_cache; ++}; ++ ++/** ++ * struct irq_chip_generic - Generic irq chip data structure ++ * @lock: Lock to protect register and cache data access ++ * @reg_base: Register base address (virtual) ++ * @reg_readl: Alternate I/O accessor (defaults to readl if NULL) ++ * @reg_writel: Alternate I/O accessor (defaults to writel if NULL) ++ * @suspend: Function called from core code on suspend once per ++ * chip; can be useful instead of irq_chip::suspend to ++ * handle chip details even when no interrupts are in use ++ * @resume: Function called from core code on resume once per chip; ++ * can be useful instead of irq_chip::suspend to handle ++ * chip details even when no interrupts are in use ++ * @irq_base: Interrupt base nr for this chip ++ * @irq_cnt: Number of interrupts handled by this chip ++ * @mask_cache: Cached mask register shared between all chip types ++ * @type_cache: Cached type register ++ * @polarity_cache: Cached polarity register ++ * @wake_enabled: Interrupt can wakeup from suspend ++ * @wake_active: Interrupt is marked as an wakeup from suspend source ++ * @num_ct: Number of available irq_chip_type instances (usually 1) ++ * @private: Private data for non generic chip callbacks ++ * @installed: bitfield to denote installed interrupts ++ * @unused: bitfield to denote unused interrupts ++ * @domain: irq domain pointer ++ * @list: List head for keeping track of instances ++ * @chip_types: Array of interrupt irq_chip_types ++ * ++ * Note, that irq_chip_generic can have multiple irq_chip_type ++ * implementations which can be associated to a particular irq line of ++ * an irq_chip_generic instance. That allows to share and protect ++ * state in an irq_chip_generic instance when we need to implement ++ * different flow mechanisms (level/edge) for it. 
++ */ ++struct irq_chip_generic { ++ raw_spinlock_t lock; ++ void __iomem *reg_base; ++ u32 (*reg_readl)(void __iomem *addr); ++ void (*reg_writel)(u32 val, void __iomem *addr); ++ void (*suspend)(struct irq_chip_generic *gc); ++ void (*resume)(struct irq_chip_generic *gc); ++ unsigned int irq_base; ++ unsigned int irq_cnt; ++ u32 mask_cache; ++ u32 type_cache; ++ u32 polarity_cache; ++ u32 wake_enabled; ++ u32 wake_active; ++ unsigned int num_ct; ++ void *private; ++ unsigned long installed; ++ unsigned long unused; ++ struct irq_domain *domain; ++ struct list_head list; ++ struct irq_chip_type chip_types[0]; ++}; ++ ++/** ++ * enum irq_gc_flags - Initialization flags for generic irq chips ++ * @IRQ_GC_INIT_MASK_CACHE: Initialize the mask_cache by reading mask reg ++ * @IRQ_GC_INIT_NESTED_LOCK: Set the lock class of the irqs to nested for ++ * irq chips which need to call irq_set_wake() on ++ * the parent irq. Usually GPIO implementations ++ * @IRQ_GC_MASK_CACHE_PER_TYPE: Mask cache is chip type private ++ * @IRQ_GC_NO_MASK: Do not calculate irq_data->mask ++ * @IRQ_GC_BE_IO: Use big-endian register accesses (default: LE) ++ */ ++enum irq_gc_flags { ++ IRQ_GC_INIT_MASK_CACHE = 1 << 0, ++ IRQ_GC_INIT_NESTED_LOCK = 1 << 1, ++ IRQ_GC_MASK_CACHE_PER_TYPE = 1 << 2, ++ IRQ_GC_NO_MASK = 1 << 3, ++ IRQ_GC_BE_IO = 1 << 4, ++}; ++ ++/* ++ * struct irq_domain_chip_generic - Generic irq chip data structure for irq domains ++ * @irqs_per_chip: Number of interrupts per chip ++ * @num_chips: Number of chips ++ * @irq_flags_to_set: IRQ* flags to set on irq setup ++ * @irq_flags_to_clear: IRQ* flags to clear on irq setup ++ * @gc_flags: Generic chip specific setup flags ++ * @gc: Array of pointers to generic interrupt chips ++ */ ++struct irq_domain_chip_generic { ++ unsigned int irqs_per_chip; ++ unsigned int num_chips; ++ unsigned int irq_flags_to_clear; ++ unsigned int irq_flags_to_set; ++ enum irq_gc_flags gc_flags; ++ struct irq_chip_generic *gc[0]; ++}; ++ ++/* Generic chip callback functions */ ++void irq_gc_noop(struct irq_data *d); ++void irq_gc_mask_disable_reg(struct irq_data *d); ++void irq_gc_mask_set_bit(struct irq_data *d); ++void irq_gc_mask_clr_bit(struct irq_data *d); ++void irq_gc_unmask_enable_reg(struct irq_data *d); ++void irq_gc_ack_set_bit(struct irq_data *d); ++void irq_gc_ack_clr_bit(struct irq_data *d); ++void irq_gc_mask_disable_and_ack_set(struct irq_data *d); ++void irq_gc_eoi(struct irq_data *d); ++int irq_gc_set_wake(struct irq_data *d, unsigned int on); ++ ++/* Setup functions for irq_chip_generic */ ++int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, ++ irq_hw_number_t hw_irq); ++struct irq_chip_generic * ++irq_alloc_generic_chip(const char *name, int nr_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler); ++void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, ++ enum irq_gc_flags flags, unsigned int clr, ++ unsigned int set); ++int irq_setup_alt_chip(struct irq_data *d, unsigned int type); ++void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, ++ unsigned int clr, unsigned int set); ++ ++struct irq_chip_generic * ++devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, ++ unsigned int irq_base, void __iomem *reg_base, ++ irq_flow_handler_t handler); ++int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, ++ u32 msk, enum irq_gc_flags flags, ++ unsigned int clr, unsigned int set); ++ ++struct irq_chip_generic *irq_get_domain_generic_chip(struct 
irq_domain *d, unsigned int hw_irq); ++ ++int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, ++ int num_ct, const char *name, ++ irq_flow_handler_t handler, ++ unsigned int clr, unsigned int set, ++ enum irq_gc_flags flags); ++ ++#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \ ++ handler, clr, set, flags) \ ++({ \ ++ MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \ ++ __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\ ++ handler, clr, set, flags); \ ++}) ++ ++static inline void irq_free_generic_chip(struct irq_chip_generic *gc) ++{ ++ kfree(gc); ++} ++ ++static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc, ++ u32 msk, unsigned int clr, ++ unsigned int set) ++{ ++ irq_remove_generic_chip(gc, msk, clr, set); ++ irq_free_generic_chip(gc); ++} ++ ++static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) ++{ ++ return container_of(d->chip, struct irq_chip_type, chip); ++} ++ ++#define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) ++ ++#ifdef CONFIG_SMP ++static inline void irq_gc_lock(struct irq_chip_generic *gc) ++{ ++ raw_spin_lock(&gc->lock); ++} ++ ++static inline void irq_gc_unlock(struct irq_chip_generic *gc) ++{ ++ raw_spin_unlock(&gc->lock); ++} ++#else ++static inline void irq_gc_lock(struct irq_chip_generic *gc) { } ++static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } ++#endif ++ ++/* ++ * The irqsave variants are for usage in non interrupt code. Do not use ++ * them in irq_chip callbacks. Use irq_gc_lock() instead. ++ */ ++#define irq_gc_lock_irqsave(gc, flags) \ ++ raw_spin_lock_irqsave(&(gc)->lock, flags) ++ ++#define irq_gc_unlock_irqrestore(gc, flags) \ ++ raw_spin_unlock_irqrestore(&(gc)->lock, flags) ++ ++static inline void irq_reg_writel(struct irq_chip_generic *gc, ++ u32 val, int reg_offset) ++{ ++ if (gc->reg_writel) ++ gc->reg_writel(val, gc->reg_base + reg_offset); ++ else ++ writel(val, gc->reg_base + reg_offset); ++} ++ ++static inline u32 irq_reg_readl(struct irq_chip_generic *gc, ++ int reg_offset) ++{ ++ if (gc->reg_readl) ++ return gc->reg_readl(gc->reg_base + reg_offset); ++ else ++ return readl(gc->reg_base + reg_offset); ++} ++ ++struct irq_matrix; ++struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, ++ unsigned int alloc_start, ++ unsigned int alloc_end); ++void irq_matrix_online(struct irq_matrix *m); ++void irq_matrix_offline(struct irq_matrix *m); ++void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace); ++int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk); ++void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk); ++int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, ++ unsigned int *mapped_cpu); ++void irq_matrix_reserve(struct irq_matrix *m); ++void irq_matrix_remove_reserved(struct irq_matrix *m); ++int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, ++ bool reserved, unsigned int *mapped_cpu); ++void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, ++ unsigned int bit, bool managed); ++void irq_matrix_assign(struct irq_matrix *m, unsigned int bit); ++unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown); ++unsigned int irq_matrix_allocated(struct irq_matrix *m); ++unsigned int irq_matrix_reserved(struct irq_matrix *m); ++void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind); ++ ++/* Contrary to Linux irqs, for hardware irqs the irq number 0 is 
valid */ ++#define INVALID_HWIRQ (~0UL) ++irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); ++int __ipi_send_single(struct irq_desc *desc, unsigned int cpu); ++int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest); ++int ipi_send_single(unsigned int virq, unsigned int cpu); ++int ipi_send_mask(unsigned int virq, const struct cpumask *dest); ++ ++#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER ++/* ++ * Registers a generic IRQ handling function as the top-level IRQ handler in ++ * the system, which is generally the first C code called from an assembly ++ * architecture-specific interrupt handler. ++ * ++ * Returns 0 on success, or -EBUSY if an IRQ handler has already been ++ * registered. ++ */ ++int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); ++ ++/* ++ * Allows interrupt handlers to find the irqchip that's been registered as the ++ * top-level IRQ handler. ++ */ ++extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; ++#endif ++ ++#endif /* _LINUX_IRQ_H */ +diff -uprN kernel/include/linux/irq.h.rej kernel_new/include/linux/irq.h.rej +--- kernel/include/linux/irq.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/irq.h.rej 2021-04-01 18:28:07.802863124 +0800 +@@ -0,0 +1,18 @@ ++--- include/linux/irq.h 2019-12-18 03:36:04.000000000 +0800 +++++ include/linux/irq.h 2021-03-22 09:21:43.212415388 +0800 ++@@ -509,6 +514,7 @@ struct irq_chip { ++ * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask ++ * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode ++ * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs +++ * IRQCHIP_PIPELINE_SAFE: Chip can work in pipelined mode ++ */ ++ enum { ++ IRQCHIP_SET_TYPE_MASKED = (1 << 0), ++@@ -519,6 +525,7 @@ enum { ++ IRQCHIP_ONESHOT_SAFE = (1 << 5), ++ IRQCHIP_EOI_THREADED = (1 << 6), ++ IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), +++ IRQCHIP_PIPELINE_SAFE = (1 << 7), ++ }; ++ ++ #include +diff -uprN kernel/include/linux/irqnr.h kernel_new/include/linux/irqnr.h +--- kernel/include/linux/irqnr.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/irqnr.h 2021-04-01 18:28:07.802863124 +0800 +@@ -6,7 +6,11 @@ + + + extern int nr_irqs; ++#if !defined(CONFIG_IPIPE) || defined(CONFIG_SPARSE_IRQ) + extern struct irq_desc *irq_to_desc(unsigned int irq); ++#else ++#define irq_to_desc(irq) ({ ipipe_virtual_irq_p(irq) ? 
NULL : &irq_desc[irq]; }) ++#endif + unsigned int irq_get_next_irq(unsigned int offset); + + # define for_each_irq_desc(irq, desc) \ +diff -uprN kernel/include/linux/kernel.h kernel_new/include/linux/kernel.h +--- kernel/include/linux/kernel.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/kernel.h 2021-04-01 18:28:07.802863124 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + #define USHRT_MAX ((u16)(~0U)) +@@ -240,9 +241,12 @@ struct user; + + #ifdef CONFIG_PREEMPT_VOLUNTARY + extern int _cond_resched(void); +-# define might_resched() _cond_resched() ++# define might_resched() do { \ ++ ipipe_root_only(); \ ++ _cond_resched(); \ ++ } while (0) + #else +-# define might_resched() do { } while (0) ++# define might_resched() ipipe_root_only() + #endif + + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +diff -uprN kernel/include/linux/kvm_host.h kernel_new/include/linux/kvm_host.h +--- kernel/include/linux/kvm_host.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/kvm_host.h 2021-04-01 18:28:07.802863124 +0800 +@@ -221,6 +221,10 @@ struct kvm_vcpu { + #ifdef CONFIG_PREEMPT_NOTIFIERS + struct preempt_notifier preempt_notifier; + #endif ++#ifdef CONFIG_IPIPE ++ struct ipipe_vm_notifier ipipe_notifier; ++ bool ipipe_put_vcpu; ++#endif + int cpu; + int vcpu_id; + int srcu_idx; +diff -uprN kernel/include/linux/preempt.h kernel_new/include/linux/preempt.h +--- kernel/include/linux/preempt.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/preempt.h 2021-04-01 18:28:07.802863124 +0800 +@@ -255,7 +255,28 @@ do { \ + + #endif /* CONFIG_PREEMPT_COUNT */ + +-#ifdef MODULE ++#ifdef CONFIG_IPIPE ++#define hard_preempt_disable() \ ++ ({ \ ++ unsigned long __flags__; \ ++ __flags__ = hard_local_irq_save(); \ ++ if (__ipipe_root_p) \ ++ preempt_disable(); \ ++ __flags__; \ ++ }) ++ ++#define hard_preempt_enable(__flags__) \ ++ do { \ ++ if (__ipipe_root_p) { \ ++ preempt_enable_no_resched(); \ ++ hard_local_irq_restore(__flags__); \ ++ if (!hard_irqs_disabled_flags(__flags__)) \ ++ preempt_check_resched(); \ ++ } else \ ++ hard_local_irq_restore(__flags__); \ ++ } while (0) ++ ++#elif defined(MODULE) + /* + * Modules have no business playing preemption tricks. + */ +@@ -263,7 +284,7 @@ do { \ + #undef preempt_enable_no_resched + #undef preempt_enable_no_resched_notrace + #undef preempt_check_resched +-#endif ++#endif /* !IPIPE && MODULE */ + + #define preempt_set_need_resched() \ + do { \ +diff -uprN kernel/include/linux/printk.h kernel_new/include/linux/printk.h +--- kernel/include/linux/printk.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/printk.h 2021-04-01 18:28:07.802863124 +0800 +@@ -157,6 +157,17 @@ static inline void printk_nmi_direct_ent + static inline void printk_nmi_direct_exit(void) { } + #endif /* PRINTK_NMI */ + ++#ifdef CONFIG_RAW_PRINTK ++void raw_vprintk(const char *fmt, va_list ap); ++asmlinkage __printf(1, 2) ++void raw_printk(const char *fmt, ...); ++#else ++static inline __cold ++void raw_vprintk(const char *s, va_list ap) { } ++static inline __printf(1, 2) __cold ++void raw_printk(const char *s, ...) 
{ } ++#endif ++ + #ifdef CONFIG_PRINTK + extern void printk_safe_enter(void); + extern void printk_safe_exit(void); +diff -uprN kernel/include/linux/printk.h.orig kernel_new/include/linux/printk.h.orig +--- kernel/include/linux/printk.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/printk.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,573 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KERNEL_PRINTK__ ++#define __KERNEL_PRINTK__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern const char linux_banner[]; ++extern const char linux_proc_banner[]; ++ ++#define PRINTK_MAX_SINGLE_HEADER_LEN 2 ++ ++static inline int printk_get_level(const char *buffer) ++{ ++ if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { ++ switch (buffer[1]) { ++ case '0' ... '7': ++ case 'd': /* KERN_DEFAULT */ ++ case 'c': /* KERN_CONT */ ++ return buffer[1]; ++ } ++ } ++ return 0; ++} ++ ++static inline const char *printk_skip_level(const char *buffer) ++{ ++ if (printk_get_level(buffer)) ++ return buffer + 2; ++ ++ return buffer; ++} ++ ++static inline const char *printk_skip_headers(const char *buffer) ++{ ++ while (printk_get_level(buffer)) ++ buffer = printk_skip_level(buffer); ++ ++ return buffer; ++} ++ ++#define CONSOLE_EXT_LOG_MAX 8192 ++ ++/* printk's without a loglevel use this.. */ ++#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT ++ ++/* We show everything that is MORE important than this.. */ ++#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ ++#define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ ++#define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ ++#define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ ++ ++/* ++ * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, ++ * we're now allowing both to be set from kernel config. ++ */ ++#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT ++#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET ++ ++extern int console_printk[]; ++ ++#define console_loglevel (console_printk[0]) ++#define default_message_loglevel (console_printk[1]) ++#define minimum_console_loglevel (console_printk[2]) ++#define default_console_loglevel (console_printk[3]) ++ ++static inline void console_silent(void) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_SILENT; ++} ++ ++static inline void console_verbose(void) ++{ ++ if (console_loglevel) ++ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; ++} ++ ++/* strlen("ratelimit") + 1 */ ++#define DEVKMSG_STR_MAX_SIZE 10 ++extern char devkmsg_log_str[]; ++struct ctl_table; ++ ++struct va_format { ++ const char *fmt; ++ va_list *va; ++}; ++ ++/* ++ * FW_BUG ++ * Add this to a message where you are sure the firmware is buggy or behaves ++ * really stupid or out of spec. Be aware that the responsible BIOS developer ++ * should be able to fix this issue or at least get a concrete idea of the ++ * problem by reading your message without the need of looking at the kernel ++ * code. ++ * ++ * Use it for definite and high priority BIOS bugs. ++ * ++ * FW_WARN ++ * Use it for not that clear (e.g. could the kernel messed up things already?) ++ * and medium priority BIOS bugs. ++ * ++ * FW_INFO ++ * Use this one if you want to tell the user or vendor about something ++ * suspicious, but generally harmless related to the firmware. ++ * ++ * Use it for information or very low priority BIOS bugs. 
++ */ ++#define FW_BUG "[Firmware Bug]: " ++#define FW_WARN "[Firmware Warn]: " ++#define FW_INFO "[Firmware Info]: " ++ ++/* ++ * HW_ERR ++ * Add this to a message for hardware errors, so that user can report ++ * it to hardware vendor instead of LKML or software vendor. ++ */ ++#define HW_ERR "[Hardware Error]: " ++ ++/* ++ * DEPRECATED ++ * Add this to a message whenever you want to warn user space about the use ++ * of a deprecated aspect of an API so they can stop using it ++ */ ++#define DEPRECATED "[Deprecated]: " ++ ++/* ++ * Dummy printk for disabled debugging statements to use whilst maintaining ++ * gcc's format checking. ++ */ ++#define no_printk(fmt, ...) \ ++({ \ ++ if (0) \ ++ printk(fmt, ##__VA_ARGS__); \ ++ 0; \ ++}) ++ ++#ifdef CONFIG_EARLY_PRINTK ++extern asmlinkage __printf(1, 2) ++void early_printk(const char *fmt, ...); ++#else ++static inline __printf(1, 2) __cold ++void early_printk(const char *s, ...) { } ++#endif ++ ++#ifdef CONFIG_PRINTK_NMI ++extern void printk_nmi_enter(void); ++extern void printk_nmi_exit(void); ++extern void printk_nmi_direct_enter(void); ++extern void printk_nmi_direct_exit(void); ++#else ++static inline void printk_nmi_enter(void) { } ++static inline void printk_nmi_exit(void) { } ++static inline void printk_nmi_direct_enter(void) { } ++static inline void printk_nmi_direct_exit(void) { } ++#endif /* PRINTK_NMI */ ++ ++#ifdef CONFIG_PRINTK ++extern void printk_safe_enter(void); ++extern void printk_safe_exit(void); ++ ++#define printk_safe_enter_irqsave(flags) \ ++ do { \ ++ local_irq_save(flags); \ ++ printk_safe_enter(); \ ++ } while (0) ++ ++#define printk_safe_exit_irqrestore(flags) \ ++ do { \ ++ printk_safe_exit(); \ ++ local_irq_restore(flags); \ ++ } while (0) ++ ++#define printk_safe_enter_irq() \ ++ do { \ ++ local_irq_disable(); \ ++ printk_safe_enter(); \ ++ } while (0) ++ ++#define printk_safe_exit_irq() \ ++ do { \ ++ printk_safe_exit(); \ ++ local_irq_enable(); \ ++ } while (0) ++#else ++/* ++ * On !PRINTK builds we still export console output related locks ++ * and some functions (console_unlock()/tty/etc.), so printk-safe ++ * must preserve the existing local IRQ guarantees. ++ */ ++#define printk_safe_enter_irqsave(flags) local_irq_save(flags) ++#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) ++ ++#define printk_safe_enter_irq() local_irq_disable() ++#define printk_safe_exit_irq() local_irq_enable() ++#endif ++ ++#ifdef CONFIG_PRINTK ++asmlinkage __printf(5, 0) ++int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args); ++ ++asmlinkage __printf(1, 0) ++int vprintk(const char *fmt, va_list args); ++ ++asmlinkage __printf(5, 6) __cold ++int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...); ++ ++asmlinkage __printf(1, 2) __cold ++int printk(const char *fmt, ...); ++ ++/* ++ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! ++ */ ++__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); ++ ++/* ++ * Please don't use printk_ratelimit(), because it shares ratelimiting state ++ * with all other unrelated printk_ratelimit() callsites. Instead use ++ * printk_ratelimited() or plain old __ratelimit(). 
++ */ ++extern int __printk_ratelimit(const char *func); ++#define printk_ratelimit() __printk_ratelimit(__func__) ++extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msec); ++ ++extern int printk_delay_msec; ++extern int dmesg_restrict; ++ ++extern int ++devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, ++ size_t *lenp, loff_t *ppos); ++ ++extern void wake_up_klogd(void); ++ ++char *log_buf_addr_get(void); ++u32 log_buf_len_get(void); ++void log_buf_vmcoreinfo_setup(void); ++void __init setup_log_buf(int early); ++__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); ++void dump_stack_print_info(const char *log_lvl); ++void show_regs_print_info(const char *log_lvl); ++extern asmlinkage void dump_stack(void) __cold; ++extern void printk_safe_flush(void); ++extern void printk_safe_flush_on_panic(void); ++extern void zap_locks(void); ++#else ++static inline __printf(1, 0) ++int vprintk(const char *s, va_list args) ++{ ++ return 0; ++} ++static inline __printf(1, 2) __cold ++int printk(const char *s, ...) ++{ ++ return 0; ++} ++static inline __printf(1, 2) __cold ++int printk_deferred(const char *s, ...) ++{ ++ return 0; ++} ++static inline int printk_ratelimit(void) ++{ ++ return 0; ++} ++static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msec) ++{ ++ return false; ++} ++ ++static inline void wake_up_klogd(void) ++{ ++} ++ ++static inline char *log_buf_addr_get(void) ++{ ++ return NULL; ++} ++ ++static inline u32 log_buf_len_get(void) ++{ ++ return 0; ++} ++ ++static inline void log_buf_vmcoreinfo_setup(void) ++{ ++} ++ ++static inline void setup_log_buf(int early) ++{ ++} ++ ++static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) ++{ ++} ++ ++static inline void dump_stack_print_info(const char *log_lvl) ++{ ++} ++ ++static inline void show_regs_print_info(const char *log_lvl) ++{ ++} ++ ++static inline asmlinkage void dump_stack(void) ++{ ++} ++ ++static inline void printk_safe_flush(void) ++{ ++} ++ ++static inline void printk_safe_flush_on_panic(void) ++{ ++} ++ ++static inline void zap_locks(void) ++{ ++} ++#endif ++ ++extern int kptr_restrict; ++ ++#ifndef pr_fmt ++#define pr_fmt(fmt) fmt ++#endif ++ ++/* ++ * These can be used to print at the various log levels. ++ * All of these will print unconditionally, although note that pr_debug() ++ * and other debug macros are compiled out unless either DEBUG is defined ++ * or CONFIG_DYNAMIC_DEBUG is set. ++ */ ++#define pr_emerg(fmt, ...) \ ++ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert(fmt, ...) \ ++ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit(fmt, ...) \ ++ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err(fmt, ...) \ ++ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warning(fmt, ...) \ ++ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn pr_warning ++#define pr_notice(fmt, ...) \ ++ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info(fmt, ...) \ ++ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++/* ++ * Like KERN_CONT, pr_cont() should only be used when continuing ++ * a line with no newline ('\n') enclosed. Otherwise it defaults ++ * back to KERN_DEFAULT. ++ */ ++#define pr_cont(fmt, ...) \ ++ printk(KERN_CONT fmt, ##__VA_ARGS__) ++ ++/* pr_devel() should produce zero code unless DEBUG is defined */ ++#ifdef DEBUG ++#define pr_devel(fmt, ...) 
\ ++ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#include ++ ++/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ ++#define pr_debug(fmt, ...) \ ++ dynamic_pr_debug(fmt, ##__VA_ARGS__) ++#elif defined(DEBUG) ++#define pr_debug(fmt, ...) \ ++ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* ++ * Print a one-time message (analogous to WARN_ONCE() et al): ++ */ ++ ++#ifdef CONFIG_PRINTK ++#define printk_once(fmt, ...) \ ++({ \ ++ static bool __print_once __read_mostly; \ ++ bool __ret_print_once = !__print_once; \ ++ \ ++ if (!__print_once) { \ ++ __print_once = true; \ ++ printk(fmt, ##__VA_ARGS__); \ ++ } \ ++ unlikely(__ret_print_once); \ ++}) ++#define printk_deferred_once(fmt, ...) \ ++({ \ ++ static bool __print_once __read_mostly; \ ++ bool __ret_print_once = !__print_once; \ ++ \ ++ if (!__print_once) { \ ++ __print_once = true; \ ++ printk_deferred(fmt, ##__VA_ARGS__); \ ++ } \ ++ unlikely(__ret_print_once); \ ++}) ++#else ++#define printk_once(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#define printk_deferred_once(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#endif ++ ++#define pr_emerg_once(fmt, ...) \ ++ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert_once(fmt, ...) \ ++ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit_once(fmt, ...) \ ++ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err_once(fmt, ...) \ ++ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn_once(fmt, ...) \ ++ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_notice_once(fmt, ...) \ ++ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info_once(fmt, ...) \ ++ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_cont_once(fmt, ...) \ ++ printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) ++ ++#if defined(DEBUG) ++#define pr_devel_once(fmt, ...) \ ++ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel_once(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(DEBUG) ++#define pr_debug_once(fmt, ...) \ ++ printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug_once(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* ++ * ratelimited messages with local ratelimit_state, ++ * no local ratelimit_state used in the !PRINTK case ++ */ ++#ifdef CONFIG_PRINTK ++#define printk_ratelimited(fmt, ...) \ ++({ \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ \ ++ if (__ratelimit(&_rs)) \ ++ printk(fmt, ##__VA_ARGS__); \ ++}) ++#else ++#define printk_ratelimited(fmt, ...) \ ++ no_printk(fmt, ##__VA_ARGS__) ++#endif ++ ++#define pr_emerg_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_alert_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_crit_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_err_ratelimited(fmt, ...) 
\ ++ printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_warn_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_notice_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) ++#define pr_info_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) ++/* no pr_cont_ratelimited, don't do that... */ ++ ++#if defined(DEBUG) ++#define pr_devel_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_devel_ratelimited(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++/* If you are writing a driver, please use dev_dbg instead */ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++/* descriptor check is first to prevent flooding with "callbacks suppressed" */ ++#define pr_debug_ratelimited(fmt, ...) \ ++do { \ ++ static DEFINE_RATELIMIT_STATE(_rs, \ ++ DEFAULT_RATELIMIT_INTERVAL, \ ++ DEFAULT_RATELIMIT_BURST); \ ++ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ ++ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ ++ __ratelimit(&_rs)) \ ++ __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ ++} while (0) ++#elif defined(DEBUG) ++#define pr_debug_ratelimited(fmt, ...) \ ++ printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#else ++#define pr_debug_ratelimited(fmt, ...) \ ++ no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) ++#endif ++ ++extern const struct file_operations kmsg_fops; ++ ++enum { ++ DUMP_PREFIX_NONE, ++ DUMP_PREFIX_ADDRESS, ++ DUMP_PREFIX_OFFSET ++}; ++extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, ++ int groupsize, char *linebuf, size_t linebuflen, ++ bool ascii); ++#ifdef CONFIG_PRINTK ++extern void print_hex_dump(const char *level, const char *prefix_str, ++ int prefix_type, int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii); ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len) \ ++ dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true) ++#else ++extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, ++ const void *buf, size_t len); ++#endif /* defined(CONFIG_DYNAMIC_DEBUG) */ ++#else ++static inline void print_hex_dump(const char *level, const char *prefix_str, ++ int prefix_type, int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii) ++{ ++} ++static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, ++ const void *buf, size_t len) ++{ ++} ++ ++#endif ++ ++#if defined(CONFIG_DYNAMIC_DEBUG) ++#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) \ ++ dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) ++#elif defined(DEBUG) ++#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) \ ++ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ ++ groupsize, buf, len, ascii) ++#else ++static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, ++ int rowsize, int groupsize, ++ const void *buf, size_t len, bool ascii) ++{ ++} ++#endif ++ ++#endif +diff -uprN kernel/include/linux/rwlock_api_smp.h kernel_new/include/linux/rwlock_api_smp.h +--- kernel/include/linux/rwlock_api_smp.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/rwlock_api_smp.h 2021-04-01 18:28:07.803863123 +0800 +@@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rw + * 
even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline void __raw_read_lock(rwlock_t *lock) + { +diff -uprN kernel/include/linux/rwlock.h kernel_new/include/linux/rwlock.h +--- kernel/include/linux/rwlock.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/rwlock.h 2021-04-01 18:28:07.803863123 +0800 +@@ -67,8 +67,8 @@ do { \ + #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock)) + #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) + +-#define write_lock(lock) _raw_write_lock(lock) +-#define read_lock(lock) _raw_read_lock(lock) ++#define write_lock(lock) PICK_RWOP(_write_lock, lock) ++#define read_lock(lock) PICK_RWOP(_read_lock, lock) + + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +@@ -102,8 +102,8 @@ do { \ + #define read_lock_bh(lock) _raw_read_lock_bh(lock) + #define write_lock_irq(lock) _raw_write_lock_irq(lock) + #define write_lock_bh(lock) _raw_write_lock_bh(lock) +-#define read_unlock(lock) _raw_read_unlock(lock) +-#define write_unlock(lock) _raw_write_unlock(lock) ++#define read_unlock(lock) PICK_RWOP(_read_unlock, lock) ++#define write_unlock(lock) PICK_RWOP(_write_unlock, lock) + #define read_unlock_irq(lock) _raw_read_unlock_irq(lock) + #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) + +diff -uprN kernel/include/linux/sched/coredump.h kernel_new/include/linux/sched/coredump.h +--- kernel/include/linux/sched/coredump.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/sched/coredump.h 2021-04-01 18:28:07.803863123 +0800 +@@ -74,6 +74,7 @@ static inline int get_dumpable(struct mm + #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ + #define MMF_MULTIPROCESS 27 /* mm is shared between processes */ + #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) ++#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ + + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK) +diff -uprN kernel/include/linux/sched/coredump.h.orig kernel_new/include/linux/sched/coredump.h.orig +--- kernel/include/linux/sched/coredump.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/sched/coredump.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,81 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_COREDUMP_H ++#define _LINUX_SCHED_COREDUMP_H ++ ++#include ++ ++#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ ++#define SUID_DUMP_USER 1 /* Dump as user of process */ ++#define SUID_DUMP_ROOT 2 /* Dump as root */ ++ ++/* mm flags */ ++ ++/* for SUID_DUMP_* above */ ++#define MMF_DUMPABLE_BITS 2 ++#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) ++ ++extern void set_dumpable(struct mm_struct *mm, int value); ++/* ++ * This returns the actual value of the suid_dumpable flag. For things ++ * that are using this for checking for privilege transitions, it must ++ * test against SUID_DUMP_USER rather than treating it as a boolean ++ * value. 
++ */ ++static inline int __get_dumpable(unsigned long mm_flags) ++{ ++ return mm_flags & MMF_DUMPABLE_MASK; ++} ++ ++static inline int get_dumpable(struct mm_struct *mm) ++{ ++ return __get_dumpable(mm->flags); ++} ++ ++/* coredump filter bits */ ++#define MMF_DUMP_ANON_PRIVATE 2 ++#define MMF_DUMP_ANON_SHARED 3 ++#define MMF_DUMP_MAPPED_PRIVATE 4 ++#define MMF_DUMP_MAPPED_SHARED 5 ++#define MMF_DUMP_ELF_HEADERS 6 ++#define MMF_DUMP_HUGETLB_PRIVATE 7 ++#define MMF_DUMP_HUGETLB_SHARED 8 ++#define MMF_DUMP_DAX_PRIVATE 9 ++#define MMF_DUMP_DAX_SHARED 10 ++ ++#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS ++#define MMF_DUMP_FILTER_BITS 9 ++#define MMF_DUMP_FILTER_MASK \ ++ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) ++#define MMF_DUMP_FILTER_DEFAULT \ ++ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ ++ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) ++ ++#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS ++# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) ++#else ++# define MMF_DUMP_MASK_DEFAULT_ELF 0 ++#endif ++ /* leave room for more dump flags */ ++#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ ++#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ ++/* ++ * This one-shot flag is dropped due to necessity of changing exe once again ++ * on NFS restore ++ */ ++//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ ++ ++#define MMF_HAS_UPROBES 19 /* has uprobes */ ++#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ ++#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ ++#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ ++#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ ++#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ ++#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ ++#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ ++#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ ++#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) ++ ++#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ ++ MMF_DISABLE_THP_MASK) ++ ++#endif /* _LINUX_SCHED_COREDUMP_H */ +diff -uprN kernel/include/linux/sched.h kernel_new/include/linux/sched.h +--- kernel/include/linux/sched.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/sched.h 2021-04-01 18:28:07.803863123 +0800 +@@ -84,7 +84,9 @@ struct task_group; + #define TASK_WAKING 0x0200 + #define TASK_NOLOAD 0x0400 + #define TASK_NEW 0x0800 +-#define TASK_STATE_MAX 0x1000 ++#define TASK_HARDENING 0x1000 ++#define TASK_NOWAKEUP 0x2000 ++#define TASK_STATE_MAX 0x4000 + + /* Convenience macros for the sake of set_current_state: */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +diff -uprN kernel/include/linux/sched.h.orig kernel_new/include/linux/sched.h.orig +--- kernel/include/linux/sched.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/include/linux/sched.h.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1931 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_SCHED_H ++#define _LINUX_SCHED_H ++ ++/* ++ * Define 'struct task_struct' and provide the main scheduler ++ * APIs (schedule(), wakeup variants, etc.) 
++ */ ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* task_struct member predeclarations (sorted alphabetically): */ ++struct audit_context; ++struct backing_dev_info; ++struct bio_list; ++struct blk_plug; ++struct cfs_rq; ++struct fs_struct; ++struct futex_pi_state; ++struct io_context; ++struct mempolicy; ++struct nameidata; ++struct nsproxy; ++struct perf_event_context; ++struct pid_namespace; ++struct pipe_inode_info; ++struct rcu_node; ++struct reclaim_state; ++struct robust_list_head; ++struct sched_attr; ++struct sched_param; ++struct seq_file; ++struct sighand_struct; ++struct signal_struct; ++struct task_delay_info; ++struct task_group; ++ ++/* ++ * Task state bitmask. NOTE! These bits are also ++ * encoded in fs/proc/array.c: get_task_state(). ++ * ++ * We have two separate sets of flags: task->state ++ * is about runnability, while task->exit_state are ++ * about the task exiting. Confusing, but this way ++ * modifying one set can't modify the other one by ++ * mistake. ++ */ ++ ++/* Used in tsk->state: */ ++#define TASK_RUNNING 0x0000 ++#define TASK_INTERRUPTIBLE 0x0001 ++#define TASK_UNINTERRUPTIBLE 0x0002 ++#define __TASK_STOPPED 0x0004 ++#define __TASK_TRACED 0x0008 ++/* Used in tsk->exit_state: */ ++#define EXIT_DEAD 0x0010 ++#define EXIT_ZOMBIE 0x0020 ++#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) ++/* Used in tsk->state again: */ ++#define TASK_PARKED 0x0040 ++#define TASK_DEAD 0x0080 ++#define TASK_WAKEKILL 0x0100 ++#define TASK_WAKING 0x0200 ++#define TASK_NOLOAD 0x0400 ++#define TASK_NEW 0x0800 ++#define TASK_STATE_MAX 0x1000 ++ ++/* Convenience macros for the sake of set_current_state: */ ++#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) ++#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) ++#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) ++ ++#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) ++ ++/* Convenience macros for the sake of wake_up(): */ ++#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) ++ ++/* get_task_state(): */ ++#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ ++ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ ++ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ ++ TASK_PARKED) ++ ++#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) ++ ++#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) ++ ++#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) ++ ++#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ ++ (task->flags & PF_FROZEN) == 0 && \ ++ (task->state & TASK_NOLOAD) == 0) ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++ ++/* ++ * Special states are those that do not use the normal wait-loop pattern. See ++ * the comment with set_special_state(). 
++ */ ++#define is_special_task_state(state) \ ++ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) ++ ++#define __set_current_state(state_value) \ ++ do { \ ++ WARN_ON_ONCE(is_special_task_state(state_value));\ ++ current->task_state_change = _THIS_IP_; \ ++ current->state = (state_value); \ ++ } while (0) ++ ++#define set_current_state(state_value) \ ++ do { \ ++ WARN_ON_ONCE(is_special_task_state(state_value));\ ++ current->task_state_change = _THIS_IP_; \ ++ smp_store_mb(current->state, (state_value)); \ ++ } while (0) ++ ++#define set_special_state(state_value) \ ++ do { \ ++ unsigned long flags; /* may shadow */ \ ++ WARN_ON_ONCE(!is_special_task_state(state_value)); \ ++ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ ++ current->task_state_change = _THIS_IP_; \ ++ current->state = (state_value); \ ++ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ ++ } while (0) ++#else ++/* ++ * set_current_state() includes a barrier so that the write of current->state ++ * is correctly serialised wrt the caller's subsequent test of whether to ++ * actually sleep: ++ * ++ * for (;;) { ++ * set_current_state(TASK_UNINTERRUPTIBLE); ++ * if (!need_sleep) ++ * break; ++ * ++ * schedule(); ++ * } ++ * __set_current_state(TASK_RUNNING); ++ * ++ * If the caller does not need such serialisation (because, for instance, the ++ * condition test and condition change and wakeup are under the same lock) then ++ * use __set_current_state(). ++ * ++ * The above is typically ordered against the wakeup, which does: ++ * ++ * need_sleep = false; ++ * wake_up_state(p, TASK_UNINTERRUPTIBLE); ++ * ++ * where wake_up_state() executes a full memory barrier before accessing the ++ * task state. ++ * ++ * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, ++ * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a ++ * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). ++ * ++ * However, with slightly different timing the wakeup TASK_RUNNING store can ++ * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not ++ * a problem either because that will result in one extra go around the loop ++ * and our @cond test will save the day. ++ * ++ * Also see the comments of try_to_wake_up(). ++ */ ++#define __set_current_state(state_value) \ ++ current->state = (state_value) ++ ++#define set_current_state(state_value) \ ++ smp_store_mb(current->state, (state_value)) ++ ++/* ++ * set_special_state() should be used for those states when the blocking task ++ * can not use the regular condition based wait-loop. In that case we must ++ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores ++ * will not collide with our state change. 
++ */ ++#define set_special_state(state_value) \ ++ do { \ ++ unsigned long flags; /* may shadow */ \ ++ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ ++ current->state = (state_value); \ ++ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ ++ } while (0) ++ ++#endif ++ ++/* Task command name length: */ ++#define TASK_COMM_LEN 16 ++ ++extern void scheduler_tick(void); ++ ++#define MAX_SCHEDULE_TIMEOUT LONG_MAX ++ ++extern long schedule_timeout(long timeout); ++extern long schedule_timeout_interruptible(long timeout); ++extern long schedule_timeout_killable(long timeout); ++extern long schedule_timeout_uninterruptible(long timeout); ++extern long schedule_timeout_idle(long timeout); ++asmlinkage void schedule(void); ++extern void schedule_preempt_disabled(void); ++ ++extern int __must_check io_schedule_prepare(void); ++extern void io_schedule_finish(int token); ++extern long io_schedule_timeout(long timeout); ++extern void io_schedule(void); ++ ++/** ++ * struct prev_cputime - snapshot of system and user cputime ++ * @utime: time spent in user mode ++ * @stime: time spent in system mode ++ * @lock: protects the above two fields ++ * ++ * Stores previous user/system time values such that we can guarantee ++ * monotonicity. ++ */ ++struct prev_cputime { ++#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ++ u64 utime; ++ u64 stime; ++ raw_spinlock_t lock; ++#endif ++}; ++ ++/** ++ * struct task_cputime - collected CPU time counts ++ * @utime: time spent in user mode, in nanoseconds ++ * @stime: time spent in kernel mode, in nanoseconds ++ * @sum_exec_runtime: total time spent on the CPU, in nanoseconds ++ * ++ * This structure groups together three kinds of CPU time that are tracked for ++ * threads and thread groups. Most things considering CPU time want to group ++ * these counts together and treat all three of them in parallel. ++ */ ++struct task_cputime { ++ u64 utime; ++ u64 stime; ++ unsigned long long sum_exec_runtime; ++}; ++ ++/* Alternate field names when used on cache expirations: */ ++#define virt_exp utime ++#define prof_exp stime ++#define sched_exp sum_exec_runtime ++ ++enum vtime_state { ++ /* Task is sleeping or running in a CPU with VTIME inactive: */ ++ VTIME_INACTIVE = 0, ++ /* Task runs in userspace in a CPU with VTIME active: */ ++ VTIME_USER, ++ /* Task runs in kernelspace in a CPU with VTIME active: */ ++ VTIME_SYS, ++}; ++ ++struct vtime { ++ seqcount_t seqcount; ++ unsigned long long starttime; ++ enum vtime_state state; ++ u64 utime; ++ u64 stime; ++ u64 gtime; ++}; ++ ++struct sched_info { ++#ifdef CONFIG_SCHED_INFO ++ /* Cumulative counters: */ ++ ++ /* # of times we have run on this CPU: */ ++ unsigned long pcount; ++ ++ /* Time spent waiting on a runqueue: */ ++ unsigned long long run_delay; ++ ++ /* Timestamps: */ ++ ++ /* When did we last run on a CPU? */ ++ unsigned long long last_arrival; ++ ++ /* When were we last queued to run? */ ++ unsigned long long last_queued; ++ ++#endif /* CONFIG_SCHED_INFO */ ++}; ++ ++/* ++ * Integer metrics need fixed point arithmetic, e.g., sched/fair ++ * has a few: load, load_avg, util_avg, freq, and capacity. ++ * ++ * We define a basic fixed point arithmetic range, and then formalize ++ * all these metrics based on that basic range. 
++ */ ++# define SCHED_FIXEDPOINT_SHIFT 10 ++# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) ++ ++struct load_weight { ++ unsigned long weight; ++ u32 inv_weight; ++}; ++ ++/** ++ * struct util_est - Estimation utilization of FAIR tasks ++ * @enqueued: instantaneous estimated utilization of a task/cpu ++ * @ewma: the Exponential Weighted Moving Average (EWMA) ++ * utilization of a task ++ * ++ * Support data structure to track an Exponential Weighted Moving Average ++ * (EWMA) of a FAIR task's utilization. New samples are added to the moving ++ * average each time a task completes an activation. Sample's weight is chosen ++ * so that the EWMA will be relatively insensitive to transient changes to the ++ * task's workload. ++ * ++ * The enqueued attribute has a slightly different meaning for tasks and cpus: ++ * - task: the task's util_avg at last task dequeue time ++ * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU ++ * Thus, the util_est.enqueued of a task represents the contribution on the ++ * estimated utilization of the CPU where that task is currently enqueued. ++ * ++ * Only for tasks we track a moving average of the past instantaneous ++ * estimated utilization. This allows to absorb sporadic drops in utilization ++ * of an otherwise almost periodic task. ++ */ ++struct util_est { ++ unsigned int enqueued; ++ unsigned int ewma; ++#define UTIL_EST_WEIGHT_SHIFT 2 ++} __attribute__((__aligned__(sizeof(u64)))); ++ ++/* ++ * The load_avg/util_avg accumulates an infinite geometric series ++ * (see __update_load_avg() in kernel/sched/fair.c). ++ * ++ * [load_avg definition] ++ * ++ * load_avg = runnable% * scale_load_down(load) ++ * ++ * where runnable% is the time ratio that a sched_entity is runnable. ++ * For cfs_rq, it is the aggregated load_avg of all runnable and ++ * blocked sched_entities. ++ * ++ * load_avg may also take frequency scaling into account: ++ * ++ * load_avg = runnable% * scale_load_down(load) * freq% ++ * ++ * where freq% is the CPU frequency normalized to the highest frequency. ++ * ++ * [util_avg definition] ++ * ++ * util_avg = running% * SCHED_CAPACITY_SCALE ++ * ++ * where running% is the time ratio that a sched_entity is running on ++ * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable ++ * and blocked sched_entities. ++ * ++ * util_avg may also factor frequency scaling and CPU capacity scaling: ++ * ++ * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% ++ * ++ * where freq% is the same as above, and capacity% is the CPU capacity ++ * normalized to the greatest capacity (due to uarch differences, etc). ++ * ++ * N.B., the above ratios (runnable%, running%, freq%, and capacity%) ++ * themselves are in the range of [0, 1]. To do fixed point arithmetics, ++ * we therefore scale them to as large a range as necessary. This is for ++ * example reflected by util_avg's SCHED_CAPACITY_SCALE. ++ * ++ * [Overflow issue] ++ * ++ * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities ++ * with the highest load (=88761), always runnable on a single cfs_rq, ++ * and should not overflow as the number already hits PID_MAX_LIMIT. ++ * ++ * For all other cases (including 32-bit kernels), struct load_weight's ++ * weight will overflow first before we do, because: ++ * ++ * Max(load_avg) <= Max(load.weight) ++ * ++ * Then it is the load_weight's responsibility to consider overflow ++ * issues. 
++ */ ++struct sched_avg { ++ u64 last_update_time; ++ u64 load_sum; ++ u64 runnable_load_sum; ++ u32 util_sum; ++ u32 period_contrib; ++ unsigned long load_avg; ++ unsigned long runnable_load_avg; ++ unsigned long util_avg; ++ struct util_est util_est; ++} ____cacheline_aligned; ++ ++struct sched_statistics { ++#ifdef CONFIG_SCHEDSTATS ++ u64 wait_start; ++ u64 wait_max; ++ u64 wait_count; ++ u64 wait_sum; ++ u64 iowait_count; ++ u64 iowait_sum; ++ ++ u64 sleep_start; ++ u64 sleep_max; ++ s64 sum_sleep_runtime; ++ ++ u64 block_start; ++ u64 block_max; ++ u64 exec_max; ++ u64 slice_max; ++ ++ u64 nr_migrations_cold; ++ u64 nr_failed_migrations_affine; ++ u64 nr_failed_migrations_running; ++ u64 nr_failed_migrations_hot; ++ u64 nr_forced_migrations; ++ ++ u64 nr_wakeups; ++ u64 nr_wakeups_sync; ++ u64 nr_wakeups_migrate; ++ u64 nr_wakeups_local; ++ u64 nr_wakeups_remote; ++ u64 nr_wakeups_affine; ++ u64 nr_wakeups_affine_attempts; ++ u64 nr_wakeups_passive; ++ u64 nr_wakeups_idle; ++#endif ++}; ++ ++struct sched_entity { ++ /* For load-balancing: */ ++ struct load_weight load; ++ unsigned long runnable_weight; ++ struct rb_node run_node; ++ struct list_head group_node; ++ unsigned int on_rq; ++ ++ u64 exec_start; ++ u64 sum_exec_runtime; ++ u64 vruntime; ++ u64 prev_sum_exec_runtime; ++ ++ u64 nr_migrations; ++ ++ struct sched_statistics statistics; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ int depth; ++ struct sched_entity *parent; ++ /* rq on which this entity is (to be) queued: */ ++ struct cfs_rq *cfs_rq; ++ /* rq "owned" by this entity/group: */ ++ struct cfs_rq *my_q; ++#endif ++ ++#ifdef CONFIG_SMP ++ /* ++ * Per entity load average tracking. ++ * ++ * Put into separate cache line so it does not ++ * collide with read-mostly values above. ++ */ ++ struct sched_avg avg; ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++}; ++ ++struct sched_rt_entity { ++ struct list_head run_list; ++ unsigned long timeout; ++ unsigned long watchdog_stamp; ++ unsigned int time_slice; ++ unsigned short on_rq; ++ unsigned short on_list; ++ ++ struct sched_rt_entity *back; ++#ifdef CONFIG_RT_GROUP_SCHED ++ struct sched_rt_entity *parent; ++ /* rq on which this entity is (to be) queued: */ ++ struct rt_rq *rt_rq; ++ /* rq "owned" by this entity/group: */ ++ struct rt_rq *my_q; ++#endif ++} __randomize_layout; ++ ++struct sched_dl_entity { ++ struct rb_node rb_node; ++ ++ /* ++ * Original scheduling parameters. Copied here from sched_attr ++ * during sched_setattr(), they will remain the same until ++ * the next sched_setattr(). ++ */ ++ u64 dl_runtime; /* Maximum runtime for each instance */ ++ u64 dl_deadline; /* Relative deadline of each instance */ ++ u64 dl_period; /* Separation of two instances (period) */ ++ u64 dl_bw; /* dl_runtime / dl_period */ ++ u64 dl_density; /* dl_runtime / dl_deadline */ ++ ++ /* ++ * Actual scheduling parameters. Initialized with the values above, ++ * they are continously updated during task execution. Note that ++ * the remaining runtime could be < 0 in case we are in overrun. ++ */ ++ s64 runtime; /* Remaining runtime for this instance */ ++ u64 deadline; /* Absolute deadline for this instance */ ++ unsigned int flags; /* Specifying the scheduler behaviour */ ++ ++ /* ++ * Some bool flags: ++ * ++ * @dl_throttled tells if we exhausted the runtime. If so, the ++ * task has to wait for a replenishment to be performed at the ++ * next firing of dl_timer. ++ * ++ * @dl_boosted tells if we are boosted due to DI. 
If so we are ++ * outside bandwidth enforcement mechanism (but only until we ++ * exit the critical section); ++ * ++ * @dl_yielded tells if task gave up the CPU before consuming ++ * all its available runtime during the last job. ++ * ++ * @dl_non_contending tells if the task is inactive while still ++ * contributing to the active utilization. In other words, it ++ * indicates if the inactive timer has been armed and its handler ++ * has not been executed yet. This flag is useful to avoid race ++ * conditions between the inactive timer handler and the wakeup ++ * code. ++ * ++ * @dl_overrun tells if the task asked to be informed about runtime ++ * overruns. ++ */ ++ unsigned int dl_throttled : 1; ++ unsigned int dl_boosted : 1; ++ unsigned int dl_yielded : 1; ++ unsigned int dl_non_contending : 1; ++ unsigned int dl_overrun : 1; ++ ++ /* ++ * Bandwidth enforcement timer. Each -deadline task has its ++ * own bandwidth to be enforced, thus we need one timer per task. ++ */ ++ struct hrtimer dl_timer; ++ ++ /* ++ * Inactive timer, responsible for decreasing the active utilization ++ * at the "0-lag time". When a -deadline task blocks, it contributes ++ * to GRUB's active utilization until the "0-lag time", hence a ++ * timer is needed to decrease the active utilization at the correct ++ * time. ++ */ ++ struct hrtimer inactive_timer; ++}; ++ ++union rcu_special { ++ struct { ++ u8 blocked; ++ u8 need_qs; ++ u8 exp_need_qs; ++ ++ /* Otherwise the compiler can store garbage here: */ ++ u8 pad; ++ } b; /* Bits. */ ++ u32 s; /* Set of bits. */ ++}; ++ ++enum perf_event_task_context { ++ perf_invalid_context = -1, ++ perf_hw_context = 0, ++ perf_sw_context, ++ perf_nr_task_contexts, ++}; ++ ++struct wake_q_node { ++ struct wake_q_node *next; ++}; ++ ++struct task_struct { ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * For reasons of header soup (see current_thread_info()), this ++ * must be the first element of task_struct. ++ */ ++ struct thread_info thread_info; ++#endif ++ /* -1 unrunnable, 0 runnable, >0 stopped: */ ++ volatile long state; ++ ++ /* ++ * This begins the randomizable portion of task_struct. Only ++ * scheduling-critical items should be added above here. ++ */ ++ randomized_struct_fields_start ++ ++ void *stack; ++ atomic_t usage; ++ /* Per task flags (PF_*), defined further below: */ ++ unsigned int flags; ++ unsigned int ptrace; ++ ++#ifdef CONFIG_SMP ++ struct llist_node wake_entry; ++ int on_cpu; ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* Current CPU: */ ++ unsigned int cpu; ++#endif ++ unsigned int wakee_flips; ++ unsigned long wakee_flip_decay_ts; ++ struct task_struct *last_wakee; ++ ++ /* ++ * recent_used_cpu is initially set as the last CPU used by a task ++ * that wakes affine another task. Waker/wakee relationships can ++ * push tasks around a CPU where each wakeup moves to the next one. ++ * Tracking a recently used CPU allows a quick search for a recently ++ * used CPU that may be idle. 
++ */ ++ int recent_used_cpu; ++ int wake_cpu; ++#endif ++ int on_rq; ++ ++ int prio; ++ int static_prio; ++ int normal_prio; ++ unsigned int rt_priority; ++ ++ const struct sched_class *sched_class; ++ struct sched_entity se; ++ struct sched_rt_entity rt; ++#ifdef CONFIG_CGROUP_SCHED ++ struct task_group *sched_task_group; ++#endif ++ struct sched_dl_entity dl; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ /* List of struct preempt_notifier: */ ++ struct hlist_head preempt_notifiers; ++#endif ++ ++#ifdef CONFIG_BLK_DEV_IO_TRACE ++ unsigned int btrace_seq; ++#endif ++ ++ unsigned int policy; ++ int nr_cpus_allowed; ++ cpumask_t cpus_allowed; ++ ++#ifdef CONFIG_PREEMPT_RCU ++ int rcu_read_lock_nesting; ++ union rcu_special rcu_read_unlock_special; ++ struct list_head rcu_node_entry; ++ struct rcu_node *rcu_blocked_node; ++#endif /* #ifdef CONFIG_PREEMPT_RCU */ ++ ++#ifdef CONFIG_TASKS_RCU ++ unsigned long rcu_tasks_nvcsw; ++ u8 rcu_tasks_holdout; ++ u8 rcu_tasks_idx; ++ int rcu_tasks_idle_cpu; ++ struct list_head rcu_tasks_holdout_list; ++#endif /* #ifdef CONFIG_TASKS_RCU */ ++ ++ struct sched_info sched_info; ++ ++ struct list_head tasks; ++#ifdef CONFIG_SMP ++ struct plist_node pushable_tasks; ++ struct rb_node pushable_dl_tasks; ++#endif ++ ++ struct mm_struct *mm; ++ struct mm_struct *active_mm; ++ ++ /* Per-thread vma caching: */ ++ struct vmacache vmacache; ++ ++#ifdef SPLIT_RSS_COUNTING ++ struct task_rss_stat rss_stat; ++#endif ++ int exit_state; ++ int exit_code; ++ int exit_signal; ++ /* The signal sent when the parent dies: */ ++ int pdeath_signal; ++ /* JOBCTL_*, siglock protected: */ ++ unsigned long jobctl; ++ ++ /* Used for emulating ABI behavior of previous Linux versions: */ ++ unsigned int personality; ++ ++ /* Scheduler bits, serialized by scheduler locks: */ ++ unsigned sched_reset_on_fork:1; ++ unsigned sched_contributes_to_load:1; ++ unsigned sched_migrated:1; ++ unsigned sched_remote_wakeup:1; ++ /* Force alignment to the next boundary: */ ++ unsigned :0; ++ ++ /* Unserialized, strictly 'current' */ ++ ++ /* Bit to tell LSMs we're in execve(): */ ++ unsigned in_execve:1; ++ unsigned in_iowait:1; ++#ifndef TIF_RESTORE_SIGMASK ++ unsigned restore_sigmask:1; ++#endif ++#ifdef CONFIG_MEMCG ++ unsigned in_user_fault:1; ++#ifdef CONFIG_MEMCG_KMEM ++ unsigned memcg_kmem_skip_account:1; ++#endif ++#endif ++#ifdef CONFIG_COMPAT_BRK ++ unsigned brk_randomized:1; ++#endif ++#ifdef CONFIG_CGROUPS ++ /* disallow userland-initiated cgroup migration */ ++ unsigned no_cgroup_migration:1; ++#endif ++#ifdef CONFIG_BLK_CGROUP ++ /* to be used once the psi infrastructure lands upstream. */ ++ unsigned use_memdelay:1; ++#endif ++ ++ unsigned long atomic_flags; /* Flags requiring atomic access. */ ++ ++ struct restart_block restart_block; ++ ++ pid_t pid; ++ pid_t tgid; ++ ++#ifdef CONFIG_STACKPROTECTOR ++ /* Canary value for the -fstack-protector GCC feature: */ ++ unsigned long stack_canary; ++#endif ++ /* ++ * Pointers to the (original) parent process, youngest child, younger sibling, ++ * older sibling, respectively. (p->father can be replaced with ++ * p->real_parent->pid) ++ */ ++ ++ /* Real parent process: */ ++ struct task_struct __rcu *real_parent; ++ ++ /* Recipient of SIGCHLD, wait4() reports: */ ++ struct task_struct __rcu *parent; ++ ++ /* ++ * Children/sibling form the list of natural children: ++ */ ++ struct list_head children; ++ struct list_head sibling; ++ struct task_struct *group_leader; ++ ++ /* ++ * 'ptraced' is the list of tasks this task is using ptrace() on. 
++ * ++ * This includes both natural children and PTRACE_ATTACH targets. ++ * 'ptrace_entry' is this task's link on the p->parent->ptraced list. ++ */ ++ struct list_head ptraced; ++ struct list_head ptrace_entry; ++ ++ /* PID/PID hash table linkage. */ ++ struct pid *thread_pid; ++ struct hlist_node pid_links[PIDTYPE_MAX]; ++ struct list_head thread_group; ++ struct list_head thread_node; ++ ++ struct completion *vfork_done; ++ ++ /* CLONE_CHILD_SETTID: */ ++ int __user *set_child_tid; ++ ++ /* CLONE_CHILD_CLEARTID: */ ++ int __user *clear_child_tid; ++ ++ u64 utime; ++ u64 stime; ++#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME ++ u64 utimescaled; ++ u64 stimescaled; ++#endif ++ u64 gtime; ++ struct prev_cputime prev_cputime; ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN ++ struct vtime vtime; ++#endif ++ ++#ifdef CONFIG_NO_HZ_FULL ++ atomic_t tick_dep_mask; ++#endif ++ /* Context switch counts: */ ++ unsigned long nvcsw; ++ unsigned long nivcsw; ++ ++ /* Monotonic time in nsecs: */ ++ u64 start_time; ++ ++ /* Boot based time in nsecs: */ ++ u64 real_start_time; ++ ++ /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ ++ unsigned long min_flt; ++ unsigned long maj_flt; ++ ++#ifdef CONFIG_POSIX_TIMERS ++ struct task_cputime cputime_expires; ++ struct list_head cpu_timers[3]; ++#endif ++ ++ /* Process credentials: */ ++ ++ /* Tracer's credentials at attach: */ ++ const struct cred __rcu *ptracer_cred; ++ ++ /* Objective and real subjective task credentials (COW): */ ++ const struct cred __rcu *real_cred; ++ ++ /* Effective (overridable) subjective task credentials (COW): */ ++ const struct cred __rcu *cred; ++ ++ /* ++ * executable name, excluding path. ++ * ++ * - normally initialized setup_new_exec() ++ * - access it with [gs]et_task_comm() ++ * - lock it with task_lock() ++ */ ++ char comm[TASK_COMM_LEN]; ++ ++ struct nameidata *nameidata; ++ ++#ifdef CONFIG_SYSVIPC ++ struct sysv_sem sysvsem; ++ struct sysv_shm sysvshm; ++#endif ++#ifdef CONFIG_DETECT_HUNG_TASK ++ unsigned long last_switch_count; ++ unsigned long last_switch_time; ++#endif ++ /* Filesystem information: */ ++ struct fs_struct *fs; ++ ++ /* Open file information: */ ++ struct files_struct *files; ++ ++ /* Namespaces: */ ++ struct nsproxy *nsproxy; ++ ++ /* Signal handlers: */ ++ struct signal_struct *signal; ++ struct sighand_struct *sighand; ++ sigset_t blocked; ++ sigset_t real_blocked; ++ /* Restored if set_restore_sigmask() was used: */ ++ sigset_t saved_sigmask; ++ struct sigpending pending; ++ unsigned long sas_ss_sp; ++ size_t sas_ss_size; ++ unsigned int sas_ss_flags; ++ ++ struct callback_head *task_works; ++ ++ struct audit_context *audit_context; ++#ifdef CONFIG_AUDITSYSCALL ++ kuid_t loginuid; ++ unsigned int sessionid; ++#endif ++ struct seccomp seccomp; ++ ++ /* Thread group tracking: */ ++ u32 parent_exec_id; ++ u32 self_exec_id; ++ ++ /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ ++ spinlock_t alloc_lock; ++ ++ /* Protection of the PI data structures: */ ++ raw_spinlock_t pi_lock; ++ ++ struct wake_q_node wake_q; ++ ++#ifdef CONFIG_RT_MUTEXES ++ /* PI waiters blocked on a rt_mutex held by this task: */ ++ struct rb_root_cached pi_waiters; ++ /* Updated under owner's pi_lock and rq lock */ ++ struct task_struct *pi_top_task; ++ /* Deadlock detection and priority inheritance handling: */ ++ struct rt_mutex_waiter *pi_blocked_on; ++#endif ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++ /* Mutex deadlock detection: */ ++ struct 
mutex_waiter *blocked_on; ++#endif ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++ unsigned int irq_events; ++ unsigned long hardirq_enable_ip; ++ unsigned long hardirq_disable_ip; ++ unsigned int hardirq_enable_event; ++ unsigned int hardirq_disable_event; ++ int hardirqs_enabled; ++ int hardirq_context; ++ unsigned long softirq_disable_ip; ++ unsigned long softirq_enable_ip; ++ unsigned int softirq_disable_event; ++ unsigned int softirq_enable_event; ++ int softirqs_enabled; ++ int softirq_context; ++#endif ++ ++#ifdef CONFIG_LOCKDEP ++# define MAX_LOCK_DEPTH 48UL ++ u64 curr_chain_key; ++ int lockdep_depth; ++ unsigned int lockdep_recursion; ++ struct held_lock held_locks[MAX_LOCK_DEPTH]; ++#endif ++ ++#ifdef CONFIG_UBSAN ++ unsigned int in_ubsan; ++#endif ++ ++ /* Journalling filesystem info: */ ++ void *journal_info; ++ ++ /* Stacked block device info: */ ++ struct bio_list *bio_list; ++ ++#ifdef CONFIG_BLOCK ++ /* Stack plugging: */ ++ struct blk_plug *plug; ++#endif ++ ++ /* VM state: */ ++ struct reclaim_state *reclaim_state; ++ ++ struct backing_dev_info *backing_dev_info; ++ ++ struct io_context *io_context; ++ ++ /* Ptrace state: */ ++ unsigned long ptrace_message; ++ siginfo_t *last_siginfo; ++ ++ struct task_io_accounting ioac; ++#ifdef CONFIG_TASK_XACCT ++ /* Accumulated RSS usage: */ ++ u64 acct_rss_mem1; ++ /* Accumulated virtual memory usage: */ ++ u64 acct_vm_mem1; ++ /* stime + utime since last update: */ ++ u64 acct_timexpd; ++#endif ++#ifdef CONFIG_CPUSETS ++ /* Protected by ->alloc_lock: */ ++ nodemask_t mems_allowed; ++ /* Seqence number to catch updates: */ ++ seqcount_t mems_allowed_seq; ++ int cpuset_mem_spread_rotor; ++ int cpuset_slab_spread_rotor; ++#endif ++#ifdef CONFIG_CGROUPS ++ /* Control Group info protected by css_set_lock: */ ++ struct css_set __rcu *cgroups; ++ /* cg_list protected by css_set_lock and tsk->alloc_lock: */ ++ struct list_head cg_list; ++#endif ++#if defined(CONFIG_RESCTRL) || defined(CONFIG_INTEL_RDT) ++ u32 closid; ++ u32 rmid; ++#endif ++#ifdef CONFIG_FUTEX ++ struct robust_list_head __user *robust_list; ++#ifdef CONFIG_COMPAT ++ struct compat_robust_list_head __user *compat_robust_list; ++#endif ++ struct list_head pi_state_list; ++ struct futex_pi_state *pi_state_cache; ++#endif ++#ifdef CONFIG_PERF_EVENTS ++ struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; ++ struct mutex perf_event_mutex; ++ struct list_head perf_event_list; ++#endif ++#ifdef CONFIG_DEBUG_PREEMPT ++ unsigned long preempt_disable_ip; ++#endif ++#ifdef CONFIG_NUMA ++ /* Protected by alloc_lock: */ ++ struct mempolicy *mempolicy; ++ short il_prev; ++ short pref_node_fork; ++#endif ++#ifdef CONFIG_NUMA_BALANCING ++ int numa_scan_seq; ++ unsigned int numa_scan_period; ++ unsigned int numa_scan_period_max; ++ int numa_preferred_nid; ++ unsigned long numa_migrate_retry; ++ /* Migration stamp: */ ++ u64 node_stamp; ++ u64 last_task_numa_placement; ++ u64 last_sum_exec_runtime; ++ struct callback_head numa_work; ++ ++ /* ++ * This pointer is only modified for current in syscall and ++ * pagefault context (and for tasks being destroyed), so it can be read ++ * from any of the following contexts: ++ * - RCU read-side critical section ++ * - current->numa_group from everywhere ++ * - task's runqueue locked, task not running ++ */ ++ struct numa_group __rcu *numa_group; ++ ++ /* ++ * numa_faults is an array split into four regions: ++ * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer ++ * in this precise order. 
++ * ++ * faults_memory: Exponential decaying average of faults on a per-node ++ * basis. Scheduling placement decisions are made based on these ++ * counts. The values remain static for the duration of a PTE scan. ++ * faults_cpu: Track the nodes the process was running on when a NUMA ++ * hinting fault was incurred. ++ * faults_memory_buffer and faults_cpu_buffer: Record faults per node ++ * during the current scan window. When the scan completes, the counts ++ * in faults_memory and faults_cpu decay and these values are copied. ++ */ ++ unsigned long *numa_faults; ++ unsigned long total_numa_faults; ++ ++ /* ++ * numa_faults_locality tracks if faults recorded during the last ++ * scan window were remote/local or failed to migrate. The task scan ++ * period is adapted based on the locality of the faults with different ++ * weights depending on whether they were shared or private faults ++ */ ++ unsigned long numa_faults_locality[3]; ++ ++ unsigned long numa_pages_migrated; ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_RSEQ ++ struct rseq __user *rseq; ++ u32 rseq_len; ++ u32 rseq_sig; ++ /* ++ * RmW on rseq_event_mask must be performed atomically ++ * with respect to preemption. ++ */ ++ unsigned long rseq_event_mask; ++#endif ++ ++ struct tlbflush_unmap_batch tlb_ubc; ++ ++ struct rcu_head rcu; ++ ++ /* Cache last used pipe for splice(): */ ++ struct pipe_inode_info *splice_pipe; ++ ++ struct page_frag task_frag; ++ ++#ifdef CONFIG_TASK_DELAY_ACCT ++ struct task_delay_info *delays; ++#endif ++ ++#ifdef CONFIG_FAULT_INJECTION ++ int make_it_fail; ++ unsigned int fail_nth; ++#endif ++ /* ++ * When (nr_dirtied >= nr_dirtied_pause), it's time to call ++ * balance_dirty_pages() for a dirty throttling pause: ++ */ ++ int nr_dirtied; ++ int nr_dirtied_pause; ++ /* Start of a write-and-pause period: */ ++ unsigned long dirty_paused_when; ++ ++#ifdef CONFIG_LATENCYTOP ++ int latency_record_count; ++ struct latency_record latency_record[LT_SAVECOUNT]; ++#endif ++ /* ++ * Time slack values; these are used to round up poll() and ++ * select() etc timeout values. These are in nanoseconds. 
++ */ ++ u64 timer_slack_ns; ++ u64 default_timer_slack_ns; ++ ++#ifdef CONFIG_KASAN ++ unsigned int kasan_depth; ++#endif ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ /* Index of current stored address in ret_stack: */ ++ int curr_ret_stack; ++ int curr_ret_depth; ++ ++ /* Stack of return addresses for return function tracing: */ ++ struct ftrace_ret_stack *ret_stack; ++ ++ /* Timestamp for last schedule: */ ++ unsigned long long ftrace_timestamp; ++ ++ /* ++ * Number of functions that haven't been traced ++ * because of depth overrun: ++ */ ++ atomic_t trace_overrun; ++ ++ /* Pause tracing: */ ++ atomic_t tracing_graph_pause; ++#endif ++ ++#ifdef CONFIG_TRACING ++ /* State flags for use by tracers: */ ++ unsigned long trace; ++ ++ /* Bitmask and counter of trace recursion: */ ++ unsigned long trace_recursion; ++#endif /* CONFIG_TRACING */ ++ ++#ifdef CONFIG_KCOV ++ /* Coverage collection mode enabled for this task (0 if disabled): */ ++ unsigned int kcov_mode; ++ ++ /* Size of the kcov_area: */ ++ unsigned int kcov_size; ++ ++ /* Buffer for coverage collection: */ ++ void *kcov_area; ++ ++ /* KCOV descriptor wired with this task or NULL: */ ++ struct kcov *kcov; ++#endif ++ ++#ifdef CONFIG_MEMCG ++ struct mem_cgroup *memcg_in_oom; ++ gfp_t memcg_oom_gfp_mask; ++ int memcg_oom_order; ++ ++ /* Number of pages to reclaim on returning to userland: */ ++ unsigned int memcg_nr_pages_over_high; ++ ++ /* Used by memcontrol for targeted memcg charge: */ ++ struct mem_cgroup *active_memcg; ++#endif ++ ++#ifdef CONFIG_BLK_CGROUP ++ struct request_queue *throttle_queue; ++#endif ++ ++#ifdef CONFIG_UPROBES ++ struct uprobe_task *utask; ++#endif ++#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) ++ unsigned int sequential_io; ++ unsigned int sequential_io_avg; ++#endif ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++ unsigned long task_state_change; ++#endif ++ int pagefault_disabled; ++#ifdef CONFIG_MMU ++ struct task_struct *oom_reaper_list; ++#endif ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *stack_vm_area; ++#endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ /* A live task holds one reference: */ ++ atomic_t stack_refcount; ++#endif ++#ifdef CONFIG_LIVEPATCH ++ int patch_state; ++#endif ++#ifdef CONFIG_SECURITY ++ /* Used by LSM modules for access restriction: */ ++ void *security; ++#endif ++ ++ /* ++ * New fields for task_struct should be added above here, so that ++ * they are included in the randomized portion of task_struct. ++ */ ++ randomized_struct_fields_end ++ ++#ifndef __GENKSYMS__ ++ u64 parent_exec_id_u64; ++ u64 self_exec_id_u64; ++#else ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++#endif ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) ++ ++ /* CPU-specific state of this task: */ ++ struct thread_struct thread; ++ ++ /* ++ * WARNING: on x86, 'thread_struct' contains a variable-sized ++ * structure. It *MUST* be at the end of 'task_struct'. ++ * ++ * Do not put anything below here! ++ */ ++}; ++ ++static inline struct pid *task_pid(struct task_struct *task) ++{ ++ return task->thread_pid; ++} ++ ++/* ++ * the helpers to get the task's different pids as they are seen ++ * from various namespaces ++ * ++ * task_xid_nr() : global id, i.e. the id seen from the init namespace; ++ * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of ++ * current. 
++ * task_xid_nr_ns() : id seen from the ns specified; ++ * ++ * see also pid_nr() etc in include/linux/pid.h ++ */ ++pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); ++ ++static inline pid_t task_pid_nr(struct task_struct *tsk) ++{ ++ return tsk->pid; ++} ++ ++static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); ++} ++ ++static inline pid_t task_pid_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL); ++} ++ ++ ++static inline pid_t task_tgid_nr(struct task_struct *tsk) ++{ ++ return tsk->tgid; ++} ++ ++/** ++ * pid_alive - check that a task structure is not stale ++ * @p: Task structure to be checked. ++ * ++ * Test if a process is not yet dead (at most zombie state) ++ * If pid_alive fails, then pointers within the task structure ++ * can be stale and must not be dereferenced. ++ * ++ * Return: 1 if the process is alive. 0 otherwise. ++ */ ++static inline int pid_alive(const struct task_struct *p) ++{ ++ return p->thread_pid != NULL; ++} ++ ++static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); ++} ++ ++static inline pid_t task_pgrp_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL); ++} ++ ++ ++static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); ++} ++ ++static inline pid_t task_session_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); ++} ++ ++static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); ++} ++ ++static inline pid_t task_tgid_vnr(struct task_struct *tsk) ++{ ++ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); ++} ++ ++static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) ++{ ++ pid_t pid = 0; ++ ++ rcu_read_lock(); ++ if (pid_alive(tsk)) ++ pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); ++ rcu_read_unlock(); ++ ++ return pid; ++} ++ ++static inline pid_t task_ppid_nr(const struct task_struct *tsk) ++{ ++ return task_ppid_nr_ns(tsk, &init_pid_ns); ++} ++ ++/* Obsolete, do not use: */ ++static inline pid_t task_pgrp_nr(struct task_struct *tsk) ++{ ++ return task_pgrp_nr_ns(tsk, &init_pid_ns); ++} ++ ++#define TASK_REPORT_IDLE (TASK_REPORT + 1) ++#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) ++ ++static inline unsigned int task_state_index(struct task_struct *tsk) ++{ ++ unsigned int tsk_state = READ_ONCE(tsk->state); ++ unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; ++ ++ BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); ++ ++ if (tsk_state == TASK_IDLE) ++ state = TASK_REPORT_IDLE; ++ ++ return fls(state); ++} ++ ++static inline char task_index_to_char(unsigned int state) ++{ ++ static const char state_char[] = "RSDTtXZPI"; ++ ++ BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); ++ ++ return state_char[state]; ++} ++ ++static inline char task_state_to_char(struct task_struct *tsk) ++{ ++ return task_index_to_char(task_state_index(tsk)); ++} ++ ++/** ++ * is_global_init - check if a task structure is init. Since init ++ * is free to have sub-threads we need to check tgid. ++ * @tsk: Task structure to be checked. ++ * ++ * Check if a task structure is the first user space task the kernel created. 
++ * ++ * Return: 1 if the task structure is init. 0 otherwise. ++ */ ++static inline int is_global_init(struct task_struct *tsk) ++{ ++ return task_tgid_nr(tsk) == 1; ++} ++ ++extern struct pid *cad_pid; ++ ++/* ++ * Per process flags ++ */ ++#define PF_IDLE 0x00000002 /* I am an IDLE thread */ ++#define PF_EXITING 0x00000004 /* Getting shut down */ ++#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ ++#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ ++#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ ++#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ ++#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ ++#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ ++#define PF_DUMPCORE 0x00000200 /* Dumped core */ ++#define PF_SIGNALED 0x00000400 /* Killed by a signal */ ++#define PF_MEMALLOC 0x00000800 /* Allocating memory */ ++#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ ++#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ ++#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ ++#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ ++#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ ++#define PF_KSWAPD 0x00020000 /* I am kswapd */ ++#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ ++#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ ++#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ ++#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ ++#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ ++#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ ++#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ ++#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ ++#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ ++#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ ++#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ ++ ++/* ++ * Only the _current_ task can read/write to tsk->flags, but other ++ * tasks can access tsk->flags in readonly mode for example ++ * with tsk_used_math (like during threaded core dumping). ++ * There is however an exception to this rule during ptrace ++ * or during fork: the ptracer task is allowed to write to the ++ * child->flags of its traced child (same goes for fork, the parent ++ * can write to the child->flags), because we're guaranteed the ++ * child is not running and in turn not changing child->flags ++ * at the same time the parent does it. ++ */ ++#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) ++#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) ++#define clear_used_math() clear_stopped_child_used_math(current) ++#define set_used_math() set_stopped_child_used_math(current) ++ ++#define conditional_stopped_child_used_math(condition, child) \ ++ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? 
PF_USED_MATH : 0; } while (0) ++ ++#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) ++ ++#define copy_to_stopped_child_used_math(child) \ ++ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) ++ ++/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ ++#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) ++#define used_math() tsk_used_math(current) ++ ++static inline bool is_percpu_thread(void) ++{ ++#ifdef CONFIG_SMP ++ return (current->flags & PF_NO_SETAFFINITY) && ++ (current->nr_cpus_allowed == 1); ++#else ++ return true; ++#endif ++} ++ ++/* Per-process atomic flags. */ ++#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ ++#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ ++#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ ++#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ ++#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ ++#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ ++#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ ++ ++#define TASK_PFA_TEST(name, func) \ ++ static inline bool task_##func(struct task_struct *p) \ ++ { return test_bit(PFA_##name, &p->atomic_flags); } ++ ++#define TASK_PFA_SET(name, func) \ ++ static inline void task_set_##func(struct task_struct *p) \ ++ { set_bit(PFA_##name, &p->atomic_flags); } ++ ++#define TASK_PFA_CLEAR(name, func) \ ++ static inline void task_clear_##func(struct task_struct *p) \ ++ { clear_bit(PFA_##name, &p->atomic_flags); } ++ ++TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) ++TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs) ++ ++TASK_PFA_TEST(SPREAD_PAGE, spread_page) ++TASK_PFA_SET(SPREAD_PAGE, spread_page) ++TASK_PFA_CLEAR(SPREAD_PAGE, spread_page) ++ ++TASK_PFA_TEST(SPREAD_SLAB, spread_slab) ++TASK_PFA_SET(SPREAD_SLAB, spread_slab) ++TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) ++ ++TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) ++TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) ++ ++TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) ++ ++TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) ++TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) ++ ++TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) ++ ++static inline void ++current_restore_flags(unsigned long orig_flags, unsigned long flags) ++{ ++ current->flags &= ~flags; ++ current->flags |= orig_flags & flags; ++} ++ ++extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); ++extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); ++#ifdef CONFIG_SMP ++extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); ++extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); ++#else ++static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++} ++static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ if (!cpumask_test_cpu(0, new_mask)) ++ return -EINVAL; ++ return 0; ++} ++#endif ++ ++#ifndef cpu_relax_yield ++#define cpu_relax_yield() cpu_relax() ++#endif ++ ++extern int 
yield_to(struct task_struct *p, bool preempt); ++extern void set_user_nice(struct task_struct *p, long nice); ++extern int task_prio(const struct task_struct *p); ++ ++/** ++ * task_nice - return the nice value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The nice value [ -20 ... 0 ... 19 ]. ++ */ ++static inline int task_nice(const struct task_struct *p) ++{ ++ return PRIO_TO_NICE((p)->static_prio); ++} ++ ++extern int can_nice(const struct task_struct *p, const int nice); ++extern int task_curr(const struct task_struct *p); ++extern int idle_cpu(int cpu); ++extern int available_idle_cpu(int cpu); ++extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); ++extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); ++extern int sched_setattr(struct task_struct *, const struct sched_attr *); ++extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); ++extern struct task_struct *idle_task(int cpu); ++ ++/** ++ * is_idle_task - is the specified task an idle task? ++ * @p: the task in question. ++ * ++ * Return: 1 if @p is an idle task. 0 otherwise. ++ */ ++static inline bool is_idle_task(const struct task_struct *p) ++{ ++ return !!(p->flags & PF_IDLE); ++} ++ ++extern struct task_struct *curr_task(int cpu); ++extern void ia64_set_curr_task(int cpu, struct task_struct *p); ++ ++void yield(void); ++ ++union thread_union { ++#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK ++ struct task_struct task; ++#endif ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ struct thread_info thread_info; ++#endif ++ unsigned long stack[THREAD_SIZE/sizeof(long)]; ++}; ++ ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++extern struct thread_info init_thread_info; ++#endif ++ ++extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++static inline struct thread_info *task_thread_info(struct task_struct *task) ++{ ++ return &task->thread_info; ++} ++#elif !defined(__HAVE_THREAD_FUNCTIONS) ++# define task_thread_info(task) ((struct thread_info *)(task)->stack) ++#endif ++ ++/* ++ * find a task by one of its numerical ids ++ * ++ * find_task_by_pid_ns(): ++ * finds a task by its pid in the specified namespace ++ * find_task_by_vpid(): ++ * finds a task by its virtual pid ++ * ++ * see also find_vpid() etc in include/linux/pid.h ++ */ ++ ++extern struct task_struct *find_task_by_vpid(pid_t nr); ++extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); ++ ++/* ++ * find a task by its virtual pid and get the task struct ++ */ ++extern struct task_struct *find_get_task_by_vpid(pid_t nr); ++ ++extern int wake_up_state(struct task_struct *tsk, unsigned int state); ++extern int wake_up_process(struct task_struct *tsk); ++extern void wake_up_new_task(struct task_struct *tsk); ++ ++#ifdef CONFIG_SMP ++extern void kick_process(struct task_struct *tsk); ++#else ++static inline void kick_process(struct task_struct *tsk) { } ++#endif ++ ++extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); ++ ++static inline void set_task_comm(struct task_struct *tsk, const char *from) ++{ ++ __set_task_comm(tsk, from, false); ++} ++ ++extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); ++#define get_task_comm(buf, tsk) ({ \ ++ BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ ++ __get_task_comm(buf, sizeof(buf), tsk); \ ++}) ++ ++#ifdef CONFIG_SMP ++void scheduler_ipi(void); ++extern unsigned long wait_task_inactive(struct 
task_struct *, long match_state); ++#else ++static inline void scheduler_ipi(void) { } ++static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ return 1; ++} ++#endif ++ ++/* ++ * Set thread flags in other task's structures. ++ * See asm/thread_info.h for TIF_xxxx flags available: ++ */ ++static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ set_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ clear_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, ++ bool value) ++{ ++ update_ti_thread_flag(task_thread_info(tsk), flag, value); ++} ++ ++static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) ++{ ++ return test_ti_thread_flag(task_thread_info(tsk), flag); ++} ++ ++static inline void set_tsk_need_resched(struct task_struct *tsk) ++{ ++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); ++} ++ ++static inline void clear_tsk_need_resched(struct task_struct *tsk) ++{ ++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); ++} ++ ++static inline int test_tsk_need_resched(struct task_struct *tsk) ++{ ++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); ++} ++ ++/* ++ * cond_resched() and cond_resched_lock(): latency reduction via ++ * explicit rescheduling in places that are safe. The return ++ * value indicates whether a reschedule was done in fact. ++ * cond_resched_lock() will drop the spinlock before scheduling, ++ */ ++#ifndef CONFIG_PREEMPT ++extern int _cond_resched(void); ++#else ++static inline int _cond_resched(void) { return 0; } ++#endif ++ ++#define cond_resched() ({ \ ++ ___might_sleep(__FILE__, __LINE__, 0); \ ++ _cond_resched(); \ ++}) ++ ++extern int __cond_resched_lock(spinlock_t *lock); ++ ++#define cond_resched_lock(lock) ({ \ ++ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ ++ __cond_resched_lock(lock); \ ++}) ++ ++static inline void cond_resched_rcu(void) ++{ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) ++ rcu_read_unlock(); ++ cond_resched(); ++ rcu_read_lock(); ++#endif ++} ++ ++/* ++ * Does a critical section need to be broken due to another ++ * task waiting?: (technically does not depend on CONFIG_PREEMPT, ++ * but a general need for low latency) ++ */ ++static inline int spin_needbreak(spinlock_t *lock) ++{ ++#ifdef CONFIG_PREEMPT ++ return spin_is_contended(lock); ++#else ++ return 0; ++#endif ++} ++ ++static __always_inline bool need_resched(void) ++{ ++ return unlikely(tif_need_resched()); ++} ++ ++/* ++ * Wrappers for p->thread_info->cpu access. No-op on UP. 
++ */ ++#ifdef CONFIG_SMP ++ ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ return READ_ONCE(p->cpu); ++#else ++ return READ_ONCE(task_thread_info(p)->cpu); ++#endif ++} ++ ++extern void set_task_cpu(struct task_struct *p, unsigned int cpu); ++ ++#else ++ ++static inline unsigned int task_cpu(const struct task_struct *p) ++{ ++ return 0; ++} ++ ++static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ ++/* ++ * In order to reduce various lock holder preemption latencies provide an ++ * interface to see if a vCPU is currently running or not. ++ * ++ * This allows us to terminate optimistic spin loops and block, analogous to ++ * the native optimistic spin heuristic of testing if the lock owner task is ++ * running or not. ++ */ ++#ifndef vcpu_is_preempted ++# define vcpu_is_preempted(cpu) false ++#endif ++ ++extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); ++extern long sched_getaffinity(pid_t pid, struct cpumask *mask); ++ ++#ifndef TASK_SIZE_OF ++#define TASK_SIZE_OF(tsk) TASK_SIZE ++#endif ++ ++#ifdef CONFIG_RSEQ ++ ++/* ++ * Map the event mask on the user-space ABI enum rseq_cs_flags ++ * for direct mask checks. ++ */ ++enum rseq_event_mask_bits { ++ RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, ++ RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, ++ RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, ++}; ++ ++enum rseq_event_mask { ++ RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), ++ RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), ++ RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), ++}; ++ ++static inline void rseq_set_notify_resume(struct task_struct *t) ++{ ++ if (t->rseq) ++ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); ++} ++ ++void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); ++ ++static inline void rseq_handle_notify_resume(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++ if (current->rseq) ++ __rseq_handle_notify_resume(ksig, regs); ++} ++ ++static inline void rseq_signal_deliver(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++ preempt_disable(); ++ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); ++ preempt_enable(); ++ rseq_handle_notify_resume(ksig, regs); ++} ++ ++/* rseq_preempt() requires preemption to be disabled. */ ++static inline void rseq_preempt(struct task_struct *t) ++{ ++ __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); ++ rseq_set_notify_resume(t); ++} ++ ++/* rseq_migrate() requires preemption to be disabled. */ ++static inline void rseq_migrate(struct task_struct *t) ++{ ++ __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); ++ rseq_set_notify_resume(t); ++} ++ ++/* ++ * If parent process has a registered restartable sequences area, the ++ * child inherits. Only applies when forking a process, not a thread. 
++ */ ++static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ++{ ++ if (clone_flags & CLONE_THREAD) { ++ t->rseq = NULL; ++ t->rseq_len = 0; ++ t->rseq_sig = 0; ++ t->rseq_event_mask = 0; ++ } else { ++ t->rseq = current->rseq; ++ t->rseq_len = current->rseq_len; ++ t->rseq_sig = current->rseq_sig; ++ t->rseq_event_mask = current->rseq_event_mask; ++ } ++} ++ ++static inline void rseq_execve(struct task_struct *t) ++{ ++ t->rseq = NULL; ++ t->rseq_len = 0; ++ t->rseq_sig = 0; ++ t->rseq_event_mask = 0; ++} ++ ++#else ++ ++static inline void rseq_set_notify_resume(struct task_struct *t) ++{ ++} ++static inline void rseq_handle_notify_resume(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++} ++static inline void rseq_signal_deliver(struct ksignal *ksig, ++ struct pt_regs *regs) ++{ ++} ++static inline void rseq_preempt(struct task_struct *t) ++{ ++} ++static inline void rseq_migrate(struct task_struct *t) ++{ ++} ++static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) ++{ ++} ++static inline void rseq_execve(struct task_struct *t) ++{ ++} ++ ++#endif ++ ++#ifdef CONFIG_DEBUG_RSEQ ++ ++void rseq_syscall(struct pt_regs *regs); ++ ++#else ++ ++static inline void rseq_syscall(struct pt_regs *regs) ++{ ++} ++ ++#endif ++ ++#endif +diff -uprN kernel/include/linux/spinlock_api_smp.h kernel_new/include/linux/spinlock_api_smp.h +--- kernel/include/linux/spinlock_api_smp.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock_api_smp.h 2021-04-01 18:28:07.803863123 +0800 +@@ -99,7 +99,9 @@ static inline int __raw_spin_trylock(raw + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + + static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) + { +@@ -113,7 +115,7 @@ static inline unsigned long __raw_spin_l + * do_raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +-#ifdef CONFIG_LOCKDEP ++#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); + #else + do_raw_spin_lock_flags(lock, &flags); +diff -uprN kernel/include/linux/spinlock.h kernel_new/include/linux/spinlock.h +--- kernel/include/linux/spinlock.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock.h 2021-04-01 18:28:07.803863123 +0800 +@@ -90,10 +90,12 @@ + # include + #endif + ++#include ++ + #ifdef CONFIG_DEBUG_SPINLOCK + extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + struct lock_class_key *key); +-# define raw_spin_lock_init(lock) \ ++# define __real_raw_spin_lock_init(lock) \ + do { \ + static struct lock_class_key __key; \ + \ +@@ -101,11 +103,14 @@ do { \ + } while (0) + + #else +-# define raw_spin_lock_init(lock) \ ++# define __real_raw_spin_lock_init(lock) \ + do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) + #endif ++#define raw_spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) + +-#define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) ++#define __real_raw_spin_is_locked(lock) \ ++ arch_spin_is_locked(&(lock)->raw_lock) ++#define raw_spin_is_locked(lock) PICK_SPINOP_RET(_is_locked, lock, int) + + #ifdef arch_spin_is_contended + #define 
raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) +@@ -209,9 +214,11 @@ static inline void do_raw_spin_unlock(ra + * various methods are defined as nops in the case they are not + * required. + */ +-#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) ++#define __real_raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) ++#define raw_spin_trylock(lock) PICK_SPINOP_RET(_trylock, lock, int) + +-#define raw_spin_lock(lock) _raw_spin_lock(lock) ++#define __real_raw_spin_lock(lock) _raw_spin_lock(lock) ++#define raw_spin_lock(lock) PICK_SPINOP(_lock, lock) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define raw_spin_lock_nested(lock, subclass) \ +@@ -235,7 +242,7 @@ static inline void do_raw_spin_unlock(ra + + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +-#define raw_spin_lock_irqsave(lock, flags) \ ++#define __real_raw_spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _raw_spin_lock_irqsave(lock); \ +@@ -257,7 +264,7 @@ static inline void do_raw_spin_unlock(ra + + #else + +-#define raw_spin_lock_irqsave(lock, flags) \ ++#define __real_raw_spin_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_lock_irqsave(lock, flags); \ +@@ -268,34 +275,46 @@ static inline void do_raw_spin_unlock(ra + + #endif + +-#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) ++#define raw_spin_lock_irqsave(lock, flags) \ ++ PICK_SPINLOCK_IRQSAVE(lock, flags) ++ ++#define __real_raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) ++#define raw_spin_lock_irq(lock) PICK_SPINOP(_lock_irq, lock) + #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) +-#define raw_spin_unlock(lock) _raw_spin_unlock(lock) +-#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) ++#define __real_raw_spin_unlock(lock) _raw_spin_unlock(lock) ++#define raw_spin_unlock(lock) PICK_SPINOP(_unlock, lock) ++#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) ++#define raw_spin_unlock_irq(lock) PICK_SPINOP(_unlock_irq, lock) + +-#define raw_spin_unlock_irqrestore(lock, flags) \ ++#define __real_raw_spin_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _raw_spin_unlock_irqrestore(lock, flags); \ + } while (0) ++#define raw_spin_unlock_irqrestore(lock, flags) \ ++ PICK_SPINUNLOCK_IRQRESTORE(lock, flags) ++ + #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) + + #define raw_spin_trylock_bh(lock) \ + __cond_lock(lock, _raw_spin_trylock_bh(lock)) + +-#define raw_spin_trylock_irq(lock) \ ++#define __real_raw_spin_trylock_irq(lock) \ + ({ \ + local_irq_disable(); \ +- raw_spin_trylock(lock) ? \ ++ __real_raw_spin_trylock(lock) ? \ + 1 : ({ local_irq_enable(); 0; }); \ + }) ++#define raw_spin_trylock_irq(lock) PICK_SPINTRYLOCK_IRQ(lock) + +-#define raw_spin_trylock_irqsave(lock, flags) \ ++#define __real_raw_spin_trylock_irqsave(lock, flags) \ + ({ \ + local_irq_save(flags); \ + raw_spin_trylock(lock) ? 
\ + 1 : ({ local_irq_restore(flags); 0; }); \ + }) ++#define raw_spin_trylock_irqsave(lock, flags) \ ++ PICK_SPINTRYLOCK_IRQSAVE(lock, flags) + + /* Include rwlock functions */ + #include +@@ -320,24 +339,17 @@ static __always_inline raw_spinlock_t *s + + #define spin_lock_init(_lock) \ + do { \ +- spinlock_check(_lock); \ +- raw_spin_lock_init(&(_lock)->rlock); \ ++ raw_spin_lock_init(_lock); \ + } while (0) + +-static __always_inline void spin_lock(spinlock_t *lock) +-{ +- raw_spin_lock(&lock->rlock); +-} ++#define spin_lock(lock) raw_spin_lock(lock) + + static __always_inline void spin_lock_bh(spinlock_t *lock) + { + raw_spin_lock_bh(&lock->rlock); + } + +-static __always_inline int spin_trylock(spinlock_t *lock) +-{ +- return raw_spin_trylock(&lock->rlock); +-} ++#define spin_trylock(lock) raw_spin_trylock(lock) + + #define spin_lock_nested(lock, subclass) \ + do { \ +@@ -349,14 +361,11 @@ do { \ + raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ + } while (0) + +-static __always_inline void spin_lock_irq(spinlock_t *lock) +-{ +- raw_spin_lock_irq(&lock->rlock); +-} ++#define spin_lock_irq(lock) raw_spin_lock_irq(lock) + + #define spin_lock_irqsave(lock, flags) \ + do { \ +- raw_spin_lock_irqsave(spinlock_check(lock), flags); \ ++ raw_spin_lock_irqsave(lock, flags); \ + } while (0) + + #define spin_lock_irqsave_nested(lock, flags, subclass) \ +@@ -364,39 +373,28 @@ do { \ + raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ + } while (0) + +-static __always_inline void spin_unlock(spinlock_t *lock) +-{ +- raw_spin_unlock(&lock->rlock); +-} ++#define spin_unlock(lock) raw_spin_unlock(lock) + + static __always_inline void spin_unlock_bh(spinlock_t *lock) + { + raw_spin_unlock_bh(&lock->rlock); + } + +-static __always_inline void spin_unlock_irq(spinlock_t *lock) +-{ +- raw_spin_unlock_irq(&lock->rlock); +-} ++#define spin_unlock_irq(lock) raw_spin_unlock_irq(lock) + +-static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +-{ +- raw_spin_unlock_irqrestore(&lock->rlock, flags); +-} ++#define spin_unlock_irqrestore(lock, flags) \ ++ raw_spin_unlock_irqrestore(lock, flags) + + static __always_inline int spin_trylock_bh(spinlock_t *lock) + { + return raw_spin_trylock_bh(&lock->rlock); + } + +-static __always_inline int spin_trylock_irq(spinlock_t *lock) +-{ +- return raw_spin_trylock_irq(&lock->rlock); +-} ++#define spin_trylock_irq(lock) raw_spin_trylock_irq(lock) + + #define spin_trylock_irqsave(lock, flags) \ + ({ \ +- raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ ++ raw_spin_trylock_irqsave(lock, flags); \ + }) + + /** +diff -uprN kernel/include/linux/spinlock_up.h kernel_new/include/linux/spinlock_up.h +--- kernel/include/linux/spinlock_up.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/spinlock_up.h 2021-04-01 18:28:07.804863121 +0800 +@@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch + lock->slock = 1; + } + +-/* +- * Read-write spinlocks. No debug version. 
+- */ +-#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +-#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +-#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +-#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) +- + #else /* DEBUG_SPINLOCK */ + #define arch_spin_is_locked(lock) ((void)(lock), 0) + /* for sched/core.c and kernel_lock.c: */ +@@ -67,6 +57,13 @@ static inline void arch_spin_unlock(arch + # define arch_spin_trylock(lock) ({ barrier(); (void)(lock); 1; }) + #endif /* DEBUG_SPINLOCK */ + ++#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) ++#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) ++#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) ++#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) ++ + #define arch_spin_is_contended(lock) (((void)(lock), 0)) + + #endif /* __LINUX_SPINLOCK_UP_H */ +diff -uprN kernel/include/linux/stop_machine.h kernel_new/include/linux/stop_machine.h +--- kernel/include/linux/stop_machine.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/include/linux/stop_machine.h 2021-04-01 18:28:07.804863121 +0800 +@@ -138,13 +138,17 @@ int stop_machine_from_inactive_cpu(cpu_s + const struct cpumask *cpus); + #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ + ++#include ++ + static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) + { + unsigned long flags; + int ret; + local_irq_save(flags); ++ hard_irq_disable(); + ret = fn(data); ++ hard_irq_enable(); + local_irq_restore(flags); + return ret; + } +diff -uprN kernel/init/Kconfig kernel_new/init/Kconfig +--- kernel/init/Kconfig 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/init/Kconfig 2021-04-01 18:28:07.804863121 +0800 +@@ -1291,6 +1291,18 @@ config PRINTK_NMI + depends on PRINTK + depends on HAVE_NMI + ++config RAW_PRINTK ++ bool "Enable support for raw printk" ++ default n ++ help ++ This option enables a printk variant called raw_printk() for ++ writing all output unmodified to a raw console channel ++ immediately, without any header or preparation whatsoever, ++ usable from any context. ++ ++ Unlike early_printk() console devices, raw_printk() devices ++ can live past the boot sequence. 
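(Usage illustration for the RAW_PRINTK option described above: a minimal sketch of a driver calling raw_printk(), assuming the printf-style prototype that the patched kernel headers expose when CONFIG_RAW_PRINTK=y; the interrupt handler name and IRQ wiring below are hypothetical and only show the calling context, they are not taken from the patch.)

/*
 * Minimal sketch: raw_printk() from an interrupt handler.
 * Assumes CONFIG_RAW_PRINTK=y and the printf-style raw_printk()
 * declaration provided by the patched kernel headers; the handler
 * is hypothetical and exists only to illustrate that the call is
 * legal from contexts where the regular printk() path may not be.
 */
#include <linux/interrupt.h>
#include <linux/printk.h>

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	/*
	 * Output is written unmodified and immediately to the raw
	 * console channel, without log-level prefixes or deferred
	 * buffering, matching the behavior described in the help text.
	 */
	raw_printk("demo: irq %d fired\n", irq);
	return IRQ_HANDLED;
}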
++ + config BUG + bool "BUG() support" if EXPERT + default y +diff -uprN kernel/init/Kconfig.orig kernel_new/init/Kconfig.orig +--- kernel/init/Kconfig.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/Kconfig.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2046 @@ ++config DEFCONFIG_LIST ++ string ++ depends on !UML ++ option defconfig_list ++ default "/lib/modules/$(shell,uname -r)/.config" ++ default "/etc/kernel-config" ++ default "/boot/config-$(shell,uname -r)" ++ default ARCH_DEFCONFIG ++ default "arch/$(ARCH)/defconfig" ++ ++config CC_IS_GCC ++ def_bool $(success,$(CC) --version | head -n 1 | grep -q gcc) ++ ++config GCC_VERSION ++ int ++ default $(shell,$(srctree)/scripts/gcc-version.sh -p $(CC) | sed 's/^0*//') if CC_IS_GCC ++ default 0 ++ ++config CC_IS_CLANG ++ def_bool $(success,$(CC) --version | head -n 1 | grep -q clang) ++ ++config CLANG_VERSION ++ int ++ default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) ++ ++config CC_HAS_ASM_GOTO ++ def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) ++ ++config CONSTRUCTORS ++ bool ++ depends on !UML ++ ++config IRQ_WORK ++ bool ++ ++config BUILDTIME_EXTABLE_SORT ++ bool ++ ++config THREAD_INFO_IN_TASK ++ bool ++ help ++ Select this to move thread_info off the stack into task_struct. To ++ make this work, an arch will need to remove all thread_info fields ++ except flags and fix any runtime bugs. ++ ++ One subtle change that will be needed is to use try_get_task_stack() ++ and put_task_stack() in save_thread_stack_tsk() and get_wchan(). ++ ++menu "General setup" ++ ++config BROKEN ++ bool ++ ++config BROKEN_ON_SMP ++ bool ++ depends on BROKEN || !SMP ++ default y ++ ++config INIT_ENV_ARG_LIMIT ++ int ++ default 32 if !UML ++ default 128 if UML ++ help ++ Maximum of each of the number of arguments and environment ++ variables passed to init from the kernel command line. ++ ++config COMPILE_TEST ++ bool "Compile also drivers which will not load" ++ depends on !UML ++ default n ++ help ++ Some drivers can be compiled on a different platform than they are ++ intended to be run on. Despite they cannot be loaded there (or even ++ when they load they cannot be used due to missing HW support), ++ developers still, opposing to distributors, might want to build such ++ drivers to compile-test them. ++ ++ If you are a developer and want to build everything available, say Y ++ here. If you are a user/distributor, say N here to exclude useless ++ drivers to be distributed. ++ ++config LOCALVERSION ++ string "Local version - append to kernel release" ++ help ++ Append an extra string to the end of your kernel version. ++ This will show up when you type uname, for example. ++ The string you set here will be appended after the contents of ++ any files with a filename matching localversion* in your ++ object and source tree, in that order. Your total string can ++ be a maximum of 64 characters. ++ ++config LOCALVERSION_AUTO ++ bool "Automatically append version information to the version string" ++ default y ++ depends on !COMPILE_TEST ++ help ++ This will try to automatically determine if the current tree is a ++ release tree by looking for git tags that belong to the current ++ top of tree revision. ++ ++ A string of the format -gxxxxxxxx will be added to the localversion ++ if a git-based tree is found. The string generated by this will be ++ appended after any matching localversion* files, and after the value ++ set in CONFIG_LOCALVERSION. 
++ ++ (The actual string used here is the first eight characters produced ++ by running the command: ++ ++ $ git rev-parse --verify HEAD ++ ++ which is done within the script "scripts/setlocalversion".) ++ ++config BUILD_SALT ++ string "Build ID Salt" ++ default "" ++ help ++ The build ID is used to link binaries and their debug info. Setting ++ this option will use the value in the calculation of the build id. ++ This is mostly useful for distributions which want to ensure the ++ build is unique between builds. It's safe to leave the default. ++ ++config HAVE_KERNEL_GZIP ++ bool ++ ++config HAVE_KERNEL_BZIP2 ++ bool ++ ++config HAVE_KERNEL_LZMA ++ bool ++ ++config HAVE_KERNEL_XZ ++ bool ++ ++config HAVE_KERNEL_LZO ++ bool ++ ++config HAVE_KERNEL_LZ4 ++ bool ++ ++config HAVE_KERNEL_UNCOMPRESSED ++ bool ++ ++choice ++ prompt "Kernel compression mode" ++ default KERNEL_GZIP ++ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED ++ help ++ The linux kernel is a kind of self-extracting executable. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ kernels, mail me (Alain Knaff) . (An older ++ version of this functionality (bzip2 only), for 2.4, was ++ supplied by Christian Ludwig) ++ ++ High compression options are mostly useful for users, who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config KERNEL_GZIP ++ bool "Gzip" ++ depends on HAVE_KERNEL_GZIP ++ help ++ The old and tried gzip compression. It provides a good balance ++ between compression ratio and decompression speed. ++ ++config KERNEL_BZIP2 ++ bool "Bzip2" ++ depends on HAVE_KERNEL_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the choices. The kernel ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config KERNEL_LZMA ++ bool "LZMA" ++ depends on HAVE_KERNEL_LZMA ++ help ++ This compression algorithm's ratio is best. Decompression speed ++ is between gzip and bzip2. Compression is slowest. ++ The kernel size is about 33% smaller with LZMA in comparison to gzip. ++ ++config KERNEL_XZ ++ bool "XZ" ++ depends on HAVE_KERNEL_XZ ++ help ++ XZ uses the LZMA2 algorithm and instruction set specific ++ BCJ filters which can improve compression ratio of executable ++ code. The size of the kernel is about 30% smaller with XZ in ++ comparison to gzip. On architectures for which there is a BCJ ++ filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ ++ will create a few percent smaller kernel than plain LZMA. ++ ++ The speed is about the same as with LZMA: The decompression ++ speed of XZ is better than that of bzip2 but worse than gzip ++ and LZO. Compression is slow. ++ ++config KERNEL_LZO ++ bool "LZO" ++ depends on HAVE_KERNEL_LZO ++ help ++ Its compression ratio is the poorest among the choices. The kernel ++ size is about 10% bigger than gzip; however its speed ++ (both compression and decompression) is the fastest. 
++ ++config KERNEL_LZ4 ++ bool "LZ4" ++ depends on HAVE_KERNEL_LZ4 ++ help ++ LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding. ++ A preliminary version of LZ4 de/compression tool is available at ++ . ++ ++ Its compression ratio is worse than LZO. The size of the kernel ++ is about 8% bigger than LZO. But the decompression speed is ++ faster than LZO. ++ ++config KERNEL_UNCOMPRESSED ++ bool "None" ++ depends on HAVE_KERNEL_UNCOMPRESSED ++ help ++ Produce uncompressed kernel image. This option is usually not what ++ you want. It is useful for debugging the kernel in slow simulation ++ environments, where decompressing and moving the kernel is awfully ++ slow. This option allows early boot code to skip the decompressor ++ and jump right at uncompressed kernel image. ++ ++endchoice ++ ++config DEFAULT_HOSTNAME ++ string "Default hostname" ++ default "(none)" ++ help ++ This option determines the default system hostname before userspace ++ calls sethostname(2). The kernel traditionally uses "(none)" here, ++ but you may wish to use a different default here to make a minimal ++ system more usable with less configuration. ++ ++# ++# For some reason microblaze and nios2 hard code SWAP=n. Hopefully we can ++# add proper SWAP support to them, in which case this can be remove. ++# ++config ARCH_NO_SWAP ++ bool ++ ++config SWAP ++ bool "Support for paging of anonymous memory (swap)" ++ depends on MMU && BLOCK && !ARCH_NO_SWAP ++ default y ++ help ++ This option allows you to choose whether you want to have support ++ for so called swap devices or swap files in your kernel that are ++ used to provide more virtual memory than the actual RAM present ++ in your computer. If unsure say Y. ++ ++config SYSVIPC ++ bool "System V IPC" ++ ---help--- ++ Inter Process Communication is a suite of library functions and ++ system calls which let processes (running programs) synchronize and ++ exchange information. It is generally considered to be a good thing, ++ and some programs won't run unless you say Y here. In particular, if ++ you want to run the DOS emulator dosemu under Linux (read the ++ DOSEMU-HOWTO, available from ), ++ you'll need to say Y here. ++ ++ You can find documentation about IPC with "info ipc" and also in ++ section 6.4 of the Linux Programmer's Guide, available from ++ . ++ ++config SYSVIPC_SYSCTL ++ bool ++ depends on SYSVIPC ++ depends on SYSCTL ++ default y ++ ++config POSIX_MQUEUE ++ bool "POSIX Message Queues" ++ depends on NET ++ ---help--- ++ POSIX variant of message queues is a part of IPC. In POSIX message ++ queues every message has a priority which decides about succession ++ of receiving it by a process. If you want to compile and run ++ programs written e.g. for Solaris with use of its POSIX message ++ queues (functions mq_*) say Y here. ++ ++ POSIX message queues are visible as a filesystem called 'mqueue' ++ and can be mounted somewhere if you want to do filesystem ++ operations on message queues. ++ ++ If unsure, say Y. ++ ++config POSIX_MQUEUE_SYSCTL ++ bool ++ depends on POSIX_MQUEUE ++ depends on SYSCTL ++ default y ++ ++config CROSS_MEMORY_ATTACH ++ bool "Enable process_vm_readv/writev syscalls" ++ depends on MMU ++ default y ++ help ++ Enabling this option adds the system calls process_vm_readv and ++ process_vm_writev which allow a process with the correct privileges ++ to directly read from or write to another process' address space. ++ See the man page for more details. 
++ ++config USELIB ++ bool "uselib syscall" ++ def_bool ALPHA || M68K || SPARC || X86_32 || IA32_EMULATION ++ help ++ This option enables the uselib syscall, a system call used in the ++ dynamic linker from libc5 and earlier. glibc does not use this ++ system call. If you intend to run programs built on libc5 or ++ earlier, you may need to enable this syscall. Current systems ++ running glibc can safely disable this. ++ ++config AUDIT ++ bool "Auditing support" ++ depends on NET ++ help ++ Enable auditing infrastructure that can be used with another ++ kernel subsystem, such as SELinux (which requires this for ++ logging of avc messages output). System call auditing is included ++ on architectures which support it. ++ ++config HAVE_ARCH_AUDITSYSCALL ++ bool ++ ++config AUDITSYSCALL ++ def_bool y ++ depends on AUDIT && HAVE_ARCH_AUDITSYSCALL ++ ++config AUDIT_WATCH ++ def_bool y ++ depends on AUDITSYSCALL ++ select FSNOTIFY ++ ++config AUDIT_TREE ++ def_bool y ++ depends on AUDITSYSCALL ++ select FSNOTIFY ++ ++config KTASK ++ bool "Multithread CPU-intensive kernel work" ++ depends on SMP ++ default y ++ help ++ Parallelize CPU-intensive kernel work. This feature is designed for ++ big machines that can take advantage of their extra CPUs to speed up ++ large kernel tasks. When enabled, kworker threads may occupy more ++ CPU time during these kernel tasks, but these threads are throttled ++ when other tasks on the system need CPU time. ++ ++source "kernel/irq/Kconfig" ++source "kernel/time/Kconfig" ++source "kernel/Kconfig.preempt" ++ ++menu "CPU/Task time and stats accounting" ++ ++config VIRT_CPU_ACCOUNTING ++ bool ++ ++choice ++ prompt "Cputime accounting" ++ default TICK_CPU_ACCOUNTING if !PPC64 ++ default VIRT_CPU_ACCOUNTING_NATIVE if PPC64 ++ ++# Kind of a stub config for the pure tick based cputime accounting ++config TICK_CPU_ACCOUNTING ++ bool "Simple tick based cputime accounting" ++ depends on !S390 && !NO_HZ_FULL ++ help ++ This is the basic tick based cputime accounting that maintains ++ statistics about user, system and idle time spent on per jiffies ++ granularity. ++ ++ If unsure, say Y. ++ ++config VIRT_CPU_ACCOUNTING_NATIVE ++ bool "Deterministic task and CPU time accounting" ++ depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL ++ select VIRT_CPU_ACCOUNTING ++ help ++ Select this option to enable more accurate task and CPU time ++ accounting. This is done by reading a CPU counter on each ++ kernel entry and exit and on transitions within the kernel ++ between system, softirq and hardirq state, so there is a ++ small performance impact. In the case of s390 or IBM POWER > 5, ++ this also enables accounting of stolen time on logically-partitioned ++ systems. ++ ++config VIRT_CPU_ACCOUNTING_GEN ++ bool "Full dynticks CPU time accounting" ++ depends on HAVE_CONTEXT_TRACKING ++ depends on HAVE_VIRT_CPU_ACCOUNTING_GEN ++ select VIRT_CPU_ACCOUNTING ++ select CONTEXT_TRACKING ++ help ++ Select this option to enable task and CPU time accounting on full ++ dynticks systems. This accounting is implemented by watching every ++ kernel-user boundaries using the context tracking subsystem. ++ The accounting is thus performed at the expense of some significant ++ overhead. ++ ++ For now this is only useful if you are working on the full ++ dynticks subsystem development. ++ ++ If unsure, say N. 
++ ++endchoice ++ ++config IRQ_TIME_ACCOUNTING ++ bool "Fine granularity task level IRQ time accounting" ++ depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE ++ help ++ Select this option to enable fine granularity task irq time ++ accounting. This is done by reading a timestamp on each ++ transitions between softirq and hardirq state, so there can be a ++ small performance impact. ++ ++ If in doubt, say N here. ++ ++config HAVE_SCHED_AVG_IRQ ++ def_bool y ++ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING ++ depends on SMP ++ ++config BSD_PROCESS_ACCT ++ bool "BSD Process Accounting" ++ depends on MULTIUSER ++ help ++ If you say Y here, a user level program will be able to instruct the ++ kernel (via a special system call) to write process accounting ++ information to a file: whenever a process exits, information about ++ that process will be appended to the file by the kernel. The ++ information includes things such as creation time, owning user, ++ command name, memory usage, controlling terminal etc. (the complete ++ list is in the struct acct in ). It is ++ up to the user level program to do useful things with this ++ information. This is generally a good idea, so say Y. ++ ++config BSD_PROCESS_ACCT_V3 ++ bool "BSD Process Accounting version 3 file format" ++ depends on BSD_PROCESS_ACCT ++ default n ++ help ++ If you say Y here, the process accounting information is written ++ in a new file format that also logs the process IDs of each ++ process and its parent. Note that this file format is incompatible ++ with previous v0/v1/v2 file formats, so you will need updated tools ++ for processing it. A preliminary version of these tools is available ++ at . ++ ++config TASKSTATS ++ bool "Export task/process statistics through netlink" ++ depends on NET ++ depends on MULTIUSER ++ default n ++ help ++ Export selected statistics for tasks/processes through the ++ generic netlink interface. Unlike BSD process accounting, the ++ statistics are available during the lifetime of tasks/processes as ++ responses to commands. Like BSD accounting, they are sent to user ++ space on task exit. ++ ++ Say N if unsure. ++ ++config TASK_DELAY_ACCT ++ bool "Enable per-task delay accounting" ++ depends on TASKSTATS ++ select SCHED_INFO ++ help ++ Collect information on time spent by a task waiting for system ++ resources like cpu, synchronous block I/O completion and swapping ++ in pages. Such statistics can help in setting a task's priorities ++ relative to other tasks for cpu, io, rss limits etc. ++ ++ Say N if unsure. ++ ++config TASK_XACCT ++ bool "Enable extended accounting over taskstats" ++ depends on TASKSTATS ++ help ++ Collect extended task accounting data and send the data ++ to userland for processing over the taskstats interface. ++ ++ Say N if unsure. ++ ++config TASK_IO_ACCOUNTING ++ bool "Enable per-task storage I/O accounting" ++ depends on TASK_XACCT ++ help ++ Collect information on the number of bytes of storage I/O which this ++ task has caused. ++ ++ Say N if unsure. ++ ++endmenu # "CPU/Task time and stats accounting" ++ ++config CPU_ISOLATION ++ bool "CPU isolation" ++ depends on SMP || COMPILE_TEST ++ default y ++ help ++ Make sure that CPUs running critical tasks are not disturbed by ++ any source of "noise" such as unbound workqueues, timers, kthreads... ++ Unbound jobs get offloaded to housekeeping CPUs. This is driven by ++ the "isolcpus=" boot parameter. ++ ++ Say Y if unsure. 
++ ++source "kernel/rcu/Kconfig" ++ ++config BUILD_BIN2C ++ bool ++ default n ++ ++config IKCONFIG ++ tristate "Kernel .config support" ++ select BUILD_BIN2C ++ ---help--- ++ This option enables the complete Linux kernel ".config" file ++ contents to be saved in the kernel. It provides documentation ++ of which kernel options are used in a running kernel or in an ++ on-disk kernel. This information can be extracted from the kernel ++ image file with the script scripts/extract-ikconfig and used as ++ input to rebuild the current kernel or to build another kernel. ++ It can also be extracted from a running kernel by reading ++ /proc/config.gz if enabled (below). ++ ++config IKCONFIG_PROC ++ bool "Enable access to .config through /proc/config.gz" ++ depends on IKCONFIG && PROC_FS ++ ---help--- ++ This option enables access to the kernel configuration file ++ through /proc/config.gz. ++ ++config LOG_BUF_SHIFT ++ int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" ++ range 12 25 ++ default 17 ++ depends on PRINTK ++ help ++ Select the minimal kernel log buffer size as a power of 2. ++ The final size is affected by LOG_CPU_MAX_BUF_SHIFT config ++ parameter, see below. Any higher size also might be forced ++ by "log_buf_len" boot parameter. ++ ++ Examples: ++ 17 => 128 KB ++ 16 => 64 KB ++ 15 => 32 KB ++ 14 => 16 KB ++ 13 => 8 KB ++ 12 => 4 KB ++ ++config LOG_CPU_MAX_BUF_SHIFT ++ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" ++ depends on SMP ++ range 0 21 ++ default 12 if !BASE_SMALL ++ default 0 if BASE_SMALL ++ depends on PRINTK ++ help ++ This option allows to increase the default ring buffer size ++ according to the number of CPUs. The value defines the contribution ++ of each CPU as a power of 2. The used space is typically only few ++ lines however it might be much more when problems are reported, ++ e.g. backtraces. ++ ++ The increased size means that a new buffer has to be allocated and ++ the original static one is unused. It makes sense only on systems ++ with more CPUs. Therefore this value is used only when the sum of ++ contributions is greater than the half of the default kernel ring ++ buffer as defined by LOG_BUF_SHIFT. The default values are set ++ so that more than 64 CPUs are needed to trigger the allocation. ++ ++ Also this option is ignored when "log_buf_len" kernel parameter is ++ used as it forces an exact (power of two) size of the ring buffer. ++ ++ The number of possible CPUs is used for this computation ignoring ++ hotplugging making the computation optimal for the worst case ++ scenario while allowing a simple algorithm to be used from bootup. ++ ++ Examples shift values and their meaning: ++ 17 => 128 KB for each CPU ++ 16 => 64 KB for each CPU ++ 15 => 32 KB for each CPU ++ 14 => 16 KB for each CPU ++ 13 => 8 KB for each CPU ++ 12 => 4 KB for each CPU ++ ++config PRINTK_SAFE_LOG_BUF_SHIFT ++ int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" ++ range 10 21 ++ default 13 ++ depends on PRINTK ++ help ++ Select the size of an alternate printk per-CPU buffer where messages ++ printed from usafe contexts are temporary stored. One example would ++ be NMI messages, another one - printk recursion. The messages are ++ copied to the main log buffer in a safe context to avoid a deadlock. ++ The value defines the size as a power of 2. ++ ++ Those messages are rare and limited. The largest one is when ++ a backtrace is printed. It usually fits into 4KB. Select ++ 8KB if you want to be on the safe side. 
++ ++ Examples: ++ 17 => 128 KB for each CPU ++ 16 => 64 KB for each CPU ++ 15 => 32 KB for each CPU ++ 14 => 16 KB for each CPU ++ 13 => 8 KB for each CPU ++ 12 => 4 KB for each CPU ++ ++# ++# Architectures with an unreliable sched_clock() should select this: ++# ++config HAVE_UNSTABLE_SCHED_CLOCK ++ bool ++ ++config GENERIC_SCHED_CLOCK ++ bool ++ ++# ++# For architectures that want to enable the support for NUMA-affine scheduler ++# balancing logic: ++# ++config ARCH_SUPPORTS_NUMA_BALANCING ++ bool ++ ++# ++# For architectures that prefer to flush all TLBs after a number of pages ++# are unmapped instead of sending one IPI per page to flush. The architecture ++# must provide guarantees on what happens if a clean TLB cache entry is ++# written after the unmap. Details are in mm/rmap.c near the check for ++# should_defer_flush. The architecture should also consider if the full flush ++# and the refill costs are offset by the savings of sending fewer IPIs. ++config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ bool ++ ++# ++# For architectures that know their GCC __int128 support is sound ++# ++config ARCH_SUPPORTS_INT128 ++ bool ++ ++# For architectures that (ab)use NUMA to represent different memory regions ++# all cpu-local but of different latencies, such as SuperH. ++# ++config ARCH_WANT_NUMA_VARIABLE_LOCALITY ++ bool ++ ++config NUMA_BALANCING ++ bool "Memory placement aware NUMA scheduler" ++ depends on ARCH_SUPPORTS_NUMA_BALANCING ++ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY ++ depends on SMP && NUMA && MIGRATION ++ help ++ This option adds support for automatic NUMA aware memory/task placement. ++ The mechanism is quite primitive and is based on migrating memory when ++ it has references to the node the task is running on. ++ ++ This system will be inactive on UMA systems. ++ ++config NUMA_BALANCING_DEFAULT_ENABLED ++ bool "Automatically enable NUMA aware memory/task placement" ++ default y ++ depends on NUMA_BALANCING ++ help ++ If set, automatic NUMA balancing will be enabled if running on a NUMA ++ machine. ++ ++menuconfig CGROUPS ++ bool "Control Group support" ++ select KERNFS ++ help ++ This option adds support for grouping sets of processes together, for ++ use with process control subsystems such as Cpusets, CFS, memory ++ controls or device isolation. ++ See ++ - Documentation/scheduler/sched-design-CFS.txt (CFS) ++ - Documentation/cgroup-v1/ (features for grouping, isolation ++ and resource control) ++ ++ Say N if unsure. ++ ++if CGROUPS ++ ++config PAGE_COUNTER ++ bool ++ ++config MEMCG ++ bool "Memory controller" ++ select PAGE_COUNTER ++ select EVENTFD ++ help ++ Provides control over the memory footprint of tasks in a cgroup. ++ ++config MEMCG_SWAP ++ bool "Swap controller" ++ depends on MEMCG && SWAP ++ help ++ Provides control over the swap space consumed by tasks in a cgroup. ++ ++config MEMCG_SWAP_ENABLED ++ bool "Swap controller enabled by default" ++ depends on MEMCG_SWAP ++ default y ++ help ++ Memory Resource Controller Swap Extension comes with its price in ++ a bigger memory consumption. General purpose distribution kernels ++ which want to enable the feature but keep it disabled by default ++ and let the user enable it by swapaccount=1 boot command line ++ parameter should have this option unselected. ++ For those who want to have the feature enabled by default should ++ select this option (if, for some reason, they need to disable it ++ then swapaccount=0 does the trick). 
++ ++config MEMCG_KMEM ++ bool ++ depends on MEMCG && !SLOB ++ default y ++ ++config BLK_CGROUP ++ bool "IO controller" ++ depends on BLOCK ++ default n ++ ---help--- ++ Generic block IO controller cgroup interface. This is the common ++ cgroup interface which should be used by various IO controlling ++ policies. ++ ++ Currently, CFQ IO scheduler uses it to recognize task groups and ++ control disk bandwidth allocation (proportional time slice allocation) ++ to such task groups. It is also used by bio throttling logic in ++ block layer to implement upper limit in IO rates on a device. ++ ++ This option only enables generic Block IO controller infrastructure. ++ One needs to also enable actual IO controlling logic/policy. For ++ enabling proportional weight division of disk bandwidth in CFQ, set ++ CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set ++ CONFIG_BLK_DEV_THROTTLING=y. ++ ++ See Documentation/cgroup-v1/blkio-controller.txt for more information. ++ ++config DEBUG_BLK_CGROUP ++ bool "IO controller debugging" ++ depends on BLK_CGROUP ++ default n ++ ---help--- ++ Enable some debugging help. Currently it exports additional stat ++ files in a cgroup which can be useful for debugging. ++ ++config CGROUP_WRITEBACK ++ bool ++ depends on MEMCG && BLK_CGROUP ++ default y ++ ++menuconfig CGROUP_SCHED ++ bool "CPU controller" ++ default n ++ help ++ This feature lets CPU scheduler recognize task groups and control CPU ++ bandwidth allocation to such task groups. It uses cgroups to group ++ tasks. ++ ++if CGROUP_SCHED ++config FAIR_GROUP_SCHED ++ bool "Group scheduling for SCHED_OTHER" ++ depends on CGROUP_SCHED ++ default CGROUP_SCHED ++ ++config CFS_BANDWIDTH ++ bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" ++ depends on FAIR_GROUP_SCHED ++ default n ++ help ++ This option allows users to define CPU bandwidth rates (limits) for ++ tasks running within the fair group scheduler. Groups with no limit ++ set are considered to be unconstrained and will run with no ++ restriction. ++ See Documentation/scheduler/sched-bwc.txt for more information. ++ ++config RT_GROUP_SCHED ++ bool "Group scheduling for SCHED_RR/FIFO" ++ depends on CGROUP_SCHED ++ default n ++ help ++ This feature lets you explicitly allocate real CPU bandwidth ++ to task groups. If enabled, it will also make it impossible to ++ schedule realtime tasks for non-root users until you allocate ++ realtime bandwidth for them. ++ See Documentation/scheduler/sched-rt-group.txt for more information. ++ ++endif #CGROUP_SCHED ++ ++config CGROUP_PIDS ++ bool "PIDs controller" ++ help ++ Provides enforcement of process number limits in the scope of a ++ cgroup. Any attempt to fork more processes than is allowed in the ++ cgroup will fail. PIDs are fundamentally a global resource because it ++ is fairly trivial to reach PID exhaustion before you reach even a ++ conservative kmemcg limit. As a result, it is possible to grind a ++ system to halt without being limited by other cgroup policies. The ++ PIDs controller is designed to stop this from happening. ++ ++ It should be noted that organisational operations (such as attaching ++ to a cgroup hierarchy will *not* be blocked by the PIDs controller), ++ since the PIDs limit only affects a process's ability to fork, not to ++ attach to a cgroup. ++ ++config CGROUP_RDMA ++ bool "RDMA controller" ++ help ++ Provides enforcement of RDMA resources defined by IB stack. 
++ It is fairly easy for consumers to exhaust RDMA resources, which ++ can result into resource unavailability to other consumers. ++ RDMA controller is designed to stop this from happening. ++ Attaching processes with active RDMA resources to the cgroup ++ hierarchy is allowed even if can cross the hierarchy's limit. ++ ++config CGROUP_FREEZER ++ bool "Freezer controller" ++ help ++ Provides a way to freeze and unfreeze all tasks in a ++ cgroup. ++ ++ This option affects the ORIGINAL cgroup interface. The cgroup2 memory ++ controller includes important in-kernel memory consumers per default. ++ ++ If you're using cgroup2, say N. ++ ++config CGROUP_HUGETLB ++ bool "HugeTLB controller" ++ depends on HUGETLB_PAGE ++ select PAGE_COUNTER ++ default n ++ help ++ Provides a cgroup controller for HugeTLB pages. ++ When you enable this, you can put a per cgroup limit on HugeTLB usage. ++ The limit is enforced during page fault. Since HugeTLB doesn't ++ support page reclaim, enforcing the limit at page fault time implies ++ that, the application will get SIGBUS signal if it tries to access ++ HugeTLB pages beyond its limit. This requires the application to know ++ beforehand how much HugeTLB pages it would require for its use. The ++ control group is tracked in the third page lru pointer. This means ++ that we cannot use the controller with huge page less than 3 pages. ++ ++config CPUSETS ++ bool "Cpuset controller" ++ depends on SMP ++ help ++ This option will let you create and manage CPUSETs which ++ allow dynamically partitioning a system into sets of CPUs and ++ Memory Nodes and assigning tasks to run only within those sets. ++ This is primarily useful on large SMP or NUMA systems. ++ ++ Say N if unsure. ++ ++config PROC_PID_CPUSET ++ bool "Include legacy /proc//cpuset file" ++ depends on CPUSETS ++ default y ++ ++config CGROUP_DEVICE ++ bool "Device controller" ++ help ++ Provides a cgroup controller implementing whitelists for ++ devices which a process in the cgroup can mknod or open. ++ ++config CGROUP_CPUACCT ++ bool "Simple CPU accounting controller" ++ help ++ Provides a simple controller for monitoring the ++ total CPU consumed by the tasks in a cgroup. ++ ++config CGROUP_PERF ++ bool "Perf controller" ++ depends on PERF_EVENTS ++ help ++ This option extends the perf per-cpu mode to restrict monitoring ++ to threads which belong to the cgroup specified and run on the ++ designated cpu. ++ ++ Say N if unsure. ++ ++config CGROUP_BPF ++ bool "Support for eBPF programs attached to cgroups" ++ depends on BPF_SYSCALL ++ select SOCK_CGROUP_DATA ++ help ++ Allow attaching eBPF programs to a cgroup using the bpf(2) ++ syscall command BPF_PROG_ATTACH. ++ ++ In which context these programs are accessed depends on the type ++ of attachment. For instance, programs that are attached using ++ BPF_CGROUP_INET_INGRESS will be executed on the ingress path of ++ inet sockets. ++ ++config CGROUP_DEBUG ++ bool "Debug controller" ++ default n ++ depends on DEBUG_KERNEL ++ help ++ This option enables a simple controller that exports ++ debugging information about the cgroups framework. This ++ controller is for control cgroup debugging only. Its ++ interfaces are not stable. ++ ++ Say N. ++ ++config SOCK_CGROUP_DATA ++ bool ++ default n ++ ++config CGROUP_FILES ++ bool "Files Resource Controller for Control Groups" ++ select PAGE_COUNTER ++ default n ++ help ++ Provides a cgroup resource controller that limits number of open ++ file handles within a cgroup. 
++ This supports catching misbehaving processes and ++ return EMFILE instead of ENOMEM for kernel memory limits. ++ ++endif # CGROUPS ++ ++menuconfig NAMESPACES ++ bool "Namespaces support" if EXPERT ++ depends on MULTIUSER ++ default !EXPERT ++ help ++ Provides the way to make tasks work with different objects using ++ the same id. For example same IPC id may refer to different objects ++ or same user id or pid may refer to different tasks when used in ++ different namespaces. ++ ++if NAMESPACES ++ ++config UTS_NS ++ bool "UTS namespace" ++ default y ++ help ++ In this namespace tasks see different info provided with the ++ uname() system call ++ ++config IPC_NS ++ bool "IPC namespace" ++ depends on (SYSVIPC || POSIX_MQUEUE) ++ default y ++ help ++ In this namespace tasks work with IPC ids which correspond to ++ different IPC objects in different namespaces. ++ ++config USER_NS ++ bool "User namespace" ++ default n ++ help ++ This allows containers, i.e. vservers, to use user namespaces ++ to provide different user info for different servers. ++ ++ When user namespaces are enabled in the kernel it is ++ recommended that the MEMCG option also be enabled and that ++ user-space use the memory control groups to limit the amount ++ of memory a memory unprivileged users can use. ++ ++ If unsure, say N. ++ ++config PID_NS ++ bool "PID Namespaces" ++ default y ++ help ++ Support process id namespaces. This allows having multiple ++ processes with the same pid as long as they are in different ++ pid namespaces. This is a building block of containers. ++ ++config NET_NS ++ bool "Network namespace" ++ depends on NET ++ default y ++ help ++ Allow user space to create what appear to be multiple instances ++ of the network stack. ++ ++endif # NAMESPACES ++ ++config CHECKPOINT_RESTORE ++ bool "Checkpoint/restore support" ++ select PROC_CHILDREN ++ default n ++ help ++ Enables additional kernel features in a sake of checkpoint/restore. ++ In particular it adds auxiliary prctl codes to setup process text, ++ data and heap segment sizes, and a few additional /proc filesystem ++ entries. ++ ++ If unsure, say N here. ++ ++config SCHED_AUTOGROUP ++ bool "Automatic process group scheduling" ++ select CGROUPS ++ select CGROUP_SCHED ++ select FAIR_GROUP_SCHED ++ help ++ This option optimizes the scheduler for common desktop workloads by ++ automatically creating and populating task groups. This separation ++ of workloads isolates aggressive CPU burners (like build jobs) from ++ desktop applications. Task group autogeneration is currently based ++ upon task session. ++ ++config SYSFS_DEPRECATED ++ bool "Enable deprecated sysfs features to support old userspace tools" ++ depends on SYSFS ++ default n ++ help ++ This option adds code that switches the layout of the "block" class ++ devices, to not show up in /sys/class/block/, but only in ++ /sys/block/. ++ ++ This switch is only active when the sysfs.deprecated=1 boot option is ++ passed or the SYSFS_DEPRECATED_V2 option is set. ++ ++ This option allows new kernels to run on old distributions and tools, ++ which might get confused by /sys/class/block/. Since 2007/2008 all ++ major distributions and tools handle this just fine. ++ ++ Recent distributions and userspace tools after 2009/2010 depend on ++ the existence of /sys/class/block/, and will not work with this ++ option enabled. ++ ++ Only if you are using a new kernel on an old distribution, you might ++ need to say Y here. 
++ ++config SYSFS_DEPRECATED_V2 ++ bool "Enable deprecated sysfs features by default" ++ default n ++ depends on SYSFS ++ depends on SYSFS_DEPRECATED ++ help ++ Enable deprecated sysfs by default. ++ ++ See the CONFIG_SYSFS_DEPRECATED option for more details about this ++ option. ++ ++ Only if you are using a new kernel on an old distribution, you might ++ need to say Y here. Even then, odds are you would not need it ++ enabled, you can always pass the boot option if absolutely necessary. ++ ++config RELAY ++ bool "Kernel->user space relay support (formerly relayfs)" ++ select IRQ_WORK ++ help ++ This option enables support for relay interface support in ++ certain file systems (such as debugfs). ++ It is designed to provide an efficient mechanism for tools and ++ facilities to relay large amounts of data from kernel space to ++ user space. ++ ++ If unsure, say N. ++ ++config BLK_DEV_INITRD ++ bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" ++ help ++ The initial RAM filesystem is a ramfs which is loaded by the ++ boot loader (loadlin or lilo) and that is mounted as root ++ before the normal boot procedure. It is typically used to ++ load modules needed to mount the "real" root file system, ++ etc. See for details. ++ ++ If RAM disk support (BLK_DEV_RAM) is also included, this ++ also enables initial RAM disk (initrd) support and adds ++ 15 Kbytes (more on some other architectures) to the kernel size. ++ ++ If unsure say Y. ++ ++if BLK_DEV_INITRD ++ ++source "usr/Kconfig" ++ ++endif ++ ++choice ++ prompt "Compiler optimization level" ++ default CC_OPTIMIZE_FOR_PERFORMANCE ++ ++config CC_OPTIMIZE_FOR_PERFORMANCE ++ bool "Optimize for performance" ++ help ++ This is the default optimization level for the kernel, building ++ with the "-O2" compiler flag for best performance and most ++ helpful compile-time warnings. ++ ++config CC_OPTIMIZE_FOR_SIZE ++ bool "Optimize for size" ++ help ++ Enabling this option will pass "-Os" instead of "-O2" to ++ your compiler resulting in a smaller kernel. ++ ++ If unsure, say N. ++ ++endchoice ++ ++config HAVE_LD_DEAD_CODE_DATA_ELIMINATION ++ bool ++ help ++ This requires that the arch annotates or otherwise protects ++ its external entry points from being discarded. Linker scripts ++ must also merge .text.*, .data.*, and .bss.* correctly into ++ output sections. Care must be taken not to pull in unrelated ++ sections (e.g., '.text.init'). Typically '.' in section names ++ is used to distinguish them from label names / C identifiers. ++ ++config LD_DEAD_CODE_DATA_ELIMINATION ++ bool "Dead code and data elimination (EXPERIMENTAL)" ++ depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION ++ depends on EXPERT ++ depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800) ++ depends on $(cc-option,-ffunction-sections -fdata-sections) ++ depends on $(ld-option,--gc-sections) ++ help ++ Enable this if you want to do dead code and data elimination with ++ the linker by compiling with -ffunction-sections -fdata-sections, ++ and linking with --gc-sections. ++ ++ This can reduce on disk and in-memory size of the kernel ++ code and static data, particularly for small configs and ++ on small systems. This has the possibility of introducing ++ silently broken kernel if the required annotations are not ++ present. This option is not well tested yet, so use at your ++ own risk. 
++ ++config SYSCTL ++ bool ++ ++config ANON_INODES ++ bool ++ ++config HAVE_UID16 ++ bool ++ ++config SYSCTL_EXCEPTION_TRACE ++ bool ++ help ++ Enable support for /proc/sys/debug/exception-trace. ++ ++config SYSCTL_ARCH_UNALIGN_NO_WARN ++ bool ++ help ++ Enable support for /proc/sys/kernel/ignore-unaligned-usertrap ++ Allows arch to define/use @no_unaligned_warning to possibly warn ++ about unaligned access emulation going on under the hood. ++ ++config SYSCTL_ARCH_UNALIGN_ALLOW ++ bool ++ help ++ Enable support for /proc/sys/kernel/unaligned-trap ++ Allows arches to define/use @unaligned_enabled to runtime toggle ++ the unaligned access emulation. ++ see arch/parisc/kernel/unaligned.c for reference ++ ++config HAVE_PCSPKR_PLATFORM ++ bool ++ ++# interpreter that classic socket filters depend on ++config BPF ++ bool ++ ++menuconfig EXPERT ++ bool "Configure standard kernel features (expert users)" ++ # Unhide debug options, to make the on-by-default options visible ++ select DEBUG_KERNEL ++ help ++ This option allows certain base kernel options and settings ++ to be disabled or tweaked. This is for specialized ++ environments which can tolerate a "non-standard" kernel. ++ Only use this if you really know what you are doing. ++ ++config UID16 ++ bool "Enable 16-bit UID system calls" if EXPERT ++ depends on HAVE_UID16 && MULTIUSER ++ default y ++ help ++ This enables the legacy 16-bit UID syscall wrappers. ++ ++config MULTIUSER ++ bool "Multiple users, groups and capabilities support" if EXPERT ++ default y ++ help ++ This option enables support for non-root users, groups and ++ capabilities. ++ ++ If you say N here, all processes will run with UID 0, GID 0, and all ++ possible capabilities. Saying N here also compiles out support for ++ system calls related to UIDs, GIDs, and capabilities, such as setuid, ++ setgid, and capset. ++ ++ If unsure, say Y here. ++ ++config SGETMASK_SYSCALL ++ bool "sgetmask/ssetmask syscalls support" if EXPERT ++ def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH ++ ---help--- ++ sys_sgetmask and sys_ssetmask are obsolete system calls ++ no longer supported in libc but still enabled by default in some ++ architectures. ++ ++ If unsure, leave the default option here. ++ ++config SYSFS_SYSCALL ++ bool "Sysfs syscall support" if EXPERT ++ default y ++ ---help--- ++ sys_sysfs is an obsolete system call no longer supported in libc. ++ Note that disabling this option is more secure but might break ++ compatibility with some systems. ++ ++ If unsure say Y here. ++ ++config SYSCTL_SYSCALL ++ bool "Sysctl syscall support" if EXPERT ++ depends on PROC_SYSCTL ++ default n ++ select SYSCTL ++ ---help--- ++ sys_sysctl uses binary paths that have been found challenging ++ to properly maintain and use. The interface in /proc/sys ++ using paths with ascii names is now the primary path to this ++ information. ++ ++ Almost nothing using the binary sysctl interface so if you are ++ trying to save some space it is probably safe to disable this, ++ making your kernel marginally smaller. ++ ++ If unsure say N here. ++ ++config FHANDLE ++ bool "open by fhandle syscalls" if EXPERT ++ select EXPORTFS ++ default y ++ help ++ If you say Y here, a user level program will be able to map ++ file names to handle and then later use the handle for ++ different file system operations. This is useful in implementing ++ userspace file servers, which now track files using handles instead ++ of names. The handle would remain the same even if file names ++ get renamed. 
Enables open_by_handle_at(2) and name_to_handle_at(2) ++ syscalls. ++ ++config POSIX_TIMERS ++ bool "Posix Clocks & timers" if EXPERT ++ default y ++ help ++ This includes native support for POSIX timers to the kernel. ++ Some embedded systems have no use for them and therefore they ++ can be configured out to reduce the size of the kernel image. ++ ++ When this option is disabled, the following syscalls won't be ++ available: timer_create, timer_gettime: timer_getoverrun, ++ timer_settime, timer_delete, clock_adjtime, getitimer, ++ setitimer, alarm. Furthermore, the clock_settime, clock_gettime, ++ clock_getres and clock_nanosleep syscalls will be limited to ++ CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_BOOTTIME only. ++ ++ If unsure say y. ++ ++config PRINTK ++ default y ++ bool "Enable support for printk" if EXPERT ++ select IRQ_WORK ++ help ++ This option enables normal printk support. Removing it ++ eliminates most of the message strings from the kernel image ++ and makes the kernel more or less silent. As this makes it ++ very difficult to diagnose system problems, saying N here is ++ strongly discouraged. ++ ++config PRINTK_NMI ++ def_bool y ++ depends on PRINTK ++ depends on HAVE_NMI ++ ++config BUG ++ bool "BUG() support" if EXPERT ++ default y ++ help ++ Disabling this option eliminates support for BUG and WARN, reducing ++ the size of your kernel image and potentially quietly ignoring ++ numerous fatal conditions. You should only consider disabling this ++ option for embedded systems with no facilities for reporting errors. ++ Just say Y. ++ ++config ELF_CORE ++ depends on COREDUMP ++ default y ++ bool "Enable ELF core dumps" if EXPERT ++ help ++ Enable support for generating core dumps. Disabling saves about 4k. ++ ++ ++config PCSPKR_PLATFORM ++ bool "Enable PC-Speaker support" if EXPERT ++ depends on HAVE_PCSPKR_PLATFORM ++ select I8253_LOCK ++ default y ++ help ++ This option allows to disable the internal PC-Speaker ++ support, saving some memory. ++ ++config BASE_FULL ++ default y ++ bool "Enable full-sized data structures for core" if EXPERT ++ help ++ Disabling this option reduces the size of miscellaneous core ++ kernel data structures. This saves memory on small machines, ++ but may reduce performance. ++ ++config FUTEX ++ bool "Enable futex support" if EXPERT ++ default y ++ imply RT_MUTEXES ++ help ++ Disabling this option will cause the kernel to be built without ++ support for "fast userspace mutexes". The resulting kernel may not ++ run glibc-based applications correctly. ++ ++config FUTEX_PI ++ bool ++ depends on FUTEX && RT_MUTEXES ++ default y ++ ++config HAVE_FUTEX_CMPXCHG ++ bool ++ depends on FUTEX ++ help ++ Architectures should select this if futex_atomic_cmpxchg_inatomic() ++ is implemented and always working. This removes a couple of runtime ++ checks. ++ ++config EPOLL ++ bool "Enable eventpoll support" if EXPERT ++ default y ++ select ANON_INODES ++ help ++ Disabling this option will cause the kernel to be built without ++ support for epoll family of system calls. ++ ++config SIGNALFD ++ bool "Enable signalfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the signalfd() system call that allows to receive signals ++ on a file descriptor. ++ ++ If unsure, say Y. ++ ++config TIMERFD ++ bool "Enable timerfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the timerfd() system call that allows to receive timer ++ events on a file descriptor. ++ ++ If unsure, say Y. 
++ ++config EVENTFD ++ bool "Enable eventfd() system call" if EXPERT ++ select ANON_INODES ++ default y ++ help ++ Enable the eventfd() system call that allows to receive both ++ kernel notification (ie. KAIO) or userspace notifications. ++ ++ If unsure, say Y. ++ ++config SHMEM ++ bool "Use full shmem filesystem" if EXPERT ++ default y ++ depends on MMU ++ help ++ The shmem is an internal filesystem used to manage shared memory. ++ It is backed by swap and manages resource limits. It is also exported ++ to userspace as tmpfs if TMPFS is enabled. Disabling this ++ option replaces shmem and tmpfs with the much simpler ramfs code, ++ which may be appropriate on small systems without swap. ++ ++config AIO ++ bool "Enable AIO support" if EXPERT ++ default y ++ help ++ This option enables POSIX asynchronous I/O which may by used ++ by some high performance threaded applications. Disabling ++ this option saves about 7k. ++ ++config ADVISE_SYSCALLS ++ bool "Enable madvise/fadvise syscalls" if EXPERT ++ default y ++ help ++ This option enables the madvise and fadvise syscalls, used by ++ applications to advise the kernel about their future memory or file ++ usage, improving performance. If building an embedded system where no ++ applications use these syscalls, you can disable this option to save ++ space. ++ ++config MEMBARRIER ++ bool "Enable membarrier() system call" if EXPERT ++ default y ++ help ++ Enable the membarrier() system call that allows issuing memory ++ barriers across all running threads, which can be used to distribute ++ the cost of user-space memory barriers asymmetrically by transforming ++ pairs of memory barriers into pairs consisting of membarrier() and a ++ compiler barrier. ++ ++ If unsure, say Y. ++ ++config KALLSYMS ++ bool "Load all symbols for debugging/ksymoops" if EXPERT ++ default y ++ help ++ Say Y here to let the kernel print out symbolic crash information and ++ symbolic stack backtraces. This increases the size of the kernel ++ somewhat, as all symbols have to be loaded into the kernel image. ++ ++config KALLSYMS_ALL ++ bool "Include all symbols in kallsyms" ++ depends on DEBUG_KERNEL && KALLSYMS ++ help ++ Normally kallsyms only contains the symbols of functions for nicer ++ OOPS messages and backtraces (i.e., symbols from the text and inittext ++ sections). This is sufficient for most cases. And only in very rare ++ cases (e.g., when a debugger is used) all symbols are required (e.g., ++ names of variables from the data sections, etc). ++ ++ This option makes sure that all symbols are loaded into the kernel ++ image (i.e., symbols from all sections) in cost of increased kernel ++ size (depending on the kernel configuration, it may be 300KiB or ++ something like this). ++ ++ Say N unless you really need all symbols. ++ ++config KALLSYMS_ABSOLUTE_PERCPU ++ bool ++ depends on KALLSYMS ++ default X86_64 && SMP ++ ++config KALLSYMS_BASE_RELATIVE ++ bool ++ depends on KALLSYMS ++ default !IA64 ++ help ++ Instead of emitting them as absolute values in the native word size, ++ emit the symbol references in the kallsyms table as 32-bit entries, ++ each containing a relative value in the range [base, base + U32_MAX] ++ or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either ++ an absolute value in the range [0, S32_MAX] or a relative value in the ++ range [base, base + S32_MAX], where base is the lowest relative symbol ++ address encountered in the image. 
++ ++ On 64-bit builds, this reduces the size of the address table by 50%, ++ but more importantly, it results in entries whose values are build ++ time constants, and no relocation pass is required at runtime to fix ++ up the entries based on the runtime load address of the kernel. ++ ++# end of the "standard kernel features (expert users)" menu ++ ++# syscall, maps, verifier ++config BPF_SYSCALL ++ bool "Enable bpf() system call" ++ select ANON_INODES ++ select BPF ++ select IRQ_WORK ++ default n ++ help ++ Enable the bpf() system call that allows to manipulate eBPF ++ programs and maps via file descriptors. ++ ++config BPF_JIT_ALWAYS_ON ++ bool "Permanently enable BPF JIT and remove BPF interpreter" ++ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT ++ help ++ Enables BPF JIT and removes BPF interpreter to avoid ++ speculative execution of BPF instructions by the interpreter ++ ++config USERFAULTFD ++ bool "Enable userfaultfd() system call" ++ select ANON_INODES ++ depends on MMU ++ help ++ Enable the userfaultfd() system call that allows to intercept and ++ handle page faults in userland. ++ ++config ARCH_HAS_MEMBARRIER_CALLBACKS ++ bool ++ ++config ARCH_HAS_MEMBARRIER_SYNC_CORE ++ bool ++ ++config RSEQ ++ bool "Enable rseq() system call" if EXPERT ++ default y ++ depends on HAVE_RSEQ ++ select MEMBARRIER ++ help ++ Enable the restartable sequences system call. It provides a ++ user-space cache for the current CPU number value, which ++ speeds up getting the current CPU number from user-space, ++ as well as an ABI to speed up user-space operations on ++ per-CPU data. ++ ++ If unsure, say Y. ++ ++config DEBUG_RSEQ ++ default n ++ bool "Enabled debugging of rseq() system call" if EXPERT ++ depends on RSEQ && DEBUG_KERNEL ++ help ++ Enable extra debugging checks for the rseq system call. ++ ++ If unsure, say N. ++ ++config EMBEDDED ++ bool "Embedded system" ++ option allnoconfig_y ++ select EXPERT ++ help ++ This option should be enabled if compiling the kernel for ++ an embedded system so certain expert options are available ++ for configuration. ++ ++config HAVE_PERF_EVENTS ++ bool ++ help ++ See tools/perf/design.txt for details. ++ ++config PERF_USE_VMALLOC ++ bool ++ help ++ See tools/perf/design.txt for details ++ ++config PC104 ++ bool "PC/104 support" if EXPERT ++ help ++ Expose PC/104 form factor device drivers and options available for ++ selection and configuration. Enable this option if your target ++ machine has a PC/104 bus. ++ ++menu "Kernel Performance Events And Counters" ++ ++config PERF_EVENTS ++ bool "Kernel performance events and counters" ++ default y if PROFILING ++ depends on HAVE_PERF_EVENTS ++ select ANON_INODES ++ select IRQ_WORK ++ select SRCU ++ help ++ Enable kernel support for various performance events provided ++ by software and hardware. ++ ++ Software events are supported either built-in or via the ++ use of generic tracepoints. ++ ++ Most modern CPUs support performance events via performance ++ counter registers. These registers count the number of certain ++ types of hw events: such as instructions executed, cachemisses ++ suffered, or branches mis-predicted - without slowing down the ++ kernel or applications. These registers can also trigger interrupts ++ when a threshold number of events have passed - and can thus be ++ used to profile the code that runs on that CPU. 
++ ++ The Linux Performance Event subsystem provides an abstraction of ++ these software and hardware event capabilities, available via a ++ system call and used by the "perf" utility in tools/perf/. It ++ provides per task and per CPU counters, and it provides event ++ capabilities on top of those. ++ ++ Say Y if unsure. ++ ++config DEBUG_PERF_USE_VMALLOC ++ default n ++ bool "Debug: use vmalloc to back perf mmap() buffers" ++ depends on PERF_EVENTS && DEBUG_KERNEL && !PPC ++ select PERF_USE_VMALLOC ++ help ++ Use vmalloc memory to back perf mmap() buffers. ++ ++ Mostly useful for debugging the vmalloc code on platforms ++ that don't require it. ++ ++ Say N if unsure. ++ ++endmenu ++ ++config VM_EVENT_COUNTERS ++ default y ++ bool "Enable VM event counters for /proc/vmstat" if EXPERT ++ help ++ VM event counters are needed for event counts to be shown. ++ This option allows the disabling of the VM event counters ++ on EXPERT systems. /proc/vmstat will only show page counts ++ if VM event counters are disabled. ++ ++config SLUB_DEBUG ++ default y ++ bool "Enable SLUB debugging support" if EXPERT ++ depends on SLUB && SYSFS ++ help ++ SLUB has extensive debug support features. Disabling these can ++ result in significant savings in code size. This also disables ++ SLUB sysfs support. /sys/slab will not exist and there will be ++ no support for cache validation etc. ++ ++config SLUB_MEMCG_SYSFS_ON ++ default n ++ bool "Enable memcg SLUB sysfs support by default" if EXPERT ++ depends on SLUB && SYSFS && MEMCG ++ help ++ SLUB creates a directory under /sys/kernel/slab for each ++ allocation cache to host info and debug files. If memory ++ cgroup is enabled, each cache can have per memory cgroup ++ caches. SLUB can create the same sysfs directories for these ++ caches under /sys/kernel/slab/CACHE/cgroup but it can lead ++ to a very high number of debug files being created. This is ++ controlled by slub_memcg_sysfs boot parameter and this ++ config option determines the parameter's default value. ++ ++config COMPAT_BRK ++ bool "Disable heap randomization" ++ default y ++ help ++ Randomizing heap placement makes heap exploits harder, but it ++ also breaks ancient binaries (including anything libc5 based). ++ This option changes the bootup default to heap randomization ++ disabled, and can be overridden at runtime by setting ++ /proc/sys/kernel/randomize_va_space to 2. ++ ++ On non-ancient distros (post-2000 ones) N is usually a safe choice. ++ ++choice ++ prompt "Choose SLAB allocator" ++ default SLUB ++ help ++ This option allows to select a slab allocator. ++ ++config SLAB ++ bool "SLAB" ++ select HAVE_HARDENED_USERCOPY_ALLOCATOR ++ help ++ The regular slab allocator that is established and known to work ++ well in all environments. It organizes cache hot objects in ++ per cpu and per node queues. ++ ++config SLUB ++ bool "SLUB (Unqueued Allocator)" ++ select HAVE_HARDENED_USERCOPY_ALLOCATOR ++ help ++ SLUB is a slab allocator that minimizes cache line usage ++ instead of managing queues of cached objects (SLAB approach). ++ Per cpu caching is realized using slabs of objects instead ++ of queues of objects. SLUB can use memory efficiently ++ and has enhanced diagnostics. SLUB is the default choice for ++ a slab allocator. ++ ++config SLOB ++ depends on EXPERT ++ bool "SLOB (Simple Allocator)" ++ help ++ SLOB replaces the stock allocator with a drastically simpler ++ allocator. SLOB is generally more space efficient but ++ does not perform as well on large systems. 
++ ++endchoice ++ ++config SLAB_MERGE_DEFAULT ++ bool "Allow slab caches to be merged" ++ default y ++ help ++ For reduced kernel memory fragmentation, slab caches can be ++ merged when they share the same size and other characteristics. ++ This carries a risk of kernel heap overflows being able to ++ overwrite objects from merged caches (and more easily control ++ cache layout), which makes such heap attacks easier to exploit ++ by attackers. By keeping caches unmerged, these kinds of exploits ++ can usually only damage objects in the same cache. To disable ++ merging at runtime, "slab_nomerge" can be passed on the kernel ++ command line. ++ ++config SLAB_FREELIST_RANDOM ++ default n ++ depends on SLAB || SLUB ++ bool "SLAB freelist randomization" ++ help ++ Randomizes the freelist order used on creating new pages. This ++ security feature reduces the predictability of the kernel slab ++ allocator against heap overflows. ++ ++config SLAB_FREELIST_HARDENED ++ bool "Harden slab freelist metadata" ++ depends on SLUB ++ help ++ Many kernel heap attacks try to target slab cache metadata and ++ other infrastructure. This options makes minor performance ++ sacrifies to harden the kernel slab allocator against common ++ freelist exploit methods. ++ ++config SLUB_CPU_PARTIAL ++ default y ++ depends on SLUB && SMP ++ bool "SLUB per cpu partial cache" ++ help ++ Per cpu partial caches accellerate objects allocation and freeing ++ that is local to a processor at the price of more indeterminism ++ in the latency of the free. On overflow these caches will be cleared ++ which requires the taking of locks that may cause latency spikes. ++ Typically one would choose no for a realtime system. ++ ++config MMAP_ALLOW_UNINITIALIZED ++ bool "Allow mmapped anonymous memory to be uninitialized" ++ depends on EXPERT && !MMU ++ default n ++ help ++ Normally, and according to the Linux spec, anonymous memory obtained ++ from mmap() has its contents cleared before it is passed to ++ userspace. Enabling this config option allows you to request that ++ mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus ++ providing a huge performance boost. If this option is not enabled, ++ then the flag will be ignored. ++ ++ This is taken advantage of by uClibc's malloc(), and also by ++ ELF-FDPIC binfmt's brk and stack allocator. ++ ++ Because of the obvious security issues, this option should only be ++ enabled on embedded devices where you control what is run in ++ userspace. Since that isn't generally a problem on no-MMU systems, ++ it is normally safe to say Y here. ++ ++ See Documentation/nommu-mmap.txt for more information. ++ ++config SYSTEM_DATA_VERIFICATION ++ def_bool n ++ select SYSTEM_TRUSTED_KEYRING ++ select KEYS ++ select CRYPTO ++ select CRYPTO_RSA ++ select ASYMMETRIC_KEY_TYPE ++ select ASYMMETRIC_PUBLIC_KEY_SUBTYPE ++ select ASN1 ++ select OID_REGISTRY ++ select X509_CERTIFICATE_PARSER ++ select PKCS7_MESSAGE_PARSER ++ help ++ Provide PKCS#7 message verification using the contents of the system ++ trusted keyring to provide public keys. This then can be used for ++ module verification, kexec image verification and firmware blob ++ verification. ++ ++config PROFILING ++ bool "Profiling support" ++ help ++ Say Y here to enable the extended profiling support mechanisms used ++ by profilers such as OProfile. ++ ++# ++# Place an empty function call at each tracepoint site. Can be ++# dynamically changed for a probe function. 
++# ++config TRACEPOINTS ++ bool ++ ++endmenu # General setup ++ ++source "arch/Kconfig" ++ ++config RT_MUTEXES ++ bool ++ ++config BASE_SMALL ++ int ++ default 0 if BASE_FULL ++ default 1 if !BASE_FULL ++ ++menuconfig MODULES ++ bool "Enable loadable module support" ++ option modules ++ help ++ Kernel modules are small pieces of compiled code which can ++ be inserted in the running kernel, rather than being ++ permanently built into the kernel. You use the "modprobe" ++ tool to add (and sometimes remove) them. If you say Y here, ++ many parts of the kernel can be built as modules (by ++ answering M instead of Y where indicated): this is most ++ useful for infrequently used options which are not required ++ for booting. For more information, see the man pages for ++ modprobe, lsmod, modinfo, insmod and rmmod. ++ ++ If you say Y here, you will need to run "make ++ modules_install" to put the modules under /lib/modules/ ++ where modprobe can find them (you may need to be root to do ++ this). ++ ++ If unsure, say Y. ++ ++if MODULES ++ ++config MODULE_FORCE_LOAD ++ bool "Forced module loading" ++ default n ++ help ++ Allow loading of modules without version information (ie. modprobe ++ --force). Forced module loading sets the 'F' (forced) taint flag and ++ is usually a really bad idea. ++ ++config MODULE_UNLOAD ++ bool "Module unloading" ++ help ++ Without this option you will not be able to unload any ++ modules (note that some modules may not be unloadable ++ anyway), which makes your kernel smaller, faster ++ and simpler. If unsure, say Y. ++ ++config MODULE_FORCE_UNLOAD ++ bool "Forced module unloading" ++ depends on MODULE_UNLOAD ++ help ++ This option allows you to force a module to unload, even if the ++ kernel believes it is unsafe: the kernel will remove the module ++ without waiting for anyone to stop using it (using the -f option to ++ rmmod). This is mainly for kernel developers and desperate users. ++ If unsure, say N. ++ ++config MODVERSIONS ++ bool "Module versioning support" ++ help ++ Usually, you have to use modules compiled with your kernel. ++ Saying Y here makes it sometimes possible to use modules ++ compiled for different kernels, by adding enough information ++ to the modules to (hopefully) spot any changes which would ++ make them incompatible with the kernel you are running. If ++ unsure, say N. ++ ++config MODULE_REL_CRCS ++ bool ++ depends on MODVERSIONS ++ ++config MODULE_SRCVERSION_ALL ++ bool "Source checksum for all modules" ++ help ++ Modules which contain a MODULE_VERSION get an extra "srcversion" ++ field inserted into their modinfo section, which contains a ++ sum of the source files which made it. This helps maintainers ++ see exactly which source was used to build a module (since ++ others sometimes change the module source without updating ++ the version). With this option, such a "srcversion" field ++ will be created for all modules. If unsure, say N. ++ ++config MODULE_SIG ++ bool "Module signature verification" ++ depends on MODULES ++ select SYSTEM_DATA_VERIFICATION ++ help ++ Check modules for valid signatures upon load: the signature ++ is simply appended to the module. For more information see ++ . ++ ++ Note that this option adds the OpenSSL development packages as a ++ kernel build dependency so that the signing tool can use its crypto ++ library. ++ ++ !!!WARNING!!! If you enable this option, you MUST make sure that the ++ module DOES NOT get stripped after being signed. 
This includes the ++ debuginfo strip done by some packagers (such as rpmbuild) and ++ inclusion into an initramfs that wants the module size reduced. ++ ++config MODULE_SIG_FORCE ++ bool "Require modules to be validly signed" ++ depends on MODULE_SIG ++ help ++ Reject unsigned modules or signed modules for which we don't have a ++ key. Without this, such modules will simply taint the kernel. ++ ++config MODULE_SIG_ALL ++ bool "Automatically sign all modules" ++ default y ++ depends on MODULE_SIG ++ help ++ Sign all modules during make modules_install. Without this option, ++ modules must be signed manually, using the scripts/sign-file tool. ++ ++comment "Do not forget to sign required modules with scripts/sign-file" ++ depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL ++ ++choice ++ prompt "Which hash algorithm should modules be signed with?" ++ depends on MODULE_SIG ++ help ++ This determines which sort of hashing algorithm will be used during ++ signature generation. This algorithm _must_ be built into the kernel ++ directly so that signature verification can take place. It is not ++ possible to load a signed module containing the algorithm to check ++ the signature on that module. ++ ++config MODULE_SIG_SHA1 ++ bool "Sign modules with SHA-1" ++ select CRYPTO_SHA1 ++ ++config MODULE_SIG_SHA224 ++ bool "Sign modules with SHA-224" ++ select CRYPTO_SHA256 ++ ++config MODULE_SIG_SHA256 ++ bool "Sign modules with SHA-256" ++ select CRYPTO_SHA256 ++ ++config MODULE_SIG_SHA384 ++ bool "Sign modules with SHA-384" ++ select CRYPTO_SHA512 ++ ++config MODULE_SIG_SHA512 ++ bool "Sign modules with SHA-512" ++ select CRYPTO_SHA512 ++ ++endchoice ++ ++config MODULE_SIG_HASH ++ string ++ depends on MODULE_SIG ++ default "sha1" if MODULE_SIG_SHA1 ++ default "sha224" if MODULE_SIG_SHA224 ++ default "sha256" if MODULE_SIG_SHA256 ++ default "sha384" if MODULE_SIG_SHA384 ++ default "sha512" if MODULE_SIG_SHA512 ++ ++config MODULE_COMPRESS ++ bool "Compress modules on installation" ++ depends on MODULES ++ help ++ ++ Compresses kernel modules when 'make modules_install' is run; gzip or ++ xz depending on "Compression algorithm" below. ++ ++ module-init-tools MAY support gzip, and kmod MAY support gzip and xz. ++ ++ Out-of-tree kernel modules installed using Kbuild will also be ++ compressed upon installation. ++ ++ Note: for modules inside an initrd or initramfs, it's more efficient ++ to compress the whole initrd or initramfs instead. ++ ++ Note: This is fully compatible with signed modules. ++ ++ If in doubt, say N. ++ ++choice ++ prompt "Compression algorithm" ++ depends on MODULE_COMPRESS ++ default MODULE_COMPRESS_GZIP ++ help ++ This determines which sort of compression will be used during ++ 'make modules_install'. ++ ++ GZIP (default) and XZ are supported. ++ ++config MODULE_COMPRESS_GZIP ++ bool "GZIP" ++ ++config MODULE_COMPRESS_XZ ++ bool "XZ" ++ ++endchoice ++ ++config TRIM_UNUSED_KSYMS ++ bool "Trim unused exported kernel symbols" ++ depends on MODULES && !UNUSED_SYMBOLS ++ help ++ The kernel and some modules make many symbols available for ++ other modules to use via EXPORT_SYMBOL() and variants. Depending ++ on the set of modules being selected in your kernel configuration, ++ many of those exported symbols might never be used. ++ ++ This option allows for unused exported symbols to be dropped from ++ the build. In turn, this provides the compiler more opportunities ++ (especially when using LTO) for optimizing the code and reducing ++ binary size. This might have some security advantages as well. 
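(For context only, with hypothetical names: an "exported symbol" here is simply a function or variable published with EXPORT_SYMBOL()/EXPORT_SYMBOL_GPL(); with this option enabled, exports that no module in the configured build references can be dropped from the final image.)

    #include <linux/module.h>

    /* Hypothetical helper made visible to other modules. */
    int demo_add(int a, int b)
    {
        return a + b;
    }
    EXPORT_SYMBOL_GPL(demo_add);

    MODULE_LICENSE("GPL");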
++ ++ If unsure, or if you need to build out-of-tree modules, say N. ++ ++endif # MODULES ++ ++config MODULES_TREE_LOOKUP ++ def_bool y ++ depends on PERF_EVENTS || TRACING ++ ++config INIT_ALL_POSSIBLE ++ bool ++ help ++ Back when each arch used to define their own cpu_online_mask and ++ cpu_possible_mask, some of them chose to initialize cpu_possible_mask ++ with all 1s, and others with all 0s. When they were centralised, ++ it was better to provide this option than to break all the archs ++ and have several arch maintainers pursuing me down dark alleys. ++ ++source "block/Kconfig" ++ ++config PREEMPT_NOTIFIERS ++ bool ++ ++config PADATA ++ depends on SMP ++ bool ++ ++config ASN1 ++ tristate ++ help ++ Build a simple ASN.1 grammar compiler that produces a bytecode output ++ that can be interpreted by the ASN.1 stream decoder and used to ++ inform it as to what tags are to be expected in a stream and what ++ functions to call on what tags. ++ ++source "kernel/Kconfig.locks" ++ ++config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE ++ bool ++ ++# It may be useful for an architecture to override the definitions of the ++# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in ++# and the COMPAT_ variants in , in particular to use a ++# different calling convention for syscalls. They can also override the ++# macros for not-implemented syscalls in kernel/sys_ni.c and ++# kernel/time/posix-stubs.c. All these overrides need to be available in ++# . ++config ARCH_HAS_SYSCALL_WRAPPER ++ def_bool n +diff -uprN kernel/init/main.c kernel_new/init/main.c +--- kernel/init/main.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/init/main.c 2021-04-02 09:18:43.444844752 +0800 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -539,7 +540,7 @@ asmlinkage __visible void __init start_k + + cgroup_init_early(); + +- local_irq_disable(); ++ hard_local_irq_disable(); + early_boot_irqs_disabled = true; + + /* +@@ -586,6 +587,7 @@ asmlinkage __visible void __init start_k + setup_log_buf(0); + vfs_caches_init_early(); + sort_main_extable(); ++ __ipipe_init_early(); + trap_init(); + mm_init(); + +@@ -642,6 +644,11 @@ asmlinkage __visible void __init start_k + softirq_init(); + timekeeping_init(); + time_init(); ++ /* ++ * We need to wait for the interrupt and time subsystems to be ++ * initialized before enabling the pipeline. ++ */ ++ __ipipe_init(); + perf_event_init(); + profile_init(); + call_function_init(); +@@ -972,6 +979,7 @@ static void __init do_basic_setup(void) + shmem_init(); + driver_init(); + init_irq_proc(); ++ __ipipe_init_proc(); + do_ctors(); + usermodehelper_enable(); + do_initcalls(); +diff -uprN kernel/init/main.c.orig kernel_new/init/main.c.orig +--- kernel/init/main.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/main.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1179 @@ ++/* ++ * linux/init/main.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * GK 2/5/95 - Changed to support mounting root fs via NFS ++ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 ++ * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 ++ * Simplified starting of init: Michael A. 
Griffith ++ */ ++ ++#define DEBUG /* Enable initcall_debug */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++static int kernel_init(void *); ++ ++extern void init_IRQ(void); ++extern void radix_tree_init(void); ++ ++/* ++ * Debug helper: via this flag we know that we are in 'early bootup code' ++ * where only the boot processor is running with IRQ disabled. This means ++ * two things - IRQ must not be enabled before the flag is cleared and some ++ * operations which are not allowed with IRQ disabled are allowed while the ++ * flag is set. ++ */ ++bool early_boot_irqs_disabled __read_mostly; ++ ++enum system_states system_state __read_mostly; ++EXPORT_SYMBOL(system_state); ++ ++/* ++ * Boot command-line arguments ++ */ ++#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT ++#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT ++ ++extern void time_init(void); ++/* Default late time init is NULL. archs can override this later. */ ++void (*__initdata late_time_init)(void); ++ ++/* Untouched command line saved by arch-specific code. */ ++char __initdata boot_command_line[COMMAND_LINE_SIZE]; ++/* Untouched saved command line (eg. for /proc) */ ++char *saved_command_line; ++/* Command line for parameter parsing */ ++static char *static_command_line; ++/* Command line for per-initcall parameter parsing */ ++static char *initcall_command_line; ++ ++static char *execute_command; ++static char *ramdisk_execute_command; ++ ++/* ++ * Used to generate warnings if static_key manipulation functions are used ++ * before jump_label_init is called. ++ */ ++bool static_key_initialized __read_mostly; ++EXPORT_SYMBOL_GPL(static_key_initialized); ++ ++/* ++ * If set, this is an indication to the drivers that reset the underlying ++ * device before going ahead with the initialization otherwise driver might ++ * rely on the BIOS and skip the reset operation. ++ * ++ * This is useful if kernel is booting in an unreliable environment. ++ * For ex. kdump situation where previous kernel has crashed, BIOS has been ++ * skipped and devices will be in unknown state. 
++ */ ++unsigned int reset_devices; ++EXPORT_SYMBOL(reset_devices); ++ ++static int __init set_reset_devices(char *str) ++{ ++ reset_devices = 1; ++ return 1; ++} ++ ++__setup("reset_devices", set_reset_devices); ++ ++static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; ++const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; ++static const char *panic_later, *panic_param; ++ ++extern const struct obs_kernel_param __setup_start[], __setup_end[]; ++ ++static bool __init obsolete_checksetup(char *line) ++{ ++ const struct obs_kernel_param *p; ++ bool had_early_param = false; ++ ++ p = __setup_start; ++ do { ++ int n = strlen(p->str); ++ if (parameqn(line, p->str, n)) { ++ if (p->early) { ++ /* Already done in parse_early_param? ++ * (Needs exact match on param part). ++ * Keep iterating, as we can have early ++ * params and __setups of same names 8( */ ++ if (line[n] == '\0' || line[n] == '=') ++ had_early_param = true; ++ } else if (!p->setup_func) { ++ pr_warn("Parameter %s is obsolete, ignored\n", ++ p->str); ++ return true; ++ } else if (p->setup_func(line + n)) ++ return true; ++ } ++ p++; ++ } while (p < __setup_end); ++ ++ return had_early_param; ++} ++ ++/* ++ * This should be approx 2 Bo*oMips to start (note initial shift), and will ++ * still work even if initially too large, it will just take slightly longer ++ */ ++unsigned long loops_per_jiffy = (1<<12); ++EXPORT_SYMBOL(loops_per_jiffy); ++ ++static int __init debug_kernel(char *str) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_DEBUG; ++ return 0; ++} ++ ++static int __init quiet_kernel(char *str) ++{ ++ console_loglevel = CONSOLE_LOGLEVEL_QUIET; ++ return 0; ++} ++ ++early_param("debug", debug_kernel); ++early_param("quiet", quiet_kernel); ++ ++static int __init loglevel(char *str) ++{ ++ int newlevel; ++ ++ /* ++ * Only update loglevel value when a correct setting was passed, ++ * to prevent blind crashes (when loglevel being set to 0) that ++ * are quite hard to debug ++ */ ++ if (get_option(&str, &newlevel)) { ++ console_loglevel = newlevel; ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++ ++early_param("loglevel", loglevel); ++ ++/* Change NUL term back to "=", to make "param" the whole string. */ ++static int __init repair_env_string(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ if (val) { ++ /* param=val or param="val"? */ ++ if (val == param+strlen(param)+1) ++ val[-1] = '='; ++ else if (val == param+strlen(param)+2) { ++ val[-2] = '='; ++ memmove(val-1, val, strlen(val)+1); ++ val--; ++ } else ++ BUG(); ++ } ++ return 0; ++} ++ ++/* Anything after -- gets handed straight to init. */ ++static int __init set_init_arg(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ unsigned int i; ++ ++ if (panic_later) ++ return 0; ++ ++ repair_env_string(param, val, unused, NULL); ++ ++ for (i = 0; argv_init[i]; i++) { ++ if (i == MAX_INIT_ARGS) { ++ panic_later = "init"; ++ panic_param = param; ++ return 0; ++ } ++ } ++ argv_init[i] = param; ++ return 0; ++} ++ ++/* ++ * Unknown boot options get handed to init, unless they look like ++ * unused parameters (modprobe will find them in /proc/cmdline). ++ */ ++static int __init unknown_bootoption(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ repair_env_string(param, val, unused, NULL); ++ ++ /* Handle obsolete-style parameters */ ++ if (obsolete_checksetup(param)) ++ return 0; ++ ++ /* Unused module parameter. 
*/ ++ if (strchr(param, '.') && (!val || strchr(param, '.') < val)) ++ return 0; ++ ++ if (panic_later) ++ return 0; ++ ++ if (val) { ++ /* Environment option */ ++ unsigned int i; ++ for (i = 0; envp_init[i]; i++) { ++ if (i == MAX_INIT_ENVS) { ++ panic_later = "env"; ++ panic_param = param; ++ } ++ if (!strncmp(param, envp_init[i], val - param)) ++ break; ++ } ++ envp_init[i] = param; ++ } else { ++ /* Command line option */ ++ unsigned int i; ++ for (i = 0; argv_init[i]; i++) { ++ if (i == MAX_INIT_ARGS) { ++ panic_later = "init"; ++ panic_param = param; ++ } ++ } ++ argv_init[i] = param; ++ } ++ return 0; ++} ++ ++static int __init init_setup(char *str) ++{ ++ unsigned int i; ++ ++ execute_command = str; ++ /* ++ * In case LILO is going to boot us with default command line, ++ * it prepends "auto" before the whole cmdline which makes ++ * the shell think it should execute a script with such name. ++ * So we ignore all arguments entered _before_ init=... [MJ] ++ */ ++ for (i = 1; i < MAX_INIT_ARGS; i++) ++ argv_init[i] = NULL; ++ return 1; ++} ++__setup("init=", init_setup); ++ ++static int __init rdinit_setup(char *str) ++{ ++ unsigned int i; ++ ++ ramdisk_execute_command = str; ++ /* See "auto" comment in init_setup */ ++ for (i = 1; i < MAX_INIT_ARGS; i++) ++ argv_init[i] = NULL; ++ return 1; ++} ++__setup("rdinit=", rdinit_setup); ++ ++#ifndef CONFIG_SMP ++static const unsigned int setup_max_cpus = NR_CPUS; ++static inline void setup_nr_cpu_ids(void) { } ++static inline void smp_prepare_cpus(unsigned int maxcpus) { } ++#endif ++ ++/* ++ * We need to store the untouched command line for future reference. ++ * We also need to store the touched command line since the parameter ++ * parsing is performed in place, and we should allow a component to ++ * store reference of name/value for future reference. ++ */ ++static void __init setup_command_line(char *command_line) ++{ ++ saved_command_line = ++ memblock_virt_alloc(strlen(boot_command_line) + 1, 0); ++ initcall_command_line = ++ memblock_virt_alloc(strlen(boot_command_line) + 1, 0); ++ static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); ++ strcpy(saved_command_line, boot_command_line); ++ strcpy(static_command_line, command_line); ++} ++ ++/* ++ * We need to finalize in a non-__init function or else race conditions ++ * between the root thread and the init thread may cause start_kernel to ++ * be reaped by free_initmem before the root thread has proceeded to ++ * cpu_idle. ++ * ++ * gcc-3.4 accidentally inlines this function, so use noinline. ++ */ ++ ++static __initdata DECLARE_COMPLETION(kthreadd_done); ++ ++static noinline void __ref rest_init(void) ++{ ++ struct task_struct *tsk; ++ int pid; ++ ++ rcu_scheduler_starting(); ++ /* ++ * We need to spawn init first so that it obtains pid 1, however ++ * the init task will end up wanting to create kthreads, which, if ++ * we schedule it before we create kthreadd, will OOPS. ++ */ ++ pid = kernel_thread(kernel_init, NULL, CLONE_FS); ++ /* ++ * Pin init on the boot CPU. Task migration is not properly working ++ * until sched_init_smp() has been run. It will set the allowed ++ * CPUs for init to the non isolated CPUs. 
++ */ ++ rcu_read_lock(); ++ tsk = find_task_by_pid_ns(pid, &init_pid_ns); ++ set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id())); ++ rcu_read_unlock(); ++ ++ numa_default_policy(); ++ pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); ++ rcu_read_lock(); ++ kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); ++ rcu_read_unlock(); ++ ++ /* ++ * Enable might_sleep() and smp_processor_id() checks. ++ * They cannot be enabled earlier because with CONFIG_PREEMPT=y ++ * kernel_thread() would trigger might_sleep() splats. With ++ * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled ++ * already, but it's stuck on the kthreadd_done completion. ++ */ ++ system_state = SYSTEM_SCHEDULING; ++ ++ complete(&kthreadd_done); ++ ++ /* ++ * The boot idle thread must execute schedule() ++ * at least once to get things moving: ++ */ ++ schedule_preempt_disabled(); ++ /* Call into cpu_idle with preempt disabled */ ++ cpu_startup_entry(CPUHP_ONLINE); ++} ++ ++/* Check for early params. */ ++static int __init do_early_param(char *param, char *val, ++ const char *unused, void *arg) ++{ ++ const struct obs_kernel_param *p; ++ ++ for (p = __setup_start; p < __setup_end; p++) { ++ if ((p->early && parameq(param, p->str)) || ++ (strcmp(param, "console") == 0 && ++ strcmp(p->str, "earlycon") == 0) ++ ) { ++ if (p->setup_func(val) != 0) ++ pr_warn("Malformed early option '%s'\n", param); ++ } ++ } ++ /* We accept everything at this stage. */ ++ return 0; ++} ++ ++void __init parse_early_options(char *cmdline) ++{ ++ parse_args("early options", cmdline, NULL, 0, 0, 0, NULL, ++ do_early_param); ++} ++ ++/* Arch code calls this early on, or if not, just before other parsing. */ ++void __init parse_early_param(void) ++{ ++ static int done __initdata; ++ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; ++ ++ if (done) ++ return; ++ ++ /* All fall through to do_early_param. */ ++ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); ++ parse_early_options(tmp_cmdline); ++ done = 1; ++} ++ ++void __init __weak arch_post_acpi_subsys_init(void) { } ++ ++void __init __weak smp_setup_processor_id(void) ++{ ++} ++ ++# if THREAD_SIZE >= PAGE_SIZE ++void __init __weak thread_stack_cache_init(void) ++{ ++} ++#endif ++ ++void __init __weak mem_encrypt_init(void) { } ++ ++bool initcall_debug; ++core_param(initcall_debug, initcall_debug, bool, 0644); ++ ++#ifdef TRACEPOINTS_ENABLED ++static void __init initcall_debug_enable(void); ++#else ++static inline void initcall_debug_enable(void) ++{ ++} ++#endif ++ ++/* ++ * Set up kernel memory allocators ++ */ ++static void __init mm_init(void) ++{ ++ /* ++ * page_ext requires contiguous pages, ++ * bigger than MAX_ORDER unless SPARSEMEM. ++ */ ++ page_ext_init_flatmem(); ++ mem_init(); ++ kmem_cache_init(); ++ pgtable_init(); ++ vmalloc_init(); ++ ioremap_huge_init(); ++ /* Should be run before the first non-init thread is created */ ++ init_espfix_bsp(); ++ /* Should be run after espfix64 is set up. */ ++ pti_init(); ++} ++ ++asmlinkage __visible void __init start_kernel(void) ++{ ++ char *command_line; ++ char *after_dashes; ++ ++ set_task_stack_end_magic(&init_task); ++ smp_setup_processor_id(); ++ debug_objects_early_init(); ++ ++ cgroup_init_early(); ++ ++ local_irq_disable(); ++ early_boot_irqs_disabled = true; ++ ++ /* ++ * Interrupts are still disabled. Do necessary setups, then ++ * enable them. 
++ */ ++ boot_cpu_init(); ++ page_address_init(); ++ pr_notice("%s", linux_banner); ++ setup_arch(&command_line); ++ /* ++ * Set up the the initial canary and entropy after arch ++ * and after adding latent and command line entropy. ++ */ ++ add_latent_entropy(); ++ add_device_randomness(command_line, strlen(command_line)); ++ boot_init_stack_canary(); ++ mm_init_cpumask(&init_mm); ++ setup_command_line(command_line); ++ setup_nr_cpu_ids(); ++ setup_per_cpu_areas(); ++ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ ++ boot_cpu_hotplug_init(); ++ ++ build_all_zonelists(NULL); ++ page_alloc_init(); ++ ++ pr_notice("Kernel command line: %s\n", boot_command_line); ++ /* parameters may set static keys */ ++ jump_label_init(); ++ parse_early_param(); ++ after_dashes = parse_args("Booting kernel", ++ static_command_line, __start___param, ++ __stop___param - __start___param, ++ -1, -1, NULL, &unknown_bootoption); ++ if (!IS_ERR_OR_NULL(after_dashes)) ++ parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, ++ NULL, set_init_arg); ++ ++ /* ++ * These use large bootmem allocations and must precede ++ * kmem_cache_init() ++ */ ++ setup_log_buf(0); ++ vfs_caches_init_early(); ++ sort_main_extable(); ++ trap_init(); ++ mm_init(); ++ ++ ftrace_init(); ++ ++ /* trace_printk can be enabled here */ ++ early_trace_init(); ++ ++ /* ++ * Set up the scheduler prior starting any interrupts (such as the ++ * timer interrupt). Full topology setup happens at smp_init() ++ * time - but meanwhile we still have a functioning scheduler. ++ */ ++ sched_init(); ++ /* ++ * Disable preemption - early bootup scheduling is extremely ++ * fragile until we cpu_idle() for the first time. ++ */ ++ preempt_disable(); ++ if (WARN(!irqs_disabled(), ++ "Interrupts were enabled *very* early, fixing it\n")) ++ local_irq_disable(); ++ radix_tree_init(); ++ ++ /* ++ * Set up housekeeping before setting up workqueues to allow the unbound ++ * workqueue to take non-housekeeping into account. ++ */ ++ housekeeping_init(); ++ ++ /* ++ * Allow workqueue creation and work item queueing/cancelling ++ * early. Work item execution depends on kthreads and starts after ++ * workqueue_init(). ++ */ ++ workqueue_init_early(); ++ ++ rcu_init(); ++ ++ /* Trace events are available after this */ ++ trace_init(); ++ ++ if (initcall_debug) ++ initcall_debug_enable(); ++ ++ context_tracking_init(); ++ /* init some links before init_ISA_irqs() */ ++ early_irq_init(); ++ init_IRQ(); ++ tick_init(); ++ rcu_init_nohz(); ++ init_timers(); ++ hrtimers_init(); ++ softirq_init(); ++ timekeeping_init(); ++ time_init(); ++ perf_event_init(); ++ profile_init(); ++ call_function_init(); ++ WARN(!irqs_disabled(), "Interrupts were enabled early\n"); ++ ++ early_boot_irqs_disabled = false; ++ local_irq_enable(); ++ ++ kmem_cache_init_late(); ++ ++ /* ++ * HACK ALERT! This is early. We're enabling the console before ++ * we've done PCI setups etc, and console_init() must be aware of ++ * this. But we do want output early, in case something goes wrong. ++ */ ++ console_init(); ++ if (panic_later) ++ panic("Too many boot %s vars at `%s'", panic_later, ++ panic_param); ++ ++ lockdep_init(); ++ ++ /* ++ * Need to run this when irqs are enabled, because it wants ++ * to self-test [hard/soft]-irqs on/off lock inversion bugs ++ * too: ++ */ ++ locking_selftest(); ++ ++ /* ++ * This needs to be called before any devices perform DMA ++ * operations that might use the SWIOTLB bounce buffers. 
It will ++ * mark the bounce buffers as decrypted so that their usage will ++ * not cause "plain-text" data to be decrypted when accessed. ++ */ ++ mem_encrypt_init(); ++ ++#ifdef CONFIG_BLK_DEV_INITRD ++ if (initrd_start && !initrd_below_start_ok && ++ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { ++ pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n", ++ page_to_pfn(virt_to_page((void *)initrd_start)), ++ min_low_pfn); ++ initrd_start = 0; ++ } ++#endif ++ kmemleak_init(); ++ debug_objects_mem_init(); ++ setup_per_cpu_pageset(); ++ numa_policy_init(); ++ acpi_early_init(); ++ if (late_time_init) ++ late_time_init(); ++ sched_clock_init(); ++ calibrate_delay(); ++ pid_idr_init(); ++ anon_vma_init(); ++#ifdef CONFIG_X86 ++ if (efi_enabled(EFI_RUNTIME_SERVICES)) ++ efi_enter_virtual_mode(); ++#endif ++ thread_stack_cache_init(); ++ cred_init(); ++ fork_init(); ++ proc_caches_init(); ++ uts_ns_init(); ++ buffer_init(); ++ key_init(); ++ security_init(); ++ dbg_late_init(); ++ vfs_caches_init(); ++ pagecache_init(); ++ signals_init(); ++ seq_file_init(); ++ proc_root_init(); ++ nsfs_init(); ++ cpuset_init(); ++ cgroup_init(); ++ taskstats_init_early(); ++ delayacct_init(); ++ ++ check_bugs(); ++ ++ acpi_subsystem_init(); ++ arch_post_acpi_subsys_init(); ++ sfi_init_late(); ++ ++ if (efi_enabled(EFI_RUNTIME_SERVICES)) { ++ efi_free_boot_services(); ++ } ++ ++ /* Do the rest non-__init'ed, we're now alive */ ++ rest_init(); ++} ++ ++/* Call all constructor functions linked into the kernel. */ ++static void __init do_ctors(void) ++{ ++#ifdef CONFIG_CONSTRUCTORS ++ ctor_fn_t *fn = (ctor_fn_t *) __ctors_start; ++ ++ for (; fn < (ctor_fn_t *) __ctors_end; fn++) ++ (*fn)(); ++#endif ++} ++ ++#ifdef CONFIG_KALLSYMS ++struct blacklist_entry { ++ struct list_head next; ++ char *buf; ++}; ++ ++static __initdata_or_module LIST_HEAD(blacklisted_initcalls); ++ ++static int __init initcall_blacklist(char *str) ++{ ++ char *str_entry; ++ struct blacklist_entry *entry; ++ ++ /* str argument is a comma-separated list of functions */ ++ do { ++ str_entry = strsep(&str, ","); ++ if (str_entry) { ++ pr_debug("blacklisting initcall %s\n", str_entry); ++ entry = alloc_bootmem(sizeof(*entry)); ++ entry->buf = alloc_bootmem(strlen(str_entry) + 1); ++ strcpy(entry->buf, str_entry); ++ list_add(&entry->next, &blacklisted_initcalls); ++ } ++ } while (str_entry); ++ ++ return 0; ++} ++ ++static bool __init_or_module initcall_blacklisted(initcall_t fn) ++{ ++ struct blacklist_entry *entry; ++ char fn_name[KSYM_SYMBOL_LEN]; ++ unsigned long addr; ++ ++ if (list_empty(&blacklisted_initcalls)) ++ return false; ++ ++ addr = (unsigned long) dereference_function_descriptor(fn); ++ sprint_symbol_no_offset(fn_name, addr); ++ ++ /* ++ * fn will be "function_name [module_name]" where [module_name] is not ++ * displayed for built-in init functions. Strip off the [module_name]. 
++ */ ++ strreplace(fn_name, ' ', '\0'); ++ ++ list_for_each_entry(entry, &blacklisted_initcalls, next) { ++ if (!strcmp(fn_name, entry->buf)) { ++ pr_debug("initcall %s blacklisted\n", fn_name); ++ return true; ++ } ++ } ++ ++ return false; ++} ++#else ++static int __init initcall_blacklist(char *str) ++{ ++ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n"); ++ return 0; ++} ++ ++static bool __init_or_module initcall_blacklisted(initcall_t fn) ++{ ++ return false; ++} ++#endif ++__setup("initcall_blacklist=", initcall_blacklist); ++ ++static __init_or_module void ++trace_initcall_start_cb(void *data, initcall_t fn) ++{ ++ ktime_t *calltime = (ktime_t *)data; ++ ++ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); ++ *calltime = ktime_get(); ++} ++ ++static __init_or_module void ++trace_initcall_finish_cb(void *data, initcall_t fn, int ret) ++{ ++ ktime_t *calltime = (ktime_t *)data; ++ ktime_t delta, rettime; ++ unsigned long long duration; ++ ++ rettime = ktime_get(); ++ delta = ktime_sub(rettime, *calltime); ++ duration = (unsigned long long) ktime_to_ns(delta) >> 10; ++ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", ++ fn, ret, duration); ++} ++ ++static ktime_t initcall_calltime; ++ ++#ifdef TRACEPOINTS_ENABLED ++static void __init initcall_debug_enable(void) ++{ ++ int ret; ++ ++ ret = register_trace_initcall_start(trace_initcall_start_cb, ++ &initcall_calltime); ++ ret |= register_trace_initcall_finish(trace_initcall_finish_cb, ++ &initcall_calltime); ++ WARN(ret, "Failed to register initcall tracepoints\n"); ++} ++# define do_trace_initcall_start trace_initcall_start ++# define do_trace_initcall_finish trace_initcall_finish ++#else ++static inline void do_trace_initcall_start(initcall_t fn) ++{ ++ if (!initcall_debug) ++ return; ++ trace_initcall_start_cb(&initcall_calltime, fn); ++} ++static inline void do_trace_initcall_finish(initcall_t fn, int ret) ++{ ++ if (!initcall_debug) ++ return; ++ trace_initcall_finish_cb(&initcall_calltime, fn, ret); ++} ++#endif /* !TRACEPOINTS_ENABLED */ ++ ++int __init_or_module do_one_initcall(initcall_t fn) ++{ ++ int count = preempt_count(); ++ char msgbuf[64]; ++ int ret; ++ ++ if (initcall_blacklisted(fn)) ++ return -EPERM; ++ ++ do_trace_initcall_start(fn); ++ ret = fn(); ++ do_trace_initcall_finish(fn, ret); ++ ++ msgbuf[0] = 0; ++ ++ if (preempt_count() != count) { ++ sprintf(msgbuf, "preemption imbalance "); ++ preempt_count_set(count); ++ } ++ if (irqs_disabled()) { ++ strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); ++ local_irq_enable(); ++ } ++ WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf); ++ ++ add_latent_entropy(); ++ return ret; ++} ++ ++ ++extern initcall_entry_t __initcall_start[]; ++extern initcall_entry_t __initcall0_start[]; ++extern initcall_entry_t __initcall1_start[]; ++extern initcall_entry_t __initcall2_start[]; ++extern initcall_entry_t __initcall3_start[]; ++extern initcall_entry_t __initcall4_start[]; ++extern initcall_entry_t __initcall5_start[]; ++extern initcall_entry_t __initcall6_start[]; ++extern initcall_entry_t __initcall7_start[]; ++extern initcall_entry_t __initcall_end[]; ++ ++static initcall_entry_t *initcall_levels[] __initdata = { ++ __initcall0_start, ++ __initcall1_start, ++ __initcall2_start, ++ __initcall3_start, ++ __initcall4_start, ++ __initcall5_start, ++ __initcall6_start, ++ __initcall7_start, ++ __initcall_end, ++}; ++ ++/* Keep these in sync with initcalls in include/linux/init.h */ ++static char *initcall_level_names[] 
__initdata = { ++ "pure", ++ "core", ++ "postcore", ++ "arch", ++ "subsys", ++ "fs", ++ "device", ++ "late", ++}; ++ ++static void __init do_initcall_level(int level) ++{ ++ initcall_entry_t *fn; ++ ++ strcpy(initcall_command_line, saved_command_line); ++ parse_args(initcall_level_names[level], ++ initcall_command_line, __start___param, ++ __stop___param - __start___param, ++ level, level, ++ NULL, &repair_env_string); ++ ++ trace_initcall_level(initcall_level_names[level]); ++ for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) ++ do_one_initcall(initcall_from_entry(fn)); ++} ++ ++static void __init do_initcalls(void) ++{ ++ int level; ++ ++ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) ++ do_initcall_level(level); ++} ++ ++/* ++ * Ok, the machine is now initialized. None of the devices ++ * have been touched yet, but the CPU subsystem is up and ++ * running, and memory and process management works. ++ * ++ * Now we can finally start doing some real work.. ++ */ ++static void __init do_basic_setup(void) ++{ ++ cpuset_init_smp(); ++ shmem_init(); ++ driver_init(); ++ init_irq_proc(); ++ do_ctors(); ++ usermodehelper_enable(); ++ do_initcalls(); ++} ++ ++static void __init do_pre_smp_initcalls(void) ++{ ++ initcall_entry_t *fn; ++ ++ trace_initcall_level("early"); ++ for (fn = __initcall_start; fn < __initcall0_start; fn++) ++ do_one_initcall(initcall_from_entry(fn)); ++} ++ ++/* ++ * This function requests modules which should be loaded by default and is ++ * called twice right after initrd is mounted and right before init is ++ * exec'd. If such modules are on either initrd or rootfs, they will be ++ * loaded before control is passed to userland. ++ */ ++void __init load_default_modules(void) ++{ ++ load_default_elevator_module(); ++} ++ ++static int run_init_process(const char *init_filename) ++{ ++ argv_init[0] = init_filename; ++ pr_info("Run %s as init process\n", init_filename); ++ return do_execve(getname_kernel(init_filename), ++ (const char __user *const __user *)argv_init, ++ (const char __user *const __user *)envp_init); ++} ++ ++static int try_to_run_init_process(const char *init_filename) ++{ ++ int ret; ++ ++ ret = run_init_process(init_filename); ++ ++ if (ret && ret != -ENOENT) { ++ pr_err("Starting init: %s exists but couldn't execute it (error %d)\n", ++ init_filename, ret); ++ } ++ ++ return ret; ++} ++ ++static noinline void __init kernel_init_freeable(void); ++ ++#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) ++bool rodata_enabled __ro_after_init = true; ++static int __init set_debug_rodata(char *str) ++{ ++ return strtobool(str, &rodata_enabled); ++} ++__setup("rodata=", set_debug_rodata); ++#endif ++ ++#ifdef CONFIG_STRICT_KERNEL_RWX ++static void mark_readonly(void) ++{ ++ if (rodata_enabled) { ++ /* ++ * load_module() results in W+X mappings, which are cleaned up ++ * with call_rcu_sched(). Let's make sure that queued work is ++ * flushed so that we don't hit false positives looking for ++ * insecure pages which are W+X. 
++ */ ++ rcu_barrier_sched(); ++ mark_rodata_ro(); ++ rodata_test(); ++ } else ++ pr_info("Kernel memory protection disabled.\n"); ++} ++#else ++static inline void mark_readonly(void) ++{ ++ pr_warn("This architecture does not have kernel memory protection.\n"); ++} ++#endif ++ ++static int __ref kernel_init(void *unused) ++{ ++ int ret; ++ ++ kernel_init_freeable(); ++ /* need to finish all async __init code before freeing the memory */ ++ async_synchronize_full(); ++ ftrace_free_init_mem(); ++ jump_label_invalidate_initmem(); ++ free_initmem(); ++ mark_readonly(); ++ ++ /* ++ * Kernel mappings are now finalized - update the userspace page-table ++ * to finalize PTI. ++ */ ++ pti_finalize(); ++ ++ system_state = SYSTEM_RUNNING; ++ numa_default_policy(); ++ ++ rcu_end_inkernel_boot(); ++ ++ if (ramdisk_execute_command) { ++ ret = run_init_process(ramdisk_execute_command); ++ if (!ret) ++ return 0; ++ pr_err("Failed to execute %s (error %d)\n", ++ ramdisk_execute_command, ret); ++ } ++ ++ /* ++ * We try each of these until one succeeds. ++ * ++ * The Bourne shell can be used instead of init if we are ++ * trying to recover a really broken machine. ++ */ ++ if (execute_command) { ++ ret = run_init_process(execute_command); ++ if (!ret) ++ return 0; ++ panic("Requested init %s failed (error %d).", ++ execute_command, ret); ++ } ++ if (!try_to_run_init_process("/sbin/init") || ++ !try_to_run_init_process("/etc/init") || ++ !try_to_run_init_process("/bin/init") || ++ !try_to_run_init_process("/bin/sh")) ++ return 0; ++ ++ panic("No working init found. Try passing init= option to kernel. " ++ "See Linux Documentation/admin-guide/init.rst for guidance."); ++} ++ ++static noinline void __init kernel_init_freeable(void) ++{ ++ /* ++ * Wait until kthreadd is all set-up. ++ */ ++ wait_for_completion(&kthreadd_done); ++ ++ /* Now the scheduler is fully set up and can do blocking allocations */ ++ gfp_allowed_mask = __GFP_BITS_MASK; ++ ++ /* ++ * init can allocate pages on any node ++ */ ++ set_mems_allowed(node_states[N_MEMORY]); ++ ++ cad_pid = task_pid(current); ++ ++ smp_prepare_cpus(setup_max_cpus); ++ ++ workqueue_init(); ++ ++ init_mm_internals(); ++ ++ do_pre_smp_initcalls(); ++ ++ smp_init(); ++ sched_init_smp(); ++ ktask_init(); ++ ++ page_alloc_init_late(); ++ /* Initialize page ext after all struct pages are initialized. */ ++ page_ext_init(); ++ ++ do_basic_setup(); ++ ++ lockup_detector_init(); ++ ++ /* Open the /dev/console on the rootfs, this should never fail */ ++ if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) ++ pr_err("Warning: unable to open an initial console.\n"); ++ ++ (void) ksys_dup(0); ++ (void) ksys_dup(0); ++ /* ++ * check if there is an early userspace init. If yes, let it do all ++ * the work ++ */ ++ ++ if (!ramdisk_execute_command) ++ ramdisk_execute_command = "/init"; ++ ++ if (ksys_access((const char __user *) ++ ramdisk_execute_command, 0) != 0) { ++ ramdisk_execute_command = NULL; ++ prepare_namespace(); ++ } ++ ++ /* ++ * Ok, we have completed the initial bootup, and ++ * we're essentially up and running. Get rid of the ++ * initmem segments and start the user-mode stuff.. 
++ * ++ * rootfs is available now, try loading the public keys ++ * and default modules ++ */ ++ ++ integrity_load_keys(); ++ load_default_modules(); ++} +diff -uprN kernel/init/main.c.rej kernel_new/init/main.c.rej +--- kernel/init/main.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/init/main.c.rej 2021-04-01 18:28:07.804863121 +0800 +@@ -0,0 +1,14 @@ ++--- init/main.c 2019-12-18 03:36:04.000000000 +0800 +++++ init/main.c 2021-03-22 09:21:43.215415405 +0800 ++@@ -643,6 +645,11 @@ asmlinkage __visible void __init start_k ++ softirq_init(); ++ timekeeping_init(); ++ time_init(); +++ /* +++ * We need to wait for the interrupt and time subsystems to be +++ * initialized before enabling the pipeline. +++ */ +++ __ipipe_init(); ++ printk_safe_init(); ++ perf_event_init(); ++ profile_init(); +diff -uprN kernel/kernel/context_tracking.c kernel_new/kernel/context_tracking.c +--- kernel/kernel/context_tracking.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/context_tracking.c 2021-04-01 18:28:07.804863121 +0800 +@@ -113,7 +113,7 @@ void context_tracking_enter(enum ctx_sta + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ +- if (in_interrupt()) ++ if (!ipipe_root_p || in_interrupt()) + return; + + local_irq_save(flags); +@@ -169,7 +169,7 @@ void context_tracking_exit(enum ctx_stat + { + unsigned long flags; + +- if (in_interrupt()) ++ if (!ipipe_root_p || in_interrupt()) + return; + + local_irq_save(flags); +diff -uprN kernel/kernel/debug/debug_core.c kernel_new/kernel/debug/debug_core.c +--- kernel/kernel/debug/debug_core.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/debug/debug_core.c 2021-04-01 18:28:07.804863121 +0800 +@@ -119,8 +119,8 @@ static struct kgdb_bkpt kgdb_break[KGDB + */ + atomic_t kgdb_active = ATOMIC_INIT(-1); + EXPORT_SYMBOL_GPL(kgdb_active); +-static DEFINE_RAW_SPINLOCK(dbg_master_lock); +-static DEFINE_RAW_SPINLOCK(dbg_slave_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(dbg_master_lock); ++static IPIPE_DEFINE_RAW_SPINLOCK(dbg_slave_lock); + + /* + * We use NR_CPUs not PERCPU, in case kgdb is used to debug early +@@ -461,7 +461,9 @@ static int kgdb_reenter_check(struct kgd + static void dbg_touch_watchdogs(void) + { + touch_softlockup_watchdog_sync(); ++#ifndef CONFIG_IPIPE + clocksource_touch_watchdog(); ++#endif + rcu_cpu_stall_reset(); + } + +@@ -492,7 +494,7 @@ acquirelock: + * Interrupts will be restored by the 'trap return' code, except when + * single stepping. 
+ */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + + cpu = ks->cpu; + kgdb_info[cpu].debuggerinfo = regs; +@@ -543,7 +545,7 @@ return_normal: + smp_mb__before_atomic(); + atomic_dec(&slaves_in_kgdb); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + return 0; + } + cpu_relax(); +@@ -561,7 +563,7 @@ return_normal: + atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + goto acquirelock; + } +@@ -680,7 +682,7 @@ kgdb_restore: + atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); + dbg_touch_watchdogs(); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + + return kgdb_info[cpu].ret_state; + } +@@ -799,9 +801,9 @@ static void kgdb_console_write(struct co + if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode) + return; + +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + gdbstub_msg_write(s, count); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static struct console kgdbcons = { +diff -uprN kernel/kernel/exit.c kernel_new/kernel/exit.c +--- kernel/kernel/exit.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/exit.c 2021-04-01 18:28:07.805863120 +0800 +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -827,6 +828,7 @@ void __noreturn do_exit(long code) + */ + raw_spin_lock_irq(&tsk->pi_lock); + raw_spin_unlock_irq(&tsk->pi_lock); ++ __ipipe_report_exit(tsk); + + if (unlikely(in_atomic())) { + pr_info("note: %s[%d] exited with preempt_count %d\n", +diff -uprN kernel/kernel/exit.c.orig kernel_new/kernel/exit.c.orig +--- kernel/kernel/exit.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/exit.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1771 @@ ++/* ++ * linux/kernel/exit.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for audit_free() */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void __unhash_process(struct task_struct *p, bool group_dead) ++{ ++ nr_threads--; ++ detach_pid(p, PIDTYPE_PID); ++ if (group_dead) { ++ detach_pid(p, PIDTYPE_TGID); ++ detach_pid(p, PIDTYPE_PGID); ++ detach_pid(p, PIDTYPE_SID); ++ ++ list_del_rcu(&p->tasks); ++ list_del_init(&p->sibling); ++ __this_cpu_dec(process_counts); ++ } ++ list_del_rcu(&p->thread_group); ++ list_del_rcu(&p->thread_node); ++} ++ ++/* ++ * This function expects the tasklist_lock write-locked. 
++ */ ++static void __exit_signal(struct task_struct *tsk) ++{ ++ struct signal_struct *sig = tsk->signal; ++ bool group_dead = thread_group_leader(tsk); ++ struct sighand_struct *sighand; ++ struct tty_struct *uninitialized_var(tty); ++ u64 utime, stime; ++ ++ sighand = rcu_dereference_check(tsk->sighand, ++ lockdep_tasklist_lock_is_held()); ++ spin_lock(&sighand->siglock); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ posix_cpu_timers_exit(tsk); ++ if (group_dead) { ++ posix_cpu_timers_exit_group(tsk); ++ } else { ++ /* ++ * This can only happen if the caller is de_thread(). ++ * FIXME: this is the temporary hack, we should teach ++ * posix-cpu-timers to handle this case correctly. ++ */ ++ if (unlikely(has_group_leader_pid(tsk))) ++ posix_cpu_timers_exit_group(tsk); ++ } ++#endif ++ ++ if (group_dead) { ++ tty = sig->tty; ++ sig->tty = NULL; ++ } else { ++ /* ++ * If there is any task waiting for the group exit ++ * then notify it: ++ */ ++ if (sig->notify_count > 0 && !--sig->notify_count) ++ wake_up_process(sig->group_exit_task); ++ ++ if (tsk == sig->curr_target) ++ sig->curr_target = next_thread(tsk); ++ } ++ ++ add_device_randomness((const void*) &tsk->se.sum_exec_runtime, ++ sizeof(unsigned long long)); ++ ++ /* ++ * Accumulate here the counters for all threads as they die. We could ++ * skip the group leader because it is the last user of signal_struct, ++ * but we want to avoid the race with thread_group_cputime() which can ++ * see the empty ->thread_head list. ++ */ ++ task_cputime(tsk, &utime, &stime); ++ write_seqlock(&sig->stats_lock); ++ sig->utime += utime; ++ sig->stime += stime; ++ sig->gtime += task_gtime(tsk); ++ sig->min_flt += tsk->min_flt; ++ sig->maj_flt += tsk->maj_flt; ++ sig->nvcsw += tsk->nvcsw; ++ sig->nivcsw += tsk->nivcsw; ++ sig->inblock += task_io_get_inblock(tsk); ++ sig->oublock += task_io_get_oublock(tsk); ++ task_io_accounting_add(&sig->ioac, &tsk->ioac); ++ sig->sum_sched_runtime += tsk->se.sum_exec_runtime; ++ sig->nr_threads--; ++ __unhash_process(tsk, group_dead); ++ write_sequnlock(&sig->stats_lock); ++ ++ /* ++ * Do this under ->siglock, we can race with another thread ++ * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. ++ */ ++ flush_sigqueue(&tsk->pending); ++ tsk->sighand = NULL; ++ spin_unlock(&sighand->siglock); ++ ++ __cleanup_sighand(sighand); ++ clear_tsk_thread_flag(tsk, TIF_SIGPENDING); ++ if (group_dead) { ++ flush_sigqueue(&sig->shared_pending); ++ tty_kref_put(tty); ++ } ++} ++ ++static void delayed_put_task_struct(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ perf_event_delayed_put(tsk); ++ trace_sched_process_free(tsk); ++ put_task_struct(tsk); ++} ++ ++ ++void release_task(struct task_struct *p) ++{ ++ struct task_struct *leader; ++ int zap_leader; ++repeat: ++ /* don't need to get the RCU readlock here - the process is dead and ++ * can't be modifying its own credentials. But shut RCU-lockdep up */ ++ rcu_read_lock(); ++ atomic_dec(&__task_cred(p)->user->processes); ++ rcu_read_unlock(); ++ ++ proc_flush_task(p); ++ cgroup_release(p); ++ ++ write_lock_irq(&tasklist_lock); ++ ptrace_release_task(p); ++ __exit_signal(p); ++ ++ /* ++ * If we are the last non-leader member of the thread ++ * group, and the leader is zombie, then notify the ++ * group leader's parent process. (if it wants notification.) 
++ */ ++ zap_leader = 0; ++ leader = p->group_leader; ++ if (leader != p && thread_group_empty(leader) ++ && leader->exit_state == EXIT_ZOMBIE) { ++ /* ++ * If we were the last child thread and the leader has ++ * exited already, and the leader's parent ignores SIGCHLD, ++ * then we are the one who should release the leader. ++ */ ++ zap_leader = do_notify_parent(leader, leader->exit_signal); ++ if (zap_leader) ++ leader->exit_state = EXIT_DEAD; ++ } ++ ++ write_unlock_irq(&tasklist_lock); ++ release_thread(p); ++ call_rcu(&p->rcu, delayed_put_task_struct); ++ ++ p = leader; ++ if (unlikely(zap_leader)) ++ goto repeat; ++} ++ ++/* ++ * Note that if this function returns a valid task_struct pointer (!NULL) ++ * task->usage must remain >0 for the duration of the RCU critical section. ++ */ ++struct task_struct *task_rcu_dereference(struct task_struct **ptask) ++{ ++ struct sighand_struct *sighand; ++ struct task_struct *task; ++ ++ /* ++ * We need to verify that release_task() was not called and thus ++ * delayed_put_task_struct() can't run and drop the last reference ++ * before rcu_read_unlock(). We check task->sighand != NULL, ++ * but we can read the already freed and reused memory. ++ */ ++retry: ++ task = rcu_dereference(*ptask); ++ if (!task) ++ return NULL; ++ ++ probe_kernel_address(&task->sighand, sighand); ++ ++ /* ++ * Pairs with atomic_dec_and_test() in put_task_struct(). If this task ++ * was already freed we can not miss the preceding update of this ++ * pointer. ++ */ ++ smp_rmb(); ++ if (unlikely(task != READ_ONCE(*ptask))) ++ goto retry; ++ ++ /* ++ * We've re-checked that "task == *ptask", now we have two different ++ * cases: ++ * ++ * 1. This is actually the same task/task_struct. In this case ++ * sighand != NULL tells us it is still alive. ++ * ++ * 2. This is another task which got the same memory for task_struct. ++ * We can't know this of course, and we can not trust ++ * sighand != NULL. ++ * ++ * In this case we actually return a random value, but this is ++ * correct. ++ * ++ * If we return NULL - we can pretend that we actually noticed that ++ * *ptask was updated when the previous task has exited. Or pretend ++ * that probe_slab_address(&sighand) reads NULL. ++ * ++ * If we return the new task (because sighand is not NULL for any ++ * reason) - this is fine too. This (new) task can't go away before ++ * another gp pass. ++ * ++ * And note: We could even eliminate the false positive if re-read ++ * task->sighand once again to avoid the falsely NULL. But this case ++ * is very unlikely so we don't care. ++ */ ++ if (!sighand) ++ return NULL; ++ ++ return task; ++} ++ ++void rcuwait_wake_up(struct rcuwait *w) ++{ ++ struct task_struct *task; ++ ++ rcu_read_lock(); ++ ++ /* ++ * Order condition vs @task, such that everything prior to the load ++ * of @task is visible. This is the condition as to why the user called ++ * rcuwait_trywake() in the first place. Pairs with set_current_state() ++ * barrier (A) in rcuwait_wait_event(). ++ * ++ * WAIT WAKE ++ * [S] tsk = current [S] cond = true ++ * MB (A) MB (B) ++ * [L] cond [L] tsk ++ */ ++ smp_mb(); /* (B) */ ++ ++ /* ++ * Avoid using task_rcu_dereference() magic as long as we are careful, ++ * see comment in rcuwait_wait_event() regarding ->exit_state. ++ */ ++ task = rcu_dereference(w->task); ++ if (task) ++ wake_up_process(task); ++ rcu_read_unlock(); ++} ++ ++/* ++ * Determine if a process group is "orphaned", according to the POSIX ++ * definition in 2.2.2.52. 
Orphaned process groups are not to be affected ++ * by terminal-generated stop signals. Newly orphaned process groups are ++ * to receive a SIGHUP and a SIGCONT. ++ * ++ * "I ask you, have you ever known what it is to be an orphan?" ++ */ ++static int will_become_orphaned_pgrp(struct pid *pgrp, ++ struct task_struct *ignored_task) ++{ ++ struct task_struct *p; ++ ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ if ((p == ignored_task) || ++ (p->exit_state && thread_group_empty(p)) || ++ is_global_init(p->real_parent)) ++ continue; ++ ++ if (task_pgrp(p->real_parent) != pgrp && ++ task_session(p->real_parent) == task_session(p)) ++ return 0; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ ++ return 1; ++} ++ ++int is_current_pgrp_orphaned(void) ++{ ++ int retval; ++ ++ read_lock(&tasklist_lock); ++ retval = will_become_orphaned_pgrp(task_pgrp(current), NULL); ++ read_unlock(&tasklist_lock); ++ ++ return retval; ++} ++ ++static bool has_stopped_jobs(struct pid *pgrp) ++{ ++ struct task_struct *p; ++ ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ if (p->signal->flags & SIGNAL_STOP_STOPPED) ++ return true; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ ++ return false; ++} ++ ++/* ++ * Check to see if any process groups have become orphaned as ++ * a result of our exiting, and if they have any stopped jobs, ++ * send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) ++ */ ++static void ++kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent) ++{ ++ struct pid *pgrp = task_pgrp(tsk); ++ struct task_struct *ignored_task = tsk; ++ ++ if (!parent) ++ /* exit: our father is in a different pgrp than ++ * we are and we were the only connection outside. ++ */ ++ parent = tsk->real_parent; ++ else ++ /* reparent: our child is in a different pgrp than ++ * we are, and it was the only connection outside. ++ */ ++ ignored_task = NULL; ++ ++ if (task_pgrp(parent) != pgrp && ++ task_session(parent) == task_session(tsk) && ++ will_become_orphaned_pgrp(pgrp, ignored_task) && ++ has_stopped_jobs(pgrp)) { ++ __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp); ++ __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); ++ } ++} ++ ++#ifdef CONFIG_MEMCG ++/* ++ * A task is exiting. If it owned this mm, find a new owner for the mm. ++ */ ++void mm_update_next_owner(struct mm_struct *mm) ++{ ++ struct task_struct *c, *g, *p = current; ++ ++retry: ++ /* ++ * If the exiting or execing task is not the owner, it's ++ * someone else's problem. ++ */ ++ if (mm->owner != p) ++ return; ++ /* ++ * The current owner is exiting/execing and there are no other ++ * candidates. Do not leave the mm pointing to a possibly ++ * freed task structure. ++ */ ++ if (atomic_read(&mm->mm_users) <= 1) { ++ WRITE_ONCE(mm->owner, NULL); ++ return; ++ } ++ ++ read_lock(&tasklist_lock); ++ /* ++ * Search in the children ++ */ ++ list_for_each_entry(c, &p->children, sibling) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ } ++ ++ /* ++ * Search in the siblings ++ */ ++ list_for_each_entry(c, &p->real_parent->children, sibling) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ } ++ ++ /* ++ * Search through everything else, we should not get here often. 
++ */ ++ for_each_process(g) { ++ if (g->flags & PF_KTHREAD) ++ continue; ++ for_each_thread(g, c) { ++ if (c->mm == mm) ++ goto assign_new_owner; ++ if (c->mm) ++ break; ++ } ++ } ++ read_unlock(&tasklist_lock); ++ /* ++ * We found no owner yet mm_users > 1: this implies that we are ++ * most likely racing with swapoff (try_to_unuse()) or /proc or ++ * ptrace or page migration (get_task_mm()). Mark owner as NULL. ++ */ ++ WRITE_ONCE(mm->owner, NULL); ++ return; ++ ++assign_new_owner: ++ BUG_ON(c == p); ++ get_task_struct(c); ++ /* ++ * The task_lock protects c->mm from changing. ++ * We always want mm->owner->mm == mm ++ */ ++ task_lock(c); ++ /* ++ * Delay read_unlock() till we have the task_lock() ++ * to ensure that c does not slip away underneath us ++ */ ++ read_unlock(&tasklist_lock); ++ if (c->mm != mm) { ++ task_unlock(c); ++ put_task_struct(c); ++ goto retry; ++ } ++ WRITE_ONCE(mm->owner, c); ++ task_unlock(c); ++ put_task_struct(c); ++} ++#endif /* CONFIG_MEMCG */ ++ ++/* ++ * Turn us into a lazy TLB process if we ++ * aren't already.. ++ */ ++static void exit_mm(void) ++{ ++ struct mm_struct *mm = current->mm; ++ struct core_state *core_state; ++ ++ mm_release(current, mm); ++ if (!mm) ++ return; ++ sync_mm_rss(mm); ++ /* ++ * Serialize with any possible pending coredump. ++ * We must hold mmap_sem around checking core_state ++ * and clearing tsk->mm. The core-inducing thread ++ * will increment ->nr_threads for each thread in the ++ * group with ->mm != NULL. ++ */ ++ down_read(&mm->mmap_sem); ++ core_state = mm->core_state; ++ if (core_state) { ++ struct core_thread self; ++ ++ up_read(&mm->mmap_sem); ++ ++ self.task = current; ++ self.next = xchg(&core_state->dumper.next, &self); ++ /* ++ * Implies mb(), the result of xchg() must be visible ++ * to core_state->dumper. ++ */ ++ if (atomic_dec_and_test(&core_state->nr_threads)) ++ complete(&core_state->startup); ++ ++ for (;;) { ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ if (!self.task) /* see coredump_finish() */ ++ break; ++ freezable_schedule(); ++ } ++ __set_current_state(TASK_RUNNING); ++ down_read(&mm->mmap_sem); ++ } ++ mmgrab(mm); ++ BUG_ON(mm != current->active_mm); ++ /* more a memory barrier than a real lock */ ++ task_lock(current); ++ current->mm = NULL; ++ up_read(&mm->mmap_sem); ++ enter_lazy_tlb(mm, current); ++ task_unlock(current); ++ mm_update_next_owner(mm); ++ mmput(mm); ++ if (test_thread_flag(TIF_MEMDIE)) ++ exit_oom_victim(); ++} ++ ++static struct task_struct *find_alive_thread(struct task_struct *p) ++{ ++ struct task_struct *t; ++ ++ for_each_thread(p, t) { ++ if (!(t->flags & PF_EXITING)) ++ return t; ++ } ++ return NULL; ++} ++ ++static struct task_struct *find_child_reaper(struct task_struct *father, ++ struct list_head *dead) ++ __releases(&tasklist_lock) ++ __acquires(&tasklist_lock) ++{ ++ struct pid_namespace *pid_ns = task_active_pid_ns(father); ++ struct task_struct *reaper = pid_ns->child_reaper; ++ struct task_struct *p, *n; ++ ++ if (likely(reaper != father)) ++ return reaper; ++ ++ reaper = find_alive_thread(father); ++ if (reaper) { ++ pid_ns->child_reaper = reaper; ++ return reaper; ++ } ++ ++ write_unlock_irq(&tasklist_lock); ++ ++ list_for_each_entry_safe(p, n, dead, ptrace_entry) { ++ list_del_init(&p->ptrace_entry); ++ release_task(p); ++ } ++ ++ zap_pid_ns_processes(pid_ns); ++ write_lock_irq(&tasklist_lock); ++ ++ return father; ++} ++ ++/* ++ * When we die, we re-parent all our children, and try to: ++ * 1. 
give them to another thread in our thread group, if such a member exists ++ * 2. give it to the first ancestor process which prctl'd itself as a ++ * child_subreaper for its children (like a service manager) ++ * 3. give it to the init process (PID 1) in our pid namespace ++ */ ++static struct task_struct *find_new_reaper(struct task_struct *father, ++ struct task_struct *child_reaper) ++{ ++ struct task_struct *thread, *reaper; ++ ++ thread = find_alive_thread(father); ++ if (thread) ++ return thread; ++ ++ if (father->signal->has_child_subreaper) { ++ unsigned int ns_level = task_pid(father)->level; ++ /* ++ * Find the first ->is_child_subreaper ancestor in our pid_ns. ++ * We can't check reaper != child_reaper to ensure we do not ++ * cross the namespaces, the exiting parent could be injected ++ * by setns() + fork(). ++ * We check pid->level, this is slightly more efficient than ++ * task_active_pid_ns(reaper) != task_active_pid_ns(father). ++ */ ++ for (reaper = father->real_parent; ++ task_pid(reaper)->level == ns_level; ++ reaper = reaper->real_parent) { ++ if (reaper == &init_task) ++ break; ++ if (!reaper->signal->is_child_subreaper) ++ continue; ++ thread = find_alive_thread(reaper); ++ if (thread) ++ return thread; ++ } ++ } ++ ++ return child_reaper; ++} ++ ++/* ++* Any that need to be release_task'd are put on the @dead list. ++ */ ++static void reparent_leader(struct task_struct *father, struct task_struct *p, ++ struct list_head *dead) ++{ ++ if (unlikely(p->exit_state == EXIT_DEAD)) ++ return; ++ ++ /* We don't want people slaying init. */ ++ p->exit_signal = SIGCHLD; ++ ++ /* If it has exited notify the new parent about this child's death. */ ++ if (!p->ptrace && ++ p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { ++ if (do_notify_parent(p, p->exit_signal)) { ++ p->exit_state = EXIT_DEAD; ++ list_add(&p->ptrace_entry, dead); ++ } ++ } ++ ++ kill_orphaned_pgrp(p, father); ++} ++ ++/* ++ * This does two things: ++ * ++ * A. Make init inherit all the child processes ++ * B. Check to see if any process groups have become orphaned ++ * as a result of our exiting, and if they have any stopped ++ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) ++ */ ++static void forget_original_parent(struct task_struct *father, ++ struct list_head *dead) ++{ ++ struct task_struct *p, *t, *reaper; ++ ++ if (unlikely(!list_empty(&father->ptraced))) ++ exit_ptrace(father, dead); ++ ++ /* Can drop and reacquire tasklist_lock */ ++ reaper = find_child_reaper(father, dead); ++ if (list_empty(&father->children)) ++ return; ++ ++ reaper = find_new_reaper(father, reaper); ++ list_for_each_entry(p, &father->children, sibling) { ++ for_each_thread(p, t) { ++ t->real_parent = reaper; ++ BUG_ON((!t->ptrace) != (t->parent == father)); ++ if (likely(!t->ptrace)) ++ t->parent = t->real_parent; ++ if (t->pdeath_signal) ++ group_send_sig_info(t->pdeath_signal, ++ SEND_SIG_NOINFO, t, ++ PIDTYPE_TGID); ++ } ++ /* ++ * If this is a threaded reparent there is no need to ++ * notify anyone anything has happened. ++ */ ++ if (!same_thread_group(reaper, father)) ++ reparent_leader(father, p, dead); ++ } ++ list_splice_tail_init(&father->children, &reaper->children); ++} ++ ++/* ++ * Send signals to all our closest relatives so that they know ++ * to properly mourn us.. 
++ */ ++static void exit_notify(struct task_struct *tsk, int group_dead) ++{ ++ bool autoreap; ++ struct task_struct *p, *n; ++ LIST_HEAD(dead); ++ ++ write_lock_irq(&tasklist_lock); ++ forget_original_parent(tsk, &dead); ++ ++ if (group_dead) ++ kill_orphaned_pgrp(tsk->group_leader, NULL); ++ ++ if (unlikely(tsk->ptrace)) { ++ int sig = thread_group_leader(tsk) && ++ thread_group_empty(tsk) && ++ !ptrace_reparented(tsk) ? ++ tsk->exit_signal : SIGCHLD; ++ autoreap = do_notify_parent(tsk, sig); ++ } else if (thread_group_leader(tsk)) { ++ autoreap = thread_group_empty(tsk) && ++ do_notify_parent(tsk, tsk->exit_signal); ++ } else { ++ autoreap = true; ++ } ++ ++ tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; ++ if (tsk->exit_state == EXIT_DEAD) ++ list_add(&tsk->ptrace_entry, &dead); ++ ++ /* mt-exec, de_thread() is waiting for group leader */ ++ if (unlikely(tsk->signal->notify_count < 0)) ++ wake_up_process(tsk->signal->group_exit_task); ++ write_unlock_irq(&tasklist_lock); ++ ++ list_for_each_entry_safe(p, n, &dead, ptrace_entry) { ++ list_del_init(&p->ptrace_entry); ++ release_task(p); ++ } ++} ++ ++#ifdef CONFIG_DEBUG_STACK_USAGE ++static void check_stack_usage(void) ++{ ++ static DEFINE_SPINLOCK(low_water_lock); ++ static int lowest_to_date = THREAD_SIZE; ++ unsigned long free; ++ ++ free = stack_not_used(current); ++ ++ if (free >= lowest_to_date) ++ return; ++ ++ spin_lock(&low_water_lock); ++ if (free < lowest_to_date) { ++ pr_info("%s (%d) used greatest stack depth: %lu bytes left\n", ++ current->comm, task_pid_nr(current), free); ++ lowest_to_date = free; ++ } ++ spin_unlock(&low_water_lock); ++} ++#else ++static inline void check_stack_usage(void) {} ++#endif ++ ++void __noreturn do_exit(long code) ++{ ++ struct task_struct *tsk = current; ++ int group_dead; ++ ++ profile_task_exit(tsk); ++ kcov_task_exit(tsk); ++ ++ WARN_ON(blk_needs_flush_plug(tsk)); ++ ++ if (unlikely(in_interrupt())) ++ panic("Aiee, killing interrupt handler!"); ++ if (unlikely(!tsk->pid)) ++ panic("Attempted to kill the idle task!"); ++ ++ /* ++ * If do_exit is called because this processes oopsed, it's possible ++ * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before ++ * continuing. Amongst other possible reasons, this is to prevent ++ * mm_release()->clear_child_tid() from writing to a user-controlled ++ * kernel address. ++ */ ++ set_fs(USER_DS); ++ ++ ptrace_event(PTRACE_EVENT_EXIT, code); ++ ++ validate_creds_for_do_exit(tsk); ++ ++ /* ++ * We're taking recursive faults here in do_exit. Safest is to just ++ * leave this task alone and wait for reboot. ++ */ ++ if (unlikely(tsk->flags & PF_EXITING)) { ++ pr_alert("Fixing recursive fault but reboot is needed!\n"); ++ /* ++ * We can do this unlocked here. The futex code uses ++ * this flag just to verify whether the pi state ++ * cleanup has been done or not. In the worst case it ++ * loops once more. We pretend that the cleanup was ++ * done as there is no way to return. Either the ++ * OWNER_DIED bit is set by now or we push the blocked ++ * task into the wait for ever nirwana as well. ++ */ ++ tsk->flags |= PF_EXITPIDONE; ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule(); ++ } ++ ++ exit_signals(tsk); /* sets PF_EXITING */ ++ /* ++ * Ensure that all new tsk->pi_lock acquisitions must observe ++ * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). ++ */ ++ smp_mb(); ++ /* ++ * Ensure that we must observe the pi_state in exit_mm() -> ++ * mm_release() -> exit_pi_state_list(). 
++ */ ++ raw_spin_lock_irq(&tsk->pi_lock); ++ raw_spin_unlock_irq(&tsk->pi_lock); ++ ++ if (unlikely(in_atomic())) { ++ pr_info("note: %s[%d] exited with preempt_count %d\n", ++ current->comm, task_pid_nr(current), ++ preempt_count()); ++ preempt_count_set(PREEMPT_ENABLED); ++ } ++ ++ /* sync mm's RSS info before statistics gathering */ ++ if (tsk->mm) ++ sync_mm_rss(tsk->mm); ++ acct_update_integrals(tsk); ++ group_dead = atomic_dec_and_test(&tsk->signal->live); ++ if (group_dead) { ++ /* ++ * If the last thread of global init has exited, panic ++ * immediately to get a useable coredump. ++ */ ++ if (unlikely(is_global_init(tsk))) ++ panic("Attempted to kill init! exitcode=0x%08x\n", ++ tsk->signal->group_exit_code ?: (int)code); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ hrtimer_cancel(&tsk->signal->real_timer); ++ exit_itimers(tsk->signal); ++#endif ++ if (tsk->mm) ++ setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); ++ } ++ acct_collect(code, group_dead); ++ if (group_dead) ++ tty_audit_exit(); ++ audit_free(tsk); ++ ++ tsk->exit_code = code; ++ taskstats_exit(tsk, group_dead); ++ ++ exit_mm(); ++ ++ if (group_dead) ++ acct_process(); ++ trace_sched_process_exit(tsk); ++ ++ exit_sem(tsk); ++ exit_shm(tsk); ++ exit_files(tsk); ++ exit_fs(tsk); ++ if (group_dead) ++ disassociate_ctty(1); ++ exit_task_namespaces(tsk); ++ exit_task_work(tsk); ++ exit_thread(tsk); ++ ++ /* ++ * Flush inherited counters to the parent - before the parent ++ * gets woken up by child-exit notifications. ++ * ++ * because of cgroup mode, must be called before cgroup_exit() ++ */ ++ perf_event_exit_task(tsk); ++ ++ sched_autogroup_exit_task(tsk); ++ cgroup_exit(tsk); ++ ++ /* ++ * FIXME: do that only when needed, using sched_exit tracepoint ++ */ ++ flush_ptrace_hw_breakpoint(tsk); ++ ++ exit_tasks_rcu_start(); ++ exit_notify(tsk, group_dead); ++ proc_exit_connector(tsk); ++ mpol_put_task_policy(tsk); ++#ifdef CONFIG_FUTEX ++ if (unlikely(current->pi_state_cache)) ++ kfree(current->pi_state_cache); ++#endif ++ /* ++ * Make sure we are holding no locks: ++ */ ++ debug_check_no_locks_held(); ++ /* ++ * We can do this unlocked here. The futex code uses this flag ++ * just to verify whether the pi state cleanup has been done ++ * or not. In the worst case it loops once more. ++ */ ++ tsk->flags |= PF_EXITPIDONE; ++ ++ if (tsk->io_context) ++ exit_io_context(tsk); ++ ++ if (tsk->splice_pipe) ++ free_pipe_info(tsk->splice_pipe); ++ ++ if (tsk->task_frag.page) ++ put_page(tsk->task_frag.page); ++ ++ validate_creds_for_do_exit(tsk); ++ ++ check_stack_usage(); ++ preempt_disable(); ++ if (tsk->nr_dirtied) ++ __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); ++ exit_rcu(); ++ exit_tasks_rcu_finish(); ++ ++ lockdep_free_task(tsk); ++ do_task_dead(); ++} ++EXPORT_SYMBOL_GPL(do_exit); ++ ++void complete_and_exit(struct completion *comp, long code) ++{ ++ if (comp) ++ complete(comp); ++ ++ do_exit(code); ++} ++EXPORT_SYMBOL(complete_and_exit); ++ ++SYSCALL_DEFINE1(exit, int, error_code) ++{ ++ do_exit((error_code&0xff)<<8); ++} ++ ++/* ++ * Take down every thread in the group. This is called by fatal signals ++ * as well as by sys_exit_group (below). 
++ */ ++void ++do_group_exit(int exit_code) ++{ ++ struct signal_struct *sig = current->signal; ++ ++ BUG_ON(exit_code & 0x80); /* core dumps don't get here */ ++ ++ if (signal_group_exit(sig)) ++ exit_code = sig->group_exit_code; ++ else if (!thread_group_empty(current)) { ++ struct sighand_struct *const sighand = current->sighand; ++ ++ spin_lock_irq(&sighand->siglock); ++ if (signal_group_exit(sig)) ++ /* Another thread got here before we took the lock. */ ++ exit_code = sig->group_exit_code; ++ else { ++ sig->group_exit_code = exit_code; ++ sig->flags = SIGNAL_GROUP_EXIT; ++ zap_other_threads(current); ++ } ++ spin_unlock_irq(&sighand->siglock); ++ } ++ ++ do_exit(exit_code); ++ /* NOTREACHED */ ++} ++ ++/* ++ * this kills every thread in the thread group. Note that any externally ++ * wait4()-ing process will get the correct exit code - even if this ++ * thread is not the thread group leader. ++ */ ++SYSCALL_DEFINE1(exit_group, int, error_code) ++{ ++ do_group_exit((error_code & 0xff) << 8); ++ /* NOTREACHED */ ++ return 0; ++} ++ ++struct waitid_info { ++ pid_t pid; ++ uid_t uid; ++ int status; ++ int cause; ++}; ++ ++struct wait_opts { ++ enum pid_type wo_type; ++ int wo_flags; ++ struct pid *wo_pid; ++ ++ struct waitid_info *wo_info; ++ int wo_stat; ++ struct rusage *wo_rusage; ++ ++ wait_queue_entry_t child_wait; ++ int notask_error; ++}; ++ ++static int eligible_pid(struct wait_opts *wo, struct task_struct *p) ++{ ++ return wo->wo_type == PIDTYPE_MAX || ++ task_pid_type(p, wo->wo_type) == wo->wo_pid; ++} ++ ++static int ++eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) ++{ ++ if (!eligible_pid(wo, p)) ++ return 0; ++ ++ /* ++ * Wait for all children (clone and not) if __WALL is set or ++ * if it is traced by us. ++ */ ++ if (ptrace || (wo->wo_flags & __WALL)) ++ return 1; ++ ++ /* ++ * Otherwise, wait for clone children *only* if __WCLONE is set; ++ * otherwise, wait for non-clone children *only*. ++ * ++ * Note: a "clone" child here is one that reports to its parent ++ * using a signal other than SIGCHLD, or a non-leader thread which ++ * we can only see if it is traced by us. ++ */ ++ if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold ++ * read_lock(&tasklist_lock) on entry. If we return zero, we still hold ++ * the lock and this task is uninteresting. If we return nonzero, we have ++ * released the lock and the system call should return. ++ */ ++static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) ++{ ++ int state, status; ++ pid_t pid = task_pid_vnr(p); ++ uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++ struct waitid_info *infop; ++ ++ if (!likely(wo->wo_flags & WEXITED)) ++ return 0; ++ ++ if (unlikely(wo->wo_flags & WNOWAIT)) { ++ status = p->exit_code; ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ goto out_info; ++ } ++ /* ++ * Move the task's state to DEAD/TRACE, only one thread can do this. ++ */ ++ state = (ptrace_reparented(p) && thread_group_leader(p)) ? ++ EXIT_TRACE : EXIT_DEAD; ++ if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) ++ return 0; ++ /* ++ * We own this thread, nobody else can reap it. ++ */ ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ ++ /* ++ * Check thread_group_leader() to exclude the traced sub-threads. 
++ */ ++ if (state == EXIT_DEAD && thread_group_leader(p)) { ++ struct signal_struct *sig = p->signal; ++ struct signal_struct *psig = current->signal; ++ unsigned long maxrss; ++ u64 tgutime, tgstime; ++ ++ /* ++ * The resource counters for the group leader are in its ++ * own task_struct. Those for dead threads in the group ++ * are in its signal_struct, as are those for the child ++ * processes it has previously reaped. All these ++ * accumulate in the parent's signal_struct c* fields. ++ * ++ * We don't bother to take a lock here to protect these ++ * p->signal fields because the whole thread group is dead ++ * and nobody can change them. ++ * ++ * psig->stats_lock also protects us from our sub-theads ++ * which can reap other children at the same time. Until ++ * we change k_getrusage()-like users to rely on this lock ++ * we have to take ->siglock as well. ++ * ++ * We use thread_group_cputime_adjusted() to get times for ++ * the thread group, which consolidates times for all threads ++ * in the group including the group leader. ++ */ ++ thread_group_cputime_adjusted(p, &tgutime, &tgstime); ++ spin_lock_irq(¤t->sighand->siglock); ++ write_seqlock(&psig->stats_lock); ++ psig->cutime += tgutime + sig->cutime; ++ psig->cstime += tgstime + sig->cstime; ++ psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; ++ psig->cmin_flt += ++ p->min_flt + sig->min_flt + sig->cmin_flt; ++ psig->cmaj_flt += ++ p->maj_flt + sig->maj_flt + sig->cmaj_flt; ++ psig->cnvcsw += ++ p->nvcsw + sig->nvcsw + sig->cnvcsw; ++ psig->cnivcsw += ++ p->nivcsw + sig->nivcsw + sig->cnivcsw; ++ psig->cinblock += ++ task_io_get_inblock(p) + ++ sig->inblock + sig->cinblock; ++ psig->coublock += ++ task_io_get_oublock(p) + ++ sig->oublock + sig->coublock; ++ maxrss = max(sig->maxrss, sig->cmaxrss); ++ if (psig->cmaxrss < maxrss) ++ psig->cmaxrss = maxrss; ++ task_io_accounting_add(&psig->ioac, &p->ioac); ++ task_io_accounting_add(&psig->ioac, &sig->ioac); ++ write_sequnlock(&psig->stats_lock); ++ spin_unlock_irq(¤t->sighand->siglock); ++ } ++ ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ status = (p->signal->flags & SIGNAL_GROUP_EXIT) ++ ? p->signal->group_exit_code : p->exit_code; ++ wo->wo_stat = status; ++ ++ if (state == EXIT_TRACE) { ++ write_lock_irq(&tasklist_lock); ++ /* We dropped tasklist, ptracer could die and untrace */ ++ ptrace_unlink(p); ++ ++ /* If parent wants a zombie, don't release it now */ ++ state = EXIT_ZOMBIE; ++ if (do_notify_parent(p, p->exit_signal)) ++ state = EXIT_DEAD; ++ p->exit_state = state; ++ write_unlock_irq(&tasklist_lock); ++ } ++ if (state == EXIT_DEAD) ++ release_task(p); ++ ++out_info: ++ infop = wo->wo_info; ++ if (infop) { ++ if ((status & 0x7f) == 0) { ++ infop->cause = CLD_EXITED; ++ infop->status = status >> 8; ++ } else { ++ infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; ++ infop->status = status & 0x7f; ++ } ++ infop->pid = pid; ++ infop->uid = uid; ++ } ++ ++ return pid; ++} ++ ++static int *task_stopped_code(struct task_struct *p, bool ptrace) ++{ ++ if (ptrace) { ++ if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING)) ++ return &p->exit_code; ++ } else { ++ if (p->signal->flags & SIGNAL_STOP_STOPPED) ++ return &p->signal->group_exit_code; ++ } ++ return NULL; ++} ++ ++/** ++ * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED ++ * @wo: wait options ++ * @ptrace: is the wait for ptrace ++ * @p: task to wait for ++ * ++ * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. 
++ * ++ * CONTEXT: ++ * read_lock(&tasklist_lock), which is released if return value is ++ * non-zero. Also, grabs and releases @p->sighand->siglock. ++ * ++ * RETURNS: ++ * 0 if wait condition didn't exist and search for other wait conditions ++ * should continue. Non-zero return, -errno on failure and @p's pid on ++ * success, implies that tasklist_lock is released and wait condition ++ * search should terminate. ++ */ ++static int wait_task_stopped(struct wait_opts *wo, ++ int ptrace, struct task_struct *p) ++{ ++ struct waitid_info *infop; ++ int exit_code, *p_code, why; ++ uid_t uid = 0; /* unneeded, required by compiler */ ++ pid_t pid; ++ ++ /* ++ * Traditionally we see ptrace'd stopped tasks regardless of options. ++ */ ++ if (!ptrace && !(wo->wo_flags & WUNTRACED)) ++ return 0; ++ ++ if (!task_stopped_code(p, ptrace)) ++ return 0; ++ ++ exit_code = 0; ++ spin_lock_irq(&p->sighand->siglock); ++ ++ p_code = task_stopped_code(p, ptrace); ++ if (unlikely(!p_code)) ++ goto unlock_sig; ++ ++ exit_code = *p_code; ++ if (!exit_code) ++ goto unlock_sig; ++ ++ if (!unlikely(wo->wo_flags & WNOWAIT)) ++ *p_code = 0; ++ ++ uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++unlock_sig: ++ spin_unlock_irq(&p->sighand->siglock); ++ if (!exit_code) ++ return 0; ++ ++ /* ++ * Now we are pretty sure this task is interesting. ++ * Make sure it doesn't get reaped out from under us while we ++ * give up the lock and then examine it below. We don't want to ++ * keep holding onto the tasklist_lock while we call getrusage and ++ * possibly take page faults for user memory. ++ */ ++ get_task_struct(p); ++ pid = task_pid_vnr(p); ++ why = ptrace ? CLD_TRAPPED : CLD_STOPPED; ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ ++ if (likely(!(wo->wo_flags & WNOWAIT))) ++ wo->wo_stat = (exit_code << 8) | 0x7f; ++ ++ infop = wo->wo_info; ++ if (infop) { ++ infop->cause = why; ++ infop->status = exit_code; ++ infop->pid = pid; ++ infop->uid = uid; ++ } ++ return pid; ++} ++ ++/* ++ * Handle do_wait work for one task in a live, non-stopped state. ++ * read_lock(&tasklist_lock) on entry. If we return zero, we still hold ++ * the lock and this task is uninteresting. If we return nonzero, we have ++ * released the lock and the system call should return. ++ */ ++static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) ++{ ++ struct waitid_info *infop; ++ pid_t pid; ++ uid_t uid; ++ ++ if (!unlikely(wo->wo_flags & WCONTINUED)) ++ return 0; ++ ++ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) ++ return 0; ++ ++ spin_lock_irq(&p->sighand->siglock); ++ /* Re-check with the lock held. */ ++ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) { ++ spin_unlock_irq(&p->sighand->siglock); ++ return 0; ++ } ++ if (!unlikely(wo->wo_flags & WNOWAIT)) ++ p->signal->flags &= ~SIGNAL_STOP_CONTINUED; ++ uid = from_kuid_munged(current_user_ns(), task_uid(p)); ++ spin_unlock_irq(&p->sighand->siglock); ++ ++ pid = task_pid_vnr(p); ++ get_task_struct(p); ++ read_unlock(&tasklist_lock); ++ sched_annotate_sleep(); ++ if (wo->wo_rusage) ++ getrusage(p, RUSAGE_BOTH, wo->wo_rusage); ++ put_task_struct(p); ++ ++ infop = wo->wo_info; ++ if (!infop) { ++ wo->wo_stat = 0xffff; ++ } else { ++ infop->cause = CLD_CONTINUED; ++ infop->pid = pid; ++ infop->uid = uid; ++ infop->status = SIGCONT; ++ } ++ return pid; ++} ++ ++/* ++ * Consider @p for a wait by @parent. 
++ * ++ * -ECHILD should be in ->notask_error before the first call. ++ * Returns nonzero for a final return, when we have unlocked tasklist_lock. ++ * Returns zero if the search for a child should continue; ++ * then ->notask_error is 0 if @p is an eligible child, ++ * or still -ECHILD. ++ */ ++static int wait_consider_task(struct wait_opts *wo, int ptrace, ++ struct task_struct *p) ++{ ++ /* ++ * We can race with wait_task_zombie() from another thread. ++ * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition ++ * can't confuse the checks below. ++ */ ++ int exit_state = READ_ONCE(p->exit_state); ++ int ret; ++ ++ if (unlikely(exit_state == EXIT_DEAD)) ++ return 0; ++ ++ ret = eligible_child(wo, ptrace, p); ++ if (!ret) ++ return ret; ++ ++ if (unlikely(exit_state == EXIT_TRACE)) { ++ /* ++ * ptrace == 0 means we are the natural parent. In this case ++ * we should clear notask_error, debugger will notify us. ++ */ ++ if (likely(!ptrace)) ++ wo->notask_error = 0; ++ return 0; ++ } ++ ++ if (likely(!ptrace) && unlikely(p->ptrace)) { ++ /* ++ * If it is traced by its real parent's group, just pretend ++ * the caller is ptrace_do_wait() and reap this child if it ++ * is zombie. ++ * ++ * This also hides group stop state from real parent; otherwise ++ * a single stop can be reported twice as group and ptrace stop. ++ * If a ptracer wants to distinguish these two events for its ++ * own children it should create a separate process which takes ++ * the role of real parent. ++ */ ++ if (!ptrace_reparented(p)) ++ ptrace = 1; ++ } ++ ++ /* slay zombie? */ ++ if (exit_state == EXIT_ZOMBIE) { ++ /* we don't reap group leaders with subthreads */ ++ if (!delay_group_leader(p)) { ++ /* ++ * A zombie ptracee is only visible to its ptracer. ++ * Notification and reaping will be cascaded to the ++ * real parent when the ptracer detaches. ++ */ ++ if (unlikely(ptrace) || likely(!p->ptrace)) ++ return wait_task_zombie(wo, p); ++ } ++ ++ /* ++ * Allow access to stopped/continued state via zombie by ++ * falling through. Clearing of notask_error is complex. ++ * ++ * When !@ptrace: ++ * ++ * If WEXITED is set, notask_error should naturally be ++ * cleared. If not, subset of WSTOPPED|WCONTINUED is set, ++ * so, if there are live subthreads, there are events to ++ * wait for. If all subthreads are dead, it's still safe ++ * to clear - this function will be called again in finite ++ * amount time once all the subthreads are released and ++ * will then return without clearing. ++ * ++ * When @ptrace: ++ * ++ * Stopped state is per-task and thus can't change once the ++ * target task dies. Only continued and exited can happen. ++ * Clear notask_error if WCONTINUED | WEXITED. ++ */ ++ if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) ++ wo->notask_error = 0; ++ } else { ++ /* ++ * @p is alive and it's gonna stop, continue or exit, so ++ * there always is something to wait for. ++ */ ++ wo->notask_error = 0; ++ } ++ ++ /* ++ * Wait for stopped. Depending on @ptrace, different stopped state ++ * is used and the two don't interact with each other. ++ */ ++ ret = wait_task_stopped(wo, ptrace, p); ++ if (ret) ++ return ret; ++ ++ /* ++ * Wait for continued. There's only one continued state and the ++ * ptracer can consume it which can confuse the real parent. Don't ++ * use WCONTINUED from ptracer. You don't need or want it. ++ */ ++ return wait_task_continued(wo, p); ++} ++ ++/* ++ * Do the work of do_wait() for one thread in the group, @tsk. 
++ * ++ * -ECHILD should be in ->notask_error before the first call. ++ * Returns nonzero for a final return, when we have unlocked tasklist_lock. ++ * Returns zero if the search for a child should continue; then ++ * ->notask_error is 0 if there were any eligible children, ++ * or still -ECHILD. ++ */ ++static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) ++{ ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &tsk->children, sibling) { ++ int ret = wait_consider_task(wo, 0, p); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) ++{ ++ struct task_struct *p; ++ ++ list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { ++ int ret = wait_consider_task(wo, 1, p); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, ++ int sync, void *key) ++{ ++ struct wait_opts *wo = container_of(wait, struct wait_opts, ++ child_wait); ++ struct task_struct *p = key; ++ ++ if (!eligible_pid(wo, p)) ++ return 0; ++ ++ if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) ++ return 0; ++ ++ return default_wake_function(wait, mode, sync, key); ++} ++ ++void __wake_up_parent(struct task_struct *p, struct task_struct *parent) ++{ ++ __wake_up_sync_key(&parent->signal->wait_chldexit, ++ TASK_INTERRUPTIBLE, 1, p); ++} ++ ++static long do_wait(struct wait_opts *wo) ++{ ++ struct task_struct *tsk; ++ int retval; ++ ++ trace_sched_process_wait(wo->wo_pid); ++ ++ init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); ++ wo->child_wait.private = current; ++ add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); ++repeat: ++ /* ++ * If there is nothing that can match our criteria, just get out. ++ * We will clear ->notask_error to zero if we see any child that ++ * might later match our criteria, even if we are not able to reap ++ * it yet. 
++ */ ++ wo->notask_error = -ECHILD; ++ if ((wo->wo_type < PIDTYPE_MAX) && ++ (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) ++ goto notask; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ read_lock(&tasklist_lock); ++ tsk = current; ++ do { ++ retval = do_wait_thread(wo, tsk); ++ if (retval) ++ goto end; ++ ++ retval = ptrace_do_wait(wo, tsk); ++ if (retval) ++ goto end; ++ ++ if (wo->wo_flags & __WNOTHREAD) ++ break; ++ } while_each_thread(current, tsk); ++ read_unlock(&tasklist_lock); ++ ++notask: ++ retval = wo->notask_error; ++ if (!retval && !(wo->wo_flags & WNOHANG)) { ++ retval = -ERESTARTSYS; ++ if (!signal_pending(current)) { ++ schedule(); ++ goto repeat; ++ } ++ } ++end: ++ __set_current_state(TASK_RUNNING); ++ remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); ++ return retval; ++} ++ ++static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, ++ int options, struct rusage *ru) ++{ ++ struct wait_opts wo; ++ struct pid *pid = NULL; ++ enum pid_type type; ++ long ret; ++ ++ if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| ++ __WNOTHREAD|__WCLONE|__WALL)) ++ return -EINVAL; ++ if (!(options & (WEXITED|WSTOPPED|WCONTINUED))) ++ return -EINVAL; ++ ++ switch (which) { ++ case P_ALL: ++ type = PIDTYPE_MAX; ++ break; ++ case P_PID: ++ type = PIDTYPE_PID; ++ if (upid <= 0) ++ return -EINVAL; ++ break; ++ case P_PGID: ++ type = PIDTYPE_PGID; ++ if (upid <= 0) ++ return -EINVAL; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ if (type < PIDTYPE_MAX) ++ pid = find_get_pid(upid); ++ ++ wo.wo_type = type; ++ wo.wo_pid = pid; ++ wo.wo_flags = options; ++ wo.wo_info = infop; ++ wo.wo_rusage = ru; ++ ret = do_wait(&wo); ++ ++ put_pid(pid); ++ return ret; ++} ++ ++SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, ++ infop, int, options, struct rusage __user *, ru) ++{ ++ struct rusage r; ++ struct waitid_info info = {.status = 0}; ++ long err = kernel_waitid(which, upid, &info, options, ru ? 
&r : NULL); ++ int signo = 0; ++ ++ if (err > 0) { ++ signo = SIGCHLD; ++ err = 0; ++ if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) ++ return -EFAULT; ++ } ++ if (!infop) ++ return err; ++ ++ if (!user_access_begin(infop, sizeof(*infop))) ++ return -EFAULT; ++ ++ unsafe_put_user(signo, &infop->si_signo, Efault); ++ unsafe_put_user(0, &infop->si_errno, Efault); ++ unsafe_put_user(info.cause, &infop->si_code, Efault); ++ unsafe_put_user(info.pid, &infop->si_pid, Efault); ++ unsafe_put_user(info.uid, &infop->si_uid, Efault); ++ unsafe_put_user(info.status, &infop->si_status, Efault); ++ user_access_end(); ++ return err; ++Efault: ++ user_access_end(); ++ return -EFAULT; ++} ++ ++long kernel_wait4(pid_t upid, int __user *stat_addr, int options, ++ struct rusage *ru) ++{ ++ struct wait_opts wo; ++ struct pid *pid = NULL; ++ enum pid_type type; ++ long ret; ++ ++ if (options & ~(WNOHANG|WUNTRACED|WCONTINUED| ++ __WNOTHREAD|__WCLONE|__WALL)) ++ return -EINVAL; ++ ++ /* -INT_MIN is not defined */ ++ if (upid == INT_MIN) ++ return -ESRCH; ++ ++ if (upid == -1) ++ type = PIDTYPE_MAX; ++ else if (upid < 0) { ++ type = PIDTYPE_PGID; ++ pid = find_get_pid(-upid); ++ } else if (upid == 0) { ++ type = PIDTYPE_PGID; ++ pid = get_task_pid(current, PIDTYPE_PGID); ++ } else /* upid > 0 */ { ++ type = PIDTYPE_PID; ++ pid = find_get_pid(upid); ++ } ++ ++ wo.wo_type = type; ++ wo.wo_pid = pid; ++ wo.wo_flags = options | WEXITED; ++ wo.wo_info = NULL; ++ wo.wo_stat = 0; ++ wo.wo_rusage = ru; ++ ret = do_wait(&wo); ++ put_pid(pid); ++ if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) ++ ret = -EFAULT; ++ ++ return ret; ++} ++ ++SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, ++ int, options, struct rusage __user *, ru) ++{ ++ struct rusage r; ++ long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); ++ ++ if (err > 0) { ++ if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) ++ return -EFAULT; ++ } ++ return err; ++} ++ ++#ifdef __ARCH_WANT_SYS_WAITPID ++ ++/* ++ * sys_waitpid() remains for compatibility. waitpid() should be ++ * implemented by calling sys_wait4() from libc.a. ++ */ ++SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) ++{ ++ return kernel_wait4(pid, stat_addr, options, NULL); ++} ++ ++#endif ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(wait4, ++ compat_pid_t, pid, ++ compat_uint_t __user *, stat_addr, ++ int, options, ++ struct compat_rusage __user *, ru) ++{ ++ struct rusage r; ++ long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); ++ if (err > 0) { ++ if (ru && put_compat_rusage(&r, ru)) ++ return -EFAULT; ++ } ++ return err; ++} ++ ++COMPAT_SYSCALL_DEFINE5(waitid, ++ int, which, compat_pid_t, pid, ++ struct compat_siginfo __user *, infop, int, options, ++ struct compat_rusage __user *, uru) ++{ ++ struct rusage ru; ++ struct waitid_info info = {.status = 0}; ++ long err = kernel_waitid(which, pid, &info, options, uru ? 
&ru : NULL); ++ int signo = 0; ++ if (err > 0) { ++ signo = SIGCHLD; ++ err = 0; ++ if (uru) { ++ /* kernel_waitid() overwrites everything in ru */ ++ if (COMPAT_USE_64BIT_TIME) ++ err = copy_to_user(uru, &ru, sizeof(ru)); ++ else ++ err = put_compat_rusage(&ru, uru); ++ if (err) ++ return -EFAULT; ++ } ++ } ++ ++ if (!infop) ++ return err; ++ ++ if (!user_access_begin(infop, sizeof(*infop))) ++ return -EFAULT; ++ ++ unsafe_put_user(signo, &infop->si_signo, Efault); ++ unsafe_put_user(0, &infop->si_errno, Efault); ++ unsafe_put_user(info.cause, &infop->si_code, Efault); ++ unsafe_put_user(info.pid, &infop->si_pid, Efault); ++ unsafe_put_user(info.uid, &infop->si_uid, Efault); ++ unsafe_put_user(info.status, &infop->si_status, Efault); ++ user_access_end(); ++ return err; ++Efault: ++ user_access_end(); ++ return -EFAULT; ++} ++#endif ++ ++__weak void abort(void) ++{ ++ BUG(); ++ ++ /* if that doesn't kill us, halt */ ++ panic("Oops failed to kill thread"); ++} ++EXPORT_SYMBOL(abort); +diff -uprN kernel/kernel/fork.c kernel_new/kernel/fork.c +--- kernel/kernel/fork.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/fork.c 2021-04-01 18:28:07.805863120 +0800 +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -91,6 +92,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -886,6 +888,8 @@ static struct task_struct *dup_task_stru + #endif + + setup_thread_stack(tsk, orig); ++ __ipipe_init_threadflags(task_thread_info(tsk)); ++ __ipipe_init_threadinfo(&task_thread_info(tsk)->ipipe_data); + clear_user_return_notifier(tsk); + clear_tsk_need_resched(tsk); + set_task_stack_end_magic(tsk); +@@ -1055,6 +1059,7 @@ static inline void __mmput(struct mm_str + exit_aio(mm); + ksm_exit(mm); + khugepaged_exit(mm); /* must run before exit_mmap */ ++ __ipipe_report_cleanup(mm); + exit_mmap(mm); + mm_put_huge_zero_page(mm); + set_mm_exe_file(mm, NULL); +diff -uprN kernel/kernel/fork.c.orig kernel_new/kernel/fork.c.orig +--- kernel/kernel/fork.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/fork.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2730 @@ ++/* ++ * linux/kernel/fork.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * 'fork.c' contains the help-routines for the 'fork' system call ++ * (see also entry.S and others). ++ * Fork is rather simple, once you get the hang of it, but the memory ++ * management can be a bitch. 
See 'mm/memory.c': 'copy_page_range()' ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++/* ++ * Minimum number of threads to boot the kernel ++ */ ++#define MIN_THREADS 20 ++ ++/* ++ * Maximum number of threads ++ */ ++#define MAX_THREADS FUTEX_TID_MASK ++ ++/* ++ * Protected counters by write_lock_irq(&tasklist_lock) ++ */ ++unsigned long total_forks; /* Handle normal Linux uptimes. */ ++int nr_threads; /* The idle threads do not count.. */ ++ ++int max_threads; /* tunable limit on nr_threads */ ++ ++DEFINE_PER_CPU(unsigned long, process_counts) = 0; ++ ++__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ ++ ++#ifdef CONFIG_PROVE_RCU ++int lockdep_tasklist_lock_is_held(void) ++{ ++ return lockdep_is_held(&tasklist_lock); ++} ++EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); ++#endif /* #ifdef CONFIG_PROVE_RCU */ ++ ++int nr_processes(void) ++{ ++ int cpu; ++ int total = 0; ++ ++ for_each_possible_cpu(cpu) ++ total += per_cpu(process_counts, cpu); ++ ++ return total; ++} ++ ++void __weak arch_release_task_struct(struct task_struct *tsk) ++{ ++} ++ ++#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR ++static struct kmem_cache *task_struct_cachep; ++ ++static inline struct task_struct *alloc_task_struct_node(int node) ++{ ++ return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); ++} ++ ++static inline void free_task_struct(struct task_struct *tsk) ++{ ++ kmem_cache_free(task_struct_cachep, tsk); ++} ++#endif ++ ++#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR ++ ++/* ++ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a ++ * kmemcache based allocator. ++ */ ++# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) ++ ++#ifdef CONFIG_VMAP_STACK ++/* ++ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB ++ * flush. Try to minimize the number of calls by caching stacks. 
++ */ ++#define NR_CACHED_STACKS 2 ++static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); ++ ++static int free_vm_stack_cache(unsigned int cpu) ++{ ++ struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); ++ int i; ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ struct vm_struct *vm_stack = cached_vm_stacks[i]; ++ ++ if (!vm_stack) ++ continue; ++ ++ vfree(vm_stack->addr); ++ cached_vm_stacks[i] = NULL; ++ } ++ ++ return 0; ++} ++#endif ++ ++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) ++{ ++#ifdef CONFIG_VMAP_STACK ++ void *stack; ++ int i; ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ struct vm_struct *s; ++ ++ s = this_cpu_xchg(cached_stacks[i], NULL); ++ ++ if (!s) ++ continue; ++ ++ /* Clear stale pointers from reused stack. */ ++ memset(s->addr, 0, THREAD_SIZE); ++ ++ tsk->stack_vm_area = s; ++ tsk->stack = s->addr; ++ return s->addr; ++ } ++ ++ /* ++ * Allocated stacks are cached and later reused by new threads, ++ * so memcg accounting is performed manually on assigning/releasing ++ * stacks to tasks. Drop __GFP_ACCOUNT. ++ */ ++ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, ++ VMALLOC_START, VMALLOC_END, ++ THREADINFO_GFP & ~__GFP_ACCOUNT, ++ PAGE_KERNEL, ++ 0, node, __builtin_return_address(0)); ++ ++ /* ++ * We can't call find_vm_area() in interrupt context, and ++ * free_thread_stack() can be called in interrupt context, ++ * so cache the vm_struct. ++ */ ++ if (stack) { ++ tsk->stack_vm_area = find_vm_area(stack); ++ tsk->stack = stack; ++ } ++ return stack; ++#else ++ struct page *page = alloc_pages_node(node, THREADINFO_GFP, ++ THREAD_SIZE_ORDER); ++ ++ if (likely(page)) { ++ tsk->stack = page_address(page); ++ return tsk->stack; ++ } ++ return NULL; ++#endif ++} ++ ++static inline void free_thread_stack(struct task_struct *tsk) ++{ ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ ++ if (vm) { ++ int i; ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ mod_memcg_page_state(vm->pages[i], ++ MEMCG_KERNEL_STACK_KB, ++ -(int)(PAGE_SIZE / 1024)); ++ ++ memcg_kmem_uncharge(vm->pages[i], 0); ++ } ++ ++ for (i = 0; i < NR_CACHED_STACKS; i++) { ++ if (this_cpu_cmpxchg(cached_stacks[i], ++ NULL, tsk->stack_vm_area) != NULL) ++ continue; ++ ++ return; ++ } ++ ++ vfree_atomic(tsk->stack); ++ return; ++ } ++#endif ++ ++ __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); ++} ++# else ++static struct kmem_cache *thread_stack_cache; ++ ++static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, ++ int node) ++{ ++ unsigned long *stack; ++ stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); ++ tsk->stack = stack; ++ return stack; ++} ++ ++static void free_thread_stack(struct task_struct *tsk) ++{ ++ kmem_cache_free(thread_stack_cache, tsk->stack); ++} ++ ++void thread_stack_cache_init(void) ++{ ++ thread_stack_cache = kmem_cache_create_usercopy("thread_stack", ++ THREAD_SIZE, THREAD_SIZE, 0, 0, ++ THREAD_SIZE, NULL); ++ BUG_ON(thread_stack_cache == NULL); ++} ++# endif ++#endif ++ ++/* SLAB cache for signal_struct structures (tsk->signal) */ ++static struct kmem_cache *signal_cachep; ++ ++/* SLAB cache for sighand_struct structures (tsk->sighand) */ ++struct kmem_cache *sighand_cachep; ++ ++/* SLAB cache for files_struct structures (tsk->files) */ ++struct kmem_cache *files_cachep; ++ ++/* SLAB cache for fs_struct structures (tsk->fs) */ ++struct kmem_cache *fs_cachep; ++ ++/* SLAB cache for vm_area_struct structures */ 
++static struct kmem_cache *vm_area_cachep; ++ ++/* SLAB cache for mm_struct structures (tsk->mm) */ ++static struct kmem_cache *mm_cachep; ++ ++struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) ++{ ++ struct vm_area_struct *vma; ++ ++ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ if (vma) ++ vma_init(vma, mm); ++ return vma; ++} ++ ++struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) ++{ ++ struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); ++ ++ if (new) { ++ *new = *orig; ++ INIT_LIST_HEAD(&new->anon_vma_chain); ++ } ++ return new; ++} ++ ++void vm_area_free(struct vm_area_struct *vma) ++{ ++ kmem_cache_free(vm_area_cachep, vma); ++} ++ ++static void account_kernel_stack(struct task_struct *tsk, int account) ++{ ++ void *stack = task_stack_page(tsk); ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ ++ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); ++ ++ if (vm) { ++ int i; ++ ++ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ mod_zone_page_state(page_zone(vm->pages[i]), ++ NR_KERNEL_STACK_KB, ++ PAGE_SIZE / 1024 * account); ++ } ++ } else { ++ /* ++ * All stack pages are in the same zone and belong to the ++ * same memcg. ++ */ ++ struct page *first_page = virt_to_page(stack); ++ ++ mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, ++ THREAD_SIZE / 1024 * account); ++ ++ mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, ++ account * (THREAD_SIZE / 1024)); ++ } ++} ++ ++static int memcg_charge_kernel_stack(struct task_struct *tsk) ++{ ++#ifdef CONFIG_VMAP_STACK ++ struct vm_struct *vm = task_stack_vm_area(tsk); ++ int ret; ++ ++ if (vm) { ++ int i; ++ ++ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { ++ /* ++ * If memcg_kmem_charge() fails, page->mem_cgroup ++ * pointer is NULL, and both memcg_kmem_uncharge() ++ * and mod_memcg_page_state() in free_thread_stack() ++ * will ignore this page. So it's safe. ++ */ ++ ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0); ++ if (ret) ++ return ret; ++ ++ mod_memcg_page_state(vm->pages[i], ++ MEMCG_KERNEL_STACK_KB, ++ PAGE_SIZE / 1024); ++ } ++ } ++#endif ++ return 0; ++} ++ ++static void release_task_stack(struct task_struct *tsk) ++{ ++ if (WARN_ON(tsk->state != TASK_DEAD)) ++ return; /* Better to leak the stack than to free prematurely */ ++ ++ account_kernel_stack(tsk, -1); ++ free_thread_stack(tsk); ++ tsk->stack = NULL; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = NULL; ++#endif ++} ++ ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++void put_task_stack(struct task_struct *tsk) ++{ ++ if (atomic_dec_and_test(&tsk->stack_refcount)) ++ release_task_stack(tsk); ++} ++#endif ++ ++void free_task(struct task_struct *tsk) ++{ ++#ifndef CONFIG_THREAD_INFO_IN_TASK ++ /* ++ * The task is finally done with both the stack and thread_info, ++ * so free both. ++ */ ++ release_task_stack(tsk); ++#else ++ /* ++ * If the task had a separate stack allocation, it should be gone ++ * by now. 
++ */ ++ WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0); ++#endif ++ rt_mutex_debug_task_free(tsk); ++ ftrace_graph_exit_task(tsk); ++ put_seccomp_filter(tsk); ++ arch_release_task_struct(tsk); ++ if (tsk->flags & PF_KTHREAD) ++ free_kthread_struct(tsk); ++ free_task_struct(tsk); ++} ++EXPORT_SYMBOL(free_task); ++ ++#ifdef CONFIG_MMU ++static __latent_entropy int dup_mmap(struct mm_struct *mm, ++ struct mm_struct *oldmm) ++{ ++ struct vm_area_struct *mpnt, *tmp, *prev, **pprev; ++ struct rb_node **rb_link, *rb_parent; ++ int retval; ++ unsigned long charge; ++ LIST_HEAD(uf); ++ ++ uprobe_start_dup_mmap(); ++ if (down_write_killable(&oldmm->mmap_sem)) { ++ retval = -EINTR; ++ goto fail_uprobe_end; ++ } ++ flush_cache_dup_mm(oldmm); ++ uprobe_dup_mmap(oldmm, mm); ++ /* ++ * Not linked in yet - no deadlock potential: ++ */ ++ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); ++ ++ /* No ordering required: file already has been exposed. */ ++ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); ++ ++ mm->total_vm = oldmm->total_vm; ++ mm->data_vm = oldmm->data_vm; ++ mm->exec_vm = oldmm->exec_vm; ++ mm->stack_vm = oldmm->stack_vm; ++ ++ rb_link = &mm->mm_rb.rb_node; ++ rb_parent = NULL; ++ pprev = &mm->mmap; ++ retval = ksm_fork(mm, oldmm); ++ if (retval) ++ goto out; ++ retval = khugepaged_fork(mm, oldmm); ++ if (retval) ++ goto out; ++ ++ prev = NULL; ++ for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { ++ struct file *file; ++ ++ if (mpnt->vm_flags & VM_DONTCOPY) { ++ vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); ++ continue; ++ } ++ charge = 0; ++ /* ++ * Don't duplicate many vmas if we've been oom-killed (for ++ * example) ++ */ ++ if (fatal_signal_pending(current)) { ++ retval = -EINTR; ++ goto out; ++ } ++ if (mpnt->vm_flags & VM_ACCOUNT) { ++ unsigned long len = vma_pages(mpnt); ++ ++ if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ ++ goto fail_nomem; ++ charge = len; ++ } ++ tmp = vm_area_dup(mpnt); ++ if (!tmp) ++ goto fail_nomem; ++ retval = vma_dup_policy(mpnt, tmp); ++ if (retval) ++ goto fail_nomem_policy; ++ tmp->vm_mm = mm; ++ retval = dup_userfaultfd(tmp, &uf); ++ if (retval) ++ goto fail_nomem_anon_vma_fork; ++ if (tmp->vm_flags & VM_WIPEONFORK) { ++ /* VM_WIPEONFORK gets a clean slate in the child. */ ++ tmp->anon_vma = NULL; ++ if (anon_vma_prepare(tmp)) ++ goto fail_nomem_anon_vma_fork; ++ } else if (anon_vma_fork(tmp, mpnt)) ++ goto fail_nomem_anon_vma_fork; ++ tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); ++ tmp->vm_next = tmp->vm_prev = NULL; ++ file = tmp->vm_file; ++ if (file) { ++ struct inode *inode = file_inode(file); ++ struct address_space *mapping = file->f_mapping; ++ ++ get_file(file); ++ if (tmp->vm_flags & VM_DENYWRITE) ++ atomic_dec(&inode->i_writecount); ++ i_mmap_lock_write(mapping); ++ if (tmp->vm_flags & VM_SHARED) ++ atomic_inc(&mapping->i_mmap_writable); ++ flush_dcache_mmap_lock(mapping); ++ /* insert tmp into the share list, just after mpnt */ ++ vma_interval_tree_insert_after(tmp, mpnt, ++ &mapping->i_mmap); ++ flush_dcache_mmap_unlock(mapping); ++ i_mmap_unlock_write(mapping); ++ } ++ ++ /* ++ * Clear hugetlb-related page reserves for children. This only ++ * affects MAP_PRIVATE mappings. Faults generated by the child ++ * are not guaranteed to succeed, even if read-only ++ */ ++ if (is_vm_hugetlb_page(tmp)) ++ reset_vma_resv_huge_pages(tmp); ++ ++ /* ++ * Link in the new vma and copy the page table entries. 
++ */ ++ *pprev = tmp; ++ pprev = &tmp->vm_next; ++ tmp->vm_prev = prev; ++ prev = tmp; ++ ++ __vma_link_rb(mm, tmp, rb_link, rb_parent); ++ rb_link = &tmp->vm_rb.rb_right; ++ rb_parent = &tmp->vm_rb; ++ ++ mm->map_count++; ++ if (!(tmp->vm_flags & VM_WIPEONFORK)) ++ retval = copy_page_range(mm, oldmm, mpnt); ++ ++ if (tmp->vm_ops && tmp->vm_ops->open) ++ tmp->vm_ops->open(tmp); ++ ++ if (retval) ++ goto out; ++ } ++ /* a new mm has just been created */ ++ retval = arch_dup_mmap(oldmm, mm); ++out: ++ up_write(&mm->mmap_sem); ++ flush_tlb_mm(oldmm); ++ up_write(&oldmm->mmap_sem); ++ dup_userfaultfd_complete(&uf); ++fail_uprobe_end: ++ uprobe_end_dup_mmap(); ++ return retval; ++fail_nomem_anon_vma_fork: ++ mpol_put(vma_policy(tmp)); ++fail_nomem_policy: ++ vm_area_free(tmp); ++fail_nomem: ++ retval = -ENOMEM; ++ vm_unacct_memory(charge); ++ goto out; ++} ++ ++static inline int mm_alloc_pgd(struct mm_struct *mm) ++{ ++ mm->pgd = pgd_alloc(mm); ++ if (unlikely(!mm->pgd)) ++ return -ENOMEM; ++ return 0; ++} ++ ++static inline void mm_free_pgd(struct mm_struct *mm) ++{ ++ pgd_free(mm, mm->pgd); ++} ++#else ++static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) ++{ ++ down_write(&oldmm->mmap_sem); ++ RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); ++ up_write(&oldmm->mmap_sem); ++ return 0; ++} ++#define mm_alloc_pgd(mm) (0) ++#define mm_free_pgd(mm) ++#endif /* CONFIG_MMU */ ++ ++static void check_mm(struct mm_struct *mm) ++{ ++ int i; ++ ++ for (i = 0; i < NR_MM_COUNTERS; i++) { ++ long x = atomic_long_read(&mm->rss_stat.count[i]); ++ ++ if (unlikely(x)) ++ printk(KERN_ALERT "BUG: Bad rss-counter state " ++ "mm:%p idx:%d val:%ld\n", mm, i, x); ++ } ++ ++ if (mm_pgtables_bytes(mm)) ++ pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", ++ mm_pgtables_bytes(mm)); ++ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS ++ VM_BUG_ON_MM(mm->pmd_huge_pte, mm); ++#endif ++} ++ ++#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) ++#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) ++ ++/* ++ * Called when the last reference to the mm ++ * is dropped: either by a lazy thread or by ++ * mmput. Free the page directory and the mm. 
++ */ ++void __mmdrop(struct mm_struct *mm) ++{ ++ BUG_ON(mm == &init_mm); ++ WARN_ON_ONCE(mm == current->mm); ++ WARN_ON_ONCE(mm == current->active_mm); ++ mm_free_pgd(mm); ++ destroy_context(mm); ++ hmm_mm_destroy(mm); ++ mmu_notifier_mm_destroy(mm); ++ check_mm(mm); ++ put_user_ns(mm->user_ns); ++ free_mm(mm); ++} ++EXPORT_SYMBOL_GPL(__mmdrop); ++ ++static void mmdrop_async_fn(struct work_struct *work) ++{ ++ struct mm_struct *mm; ++ ++ mm = container_of(work, struct mm_struct, async_put_work); ++ __mmdrop(mm); ++} ++ ++static void mmdrop_async(struct mm_struct *mm) ++{ ++ if (unlikely(atomic_dec_and_test(&mm->mm_count))) { ++ INIT_WORK(&mm->async_put_work, mmdrop_async_fn); ++ schedule_work(&mm->async_put_work); ++ } ++} ++ ++static inline void free_signal_struct(struct signal_struct *sig) ++{ ++ taskstats_tgid_free(sig); ++ sched_autogroup_exit(sig); ++ /* ++ * __mmdrop is not safe to call from softirq context on x86 due to ++ * pgd_dtor so postpone it to the async context ++ */ ++ if (sig->oom_mm) ++ mmdrop_async(sig->oom_mm); ++ kmem_cache_free(signal_cachep, sig); ++} ++ ++static inline void put_signal_struct(struct signal_struct *sig) ++{ ++ if (atomic_dec_and_test(&sig->sigcnt)) ++ free_signal_struct(sig); ++} ++ ++void __put_task_struct(struct task_struct *tsk) ++{ ++ WARN_ON(!tsk->exit_state); ++ WARN_ON(atomic_read(&tsk->usage)); ++ WARN_ON(tsk == current); ++ ++ cgroup_free(tsk); ++ task_numa_free(tsk, true); ++ security_task_free(tsk); ++ exit_creds(tsk); ++ delayacct_tsk_free(tsk); ++ put_signal_struct(tsk->signal); ++ ++ if (!profile_handoff_task(tsk)) ++ free_task(tsk); ++} ++EXPORT_SYMBOL_GPL(__put_task_struct); ++ ++void __init __weak arch_task_cache_init(void) { } ++ ++/* ++ * set_max_threads ++ */ ++static void set_max_threads(unsigned int max_threads_suggested) ++{ ++ u64 threads; ++ ++ /* ++ * The number of threads shall be limited such that the thread ++ * structures may only consume a small part of the available memory. ++ */ ++ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) ++ threads = MAX_THREADS; ++ else ++ threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, ++ (u64) THREAD_SIZE * 8UL); ++ ++ if (threads > max_threads_suggested) ++ threads = max_threads_suggested; ++ ++ max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); ++} ++ ++#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT ++/* Initialized by the architecture: */ ++int arch_task_struct_size __read_mostly; ++#endif ++ ++static void task_struct_whitelist(unsigned long *offset, unsigned long *size) ++{ ++ /* Fetch thread_struct whitelist for the architecture. */ ++ arch_thread_struct_whitelist(offset, size); ++ ++ /* ++ * Handle zero-sized whitelist or empty thread_struct, otherwise ++ * adjust offset to position of thread_struct in task_struct. 
++ */ ++ if (unlikely(*size == 0)) ++ *offset = 0; ++ else ++ *offset += offsetof(struct task_struct, thread); ++} ++ ++void __init fork_init(void) ++{ ++ int i; ++#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR ++#ifndef ARCH_MIN_TASKALIGN ++#define ARCH_MIN_TASKALIGN 0 ++#endif ++ int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); ++ unsigned long useroffset, usersize; ++ ++ /* create a slab on which task_structs can be allocated */ ++ task_struct_whitelist(&useroffset, &usersize); ++ task_struct_cachep = kmem_cache_create_usercopy("task_struct", ++ arch_task_struct_size, align, ++ SLAB_PANIC|SLAB_ACCOUNT, ++ useroffset, usersize, NULL); ++#endif ++ ++ /* do the arch specific task caches init */ ++ arch_task_cache_init(); ++ ++ set_max_threads(MAX_THREADS); ++ ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; ++ init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; ++ init_task.signal->rlim[RLIMIT_SIGPENDING] = ++ init_task.signal->rlim[RLIMIT_NPROC]; ++ ++ for (i = 0; i < UCOUNT_COUNTS; i++) { ++ init_user_ns.ucount_max[i] = max_threads/2; ++ } ++ ++#ifdef CONFIG_VMAP_STACK ++ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", ++ NULL, free_vm_stack_cache); ++#endif ++ ++ lockdep_init_task(&init_task); ++} ++ ++int __weak arch_dup_task_struct(struct task_struct *dst, ++ struct task_struct *src) ++{ ++ *dst = *src; ++ return 0; ++} ++ ++void set_task_stack_end_magic(struct task_struct *tsk) ++{ ++ unsigned long *stackend; ++ ++ stackend = end_of_stack(tsk); ++ *stackend = STACK_END_MAGIC; /* for overflow detection */ ++} ++ ++static struct task_struct *dup_task_struct(struct task_struct *orig, int node) ++{ ++ struct task_struct *tsk; ++ unsigned long *stack; ++ struct vm_struct *stack_vm_area; ++ int err; ++ ++ if (node == NUMA_NO_NODE) ++ node = tsk_fork_get_node(orig); ++ tsk = alloc_task_struct_node(node); ++ if (!tsk) ++ return NULL; ++ ++ stack = alloc_thread_stack_node(tsk, node); ++ if (!stack) ++ goto free_tsk; ++ ++ if (memcg_charge_kernel_stack(tsk)) ++ goto free_stack; ++ ++ stack_vm_area = task_stack_vm_area(tsk); ++ ++ err = arch_dup_task_struct(tsk, orig); ++ ++ /* ++ * arch_dup_task_struct() clobbers the stack-related fields. Make ++ * sure they're properly initialized before using any stack-related ++ * functions again. ++ */ ++ tsk->stack = stack; ++#ifdef CONFIG_VMAP_STACK ++ tsk->stack_vm_area = stack_vm_area; ++#endif ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ atomic_set(&tsk->stack_refcount, 1); ++#endif ++ ++ if (err) ++ goto free_stack; ++ ++#ifdef CONFIG_SECCOMP ++ /* ++ * We must handle setting up seccomp filters once we're under ++ * the sighand lock in case orig has changed between now and ++ * then. Until then, filter must be NULL to avoid messing up ++ * the usage counts on the error path calling free_task. 
++ */ ++ tsk->seccomp.filter = NULL; ++#endif ++ ++ setup_thread_stack(tsk, orig); ++ clear_user_return_notifier(tsk); ++ clear_tsk_need_resched(tsk); ++ set_task_stack_end_magic(tsk); ++ ++#ifdef CONFIG_STACKPROTECTOR ++ tsk->stack_canary = get_random_canary(); ++#endif ++ ++ /* ++ * One for us, one for whoever does the "release_task()" (usually ++ * parent) ++ */ ++ atomic_set(&tsk->usage, 2); ++#ifdef CONFIG_BLK_DEV_IO_TRACE ++ tsk->btrace_seq = 0; ++#endif ++ tsk->splice_pipe = NULL; ++ tsk->task_frag.page = NULL; ++ tsk->wake_q.next = NULL; ++ ++ account_kernel_stack(tsk, 1); ++ ++ kcov_task_init(tsk); ++ ++#ifdef CONFIG_FAULT_INJECTION ++ tsk->fail_nth = 0; ++#endif ++ ++#ifdef CONFIG_BLK_CGROUP ++ tsk->throttle_queue = NULL; ++ tsk->use_memdelay = 0; ++#endif ++ ++#ifdef CONFIG_MEMCG ++ tsk->active_memcg = NULL; ++#endif ++ return tsk; ++ ++free_stack: ++ free_thread_stack(tsk); ++free_tsk: ++ free_task_struct(tsk); ++ return NULL; ++} ++ ++__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); ++ ++static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; ++ ++static int __init coredump_filter_setup(char *s) ++{ ++ default_dump_filter = ++ (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & ++ MMF_DUMP_FILTER_MASK; ++ return 1; ++} ++ ++__setup("coredump_filter=", coredump_filter_setup); ++ ++#include ++ ++static void mm_init_aio(struct mm_struct *mm) ++{ ++#ifdef CONFIG_AIO ++ spin_lock_init(&mm->ioctx_lock); ++ mm->ioctx_table = NULL; ++#endif ++} ++ ++static __always_inline void mm_clear_owner(struct mm_struct *mm, ++ struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ if (mm->owner == p) ++ WRITE_ONCE(mm->owner, NULL); ++#endif ++} ++ ++static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) ++{ ++#ifdef CONFIG_MEMCG ++ mm->owner = p; ++#endif ++} ++ ++static void mm_init_uprobes_state(struct mm_struct *mm) ++{ ++#ifdef CONFIG_UPROBES ++ mm->uprobes_state.xol_area = NULL; ++#endif ++} ++ ++static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, ++ struct user_namespace *user_ns) ++{ ++ mm->mmap = NULL; ++ mm->mm_rb = RB_ROOT; ++ mm->vmacache_seqnum = 0; ++ atomic_set(&mm->mm_users, 1); ++ atomic_set(&mm->mm_count, 1); ++ init_rwsem(&mm->mmap_sem); ++ INIT_LIST_HEAD(&mm->mmlist); ++ mm->core_state = NULL; ++ mm_pgtables_bytes_init(mm); ++ mm->map_count = 0; ++ atomic_long_set(&mm->locked_vm, 0); ++ mm->pinned_vm = 0; ++ memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); ++ spin_lock_init(&mm->page_table_lock); ++ spin_lock_init(&mm->arg_lock); ++ mm_init_cpumask(mm); ++ mm_init_aio(mm); ++ mm_init_owner(mm, p); ++ RCU_INIT_POINTER(mm->exe_file, NULL); ++ mmu_notifier_mm_init(mm); ++ hmm_mm_init(mm); ++ init_tlb_flush_pending(mm); ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS ++ mm->pmd_huge_pte = NULL; ++#endif ++ mm_init_uprobes_state(mm); ++ ++ if (current->mm) { ++ mm->flags = current->mm->flags & MMF_INIT_MASK; ++ mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; ++ } else { ++ mm->flags = default_dump_filter; ++ mm->def_flags = 0; ++ } ++ ++ if (mm_alloc_pgd(mm)) ++ goto fail_nopgd; ++ ++ if (init_new_context(p, mm)) ++ goto fail_nocontext; ++ ++ mm->user_ns = get_user_ns(user_ns); ++ return mm; ++ ++fail_nocontext: ++ mm_free_pgd(mm); ++fail_nopgd: ++ free_mm(mm); ++ return NULL; ++} ++ ++/* ++ * Allocate and initialize an mm_struct. 
++ */ ++struct mm_struct *mm_alloc(void) ++{ ++ struct mm_struct *mm; ++ ++ mm = allocate_mm(); ++ if (!mm) ++ return NULL; ++ ++ memset(mm, 0, sizeof(*mm)); ++ return mm_init(mm, current, current_user_ns()); ++} ++ ++static inline void __mmput(struct mm_struct *mm) ++{ ++ VM_BUG_ON(atomic_read(&mm->mm_users)); ++ ++ uprobe_clear_state(mm); ++ exit_aio(mm); ++ ksm_exit(mm); ++ khugepaged_exit(mm); /* must run before exit_mmap */ ++ exit_mmap(mm); ++ mm_put_huge_zero_page(mm); ++ set_mm_exe_file(mm, NULL); ++ if (!list_empty(&mm->mmlist)) { ++ spin_lock(&mmlist_lock); ++ list_del(&mm->mmlist); ++ spin_unlock(&mmlist_lock); ++ } ++ if (mm->binfmt) ++ module_put(mm->binfmt->module); ++ mmdrop(mm); ++} ++ ++/* ++ * Decrement the use count and release all resources for an mm. ++ */ ++void mmput(struct mm_struct *mm) ++{ ++ might_sleep(); ++ ++ if (atomic_dec_and_test(&mm->mm_users)) ++ __mmput(mm); ++} ++EXPORT_SYMBOL_GPL(mmput); ++ ++#ifdef CONFIG_MMU ++static void mmput_async_fn(struct work_struct *work) ++{ ++ struct mm_struct *mm = container_of(work, struct mm_struct, ++ async_put_work); ++ ++ __mmput(mm); ++} ++ ++void mmput_async(struct mm_struct *mm) ++{ ++ if (atomic_dec_and_test(&mm->mm_users)) { ++ INIT_WORK(&mm->async_put_work, mmput_async_fn); ++ schedule_work(&mm->async_put_work); ++ } ++} ++EXPORT_SYMBOL_GPL(mmput_async); ++#endif ++ ++/** ++ * set_mm_exe_file - change a reference to the mm's executable file ++ * ++ * This changes mm's executable file (shown as symlink /proc/[pid]/exe). ++ * ++ * Main users are mmput() and sys_execve(). Callers prevent concurrent ++ * invocations: in mmput() nobody alive left, in execve task is single ++ * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the ++ * mm->exe_file, but does so without using set_mm_exe_file() in order ++ * to do avoid the need for any locks. ++ */ ++void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) ++{ ++ struct file *old_exe_file; ++ ++ /* ++ * It is safe to dereference the exe_file without RCU as ++ * this function is only called if nobody else can access ++ * this mm -- see comment above for justification. ++ */ ++ old_exe_file = rcu_dereference_raw(mm->exe_file); ++ ++ if (new_exe_file) ++ get_file(new_exe_file); ++ rcu_assign_pointer(mm->exe_file, new_exe_file); ++ if (old_exe_file) ++ fput(old_exe_file); ++} ++ ++/** ++ * get_mm_exe_file - acquire a reference to the mm's executable file ++ * ++ * Returns %NULL if mm has no associated executable file. ++ * User must release file via fput(). ++ */ ++struct file *get_mm_exe_file(struct mm_struct *mm) ++{ ++ struct file *exe_file; ++ ++ rcu_read_lock(); ++ exe_file = rcu_dereference(mm->exe_file); ++ if (exe_file && !get_file_rcu(exe_file)) ++ exe_file = NULL; ++ rcu_read_unlock(); ++ return exe_file; ++} ++EXPORT_SYMBOL(get_mm_exe_file); ++ ++/** ++ * get_task_exe_file - acquire a reference to the task's executable file ++ * ++ * Returns %NULL if task's mm (if any) has no associated executable file or ++ * this is a kernel thread with borrowed mm (see the comment above get_task_mm). ++ * User must release file via fput(). 
++ */ ++struct file *get_task_exe_file(struct task_struct *task) ++{ ++ struct file *exe_file = NULL; ++ struct mm_struct *mm; ++ ++ task_lock(task); ++ mm = task->mm; ++ if (mm) { ++ if (!(task->flags & PF_KTHREAD)) ++ exe_file = get_mm_exe_file(mm); ++ } ++ task_unlock(task); ++ return exe_file; ++} ++EXPORT_SYMBOL(get_task_exe_file); ++ ++/** ++ * get_task_mm - acquire a reference to the task's mm ++ * ++ * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning ++ * this kernel workthread has transiently adopted a user mm with use_mm, ++ * to do its AIO) is not set and if so returns a reference to it, after ++ * bumping up the use count. User must release the mm via mmput() ++ * after use. Typically used by /proc and ptrace. ++ */ ++struct mm_struct *get_task_mm(struct task_struct *task) ++{ ++ struct mm_struct *mm; ++ ++ task_lock(task); ++ mm = task->mm; ++ if (mm) { ++ if (task->flags & PF_KTHREAD) ++ mm = NULL; ++ else ++ mmget(mm); ++ } ++ task_unlock(task); ++ return mm; ++} ++EXPORT_SYMBOL_GPL(get_task_mm); ++ ++/** ++ * mm_access - check access permission to a task and and acquire a reference to ++ * its mm. ++ * @task: target task ++ * @mode: selects type of access and caller credentials ++ * ++ * Return the task's mm on success, or %NULL if it cannot be accessed. ++ * ++ * Check if the caller is allowed to read or write the target task's pages. ++ * @mode describes the access mode and credentials using ptrace access flags. ++ * See ptrace_may_access() for more details. On success, a reference to the mm ++ * is taken. ++ */ ++struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) ++{ ++ struct mm_struct *mm; ++ int err; ++ ++ err = mutex_lock_killable(&task->signal->cred_guard_mutex); ++ if (err) ++ return ERR_PTR(err); ++ ++ mm = get_task_mm(task); ++ if (mm && mm != current->mm && ++ !ptrace_may_access(task, mode)) { ++ mmput(mm); ++ mm = ERR_PTR(-EACCES); ++ } ++ mutex_unlock(&task->signal->cred_guard_mutex); ++ ++ return mm; ++} ++EXPORT_SYMBOL_GPL(mm_access); ++ ++static void complete_vfork_done(struct task_struct *tsk) ++{ ++ struct completion *vfork; ++ ++ task_lock(tsk); ++ vfork = tsk->vfork_done; ++ if (likely(vfork)) { ++ tsk->vfork_done = NULL; ++ complete(vfork); ++ } ++ task_unlock(tsk); ++} ++ ++static int wait_for_vfork_done(struct task_struct *child, ++ struct completion *vfork) ++{ ++ int killed; ++ ++ freezer_do_not_count(); ++ killed = wait_for_completion_killable(vfork); ++ freezer_count(); ++ ++ if (killed) { ++ task_lock(child); ++ child->vfork_done = NULL; ++ task_unlock(child); ++ } ++ ++ put_task_struct(child); ++ return killed; ++} ++ ++/* Please note the differences between mmput and mm_release. ++ * mmput is called whenever we stop holding onto a mm_struct, ++ * error success whatever. ++ * ++ * mm_release is called after a mm_struct has been removed ++ * from the current process. ++ * ++ * This difference is important for error handling, when we ++ * only half set up a mm_struct for a new process and need to restore ++ * the old one. Because we mmput the new mm_struct before ++ * restoring the old one. . . 
++ * Eric Biederman 10 January 1998 ++ */ ++void mm_release(struct task_struct *tsk, struct mm_struct *mm) ++{ ++ /* Get rid of any futexes when releasing the mm */ ++#ifdef CONFIG_FUTEX ++ if (unlikely(tsk->robust_list)) { ++ exit_robust_list(tsk); ++ tsk->robust_list = NULL; ++ } ++#ifdef CONFIG_COMPAT ++ if (unlikely(tsk->compat_robust_list)) { ++ compat_exit_robust_list(tsk); ++ tsk->compat_robust_list = NULL; ++ } ++#endif ++ if (unlikely(!list_empty(&tsk->pi_state_list))) ++ exit_pi_state_list(tsk); ++#endif ++ ++ uprobe_free_utask(tsk); ++ ++ /* Get rid of any cached register state */ ++ deactivate_mm(tsk, mm); ++ ++ /* ++ * Signal userspace if we're not exiting with a core dump ++ * because we want to leave the value intact for debugging ++ * purposes. ++ */ ++ if (tsk->clear_child_tid) { ++ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && ++ atomic_read(&mm->mm_users) > 1) { ++ /* ++ * We don't check the error code - if userspace has ++ * not set up a proper pointer then tough luck. ++ */ ++ put_user(0, tsk->clear_child_tid); ++ do_futex(tsk->clear_child_tid, FUTEX_WAKE, ++ 1, NULL, NULL, 0, 0); ++ } ++ tsk->clear_child_tid = NULL; ++ } ++ ++ /* ++ * All done, finally we can wake up parent and return this mm to him. ++ * Also kthread_stop() uses this completion for synchronization. ++ */ ++ if (tsk->vfork_done) ++ complete_vfork_done(tsk); ++} ++ ++/* ++ * Allocate a new mm structure and copy contents from the ++ * mm structure of the passed in task structure. ++ */ ++static struct mm_struct *dup_mm(struct task_struct *tsk) ++{ ++ struct mm_struct *mm, *oldmm = current->mm; ++ int err; ++ ++ mm = allocate_mm(); ++ if (!mm) ++ goto fail_nomem; ++ ++ memcpy(mm, oldmm, sizeof(*mm)); ++ ++ if (!mm_init(mm, tsk, mm->user_ns)) ++ goto fail_nomem; ++ ++ err = dup_mmap(mm, oldmm); ++ if (err) ++ goto free_pt; ++ ++ mm->hiwater_rss = get_mm_rss(mm); ++ mm->hiwater_vm = mm->total_vm; ++ ++ if (mm->binfmt && !try_module_get(mm->binfmt->module)) ++ goto free_pt; ++ ++ return mm; ++ ++free_pt: ++ /* don't put binfmt in mmput, we haven't got module yet */ ++ mm->binfmt = NULL; ++ mm_init_owner(mm, NULL); ++ mmput(mm); ++ ++fail_nomem: ++ return NULL; ++} ++ ++static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct mm_struct *mm, *oldmm; ++ int retval; ++ ++ tsk->min_flt = tsk->maj_flt = 0; ++ tsk->nvcsw = tsk->nivcsw = 0; ++#ifdef CONFIG_DETECT_HUNG_TASK ++ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; ++ tsk->last_switch_time = 0; ++#endif ++ ++ tsk->mm = NULL; ++ tsk->active_mm = NULL; ++ ++ /* ++ * Are we cloning a kernel thread? ++ * ++ * We need to steal a active VM for that.. 
++ */ ++ oldmm = current->mm; ++ if (!oldmm) ++ return 0; ++ ++ /* initialize the new vmacache entries */ ++ vmacache_flush(tsk); ++ ++ if (clone_flags & CLONE_VM) { ++ mmget(oldmm); ++ mm = oldmm; ++ goto good_mm; ++ } ++ ++ retval = -ENOMEM; ++ mm = dup_mm(tsk); ++ if (!mm) ++ goto fail_nomem; ++ ++good_mm: ++ tsk->mm = mm; ++ tsk->active_mm = mm; ++ return 0; ++ ++fail_nomem: ++ return retval; ++} ++ ++static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct fs_struct *fs = current->fs; ++ if (clone_flags & CLONE_FS) { ++ /* tsk->fs is already what we want */ ++ spin_lock(&fs->lock); ++ if (fs->in_exec) { ++ spin_unlock(&fs->lock); ++ return -EAGAIN; ++ } ++ fs->users++; ++ spin_unlock(&fs->lock); ++ return 0; ++ } ++ tsk->fs = copy_fs_struct(fs); ++ if (!tsk->fs) ++ return -ENOMEM; ++ return 0; ++} ++ ++static int copy_files(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct files_struct *oldf, *newf; ++ int error = 0; ++ ++ /* ++ * A background process may not have any files ... ++ */ ++ oldf = current->files; ++ if (!oldf) ++ goto out; ++ ++ if (clone_flags & CLONE_FILES) { ++ atomic_inc(&oldf->count); ++ goto out; ++ } ++ ++ newf = dup_fd(oldf, &error); ++ if (!newf) ++ goto out; ++ ++ tsk->files = newf; ++ error = 0; ++out: ++ return error; ++} ++ ++static int copy_io(unsigned long clone_flags, struct task_struct *tsk) ++{ ++#ifdef CONFIG_BLOCK ++ struct io_context *ioc = current->io_context; ++ struct io_context *new_ioc; ++ ++ if (!ioc) ++ return 0; ++ /* ++ * Share io context with parent, if CLONE_IO is set ++ */ ++ if (clone_flags & CLONE_IO) { ++ ioc_task_link(ioc); ++ tsk->io_context = ioc; ++ } else if (ioprio_valid(ioc->ioprio)) { ++ new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); ++ if (unlikely(!new_ioc)) ++ return -ENOMEM; ++ ++ new_ioc->ioprio = ioc->ioprio; ++ put_io_context(new_ioc); ++ } ++#endif ++ return 0; ++} ++ ++static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct sighand_struct *sig; ++ ++ if (clone_flags & CLONE_SIGHAND) { ++ atomic_inc(¤t->sighand->count); ++ return 0; ++ } ++ sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); ++ rcu_assign_pointer(tsk->sighand, sig); ++ if (!sig) ++ return -ENOMEM; ++ ++ atomic_set(&sig->count, 1); ++ spin_lock_irq(¤t->sighand->siglock); ++ memcpy(sig->action, current->sighand->action, sizeof(sig->action)); ++ spin_unlock_irq(¤t->sighand->siglock); ++ return 0; ++} ++ ++void __cleanup_sighand(struct sighand_struct *sighand) ++{ ++ if (atomic_dec_and_test(&sighand->count)) { ++ signalfd_cleanup(sighand); ++ /* ++ * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it ++ * without an RCU grace period, see __lock_task_sighand(). ++ */ ++ kmem_cache_free(sighand_cachep, sighand); ++ } ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++/* ++ * Initialize POSIX timer handling for a thread group. ++ */ ++static void posix_cpu_timers_init_group(struct signal_struct *sig) ++{ ++ unsigned long cpu_limit; ++ ++ cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); ++ if (cpu_limit != RLIM_INFINITY) { ++ sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC; ++ sig->cputimer.running = true; ++ } ++ ++ /* The timer lists. 
*/ ++ INIT_LIST_HEAD(&sig->cpu_timers[0]); ++ INIT_LIST_HEAD(&sig->cpu_timers[1]); ++ INIT_LIST_HEAD(&sig->cpu_timers[2]); ++} ++#else ++static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } ++#endif ++ ++static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ++{ ++ struct signal_struct *sig; ++ ++ if (clone_flags & CLONE_THREAD) ++ return 0; ++ ++ sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); ++ tsk->signal = sig; ++ if (!sig) ++ return -ENOMEM; ++ ++ sig->nr_threads = 1; ++ atomic_set(&sig->live, 1); ++ atomic_set(&sig->sigcnt, 1); ++ ++ /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ ++ sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); ++ tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); ++ ++ init_waitqueue_head(&sig->wait_chldexit); ++ sig->curr_target = tsk; ++ init_sigpending(&sig->shared_pending); ++ INIT_HLIST_HEAD(&sig->multiprocess); ++ seqlock_init(&sig->stats_lock); ++ prev_cputime_init(&sig->prev_cputime); ++ ++#ifdef CONFIG_POSIX_TIMERS ++ INIT_LIST_HEAD(&sig->posix_timers); ++ hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ sig->real_timer.function = it_real_fn; ++#endif ++ ++ task_lock(current->group_leader); ++ memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); ++ task_unlock(current->group_leader); ++ ++ posix_cpu_timers_init_group(sig); ++ ++ tty_audit_fork(sig); ++ sched_autogroup_fork(sig); ++ ++ sig->oom_score_adj = current->signal->oom_score_adj; ++ sig->oom_score_adj_min = current->signal->oom_score_adj_min; ++ ++ mutex_init(&sig->cred_guard_mutex); ++ ++ return 0; ++} ++ ++static void copy_seccomp(struct task_struct *p) ++{ ++#ifdef CONFIG_SECCOMP ++ /* ++ * Must be called with sighand->lock held, which is common to ++ * all threads in the group. Holding cred_guard_mutex is not ++ * needed because this new task is not yet running and cannot ++ * be racing exec. ++ */ ++ assert_spin_locked(¤t->sighand->siglock); ++ ++ /* Ref-count the new filter user, and assign it. */ ++ get_seccomp_filter(current); ++ p->seccomp = current->seccomp; ++ ++ /* ++ * Explicitly enable no_new_privs here in case it got set ++ * between the task_struct being duplicated and holding the ++ * sighand lock. The seccomp state and nnp must be in sync. ++ */ ++ if (task_no_new_privs(current)) ++ task_set_no_new_privs(p); ++ ++ /* ++ * If the parent gained a seccomp mode after copying thread ++ * flags and between before we held the sighand lock, we have ++ * to manually enable the seccomp thread flag here. ++ */ ++ if (p->seccomp.mode != SECCOMP_MODE_DISABLED) ++ set_tsk_thread_flag(p, TIF_SECCOMP); ++#endif ++} ++ ++SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) ++{ ++ current->clear_child_tid = tidptr; ++ ++ return task_pid_vnr(current); ++} ++ ++static void rt_mutex_init_task(struct task_struct *p) ++{ ++ raw_spin_lock_init(&p->pi_lock); ++#ifdef CONFIG_RT_MUTEXES ++ p->pi_waiters = RB_ROOT_CACHED; ++ p->pi_top_task = NULL; ++ p->pi_blocked_on = NULL; ++#endif ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++/* ++ * Initialize POSIX timer handling for a single task. 
++ */ ++static void posix_cpu_timers_init(struct task_struct *tsk) ++{ ++ tsk->cputime_expires.prof_exp = 0; ++ tsk->cputime_expires.virt_exp = 0; ++ tsk->cputime_expires.sched_exp = 0; ++ INIT_LIST_HEAD(&tsk->cpu_timers[0]); ++ INIT_LIST_HEAD(&tsk->cpu_timers[1]); ++ INIT_LIST_HEAD(&tsk->cpu_timers[2]); ++} ++#else ++static inline void posix_cpu_timers_init(struct task_struct *tsk) { } ++#endif ++ ++static inline void init_task_pid_links(struct task_struct *task) ++{ ++ enum pid_type type; ++ ++ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { ++ INIT_HLIST_NODE(&task->pid_links[type]); ++ } ++} ++ ++static inline void ++init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) ++{ ++ if (type == PIDTYPE_PID) ++ task->thread_pid = pid; ++ else ++ task->signal->pids[type] = pid; ++} ++ ++static inline void rcu_copy_process(struct task_struct *p) ++{ ++#ifdef CONFIG_PREEMPT_RCU ++ p->rcu_read_lock_nesting = 0; ++ p->rcu_read_unlock_special.s = 0; ++ p->rcu_blocked_node = NULL; ++ INIT_LIST_HEAD(&p->rcu_node_entry); ++#endif /* #ifdef CONFIG_PREEMPT_RCU */ ++#ifdef CONFIG_TASKS_RCU ++ p->rcu_tasks_holdout = false; ++ INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); ++ p->rcu_tasks_idle_cpu = -1; ++#endif /* #ifdef CONFIG_TASKS_RCU */ ++} ++ ++#ifdef CONFIG_MEMCG ++static void __delayed_free_task(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ free_task(tsk); ++} ++#endif /* CONFIG_MEMCG */ ++ ++static __always_inline void delayed_free_task(struct task_struct *tsk) ++{ ++#ifdef CONFIG_MEMCG ++ call_rcu(&tsk->rcu, __delayed_free_task); ++#else /* CONFIG_MEMCG */ ++ free_task(tsk); ++#endif /* CONFIG_MEMCG */ ++} ++ ++static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) ++{ ++ /* Skip if kernel thread */ ++ if (!tsk->mm) ++ return; ++ ++ /* Skip if spawning a thread or using vfork */ ++ if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM) ++ return; ++ ++ /* We need to synchronize with __set_oom_adj */ ++ mutex_lock(&oom_adj_mutex); ++ set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); ++ /* Update the values in case they were changed after copy_signal */ ++ tsk->signal->oom_score_adj = current->signal->oom_score_adj; ++ tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; ++ mutex_unlock(&oom_adj_mutex); ++} ++ ++/* ++ * This creates a new process as a copy of the old one, ++ * but does not actually start it yet. ++ * ++ * It copies the registers, and all the appropriate ++ * parts of the process environment (as per the clone ++ * flags). The actual kick-off is left to the caller. ++ */ ++static __latent_entropy struct task_struct *copy_process( ++ unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *child_tidptr, ++ struct pid *pid, ++ int trace, ++ unsigned long tls, ++ int node) ++{ ++ int retval; ++ struct task_struct *p; ++ struct multiprocess_signals delayed; ++ ++ /* ++ * Don't allow sharing the root directory with processes in a different ++ * namespace ++ */ ++ if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) ++ return ERR_PTR(-EINVAL); ++ ++ if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Thread groups must share signals as well, and detached threads ++ * can only be started up within the thread group. 
++ */ ++ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Shared signal handlers imply shared VM. By way of the above, ++ * thread groups also imply shared VM. Blocking this case allows ++ * for various simplifications in other code. ++ */ ++ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * Siblings of global init remain as zombies on exit since they are ++ * not reaped by their parent (swapper). To solve this and to avoid ++ * multi-rooted process trees, prevent global and container-inits ++ * from creating siblings. ++ */ ++ if ((clone_flags & CLONE_PARENT) && ++ current->signal->flags & SIGNAL_UNKILLABLE) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * If the new process will be in a different pid or user namespace ++ * do not allow it to share a thread group with the forking task. ++ */ ++ if (clone_flags & CLONE_THREAD) { ++ if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || ++ (task_active_pid_ns(current) != ++ current->nsproxy->pid_ns_for_children)) ++ return ERR_PTR(-EINVAL); ++ } ++ ++ /* ++ * Force any signals received before this point to be delivered ++ * before the fork happens. Collect up signals sent to multiple ++ * processes that happen during the fork and delay them so that ++ * they appear to happen after the fork. ++ */ ++ sigemptyset(&delayed.signal); ++ INIT_HLIST_NODE(&delayed.node); ++ ++ spin_lock_irq(¤t->sighand->siglock); ++ if (!(clone_flags & CLONE_THREAD)) ++ hlist_add_head(&delayed.node, ¤t->signal->multiprocess); ++ recalc_sigpending(); ++ spin_unlock_irq(¤t->sighand->siglock); ++ retval = -ERESTARTNOINTR; ++ if (signal_pending(current)) ++ goto fork_out; ++ ++ retval = -ENOMEM; ++ p = dup_task_struct(current, node); ++ if (!p) ++ goto fork_out; ++ ++ /* ++ * This _must_ happen before we call free_task(), i.e. before we jump ++ * to any of the bad_fork_* labels. This is to avoid freeing ++ * p->set_child_tid which is (ab)used as a kthread's data pointer for ++ * kernel threads (PF_KTHREAD). ++ */ ++ p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; ++ /* ++ * Clear TID on mm_release()? ++ */ ++ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; ++ ++ ftrace_graph_init_task(p); ++ ++ rt_mutex_init_task(p); ++ ++#ifdef CONFIG_PROVE_LOCKING ++ DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); ++ DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); ++#endif ++ retval = -EAGAIN; ++ if (atomic_read(&p->real_cred->user->processes) >= ++ task_rlimit(p, RLIMIT_NPROC)) { ++ if (p->real_cred->user != INIT_USER && ++ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) ++ goto bad_fork_free; ++ } ++ current->flags &= ~PF_NPROC_EXCEEDED; ++ ++ retval = copy_creds(p, clone_flags); ++ if (retval < 0) ++ goto bad_fork_free; ++ ++ /* ++ * If multiple threads are within copy_process(), then this check ++ * triggers too late. This doesn't hurt, the check is only there ++ * to stop root fork bombs. 
++ */ ++ retval = -EAGAIN; ++ if (nr_threads >= max_threads) ++ goto bad_fork_cleanup_count; ++ ++ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ ++ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); ++ p->flags |= PF_FORKNOEXEC; ++ INIT_LIST_HEAD(&p->children); ++ INIT_LIST_HEAD(&p->sibling); ++ rcu_copy_process(p); ++ p->vfork_done = NULL; ++ spin_lock_init(&p->alloc_lock); ++ ++ init_sigpending(&p->pending); ++ ++ p->utime = p->stime = p->gtime = 0; ++#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME ++ p->utimescaled = p->stimescaled = 0; ++#endif ++ prev_cputime_init(&p->prev_cputime); ++ ++#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN ++ seqcount_init(&p->vtime.seqcount); ++ p->vtime.starttime = 0; ++ p->vtime.state = VTIME_INACTIVE; ++#endif ++ ++#if defined(SPLIT_RSS_COUNTING) ++ memset(&p->rss_stat, 0, sizeof(p->rss_stat)); ++#endif ++ ++ p->default_timer_slack_ns = current->timer_slack_ns; ++ ++ task_io_accounting_init(&p->ioac); ++ acct_clear_integrals(p); ++ ++ posix_cpu_timers_init(p); ++ ++ p->io_context = NULL; ++ audit_set_context(p, NULL); ++ cgroup_fork(p); ++#ifdef CONFIG_NUMA ++ p->mempolicy = mpol_dup(p->mempolicy); ++ if (IS_ERR(p->mempolicy)) { ++ retval = PTR_ERR(p->mempolicy); ++ p->mempolicy = NULL; ++ goto bad_fork_cleanup_threadgroup_lock; ++ } ++#endif ++#ifdef CONFIG_CPUSETS ++ p->cpuset_mem_spread_rotor = NUMA_NO_NODE; ++ p->cpuset_slab_spread_rotor = NUMA_NO_NODE; ++ seqcount_init(&p->mems_allowed_seq); ++#endif ++#ifdef CONFIG_TRACE_IRQFLAGS ++ p->irq_events = 0; ++ p->hardirqs_enabled = 0; ++ p->hardirq_enable_ip = 0; ++ p->hardirq_enable_event = 0; ++ p->hardirq_disable_ip = _THIS_IP_; ++ p->hardirq_disable_event = 0; ++ p->softirqs_enabled = 1; ++ p->softirq_enable_ip = _THIS_IP_; ++ p->softirq_enable_event = 0; ++ p->softirq_disable_ip = 0; ++ p->softirq_disable_event = 0; ++ p->hardirq_context = 0; ++ p->softirq_context = 0; ++#endif ++ ++ p->pagefault_disabled = 0; ++ ++#ifdef CONFIG_LOCKDEP ++ p->lockdep_depth = 0; /* no locks held yet */ ++ p->curr_chain_key = 0; ++ p->lockdep_recursion = 0; ++ lockdep_init_task(p); ++#endif ++ ++#ifdef CONFIG_DEBUG_MUTEXES ++ p->blocked_on = NULL; /* not blocked yet */ ++#endif ++#ifdef CONFIG_BCACHE ++ p->sequential_io = 0; ++ p->sequential_io_avg = 0; ++#endif ++ ++ /* Perform scheduler related setup. Assign this task to a CPU. 
*/ ++ retval = sched_fork(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_policy; ++ ++ retval = perf_event_init_task(p); ++ if (retval) ++ goto bad_fork_cleanup_policy; ++ retval = audit_alloc(p); ++ if (retval) ++ goto bad_fork_cleanup_perf; ++ /* copy all the process information */ ++ shm_init_task(p); ++ retval = security_task_alloc(p, clone_flags); ++ if (retval) ++ goto bad_fork_cleanup_audit; ++ retval = copy_semundo(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_security; ++ retval = copy_files(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_semundo; ++ retval = copy_fs(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_files; ++ retval = copy_sighand(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_fs; ++ retval = copy_signal(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_sighand; ++ retval = copy_mm(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_signal; ++ retval = copy_namespaces(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_mm; ++ retval = copy_io(clone_flags, p); ++ if (retval) ++ goto bad_fork_cleanup_namespaces; ++ retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); ++ if (retval) ++ goto bad_fork_cleanup_io; ++ ++ if (pid != &init_struct_pid) { ++ pid = alloc_pid(p->nsproxy->pid_ns_for_children); ++ if (IS_ERR(pid)) { ++ retval = PTR_ERR(pid); ++ goto bad_fork_cleanup_thread; ++ } ++ } ++ ++#ifdef CONFIG_BLOCK ++ p->plug = NULL; ++#endif ++#ifdef CONFIG_FUTEX ++ p->robust_list = NULL; ++#ifdef CONFIG_COMPAT ++ p->compat_robust_list = NULL; ++#endif ++ INIT_LIST_HEAD(&p->pi_state_list); ++ p->pi_state_cache = NULL; ++#endif ++ /* ++ * sigaltstack should be cleared when sharing the same VM ++ */ ++ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) ++ sas_ss_reset(p); ++ ++ /* ++ * Syscall tracing and stepping should be turned off in the ++ * child regardless of CLONE_PTRACE. ++ */ ++ user_disable_single_step(p); ++ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); ++#ifdef TIF_SYSCALL_EMU ++ clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); ++#endif ++ clear_all_latency_tracing(p); ++ ++ /* ok, now we should be set up.. */ ++ p->pid = pid_nr(pid); ++ if (clone_flags & CLONE_THREAD) { ++ p->exit_signal = -1; ++ p->group_leader = current->group_leader; ++ p->tgid = current->tgid; ++ } else { ++ if (clone_flags & CLONE_PARENT) ++ p->exit_signal = current->group_leader->exit_signal; ++ else ++ p->exit_signal = (clone_flags & CSIGNAL); ++ p->group_leader = p; ++ p->tgid = p->pid; ++ } ++ ++ p->nr_dirtied = 0; ++ p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); ++ p->dirty_paused_when = 0; ++ ++ p->pdeath_signal = 0; ++ INIT_LIST_HEAD(&p->thread_group); ++ p->task_works = NULL; ++ ++ cgroup_threadgroup_change_begin(current); ++ /* ++ * Ensure that the cgroup subsystem policies allow the new process to be ++ * forked. It should be noted the the new process's css_set can be changed ++ * between here and cgroup_post_fork() if an organisation operation is in ++ * progress. ++ */ ++ retval = cgroup_can_fork(p); ++ if (retval) ++ goto bad_fork_free_pid; ++ ++ /* ++ * From this point on we must avoid any synchronous user-space ++ * communication until we take the tasklist-lock. In particular, we do ++ * not want user-space to be able to predict the process start-time by ++ * stalling fork(2) after we recorded the start_time but before it is ++ * visible to the system. 
++ */ ++ ++ p->start_time = ktime_get_ns(); ++ p->real_start_time = ktime_get_boot_ns(); ++ ++ /* ++ * Make it visible to the rest of the system, but dont wake it up yet. ++ * Need tasklist lock for parent etc handling! ++ */ ++ write_lock_irq(&tasklist_lock); ++ ++ /* CLONE_PARENT re-uses the old parent */ ++ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { ++ p->real_parent = current->real_parent; ++ p->parent_exec_id = current->parent_exec_id; ++ p->parent_exec_id_u64 = current->parent_exec_id_u64; ++ } else { ++ p->real_parent = current; ++ p->parent_exec_id = current->self_exec_id; ++ p->parent_exec_id_u64 = current->self_exec_id_u64; ++ } ++ ++ klp_copy_process(p); ++ ++ spin_lock(¤t->sighand->siglock); ++ ++ /* ++ * Copy seccomp details explicitly here, in case they were changed ++ * before holding sighand lock. ++ */ ++ copy_seccomp(p); ++ ++ rseq_fork(p, clone_flags); ++ ++ /* Don't start children in a dying pid namespace */ ++ if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { ++ retval = -ENOMEM; ++ goto bad_fork_cancel_cgroup; ++ } ++ ++ /* Let kill terminate clone/fork in the middle */ ++ if (fatal_signal_pending(current)) { ++ retval = -EINTR; ++ goto bad_fork_cancel_cgroup; ++ } ++ ++ ++ init_task_pid_links(p); ++ if (likely(p->pid)) { ++ ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); ++ ++ init_task_pid(p, PIDTYPE_PID, pid); ++ if (thread_group_leader(p)) { ++ init_task_pid(p, PIDTYPE_TGID, pid); ++ init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); ++ init_task_pid(p, PIDTYPE_SID, task_session(current)); ++ ++ if (is_child_reaper(pid)) { ++ ns_of_pid(pid)->child_reaper = p; ++ p->signal->flags |= SIGNAL_UNKILLABLE; ++ } ++ p->signal->shared_pending.signal = delayed.signal; ++ p->signal->tty = tty_kref_get(current->signal->tty); ++ /* ++ * Inherit has_child_subreaper flag under the same ++ * tasklist_lock with adding child to the process tree ++ * for propagate_has_child_subreaper optimization. 
++ */ ++ p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || ++ p->real_parent->signal->is_child_subreaper; ++ list_add_tail(&p->sibling, &p->real_parent->children); ++ list_add_tail_rcu(&p->tasks, &init_task.tasks); ++ attach_pid(p, PIDTYPE_TGID); ++ attach_pid(p, PIDTYPE_PGID); ++ attach_pid(p, PIDTYPE_SID); ++ __this_cpu_inc(process_counts); ++ } else { ++ current->signal->nr_threads++; ++ atomic_inc(¤t->signal->live); ++ atomic_inc(¤t->signal->sigcnt); ++ task_join_group_stop(p); ++ list_add_tail_rcu(&p->thread_group, ++ &p->group_leader->thread_group); ++ list_add_tail_rcu(&p->thread_node, ++ &p->signal->thread_head); ++ } ++ attach_pid(p, PIDTYPE_PID); ++ nr_threads++; ++ } ++ total_forks++; ++ hlist_del_init(&delayed.node); ++ spin_unlock(¤t->sighand->siglock); ++ syscall_tracepoint_update(p); ++ write_unlock_irq(&tasklist_lock); ++ ++ proc_fork_connector(p); ++ cgroup_post_fork(p); ++ cgroup_threadgroup_change_end(current); ++ perf_event_fork(p); ++ ++ trace_task_newtask(p, clone_flags); ++ uprobe_copy_process(p, clone_flags); ++ ++ copy_oom_score_adj(clone_flags, p); ++ ++ return p; ++ ++bad_fork_cancel_cgroup: ++ spin_unlock(¤t->sighand->siglock); ++ write_unlock_irq(&tasklist_lock); ++ cgroup_cancel_fork(p); ++bad_fork_free_pid: ++ cgroup_threadgroup_change_end(current); ++ if (pid != &init_struct_pid) ++ free_pid(pid); ++bad_fork_cleanup_thread: ++ exit_thread(p); ++bad_fork_cleanup_io: ++ if (p->io_context) ++ exit_io_context(p); ++bad_fork_cleanup_namespaces: ++ exit_task_namespaces(p); ++bad_fork_cleanup_mm: ++ if (p->mm) { ++ mm_clear_owner(p->mm, p); ++ mmput(p->mm); ++ } ++bad_fork_cleanup_signal: ++ if (!(clone_flags & CLONE_THREAD)) ++ free_signal_struct(p->signal); ++bad_fork_cleanup_sighand: ++ __cleanup_sighand(p->sighand); ++bad_fork_cleanup_fs: ++ exit_fs(p); /* blocking */ ++bad_fork_cleanup_files: ++ exit_files(p); /* blocking */ ++bad_fork_cleanup_semundo: ++ exit_sem(p); ++bad_fork_cleanup_security: ++ security_task_free(p); ++bad_fork_cleanup_audit: ++ audit_free(p); ++bad_fork_cleanup_perf: ++ perf_event_free_task(p); ++bad_fork_cleanup_policy: ++ lockdep_free_task(p); ++#ifdef CONFIG_NUMA ++ mpol_put(p->mempolicy); ++bad_fork_cleanup_threadgroup_lock: ++#endif ++ delayacct_tsk_free(p); ++bad_fork_cleanup_count: ++ atomic_dec(&p->cred->user->processes); ++ exit_creds(p); ++bad_fork_free: ++ p->state = TASK_DEAD; ++ put_task_stack(p); ++ delayed_free_task(p); ++fork_out: ++ spin_lock_irq(¤t->sighand->siglock); ++ hlist_del_init(&delayed.node); ++ spin_unlock_irq(¤t->sighand->siglock); ++ return ERR_PTR(retval); ++} ++ ++static inline void init_idle_pids(struct task_struct *idle) ++{ ++ enum pid_type type; ++ ++ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { ++ INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ ++ init_task_pid(idle, type, &init_struct_pid); ++ } ++} ++ ++struct task_struct *fork_idle(int cpu) ++{ ++ struct task_struct *task; ++ task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, ++ cpu_to_node(cpu)); ++ if (!IS_ERR(task)) { ++ init_idle_pids(task); ++ init_idle(task, cpu); ++ } ++ ++ return task; ++} ++ ++/* ++ * Ok, this is the main fork-routine. ++ * ++ * It copies the process, and if successful kick-starts ++ * it and waits for it to finish using the VM if required. 
++ */ ++long _do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr, ++ unsigned long tls) ++{ ++ struct completion vfork; ++ struct pid *pid; ++ struct task_struct *p; ++ int trace = 0; ++ long nr; ++ ++ /* ++ * Determine whether and which event to report to ptracer. When ++ * called from kernel_thread or CLONE_UNTRACED is explicitly ++ * requested, no event is reported; otherwise, report if the event ++ * for the type of forking is enabled. ++ */ ++ if (!(clone_flags & CLONE_UNTRACED)) { ++ if (clone_flags & CLONE_VFORK) ++ trace = PTRACE_EVENT_VFORK; ++ else if ((clone_flags & CSIGNAL) != SIGCHLD) ++ trace = PTRACE_EVENT_CLONE; ++ else ++ trace = PTRACE_EVENT_FORK; ++ ++ if (likely(!ptrace_event_enabled(current, trace))) ++ trace = 0; ++ } ++ ++ p = copy_process(clone_flags, stack_start, stack_size, ++ child_tidptr, NULL, trace, tls, NUMA_NO_NODE); ++ add_latent_entropy(); ++ ++ if (IS_ERR(p)) ++ return PTR_ERR(p); ++ ++ /* ++ * Do this prior waking up the new thread - the thread pointer ++ * might get invalid after that point, if the thread exits quickly. ++ */ ++ trace_sched_process_fork(current, p); ++ ++ pid = get_task_pid(p, PIDTYPE_PID); ++ nr = pid_vnr(pid); ++ ++ if (clone_flags & CLONE_PARENT_SETTID) ++ put_user(nr, parent_tidptr); ++ ++ if (clone_flags & CLONE_VFORK) { ++ p->vfork_done = &vfork; ++ init_completion(&vfork); ++ get_task_struct(p); ++ } ++ ++ wake_up_new_task(p); ++ ++ /* forking complete and child started to run, tell ptracer */ ++ if (unlikely(trace)) ++ ptrace_event_pid(trace, pid); ++ ++ if (clone_flags & CLONE_VFORK) { ++ if (!wait_for_vfork_done(p, &vfork)) ++ ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); ++ } ++ ++ put_pid(pid); ++ return nr; ++} ++ ++#ifndef CONFIG_HAVE_COPY_THREAD_TLS ++/* For compatibility with architectures that call do_fork directly rather than ++ * using the syscall entry points below. */ ++long do_fork(unsigned long clone_flags, ++ unsigned long stack_start, ++ unsigned long stack_size, ++ int __user *parent_tidptr, ++ int __user *child_tidptr) ++{ ++ return _do_fork(clone_flags, stack_start, stack_size, ++ parent_tidptr, child_tidptr, 0); ++} ++#endif ++ ++/* ++ * Create a kernel thread. 
++ */ ++pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) ++{ ++ return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, ++ (unsigned long)arg, NULL, NULL, 0); ++} ++ ++#ifdef __ARCH_WANT_SYS_FORK ++SYSCALL_DEFINE0(fork) ++{ ++#ifdef CONFIG_MMU ++ return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); ++#else ++ /* can not support in nommu mode */ ++ return -EINVAL; ++#endif ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_VFORK ++SYSCALL_DEFINE0(vfork) ++{ ++ return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, ++ 0, NULL, NULL, 0); ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_CLONE ++#ifdef CONFIG_CLONE_BACKWARDS ++SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int __user *, parent_tidptr, ++ unsigned long, tls, ++ int __user *, child_tidptr) ++#elif defined(CONFIG_CLONE_BACKWARDS2) ++SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#elif defined(CONFIG_CLONE_BACKWARDS3) ++SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int, stack_size, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#else ++SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, ++ int __user *, parent_tidptr, ++ int __user *, child_tidptr, ++ unsigned long, tls) ++#endif ++{ ++ return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); ++} ++#endif ++ ++void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) ++{ ++ struct task_struct *leader, *parent, *child; ++ int res; ++ ++ read_lock(&tasklist_lock); ++ leader = top = top->group_leader; ++down: ++ for_each_thread(leader, parent) { ++ list_for_each_entry(child, &parent->children, sibling) { ++ res = visitor(child, data); ++ if (res) { ++ if (res < 0) ++ goto out; ++ leader = child; ++ goto down; ++ } ++up: ++ ; ++ } ++ } ++ ++ if (leader != top) { ++ child = leader; ++ parent = child->real_parent; ++ leader = parent->group_leader; ++ goto up; ++ } ++out: ++ read_unlock(&tasklist_lock); ++} ++ ++#ifndef ARCH_MIN_MMSTRUCT_ALIGN ++#define ARCH_MIN_MMSTRUCT_ALIGN 0 ++#endif ++ ++static void sighand_ctor(void *data) ++{ ++ struct sighand_struct *sighand = data; ++ ++ spin_lock_init(&sighand->siglock); ++ init_waitqueue_head(&sighand->signalfd_wqh); ++} ++ ++void __init proc_caches_init(void) ++{ ++ unsigned int mm_size; ++ ++ sighand_cachep = kmem_cache_create("sighand_cache", ++ sizeof(struct sighand_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| ++ SLAB_ACCOUNT, sighand_ctor); ++ signal_cachep = kmem_cache_create("signal_cache", ++ sizeof(struct signal_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ files_cachep = kmem_cache_create("files_cache", ++ sizeof(struct files_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ fs_cachep = kmem_cache_create("fs_cache", ++ sizeof(struct fs_struct), 0, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ NULL); ++ ++ /* ++ * The mm_cpumask is located at the end of mm_struct, and is ++ * dynamically sized based on the maximum CPU number this system ++ * can have, taking hotplug into account (nr_cpu_ids). 
++ */ ++ mm_size = sizeof(struct mm_struct) + cpumask_size(); ++ ++ mm_cachep = kmem_cache_create_usercopy("mm_struct", ++ mm_size, ARCH_MIN_MMSTRUCT_ALIGN, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, ++ offsetof(struct mm_struct, saved_auxv), ++ sizeof_field(struct mm_struct, saved_auxv), ++ NULL); ++ vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); ++ mmap_init(); ++ nsproxy_cache_init(); ++} ++ ++/* ++ * Check constraints on flags passed to the unshare system call. ++ */ ++static int check_unshare_flags(unsigned long unshare_flags) ++{ ++ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| ++ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| ++ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| ++ CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP)) ++ return -EINVAL; ++ /* ++ * Not implemented, but pretend it works if there is nothing ++ * to unshare. Note that unsharing the address space or the ++ * signal handlers also need to unshare the signal queues (aka ++ * CLONE_THREAD). ++ */ ++ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { ++ if (!thread_group_empty(current)) ++ return -EINVAL; ++ } ++ if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { ++ if (atomic_read(¤t->sighand->count) > 1) ++ return -EINVAL; ++ } ++ if (unshare_flags & CLONE_VM) { ++ if (!current_is_single_threaded()) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Unshare the filesystem structure if it is being shared ++ */ ++static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) ++{ ++ struct fs_struct *fs = current->fs; ++ ++ if (!(unshare_flags & CLONE_FS) || !fs) ++ return 0; ++ ++ /* don't need lock here; in the worst case we'll do useless copy */ ++ if (fs->users == 1) ++ return 0; ++ ++ *new_fsp = copy_fs_struct(fs); ++ if (!*new_fsp) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/* ++ * Unshare file descriptor table if it is being shared ++ */ ++static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) ++{ ++ struct files_struct *fd = current->files; ++ int error = 0; ++ ++ if ((unshare_flags & CLONE_FILES) && ++ (fd && atomic_read(&fd->count) > 1)) { ++ *new_fdp = dup_fd(fd, &error); ++ if (!*new_fdp) ++ return error; ++ } ++ ++ return 0; ++} ++ ++/* ++ * unshare allows a process to 'unshare' part of the process ++ * context which was originally shared using clone. copy_* ++ * functions used by do_fork() cannot be used here directly ++ * because they modify an inactive task_struct that is being ++ * constructed. Here we are modifying the current, active, ++ * task_struct. ++ */ ++int ksys_unshare(unsigned long unshare_flags) ++{ ++ struct fs_struct *fs, *new_fs = NULL; ++ struct files_struct *fd, *new_fd = NULL; ++ struct cred *new_cred = NULL; ++ struct nsproxy *new_nsproxy = NULL; ++ int do_sysvsem = 0; ++ int err; ++ ++ /* ++ * If unsharing a user namespace must also unshare the thread group ++ * and unshare the filesystem root and working directories. ++ */ ++ if (unshare_flags & CLONE_NEWUSER) ++ unshare_flags |= CLONE_THREAD | CLONE_FS; ++ /* ++ * If unsharing vm, must also unshare signal handlers. ++ */ ++ if (unshare_flags & CLONE_VM) ++ unshare_flags |= CLONE_SIGHAND; ++ /* ++ * If unsharing a signal handlers, must also unshare the signal queues. ++ */ ++ if (unshare_flags & CLONE_SIGHAND) ++ unshare_flags |= CLONE_THREAD; ++ /* ++ * If unsharing namespace, must also unshare filesystem information. 
++ */ ++ if (unshare_flags & CLONE_NEWNS) ++ unshare_flags |= CLONE_FS; ++ ++ err = check_unshare_flags(unshare_flags); ++ if (err) ++ goto bad_unshare_out; ++ /* ++ * CLONE_NEWIPC must also detach from the undolist: after switching ++ * to a new ipc namespace, the semaphore arrays from the old ++ * namespace are unreachable. ++ */ ++ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) ++ do_sysvsem = 1; ++ err = unshare_fs(unshare_flags, &new_fs); ++ if (err) ++ goto bad_unshare_out; ++ err = unshare_fd(unshare_flags, &new_fd); ++ if (err) ++ goto bad_unshare_cleanup_fs; ++ err = unshare_userns(unshare_flags, &new_cred); ++ if (err) ++ goto bad_unshare_cleanup_fd; ++ err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, ++ new_cred, new_fs); ++ if (err) ++ goto bad_unshare_cleanup_cred; ++ ++ if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { ++ if (do_sysvsem) { ++ /* ++ * CLONE_SYSVSEM is equivalent to sys_exit(). ++ */ ++ exit_sem(current); ++ } ++ if (unshare_flags & CLONE_NEWIPC) { ++ /* Orphan segments in old ns (see sem above). */ ++ exit_shm(current); ++ shm_init_task(current); ++ } ++ ++ if (new_nsproxy) ++ switch_task_namespaces(current, new_nsproxy); ++ ++ task_lock(current); ++ ++ if (new_fs) { ++ fs = current->fs; ++ spin_lock(&fs->lock); ++ current->fs = new_fs; ++ if (--fs->users) ++ new_fs = NULL; ++ else ++ new_fs = fs; ++ spin_unlock(&fs->lock); ++ } ++ ++ if (new_fd) { ++ fd = current->files; ++ current->files = new_fd; ++ new_fd = fd; ++ } ++ ++ task_unlock(current); ++ ++ if (new_cred) { ++ /* Install the new user namespace */ ++ commit_creds(new_cred); ++ new_cred = NULL; ++ } ++ } ++ ++ perf_event_namespaces(current); ++ ++bad_unshare_cleanup_cred: ++ if (new_cred) ++ put_cred(new_cred); ++bad_unshare_cleanup_fd: ++ if (new_fd) ++ put_files_struct(new_fd); ++ ++bad_unshare_cleanup_fs: ++ if (new_fs) ++ free_fs_struct(new_fs); ++ ++bad_unshare_out: ++ return err; ++} ++ ++SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) ++{ ++ return ksys_unshare(unshare_flags); ++} ++ ++/* ++ * Helper to unshare the files of the current task. ++ * We don't want to expose copy_files internals to ++ * the exec layer of the kernel. ++ */ ++ ++int unshare_files(struct files_struct **displaced) ++{ ++ struct task_struct *task = current; ++ struct files_struct *copy = NULL; ++ int error; ++ ++ error = unshare_fd(CLONE_FILES, ©); ++ if (error || !copy) { ++ *displaced = NULL; ++ return error; ++ } ++ *displaced = task->files; ++ task_lock(task); ++ task->files = copy; ++ task_unlock(task); ++ return 0; ++} ++ ++int sysctl_max_threads(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int ret; ++ int threads = max_threads; ++ int min = 1; ++ int max = MAX_THREADS; ++ ++ t = *table; ++ t.data = &threads; ++ t.extra1 = &min; ++ t.extra2 = &max; ++ ++ ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (ret || !write) ++ return ret; ++ ++ max_threads = threads; ++ ++ return 0; ++} +diff -uprN kernel/kernel/ipipe/core.c kernel_new/kernel/ipipe/core.c +--- kernel/kernel/ipipe/core.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/core.c 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,2117 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/core.c ++ * ++ * Copyright (C) 2002-2012 Philippe Gerum. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Architecture-independent I-PIPE core support. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#ifdef CONFIG_PROC_FS ++#include ++#include ++#endif /* CONFIG_PROC_FS */ ++#include ++#include ++#include ++#include ++#include ++ ++struct ipipe_domain ipipe_root; ++EXPORT_SYMBOL_GPL(ipipe_root); ++ ++struct ipipe_domain *ipipe_head_domain = &ipipe_root; ++EXPORT_SYMBOL_GPL(ipipe_head_domain); ++ ++#ifdef CONFIG_SMP ++static __initdata struct ipipe_percpu_domain_data bootup_context = { ++ .status = IPIPE_STALL_MASK, ++ .domain = &ipipe_root, ++}; ++#else ++#define bootup_context ipipe_percpu.root ++#endif /* !CONFIG_SMP */ ++ ++DEFINE_PER_CPU(struct ipipe_percpu_data, ipipe_percpu) = { ++ .root = { ++ .status = IPIPE_STALL_MASK, ++ .domain = &ipipe_root, ++ }, ++ .curr = &bootup_context, ++ .hrtimer_irq = -1, ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ .context_check = 1, ++#endif ++}; ++EXPORT_PER_CPU_SYMBOL(ipipe_percpu); ++ ++/* Up to 2k of pending work data per CPU. */ ++#define WORKBUF_SIZE 2048 ++static DEFINE_PER_CPU_ALIGNED(unsigned char[WORKBUF_SIZE], work_buf); ++static DEFINE_PER_CPU(void *, work_tail); ++static unsigned int __ipipe_work_virq; ++ ++static void __ipipe_do_work(unsigned int virq, void *cookie); ++ ++#ifdef CONFIG_SMP ++ ++#define IPIPE_CRITICAL_TIMEOUT 1000000 ++static cpumask_t __ipipe_cpu_sync_map; ++static cpumask_t __ipipe_cpu_lock_map; ++static cpumask_t __ipipe_cpu_pass_map; ++static unsigned long __ipipe_critical_lock; ++static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); ++static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); ++static void (*__ipipe_cpu_sync) (void); ++ ++#else /* !CONFIG_SMP */ ++/* ++ * Create an alias to the unique root status, so that arch-dep code ++ * may get fast access to this percpu variable including from ++ * assembly. A hard-coded assumption is that root.status appears at ++ * offset #0 of the ipipe_percpu struct. 
++ */ ++extern unsigned long __ipipe_root_status ++__attribute__((alias(__stringify(ipipe_percpu)))); ++EXPORT_SYMBOL(__ipipe_root_status); ++ ++#endif /* !CONFIG_SMP */ ++ ++IPIPE_DEFINE_SPINLOCK(__ipipe_lock); ++ ++static unsigned long __ipipe_virtual_irq_map; ++ ++#ifdef CONFIG_PRINTK ++unsigned int __ipipe_printk_virq; ++int __ipipe_printk_bypass; ++#endif /* CONFIG_PRINTK */ ++ ++#ifdef CONFIG_PROC_FS ++ ++struct proc_dir_entry *ipipe_proc_root; ++ ++static int __ipipe_version_info_show(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "%d\n", IPIPE_CORE_RELEASE); ++ return 0; ++} ++ ++static int __ipipe_version_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_version_info_show, NULL); ++} ++ ++static const struct file_operations __ipipe_version_proc_ops = { ++ .open = __ipipe_version_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int __ipipe_common_info_show(struct seq_file *p, void *data) ++{ ++ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; ++ char handling, lockbit, virtuality; ++ unsigned long ctlbits; ++ unsigned int irq; ++ ++ seq_printf(p, " +--- Handled\n"); ++ seq_printf(p, " |+-- Locked\n"); ++ seq_printf(p, " ||+- Virtual\n"); ++ seq_printf(p, " [IRQ] ||| Handler\n"); ++ ++ mutex_lock(&ipd->mutex); ++ ++ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { ++ ctlbits = ipd->irqs[irq].control; ++ /* ++ * There might be a hole between the last external IRQ ++ * and the first virtual one; skip it. ++ */ ++ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) ++ continue; ++ ++ if (ipipe_virtual_irq_p(irq) ++ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) ++ /* Non-allocated virtual IRQ; skip it. */ ++ continue; ++ ++ if (ctlbits & IPIPE_HANDLE_MASK) ++ handling = 'H'; ++ else ++ handling = '.'; ++ ++ if (ctlbits & IPIPE_LOCK_MASK) ++ lockbit = 'L'; ++ else ++ lockbit = '.'; ++ ++ if (ipipe_virtual_irq_p(irq)) ++ virtuality = 'V'; ++ else ++ virtuality = '.'; ++ ++ if (ctlbits & IPIPE_HANDLE_MASK) ++ seq_printf(p, " %4u: %c%c%c %pf\n", ++ irq, handling, lockbit, virtuality, ++ ipd->irqs[irq].handler); ++ else ++ seq_printf(p, " %4u: %c%c%c\n", ++ irq, handling, lockbit, virtuality); ++ } ++ ++ mutex_unlock(&ipd->mutex); ++ ++ return 0; ++} ++ ++static int __ipipe_common_info_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_common_info_show, PDE_DATA(inode)); ++} ++ ++static const struct file_operations __ipipe_info_proc_ops = { ++ .owner = THIS_MODULE, ++ .open = __ipipe_common_info_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void add_domain_proc(struct ipipe_domain *ipd) ++{ ++ proc_create_data(ipd->name, 0444, ipipe_proc_root, ++ &__ipipe_info_proc_ops, ipd); ++} ++ ++void remove_domain_proc(struct ipipe_domain *ipd) ++{ ++ remove_proc_entry(ipd->name, ipipe_proc_root); ++} ++ ++void __init __ipipe_init_proc(void) ++{ ++ ipipe_proc_root = proc_mkdir("ipipe", NULL); ++ proc_create("version", 0444, ipipe_proc_root, ++ &__ipipe_version_proc_ops); ++ add_domain_proc(ipipe_root_domain); ++ ++ __ipipe_init_tracer(); ++} ++ ++#else ++ ++static inline void add_domain_proc(struct ipipe_domain *ipd) ++{ ++} ++ ++static inline void remove_domain_proc(struct ipipe_domain *ipd) ++{ ++} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++static void init_stage(struct ipipe_domain *ipd) ++{ ++ memset(&ipd->irqs, 0, sizeof(ipd->irqs)); ++ mutex_init(&ipd->mutex); ++ __ipipe_hook_critical_ipi(ipd); 
++} ++ ++static inline int root_context_offset(void) ++{ ++ void root_context_not_at_start_of_ipipe_percpu(void); ++ ++ /* ipipe_percpu.root must be found at offset #0. */ ++ ++ if (offsetof(struct ipipe_percpu_data, root)) ++ root_context_not_at_start_of_ipipe_percpu(); ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++ ++static inline void fixup_percpu_data(void) ++{ ++ struct ipipe_percpu_data *p; ++ int cpu; ++ ++ /* ++ * ipipe_percpu.curr cannot be assigned statically to ++ * &ipipe_percpu.root, due to the dynamic nature of percpu ++ * data. So we make ipipe_percpu.curr refer to a temporary ++ * boot up context in static memory, until we can fixup all ++ * context pointers in this routine, after per-cpu areas have ++ * been eventually set up. The temporary context data is ++ * copied to per_cpu(ipipe_percpu, 0).root in the same move. ++ * ++ * Obviously, this code must run over the boot CPU, before SMP ++ * operations start. ++ */ ++ BUG_ON(smp_processor_id() || !irqs_disabled()); ++ ++ per_cpu(ipipe_percpu, 0).root = bootup_context; ++ ++ for_each_possible_cpu(cpu) { ++ p = &per_cpu(ipipe_percpu, cpu); ++ p->curr = &p->root; ++ } ++} ++ ++#else /* !CONFIG_SMP */ ++ ++static inline void fixup_percpu_data(void) { } ++ ++#endif /* CONFIG_SMP */ ++ ++void __init __ipipe_init_early(void) ++{ ++ struct ipipe_domain *ipd = &ipipe_root; ++ int cpu; ++ ++ fixup_percpu_data(); ++ ++ /* ++ * A lightweight registration code for the root domain. We are ++ * running on the boot CPU, hw interrupts are off, and ++ * secondary CPUs are still lost in space. ++ */ ++ ipd->name = "Linux"; ++ ipd->context_offset = root_context_offset(); ++ init_stage(ipd); ++ ++ /* ++ * Do the early init stuff. First we do the per-arch pipeline ++ * core setup, then we run the per-client setup code. At this ++ * point, the kernel does not provide much services yet: be ++ * careful. ++ */ ++ __ipipe_early_core_setup(); ++ __ipipe_early_client_setup(); ++ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_virq = ipipe_alloc_virq(); ++ ipd->irqs[__ipipe_printk_virq].handler = __ipipe_flush_printk; ++ ipd->irqs[__ipipe_printk_virq].cookie = NULL; ++ ipd->irqs[__ipipe_printk_virq].ackfn = NULL; ++ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; ++#endif /* CONFIG_PRINTK */ ++ ++ __ipipe_work_virq = ipipe_alloc_virq(); ++ ipd->irqs[__ipipe_work_virq].handler = __ipipe_do_work; ++ ipd->irqs[__ipipe_work_virq].cookie = NULL; ++ ipd->irqs[__ipipe_work_virq].ackfn = NULL; ++ ipd->irqs[__ipipe_work_virq].control = IPIPE_HANDLE_MASK; ++ ++ for_each_possible_cpu(cpu) ++ per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); ++} ++ ++void __init __ipipe_init(void) ++{ ++ /* Now we may engage the pipeline. */ ++ __ipipe_enable_pipeline(); ++ ++ pr_info("Interrupt pipeline (release #%d)\n", IPIPE_CORE_RELEASE); ++} ++ ++static inline void init_head_stage(struct ipipe_domain *ipd) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int cpu; ++ ++ /* Must be set first, used in ipipe_percpu_context(). 
*/ ++ ipd->context_offset = offsetof(struct ipipe_percpu_data, head); ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ memset(p, 0, sizeof(*p)); ++ p->domain = ipd; ++ } ++ ++ init_stage(ipd); ++} ++ ++void ipipe_register_head(struct ipipe_domain *ipd, const char *name) ++{ ++ BUG_ON(!ipipe_root_p || ipd == &ipipe_root); ++ ++ ipd->name = name; ++ init_head_stage(ipd); ++ barrier(); ++ ipipe_head_domain = ipd; ++ add_domain_proc(ipd); ++ ++ pr_info("I-pipe: head domain %s registered.\n", name); ++} ++EXPORT_SYMBOL_GPL(ipipe_register_head); ++ ++void ipipe_unregister_head(struct ipipe_domain *ipd) ++{ ++ BUG_ON(!ipipe_root_p || ipd != ipipe_head_domain); ++ ++ ipipe_head_domain = &ipipe_root; ++ smp_mb(); ++ mutex_lock(&ipd->mutex); ++ remove_domain_proc(ipd); ++ mutex_unlock(&ipd->mutex); ++ ++ pr_info("I-pipe: head domain %s unregistered.\n", ipd->name); ++} ++EXPORT_SYMBOL_GPL(ipipe_unregister_head); ++ ++void ipipe_stall_root(void) ++{ ++ unsigned long flags; ++ ++ ipipe_root_only(); ++ flags = hard_smp_local_irq_save(); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++} ++EXPORT_SYMBOL(ipipe_stall_root); ++ ++unsigned long ipipe_test_and_stall_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ ipipe_root_only(); ++ flags = hard_smp_local_irq_save(); ++ x = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return x; ++} ++EXPORT_SYMBOL(ipipe_test_and_stall_root); ++ ++unsigned long ipipe_test_root(void) ++{ ++ unsigned long flags; ++ int x; ++ ++ flags = hard_smp_local_irq_save(); ++ x = test_bit(IPIPE_STALL_FLAG, &__ipipe_root_status); ++ hard_smp_local_irq_restore(flags); ++ ++ return x; ++} ++EXPORT_SYMBOL(ipipe_test_root); ++ ++void ipipe_unstall_root(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ hard_local_irq_disable(); ++ ++ /* This helps catching bad usage from assembly call sites. */ ++ ipipe_root_only(); ++ ++ p = ipipe_this_cpu_root_context(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_stage(); ++ ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL(ipipe_unstall_root); ++ ++void ipipe_restore_root(unsigned long x) ++{ ++ ipipe_root_only(); ++ ++ if (x) ++ ipipe_stall_root(); ++ else ++ ipipe_unstall_root(); ++} ++EXPORT_SYMBOL(ipipe_restore_root); ++ ++void __ipipe_restore_root_nosync(unsigned long x) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_root_context(); ++ ++ if (raw_irqs_disabled_flags(x)) { ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ trace_hardirqs_off(); ++ } else { ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_restore_root_nosync); ++ ++void ipipe_unstall_head(void) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); ++ ++ hard_local_irq_disable(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(ipipe_head_domain); ++ ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL_GPL(ipipe_unstall_head); ++ ++void __ipipe_restore_head(unsigned long x) /* hw interrupt off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(); ++ ++ if (x) { ++#ifdef CONFIG_DEBUG_KERNEL ++ static int warned; ++ if (!warned && ++ __test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { ++ /* ++ * Already stalled albeit ipipe_restore_head() ++ * should have detected it? Send a warning once. 
++ */ ++ hard_local_irq_enable(); ++ warned = 1; ++ pr_warning("I-pipe: ipipe_restore_head() " ++ "optimization failed.\n"); ++ dump_stack(); ++ hard_local_irq_disable(); ++ } ++#else /* !CONFIG_DEBUG_KERNEL */ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++#endif /* CONFIG_DEBUG_KERNEL */ ++ } else { ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_pipeline(ipipe_head_domain); ++ hard_local_irq_enable(); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_restore_head); ++ ++void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock) ++{ ++ hard_local_irq_disable(); ++ if (ipipe_smp_p) ++ arch_spin_lock(&lock->arch_lock); ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irq); ++ ++void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_enable(); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irq); ++ ++unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock) ++{ ++ unsigned long flags; ++ int s; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p) ++ arch_spin_lock(&lock->arch_lock); ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ ++ return arch_mangle_irq_bits(s, flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_lock_irqsave); ++ ++int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, ++ unsigned long *x) ++{ ++ unsigned long flags; ++ int s; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { ++ hard_local_irq_restore(flags); ++ return 0; ++ } ++ s = __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ *x = arch_mangle_irq_bits(s, flags); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irqsave); ++ ++void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, ++ unsigned long x) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++ if (!arch_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_restore(x); ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_unlock_irqrestore); ++ ++int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ if (ipipe_smp_p && !arch_spin_trylock(&lock->arch_lock)) { ++ hard_local_irq_restore(flags); ++ return 0; ++ } ++ __set_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ ++ return 1; ++} ++EXPORT_SYMBOL_GPL(__ipipe_spin_trylock_irq); ++ ++void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) ++{ ++ if (ipipe_smp_p) ++ arch_spin_unlock(&lock->arch_lock); ++} ++ ++void __ipipe_spin_unlock_irqcomplete(unsigned long x) ++{ ++ if (!arch_demangle_irq_bits(&x)) ++ __clear_bit(IPIPE_STALL_FLAG, &__ipipe_current_context->status); ++ hard_local_irq_restore(x); ++} ++ ++/* Must be called hw IRQs off. */ ++static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, ++ unsigned int irq) ++{ ++ __set_bit(irq, p->irqheld_map); ++ p->irqall[irq]++; ++} ++ ++#if __IPIPE_IRQMAP_LEVELS == 4 ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b, l1b, l2b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l2b = irq / BITS_PER_LONG; ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(l0b, &p->irqpend_0map); ++ __set_bit(l1b, p->irqpend_1map); ++ __set_bit(l2b, p->irqpend_2map); ++ __set_bit(irq, p->irqpend_map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, l2b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ /* ++ * Interrupts requested by a registered head domain cannot be ++ * locked, since this would make no sense: interrupts are ++ * globally masked at CPU level when the head domain is ++ * stalled, so there is no way we could encounter the ++ * situation IRQ locks are handling. ++ */ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ p = ipipe_this_cpu_context(ipd); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ l2b = irq / BITS_PER_LONG; ++ if (p->irqpend_map[l2b] == 0) { ++ __clear_bit(l2b, p->irqpend_2map); ++ l1b = l2b / BITS_PER_LONG; ++ if (p->irqpend_2map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ l0b = l1b / BITS_PER_LONG; ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, l2b, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l2b = irq / BITS_PER_LONG; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_this_cpu_root_context(); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l2b, p->irqpend_2map); ++ set_bit(l1b, p->irqpend_1map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++#define wmul1(__n) ((__n) * BITS_PER_LONG) ++#define wmul2(__n) (wmul1(__n) * BITS_PER_LONG) ++#define wmul3(__n) (wmul2(__n) * BITS_PER_LONG) ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ unsigned long l0m, l1m, l2m, l3m; ++ int l0b, l1b, l2b, l3b; ++ unsigned int irq; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ l0b = __ipipe_ffnz(l0m); ++ irq = wmul3(l0b); ++ ++ l1m = p->irqpend_1map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ l1b = __ipipe_ffnz(l1m); ++ irq += wmul2(l1b); ++ ++ l2m = p->irqpend_2map[wmul1(l0b) + l1b]; ++ if (unlikely(l2m == 0)) ++ return -1; ++ l2b = __ipipe_ffnz(l2m); ++ irq += wmul1(l2b); ++ ++ l3m = p->irqpend_map[wmul2(l0b) + wmul1(l1b) + l2b]; ++ if (unlikely(l3m == 0)) ++ return -1; ++ l3b = __ipipe_ffnz(l3m); ++ irq += l3b; ++ ++ __clear_bit(irq, p->irqpend_map); ++ if (p->irqpend_map[irq / BITS_PER_LONG] == 0) { ++ __clear_bit(l2b, &p->irqpend_2map[wmul1(l0b) + l1b]); ++ if (p->irqpend_2map[wmul1(l0b) + l1b] == 0) { ++ __clear_bit(l1b, &p->irqpend_1map[l0b]); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++ ++ return irq; ++} ++ ++#elif __IPIPE_IRQMAP_LEVELS == 3 ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b, l1b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_map); ++ __set_bit(l1b, p->irqpend_1map); ++ __set_bit(l0b, &p->irqpend_0map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ /* ++ * Interrupts requested by a registered head domain cannot be ++ * locked, since this would make no sense: interrupts are ++ * globally masked at CPU level when the head domain is ++ * stalled, so there is no way we could encounter the ++ * situation IRQ locks are handling. 
++ */ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ p = ipipe_this_cpu_context(ipd); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b, l1b, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); ++ l1b = irq / BITS_PER_LONG; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_this_cpu_root_context(); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l1b, p->irqpend_1map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ int l0b, l1b, l2b; ++ unsigned long l0m, l1m, l2m; ++ unsigned int irq; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_1map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; ++ l2m = p->irqpend_map[l1b]; ++ if (unlikely(l2m == 0)) ++ return -1; ++ ++ l2b = __ipipe_ffnz(l2m); ++ irq = l1b * BITS_PER_LONG + l2b; ++ ++ __clear_bit(irq, p->irqpend_map); ++ if (p->irqpend_map[l1b] == 0) { ++ __clear_bit(l1b, p->irqpend_1map); ++ if (p->irqpend_1map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++ ++ return irq; ++} ++ ++#else /* __IPIPE_IRQMAP_LEVELS == 2 */ ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_context(ipd); ++ int l0b = irq / BITS_PER_LONG; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { ++ __set_bit(irq, p->irqpend_map); ++ __set_bit(l0b, &p->irqpend_0map); ++ } else ++ __set_bit(irq, p->irqheld_map); ++ ++ p->irqall[irq]++; ++} ++EXPORT_SYMBOL_GPL(__ipipe_set_irq_pending); ++ ++/* Must be called hw IRQs off. */ ++void __ipipe_lock_irq(unsigned int irq) ++{ ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (test_and_set_bit(IPIPE_LOCK_FLAG, ++ &ipipe_root_domain->irqs[irq].control)) ++ return; ++ ++ p = ipipe_this_cpu_root_context(); ++ if (__test_and_clear_bit(irq, p->irqpend_map)) { ++ __set_bit(irq, p->irqheld_map); ++ if (p->irqpend_map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_lock_irq); ++ ++/* Must be called hw IRQs off. 
*/ ++void __ipipe_unlock_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_root_domain; ++ struct ipipe_percpu_domain_data *p; ++ int l0b = irq / BITS_PER_LONG, cpu; ++ ++ IPIPE_WARN_ONCE(!hard_irqs_disabled()); ++ ++ if (!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ return; ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ if (test_and_clear_bit(irq, p->irqheld_map)) { ++ /* We need atomic ops here: */ ++ set_bit(irq, p->irqpend_map); ++ set_bit(l0b, &p->irqpend_0map); ++ } ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_unlock_irq); ++ ++static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) ++{ ++ unsigned long l0m, l1m; ++ int l0b, l1b; ++ ++ l0m = p->irqpend_0map; ++ if (unlikely(l0m == 0)) ++ return -1; ++ ++ l0b = __ipipe_ffnz(l0m); ++ l1m = p->irqpend_map[l0b]; ++ if (unlikely(l1m == 0)) ++ return -1; ++ ++ l1b = __ipipe_ffnz(l1m); ++ __clear_bit(l1b, &p->irqpend_map[l0b]); ++ if (p->irqpend_map[l0b] == 0) ++ __clear_bit(l0b, &p->irqpend_0map); ++ ++ return l0b * BITS_PER_LONG + l1b; ++} ++ ++#endif ++ ++void __ipipe_do_sync_pipeline(struct ipipe_domain *top) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *ipd; ++ ++ /* We must enter over the root domain. */ ++ IPIPE_WARN_ONCE(__ipipe_current_domain != ipipe_root_domain); ++ ipd = top; ++next: ++ p = ipipe_this_cpu_context(ipd); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ return; ++ ++ if (__ipipe_ipending_p(p)) { ++ if (ipd == ipipe_root_domain) ++ __ipipe_sync_stage(); ++ else { ++ /* Switching to head. */ ++ p->coflags &= ~__IPIPE_ALL_R; ++ __ipipe_set_current_context(p); ++ __ipipe_sync_stage(); ++ __ipipe_set_current_domain(ipipe_root_domain); ++ } ++ } ++ ++ if (ipd != ipipe_root_domain) { ++ ipd = ipipe_root_domain; ++ goto next; ++ } ++} ++EXPORT_SYMBOL_GPL(__ipipe_do_sync_pipeline); ++ ++unsigned int ipipe_alloc_virq(void) ++{ ++ unsigned long flags, irq = 0; ++ int ipos; ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (__ipipe_virtual_irq_map != ~0) { ++ ipos = ffz(__ipipe_virtual_irq_map); ++ set_bit(ipos, &__ipipe_virtual_irq_map); ++ irq = ipos + IPIPE_VIRQ_BASE; ++ } ++ ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++ ++ return irq; ++} ++EXPORT_SYMBOL_GPL(ipipe_alloc_virq); ++ ++void ipipe_free_virq(unsigned int virq) ++{ ++ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); ++ smp_mb__after_atomic(); ++} ++EXPORT_SYMBOL_GPL(ipipe_free_virq); ++ ++int ipipe_request_irq(struct ipipe_domain *ipd, ++ unsigned int irq, ++ ipipe_irq_handler_t handler, ++ void *cookie, ++ ipipe_irq_ackfn_t ackfn) ++{ ++ unsigned long flags; ++ int ret = 0; ++ ++ ipipe_root_only(); ++ ++ if (handler == NULL || ++ (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (ipd->irqs[irq].handler) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ if (ackfn == NULL) ++ ackfn = ipipe_root_domain->irqs[irq].ackfn; ++ ++ ipd->irqs[irq].handler = handler; ++ ipd->irqs[irq].cookie = cookie; ++ ipd->irqs[irq].ackfn = ackfn; ++ ipd->irqs[irq].control = IPIPE_HANDLE_MASK; ++out: ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_request_irq); ++ ++void ipipe_free_irq(struct ipipe_domain *ipd, ++ unsigned int irq) ++{ ++ unsigned long flags; ++ ++ ipipe_root_only(); ++ ++ raw_spin_lock_irqsave(&__ipipe_lock, flags); ++ ++ if (ipd->irqs[irq].handler == NULL) ++ goto out; ++ ++ ipd->irqs[irq].handler = NULL; ++ 
ipd->irqs[irq].cookie = NULL; ++ ipd->irqs[irq].ackfn = NULL; ++ ipd->irqs[irq].control = 0; ++out: ++ raw_spin_unlock_irqrestore(&__ipipe_lock, flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_free_irq); ++ ++void ipipe_set_hooks(struct ipipe_domain *ipd, int enables) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int cpu, wait; ++ ++ if (ipd == ipipe_root_domain) { ++ IPIPE_WARN(enables & __IPIPE_TRAP_E); ++ enables &= ~__IPIPE_TRAP_E; ++ } else { ++ IPIPE_WARN(enables & __IPIPE_KEVENT_E); ++ enables &= ~__IPIPE_KEVENT_E; ++ } ++ ++ flags = ipipe_critical_enter(NULL); ++ ++ for_each_online_cpu(cpu) { ++ p = ipipe_percpu_context(ipd, cpu); ++ p->coflags &= ~__IPIPE_ALL_E; ++ p->coflags |= enables; ++ } ++ ++ wait = (enables ^ __IPIPE_ALL_E) << __IPIPE_SHIFT_R; ++ if (wait == 0 || !__ipipe_root_p) { ++ ipipe_critical_exit(flags); ++ return; ++ } ++ ++ ipipe_this_cpu_context(ipd)->coflags &= ~wait; ++ ++ ipipe_critical_exit(flags); ++ ++ /* ++ * In case we cleared some hooks over the root domain, we have ++ * to wait for any ongoing execution to finish, since our ++ * caller might subsequently unmap the target domain code. ++ * ++ * We synchronize with the relevant __ipipe_notify_*() ++ * helpers, disabling all hooks before we start waiting for ++ * completion on all CPUs. ++ */ ++ for_each_online_cpu(cpu) { ++ while (ipipe_percpu_context(ipd, cpu)->coflags & wait) ++ schedule_timeout_interruptible(HZ / 50); ++ } ++} ++EXPORT_SYMBOL_GPL(ipipe_set_hooks); ++ ++int __weak ipipe_fastcall_hook(struct pt_regs *regs) ++{ ++ return -1; /* i.e. fall back to slow path. */ ++} ++ ++int __weak ipipe_syscall_hook(struct ipipe_domain *ipd, struct pt_regs *regs) ++{ ++ return 0; ++} ++ ++static inline void sync_root_irqs(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ p = ipipe_this_cpu_root_context(); ++ if (unlikely(__ipipe_ipending_p(p))) ++ __ipipe_sync_stage(); ++ ++ hard_local_irq_restore(flags); ++} ++ ++int ipipe_handle_syscall(struct thread_info *ti, ++ unsigned long nr, struct pt_regs *regs) ++{ ++ unsigned long local_flags = READ_ONCE(ti->ipipe_flags); ++ unsigned int nr_syscalls = ipipe_root_nr_syscalls(ti); ++ int ret; ++ ++ /* ++ * NOTE: This is a backport from the DOVETAIL syscall ++ * redirector to the older pipeline implementation. ++ * ++ * == ++ * ++ * If the syscall # is out of bounds and the current IRQ stage ++ * is not the root one, this has to be a non-native system ++ * call handled by some co-kernel on the head stage. Hand it ++ * over to the head stage via the fast syscall handler. ++ * ++ * Otherwise, if the system call is out of bounds or the ++ * current thread is shared with a co-kernel, hand the syscall ++ * over to the latter through the pipeline stages. This ++ * allows: ++ * ++ * - the co-kernel to receive the initial - foreign - syscall ++ * a thread should send for enabling syscall handling by the ++ * co-kernel. ++ * ++ * - the co-kernel to manipulate the current execution stage ++ * for handling the request, which includes switching the ++ * current thread back to the root stage if the syscall is a ++ * native one, or promoting it to the head stage if handling ++ * the foreign syscall requires this. ++ * ++ * Native syscalls from regular (non-pipeline) threads are ++ * ignored by this routine, and flow down to the regular ++ * system call handler. 
++ */ ++ ++ if (nr >= nr_syscalls && (local_flags & _TIP_HEAD)) { ++ ipipe_fastcall_hook(regs); ++ local_flags = READ_ONCE(ti->ipipe_flags); ++ if (local_flags & _TIP_HEAD) { ++ if (local_flags & _TIP_MAYDAY) ++ __ipipe_call_mayday(regs); ++ return 1; /* don't pass down, no tail work. */ ++ } else { ++ sync_root_irqs(); ++ return -1; /* don't pass down, do tail work. */ ++ } ++ } ++ ++ if ((local_flags & _TIP_NOTIFY) || nr >= nr_syscalls) { ++ ret =__ipipe_notify_syscall(regs); ++ local_flags = READ_ONCE(ti->ipipe_flags); ++ if (local_flags & _TIP_HEAD) ++ return 1; /* don't pass down, no tail work. */ ++ if (ret) ++ return -1; /* don't pass down, do tail work. */ ++ } ++ ++ return 0; /* pass syscall down to the host. */ ++} ++ ++int __ipipe_notify_syscall(struct pt_regs *regs) ++{ ++ struct ipipe_domain *caller_domain, *this_domain, *ipd; ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int ret = 0; ++ ++ /* ++ * We should definitely not pipeline a syscall with IRQs off. ++ */ ++ IPIPE_WARN_ONCE(hard_irqs_disabled()); ++ ++ flags = hard_local_irq_save(); ++ caller_domain = this_domain = __ipipe_current_domain; ++ ipd = ipipe_head_domain; ++next: ++ p = ipipe_this_cpu_context(ipd); ++ if (likely(p->coflags & __IPIPE_SYSCALL_E)) { ++ __ipipe_set_current_context(p); ++ p->coflags |= __IPIPE_SYSCALL_R; ++ hard_local_irq_restore(flags); ++ ret = ipipe_syscall_hook(caller_domain, regs); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_SYSCALL_R; ++ if (__ipipe_current_domain != ipd) ++ /* Account for domain migration. */ ++ this_domain = __ipipe_current_domain; ++ else ++ __ipipe_set_current_domain(this_domain); ++ } ++ ++ if (this_domain == ipipe_root_domain) { ++ if (ipd != ipipe_root_domain && ret == 0) { ++ ipd = ipipe_root_domain; ++ goto next; ++ } ++ /* ++ * Careful: we may have migrated from head->root, so p ++ * would be ipipe_this_cpu_context(head). ++ */ ++ p = ipipe_this_cpu_root_context(); ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_stage(); ++ } else if (ipipe_test_thread_flag(TIP_MAYDAY)) ++ __ipipe_call_mayday(regs); ++ ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++int __weak ipipe_trap_hook(struct ipipe_trap_data *data) ++{ ++ return 0; ++} ++ ++int __ipipe_notify_trap(int exception, struct pt_regs *regs) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_trap_data data; ++ unsigned long flags; ++ int ret = 0; ++ ++ flags = hard_local_irq_save(); ++ ++ /* ++ * We send a notification about all traps raised over a ++ * registered head domain only. 
++ */ ++ if (__ipipe_root_p) ++ goto out; ++ ++ p = ipipe_this_cpu_head_context(); ++ if (likely(p->coflags & __IPIPE_TRAP_E)) { ++ p->coflags |= __IPIPE_TRAP_R; ++ hard_local_irq_restore(flags); ++ data.exception = exception; ++ data.regs = regs; ++ ret = ipipe_trap_hook(&data); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_TRAP_R; ++ } ++out: ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++int __ipipe_notify_user_intreturn(void) ++{ ++ __ipipe_notify_kevent(IPIPE_KEVT_USERINTRET, current); ++ ++ return !ipipe_root_p; ++} ++ ++int __weak ipipe_kevent_hook(int kevent, void *data) ++{ ++ return 0; ++} ++ ++int __ipipe_notify_kevent(int kevent, void *data) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ int ret = 0; ++ ++ ipipe_root_only(); ++ ++ flags = hard_local_irq_save(); ++ ++ p = ipipe_this_cpu_root_context(); ++ if (likely(p->coflags & __IPIPE_KEVENT_E)) { ++ p->coflags |= __IPIPE_KEVENT_R; ++ hard_local_irq_restore(flags); ++ ret = ipipe_kevent_hook(kevent, data); ++ flags = hard_local_irq_save(); ++ p->coflags &= ~__IPIPE_KEVENT_R; ++ } ++ ++ hard_local_irq_restore(flags); ++ ++ return ret; ++} ++ ++void __weak ipipe_migration_hook(struct task_struct *p) ++{ ++} ++ ++static void complete_domain_migration(void) /* hw IRQs off */ ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_percpu_data *pd; ++ struct task_struct *t; ++ ++ ipipe_root_only(); ++ pd = raw_cpu_ptr(&ipipe_percpu); ++ t = pd->task_hijacked; ++ if (t == NULL) ++ return; ++ ++ pd->task_hijacked = NULL; ++ t->state &= ~TASK_HARDENING; ++ if (t->state != TASK_INTERRUPTIBLE) ++ /* Migration aborted (by signal). */ ++ return; ++ ++ ipipe_set_ti_thread_flag(task_thread_info(t), TIP_HEAD); ++ p = ipipe_this_cpu_head_context(); ++ IPIPE_WARN_ONCE(test_bit(IPIPE_STALL_FLAG, &p->status)); ++ /* ++ * hw IRQs are disabled, but the completion hook assumes the ++ * head domain is logically stalled: fix it up. ++ */ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ ipipe_migration_hook(t); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_pipeline(p->domain); ++} ++ ++void __ipipe_complete_domain_migration(void) ++{ ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ complete_domain_migration(); ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_complete_domain_migration); ++ ++int __ipipe_switch_tail(void) ++{ ++ int x; ++ ++#ifdef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ hard_local_irq_disable(); ++#endif ++ x = __ipipe_root_p; ++ if (x) ++ complete_domain_migration(); ++ ++#ifndef CONFIG_IPIPE_WANT_PREEMPTIBLE_SWITCH ++ if (x) ++#endif ++ hard_local_irq_enable(); ++ ++ return !x; ++} ++ ++void __ipipe_notify_vm_preemption(void) ++{ ++ struct ipipe_vm_notifier *vmf; ++ struct ipipe_percpu_data *p; ++ ++ ipipe_check_irqoff(); ++ p = __ipipe_raw_cpu_ptr(&ipipe_percpu); ++ vmf = p->vm_notifier; ++ if (unlikely(vmf)) ++ vmf->handler(vmf); ++} ++EXPORT_SYMBOL_GPL(__ipipe_notify_vm_preemption); ++ ++static void dispatch_irq_head(unsigned int irq) /* hw interrupts off */ ++{ ++ struct ipipe_percpu_domain_data *p = ipipe_this_cpu_head_context(), *old; ++ struct ipipe_domain *head = p->domain; ++ ++ if (unlikely(test_bit(IPIPE_STALL_FLAG, &p->status))) { ++ __ipipe_set_irq_pending(head, irq); ++ return; ++ } ++ ++ /* Switch to the head domain if not current. 
*/ ++ old = __ipipe_current_context; ++ if (old != p) ++ __ipipe_set_current_context(p); ++ ++ p->irqall[irq]++; ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ barrier(); ++ head->irqs[irq].handler(irq, head->irqs[irq].cookie); ++ __ipipe_run_irqtail(irq); ++ hard_local_irq_disable(); ++ p = ipipe_this_cpu_head_context(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ ++ /* Are we still running in the head domain? */ ++ if (likely(__ipipe_current_context == p)) { ++ /* Did we enter this code over the head domain? */ ++ if (old->domain == head) { ++ /* Yes, do immediate synchronization. */ ++ if (__ipipe_ipending_p(p)) ++ __ipipe_sync_stage(); ++ return; ++ } ++ __ipipe_set_current_context(ipipe_this_cpu_root_context()); ++ } ++ ++ /* ++ * We must be running over the root domain, synchronize ++ * the pipeline for high priority IRQs (slow path). ++ */ ++ __ipipe_do_sync_pipeline(head); ++} ++ ++void __ipipe_dispatch_irq(unsigned int irq, int flags) /* hw interrupts off */ ++{ ++ struct ipipe_domain *ipd; ++ struct irq_desc *desc; ++ unsigned long control; ++ int chained_irq; ++ ++ /* ++ * Survival kit when reading this code: ++ * ++ * - we have two main situations, leading to three cases for ++ * handling interrupts: ++ * ++ * a) the root domain is alone, no registered head domain ++ * => all interrupts go through the interrupt log ++ * b) a head domain is registered ++ * => head domain IRQs go through the fast dispatcher ++ * => root domain IRQs go through the interrupt log ++ * ++ * - when no head domain is registered, ipipe_head_domain == ++ * ipipe_root_domain == &ipipe_root. ++ * ++ * - the caller tells us whether we should acknowledge this ++ * IRQ. Even virtual IRQs may require acknowledge on some ++ * platforms (e.g. arm/SMP). ++ * ++ * - the caller tells us whether we may try to run the IRQ log ++ * syncer. Typically, demuxed IRQs won't be synced ++ * immediately. ++ * ++ * - multiplex IRQs most likely have a valid acknowledge ++ * handler and we may not be called with IPIPE_IRQF_NOACK ++ * for them. The ack handler for the multiplex IRQ actually ++ * decodes the demuxed interrupts. ++ */ ++ ++#ifdef CONFIG_IPIPE_DEBUG ++ if (irq >= IPIPE_NR_IRQS) { ++ pr_err("I-pipe: spurious interrupt %u\n", irq); ++ return; ++ } ++#endif ++ /* ++ * CAUTION: on some archs, virtual IRQs may have acknowledge ++ * handlers. Multiplex IRQs should have one too. ++ */ ++ if (unlikely(irq >= IPIPE_NR_XIRQS)) { ++ desc = NULL; ++ chained_irq = 0; ++ } else { ++ desc = irq_to_desc(irq); ++ chained_irq = desc ? ipipe_chained_irq_p(desc) : 0; ++ } ++ if (flags & IPIPE_IRQF_NOACK) ++ IPIPE_WARN_ONCE(chained_irq); ++ else { ++ ipd = ipipe_head_domain; ++ control = ipd->irqs[irq].control; ++ if ((control & IPIPE_HANDLE_MASK) == 0) ++ ipd = ipipe_root_domain; ++ if (ipd->irqs[irq].ackfn) ++ ipd->irqs[irq].ackfn(desc); ++ if (chained_irq) { ++ if ((flags & IPIPE_IRQF_NOSYNC) == 0) ++ /* Run demuxed IRQ handlers. */ ++ goto sync; ++ return; ++ } ++ } ++ ++ /* ++ * Sticky interrupts must be handled early and separately, so ++ * that we always process them on the current domain. ++ */ ++ ipd = __ipipe_current_domain; ++ control = ipd->irqs[irq].control; ++ if (control & IPIPE_STICKY_MASK) ++ goto log; ++ ++ /* ++ * In case we have no registered head domain ++ * (i.e. ipipe_head_domain == &ipipe_root), we always go ++ * through the interrupt log, and leave the dispatching work ++ * ultimately to __ipipe_sync_pipeline(). 
++ */ ++ ipd = ipipe_head_domain; ++ control = ipd->irqs[irq].control; ++ if (ipd == ipipe_root_domain) ++ /* ++ * The root domain must handle all interrupts, so ++ * testing the HANDLE bit would be pointless. ++ */ ++ goto log; ++ ++ if (control & IPIPE_HANDLE_MASK) { ++ if (unlikely(flags & IPIPE_IRQF_NOSYNC)) ++ __ipipe_set_irq_pending(ipd, irq); ++ else ++ dispatch_irq_head(irq); ++ return; ++ } ++ ++ ipd = ipipe_root_domain; ++log: ++ __ipipe_set_irq_pending(ipd, irq); ++ ++ if (flags & IPIPE_IRQF_NOSYNC) ++ return; ++ ++ /* ++ * Optimize if we preempted a registered high priority head ++ * domain: we don't need to synchronize the pipeline unless ++ * there is a pending interrupt for it. ++ */ ++ if (!__ipipe_root_p && ++ !__ipipe_ipending_p(ipipe_this_cpu_head_context())) ++ return; ++sync: ++ __ipipe_sync_pipeline(ipipe_head_domain); ++} ++ ++void ipipe_raise_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_head_domain; ++ unsigned long flags, control; ++ ++ flags = hard_local_irq_save(); ++ ++ /* ++ * Fast path: raising a virtual IRQ handled by the head ++ * domain. ++ */ ++ if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) { ++ control = ipd->irqs[irq].control; ++ if (likely(control & IPIPE_HANDLE_MASK)) { ++ dispatch_irq_head(irq); ++ goto out; ++ } ++ } ++ ++ /* Emulate regular device IRQ receipt. */ ++ __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK); ++out: ++ hard_local_irq_restore(flags); ++ ++} ++EXPORT_SYMBOL_GPL(ipipe_raise_irq); ++ ++#ifdef CONFIG_PREEMPT ++ ++void preempt_schedule_irq(void); ++ ++void __sched __ipipe_preempt_schedule_irq(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(!hard_irqs_disabled())) ++ hard_local_irq_disable(); ++ ++ local_irq_save(flags); ++ hard_local_irq_enable(); ++ preempt_schedule_irq(); /* Ok, may reschedule now. */ ++ hard_local_irq_disable(); ++ ++ /* ++ * Flush any pending interrupt that may have been logged after ++ * preempt_schedule_irq() stalled the root stage before ++ * returning to us, and now. ++ */ ++ p = ipipe_this_cpu_root_context(); ++ if (unlikely(__ipipe_ipending_p(p))) { ++ trace_hardirqs_on(); ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++ __ipipe_sync_stage(); ++ } ++ ++ __ipipe_restore_root_nosync(flags); ++} ++ ++#else /* !CONFIG_PREEMPT */ ++ ++#define __ipipe_preempt_schedule_irq() do { } while (0) ++ ++#endif /* !CONFIG_PREEMPT */ ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++#define root_stall_after_handler() local_irq_disable() ++#else ++#define root_stall_after_handler() do { } while (0) ++#endif ++ ++/* ++ * __ipipe_do_sync_stage() -- Flush the pending IRQs for the current ++ * domain (and processor). This routine flushes the interrupt log (see ++ * "Optimistic interrupt protection" from D. Stodolsky et al. for more ++ * on the deferred interrupt scheme). Every interrupt that occurred ++ * while the pipeline was stalled gets played. ++ * ++ * WARNING: CPU migration may occur over this routine. ++ */ ++void __ipipe_do_sync_stage(void) ++{ ++ struct ipipe_percpu_domain_data *p; ++ struct ipipe_domain *ipd; ++ int irq; ++ ++ p = __ipipe_current_context; ++respin: ++ ipd = p->domain; ++ ++ __set_bit(IPIPE_STALL_FLAG, &p->status); ++ smp_wmb(); ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_off(); ++ ++ for (;;) { ++ irq = __ipipe_next_irq(p); ++ if (irq < 0) ++ break; ++ /* ++ * Make sure the compiler does not reorder wrongly, so ++ * that all updates to maps are done before the ++ * handler gets called. 
++ */ ++ barrier(); ++ ++ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) ++ continue; ++ ++ if (ipd != ipipe_head_domain) ++ hard_local_irq_enable(); ++ ++ if (likely(ipd != ipipe_root_domain)) { ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ __ipipe_run_irqtail(irq); ++ hard_local_irq_disable(); ++ } else if (ipipe_virtual_irq_p(irq)) { ++ irq_enter(); ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ irq_exit(); ++ root_stall_after_handler(); ++ hard_local_irq_disable(); ++ } else { ++ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); ++ root_stall_after_handler(); ++ hard_local_irq_disable(); ++ } ++ ++ /* ++ * We may have migrated to a different CPU (1) upon ++ * return from the handler, or downgraded from the ++ * head domain to the root one (2), the opposite way ++ * is NOT allowed though. ++ * ++ * (1) reload the current per-cpu context pointer, so ++ * that we further pull pending interrupts from the ++ * proper per-cpu log. ++ * ++ * (2) check the stall bit to know whether we may ++ * dispatch any interrupt pending for the root domain, ++ * and respin the entire dispatch loop if ++ * so. Otherwise, immediately return to the caller, ++ * _without_ affecting the stall state for the root ++ * domain, since we do not own it at this stage. This ++ * case is basically reflecting what may happen in ++ * dispatch_irq_head() for the fast path. ++ */ ++ p = __ipipe_current_context; ++ if (p->domain != ipd) { ++ IPIPE_BUG_ON(ipd == ipipe_root_domain); ++ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ return; ++ goto respin; ++ } ++ } ++ ++ if (ipd == ipipe_root_domain) ++ trace_hardirqs_on(); ++ ++ __clear_bit(IPIPE_STALL_FLAG, &p->status); ++} ++ ++void __ipipe_call_mayday(struct pt_regs *regs) ++{ ++ unsigned long flags; ++ ++ ipipe_clear_thread_flag(TIP_MAYDAY); ++ flags = hard_local_irq_save(); ++ __ipipe_notify_trap(IPIPE_TRAP_MAYDAY, regs); ++ hard_local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_SMP ++ ++/* Always called with hw interrupts off. */ ++void __ipipe_do_critical_sync(unsigned int irq, void *cookie) ++{ ++ int cpu = ipipe_processor_id(); ++ ++ cpumask_set_cpu(cpu, &__ipipe_cpu_sync_map); ++ ++ /* ++ * Now we are in sync with the lock requestor running on ++ * another CPU. Enter a spinning wait until he releases the ++ * global lock. ++ */ ++ raw_spin_lock(&__ipipe_cpu_barrier); ++ ++ /* Got it. Now get out. */ ++ ++ /* Call the sync routine if any. 
*/ ++ if (__ipipe_cpu_sync) ++ __ipipe_cpu_sync(); ++ ++ cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); ++ ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ ++ cpumask_clear_cpu(cpu, &__ipipe_cpu_sync_map); ++} ++#endif /* CONFIG_SMP */ ++ ++unsigned long ipipe_critical_enter(void (*syncfn)(void)) ++{ ++ static cpumask_t allbutself __maybe_unused, online __maybe_unused; ++ int cpu __maybe_unused, n __maybe_unused; ++ unsigned long flags, loops __maybe_unused; ++ ++ flags = hard_local_irq_save(); ++ ++ if (num_online_cpus() == 1) ++ return flags; ++ ++#ifdef CONFIG_SMP ++ ++ cpu = ipipe_processor_id(); ++ if (!cpumask_test_and_set_cpu(cpu, &__ipipe_cpu_lock_map)) { ++ while (test_and_set_bit(0, &__ipipe_critical_lock)) { ++ n = 0; ++ hard_local_irq_enable(); ++ ++ do ++ cpu_relax(); ++ while (++n < cpu); ++ ++ hard_local_irq_disable(); ++ } ++restart: ++ online = *cpu_online_mask; ++ raw_spin_lock(&__ipipe_cpu_barrier); ++ ++ __ipipe_cpu_sync = syncfn; ++ ++ cpumask_clear(&__ipipe_cpu_pass_map); ++ cpumask_set_cpu(cpu, &__ipipe_cpu_pass_map); ++ ++ /* ++ * Send the sync IPI to all processors but the current ++ * one. ++ */ ++ cpumask_andnot(&allbutself, &online, &__ipipe_cpu_pass_map); ++ ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself); ++ loops = IPIPE_CRITICAL_TIMEOUT; ++ ++ while (!cpumask_equal(&__ipipe_cpu_sync_map, &allbutself)) { ++ if (--loops > 0) { ++ cpu_relax(); ++ continue; ++ } ++ /* ++ * We ran into a deadlock due to a contended ++ * rwlock. Cancel this round and retry. ++ */ ++ __ipipe_cpu_sync = NULL; ++ ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ /* ++ * Ensure all CPUs consumed the IPI to avoid ++ * running __ipipe_cpu_sync prematurely. This ++ * usually resolves the deadlock reason too. ++ */ ++ while (!cpumask_equal(&online, &__ipipe_cpu_pass_map)) ++ cpu_relax(); ++ ++ goto restart; ++ } ++ } ++ ++ atomic_inc(&__ipipe_critical_count); ++ ++#endif /* CONFIG_SMP */ ++ ++ return flags; ++} ++EXPORT_SYMBOL_GPL(ipipe_critical_enter); ++ ++void ipipe_critical_exit(unsigned long flags) ++{ ++ if (num_online_cpus() == 1) { ++ hard_local_irq_restore(flags); ++ return; ++ } ++ ++#ifdef CONFIG_SMP ++ if (atomic_dec_and_test(&__ipipe_critical_count)) { ++ raw_spin_unlock(&__ipipe_cpu_barrier); ++ while (!cpumask_empty(&__ipipe_cpu_sync_map)) ++ cpu_relax(); ++ cpumask_clear_cpu(ipipe_processor_id(), &__ipipe_cpu_lock_map); ++ clear_bit(0, &__ipipe_critical_lock); ++ smp_mb__after_atomic(); ++ } ++#endif /* CONFIG_SMP */ ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_critical_exit); ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ ++void ipipe_root_only(void) ++{ ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ ++ flags = hard_smp_local_irq_save(); ++ ++ this_domain = __ipipe_current_domain; ++ if (likely(this_domain == ipipe_root_domain && ++ !test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) { ++ hard_smp_local_irq_restore(flags); ++ return; ++ } ++ ++ if (!__this_cpu_read(ipipe_percpu.context_check)) { ++ hard_smp_local_irq_restore(flags); ++ return; ++ } ++ ++ hard_smp_local_irq_restore(flags); ++ ++ ipipe_prepare_panic(); ++ ipipe_trace_panic_freeze(); ++ ++ if (this_domain != ipipe_root_domain) ++ pr_err("I-pipe: Detected illicit call from head domain '%s'\n" ++ " into a regular Linux service\n", ++ this_domain->name); ++ else ++ pr_err("I-pipe: Detected stalled head domain, " ++ "probably caused by a bug.\n" ++ " A critical section may have been " ++ "left unterminated.\n"); ++ dump_stack(); ++ ipipe_trace_panic_dump(); ++} 
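++
++/*
++ * A minimal usage sketch of ipipe_critical_enter()/ipipe_critical_exit(),
++ * mirroring the calling pattern found elsewhere in this patch (e.g.
++ * ipipe_set_hooks() above, ipipe_select_timers() in timer.c below). The
++ * example_* names are illustrative only and not part of the I-pipe API;
++ * the block is kept under #if 0 so it is never built.
++ */
++#if 0
++static void example_sync_handler(void)
++{
++	/* Runs on each remote CPU, hw IRQs off, while it spins on the barrier. */
++}
++
++static void example_global_update(void)
++{
++	unsigned long flags;
++
++	/* Quiesce all other online CPUs; they spin with hw IRQs off. */
++	flags = ipipe_critical_enter(example_sync_handler);
++	example_sync_handler();	/* syncfn only runs remotely; run it locally too */
++	ipipe_critical_exit(flags);	/* release the CPUs, restore hw IRQ state */
++}
++#endif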
++EXPORT_SYMBOL(ipipe_root_only); ++ ++#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ ++ ++#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) ++ ++unsigned long notrace __ipipe_cpu_get_offset(void) ++{ ++ struct ipipe_domain *this_domain; ++ unsigned long flags; ++ bool bad = false; ++ ++ flags = hard_local_irq_save_notrace(); ++ if (raw_irqs_disabled_flags(flags)) ++ goto out; ++ ++ /* ++ * Only the root domain may implement preemptive CPU migration ++ * of tasks, so anything above in the pipeline should be fine. ++ * CAUTION: we want open coded access to the current domain, ++ * don't use __ipipe_current_domain here, this would recurse ++ * indefinitely. ++ */ ++ this_domain = raw_cpu_read(ipipe_percpu.curr)->domain; ++ if (this_domain != ipipe_root_domain) ++ goto out; ++ ++ /* ++ * Since we run on the root stage with hard irqs enabled, we ++ * need preemption to be disabled. Otherwise, our caller may ++ * end up accessing the wrong per-cpu variable instance due to ++ * CPU migration, complain loudly. ++ */ ++ if (preempt_count() == 0 && !irqs_disabled()) ++ bad = true; ++out: ++ hard_local_irq_restore_notrace(flags); ++ ++ WARN_ON_ONCE(bad); ++ ++ return __my_cpu_offset; ++} ++EXPORT_SYMBOL(__ipipe_cpu_get_offset); ++ ++void __ipipe_spin_unlock_debug(unsigned long flags) ++{ ++ /* ++ * We catch a nasty issue where spin_unlock_irqrestore() on a ++ * regular kernel spinlock is about to re-enable hw interrupts ++ * in a section entered with hw irqs off. This is clearly the ++ * sign of a massive breakage coming. Usual suspect is a ++ * regular spinlock which was overlooked, used within a ++ * section which must run with hw irqs disabled. ++ */ ++ IPIPE_WARN_ONCE(!raw_irqs_disabled_flags(flags) && hard_irqs_disabled()); ++} ++EXPORT_SYMBOL(__ipipe_spin_unlock_debug); ++ ++#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ ++ ++void ipipe_prepare_panic(void) ++{ ++#ifdef CONFIG_PRINTK ++ __ipipe_printk_bypass = 1; ++#endif ++ ipipe_context_check_off(); ++} ++EXPORT_SYMBOL_GPL(ipipe_prepare_panic); ++ ++static void __ipipe_do_work(unsigned int virq, void *cookie) ++{ ++ struct ipipe_work_header *work; ++ unsigned long flags; ++ void *curr, *tail; ++ int cpu; ++ ++ /* ++ * Work is dispatched in enqueuing order. This interrupt ++ * context can't migrate to another CPU. ++ */ ++ cpu = smp_processor_id(); ++ curr = per_cpu(work_buf, cpu); ++ ++ for (;;) { ++ flags = hard_local_irq_save(); ++ tail = per_cpu(work_tail, cpu); ++ if (curr == tail) { ++ per_cpu(work_tail, cpu) = per_cpu(work_buf, cpu); ++ hard_local_irq_restore(flags); ++ return; ++ } ++ work = curr; ++ curr += work->size; ++ hard_local_irq_restore(flags); ++ work->handler(work); ++ } ++} ++ ++void __ipipe_post_work_root(struct ipipe_work_header *work) ++{ ++ unsigned long flags; ++ void *tail; ++ int cpu; ++ ++ /* ++ * Subtle: we want to use the head stall/unstall operators, ++ * not the hard_* routines to protect against races. This way, ++ * we ensure that a root-based caller will trigger the virq ++ * handling immediately when unstalling the head stage, as a ++ * result of calling __ipipe_sync_pipeline() under the hood. ++ */ ++ flags = ipipe_test_and_stall_head(); ++ cpu = ipipe_processor_id(); ++ tail = per_cpu(work_tail, cpu); ++ ++ if (WARN_ON_ONCE((unsigned char *)tail + work->size >= ++ per_cpu(work_buf, cpu) + WORKBUF_SIZE)) ++ goto out; ++ ++ /* Work handling is deferred, so data has to be copied. 
*/ ++ memcpy(tail, work, work->size); ++ per_cpu(work_tail, cpu) = tail + work->size; ++ ipipe_post_irq_root(__ipipe_work_virq); ++out: ++ ipipe_restore_head(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_post_work_root); ++ ++void __weak __ipipe_arch_share_current(int flags) ++{ ++} ++ ++void __ipipe_share_current(int flags) ++{ ++ ipipe_root_only(); ++ ++ __ipipe_arch_share_current(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_share_current); ++ ++bool __weak ipipe_cpuidle_control(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ /* ++ * By default, always deny entering sleep state if this ++ * entails stopping the timer (i.e. C3STOP misfeature), ++ * Xenomai could not deal with this case. ++ */ ++ if (state && (state->flags & CPUIDLE_FLAG_TIMER_STOP)) ++ return false; ++ ++ /* Otherwise, allow switching to idle state. */ ++ return true; ++} ++ ++bool ipipe_enter_cpuidle(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ struct ipipe_percpu_domain_data *p; ++ ++ WARN_ON_ONCE(!irqs_disabled()); ++ ++ hard_local_irq_disable(); ++ p = ipipe_this_cpu_root_context(); ++ ++ /* ++ * Pending IRQ(s) waiting for delivery to the root stage, or ++ * the arbitrary decision of a co-kernel may deny the ++ * transition to a deeper C-state. Note that we return from ++ * this call with hard irqs off, so that we won't allow any ++ * interrupt to sneak into the IRQ log until we reach the ++ * processor idling code, or leave the CPU idle framework ++ * without sleeping. ++ */ ++ return !__ipipe_ipending_p(p) && ipipe_cpuidle_control(dev, state); ++} ++ ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || defined(CONFIG_PROVE_LOCKING) || \ ++ defined(CONFIG_PREEMPT_VOLUNTARY) || defined(CONFIG_IPIPE_DEBUG_CONTEXT) ++void __ipipe_uaccess_might_fault(void) ++{ ++ struct ipipe_percpu_domain_data *pdd; ++ struct ipipe_domain *ipd; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ipd = __ipipe_current_domain; ++ if (ipd == ipipe_root_domain) { ++ hard_local_irq_restore(flags); ++ might_fault(); ++ return; ++ } ++ ++#ifdef CONFIG_IPIPE_DEBUG_CONTEXT ++ pdd = ipipe_this_cpu_context(ipd); ++ WARN_ON_ONCE(hard_irqs_disabled_flags(flags) ++ || test_bit(IPIPE_STALL_FLAG, &pdd->status)); ++#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ (void)pdd; ++#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(__ipipe_uaccess_might_fault); ++#endif +diff -uprN kernel/kernel/ipipe/Kconfig kernel_new/kernel/ipipe/Kconfig +--- kernel/kernel/ipipe/Kconfig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Kconfig 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,47 @@ ++ ++config HAVE_IPIPE_SUPPORT ++ depends on GENERIC_CLOCKEVENTS ++ bool ++ ++config IPIPE ++ bool "Interrupt pipeline" ++ depends on HAVE_IPIPE_SUPPORT ++ default n ++ ---help--- ++ Activate this option if you want the interrupt pipeline to be ++ compiled in. ++ ++config IPIPE_CORE ++ def_bool y if IPIPE ++ ++config IPIPE_WANT_PTE_PINNING ++ bool ++ ++config IPIPE_CORE_APIREV ++ int ++ depends on IPIPE ++ default 2 ++ ---help--- ++ The API revision level we implement. ++ ++config IPIPE_WANT_APIREV_2 ++ bool ++ ++config IPIPE_TARGET_APIREV ++ int ++ depends on IPIPE ++ default IPIPE_CORE_APIREV ++ ---help--- ++ The API revision level the we want (must be <= ++ IPIPE_CORE_APIREV). 
++ ++config IPIPE_HAVE_HOSTRT ++ bool ++ ++config IPIPE_HAVE_EAGER_FPU ++ bool ++ ++if IPIPE && ARM && RAW_PRINTK && !DEBUG_LL ++comment "CAUTION: DEBUG_LL must be selected, and properly configured for" ++comment "RAW_PRINTK to work. Otherwise, you will get no output on raw_printk()" ++endif +diff -uprN kernel/kernel/ipipe/Kconfig.debug kernel_new/kernel/ipipe/Kconfig.debug +--- kernel/kernel/ipipe/Kconfig.debug 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Kconfig.debug 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,100 @@ ++config IPIPE_DEBUG ++ bool "I-pipe debugging" ++ depends on IPIPE ++ select RAW_PRINTK ++ ++config IPIPE_DEBUG_CONTEXT ++ bool "Check for illicit cross-domain calls" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ Enable this feature to arm checkpoints in the kernel that ++ verify the correct invocation context. On entry of critical ++ Linux services a warning is issued if the caller is not ++ running over the root domain. ++ ++config IPIPE_DEBUG_INTERNAL ++ bool "Enable internal debug checks" ++ depends on IPIPE_DEBUG ++ default y ++ ---help--- ++ When this feature is enabled, I-pipe will perform internal ++ consistency checks of its subsystems, e.g. on per-cpu variable ++ access. ++ ++config HAVE_IPIPE_TRACER_SUPPORT ++ bool ++ ++config IPIPE_TRACE ++ bool "Latency tracing" ++ depends on HAVE_IPIPE_TRACER_SUPPORT ++ depends on IPIPE_DEBUG ++ select CONFIG_FTRACE ++ select CONFIG_FUNCTION_TRACER ++ select KALLSYMS ++ select PROC_FS ++ ---help--- ++ Activate this option if you want to use per-function tracing of ++ the kernel. The tracer will collect data via instrumentation ++ features like the one below or with the help of explicite calls ++ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the ++ in-kernel tracing API. The collected data and runtime control ++ is available via /proc/ipipe/trace/*. ++ ++if IPIPE_TRACE ++ ++config IPIPE_TRACE_ENABLE ++ bool "Enable tracing on boot" ++ default y ++ ---help--- ++ Disable this option if you want to arm the tracer after booting ++ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce ++ boot time on slow embedded devices due to the tracer overhead. ++ ++config IPIPE_TRACE_MCOUNT ++ bool "Instrument function entries" ++ default y ++ select FTRACE ++ select FUNCTION_TRACER ++ ---help--- ++ When enabled, records every kernel function entry in the tracer ++ log. While this slows down the system noticeably, it provides ++ the highest level of information about the flow of events. ++ However, it can be switch off in order to record only explicit ++ I-pipe trace points. ++ ++config IPIPE_TRACE_IRQSOFF ++ bool "Trace IRQs-off times" ++ default y ++ ---help--- ++ Activate this option if I-pipe shall trace the longest path ++ with hard-IRQs switched off. ++ ++config IPIPE_TRACE_SHIFT ++ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" ++ range 10 18 ++ default 14 ++ ---help--- ++ The number of trace points to hold tracing data for each ++ trace path, as a power of 2. ++ ++config IPIPE_TRACE_VMALLOC ++ bool "Use vmalloc'ed trace buffer" ++ default y if EMBEDDED ++ ---help--- ++ Instead of reserving static kernel data, the required buffer ++ is allocated via vmalloc during boot-up when this option is ++ enabled. This can help to start systems that are low on memory, ++ but it slightly degrades overall performance. Try this option ++ when a traced kernel hangs unexpectedly at boot time. 
++ ++config IPIPE_TRACE_PANIC ++ bool "Enable panic back traces" ++ default y ++ ---help--- ++ Provides services to freeze and dump a back trace on panic ++ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions ++ as well as ordinary kernel oopses. You can control the number ++ of printed back trace points via /proc/ipipe/trace. ++ ++endif +diff -uprN kernel/kernel/ipipe/Makefile kernel_new/kernel/ipipe/Makefile +--- kernel/kernel/ipipe/Makefile 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/Makefile 2021-04-01 18:28:07.805863120 +0800 +@@ -0,0 +1,2 @@ ++obj-$(CONFIG_IPIPE) += core.o timer.o ++obj-$(CONFIG_IPIPE_TRACE) += tracer.o +diff -uprN kernel/kernel/ipipe/timer.c kernel_new/kernel/ipipe/timer.c +--- kernel/kernel/ipipe/timer.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/timer.c 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,656 @@ ++/* -*- linux-c -*- ++ * linux/kernel/ipipe/timer.c ++ * ++ * Copyright (C) 2012 Gilles Chanteperdrix ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * I-pipe timer request interface. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned long __ipipe_hrtimer_freq; ++ ++static LIST_HEAD(timers); ++static IPIPE_DEFINE_SPINLOCK(lock); ++ ++static DEFINE_PER_CPU(struct ipipe_timer *, percpu_timer); ++ ++/* ++ * Default request method: switch to oneshot mode if supported. ++ */ ++static void ipipe_timer_default_request(struct ipipe_timer *timer, int steal) ++{ ++ struct clock_event_device *evtdev = timer->host_timer; ++ ++ if (!(evtdev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return; ++ ++ if (clockevent_state_oneshot(evtdev) || ++ clockevent_state_oneshot_stopped(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_ONESHOT; ++ else { ++ if (clockevent_state_periodic(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_PERIODIC; ++ else if (clockevent_state_shutdown(evtdev)) ++ timer->orig_mode = CLOCK_EVT_MODE_SHUTDOWN; ++ else ++ timer->orig_mode = CLOCK_EVT_MODE_UNUSED; ++ evtdev->set_state_oneshot(evtdev); ++ evtdev->set_next_event(timer->freq / HZ, evtdev); ++ } ++} ++ ++/* ++ * Default release method: return the timer to the mode it had when ++ * starting. 
++ */ ++static void ipipe_timer_default_release(struct ipipe_timer *timer) ++{ ++ struct clock_event_device *evtdev = timer->host_timer; ++ ++ switch (timer->orig_mode) { ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ evtdev->set_state_shutdown(evtdev); ++ break; ++ case CLOCK_EVT_MODE_PERIODIC: ++ evtdev->set_state_periodic(evtdev); ++ case CLOCK_EVT_MODE_ONESHOT: ++ evtdev->set_next_event(timer->freq / HZ, evtdev); ++ break; ++ } ++} ++ ++static int get_dev_mode(struct clock_event_device *evtdev) ++{ ++ if (clockevent_state_oneshot(evtdev) || ++ clockevent_state_oneshot_stopped(evtdev)) ++ return CLOCK_EVT_MODE_ONESHOT; ++ ++ if (clockevent_state_periodic(evtdev)) ++ return CLOCK_EVT_MODE_PERIODIC; ++ ++ if (clockevent_state_shutdown(evtdev)) ++ return CLOCK_EVT_MODE_SHUTDOWN; ++ ++ return CLOCK_EVT_MODE_UNUSED; ++} ++ ++void ipipe_host_timer_register(struct clock_event_device *evtdev) ++{ ++ struct ipipe_timer *timer = evtdev->ipipe_timer; ++ ++ if (timer == NULL) ++ return; ++ ++ timer->orig_mode = CLOCK_EVT_MODE_UNUSED; ++ ++ if (timer->request == NULL) ++ timer->request = ipipe_timer_default_request; ++ ++ /* ++ * By default, use the same method as linux timer, on ARM at ++ * least, most set_next_event methods are safe to be called ++ * from Xenomai domain anyway. ++ */ ++ if (timer->set == NULL) { ++ timer->timer_set = evtdev; ++ timer->set = (typeof(timer->set))evtdev->set_next_event; ++ } ++ ++ if (timer->release == NULL) ++ timer->release = ipipe_timer_default_release; ++ ++ if (timer->name == NULL) ++ timer->name = evtdev->name; ++ ++ if (timer->rating == 0) ++ timer->rating = evtdev->rating; ++ ++ timer->freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->min_delay_ticks == 0) ++ timer->min_delay_ticks = ++ (evtdev->min_delta_ns * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->max_delay_ticks == 0) ++ timer->max_delay_ticks = ++ (evtdev->max_delta_ns * evtdev->mult) >> evtdev->shift; ++ ++ if (timer->cpumask == NULL) ++ timer->cpumask = evtdev->cpumask; ++ ++ timer->host_timer = evtdev; ++ ++ ipipe_timer_register(timer); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++void ipipe_host_timer_cleanup(struct clock_event_device *evtdev) ++{ ++ struct ipipe_timer *timer = evtdev->ipipe_timer; ++ unsigned long flags; ++ ++ if (timer == NULL) ++ return; ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ list_del(&timer->link); ++ raw_spin_unlock_irqrestore(&lock, flags); ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * register a timer: maintain them in a list sorted by rating ++ */ ++void ipipe_timer_register(struct ipipe_timer *timer) ++{ ++ struct ipipe_timer *t; ++ unsigned long flags; ++ ++ if (timer->timer_set == NULL) ++ timer->timer_set = timer; ++ ++ if (timer->cpumask == NULL) ++ timer->cpumask = cpumask_of(smp_processor_id()); ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ ++ list_for_each_entry(t, &timers, link) { ++ if (t->rating <= timer->rating) { ++ __list_add(&timer->link, t->link.prev, &t->link); ++ goto done; ++ } ++ } ++ list_add_tail(&timer->link, &timers); ++ done: ++ raw_spin_unlock_irqrestore(&lock, flags); ++} ++ ++static void ipipe_timer_request_sync(void) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ struct clock_event_device *evtdev; ++ int steal; ++ ++ if (!timer) ++ return; ++ ++ evtdev = timer->host_timer; ++ steal = evtdev != NULL && !clockevent_state_detached(evtdev); ++ timer->request(timer, steal); ++} ++ ++static void config_pcpu_timer(struct ipipe_timer *t, unsigned hrclock_freq) ++{ ++ unsigned long long tmp; ++ unsigned 
hrtimer_freq; ++ ++ if (__ipipe_hrtimer_freq != t->freq) ++ __ipipe_hrtimer_freq = t->freq; ++ ++ hrtimer_freq = t->freq; ++ if (__ipipe_hrclock_freq > UINT_MAX) ++ hrtimer_freq /= 1000; ++ ++ t->c2t_integ = hrtimer_freq / hrclock_freq; ++ tmp = (((unsigned long long) ++ (hrtimer_freq % hrclock_freq)) << 32) ++ + hrclock_freq - 1; ++ do_div(tmp, hrclock_freq); ++ t->c2t_frac = tmp; ++} ++ ++/* Set up a timer as per-cpu timer for ipipe */ ++static void install_pcpu_timer(unsigned cpu, unsigned hrclock_freq, ++ struct ipipe_timer *t) ++{ ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = t->irq; ++ per_cpu(percpu_timer, cpu) = t; ++ config_pcpu_timer(t, hrclock_freq); ++} ++ ++static void select_root_only_timer(unsigned cpu, unsigned hrclock_khz, ++ const struct cpumask *mask, ++ struct ipipe_timer *t) { ++ unsigned icpu; ++ struct clock_event_device *evtdev; ++ ++ /* ++ * If no ipipe-supported CPU shares an interrupt with the ++ * timer, we do not need to care about it. ++ */ ++ for_each_cpu(icpu, mask) { ++ if (t->irq == per_cpu(ipipe_percpu.hrtimer_irq, icpu)) { ++ evtdev = t->host_timer; ++ if (evtdev && clockevent_state_shutdown(evtdev)) ++ continue; ++ goto found; ++ } ++ } ++ ++ return; ++ ++found: ++ install_pcpu_timer(cpu, hrclock_khz, t); ++} ++ ++/* ++ * Choose per-cpu timers with the highest rating by traversing the ++ * rating-sorted list for each CPU. ++ */ ++int ipipe_select_timers(const struct cpumask *mask) ++{ ++ unsigned hrclock_freq; ++ unsigned long long tmp; ++ struct ipipe_timer *t; ++ struct clock_event_device *evtdev; ++ unsigned long flags; ++ unsigned cpu; ++ cpumask_var_t fixup; ++ ++ if (!__ipipe_hrclock_ok()) { ++ printk("I-pipe: high-resolution clock not working\n"); ++ return -ENODEV; ++ } ++ ++ if (__ipipe_hrclock_freq > UINT_MAX) { ++ tmp = __ipipe_hrclock_freq; ++ do_div(tmp, 1000); ++ hrclock_freq = tmp; ++ } else ++ hrclock_freq = __ipipe_hrclock_freq; ++ ++ ++ if (!zalloc_cpumask_var(&fixup, GFP_KERNEL)) { ++ WARN_ON(1); ++ return -ENODEV; ++ } ++ ++ raw_spin_lock_irqsave(&lock, flags); ++ ++ /* First, choose timers for the CPUs handled by ipipe */ ++ for_each_cpu(cpu, mask) { ++ list_for_each_entry(t, &timers, link) { ++ if (!cpumask_test_cpu(cpu, t->cpumask)) ++ continue; ++ ++ evtdev = t->host_timer; ++ if (evtdev && clockevent_state_shutdown(evtdev)) ++ continue; ++ goto found; ++ } ++ ++ printk("I-pipe: could not find timer for cpu #%d\n", ++ cpu); ++ goto err_remove_all; ++found: ++ install_pcpu_timer(cpu, hrclock_freq, t); ++ } ++ ++ /* ++ * Second, check if we need to fix up any CPUs not supported ++ * by ipipe (but by Linux) whose interrupt may need to be ++ * forwarded because they have the same IRQ as an ipipe-enabled ++ * timer. 
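++ * Such CPUs are the online ones left out of 'mask'; they are collected
++ * into the 'fixup' set and handed to select_root_only_timer() below.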
++ */ ++ cpumask_andnot(fixup, cpu_online_mask, mask); ++ ++ for_each_cpu(cpu, fixup) { ++ list_for_each_entry(t, &timers, link) { ++ if (!cpumask_test_cpu(cpu, t->cpumask)) ++ continue; ++ ++ select_root_only_timer(cpu, hrclock_freq, mask, t); ++ } ++ } ++ ++ raw_spin_unlock_irqrestore(&lock, flags); ++ ++ free_cpumask_var(fixup); ++ flags = ipipe_critical_enter(ipipe_timer_request_sync); ++ ipipe_timer_request_sync(); ++ ipipe_critical_exit(flags); ++ ++ return 0; ++ ++err_remove_all: ++ raw_spin_unlock_irqrestore(&lock, flags); ++ free_cpumask_var(fixup); ++ ++ for_each_cpu(cpu, mask) { ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; ++ per_cpu(percpu_timer, cpu) = NULL; ++ } ++ __ipipe_hrtimer_freq = 0; ++ ++ return -ENODEV; ++} ++ ++static void ipipe_timer_release_sync(void) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ if (timer) ++ timer->release(timer); ++} ++ ++void ipipe_timers_release(void) ++{ ++ unsigned long flags; ++ unsigned cpu; ++ ++ flags = ipipe_critical_enter(ipipe_timer_release_sync); ++ ipipe_timer_release_sync(); ++ ipipe_critical_exit(flags); ++ ++ for_each_online_cpu(cpu) { ++ per_cpu(ipipe_percpu.hrtimer_irq, cpu) = -1; ++ per_cpu(percpu_timer, cpu) = NULL; ++ __ipipe_hrtimer_freq = 0; ++ } ++} ++ ++static void __ipipe_ack_hrtimer_irq(struct irq_desc *desc) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ /* ++ * Pseudo-IRQs like pipelined IPIs have no descriptor, we have ++ * to check for this. ++ */ ++ if (desc) ++ desc->ipipe_ack(desc); ++ ++ if (timer->ack) ++ timer->ack(); ++ ++ if (desc) ++ desc->ipipe_end(desc); ++} ++ ++static int do_set_oneshot(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->orig_set_state_oneshot(cdev); ++ timer->mode_handler(CLOCK_EVT_MODE_ONESHOT, cdev); ++ ++ return 0; ++} ++ ++static int do_set_oneshot_stopped(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); ++ ++ return 0; ++} ++ ++static int do_set_periodic(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_PERIODIC, cdev); ++ ++ return 0; ++} ++ ++static int do_set_shutdown(struct clock_event_device *cdev) ++{ ++ struct ipipe_timer *timer = __ipipe_raw_cpu_read(percpu_timer); ++ ++ timer->mode_handler(CLOCK_EVT_MODE_SHUTDOWN, cdev); ++ ++ return 0; ++} ++ ++int clockevents_program_event(struct clock_event_device *dev, ++ ktime_t expires, bool force); ++ ++struct grab_timer_data { ++ void (*tick_handler)(void); ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev); ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev); ++ int retval; ++}; ++ ++static void grab_timer(void *arg) ++{ ++ struct grab_timer_data *data = arg; ++ struct clock_event_device *evtdev; ++ struct ipipe_timer *timer; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int steal, ret; ++ ++ flags = hard_local_irq_save(); ++ ++ timer = this_cpu_read(percpu_timer); ++ evtdev = timer->host_timer; ++ ret = ipipe_request_irq(ipipe_head_domain, timer->irq, ++ (ipipe_irq_handler_t)data->tick_handler, ++ NULL, __ipipe_ack_hrtimer_irq); ++ if (ret < 0 && ret != -EBUSY) { ++ hard_local_irq_restore(flags); ++ data->retval = ret; ++ return; ++ } ++ ++ steal = !clockevent_state_detached(evtdev); ++ if (steal && evtdev->ipipe_stolen == 0) { ++ 
timer->real_mult = evtdev->mult; ++ timer->real_shift = evtdev->shift; ++ timer->orig_set_state_periodic = evtdev->set_state_periodic; ++ timer->orig_set_state_oneshot = evtdev->set_state_oneshot; ++ timer->orig_set_state_oneshot_stopped = evtdev->set_state_oneshot_stopped; ++ timer->orig_set_state_shutdown = evtdev->set_state_shutdown; ++ timer->orig_set_next_event = evtdev->set_next_event; ++ timer->mode_handler = data->emumode; ++ evtdev->mult = 1; ++ evtdev->shift = 0; ++ evtdev->max_delta_ns = UINT_MAX; ++ if (timer->orig_set_state_periodic) ++ evtdev->set_state_periodic = do_set_periodic; ++ if (timer->orig_set_state_oneshot) ++ evtdev->set_state_oneshot = do_set_oneshot; ++ if (timer->orig_set_state_oneshot_stopped) ++ evtdev->set_state_oneshot_stopped = do_set_oneshot_stopped; ++ if (timer->orig_set_state_shutdown) ++ evtdev->set_state_shutdown = do_set_shutdown; ++ evtdev->set_next_event = data->emutick; ++ evtdev->ipipe_stolen = 1; ++ } ++ ++ hard_local_irq_restore(flags); ++ ++ data->retval = get_dev_mode(evtdev); ++ ++ desc = irq_to_desc(timer->irq); ++ if (desc && irqd_irq_disabled(&desc->irq_data)) ++ ipipe_enable_irq(timer->irq); ++ ++ if (evtdev->ipipe_stolen && clockevent_state_oneshot(evtdev)) { ++ ret = clockevents_program_event(evtdev, ++ evtdev->next_event, true); ++ if (ret) ++ data->retval = ret; ++ } ++} ++ ++int ipipe_timer_start(void (*tick_handler)(void), ++ void (*emumode)(enum clock_event_mode mode, ++ struct clock_event_device *cdev), ++ int (*emutick)(unsigned long evt, ++ struct clock_event_device *cdev), ++ unsigned int cpu) ++{ ++ struct grab_timer_data data; ++ int ret; ++ ++ data.tick_handler = tick_handler; ++ data.emutick = emutick; ++ data.emumode = emumode; ++ data.retval = -EINVAL; ++ ret = smp_call_function_single(cpu, grab_timer, &data, true); ++ ++ return ret ?: data.retval; ++} ++ ++static void release_timer(void *arg) ++{ ++ struct clock_event_device *evtdev; ++ struct ipipe_timer *timer; ++ struct irq_desc *desc; ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); ++ ++ timer = this_cpu_read(percpu_timer); ++ ++ desc = irq_to_desc(timer->irq); ++ if (desc && irqd_irq_disabled(&desc->irq_data)) ++ ipipe_disable_irq(timer->irq); ++ ++ ipipe_free_irq(ipipe_head_domain, timer->irq); ++ ++ evtdev = timer->host_timer; ++ if (evtdev && evtdev->ipipe_stolen) { ++ evtdev->mult = timer->real_mult; ++ evtdev->shift = timer->real_shift; ++ evtdev->set_state_periodic = timer->orig_set_state_periodic; ++ evtdev->set_state_oneshot = timer->orig_set_state_oneshot; ++ evtdev->set_state_oneshot_stopped = timer->orig_set_state_oneshot_stopped; ++ evtdev->set_state_shutdown = timer->orig_set_state_shutdown; ++ evtdev->set_next_event = timer->orig_set_next_event; ++ evtdev->ipipe_stolen = 0; ++ hard_local_irq_restore(flags); ++ if (clockevent_state_oneshot(evtdev)) ++ clockevents_program_event(evtdev, ++ evtdev->next_event, true); ++ } else ++ hard_local_irq_restore(flags); ++} ++ ++void ipipe_timer_stop(unsigned int cpu) ++{ ++ smp_call_function_single(cpu, release_timer, NULL, true); ++} ++ ++void ipipe_timer_set(unsigned long cdelay) ++{ ++ unsigned long tdelay; ++ struct ipipe_timer *t; ++ ++ t = __ipipe_raw_cpu_read(percpu_timer); ++ ++ /* ++ * Even though some architectures may use a 64 bits delay ++ * here, we voluntarily limit to 32 bits, 4 billions ticks ++ * should be enough for now. Would a timer needs more, an ++ * extra call to the tick handler would simply occur after 4 ++ * billions ticks. 
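++ * The clamped count is then scaled to hardware ticks using the
++ * c2t_integ/c2t_frac factors computed by config_pcpu_timer(), and
++ * bounded by the timer's min/max programmable delay.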
++ */ ++ if (cdelay > UINT_MAX) ++ cdelay = UINT_MAX; ++ ++ tdelay = cdelay; ++ if (t->c2t_integ != 1) ++ tdelay *= t->c2t_integ; ++ if (t->c2t_frac) ++ tdelay += ((unsigned long long)cdelay * t->c2t_frac) >> 32; ++ if (tdelay < t->min_delay_ticks) ++ tdelay = t->min_delay_ticks; ++ if (tdelay > t->max_delay_ticks) ++ tdelay = t->max_delay_ticks; ++ ++ if (t->set(tdelay, t->timer_set) < 0) ++ ipipe_raise_irq(t->irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_timer_set); ++ ++const char *ipipe_timer_name(void) ++{ ++ return per_cpu(percpu_timer, 0)->name; ++} ++EXPORT_SYMBOL_GPL(ipipe_timer_name); ++ ++unsigned ipipe_timer_ns2ticks(struct ipipe_timer *timer, unsigned ns) ++{ ++ unsigned long long tmp; ++ BUG_ON(!timer->freq); ++ tmp = (unsigned long long)ns * timer->freq; ++ do_div(tmp, 1000000000); ++ return tmp; ++} ++ ++#ifdef CONFIG_IPIPE_HAVE_HOSTRT ++/* ++ * NOTE: The architecture specific code must only call this function ++ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled. ++ * The event receiver is responsible for providing proper locking. ++ */ ++void ipipe_update_hostrt(struct timekeeper *tk) ++{ ++ struct tk_read_base *tkr = &tk->tkr_mono; ++ struct clocksource *clock = tkr->clock; ++ struct ipipe_hostrt_data data; ++ struct timespec xt; ++ ++ xt.tv_sec = tk->xtime_sec; ++ xt.tv_nsec = (long)(tkr->xtime_nsec >> tkr->shift); ++ ipipe_root_only(); ++ data.live = 1; ++ data.cycle_last = tkr->cycle_last; ++ data.mask = clock->mask; ++ data.mult = tkr->mult; ++ data.shift = tkr->shift; ++ data.wall_time_sec = xt.tv_sec; ++ data.wall_time_nsec = xt.tv_nsec; ++ data.wall_to_monotonic.tv_sec = tk->wall_to_monotonic.tv_sec; ++ data.wall_to_monotonic.tv_nsec = tk->wall_to_monotonic.tv_nsec; ++ __ipipe_notify_kevent(IPIPE_KEVT_HOSTRT, &data); ++} ++ ++#endif /* CONFIG_IPIPE_HAVE_HOSTRT */ ++ ++int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ++ bool force); ++ ++void __ipipe_timer_refresh_freq(unsigned int hrclock_freq) ++{ ++ struct ipipe_timer *t = __ipipe_raw_cpu_read(percpu_timer); ++ unsigned long flags; ++ ++ if (t && t->refresh_freq) { ++ t->freq = t->refresh_freq(); ++ flags = hard_local_irq_save(); ++ config_pcpu_timer(t, hrclock_freq); ++ hard_local_irq_restore(flags); ++ clockevents_program_event(t->host_timer, ++ t->host_timer->next_event, false); ++ } ++} +diff -uprN kernel/kernel/ipipe/tracer.c kernel_new/kernel/ipipe/tracer.c +--- kernel/kernel/ipipe/tracer.c 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/ipipe/tracer.c 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,1486 @@ ++/* -*- linux-c -*- ++ * kernel/ipipe/tracer.c ++ * ++ * Copyright (C) 2005 Luotao Fu. ++ * 2005-2008 Jan Kiszka. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, ++ * USA; either version 2 of the License, or (at your option) any later ++ * version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ ++#define IPIPE_DEFAULT_ACTIVE 0 ++#define IPIPE_DEFAULT_MAX 1 ++#define IPIPE_DEFAULT_FROZEN 2 ++ ++#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) ++#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) ++ ++#define IPIPE_DEFAULT_PRE_TRACE 10 ++#define IPIPE_DEFAULT_POST_TRACE 10 ++#define IPIPE_DEFAULT_BACK_TRACE 100 ++ ++#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ ++#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ ++ ++#define IPIPE_TFLG_NMI_LOCK 0x0001 ++#define IPIPE_TFLG_NMI_HIT 0x0002 ++#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 ++ ++#define IPIPE_TFLG_HWIRQ_OFF 0x0100 ++#define IPIPE_TFLG_FREEZING 0x0200 ++#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ ++#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 ++#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ ++#define IPIPE_TFLG_DOMSTATE_BITS 1 ++ ++#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ ++ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) ++#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ ++ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) ++ ++struct ipipe_trace_point { ++ short type; ++ short flags; ++ unsigned long eip; ++ unsigned long parent_eip; ++ unsigned long v; ++ unsigned long long timestamp; ++}; ++ ++struct ipipe_trace_path { ++ volatile int flags; ++ int dump_lock; /* separated from flags due to cross-cpu access */ ++ int trace_pos; /* next point to fill */ ++ int begin, end; /* finalised path begin and end */ ++ int post_trace; /* non-zero when in post-trace phase */ ++ unsigned long long length; /* max path length in cycles */ ++ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ ++ unsigned long nmi_saved_parent_eip; ++ unsigned long nmi_saved_v; ++ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; ++} ____cacheline_aligned_in_smp; ++ ++enum ipipe_trace_type ++{ ++ IPIPE_TRACE_FUNC = 0, ++ IPIPE_TRACE_BEGIN, ++ IPIPE_TRACE_END, ++ IPIPE_TRACE_FREEZE, ++ IPIPE_TRACE_SPECIAL, ++ IPIPE_TRACE_PID, ++ IPIPE_TRACE_EVENT, ++}; ++ ++#define IPIPE_TYPE_MASK 0x0007 ++#define IPIPE_TYPE_BITS 3 ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); ++#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ ++static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = ++ { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++int ipipe_trace_enable = 0; ++ ++static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; ++static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; ++static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; ++static IPIPE_DEFINE_SPINLOCK(global_path_lock); ++static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; ++static int post_trace = IPIPE_DEFAULT_POST_TRACE; ++static int back_trace = IPIPE_DEFAULT_BACK_TRACE; ++static int verbose_trace = 1; ++static unsigned long trace_overhead; ++ ++static unsigned long trigger_begin; ++static unsigned long trigger_end; ++ ++static DEFINE_MUTEX(out_mutex); ++static struct ipipe_trace_path *print_path; ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++static struct ipipe_trace_path *panic_path; ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++static int print_pre_trace; ++static int print_post_trace; ++ ++ ++static long __ipipe_signed_tsc2us(long long tsc); ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); ++ ++static inline void store_states(struct ipipe_domain *ipd, ++ struct ipipe_trace_point *point, int pos) ++{ ++ if (test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpu_context(ipd)->status)) ++ point->flags |= 1 << (pos + IPIPE_TFLG_DOMSTATE_SHIFT); ++ ++ if (ipd == __ipipe_current_domain) ++ point->flags |= pos << IPIPE_TFLG_CURRDOM_SHIFT; ++} ++ ++static notrace void ++__ipipe_store_domain_states(struct ipipe_trace_point *point) ++{ ++ store_states(ipipe_root_domain, point, 0); ++ if (ipipe_head_domain != ipipe_root_domain) ++ store_states(ipipe_head_domain, point, 1); ++} ++ ++static notrace int __ipipe_get_free_trace_path(int old, int cpu) ++{ ++ int new_active = old; ++ struct ipipe_trace_path *tp; ++ ++ do { ++ if (++new_active == IPIPE_TRACE_PATHS) ++ new_active = 0; ++ tp = &per_cpu(trace_path, cpu)[new_active]; ++ } while (new_active == per_cpu(max_path, cpu) || ++ new_active == per_cpu(frozen_path, cpu) || ++ tp->dump_lock); ++ ++ return new_active; ++} ++ ++static notrace void ++__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, ++ struct ipipe_trace_path *old_tp, int old_pos) ++{ ++ int i; ++ ++ new_tp->trace_pos = pre_trace+1; ++ ++ for (i = new_tp->trace_pos; i > 0; i--) ++ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], ++ &old_tp->point[WRAP_POINT_NO(old_pos-i)], ++ sizeof(struct ipipe_trace_point)); ++ ++ /* mark the end (i.e. the point before point[0]) invalid */ ++ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ unsigned long long length; ++ ++ /* do we have a new worst case? 
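++ * The begin-to-end length of the path that just closed is compared
++ * with the per-CPU maximum recorded so far; a longer path becomes the
++ * new max_path.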
*/ ++ length = tp->point[tp->end].timestamp - ++ tp->point[tp->begin].timestamp; ++ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { ++ /* we need protection here against other cpus trying ++ to start a proc dump */ ++ raw_spin_lock(&global_path_lock); ++ ++ /* active path holds new worst case */ ++ tp->length = length; ++ per_cpu(max_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ raw_spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ } ++ ++ return tp; ++} ++ ++static notrace struct ipipe_trace_path * ++__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) ++{ ++ struct ipipe_trace_path *old_tp = tp; ++ long active = per_cpu(active_path, cpu); ++ int n; ++ ++ /* frozen paths have no core (begin=end) */ ++ tp->begin = tp->end; ++ ++ /* we need protection here against other cpus trying ++ * to set their frozen path or to start a proc dump */ ++ raw_spin_lock(&global_path_lock); ++ ++ per_cpu(frozen_path, cpu) = active; ++ ++ /* find next unused trace path */ ++ active = __ipipe_get_free_trace_path(active, cpu); ++ ++ /* check if this is the first frozen path */ ++ for_each_possible_cpu(n) { ++ if (n != cpu && ++ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) ++ tp->end = -1; ++ } ++ ++ raw_spin_unlock(&global_path_lock); ++ ++ tp = &per_cpu(trace_path, cpu)[active]; ++ ++ /* migrate last entries for pre-tracing */ ++ __ipipe_migrate_pre_trace(tp, old_tp, pos); ++ ++ return tp; ++} ++ ++void notrace ++__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ struct ipipe_trace_path *tp, *old_tp; ++ int pos, next_pos, begin; ++ struct ipipe_trace_point *point; ++ unsigned long flags; ++ int cpu; ++ ++ flags = hard_local_irq_save_notrace(); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here starts a race window with NMIs - catched below */ ++ ++ /* check for NMI recursion */ ++ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { ++ tp->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* first freeze request from NMI context? */ ++ if ((type == IPIPE_TRACE_FREEZE) && ++ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { ++ /* save arguments and mark deferred freezing */ ++ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; ++ tp->nmi_saved_eip = eip; ++ tp->nmi_saved_parent_eip = parent_eip; ++ tp->nmi_saved_v = v; ++ } ++ return; /* no need for restoring flags inside IRQ */ ++ } ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (unlikely(tp != ++ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ /* get the point buffer */ ++ pos = tp->trace_pos; ++ point = &tp->point[pos]; ++ ++ /* store all trace point data */ ++ point->type = type; ++ point->flags = hard_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; ++ point->eip = eip; ++ point->parent_eip = parent_eip; ++ point->v = v; ++ ipipe_read_tsc(point->timestamp); ++ ++ __ipipe_store_domain_states(point); ++ ++ /* forward to next point buffer */ ++ next_pos = WRAP_POINT_NO(pos+1); ++ tp->trace_pos = next_pos; ++ ++ /* only mark beginning if we haven't started yet */ ++ begin = tp->begin; ++ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) ++ tp->begin = pos; ++ ++ /* end of critical path, start post-trace if not already started */ ++ if (unlikely(type == IPIPE_TRACE_END) && ++ (begin >= 0) && !tp->post_trace) ++ tp->post_trace = post_trace + 1; ++ ++ /* freeze only if the slot is free and we are not already freezing */ ++ if ((unlikely(type == IPIPE_TRACE_FREEZE) || ++ (unlikely(eip >= trigger_begin && eip <= trigger_end) && ++ type == IPIPE_TRACE_FUNC)) && ++ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && ++ !(tp->flags & IPIPE_TFLG_FREEZING)) { ++ tp->post_trace = post_trace + 1; ++ tp->flags |= IPIPE_TFLG_FREEZING; ++ } ++ ++ /* enforce end of trace in case of overflow */ ++ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { ++ tp->end = pos; ++ goto enforce_end; ++ } ++ ++ /* stop tracing this path if we are in post-trace and ++ * a) that phase is over now or ++ * b) a new TRACE_BEGIN came in but we are not freezing this path */ ++ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || ++ ((type == IPIPE_TRACE_BEGIN) && ++ !(tp->flags & IPIPE_TFLG_FREEZING))))) { ++ /* store the path's end (i.e. excluding post-trace) */ ++ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); ++ ++ enforce_end: ++ if (tp->flags & IPIPE_TFLG_FREEZING) ++ tp = __ipipe_trace_freeze(cpu, tp, pos); ++ else ++ tp = __ipipe_trace_end(cpu, tp, pos); ++ ++ /* reset the active path, maybe already start a new one */ ++ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
++ WRAP_POINT_NO(tp->trace_pos - 1) : -1; ++ tp->end = -1; ++ tp->post_trace = 0; ++ tp->flags = 0; ++ ++ /* update active_path not earlier to avoid races with NMIs */ ++ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); ++ } ++ ++ /* we still have old_tp and point, ++ * let's reset NMI lock and check for catches */ ++ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { ++ /* well, this late tagging may not immediately be visible for ++ * other cpus already dumping this path - a minor issue */ ++ point->flags |= IPIPE_TFLG_NMI_HIT; ++ ++ /* handle deferred freezing from NMI context */ ++ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, ++ old_tp->nmi_saved_parent_eip, ++ old_tp->nmi_saved_v); ++ } ++ ++ hard_local_irq_restore_notrace(flags); ++} ++ ++static unsigned long __ipipe_global_path_lock(void) ++{ ++ unsigned long flags; ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ raw_spin_lock_irqsave(&global_path_lock, flags); ++ ++ cpu = ipipe_processor_id(); ++ restart: ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ /* here is small race window with NMIs - catched below */ ++ ++ /* clear NMI events and set lock (atomically per cpu) */ ++ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | ++ IPIPE_TFLG_NMI_FREEZE_REQ)) ++ | IPIPE_TFLG_NMI_LOCK; ++ ++ /* check active_path again - some nasty NMI may have switched ++ * it meanwhile */ ++ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { ++ /* release lock on wrong path and restart */ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* there is no chance that the NMI got deferred ++ * => no need to check for pending freeze requests */ ++ goto restart; ++ } ++ ++ return flags; ++} ++ ++static void __ipipe_global_path_unlock(unsigned long flags) ++{ ++ int cpu; ++ struct ipipe_trace_path *tp; ++ ++ /* release spinlock first - it's not involved in the NMI issue */ ++ __ipipe_spin_unlock_irqbegin(&global_path_lock); ++ ++ cpu = ipipe_processor_id(); ++ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; ++ ++ /* handle deferred freezing from NMI context */ ++ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) ++ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, ++ tp->nmi_saved_parent_eip, tp->nmi_saved_v); ++ ++ /* See __ipipe_spin_lock_irqsave() and friends. 
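++ * __ipipe_spin_unlock_irqcomplete() restores the interrupt state that
++ * was saved when the global path lock was taken, now that any deferred
++ * NMI freeze request has been replayed.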
*/ ++ __ipipe_spin_unlock_irqcomplete(flags); ++} ++ ++void notrace asmlinkage ++ipipe_trace_asm(enum ipipe_trace_type type, unsigned long eip, ++ unsigned long parent_eip, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(type, eip, parent_eip, v); ++} ++ ++void notrace ipipe_trace_begin(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_begin); ++ ++void notrace ipipe_trace_end(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_end); ++ ++void notrace ipipe_trace_irqbegin(int irq, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_BEGIN, instruction_pointer(regs), ++ CALLER_ADDR1, irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_irqbegin); ++ ++void notrace ipipe_trace_irqend(int irq, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_END, instruction_pointer(regs), ++ CALLER_ADDR1, irq); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_irqend); ++ ++void notrace ipipe_trace_freeze(unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_FREEZE, CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_freeze); ++ ++void notrace ipipe_trace_special(unsigned char id, unsigned long v) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, v); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_special); ++ ++void notrace ipipe_trace_pid(pid_t pid, short prio) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, pid); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_pid); ++ ++void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), ++ CALLER_ADDR0, ++ CALLER_ADDR1, delay_tsc); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_event); ++ ++int ipipe_trace_max_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_possible_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_max_reset); ++ ++int ipipe_trace_frozen_reset(void) ++{ ++ int cpu; ++ unsigned long flags; ++ struct ipipe_trace_path *path; ++ int ret = 0; ++ ++ flags = __ipipe_global_path_lock(); ++ ++ for_each_online_cpu(cpu) { ++ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ ++ if (path->dump_lock) { ++ ret = -EBUSY; ++ break; ++ } ++ ++ path->begin = -1; ++ path->end = -1; ++ path->trace_pos = 0; ++ path->length = 0; ++ } ++ ++ __ipipe_global_path_unlock(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_frozen_reset); ++ ++static void ++__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, ++ int trylock) ++{ ++ struct task_struct *task = NULL; ++ char buf[8]; ++ int i; ++ int locked = 1; ++ ++ if (trylock) { ++ if (!read_trylock(&tasklist_lock)) ++ locked = 0; ++ } else ++ read_lock(&tasklist_lock); ++ ++ if 
(locked) ++ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); ++ ++ if (task) ++ strncpy(task_info, task->comm, 11); ++ else ++ strcpy(task_info, "--"); ++ ++ if (locked) ++ read_unlock(&tasklist_lock); ++ ++ for (i = strlen(task_info); i < 11; i++) ++ task_info[i] = ' '; ++ ++ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); ++ strcpy(task_info + (11 - strlen(buf)), buf); ++} ++ ++static void ++__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, ++ struct ipipe_trace_point *point) ++{ ++ long time; ++ int type; ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ path->point[path->begin].timestamp + point->v); ++ type = point->type >> IPIPE_TYPE_BITS; ++ ++ if (type == 0) ++ /* ++ * Event type #0 is predefined, stands for the next ++ * timer tick. ++ */ ++ sprintf(buf, "tick@%-6ld", time); ++ else ++ sprintf(buf, "%3d@%-7ld", type, time); ++} ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ ++void ipipe_trace_panic_freeze(void) ++{ ++ unsigned long flags; ++ int cpu; ++ ++ if (!ipipe_trace_enable) ++ return; ++ ++ ipipe_trace_enable = 0; ++ flags = hard_local_irq_save_notrace(); ++ ++ cpu = ipipe_processor_id(); ++ ++ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; ++ ++ hard_local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_panic_freeze); ++ ++void ipipe_trace_panic_dump(void) ++{ ++ int cnt = back_trace; ++ int start, pos; ++ char buf[16]; ++ ++ if (!panic_path) ++ return; ++ ++ ipipe_context_check_off(); ++ ++ printk(KERN_CONT "I-pipe tracer log (%d points):\n", cnt); ++ ++ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); ++ ++ while (cnt-- > 0) { ++ struct ipipe_trace_point *point = &panic_path->point[pos]; ++ long time; ++ char info[16]; ++ int i; ++ ++ printk(KERN_CONT " %c", ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ printk(KERN_CONT "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? 
++ '*' : ' ')); ++ ++ if (!point->eip) ++ printk(KERN_CONT "--\n"); ++ else { ++ __ipipe_trace_point_type(buf, point); ++ printk(KERN_CONT "%s", buf); ++ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ printk(KERN_CONT " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(info, ++ point, 1); ++ printk(KERN_CONT "%s", info); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(info, ++ panic_path, point); ++ printk(KERN_CONT "%s", info); ++ break; ++ ++ default: ++ printk(KERN_CONT "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ panic_path->point[start].timestamp); ++ printk(KERN_CONT " %5ld ", time); ++ ++ __ipipe_print_symname(NULL, point->eip); ++ printk(KERN_CONT " ("); ++ __ipipe_print_symname(NULL, point->parent_eip); ++ printk(KERN_CONT ")\n"); ++ } ++ pos = WRAP_POINT_NO(pos - 1); ++ } ++ ++ panic_path = NULL; ++} ++EXPORT_SYMBOL_GPL(ipipe_trace_panic_dump); ++ ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ ++ ++/* --- /proc output --- */ ++ ++static notrace int __ipipe_in_critical_trpath(long point_no) ++{ ++ return ((WRAP_POINT_NO(point_no-print_path->begin) < ++ WRAP_POINT_NO(print_path->end-print_path->begin)) || ++ ((print_path->end == print_path->begin) && ++ (WRAP_POINT_NO(point_no-print_path->end) > ++ print_post_trace))); ++} ++ ++static long __ipipe_signed_tsc2us(long long tsc) ++{ ++ unsigned long long abs_tsc; ++ long us; ++ ++ if (!__ipipe_hrclock_ok()) ++ return 0; ++ ++ /* ipipe_tsc2us works on unsigned => handle sign separately */ ++ abs_tsc = (tsc >= 0) ? tsc : -tsc; ++ us = ipipe_tsc2us(abs_tsc); ++ if (tsc < 0) ++ return -us; ++ else ++ return us; ++} ++ ++static void ++__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) ++{ ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ strcpy(buf, "func "); ++ break; ++ ++ case IPIPE_TRACE_BEGIN: ++ strcpy(buf, "begin "); ++ break; ++ ++ case IPIPE_TRACE_END: ++ strcpy(buf, "end "); ++ break; ++ ++ case IPIPE_TRACE_FREEZE: ++ strcpy(buf, "freeze "); ++ break; ++ ++ case IPIPE_TRACE_SPECIAL: ++ sprintf(buf, "(0x%02x) ", ++ point->type >> IPIPE_TYPE_BITS); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ sprintf(buf, "[%5d] ", (pid_t)point->v); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ sprintf(buf, "event "); ++ break; ++ } ++} ++ ++static void ++__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ char mark = ' '; ++ int point_no = point - print_path->point; ++ int i; ++ ++ if (print_path->end == point_no) ++ mark = '<'; ++ else if (print_path->begin == point_no) ++ mark = '>'; ++ else if (__ipipe_in_critical_trpath(point_no)) ++ mark = ':'; ++ seq_printf(m, "%c%c", mark, ++ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); ++ ++ if (!verbose_trace) ++ return; ++ ++ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) ++ seq_printf(m, "%c", ++ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? ++ '#' : '+') : ++ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); ++} ++ ++static void ++__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) ++{ ++ unsigned long delay = 0; ++ int next; ++ char *mark = " "; ++ ++ next = WRAP_POINT_NO(point+1 - print_path->point); ++ ++ if (next != print_path->trace_pos) ++ delay = ipipe_tsc2ns(print_path->point[next].timestamp - ++ point->timestamp); ++ ++ if (__ipipe_in_critical_trpath(point - print_path->point)) { ++ if (delay > IPIPE_DELAY_WARN) ++ mark = "! 
"; ++ else if (delay > IPIPE_DELAY_NOTE) ++ mark = "+ "; ++ } ++ seq_puts(m, mark); ++ ++ if (verbose_trace) ++ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, ++ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); ++ else ++ seq_puts(m, " "); ++} ++ ++static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) ++{ ++ char namebuf[KSYM_NAME_LEN+1]; ++ unsigned long size, offset; ++ const char *sym_name; ++ char *modname; ++ ++ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); ++ ++#ifdef CONFIG_IPIPE_TRACE_PANIC ++ if (!m) { ++ /* panic dump */ ++ if (sym_name) { ++ printk(KERN_CONT "%s+0x%lx", sym_name, offset); ++ if (modname) ++ printk(KERN_CONT " [%s]", modname); ++ } else ++ printk(KERN_CONT "<%08lx>", eip); ++ } else ++#endif /* CONFIG_IPIPE_TRACE_PANIC */ ++ { ++ if (sym_name) { ++ if (verbose_trace) { ++ seq_printf(m, "%s+0x%lx", sym_name, offset); ++ if (modname) ++ seq_printf(m, " [%s]", modname); ++ } else ++ seq_puts(m, sym_name); ++ } else ++ seq_printf(m, "<%08lx>", eip); ++ } ++} ++ ++static void __ipipe_print_headline(struct seq_file *m) ++{ ++ const char *name[2]; ++ ++ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " ++ "us\n\n", trace_overhead/1000, trace_overhead%1000); ++ ++ if (verbose_trace) { ++ name[0] = ipipe_root_domain->name; ++ if (ipipe_head_domain != ipipe_root_domain) ++ name[1] = ipipe_head_domain->name; ++ else ++ name[1] = ""; ++ ++ seq_printf(m, ++ " +----- Hard IRQs ('|': locked)\n" ++ " |+-- %s\n" ++ " ||+- %s%s\n" ++ " ||| +---------- " ++ "Delay flag ('+': > %d us, '!': > %d us)\n" ++ " ||| | +- " ++ "NMI noise ('N')\n" ++ " ||| | |\n" ++ " Type User Val. Time Delay Function " ++ "(Parent)\n", ++ name[1], name[0], ++ " ('*': domain stalled, '+': current, " ++ "'#': current+stalled)", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++ } else ++ seq_printf(m, ++ " +--------------- Hard IRQs ('|': locked)\n" ++ " | +- Delay flag " ++ "('+': > %d us, '!': > %d us)\n" ++ " | |\n" ++ " Type Time Function (Parent)\n", ++ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); ++} ++ ++static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ unsigned long length_usecs; ++ int points, cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the longest of all per-cpu paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; ++ if ((print_path == NULL) || ++ (tp->length > print_path->length)) { ++ print_path = tp; ++ break; ++ } ++ } ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!__ipipe_hrclock_ok()) { ++ seq_printf(m, "No hrclock available, dumping traces disabled\n"); ++ return NULL; ++ } ++ ++ /* does this path actually contain data? 
*/ ++ if (print_path->end == print_path->begin) ++ return NULL; ++ ++ /* number of points inside the critical path */ ++ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); ++ ++ /* pre- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, pre-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = pre_trace; ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - ++ print_post_trace; ++ ++ length_usecs = ipipe_tsc2us(print_path->length); ++ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe release #%d\n" ++ "-------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_CORE_RELEASE); ++ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " ++ "%d (-%d/+%d), Length: %lu us\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ points, print_pre_trace, print_post_trace, length_usecs); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + n)]; ++} ++ ++static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ loff_t n = ++*pos; ++ ++ /* check if we are inside the trace range with the next entry */ ++ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + ++ print_pre_trace + print_post_trace)) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin - ++ print_pre_trace + *pos)]; ++} ++ ++static void __ipipe_prtrace_stop(struct seq_file *m, void *p) ++{ ++ if (print_path) ++ print_path->dump_lock = 0; ++ mutex_unlock(&out_mutex); ++} ++ ++static int __ipipe_prtrace_show(struct seq_file *m, void *p) ++{ ++ long time; ++ struct ipipe_trace_point *point = p; ++ char buf[16]; ++ ++ if (!point->eip) { ++ seq_puts(m, "--\n"); ++ return 0; ++ } ++ ++ __ipipe_print_pathmark(m, point); ++ __ipipe_trace_point_type(buf, point); ++ seq_puts(m, buf); ++ if (verbose_trace) ++ switch (point->type & IPIPE_TYPE_MASK) { ++ case IPIPE_TRACE_FUNC: ++ seq_puts(m, " "); ++ break; ++ ++ case IPIPE_TRACE_PID: ++ __ipipe_get_task_info(buf, point, 0); ++ seq_puts(m, buf); ++ break; ++ ++ case IPIPE_TRACE_EVENT: ++ __ipipe_get_event_date(buf, print_path, point); ++ seq_puts(m, buf); ++ break; ++ ++ default: ++ seq_printf(m, "0x%08lx ", point->v); ++ } ++ ++ time = __ipipe_signed_tsc2us(point->timestamp - ++ print_path->point[print_path->begin].timestamp); ++ seq_printf(m, "%5ld", time); ++ ++ __ipipe_print_delay(m, point); ++ __ipipe_print_symname(m, point->eip); ++ seq_puts(m, " ("); ++ __ipipe_print_symname(m, point->parent_eip); ++ seq_puts(m, ")\n"); ++ ++ return 0; ++} ++ ++static struct seq_operations __ipipe_max_ptrace_ops = { ++ .start = __ipipe_max_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_max_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_max_reset(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ mutex_lock(&out_mutex); ++ ipipe_trace_max_reset(); ++ 
mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_max_prtrace_fops = { ++ .open = __ipipe_max_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_max_reset, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) ++{ ++ loff_t n = *pos; ++ ++ mutex_lock(&out_mutex); ++ ++ if (!n) { ++ struct ipipe_trace_path *tp; ++ int cpu; ++ unsigned long flags; ++ ++ /* protect against max_path/frozen_path updates while we ++ * haven't locked our target path, also avoid recursively ++ * taking global_path_lock from NMI context */ ++ flags = __ipipe_global_path_lock(); ++ ++ /* find the first of all per-cpu frozen paths */ ++ print_path = NULL; ++ for_each_online_cpu(cpu) { ++ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; ++ if (tp->end >= 0) { ++ print_path = tp; ++ break; ++ } ++ } ++ if (print_path) ++ print_path->dump_lock = 1; ++ ++ __ipipe_global_path_unlock(flags); ++ ++ if (!print_path) ++ return NULL; ++ ++ if (!__ipipe_hrclock_ok()) { ++ seq_printf(m, "No hrclock available, dumping traces disabled\n"); ++ return NULL; ++ } ++ ++ /* back- and post-tracing length, post-trace length was frozen ++ in __ipipe_trace, back-trace may have to be reduced due to ++ buffer overrun */ ++ print_pre_trace = back_trace-1; /* substract freeze point */ ++ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - ++ print_path->end - 1); ++ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) ++ print_pre_trace = IPIPE_TRACE_POINTS - 2 - ++ print_post_trace; ++ ++ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe release #%d\n" ++ "------------------------------------------------------------\n", ++ UTS_RELEASE, IPIPE_CORE_RELEASE); ++ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", ++ cpu, print_path->point[print_path->begin].timestamp, ++ print_pre_trace+1, print_post_trace); ++ __ipipe_print_headline(m); ++ } ++ ++ /* check if we are inside the trace range */ ++ if (n >= print_pre_trace + 1 + print_post_trace) ++ return NULL; ++ ++ /* return the next point to be shown */ ++ return &print_path->point[WRAP_POINT_NO(print_path->begin- ++ print_pre_trace+n)]; ++} ++ ++static struct seq_operations __ipipe_frozen_ptrace_ops = { ++ .start = __ipipe_frozen_prtrace_start, ++ .next = __ipipe_prtrace_next, ++ .stop = __ipipe_prtrace_stop, ++ .show = __ipipe_prtrace_show ++}; ++ ++static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &__ipipe_frozen_ptrace_ops); ++} ++ ++static ssize_t ++__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, pbuffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ipipe_trace_frozen_reset(); ++ if (val > 0) ++ ipipe_trace_freeze(-1); ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_frozen_prtrace_fops = { ++ .open = __ipipe_frozen_prtrace_open, ++ .read = seq_read, ++ .write = __ipipe_frozen_ctrl, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static int __ipipe_rd_proc_val(struct seq_file *p, void *data) ++{ ++ seq_printf(p, "%u\n", *(int *)p->private); ++ return 0; ++} ++ ++static ssize_t ++__ipipe_wr_proc_val(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ struct seq_file *p = file->private_data; ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ *(int *)p->private = val; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rw_proc_val_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_rd_proc_val, PDE_DATA(inode)); ++} ++ ++static const struct file_operations __ipipe_rw_proc_val_ops = { ++ .open = __ipipe_rw_proc_val_open, ++ .read = seq_read, ++ .write = __ipipe_wr_proc_val, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static void __init ++__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, ++ const char *name, int *value_ptr) ++{ ++ proc_create_data(name, 0644, trace_dir, &__ipipe_rw_proc_val_ops, ++ value_ptr); ++} ++ ++static int __ipipe_rd_trigger(struct seq_file *p, void *data) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ ++ if (trigger_begin) { ++ sprint_symbol(str, trigger_begin); ++ seq_printf(p, "%s\n", str); ++ } ++ return 0; ++} ++ ++static ssize_t ++__ipipe_wr_trigger(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ char buf[KSYM_SYMBOL_LEN]; ++ unsigned long begin, end; ++ ++ if (count > sizeof(buf) - 1) ++ count = sizeof(buf) - 1; ++ if (copy_from_user(buf, buffer, count)) ++ return -EFAULT; ++ buf[count] = 0; ++ if (buf[count-1] == '\n') ++ buf[count-1] = 0; ++ ++ begin = kallsyms_lookup_name(buf); ++ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) ++ return -ENOENT; ++ end += begin - 1; ++ ++ mutex_lock(&out_mutex); ++ /* invalidate the current range before setting a new one */ ++ trigger_end = 0; ++ wmb(); ++ ipipe_trace_frozen_reset(); ++ ++ /* set new range */ ++ trigger_begin = begin; ++ wmb(); ++ trigger_end = end; ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static int __ipipe_rw_trigger_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, __ipipe_rd_trigger, NULL); ++} ++ ++static const struct file_operations __ipipe_rw_trigger_ops = { ++ .open = __ipipe_rw_trigger_open, ++ .read = seq_read, ++ .write = __ipipe_wr_trigger, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++ ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++static void notrace ++ipipe_trace_function(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ if (!ipipe_trace_enable) ++ return; ++ 
__ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); ++} ++ ++static struct ftrace_ops ipipe_trace_ops = { ++ .func = ipipe_trace_function, ++ .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE, ++}; ++ ++static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer, ++ size_t count, loff_t *data) ++{ ++ char *end, buf[16]; ++ int val; ++ int n; ++ ++ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; ++ ++ if (copy_from_user(buf, buffer, n)) ++ return -EFAULT; ++ ++ buf[n] = '\0'; ++ val = simple_strtol(buf, &end, 0); ++ ++ if (((*end != '\0') && !isspace(*end)) || (val < 0)) ++ return -EINVAL; ++ ++ mutex_lock(&out_mutex); ++ ++ if (ipipe_trace_enable) { ++ if (!val) ++ unregister_ftrace_function(&ipipe_trace_ops); ++ } else if (val) ++ register_ftrace_function(&ipipe_trace_ops); ++ ++ ipipe_trace_enable = val; ++ ++ mutex_unlock(&out_mutex); ++ ++ return count; ++} ++ ++static const struct file_operations __ipipe_rw_enable_ops = { ++ .open = __ipipe_rw_proc_val_open, ++ .read = seq_read, ++ .write = __ipipe_wr_enable, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++ ++extern struct proc_dir_entry *ipipe_proc_root; ++ ++void __init __ipipe_tracer_hrclock_initialized(void) ++{ ++ unsigned long long start, end, min = ULLONG_MAX; ++ int i; ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ if (!per_cpu(trace_path, 0)) ++ return; ++#endif ++ /* Calculate minimum overhead of __ipipe_trace() */ ++ hard_local_irq_disable(); ++ for (i = 0; i < 100; i++) { ++ ipipe_read_tsc(start); ++ __ipipe_trace(IPIPE_TRACE_FUNC, CALLER_ADDR0, ++ CALLER_ADDR1, 0); ++ ipipe_read_tsc(end); ++ ++ end -= start; ++ if (end < min) ++ min = end; ++ } ++ hard_local_irq_enable(); ++ trace_overhead = ipipe_tsc2ns(min); ++} ++ ++void __init __ipipe_init_tracer(void) ++{ ++ struct proc_dir_entry *trace_dir; ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ int cpu, path; ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++#ifdef CONFIG_IPIPE_TRACE_VMALLOC ++ for_each_possible_cpu(cpu) { ++ struct ipipe_trace_path *tp_buf; ++ ++ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * ++ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); ++ if (!tp_buf) { ++ pr_err("I-pipe: " ++ "insufficient memory for trace buffer.\n"); ++ return; ++ } ++ memset(tp_buf, 0, ++ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); ++ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { ++ tp_buf[path].begin = -1; ++ tp_buf[path].end = -1; ++ } ++ per_cpu(trace_path, cpu) = tp_buf; ++ } ++#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ ++ ++ if (__ipipe_hrclock_ok() && !trace_overhead) ++ __ipipe_tracer_hrclock_initialized(); ++ ++#ifdef CONFIG_IPIPE_TRACE_ENABLE ++ ipipe_trace_enable = 1; ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ ftrace_enabled = 1; ++ register_ftrace_function(&ipipe_trace_ops); ++#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ ++#endif /* CONFIG_IPIPE_TRACE_ENABLE */ ++ ++ trace_dir = proc_mkdir("trace", ipipe_proc_root); ++ ++ proc_create("max", 0644, trace_dir, &__ipipe_max_prtrace_fops); ++ proc_create("frozen", 0644, trace_dir, &__ipipe_frozen_prtrace_fops); ++ ++ proc_create("trigger", 0644, trace_dir, &__ipipe_rw_trigger_ops); ++ ++ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", ++ &pre_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", ++ &post_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", ++ &back_trace); ++ __ipipe_create_trace_proc_val(trace_dir, "verbose", ++ &verbose_trace); ++#ifdef CONFIG_IPIPE_TRACE_MCOUNT ++ proc_create_data("enable", 0644, 
trace_dir, &__ipipe_rw_enable_ops, ++ &ipipe_trace_enable); ++#else /* !CONFIG_IPIPE_TRACE_MCOUNT */ ++ __ipipe_create_trace_proc_val(trace_dir, "enable", ++ &ipipe_trace_enable); ++#endif /* !CONFIG_IPIPE_TRACE_MCOUNT */ ++} +diff -uprN kernel/kernel/irq/chip.c kernel_new/kernel/irq/chip.c +--- kernel/kernel/irq/chip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/chip.c 2021-04-02 10:23:55.498805866 +0800 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include + +@@ -48,6 +49,10 @@ int irq_set_chip(unsigned int irq, struc + + if (!chip) + chip = &no_irq_chip; ++ else ++ WARN_ONCE(IS_ENABLED(CONFIG_IPIPE) && ++ (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, ++ "irqchip %s is not pipeline-safe!", chip->name); + + desc->irq_data.chip = chip; + irq_put_desc_unlock(desc, flags); +@@ -155,14 +160,6 @@ int irq_set_chip_data(unsigned int irq, + } + EXPORT_SYMBOL(irq_set_chip_data); + +-struct irq_data *irq_get_irq_data(unsigned int irq) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +- return desc ? &desc->irq_data : NULL; +-} +-EXPORT_SYMBOL_GPL(irq_get_irq_data); +- + static void irq_state_clr_disabled(struct irq_desc *desc) + { + irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); +@@ -242,9 +239,14 @@ static int __irq_startup(struct irq_desc + WARN_ON_ONCE(!irqd_is_activated(d)); + + if (d->chip->irq_startup) { ++ unsigned long flags = hard_cond_local_irq_save(); + ret = d->chip->irq_startup(d); + irq_state_clr_disabled(desc); + irq_state_clr_masked(desc); ++ hard_cond_local_irq_restore(flags); ++#ifdef CONFIG_IPIPE ++ desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } else { + irq_enable(desc); + } +@@ -309,6 +311,9 @@ void irq_shutdown(struct irq_desc *desc) + desc->irq_data.chip->irq_shutdown(&desc->irq_data); + irq_state_set_disabled(desc); + irq_state_set_masked(desc); ++#ifdef CONFIG_IPIPE ++ desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } else { + __irq_disable(desc, true); + } +@@ -331,6 +336,8 @@ void irq_shutdown_and_deactivate(struct + + void irq_enable(struct irq_desc *desc) + { ++ unsigned long flags = hard_cond_local_irq_save(); ++ + if (!irqd_irq_disabled(&desc->irq_data)) { + unmask_irq(desc); + } else { +@@ -342,10 +349,14 @@ void irq_enable(struct irq_desc *desc) + unmask_irq(desc); + } + } ++ ++ hard_cond_local_irq_restore(flags); + } + + static void __irq_disable(struct irq_desc *desc, bool mask) + { ++ unsigned long flags = hard_cond_local_irq_save(); ++ + if (irqd_irq_disabled(&desc->irq_data)) { + if (mask) + mask_irq(desc); +@@ -358,6 +369,8 @@ static void __irq_disable(struct irq_des + mask_irq(desc); + } + } ++ ++ hard_cond_local_irq_restore(flags); + } + + /** +@@ -387,11 +400,13 @@ void irq_disable(struct irq_desc *desc) + + void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) + { ++ unsigned long flags = hard_cond_local_irq_save(); + if (desc->irq_data.chip->irq_enable) + desc->irq_data.chip->irq_enable(&desc->irq_data); + else + desc->irq_data.chip->irq_unmask(&desc->irq_data); + cpumask_set_cpu(cpu, desc->percpu_enabled); ++ hard_cond_local_irq_restore(flags); + } + + void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) +@@ -428,12 +443,16 @@ void mask_irq(struct irq_desc *desc) + + void unmask_irq(struct irq_desc *desc) + { ++ unsigned long flags; ++ + if (!irqd_irq_masked(&desc->irq_data)) + return; + + if (desc->irq_data.chip->irq_unmask) { ++ flags = hard_cond_local_irq_save(); + desc->irq_data.chip->irq_unmask(&desc->irq_data); + irq_state_clr_masked(desc); ++ 
hard_cond_local_irq_restore(flags); + } + } + +@@ -630,7 +649,9 @@ static void cond_unmask_irq(struct irq_d + void handle_level_irq(struct irq_desc *desc) + { + raw_spin_lock(&desc->lock); ++#ifndef CONFIG_IPIPE + mask_ack_irq(desc); ++#endif + + if (!irq_may_run(desc)) + goto out_unlock; +@@ -666,7 +687,16 @@ static inline void preflow_handler(struc + static inline void preflow_handler(struct irq_desc *desc) { } + #endif + +-static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) ++#ifdef CONFIG_IPIPE ++static void cond_release_fasteoi_irq(struct irq_desc *desc, ++ struct irq_chip *chip) ++{ ++ if (chip->irq_release && ++ !irqd_irq_disabled(&desc->irq_data) && !desc->threads_oneshot) ++ chip->irq_release(&desc->irq_data); ++} ++#else ++static inline void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) + { + if (!(desc->istate & IRQS_ONESHOT)) { + chip->irq_eoi(&desc->irq_data); +@@ -686,6 +716,7 @@ static void cond_unmask_eoi_irq(struct i + chip->irq_eoi(&desc->irq_data); + } + } ++#endif /* !CONFIG_IPIPE */ + + /** + * handle_fasteoi_irq - irq handler for transparent controllers +@@ -718,13 +749,23 @@ void handle_fasteoi_irq(struct irq_desc + } + + kstat_incr_irqs_this_cpu(desc); ++#ifndef CONFIG_IPIPE + if (desc->istate & IRQS_ONESHOT) + mask_irq(desc); ++#endif + + preflow_handler(desc); + handle_irq_event(desc); + ++#ifdef CONFIG_IPIPE ++ /* ++ * IRQCHIP_EOI_IF_HANDLED is ignored as the I-pipe always ++ * sends EOI. ++ */ ++ cond_release_fasteoi_irq(desc, chip); ++#else /* !CONFIG_IPIPE */ + cond_unmask_eoi_irq(desc, chip); ++#endif /* !CONFIG_IPIPE */ + + raw_spin_unlock(&desc->lock); + return; +@@ -807,7 +848,9 @@ void handle_edge_irq(struct irq_desc *de + kstat_incr_irqs_this_cpu(desc); + + /* Start handling the irq */ ++#ifndef CONFIG_IPIPE + desc->irq_data.chip->irq_ack(&desc->irq_data); ++#endif + + do { + if (unlikely(!desc->action)) { +@@ -899,6 +942,11 @@ void handle_percpu_irq(struct irq_desc * + */ + __kstat_incr_irqs_this_cpu(desc); + ++#ifdef CONFIG_IPIPE ++ (void)chip; ++ handle_irq_event_percpu(desc); ++ desc->ipipe_end(desc); ++#else + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); + +@@ -906,6 +954,7 @@ void handle_percpu_irq(struct irq_desc * + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); ++#endif + } + + /** +@@ -932,13 +981,20 @@ void handle_percpu_devid_irq(struct irq_ + */ + __kstat_incr_irqs_this_cpu(desc); + ++#ifndef CONFIG_IPIPE + if (chip->irq_ack) + chip->irq_ack(&desc->irq_data); ++#endif + + if (likely(action)) { + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); ++#ifdef CONFIG_IPIPE ++ (void)chip; ++ desc->ipipe_end(desc); ++ return; ++#endif + } else { + unsigned int cpu = smp_processor_id(); + bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); +@@ -978,6 +1034,171 @@ void handle_percpu_devid_fasteoi_nmi(str + chip->irq_eoi(&desc->irq_data); + } + ++#ifdef CONFIG_IPIPE ++ ++void __ipipe_ack_level_irq(struct irq_desc *desc) ++{ ++ mask_ack_irq(desc); ++} ++ ++void __ipipe_end_level_irq(struct irq_desc *desc) ++{ ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++} ++ ++void __ipipe_ack_fasteoi_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_hold) ++ desc->irq_data.chip->irq_hold(&desc->irq_data); ++} ++ ++void __ipipe_end_fasteoi_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_release) ++ desc->irq_data.chip->irq_release(&desc->irq_data); ++} ++ ++void 
__ipipe_ack_edge_irq(struct irq_desc *desc) ++{ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++} ++ ++void __ipipe_ack_percpu_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_ack) ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ if (desc->irq_data.chip->irq_eoi) ++ desc->irq_data.chip->irq_eoi(&desc->irq_data); ++} ++ ++void __ipipe_nop_irq(struct irq_desc *desc) ++{ ++} ++ ++void __ipipe_chained_irq(struct irq_desc *desc) ++{ ++ /* ++ * XXX: Do NOT fold this into __ipipe_nop_irq(), see ++ * ipipe_chained_irq_p(). ++ */ ++} ++ ++static void __ipipe_ack_bad_irq(struct irq_desc *desc) ++{ ++ handle_bad_irq(desc); ++ WARN_ON_ONCE(1); ++} ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ if (unlikely(handle == NULL)) { ++ desc->ipipe_ack = __ipipe_ack_bad_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else { ++ if (is_chained) { ++ desc->ipipe_ack = handle; ++ desc->ipipe_end = __ipipe_nop_irq; ++ handle = __ipipe_chained_irq; ++ } else if (handle == handle_simple_irq) { ++ desc->ipipe_ack = __ipipe_nop_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else if (handle == handle_level_irq) { ++ desc->ipipe_ack = __ipipe_ack_level_irq; ++ desc->ipipe_end = __ipipe_end_level_irq; ++ } else if (handle == handle_edge_irq) { ++ desc->ipipe_ack = __ipipe_ack_edge_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else if (handle == handle_fasteoi_irq) { ++ desc->ipipe_ack = __ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = __ipipe_end_fasteoi_irq; ++ } else if (handle == handle_percpu_irq || ++ handle == handle_percpu_devid_irq) { ++ if (irq_desc_get_chip(desc) && ++ irq_desc_get_chip(desc)->irq_hold) { ++ desc->ipipe_ack = __ipipe_ack_fasteoi_irq; ++ desc->ipipe_end = __ipipe_end_fasteoi_irq; ++ } else { ++ desc->ipipe_ack = __ipipe_ack_percpu_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } ++ } else if (irq_desc_get_chip(desc) == &no_irq_chip) { ++ desc->ipipe_ack = __ipipe_nop_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } else { ++ desc->ipipe_ack = __ipipe_ack_bad_irq; ++ desc->ipipe_end = __ipipe_nop_irq; ++ } ++ } ++ ++ /* ++ * We don't cope well with lazy disabling simply because we ++ * neither track nor update the descriptor state bits, which ++ * is badly wrong. ++ */ ++ irq_settings_clr_and_set(desc, 0, _IRQ_DISABLE_UNLAZY); ++ ++ /* Suppress intermediate trampoline routine. 
*/ ++ ipipe_root_domain->irqs[desc->irq_data.irq].ackfn = desc->ipipe_ack; ++ ++ return handle; ++} ++ ++int ipipe_enable_irq(unsigned int irq) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ int err; ++ ++ desc = irq_to_desc(irq); ++ if (desc == NULL) ++ return -EINVAL; ++ ++ chip = irq_desc_get_chip(desc); ++ ++ if (chip->irq_startup && (desc->istate & IPIPE_IRQS_NEEDS_STARTUP)) { ++ ++ ipipe_root_only(); ++ ++ err = irq_activate(desc); ++ if (err) ++ return err; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ if (desc->istate & IPIPE_IRQS_NEEDS_STARTUP) { ++ desc->istate &= ~IPIPE_IRQS_NEEDS_STARTUP; ++ chip->irq_startup(&desc->irq_data); ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ } ++ ++ if (chip->irq_enable == NULL && chip->irq_unmask == NULL) ++ return -ENOSYS; ++ ++ if (chip->irq_enable) ++ chip->irq_enable(&desc->irq_data); ++ else ++ chip->irq_unmask(&desc->irq_data); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ipipe_enable_irq); ++ ++#else /* !CONFIG_IPIPE */ ++ ++irq_flow_handler_t ++__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) ++{ ++ return handle; ++} ++ ++#endif /* !CONFIG_IPIPE */ ++EXPORT_SYMBOL_GPL(__fixup_irq_handler); ++ + static void + __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, + int is_chained, const char *name) +@@ -1012,6 +1233,8 @@ __irq_do_set_handler(struct irq_desc *de + return; + } + ++ handle = __fixup_irq_handler(desc, handle, is_chained); ++ + /* Uninstall? */ + if (handle == handle_bad_irq) { + if (desc->irq_data.chip != &no_irq_chip) +@@ -1347,6 +1570,20 @@ void irq_chip_mask_parent(struct irq_dat + } + EXPORT_SYMBOL_GPL(irq_chip_mask_parent); + ++#ifdef CONFIG_IPIPE ++void irq_chip_hold_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_hold(data); ++} ++ ++void irq_chip_release_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_release(data); ++} ++#endif ++ + /** + * irq_chip_unmask_parent - Unmask the parent interrupt + * @data: Pointer to interrupt specific data +diff -uprN kernel/kernel/irq/chip.c.orig kernel_new/kernel/irq/chip.c.orig +--- kernel/kernel/irq/chip.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/chip.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1524 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar ++ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King ++ * ++ * This file contains the core interrupt handling code, for irq-chip based ++ * architectures. Detailed information is available in ++ * Documentation/core-api/genericirq.rst ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "internals.h" ++ ++static irqreturn_t bad_chained_irq(int irq, void *dev_id) ++{ ++ WARN_ONCE(1, "Chained irq %d should not call an action\n", irq); ++ return IRQ_NONE; ++} ++ ++/* ++ * Chained handlers should never call action on their IRQ. This default ++ * action will emit warning if such thing happens. 
++ */ ++struct irqaction chained_action = { ++ .handler = bad_chained_irq, ++}; ++ ++/** ++ * irq_set_chip - set the irq chip for an irq ++ * @irq: irq number ++ * @chip: pointer to irq chip description structure ++ */ ++int irq_set_chip(unsigned int irq, struct irq_chip *chip) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ ++ if (!chip) ++ chip = &no_irq_chip; ++ ++ desc->irq_data.chip = chip; ++ irq_put_desc_unlock(desc, flags); ++ /* ++ * For !CONFIG_SPARSE_IRQ make the irq show up in ++ * allocated_irqs. ++ */ ++ irq_mark_irq(irq); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_chip); ++ ++/** ++ * irq_set_type - set the irq trigger type for an irq ++ * @irq: irq number ++ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h ++ */ ++int irq_set_irq_type(unsigned int irq, unsigned int type) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ int ret = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ ret = __irq_set_trigger(desc, type); ++ irq_put_desc_busunlock(desc, flags); ++ return ret; ++} ++EXPORT_SYMBOL(irq_set_irq_type); ++ ++/** ++ * irq_set_handler_data - set irq handler data for an irq ++ * @irq: Interrupt number ++ * @data: Pointer to interrupt specific data ++ * ++ * Set the hardware irq controller data for an irq ++ */ ++int irq_set_handler_data(unsigned int irq, void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_common_data.handler_data = data; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_handler_data); ++ ++/** ++ * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset ++ * @irq_base: Interrupt number base ++ * @irq_offset: Interrupt number offset ++ * @entry: Pointer to MSI descriptor data ++ * ++ * Set the MSI descriptor entry for an irq at offset ++ */ ++int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, ++ struct msi_desc *entry) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_common_data.msi_desc = entry; ++ if (entry && !irq_offset) ++ entry->irq = irq_base; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++ ++/** ++ * irq_set_msi_desc - set MSI descriptor data for an irq ++ * @irq: Interrupt number ++ * @entry: Pointer to MSI descriptor data ++ * ++ * Set the MSI descriptor entry for an irq ++ */ ++int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) ++{ ++ return irq_set_msi_desc_off(irq, 0, entry); ++} ++ ++/** ++ * irq_set_chip_data - set irq chip data for an irq ++ * @irq: Interrupt number ++ * @data: Pointer to chip specific data ++ * ++ * Set the hardware irq chip data for an irq ++ */ ++int irq_set_chip_data(unsigned int irq, void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->irq_data.chip_data = data; ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL(irq_set_chip_data); ++ ++struct irq_data *irq_get_irq_data(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return desc ? 
&desc->irq_data : NULL; ++} ++EXPORT_SYMBOL_GPL(irq_get_irq_data); ++ ++static void irq_state_clr_disabled(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); ++} ++ ++static void irq_state_clr_masked(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); ++} ++ ++static void irq_state_clr_started(struct irq_desc *desc) ++{ ++ irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); ++} ++ ++static void irq_state_set_started(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_STARTED); ++} ++ ++enum { ++ IRQ_STARTUP_NORMAL, ++ IRQ_STARTUP_MANAGED, ++ IRQ_STARTUP_ABORT, ++}; ++ ++#ifdef CONFIG_SMP ++static int ++__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++ if (!irqd_affinity_is_managed(d)) ++ return IRQ_STARTUP_NORMAL; ++ ++ irqd_clr_managed_shutdown(d); ++ ++ if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { ++ /* ++ * Catch code which fiddles with enable_irq() on a managed ++ * and potentially shutdown IRQ. Chained interrupt ++ * installment or irq auto probing should not happen on ++ * managed irqs either. ++ */ ++ if (WARN_ON_ONCE(force)) ++ return IRQ_STARTUP_ABORT; ++ /* ++ * The interrupt was requested, but there is no online CPU ++ * in it's affinity mask. Put it into managed shutdown ++ * state and let the cpu hotplug mechanism start it up once ++ * a CPU in the mask becomes available. ++ */ ++ return IRQ_STARTUP_ABORT; ++ } ++ /* ++ * Managed interrupts have reserved resources, so this should not ++ * happen. ++ */ ++ if (WARN_ON(irq_domain_activate_irq(d, false))) ++ return IRQ_STARTUP_ABORT; ++ return IRQ_STARTUP_MANAGED; ++} ++#else ++static __always_inline int ++__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) ++{ ++ return IRQ_STARTUP_NORMAL; ++} ++#endif ++ ++static int __irq_startup(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ int ret = 0; ++ ++ /* Warn if this interrupt is not activated but try nevertheless */ ++ WARN_ON_ONCE(!irqd_is_activated(d)); ++ ++ if (d->chip->irq_startup) { ++ ret = d->chip->irq_startup(d); ++ irq_state_clr_disabled(desc); ++ irq_state_clr_masked(desc); ++ } else { ++ irq_enable(desc); ++ } ++ irq_state_set_started(desc); ++ return ret; ++} ++ ++int irq_startup(struct irq_desc *desc, bool resend, bool force) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct cpumask *aff = irq_data_get_affinity_mask(d); ++ int ret = 0; ++ ++ desc->depth = 0; ++ ++ if (irqd_is_started(d)) { ++ irq_enable(desc); ++ } else { ++ switch (__irq_startup_managed(desc, aff, force)) { ++ case IRQ_STARTUP_NORMAL: ++ ret = __irq_startup(desc); ++ irq_setup_affinity(desc); ++ break; ++ case IRQ_STARTUP_MANAGED: ++ irq_do_set_affinity(d, aff, false); ++ ret = __irq_startup(desc); ++ break; ++ case IRQ_STARTUP_ABORT: ++ irqd_set_managed_shutdown(d); ++ return 0; ++ } ++ } ++ if (resend) ++ check_irq_resend(desc); ++ ++ return ret; ++} ++ ++int irq_activate(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++ if (!irqd_affinity_is_managed(d)) ++ return irq_domain_activate_irq(d, false); ++ return 0; ++} ++ ++int irq_activate_and_startup(struct irq_desc *desc, bool resend) ++{ ++ if (WARN_ON(irq_activate(desc))) ++ return 0; ++ return irq_startup(desc, resend, IRQ_START_FORCE); ++} ++ ++static void __irq_disable(struct irq_desc *desc, bool mask); ++ ++void irq_shutdown(struct irq_desc *desc) ++{ ++ if 
(irqd_is_started(&desc->irq_data)) { ++ desc->depth = 1; ++ if (desc->irq_data.chip->irq_shutdown) { ++ desc->irq_data.chip->irq_shutdown(&desc->irq_data); ++ irq_state_set_disabled(desc); ++ irq_state_set_masked(desc); ++ } else { ++ __irq_disable(desc, true); ++ } ++ irq_state_clr_started(desc); ++ } ++} ++ ++ ++void irq_shutdown_and_deactivate(struct irq_desc *desc) ++{ ++ irq_shutdown(desc); ++ /* ++ * This must be called even if the interrupt was never started up, ++ * because the activation can happen before the interrupt is ++ * available for request/startup. It has it's own state tracking so ++ * it's safe to call it unconditionally. ++ */ ++ irq_domain_deactivate_irq(&desc->irq_data); ++} ++ ++void irq_enable(struct irq_desc *desc) ++{ ++ if (!irqd_irq_disabled(&desc->irq_data)) { ++ unmask_irq(desc); ++ } else { ++ irq_state_clr_disabled(desc); ++ if (desc->irq_data.chip->irq_enable) { ++ desc->irq_data.chip->irq_enable(&desc->irq_data); ++ irq_state_clr_masked(desc); ++ } else { ++ unmask_irq(desc); ++ } ++ } ++} ++ ++static void __irq_disable(struct irq_desc *desc, bool mask) ++{ ++ if (irqd_irq_disabled(&desc->irq_data)) { ++ if (mask) ++ mask_irq(desc); ++ } else { ++ irq_state_set_disabled(desc); ++ if (desc->irq_data.chip->irq_disable) { ++ desc->irq_data.chip->irq_disable(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } else if (mask) { ++ mask_irq(desc); ++ } ++ } ++} ++ ++/** ++ * irq_disable - Mark interrupt disabled ++ * @desc: irq descriptor which should be disabled ++ * ++ * If the chip does not implement the irq_disable callback, we ++ * use a lazy disable approach. That means we mark the interrupt ++ * disabled, but leave the hardware unmasked. That's an ++ * optimization because we avoid the hardware access for the ++ * common case where no interrupt happens after we marked it ++ * disabled. If an interrupt happens, then the interrupt flow ++ * handler masks the line at the hardware level and marks it ++ * pending. ++ * ++ * If the interrupt chip does not implement the irq_disable callback, ++ * a driver can disable the lazy approach for a particular irq line by ++ * calling 'irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY)'. This can ++ * be used for devices which cannot disable the interrupt at the ++ * device level under certain circumstances and have to use ++ * disable_irq[_nosync] instead. 
++ */ ++void irq_disable(struct irq_desc *desc) ++{ ++ __irq_disable(desc, irq_settings_disable_unlazy(desc)); ++} ++ ++void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu) ++{ ++ if (desc->irq_data.chip->irq_enable) ++ desc->irq_data.chip->irq_enable(&desc->irq_data); ++ else ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++ cpumask_set_cpu(cpu, desc->percpu_enabled); ++} ++ ++void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu) ++{ ++ if (desc->irq_data.chip->irq_disable) ++ desc->irq_data.chip->irq_disable(&desc->irq_data); ++ else ++ desc->irq_data.chip->irq_mask(&desc->irq_data); ++ cpumask_clear_cpu(cpu, desc->percpu_enabled); ++} ++ ++static inline void mask_ack_irq(struct irq_desc *desc) ++{ ++ if (desc->irq_data.chip->irq_mask_ack) { ++ desc->irq_data.chip->irq_mask_ack(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } else { ++ mask_irq(desc); ++ if (desc->irq_data.chip->irq_ack) ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ } ++} ++ ++void mask_irq(struct irq_desc *desc) ++{ ++ if (irqd_irq_masked(&desc->irq_data)) ++ return; ++ ++ if (desc->irq_data.chip->irq_mask) { ++ desc->irq_data.chip->irq_mask(&desc->irq_data); ++ irq_state_set_masked(desc); ++ } ++} ++ ++void unmask_irq(struct irq_desc *desc) ++{ ++ if (!irqd_irq_masked(&desc->irq_data)) ++ return; ++ ++ if (desc->irq_data.chip->irq_unmask) { ++ desc->irq_data.chip->irq_unmask(&desc->irq_data); ++ irq_state_clr_masked(desc); ++ } ++} ++ ++void unmask_threaded_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ if (chip->flags & IRQCHIP_EOI_THREADED) ++ chip->irq_eoi(&desc->irq_data); ++ ++ unmask_irq(desc); ++} ++ ++/* ++ * handle_nested_irq - Handle a nested irq from a irq thread ++ * @irq: the interrupt number ++ * ++ * Handle interrupts which are nested into a threaded interrupt ++ * handler. The handler function is called inside the calling ++ * threads context. ++ */ ++void handle_nested_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ irqreturn_t action_ret; ++ ++ might_sleep(); ++ ++ raw_spin_lock_irq(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ action = desc->action; ++ if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ raw_spin_unlock_irq(&desc->lock); ++ ++ action_ret = IRQ_NONE; ++ for_each_action_of_desc(desc, action) ++ action_ret |= action->thread_fn(action->irq, action->dev_id); ++ ++ if (!noirqdebug) ++ note_interrupt(desc, action_ret); ++ ++ raw_spin_lock_irq(&desc->lock); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++out_unlock: ++ raw_spin_unlock_irq(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_nested_irq); ++ ++static bool irq_check_poll(struct irq_desc *desc) ++{ ++ if (!(desc->istate & IRQS_POLL_INPROGRESS)) ++ return false; ++ return irq_wait_for_poll(desc); ++} ++ ++static bool irq_may_run(struct irq_desc *desc) ++{ ++ unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; ++ ++ /* ++ * If the interrupt is not in progress and is not an armed ++ * wakeup interrupt, proceed. ++ */ ++ if (!irqd_has_set(&desc->irq_data, mask)) ++ return true; ++ ++ /* ++ * If the interrupt is an armed wakeup source, mark it pending ++ * and suspended, disable it and notify the pm core about the ++ * event. 
++ */ ++ if (irq_pm_check_wakeup(desc)) ++ return false; ++ ++ /* ++ * Handle a potential concurrent poll on a different core. ++ */ ++ return irq_check_poll(desc); ++} ++ ++/** ++ * handle_simple_irq - Simple and software-decoded IRQs. ++ * @desc: the interrupt description structure for this irq ++ * ++ * Simple interrupts are either sent from a demultiplexing interrupt ++ * handler or come from hardware, where no interrupt hardware control ++ * is necessary. ++ * ++ * Note: The caller is expected to handle the ack, clear, mask and ++ * unmask issues if necessary. ++ */ ++void handle_simple_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ handle_irq_event(desc); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_simple_irq); ++ ++/** ++ * handle_untracked_irq - Simple and software-decoded IRQs. ++ * @desc: the interrupt description structure for this irq ++ * ++ * Untracked interrupts are sent from a demultiplexing interrupt ++ * handler when the demultiplexer does not know which device it its ++ * multiplexed irq domain generated the interrupt. IRQ's handled ++ * through here are not subjected to stats tracking, randomness, or ++ * spurious interrupt detection. ++ * ++ * Note: Like handle_simple_irq, the caller is expected to handle ++ * the ack, clear, mask and unmask issues if necessary. ++ */ ++void handle_untracked_irq(struct irq_desc *desc) ++{ ++ unsigned int flags = 0; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ desc->istate &= ~IRQS_PENDING; ++ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ raw_spin_unlock(&desc->lock); ++ ++ __handle_irq_event_percpu(desc, &flags); ++ ++ raw_spin_lock(&desc->lock); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_untracked_irq); ++ ++/* ++ * Called unconditionally from handle_level_irq() and only for oneshot ++ * interrupts from handle_fasteoi_irq() ++ */ ++static void cond_unmask_irq(struct irq_desc *desc) ++{ ++ /* ++ * We need to unmask in the following cases: ++ * - Standard level irq (IRQF_ONESHOT is not set) ++ * - Oneshot irq which did not wake the thread (caused by a ++ * spurious interrupt or a primary handler handling it ++ * completely). ++ */ ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) ++ unmask_irq(desc); ++} ++ ++/** ++ * handle_level_irq - Level type irq handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Level type interrupts are active as long as the hardware line has ++ * the active level. This may require to mask the interrupt and unmask ++ * it after the associated handler has acknowledged the device, so the ++ * interrupt line is back to inactive. 
++ */ ++void handle_level_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ mask_ack_irq(desc); ++ ++ if (!irq_may_run(desc)) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * keep it masked and get out of here ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_irq(desc); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_level_irq); ++ ++#ifdef CONFIG_IRQ_PREFLOW_FASTEOI ++static inline void preflow_handler(struct irq_desc *desc) ++{ ++ if (desc->preflow_handler) ++ desc->preflow_handler(&desc->irq_data); ++} ++#else ++static inline void preflow_handler(struct irq_desc *desc) { } ++#endif ++ ++static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) ++{ ++ if (!(desc->istate & IRQS_ONESHOT)) { ++ chip->irq_eoi(&desc->irq_data); ++ return; ++ } ++ /* ++ * We need to unmask in the following cases: ++ * - Oneshot irq which did not wake the thread (caused by a ++ * spurious interrupt or a primary handler handling it ++ * completely). ++ */ ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { ++ chip->irq_eoi(&desc->irq_data); ++ unmask_irq(desc); ++ } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { ++ chip->irq_eoi(&desc->irq_data); ++ } ++} ++ ++/** ++ * handle_fasteoi_irq - irq handler for transparent controllers ++ * @desc: the interrupt description structure for this irq ++ * ++ * Only a single callback will be issued to the chip: an ->eoi() ++ * call when the interrupt has been serviced. This enables support ++ * for modern forms of interrupt handlers, which handle the flow ++ * details in hardware, transparently. ++ */ ++void handle_fasteoi_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_irq); ++ ++/** ++ * handle_fasteoi_nmi - irq handler for NMI interrupt lines ++ * @desc: the interrupt description structure for this irq ++ * ++ * A simple NMI-safe handler, considering the restrictions ++ * from request_nmi. ++ * ++ * Only a single callback will be issued to the chip: an ->eoi() ++ * call when the interrupt has been serviced. This enables support ++ * for modern forms of interrupt handlers, which handle the flow ++ * details in hardware, transparently. 
++ */ ++void handle_fasteoi_nmi(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ this_cpu_inc(*desc->kstat_irqs); ++ trace_irq_handler_entry(irq, action); ++ /* ++ * NMIs cannot be shared, there is only one action. ++ */ ++ res = action->handler(irq, action->dev_id); ++ trace_irq_handler_exit(irq, action, res); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); ++ ++/** ++ * handle_edge_irq - edge type IRQ handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Interrupt occures on the falling and/or rising edge of a hardware ++ * signal. The occurrence is latched into the irq controller hardware ++ * and must be acked in order to be reenabled. After the ack another ++ * interrupt can happen on the same source even before the first one ++ * is handled by the associated event handler. If this happens it ++ * might be necessary to disable (mask) the interrupt depending on the ++ * controller hardware. This requires to reenable the interrupt inside ++ * of the loop which handles the interrupts which have arrived while ++ * the handler was running. If all pending interrupts are handled, the ++ * loop is left. ++ */ ++void handle_edge_irq(struct irq_desc *desc) ++{ ++ raw_spin_lock(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (!irq_may_run(desc)) { ++ desc->istate |= IRQS_PENDING; ++ mask_ack_irq(desc); ++ goto out_unlock; ++ } ++ ++ /* ++ * If its disabled or no action available then mask it and get ++ * out of here. ++ */ ++ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { ++ desc->istate |= IRQS_PENDING; ++ mask_ack_irq(desc); ++ goto out_unlock; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ ++ /* Start handling the irq */ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ do { ++ if (unlikely(!desc->action)) { ++ mask_irq(desc); ++ goto out_unlock; ++ } ++ ++ /* ++ * When another irq arrived while we were handling ++ * one, we could have masked the irq. ++ * Renable it, if it was not disabled in meantime. ++ */ ++ if (unlikely(desc->istate & IRQS_PENDING)) { ++ if (!irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data)) ++ unmask_irq(desc); ++ } ++ ++ handle_irq_event(desc); ++ ++ } while ((desc->istate & IRQS_PENDING) && ++ !irqd_irq_disabled(&desc->irq_data)); ++ ++out_unlock: ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL(handle_edge_irq); ++ ++#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER ++/** ++ * handle_edge_eoi_irq - edge eoi type IRQ handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Similar as the above handle_edge_irq, but using eoi and w/o the ++ * mask/unmask logic. ++ */ ++void handle_edge_eoi_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ ++ raw_spin_lock(&desc->lock); ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ if (!irq_may_run(desc)) { ++ desc->istate |= IRQS_PENDING; ++ goto out_eoi; ++ } ++ ++ /* ++ * If its disabled or no action available then mask it and get ++ * out of here. 
++ */ ++ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { ++ desc->istate |= IRQS_PENDING; ++ goto out_eoi; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ ++ do { ++ if (unlikely(!desc->action)) ++ goto out_eoi; ++ ++ handle_irq_event(desc); ++ ++ } while ((desc->istate & IRQS_PENDING) && ++ !irqd_irq_disabled(&desc->irq_data)); ++ ++out_eoi: ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++#endif ++ ++/** ++ * handle_percpu_irq - Per CPU local irq handler ++ * @desc: the interrupt description structure for this irq ++ * ++ * Per CPU interrupts on SMP machines without locking requirements ++ */ ++void handle_percpu_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ ++ /* ++ * PER CPU interrupts are not serialized. Do not touch ++ * desc->tot_count. ++ */ ++ __kstat_incr_irqs_this_cpu(desc); ++ ++ if (chip->irq_ack) ++ chip->irq_ack(&desc->irq_data); ++ ++ handle_irq_event_percpu(desc); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++/** ++ * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids ++ * @desc: the interrupt description structure for this irq ++ * ++ * Per CPU interrupts on SMP machines without locking requirements. Same as ++ * handle_percpu_irq() above but with the following extras: ++ * ++ * action->percpu_dev_id is a pointer to percpu variables which ++ * contain the real device id for the cpu on which this handler is ++ * called ++ */ ++void handle_percpu_devid_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ /* ++ * PER CPU interrupts are not serialized. Do not touch ++ * desc->tot_count. ++ */ ++ __kstat_incr_irqs_this_cpu(desc); ++ ++ if (chip->irq_ack) ++ chip->irq_ack(&desc->irq_data); ++ ++ if (likely(action)) { ++ trace_irq_handler_entry(irq, action); ++ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); ++ trace_irq_handler_exit(irq, action, res); ++ } else { ++ unsigned int cpu = smp_processor_id(); ++ bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); ++ ++ if (enabled) ++ irq_percpu_disable(desc, cpu); ++ ++ pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n", ++ enabled ? " and unmasked" : "", irq, cpu); ++ } ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++/** ++ * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu ++ * dev ids ++ * @desc: the interrupt description structure for this irq ++ * ++ * Similar to handle_fasteoi_nmi, but handling the dev_id cookie ++ * as a percpu pointer. 
++ */ ++void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = irq_desc_get_chip(desc); ++ struct irqaction *action = desc->action; ++ unsigned int irq = irq_desc_get_irq(desc); ++ irqreturn_t res; ++ ++ this_cpu_inc(*desc->kstat_irqs); ++ trace_irq_handler_entry(irq, action); ++ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); ++ trace_irq_handler_exit(irq, action, res); ++ ++ if (chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++} ++ ++static void ++__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, ++ int is_chained, const char *name) ++{ ++ if (!handle) { ++ handle = handle_bad_irq; ++ } else { ++ struct irq_data *irq_data = &desc->irq_data; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ /* ++ * With hierarchical domains we might run into a ++ * situation where the outermost chip is not yet set ++ * up, but the inner chips are there. Instead of ++ * bailing we install the handler, but obviously we ++ * cannot enable/startup the interrupt at this point. ++ */ ++ while (irq_data) { ++ if (irq_data->chip != &no_irq_chip) ++ break; ++ /* ++ * Bail out if the outer chip is not set up ++ * and the interrrupt supposed to be started ++ * right away. ++ */ ++ if (WARN_ON(is_chained)) ++ return; ++ /* Try the parent */ ++ irq_data = irq_data->parent_data; ++ } ++#endif ++ if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) ++ return; ++ } ++ ++ /* Uninstall? */ ++ if (handle == handle_bad_irq) { ++ if (desc->irq_data.chip != &no_irq_chip) ++ mask_ack_irq(desc); ++ irq_state_set_disabled(desc); ++ if (is_chained) ++ desc->action = NULL; ++ desc->depth = 1; ++ } ++ desc->handle_irq = handle; ++ desc->name = name; ++ ++ if (handle != handle_bad_irq && is_chained) { ++ unsigned int type = irqd_get_trigger_type(&desc->irq_data); ++ ++ /* ++ * We're about to start this interrupt immediately, ++ * hence the need to set the trigger configuration. ++ * But the .set_type callback may have overridden the ++ * flow handler, ignoring that we're dealing with a ++ * chained interrupt. Reset it immediately because we ++ * do know better. 
++ */ ++ if (type != IRQ_TYPE_NONE) { ++ __irq_set_trigger(desc, type); ++ desc->handle_irq = handle; ++ } ++ ++ irq_settings_set_noprobe(desc); ++ irq_settings_set_norequest(desc); ++ irq_settings_set_nothread(desc); ++ desc->action = &chained_action; ++ irq_activate_and_startup(desc, IRQ_RESEND); ++ } ++} ++ ++void ++__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, ++ const char *name) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ __irq_do_set_handler(desc, handle, is_chained, name); ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(__irq_set_handler); ++ ++void ++irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, ++ void *data) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ desc->irq_common_data.handler_data = data; ++ __irq_do_set_handler(desc, handle, 1, NULL); ++ ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); ++ ++void ++irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, ++ irq_flow_handler_t handle, const char *name) ++{ ++ irq_set_chip(irq, chip); ++ __irq_set_handler(irq, handle, 0, name); ++} ++EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); ++ ++void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) ++{ ++ unsigned long flags, trigger, tmp; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return; ++ ++ /* ++ * Warn when a driver sets the no autoenable flag on an already ++ * active interrupt. ++ */ ++ WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); ++ ++ irq_settings_clr_and_set(desc, clr, set); ++ ++ trigger = irqd_get_trigger_type(&desc->irq_data); ++ ++ irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | ++ IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); ++ if (irq_settings_has_no_balance_set(desc)) ++ irqd_set(&desc->irq_data, IRQD_NO_BALANCING); ++ if (irq_settings_is_per_cpu(desc)) ++ irqd_set(&desc->irq_data, IRQD_PER_CPU); ++ if (irq_settings_can_move_pcntxt(desc)) ++ irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); ++ if (irq_settings_is_level(desc)) ++ irqd_set(&desc->irq_data, IRQD_LEVEL); ++ ++ tmp = irq_settings_get_trigger_mask(desc); ++ if (tmp != IRQ_TYPE_NONE) ++ trigger = tmp; ++ ++ irqd_set(&desc->irq_data, trigger); ++ ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(irq_modify_status); ++ ++/** ++ * irq_cpu_online - Invoke all irq_cpu_online functions. ++ * ++ * Iterate through all irqs and invoke the chip.irq_cpu_online() ++ * for each. ++ */ ++void irq_cpu_online(void) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ chip = irq_data_get_irq_chip(&desc->irq_data); ++ if (chip && chip->irq_cpu_online && ++ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || ++ !irqd_irq_disabled(&desc->irq_data))) ++ chip->irq_cpu_online(&desc->irq_data); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ } ++} ++ ++/** ++ * irq_cpu_offline - Invoke all irq_cpu_offline functions. ++ * ++ * Iterate through all irqs and invoke the chip.irq_cpu_offline() ++ * for each. 
++ */ ++void irq_cpu_offline(void) ++{ ++ struct irq_desc *desc; ++ struct irq_chip *chip; ++ unsigned long flags; ++ unsigned int irq; ++ ++ for_each_active_irq(irq) { ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ chip = irq_data_get_irq_chip(&desc->irq_data); ++ if (chip && chip->irq_cpu_offline && ++ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || ++ !irqd_irq_disabled(&desc->irq_data))) ++ chip->irq_cpu_offline(&desc->irq_data); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ } ++} ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ ++#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS ++/** ++ * handle_fasteoi_ack_irq - irq handler for edge hierarchy ++ * stacked on transparent controllers ++ * ++ * @desc: the interrupt description structure for this irq ++ * ++ * Like handle_fasteoi_irq(), but for use with hierarchy where ++ * the irq_chip also needs to have its ->irq_ack() function ++ * called. ++ */ ++void handle_fasteoi_ack_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ /* Start handling the irq */ ++ desc->irq_data.chip->irq_ack(&desc->irq_data); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); ++ ++/** ++ * handle_fasteoi_mask_irq - irq handler for level hierarchy ++ * stacked on transparent controllers ++ * ++ * @desc: the interrupt description structure for this irq ++ * ++ * Like handle_fasteoi_irq(), but for use with hierarchy where ++ * the irq_chip also needs to have its ->irq_mask_ack() function ++ * called. 
++ */ ++void handle_fasteoi_mask_irq(struct irq_desc *desc) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ ++ raw_spin_lock(&desc->lock); ++ mask_ack_irq(desc); ++ ++ if (!irq_may_run(desc)) ++ goto out; ++ ++ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); ++ ++ /* ++ * If its disabled or no action available ++ * then mask it and get out of here: ++ */ ++ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { ++ desc->istate |= IRQS_PENDING; ++ mask_irq(desc); ++ goto out; ++ } ++ ++ kstat_incr_irqs_this_cpu(desc); ++ if (desc->istate & IRQS_ONESHOT) ++ mask_irq(desc); ++ ++ preflow_handler(desc); ++ handle_irq_event(desc); ++ ++ cond_unmask_eoi_irq(desc, chip); ++ ++ raw_spin_unlock(&desc->lock); ++ return; ++out: ++ if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) ++ chip->irq_eoi(&desc->irq_data); ++ raw_spin_unlock(&desc->lock); ++} ++EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); ++ ++#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ ++ ++/** ++ * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if ++ * NULL) ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_enable_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_enable) ++ data->chip->irq_enable(data); ++ else ++ data->chip->irq_unmask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_enable_parent); ++ ++/** ++ * irq_chip_disable_parent - Disable the parent interrupt (defaults to mask if ++ * NULL) ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_disable_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_disable) ++ data->chip->irq_disable(data); ++ else ++ data->chip->irq_mask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_disable_parent); ++ ++/** ++ * irq_chip_ack_parent - Acknowledge the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_ack_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_ack(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_ack_parent); ++ ++/** ++ * irq_chip_mask_parent - Mask the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_mask_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_mask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_mask_parent); ++ ++/** ++ * irq_chip_unmask_parent - Unmask the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_unmask_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_unmask(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_unmask_parent); ++ ++/** ++ * irq_chip_eoi_parent - Invoke EOI on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ */ ++void irq_chip_eoi_parent(struct irq_data *data) ++{ ++ data = data->parent_data; ++ data->chip->irq_eoi(data); ++} ++EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); ++ ++/** ++ * irq_chip_set_affinity_parent - Set affinity on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @dest: The affinity mask to set ++ * @force: Flag to enforce setting (disable online checks) ++ * ++ * Conditinal, as the underlying parent chip might not implement it. 
++ */ ++int irq_chip_set_affinity_parent(struct irq_data *data, ++ const struct cpumask *dest, bool force) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_set_affinity) ++ return data->chip->irq_set_affinity(data, dest, force); ++ ++ return -ENOSYS; ++} ++EXPORT_SYMBOL_GPL(irq_chip_set_affinity_parent); ++ ++/** ++ * irq_chip_set_type_parent - Set IRQ type on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h ++ * ++ * Conditional, as the underlying parent chip might not implement it. ++ */ ++int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) ++{ ++ data = data->parent_data; ++ ++ if (data->chip->irq_set_type) ++ return data->chip->irq_set_type(data, type); ++ ++ return -ENOSYS; ++} ++EXPORT_SYMBOL_GPL(irq_chip_set_type_parent); ++ ++/** ++ * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware ++ * @data: Pointer to interrupt specific data ++ * ++ * Iterate through the domain hierarchy of the interrupt and check ++ * whether a hw retrigger function exists. If yes, invoke it. ++ */ ++int irq_chip_retrigger_hierarchy(struct irq_data *data) ++{ ++ for (data = data->parent_data; data; data = data->parent_data) ++ if (data->chip && data->chip->irq_retrigger) ++ return data->chip->irq_retrigger(data); ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_set_vcpu_affinity_parent - Set vcpu affinity on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @vcpu_info: The vcpu affinity information ++ */ ++int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) ++{ ++ data = data->parent_data; ++ if (data->chip->irq_set_vcpu_affinity) ++ return data->chip->irq_set_vcpu_affinity(data, vcpu_info); ++ ++ return -ENOSYS; ++} ++ ++/** ++ * irq_chip_set_wake_parent - Set/reset wake-up on the parent interrupt ++ * @data: Pointer to interrupt specific data ++ * @on: Whether to set or reset the wake-up capability of this irq ++ * ++ * Conditional, as the underlying parent chip might not implement it. ++ */ ++int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) ++{ ++ data = data->parent_data; ++ ++ if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) ++ return 0; ++ ++ if (data->chip->irq_set_wake) ++ return data->chip->irq_set_wake(data, on); ++ ++ return -ENOSYS; ++} ++#endif ++ ++/** ++ * irq_chip_compose_msi_msg - Componse msi message for a irq chip ++ * @data: Pointer to interrupt specific data ++ * @msg: Pointer to the MSI message ++ * ++ * For hierarchical domains we find the first chip in the hierarchy ++ * which implements the irq_compose_msi_msg callback. For non ++ * hierarchical we use the top level chip. ++ */ ++int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) ++{ ++ struct irq_data *pos = NULL; ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ for (; data; data = data->parent_data) ++#endif ++ if (data->chip && data->chip->irq_compose_msi_msg) ++ pos = data; ++ if (!pos) ++ return -ENOSYS; ++ ++ pos->chip->irq_compose_msi_msg(pos, msg); ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_pm_get - Enable power for an IRQ chip ++ * @data: Pointer to interrupt specific data ++ * ++ * Enable the power to the IRQ chip referenced by the interrupt data ++ * structure. 
++ */ ++int irq_chip_pm_get(struct irq_data *data) ++{ ++ int retval; ++ ++ if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { ++ retval = pm_runtime_get_sync(data->chip->parent_device); ++ if (retval < 0) { ++ pm_runtime_put_noidle(data->chip->parent_device); ++ return retval; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * irq_chip_pm_put - Disable power for an IRQ chip ++ * @data: Pointer to interrupt specific data ++ * ++ * Disable the power to the IRQ chip referenced by the interrupt data ++ * structure, belongs. Note that power will only be disabled, once this ++ * function has been called for all IRQs that have called irq_chip_pm_get(). ++ */ ++int irq_chip_pm_put(struct irq_data *data) ++{ ++ int retval = 0; ++ ++ if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) ++ retval = pm_runtime_put(data->chip->parent_device); ++ ++ return (retval < 0) ? retval : 0; ++} +diff -uprN kernel/kernel/irq/dummychip.c kernel_new/kernel/irq/dummychip.c +--- kernel/kernel/irq/dummychip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/dummychip.c 2021-04-01 18:28:07.806863119 +0800 +@@ -43,7 +43,7 @@ struct irq_chip no_irq_chip = { + .irq_enable = noop, + .irq_disable = noop, + .irq_ack = ack_bad, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + + /* +@@ -59,6 +59,6 @@ struct irq_chip dummy_irq_chip = { + .irq_ack = noop, + .irq_mask = noop, + .irq_unmask = noop, +- .flags = IRQCHIP_SKIP_SET_WAKE, ++ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_PIPELINE_SAFE, + }; + EXPORT_SYMBOL_GPL(dummy_irq_chip); +diff -uprN kernel/kernel/irq/generic-chip.c kernel_new/kernel/irq/generic-chip.c +--- kernel/kernel/irq/generic-chip.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/generic-chip.c 2021-04-01 18:28:07.806863119 +0800 +@@ -37,12 +37,13 @@ void irq_gc_mask_disable_reg(struct irq_ + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -56,12 +57,13 @@ void irq_gc_mask_set_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + *ct->mask_cache |= mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); + +@@ -76,12 +78,13 @@ void irq_gc_mask_clr_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); + +@@ -96,12 +99,13 @@ void irq_gc_unmask_enable_reg(struct irq + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.enable); + *ct->mask_cache |= mask; +- irq_gc_unlock(gc); ++ 
irq_gc_unlock(gc, flags); + } + + /** +@@ -112,11 +116,12 @@ void irq_gc_ack_set_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); + +@@ -128,11 +133,12 @@ void irq_gc_ack_clr_bit(struct irq_data + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = ~d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -151,13 +157,14 @@ void irq_gc_mask_disable_and_ack_set(str + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -168,11 +175,12 @@ void irq_gc_eoi(struct irq_data *d) + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); ++ unsigned long flags; + u32 mask = d->mask; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.eoi); +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + } + + /** +@@ -187,17 +195,18 @@ void irq_gc_eoi(struct irq_data *d) + int irq_gc_set_wake(struct irq_data *d, unsigned int on) + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); ++ unsigned long flags; + u32 mask = d->mask; + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + +- irq_gc_lock(gc); ++ flags = irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; +- irq_gc_unlock(gc); ++ irq_gc_unlock(gc, flags); + return 0; + } + +diff -uprN kernel/kernel/irq/internals.h kernel_new/kernel/irq/internals.h +--- kernel/kernel/irq/internals.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h 2021-04-02 09:20:04.679709490 +0800 +@@ -62,6 +62,7 @@ enum { + IRQS_SUSPENDED = 0x00000800, + IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, ++ IPIPE_IRQS_NEEDS_STARTUP= 0x80000000, + }; + + #include "debug.h" +diff -uprN kernel/kernel/irq/internals.h.orig kernel_new/kernel/irq/internals.h.orig +--- kernel/kernel/irq/internals.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h.orig 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,492 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * IRQ subsystem internal functions and variables: ++ * ++ * Do not ever include this file from anything else than ++ * kernel/irq/. Do not even think about using any information outside ++ * of this file for your non core code. 
++ */ ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_SPARSE_IRQ ++# define IRQ_BITMAP_BITS (NR_IRQS + 8196) ++#else ++# define IRQ_BITMAP_BITS NR_IRQS ++#endif ++ ++#define istate core_internal_state__do_not_mess_with_it ++ ++extern bool noirqdebug; ++ ++extern struct irqaction chained_action; ++ ++/* ++ * Bits used by threaded handlers: ++ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run ++ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed ++ * IRQTF_AFFINITY - irq thread is requested to adjust affinity ++ * IRQTF_FORCED_THREAD - irq action is force threaded ++ */ ++enum { ++ IRQTF_RUNTHREAD, ++ IRQTF_WARNED, ++ IRQTF_AFFINITY, ++ IRQTF_FORCED_THREAD, ++}; ++ ++/* ++ * Bit masks for desc->core_internal_state__do_not_mess_with_it ++ * ++ * IRQS_AUTODETECT - autodetection in progress ++ * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt ++ * detection ++ * IRQS_POLL_INPROGRESS - polling in progress ++ * IRQS_ONESHOT - irq is not unmasked in primary handler ++ * IRQS_REPLAY - irq is replayed ++ * IRQS_WAITING - irq is waiting ++ * IRQS_PENDING - irq is pending and replayed later ++ * IRQS_SUSPENDED - irq is suspended ++ * IRQS_NMI - irq line is used to deliver NMIs ++ */ ++enum { ++ IRQS_AUTODETECT = 0x00000001, ++ IRQS_SPURIOUS_DISABLED = 0x00000002, ++ IRQS_POLL_INPROGRESS = 0x00000008, ++ IRQS_ONESHOT = 0x00000020, ++ IRQS_REPLAY = 0x00000040, ++ IRQS_WAITING = 0x00000080, ++ IRQS_PENDING = 0x00000200, ++ IRQS_SUSPENDED = 0x00000800, ++ IRQS_TIMINGS = 0x00001000, ++ IRQS_NMI = 0x00002000, ++}; ++ ++#include "debug.h" ++#include "settings.h" ++ ++extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags); ++extern void __disable_irq(struct irq_desc *desc); ++extern void __enable_irq(struct irq_desc *desc); ++ ++#define IRQ_RESEND true ++#define IRQ_NORESEND false ++ ++#define IRQ_START_FORCE true ++#define IRQ_START_COND false ++ ++extern int irq_activate(struct irq_desc *desc); ++extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); ++extern int irq_startup(struct irq_desc *desc, bool resend, bool force); ++ ++extern void irq_shutdown(struct irq_desc *desc); ++extern void irq_shutdown_and_deactivate(struct irq_desc *desc); ++extern void irq_enable(struct irq_desc *desc); ++extern void irq_disable(struct irq_desc *desc); ++extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); ++extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); ++extern void mask_irq(struct irq_desc *desc); ++extern void unmask_irq(struct irq_desc *desc); ++extern void unmask_threaded_irq(struct irq_desc *desc); ++ ++#ifdef CONFIG_SPARSE_IRQ ++static inline void irq_mark_irq(unsigned int irq) { } ++#else ++extern void irq_mark_irq(unsigned int irq); ++#endif ++ ++extern int __irq_get_irqchip_state(struct irq_data *data, ++ enum irqchip_irq_state which, ++ bool *state); ++ ++extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); ++ ++irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); ++irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); ++irqreturn_t handle_irq_event(struct irq_desc *desc); ++ ++/* Resending of interrupts :*/ ++void check_irq_resend(struct irq_desc *desc); ++bool irq_wait_for_poll(struct irq_desc *desc); ++void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); ++ ++#ifdef CONFIG_PROC_FS ++extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); ++extern void 
unregister_irq_proc(unsigned int irq, struct irq_desc *desc); ++extern void register_handler_proc(unsigned int irq, struct irqaction *action); ++extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); ++#else ++static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { } ++static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { } ++static inline void register_handler_proc(unsigned int irq, ++ struct irqaction *action) { } ++static inline void unregister_handler_proc(unsigned int irq, ++ struct irqaction *action) { } ++#endif ++ ++extern bool irq_can_set_affinity_usr(unsigned int irq); ++ ++extern void irq_set_thread_affinity(struct irq_desc *desc); ++ ++extern int irq_do_set_affinity(struct irq_data *data, ++ const struct cpumask *dest, bool force); ++ ++#ifdef CONFIG_SMP ++extern int irq_setup_affinity(struct irq_desc *desc); ++#else ++static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } ++#endif ++ ++/* Inline functions for support of irq chips on slow busses */ ++static inline void chip_bus_lock(struct irq_desc *desc) ++{ ++ if (unlikely(desc->irq_data.chip->irq_bus_lock)) ++ desc->irq_data.chip->irq_bus_lock(&desc->irq_data); ++} ++ ++static inline void chip_bus_sync_unlock(struct irq_desc *desc) ++{ ++ if (unlikely(desc->irq_data.chip->irq_bus_sync_unlock)) ++ desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); ++} ++ ++#define _IRQ_DESC_CHECK (1 << 0) ++#define _IRQ_DESC_PERCPU (1 << 1) ++ ++#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) ++#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) ++ ++#define for_each_action_of_desc(desc, act) \ ++ for (act = desc->action; act; act = act->next) ++ ++struct irq_desc * ++__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, ++ unsigned int check); ++void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); ++ ++static inline struct irq_desc * ++irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) ++{ ++ return __irq_get_desc_lock(irq, flags, true, check); ++} ++ ++static inline void ++irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) ++{ ++ __irq_put_desc_unlock(desc, flags, true); ++} ++ ++static inline struct irq_desc * ++irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) ++{ ++ return __irq_get_desc_lock(irq, flags, false, check); ++} ++ ++static inline void ++irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) ++{ ++ __irq_put_desc_unlock(desc, flags, false); ++} ++ ++#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) ++ ++static inline unsigned int irqd_get(struct irq_data *d) ++{ ++ return __irqd_to_state(d); ++} ++ ++/* ++ * Manipulation functions for irq_data.state ++ */ ++static inline void irqd_set_move_pending(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline void irqd_clr_move_pending(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING; ++} ++ ++static inline void irqd_set_managed_shutdown(struct irq_data *d) ++{ ++ __irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_clr_managed_shutdown(struct irq_data *d) ++{ ++ __irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN; ++} ++ ++static inline void irqd_clear(struct irq_data *d, unsigned int mask) ++{ ++ __irqd_to_state(d) &= ~mask; ++} ++ ++static inline void irqd_set(struct irq_data *d, unsigned int mask) ++{ ++ 
__irqd_to_state(d) |= mask; ++} ++ ++static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) ++{ ++ return __irqd_to_state(d) & mask; ++} ++ ++static inline void irq_state_set_disabled(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); ++} ++ ++static inline void irq_state_set_masked(struct irq_desc *desc) ++{ ++ irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); ++} ++ ++#undef __irqd_to_state ++ ++static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) ++{ ++ __this_cpu_inc(*desc->kstat_irqs); ++ __this_cpu_inc(kstat.irqs_sum); ++} ++ ++static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) ++{ ++ __kstat_incr_irqs_this_cpu(desc); ++ desc->tot_count++; ++} ++ ++static inline int irq_desc_get_node(struct irq_desc *desc) ++{ ++ return irq_common_data_get_node(&desc->irq_common_data); ++} ++ ++static inline int irq_desc_is_chained(struct irq_desc *desc) ++{ ++ return (desc->action && desc->action == &chained_action); ++} ++ ++#ifdef CONFIG_PM_SLEEP ++bool irq_pm_check_wakeup(struct irq_desc *desc); ++void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); ++void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); ++#else ++static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } ++static inline void ++irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } ++static inline void ++irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } ++#endif ++ ++#ifdef CONFIG_IRQ_TIMINGS ++ ++#define IRQ_TIMINGS_SHIFT 5 ++#define IRQ_TIMINGS_SIZE (1 << IRQ_TIMINGS_SHIFT) ++#define IRQ_TIMINGS_MASK (IRQ_TIMINGS_SIZE - 1) ++ ++/** ++ * struct irq_timings - irq timings storing structure ++ * @values: a circular buffer of u64 encoded values ++ * @count: the number of elements in the array ++ */ ++struct irq_timings { ++ u64 values[IRQ_TIMINGS_SIZE]; ++ int count; ++}; ++ ++DECLARE_PER_CPU(struct irq_timings, irq_timings); ++ ++extern void irq_timings_free(int irq); ++extern int irq_timings_alloc(int irq); ++ ++static inline void irq_remove_timings(struct irq_desc *desc) ++{ ++ desc->istate &= ~IRQS_TIMINGS; ++ ++ irq_timings_free(irq_desc_get_irq(desc)); ++} ++ ++static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act) ++{ ++ int irq = irq_desc_get_irq(desc); ++ int ret; ++ ++ /* ++ * We don't need the measurement because the idle code already ++ * knows the next expiry event. ++ */ ++ if (act->flags & __IRQF_TIMER) ++ return; ++ ++ /* ++ * In case the timing allocation fails, we just want to warn, ++ * not fail, so letting the system boot anyway. ++ */ ++ ret = irq_timings_alloc(irq); ++ if (ret) { ++ pr_warn("Failed to allocate irq timing stats for irq%d (%d)", ++ irq, ret); ++ return; ++ } ++ ++ desc->istate |= IRQS_TIMINGS; ++} ++ ++extern void irq_timings_enable(void); ++extern void irq_timings_disable(void); ++ ++DECLARE_STATIC_KEY_FALSE(irq_timing_enabled); ++ ++/* ++ * The interrupt number and the timestamp are encoded into a single ++ * u64 variable to optimize the size. ++ * 48 bit time stamp and 16 bit IRQ number is way sufficient. ++ * Who cares an IRQ after 78 hours of idle time? 
++ */ ++static inline u64 irq_timing_encode(u64 timestamp, int irq) ++{ ++ return (timestamp << 16) | irq; ++} ++ ++static inline int irq_timing_decode(u64 value, u64 *timestamp) ++{ ++ *timestamp = value >> 16; ++ return value & U16_MAX; ++} ++ ++/* ++ * The function record_irq_time is only called in one place in the ++ * interrupts handler. We want this function always inline so the code ++ * inside is embedded in the function and the static key branching ++ * code can act at the higher level. Without the explicit ++ * __always_inline we can end up with a function call and a small ++ * overhead in the hotpath for nothing. ++ */ ++static __always_inline void record_irq_time(struct irq_desc *desc) ++{ ++ if (!static_branch_likely(&irq_timing_enabled)) ++ return; ++ ++ if (desc->istate & IRQS_TIMINGS) { ++ struct irq_timings *timings = this_cpu_ptr(&irq_timings); ++ ++ timings->values[timings->count & IRQ_TIMINGS_MASK] = ++ irq_timing_encode(local_clock(), ++ irq_desc_get_irq(desc)); ++ ++ timings->count++; ++ } ++} ++#else ++static inline void irq_remove_timings(struct irq_desc *desc) {} ++static inline void irq_setup_timings(struct irq_desc *desc, ++ struct irqaction *act) {}; ++static inline void record_irq_time(struct irq_desc *desc) {} ++#endif /* CONFIG_IRQ_TIMINGS */ ++ ++ ++#ifdef CONFIG_GENERIC_IRQ_CHIP ++void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, ++ int num_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler); ++#else ++static inline void ++irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, ++ int num_ct, unsigned int irq_base, ++ void __iomem *reg_base, irq_flow_handler_t handler) { } ++#endif /* CONFIG_GENERIC_IRQ_CHIP */ ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static inline bool irq_can_move_pcntxt(struct irq_data *data) ++{ ++ return irqd_can_move_in_process_context(data); ++} ++static inline bool irq_move_pending(struct irq_data *data) ++{ ++ return irqd_is_setaffinity_pending(data); ++} ++static inline void ++irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ cpumask_copy(desc->pending_mask, mask); ++} ++static inline void ++irq_get_pending(struct cpumask *mask, struct irq_desc *desc) ++{ ++ cpumask_copy(mask, desc->pending_mask); ++} ++static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) ++{ ++ return desc->pending_mask; ++} ++bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); ++#else /* CONFIG_GENERIC_PENDING_IRQ */ ++static inline bool irq_can_move_pcntxt(struct irq_data *data) ++{ ++ return true; ++} ++static inline bool irq_move_pending(struct irq_data *data) ++{ ++ return false; ++} ++static inline void ++irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) ++{ ++} ++static inline void ++irq_get_pending(struct cpumask *mask, struct irq_desc *desc) ++{ ++} ++static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) ++{ ++ return NULL; ++} ++static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) ++{ ++ return false; ++} ++#endif /* !CONFIG_GENERIC_PENDING_IRQ */ ++ ++#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) ++static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) ++{ ++ irqd_set_activated(data); ++ return 0; ++} ++static inline void irq_domain_deactivate_irq(struct irq_data *data) ++{ ++ irqd_clr_activated(data); ++} ++#endif ++ ++#ifdef CONFIG_GENERIC_IRQ_DEBUGFS ++#include ++ ++void 
irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); ++static inline void irq_remove_debugfs_entry(struct irq_desc *desc) ++{ ++ debugfs_remove(desc->debugfs_file); ++ kfree(desc->dev_name); ++} ++void irq_debugfs_copy_devname(int irq, struct device *dev); ++# ifdef CONFIG_IRQ_DOMAIN ++void irq_domain_debugfs_init(struct dentry *root); ++# else ++static inline void irq_domain_debugfs_init(struct dentry *root) ++{ ++} ++# endif ++#else /* CONFIG_GENERIC_IRQ_DEBUGFS */ ++static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d) ++{ ++} ++static inline void irq_remove_debugfs_entry(struct irq_desc *d) ++{ ++} ++static inline void irq_debugfs_copy_devname(int irq, struct device *dev) ++{ ++} ++#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */ +diff -uprN kernel/kernel/irq/internals.h.rej kernel_new/kernel/irq/internals.h.rej +--- kernel/kernel/irq/internals.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/internals.h.rej 2021-04-01 18:28:07.806863119 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/irq/internals.h 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/irq/internals.h 2021-03-22 09:21:43.218415421 +0800 ++@@ -60,6 +60,7 @@ enum { ++ IRQS_PENDING = 0x00000200, ++ IRQS_SUSPENDED = 0x00000800, ++ IRQS_TIMINGS = 0x00001000, +++ IPIPE_IRQS_NEEDS_STARTUP= 0x80000000, ++ }; ++ ++ #include "debug.h" +diff -uprN kernel/kernel/irq/irqdesc.c kernel_new/kernel/irq/irqdesc.c +--- kernel/kernel/irq/irqdesc.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/irqdesc.c 2021-04-01 18:28:07.807863118 +0800 +@@ -125,6 +125,9 @@ static void desc_set_defaults(unsigned i + for_each_possible_cpu(cpu) + *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; + desc_smp_init(desc, node, affinity); ++#ifdef CONFIG_IPIPE ++ desc->istate |= IPIPE_IRQS_NEEDS_STARTUP; ++#endif + } + + int nr_irqs = NR_IRQS; +@@ -573,11 +576,13 @@ int __init early_irq_init(void) + return arch_early_irq_init(); + } + ++#ifndef CONFIG_IPIPE + struct irq_desc *irq_to_desc(unsigned int irq) + { + return (irq < NR_IRQS) ? irq_desc + irq : NULL; + } + EXPORT_SYMBOL(irq_to_desc); ++#endif /* CONFIG_IPIPE */ + + static void free_desc(unsigned int irq) + { +diff -uprN kernel/kernel/irq/manage.c kernel_new/kernel/irq/manage.c +--- kernel/kernel/irq/manage.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/manage.c 2021-04-01 18:28:07.809863116 +0800 +@@ -957,9 +957,14 @@ again: + + desc->threads_oneshot &= ~action->thread_mask; + ++#ifndef CONFIG_IPIPE + if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data)) + unmask_threaded_irq(desc); ++#else /* CONFIG_IPIPE */ ++ if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data)) ++ desc->ipipe_end(desc); ++#endif /* CONFIG_IPIPE */ + + out_unlock: + raw_spin_unlock_irq(&desc->lock); +diff -uprN kernel/kernel/irq/manage.c.orig kernel_new/kernel/irq/manage.c.orig +--- kernel/kernel/irq/manage.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/irq/manage.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2748 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar ++ * Copyright (C) 2005-2006 Thomas Gleixner ++ * ++ * This file contains driver APIs to the irq subsystem. 
++ */ ++ ++#define pr_fmt(fmt) "genirq: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internals.h" ++ ++#ifdef CONFIG_IRQ_FORCED_THREADING ++__read_mostly bool force_irqthreads; ++EXPORT_SYMBOL_GPL(force_irqthreads); ++ ++static int __init setup_forced_irqthreads(char *arg) ++{ ++ force_irqthreads = true; ++ return 0; ++} ++early_param("threadirqs", setup_forced_irqthreads); ++#endif ++ ++static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) ++{ ++ struct irq_data *irqd = irq_desc_get_irq_data(desc); ++ bool inprogress; ++ ++ do { ++ unsigned long flags; ++ ++ /* ++ * Wait until we're out of the critical section. This might ++ * give the wrong answer due to the lack of memory barriers. ++ */ ++ while (irqd_irq_inprogress(&desc->irq_data)) ++ cpu_relax(); ++ ++ /* Ok, that indicated we're done: double-check carefully. */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ inprogress = irqd_irq_inprogress(&desc->irq_data); ++ ++ /* ++ * If requested and supported, check at the chip whether it ++ * is in flight at the hardware level, i.e. already pending ++ * in a CPU and waiting for service and acknowledge. ++ */ ++ if (!inprogress && sync_chip) { ++ /* ++ * Ignore the return code. inprogress is only updated ++ * when the chip supports it. ++ */ ++ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, ++ &inprogress); ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ /* Oops, that failed? */ ++ } while (inprogress); ++} ++ ++/** ++ * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) ++ * @irq: interrupt number to wait for ++ * ++ * This function waits for any pending hard IRQ handlers for this ++ * interrupt to complete before returning. If you use this ++ * function while holding a resource the IRQ handler may need you ++ * will deadlock. It does not take associated threaded handlers ++ * into account. ++ * ++ * Do not use this for shutdown scenarios where you must be sure ++ * that all parts (hardirq and threaded handler) have completed. ++ * ++ * Returns: false if a threaded handler is active. ++ * ++ * This function may be called - with care - from IRQ context. ++ * ++ * It does not check whether there is an interrupt in flight at the ++ * hardware level, but not serviced yet, as this might deadlock when ++ * called with interrupts disabled and the target CPU of the interrupt ++ * is the current CPU. ++ */ ++bool synchronize_hardirq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ __synchronize_hardirq(desc, false); ++ return !atomic_read(&desc->threads_active); ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL(synchronize_hardirq); ++ ++/** ++ * synchronize_irq - wait for pending IRQ handlers (on other CPUs) ++ * @irq: interrupt number to wait for ++ * ++ * This function waits for any pending IRQ handlers for this interrupt ++ * to complete before returning. If you use this function while ++ * holding a resource the IRQ handler may need you will deadlock. ++ * ++ * Can only be called from preemptible code as it might sleep when ++ * an interrupt thread is associated to @irq. ++ * ++ * It optionally makes sure (when the irq chip supports that method) ++ * that the interrupt is not pending in any CPU and waiting for ++ * service. 
++ */ ++void synchronize_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc) { ++ __synchronize_hardirq(desc, true); ++ /* ++ * We made sure that no hardirq handler is ++ * running. Now verify that no threaded handlers are ++ * active. ++ */ ++ wait_event(desc->wait_for_threads, ++ !atomic_read(&desc->threads_active)); ++ } ++} ++EXPORT_SYMBOL(synchronize_irq); ++ ++#ifdef CONFIG_SMP ++cpumask_var_t irq_default_affinity; ++ ++static bool __irq_can_set_affinity(struct irq_desc *desc) ++{ ++ if (!desc || !irqd_can_balance(&desc->irq_data) || ++ !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity) ++ return false; ++ return true; ++} ++ ++/** ++ * irq_can_set_affinity - Check if the affinity of a given irq can be set ++ * @irq: Interrupt to check ++ * ++ */ ++int irq_can_set_affinity(unsigned int irq) ++{ ++ return __irq_can_set_affinity(irq_to_desc(irq)); ++} ++ ++/** ++ * irq_can_set_affinity_usr - Check if affinity of a irq can be set from user space ++ * @irq: Interrupt to check ++ * ++ * Like irq_can_set_affinity() above, but additionally checks for the ++ * AFFINITY_MANAGED flag. ++ */ ++bool irq_can_set_affinity_usr(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ return __irq_can_set_affinity(desc) && ++ !irqd_affinity_is_managed(&desc->irq_data); ++} ++ ++/** ++ * irq_set_thread_affinity - Notify irq threads to adjust affinity ++ * @desc: irq descriptor which has affitnity changed ++ * ++ * We just set IRQTF_AFFINITY and delegate the affinity setting ++ * to the interrupt thread itself. We can not call ++ * set_cpus_allowed_ptr() here as we hold desc->lock and this ++ * code can be called from hard interrupt context. ++ */ ++void irq_set_thread_affinity(struct irq_desc *desc) ++{ ++ struct irqaction *action; ++ ++ for_each_action_of_desc(desc, action) ++ if (action->thread) ++ set_bit(IRQTF_AFFINITY, &action->thread_flags); ++} ++ ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++static void irq_validate_effective_affinity(struct irq_data *data) ++{ ++ const struct cpumask *m = irq_data_get_effective_affinity_mask(data); ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ ++ if (!cpumask_empty(m)) ++ return; ++ pr_warn_once("irq_chip %s did not update eff. 
affinity mask of irq %u\n", ++ chip->name, data->irq); ++} ++ ++static inline void irq_init_effective_affinity(struct irq_data *data, ++ const struct cpumask *mask) ++{ ++ cpumask_copy(irq_data_get_effective_affinity_mask(data), mask); ++} ++#else ++static inline void irq_validate_effective_affinity(struct irq_data *data) { } ++static inline void irq_init_effective_affinity(struct irq_data *data, ++ const struct cpumask *mask) { } ++#endif ++ ++int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, ++ bool force) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ int ret; ++ ++ if (!chip || !chip->irq_set_affinity) ++ return -EINVAL; ++ ++ ret = chip->irq_set_affinity(data, mask, force); ++ switch (ret) { ++ case IRQ_SET_MASK_OK: ++ case IRQ_SET_MASK_OK_DONE: ++ cpumask_copy(desc->irq_common_data.affinity, mask); ++ case IRQ_SET_MASK_OK_NOCOPY: ++ irq_validate_effective_affinity(data); ++ irq_set_thread_affinity(desc); ++ ret = 0; ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ irqd_set_move_pending(data); ++ irq_copy_pending(desc, dest); ++ return 0; ++} ++#else ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ return -EBUSY; ++} ++#endif ++ ++static int irq_try_set_affinity(struct irq_data *data, ++ const struct cpumask *dest, bool force) ++{ ++ int ret = irq_do_set_affinity(data, dest, force); ++ ++ /* ++ * In case that the underlying vector management is busy and the ++ * architecture supports the generic pending mechanism then utilize ++ * this to avoid returning an error to user space. ++ */ ++ if (ret == -EBUSY && !force) ++ ret = irq_set_affinity_pending(data, dest); ++ return ret; ++} ++ ++static bool irq_set_affinity_deactivated(struct irq_data *data, ++ const struct cpumask *mask, bool force) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ /* ++ * Handle irq chips which can handle affinity only in activated ++ * state correctly ++ * ++ * If the interrupt is not yet activated, just store the affinity ++ * mask and do not call the chip driver at all. On activation the ++ * driver has to make sure anyway that the interrupt is in a ++ * useable state so startup works. 
++ */ ++ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || ++ irqd_is_activated(data) || !irqd_affinity_on_activate(data)) ++ return false; ++ ++ cpumask_copy(desc->irq_common_data.affinity, mask); ++ irq_init_effective_affinity(data, mask); ++ irqd_set(data, IRQD_AFFINITY_SET); ++ return true; ++} ++ ++int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, ++ bool force) ++{ ++ struct irq_chip *chip = irq_data_get_irq_chip(data); ++ struct irq_desc *desc = irq_data_to_desc(data); ++ int ret = 0; ++ ++ if (!chip || !chip->irq_set_affinity) ++ return -EINVAL; ++ ++ if (irq_set_affinity_deactivated(data, mask, force)) ++ return 0; ++ ++ if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { ++ ret = irq_try_set_affinity(data, mask, force); ++ } else { ++ irqd_set_move_pending(data); ++ irq_copy_pending(desc, mask); ++ } ++ ++ if (desc->affinity_notify) { ++ kref_get(&desc->affinity_notify->kref); ++ if (!schedule_work(&desc->affinity_notify->work)) { ++ /* Work was already scheduled, drop our extra ref */ ++ kref_put(&desc->affinity_notify->kref, ++ desc->affinity_notify->release); ++ } ++ } ++ irqd_set(data, IRQD_AFFINITY_SET); ++ ++ return ret; ++} ++ ++int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ unsigned long flags; ++ int ret; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return ret; ++} ++ ++int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ desc->affinity_hint = m; ++ irq_put_desc_unlock(desc, flags); ++ /* set the initial affinity to prevent every interrupt being on CPU0 */ ++ if (m) ++ __irq_set_affinity(irq, m, false); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_affinity_hint); ++ ++static void irq_affinity_notify(struct work_struct *work) ++{ ++ struct irq_affinity_notify *notify = ++ container_of(work, struct irq_affinity_notify, work); ++ struct irq_desc *desc = irq_to_desc(notify->irq); ++ cpumask_var_t cpumask; ++ unsigned long flags; ++ ++ if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) ++ goto out; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ if (irq_move_pending(&desc->irq_data)) ++ irq_get_pending(cpumask, desc); ++ else ++ cpumask_copy(cpumask, desc->irq_common_data.affinity); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ notify->notify(notify, cpumask); ++ ++ free_cpumask_var(cpumask); ++out: ++ kref_put(¬ify->kref, notify->release); ++} ++ ++/** ++ * irq_set_affinity_notifier - control notification of IRQ affinity changes ++ * @irq: Interrupt for which to enable/disable notification ++ * @notify: Context for notification, or %NULL to disable ++ * notification. Function pointers must be initialised; ++ * the other fields will be initialised by this function. ++ * ++ * Must be called in process context. Notification may only be enabled ++ * after the IRQ is allocated and must be disabled before the IRQ is ++ * freed using free_irq(). 
++ */ ++int ++irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_affinity_notify *old_notify; ++ unsigned long flags; ++ ++ /* The release function is promised process context */ ++ might_sleep(); ++ ++ if (!desc || desc->istate & IRQS_NMI) ++ return -EINVAL; ++ ++ /* Complete initialisation of *notify */ ++ if (notify) { ++ notify->irq = irq; ++ kref_init(¬ify->kref); ++ INIT_WORK(¬ify->work, irq_affinity_notify); ++ } ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ old_notify = desc->affinity_notify; ++ desc->affinity_notify = notify; ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ if (old_notify) { ++ if (cancel_work_sync(&old_notify->work)) { ++ /* Pending work had a ref, put that one too */ ++ kref_put(&old_notify->kref, old_notify->release); ++ } ++ kref_put(&old_notify->kref, old_notify->release); ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); ++ ++#ifndef CONFIG_AUTO_IRQ_AFFINITY ++/* ++ * Generic version of the affinity autoselector. ++ */ ++int irq_setup_affinity(struct irq_desc *desc) ++{ ++ struct cpumask *set = irq_default_affinity; ++ int ret, node = irq_desc_get_node(desc); ++ static DEFINE_RAW_SPINLOCK(mask_lock); ++ static struct cpumask mask; ++ ++ /* Excludes PER_CPU and NO_BALANCE interrupts */ ++ if (!__irq_can_set_affinity(desc)) ++ return 0; ++ ++ raw_spin_lock(&mask_lock); ++ /* ++ * Preserve the managed affinity setting and a userspace affinity ++ * setup, but make sure that one of the targets is online. ++ */ ++ if (irqd_affinity_is_managed(&desc->irq_data) || ++ irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { ++ if (cpumask_intersects(desc->irq_common_data.affinity, ++ cpu_online_mask)) ++ set = desc->irq_common_data.affinity; ++ else ++ irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); ++ } ++ ++ cpumask_and(&mask, cpu_online_mask, set); ++ if (cpumask_empty(&mask)) ++ cpumask_copy(&mask, cpu_online_mask); ++ ++ if (node != NUMA_NO_NODE) { ++ const struct cpumask *nodemask = cpumask_of_node(node); ++ ++ /* make sure at least one of the cpus in nodemask is online */ ++ if (cpumask_intersects(&mask, nodemask)) ++ cpumask_and(&mask, &mask, nodemask); ++ } ++ ret = irq_do_set_affinity(&desc->irq_data, &mask, false); ++ raw_spin_unlock(&mask_lock); ++ return ret; ++} ++#else ++/* Wrapper for ALPHA specific affinity selector magic */ ++int irq_setup_affinity(struct irq_desc *desc) ++{ ++ return irq_select_affinity(irq_desc_get_irq(desc)); ++} ++#endif /* CONFIG_AUTO_IRQ_AFFINITY */ ++#endif /* CONFIG_SMP */ ++ ++ ++/** ++ * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt ++ * @irq: interrupt number to set affinity ++ * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU ++ * specific data for percpu_devid interrupts ++ * ++ * This function uses the vCPU specific data to set the vCPU ++ * affinity for an irq. The vCPU specific data is passed from ++ * outside, such as KVM. One example code path is as below: ++ * KVM -> IOMMU -> irq_set_vcpu_affinity(). 
++ */ ++int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ struct irq_data *data; ++ struct irq_chip *chip; ++ int ret = -ENOSYS; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ data = irq_desc_get_irq_data(desc); ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip && chip->irq_set_vcpu_affinity) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ ret = chip->irq_set_vcpu_affinity(data, vcpu_info); ++ irq_put_desc_unlock(desc, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); ++ ++void __disable_irq(struct irq_desc *desc) ++{ ++ if (!desc->depth++) ++ irq_disable(desc); ++} ++ ++static int __disable_irq_nosync(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return -EINVAL; ++ __disable_irq(desc); ++ irq_put_desc_busunlock(desc, flags); ++ return 0; ++} ++ ++/** ++ * disable_irq_nosync - disable an irq without waiting ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Disables and Enables are ++ * nested. ++ * Unlike disable_irq(), this function does not ensure existing ++ * instances of the IRQ handler have completed before returning. ++ * ++ * This function may be called from IRQ context. ++ */ ++void disable_irq_nosync(unsigned int irq) ++{ ++ __disable_irq_nosync(irq); ++} ++EXPORT_SYMBOL(disable_irq_nosync); ++ ++/** ++ * disable_irq - disable an irq and wait for completion ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Enables and Disables are ++ * nested. ++ * This function waits for any pending IRQ handlers for this interrupt ++ * to complete before returning. If you use this function while ++ * holding a resource the IRQ handler may need you will deadlock. ++ * ++ * This function may be called - with care - from IRQ context. ++ */ ++void disable_irq(unsigned int irq) ++{ ++ if (!__disable_irq_nosync(irq)) ++ synchronize_irq(irq); ++} ++EXPORT_SYMBOL(disable_irq); ++ ++/** ++ * disable_hardirq - disables an irq and waits for hardirq completion ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Enables and Disables are ++ * nested. ++ * This function waits for any pending hard IRQ handlers for this ++ * interrupt to complete before returning. If you use this function while ++ * holding a resource the hard IRQ handler may need you will deadlock. ++ * ++ * When used to optimistically disable an interrupt from atomic context ++ * the return value must be checked. ++ * ++ * Returns: false if a threaded handler is active. ++ * ++ * This function may be called - with care - from IRQ context. ++ */ ++bool disable_hardirq(unsigned int irq) ++{ ++ if (!__disable_irq_nosync(irq)) ++ return synchronize_hardirq(irq); ++ ++ return false; ++} ++EXPORT_SYMBOL_GPL(disable_hardirq); ++ ++/** ++ * disable_nmi_nosync - disable an nmi without waiting ++ * @irq: Interrupt to disable ++ * ++ * Disable the selected interrupt line. Disables and enables are ++ * nested. ++ * The interrupt to disable must have been requested through request_nmi. ++ * Unlike disable_nmi(), this function does not ensure existing ++ * instances of the IRQ handler have completed before returning. 
++ */ ++void disable_nmi_nosync(unsigned int irq) ++{ ++ disable_irq_nosync(irq); ++} ++ ++void __enable_irq(struct irq_desc *desc) ++{ ++ switch (desc->depth) { ++ case 0: ++ err_out: ++ WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", ++ irq_desc_get_irq(desc)); ++ break; ++ case 1: { ++ if (desc->istate & IRQS_SUSPENDED) ++ goto err_out; ++ /* Prevent probing on this irq: */ ++ irq_settings_set_noprobe(desc); ++ /* ++ * Call irq_startup() not irq_enable() here because the ++ * interrupt might be marked NOAUTOEN. So irq_startup() ++ * needs to be invoked when it gets enabled the first ++ * time. If it was already started up, then irq_startup() ++ * will invoke irq_enable() under the hood. ++ */ ++ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); ++ break; ++ } ++ default: ++ desc->depth--; ++ } ++} ++ ++/** ++ * enable_irq - enable handling of an irq ++ * @irq: Interrupt to enable ++ * ++ * Undoes the effect of one call to disable_irq(). If this ++ * matches the last disable, processing of interrupts on this ++ * IRQ line is re-enabled. ++ * ++ * This function may be called from IRQ context only when ++ * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! ++ */ ++void enable_irq(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ ++ if (!desc) ++ return; ++ if (WARN(!desc->irq_data.chip, ++ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) ++ goto out; ++ ++ __enable_irq(desc); ++out: ++ irq_put_desc_busunlock(desc, flags); ++} ++EXPORT_SYMBOL(enable_irq); ++ ++/** ++ * enable_nmi - enable handling of an nmi ++ * @irq: Interrupt to enable ++ * ++ * The interrupt to enable must have been requested through request_nmi. ++ * Undoes the effect of one call to disable_nmi(). If this ++ * matches the last disable, processing of interrupts on this ++ * IRQ line is re-enabled. ++ */ ++void enable_nmi(unsigned int irq) ++{ ++ enable_irq(irq); ++} ++ ++static int set_irq_wake_real(unsigned int irq, unsigned int on) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ int ret = -ENXIO; ++ ++ if (irq_desc_get_chip(desc)->flags & IRQCHIP_SKIP_SET_WAKE) ++ return 0; ++ ++ if (desc->irq_data.chip->irq_set_wake) ++ ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on); ++ ++ return ret; ++} ++ ++/** ++ * irq_set_irq_wake - control irq power management wakeup ++ * @irq: interrupt to control ++ * @on: enable/disable power management wakeup ++ * ++ * Enable/disable power management wakeup mode, which is ++ * disabled by default. Enables and disables must match, ++ * just as they match for non-wakeup mode support. ++ * ++ * Wakeup mode lets this IRQ wake the system from sleep ++ * states like "suspend to RAM". ++ */ ++int irq_set_irq_wake(unsigned int irq, unsigned int on) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); ++ int ret = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ /* Don't use NMIs as wake up interrupts please */ ++ if (desc->istate & IRQS_NMI) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* wakeup-capable irqs can be shared between drivers that ++ * don't need to have the same sleep mode behaviors. 
++ */ ++ if (on) { ++ if (desc->wake_depth++ == 0) { ++ ret = set_irq_wake_real(irq, on); ++ if (ret) ++ desc->wake_depth = 0; ++ else ++ irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); ++ } ++ } else { ++ if (desc->wake_depth == 0) { ++ WARN(1, "Unbalanced IRQ %d wake disable\n", irq); ++ } else if (--desc->wake_depth == 0) { ++ ret = set_irq_wake_real(irq, on); ++ if (ret) ++ desc->wake_depth = 1; ++ else ++ irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); ++ } ++ } ++ ++out_unlock: ++ irq_put_desc_busunlock(desc, flags); ++ return ret; ++} ++EXPORT_SYMBOL(irq_set_irq_wake); ++ ++/* ++ * Internal function that tells the architecture code whether a ++ * particular irq has been exclusively allocated or is available ++ * for driver use. ++ */ ++int can_request_irq(unsigned int irq, unsigned long irqflags) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ int canrequest = 0; ++ ++ if (!desc) ++ return 0; ++ ++ if (irq_settings_can_request(desc)) { ++ if (!desc->action || ++ irqflags & desc->action->flags & IRQF_SHARED) ++ canrequest = 1; ++ } ++ irq_put_desc_unlock(desc, flags); ++ return canrequest; ++} ++ ++int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) ++{ ++ struct irq_chip *chip = desc->irq_data.chip; ++ int ret, unmask = 0; ++ ++ if (!chip || !chip->irq_set_type) { ++ /* ++ * IRQF_TRIGGER_* but the PIC does not support multiple ++ * flow-types? ++ */ ++ pr_debug("No set_type function for IRQ %d (%s)\n", ++ irq_desc_get_irq(desc), ++ chip ? (chip->name ? : "unknown") : "unknown"); ++ return 0; ++ } ++ ++ if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { ++ if (!irqd_irq_masked(&desc->irq_data)) ++ mask_irq(desc); ++ if (!irqd_irq_disabled(&desc->irq_data)) ++ unmask = 1; ++ } ++ ++ /* Mask all flags except trigger mode */ ++ flags &= IRQ_TYPE_SENSE_MASK; ++ ret = chip->irq_set_type(&desc->irq_data, flags); ++ ++ switch (ret) { ++ case IRQ_SET_MASK_OK: ++ case IRQ_SET_MASK_OK_DONE: ++ irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); ++ irqd_set(&desc->irq_data, flags); ++ ++ case IRQ_SET_MASK_OK_NOCOPY: ++ flags = irqd_get_trigger_type(&desc->irq_data); ++ irq_settings_set_trigger_mask(desc, flags); ++ irqd_clear(&desc->irq_data, IRQD_LEVEL); ++ irq_settings_clr_level(desc); ++ if (flags & IRQ_TYPE_LEVEL_MASK) { ++ irq_settings_set_level(desc); ++ irqd_set(&desc->irq_data, IRQD_LEVEL); ++ } ++ ++ ret = 0; ++ break; ++ default: ++ pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", ++ flags, irq_desc_get_irq(desc), chip->irq_set_type); ++ } ++ if (unmask) ++ unmask_irq(desc); ++ return ret; ++} ++ ++#ifdef CONFIG_HARDIRQS_SW_RESEND ++int irq_set_parent(int irq, int parent_irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); ++ ++ if (!desc) ++ return -EINVAL; ++ ++ desc->parent_irq = parent_irq; ++ ++ irq_put_desc_unlock(desc, flags); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(irq_set_parent); ++#endif ++ ++/* ++ * Default primary interrupt handler for threaded interrupts. Is ++ * assigned as primary handler when request_threaded_irq is called ++ * with handler == NULL. Useful for oneshot interrupts. ++ */ ++static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) ++{ ++ return IRQ_WAKE_THREAD; ++} ++ ++/* ++ * Primary handler for nested threaded interrupts. Should never be ++ * called. 
++ */ ++static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) ++{ ++ WARN(1, "Primary handler called for nested irq %d\n", irq); ++ return IRQ_NONE; ++} ++ ++static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) ++{ ++ WARN(1, "Secondary action handler called for irq %d\n", irq); ++ return IRQ_NONE; ++} ++ ++static int irq_wait_for_interrupt(struct irqaction *action) ++{ ++ for (;;) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ if (kthread_should_stop()) { ++ /* may need to run one last time */ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, ++ &action->thread_flags)) { ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ __set_current_state(TASK_RUNNING); ++ return -1; ++ } ++ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, ++ &action->thread_flags)) { ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ schedule(); ++ } ++} ++ ++/* ++ * Oneshot interrupts keep the irq line masked until the threaded ++ * handler finished. unmask if the interrupt has not been disabled and ++ * is marked MASKED. ++ */ ++static void irq_finalize_oneshot(struct irq_desc *desc, ++ struct irqaction *action) ++{ ++ if (!(desc->istate & IRQS_ONESHOT) || ++ action->handler == irq_forced_secondary_handler) ++ return; ++again: ++ chip_bus_lock(desc); ++ raw_spin_lock_irq(&desc->lock); ++ ++ /* ++ * Implausible though it may be we need to protect us against ++ * the following scenario: ++ * ++ * The thread is faster done than the hard interrupt handler ++ * on the other CPU. If we unmask the irq line then the ++ * interrupt can come in again and masks the line, leaves due ++ * to IRQS_INPROGRESS and the irq line is masked forever. ++ * ++ * This also serializes the state of shared oneshot handlers ++ * versus "desc->threads_onehsot |= action->thread_mask;" in ++ * irq_wake_thread(). See the comment there which explains the ++ * serialization. ++ */ ++ if (unlikely(irqd_irq_inprogress(&desc->irq_data))) { ++ raw_spin_unlock_irq(&desc->lock); ++ chip_bus_sync_unlock(desc); ++ cpu_relax(); ++ goto again; ++ } ++ ++ /* ++ * Now check again, whether the thread should run. Otherwise ++ * we would clear the threads_oneshot bit of this thread which ++ * was just set. ++ */ ++ if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) ++ goto out_unlock; ++ ++ desc->threads_oneshot &= ~action->thread_mask; ++ ++ if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && ++ irqd_irq_masked(&desc->irq_data)) ++ unmask_threaded_irq(desc); ++ ++out_unlock: ++ raw_spin_unlock_irq(&desc->lock); ++ chip_bus_sync_unlock(desc); ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * Check whether we need to change the affinity of the interrupt thread. ++ */ ++static void ++irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) ++{ ++ cpumask_var_t mask; ++ bool valid = true; ++ ++ if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) ++ return; ++ ++ /* ++ * In case we are out of memory we set IRQTF_AFFINITY again and ++ * try again next time ++ */ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { ++ set_bit(IRQTF_AFFINITY, &action->thread_flags); ++ return; ++ } ++ ++ raw_spin_lock_irq(&desc->lock); ++ /* ++ * This code is triggered unconditionally. Check the affinity ++ * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. 
++ */ ++ if (cpumask_available(desc->irq_common_data.affinity)) { ++ const struct cpumask *m; ++ ++ m = irq_data_get_effective_affinity_mask(&desc->irq_data); ++ cpumask_copy(mask, m); ++ } else { ++ valid = false; ++ } ++ raw_spin_unlock_irq(&desc->lock); ++ ++ if (valid) ++ set_cpus_allowed_ptr(current, mask); ++ free_cpumask_var(mask); ++} ++#else ++static inline void ++irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } ++#endif ++ ++/* ++ * Interrupts which are not explicitely requested as threaded ++ * interrupts rely on the implicit bh/preempt disable of the hard irq ++ * context. So we need to disable bh here to avoid deadlocks and other ++ * side effects. ++ */ ++static irqreturn_t ++irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) ++{ ++ irqreturn_t ret; ++ ++ local_bh_disable(); ++ ret = action->thread_fn(action->irq, action->dev_id); ++ if (ret == IRQ_HANDLED) ++ atomic_inc(&desc->threads_handled); ++ ++ irq_finalize_oneshot(desc, action); ++ local_bh_enable(); ++ return ret; ++} ++ ++/* ++ * Interrupts explicitly requested as threaded interrupts want to be ++ * preemtible - many of them need to sleep and wait for slow busses to ++ * complete. ++ */ ++static irqreturn_t irq_thread_fn(struct irq_desc *desc, ++ struct irqaction *action) ++{ ++ irqreturn_t ret; ++ ++ ret = action->thread_fn(action->irq, action->dev_id); ++ if (ret == IRQ_HANDLED) ++ atomic_inc(&desc->threads_handled); ++ ++ irq_finalize_oneshot(desc, action); ++ return ret; ++} ++ ++static void wake_threads_waitq(struct irq_desc *desc) ++{ ++ if (atomic_dec_and_test(&desc->threads_active)) ++ wake_up(&desc->wait_for_threads); ++} ++ ++static void irq_thread_dtor(struct callback_head *unused) ++{ ++ struct task_struct *tsk = current; ++ struct irq_desc *desc; ++ struct irqaction *action; ++ ++ if (WARN_ON_ONCE(!(current->flags & PF_EXITING))) ++ return; ++ ++ action = kthread_data(tsk); ++ ++ pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", ++ tsk->comm, tsk->pid, action->irq); ++ ++ ++ desc = irq_to_desc(action->irq); ++ /* ++ * If IRQTF_RUNTHREAD is set, we need to decrement ++ * desc->threads_active and wake possible waiters. 
++ */ ++ if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) ++ wake_threads_waitq(desc); ++ ++ /* Prevent a stale desc->threads_oneshot */ ++ irq_finalize_oneshot(desc, action); ++} ++ ++static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) ++{ ++ struct irqaction *secondary = action->secondary; ++ ++ if (WARN_ON_ONCE(!secondary)) ++ return; ++ ++ raw_spin_lock_irq(&desc->lock); ++ __irq_wake_thread(desc, secondary); ++ raw_spin_unlock_irq(&desc->lock); ++} ++ ++/* ++ * Interrupt handler thread ++ */ ++static int irq_thread(void *data) ++{ ++ struct callback_head on_exit_work; ++ struct irqaction *action = data; ++ struct irq_desc *desc = irq_to_desc(action->irq); ++ irqreturn_t (*handler_fn)(struct irq_desc *desc, ++ struct irqaction *action); ++ ++ if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, ++ &action->thread_flags)) ++ handler_fn = irq_forced_thread_fn; ++ else ++ handler_fn = irq_thread_fn; ++ ++ init_task_work(&on_exit_work, irq_thread_dtor); ++ task_work_add(current, &on_exit_work, false); ++ ++ irq_thread_check_affinity(desc, action); ++ ++ while (!irq_wait_for_interrupt(action)) { ++ irqreturn_t action_ret; ++ ++ irq_thread_check_affinity(desc, action); ++ ++ action_ret = handler_fn(desc, action); ++ if (action_ret == IRQ_WAKE_THREAD) ++ irq_wake_secondary(desc, action); ++ ++ wake_threads_waitq(desc); ++ } ++ ++ /* ++ * This is the regular exit path. __free_irq() is stopping the ++ * thread via kthread_stop() after calling ++ * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the ++ * oneshot mask bit can be set. ++ */ ++ task_work_cancel(current, irq_thread_dtor); ++ return 0; ++} ++ ++/** ++ * irq_wake_thread - wake the irq thread for the action identified by dev_id ++ * @irq: Interrupt line ++ * @dev_id: Device identity for which the thread should be woken ++ * ++ */ ++void irq_wake_thread(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ unsigned long flags; ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ for_each_action_of_desc(desc, action) { ++ if (action->dev_id == dev_id) { ++ if (action->thread) ++ __irq_wake_thread(desc, action); ++ break; ++ } ++ } ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++} ++EXPORT_SYMBOL_GPL(irq_wake_thread); ++ ++static int irq_setup_forced_threading(struct irqaction *new) ++{ ++ if (!force_irqthreads) ++ return 0; ++ if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) ++ return 0; ++ ++ /* ++ * No further action required for interrupts which are requested as ++ * threaded interrupts already ++ */ ++ if (new->handler == irq_default_primary_handler) ++ return 0; ++ ++ new->flags |= IRQF_ONESHOT; ++ ++ /* ++ * Handle the case where we have a real primary handler and a ++ * thread handler. We force thread them as well by creating a ++ * secondary action. 
++ */ ++ if (new->handler && new->thread_fn) { ++ /* Allocate the secondary action */ ++ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!new->secondary) ++ return -ENOMEM; ++ new->secondary->handler = irq_forced_secondary_handler; ++ new->secondary->thread_fn = new->thread_fn; ++ new->secondary->dev_id = new->dev_id; ++ new->secondary->irq = new->irq; ++ new->secondary->name = new->name; ++ } ++ /* Deal with the primary handler */ ++ set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); ++ new->thread_fn = new->handler; ++ new->handler = irq_default_primary_handler; ++ return 0; ++} ++ ++static int irq_request_resources(struct irq_desc *desc) ++{ ++ struct irq_data *d = &desc->irq_data; ++ struct irq_chip *c = d->chip; ++ ++ return c->irq_request_resources ? c->irq_request_resources(d) : 0; ++} ++ ++static void irq_release_resources(struct irq_desc *desc) ++{ ++ struct irq_data *d = &desc->irq_data; ++ struct irq_chip *c = d->chip; ++ ++ if (c->irq_release_resources) ++ c->irq_release_resources(d); ++} ++ ++static bool irq_supports_nmi(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ /* Only IRQs directly managed by the root irqchip can be set as NMI */ ++ if (d->parent_data) ++ return false; ++#endif ++ /* Don't support NMIs for chips behind a slow bus */ ++ if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) ++ return false; ++ ++ return d->chip->flags & IRQCHIP_SUPPORTS_NMI; ++} ++ ++static int irq_nmi_setup(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct irq_chip *c = d->chip; ++ ++ return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; ++} ++ ++static void irq_nmi_teardown(struct irq_desc *desc) ++{ ++ struct irq_data *d = irq_desc_get_irq_data(desc); ++ struct irq_chip *c = d->chip; ++ ++ if (c->irq_nmi_teardown) ++ c->irq_nmi_teardown(d); ++} ++ ++static int ++setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) ++{ ++ struct task_struct *t; ++ struct sched_param param = { ++ .sched_priority = MAX_USER_RT_PRIO/2, ++ }; ++ ++ if (!secondary) { ++ t = kthread_create(irq_thread, new, "irq/%d-%s", irq, ++ new->name); ++ } else { ++ t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, ++ new->name); ++ param.sched_priority -= 1; ++ } ++ ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ ++ sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); ++ ++ /* ++ * We keep the reference to the task struct even if ++ * the thread dies to avoid that the interrupt code ++ * references an already freed task_struct. ++ */ ++ get_task_struct(t); ++ new->thread = t; ++ /* ++ * Tell the thread to set its affinity. This is ++ * important for shared interrupt handlers as we do ++ * not invoke setup_affinity() for the secondary ++ * handlers as everything is already set up. Even for ++ * interrupts marked with IRQF_NO_BALANCE this is ++ * correct as we want the thread to move to the cpu(s) ++ * on which the requesting code placed the interrupt. ++ */ ++ set_bit(IRQTF_AFFINITY, &new->thread_flags); ++ return 0; ++} ++ ++/* ++ * Internal function to register an irqaction - typically used to ++ * allocate special interrupts that are part of the architecture. 
++ * ++ * Locking rules: ++ * ++ * desc->request_mutex Provides serialization against a concurrent free_irq() ++ * chip_bus_lock Provides serialization for slow bus operations ++ * desc->lock Provides serialization against hard interrupts ++ * ++ * chip_bus_lock and desc->lock are sufficient for all other management and ++ * interrupt related functions. desc->request_mutex solely serializes ++ * request/free_irq(). ++ */ ++static int ++__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ++{ ++ struct irqaction *old, **old_ptr; ++ unsigned long flags, thread_mask = 0; ++ int ret, nested, shared = 0; ++ ++ if (!desc) ++ return -EINVAL; ++ ++ if (desc->irq_data.chip == &no_irq_chip) ++ return -ENOSYS; ++ if (!try_module_get(desc->owner)) ++ return -ENODEV; ++ ++ new->irq = irq; ++ ++ /* ++ * If the trigger type is not specified by the caller, ++ * then use the default for this interrupt. ++ */ ++ if (!(new->flags & IRQF_TRIGGER_MASK)) ++ new->flags |= irqd_get_trigger_type(&desc->irq_data); ++ ++ /* ++ * Check whether the interrupt nests into another interrupt ++ * thread. ++ */ ++ nested = irq_settings_is_nested_thread(desc); ++ if (nested) { ++ if (!new->thread_fn) { ++ ret = -EINVAL; ++ goto out_mput; ++ } ++ /* ++ * Replace the primary handler which was provided from ++ * the driver for non nested interrupt handling by the ++ * dummy function which warns when called. ++ */ ++ new->handler = irq_nested_primary_handler; ++ } else { ++ if (irq_settings_can_thread(desc)) { ++ ret = irq_setup_forced_threading(new); ++ if (ret) ++ goto out_mput; ++ } ++ } ++ ++ /* ++ * Create a handler thread when a thread function is supplied ++ * and the interrupt does not nest into another interrupt ++ * thread. ++ */ ++ if (new->thread_fn && !nested) { ++ ret = setup_irq_thread(new, irq, false); ++ if (ret) ++ goto out_mput; ++ if (new->secondary) { ++ ret = setup_irq_thread(new->secondary, irq, true); ++ if (ret) ++ goto out_thread; ++ } ++ } ++ ++ /* ++ * Drivers are often written to work w/o knowledge about the ++ * underlying irq chip implementation, so a request for a ++ * threaded irq without a primary hard irq context handler ++ * requires the ONESHOT flag to be set. Some irq chips like ++ * MSI based interrupts are per se one shot safe. Check the ++ * chip flags, so we can avoid the unmask dance at the end of ++ * the threaded handler for those. ++ */ ++ if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE) ++ new->flags &= ~IRQF_ONESHOT; ++ ++ /* ++ * Protects against a concurrent __free_irq() call which might wait ++ * for synchronize_hardirq() to complete without holding the optional ++ * chip bus lock and desc->lock. Also protects against handing out ++ * a recycled oneshot thread_mask bit while it's still in use by ++ * its previous owner. ++ */ ++ mutex_lock(&desc->request_mutex); ++ ++ /* ++ * Acquire bus lock as the irq_request_resources() callback below ++ * might rely on the serialization or the magic power management ++ * functions which are abusing the irq_bus_lock() callback, ++ */ ++ chip_bus_lock(desc); ++ ++ /* First installed action requests resources. 
*/ ++ if (!desc->action) { ++ ret = irq_request_resources(desc); ++ if (ret) { ++ pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", ++ new->name, irq, desc->irq_data.chip->name); ++ goto out_bus_unlock; ++ } ++ } ++ ++ /* ++ * The following block of code has to be executed atomically ++ * protected against a concurrent interrupt and any of the other ++ * management calls which are not serialized via ++ * desc->request_mutex or the optional bus lock. ++ */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ old_ptr = &desc->action; ++ old = *old_ptr; ++ if (old) { ++ /* ++ * Can't share interrupts unless both agree to and are ++ * the same type (level, edge, polarity). So both flag ++ * fields must have IRQF_SHARED set and the bits which ++ * set the trigger type must match. Also all must ++ * agree on ONESHOT. ++ * Interrupt lines used for NMIs cannot be shared. ++ */ ++ unsigned int oldtype; ++ ++ if (desc->istate & IRQS_NMI) { ++ pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", ++ new->name, irq, desc->irq_data.chip->name); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* ++ * If nobody did set the configuration before, inherit ++ * the one provided by the requester. ++ */ ++ if (irqd_trigger_type_was_set(&desc->irq_data)) { ++ oldtype = irqd_get_trigger_type(&desc->irq_data); ++ } else { ++ oldtype = new->flags & IRQF_TRIGGER_MASK; ++ irqd_set_trigger_type(&desc->irq_data, oldtype); ++ } ++ ++ if (!((old->flags & new->flags) & IRQF_SHARED) || ++ (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || ++ ((old->flags ^ new->flags) & IRQF_ONESHOT)) ++ goto mismatch; ++ ++ /* All handlers must agree on per-cpuness */ ++ if ((old->flags & IRQF_PERCPU) != ++ (new->flags & IRQF_PERCPU)) ++ goto mismatch; ++ ++ /* add new interrupt at end of irq queue */ ++ do { ++ /* ++ * Or all existing action->thread_mask bits, ++ * so we can find the next zero bit for this ++ * new action. ++ */ ++ thread_mask |= old->thread_mask; ++ old_ptr = &old->next; ++ old = *old_ptr; ++ } while (old); ++ shared = 1; ++ } ++ ++ /* ++ * Setup the thread mask for this irqaction for ONESHOT. For ++ * !ONESHOT irqs the thread mask is 0 so we can avoid a ++ * conditional in irq_wake_thread(). ++ */ ++ if (new->flags & IRQF_ONESHOT) { ++ /* ++ * Unlikely to have 32 resp 64 irqs sharing one line, ++ * but who knows. ++ */ ++ if (thread_mask == ~0UL) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ /* ++ * The thread_mask for the action is or'ed to ++ * desc->thread_active to indicate that the ++ * IRQF_ONESHOT thread handler has been woken, but not ++ * yet finished. The bit is cleared when a thread ++ * completes. When all threads of a shared interrupt ++ * line have completed desc->threads_active becomes ++ * zero and the interrupt line is unmasked. See ++ * handle.c:irq_wake_thread() for further information. ++ * ++ * If no thread is woken by primary (hard irq context) ++ * interrupt handlers, then desc->threads_active is ++ * also checked for zero to unmask the irq line in the ++ * affected hard irq flow handlers ++ * (handle_[fasteoi|level]_irq). ++ * ++ * The new action gets the first zero bit of ++ * thread_mask assigned. See the loop above which or's ++ * all existing action->thread_mask bits. ++ */ ++ new->thread_mask = 1UL << ffz(thread_mask); ++ ++ } else if (new->handler == irq_default_primary_handler && ++ !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) { ++ /* ++ * The interrupt was requested with handler = NULL, so ++ * we use the default primary handler for it. 
But it ++ * does not have the oneshot flag set. In combination ++ * with level interrupts this is deadly, because the ++ * default primary handler just wakes the thread, then ++ * the irq lines is reenabled, but the device still ++ * has the level irq asserted. Rinse and repeat.... ++ * ++ * While this works for edge type interrupts, we play ++ * it safe and reject unconditionally because we can't ++ * say for sure which type this interrupt really ++ * has. The type flags are unreliable as the ++ * underlying chip implementation can override them. ++ */ ++ pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", ++ irq); ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ if (!shared) { ++ init_waitqueue_head(&desc->wait_for_threads); ++ ++ /* Setup the type (level, edge polarity) if configured: */ ++ if (new->flags & IRQF_TRIGGER_MASK) { ++ ret = __irq_set_trigger(desc, ++ new->flags & IRQF_TRIGGER_MASK); ++ ++ if (ret) ++ goto out_unlock; ++ } ++ ++ /* ++ * Activate the interrupt. That activation must happen ++ * independently of IRQ_NOAUTOEN. request_irq() can fail ++ * and the callers are supposed to handle ++ * that. enable_irq() of an interrupt requested with ++ * IRQ_NOAUTOEN is not supposed to fail. The activation ++ * keeps it in shutdown mode, it merily associates ++ * resources if necessary and if that's not possible it ++ * fails. Interrupts which are in managed shutdown mode ++ * will simply ignore that activation request. ++ */ ++ ret = irq_activate(desc); ++ if (ret) ++ goto out_unlock; ++ ++ desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ ++ IRQS_ONESHOT | IRQS_WAITING); ++ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); ++ ++ if (new->flags & IRQF_PERCPU) { ++ irqd_set(&desc->irq_data, IRQD_PER_CPU); ++ irq_settings_set_per_cpu(desc); ++ } ++ ++ if (new->flags & IRQF_ONESHOT) ++ desc->istate |= IRQS_ONESHOT; ++ ++ /* Exclude IRQ from balancing if requested */ ++ if (new->flags & IRQF_NOBALANCING) { ++ irq_settings_set_no_balancing(desc); ++ irqd_set(&desc->irq_data, IRQD_NO_BALANCING); ++ } ++ ++ if (irq_settings_can_autoenable(desc)) { ++ irq_startup(desc, IRQ_RESEND, IRQ_START_COND); ++ } else { ++ /* ++ * Shared interrupts do not go well with disabling ++ * auto enable. The sharing interrupt might request ++ * it while it's still disabled and then wait for ++ * interrupts forever. ++ */ ++ WARN_ON_ONCE(new->flags & IRQF_SHARED); ++ /* Undo nested disables: */ ++ desc->depth = 1; ++ } ++ ++ } else if (new->flags & IRQF_TRIGGER_MASK) { ++ unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; ++ unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); ++ ++ if (nmsk != omsk) ++ /* hope the handler works with current trigger mode */ ++ pr_warn("irq %d uses trigger mode %u; requested %u\n", ++ irq, omsk, nmsk); ++ } ++ ++ *old_ptr = new; ++ ++ irq_pm_install_action(desc, new); ++ ++ /* Reset broken irq detection when installing new handler */ ++ desc->irq_count = 0; ++ desc->irqs_unhandled = 0; ++ ++ /* ++ * Check whether we disabled the irq via the spurious handler ++ * before. Reenable it and give it another chance. ++ */ ++ if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) { ++ desc->istate &= ~IRQS_SPURIOUS_DISABLED; ++ __enable_irq(desc); ++ } ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ ++ irq_setup_timings(desc, new); ++ ++ /* ++ * Strictly no need to wake it up, but hung_task complains ++ * when no hard interrupt wakes the thread up. 
++ */ ++ if (new->thread) ++ wake_up_process(new->thread); ++ if (new->secondary) ++ wake_up_process(new->secondary->thread); ++ ++ register_irq_proc(irq, desc); ++ new->dir = NULL; ++ register_handler_proc(irq, new); ++ return 0; ++ ++mismatch: ++ if (!(new->flags & IRQF_PROBE_SHARED)) { ++ pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n", ++ irq, new->flags, new->name, old->flags, old->name); ++#ifdef CONFIG_DEBUG_SHIRQ ++ dump_stack(); ++#endif ++ } ++ ret = -EBUSY; ++ ++out_unlock: ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ if (!desc->action) ++ irq_release_resources(desc); ++out_bus_unlock: ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ ++out_thread: ++ if (new->thread) { ++ struct task_struct *t = new->thread; ++ ++ new->thread = NULL; ++ kthread_stop(t); ++ put_task_struct(t); ++ } ++ if (new->secondary && new->secondary->thread) { ++ struct task_struct *t = new->secondary->thread; ++ ++ new->secondary->thread = NULL; ++ kthread_stop(t); ++ put_task_struct(t); ++ } ++out_mput: ++ module_put(desc->owner); ++ return ret; ++} ++ ++/** ++ * setup_irq - setup an interrupt ++ * @irq: Interrupt line to setup ++ * @act: irqaction for the interrupt ++ * ++ * Used to statically setup interrupts in the early boot process. ++ */ ++int setup_irq(unsigned int irq, struct irqaction *act) ++{ ++ int retval; ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return -EINVAL; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ return retval; ++ ++ retval = __setup_irq(irq, desc, act); ++ ++ if (retval) ++ irq_chip_pm_put(&desc->irq_data); ++ ++ return retval; ++} ++EXPORT_SYMBOL_GPL(setup_irq); ++ ++/* ++ * Internal function to unregister an irqaction - used to free ++ * regular and special interrupts that are part of the architecture. ++ */ ++static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) ++{ ++ unsigned irq = desc->irq_data.irq; ++ struct irqaction *action, **action_ptr; ++ unsigned long flags; ++ ++ WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); ++ ++ mutex_lock(&desc->request_mutex); ++ chip_bus_lock(desc); ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ /* ++ * There can be multiple actions per IRQ descriptor, find the right ++ * one based on the dev_id: ++ */ ++ action_ptr = &desc->action; ++ for (;;) { ++ action = *action_ptr; ++ ++ if (!action) { ++ WARN(1, "Trying to free already-free IRQ %d\n", irq); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ chip_bus_sync_unlock(desc); ++ mutex_unlock(&desc->request_mutex); ++ return NULL; ++ } ++ ++ if (action->dev_id == dev_id) ++ break; ++ action_ptr = &action->next; ++ } ++ ++ /* Found it - now remove it from the list of entries: */ ++ *action_ptr = action->next; ++ ++ irq_pm_remove_action(desc, action); ++ ++ /* If this was the last handler, shut down the IRQ line: */ ++ if (!desc->action) { ++ irq_settings_clr_disable_unlazy(desc); ++ /* Only shutdown. Deactivate after synchronize_hardirq() */ ++ irq_shutdown(desc); ++ } ++ ++#ifdef CONFIG_SMP ++ /* make sure affinity_hint is cleaned up */ ++ if (WARN_ON_ONCE(desc->affinity_hint)) ++ desc->affinity_hint = NULL; ++#endif ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ /* ++ * Drop bus_lock here so the changes which were done in the chip ++ * callbacks above are synced out to the irq chips which hang ++ * behind a slow bus (I2C, SPI) before calling synchronize_hardirq(). 
++ * ++ * Aside of that the bus_lock can also be taken from the threaded ++ * handler in irq_finalize_oneshot() which results in a deadlock ++ * because kthread_stop() would wait forever for the thread to ++ * complete, which is blocked on the bus lock. ++ * ++ * The still held desc->request_mutex() protects against a ++ * concurrent request_irq() of this irq so the release of resources ++ * and timing data is properly serialized. ++ */ ++ chip_bus_sync_unlock(desc); ++ ++ unregister_handler_proc(irq, action); ++ ++ /* ++ * Make sure it's not being used on another CPU and if the chip ++ * supports it also make sure that there is no (not yet serviced) ++ * interrupt in flight at the hardware level. ++ */ ++ __synchronize_hardirq(desc, true); ++ ++#ifdef CONFIG_DEBUG_SHIRQ ++ /* ++ * It's a shared IRQ -- the driver ought to be prepared for an IRQ ++ * event to happen even now it's being freed, so let's make sure that ++ * is so by doing an extra call to the handler .... ++ * ++ * ( We do this after actually deregistering it, to make sure that a ++ * 'real' IRQ doesn't run in parallel with our fake. ) ++ */ ++ if (action->flags & IRQF_SHARED) { ++ local_irq_save(flags); ++ action->handler(irq, dev_id); ++ local_irq_restore(flags); ++ } ++#endif ++ ++ /* ++ * The action has already been removed above, but the thread writes ++ * its oneshot mask bit when it completes. Though request_mutex is ++ * held across this which prevents __setup_irq() from handing out ++ * the same bit to a newly requested action. ++ */ ++ if (action->thread) { ++ kthread_stop(action->thread); ++ put_task_struct(action->thread); ++ if (action->secondary && action->secondary->thread) { ++ kthread_stop(action->secondary->thread); ++ put_task_struct(action->secondary->thread); ++ } ++ } ++ ++ /* Last action releases resources */ ++ if (!desc->action) { ++ /* ++ * Reaquire bus lock as irq_release_resources() might ++ * require it to deallocate resources over the slow bus. ++ */ ++ chip_bus_lock(desc); ++ /* ++ * There is no interrupt on the fly anymore. Deactivate it ++ * completely. ++ */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ irq_domain_deactivate_irq(&desc->irq_data); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ irq_release_resources(desc); ++ chip_bus_sync_unlock(desc); ++ irq_remove_timings(desc); ++ } ++ ++ mutex_unlock(&desc->request_mutex); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ kfree(action->secondary); ++ return action; ++} ++ ++/** ++ * remove_irq - free an interrupt ++ * @irq: Interrupt line to free ++ * @act: irqaction for the interrupt ++ * ++ * Used to remove interrupts statically setup by the early boot process. ++ */ ++void remove_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ __free_irq(desc, act->dev_id); ++} ++EXPORT_SYMBOL_GPL(remove_irq); ++ ++/** ++ * free_irq - free an interrupt allocated with request_irq ++ * @irq: Interrupt line to free ++ * @dev_id: Device identity to free ++ * ++ * Remove an interrupt handler. The handler is removed and if the ++ * interrupt line is no longer in use by any driver it is disabled. ++ * On a shared IRQ the caller must ensure the interrupt is disabled ++ * on the card it drives before calling this function. The function ++ * does not return until any executing interrupts for this IRQ ++ * have completed. ++ * ++ * This function must not be called from interrupt context. 
++ * ++ * Returns the devname argument passed to request_irq. ++ */ ++const void *free_irq(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ const char *devname; ++ ++ if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return NULL; ++ ++#ifdef CONFIG_SMP ++ if (WARN_ON(desc->affinity_notify)) ++ desc->affinity_notify = NULL; ++#endif ++ ++ action = __free_irq(desc, dev_id); ++ ++ if (!action) ++ return NULL; ++ ++ devname = action->name; ++ kfree(action); ++ return devname; ++} ++EXPORT_SYMBOL(free_irq); ++ ++/* This function must be called with desc->lock held */ ++static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) ++{ ++ const char *devname = NULL; ++ ++ desc->istate &= ~IRQS_NMI; ++ ++ if (!WARN_ON(desc->action == NULL)) { ++ irq_pm_remove_action(desc, desc->action); ++ devname = desc->action->name; ++ unregister_handler_proc(irq, desc->action); ++ ++ kfree(desc->action); ++ desc->action = NULL; ++ } ++ ++ irq_settings_clr_disable_unlazy(desc); ++ irq_shutdown(desc); ++ ++ irq_release_resources(desc); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ ++ return devname; ++} ++ ++const void *free_nmi(unsigned int irq, void *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ unsigned long flags; ++ const void *devname; ++ ++ if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) ++ return NULL; ++ ++ if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return NULL; ++ ++ /* NMI still enabled */ ++ if (WARN_ON(desc->depth == 0)) ++ disable_nmi_nosync(irq); ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ irq_nmi_teardown(desc); ++ devname = __cleanup_nmi(irq, desc); ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return devname; ++} ++ ++/** ++ * request_threaded_irq - allocate an interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Primary handler for threaded interrupts ++ * If NULL and thread_fn != NULL the default ++ * primary handler is installed ++ * @thread_fn: Function called from the irq handler thread ++ * If NULL, no irq thread is created ++ * @irqflags: Interrupt type flags ++ * @devname: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. From the point this ++ * call is made your handler function may be invoked. Since ++ * your handler function must clear any interrupt the board ++ * raises, you must take care both to initialise your hardware ++ * and to set up the interrupt handler in the right order. ++ * ++ * If you want to set up a threaded irq handler for your device ++ * then you need to supply @handler and @thread_fn. @handler is ++ * still called in hard interrupt context and has to check ++ * whether the interrupt originates from the device. If yes it ++ * needs to disable the interrupt on the device and return ++ * IRQ_WAKE_THREAD which will wake up the handler thread and run ++ * @thread_fn. This split handler design is necessary to support ++ * shared interrupts. ++ * ++ * Dev_id must be globally unique. Normally the address of the ++ * device data structure is used as the cookie. Since the handler ++ * receives this value it makes sense to use it. ++ * ++ * If your interrupt is shared you must pass a non NULL dev_id ++ * as this is required when freeing the interrupt. 
++ * ++ * Flags: ++ * ++ * IRQF_SHARED Interrupt is shared ++ * IRQF_TRIGGER_* Specify active edge(s) or level ++ * ++ */ ++int request_threaded_irq(unsigned int irq, irq_handler_t handler, ++ irq_handler_t thread_fn, unsigned long irqflags, ++ const char *devname, void *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ int retval; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ /* ++ * Sanity-check: shared interrupts must pass in a real dev-ID, ++ * otherwise we'll have trouble later trying to figure out ++ * which interrupt is which (messes up the interrupt freeing ++ * logic etc). ++ * ++ * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and ++ * it cannot be set along with IRQF_NO_SUSPEND. ++ */ ++ if (((irqflags & IRQF_SHARED) && !dev_id) || ++ (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ++ ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ return -EINVAL; ++ ++ if (!irq_settings_can_request(desc) || ++ WARN_ON(irq_settings_is_per_cpu_devid(desc))) ++ return -EINVAL; ++ ++ if (!handler) { ++ if (!thread_fn) ++ return -EINVAL; ++ handler = irq_default_primary_handler; ++ } ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->thread_fn = thread_fn; ++ action->flags = irqflags; ++ action->name = devname; ++ action->dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) { ++ kfree(action); ++ return retval; ++ } ++ ++ retval = __setup_irq(irq, desc, action); ++ ++ if (retval) { ++ irq_chip_pm_put(&desc->irq_data); ++ kfree(action->secondary); ++ kfree(action); ++ } ++ ++#ifdef CONFIG_DEBUG_SHIRQ_FIXME ++ if (!retval && (irqflags & IRQF_SHARED)) { ++ /* ++ * It's a shared IRQ -- the driver ought to be prepared for it ++ * to happen immediately, so let's make sure.... ++ * We disable the irq to make sure that a 'real' IRQ doesn't ++ * run in parallel with our fake. ++ */ ++ unsigned long flags; ++ ++ disable_irq(irq); ++ local_irq_save(flags); ++ ++ handler(irq, dev_id); ++ ++ local_irq_restore(flags); ++ enable_irq(irq); ++ } ++#endif ++ return retval; ++} ++EXPORT_SYMBOL(request_threaded_irq); ++ ++/** ++ * request_any_context_irq - allocate an interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Threaded handler for threaded interrupts. ++ * @flags: Interrupt type flags ++ * @name: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. It selects either a ++ * hardirq or threaded handling method depending on the ++ * context. ++ * ++ * On failure, it returns a negative value. On success, ++ * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. ++ */ ++int request_any_context_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *name, void *dev_id) ++{ ++ struct irq_desc *desc; ++ int ret; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ return -EINVAL; ++ ++ if (irq_settings_is_nested_thread(desc)) { ++ ret = request_threaded_irq(irq, NULL, handler, ++ flags, name, dev_id); ++ return !ret ? IRQC_IS_NESTED : ret; ++ } ++ ++ ret = request_irq(irq, handler, flags, name, dev_id); ++ return !ret ? 
IRQC_IS_HARDIRQ : ret; ++} ++EXPORT_SYMBOL_GPL(request_any_context_irq); ++ ++/** ++ * request_nmi - allocate an interrupt line for NMI delivery ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * Threaded handler for threaded interrupts. ++ * @irqflags: Interrupt type flags ++ * @name: An ascii name for the claiming device ++ * @dev_id: A cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt line and IRQ handling. It sets up the IRQ line ++ * to be handled as an NMI. ++ * ++ * An interrupt line delivering NMIs cannot be shared and IRQ handling ++ * cannot be threaded. ++ * ++ * Interrupt lines requested for NMI delivering must produce per cpu ++ * interrupts and have auto enabling setting disabled. ++ * ++ * Dev_id must be globally unique. Normally the address of the ++ * device data structure is used as the cookie. Since the handler ++ * receives this value it makes sense to use it. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail and return a negative value. ++ */ ++int request_nmi(unsigned int irq, irq_handler_t handler, ++ unsigned long irqflags, const char *name, void *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int retval; ++ ++ if (irq == IRQ_NOTCONNECTED) ++ return -ENOTCONN; ++ ++ /* NMI cannot be shared, used for Polling */ ++ if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) ++ return -EINVAL; ++ ++ if (!(irqflags & IRQF_PERCPU)) ++ return -EINVAL; ++ ++ if (!handler) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ ++ if (!desc || irq_settings_can_autoenable(desc) || ++ !irq_settings_can_request(desc) || ++ WARN_ON(irq_settings_is_per_cpu_devid(desc)) || ++ !irq_supports_nmi(desc)) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; ++ action->name = name; ++ action->dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ goto err_out; ++ ++ retval = __setup_irq(irq, desc, action); ++ if (retval) ++ goto err_irq_setup; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ /* Setup NMI state */ ++ desc->istate |= IRQS_NMI; ++ retval = irq_nmi_setup(desc); ++ if (retval) { ++ __cleanup_nmi(irq, desc); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return -EINVAL; ++ } ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ ++err_irq_setup: ++ irq_chip_pm_put(&desc->irq_data); ++err_out: ++ kfree(action); ++ ++ return retval; ++} ++ ++void enable_percpu_irq(unsigned int irq, unsigned int type) ++{ ++ unsigned int cpu = smp_processor_id(); ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ ++ if (!desc) ++ return; ++ ++ /* ++ * If the trigger type is not specified by the caller, then ++ * use the default for this interrupt. 
++ */ ++ type &= IRQ_TYPE_SENSE_MASK; ++ if (type == IRQ_TYPE_NONE) ++ type = irqd_get_trigger_type(&desc->irq_data); ++ ++ if (type != IRQ_TYPE_NONE) { ++ int ret; ++ ++ ret = __irq_set_trigger(desc, type); ++ ++ if (ret) { ++ WARN(1, "failed to set type for IRQ%d\n", irq); ++ goto out; ++ } ++ } ++ ++ irq_percpu_enable(desc, cpu); ++out: ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(enable_percpu_irq); ++ ++void enable_percpu_nmi(unsigned int irq, unsigned int type) ++{ ++ enable_percpu_irq(irq, type); ++} ++ ++/** ++ * irq_percpu_is_enabled - Check whether the per cpu irq is enabled ++ * @irq: Linux irq number to check for ++ * ++ * Must be called from a non migratable context. Returns the enable ++ * state of a per cpu interrupt on the current cpu. ++ */ ++bool irq_percpu_is_enabled(unsigned int irq) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct irq_desc *desc; ++ unsigned long flags; ++ bool is_enabled; ++ ++ desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return false; ++ ++ is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); ++ irq_put_desc_unlock(desc, flags); ++ ++ return is_enabled; ++} ++EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); ++ ++void disable_percpu_irq(unsigned int irq) ++{ ++ unsigned int cpu = smp_processor_id(); ++ unsigned long flags; ++ struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); ++ ++ if (!desc) ++ return; ++ ++ irq_percpu_disable(desc, cpu); ++ irq_put_desc_unlock(desc, flags); ++} ++EXPORT_SYMBOL_GPL(disable_percpu_irq); ++ ++void disable_percpu_nmi(unsigned int irq) ++{ ++ disable_percpu_irq(irq); ++} ++ ++/* ++ * Internal function to unregister a percpu irqaction. ++ */ ++static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ unsigned long flags; ++ ++ WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); ++ ++ if (!desc) ++ return NULL; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ ++ action = desc->action; ++ if (!action || action->percpu_dev_id != dev_id) { ++ WARN(1, "Trying to free already-free IRQ %d\n", irq); ++ goto bad; ++ } ++ ++ if (!cpumask_empty(desc->percpu_enabled)) { ++ WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", ++ irq, cpumask_first(desc->percpu_enabled)); ++ goto bad; ++ } ++ ++ /* Found it - now remove it from the list of entries: */ ++ desc->action = NULL; ++ ++ desc->istate &= ~IRQS_NMI; ++ ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ unregister_handler_proc(irq, action); ++ ++ irq_chip_pm_put(&desc->irq_data); ++ module_put(desc->owner); ++ return action; ++ ++bad: ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ return NULL; ++} ++ ++/** ++ * remove_percpu_irq - free a per-cpu interrupt ++ * @irq: Interrupt line to free ++ * @act: irqaction for the interrupt ++ * ++ * Used to remove interrupts statically setup by the early boot process. ++ */ ++void remove_percpu_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (desc && irq_settings_is_per_cpu_devid(desc)) ++ __free_percpu_irq(irq, act->percpu_dev_id); ++} ++ ++/** ++ * free_percpu_irq - free an interrupt allocated with request_percpu_irq ++ * @irq: Interrupt line to free ++ * @dev_id: Device identity to free ++ * ++ * Remove a percpu interrupt handler. The handler is removed, but ++ * the interrupt line is not disabled. 
This must be done on each ++ * CPU before calling this function. The function does not return ++ * until any executing interrupts for this IRQ have completed. ++ * ++ * This function must not be called from interrupt context. ++ */ ++void free_percpu_irq(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return; ++ ++ chip_bus_lock(desc); ++ kfree(__free_percpu_irq(irq, dev_id)); ++ chip_bus_sync_unlock(desc); ++} ++EXPORT_SYMBOL_GPL(free_percpu_irq); ++ ++void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return; ++ ++ if (WARN_ON(!(desc->istate & IRQS_NMI))) ++ return; ++ ++ kfree(__free_percpu_irq(irq, dev_id)); ++} ++ ++/** ++ * setup_percpu_irq - setup a per-cpu interrupt ++ * @irq: Interrupt line to setup ++ * @act: irqaction for the interrupt ++ * ++ * Used to statically setup per-cpu interrupts in the early boot process. ++ */ ++int setup_percpu_irq(unsigned int irq, struct irqaction *act) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ int retval; ++ ++ if (!desc || !irq_settings_is_per_cpu_devid(desc)) ++ return -EINVAL; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ return retval; ++ ++ retval = __setup_irq(irq, desc, act); ++ ++ if (retval) ++ irq_chip_pm_put(&desc->irq_data); ++ ++ return retval; ++} ++ ++/** ++ * __request_percpu_irq - allocate a percpu interrupt line ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * @flags: Interrupt type flags (IRQF_TIMER only) ++ * @devname: An ascii name for the claiming device ++ * @dev_id: A percpu cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources and enables the ++ * interrupt on the local CPU. If the interrupt is supposed to be ++ * enabled on other CPUs, it has to be done on each CPU using ++ * enable_percpu_irq(). ++ * ++ * Dev_id must be globally unique. It is a per-cpu variable, and ++ * the handler gets called with the interrupted CPU's instance of ++ * that variable. 
++ */ ++int __request_percpu_irq(unsigned int irq, irq_handler_t handler, ++ unsigned long flags, const char *devname, ++ void __percpu *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ int retval; ++ ++ if (!dev_id) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ if (!desc || !irq_settings_can_request(desc) || ++ !irq_settings_is_per_cpu_devid(desc)) ++ return -EINVAL; ++ ++ if (flags && flags != IRQF_TIMER) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; ++ action->name = devname; ++ action->percpu_dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) { ++ kfree(action); ++ return retval; ++ } ++ ++ retval = __setup_irq(irq, desc, action); ++ ++ if (retval) { ++ irq_chip_pm_put(&desc->irq_data); ++ kfree(action); ++ } ++ ++ return retval; ++} ++EXPORT_SYMBOL_GPL(__request_percpu_irq); ++ ++int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct irq_chip *chip; ++ int err = -EINVAL; ++ ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip->irq_get_irqchip_state) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ err = chip->irq_get_irqchip_state(data, which, state); ++ return err; ++} ++ ++/** ++ * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery ++ * @irq: Interrupt line to allocate ++ * @handler: Function to be called when the IRQ occurs. ++ * @name: An ascii name for the claiming device ++ * @dev_id: A percpu cookie passed back to the handler function ++ * ++ * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs ++ * have to be setup on each CPU by calling prepare_percpu_nmi() before being ++ * enabled on the same CPU by using enable_percpu_nmi(). ++ * ++ * Dev_id must be globally unique. It is a per-cpu variable, and ++ * the handler gets called with the interrupted CPU's instance of ++ * that variable. ++ * ++ * Interrupt lines requested for NMI delivering should have auto enabling ++ * setting disabled. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail returning a negative value. 
++ */ ++int request_percpu_nmi(unsigned int irq, irq_handler_t handler, ++ const char *name, void __percpu *dev_id) ++{ ++ struct irqaction *action; ++ struct irq_desc *desc; ++ unsigned long flags; ++ int retval; ++ ++ if (!handler) ++ return -EINVAL; ++ ++ desc = irq_to_desc(irq); ++ ++ if (!desc || !irq_settings_can_request(desc) || ++ !irq_settings_is_per_cpu_devid(desc) || ++ irq_settings_can_autoenable(desc) || ++ !irq_supports_nmi(desc)) ++ return -EINVAL; ++ ++ /* The line cannot already be NMI */ ++ if (desc->istate & IRQS_NMI) ++ return -EINVAL; ++ ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); ++ if (!action) ++ return -ENOMEM; ++ ++ action->handler = handler; ++ action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD ++ | IRQF_NOBALANCING; ++ action->name = name; ++ action->percpu_dev_id = dev_id; ++ ++ retval = irq_chip_pm_get(&desc->irq_data); ++ if (retval < 0) ++ goto err_out; ++ ++ retval = __setup_irq(irq, desc, action); ++ if (retval) ++ goto err_irq_setup; ++ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ desc->istate |= IRQS_NMI; ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return 0; ++ ++err_irq_setup: ++ irq_chip_pm_put(&desc->irq_data); ++err_out: ++ kfree(action); ++ ++ return retval; ++} ++ ++/** ++ * prepare_percpu_nmi - performs CPU local setup for NMI delivery ++ * @irq: Interrupt line to prepare for NMI delivery ++ * ++ * This call prepares an interrupt line to deliver NMI on the current CPU, ++ * before that interrupt line gets enabled with enable_percpu_nmi(). ++ * ++ * As a CPU local operation, this should be called from non-preemptible ++ * context. ++ * ++ * If the interrupt line cannot be used to deliver NMIs, function ++ * will fail returning a negative value. ++ */ ++int prepare_percpu_nmi(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc; ++ int ret = 0; ++ ++ WARN_ON(preemptible()); ++ ++ desc = irq_get_desc_lock(irq, &flags, ++ IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return -EINVAL; ++ ++ if (WARN(!(desc->istate & IRQS_NMI), ++ KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", ++ irq)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = irq_nmi_setup(desc); ++ if (ret) { ++ pr_err("Failed to setup NMI delivery: irq %u\n", irq); ++ goto out; ++ } ++ ++out: ++ irq_put_desc_unlock(desc, flags); ++ return ret; ++} ++ ++/** ++ * teardown_percpu_nmi - undoes NMI setup of IRQ line ++ * @irq: Interrupt line from which CPU local NMI configuration should be ++ * removed ++ * ++ * This call undoes the setup done by prepare_percpu_nmi(). ++ * ++ * IRQ line should not be enabled for the current CPU. ++ * ++ * As a CPU local operation, this should be called from non-preemptible ++ * context. ++ */ ++void teardown_percpu_nmi(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_desc *desc; ++ ++ WARN_ON(preemptible()); ++ ++ desc = irq_get_desc_lock(irq, &flags, ++ IRQ_GET_DESC_CHECK_PERCPU); ++ if (!desc) ++ return; ++ ++ if (WARN_ON(!(desc->istate & IRQS_NMI))) ++ goto out; ++ ++ irq_nmi_teardown(desc); ++out: ++ irq_put_desc_unlock(desc, flags); ++} ++ ++/** ++ * irq_get_irqchip_state - returns the irqchip state of a interrupt. 
++ * @irq: Interrupt line that is forwarded to a VM ++ * @which: One of IRQCHIP_STATE_* the caller wants to know about ++ * @state: a pointer to a boolean where the state is to be storeed ++ * ++ * This call snapshots the internal irqchip state of an ++ * interrupt, returning into @state the bit corresponding to ++ * stage @which ++ * ++ * This function should be called with preemption disabled if the ++ * interrupt controller has per-cpu registers. ++ */ ++int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct irq_desc *desc; ++ struct irq_data *data; ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ desc = irq_get_desc_buslock(irq, &flags, 0); ++ if (!desc) ++ return err; ++ ++ data = irq_desc_get_irq_data(desc); ++ ++ err = __irq_get_irqchip_state(data, which, state); ++ ++ irq_put_desc_busunlock(desc, flags); ++ return err; ++} ++EXPORT_SYMBOL_GPL(irq_get_irqchip_state); ++ ++/** ++ * irq_set_irqchip_state - set the state of a forwarded interrupt. ++ * @irq: Interrupt line that is forwarded to a VM ++ * @which: State to be restored (one of IRQCHIP_STATE_*) ++ * @val: Value corresponding to @which ++ * ++ * This call sets the internal irqchip state of an interrupt, ++ * depending on the value of @which. ++ * ++ * This function should be called with preemption disabled if the ++ * interrupt controller has per-cpu registers. ++ */ ++int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, ++ bool val) ++{ ++ struct irq_desc *desc; ++ struct irq_data *data; ++ struct irq_chip *chip; ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ desc = irq_get_desc_buslock(irq, &flags, 0); ++ if (!desc) ++ return err; ++ ++ data = irq_desc_get_irq_data(desc); ++ ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip->irq_set_irqchip_state) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ err = chip->irq_set_irqchip_state(data, which, val); ++ ++ irq_put_desc_busunlock(desc, flags); ++ return err; ++} ++EXPORT_SYMBOL_GPL(irq_set_irqchip_state); +diff -uprN kernel/kernel/irq/msi.c kernel_new/kernel/irq/msi.c +--- kernel/kernel/irq/msi.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/irq/msi.c 2021-04-01 18:28:07.809863116 +0800 +@@ -268,6 +268,9 @@ static void msi_domain_update_chip_ops(s + struct irq_chip *chip = info->chip; + + BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); ++ WARN_ONCE(IS_ENABLED(CONFIG_IPIPE) && ++ (chip->flags & IRQCHIP_PIPELINE_SAFE) == 0, ++ "MSI domain irqchip %s is not pipeline-safe!", chip->name); + if (!chip->irq_set_affinity) + chip->irq_set_affinity = msi_domain_set_affinity; + } +diff -uprN kernel/kernel/locking/lockdep.c kernel_new/kernel/locking/lockdep.c +--- kernel/kernel/locking/lockdep.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/lockdep.c 2021-04-01 18:28:07.809863116 +0800 +@@ -2859,7 +2859,7 @@ void lockdep_hardirqs_on(unsigned long i + * already enabled, yet we find the hardware thinks they are in fact + * enabled.. someone messed up their IRQ state tracing. 
+ */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + /* +@@ -2885,7 +2885,9 @@ void lockdep_hardirqs_on(unsigned long i + */ + void lockdep_hardirqs_off(unsigned long ip) + { +- struct task_struct *curr = current; ++ struct task_struct *curr; ++ ++ curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; +@@ -2894,7 +2896,7 @@ void lockdep_hardirqs_off(unsigned long + * So we're supposed to get called after you mask local IRQs, but for + * some reason the hardware doesn't quite think you did a proper job. + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->hardirqs_enabled) { +@@ -2923,7 +2925,7 @@ void trace_softirqs_on(unsigned long ip) + * We fancy IRQs being disabled here, see softirq.c, avoids + * funny state and nesting things. + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->softirqs_enabled) { +@@ -2962,7 +2964,7 @@ void trace_softirqs_off(unsigned long ip + /* + * We fancy IRQs being disabled here, see softirq.c + */ +- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) ++ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !hard_irqs_disabled())) + return; + + if (curr->softirqs_enabled) { +diff -uprN kernel/kernel/locking/lockdep_internals.h kernel_new/kernel/locking/lockdep_internals.h +--- kernel/kernel/locking/lockdep_internals.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/lockdep_internals.h 2021-04-01 18:28:07.810863115 +0800 +@@ -160,12 +160,12 @@ DECLARE_PER_CPU(struct lockdep_stats, lo + this_cpu_inc(lockdep_stats.ptr); + + #define debug_atomic_inc(ptr) { \ +- WARN_ON_ONCE(!irqs_disabled()); \ ++ WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled()); \ + __this_cpu_inc(lockdep_stats.ptr); \ + } + + #define debug_atomic_dec(ptr) { \ +- WARN_ON_ONCE(!irqs_disabled()); \ ++ WARN_ON_ONCE(!hard_irqs_disabled() && !irqs_disabled());\ + __this_cpu_dec(lockdep_stats.ptr); \ + } + +diff -uprN kernel/kernel/locking/spinlock.c kernel_new/kernel/locking/spinlock.c +--- kernel/kernel/locking/spinlock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/locking/spinlock.c 2021-04-01 18:28:07.810863115 +0800 +@@ -27,7 +27,9 @@ + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) ++#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ ++ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ ++ defined(CONFIG_IPIPE) + /* + * The __lock_function inlines are taken from + * spinlock : include/linux/spinlock_api_smp.h +diff -uprN kernel/kernel/Makefile kernel_new/kernel/Makefile +--- kernel/kernel/Makefile 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/Makefile 2021-04-01 18:28:07.810863115 +0800 +@@ -87,6 +87,7 @@ obj-$(CONFIG_LOCKUP_DETECTOR) += watchdo + obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o + obj-$(CONFIG_SECCOMP) += seccomp.o + obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_IPIPE) += ipipe/ + obj-$(CONFIG_SYSCTL) += utsname_sysctl.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o +diff -uprN kernel/kernel/Makefile.orig kernel_new/kernel/Makefile.orig +--- kernel/kernel/Makefile.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/Makefile.orig 
2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,127 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Makefile for the linux kernel. ++# ++ ++obj-y = fork.o exec_domain.o panic.o \ ++ cpu.o exit.o softirq.o resource.o \ ++ sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ ++ signal.o sys.o umh.o workqueue.o pid.o task_work.o \ ++ extable.o params.o \ ++ kthread.o sys_ni.o nsproxy.o \ ++ notifier.o ksysfs.o cred.o reboot.o \ ++ async.o range.o smpboot.o ucount.o ktask.o ++ ++obj-$(CONFIG_MODULES) += kmod.o ++obj-$(CONFIG_MULTIUSER) += groups.o ++ ++ifdef CONFIG_FUNCTION_TRACER ++# Do not trace internal ftrace files ++CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) ++endif ++ ++# Prevents flicker of uninteresting __do_softirq()/__local_bh_disable_ip() ++# in coverage traces. ++KCOV_INSTRUMENT_softirq.o := n ++# These are called from save_stack_trace() on slub debug path, ++# and produce insane amounts of uninteresting coverage. ++KCOV_INSTRUMENT_module.o := n ++KCOV_INSTRUMENT_extable.o := n ++# Don't self-instrument. ++KCOV_INSTRUMENT_kcov.o := n ++KASAN_SANITIZE_kcov.o := n ++CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) ++ ++# cond_syscall is currently not LTO compatible ++CFLAGS_sys_ni.o = $(DISABLE_LTO) ++ ++obj-y += sched/ ++obj-y += locking/ ++obj-y += power/ ++obj-y += printk/ ++obj-y += irq/ ++obj-y += rcu/ ++obj-y += livepatch/ ++obj-y += dma/ ++ ++obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o ++obj-$(CONFIG_FREEZER) += freezer.o ++obj-$(CONFIG_PROFILING) += profile.o ++obj-$(CONFIG_STACKTRACE) += stacktrace.o ++obj-y += time/ ++obj-$(CONFIG_FUTEX) += futex.o ++obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o ++obj-$(CONFIG_SMP) += smp.o ++ifneq ($(CONFIG_SMP),y) ++obj-y += up.o ++endif ++obj-$(CONFIG_UID16) += uid16.o ++obj-$(CONFIG_MODULES) += module.o ++obj-$(CONFIG_MODULE_SIG) += module_signing.o ++obj-$(CONFIG_KALLSYMS) += kallsyms.o ++obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o ++obj-$(CONFIG_CRASH_CORE) += crash_core.o ++obj-$(CONFIG_KEXEC_CORE) += kexec_core.o ++obj-$(CONFIG_KEXEC) += kexec.o ++obj-$(CONFIG_KEXEC_FILE) += kexec_file.o ++obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o ++obj-$(CONFIG_COMPAT) += compat.o ++obj-$(CONFIG_CGROUPS) += cgroup/ ++obj-$(CONFIG_UTS_NS) += utsname.o ++obj-$(CONFIG_USER_NS) += user_namespace.o ++obj-$(CONFIG_PID_NS) += pid_namespace.o ++obj-$(CONFIG_IKCONFIG) += configs.o ++obj-$(CONFIG_SMP) += stop_machine.o ++obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o ++obj-$(CONFIG_AUDIT) += audit.o auditfilter.o ++obj-$(CONFIG_AUDITSYSCALL) += auditsc.o ++obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o ++obj-$(CONFIG_AUDIT_TREE) += audit_tree.o ++obj-$(CONFIG_GCOV_KERNEL) += gcov/ ++obj-$(CONFIG_KCOV) += kcov.o ++obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o ++obj-$(CONFIG_KGDB) += debug/ ++obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o ++obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o ++obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o ++obj-$(CONFIG_SECCOMP) += seccomp.o ++obj-$(CONFIG_RELAY) += relay.o ++obj-$(CONFIG_SYSCTL) += utsname_sysctl.o ++obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o ++obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o ++obj-$(CONFIG_TRACEPOINTS) += tracepoint.o ++obj-$(CONFIG_LATENCYTOP) += latencytop.o ++obj-$(CONFIG_ELFCORE) += elfcore.o ++obj-$(CONFIG_FUNCTION_TRACER) += trace/ ++obj-$(CONFIG_TRACING) += trace/ ++obj-$(CONFIG_TRACE_CLOCK) += trace/ ++obj-$(CONFIG_RING_BUFFER) += trace/ ++obj-$(CONFIG_TRACEPOINTS) += trace/ 
++obj-$(CONFIG_IRQ_WORK) += irq_work.o ++obj-$(CONFIG_CPU_PM) += cpu_pm.o ++obj-$(CONFIG_BPF) += bpf/ ++ ++obj-$(CONFIG_PERF_EVENTS) += events/ ++ ++obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o ++obj-$(CONFIG_PADATA) += padata.o ++obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ++obj-$(CONFIG_JUMP_LABEL) += jump_label.o ++obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o ++obj-$(CONFIG_TORTURE_TEST) += torture.o ++ ++obj-$(CONFIG_HAS_IOMEM) += iomem.o ++obj-$(CONFIG_ZONE_DEVICE) += memremap.o ++obj-$(CONFIG_RSEQ) += rseq.o ++ ++$(obj)/configs.o: $(obj)/config_data.h ++ ++targets += config_data.gz ++$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE ++ $(call if_changed,gzip) ++ ++ filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") ++targets += config_data.h ++$(obj)/config_data.h: $(obj)/config_data.gz FORCE ++ $(call filechk,ikconfiggz) +diff -uprN kernel/kernel/module.c kernel_new/kernel/module.c +--- kernel/kernel/module.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/module.c 2021-04-01 18:28:07.810863115 +0800 +@@ -1112,7 +1112,7 @@ bool try_module_get(struct module *modul + bool ret = true; + + if (module) { +- preempt_disable(); ++ unsigned long flags = hard_preempt_disable(); + /* Note: here, we can fail to get a reference */ + if (likely(module_is_live(module) && + atomic_inc_not_zero(&module->refcnt) != 0)) +@@ -1120,7 +1120,7 @@ bool try_module_get(struct module *modul + else + ret = false; + +- preempt_enable(); ++ hard_preempt_enable(flags); + } + return ret; + } +@@ -1131,11 +1131,11 @@ void module_put(struct module *module) + int ret; + + if (module) { +- preempt_disable(); ++ unsigned long flags = hard_preempt_disable(); + ret = atomic_dec_if_positive(&module->refcnt); + WARN_ON(ret < 0); /* Failed to put refcount */ + trace_module_put(module, _RET_IP_); +- preempt_enable(); ++ hard_preempt_enable(flags); + } + } + EXPORT_SYMBOL(module_put); +diff -uprN kernel/kernel/notifier.c kernel_new/kernel/notifier.c +--- kernel/kernel/notifier.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/notifier.c 2021-04-01 18:28:07.810863115 +0800 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + /* + * Notifier list for kernel code which wants to be called +@@ -195,6 +196,9 @@ NOKPROBE_SYMBOL(__atomic_notifier_call_c + int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) + { ++ if (!ipipe_root_p) ++ return notifier_call_chain(&nh->head, val, v, -1, NULL); ++ + return __atomic_notifier_call_chain(nh, val, v, -1, NULL); + } + EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); +diff -uprN kernel/kernel/notifier.c.orig kernel_new/kernel/notifier.c.orig +--- kernel/kernel/notifier.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/notifier.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,569 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Notifier list for kernel code which wants to be called ++ * at shutdown. This is used to stop any idling DMA operations ++ * and the like. ++ */ ++BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); ++ ++/* ++ * Notifier chain core routines. The exported routines below ++ * are layered on top of these, with appropriate locking added. 
++ */ ++ ++static int notifier_chain_register(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if (unlikely((*nl) == n)) { ++ WARN(1, "double register detected"); ++ return 0; ++ } ++ ++ if (n->priority > (*nl)->priority) ++ break; ++ nl = &((*nl)->next); ++ } ++ n->next = *nl; ++ rcu_assign_pointer(*nl, n); ++ return 0; ++} ++ ++static int notifier_chain_cond_register(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if ((*nl) == n) ++ return 0; ++ if (n->priority > (*nl)->priority) ++ break; ++ nl = &((*nl)->next); ++ } ++ n->next = *nl; ++ rcu_assign_pointer(*nl, n); ++ return 0; ++} ++ ++static int notifier_chain_unregister(struct notifier_block **nl, ++ struct notifier_block *n) ++{ ++ while ((*nl) != NULL) { ++ if ((*nl) == n) { ++ rcu_assign_pointer(*nl, n->next); ++ return 0; ++ } ++ nl = &((*nl)->next); ++ } ++ return -ENOENT; ++} ++ ++/** ++ * notifier_call_chain - Informs the registered notifiers about an event. ++ * @nl: Pointer to head of the blocking notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: Number of notifier functions to be called. Don't care ++ * value of this parameter is -1. ++ * @nr_calls: Records the number of notifications sent. Don't care ++ * value of this field is NULL. ++ * @returns: notifier_call_chain returns the value returned by the ++ * last notifier function called. ++ */ ++static int notifier_call_chain(struct notifier_block **nl, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret = NOTIFY_DONE; ++ struct notifier_block *nb, *next_nb; ++ ++ nb = rcu_dereference_raw(*nl); ++ ++ while (nb && nr_to_call) { ++ next_nb = rcu_dereference_raw(nb->next); ++ ++#ifdef CONFIG_DEBUG_NOTIFIERS ++ if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { ++ WARN(1, "Invalid notifier called!"); ++ nb = next_nb; ++ continue; ++ } ++#endif ++ ret = nb->notifier_call(nb, val, v); ++ ++ if (nr_calls) ++ (*nr_calls)++; ++ ++ if (ret & NOTIFY_STOP_MASK) ++ break; ++ nb = next_nb; ++ nr_to_call--; ++ } ++ return ret; ++} ++NOKPROBE_SYMBOL(notifier_call_chain); ++ ++/* ++ * Atomic notifier chain routines. Registration and unregistration ++ * use a spinlock, and call_chain is synchronized by RCU (no locks). ++ */ ++ ++/** ++ * atomic_notifier_chain_register - Add notifier to an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to an atomic notifier chain. ++ * ++ * Currently always returns zero. ++ */ ++int atomic_notifier_chain_register(struct atomic_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&nh->lock, flags); ++ ret = notifier_chain_register(&nh->head, n); ++ spin_unlock_irqrestore(&nh->lock, flags); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); ++ ++/** ++ * atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from an atomic notifier chain. ++ * ++ * Returns zero on success or %-ENOENT on failure. 
++ */ ++int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&nh->lock, flags); ++ ret = notifier_chain_unregister(&nh->head, n); ++ spin_unlock_irqrestore(&nh->lock, flags); ++ synchronize_rcu(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); ++ ++/** ++ * __atomic_notifier_call_chain - Call functions in an atomic notifier chain ++ * @nh: Pointer to head of the atomic notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See the comment for notifier_call_chain. ++ * @nr_calls: See the comment for notifier_call_chain. ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in an atomic context, so they must not block. ++ * This routine uses RCU to synchronize with changes to the chain. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then atomic_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret; ++ ++ rcu_read_lock(); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++ rcu_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); ++NOKPROBE_SYMBOL(__atomic_notifier_call_chain); ++ ++int atomic_notifier_call_chain(struct atomic_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __atomic_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); ++NOKPROBE_SYMBOL(atomic_notifier_call_chain); ++ ++/* ++ * Blocking notifier chain routines. All access to the chain is ++ * synchronized by an rwsem. ++ */ ++ ++/** ++ * blocking_notifier_chain_register - Add notifier to a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a blocking notifier chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. ++ */ ++int blocking_notifier_chain_register(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call down_write(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_register(&nh->head, n); ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_register(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); ++ ++/** ++ * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a blocking notifier chain, only if not already ++ * present in the chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. 
++ */ ++int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_cond_register(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); ++ ++/** ++ * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from a blocking notifier chain. ++ * Must be called from process context. ++ * ++ * Returns zero on success or %-ENOENT on failure. ++ */ ++int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call down_write(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_unregister(&nh->head, n); ++ ++ down_write(&nh->rwsem); ++ ret = notifier_chain_unregister(&nh->head, n); ++ up_write(&nh->rwsem); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister); ++ ++/** ++ * __blocking_notifier_call_chain - Call functions in a blocking notifier chain ++ * @nh: Pointer to head of the blocking notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain. ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in a process context, so they are allowed to block. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then blocking_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret = NOTIFY_DONE; ++ ++ /* ++ * We check the head outside the lock, but if this access is ++ * racy then it does not matter what the result of the test ++ * is, we re-check the list after having taken the lock anyway: ++ */ ++ if (rcu_access_pointer(nh->head)) { ++ down_read(&nh->rwsem); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, ++ nr_calls); ++ up_read(&nh->rwsem); ++ } ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain); ++ ++int blocking_notifier_call_chain(struct blocking_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __blocking_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(blocking_notifier_call_chain); ++ ++/* ++ * Raw notifier chain routines. There is no protection; ++ * the caller must provide it. Use at your own risk! ++ */ ++ ++/** ++ * raw_notifier_chain_register - Add notifier to a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to a raw notifier chain. ++ * All locking must be provided by the caller. ++ * ++ * Currently always returns zero. 
++ */ ++int raw_notifier_chain_register(struct raw_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ return notifier_chain_register(&nh->head, n); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_chain_register); ++ ++/** ++ * raw_notifier_chain_unregister - Remove notifier from a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from a raw notifier chain. ++ * All locking must be provided by the caller. ++ * ++ * Returns zero on success or %-ENOENT on failure. ++ */ ++int raw_notifier_chain_unregister(struct raw_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ return notifier_chain_unregister(&nh->head, n); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister); ++ ++/** ++ * __raw_notifier_call_chain - Call functions in a raw notifier chain ++ * @nh: Pointer to head of the raw notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in an undefined context. ++ * All locking must be provided by the caller. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then raw_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __raw_notifier_call_chain(struct raw_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++} ++EXPORT_SYMBOL_GPL(__raw_notifier_call_chain); ++ ++int raw_notifier_call_chain(struct raw_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __raw_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(raw_notifier_call_chain); ++ ++#ifdef CONFIG_SRCU ++/* ++ * SRCU notifier chain routines. Registration and unregistration ++ * use a mutex, and call_chain is synchronized by SRCU (no locks). ++ */ ++ ++/** ++ * srcu_notifier_chain_register - Add notifier to an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @n: New entry in notifier chain ++ * ++ * Adds a notifier to an SRCU notifier chain. ++ * Must be called in process context. ++ * ++ * Currently always returns zero. ++ */ ++int srcu_notifier_chain_register(struct srcu_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call mutex_lock(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_register(&nh->head, n); ++ ++ mutex_lock(&nh->mutex); ++ ret = notifier_chain_register(&nh->head, n); ++ mutex_unlock(&nh->mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_chain_register); ++ ++/** ++ * srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @n: Entry to remove from notifier chain ++ * ++ * Removes a notifier from an SRCU notifier chain. ++ * Must be called from process context. ++ * ++ * Returns zero on success or %-ENOENT on failure. 
++ */ ++int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh, ++ struct notifier_block *n) ++{ ++ int ret; ++ ++ /* ++ * This code gets used during boot-up, when task switching is ++ * not yet working and interrupts must remain disabled. At ++ * such times we must not call mutex_lock(). ++ */ ++ if (unlikely(system_state == SYSTEM_BOOTING)) ++ return notifier_chain_unregister(&nh->head, n); ++ ++ mutex_lock(&nh->mutex); ++ ret = notifier_chain_unregister(&nh->head, n); ++ mutex_unlock(&nh->mutex); ++ synchronize_srcu(&nh->srcu); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister); ++ ++/** ++ * __srcu_notifier_call_chain - Call functions in an SRCU notifier chain ++ * @nh: Pointer to head of the SRCU notifier chain ++ * @val: Value passed unmodified to notifier function ++ * @v: Pointer passed unmodified to notifier function ++ * @nr_to_call: See comment for notifier_call_chain. ++ * @nr_calls: See comment for notifier_call_chain ++ * ++ * Calls each function in a notifier chain in turn. The functions ++ * run in a process context, so they are allowed to block. ++ * ++ * If the return value of the notifier can be and'ed ++ * with %NOTIFY_STOP_MASK then srcu_notifier_call_chain() ++ * will return immediately, with the return value of ++ * the notifier function which halted execution. ++ * Otherwise the return value is the return value ++ * of the last notifier function called. ++ */ ++int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, ++ unsigned long val, void *v, ++ int nr_to_call, int *nr_calls) ++{ ++ int ret; ++ int idx; ++ ++ idx = srcu_read_lock(&nh->srcu); ++ ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); ++ srcu_read_unlock(&nh->srcu, idx); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain); ++ ++int srcu_notifier_call_chain(struct srcu_notifier_head *nh, ++ unsigned long val, void *v) ++{ ++ return __srcu_notifier_call_chain(nh, val, v, -1, NULL); ++} ++EXPORT_SYMBOL_GPL(srcu_notifier_call_chain); ++ ++/** ++ * srcu_init_notifier_head - Initialize an SRCU notifier head ++ * @nh: Pointer to head of the srcu notifier chain ++ * ++ * Unlike other sorts of notifier heads, SRCU notifier heads require ++ * dynamic initialization. Be sure to call this routine before ++ * calling any of the other SRCU notifier routines for this head. ++ * ++ * If an SRCU notifier head is deallocated, it must first be cleaned ++ * up by calling srcu_cleanup_notifier_head(). Otherwise the head's ++ * per-cpu data (used by the SRCU mechanism) will leak. 
++ */ ++void srcu_init_notifier_head(struct srcu_notifier_head *nh) ++{ ++ mutex_init(&nh->mutex); ++ if (init_srcu_struct(&nh->srcu) < 0) ++ BUG(); ++ nh->head = NULL; ++} ++EXPORT_SYMBOL_GPL(srcu_init_notifier_head); ++ ++#endif /* CONFIG_SRCU */ ++ ++static ATOMIC_NOTIFIER_HEAD(die_chain); ++ ++int notrace notify_die(enum die_val val, const char *str, ++ struct pt_regs *regs, long err, int trap, int sig) ++{ ++ struct die_args args = { ++ .regs = regs, ++ .str = str, ++ .err = err, ++ .trapnr = trap, ++ .signr = sig, ++ ++ }; ++ RCU_LOCKDEP_WARN(!rcu_is_watching(), ++ "notify_die called but RCU thinks we're quiescent"); ++ return atomic_notifier_call_chain(&die_chain, val, &args); ++} ++NOKPROBE_SYMBOL(notify_die); ++ ++int register_die_notifier(struct notifier_block *nb) ++{ ++ vmalloc_sync_mappings(); ++ return atomic_notifier_chain_register(&die_chain, nb); ++} ++EXPORT_SYMBOL_GPL(register_die_notifier); ++ ++int unregister_die_notifier(struct notifier_block *nb) ++{ ++ return atomic_notifier_chain_unregister(&die_chain, nb); ++} ++EXPORT_SYMBOL_GPL(unregister_die_notifier); +diff -uprN kernel/kernel/panic.c kernel_new/kernel/panic.c +--- kernel/kernel/panic.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/panic.c 2021-04-01 18:28:07.810863115 +0800 +@@ -20,8 +20,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + #include + #include + #include +@@ -523,6 +525,8 @@ void oops_enter(void) + { + tracing_off(); + /* can't trust the integrity of the kernel anymore: */ ++ ipipe_trace_panic_freeze(); ++ ipipe_disable_context_check(); + debug_locks_off(); + do_oops_enter_exit(); + } +diff -uprN kernel/kernel/panic.c.orig kernel_new/kernel/panic.c.orig +--- kernel/kernel/panic.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/panic.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,725 @@ ++/* ++ * linux/kernel/panic.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ */ ++ ++/* ++ * This function is used through-out the kernel (including mm and fs) ++ * to indicate a major problem. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define PANIC_TIMER_STEP 100 ++#define PANIC_BLINK_SPD 18 ++ ++int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; ++static unsigned long tainted_mask = ++ IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? 
(1 << TAINT_RANDSTRUCT) : 0; ++static int pause_on_oops; ++static int pause_on_oops_flag; ++static DEFINE_SPINLOCK(pause_on_oops_lock); ++bool crash_kexec_post_notifiers; ++int panic_on_warn __read_mostly; ++ ++int panic_timeout = CONFIG_PANIC_TIMEOUT; ++EXPORT_SYMBOL_GPL(panic_timeout); ++ ++#define PANIC_PRINT_TASK_INFO 0x00000001 ++#define PANIC_PRINT_MEM_INFO 0x00000002 ++#define PANIC_PRINT_TIMER_INFO 0x00000004 ++#define PANIC_PRINT_LOCK_INFO 0x00000008 ++#define PANIC_PRINT_FTRACE_INFO 0x00000010 ++unsigned long panic_print; ++ ++ATOMIC_NOTIFIER_HEAD(panic_notifier_list); ++ ++EXPORT_SYMBOL(panic_notifier_list); ++ ++static long no_blink(int state) ++{ ++ return 0; ++} ++ ++/* Returns how long it waited in ms */ ++long (*panic_blink)(int state); ++EXPORT_SYMBOL(panic_blink); ++ ++/* ++ * Stop ourself in panic -- architecture code may override this ++ */ ++void __weak panic_smp_self_stop(void) ++{ ++ while (1) ++ cpu_relax(); ++} ++ ++/* ++ * Stop ourselves in NMI context if another CPU has already panicked. Arch code ++ * may override this to prepare for crash dumping, e.g. save regs info. ++ */ ++void __weak nmi_panic_self_stop(struct pt_regs *regs) ++{ ++ panic_smp_self_stop(); ++} ++ ++/* ++ * Stop other CPUs in panic. Architecture dependent code may override this ++ * with more suitable version. For example, if the architecture supports ++ * crash dump, it should save registers of each stopped CPU and disable ++ * per-CPU features such as virtualization extensions. ++ */ ++void __weak crash_smp_send_stop(void) ++{ ++ static int cpus_stopped; ++ ++ /* ++ * This function can be called twice in panic path, but obviously ++ * we execute this only once. ++ */ ++ if (cpus_stopped) ++ return; ++ ++ /* ++ * Note smp_send_stop is the usual smp shutdown function, which ++ * unfortunately means it may not be hardened to work in a panic ++ * situation. ++ */ ++ smp_send_stop(); ++ cpus_stopped = 1; ++} ++ ++atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); ++ ++/* ++ * A variant of panic() called from NMI context. We return if we've already ++ * panicked on this CPU. If another CPU already panicked, loop in ++ * nmi_panic_self_stop() which can provide architecture dependent code such ++ * as saving register state for crash dump. ++ */ ++void nmi_panic(struct pt_regs *regs, const char *msg) ++{ ++ int old_cpu, cpu; ++ ++ cpu = raw_smp_processor_id(); ++ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); ++ ++ if (old_cpu == PANIC_CPU_INVALID) ++ panic("%s", msg); ++ else if (old_cpu != cpu) ++ nmi_panic_self_stop(regs); ++} ++EXPORT_SYMBOL(nmi_panic); ++ ++static void panic_print_sys_info(void) ++{ ++ if (panic_print & PANIC_PRINT_TASK_INFO) ++ show_state(); ++ ++ if (panic_print & PANIC_PRINT_MEM_INFO) ++ show_mem(0, NULL); ++ ++ if (panic_print & PANIC_PRINT_TIMER_INFO) ++ sysrq_timer_list_show(); ++ ++ if (panic_print & PANIC_PRINT_LOCK_INFO) ++ debug_show_all_locks(); ++ ++ if (panic_print & PANIC_PRINT_FTRACE_INFO) ++ ftrace_dump(DUMP_ALL); ++} ++ ++/** ++ * panic - halt the system ++ * @fmt: The text string to print ++ * ++ * Display a message, then perform cleanups. ++ * ++ * This function never returns. ++ */ ++void panic(const char *fmt, ...) ++{ ++ static char buf[1024]; ++ va_list args; ++ long i, i_next = 0; ++ int state = 0; ++ int old_cpu, this_cpu; ++ bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; ++ ++ /* ++ * Disable local interrupts. 
This will prevent panic_smp_self_stop ++ * from deadlocking the first cpu that invokes the panic, since ++ * there is nothing to prevent an interrupt handler (that runs ++ * after setting panic_cpu) from invoking panic() again. ++ */ ++ local_irq_disable(); ++ preempt_disable_notrace(); ++ ++ /* ++ * It's possible to come here directly from a panic-assertion and ++ * not have preempt disabled. Some functions called from here want ++ * preempt to be disabled. No point enabling it later though... ++ * ++ * Only one CPU is allowed to execute the panic code from here. For ++ * multiple parallel invocations of panic, all other CPUs either ++ * stop themself or will wait until they are stopped by the 1st CPU ++ * with smp_send_stop(). ++ * ++ * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which ++ * comes here, so go ahead. ++ * `old_cpu == this_cpu' means we came from nmi_panic() which sets ++ * panic_cpu to this CPU. In this case, this is also the 1st CPU. ++ */ ++ this_cpu = raw_smp_processor_id(); ++ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); ++ ++ if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) ++ panic_smp_self_stop(); ++ ++ console_verbose(); ++ bust_spinlocks(1); ++ va_start(args, fmt); ++ vsnprintf(buf, sizeof(buf), fmt, args); ++ va_end(args); ++ pr_emerg("Kernel panic - not syncing: %s\n", buf); ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ /* ++ * Avoid nested stack-dumping if a panic occurs during oops processing ++ */ ++ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) ++ dump_stack(); ++#endif ++ ++ /* ++ * If we have crashed and we have a crash kernel loaded let it handle ++ * everything else. ++ * If we want to run this after calling panic_notifiers, pass ++ * the "crash_kexec_post_notifiers" option to the kernel. ++ * ++ * Bypass the panic_cpu check and call __crash_kexec directly. ++ */ ++ if (!_crash_kexec_post_notifiers) { ++ printk_safe_flush_on_panic(); ++ __crash_kexec(NULL); ++ ++ /* ++ * Note smp_send_stop is the usual smp shutdown function, which ++ * unfortunately means it may not be hardened to work in a ++ * panic situation. ++ */ ++ smp_send_stop(); ++ } else { ++ /* ++ * If we want to do crash dump after notifier calls and ++ * kmsg_dump, we will need architecture dependent extra ++ * works in addition to stopping other CPUs. ++ */ ++ crash_smp_send_stop(); ++ } ++ ++ /* ++ * ZAP console related locks when nmi broadcast. If a crash is occurring, ++ * make sure we can't deadlock. And make sure that we print immediately. ++ * ++ * A deadlock caused by logbuf_lock can be occured when panic: ++ * a) Panic CPU is running in non-NMI context; ++ * b) Panic CPU sends out shutdown IPI via NMI vector; ++ * c) One of the CPUs that we bring down via NMI vector holded logbuf_lock; ++ * d) Panic CPU try to hold logbuf_lock, then deadlock occurs. ++ * ++ * At present, only try to solve this problem for the ARCH with NMI, ++ * by reinit lock, this situation is more complicated when NMI is not ++ * used. ++ * 1). Non-stopped CPUs are in unknown state, most likely in a busy loop. ++ * Nobody knows whether printk() is repeatedly called in the loop. ++ * When it was called, re-initializing any lock would cause double ++ * unlock and deadlock. ++ * ++ * 2). It would be possible to add some more hacks. One problem is that ++ * there are two groups of users. One prefer to risk a deadlock and ++ * have a chance to see the messages. Others prefer to always ++ * reach emergency_restart() and reboot the machine. 
++ */ ++#ifdef CONFIG_X86 ++ zap_locks(); ++#endif ++ ++ /* ++ * Run any panic handlers, including those that might need to ++ * add information to the kmsg dump output. ++ */ ++ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); ++ ++ /* Call flush even twice. It tries harder with a single online CPU */ ++ printk_safe_flush_on_panic(); ++ kmsg_dump(KMSG_DUMP_PANIC); ++ ++ /* ++ * If you doubt kdump always works fine in any situation, ++ * "crash_kexec_post_notifiers" offers you a chance to run ++ * panic_notifiers and dumping kmsg before kdump. ++ * Note: since some panic_notifiers can make crashed kernel ++ * more unstable, it can increase risks of the kdump failure too. ++ * ++ * Bypass the panic_cpu check and call __crash_kexec directly. ++ */ ++ if (_crash_kexec_post_notifiers) ++ __crash_kexec(NULL); ++ ++#ifdef CONFIG_VT ++ unblank_screen(); ++#endif ++ console_unblank(); ++ ++ /* ++ * We may have ended up stopping the CPU holding the lock (in ++ * smp_send_stop()) while still having some valuable data in the console ++ * buffer. Try to acquire the lock then release it regardless of the ++ * result. The release will also print the buffers out. Locks debug ++ * should be disabled to avoid reporting bad unlock balance when ++ * panic() is not being callled from OOPS. ++ */ ++ debug_locks_off(); ++ console_flush_on_panic(); ++ ++ panic_print_sys_info(); ++ ++ if (!panic_blink) ++ panic_blink = no_blink; ++ ++ if (panic_timeout > 0) { ++ /* ++ * Delay timeout seconds before rebooting the machine. ++ * We can't use the "normal" timers since we just panicked. ++ */ ++ pr_emerg("Rebooting in %d seconds..\n", panic_timeout); ++ ++ for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { ++ touch_nmi_watchdog(); ++ if (i >= i_next) { ++ i += panic_blink(state ^= 1); ++ i_next = i + 3600 / PANIC_BLINK_SPD; ++ } ++ mdelay(PANIC_TIMER_STEP); ++ } ++ } ++ if (panic_timeout != 0) { ++ /* ++ * This will not be a clean reboot, with everything ++ * shutting down. But if there is a chance of ++ * rebooting the system it will be rebooted. ++ */ ++ emergency_restart(); ++ } ++#ifdef __sparc__ ++ { ++ extern int stop_a_enabled; ++ /* Make sure the user can actually press Stop-A (L1-A) */ ++ stop_a_enabled = 1; ++ pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" ++ "twice on console to return to the boot prom\n"); ++ } ++#endif ++#if defined(CONFIG_S390) ++ { ++ unsigned long caller; ++ ++ caller = (unsigned long)__builtin_return_address(0); ++ disabled_wait(caller); ++ } ++#endif ++ pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); ++ local_irq_enable(); ++ for (i = 0; ; i += PANIC_TIMER_STEP) { ++ touch_softlockup_watchdog(); ++ if (i >= i_next) { ++ i += panic_blink(state ^= 1); ++ i_next = i + 3600 / PANIC_BLINK_SPD; ++ } ++ mdelay(PANIC_TIMER_STEP); ++ } ++} ++ ++EXPORT_SYMBOL(panic); ++ ++/* ++ * TAINT_FORCED_RMMOD could be a per-module flag but the module ++ * is being removed anyway. 
++ */ ++const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { ++ [ TAINT_PROPRIETARY_MODULE ] = { 'P', 'G', true }, ++ [ TAINT_FORCED_MODULE ] = { 'F', ' ', true }, ++ [ TAINT_CPU_OUT_OF_SPEC ] = { 'S', ' ', false }, ++ [ TAINT_FORCED_RMMOD ] = { 'R', ' ', false }, ++ [ TAINT_MACHINE_CHECK ] = { 'M', ' ', false }, ++ [ TAINT_BAD_PAGE ] = { 'B', ' ', false }, ++ [ TAINT_USER ] = { 'U', ' ', false }, ++ [ TAINT_DIE ] = { 'D', ' ', false }, ++ [ TAINT_OVERRIDDEN_ACPI_TABLE ] = { 'A', ' ', false }, ++ [ TAINT_WARN ] = { 'W', ' ', false }, ++ [ TAINT_CRAP ] = { 'C', ' ', true }, ++ [ TAINT_FIRMWARE_WORKAROUND ] = { 'I', ' ', false }, ++ [ TAINT_OOT_MODULE ] = { 'O', ' ', true }, ++ [ TAINT_UNSIGNED_MODULE ] = { 'E', ' ', true }, ++ [ TAINT_SOFTLOCKUP ] = { 'L', ' ', false }, ++ [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, ++ [ TAINT_AUX ] = { 'X', ' ', true }, ++ [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, ++}; ++ ++/** ++ * print_tainted - return a string to represent the kernel taint state. ++ * ++ * For individual taint flag meanings, see Documentation/sysctl/kernel.txt ++ * ++ * The string is overwritten by the next call to print_tainted(), ++ * but is always NULL terminated. ++ */ ++const char *print_tainted(void) ++{ ++ static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")]; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT); ++ ++ if (tainted_mask) { ++ char *s; ++ int i; ++ ++ s = buf + sprintf(buf, "Tainted: "); ++ for (i = 0; i < TAINT_FLAGS_COUNT; i++) { ++ const struct taint_flag *t = &taint_flags[i]; ++ *s++ = test_bit(i, &tainted_mask) ? ++ t->c_true : t->c_false; ++ } ++ *s = 0; ++ } else ++ snprintf(buf, sizeof(buf), "Not tainted"); ++ ++ return buf; ++} ++ ++int test_taint(unsigned flag) ++{ ++ return test_bit(flag, &tainted_mask); ++} ++EXPORT_SYMBOL(test_taint); ++ ++unsigned long get_taint(void) ++{ ++ return tainted_mask; ++} ++ ++/** ++ * add_taint: add a taint flag if not already set. ++ * @flag: one of the TAINT_* constants. ++ * @lockdep_ok: whether lock debugging is still OK. ++ * ++ * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for ++ * some notewortht-but-not-corrupting cases, it can be set to true. ++ */ ++void add_taint(unsigned flag, enum lockdep_ok lockdep_ok) ++{ ++ if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off()) ++ pr_warn("Disabling lock debugging due to kernel taint\n"); ++ ++ set_bit(flag, &tainted_mask); ++} ++EXPORT_SYMBOL(add_taint); ++ ++static void spin_msec(int msecs) ++{ ++ int i; ++ ++ for (i = 0; i < msecs; i++) { ++ touch_nmi_watchdog(); ++ mdelay(1); ++ } ++} ++ ++/* ++ * It just happens that oops_enter() and oops_exit() are identically ++ * implemented... 
++ */ ++static void do_oops_enter_exit(void) ++{ ++ unsigned long flags; ++ static int spin_counter; ++ ++ if (!pause_on_oops) ++ return; ++ ++ spin_lock_irqsave(&pause_on_oops_lock, flags); ++ if (pause_on_oops_flag == 0) { ++ /* This CPU may now print the oops message */ ++ pause_on_oops_flag = 1; ++ } else { ++ /* We need to stall this CPU */ ++ if (!spin_counter) { ++ /* This CPU gets to do the counting */ ++ spin_counter = pause_on_oops; ++ do { ++ spin_unlock(&pause_on_oops_lock); ++ spin_msec(MSEC_PER_SEC); ++ spin_lock(&pause_on_oops_lock); ++ } while (--spin_counter); ++ pause_on_oops_flag = 0; ++ } else { ++ /* This CPU waits for a different one */ ++ while (spin_counter) { ++ spin_unlock(&pause_on_oops_lock); ++ spin_msec(1); ++ spin_lock(&pause_on_oops_lock); ++ } ++ } ++ } ++ spin_unlock_irqrestore(&pause_on_oops_lock, flags); ++} ++ ++/* ++ * Return true if the calling CPU is allowed to print oops-related info. ++ * This is a bit racy.. ++ */ ++int oops_may_print(void) ++{ ++ return pause_on_oops_flag == 0; ++} ++ ++/* ++ * Called when the architecture enters its oops handler, before it prints ++ * anything. If this is the first CPU to oops, and it's oopsing the first ++ * time then let it proceed. ++ * ++ * This is all enabled by the pause_on_oops kernel boot option. We do all ++ * this to ensure that oopses don't scroll off the screen. It has the ++ * side-effect of preventing later-oopsing CPUs from mucking up the display, ++ * too. ++ * ++ * It turns out that the CPU which is allowed to print ends up pausing for ++ * the right duration, whereas all the other CPUs pause for twice as long: ++ * once in oops_enter(), once in oops_exit(). ++ */ ++void oops_enter(void) ++{ ++ tracing_off(); ++ /* can't trust the integrity of the kernel anymore: */ ++ debug_locks_off(); ++ do_oops_enter_exit(); ++} ++ ++/* ++ * 64-bit random ID for oopses: ++ */ ++static u64 oops_id; ++ ++static int init_oops_id(void) ++{ ++ if (!oops_id) ++ get_random_bytes(&oops_id, sizeof(oops_id)); ++ else ++ oops_id++; ++ ++ return 0; ++} ++late_initcall(init_oops_id); ++ ++void print_oops_end_marker(void) ++{ ++ init_oops_id(); ++ pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); ++} ++ ++/* ++ * Called when the architecture exits its oops handler, after printing ++ * everything. ++ */ ++void oops_exit(void) ++{ ++ do_oops_enter_exit(); ++ print_oops_end_marker(); ++ kmsg_dump(KMSG_DUMP_OOPS); ++} ++ ++struct warn_args { ++ const char *fmt; ++ va_list args; ++}; ++ ++void __warn(const char *file, int line, void *caller, unsigned taint, ++ struct pt_regs *regs, struct warn_args *args) ++{ ++ disable_trace_on_warning(); ++ ++ if (args) ++ pr_warn(CUT_HERE); ++ ++ if (file) ++ pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", ++ raw_smp_processor_id(), current->pid, file, line, ++ caller); ++ else ++ pr_warn("WARNING: CPU: %d PID: %d at %pS\n", ++ raw_smp_processor_id(), current->pid, caller); ++ ++ if (args) ++ vprintk(args->fmt, args->args); ++ ++ if (panic_on_warn) { ++ /* ++ * This thread may hit another WARN() in the panic path. ++ * Resetting this prevents additional WARN() from panicking the ++ * system on this thread. Other threads are blocked by the ++ * panic_mutex in panic(). ++ */ ++ panic_on_warn = 0; ++ panic("panic_on_warn set ...\n"); ++ } ++ ++ print_modules(); ++ ++ if (regs) ++ show_regs(regs); ++ else ++ dump_stack(); ++ ++ print_irqtrace_events(current); ++ ++ print_oops_end_marker(); ++ ++ /* Just a warning, don't kill lockdep. 
*/ ++ add_taint(taint, LOCKDEP_STILL_OK); ++} ++ ++#ifdef WANT_WARN_ON_SLOWPATH ++void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) ++{ ++ struct warn_args args; ++ ++ args.fmt = fmt; ++ va_start(args.args, fmt); ++ __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, ++ &args); ++ va_end(args.args); ++} ++EXPORT_SYMBOL(warn_slowpath_fmt); ++ ++void warn_slowpath_fmt_taint(const char *file, int line, ++ unsigned taint, const char *fmt, ...) ++{ ++ struct warn_args args; ++ ++ args.fmt = fmt; ++ va_start(args.args, fmt); ++ __warn(file, line, __builtin_return_address(0), taint, NULL, &args); ++ va_end(args.args); ++} ++EXPORT_SYMBOL(warn_slowpath_fmt_taint); ++ ++void warn_slowpath_null(const char *file, int line) ++{ ++ pr_warn(CUT_HERE); ++ __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL); ++} ++EXPORT_SYMBOL(warn_slowpath_null); ++#else ++void __warn_printk(const char *fmt, ...) ++{ ++ va_list args; ++ ++ pr_warn(CUT_HERE); ++ ++ va_start(args, fmt); ++ vprintk(fmt, args); ++ va_end(args); ++} ++EXPORT_SYMBOL(__warn_printk); ++#endif ++ ++#ifdef CONFIG_BUG ++ ++/* Support resetting WARN*_ONCE state */ ++ ++static int clear_warn_once_set(void *data, u64 val) ++{ ++ generic_bug_clear_once(); ++ memset(__start_once, 0, __end_once - __start_once); ++ return 0; ++} ++ ++DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops, ++ NULL, ++ clear_warn_once_set, ++ "%lld\n"); ++ ++static __init int register_warn_debugfs(void) ++{ ++ /* Don't care about failure */ ++ debugfs_create_file("clear_warn_once", 0200, NULL, ++ NULL, &clear_warn_once_fops); ++ return 0; ++} ++ ++device_initcall(register_warn_debugfs); ++#endif ++ ++#ifdef CONFIG_STACKPROTECTOR ++ ++/* ++ * Called when gcc's -fstack-protector feature is used, and ++ * gcc detects corruption of the on-stack canary value ++ */ ++__visible void __stack_chk_fail(void) ++{ ++ panic("stack-protector: Kernel stack is corrupted in: %pB", ++ __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(__stack_chk_fail); ++ ++#endif ++ ++#ifdef CONFIG_ARCH_HAS_REFCOUNT ++void refcount_error_report(struct pt_regs *regs, const char *err) ++{ ++ WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n", ++ err, (void *)instruction_pointer(regs), ++ current->comm, task_pid_nr(current), ++ from_kuid_munged(&init_user_ns, current_uid()), ++ from_kuid_munged(&init_user_ns, current_euid())); ++} ++#endif ++ ++core_param(panic, panic_timeout, int, 0644); ++core_param(panic_print, panic_print, ulong, 0644); ++core_param(pause_on_oops, pause_on_oops, int, 0644); ++core_param(panic_on_warn, panic_on_warn, int, 0644); ++core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644); ++ ++static int __init oops_setup(char *s) ++{ ++ if (!s) ++ return -EINVAL; ++ if (!strcmp(s, "panic")) ++ panic_on_oops = 1; ++ return 0; ++} ++early_param("oops", oops_setup); +diff -uprN kernel/kernel/power/hibernate.c kernel_new/kernel/power/hibernate.c +--- kernel/kernel/power/hibernate.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/power/hibernate.c 2021-04-01 18:28:07.811863114 +0800 +@@ -300,6 +300,7 @@ static int create_image(int platform_mod + goto Enable_cpus; + + local_irq_disable(); ++ hard_cond_local_irq_disable(); + + system_state = SYSTEM_SUSPEND; + +@@ -467,6 +468,7 @@ static int resume_target_kernel(bool pla + + local_irq_disable(); + system_state = SYSTEM_SUSPEND; ++ hard_cond_local_irq_disable(); + + error = syscore_suspend(); + if (error) +@@ -588,6 +590,7 @@ int 
hibernation_platform_enter(void) + + local_irq_disable(); + system_state = SYSTEM_SUSPEND; ++ hard_cond_local_irq_disable(); + syscore_suspend(); + if (pm_wakeup_pending()) { + error = -EAGAIN; +diff -uprN kernel/kernel/power/hibernate.c.orig kernel_new/kernel/power/hibernate.c.orig +--- kernel/kernel/power/hibernate.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/power/hibernate.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,1254 @@ ++/* ++ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. ++ * ++ * Copyright (c) 2003 Patrick Mochel ++ * Copyright (c) 2003 Open Source Development Lab ++ * Copyright (c) 2004 Pavel Machek ++ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. ++ * Copyright (C) 2012 Bojan Smojver ++ * ++ * This file is released under the GPLv2. ++ */ ++ ++#define pr_fmt(fmt) "PM: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "power.h" ++ ++ ++static int nocompress; ++static int noresume; ++static int nohibernate; ++static int resume_wait; ++static unsigned int resume_delay; ++static char resume_file[256] = CONFIG_PM_STD_PARTITION; ++dev_t swsusp_resume_device; ++sector_t swsusp_resume_block; ++__visible int in_suspend __nosavedata; ++ ++enum { ++ HIBERNATION_INVALID, ++ HIBERNATION_PLATFORM, ++ HIBERNATION_SHUTDOWN, ++ HIBERNATION_REBOOT, ++#ifdef CONFIG_SUSPEND ++ HIBERNATION_SUSPEND, ++#endif ++ HIBERNATION_TEST_RESUME, ++ /* keep last */ ++ __HIBERNATION_AFTER_LAST ++}; ++#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1) ++#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1) ++ ++static int hibernation_mode = HIBERNATION_SHUTDOWN; ++ ++bool freezer_test_done; ++ ++static const struct platform_hibernation_ops *hibernation_ops; ++ ++bool hibernation_available(void) ++{ ++ return (nohibernate == 0); ++} ++ ++/** ++ * hibernation_set_ops - Set the global hibernate operations. ++ * @ops: Hibernation operations to use in subsequent hibernation transitions. 
++ */ ++void hibernation_set_ops(const struct platform_hibernation_ops *ops) ++{ ++ if (ops && !(ops->begin && ops->end && ops->pre_snapshot ++ && ops->prepare && ops->finish && ops->enter && ops->pre_restore ++ && ops->restore_cleanup && ops->leave)) { ++ WARN_ON(1); ++ return; ++ } ++ lock_system_sleep(); ++ hibernation_ops = ops; ++ if (ops) ++ hibernation_mode = HIBERNATION_PLATFORM; ++ else if (hibernation_mode == HIBERNATION_PLATFORM) ++ hibernation_mode = HIBERNATION_SHUTDOWN; ++ ++ unlock_system_sleep(); ++} ++EXPORT_SYMBOL_GPL(hibernation_set_ops); ++ ++static bool entering_platform_hibernation; ++ ++bool system_entering_hibernation(void) ++{ ++ return entering_platform_hibernation; ++} ++EXPORT_SYMBOL(system_entering_hibernation); ++ ++/* To let some devices or syscore know if system carrying out hibernation*/ ++static bool carry_out_hibernation; ++ ++bool system_in_hibernation(void) ++{ ++ return carry_out_hibernation; ++} ++EXPORT_SYMBOL(system_in_hibernation); ++ ++#ifdef CONFIG_PM_DEBUG ++static void hibernation_debug_sleep(void) ++{ ++ pr_info("hibernation debug: Waiting for 5 seconds.\n"); ++ mdelay(5000); ++} ++ ++static int hibernation_test(int level) ++{ ++ if (pm_test_level == level) { ++ hibernation_debug_sleep(); ++ return 1; ++ } ++ return 0; ++} ++#else /* !CONFIG_PM_DEBUG */ ++static int hibernation_test(int level) { return 0; } ++#endif /* !CONFIG_PM_DEBUG */ ++ ++/** ++ * platform_begin - Call platform to start hibernation. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static int platform_begin(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->begin() : 0; ++} ++ ++/** ++ * platform_end - Call platform to finish transition to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static void platform_end(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->end(); ++} ++ ++/** ++ * platform_pre_snapshot - Call platform to prepare the machine for hibernation. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to prepare the system for creating a hibernate image, ++ * if so configured, and return an error code if that fails. ++ */ ++ ++static int platform_pre_snapshot(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->pre_snapshot() : 0; ++} ++ ++/** ++ * platform_leave - Call platform to prepare a transition to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver prepare to prepare the machine for switching to the ++ * normal mode of operation. ++ * ++ * This routine is called on one CPU with interrupts disabled. ++ */ ++static void platform_leave(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->leave(); ++} ++ ++/** ++ * platform_finish - Call platform to switch the system to the working state. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to switch the machine to the normal mode of ++ * operation. ++ * ++ * This routine must be called after platform_prepare(). ++ */ ++static void platform_finish(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->finish(); ++} ++ ++/** ++ * platform_pre_restore - Prepare for hibernate image restoration. ++ * @platform_mode: Whether or not to use the platform driver. 
++ * ++ * Use the platform driver to prepare the system for resume from a hibernation ++ * image. ++ * ++ * If the restore fails after this function has been called, ++ * platform_restore_cleanup() must be called. ++ */ ++static int platform_pre_restore(int platform_mode) ++{ ++ return (platform_mode && hibernation_ops) ? ++ hibernation_ops->pre_restore() : 0; ++} ++ ++/** ++ * platform_restore_cleanup - Switch to the working state after failing restore. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Use the platform driver to switch the system to the normal mode of operation ++ * after a failing restore. ++ * ++ * If platform_pre_restore() has been called before the failing restore, this ++ * function must be called too, regardless of the result of ++ * platform_pre_restore(). ++ */ ++static void platform_restore_cleanup(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops) ++ hibernation_ops->restore_cleanup(); ++} ++ ++/** ++ * platform_recover - Recover from a failure to suspend devices. ++ * @platform_mode: Whether or not to use the platform driver. ++ */ ++static void platform_recover(int platform_mode) ++{ ++ if (platform_mode && hibernation_ops && hibernation_ops->recover) ++ hibernation_ops->recover(); ++} ++ ++/** ++ * swsusp_show_speed - Print time elapsed between two events during hibernation. ++ * @start: Starting event. ++ * @stop: Final event. ++ * @nr_pages: Number of memory pages processed between @start and @stop. ++ * @msg: Additional diagnostic message to print. ++ */ ++void swsusp_show_speed(ktime_t start, ktime_t stop, ++ unsigned nr_pages, char *msg) ++{ ++ ktime_t diff; ++ u64 elapsed_centisecs64; ++ unsigned int centisecs; ++ unsigned int k; ++ unsigned int kps; ++ ++ diff = ktime_sub(stop, start); ++ elapsed_centisecs64 = ktime_divns(diff, 10*NSEC_PER_MSEC); ++ centisecs = elapsed_centisecs64; ++ if (centisecs == 0) ++ centisecs = 1; /* avoid div-by-zero */ ++ k = nr_pages * (PAGE_SIZE / 1024); ++ kps = (k * 100) / centisecs; ++ pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", ++ msg, k, centisecs / 100, centisecs % 100, kps / 1000, ++ (kps % 1000) / 10); ++} ++ ++__weak int arch_resume_nosmt(void) ++{ ++ return 0; ++} ++ ++/** ++ * create_image - Create a hibernation image. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Execute device drivers' "late" and "noirq" freeze callbacks, create a ++ * hibernation image and run the drivers' "noirq" and "early" thaw callbacks. ++ * ++ * Control reappears in this routine after the subsequent restore. 
++ */ ++static int create_image(int platform_mode) ++{ ++ int error; ++ ++ error = dpm_suspend_end(PMSG_FREEZE); ++ if (error) { ++ pr_err("Some devices failed to power down, aborting hibernation\n"); ++ return error; ++ } ++ ++ error = platform_pre_snapshot(platform_mode); ++ if (error || hibernation_test(TEST_PLATFORM)) ++ goto Platform_finish; ++ ++ error = disable_nonboot_cpus(); ++ if (error || hibernation_test(TEST_CPUS)) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ ++ system_state = SYSTEM_SUSPEND; ++ ++ error = syscore_suspend(); ++ if (error) { ++ pr_err("Some system devices failed to power down, aborting hibernation\n"); ++ goto Enable_irqs; ++ } ++ ++ if (hibernation_test(TEST_CORE) || pm_wakeup_pending()) ++ goto Power_up; ++ ++ in_suspend = 1; ++ save_processor_state(); ++ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); ++ error = swsusp_arch_suspend(); ++ /* Restore control flow magically appears here */ ++ restore_processor_state(); ++ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); ++ if (error) ++ pr_err("Error %d creating hibernation image\n", error); ++ ++ if (!in_suspend) { ++ events_check_enabled = false; ++ clear_free_pages(); ++ } ++ ++ platform_leave(platform_mode); ++ ++ Power_up: ++ syscore_resume(); ++ ++ Enable_irqs: ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ /* Allow architectures to do nosmt-specific post-resume dances */ ++ if (!in_suspend) ++ error = arch_resume_nosmt(); ++ ++ Platform_finish: ++ platform_finish(platform_mode); ++ ++ dpm_resume_start(in_suspend ? ++ (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); ++ ++ return error; ++} ++ ++/** ++ * hibernation_snapshot - Quiesce devices and create a hibernation image. ++ * @platform_mode: If set, use platform driver to prepare for the transition. ++ * ++ * This routine must be called with system_transition_mutex held. ++ */ ++int hibernation_snapshot(int platform_mode) ++{ ++ pm_message_t msg; ++ int error; ++ ++ pm_suspend_clear_flags(); ++ error = platform_begin(platform_mode); ++ if (error) ++ goto Close; ++ ++ /* Preallocate image memory before shutting down devices. */ ++ error = hibernate_preallocate_memory(); ++ if (error) ++ goto Close; ++ ++ error = freeze_kernel_threads(); ++ if (error) ++ goto Cleanup; ++ ++ if (hibernation_test(TEST_FREEZER)) { ++ ++ /* ++ * Indicate to the caller that we are returning due to a ++ * successful freezer test. ++ */ ++ freezer_test_done = true; ++ goto Thaw; ++ } ++ ++ error = dpm_prepare(PMSG_FREEZE); ++ if (error) { ++ dpm_complete(PMSG_RECOVER); ++ goto Thaw; ++ } ++ ++ suspend_console(); ++ pm_restrict_gfp_mask(); ++ ++ error = dpm_suspend(PMSG_FREEZE); ++ ++ if (error || hibernation_test(TEST_DEVICES)) ++ platform_recover(platform_mode); ++ else ++ error = create_image(platform_mode); ++ ++ /* ++ * In the case that we call create_image() above, the control ++ * returns here (1) after the image has been created or the ++ * image creation has failed and (2) after a successful restore. ++ */ ++ ++ /* We may need to release the preallocated image pages here. */ ++ if (error || !in_suspend) ++ swsusp_free(); ++ ++ msg = in_suspend ? (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; ++ dpm_resume(msg); ++ ++ if (error || !in_suspend) ++ pm_restore_gfp_mask(); ++ ++ resume_console(); ++ dpm_complete(msg); ++ ++ Close: ++ platform_end(platform_mode); ++ return error; ++ ++ Thaw: ++ thaw_kernel_threads(); ++ Cleanup: ++ swsusp_free(); ++ goto Close; ++} ++ ++int __weak hibernate_resume_nonboot_cpu_disable(void) ++{ ++ return disable_nonboot_cpus(); ++} ++ ++/** ++ * resume_target_kernel - Restore system state from a hibernation image. ++ * @platform_mode: Whether or not to use the platform driver. ++ * ++ * Execute device drivers' "noirq" and "late" freeze callbacks, restore the ++ * contents of highmem that have not been restored yet from the image and run ++ * the low-level code that will restore the remaining contents of memory and ++ * switch to the just restored target kernel. ++ */ ++static int resume_target_kernel(bool platform_mode) ++{ ++ int error; ++ ++ error = dpm_suspend_end(PMSG_QUIESCE); ++ if (error) { ++ pr_err("Some devices failed to power down, aborting resume\n"); ++ return error; ++ } ++ ++ error = platform_pre_restore(platform_mode); ++ if (error) ++ goto Cleanup; ++ ++ error = hibernate_resume_nonboot_cpu_disable(); ++ if (error) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ system_state = SYSTEM_SUSPEND; ++ ++ error = syscore_suspend(); ++ if (error) ++ goto Enable_irqs; ++ ++ save_processor_state(); ++ error = restore_highmem(); ++ if (!error) { ++ error = swsusp_arch_resume(); ++ /* ++ * The code below is only ever reached in case of a failure. ++ * Otherwise, execution continues at the place where ++ * swsusp_arch_suspend() was called. ++ */ ++ BUG_ON(!error); ++ /* ++ * This call to restore_highmem() reverts the changes made by ++ * the previous one. ++ */ ++ restore_highmem(); ++ } ++ /* ++ * The only reason why swsusp_arch_resume() can fail is memory being ++ * very tight, so we have to free it as soon as we can to avoid ++ * subsequent failures. ++ */ ++ swsusp_free(); ++ restore_processor_state(); ++ touch_softlockup_watchdog(); ++ ++ syscore_resume(); ++ ++ Enable_irqs: ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ Cleanup: ++ platform_restore_cleanup(platform_mode); ++ ++ dpm_resume_start(PMSG_RECOVER); ++ ++ return error; ++} ++ ++/** ++ * hibernation_restore - Quiesce devices and restore from a hibernation image. ++ * @platform_mode: If set, use platform driver to prepare for the transition. ++ * ++ * This routine must be called with system_transition_mutex held. If it is ++ * successful, control reappears in the restored target kernel in ++ * hibernation_snapshot(). ++ */ ++int hibernation_restore(int platform_mode) ++{ ++ int error; ++ ++ pm_prepare_console(); ++ suspend_console(); ++ pm_restrict_gfp_mask(); ++ error = dpm_suspend_start(PMSG_QUIESCE); ++ if (!error) { ++ error = resume_target_kernel(platform_mode); ++ /* ++ * The above should either succeed and jump to the new kernel, ++ * or return with an error. Otherwise things are just ++ * undefined, so let's be paranoid. ++ */ ++ BUG_ON(!error); ++ } ++ dpm_resume_end(PMSG_RECOVER); ++ pm_restore_gfp_mask(); ++ resume_console(); ++ pm_restore_console(); ++ return error; ++} ++ ++/** ++ * hibernation_platform_enter - Power off the system using the platform driver. 
++ */ ++int hibernation_platform_enter(void) ++{ ++ int error; ++ ++ if (!hibernation_ops) ++ return -ENOSYS; ++ ++ /* ++ * We have cancelled the power transition by running ++ * hibernation_ops->finish() before saving the image, so we should let ++ * the firmware know that we're going to enter the sleep state after all ++ */ ++ error = hibernation_ops->begin(); ++ if (error) ++ goto Close; ++ ++ entering_platform_hibernation = true; ++ suspend_console(); ++ error = dpm_suspend_start(PMSG_HIBERNATE); ++ if (error) { ++ if (hibernation_ops->recover) ++ hibernation_ops->recover(); ++ goto Resume_devices; ++ } ++ ++ error = dpm_suspend_end(PMSG_HIBERNATE); ++ if (error) ++ goto Resume_devices; ++ ++ error = hibernation_ops->prepare(); ++ if (error) ++ goto Platform_finish; ++ ++ error = disable_nonboot_cpus(); ++ if (error) ++ goto Enable_cpus; ++ ++ local_irq_disable(); ++ system_state = SYSTEM_SUSPEND; ++ syscore_suspend(); ++ if (pm_wakeup_pending()) { ++ error = -EAGAIN; ++ goto Power_up; ++ } ++ ++ hibernation_ops->enter(); ++ /* We should never get here */ ++ while (1); ++ ++ Power_up: ++ syscore_resume(); ++ system_state = SYSTEM_RUNNING; ++ local_irq_enable(); ++ ++ Enable_cpus: ++ enable_nonboot_cpus(); ++ ++ Platform_finish: ++ hibernation_ops->finish(); ++ ++ dpm_resume_start(PMSG_RESTORE); ++ ++ Resume_devices: ++ entering_platform_hibernation = false; ++ dpm_resume_end(PMSG_RESTORE); ++ resume_console(); ++ ++ Close: ++ hibernation_ops->end(); ++ ++ return error; ++} ++ ++/** ++ * power_down - Shut the machine down for hibernation. ++ * ++ * Use the platform driver, if configured, to put the system into the sleep ++ * state corresponding to hibernation, or try to power it off or reboot, ++ * depending on the value of hibernation_mode. ++ */ ++static void power_down(void) ++{ ++#ifdef CONFIG_SUSPEND ++ int error; ++ ++ if (hibernation_mode == HIBERNATION_SUSPEND) { ++ error = suspend_devices_and_enter(PM_SUSPEND_MEM); ++ if (error) { ++ hibernation_mode = hibernation_ops ? ++ HIBERNATION_PLATFORM : ++ HIBERNATION_SHUTDOWN; ++ } else { ++ /* Restore swap signature. */ ++ error = swsusp_unmark(); ++ if (error) ++ pr_err("Swap will be unusable! Try swapon -a.\n"); ++ ++ return; ++ } ++ } ++#endif ++ ++ switch (hibernation_mode) { ++ case HIBERNATION_REBOOT: ++ kernel_restart(NULL); ++ break; ++ case HIBERNATION_PLATFORM: ++ hibernation_platform_enter(); ++ /* Fall through */ ++ case HIBERNATION_SHUTDOWN: ++ if (pm_power_off) ++ kernel_power_off(); ++ break; ++ } ++ kernel_halt(); ++ /* ++ * Valid image is on the disk, if we continue we risk serious data ++ * corruption after resume. ++ */ ++ pr_crit("Power down manually\n"); ++ while (1) ++ cpu_relax(); ++} ++ ++static int load_image_and_restore(void) ++{ ++ int error; ++ unsigned int flags; ++ ++ pm_pr_dbg("Loading hibernation image.\n"); ++ ++ lock_device_hotplug(); ++ error = create_basic_memory_bitmaps(); ++ if (error) ++ goto Unlock; ++ ++ error = swsusp_read(&flags); ++ swsusp_close(FMODE_READ); ++ if (!error) ++ hibernation_restore(flags & SF_PLATFORM_MODE); ++ ++ pr_err("Failed to load hibernation image, recovering.\n"); ++ swsusp_free(); ++ free_basic_memory_bitmaps(); ++ Unlock: ++ unlock_device_hotplug(); ++ ++ return error; ++} ++ ++/** ++ * hibernate - Carry out system hibernation, including saving the image. 
++ */ ++int hibernate(void) ++{ ++ int error, nr_calls = 0; ++ bool snapshot_test = false; ++ ++ if (!hibernation_available()) { ++ pm_pr_dbg("Hibernation not available.\n"); ++ return -EPERM; ++ } ++ ++ lock_system_sleep(); ++ /* The snapshot device should not be opened while we're running */ ++ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { ++ error = -EBUSY; ++ goto Unlock; ++ } ++ ++ pr_info("hibernation entry\n"); ++ carry_out_hibernation = true; ++ pm_prepare_console(); ++ error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); ++ if (error) { ++ nr_calls--; ++ goto Exit; ++ } ++ ++ pr_info("Syncing filesystems ... \n"); ++ ksys_sync(); ++ pr_info("done.\n"); ++ ++ error = freeze_processes(); ++ if (error) ++ goto Exit; ++ ++ lock_device_hotplug(); ++ /* Allocate memory management structures */ ++ error = create_basic_memory_bitmaps(); ++ if (error) ++ goto Thaw; ++ ++ error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); ++ if (error || freezer_test_done) ++ goto Free_bitmaps; ++ ++ if (in_suspend) { ++ unsigned int flags = 0; ++ ++ if (hibernation_mode == HIBERNATION_PLATFORM) ++ flags |= SF_PLATFORM_MODE; ++ if (nocompress) ++ flags |= SF_NOCOMPRESS_MODE; ++ else ++ flags |= SF_CRC32_MODE; ++ ++ pm_pr_dbg("Writing image.\n"); ++ error = swsusp_write(flags); ++ swsusp_free(); ++ if (!error) { ++ if (hibernation_mode == HIBERNATION_TEST_RESUME) ++ snapshot_test = true; ++ else ++ power_down(); ++ } ++ in_suspend = 0; ++ pm_restore_gfp_mask(); ++ } else { ++ pm_pr_dbg("Image restored successfully.\n"); ++ } ++ ++ Free_bitmaps: ++ free_basic_memory_bitmaps(); ++ Thaw: ++ unlock_device_hotplug(); ++ if (snapshot_test) { ++ pm_pr_dbg("Checking hibernation image\n"); ++ error = swsusp_check(); ++ if (!error) ++ error = load_image_and_restore(); ++ } ++ thaw_processes(); ++ ++ /* Don't bother checking whether freezer_test_done is true */ ++ freezer_test_done = false; ++ Exit: ++ __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); ++ pm_restore_console(); ++ atomic_inc(&snapshot_device_available); ++ Unlock: ++ unlock_system_sleep(); ++ carry_out_hibernation = false; ++ pr_info("hibernation exit\n"); ++ ++ return error; ++} ++ ++ ++/** ++ * software_resume - Resume from a saved hibernation image. ++ * ++ * This routine is called as a late initcall, when all devices have been ++ * discovered and initialized already. ++ * ++ * The image reading code is called to see if there is a hibernation image ++ * available for reading. If that is the case, devices are quiesced and the ++ * contents of memory is restored from the saved image. ++ * ++ * If this is successful, control reappears in the restored target kernel in ++ * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine ++ * attempts to recover gracefully and make the kernel return to the normal mode ++ * of operation. ++ */ ++static int software_resume(void) ++{ ++ int error, nr_calls = 0; ++ ++ /* ++ * If the user said "noresume".. bail out early. ++ */ ++ if (noresume || !hibernation_available()) ++ return 0; ++ ++ /* ++ * name_to_dev_t() below takes a sysfs buffer mutex when sysfs ++ * is configured into the kernel. Since the regular hibernate ++ * trigger path is via sysfs which takes a buffer mutex before ++ * calling hibernate functions (which take system_transition_mutex) ++ * this can cause lockdep to complain about a possible ABBA deadlock ++ * which cannot happen since we're in the boot code here and ++ * sysfs can't be invoked yet. 
Therefore, we use a subclass ++ * here to avoid lockdep complaining. ++ */ ++ mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); ++ ++ if (swsusp_resume_device) ++ goto Check_image; ++ ++ if (!strlen(resume_file)) { ++ error = -ENOENT; ++ goto Unlock; ++ } ++ ++ pm_pr_dbg("Checking hibernation image partition %s\n", resume_file); ++ ++ if (resume_delay) { ++ pr_info("Waiting %dsec before reading resume device ...\n", ++ resume_delay); ++ ssleep(resume_delay); ++ } ++ ++ /* Check if the device is there */ ++ swsusp_resume_device = name_to_dev_t(resume_file); ++ ++ /* ++ * name_to_dev_t is ineffective to verify parition if resume_file is in ++ * integer format. (e.g. major:minor) ++ */ ++ if (isdigit(resume_file[0]) && resume_wait) { ++ int partno; ++ while (!get_gendisk(swsusp_resume_device, &partno)) ++ msleep(10); ++ } ++ ++ if (!swsusp_resume_device) { ++ /* ++ * Some device discovery might still be in progress; we need ++ * to wait for this to finish. ++ */ ++ wait_for_device_probe(); ++ ++ if (resume_wait) { ++ while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) ++ msleep(10); ++ async_synchronize_full(); ++ } ++ ++ swsusp_resume_device = name_to_dev_t(resume_file); ++ if (!swsusp_resume_device) { ++ error = -ENODEV; ++ goto Unlock; ++ } ++ } ++ ++ Check_image: ++ pm_pr_dbg("Hibernation image partition %d:%d present\n", ++ MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); ++ ++ pm_pr_dbg("Looking for hibernation image.\n"); ++ error = swsusp_check(); ++ if (error) ++ goto Unlock; ++ ++ /* The snapshot device should not be opened while we're running */ ++ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { ++ error = -EBUSY; ++ swsusp_close(FMODE_READ); ++ goto Unlock; ++ } ++ ++ pr_info("resume from hibernation\n"); ++ pm_prepare_console(); ++ error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); ++ if (error) { ++ nr_calls--; ++ goto Close_Finish; ++ } ++ ++ pm_pr_dbg("Preparing processes for restore.\n"); ++ error = freeze_processes(); ++ if (error) ++ goto Close_Finish; ++ error = load_image_and_restore(); ++ thaw_processes(); ++ Finish: ++ __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); ++ pm_restore_console(); ++ pr_info("resume from hibernation failed (%d)\n", error); ++ atomic_inc(&snapshot_device_available); ++ /* For success case, the suspend path will release the lock */ ++ Unlock: ++ mutex_unlock(&system_transition_mutex); ++ pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); ++ return error; ++ Close_Finish: ++ swsusp_close(FMODE_READ); ++ goto Finish; ++} ++ ++late_initcall_sync(software_resume); ++ ++ ++static const char * const hibernation_modes[] = { ++ [HIBERNATION_PLATFORM] = "platform", ++ [HIBERNATION_SHUTDOWN] = "shutdown", ++ [HIBERNATION_REBOOT] = "reboot", ++#ifdef CONFIG_SUSPEND ++ [HIBERNATION_SUSPEND] = "suspend", ++#endif ++ [HIBERNATION_TEST_RESUME] = "test_resume", ++}; ++ ++/* ++ * /sys/power/disk - Control hibernation mode. ++ * ++ * Hibernation can be handled in several ways. There are a few different ways ++ * to put the system into the sleep state: using the platform driver (e.g. ACPI ++ * or other hibernation_ops), powering it off or rebooting it (for testing ++ * mostly). ++ * ++ * The sysfs file /sys/power/disk provides an interface for selecting the ++ * hibernation mode to use. Reading from this file causes the available modes ++ * to be printed. 
There are 3 modes that can be supported: ++ * ++ * 'platform' ++ * 'shutdown' ++ * 'reboot' ++ * ++ * If a platform hibernation driver is in use, 'platform' will be supported ++ * and will be used by default. Otherwise, 'shutdown' will be used by default. ++ * The selected option (i.e. the one corresponding to the current value of ++ * hibernation_mode) is enclosed by a square bracket. ++ * ++ * To select a given hibernation mode it is necessary to write the mode's ++ * string representation (as returned by reading from /sys/power/disk) back ++ * into /sys/power/disk. ++ */ ++ ++static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ int i; ++ char *start = buf; ++ ++ if (!hibernation_available()) ++ return sprintf(buf, "[disabled]\n"); ++ ++ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { ++ if (!hibernation_modes[i]) ++ continue; ++ switch (i) { ++ case HIBERNATION_SHUTDOWN: ++ case HIBERNATION_REBOOT: ++#ifdef CONFIG_SUSPEND ++ case HIBERNATION_SUSPEND: ++#endif ++ case HIBERNATION_TEST_RESUME: ++ break; ++ case HIBERNATION_PLATFORM: ++ if (hibernation_ops) ++ break; ++ /* not a valid mode, continue with loop */ ++ continue; ++ } ++ if (i == hibernation_mode) ++ buf += sprintf(buf, "[%s] ", hibernation_modes[i]); ++ else ++ buf += sprintf(buf, "%s ", hibernation_modes[i]); ++ } ++ buf += sprintf(buf, "\n"); ++ return buf-start; ++} ++ ++static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ int error = 0; ++ int i; ++ int len; ++ char *p; ++ int mode = HIBERNATION_INVALID; ++ ++ if (!hibernation_available()) ++ return -EPERM; ++ ++ p = memchr(buf, '\n', n); ++ len = p ? p - buf : n; ++ ++ lock_system_sleep(); ++ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { ++ if (len == strlen(hibernation_modes[i]) ++ && !strncmp(buf, hibernation_modes[i], len)) { ++ mode = i; ++ break; ++ } ++ } ++ if (mode != HIBERNATION_INVALID) { ++ switch (mode) { ++ case HIBERNATION_SHUTDOWN: ++ case HIBERNATION_REBOOT: ++#ifdef CONFIG_SUSPEND ++ case HIBERNATION_SUSPEND: ++#endif ++ case HIBERNATION_TEST_RESUME: ++ hibernation_mode = mode; ++ break; ++ case HIBERNATION_PLATFORM: ++ if (hibernation_ops) ++ hibernation_mode = mode; ++ else ++ error = -EINVAL; ++ } ++ } else ++ error = -EINVAL; ++ ++ if (!error) ++ pm_pr_dbg("Hibernation mode set to '%s'\n", ++ hibernation_modes[mode]); ++ unlock_system_sleep(); ++ return error ? 
error : n; ++} ++ ++power_attr(disk); ++ ++static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), ++ MINOR(swsusp_resume_device)); ++} ++ ++static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ dev_t res; ++ int len = n; ++ char *name; ++ ++ if (len && buf[len-1] == '\n') ++ len--; ++ name = kstrndup(buf, len, GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ ++ res = name_to_dev_t(name); ++ kfree(name); ++ if (!res) ++ return -EINVAL; ++ ++ lock_system_sleep(); ++ swsusp_resume_device = res; ++ unlock_system_sleep(); ++ pm_pr_dbg("Configured resume from disk to %u\n", swsusp_resume_device); ++ noresume = 0; ++ software_resume(); ++ return n; ++} ++ ++power_attr(resume); ++ ++static ssize_t resume_offset_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%llu\n", (unsigned long long)swsusp_resume_block); ++} ++ ++static ssize_t resume_offset_store(struct kobject *kobj, ++ struct kobj_attribute *attr, const char *buf, ++ size_t n) ++{ ++ unsigned long long offset; ++ int rc; ++ ++ rc = kstrtoull(buf, 0, &offset); ++ if (rc) ++ return rc; ++ swsusp_resume_block = offset; ++ ++ return n; ++} ++ ++power_attr(resume_offset); ++ ++static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%lu\n", image_size); ++} ++ ++static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ unsigned long size; ++ ++ if (sscanf(buf, "%lu", &size) == 1) { ++ image_size = size; ++ return n; ++ } ++ ++ return -EINVAL; ++} ++ ++power_attr(image_size); ++ ++static ssize_t reserved_size_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%lu\n", reserved_size); ++} ++ ++static ssize_t reserved_size_store(struct kobject *kobj, ++ struct kobj_attribute *attr, ++ const char *buf, size_t n) ++{ ++ unsigned long size; ++ ++ if (sscanf(buf, "%lu", &size) == 1) { ++ reserved_size = size; ++ return n; ++ } ++ ++ return -EINVAL; ++} ++ ++power_attr(reserved_size); ++ ++static struct attribute * g[] = { ++ &disk_attr.attr, ++ &resume_offset_attr.attr, ++ &resume_attr.attr, ++ &image_size_attr.attr, ++ &reserved_size_attr.attr, ++ NULL, ++}; ++ ++ ++static const struct attribute_group attr_group = { ++ .attrs = g, ++}; ++ ++ ++static int __init pm_disk_init(void) ++{ ++ return sysfs_create_group(power_kobj, &attr_group); ++} ++ ++core_initcall(pm_disk_init); ++ ++ ++static int __init resume_setup(char *str) ++{ ++ if (noresume) ++ return 1; ++ ++ strncpy( resume_file, str, 255 ); ++ return 1; ++} ++ ++static int __init resume_offset_setup(char *str) ++{ ++ unsigned long long offset; ++ ++ if (noresume) ++ return 1; ++ ++ if (sscanf(str, "%llu", &offset) == 1) ++ swsusp_resume_block = offset; ++ ++ return 1; ++} ++ ++static int __init hibernate_setup(char *str) ++{ ++ if (!strncmp(str, "noresume", 8)) { ++ noresume = 1; ++ } else if (!strncmp(str, "nocompress", 10)) { ++ nocompress = 1; ++ } else if (!strncmp(str, "no", 2)) { ++ noresume = 1; ++ nohibernate = 1; ++ } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ++ && !strncmp(str, "protect_image", 13)) { ++ enable_restore_image_protection(); ++ } ++ return 1; ++} ++ ++static int __init noresume_setup(char *str) ++{ ++ noresume = 1; ++ return 1; ++} ++ ++static int __init resumewait_setup(char *str) ++{ 
++ resume_wait = 1; ++ return 1; ++} ++ ++static int __init resumedelay_setup(char *str) ++{ ++ int rc = kstrtouint(str, 0, &resume_delay); ++ ++ if (rc) ++ return rc; ++ return 1; ++} ++ ++static int __init nohibernate_setup(char *str) ++{ ++ noresume = 1; ++ nohibernate = 1; ++ return 1; ++} ++ ++__setup("noresume", noresume_setup); ++__setup("resume_offset=", resume_offset_setup); ++__setup("resume=", resume_setup); ++__setup("hibernate=", hibernate_setup); ++__setup("resumewait", resumewait_setup); ++__setup("resumedelay=", resumedelay_setup); ++__setup("nohibernate", nohibernate_setup); +diff -uprN kernel/kernel/printk/printk.c kernel_new/kernel/printk/printk.c +--- kernel/kernel/printk/printk.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/printk/printk.c 2021-04-01 18:28:07.811863114 +0800 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1995,10 +1996,116 @@ asmlinkage int vprintk_emit(int facility + } + EXPORT_SYMBOL(vprintk_emit); + +-asmlinkage int vprintk(const char *fmt, va_list args) ++#ifdef CONFIG_IPIPE ++ ++extern int __ipipe_printk_bypass; ++ ++static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock); ++ ++static int __ipipe_printk_fill; ++ ++static char __ipipe_printk_buf[__LOG_BUF_LEN]; ++ ++int __ipipe_log_printk(const char *fmt, va_list args) ++{ ++ int ret = 0, fbytes, oldcount; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ ++ oldcount = __ipipe_printk_fill; ++ fbytes = __LOG_BUF_LEN - oldcount; ++ if (fbytes > 1) { ++ ret = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, ++ fbytes, fmt, args) + 1; ++ __ipipe_printk_fill += ret; ++ } ++ ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++ ++ if (oldcount == 0) ++ ipipe_raise_irq(__ipipe_printk_virq); ++ ++ return ret; ++} ++ ++static void do_deferred_vprintk(const char *fmt, ...) 
++{ ++ va_list args; ++ ++ va_start(args, fmt); ++ vprintk_func(fmt, args); ++ va_end(args); ++} ++ ++void __ipipe_flush_printk (unsigned virq, void *cookie) ++{ ++ char *p = __ipipe_printk_buf; ++ int len, lmax, out = 0; ++ unsigned long flags; ++ ++ goto start; ++ do { ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++start: ++ lmax = __ipipe_printk_fill; ++ while (out < lmax) { ++ len = strlen(p) + 1; ++ do_deferred_vprintk("%s", p); ++ p += len; ++ out += len; ++ } ++ raw_spin_lock_irqsave(&__ipipe_printk_lock, flags); ++ } while (__ipipe_printk_fill != lmax); ++ ++ __ipipe_printk_fill = 0; ++ ++ raw_spin_unlock_irqrestore(&__ipipe_printk_lock, flags); ++} ++ ++static int do_vprintk(const char *fmt, va_list args) ++{ ++ int sprintk = 1, cs = -1; ++ unsigned long flags; ++ int ret; ++ ++ flags = hard_local_irq_save(); ++ ++ if (__ipipe_printk_bypass || oops_in_progress) ++ cs = ipipe_disable_context_check(); ++ else if (__ipipe_current_domain == ipipe_root_domain) { ++ if (ipipe_head_domain != ipipe_root_domain && ++ (raw_irqs_disabled_flags(flags) || ++ test_bit(IPIPE_STALL_FLAG, &__ipipe_head_status))) ++ sprintk = 0; ++ } else ++ sprintk = 0; ++ ++ hard_local_irq_restore(flags); ++ ++ if (sprintk) { ++ ret = vprintk_func(fmt, args); ++ if (cs != -1) ++ ipipe_restore_context_check(cs); ++ } else ++ ret = __ipipe_log_printk(fmt, args); ++ ++ return ret; ++} ++ ++#else /* !CONFIG_IPIPE */ ++ ++static int do_vprintk(const char *fmt, va_list args) + { + return vprintk_func(fmt, args); + } ++ ++#endif /* !CONFIG_IPIPE */ ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return do_vprintk(fmt, args); ++} + EXPORT_SYMBOL(vprintk); + + asmlinkage int printk_emit(int facility, int level, +@@ -2060,7 +2167,7 @@ asmlinkage __visible int printk(const ch + int r; + + va_start(args, fmt); +- r = vprintk_func(fmt, args); ++ r = do_vprintk(fmt, args); + va_end(args); + + return r; +@@ -2121,6 +2228,63 @@ asmlinkage __visible void early_printk(c + } + #endif + ++#ifdef CONFIG_RAW_PRINTK ++static struct console *raw_console; ++static IPIPE_DEFINE_RAW_SPINLOCK(raw_console_lock); ++ ++void raw_vprintk(const char *fmt, va_list ap) ++{ ++ unsigned long flags; ++ char buf[256]; ++ int n; ++ ++ if (raw_console == NULL || console_suspended) ++ return; ++ ++ n = vscnprintf(buf, sizeof(buf), fmt, ap); ++ touch_nmi_watchdog(); ++ raw_spin_lock_irqsave(&raw_console_lock, flags); ++ if (raw_console) ++ raw_console->write_raw(raw_console, buf, n); ++ raw_spin_unlock_irqrestore(&raw_console_lock, flags); ++} ++ ++asmlinkage __visible void raw_printk(const char *fmt, ...) 
++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ raw_vprintk(fmt, ap); ++ va_end(ap); ++} ++EXPORT_SYMBOL(raw_printk); ++ ++static inline void register_raw_console(struct console *newcon) ++{ ++ if ((newcon->flags & CON_RAW) != 0 && newcon->write_raw) ++ raw_console = newcon; ++} ++ ++static inline void unregister_raw_console(struct console *oldcon) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&raw_console_lock, flags); ++ if (oldcon == raw_console) ++ raw_console = NULL; ++ raw_spin_unlock_irqrestore(&raw_console_lock, flags); ++} ++ ++#else ++ ++static inline void register_raw_console(struct console *newcon) ++{ } ++ ++static inline void unregister_raw_console(struct console *oldcon) ++{ } ++ ++#endif ++ + static int __add_preferred_console(char *name, int idx, char *options, + char *brl_options) + { +@@ -2761,6 +2925,9 @@ void register_console(struct console *ne + console_drivers->next = newcon; + } + ++ /* The latest raw console to register is current. */ ++ register_raw_console(newcon); ++ + if (newcon->flags & CON_EXTENDED) + if (!nr_ext_console_drivers++) + pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); +@@ -2821,6 +2988,8 @@ int unregister_console(struct console *c + (console->flags & CON_BOOT) ? "boot" : "" , + console->name, console->index); + ++ unregister_raw_console(console); ++ + res = _braille_unregister_console(console); + if (res) + return res; +diff -uprN kernel/kernel/printk/printk.c.orig kernel_new/kernel/printk/printk.c.orig +--- kernel/kernel/printk/printk.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/printk/printk.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,3362 @@ ++/* ++ * linux/kernel/printk.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * Modified to make sys_syslog() more flexible: added commands to ++ * return the last 4k of kernel messages, regardless of whether ++ * they've been read or not. Added option to suppress kernel printk's ++ * to the console. Added hook for sending the console messages ++ * elsewhere, in preparation for a serial line console (someday). ++ * Ted Ts'o, 2/11/93. ++ * Modified for sysctl support, 1/8/97, Chris Horn. ++ * Fixed SMP synchronization, 08/08/99, Manfred Spraul ++ * manfred@colorfullife.com ++ * Rewrote bits to get rid of console_lock ++ * 01Mar01 Andrew Morton ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#define CREATE_TRACE_POINTS ++#include ++ ++#include "console_cmdline.h" ++#include "braille.h" ++#include "internal.h" ++ ++int console_printk[4] = { ++ CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ ++ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ ++ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ ++ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ ++}; ++ ++atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); ++EXPORT_SYMBOL(ignore_console_lock_warning); ++ ++/* ++ * Low level drivers may need that to know if they can schedule in ++ * their unblank() callback or not. So let's export it. 
++ */ ++int oops_in_progress; ++EXPORT_SYMBOL(oops_in_progress); ++ ++/* ++ * console_sem protects the console_drivers list, and also ++ * provides serialisation for access to the entire console ++ * driver system. ++ */ ++static DEFINE_SEMAPHORE(console_sem); ++struct console *console_drivers; ++EXPORT_SYMBOL_GPL(console_drivers); ++ ++#ifdef CONFIG_LOCKDEP ++static struct lockdep_map console_lock_dep_map = { ++ .name = "console_lock" ++}; ++#endif ++ ++enum devkmsg_log_bits { ++ __DEVKMSG_LOG_BIT_ON = 0, ++ __DEVKMSG_LOG_BIT_OFF, ++ __DEVKMSG_LOG_BIT_LOCK, ++}; ++ ++enum devkmsg_log_masks { ++ DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), ++ DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), ++ DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), ++}; ++ ++/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ ++#define DEVKMSG_LOG_MASK_DEFAULT 0 ++ ++static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; ++ ++static int __control_devkmsg(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strncmp(str, "on", 2)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_ON; ++ return 2; ++ } else if (!strncmp(str, "off", 3)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_OFF; ++ return 3; ++ } else if (!strncmp(str, "ratelimit", 9)) { ++ devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; ++ return 9; ++ } ++ return -EINVAL; ++} ++ ++static int __init control_devkmsg(char *str) ++{ ++ if (__control_devkmsg(str) < 0) ++ return 1; ++ ++ /* ++ * Set sysctl string accordingly: ++ */ ++ if (devkmsg_log == DEVKMSG_LOG_MASK_ON) ++ strcpy(devkmsg_log_str, "on"); ++ else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) ++ strcpy(devkmsg_log_str, "off"); ++ /* else "ratelimit" which is set by default. */ ++ ++ /* ++ * Sysctl cannot change it anymore. The kernel command line setting of ++ * this parameter is to force the setting to be permanent throughout the ++ * runtime of the system. This is a precation measure against userspace ++ * trying to be a smarta** and attempting to change it up on us. ++ */ ++ devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; ++ ++ return 0; ++} ++__setup("printk.devkmsg=", control_devkmsg); ++ ++char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; ++ ++int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ char old_str[DEVKMSG_STR_MAX_SIZE]; ++ unsigned int old; ++ int err; ++ ++ if (write) { ++ if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) ++ return -EINVAL; ++ ++ old = devkmsg_log; ++ strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); ++ } ++ ++ err = proc_dostring(table, write, buffer, lenp, ppos); ++ if (err) ++ return err; ++ ++ if (write) { ++ err = __control_devkmsg(devkmsg_log_str); ++ ++ /* ++ * Do not accept an unknown string OR a known string with ++ * trailing crap... ++ */ ++ if (err < 0 || (err + 1 != *lenp)) { ++ ++ /* ... and restore old setting. */ ++ devkmsg_log = old; ++ strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); ++ ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Number of registered extended console drivers. ++ * ++ * If extended consoles are present, in-kernel cont reassembly is disabled ++ * and each fragment is stored as a separate log entry with proper ++ * continuation flag so that every emitted message has full metadata. This ++ * doesn't change the result for regular consoles or /proc/kmsg. For ++ * /dev/kmsg, as long as the reader concatenates messages according to ++ * consecutive continuation flags, the end result should be the same too. 
++ */ ++static int nr_ext_console_drivers; ++ ++/* ++ * Helper macros to handle lockdep when locking/unlocking console_sem. We use ++ * macros instead of functions so that _RET_IP_ contains useful information. ++ */ ++#define down_console_sem() do { \ ++ down(&console_sem);\ ++ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ ++} while (0) ++ ++static int __down_trylock_console_sem(unsigned long ip) ++{ ++ int lock_failed; ++ unsigned long flags; ++ ++ /* ++ * Here and in __up_console_sem() we need to be in safe mode, ++ * because spindump/WARN/etc from under console ->lock will ++ * deadlock in printk()->down_trylock_console_sem() otherwise. ++ */ ++ printk_safe_enter_irqsave(flags); ++ lock_failed = down_trylock(&console_sem); ++ printk_safe_exit_irqrestore(flags); ++ ++ if (lock_failed) ++ return 1; ++ mutex_acquire(&console_lock_dep_map, 0, 1, ip); ++ return 0; ++} ++#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) ++ ++static void __up_console_sem(unsigned long ip) ++{ ++ unsigned long flags; ++ ++ mutex_release(&console_lock_dep_map, 1, ip); ++ ++ printk_safe_enter_irqsave(flags); ++ up(&console_sem); ++ printk_safe_exit_irqrestore(flags); ++} ++#define up_console_sem() __up_console_sem(_RET_IP_) ++ ++/* ++ * This is used for debugging the mess that is the VT code by ++ * keeping track if we have the console semaphore held. It's ++ * definitely not the perfect debug tool (we don't know if _WE_ ++ * hold it and are racing, but it helps tracking those weird code ++ * paths in the console code where we end up in places I want ++ * locked without the console sempahore held). ++ */ ++static int console_locked, console_suspended; ++ ++/* ++ * If exclusive_console is non-NULL then only this console is to be printed to. ++ */ ++static struct console *exclusive_console; ++ ++/* ++ * Array of consoles built from command line options (console=) ++ */ ++ ++#define MAX_CMDLINECONSOLES 8 ++ ++static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; ++ ++static int preferred_console = -1; ++int console_set_on_cmdline; ++EXPORT_SYMBOL(console_set_on_cmdline); ++ ++/* Flag: console code may call schedule() */ ++static int console_may_schedule; ++ ++enum con_msg_format_flags { ++ MSG_FORMAT_DEFAULT = 0, ++ MSG_FORMAT_SYSLOG = (1 << 0), ++}; ++ ++static int console_msg_format = MSG_FORMAT_DEFAULT; ++ ++/* ++ * The printk log buffer consists of a chain of concatenated variable ++ * length records. Every record starts with a record header, containing ++ * the overall length of the record. ++ * ++ * The heads to the first and last entry in the buffer, as well as the ++ * sequence numbers of these entries are maintained when messages are ++ * stored. ++ * ++ * If the heads indicate available messages, the length in the header ++ * tells the start next message. A length == 0 for the next message ++ * indicates a wrap-around to the beginning of the buffer. ++ * ++ * Every record carries the monotonic timestamp in microseconds, as well as ++ * the standard userspace syslog level and syslog facility. The usual ++ * kernel messages use LOG_KERN; userspace-injected messages always carry ++ * a matching syslog facility, by default LOG_USER. The origin of every ++ * message can be reliably determined that way. ++ * ++ * The human readable log message directly follows the message header. The ++ * length of the message text is stored in the header, the stored message ++ * is not terminated. 
++ * ++ * Optionally, a message can carry a dictionary of properties (key/value pairs), ++ * to provide userspace with a machine-readable message context. ++ * ++ * Examples for well-defined, commonly used property names are: ++ * DEVICE=b12:8 device identifier ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ * SUBSYSTEM=pci driver-core subsystem name ++ * ++ * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value ++ * follows directly after a '=' character. Every property is terminated by ++ * a '\0' character. The last property is not terminated. ++ * ++ * Example of a message structure: ++ * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec ++ * 0008 34 00 record is 52 bytes long ++ * 000a 0b 00 text is 11 bytes long ++ * 000c 1f 00 dictionary is 23 bytes long ++ * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) ++ * 0010 69 74 27 73 20 61 20 6c "it's a l" ++ * 69 6e 65 "ine" ++ * 001b 44 45 56 49 43 "DEVIC" ++ * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" ++ * 52 49 56 45 52 3d 62 75 "RIVER=bu" ++ * 67 "g" ++ * 0032 00 00 00 padding to next message header ++ * ++ * The 'struct printk_log' buffer header must never be directly exported to ++ * userspace, it is a kernel-private implementation detail that might ++ * need to be changed in the future, when the requirements change. ++ * ++ * /dev/kmsg exports the structured data in the following line format: ++ * ",,,[,additional_values, ... ];\n" ++ * ++ * Users of the export format should ignore possible additional values ++ * separated by ',', and find the message after the ';' character. ++ * ++ * The optional key/value pairs are attached as continuation lines starting ++ * with a space character and terminated by a newline. All possible ++ * non-prinatable characters are escaped in the "\xff" notation. ++ */ ++ ++enum log_flags { ++ LOG_NEWLINE = 2, /* text ended with a newline */ ++ LOG_PREFIX = 4, /* text started with a prefix */ ++ LOG_CONT = 8, /* text is a fragment of a continuation line */ ++}; ++ ++struct printk_log { ++ u64 ts_nsec; /* timestamp in nanoseconds */ ++ u16 len; /* length of entire record */ ++ u16 text_len; /* length of text buffer */ ++ u16 dict_len; /* length of dictionary buffer */ ++ u8 facility; /* syslog facility */ ++ u8 flags:5; /* internal record flags */ ++ u8 level:3; /* syslog level */ ++} ++#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ++__packed __aligned(4) ++#endif ++; ++ ++/* ++ * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken ++ * within the scheduler's rq lock. It must be released before calling ++ * console_unlock() or anything else that might wake up a process. ++ */ ++DEFINE_RAW_SPINLOCK(logbuf_lock); ++ ++/* ++ * Helper macros to lock/unlock logbuf_lock and switch between ++ * printk-safe/unsafe modes. 
++ */ ++#define logbuf_lock_irq() \ ++ do { \ ++ printk_safe_enter_irq(); \ ++ raw_spin_lock(&logbuf_lock); \ ++ } while (0) ++ ++#define logbuf_unlock_irq() \ ++ do { \ ++ raw_spin_unlock(&logbuf_lock); \ ++ printk_safe_exit_irq(); \ ++ } while (0) ++ ++#define logbuf_lock_irqsave(flags) \ ++ do { \ ++ printk_safe_enter_irqsave(flags); \ ++ raw_spin_lock(&logbuf_lock); \ ++ } while (0) ++ ++#define logbuf_unlock_irqrestore(flags) \ ++ do { \ ++ raw_spin_unlock(&logbuf_lock); \ ++ printk_safe_exit_irqrestore(flags); \ ++ } while (0) ++ ++#ifdef CONFIG_PRINTK ++DECLARE_WAIT_QUEUE_HEAD(log_wait); ++/* the next printk record to read by syslog(READ) or /proc/kmsg */ ++static u64 syslog_seq; ++static u32 syslog_idx; ++static size_t syslog_partial; ++static bool syslog_time; ++ ++/* index and sequence number of the first record stored in the buffer */ ++static u64 log_first_seq; ++static u32 log_first_idx; ++ ++/* index and sequence number of the next record to store in the buffer */ ++static u64 log_next_seq; ++static u32 log_next_idx; ++ ++/* the next printk record to write to the console */ ++static u64 console_seq; ++static u32 console_idx; ++static u64 exclusive_console_stop_seq; ++ ++/* the next printk record to read after the last 'clear' command */ ++static u64 clear_seq; ++static u32 clear_idx; ++ ++#define PREFIX_MAX 32 ++#define LOG_LINE_MAX (1024 - PREFIX_MAX) ++ ++#define LOG_LEVEL(v) ((v) & 0x07) ++#define LOG_FACILITY(v) ((v) >> 3 & 0xff) ++ ++/* record buffer */ ++#define LOG_ALIGN __alignof__(struct printk_log) ++#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) ++#define LOG_BUF_LEN_MAX (u32)(1 << 31) ++static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); ++static char *log_buf = __log_buf; ++static u32 log_buf_len = __LOG_BUF_LEN; ++ ++/* ++ * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before ++ * per_cpu_areas are initialised. This variable is set to true when ++ * it's safe to access per-CPU data. ++ */ ++static bool __printk_percpu_data_ready __read_mostly; ++ ++bool printk_percpu_data_ready(void) ++{ ++ return __printk_percpu_data_ready; ++} ++ ++/* Return log buffer address */ ++char *log_buf_addr_get(void) ++{ ++ return log_buf; ++} ++EXPORT_SYMBOL_GPL(log_buf_addr_get); ++ ++/* Return log buffer size */ ++u32 log_buf_len_get(void) ++{ ++ return log_buf_len; ++} ++EXPORT_SYMBOL_GPL(log_buf_len_get); ++ ++/* human readable text of the record */ ++static char *log_text(const struct printk_log *msg) ++{ ++ return (char *)msg + sizeof(struct printk_log); ++} ++ ++/* optional key/value pair dictionary attached to the record */ ++static char *log_dict(const struct printk_log *msg) ++{ ++ return (char *)msg + sizeof(struct printk_log) + msg->text_len; ++} ++ ++/* get record by index; idx must point to valid msg */ ++static struct printk_log *log_from_idx(u32 idx) ++{ ++ struct printk_log *msg = (struct printk_log *)(log_buf + idx); ++ ++ /* ++ * A length == 0 record is the end of buffer marker. Wrap around and ++ * read the message at the start of the buffer. ++ */ ++ if (!msg->len) ++ return (struct printk_log *)log_buf; ++ return msg; ++} ++ ++/* get next record; idx must point to valid msg */ ++static u32 log_next(u32 idx) ++{ ++ struct printk_log *msg = (struct printk_log *)(log_buf + idx); ++ ++ /* length == 0 indicates the end of the buffer; wrap */ ++ /* ++ * A length == 0 record is the end of buffer marker. Wrap around and ++ * read the message at the start of the buffer as *this* one, and ++ * return the one after that. 
++ */ ++ if (!msg->len) { ++ msg = (struct printk_log *)log_buf; ++ return msg->len; ++ } ++ return idx + msg->len; ++} ++ ++/* ++ * Check whether there is enough free space for the given message. ++ * ++ * The same values of first_idx and next_idx mean that the buffer ++ * is either empty or full. ++ * ++ * If the buffer is empty, we must respect the position of the indexes. ++ * They cannot be reset to the beginning of the buffer. ++ */ ++static int logbuf_has_space(u32 msg_size, bool empty) ++{ ++ u32 free; ++ ++ if (log_next_idx > log_first_idx || empty) ++ free = max(log_buf_len - log_next_idx, log_first_idx); ++ else ++ free = log_first_idx - log_next_idx; ++ ++ /* ++ * We need space also for an empty header that signalizes wrapping ++ * of the buffer. ++ */ ++ return free >= msg_size + sizeof(struct printk_log); ++} ++ ++static int log_make_free_space(u32 msg_size) ++{ ++ while (log_first_seq < log_next_seq && ++ !logbuf_has_space(msg_size, false)) { ++ /* drop old messages until we have enough contiguous space */ ++ log_first_idx = log_next(log_first_idx); ++ log_first_seq++; ++ } ++ ++ if (clear_seq < log_first_seq) { ++ clear_seq = log_first_seq; ++ clear_idx = log_first_idx; ++ } ++ ++ /* sequence numbers are equal, so the log buffer is empty */ ++ if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) ++ return 0; ++ ++ return -ENOMEM; ++} ++ ++/* compute the message size including the padding bytes */ ++static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) ++{ ++ u32 size; ++ ++ size = sizeof(struct printk_log) + text_len + dict_len; ++ *pad_len = (-size) & (LOG_ALIGN - 1); ++ size += *pad_len; ++ ++ return size; ++} ++ ++/* ++ * Define how much of the log buffer we could take at maximum. The value ++ * must be greater than two. Note that only half of the buffer is available ++ * when the index points to the middle. ++ */ ++#define MAX_LOG_TAKE_PART 4 ++static const char trunc_msg[] = ""; ++ ++static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, ++ u16 *dict_len, u32 *pad_len) ++{ ++ /* ++ * The message should not take the whole buffer. Otherwise, it might ++ * get removed too soon. ++ */ ++ u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; ++ if (*text_len > max_text_len) ++ *text_len = max_text_len; ++ /* enable the warning message */ ++ *trunc_msg_len = strlen(trunc_msg); ++ /* disable the "dict" completely */ ++ *dict_len = 0; ++ /* compute the size again, count also the warning message */ ++ return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); ++} ++ ++/* insert record into the buffer, discard old ones, update heads */ ++static int log_store(int facility, int level, ++ enum log_flags flags, u64 ts_nsec, ++ const char *dict, u16 dict_len, ++ const char *text, u16 text_len) ++{ ++ struct printk_log *msg; ++ u32 size, pad_len; ++ u16 trunc_msg_len = 0; ++ ++ /* number of '\0' padding bytes to next message */ ++ size = msg_used_size(text_len, dict_len, &pad_len); ++ ++ if (log_make_free_space(size)) { ++ /* truncate the message if it is too long for empty buffer */ ++ size = truncate_msg(&text_len, &trunc_msg_len, ++ &dict_len, &pad_len); ++ /* survive when the log buffer is too small for trunc_msg */ ++ if (log_make_free_space(size)) ++ return 0; ++ } ++ ++ if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { ++ /* ++ * This message + an additional empty header does not fit ++ * at the end of the buffer. Add an empty header with len == 0 ++ * to signify a wrap around. 
++ */ ++ memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); ++ log_next_idx = 0; ++ } ++ ++ /* fill message */ ++ msg = (struct printk_log *)(log_buf + log_next_idx); ++ memcpy(log_text(msg), text, text_len); ++ msg->text_len = text_len; ++ if (trunc_msg_len) { ++ memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); ++ msg->text_len += trunc_msg_len; ++ } ++ memcpy(log_dict(msg), dict, dict_len); ++ msg->dict_len = dict_len; ++ msg->facility = facility; ++ msg->level = level & 7; ++ msg->flags = flags & 0x1f; ++ if (ts_nsec > 0) ++ msg->ts_nsec = ts_nsec; ++ else ++ msg->ts_nsec = local_clock(); ++ memset(log_dict(msg) + dict_len, 0, pad_len); ++ msg->len = size; ++ ++ /* insert message */ ++ log_next_idx += msg->len; ++ log_next_seq++; ++ ++ return msg->text_len; ++} ++ ++int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); ++ ++static int syslog_action_restricted(int type) ++{ ++ if (dmesg_restrict) ++ return 1; ++ /* ++ * Unless restricted, we allow "read all" and "get buffer size" ++ * for everybody. ++ */ ++ return type != SYSLOG_ACTION_READ_ALL && ++ type != SYSLOG_ACTION_SIZE_BUFFER; ++} ++ ++static int check_syslog_permissions(int type, int source) ++{ ++ /* ++ * If this is from /proc/kmsg and we've already opened it, then we've ++ * already done the capabilities checks at open time. ++ */ ++ if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) ++ goto ok; ++ ++ if (syslog_action_restricted(type)) { ++ if (capable(CAP_SYSLOG)) ++ goto ok; ++ /* ++ * For historical reasons, accept CAP_SYS_ADMIN too, with ++ * a warning. ++ */ ++ if (capable(CAP_SYS_ADMIN)) { ++ pr_warn_once("%s (%d): Attempt to access syslog with " ++ "CAP_SYS_ADMIN but no CAP_SYSLOG " ++ "(deprecated).\n", ++ current->comm, task_pid_nr(current)); ++ goto ok; ++ } ++ return -EPERM; ++ } ++ok: ++ return security_syslog(type); ++} ++ ++static void append_char(char **pp, char *e, char c) ++{ ++ if (*pp < e) ++ *(*pp)++ = c; ++} ++ ++static ssize_t msg_print_ext_header(char *buf, size_t size, ++ struct printk_log *msg, u64 seq) ++{ ++ u64 ts_usec = msg->ts_nsec; ++ ++ do_div(ts_usec, 1000); ++ ++ return scnprintf(buf, size, "%u,%llu,%llu,%c;", ++ (msg->facility << 3) | msg->level, seq, ts_usec, ++ msg->flags & LOG_CONT ? 
'c' : '-'); ++} ++ ++static ssize_t msg_print_ext_body(char *buf, size_t size, ++ char *dict, size_t dict_len, ++ char *text, size_t text_len) ++{ ++ char *p = buf, *e = buf + size; ++ size_t i; ++ ++ /* escape non-printable characters */ ++ for (i = 0; i < text_len; i++) { ++ unsigned char c = text[i]; ++ ++ if (c < ' ' || c >= 127 || c == '\\') ++ p += scnprintf(p, e - p, "\\x%02x", c); ++ else ++ append_char(&p, e, c); ++ } ++ append_char(&p, e, '\n'); ++ ++ if (dict_len) { ++ bool line = true; ++ ++ for (i = 0; i < dict_len; i++) { ++ unsigned char c = dict[i]; ++ ++ if (line) { ++ append_char(&p, e, ' '); ++ line = false; ++ } ++ ++ if (c == '\0') { ++ append_char(&p, e, '\n'); ++ line = true; ++ continue; ++ } ++ ++ if (c < ' ' || c >= 127 || c == '\\') { ++ p += scnprintf(p, e - p, "\\x%02x", c); ++ continue; ++ } ++ ++ append_char(&p, e, c); ++ } ++ append_char(&p, e, '\n'); ++ } ++ ++ return p - buf; ++} ++ ++/* /dev/kmsg - userspace message inject/listen interface */ ++struct devkmsg_user { ++ u64 seq; ++ u32 idx; ++ struct ratelimit_state rs; ++ struct mutex lock; ++ char buf[CONSOLE_EXT_LOG_MAX]; ++}; ++ ++static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) ++{ ++ char *buf, *line; ++ int level = default_message_loglevel; ++ int facility = 1; /* LOG_USER */ ++ struct file *file = iocb->ki_filp; ++ struct devkmsg_user *user = file->private_data; ++ size_t len = iov_iter_count(from); ++ ssize_t ret = len; ++ ++ if (!user || len > LOG_LINE_MAX) ++ return -EINVAL; ++ ++ /* Ignore when user logging is disabled. */ ++ if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) ++ return len; ++ ++ /* Ratelimit when not explicitly enabled. */ ++ if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { ++ if (!___ratelimit(&user->rs, current->comm)) ++ return ret; ++ } ++ ++ buf = kmalloc(len+1, GFP_KERNEL); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ buf[len] = '\0'; ++ if (!copy_from_iter_full(buf, len, from)) { ++ kfree(buf); ++ return -EFAULT; ++ } ++ ++ /* ++ * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace ++ * the decimal value represents 32bit, the lower 3 bit are the log ++ * level, the rest are the log facility. ++ * ++ * If no prefix or no userspace facility is specified, we ++ * enforce LOG_USER, to be able to reliably distinguish ++ * kernel-generated messages from userspace-injected ones. 
++ */ ++ line = buf; ++ if (line[0] == '<') { ++ char *endp = NULL; ++ unsigned int u; ++ ++ u = simple_strtoul(line + 1, &endp, 10); ++ if (endp && endp[0] == '>') { ++ level = LOG_LEVEL(u); ++ if (LOG_FACILITY(u) != 0) ++ facility = LOG_FACILITY(u); ++ endp++; ++ len -= endp - line; ++ line = endp; ++ } ++ } ++ ++ printk_emit(facility, level, NULL, 0, "%s", line); ++ kfree(buf); ++ return ret; ++} ++ ++static ssize_t devkmsg_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct devkmsg_user *user = file->private_data; ++ struct printk_log *msg; ++ size_t len; ++ ssize_t ret; ++ ++ if (!user) ++ return -EBADF; ++ ++ ret = mutex_lock_interruptible(&user->lock); ++ if (ret) ++ return ret; ++ ++ logbuf_lock_irq(); ++ while (user->seq == log_next_seq) { ++ if (file->f_flags & O_NONBLOCK) { ++ ret = -EAGAIN; ++ logbuf_unlock_irq(); ++ goto out; ++ } ++ ++ logbuf_unlock_irq(); ++ ret = wait_event_interruptible(log_wait, ++ user->seq != log_next_seq); ++ if (ret) ++ goto out; ++ logbuf_lock_irq(); ++ } ++ ++ if (user->seq < log_first_seq) { ++ /* our last seen message is gone, return error and reset */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ ret = -EPIPE; ++ logbuf_unlock_irq(); ++ goto out; ++ } ++ ++ msg = log_from_idx(user->idx); ++ len = msg_print_ext_header(user->buf, sizeof(user->buf), ++ msg, user->seq); ++ len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, ++ log_dict(msg), msg->dict_len, ++ log_text(msg), msg->text_len); ++ ++ user->idx = log_next(user->idx); ++ user->seq++; ++ logbuf_unlock_irq(); ++ ++ if (len > count) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_to_user(buf, user->buf, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ret = len; ++out: ++ mutex_unlock(&user->lock); ++ return ret; ++} ++ ++static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct devkmsg_user *user = file->private_data; ++ loff_t ret = 0; ++ ++ if (!user) ++ return -EBADF; ++ if (offset) ++ return -ESPIPE; ++ ++ logbuf_lock_irq(); ++ switch (whence) { ++ case SEEK_SET: ++ /* the first record */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ break; ++ case SEEK_DATA: ++ /* ++ * The first record after the last SYSLOG_ACTION_CLEAR, ++ * like issued by 'dmesg -c'. Reading /dev/kmsg itself ++ * changes no global state, and does not clear anything. 
++ */ ++ user->idx = clear_idx; ++ user->seq = clear_seq; ++ break; ++ case SEEK_END: ++ /* after the last record */ ++ user->idx = log_next_idx; ++ user->seq = log_next_seq; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ logbuf_unlock_irq(); ++ return ret; ++} ++ ++static __poll_t devkmsg_poll(struct file *file, poll_table *wait) ++{ ++ struct devkmsg_user *user = file->private_data; ++ __poll_t ret = 0; ++ ++ if (!user) ++ return EPOLLERR|EPOLLNVAL; ++ ++ poll_wait(file, &log_wait, wait); ++ ++ logbuf_lock_irq(); ++ if (user->seq < log_next_seq) { ++ /* return error when data has vanished underneath us */ ++ if (user->seq < log_first_seq) ++ ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; ++ else ++ ret = EPOLLIN|EPOLLRDNORM; ++ } ++ logbuf_unlock_irq(); ++ ++ return ret; ++} ++ ++static int devkmsg_open(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user; ++ int err; ++ ++ if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) ++ return -EPERM; ++ ++ /* write-only does not need any file context */ ++ if ((file->f_flags & O_ACCMODE) != O_WRONLY) { ++ err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, ++ SYSLOG_FROM_READER); ++ if (err) ++ return err; ++ } ++ ++ user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); ++ if (!user) ++ return -ENOMEM; ++ ++ ratelimit_default_init(&user->rs); ++ ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); ++ ++ mutex_init(&user->lock); ++ ++ logbuf_lock_irq(); ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ logbuf_unlock_irq(); ++ ++ file->private_data = user; ++ return 0; ++} ++ ++static int devkmsg_release(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user = file->private_data; ++ ++ if (!user) ++ return 0; ++ ++ ratelimit_state_exit(&user->rs); ++ ++ mutex_destroy(&user->lock); ++ kfree(user); ++ return 0; ++} ++ ++const struct file_operations kmsg_fops = { ++ .open = devkmsg_open, ++ .read = devkmsg_read, ++ .write_iter = devkmsg_write, ++ .llseek = devkmsg_llseek, ++ .poll = devkmsg_poll, ++ .release = devkmsg_release, ++}; ++ ++#ifdef CONFIG_CRASH_CORE ++/* ++ * This appends the listed symbols to /proc/vmcore ++ * ++ * /proc/vmcore is used by various utilities, like crash and makedumpfile to ++ * obtain access to symbols that are otherwise very difficult to locate. These ++ * symbols are specifically used so that utilities can access and extract the ++ * dmesg log from a vmcore file after a crash. ++ */ ++void log_buf_vmcoreinfo_setup(void) ++{ ++ VMCOREINFO_SYMBOL(log_buf); ++ VMCOREINFO_SYMBOL(log_buf_len); ++ VMCOREINFO_SYMBOL(log_first_idx); ++ VMCOREINFO_SYMBOL(clear_idx); ++ VMCOREINFO_SYMBOL(log_next_idx); ++ /* ++ * Export struct printk_log size and field offsets. User space tools can ++ * parse it and detect any changes to structure down the line. 
++ */ ++ VMCOREINFO_STRUCT_SIZE(printk_log); ++ VMCOREINFO_OFFSET(printk_log, ts_nsec); ++ VMCOREINFO_OFFSET(printk_log, len); ++ VMCOREINFO_OFFSET(printk_log, text_len); ++ VMCOREINFO_OFFSET(printk_log, dict_len); ++} ++#endif ++ ++/* requested log_buf_len from kernel cmdline */ ++static unsigned long __initdata new_log_buf_len; ++ ++/* we practice scaling the ring buffer by powers of 2 */ ++static void __init log_buf_len_update(u64 size) ++{ ++ if (size > (u64)LOG_BUF_LEN_MAX) { ++ size = (u64)LOG_BUF_LEN_MAX; ++ pr_err("log_buf over 2G is not supported.\n"); ++ } ++ ++ if (size) ++ size = roundup_pow_of_two(size); ++ if (size > log_buf_len) ++ new_log_buf_len = (unsigned long)size; ++} ++ ++/* save requested log_buf_len since it's too early to process it */ ++static int __init log_buf_len_setup(char *str) ++{ ++ u64 size; ++ ++ if (!str) ++ return -EINVAL; ++ ++ size = memparse(str, &str); ++ ++ log_buf_len_update(size); ++ ++ return 0; ++} ++early_param("log_buf_len", log_buf_len_setup); ++ ++#ifdef CONFIG_SMP ++#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) ++ ++static void __init log_buf_add_cpu(void) ++{ ++ unsigned int cpu_extra; ++ ++ /* ++ * archs should set up cpu_possible_bits properly with ++ * set_cpu_possible() after setup_arch() but just in ++ * case lets ensure this is valid. ++ */ ++ if (num_possible_cpus() == 1) ++ return; ++ ++ cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; ++ ++ /* by default this will only continue through for large > 64 CPUs */ ++ if (cpu_extra <= __LOG_BUF_LEN / 2) ++ return; ++ ++ pr_info("log_buf_len individual max cpu contribution: %d bytes\n", ++ __LOG_CPU_MAX_BUF_LEN); ++ pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", ++ cpu_extra); ++ pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); ++ ++ log_buf_len_update(cpu_extra + __LOG_BUF_LEN); ++} ++#else /* !CONFIG_SMP */ ++static inline void log_buf_add_cpu(void) {} ++#endif /* CONFIG_SMP */ ++ ++static void __init set_percpu_data_ready(void) ++{ ++ printk_safe_init(); ++ /* Make sure we set this flag only after printk_safe() init is done */ ++ barrier(); ++ __printk_percpu_data_ready = true; ++} ++ ++void __init setup_log_buf(int early) ++{ ++ unsigned long flags; ++ char *new_log_buf; ++ unsigned int free; ++ ++ /* ++ * Some archs call setup_log_buf() multiple times - first is very ++ * early, e.g. from setup_arch(), and second - when percpu_areas ++ * are initialised. 
++ */ ++ if (!early) ++ set_percpu_data_ready(); ++ ++ if (log_buf != __log_buf) ++ return; ++ ++ if (!early && !new_log_buf_len) ++ log_buf_add_cpu(); ++ ++ if (!new_log_buf_len) ++ return; ++ ++ if (early) { ++ new_log_buf = ++ memblock_virt_alloc(new_log_buf_len, LOG_ALIGN); ++ } else { ++ new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, ++ LOG_ALIGN); ++ } ++ ++ if (unlikely(!new_log_buf)) { ++ pr_err("log_buf_len: %lu bytes not available\n", ++ new_log_buf_len); ++ return; ++ } ++ ++ logbuf_lock_irqsave(flags); ++ log_buf_len = new_log_buf_len; ++ log_buf = new_log_buf; ++ new_log_buf_len = 0; ++ free = __LOG_BUF_LEN - log_next_idx; ++ memcpy(log_buf, __log_buf, __LOG_BUF_LEN); ++ logbuf_unlock_irqrestore(flags); ++ ++ pr_info("log_buf_len: %u bytes\n", log_buf_len); ++ pr_info("early log buf free: %u(%u%%)\n", ++ free, (free * 100) / __LOG_BUF_LEN); ++} ++ ++static bool __read_mostly ignore_loglevel; ++ ++static int __init ignore_loglevel_setup(char *str) ++{ ++ ignore_loglevel = true; ++ pr_info("debug: ignoring loglevel setting.\n"); ++ ++ return 0; ++} ++ ++early_param("ignore_loglevel", ignore_loglevel_setup); ++module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(ignore_loglevel, ++ "ignore loglevel setting (prints all kernel messages to the console)"); ++ ++static bool suppress_message_printing(int level) ++{ ++ return (level >= console_loglevel && !ignore_loglevel); ++} ++ ++#ifdef CONFIG_BOOT_PRINTK_DELAY ++ ++static int boot_delay; /* msecs delay after each printk during bootup */ ++static unsigned long long loops_per_msec; /* based on boot_delay */ ++ ++static int __init boot_delay_setup(char *str) ++{ ++ unsigned long lpj; ++ ++ lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */ ++ loops_per_msec = (unsigned long long)lpj / 1000 * HZ; ++ ++ get_option(&str, &boot_delay); ++ if (boot_delay > 10 * 1000) ++ boot_delay = 0; ++ ++ pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " ++ "HZ: %d, loops_per_msec: %llu\n", ++ boot_delay, preset_lpj, lpj, HZ, loops_per_msec); ++ return 0; ++} ++early_param("boot_delay", boot_delay_setup); ++ ++static void boot_delay_msec(int level) ++{ ++ unsigned long long k; ++ unsigned long timeout; ++ ++ if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) ++ || suppress_message_printing(level)) { ++ return; ++ } ++ ++ k = (unsigned long long)loops_per_msec * boot_delay; ++ ++ timeout = jiffies + msecs_to_jiffies(boot_delay); ++ while (k) { ++ k--; ++ cpu_relax(); ++ /* ++ * use (volatile) jiffies to prevent ++ * compiler reduction; loop termination via jiffies ++ * is secondary and may or may not happen. 
++ */ ++ if (time_after(jiffies, timeout)) ++ break; ++ touch_nmi_watchdog(); ++ } ++} ++#else ++static inline void boot_delay_msec(int level) ++{ ++} ++#endif ++ ++static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); ++module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); ++ ++static size_t print_time(u64 ts, char *buf) ++{ ++ unsigned long rem_nsec = do_div(ts, 1000000000); ++ ++ if (!buf) ++ return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); ++ ++ return sprintf(buf, "[%5lu.%06lu] ", ++ (unsigned long)ts, rem_nsec / 1000); ++} ++ ++static size_t print_prefix(const struct printk_log *msg, bool syslog, ++ bool time, char *buf) ++{ ++ size_t len = 0; ++ unsigned int prefix = (msg->facility << 3) | msg->level; ++ ++ if (syslog) { ++ if (buf) { ++ len += sprintf(buf, "<%u>", prefix); ++ } else { ++ len += 3; ++ if (prefix > 999) ++ len += 3; ++ else if (prefix > 99) ++ len += 2; ++ else if (prefix > 9) ++ len++; ++ } ++ } ++ ++ if (time) ++ len += print_time(msg->ts_nsec, buf ? buf + len : NULL); ++ return len; ++} ++ ++static size_t msg_print_text(const struct printk_log *msg, bool syslog, ++ bool time, char *buf, size_t size) ++{ ++ const char *text = log_text(msg); ++ size_t text_size = msg->text_len; ++ size_t len = 0; ++ ++ do { ++ const char *next = memchr(text, '\n', text_size); ++ size_t text_len; ++ ++ if (next) { ++ text_len = next - text; ++ next++; ++ text_size -= next - text; ++ } else { ++ text_len = text_size; ++ } ++ ++ if (buf) { ++ if (print_prefix(msg, syslog, time, NULL) + ++ text_len + 1 >= size - len) ++ break; ++ ++ len += print_prefix(msg, syslog, time, buf + len); ++ memcpy(buf + len, text, text_len); ++ len += text_len; ++ buf[len++] = '\n'; ++ } else { ++ /* SYSLOG_ACTION_* buffer size only calculation */ ++ len += print_prefix(msg, syslog, time, NULL); ++ len += text_len; ++ len++; ++ } ++ ++ text = next; ++ } while (text); ++ ++ return len; ++} ++ ++static int syslog_print(char __user *buf, int size) ++{ ++ char *text; ++ struct printk_log *msg; ++ int len = 0; ++ ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ while (size > 0) { ++ size_t n; ++ size_t skip; ++ ++ logbuf_lock_irq(); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ syslog_partial = 0; ++ } ++ if (syslog_seq == log_next_seq) { ++ logbuf_unlock_irq(); ++ break; ++ } ++ ++ /* ++ * To keep reading/counting partial line consistent, ++ * use printk_time value as of the beginning of a line. 
++ */ ++ if (!syslog_partial) ++ syslog_time = printk_time; ++ ++ skip = syslog_partial; ++ msg = log_from_idx(syslog_idx); ++ n = msg_print_text(msg, true, syslog_time, text, ++ LOG_LINE_MAX + PREFIX_MAX); ++ if (n - syslog_partial <= size) { ++ /* message fits into buffer, move forward */ ++ syslog_idx = log_next(syslog_idx); ++ syslog_seq++; ++ n -= syslog_partial; ++ syslog_partial = 0; ++ } else if (!len){ ++ /* partial read(), remember position */ ++ n = size; ++ syslog_partial += n; ++ } else ++ n = 0; ++ logbuf_unlock_irq(); ++ ++ if (!n) ++ break; ++ ++ if (copy_to_user(buf, text + skip, n)) { ++ if (!len) ++ len = -EFAULT; ++ break; ++ } ++ ++ len += n; ++ size -= n; ++ buf += n; ++ } ++ ++ kfree(text); ++ return len; ++} ++ ++static int syslog_print_all(char __user *buf, int size, bool clear) ++{ ++ char *text; ++ int len = 0; ++ u64 next_seq; ++ u64 seq; ++ u32 idx; ++ bool time; ++ ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ time = printk_time; ++ logbuf_lock_irq(); ++ /* ++ * Find first record that fits, including all following records, ++ * into the user-provided buffer for this dump. ++ */ ++ seq = clear_seq; ++ idx = clear_idx; ++ while (seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ len += msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* move first record forward until length fits into the buffer */ ++ seq = clear_seq; ++ idx = clear_idx; ++ while (len > size && seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ len -= msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* last message fitting into this dump */ ++ next_seq = log_next_seq; ++ ++ len = 0; ++ while (len >= 0 && seq < next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ int textlen = msg_print_text(msg, true, time, text, ++ LOG_LINE_MAX + PREFIX_MAX); ++ ++ idx = log_next(idx); ++ seq++; ++ ++ logbuf_unlock_irq(); ++ if (copy_to_user(buf + len, text, textlen)) ++ len = -EFAULT; ++ else ++ len += textlen; ++ logbuf_lock_irq(); ++ ++ if (seq < log_first_seq) { ++ /* messages are gone, move to next one */ ++ seq = log_first_seq; ++ idx = log_first_idx; ++ } ++ } ++ ++ if (clear) { ++ clear_seq = log_next_seq; ++ clear_idx = log_next_idx; ++ } ++ logbuf_unlock_irq(); ++ ++ kfree(text); ++ return len; ++} ++ ++static void syslog_clear(void) ++{ ++ logbuf_lock_irq(); ++ clear_seq = log_next_seq; ++ clear_idx = log_next_idx; ++ logbuf_unlock_irq(); ++} ++ ++int do_syslog(int type, char __user *buf, int len, int source) ++{ ++ bool clear = false; ++ static int saved_console_loglevel = LOGLEVEL_DEFAULT; ++ int error; ++ ++ error = check_syslog_permissions(type, source); ++ if (error) ++ return error; ++ ++ switch (type) { ++ case SYSLOG_ACTION_CLOSE: /* Close log */ ++ break; ++ case SYSLOG_ACTION_OPEN: /* Open log */ ++ break; ++ case SYSLOG_ACTION_READ: /* Read from log */ ++ if (!buf || len < 0) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ if (!access_ok(buf, len)) ++ return -EFAULT; ++ error = wait_event_interruptible(log_wait, ++ syslog_seq != log_next_seq); ++ if (error) ++ return error; ++ error = syslog_print(buf, len); ++ break; ++ /* Read/clear last kernel messages */ ++ case SYSLOG_ACTION_READ_CLEAR: ++ clear = true; ++ /* FALL THRU */ ++ /* Read last kernel messages */ ++ case SYSLOG_ACTION_READ_ALL: ++ if (!buf || len < 0) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ if (!access_ok(buf, len)) ++ return -EFAULT; 
++ error = syslog_print_all(buf, len, clear); ++ break; ++ /* Clear ring buffer */ ++ case SYSLOG_ACTION_CLEAR: ++ syslog_clear(); ++ break; ++ /* Disable logging to console */ ++ case SYSLOG_ACTION_CONSOLE_OFF: ++ if (saved_console_loglevel == LOGLEVEL_DEFAULT) ++ saved_console_loglevel = console_loglevel; ++ console_loglevel = minimum_console_loglevel; ++ break; ++ /* Enable logging to console */ ++ case SYSLOG_ACTION_CONSOLE_ON: ++ if (saved_console_loglevel != LOGLEVEL_DEFAULT) { ++ console_loglevel = saved_console_loglevel; ++ saved_console_loglevel = LOGLEVEL_DEFAULT; ++ } ++ break; ++ /* Set level of messages printed to console */ ++ case SYSLOG_ACTION_CONSOLE_LEVEL: ++ if (len < 1 || len > 8) ++ return -EINVAL; ++ if (len < minimum_console_loglevel) ++ len = minimum_console_loglevel; ++ console_loglevel = len; ++ /* Implicitly re-enable logging to console */ ++ saved_console_loglevel = LOGLEVEL_DEFAULT; ++ break; ++ /* Number of chars in the log buffer */ ++ case SYSLOG_ACTION_SIZE_UNREAD: ++ logbuf_lock_irq(); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ syslog_partial = 0; ++ } ++ if (source == SYSLOG_FROM_PROC) { ++ /* ++ * Short-cut for poll(/"proc/kmsg") which simply checks ++ * for pending data, not the size; return the count of ++ * records, not the length. ++ */ ++ error = log_next_seq - syslog_seq; ++ } else { ++ u64 seq = syslog_seq; ++ u32 idx = syslog_idx; ++ bool time = syslog_partial ? syslog_time : printk_time; ++ ++ while (seq < log_next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ error += msg_print_text(msg, true, time, NULL, ++ 0); ++ time = printk_time; ++ idx = log_next(idx); ++ seq++; ++ } ++ error -= syslog_partial; ++ } ++ logbuf_unlock_irq(); ++ break; ++ /* Size of the log buffer */ ++ case SYSLOG_ACTION_SIZE_BUFFER: ++ error = log_buf_len; ++ break; ++ default: ++ error = -EINVAL; ++ break; ++ } ++ ++ return error; ++} ++ ++SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) ++{ ++ return do_syslog(type, buf, len, SYSLOG_FROM_READER); ++} ++ ++/* ++ * Special console_lock variants that help to reduce the risk of soft-lockups. ++ * They allow to pass console_lock to another printk() call using a busy wait. ++ */ ++ ++#ifdef CONFIG_LOCKDEP ++static struct lockdep_map console_owner_dep_map = { ++ .name = "console_owner" ++}; ++#endif ++ ++static DEFINE_RAW_SPINLOCK(console_owner_lock); ++static struct task_struct *console_owner; ++static bool console_waiter; ++ ++void zap_locks(void) ++{ ++ if (raw_spin_is_locked(&logbuf_lock)) { ++ debug_locks_off(); ++ raw_spin_lock_init(&logbuf_lock); ++ } ++ ++ if (raw_spin_is_locked(&console_owner_lock)) { ++ raw_spin_lock_init(&console_owner_lock); ++ console_owner = NULL; ++ console_waiter = false; ++ } ++} ++ ++/** ++ * console_lock_spinning_enable - mark beginning of code where another ++ * thread might safely busy wait ++ * ++ * This basically converts console_lock into a spinlock. This marks ++ * the section where the console_lock owner can not sleep, because ++ * there may be a waiter spinning (like a spinlock). Also it must be ++ * ready to hand over the lock at the end of the section. 
++ */ ++static void console_lock_spinning_enable(void) ++{ ++ raw_spin_lock(&console_owner_lock); ++ console_owner = current; ++ raw_spin_unlock(&console_owner_lock); ++ ++ /* The waiter may spin on us after setting console_owner */ ++ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); ++} ++ ++/** ++ * console_lock_spinning_disable_and_check - mark end of code where another ++ * thread was able to busy wait and check if there is a waiter ++ * ++ * This is called at the end of the section where spinning is allowed. ++ * It has two functions. First, it is a signal that it is no longer ++ * safe to start busy waiting for the lock. Second, it checks if ++ * there is a busy waiter and passes the lock rights to her. ++ * ++ * Important: Callers lose the lock if there was a busy waiter. ++ * They must not touch items synchronized by console_lock ++ * in this case. ++ * ++ * Return: 1 if the lock rights were passed, 0 otherwise. ++ */ ++static int console_lock_spinning_disable_and_check(void) ++{ ++ int waiter; ++ ++ raw_spin_lock(&console_owner_lock); ++ waiter = READ_ONCE(console_waiter); ++ console_owner = NULL; ++ raw_spin_unlock(&console_owner_lock); ++ ++ if (!waiter) { ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ return 0; ++ } ++ ++ /* The waiter is now free to continue */ ++ WRITE_ONCE(console_waiter, false); ++ ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ ++ /* ++ * Hand off console_lock to waiter. The waiter will perform ++ * the up(). After this, the waiter is the console_lock owner. ++ */ ++ mutex_release(&console_lock_dep_map, 1, _THIS_IP_); ++ return 1; ++} ++ ++/** ++ * console_trylock_spinning - try to get console_lock by busy waiting ++ * ++ * This allows to busy wait for the console_lock when the current ++ * owner is running in specially marked sections. It means that ++ * the current owner is running and cannot reschedule until it ++ * is ready to lose the lock. ++ * ++ * Return: 1 if we got the lock, 0 othrewise ++ */ ++static int console_trylock_spinning(void) ++{ ++ struct task_struct *owner = NULL; ++ bool waiter; ++ bool spin = false; ++ unsigned long flags; ++ ++ if (console_trylock()) ++ return 1; ++ ++ printk_safe_enter_irqsave(flags); ++ ++ raw_spin_lock(&console_owner_lock); ++ owner = READ_ONCE(console_owner); ++ waiter = READ_ONCE(console_waiter); ++ if (!waiter && owner && owner != current) { ++ WRITE_ONCE(console_waiter, true); ++ spin = true; ++ } ++ raw_spin_unlock(&console_owner_lock); ++ ++ /* ++ * If there is an active printk() writing to the ++ * consoles, instead of having it write our data too, ++ * see if we can offload that load from the active ++ * printer, and do some printing ourselves. ++ * Go into a spin only if there isn't already a waiter ++ * spinning, and there is an active printer, and ++ * that active printer isn't us (recursive printk?). ++ */ ++ if (!spin) { ++ printk_safe_exit_irqrestore(flags); ++ return 0; ++ } ++ ++ /* We spin waiting for the owner to release us */ ++ spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); ++ /* Owner will clear console_waiter on hand off */ ++ while (READ_ONCE(console_waiter)) ++ cpu_relax(); ++ spin_release(&console_owner_dep_map, 1, _THIS_IP_); ++ ++ printk_safe_exit_irqrestore(flags); ++ /* ++ * The owner passed the console lock to us. ++ * Since we did not spin on console lock, annotate ++ * this as a trylock. Otherwise lockdep will ++ * complain. 
++ */ ++ mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); ++ ++ return 1; ++} ++ ++/* ++ * Call the console drivers, asking them to write out ++ * log_buf[start] to log_buf[end - 1]. ++ * The console_lock must be held. ++ */ ++static void call_console_drivers(const char *ext_text, size_t ext_len, ++ const char *text, size_t len) ++{ ++ struct console *con; ++ ++ trace_console_rcuidle(text, len); ++ ++ if (!console_drivers) ++ return; ++ ++ for_each_console(con) { ++ if (exclusive_console && con != exclusive_console) ++ continue; ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ if (!con->write) ++ continue; ++ if (!cpu_online(smp_processor_id()) && ++ !(con->flags & CON_ANYTIME)) ++ continue; ++ if (con->flags & CON_EXTENDED) ++ con->write(con, ext_text, ext_len); ++ else ++ con->write(con, text, len); ++ } ++} ++ ++int printk_delay_msec __read_mostly; ++ ++static inline void printk_delay(void) ++{ ++ if (unlikely(printk_delay_msec)) { ++ int m = printk_delay_msec; ++ ++ while (m--) { ++ mdelay(1); ++ touch_nmi_watchdog(); ++ } ++ } ++} ++ ++/* ++ * Continuation lines are buffered, and not committed to the record buffer ++ * until the line is complete, or a race forces it. The line fragments ++ * though, are printed immediately to the consoles to ensure everything has ++ * reached the console in case of a kernel crash. ++ */ ++static struct cont { ++ char buf[LOG_LINE_MAX]; ++ size_t len; /* length == 0 means unused buffer */ ++ struct task_struct *owner; /* task of first print*/ ++ u64 ts_nsec; /* time of first print */ ++ u8 level; /* log level of first message */ ++ u8 facility; /* log facility of first message */ ++ enum log_flags flags; /* prefix, newline flags */ ++} cont; ++ ++static void cont_flush(void) ++{ ++ if (cont.len == 0) ++ return; ++ ++ log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec, ++ NULL, 0, cont.buf, cont.len); ++ cont.len = 0; ++} ++ ++static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len) ++{ ++ /* ++ * If ext consoles are present, flush and skip in-kernel ++ * continuation. See nr_ext_console_drivers definition. Also, if ++ * the line gets too long, split it up in separate records. ++ */ ++ if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { ++ cont_flush(); ++ return false; ++ } ++ ++ if (!cont.len) { ++ cont.facility = facility; ++ cont.level = level; ++ cont.owner = current; ++ cont.ts_nsec = local_clock(); ++ cont.flags = flags; ++ } ++ ++ memcpy(cont.buf + cont.len, text, len); ++ cont.len += len; ++ ++ // The original flags come from the first line, ++ // but later continuations can add a newline. ++ if (flags & LOG_NEWLINE) { ++ cont.flags |= LOG_NEWLINE; ++ cont_flush(); ++ } ++ ++ if (cont.len > (sizeof(cont.buf) * 80) / 100) ++ cont_flush(); ++ ++ return true; ++} ++ ++static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) ++{ ++ /* ++ * If an earlier line was buffered, and we're a continuation ++ * write from the same process, try to add it to the buffer. 
++ */ ++ if (cont.len) { ++ if (cont.owner == current && (lflags & LOG_CONT)) { ++ if (cont_add(facility, level, lflags, text, text_len)) ++ return text_len; ++ } ++ /* Otherwise, make sure it's flushed */ ++ cont_flush(); ++ } ++ ++ /* Skip empty continuation lines that couldn't be added - they just flush */ ++ if (!text_len && (lflags & LOG_CONT)) ++ return 0; ++ ++ /* If it doesn't end in a newline, try to buffer the current line */ ++ if (!(lflags & LOG_NEWLINE)) { ++ if (cont_add(facility, level, lflags, text, text_len)) ++ return text_len; ++ } ++ ++ /* Store it in the record log */ ++ return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len); ++} ++ ++/* Must be called under logbuf_lock. */ ++int vprintk_store(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args) ++{ ++ static char textbuf[LOG_LINE_MAX]; ++ char *text = textbuf; ++ size_t text_len; ++ enum log_flags lflags = 0; ++ ++ /* ++ * The printf needs to come first; we need the syslog ++ * prefix which might be passed-in as a parameter. ++ */ ++ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); ++ ++ /* mark and strip a trailing newline */ ++ if (text_len && text[text_len-1] == '\n') { ++ text_len--; ++ lflags |= LOG_NEWLINE; ++ } ++ ++ /* strip kernel syslog prefix and extract log level or control flags */ ++ if (facility == 0) { ++ int kern_level; ++ ++ while ((kern_level = printk_get_level(text)) != 0) { ++ switch (kern_level) { ++ case '0' ... '7': ++ if (level == LOGLEVEL_DEFAULT) ++ level = kern_level - '0'; ++ /* fallthrough */ ++ case 'd': /* KERN_DEFAULT */ ++ lflags |= LOG_PREFIX; ++ break; ++ case 'c': /* KERN_CONT */ ++ lflags |= LOG_CONT; ++ } ++ ++ text_len -= 2; ++ text += 2; ++ } ++ } ++ ++ if (level == LOGLEVEL_DEFAULT) ++ level = default_message_loglevel; ++ ++ if (dict) ++ lflags |= LOG_PREFIX|LOG_NEWLINE; ++ ++ return log_output(facility, level, lflags, ++ dict, dictlen, text, text_len); ++} ++ ++asmlinkage int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args) ++{ ++ int printed_len; ++ bool in_sched = false, pending_output; ++ unsigned long flags; ++ u64 curr_log_seq; ++ ++ if (level == LOGLEVEL_SCHED) { ++ level = LOGLEVEL_DEFAULT; ++ in_sched = true; ++ } ++ ++ boot_delay_msec(level); ++ printk_delay(); ++ ++ /* This stops the holder of console_sem just where we want him */ ++ logbuf_lock_irqsave(flags); ++ curr_log_seq = log_next_seq; ++ printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); ++ pending_output = (curr_log_seq != log_next_seq); ++ logbuf_unlock_irqrestore(flags); ++ ++ /* If called from the scheduler, we can not call up(). */ ++ if (!in_sched && pending_output) { ++ /* ++ * Disable preemption to avoid being preempted while holding ++ * console_sem which would prevent anyone from printing to ++ * console ++ */ ++ preempt_disable(); ++ /* ++ * Try to acquire and then immediately release the console ++ * semaphore. The release will print out buffers and wake up ++ * /dev/kmsg and syslog() users. ++ */ ++ if (console_trylock_spinning()) ++ console_unlock(); ++ preempt_enable(); ++ } ++ ++ if (pending_output) ++ wake_up_klogd(); ++ return printed_len; ++} ++EXPORT_SYMBOL(vprintk_emit); ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return vprintk_func(fmt, args); ++} ++EXPORT_SYMBOL(vprintk); ++ ++asmlinkage int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...) 
++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_emit(facility, level, dict, dictlen, fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk_emit); ++ ++int vprintk_default(const char *fmt, va_list args) ++{ ++ int r; ++ ++#ifdef CONFIG_KGDB_KDB ++ /* Allow to pass printk() to kdb but avoid a recursion. */ ++ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) { ++ r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); ++ return r; ++ } ++#endif ++ r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); ++ ++ return r; ++} ++EXPORT_SYMBOL_GPL(vprintk_default); ++ ++/** ++ * printk - print a kernel message ++ * @fmt: format string ++ * ++ * This is printk(). It can be called from any context. We want it to work. ++ * ++ * We try to grab the console_lock. If we succeed, it's easy - we log the ++ * output and call the console drivers. If we fail to get the semaphore, we ++ * place the output into the log buffer and return. The current holder of ++ * the console_sem will notice the new output in console_unlock(); and will ++ * send it to the consoles before releasing the lock. ++ * ++ * One effect of this deferred printing is that code which calls printk() and ++ * then changes console_loglevel may break. This is because console_loglevel ++ * is inspected when the actual printing occurs. ++ * ++ * See also: ++ * printf(3) ++ * ++ * See the vsnprintf() documentation for format string extensions over C99. ++ */ ++asmlinkage __visible int printk(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_func(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk); ++ ++#else /* CONFIG_PRINTK */ ++ ++#define LOG_LINE_MAX 0 ++#define PREFIX_MAX 0 ++#define printk_time false ++ ++static u64 syslog_seq; ++static u32 syslog_idx; ++static u64 console_seq; ++static u32 console_idx; ++static u64 exclusive_console_stop_seq; ++static u64 log_first_seq; ++static u32 log_first_idx; ++static u64 log_next_seq; ++static char *log_text(const struct printk_log *msg) { return NULL; } ++static char *log_dict(const struct printk_log *msg) { return NULL; } ++static struct printk_log *log_from_idx(u32 idx) { return NULL; } ++static u32 log_next(u32 idx) { return 0; } ++static ssize_t msg_print_ext_header(char *buf, size_t size, ++ struct printk_log *msg, ++ u64 seq) { return 0; } ++static ssize_t msg_print_ext_body(char *buf, size_t size, ++ char *dict, size_t dict_len, ++ char *text, size_t text_len) { return 0; } ++static void console_lock_spinning_enable(void) { } ++static int console_lock_spinning_disable_and_check(void) { return 0; } ++static void call_console_drivers(const char *ext_text, size_t ext_len, ++ const char *text, size_t len) {} ++static size_t msg_print_text(const struct printk_log *msg, bool syslog, ++ bool time, char *buf, size_t size) { return 0; } ++static bool suppress_message_printing(int level) { return false; } ++ ++#endif /* CONFIG_PRINTK */ ++ ++#ifdef CONFIG_EARLY_PRINTK ++struct console *early_console; ++ ++asmlinkage __visible void early_printk(const char *fmt, ...) 
++{ ++ va_list ap; ++ char buf[512]; ++ int n; ++ ++ if (!early_console) ++ return; ++ ++ va_start(ap, fmt); ++ n = vscnprintf(buf, sizeof(buf), fmt, ap); ++ va_end(ap); ++ ++ early_console->write(early_console, buf, n); ++} ++#endif ++ ++static int __add_preferred_console(char *name, int idx, char *options, ++ char *brl_options) ++{ ++ struct console_cmdline *c; ++ int i; ++ ++ /* ++ * See if this tty is not yet registered, and ++ * if we have a slot free. ++ */ ++ for (i = 0, c = console_cmdline; ++ i < MAX_CMDLINECONSOLES && c->name[0]; ++ i++, c++) { ++ if (strcmp(c->name, name) == 0 && c->index == idx) { ++ if (!brl_options) ++ preferred_console = i; ++ return 0; ++ } ++ } ++ if (i == MAX_CMDLINECONSOLES) ++ return -E2BIG; ++ if (!brl_options) ++ preferred_console = i; ++ strlcpy(c->name, name, sizeof(c->name)); ++ c->options = options; ++ braille_set_options(c, brl_options); ++ ++ c->index = idx; ++ return 0; ++} ++ ++static int __init console_msg_format_setup(char *str) ++{ ++ if (!strcmp(str, "syslog")) ++ console_msg_format = MSG_FORMAT_SYSLOG; ++ if (!strcmp(str, "default")) ++ console_msg_format = MSG_FORMAT_DEFAULT; ++ return 1; ++} ++__setup("console_msg_format=", console_msg_format_setup); ++ ++/* ++ * Set up a console. Called via do_early_param() in init/main.c ++ * for each "console=" parameter in the boot command line. ++ */ ++static int __init console_setup(char *str) ++{ ++ char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */ ++ char *s, *options, *brl_options = NULL; ++ int idx; ++ ++ if (str[0] == 0) ++ return 1; ++ ++ if (_braille_console_setup(&str, &brl_options)) ++ return 1; ++ ++ /* ++ * Decode str into name, index, options. ++ */ ++ if (str[0] >= '0' && str[0] <= '9') { ++ strcpy(buf, "ttyS"); ++ strncpy(buf + 4, str, sizeof(buf) - 5); ++ } else { ++ strncpy(buf, str, sizeof(buf) - 1); ++ } ++ buf[sizeof(buf) - 1] = 0; ++ options = strchr(str, ','); ++ if (options) ++ *(options++) = 0; ++#ifdef __sparc__ ++ if (!strcmp(str, "ttya")) ++ strcpy(buf, "ttyS0"); ++ if (!strcmp(str, "ttyb")) ++ strcpy(buf, "ttyS1"); ++#endif ++ for (s = buf; *s; s++) ++ if (isdigit(*s) || *s == ',') ++ break; ++ idx = simple_strtoul(s, NULL, 10); ++ *s = 0; ++ ++ __add_preferred_console(buf, idx, options, brl_options); ++ console_set_on_cmdline = 1; ++ return 1; ++} ++__setup("console=", console_setup); ++ ++/** ++ * add_preferred_console - add a device to the list of preferred consoles. ++ * @name: device name ++ * @idx: device index ++ * @options: options for this console ++ * ++ * The last preferred console added will be used for kernel messages ++ * and stdin/out/err for init. Normally this is used by console_setup ++ * above to handle user-supplied console arguments; however it can also ++ * be used by arch-specific code either to override the user or more ++ * commonly to provide a default console (ie from PROM variables) when ++ * the user has not supplied one. 
++ */ ++int add_preferred_console(char *name, int idx, char *options) ++{ ++ return __add_preferred_console(name, idx, options, NULL); ++} ++ ++bool console_suspend_enabled = true; ++EXPORT_SYMBOL(console_suspend_enabled); ++ ++static int __init console_suspend_disable(char *str) ++{ ++ console_suspend_enabled = false; ++ return 1; ++} ++__setup("no_console_suspend", console_suspend_disable); ++module_param_named(console_suspend, console_suspend_enabled, ++ bool, S_IRUGO | S_IWUSR); ++MODULE_PARM_DESC(console_suspend, "suspend console during suspend" ++ " and hibernate operations"); ++ ++/** ++ * suspend_console - suspend the console subsystem ++ * ++ * This disables printk() while we go into suspend states ++ */ ++void suspend_console(void) ++{ ++ if (!console_suspend_enabled) ++ return; ++ pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); ++ console_lock(); ++ console_suspended = 1; ++ up_console_sem(); ++} ++ ++void resume_console(void) ++{ ++ if (!console_suspend_enabled) ++ return; ++ down_console_sem(); ++ console_suspended = 0; ++ console_unlock(); ++} ++ ++/** ++ * console_cpu_notify - print deferred console messages after CPU hotplug ++ * @cpu: unused ++ * ++ * If printk() is called from a CPU that is not online yet, the messages ++ * will be printed on the console only if there are CON_ANYTIME consoles. ++ * This function is called when a new CPU comes online (or fails to come ++ * up) or goes offline. ++ */ ++static int console_cpu_notify(unsigned int cpu) ++{ ++ if (!cpuhp_tasks_frozen) { ++ /* If trylock fails, someone else is doing the printing */ ++ if (console_trylock()) ++ console_unlock(); ++ } ++ return 0; ++} ++ ++/** ++ * console_lock - lock the console system for exclusive use. ++ * ++ * Acquires a lock which guarantees that the caller has ++ * exclusive access to the console system and the console_drivers list. ++ * ++ * Can sleep, returns nothing. ++ */ ++void console_lock(void) ++{ ++ might_sleep(); ++ ++ down_console_sem(); ++ if (console_suspended) ++ return; ++ console_locked = 1; ++ console_may_schedule = 1; ++} ++EXPORT_SYMBOL(console_lock); ++ ++/** ++ * console_trylock - try to lock the console system for exclusive use. ++ * ++ * Try to acquire a lock which guarantees that the caller has exclusive ++ * access to the console system and the console_drivers list. ++ * ++ * returns 1 on success, and 0 on failure to acquire the lock. ++ */ ++int console_trylock(void) ++{ ++ if (down_trylock_console_sem()) ++ return 0; ++ if (console_suspended) { ++ up_console_sem(); ++ return 0; ++ } ++ console_locked = 1; ++ console_may_schedule = 0; ++ return 1; ++} ++EXPORT_SYMBOL(console_trylock); ++ ++int is_console_locked(void) ++{ ++ return console_locked; ++} ++EXPORT_SYMBOL(is_console_locked); ++ ++/* ++ * Check if we have any console that is capable of printing while cpu is ++ * booting or shutting down. Requires console_sem. ++ */ ++static int have_callable_console(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) ++ if ((con->flags & CON_ENABLED) && ++ (con->flags & CON_ANYTIME)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Can we actually use the console at this time on this cpu? ++ * ++ * Console drivers may assume that per-cpu resources have been allocated. So ++ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't ++ * call them until this CPU is officially up. 
++ */ ++static inline int can_use_console(void) ++{ ++ return cpu_online(raw_smp_processor_id()) || have_callable_console(); ++} ++ ++/** ++ * console_unlock - unlock the console system ++ * ++ * Releases the console_lock which the caller holds on the console system ++ * and the console driver list. ++ * ++ * While the console_lock was held, console output may have been buffered ++ * by printk(). If this is the case, console_unlock(); emits ++ * the output prior to releasing the lock. ++ * ++ * If there is output waiting, we wake /dev/kmsg and syslog() users. ++ * ++ * console_unlock(); may be called from any context. ++ */ ++void console_unlock(void) ++{ ++ static char ext_text[CONSOLE_EXT_LOG_MAX]; ++ static char text[LOG_LINE_MAX + PREFIX_MAX]; ++ unsigned long flags; ++ bool do_cond_resched, retry; ++ ++ if (console_suspended) { ++ up_console_sem(); ++ return; ++ } ++ ++ /* ++ * Console drivers are called with interrupts disabled, so ++ * @console_may_schedule should be cleared before; however, we may ++ * end up dumping a lot of lines, for example, if called from ++ * console registration path, and should invoke cond_resched() ++ * between lines if allowable. Not doing so can cause a very long ++ * scheduling stall on a slow console leading to RCU stall and ++ * softlockup warnings which exacerbate the issue with more ++ * messages practically incapacitating the system. ++ * ++ * console_trylock() is not able to detect the preemptive ++ * context reliably. Therefore the value must be stored before ++ * and cleared after the the "again" goto label. ++ */ ++ do_cond_resched = console_may_schedule; ++again: ++ console_may_schedule = 0; ++ ++ /* ++ * We released the console_sem lock, so we need to recheck if ++ * cpu is online and (if not) is there at least one CON_ANYTIME ++ * console. ++ */ ++ if (!can_use_console()) { ++ console_locked = 0; ++ up_console_sem(); ++ return; ++ } ++ ++ for (;;) { ++ struct printk_log *msg; ++ size_t ext_len = 0; ++ size_t len; ++ ++ printk_safe_enter_irqsave(flags); ++ raw_spin_lock(&logbuf_lock); ++ if (console_seq < log_first_seq) { ++ len = sprintf(text, ++ "** %llu printk messages dropped **\n", ++ log_first_seq - console_seq); ++ ++ /* messages are gone, move to first one */ ++ console_seq = log_first_seq; ++ console_idx = log_first_idx; ++ } else { ++ len = 0; ++ } ++skip: ++ if (console_seq == log_next_seq) ++ break; ++ ++ msg = log_from_idx(console_idx); ++ if (suppress_message_printing(msg->level)) { ++ /* ++ * Skip record we have buffered and already printed ++ * directly to the console when we received it, and ++ * record that has level above the console loglevel. ++ */ ++ console_idx = log_next(console_idx); ++ console_seq++; ++ goto skip; ++ } ++ ++ /* Output to all consoles once old messages replayed. 
*/ ++ if (unlikely(exclusive_console && ++ console_seq >= exclusive_console_stop_seq)) { ++ exclusive_console = NULL; ++ } ++ ++ len += msg_print_text(msg, ++ console_msg_format & MSG_FORMAT_SYSLOG, ++ printk_time, text + len, sizeof(text) - len); ++ if (nr_ext_console_drivers) { ++ ext_len = msg_print_ext_header(ext_text, ++ sizeof(ext_text), ++ msg, console_seq); ++ ext_len += msg_print_ext_body(ext_text + ext_len, ++ sizeof(ext_text) - ext_len, ++ log_dict(msg), msg->dict_len, ++ log_text(msg), msg->text_len); ++ } ++ console_idx = log_next(console_idx); ++ console_seq++; ++ raw_spin_unlock(&logbuf_lock); ++ ++ /* ++ * While actively printing out messages, if another printk() ++ * were to occur on another CPU, it may wait for this one to ++ * finish. This task can not be preempted if there is a ++ * waiter waiting to take over. ++ */ ++ console_lock_spinning_enable(); ++ ++ stop_critical_timings(); /* don't trace print latency */ ++ call_console_drivers(ext_text, ext_len, text, len); ++ start_critical_timings(); ++ ++ if (console_lock_spinning_disable_and_check()) { ++ printk_safe_exit_irqrestore(flags); ++ return; ++ } ++ ++ printk_safe_exit_irqrestore(flags); ++ ++ if (do_cond_resched) ++ cond_resched(); ++ } ++ ++ console_locked = 0; ++ ++ raw_spin_unlock(&logbuf_lock); ++ ++ up_console_sem(); ++ ++ /* ++ * Someone could have filled up the buffer again, so re-check if there's ++ * something to flush. In case we cannot trylock the console_sem again, ++ * there's a new owner and the console_unlock() from them will do the ++ * flush, no worries. ++ */ ++ raw_spin_lock(&logbuf_lock); ++ retry = console_seq != log_next_seq; ++ raw_spin_unlock(&logbuf_lock); ++ printk_safe_exit_irqrestore(flags); ++ ++ if (retry && console_trylock()) ++ goto again; ++} ++EXPORT_SYMBOL(console_unlock); ++ ++/** ++ * console_conditional_schedule - yield the CPU if required ++ * ++ * If the console code is currently allowed to sleep, and ++ * if this CPU should yield the CPU to another task, do ++ * so here. ++ * ++ * Must be called within console_lock();. ++ */ ++void __sched console_conditional_schedule(void) ++{ ++ if (console_may_schedule) ++ cond_resched(); ++} ++EXPORT_SYMBOL(console_conditional_schedule); ++ ++void console_unblank(void) ++{ ++ struct console *c; ++ ++ /* ++ * console_unblank can no longer be called in interrupt context unless ++ * oops_in_progress is set to 1.. ++ */ ++ if (oops_in_progress) { ++ if (down_trylock_console_sem() != 0) ++ return; ++ } else ++ console_lock(); ++ ++ console_locked = 1; ++ console_may_schedule = 0; ++ for_each_console(c) ++ if ((c->flags & CON_ENABLED) && c->unblank) ++ c->unblank(); ++ console_unlock(); ++} ++ ++/** ++ * console_flush_on_panic - flush console content on panic ++ * ++ * Immediately output all pending messages no matter what. ++ */ ++void console_flush_on_panic(void) ++{ ++ /* ++ * If someone else is holding the console lock, trylock will fail ++ * and may_schedule may be set. Ignore and proceed to unlock so ++ * that messages are flushed out. As this can be called from any ++ * context and we don't want to get preempted while flushing, ++ * ensure may_schedule is cleared. 
++ */ ++ console_trylock(); ++ console_may_schedule = 0; ++ console_unlock(); ++} ++ ++/* ++ * Return the console tty driver structure and its associated index ++ */ ++struct tty_driver *console_device(int *index) ++{ ++ struct console *c; ++ struct tty_driver *driver = NULL; ++ ++ console_lock(); ++ for_each_console(c) { ++ if (!c->device) ++ continue; ++ driver = c->device(c, index); ++ if (driver) ++ break; ++ } ++ console_unlock(); ++ return driver; ++} ++ ++/* ++ * Prevent further output on the passed console device so that (for example) ++ * serial drivers can disable console output before suspending a port, and can ++ * re-enable output afterwards. ++ */ ++void console_stop(struct console *console) ++{ ++ console_lock(); ++ console->flags &= ~CON_ENABLED; ++ console_unlock(); ++} ++EXPORT_SYMBOL(console_stop); ++ ++void console_start(struct console *console) ++{ ++ console_lock(); ++ console->flags |= CON_ENABLED; ++ console_unlock(); ++} ++EXPORT_SYMBOL(console_start); ++ ++static int __read_mostly keep_bootcon; ++ ++static int __init keep_bootcon_setup(char *str) ++{ ++ keep_bootcon = 1; ++ pr_info("debug: skip boot console de-registration.\n"); ++ ++ return 0; ++} ++ ++early_param("keep_bootcon", keep_bootcon_setup); ++ ++/* ++ * The console driver calls this routine during kernel initialization ++ * to register the console printing procedure with printk() and to ++ * print any messages that were printed by the kernel before the ++ * console driver was initialized. ++ * ++ * This can happen pretty early during the boot process (because of ++ * early_printk) - sometimes before setup_arch() completes - be careful ++ * of what kernel features are used - they may not be initialised yet. ++ * ++ * There are two types of consoles - bootconsoles (early_printk) and ++ * "real" consoles (everything which is not a bootconsole) which are ++ * handled differently. ++ * - Any number of bootconsoles can be registered at any time. ++ * - As soon as a "real" console is registered, all bootconsoles ++ * will be unregistered automatically. ++ * - Once a "real" console is registered, any attempt to register a ++ * bootconsoles will be rejected ++ */ ++void register_console(struct console *newcon) ++{ ++ int i; ++ unsigned long flags; ++ struct console *bcon = NULL; ++ struct console_cmdline *c; ++ static bool has_preferred; ++ ++ if (console_drivers) ++ for_each_console(bcon) ++ if (WARN(bcon == newcon, ++ "console '%s%d' already registered\n", ++ bcon->name, bcon->index)) ++ return; ++ ++ /* ++ * before we register a new CON_BOOT console, make sure we don't ++ * already have a valid console ++ */ ++ if (console_drivers && newcon->flags & CON_BOOT) { ++ /* find the last or real console */ ++ for_each_console(bcon) { ++ if (!(bcon->flags & CON_BOOT)) { ++ pr_info("Too late to register bootconsole %s%d\n", ++ newcon->name, newcon->index); ++ return; ++ } ++ } ++ } ++ ++ if (console_drivers && console_drivers->flags & CON_BOOT) ++ bcon = console_drivers; ++ ++ if (!has_preferred || bcon || !console_drivers) ++ has_preferred = preferred_console >= 0; ++ ++ /* ++ * See if we want to use this console driver. If we ++ * didn't select a console we take the first one ++ * that registers here. 
++ */ ++ if (!has_preferred) { ++ if (newcon->index < 0) ++ newcon->index = 0; ++ if (newcon->setup == NULL || ++ newcon->setup(newcon, NULL) == 0) { ++ newcon->flags |= CON_ENABLED; ++ if (newcon->device) { ++ newcon->flags |= CON_CONSDEV; ++ has_preferred = true; ++ } ++ } ++ } ++ ++ /* ++ * See if this console matches one we selected on ++ * the command line. ++ */ ++ for (i = 0, c = console_cmdline; ++ i < MAX_CMDLINECONSOLES && c->name[0]; ++ i++, c++) { ++ if (!newcon->match || ++ newcon->match(newcon, c->name, c->index, c->options) != 0) { ++ /* default matching */ ++ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); ++ if (strcmp(c->name, newcon->name) != 0) ++ continue; ++ if (newcon->index >= 0 && ++ newcon->index != c->index) ++ continue; ++ if (newcon->index < 0) ++ newcon->index = c->index; ++ ++ if (_braille_register_console(newcon, c)) ++ return; ++ ++ if (newcon->setup && ++ newcon->setup(newcon, c->options) != 0) ++ break; ++ } ++ ++ newcon->flags |= CON_ENABLED; ++ if (i == preferred_console) { ++ newcon->flags |= CON_CONSDEV; ++ has_preferred = true; ++ } ++ break; ++ } ++ ++ if (!(newcon->flags & CON_ENABLED)) ++ return; ++ ++ /* ++ * If we have a bootconsole, and are switching to a real console, ++ * don't print everything out again, since when the boot console, and ++ * the real console are the same physical device, it's annoying to ++ * see the beginning boot messages twice ++ */ ++ if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) ++ newcon->flags &= ~CON_PRINTBUFFER; ++ ++ /* ++ * Put this console in the list - keep the ++ * preferred driver at the head of the list. ++ */ ++ console_lock(); ++ if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { ++ newcon->next = console_drivers; ++ console_drivers = newcon; ++ if (newcon->next) ++ newcon->next->flags &= ~CON_CONSDEV; ++ } else { ++ newcon->next = console_drivers->next; ++ console_drivers->next = newcon; ++ } ++ ++ if (newcon->flags & CON_EXTENDED) ++ if (!nr_ext_console_drivers++) ++ pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); ++ ++ if (newcon->flags & CON_PRINTBUFFER) { ++ /* ++ * console_unlock(); will print out the buffered messages ++ * for us. ++ */ ++ logbuf_lock_irqsave(flags); ++ console_seq = syslog_seq; ++ console_idx = syslog_idx; ++ /* ++ * We're about to replay the log buffer. Only do this to the ++ * just-registered console to avoid excessive message spam to ++ * the already-registered consoles. ++ * ++ * Set exclusive_console with disabled interrupts to reduce ++ * race window with eventual console_flush_on_panic() that ++ * ignores console_lock. ++ */ ++ exclusive_console = newcon; ++ exclusive_console_stop_seq = console_seq; ++ logbuf_unlock_irqrestore(flags); ++ } ++ console_unlock(); ++ console_sysfs_notify(); ++ ++ /* ++ * By unregistering the bootconsoles after we enable the real console ++ * we get the "console xxx enabled" message on all the consoles - ++ * boot consoles, real consoles, etc - this is to ensure that end ++ * users know there might be something in the kernel's log buffer that ++ * went to the bootconsole (that they do not see on the real console) ++ */ ++ pr_info("%sconsole [%s%d] enabled\n", ++ (newcon->flags & CON_BOOT) ? 
"boot" : "" , ++ newcon->name, newcon->index); ++ if (bcon && ++ ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && ++ !keep_bootcon) { ++ /* We need to iterate through all boot consoles, to make ++ * sure we print everything out, before we unregister them. ++ */ ++ for_each_console(bcon) ++ if (bcon->flags & CON_BOOT) ++ unregister_console(bcon); ++ } ++} ++EXPORT_SYMBOL(register_console); ++ ++int unregister_console(struct console *console) ++{ ++ struct console *a, *b; ++ int res; ++ ++ pr_info("%sconsole [%s%d] disabled\n", ++ (console->flags & CON_BOOT) ? "boot" : "" , ++ console->name, console->index); ++ ++ res = _braille_unregister_console(console); ++ if (res) ++ return res; ++ ++ res = 1; ++ console_lock(); ++ if (console_drivers == console) { ++ console_drivers=console->next; ++ res = 0; ++ } else if (console_drivers) { ++ for (a=console_drivers->next, b=console_drivers ; ++ a; b=a, a=b->next) { ++ if (a == console) { ++ b->next = a->next; ++ res = 0; ++ break; ++ } ++ } ++ } ++ ++ if (!res && (console->flags & CON_EXTENDED)) ++ nr_ext_console_drivers--; ++ ++ /* ++ * If this isn't the last console and it has CON_CONSDEV set, we ++ * need to set it on the next preferred console. ++ */ ++ if (console_drivers != NULL && console->flags & CON_CONSDEV) ++ console_drivers->flags |= CON_CONSDEV; ++ ++ console->flags &= ~CON_ENABLED; ++ console_unlock(); ++ console_sysfs_notify(); ++ return res; ++} ++EXPORT_SYMBOL(unregister_console); ++ ++/* ++ * Initialize the console device. This is called *early*, so ++ * we can't necessarily depend on lots of kernel help here. ++ * Just do some early initializations, and do the complex setup ++ * later. ++ */ ++void __init console_init(void) ++{ ++ int ret; ++ initcall_t call; ++ initcall_entry_t *ce; ++ ++ /* Setup the default TTY line discipline. */ ++ n_tty_init(); ++ ++ /* ++ * set up the console device so that later boot sequences can ++ * inform about problems etc.. ++ */ ++ ce = __con_initcall_start; ++ trace_initcall_level("console"); ++ while (ce < __con_initcall_end) { ++ call = initcall_from_entry(ce); ++ trace_initcall_start(call); ++ ret = call(); ++ trace_initcall_finish(call, ret); ++ ce++; ++ } ++} ++ ++/* ++ * Some boot consoles access data that is in the init section and which will ++ * be discarded after the initcalls have been run. To make sure that no code ++ * will access this data, unregister the boot consoles in a late initcall. ++ * ++ * If for some reason, such as deferred probe or the driver being a loadable ++ * module, the real console hasn't registered yet at this point, there will ++ * be a brief interval in which no messages are logged to the console, which ++ * makes it difficult to diagnose problems that occur during this time. ++ * ++ * To mitigate this problem somewhat, only unregister consoles whose memory ++ * intersects with the init section. Note that all other boot consoles will ++ * get unregistred when the real preferred console is registered. ++ */ ++static int __init printk_late_init(void) ++{ ++ struct console *con; ++ int ret; ++ ++ for_each_console(con) { ++ if (!(con->flags & CON_BOOT)) ++ continue; ++ ++ /* Check addresses that might be used for enabled consoles. 
*/ ++ if (init_section_intersects(con, sizeof(*con)) || ++ init_section_contains(con->write, 0) || ++ init_section_contains(con->read, 0) || ++ init_section_contains(con->device, 0) || ++ init_section_contains(con->unblank, 0) || ++ init_section_contains(con->data, 0)) { ++ /* ++ * Please, consider moving the reported consoles out ++ * of the init section. ++ */ ++ pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", ++ con->name, con->index); ++ unregister_console(con); ++ } ++ } ++ ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, ++ console_cpu_notify); ++ WARN_ON(ret < 0); ++ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", ++ console_cpu_notify, NULL); ++ WARN_ON(ret < 0); ++ return 0; ++} ++late_initcall(printk_late_init); ++ ++#if defined CONFIG_PRINTK ++/* ++ * Delayed printk version, for scheduler-internal messages: ++ */ ++#define PRINTK_PENDING_WAKEUP 0x01 ++#define PRINTK_PENDING_OUTPUT 0x02 ++ ++static DEFINE_PER_CPU(int, printk_pending); ++ ++static void wake_up_klogd_work_func(struct irq_work *irq_work) ++{ ++ int pending = __this_cpu_xchg(printk_pending, 0); ++ ++ if (pending & PRINTK_PENDING_OUTPUT) { ++ /* If trylock fails, someone else is doing the printing */ ++ if (console_trylock()) ++ console_unlock(); ++ } ++ ++ if (pending & PRINTK_PENDING_WAKEUP) ++ wake_up_interruptible(&log_wait); ++} ++ ++static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { ++ .func = wake_up_klogd_work_func, ++ .flags = IRQ_WORK_LAZY, ++}; ++ ++void wake_up_klogd(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return; ++ ++ preempt_disable(); ++ if (waitqueue_active(&log_wait)) { ++ this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); ++ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); ++ } ++ preempt_enable(); ++} ++ ++void defer_console_output(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return; ++ ++ preempt_disable(); ++ __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); ++ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); ++ preempt_enable(); ++} ++ ++int vprintk_deferred(const char *fmt, va_list args) ++{ ++ int r; ++ ++ r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); ++ defer_console_output(); ++ ++ return r; ++} ++ ++int printk_deferred(const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_deferred(fmt, args); ++ va_end(args); ++ ++ return r; ++} ++ ++/* ++ * printk rate limiting, lifted from the networking subsystem. ++ * ++ * This enforces a rate limit: not more than 10 kernel messages ++ * every 5s to make a denial-of-service attack impossible. ++ */ ++DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); ++ ++int __printk_ratelimit(const char *func) ++{ ++ return ___ratelimit(&printk_ratelimit_state, func); ++} ++EXPORT_SYMBOL(__printk_ratelimit); ++ ++/** ++ * printk_timed_ratelimit - caller-controlled printk ratelimiting ++ * @caller_jiffies: pointer to caller's state ++ * @interval_msecs: minimum interval between prints ++ * ++ * printk_timed_ratelimit() returns true if more than @interval_msecs ++ * milliseconds have elapsed since the last time printk_timed_ratelimit() ++ * returned true. 
++ */ ++bool printk_timed_ratelimit(unsigned long *caller_jiffies, ++ unsigned int interval_msecs) ++{ ++ unsigned long elapsed = jiffies - *caller_jiffies; ++ ++ if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) ++ return false; ++ ++ *caller_jiffies = jiffies; ++ return true; ++} ++EXPORT_SYMBOL(printk_timed_ratelimit); ++ ++static DEFINE_SPINLOCK(dump_list_lock); ++static LIST_HEAD(dump_list); ++ ++/** ++ * kmsg_dump_register - register a kernel log dumper. ++ * @dumper: pointer to the kmsg_dumper structure ++ * ++ * Adds a kernel log dumper to the system. The dump callback in the ++ * structure will be called when the kernel oopses or panics and must be ++ * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. ++ */ ++int kmsg_dump_register(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ int err = -EBUSY; ++ ++ /* The dump callback needs to be set */ ++ if (!dumper->dump) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&dump_list_lock, flags); ++ /* Don't allow registering multiple times */ ++ if (!dumper->registered) { ++ dumper->registered = 1; ++ list_add_tail_rcu(&dumper->list, &dump_list); ++ err = 0; ++ } ++ spin_unlock_irqrestore(&dump_list_lock, flags); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_register); ++ ++/** ++ * kmsg_dump_unregister - unregister a kmsg dumper. ++ * @dumper: pointer to the kmsg_dumper structure ++ * ++ * Removes a dump device from the system. Returns zero on success and ++ * %-EINVAL otherwise. ++ */ ++int kmsg_dump_unregister(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ int err = -EINVAL; ++ ++ spin_lock_irqsave(&dump_list_lock, flags); ++ if (dumper->registered) { ++ dumper->registered = 0; ++ list_del_rcu(&dumper->list); ++ err = 0; ++ } ++ spin_unlock_irqrestore(&dump_list_lock, flags); ++ synchronize_rcu(); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_unregister); ++ ++static bool always_kmsg_dump; ++module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); ++ ++/** ++ * kmsg_dump - dump kernel log to kernel message dumpers. ++ * @reason: the reason (oops, panic etc) for dumping ++ * ++ * Call each of the registered dumper's dump() callback, which can ++ * retrieve the kmsg records with kmsg_dump_get_line() or ++ * kmsg_dump_get_buffer(). ++ */ ++void kmsg_dump(enum kmsg_dump_reason reason) ++{ ++ struct kmsg_dumper *dumper; ++ unsigned long flags; ++ ++ if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) ++ return; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(dumper, &dump_list, list) { ++ if (dumper->max_reason && reason > dumper->max_reason) ++ continue; ++ ++ /* initialize iterator with data about the stored records */ ++ dumper->active = true; ++ ++ logbuf_lock_irqsave(flags); ++ dumper->cur_seq = clear_seq; ++ dumper->cur_idx = clear_idx; ++ dumper->next_seq = log_next_seq; ++ dumper->next_idx = log_next_idx; ++ logbuf_unlock_irqrestore(flags); ++ ++ /* invoke dumper which will iterate over records */ ++ dumper->dump(dumper, reason); ++ ++ /* reset iterator */ ++ dumper->active = false; ++ } ++ rcu_read_unlock(); ++} ++ ++/** ++ * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @line: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the beginning of the kmsg buffer, with the oldest kmsg ++ * record, and copy one record into the provided buffer. 
++ * ++ * Consecutive calls will return the next available record moving ++ * towards the end of the buffer with the youngest messages. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. ++ * ++ * The function is similar to kmsg_dump_get_line(), but grabs no locks. ++ */ ++bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, ++ char *line, size_t size, size_t *len) ++{ ++ struct printk_log *msg; ++ size_t l = 0; ++ bool ret = false; ++ ++ if (!dumper->active) ++ goto out; ++ ++ if (dumper->cur_seq < log_first_seq) { ++ /* messages are gone, move to first available one */ ++ dumper->cur_seq = log_first_seq; ++ dumper->cur_idx = log_first_idx; ++ } ++ ++ /* last entry */ ++ if (dumper->cur_seq >= log_next_seq) ++ goto out; ++ ++ msg = log_from_idx(dumper->cur_idx); ++ l = msg_print_text(msg, syslog, printk_time, line, size); ++ ++ dumper->cur_idx = log_next(dumper->cur_idx); ++ dumper->cur_seq++; ++ ret = true; ++out: ++ if (len) ++ *len = l; ++ return ret; ++} ++ ++/** ++ * kmsg_dump_get_line - retrieve one kmsg log line ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @line: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the beginning of the kmsg buffer, with the oldest kmsg ++ * record, and copy one record into the provided buffer. ++ * ++ * Consecutive calls will return the next available record moving ++ * towards the end of the buffer with the youngest messages. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. ++ */ ++bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++ char *line, size_t size, size_t *len) ++{ ++ unsigned long flags; ++ bool ret; ++ ++ logbuf_lock_irqsave(flags); ++ ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); ++ logbuf_unlock_irqrestore(flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_get_line); ++ ++/** ++ * kmsg_dump_get_buffer - copy kmsg log lines ++ * @dumper: registered kmsg dumper ++ * @syslog: include the "<4>" prefixes ++ * @buf: buffer to copy the line to ++ * @size: maximum size of the buffer ++ * @len: length of line placed into buffer ++ * ++ * Start at the end of the kmsg buffer and fill the provided buffer ++ * with as many of the the *youngest* kmsg records that fit into it. ++ * If the buffer is large enough, all available kmsg records will be ++ * copied with a single call. ++ * ++ * Consecutive calls will fill the buffer with the next block of ++ * available older records, not including the earlier retrieved ones. ++ * ++ * A return value of FALSE indicates that there are no more records to ++ * read. 
++ */ ++bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++ char *buf, size_t size, size_t *len) ++{ ++ unsigned long flags; ++ u64 seq; ++ u32 idx; ++ u64 next_seq; ++ u32 next_idx; ++ size_t l = 0; ++ bool ret = false; ++ bool time = printk_time; ++ ++ if (!dumper->active) ++ goto out; ++ ++ logbuf_lock_irqsave(flags); ++ if (dumper->cur_seq < log_first_seq) { ++ /* messages are gone, move to first available one */ ++ dumper->cur_seq = log_first_seq; ++ dumper->cur_idx = log_first_idx; ++ } ++ ++ /* last entry */ ++ if (dumper->cur_seq >= dumper->next_seq) { ++ logbuf_unlock_irqrestore(flags); ++ goto out; ++ } ++ ++ /* calculate length of entire buffer */ ++ seq = dumper->cur_seq; ++ idx = dumper->cur_idx; ++ while (seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l += msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* move first record forward until length fits into the buffer */ ++ seq = dumper->cur_seq; ++ idx = dumper->cur_idx; ++ while (l >= size && seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l -= msg_print_text(msg, true, time, NULL, 0); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ /* last message in next interation */ ++ next_seq = seq; ++ next_idx = idx; ++ ++ l = 0; ++ while (seq < dumper->next_seq) { ++ struct printk_log *msg = log_from_idx(idx); ++ ++ l += msg_print_text(msg, syslog, time, buf + l, size - l); ++ idx = log_next(idx); ++ seq++; ++ } ++ ++ dumper->next_seq = next_seq; ++ dumper->next_idx = next_idx; ++ ret = true; ++ logbuf_unlock_irqrestore(flags); ++out: ++ if (len) ++ *len = l; ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); ++ ++/** ++ * kmsg_dump_rewind_nolock - reset the interator (unlocked version) ++ * @dumper: registered kmsg dumper ++ * ++ * Reset the dumper's iterator so that kmsg_dump_get_line() and ++ * kmsg_dump_get_buffer() can be called again and used multiple ++ * times within the same dumper.dump() callback. ++ * ++ * The function is similar to kmsg_dump_rewind(), but grabs no locks. ++ */ ++void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) ++{ ++ dumper->cur_seq = clear_seq; ++ dumper->cur_idx = clear_idx; ++ dumper->next_seq = log_next_seq; ++ dumper->next_idx = log_next_idx; ++} ++ ++/** ++ * kmsg_dump_rewind - reset the interator ++ * @dumper: registered kmsg dumper ++ * ++ * Reset the dumper's iterator so that kmsg_dump_get_line() and ++ * kmsg_dump_get_buffer() can be called again and used multiple ++ * times within the same dumper.dump() callback. 
++ */ ++void kmsg_dump_rewind(struct kmsg_dumper *dumper) ++{ ++ unsigned long flags; ++ ++ logbuf_lock_irqsave(flags); ++ kmsg_dump_rewind_nolock(dumper); ++ logbuf_unlock_irqrestore(flags); ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_rewind); ++ ++#endif +diff -uprN kernel/kernel/ptrace.c kernel_new/kernel/ptrace.c +--- kernel/kernel/ptrace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/ptrace.c 2021-04-01 18:28:07.811863114 +0800 +@@ -824,6 +824,8 @@ static int ptrace_resume(struct task_str + user_disable_single_step(child); + } + ++ __ipipe_report_ptrace_resume(child, request); ++ + /* + * Change ->exit_code and ->state under siglock to avoid the race + * with wait_task_stopped() in between; a non-zero ->exit_code will +diff -uprN kernel/kernel/rcu/Kconfig.debug kernel_new/kernel/rcu/Kconfig.debug +--- kernel/kernel/rcu/Kconfig.debug 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/rcu/Kconfig.debug 2021-04-01 18:28:07.811863114 +0800 +@@ -5,7 +5,7 @@ + menu "RCU Debugging" + + config PROVE_RCU +- def_bool PROVE_LOCKING ++ def_bool PROVE_LOCKING && !IPIPE + + config TORTURE_TEST + tristate +diff -uprN kernel/kernel/sched/core.c kernel_new/kernel/sched/core.c +--- kernel/kernel/sched/core.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/core.c 2021-04-01 18:28:07.812863113 +0800 +@@ -1121,8 +1121,12 @@ static int __set_cpus_allowed_ptr(struct + } + + /* Can the task run on the task's current CPU? If so, we're done */ +- if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) { ++ __ipipe_report_setaffinity(p, task_cpu(p)); + goto out; ++ } ++ ++ __ipipe_report_setaffinity(p, dest_cpu); + + if (task_running(rq, p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; +@@ -1793,7 +1797,9 @@ void scheduler_ipi(void) + * however a fair share of IPIs are still resched only so this would + * somewhat pessimize the simple resched case. + */ ++#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI + irq_enter(); ++#endif + sched_ttwu_pending(); + + /* +@@ -1803,7 +1809,9 @@ void scheduler_ipi(void) + this_rq()->idle_balance = 1; + raise_softirq_irqoff(SCHED_SOFTIRQ); + } ++#ifndef IPIPE_ARCH_HAVE_VIRQ_IPI + irq_exit(); ++#endif + } + + static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) +@@ -1985,7 +1993,8 @@ try_to_wake_up(struct task_struct *p, un + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); +- if (!(p->state & state)) ++ if (!(p->state & state) || ++ (p->state & (TASK_NOWAKEUP|TASK_HARDENING))) + goto out; + + trace_sched_waking(p); +@@ -2796,6 +2805,7 @@ asmlinkage __visible void schedule_tail( + * PREEMPT_COUNT kernels). + */ + ++ __ipipe_complete_domain_migration(); + rq = finish_task_switch(prev); + balance_callback(rq); + preempt_enable(); +@@ -2864,6 +2874,9 @@ context_switch(struct rq *rq, struct tas + switch_to(prev, next, prev); + barrier(); + ++ if (unlikely(__ipipe_switch_tail())) ++ return NULL; ++ + return finish_task_switch(prev); + } + +@@ -3361,6 +3374,7 @@ static noinline void __schedule_bug(stru + */ + static inline void schedule_debug(struct task_struct *prev) + { ++ ipipe_root_only(); + #ifdef CONFIG_SCHED_STACK_END_CHECK + if (task_stack_end_corrupted(prev)) + panic("corrupted stack end detected inside scheduler\n"); +@@ -3460,7 +3474,7 @@ again: + * + * WARNING: must be called with preemption disabled! 
+ */ +-static void __sched notrace __schedule(bool preempt) ++static bool __sched notrace __schedule(bool preempt) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -3551,12 +3565,17 @@ static void __sched notrace __schedule(b + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next, &rf); ++ if (rq == NULL) ++ return true; /* task hijacked by head domain */ + } else { ++ prev->state &= ~TASK_HARDENING; + rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); + rq_unlock_irq(rq, &rf); + } + + balance_callback(rq); ++ ++ return false; + } + + void __noreturn do_task_dead(void) +@@ -3594,7 +3613,8 @@ asmlinkage __visible void __sched schedu + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ if (__schedule(false)) ++ return; + sched_preempt_enable_no_resched(); + } while (need_resched()); + } +@@ -3674,7 +3694,8 @@ static void __sched notrace preempt_sche + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ if (__schedule(true)) ++ return; + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -3697,7 +3718,7 @@ asmlinkage __visible void __sched notrac + * If there is a non-zero preempt_count or interrupts are disabled, + * we do not want to preempt the current task. Just return.. + */ +- if (likely(!preemptible())) ++ if (likely(!preemptible() || !ipipe_root_p)) + return; + + preempt_schedule_common(); +@@ -3723,7 +3744,7 @@ asmlinkage __visible void __sched notrac + { + enum ctx_state prev_ctx; + +- if (likely(!preemptible())) ++ if (likely(!preemptible() || !ipipe_root_p || hard_irqs_disabled())) + return; + + do { +@@ -4406,6 +4427,7 @@ change: + + prev_class = p->sched_class; + __setscheduler(rq, p, attr, pi); ++ __ipipe_report_setsched(p); + + if (queued) { + /* +@@ -5981,6 +6003,43 @@ int in_sched_functions(unsigned long add + && addr < (unsigned long)__sched_text_end); + } + ++#ifdef CONFIG_IPIPE ++ ++int __ipipe_migrate_head(void) ++{ ++ struct task_struct *p = current; ++ ++ preempt_disable(); ++ ++ IPIPE_WARN_ONCE(__this_cpu_read(ipipe_percpu.task_hijacked) != NULL); ++ ++ __this_cpu_write(ipipe_percpu.task_hijacked, p); ++ set_current_state(TASK_INTERRUPTIBLE | TASK_HARDENING); ++ sched_submit_work(p); ++ if (likely(__schedule(false))) ++ return 0; ++ ++ preempt_enable(); ++ return -ERESTARTSYS; ++} ++EXPORT_SYMBOL_GPL(__ipipe_migrate_head); ++ ++void __ipipe_reenter_root(void) ++{ ++ struct rq *rq; ++ struct task_struct *p; ++ ++ p = __this_cpu_read(ipipe_percpu.rqlock_owner); ++ BUG_ON(p == NULL); ++ ipipe_clear_thread_flag(TIP_HEAD); ++ rq = finish_task_switch(p); ++ balance_callback(rq); ++ preempt_enable_no_resched_notrace(); ++} ++EXPORT_SYMBOL_GPL(__ipipe_reenter_root); ++ ++#endif /* CONFIG_IPIPE */ ++ + #ifdef CONFIG_CGROUP_SCHED + /* + * Default task group. 
+diff -uprN kernel/kernel/sched/core.c.orig kernel_new/kernel/sched/core.c.orig +--- kernel/kernel/sched/core.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/core.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,7161 @@ ++/* ++ * kernel/sched/core.c ++ * ++ * Core kernel scheduler code and related syscalls ++ * ++ * Copyright (C) 1991-2002 Linus Torvalds ++ */ ++#include "sched.h" ++ ++#include ++ ++#include ++ ++#include ++#include ++ ++#include "../workqueue_internal.h" ++#include "../smpboot.h" ++ ++#include "pelt.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) ++/* ++ * Debugging: various feature bits ++ * ++ * If SCHED_DEBUG is disabled, each compilation unit has its own copy of ++ * sysctl_sched_features, defined in sched.h, to allow constants propagation ++ * at compile time and compiler optimization based on features default. ++ */ ++#define SCHED_FEAT(name, enabled) \ ++ (1UL << __SCHED_FEAT_##name) * enabled | ++const_debug unsigned int sysctl_sched_features = ++#include "features.h" ++ 0; ++#undef SCHED_FEAT ++#endif ++ ++/* ++ * Number of tasks to iterate in a single balance run. ++ * Limited because this is done with IRQs disabled. ++ */ ++const_debug unsigned int sysctl_sched_nr_migrate = 32; ++ ++/* ++ * period over which we measure -rt task CPU usage in us. ++ * default: 1s ++ */ ++unsigned int sysctl_sched_rt_period = 1000000; ++ ++__read_mostly int scheduler_running; ++ ++/* ++ * part of the period that we allow rt tasks to run in us. ++ * default: 0.95s ++ */ ++int sysctl_sched_rt_runtime = 950000; ++ ++/* ++ * __task_rq_lock - lock the rq @p resides on. ++ */ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ lockdep_assert_held(&p->pi_lock); ++ ++ for (;;) { ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { ++ rq_pin_lock(rq, rf); ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. ++ */ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ for (;;) { ++ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); ++ rq = task_rq(p); ++ raw_spin_lock(&rq->lock); ++ /* ++ * move_queued_task() task_rq_lock() ++ * ++ * ACQUIRE (rq->lock) ++ * [S] ->on_rq = MIGRATING [L] rq = task_rq() ++ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); ++ * [S] ->cpu = new_cpu [L] task_rq() ++ * [L] ->on_rq ++ * RELEASE (rq->lock) ++ * ++ * If we observe the old CPU in task_rq_lock(), the acquire of ++ * the old rq->lock will fully serialize against the stores. ++ * ++ * If we observe the new CPU in task_rq_lock(), the address ++ * dependency headed by '[L] rq = task_rq()' and the acquire ++ * will pair with the WMB to ensure we then also see migrating. 
++ */ ++ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { ++ rq_pin_lock(rq, rf); ++ return rq; ++ } ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++ ++ while (unlikely(task_on_rq_migrating(p))) ++ cpu_relax(); ++ } ++} ++ ++/* ++ * RQ-clock updating methods: ++ */ ++bool account_irqtime_to_task __read_mostly; ++static int __init setup_account_irqtime(char *str) ++{ ++ account_irqtime_to_task = true; ++ ++ return 0; ++} ++__setup("account-irqtime-to-task", setup_account_irqtime); ++ ++static void update_rq_clock_task(struct rq *rq, s64 delta) ++{ ++/* ++ * In theory, the compile should just see 0 here, and optimize out the call ++ * to sched_rt_avg_update. But I don't trust it... ++ */ ++ s64 __maybe_unused steal = 0, irq_delta = 0; ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ if (account_irqtime_to_task) ++ goto out; ++ ++ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; ++ ++ /* ++ * Since irq_time is only updated on {soft,}irq_exit, we might run into ++ * this case when a previous update_rq_clock() happened inside a ++ * {soft,}irq region. ++ * ++ * When this happens, we stop ->clock_task and only update the ++ * prev_irq_time stamp to account for the part that fit, so that a next ++ * update will consume the rest. This ensures ->clock_task is ++ * monotonic. ++ * ++ * It does however cause some slight miss-attribution of {soft,}irq ++ * time, a more accurate solution would be to update the irq_time using ++ * the current rq->clock timestamp, except that would require using ++ * atomic ops. ++ */ ++ if (irq_delta > delta) ++ irq_delta = delta; ++ ++ rq->prev_irq_time += irq_delta; ++ delta -= irq_delta; ++ ++out: ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ if (static_key_false((¶virt_steal_rq_enabled))) { ++ steal = paravirt_steal_clock(cpu_of(rq)); ++ steal -= rq->prev_steal_time_rq; ++ ++ if (unlikely(steal > delta)) ++ steal = delta; ++ ++ rq->prev_steal_time_rq += steal; ++ delta -= steal; ++ } ++#endif ++ ++ rq->clock_task += delta; ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) ++ update_irq_load_avg(rq, irq_delta + steal); ++#endif ++} ++ ++void update_rq_clock(struct rq *rq) ++{ ++ s64 delta; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (rq->clock_update_flags & RQCF_ACT_SKIP) ++ return; ++ ++#ifdef CONFIG_SCHED_DEBUG ++ if (sched_feat(WARN_DOUBLE_CLOCK)) ++ SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED); ++ rq->clock_update_flags |= RQCF_UPDATED; ++#endif ++ ++ delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; ++ if (delta < 0) ++ return; ++ rq->clock += delta; ++ update_rq_clock_task(rq, delta); ++} ++ ++ ++#ifdef CONFIG_SCHED_HRTICK ++/* ++ * Use HR-timers to deliver accurate preemption points. ++ */ ++ ++static void hrtick_clear(struct rq *rq) ++{ ++ if (hrtimer_active(&rq->hrtick_timer)) ++ hrtimer_cancel(&rq->hrtick_timer); ++} ++ ++/* ++ * High-resolution timer tick. ++ * Runs from hardirq context with interrupts disabled. 
++ */ ++static enum hrtimer_restart hrtick(struct hrtimer *timer) ++{ ++ struct rq *rq = container_of(timer, struct rq, hrtick_timer); ++ struct rq_flags rf; ++ ++ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); ++ ++ rq_lock(rq, &rf); ++ update_rq_clock(rq); ++ rq->curr->sched_class->task_tick(rq, rq->curr, 1); ++ rq_unlock(rq, &rf); ++ ++ return HRTIMER_NORESTART; ++} ++ ++#ifdef CONFIG_SMP ++ ++static void __hrtick_restart(struct rq *rq) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ++ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); ++} ++ ++/* ++ * called from hardirq (IPI) context ++ */ ++static void __hrtick_start(void *arg) ++{ ++ struct rq *rq = arg; ++ struct rq_flags rf; ++ ++ rq_lock(rq, &rf); ++ __hrtick_restart(rq); ++ rq->hrtick_csd_pending = 0; ++ rq_unlock(rq, &rf); ++} ++ ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ struct hrtimer *timer = &rq->hrtick_timer; ++ ktime_t time; ++ s64 delta; ++ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense and can cause timer DoS. ++ */ ++ delta = max_t(s64, delay, 10000LL); ++ time = ktime_add_ns(timer->base->get_time(), delta); ++ ++ hrtimer_set_expires(timer, time); ++ ++ if (rq == this_rq()) { ++ __hrtick_restart(rq); ++ } else if (!rq->hrtick_csd_pending) { ++ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); ++ rq->hrtick_csd_pending = 1; ++ } ++} ++ ++#else ++/* ++ * Called to set the hrtick timer state. ++ * ++ * called with rq->lock held and irqs disabled ++ */ ++void hrtick_start(struct rq *rq, u64 delay) ++{ ++ /* ++ * Don't schedule slices shorter than 10000ns, that just ++ * doesn't make sense. Rely on vruntime for fairness. ++ */ ++ delay = max_t(u64, delay, 10000LL); ++ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), ++ HRTIMER_MODE_REL_PINNED); ++} ++#endif /* CONFIG_SMP */ ++ ++static void hrtick_rq_init(struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ rq->hrtick_csd_pending = 0; ++ ++ rq->hrtick_csd.flags = 0; ++ rq->hrtick_csd.func = __hrtick_start; ++ rq->hrtick_csd.info = rq; ++#endif ++ ++ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rq->hrtick_timer.function = hrtick; ++} ++#else /* CONFIG_SCHED_HRTICK */ ++static inline void hrtick_clear(struct rq *rq) ++{ ++} ++ ++static inline void hrtick_rq_init(struct rq *rq) ++{ ++} ++#endif /* CONFIG_SCHED_HRTICK */ ++ ++/* ++ * cmpxchg based fetch_or, macro so it works for different integer types ++ */ ++#define fetch_or(ptr, mask) \ ++ ({ \ ++ typeof(ptr) _ptr = (ptr); \ ++ typeof(mask) _mask = (mask); \ ++ typeof(*_ptr) _old, _val = *_ptr; \ ++ \ ++ for (;;) { \ ++ _old = cmpxchg(_ptr, _val, _val | _mask); \ ++ if (_old == _val) \ ++ break; \ ++ _val = _old; \ ++ } \ ++ _old; \ ++}) ++ ++#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) ++/* ++ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, ++ * this avoids any races wrt polling state changes and thereby avoids ++ * spurious IPIs. ++ */ ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); ++} ++ ++/* ++ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. ++ * ++ * If this returns true, then the idle task promises to call ++ * sched_ttwu_pending() and reschedule soon. 
++ */ ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ struct thread_info *ti = task_thread_info(p); ++ typeof(ti->flags) old, val = READ_ONCE(ti->flags); ++ ++ for (;;) { ++ if (!(val & _TIF_POLLING_NRFLAG)) ++ return false; ++ if (val & _TIF_NEED_RESCHED) ++ return true; ++ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); ++ if (old == val) ++ break; ++ val = old; ++ } ++ return true; ++} ++ ++#else ++static bool set_nr_and_not_polling(struct task_struct *p) ++{ ++ set_tsk_need_resched(p); ++ return true; ++} ++ ++#ifdef CONFIG_SMP ++static bool set_nr_if_polling(struct task_struct *p) ++{ ++ return false; ++} ++#endif ++#endif ++ ++/** ++ * wake_q_add() - queue a wakeup for 'later' waking. ++ * @head: the wake_q_head to add @task to ++ * @task: the task to queue for 'later' wakeup ++ * ++ * Queue a task for later wakeup, most likely by the wake_up_q() call in the ++ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come ++ * instantly. ++ * ++ * This function must be used as-if it were wake_up_process(); IOW the task ++ * must be ready to be woken at this location. ++ */ ++void wake_q_add(struct wake_q_head *head, struct task_struct *task) ++{ ++ struct wake_q_node *node = &task->wake_q; ++ ++ /* ++ * Atomically grab the task, if ->wake_q is !nil already it means ++ * its already queued (either by us or someone else) and will get the ++ * wakeup due to that. ++ * ++ * In order to ensure that a pending wakeup will observe our pending ++ * state, even in the failed case, an explicit smp_mb() must be used. ++ */ ++ smp_mb__before_atomic(); ++ if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) ++ return; ++ ++ get_task_struct(task); ++ ++ /* ++ * The head is context local, there can be no concurrency. ++ */ ++ *head->lastp = node; ++ head->lastp = &node->next; ++} ++ ++void wake_up_q(struct wake_q_head *head) ++{ ++ struct wake_q_node *node = head->first; ++ ++ while (node != WAKE_Q_TAIL) { ++ struct task_struct *task; ++ ++ task = container_of(node, struct task_struct, wake_q); ++ BUG_ON(!task); ++ /* Task can safely be re-inserted now: */ ++ node = node->next; ++ task->wake_q.next = NULL; ++ ++ /* ++ * wake_up_process() executes a full barrier, which pairs with ++ * the queueing in wake_q_add() so as not to miss wakeups. ++ */ ++ wake_up_process(task); ++ put_task_struct(task); ++ } ++} ++ ++/* ++ * resched_curr - mark rq's current task 'to be rescheduled now'. ++ * ++ * On UP this means the setting of the need_resched flag, on SMP it ++ * might also involve a cross-CPU call to trigger the scheduler on ++ * the target CPU. ++ */ ++void resched_curr(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ cpu = cpu_of(rq); ++ ++ if (cpu == smp_processor_id()) { ++ set_tsk_need_resched(curr); ++ set_preempt_need_resched(); ++ return; ++ } ++ ++ if (set_nr_and_not_polling(curr)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++void resched_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ if (cpu_online(cpu) || cpu == smp_processor_id()) ++ resched_curr(rq); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_NO_HZ_COMMON ++/* ++ * In the semi idle case, use the nearest busy CPU for migrating timers ++ * from an idle CPU. This is good for power-savings. 
++ * ++ * We don't do similar optimization for completely idle system, as ++ * selecting an idle CPU will add more delays to the timers than intended ++ * (as that CPU's timer base may not be uptodate wrt jiffies etc). ++ */ ++int get_nohz_timer_target(void) ++{ ++ int i, cpu = smp_processor_id(); ++ struct sched_domain *sd; ++ ++ if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) ++ return cpu; ++ ++ rcu_read_lock(); ++ for_each_domain(cpu, sd) { ++ for_each_cpu(i, sched_domain_span(sd)) { ++ if (cpu == i) ++ continue; ++ ++ if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { ++ cpu = i; ++ goto unlock; ++ } ++ } ++ } ++ ++ if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) ++ cpu = housekeeping_any_cpu(HK_FLAG_TIMER); ++unlock: ++ rcu_read_unlock(); ++ return cpu; ++} ++ ++/* ++ * When add_timer_on() enqueues a timer into the timer wheel of an ++ * idle CPU then this timer might expire before the next timer event ++ * which is scheduled to wake up that CPU. In case of a completely ++ * idle system the next event might even be infinite time into the ++ * future. wake_up_idle_cpu() ensures that the CPU is woken up and ++ * leaves the inner idle loop so the newly added timer is taken into ++ * account when the CPU goes back to idle and evaluates the timer ++ * wheel for the next timer event. ++ */ ++static void wake_up_idle_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (cpu == smp_processor_id()) ++ return; ++ ++ if (set_nr_and_not_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++} ++ ++static bool wake_up_full_nohz_cpu(int cpu) ++{ ++ /* ++ * We just need the target to call irq_exit() and re-evaluate ++ * the next tick. The nohz full kick at least implies that. ++ * If needed we can still optimize that later with an ++ * empty IRQ. ++ */ ++ if (cpu_is_offline(cpu)) ++ return true; /* Don't try to wake offline CPUs. */ ++ if (tick_nohz_full_cpu(cpu)) { ++ if (cpu != smp_processor_id() || ++ tick_nohz_tick_stopped()) ++ tick_nohz_full_kick_cpu(cpu); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Wake up the specified CPU. If the CPU is going offline, it is the ++ * caller's responsibility to deal with the lost wakeup, for example, ++ * by hooking into the CPU_DEAD notifier like timers and hrtimers do. ++ */ ++void wake_up_nohz_cpu(int cpu) ++{ ++ if (!wake_up_full_nohz_cpu(cpu)) ++ wake_up_idle_cpu(cpu); ++} ++ ++static inline bool got_nohz_idle_kick(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ if (!(atomic_read(nohz_flags(cpu)) & NOHZ_KICK_MASK)) ++ return false; ++ ++ if (idle_cpu(cpu) && !need_resched()) ++ return true; ++ ++ /* ++ * We can't run Idle Load Balance on this CPU for this time so we ++ * cancel it and clear NOHZ_BALANCE_KICK ++ */ ++ atomic_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); ++ return false; ++} ++ ++#else /* CONFIG_NO_HZ_COMMON */ ++ ++static inline bool got_nohz_idle_kick(void) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_NO_HZ_COMMON */ ++ ++#ifdef CONFIG_NO_HZ_FULL ++bool sched_can_stop_tick(struct rq *rq) ++{ ++ int fifo_nr_running; ++ ++ /* Deadline tasks, even if single, need the tick */ ++ if (rq->dl.dl_nr_running) ++ return false; ++ ++ /* ++ * If there are more than one RR tasks, we need the tick to effect the ++ * actual RR behaviour. ++ */ ++ if (rq->rt.rr_nr_running) { ++ if (rq->rt.rr_nr_running == 1) ++ return true; ++ else ++ return false; ++ } ++ ++ /* ++ * If there's no RR tasks, but FIFO tasks, we can skip the tick, no ++ * forced preemption between FIFO tasks. 
++ */ ++ fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running; ++ if (fifo_nr_running) ++ return true; ++ ++ /* ++ * If there are no DL,RR/FIFO tasks, there must only be CFS tasks left; ++ * if there's more than one we need the tick for involuntary ++ * preemption. ++ */ ++ if (rq->nr_running > 1) ++ return false; ++ ++ return true; ++} ++#endif /* CONFIG_NO_HZ_FULL */ ++#endif /* CONFIG_SMP */ ++ ++#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ ++ (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) ++/* ++ * Iterate task_group tree rooted at *from, calling @down when first entering a ++ * node and @up when leaving it for the final time. ++ * ++ * Caller must hold rcu_lock or sufficient equivalent. ++ */ ++int walk_tg_tree_from(struct task_group *from, ++ tg_visitor down, tg_visitor up, void *data) ++{ ++ struct task_group *parent, *child; ++ int ret; ++ ++ parent = from; ++ ++down: ++ ret = (*down)(parent, data); ++ if (ret) ++ goto out; ++ list_for_each_entry_rcu(child, &parent->children, siblings) { ++ parent = child; ++ goto down; ++ ++up: ++ continue; ++ } ++ ret = (*up)(parent, data); ++ if (ret || parent == from) ++ goto out; ++ ++ child = parent; ++ parent = parent->parent; ++ if (parent) ++ goto up; ++out: ++ return ret; ++} ++ ++int tg_nop(struct task_group *tg, void *data) ++{ ++ return 0; ++} ++#endif ++ ++static void set_load_weight(struct task_struct *p, bool update_load) ++{ ++ int prio = p->static_prio - MAX_RT_PRIO; ++ struct load_weight *load = &p->se.load; ++ ++ /* ++ * SCHED_IDLE tasks get minimal weight: ++ */ ++ if (idle_policy(p->policy)) { ++ load->weight = scale_load(WEIGHT_IDLEPRIO); ++ load->inv_weight = WMULT_IDLEPRIO; ++ return; ++ } ++ ++ /* ++ * SCHED_OTHER tasks have to update their load when changing their ++ * weight ++ */ ++ if (update_load && p->sched_class == &fair_sched_class) { ++ reweight_task(p, prio); ++ } else { ++ load->weight = scale_load(sched_prio_to_weight[prio]); ++ load->inv_weight = sched_prio_to_wmult[prio]; ++ } ++} ++ ++static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (!(flags & ENQUEUE_NOCLOCK)) ++ update_rq_clock(rq); ++ ++ if (!(flags & ENQUEUE_RESTORE)) ++ sched_info_queued(rq, p); ++ ++ p->sched_class->enqueue_task(rq, p, flags); ++} ++ ++static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (!(flags & DEQUEUE_NOCLOCK)) ++ update_rq_clock(rq); ++ ++ if (!(flags & DEQUEUE_SAVE)) ++ sched_info_dequeued(rq, p); ++ ++ p->sched_class->dequeue_task(rq, p, flags); ++} ++ ++void activate_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible--; ++ ++ enqueue_task(rq, p, flags); ++} ++ ++void deactivate_task(struct rq *rq, struct task_struct *p, int flags) ++{ ++ if (task_contributes_to_load(p)) ++ rq->nr_uninterruptible++; ++ ++ dequeue_task(rq, p, flags); ++} ++ ++/* ++ * __normal_prio - return the priority that is based on the static prio ++ */ ++static inline int __normal_prio(struct task_struct *p) ++{ ++ return p->static_prio; ++} ++ ++/* ++ * Calculate the expected normal priority: i.e. priority ++ * without taking RT-inheritance into account. Might be ++ * boosted by interactivity modifiers. Changes upon fork, ++ * setprio syscalls, and whenever the interactivity ++ * estimator recalculates. 
++ */ ++static inline int normal_prio(struct task_struct *p) ++{ ++ int prio; ++ ++ if (task_has_dl_policy(p)) ++ prio = MAX_DL_PRIO-1; ++ else if (task_has_rt_policy(p)) ++ prio = MAX_RT_PRIO-1 - p->rt_priority; ++ else ++ prio = __normal_prio(p); ++ return prio; ++} ++ ++/* ++ * Calculate the current priority, i.e. the priority ++ * taken into account by the scheduler. This value might ++ * be boosted by RT tasks, or might be boosted by ++ * interactivity modifiers. Will be RT if the task got ++ * RT-boosted. If not then it returns p->normal_prio. ++ */ ++static int effective_prio(struct task_struct *p) ++{ ++ p->normal_prio = normal_prio(p); ++ /* ++ * If we are RT tasks or we were boosted to RT priority, ++ * keep the priority unchanged. Otherwise, update priority ++ * to the normal priority: ++ */ ++ if (!rt_prio(p->prio)) ++ return p->normal_prio; ++ return p->prio; ++} ++ ++/** ++ * task_curr - is this task currently executing on a CPU? ++ * @p: the task in question. ++ * ++ * Return: 1 if the task is currently executing. 0 otherwise. ++ */ ++inline int task_curr(const struct task_struct *p) ++{ ++ return cpu_curr(task_cpu(p)) == p; ++} ++ ++/* ++ * switched_from, switched_to and prio_changed must _NOT_ drop rq->lock, ++ * use the balance_callback list if you want balancing. ++ * ++ * this means any call to check_class_changed() must be followed by a call to ++ * balance_callback(). ++ */ ++static inline void check_class_changed(struct rq *rq, struct task_struct *p, ++ const struct sched_class *prev_class, ++ int oldprio) ++{ ++ if (prev_class != p->sched_class) { ++ if (prev_class->switched_from) ++ prev_class->switched_from(rq, p); ++ ++ p->sched_class->switched_to(rq, p); ++ } else if (oldprio != p->prio || dl_task(p)) ++ p->sched_class->prio_changed(rq, p, oldprio); ++} ++ ++void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) ++{ ++ const struct sched_class *class; ++ ++ if (p->sched_class == rq->curr->sched_class) { ++ rq->curr->sched_class->check_preempt_curr(rq, p, flags); ++ } else { ++ for_each_class(class) { ++ if (class == rq->curr->sched_class) ++ break; ++ if (class == p->sched_class) { ++ resched_curr(rq); ++ break; ++ } ++ } ++ } ++ ++ /* ++ * A queue event has occurred, and we're going to schedule. In ++ * this case, we can save a useless back to back clock update. ++ */ ++ if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) ++ rq_clock_skip_update(rq); ++} ++ ++#ifdef CONFIG_SMP ++ ++static inline bool is_per_cpu_kthread(struct task_struct *p) ++{ ++ if (!(p->flags & PF_KTHREAD)) ++ return false; ++ ++ if (p->nr_cpus_allowed != 1) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * Per-CPU kthreads are allowed to run on !actie && online CPUs, see ++ * __set_cpus_allowed_ptr() and select_fallback_rq(). ++ */ ++static inline bool is_cpu_allowed(struct task_struct *p, int cpu) ++{ ++ if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) ++ return false; ++ ++ if (is_per_cpu_kthread(p)) ++ return cpu_online(cpu); ++ ++ return cpu_active(cpu); ++} ++ ++/* ++ * This is how migration works: ++ * ++ * 1) we invoke migration_cpu_stop() on the target CPU using ++ * stop_one_cpu(). ++ * 2) stopper starts to run (implicitly forcing the migrated thread ++ * off the CPU) ++ * 3) it checks whether the migrated task is still in the wrong runqueue. ++ * 4) if it's in the wrong runqueue then the migration thread removes ++ * it and puts it into the right queue. ++ * 5) stopper completes and stop_one_cpu() returns and the migration ++ * is done. 
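The normal_prio()/effective_prio() helpers above collapse the three scheduling classes onto one prio scale. The short userspace sketch below is not part of the patch; it only replays that arithmetic, assuming the usual mainline constants for this kernel series (MAX_DL_PRIO == 0, MAX_RT_PRIO == 100, static_prio == 120 + nice), so a deadline task maps to -1, rt_priority 1..99 maps to 98..0, and nice -20..19 maps to 100..139.

```c
/*
 * Illustrative only, not part of the patch: the priority bands used by
 * normal_prio() above, with the usual mainline constants assumed.
 */
#include <stdio.h>

#define MAX_DL_PRIO 0
#define MAX_RT_PRIO 100
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)

int main(void)
{
	/* SCHED_DEADLINE tasks all map below the RT range. */
	printf("deadline task   -> prio %d\n", MAX_DL_PRIO - 1);

	/* SCHED_FIFO/RR: a higher rt_priority gives a numerically lower prio. */
	for (int rt_priority = 1; rt_priority <= 99; rt_priority += 49)
		printf("rt_priority %2d  -> prio %d\n",
		       rt_priority, MAX_RT_PRIO - 1 - rt_priority);

	/* SCHED_NORMAL: prio is simply the nice-derived static_prio. */
	for (int nice = -20; nice <= 19; nice += 13)
		printf("nice %3d        -> prio %d\n", nice, NICE_TO_PRIO(nice));

	return 0;
}
```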
++ */ ++ ++/* ++ * move_queued_task - move a queued task to new rq. ++ * ++ * Returns (locked) new rq. Old rq's lock is released. ++ */ ++static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, ++ struct task_struct *p, int new_cpu) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); ++ dequeue_task(rq, p, DEQUEUE_NOCLOCK); ++ set_task_cpu(p, new_cpu); ++ rq_unlock(rq, rf); ++ ++ rq = cpu_rq(new_cpu); ++ ++ rq_lock(rq, rf); ++ BUG_ON(task_cpu(p) != new_cpu); ++ enqueue_task(rq, p, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ check_preempt_curr(rq, p, 0); ++ ++ return rq; ++} ++ ++struct migration_arg { ++ struct task_struct *task; ++ int dest_cpu; ++}; ++ ++/* ++ * Move (not current) task off this CPU, onto the destination CPU. We're doing ++ * this because either it can't run here any more (set_cpus_allowed() ++ * away from this CPU, or CPU going down), or because we're ++ * attempting to rebalance this task on exec (sched_exec). ++ * ++ * So we race with normal scheduler movements, but that's OK, as long ++ * as the task is no longer on this CPU. ++ */ ++static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, ++ struct task_struct *p, int dest_cpu) ++{ ++ /* Affinity changed (again). */ ++ if (!is_cpu_allowed(p, dest_cpu)) ++ return rq; ++ ++ update_rq_clock(rq); ++ rq = move_queued_task(rq, rf, p, dest_cpu); ++ ++ return rq; ++} ++ ++/* ++ * migration_cpu_stop - this will be executed by a highprio stopper thread ++ * and performs thread migration by bumping thread off CPU then ++ * 'pushing' onto another runqueue. ++ */ ++static int migration_cpu_stop(void *data) ++{ ++ struct migration_arg *arg = data; ++ struct task_struct *p = arg->task; ++ struct rq *rq = this_rq(); ++ struct rq_flags rf; ++ ++ /* ++ * The original target CPU might have gone down and we might ++ * be on another CPU but it doesn't matter. ++ */ ++ local_irq_disable(); ++ /* ++ * We need to explicitly wake pending tasks before running ++ * __migrate_task() such that we will not miss enforcing cpus_allowed ++ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. ++ */ ++ sched_ttwu_pending(); ++ ++ raw_spin_lock(&p->pi_lock); ++ rq_lock(rq, &rf); ++ /* ++ * If task_rq(p) != rq, it cannot be migrated here, because we're ++ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because ++ * we're holding p->pi_lock. ++ */ ++ if (task_rq(p) == rq) { ++ if (task_on_rq_queued(p)) ++ rq = __migrate_task(rq, &rf, p, arg->dest_cpu); ++ else ++ p->wake_cpu = arg->dest_cpu; ++ } ++ rq_unlock(rq, &rf); ++ raw_spin_unlock(&p->pi_lock); ++ ++ local_irq_enable(); ++ return 0; ++} ++ ++/* ++ * sched_class::set_cpus_allowed must do the below, but is not required to ++ * actually call this function. ++ */ ++void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ cpumask_copy(&p->cpus_allowed, new_mask); ++ p->nr_cpus_allowed = cpumask_weight(new_mask); ++} ++ ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ struct rq *rq = task_rq(p); ++ bool queued, running; ++ ++ lockdep_assert_held(&p->pi_lock); ++ ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ ++ if (queued) { ++ /* ++ * Because __kthread_bind() calls this on blocked tasks without ++ * holding rq->lock. 
++ */ ++ lockdep_assert_held(&rq->lock); ++ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); ++ } ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->sched_class->set_cpus_allowed(p, new_mask); ++ ++ if (queued) ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ if (running) ++ set_curr_task(rq, p); ++} ++ ++/* ++ * Change a given task's CPU affinity. Migrate the thread to a ++ * proper CPU and schedule it away if the CPU it's executing on ++ * is removed from the allowed bitmask. ++ * ++ * NOTE: the caller must have a valid reference to the task, the ++ * task must not exit() & deallocate itself prematurely. The ++ * call is not atomic; no spinlocks may be held. ++ */ ++static int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ const struct cpumask *cpu_valid_mask = cpu_active_mask; ++ unsigned int dest_cpu; ++ struct rq_flags rf; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * Kernel threads are allowed on online && !active CPUs ++ */ ++ cpu_valid_mask = cpu_online_mask; ++ } ++ ++ /* ++ * Must re-check here, to close a race against __kthread_bind(), ++ * sched_setaffinity() is not guaranteed to observe the flag. ++ */ ++ if (check && (p->flags & PF_NO_SETAFFINITY)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (cpumask_equal(&p->cpus_allowed, new_mask)) ++ goto out; ++ ++ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); ++ if (dest_cpu >= nr_cpu_ids) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ do_set_cpus_allowed(p, new_mask); ++ ++ if (p->flags & PF_KTHREAD) { ++ /* ++ * For kernel threads that do indeed end up on online && ++ * !active we want to ensure they are strict per-CPU threads. ++ */ ++ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && ++ !cpumask_intersects(new_mask, cpu_active_mask) && ++ p->nr_cpus_allowed != 1); ++ } ++ ++ /* Can the task run on the task's current CPU? If so, we're done */ ++ if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ goto out; ++ ++ if (task_running(rq, p) || p->state == TASK_WAKING) { ++ struct migration_arg arg = { p, dest_cpu }; ++ /* Need help from migration thread: drop lock and wait. */ ++ task_rq_unlock(rq, p, &rf); ++ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); ++ tlb_migrate_finish(p->mm); ++ return 0; ++ } else if (task_on_rq_queued(p)) { ++ /* ++ * OK, since we're going to drop the lock immediately ++ * afterwards anyway. ++ */ ++ rq = move_queued_task(rq, &rf, p, dest_cpu); ++ } ++out: ++ task_rq_unlock(rq, p, &rf); ++ ++ return ret; ++} ++ ++int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) ++{ ++ return __set_cpus_allowed_ptr(p, new_mask, false); ++} ++EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); ++ ++void set_task_cpu(struct task_struct *p, unsigned int new_cpu) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * We should never call set_task_cpu() on a blocked task, ++ * ttwu() will sort out the placement. ++ */ ++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && ++ !p->on_rq); ++ ++ /* ++ * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING, ++ * because schedstat_wait_{start,end} rebase migrating task's wait_start ++ * time relying on p->on_rq. ++ */ ++ WARN_ON_ONCE(p->state == TASK_RUNNING && ++ p->sched_class == &fair_sched_class && ++ (p->on_rq && !task_on_rq_migrating(p))); ++ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * The caller should hold either p->pi_lock or rq->lock, when changing ++ * a task's CPU. 
->pi_lock for waking tasks, rq->lock for runnable tasks. ++ * ++ * sched_move_task() holds both and thus holding either pins the cgroup, ++ * see task_group(). ++ * ++ * Furthermore, all task_rq users should acquire both locks, see ++ * task_rq_lock(). ++ */ ++ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || ++ lockdep_is_held(&task_rq(p)->lock))); ++#endif ++ /* ++ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. ++ */ ++ WARN_ON_ONCE(!cpu_online(new_cpu)); ++#endif ++ ++ trace_sched_migrate_task(p, new_cpu); ++ ++ if (task_cpu(p) != new_cpu) { ++ if (p->sched_class->migrate_task_rq) ++ p->sched_class->migrate_task_rq(p, new_cpu); ++ p->se.nr_migrations++; ++ rseq_migrate(p); ++ perf_event_task_migrate(p); ++ } ++ ++ __set_task_cpu(p, new_cpu); ++} ++ ++#ifdef CONFIG_NUMA_BALANCING ++static void __migrate_swap_task(struct task_struct *p, int cpu) ++{ ++ if (task_on_rq_queued(p)) { ++ struct rq *src_rq, *dst_rq; ++ struct rq_flags srf, drf; ++ ++ src_rq = task_rq(p); ++ dst_rq = cpu_rq(cpu); ++ ++ rq_pin_lock(src_rq, &srf); ++ rq_pin_lock(dst_rq, &drf); ++ ++ p->on_rq = TASK_ON_RQ_MIGRATING; ++ deactivate_task(src_rq, p, 0); ++ set_task_cpu(p, cpu); ++ activate_task(dst_rq, p, 0); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ check_preempt_curr(dst_rq, p, 0); ++ ++ rq_unpin_lock(dst_rq, &drf); ++ rq_unpin_lock(src_rq, &srf); ++ ++ } else { ++ /* ++ * Task isn't running anymore; make it appear like we migrated ++ * it before it went to sleep. This means on wakeup we make the ++ * previous CPU our target instead of where it really is. ++ */ ++ p->wake_cpu = cpu; ++ } ++} ++ ++struct migration_swap_arg { ++ struct task_struct *src_task, *dst_task; ++ int src_cpu, dst_cpu; ++}; ++ ++static int migrate_swap_stop(void *data) ++{ ++ struct migration_swap_arg *arg = data; ++ struct rq *src_rq, *dst_rq; ++ int ret = -EAGAIN; ++ ++ if (!cpu_active(arg->src_cpu) || !cpu_active(arg->dst_cpu)) ++ return -EAGAIN; ++ ++ src_rq = cpu_rq(arg->src_cpu); ++ dst_rq = cpu_rq(arg->dst_cpu); ++ ++ double_raw_lock(&arg->src_task->pi_lock, ++ &arg->dst_task->pi_lock); ++ double_rq_lock(src_rq, dst_rq); ++ ++ if (task_cpu(arg->dst_task) != arg->dst_cpu) ++ goto unlock; ++ ++ if (task_cpu(arg->src_task) != arg->src_cpu) ++ goto unlock; ++ ++ if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) ++ goto unlock; ++ ++ if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) ++ goto unlock; ++ ++ __migrate_swap_task(arg->src_task, arg->dst_cpu); ++ __migrate_swap_task(arg->dst_task, arg->src_cpu); ++ ++ ret = 0; ++ ++unlock: ++ double_rq_unlock(src_rq, dst_rq); ++ raw_spin_unlock(&arg->dst_task->pi_lock); ++ raw_spin_unlock(&arg->src_task->pi_lock); ++ ++ return ret; ++} ++ ++/* ++ * Cross migrate two tasks ++ */ ++int migrate_swap(struct task_struct *cur, struct task_struct *p, ++ int target_cpu, int curr_cpu) ++{ ++ struct migration_swap_arg arg; ++ int ret = -EINVAL; ++ ++ arg = (struct migration_swap_arg){ ++ .src_task = cur, ++ .src_cpu = curr_cpu, ++ .dst_task = p, ++ .dst_cpu = target_cpu, ++ }; ++ ++ if (arg.src_cpu == arg.dst_cpu) ++ goto out; ++ ++ /* ++ * These three tests are all lockless; this is OK since all of them ++ * will be re-checked with proper locks held further down the line. 
++ */ ++ if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) ++ goto out; ++ ++ if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) ++ goto out; ++ ++ if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) ++ goto out; ++ ++ trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); ++ ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); ++ ++out: ++ return ret; ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++/* ++ * wait_task_inactive - wait for a thread to unschedule. ++ * ++ * If @match_state is nonzero, it's the @p->state value just checked and ++ * not expected to change. If it changes, i.e. @p might have woken up, ++ * then return zero. When we succeed in waiting for @p to be off its CPU, ++ * we return a positive number (its total switch count). If a second call ++ * a short while later returns the same number, the caller can be sure that ++ * @p has remained unscheduled the whole time. ++ * ++ * The caller must ensure that the task *will* unschedule sometime soon, ++ * else this function might spin for a *long* time. This function can't ++ * be called with interrupts off, or it may introduce deadlock with ++ * smp_call_function() if an IPI is sent by the same process we are ++ * waiting to become inactive. ++ */ ++unsigned long wait_task_inactive(struct task_struct *p, long match_state) ++{ ++ int running, queued; ++ struct rq_flags rf; ++ unsigned long ncsw; ++ struct rq *rq; ++ ++ for (;;) { ++ /* ++ * We do the initial early heuristics without holding ++ * any task-queue locks at all. We'll only try to get ++ * the runqueue lock when things look like they will ++ * work out! ++ */ ++ rq = task_rq(p); ++ ++ /* ++ * If the task is actively running on another CPU ++ * still, just relax and busy-wait without holding ++ * any locks. ++ * ++ * NOTE! Since we don't hold any locks, it's not ++ * even sure that "rq" stays as the right runqueue! ++ * But we don't care, since "task_running()" will ++ * return false if the runqueue has changed and p ++ * is actually now running somewhere else! ++ */ ++ while (task_running(rq, p)) { ++ if (match_state && unlikely(p->state != match_state)) ++ return 0; ++ cpu_relax(); ++ } ++ ++ /* ++ * Ok, time to look more closely! We need the rq ++ * lock now, to be *sure*. If we're wrong, we'll ++ * just go back and repeat. ++ */ ++ rq = task_rq_lock(p, &rf); ++ trace_sched_wait_task(p); ++ running = task_running(rq, p); ++ queued = task_on_rq_queued(p); ++ ncsw = 0; ++ if (!match_state || p->state == match_state) ++ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ ++ task_rq_unlock(rq, p, &rf); ++ ++ /* ++ * If it changed from the expected state, bail out now. ++ */ ++ if (unlikely(!ncsw)) ++ break; ++ ++ /* ++ * Was it really running after all now that we ++ * checked with the proper locks actually held? ++ * ++ * Oops. Go back and try again.. ++ */ ++ if (unlikely(running)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* ++ * It's not enough that it's not actively running, ++ * it must be off the runqueue _entirely_, and not ++ * preempted! ++ * ++ * So if it was still runnable (but just not actively ++ * running right now), it's preempted, and we should ++ * yield - it could be a while. ++ */ ++ if (unlikely(queued)) { ++ ktime_t to = NSEC_PER_SEC / HZ; ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_hrtimeout(&to, HRTIMER_MODE_REL); ++ continue; ++ } ++ ++ /* ++ * Ahh, all good. It wasn't running, and it wasn't ++ * runnable, which means that it will never become ++ * running in the future either. 
We're all done! ++ */ ++ break; ++ } ++ ++ return ncsw; ++} ++ ++/*** ++ * kick_process - kick a running thread to enter/exit the kernel ++ * @p: the to-be-kicked thread ++ * ++ * Cause a process which is running on another CPU to enter ++ * kernel-mode, without any delay. (to get signals handled.) ++ * ++ * NOTE: this function doesn't have to take the runqueue lock, ++ * because all it wants to ensure is that the remote task enters ++ * the kernel. If the IPI races and the task has been migrated ++ * to another CPU then no harm is done and the purpose has been ++ * achieved as well. ++ */ ++void kick_process(struct task_struct *p) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if ((cpu != smp_processor_id()) && task_curr(p)) ++ smp_send_reschedule(cpu); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(kick_process); ++ ++/* ++ * ->cpus_allowed is protected by both rq->lock and p->pi_lock ++ * ++ * A few notes on cpu_active vs cpu_online: ++ * ++ * - cpu_active must be a subset of cpu_online ++ * ++ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, ++ * see __set_cpus_allowed_ptr(). At this point the newly online ++ * CPU isn't yet part of the sched domains, and balancing will not ++ * see it. ++ * ++ * - on CPU-down we clear cpu_active() to mask the sched domains and ++ * avoid the load balancer to place new tasks on the to be removed ++ * CPU. Existing tasks will remain running there and will be taken ++ * off. ++ * ++ * This means that fallback selection must not select !active CPUs. ++ * And can assume that any active CPU must be online. Conversely ++ * select_task_rq() below may allow selection of !active CPUs in order ++ * to satisfy the above rules. ++ */ ++static int select_fallback_rq(int cpu, struct task_struct *p) ++{ ++ int nid = cpu_to_node(cpu); ++ const struct cpumask *nodemask = NULL; ++ enum { cpuset, possible, fail } state = cpuset; ++ int dest_cpu; ++ ++ /* ++ * If the node that the CPU is on has been offlined, cpu_to_node() ++ * will return -1. There is no CPU on the node, and we should ++ * select the CPU on the other node. ++ */ ++ if (nid != -1) { ++ nodemask = cpumask_of_node(nid); ++ ++ /* Look for allowed, online CPU in same node. */ ++ for_each_cpu(dest_cpu, nodemask) { ++ if (!cpu_active(dest_cpu)) ++ continue; ++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) ++ return dest_cpu; ++ } ++ } ++ ++ for (;;) { ++ /* Any allowed, online CPU? */ ++ for_each_cpu(dest_cpu, &p->cpus_allowed) { ++ if (!is_cpu_allowed(p, dest_cpu)) ++ continue; ++ ++ goto out; ++ } ++ ++ /* No more Mr. Nice Guy. */ ++ switch (state) { ++ case cpuset: ++ if (IS_ENABLED(CONFIG_CPUSETS)) { ++ cpuset_cpus_allowed_fallback(p); ++ state = possible; ++ break; ++ } ++ /* Fall-through */ ++ case possible: ++ do_set_cpus_allowed(p, cpu_possible_mask); ++ state = fail; ++ break; ++ ++ case fail: ++ BUG(); ++ break; ++ } ++ } ++ ++out: ++ if (state != cpuset) { ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. ++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk_deferred("process %d (%s) no longer affine to cpu%d\n", ++ task_pid_nr(p), p->comm, cpu); ++ } ++ } ++ ++ return dest_cpu; ++} ++ ++/* ++ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. 
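The comment block above spells out the calling convention for wait_task_inactive(): a zero return means @match_state no longer held, and two calls returning the same non-zero switch count mean the task never ran in between. The kernel-style sketch below is not taken from the patch; the helper name stayed_unscheduled() is made up for illustration, and it assumes <linux/delay.h> for msleep().

```c
/*
 * Sketch of the documented wait_task_inactive() usage pattern
 * (hypothetical helper, for illustration only).
 */
static bool stayed_unscheduled(struct task_struct *p)
{
	unsigned long ncsw, ncsw2;

	ncsw = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	if (!ncsw)
		return false;	/* state changed under us, e.g. the task woke up */

	msleep(10);		/* arbitrary delay, just for the example */

	ncsw2 = wait_task_inactive(p, TASK_UNINTERRUPTIBLE);
	return ncsw2 == ncsw;	/* same count: it never got scheduled in between */
}
```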
++ */ ++static inline ++int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) ++{ ++ lockdep_assert_held(&p->pi_lock); ++ ++ if (p->nr_cpus_allowed > 1) ++ cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); ++ else ++ cpu = cpumask_any(&p->cpus_allowed); ++ ++ /* ++ * In order not to call set_task_cpu() on a blocking task we need ++ * to rely on ttwu() to place the task on a valid ->cpus_allowed ++ * CPU. ++ * ++ * Since this is common to all placement strategies, this lives here. ++ * ++ * [ this allows ->select_task() to simply return task_cpu(p) and ++ * not worry about this generic constraint ] ++ */ ++ if (unlikely(!is_cpu_allowed(p, cpu))) ++ cpu = select_fallback_rq(task_cpu(p), p); ++ ++ return cpu; ++} ++ ++static void update_avg(u64 *avg, u64 sample) ++{ ++ s64 diff = sample - *avg; ++ *avg += diff >> 3; ++} ++ ++void sched_set_stop_task(int cpu, struct task_struct *stop) ++{ ++ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; ++ struct task_struct *old_stop = cpu_rq(cpu)->stop; ++ ++ if (stop) { ++ /* ++ * Make it appear like a SCHED_FIFO task, its something ++ * userspace knows about and won't get confused about. ++ * ++ * Also, it will make PI more or less work without too ++ * much confusion -- but then, stop work should not ++ * rely on PI working anyway. ++ */ ++ sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); ++ ++ stop->sched_class = &stop_sched_class; ++ } ++ ++ cpu_rq(cpu)->stop = stop; ++ ++ if (old_stop) { ++ /* ++ * Reset it back to a normal scheduling class so that ++ * it can die in pieces. ++ */ ++ old_stop->sched_class = &rt_sched_class; ++ } ++} ++ ++#else ++ ++static inline int __set_cpus_allowed_ptr(struct task_struct *p, ++ const struct cpumask *new_mask, bool check) ++{ ++ return set_cpus_allowed_ptr(p, new_mask); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static void ++ttwu_stat(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq; ++ ++ if (!schedstat_enabled()) ++ return; ++ ++ rq = this_rq(); ++ ++#ifdef CONFIG_SMP ++ if (cpu == rq->cpu) { ++ __schedstat_inc(rq->ttwu_local); ++ __schedstat_inc(p->se.statistics.nr_wakeups_local); ++ } else { ++ struct sched_domain *sd; ++ ++ __schedstat_inc(p->se.statistics.nr_wakeups_remote); ++ rcu_read_lock(); ++ for_each_domain(rq->cpu, sd) { ++ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { ++ __schedstat_inc(sd->ttwu_wake_remote); ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ } ++ ++ if (wake_flags & WF_MIGRATED) ++ __schedstat_inc(p->se.statistics.nr_wakeups_migrate); ++#endif /* CONFIG_SMP */ ++ ++ __schedstat_inc(rq->ttwu_count); ++ __schedstat_inc(p->se.statistics.nr_wakeups); ++ ++ if (wake_flags & WF_SYNC) ++ __schedstat_inc(p->se.statistics.nr_wakeups_sync); ++} ++ ++static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) ++{ ++ activate_task(rq, p, en_flags); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ ++ /* If a worker is waking up, notify the workqueue: */ ++ if (p->flags & PF_WQ_WORKER) ++ wq_worker_waking_up(p, cpu_of(rq)); ++} ++ ++/* ++ * Mark the task runnable and perform wakeup-preemption. ++ */ ++static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, ++ struct rq_flags *rf) ++{ ++ check_preempt_curr(rq, p, wake_flags); ++ p->state = TASK_RUNNING; ++ trace_sched_wakeup(p); ++ ++#ifdef CONFIG_SMP ++ if (p->sched_class->task_woken) { ++ /* ++ * Our task @p is fully woken up and running; so its safe to ++ * drop the rq->lock, hereafter rq is only used for statistics. 
++ */ ++ rq_unpin_lock(rq, rf); ++ p->sched_class->task_woken(rq, p); ++ rq_repin_lock(rq, rf); ++ } ++ ++ if (rq->idle_stamp) { ++ u64 delta = rq_clock(rq) - rq->idle_stamp; ++ u64 max = 2*rq->max_idle_balance_cost; ++ ++ update_avg(&rq->avg_idle, delta); ++ ++ if (rq->avg_idle > max) ++ rq->avg_idle = max; ++ ++ rq->idle_stamp = 0; ++ } ++#endif ++} ++ ++static void ++ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, ++ struct rq_flags *rf) ++{ ++ int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; ++ ++ lockdep_assert_held(&rq->lock); ++ ++#ifdef CONFIG_SMP ++ if (p->sched_contributes_to_load) ++ rq->nr_uninterruptible--; ++ ++ if (wake_flags & WF_MIGRATED) ++ en_flags |= ENQUEUE_MIGRATED; ++#endif ++ ++ ttwu_activate(rq, p, en_flags); ++ ttwu_do_wakeup(rq, p, wake_flags, rf); ++} ++ ++/* ++ * Called in case the task @p isn't fully descheduled from its runqueue, ++ * in this case we must do a remote wakeup. Its a 'light' wakeup though, ++ * since all we need to do is flip p->state to TASK_RUNNING, since ++ * the task is still ->on_rq. ++ */ ++static int ttwu_remote(struct task_struct *p, int wake_flags) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ int ret = 0; ++ ++ rq = __task_rq_lock(p, &rf); ++ if (task_on_rq_queued(p)) { ++ /* check_preempt_curr() may use rq clock */ ++ update_rq_clock(rq); ++ ttwu_do_wakeup(rq, p, wake_flags, &rf); ++ ret = 1; ++ } ++ __task_rq_unlock(rq, &rf); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_SMP ++void sched_ttwu_pending(void) ++{ ++ struct rq *rq = this_rq(); ++ struct llist_node *llist = llist_del_all(&rq->wake_list); ++ struct task_struct *p, *t; ++ struct rq_flags rf; ++ ++ if (!llist) ++ return; ++ ++ rq_lock_irqsave(rq, &rf); ++ update_rq_clock(rq); ++ ++ llist_for_each_entry_safe(p, t, llist, wake_entry) ++ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); ++ ++ rq_unlock_irqrestore(rq, &rf); ++} ++ ++void scheduler_ipi(void) ++{ ++ /* ++ * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting ++ * TIF_NEED_RESCHED remotely (for the first time) will also send ++ * this IPI. ++ */ ++ preempt_fold_need_resched(); ++ ++ if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) ++ return; ++ ++ /* ++ * Not all reschedule IPI handlers call irq_enter/irq_exit, since ++ * traditionally all their work was done from the interrupt return ++ * path. Now that we actually do some work, we need to make sure ++ * we do call them. ++ * ++ * Some archs already do call them, luckily irq_enter/exit nest ++ * properly. ++ * ++ * Arguably we should visit all archs and update all handlers, ++ * however a fair share of IPIs are still resched only so this would ++ * somewhat pessimize the simple resched case. ++ */ ++ irq_enter(); ++ sched_ttwu_pending(); ++ ++ /* ++ * Check if someone kicked us for doing the nohz idle load balance. 
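update_avg() above is a 1/8-weight exponential moving average, and ttwu_do_wakeup() feeds it with the idle-time deltas that become rq->avg_idle. The standalone program below is not part of the patch; it just shows how a few samples pull the average, using the same shift-by-3 update.

```c
/* Illustrative only: the 1/8-weight moving average used for rq->avg_idle. */
#include <stdio.h>
#include <stdint.h>

static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;

	*avg += diff >> 3;	/* move avg 1/8 of the way toward sample */
}

int main(void)
{
	uint64_t avg_idle = 0;
	uint64_t samples[] = { 800000, 800000, 800000, 100000, 100000 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_idle, samples[i]);
		printf("sample %7llu -> avg_idle %7llu\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg_idle);
	}
	return 0;
}
```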
++ */ ++ if (unlikely(got_nohz_idle_kick())) { ++ this_rq()->idle_balance = 1; ++ raise_softirq_irqoff(SCHED_SOFTIRQ); ++ } ++ irq_exit(); ++} ++ ++static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); ++ ++ if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { ++ if (!set_nr_if_polling(rq->idle)) ++ smp_send_reschedule(cpu); ++ else ++ trace_sched_wake_idle_without_ipi(cpu); ++ } ++} ++ ++void wake_up_if_idle(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ rcu_read_lock(); ++ ++ if (!is_idle_task(rcu_dereference(rq->curr))) ++ goto out; ++ ++ if (set_nr_if_polling(rq->idle)) { ++ trace_sched_wake_idle_without_ipi(cpu); ++ } else { ++ rq_lock_irqsave(rq, &rf); ++ if (is_idle_task(rq->curr)) ++ smp_send_reschedule(cpu); ++ /* Else CPU is not idle, do nothing here: */ ++ rq_unlock_irqrestore(rq, &rf); ++ } ++ ++out: ++ rcu_read_unlock(); ++} ++ ++bool cpus_share_cache(int this_cpu, int that_cpu) ++{ ++ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); ++} ++#endif /* CONFIG_SMP */ ++ ++static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++#if defined(CONFIG_SMP) ++ if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { ++ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ++ ttwu_queue_remote(p, cpu, wake_flags); ++ return; ++ } ++#endif ++ ++ rq_lock(rq, &rf); ++ update_rq_clock(rq); ++ ttwu_do_activate(rq, p, wake_flags, &rf); ++ rq_unlock(rq, &rf); ++} ++ ++/* ++ * Notes on Program-Order guarantees on SMP systems. ++ * ++ * MIGRATION ++ * ++ * The basic program-order guarantee on SMP systems is that when a task [t] ++ * migrates, all its activity on its old CPU [c0] happens-before any subsequent ++ * execution on its new CPU [c1]. ++ * ++ * For migration (of runnable tasks) this is provided by the following means: ++ * ++ * A) UNLOCK of the rq(c0)->lock scheduling out task t ++ * B) migration for t is required to synchronize *both* rq(c0)->lock and ++ * rq(c1)->lock (if not at the same time, then in that order). ++ * C) LOCK of the rq(c1)->lock scheduling in task ++ * ++ * Release/acquire chaining guarantees that B happens after A and C after B. ++ * Note: the CPU doing B need not be c0 or c1 ++ * ++ * Example: ++ * ++ * CPU0 CPU1 CPU2 ++ * ++ * LOCK rq(0)->lock ++ * sched-out X ++ * sched-in Y ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(0)->lock // orders against CPU0 ++ * dequeue X ++ * UNLOCK rq(0)->lock ++ * ++ * LOCK rq(1)->lock ++ * enqueue X ++ * UNLOCK rq(1)->lock ++ * ++ * LOCK rq(1)->lock // orders against CPU2 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(1)->lock ++ * ++ * ++ * BLOCKING -- aka. SLEEP + WAKEUP ++ * ++ * For blocking we (obviously) need to provide the same guarantee as for ++ * migration. However the means are completely different as there is no lock ++ * chain to provide order. 
Instead we do: ++ * ++ * 1) smp_store_release(X->on_cpu, 0) ++ * 2) smp_cond_load_acquire(!X->on_cpu) ++ * ++ * Example: ++ * ++ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) ++ * ++ * LOCK rq(0)->lock LOCK X->pi_lock ++ * dequeue X ++ * sched-out X ++ * smp_store_release(X->on_cpu, 0); ++ * ++ * smp_cond_load_acquire(&X->on_cpu, !VAL); ++ * X->state = WAKING ++ * set_task_cpu(X,2) ++ * ++ * LOCK rq(2)->lock ++ * enqueue X ++ * X->state = RUNNING ++ * UNLOCK rq(2)->lock ++ * ++ * LOCK rq(2)->lock // orders against CPU1 ++ * sched-out Z ++ * sched-in X ++ * UNLOCK rq(2)->lock ++ * ++ * UNLOCK X->pi_lock ++ * UNLOCK rq(0)->lock ++ * ++ * ++ * However, for wakeups there is a second guarantee we must provide, namely we ++ * must ensure that CONDITION=1 done by the caller can not be reordered with ++ * accesses to the task state; see try_to_wake_up() and set_current_state(). ++ */ ++ ++/** ++ * try_to_wake_up - wake up a thread ++ * @p: the thread to be awakened ++ * @state: the mask of task states that can be woken ++ * @wake_flags: wake modifier flags (WF_*) ++ * ++ * If (@state & @p->state) @p->state = TASK_RUNNING. ++ * ++ * If the task was not queued/runnable, also place it back on a runqueue. ++ * ++ * Atomic against schedule() which would dequeue a task, also see ++ * set_current_state(). ++ * ++ * This function executes a full memory barrier before accessing the task ++ * state; see set_current_state(). ++ * ++ * Return: %true if @p->state changes (an actual wakeup was done), ++ * %false otherwise. ++ */ ++static int ++try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ++{ ++ unsigned long flags; ++ int cpu, success = 0; ++ ++ /* ++ * If we are going to wake up a thread waiting for CONDITION we ++ * need to ensure that CONDITION=1 done by the caller can not be ++ * reordered with p->state check below. This pairs with mb() in ++ * set_current_state() the waiting thread does. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ smp_mb__after_spinlock(); ++ if (!(p->state & state)) ++ goto out; ++ ++ trace_sched_waking(p); ++ ++ /* We're going to change ->state: */ ++ success = 1; ++ cpu = task_cpu(p); ++ ++ /* ++ * Ensure we load p->on_rq _after_ p->state, otherwise it would ++ * be possible to, falsely, observe p->on_rq == 0 and get stuck ++ * in smp_cond_load_acquire() below. ++ * ++ * sched_ttwu_pending() try_to_wake_up() ++ * STORE p->on_rq = 1 LOAD p->state ++ * UNLOCK rq->lock ++ * ++ * __schedule() (switch to task 'p') ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * UNLOCK rq->lock ++ * ++ * [task p] ++ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). ++ */ ++ smp_rmb(); ++ if (p->on_rq && ttwu_remote(p, wake_flags)) ++ goto stat; ++ ++#ifdef CONFIG_SMP ++ /* ++ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be ++ * possible to, falsely, observe p->on_cpu == 0. ++ * ++ * One must be running (->on_cpu == 1) in order to remove oneself ++ * from the runqueue. ++ * ++ * __schedule() (switch to task 'p') try_to_wake_up() ++ * STORE p->on_cpu = 1 LOAD p->on_rq ++ * UNLOCK rq->lock ++ * ++ * __schedule() (put 'p' to sleep) ++ * LOCK rq->lock smp_rmb(); ++ * smp_mb__after_spinlock(); ++ * STORE p->on_rq = 0 LOAD p->on_cpu ++ * ++ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in ++ * __schedule(). See the comment for smp_mb__after_spinlock(). 
++ */ ++ smp_rmb(); ++ ++ /* ++ * If the owning (remote) CPU is still in the middle of schedule() with ++ * this task as prev, wait until its done referencing the task. ++ * ++ * Pairs with the smp_store_release() in finish_task(). ++ * ++ * This ensures that tasks getting woken will be fully ordered against ++ * their previous state and preserve Program Order. ++ */ ++ smp_cond_load_acquire(&p->on_cpu, !VAL); ++ ++ p->sched_contributes_to_load = !!task_contributes_to_load(p); ++ p->state = TASK_WAKING; ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ ++ cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); ++ if (task_cpu(p) != cpu) { ++ wake_flags |= WF_MIGRATED; ++ set_task_cpu(p, cpu); ++ } ++ ++#else /* CONFIG_SMP */ ++ ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&task_rq(p)->nr_iowait); ++ } ++ ++#endif /* CONFIG_SMP */ ++ ++ ttwu_queue(p, cpu, wake_flags); ++stat: ++ ttwu_stat(p, cpu, wake_flags); ++out: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++ return success; ++} ++ ++/** ++ * try_to_wake_up_local - try to wake up a local task with rq lock held ++ * @p: the thread to be awakened ++ * @rf: request-queue flags for pinning ++ * ++ * Put @p on the run-queue if it's not already there. The caller must ++ * ensure that this_rq() is locked, @p is bound to this_rq() and not ++ * the current task. ++ */ ++static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) ++{ ++ struct rq *rq = task_rq(p); ++ ++ if (WARN_ON_ONCE(rq != this_rq()) || ++ WARN_ON_ONCE(p == current)) ++ return; ++ ++ lockdep_assert_held(&rq->lock); ++ ++ if (!raw_spin_trylock(&p->pi_lock)) { ++ /* ++ * This is OK, because current is on_cpu, which avoids it being ++ * picked for load-balance and preemption/IRQs are still ++ * disabled avoiding further scheduler activity on it and we've ++ * not yet picked a replacement task. ++ */ ++ rq_unlock(rq, rf); ++ raw_spin_lock(&p->pi_lock); ++ rq_relock(rq, rf); ++ } ++ ++ if (!(p->state & TASK_NORMAL)) ++ goto out; ++ ++ trace_sched_waking(p); ++ ++ if (!task_on_rq_queued(p)) { ++ if (p->in_iowait) { ++ delayacct_blkio_end(p); ++ atomic_dec(&rq->nr_iowait); ++ } ++ ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); ++ } ++ ++ ttwu_do_wakeup(rq, p, 0, rf); ++ ttwu_stat(p, smp_processor_id(), 0); ++out: ++ raw_spin_unlock(&p->pi_lock); ++} ++ ++/** ++ * wake_up_process - Wake up a specific process ++ * @p: The process to be woken up. ++ * ++ * Attempt to wake up the nominated process and move it to the set of runnable ++ * processes. ++ * ++ * Return: 1 if the process was woken up, 0 if it was already running. ++ * ++ * This function executes a full memory barrier before accessing the task state. ++ */ ++int wake_up_process(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_NORMAL, 0); ++} ++EXPORT_SYMBOL(wake_up_process); ++ ++int wake_up_state(struct task_struct *p, unsigned int state) ++{ ++ return try_to_wake_up(p, state, 0); ++} ++ ++/* ++ * Perform scheduler related setup for a newly forked process p. ++ * p is forked by current. 
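The ordering argument above comes down to one release/acquire pair: the CPU scheduling the task out publishes its state and then clears ->on_cpu with smp_store_release(), while the waker spins in smp_cond_load_acquire() until it observes that store. The program below is a userspace analogy, not part of the patch, using C11 atomics and pthreads to show why the waker is then guaranteed to see everything written before the release.

```c
/* Userspace analogy of the on_cpu release/acquire pairing described above. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int prev_state;			/* stands in for the departing task's state */

static void *scheduler_side(void *arg)
{
	(void)arg;
	prev_state = 42;					/* plain store ...    */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);	/* ... then published */
	return NULL;
}

static void *waker_side(void *arg)
{
	(void)arg;
	/* analogue of smp_cond_load_acquire(&p->on_cpu, !VAL) */
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;
	printf("waker sees prev_state=%d\n", prev_state);	/* guaranteed 42 */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, waker_side, NULL);
	pthread_create(&b, NULL, scheduler_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
```

Built with `cc -pthread`, the waker always prints 42; without the release/acquire pair the read of prev_state would be a data race.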
++ * ++ * __sched_fork() is basic setup used by init_idle() too: ++ */ ++static void __sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ p->on_rq = 0; ++ ++ p->se.on_rq = 0; ++ p->se.exec_start = 0; ++ p->se.sum_exec_runtime = 0; ++ p->se.prev_sum_exec_runtime = 0; ++ p->se.nr_migrations = 0; ++ p->se.vruntime = 0; ++ INIT_LIST_HEAD(&p->se.group_node); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ p->se.cfs_rq = NULL; ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ /* Even if schedstat is disabled, there should not be garbage */ ++ memset(&p->se.statistics, 0, sizeof(p->se.statistics)); ++#endif ++ ++ RB_CLEAR_NODE(&p->dl.rb_node); ++ init_dl_task_timer(&p->dl); ++ init_dl_inactive_task_timer(&p->dl); ++ __dl_clear_params(p); ++ ++ INIT_LIST_HEAD(&p->rt.run_list); ++ p->rt.timeout = 0; ++ p->rt.time_slice = sched_rr_timeslice; ++ p->rt.on_rq = 0; ++ p->rt.on_list = 0; ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ INIT_HLIST_HEAD(&p->preempt_notifiers); ++#endif ++ ++ init_numa_balancing(clone_flags, p); ++} ++ ++DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); ++ ++#ifdef CONFIG_NUMA_BALANCING ++ ++void set_numabalancing_state(bool enabled) ++{ ++ if (enabled) ++ static_branch_enable(&sched_numa_balancing); ++ else ++ static_branch_disable(&sched_numa_balancing); ++} ++ ++#ifdef CONFIG_PROC_SYSCTL ++int sysctl_numa_balancing(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int err; ++ int state = static_branch_likely(&sched_numa_balancing); ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ t = *table; ++ t.data = &state; ++ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (err < 0) ++ return err; ++ if (write) ++ set_numabalancing_state(state); ++ return err; ++} ++#endif ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ ++DEFINE_STATIC_KEY_FALSE(sched_schedstats); ++static bool __initdata __sched_schedstats = false; ++ ++static void set_schedstats(bool enabled) ++{ ++ if (enabled) ++ static_branch_enable(&sched_schedstats); ++ else ++ static_branch_disable(&sched_schedstats); ++} ++ ++void force_schedstat_enabled(void) ++{ ++ if (!schedstat_enabled()) { ++ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); ++ static_branch_enable(&sched_schedstats); ++ } ++} ++ ++static int __init setup_schedstats(char *str) ++{ ++ int ret = 0; ++ if (!str) ++ goto out; ++ ++ /* ++ * This code is called before jump labels have been set up, so we can't ++ * change the static branch directly just yet. Instead set a temporary ++ * variable so init_schedstats() can do it later. 
++ */ ++ if (!strcmp(str, "enable")) { ++ __sched_schedstats = true; ++ ret = 1; ++ } else if (!strcmp(str, "disable")) { ++ __sched_schedstats = false; ++ ret = 1; ++ } ++out: ++ if (!ret) ++ pr_warn("Unable to parse schedstats=\n"); ++ ++ return ret; ++} ++__setup("schedstats=", setup_schedstats); ++ ++static void __init init_schedstats(void) ++{ ++ set_schedstats(__sched_schedstats); ++} ++ ++#ifdef CONFIG_PROC_SYSCTL ++int sysctl_schedstats(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct ctl_table t; ++ int err; ++ int state = static_branch_likely(&sched_schedstats); ++ ++ if (write && !capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ t = *table; ++ t.data = &state; ++ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); ++ if (err < 0) ++ return err; ++ if (write) ++ set_schedstats(state); ++ return err; ++} ++#endif /* CONFIG_PROC_SYSCTL */ ++#else /* !CONFIG_SCHEDSTATS */ ++static inline void init_schedstats(void) {} ++#endif /* CONFIG_SCHEDSTATS */ ++ ++/* ++ * fork()/clone()-time setup: ++ */ ++int sched_fork(unsigned long clone_flags, struct task_struct *p) ++{ ++ unsigned long flags; ++ ++ __sched_fork(clone_flags, p); ++ /* ++ * We mark the process as NEW here. This guarantees that ++ * nobody will actually run it, and a signal or other external ++ * event cannot wake it up and insert it on the runqueue either. ++ */ ++ p->state = TASK_NEW; ++ ++ /* ++ * Make sure we do not leak PI boosting priority to the child. ++ */ ++ p->prio = current->normal_prio; ++ ++ /* ++ * Revert to default priority/policy on fork if requested. ++ */ ++ if (unlikely(p->sched_reset_on_fork)) { ++ if (task_has_dl_policy(p) || task_has_rt_policy(p)) { ++ p->policy = SCHED_NORMAL; ++ p->static_prio = NICE_TO_PRIO(0); ++ p->rt_priority = 0; ++ } else if (PRIO_TO_NICE(p->static_prio) < 0) ++ p->static_prio = NICE_TO_PRIO(0); ++ ++ p->prio = p->normal_prio = __normal_prio(p); ++ set_load_weight(p, false); ++ ++ /* ++ * We don't need the reset flag anymore after the fork. It has ++ * fulfilled its duty: ++ */ ++ p->sched_reset_on_fork = 0; ++ } ++ ++ if (dl_prio(p->prio)) ++ return -EAGAIN; ++ else if (rt_prio(p->prio)) ++ p->sched_class = &rt_sched_class; ++ else ++ p->sched_class = &fair_sched_class; ++ ++ init_entity_runnable_average(&p->se); ++ ++ /* ++ * The child is not yet in the pid-hash so no cgroup attach races, ++ * and the cgroup is pinned to this child due to cgroup_fork() ++ * is ran before sched_fork(). ++ * ++ * Silence PROVE_RCU. ++ */ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ rseq_migrate(p); ++ /* ++ * We're setting the CPU for the first time, we don't migrate, ++ * so use __set_task_cpu(). ++ */ ++ __set_task_cpu(p, smp_processor_id()); ++ if (p->sched_class->task_fork) ++ p->sched_class->task_fork(p); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++#ifdef CONFIG_SCHED_INFO ++ if (likely(sched_info_on())) ++ memset(&p->sched_info, 0, sizeof(p->sched_info)); ++#endif ++#if defined(CONFIG_SMP) ++ p->on_cpu = 0; ++#endif ++ init_task_preempt_count(p); ++#ifdef CONFIG_SMP ++ plist_node_init(&p->pushable_tasks, MAX_PRIO); ++ RB_CLEAR_NODE(&p->pushable_dl_tasks); ++#endif ++ return 0; ++} ++ ++unsigned long to_ratio(u64 period, u64 runtime) ++{ ++ if (runtime == RUNTIME_INF) ++ return BW_UNIT; ++ ++ /* ++ * Doing this here saves a lot of checks in all ++ * the calling paths, and returning zero seems ++ * safe for them anyway. 
++ */ ++ if (period == 0) ++ return 0; ++ ++ return div64_u64(runtime << BW_SHIFT, period); ++} ++ ++/* ++ * wake_up_new_task - wake up a newly created task for the first time. ++ * ++ * This function will do some initial scheduler statistics housekeeping ++ * that must be done for every newly created context, then puts the task ++ * on the runqueue and wakes it. ++ */ ++void wake_up_new_task(struct task_struct *p) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); ++ p->state = TASK_RUNNING; ++#ifdef CONFIG_SMP ++ /* ++ * Fork balancing, do it here and not earlier because: ++ * - cpus_allowed can change in the fork path ++ * - any previously selected CPU might disappear through hotplug ++ * ++ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, ++ * as we're not fully set-up yet. ++ */ ++ p->recent_used_cpu = task_cpu(p); ++ rseq_migrate(p); ++ __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); ++#endif ++ rq = __task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ post_init_entity_util_avg(&p->se); ++ ++ activate_task(rq, p, ENQUEUE_NOCLOCK); ++ p->on_rq = TASK_ON_RQ_QUEUED; ++ trace_sched_wakeup_new(p); ++ check_preempt_curr(rq, p, WF_FORK); ++#ifdef CONFIG_SMP ++ if (p->sched_class->task_woken) { ++ /* ++ * Nothing relies on rq->lock after this, so its fine to ++ * drop it. ++ */ ++ rq_unpin_lock(rq, &rf); ++ p->sched_class->task_woken(rq, p); ++ rq_repin_lock(rq, &rf); ++ } ++#endif ++ task_rq_unlock(rq, p, &rf); ++} ++ ++#ifdef CONFIG_PREEMPT_NOTIFIERS ++ ++static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); ++ ++void preempt_notifier_inc(void) ++{ ++ static_branch_inc(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_inc); ++ ++void preempt_notifier_dec(void) ++{ ++ static_branch_dec(&preempt_notifier_key); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_dec); ++ ++/** ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled ++ * @notifier: notifier struct to register ++ */ ++void preempt_notifier_register(struct preempt_notifier *notifier) ++{ ++ if (!static_branch_unlikely(&preempt_notifier_key)) ++ WARN(1, "registering preempt_notifier while notifiers disabled\n"); ++ ++ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_register); ++ ++/** ++ * preempt_notifier_unregister - no longer interested in preemption notifications ++ * @notifier: notifier struct to unregister ++ * ++ * This is *not* safe to call from within a preemption notifier. 
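to_ratio() above expresses a runtime/period bandwidth as a fixed-point fraction used by the bandwidth checks elsewhere in this file. The worked example below is standalone and not part of the patch; it assumes the usual mainline BW_SHIFT of 20 (so BW_UNIT is 2^20) and leaves out the RUNTIME_INF special case.

```c
/* Illustrative only: the fixed-point ratio computed by to_ratio(). */
#include <stdio.h>
#include <stdint.h>

#define BW_SHIFT 20
#define BW_UNIT  (1ULL << BW_SHIFT)

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* 10ms of runtime every 100ms -> roughly 10% of BW_UNIT */
	uint64_t r = to_ratio(100 * 1000 * 1000ULL, 10 * 1000 * 1000ULL);

	printf("ratio = %llu (%.1f%% of %llu)\n",
	       (unsigned long long)r, 100.0 * r / BW_UNIT,
	       (unsigned long long)BW_UNIT);
	return 0;
}
```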
++ */ ++void preempt_notifier_unregister(struct preempt_notifier *notifier) ++{ ++ hlist_del(¬ifier->link); ++} ++EXPORT_SYMBOL_GPL(preempt_notifier_unregister); ++ ++static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++} ++ ++static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_in_preempt_notifiers(curr); ++} ++ ++static void ++__fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ struct preempt_notifier *notifier; ++ ++ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) ++ notifier->ops->sched_out(notifier, next); ++} ++ ++static __always_inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++ if (static_branch_unlikely(&preempt_notifier_key)) ++ __fire_sched_out_preempt_notifiers(curr, next); ++} ++ ++#else /* !CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) ++{ ++} ++ ++static inline void ++fire_sched_out_preempt_notifiers(struct task_struct *curr, ++ struct task_struct *next) ++{ ++} ++ ++#endif /* CONFIG_PREEMPT_NOTIFIERS */ ++ ++static inline void prepare_task(struct task_struct *next) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * Claim the task as running, we do this before switching to it ++ * such that any running task will have this set. ++ */ ++ next->on_cpu = 1; ++#endif ++} ++ ++static inline void finish_task(struct task_struct *prev) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * After ->on_cpu is cleared, the task can be moved to a different CPU. ++ * We must ensure this doesn't happen until the switch is completely ++ * finished. ++ * ++ * In particular, the load of prev->state in finish_task_switch() must ++ * happen before this. ++ * ++ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). ++ */ ++ smp_store_release(&prev->on_cpu, 0); ++#endif ++} ++ ++static inline void ++prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) ++{ ++ /* ++ * Since the runqueue lock will be released by the next ++ * task (which is an invalid locking op but in the case ++ * of the scheduler it's an obvious special-case), so we ++ * do an early lockdep release here: ++ */ ++ rq_unpin_lock(rq, rf); ++ spin_release(&rq->lock.dep_map, 1, _THIS_IP_); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* this is a valid case when another task releases the spinlock */ ++ rq->lock.owner = next; ++#endif ++} ++ ++static inline void finish_lock_switch(struct rq *rq) ++{ ++ /* ++ * If we are tracking spinlock dependencies then we have to ++ * fix up the runqueue lock - which gets 'carried over' from ++ * prev into current: ++ */ ++ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++/* ++ * NOP if the arch has not defined these: ++ */ ++ ++#ifndef prepare_arch_switch ++# define prepare_arch_switch(next) do { } while (0) ++#endif ++ ++#ifndef finish_arch_post_lock_switch ++# define finish_arch_post_lock_switch() do { } while (0) ++#endif ++ ++/** ++ * prepare_task_switch - prepare to switch tasks ++ * @rq: the runqueue preparing to switch ++ * @prev: the current task that is being switched out ++ * @next: the task we are going to switch to. 
++ * ++ * This is called with the rq lock held and interrupts off. It must ++ * be paired with a subsequent finish_task_switch after the context ++ * switch. ++ * ++ * prepare_task_switch sets up locking and calls architecture specific ++ * hooks. ++ */ ++static inline void ++prepare_task_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ kcov_prepare_switch(prev); ++ sched_info_switch(rq, prev, next); ++ perf_event_task_sched_out(prev, next); ++ rseq_preempt(prev); ++ fire_sched_out_preempt_notifiers(prev, next); ++ prepare_task(next); ++ prepare_arch_switch(next); ++} ++ ++/** ++ * finish_task_switch - clean up after a task-switch ++ * @prev: the thread we just switched away from. ++ * ++ * finish_task_switch must be called after the context switch, paired ++ * with a prepare_task_switch call before the context switch. ++ * finish_task_switch will reconcile locking set up by prepare_task_switch, ++ * and do any other architecture-specific cleanup actions. ++ * ++ * Note that we may have delayed dropping an mm in context_switch(). If ++ * so, we finish that here outside of the runqueue lock. (Doing it ++ * with the lock held can cause deadlocks; see schedule() for ++ * details.) ++ * ++ * The context switch have flipped the stack from under us and restored the ++ * local variables which were saved when this task called schedule() in the ++ * past. prev == current is still correct but we need to recalculate this_rq ++ * because prev may have moved to another CPU. ++ */ ++static struct rq *finish_task_switch(struct task_struct *prev) ++ __releases(rq->lock) ++{ ++ struct rq *rq = this_rq(); ++ struct mm_struct *mm = rq->prev_mm; ++ long prev_state; ++ ++ /* ++ * The previous task will have left us with a preempt_count of 2 ++ * because it left us after: ++ * ++ * schedule() ++ * preempt_disable(); // 1 ++ * __schedule() ++ * raw_spin_lock_irq(&rq->lock) // 2 ++ * ++ * Also, see FORK_PREEMPT_COUNT. ++ */ ++ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, ++ "corrupted preempt_count: %s/%d/0x%x\n", ++ current->comm, current->pid, preempt_count())) ++ preempt_count_set(FORK_PREEMPT_COUNT); ++ ++ rq->prev_mm = NULL; ++ ++ /* ++ * A task struct has one reference for the use as "current". ++ * If a task dies, then it sets TASK_DEAD in tsk->state and calls ++ * schedule one last time. The schedule call will never return, and ++ * the scheduled task must drop that reference. ++ * ++ * We must observe prev->state before clearing prev->on_cpu (in ++ * finish_task), otherwise a concurrent wakeup can get prev ++ * running on another CPU and we could rave with its RUNNING -> DEAD ++ * transition, resulting in a double drop. ++ */ ++ prev_state = prev->state; ++ vtime_task_switch(prev); ++ perf_event_task_sched_in(prev, current); ++ finish_task(prev); ++ finish_lock_switch(rq); ++ finish_arch_post_lock_switch(); ++ kcov_finish_switch(current); ++ ++ fire_sched_in_preempt_notifiers(current); ++ /* ++ * When switching through a kernel thread, the loop in ++ * membarrier_{private,global}_expedited() may have observed that ++ * kernel thread and not issued an IPI. It is therefore possible to ++ * schedule between user->kernel->user threads without passing though ++ * switch_mm(). Membarrier requires a barrier after storing to ++ * rq->curr, before returning to userspace, so provide them here: ++ * ++ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly ++ * provided by mmdrop(), ++ * - a sync_core for SYNC_CORE. 
++ */ ++ if (mm) { ++ membarrier_mm_sync_core_before_usermode(mm); ++ mmdrop(mm); ++ } ++ if (unlikely(prev_state == TASK_DEAD)) { ++ if (prev->sched_class->task_dead) ++ prev->sched_class->task_dead(prev); ++ ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(prev); ++ ++ /* Task is done with its stack. */ ++ put_task_stack(prev); ++ ++ put_task_struct(prev); ++ } ++ ++ tick_nohz_task_switch(); ++ return rq; ++} ++ ++#ifdef CONFIG_SMP ++ ++/* rq->lock is NOT held, but preemption is disabled */ ++static void __balance_callback(struct rq *rq) ++{ ++ struct callback_head *head, *next; ++ void (*func)(struct rq *rq); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&rq->lock, flags); ++ head = rq->balance_callback; ++ rq->balance_callback = NULL; ++ while (head) { ++ func = (void (*)(struct rq *))head->func; ++ next = head->next; ++ head->next = NULL; ++ head = next; ++ ++ func(rq); ++ } ++ raw_spin_unlock_irqrestore(&rq->lock, flags); ++} ++ ++static inline void balance_callback(struct rq *rq) ++{ ++ if (unlikely(rq->balance_callback)) ++ __balance_callback(rq); ++} ++ ++#else ++ ++static inline void balance_callback(struct rq *rq) ++{ ++} ++ ++#endif ++ ++/** ++ * schedule_tail - first thing a freshly forked thread must call. ++ * @prev: the thread we just switched away from. ++ */ ++asmlinkage __visible void schedule_tail(struct task_struct *prev) ++ __releases(rq->lock) ++{ ++ struct rq *rq; ++ ++ /* ++ * New tasks start with FORK_PREEMPT_COUNT, see there and ++ * finish_task_switch() for details. ++ * ++ * finish_task_switch() will drop rq->lock() and lower preempt_count ++ * and the preempt_enable() will end up enabling preemption (on ++ * PREEMPT_COUNT kernels). ++ */ ++ ++ rq = finish_task_switch(prev); ++ balance_callback(rq); ++ preempt_enable(); ++ ++ if (current->set_child_tid) ++ put_user(task_pid_vnr(current), current->set_child_tid); ++ ++ calculate_sigpending(); ++} ++ ++/* ++ * context_switch - switch to the new MM and the new thread's register state. ++ */ ++static __always_inline struct rq * ++context_switch(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next, struct rq_flags *rf) ++{ ++ prepare_task_switch(rq, prev, next); ++ ++ /* ++ * For paravirt, this is coupled with an exit in switch_to to ++ * combine the page table reload and the switch backend into ++ * one hypercall. ++ */ ++ arch_start_context_switch(prev); ++ ++ /* ++ * kernel -> kernel lazy + transfer active ++ * user -> kernel lazy + mmgrab() active ++ * ++ * kernel -> user switch + mmdrop() active ++ * user -> user switch ++ */ ++ if (!next->mm) { // to kernel ++ enter_lazy_tlb(prev->active_mm, next); ++ ++ next->active_mm = prev->active_mm; ++ if (prev->mm) // from user ++ mmgrab(prev->active_mm); ++ else ++ prev->active_mm = NULL; ++ } else { // to user ++ membarrier_switch_mm(rq, prev->active_mm, next->mm); ++ /* ++ * sys_membarrier() requires an smp_mb() between setting ++ * rq->curr / membarrier_switch_mm() and returning to userspace. ++ * ++ * The below provides this either through switch_mm(), or in ++ * case 'prev->active_mm == next->mm' through ++ * finish_task_switch()'s mmdrop(). ++ */ ++ switch_mm_irqs_off(prev->active_mm, next->mm, next); ++ ++ if (!prev->mm) { // from kernel ++ /* will mmdrop() in finish_task_switch(). 
*/ ++ rq->prev_mm = prev->active_mm; ++ prev->active_mm = NULL; ++ } ++ } ++ ++ rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); ++ ++ prepare_lock_switch(rq, next, rf); ++ ++ /* Here we just switch the register state and the stack. */ ++ switch_to(prev, next, prev); ++ barrier(); ++ ++ return finish_task_switch(prev); ++} ++ ++/* ++ * nr_running and nr_context_switches: ++ * ++ * externally visible scheduler statistics: current number of runnable ++ * threads, total number of context switches performed since bootup. ++ */ ++unsigned long nr_running(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_online_cpu(i) ++ sum += cpu_rq(i)->nr_running; ++ ++ return sum; ++} ++ ++/* ++ * Check if only the current task is running on the CPU. ++ * ++ * Caution: this function does not check that the caller has disabled ++ * preemption, thus the result might have a time-of-check-to-time-of-use ++ * race. The caller is responsible to use it correctly, for example: ++ * ++ * - from a non-preemptable section (of course) ++ * ++ * - from a thread that is bound to a single CPU ++ * ++ * - in a loop with very short iterations (e.g. a polling loop) ++ */ ++bool single_task_running(void) ++{ ++ return raw_rq()->nr_running == 1; ++} ++EXPORT_SYMBOL(single_task_running); ++ ++unsigned long long nr_context_switches(void) ++{ ++ int i; ++ unsigned long long sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += cpu_rq(i)->nr_switches; ++ ++ return sum; ++} ++ ++/* ++ * IO-wait accounting, and how its mostly bollocks (on SMP). ++ * ++ * The idea behind IO-wait account is to account the idle time that we could ++ * have spend running if it were not for IO. That is, if we were to improve the ++ * storage performance, we'd have a proportional reduction in IO-wait time. ++ * ++ * This all works nicely on UP, where, when a task blocks on IO, we account ++ * idle time as IO-wait, because if the storage were faster, it could've been ++ * running and we'd not be idle. ++ * ++ * This has been extended to SMP, by doing the same for each CPU. This however ++ * is broken. ++ * ++ * Imagine for instance the case where two tasks block on one CPU, only the one ++ * CPU will have IO-wait accounted, while the other has regular idle. Even ++ * though, if the storage were faster, both could've ran at the same time, ++ * utilising both CPUs. ++ * ++ * This means, that when looking globally, the current IO-wait accounting on ++ * SMP is a lower bound, by reason of under accounting. ++ * ++ * Worse, since the numbers are provided per CPU, they are sometimes ++ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly ++ * associated with any one particular CPU, it can wake to another CPU than it ++ * blocked on. This means the per CPU IO-wait number is meaningless. ++ * ++ * Task CPU affinities can make all that even more 'interesting'. ++ */ ++ ++unsigned long nr_iowait(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_possible_cpu(i) ++ sum += atomic_read(&cpu_rq(i)->nr_iowait); ++ ++ return sum; ++} ++ ++/* ++ * Consumers of these two interfaces, like for example the cpufreq menu ++ * governor are using nonsensical data. Boosting frequency for a CPU that has ++ * IO-wait which might not even end up running the task when it does become ++ * runnable. 
++ */ ++ ++unsigned long nr_iowait_cpu(int cpu) ++{ ++ struct rq *this = cpu_rq(cpu); ++ return atomic_read(&this->nr_iowait); ++} ++ ++void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) ++{ ++ struct rq *rq = this_rq(); ++ *nr_waiters = atomic_read(&rq->nr_iowait); ++ *load = rq->load.weight; ++} ++ ++#ifdef CONFIG_SMP ++ ++/* ++ * sched_exec - execve() is a valuable balancing opportunity, because at ++ * this point the task has the smallest effective memory and cache footprint. ++ */ ++void sched_exec(void) ++{ ++ struct task_struct *p = current; ++ unsigned long flags; ++ int dest_cpu; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); ++ if (dest_cpu == smp_processor_id()) ++ goto unlock; ++ ++ if (likely(cpu_active(dest_cpu))) { ++ struct migration_arg arg = { p, dest_cpu }; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ return; ++ } ++unlock: ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++} ++ ++#endif ++ ++DEFINE_PER_CPU(struct kernel_stat, kstat); ++DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); ++ ++EXPORT_PER_CPU_SYMBOL(kstat); ++EXPORT_PER_CPU_SYMBOL(kernel_cpustat); ++ ++/* ++ * The function fair_sched_class.update_curr accesses the struct curr ++ * and its field curr->exec_start; when called from task_sched_runtime(), ++ * we observe a high rate of cache misses in practice. ++ * Prefetching this data results in improved performance. ++ */ ++static inline void prefetch_curr_exec_start(struct task_struct *p) ++{ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ struct sched_entity *curr = (&p->se)->cfs_rq->curr; ++#else ++ struct sched_entity *curr = (&task_rq(p)->cfs)->curr; ++#endif ++ prefetch(curr); ++ prefetch(&curr->exec_start); ++} ++ ++/* ++ * Return accounted runtime for the task. ++ * In case the task is currently running, return the runtime plus current's ++ * pending runtime that have not been accounted yet. ++ */ ++unsigned long long task_sched_runtime(struct task_struct *p) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ u64 ns; ++ ++#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) ++ /* ++ * 64-bit doesn't need locks to atomically read a 64-bit value. ++ * So we have a optimization chance when the task's delta_exec is 0. ++ * Reading ->on_cpu is racy, but this is ok. ++ * ++ * If we race with it leaving CPU, we'll take a lock. So we're correct. ++ * If we race with it entering CPU, unaccounted time is 0. This is ++ * indistinguishable from the read occurring a few cycles earlier. ++ * If we see ->on_cpu without ->on_rq, the task is leaving, and has ++ * been accounted, so we're correct here as well. ++ */ ++ if (!p->on_cpu || !task_on_rq_queued(p)) ++ return p->se.sum_exec_runtime; ++#endif ++ ++ rq = task_rq_lock(p, &rf); ++ /* ++ * Must be ->curr _and_ ->on_rq. If dequeued, we would ++ * project cycles that may never be accounted to this ++ * thread, breaking clock_gettime(). ++ */ ++ if (task_current(rq, p) && task_on_rq_queued(p)) { ++ prefetch_curr_exec_start(p); ++ update_rq_clock(rq); ++ p->sched_class->update_curr(rq); ++ } ++ ns = p->se.sum_exec_runtime; ++ task_rq_unlock(rq, p, &rf); ++ ++ return ns; ++} ++ ++/* ++ * This function gets called by the timer code, with HZ frequency. ++ * We call it with interrupts disabled. 
++ */ ++void scheduler_tick(void) ++{ ++ int cpu = smp_processor_id(); ++ struct rq *rq = cpu_rq(cpu); ++ struct task_struct *curr = rq->curr; ++ struct rq_flags rf; ++ ++ sched_clock_tick(); ++ ++ rq_lock(rq, &rf); ++ ++ update_rq_clock(rq); ++ curr->sched_class->task_tick(rq, curr, 0); ++ cpu_load_update_active(rq); ++ calc_global_load_tick(rq); ++ ++ rq_unlock(rq, &rf); ++ ++ perf_event_task_tick(); ++ ++#ifdef CONFIG_SMP ++ rq->idle_balance = idle_cpu(cpu); ++ trigger_load_balance(rq); ++#endif ++} ++ ++#ifdef CONFIG_NO_HZ_FULL ++ ++struct tick_work { ++ int cpu; ++ atomic_t state; ++ struct delayed_work work; ++}; ++/* Values for ->state, see diagram below. */ ++#define TICK_SCHED_REMOTE_OFFLINE 0 ++#define TICK_SCHED_REMOTE_OFFLINING 1 ++#define TICK_SCHED_REMOTE_RUNNING 2 ++ ++/* ++ * State diagram for ->state: ++ * ++ * ++ * TICK_SCHED_REMOTE_OFFLINE ++ * | ^ ++ * | | ++ * | | sched_tick_remote() ++ * | | ++ * | | ++ * +--TICK_SCHED_REMOTE_OFFLINING ++ * | ^ ++ * | | ++ * sched_tick_start() | | sched_tick_stop() ++ * | | ++ * V | ++ * TICK_SCHED_REMOTE_RUNNING ++ * ++ * ++ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() ++ * and sched_tick_start() are happy to leave the state in RUNNING. ++ */ ++ ++static struct tick_work __percpu *tick_work_cpu; ++ ++static void sched_tick_remote(struct work_struct *work) ++{ ++ struct delayed_work *dwork = to_delayed_work(work); ++ struct tick_work *twork = container_of(dwork, struct tick_work, work); ++ int cpu = twork->cpu; ++ struct rq *rq = cpu_rq(cpu); ++ struct task_struct *curr; ++ struct rq_flags rf; ++ u64 delta; ++ int os; ++ ++ /* ++ * Handle the tick only if it appears the remote CPU is running in full ++ * dynticks mode. The check is racy by nature, but missing a tick or ++ * having one too much is no big deal because the scheduler tick updates ++ * statistics and checks timeslices in a time-independent way, regardless ++ * of when exactly it is running. ++ */ ++ if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) ++ goto out_requeue; ++ ++ rq_lock_irq(rq, &rf); ++ curr = rq->curr; ++ if (is_idle_task(curr) || cpu_is_offline(cpu)) ++ goto out_unlock; ++ ++ update_rq_clock(rq); ++ delta = rq_clock_task(rq) - curr->se.exec_start; ++ ++ /* ++ * Make sure the next tick runs within a reasonable ++ * amount of time. ++ */ ++ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); ++ curr->sched_class->task_tick(rq, curr, 0); ++ ++out_unlock: ++ rq_unlock_irq(rq, &rf); ++ ++out_requeue: ++ /* ++ * Run the remote tick once per second (1Hz). This arbitrary ++ * frequency is large enough to avoid overload but short enough ++ * to keep scheduler internal stats reasonably up to date. But ++ * first update state to reflect hotplug activity if required. 
++ */ ++ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); ++ if (os == TICK_SCHED_REMOTE_RUNNING) ++ queue_delayed_work(system_unbound_wq, dwork, HZ); ++} ++ ++static void sched_tick_start(int cpu) ++{ ++ int os; ++ struct tick_work *twork; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); ++ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); ++ if (os == TICK_SCHED_REMOTE_OFFLINE) { ++ twork->cpu = cpu; ++ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); ++ queue_delayed_work(system_unbound_wq, &twork->work, HZ); ++ } ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void sched_tick_stop(int cpu) ++{ ++ struct tick_work *twork; ++ int os; ++ ++ if (housekeeping_cpu(cpu, HK_FLAG_TICK)) ++ return; ++ ++ WARN_ON_ONCE(!tick_work_cpu); ++ ++ twork = per_cpu_ptr(tick_work_cpu, cpu); ++ /* There cannot be competing actions, but don't rely on stop-machine. */ ++ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); ++ WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); ++ /* Don't cancel, as this would mess up the state machine. */ ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++int __init sched_tick_offload_init(void) ++{ ++ tick_work_cpu = alloc_percpu(struct tick_work); ++ BUG_ON(!tick_work_cpu); ++ return 0; ++} ++ ++#else /* !CONFIG_NO_HZ_FULL */ ++static inline void sched_tick_start(int cpu) { } ++static inline void sched_tick_stop(int cpu) { } ++#endif ++ ++#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_TRACE_PREEMPT_TOGGLE)) ++/* ++ * If the value passed in is equal to the current preempt count ++ * then we just disabled preemption. Start timing the latency. ++ */ ++static inline void preempt_latency_start(int val) ++{ ++ if (preempt_count() == val) { ++ unsigned long ip = get_lock_parent_ip(); ++#ifdef CONFIG_DEBUG_PREEMPT ++ current->preempt_disable_ip = ip; ++#endif ++ trace_preempt_off(CALLER_ADDR0, ip); ++ } ++} ++ ++void preempt_count_add(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) ++ return; ++#endif ++ __preempt_count_add(val); ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Spinlock count overflowing soon? ++ */ ++ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= ++ PREEMPT_MASK - 10); ++#endif ++ preempt_latency_start(val); ++} ++EXPORT_SYMBOL(preempt_count_add); ++NOKPROBE_SYMBOL(preempt_count_add); ++ ++/* ++ * If the value passed in equals to the current preempt count ++ * then we just enabled preemption. Stop timing the latency. ++ */ ++static inline void preempt_latency_stop(int val) ++{ ++ if (preempt_count() == val) ++ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); ++} ++ ++void preempt_count_sub(int val) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ /* ++ * Underflow? ++ */ ++ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) ++ return; ++ /* ++ * Is the spinlock portion underflowing? 
++ */ ++ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && ++ !(preempt_count() & PREEMPT_MASK))) ++ return; ++#endif ++ ++ preempt_latency_stop(val); ++ __preempt_count_sub(val); ++} ++EXPORT_SYMBOL(preempt_count_sub); ++NOKPROBE_SYMBOL(preempt_count_sub); ++ ++#else ++static inline void preempt_latency_start(int val) { } ++static inline void preempt_latency_stop(int val) { } ++#endif ++ ++static inline unsigned long get_preempt_disable_ip(struct task_struct *p) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ return p->preempt_disable_ip; ++#else ++ return 0; ++#endif ++} ++ ++/* ++ * Print scheduling while atomic bug: ++ */ ++static noinline void __schedule_bug(struct task_struct *prev) ++{ ++ /* Save this before calling printk(), since that will clobber it */ ++ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ if (oops_in_progress) ++ return; ++ ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", ++ prev->comm, prev->pid, preempt_count()); ++ ++ debug_show_held_locks(prev); ++ print_modules(); ++ if (irqs_disabled()) ++ print_irqtrace_events(prev); ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) ++ && in_atomic_preempt_off()) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++ if (panic_on_warn) ++ panic("scheduling while atomic\n"); ++ ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++ ++/* ++ * Various schedule()-time debugging checks and statistics: ++ */ ++static inline void schedule_debug(struct task_struct *prev) ++{ ++#ifdef CONFIG_SCHED_STACK_END_CHECK ++ if (task_stack_end_corrupted(prev)) ++ panic("corrupted stack end detected inside scheduler\n"); ++#endif ++ ++ if (unlikely(in_atomic_preempt_off())) { ++ __schedule_bug(prev); ++ preempt_count_set(PREEMPT_DISABLED); ++ } ++ rcu_sleep_check(); ++ ++ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); ++ ++ schedstat_inc(this_rq()->sched_count); ++} ++ ++/* ++ * Pick up the highest-prio task: ++ */ ++static inline struct task_struct * ++pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) ++{ ++ const struct sched_class *class; ++ struct task_struct *p; ++ ++ /* ++ * Optimization: we know that if all tasks are in the fair class we can ++ * call that function directly, but only if the @prev task wasn't of a ++ * higher scheduling class, because otherwise those loose the ++ * opportunity to pull in more work from other CPUs. ++ */ ++ if (likely((prev->sched_class == &idle_sched_class || ++ prev->sched_class == &fair_sched_class) && ++ rq->nr_running == rq->cfs.h_nr_running)) { ++ ++ p = fair_sched_class.pick_next_task(rq, prev, rf); ++ if (unlikely(p == RETRY_TASK)) ++ goto again; ++ ++ /* Assumes fair_sched_class->next == idle_sched_class */ ++ if (unlikely(!p)) ++ p = idle_sched_class.pick_next_task(rq, prev, rf); ++ ++ return p; ++ } ++ ++again: ++ for_each_class(class) { ++ p = class->pick_next_task(rq, prev, rf); ++ if (p) { ++ if (unlikely(p == RETRY_TASK)) ++ goto again; ++ return p; ++ } ++ } ++ ++ /* The idle class should always have a runnable task: */ ++ BUG(); ++} ++ ++/* ++ * __schedule() is the main scheduler function. ++ * ++ * The main means of driving the scheduler and thus entering this function are: ++ * ++ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. ++ * ++ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return ++ * paths. For example, see arch/x86/entry_64.S. 
++ * ++ * To drive preemption between tasks, the scheduler sets the flag in timer ++ * interrupt handler scheduler_tick(). ++ * ++ * 3. Wakeups don't really cause entry into schedule(). They add a ++ * task to the run-queue and that's it. ++ * ++ * Now, if the new task added to the run-queue preempts the current ++ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets ++ * called on the nearest possible occasion: ++ * ++ * - If the kernel is preemptible (CONFIG_PREEMPT=y): ++ * ++ * - in syscall or exception context, at the next outmost ++ * preempt_enable(). (this might be as soon as the wake_up()'s ++ * spin_unlock()!) ++ * ++ * - in IRQ context, return from interrupt-handler to ++ * preemptible context ++ * ++ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) ++ * then at the next: ++ * ++ * - cond_resched() call ++ * - explicit schedule() call ++ * - return from syscall or exception to user-space ++ * - return from interrupt-handler to user-space ++ * ++ * WARNING: must be called with preemption disabled! ++ */ ++static void __sched notrace __schedule(bool preempt) ++{ ++ struct task_struct *prev, *next; ++ unsigned long *switch_count; ++ struct rq_flags rf; ++ struct rq *rq; ++ int cpu; ++ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ prev = rq->curr; ++ ++ schedule_debug(prev); ++ ++ if (sched_feat(HRTICK)) ++ hrtick_clear(rq); ++ ++ local_irq_disable(); ++ rcu_note_context_switch(preempt); ++ ++ /* ++ * Make sure that signal_pending_state()->signal_pending() below ++ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) ++ * done by the caller to avoid the race with signal_wake_up(). ++ * ++ * The membarrier system call requires a full memory barrier ++ * after coming from user-space, before storing to rq->curr. ++ */ ++ rq_lock(rq, &rf); ++ smp_mb__after_spinlock(); ++ ++ /* Promote REQ to ACT */ ++ rq->clock_update_flags <<= 1; ++ update_rq_clock(rq); ++ ++ switch_count = &prev->nivcsw; ++ if (!preempt && prev->state) { ++ if (unlikely(signal_pending_state(prev->state, prev))) { ++ prev->state = TASK_RUNNING; ++ } else { ++ deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); ++ prev->on_rq = 0; ++ ++ if (prev->in_iowait) { ++ atomic_inc(&rq->nr_iowait); ++ delayacct_blkio_start(); ++ } ++ ++ /* ++ * If a worker went to sleep, notify and ask workqueue ++ * whether it wants to wake up a task to maintain ++ * concurrency. ++ */ ++ if (prev->flags & PF_WQ_WORKER) { ++ struct task_struct *to_wakeup; ++ ++ to_wakeup = wq_worker_sleeping(prev); ++ if (to_wakeup) ++ try_to_wake_up_local(to_wakeup, &rf); ++ } ++ } ++ switch_count = &prev->nvcsw; ++ } ++ ++ next = pick_next_task(rq, prev, &rf); ++ clear_tsk_need_resched(prev); ++ clear_preempt_need_resched(); ++ ++ if (likely(prev != next)) { ++ rq->nr_switches++; ++ rq->curr = next; ++ /* ++ * The membarrier system call requires each architecture ++ * to have a full memory barrier after updating ++ * rq->curr, before returning to user-space. ++ * ++ * Here are the schemes providing that barrier on the ++ * various architectures: ++ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. ++ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. 
++ * - finish_lock_switch() for weakly-ordered ++ * architectures where spin_unlock is a full barrier, ++ * - switch_to() for arm64 (weakly-ordered, spin_unlock ++ * is a RELEASE barrier), ++ */ ++ ++*switch_count; ++ ++ trace_sched_switch(preempt, prev, next); ++ ++ /* Also unlocks the rq: */ ++ rq = context_switch(rq, prev, next, &rf); ++ } else { ++ rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); ++ rq_unlock_irq(rq, &rf); ++ } ++ ++ balance_callback(rq); ++} ++ ++void __noreturn do_task_dead(void) ++{ ++ /* Causes final put_task_struct in finish_task_switch(): */ ++ set_special_state(TASK_DEAD); ++ ++ /* Tell freezer to ignore us: */ ++ current->flags |= PF_NOFREEZE; ++ ++ __schedule(false); ++ BUG(); ++ ++ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ ++ for (;;) ++ cpu_relax(); ++} ++ ++static inline void sched_submit_work(struct task_struct *tsk) ++{ ++ if (!tsk->state || tsk_is_pi_blocked(tsk)) ++ return; ++ /* ++ * If we are going to sleep and we have plugged IO queued, ++ * make sure to submit it to avoid deadlocks. ++ */ ++ if (blk_needs_flush_plug(tsk)) ++ blk_schedule_flush_plug(tsk); ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); ++ do { ++ preempt_disable(); ++ __schedule(false); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL(schedule); ++ ++/* ++ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted ++ * state (have scheduled out non-voluntarily) by making sure that all ++ * tasks have either left the run queue or have gone into user space. ++ * As idle tasks do not do either, they must not ever be preempted ++ * (schedule out non-voluntarily). ++ * ++ * schedule_idle() is similar to schedule_preempt_disable() except that it ++ * never enables preemption because it does not call sched_submit_work(). ++ */ ++void __sched schedule_idle(void) ++{ ++ /* ++ * As this skips calling sched_submit_work(), which the idle task does ++ * regardless because that function is a nop when the task is in a ++ * TASK_RUNNING state, make sure this isn't used someplace that the ++ * current task can be in any other state. Note, idle is always in the ++ * TASK_RUNNING state. ++ */ ++ WARN_ON_ONCE(current->state); ++ do { ++ __schedule(false); ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_CONTEXT_TRACKING ++asmlinkage __visible void __sched schedule_user(void) ++{ ++ /* ++ * If we come here after a random call to set_need_resched(), ++ * or we have been woken up remotely but the IPI has not yet arrived, ++ * we haven't yet exited the RCU idle mode. Do it here manually until ++ * we find a better solution. ++ * ++ * NB: There are buggy callers of this function. Ideally we ++ * should warn if prev_state != CONTEXT_USER, but that will trigger ++ * too frequently to make sense yet. ++ */ ++ enum ctx_state prev_state = exception_enter(); ++ schedule(); ++ exception_exit(prev_state); ++} ++#endif ++ ++/** ++ * schedule_preempt_disabled - called with preemption disabled ++ * ++ * Returns with preemption disabled. 
Note: preempt_count must be 1 ++ */ ++void __sched schedule_preempt_disabled(void) ++{ ++ sched_preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++} ++ ++static void __sched notrace preempt_schedule_common(void) ++{ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ __schedule(true); ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ ++ /* ++ * Check again in case we missed a preemption opportunity ++ * between schedule and now. ++ */ ++ } while (need_resched()); ++} ++ ++#ifdef CONFIG_PREEMPT ++/* ++ * this is the entry point to schedule() from in-kernel preemption ++ * off of preempt_enable. Kernel preemptions off return from interrupt ++ * occur there and call schedule directly. ++ */ ++asmlinkage __visible void __sched notrace preempt_schedule(void) ++{ ++ /* ++ * If there is a non-zero preempt_count or interrupts are disabled, ++ * we do not want to preempt the current task. Just return.. ++ */ ++ if (likely(!preemptible())) ++ return; ++ ++ preempt_schedule_common(); ++} ++NOKPROBE_SYMBOL(preempt_schedule); ++EXPORT_SYMBOL(preempt_schedule); ++ ++/** ++ * preempt_schedule_notrace - preempt_schedule called by tracing ++ * ++ * The tracing infrastructure uses preempt_enable_notrace to prevent ++ * recursion and tracing preempt enabling caused by the tracing ++ * infrastructure itself. But as tracing can happen in areas coming ++ * from userspace or just about to enter userspace, a preempt enable ++ * can occur before user_exit() is called. This will cause the scheduler ++ * to be called when the system is still in usermode. ++ * ++ * To prevent this, the preempt_enable_notrace will use this function ++ * instead of preempt_schedule() to exit user context if needed before ++ * calling the scheduler. ++ */ ++asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) ++{ ++ enum ctx_state prev_ctx; ++ ++ if (likely(!preemptible())) ++ return; ++ ++ do { ++ /* ++ * Because the function tracer can trace preempt_count_sub() ++ * and it also uses preempt_enable/disable_notrace(), if ++ * NEED_RESCHED is set, the preempt_enable_notrace() called ++ * by the function tracer will call this function again and ++ * cause infinite recursion. ++ * ++ * Preemption must be disabled here before the function ++ * tracer can trace. Break up preempt_disable() into two ++ * calls. One to disable preemption without fear of being ++ * traced. The other to still record the preemption latency, ++ * which can also be traced by the function tracer. ++ */ ++ preempt_disable_notrace(); ++ preempt_latency_start(1); ++ /* ++ * Needs preempt disabled in case user_exit() is traced ++ * and the tracer calls preempt_enable_notrace() causing ++ * an infinite recursion. 
++ */ ++ prev_ctx = exception_enter(); ++ __schedule(true); ++ exception_exit(prev_ctx); ++ ++ preempt_latency_stop(1); ++ preempt_enable_no_resched_notrace(); ++ } while (need_resched()); ++} ++EXPORT_SYMBOL_GPL(preempt_schedule_notrace); ++ ++#endif /* CONFIG_PREEMPT */ ++ ++/* ++ * this is the entry point to schedule() from kernel preemption ++ * off of irq context. ++ * Note, that this is called and return with irqs disabled. This will ++ * protect us against recursive calling from irq. ++ */ ++asmlinkage __visible void __sched preempt_schedule_irq(void) ++{ ++ enum ctx_state prev_state; ++ ++ /* Catch callers which need to be fixed */ ++ BUG_ON(preempt_count() || !irqs_disabled()); ++ ++ prev_state = exception_enter(); ++ ++ do { ++ preempt_disable(); ++ local_irq_enable(); ++ __schedule(true); ++ local_irq_disable(); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++ ++ exception_exit(prev_state); ++} ++ ++int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, ++ void *key) ++{ ++ return try_to_wake_up(curr->private, mode, wake_flags); ++} ++EXPORT_SYMBOL(default_wake_function); ++ ++#ifdef CONFIG_RT_MUTEXES ++ ++static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) ++{ ++ if (pi_task) ++ prio = min(prio, pi_task->prio); ++ ++ return prio; ++} ++ ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ struct task_struct *pi_task = rt_mutex_get_top_task(p); ++ ++ return __rt_effective_prio(pi_task, prio); ++} ++ ++/* ++ * rt_mutex_setprio - set the current priority of a task ++ * @p: task to boost ++ * @pi_task: donor task ++ * ++ * This function changes the 'effective' priority of a task. It does ++ * not touch ->normal_prio like __setscheduler(). ++ * ++ * Used by the rt_mutex code to implement priority inheritance ++ * logic. Call site only calls if the priority of the task changed. ++ */ ++void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) ++{ ++ int prio, oldprio, queued, running, queue_flag = ++ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ const struct sched_class *prev_class; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ /* XXX used to be waiter->prio, not waiter->task->prio */ ++ prio = __rt_effective_prio(pi_task, p->normal_prio); ++ ++ /* ++ * If nothing changed; bail early. ++ */ ++ if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) ++ return; ++ ++ rq = __task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ /* ++ * Set under pi_lock && rq->lock, such that the value can be used under ++ * either lock. ++ * ++ * Note that there is loads of tricky to make this pointer cache work ++ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to ++ * ensure a task is de-boosted (pi_task is set to NULL) before the ++ * task is allowed to run again (and can exit). This ensures the pointer ++ * points to a blocked task -- which guaratees the task is present. ++ */ ++ p->pi_top_task = pi_task; ++ ++ /* ++ * For FIFO/RR we only need to set prio, if that matches we're done. ++ */ ++ if (prio == p->prio && !dl_prio(prio)) ++ goto out_unlock; ++ ++ /* ++ * Idle task boosting is a nono in general. There is one ++ * exception, when PREEMPT_RT and NOHZ is active: ++ * ++ * The idle task calls get_next_timer_interrupt() and holds ++ * the timer wheel base->lock on the CPU and another CPU wants ++ * to access the timer (probably to cancel it). 
We can safely ++ * ignore the boosting request, as the idle CPU runs this code ++ * with interrupts disabled and will complete the lock ++ * protected section without being interrupted. So there is no ++ * real need to boost. ++ */ ++ if (unlikely(p == rq->idle)) { ++ WARN_ON(p != rq->curr); ++ WARN_ON(p->pi_blocked_on); ++ goto out_unlock; ++ } ++ ++ trace_sched_pi_setprio(p, pi_task); ++ oldprio = p->prio; ++ ++ if (oldprio == prio) ++ queue_flag &= ~DEQUEUE_MOVE; ++ ++ prev_class = p->sched_class; ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, queue_flag); ++ if (running) ++ put_prev_task(rq, p); ++ ++ /* ++ * Boosting condition are: ++ * 1. -rt task is running and holds mutex A ++ * --> -dl task blocks on mutex A ++ * ++ * 2. -dl task is running and holds mutex A ++ * --> -dl task blocks on mutex A and could preempt the ++ * running task ++ */ ++ if (dl_prio(prio)) { ++ if (!dl_prio(p->normal_prio) || ++ (pi_task && dl_prio(pi_task->prio) && ++ dl_entity_preempt(&pi_task->dl, &p->dl))) { ++ p->dl.dl_boosted = 1; ++ queue_flag |= ENQUEUE_REPLENISH; ++ } else ++ p->dl.dl_boosted = 0; ++ p->sched_class = &dl_sched_class; ++ } else if (rt_prio(prio)) { ++ if (dl_prio(oldprio)) ++ p->dl.dl_boosted = 0; ++ if (oldprio < prio) ++ queue_flag |= ENQUEUE_HEAD; ++ p->sched_class = &rt_sched_class; ++ } else { ++ if (dl_prio(oldprio)) ++ p->dl.dl_boosted = 0; ++ if (rt_prio(oldprio)) ++ p->rt.timeout = 0; ++ p->sched_class = &fair_sched_class; ++ } ++ ++ p->prio = prio; ++ ++ if (queued) ++ enqueue_task(rq, p, queue_flag); ++ if (running) ++ set_curr_task(rq, p); ++ ++ check_class_changed(rq, p, prev_class, oldprio); ++out_unlock: ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ __task_rq_unlock(rq, &rf); ++ ++ balance_callback(rq); ++ preempt_enable(); ++} ++#else ++static inline int rt_effective_prio(struct task_struct *p, int prio) ++{ ++ return prio; ++} ++#endif ++ ++void set_user_nice(struct task_struct *p, long nice) ++{ ++ bool queued, running; ++ int old_prio, delta; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) ++ return; ++ /* ++ * We have to be careful, if called from sys_setpriority(), ++ * the task might be in the middle of scheduling on another CPU. 
++ */ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ /* ++ * The RT priorities are set via sched_setscheduler(), but we still ++ * allow the 'normal' nice value to be set - but as expected ++ * it wont have any effect on scheduling until the task is ++ * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: ++ */ ++ if (task_has_dl_policy(p) || task_has_rt_policy(p)) { ++ p->static_prio = NICE_TO_PRIO(nice); ++ goto out_unlock; ++ } ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->static_prio = NICE_TO_PRIO(nice); ++ set_load_weight(p, true); ++ old_prio = p->prio; ++ p->prio = effective_prio(p); ++ delta = p->prio - old_prio; ++ ++ if (queued) { ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ /* ++ * If the task increased its priority or is running and ++ * lowered its priority, then reschedule its CPU: ++ */ ++ if (delta < 0 || (delta > 0 && task_running(rq, p))) ++ resched_curr(rq); ++ } ++ if (running) ++ set_curr_task(rq, p); ++out_unlock: ++ task_rq_unlock(rq, p, &rf); ++} ++EXPORT_SYMBOL(set_user_nice); ++ ++/* ++ * can_nice - check if a task can reduce its nice value ++ * @p: task ++ * @nice: nice value ++ */ ++int can_nice(const struct task_struct *p, const int nice) ++{ ++ /* Convert nice value [19,-20] to rlimit style value [1,40]: */ ++ int nice_rlim = nice_to_rlimit(nice); ++ ++ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || ++ capable(CAP_SYS_NICE)); ++} ++ ++#ifdef __ARCH_WANT_SYS_NICE ++ ++/* ++ * sys_nice - change the priority of the current process. ++ * @increment: priority increment ++ * ++ * sys_setpriority is a more generic, but much slower function that ++ * does similar things. ++ */ ++SYSCALL_DEFINE1(nice, int, increment) ++{ ++ long nice, retval; ++ ++ /* ++ * Setpriority might change our priority at the same moment. ++ * We don't have to worry. Conceptually one call occurs first ++ * and we have a single winner. ++ */ ++ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); ++ nice = task_nice(current) + increment; ++ ++ nice = clamp_val(nice, MIN_NICE, MAX_NICE); ++ if (increment < 0 && !can_nice(current, nice)) ++ return -EPERM; ++ ++ retval = security_task_setnice(current, nice); ++ if (retval) ++ return retval; ++ ++ set_user_nice(current, nice); ++ return 0; ++} ++ ++#endif ++ ++/** ++ * task_prio - return the priority value of a given task. ++ * @p: the task in question. ++ * ++ * Return: The priority value as seen by users in /proc. ++ * RT tasks are offset by -200. Normal tasks are centered ++ * around 0, value goes from -16 to +15. ++ */ ++int task_prio(const struct task_struct *p) ++{ ++ return p->prio - MAX_RT_PRIO; ++} ++ ++/** ++ * idle_cpu - is a given CPU idle currently? ++ * @cpu: the processor in question. ++ * ++ * Return: 1 if the CPU is currently idle. 0 otherwise. ++ */ ++int idle_cpu(int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ if (rq->curr != rq->idle) ++ return 0; ++ ++ if (rq->nr_running) ++ return 0; ++ ++#ifdef CONFIG_SMP ++ if (!llist_empty(&rq->wake_list)) ++ return 0; ++#endif ++ ++ return 1; ++} ++ ++/** ++ * available_idle_cpu - is a given CPU idle for enqueuing work. ++ * @cpu: the CPU in question. ++ * ++ * Return: 1 if the CPU is currently idle. 0 otherwise. ++ */ ++int available_idle_cpu(int cpu) ++{ ++ if (!idle_cpu(cpu)) ++ return 0; ++ ++ if (vcpu_is_preempted(cpu)) ++ return 0; ++ ++ return 1; ++} ++ ++/** ++ * idle_task - return the idle task for a given CPU. 
++ * @cpu: the processor in question. ++ * ++ * Return: The idle task for the CPU @cpu. ++ */ ++struct task_struct *idle_task(int cpu) ++{ ++ return cpu_rq(cpu)->idle; ++} ++ ++/** ++ * find_process_by_pid - find a process with a matching PID value. ++ * @pid: the pid in question. ++ * ++ * The task of @pid, if found. %NULL otherwise. ++ */ ++static struct task_struct *find_process_by_pid(pid_t pid) ++{ ++ return pid ? find_task_by_vpid(pid) : current; ++} ++ ++/* ++ * sched_setparam() passes in -1 for its policy, to let the functions ++ * it calls know not to change it. ++ */ ++#define SETPARAM_POLICY -1 ++ ++static void __setscheduler_params(struct task_struct *p, ++ const struct sched_attr *attr) ++{ ++ int policy = attr->sched_policy; ++ ++ if (policy == SETPARAM_POLICY) ++ policy = p->policy; ++ ++ p->policy = policy; ++ ++ if (dl_policy(policy)) ++ __setparam_dl(p, attr); ++ else if (fair_policy(policy)) ++ p->static_prio = NICE_TO_PRIO(attr->sched_nice); ++ ++ /* ++ * __sched_setscheduler() ensures attr->sched_priority == 0 when ++ * !rt_policy. Always setting this ensures that things like ++ * getparam()/getattr() don't report silly values for !rt tasks. ++ */ ++ p->rt_priority = attr->sched_priority; ++ p->normal_prio = normal_prio(p); ++ set_load_weight(p, true); ++} ++ ++/* Actually do priority change: must hold pi & rq lock. */ ++static void __setscheduler(struct rq *rq, struct task_struct *p, ++ const struct sched_attr *attr, bool keep_boost) ++{ ++ __setscheduler_params(p, attr); ++ ++ /* ++ * Keep a potential priority boosting if called from ++ * sched_setscheduler(). ++ */ ++ p->prio = normal_prio(p); ++ if (keep_boost) ++ p->prio = rt_effective_prio(p, p->prio); ++ ++ if (dl_prio(p->prio)) ++ p->sched_class = &dl_sched_class; ++ else if (rt_prio(p->prio)) ++ p->sched_class = &rt_sched_class; ++ else ++ p->sched_class = &fair_sched_class; ++} ++ ++/* ++ * Check the target process has a UID that matches the current process's: ++ */ ++static bool check_same_owner(struct task_struct *p) ++{ ++ const struct cred *cred = current_cred(), *pcred; ++ bool match; ++ ++ rcu_read_lock(); ++ pcred = __task_cred(p); ++ match = (uid_eq(cred->euid, pcred->euid) || ++ uid_eq(cred->euid, pcred->uid)); ++ rcu_read_unlock(); ++ return match; ++} ++ ++static int __sched_setscheduler(struct task_struct *p, ++ const struct sched_attr *attr, ++ bool user, bool pi) ++{ ++ int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : ++ MAX_RT_PRIO - 1 - attr->sched_priority; ++ int retval, oldprio, oldpolicy = -1, queued, running; ++ int new_effective_prio, policy = attr->sched_policy; ++ const struct sched_class *prev_class; ++ struct rq_flags rf; ++ int reset_on_fork; ++ int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ struct rq *rq; ++ ++ /* The pi code expects interrupts enabled */ ++ BUG_ON(pi && in_interrupt()); ++recheck: ++ /* Double check policy once rq lock held: */ ++ if (policy < 0) { ++ reset_on_fork = p->sched_reset_on_fork; ++ policy = oldpolicy = p->policy; ++ } else { ++ reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK); ++ ++ if (!valid_policy(policy)) ++ return -EINVAL; ++ } ++ ++ if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) ++ return -EINVAL; ++ ++ /* ++ * Valid priorities for SCHED_FIFO and SCHED_RR are ++ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, ++ * SCHED_BATCH and SCHED_IDLE is 0. 
++ */ ++ if ((p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) || ++ (!p->mm && attr->sched_priority > MAX_RT_PRIO-1)) ++ return -EINVAL; ++ if ((dl_policy(policy) && !__checkparam_dl(attr)) || ++ (rt_policy(policy) != (attr->sched_priority != 0))) ++ return -EINVAL; ++ ++ /* ++ * Allow unprivileged RT tasks to decrease priority: ++ */ ++ if (user && !capable(CAP_SYS_NICE)) { ++ if (fair_policy(policy)) { ++ if (attr->sched_nice < task_nice(p) && ++ !can_nice(p, attr->sched_nice)) ++ return -EPERM; ++ } ++ ++ if (rt_policy(policy)) { ++ unsigned long rlim_rtprio = ++ task_rlimit(p, RLIMIT_RTPRIO); ++ ++ /* Can't set/change the rt policy: */ ++ if (policy != p->policy && !rlim_rtprio) ++ return -EPERM; ++ ++ /* Can't increase priority: */ ++ if (attr->sched_priority > p->rt_priority && ++ attr->sched_priority > rlim_rtprio) ++ return -EPERM; ++ } ++ ++ /* ++ * Can't set/change SCHED_DEADLINE policy at all for now ++ * (safest behavior); in the future we would like to allow ++ * unprivileged DL tasks to increase their relative deadline ++ * or reduce their runtime (both ways reducing utilization) ++ */ ++ if (dl_policy(policy)) ++ return -EPERM; ++ ++ /* ++ * Treat SCHED_IDLE as nice 20. Only allow a switch to ++ * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. ++ */ ++ if (idle_policy(p->policy) && !idle_policy(policy)) { ++ if (!can_nice(p, task_nice(p))) ++ return -EPERM; ++ } ++ ++ /* Can't change other user's priorities: */ ++ if (!check_same_owner(p)) ++ return -EPERM; ++ ++ /* Normal users shall not reset the sched_reset_on_fork flag: */ ++ if (p->sched_reset_on_fork && !reset_on_fork) ++ return -EPERM; ++ } ++ ++ if (user) { ++ if (attr->sched_flags & SCHED_FLAG_SUGOV) ++ return -EINVAL; ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ return retval; ++ } ++ ++ /* ++ * Make sure no PI-waiters arrive (or leave) while we are ++ * changing the priority of the task: ++ * ++ * To be able to change p->policy safely, the appropriate ++ * runqueue lock must be held. ++ */ ++ rq = task_rq_lock(p, &rf); ++ update_rq_clock(rq); ++ ++ /* ++ * Changing the policy of the stop threads its a very bad idea: ++ */ ++ if (p == rq->stop) { ++ task_rq_unlock(rq, p, &rf); ++ return -EINVAL; ++ } ++ ++ /* ++ * If not changing anything there's no need to proceed further, ++ * but store a possible modification of reset_on_fork. ++ */ ++ if (unlikely(policy == p->policy)) { ++ if (fair_policy(policy) && attr->sched_nice != task_nice(p)) ++ goto change; ++ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) ++ goto change; ++ if (dl_policy(policy) && dl_param_changed(p, attr)) ++ goto change; ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ task_rq_unlock(rq, p, &rf); ++ return 0; ++ } ++change: ++ ++ if (user) { ++#ifdef CONFIG_RT_GROUP_SCHED ++ /* ++ * Do not allow realtime tasks into groups that have no runtime ++ * assigned. ++ */ ++ if (rt_bandwidth_enabled() && rt_policy(policy) && ++ task_group(p)->rt_bandwidth.rt_runtime == 0 && ++ !task_group_is_autogroup(task_group(p))) { ++ task_rq_unlock(rq, p, &rf); ++ return -EPERM; ++ } ++#endif ++#ifdef CONFIG_SMP ++ if (dl_bandwidth_enabled() && dl_policy(policy) && ++ !(attr->sched_flags & SCHED_FLAG_SUGOV)) { ++ cpumask_t *span = rq->rd->span; ++ ++ /* ++ * Don't allow tasks with an affinity mask smaller than ++ * the entire root_domain to become SCHED_DEADLINE. We ++ * will also fail if there's no bandwidth available. 
++ */ ++ if (!cpumask_subset(span, &p->cpus_allowed) || ++ rq->rd->dl_bw.bw == 0) { ++ task_rq_unlock(rq, p, &rf); ++ return -EPERM; ++ } ++ } ++#endif ++ } ++ ++ /* Re-check policy now with rq lock held: */ ++ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { ++ policy = oldpolicy = -1; ++ task_rq_unlock(rq, p, &rf); ++ goto recheck; ++ } ++ ++ /* ++ * If setscheduling to SCHED_DEADLINE (or changing the parameters ++ * of a SCHED_DEADLINE task) we need to check if enough bandwidth ++ * is available. ++ */ ++ if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { ++ task_rq_unlock(rq, p, &rf); ++ return -EBUSY; ++ } ++ ++ p->sched_reset_on_fork = reset_on_fork; ++ oldprio = p->prio; ++ ++ if (pi) { ++ /* ++ * Take priority boosted tasks into account. If the new ++ * effective priority is unchanged, we just store the new ++ * normal parameters and do not touch the scheduler class and ++ * the runqueue. This will be done when the task deboost ++ * itself. ++ */ ++ new_effective_prio = rt_effective_prio(p, newprio); ++ if (new_effective_prio == oldprio) ++ queue_flags &= ~DEQUEUE_MOVE; ++ } ++ ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ if (queued) ++ dequeue_task(rq, p, queue_flags); ++ if (running) ++ put_prev_task(rq, p); ++ ++ prev_class = p->sched_class; ++ __setscheduler(rq, p, attr, pi); ++ ++ if (queued) { ++ /* ++ * We enqueue to tail when the priority of a task is ++ * increased (user space view). ++ */ ++ if (oldprio < p->prio) ++ queue_flags |= ENQUEUE_HEAD; ++ ++ enqueue_task(rq, p, queue_flags); ++ } ++ if (running) ++ set_curr_task(rq, p); ++ ++ check_class_changed(rq, p, prev_class, oldprio); ++ ++ /* Avoid rq from going away on us: */ ++ preempt_disable(); ++ task_rq_unlock(rq, p, &rf); ++ ++ if (pi) ++ rt_mutex_adjust_pi(p); ++ ++ /* Run balance callbacks after we've adjusted the PI chain: */ ++ balance_callback(rq); ++ preempt_enable(); ++ ++ return 0; ++} ++ ++static int _sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param, bool check) ++{ ++ struct sched_attr attr = { ++ .sched_policy = policy, ++ .sched_priority = param->sched_priority, ++ .sched_nice = PRIO_TO_NICE(p->static_prio), ++ }; ++ ++ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ ++ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { ++ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ policy &= ~SCHED_RESET_ON_FORK; ++ attr.sched_policy = policy; ++ } ++ ++ return __sched_setscheduler(p, &attr, check, true); ++} ++/** ++ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. ++ * @p: the task in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ * ++ * NOTE that the task may be already dead. ++ */ ++int sched_setscheduler(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, true); ++} ++EXPORT_SYMBOL_GPL(sched_setscheduler); ++ ++int sched_setattr(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, true, true); ++} ++EXPORT_SYMBOL_GPL(sched_setattr); ++ ++int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) ++{ ++ return __sched_setscheduler(p, attr, false, true); ++} ++ ++/** ++ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. ++ * @p: the task in question. 
++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Just like sched_setscheduler, only don't bother checking if the ++ * current context has permission. For example, this is needed in ++ * stop_machine(): we create temporary high priority worker threads, ++ * but our caller might not have that capability. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++int sched_setscheduler_nocheck(struct task_struct *p, int policy, ++ const struct sched_param *param) ++{ ++ return _sched_setscheduler(p, policy, param, false); ++} ++EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); ++ ++static int ++do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) ++{ ++ struct sched_param lparam; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) ++ return -EFAULT; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setscheduler(p, policy, &lparam); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/* ++ * Mimics kernel/events/core.c perf_copy_attr(). ++ */ ++static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) ++{ ++ u32 size; ++ int ret; ++ ++ if (!access_ok(uattr, SCHED_ATTR_SIZE_VER0)) ++ return -EFAULT; ++ ++ /* Zero the full structure, so that a short copy will be nice: */ ++ memset(attr, 0, sizeof(*attr)); ++ ++ ret = get_user(size, &uattr->size); ++ if (ret) ++ return ret; ++ ++ /* Bail out on silly large: */ ++ if (size > PAGE_SIZE) ++ goto err_size; ++ ++ /* ABI compatibility quirk: */ ++ if (!size) ++ size = SCHED_ATTR_SIZE_VER0; ++ ++ if (size < SCHED_ATTR_SIZE_VER0) ++ goto err_size; ++ ++ /* ++ * If we're handed a bigger struct than we know of, ++ * ensure all the unknown bits are 0 - i.e. new ++ * user-space does not rely on any kernel feature ++ * extensions we dont know about yet. ++ */ ++ if (size > sizeof(*attr)) { ++ unsigned char __user *addr; ++ unsigned char __user *end; ++ unsigned char val; ++ ++ addr = (void __user *)uattr + sizeof(*attr); ++ end = (void __user *)uattr + size; ++ ++ for (; addr < end; addr++) { ++ ret = get_user(val, addr); ++ if (ret) ++ return ret; ++ if (val) ++ goto err_size; ++ } ++ size = sizeof(*attr); ++ } ++ ++ ret = copy_from_user(attr, uattr, size); ++ if (ret) ++ return -EFAULT; ++ ++ /* ++ * XXX: Do we want to be lenient like existing syscalls; or do we want ++ * to be strict and return an error on out-of-bounds values? ++ */ ++ attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); ++ ++ return 0; ++ ++err_size: ++ put_user(sizeof(*attr), &uattr->size); ++ return -E2BIG; ++} ++ ++/** ++ * sys_sched_setscheduler - set/change the scheduler policy and RT priority ++ * @pid: the pid in question. ++ * @policy: new policy. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) ++{ ++ if (policy < 0) ++ return -EINVAL; ++ ++ return do_sched_setscheduler(pid, policy, param); ++} ++ ++/** ++ * sys_sched_setparam - set/change the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the new RT priority. ++ * ++ * Return: 0 on success. An error code otherwise. 
++ */ ++SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); ++} ++ ++/** ++ * sys_sched_setattr - same as above, but with extended sched_attr ++ * @pid: the pid in question. ++ * @uattr: structure containing the extended parameters. ++ * @flags: for future extension. ++ */ ++SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, flags) ++{ ++ struct sched_attr attr; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || flags) ++ return -EINVAL; ++ ++ retval = sched_copy_attr(uattr, &attr); ++ if (retval) ++ return retval; ++ ++ if ((int)attr.sched_policy < 0) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (p != NULL) ++ retval = sched_setattr(p, &attr); ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getscheduler - get the policy (scheduling class) of a thread ++ * @pid: the pid in question. ++ * ++ * Return: On success, the policy of the thread. Otherwise, a negative error ++ * code. ++ */ ++SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) ++{ ++ struct task_struct *p; ++ int retval; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (p) { ++ retval = security_task_getscheduler(p); ++ if (!retval) ++ retval = p->policy ++ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); ++ } ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/** ++ * sys_sched_getparam - get the RT priority of a thread ++ * @pid: the pid in question. ++ * @param: structure containing the RT priority. ++ * ++ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error ++ * code. ++ */ ++SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) ++{ ++ struct sched_param lp = { .sched_priority = 0 }; ++ struct task_struct *p; ++ int retval; ++ ++ if (!param || pid < 0) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ if (task_has_rt_policy(p)) ++ lp.sched_priority = p->rt_priority; ++ rcu_read_unlock(); ++ ++ /* ++ * This one might sleep, we cannot do it with a spinlock held ... ++ */ ++ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; ++ ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++static int sched_read_attr(struct sched_attr __user *uattr, ++ struct sched_attr *attr, ++ unsigned int usize) ++{ ++ int ret; ++ ++ if (!access_ok(uattr, usize)) ++ return -EFAULT; ++ ++ /* ++ * If we're handed a smaller struct than we know of, ++ * ensure all the unknown bits are 0 - i.e. old ++ * user-space does not get uncomplete information. ++ */ ++ if (usize < sizeof(*attr)) { ++ unsigned char *addr; ++ unsigned char *end; ++ ++ addr = (void *)attr + usize; ++ end = (void *)attr + sizeof(*attr); ++ ++ for (; addr < end; addr++) { ++ if (*addr) ++ return -EFBIG; ++ } ++ ++ attr->size = usize; ++ } ++ ++ ret = copy_to_user(uattr, attr, attr->size); ++ if (ret) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++/** ++ * sys_sched_getattr - similar to sched_getparam, but with sched_attr ++ * @pid: the pid in question. ++ * @uattr: structure containing the extended parameters. ++ * @size: sizeof(attr) for fwd/bwd comp. ++ * @flags: for future extension. 
++ */ ++SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, ++ unsigned int, size, unsigned int, flags) ++{ ++ struct sched_attr attr = { ++ .size = sizeof(struct sched_attr), ++ }; ++ struct task_struct *p; ++ int retval; ++ ++ if (!uattr || pid < 0 || size > PAGE_SIZE || ++ size < SCHED_ATTR_SIZE_VER0 || flags) ++ return -EINVAL; ++ ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ retval = -ESRCH; ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ attr.sched_policy = p->policy; ++ if (p->sched_reset_on_fork) ++ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; ++ if (task_has_dl_policy(p)) ++ __getparam_dl(p, &attr); ++ else if (task_has_rt_policy(p)) ++ attr.sched_priority = p->rt_priority; ++ else ++ attr.sched_nice = task_nice(p); ++ ++ rcu_read_unlock(); ++ ++ retval = sched_read_attr(uattr, &attr, size); ++ return retval; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) ++{ ++ cpumask_var_t cpus_allowed, new_mask; ++ struct task_struct *p; ++ int retval; ++ ++ rcu_read_lock(); ++ ++ p = find_process_by_pid(pid); ++ if (!p) { ++ rcu_read_unlock(); ++ return -ESRCH; ++ } ++ ++ /* Prevent p going away */ ++ get_task_struct(p); ++ rcu_read_unlock(); ++ ++ if (p->flags & PF_NO_SETAFFINITY) { ++ retval = -EINVAL; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_put_task; ++ } ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { ++ retval = -ENOMEM; ++ goto out_free_cpus_allowed; ++ } ++ retval = -EPERM; ++ if (!check_same_owner(p)) { ++ rcu_read_lock(); ++ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { ++ rcu_read_unlock(); ++ goto out_free_new_mask; ++ } ++ rcu_read_unlock(); ++ } ++ ++ retval = security_task_setscheduler(p); ++ if (retval) ++ goto out_free_new_mask; ++ ++ ++ cpuset_cpus_allowed(p, cpus_allowed); ++ cpumask_and(new_mask, in_mask, cpus_allowed); ++ ++ /* ++ * Since bandwidth control happens on root_domain basis, ++ * if admission test is enabled, we only admit -deadline ++ * tasks allowed to run on all the CPUs in the task's ++ * root_domain. ++ */ ++#ifdef CONFIG_SMP ++ if (task_has_dl_policy(p) && dl_bandwidth_enabled()) { ++ rcu_read_lock(); ++ if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) { ++ retval = -EBUSY; ++ rcu_read_unlock(); ++ goto out_free_new_mask; ++ } ++ rcu_read_unlock(); ++ } ++#endif ++again: ++ retval = __set_cpus_allowed_ptr(p, new_mask, true); ++ ++ if (!retval) { ++ cpuset_cpus_allowed(p, cpus_allowed); ++ if (!cpumask_subset(new_mask, cpus_allowed)) { ++ /* ++ * We must have raced with a concurrent cpuset ++ * update. Just reset the cpus_allowed to the ++ * cpuset's cpus_allowed ++ */ ++ cpumask_copy(new_mask, cpus_allowed); ++ goto again; ++ } ++ } ++out_free_new_mask: ++ free_cpumask_var(new_mask); ++out_free_cpus_allowed: ++ free_cpumask_var(cpus_allowed); ++out_put_task: ++ put_task_struct(p); ++ return retval; ++} ++ ++static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, ++ struct cpumask *new_mask) ++{ ++ if (len < cpumask_size()) ++ cpumask_clear(new_mask); ++ else if (len > cpumask_size()) ++ len = cpumask_size(); ++ ++ return copy_from_user(new_mask, user_mask_ptr, len) ? 
-EFAULT : 0; ++} ++ ++/** ++ * sys_sched_setaffinity - set the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to the new CPU mask ++ * ++ * Return: 0 on success. An error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ cpumask_var_t new_mask; ++ int retval; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); ++ if (retval == 0) ++ retval = sched_setaffinity(pid, new_mask); ++ free_cpumask_var(new_mask); ++ return retval; ++} ++ ++long sched_getaffinity(pid_t pid, struct cpumask *mask) ++{ ++ struct task_struct *p; ++ unsigned long flags; ++ int retval; ++ ++ rcu_read_lock(); ++ ++ retval = -ESRCH; ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ ++out_unlock: ++ rcu_read_unlock(); ++ ++ return retval; ++} ++ ++/** ++ * sys_sched_getaffinity - get the CPU affinity of a process ++ * @pid: pid of the process ++ * @len: length in bytes of the bitmask pointed to by user_mask_ptr ++ * @user_mask_ptr: user-space pointer to hold the current CPU mask ++ * ++ * Return: size of CPU mask copied to user_mask_ptr on success. An ++ * error code otherwise. ++ */ ++SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, ++ unsigned long __user *, user_mask_ptr) ++{ ++ int ret; ++ cpumask_var_t mask; ++ ++ if ((len * BITS_PER_BYTE) < nr_cpu_ids) ++ return -EINVAL; ++ if (len & (sizeof(unsigned long)-1)) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ ret = sched_getaffinity(pid, mask); ++ if (ret == 0) { ++ unsigned int retlen = min(len, cpumask_size()); ++ ++ if (copy_to_user(user_mask_ptr, mask, retlen)) ++ ret = -EFAULT; ++ else ++ ret = retlen; ++ } ++ free_cpumask_var(mask); ++ ++ return ret; ++} ++ ++/** ++ * sys_sched_yield - yield the current processor to other threads. ++ * ++ * This function yields the current CPU to other tasks. If there are no ++ * other threads running on this CPU then this function will return. ++ * ++ * Return: 0. ++ */ ++static void do_sched_yield(void) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ local_irq_disable(); ++ rq = this_rq(); ++ rq_lock(rq, &rf); ++ ++ schedstat_inc(rq->yld_count); ++ current->sched_class->yield_task(rq); ++ ++ /* ++ * Since we are going to call schedule() anyway, there's ++ * no need to preempt or enable interrupts: ++ */ ++ preempt_disable(); ++ rq_unlock(rq, &rf); ++ sched_preempt_enable_no_resched(); ++ ++ schedule(); ++} ++ ++SYSCALL_DEFINE0(sched_yield) ++{ ++ do_sched_yield(); ++ return 0; ++} ++ ++#ifndef CONFIG_PREEMPT ++int __sched _cond_resched(void) ++{ ++ if (should_resched(0)) { ++ preempt_schedule_common(); ++ return 1; ++ } ++ rcu_all_qs(); ++ return 0; ++} ++EXPORT_SYMBOL(_cond_resched); ++#endif ++ ++/* ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, ++ * call schedule, and on return reacquire the lock. ++ * ++ * This works OK both with and without CONFIG_PREEMPT. We do strange low-level ++ * operations here to prevent schedule() from being called twice (once via ++ * spin_unlock(), once by hand). 
++ */ ++int __cond_resched_lock(spinlock_t *lock) ++{ ++ int resched = should_resched(PREEMPT_LOCK_OFFSET); ++ int ret = 0; ++ ++ lockdep_assert_held(lock); ++ ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock(lock); ++ if (resched) ++ preempt_schedule_common(); ++ else ++ cpu_relax(); ++ ret = 1; ++ spin_lock(lock); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(__cond_resched_lock); ++ ++/** ++ * yield - yield the current processor to other threads. ++ * ++ * Do not ever use this function, there's a 99% chance you're doing it wrong. ++ * ++ * The scheduler is at all times free to pick the calling task as the most ++ * eligible task to run, if removing the yield() call from your code breaks ++ * it, its already broken. ++ * ++ * Typical broken usage is: ++ * ++ * while (!event) ++ * yield(); ++ * ++ * where one assumes that yield() will let 'the other' process run that will ++ * make event true. If the current task is a SCHED_FIFO task that will never ++ * happen. Never use yield() as a progress guarantee!! ++ * ++ * If you want to use yield() to wait for something, use wait_event(). ++ * If you want to use yield() to be 'nice' for others, use cond_resched(). ++ * If you still want to use yield(), do not! ++ */ ++void __sched yield(void) ++{ ++ set_current_state(TASK_RUNNING); ++ do_sched_yield(); ++} ++EXPORT_SYMBOL(yield); ++ ++/** ++ * yield_to - yield the current processor to another thread in ++ * your thread group, or accelerate that thread toward the ++ * processor it's on. ++ * @p: target task ++ * @preempt: whether task preemption is allowed or not ++ * ++ * It's the caller's job to ensure that the target task struct ++ * can't go away on us before we can do any checks. ++ * ++ * Return: ++ * true (>0) if we indeed boosted the target task. ++ * false (0) if we failed to boost the target. ++ * -ESRCH if there's no task to yield to. ++ */ ++int __sched yield_to(struct task_struct *p, bool preempt) ++{ ++ struct task_struct *curr = current; ++ struct rq *rq, *p_rq; ++ unsigned long flags; ++ int yielded = 0; ++ ++ local_irq_save(flags); ++ rq = this_rq(); ++ ++again: ++ p_rq = task_rq(p); ++ /* ++ * If we're the only runnable task on the rq and target rq also ++ * has only one task, there's absolutely no point in yielding. ++ */ ++ if (rq->nr_running == 1 && p_rq->nr_running == 1) { ++ yielded = -ESRCH; ++ goto out_irq; ++ } ++ ++ double_rq_lock(rq, p_rq); ++ if (task_rq(p) != p_rq) { ++ double_rq_unlock(rq, p_rq); ++ goto again; ++ } ++ ++ if (!curr->sched_class->yield_to_task) ++ goto out_unlock; ++ ++ if (curr->sched_class != p->sched_class) ++ goto out_unlock; ++ ++ if (task_running(p_rq, p) || p->state) ++ goto out_unlock; ++ ++ yielded = curr->sched_class->yield_to_task(rq, p, preempt); ++ if (yielded) { ++ schedstat_inc(rq->yld_count); ++ /* ++ * Make p's CPU reschedule; pick_next_entity takes care of ++ * fairness. ++ */ ++ if (preempt && rq != p_rq) ++ resched_curr(p_rq); ++ } ++ ++out_unlock: ++ double_rq_unlock(rq, p_rq); ++out_irq: ++ local_irq_restore(flags); ++ ++ if (yielded > 0) ++ schedule(); ++ ++ return yielded; ++} ++EXPORT_SYMBOL_GPL(yield_to); ++ ++int io_schedule_prepare(void) ++{ ++ int old_iowait = current->in_iowait; ++ ++ current->in_iowait = 1; ++ blk_schedule_flush_plug(current); ++ ++ return old_iowait; ++} ++ ++void io_schedule_finish(int token) ++{ ++ current->in_iowait = token; ++} ++ ++/* ++ * This task is about to go to sleep on IO. Increment rq->nr_iowait so ++ * that process accounting knows that this is a task in IO wait state. 
++ */ ++long __sched io_schedule_timeout(long timeout) ++{ ++ int token; ++ long ret; ++ ++ token = io_schedule_prepare(); ++ ret = schedule_timeout(timeout); ++ io_schedule_finish(token); ++ ++ return ret; ++} ++EXPORT_SYMBOL(io_schedule_timeout); ++ ++void __sched io_schedule(void) ++{ ++ int token; ++ ++ token = io_schedule_prepare(); ++ schedule(); ++ io_schedule_finish(token); ++} ++EXPORT_SYMBOL(io_schedule); ++ ++/** ++ * sys_sched_get_priority_max - return maximum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the maximum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_max, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = MAX_USER_RT_PRIO-1; ++ break; ++ case SCHED_DEADLINE: ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ break; ++ } ++ return ret; ++} ++ ++/** ++ * sys_sched_get_priority_min - return minimum RT priority. ++ * @policy: scheduling class. ++ * ++ * Return: On success, this syscall returns the minimum ++ * rt_priority that can be used by a given scheduling class. ++ * On failure, a negative error code is returned. ++ */ ++SYSCALL_DEFINE1(sched_get_priority_min, int, policy) ++{ ++ int ret = -EINVAL; ++ ++ switch (policy) { ++ case SCHED_FIFO: ++ case SCHED_RR: ++ ret = 1; ++ break; ++ case SCHED_DEADLINE: ++ case SCHED_NORMAL: ++ case SCHED_BATCH: ++ case SCHED_IDLE: ++ ret = 0; ++ } ++ return ret; ++} ++ ++static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) ++{ ++ struct task_struct *p; ++ unsigned int time_slice; ++ struct rq_flags rf; ++ struct rq *rq; ++ int retval; ++ ++ if (pid < 0) ++ return -EINVAL; ++ ++ retval = -ESRCH; ++ rcu_read_lock(); ++ p = find_process_by_pid(pid); ++ if (!p) ++ goto out_unlock; ++ ++ retval = security_task_getscheduler(p); ++ if (retval) ++ goto out_unlock; ++ ++ rq = task_rq_lock(p, &rf); ++ time_slice = 0; ++ if (p->sched_class->get_rr_interval) ++ time_slice = p->sched_class->get_rr_interval(rq, p); ++ task_rq_unlock(rq, p, &rf); ++ ++ rcu_read_unlock(); ++ jiffies_to_timespec64(time_slice, t); ++ return 0; ++ ++out_unlock: ++ rcu_read_unlock(); ++ return retval; ++} ++ ++/** ++ * sys_sched_rr_get_interval - return the default timeslice of a process. ++ * @pid: pid of the process. ++ * @interval: userspace pointer to the timeslice value. ++ * ++ * this syscall writes the default timeslice value of a given process ++ * into the user-space timespec buffer. A value of '0' means infinity. ++ * ++ * Return: On success, 0 and the timeslice is in @interval. Otherwise, ++ * an error code. 
++ */ ++SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, ++ struct timespec __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = put_timespec64(&t, interval); ++ ++ return retval; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval, ++ compat_pid_t, pid, ++ struct compat_timespec __user *, interval) ++{ ++ struct timespec64 t; ++ int retval = sched_rr_get_interval(pid, &t); ++ ++ if (retval == 0) ++ retval = compat_put_timespec64(&t, interval); ++ return retval; ++} ++#endif ++ ++void sched_show_task(struct task_struct *p) ++{ ++ unsigned long free = 0; ++ int ppid; ++ ++ if (!try_get_task_stack(p)) ++ return; ++ ++ printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); ++ ++ if (p->state == TASK_RUNNING) ++ printk(KERN_CONT " running task "); ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ free = stack_not_used(p); ++#endif ++ ppid = 0; ++ rcu_read_lock(); ++ if (pid_alive(p)) ++ ppid = task_pid_nr(rcu_dereference(p->real_parent)); ++ rcu_read_unlock(); ++ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, ++ task_pid_nr(p), ppid, ++ (unsigned long)task_thread_info(p)->flags); ++ ++ print_worker_info(KERN_INFO, p); ++ show_stack(p, NULL); ++ put_task_stack(p); ++} ++EXPORT_SYMBOL_GPL(sched_show_task); ++ ++static inline bool ++state_filter_match(unsigned long state_filter, struct task_struct *p) ++{ ++ /* no filter, everything matches */ ++ if (!state_filter) ++ return true; ++ ++ /* filter, but doesn't match */ ++ if (!(p->state & state_filter)) ++ return false; ++ ++ /* ++ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows ++ * TASK_KILLABLE). ++ */ ++ if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) ++ return false; ++ ++ return true; ++} ++ ++ ++void show_state_filter(unsigned long state_filter) ++{ ++ struct task_struct *g, *p; ++ ++#if BITS_PER_LONG == 32 ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#else ++ printk(KERN_INFO ++ " task PC stack pid father\n"); ++#endif ++ rcu_read_lock(); ++ for_each_process_thread(g, p) { ++ /* ++ * reset the NMI-timeout, listing all files on a slow ++ * console might take a lot of time: ++ * Also, reset softlockup watchdogs on all CPUs, because ++ * another CPU might be blocked waiting for us to process ++ * an IPI. ++ */ ++ touch_nmi_watchdog(); ++ touch_all_softlockup_watchdogs(); ++ if (state_filter_match(state_filter, p)) ++ sched_show_task(p); ++ } ++ ++#ifdef CONFIG_SCHED_DEBUG ++ if (!state_filter) ++ sysrq_sched_debug_show(); ++#endif ++ rcu_read_unlock(); ++ /* ++ * Only show locks if all tasks are dumped: ++ */ ++ if (!state_filter) ++ debug_show_all_locks(); ++} ++ ++/** ++ * init_idle - set up an idle thread for a given CPU ++ * @idle: task in question ++ * @cpu: CPU the idle task belongs to ++ * ++ * NOTE: this function does not set the idle thread's NEED_RESCHED ++ * flag, to make booting more robust. ++ */ ++void init_idle(struct task_struct *idle, int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ unsigned long flags; ++ ++ __sched_fork(0, idle); ++ ++ raw_spin_lock_irqsave(&idle->pi_lock, flags); ++ raw_spin_lock(&rq->lock); ++ ++ idle->state = TASK_RUNNING; ++ idle->se.exec_start = sched_clock(); ++ idle->flags |= PF_IDLE; ++ ++ kasan_unpoison_task_stack(idle); ++ ++#ifdef CONFIG_SMP ++ /* ++ * Its possible that init_idle() gets called multiple times on a task, ++ * in that case do_set_cpus_allowed() will not do the right thing. ++ * ++ * And since this is boot we can forgo the serialization. 
++ */ ++ set_cpus_allowed_common(idle, cpumask_of(cpu)); ++#endif ++ /* ++ * We're having a chicken and egg problem, even though we are ++ * holding rq->lock, the CPU isn't yet set to this CPU so the ++ * lockdep check in task_group() will fail. ++ * ++ * Similar case to sched_fork(). / Alternatively we could ++ * use task_rq_lock() here and obtain the other rq->lock. ++ * ++ * Silence PROVE_RCU ++ */ ++ rcu_read_lock(); ++ __set_task_cpu(idle, cpu); ++ rcu_read_unlock(); ++ ++ rq->curr = rq->idle = idle; ++ idle->on_rq = TASK_ON_RQ_QUEUED; ++#ifdef CONFIG_SMP ++ idle->on_cpu = 1; ++#endif ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); ++ ++ /* Set the preempt count _outside_ the spinlocks! */ ++ init_idle_preempt_count(idle, cpu); ++ ++ /* ++ * The idle tasks have their own, simple scheduling class: ++ */ ++ idle->sched_class = &idle_sched_class; ++ ftrace_graph_init_idle_task(idle, cpu); ++ vtime_init_idle(idle, cpu); ++#ifdef CONFIG_SMP ++ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); ++#endif ++} ++ ++#ifdef CONFIG_SMP ++ ++int cpuset_cpumask_can_shrink(const struct cpumask *cur, ++ const struct cpumask *trial) ++{ ++ int ret = 1; ++ ++ if (!cpumask_weight(cur)) ++ return ret; ++ ++ ret = dl_cpuset_cpumask_can_shrink(cur, trial); ++ ++ return ret; ++} ++ ++int task_can_attach(struct task_struct *p, ++ const struct cpumask *cs_cpus_allowed) ++{ ++ int ret = 0; ++ ++ /* ++ * Kthreads which disallow setaffinity shouldn't be moved ++ * to a new cpuset; we don't want to change their CPU ++ * affinity and isolating such threads by their set of ++ * allowed nodes is unnecessary. Thus, cpusets are not ++ * applicable for such threads. This prevents checking for ++ * success of set_cpus_allowed_ptr() on all attached tasks ++ * before cpus_allowed may be changed. ++ */ ++ if (p->flags & PF_NO_SETAFFINITY) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, ++ cs_cpus_allowed)) ++ ret = dl_task_can_attach(p, cs_cpus_allowed); ++ ++out: ++ return ret; ++} ++ ++bool sched_smp_initialized __read_mostly; ++ ++#ifdef CONFIG_NUMA_BALANCING ++/* Migrate current task p to target_cpu */ ++int migrate_task_to(struct task_struct *p, int target_cpu) ++{ ++ struct migration_arg arg = { p, target_cpu }; ++ int curr_cpu = task_cpu(p); ++ ++ if (curr_cpu == target_cpu) ++ return 0; ++ ++ if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) ++ return -EINVAL; ++ ++ /* TODO: This is not properly updating schedstats */ ++ ++ trace_sched_move_numa(p, curr_cpu, target_cpu); ++ return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); ++} ++ ++/* ++ * Requeue a task on a given node and accurately track the number of NUMA ++ * tasks on the runqueues ++ */ ++void sched_setnuma(struct task_struct *p, int nid) ++{ ++ bool queued, running; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(p, &rf); ++ queued = task_on_rq_queued(p); ++ running = task_current(rq, p); ++ ++ if (queued) ++ dequeue_task(rq, p, DEQUEUE_SAVE); ++ if (running) ++ put_prev_task(rq, p); ++ ++ p->numa_preferred_nid = nid; ++ ++ if (queued) ++ enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); ++ if (running) ++ set_curr_task(rq, p); ++ task_rq_unlock(rq, p, &rf); ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_HOTPLUG_CPU ++/* ++ * Ensure that the idle task is using init_mm right before its CPU goes ++ * offline. 
++ */ ++void idle_task_exit(void) ++{ ++ struct mm_struct *mm = current->active_mm; ++ ++ BUG_ON(cpu_online(smp_processor_id())); ++ BUG_ON(current != this_rq()->idle); ++ ++ if (mm != &init_mm) { ++ switch_mm(mm, &init_mm, current); ++ finish_arch_post_lock_switch(); ++ } ++ ++ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ ++} ++ ++/* ++ * Since this CPU is going 'away' for a while, fold any nr_active delta ++ * we might have. Assumes we're called after migrate_tasks() so that the ++ * nr_active count is stable. We need to take the teardown thread which ++ * is calling this into account, so we hand in adjust = 1 to the load ++ * calculation. ++ * ++ * Also see the comment "Global load-average calculations". ++ */ ++static void calc_load_migrate(struct rq *rq) ++{ ++ long delta = calc_load_fold_active(rq, 1); ++ if (delta) ++ atomic_long_add(delta, &calc_load_tasks); ++} ++ ++static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) ++{ ++} ++ ++static const struct sched_class fake_sched_class = { ++ .put_prev_task = put_prev_task_fake, ++}; ++ ++static struct task_struct fake_task = { ++ /* ++ * Avoid pull_{rt,dl}_task() ++ */ ++ .prio = MAX_PRIO + 1, ++ .sched_class = &fake_sched_class, ++}; ++ ++/* ++ * Migrate all tasks from the rq, sleeping tasks will be migrated by ++ * try_to_wake_up()->select_task_rq(). ++ * ++ * Called with rq->lock held even though we'er in stop_machine() and ++ * there's no concurrency possible, we hold the required locks anyway ++ * because of lock validation efforts. ++ */ ++static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) ++{ ++ struct rq *rq = dead_rq; ++ struct task_struct *next, *stop = rq->stop; ++ struct rq_flags orf = *rf; ++ int dest_cpu; ++ ++ /* ++ * Fudge the rq selection such that the below task selection loop ++ * doesn't get stuck on the currently eligible stop task. ++ * ++ * We're currently inside stop_machine() and the rq is either stuck ++ * in the stop_machine_cpu_stop() loop, or we're executing this code, ++ * either way we should never end up calling schedule() until we're ++ * done here. ++ */ ++ rq->stop = NULL; ++ ++ /* ++ * put_prev_task() and pick_next_task() sched ++ * class method both need to have an up-to-date ++ * value of rq->clock[_task] ++ */ ++ update_rq_clock(rq); ++ ++ for (;;) { ++ /* ++ * There's this thread running, bail when that's the only ++ * remaining thread: ++ */ ++ if (rq->nr_running == 1) ++ break; ++ ++ /* ++ * pick_next_task() assumes pinned rq->lock: ++ */ ++ next = pick_next_task(rq, &fake_task, rf); ++ BUG_ON(!next); ++ put_prev_task(rq, next); ++ ++ /* ++ * Rules for changing task_struct::cpus_allowed are holding ++ * both pi_lock and rq->lock, such that holding either ++ * stabilizes the mask. ++ * ++ * Drop rq->lock is not quite as disastrous as it usually is ++ * because !cpu_active at this point, which means load-balance ++ * will not interfere. Also, stop-machine. ++ */ ++ rq_unlock(rq, rf); ++ raw_spin_lock(&next->pi_lock); ++ rq_relock(rq, rf); ++ ++ /* ++ * Since we're inside stop-machine, _nothing_ should have ++ * changed the task, WARN if weird stuff happened, because in ++ * that case the above rq->lock drop is a fail too. ++ */ ++ if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { ++ raw_spin_unlock(&next->pi_lock); ++ continue; ++ } ++ ++ /* Find suitable destination for @next, with force if needed. 
*/ ++ dest_cpu = select_fallback_rq(dead_rq->cpu, next); ++ rq = __migrate_task(rq, rf, next, dest_cpu); ++ if (rq != dead_rq) { ++ rq_unlock(rq, rf); ++ rq = dead_rq; ++ *rf = orf; ++ rq_relock(rq, rf); ++ } ++ raw_spin_unlock(&next->pi_lock); ++ } ++ ++ rq->stop = stop; ++} ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++void set_rq_online(struct rq *rq) ++{ ++ if (!rq->online) { ++ const struct sched_class *class; ++ ++ cpumask_set_cpu(rq->cpu, rq->rd->online); ++ rq->online = 1; ++ ++ for_each_class(class) { ++ if (class->rq_online) ++ class->rq_online(rq); ++ } ++ } ++} ++ ++void set_rq_offline(struct rq *rq) ++{ ++ if (rq->online) { ++ const struct sched_class *class; ++ ++ for_each_class(class) { ++ if (class->rq_offline) ++ class->rq_offline(rq); ++ } ++ ++ cpumask_clear_cpu(rq->cpu, rq->rd->online); ++ rq->online = 0; ++ } ++} ++ ++/* ++ * used to mark begin/end of suspend/resume: ++ */ ++static int num_cpus_frozen; ++ ++/* ++ * Update cpusets according to cpu_active mask. If cpusets are ++ * disabled, cpuset_update_active_cpus() becomes a simple wrapper ++ * around partition_sched_domains(). ++ * ++ * If we come here as part of a suspend/resume, don't touch cpusets because we ++ * want to restore it back to its original state upon resume anyway. ++ */ ++static void cpuset_cpu_active(void) ++{ ++ if (cpuhp_tasks_frozen) { ++ /* ++ * num_cpus_frozen tracks how many CPUs are involved in suspend ++ * resume sequence. As long as this is not the last online ++ * operation in the resume sequence, just build a single sched ++ * domain, ignoring cpusets. ++ */ ++ partition_sched_domains(1, NULL, NULL); ++ if (--num_cpus_frozen) ++ return; ++ /* ++ * This is the last CPU online operation. So fall through and ++ * restore the original sched domains by considering the ++ * cpuset configurations. ++ */ ++ cpuset_force_rebuild(); ++ } ++ cpuset_update_active_cpus(); ++} ++ ++static int cpuset_cpu_inactive(unsigned int cpu) ++{ ++ if (!cpuhp_tasks_frozen) { ++ if (dl_cpu_busy(cpu)) ++ return -EBUSY; ++ cpuset_update_active_cpus(); ++ } else { ++ num_cpus_frozen++; ++ partition_sched_domains(1, NULL, NULL); ++ } ++ return 0; ++} ++ ++int sched_cpu_activate(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going up, increment the number of cores with SMT present. ++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_inc_cpuslocked(&sched_smt_present); ++#endif ++ set_cpu_active(cpu, true); ++ ++ if (sched_smp_initialized) { ++ sched_domains_numa_masks_set(cpu); ++ cpuset_cpu_active(); ++ } ++ ++ /* ++ * Put the rq online, if not already. This happens: ++ * ++ * 1) In the early boot process, because we build the real domains ++ * after all CPUs have been brought up. ++ * ++ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the ++ * domains. ++ */ ++ rq_lock_irqsave(rq, &rf); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_online(rq); ++ } ++ rq_unlock_irqrestore(rq, &rf); ++ ++ update_max_interval(); ++ ++ return 0; ++} ++ ++int sched_cpu_deactivate(unsigned int cpu) ++{ ++ int ret; ++ ++ set_cpu_active(cpu, false); ++ /* ++ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU ++ * users of this state to go away such that all new such users will ++ * observe it. ++ * ++ * Do sync before park smpboot threads to take care the rcu boost case. 
++ */ ++ synchronize_rcu_mult(call_rcu, call_rcu_sched); ++ ++#ifdef CONFIG_SCHED_SMT ++ /* ++ * When going down, decrement the number of cores with SMT present. ++ */ ++ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) ++ static_branch_dec_cpuslocked(&sched_smt_present); ++#endif ++ ++ if (!sched_smp_initialized) ++ return 0; ++ ++ ret = cpuset_cpu_inactive(cpu); ++ if (ret) { ++ set_cpu_active(cpu, true); ++ return ret; ++ } ++ sched_domains_numa_masks_clear(cpu); ++ return 0; ++} ++ ++static void sched_rq_cpu_starting(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ ++ rq->calc_load_update = calc_load_update; ++ update_max_interval(); ++} ++ ++int sched_cpu_starting(unsigned int cpu) ++{ ++ sched_rq_cpu_starting(cpu); ++ sched_tick_start(cpu); ++ return 0; ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++int sched_cpu_dying(unsigned int cpu) ++{ ++ struct rq *rq = cpu_rq(cpu); ++ struct rq_flags rf; ++ ++ /* Handle pending wakeups and then migrate everything off */ ++ sched_ttwu_pending(); ++ sched_tick_stop(cpu); ++ ++ rq_lock_irqsave(rq, &rf); ++ if (rq->rd) { ++ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); ++ set_rq_offline(rq); ++ } ++ migrate_tasks(rq, &rf); ++ BUG_ON(rq->nr_running != 1); ++ rq_unlock_irqrestore(rq, &rf); ++ ++ calc_load_migrate(rq); ++ update_max_interval(); ++ nohz_balance_exit_idle(rq); ++ hrtick_clear(rq); ++ return 0; ++} ++#endif ++ ++void __init sched_init_smp(void) ++{ ++ sched_init_numa(); ++ ++ /* ++ * There's no userspace yet to cause hotplug operations; hence all the ++ * CPU masks are stable and all blatant races in the below code cannot ++ * happen. The hotplug lock is nevertheless taken to satisfy lockdep, ++ * but there won't be any contention on it. ++ */ ++ cpus_read_lock(); ++ mutex_lock(&sched_domains_mutex); ++ sched_init_domains(cpu_active_mask); ++ mutex_unlock(&sched_domains_mutex); ++ cpus_read_unlock(); ++ ++ /* Move init over to a non-isolated CPU */ ++ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) ++ BUG(); ++ sched_init_granularity(); ++ ++ init_sched_rt_class(); ++ init_sched_dl_class(); ++ ++ sched_smp_initialized = true; ++} ++ ++static int __init migration_init(void) ++{ ++ sched_rq_cpu_starting(smp_processor_id()); ++ return 0; ++} ++early_initcall(migration_init); ++ ++#else ++void __init sched_init_smp(void) ++{ ++ sched_init_granularity(); ++} ++#endif /* CONFIG_SMP */ ++ ++int in_sched_functions(unsigned long addr) ++{ ++ return in_lock_functions(addr) || ++ (addr >= (unsigned long)__sched_text_start ++ && addr < (unsigned long)__sched_text_end); ++} ++ ++#ifdef CONFIG_CGROUP_SCHED ++/* ++ * Default task group. ++ * Every task in system belongs to this group at bootup. 
++ */ ++struct task_group root_task_group; ++LIST_HEAD(task_groups); ++ ++/* Cacheline aligned slab cache for task_group */ ++static struct kmem_cache *task_group_cache __read_mostly; ++#endif ++ ++DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); ++DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); ++ ++static __init void rq_cputime_init(void) ++{ ++ int cpu; ++ struct rq_cputime *rq_cputime; ++ ++ for_each_possible_cpu(cpu) { ++ rq_cputime = &per_cpu(rq_cputimes, cpu); ++ raw_spin_lock_init(&rq_cputime->lock); ++ } ++} ++ ++void __init sched_init(void) ++{ ++ int i, j; ++ unsigned long alloc_size = 0, ptr; ++ ++ wait_bit_init(); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ alloc_size += 2 * nr_cpu_ids * sizeof(void **); ++#endif ++#ifdef CONFIG_RT_GROUP_SCHED ++ alloc_size += 2 * nr_cpu_ids * sizeof(void **); ++#endif ++ if (alloc_size) { ++ ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ root_task_group.se = (struct sched_entity **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++ root_task_group.cfs_rq = (struct cfs_rq **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++#ifdef CONFIG_RT_GROUP_SCHED ++ root_task_group.rt_se = (struct sched_rt_entity **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++ root_task_group.rt_rq = (struct rt_rq **)ptr; ++ ptr += nr_cpu_ids * sizeof(void **); ++ ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ } ++#ifdef CONFIG_CPUMASK_OFFSTACK ++ for_each_possible_cpu(i) { ++ per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( ++ cpumask_size(), GFP_KERNEL, cpu_to_node(i)); ++ per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( ++ cpumask_size(), GFP_KERNEL, cpu_to_node(i)); ++ } ++#endif /* CONFIG_CPUMASK_OFFSTACK */ ++ ++ init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime()); ++ init_dl_bandwidth(&def_dl_bandwidth, global_rt_period(), global_rt_runtime()); ++ ++#ifdef CONFIG_SMP ++ init_defrootdomain(); ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ init_rt_bandwidth(&root_task_group.rt_bandwidth, ++ global_rt_period(), global_rt_runtime()); ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ ++#ifdef CONFIG_CGROUP_SCHED ++ task_group_cache = KMEM_CACHE(task_group, 0); ++ ++ list_add(&root_task_group.list, &task_groups); ++ INIT_LIST_HEAD(&root_task_group.children); ++ INIT_LIST_HEAD(&root_task_group.siblings); ++ autogroup_init(&init_task); ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++ for_each_possible_cpu(i) { ++ struct rq *rq; ++ ++ rq = cpu_rq(i); ++ raw_spin_lock_init(&rq->lock); ++ rq->nr_running = 0; ++ rq->calc_load_active = 0; ++ rq->calc_load_update = jiffies + LOAD_FREQ; ++ init_cfs_rq(&rq->cfs); ++ init_rt_rq(&rq->rt); ++ init_dl_rq(&rq->dl); ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ root_task_group.shares = ROOT_TASK_GROUP_LOAD; ++ INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); ++ rq->tmp_alone_branch = &rq->leaf_cfs_rq_list; ++ /* ++ * How much CPU bandwidth does root_task_group get? ++ * ++ * In case of task-groups formed thr' the cgroup filesystem, it ++ * gets 100% of the CPU resources in the system. This overall ++ * system CPU resource is divided among the tasks of ++ * root_task_group and its child task-groups in a fair manner, ++ * based on each entity's (task or task-group's) weight ++ * (se->load.weight). 
++ * ++ * In other words, if root_task_group has 10 tasks of weight ++ * 1024) and two child groups A0 and A1 (of weight 1024 each), ++ * then A0's share of the CPU resource is: ++ * ++ * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% ++ * ++ * We achieve this by letting root_task_group's tasks sit ++ * directly in rq->cfs (i.e root_task_group->se[] = NULL). ++ */ ++ init_cfs_bandwidth(&root_task_group.cfs_bandwidth); ++ init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; ++#ifdef CONFIG_RT_GROUP_SCHED ++ init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); ++#endif ++ ++ for (j = 0; j < CPU_LOAD_IDX_MAX; j++) ++ rq->cpu_load[j] = 0; ++ ++#ifdef CONFIG_SMP ++ rq->sd = NULL; ++ rq->rd = NULL; ++ rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; ++ rq->balance_callback = NULL; ++ rq->active_balance = 0; ++ rq->next_balance = jiffies; ++ rq->push_cpu = 0; ++ rq->cpu = i; ++ rq->online = 0; ++ rq->idle_stamp = 0; ++ rq->avg_idle = 2*sysctl_sched_migration_cost; ++ rq->max_idle_balance_cost = sysctl_sched_migration_cost; ++ ++ INIT_LIST_HEAD(&rq->cfs_tasks); ++ ++ rq_attach_root(rq, &def_root_domain); ++#ifdef CONFIG_NO_HZ_COMMON ++ rq->last_load_update_tick = jiffies; ++ rq->last_blocked_load_update_tick = jiffies; ++ atomic_set(&rq->nohz_flags, 0); ++#endif ++#endif /* CONFIG_SMP */ ++ hrtick_rq_init(rq); ++ atomic_set(&rq->nr_iowait, 0); ++ } ++ ++ set_load_weight(&init_task, false); ++ ++ /* ++ * The boot idle thread does lazy MMU switching as well: ++ */ ++ mmgrab(&init_mm); ++ enter_lazy_tlb(&init_mm, current); ++ ++ /* ++ * Make us the idle thread. Technically, schedule() should not be ++ * called from this thread, however somewhere below it might be, ++ * but because we are the idle thread, we just pick up running again ++ * when this runqueue becomes "idle". ++ */ ++ init_idle(current, smp_processor_id()); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++ ++#ifdef CONFIG_SMP ++ idle_thread_set_boot_cpu(); ++#endif ++ init_sched_fair_class(); ++ ++ init_schedstats(); ++ ++ if (use_sched_idle_time) ++ rq_cputime_init(); ++ ++ scheduler_running = 1; ++} ++ ++#ifdef CONFIG_DEBUG_ATOMIC_SLEEP ++static inline int preempt_count_equals(int preempt_offset) ++{ ++ int nested = preempt_count() + rcu_preempt_depth(); ++ ++ return (nested == preempt_offset); ++} ++ ++void __might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* ++ * Blocking primitives will set (and therefore destroy) current->state, ++ * since we will exit with TASK_RUNNING make sure we enter with it, ++ * otherwise we will destroy state. 
++ */ ++ WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, ++ "do not call blocking ops when !TASK_RUNNING; " ++ "state=%lx set at [<%p>] %pS\n", ++ current->state, ++ (void *)current->task_state_change, ++ (void *)current->task_state_change); ++ ++ ___might_sleep(file, line, preempt_offset); ++} ++EXPORT_SYMBOL(__might_sleep); ++ ++void ___might_sleep(const char *file, int line, int preempt_offset) ++{ ++ /* Ratelimiting timestamp: */ ++ static unsigned long prev_jiffy; ++ ++ unsigned long preempt_disable_ip; ++ ++ /* WARN_ON_ONCE() by default, no rate limit required: */ ++ rcu_sleep_check(); ++ ++ if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && ++ !is_idle_task(current)) || ++ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || ++ oops_in_progress) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ /* Save this before calling printk(), since that will clobber it: */ ++ preempt_disable_ip = get_preempt_disable_ip(current); ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ if (task_stack_end_corrupted(current)) ++ printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) ++ && !preempt_count_equals(preempt_offset)) { ++ pr_err("Preemption disabled at:"); ++ print_ip_sym(preempt_disable_ip); ++ pr_cont("\n"); ++ } ++ dump_stack(); ++ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); ++} ++EXPORT_SYMBOL(___might_sleep); ++#endif ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void normalize_rt_tasks(void) ++{ ++ struct task_struct *g, *p; ++ struct sched_attr attr = { ++ .sched_policy = SCHED_NORMAL, ++ }; ++ ++ read_lock(&tasklist_lock); ++ for_each_process_thread(g, p) { ++ /* ++ * Only normalize user tasks: ++ */ ++ if (p->flags & PF_KTHREAD) ++ continue; ++ ++ p->se.exec_start = 0; ++ schedstat_set(p->se.statistics.wait_start, 0); ++ schedstat_set(p->se.statistics.sleep_start, 0); ++ schedstat_set(p->se.statistics.block_start, 0); ++ ++ if (!dl_task(p) && !rt_task(p)) { ++ /* ++ * Renice negative nice level userspace ++ * tasks back to 0: ++ */ ++ if (task_nice(p) < 0) ++ set_user_nice(p, 0); ++ continue; ++ } ++ ++ __sched_setscheduler(p, &attr, false, false); ++ } ++ read_unlock(&tasklist_lock); ++} ++ ++#endif /* CONFIG_MAGIC_SYSRQ */ ++ ++#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) ++/* ++ * These functions are only useful for the IA64 MCA handling, or kdb. ++ * ++ * They can only be called when the whole system has been ++ * stopped - every CPU needs to be quiescent, and no scheduling ++ * activity can take place. Using them for anything else would ++ * be a serious bug, and as a result, they aren't even visible ++ * under any other configuration. ++ */ ++ ++/** ++ * curr_task - return the current task for a given CPU. ++ * @cpu: the processor in question. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ * ++ * Return: The current task for @cpu. ++ */ ++struct task_struct *curr_task(int cpu) ++{ ++ return cpu_curr(cpu); ++} ++ ++#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ ++ ++#ifdef CONFIG_IA64 ++/** ++ * set_curr_task - set the current task for a given CPU. ++ * @cpu: the processor in question. 
++ * @p: the task pointer to set. ++ * ++ * Description: This function must only be used when non-maskable interrupts ++ * are serviced on a separate stack. It allows the architecture to switch the ++ * notion of the current task on a CPU in a non-blocking manner. This function ++ * must be called with all CPU's synchronized, and interrupts disabled, the ++ * and caller must save the original value of the current task (see ++ * curr_task() above) and restore that value before reenabling interrupts and ++ * re-starting the system. ++ * ++ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! ++ */ ++void ia64_set_curr_task(int cpu, struct task_struct *p) ++{ ++ cpu_curr(cpu) = p; ++} ++ ++#endif ++ ++#ifdef CONFIG_CGROUP_SCHED ++/* task_group_lock serializes the addition/removal of task groups */ ++static DEFINE_SPINLOCK(task_group_lock); ++ ++static void sched_free_group(struct task_group *tg) ++{ ++ free_fair_sched_group(tg); ++ free_rt_sched_group(tg); ++ autogroup_free(tg); ++ kmem_cache_free(task_group_cache, tg); ++} ++ ++/* allocate runqueue etc for a new task group */ ++struct task_group *sched_create_group(struct task_group *parent) ++{ ++ struct task_group *tg; ++ ++ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); ++ if (!tg) ++ return ERR_PTR(-ENOMEM); ++ ++ if (!alloc_fair_sched_group(tg, parent)) ++ goto err; ++ ++ if (!alloc_rt_sched_group(tg, parent)) ++ goto err; ++ ++ return tg; ++ ++err: ++ sched_free_group(tg); ++ return ERR_PTR(-ENOMEM); ++} ++ ++void sched_online_group(struct task_group *tg, struct task_group *parent) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&task_group_lock, flags); ++ list_add_rcu(&tg->list, &task_groups); ++ ++ /* Root should already exist: */ ++ WARN_ON(!parent); ++ ++ tg->parent = parent; ++ INIT_LIST_HEAD(&tg->children); ++ list_add_rcu(&tg->siblings, &parent->children); ++ spin_unlock_irqrestore(&task_group_lock, flags); ++ ++ online_fair_sched_group(tg); ++} ++ ++/* rcu callback to free various structures associated with a task group */ ++static void sched_free_group_rcu(struct rcu_head *rhp) ++{ ++ /* Now it should be safe to free those cfs_rqs: */ ++ sched_free_group(container_of(rhp, struct task_group, rcu)); ++} ++ ++void sched_destroy_group(struct task_group *tg) ++{ ++ /* Wait for possible concurrent references to cfs_rqs complete: */ ++ call_rcu(&tg->rcu, sched_free_group_rcu); ++} ++ ++void sched_offline_group(struct task_group *tg) ++{ ++ unsigned long flags; ++ ++ /* End participation in shares distribution: */ ++ unregister_fair_sched_group(tg); ++ ++ spin_lock_irqsave(&task_group_lock, flags); ++ list_del_rcu(&tg->list); ++ list_del_rcu(&tg->siblings); ++ spin_unlock_irqrestore(&task_group_lock, flags); ++} ++ ++static void sched_change_group(struct task_struct *tsk, int type) ++{ ++ struct task_group *tg; ++ ++ /* ++ * All callers are synchronized by task_rq_lock(); we do not use RCU ++ * which is pointless here. Thus, we pass "true" to task_css_check() ++ * to prevent lockdep warnings. ++ */ ++ tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), ++ struct task_group, css); ++ tg = autogroup_task_group(tsk, tg); ++ tsk->sched_task_group = tg; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ if (tsk->sched_class->task_change_group) ++ tsk->sched_class->task_change_group(tsk, type); ++ else ++#endif ++ set_task_rq(tsk, task_cpu(tsk)); ++} ++ ++/* ++ * Change task's runqueue when it moves between groups. ++ * ++ * The caller of this function should have put the task in its new group by ++ * now. 
This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect ++ * its new group. ++ */ ++void sched_move_task(struct task_struct *tsk) ++{ ++ int queued, running, queue_flags = ++ DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(tsk, &rf); ++ update_rq_clock(rq); ++ ++ running = task_current(rq, tsk); ++ queued = task_on_rq_queued(tsk); ++ ++ if (queued) ++ dequeue_task(rq, tsk, queue_flags); ++ if (running) ++ put_prev_task(rq, tsk); ++ ++ sched_change_group(tsk, TASK_MOVE_GROUP); ++ ++ if (queued) ++ enqueue_task(rq, tsk, queue_flags); ++ if (running) ++ set_curr_task(rq, tsk); ++ ++ task_rq_unlock(rq, tsk, &rf); ++} ++ ++static inline struct task_group *css_tg(struct cgroup_subsys_state *css) ++{ ++ return css ? container_of(css, struct task_group, css) : NULL; ++} ++ ++static struct cgroup_subsys_state * ++cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ++{ ++ struct task_group *parent = css_tg(parent_css); ++ struct task_group *tg; ++ ++ if (!parent) { ++ /* This is early initialization for the top cgroup */ ++ return &root_task_group.css; ++ } ++ ++ tg = sched_create_group(parent); ++ if (IS_ERR(tg)) ++ return ERR_PTR(-ENOMEM); ++ ++ return &tg->css; ++} ++ ++/* Expose task group only after completing cgroup initialization */ ++static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ struct task_group *parent = css_tg(css->parent); ++ ++ if (parent) ++ sched_online_group(tg, parent); ++ return 0; ++} ++ ++static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ sched_offline_group(tg); ++} ++ ++static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ /* ++ * Relies on the RCU grace period between css_released() and this. ++ */ ++ sched_free_group(tg); ++} ++ ++/* ++ * This is called before wake_up_new_task(), therefore we really only ++ * have to set its group bits, all the other stuff does not apply. ++ */ ++static void cpu_cgroup_fork(struct task_struct *task) ++{ ++ struct rq_flags rf; ++ struct rq *rq; ++ ++ rq = task_rq_lock(task, &rf); ++ ++ update_rq_clock(rq); ++ sched_change_group(task, TASK_SET_GROUP); ++ ++ task_rq_unlock(rq, task, &rf); ++} ++ ++static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct cgroup_subsys_state *css; ++ int ret = 0; ++ ++ cgroup_taskset_for_each(task, css, tset) { ++#ifdef CONFIG_RT_GROUP_SCHED ++ if (!sched_rt_can_attach(css_tg(css), task)) ++ return -EINVAL; ++#endif ++ /* ++ * Serialize against wake_up_new_task() such that if its ++ * running, we're sure to observe its full state. ++ */ ++ raw_spin_lock_irq(&task->pi_lock); ++ /* ++ * Avoid calling sched_move_task() before wake_up_new_task() ++ * has happened. This would lead to problems with PELT, due to ++ * move wanting to detach+attach while we're not attached yet. 
++ */ ++ if (task->state == TASK_NEW) ++ ret = -EINVAL; ++ raw_spin_unlock_irq(&task->pi_lock); ++ ++ if (ret) ++ break; ++ } ++ return ret; ++} ++ ++static void cpu_cgroup_attach(struct cgroup_taskset *tset) ++{ ++ struct task_struct *task; ++ struct cgroup_subsys_state *css; ++ ++ cgroup_taskset_for_each(task, css, tset) ++ sched_move_task(task); ++} ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static int cpu_shares_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 shareval) ++{ ++ if (shareval > scale_load_down(ULONG_MAX)) ++ shareval = MAX_SHARES; ++ return sched_group_set_shares(css_tg(css), scale_load(shareval)); ++} ++ ++static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ ++ return (u64) scale_load_down(tg->shares); ++} ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++static DEFINE_MUTEX(cfs_constraints_mutex); ++ ++const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ ++const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ ++ ++static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); ++ ++static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) ++{ ++ int i, ret = 0, runtime_enabled, runtime_was_enabled; ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ ++ if (tg == &root_task_group) ++ return -EINVAL; ++ ++ /* ++ * Ensure we have at some amount of bandwidth every period. This is ++ * to prevent reaching a state of large arrears when throttled via ++ * entity_tick() resulting in prolonged exit starvation. ++ */ ++ if (quota < min_cfs_quota_period || period < min_cfs_quota_period) ++ return -EINVAL; ++ ++ /* ++ * Likewise, bound things on the otherside by preventing insane quota ++ * periods. This also allows us to normalize in computing quota ++ * feasibility. ++ */ ++ if (period > max_cfs_quota_period) ++ return -EINVAL; ++ ++ /* ++ * Prevent race between setting of cfs_rq->runtime_enabled and ++ * unthrottle_offline_cfs_rqs(). 
++ */ ++ get_online_cpus(); ++ mutex_lock(&cfs_constraints_mutex); ++ ret = __cfs_schedulable(tg, period, quota); ++ if (ret) ++ goto out_unlock; ++ ++ runtime_enabled = quota != RUNTIME_INF; ++ runtime_was_enabled = cfs_b->quota != RUNTIME_INF; ++ /* ++ * If we need to toggle cfs_bandwidth_used, off->on must occur ++ * before making related changes, and on->off must occur afterwards ++ */ ++ if (runtime_enabled && !runtime_was_enabled) ++ cfs_bandwidth_usage_inc(); ++ raw_spin_lock_irq(&cfs_b->lock); ++ cfs_b->period = ns_to_ktime(period); ++ cfs_b->quota = quota; ++ ++ __refill_cfs_bandwidth_runtime(cfs_b); ++ ++ /* Restart the period timer (if active) to handle new period expiry: */ ++ if (runtime_enabled) ++ start_cfs_bandwidth(cfs_b); ++ ++ raw_spin_unlock_irq(&cfs_b->lock); ++ ++ for_each_online_cpu(i) { ++ struct cfs_rq *cfs_rq = tg->cfs_rq[i]; ++ struct rq *rq = cfs_rq->rq; ++ struct rq_flags rf; ++ ++ rq_lock_irq(rq, &rf); ++ cfs_rq->runtime_enabled = runtime_enabled; ++ cfs_rq->runtime_remaining = 0; ++ ++ if (cfs_rq->throttled) ++ unthrottle_cfs_rq(cfs_rq); ++ rq_unlock_irq(rq, &rf); ++ } ++ if (runtime_was_enabled && !runtime_enabled) ++ cfs_bandwidth_usage_dec(); ++out_unlock: ++ mutex_unlock(&cfs_constraints_mutex); ++ put_online_cpus(); ++ ++ return ret; ++} ++ ++int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) ++{ ++ u64 quota, period; ++ ++ period = ktime_to_ns(tg->cfs_bandwidth.period); ++ if (cfs_quota_us < 0) ++ quota = RUNTIME_INF; ++ else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC) ++ quota = (u64)cfs_quota_us * NSEC_PER_USEC; ++ else ++ return -EINVAL; ++ ++ return tg_set_cfs_bandwidth(tg, period, quota); ++} ++ ++long tg_get_cfs_quota(struct task_group *tg) ++{ ++ u64 quota_us; ++ ++ if (tg->cfs_bandwidth.quota == RUNTIME_INF) ++ return -1; ++ ++ quota_us = tg->cfs_bandwidth.quota; ++ do_div(quota_us, NSEC_PER_USEC); ++ ++ return quota_us; ++} ++ ++int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) ++{ ++ u64 quota, period; ++ ++ if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC) ++ return -EINVAL; ++ ++ period = (u64)cfs_period_us * NSEC_PER_USEC; ++ quota = tg->cfs_bandwidth.quota; ++ ++ return tg_set_cfs_bandwidth(tg, period, quota); ++} ++ ++long tg_get_cfs_period(struct task_group *tg) ++{ ++ u64 cfs_period_us; ++ ++ cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period); ++ do_div(cfs_period_us, NSEC_PER_USEC); ++ ++ return cfs_period_us; ++} ++ ++static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return tg_get_cfs_quota(css_tg(css)); ++} ++ ++static int cpu_cfs_quota_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, s64 cfs_quota_us) ++{ ++ return tg_set_cfs_quota(css_tg(css), cfs_quota_us); ++} ++ ++static u64 cpu_cfs_period_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return tg_get_cfs_period(css_tg(css)); ++} ++ ++static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 cfs_period_us) ++{ ++ return tg_set_cfs_period(css_tg(css), cfs_period_us); ++} ++ ++struct cfs_schedulable_data { ++ struct task_group *tg; ++ u64 period, quota; ++}; ++ ++/* ++ * normalize group quota/period to be quota/max_period ++ * note: units are usecs ++ */ ++static u64 normalize_cfs_quota(struct task_group *tg, ++ struct cfs_schedulable_data *d) ++{ ++ u64 quota, period; ++ ++ if (tg == d->tg) { ++ period = d->period; ++ quota = d->quota; ++ } else { ++ period = tg_get_cfs_period(tg); ++ quota = tg_get_cfs_quota(tg); 
++ } ++ ++ /* note: these should typically be equivalent */ ++ if (quota == RUNTIME_INF || quota == -1) ++ return RUNTIME_INF; ++ ++ return to_ratio(period, quota); ++} ++ ++static int tg_cfs_schedulable_down(struct task_group *tg, void *data) ++{ ++ struct cfs_schedulable_data *d = data; ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ s64 quota = 0, parent_quota = -1; ++ ++ if (!tg->parent) { ++ quota = RUNTIME_INF; ++ } else { ++ struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; ++ ++ quota = normalize_cfs_quota(tg, d); ++ parent_quota = parent_b->hierarchical_quota; ++ ++ /* ++ * Ensure max(child_quota) <= parent_quota. On cgroup2, ++ * always take the min. On cgroup1, only inherit when no ++ * limit is set: ++ */ ++ if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) { ++ quota = min(quota, parent_quota); ++ } else { ++ if (quota == RUNTIME_INF) ++ quota = parent_quota; ++ else if (parent_quota != RUNTIME_INF && quota > parent_quota) ++ return -EINVAL; ++ } ++ } ++ cfs_b->hierarchical_quota = quota; ++ ++ return 0; ++} ++ ++static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) ++{ ++ int ret; ++ struct cfs_schedulable_data data = { ++ .tg = tg, ++ .period = period, ++ .quota = quota, ++ }; ++ ++ if (quota != RUNTIME_INF) { ++ do_div(data.period, NSEC_PER_USEC); ++ do_div(data.quota, NSEC_PER_USEC); ++ } ++ ++ rcu_read_lock(); ++ ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++static int cpu_cfs_stat_show(struct seq_file *sf, void *v) ++{ ++ struct task_group *tg = css_tg(seq_css(sf)); ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ ++ seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods); ++ seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled); ++ seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time); ++ ++ if (schedstat_enabled() && tg != &root_task_group) { ++ u64 ws = 0; ++ int i; ++ ++ for_each_possible_cpu(i) ++ ws += schedstat_val(tg->se[i]->statistics.wait_sum); ++ ++ seq_printf(sf, "wait_sum %llu\n", ws); ++ } ++ ++ return 0; ++} ++#endif /* CONFIG_CFS_BANDWIDTH */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++static int cpu_rt_runtime_write(struct cgroup_subsys_state *css, ++ struct cftype *cft, s64 val) ++{ ++ return sched_group_set_rt_runtime(css_tg(css), val); ++} ++ ++static s64 cpu_rt_runtime_read(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return sched_group_rt_runtime(css_tg(css)); ++} ++ ++static int cpu_rt_period_write_uint(struct cgroup_subsys_state *css, ++ struct cftype *cftype, u64 rt_period_us) ++{ ++ return sched_group_set_rt_period(css_tg(css), rt_period_us); ++} ++ ++static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ return sched_group_rt_period(css_tg(css)); ++} ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ ++static struct cftype cpu_legacy_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "shares", ++ .read_u64 = cpu_shares_read_u64, ++ .write_u64 = cpu_shares_write_u64, ++ }, ++#endif ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ .name = "cfs_quota_us", ++ .read_s64 = cpu_cfs_quota_read_s64, ++ .write_s64 = cpu_cfs_quota_write_s64, ++ }, ++ { ++ .name = "cfs_period_us", ++ .read_u64 = cpu_cfs_period_read_u64, ++ .write_u64 = cpu_cfs_period_write_u64, ++ }, ++ { ++ .name = "stat", ++ .seq_show = cpu_cfs_stat_show, ++ }, ++#endif ++#ifdef CONFIG_RT_GROUP_SCHED ++ { ++ .name = "rt_runtime_us", ++ .read_s64 = cpu_rt_runtime_read, ++ .write_s64 = 
cpu_rt_runtime_write, ++ }, ++ { ++ .name = "rt_period_us", ++ .read_u64 = cpu_rt_period_read_uint, ++ .write_u64 = cpu_rt_period_write_uint, ++ }, ++#endif ++ { } /* Terminate */ ++}; ++ ++static int cpu_extra_stat_show(struct seq_file *sf, ++ struct cgroup_subsys_state *css) ++{ ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ struct task_group *tg = css_tg(css); ++ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; ++ u64 throttled_usec; ++ ++ throttled_usec = cfs_b->throttled_time; ++ do_div(throttled_usec, NSEC_PER_USEC); ++ ++ seq_printf(sf, "nr_periods %d\n" ++ "nr_throttled %d\n" ++ "throttled_usec %llu\n", ++ cfs_b->nr_periods, cfs_b->nr_throttled, ++ throttled_usec); ++ } ++#endif ++ return 0; ++} ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ struct task_group *tg = css_tg(css); ++ u64 weight = scale_load_down(tg->shares); ++ ++ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024); ++} ++ ++static int cpu_weight_write_u64(struct cgroup_subsys_state *css, ++ struct cftype *cft, u64 weight) ++{ ++ /* ++ * cgroup weight knobs should use the common MIN, DFL and MAX ++ * values which are 1, 100 and 10000 respectively. While it loses ++ * a bit of range on both ends, it maps pretty well onto the shares ++ * value used by scheduler and the round-trip conversions preserve ++ * the original value over the entire range. ++ */ ++ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX) ++ return -ERANGE; ++ ++ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL); ++ ++ return sched_group_set_shares(css_tg(css), scale_load(weight)); ++} ++ ++static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft) ++{ ++ unsigned long weight = scale_load_down(css_tg(css)->shares); ++ int last_delta = INT_MAX; ++ int prio, delta; ++ ++ /* find the closest nice value to the current weight */ ++ for (prio = 0; prio < ARRAY_SIZE(sched_prio_to_weight); prio++) { ++ delta = abs(sched_prio_to_weight[prio] - weight); ++ if (delta >= last_delta) ++ break; ++ last_delta = delta; ++ } ++ ++ return PRIO_TO_NICE(prio - 1 + MAX_RT_PRIO); ++} ++ ++static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft, s64 nice) ++{ ++ unsigned long weight; ++ int idx; ++ ++ if (nice < MIN_NICE || nice > MAX_NICE) ++ return -ERANGE; ++ ++ idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO; ++ idx = array_index_nospec(idx, 40); ++ weight = sched_prio_to_weight[idx]; ++ ++ return sched_group_set_shares(css_tg(css), scale_load(weight)); ++} ++#endif ++ ++static void __maybe_unused cpu_period_quota_print(struct seq_file *sf, ++ long period, long quota) ++{ ++ if (quota < 0) ++ seq_puts(sf, "max"); ++ else ++ seq_printf(sf, "%ld", quota); ++ ++ seq_printf(sf, " %ld\n", period); ++} ++ ++/* caller should put the current value in *@periodp before calling */ ++static int __maybe_unused cpu_period_quota_parse(char *buf, ++ u64 *periodp, u64 *quotap) ++{ ++ char tok[21]; /* U64_MAX */ ++ ++ if (sscanf(buf, "%20s %llu", tok, periodp) < 1) ++ return -EINVAL; ++ ++ *periodp *= NSEC_PER_USEC; ++ ++ if (sscanf(tok, "%llu", quotap)) ++ *quotap *= NSEC_PER_USEC; ++ else if (!strcmp(tok, "max")) ++ *quotap = RUNTIME_INF; ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++static int cpu_max_show(struct seq_file *sf, void *v) ++{ ++ struct task_group *tg = css_tg(seq_css(sf)); ++ ++ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg)); ++ return 0; ++} 
++ ++static ssize_t cpu_max_write(struct kernfs_open_file *of, ++ char *buf, size_t nbytes, loff_t off) ++{ ++ struct task_group *tg = css_tg(of_css(of)); ++ u64 period = tg_get_cfs_period(tg); ++ u64 quota; ++ int ret; ++ ++ ret = cpu_period_quota_parse(buf, &period, "a); ++ if (!ret) ++ ret = tg_set_cfs_bandwidth(tg, period, quota); ++ return ret ?: nbytes; ++} ++#endif ++ ++static struct cftype cpu_files[] = { ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ { ++ .name = "weight", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .read_u64 = cpu_weight_read_u64, ++ .write_u64 = cpu_weight_write_u64, ++ }, ++ { ++ .name = "weight.nice", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .read_s64 = cpu_weight_nice_read_s64, ++ .write_s64 = cpu_weight_nice_write_s64, ++ }, ++#endif ++#ifdef CONFIG_CFS_BANDWIDTH ++ { ++ .name = "max", ++ .flags = CFTYPE_NOT_ON_ROOT, ++ .seq_show = cpu_max_show, ++ .write = cpu_max_write, ++ }, ++#endif ++ { } /* terminate */ ++}; ++ ++struct cgroup_subsys cpu_cgrp_subsys = { ++ .css_alloc = cpu_cgroup_css_alloc, ++ .css_online = cpu_cgroup_css_online, ++ .css_released = cpu_cgroup_css_released, ++ .css_free = cpu_cgroup_css_free, ++ .css_extra_stat_show = cpu_extra_stat_show, ++ .fork = cpu_cgroup_fork, ++ .can_attach = cpu_cgroup_can_attach, ++ .attach = cpu_cgroup_attach, ++ .legacy_cftypes = cpu_legacy_files, ++ .dfl_cftypes = cpu_files, ++ .early_init = true, ++ .threaded = true, ++}; ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++void dump_cpu_task(int cpu) ++{ ++ pr_info("Task dump for CPU %d:\n", cpu); ++ sched_show_task(cpu_curr(cpu)); ++} ++ ++/* ++ * Nice levels are multiplicative, with a gentle 10% change for every ++ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to ++ * nice 1, it will get ~10% less CPU time than another CPU-bound task ++ * that remained on nice 0. ++ * ++ * The "10% effect" is relative and cumulative: from _any_ nice level, ++ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level ++ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. ++ * If a task goes up by ~10% and another task goes down by ~10% then ++ * the relative distance between them is ~25%.) ++ */ ++const int sched_prio_to_weight[40] = { ++ /* -20 */ 88761, 71755, 56483, 46273, 36291, ++ /* -15 */ 29154, 23254, 18705, 14949, 11916, ++ /* -10 */ 9548, 7620, 6100, 4904, 3906, ++ /* -5 */ 3121, 2501, 1991, 1586, 1277, ++ /* 0 */ 1024, 820, 655, 526, 423, ++ /* 5 */ 335, 272, 215, 172, 137, ++ /* 10 */ 110, 87, 70, 56, 45, ++ /* 15 */ 36, 29, 23, 18, 15, ++}; ++ ++/* ++ * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated. 
++ * ++ * In cases where the weight does not change often, we can use the ++ * precalculated inverse to speed up arithmetics by turning divisions ++ * into multiplications: ++ */ ++const u32 sched_prio_to_wmult[40] = { ++ /* -20 */ 48388, 59856, 76040, 92818, 118348, ++ /* -15 */ 147320, 184698, 229616, 287308, 360437, ++ /* -10 */ 449829, 563644, 704093, 875809, 1099582, ++ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, ++ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, ++ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, ++ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, ++ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, ++}; ++ ++#undef CREATE_TRACE_POINTS +diff -uprN kernel/kernel/sched/idle.c kernel_new/kernel/sched/idle.c +--- kernel/kernel/sched/idle.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/idle.c 2021-04-01 18:28:07.812863113 +0800 +@@ -76,22 +76,29 @@ void __weak arch_cpu_idle_dead(void) { } + void __weak arch_cpu_idle(void) + { + cpu_idle_force_poll = 1; +- local_irq_enable(); ++ local_irq_enable_full(); + } + + /** + * default_idle_call - Default CPU idle routine. + * + * To use when the cpuidle framework cannot be used. ++ * ++ * When interrupts are pipelined, this call is entered with hard irqs ++ * on and the root stage stalled, returns with hard irqs on, and the ++ * root stage unstalled. + */ + void __cpuidle default_idle_call(void) + { + if (current_clr_polling_and_test()) { +- local_irq_enable(); ++ local_irq_enable_full(); + } else { +- stop_critical_timings(); +- arch_cpu_idle(); +- start_critical_timings(); ++ if (ipipe_enter_cpuidle(NULL, NULL)) { ++ stop_critical_timings(); ++ arch_cpu_idle(); ++ start_critical_timings(); ++ } else ++ local_irq_enable_full(); + } + } + +@@ -207,6 +214,15 @@ static void cpuidle_idle_call(void) + exit_idle: + __current_set_polling(); + ++#ifdef CONFIG_IPIPE ++ /* ++ * Catch mishandling of the CPU's interrupt disable flag when ++ * pipelining IRQs. 
++ */ ++ if (WARN_ON_ONCE(hard_irqs_disabled())) ++ hard_local_irq_enable(); ++#endif ++ + /* + * It is up to the idle functions to reenable local interrupts + */ +@@ -261,6 +277,9 @@ static void do_idle(void) + cpu_idle_poll(); + } else { + cpuidle_idle_call(); ++#ifdef CONFIG_IPIPE ++ WARN_ON_ONCE(hard_irqs_disabled()); ++#endif + } + arch_cpu_idle_exit(); + } +diff -uprN kernel/kernel/sched/sched.h kernel_new/kernel/sched/sched.h +--- kernel/kernel/sched/sched.h 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h 2021-04-02 09:21:23.248426589 +0800 +@@ -64,6 +64,7 @@ + #include + #include + #include ++#include + #include + + #include +diff -uprN kernel/kernel/sched/sched.h.orig kernel_new/kernel/sched/sched.h.orig +--- kernel/kernel/sched/sched.h.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h.orig 2021-04-01 18:28:07.812863113 +0800 +@@ -0,0 +1,2315 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Scheduler internal types and methods: ++ */ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#ifdef CONFIG_PARAVIRT ++# include ++#endif ++ ++#include "cpupri.h" ++#include "cpudeadline.h" ++ ++#ifdef CONFIG_SCHED_DEBUG ++# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) ++#else ++# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) ++#endif ++ ++struct rq; ++struct cpuidle_state; ++ ++/* task_struct::on_rq states: */ ++#define TASK_ON_RQ_QUEUED 1 ++#define TASK_ON_RQ_MIGRATING 2 ++ ++extern __read_mostly int scheduler_running; ++ ++extern unsigned long calc_load_update; ++extern atomic_long_t calc_load_tasks; ++ ++extern void calc_global_load_tick(struct rq *this_rq); ++extern long calc_load_fold_active(struct rq *this_rq, long adjust); ++ ++#ifdef CONFIG_SMP ++extern void cpu_load_update_active(struct rq *this_rq); ++#else ++static inline void cpu_load_update_active(struct rq *this_rq) { } ++#endif ++ ++/* ++ * Helpers for converting nanosecond timing to jiffy resolution ++ */ ++#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) ++ ++/* ++ * Increase resolution of nice-level calculations for 64-bit architectures. ++ * The extra resolution improves shares distribution and load balancing of ++ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup ++ * hierarchies, especially on larger systems. This is not a user-visible change ++ * and does not change the user-interface for setting shares/weights. ++ * ++ * We increase resolution only if we have enough bits to allow this increased ++ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit ++ * are pretty high and the returns do not justify the increased costs. ++ * ++ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to ++ * increase coverage and consistency always enable it on 64-bit platforms. 
++ */ ++#ifdef CONFIG_64BIT ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) ++# define scale_load_down(w) \ ++({ \ ++ unsigned long __w = (w); \ ++ if (__w) \ ++ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ ++ __w; \ ++}) ++#else ++# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) ++# define scale_load(w) (w) ++# define scale_load_down(w) (w) ++#endif ++ ++/* ++ * Task weight (visible to users) and its load (invisible to users) have ++ * independent resolution, but they should be well calibrated. We use ++ * scale_load() and scale_load_down(w) to convert between them. The ++ * following must be true: ++ * ++ * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD ++ * ++ */ ++#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT) ++ ++/* ++ * Single value that decides SCHED_DEADLINE internal math precision. ++ * 10 -> just above 1us ++ * 9 -> just above 0.5us ++ */ ++#define DL_SCALE 10 ++ ++/* ++ * Single value that denotes runtime == period, ie unlimited time. ++ */ ++#define RUNTIME_INF ((u64)~0ULL) ++ ++static inline int idle_policy(int policy) ++{ ++ return policy == SCHED_IDLE; ++} ++static inline int fair_policy(int policy) ++{ ++ return policy == SCHED_NORMAL || policy == SCHED_BATCH; ++} ++ ++static inline int rt_policy(int policy) ++{ ++ return policy == SCHED_FIFO || policy == SCHED_RR; ++} ++ ++static inline int dl_policy(int policy) ++{ ++ return policy == SCHED_DEADLINE; ++} ++static inline bool valid_policy(int policy) ++{ ++ return idle_policy(policy) || fair_policy(policy) || ++ rt_policy(policy) || dl_policy(policy); ++} ++ ++static inline int task_has_rt_policy(struct task_struct *p) ++{ ++ return rt_policy(p->policy); ++} ++ ++static inline int task_has_dl_policy(struct task_struct *p) ++{ ++ return dl_policy(p->policy); ++} ++ ++#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) ++ ++/* ++ * !! For sched_setattr_nocheck() (kernel) only !! ++ * ++ * This is actually gross. :( ++ * ++ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE ++ * tasks, but still be able to sleep. We need this on platforms that cannot ++ * atomically change clock frequency. Remove once fast switching will be ++ * available on such platforms. ++ * ++ * SUGOV stands for SchedUtil GOVernor. ++ */ ++#define SCHED_FLAG_SUGOV 0x10000000 ++ ++static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se) ++{ ++#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL ++ return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); ++#else ++ return false; ++#endif ++} ++ ++/* ++ * Tells if entity @a should preempt entity @b. 
++ */ ++static inline bool ++dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) ++{ ++ return dl_entity_is_special(a) || ++ dl_time_before(a->deadline, b->deadline); ++} ++ ++/* ++ * This is the priority-queue data structure of the RT scheduling class: ++ */ ++struct rt_prio_array { ++ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ ++ struct list_head queue[MAX_RT_PRIO]; ++}; ++ ++struct rt_bandwidth { ++ /* nests inside the rq lock: */ ++ raw_spinlock_t rt_runtime_lock; ++ ktime_t rt_period; ++ u64 rt_runtime; ++ struct hrtimer rt_period_timer; ++ unsigned int rt_period_active; ++}; ++ ++void __dl_clear_params(struct task_struct *p); ++ ++/* ++ * To keep the bandwidth of -deadline tasks and groups under control ++ * we need some place where: ++ * - store the maximum -deadline bandwidth of the system (the group); ++ * - cache the fraction of that bandwidth that is currently allocated. ++ * ++ * This is all done in the data structure below. It is similar to the ++ * one used for RT-throttling (rt_bandwidth), with the main difference ++ * that, since here we are only interested in admission control, we ++ * do not decrease any runtime while the group "executes", neither we ++ * need a timer to replenish it. ++ * ++ * With respect to SMP, the bandwidth is given on a per-CPU basis, ++ * meaning that: ++ * - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU; ++ * - dl_total_bw array contains, in the i-eth element, the currently ++ * allocated bandwidth on the i-eth CPU. ++ * Moreover, groups consume bandwidth on each CPU, while tasks only ++ * consume bandwidth on the CPU they're running on. ++ * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw ++ * that will be shown the next time the proc or cgroup controls will ++ * be red. It on its turn can be changed by writing on its own ++ * control. 
++ */ ++struct dl_bandwidth { ++ raw_spinlock_t dl_runtime_lock; ++ u64 dl_runtime; ++ u64 dl_period; ++}; ++ ++static inline int dl_bandwidth_enabled(void) ++{ ++ return sysctl_sched_rt_runtime >= 0; ++} ++ ++struct dl_bw { ++ raw_spinlock_t lock; ++ u64 bw; ++ u64 total_bw; ++}; ++ ++static inline void __dl_update(struct dl_bw *dl_b, s64 bw); ++ ++static inline ++void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus) ++{ ++ dl_b->total_bw -= tsk_bw; ++ __dl_update(dl_b, (s32)tsk_bw / cpus); ++} ++ ++static inline ++void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus) ++{ ++ dl_b->total_bw += tsk_bw; ++ __dl_update(dl_b, -((s32)tsk_bw / cpus)); ++} ++ ++static inline ++bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) ++{ ++ return dl_b->bw != -1 && ++ dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; ++} ++ ++extern void dl_change_utilization(struct task_struct *p, u64 new_bw); ++extern void init_dl_bw(struct dl_bw *dl_b); ++extern int sched_dl_global_validate(void); ++extern void sched_dl_do_global(void); ++extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr); ++extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr); ++extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); ++extern bool __checkparam_dl(const struct sched_attr *attr); ++extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); ++extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); ++extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); ++extern bool dl_cpu_busy(unsigned int cpu); ++ ++#ifdef CONFIG_CGROUP_SCHED ++ ++#include ++ ++struct cfs_rq; ++struct rt_rq; ++ ++extern struct list_head task_groups; ++ ++struct cfs_bandwidth { ++#ifdef CONFIG_CFS_BANDWIDTH ++ raw_spinlock_t lock; ++ ktime_t period; ++ u64 quota; ++ u64 runtime; ++ s64 hierarchical_quota; ++ ++ short idle; ++ short period_active; ++ struct hrtimer period_timer; ++ struct hrtimer slack_timer; ++ struct list_head throttled_cfs_rq; ++ ++ /* Statistics: */ ++ int nr_periods; ++ int nr_throttled; ++ u64 throttled_time; ++ ++ bool distribute_running; ++#endif ++}; ++ ++/* Task group related information */ ++struct task_group { ++ struct cgroup_subsys_state css; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ /* schedulable entities of this group on each CPU */ ++ struct sched_entity **se; ++ /* runqueue "owned" by this group on each CPU */ ++ struct cfs_rq **cfs_rq; ++ unsigned long shares; ++ ++#ifdef CONFIG_SMP ++ /* ++ * load_avg can be heavily contended at clock tick time, so put ++ * it in its own cacheline separated from the fields above which ++ * will also be accessed at each tick. ++ */ ++ atomic_long_t load_avg ____cacheline_aligned; ++#endif ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ struct sched_rt_entity **rt_se; ++ struct rt_rq **rt_rq; ++ ++ struct rt_bandwidth rt_bandwidth; ++#endif ++ ++ struct rcu_head rcu; ++ struct list_head list; ++ ++ struct task_group *parent; ++ struct list_head siblings; ++ struct list_head children; ++ ++#ifdef CONFIG_SCHED_AUTOGROUP ++ struct autogroup *autogroup; ++#endif ++ ++ struct cfs_bandwidth cfs_bandwidth; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD ++ ++/* ++ * A weight of 0 or 1 can cause arithmetics problems. 
++ * A weight of a cfs_rq is the sum of weights of which entities ++ * are queued on this cfs_rq, so a weight of a entity should not be ++ * too large, so as the shares value of a task group. ++ * (The default weight is 1024 - so there's no practical ++ * limitation from this.) ++ */ ++#define MIN_SHARES (1UL << 1) ++#define MAX_SHARES (1UL << 18) ++#endif ++ ++typedef int (*tg_visitor)(struct task_group *, void *); ++ ++extern int walk_tg_tree_from(struct task_group *from, ++ tg_visitor down, tg_visitor up, void *data); ++ ++/* ++ * Iterate the full tree, calling @down when first entering a node and @up when ++ * leaving it for the final time. ++ * ++ * Caller must hold rcu_lock or sufficient equivalent. ++ */ ++static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) ++{ ++ return walk_tg_tree_from(&root_task_group, down, up, data); ++} ++ ++extern int tg_nop(struct task_group *tg, void *data); ++ ++extern void free_fair_sched_group(struct task_group *tg); ++extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); ++extern void online_fair_sched_group(struct task_group *tg); ++extern void unregister_fair_sched_group(struct task_group *tg); ++extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, ++ struct sched_entity *se, int cpu, ++ struct sched_entity *parent); ++extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b); ++ ++extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); ++extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); ++extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); ++ ++extern void free_rt_sched_group(struct task_group *tg); ++extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); ++extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, ++ struct sched_rt_entity *rt_se, int cpu, ++ struct sched_rt_entity *parent); ++extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us); ++extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us); ++extern long sched_group_rt_runtime(struct task_group *tg); ++extern long sched_group_rt_period(struct task_group *tg); ++extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); ++ ++extern struct task_group *sched_create_group(struct task_group *parent); ++extern void sched_online_group(struct task_group *tg, ++ struct task_group *parent); ++extern void sched_destroy_group(struct task_group *tg); ++extern void sched_offline_group(struct task_group *tg); ++ ++extern void sched_move_task(struct task_struct *tsk); ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); ++ ++#ifdef CONFIG_SMP ++extern void set_task_rq_fair(struct sched_entity *se, ++ struct cfs_rq *prev, struct cfs_rq *next); ++#else /* !CONFIG_SMP */ ++static inline void set_task_rq_fair(struct sched_entity *se, ++ struct cfs_rq *prev, struct cfs_rq *next) { } ++#endif /* CONFIG_SMP */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++#else /* CONFIG_CGROUP_SCHED */ ++ ++struct cfs_bandwidth { }; ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++/* CFS-related fields in a runqueue */ ++struct cfs_rq { ++ struct load_weight load; ++ unsigned long runnable_weight; ++ unsigned int nr_running; ++ unsigned int h_nr_running; ++ ++ u64 exec_clock; ++ u64 min_vruntime; ++#ifndef CONFIG_64BIT ++ u64 min_vruntime_copy; ++#endif ++ ++ struct rb_root_cached tasks_timeline; ++ ++ /* ++ * 'curr' points to currently running 
entity on this cfs_rq. ++ * It is set to NULL otherwise (i.e when none are currently running). ++ */ ++ struct sched_entity *curr; ++ struct sched_entity *next; ++ struct sched_entity *last; ++ struct sched_entity *skip; ++ ++#ifdef CONFIG_SCHED_DEBUG ++ unsigned int nr_spread_over; ++#endif ++ ++#ifdef CONFIG_SMP ++ /* ++ * CFS load tracking ++ */ ++ struct sched_avg avg; ++#ifndef CONFIG_64BIT ++ u64 load_last_update_time_copy; ++#endif ++ struct { ++ raw_spinlock_t lock ____cacheline_aligned; ++ int nr; ++ unsigned long load_avg; ++ unsigned long util_avg; ++ unsigned long runnable_sum; ++ } removed; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ unsigned long tg_load_avg_contrib; ++ long propagate; ++ long prop_runnable_sum; ++ ++ /* ++ * h_load = weight * f(tg) ++ * ++ * Where f(tg) is the recursive weight fraction assigned to ++ * this group. ++ */ ++ unsigned long h_load; ++ u64 last_h_load_update; ++ struct sched_entity *h_load_next; ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ ++ ++ /* ++ * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in ++ * a hierarchy). Non-leaf lrqs hold other higher schedulable entities ++ * (like users, containers etc.) ++ * ++ * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU. ++ * This list is used during load balance. ++ */ ++ int on_list; ++ struct list_head leaf_cfs_rq_list; ++ struct task_group *tg; /* group that "owns" this runqueue */ ++ ++#ifdef CONFIG_CFS_BANDWIDTH ++ int runtime_enabled; ++ s64 runtime_remaining; ++ ++ u64 throttled_clock; ++ u64 throttled_clock_task; ++ u64 throttled_clock_task_time; ++ int throttled; ++ int throttle_count; ++ struct list_head throttled_list; ++#endif /* CONFIG_CFS_BANDWIDTH */ ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++static inline int rt_bandwidth_enabled(void) ++{ ++ return sysctl_sched_rt_runtime >= 0; ++} ++ ++/* RT IPI pull logic requires IRQ_WORK */ ++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP) ++# define HAVE_RT_PUSH_IPI ++#endif ++ ++/* Real-Time classes' related field in a runqueue: */ ++struct rt_rq { ++ struct rt_prio_array active; ++ unsigned int rt_nr_running; ++ unsigned int rr_nr_running; ++#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED ++ struct { ++ int curr; /* highest queued rt task prio */ ++#ifdef CONFIG_SMP ++ int next; /* next highest */ ++#endif ++ } highest_prio; ++#endif ++#ifdef CONFIG_SMP ++ unsigned long rt_nr_migratory; ++ unsigned long rt_nr_total; ++ int overloaded; ++ struct plist_head pushable_tasks; ++ ++#endif /* CONFIG_SMP */ ++ int rt_queued; ++ ++ int rt_throttled; ++ u64 rt_time; ++ u64 rt_runtime; ++ /* Nests inside the rq lock: */ ++ raw_spinlock_t rt_runtime_lock; ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ unsigned long rt_nr_boosted; ++ ++ struct rq *rq; ++ struct task_group *tg; ++#endif ++}; ++ ++static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) ++{ ++ return rt_rq->rt_queued && rt_rq->rt_nr_running; ++} ++ ++/* Deadline class' related fields in a runqueue */ ++struct dl_rq { ++ /* runqueue is an rbtree, ordered by deadline */ ++ struct rb_root_cached root; ++ ++ unsigned long dl_nr_running; ++ ++#ifdef CONFIG_SMP ++ /* ++ * Deadline values of the currently executing and the ++ * earliest ready task on this rq. Caching these facilitates ++ * the decision wether or not a ready but not running task ++ * should migrate somewhere else. 
++ */ ++ struct { ++ u64 curr; ++ u64 next; ++ } earliest_dl; ++ ++ unsigned long dl_nr_migratory; ++ int overloaded; ++ ++ /* ++ * Tasks on this rq that can be pushed away. They are kept in ++ * an rb-tree, ordered by tasks' deadlines, with caching ++ * of the leftmost (earliest deadline) element. ++ */ ++ struct rb_root_cached pushable_dl_tasks_root; ++#else ++ struct dl_bw dl_bw; ++#endif ++ /* ++ * "Active utilization" for this runqueue: increased when a ++ * task wakes up (becomes TASK_RUNNING) and decreased when a ++ * task blocks ++ */ ++ u64 running_bw; ++ ++ /* ++ * Utilization of the tasks "assigned" to this runqueue (including ++ * the tasks that are in runqueue and the tasks that executed on this ++ * CPU and blocked). Increased when a task moves to this runqueue, and ++ * decreased when the task moves away (migrates, changes scheduling ++ * policy, or terminates). ++ * This is needed to compute the "inactive utilization" for the ++ * runqueue (inactive utilization = this_bw - running_bw). ++ */ ++ u64 this_bw; ++ u64 extra_bw; ++ ++ /* ++ * Inverse of the fraction of CPU utilization that can be reclaimed ++ * by the GRUB algorithm. ++ */ ++ u64 bw_ratio; ++}; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++/* An entity is a task if it doesn't "own" a runqueue */ ++#define entity_is_task(se) (!se->my_q) ++#else ++#define entity_is_task(se) 1 ++#endif ++ ++#ifdef CONFIG_SMP ++/* ++ * XXX we want to get rid of these helpers and use the full load resolution. ++ */ ++static inline long se_weight(struct sched_entity *se) ++{ ++ return scale_load_down(se->load.weight); ++} ++ ++static inline long se_runnable(struct sched_entity *se) ++{ ++ return scale_load_down(se->runnable_weight); ++} ++ ++static inline bool sched_asym_prefer(int a, int b) ++{ ++ return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); ++} ++ ++/* ++ * We add the notion of a root-domain which will be used to define per-domain ++ * variables. Each exclusive cpuset essentially defines an island domain by ++ * fully partitioning the member CPUs from any other cpuset. Whenever a new ++ * exclusive cpuset is created, we also create and attach a new root-domain ++ * object. ++ * ++ */ ++struct root_domain { ++ atomic_t refcount; ++ atomic_t rto_count; ++ struct rcu_head rcu; ++ cpumask_var_t span; ++ cpumask_var_t online; ++ ++ /* Indicate more than one runnable task for any CPU */ ++ bool overload; ++ ++ /* ++ * The bit corresponding to a CPU gets set here if such CPU has more ++ * than one runnable -deadline task (as it is below for RT tasks). ++ */ ++ cpumask_var_t dlo_mask; ++ atomic_t dlo_count; ++ struct dl_bw dl_bw; ++ struct cpudl cpudl; ++ ++#ifdef HAVE_RT_PUSH_IPI ++ /* ++ * For IPI pull requests, loop across the rto_mask. ++ */ ++ struct irq_work rto_push_work; ++ raw_spinlock_t rto_lock; ++ /* These are only updated and read within rto_lock */ ++ int rto_loop; ++ int rto_cpu; ++ /* These atomics are updated outside of a lock */ ++ atomic_t rto_loop_next; ++ atomic_t rto_loop_start; ++#endif ++ /* ++ * The "RT overload" flag: it gets set if a CPU has more than ++ * one runnable RT task. 
++ */ ++ cpumask_var_t rto_mask; ++ struct cpupri cpupri; ++ ++ unsigned long max_cpu_capacity; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++}; ++ ++extern struct root_domain def_root_domain; ++extern struct mutex sched_domains_mutex; ++ ++extern void init_defrootdomain(void); ++extern int sched_init_domains(const struct cpumask *cpu_map); ++extern void rq_attach_root(struct rq *rq, struct root_domain *rd); ++extern void sched_get_rd(struct root_domain *rd); ++extern void sched_put_rd(struct root_domain *rd); ++ ++#ifdef HAVE_RT_PUSH_IPI ++extern void rto_push_irq_work_func(struct irq_work *work); ++#endif ++#endif /* CONFIG_SMP */ ++ ++/* ++ * This is the main, per-CPU runqueue data structure. ++ * ++ * Locking rule: those places that want to lock multiple runqueues ++ * (such as the load balancing or the thread migration code), lock ++ * acquire operations must be ordered by ascending &runqueue. ++ */ ++struct rq { ++ /* runqueue lock: */ ++ raw_spinlock_t lock; ++ ++ /* ++ * nr_running and cpu_load should be in the same cacheline because ++ * remote CPUs use both these fields when doing load calculation. ++ */ ++ unsigned int nr_running; ++#ifdef CONFIG_NUMA_BALANCING ++ unsigned int nr_numa_running; ++ unsigned int nr_preferred_running; ++ unsigned int numa_migrate_on; ++#endif ++ #define CPU_LOAD_IDX_MAX 5 ++ unsigned long cpu_load[CPU_LOAD_IDX_MAX]; ++#ifdef CONFIG_NO_HZ_COMMON ++#ifdef CONFIG_SMP ++ unsigned long last_load_update_tick; ++ unsigned long last_blocked_load_update_tick; ++ unsigned int has_blocked_load; ++#endif /* CONFIG_SMP */ ++ unsigned int nohz_tick_stopped; ++ atomic_t nohz_flags; ++#endif /* CONFIG_NO_HZ_COMMON */ ++ ++ /* capture load from *all* tasks on this CPU: */ ++ struct load_weight load; ++ unsigned long nr_load_updates; ++ u64 nr_switches; ++ ++ struct cfs_rq cfs; ++ struct rt_rq rt; ++ struct dl_rq dl; ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ /* list of leaf cfs_rq on this CPU: */ ++ struct list_head leaf_cfs_rq_list; ++ struct list_head *tmp_alone_branch; ++#endif /* CONFIG_FAIR_GROUP_SCHED */ ++ ++ /* ++ * This is part of a global counter where only the total sum ++ * over all CPUs matters. A task can increase this counter on ++ * one CPU and if it got migrated afterwards it may decrease ++ * it on another CPU. 
Always updated under the runqueue lock: ++ */ ++ unsigned long nr_uninterruptible; ++ ++ struct task_struct *curr; ++ struct task_struct *idle; ++ struct task_struct *stop; ++ unsigned long next_balance; ++ struct mm_struct *prev_mm; ++ ++ unsigned int clock_update_flags; ++ u64 clock; ++ u64 clock_task; ++ ++ atomic_t nr_iowait; ++ ++#ifdef CONFIG_MEMBARRIER ++ int membarrier_state; ++#endif ++ ++#ifdef CONFIG_SMP ++ struct root_domain *rd; ++ struct sched_domain *sd; ++ ++ unsigned long cpu_capacity; ++ unsigned long cpu_capacity_orig; ++ ++ struct callback_head *balance_callback; ++ ++ unsigned char idle_balance; ++ ++ /* For active balancing */ ++ int active_balance; ++ int push_cpu; ++ struct cpu_stop_work active_balance_work; ++ ++ /* CPU of this runqueue: */ ++ int cpu; ++ int online; ++ ++ struct list_head cfs_tasks; ++ ++ struct sched_avg avg_rt; ++ struct sched_avg avg_dl; ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++ struct sched_avg avg_irq; ++#endif ++ u64 idle_stamp; ++ u64 avg_idle; ++ ++ /* This is used to determine avg_idle's max value */ ++ u64 max_idle_balance_cost; ++#endif ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++ u64 prev_irq_time; ++#endif ++#ifdef CONFIG_PARAVIRT ++ u64 prev_steal_time; ++#endif ++#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING ++ u64 prev_steal_time_rq; ++#endif ++ ++ /* calc_load related fields */ ++ unsigned long calc_load_update; ++ long calc_load_active; ++ ++#ifdef CONFIG_SCHED_HRTICK ++#ifdef CONFIG_SMP ++ int hrtick_csd_pending; ++ call_single_data_t hrtick_csd; ++#endif ++ struct hrtimer hrtick_timer; ++#endif ++ ++#ifdef CONFIG_SCHEDSTATS ++ /* latency stats */ ++ struct sched_info rq_sched_info; ++ unsigned long long rq_cpu_time; ++ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ ++ ++ /* sys_sched_yield() stats */ ++ unsigned int yld_count; ++ ++ /* schedule() stats */ ++ unsigned int sched_count; ++ unsigned int sched_goidle; ++ ++ /* try_to_wake_up() stats */ ++ unsigned int ttwu_count; ++ unsigned int ttwu_local; ++#endif ++ ++#ifdef CONFIG_SMP ++ struct llist_head wake_list; ++#endif ++ ++#ifdef CONFIG_CPU_IDLE ++ /* Must be inspected within a rcu lock section */ ++ struct cpuidle_state *idle_state; ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++struct cputime { ++ u64 utime; ++ u64 stime; ++}; ++ ++struct rq_cputime { ++ raw_spinlock_t lock; ++ unsigned long long sum_idle_time; ++ unsigned long long last_entry_idle; ++ struct cputime cpu_prev_time; ++ struct cputime cpu_last_time; ++}; ++ ++DECLARE_PER_CPU(struct rq_cputime, rq_cputimes); ++ ++static inline int cpu_of(struct rq *rq) ++{ ++#ifdef CONFIG_SMP ++ return rq->cpu; ++#else ++ return 0; ++#endif ++} ++ ++ ++#ifdef CONFIG_SCHED_SMT ++extern void __update_idle_core(struct rq *rq); ++ ++static inline void update_idle_core(struct rq *rq) ++{ ++ if (static_branch_unlikely(&sched_smt_present)) ++ __update_idle_core(rq); ++} ++ ++#else ++static inline void update_idle_core(struct rq *rq) { } ++#endif ++ ++DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() this_cpu_ptr(&runqueues) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++#define raw_rq() raw_cpu_ptr(&runqueues) ++ ++static inline u64 __rq_clock_broken(struct rq *rq) ++{ ++ return READ_ONCE(rq->clock); ++} ++ ++/* ++ * rq::clock_update_flags bits ++ * ++ * %RQCF_REQ_SKIP - will request skipping of clock update on the next ++ * call to __schedule(). 
This is an optimisation to avoid ++ * neighbouring rq clock updates. ++ * ++ * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is ++ * in effect and calls to update_rq_clock() are being ignored. ++ * ++ * %RQCF_UPDATED - is a debug flag that indicates whether a call has been ++ * made to update_rq_clock() since the last time rq::lock was pinned. ++ * ++ * If inside of __schedule(), clock_update_flags will have been ++ * shifted left (a left shift is a cheap operation for the fast path ++ * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use, ++ * ++ * if (rq-clock_update_flags >= RQCF_UPDATED) ++ * ++ * to check if %RQCF_UPADTED is set. It'll never be shifted more than ++ * one position though, because the next rq_unpin_lock() will shift it ++ * back. ++ */ ++#define RQCF_REQ_SKIP 0x01 ++#define RQCF_ACT_SKIP 0x02 ++#define RQCF_UPDATED 0x04 ++ ++static inline void assert_clock_updated(struct rq *rq) ++{ ++ /* ++ * The only reason for not seeing a clock update since the ++ * last rq_pin_lock() is if we're currently skipping updates. ++ */ ++ SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); ++} ++ ++static inline u64 rq_clock(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ assert_clock_updated(rq); ++ ++ return rq->clock; ++} ++ ++static inline u64 rq_clock_task(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ assert_clock_updated(rq); ++ ++ return rq->clock_task; ++} ++ ++static inline void rq_clock_skip_update(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ rq->clock_update_flags |= RQCF_REQ_SKIP; ++} ++ ++/* ++ * See rt task throttling, which is the only time a skip ++ * request is cancelled. ++ */ ++static inline void rq_clock_cancel_skipupdate(struct rq *rq) ++{ ++ lockdep_assert_held(&rq->lock); ++ rq->clock_update_flags &= ~RQCF_REQ_SKIP; ++} ++ ++struct rq_flags { ++ unsigned long flags; ++ struct pin_cookie cookie; ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the ++ * current pin context is stashed here in case it needs to be ++ * restored in rq_repin_lock(). ++ */ ++ unsigned int clock_update_flags; ++#endif ++}; ++ ++static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++ rf->cookie = lockdep_pin_lock(&rq->lock); ++ ++#ifdef CONFIG_SCHED_DEBUG ++ rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); ++ rf->clock_update_flags = 0; ++#endif ++} ++ ++static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ if (rq->clock_update_flags > RQCF_ACT_SKIP) ++ rf->clock_update_flags = RQCF_UPDATED; ++#endif ++ ++ lockdep_unpin_lock(&rq->lock, rf->cookie); ++} ++ ++static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) ++{ ++ lockdep_repin_lock(&rq->lock, rf->cookie); ++ ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * Restore the value we stashed in @rf for this pin context. 
++ */ ++ rq->clock_update_flags |= rf->clock_update_flags; ++#endif ++} ++ ++#ifdef CONFIG_NUMA ++enum numa_topology_type { ++ NUMA_DIRECT, ++ NUMA_GLUELESS_MESH, ++ NUMA_BACKPLANE, ++}; ++extern enum numa_topology_type sched_numa_topology_type; ++extern int sched_max_numa_distance; ++extern bool find_numa_distance(int distance); ++#endif ++ ++#ifdef CONFIG_NUMA ++extern void sched_init_numa(void); ++extern void sched_domains_numa_masks_set(unsigned int cpu); ++extern void sched_domains_numa_masks_clear(unsigned int cpu); ++#else ++static inline void sched_init_numa(void) { } ++static inline void sched_domains_numa_masks_set(unsigned int cpu) { } ++static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } ++#endif ++ ++#ifdef CONFIG_NUMA_BALANCING ++/* The regions in numa_faults array from task_struct */ ++enum numa_faults_stats { ++ NUMA_MEM = 0, ++ NUMA_CPU, ++ NUMA_MEMBUF, ++ NUMA_CPUBUF ++}; ++extern void sched_setnuma(struct task_struct *p, int node); ++extern int migrate_task_to(struct task_struct *p, int cpu); ++extern int migrate_swap(struct task_struct *p, struct task_struct *t, ++ int cpu, int scpu); ++extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); ++#else ++static inline void ++init_numa_balancing(unsigned long clone_flags, struct task_struct *p) ++{ ++} ++#endif /* CONFIG_NUMA_BALANCING */ ++ ++#ifdef CONFIG_SMP ++ ++static inline void ++queue_balance_callback(struct rq *rq, ++ struct callback_head *head, ++ void (*func)(struct rq *rq)) ++{ ++ lockdep_assert_held(&rq->lock); ++ ++ if (unlikely(head->next)) ++ return; ++ ++ head->func = (void (*)(struct callback_head *))func; ++ head->next = rq->balance_callback; ++ rq->balance_callback = head; ++} ++ ++extern void sched_ttwu_pending(void); ++ ++#define rcu_dereference_check_sched_domain(p) \ ++ rcu_dereference_check((p), \ ++ lockdep_is_held(&sched_domains_mutex)) ++ ++/* ++ * The domain tree (rq->sd) is protected by RCU's quiescent state transition. ++ * See detach_destroy_domains: synchronize_sched for details. ++ * ++ * The domain tree of any CPU may only be accessed from within ++ * preempt-disabled sections. ++ */ ++#define for_each_domain(cpu, __sd) \ ++ for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \ ++ __sd; __sd = __sd->parent) ++ ++#define for_each_lower_domain(sd) for (; sd; sd = sd->child) ++ ++/** ++ * highest_flag_domain - Return highest sched_domain containing flag. ++ * @cpu: The CPU whose highest level of sched domain is to ++ * be returned. ++ * @flag: The flag to check for the highest sched_domain ++ * for the given CPU. ++ * ++ * Returns the highest sched_domain of a CPU which contains the given flag. 
++ */ ++static inline struct sched_domain *highest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd, *hsd = NULL; ++ ++ for_each_domain(cpu, sd) { ++ if (!(sd->flags & flag)) ++ break; ++ hsd = sd; ++ } ++ ++ return hsd; ++} ++ ++static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) ++{ ++ struct sched_domain *sd; ++ ++ for_each_domain(cpu, sd) { ++ if (sd->flags & flag) ++ break; ++ } ++ ++ return sd; ++} ++ ++DECLARE_PER_CPU(struct sched_domain *, sd_llc); ++DECLARE_PER_CPU(int, sd_llc_size); ++DECLARE_PER_CPU(int, sd_llc_id); ++DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); ++DECLARE_PER_CPU(struct sched_domain *, sd_numa); ++DECLARE_PER_CPU(struct sched_domain *, sd_asym); ++ ++struct sched_group_capacity { ++ atomic_t ref; ++ /* ++ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity ++ * for a single CPU. ++ */ ++ unsigned long capacity; ++ unsigned long min_capacity; /* Min per-CPU capacity in group */ ++ unsigned long next_update; ++ int imbalance; /* XXX unrelated to capacity but shared group state */ ++ ++#ifdef CONFIG_SCHED_DEBUG ++ int id; ++#endif ++ ++ unsigned long cpumask[0]; /* Balance mask */ ++}; ++ ++struct sched_group { ++ struct sched_group *next; /* Must be a circular list */ ++ atomic_t ref; ++ ++ unsigned int group_weight; ++ struct sched_group_capacity *sgc; ++ int asym_prefer_cpu; /* CPU of highest priority in group */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ ++ /* ++ * The CPUs this group covers. ++ * ++ * NOTE: this field is variable length. (Allocated dynamically ++ * by attaching extra space to the end of the structure, ++ * depending on how many CPUs the kernel has booted up with) ++ */ ++ unsigned long cpumask[0]; ++}; ++ ++static inline struct cpumask *sched_group_span(struct sched_group *sg) ++{ ++ return to_cpumask(sg->cpumask); ++} ++ ++/* ++ * See build_balance_mask(). ++ */ ++static inline struct cpumask *group_balance_mask(struct sched_group *sg) ++{ ++ return to_cpumask(sg->sgc->cpumask); ++} ++ ++/** ++ * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. ++ * @group: The group whose first CPU is to be returned. ++ */ ++static inline unsigned int group_first_cpu(struct sched_group *group) ++{ ++ return cpumask_first(sched_group_span(group)); ++} ++ ++extern int group_balance_cpu(struct sched_group *sg); ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) ++void register_sched_domain_sysctl(void); ++void dirty_sched_domain_sysctl(int cpu); ++void unregister_sched_domain_sysctl(void); ++#else ++static inline void register_sched_domain_sysctl(void) ++{ ++} ++static inline void dirty_sched_domain_sysctl(int cpu) ++{ ++} ++static inline void unregister_sched_domain_sysctl(void) ++{ ++} ++#endif ++ ++#else ++ ++static inline void sched_ttwu_pending(void) { } ++ ++#endif /* CONFIG_SMP */ ++ ++#include "stats.h" ++#include "autogroup.h" ++ ++#ifdef CONFIG_CGROUP_SCHED ++ ++/* ++ * Return the group to which this tasks belongs. ++ * ++ * We cannot use task_css() and friends because the cgroup subsystem ++ * changes that value before the cgroup_subsys::attach() method is called, ++ * therefore we cannot pin it and might observe the wrong value. ++ * ++ * The same is true for autogroup's p->signal->autogroup->tg, the autogroup ++ * core changes this before calling sched_move_task(). ++ * ++ * Instead we use a 'copy' which is updated from sched_move_task() while ++ * holding both task_struct::pi_lock and rq::lock. 
++ */ ++static inline struct task_group *task_group(struct task_struct *p) ++{ ++ return p->sched_task_group; ++} ++ ++/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ ++static inline void set_task_rq(struct task_struct *p, unsigned int cpu) ++{ ++#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) ++ struct task_group *tg = task_group(p); ++#endif ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); ++ p->se.cfs_rq = tg->cfs_rq[cpu]; ++ p->se.parent = tg->se[cpu]; ++#endif ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ p->rt.rt_rq = tg->rt_rq[cpu]; ++ p->rt.parent = tg->rt_se[cpu]; ++#endif ++} ++ ++#else /* CONFIG_CGROUP_SCHED */ ++ ++static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } ++static inline struct task_group *task_group(struct task_struct *p) ++{ ++ return NULL; ++} ++ ++#endif /* CONFIG_CGROUP_SCHED */ ++ ++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++ set_task_rq(p, cpu); ++#ifdef CONFIG_SMP ++ /* ++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be ++ * successfuly executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++#ifdef CONFIG_THREAD_INFO_IN_TASK ++ WRITE_ONCE(p->cpu, cpu); ++#else ++ WRITE_ONCE(task_thread_info(p)->cpu, cpu); ++#endif ++ p->wake_cpu = cpu; ++#endif ++} ++ ++/* ++ * Tunables that become constants when CONFIG_SCHED_DEBUG is off: ++ */ ++#ifdef CONFIG_SCHED_DEBUG ++# include ++# define const_debug __read_mostly ++#else ++# define const_debug const ++#endif ++ ++#define SCHED_FEAT(name, enabled) \ ++ __SCHED_FEAT_##name , ++ ++enum { ++#include "features.h" ++ __SCHED_FEAT_NR, ++}; ++ ++#undef SCHED_FEAT ++ ++#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) ++ ++/* ++ * To support run-time toggling of sched features, all the translation units ++ * (but core.c) reference the sysctl_sched_features defined in core.c. ++ */ ++extern const_debug unsigned int sysctl_sched_features; ++ ++#define SCHED_FEAT(name, enabled) \ ++static __always_inline bool static_branch_##name(struct static_key *key) \ ++{ \ ++ return static_key_##enabled(key); \ ++} ++ ++#include "features.h" ++#undef SCHED_FEAT ++ ++extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; ++#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) ++ ++#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */ ++ ++/* ++ * Each translation unit has its own copy of sysctl_sched_features to allow ++ * constants propagation at compile time and compiler optimization based on ++ * features default. 
++ */ ++#define SCHED_FEAT(name, enabled) \ ++ (1UL << __SCHED_FEAT_##name) * enabled | ++static const_debug __maybe_unused unsigned int sysctl_sched_features = ++#include "features.h" ++ 0; ++#undef SCHED_FEAT ++ ++#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) ++ ++#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */ ++ ++extern struct static_key_false sched_numa_balancing; ++extern struct static_key_false sched_schedstats; ++ ++static inline u64 global_rt_period(void) ++{ ++ return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; ++} ++ ++static inline u64 global_rt_runtime(void) ++{ ++ if (sysctl_sched_rt_runtime < 0) ++ return RUNTIME_INF; ++ ++ return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; ++} ++ ++static inline int task_current(struct rq *rq, struct task_struct *p) ++{ ++ return rq->curr == p; ++} ++ ++static inline int task_running(struct rq *rq, struct task_struct *p) ++{ ++#ifdef CONFIG_SMP ++ return p->on_cpu; ++#else ++ return task_current(rq, p); ++#endif ++} ++ ++static inline int task_on_rq_queued(struct task_struct *p) ++{ ++ return p->on_rq == TASK_ON_RQ_QUEUED; ++} ++ ++static inline int task_on_rq_migrating(struct task_struct *p) ++{ ++ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; ++} ++ ++/* ++ * wake flags ++ */ ++#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ ++#define WF_FORK 0x02 /* Child wakeup after fork */ ++#define WF_MIGRATED 0x4 /* Internal use, task got migrated */ ++ ++/* ++ * To aid in avoiding the subversion of "niceness" due to uneven distribution ++ * of tasks with abnormal "nice" values across CPUs the contribution that ++ * each task makes to its run queue's load is weighted according to its ++ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a ++ * scaled version of the new time slice allocation that they receive on time ++ * slice expiry etc. ++ */ ++ ++#define WEIGHT_IDLEPRIO 3 ++#define WMULT_IDLEPRIO 1431655765 ++ ++extern const int sched_prio_to_weight[40]; ++extern const u32 sched_prio_to_wmult[40]; ++ ++/* ++ * {de,en}queue flags: ++ * ++ * DEQUEUE_SLEEP - task is no longer runnable ++ * ENQUEUE_WAKEUP - task just became runnable ++ * ++ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks ++ * are in a known state which allows modification. Such pairs ++ * should preserve as much state as possible. ++ * ++ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location ++ * in the runqueue. 
++ * ++ * ENQUEUE_HEAD - place at front of runqueue (tail if not specified) ++ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline) ++ * ENQUEUE_MIGRATED - the task was migrated during wakeup ++ * ++ */ ++ ++#define DEQUEUE_SLEEP 0x01 ++#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */ ++#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */ ++#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */ ++ ++#define ENQUEUE_WAKEUP 0x01 ++#define ENQUEUE_RESTORE 0x02 ++#define ENQUEUE_MOVE 0x04 ++#define ENQUEUE_NOCLOCK 0x08 ++ ++#define ENQUEUE_HEAD 0x10 ++#define ENQUEUE_REPLENISH 0x20 ++#ifdef CONFIG_SMP ++#define ENQUEUE_MIGRATED 0x40 ++#else ++#define ENQUEUE_MIGRATED 0x00 ++#endif ++ ++#define RETRY_TASK ((void *)-1UL) ++ ++struct sched_class { ++ const struct sched_class *next; ++ ++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); ++ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); ++ void (*yield_task) (struct rq *rq); ++ bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt); ++ ++ void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags); ++ ++ /* ++ * It is the responsibility of the pick_next_task() method that will ++ * return the next task to call put_prev_task() on the @prev task or ++ * something equivalent. ++ * ++ * May return RETRY_TASK when it finds a higher prio class has runnable ++ * tasks. ++ */ ++ struct task_struct * (*pick_next_task)(struct rq *rq, ++ struct task_struct *prev, ++ struct rq_flags *rf); ++ void (*put_prev_task)(struct rq *rq, struct task_struct *p); ++ ++#ifdef CONFIG_SMP ++ int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); ++ void (*migrate_task_rq)(struct task_struct *p, int new_cpu); ++ ++ void (*task_woken)(struct rq *this_rq, struct task_struct *task); ++ ++ void (*set_cpus_allowed)(struct task_struct *p, ++ const struct cpumask *newmask); ++ ++ void (*rq_online)(struct rq *rq); ++ void (*rq_offline)(struct rq *rq); ++#endif ++ ++ void (*set_curr_task)(struct rq *rq); ++ void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); ++ void (*task_fork)(struct task_struct *p); ++ void (*task_dead)(struct task_struct *p); ++ ++ /* ++ * The switched_from() call is allowed to drop rq->lock, therefore we ++ * cannot assume the switched_from/switched_to pair is serliazed by ++ * rq->lock. They are however serialized by p->pi_lock. 
++ */ ++ void (*switched_from)(struct rq *this_rq, struct task_struct *task); ++ void (*switched_to) (struct rq *this_rq, struct task_struct *task); ++ void (*prio_changed) (struct rq *this_rq, struct task_struct *task, ++ int oldprio); ++ ++ unsigned int (*get_rr_interval)(struct rq *rq, ++ struct task_struct *task); ++ ++ void (*update_curr)(struct rq *rq); ++ ++#define TASK_SET_GROUP 0 ++#define TASK_MOVE_GROUP 1 ++ ++#ifdef CONFIG_FAIR_GROUP_SCHED ++ void (*task_change_group)(struct task_struct *p, int type); ++#endif ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++}; ++ ++static inline void put_prev_task(struct rq *rq, struct task_struct *prev) ++{ ++ prev->sched_class->put_prev_task(rq, prev); ++} ++ ++static inline void set_curr_task(struct rq *rq, struct task_struct *curr) ++{ ++ curr->sched_class->set_curr_task(rq); ++} ++ ++#ifdef CONFIG_SMP ++#define sched_class_highest (&stop_sched_class) ++#else ++#define sched_class_highest (&dl_sched_class) ++#endif ++#define for_each_class(class) \ ++ for (class = sched_class_highest; class; class = class->next) ++ ++extern const struct sched_class stop_sched_class; ++extern const struct sched_class dl_sched_class; ++extern const struct sched_class rt_sched_class; ++extern const struct sched_class fair_sched_class; ++extern const struct sched_class idle_sched_class; ++ ++ ++#ifdef CONFIG_SMP ++ ++extern void update_group_capacity(struct sched_domain *sd, int cpu); ++ ++extern void trigger_load_balance(struct rq *rq); ++ ++extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); ++ ++#endif ++ ++#ifdef CONFIG_CPU_IDLE ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++ rq->idle_state = idle_state; ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ SCHED_WARN_ON(!rcu_read_lock_held()); ++ ++ return rq->idle_state; ++} ++#else ++static inline void idle_set_state(struct rq *rq, ++ struct cpuidle_state *idle_state) ++{ ++} ++ ++static inline struct cpuidle_state *idle_get_state(struct rq *rq) ++{ ++ return NULL; ++} ++#endif ++ ++extern void schedule_idle(void); ++ ++extern void sysrq_sched_debug_show(void); ++extern void sched_init_granularity(void); ++extern void update_max_interval(void); ++ ++extern void init_sched_dl_class(void); ++extern void init_sched_rt_class(void); ++extern void init_sched_fair_class(void); ++ ++extern void reweight_task(struct task_struct *p, int prio); ++ ++extern void resched_curr(struct rq *rq); ++extern void resched_cpu(int cpu); ++ ++extern struct rt_bandwidth def_rt_bandwidth; ++extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); ++ ++extern struct dl_bandwidth def_dl_bandwidth; ++extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); ++extern void init_dl_task_timer(struct sched_dl_entity *dl_se); ++extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se); ++extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); ++ ++#define BW_SHIFT 20 ++#define BW_UNIT (1 << BW_SHIFT) ++#define RATIO_SHIFT 8 ++unsigned long to_ratio(u64 period, u64 runtime); ++ ++extern void init_entity_runnable_average(struct sched_entity *se); ++extern void post_init_entity_util_avg(struct sched_entity *se); ++ ++#ifdef CONFIG_NO_HZ_FULL ++extern bool sched_can_stop_tick(struct rq *rq); ++extern int __init sched_tick_offload_init(void); ++ ++/* ++ * Tick may be needed by tasks in the runqueue depending on their policy and ++ * requirements. 
If tick is needed, lets send the target an IPI to kick it out of ++ * nohz mode if necessary. ++ */ ++static inline void sched_update_tick_dependency(struct rq *rq) ++{ ++ int cpu; ++ ++ if (!tick_nohz_full_enabled()) ++ return; ++ ++ cpu = cpu_of(rq); ++ ++ if (!tick_nohz_full_cpu(cpu)) ++ return; ++ ++ if (sched_can_stop_tick(rq)) ++ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); ++ else ++ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); ++} ++#else ++static inline int sched_tick_offload_init(void) { return 0; } ++static inline void sched_update_tick_dependency(struct rq *rq) { } ++#endif ++ ++static inline void add_nr_running(struct rq *rq, unsigned count) ++{ ++ unsigned prev_nr = rq->nr_running; ++ ++ rq->nr_running = prev_nr + count; ++ ++ if (prev_nr < 2 && rq->nr_running >= 2) { ++#ifdef CONFIG_SMP ++ if (!rq->rd->overload) ++ rq->rd->overload = true; ++#endif ++ } ++ ++ sched_update_tick_dependency(rq); ++} ++ ++static inline void sub_nr_running(struct rq *rq, unsigned count) ++{ ++ rq->nr_running -= count; ++ /* Check if we still need preemption */ ++ sched_update_tick_dependency(rq); ++} ++ ++extern void update_rq_clock(struct rq *rq); ++ ++extern void activate_task(struct rq *rq, struct task_struct *p, int flags); ++extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); ++ ++extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); ++ ++extern const_debug unsigned int sysctl_sched_nr_migrate; ++extern const_debug unsigned int sysctl_sched_migration_cost; ++ ++#ifdef CONFIG_SCHED_HRTICK ++ ++/* ++ * Use hrtick when: ++ * - enabled by features ++ * - hrtimer is actually high res ++ */ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ if (!sched_feat(HRTICK)) ++ return 0; ++ if (!cpu_active(cpu_of(rq))) ++ return 0; ++ return hrtimer_is_hres_active(&rq->hrtick_timer); ++} ++ ++void hrtick_start(struct rq *rq, u64 delay); ++ ++#else ++ ++static inline int hrtick_enabled(struct rq *rq) ++{ ++ return 0; ++} ++ ++#endif /* CONFIG_SCHED_HRTICK */ ++ ++#ifndef arch_scale_freq_capacity ++static __always_inline ++unsigned long arch_scale_freq_capacity(int cpu) ++{ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++ ++#ifdef CONFIG_SMP ++#ifndef arch_scale_cpu_capacity ++static __always_inline ++unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) ++{ ++ if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) ++ return sd->smt_gain / sd->span_weight; ++ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++#else ++#ifndef arch_scale_cpu_capacity ++static __always_inline ++unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) ++{ ++ return SCHED_CAPACITY_SCALE; ++} ++#endif ++#endif ++ ++struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(rq->lock); ++ ++struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) ++ __acquires(p->pi_lock) ++ __acquires(rq->lock); ++ ++static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++} ++ ++static inline void ++task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) ++ __releases(rq->lock) ++ __releases(p->pi_lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); ++} ++ ++static inline void ++rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irqsave(&rq->lock, rf->flags); ++ 
rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_lock_irq(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock_irq(&rq->lock); ++ rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_lock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++ rq_pin_lock(rq, rf); ++} ++ ++static inline void ++rq_relock(struct rq *rq, struct rq_flags *rf) ++ __acquires(rq->lock) ++{ ++ raw_spin_lock(&rq->lock); ++ rq_repin_lock(rq, rf); ++} ++ ++static inline void ++rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); ++} ++ ++static inline void ++rq_unlock_irq(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock_irq(&rq->lock); ++} ++ ++static inline void ++rq_unlock(struct rq *rq, struct rq_flags *rf) ++ __releases(rq->lock) ++{ ++ rq_unpin_lock(rq, rf); ++ raw_spin_unlock(&rq->lock); ++} ++ ++#ifdef CONFIG_SMP ++#ifdef CONFIG_PREEMPT ++ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2); ++ ++/* ++ * fair double_lock_balance: Safely acquires both rq->locks in a fair ++ * way at the expense of forcing extra atomic operations in all ++ * invocations. This assures that the double_lock is acquired using the ++ * same underlying policy as the spinlock_t on this architecture, which ++ * reduces latency compared to the unfair variant below. However, it ++ * also adds more overhead and therefore may reduce throughput. ++ */ ++static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ raw_spin_unlock(&this_rq->lock); ++ double_rq_lock(this_rq, busiest); ++ ++ return 1; ++} ++ ++#else ++/* ++ * Unfair double_lock_balance: Optimizes throughput at the expense of ++ * latency by eliminating extra atomic operations when the locks are ++ * already in proper order on entry. This favors lower CPU-ids and will ++ * grant the double lock to lower CPUs over higher ids under contention, ++ * regardless of entry order into the function. ++ */ ++static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ int ret = 0; ++ ++ if (unlikely(!raw_spin_trylock(&busiest->lock))) { ++ if (busiest < this_rq) { ++ raw_spin_unlock(&this_rq->lock); ++ raw_spin_lock(&busiest->lock); ++ raw_spin_lock_nested(&this_rq->lock, ++ SINGLE_DEPTH_NESTING); ++ ret = 1; ++ } else ++ raw_spin_lock_nested(&busiest->lock, ++ SINGLE_DEPTH_NESTING); ++ } ++ return ret; ++} ++ ++#endif /* CONFIG_PREEMPT */ ++ ++/* ++ * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
++ */ ++static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) ++{ ++ if (unlikely(!irqs_disabled())) { ++ /* printk() doesn't work well under rq->lock */ ++ raw_spin_unlock(&this_rq->lock); ++ BUG_ON(1); ++ } ++ ++ return _double_lock_balance(this_rq, busiest); ++} ++ ++static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(busiest->lock) ++{ ++ raw_spin_unlock(&busiest->lock); ++ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); ++} ++ ++static inline void double_lock(spinlock_t *l1, spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ spin_lock(l1); ++ spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ spin_lock_irq(l1); ++ spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) ++{ ++ if (l1 > l2) ++ swap(l1, l2); ++ ++ raw_spin_lock(l1); ++ raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING); ++} ++ ++/* ++ * double_rq_lock - safely lock two runqueues ++ * ++ * Note this does not disable interrupts like task_rq_lock, ++ * you need to do so manually before calling. ++ */ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) ++ __acquires(rq1->lock) ++ __acquires(rq2->lock) ++{ ++ BUG_ON(!irqs_disabled()); ++ if (rq1 == rq2) { ++ raw_spin_lock(&rq1->lock); ++ __acquire(rq2->lock); /* Fake it out ;) */ ++ } else { ++ if (rq1 < rq2) { ++ raw_spin_lock(&rq1->lock); ++ raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); ++ } else { ++ raw_spin_lock(&rq2->lock); ++ raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); ++ } ++ } ++} ++ ++/* ++ * double_rq_unlock - safely unlock two runqueues ++ * ++ * Note this does not restore interrupts like task_rq_unlock, ++ * you need to do so manually after calling. ++ */ ++static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ++ __releases(rq1->lock) ++ __releases(rq2->lock) ++{ ++ raw_spin_unlock(&rq1->lock); ++ if (rq1 != rq2) ++ raw_spin_unlock(&rq2->lock); ++ else ++ __release(rq2->lock); ++} ++ ++extern void set_rq_online (struct rq *rq); ++extern void set_rq_offline(struct rq *rq); ++extern bool sched_smp_initialized; ++ ++#else /* CONFIG_SMP */ ++ ++/* ++ * double_rq_lock - safely lock two runqueues ++ * ++ * Note this does not disable interrupts like task_rq_lock, ++ * you need to do so manually before calling. ++ */ ++static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) ++ __acquires(rq1->lock) ++ __acquires(rq2->lock) ++{ ++ BUG_ON(!irqs_disabled()); ++ BUG_ON(rq1 != rq2); ++ raw_spin_lock(&rq1->lock); ++ __acquire(rq2->lock); /* Fake it out ;) */ ++} ++ ++/* ++ * double_rq_unlock - safely unlock two runqueues ++ * ++ * Note this does not restore interrupts like task_rq_unlock, ++ * you need to do so manually after calling. 
++ */ ++static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ++ __releases(rq1->lock) ++ __releases(rq2->lock) ++{ ++ BUG_ON(rq1 != rq2); ++ raw_spin_unlock(&rq1->lock); ++ __release(rq2->lock); ++} ++ ++#endif ++ ++extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); ++extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); ++ ++#ifdef CONFIG_SCHED_DEBUG ++extern bool sched_debug_enabled; ++ ++extern void print_cfs_stats(struct seq_file *m, int cpu); ++extern void print_rt_stats(struct seq_file *m, int cpu); ++extern void print_dl_stats(struct seq_file *m, int cpu); ++extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); ++extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); ++extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); ++#ifdef CONFIG_NUMA_BALANCING ++extern void ++show_numa_stats(struct task_struct *p, struct seq_file *m); ++extern void ++print_numa_stats(struct seq_file *m, int node, unsigned long tsf, ++ unsigned long tpf, unsigned long gsf, unsigned long gpf); ++#endif /* CONFIG_NUMA_BALANCING */ ++#endif /* CONFIG_SCHED_DEBUG */ ++ ++extern void init_cfs_rq(struct cfs_rq *cfs_rq); ++extern void init_rt_rq(struct rt_rq *rt_rq); ++extern void init_dl_rq(struct dl_rq *dl_rq); ++ ++extern void cfs_bandwidth_usage_inc(void); ++extern void cfs_bandwidth_usage_dec(void); ++ ++#ifdef CONFIG_NO_HZ_COMMON ++#define NOHZ_BALANCE_KICK_BIT 0 ++#define NOHZ_STATS_KICK_BIT 1 ++ ++#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) ++#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) ++ ++#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) ++ ++#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) ++ ++extern void nohz_balance_exit_idle(struct rq *rq); ++#else ++static inline void nohz_balance_exit_idle(struct rq *rq) { } ++#endif ++ ++ ++#ifdef CONFIG_SMP ++static inline ++void __dl_update(struct dl_bw *dl_b, s64 bw) ++{ ++ struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw); ++ int i; ++ ++ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), ++ "sched RCU must be held"); ++ for_each_cpu_and(i, rd->span, cpu_active_mask) { ++ struct rq *rq = cpu_rq(i); ++ ++ rq->dl.extra_bw += bw; ++ } ++} ++#else ++static inline ++void __dl_update(struct dl_bw *dl_b, s64 bw) ++{ ++ struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw); ++ ++ dl->extra_bw += bw; ++} ++#endif ++ ++ ++#ifdef CONFIG_IRQ_TIME_ACCOUNTING ++struct irqtime { ++ u64 total; ++ u64 tick_delta; ++ u64 irq_start_time; ++ struct u64_stats_sync sync; ++}; ++ ++DECLARE_PER_CPU(struct irqtime, cpu_irqtime); ++ ++/* ++ * Returns the irqtime minus the softirq time computed by ksoftirqd. ++ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime ++ * and never move forward. ++ */ ++static inline u64 irq_time_read(int cpu) ++{ ++ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); ++ unsigned int seq; ++ u64 total; ++ ++ do { ++ seq = __u64_stats_fetch_begin(&irqtime->sync); ++ total = irqtime->total; ++ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); ++ ++ return total; ++} ++#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ ++ ++#ifdef CONFIG_CPU_FREQ ++DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); ++ ++/** ++ * cpufreq_update_util - Take a note about CPU utilization changes. ++ * @rq: Runqueue to carry out the update for. ++ * @flags: Update reason flags. ++ * ++ * This function is called by the scheduler on the CPU whose utilization is ++ * being updated. 
++ * ++ * It can only be called from RCU-sched read-side critical sections. ++ * ++ * The way cpufreq is currently arranged requires it to evaluate the CPU ++ * performance state (frequency/voltage) on a regular basis to prevent it from ++ * being stuck in a completely inadequate performance level for too long. ++ * That is not guaranteed to happen if the updates are only triggered from CFS ++ * and DL, though, because they may not be coming in if only RT tasks are ++ * active all the time (or there are RT tasks only). ++ * ++ * As a workaround for that issue, this function is called periodically by the ++ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, ++ * but that really is a band-aid. Going forward it should be replaced with ++ * solutions targeted more specifically at RT tasks. ++ */ ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) ++{ ++ struct update_util_data *data; ++ ++ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, ++ cpu_of(rq))); ++ if (data) ++ data->func(data, rq_clock(rq), flags); ++} ++#else ++static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} ++#endif /* CONFIG_CPU_FREQ */ ++ ++#ifdef arch_scale_freq_capacity ++# ifndef arch_scale_freq_invariant ++# define arch_scale_freq_invariant() true ++# endif ++#else ++# define arch_scale_freq_invariant() false ++#endif ++ ++#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL ++static inline unsigned long cpu_bw_dl(struct rq *rq) ++{ ++ return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; ++} ++ ++static inline unsigned long cpu_util_dl(struct rq *rq) ++{ ++ return READ_ONCE(rq->avg_dl.util_avg); ++} ++ ++static inline unsigned long cpu_util_cfs(struct rq *rq) ++{ ++ unsigned long util = READ_ONCE(rq->cfs.avg.util_avg); ++ ++ if (sched_feat(UTIL_EST)) { ++ util = max_t(unsigned long, util, ++ READ_ONCE(rq->cfs.avg.util_est.enqueued)); ++ } ++ ++ return util; ++} ++ ++static inline unsigned long cpu_util_rt(struct rq *rq) ++{ ++ return READ_ONCE(rq->avg_rt.util_avg); ++} ++#endif ++ ++#ifdef CONFIG_HAVE_SCHED_AVG_IRQ ++static inline unsigned long cpu_util_irq(struct rq *rq) ++{ ++ return rq->avg_irq.util_avg; ++} ++ ++static inline ++unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) ++{ ++ util *= (max - irq); ++ util /= max; ++ ++ return util; ++ ++} ++#else ++static inline unsigned long cpu_util_irq(struct rq *rq) ++{ ++ return 0; ++} ++ ++static inline ++unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) ++{ ++ return util; ++} ++#endif ++ ++#ifdef CONFIG_MEMBARRIER ++/* ++ * The scheduler provides memory barriers required by membarrier between: ++ * - prior user-space memory accesses and store to rq->membarrier_state, ++ * - store to rq->membarrier_state and following user-space memory accesses. ++ * In the same way it provides those guarantees around store to rq->curr. 
++ */ ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++ int membarrier_state; ++ ++ if (prev_mm == next_mm) ++ return; ++ ++ membarrier_state = atomic_read(&next_mm->membarrier_state); ++ if (READ_ONCE(rq->membarrier_state) == membarrier_state) ++ return; ++ ++ WRITE_ONCE(rq->membarrier_state, membarrier_state); ++} ++#else ++static inline void membarrier_switch_mm(struct rq *rq, ++ struct mm_struct *prev_mm, ++ struct mm_struct *next_mm) ++{ ++} ++#endif +diff -uprN kernel/kernel/sched/sched.h.rej kernel_new/kernel/sched/sched.h.rej +--- kernel/kernel/sched/sched.h.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/sched/sched.h.rej 2021-04-01 18:28:07.812863113 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/sched/sched.h 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/sched/sched.h 2021-03-22 09:21:43.222415443 +0800 ++@@ -64,6 +64,7 @@ ++ #include ++ #include ++ #include +++#include ++ ++ #include ++ +diff -uprN kernel/kernel/sched/wait.c kernel_new/kernel/sched/wait.c +--- kernel/kernel/sched/wait.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/sched/wait.c 2021-04-01 18:28:07.812863113 +0800 +@@ -79,6 +79,8 @@ static int __wake_up_common(struct wait_ + } else + curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); + ++ ipipe_root_only(); ++ + if (&curr->entry == &wq_head->head) + return nr_exclusive; + +diff -uprN kernel/kernel/signal.c kernel_new/kernel/signal.c +--- kernel/kernel/signal.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/signal.c 2021-04-01 18:28:07.812863113 +0800 +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -748,6 +749,10 @@ still_pending: + void signal_wake_up_state(struct task_struct *t, unsigned int state) + { + set_tsk_thread_flag(t, TIF_SIGPENDING); ++ ++ /* TIF_SIGPENDING must be prior to reporting. */ ++ __ipipe_report_sigwake(t); ++ + /* + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable + * case. We don't check t->state here because there is a race with it +@@ -968,8 +973,11 @@ static inline bool wants_signal(int sig, + if (sig == SIGKILL) + return true; + +- if (task_is_stopped_or_traced(p)) ++ if (task_is_stopped_or_traced(p)) { ++ if (!signal_pending(p)) ++ __ipipe_report_sigwake(p); + return false; ++ } + + return task_curr(p) || !signal_pending(p); + } +diff -uprN kernel/kernel/signal.c.orig kernel_new/kernel/signal.c.orig +--- kernel/kernel/signal.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/signal.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,4089 @@ ++/* ++ * linux/kernel/signal.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson ++ * ++ * 2003-06-02 Jim Houston - Concurrent Computer Corp. ++ * Changes to use preallocated sigqueue structures ++ * to allow signals to be sent reliably. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include "audit.h" /* audit_signal_info() */ ++ ++EXPORT_TRACEPOINT_SYMBOL(signal_generate); ++ ++/* ++ * SLAB caches for signal bits. 
++ */ ++ ++static struct kmem_cache *sigqueue_cachep; ++ ++int print_fatal_signals __read_mostly; ++ ++static void __user *sig_handler(struct task_struct *t, int sig) ++{ ++ return t->sighand->action[sig - 1].sa.sa_handler; ++} ++ ++static inline bool sig_handler_ignored(void __user *handler, int sig) ++{ ++ /* Is it explicitly or implicitly ignored? */ ++ return handler == SIG_IGN || ++ (handler == SIG_DFL && sig_kernel_ignore(sig)); ++} ++ ++static bool sig_task_ignored(struct task_struct *t, int sig, bool force) ++{ ++ void __user *handler; ++ ++ handler = sig_handler(t, sig); ++ ++ /* SIGKILL and SIGSTOP may not be sent to the global init */ ++ if (unlikely(is_global_init(t) && sig_kernel_only(sig))) ++ return true; ++ ++ if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && ++ handler == SIG_DFL && !(force && sig_kernel_only(sig))) ++ return true; ++ ++ return sig_handler_ignored(handler, sig); ++} ++ ++static bool sig_ignored(struct task_struct *t, int sig, bool force) ++{ ++ /* ++ * Blocked signals are never ignored, since the ++ * signal handler may change by the time it is ++ * unblocked. ++ */ ++ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) ++ return false; ++ ++ /* ++ * Tracers may want to know about even ignored signal unless it ++ * is SIGKILL which can't be reported anyway but can be ignored ++ * by SIGNAL_UNKILLABLE task. ++ */ ++ if (t->ptrace && sig != SIGKILL) ++ return false; ++ ++ return sig_task_ignored(t, sig, force); ++} ++ ++/* ++ * Re-calculate pending state from the set of locally pending ++ * signals, globally pending signals, and blocked signals. ++ */ ++static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) ++{ ++ unsigned long ready; ++ long i; ++ ++ switch (_NSIG_WORDS) { ++ default: ++ for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) ++ ready |= signal->sig[i] &~ blocked->sig[i]; ++ break; ++ ++ case 4: ready = signal->sig[3] &~ blocked->sig[3]; ++ ready |= signal->sig[2] &~ blocked->sig[2]; ++ ready |= signal->sig[1] &~ blocked->sig[1]; ++ ready |= signal->sig[0] &~ blocked->sig[0]; ++ break; ++ ++ case 2: ready = signal->sig[1] &~ blocked->sig[1]; ++ ready |= signal->sig[0] &~ blocked->sig[0]; ++ break; ++ ++ case 1: ready = signal->sig[0] &~ blocked->sig[0]; ++ } ++ return ready != 0; ++} ++ ++#define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) ++ ++static bool recalc_sigpending_tsk(struct task_struct *t) ++{ ++ if ((t->jobctl & JOBCTL_PENDING_MASK) || ++ PENDING(&t->pending, &t->blocked) || ++ PENDING(&t->signal->shared_pending, &t->blocked)) { ++ set_tsk_thread_flag(t, TIF_SIGPENDING); ++ return true; ++ } ++ ++ /* ++ * We must never clear the flag in another thread, or in current ++ * when it's possible the current syscall is returning -ERESTART*. ++ * So we don't clear it here, and only callers who know they should do. ++ */ ++ return false; ++} ++ ++/* ++ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up. ++ * This is superfluous when called on current, the wakeup is a harmless no-op. ++ */ ++void recalc_sigpending_and_wake(struct task_struct *t) ++{ ++ if (recalc_sigpending_tsk(t)) ++ signal_wake_up(t, 0); ++} ++ ++void recalc_sigpending(void) ++{ ++ if (!recalc_sigpending_tsk(current) && !freezing(current) && ++ !klp_patch_pending(current)) ++ clear_thread_flag(TIF_SIGPENDING); ++ ++} ++ ++void calculate_sigpending(void) ++{ ++ /* Have any signals or users of TIF_SIGPENDING been delayed ++ * until after fork? 
++ */ ++ spin_lock_irq(¤t->sighand->siglock); ++ set_tsk_thread_flag(current, TIF_SIGPENDING); ++ recalc_sigpending(); ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++ ++/* Given the mask, find the first available signal that should be serviced. */ ++ ++#define SYNCHRONOUS_MASK \ ++ (sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \ ++ sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS)) ++ ++int next_signal(struct sigpending *pending, sigset_t *mask) ++{ ++ unsigned long i, *s, *m, x; ++ int sig = 0; ++ ++ s = pending->signal.sig; ++ m = mask->sig; ++ ++ /* ++ * Handle the first word specially: it contains the ++ * synchronous signals that need to be dequeued first. ++ */ ++ x = *s &~ *m; ++ if (x) { ++ if (x & SYNCHRONOUS_MASK) ++ x &= SYNCHRONOUS_MASK; ++ sig = ffz(~x) + 1; ++ return sig; ++ } ++ ++ switch (_NSIG_WORDS) { ++ default: ++ for (i = 1; i < _NSIG_WORDS; ++i) { ++ x = *++s &~ *++m; ++ if (!x) ++ continue; ++ sig = ffz(~x) + i*_NSIG_BPW + 1; ++ break; ++ } ++ break; ++ ++ case 2: ++ x = s[1] &~ m[1]; ++ if (!x) ++ break; ++ sig = ffz(~x) + _NSIG_BPW + 1; ++ break; ++ ++ case 1: ++ /* Nothing to do */ ++ break; ++ } ++ ++ return sig; ++} ++ ++static inline void print_dropped_signal(int sig) ++{ ++ static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); ++ ++ if (!print_fatal_signals) ++ return; ++ ++ if (!__ratelimit(&ratelimit_state)) ++ return; ++ ++ pr_info("%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", ++ current->comm, current->pid, sig); ++} ++ ++/** ++ * task_set_jobctl_pending - set jobctl pending bits ++ * @task: target task ++ * @mask: pending bits to set ++ * ++ * Clear @mask from @task->jobctl. @mask must be subset of ++ * %JOBCTL_PENDING_MASK | %JOBCTL_STOP_CONSUME | %JOBCTL_STOP_SIGMASK | ++ * %JOBCTL_TRAPPING. If stop signo is being set, the existing signo is ++ * cleared. If @task is already being killed or exiting, this function ++ * becomes noop. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ * ++ * RETURNS: ++ * %true if @mask is set, %false if made noop because @task was dying. ++ */ ++bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) ++{ ++ BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | ++ JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); ++ BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); ++ ++ if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) ++ return false; ++ ++ if (mask & JOBCTL_STOP_SIGMASK) ++ task->jobctl &= ~JOBCTL_STOP_SIGMASK; ++ ++ task->jobctl |= mask; ++ return true; ++} ++ ++/** ++ * task_clear_jobctl_trapping - clear jobctl trapping bit ++ * @task: target task ++ * ++ * If JOBCTL_TRAPPING is set, a ptracer is waiting for us to enter TRACED. ++ * Clear it and wake up the ptracer. Note that we don't need any further ++ * locking. @task->siglock guarantees that @task->parent points to the ++ * ptracer. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++void task_clear_jobctl_trapping(struct task_struct *task) ++{ ++ if (unlikely(task->jobctl & JOBCTL_TRAPPING)) { ++ task->jobctl &= ~JOBCTL_TRAPPING; ++ smp_mb(); /* advised by wake_up_bit() */ ++ wake_up_bit(&task->jobctl, JOBCTL_TRAPPING_BIT); ++ } ++} ++ ++/** ++ * task_clear_jobctl_pending - clear jobctl pending bits ++ * @task: target task ++ * @mask: pending bits to clear ++ * ++ * Clear @mask from @task->jobctl. @mask must be subset of ++ * %JOBCTL_PENDING_MASK. 
If %JOBCTL_STOP_PENDING is being cleared, other ++ * STOP bits are cleared together. ++ * ++ * If clearing of @mask leaves no stop or trap pending, this function calls ++ * task_clear_jobctl_trapping(). ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) ++{ ++ BUG_ON(mask & ~JOBCTL_PENDING_MASK); ++ ++ if (mask & JOBCTL_STOP_PENDING) ++ mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; ++ ++ task->jobctl &= ~mask; ++ ++ if (!(task->jobctl & JOBCTL_PENDING_MASK)) ++ task_clear_jobctl_trapping(task); ++} ++ ++/** ++ * task_participate_group_stop - participate in a group stop ++ * @task: task participating in a group stop ++ * ++ * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. ++ * Group stop states are cleared and the group stop count is consumed if ++ * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group ++ * stop, the appropriate %SIGNAL_* flags are set. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ * ++ * RETURNS: ++ * %true if group stop completion should be notified to the parent, %false ++ * otherwise. ++ */ ++static bool task_participate_group_stop(struct task_struct *task) ++{ ++ struct signal_struct *sig = task->signal; ++ bool consume = task->jobctl & JOBCTL_STOP_CONSUME; ++ ++ WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); ++ ++ task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); ++ ++ if (!consume) ++ return false; ++ ++ if (!WARN_ON_ONCE(sig->group_stop_count == 0)) ++ sig->group_stop_count--; ++ ++ /* ++ * Tell the caller to notify completion iff we are entering into a ++ * fresh group stop. Read comment in do_signal_stop() for details. ++ */ ++ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { ++ signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); ++ return true; ++ } ++ return false; ++} ++ ++void task_join_group_stop(struct task_struct *task) ++{ ++ /* Have the new thread join an on-going signal group stop */ ++ unsigned long jobctl = current->jobctl; ++ if (jobctl & JOBCTL_STOP_PENDING) { ++ struct signal_struct *sig = current->signal; ++ unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; ++ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; ++ if (task_set_jobctl_pending(task, signr | gstop)) { ++ sig->group_stop_count++; ++ } ++ } ++} ++ ++/* ++ * allocate a new signal queue record ++ * - this may be called without locks if and only if t == current, otherwise an ++ * appropriate lock must be held to stop the target task from exiting ++ */ ++static struct sigqueue * ++__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) ++{ ++ struct sigqueue *q = NULL; ++ struct user_struct *user; ++ int sigpending; ++ ++ /* ++ * Protect access to @t credentials. This can go away when all ++ * callers hold rcu read lock. ++ * ++ * NOTE! A pending signal will hold on to the user refcount, ++ * and we get/put the refcount only when the sigpending count ++ * changes from/to zero. 
++ */ ++ rcu_read_lock(); ++ user = __task_cred(t)->user; ++ sigpending = atomic_inc_return(&user->sigpending); ++ if (sigpending == 1) ++ get_uid(user); ++ rcu_read_unlock(); ++ ++ if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { ++ q = kmem_cache_alloc(sigqueue_cachep, flags); ++ } else { ++ print_dropped_signal(sig); ++ } ++ ++ if (unlikely(q == NULL)) { ++ if (atomic_dec_and_test(&user->sigpending)) ++ free_uid(user); ++ } else { ++ INIT_LIST_HEAD(&q->list); ++ q->flags = 0; ++ q->user = user; ++ } ++ ++ return q; ++} ++ ++static void __sigqueue_free(struct sigqueue *q) ++{ ++ if (q->flags & SIGQUEUE_PREALLOC) ++ return; ++ if (atomic_dec_and_test(&q->user->sigpending)) ++ free_uid(q->user); ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++void flush_sigqueue(struct sigpending *queue) ++{ ++ struct sigqueue *q; ++ ++ sigemptyset(&queue->signal); ++ while (!list_empty(&queue->list)) { ++ q = list_entry(queue->list.next, struct sigqueue , list); ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++} ++ ++/* ++ * Flush all pending signals for this kthread. ++ */ ++void flush_signals(struct task_struct *t) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&t->sighand->siglock, flags); ++ clear_tsk_thread_flag(t, TIF_SIGPENDING); ++ flush_sigqueue(&t->pending); ++ flush_sigqueue(&t->signal->shared_pending); ++ spin_unlock_irqrestore(&t->sighand->siglock, flags); ++} ++ ++#ifdef CONFIG_POSIX_TIMERS ++static void __flush_itimer_signals(struct sigpending *pending) ++{ ++ sigset_t signal, retain; ++ struct sigqueue *q, *n; ++ ++ signal = pending->signal; ++ sigemptyset(&retain); ++ ++ list_for_each_entry_safe(q, n, &pending->list, list) { ++ int sig = q->info.si_signo; ++ ++ if (likely(q->info.si_code != SI_TIMER)) { ++ sigaddset(&retain, sig); ++ } else { ++ sigdelset(&signal, sig); ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++ } ++ ++ sigorsets(&pending->signal, &signal, &retain); ++} ++ ++void flush_itimer_signals(void) ++{ ++ struct task_struct *tsk = current; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&tsk->sighand->siglock, flags); ++ __flush_itimer_signals(&tsk->pending); ++ __flush_itimer_signals(&tsk->signal->shared_pending); ++ spin_unlock_irqrestore(&tsk->sighand->siglock, flags); ++} ++#endif ++ ++void ignore_signals(struct task_struct *t) ++{ ++ int i; ++ ++ for (i = 0; i < _NSIG; ++i) ++ t->sighand->action[i].sa.sa_handler = SIG_IGN; ++ ++ flush_signals(t); ++} ++ ++/* ++ * Flush all handlers for a task. ++ */ ++ ++void ++flush_signal_handlers(struct task_struct *t, int force_default) ++{ ++ int i; ++ struct k_sigaction *ka = &t->sighand->action[0]; ++ for (i = _NSIG ; i != 0 ; i--) { ++ if (force_default || ka->sa.sa_handler != SIG_IGN) ++ ka->sa.sa_handler = SIG_DFL; ++ ka->sa.sa_flags = 0; ++#ifdef __ARCH_HAS_SA_RESTORER ++ ka->sa.sa_restorer = NULL; ++#endif ++ sigemptyset(&ka->sa.sa_mask); ++ ka++; ++ } ++} ++ ++bool unhandled_signal(struct task_struct *tsk, int sig) ++{ ++ void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; ++ if (is_global_init(tsk)) ++ return true; ++ ++ if (handler != SIG_IGN && handler != SIG_DFL) ++ return false; ++ ++ /* if ptraced, let the tracer determine */ ++ return !tsk->ptrace; ++} ++ ++static void collect_signal(int sig, struct sigpending *list, siginfo_t *info, ++ bool *resched_timer) ++{ ++ struct sigqueue *q, *first = NULL; ++ ++ /* ++ * Collect the siginfo appropriate to this signal. Check if ++ * there is another siginfo for the same signal. 
++ */ ++ list_for_each_entry(q, &list->list, list) { ++ if (q->info.si_signo == sig) { ++ if (first) ++ goto still_pending; ++ first = q; ++ } ++ } ++ ++ sigdelset(&list->signal, sig); ++ ++ if (first) { ++still_pending: ++ list_del_init(&first->list); ++ copy_siginfo(info, &first->info); ++ ++ *resched_timer = ++ (first->flags & SIGQUEUE_PREALLOC) && ++ (info->si_code == SI_TIMER) && ++ (info->si_sys_private); ++ ++ __sigqueue_free(first); ++ } else { ++ /* ++ * Ok, it wasn't in the queue. This must be ++ * a fast-pathed signal or we must have been ++ * out of queue space. So zero out the info. ++ */ ++ clear_siginfo(info); ++ info->si_signo = sig; ++ info->si_errno = 0; ++ info->si_code = SI_USER; ++ info->si_pid = 0; ++ info->si_uid = 0; ++ } ++} ++ ++static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, ++ siginfo_t *info, bool *resched_timer) ++{ ++ int sig = next_signal(pending, mask); ++ ++ if (sig) ++ collect_signal(sig, pending, info, resched_timer); ++ return sig; ++} ++ ++/* ++ * Dequeue a signal and return the element to the caller, which is ++ * expected to free it. ++ * ++ * All callers have to hold the siglock. ++ */ ++int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) ++{ ++ bool resched_timer = false; ++ int signr; ++ ++ /* We only dequeue private signals from ourselves, we don't let ++ * signalfd steal them ++ */ ++ signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer); ++ if (!signr) { ++ signr = __dequeue_signal(&tsk->signal->shared_pending, ++ mask, info, &resched_timer); ++#ifdef CONFIG_POSIX_TIMERS ++ /* ++ * itimer signal ? ++ * ++ * itimers are process shared and we restart periodic ++ * itimers in the signal delivery path to prevent DoS ++ * attacks in the high resolution timer case. This is ++ * compliant with the old way of self-restarting ++ * itimers, as the SIGALRM is a legacy signal and only ++ * queued once. Changing the restart behaviour to ++ * restart the timer in the signal dequeue path is ++ * reducing the timer noise on heavy loaded !highres ++ * systems too. ++ */ ++ if (unlikely(signr == SIGALRM)) { ++ struct hrtimer *tmr = &tsk->signal->real_timer; ++ ++ if (!hrtimer_is_queued(tmr) && ++ tsk->signal->it_real_incr != 0) { ++ hrtimer_forward(tmr, tmr->base->get_time(), ++ tsk->signal->it_real_incr); ++ hrtimer_restart(tmr); ++ } ++ } ++#endif ++ } ++ ++ recalc_sigpending(); ++ if (!signr) ++ return 0; ++ ++ if (unlikely(sig_kernel_stop(signr))) { ++ /* ++ * Set a marker that we have dequeued a stop signal. Our ++ * caller might release the siglock and then the pending ++ * stop signal it is about to process is no longer in the ++ * pending bitmasks, but must still be cleared by a SIGCONT ++ * (and overruled by a SIGKILL). So those cases clear this ++ * shared flag after we've set it. Note that this flag may ++ * remain set after the signal we return is ignored or ++ * handled. That doesn't matter because its only purpose ++ * is to alert stop-signal processing code when another ++ * processor has come along and cleared the flag. ++ */ ++ current->jobctl |= JOBCTL_STOP_DEQUEUED; ++ } ++#ifdef CONFIG_POSIX_TIMERS ++ if (resched_timer) { ++ /* ++ * Release the siglock to ensure proper locking order ++ * of timer locks outside of siglocks. Note, we leave ++ * irqs disabled here, since the posix-timers code is ++ * about to disable them again anyway. 
++ */ ++ spin_unlock(&tsk->sighand->siglock); ++ posixtimer_rearm(info); ++ spin_lock(&tsk->sighand->siglock); ++ ++ /* Don't expose the si_sys_private value to userspace */ ++ info->si_sys_private = 0; ++ } ++#endif ++ return signr; ++} ++ ++static int dequeue_synchronous_signal(siginfo_t *info) ++{ ++ struct task_struct *tsk = current; ++ struct sigpending *pending = &tsk->pending; ++ struct sigqueue *q, *sync = NULL; ++ ++ /* ++ * Might a synchronous signal be in the queue? ++ */ ++ if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) ++ return 0; ++ ++ /* ++ * Return the first synchronous signal in the queue. ++ */ ++ list_for_each_entry(q, &pending->list, list) { ++ /* Synchronous signals have a postive si_code */ ++ if ((q->info.si_code > SI_USER) && ++ (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { ++ sync = q; ++ goto next; ++ } ++ } ++ return 0; ++next: ++ /* ++ * Check if there is another siginfo for the same signal. ++ */ ++ list_for_each_entry_continue(q, &pending->list, list) { ++ if (q->info.si_signo == sync->info.si_signo) ++ goto still_pending; ++ } ++ ++ sigdelset(&pending->signal, sync->info.si_signo); ++ recalc_sigpending(); ++still_pending: ++ list_del_init(&sync->list); ++ copy_siginfo(info, &sync->info); ++ __sigqueue_free(sync); ++ return info->si_signo; ++} ++ ++/* ++ * Tell a process that it has a new active signal.. ++ * ++ * NOTE! we rely on the previous spin_lock to ++ * lock interrupts for us! We can only be called with ++ * "siglock" held, and the local interrupt must ++ * have been disabled when that got acquired! ++ * ++ * No need to set need_resched since signal event passing ++ * goes through ->blocked ++ */ ++void signal_wake_up_state(struct task_struct *t, unsigned int state) ++{ ++ set_tsk_thread_flag(t, TIF_SIGPENDING); ++ /* ++ * TASK_WAKEKILL also means wake it up in the stopped/traced/killable ++ * case. We don't check t->state here because there is a race with it ++ * executing another processor and just now entering stopped state. ++ * By using wake_up_state, we ensure the process will wake up and ++ * handle its death signal. ++ */ ++ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) ++ kick_process(t); ++} ++ ++/* ++ * Remove signals in mask from the pending set and queue. ++ * Returns 1 if any signals were found. ++ * ++ * All callers must be holding the siglock. 
++ */ ++static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) ++{ ++ struct sigqueue *q, *n; ++ sigset_t m; ++ ++ sigandsets(&m, mask, &s->signal); ++ if (sigisemptyset(&m)) ++ return; ++ ++ sigandnsets(&s->signal, &s->signal, mask); ++ list_for_each_entry_safe(q, n, &s->list, list) { ++ if (sigismember(mask, q->info.si_signo)) { ++ list_del_init(&q->list); ++ __sigqueue_free(q); ++ } ++ } ++} ++ ++static inline int is_si_special(const struct siginfo *info) ++{ ++ return info <= SEND_SIG_FORCED; ++} ++ ++static inline bool si_fromuser(const struct siginfo *info) ++{ ++ return info == SEND_SIG_NOINFO || ++ (!is_si_special(info) && SI_FROMUSER(info)); ++} ++ ++/* ++ * called with RCU read lock from check_kill_permission() ++ */ ++static bool kill_ok_by_cred(struct task_struct *t) ++{ ++ const struct cred *cred = current_cred(); ++ const struct cred *tcred = __task_cred(t); ++ ++ return uid_eq(cred->euid, tcred->suid) || ++ uid_eq(cred->euid, tcred->uid) || ++ uid_eq(cred->uid, tcred->suid) || ++ uid_eq(cred->uid, tcred->uid) || ++ ns_capable(tcred->user_ns, CAP_KILL); ++} ++ ++/* ++ * Bad permissions for sending the signal ++ * - the caller must hold the RCU read lock ++ */ ++static int check_kill_permission(int sig, struct siginfo *info, ++ struct task_struct *t) ++{ ++ struct pid *sid; ++ int error; ++ ++ if (!valid_signal(sig)) ++ return -EINVAL; ++ ++ if (!si_fromuser(info)) ++ return 0; ++ ++ error = audit_signal_info(sig, t); /* Let audit system see the signal */ ++ if (error) ++ return error; ++ ++ if (!same_thread_group(current, t) && ++ !kill_ok_by_cred(t)) { ++ switch (sig) { ++ case SIGCONT: ++ sid = task_session(t); ++ /* ++ * We don't return the error if sid == NULL. The ++ * task was unhashed, the caller must notice this. ++ */ ++ if (!sid || sid == task_session(current)) ++ break; ++ default: ++ return -EPERM; ++ } ++ } ++ ++ return security_task_kill(t, info, sig, NULL); ++} ++ ++/** ++ * ptrace_trap_notify - schedule trap to notify ptracer ++ * @t: tracee wanting to notify tracer ++ * ++ * This function schedules sticky ptrace trap which is cleared on the next ++ * TRAP_STOP to notify ptracer of an event. @t must have been seized by ++ * ptracer. ++ * ++ * If @t is running, STOP trap will be taken. If trapped for STOP and ++ * ptracer is listening for events, tracee is woken up so that it can ++ * re-trap for the new event. If trapped otherwise, STOP trap will be ++ * eventually taken without returning to userland after the existing traps ++ * are finished by PTRACE_CONT. ++ * ++ * CONTEXT: ++ * Must be called with @task->sighand->siglock held. ++ */ ++static void ptrace_trap_notify(struct task_struct *t) ++{ ++ WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); ++ assert_spin_locked(&t->sighand->siglock); ++ ++ task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ++ ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); ++} ++ ++/* ++ * Handle magic process-wide effects of stop/continue signals. Unlike ++ * the signal actions, these happen immediately at signal-generation ++ * time regardless of blocking, ignoring, or handling. This does the ++ * actual continuing for SIGCONT, but not the actual stopping for stop ++ * signals. The process stop is done as a signal action for SIG_DFL. ++ * ++ * Returns true if the signal should be actually delivered, otherwise ++ * it should be dropped. 
++ */ ++static bool prepare_signal(int sig, struct task_struct *p, bool force) ++{ ++ struct signal_struct *signal = p->signal; ++ struct task_struct *t; ++ sigset_t flush; ++ ++ if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) { ++ if (!(signal->flags & SIGNAL_GROUP_EXIT)) ++ return sig == SIGKILL; ++ /* ++ * The process is in the middle of dying, nothing to do. ++ */ ++ } else if (sig_kernel_stop(sig)) { ++ /* ++ * This is a stop signal. Remove SIGCONT from all queues. ++ */ ++ siginitset(&flush, sigmask(SIGCONT)); ++ flush_sigqueue_mask(&flush, &signal->shared_pending); ++ for_each_thread(p, t) ++ flush_sigqueue_mask(&flush, &t->pending); ++ } else if (sig == SIGCONT) { ++ unsigned int why; ++ /* ++ * Remove all stop signals from all queues, wake all threads. ++ */ ++ siginitset(&flush, SIG_KERNEL_STOP_MASK); ++ flush_sigqueue_mask(&flush, &signal->shared_pending); ++ for_each_thread(p, t) { ++ flush_sigqueue_mask(&flush, &t->pending); ++ task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); ++ if (likely(!(t->ptrace & PT_SEIZED))) ++ wake_up_state(t, __TASK_STOPPED); ++ else ++ ptrace_trap_notify(t); ++ } ++ ++ /* ++ * Notify the parent with CLD_CONTINUED if we were stopped. ++ * ++ * If we were in the middle of a group stop, we pretend it ++ * was already finished, and then continued. Since SIGCHLD ++ * doesn't queue we report only CLD_STOPPED, as if the next ++ * CLD_CONTINUED was dropped. ++ */ ++ why = 0; ++ if (signal->flags & SIGNAL_STOP_STOPPED) ++ why |= SIGNAL_CLD_CONTINUED; ++ else if (signal->group_stop_count) ++ why |= SIGNAL_CLD_STOPPED; ++ ++ if (why) { ++ /* ++ * The first thread which returns from do_signal_stop() ++ * will take ->siglock, notice SIGNAL_CLD_MASK, and ++ * notify its parent. See get_signal_to_deliver(). ++ */ ++ signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); ++ signal->group_stop_count = 0; ++ signal->group_exit_code = 0; ++ } ++ } ++ ++ return !sig_ignored(p, sig, force); ++} ++ ++/* ++ * Test if P wants to take SIG. After we've checked all threads with this, ++ * it's equivalent to finding no threads not blocking SIG. Any threads not ++ * blocking SIG were ruled out because they are not running and already ++ * have pending signals. Such threads will dequeue from the shared queue ++ * as soon as they're available, so putting the signal on the shared queue ++ * will be equivalent to sending it to one such thread. ++ */ ++static inline bool wants_signal(int sig, struct task_struct *p) ++{ ++ if (sigismember(&p->blocked, sig)) ++ return false; ++ ++ if (p->flags & PF_EXITING) ++ return false; ++ ++ if (sig == SIGKILL) ++ return true; ++ ++ if (task_is_stopped_or_traced(p)) ++ return false; ++ ++ return task_curr(p) || !signal_pending(p); ++} ++ ++static void complete_signal(int sig, struct task_struct *p, enum pid_type type) ++{ ++ struct signal_struct *signal = p->signal; ++ struct task_struct *t; ++ ++ /* ++ * Now find a thread we can wake up to take the signal off the queue. ++ * ++ * If the main thread wants the signal, it gets first crack. ++ * Probably the least surprising to the average bear. ++ */ ++ if (wants_signal(sig, p)) ++ t = p; ++ else if ((type == PIDTYPE_PID) || thread_group_empty(p)) ++ /* ++ * There is just one thread and it does not need to be woken. ++ * It will dequeue unblocked signals before it runs again. ++ */ ++ return; ++ else { ++ /* ++ * Otherwise try to find a suitable thread. 
++ */ ++ t = signal->curr_target; ++ while (!wants_signal(sig, t)) { ++ t = next_thread(t); ++ if (t == signal->curr_target) ++ /* ++ * No thread needs to be woken. ++ * Any eligible threads will see ++ * the signal in the queue soon. ++ */ ++ return; ++ } ++ signal->curr_target = t; ++ } ++ ++ /* ++ * Found a killable thread. If the signal will be fatal, ++ * then start taking the whole group down immediately. ++ */ ++ if (sig_fatal(p, sig) && ++ !(signal->flags & SIGNAL_GROUP_EXIT) && ++ !sigismember(&t->real_blocked, sig) && ++ (sig == SIGKILL || !p->ptrace)) { ++ /* ++ * This signal will be fatal to the whole group. ++ */ ++ if (!sig_kernel_coredump(sig)) { ++ /* ++ * Start a group exit and wake everybody up. ++ * This way we don't have other threads ++ * running and doing things after a slower ++ * thread has the fatal signal pending. ++ */ ++ signal->flags = SIGNAL_GROUP_EXIT; ++ signal->group_exit_code = sig; ++ signal->group_stop_count = 0; ++ t = p; ++ do { ++ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); ++ sigaddset(&t->pending.signal, SIGKILL); ++ signal_wake_up(t, 1); ++ } while_each_thread(p, t); ++ return; ++ } ++ } ++ ++ /* ++ * The signal is already in the shared-pending queue. ++ * Tell the chosen thread to wake up and dequeue it. ++ */ ++ signal_wake_up(t, sig == SIGKILL); ++ return; ++} ++ ++static inline bool legacy_queue(struct sigpending *signals, int sig) ++{ ++ return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); ++} ++ ++#ifdef CONFIG_USER_NS ++static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) ++{ ++ if (current_user_ns() == task_cred_xxx(t, user_ns)) ++ return; ++ ++ if (SI_FROMKERNEL(info)) ++ return; ++ ++ rcu_read_lock(); ++ info->si_uid = from_kuid_munged(task_cred_xxx(t, user_ns), ++ make_kuid(current_user_ns(), info->si_uid)); ++ rcu_read_unlock(); ++} ++#else ++static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) ++{ ++ return; ++} ++#endif ++ ++static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, ++ enum pid_type type, int from_ancestor_ns) ++{ ++ struct sigpending *pending; ++ struct sigqueue *q; ++ int override_rlimit; ++ int ret = 0, result; ++ ++ assert_spin_locked(&t->sighand->siglock); ++ ++ result = TRACE_SIGNAL_IGNORED; ++ if (!prepare_signal(sig, t, ++ from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED))) ++ goto ret; ++ ++ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; ++ /* ++ * Short-circuit ignored signals and support queuing ++ * exactly one non-rt signal, so that we can get more ++ * detailed information about the cause of the signal. ++ */ ++ result = TRACE_SIGNAL_ALREADY_PENDING; ++ if (legacy_queue(pending, sig)) ++ goto ret; ++ ++ result = TRACE_SIGNAL_DELIVERED; ++ /* ++ * fast-pathed signals for kernel-internal things like SIGSTOP ++ * or SIGKILL. ++ */ ++ if (info == SEND_SIG_FORCED) ++ goto out_set; ++ ++ /* ++ * Real-time signals must be queued if sent by sigqueue, or ++ * some other real-time mechanism. It is implementation ++ * defined whether kill() does so. We attempt to do so, on ++ * the principle of least surprise, but since kill is not ++ * allowed to fail with EAGAIN when low on memory we just ++ * make sure at least one signal gets delivered and don't ++ * pass on the info struct. 
++ */ ++ if (sig < SIGRTMIN) ++ override_rlimit = (is_si_special(info) || info->si_code >= 0); ++ else ++ override_rlimit = 0; ++ ++ q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); ++ if (q) { ++ list_add_tail(&q->list, &pending->list); ++ switch ((unsigned long) info) { ++ case (unsigned long) SEND_SIG_NOINFO: ++ clear_siginfo(&q->info); ++ q->info.si_signo = sig; ++ q->info.si_errno = 0; ++ q->info.si_code = SI_USER; ++ q->info.si_pid = task_tgid_nr_ns(current, ++ task_active_pid_ns(t)); ++ q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ break; ++ case (unsigned long) SEND_SIG_PRIV: ++ clear_siginfo(&q->info); ++ q->info.si_signo = sig; ++ q->info.si_errno = 0; ++ q->info.si_code = SI_KERNEL; ++ q->info.si_pid = 0; ++ q->info.si_uid = 0; ++ break; ++ default: ++ copy_siginfo(&q->info, info); ++ if (from_ancestor_ns) ++ q->info.si_pid = 0; ++ break; ++ } ++ ++ userns_fixup_signal_uid(&q->info, t); ++ ++ } else if (!is_si_special(info)) { ++ if (sig >= SIGRTMIN && info->si_code != SI_USER) { ++ /* ++ * Queue overflow, abort. We may abort if the ++ * signal was rt and sent by user using something ++ * other than kill(). ++ */ ++ result = TRACE_SIGNAL_OVERFLOW_FAIL; ++ ret = -EAGAIN; ++ goto ret; ++ } else { ++ /* ++ * This is a silent loss of information. We still ++ * send the signal, but the *info bits are lost. ++ */ ++ result = TRACE_SIGNAL_LOSE_INFO; ++ } ++ } ++ ++out_set: ++ signalfd_notify(t, sig); ++ sigaddset(&pending->signal, sig); ++ ++ /* Let multiprocess signals appear after on-going forks */ ++ if (type > PIDTYPE_TGID) { ++ struct multiprocess_signals *delayed; ++ hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { ++ sigset_t *signal = &delayed->signal; ++ /* Can't queue both a stop and a continue signal */ ++ if (sig == SIGCONT) ++ sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); ++ else if (sig_kernel_stop(sig)) ++ sigdelset(signal, SIGCONT); ++ sigaddset(signal, sig); ++ } ++ } ++ ++ complete_signal(sig, t, type); ++ret: ++ trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); ++ return ret; ++} ++ ++static int send_signal(int sig, struct siginfo *info, struct task_struct *t, ++ enum pid_type type) ++{ ++ int from_ancestor_ns = 0; ++ ++#ifdef CONFIG_PID_NS ++ from_ancestor_ns = si_fromuser(info) && ++ !task_pid_nr_ns(current, task_active_pid_ns(t)); ++#endif ++ ++ return __send_signal(sig, info, t, type, from_ancestor_ns); ++} ++ ++static void print_fatal_signal(int signr) ++{ ++ struct pt_regs *regs = signal_pt_regs(); ++ pr_info("potentially unexpected fatal signal %d.\n", signr); ++ ++#if defined(__i386__) && !defined(__arch_um__) ++ pr_info("code at %08lx: ", regs->ip); ++ { ++ int i; ++ for (i = 0; i < 16; i++) { ++ unsigned char insn; ++ ++ if (get_user(insn, (unsigned char *)(regs->ip + i))) ++ break; ++ pr_cont("%02x ", insn); ++ } ++ } ++ pr_cont("\n"); ++#endif ++ preempt_disable(); ++ show_regs(regs); ++ preempt_enable(); ++} ++ ++static int __init setup_print_fatal_signals(char *str) ++{ ++ get_option (&str, &print_fatal_signals); ++ ++ return 1; ++} ++ ++__setup("print-fatal-signals=", setup_print_fatal_signals); ++ ++int ++__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) ++{ ++ return send_signal(sig, info, p, PIDTYPE_TGID); ++} ++ ++static int ++specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) ++{ ++ return send_signal(sig, info, t, PIDTYPE_PID); ++} ++ ++int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, ++ enum pid_type 
type) ++{ ++ unsigned long flags; ++ int ret = -ESRCH; ++ ++ if (lock_task_sighand(p, &flags)) { ++ ret = send_signal(sig, info, p, type); ++ unlock_task_sighand(p, &flags); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Force a signal that the process can't ignore: if necessary ++ * we unblock the signal and change any SIG_IGN to SIG_DFL. ++ * ++ * Note: If we unblock the signal, we always reset it to SIG_DFL, ++ * since we do not want to have a signal handler that was blocked ++ * be invoked when user space had explicitly blocked it. ++ * ++ * We don't want to have recursive SIGSEGV's etc, for example, ++ * that is why we also clear SIGNAL_UNKILLABLE. ++ */ ++int ++force_sig_info(int sig, struct siginfo *info, struct task_struct *t) ++{ ++ unsigned long int flags; ++ int ret, blocked, ignored; ++ struct k_sigaction *action; ++ ++ spin_lock_irqsave(&t->sighand->siglock, flags); ++ action = &t->sighand->action[sig-1]; ++ ignored = action->sa.sa_handler == SIG_IGN; ++ blocked = sigismember(&t->blocked, sig); ++ if (blocked || ignored) { ++ action->sa.sa_handler = SIG_DFL; ++ if (blocked) { ++ sigdelset(&t->blocked, sig); ++ recalc_sigpending_and_wake(t); ++ } ++ } ++ /* ++ * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect ++ * debugging to leave init killable. ++ */ ++ if (action->sa.sa_handler == SIG_DFL && !t->ptrace) ++ t->signal->flags &= ~SIGNAL_UNKILLABLE; ++ ret = specific_send_sig_info(sig, info, t); ++ spin_unlock_irqrestore(&t->sighand->siglock, flags); ++ ++ return ret; ++} ++ ++/* ++ * Nuke all other threads in the group. ++ */ ++int zap_other_threads(struct task_struct *p) ++{ ++ struct task_struct *t = p; ++ int count = 0; ++ ++ p->signal->group_stop_count = 0; ++ ++ while_each_thread(p, t) { ++ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); ++ count++; ++ ++ /* Don't bother with already dead threads */ ++ if (t->exit_state) ++ continue; ++ sigaddset(&t->pending.signal, SIGKILL); ++ signal_wake_up(t, 1); ++ } ++ ++ return count; ++} ++ ++struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, ++ unsigned long *flags) ++{ ++ struct sighand_struct *sighand; ++ ++ rcu_read_lock(); ++ for (;;) { ++ sighand = rcu_dereference(tsk->sighand); ++ if (unlikely(sighand == NULL)) ++ break; ++ ++ /* ++ * This sighand can be already freed and even reused, but ++ * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which ++ * initializes ->siglock: this slab can't go away, it has ++ * the same object type, ->siglock can't be reinitialized. ++ * ++ * We need to ensure that tsk->sighand is still the same ++ * after we take the lock, we can race with de_thread() or ++ * __exit_signal(). In the latter case the next iteration ++ * must see ->sighand == NULL. 
++ */ ++ spin_lock_irqsave(&sighand->siglock, *flags); ++ if (likely(sighand == tsk->sighand)) ++ break; ++ spin_unlock_irqrestore(&sighand->siglock, *flags); ++ } ++ rcu_read_unlock(); ++ ++ return sighand; ++} ++ ++/* ++ * send signal info to all the members of a group ++ */ ++int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, ++ enum pid_type type) ++{ ++ int ret; ++ ++ rcu_read_lock(); ++ ret = check_kill_permission(sig, info, p); ++ rcu_read_unlock(); ++ ++ if (!ret && sig) ++ ret = do_send_sig_info(sig, info, p, type); ++ ++ return ret; ++} ++ ++/* ++ * __kill_pgrp_info() sends a signal to a process group: this is what the tty ++ * control characters do (^C, ^Z etc) ++ * - the caller must hold at least a readlock on tasklist_lock ++ */ ++int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) ++{ ++ struct task_struct *p = NULL; ++ int retval, success; ++ ++ success = 0; ++ retval = -ESRCH; ++ do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ++ int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); ++ success |= !err; ++ retval = err; ++ } while_each_pid_task(pgrp, PIDTYPE_PGID, p); ++ return success ? 0 : retval; ++} ++ ++int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) ++{ ++ int error = -ESRCH; ++ struct task_struct *p; ++ ++ for (;;) { ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ if (p) ++ error = group_send_sig_info(sig, info, p, PIDTYPE_TGID); ++ rcu_read_unlock(); ++ if (likely(!p || error != -ESRCH)) ++ return error; ++ ++ /* ++ * The task was unhashed in between, try again. If it ++ * is dead, pid_task() will return NULL, if we race with ++ * de_thread() it will find the new leader. ++ */ ++ } ++} ++ ++static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) ++{ ++ int error; ++ rcu_read_lock(); ++ error = kill_pid_info(sig, info, find_vpid(pid)); ++ rcu_read_unlock(); ++ return error; ++} ++ ++static inline bool kill_as_cred_perm(const struct cred *cred, ++ struct task_struct *target) ++{ ++ const struct cred *pcred = __task_cred(target); ++ ++ return uid_eq(cred->euid, pcred->suid) || ++ uid_eq(cred->euid, pcred->uid) || ++ uid_eq(cred->uid, pcred->suid) || ++ uid_eq(cred->uid, pcred->uid); ++} ++ ++/* like kill_pid_info(), but doesn't use uid/euid of "current" */ ++int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid, ++ const struct cred *cred) ++{ ++ int ret = -EINVAL; ++ struct task_struct *p; ++ unsigned long flags; ++ ++ if (!valid_signal(sig)) ++ return ret; ++ ++ rcu_read_lock(); ++ p = pid_task(pid, PIDTYPE_PID); ++ if (!p) { ++ ret = -ESRCH; ++ goto out_unlock; ++ } ++ if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) { ++ ret = -EPERM; ++ goto out_unlock; ++ } ++ ret = security_task_kill(p, info, sig, cred); ++ if (ret) ++ goto out_unlock; ++ ++ if (sig) { ++ if (lock_task_sighand(p, &flags)) { ++ ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0); ++ unlock_task_sighand(p, &flags); ++ } else ++ ret = -ESRCH; ++ } ++out_unlock: ++ rcu_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(kill_pid_info_as_cred); ++ ++/* ++ * kill_something_info() interprets pid in interesting ways just like kill(2). ++ * ++ * POSIX specifies that kill(-1,sig) is unspecified, but what we have ++ * is probably wrong. Should make it like BSD or SYSV. 
++ */ ++ ++static int kill_something_info(int sig, struct siginfo *info, pid_t pid) ++{ ++ int ret; ++ ++ if (pid > 0) { ++ rcu_read_lock(); ++ ret = kill_pid_info(sig, info, find_vpid(pid)); ++ rcu_read_unlock(); ++ return ret; ++ } ++ ++ /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */ ++ if (pid == INT_MIN) ++ return -ESRCH; ++ ++ read_lock(&tasklist_lock); ++ if (pid != -1) { ++ ret = __kill_pgrp_info(sig, info, ++ pid ? find_vpid(-pid) : task_pgrp(current)); ++ } else { ++ int retval = 0, count = 0; ++ struct task_struct * p; ++ ++ for_each_process(p) { ++ if (task_pid_vnr(p) > 1 && ++ !same_thread_group(p, current)) { ++ int err = group_send_sig_info(sig, info, p, ++ PIDTYPE_MAX); ++ ++count; ++ if (err != -EPERM) ++ retval = err; ++ } ++ } ++ ret = count ? retval : -ESRCH; ++ } ++ read_unlock(&tasklist_lock); ++ ++ return ret; ++} ++ ++/* ++ * These are for backward compatibility with the rest of the kernel source. ++ */ ++ ++int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) ++{ ++ /* ++ * Make sure legacy kernel users don't send in bad values ++ * (normal paths check this in check_kill_permission). ++ */ ++ if (!valid_signal(sig)) ++ return -EINVAL; ++ ++ return do_send_sig_info(sig, info, p, PIDTYPE_PID); ++} ++ ++#define __si_special(priv) \ ++ ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO) ++ ++int ++send_sig(int sig, struct task_struct *p, int priv) ++{ ++ return send_sig_info(sig, __si_special(priv), p); ++} ++ ++void force_sig(int sig, struct task_struct *p) ++{ ++ force_sig_info(sig, SEND_SIG_PRIV, p); ++} ++ ++/* ++ * When things go south during signal handling, we ++ * will force a SIGSEGV. And if the signal that caused ++ * the problem was already a SIGSEGV, we'll want to ++ * make sure we don't even try to deliver the signal.. 
++ */ ++void force_sigsegv(int sig, struct task_struct *p) ++{ ++ if (sig == SIGSEGV) { ++ unsigned long flags; ++ spin_lock_irqsave(&p->sighand->siglock, flags); ++ p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; ++ spin_unlock_irqrestore(&p->sighand->siglock, flags); ++ } ++ force_sig(SIGSEGV, p); ++} ++ ++int force_sig_fault(int sig, int code, void __user *addr ++ ___ARCH_SI_TRAPNO(int trapno) ++ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) ++ , struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++#ifdef __ARCH_SI_TRAPNO ++ info.si_trapno = trapno; ++#endif ++#ifdef __ia64__ ++ info.si_imm = imm; ++ info.si_flags = flags; ++ info.si_isr = isr; ++#endif ++ return force_sig_info(info.si_signo, &info, t); ++} ++ ++int send_sig_fault(int sig, int code, void __user *addr ++ ___ARCH_SI_TRAPNO(int trapno) ++ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) ++ , struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++#ifdef __ARCH_SI_TRAPNO ++ info.si_trapno = trapno; ++#endif ++#ifdef __ia64__ ++ info.si_imm = imm; ++ info.si_flags = flags; ++ info.si_isr = isr; ++#endif ++ return send_sig_info(info.si_signo, &info, t); ++} ++ ++int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); ++ clear_siginfo(&info); ++ info.si_signo = SIGBUS; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++ info.si_addr_lsb = lsb; ++ return force_sig_info(info.si_signo, &info, t); ++} ++ ++int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) ++{ ++ struct siginfo info; ++ ++ WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); ++ clear_siginfo(&info); ++ info.si_signo = SIGBUS; ++ info.si_errno = 0; ++ info.si_code = code; ++ info.si_addr = addr; ++ info.si_addr_lsb = lsb; ++ return send_sig_info(info.si_signo, &info, t); ++} ++EXPORT_SYMBOL(send_sig_mceerr); ++ ++int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGSEGV; ++ info.si_errno = 0; ++ info.si_code = SEGV_BNDERR; ++ info.si_addr = addr; ++ info.si_lower = lower; ++ info.si_upper = upper; ++ return force_sig_info(info.si_signo, &info, current); ++} ++ ++#ifdef SEGV_PKUERR ++int force_sig_pkuerr(void __user *addr, u32 pkey) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGSEGV; ++ info.si_errno = 0; ++ info.si_code = SEGV_PKUERR; ++ info.si_addr = addr; ++ info.si_pkey = pkey; ++ return force_sig_info(info.si_signo, &info, current); ++} ++#endif ++ ++/* For the crazy architectures that include trap information in ++ * the errno field, instead of an actual errno value. 
++ */ ++int force_sig_ptrace_errno_trap(int errno, void __user *addr) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGTRAP; ++ info.si_errno = errno; ++ info.si_code = TRAP_HWBKPT; ++ info.si_addr = addr; ++ return force_sig_info(info.si_signo, &info, current); ++} ++ ++int kill_pgrp(struct pid *pid, int sig, int priv) ++{ ++ int ret; ++ ++ read_lock(&tasklist_lock); ++ ret = __kill_pgrp_info(sig, __si_special(priv), pid); ++ read_unlock(&tasklist_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL(kill_pgrp); ++ ++int kill_pid(struct pid *pid, int sig, int priv) ++{ ++ return kill_pid_info(sig, __si_special(priv), pid); ++} ++EXPORT_SYMBOL(kill_pid); ++ ++/* ++ * These functions support sending signals using preallocated sigqueue ++ * structures. This is needed "because realtime applications cannot ++ * afford to lose notifications of asynchronous events, like timer ++ * expirations or I/O completions". In the case of POSIX Timers ++ * we allocate the sigqueue structure from the timer_create. If this ++ * allocation fails we are able to report the failure to the application ++ * with an EAGAIN error. ++ */ ++struct sigqueue *sigqueue_alloc(void) ++{ ++ struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); ++ ++ if (q) ++ q->flags |= SIGQUEUE_PREALLOC; ++ ++ return q; ++} ++ ++void sigqueue_free(struct sigqueue *q) ++{ ++ unsigned long flags; ++ spinlock_t *lock = ¤t->sighand->siglock; ++ ++ BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ++ /* ++ * We must hold ->siglock while testing q->list ++ * to serialize with collect_signal() or with ++ * __exit_signal()->flush_sigqueue(). ++ */ ++ spin_lock_irqsave(lock, flags); ++ q->flags &= ~SIGQUEUE_PREALLOC; ++ /* ++ * If it is queued it will be freed when dequeued, ++ * like the "regular" sigqueue. ++ */ ++ if (!list_empty(&q->list)) ++ q = NULL; ++ spin_unlock_irqrestore(lock, flags); ++ ++ if (q) ++ __sigqueue_free(q); ++} ++ ++int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) ++{ ++ int sig = q->info.si_signo; ++ struct sigpending *pending; ++ struct task_struct *t; ++ unsigned long flags; ++ int ret, result; ++ ++ BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); ++ ++ ret = -1; ++ rcu_read_lock(); ++ t = pid_task(pid, type); ++ if (!t || !likely(lock_task_sighand(t, &flags))) ++ goto ret; ++ ++ ret = 1; /* the signal is ignored */ ++ result = TRACE_SIGNAL_IGNORED; ++ if (!prepare_signal(sig, t, false)) ++ goto out; ++ ++ ret = 0; ++ if (unlikely(!list_empty(&q->list))) { ++ /* ++ * If an SI_TIMER entry is already queue just increment ++ * the overrun count. ++ */ ++ BUG_ON(q->info.si_code != SI_TIMER); ++ q->info.si_overrun++; ++ result = TRACE_SIGNAL_ALREADY_PENDING; ++ goto out; ++ } ++ q->info.si_overrun = 0; ++ ++ signalfd_notify(t, sig); ++ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; ++ list_add_tail(&q->list, &pending->list); ++ sigaddset(&pending->signal, sig); ++ complete_signal(sig, t, type); ++ result = TRACE_SIGNAL_DELIVERED; ++out: ++ trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result); ++ unlock_task_sighand(t, &flags); ++ret: ++ rcu_read_unlock(); ++ return ret; ++} ++ ++/* ++ * Let a parent know about the death of a child. ++ * For a stopped/continued status change, use do_notify_parent_cldstop instead. ++ * ++ * Returns true if our parent ignored us and so we've switched to ++ * self-reaping. 
++ */ ++bool do_notify_parent(struct task_struct *tsk, int sig) ++{ ++ struct siginfo info; ++ unsigned long flags; ++ struct sighand_struct *psig; ++ bool autoreap = false; ++ u64 utime, stime; ++ ++ BUG_ON(sig == -1); ++ ++ /* do_notify_parent_cldstop should have been called instead. */ ++ BUG_ON(task_is_stopped_or_traced(tsk)); ++ ++ BUG_ON(!tsk->ptrace && ++ (tsk->group_leader != tsk || !thread_group_empty(tsk))); ++ ++ if (sig != SIGCHLD) { ++ /* ++ * This is only possible if parent == real_parent. ++ * Check if it has changed security domain. ++ */ ++ if (tsk->parent_exec_id_u64 != READ_ONCE(tsk->parent->self_exec_id_u64)) ++ sig = SIGCHLD; ++ } ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ /* ++ * We are under tasklist_lock here so our parent is tied to ++ * us and cannot change. ++ * ++ * task_active_pid_ns will always return the same pid namespace ++ * until a task passes through release_task. ++ * ++ * write_lock() currently calls preempt_disable() which is the ++ * same as rcu_read_lock(), but according to Oleg, this is not ++ * correct to rely on this ++ */ ++ rcu_read_lock(); ++ info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); ++ info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), ++ task_uid(tsk)); ++ rcu_read_unlock(); ++ ++ task_cputime(tsk, &utime, &stime); ++ info.si_utime = nsec_to_clock_t(utime + tsk->signal->utime); ++ info.si_stime = nsec_to_clock_t(stime + tsk->signal->stime); ++ ++ info.si_status = tsk->exit_code & 0x7f; ++ if (tsk->exit_code & 0x80) ++ info.si_code = CLD_DUMPED; ++ else if (tsk->exit_code & 0x7f) ++ info.si_code = CLD_KILLED; ++ else { ++ info.si_code = CLD_EXITED; ++ info.si_status = tsk->exit_code >> 8; ++ } ++ ++ psig = tsk->parent->sighand; ++ spin_lock_irqsave(&psig->siglock, flags); ++ if (!tsk->ptrace && sig == SIGCHLD && ++ (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || ++ (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { ++ /* ++ * We are exiting and our parent doesn't care. POSIX.1 ++ * defines special semantics for setting SIGCHLD to SIG_IGN ++ * or setting the SA_NOCLDWAIT flag: we should be reaped ++ * automatically and not left for our parent's wait4 call. ++ * Rather than having the parent do it as a magic kind of ++ * signal handler, we just set this to tell do_exit that we ++ * can be cleaned up without becoming a zombie. Note that ++ * we still call __wake_up_parent in this case, because a ++ * blocked sys_wait4 might now return -ECHILD. ++ * ++ * Whether we send SIGCHLD or not for SA_NOCLDWAIT ++ * is implementation-defined: we do (if you don't want ++ * it, just use SIG_IGN instead). ++ */ ++ autoreap = true; ++ if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) ++ sig = 0; ++ } ++ if (valid_signal(sig) && sig) ++ __group_send_sig_info(sig, &info, tsk->parent); ++ __wake_up_parent(tsk, tsk->parent); ++ spin_unlock_irqrestore(&psig->siglock, flags); ++ ++ return autoreap; ++} ++ ++/** ++ * do_notify_parent_cldstop - notify parent of stopped/continued state change ++ * @tsk: task reporting the state change ++ * @for_ptracer: the notification is for ptracer ++ * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report ++ * ++ * Notify @tsk's parent that the stopped/continued state has changed. If ++ * @for_ptracer is %false, @tsk's group leader notifies to its real parent. ++ * If %true, @tsk reports to @tsk->parent which should be the ptracer. ++ * ++ * CONTEXT: ++ * Must be called with tasklist_lock at least read locked. 
++ */ ++static void do_notify_parent_cldstop(struct task_struct *tsk, ++ bool for_ptracer, int why) ++{ ++ struct siginfo info; ++ unsigned long flags; ++ struct task_struct *parent; ++ struct sighand_struct *sighand; ++ u64 utime, stime; ++ ++ if (for_ptracer) { ++ parent = tsk->parent; ++ } else { ++ tsk = tsk->group_leader; ++ parent = tsk->real_parent; ++ } ++ ++ clear_siginfo(&info); ++ info.si_signo = SIGCHLD; ++ info.si_errno = 0; ++ /* ++ * see comment in do_notify_parent() about the following 4 lines ++ */ ++ rcu_read_lock(); ++ info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); ++ info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); ++ rcu_read_unlock(); ++ ++ task_cputime(tsk, &utime, &stime); ++ info.si_utime = nsec_to_clock_t(utime); ++ info.si_stime = nsec_to_clock_t(stime); ++ ++ info.si_code = why; ++ switch (why) { ++ case CLD_CONTINUED: ++ info.si_status = SIGCONT; ++ break; ++ case CLD_STOPPED: ++ info.si_status = tsk->signal->group_exit_code & 0x7f; ++ break; ++ case CLD_TRAPPED: ++ info.si_status = tsk->exit_code & 0x7f; ++ break; ++ default: ++ BUG(); ++ } ++ ++ sighand = parent->sighand; ++ spin_lock_irqsave(&sighand->siglock, flags); ++ if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && ++ !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) ++ __group_send_sig_info(SIGCHLD, &info, parent); ++ /* ++ * Even if SIGCHLD is not generated, we must wake up wait4 calls. ++ */ ++ __wake_up_parent(tsk, parent); ++ spin_unlock_irqrestore(&sighand->siglock, flags); ++} ++ ++static inline bool may_ptrace_stop(void) ++{ ++ if (!likely(current->ptrace)) ++ return false; ++ /* ++ * Are we in the middle of do_coredump? ++ * If so and our tracer is also part of the coredump stopping ++ * is a deadlock situation, and pointless because our tracer ++ * is dead so don't allow us to stop. ++ * If SIGKILL was already sent before the caller unlocked ++ * ->siglock we must see ->core_state != NULL. Otherwise it ++ * is safe to enter schedule(). ++ * ++ * This is almost outdated, a task with the pending SIGKILL can't ++ * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported ++ * after SIGKILL was already dequeued. ++ */ ++ if (unlikely(current->mm->core_state) && ++ unlikely(current->mm == current->parent->mm)) ++ return false; ++ ++ return true; ++} ++ ++/* ++ * Return non-zero if there is a SIGKILL that should be waking us up. ++ * Called with the siglock held. ++ */ ++static bool sigkill_pending(struct task_struct *tsk) ++{ ++ return sigismember(&tsk->pending.signal, SIGKILL) || ++ sigismember(&tsk->signal->shared_pending.signal, SIGKILL); ++} ++ ++/* ++ * This must be called with current->sighand->siglock held. ++ * ++ * This should be the path for all ptrace stops. ++ * We always set current->last_siginfo while stopped here. ++ * That makes it a way to test a stopped process for ++ * being ptrace-stopped vs being job-control-stopped. ++ * ++ * If we actually decide not to stop at all because the tracer ++ * is gone, we keep current->exit_code unless clear_code. ++ */ ++static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) ++ __releases(¤t->sighand->siglock) ++ __acquires(¤t->sighand->siglock) ++{ ++ bool gstop_done = false; ++ ++ if (arch_ptrace_stop_needed(exit_code, info)) { ++ /* ++ * The arch code has something special to do before a ++ * ptrace stop. This is allowed to block, e.g. for faults ++ * on user stack pages. 
We can't keep the siglock while ++ * calling arch_ptrace_stop, so we must release it now. ++ * To preserve proper semantics, we must do this before ++ * any signal bookkeeping like checking group_stop_count. ++ * Meanwhile, a SIGKILL could come in before we retake the ++ * siglock. That must prevent us from sleeping in TASK_TRACED. ++ * So after regaining the lock, we must check for SIGKILL. ++ */ ++ spin_unlock_irq(¤t->sighand->siglock); ++ arch_ptrace_stop(exit_code, info); ++ spin_lock_irq(¤t->sighand->siglock); ++ if (sigkill_pending(current)) ++ return; ++ } ++ ++ set_special_state(TASK_TRACED); ++ ++ /* ++ * We're committing to trapping. TRACED should be visible before ++ * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). ++ * Also, transition to TRACED and updates to ->jobctl should be ++ * atomic with respect to siglock and should be done after the arch ++ * hook as siglock is released and regrabbed across it. ++ * ++ * TRACER TRACEE ++ * ++ * ptrace_attach() ++ * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) ++ * do_wait() ++ * set_current_state() smp_wmb(); ++ * ptrace_do_wait() ++ * wait_task_stopped() ++ * task_stopped_code() ++ * [L] task_is_traced() [S] task_clear_jobctl_trapping(); ++ */ ++ smp_wmb(); ++ ++ current->last_siginfo = info; ++ current->exit_code = exit_code; ++ ++ /* ++ * If @why is CLD_STOPPED, we're trapping to participate in a group ++ * stop. Do the bookkeeping. Note that if SIGCONT was delievered ++ * across siglock relocks since INTERRUPT was scheduled, PENDING ++ * could be clear now. We act as if SIGCONT is received after ++ * TASK_TRACED is entered - ignore it. ++ */ ++ if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING)) ++ gstop_done = task_participate_group_stop(current); ++ ++ /* any trap clears pending STOP trap, STOP trap clears NOTIFY */ ++ task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP); ++ if (info && info->si_code >> 8 == PTRACE_EVENT_STOP) ++ task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY); ++ ++ /* entering a trap, clear TRAPPING */ ++ task_clear_jobctl_trapping(current); ++ ++ spin_unlock_irq(¤t->sighand->siglock); ++ read_lock(&tasklist_lock); ++ if (may_ptrace_stop()) { ++ /* ++ * Notify parents of the stop. ++ * ++ * While ptraced, there are two parents - the ptracer and ++ * the real_parent of the group_leader. The ptracer should ++ * know about every stop while the real parent is only ++ * interested in the completion of group stop. The states ++ * for the two don't interact with each other. Notify ++ * separately unless they're gonna be duplicates. ++ */ ++ do_notify_parent_cldstop(current, true, why); ++ if (gstop_done && ptrace_reparented(current)) ++ do_notify_parent_cldstop(current, false, why); ++ ++ /* ++ * Don't want to allow preemption here, because ++ * sys_ptrace() needs this task to be inactive. ++ * ++ * XXX: implement read_unlock_no_resched(). ++ */ ++ preempt_disable(); ++ read_unlock(&tasklist_lock); ++ preempt_enable_no_resched(); ++ freezable_schedule(); ++ } else { ++ /* ++ * By the time we got the lock, our tracer went away. ++ * Don't drop the lock yet, another tracer may come. ++ * ++ * If @gstop_done, the ptracer went away between group stop ++ * completion and here. During detach, it would have set ++ * JOBCTL_STOP_PENDING on us and we'll re-enter ++ * TASK_STOPPED in do_signal_stop() on return, so notifying ++ * the real parent of the group stop completion is enough. 
++ */ ++ if (gstop_done) ++ do_notify_parent_cldstop(current, false, why); ++ ++ /* tasklist protects us from ptrace_freeze_traced() */ ++ __set_current_state(TASK_RUNNING); ++ if (clear_code) ++ current->exit_code = 0; ++ read_unlock(&tasklist_lock); ++ } ++ ++ /* ++ * We are back. Now reacquire the siglock before touching ++ * last_siginfo, so that we are sure to have synchronized with ++ * any signal-sending on another CPU that wants to examine it. ++ */ ++ spin_lock_irq(¤t->sighand->siglock); ++ current->last_siginfo = NULL; ++ ++ /* LISTENING can be set only during STOP traps, clear it */ ++ current->jobctl &= ~JOBCTL_LISTENING; ++ ++ /* ++ * Queued signals ignored us while we were stopped for tracing. ++ * So check for any that we should take before resuming user mode. ++ * This sets TIF_SIGPENDING, but never clears it. ++ */ ++ recalc_sigpending_tsk(current); ++} ++ ++static void ptrace_do_notify(int signr, int exit_code, int why) ++{ ++ siginfo_t info; ++ ++ clear_siginfo(&info); ++ info.si_signo = signr; ++ info.si_code = exit_code; ++ info.si_pid = task_pid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ /* Let the debugger run. */ ++ ptrace_stop(exit_code, why, 1, &info); ++} ++ ++void ptrace_notify(int exit_code) ++{ ++ BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); ++ if (unlikely(current->task_works)) ++ task_work_run(); ++ ++ spin_lock_irq(¤t->sighand->siglock); ++ ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++ ++/** ++ * do_signal_stop - handle group stop for SIGSTOP and other stop signals ++ * @signr: signr causing group stop if initiating ++ * ++ * If %JOBCTL_STOP_PENDING is not set yet, initiate group stop with @signr ++ * and participate in it. If already set, participate in the existing ++ * group stop. If participated in a group stop (and thus slept), %true is ++ * returned with siglock released. ++ * ++ * If ptraced, this function doesn't handle stop itself. Instead, ++ * %JOBCTL_TRAP_STOP is scheduled and %false is returned with siglock ++ * untouched. The caller must ensure that INTERRUPT trap handling takes ++ * places afterwards. ++ * ++ * CONTEXT: ++ * Must be called with @current->sighand->siglock held, which is released ++ * on %true return. ++ * ++ * RETURNS: ++ * %false if group stop is already cancelled or ptrace trap is scheduled. ++ * %true if participated in group stop. ++ */ ++static bool do_signal_stop(int signr) ++ __releases(¤t->sighand->siglock) ++{ ++ struct signal_struct *sig = current->signal; ++ ++ if (!(current->jobctl & JOBCTL_STOP_PENDING)) { ++ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; ++ struct task_struct *t; ++ ++ /* signr will be recorded in task->jobctl for retries */ ++ WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); ++ ++ if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || ++ unlikely(signal_group_exit(sig))) ++ return false; ++ /* ++ * There is no group stop already in progress. We must ++ * initiate one now. ++ * ++ * While ptraced, a task may be resumed while group stop is ++ * still in effect and then receive a stop signal and ++ * initiate another group stop. This deviates from the ++ * usual behavior as two consecutive stop signals can't ++ * cause two group stops when !ptraced. That is why we ++ * also check !task_is_stopped(t) below. ++ * ++ * The condition can be distinguished by testing whether ++ * SIGNAL_STOP_STOPPED is already set. Don't generate ++ * group_exit_code in such case. 
++ * ++ * This is not necessary for SIGNAL_STOP_CONTINUED because ++ * an intervening stop signal is required to cause two ++ * continued events regardless of ptrace. ++ */ ++ if (!(sig->flags & SIGNAL_STOP_STOPPED)) ++ sig->group_exit_code = signr; ++ ++ sig->group_stop_count = 0; ++ ++ if (task_set_jobctl_pending(current, signr | gstop)) ++ sig->group_stop_count++; ++ ++ t = current; ++ while_each_thread(current, t) { ++ /* ++ * Setting state to TASK_STOPPED for a group ++ * stop is always done with the siglock held, ++ * so this check has no races. ++ */ ++ if (!task_is_stopped(t) && ++ task_set_jobctl_pending(t, signr | gstop)) { ++ sig->group_stop_count++; ++ if (likely(!(t->ptrace & PT_SEIZED))) ++ signal_wake_up(t, 0); ++ else ++ ptrace_trap_notify(t); ++ } ++ } ++ } ++ ++ if (likely(!current->ptrace)) { ++ int notify = 0; ++ ++ /* ++ * If there are no other threads in the group, or if there ++ * is a group stop in progress and we are the last to stop, ++ * report to the parent. ++ */ ++ if (task_participate_group_stop(current)) ++ notify = CLD_STOPPED; ++ ++ set_special_state(TASK_STOPPED); ++ spin_unlock_irq(¤t->sighand->siglock); ++ ++ /* ++ * Notify the parent of the group stop completion. Because ++ * we're not holding either the siglock or tasklist_lock ++ * here, ptracer may attach inbetween; however, this is for ++ * group stop and should always be delivered to the real ++ * parent of the group leader. The new ptracer will get ++ * its notification when this task transitions into ++ * TASK_TRACED. ++ */ ++ if (notify) { ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(current, false, notify); ++ read_unlock(&tasklist_lock); ++ } ++ ++ /* Now we don't run again until woken by SIGCONT or SIGKILL */ ++ freezable_schedule(); ++ return true; ++ } else { ++ /* ++ * While ptraced, group stop is handled by STOP trap. ++ * Schedule it and let the caller deal with it. ++ */ ++ task_set_jobctl_pending(current, JOBCTL_TRAP_STOP); ++ return false; ++ } ++} ++ ++/** ++ * do_jobctl_trap - take care of ptrace jobctl traps ++ * ++ * When PT_SEIZED, it's used for both group stop and explicit ++ * SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with ++ * accompanying siginfo. If stopped, lower eight bits of exit_code contain ++ * the stop signal; otherwise, %SIGTRAP. ++ * ++ * When !PT_SEIZED, it's used only for group stop trap with stop signal ++ * number as exit_code and no siginfo. ++ * ++ * CONTEXT: ++ * Must be called with @current->sighand->siglock held, which may be ++ * released and re-acquired before returning with intervening sleep. ++ */ ++static void do_jobctl_trap(void) ++{ ++ struct signal_struct *signal = current->signal; ++ int signr = current->jobctl & JOBCTL_STOP_SIGMASK; ++ ++ if (current->ptrace & PT_SEIZED) { ++ if (!signal->group_stop_count && ++ !(signal->flags & SIGNAL_STOP_STOPPED)) ++ signr = SIGTRAP; ++ WARN_ON_ONCE(!signr); ++ ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), ++ CLD_STOPPED); ++ } else { ++ WARN_ON_ONCE(!signr); ++ ptrace_stop(signr, CLD_STOPPED, 0, NULL); ++ current->exit_code = 0; ++ } ++} ++ ++static int ptrace_signal(int signr, siginfo_t *info) ++{ ++ /* ++ * We do not check sig_kernel_stop(signr) but set this marker ++ * unconditionally because we do not know whether debugger will ++ * change signr. This flag has no meaning unless we are going ++ * to stop after return from ptrace_stop(). 
In this case it will ++ * be checked in do_signal_stop(), we should only stop if it was ++ * not cleared by SIGCONT while we were sleeping. See also the ++ * comment in dequeue_signal(). ++ */ ++ current->jobctl |= JOBCTL_STOP_DEQUEUED; ++ ptrace_stop(signr, CLD_TRAPPED, 0, info); ++ ++ /* We're back. Did the debugger cancel the sig? */ ++ signr = current->exit_code; ++ if (signr == 0) ++ return signr; ++ ++ current->exit_code = 0; ++ ++ /* ++ * Update the siginfo structure if the signal has ++ * changed. If the debugger wanted something ++ * specific in the siginfo structure then it should ++ * have updated *info via PTRACE_SETSIGINFO. ++ */ ++ if (signr != info->si_signo) { ++ clear_siginfo(info); ++ info->si_signo = signr; ++ info->si_errno = 0; ++ info->si_code = SI_USER; ++ rcu_read_lock(); ++ info->si_pid = task_pid_vnr(current->parent); ++ info->si_uid = from_kuid_munged(current_user_ns(), ++ task_uid(current->parent)); ++ rcu_read_unlock(); ++ } ++ ++ /* If the (new) signal is now blocked, requeue it. */ ++ if (sigismember(¤t->blocked, signr)) { ++ specific_send_sig_info(signr, info, current); ++ signr = 0; ++ } ++ ++ return signr; ++} ++ ++bool get_signal(struct ksignal *ksig) ++{ ++ struct sighand_struct *sighand = current->sighand; ++ struct signal_struct *signal = current->signal; ++ int signr; ++ ++ if (unlikely(current->task_works)) ++ task_work_run(); ++ ++ if (unlikely(uprobe_deny_signal())) ++ return false; ++ ++ /* ++ * Do this once, we can't return to user-mode if freezing() == T. ++ * do_signal_stop() and ptrace_stop() do freezable_schedule() and ++ * thus do not need another check after return. ++ */ ++ try_to_freeze(); ++ ++relock: ++ spin_lock_irq(&sighand->siglock); ++ /* ++ * Every stopped thread goes here after wakeup. Check to see if ++ * we should notify the parent, prepare_signal(SIGCONT) encodes ++ * the CLD_ si_code into SIGNAL_CLD_MASK bits. ++ */ ++ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { ++ int why; ++ ++ if (signal->flags & SIGNAL_CLD_CONTINUED) ++ why = CLD_CONTINUED; ++ else ++ why = CLD_STOPPED; ++ ++ signal->flags &= ~SIGNAL_CLD_MASK; ++ ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* ++ * Notify the parent that we're continuing. This event is ++ * always per-process and doesn't make whole lot of sense ++ * for ptracers, who shouldn't consume the state via ++ * wait(2) either, but, for backward compatibility, notify ++ * the ptracer of the group leader too unless it's gonna be ++ * a duplicate. ++ */ ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(current, false, why); ++ ++ if (ptrace_reparented(current->group_leader)) ++ do_notify_parent_cldstop(current->group_leader, ++ true, why); ++ read_unlock(&tasklist_lock); ++ ++ goto relock; ++ } ++ ++ /* Has this task already been marked for death? 
*/ ++ if (signal_group_exit(signal)) { ++ ksig->info.si_signo = signr = SIGKILL; ++ sigdelset(¤t->pending.signal, SIGKILL); ++ trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, ++ &sighand->action[SIGKILL - 1]); ++ recalc_sigpending(); ++ goto fatal; ++ } ++ ++ for (;;) { ++ struct k_sigaction *ka; ++ ++ if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && ++ do_signal_stop(0)) ++ goto relock; ++ ++ if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { ++ do_jobctl_trap(); ++ spin_unlock_irq(&sighand->siglock); ++ goto relock; ++ } ++ ++ /* ++ * Signals generated by the execution of an instruction ++ * need to be delivered before any other pending signals ++ * so that the instruction pointer in the signal stack ++ * frame points to the faulting instruction. ++ */ ++ signr = dequeue_synchronous_signal(&ksig->info); ++ if (!signr) ++ signr = dequeue_signal(current, ¤t->blocked, &ksig->info); ++ ++ if (!signr) ++ break; /* will return 0 */ ++ ++ if (unlikely(current->ptrace) && signr != SIGKILL) { ++ signr = ptrace_signal(signr, &ksig->info); ++ if (!signr) ++ continue; ++ } ++ ++ ka = &sighand->action[signr-1]; ++ ++ /* Trace actually delivered signals. */ ++ trace_signal_deliver(signr, &ksig->info, ka); ++ ++ if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ ++ continue; ++ if (ka->sa.sa_handler != SIG_DFL) { ++ /* Run the handler. */ ++ ksig->ka = *ka; ++ ++ if (ka->sa.sa_flags & SA_ONESHOT) ++ ka->sa.sa_handler = SIG_DFL; ++ ++ break; /* will return non-zero "signr" value */ ++ } ++ ++ /* ++ * Now we are doing the default action for this signal. ++ */ ++ if (sig_kernel_ignore(signr)) /* Default is nothing. */ ++ continue; ++ ++ /* ++ * Global init gets no signals it doesn't want. ++ * Container-init gets no signals it doesn't want from same ++ * container. ++ * ++ * Note that if global/container-init sees a sig_kernel_only() ++ * signal here, the signal must have been generated internally ++ * or must have come from an ancestor namespace. In either ++ * case, the signal cannot be dropped. ++ */ ++ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && ++ !sig_kernel_only(signr)) ++ continue; ++ ++ if (sig_kernel_stop(signr)) { ++ /* ++ * The default action is to stop all threads in ++ * the thread group. The job control signals ++ * do nothing in an orphaned pgrp, but SIGSTOP ++ * always works. Note that siglock needs to be ++ * dropped during the call to is_orphaned_pgrp() ++ * because of lock ordering with tasklist_lock. ++ * This allows an intervening SIGCONT to be posted. ++ * We need to check for that and bail out if necessary. ++ */ ++ if (signr != SIGSTOP) { ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* signals can be posted during this window */ ++ ++ if (is_current_pgrp_orphaned()) ++ goto relock; ++ ++ spin_lock_irq(&sighand->siglock); ++ } ++ ++ if (likely(do_signal_stop(ksig->info.si_signo))) { ++ /* It released the siglock. */ ++ goto relock; ++ } ++ ++ /* ++ * We didn't actually stop, due to a race ++ * with SIGCONT or something like that. ++ */ ++ continue; ++ } ++ ++ fatal: ++ spin_unlock_irq(&sighand->siglock); ++ ++ /* ++ * Anything else is fatal, maybe with a core dump. ++ */ ++ current->flags |= PF_SIGNALED; ++ ++ if (sig_kernel_coredump(signr)) { ++ if (print_fatal_signals) ++ print_fatal_signal(ksig->info.si_signo); ++ proc_coredump_connector(current); ++ /* ++ * If it was able to dump core, this kills all ++ * other threads in the group and synchronizes with ++ * their demise. 
If we lost the race with another ++ * thread getting here, it set group_exit_code ++ * first and our do_group_exit call below will use ++ * that value and ignore the one we pass it. ++ */ ++ do_coredump(&ksig->info); ++ } ++ ++ /* ++ * Death signals, no core dump. ++ */ ++ do_group_exit(ksig->info.si_signo); ++ /* NOTREACHED */ ++ } ++ spin_unlock_irq(&sighand->siglock); ++ ++ ksig->sig = signr; ++ return ksig->sig > 0; ++} ++ ++/** ++ * signal_delivered - ++ * @ksig: kernel signal struct ++ * @stepping: nonzero if debugger single-step or block-step in use ++ * ++ * This function should be called when a signal has successfully been ++ * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask ++ * is always blocked, and the signal itself is blocked unless %SA_NODEFER ++ * is set in @ksig->ka.sa.sa_flags. Tracing is notified. ++ */ ++static void signal_delivered(struct ksignal *ksig, int stepping) ++{ ++ sigset_t blocked; ++ ++ /* A signal was successfully delivered, and the ++ saved sigmask was stored on the signal frame, ++ and will be restored by sigreturn. So we can ++ simply clear the restore sigmask flag. */ ++ clear_restore_sigmask(); ++ ++ sigorsets(&blocked, ¤t->blocked, &ksig->ka.sa.sa_mask); ++ if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) ++ sigaddset(&blocked, ksig->sig); ++ set_current_blocked(&blocked); ++ tracehook_signal_handler(stepping); ++} ++ ++void signal_setup_done(int failed, struct ksignal *ksig, int stepping) ++{ ++ if (failed) ++ force_sigsegv(ksig->sig, current); ++ else ++ signal_delivered(ksig, stepping); ++} ++ ++/* ++ * It could be that complete_signal() picked us to notify about the ++ * group-wide signal. Other threads should be notified now to take ++ * the shared signals in @which since we will not. ++ */ ++static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) ++{ ++ sigset_t retarget; ++ struct task_struct *t; ++ ++ sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); ++ if (sigisemptyset(&retarget)) ++ return; ++ ++ t = tsk; ++ while_each_thread(tsk, t) { ++ if (t->flags & PF_EXITING) ++ continue; ++ ++ if (!has_pending_signals(&retarget, &t->blocked)) ++ continue; ++ /* Remove the signals this thread can handle. */ ++ sigandsets(&retarget, &retarget, &t->blocked); ++ ++ if (!signal_pending(t)) ++ signal_wake_up(t, 0); ++ ++ if (sigisemptyset(&retarget)) ++ break; ++ } ++} ++ ++void exit_signals(struct task_struct *tsk) ++{ ++ int group_stop = 0; ++ sigset_t unblocked; ++ ++ /* ++ * @tsk is about to have PF_EXITING set - lock out users which ++ * expect stable threadgroup. ++ */ ++ cgroup_threadgroup_change_begin(tsk); ++ ++ if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { ++ tsk->flags |= PF_EXITING; ++ cgroup_threadgroup_change_end(tsk); ++ return; ++ } ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ /* ++ * From now this task is not visible for group-wide signals, ++ * see wants_signal(), do_signal_stop(). ++ */ ++ tsk->flags |= PF_EXITING; ++ ++ cgroup_threadgroup_change_end(tsk); ++ ++ if (!signal_pending(tsk)) ++ goto out; ++ ++ unblocked = tsk->blocked; ++ signotset(&unblocked); ++ retarget_shared_pending(tsk, &unblocked); ++ ++ if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) && ++ task_participate_group_stop(tsk)) ++ group_stop = CLD_STOPPED; ++out: ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ /* ++ * If group stop has completed, deliver the notification. This ++ * should always go to the real parent of the group leader. 
++ */ ++ if (unlikely(group_stop)) { ++ read_lock(&tasklist_lock); ++ do_notify_parent_cldstop(tsk, false, group_stop); ++ read_unlock(&tasklist_lock); ++ } ++} ++ ++EXPORT_SYMBOL(recalc_sigpending); ++EXPORT_SYMBOL_GPL(dequeue_signal); ++EXPORT_SYMBOL(flush_signals); ++EXPORT_SYMBOL(force_sig); ++EXPORT_SYMBOL(send_sig); ++EXPORT_SYMBOL(send_sig_info); ++EXPORT_SYMBOL(sigprocmask); ++ ++/* ++ * System call entry points. ++ */ ++ ++/** ++ * sys_restart_syscall - restart a system call ++ */ ++SYSCALL_DEFINE0(restart_syscall) ++{ ++ struct restart_block *restart = ¤t->restart_block; ++ return restart->fn(restart); ++} ++ ++long do_no_restart_syscall(struct restart_block *param) ++{ ++ return -EINTR; ++} ++ ++static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) ++{ ++ if (signal_pending(tsk) && !thread_group_empty(tsk)) { ++ sigset_t newblocked; ++ /* A set of now blocked but previously unblocked signals. */ ++ sigandnsets(&newblocked, newset, ¤t->blocked); ++ retarget_shared_pending(tsk, &newblocked); ++ } ++ tsk->blocked = *newset; ++ recalc_sigpending(); ++} ++ ++/** ++ * set_current_blocked - change current->blocked mask ++ * @newset: new mask ++ * ++ * It is wrong to change ->blocked directly, this helper should be used ++ * to ensure the process can't miss a shared signal we are going to block. ++ */ ++void set_current_blocked(sigset_t *newset) ++{ ++ sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ __set_current_blocked(newset); ++} ++ ++void __set_current_blocked(const sigset_t *newset) ++{ ++ struct task_struct *tsk = current; ++ ++ /* ++ * In case the signal mask hasn't changed, there is nothing we need ++ * to do. The current->blocked shouldn't be modified by other task. ++ */ ++ if (sigequalsets(&tsk->blocked, newset)) ++ return; ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ __set_task_blocked(tsk, newset); ++ spin_unlock_irq(&tsk->sighand->siglock); ++} ++ ++/* ++ * This is also useful for kernel threads that want to temporarily ++ * (or permanently) block certain signals. ++ * ++ * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel ++ * interface happily blocks "unblockable" signals like SIGKILL ++ * and friends. ++ */ ++int sigprocmask(int how, sigset_t *set, sigset_t *oldset) ++{ ++ struct task_struct *tsk = current; ++ sigset_t newset; ++ ++ /* Lockless, only current can change ->blocked, never from irq */ ++ if (oldset) ++ *oldset = tsk->blocked; ++ ++ switch (how) { ++ case SIG_BLOCK: ++ sigorsets(&newset, &tsk->blocked, set); ++ break; ++ case SIG_UNBLOCK: ++ sigandnsets(&newset, &tsk->blocked, set); ++ break; ++ case SIG_SETMASK: ++ newset = *set; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ __set_current_blocked(&newset); ++ return 0; ++} ++ ++/** ++ * sys_rt_sigprocmask - change the list of currently blocked signals ++ * @how: whether to add, remove, or set signals ++ * @nset: stores pending signals ++ * @oset: previous value of signal mask if non-null ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, ++ sigset_t __user *, oset, size_t, sigsetsize) ++{ ++ sigset_t old_set, new_set; ++ int error; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ old_set = current->blocked; ++ ++ if (nset) { ++ if (copy_from_user(&new_set, nset, sizeof(sigset_t))) ++ return -EFAULT; ++ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); ++ ++ error = sigprocmask(how, &new_set, NULL); ++ if (error) ++ return error; ++ } ++ ++ if (oset) { ++ if (copy_to_user(oset, &old_set, sizeof(sigset_t))) ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, ++ compat_sigset_t __user *, oset, compat_size_t, sigsetsize) ++{ ++ sigset_t old_set = current->blocked; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (nset) { ++ sigset_t new_set; ++ int error; ++ if (get_compat_sigset(&new_set, nset)) ++ return -EFAULT; ++ sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); ++ ++ error = sigprocmask(how, &new_set, NULL); ++ if (error) ++ return error; ++ } ++ return oset ? put_compat_sigset(oset, &old_set, sizeof(*oset)) : 0; ++} ++#endif ++ ++static void do_sigpending(sigset_t *set) ++{ ++ spin_lock_irq(¤t->sighand->siglock); ++ sigorsets(set, ¤t->pending.signal, ++ ¤t->signal->shared_pending.signal); ++ spin_unlock_irq(¤t->sighand->siglock); ++ ++ /* Outside the lock because only this thread touches it. */ ++ sigandsets(set, ¤t->blocked, set); ++} ++ ++/** ++ * sys_rt_sigpending - examine a pending signal that has been raised ++ * while blocked ++ * @uset: stores pending signals ++ * @sigsetsize: size of sigset_t type or larger ++ */ ++SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) ++{ ++ sigset_t set; ++ ++ if (sigsetsize > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ if (copy_to_user(uset, &set, sigsetsize)) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, ++ compat_size_t, sigsetsize) ++{ ++ sigset_t set; ++ ++ if (sigsetsize > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ return put_compat_sigset(uset, &set, sigsetsize); ++} ++#endif ++ ++enum siginfo_layout siginfo_layout(unsigned sig, int si_code) ++{ ++ enum siginfo_layout layout = SIL_KILL; ++ if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { ++ static const struct { ++ unsigned char limit, layout; ++ } filter[] = { ++ [SIGILL] = { NSIGILL, SIL_FAULT }, ++ [SIGFPE] = { NSIGFPE, SIL_FAULT }, ++ [SIGSEGV] = { NSIGSEGV, SIL_FAULT }, ++ [SIGBUS] = { NSIGBUS, SIL_FAULT }, ++ [SIGTRAP] = { NSIGTRAP, SIL_FAULT }, ++#if defined(SIGEMT) && defined(NSIGEMT) ++ [SIGEMT] = { NSIGEMT, SIL_FAULT }, ++#endif ++ [SIGCHLD] = { NSIGCHLD, SIL_CHLD }, ++ [SIGPOLL] = { NSIGPOLL, SIL_POLL }, ++ [SIGSYS] = { NSIGSYS, SIL_SYS }, ++ }; ++ if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) { ++ layout = filter[sig].layout; ++ /* Handle the exceptions */ ++ if ((sig == SIGBUS) && ++ (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO)) ++ layout = SIL_FAULT_MCEERR; ++ else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR)) ++ layout = SIL_FAULT_BNDERR; ++#ifdef SEGV_PKUERR ++ else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR)) ++ layout = SIL_FAULT_PKUERR; ++#endif ++ } ++ else if (si_code <= NSIGPOLL) ++ layout = SIL_POLL; ++ } else { ++ if (si_code == SI_TIMER) ++ layout = SIL_TIMER; ++ else if (si_code == SI_SIGIO) ++ layout = SIL_POLL; ++ else if (si_code < 0) ++ layout = SIL_RT; ++ } ++ 
return layout; ++} ++ ++int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) ++{ ++ if (copy_to_user(to, from , sizeof(struct siginfo))) ++ return -EFAULT; ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++int copy_siginfo_to_user32(struct compat_siginfo __user *to, ++ const struct siginfo *from) ++#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) ++{ ++ return __copy_siginfo_to_user32(to, from, in_x32_syscall()); ++} ++int __copy_siginfo_to_user32(struct compat_siginfo __user *to, ++ const struct siginfo *from, bool x32_ABI) ++#endif ++{ ++ struct compat_siginfo new; ++ memset(&new, 0, sizeof(new)); ++ ++ new.si_signo = from->si_signo; ++ new.si_errno = from->si_errno; ++ new.si_code = from->si_code; ++ switch(siginfo_layout(from->si_signo, from->si_code)) { ++ case SIL_KILL: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ break; ++ case SIL_TIMER: ++ new.si_tid = from->si_tid; ++ new.si_overrun = from->si_overrun; ++ new.si_int = from->si_int; ++ break; ++ case SIL_POLL: ++ new.si_band = from->si_band; ++ new.si_fd = from->si_fd; ++ break; ++ case SIL_FAULT: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ break; ++ case SIL_FAULT_MCEERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_addr_lsb = from->si_addr_lsb; ++ break; ++ case SIL_FAULT_BNDERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_lower = ptr_to_compat(from->si_lower); ++ new.si_upper = ptr_to_compat(from->si_upper); ++ break; ++ case SIL_FAULT_PKUERR: ++ new.si_addr = ptr_to_compat(from->si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ new.si_trapno = from->si_trapno; ++#endif ++ new.si_pkey = from->si_pkey; ++ break; ++ case SIL_CHLD: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ new.si_status = from->si_status; ++#ifdef CONFIG_X86_X32_ABI ++ if (x32_ABI) { ++ new._sifields._sigchld_x32._utime = from->si_utime; ++ new._sifields._sigchld_x32._stime = from->si_stime; ++ } else ++#endif ++ { ++ new.si_utime = from->si_utime; ++ new.si_stime = from->si_stime; ++ } ++ break; ++ case SIL_RT: ++ new.si_pid = from->si_pid; ++ new.si_uid = from->si_uid; ++ new.si_int = from->si_int; ++ break; ++ case SIL_SYS: ++ new.si_call_addr = ptr_to_compat(from->si_call_addr); ++ new.si_syscall = from->si_syscall; ++ new.si_arch = from->si_arch; ++ break; ++ } ++ ++ if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++int copy_siginfo_from_user32(struct siginfo *to, ++ const struct compat_siginfo __user *ufrom) ++{ ++ struct compat_siginfo from; ++ ++ if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) ++ return -EFAULT; ++ ++ clear_siginfo(to); ++ to->si_signo = from.si_signo; ++ to->si_errno = from.si_errno; ++ to->si_code = from.si_code; ++ switch(siginfo_layout(from.si_signo, from.si_code)) { ++ case SIL_KILL: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ break; ++ case SIL_TIMER: ++ to->si_tid = from.si_tid; ++ to->si_overrun = from.si_overrun; ++ to->si_int = from.si_int; ++ break; ++ case SIL_POLL: ++ to->si_band = from.si_band; ++ to->si_fd = from.si_fd; ++ break; ++ case SIL_FAULT: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ break; ++ case SIL_FAULT_MCEERR: ++ to->si_addr = compat_ptr(from.si_addr); 
++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_addr_lsb = from.si_addr_lsb; ++ break; ++ case SIL_FAULT_BNDERR: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_lower = compat_ptr(from.si_lower); ++ to->si_upper = compat_ptr(from.si_upper); ++ break; ++ case SIL_FAULT_PKUERR: ++ to->si_addr = compat_ptr(from.si_addr); ++#ifdef __ARCH_SI_TRAPNO ++ to->si_trapno = from.si_trapno; ++#endif ++ to->si_pkey = from.si_pkey; ++ break; ++ case SIL_CHLD: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ to->si_status = from.si_status; ++#ifdef CONFIG_X86_X32_ABI ++ if (in_x32_syscall()) { ++ to->si_utime = from._sifields._sigchld_x32._utime; ++ to->si_stime = from._sifields._sigchld_x32._stime; ++ } else ++#endif ++ { ++ to->si_utime = from.si_utime; ++ to->si_stime = from.si_stime; ++ } ++ break; ++ case SIL_RT: ++ to->si_pid = from.si_pid; ++ to->si_uid = from.si_uid; ++ to->si_int = from.si_int; ++ break; ++ case SIL_SYS: ++ to->si_call_addr = compat_ptr(from.si_call_addr); ++ to->si_syscall = from.si_syscall; ++ to->si_arch = from.si_arch; ++ break; ++ } ++ return 0; ++} ++#endif /* CONFIG_COMPAT */ ++ ++/** ++ * do_sigtimedwait - wait for queued signals specified in @which ++ * @which: queued signals to wait for ++ * @info: if non-null, the signal's siginfo is returned here ++ * @ts: upper bound on process time suspension ++ */ ++static int do_sigtimedwait(const sigset_t *which, siginfo_t *info, ++ const struct timespec *ts) ++{ ++ ktime_t *to = NULL, timeout = KTIME_MAX; ++ struct task_struct *tsk = current; ++ sigset_t mask = *which; ++ int sig, ret = 0; ++ ++ if (ts) { ++ if (!timespec_valid(ts)) ++ return -EINVAL; ++ timeout = timespec_to_ktime(*ts); ++ to = &timeout; ++ } ++ ++ /* ++ * Invert the set of allowed signals to get those we want to block. ++ */ ++ sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ signotset(&mask); ++ ++ spin_lock_irq(&tsk->sighand->siglock); ++ sig = dequeue_signal(tsk, &mask, info); ++ if (!sig && timeout) { ++ /* ++ * None ready, temporarily unblock those we're interested ++ * while we are sleeping in so that we'll be awakened when ++ * they arrive. Unblocking is always fine, we can avoid ++ * set_current_blocked(). ++ */ ++ tsk->real_blocked = tsk->blocked; ++ sigandsets(&tsk->blocked, &tsk->blocked, &mask); ++ recalc_sigpending(); ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ ret = freezable_schedule_hrtimeout_range(to, tsk->timer_slack_ns, ++ HRTIMER_MODE_REL); ++ spin_lock_irq(&tsk->sighand->siglock); ++ __set_task_blocked(tsk, &tsk->real_blocked); ++ sigemptyset(&tsk->real_blocked); ++ sig = dequeue_signal(tsk, &mask, info); ++ } ++ spin_unlock_irq(&tsk->sighand->siglock); ++ ++ if (sig) ++ return sig; ++ return ret ? -EINTR : -EAGAIN; ++} ++ ++/** ++ * sys_rt_sigtimedwait - synchronously wait for queued signals specified ++ * in @uthese ++ * @uthese: queued signals to wait for ++ * @uinfo: if non-null, the signal's siginfo is returned here ++ * @uts: upper bound on process time suspension ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, ++ siginfo_t __user *, uinfo, const struct timespec __user *, uts, ++ size_t, sigsetsize) ++{ ++ sigset_t these; ++ struct timespec ts; ++ siginfo_t info; ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (copy_from_user(&these, uthese, sizeof(these))) ++ return -EFAULT; ++ ++ if (uts) { ++ if (copy_from_user(&ts, uts, sizeof(ts))) ++ return -EFAULT; ++ } ++ ++ ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); ++ ++ if (ret > 0 && uinfo) { ++ if (copy_siginfo_to_user(uinfo, &info)) ++ ret = -EFAULT; ++ } ++ ++ return ret; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, ++ struct compat_siginfo __user *, uinfo, ++ struct compat_timespec __user *, uts, compat_size_t, sigsetsize) ++{ ++ sigset_t s; ++ struct timespec t; ++ siginfo_t info; ++ long ret; ++ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (get_compat_sigset(&s, uthese)) ++ return -EFAULT; ++ ++ if (uts) { ++ if (compat_get_timespec(&t, uts)) ++ return -EFAULT; ++ } ++ ++ ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); ++ ++ if (ret > 0 && uinfo) { ++ if (copy_siginfo_to_user32(uinfo, &info)) ++ ret = -EFAULT; ++ } ++ ++ return ret; ++} ++#endif ++ ++/** ++ * sys_kill - send a signal to a process ++ * @pid: the PID of the process ++ * @sig: signal to be sent ++ */ ++SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = SI_USER; ++ info.si_pid = task_tgid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ return kill_something_info(sig, &info, pid); ++} ++ ++static int ++do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) ++{ ++ struct task_struct *p; ++ int error = -ESRCH; ++ ++ rcu_read_lock(); ++ p = find_task_by_vpid(pid); ++ if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { ++ error = check_kill_permission(sig, info, p); ++ /* ++ * The null signal is a permissions and process existence ++ * probe. No signal is actually delivered. ++ */ ++ if (!error && sig) { ++ error = do_send_sig_info(sig, info, p, PIDTYPE_PID); ++ /* ++ * If lock_task_sighand() failed we pretend the task ++ * dies after receiving the signal. The window is tiny, ++ * and the signal is private anyway. ++ */ ++ if (unlikely(error == -ESRCH)) ++ error = 0; ++ } ++ } ++ rcu_read_unlock(); ++ ++ return error; ++} ++ ++static int do_tkill(pid_t tgid, pid_t pid, int sig) ++{ ++ struct siginfo info; ++ ++ clear_siginfo(&info); ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = SI_TKILL; ++ info.si_pid = task_tgid_vnr(current); ++ info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); ++ ++ return do_send_specific(tgid, pid, sig, &info); ++} ++ ++/** ++ * sys_tgkill - send signal to one specific thread ++ * @tgid: the thread group ID of the thread ++ * @pid: the PID of the thread ++ * @sig: signal to be sent ++ * ++ * This syscall also checks the @tgid and returns -ESRCH even if the PID ++ * exists but it's not belonging to the target process anymore. This ++ * method solves the problem of threads exiting and PIDs getting reused. ++ */ ++SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0 || tgid <= 0) ++ return -EINVAL; ++ ++ return do_tkill(tgid, pid, sig); ++} ++ ++/** ++ * sys_tkill - send signal to one specific task ++ * @pid: the PID of the task ++ * @sig: signal to be sent ++ * ++ * Send a signal to only one task, even if it's a CLONE_THREAD task. 
++ */ ++SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0) ++ return -EINVAL; ++ ++ return do_tkill(0, pid, sig); ++} ++ ++static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) ++{ ++ /* Not even root can pretend to send signals from the kernel. ++ * Nor can they impersonate a kill()/tgkill(), which adds source info. ++ */ ++ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && ++ (task_pid_vnr(current) != pid)) ++ return -EPERM; ++ ++ info->si_signo = sig; ++ ++ /* POSIX.1b doesn't mention process groups. */ ++ return kill_proc_info(sig, info, pid); ++} ++ ++/** ++ * sys_rt_sigqueueinfo - send signal information to a signal ++ * @pid: the PID of the thread ++ * @sig: signal to be sent ++ * @uinfo: signal info to be sent ++ */ ++SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, ++ siginfo_t __user *, uinfo) ++{ ++ siginfo_t info; ++ if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) ++ return -EFAULT; ++ return do_rt_sigqueueinfo(pid, sig, &info); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, ++ compat_pid_t, pid, ++ int, sig, ++ struct compat_siginfo __user *, uinfo) ++{ ++ siginfo_t info; ++ int ret = copy_siginfo_from_user32(&info, uinfo); ++ if (unlikely(ret)) ++ return ret; ++ return do_rt_sigqueueinfo(pid, sig, &info); ++} ++#endif ++ ++static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0 || tgid <= 0) ++ return -EINVAL; ++ ++ /* Not even root can pretend to send signals from the kernel. ++ * Nor can they impersonate a kill()/tgkill(), which adds source info. ++ */ ++ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && ++ (task_pid_vnr(current) != pid)) ++ return -EPERM; ++ ++ info->si_signo = sig; ++ ++ return do_send_specific(tgid, pid, sig, info); ++} ++ ++SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, ++ siginfo_t __user *, uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) ++ return -EFAULT; ++ ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, ++ compat_pid_t, tgid, ++ compat_pid_t, pid, ++ int, sig, ++ struct compat_siginfo __user *, uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_siginfo_from_user32(&info, uinfo)) ++ return -EFAULT; ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++#endif ++ ++/* ++ * For kthreads only, must not be used if cloned with CLONE_SIGHAND ++ */ ++void kernel_sigaction(int sig, __sighandler_t action) ++{ ++ spin_lock_irq(¤t->sighand->siglock); ++ current->sighand->action[sig - 1].sa.sa_handler = action; ++ if (action == SIG_IGN) { ++ sigset_t mask; ++ ++ sigemptyset(&mask); ++ sigaddset(&mask, sig); ++ ++ flush_sigqueue_mask(&mask, ¤t->signal->shared_pending); ++ flush_sigqueue_mask(&mask, ¤t->pending); ++ recalc_sigpending(); ++ } ++ spin_unlock_irq(¤t->sighand->siglock); ++} ++EXPORT_SYMBOL(kernel_sigaction); ++ ++void __weak sigaction_compat_abi(struct k_sigaction *act, ++ struct k_sigaction *oact) ++{ ++} ++ ++int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) ++{ ++ struct task_struct *p = current, *t; ++ struct k_sigaction *k; ++ sigset_t mask; ++ ++ if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) ++ return -EINVAL; ++ ++ k = &p->sighand->action[sig-1]; ++ ++ spin_lock_irq(&p->sighand->siglock); ++ if (oact) ++ *oact = *k; ++ ++ 
sigaction_compat_abi(act, oact); ++ ++ if (act) { ++ sigdelsetmask(&act->sa.sa_mask, ++ sigmask(SIGKILL) | sigmask(SIGSTOP)); ++ *k = *act; ++ /* ++ * POSIX 3.3.1.3: ++ * "Setting a signal action to SIG_IGN for a signal that is ++ * pending shall cause the pending signal to be discarded, ++ * whether or not it is blocked." ++ * ++ * "Setting a signal action to SIG_DFL for a signal that is ++ * pending and whose default action is to ignore the signal ++ * (for example, SIGCHLD), shall cause the pending signal to ++ * be discarded, whether or not it is blocked" ++ */ ++ if (sig_handler_ignored(sig_handler(p, sig), sig)) { ++ sigemptyset(&mask); ++ sigaddset(&mask, sig); ++ flush_sigqueue_mask(&mask, &p->signal->shared_pending); ++ for_each_thread(p, t) ++ flush_sigqueue_mask(&mask, &t->pending); ++ } ++ } ++ ++ spin_unlock_irq(&p->sighand->siglock); ++ return 0; ++} ++ ++static int ++do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ++ size_t min_ss_size) ++{ ++ struct task_struct *t = current; ++ ++ if (oss) { ++ memset(oss, 0, sizeof(stack_t)); ++ oss->ss_sp = (void __user *) t->sas_ss_sp; ++ oss->ss_size = t->sas_ss_size; ++ oss->ss_flags = sas_ss_flags(sp) | ++ (current->sas_ss_flags & SS_FLAG_BITS); ++ } ++ ++ if (ss) { ++ void __user *ss_sp = ss->ss_sp; ++ size_t ss_size = ss->ss_size; ++ unsigned ss_flags = ss->ss_flags; ++ int ss_mode; ++ ++ if (unlikely(on_sig_stack(sp))) ++ return -EPERM; ++ ++ ss_mode = ss_flags & ~SS_FLAG_BITS; ++ if (unlikely(ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK && ++ ss_mode != 0)) ++ return -EINVAL; ++ ++ if (ss_mode == SS_DISABLE) { ++ ss_size = 0; ++ ss_sp = NULL; ++ } else { ++ if (unlikely(ss_size < min_ss_size)) ++ return -ENOMEM; ++ } ++ ++ t->sas_ss_sp = (unsigned long) ss_sp; ++ t->sas_ss_size = ss_size; ++ t->sas_ss_flags = ss_flags; ++ } ++ return 0; ++} ++ ++SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) ++{ ++ stack_t new, old; ++ int err; ++ if (uss && copy_from_user(&new, uss, sizeof(stack_t))) ++ return -EFAULT; ++ err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, ++ current_user_stack_pointer(), ++ MINSIGSTKSZ); ++ if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) ++ err = -EFAULT; ++ return err; ++} ++ ++int restore_altstack(const stack_t __user *uss) ++{ ++ stack_t new; ++ if (copy_from_user(&new, uss, sizeof(stack_t))) ++ return -EFAULT; ++ (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), ++ MINSIGSTKSZ); ++ /* squash all but EFAULT for now */ ++ return 0; ++} ++ ++int __save_altstack(stack_t __user *uss, unsigned long sp) ++{ ++ struct task_struct *t = current; ++ int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | ++ __put_user(t->sas_ss_flags, &uss->ss_flags) | ++ __put_user(t->sas_ss_size, &uss->ss_size); ++ if (err) ++ return err; ++ if (t->sas_ss_flags & SS_AUTODISARM) ++ sas_ss_reset(t); ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, ++ compat_stack_t __user *uoss_ptr) ++{ ++ stack_t uss, uoss; ++ int ret; ++ ++ if (uss_ptr) { ++ compat_stack_t uss32; ++ if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) ++ return -EFAULT; ++ uss.ss_sp = compat_ptr(uss32.ss_sp); ++ uss.ss_flags = uss32.ss_flags; ++ uss.ss_size = uss32.ss_size; ++ } ++ ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, ++ compat_user_stack_pointer(), ++ COMPAT_MINSIGSTKSZ); ++ if (ret >= 0 && uoss_ptr) { ++ compat_stack_t old; ++ memset(&old, 0, sizeof(old)); ++ old.ss_sp = ptr_to_compat(uoss.ss_sp); ++ old.ss_flags = uoss.ss_flags; ++ old.ss_size = uoss.ss_size; ++ if (copy_to_user(uoss_ptr, &old, sizeof(compat_stack_t))) ++ ret = -EFAULT; ++ } ++ return ret; ++} ++ ++COMPAT_SYSCALL_DEFINE2(sigaltstack, ++ const compat_stack_t __user *, uss_ptr, ++ compat_stack_t __user *, uoss_ptr) ++{ ++ return do_compat_sigaltstack(uss_ptr, uoss_ptr); ++} ++ ++int compat_restore_altstack(const compat_stack_t __user *uss) ++{ ++ int err = do_compat_sigaltstack(uss, NULL); ++ /* squash all but -EFAULT for now */ ++ return err == -EFAULT ? err : 0; ++} ++ ++int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) ++{ ++ int err; ++ struct task_struct *t = current; ++ err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), ++ &uss->ss_sp) | ++ __put_user(t->sas_ss_flags, &uss->ss_flags) | ++ __put_user(t->sas_ss_size, &uss->ss_size); ++ if (err) ++ return err; ++ if (t->sas_ss_flags & SS_AUTODISARM) ++ sas_ss_reset(t); ++ return 0; ++} ++#endif ++ ++#ifdef __ARCH_WANT_SYS_SIGPENDING ++ ++/** ++ * sys_sigpending - examine pending signals ++ * @uset: where mask of pending signal is returned ++ */ ++SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) ++{ ++ sigset_t set; ++ ++ if (sizeof(old_sigset_t) > sizeof(*uset)) ++ return -EINVAL; ++ ++ do_sigpending(&set); ++ ++ if (copy_to_user(uset, &set, sizeof(old_sigset_t))) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) ++{ ++ sigset_t set; ++ ++ do_sigpending(&set); ++ ++ return put_user(set.sig[0], set32); ++} ++#endif ++ ++#endif ++ ++#ifdef __ARCH_WANT_SYS_SIGPROCMASK ++/** ++ * sys_sigprocmask - examine and change blocked signals ++ * @how: whether to add, remove, or set signals ++ * @nset: signals to add or remove (if non-null) ++ * @oset: previous value of signal mask if non-null ++ * ++ * Some platforms have their own version with special arguments; ++ * others support only sys_rt_sigprocmask. ++ */ ++ ++SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, ++ old_sigset_t __user *, oset) ++{ ++ old_sigset_t old_set, new_set; ++ sigset_t new_blocked; ++ ++ old_set = current->blocked.sig[0]; ++ ++ if (nset) { ++ if (copy_from_user(&new_set, nset, sizeof(*nset))) ++ return -EFAULT; ++ ++ new_blocked = current->blocked; ++ ++ switch (how) { ++ case SIG_BLOCK: ++ sigaddsetmask(&new_blocked, new_set); ++ break; ++ case SIG_UNBLOCK: ++ sigdelsetmask(&new_blocked, new_set); ++ break; ++ case SIG_SETMASK: ++ new_blocked.sig[0] = new_set; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ set_current_blocked(&new_blocked); ++ } ++ ++ if (oset) { ++ if (copy_to_user(oset, &old_set, sizeof(*oset))) ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++#endif /* __ARCH_WANT_SYS_SIGPROCMASK */ ++ ++#ifndef CONFIG_ODD_RT_SIGACTION ++/** ++ * sys_rt_sigaction - alter an action taken by a process ++ * @sig: signal to be sent ++ * @act: new sigaction ++ * @oact: used to save the previous sigaction ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE4(rt_sigaction, int, sig, ++ const struct sigaction __user *, act, ++ struct sigaction __user *, oact, ++ size_t, sigsetsize) ++{ ++ struct k_sigaction new_sa, old_sa; ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) ++ return -EFAULT; ++ ++ ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); ++ if (ret) ++ return ret; ++ ++ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) ++ return -EFAULT; ++ ++ return 0; ++} ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, ++ const struct compat_sigaction __user *, act, ++ struct compat_sigaction __user *, oact, ++ compat_size_t, sigsetsize) ++{ ++ struct k_sigaction new_ka, old_ka; ++#ifdef __ARCH_HAS_SA_RESTORER ++ compat_uptr_t restorer; ++#endif ++ int ret; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(compat_sigset_t)) ++ return -EINVAL; ++ ++ if (act) { ++ compat_uptr_t handler; ++ ret = get_user(handler, &act->sa_handler); ++ new_ka.sa.sa_handler = compat_ptr(handler); ++#ifdef __ARCH_HAS_SA_RESTORER ++ ret |= get_user(restorer, &act->sa_restorer); ++ new_ka.sa.sa_restorer = compat_ptr(restorer); ++#endif ++ ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask); ++ ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); ++ if (ret) ++ return -EFAULT; ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); ++ if (!ret && oact) { ++ ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), ++ &oact->sa_handler); ++ ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask, ++ sizeof(oact->sa_mask)); ++ ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); ++#ifdef __ARCH_HAS_SA_RESTORER ++ ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), ++ &oact->sa_restorer); ++#endif ++ } ++ return ret; ++} ++#endif ++#endif /* !CONFIG_ODD_RT_SIGACTION */ ++ ++#ifdef CONFIG_OLD_SIGACTION ++SYSCALL_DEFINE3(sigaction, int, sig, ++ const struct old_sigaction __user *, act, ++ struct old_sigaction __user *, oact) ++{ ++ struct k_sigaction new_ka, old_ka; ++ int ret; ++ ++ if (act) { ++ old_sigset_t mask; ++ if (!access_ok(act, sizeof(*act)) || ++ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || ++ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || ++ __get_user(new_ka.sa.sa_flags, &act->sa_flags) || ++ __get_user(mask, &act->sa_mask)) ++ return -EFAULT; ++#ifdef __ARCH_HAS_KA_RESTORER ++ new_ka.ka_restorer = NULL; ++#endif ++ siginitset(&new_ka.sa.sa_mask, mask); ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); ++ ++ if (!ret && oact) { ++ if (!access_ok(oact, sizeof(*oact)) || ++ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || ++ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || ++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || ++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) ++ return -EFAULT; ++ } ++ ++ return ret; ++} ++#endif ++#ifdef CONFIG_COMPAT_OLD_SIGACTION ++COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, ++ const struct compat_old_sigaction __user *, act, ++ struct compat_old_sigaction __user *, oact) ++{ ++ struct k_sigaction new_ka, old_ka; ++ int ret; ++ compat_old_sigset_t mask; ++ compat_uptr_t handler, restorer; ++ ++ if (act) { ++ if (!access_ok(act, sizeof(*act)) || ++ __get_user(handler, &act->sa_handler) || ++ __get_user(restorer, &act->sa_restorer) || ++ __get_user(new_ka.sa.sa_flags, &act->sa_flags) || ++ __get_user(mask, &act->sa_mask)) ++ return -EFAULT; ++ ++#ifdef __ARCH_HAS_KA_RESTORER ++ new_ka.ka_restorer = NULL; ++#endif ++ new_ka.sa.sa_handler = compat_ptr(handler); ++ new_ka.sa.sa_restorer = compat_ptr(restorer); ++ siginitset(&new_ka.sa.sa_mask, mask); ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); ++ ++ if (!ret && oact) { ++ if (!access_ok(oact, sizeof(*oact)) || ++ __put_user(ptr_to_compat(old_ka.sa.sa_handler), ++ &oact->sa_handler) || ++ __put_user(ptr_to_compat(old_ka.sa.sa_restorer), ++ &oact->sa_restorer) || ++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || ++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) ++ return -EFAULT; ++ } ++ return ret; ++} ++#endif ++ ++#ifdef CONFIG_SGETMASK_SYSCALL ++ ++/* ++ * For backwards compatibility. Functionality superseded by sigprocmask. ++ */ ++SYSCALL_DEFINE0(sgetmask) ++{ ++ /* SMP safe */ ++ return current->blocked.sig[0]; ++} ++ ++SYSCALL_DEFINE1(ssetmask, int, newmask) ++{ ++ int old = current->blocked.sig[0]; ++ sigset_t newset; ++ ++ siginitset(&newset, newmask); ++ set_current_blocked(&newset); ++ ++ return old; ++} ++#endif /* CONFIG_SGETMASK_SYSCALL */ ++ ++#ifdef __ARCH_WANT_SYS_SIGNAL ++/* ++ * For backwards compatibility. Functionality superseded by sigaction. ++ */ ++SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) ++{ ++ struct k_sigaction new_sa, old_sa; ++ int ret; ++ ++ new_sa.sa.sa_handler = handler; ++ new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; ++ sigemptyset(&new_sa.sa.sa_mask); ++ ++ ret = do_sigaction(sig, &new_sa, &old_sa); ++ ++ return ret ? ret : (unsigned long)old_sa.sa.sa_handler; ++} ++#endif /* __ARCH_WANT_SYS_SIGNAL */ ++ ++#ifdef __ARCH_WANT_SYS_PAUSE ++ ++SYSCALL_DEFINE0(pause) ++{ ++ while (!signal_pending(current)) { ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ } ++ return -ERESTARTNOHAND; ++} ++ ++#endif ++ ++static int sigsuspend(sigset_t *set) ++{ ++ current->saved_sigmask = current->blocked; ++ set_current_blocked(set); ++ ++ while (!signal_pending(current)) { ++ __set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ } ++ set_restore_sigmask(); ++ return -ERESTARTNOHAND; ++} ++ ++/** ++ * sys_rt_sigsuspend - replace the signal mask for a value with the ++ * @unewset value until a signal is received ++ * @unewset: new signal mask value ++ * @sigsetsize: size of sigset_t type ++ */ ++SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) ++{ ++ sigset_t newset; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. 
*/ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (copy_from_user(&newset, unewset, sizeof(newset))) ++ return -EFAULT; ++ return sigsuspend(&newset); ++} ++ ++#ifdef CONFIG_COMPAT ++COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize) ++{ ++ sigset_t newset; ++ ++ /* XXX: Don't preclude handling different sized sigset_t's. */ ++ if (sigsetsize != sizeof(sigset_t)) ++ return -EINVAL; ++ ++ if (get_compat_sigset(&newset, unewset)) ++ return -EFAULT; ++ return sigsuspend(&newset); ++} ++#endif ++ ++#ifdef CONFIG_OLD_SIGSUSPEND ++SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) ++{ ++ sigset_t blocked; ++ siginitset(&blocked, mask); ++ return sigsuspend(&blocked); ++} ++#endif ++#ifdef CONFIG_OLD_SIGSUSPEND3 ++SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask) ++{ ++ sigset_t blocked; ++ siginitset(&blocked, mask); ++ return sigsuspend(&blocked); ++} ++#endif ++ ++__weak const char *arch_vma_name(struct vm_area_struct *vma) ++{ ++ return NULL; ++} ++ ++void __init signals_init(void) ++{ ++ /* If this check fails, the __ARCH_SI_PREAMBLE_SIZE value is wrong! */ ++ BUILD_BUG_ON(__ARCH_SI_PREAMBLE_SIZE ++ != offsetof(struct siginfo, _sifields._pad)); ++ BUILD_BUG_ON(sizeof(struct siginfo) != SI_MAX_SIZE); ++ ++ sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); ++} ++ ++#ifdef CONFIG_KGDB_KDB ++#include ++/* ++ * kdb_send_sig - Allows kdb to send signals without exposing ++ * signal internals. This function checks if the required locks are ++ * available before calling the main signal code, to avoid kdb ++ * deadlocks. ++ */ ++void kdb_send_sig(struct task_struct *t, int sig) ++{ ++ static struct task_struct *kdb_prev_t; ++ int new_t, ret; ++ if (!spin_trylock(&t->sighand->siglock)) { ++ kdb_printf("Can't do kill command now.\n" ++ "The sigmask lock is held somewhere else in " ++ "kernel, try again later\n"); ++ return; ++ } ++ new_t = kdb_prev_t != t; ++ kdb_prev_t = t; ++ if (t->state != TASK_RUNNING && new_t) { ++ spin_unlock(&t->sighand->siglock); ++ kdb_printf("Process is not RUNNING, sending a signal from " ++ "kdb risks deadlock\n" ++ "on the run queue locks. 
" ++ "The signal has _not_ been sent.\n" ++ "Reissue the kill command if you want to risk " ++ "the deadlock.\n"); ++ return; ++ } ++ ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); ++ spin_unlock(&t->sighand->siglock); ++ if (ret) ++ kdb_printf("Fail to deliver Signal %d to process %d.\n", ++ sig, t->pid); ++ else ++ kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid); ++} ++#endif /* CONFIG_KGDB_KDB */ +diff -uprN kernel/kernel/stop_machine.c kernel_new/kernel/stop_machine.c +--- kernel/kernel/stop_machine.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c 2021-04-02 09:22:23.752463220 +0800 +@@ -240,6 +240,7 @@ static int multi_cpu_stop(void *data) + sdei_unmask_local_cpu(); + gic_arch_restore_irqs(flags); + #endif ++ hard_irq_enable(); + local_irq_restore(flags); + return err; + } +@@ -619,6 +620,7 @@ int stop_machine_cpuslocked(cpu_stop_fn_ + local_irq_save(flags); + hard_irq_disable(); + ret = (*fn)(data); ++ hard_irq_enable(); + local_irq_restore(flags); + + return ret; +diff -uprN kernel/kernel/stop_machine.c.orig kernel_new/kernel/stop_machine.c.orig +--- kernel/kernel/stop_machine.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,695 @@ ++/* ++ * kernel/stop_machine.c ++ * ++ * Copyright (C) 2008, 2005 IBM Corporation. ++ * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au ++ * Copyright (C) 2010 SUSE Linux Products GmbH ++ * Copyright (C) 2010 Tejun Heo ++ * ++ * This file is released under the GPLv2 and any later version. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_ARM64 ++#include ++#include ++#endif ++ ++/* ++ * Structure to determine completion condition and record errors. May ++ * be shared by works on different cpus. ++ */ ++struct cpu_stop_done { ++ atomic_t nr_todo; /* nr left to execute */ ++ int ret; /* collected return value */ ++ struct completion completion; /* fired if nr_todo reaches 0 */ ++}; ++ ++/* the actual stopper, one per every possible cpu, enabled on online cpus */ ++struct cpu_stopper { ++ struct task_struct *thread; ++ ++ raw_spinlock_t lock; ++ bool enabled; /* is this stopper enabled? */ ++ struct list_head works; /* list of pending works */ ++ ++ struct cpu_stop_work stop_work; /* for stop_cpus */ ++}; ++ ++static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); ++static bool stop_machine_initialized = false; ++ ++/* static data for stop_cpus */ ++static DEFINE_MUTEX(stop_cpus_mutex); ++static bool stop_cpus_in_progress; ++ ++static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) ++{ ++ memset(done, 0, sizeof(*done)); ++ atomic_set(&done->nr_todo, nr_todo); ++ init_completion(&done->completion); ++} ++ ++/* signal completion unless @done is NULL */ ++static void cpu_stop_signal_done(struct cpu_stop_done *done) ++{ ++ if (atomic_dec_and_test(&done->nr_todo)) ++ complete(&done->completion); ++} ++ ++static void __cpu_stop_queue_work(struct cpu_stopper *stopper, ++ struct cpu_stop_work *work, ++ struct wake_q_head *wakeq) ++{ ++ list_add_tail(&work->list, &stopper->works); ++ wake_q_add(wakeq, stopper->thread); ++} ++ ++/* queue @work to @stopper. 
if offline, @work is completed immediately */ ++static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ DEFINE_WAKE_Q(wakeq); ++ unsigned long flags; ++ bool enabled; ++ ++ preempt_disable(); ++ raw_spin_lock_irqsave(&stopper->lock, flags); ++ enabled = stopper->enabled; ++ if (enabled) ++ __cpu_stop_queue_work(stopper, work, &wakeq); ++ else if (work->done) ++ cpu_stop_signal_done(work->done); ++ raw_spin_unlock_irqrestore(&stopper->lock, flags); ++ ++ wake_up_q(&wakeq); ++ preempt_enable(); ++ ++ return enabled; ++} ++ ++/** ++ * stop_one_cpu - stop a cpu ++ * @cpu: cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Execute @fn(@arg) on @cpu. @fn is run in a process context with ++ * the highest priority preempting any task on the cpu and ++ * monopolizing it. This function returns after the execution is ++ * complete. ++ * ++ * This function doesn't guarantee @cpu stays online till @fn ++ * completes. If @cpu goes down in the middle, execution may happen ++ * partially or fully on different cpus. @fn should either be ready ++ * for that or the caller should ensure that @cpu stays online until ++ * this function completes. ++ * ++ * CONTEXT: ++ * Might sleep. ++ * ++ * RETURNS: ++ * -ENOENT if @fn(@arg) was not executed because @cpu was offline; ++ * otherwise, the return value of @fn. ++ */ ++int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; ++ ++ cpu_stop_init_done(&done, 1); ++ if (!cpu_stop_queue_work(cpu, &work)) ++ return -ENOENT; ++ /* ++ * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup ++ * cycle by doing a preemption: ++ */ ++ cond_resched(); ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/* This controls the threads on each CPU. */ ++enum multi_stop_state { ++ /* Dummy starting state for thread. */ ++ MULTI_STOP_NONE, ++ /* Awaiting everyone to be scheduled. */ ++ MULTI_STOP_PREPARE, ++ /* Disable interrupts. */ ++ MULTI_STOP_DISABLE_IRQ, ++ /* Run the function */ ++ MULTI_STOP_RUN, ++ /* Exit */ ++ MULTI_STOP_EXIT, ++}; ++ ++struct multi_stop_data { ++ cpu_stop_fn_t fn; ++ void *data; ++ /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ ++ unsigned int num_threads; ++ const struct cpumask *active_cpus; ++ ++ enum multi_stop_state state; ++ atomic_t thread_ack; ++}; ++ ++static void set_state(struct multi_stop_data *msdata, ++ enum multi_stop_state newstate) ++{ ++ /* Reset ack counter. */ ++ atomic_set(&msdata->thread_ack, msdata->num_threads); ++ smp_wmb(); ++ msdata->state = newstate; ++} ++ ++/* Last one to ack a state moves to the next state. */ ++static void ack_state(struct multi_stop_data *msdata) ++{ ++ if (atomic_dec_and_test(&msdata->thread_ack)) ++ set_state(msdata, msdata->state + 1); ++} ++ ++/* This is the cpu_stop function which stops the CPU. */ ++static int multi_cpu_stop(void *data) ++{ ++ struct multi_stop_data *msdata = data; ++ enum multi_stop_state curstate = MULTI_STOP_NONE; ++ int cpu = smp_processor_id(), err = 0; ++ unsigned long flags; ++ bool is_active; ++ ++ /* ++ * When called from stop_machine_from_inactive_cpu(), irq might ++ * already be disabled. Save the state and restore it on exit. 
++ */ ++ local_save_flags(flags); ++ ++ if (!msdata->active_cpus) ++ is_active = cpu == cpumask_first(cpu_online_mask); ++ else ++ is_active = cpumask_test_cpu(cpu, msdata->active_cpus); ++ ++ /* Simple state machine */ ++ do { ++ /* Chill out and ensure we re-read multi_stop_state. */ ++ cpu_relax_yield(); ++ if (msdata->state != curstate) { ++ curstate = msdata->state; ++ switch (curstate) { ++ case MULTI_STOP_DISABLE_IRQ: ++ local_irq_disable(); ++ hard_irq_disable(); ++#ifdef CONFIG_ARM64 ++ gic_arch_disable_irqs(); ++ sdei_mask_local_cpu(); ++#endif ++ break; ++ case MULTI_STOP_RUN: ++ if (is_active) ++ err = msdata->fn(msdata->data); ++ break; ++ default: ++ break; ++ } ++ ack_state(msdata); ++ } else if (curstate > MULTI_STOP_PREPARE) { ++ /* ++ * At this stage all other CPUs we depend on must spin ++ * in the same loop. Any reason for hard-lockup should ++ * be detected and reported on their side. ++ */ ++ touch_nmi_watchdog(); ++ } ++ } while (curstate != MULTI_STOP_EXIT); ++ ++#ifdef CONFIG_ARM64 ++ sdei_unmask_local_cpu(); ++ gic_arch_restore_irqs(flags); ++#endif ++ local_irq_restore(flags); ++ return err; ++} ++ ++static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, ++ int cpu2, struct cpu_stop_work *work2) ++{ ++ struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); ++ struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); ++ DEFINE_WAKE_Q(wakeq); ++ int err; ++ ++retry: ++ /* ++ * The waking up of stopper threads has to happen in the same ++ * scheduling context as the queueing. Otherwise, there is a ++ * possibility of one of the above stoppers being woken up by another ++ * CPU, and preempting us. This will cause us to not wake up the other ++ * stopper forever. ++ */ ++ preempt_disable(); ++ raw_spin_lock_irq(&stopper1->lock); ++ raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); ++ ++ if (!stopper1->enabled || !stopper2->enabled) { ++ err = -ENOENT; ++ goto unlock; ++ } ++ ++ /* ++ * Ensure that if we race with __stop_cpus() the stoppers won't get ++ * queued up in reverse order leading to system deadlock. ++ * ++ * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has ++ * queued a work on cpu1 but not on cpu2, we hold both locks. ++ * ++ * It can be falsely true but it is safe to spin until it is cleared, ++ * queue_stop_cpus_work() does everything under preempt_disable(). ++ */ ++ if (unlikely(stop_cpus_in_progress)) { ++ err = -EDEADLK; ++ goto unlock; ++ } ++ ++ err = 0; ++ __cpu_stop_queue_work(stopper1, work1, &wakeq); ++ __cpu_stop_queue_work(stopper2, work2, &wakeq); ++ ++unlock: ++ raw_spin_unlock(&stopper2->lock); ++ raw_spin_unlock_irq(&stopper1->lock); ++ ++ if (unlikely(err == -EDEADLK)) { ++ preempt_enable(); ++ ++ while (stop_cpus_in_progress) ++ cpu_relax(); ++ ++ goto retry; ++ } ++ ++ wake_up_q(&wakeq); ++ preempt_enable(); ++ ++ return err; ++} ++/** ++ * stop_two_cpus - stops two cpus ++ * @cpu1: the cpu to stop ++ * @cpu2: the other cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Stops both the current and specified CPU and runs @fn on one of them. ++ * ++ * returns when both are completed. 
++ */ ++int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ struct cpu_stop_work work1, work2; ++ struct multi_stop_data msdata; ++ ++ msdata = (struct multi_stop_data){ ++ .fn = fn, ++ .data = arg, ++ .num_threads = 2, ++ .active_cpus = cpumask_of(cpu1), ++ }; ++ ++ work1 = work2 = (struct cpu_stop_work){ ++ .fn = multi_cpu_stop, ++ .arg = &msdata, ++ .done = &done ++ }; ++ ++ cpu_stop_init_done(&done, 2); ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ ++ if (cpu1 > cpu2) ++ swap(cpu1, cpu2); ++ if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) ++ return -ENOENT; ++ ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/** ++ * stop_one_cpu_nowait - stop a cpu but don't wait for completion ++ * @cpu: cpu to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * @work_buf: pointer to cpu_stop_work structure ++ * ++ * Similar to stop_one_cpu() but doesn't wait for completion. The ++ * caller is responsible for ensuring @work_buf is currently unused ++ * and will remain untouched until stopper starts executing @fn. ++ * ++ * CONTEXT: ++ * Don't care. ++ * ++ * RETURNS: ++ * true if cpu_stop_work was queued successfully and @fn will be called, ++ * false otherwise. ++ */ ++bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, ++ struct cpu_stop_work *work_buf) ++{ ++ *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; ++ return cpu_stop_queue_work(cpu, work_buf); ++} ++ ++static bool queue_stop_cpus_work(const struct cpumask *cpumask, ++ cpu_stop_fn_t fn, void *arg, ++ struct cpu_stop_done *done) ++{ ++ struct cpu_stop_work *work; ++ unsigned int cpu; ++ bool queued = false; ++ ++ /* ++ * Disable preemption while queueing to avoid getting ++ * preempted by a stopper which might wait for other stoppers ++ * to enter @fn which can lead to deadlock. ++ */ ++ preempt_disable(); ++ stop_cpus_in_progress = true; ++ for_each_cpu(cpu, cpumask) { ++ work = &per_cpu(cpu_stopper.stop_work, cpu); ++ work->fn = fn; ++ work->arg = arg; ++ work->done = done; ++ if (cpu_stop_queue_work(cpu, work)) ++ queued = true; ++ } ++ stop_cpus_in_progress = false; ++ preempt_enable(); ++ ++ return queued; ++} ++ ++static int __stop_cpus(const struct cpumask *cpumask, ++ cpu_stop_fn_t fn, void *arg) ++{ ++ struct cpu_stop_done done; ++ ++ cpu_stop_init_done(&done, cpumask_weight(cpumask)); ++ if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) ++ return -ENOENT; ++ wait_for_completion(&done.completion); ++ return done.ret; ++} ++ ++/** ++ * stop_cpus - stop multiple cpus ++ * @cpumask: cpus to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, ++ * @fn is run in a process context with the highest priority ++ * preempting any task on the cpu and monopolizing it. This function ++ * returns after all executions are complete. ++ * ++ * This function doesn't guarantee the cpus in @cpumask stay online ++ * till @fn completes. If some cpus go down in the middle, execution ++ * on the cpu may happen partially or fully on different cpus. @fn ++ * should either be ready for that or the caller should ensure that ++ * the cpus stay online until this function completes. ++ * ++ * All stop_cpus() calls are serialized making it safe for @fn to wait ++ * for all cpus to start executing it. ++ * ++ * CONTEXT: ++ * Might sleep. 
++ * ++ * RETURNS: ++ * -ENOENT if @fn(@arg) was not executed at all because all cpus in ++ * @cpumask were offline; otherwise, 0 if all executions of @fn ++ * returned 0, any non zero return value if any returned non zero. ++ */ ++int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) ++{ ++ int ret; ++ ++ /* static works are used, process one request at a time */ ++ mutex_lock(&stop_cpus_mutex); ++ ret = __stop_cpus(cpumask, fn, arg); ++ mutex_unlock(&stop_cpus_mutex); ++ return ret; ++} ++ ++/** ++ * try_stop_cpus - try to stop multiple cpus ++ * @cpumask: cpus to stop ++ * @fn: function to execute ++ * @arg: argument to @fn ++ * ++ * Identical to stop_cpus() except that it fails with -EAGAIN if ++ * someone else is already using the facility. ++ * ++ * CONTEXT: ++ * Might sleep. ++ * ++ * RETURNS: ++ * -EAGAIN if someone else is already stopping cpus, -ENOENT if ++ * @fn(@arg) was not executed at all because all cpus in @cpumask were ++ * offline; otherwise, 0 if all executions of @fn returned 0, any non ++ * zero return value if any returned non zero. ++ */ ++int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) ++{ ++ int ret; ++ ++ /* static works are used, process one request at a time */ ++ if (!mutex_trylock(&stop_cpus_mutex)) ++ return -EAGAIN; ++ ret = __stop_cpus(cpumask, fn, arg); ++ mutex_unlock(&stop_cpus_mutex); ++ return ret; ++} ++ ++static int cpu_stop_should_run(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ unsigned long flags; ++ int run; ++ ++ raw_spin_lock_irqsave(&stopper->lock, flags); ++ run = !list_empty(&stopper->works); ++ raw_spin_unlock_irqrestore(&stopper->lock, flags); ++ return run; ++} ++ ++static void cpu_stopper_thread(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ struct cpu_stop_work *work; ++ ++repeat: ++ work = NULL; ++ raw_spin_lock_irq(&stopper->lock); ++ if (!list_empty(&stopper->works)) { ++ work = list_first_entry(&stopper->works, ++ struct cpu_stop_work, list); ++ list_del_init(&work->list); ++ } ++ raw_spin_unlock_irq(&stopper->lock); ++ ++ if (work) { ++ cpu_stop_fn_t fn = work->fn; ++ void *arg = work->arg; ++ struct cpu_stop_done *done = work->done; ++ int ret; ++ ++ /* cpu stop callbacks must not sleep, make in_atomic() == T */ ++ preempt_count_inc(); ++ ret = fn(arg); ++ if (done) { ++ if (ret) ++ done->ret = ret; ++ cpu_stop_signal_done(done); ++ } ++ preempt_count_dec(); ++ WARN_ONCE(preempt_count(), ++ "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg); ++ goto repeat; ++ } ++} ++ ++void stop_machine_park(int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ /* ++ * Lockless. cpu_stopper_thread() will take stopper->lock and flush ++ * the pending works before it parks, until then it is fine to queue ++ * the new works. 
++ */ ++ stopper->enabled = false; ++ kthread_park(stopper->thread); ++} ++ ++extern void sched_set_stop_task(int cpu, struct task_struct *stop); ++ ++static void cpu_stop_create(unsigned int cpu) ++{ ++ sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); ++} ++ ++static void cpu_stop_park(unsigned int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ WARN_ON(!list_empty(&stopper->works)); ++} ++ ++void stop_machine_unpark(int cpu) ++{ ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ stopper->enabled = true; ++ kthread_unpark(stopper->thread); ++} ++ ++static struct smp_hotplug_thread cpu_stop_threads = { ++ .store = &cpu_stopper.thread, ++ .thread_should_run = cpu_stop_should_run, ++ .thread_fn = cpu_stopper_thread, ++ .thread_comm = "migration/%u", ++ .create = cpu_stop_create, ++ .park = cpu_stop_park, ++ .selfparking = true, ++}; ++ ++static int __init cpu_stop_init(void) ++{ ++ unsigned int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); ++ ++ raw_spin_lock_init(&stopper->lock); ++ INIT_LIST_HEAD(&stopper->works); ++ } ++ ++ BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); ++ stop_machine_unpark(raw_smp_processor_id()); ++ stop_machine_initialized = true; ++ return 0; ++} ++early_initcall(cpu_stop_init); ++ ++int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, ++ const struct cpumask *cpus) ++{ ++ struct multi_stop_data msdata = { ++ .fn = fn, ++ .data = data, ++ .num_threads = num_online_cpus(), ++ .active_cpus = cpus, ++ }; ++ ++ lockdep_assert_cpus_held(); ++ ++ if (!stop_machine_initialized) { ++ /* ++ * Handle the case where stop_machine() is called ++ * early in boot before stop_machine() has been ++ * initialized. ++ */ ++ unsigned long flags; ++ int ret; ++ ++ WARN_ON_ONCE(msdata.num_threads != 1); ++ ++ local_irq_save(flags); ++ hard_irq_disable(); ++ ret = (*fn)(data); ++ local_irq_restore(flags); ++ ++ return ret; ++ } ++ ++ /* Set the initial state and stop all online cpus. */ ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); ++} ++ ++int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) ++{ ++ int ret; ++ ++ /* No CPUs can come up or down during this. */ ++ cpus_read_lock(); ++ ret = stop_machine_cpuslocked(fn, data, cpus); ++ cpus_read_unlock(); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(stop_machine); ++ ++/** ++ * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU ++ * @fn: the function to run ++ * @data: the data ptr for the @fn() ++ * @cpus: the cpus to run the @fn() on (NULL = any online cpu) ++ * ++ * This is identical to stop_machine() but can be called from a CPU which ++ * is not active. The local CPU is in the process of hotplug (so no other ++ * CPU hotplug can start) and not marked active and doesn't have enough ++ * context to sleep. ++ * ++ * This function provides stop_machine() functionality for such state by ++ * using busy-wait for synchronization and executing @fn directly for local ++ * CPU. ++ * ++ * CONTEXT: ++ * Local CPU is inactive. Temporarily stops all active CPUs. ++ * ++ * RETURNS: ++ * 0 if all executions of @fn returned 0, any non zero return value if any ++ * returned non zero. 
++ */ ++int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, ++ const struct cpumask *cpus) ++{ ++ struct multi_stop_data msdata = { .fn = fn, .data = data, ++ .active_cpus = cpus }; ++ struct cpu_stop_done done; ++ int ret; ++ ++ /* Local CPU must be inactive and CPU hotplug in progress. */ ++ BUG_ON(cpu_active(raw_smp_processor_id())); ++ msdata.num_threads = num_active_cpus() + 1; /* +1 for local */ ++ ++ /* No proper task established and can't sleep - busy wait for lock. */ ++ while (!mutex_trylock(&stop_cpus_mutex)) ++ cpu_relax(); ++ ++ /* Schedule work on other CPUs and execute directly for local CPU */ ++ set_state(&msdata, MULTI_STOP_PREPARE); ++ cpu_stop_init_done(&done, num_active_cpus()); ++ queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, ++ &done); ++ ret = multi_cpu_stop(&msdata); ++ ++ /* Busy wait for completion. */ ++ while (!completion_done(&done.completion)) ++ cpu_relax(); ++ ++ mutex_unlock(&stop_cpus_mutex); ++ return ret ?: done.ret; ++} +diff -uprN kernel/kernel/stop_machine.c.rej kernel_new/kernel/stop_machine.c.rej +--- kernel/kernel/stop_machine.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/stop_machine.c.rej 2021-04-01 18:28:07.813863111 +0800 +@@ -0,0 +1,10 @@ ++--- kernel/stop_machine.c 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/stop_machine.c 2021-03-22 09:21:43.223415449 +0800 ++@@ -227,6 +227,7 @@ static int multi_cpu_stop(void *data) ++ } ++ } while (curstate != MULTI_STOP_EXIT); ++ +++ hard_irq_enable(); ++ local_irq_restore(flags); ++ return err; ++ } +diff -uprN kernel/kernel/time/clockevents.c kernel_new/kernel/time/clockevents.c +--- kernel/kernel/time/clockevents.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/time/clockevents.c 2021-04-01 18:28:07.813863111 +0800 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "tick-internal.h" + +@@ -458,6 +459,8 @@ void clockevents_register_device(struct + /* Initialize state to DETACHED */ + clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); + ++ ipipe_host_timer_register(dev); ++ + if (!dev->cpumask) { + WARN_ON(num_possible_cpus() > 1); + dev->cpumask = cpumask_of(smp_processor_id()); +@@ -652,8 +655,10 @@ void tick_cleanup_dead_cpu(int cpu) + * Unregister the clock event devices which were + * released from the users in the notify chain. + */ +- list_for_each_entry_safe(dev, tmp, &clockevents_released, list) ++ list_for_each_entry_safe(dev, tmp, &clockevents_released, list) { + list_del(&dev->list); ++ ipipe_host_timer_cleanup(dev); ++ } + /* + * Now check whether the CPU has left unused per cpu devices + */ +@@ -663,6 +668,7 @@ void tick_cleanup_dead_cpu(int cpu) + !tick_is_broadcast_device(dev)) { + BUG_ON(!clockevent_state_detached(dev)); + list_del(&dev->list); ++ ipipe_host_timer_cleanup(dev); + } + } + raw_spin_unlock_irqrestore(&clockevents_lock, flags); +diff -uprN kernel/kernel/time/clockevents.c.orig kernel_new/kernel/time/clockevents.c.orig +--- kernel/kernel/time/clockevents.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/time/clockevents.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,788 @@ ++/* ++ * linux/kernel/time/clockevents.c ++ * ++ * This file contains functions which manage clock event devices. ++ * ++ * Copyright(C) 2005-2006, Thomas Gleixner ++ * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar ++ * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner ++ * ++ * This code is licenced under the GPL version 2. For details see ++ * kernel-base/COPYING. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "tick-internal.h" ++ ++/* The registered clock event devices */ ++static LIST_HEAD(clockevent_devices); ++static LIST_HEAD(clockevents_released); ++/* Protection for the above */ ++static DEFINE_RAW_SPINLOCK(clockevents_lock); ++/* Protection for unbind operations */ ++static DEFINE_MUTEX(clockevents_mutex); ++ ++struct ce_unbind { ++ struct clock_event_device *ce; ++ int res; ++}; ++ ++static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, ++ bool ismax) ++{ ++ u64 clc = (u64) latch << evt->shift; ++ u64 rnd; ++ ++ if (unlikely(!evt->mult)) { ++ evt->mult = 1; ++ WARN_ON(1); ++ } ++ rnd = (u64) evt->mult - 1; ++ ++ /* ++ * Upper bound sanity check. If the backwards conversion is ++ * not equal latch, we know that the above shift overflowed. ++ */ ++ if ((clc >> evt->shift) != (u64)latch) ++ clc = ~0ULL; ++ ++ /* ++ * Scaled math oddities: ++ * ++ * For mult <= (1 << shift) we can safely add mult - 1 to ++ * prevent integer rounding loss. So the backwards conversion ++ * from nsec to device ticks will be correct. ++ * ++ * For mult > (1 << shift), i.e. device frequency is > 1GHz we ++ * need to be careful. Adding mult - 1 will result in a value ++ * which when converted back to device ticks can be larger ++ * than latch by up to (mult - 1) >> shift. For the min_delta ++ * calculation we still want to apply this in order to stay ++ * above the minimum device ticks limit. For the upper limit ++ * we would end up with a latch value larger than the upper ++ * limit of the device, so we omit the add to stay below the ++ * device upper boundary. ++ * ++ * Also omit the add if it would overflow the u64 boundary. ++ */ ++ if ((~0ULL - clc > rnd) && ++ (!ismax || evt->mult <= (1ULL << evt->shift))) ++ clc += rnd; ++ ++ do_div(clc, evt->mult); ++ ++ /* Deltas less than 1usec are pointless noise */ ++ return clc > 1000 ? clc : 1000; ++} ++ ++/** ++ * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds ++ * @latch: value to convert ++ * @evt: pointer to clock event device descriptor ++ * ++ * Math helper, returns latch value converted to nanoseconds (bound checked) ++ */ ++u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) ++{ ++ return cev_delta2ns(latch, evt, false); ++} ++EXPORT_SYMBOL_GPL(clockevent_delta2ns); ++ ++static int __clockevents_switch_state(struct clock_event_device *dev, ++ enum clock_event_state state) ++{ ++ if (dev->features & CLOCK_EVT_FEAT_DUMMY) ++ return 0; ++ ++ /* Transition with new state-specific callbacks */ ++ switch (state) { ++ case CLOCK_EVT_STATE_DETACHED: ++ /* The clockevent device is getting replaced. Shut it down. 
*/ ++ ++ case CLOCK_EVT_STATE_SHUTDOWN: ++ if (dev->set_state_shutdown) ++ return dev->set_state_shutdown(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_PERIODIC: ++ /* Core internal bug */ ++ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) ++ return -ENOSYS; ++ if (dev->set_state_periodic) ++ return dev->set_state_periodic(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_ONESHOT: ++ /* Core internal bug */ ++ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return -ENOSYS; ++ if (dev->set_state_oneshot) ++ return dev->set_state_oneshot(dev); ++ return 0; ++ ++ case CLOCK_EVT_STATE_ONESHOT_STOPPED: ++ /* Core internal bug */ ++ if (WARN_ONCE(!clockevent_state_oneshot(dev), ++ "Current state: %d\n", ++ clockevent_get_state(dev))) ++ return -EINVAL; ++ ++ if (dev->set_state_oneshot_stopped) ++ return dev->set_state_oneshot_stopped(dev); ++ else ++ return -ENOSYS; ++ ++ default: ++ return -ENOSYS; ++ } ++} ++ ++/** ++ * clockevents_switch_state - set the operating state of a clock event device ++ * @dev: device to modify ++ * @state: new state ++ * ++ * Must be called with interrupts disabled ! ++ */ ++void clockevents_switch_state(struct clock_event_device *dev, ++ enum clock_event_state state) ++{ ++ if (clockevent_get_state(dev) != state) { ++ if (__clockevents_switch_state(dev, state)) ++ return; ++ ++ clockevent_set_state(dev, state); ++ ++ /* ++ * A nsec2cyc multiplicator of 0 is invalid and we'd crash ++ * on it, so fix it up and emit a warning: ++ */ ++ if (clockevent_state_oneshot(dev)) { ++ if (unlikely(!dev->mult)) { ++ dev->mult = 1; ++ WARN_ON(1); ++ } ++ } ++ } ++} ++ ++/** ++ * clockevents_shutdown - shutdown the device and clear next_event ++ * @dev: device to shutdown ++ */ ++void clockevents_shutdown(struct clock_event_device *dev) ++{ ++ clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); ++ dev->next_event = KTIME_MAX; ++} ++ ++/** ++ * clockevents_tick_resume - Resume the tick device before using it again ++ * @dev: device to resume ++ */ ++int clockevents_tick_resume(struct clock_event_device *dev) ++{ ++ int ret = 0; ++ ++ if (dev->tick_resume) ++ ret = dev->tick_resume(dev); ++ ++ return ret; ++} ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST ++ ++/* Limit min_delta to a jiffie */ ++#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) ++ ++/** ++ * clockevents_increase_min_delta - raise minimum delta of a clock event device ++ * @dev: device to increase the minimum delta ++ * ++ * Returns 0 on success, -ETIME when the minimum delta reached the limit. ++ */ ++static int clockevents_increase_min_delta(struct clock_event_device *dev) ++{ ++ /* Nothing to do if we already reached the limit */ ++ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { ++ printk_deferred(KERN_WARNING ++ "CE: Reprogramming failure. Giving up\n"); ++ dev->next_event = KTIME_MAX; ++ return -ETIME; ++ } ++ ++ if (dev->min_delta_ns < 5000) ++ dev->min_delta_ns = 5000; ++ else ++ dev->min_delta_ns += dev->min_delta_ns >> 1; ++ ++ if (dev->min_delta_ns > MIN_DELTA_LIMIT) ++ dev->min_delta_ns = MIN_DELTA_LIMIT; ++ ++ printk_deferred(KERN_WARNING ++ "CE: %s increased min_delta_ns to %llu nsec\n", ++ dev->name ? dev->name : "?", ++ (unsigned long long) dev->min_delta_ns); ++ return 0; ++} ++ ++/** ++ * clockevents_program_min_delta - Set clock event device to the minimum delay. ++ * @dev: device to program ++ * ++ * Returns 0 on success, -ETIME when the retry loop failed. 
++ */ ++static int clockevents_program_min_delta(struct clock_event_device *dev) ++{ ++ unsigned long long clc; ++ int64_t delta; ++ int i; ++ ++ for (i = 0;;) { ++ delta = dev->min_delta_ns; ++ dev->next_event = ktime_add_ns(ktime_get(), delta); ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ dev->retries++; ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ if (dev->set_next_event((unsigned long) clc, dev) == 0) ++ return 0; ++ ++ if (++i > 2) { ++ /* ++ * We tried 3 times to program the device with the ++ * given min_delta_ns. Try to increase the minimum ++ * delta, if that fails as well get out of here. ++ */ ++ if (clockevents_increase_min_delta(dev)) ++ return -ETIME; ++ i = 0; ++ } ++ } ++} ++ ++#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ ++ ++/** ++ * clockevents_program_min_delta - Set clock event device to the minimum delay. ++ * @dev: device to program ++ * ++ * Returns 0 on success, -ETIME when the retry loop failed. ++ */ ++static int clockevents_program_min_delta(struct clock_event_device *dev) ++{ ++ unsigned long long clc; ++ int64_t delta = 0; ++ int i; ++ ++ for (i = 0; i < 10; i++) { ++ delta += dev->min_delta_ns; ++ dev->next_event = ktime_add_ns(ktime_get(), delta); ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ dev->retries++; ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ if (dev->set_next_event((unsigned long) clc, dev) == 0) ++ return 0; ++ } ++ return -ETIME; ++} ++ ++#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ ++ ++/** ++ * clockevents_program_event - Reprogram the clock event device. ++ * @dev: device to program ++ * @expires: absolute expiry time (monotonic clock) ++ * @force: program minimum delay if expires can not be set ++ * ++ * Returns 0 on success, -ETIME when the event is in the past. ++ */ ++int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ++ bool force) ++{ ++ unsigned long long clc; ++ int64_t delta; ++ int rc; ++ ++ if (unlikely(expires < 0)) { ++ WARN_ON_ONCE(1); ++ return -ETIME; ++ } ++ ++ dev->next_event = expires; ++ ++ if (clockevent_state_shutdown(dev)) ++ return 0; ++ ++ /* We must be in ONESHOT state here */ ++ WARN_ONCE(!clockevent_state_oneshot(dev), "Current state: %d\n", ++ clockevent_get_state(dev)); ++ ++ /* Shortcut for clockevent devices that can deal with ktime. */ ++ if (dev->features & CLOCK_EVT_FEAT_KTIME) ++ return dev->set_next_ktime(expires, dev); ++ ++ delta = ktime_to_ns(ktime_sub(expires, ktime_get())); ++ if (delta <= 0) ++ return force ? clockevents_program_min_delta(dev) : -ETIME; ++ ++ delta = min(delta, (int64_t) dev->max_delta_ns); ++ delta = max(delta, (int64_t) dev->min_delta_ns); ++ ++ clc = ((unsigned long long) delta * dev->mult) >> dev->shift; ++ rc = dev->set_next_event((unsigned long) clc, dev); ++ ++ return (rc && force) ? clockevents_program_min_delta(dev) : rc; ++} ++ ++/* ++ * Called after a notify add to make devices available which were ++ * released from the notifier call. 
++ */ ++static void clockevents_notify_released(void) ++{ ++ struct clock_event_device *dev; ++ ++ while (!list_empty(&clockevents_released)) { ++ dev = list_entry(clockevents_released.next, ++ struct clock_event_device, list); ++ list_del(&dev->list); ++ list_add(&dev->list, &clockevent_devices); ++ tick_check_new_device(dev); ++ } ++} ++ ++/* ++ * Try to install a replacement clock event device ++ */ ++static int clockevents_replace(struct clock_event_device *ced) ++{ ++ struct clock_event_device *dev, *newdev = NULL; ++ ++ list_for_each_entry(dev, &clockevent_devices, list) { ++ if (dev == ced || !clockevent_state_detached(dev)) ++ continue; ++ ++ if (!tick_check_replacement(newdev, dev)) ++ continue; ++ ++ if (!try_module_get(dev->owner)) ++ continue; ++ ++ if (newdev) ++ module_put(newdev->owner); ++ newdev = dev; ++ } ++ if (newdev) { ++ tick_install_replacement(newdev); ++ list_del_init(&ced->list); ++ } ++ return newdev ? 0 : -EBUSY; ++} ++ ++/* ++ * Called with clockevents_mutex and clockevents_lock held ++ */ ++static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) ++{ ++ /* Fast track. Device is unused */ ++ if (clockevent_state_detached(ced)) { ++ list_del_init(&ced->list); ++ return 0; ++ } ++ ++ return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; ++} ++ ++/* ++ * SMP function call to unbind a device ++ */ ++static void __clockevents_unbind(void *arg) ++{ ++ struct ce_unbind *cu = arg; ++ int res; ++ ++ raw_spin_lock(&clockevents_lock); ++ res = __clockevents_try_unbind(cu->ce, smp_processor_id()); ++ if (res == -EAGAIN) ++ res = clockevents_replace(cu->ce); ++ cu->res = res; ++ raw_spin_unlock(&clockevents_lock); ++} ++ ++/* ++ * Issues smp function call to unbind a per cpu device. Called with ++ * clockevents_mutex held. ++ */ ++static int clockevents_unbind(struct clock_event_device *ced, int cpu) ++{ ++ struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; ++ ++ smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); ++ return cu.res; ++} ++ ++/* ++ * Unbind a clockevents device. ++ */ ++int clockevents_unbind_device(struct clock_event_device *ced, int cpu) ++{ ++ int ret; ++ ++ mutex_lock(&clockevents_mutex); ++ ret = clockevents_unbind(ced, cpu); ++ mutex_unlock(&clockevents_mutex); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(clockevents_unbind_device); ++ ++/** ++ * clockevents_register_device - register a clock event device ++ * @dev: device to register ++ */ ++void clockevents_register_device(struct clock_event_device *dev) ++{ ++ unsigned long flags; ++ ++ /* Initialize state to DETACHED */ ++ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); ++ ++ if (!dev->cpumask) { ++ WARN_ON(num_possible_cpus() > 1); ++ dev->cpumask = cpumask_of(smp_processor_id()); ++ } ++ ++ if (dev->cpumask == cpu_all_mask) { ++ WARN(1, "%s cpumask == cpu_all_mask, using cpu_possible_mask instead\n", ++ dev->name); ++ dev->cpumask = cpu_possible_mask; ++ } ++ ++ raw_spin_lock_irqsave(&clockevents_lock, flags); ++ ++ list_add(&dev->list, &clockevent_devices); ++ tick_check_new_device(dev); ++ clockevents_notify_released(); ++ ++ raw_spin_unlock_irqrestore(&clockevents_lock, flags); ++} ++EXPORT_SYMBOL_GPL(clockevents_register_device); ++ ++static void clockevents_config(struct clock_event_device *dev, u32 freq) ++{ ++ u64 sec; ++ ++ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) ++ return; ++ ++ /* ++ * Calculate the maximum number of seconds we can sleep. 
Limit ++ * to 10 minutes for hardware which can program more than ++ * 32bit ticks so we still get reasonable conversion values. ++ */ ++ sec = dev->max_delta_ticks; ++ do_div(sec, freq); ++ if (!sec) ++ sec = 1; ++ else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) ++ sec = 600; ++ ++ clockevents_calc_mult_shift(dev, freq, sec); ++ dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); ++ dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); ++} ++ ++/** ++ * clockevents_config_and_register - Configure and register a clock event device ++ * @dev: device to register ++ * @freq: The clock frequency ++ * @min_delta: The minimum clock ticks to program in oneshot mode ++ * @max_delta: The maximum clock ticks to program in oneshot mode ++ * ++ * min/max_delta can be 0 for devices which do not support oneshot mode. ++ */ ++void clockevents_config_and_register(struct clock_event_device *dev, ++ u32 freq, unsigned long min_delta, ++ unsigned long max_delta) ++{ ++ dev->min_delta_ticks = min_delta; ++ dev->max_delta_ticks = max_delta; ++ clockevents_config(dev, freq); ++ clockevents_register_device(dev); ++} ++EXPORT_SYMBOL_GPL(clockevents_config_and_register); ++ ++int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) ++{ ++ clockevents_config(dev, freq); ++ ++ if (clockevent_state_oneshot(dev)) ++ return clockevents_program_event(dev, dev->next_event, false); ++ ++ if (clockevent_state_periodic(dev)) ++ return __clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); ++ ++ return 0; ++} ++ ++/** ++ * clockevents_update_freq - Update frequency and reprogram a clock event device. ++ * @dev: device to modify ++ * @freq: new device frequency ++ * ++ * Reconfigure and reprogram a clock event device in oneshot ++ * mode. Must be called on the cpu for which the device delivers per ++ * cpu timer events. If called for the broadcast device the core takes ++ * care of serialization. ++ * ++ * Returns 0 on success, -ETIME when the event is in the past. ++ */ ++int clockevents_update_freq(struct clock_event_device *dev, u32 freq) ++{ ++ unsigned long flags; ++ int ret; ++ ++ local_irq_save(flags); ++ ret = tick_broadcast_update_freq(dev, freq); ++ if (ret == -ENODEV) ++ ret = __clockevents_update_freq(dev, freq); ++ local_irq_restore(flags); ++ return ret; ++} ++ ++/* ++ * Noop handler when we shut down an event device ++ */ ++void clockevents_handle_noop(struct clock_event_device *dev) ++{ ++} ++ ++/** ++ * clockevents_exchange_device - release and request clock devices ++ * @old: device to release (can be NULL) ++ * @new: device to request (can be NULL) ++ * ++ * Called from various tick functions with clockevents_lock held and ++ * interrupts disabled. ++ */ ++void clockevents_exchange_device(struct clock_event_device *old, ++ struct clock_event_device *new) ++{ ++ /* ++ * Caller releases a clock event device. We queue it into the ++ * released list and do a notify add later. 
++ */ ++ if (old) { ++ module_put(old->owner); ++ clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED); ++ list_del(&old->list); ++ list_add(&old->list, &clockevents_released); ++ } ++ ++ if (new) { ++ BUG_ON(!clockevent_state_detached(new)); ++ clockevents_shutdown(new); ++ } ++} ++ ++/** ++ * clockevents_suspend - suspend clock devices ++ */ ++void clockevents_suspend(void) ++{ ++ struct clock_event_device *dev; ++ ++ list_for_each_entry_reverse(dev, &clockevent_devices, list) ++ if (dev->suspend && !clockevent_state_detached(dev)) ++ dev->suspend(dev); ++} ++ ++/** ++ * clockevents_resume - resume clock devices ++ */ ++void clockevents_resume(void) ++{ ++ struct clock_event_device *dev; ++ ++ list_for_each_entry(dev, &clockevent_devices, list) ++ if (dev->resume && !clockevent_state_detached(dev)) ++ dev->resume(dev); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ ++# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST ++/** ++ * tick_offline_cpu - Take CPU out of the broadcast mechanism ++ * @cpu: The outgoing CPU ++ * ++ * Called on the outgoing CPU after it took itself offline. ++ */ ++void tick_offline_cpu(unsigned int cpu) ++{ ++ raw_spin_lock(&clockevents_lock); ++ tick_broadcast_offline(cpu); ++ raw_spin_unlock(&clockevents_lock); ++} ++# endif ++ ++/** ++ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu ++ */ ++void tick_cleanup_dead_cpu(int cpu) ++{ ++ struct clock_event_device *dev, *tmp; ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&clockevents_lock, flags); ++ ++ tick_shutdown(cpu); ++ /* ++ * Unregister the clock event devices which were ++ * released from the users in the notify chain. ++ */ ++ list_for_each_entry_safe(dev, tmp, &clockevents_released, list) ++ list_del(&dev->list); ++ /* ++ * Now check whether the CPU has left unused per cpu devices ++ */ ++ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { ++ if (cpumask_test_cpu(cpu, dev->cpumask) && ++ cpumask_weight(dev->cpumask) == 1 && ++ !tick_is_broadcast_device(dev)) { ++ BUG_ON(!clockevent_state_detached(dev)); ++ list_del(&dev->list); ++ } ++ } ++ raw_spin_unlock_irqrestore(&clockevents_lock, flags); ++} ++#endif ++ ++#ifdef CONFIG_SYSFS ++static struct bus_type clockevents_subsys = { ++ .name = "clockevents", ++ .dev_name = "clockevent", ++}; ++ ++static DEFINE_PER_CPU(struct device, tick_percpu_dev); ++static struct tick_device *tick_get_tick_dev(struct device *dev); ++ ++static ssize_t sysfs_show_current_tick_dev(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct tick_device *td; ++ ssize_t count = 0; ++ ++ raw_spin_lock_irq(&clockevents_lock); ++ td = tick_get_tick_dev(dev); ++ if (td && td->evtdev) ++ count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name); ++ raw_spin_unlock_irq(&clockevents_lock); ++ return count; ++} ++static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); ++ ++/* We don't support the abomination of removable broadcast devices */ ++static ssize_t sysfs_unbind_tick_dev(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ char name[CS_NAME_LEN]; ++ ssize_t ret = sysfs_get_uname(buf, name, count); ++ struct clock_event_device *ce; ++ ++ if (ret < 0) ++ return ret; ++ ++ ret = -ENODEV; ++ mutex_lock(&clockevents_mutex); ++ raw_spin_lock_irq(&clockevents_lock); ++ list_for_each_entry(ce, &clockevent_devices, list) { ++ if (!strcmp(ce->name, name)) { ++ ret = __clockevents_try_unbind(ce, dev->id); ++ break; ++ } ++ } ++ raw_spin_unlock_irq(&clockevents_lock); ++ /* ++ 
* We hold clockevents_mutex, so ce can't go away ++ */ ++ if (ret == -EAGAIN) ++ ret = clockevents_unbind(ce, dev->id); ++ mutex_unlock(&clockevents_mutex); ++ return ret ? ret : count; ++} ++static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); ++ ++#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST ++static struct device tick_bc_dev = { ++ .init_name = "broadcast", ++ .id = 0, ++ .bus = &clockevents_subsys, ++}; ++ ++static struct tick_device *tick_get_tick_dev(struct device *dev) ++{ ++ return dev == &tick_bc_dev ? tick_get_broadcast_device() : ++ &per_cpu(tick_cpu_device, dev->id); ++} ++ ++static __init int tick_broadcast_init_sysfs(void) ++{ ++ int err = device_register(&tick_bc_dev); ++ ++ if (!err) ++ err = device_create_file(&tick_bc_dev, &dev_attr_current_device); ++ return err; ++} ++#else ++static struct tick_device *tick_get_tick_dev(struct device *dev) ++{ ++ return &per_cpu(tick_cpu_device, dev->id); ++} ++static inline int tick_broadcast_init_sysfs(void) { return 0; } ++#endif ++ ++static int __init tick_init_sysfs(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ struct device *dev = &per_cpu(tick_percpu_dev, cpu); ++ int err; ++ ++ dev->id = cpu; ++ dev->bus = &clockevents_subsys; ++ err = device_register(dev); ++ if (!err) ++ err = device_create_file(dev, &dev_attr_current_device); ++ if (!err) ++ err = device_create_file(dev, &dev_attr_unbind_device); ++ if (err) ++ return err; ++ } ++ return tick_broadcast_init_sysfs(); ++} ++ ++static int __init clockevents_init_sysfs(void) ++{ ++ int err = subsys_system_register(&clockevents_subsys, NULL); ++ ++ if (!err) ++ err = tick_init_sysfs(); ++ return err; ++} ++device_initcall(clockevents_init_sysfs); ++#endif /* SYSFS */ +diff -uprN kernel/kernel/time/timer.c kernel_new/kernel/time/timer.c +--- kernel/kernel/time/timer.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/time/timer.c 2021-04-01 18:28:07.813863111 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1636,6 +1637,15 @@ static inline int collect_expired_timers + } + #endif + ++static inline void do_account_tick(struct task_struct *p, int user_tick) ++{ ++#ifdef CONFIG_IPIPE ++ if (!__ipipe_root_tick_p(raw_cpu_ptr(&ipipe_percpu.tick_regs))) ++ return; ++#endif ++ account_process_tick(p, user_tick); ++} ++ + /* + * Called from the timer interrupt handler to charge one tick to the current + * process. user_tick is 1 if the tick is user time, 0 for system. +@@ -1645,7 +1655,7 @@ void update_process_times(int user_tick) + struct task_struct *p = current; + + /* Note: this timer irq context must be accounted for as well. */ +- account_process_tick(p, user_tick); ++ do_account_tick(p, user_tick); + run_local_timers(); + rcu_check_callbacks(user_tick); + #ifdef CONFIG_IRQ_WORK +diff -uprN kernel/kernel/time/timer.c.orig kernel_new/kernel/time/timer.c.orig +--- kernel/kernel/time/timer.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/time/timer.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2020 @@ ++/* ++ * linux/kernel/timer.c ++ * ++ * Kernel internal timers ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. ++ * ++ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 ++ * "A Kernel Model for Precision Timekeeping" by Dave Mills ++ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to ++ * serialize accesses to xtime/lost_ticks). 
++ * Copyright (C) 1998 Andrea Arcangeli ++ * 1999-03-10 Improved NTP compatibility by Ulrich Windl ++ * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love ++ * 2000-10-05 Implemented scalable SMP per-CPU timer handling. ++ * Copyright (C) 2000, 2001, 2002 Ingo Molnar ++ * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "tick-internal.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; ++ ++EXPORT_SYMBOL(jiffies_64); ++ ++/* ++ * The timer wheel has LVL_DEPTH array levels. Each level provides an array of ++ * LVL_SIZE buckets. Each level is driven by its own clock and therefor each ++ * level has a different granularity. ++ * ++ * The level granularity is: LVL_CLK_DIV ^ lvl ++ * The level clock frequency is: HZ / (LVL_CLK_DIV ^ level) ++ * ++ * The array level of a newly armed timer depends on the relative expiry ++ * time. The farther the expiry time is away the higher the array level and ++ * therefor the granularity becomes. ++ * ++ * Contrary to the original timer wheel implementation, which aims for 'exact' ++ * expiry of the timers, this implementation removes the need for recascading ++ * the timers into the lower array levels. The previous 'classic' timer wheel ++ * implementation of the kernel already violated the 'exact' expiry by adding ++ * slack to the expiry time to provide batched expiration. The granularity ++ * levels provide implicit batching. ++ * ++ * This is an optimization of the original timer wheel implementation for the ++ * majority of the timer wheel use cases: timeouts. The vast majority of ++ * timeout timers (networking, disk I/O ...) are canceled before expiry. If ++ * the timeout expires it indicates that normal operation is disturbed, so it ++ * does not matter much whether the timeout comes with a slight delay. ++ * ++ * The only exception to this are networking timers with a small expiry ++ * time. They rely on the granularity. Those fit into the first wheel level, ++ * which has HZ granularity. ++ * ++ * We don't have cascading anymore. timers with a expiry time above the ++ * capacity of the last wheel level are force expired at the maximum timeout ++ * value of the last wheel level. From data sampling we know that the maximum ++ * value observed is 5 days (network connection tracking), so this should not ++ * be an issue. ++ * ++ * The currently chosen array constants values are a good compromise between ++ * array size and granularity. 
++ * ++ * This results in the following granularity and range levels: ++ * ++ * HZ 1000 steps ++ * Level Offset Granularity Range ++ * 0 0 1 ms 0 ms - 63 ms ++ * 1 64 8 ms 64 ms - 511 ms ++ * 2 128 64 ms 512 ms - 4095 ms (512ms - ~4s) ++ * 3 192 512 ms 4096 ms - 32767 ms (~4s - ~32s) ++ * 4 256 4096 ms (~4s) 32768 ms - 262143 ms (~32s - ~4m) ++ * 5 320 32768 ms (~32s) 262144 ms - 2097151 ms (~4m - ~34m) ++ * 6 384 262144 ms (~4m) 2097152 ms - 16777215 ms (~34m - ~4h) ++ * 7 448 2097152 ms (~34m) 16777216 ms - 134217727 ms (~4h - ~1d) ++ * 8 512 16777216 ms (~4h) 134217728 ms - 1073741822 ms (~1d - ~12d) ++ * ++ * HZ 300 ++ * Level Offset Granularity Range ++ * 0 0 3 ms 0 ms - 210 ms ++ * 1 64 26 ms 213 ms - 1703 ms (213ms - ~1s) ++ * 2 128 213 ms 1706 ms - 13650 ms (~1s - ~13s) ++ * 3 192 1706 ms (~1s) 13653 ms - 109223 ms (~13s - ~1m) ++ * 4 256 13653 ms (~13s) 109226 ms - 873810 ms (~1m - ~14m) ++ * 5 320 109226 ms (~1m) 873813 ms - 6990503 ms (~14m - ~1h) ++ * 6 384 873813 ms (~14m) 6990506 ms - 55924050 ms (~1h - ~15h) ++ * 7 448 6990506 ms (~1h) 55924053 ms - 447392423 ms (~15h - ~5d) ++ * 8 512 55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d) ++ * ++ * HZ 250 ++ * Level Offset Granularity Range ++ * 0 0 4 ms 0 ms - 255 ms ++ * 1 64 32 ms 256 ms - 2047 ms (256ms - ~2s) ++ * 2 128 256 ms 2048 ms - 16383 ms (~2s - ~16s) ++ * 3 192 2048 ms (~2s) 16384 ms - 131071 ms (~16s - ~2m) ++ * 4 256 16384 ms (~16s) 131072 ms - 1048575 ms (~2m - ~17m) ++ * 5 320 131072 ms (~2m) 1048576 ms - 8388607 ms (~17m - ~2h) ++ * 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) ++ * 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) ++ * 8 512 67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d) ++ * ++ * HZ 100 ++ * Level Offset Granularity Range ++ * 0 0 10 ms 0 ms - 630 ms ++ * 1 64 80 ms 640 ms - 5110 ms (640ms - ~5s) ++ * 2 128 640 ms 5120 ms - 40950 ms (~5s - ~40s) ++ * 3 192 5120 ms (~5s) 40960 ms - 327670 ms (~40s - ~5m) ++ * 4 256 40960 ms (~40s) 327680 ms - 2621430 ms (~5m - ~43m) ++ * 5 320 327680 ms (~5m) 2621440 ms - 20971510 ms (~43m - ~5h) ++ * 6 384 2621440 ms (~43m) 20971520 ms - 167772150 ms (~5h - ~1d) ++ * 7 448 20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d) ++ */ ++ ++/* Clock divisor for the next level */ ++#define LVL_CLK_SHIFT 3 ++#define LVL_CLK_DIV (1UL << LVL_CLK_SHIFT) ++#define LVL_CLK_MASK (LVL_CLK_DIV - 1) ++#define LVL_SHIFT(n) ((n) * LVL_CLK_SHIFT) ++#define LVL_GRAN(n) (1UL << LVL_SHIFT(n)) ++ ++/* ++ * The time start value for each level to select the bucket at enqueue ++ * time. ++ */ ++#define LVL_START(n) ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT)) ++ ++/* Size of each clock level */ ++#define LVL_BITS 6 ++#define LVL_SIZE (1UL << LVL_BITS) ++#define LVL_MASK (LVL_SIZE - 1) ++#define LVL_OFFS(n) ((n) * LVL_SIZE) ++ ++/* Level depth */ ++#if HZ > 100 ++# define LVL_DEPTH 9 ++# else ++# define LVL_DEPTH 8 ++#endif ++ ++/* The cutoff (max. capacity of the wheel) */ ++#define WHEEL_TIMEOUT_CUTOFF (LVL_START(LVL_DEPTH)) ++#define WHEEL_TIMEOUT_MAX (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1)) ++ ++/* ++ * The resulting wheel size. If NOHZ is configured we allocate two ++ * wheels so we have a separate storage for the deferrable timers. 
++ */ ++#define WHEEL_SIZE (LVL_SIZE * LVL_DEPTH) ++ ++#ifdef CONFIG_NO_HZ_COMMON ++# define NR_BASES 2 ++# define BASE_STD 0 ++# define BASE_DEF 1 ++#else ++# define NR_BASES 1 ++# define BASE_STD 0 ++# define BASE_DEF 0 ++#endif ++ ++struct timer_base { ++ raw_spinlock_t lock; ++ struct timer_list *running_timer; ++ unsigned long clk; ++ unsigned long next_expiry; ++ unsigned int cpu; ++ bool is_idle; ++ bool must_forward_clk; ++ DECLARE_BITMAP(pending_map, WHEEL_SIZE); ++ struct hlist_head vectors[WHEEL_SIZE]; ++} ____cacheline_aligned; ++ ++static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); ++ ++#ifdef CONFIG_NO_HZ_COMMON ++ ++static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); ++static DEFINE_MUTEX(timer_keys_mutex); ++ ++static void timer_update_keys(struct work_struct *work); ++static DECLARE_WORK(timer_update_work, timer_update_keys); ++ ++#ifdef CONFIG_SMP ++unsigned int sysctl_timer_migration = 1; ++ ++DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); ++ ++static void timers_update_migration(void) ++{ ++ if (sysctl_timer_migration && tick_nohz_active) ++ static_branch_enable(&timers_migration_enabled); ++ else ++ static_branch_disable(&timers_migration_enabled); ++} ++#else ++static inline void timers_update_migration(void) { } ++#endif /* !CONFIG_SMP */ ++ ++static void timer_update_keys(struct work_struct *work) ++{ ++ mutex_lock(&timer_keys_mutex); ++ timers_update_migration(); ++ static_branch_enable(&timers_nohz_active); ++ mutex_unlock(&timer_keys_mutex); ++} ++ ++void timers_update_nohz(void) ++{ ++ schedule_work(&timer_update_work); ++} ++ ++int timer_migration_handler(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, ++ loff_t *ppos) ++{ ++ int ret; ++ ++ mutex_lock(&timer_keys_mutex); ++ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ if (!ret && write) ++ timers_update_migration(); ++ mutex_unlock(&timer_keys_mutex); ++ return ret; ++} ++ ++static inline bool is_timers_nohz_active(void) ++{ ++ return static_branch_unlikely(&timers_nohz_active); ++} ++#else ++static inline bool is_timers_nohz_active(void) { return false; } ++#endif /* NO_HZ_COMMON */ ++ ++static unsigned long round_jiffies_common(unsigned long j, int cpu, ++ bool force_up) ++{ ++ int rem; ++ unsigned long original = j; ++ ++ /* ++ * We don't want all cpus firing their timers at once hitting the ++ * same lock or cachelines, so we skew each extra cpu with an extra ++ * 3 jiffies. This 3 jiffies came originally from the mm/ code which ++ * already did this. ++ * The skew is done by adding 3*cpunr, then round, then subtract this ++ * extra offset again. ++ */ ++ j += cpu * 3; ++ ++ rem = j % HZ; ++ ++ /* ++ * If the target jiffie is just after a whole second (which can happen ++ * due to delays of the timer irq, long irq off times etc etc) then ++ * we should round down to the whole second, not up. Use 1/4th second ++ * as cutoff for this rounding as an extreme upper bound for this. ++ * But never round down if @force_up is set. ++ */ ++ if (rem < HZ/4 && !force_up) /* round down */ ++ j = j - rem; ++ else /* round up */ ++ j = j - rem + HZ; ++ ++ /* now that we have rounded, subtract the extra skew again */ ++ j -= cpu * 3; ++ ++ /* ++ * Make sure j is still in the future. Otherwise return the ++ * unmodified value. ++ */ ++ return time_is_after_jiffies(j) ? 
j : original; ++} ++ ++/** ++ * __round_jiffies - function to round jiffies to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * __round_jiffies() rounds an absolute time in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The exact rounding is skewed for each processor to avoid all ++ * processors firing at the exact same time, which could lead ++ * to lock contention or spurious cache line bouncing. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long __round_jiffies(unsigned long j, int cpu) ++{ ++ return round_jiffies_common(j, cpu, false); ++} ++EXPORT_SYMBOL_GPL(__round_jiffies); ++ ++/** ++ * __round_jiffies_relative - function to round jiffies to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * __round_jiffies_relative() rounds a time delta in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The exact rounding is skewed for each processor to avoid all ++ * processors firing at the exact same time, which could lead ++ * to lock contention or spurious cache line bouncing. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long __round_jiffies_relative(unsigned long j, int cpu) ++{ ++ unsigned long j0 = jiffies; ++ ++ /* Use j0 because jiffies might change while we run */ ++ return round_jiffies_common(j + j0, cpu, false) - j0; ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_relative); ++ ++/** ++ * round_jiffies - function to round jiffies to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * ++ * round_jiffies() rounds an absolute time in the future (in jiffies) ++ * up or down to (approximately) full seconds. This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long round_jiffies(unsigned long j) ++{ ++ return round_jiffies_common(j, raw_smp_processor_id(), false); ++} ++EXPORT_SYMBOL_GPL(round_jiffies); ++ ++/** ++ * round_jiffies_relative - function to round jiffies to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * ++ * round_jiffies_relative() rounds a time delta in the future (in jiffies) ++ * up or down to (approximately) full seconds. 
This is useful for timers ++ * for which the exact time they fire does not matter too much, as long as ++ * they fire approximately every X seconds. ++ * ++ * By rounding these timers to whole seconds, all such timers will fire ++ * at the same time, rather than at various times spread out. The goal ++ * of this is to have the CPU wake up less, which saves power. ++ * ++ * The return value is the rounded version of the @j parameter. ++ */ ++unsigned long round_jiffies_relative(unsigned long j) ++{ ++ return __round_jiffies_relative(j, raw_smp_processor_id()); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_relative); ++ ++/** ++ * __round_jiffies_up - function to round jiffies up to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * This is the same as __round_jiffies() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long __round_jiffies_up(unsigned long j, int cpu) ++{ ++ return round_jiffies_common(j, cpu, true); ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_up); ++ ++/** ++ * __round_jiffies_up_relative - function to round jiffies up to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * @cpu: the processor number on which the timeout will happen ++ * ++ * This is the same as __round_jiffies_relative() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) ++{ ++ unsigned long j0 = jiffies; ++ ++ /* Use j0 because jiffies might change while we run */ ++ return round_jiffies_common(j + j0, cpu, true) - j0; ++} ++EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); ++ ++/** ++ * round_jiffies_up - function to round jiffies up to a full second ++ * @j: the time in (absolute) jiffies that should be rounded ++ * ++ * This is the same as round_jiffies() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long round_jiffies_up(unsigned long j) ++{ ++ return round_jiffies_common(j, raw_smp_processor_id(), true); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_up); ++ ++/** ++ * round_jiffies_up_relative - function to round jiffies up to a full second ++ * @j: the time in (relative) jiffies that should be rounded ++ * ++ * This is the same as round_jiffies_relative() except that it will never ++ * round down. This is useful for timeouts for which the exact time ++ * of firing does not matter too much, as long as they don't fire too ++ * early. ++ */ ++unsigned long round_jiffies_up_relative(unsigned long j) ++{ ++ return __round_jiffies_up_relative(j, raw_smp_processor_id()); ++} ++EXPORT_SYMBOL_GPL(round_jiffies_up_relative); ++ ++ ++static inline unsigned int timer_get_idx(struct timer_list *timer) ++{ ++ return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT; ++} ++ ++static inline void timer_set_idx(struct timer_list *timer, unsigned int idx) ++{ ++ timer->flags = (timer->flags & ~TIMER_ARRAYMASK) | ++ idx << TIMER_ARRAYSHIFT; ++} ++ ++/* ++ * Helper function to calculate the array index for a given expiry ++ * time. 
++ */ ++static inline unsigned calc_index(unsigned expires, unsigned lvl) ++{ ++ expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); ++ return LVL_OFFS(lvl) + (expires & LVL_MASK); ++} ++ ++static int calc_wheel_index(unsigned long expires, unsigned long clk) ++{ ++ unsigned long delta = expires - clk; ++ unsigned int idx; ++ ++ if (delta < LVL_START(1)) { ++ idx = calc_index(expires, 0); ++ } else if (delta < LVL_START(2)) { ++ idx = calc_index(expires, 1); ++ } else if (delta < LVL_START(3)) { ++ idx = calc_index(expires, 2); ++ } else if (delta < LVL_START(4)) { ++ idx = calc_index(expires, 3); ++ } else if (delta < LVL_START(5)) { ++ idx = calc_index(expires, 4); ++ } else if (delta < LVL_START(6)) { ++ idx = calc_index(expires, 5); ++ } else if (delta < LVL_START(7)) { ++ idx = calc_index(expires, 6); ++ } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) { ++ idx = calc_index(expires, 7); ++ } else if ((long) delta < 0) { ++ idx = clk & LVL_MASK; ++ } else { ++ /* ++ * Force expire obscene large timeouts to expire at the ++ * capacity limit of the wheel. ++ */ ++ if (delta >= WHEEL_TIMEOUT_CUTOFF) ++ expires = clk + WHEEL_TIMEOUT_MAX; ++ ++ idx = calc_index(expires, LVL_DEPTH - 1); ++ } ++ return idx; ++} ++ ++/* ++ * Enqueue the timer into the hash bucket, mark it pending in ++ * the bitmap and store the index in the timer flags. ++ */ ++static void enqueue_timer(struct timer_base *base, struct timer_list *timer, ++ unsigned int idx) ++{ ++ hlist_add_head(&timer->entry, base->vectors + idx); ++ __set_bit(idx, base->pending_map); ++ timer_set_idx(timer, idx); ++} ++ ++static void ++__internal_add_timer(struct timer_base *base, struct timer_list *timer) ++{ ++ unsigned int idx; ++ ++ idx = calc_wheel_index(timer->expires, base->clk); ++ enqueue_timer(base, timer, idx); ++} ++ ++static void ++trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) ++{ ++ if (!is_timers_nohz_active()) ++ return; ++ ++ /* ++ * TODO: This wants some optimizing similar to the code below, but we ++ * will do that when we switch from push to pull for deferrable timers. ++ */ ++ if (timer->flags & TIMER_DEFERRABLE) { ++ if (tick_nohz_full_cpu(base->cpu)) ++ wake_up_nohz_cpu(base->cpu); ++ return; ++ } ++ ++ /* ++ * We might have to IPI the remote CPU if the base is idle and the ++ * timer is not deferrable. 
If the other CPU is on the way to idle ++ * then it can't set base->is_idle as we hold the base lock: ++ */ ++ if (!base->is_idle) ++ return; ++ ++ /* Check whether this is the new first expiring timer: */ ++ if (time_after_eq(timer->expires, base->next_expiry)) ++ return; ++ ++ /* ++ * Set the next expiry time and kick the CPU so it can reevaluate the ++ * wheel: ++ */ ++ if (time_before(timer->expires, base->clk)) { ++ /* ++ * Prevent from forward_timer_base() moving the base->clk ++ * backward ++ */ ++ base->next_expiry = base->clk; ++ } else { ++ base->next_expiry = timer->expires; ++ } ++ wake_up_nohz_cpu(base->cpu); ++} ++ ++static void ++internal_add_timer(struct timer_base *base, struct timer_list *timer) ++{ ++ __internal_add_timer(base, timer); ++ trigger_dyntick_cpu(base, timer); ++} ++ ++#ifdef CONFIG_DEBUG_OBJECTS_TIMERS ++ ++static struct debug_obj_descr timer_debug_descr; ++ ++static void *timer_debug_hint(void *addr) ++{ ++ return ((struct timer_list *) addr)->function; ++} ++ ++static bool timer_is_static_object(void *addr) ++{ ++ struct timer_list *timer = addr; ++ ++ return (timer->entry.pprev == NULL && ++ timer->entry.next == TIMER_ENTRY_STATIC); ++} ++ ++/* ++ * fixup_init is called when: ++ * - an active object is initialized ++ */ ++static bool timer_fixup_init(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_ACTIVE: ++ del_timer_sync(timer); ++ debug_object_init(timer, &timer_debug_descr); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* Stub timer callback for improperly used timers. */ ++static void stub_timer(struct timer_list *unused) ++{ ++ WARN_ON(1); ++} ++ ++/* ++ * fixup_activate is called when: ++ * - an active object is activated ++ * - an unknown non-static object is activated ++ */ ++static bool timer_fixup_activate(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_NOTAVAILABLE: ++ timer_setup(timer, stub_timer, 0); ++ return true; ++ ++ case ODEBUG_STATE_ACTIVE: ++ WARN_ON(1); ++ ++ default: ++ return false; ++ } ++} ++ ++/* ++ * fixup_free is called when: ++ * - an active object is freed ++ */ ++static bool timer_fixup_free(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_ACTIVE: ++ del_timer_sync(timer); ++ debug_object_free(timer, &timer_debug_descr); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* ++ * fixup_assert_init is called when: ++ * - an untracked/uninit-ed object is found ++ */ ++static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) ++{ ++ struct timer_list *timer = addr; ++ ++ switch (state) { ++ case ODEBUG_STATE_NOTAVAILABLE: ++ timer_setup(timer, stub_timer, 0); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static struct debug_obj_descr timer_debug_descr = { ++ .name = "timer_list", ++ .debug_hint = timer_debug_hint, ++ .is_static_object = timer_is_static_object, ++ .fixup_init = timer_fixup_init, ++ .fixup_activate = timer_fixup_activate, ++ .fixup_free = timer_fixup_free, ++ .fixup_assert_init = timer_fixup_assert_init, ++}; ++ ++static inline void debug_timer_init(struct timer_list *timer) ++{ ++ debug_object_init(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_activate(struct timer_list *timer) ++{ ++ debug_object_activate(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_deactivate(struct timer_list *timer) ++{ 
++ debug_object_deactivate(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_free(struct timer_list *timer) ++{ ++ debug_object_free(timer, &timer_debug_descr); ++} ++ ++static inline void debug_timer_assert_init(struct timer_list *timer) ++{ ++ debug_object_assert_init(timer, &timer_debug_descr); ++} ++ ++static void do_init_timer(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key); ++ ++void init_timer_on_stack_key(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ debug_object_init_on_stack(timer, &timer_debug_descr); ++ do_init_timer(timer, func, flags, name, key); ++} ++EXPORT_SYMBOL_GPL(init_timer_on_stack_key); ++ ++void destroy_timer_on_stack(struct timer_list *timer) ++{ ++ debug_object_free(timer, &timer_debug_descr); ++} ++EXPORT_SYMBOL_GPL(destroy_timer_on_stack); ++ ++#else ++static inline void debug_timer_init(struct timer_list *timer) { } ++static inline void debug_timer_activate(struct timer_list *timer) { } ++static inline void debug_timer_deactivate(struct timer_list *timer) { } ++static inline void debug_timer_assert_init(struct timer_list *timer) { } ++#endif ++ ++static inline void debug_init(struct timer_list *timer) ++{ ++ debug_timer_init(timer); ++ trace_timer_init(timer); ++} ++ ++static inline void ++debug_activate(struct timer_list *timer, unsigned long expires) ++{ ++ debug_timer_activate(timer); ++ trace_timer_start(timer, expires, timer->flags); ++} ++ ++static inline void debug_deactivate(struct timer_list *timer) ++{ ++ debug_timer_deactivate(timer); ++ trace_timer_cancel(timer); ++} ++ ++static inline void debug_assert_init(struct timer_list *timer) ++{ ++ debug_timer_assert_init(timer); ++} ++ ++static void do_init_timer(struct timer_list *timer, ++ void (*func)(struct timer_list *), ++ unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ timer->entry.pprev = NULL; ++ timer->function = func; ++ timer->flags = flags | raw_smp_processor_id(); ++ lockdep_init_map(&timer->lockdep_map, name, key, 0); ++} ++ ++/** ++ * init_timer_key - initialize a timer ++ * @timer: the timer to be initialized ++ * @func: timer callback function ++ * @flags: timer flags ++ * @name: name of the timer ++ * @key: lockdep class key of the fake lock used for tracking timer ++ * sync lock dependencies ++ * ++ * init_timer_key() must be done to a timer prior calling *any* of the ++ * other timer functions. 
++ */ ++void init_timer_key(struct timer_list *timer, ++ void (*func)(struct timer_list *), unsigned int flags, ++ const char *name, struct lock_class_key *key) ++{ ++ debug_init(timer); ++ do_init_timer(timer, func, flags, name, key); ++} ++EXPORT_SYMBOL(init_timer_key); ++ ++static inline void detach_timer(struct timer_list *timer, bool clear_pending) ++{ ++ struct hlist_node *entry = &timer->entry; ++ ++ debug_deactivate(timer); ++ ++ __hlist_del(entry); ++ if (clear_pending) ++ entry->pprev = NULL; ++ entry->next = LIST_POISON2; ++} ++ ++static int detach_if_pending(struct timer_list *timer, struct timer_base *base, ++ bool clear_pending) ++{ ++ unsigned idx = timer_get_idx(timer); ++ ++ if (!timer_pending(timer)) ++ return 0; ++ ++ if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) ++ __clear_bit(idx, base->pending_map); ++ ++ detach_timer(timer, clear_pending); ++ return 1; ++} ++ ++static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) ++{ ++ struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); ++ ++ /* ++ * If the timer is deferrable and NO_HZ_COMMON is set then we need ++ * to use the deferrable base. ++ */ ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) ++ base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); ++ return base; ++} ++ ++static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ /* ++ * If the timer is deferrable and NO_HZ_COMMON is set then we need ++ * to use the deferrable base. ++ */ ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) ++ base = this_cpu_ptr(&timer_bases[BASE_DEF]); ++ return base; ++} ++ ++static inline struct timer_base *get_timer_base(u32 tflags) ++{ ++ return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); ++} ++ ++static inline struct timer_base * ++get_target_base(struct timer_base *base, unsigned tflags) ++{ ++#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) ++ if (static_branch_likely(&timers_migration_enabled) && ++ !(tflags & TIMER_PINNED)) ++ return get_timer_cpu_base(tflags, get_nohz_timer_target()); ++#endif ++ return get_timer_this_cpu_base(tflags); ++} ++ ++static inline void forward_timer_base(struct timer_base *base) ++{ ++#ifdef CONFIG_NO_HZ_COMMON ++ unsigned long jnow; ++ ++ /* ++ * We only forward the base when we are idle or have just come out of ++ * idle (must_forward_clk logic), and have a delta between base clock ++ * and jiffies. In the common case, run_timers will take care of it. ++ */ ++ if (likely(!base->must_forward_clk)) ++ return; ++ ++ jnow = READ_ONCE(jiffies); ++ base->must_forward_clk = base->is_idle; ++ if ((long)(jnow - base->clk) < 2) ++ return; ++ ++ /* ++ * If the next expiry value is > jiffies, then we fast forward to ++ * jiffies otherwise we forward to the next expiry value. ++ */ ++ if (time_after(base->next_expiry, jnow)) { ++ base->clk = jnow; ++ } else { ++ if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk))) ++ return; ++ base->clk = base->next_expiry; ++ } ++#endif ++} ++ ++ ++/* ++ * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means ++ * that all timers which are tied to this base are locked, and the base itself ++ * is locked too. ++ * ++ * So __run_timers/migrate_timers can safely modify all timers which could ++ * be found in the base->vectors array. ++ * ++ * When a timer is migrating then the TIMER_MIGRATING flag is set and we need ++ * to wait until the migration is done. 
++ */ ++static struct timer_base *lock_timer_base(struct timer_list *timer, ++ unsigned long *flags) ++ __acquires(timer->base->lock) ++{ ++ for (;;) { ++ struct timer_base *base; ++ u32 tf; ++ ++ /* ++ * We need to use READ_ONCE() here, otherwise the compiler ++ * might re-read @tf between the check for TIMER_MIGRATING ++ * and spin_lock(). ++ */ ++ tf = READ_ONCE(timer->flags); ++ ++ if (!(tf & TIMER_MIGRATING)) { ++ base = get_timer_base(tf); ++ raw_spin_lock_irqsave(&base->lock, *flags); ++ if (timer->flags == tf) ++ return base; ++ raw_spin_unlock_irqrestore(&base->lock, *flags); ++ } ++ cpu_relax(); ++ } ++} ++ ++#define MOD_TIMER_PENDING_ONLY 0x01 ++#define MOD_TIMER_REDUCE 0x02 ++ ++static inline int ++__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) ++{ ++ struct timer_base *base, *new_base; ++ unsigned int idx = UINT_MAX; ++ unsigned long clk = 0, flags; ++ int ret = 0; ++ ++ BUG_ON(!timer->function); ++ ++ /* ++ * This is a common optimization triggered by the networking code - if ++ * the timer is re-modified to have the same timeout or ends up in the ++ * same array bucket then just return: ++ */ ++ if (timer_pending(timer)) { ++ /* ++ * The downside of this optimization is that it can result in ++ * larger granularity than you would get from adding a new ++ * timer with this expiry. ++ */ ++ long diff = timer->expires - expires; ++ ++ if (!diff) ++ return 1; ++ if (options & MOD_TIMER_REDUCE && diff <= 0) ++ return 1; ++ ++ /* ++ * We lock timer base and calculate the bucket index right ++ * here. If the timer ends up in the same bucket, then we ++ * just update the expiry time and avoid the whole ++ * dequeue/enqueue dance. ++ */ ++ base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); ++ ++ if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && ++ time_before_eq(timer->expires, expires)) { ++ ret = 1; ++ goto out_unlock; ++ } ++ ++ clk = base->clk; ++ idx = calc_wheel_index(expires, clk); ++ ++ /* ++ * Retrieve and compare the array index of the pending ++ * timer. If it matches set the expiry to the new value so a ++ * subsequent call will exit in the expires check above. ++ */ ++ if (idx == timer_get_idx(timer)) { ++ if (!(options & MOD_TIMER_REDUCE)) ++ timer->expires = expires; ++ else if (time_after(timer->expires, expires)) ++ timer->expires = expires; ++ ret = 1; ++ goto out_unlock; ++ } ++ } else { ++ base = lock_timer_base(timer, &flags); ++ forward_timer_base(base); ++ } ++ ++ ret = detach_if_pending(timer, base, false); ++ if (!ret && (options & MOD_TIMER_PENDING_ONLY)) ++ goto out_unlock; ++ ++ new_base = get_target_base(base, timer->flags); ++ ++ if (base != new_base) { ++ /* ++ * We are trying to schedule the timer on the new base. ++ * However we can't change timer's base while it is running, ++ * otherwise del_timer_sync() can't detect that the timer's ++ * handler yet has not finished. This also guarantees that the ++ * timer is serialized wrt itself. 
++ */ ++ if (likely(base->running_timer != timer)) { ++ /* See the comment in lock_timer_base() */ ++ timer->flags |= TIMER_MIGRATING; ++ ++ raw_spin_unlock(&base->lock); ++ base = new_base; ++ raw_spin_lock(&base->lock); ++ WRITE_ONCE(timer->flags, ++ (timer->flags & ~TIMER_BASEMASK) | base->cpu); ++ forward_timer_base(base); ++ } ++ } ++ ++ debug_activate(timer, expires); ++ ++ timer->expires = expires; ++ /* ++ * If 'idx' was calculated above and the base time did not advance ++ * between calculating 'idx' and possibly switching the base, only ++ * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise ++ * we need to (re)calculate the wheel index via ++ * internal_add_timer(). ++ */ ++ if (idx != UINT_MAX && clk == base->clk) { ++ enqueue_timer(base, timer, idx); ++ trigger_dyntick_cpu(base, timer); ++ } else { ++ internal_add_timer(base, timer); ++ } ++ ++out_unlock: ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ ++ return ret; ++} ++ ++/** ++ * mod_timer_pending - modify a pending timer's timeout ++ * @timer: the pending timer to be modified ++ * @expires: new timeout in jiffies ++ * ++ * mod_timer_pending() is the same for pending timers as mod_timer(), ++ * but will not re-activate and modify already deleted timers. ++ * ++ * It is useful for unserialized use of timers. ++ */ ++int mod_timer_pending(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); ++} ++EXPORT_SYMBOL(mod_timer_pending); ++ ++/** ++ * mod_timer - modify a timer's timeout ++ * @timer: the timer to be modified ++ * @expires: new timeout in jiffies ++ * ++ * mod_timer() is a more efficient way to update the expire field of an ++ * active timer (if the timer is inactive it will be activated) ++ * ++ * mod_timer(timer, expires) is equivalent to: ++ * ++ * del_timer(timer); timer->expires = expires; add_timer(timer); ++ * ++ * Note that if there are multiple unserialized concurrent users of the ++ * same timer, then mod_timer() is the only safe way to modify the timeout, ++ * since add_timer() cannot modify an already running timer. ++ * ++ * The function returns whether it has modified a pending timer or not. ++ * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an ++ * active timer returns 1.) ++ */ ++int mod_timer(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, 0); ++} ++EXPORT_SYMBOL(mod_timer); ++ ++/** ++ * timer_reduce - Modify a timer's timeout if it would reduce the timeout ++ * @timer: The timer to be modified ++ * @expires: New timeout in jiffies ++ * ++ * timer_reduce() is very similar to mod_timer(), except that it will only ++ * modify a running timer if that would reduce the expiration time (it will ++ * start a timer that isn't running). ++ */ ++int timer_reduce(struct timer_list *timer, unsigned long expires) ++{ ++ return __mod_timer(timer, expires, MOD_TIMER_REDUCE); ++} ++EXPORT_SYMBOL(timer_reduce); ++ ++/** ++ * add_timer - start a timer ++ * @timer: the timer to be added ++ * ++ * The kernel will do a ->function(@timer) callback from the ++ * timer interrupt at the ->expires point in the future. The ++ * current time is 'jiffies'. ++ * ++ * The timer's ->expires, ->function fields must be set prior calling this ++ * function. ++ * ++ * Timers with an ->expires field in the past will be executed in the next ++ * timer tick. 
++ */ ++void add_timer(struct timer_list *timer) ++{ ++ BUG_ON(timer_pending(timer)); ++ mod_timer(timer, timer->expires); ++} ++EXPORT_SYMBOL(add_timer); ++ ++/** ++ * add_timer_on - start a timer on a particular CPU ++ * @timer: the timer to be added ++ * @cpu: the CPU to start it on ++ * ++ * This is not very scalable on SMP. Double adds are not possible. ++ */ ++void add_timer_on(struct timer_list *timer, int cpu) ++{ ++ struct timer_base *new_base, *base; ++ unsigned long flags; ++ ++ BUG_ON(timer_pending(timer) || !timer->function); ++ ++ new_base = get_timer_cpu_base(timer->flags, cpu); ++ ++ /* ++ * If @timer was on a different CPU, it should be migrated with the ++ * old base locked to prevent other operations proceeding with the ++ * wrong base locked. See lock_timer_base(). ++ */ ++ base = lock_timer_base(timer, &flags); ++ if (base != new_base) { ++ timer->flags |= TIMER_MIGRATING; ++ ++ raw_spin_unlock(&base->lock); ++ base = new_base; ++ raw_spin_lock(&base->lock); ++ WRITE_ONCE(timer->flags, ++ (timer->flags & ~TIMER_BASEMASK) | cpu); ++ } ++ forward_timer_base(base); ++ ++ debug_activate(timer, timer->expires); ++ internal_add_timer(base, timer); ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++} ++EXPORT_SYMBOL_GPL(add_timer_on); ++ ++/** ++ * del_timer - deactivate a timer. ++ * @timer: the timer to be deactivated ++ * ++ * del_timer() deactivates a timer - this works on both active and inactive ++ * timers. ++ * ++ * The function returns whether it has deactivated a pending timer or not. ++ * (ie. del_timer() of an inactive timer returns 0, del_timer() of an ++ * active timer returns 1.) ++ */ ++int del_timer(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ unsigned long flags; ++ int ret = 0; ++ ++ debug_assert_init(timer); ++ ++ if (timer_pending(timer)) { ++ base = lock_timer_base(timer, &flags); ++ ret = detach_if_pending(timer, base, true); ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL(del_timer); ++ ++/** ++ * try_to_del_timer_sync - Try to deactivate a timer ++ * @timer: timer to delete ++ * ++ * This function tries to deactivate a timer. Upon successful (ret >= 0) ++ * exit the timer is not queued and the handler is not running on any CPU. ++ */ ++int try_to_del_timer_sync(struct timer_list *timer) ++{ ++ struct timer_base *base; ++ unsigned long flags; ++ int ret = -1; ++ ++ debug_assert_init(timer); ++ ++ base = lock_timer_base(timer, &flags); ++ ++ if (base->running_timer != timer) ++ ret = detach_if_pending(timer, base, true); ++ ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ ++ return ret; ++} ++EXPORT_SYMBOL(try_to_del_timer_sync); ++ ++#ifdef CONFIG_SMP ++/** ++ * del_timer_sync - deactivate a timer and wait for the handler to finish. ++ * @timer: the timer to be deactivated ++ * ++ * This function only differs from del_timer() on SMP: besides deactivating ++ * the timer it also makes sure the handler has finished executing on other ++ * CPUs. ++ * ++ * Synchronization rules: Callers must prevent restarting of the timer, ++ * otherwise this function is meaningless. It must not be called from ++ * interrupt contexts unless the timer is an irqsafe one. The caller must ++ * not hold locks which would prevent completion of the timer's ++ * handler. The timer's handler must not call add_timer_on(). Upon exit the ++ * timer is not queued and the handler is not running on any CPU. 
++ * ++ * Note: For !irqsafe timers, you must not hold locks that are held in ++ * interrupt context while calling this function. Even if the lock has ++ * nothing to do with the timer in question. Here's why:: ++ * ++ * CPU0 CPU1 ++ * ---- ---- ++ * ++ * call_timer_fn(); ++ * base->running_timer = mytimer; ++ * spin_lock_irq(somelock); ++ * ++ * spin_lock(somelock); ++ * del_timer_sync(mytimer); ++ * while (base->running_timer == mytimer); ++ * ++ * Now del_timer_sync() will never return and never release somelock. ++ * The interrupt on the other CPU is waiting to grab somelock but ++ * it has interrupted the softirq that CPU0 is waiting to finish. ++ * ++ * The function returns whether it has deactivated a pending timer or not. ++ */ ++int del_timer_sync(struct timer_list *timer) ++{ ++#ifdef CONFIG_LOCKDEP ++ unsigned long flags; ++ ++ /* ++ * If lockdep gives a backtrace here, please reference ++ * the synchronization rules above. ++ */ ++ local_irq_save(flags); ++ lock_map_acquire(&timer->lockdep_map); ++ lock_map_release(&timer->lockdep_map); ++ local_irq_restore(flags); ++#endif ++ /* ++ * don't use it in hardirq context, because it ++ * could lead to deadlock. ++ */ ++ WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); ++ for (;;) { ++ int ret = try_to_del_timer_sync(timer); ++ if (ret >= 0) ++ return ret; ++ cpu_relax(); ++ } ++} ++EXPORT_SYMBOL(del_timer_sync); ++#endif ++ ++static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) ++{ ++ int count = preempt_count(); ++ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * It is permissible to free the timer from inside the ++ * function that is called from it, this we need to take into ++ * account for lockdep too. To avoid bogus "held lock freed" ++ * warnings as well as problems when looking into ++ * timer->lockdep_map, make a copy and use that here. ++ */ ++ struct lockdep_map lockdep_map; ++ ++ lockdep_copy_map(&lockdep_map, &timer->lockdep_map); ++#endif ++ /* ++ * Couple the lock chain with the lock chain at ++ * del_timer_sync() by acquiring the lock_map around the fn() ++ * call here and in del_timer_sync(). ++ */ ++ lock_map_acquire(&lockdep_map); ++ ++ trace_timer_expire_entry(timer); ++ fn(timer); ++ trace_timer_expire_exit(timer); ++ ++ lock_map_release(&lockdep_map); ++ ++ if (count != preempt_count()) { ++ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", ++ fn, count, preempt_count()); ++ /* ++ * Restore the preempt count. That gives us a decent ++ * chance to survive and extract information. If the ++ * callback kept a lock held, bad luck, but not worse ++ * than the BUG() we had. 
++ */ ++ preempt_count_set(count); ++ } ++} ++ ++static void expire_timers(struct timer_base *base, struct hlist_head *head) ++{ ++ while (!hlist_empty(head)) { ++ struct timer_list *timer; ++ void (*fn)(struct timer_list *); ++ ++ timer = hlist_entry(head->first, struct timer_list, entry); ++ ++ base->running_timer = timer; ++ detach_timer(timer, true); ++ ++ fn = timer->function; ++ ++ if (timer->flags & TIMER_IRQSAFE) { ++ raw_spin_unlock(&base->lock); ++ call_timer_fn(timer, fn); ++ raw_spin_lock(&base->lock); ++ } else { ++ raw_spin_unlock_irq(&base->lock); ++ call_timer_fn(timer, fn); ++ raw_spin_lock_irq(&base->lock); ++ } ++ } ++} ++ ++static int __collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ unsigned long clk = base->clk; ++ struct hlist_head *vec; ++ int i, levels = 0; ++ unsigned int idx; ++ ++ for (i = 0; i < LVL_DEPTH; i++) { ++ idx = (clk & LVL_MASK) + i * LVL_SIZE; ++ ++ if (__test_and_clear_bit(idx, base->pending_map)) { ++ vec = base->vectors + idx; ++ hlist_move_list(vec, heads++); ++ levels++; ++ } ++ /* Is it time to look at the next level? */ ++ if (clk & LVL_CLK_MASK) ++ break; ++ /* Shift clock for the next level granularity */ ++ clk >>= LVL_CLK_SHIFT; ++ } ++ return levels; ++} ++ ++#ifdef CONFIG_NO_HZ_COMMON ++/* ++ * Find the next pending bucket of a level. Search from level start (@offset) ++ * + @clk upwards and if nothing there, search from start of the level ++ * (@offset) up to @offset + clk. ++ */ ++static int next_pending_bucket(struct timer_base *base, unsigned offset, ++ unsigned clk) ++{ ++ unsigned pos, start = offset + clk; ++ unsigned end = offset + LVL_SIZE; ++ ++ pos = find_next_bit(base->pending_map, end, start); ++ if (pos < end) ++ return pos - start; ++ ++ pos = find_next_bit(base->pending_map, start, offset); ++ return pos < start ? pos + LVL_SIZE - start : -1; ++} ++ ++/* ++ * Search the first expiring timer in the various clock levels. Caller must ++ * hold base->lock. ++ */ ++static unsigned long __next_timer_interrupt(struct timer_base *base) ++{ ++ unsigned long clk, next, adj; ++ unsigned lvl, offset = 0; ++ ++ next = base->clk + NEXT_TIMER_MAX_DELTA; ++ clk = base->clk; ++ for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { ++ int pos = next_pending_bucket(base, offset, clk & LVL_MASK); ++ ++ if (pos >= 0) { ++ unsigned long tmp = clk + (unsigned long) pos; ++ ++ tmp <<= LVL_SHIFT(lvl); ++ if (time_before(tmp, next)) ++ next = tmp; ++ } ++ /* ++ * Clock for the next level. If the current level clock lower ++ * bits are zero, we look at the next level as is. If not we ++ * need to advance it by one because that's going to be the ++ * next expiring bucket in that level. base->clk is the next ++ * expiring jiffie. So in case of: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 0 0 ++ * ++ * we have to look at all levels @index 0. With ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 0 2 ++ * ++ * LVL0 has the next expiring bucket @index 2. The upper ++ * levels have the next expiring bucket @index 1. ++ * ++ * In case that the propagation wraps the next level the same ++ * rules apply: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 ++ * 0 0 0 0 F 2 ++ * ++ * So after looking at LVL0 we get: ++ * ++ * LVL5 LVL4 LVL3 LVL2 LVL1 ++ * 0 0 0 1 0 ++ * ++ * So no propagation from LVL1 to LVL2 because that happened ++ * with the add already, but then we need to propagate further ++ * from LVL2 to LVL3. 
++ * ++ * So the simple check whether the lower bits of the current ++ * level are 0 or not is sufficient for all cases. ++ */ ++ adj = clk & LVL_CLK_MASK ? 1 : 0; ++ clk >>= LVL_CLK_SHIFT; ++ clk += adj; ++ } ++ return next; ++} ++ ++/* ++ * Check, if the next hrtimer event is before the next timer wheel ++ * event: ++ */ ++static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) ++{ ++ u64 nextevt = hrtimer_get_next_event(); ++ ++ /* ++ * If high resolution timers are enabled ++ * hrtimer_get_next_event() returns KTIME_MAX. ++ */ ++ if (expires <= nextevt) ++ return expires; ++ ++ /* ++ * If the next timer is already expired, return the tick base ++ * time so the tick is fired immediately. ++ */ ++ if (nextevt <= basem) ++ return basem; ++ ++ /* ++ * Round up to the next jiffie. High resolution timers are ++ * off, so the hrtimers are expired in the tick and we need to ++ * make sure that this tick really expires the timer to avoid ++ * a ping pong of the nohz stop code. ++ * ++ * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 ++ */ ++ return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; ++} ++ ++/** ++ * get_next_timer_interrupt - return the time (clock mono) of the next timer ++ * @basej: base time jiffies ++ * @basem: base time clock monotonic ++ * ++ * Returns the tick aligned clock monotonic time of the next pending ++ * timer or KTIME_MAX if no timer is pending. ++ */ ++u64 get_next_timer_interrupt(unsigned long basej, u64 basem) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ u64 expires = KTIME_MAX; ++ unsigned long nextevt; ++ bool is_max_delta; ++ ++ /* ++ * Pretend that there is no timer pending if the cpu is offline. ++ * Possible pending timers will be migrated later to an active cpu. ++ */ ++ if (cpu_is_offline(smp_processor_id())) ++ return expires; ++ ++ raw_spin_lock(&base->lock); ++ nextevt = __next_timer_interrupt(base); ++ is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); ++ base->next_expiry = nextevt; ++ /* ++ * We have a fresh next event. Check whether we can forward the ++ * base. We can only do that when @basej is past base->clk ++ * otherwise we might rewind base->clk. ++ */ ++ if (time_after(basej, base->clk)) { ++ if (time_after(nextevt, basej)) ++ base->clk = basej; ++ else if (time_after(nextevt, base->clk)) ++ base->clk = nextevt; ++ } ++ ++ if (time_before_eq(nextevt, basej)) { ++ expires = basem; ++ base->is_idle = false; ++ } else { ++ if (!is_max_delta) ++ expires = basem + (u64)(nextevt - basej) * TICK_NSEC; ++ /* ++ * If we expect to sleep more than a tick, mark the base idle. ++ * Also the tick is stopped so any added timer must forward ++ * the base clk itself to keep granularity small. This idle ++ * logic is only maintained for the BASE_STD base, deferrable ++ * timers may still see large granularity skew (by design). ++ */ ++ if ((expires - basem) > TICK_NSEC) { ++ base->must_forward_clk = true; ++ base->is_idle = true; ++ } ++ } ++ raw_spin_unlock(&base->lock); ++ ++ return cmp_next_hrtimer_event(basem, expires); ++} ++ ++/** ++ * timer_clear_idle - Clear the idle state of the timer base ++ * ++ * Called with interrupts disabled ++ */ ++void timer_clear_idle(void) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ /* ++ * We do this unlocked. The worst outcome is a remote enqueue sending ++ * a pointless IPI, but taking the lock would just make the window for ++ * sending the IPI a few instructions smaller for the cost of taking ++ * the lock in the exit from idle path. 
++ */ ++ base->is_idle = false; ++} ++ ++static int collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ unsigned long now = READ_ONCE(jiffies); ++ ++ /* ++ * NOHZ optimization. After a long idle sleep we need to forward the ++ * base to current jiffies. Avoid a loop by searching the bitfield for ++ * the next expiring timer. ++ */ ++ if ((long)(now - base->clk) > 2) { ++ unsigned long next = __next_timer_interrupt(base); ++ ++ /* ++ * If the next timer is ahead of time forward to current ++ * jiffies, otherwise forward to the next expiry time: ++ */ ++ if (time_after(next, now)) { ++ /* ++ * The call site will increment base->clk and then ++ * terminate the expiry loop immediately. ++ */ ++ base->clk = now; ++ return 0; ++ } ++ base->clk = next; ++ } ++ return __collect_expired_timers(base, heads); ++} ++#else ++static inline int collect_expired_timers(struct timer_base *base, ++ struct hlist_head *heads) ++{ ++ return __collect_expired_timers(base, heads); ++} ++#endif ++ ++/* ++ * Called from the timer interrupt handler to charge one tick to the current ++ * process. user_tick is 1 if the tick is user time, 0 for system. ++ */ ++void update_process_times(int user_tick) ++{ ++ struct task_struct *p = current; ++ ++ /* Note: this timer irq context must be accounted for as well. */ ++ account_process_tick(p, user_tick); ++ run_local_timers(); ++ rcu_check_callbacks(user_tick); ++#ifdef CONFIG_IRQ_WORK ++ if (in_irq()) ++ irq_work_tick(); ++#endif ++ scheduler_tick(); ++ if (IS_ENABLED(CONFIG_POSIX_TIMERS)) ++ run_posix_cpu_timers(p); ++ ++ /* The current CPU might make use of net randoms without receiving IRQs ++ * to renew them often enough. Let's update the net_rand_state from a ++ * non-constant value that's not affine to the number of calls to make ++ * sure it's updated when there's some activity (we don't care in idle). ++ */ ++ this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); ++} ++ ++/** ++ * __run_timers - run all expired timers (if any) on this CPU. ++ * @base: the timer vector to be processed. ++ */ ++static inline void __run_timers(struct timer_base *base) ++{ ++ struct hlist_head heads[LVL_DEPTH]; ++ int levels; ++ ++ if (!time_after_eq(jiffies, base->clk)) ++ return; ++ ++ raw_spin_lock_irq(&base->lock); ++ ++ /* ++ * timer_base::must_forward_clk must be cleared before running ++ * timers so that any timer functions that call mod_timer() will ++ * not try to forward the base. Idle tracking / clock forwarding ++ * logic is only used with BASE_STD timers. ++ * ++ * The must_forward_clk flag is cleared unconditionally also for ++ * the deferrable base. The deferrable base is not affected by idle ++ * tracking and never forwarded, so clearing the flag is a NOOP. ++ * ++ * The fact that the deferrable base is never forwarded can cause ++ * large variations in granularity for deferrable timers, but they ++ * can be deferred for long periods due to idle anyway. ++ */ ++ base->must_forward_clk = false; ++ ++ while (time_after_eq(jiffies, base->clk)) { ++ ++ levels = collect_expired_timers(base, heads); ++ base->clk++; ++ ++ while (levels--) ++ expire_timers(base, heads + levels); ++ } ++ base->running_timer = NULL; ++ raw_spin_unlock_irq(&base->lock); ++} ++ ++/* ++ * This function runs timers and the timer-tq in bottom half context. 
++ */ ++static __latent_entropy void run_timer_softirq(struct softirq_action *h) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ __run_timers(base); ++ if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) ++ __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); ++} ++ ++/* ++ * Called by the local, per-CPU timer interrupt on SMP. ++ */ ++void run_local_timers(void) ++{ ++ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); ++ ++ hrtimer_run_queues(); ++ /* Raise the softirq only if required. */ ++ if (time_before(jiffies, base->clk)) { ++ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) ++ return; ++ /* CPU is awake, so check the deferrable base. */ ++ base++; ++ if (time_before(jiffies, base->clk)) ++ return; ++ } ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++/* ++ * Since schedule_timeout()'s timer is defined on the stack, it must store ++ * the target task on the stack as well. ++ */ ++struct process_timer { ++ struct timer_list timer; ++ struct task_struct *task; ++}; ++ ++static void process_timeout(struct timer_list *t) ++{ ++ struct process_timer *timeout = from_timer(timeout, t, timer); ++ ++ wake_up_process(timeout->task); ++} ++ ++/** ++ * schedule_timeout - sleep until timeout ++ * @timeout: timeout value in jiffies ++ * ++ * Make the current task sleep until @timeout jiffies have ++ * elapsed. The routine will return immediately unless ++ * the current task state has been set (see set_current_state()). ++ * ++ * You can set the task state as follows - ++ * ++ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to ++ * pass before the routine returns unless the current task is explicitly ++ * woken up, (e.g. by wake_up_process())". ++ * ++ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is ++ * delivered to the current task or the current task is explicitly woken ++ * up. ++ * ++ * The current task state is guaranteed to be TASK_RUNNING when this ++ * routine returns. ++ * ++ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule ++ * the CPU away without a bound on the timeout. In this case the return ++ * value will be %MAX_SCHEDULE_TIMEOUT. ++ * ++ * Returns 0 when the timer has expired otherwise the remaining time in ++ * jiffies will be returned. In all cases the return value is guaranteed ++ * to be non-negative. ++ */ ++signed long __sched schedule_timeout(signed long timeout) ++{ ++ struct process_timer timer; ++ unsigned long expire; ++ ++ switch (timeout) ++ { ++ case MAX_SCHEDULE_TIMEOUT: ++ /* ++ * These two special cases are useful to be comfortable ++ * in the caller. Nothing more. We could take ++ * MAX_SCHEDULE_TIMEOUT from one of the negative value ++ * but I' d like to return a valid offset (>=0) to allow ++ * the caller to do everything it want with the retval. ++ */ ++ schedule(); ++ goto out; ++ default: ++ /* ++ * Another bit of PARANOID. Note that the retval will be ++ * 0 since no piece of kernel is supposed to do a check ++ * for a negative retval of schedule_timeout() (since it ++ * should never happens anyway). You just have the printk() ++ * that will tell you if something is gone wrong and where. 
++ */ ++ if (timeout < 0) { ++ printk(KERN_ERR "schedule_timeout: wrong timeout " ++ "value %lx\n", timeout); ++ dump_stack(); ++ current->state = TASK_RUNNING; ++ goto out; ++ } ++ } ++ ++ expire = timeout + jiffies; ++ ++ timer.task = current; ++ timer_setup_on_stack(&timer.timer, process_timeout, 0); ++ __mod_timer(&timer.timer, expire, 0); ++ schedule(); ++ del_singleshot_timer_sync(&timer.timer); ++ ++ /* Remove the timer from the object tracker */ ++ destroy_timer_on_stack(&timer.timer); ++ ++ timeout = expire - jiffies; ++ ++ out: ++ return timeout < 0 ? 0 : timeout; ++} ++EXPORT_SYMBOL(schedule_timeout); ++ ++/* ++ * We can use __set_current_state() here because schedule_timeout() calls ++ * schedule() unconditionally. ++ */ ++signed long __sched schedule_timeout_interruptible(signed long timeout) ++{ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_interruptible); ++ ++signed long __sched schedule_timeout_killable(signed long timeout) ++{ ++ __set_current_state(TASK_KILLABLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_killable); ++ ++signed long __sched schedule_timeout_uninterruptible(signed long timeout) ++{ ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_uninterruptible); ++ ++/* ++ * Like schedule_timeout_uninterruptible(), except this task will not contribute ++ * to load average. ++ */ ++signed long __sched schedule_timeout_idle(signed long timeout) ++{ ++ __set_current_state(TASK_IDLE); ++ return schedule_timeout(timeout); ++} ++EXPORT_SYMBOL(schedule_timeout_idle); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) ++{ ++ struct timer_list *timer; ++ int cpu = new_base->cpu; ++ ++ while (!hlist_empty(head)) { ++ timer = hlist_entry(head->first, struct timer_list, entry); ++ detach_timer(timer, false); ++ timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; ++ internal_add_timer(new_base, timer); ++ } ++} ++ ++int timers_prepare_cpu(unsigned int cpu) ++{ ++ struct timer_base *base; ++ int b; ++ ++ for (b = 0; b < NR_BASES; b++) { ++ base = per_cpu_ptr(&timer_bases[b], cpu); ++ base->clk = jiffies; ++ base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; ++ base->is_idle = false; ++ base->must_forward_clk = true; ++ } ++ return 0; ++} ++ ++int timers_dead_cpu(unsigned int cpu) ++{ ++ struct timer_base *old_base; ++ struct timer_base *new_base; ++ int b, i; ++ ++ BUG_ON(cpu_online(cpu)); ++ ++ for (b = 0; b < NR_BASES; b++) { ++ old_base = per_cpu_ptr(&timer_bases[b], cpu); ++ new_base = get_cpu_ptr(&timer_bases[b]); ++ /* ++ * The caller is globally serialized and nobody else ++ * takes two locks at once, deadlock is not possible. ++ */ ++ raw_spin_lock_irq(&new_base->lock); ++ raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); ++ ++ /* ++ * The current CPUs base clock might be stale. Update it ++ * before moving the timers over. 
++ */ ++ forward_timer_base(new_base); ++ ++ BUG_ON(old_base->running_timer); ++ ++ for (i = 0; i < WHEEL_SIZE; i++) ++ migrate_timer_list(new_base, old_base->vectors + i); ++ ++ raw_spin_unlock(&old_base->lock); ++ raw_spin_unlock_irq(&new_base->lock); ++ put_cpu_ptr(&timer_bases); ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_HOTPLUG_CPU */ ++ ++static void __init init_timer_cpu(int cpu) ++{ ++ struct timer_base *base; ++ int i; ++ ++ for (i = 0; i < NR_BASES; i++) { ++ base = per_cpu_ptr(&timer_bases[i], cpu); ++ base->cpu = cpu; ++ raw_spin_lock_init(&base->lock); ++ base->clk = jiffies; ++ } ++} ++ ++static void __init init_timer_cpus(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ init_timer_cpu(cpu); ++} ++ ++void __init init_timers(void) ++{ ++ init_timer_cpus(); ++ open_softirq(TIMER_SOFTIRQ, run_timer_softirq); ++} ++ ++/** ++ * msleep - sleep safely even with waitqueue interruptions ++ * @msecs: Time in milliseconds to sleep for ++ */ ++void msleep(unsigned int msecs) ++{ ++ unsigned long timeout = msecs_to_jiffies(msecs) + 1; ++ ++ while (timeout) ++ timeout = schedule_timeout_uninterruptible(timeout); ++} ++ ++EXPORT_SYMBOL(msleep); ++ ++/** ++ * msleep_interruptible - sleep waiting for signals ++ * @msecs: Time in milliseconds to sleep for ++ */ ++unsigned long msleep_interruptible(unsigned int msecs) ++{ ++ unsigned long timeout = msecs_to_jiffies(msecs) + 1; ++ ++ while (timeout && !signal_pending(current)) ++ timeout = schedule_timeout_interruptible(timeout); ++ return jiffies_to_msecs(timeout); ++} ++ ++EXPORT_SYMBOL(msleep_interruptible); ++ ++/** ++ * usleep_range - Sleep for an approximate time ++ * @min: Minimum time in usecs to sleep ++ * @max: Maximum time in usecs to sleep ++ * ++ * In non-atomic context where the exact wakeup time is flexible, use ++ * usleep_range() instead of udelay(). The sleep improves responsiveness ++ * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces ++ * power usage by allowing hrtimers to take advantage of an already- ++ * scheduled interrupt instead of scheduling a new one just for this sleep. ++ */ ++void __sched usleep_range(unsigned long min, unsigned long max) ++{ ++ ktime_t exp = ktime_add_us(ktime_get(), min); ++ u64 delta = (u64)(max - min) * NSEC_PER_USEC; ++ ++ for (;;) { ++ __set_current_state(TASK_UNINTERRUPTIBLE); ++ /* Do not return before the requested sleep time has elapsed */ ++ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) ++ break; ++ } ++} ++EXPORT_SYMBOL(usleep_range); +diff -uprN kernel/kernel/trace/ftrace.c kernel_new/kernel/trace/ftrace.c +--- kernel/kernel/trace/ftrace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/ftrace.c 2021-04-01 18:28:07.814863110 +0800 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + #include + +@@ -214,8 +215,17 @@ static ftrace_func_t ftrace_ops_get_list + + static void update_ftrace_function(void) + { ++ struct ftrace_ops *ops; + ftrace_func_t func; + ++ for (ops = ftrace_ops_list; ++ ops != &ftrace_list_end; ops = ops->next) ++ if (ops->flags & FTRACE_OPS_FL_IPIPE_EXCLUSIVE) { ++ set_function_trace_op = ops; ++ func = ops->func; ++ goto set_pointers; ++ } ++ + /* + * Prepare the ftrace_ops that the arch callback will use. 
+ * If there's only one ftrace_ops registered, the ftrace_ops_list +@@ -245,6 +255,7 @@ static void update_ftrace_function(void) + + update_function_graph_func(); + ++ set_pointers: + /* If there's no change, then do nothing more here */ + if (ftrace_trace_function == func) + return; +@@ -2627,6 +2638,9 @@ void __weak arch_ftrace_update_code(int + + static void ftrace_run_update_code(int command) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++#endif /* CONFIG_IPIPE */ + int ret; + + ret = ftrace_arch_code_modify_prepare(); +@@ -5618,10 +5632,10 @@ static int ftrace_process_locs(struct mo + * reason to cause large interrupt latencies while we do it. + */ + if (!mod) +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + ftrace_update_code(mod, start_pg); + if (!mod) +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + ret = 0; + out: + mutex_unlock(&ftrace_lock); +@@ -6166,9 +6180,11 @@ void __init ftrace_init(void) + unsigned long count, flags; + int ret; + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + ret = ftrace_dyn_arch_init(); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); ++ ++ /* ftrace_dyn_arch_init places the return code in addr */ + if (ret) + goto failed; + +@@ -6321,7 +6337,16 @@ __ftrace_ops_list_func(unsigned long ip, + } + } while_for_each_ftrace_op(op); + out: +- preempt_enable_notrace(); ++#ifdef CONFIG_IPIPE ++ if (hard_irqs_disabled() || !__ipipe_root_p) ++ /* ++ * Nothing urgent to schedule here. At latest the timer tick ++ * will pick up whatever the tracing functions kicked off. ++ */ ++ preempt_enable_no_resched_notrace(); ++ else ++#endif ++ preempt_enable_notrace(); + trace_clear_recursion(bit); + } + +diff -uprN kernel/kernel/trace/ftrace.c.orig kernel_new/kernel/trace/ftrace.c.orig +--- kernel/kernel/trace/ftrace.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/ftrace.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,7133 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Infrastructure for profiling code inserted by 'gcc -pg'. 
++ * ++ * Copyright (C) 2007-2008 Steven Rostedt ++ * Copyright (C) 2004-2008 Ingo Molnar ++ * ++ * Originally ported from the -rt patch by: ++ * Copyright (C) 2007 Arnaldo Carvalho de Melo ++ * ++ * Based on code in the latency_tracer, that is: ++ * ++ * Copyright (C) 2004-2006 Ingo Molnar ++ * Copyright (C) 2004 Nadia Yvette Chambers ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++#include "trace_output.h" ++#include "trace_stat.h" ++ ++#define FTRACE_WARN_ON(cond) \ ++ ({ \ ++ int ___r = cond; \ ++ if (WARN_ON(___r)) \ ++ ftrace_kill(); \ ++ ___r; \ ++ }) ++ ++#define FTRACE_WARN_ON_ONCE(cond) \ ++ ({ \ ++ int ___r = cond; \ ++ if (WARN_ON_ONCE(___r)) \ ++ ftrace_kill(); \ ++ ___r; \ ++ }) ++ ++/* hash bits for specific function selection */ ++#define FTRACE_HASH_BITS 7 ++#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) ++#define FTRACE_HASH_DEFAULT_BITS 10 ++#define FTRACE_HASH_MAX_BITS 12 ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++#define INIT_OPS_HASH(opsname) \ ++ .func_hash = &opsname.local_hash, \ ++ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), ++#define ASSIGN_OPS_HASH(opsname, val) \ ++ .func_hash = val, \ ++ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), ++#else ++#define INIT_OPS_HASH(opsname) ++#define ASSIGN_OPS_HASH(opsname, val) ++#endif ++ ++static struct ftrace_ops ftrace_list_end __read_mostly = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, ++ INIT_OPS_HASH(ftrace_list_end) ++}; ++ ++/* ftrace_enabled is a method to turn ftrace on or off */ ++int ftrace_enabled __read_mostly; ++static int last_ftrace_enabled; ++ ++/* Current function tracing op */ ++struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; ++/* What to set function_trace_op to */ ++static struct ftrace_ops *set_function_trace_op; ++ ++static bool ftrace_pids_enabled(struct ftrace_ops *ops) ++{ ++ struct trace_array *tr; ++ ++ if (!(ops->flags & FTRACE_OPS_FL_PID) || !ops->private) ++ return false; ++ ++ tr = ops->private; ++ ++ return tr->function_pids != NULL; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops); ++ ++/* ++ * ftrace_disabled is set when an anomaly is discovered. ++ * ftrace_disabled is much stronger than ftrace_enabled. ++ */ ++static int ftrace_disabled __read_mostly; ++ ++static DEFINE_MUTEX(ftrace_lock); ++ ++static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ++ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ++static struct ftrace_ops global_ops; ++ ++#if ARCH_SUPPORTS_FTRACE_OPS ++static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs); ++#else ++/* See comment below, where ftrace_ops_list_func is defined */ ++static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); ++#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) ++#endif ++ ++/* ++ * Traverse the ftrace_global_list, invoking all entries. The reason that we ++ * can use rcu_dereference_raw_notrace() is that elements removed from this list ++ * are simply leaked, so there is no need to interact with a grace-period ++ * mechanism. 
The rcu_dereference_raw_notrace() calls are needed to handle ++ * concurrent insertions into the ftrace_global_list. ++ * ++ * Silly Alpha and silly pointer-speculation compiler optimizations! ++ */ ++#define do_for_each_ftrace_op(op, list) \ ++ op = rcu_dereference_raw_notrace(list); \ ++ do ++ ++/* ++ * Optimized for just a single item in the list (as that is the normal case). ++ */ ++#define while_for_each_ftrace_op(op) \ ++ while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ ++ unlikely((op) != &ftrace_list_end)) ++ ++static inline void ftrace_ops_init(struct ftrace_ops *ops) ++{ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { ++ mutex_init(&ops->local_hash.regex_lock); ++ ops->func_hash = &ops->local_hash; ++ ops->flags |= FTRACE_OPS_FL_INITIALIZED; ++ } ++#endif ++} ++ ++static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ struct trace_array *tr = op->private; ++ ++ if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid)) ++ return; ++ ++ op->saved_func(ip, parent_ip, op, regs); ++} ++ ++static void ftrace_sync(struct work_struct *work) ++{ ++ /* ++ * This function is just a stub to implement a hard force ++ * of synchronize_sched(). This requires synchronizing ++ * tasks even in userspace and idle. ++ * ++ * Yes, function tracing is rude. ++ */ ++} ++ ++static void ftrace_sync_ipi(void *data) ++{ ++ /* Probably not needed, but do it anyway */ ++ smp_rmb(); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static void update_function_graph_func(void); ++ ++/* Both enabled by default (can be cleared by function_graph tracer flags */ ++static bool fgraph_sleep_time = true; ++static bool fgraph_graph_time = true; ++ ++#else ++static inline void update_function_graph_func(void) { } ++#endif ++ ++ ++static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) ++{ ++ /* ++ * If this is a dynamic, RCU, or per CPU ops, or we force list func, ++ * then it needs to call the list anyway. ++ */ ++ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || ++ FTRACE_FORCE_LIST_FUNC) ++ return ftrace_ops_list_func; ++ ++ return ftrace_ops_get_func(ops); ++} ++ ++static void update_ftrace_function(void) ++{ ++ ftrace_func_t func; ++ ++ /* ++ * Prepare the ftrace_ops that the arch callback will use. ++ * If there's only one ftrace_ops registered, the ftrace_ops_list ++ * will point to the ops we want. ++ */ ++ set_function_trace_op = rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)); ++ ++ /* If there's no ftrace_ops registered, just call the stub function */ ++ if (set_function_trace_op == &ftrace_list_end) { ++ func = ftrace_stub; ++ ++ /* ++ * If we are at the end of the list and this ops is ++ * recursion safe and not dynamic and the arch supports passing ops, ++ * then have the mcount trampoline call the function directly. ++ */ ++ } else if (rcu_dereference_protected(ftrace_ops_list->next, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ func = ftrace_ops_get_list_func(ftrace_ops_list); ++ ++ } else { ++ /* Just use the default ftrace_ops */ ++ set_function_trace_op = &ftrace_list_end; ++ func = ftrace_ops_list_func; ++ } ++ ++ update_function_graph_func(); ++ ++ /* If there's no change, then do nothing more here */ ++ if (ftrace_trace_function == func) ++ return; ++ ++ /* ++ * If we are using the list function, it doesn't care ++ * about the function_trace_ops. 
++ */ ++ if (func == ftrace_ops_list_func) { ++ ftrace_trace_function = func; ++ /* ++ * Don't even bother setting function_trace_ops, ++ * it would be racy to do so anyway. ++ */ ++ return; ++ } ++ ++#ifndef CONFIG_DYNAMIC_FTRACE ++ /* ++ * For static tracing, we need to be a bit more careful. ++ * The function change takes affect immediately. Thus, ++ * we need to coorditate the setting of the function_trace_ops ++ * with the setting of the ftrace_trace_function. ++ * ++ * Set the function to the list ops, which will call the ++ * function we want, albeit indirectly, but it handles the ++ * ftrace_ops and doesn't depend on function_trace_op. ++ */ ++ ftrace_trace_function = ftrace_ops_list_func; ++ /* ++ * Make sure all CPUs see this. Yes this is slow, but static ++ * tracing is slow and nasty to have enabled. ++ */ ++ schedule_on_each_cpu(ftrace_sync); ++ /* Now all cpus are using the list ops. */ ++ function_trace_op = set_function_trace_op; ++ /* Make sure the function_trace_op is visible on all CPUs */ ++ smp_wmb(); ++ /* Nasty way to force a rmb on all cpus */ ++ smp_call_function(ftrace_sync_ipi, NULL, 1); ++ /* OK, we are all set to update the ftrace_trace_function now! */ ++#endif /* !CONFIG_DYNAMIC_FTRACE */ ++ ++ ftrace_trace_function = func; ++} ++ ++static void add_ftrace_ops(struct ftrace_ops __rcu **list, ++ struct ftrace_ops *ops) ++{ ++ rcu_assign_pointer(ops->next, *list); ++ ++ /* ++ * We are entering ops into the list but another ++ * CPU might be walking that list. We need to make sure ++ * the ops->next pointer is valid before another CPU sees ++ * the ops pointer included into the list. ++ */ ++ rcu_assign_pointer(*list, ops); ++} ++ ++static int remove_ftrace_ops(struct ftrace_ops __rcu **list, ++ struct ftrace_ops *ops) ++{ ++ struct ftrace_ops **p; ++ ++ /* ++ * If we are removing the last function, then simply point ++ * to the ftrace_stub. ++ */ ++ if (rcu_dereference_protected(*list, ++ lockdep_is_held(&ftrace_lock)) == ops && ++ rcu_dereference_protected(ops->next, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ *list = &ftrace_list_end; ++ return 0; ++ } ++ ++ for (p = list; *p != &ftrace_list_end; p = &(*p)->next) ++ if (*p == ops) ++ break; ++ ++ if (*p != ops) ++ return -1; ++ ++ *p = (*p)->next; ++ return 0; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops); ++ ++static int __register_ftrace_function(struct ftrace_ops *ops) ++{ ++ if (ops->flags & FTRACE_OPS_FL_DELETED) ++ return -EINVAL; ++ ++ if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return -EBUSY; ++ ++#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ /* ++ * If the ftrace_ops specifies SAVE_REGS, then it only can be used ++ * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. ++ * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. 
++ */ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && ++ !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) ++ return -EINVAL; ++ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ++ ops->flags |= FTRACE_OPS_FL_SAVE_REGS; ++#endif ++ ++ if (!core_kernel_data((unsigned long)ops)) ++ ops->flags |= FTRACE_OPS_FL_DYNAMIC; ++ ++ add_ftrace_ops(&ftrace_ops_list, ops); ++ ++ /* Always save the function, and reset at unregistering */ ++ ops->saved_func = ops->func; ++ ++ if (ftrace_pids_enabled(ops)) ++ ops->func = ftrace_pid_func; ++ ++ ftrace_update_trampoline(ops); ++ ++ if (ftrace_enabled) ++ update_ftrace_function(); ++ ++ return 0; ++} ++ ++static int __unregister_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret; ++ ++ if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) ++ return -EBUSY; ++ ++ ret = remove_ftrace_ops(&ftrace_ops_list, ops); ++ ++ if (ret < 0) ++ return ret; ++ ++ if (ftrace_enabled) ++ update_ftrace_function(); ++ ++ ops->func = ops->saved_func; ++ ++ return 0; ++} ++ ++static void ftrace_update_pid_func(void) ++{ ++ struct ftrace_ops *op; ++ ++ /* Only do something if we are tracing something */ ++ if (ftrace_trace_function == ftrace_stub) ++ return; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op->flags & FTRACE_OPS_FL_PID) { ++ op->func = ftrace_pids_enabled(op) ? ++ ftrace_pid_func : op->saved_func; ++ ftrace_update_trampoline(op); ++ } ++ } while_for_each_ftrace_op(op); ++ ++ update_ftrace_function(); ++} ++ ++#ifdef CONFIG_FUNCTION_PROFILER ++struct ftrace_profile { ++ struct hlist_node node; ++ unsigned long ip; ++ unsigned long counter; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ unsigned long long time; ++ unsigned long long time_squared; ++#endif ++}; ++ ++struct ftrace_profile_page { ++ struct ftrace_profile_page *next; ++ unsigned long index; ++ struct ftrace_profile records[]; ++}; ++ ++struct ftrace_profile_stat { ++ atomic_t disabled; ++ struct hlist_head *hash; ++ struct ftrace_profile_page *pages; ++ struct ftrace_profile_page *start; ++ struct tracer_stat stat; ++}; ++ ++#define PROFILE_RECORDS_SIZE \ ++ (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) ++ ++#define PROFILES_PER_PAGE \ ++ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) ++ ++static int ftrace_profile_enabled __read_mostly; ++ ++/* ftrace_profile_lock - synchronize the enable and disable of the profiler */ ++static DEFINE_MUTEX(ftrace_profile_lock); ++ ++static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); ++ ++#define FTRACE_PROFILE_HASH_BITS 10 ++#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS) ++ ++static void * ++function_stat_next(void *v, int idx) ++{ ++ struct ftrace_profile *rec = v; ++ struct ftrace_profile_page *pg; ++ ++ pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); ++ ++ again: ++ if (idx != 0) ++ rec++; ++ ++ if ((void *)rec >= (void *)&pg->records[pg->index]) { ++ pg = pg->next; ++ if (!pg) ++ return NULL; ++ rec = &pg->records[0]; ++ if (!rec->counter) ++ goto again; ++ } ++ ++ return rec; ++} ++ ++static void *function_stat_start(struct tracer_stat *trace) ++{ ++ struct ftrace_profile_stat *stat = ++ container_of(trace, struct ftrace_profile_stat, stat); ++ ++ if (!stat || !stat->start) ++ return NULL; ++ ++ return function_stat_next(&stat->start->records[0], 0); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++/* function graph compares on total time */ ++static int function_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; ++ 
++ if (a->time < b->time) ++ return -1; ++ if (a->time > b->time) ++ return 1; ++ else ++ return 0; ++} ++#else ++/* not function graph compares against hits */ ++static int function_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; ++ ++ if (a->counter < b->counter) ++ return -1; ++ if (a->counter > b->counter) ++ return 1; ++ else ++ return 0; ++} ++#endif ++ ++static int function_stat_headers(struct seq_file *m) ++{ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_puts(m, " Function " ++ "Hit Time Avg s^2\n" ++ " -------- " ++ "--- ---- --- ---\n"); ++#else ++ seq_puts(m, " Function Hit\n" ++ " -------- ---\n"); ++#endif ++ return 0; ++} ++ ++static int function_stat_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_profile *rec = v; ++ char str[KSYM_SYMBOL_LEN]; ++ int ret = 0; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ static struct trace_seq s; ++ unsigned long long avg; ++ unsigned long long stddev; ++#endif ++ mutex_lock(&ftrace_profile_lock); ++ ++ /* we raced with function_profile_reset() */ ++ if (unlikely(rec->counter == 0)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ avg = div64_ul(rec->time, rec->counter); ++ if (tracing_thresh && (avg < tracing_thresh)) ++ goto out; ++#endif ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ seq_printf(m, " %-30.30s %10lu", str, rec->counter); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_puts(m, " "); ++ ++ /* Sample standard deviation (s^2) */ ++ if (rec->counter <= 1) ++ stddev = 0; ++ else { ++ /* ++ * Apply Welford's method: ++ * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) ++ */ ++ stddev = rec->counter * rec->time_squared - ++ rec->time * rec->time; ++ ++ /* ++ * Divide only 1000 for ns^2 -> us^2 conversion. ++ * trace_print_graph_duration will divide 1000 again. ++ */ ++ stddev = div64_ul(stddev, ++ rec->counter * (rec->counter - 1) * 1000); ++ } ++ ++ trace_seq_init(&s); ++ trace_print_graph_duration(rec->time, &s); ++ trace_seq_puts(&s, " "); ++ trace_print_graph_duration(avg, &s); ++ trace_seq_puts(&s, " "); ++ trace_print_graph_duration(stddev, &s); ++ trace_print_seq(m, &s); ++#endif ++ seq_putc(m, '\n'); ++out: ++ mutex_unlock(&ftrace_profile_lock); ++ ++ return ret; ++} ++ ++static void ftrace_profile_reset(struct ftrace_profile_stat *stat) ++{ ++ struct ftrace_profile_page *pg; ++ ++ pg = stat->pages = stat->start; ++ ++ while (pg) { ++ memset(pg->records, 0, PROFILE_RECORDS_SIZE); ++ pg->index = 0; ++ pg = pg->next; ++ } ++ ++ memset(stat->hash, 0, ++ FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); ++} ++ ++int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) ++{ ++ struct ftrace_profile_page *pg; ++ int functions; ++ int pages; ++ int i; ++ ++ /* If we already allocated, do nothing */ ++ if (stat->pages) ++ return 0; ++ ++ stat->pages = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!stat->pages) ++ return -ENOMEM; ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ functions = ftrace_update_tot_cnt; ++#else ++ /* ++ * We do not know the number of functions that exist because ++ * dynamic tracing is what counts them. With past experience ++ * we have around 20K functions. That should be more than enough. ++ * It is highly unlikely we will execute every function in ++ * the kernel. 
++ */ ++ functions = 20000; ++#endif ++ ++ pg = stat->start = stat->pages; ++ ++ pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); ++ ++ for (i = 1; i < pages; i++) { ++ pg->next = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!pg->next) ++ goto out_free; ++ pg = pg->next; ++ } ++ ++ return 0; ++ ++ out_free: ++ pg = stat->start; ++ while (pg) { ++ unsigned long tmp = (unsigned long)pg; ++ ++ pg = pg->next; ++ free_page(tmp); ++ } ++ ++ stat->pages = NULL; ++ stat->start = NULL; ++ ++ return -ENOMEM; ++} ++ ++static int ftrace_profile_init_cpu(int cpu) ++{ ++ struct ftrace_profile_stat *stat; ++ int size; ++ ++ stat = &per_cpu(ftrace_profile_stats, cpu); ++ ++ if (stat->hash) { ++ /* If the profile is already created, simply reset it */ ++ ftrace_profile_reset(stat); ++ return 0; ++ } ++ ++ /* ++ * We are profiling all functions, but usually only a few thousand ++ * functions are hit. We'll make a hash of 1024 items. ++ */ ++ size = FTRACE_PROFILE_HASH_SIZE; ++ ++ stat->hash = kcalloc(size, sizeof(struct hlist_head), GFP_KERNEL); ++ ++ if (!stat->hash) ++ return -ENOMEM; ++ ++ /* Preallocate the function profiling pages */ ++ if (ftrace_profile_pages_init(stat) < 0) { ++ kfree(stat->hash); ++ stat->hash = NULL; ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static int ftrace_profile_init(void) ++{ ++ int cpu; ++ int ret = 0; ++ ++ for_each_possible_cpu(cpu) { ++ ret = ftrace_profile_init_cpu(cpu); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++/* interrupts must be disabled */ ++static struct ftrace_profile * ++ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec; ++ struct hlist_head *hhd; ++ unsigned long key; ++ ++ key = hash_long(ip, FTRACE_PROFILE_HASH_BITS); ++ hhd = &stat->hash[key]; ++ ++ if (hlist_empty(hhd)) ++ return NULL; ++ ++ hlist_for_each_entry_rcu_notrace(rec, hhd, node) { ++ if (rec->ip == ip) ++ return rec; ++ } ++ ++ return NULL; ++} ++ ++static void ftrace_add_profile(struct ftrace_profile_stat *stat, ++ struct ftrace_profile *rec) ++{ ++ unsigned long key; ++ ++ key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS); ++ hlist_add_head_rcu(&rec->node, &stat->hash[key]); ++} ++ ++/* ++ * The memory is already allocated, this simply finds a new record to use. 
++ */ ++static struct ftrace_profile * ++ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec = NULL; ++ ++ /* prevent recursion (from NMIs) */ ++ if (atomic_inc_return(&stat->disabled) != 1) ++ goto out; ++ ++ /* ++ * Try to find the function again since an NMI ++ * could have added it ++ */ ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (rec) ++ goto out; ++ ++ if (stat->pages->index == PROFILES_PER_PAGE) { ++ if (!stat->pages->next) ++ goto out; ++ stat->pages = stat->pages->next; ++ } ++ ++ rec = &stat->pages->records[stat->pages->index++]; ++ rec->ip = ip; ++ ftrace_add_profile(stat, rec); ++ ++ out: ++ atomic_dec(&stat->disabled); ++ ++ return rec; ++} ++ ++static void ++function_profile_call(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *ops, struct pt_regs *regs) ++{ ++ struct ftrace_profile_stat *stat; ++ struct ftrace_profile *rec; ++ unsigned long flags; ++ ++ if (!ftrace_profile_enabled) ++ return; ++ ++ local_irq_save(flags); ++ ++ stat = this_cpu_ptr(&ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; ++ ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (!rec) { ++ rec = ftrace_profile_alloc(stat, ip); ++ if (!rec) ++ goto out; ++ } ++ ++ rec->counter++; ++ out: ++ local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static int profile_graph_entry(struct ftrace_graph_ent *trace) ++{ ++ int index = current->curr_ret_stack; ++ ++ function_profile_call(trace->func, 0, NULL, NULL); ++ ++ /* If function graph is shutting down, ret_stack can be NULL */ ++ if (!current->ret_stack) ++ return 0; ++ ++ if (index >= 0 && index < FTRACE_RETFUNC_DEPTH) ++ current->ret_stack[index].subtime = 0; ++ ++ return 1; ++} ++ ++static void profile_graph_return(struct ftrace_graph_ret *trace) ++{ ++ struct ftrace_profile_stat *stat; ++ unsigned long long calltime; ++ struct ftrace_profile *rec; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ stat = this_cpu_ptr(&ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; ++ ++ /* If the calltime was zero'd ignore it */ ++ if (!trace->calltime) ++ goto out; ++ ++ calltime = trace->rettime - trace->calltime; ++ ++ if (!fgraph_graph_time) { ++ int index; ++ ++ index = current->curr_ret_stack; ++ ++ /* Append this call time to the parent time to subtract */ ++ if (index) ++ current->ret_stack[index - 1].subtime += calltime; ++ ++ if (current->ret_stack[index].subtime < calltime) ++ calltime -= current->ret_stack[index].subtime; ++ else ++ calltime = 0; ++ } ++ ++ rec = ftrace_find_profiled_func(stat, trace->func); ++ if (rec) { ++ rec->time += calltime; ++ rec->time_squared += calltime * calltime; ++ } ++ ++ out: ++ local_irq_restore(flags); ++} ++ ++static int register_ftrace_profiler(void) ++{ ++ return register_ftrace_graph(&profile_graph_return, ++ &profile_graph_entry); ++} ++ ++static void unregister_ftrace_profiler(void) ++{ ++ unregister_ftrace_graph(); ++} ++#else ++static struct ftrace_ops ftrace_profile_ops __read_mostly = { ++ .func = function_profile_call, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, ++ INIT_OPS_HASH(ftrace_profile_ops) ++}; ++ ++static int register_ftrace_profiler(void) ++{ ++ return register_ftrace_function(&ftrace_profile_ops); ++} ++ ++static void unregister_ftrace_profiler(void) ++{ ++ unregister_ftrace_function(&ftrace_profile_ops); ++} ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++static ssize_t ++ftrace_profile_write(struct file 
*filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ unsigned long val; ++ int ret; ++ ++ ret = kstrtoul_from_user(ubuf, cnt, 10, &val); ++ if (ret) ++ return ret; ++ ++ val = !!val; ++ ++ mutex_lock(&ftrace_profile_lock); ++ if (ftrace_profile_enabled ^ val) { ++ if (val) { ++ ret = ftrace_profile_init(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ++ ret = register_ftrace_profiler(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ftrace_profile_enabled = 1; ++ } else { ++ ftrace_profile_enabled = 0; ++ /* ++ * unregister_ftrace_profiler calls stop_machine ++ * so this acts like an synchronize_sched. ++ */ ++ unregister_ftrace_profiler(); ++ } ++ } ++ out: ++ mutex_unlock(&ftrace_profile_lock); ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++static ssize_t ++ftrace_profile_read(struct file *filp, char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ char buf[64]; /* big enough to hold a number */ ++ int r; ++ ++ r = sprintf(buf, "%u\n", ftrace_profile_enabled); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} ++ ++static const struct file_operations ftrace_profile_fops = { ++ .open = tracing_open_generic, ++ .read = ftrace_profile_read, ++ .write = ftrace_profile_write, ++ .llseek = default_llseek, ++}; ++ ++/* used to initialize the real stat files */ ++static struct tracer_stat function_stats __initdata = { ++ .name = "functions", ++ .stat_start = function_stat_start, ++ .stat_next = function_stat_next, ++ .stat_cmp = function_stat_cmp, ++ .stat_headers = function_stat_headers, ++ .stat_show = function_stat_show ++}; ++ ++static __init void ftrace_profile_tracefs(struct dentry *d_tracer) ++{ ++ struct ftrace_profile_stat *stat; ++ struct dentry *entry; ++ char *name; ++ int ret; ++ int cpu; ++ ++ for_each_possible_cpu(cpu) { ++ stat = &per_cpu(ftrace_profile_stats, cpu); ++ ++ name = kasprintf(GFP_KERNEL, "function%d", cpu); ++ if (!name) { ++ /* ++ * The files created are permanent, if something happens ++ * we still do not free memory. ++ */ ++ WARN(1, ++ "Could not allocate stat file for cpu %d\n", ++ cpu); ++ return; ++ } ++ stat->stat = function_stats; ++ stat->stat.name = name; ++ ret = register_stat_tracer(&stat->stat); ++ if (ret) { ++ WARN(1, ++ "Could not register function stat for cpu %d\n", ++ cpu); ++ kfree(name); ++ return; ++ } ++ } ++ ++ entry = tracefs_create_file("function_profile_enabled", 0644, ++ d_tracer, NULL, &ftrace_profile_fops); ++ if (!entry) ++ pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); ++} ++ ++#else /* CONFIG_FUNCTION_PROFILER */ ++static __init void ftrace_profile_tracefs(struct dentry *d_tracer) ++{ ++} ++#endif /* CONFIG_FUNCTION_PROFILER */ ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static int ftrace_graph_active; ++#else ++# define ftrace_graph_active 0 ++#endif ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ ++static struct ftrace_ops *removed_ops; ++ ++/* ++ * Set when doing a global update, like enabling all recs or disabling them. ++ * It is not set when just updating a single ftrace_ops. 
++ */ ++static bool update_all_ops; ++ ++#ifndef CONFIG_FTRACE_MCOUNT_RECORD ++# error Dynamic ftrace depends on MCOUNT_RECORD ++#endif ++ ++struct ftrace_func_entry { ++ struct hlist_node hlist; ++ unsigned long ip; ++}; ++ ++struct ftrace_func_probe { ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_ops ops; ++ struct trace_array *tr; ++ struct list_head list; ++ void *data; ++ int ref; ++}; ++ ++/* ++ * We make these constant because no one should touch them, ++ * but they are used as the default "empty hash", to avoid allocating ++ * it all the time. These are in a read only section such that if ++ * anyone does try to modify it, it will cause an exception. ++ */ ++static const struct hlist_head empty_buckets[1]; ++static const struct ftrace_hash empty_hash = { ++ .buckets = (struct hlist_head *)empty_buckets, ++}; ++#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash) ++ ++static struct ftrace_ops global_ops = { ++ .func = ftrace_stub, ++ .local_hash.notrace_hash = EMPTY_HASH, ++ .local_hash.filter_hash = EMPTY_HASH, ++ INIT_OPS_HASH(global_ops) ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID, ++}; ++ ++/* ++ * Used by the stack undwinder to know about dynamic ftrace trampolines. ++ */ ++struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) ++{ ++ struct ftrace_ops *op = NULL; ++ ++ /* ++ * Some of the ops may be dynamically allocated, ++ * they are freed after a synchronize_sched(). ++ */ ++ preempt_disable_notrace(); ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* ++ * This is to check for dynamically allocated trampolines. ++ * Trampolines that are in kernel text will have ++ * core_kernel_text() return true. ++ */ ++ if (op->trampoline && op->trampoline_size) ++ if (addr >= op->trampoline && ++ addr < op->trampoline + op->trampoline_size) { ++ preempt_enable_notrace(); ++ return op; ++ } ++ } while_for_each_ftrace_op(op); ++ preempt_enable_notrace(); ++ ++ return NULL; ++} ++ ++/* ++ * This is used by __kernel_text_address() to return true if the ++ * address is on a dynamically allocated trampoline that would ++ * not return true for either core_kernel_text() or ++ * is_module_text_address(). 
++ */ ++bool is_ftrace_trampoline(unsigned long addr) ++{ ++ return ftrace_ops_trampoline(addr) != NULL; ++} ++ ++struct ftrace_page { ++ struct ftrace_page *next; ++ struct dyn_ftrace *records; ++ int index; ++ int size; ++}; ++ ++#define ENTRY_SIZE sizeof(struct dyn_ftrace) ++#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) ++ ++/* estimate from running different kernels */ ++#define NR_TO_INIT 10000 ++ ++static struct ftrace_page *ftrace_pages_start; ++static struct ftrace_page *ftrace_pages; ++ ++static __always_inline unsigned long ++ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip) ++{ ++ if (hash->size_bits > 0) ++ return hash_long(ip, hash->size_bits); ++ ++ return 0; ++} ++ ++/* Only use this function if ftrace_hash_empty() has already been tested */ ++static __always_inline struct ftrace_func_entry * ++__ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) ++{ ++ unsigned long key; ++ struct ftrace_func_entry *entry; ++ struct hlist_head *hhd; ++ ++ key = ftrace_hash_key(hash, ip); ++ hhd = &hash->buckets[key]; ++ ++ hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { ++ if (entry->ip == ip) ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * ftrace_lookup_ip - Test to see if an ip exists in an ftrace_hash ++ * @hash: The hash to look at ++ * @ip: The instruction pointer to test ++ * ++ * Search a given @hash to see if a given instruction pointer (@ip) ++ * exists in it. ++ * ++ * Returns the entry that holds the @ip if found. NULL otherwise. ++ */ ++struct ftrace_func_entry * ++ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) ++{ ++ if (ftrace_hash_empty(hash)) ++ return NULL; ++ ++ return __ftrace_lookup_ip(hash, ip); ++} ++ ++static void __add_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ struct hlist_head *hhd; ++ unsigned long key; ++ ++ key = ftrace_hash_key(hash, entry->ip); ++ hhd = &hash->buckets[key]; ++ hlist_add_head(&entry->hlist, hhd); ++ hash->count++; ++} ++ ++static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ ++ entry = kmalloc(sizeof(*entry), GFP_KERNEL); ++ if (!entry) ++ return -ENOMEM; ++ ++ entry->ip = ip; ++ __add_hash_entry(hash, entry); ++ ++ return 0; ++} ++ ++static void ++free_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ hlist_del(&entry->hlist); ++ kfree(entry); ++ hash->count--; ++} ++ ++static void ++remove_hash_entry(struct ftrace_hash *hash, ++ struct ftrace_func_entry *entry) ++{ ++ hlist_del_rcu(&entry->hlist); ++ hash->count--; ++} ++ ++static void ftrace_hash_clear(struct ftrace_hash *hash) ++{ ++ struct hlist_head *hhd; ++ struct hlist_node *tn; ++ struct ftrace_func_entry *entry; ++ int size = 1 << hash->size_bits; ++ int i; ++ ++ if (!hash->count) ++ return; ++ ++ for (i = 0; i < size; i++) { ++ hhd = &hash->buckets[i]; ++ hlist_for_each_entry_safe(entry, tn, hhd, hlist) ++ free_hash_entry(hash, entry); ++ } ++ FTRACE_WARN_ON(hash->count); ++} ++ ++static void free_ftrace_mod(struct ftrace_mod_load *ftrace_mod) ++{ ++ list_del(&ftrace_mod->list); ++ kfree(ftrace_mod->module); ++ kfree(ftrace_mod->func); ++ kfree(ftrace_mod); ++} ++ ++static void clear_ftrace_mod_list(struct list_head *head) ++{ ++ struct ftrace_mod_load *p, *n; ++ ++ /* stack tracer isn't supported yet */ ++ if (!head) ++ return; ++ ++ mutex_lock(&ftrace_lock); ++ list_for_each_entry_safe(p, n, head, list) ++ free_ftrace_mod(p); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void free_ftrace_hash(struct 
ftrace_hash *hash) ++{ ++ if (!hash || hash == EMPTY_HASH) ++ return; ++ ftrace_hash_clear(hash); ++ kfree(hash->buckets); ++ kfree(hash); ++} ++ ++static void __free_ftrace_hash_rcu(struct rcu_head *rcu) ++{ ++ struct ftrace_hash *hash; ++ ++ hash = container_of(rcu, struct ftrace_hash, rcu); ++ free_ftrace_hash(hash); ++} ++ ++static void free_ftrace_hash_rcu(struct ftrace_hash *hash) ++{ ++ if (!hash || hash == EMPTY_HASH) ++ return; ++ call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); ++} ++ ++void ftrace_free_filter(struct ftrace_ops *ops) ++{ ++ ftrace_ops_init(ops); ++ free_ftrace_hash(ops->func_hash->filter_hash); ++ free_ftrace_hash(ops->func_hash->notrace_hash); ++} ++ ++static struct ftrace_hash *alloc_ftrace_hash(int size_bits) ++{ ++ struct ftrace_hash *hash; ++ int size; ++ ++ hash = kzalloc(sizeof(*hash), GFP_KERNEL); ++ if (!hash) ++ return NULL; ++ ++ size = 1 << size_bits; ++ hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); ++ ++ if (!hash->buckets) { ++ kfree(hash); ++ return NULL; ++ } ++ ++ hash->size_bits = size_bits; ++ ++ return hash; ++} ++ ++ ++static int ftrace_add_mod(struct trace_array *tr, ++ const char *func, const char *module, ++ int enable) ++{ ++ struct ftrace_mod_load *ftrace_mod; ++ struct list_head *mod_head = enable ? &tr->mod_trace : &tr->mod_notrace; ++ ++ ftrace_mod = kzalloc(sizeof(*ftrace_mod), GFP_KERNEL); ++ if (!ftrace_mod) ++ return -ENOMEM; ++ ++ ftrace_mod->func = kstrdup(func, GFP_KERNEL); ++ ftrace_mod->module = kstrdup(module, GFP_KERNEL); ++ ftrace_mod->enable = enable; ++ ++ if (!ftrace_mod->func || !ftrace_mod->module) ++ goto out_free; ++ ++ list_add(&ftrace_mod->list, mod_head); ++ ++ return 0; ++ ++ out_free: ++ free_ftrace_mod(ftrace_mod); ++ ++ return -ENOMEM; ++} ++ ++static struct ftrace_hash * ++alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_hash *new_hash; ++ int size; ++ int ret; ++ int i; ++ ++ new_hash = alloc_ftrace_hash(size_bits); ++ if (!new_hash) ++ return NULL; ++ ++ if (hash) ++ new_hash->flags = hash->flags; ++ ++ /* Empty hash? */ ++ if (ftrace_hash_empty(hash)) ++ return new_hash; ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ ret = add_hash_entry(new_hash, entry->ip); ++ if (ret < 0) ++ goto free_hash; ++ } ++ } ++ ++ FTRACE_WARN_ON(new_hash->count != hash->count); ++ ++ return new_hash; ++ ++ free_hash: ++ free_ftrace_hash(new_hash); ++ return NULL; ++} ++ ++static void ++ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash); ++static void ++ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); ++ ++static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, ++ struct ftrace_hash *new_hash); ++ ++static struct ftrace_hash * ++__ftrace_hash_move(struct ftrace_hash *src) ++{ ++ struct ftrace_func_entry *entry; ++ struct hlist_node *tn; ++ struct hlist_head *hhd; ++ struct ftrace_hash *new_hash; ++ int size = src->count; ++ int bits = 0; ++ int i; ++ ++ /* ++ * If the new source is empty, just return the empty_hash. 
++ */ ++ if (ftrace_hash_empty(src)) ++ return EMPTY_HASH; ++ ++ /* ++ * Make the hash size about 1/2 the # found ++ */ ++ for (size /= 2; size; size >>= 1) ++ bits++; ++ ++ /* Don't allocate too much */ ++ if (bits > FTRACE_HASH_MAX_BITS) ++ bits = FTRACE_HASH_MAX_BITS; ++ ++ new_hash = alloc_ftrace_hash(bits); ++ if (!new_hash) ++ return NULL; ++ ++ new_hash->flags = src->flags; ++ ++ size = 1 << src->size_bits; ++ for (i = 0; i < size; i++) { ++ hhd = &src->buckets[i]; ++ hlist_for_each_entry_safe(entry, tn, hhd, hlist) { ++ remove_hash_entry(src, entry); ++ __add_hash_entry(new_hash, entry); ++ } ++ } ++ ++ return new_hash; ++} ++ ++static int ++ftrace_hash_move(struct ftrace_ops *ops, int enable, ++ struct ftrace_hash **dst, struct ftrace_hash *src) ++{ ++ struct ftrace_hash *new_hash; ++ int ret; ++ ++ /* Reject setting notrace hash on IPMODIFY ftrace_ops */ ++ if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) ++ return -EINVAL; ++ ++ new_hash = __ftrace_hash_move(src); ++ if (!new_hash) ++ return -ENOMEM; ++ ++ /* Make sure this can be applied if it is IPMODIFY ftrace_ops */ ++ if (enable) { ++ /* IPMODIFY should be updated only when filter_hash updating */ ++ ret = ftrace_hash_ipmodify_update(ops, new_hash); ++ if (ret < 0) { ++ free_ftrace_hash(new_hash); ++ return ret; ++ } ++ } ++ ++ /* ++ * Remove the current set, update the hash and add ++ * them back. ++ */ ++ ftrace_hash_rec_disable_modify(ops, enable); ++ ++ rcu_assign_pointer(*dst, new_hash); ++ ++ ftrace_hash_rec_enable_modify(ops, enable); ++ ++ return 0; ++} ++ ++static bool hash_contains_ip(unsigned long ip, ++ struct ftrace_ops_hash *hash) ++{ ++ /* ++ * The function record is a match if it exists in the filter ++ * hash and not in the notrace hash. Note, an emty hash is ++ * considered a match for the filter hash, but an empty ++ * notrace hash is considered not in the notrace hash. ++ */ ++ return (ftrace_hash_empty(hash->filter_hash) || ++ __ftrace_lookup_ip(hash->filter_hash, ip)) && ++ (ftrace_hash_empty(hash->notrace_hash) || ++ !__ftrace_lookup_ip(hash->notrace_hash, ip)); ++} ++ ++/* ++ * Test the hashes for this ops to see if we want to call ++ * the ops->func or not. ++ * ++ * It's a match if the ip is in the ops->filter_hash or ++ * the filter_hash does not exist or is empty, ++ * AND ++ * the ip is not in the ops->notrace_hash. ++ * ++ * This needs to be called with preemption disabled as ++ * the hashes are freed with call_rcu_sched(). ++ */ ++static int ++ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) ++{ ++ struct ftrace_ops_hash hash; ++ int ret; ++ ++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ /* ++ * There's a small race when adding ops that the ftrace handler ++ * that wants regs, may be called without them. We can not ++ * allow that handler to be called if regs is NULL. ++ */ ++ if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS)) ++ return 0; ++#endif ++ ++ rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash); ++ rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash); ++ ++ if (hash_contains_ip(ip, &hash)) ++ ret = 1; ++ else ++ ret = 0; ++ ++ return ret; ++} ++ ++/* ++ * This is a double for. Do not use 'break' to break out of the loop, ++ * you must use a goto. 
++ */ ++#define do_for_each_ftrace_rec(pg, rec) \ ++ for (pg = ftrace_pages_start; pg; pg = pg->next) { \ ++ int _____i; \ ++ for (_____i = 0; _____i < pg->index; _____i++) { \ ++ rec = &pg->records[_____i]; ++ ++#define while_for_each_ftrace_rec() \ ++ } \ ++ } ++ ++ ++static int ftrace_cmp_recs(const void *a, const void *b) ++{ ++ const struct dyn_ftrace *key = a; ++ const struct dyn_ftrace *rec = b; ++ ++ if (key->flags < rec->ip) ++ return -1; ++ if (key->ip >= rec->ip + MCOUNT_INSN_SIZE) ++ return 1; ++ return 0; ++} ++ ++/** ++ * ftrace_location_range - return the first address of a traced location ++ * if it touches the given ip range ++ * @start: start of range to search. ++ * @end: end of range to search (inclusive). @end points to the last byte ++ * to check. ++ * ++ * Returns rec->ip if the related ftrace location is a least partly within ++ * the given address range. That is, the first address of the instruction ++ * that is either a NOP or call to the function tracer. It checks the ftrace ++ * internal tables to determine if the address belongs or not. ++ */ ++unsigned long ftrace_location_range(unsigned long start, unsigned long end) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct dyn_ftrace key; ++ ++ key.ip = start; ++ key.flags = end; /* overload flags, as it is unsigned long */ ++ ++ for (pg = ftrace_pages_start; pg; pg = pg->next) { ++ if (end < pg->records[0].ip || ++ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) ++ continue; ++ rec = bsearch(&key, pg->records, pg->index, ++ sizeof(struct dyn_ftrace), ++ ftrace_cmp_recs); ++ if (rec) ++ return rec->ip; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ftrace_location - return true if the ip giving is a traced location ++ * @ip: the instruction pointer to check ++ * ++ * Returns rec->ip if @ip given is a pointer to a ftrace location. ++ * That is, the instruction that is either a NOP or call to ++ * the function tracer. It checks the ftrace internal tables to ++ * determine if the address belongs or not. ++ */ ++unsigned long ftrace_location(unsigned long ip) ++{ ++ return ftrace_location_range(ip, ip); ++} ++ ++/** ++ * ftrace_text_reserved - return true if range contains an ftrace location ++ * @start: start of range to search ++ * @end: end of range to search (inclusive). @end points to the last byte to check. ++ * ++ * Returns 1 if @start and @end contains a ftrace location. ++ * That is, the instruction that is either a NOP or call to ++ * the function tracer. It checks the ftrace internal tables to ++ * determine if the address belongs or not. 
++ */ ++int ftrace_text_reserved(const void *start, const void *end) ++{ ++ unsigned long ret; ++ ++ ret = ftrace_location_range((unsigned long)start, ++ (unsigned long)end); ++ ++ return (int)!!ret; ++} ++ ++/* Test if ops registered to this rec needs regs */ ++static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ bool keep_regs = false; ++ ++ for (ops = ftrace_ops_list; ++ ops != &ftrace_list_end; ops = ops->next) { ++ /* pass rec in as regs to have non-NULL val */ ++ if (ftrace_ops_test(ops, rec->ip, rec)) { ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ++ keep_regs = true; ++ break; ++ } ++ } ++ } ++ ++ return keep_regs; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); ++static struct ftrace_ops * ++ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); ++ ++static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, ++ int filter_hash, ++ bool inc) ++{ ++ struct ftrace_hash *hash; ++ struct ftrace_hash *other_hash; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ bool update = false; ++ int count = 0; ++ int all = false; ++ ++ /* Only update if the ops has been registered */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return false; ++ ++ /* ++ * In the filter_hash case: ++ * If the count is zero, we update all records. ++ * Otherwise we just update the items in the hash. ++ * ++ * In the notrace_hash case: ++ * We enable the update in the hash. ++ * As disabling notrace means enabling the tracing, ++ * and enabling notrace means disabling, the inc variable ++ * gets inversed. ++ */ ++ if (filter_hash) { ++ hash = ops->func_hash->filter_hash; ++ other_hash = ops->func_hash->notrace_hash; ++ if (ftrace_hash_empty(hash)) ++ all = true; ++ } else { ++ inc = !inc; ++ hash = ops->func_hash->notrace_hash; ++ other_hash = ops->func_hash->filter_hash; ++ /* ++ * If the notrace hash has no items, ++ * then there's nothing to do. ++ */ ++ if (ftrace_hash_empty(hash)) ++ return false; ++ } ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ int in_other_hash = 0; ++ int in_hash = 0; ++ int match = 0; ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (all) { ++ /* ++ * Only the filter_hash affects all records. ++ * Update if the record is not in the notrace hash. ++ */ ++ if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) ++ match = 1; ++ } else { ++ in_hash = !!ftrace_lookup_ip(hash, rec->ip); ++ in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); ++ ++ /* ++ * If filter_hash is set, we want to match all functions ++ * that are in the hash but not in the other hash. ++ * ++ * If filter_hash is not set, then we are decrementing. ++ * That means we match anything that is in the hash ++ * and also in the other_hash. That is, we need to turn ++ * off functions in the other hash because they are disabled ++ * by this hash. ++ */ ++ if (filter_hash && in_hash && !in_other_hash) ++ match = 1; ++ else if (!filter_hash && in_hash && ++ (in_other_hash || ftrace_hash_empty(other_hash))) ++ match = 1; ++ } ++ if (!match) ++ continue; ++ ++ if (inc) { ++ rec->flags++; ++ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == FTRACE_REF_MAX)) ++ return false; ++ ++ /* ++ * If there's only a single callback registered to a ++ * function, and the ops has a trampoline registered ++ * for it, then we can call it directly. 
++ */ ++ if (ftrace_rec_count(rec) == 1 && ops->trampoline) ++ rec->flags |= FTRACE_FL_TRAMP; ++ else ++ /* ++ * If we are adding another function callback ++ * to this function, and the previous had a ++ * custom trampoline in use, then we need to go ++ * back to the default trampoline. ++ */ ++ rec->flags &= ~FTRACE_FL_TRAMP; ++ ++ /* ++ * If any ops wants regs saved for this function ++ * then all ops will get saved regs. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) ++ rec->flags |= FTRACE_FL_REGS; ++ } else { ++ if (FTRACE_WARN_ON(ftrace_rec_count(rec) == 0)) ++ return false; ++ rec->flags--; ++ ++ /* ++ * If the rec had REGS enabled and the ops that is ++ * being removed had REGS set, then see if there is ++ * still any ops for this record that wants regs. ++ * If not, we can stop recording them. ++ */ ++ if (ftrace_rec_count(rec) > 0 && ++ rec->flags & FTRACE_FL_REGS && ++ ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ++ if (!test_rec_ops_needs_regs(rec)) ++ rec->flags &= ~FTRACE_FL_REGS; ++ } ++ ++ /* ++ * The TRAMP needs to be set only if rec count ++ * is decremented to one, and the ops that is ++ * left has a trampoline. As TRAMP can only be ++ * enabled if there is only a single ops attached ++ * to it. ++ */ ++ if (ftrace_rec_count(rec) == 1 && ++ ftrace_find_tramp_ops_any(rec)) ++ rec->flags |= FTRACE_FL_TRAMP; ++ else ++ rec->flags &= ~FTRACE_FL_TRAMP; ++ ++ /* ++ * flags will be cleared in ftrace_check_record() ++ * if rec count is zero. ++ */ ++ } ++ count++; ++ ++ /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ ++ update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE; ++ ++ /* Shortcut, if we handled all records, we are done. */ ++ if (!all && count == hash->count) ++ return update; ++ } while_for_each_ftrace_rec(); ++ ++ return update; ++} ++ ++static bool ftrace_hash_rec_disable(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ return __ftrace_hash_rec_update(ops, filter_hash, 0); ++} ++ ++static bool ftrace_hash_rec_enable(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ return __ftrace_hash_rec_update(ops, filter_hash, 1); ++} ++ ++static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, ++ int filter_hash, int inc) ++{ ++ struct ftrace_ops *op; ++ ++ __ftrace_hash_rec_update(ops, filter_hash, inc); ++ ++ if (ops->func_hash != &global_ops.local_hash) ++ return; ++ ++ /* ++ * If the ops shares the global_ops hash, then we need to update ++ * all ops that are enabled and use this hash. ++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* Already done */ ++ if (op == ops) ++ continue; ++ if (op->func_hash == &global_ops.local_hash) ++ __ftrace_hash_rec_update(op, filter_hash, inc); ++ } while_for_each_ftrace_op(op); ++} ++ ++static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ ftrace_hash_rec_update_modify(ops, filter_hash, 0); ++} ++ ++static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, ++ int filter_hash) ++{ ++ ftrace_hash_rec_update_modify(ops, filter_hash, 1); ++} ++ ++/* ++ * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK ++ * or no-needed to update, -EBUSY if it detects a conflict of the flag ++ * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs. ++ * Note that old_hash and new_hash has below meanings ++ * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) ++ * - If the hash is EMPTY_HASH, it hits nothing ++ * - Anything else hits the recs which match the hash entries. 
++ */ ++static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, ++ struct ftrace_hash *old_hash, ++ struct ftrace_hash *new_hash) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec, *end = NULL; ++ int in_old, in_new; ++ ++ /* Only update if the ops has been registered */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return 0; ++ ++ if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) ++ return 0; ++ ++ /* ++ * Since the IPMODIFY is a very address sensitive action, we do not ++ * allow ftrace_ops to set all functions to new hash. ++ */ ++ if (!new_hash || !old_hash) ++ return -EINVAL; ++ ++ /* Update rec->flags */ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ /* We need to update only differences of filter_hash */ ++ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); ++ in_new = !!ftrace_lookup_ip(new_hash, rec->ip); ++ if (in_old == in_new) ++ continue; ++ ++ if (in_new) { ++ /* New entries must ensure no others are using it */ ++ if (rec->flags & FTRACE_FL_IPMODIFY) ++ goto rollback; ++ rec->flags |= FTRACE_FL_IPMODIFY; ++ } else /* Removed entry */ ++ rec->flags &= ~FTRACE_FL_IPMODIFY; ++ } while_for_each_ftrace_rec(); ++ ++ return 0; ++ ++rollback: ++ end = rec; ++ ++ /* Roll back what we did above */ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (rec == end) ++ goto err_out; ++ ++ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); ++ in_new = !!ftrace_lookup_ip(new_hash, rec->ip); ++ if (in_old == in_new) ++ continue; ++ ++ if (in_new) ++ rec->flags &= ~FTRACE_FL_IPMODIFY; ++ else ++ rec->flags |= FTRACE_FL_IPMODIFY; ++ } while_for_each_ftrace_rec(); ++ ++err_out: ++ return -EBUSY; ++} ++ ++static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) ++{ ++ struct ftrace_hash *hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(hash)) ++ hash = NULL; ++ ++ return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); ++} ++ ++/* Disabling always succeeds */ ++static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) ++{ ++ struct ftrace_hash *hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(hash)) ++ hash = NULL; ++ ++ __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); ++} ++ ++static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, ++ struct ftrace_hash *new_hash) ++{ ++ struct ftrace_hash *old_hash = ops->func_hash->filter_hash; ++ ++ if (ftrace_hash_empty(old_hash)) ++ old_hash = NULL; ++ ++ if (ftrace_hash_empty(new_hash)) ++ new_hash = NULL; ++ ++ return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); ++} ++ ++static void print_ip_ins(const char *fmt, const unsigned char *p) ++{ ++ int i; ++ ++ printk(KERN_CONT "%s", fmt); ++ ++ for (i = 0; i < MCOUNT_INSN_SIZE; i++) ++ printk(KERN_CONT "%s%02x", i ? 
":" : "", p[i]); ++} ++ ++enum ftrace_bug_type ftrace_bug_type; ++const void *ftrace_expected; ++ ++static void print_bug_type(void) ++{ ++ switch (ftrace_bug_type) { ++ case FTRACE_BUG_UNKNOWN: ++ break; ++ case FTRACE_BUG_INIT: ++ pr_info("Initializing ftrace call sites\n"); ++ break; ++ case FTRACE_BUG_NOP: ++ pr_info("Setting ftrace call site to NOP\n"); ++ break; ++ case FTRACE_BUG_CALL: ++ pr_info("Setting ftrace call site to call ftrace function\n"); ++ break; ++ case FTRACE_BUG_UPDATE: ++ pr_info("Updating ftrace call site to call a different ftrace function\n"); ++ break; ++ } ++} ++ ++/** ++ * ftrace_bug - report and shutdown function tracer ++ * @failed: The failed type (EFAULT, EINVAL, EPERM) ++ * @rec: The record that failed ++ * ++ * The arch code that enables or disables the function tracing ++ * can call ftrace_bug() when it has detected a problem in ++ * modifying the code. @failed should be one of either: ++ * EFAULT - if the problem happens on reading the @ip address ++ * EINVAL - if what is read at @ip is not what was expected ++ * EPERM - if the problem happens on writting to the @ip address ++ */ ++void ftrace_bug(int failed, struct dyn_ftrace *rec) ++{ ++ unsigned long ip = rec ? rec->ip : 0; ++ ++ switch (failed) { ++ case -EFAULT: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on modifying "); ++ print_ip_sym(ip); ++ break; ++ case -EINVAL: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace failed to modify "); ++ print_ip_sym(ip); ++ print_ip_ins(" actual: ", (unsigned char *)ip); ++ pr_cont("\n"); ++ if (ftrace_expected) { ++ print_ip_ins(" expected: ", ftrace_expected); ++ pr_cont("\n"); ++ } ++ break; ++ case -EPERM: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on writing "); ++ print_ip_sym(ip); ++ break; ++ default: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on unknown error "); ++ print_ip_sym(ip); ++ } ++ print_bug_type(); ++ if (rec) { ++ struct ftrace_ops *ops = NULL; ++ ++ pr_info("ftrace record flags: %lx\n", rec->flags); ++ pr_cont(" (%ld)%s", ftrace_rec_count(rec), ++ rec->flags & FTRACE_FL_REGS ? " R" : " "); ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_any(rec); ++ if (ops) { ++ do { ++ pr_cont("\ttramp: %pS (%pS)", ++ (void *)ops->trampoline, ++ (void *)ops->func); ++ ops = ftrace_find_tramp_ops_next(rec, ops); ++ } while (ops); ++ } else ++ pr_cont("\ttramp: ERROR!"); ++ ++ } ++ ip = ftrace_get_addr_curr(rec); ++ pr_cont("\n expected tramp: %lx\n", ip); ++ } ++} ++ ++static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) ++{ ++ unsigned long flag = 0UL; ++ ++ ftrace_bug_type = FTRACE_BUG_UNKNOWN; ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ return FTRACE_UPDATE_IGNORE; ++ ++ /* ++ * If we are updating calls: ++ * ++ * If the record has a ref count, then we need to enable it ++ * because someone is using it. ++ * ++ * Otherwise we make sure its disabled. ++ * ++ * If we are disabling calls, then disable all records that ++ * are enabled. ++ */ ++ if (enable && ftrace_rec_count(rec)) ++ flag = FTRACE_FL_ENABLED; ++ ++ /* ++ * If enabling and the REGS flag does not match the REGS_EN, or ++ * the TRAMP flag doesn't match the TRAMP_EN, then do not ignore ++ * this record. Set flags to fail the compare against ENABLED. 
++ */ ++ if (flag) { ++ if (!(rec->flags & FTRACE_FL_REGS) != ++ !(rec->flags & FTRACE_FL_REGS_EN)) ++ flag |= FTRACE_FL_REGS; ++ ++ if (!(rec->flags & FTRACE_FL_TRAMP) != ++ !(rec->flags & FTRACE_FL_TRAMP_EN)) ++ flag |= FTRACE_FL_TRAMP; ++ } ++ ++ /* If the state of this record hasn't changed, then do nothing */ ++ if ((rec->flags & FTRACE_FL_ENABLED) == flag) ++ return FTRACE_UPDATE_IGNORE; ++ ++ if (flag) { ++ /* Save off if rec is being enabled (for return value) */ ++ flag ^= rec->flags & FTRACE_FL_ENABLED; ++ ++ if (update) { ++ rec->flags |= FTRACE_FL_ENABLED; ++ if (flag & FTRACE_FL_REGS) { ++ if (rec->flags & FTRACE_FL_REGS) ++ rec->flags |= FTRACE_FL_REGS_EN; ++ else ++ rec->flags &= ~FTRACE_FL_REGS_EN; ++ } ++ if (flag & FTRACE_FL_TRAMP) { ++ if (rec->flags & FTRACE_FL_TRAMP) ++ rec->flags |= FTRACE_FL_TRAMP_EN; ++ else ++ rec->flags &= ~FTRACE_FL_TRAMP_EN; ++ } ++ } ++ ++ /* ++ * If this record is being updated from a nop, then ++ * return UPDATE_MAKE_CALL. ++ * Otherwise, ++ * return UPDATE_MODIFY_CALL to tell the caller to convert ++ * from the save regs, to a non-save regs function or ++ * vice versa, or from a trampoline call. ++ */ ++ if (flag & FTRACE_FL_ENABLED) { ++ ftrace_bug_type = FTRACE_BUG_CALL; ++ return FTRACE_UPDATE_MAKE_CALL; ++ } ++ ++ ftrace_bug_type = FTRACE_BUG_UPDATE; ++ return FTRACE_UPDATE_MODIFY_CALL; ++ } ++ ++ if (update) { ++ /* If there's no more users, clear all flags */ ++ if (!ftrace_rec_count(rec)) ++ rec->flags = 0; ++ else ++ /* ++ * Just disable the record, but keep the ops TRAMP ++ * and REGS states. The _EN flags must be disabled though. ++ */ ++ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | ++ FTRACE_FL_REGS_EN); ++ } ++ ++ ftrace_bug_type = FTRACE_BUG_NOP; ++ return FTRACE_UPDATE_MAKE_NOP; ++} ++ ++/** ++ * ftrace_update_record, set a record that now is tracing or not ++ * @rec: the record to update ++ * @enable: set to 1 if the record is tracing, zero to force disable ++ * ++ * The records that represent all functions that can be traced need ++ * to be updated when tracing has been enabled. ++ */ ++int ftrace_update_record(struct dyn_ftrace *rec, int enable) ++{ ++ return ftrace_check_record(rec, enable, 1); ++} ++ ++/** ++ * ftrace_test_record, check if the record has been enabled or not ++ * @rec: the record to test ++ * @enable: set to 1 to check if enabled, 0 if it is disabled ++ * ++ * The arch code may need to test if a record is already set to ++ * tracing to determine how to modify the function code that it ++ * represents. ++ */ ++int ftrace_test_record(struct dyn_ftrace *rec, int enable) ++{ ++ return ftrace_check_record(rec, enable, 0); ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, ++ struct ftrace_ops *op) ++{ ++ unsigned long ip = rec->ip; ++ ++ while_for_each_ftrace_op(op) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ /* ++ * Need to check removed ops first. 
++ * If they are being removed, and this rec has a tramp, ++ * and this rec is in the ops list, then it would be the ++ * one with the tramp. ++ */ ++ if (removed_ops) { ++ if (hash_contains_ip(ip, &removed_ops->old_hash)) ++ return removed_ops; ++ } ++ ++ /* ++ * Need to find the current trampoline for a rec. ++ * Now, a trampoline is only attached to a rec if there ++ * was a single 'ops' attached to it. But this can be called ++ * when we are adding another op to the rec or removing the ++ * current one. Thus, if the op is being added, we can ++ * ignore it because it hasn't attached itself to the rec ++ * yet. ++ * ++ * If an ops is being modified (hooking to different functions) ++ * then we don't care about the new functions that are being ++ * added, just the old ones (that are probably being removed). ++ * ++ * If we are adding an ops to a function that already is using ++ * a trampoline, it needs to be removed (trampolines are only ++ * for single ops connected), then an ops that is not being ++ * modified also needs to be checked. ++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ ++ if (!op->trampoline) ++ continue; ++ ++ /* ++ * If the ops is being added, it hasn't gotten to ++ * the point to be removed from this tree yet. ++ */ ++ if (op->flags & FTRACE_OPS_FL_ADDING) ++ continue; ++ ++ ++ /* ++ * If the ops is being modified and is in the old ++ * hash, then it is probably being removed from this ++ * function. ++ */ ++ if ((op->flags & FTRACE_OPS_FL_MODIFYING) && ++ hash_contains_ip(ip, &op->old_hash)) ++ return op; ++ /* ++ * If the ops is not being added or modified, and it's ++ * in its normal filter hash, then this must be the one ++ * we want! ++ */ ++ if (!(op->flags & FTRACE_OPS_FL_MODIFYING) && ++ hash_contains_ip(ip, op->func_hash)) ++ return op; ++ ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++static struct ftrace_ops * ++ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *op; ++ unsigned long ip = rec->ip; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* pass rec in as regs to have non-NULL val */ ++ if (hash_contains_ip(ip, op->func_hash)) ++ return op; ++ } while_for_each_ftrace_op(op); ++ ++ return NULL; ++} ++ ++/** ++ * ftrace_get_addr_new - Get the call address to set to ++ * @rec: The ftrace record descriptor ++ * ++ * If the record has the FTRACE_FL_REGS set, that means that it ++ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS ++ * is not not set, then it wants to convert to the normal callback. ++ * ++ * Returns the address of the trampoline to set to ++ */ ++unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ ++ /* Trampolines take precedence over regs */ ++ if (rec->flags & FTRACE_FL_TRAMP) { ++ ops = ftrace_find_tramp_ops_new(rec); ++ if (FTRACE_WARN_ON(!ops || !ops->trampoline)) { ++ pr_warn("Bad trampoline accounting at: %p (%pS) (%lx)\n", ++ (void *)rec->ip, (void *)rec->ip, rec->flags); ++ /* Ftrace is shutting down, return anything */ ++ return (unsigned long)FTRACE_ADDR; ++ } ++ return ops->trampoline; ++ } ++ ++ if (rec->flags & FTRACE_FL_REGS) ++ return (unsigned long)FTRACE_REGS_ADDR; ++ else ++ return (unsigned long)FTRACE_ADDR; ++} ++ ++/** ++ * ftrace_get_addr_curr - Get the call address that is already there ++ * @rec: The ftrace record descriptor ++ * ++ * The FTRACE_FL_REGS_EN is set when the record already points to ++ * a function that saves all the regs. 
Basically the '_EN' version ++ * represents the current state of the function. ++ * ++ * Returns the address of the trampoline that is currently being called ++ */ ++unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ ++ /* Trampolines take precedence over regs */ ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_curr(rec); ++ if (FTRACE_WARN_ON(!ops)) { ++ pr_warn("Bad trampoline accounting at: %p (%pS)\n", ++ (void *)rec->ip, (void *)rec->ip); ++ /* Ftrace is shutting down, return anything */ ++ return (unsigned long)FTRACE_ADDR; ++ } ++ return ops->trampoline; ++ } ++ ++ if (rec->flags & FTRACE_FL_REGS_EN) ++ return (unsigned long)FTRACE_REGS_ADDR; ++ else ++ return (unsigned long)FTRACE_ADDR; ++} ++ ++static int ++__ftrace_replace_code(struct dyn_ftrace *rec, int enable) ++{ ++ unsigned long ftrace_old_addr; ++ unsigned long ftrace_addr; ++ int ret; ++ ++ ftrace_addr = ftrace_get_addr_new(rec); ++ ++ /* This needs to be done before we call ftrace_update_record */ ++ ftrace_old_addr = ftrace_get_addr_curr(rec); ++ ++ ret = ftrace_update_record(rec, enable); ++ ++ ftrace_bug_type = FTRACE_BUG_UNKNOWN; ++ ++ switch (ret) { ++ case FTRACE_UPDATE_IGNORE: ++ return 0; ++ ++ case FTRACE_UPDATE_MAKE_CALL: ++ ftrace_bug_type = FTRACE_BUG_CALL; ++ return ftrace_make_call(rec, ftrace_addr); ++ ++ case FTRACE_UPDATE_MAKE_NOP: ++ ftrace_bug_type = FTRACE_BUG_NOP; ++ return ftrace_make_nop(NULL, rec, ftrace_old_addr); ++ ++ case FTRACE_UPDATE_MODIFY_CALL: ++ ftrace_bug_type = FTRACE_BUG_UPDATE; ++ return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); ++ } ++ ++ return -1; /* unknow ftrace bug */ ++} ++ ++void __weak ftrace_replace_code(int enable) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ int failed; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ failed = __ftrace_replace_code(rec, enable); ++ if (failed) { ++ ftrace_bug(failed, rec); ++ /* Stop processing */ ++ return; ++ } ++ } while_for_each_ftrace_rec(); ++} ++ ++struct ftrace_rec_iter { ++ struct ftrace_page *pg; ++ int index; ++}; ++ ++/** ++ * ftrace_rec_iter_start, start up iterating over traced functions ++ * ++ * Returns an iterator handle that is used to iterate over all ++ * the records that represent address locations where functions ++ * are traced. ++ * ++ * May return NULL if no records are available. ++ */ ++struct ftrace_rec_iter *ftrace_rec_iter_start(void) ++{ ++ /* ++ * We only use a single iterator. ++ * Protected by the ftrace_lock mutex. ++ */ ++ static struct ftrace_rec_iter ftrace_rec_iter; ++ struct ftrace_rec_iter *iter = &ftrace_rec_iter; ++ ++ iter->pg = ftrace_pages_start; ++ iter->index = 0; ++ ++ /* Could have empty pages */ ++ while (iter->pg && !iter->pg->index) ++ iter->pg = iter->pg->next; ++ ++ if (!iter->pg) ++ return NULL; ++ ++ return iter; ++} ++ ++/** ++ * ftrace_rec_iter_next, get the next record to process. ++ * @iter: The handle to the iterator. ++ * ++ * Returns the next iterator after the given iterator @iter. 
++ */ ++struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) ++{ ++ iter->index++; ++ ++ if (iter->index >= iter->pg->index) { ++ iter->pg = iter->pg->next; ++ iter->index = 0; ++ ++ /* Could have empty pages */ ++ while (iter->pg && !iter->pg->index) ++ iter->pg = iter->pg->next; ++ } ++ ++ if (!iter->pg) ++ return NULL; ++ ++ return iter; ++} ++ ++/** ++ * ftrace_rec_iter_record, get the record at the iterator location ++ * @iter: The current iterator location ++ * ++ * Returns the record that the current @iter is at. ++ */ ++struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) ++{ ++ return &iter->pg->records[iter->index]; ++} ++ ++static int ++ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return 0; ++ ++ ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); ++ if (ret) { ++ ftrace_bug_type = FTRACE_BUG_INIT; ++ ftrace_bug(ret, rec); ++ return 0; ++ } ++ return 1; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * before the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_prepare(void) ++{ ++ return 0; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * after the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_post_process(void) ++{ ++ return 0; ++} ++ ++void ftrace_modify_all_code(int command) ++{ ++ int update = command & FTRACE_UPDATE_TRACE_FUNC; ++ int err = 0; ++ ++ /* ++ * If the ftrace_caller calls a ftrace_ops func directly, ++ * we need to make sure that it only traces functions it ++ * expects to trace. When doing the switch of functions, ++ * we need to update to the ftrace_ops_list_func first ++ * before the transition between old and new calls are set, ++ * as the ftrace_ops_list_func will check the ops hashes ++ * to make sure the ops are having the right functions ++ * traced. ++ */ ++ if (update) { ++ err = ftrace_update_ftrace_func(ftrace_ops_list_func); ++ if (FTRACE_WARN_ON(err)) ++ return; ++ } ++ ++ if (command & FTRACE_UPDATE_CALLS) ++ ftrace_replace_code(1); ++ else if (command & FTRACE_DISABLE_CALLS) ++ ftrace_replace_code(0); ++ ++ if (update && ftrace_trace_function != ftrace_ops_list_func) { ++ function_trace_op = set_function_trace_op; ++ smp_wmb(); ++ /* If irqs are disabled, we are in stop machine */ ++ if (!irqs_disabled()) ++ smp_call_function(ftrace_sync_ipi, NULL, 1); ++ err = ftrace_update_ftrace_func(ftrace_trace_function); ++ if (FTRACE_WARN_ON(err)) ++ return; ++ } ++ ++ if (command & FTRACE_START_FUNC_RET) ++ err = ftrace_enable_ftrace_graph_caller(); ++ else if (command & FTRACE_STOP_FUNC_RET) ++ err = ftrace_disable_ftrace_graph_caller(); ++ FTRACE_WARN_ON(err); ++} ++ ++static int __ftrace_modify_code(void *data) ++{ ++ int *command = data; ++ ++ ftrace_modify_all_code(*command); ++ ++ return 0; ++} ++ ++/** ++ * ftrace_run_stop_machine, go back to the stop machine method ++ * @command: The command to tell ftrace what to do ++ * ++ * If an arch needs to fall back to the stop machine method, the ++ * it can call this function. ++ */ ++void ftrace_run_stop_machine(int command) ++{ ++ stop_machine(__ftrace_modify_code, &command, NULL); ++} ++ ++/** ++ * arch_ftrace_update_code, modify the code to trace or not trace ++ * @command: The command that needs to be done ++ * ++ * Archs can override this function if it does not need to ++ * run stop_machine() to modify code. 
++ */ ++void __weak arch_ftrace_update_code(int command) ++{ ++ ftrace_run_stop_machine(command); ++} ++ ++static void ftrace_run_update_code(int command) ++{ ++ int ret; ++ ++ ret = ftrace_arch_code_modify_prepare(); ++ FTRACE_WARN_ON(ret); ++ if (ret) ++ return; ++ ++ /* ++ * By default we use stop_machine() to modify the code. ++ * But archs can do what ever they want as long as it ++ * is safe. The stop_machine() is the safest, but also ++ * produces the most overhead. ++ */ ++ arch_ftrace_update_code(command); ++ ++ ret = ftrace_arch_code_modify_post_process(); ++ FTRACE_WARN_ON(ret); ++} ++ ++static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, ++ struct ftrace_ops_hash *old_hash) ++{ ++ ops->flags |= FTRACE_OPS_FL_MODIFYING; ++ ops->old_hash.filter_hash = old_hash->filter_hash; ++ ops->old_hash.notrace_hash = old_hash->notrace_hash; ++ ftrace_run_update_code(command); ++ ops->old_hash.filter_hash = NULL; ++ ops->old_hash.notrace_hash = NULL; ++ ops->flags &= ~FTRACE_OPS_FL_MODIFYING; ++} ++ ++static ftrace_func_t saved_ftrace_func; ++static int ftrace_start_up; ++ ++void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) ++{ ++} ++ ++static void ftrace_startup_enable(int command) ++{ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) ++ return; ++ ++ ftrace_run_update_code(command); ++} ++ ++static void ftrace_startup_all(int command) ++{ ++ update_all_ops = true; ++ ftrace_startup_enable(command); ++ update_all_ops = false; ++} ++ ++static int ftrace_startup(struct ftrace_ops *ops, int command) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ ret = __register_ftrace_function(ops); ++ if (ret) ++ return ret; ++ ++ ftrace_start_up++; ++ ++ /* ++ * Note that ftrace probes uses this to start up ++ * and modify functions it will probe. But we still ++ * set the ADDING flag for modification, as probes ++ * do not have trampolines. If they add them in the ++ * future, then the probes will need to distinguish ++ * between adding and updating probes. ++ */ ++ ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ++ ++ ret = ftrace_hash_ipmodify_enable(ops); ++ if (ret < 0) { ++ /* Rollback registration process */ ++ __unregister_ftrace_function(ops); ++ ftrace_start_up--; ++ ops->flags &= ~FTRACE_OPS_FL_ENABLED; ++ return ret; ++ } ++ ++ if (ftrace_hash_rec_enable(ops, 1)) ++ command |= FTRACE_UPDATE_CALLS; ++ ++ ftrace_startup_enable(command); ++ ++ ops->flags &= ~FTRACE_OPS_FL_ADDING; ++ ++ return 0; ++} ++ ++static int ftrace_shutdown(struct ftrace_ops *ops, int command) ++{ ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ ret = __unregister_ftrace_function(ops); ++ if (ret) ++ return ret; ++ ++ ftrace_start_up--; ++ /* ++ * Just warn in case of unbalance, no need to kill ftrace, it's not ++ * critical but the ftrace_call callers may be never nopped again after ++ * further ftrace uses. 
++ */ ++ WARN_ON_ONCE(ftrace_start_up < 0); ++ ++ /* Disabling ipmodify never fails */ ++ ftrace_hash_ipmodify_disable(ops); ++ ++ if (ftrace_hash_rec_disable(ops, 1)) ++ command |= FTRACE_UPDATE_CALLS; ++ ++ ops->flags &= ~FTRACE_OPS_FL_ENABLED; ++ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) { ++ /* ++ * If these are dynamic or per_cpu ops, they still ++ * need their data freed. Since, function tracing is ++ * not currently active, we can just free them ++ * without synchronizing all CPUs. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) ++ goto free_ops; ++ ++ return 0; ++ } ++ ++ /* ++ * If the ops uses a trampoline, then it needs to be ++ * tested first on update. ++ */ ++ ops->flags |= FTRACE_OPS_FL_REMOVING; ++ removed_ops = ops; ++ ++ /* The trampoline logic checks the old hashes */ ++ ops->old_hash.filter_hash = ops->func_hash->filter_hash; ++ ops->old_hash.notrace_hash = ops->func_hash->notrace_hash; ++ ++ ftrace_run_update_code(command); ++ ++ /* ++ * If there's no more ops registered with ftrace, run a ++ * sanity check to make sure all rec flags are cleared. ++ */ ++ if (rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) ++ pr_warn(" %pS flags:%lx\n", ++ (void *)rec->ip, rec->flags); ++ } while_for_each_ftrace_rec(); ++ } ++ ++ ops->old_hash.filter_hash = NULL; ++ ops->old_hash.notrace_hash = NULL; ++ ++ removed_ops = NULL; ++ ops->flags &= ~FTRACE_OPS_FL_REMOVING; ++ ++ /* ++ * Dynamic ops may be freed, we must make sure that all ++ * callers are done before leaving this function. ++ * The same goes for freeing the per_cpu data of the per_cpu ++ * ops. ++ */ ++ if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { ++ /* ++ * We need to do a hard force of sched synchronization. ++ * This is because we use preempt_disable() to do RCU, but ++ * the function tracers can be called where RCU is not watching ++ * (like before user_exit()). We can not rely on the RCU ++ * infrastructure to do the synchronization, thus we must do it ++ * ourselves. ++ */ ++ schedule_on_each_cpu(ftrace_sync); ++ ++ /* ++ * When the kernel is preeptive, tasks can be preempted ++ * while on a ftrace trampoline. Just scheduling a task on ++ * a CPU is not good enough to flush them. Calling ++ * synchornize_rcu_tasks() will wait for those tasks to ++ * execute and either schedule voluntarily or enter user space. 
++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT)) ++ synchronize_rcu_tasks(); ++ ++ free_ops: ++ arch_ftrace_trampoline_free(ops); ++ } ++ ++ return 0; ++} ++ ++static void ftrace_startup_sysctl(void) ++{ ++ int command; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* Force update next time */ ++ saved_ftrace_func = NULL; ++ /* ftrace_start_up is true if we want ftrace running */ ++ if (ftrace_start_up) { ++ command = FTRACE_UPDATE_CALLS; ++ if (ftrace_graph_active) ++ command |= FTRACE_START_FUNC_RET; ++ ftrace_startup_enable(command); ++ } ++} ++ ++static void ftrace_shutdown_sysctl(void) ++{ ++ int command; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* ftrace_start_up is true if ftrace is running */ ++ if (ftrace_start_up) { ++ command = FTRACE_DISABLE_CALLS; ++ if (ftrace_graph_active) ++ command |= FTRACE_STOP_FUNC_RET; ++ ftrace_run_update_code(command); ++ } ++} ++ ++static u64 ftrace_update_time; ++unsigned long ftrace_update_tot_cnt; ++ ++static inline int ops_traces_mod(struct ftrace_ops *ops) ++{ ++ /* ++ * Filter_hash being empty will default to trace module. ++ * But notrace hash requires a test of individual module functions. ++ */ ++ return ftrace_hash_empty(ops->func_hash->filter_hash) && ++ ftrace_hash_empty(ops->func_hash->notrace_hash); ++} ++ ++/* ++ * Check if the current ops references the record. ++ * ++ * If the ops traces all functions, then it was already accounted for. ++ * If the ops does not trace the current record function, skip it. ++ * If the ops ignores the function via notrace filter, skip it. ++ */ ++static inline bool ++ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) ++{ ++ /* If ops isn't enabled, ignore it */ ++ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) ++ return false; ++ ++ /* If ops traces all then it includes this function */ ++ if (ops_traces_mod(ops)) ++ return true; ++ ++ /* The function must be in the filter */ ++ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && ++ !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) ++ return false; ++ ++ /* If in notrace hash, we ignore it too */ ++ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) ++ return false; ++ ++ return true; ++} ++ ++static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *p; ++ u64 start, stop; ++ unsigned long update_cnt = 0; ++ unsigned long rec_flags = 0; ++ int i; ++ ++ start = ftrace_now(raw_smp_processor_id()); ++ ++ /* ++ * When a module is loaded, this function is called to convert ++ * the calls to mcount in its text to nops, and also to create ++ * an entry in the ftrace data. Now, if ftrace is activated ++ * after this call, but before the module sets its text to ++ * read-only, the modification of enabling ftrace can fail if ++ * the read-only is done while ftrace is converting the calls. ++ * To prevent this, the module's records are set as disabled ++ * and will be enabled after the call to set the module's text ++ * to read-only. ++ */ ++ if (mod) ++ rec_flags |= FTRACE_FL_DISABLED; ++ ++ for (pg = new_pgs; pg; pg = pg->next) { ++ ++ for (i = 0; i < pg->index; i++) { ++ ++ /* If something went wrong, bail without enabling anything */ ++ if (unlikely(ftrace_disabled)) ++ return -1; ++ ++ p = &pg->records[i]; ++ p->flags = rec_flags; ++ ++ /* ++ * Do the initial record conversion from mcount jump ++ * to the NOP instructions. 
++ */ ++ if (!__is_defined(CC_USING_NOP_MCOUNT) && ++ !ftrace_code_disable(mod, p)) ++ break; ++ ++ update_cnt++; ++ } ++ } ++ ++ stop = ftrace_now(raw_smp_processor_id()); ++ ftrace_update_time = stop - start; ++ ftrace_update_tot_cnt += update_cnt; ++ ++ return 0; ++} ++ ++static int ftrace_allocate_records(struct ftrace_page *pg, int count) ++{ ++ int order; ++ int cnt; ++ ++ if (WARN_ON(!count)) ++ return -EINVAL; ++ ++ order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); ++ ++ /* ++ * We want to fill as much as possible. No more than a page ++ * may be empty. ++ */ ++ while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) ++ order--; ++ ++ again: ++ pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); ++ ++ if (!pg->records) { ++ /* if we can't allocate this size, try something smaller */ ++ if (!order) ++ return -ENOMEM; ++ order >>= 1; ++ goto again; ++ } ++ ++ cnt = (PAGE_SIZE << order) / ENTRY_SIZE; ++ pg->size = cnt; ++ ++ if (cnt > count) ++ cnt = count; ++ ++ return cnt; ++} ++ ++static struct ftrace_page * ++ftrace_allocate_pages(unsigned long num_to_init) ++{ ++ struct ftrace_page *start_pg; ++ struct ftrace_page *pg; ++ int order; ++ int cnt; ++ ++ if (!num_to_init) ++ return 0; ++ ++ start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); ++ if (!pg) ++ return NULL; ++ ++ /* ++ * Try to allocate as much as possible in one continues ++ * location that fills in all of the space. We want to ++ * waste as little space as possible. ++ */ ++ for (;;) { ++ cnt = ftrace_allocate_records(pg, num_to_init); ++ if (cnt < 0) ++ goto free_pages; ++ ++ num_to_init -= cnt; ++ if (!num_to_init) ++ break; ++ ++ pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); ++ if (!pg->next) ++ goto free_pages; ++ ++ pg = pg->next; ++ } ++ ++ return start_pg; ++ ++ free_pages: ++ pg = start_pg; ++ while (pg) { ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ start_pg = pg->next; ++ kfree(pg); ++ pg = start_pg; ++ } ++ pr_info("ftrace: FAILED to allocate memory for functions\n"); ++ return NULL; ++} ++ ++#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ ++ ++struct ftrace_iterator { ++ loff_t pos; ++ loff_t func_pos; ++ loff_t mod_pos; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *func; ++ struct ftrace_func_probe *probe; ++ struct ftrace_func_entry *probe_entry; ++ struct trace_parser parser; ++ struct ftrace_hash *hash; ++ struct ftrace_ops *ops; ++ struct trace_array *tr; ++ struct list_head *mod_list; ++ int pidx; ++ int idx; ++ unsigned flags; ++}; ++ ++static void * ++t_probe_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct trace_array *tr = iter->ops->private; ++ struct list_head *func_probes; ++ struct ftrace_hash *hash; ++ struct list_head *next; ++ struct hlist_node *hnd = NULL; ++ struct hlist_head *hhd; ++ int size; ++ ++ (*pos)++; ++ iter->pos = *pos; ++ ++ if (!tr) ++ return NULL; ++ ++ func_probes = &tr->func_probes; ++ if (list_empty(func_probes)) ++ return NULL; ++ ++ if (!iter->probe) { ++ next = func_probes->next; ++ iter->probe = list_entry(next, struct ftrace_func_probe, list); ++ } ++ ++ if (iter->probe_entry) ++ hnd = &iter->probe_entry->hlist; ++ ++ hash = iter->probe->ops.func_hash->filter_hash; ++ ++ /* ++ * A probe being registered may temporarily have an empty hash ++ * and it's at the end of the func_probes list. 
++ */ ++ if (!hash || hash == EMPTY_HASH) ++ return NULL; ++ ++ size = 1 << hash->size_bits; ++ ++ retry: ++ if (iter->pidx >= size) { ++ if (iter->probe->list.next == func_probes) ++ return NULL; ++ next = iter->probe->list.next; ++ iter->probe = list_entry(next, struct ftrace_func_probe, list); ++ hash = iter->probe->ops.func_hash->filter_hash; ++ size = 1 << hash->size_bits; ++ iter->pidx = 0; ++ } ++ ++ hhd = &hash->buckets[iter->pidx]; ++ ++ if (hlist_empty(hhd)) { ++ iter->pidx++; ++ hnd = NULL; ++ goto retry; ++ } ++ ++ if (!hnd) ++ hnd = hhd->first; ++ else { ++ hnd = hnd->next; ++ if (!hnd) { ++ iter->pidx++; ++ goto retry; ++ } ++ } ++ ++ if (WARN_ON_ONCE(!hnd)) ++ return NULL; ++ ++ iter->probe_entry = hlist_entry(hnd, struct ftrace_func_entry, hlist); ++ ++ return iter; ++} ++ ++static void *t_probe_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ if (!(iter->flags & FTRACE_ITER_DO_PROBES)) ++ return NULL; ++ ++ if (iter->mod_pos > *pos) ++ return NULL; ++ ++ iter->probe = NULL; ++ iter->probe_entry = NULL; ++ iter->pidx = 0; ++ for (l = 0; l <= (*pos - iter->mod_pos); ) { ++ p = t_probe_next(m, &l); ++ if (!p) ++ break; ++ } ++ if (!p) ++ return NULL; ++ ++ /* Only set this if we have an item */ ++ iter->flags |= FTRACE_ITER_PROBE; ++ ++ return iter; ++} ++ ++static int ++t_probe_show(struct seq_file *m, struct ftrace_iterator *iter) ++{ ++ struct ftrace_func_entry *probe_entry; ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_func_probe *probe; ++ ++ probe = iter->probe; ++ probe_entry = iter->probe_entry; ++ ++ if (WARN_ON_ONCE(!probe || !probe_entry)) ++ return -EIO; ++ ++ probe_ops = probe->probe_ops; ++ ++ if (probe_ops->print) ++ return probe_ops->print(m, probe_entry->ip, probe_ops, probe->data); ++ ++ seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip, ++ (void *)probe_ops->func); ++ ++ return 0; ++} ++ ++static void * ++t_mod_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct trace_array *tr = iter->tr; ++ ++ (*pos)++; ++ iter->pos = *pos; ++ ++ iter->mod_list = iter->mod_list->next; ++ ++ if (iter->mod_list == &tr->mod_trace || ++ iter->mod_list == &tr->mod_notrace) { ++ iter->flags &= ~FTRACE_ITER_MOD; ++ return NULL; ++ } ++ ++ iter->mod_pos = *pos; ++ ++ return iter; ++} ++ ++static void *t_mod_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ if (iter->func_pos > *pos) ++ return NULL; ++ ++ iter->mod_pos = iter->func_pos; ++ ++ /* probes are only available if tr is set */ ++ if (!iter->tr) ++ return NULL; ++ ++ for (l = 0; l <= (*pos - iter->func_pos); ) { ++ p = t_mod_next(m, &l); ++ if (!p) ++ break; ++ } ++ if (!p) { ++ iter->flags &= ~FTRACE_ITER_MOD; ++ return t_probe_start(m, pos); ++ } ++ ++ /* Only set this if we have an item */ ++ iter->flags |= FTRACE_ITER_MOD; ++ ++ return iter; ++} ++ ++static int ++t_mod_show(struct seq_file *m, struct ftrace_iterator *iter) ++{ ++ struct ftrace_mod_load *ftrace_mod; ++ struct trace_array *tr = iter->tr; ++ ++ if (WARN_ON_ONCE(!iter->mod_list) || ++ iter->mod_list == &tr->mod_trace || ++ iter->mod_list == &tr->mod_notrace) ++ return -EIO; ++ ++ ftrace_mod = list_entry(iter->mod_list, struct ftrace_mod_load, list); ++ ++ if (ftrace_mod->func) ++ seq_printf(m, "%s", ftrace_mod->func); ++ else ++ seq_putc(m, '*'); ++ ++ seq_printf(m, ":mod:%s\n", ftrace_mod->module); ++ ++ return 0; ++} ++ ++static void * 
++t_func_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec = NULL; ++ ++ (*pos)++; ++ ++ retry: ++ if (iter->idx >= iter->pg->index) { ++ if (iter->pg->next) { ++ iter->pg = iter->pg->next; ++ iter->idx = 0; ++ goto retry; ++ } ++ } else { ++ rec = &iter->pg->records[iter->idx++]; ++ if (((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && ++ !ftrace_lookup_ip(iter->hash, rec->ip)) || ++ ++ ((iter->flags & FTRACE_ITER_ENABLED) && ++ !(rec->flags & FTRACE_FL_ENABLED))) { ++ ++ rec = NULL; ++ goto retry; ++ } ++ } ++ ++ if (!rec) ++ return NULL; ++ ++ iter->pos = iter->func_pos = *pos; ++ iter->func = rec; ++ ++ return iter; ++} ++ ++static void * ++t_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ loff_t l = *pos; /* t_probe_start() must use original pos */ ++ void *ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return NULL; ++ ++ if (iter->flags & FTRACE_ITER_PROBE) ++ return t_probe_next(m, pos); ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_next(m, pos); ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) { ++ /* next must increment pos, and t_probe_start does not */ ++ (*pos)++; ++ return t_mod_start(m, &l); ++ } ++ ++ ret = t_func_next(m, pos); ++ ++ if (!ret) ++ return t_mod_start(m, &l); ++ ++ return ret; ++} ++ ++static void reset_iter_read(struct ftrace_iterator *iter) ++{ ++ iter->pos = 0; ++ iter->func_pos = 0; ++ iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE | FTRACE_ITER_MOD); ++} ++ ++static void *t_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ loff_t l; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ return NULL; ++ ++ /* ++ * If an lseek was done, then reset and start from beginning. ++ */ ++ if (*pos < iter->pos) ++ reset_iter_read(iter); ++ ++ /* ++ * For set_ftrace_filter reading, if we have the filter ++ * off, we can short cut and just print out that all ++ * functions are enabled. ++ */ ++ if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && ++ ftrace_hash_empty(iter->hash)) { ++ iter->func_pos = 1; /* Account for the message */ ++ if (*pos > 0) ++ return t_mod_start(m, pos); ++ iter->flags |= FTRACE_ITER_PRINTALL; ++ /* reset in case of seek/pread */ ++ iter->flags &= ~FTRACE_ITER_PROBE; ++ return iter; ++ } ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_start(m, pos); ++ ++ /* ++ * Unfortunately, we need to restart at ftrace_pages_start ++ * every time we let go of the ftrace_mutex. This is because ++ * those pointers can change without the lock. 
++ */ ++ iter->pg = ftrace_pages_start; ++ iter->idx = 0; ++ for (l = 0; l <= *pos; ) { ++ p = t_func_next(m, &l); ++ if (!p) ++ break; ++ } ++ ++ if (!p) ++ return t_mod_start(m, pos); ++ ++ return iter; ++} ++ ++static void t_stop(struct seq_file *m, void *p) ++{ ++ mutex_unlock(&ftrace_lock); ++} ++ ++void * __weak ++arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) ++{ ++ return NULL; ++} ++ ++static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, ++ struct dyn_ftrace *rec) ++{ ++ void *ptr; ++ ++ ptr = arch_ftrace_trampoline_func(ops, rec); ++ if (ptr) ++ seq_printf(m, " ->%pS", ptr); ++} ++ ++static int t_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec; ++ ++ if (iter->flags & FTRACE_ITER_PROBE) ++ return t_probe_show(m, iter); ++ ++ if (iter->flags & FTRACE_ITER_MOD) ++ return t_mod_show(m, iter); ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) { ++ if (iter->flags & FTRACE_ITER_NOTRACE) ++ seq_puts(m, "#### no functions disabled ####\n"); ++ else ++ seq_puts(m, "#### all functions enabled ####\n"); ++ return 0; ++ } ++ ++ rec = iter->func; ++ ++ if (!rec) ++ return 0; ++ ++ seq_printf(m, "%ps", (void *)rec->ip); ++ if (iter->flags & FTRACE_ITER_ENABLED) { ++ struct ftrace_ops *ops; ++ ++ seq_printf(m, " (%ld)%s%s", ++ ftrace_rec_count(rec), ++ rec->flags & FTRACE_FL_REGS ? " R" : " ", ++ rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); ++ if (rec->flags & FTRACE_FL_TRAMP_EN) { ++ ops = ftrace_find_tramp_ops_any(rec); ++ if (ops) { ++ do { ++ seq_printf(m, "\ttramp: %pS (%pS)", ++ (void *)ops->trampoline, ++ (void *)ops->func); ++ add_trampoline_func(m, ops, rec); ++ ops = ftrace_find_tramp_ops_next(rec, ops); ++ } while (ops); ++ } else ++ seq_puts(m, "\ttramp: ERROR!"); ++ } else { ++ add_trampoline_func(m, NULL, rec); ++ } ++ } ++ ++ seq_putc(m, '\n'); ++ ++ return 0; ++} ++ ++static const struct seq_operations show_ftrace_seq_ops = { ++ .start = t_start, ++ .next = t_next, ++ .stop = t_stop, ++ .show = t_show, ++}; ++ ++static int ++ftrace_avail_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); ++ if (!iter) ++ return -ENOMEM; ++ ++ iter->pg = ftrace_pages_start; ++ iter->ops = &global_ops; ++ ++ return 0; ++} ++ ++static int ++ftrace_enabled_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ ++ iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); ++ if (!iter) ++ return -ENOMEM; ++ ++ iter->pg = ftrace_pages_start; ++ iter->flags = FTRACE_ITER_ENABLED; ++ iter->ops = &global_ops; ++ ++ return 0; ++} ++ ++/** ++ * ftrace_regex_open - initialize function tracer filter files ++ * @ops: The ftrace_ops that hold the hash filters ++ * @flag: The type of filter to process ++ * @inode: The inode, usually passed in to your open routine ++ * @file: The file, usually passed in to your open routine ++ * ++ * ftrace_regex_open() initializes the filter files for the ++ * @ops. Depending on @flag it may process the filter hash or ++ * the notrace hash of @ops. With this called from the open ++ * routine, you can use ftrace_filter_write() for the write ++ * routine if @flag has FTRACE_ITER_FILTER set, or ++ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. ++ * tracing_lseek() should be used as the lseek routine, and ++ * release must call ftrace_regex_release(). 
++ */ ++int ++ftrace_regex_open(struct ftrace_ops *ops, int flag, ++ struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ struct ftrace_hash *hash; ++ struct list_head *mod_head; ++ struct trace_array *tr = ops->private; ++ int ret = -ENOMEM; ++ ++ ftrace_ops_init(ops); ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ if (tr && trace_array_get(tr) < 0) ++ return -ENODEV; ++ ++ iter = kzalloc(sizeof(*iter), GFP_KERNEL); ++ if (!iter) ++ goto out; ++ ++ if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) ++ goto out; ++ ++ iter->ops = ops; ++ iter->flags = flag; ++ iter->tr = tr; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (flag & FTRACE_ITER_NOTRACE) { ++ hash = ops->func_hash->notrace_hash; ++ mod_head = tr ? &tr->mod_notrace : NULL; ++ } else { ++ hash = ops->func_hash->filter_hash; ++ mod_head = tr ? &tr->mod_trace : NULL; ++ } ++ ++ iter->mod_list = mod_head; ++ ++ if (file->f_mode & FMODE_WRITE) { ++ const int size_bits = FTRACE_HASH_DEFAULT_BITS; ++ ++ if (file->f_flags & O_TRUNC) { ++ iter->hash = alloc_ftrace_hash(size_bits); ++ clear_ftrace_mod_list(mod_head); ++ } else { ++ iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); ++ } ++ ++ if (!iter->hash) { ++ trace_parser_put(&iter->parser); ++ goto out_unlock; ++ } ++ } else ++ iter->hash = hash; ++ ++ ret = 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ iter->pg = ftrace_pages_start; ++ ++ ret = seq_open(file, &show_ftrace_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = iter; ++ } else { ++ /* Failed */ ++ free_ftrace_hash(iter->hash); ++ trace_parser_put(&iter->parser); ++ } ++ } else ++ file->private_data = iter; ++ ++ out_unlock: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ out: ++ if (ret) { ++ kfree(iter); ++ if (tr) ++ trace_array_put(tr); ++ } ++ ++ return ret; ++} ++ ++static int ++ftrace_filter_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_ops *ops = inode->i_private; ++ ++ return ftrace_regex_open(ops, ++ FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, ++ inode, file); ++} ++ ++static int ++ftrace_notrace_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_ops *ops = inode->i_private; ++ ++ return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE, ++ inode, file); ++} ++ ++/* Type for quick search ftrace basic regexes (globs) from filter_parse_regex */ ++struct ftrace_glob { ++ char *search; ++ unsigned len; ++ int type; ++}; ++ ++/* ++ * If symbols in an architecture don't correspond exactly to the user-visible ++ * name of what they represent, it is possible to define this function to ++ * perform the necessary adjustments. 
++*/ ++char * __weak arch_ftrace_match_adjust(char *str, const char *search) ++{ ++ return str; ++} ++ ++static int ftrace_match(char *str, struct ftrace_glob *g) ++{ ++ int matched = 0; ++ int slen; ++ ++ str = arch_ftrace_match_adjust(str, g->search); ++ ++ switch (g->type) { ++ case MATCH_FULL: ++ if (strcmp(str, g->search) == 0) ++ matched = 1; ++ break; ++ case MATCH_FRONT_ONLY: ++ if (strncmp(str, g->search, g->len) == 0) ++ matched = 1; ++ break; ++ case MATCH_MIDDLE_ONLY: ++ if (strstr(str, g->search)) ++ matched = 1; ++ break; ++ case MATCH_END_ONLY: ++ slen = strlen(str); ++ if (slen >= g->len && ++ memcmp(str + slen - g->len, g->search, g->len) == 0) ++ matched = 1; ++ break; ++ case MATCH_GLOB: ++ if (glob_match(g->search, str)) ++ matched = 1; ++ break; ++ } ++ ++ return matched; ++} ++ ++static int ++enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) ++{ ++ struct ftrace_func_entry *entry; ++ int ret = 0; ++ ++ entry = ftrace_lookup_ip(hash, rec->ip); ++ if (clear_filter) { ++ /* Do nothing if it doesn't exist */ ++ if (!entry) ++ return 0; ++ ++ free_hash_entry(hash, entry); ++ } else { ++ /* Do nothing if it exists */ ++ if (entry) ++ return 0; ++ ++ ret = add_hash_entry(hash, rec->ip); ++ } ++ return ret; ++} ++ ++static int ++ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, ++ struct ftrace_glob *mod_g, int exclude_mod) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ char *modname; ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); ++ ++ if (mod_g) { ++ int mod_matches = (modname) ? ftrace_match(modname, mod_g) : 0; ++ ++ /* blank module name to match all modules */ ++ if (!mod_g->len) { ++ /* blank module globbing: modname xor exclude_mod */ ++ if (!exclude_mod != !modname) ++ goto func_match; ++ return 0; ++ } ++ ++ /* ++ * exclude_mod is set to trace everything but the given ++ * module. If it is set and the module matches, then ++ * return 0. If it is not set, and the module doesn't match ++ * also return 0. Otherwise, check the function to see if ++ * that matches. ++ */ ++ if (!mod_matches == !exclude_mod) ++ return 0; ++func_match: ++ /* blank search means to match all funcs in the mod */ ++ if (!func_g->len) ++ return 1; ++ } ++ ++ return ftrace_match(str, func_g); ++} ++ ++static int ++match_records(struct ftrace_hash *hash, char *func, int len, char *mod) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct ftrace_glob func_g = { .type = MATCH_FULL }; ++ struct ftrace_glob mod_g = { .type = MATCH_FULL }; ++ struct ftrace_glob *mod_match = (mod) ? 
&mod_g : NULL; ++ int exclude_mod = 0; ++ int found = 0; ++ int ret; ++ int clear_filter = 0; ++ ++ if (func) { ++ func_g.type = filter_parse_regex(func, len, &func_g.search, ++ &clear_filter); ++ func_g.len = strlen(func_g.search); ++ } ++ ++ if (mod) { ++ mod_g.type = filter_parse_regex(mod, strlen(mod), ++ &mod_g.search, &exclude_mod); ++ mod_g.len = strlen(mod_g.search); ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ goto out_unlock; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ++ ret = enter_record(hash, rec, clear_filter); ++ if (ret < 0) { ++ found = ret; ++ goto out_unlock; ++ } ++ found = 1; ++ } ++ } while_for_each_ftrace_rec(); ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ return found; ++} ++ ++static int ++ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) ++{ ++ return match_records(hash, buff, len, NULL); ++} ++ ++static void ftrace_ops_update_code(struct ftrace_ops *ops, ++ struct ftrace_ops_hash *old_hash) ++{ ++ struct ftrace_ops *op; ++ ++ if (!ftrace_enabled) ++ return; ++ ++ if (ops->flags & FTRACE_OPS_FL_ENABLED) { ++ ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); ++ return; ++ } ++ ++ /* ++ * If this is the shared global_ops filter, then we need to ++ * check if there is another ops that shares it, is enabled. ++ * If so, we still need to run the modify code. ++ */ ++ if (ops->func_hash != &global_ops.local_hash) ++ return; ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op->func_hash == &global_ops.local_hash && ++ op->flags & FTRACE_OPS_FL_ENABLED) { ++ ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); ++ /* Only need to do this once */ ++ return; ++ } ++ } while_for_each_ftrace_op(op); ++} ++ ++static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, ++ struct ftrace_hash **orig_hash, ++ struct ftrace_hash *hash, ++ int enable) ++{ ++ struct ftrace_ops_hash old_hash_ops; ++ struct ftrace_hash *old_hash; ++ int ret; ++ ++ old_hash = *orig_hash; ++ old_hash_ops.filter_hash = ops->func_hash->filter_hash; ++ old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; ++ ret = ftrace_hash_move(ops, enable, orig_hash, hash); ++ if (!ret) { ++ ftrace_ops_update_code(ops, &old_hash_ops); ++ free_ftrace_hash_rcu(old_hash); ++ } ++ return ret; ++} ++ ++static bool module_exists(const char *module) ++{ ++ /* All modules have the symbol __this_module */ ++ const char this_mod[] = "__this_module"; ++ char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; ++ unsigned long val; ++ int n; ++ ++ n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); ++ ++ if (n > sizeof(modname) - 1) ++ return false; ++ ++ val = module_kallsyms_lookup_name(modname); ++ return val != 0; ++} ++ ++static int cache_mod(struct trace_array *tr, ++ const char *func, char *module, int enable) ++{ ++ struct ftrace_mod_load *ftrace_mod, *n; ++ struct list_head *head = enable ? 
&tr->mod_trace : &tr->mod_notrace; ++ int ret; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* We do not cache inverse filters */ ++ if (func[0] == '!') { ++ func++; ++ ret = -EINVAL; ++ ++ /* Look to remove this hash */ ++ list_for_each_entry_safe(ftrace_mod, n, head, list) { ++ if (strcmp(ftrace_mod->module, module) != 0) ++ continue; ++ ++ /* no func matches all */ ++ if (strcmp(func, "*") == 0 || ++ (ftrace_mod->func && ++ strcmp(ftrace_mod->func, func) == 0)) { ++ ret = 0; ++ free_ftrace_mod(ftrace_mod); ++ continue; ++ } ++ } ++ goto out; ++ } ++ ++ ret = -EINVAL; ++ /* We only care about modules that have not been loaded yet */ ++ if (module_exists(module)) ++ goto out; ++ ++ /* Save this string off, and execute it when the module is loaded */ ++ ret = ftrace_add_mod(tr, func, module, enable); ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++ ++static int ++ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ++ int reset, int enable); ++ ++#ifdef CONFIG_MODULES ++static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, ++ char *mod, bool enable) ++{ ++ struct ftrace_mod_load *ftrace_mod, *n; ++ struct ftrace_hash **orig_hash, *new_hash; ++ LIST_HEAD(process_mods); ++ char *func; ++ int ret; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (enable) ++ orig_hash = &ops->func_hash->filter_hash; ++ else ++ orig_hash = &ops->func_hash->notrace_hash; ++ ++ new_hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, ++ *orig_hash); ++ if (!new_hash) ++ goto out; /* warn? */ ++ ++ mutex_lock(&ftrace_lock); ++ ++ list_for_each_entry_safe(ftrace_mod, n, head, list) { ++ ++ if (strcmp(ftrace_mod->module, mod) != 0) ++ continue; ++ ++ if (ftrace_mod->func) ++ func = kstrdup(ftrace_mod->func, GFP_KERNEL); ++ else ++ func = kstrdup("*", GFP_KERNEL); ++ ++ if (!func) /* warn? */ ++ continue; ++ ++ list_del(&ftrace_mod->list); ++ list_add(&ftrace_mod->list, &process_mods); ++ ++ /* Use the newly allocated func, as it may be "*" */ ++ kfree(ftrace_mod->func); ++ ftrace_mod->func = func; ++ } ++ ++ mutex_unlock(&ftrace_lock); ++ ++ list_for_each_entry_safe(ftrace_mod, n, &process_mods, list) { ++ ++ func = ftrace_mod->func; ++ ++ /* Grabs ftrace_lock, which is why we have this extra step */ ++ match_records(new_hash, func, strlen(func), mod); ++ free_ftrace_mod(ftrace_mod); ++ } ++ ++ if (enable && list_empty(head)) ++ new_hash->flags &= ~FTRACE_HASH_FL_MOD; ++ ++ mutex_lock(&ftrace_lock); ++ ++ ret = ftrace_hash_move_and_update_ops(ops, orig_hash, ++ new_hash, enable); ++ mutex_unlock(&ftrace_lock); ++ ++ out: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ free_ftrace_hash(new_hash); ++} ++ ++static void process_cached_mods(const char *mod_name) ++{ ++ struct trace_array *tr; ++ char *mod; ++ ++ mod = kstrdup(mod_name, GFP_KERNEL); ++ if (!mod) ++ return; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!list_empty(&tr->mod_trace)) ++ process_mod_list(&tr->mod_trace, tr->ops, mod, true); ++ if (!list_empty(&tr->mod_notrace)) ++ process_mod_list(&tr->mod_notrace, tr->ops, mod, false); ++ } ++ mutex_unlock(&trace_types_lock); ++ ++ kfree(mod); ++} ++#endif ++ ++/* ++ * We register the module command as a template to show others how ++ * to register the a command as well. 
++ */ ++ ++static int ++ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, ++ char *func_orig, char *cmd, char *module, int enable) ++{ ++ char *func; ++ int ret; ++ ++ /* match_records() modifies func, and we need the original */ ++ func = kstrdup(func_orig, GFP_KERNEL); ++ if (!func) ++ return -ENOMEM; ++ ++ /* ++ * cmd == 'mod' because we only registered this func ++ * for the 'mod' ftrace_func_command. ++ * But if you register one func with multiple commands, ++ * you can tell which command was used by the cmd ++ * parameter. ++ */ ++ ret = match_records(hash, func, strlen(func), module); ++ kfree(func); ++ ++ if (!ret) ++ return cache_mod(tr, func_orig, module, enable); ++ if (ret < 0) ++ return ret; ++ return 0; ++} ++ ++static struct ftrace_func_command ftrace_mod_cmd = { ++ .name = "mod", ++ .func = ftrace_mod_callback, ++}; ++ ++static int __init ftrace_mod_cmd_init(void) ++{ ++ return register_ftrace_command(&ftrace_mod_cmd); ++} ++core_initcall(ftrace_mod_cmd_init); ++ ++static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *pt_regs) ++{ ++ struct ftrace_probe_ops *probe_ops; ++ struct ftrace_func_probe *probe; ++ ++ probe = container_of(op, struct ftrace_func_probe, ops); ++ probe_ops = probe->probe_ops; ++ ++ /* ++ * Disable preemption for these calls to prevent a RCU grace ++ * period. This syncs the hash iteration and freeing of items ++ * on the hash. rcu_read_lock is too dangerous here. ++ */ ++ preempt_disable_notrace(); ++ probe_ops->func(ip, parent_ip, probe->tr, probe_ops, probe->data); ++ preempt_enable_notrace(); ++} ++ ++struct ftrace_func_map { ++ struct ftrace_func_entry entry; ++ void *data; ++}; ++ ++struct ftrace_func_mapper { ++ struct ftrace_hash hash; ++}; ++ ++/** ++ * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper ++ * ++ * Returns a ftrace_func_mapper descriptor that can be used to map ips to data. ++ */ ++struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) ++{ ++ struct ftrace_hash *hash; ++ ++ /* ++ * The mapper is simply a ftrace_hash, but since the entries ++ * in the hash are not ftrace_func_entry type, we define it ++ * as a separate structure. ++ */ ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ return (struct ftrace_func_mapper *)hash; ++} ++ ++/** ++ * ftrace_func_mapper_find_ip - Find some data mapped to an ip ++ * @mapper: The mapper that has the ip maps ++ * @ip: the instruction pointer to find the data for ++ * ++ * Returns the data mapped to @ip if found otherwise NULL. The return ++ * is actually the address of the mapper data pointer. The address is ++ * returned for use cases where the data is no bigger than a long, and ++ * the user can use the data pointer as its data instead of having to ++ * allocate more memory for the reference. ++ */ ++void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (!entry) ++ return NULL; ++ ++ map = (struct ftrace_func_map *)entry; ++ return &map->data; ++} ++ ++/** ++ * ftrace_func_mapper_add_ip - Map some data to an ip ++ * @mapper: The mapper that has the ip maps ++ * @ip: The instruction pointer address to map @data to ++ * @data: The data to map to @ip ++ * ++ * Returns 0 on succes otherwise an error. 
++ */ ++int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip, void *data) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (entry) ++ return -EBUSY; ++ ++ map = kmalloc(sizeof(*map), GFP_KERNEL); ++ if (!map) ++ return -ENOMEM; ++ ++ map->entry.ip = ip; ++ map->data = data; ++ ++ __add_hash_entry(&mapper->hash, &map->entry); ++ ++ return 0; ++} ++ ++/** ++ * ftrace_func_mapper_remove_ip - Remove an ip from the mapping ++ * @mapper: The mapper that has the ip maps ++ * @ip: The instruction pointer address to remove the data from ++ * ++ * Returns the data if it is found, otherwise NULL. ++ * Note, if the data pointer is used as the data itself, (see ++ * ftrace_func_mapper_find_ip(), then the return value may be meaningless, ++ * if the data pointer was set to zero. ++ */ ++void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, ++ unsigned long ip) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ void *data; ++ ++ entry = ftrace_lookup_ip(&mapper->hash, ip); ++ if (!entry) ++ return NULL; ++ ++ map = (struct ftrace_func_map *)entry; ++ data = map->data; ++ ++ remove_hash_entry(&mapper->hash, entry); ++ kfree(entry); ++ ++ return data; ++} ++ ++/** ++ * free_ftrace_func_mapper - free a mapping of ips and data ++ * @mapper: The mapper that has the ip maps ++ * @free_func: A function to be called on each data item. ++ * ++ * This is used to free the function mapper. The @free_func is optional ++ * and can be used if the data needs to be freed as well. ++ */ ++void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, ++ ftrace_mapper_func free_func) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_map *map; ++ struct hlist_head *hhd; ++ int size, i; ++ ++ if (!mapper) ++ return; ++ ++ if (free_func && mapper->hash.count) { ++ size = 1 << mapper->hash.size_bits; ++ for (i = 0; i < size; i++) { ++ hhd = &mapper->hash.buckets[i]; ++ hlist_for_each_entry(entry, hhd, hlist) { ++ map = (struct ftrace_func_map *)entry; ++ free_func(map); ++ } ++ } ++ } ++ free_ftrace_hash(&mapper->hash); ++} ++ ++static void release_probe(struct ftrace_func_probe *probe) ++{ ++ struct ftrace_probe_ops *probe_ops; ++ ++ mutex_lock(&ftrace_lock); ++ ++ WARN_ON(probe->ref <= 0); ++ ++ /* Subtract the ref that was used to protect this instance */ ++ probe->ref--; ++ ++ if (!probe->ref) { ++ probe_ops = probe->probe_ops; ++ /* ++ * Sending zero as ip tells probe_ops to free ++ * the probe->data itself ++ */ ++ if (probe_ops->free) ++ probe_ops->free(probe_ops, probe->tr, 0, probe->data); ++ list_del(&probe->list); ++ kfree(probe); ++ } ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void acquire_probe_locked(struct ftrace_func_probe *probe) ++{ ++ /* ++ * Add one ref to keep it from being freed when releasing the ++ * ftrace_lock mutex. ++ */ ++ probe->ref++; ++} ++ ++int ++register_ftrace_function_probe(char *glob, struct trace_array *tr, ++ struct ftrace_probe_ops *probe_ops, ++ void *data) ++{ ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_probe *probe; ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *old_hash; ++ struct ftrace_hash *hash; ++ int count = 0; ++ int size; ++ int ret; ++ int i; ++ ++ if (WARN_ON(!tr)) ++ return -EINVAL; ++ ++ /* We do not support '!' 
for function probes */ ++ if (WARN_ON(glob[0] == '!')) ++ return -EINVAL; ++ ++ ++ mutex_lock(&ftrace_lock); ++ /* Check if the probe_ops is already registered */ ++ list_for_each_entry(probe, &tr->func_probes, list) { ++ if (probe->probe_ops == probe_ops) ++ break; ++ } ++ if (&probe->list == &tr->func_probes) { ++ probe = kzalloc(sizeof(*probe), GFP_KERNEL); ++ if (!probe) { ++ mutex_unlock(&ftrace_lock); ++ return -ENOMEM; ++ } ++ probe->probe_ops = probe_ops; ++ probe->ops.func = function_trace_probe_call; ++ probe->tr = tr; ++ ftrace_ops_init(&probe->ops); ++ list_add(&probe->list, &tr->func_probes); ++ } ++ ++ acquire_probe_locked(probe); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ /* ++ * Note, there's a small window here that the func_hash->filter_hash ++ * may be NULL or empty. Need to be carefule when reading the loop. ++ */ ++ mutex_lock(&probe->ops.func_hash->regex_lock); ++ ++ orig_hash = &probe->ops.func_hash->filter_hash; ++ old_hash = *orig_hash; ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); ++ ++ if (!hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ ret = ftrace_match_records(hash, glob, strlen(glob)); ++ ++ /* Nothing found? */ ++ if (!ret) ++ ret = -EINVAL; ++ ++ if (ret < 0) ++ goto out; ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ if (ftrace_lookup_ip(old_hash, entry->ip)) ++ continue; ++ /* ++ * The caller might want to do something special ++ * for each function we find. We call the callback ++ * to give the caller an opportunity to do so. ++ */ ++ if (probe_ops->init) { ++ ret = probe_ops->init(probe_ops, tr, ++ entry->ip, data, ++ &probe->data); ++ if (ret < 0) { ++ if (probe_ops->free && count) ++ probe_ops->free(probe_ops, tr, ++ 0, probe->data); ++ probe->data = NULL; ++ goto out; ++ } ++ } ++ count++; ++ } ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (!count) { ++ /* Nothing was added? 
*/ ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, ++ hash, 1); ++ if (ret < 0) ++ goto err_unlock; ++ ++ /* One ref for each new function traced */ ++ probe->ref += count; ++ ++ if (!(probe->ops.flags & FTRACE_OPS_FL_ENABLED)) ++ ret = ftrace_startup(&probe->ops, 0); ++ ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ if (!ret) ++ ret = count; ++ out: ++ mutex_unlock(&probe->ops.func_hash->regex_lock); ++ free_ftrace_hash(hash); ++ ++ release_probe(probe); ++ ++ return ret; ++ ++ err_unlock: ++ if (!probe_ops->free || !count) ++ goto out_unlock; ++ ++ /* Failed to do the move, need to call the free functions */ ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ++ if (ftrace_lookup_ip(old_hash, entry->ip)) ++ continue; ++ probe_ops->free(probe_ops, tr, entry->ip, probe->data); ++ } ++ } ++ goto out_unlock; ++} ++ ++int ++unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, ++ struct ftrace_probe_ops *probe_ops) ++{ ++ struct ftrace_ops_hash old_hash_ops; ++ struct ftrace_func_entry *entry; ++ struct ftrace_func_probe *probe; ++ struct ftrace_glob func_g; ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *old_hash; ++ struct ftrace_hash *hash = NULL; ++ struct hlist_node *tmp; ++ struct hlist_head hhd; ++ char str[KSYM_SYMBOL_LEN]; ++ int count = 0; ++ int i, ret = -ENODEV; ++ int size; ++ ++ if (!glob || !strlen(glob) || !strcmp(glob, "*")) ++ func_g.search = NULL; ++ else { ++ int not; ++ ++ func_g.type = filter_parse_regex(glob, strlen(glob), ++ &func_g.search, ¬); ++ func_g.len = strlen(func_g.search); ++ ++ /* we do not support '!' for function probes */ ++ if (WARN_ON(not)) ++ return -EINVAL; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ /* Check if the probe_ops is already registered */ ++ list_for_each_entry(probe, &tr->func_probes, list) { ++ if (probe->probe_ops == probe_ops) ++ break; ++ } ++ if (&probe->list == &tr->func_probes) ++ goto err_unlock_ftrace; ++ ++ ret = -EINVAL; ++ if (!(probe->ops.flags & FTRACE_OPS_FL_INITIALIZED)) ++ goto err_unlock_ftrace; ++ ++ acquire_probe_locked(probe); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ mutex_lock(&probe->ops.func_hash->regex_lock); ++ ++ orig_hash = &probe->ops.func_hash->filter_hash; ++ old_hash = *orig_hash; ++ ++ if (ftrace_hash_empty(old_hash)) ++ goto out_unlock; ++ ++ old_hash_ops.filter_hash = old_hash; ++ /* Probes only have filters */ ++ old_hash_ops.notrace_hash = NULL; ++ ++ ret = -ENOMEM; ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); ++ if (!hash) ++ goto out_unlock; ++ ++ INIT_HLIST_HEAD(&hhd); ++ ++ size = 1 << hash->size_bits; ++ for (i = 0; i < size; i++) { ++ hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { ++ ++ if (func_g.search) { ++ kallsyms_lookup(entry->ip, NULL, NULL, ++ NULL, str); ++ if (!ftrace_match(str, &func_g)) ++ continue; ++ } ++ count++; ++ remove_hash_entry(hash, entry); ++ hlist_add_head(&entry->hlist, &hhd); ++ } ++ } ++ ++ /* Nothing found? 
*/ ++ if (!count) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ++ WARN_ON(probe->ref < count); ++ ++ probe->ref -= count; ++ ++ if (ftrace_hash_empty(hash)) ++ ftrace_shutdown(&probe->ops, 0); ++ ++ ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, ++ hash, 1); ++ ++ /* still need to update the function call sites */ ++ if (ftrace_enabled && !ftrace_hash_empty(hash)) ++ ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, ++ &old_hash_ops); ++ synchronize_sched(); ++ ++ hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { ++ hlist_del(&entry->hlist); ++ if (probe_ops->free) ++ probe_ops->free(probe_ops, tr, entry->ip, probe->data); ++ kfree(entry); ++ } ++ mutex_unlock(&ftrace_lock); ++ ++ out_unlock: ++ mutex_unlock(&probe->ops.func_hash->regex_lock); ++ free_ftrace_hash(hash); ++ ++ release_probe(probe); ++ ++ return ret; ++ ++ err_unlock_ftrace: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++void clear_ftrace_function_probes(struct trace_array *tr) ++{ ++ struct ftrace_func_probe *probe, *n; ++ ++ list_for_each_entry_safe(probe, n, &tr->func_probes, list) ++ unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops); ++} ++ ++static LIST_HEAD(ftrace_commands); ++static DEFINE_MUTEX(ftrace_cmd_mutex); ++ ++/* ++ * Currently we only register ftrace commands from __init, so mark this ++ * __init too. ++ */ ++__init int register_ftrace_command(struct ftrace_func_command *cmd) ++{ ++ struct ftrace_func_command *p; ++ int ret = 0; ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ } ++ list_add(&cmd->list, &ftrace_commands); ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++/* ++ * Currently we only unregister ftrace commands from __init, so mark ++ * this __init too. 
++ */ ++__init int unregister_ftrace_command(struct ftrace_func_command *cmd) ++{ ++ struct ftrace_func_command *p, *n; ++ int ret = -ENODEV; ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry_safe(p, n, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = 0; ++ list_del_init(&p->list); ++ goto out_unlock; ++ } ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++static int ftrace_process_regex(struct ftrace_iterator *iter, ++ char *buff, int len, int enable) ++{ ++ struct ftrace_hash *hash = iter->hash; ++ struct trace_array *tr = iter->ops->private; ++ char *func, *command, *next = buff; ++ struct ftrace_func_command *p; ++ int ret = -EINVAL; ++ ++ func = strsep(&next, ":"); ++ ++ if (!next) { ++ ret = ftrace_match_records(hash, func, len); ++ if (!ret) ++ ret = -EINVAL; ++ if (ret < 0) ++ return ret; ++ return 0; ++ } ++ ++ /* command found */ ++ ++ command = strsep(&next, ":"); ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(p->name, command) == 0) { ++ ret = p->func(tr, hash, func, command, next, enable); ++ goto out_unlock; ++ } ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; ++} ++ ++static ssize_t ++ftrace_regex_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos, int enable) ++{ ++ struct ftrace_iterator *iter; ++ struct trace_parser *parser; ++ ssize_t ret, read; ++ ++ if (!cnt) ++ return 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ iter = m->private; ++ } else ++ iter = file->private_data; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ /* iter->hash is a local copy, so we don't need regex_lock */ ++ ++ parser = &iter->parser; ++ read = trace_get_user(parser, ubuf, cnt, ppos); ++ ++ if (read >= 0 && trace_parser_loaded(parser) && ++ !trace_parser_cont(parser)) { ++ ret = ftrace_process_regex(iter, parser->buffer, ++ parser->idx, enable); ++ trace_parser_clear(parser); ++ if (ret < 0) ++ goto out; ++ } ++ ++ ret = read; ++ out: ++ return ret; ++} ++ ++ssize_t ++ftrace_filter_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ return ftrace_regex_write(file, ubuf, cnt, ppos, 1); ++} ++ ++ssize_t ++ftrace_notrace_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ return ftrace_regex_write(file, ubuf, cnt, ppos, 0); ++} ++ ++static int ++ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) ++{ ++ struct ftrace_func_entry *entry; ++ ++ if (!ftrace_location(ip)) ++ return -EINVAL; ++ ++ if (remove) { ++ entry = ftrace_lookup_ip(hash, ip); ++ if (!entry) ++ return -ENOENT; ++ free_hash_entry(hash, entry); ++ return 0; ++ } ++ ++ return add_hash_entry(hash, ip); ++} ++ ++static int ++ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, ++ unsigned long ip, int remove, int reset, int enable) ++{ ++ struct ftrace_hash **orig_hash; ++ struct ftrace_hash *hash; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ mutex_lock(&ops->func_hash->regex_lock); ++ ++ if (enable) ++ orig_hash = &ops->func_hash->filter_hash; ++ else ++ orig_hash = &ops->func_hash->notrace_hash; ++ ++ if (reset) ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ else ++ hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); ++ ++ if (!hash) { ++ ret = -ENOMEM; ++ goto out_regex_unlock; ++ } ++ ++ if (buf && !ftrace_match_records(hash, buf, len)) 
{ ++ ret = -EINVAL; ++ goto out_regex_unlock; ++ } ++ if (ip) { ++ ret = ftrace_match_addr(hash, ip, remove); ++ if (ret < 0) ++ goto out_regex_unlock; ++ } ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); ++ mutex_unlock(&ftrace_lock); ++ ++ out_regex_unlock: ++ mutex_unlock(&ops->func_hash->regex_lock); ++ ++ free_ftrace_hash(hash); ++ return ret; ++} ++ ++static int ++ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, ++ int reset, int enable) ++{ ++ return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); ++} ++ ++/** ++ * ftrace_set_filter_ip - set a function to filter on in ftrace by address ++ * @ops - the ops to set the filter with ++ * @ip - the address to add to or remove from the filter. ++ * @remove - non zero to remove the ip from the filter ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled ++ * If @ip is NULL, it failes to update filter. ++ */ ++int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, ++ int remove, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_addr(ops, ip, remove, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); ++ ++/** ++ * ftrace_ops_set_global_filter - setup ops to use global filters ++ * @ops - the ops which will use the global filters ++ * ++ * ftrace users who need global function trace filtering should call this. ++ * It can set the global filter only if ops were not initialized before. ++ */ ++void ftrace_ops_set_global_filter(struct ftrace_ops *ops) ++{ ++ if (ops->flags & FTRACE_OPS_FL_INITIALIZED) ++ return; ++ ++ ftrace_ops_init(ops); ++ ops->func_hash = &global_ops.local_hash; ++} ++EXPORT_SYMBOL_GPL(ftrace_ops_set_global_filter); ++ ++static int ++ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ++ int reset, int enable) ++{ ++ return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); ++} ++ ++/** ++ * ftrace_set_filter - set a function to filter on in ftrace ++ * @ops - the ops to set the filter with ++ * @buf - the string that holds the function filter text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled. ++ * If @buf is NULL and reset is set, all functions will be enabled for tracing. ++ */ ++int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, ++ int len, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_regex(ops, buf, len, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_filter); ++ ++/** ++ * ftrace_set_notrace - set a function to not trace in ftrace ++ * @ops - the ops to set the notrace filter with ++ * @buf - the string that holds the function notrace text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Notrace Filters denote which functions should not be enabled when tracing ++ * is enabled. If @buf is NULL and reset is set, all functions will be enabled ++ * for tracing. ++ */ ++int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, ++ int len, int reset) ++{ ++ ftrace_ops_init(ops); ++ return ftrace_set_regex(ops, buf, len, reset, 0); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_notrace); ++/** ++ * ftrace_set_global_filter - set a function to filter on with global tracers ++ * @buf - the string that holds the function filter text. 
++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Filters denote which functions should be enabled when tracing is enabled. ++ * If @buf is NULL and reset is set, all functions will be enabled for tracing. ++ */ ++void ftrace_set_global_filter(unsigned char *buf, int len, int reset) ++{ ++ ftrace_set_regex(&global_ops, buf, len, reset, 1); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_global_filter); ++ ++/** ++ * ftrace_set_global_notrace - set a function to not trace with global tracers ++ * @buf - the string that holds the function notrace text. ++ * @len - the length of the string. ++ * @reset - non zero to reset all filters before applying this filter. ++ * ++ * Notrace Filters denote which functions should not be enabled when tracing ++ * is enabled. If @buf is NULL and reset is set, all functions will be enabled ++ * for tracing. ++ */ ++void ftrace_set_global_notrace(unsigned char *buf, int len, int reset) ++{ ++ ftrace_set_regex(&global_ops, buf, len, reset, 0); ++} ++EXPORT_SYMBOL_GPL(ftrace_set_global_notrace); ++ ++/* ++ * command line interface to allow users to set filters on boot up. ++ */ ++#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE ++static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; ++static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; ++ ++/* Used by function selftest to not test if filter is set */ ++bool ftrace_filter_param __initdata; ++ ++static int __init set_ftrace_notrace(char *str) ++{ ++ ftrace_filter_param = true; ++ strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_notrace=", set_ftrace_notrace); ++ ++static int __init set_ftrace_filter(char *str) ++{ ++ ftrace_filter_param = true; ++ strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_filter=", set_ftrace_filter); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; ++static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; ++static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); ++ ++static int __init set_graph_function(char *str) ++{ ++ strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_graph_filter=", set_graph_function); ++ ++static int __init set_graph_notrace_function(char *str) ++{ ++ strlcpy(ftrace_graph_notrace_buf, str, FTRACE_FILTER_SIZE); ++ return 1; ++} ++__setup("ftrace_graph_notrace=", set_graph_notrace_function); ++ ++static int __init set_graph_max_depth_function(char *str) ++{ ++ if (!str) ++ return 0; ++ fgraph_max_depth = simple_strtoul(str, NULL, 0); ++ return 1; ++} ++__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); ++ ++static void __init set_ftrace_early_graph(char *buf, int enable) ++{ ++ int ret; ++ char *func; ++ struct ftrace_hash *hash; ++ ++ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); ++ if (WARN_ON(!hash)) ++ return; ++ ++ while (buf) { ++ func = strsep(&buf, ","); ++ /* we allow only one expression at a time */ ++ ret = ftrace_graph_set_hash(hash, func); ++ if (ret) ++ printk(KERN_DEBUG "ftrace: function %s not " ++ "traceable\n", func); ++ } ++ ++ if (enable) ++ ftrace_graph_hash = hash; ++ else ++ ftrace_graph_notrace_hash = hash; ++} ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++void __init ++ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) ++{ ++ char *func; ++ ++ ftrace_ops_init(ops); ++ ++ while (buf) { ++ func = strsep(&buf, ","); ++ 
ftrace_set_regex(ops, func, strlen(func), 0, enable); ++ } ++} ++ ++static void __init set_ftrace_early_filters(void) ++{ ++ if (ftrace_filter_buf[0]) ++ ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); ++ if (ftrace_notrace_buf[0]) ++ ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ if (ftrace_graph_buf[0]) ++ set_ftrace_early_graph(ftrace_graph_buf, 1); ++ if (ftrace_graph_notrace_buf[0]) ++ set_ftrace_early_graph(ftrace_graph_notrace_buf, 0); ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++} ++ ++int ftrace_regex_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m = (struct seq_file *)file->private_data; ++ struct ftrace_iterator *iter; ++ struct ftrace_hash **orig_hash; ++ struct trace_parser *parser; ++ int filter_hash; ++ int ret; ++ ++ if (file->f_mode & FMODE_READ) { ++ iter = m->private; ++ seq_release(inode, file); ++ } else ++ iter = file->private_data; ++ ++ parser = &iter->parser; ++ if (trace_parser_loaded(parser)) { ++ ftrace_match_records(iter->hash, parser->buffer, parser->idx); ++ } ++ ++ trace_parser_put(parser); ++ ++ mutex_lock(&iter->ops->func_hash->regex_lock); ++ ++ if (file->f_mode & FMODE_WRITE) { ++ filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); ++ ++ if (filter_hash) { ++ orig_hash = &iter->ops->func_hash->filter_hash; ++ if (iter->tr && !list_empty(&iter->tr->mod_trace)) ++ iter->hash->flags |= FTRACE_HASH_FL_MOD; ++ } else ++ orig_hash = &iter->ops->func_hash->notrace_hash; ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, ++ iter->hash, filter_hash); ++ mutex_unlock(&ftrace_lock); ++ } else { ++ /* For read only, the hash is the ops hash */ ++ iter->hash = NULL; ++ } ++ ++ mutex_unlock(&iter->ops->func_hash->regex_lock); ++ free_ftrace_hash(iter->hash); ++ if (iter->tr) ++ trace_array_put(iter->tr); ++ kfree(iter); ++ ++ return 0; ++} ++ ++static const struct file_operations ftrace_avail_fops = { ++ .open = ftrace_avail_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++ ++static const struct file_operations ftrace_enabled_fops = { ++ .open = ftrace_enabled_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++ ++static const struct file_operations ftrace_filter_fops = { ++ .open = ftrace_filter_open, ++ .read = seq_read, ++ .write = ftrace_filter_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_regex_release, ++}; ++ ++static const struct file_operations ftrace_notrace_fops = { ++ .open = ftrace_notrace_open, ++ .read = seq_read, ++ .write = ftrace_notrace_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_regex_release, ++}; ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ ++static DEFINE_MUTEX(graph_lock); ++ ++struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; ++struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; ++ ++enum graph_filter_type { ++ GRAPH_FILTER_NOTRACE = 0, ++ GRAPH_FILTER_FUNCTION, ++}; ++ ++#define FTRACE_GRAPH_EMPTY ((void *)1) ++ ++struct ftrace_graph_data { ++ struct ftrace_hash *hash; ++ struct ftrace_func_entry *entry; ++ int idx; /* for hash table iteration */ ++ enum graph_filter_type type; ++ struct ftrace_hash *new_hash; ++ const struct seq_operations *seq_ops; ++ struct trace_parser parser; ++}; ++ ++static void * ++__g_next(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_graph_data *fgd = m->private; ++ struct ftrace_func_entry *entry = fgd->entry; ++ struct hlist_head *head; ++ int i, idx = 
fgd->idx; ++ ++ if (*pos >= fgd->hash->count) ++ return NULL; ++ ++ if (entry) { ++ hlist_for_each_entry_continue(entry, hlist) { ++ fgd->entry = entry; ++ return entry; ++ } ++ ++ idx++; ++ } ++ ++ for (i = idx; i < 1 << fgd->hash->size_bits; i++) { ++ head = &fgd->hash->buckets[i]; ++ hlist_for_each_entry(entry, head, hlist) { ++ fgd->entry = entry; ++ fgd->idx = i; ++ return entry; ++ } ++ } ++ return NULL; ++} ++ ++static void * ++g_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ (*pos)++; ++ return __g_next(m, pos); ++} ++ ++static void *g_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_graph_data *fgd = m->private; ++ ++ mutex_lock(&graph_lock); ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) ++ fgd->hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ else ++ fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ ++ /* Nothing, tell g_show to print all functions are enabled */ ++ if (ftrace_hash_empty(fgd->hash) && !*pos) ++ return FTRACE_GRAPH_EMPTY; ++ ++ fgd->idx = 0; ++ fgd->entry = NULL; ++ return __g_next(m, pos); ++} ++ ++static void g_stop(struct seq_file *m, void *p) ++{ ++ mutex_unlock(&graph_lock); ++} ++ ++static int g_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_func_entry *entry = v; ++ ++ if (!entry) ++ return 0; ++ ++ if (entry == FTRACE_GRAPH_EMPTY) { ++ struct ftrace_graph_data *fgd = m->private; ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) ++ seq_puts(m, "#### all functions enabled ####\n"); ++ else ++ seq_puts(m, "#### no functions disabled ####\n"); ++ return 0; ++ } ++ ++ seq_printf(m, "%ps\n", (void *)entry->ip); ++ ++ return 0; ++} ++ ++static const struct seq_operations ftrace_graph_seq_ops = { ++ .start = g_start, ++ .next = g_next, ++ .stop = g_stop, ++ .show = g_show, ++}; ++ ++static int ++__ftrace_graph_open(struct inode *inode, struct file *file, ++ struct ftrace_graph_data *fgd) ++{ ++ int ret = 0; ++ struct ftrace_hash *new_hash = NULL; ++ ++ if (file->f_mode & FMODE_WRITE) { ++ const int size_bits = FTRACE_HASH_DEFAULT_BITS; ++ ++ if (trace_parser_get_init(&fgd->parser, FTRACE_BUFF_MAX)) ++ return -ENOMEM; ++ ++ if (file->f_flags & O_TRUNC) ++ new_hash = alloc_ftrace_hash(size_bits); ++ else ++ new_hash = alloc_and_copy_ftrace_hash(size_bits, ++ fgd->hash); ++ if (!new_hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ if (file->f_mode & FMODE_READ) { ++ ret = seq_open(file, &ftrace_graph_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = fgd; ++ } else { ++ /* Failed */ ++ free_ftrace_hash(new_hash); ++ new_hash = NULL; ++ } ++ } else ++ file->private_data = fgd; ++ ++out: ++ if (ret < 0 && file->f_mode & FMODE_WRITE) ++ trace_parser_put(&fgd->parser); ++ ++ fgd->new_hash = new_hash; ++ ++ /* ++ * All uses of fgd->hash must be taken with the graph_lock ++ * held. The graph_lock is going to be released, so force ++ * fgd->hash to be reinitialized when it is taken again. 
++ */ ++ fgd->hash = NULL; ++ ++ return ret; ++} ++ ++static int ++ftrace_graph_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); ++ if (fgd == NULL) ++ return -ENOMEM; ++ ++ mutex_lock(&graph_lock); ++ ++ fgd->hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ fgd->type = GRAPH_FILTER_FUNCTION; ++ fgd->seq_ops = &ftrace_graph_seq_ops; ++ ++ ret = __ftrace_graph_open(inode, file, fgd); ++ if (ret < 0) ++ kfree(fgd); ++ ++ mutex_unlock(&graph_lock); ++ return ret; ++} ++ ++static int ++ftrace_graph_notrace_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ fgd = kmalloc(sizeof(*fgd), GFP_KERNEL); ++ if (fgd == NULL) ++ return -ENOMEM; ++ ++ mutex_lock(&graph_lock); ++ ++ fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ fgd->type = GRAPH_FILTER_NOTRACE; ++ fgd->seq_ops = &ftrace_graph_seq_ops; ++ ++ ret = __ftrace_graph_open(inode, file, fgd); ++ if (ret < 0) ++ kfree(fgd); ++ ++ mutex_unlock(&graph_lock); ++ return ret; ++} ++ ++static int ++ftrace_graph_release(struct inode *inode, struct file *file) ++{ ++ struct ftrace_graph_data *fgd; ++ struct ftrace_hash *old_hash, *new_hash; ++ struct trace_parser *parser; ++ int ret = 0; ++ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ ++ fgd = m->private; ++ seq_release(inode, file); ++ } else { ++ fgd = file->private_data; ++ } ++ ++ ++ if (file->f_mode & FMODE_WRITE) { ++ ++ parser = &fgd->parser; ++ ++ if (trace_parser_loaded((parser))) { ++ ret = ftrace_graph_set_hash(fgd->new_hash, ++ parser->buffer); ++ } ++ ++ trace_parser_put(parser); ++ ++ new_hash = __ftrace_hash_move(fgd->new_hash); ++ if (!new_hash) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ mutex_lock(&graph_lock); ++ ++ if (fgd->type == GRAPH_FILTER_FUNCTION) { ++ old_hash = rcu_dereference_protected(ftrace_graph_hash, ++ lockdep_is_held(&graph_lock)); ++ rcu_assign_pointer(ftrace_graph_hash, new_hash); ++ } else { ++ old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, ++ lockdep_is_held(&graph_lock)); ++ rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash); ++ } ++ ++ mutex_unlock(&graph_lock); ++ ++ /* Wait till all users are no longer using the old hash */ ++ synchronize_sched(); ++ ++ free_ftrace_hash(old_hash); ++ } ++ ++ out: ++ free_ftrace_hash(fgd->new_hash); ++ kfree(fgd); ++ ++ return ret; ++} ++ ++static int ++ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) ++{ ++ struct ftrace_glob func_g; ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ struct ftrace_func_entry *entry; ++ int fail = 1; ++ int not; ++ ++ /* decode regex */ ++ func_g.type = filter_parse_regex(buffer, strlen(buffer), ++ &func_g.search, ¬); ++ ++ func_g.len = strlen(func_g.search); ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) { ++ mutex_unlock(&ftrace_lock); ++ return -ENODEV; ++ } ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_DISABLED) ++ continue; ++ ++ if (ftrace_match_record(rec, &func_g, NULL, 0)) { ++ entry = ftrace_lookup_ip(hash, rec->ip); ++ ++ if (!not) { ++ fail = 0; ++ ++ if (entry) ++ continue; ++ if (add_hash_entry(hash, rec->ip) < 0) ++ goto out; ++ } else { ++ if (entry) { ++ free_hash_entry(hash, entry); ++ fail = 0; ++ } ++ } ++ 
} ++ } while_for_each_ftrace_rec(); ++out: ++ mutex_unlock(&ftrace_lock); ++ ++ if (fail) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static ssize_t ++ftrace_graph_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ ssize_t read, ret = 0; ++ struct ftrace_graph_data *fgd = file->private_data; ++ struct trace_parser *parser; ++ ++ if (!cnt) ++ return 0; ++ ++ /* Read mode uses seq functions */ ++ if (file->f_mode & FMODE_READ) { ++ struct seq_file *m = file->private_data; ++ fgd = m->private; ++ } ++ ++ parser = &fgd->parser; ++ ++ read = trace_get_user(parser, ubuf, cnt, ppos); ++ ++ if (read >= 0 && trace_parser_loaded(parser) && ++ !trace_parser_cont(parser)) { ++ ++ ret = ftrace_graph_set_hash(fgd->new_hash, ++ parser->buffer); ++ trace_parser_clear(parser); ++ } ++ ++ if (!ret) ++ ret = read; ++ ++ return ret; ++} ++ ++static const struct file_operations ftrace_graph_fops = { ++ .open = ftrace_graph_open, ++ .read = seq_read, ++ .write = ftrace_graph_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_graph_release, ++}; ++ ++static const struct file_operations ftrace_graph_notrace_fops = { ++ .open = ftrace_graph_notrace_open, ++ .read = seq_read, ++ .write = ftrace_graph_write, ++ .llseek = tracing_lseek, ++ .release = ftrace_graph_release, ++}; ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++void ftrace_create_filter_files(struct ftrace_ops *ops, ++ struct dentry *parent) ++{ ++ ++ trace_create_file("set_ftrace_filter", 0644, parent, ++ ops, &ftrace_filter_fops); ++ ++ trace_create_file("set_ftrace_notrace", 0644, parent, ++ ops, &ftrace_notrace_fops); ++} ++ ++/* ++ * The name "destroy_filter_files" is really a misnomer. Although ++ * in the future, it may actualy delete the files, but this is ++ * really intended to make sure the ops passed in are disabled ++ * and that when this function returns, the caller is free to ++ * free the ops. ++ * ++ * The "destroy" name is only to match the "create" name that this ++ * should be paired with. 
++ */ ++void ftrace_destroy_filter_files(struct ftrace_ops *ops) ++{ ++ mutex_lock(&ftrace_lock); ++ if (ops->flags & FTRACE_OPS_FL_ENABLED) ++ ftrace_shutdown(ops, 0); ++ ops->flags |= FTRACE_OPS_FL_DELETED; ++ ftrace_free_filter(ops); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) ++{ ++ ++ trace_create_file("available_filter_functions", 0444, ++ d_tracer, NULL, &ftrace_avail_fops); ++ ++ trace_create_file("enabled_functions", 0444, ++ d_tracer, NULL, &ftrace_enabled_fops); ++ ++ ftrace_create_filter_files(&global_ops, d_tracer); ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ trace_create_file("set_graph_function", 0644, d_tracer, ++ NULL, ++ &ftrace_graph_fops); ++ trace_create_file("set_graph_notrace", 0644, d_tracer, ++ NULL, ++ &ftrace_graph_notrace_fops); ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++ return 0; ++} ++ ++static int ftrace_cmp_ips(const void *a, const void *b) ++{ ++ const unsigned long *ipa = a; ++ const unsigned long *ipb = b; ++ ++ if (*ipa > *ipb) ++ return 1; ++ if (*ipa < *ipb) ++ return -1; ++ return 0; ++} ++ ++static int ftrace_process_locs(struct module *mod, ++ unsigned long *start, ++ unsigned long *end) ++{ ++ struct ftrace_page *start_pg; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ unsigned long count; ++ unsigned long *p; ++ unsigned long addr; ++ unsigned long flags = 0; /* Shut up gcc */ ++ int ret = -ENOMEM; ++ ++ count = end - start; ++ ++ if (!count) ++ return 0; ++ ++ sort(start, count, sizeof(*start), ++ ftrace_cmp_ips, NULL); ++ ++ start_pg = ftrace_allocate_pages(count); ++ if (!start_pg) ++ return -ENOMEM; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* ++ * Core and each module needs their own pages, as ++ * modules will free them when they are removed. ++ * Force a new page to be allocated for modules. ++ */ ++ if (!mod) { ++ WARN_ON(ftrace_pages || ftrace_pages_start); ++ /* First initialization */ ++ ftrace_pages = ftrace_pages_start = start_pg; ++ } else { ++ if (!ftrace_pages) ++ goto out; ++ ++ if (WARN_ON(ftrace_pages->next)) { ++ /* Hmm, we have free pages? */ ++ while (ftrace_pages->next) ++ ftrace_pages = ftrace_pages->next; ++ } ++ ++ ftrace_pages->next = start_pg; ++ } ++ ++ p = start; ++ pg = start_pg; ++ while (p < end) { ++ addr = ftrace_call_adjust(*p++); ++ /* ++ * Some architecture linkers will pad between ++ * the different mcount_loc sections of different ++ * object files to satisfy alignments. ++ * Skip any NULL pointers. ++ */ ++ if (!addr) ++ continue; ++ ++ if (pg->index == pg->size) { ++ /* We should have allocated enough */ ++ if (WARN_ON(!pg->next)) ++ break; ++ pg = pg->next; ++ } ++ ++ rec = &pg->records[pg->index++]; ++ rec->ip = addr; ++ } ++ ++ /* We should have used all pages */ ++ WARN_ON(pg->next); ++ ++ /* Assign the last page to ftrace_pages */ ++ ftrace_pages = pg; ++ ++ /* ++ * We only need to disable interrupts on start up ++ * because we are modifying code that an interrupt ++ * may execute, and the modification is not atomic. ++ * But for modules, nothing runs the code we modify ++ * until we are finished with it, and there's no ++ * reason to cause large interrupt latencies while we do it. 
++ */ ++ if (!mod) ++ local_irq_save(flags); ++ ftrace_update_code(mod, start_pg); ++ if (!mod) ++ local_irq_restore(flags); ++ ret = 0; ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++ ++struct ftrace_mod_func { ++ struct list_head list; ++ char *name; ++ unsigned long ip; ++ unsigned int size; ++}; ++ ++struct ftrace_mod_map { ++ struct rcu_head rcu; ++ struct list_head list; ++ struct module *mod; ++ unsigned long start_addr; ++ unsigned long end_addr; ++ struct list_head funcs; ++ unsigned int num_funcs; ++}; ++ ++#ifdef CONFIG_MODULES ++ ++#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) ++ ++static LIST_HEAD(ftrace_mod_maps); ++ ++static int referenced_filters(struct dyn_ftrace *rec) ++{ ++ struct ftrace_ops *ops; ++ int cnt = 0; ++ ++ for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { ++ if (ops_references_rec(ops, rec)) { ++ cnt++; ++ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) ++ rec->flags |= FTRACE_FL_REGS; ++ } ++ } ++ ++ return cnt; ++} ++ ++static void ++clear_mod_from_hash(struct ftrace_page *pg, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ struct dyn_ftrace *rec; ++ int i; ++ ++ if (ftrace_hash_empty(hash)) ++ return; ++ ++ for (i = 0; i < pg->index; i++) { ++ rec = &pg->records[i]; ++ entry = __ftrace_lookup_ip(hash, rec->ip); ++ /* ++ * Do not allow this rec to match again. ++ * Yeah, it may waste some memory, but will be removed ++ * if/when the hash is modified again. ++ */ ++ if (entry) ++ entry->ip = 0; ++ } ++} ++ ++/* Clear any records from hashs */ ++static void clear_mod_from_hashes(struct ftrace_page *pg) ++{ ++ struct trace_array *tr; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!tr->ops || !tr->ops->func_hash) ++ continue; ++ mutex_lock(&tr->ops->func_hash->regex_lock); ++ clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash); ++ clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash); ++ mutex_unlock(&tr->ops->func_hash->regex_lock); ++ } ++ mutex_unlock(&trace_types_lock); ++} ++ ++static void ftrace_free_mod_map(struct rcu_head *rcu) ++{ ++ struct ftrace_mod_map *mod_map = container_of(rcu, struct ftrace_mod_map, rcu); ++ struct ftrace_mod_func *mod_func; ++ struct ftrace_mod_func *n; ++ ++ /* All the contents of mod_map are now not visible to readers */ ++ list_for_each_entry_safe(mod_func, n, &mod_map->funcs, list) { ++ kfree(mod_func->name); ++ list_del(&mod_func->list); ++ kfree(mod_func); ++ } ++ ++ kfree(mod_map); ++} ++ ++void ftrace_release_mod(struct module *mod) ++{ ++ struct ftrace_mod_map *mod_map; ++ struct ftrace_mod_map *n; ++ struct dyn_ftrace *rec; ++ struct ftrace_page **last_pg; ++ struct ftrace_page *tmp_page = NULL; ++ struct ftrace_page *pg; ++ int order; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (ftrace_disabled) ++ goto out_unlock; ++ ++ list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { ++ if (mod_map->mod == mod) { ++ list_del_rcu(&mod_map->list); ++ call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); ++ break; ++ } ++ } ++ ++ /* ++ * Each module has its own ftrace_pages, remove ++ * them from the list. ++ */ ++ last_pg = &ftrace_pages_start; ++ for (pg = ftrace_pages_start; pg; pg = *last_pg) { ++ rec = &pg->records[0]; ++ if (within_module_core(rec->ip, mod) || ++ within_module_init(rec->ip, mod)) { ++ /* ++ * As core pages are first, the first ++ * page should never be a module page. 
++ */ ++ if (WARN_ON(pg == ftrace_pages_start)) ++ goto out_unlock; ++ ++ /* Check if we are deleting the last page */ ++ if (pg == ftrace_pages) ++ ftrace_pages = next_to_ftrace_page(last_pg); ++ ++ ftrace_update_tot_cnt -= pg->index; ++ *last_pg = pg->next; ++ ++ pg->next = tmp_page; ++ tmp_page = pg; ++ } else ++ last_pg = &pg->next; ++ } ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ for (pg = tmp_page; pg; pg = tmp_page) { ++ ++ /* Needs to be called outside of ftrace_lock */ ++ clear_mod_from_hashes(pg); ++ ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ tmp_page = pg->next; ++ kfree(pg); ++ } ++} ++ ++void ftrace_module_enable(struct module *mod) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (ftrace_disabled) ++ goto out_unlock; ++ ++ /* ++ * If the tracing is enabled, go ahead and enable the record. ++ * ++ * The reason not to enable the record immediatelly is the ++ * inherent check of ftrace_make_nop/ftrace_make_call for ++ * correct previous instructions. Making first the NOP ++ * conversion puts the module to the correct state, thus ++ * passing the ftrace_make_call check. ++ * ++ * We also delay this to after the module code already set the ++ * text to read-only, as we now need to set it back to read-write ++ * so that we can modify the text. ++ */ ++ if (ftrace_start_up) ++ ftrace_arch_code_modify_prepare(); ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ int cnt; ++ /* ++ * do_for_each_ftrace_rec() is a double loop. ++ * module text shares the pg. If a record is ++ * not part of this module, then skip this pg, ++ * which the "break" will do. ++ */ ++ if (!within_module_core(rec->ip, mod) && ++ !within_module_init(rec->ip, mod)) ++ break; ++ ++ cnt = 0; ++ ++ /* ++ * When adding a module, we need to check if tracers are ++ * currently enabled and if they are, and can trace this record, ++ * we need to enable the module functions as well as update the ++ * reference counts for those function records. 
++ */ ++ if (ftrace_start_up) ++ cnt += referenced_filters(rec); ++ ++ rec->flags &= ~FTRACE_FL_DISABLED; ++ rec->flags += cnt; ++ ++ if (ftrace_start_up && cnt) { ++ int failed = __ftrace_replace_code(rec, 1); ++ if (failed) { ++ ftrace_bug(failed, rec); ++ goto out_loop; ++ } ++ } ++ ++ } while_for_each_ftrace_rec(); ++ ++ out_loop: ++ if (ftrace_start_up) ++ ftrace_arch_code_modify_post_process(); ++ ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ process_cached_mods(mod->name); ++} ++ ++void ftrace_module_init(struct module *mod) ++{ ++ if (ftrace_disabled || !mod->num_ftrace_callsites) ++ return; ++ ++ ftrace_process_locs(mod, mod->ftrace_callsites, ++ mod->ftrace_callsites + mod->num_ftrace_callsites); ++} ++ ++static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, ++ struct dyn_ftrace *rec) ++{ ++ struct ftrace_mod_func *mod_func; ++ unsigned long symsize; ++ unsigned long offset; ++ char str[KSYM_SYMBOL_LEN]; ++ char *modname; ++ const char *ret; ++ ++ ret = kallsyms_lookup(rec->ip, &symsize, &offset, &modname, str); ++ if (!ret) ++ return; ++ ++ mod_func = kmalloc(sizeof(*mod_func), GFP_KERNEL); ++ if (!mod_func) ++ return; ++ ++ mod_func->name = kstrdup(str, GFP_KERNEL); ++ if (!mod_func->name) { ++ kfree(mod_func); ++ return; ++ } ++ ++ mod_func->ip = rec->ip - offset; ++ mod_func->size = symsize; ++ ++ mod_map->num_funcs++; ++ ++ list_add_rcu(&mod_func->list, &mod_map->funcs); ++} ++ ++static struct ftrace_mod_map * ++allocate_ftrace_mod_map(struct module *mod, ++ unsigned long start, unsigned long end) ++{ ++ struct ftrace_mod_map *mod_map; ++ ++ mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); ++ if (!mod_map) ++ return NULL; ++ ++ mod_map->mod = mod; ++ mod_map->start_addr = start; ++ mod_map->end_addr = end; ++ mod_map->num_funcs = 0; ++ ++ INIT_LIST_HEAD_RCU(&mod_map->funcs); ++ ++ list_add_rcu(&mod_map->list, &ftrace_mod_maps); ++ ++ return mod_map; ++} ++ ++static const char * ++ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, ++ unsigned long addr, unsigned long *size, ++ unsigned long *off, char *sym) ++{ ++ struct ftrace_mod_func *found_func = NULL; ++ struct ftrace_mod_func *mod_func; ++ ++ list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { ++ if (addr >= mod_func->ip && ++ addr < mod_func->ip + mod_func->size) { ++ found_func = mod_func; ++ break; ++ } ++ } ++ ++ if (found_func) { ++ if (size) ++ *size = found_func->size; ++ if (off) ++ *off = addr - found_func->ip; ++ if (sym) ++ strlcpy(sym, found_func->name, KSYM_NAME_LEN); ++ ++ return found_func->name; ++ } ++ ++ return NULL; ++} ++ ++const char * ++ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, ++ unsigned long *off, char **modname, char *sym) ++{ ++ struct ftrace_mod_map *mod_map; ++ const char *ret = NULL; ++ ++ /* mod_map is freed via call_rcu_sched() */ ++ preempt_disable(); ++ list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ++ ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); ++ if (ret) { ++ if (modname) ++ *modname = mod_map->mod->name; ++ break; ++ } ++ } ++ preempt_enable(); ++ ++ return ret; ++} ++ ++int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, ++ char *type, char *name, ++ char *module_name, int *exported) ++{ ++ struct ftrace_mod_map *mod_map; ++ struct ftrace_mod_func *mod_func; ++ ++ preempt_disable(); ++ list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ++ ++ if (symnum >= mod_map->num_funcs) { ++ symnum -= mod_map->num_funcs; ++ continue; ++ } ++ ++ list_for_each_entry_rcu(mod_func, 
&mod_map->funcs, list) { ++ if (symnum > 1) { ++ symnum--; ++ continue; ++ } ++ ++ *value = mod_func->ip; ++ *type = 'T'; ++ strlcpy(name, mod_func->name, KSYM_NAME_LEN); ++ strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); ++ *exported = 1; ++ preempt_enable(); ++ return 0; ++ } ++ WARN_ON(1); ++ break; ++ } ++ preempt_enable(); ++ return -ERANGE; ++} ++ ++#else ++static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, ++ struct dyn_ftrace *rec) { } ++static inline struct ftrace_mod_map * ++allocate_ftrace_mod_map(struct module *mod, ++ unsigned long start, unsigned long end) ++{ ++ return NULL; ++} ++#endif /* CONFIG_MODULES */ ++ ++struct ftrace_init_func { ++ struct list_head list; ++ unsigned long ip; ++}; ++ ++/* Clear any init ips from hashes */ ++static void ++clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) ++{ ++ struct ftrace_func_entry *entry; ++ ++ if (ftrace_hash_empty(hash)) ++ return; ++ ++ entry = __ftrace_lookup_ip(hash, func->ip); ++ ++ /* ++ * Do not allow this rec to match again. ++ * Yeah, it may waste some memory, but will be removed ++ * if/when the hash is modified again. ++ */ ++ if (entry) ++ entry->ip = 0; ++} ++ ++static void ++clear_func_from_hashes(struct ftrace_init_func *func) ++{ ++ struct trace_array *tr; ++ ++ mutex_lock(&trace_types_lock); ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ if (!tr->ops || !tr->ops->func_hash) ++ continue; ++ mutex_lock(&tr->ops->func_hash->regex_lock); ++ clear_func_from_hash(func, tr->ops->func_hash->filter_hash); ++ clear_func_from_hash(func, tr->ops->func_hash->notrace_hash); ++ mutex_unlock(&tr->ops->func_hash->regex_lock); ++ } ++ mutex_unlock(&trace_types_lock); ++} ++ ++static void add_to_clear_hash_list(struct list_head *clear_list, ++ struct dyn_ftrace *rec) ++{ ++ struct ftrace_init_func *func; ++ ++ func = kmalloc(sizeof(*func), GFP_KERNEL); ++ if (!func) { ++ WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); ++ return; ++ } ++ ++ func->ip = rec->ip; ++ list_add(&func->list, clear_list); ++} ++ ++void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ++{ ++ unsigned long start = (unsigned long)(start_ptr); ++ unsigned long end = (unsigned long)(end_ptr); ++ struct ftrace_page **last_pg = &ftrace_pages_start; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ struct dyn_ftrace key; ++ struct ftrace_mod_map *mod_map = NULL; ++ struct ftrace_init_func *func, *func_next; ++ struct list_head clear_hash; ++ int order; ++ ++ INIT_LIST_HEAD(&clear_hash); ++ ++ key.ip = start; ++ key.flags = end; /* overload flags, as it is unsigned long */ ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* ++ * If we are freeing module init memory, then check if ++ * any tracer is active. If so, we need to save a mapping of ++ * the module functions being freed with the address. 
++ */ ++ if (mod && ftrace_ops_list != &ftrace_list_end) ++ mod_map = allocate_ftrace_mod_map(mod, start, end); ++ ++ for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { ++ if (end < pg->records[0].ip || ++ start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) ++ continue; ++ again: ++ rec = bsearch(&key, pg->records, pg->index, ++ sizeof(struct dyn_ftrace), ++ ftrace_cmp_recs); ++ if (!rec) ++ continue; ++ ++ /* rec will be cleared from hashes after ftrace_lock unlock */ ++ add_to_clear_hash_list(&clear_hash, rec); ++ ++ if (mod_map) ++ save_ftrace_mod_rec(mod_map, rec); ++ ++ pg->index--; ++ ftrace_update_tot_cnt--; ++ if (!pg->index) { ++ *last_pg = pg->next; ++ order = get_count_order(pg->size / ENTRIES_PER_PAGE); ++ free_pages((unsigned long)pg->records, order); ++ kfree(pg); ++ pg = container_of(last_pg, struct ftrace_page, next); ++ if (!(*last_pg)) ++ ftrace_pages = pg; ++ continue; ++ } ++ memmove(rec, rec + 1, ++ (pg->index - (rec - pg->records)) * sizeof(*rec)); ++ /* More than one function may be in this block */ ++ goto again; ++ } ++ mutex_unlock(&ftrace_lock); ++ ++ list_for_each_entry_safe(func, func_next, &clear_hash, list) { ++ clear_func_from_hashes(func); ++ kfree(func); ++ } ++} ++ ++void __init ftrace_free_init_mem(void) ++{ ++ void *start = (void *)(&__init_begin); ++ void *end = (void *)(&__init_end); ++ ++ ftrace_free_mem(NULL, start, end); ++} ++ ++void __init ftrace_init(void) ++{ ++ extern unsigned long __start_mcount_loc[]; ++ extern unsigned long __stop_mcount_loc[]; ++ unsigned long count, flags; ++ int ret; ++ ++ local_irq_save(flags); ++ ret = ftrace_dyn_arch_init(); ++ local_irq_restore(flags); ++ if (ret) ++ goto failed; ++ ++ count = __stop_mcount_loc - __start_mcount_loc; ++ if (!count) { ++ pr_info("ftrace: No functions to be traced?\n"); ++ goto failed; ++ } ++ ++ pr_info("ftrace: allocating %ld entries in %ld pages\n", ++ count, count / ENTRIES_PER_PAGE + 1); ++ ++ last_ftrace_enabled = ftrace_enabled = 1; ++ ++ ret = ftrace_process_locs(NULL, ++ __start_mcount_loc, ++ __stop_mcount_loc); ++ ++ set_ftrace_early_filters(); ++ ++ return; ++ failed: ++ ftrace_disabled = 1; ++} ++ ++/* Do nothing if arch does not support this */ ++void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++ arch_ftrace_update_trampoline(ops); ++} ++ ++void ftrace_init_trace_array(struct trace_array *tr) ++{ ++ INIT_LIST_HEAD(&tr->func_probes); ++ INIT_LIST_HEAD(&tr->mod_trace); ++ INIT_LIST_HEAD(&tr->mod_notrace); ++} ++#else ++ ++static struct ftrace_ops global_ops = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID, ++}; ++ ++static int __init ftrace_nodyn_init(void) ++{ ++ ftrace_enabled = 1; ++ return 0; ++} ++core_initcall(ftrace_nodyn_init); ++ ++static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; } ++static inline void ftrace_startup_enable(int command) { } ++static inline void ftrace_startup_all(int command) { } ++/* Keep as macros so we do not need to define the commands */ ++# define ftrace_startup(ops, command) \ ++ ({ \ ++ int ___ret = __register_ftrace_function(ops); \ ++ if (!___ret) \ ++ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ ++ ___ret; \ ++ }) ++# define ftrace_shutdown(ops, command) \ ++ ({ \ ++ int ___ret = __unregister_ftrace_function(ops); \ ++ if (!___ret) \ ++ (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \ ++ ___ret; \ ++ }) ++ ++# define 
ftrace_startup_sysctl() do { } while (0) ++# define ftrace_shutdown_sysctl() do { } while (0) ++ ++static inline int ++ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) ++{ ++ return 1; ++} ++ ++static void ftrace_update_trampoline(struct ftrace_ops *ops) ++{ ++} ++ ++#endif /* CONFIG_DYNAMIC_FTRACE */ ++ ++__init void ftrace_init_global_array_ops(struct trace_array *tr) ++{ ++ tr->ops = &global_ops; ++ tr->ops->private = tr; ++ ftrace_init_trace_array(tr); ++} ++ ++void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) ++{ ++ /* If we filter on pids, update to use the pid function */ ++ if (tr->flags & TRACE_ARRAY_FL_GLOBAL) { ++ if (WARN_ON(tr->ops->func != ftrace_stub)) ++ printk("ftrace ops had %pS for function\n", ++ tr->ops->func); ++ } ++ tr->ops->func = func; ++ tr->ops->private = tr; ++} ++ ++void ftrace_reset_array_ops(struct trace_array *tr) ++{ ++ tr->ops->func = ftrace_stub; ++} ++ ++static nokprobe_inline void ++__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *ignored, struct pt_regs *regs) ++{ ++ struct ftrace_ops *op; ++ int bit; ++ ++ bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); ++ if (bit < 0) ++ return; ++ ++ /* ++ * Some of the ops may be dynamically allocated, ++ * they must be freed after a synchronize_sched(). ++ */ ++ preempt_disable_notrace(); ++ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ /* ++ * Check the following for each ops before calling their func: ++ * if RCU flag is set, then rcu_is_watching() must be true ++ * if PER_CPU is set, then ftrace_function_local_disable() ++ * must be false ++ * Otherwise test if the ip matches the ops filter ++ * ++ * If any of the above fails then the op->func() is not executed. ++ */ ++ if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && ++ ftrace_ops_test(op, ip, regs)) { ++ if (FTRACE_WARN_ON(!op->func)) { ++ pr_warn("op=%p %pS\n", op, op); ++ goto out; ++ } ++ op->func(ip, parent_ip, op, regs); ++ } ++ } while_for_each_ftrace_op(op); ++out: ++ preempt_enable_notrace(); ++ trace_clear_recursion(bit); ++} ++ ++/* ++ * Some archs only support passing ip and parent_ip. Even though ++ * the list function ignores the op parameter, we do not want any ++ * C side effects, where a function is called without the caller ++ * sending a third parameter. ++ * Archs are to support both the regs and ftrace_ops at the same time. ++ * If they support ftrace_ops, it is assumed they support regs. ++ * If call backs want to use regs, they must either check for regs ++ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. ++ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. ++ * An architecture can pass partial regs with ftrace_ops and still ++ * set the ARCH_SUPPORTS_FTRACE_OPS. ++ */ ++#if ARCH_SUPPORTS_FTRACE_OPS ++static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ __ftrace_ops_list_func(ip, parent_ip, NULL, regs); ++} ++NOKPROBE_SYMBOL(ftrace_ops_list_func); ++#else ++static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) ++{ ++ __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); ++} ++NOKPROBE_SYMBOL(ftrace_ops_no_ops); ++#endif ++ ++/* ++ * If there's only one function registered but it does not support ++ * recursion, needs RCU protection and/or requires per cpu handling, then ++ * this function will be called by the mcount trampoline. 
++ */ ++static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *op, struct pt_regs *regs) ++{ ++ int bit; ++ ++ bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); ++ if (bit < 0) ++ return; ++ ++ preempt_disable_notrace(); ++ ++ if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) ++ op->func(ip, parent_ip, op, regs); ++ ++ preempt_enable_notrace(); ++ trace_clear_recursion(bit); ++} ++NOKPROBE_SYMBOL(ftrace_ops_assist_func); ++ ++/** ++ * ftrace_ops_get_func - get the function a trampoline should call ++ * @ops: the ops to get the function for ++ * ++ * Normally the mcount trampoline will call the ops->func, but there ++ * are times that it should not. For example, if the ops does not ++ * have its own recursion protection, then it should call the ++ * ftrace_ops_assist_func() instead. ++ * ++ * Returns the function that the trampoline should call for @ops. ++ */ ++ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) ++{ ++ /* ++ * If the function does not handle recursion, needs to be RCU safe, ++ * or does per cpu logic, then we need to call the assist handler. ++ */ ++ if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || ++ ops->flags & FTRACE_OPS_FL_RCU) ++ return ftrace_ops_assist_func; ++ ++ return ops->func; ++} ++ ++static void ++ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, ++ struct task_struct *prev, struct task_struct *next) ++{ ++ struct trace_array *tr = data; ++ struct trace_pid_list *pid_list; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, ++ trace_ignore_this_task(pid_list, next)); ++} ++ ++static void ++ftrace_pid_follow_sched_process_fork(void *data, ++ struct task_struct *self, ++ struct task_struct *task) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = data; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ trace_filter_add_remove_task(pid_list, self, task); ++} ++ ++static void ++ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = data; ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ trace_filter_add_remove_task(pid_list, NULL, task); ++} ++ ++void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) ++{ ++ if (enable) { ++ register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, ++ tr); ++ register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, ++ tr); ++ } else { ++ unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, ++ tr); ++ unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, ++ tr); ++ } ++} ++ ++static void clear_ftrace_pids(struct trace_array *tr) ++{ ++ struct trace_pid_list *pid_list; ++ int cpu; ++ ++ pid_list = rcu_dereference_protected(tr->function_pids, ++ lockdep_is_held(&ftrace_lock)); ++ if (!pid_list) ++ return; ++ ++ unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); ++ ++ for_each_possible_cpu(cpu) ++ per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false; ++ ++ rcu_assign_pointer(tr->function_pids, NULL); ++ ++ /* Wait till all users are no longer using pid filtering */ ++ synchronize_sched(); ++ ++ trace_free_pid_list(pid_list); ++} ++ ++void ftrace_clear_pids(struct trace_array *tr) ++{ ++ mutex_lock(&ftrace_lock); ++ ++ clear_ftrace_pids(tr); ++ ++ mutex_unlock(&ftrace_lock); ++} ++ ++static void ftrace_pid_reset(struct trace_array 
*tr) ++{ ++ mutex_lock(&ftrace_lock); ++ clear_ftrace_pids(tr); ++ ++ ftrace_update_pid_func(); ++ ftrace_startup_all(0); ++ ++ mutex_unlock(&ftrace_lock); ++} ++ ++/* Greater than any max PID */ ++#define FTRACE_NO_PIDS (void *)(PID_MAX_LIMIT + 1) ++ ++static void *fpid_start(struct seq_file *m, loff_t *pos) ++ __acquires(RCU) ++{ ++ struct trace_pid_list *pid_list; ++ struct trace_array *tr = m->private; ++ ++ mutex_lock(&ftrace_lock); ++ rcu_read_lock_sched(); ++ ++ pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ if (!pid_list) ++ return !(*pos) ? FTRACE_NO_PIDS : NULL; ++ ++ return trace_pid_start(pid_list, pos); ++} ++ ++static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct trace_array *tr = m->private; ++ struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); ++ ++ if (v == FTRACE_NO_PIDS) ++ return NULL; ++ ++ return trace_pid_next(pid_list, v, pos); ++} ++ ++static void fpid_stop(struct seq_file *m, void *p) ++ __releases(RCU) ++{ ++ rcu_read_unlock_sched(); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static int fpid_show(struct seq_file *m, void *v) ++{ ++ if (v == FTRACE_NO_PIDS) { ++ seq_puts(m, "no pid\n"); ++ return 0; ++ } ++ ++ return trace_pid_show(m, v); ++} ++ ++static const struct seq_operations ftrace_pid_sops = { ++ .start = fpid_start, ++ .next = fpid_next, ++ .stop = fpid_stop, ++ .show = fpid_show, ++}; ++ ++static int ++ftrace_pid_open(struct inode *inode, struct file *file) ++{ ++ struct trace_array *tr = inode->i_private; ++ struct seq_file *m; ++ int ret = 0; ++ ++ if (trace_array_get(tr) < 0) ++ return -ENODEV; ++ ++ if ((file->f_mode & FMODE_WRITE) && ++ (file->f_flags & O_TRUNC)) ++ ftrace_pid_reset(tr); ++ ++ ret = seq_open(file, &ftrace_pid_sops); ++ if (ret < 0) { ++ trace_array_put(tr); ++ } else { ++ m = file->private_data; ++ /* copy tr over to seq ops */ ++ m->private = tr; ++ } ++ ++ return ret; ++} ++ ++static void ignore_task_cpu(void *data) ++{ ++ struct trace_array *tr = data; ++ struct trace_pid_list *pid_list; ++ ++ /* ++ * This function is called by on_each_cpu() while the ++ * event_mutex is held. ++ */ ++ pid_list = rcu_dereference_protected(tr->function_pids, ++ mutex_is_locked(&ftrace_lock)); ++ ++ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid, ++ trace_ignore_this_task(pid_list, current)); ++} ++ ++static ssize_t ++ftrace_pid_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ struct seq_file *m = filp->private_data; ++ struct trace_array *tr = m->private; ++ struct trace_pid_list *filtered_pids = NULL; ++ struct trace_pid_list *pid_list; ++ ssize_t ret; ++ ++ if (!cnt) ++ return 0; ++ ++ mutex_lock(&ftrace_lock); ++ ++ filtered_pids = rcu_dereference_protected(tr->function_pids, ++ lockdep_is_held(&ftrace_lock)); ++ ++ ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); ++ if (ret < 0) ++ goto out; ++ ++ rcu_assign_pointer(tr->function_pids, pid_list); ++ ++ if (filtered_pids) { ++ synchronize_sched(); ++ trace_free_pid_list(filtered_pids); ++ } else if (pid_list) { ++ /* Register a probe to set whether to ignore the tracing of a task */ ++ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); ++ } ++ ++ /* ++ * Ignoring of pids is done at task switch. But we have to ++ * check for those tasks that are currently running. ++ * Always do this in case a pid was appended or removed. 
++ */ ++ on_each_cpu(ignore_task_cpu, tr, 1); ++ ++ ftrace_update_pid_func(); ++ ftrace_startup_all(0); ++ out: ++ mutex_unlock(&ftrace_lock); ++ ++ if (ret > 0) ++ *ppos += ret; ++ ++ return ret; ++} ++ ++static int ++ftrace_pid_release(struct inode *inode, struct file *file) ++{ ++ struct trace_array *tr = inode->i_private; ++ ++ trace_array_put(tr); ++ ++ return seq_release(inode, file); ++} ++ ++static const struct file_operations ftrace_pid_fops = { ++ .open = ftrace_pid_open, ++ .write = ftrace_pid_write, ++ .read = seq_read, ++ .llseek = tracing_lseek, ++ .release = ftrace_pid_release, ++}; ++ ++void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) ++{ ++ trace_create_file("set_ftrace_pid", 0644, d_tracer, ++ tr, &ftrace_pid_fops); ++} ++ ++void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, ++ struct dentry *d_tracer) ++{ ++ /* Only the top level directory has the dyn_tracefs and profile */ ++ WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL)); ++ ++ ftrace_init_dyn_tracefs(d_tracer); ++ ftrace_profile_tracefs(d_tracer); ++} ++ ++/** ++ * ftrace_kill - kill ftrace ++ * ++ * This function should be used by panic code. It stops ftrace ++ * but in a not so nice way. If you need to simply kill ftrace ++ * from a non-atomic section, use ftrace_kill. ++ */ ++void ftrace_kill(void) ++{ ++ ftrace_disabled = 1; ++ ftrace_enabled = 0; ++ ftrace_trace_function = ftrace_stub; ++} ++ ++/** ++ * Test if ftrace is dead or not. ++ */ ++int ftrace_is_dead(void) ++{ ++ return ftrace_disabled; ++} ++ ++/** ++ * register_ftrace_function - register a function for profiling ++ * @ops - ops structure that holds the function for profiling. ++ * ++ * Register a function to be called by all functions in the ++ * kernel. ++ * ++ * Note: @ops->func and all the functions it calls must be labeled ++ * with "notrace", otherwise it will go into a ++ * recursive loop. ++ */ ++int register_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret = -1; ++ ++ ftrace_ops_init(ops); ++ ++ mutex_lock(&ftrace_lock); ++ ++ ret = ftrace_startup(ops, 0); ++ ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(register_ftrace_function); ++ ++/** ++ * unregister_ftrace_function - unregister a function for profiling. ++ * @ops - ops structure that holds the function to unregister ++ * ++ * Unregister a function that was added to be called by ftrace profiling. 
++ */ ++int unregister_ftrace_function(struct ftrace_ops *ops) ++{ ++ int ret; ++ ++ mutex_lock(&ftrace_lock); ++ ret = ftrace_shutdown(ops, 0); ++ mutex_unlock(&ftrace_lock); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(unregister_ftrace_function); ++ ++int ++ftrace_enable_sysctl(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, ++ loff_t *ppos) ++{ ++ int ret = -ENODEV; ++ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(ftrace_disabled)) ++ goto out; ++ ++ ret = proc_dointvec(table, write, buffer, lenp, ppos); ++ ++ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) ++ goto out; ++ ++ last_ftrace_enabled = !!ftrace_enabled; ++ ++ if (ftrace_enabled) { ++ ++ /* we are starting ftrace again */ ++ if (rcu_dereference_protected(ftrace_ops_list, ++ lockdep_is_held(&ftrace_lock)) != &ftrace_list_end) ++ update_ftrace_function(); ++ ++ ftrace_startup_sysctl(); ++ ++ } else { ++ /* stopping ftrace calls (just send to ftrace_stub) */ ++ ftrace_trace_function = ftrace_stub; ++ ++ ftrace_shutdown_sysctl(); ++ } ++ ++ out: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ ++static struct ftrace_ops graph_ops = { ++ .func = ftrace_stub, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE | ++ FTRACE_OPS_FL_INITIALIZED | ++ FTRACE_OPS_FL_PID | ++ FTRACE_OPS_FL_STUB, ++#ifdef FTRACE_GRAPH_TRAMP_ADDR ++ .trampoline = FTRACE_GRAPH_TRAMP_ADDR, ++ /* trampoline_size is only needed for dynamically allocated tramps */ ++#endif ++ ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) ++}; ++ ++void ftrace_graph_sleep_time_control(bool enable) ++{ ++ fgraph_sleep_time = enable; ++} ++ ++void ftrace_graph_graph_time_control(bool enable) ++{ ++ fgraph_graph_time = enable; ++} ++ ++int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) ++{ ++ return 0; ++} ++ ++/* The callbacks that hook a function */ ++trace_func_graph_ret_t ftrace_graph_return = ++ (trace_func_graph_ret_t)ftrace_stub; ++trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; ++static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; ++ ++/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ ++static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) ++{ ++ int i; ++ int ret = 0; ++ int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; ++ struct task_struct *g, *t; ++ ++ for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { ++ ret_stack_list[i] = ++ kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack_list[i]) { ++ start = 0; ++ end = i; ++ ret = -ENOMEM; ++ goto free; ++ } ++ } ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(g, t) { ++ if (start == end) { ++ ret = -EAGAIN; ++ goto unlock; ++ } ++ ++ if (t->ret_stack == NULL) { ++ atomic_set(&t->tracing_graph_pause, 0); ++ atomic_set(&t->trace_overrun, 0); ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ /* Make sure the tasks see the -1 first: */ ++ smp_wmb(); ++ t->ret_stack = ret_stack_list[start++]; ++ } ++ } while_each_thread(g, t); ++ ++unlock: ++ read_unlock(&tasklist_lock); ++free: ++ for (i = start; i < end; i++) ++ kfree(ret_stack_list[i]); ++ return ret; ++} ++ ++static void ++ftrace_graph_probe_sched_switch(void *ignore, bool preempt, ++ struct task_struct *prev, struct task_struct *next) ++{ ++ unsigned long long timestamp; ++ int index; ++ ++ /* ++ * Does the user want to count the time a function was asleep. ++ * If so, do not update the time stamps. 
++ */ ++ if (fgraph_sleep_time) ++ return; ++ ++ timestamp = trace_clock_local(); ++ ++ prev->ftrace_timestamp = timestamp; ++ ++ /* only process tasks that we timestamped */ ++ if (!next->ftrace_timestamp) ++ return; ++ ++ /* ++ * Update all the counters in next to make up for the ++ * time next was sleeping. ++ */ ++ timestamp -= next->ftrace_timestamp; ++ ++ for (index = next->curr_ret_stack; index >= 0; index--) ++ next->ret_stack[index].calltime += timestamp; ++} ++ ++/* Allocate a return stack for each task */ ++static int start_graph_tracing(void) ++{ ++ struct ftrace_ret_stack **ret_stack_list; ++ int ret, cpu; ++ ++ ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, ++ sizeof(struct ftrace_ret_stack *), ++ GFP_KERNEL); ++ ++ if (!ret_stack_list) ++ return -ENOMEM; ++ ++ /* The cpu_boot init_task->ret_stack will never be freed */ ++ for_each_online_cpu(cpu) { ++ if (!idle_task(cpu)->ret_stack) ++ ftrace_graph_init_idle_task(idle_task(cpu), cpu); ++ } ++ ++ do { ++ ret = alloc_retstack_tasklist(ret_stack_list); ++ } while (ret == -EAGAIN); ++ ++ if (!ret) { ++ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); ++ if (ret) ++ pr_info("ftrace_graph: Couldn't activate tracepoint" ++ " probe to kernel_sched_switch\n"); ++ } ++ ++ kfree(ret_stack_list); ++ return ret; ++} ++ ++/* ++ * Hibernation protection. ++ * The state of the current task is too much unstable during ++ * suspend/restore to disk. We want to protect against that. ++ */ ++static int ++ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, ++ void *unused) ++{ ++ switch (state) { ++ case PM_HIBERNATION_PREPARE: ++ pause_graph_tracing(); ++ break; ++ ++ case PM_POST_HIBERNATION: ++ unpause_graph_tracing(); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) ++{ ++ if (!ftrace_ops_test(&global_ops, trace->func, NULL)) ++ return 0; ++ return __ftrace_graph_entry(trace); ++} ++ ++/* ++ * The function graph tracer should only trace the functions defined ++ * by set_ftrace_filter and set_ftrace_notrace. If another function ++ * tracer ops is registered, the graph tracer requires testing the ++ * function against the global ops, and not just trace any function ++ * that any ftrace_ops registered. ++ */ ++static void update_function_graph_func(void) ++{ ++ struct ftrace_ops *op; ++ bool do_test = false; ++ ++ /* ++ * The graph and global ops share the same set of functions ++ * to test. If any other ops is on the list, then ++ * the graph tracing needs to test if its the function ++ * it should call. 
++ */ ++ do_for_each_ftrace_op(op, ftrace_ops_list) { ++ if (op != &global_ops && op != &graph_ops && ++ op != &ftrace_list_end) { ++ do_test = true; ++ /* in double loop, break out with goto */ ++ goto out; ++ } ++ } while_for_each_ftrace_op(op); ++ out: ++ if (do_test) ++ ftrace_graph_entry = ftrace_graph_entry_test; ++ else ++ ftrace_graph_entry = __ftrace_graph_entry; ++} ++ ++static struct notifier_block ftrace_suspend_notifier = { ++ .notifier_call = ftrace_suspend_notifier_call, ++}; ++ ++int register_ftrace_graph(trace_func_graph_ret_t retfunc, ++ trace_func_graph_ent_t entryfunc) ++{ ++ int ret = 0; ++ ++ mutex_lock(&ftrace_lock); ++ ++ /* we currently allow only one tracer registered at a time */ ++ if (ftrace_graph_active) { ++ ret = -EBUSY; ++ goto out; ++ } ++ ++ register_pm_notifier(&ftrace_suspend_notifier); ++ ++ ftrace_graph_active++; ++ ret = start_graph_tracing(); ++ if (ret) { ++ ftrace_graph_active--; ++ goto out; ++ } ++ ++ ftrace_graph_return = retfunc; ++ ++ /* ++ * Update the indirect function to the entryfunc, and the ++ * function that gets called to the entry_test first. Then ++ * call the update fgraph entry function to determine if ++ * the entryfunc should be called directly or not. ++ */ ++ __ftrace_graph_entry = entryfunc; ++ ftrace_graph_entry = ftrace_graph_entry_test; ++ update_function_graph_func(); ++ ++ ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); ++out: ++ mutex_unlock(&ftrace_lock); ++ return ret; ++} ++ ++void unregister_ftrace_graph(void) ++{ ++ mutex_lock(&ftrace_lock); ++ ++ if (unlikely(!ftrace_graph_active)) ++ goto out; ++ ++ ftrace_graph_active--; ++ ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ++ ftrace_graph_entry = ftrace_graph_entry_stub; ++ __ftrace_graph_entry = ftrace_graph_entry_stub; ++ ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); ++ unregister_pm_notifier(&ftrace_suspend_notifier); ++ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); ++ ++ out: ++ mutex_unlock(&ftrace_lock); ++} ++ ++static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); ++ ++static void ++graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) ++{ ++ atomic_set(&t->tracing_graph_pause, 0); ++ atomic_set(&t->trace_overrun, 0); ++ t->ftrace_timestamp = 0; ++ /* make curr_ret_stack visible before we add the ret_stack */ ++ smp_wmb(); ++ t->ret_stack = ret_stack; ++} ++ ++/* ++ * Allocate a return stack for the idle task. May be the first ++ * time through, or it may be done by CPU hotplug online. ++ */ ++void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) ++{ ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ /* ++ * The idle task has no parent, it either has its own ++ * stack or no stack at all. 
++ */ ++ if (t->ret_stack) ++ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); ++ ++ if (ftrace_graph_active) { ++ struct ftrace_ret_stack *ret_stack; ++ ++ ret_stack = per_cpu(idle_ret_stack, cpu); ++ if (!ret_stack) { ++ ret_stack = ++ kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack) ++ return; ++ per_cpu(idle_ret_stack, cpu) = ret_stack; ++ } ++ graph_init_task(t, ret_stack); ++ } ++} ++ ++/* Allocate a return stack for newly created task */ ++void ftrace_graph_init_task(struct task_struct *t) ++{ ++ /* Make sure we do not use the parent ret_stack */ ++ t->ret_stack = NULL; ++ t->curr_ret_stack = -1; ++ t->curr_ret_depth = -1; ++ ++ if (ftrace_graph_active) { ++ struct ftrace_ret_stack *ret_stack; ++ ++ ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH, ++ sizeof(struct ftrace_ret_stack), ++ GFP_KERNEL); ++ if (!ret_stack) ++ return; ++ graph_init_task(t, ret_stack); ++ } ++} ++ ++void ftrace_graph_exit_task(struct task_struct *t) ++{ ++ struct ftrace_ret_stack *ret_stack = t->ret_stack; ++ ++ t->ret_stack = NULL; ++ /* NULL must become visible to IRQs before we free it: */ ++ barrier(); ++ ++ kfree(ret_stack); ++} ++#endif +diff -uprN kernel/kernel/trace/Kconfig kernel_new/kernel/trace/Kconfig +--- kernel/kernel/trace/Kconfig 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/Kconfig 2021-04-01 18:28:07.814863110 +0800 +@@ -525,6 +525,7 @@ config DYNAMIC_FTRACE + bool "enable/disable function tracing dynamically" + depends on FUNCTION_TRACER + depends on HAVE_DYNAMIC_FTRACE ++ depends on !IPIPE + default y + help + This option will modify all the calls to function tracing +diff -uprN kernel/kernel/trace/ring_buffer.c kernel_new/kernel/trace/ring_buffer.c +--- kernel/kernel/trace/ring_buffer.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/ring_buffer.c 2021-04-01 18:28:07.814863110 +0800 +@@ -2653,6 +2653,7 @@ trace_recursive_lock(struct ring_buffer_ + { + unsigned int val = cpu_buffer->current_context; + unsigned long pc = preempt_count(); ++ unsigned long flags; + int bit; + + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +@@ -2661,20 +2662,30 @@ trace_recursive_lock(struct ring_buffer_ + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? 
RB_CTX_IRQ : RB_CTX_SOFTIRQ; + +- if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) ++ flags = hard_local_irq_save(); ++ ++ if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { ++ hard_local_irq_restore(flags); + return 1; ++ } + + val |= (1 << (bit + cpu_buffer->nest)); + cpu_buffer->current_context = val; + ++ hard_local_irq_restore(flags); ++ + return 0; + } + + static __always_inline void + trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) + { ++ unsigned long flags; ++ ++ flags = hard_local_irq_save(); + cpu_buffer->current_context &= + cpu_buffer->current_context - (1 << cpu_buffer->nest); ++ hard_local_irq_restore(flags); + } + + /* The recursive locking above uses 4 bits */ +diff -uprN kernel/kernel/trace/trace.c kernel_new/kernel/trace/trace.c +--- kernel/kernel/trace/trace.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace.c 2021-04-01 18:28:07.814863110 +0800 +@@ -2921,8 +2921,9 @@ int trace_vbprintk(unsigned long ip, con + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + ++ flags = hard_local_irq_save(); ++ + pc = preempt_count(); +- preempt_disable_notrace(); + + tbuffer = get_trace_buf(); + if (!tbuffer) { +@@ -2935,7 +2936,6 @@ int trace_vbprintk(unsigned long ip, con + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out; + +- local_save_flags(flags); + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->trace_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, +@@ -2956,7 +2956,7 @@ out: + put_trace_buf(); + + out_nobuffer: +- preempt_enable_notrace(); ++ hard_local_irq_restore(flags); + unpause_graph_tracing(); + + return len; +diff -uprN kernel/kernel/trace/trace_clock.c kernel_new/kernel/trace/trace_clock.c +--- kernel/kernel/trace/trace_clock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_clock.c 2021-04-01 18:28:07.815863109 +0800 +@@ -97,7 +97,7 @@ u64 notrace trace_clock_global(void) + int this_cpu; + u64 now; + +- raw_local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + + this_cpu = raw_smp_processor_id(); + now = sched_clock_cpu(this_cpu); +@@ -123,7 +123,7 @@ u64 notrace trace_clock_global(void) + arch_spin_unlock(&trace_clock_struct.lock); + + out: +- raw_local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + + return now; + } +diff -uprN kernel/kernel/trace/trace_functions.c kernel_new/kernel/trace/trace_functions.c +--- kernel/kernel/trace/trace_functions.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_functions.c 2021-04-01 18:28:07.815863109 +0800 +@@ -190,7 +190,7 @@ function_stack_trace_call(unsigned long + * Need to use raw, since this must be called before the + * recursive protection is performed. 
+ */ +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -202,7 +202,7 @@ function_stack_trace_call(unsigned long + } + + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + } + + static struct tracer_opt func_opts[] = { +diff -uprN kernel/kernel/trace/trace_functions_graph.c kernel_new/kernel/trace/trace_functions_graph.c +--- kernel/kernel/trace/trace_functions_graph.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_functions_graph.c 2021-04-01 18:28:07.815863109 +0800 +@@ -435,7 +435,7 @@ int trace_graph_entry(struct ftrace_grap + if (tracing_thresh) + return 1; + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -447,7 +447,7 @@ int trace_graph_entry(struct ftrace_grap + } + + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + + return ret; + } +@@ -511,7 +511,7 @@ void trace_graph_return(struct ftrace_gr + + ftrace_graph_addr_finish(trace); + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + cpu = raw_smp_processor_id(); + data = per_cpu_ptr(tr->trace_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); +@@ -520,7 +520,7 @@ void trace_graph_return(struct ftrace_gr + __trace_graph_return(tr, trace, flags, pc); + } + atomic_dec(&data->disabled); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + } + + void set_graph_array(struct trace_array *tr) +diff -uprN kernel/kernel/trace/trace_preemptirq.c kernel_new/kernel/trace/trace_preemptirq.c +--- kernel/kernel/trace/trace_preemptirq.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c 2021-04-02 09:24:59.495321387 +0800 +@@ -20,6 +20,9 @@ static DEFINE_PER_CPU(int, tracing_irq_c + + void trace_hardirqs_on(void) + { ++ if (!ipipe_root_p) ++ return; ++ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); +@@ -33,6 +36,9 @@ EXPORT_SYMBOL(trace_hardirqs_on); + + void trace_hardirqs_off(void) + { ++ if (!ipipe_root_p) ++ return; ++ + if (!this_cpu_read(tracing_irq_cpu)) { + this_cpu_write(tracing_irq_cpu, 1); + tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); +@@ -46,6 +52,9 @@ EXPORT_SYMBOL(trace_hardirqs_off); + + __visible void trace_hardirqs_on_caller(unsigned long caller_addr) + { ++ if (!ipipe_root_p) ++ return; ++ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); +@@ -57,8 +66,33 @@ __visible void trace_hardirqs_on_caller( + } + EXPORT_SYMBOL(trace_hardirqs_on_caller); + ++__visible void trace_hardirqs_on_virt_caller(unsigned long ip) ++{ ++ /* ++ * The IRQ tracing logic only applies to the root domain, and ++ * must consider the virtual disable flag exclusively when ++ * leaving an interrupt/fault context. ++ */ ++ if (ipipe_root_p && !irqs_disabled()) ++ trace_hardirqs_on_caller(ip); ++} ++ ++__visible void trace_hardirqs_on_virt(void) ++{ ++ /* ++ * The IRQ tracing logic only applies to the root domain, and ++ * must consider the virtual disable flag exclusively when ++ * leaving an interrupt/fault context. 
++ */ ++ if (ipipe_root_p && !irqs_disabled()) ++ trace_hardirqs_on_caller(CALLER_ADDR0); ++} ++ + __visible void trace_hardirqs_off_caller(unsigned long caller_addr) + { ++ if (!ipipe_root_p) ++ return; ++ + lockdep_hardirqs_off(CALLER_ADDR0); + + if (!this_cpu_read(tracing_irq_cpu)) { +@@ -75,14 +109,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller) + + void trace_preempt_on(unsigned long a0, unsigned long a1) + { +- if (!in_nmi()) ++ if (ipipe_root_p && !in_nmi()) + trace_preempt_enable_rcuidle(a0, a1); + tracer_preempt_on(a0, a1); + } + + void trace_preempt_off(unsigned long a0, unsigned long a1) + { +- if (!in_nmi()) ++ if (ipipe_root_p && !in_nmi()) + trace_preempt_disable_rcuidle(a0, a1); + tracer_preempt_off(a0, a1); + } +diff -uprN kernel/kernel/trace/trace_preemptirq.c.orig kernel_new/kernel/trace/trace_preemptirq.c.orig +--- kernel/kernel/trace/trace_preemptirq.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,89 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * preemptoff and irqoff tracepoints ++ * ++ * Copyright (C) Joel Fernandes (Google) ++ */ ++ ++#include ++#include ++#include ++#include ++#include "trace.h" ++ ++#define CREATE_TRACE_POINTS ++#include ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++/* Per-cpu variable to prevent redundant calls when IRQs already off */ ++static DEFINE_PER_CPU(int, tracing_irq_cpu); ++ ++void trace_hardirqs_on(void) ++{ ++ if (this_cpu_read(tracing_irq_cpu)) { ++ if (!in_nmi()) ++ trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); ++ tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); ++ this_cpu_write(tracing_irq_cpu, 0); ++ } ++ ++ lockdep_hardirqs_on(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_on); ++ ++void trace_hardirqs_off(void) ++{ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); ++ if (!in_nmi()) ++ trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); ++ } ++ ++ lockdep_hardirqs_off(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_off); ++ ++__visible void trace_hardirqs_on_caller(unsigned long caller_addr) ++{ ++ if (this_cpu_read(tracing_irq_cpu)) { ++ if (!in_nmi()) ++ trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); ++ tracer_hardirqs_on(CALLER_ADDR0, caller_addr); ++ this_cpu_write(tracing_irq_cpu, 0); ++ } ++ ++ lockdep_hardirqs_on(CALLER_ADDR0); ++} ++EXPORT_SYMBOL(trace_hardirqs_on_caller); ++ ++__visible void trace_hardirqs_off_caller(unsigned long caller_addr) ++{ ++ lockdep_hardirqs_off(CALLER_ADDR0); ++ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, caller_addr); ++ if (!in_nmi()) ++ trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); ++ } ++} ++EXPORT_SYMBOL(trace_hardirqs_off_caller); ++#endif /* CONFIG_TRACE_IRQFLAGS */ ++ ++#ifdef CONFIG_TRACE_PREEMPT_TOGGLE ++ ++void trace_preempt_on(unsigned long a0, unsigned long a1) ++{ ++ if (!in_nmi()) ++ trace_preempt_enable_rcuidle(a0, a1); ++ tracer_preempt_on(a0, a1); ++} ++ ++void trace_preempt_off(unsigned long a0, unsigned long a1) ++{ ++ if (!in_nmi()) ++ trace_preempt_disable_rcuidle(a0, a1); ++ tracer_preempt_off(a0, a1); ++} ++#endif +diff -uprN kernel/kernel/trace/trace_preemptirq.c.rej kernel_new/kernel/trace/trace_preemptirq.c.rej +--- kernel/kernel/trace/trace_preemptirq.c.rej 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/kernel/trace/trace_preemptirq.c.rej 2021-04-01 18:28:07.815863109 +0800 +@@ -0,0 +1,36 @@ 
++--- kernel/trace/trace_preemptirq.c 2019-12-18 03:36:04.000000000 +0800 +++++ kernel/trace/trace_preemptirq.c 2021-03-22 09:21:43.227415471 +0800 ++@@ -66,8 +75,33 @@ __visible void trace_hardirqs_on_caller( ++ } ++ EXPORT_SYMBOL(trace_hardirqs_on_caller); ++ +++__visible void trace_hardirqs_on_virt_caller(unsigned long ip) +++{ +++ /* +++ * The IRQ tracing logic only applies to the root domain, and +++ * must consider the virtual disable flag exclusively when +++ * leaving an interrupt/fault context. +++ */ +++ if (ipipe_root_p && !irqs_disabled()) +++ trace_hardirqs_on_caller(ip); +++} +++ +++__visible void trace_hardirqs_on_virt(void) +++{ +++ /* +++ * The IRQ tracing logic only applies to the root domain, and +++ * must consider the virtual disable flag exclusively when +++ * leaving an interrupt/fault context. +++ */ +++ if (ipipe_root_p && !irqs_disabled()) +++ trace_hardirqs_on_caller(CALLER_ADDR0); +++} +++ ++ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) ++ { +++ if (!ipipe_root_p) +++ return; +++ ++ if (!this_cpu_read(tracing_irq_cpu)) { ++ this_cpu_write(tracing_irq_cpu, 1); ++ tracer_hardirqs_off(CALLER_ADDR0, caller_addr); +diff -uprN kernel/lib/atomic64.c kernel_new/lib/atomic64.c +--- kernel/lib/atomic64.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/atomic64.c 2021-04-01 18:28:07.815863109 +0800 +@@ -29,15 +29,15 @@ + * Ensure each lock is in a separate cacheline. + */ + static union { +- raw_spinlock_t lock; ++ ipipe_spinlock_t lock; + char pad[L1_CACHE_BYTES]; + } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { + [0 ... (NR_LOCKS - 1)] = { +- .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), ++ .lock = IPIPE_SPIN_LOCK_UNLOCKED, + }, + }; + +-static inline raw_spinlock_t *lock_addr(const atomic64_t *v) ++static inline ipipe_spinlock_t *lock_addr(const atomic64_t *v) + { + unsigned long addr = (unsigned long) v; + +@@ -49,7 +49,7 @@ static inline raw_spinlock_t *lock_addr( + long long atomic64_read(const atomic64_t *v) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -62,7 +62,7 @@ EXPORT_SYMBOL(atomic64_read); + void atomic64_set(atomic64_t *v, long long i) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + + raw_spin_lock_irqsave(lock, flags); + v->counter = i; +@@ -74,7 +74,7 @@ EXPORT_SYMBOL(atomic64_set); + void atomic64_##op(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + \ + raw_spin_lock_irqsave(lock, flags); \ + v->counter c_op a; \ +@@ -86,7 +86,7 @@ EXPORT_SYMBOL(atomic64_##op); + long long atomic64_##op##_return(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + long long val; \ + \ + raw_spin_lock_irqsave(lock, flags); \ +@@ -100,7 +100,7 @@ EXPORT_SYMBOL(atomic64_##op##_return); + long long atomic64_fetch_##op(long long a, atomic64_t *v) \ + { \ + unsigned long flags; \ +- raw_spinlock_t *lock = lock_addr(v); \ ++ ipipe_spinlock_t *lock = lock_addr(v); \ + long long val; \ + \ + raw_spin_lock_irqsave(lock, flags); \ +@@ -137,7 +137,7 @@ ATOMIC64_OPS(xor, ^=) + long long atomic64_dec_if_positive(atomic64_t *v) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + 
raw_spin_lock_irqsave(lock, flags); +@@ -152,7 +152,7 @@ EXPORT_SYMBOL(atomic64_dec_if_positive); + long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -167,7 +167,7 @@ EXPORT_SYMBOL(atomic64_cmpxchg); + long long atomic64_xchg(atomic64_t *v, long long new) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +@@ -181,7 +181,7 @@ EXPORT_SYMBOL(atomic64_xchg); + long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) + { + unsigned long flags; +- raw_spinlock_t *lock = lock_addr(v); ++ ipipe_spinlock_t *lock = lock_addr(v); + long long val; + + raw_spin_lock_irqsave(lock, flags); +diff -uprN kernel/lib/bust_spinlocks.c kernel_new/lib/bust_spinlocks.c +--- kernel/lib/bust_spinlocks.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/bust_spinlocks.c 2021-04-01 18:28:07.815863109 +0800 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + + void __attribute__((weak)) bust_spinlocks(int yes) +@@ -26,6 +27,7 @@ void __attribute__((weak)) bust_spinlock + unblank_screen(); + #endif + console_unblank(); ++ ipipe_trace_panic_dump(); + if (--oops_in_progress == 0) + wake_up_klogd(); + } +diff -uprN kernel/lib/dump_stack.c kernel_new/lib/dump_stack.c +--- kernel/lib/dump_stack.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/dump_stack.c 2021-04-01 18:28:07.815863109 +0800 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -56,6 +57,9 @@ void dump_stack_print_info(const char *l + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); + ++#ifdef CONFIG_IPIPE ++ printk("I-pipe domain: %s\n", ipipe_current_domain->name); ++#endif + print_worker_info(log_lvl, current); + } + +@@ -85,6 +89,29 @@ static void __dump_stack(void) + #ifdef CONFIG_SMP + static atomic_t dump_lock = ATOMIC_INIT(-1); + ++static unsigned long disable_local_irqs(void) ++{ ++ unsigned long flags = 0; /* only to trick the UMR detection */ ++ ++ /* ++ * We neither need nor want to disable root stage IRQs over ++ * the head stage, where CPU migration can't ++ * happen. Conversely, we neither need nor want to disable ++ * hard IRQs from the head stage, so that latency won't ++ * skyrocket as a result of dumping the stack backtrace. 
++ */ ++ if (ipipe_root_p) ++ local_irq_save(flags); ++ ++ return flags; ++} ++ ++static void restore_local_irqs(unsigned long flags) ++{ ++ if (ipipe_root_p) ++ local_irq_restore(flags); ++} ++ + asmlinkage __visible void dump_stack(void) + { + unsigned long flags; +@@ -97,7 +124,7 @@ asmlinkage __visible void dump_stack(voi + * against other CPUs + */ + retry: +- local_irq_save(flags); ++ flags = disable_local_irqs(); + cpu = smp_processor_id(); + old = atomic_cmpxchg(&dump_lock, -1, cpu); + if (old == -1) { +@@ -105,7 +132,7 @@ retry: + } else if (old == cpu) { + was_locked = 1; + } else { +- local_irq_restore(flags); ++ restore_local_irqs(flags); + /* + * Wait for the lock to release before jumping to + * atomic_cmpxchg() in order to mitigate the thundering herd +@@ -120,7 +147,7 @@ retry: + if (!was_locked) + atomic_set(&dump_lock, -1); + +- local_irq_restore(flags); ++ restore_local_irqs(flags); + } + #else + asmlinkage __visible void dump_stack(void) +diff -uprN kernel/lib/ioremap.c kernel_new/lib/ioremap.c +--- kernel/lib/ioremap.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/ioremap.c 2021-04-01 18:28:07.815863109 +0800 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -177,7 +178,12 @@ int ioremap_page_range(unsigned long add + break; + } while (pgd++, addr = next, addr != end); + +- flush_cache_vmap(start, end); ++ /* APEI may invoke this for temporarily remapping pages in interrupt ++ * context - nothing we can and need to propagate globally. */ ++ if (!in_interrupt()) { ++ __ipipe_pin_mapping_globally(start, end); ++ flush_cache_vmap(start, end); ++ } + + return err; + } +diff -uprN kernel/lib/Kconfig.debug kernel_new/lib/Kconfig.debug +--- kernel/lib/Kconfig.debug 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/Kconfig.debug 2021-04-01 18:28:07.816863108 +0800 +@@ -411,6 +411,7 @@ config MAGIC_SYSRQ + keys are documented in . + Don't say Y unless you really know what this hack does. + ++ + config MAGIC_SYSRQ_DEFAULT_ENABLE + hex "Enable magic SysRq key functions by default" + depends on MAGIC_SYSRQ +@@ -430,6 +431,8 @@ config MAGIC_SYSRQ_SERIAL + This option allows you to decide whether you want to enable the + magic SysRq key. + ++source "kernel/ipipe/Kconfig.debug" ++ + config DEBUG_KERNEL + bool "Kernel debugging" + help +diff -uprN kernel/lib/Kconfig.debug.orig kernel_new/lib/Kconfig.debug.orig +--- kernel/lib/Kconfig.debug.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/lib/Kconfig.debug.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2058 @@ ++menu "Kernel hacking" ++ ++menu "printk and dmesg options" ++ ++config PRINTK_TIME ++ bool "Show timing information on printks" ++ depends on PRINTK ++ help ++ Selecting this option causes time stamps of the printk() ++ messages to be added to the output of the syslog() system ++ call and at the console. ++ ++ The timestamp is always recorded internally, and exported ++ to /dev/kmsg. This flag just specifies if the timestamp should ++ be included, not that the timestamp is recorded. ++ ++ The behavior is also controlled by the kernel command line ++ parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst ++ ++config CONSOLE_LOGLEVEL_DEFAULT ++ int "Default console loglevel (1-15)" ++ range 1 15 ++ default "7" ++ help ++ Default loglevel to determine what will be printed on the console. ++ ++ Setting a default here is equivalent to passing in loglevel= in ++ the kernel bootargs. 
loglevel= continues to override whatever ++ value is specified here as well. ++ ++ Note: This does not affect the log level of un-prefixed printk() ++ usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT ++ option. ++ ++config CONSOLE_LOGLEVEL_QUIET ++ int "quiet console loglevel (1-15)" ++ range 1 15 ++ default "4" ++ help ++ loglevel to use when "quiet" is passed on the kernel commandline. ++ ++ When "quiet" is passed on the kernel commandline this loglevel ++ will be used as the loglevel. IOW passing "quiet" will be the ++ equivalent of passing "loglevel=" ++ ++config MESSAGE_LOGLEVEL_DEFAULT ++ int "Default message log level (1-7)" ++ range 1 7 ++ default "4" ++ help ++ Default log level for printk statements with no specified priority. ++ ++ This was hard-coded to KERN_WARNING since at least 2.6.10 but folks ++ that are auditing their logs closely may want to set it to a lower ++ priority. ++ ++ Note: This does not affect what message level gets printed on the console ++ by default. To change that, use loglevel= in the kernel bootargs, ++ or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value. ++ ++config BOOT_PRINTK_DELAY ++ bool "Delay each boot printk message by N milliseconds" ++ depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY ++ help ++ This build option allows you to read kernel boot messages ++ by inserting a short delay after each one. The delay is ++ specified in milliseconds on the kernel command line, ++ using "boot_delay=N". ++ ++ It is likely that you would also need to use "lpj=M" to preset ++ the "loops per jiffie" value. ++ See a previous boot log for the "lpj" value to use for your ++ system, and then set "lpj=M" before setting "boot_delay=N". ++ NOTE: Using this option may adversely affect SMP systems. ++ I.e., processors other than the first one may not boot up. ++ BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect ++ what it believes to be lockup conditions. ++ ++config DYNAMIC_DEBUG ++ bool "Enable dynamic printk() support" ++ default n ++ depends on PRINTK ++ depends on DEBUG_FS ++ help ++ ++ Compiles debug level messages into the kernel, which would not ++ otherwise be available at runtime. These messages can then be ++ enabled/disabled based on various levels of scope - per source file, ++ function, module, format string, and line number. This mechanism ++ implicitly compiles in all pr_debug() and dev_dbg() calls, which ++ enlarges the kernel text size by about 2%. ++ ++ If a source file is compiled with DEBUG flag set, any ++ pr_debug() calls in it are enabled by default, but can be ++ disabled at runtime as below. Note that DEBUG flag is ++ turned on by many CONFIG_*DEBUG* options. ++ ++ Usage: ++ ++ Dynamic debugging is controlled via the 'dynamic_debug/control' file, ++ which is contained in the 'debugfs' filesystem. Thus, the debugfs ++ filesystem must first be mounted before making use of this feature. ++ We refer the control file as: /dynamic_debug/control. This ++ file contains a list of the debug statements that can be enabled. 
The ++ format for each line of the file is: ++ ++ filename:lineno [module]function flags format ++ ++ filename : source file of the debug statement ++ lineno : line number of the debug statement ++ module : module that contains the debug statement ++ function : function that contains the debug statement ++ flags : '=p' means the line is turned 'on' for printing ++ format : the format used for the debug statement ++ ++ From a live system: ++ ++ nullarbor:~ # cat /dynamic_debug/control ++ # filename:lineno [module]function flags format ++ fs/aio.c:222 [aio]__put_ioctx =_ "__put_ioctx:\040freeing\040%p\012" ++ fs/aio.c:248 [aio]ioctx_alloc =_ "ENOMEM:\040nr_events\040too\040high\012" ++ fs/aio.c:1770 [aio]sys_io_cancel =_ "calling\040cancel\012" ++ ++ Example usage: ++ ++ // enable the message at line 1603 of file svcsock.c ++ nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > ++ /dynamic_debug/control ++ ++ // enable all the messages in file svcsock.c ++ nullarbor:~ # echo -n 'file svcsock.c +p' > ++ /dynamic_debug/control ++ ++ // enable all the messages in the NFS server module ++ nullarbor:~ # echo -n 'module nfsd +p' > ++ /dynamic_debug/control ++ ++ // enable all 12 messages in the function svc_process() ++ nullarbor:~ # echo -n 'func svc_process +p' > ++ /dynamic_debug/control ++ ++ // disable all 12 messages in the function svc_process() ++ nullarbor:~ # echo -n 'func svc_process -p' > ++ /dynamic_debug/control ++ ++ See Documentation/admin-guide/dynamic-debug-howto.rst for additional ++ information. ++ ++endmenu # "printk and dmesg options" ++ ++menu "Compile-time checks and compiler options" ++ ++config DEBUG_INFO ++ bool "Compile the kernel with debug info" ++ depends on DEBUG_KERNEL && !COMPILE_TEST ++ help ++ If you say Y here the resulting kernel image will include ++ debugging info resulting in a larger kernel image. ++ This adds debug symbols to the kernel and modules (gcc -g), and ++ is needed if you intend to use kernel crashdump or binary object ++ tools like crash, kgdb, LKCD, gdb, etc on the kernel. ++ Say Y here only if you plan to debug the kernel. ++ ++ If unsure, say N. ++ ++config DEBUG_INFO_REDUCED ++ bool "Reduce debugging information" ++ depends on DEBUG_INFO ++ help ++ If you say Y here gcc is instructed to generate less debugging ++ information for structure types. This means that tools that ++ need full debugging information (like kgdb or systemtap) won't ++ be happy. But if you merely need debugging information to ++ resolve line numbers there is no loss. Advantage is that ++ build directory object sizes shrink dramatically over a full ++ DEBUG_INFO build and compile times are reduced too. ++ Only works with newer gcc versions. ++ ++config DEBUG_INFO_SPLIT ++ bool "Produce split debuginfo in .dwo files" ++ depends on DEBUG_INFO ++ help ++ Generate debug info into separate .dwo files. This significantly ++ reduces the build directory size for builds with DEBUG_INFO, ++ because it stores the information only once on disk in .dwo ++ files instead of multiple times in object files and executables. ++ In addition the debug information is also compressed. ++ ++ Requires recent gcc (4.7+) and recent gdb/binutils. ++ Any tool that packages or reads debug information would need ++ to know about the .dwo files and include them. ++ Incompatible with older versions of ccache. ++ ++config DEBUG_INFO_DWARF4 ++ bool "Generate dwarf4 debuginfo" ++ depends on DEBUG_INFO ++ help ++ Generate dwarf4 debug info. This requires recent versions ++ of gcc and gdb. 
It makes the debug information larger. ++ But it significantly improves the success of resolving ++ variables in gdb on optimized code. ++ ++config GDB_SCRIPTS ++ bool "Provide GDB scripts for kernel debugging" ++ depends on DEBUG_INFO ++ help ++ This creates the required links to GDB helper scripts in the ++ build directory. If you load vmlinux into gdb, the helper ++ scripts will be automatically imported by gdb as well, and ++ additional functions are available to analyze a Linux kernel ++ instance. See Documentation/dev-tools/gdb-kernel-debugging.rst ++ for further details. ++ ++config ENABLE_MUST_CHECK ++ bool "Enable __must_check logic" ++ default y ++ help ++ Enable the __must_check logic in the kernel build. Disable this to ++ suppress the "warning: ignoring return value of 'foo', declared with ++ attribute warn_unused_result" messages. ++ ++config FRAME_WARN ++ int "Warn for stack frames larger than (needs gcc 4.4)" ++ range 0 8192 ++ default 2048 if GCC_PLUGIN_LATENT_ENTROPY ++ default 1280 if (!64BIT && PARISC) ++ default 1024 if (!64BIT && !PARISC) ++ default 2048 if 64BIT ++ help ++ Tell gcc to warn at build time for stack frames larger than this. ++ Setting this too low will cause a lot of warnings. ++ Setting it to 0 disables the warning. ++ Requires gcc 4.4 ++ ++config STRIP_ASM_SYMS ++ bool "Strip assembler-generated symbols during link" ++ default n ++ help ++ Strip internal assembler-generated symbols during a link (symbols ++ that look like '.Lxxx') so they don't pollute the output of ++ get_wchan() and suchlike. ++ ++config READABLE_ASM ++ bool "Generate readable assembler code" ++ depends on DEBUG_KERNEL ++ help ++ Disable some compiler optimizations that tend to generate human unreadable ++ assembler output. This may make the kernel slightly slower, but it helps ++ to keep kernel developers who have to stare a lot at assembler listings ++ sane. ++ ++config UNUSED_SYMBOLS ++ bool "Enable unused/obsolete exported symbols" ++ default y if X86 ++ help ++ Unused but exported symbols make the kernel needlessly bigger. For ++ that reason most of these unused exports will soon be removed. This ++ option is provided temporarily to provide a transition period in case ++ some external kernel module needs one of these symbols anyway. If you ++ encounter such a case in your module, consider if you are actually ++ using the right API. (rationale: since nobody in the kernel is using ++ this in a module, there is a pretty good chance it's actually the ++ wrong interface to use). If you really need the symbol, please send a ++ mail to the linux kernel mailing list mentioning the symbol and why ++ you really need it, and what the merge plan to the mainline kernel for ++ your module is. ++ ++config PAGE_OWNER ++ bool "Track page owner" ++ depends on DEBUG_KERNEL && STACKTRACE_SUPPORT ++ select DEBUG_FS ++ select STACKTRACE ++ select STACKDEPOT ++ select PAGE_EXTENSION ++ help ++ This keeps track of what call chain is the owner of a page, may ++ help to find bare alloc_page(s) leaks. Even if you include this ++ feature on your build, it is disabled in default. You should pass ++ "page_owner=on" to boot parameter in order to enable it. Eats ++ a fair amount of memory if enabled. See tools/vm/page_owner_sort.c ++ for user-space helper. ++ ++ If unsure, say N. ++ ++config DEBUG_FS ++ bool "Debug Filesystem" ++ help ++ debugfs is a virtual file system that kernel developers use to put ++ debugging files into. Enable this option to be able to read and ++ write to these files. 
++ ++ For detailed documentation on the debugfs API, see ++ Documentation/filesystems/. ++ ++ If unsure, say N. ++ ++config HEADERS_CHECK ++ bool "Run 'make headers_check' when building vmlinux" ++ depends on !UML ++ help ++ This option will extract the user-visible kernel headers whenever ++ building the kernel, and will run basic sanity checks on them to ++ ensure that exported files do not attempt to include files which ++ were not exported, etc. ++ ++ If you're making modifications to header files which are ++ relevant for userspace, say 'Y', and check the headers ++ exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in ++ your build tree), to make sure they're suitable. ++ ++config DEBUG_SECTION_MISMATCH ++ bool "Enable full Section mismatch analysis" ++ help ++ The section mismatch analysis checks if there are illegal ++ references from one section to another section. ++ During linktime or runtime, some sections are dropped; ++ any use of code/data previously in these sections would ++ most likely result in an oops. ++ In the code, functions and variables are annotated with ++ __init,, etc. (see the full list in include/linux/init.h), ++ which results in the code/data being placed in specific sections. ++ The section mismatch analysis is always performed after a full ++ kernel build, and enabling this option causes the following ++ additional steps to occur: ++ - Add the option -fno-inline-functions-called-once to gcc commands. ++ When inlining a function annotated with __init in a non-init ++ function, we would lose the section information and thus ++ the analysis would not catch the illegal reference. ++ This option tells gcc to inline less (but it does result in ++ a larger kernel). ++ - Run the section mismatch analysis for each module/built-in.a file. ++ When we run the section mismatch analysis on vmlinux.o, we ++ lose valuable information about where the mismatch was ++ introduced. ++ Running the analysis for each module/built-in.a file ++ tells where the mismatch happens much closer to the ++ source. The drawback is that the same mismatch is ++ reported at least twice. ++ - Enable verbose reporting from modpost in order to help resolve ++ the section mismatches that are reported. ++ ++config SECTION_MISMATCH_WARN_ONLY ++ bool "Make section mismatch errors non-fatal" ++ default y ++ help ++ If you say N here, the build process will fail if there are any ++ section mismatch, instead of just throwing warnings. ++ ++ If unsure, say Y. ++ ++# ++# Select this config option from the architecture Kconfig, if it ++# is preferred to always offer frame pointers as a config ++# option on the architecture (regardless of KERNEL_DEBUG): ++# ++config ARCH_WANT_FRAME_POINTERS ++ bool ++ ++config FRAME_POINTER ++ bool "Compile the kernel with frame pointers" ++ depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS ++ default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS ++ help ++ If you say Y here the resulting kernel image will be slightly ++ larger and slower, but it gives very useful debugging information ++ in case of kernel bugs. (precise oopses/stacktraces/warnings) ++ ++config STACK_VALIDATION ++ bool "Compile-time stack metadata validation" ++ depends on HAVE_STACK_VALIDATION ++ default n ++ help ++ Add compile-time checks to validate stack metadata, including frame ++ pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure ++ that runtime stack traces are more reliable. 
++ ++ This is also a prerequisite for generation of ORC unwind data, which ++ is needed for CONFIG_UNWINDER_ORC. ++ ++ For more information, see ++ tools/objtool/Documentation/stack-validation.txt. ++ ++config DEBUG_FORCE_WEAK_PER_CPU ++ bool "Force weak per-cpu definitions" ++ depends on DEBUG_KERNEL ++ help ++ s390 and alpha require percpu variables in modules to be ++ defined weak to work around addressing range issue which ++ puts the following two restrictions on percpu variable ++ definitions. ++ ++ 1. percpu symbols must be unique whether static or not ++ 2. percpu variables can't be defined inside a function ++ ++ To ensure that generic code follows the above rules, this ++ option forces all percpu variables to be defined as weak. ++ ++endmenu # "Compiler options" ++ ++config MAGIC_SYSRQ ++ bool "Magic SysRq key" ++ depends on !UML ++ help ++ If you say Y here, you will have some control over the system even ++ if the system crashes for example during kernel debugging (e.g., you ++ will be able to flush the buffer cache to disk, reboot the system ++ immediately or dump some status information). This is accomplished ++ by pressing various keys while holding SysRq (Alt+PrintScreen). It ++ also works on a serial console (on PC hardware at least), if you ++ send a BREAK and then within 5 seconds a command keypress. The ++ keys are documented in . ++ Don't say Y unless you really know what this hack does. ++ ++config MAGIC_SYSRQ_DEFAULT_ENABLE ++ hex "Enable magic SysRq key functions by default" ++ depends on MAGIC_SYSRQ ++ default 0x1 ++ help ++ Specifies which SysRq key functions are enabled by default. ++ This may be set to 1 or 0 to enable or disable them all, or ++ to a bitmask as described in Documentation/admin-guide/sysrq.rst. ++ ++config MAGIC_SYSRQ_SERIAL ++ bool "Enable magic SysRq key over serial" ++ depends on MAGIC_SYSRQ ++ default y ++ help ++ Many embedded boards have a disconnected TTL level serial which can ++ generate some garbage that can lead to spurious false sysrq detects. ++ This option allows you to decide whether you want to enable the ++ magic SysRq key. ++ ++config DEBUG_KERNEL ++ bool "Kernel debugging" ++ help ++ Say Y here if you are developing drivers or trying to debug and ++ identify kernel problems. ++ ++menu "Memory Debugging" ++ ++source mm/Kconfig.debug ++ ++config DEBUG_OBJECTS ++ bool "Debug object operations" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, additional code will be inserted into the ++ kernel to track the life time of various objects and validate ++ the operations on those objects. ++ ++config DEBUG_OBJECTS_SELFTEST ++ bool "Debug objects selftest" ++ depends on DEBUG_OBJECTS ++ help ++ This enables the selftest of the object debug code. ++ ++config DEBUG_OBJECTS_FREE ++ bool "Debug objects in freed memory" ++ depends on DEBUG_OBJECTS ++ help ++ This enables checks whether a k/v free operation frees an area ++ which contains an object which has not been deactivated ++ properly. This can make kmalloc/kfree-intensive workloads ++ much slower. ++ ++config DEBUG_OBJECTS_TIMERS ++ bool "Debug timer objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ timer routines to track the life time of timer objects and ++ validate the timer operations. 
++ ++config DEBUG_OBJECTS_WORK ++ bool "Debug work objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ work queue routines to track the life time of work objects and ++ validate the work operations. ++ ++config DEBUG_OBJECTS_RCU_HEAD ++ bool "Debug RCU callbacks objects" ++ depends on DEBUG_OBJECTS ++ help ++ Enable this to turn on debugging of RCU list heads (call_rcu() usage). ++ ++config DEBUG_OBJECTS_PERCPU_COUNTER ++ bool "Debug percpu counter objects" ++ depends on DEBUG_OBJECTS ++ help ++ If you say Y here, additional code will be inserted into the ++ percpu counter routines to track the life time of percpu counter ++ objects and validate the percpu counter operations. ++ ++config DEBUG_OBJECTS_ENABLE_DEFAULT ++ int "debug_objects bootup default value (0-1)" ++ range 0 1 ++ default "1" ++ depends on DEBUG_OBJECTS ++ help ++ Debug objects boot parameter default value ++ ++config DEBUG_SLAB ++ bool "Debug slab memory allocations" ++ depends on DEBUG_KERNEL && SLAB ++ help ++ Say Y here to have the kernel do limited verification on memory ++ allocation as well as poisoning memory on free to catch use of freed ++ memory. This can make kmalloc/kfree-intensive workloads much slower. ++ ++config DEBUG_SLAB_LEAK ++ bool "Memory leak debugging" ++ depends on DEBUG_SLAB ++ ++config SLUB_DEBUG_ON ++ bool "SLUB debugging on by default" ++ depends on SLUB && SLUB_DEBUG ++ default n ++ help ++ Boot with debugging on by default. SLUB boots by default with ++ the runtime debug capabilities switched off. Enabling this is ++ equivalent to specifying the "slub_debug" parameter on boot. ++ There is no support for more fine grained debug control like ++ possible with slub_debug=xxx. SLUB debugging may be switched ++ off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying ++ "slub_debug=-". ++ ++config SLUB_STATS ++ default n ++ bool "Enable SLUB performance statistics" ++ depends on SLUB && SYSFS ++ help ++ SLUB statistics are useful to debug SLUBs allocation behavior in ++ order find ways to optimize the allocator. This should never be ++ enabled for production use since keeping statistics slows down ++ the allocator by a few percentage points. The slabinfo command ++ supports the determination of the most active slabs to figure ++ out which slabs are relevant to a particular load. ++ Try running: slabinfo -DA ++ ++config HAVE_DEBUG_KMEMLEAK ++ bool ++ ++config DEBUG_KMEMLEAK ++ bool "Kernel memory leak detector" ++ depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK ++ select DEBUG_FS ++ select STACKTRACE if STACKTRACE_SUPPORT ++ select KALLSYMS ++ select CRC32 ++ help ++ Say Y here if you want to enable the memory leak ++ detector. The memory allocation/freeing is traced in a way ++ similar to the Boehm's conservative garbage collector, the ++ difference being that the orphan objects are not freed but ++ only shown in /sys/kernel/debug/kmemleak. Enabling this ++ feature will introduce an overhead to memory ++ allocations. See Documentation/dev-tools/kmemleak.rst for more ++ details. ++ ++ Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances ++ of finding leaks due to the slab objects poisoning. ++ ++ In order to access the kmemleak file, debugfs needs to be ++ mounted (usually at /sys/kernel/debug). 
++ ++config DEBUG_KMEMLEAK_EARLY_LOG_SIZE ++ int "Maximum kmemleak early log entries" ++ depends on DEBUG_KMEMLEAK ++ range 200 40000 ++ default 16000 ++ help ++ Kmemleak must track all the memory allocations to avoid ++ reporting false positives. Since memory may be allocated or ++ freed before kmemleak is initialised, an early log buffer is ++ used to store these actions. If kmemleak reports "early log ++ buffer exceeded", please increase this value. ++ ++config DEBUG_KMEMLEAK_TEST ++ tristate "Simple test for the kernel memory leak detector" ++ depends on DEBUG_KMEMLEAK && m ++ help ++ This option enables a module that explicitly leaks memory. ++ ++ If unsure, say N. ++ ++config DEBUG_KMEMLEAK_DEFAULT_OFF ++ bool "Default kmemleak to off" ++ depends on DEBUG_KMEMLEAK ++ help ++ Say Y here to disable kmemleak by default. It can then be enabled ++ on the command line via kmemleak=on. ++ ++config DEBUG_STACK_USAGE ++ bool "Stack utilization instrumentation" ++ depends on DEBUG_KERNEL && !IA64 ++ help ++ Enables the display of the minimum amount of free stack which each ++ task has ever had available in the sysrq-T and sysrq-P debug output. ++ ++ This option will slow down process creation somewhat. ++ ++config DEBUG_VM ++ bool "Debug VM" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on extended checks in the virtual-memory system ++ that may impact performance. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_VMACACHE ++ bool "Debug VMA caching" ++ depends on DEBUG_VM ++ help ++ Enable this to turn on VMA caching debug information. Doing so ++ can cause significant overhead, so only enable it in non-production ++ environments. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_RB ++ bool "Debug VM red-black trees" ++ depends on DEBUG_VM ++ help ++ Enable VM red-black tree debugging information and extra validations. ++ ++ If unsure, say N. ++ ++config DEBUG_VM_PGFLAGS ++ bool "Debug page-flags operations" ++ depends on DEBUG_VM ++ help ++ Enables extra validation on page flags operations. ++ ++ If unsure, say N. ++ ++config ARCH_HAS_DEBUG_VIRTUAL ++ bool ++ ++config DEBUG_VIRTUAL ++ bool "Debug VM translations" ++ depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL ++ help ++ Enable some costly sanity checks in virtual to page code. This can ++ catch mistakes with virt_to_page() and friends. ++ ++ If unsure, say N. ++ ++config DEBUG_NOMMU_REGIONS ++ bool "Debug the global anon/private NOMMU mapping region tree" ++ depends on DEBUG_KERNEL && !MMU ++ help ++ This option causes the global tree of anonymous and private mapping ++ regions to be regularly checked for invalid topology. ++ ++config DEBUG_MEMORY_INIT ++ bool "Debug memory initialisation" if EXPERT ++ default !EXPERT ++ help ++ Enable this for additional checks during memory initialisation. ++ The sanity checks verify aspects of the VM such as the memory model ++ and other information provided by the architecture. Verbose ++ information will be printed at KERN_DEBUG loglevel depending ++ on the mminit_loglevel= command-line option. ++ ++ If unsure, say Y ++ ++config MEMORY_NOTIFIER_ERROR_INJECT ++ tristate "Memory hotplug notifier error injection module" ++ depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ memory hotplug notifier chain callbacks. 
It is controlled through ++ debugfs interface under /sys/kernel/debug/notifier-error-inject/memory ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ Example: Inject memory hotplug offline error (-12 == -ENOMEM) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/memory ++ # echo -12 > actions/MEM_GOING_OFFLINE/error ++ # echo offline > /sys/devices/system/memory/memoryXXX/state ++ bash: echo: write error: Cannot allocate memory ++ ++ To compile this code as a module, choose M here: the module will ++ be called memory-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config DEBUG_PER_CPU_MAPS ++ bool "Debug access to per_cpu maps" ++ depends on DEBUG_KERNEL ++ depends on SMP ++ help ++ Say Y to verify that the per_cpu map being accessed has ++ been set up. This adds a fair amount of code to kernel memory ++ and decreases performance. ++ ++ Say N if unsure. ++ ++config DEBUG_HIGHMEM ++ bool "Highmem debugging" ++ depends on DEBUG_KERNEL && HIGHMEM ++ help ++ This option enables additional error checking for high memory ++ systems. Disable for production systems. ++ ++config HAVE_DEBUG_STACKOVERFLOW ++ bool ++ ++config DEBUG_STACKOVERFLOW ++ bool "Check for stack overflows" ++ depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW ++ ---help--- ++ Say Y here if you want to check for overflows of kernel, IRQ ++ and exception stacks (if your architecture uses them). This ++ option will show detailed messages if free stack space drops ++ below a certain limit. ++ ++ These kinds of bugs usually occur when call-chains in the ++ kernel get too deep, especially when interrupts are ++ involved. ++ ++ Use this in cases where you see apparently random memory ++ corruption, especially if it appears in 'struct thread_info' ++ ++ If in doubt, say "N". ++ ++source "lib/Kconfig.kasan" ++ ++endmenu # "Memory Debugging" ++ ++config ARCH_HAS_KCOV ++ bool ++ help ++ KCOV does not have any arch-specific code, but currently it is enabled ++ only for x86_64. KCOV requires testing on other archs, and most likely ++ disabling of instrumentation for some early boot code. ++ ++config CC_HAS_SANCOV_TRACE_PC ++ def_bool $(cc-option,-fsanitize-coverage=trace-pc) ++ ++config KCOV ++ bool "Code coverage for fuzzing" ++ depends on ARCH_HAS_KCOV ++ depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS ++ select DEBUG_FS ++ select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC ++ help ++ KCOV exposes kernel code coverage information in a form suitable ++ for coverage-guided fuzzing (randomized testing). ++ ++ If RANDOMIZE_BASE is enabled, PC values will not be stable across ++ different machines and across reboots. If you need stable PC values, ++ disable RANDOMIZE_BASE. ++ ++ For more details, see Documentation/dev-tools/kcov.rst. ++ ++config KCOV_ENABLE_COMPARISONS ++ bool "Enable comparison operands collection by KCOV" ++ depends on KCOV ++ depends on $(cc-option,-fsanitize-coverage=trace-cmp) ++ help ++ KCOV also exposes operands of every comparison in the instrumented ++ code along with operand sizes and PCs of the comparison instructions. ++ These operands can be used by fuzzing engines to improve the quality ++ of fuzzing coverage. ++ ++config KCOV_INSTRUMENT_ALL ++ bool "Instrument all code by default" ++ depends on KCOV ++ default y ++ help ++ If you are doing generic system call fuzzing (like e.g. syzkaller), ++ then you will want to instrument the whole kernel and you should ++ say y here. If you are doing more targeted fuzzing (like e.g. 
++ filesystem fuzzing with AFL) then you will want to enable coverage ++ for more specific subsets of files, and should say n here. ++ ++config DEBUG_SHIRQ ++ bool "Debug shared IRQ handlers" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to generate a spurious interrupt as soon as a shared ++ interrupt handler is registered, and just before one is deregistered. ++ Drivers ought to be able to handle interrupts coming in at those ++ points; some don't and need to be caught. ++ ++menu "Debug Lockups and Hangs" ++ ++config LOCKUP_DETECTOR ++ bool ++ ++config SOFTLOCKUP_DETECTOR ++ bool "Detect Soft Lockups" ++ depends on DEBUG_KERNEL && !S390 ++ select LOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to act as a watchdog to detect ++ soft lockups. ++ ++ Softlockups are bugs that cause the kernel to loop in kernel ++ mode for more than 20 seconds, without giving other tasks a ++ chance to run. The current stack trace is displayed upon ++ detection and the system will stay locked up. ++ ++config BOOTPARAM_SOFTLOCKUP_PANIC ++ bool "Panic (Reboot) On Soft Lockups" ++ depends on SOFTLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to panic on "soft lockups", ++ which are bugs that cause the kernel to loop in kernel ++ mode for more than 20 seconds (configurable using the watchdog_thresh ++ sysctl), without giving other tasks a chance to run. ++ ++ The panic can be used in combination with panic_timeout, ++ to cause the system to reboot automatically after a ++ lockup has been detected. This feature is useful for ++ high-availability systems that have uptime guarantees and ++ where a lockup must be resolved ASAP. ++ ++ Say N if unsure. ++ ++config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE ++ int ++ depends on SOFTLOCKUP_DETECTOR ++ range 0 1 ++ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC ++ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC ++ ++config HARDLOCKUP_DETECTOR_PERF ++ bool ++ select SOFTLOCKUP_DETECTOR ++ ++choice ++ prompt "aarch64 NMI watchdog method" ++ depends on ARM64 ++ help ++ Watchdog implementation method configuration. ++ ++config SDEI_WATCHDOG ++ bool "SDEI NMI Watchdog support" ++ depends on ARM_SDE_INTERFACE && !HARDLOCKUP_CHECK_TIMESTAMP ++ select HAVE_HARDLOCKUP_DETECTOR_ARCH ++ select HARDLOCKUP_DETECTOR ++ ++config PMU_WATCHDOG ++ bool "PMU NMI Watchdog support" ++ depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI ++ select HAVE_HARDLOCKUP_DETECTOR_PERF ++ ++endchoice ++ ++# ++# Enables a timestamp based low pass filter to compensate for perf based ++# hard lockup detection which runs too fast due to turbo modes. ++# ++config HARDLOCKUP_CHECK_TIMESTAMP ++ bool ++ ++# ++# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard ++# lockup detector rather than the perf based detector. ++# ++config HARDLOCKUP_DETECTOR ++ bool "Detect Hard Lockups" ++ depends on DEBUG_KERNEL && !S390 ++ depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH ++ select SOFTLOCKUP_DETECTOR ++ select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF ++ select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH ++ help ++ Say Y here to enable the kernel to act as a watchdog to detect ++ hard lockups. ++ ++ Hardlockups are bugs that cause the CPU to loop in kernel mode ++ for more than 10 seconds, without letting other interrupts have a ++ chance to run. The current stack trace is displayed upon detection ++ and the system will stay locked up. 
++ ++config BOOTPARAM_HARDLOCKUP_PANIC ++ bool "Panic (Reboot) On Hard Lockups" ++ depends on HARDLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to panic on "hard lockups", ++ which are bugs that cause the kernel to loop in kernel ++ mode with interrupts disabled for more than 10 seconds (configurable ++ using the watchdog_thresh sysctl). ++ ++ Say N if unsure. ++ ++config BOOTPARAM_HARDLOCKUP_PANIC_VALUE ++ int ++ depends on HARDLOCKUP_DETECTOR ++ range 0 1 ++ default 0 if !BOOTPARAM_HARDLOCKUP_PANIC ++ default 1 if BOOTPARAM_HARDLOCKUP_PANIC ++ ++config DETECT_HUNG_TASK ++ bool "Detect Hung Tasks" ++ depends on DEBUG_KERNEL ++ default SOFTLOCKUP_DETECTOR ++ help ++ Say Y here to enable the kernel to detect "hung tasks", ++ which are bugs that cause the task to be stuck in ++ uninterruptible "D" state indefinitely. ++ ++ When a hung task is detected, the kernel will print the ++ current stack trace (which you should report), but the ++ task will stay in uninterruptible state. If lockdep is ++ enabled then all held locks will also be reported. This ++ feature has negligible overhead. ++ ++config DEFAULT_HUNG_TASK_TIMEOUT ++ int "Default timeout for hung task detection (in seconds)" ++ depends on DETECT_HUNG_TASK ++ default 120 ++ help ++ This option controls the default timeout (in seconds) used ++ to determine when a task has become non-responsive and should ++ be considered hung. ++ ++ It can be adjusted at runtime via the kernel.hung_task_timeout_secs ++ sysctl or by writing a value to ++ /proc/sys/kernel/hung_task_timeout_secs. ++ ++ A timeout of 0 disables the check. The default is two minutes. ++ Keeping the default should be fine in most cases. ++ ++config BOOTPARAM_HUNG_TASK_PANIC ++ bool "Panic (Reboot) On Hung Tasks" ++ depends on DETECT_HUNG_TASK ++ help ++ Say Y here to enable the kernel to panic on "hung tasks", ++ which are bugs that cause the kernel to leave a task stuck ++ in uninterruptible "D" state. ++ ++ The panic can be used in combination with panic_timeout, ++ to cause the system to reboot automatically after a ++ hung task has been detected. This feature is useful for ++ high-availability systems that have uptime guarantees and ++ where a hung tasks must be resolved ASAP. ++ ++ Say N if unsure. ++ ++config BOOTPARAM_HUNG_TASK_PANIC_VALUE ++ int ++ depends on DETECT_HUNG_TASK ++ range 0 1 ++ default 0 if !BOOTPARAM_HUNG_TASK_PANIC ++ default 1 if BOOTPARAM_HUNG_TASK_PANIC ++ ++config WQ_WATCHDOG ++ bool "Detect Workqueue Stalls" ++ depends on DEBUG_KERNEL ++ help ++ Say Y here to enable stall detection on workqueues. If a ++ worker pool doesn't make forward progress on a pending work ++ item for over a given amount of time, 30s by default, a ++ warning message is printed along with dump of workqueue ++ state. This can be configured through kernel parameter ++ "workqueue.watchdog_thresh" and its sysfs counterpart. ++ ++endmenu # "Debug lockups and hangs" ++ ++config PANIC_ON_OOPS ++ bool "Panic on Oops" ++ help ++ Say Y here to enable the kernel to panic when it oopses. This ++ has the same effect as setting oops=panic on the kernel command ++ line. ++ ++ This feature is useful to ensure that the kernel does not do ++ anything erroneous after an oops which could result in data ++ corruption or other issues. ++ ++ Say N if unsure. 
++ ++config PANIC_ON_OOPS_VALUE ++ int ++ range 0 1 ++ default 0 if !PANIC_ON_OOPS ++ default 1 if PANIC_ON_OOPS ++ ++config PANIC_TIMEOUT ++ int "panic timeout" ++ default 0 ++ help ++ Set the timeout value (in seconds) until a reboot occurs when the ++ the kernel panics. If n = 0, then we wait forever. A timeout ++ value n > 0 will wait n seconds before rebooting, while a timeout ++ value n < 0 will reboot immediately. ++ ++config SCHED_DEBUG ++ bool "Collect scheduler debugging info" ++ depends on DEBUG_KERNEL && PROC_FS ++ default y ++ help ++ If you say Y here, the /proc/sched_debug file will be provided ++ that can help debug the scheduler. The runtime overhead of this ++ option is minimal. ++ ++config SCHED_INFO ++ bool ++ default n ++ ++config SCHEDSTATS ++ bool "Collect scheduler statistics" ++ depends on DEBUG_KERNEL && PROC_FS ++ select SCHED_INFO ++ help ++ If you say Y here, additional code will be inserted into the ++ scheduler and related routines to collect statistics about ++ scheduler behavior and provide them in /proc/schedstat. These ++ stats may be useful for both tuning and debugging the scheduler ++ If you aren't debugging the scheduler or trying to tune a specific ++ application, you can say N to avoid the very slight overhead ++ this adds. ++ ++config SCHED_STACK_END_CHECK ++ bool "Detect stack corruption on calls to schedule()" ++ depends on DEBUG_KERNEL ++ default n ++ help ++ This option checks for a stack overrun on calls to schedule(). ++ If the stack end location is found to be over written always panic as ++ the content of the corrupted region can no longer be trusted. ++ This is to ensure no erroneous behaviour occurs which could result in ++ data corruption or a sporadic crash at a later stage once the region ++ is examined. The runtime overhead introduced is minimal. ++ ++config DEBUG_TIMEKEEPING ++ bool "Enable extra timekeeping sanity checking" ++ help ++ This option will enable additional timekeeping sanity checks ++ which may be helpful when diagnosing issues where timekeeping ++ problems are suspected. ++ ++ This may include checks in the timekeeping hotpaths, so this ++ option may have a (very small) performance impact to some ++ workloads. ++ ++ If unsure, say N. ++ ++config DEBUG_PREEMPT ++ bool "Debug preemptible kernel" ++ depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT ++ default y ++ help ++ If you say Y here then the kernel will use a debug variant of the ++ commonly used smp_processor_id() function and will print warnings ++ if kernel code uses it in a preemption-unsafe way. Also, the kernel ++ will detect preemption count underflows. 
++ ++menu "Lock Debugging (spinlocks, mutexes, etc...)" ++ ++config LOCK_DEBUGGING_SUPPORT ++ bool ++ depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT ++ default y ++ ++config PROVE_LOCKING ++ bool "Lock debugging: prove locking correctness" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select LOCKDEP ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER ++ select DEBUG_WW_MUTEX_SLOWPATH ++ select DEBUG_LOCK_ALLOC ++ select TRACE_IRQFLAGS ++ default n ++ help ++ This feature enables the kernel to prove that all locking ++ that occurs in the kernel runtime is mathematically ++ correct: that under no circumstance could an arbitrary (and ++ not yet triggered) combination of observed locking ++ sequences (on an arbitrary number of CPUs, running an ++ arbitrary number of tasks and interrupt contexts) cause a ++ deadlock. ++ ++ In short, this feature enables the kernel to report locking ++ related deadlocks before they actually occur. ++ ++ The proof does not depend on how hard and complex a ++ deadlock scenario would be to trigger: how many ++ participant CPUs, tasks and irq-contexts would be needed ++ for it to trigger. The proof also does not depend on ++ timing: if a race and a resulting deadlock is possible ++ theoretically (no matter how unlikely the race scenario ++ is), it will be proven so and will immediately be ++ reported by the kernel (once the event is observed that ++ makes the deadlock theoretically possible). ++ ++ If a deadlock is impossible (i.e. the locking rules, as ++ observed by the kernel, are mathematically correct), the ++ kernel reports nothing. ++ ++ NOTE: this feature can also be enabled for rwlocks, mutexes ++ and rwsems - in which case all dependencies between these ++ different locking variants are observed and mapped too, and ++ the proof of observed correctness is also maintained for an ++ arbitrary combination of these separate locking variants. ++ ++ For more details, see Documentation/locking/lockdep-design.txt. ++ ++config LOCK_STAT ++ bool "Lock usage statistics" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select LOCKDEP ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select DEBUG_LOCK_ALLOC ++ default n ++ help ++ This feature enables tracking lock contention points ++ ++ For more details, see Documentation/locking/lockstat.txt ++ ++ This also enables lock events required by "perf lock", ++ subcommand of perf. ++ If you want to use "perf lock", you also need to turn on ++ CONFIG_EVENT_TRACING. ++ ++ CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. ++ (CONFIG_LOCKDEP defines "acquire" and "release" events.) ++ ++config DEBUG_RT_MUTEXES ++ bool "RT Mutex debugging, deadlock detection" ++ depends on DEBUG_KERNEL && RT_MUTEXES ++ help ++ This allows rt mutex semantics violations and rt mutex related ++ deadlocks (lockups) to be detected and reported automatically. ++ ++config DEBUG_SPINLOCK ++ bool "Spinlock and rw-lock debugging: basic checks" ++ depends on DEBUG_KERNEL ++ select UNINLINE_SPIN_UNLOCK ++ help ++ Say Y here and build SMP to catch missing spinlock initialization ++ and certain other kinds of spinlock errors commonly made. This is ++ best used in conjunction with the NMI watchdog so that spinlock ++ deadlocks are also debuggable. 
++ ++config DEBUG_MUTEXES ++ bool "Mutex debugging: basic checks" ++ depends on DEBUG_KERNEL ++ help ++ This feature allows mutex semantics violations to be detected and ++ reported. ++ ++config DEBUG_WW_MUTEX_SLOWPATH ++ bool "Wait/wound mutex debugging: Slowpath testing" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select DEBUG_LOCK_ALLOC ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ help ++ This feature enables slowpath testing for w/w mutex users by ++ injecting additional -EDEADLK wound/backoff cases. Together with ++ the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this ++ will test all possible w/w mutex interface abuse with the ++ exception of simply not acquiring all the required locks. ++ Note that this feature can introduce significant overhead, so ++ it really should not be enabled in a production or distro kernel, ++ even a debug kernel. If you are a driver writer, enable it. If ++ you are a distro, do not. ++ ++config DEBUG_RWSEMS ++ bool "RW Semaphore debugging: basic checks" ++ depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER ++ help ++ This debugging feature allows mismatched rw semaphore locks and unlocks ++ to be detected and reported. ++ ++config DEBUG_LOCK_ALLOC ++ bool "Lock debugging: detect incorrect freeing of live locks" ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select DEBUG_SPINLOCK ++ select DEBUG_MUTEXES ++ select DEBUG_RT_MUTEXES if RT_MUTEXES ++ select LOCKDEP ++ help ++ This feature will check whether any held lock (spinlock, rwlock, ++ mutex or rwsem) is incorrectly freed by the kernel, via any of the ++ memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), ++ vfree(), etc.), whether a live lock is incorrectly reinitialized via ++ spin_lock_init()/mutex_init()/etc., or whether there is any lock ++ held during task exit. ++ ++config LOCKDEP ++ bool ++ depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT ++ select STACKTRACE ++ select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86 ++ select KALLSYMS ++ select KALLSYMS_ALL ++ ++config LOCKDEP_SMALL ++ bool ++ ++config DEBUG_LOCKDEP ++ bool "Lock dependency engine debugging" ++ depends on DEBUG_KERNEL && LOCKDEP ++ help ++ If you say Y here, the lock dependency engine will do ++ additional runtime checks to debug itself, at the price ++ of more runtime overhead. ++ ++config DEBUG_ATOMIC_SLEEP ++ bool "Sleep inside atomic section checking" ++ select PREEMPT_COUNT ++ depends on DEBUG_KERNEL ++ depends on !ARCH_NO_PREEMPT ++ help ++ If you say Y here, various routines which may sleep will become very ++ noisy if they are called inside atomic sections: when a spinlock is ++ held, inside an rcu read side critical section, inside preempt disabled ++ sections, inside an interrupt, etc... ++ ++config DEBUG_LOCKING_API_SELFTESTS ++ bool "Locking API boot-time self-tests" ++ depends on DEBUG_KERNEL ++ help ++ Say Y here if you want the kernel to run a short self-test during ++ bootup. The self-test checks whether common types of locking bugs ++ are detected by debugging mechanisms or not. (if you disable ++ lock debugging then those bugs wont be detected of course.) ++ The following locking APIs are covered: spinlocks, rwlocks, ++ mutexes and rwsems. ++ ++config LOCK_TORTURE_TEST ++ tristate "torture tests for locking" ++ depends on DEBUG_KERNEL ++ select TORTURE_TEST ++ help ++ This option provides a kernel module that runs torture tests ++ on kernel locking primitives. 
The kernel module may be built ++ after the fact on the running kernel to be tested, if desired. ++ ++ Say Y here if you want kernel locking-primitive torture tests ++ to be built into the kernel. ++ Say M if you want these torture tests to build as a module. ++ Say N if you are unsure. ++ ++config WW_MUTEX_SELFTEST ++ tristate "Wait/wound mutex selftests" ++ help ++ This option provides a kernel module that runs tests on the ++ on the struct ww_mutex locking API. ++ ++ It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction ++ with this test harness. ++ ++ Say M if you want these self tests to build as a module. ++ Say N if you are unsure. ++ ++endmenu # lock debugging ++ ++config TRACE_IRQFLAGS ++ bool ++ help ++ Enables hooks to interrupt enabling and disabling for ++ either tracing or lock debugging. ++ ++config STACKTRACE ++ bool "Stack backtrace support" ++ depends on STACKTRACE_SUPPORT ++ help ++ This option causes the kernel to create a /proc/pid/stack for ++ every process, showing its current stack trace. ++ It is also used by various kernel debugging features that require ++ stack trace generation. ++ ++config WARN_ALL_UNSEEDED_RANDOM ++ bool "Warn for all uses of unseeded randomness" ++ default n ++ help ++ Some parts of the kernel contain bugs relating to their use of ++ cryptographically secure random numbers before it's actually possible ++ to generate those numbers securely. This setting ensures that these ++ flaws don't go unnoticed, by enabling a message, should this ever ++ occur. This will allow people with obscure setups to know when things ++ are going wrong, so that they might contact developers about fixing ++ it. ++ ++ Unfortunately, on some models of some architectures getting ++ a fully seeded CRNG is extremely difficult, and so this can ++ result in dmesg getting spammed for a surprisingly long ++ time. This is really bad from a security perspective, and ++ so architecture maintainers really need to do what they can ++ to get the CRNG seeded sooner after the system is booted. ++ However, since users cannot do anything actionable to ++ address this, by default the kernel will issue only a single ++ warning for the first use of unseeded randomness. ++ ++ Say Y here if you want to receive warnings for all uses of ++ unseeded randomness. This will be of use primarily for ++ those developers interested in improving the security of ++ Linux kernels running on their architecture (or ++ subarchitecture). ++ ++config DEBUG_KOBJECT ++ bool "kobject debugging" ++ depends on DEBUG_KERNEL ++ help ++ If you say Y here, some extra kobject debugging messages will be sent ++ to the syslog. ++ ++config DEBUG_KOBJECT_RELEASE ++ bool "kobject release debugging" ++ depends on DEBUG_OBJECTS_TIMERS ++ help ++ kobjects are reference counted objects. This means that their ++ last reference count put is not predictable, and the kobject can ++ live on past the point at which a driver decides to drop it's ++ initial reference to the kobject gained on allocation. An ++ example of this would be a struct device which has just been ++ unregistered. ++ ++ However, some buggy drivers assume that after such an operation, ++ the memory backing the kobject can be immediately freed. This ++ goes completely against the principles of a refcounted object. ++ ++ If you say Y here, the kernel will delay the release of kobjects ++ on the last reference count to improve the visibility of this ++ kind of kobject release bug. 
++ ++config HAVE_DEBUG_BUGVERBOSE ++ bool ++ ++config DEBUG_BUGVERBOSE ++ bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT ++ depends on BUG && (GENERIC_BUG || HAVE_DEBUG_BUGVERBOSE) ++ default y ++ help ++ Say Y here to make BUG() panics output the file name and line number ++ of the BUG call as well as the EIP and oops trace. This aids ++ debugging but costs about 70-100K of memory. ++ ++config DEBUG_LIST ++ bool "Debug linked list manipulation" ++ depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION ++ help ++ Enable this to turn on extended checks in the linked-list ++ walking routines. ++ ++ If unsure, say N. ++ ++config DEBUG_PI_LIST ++ bool "Debug priority linked list manipulation" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on extended checks in the priority-ordered ++ linked-list (plist) walking routines. This checks the entire ++ list multiple times during each manipulation. ++ ++ If unsure, say N. ++ ++config DEBUG_SG ++ bool "Debug SG table operations" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on checks on scatter-gather tables. This can ++ help find problems with drivers that do not properly initialize ++ their sg tables. ++ ++ If unsure, say N. ++ ++config DEBUG_NOTIFIERS ++ bool "Debug notifier call chains" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on sanity checking for notifier call chains. ++ This is most useful for kernel developers to make sure that ++ modules properly unregister themselves from notifier chains. ++ This is a relatively cheap check but if you care about maximum ++ performance, say N. ++ ++config DEBUG_CREDENTIALS ++ bool "Debug credential management" ++ depends on DEBUG_KERNEL ++ help ++ Enable this to turn on some debug checking for credential ++ management. The additional code keeps track of the number of ++ pointers from task_structs to any given cred struct, and checks to ++ see that this number never exceeds the usage count of the cred ++ struct. ++ ++ Furthermore, if SELinux is enabled, this also checks that the ++ security pointer in the cred struct is never seen to be invalid. ++ ++ If unsure, say N. ++ ++source "kernel/rcu/Kconfig.debug" ++ ++config DEBUG_WQ_FORCE_RR_CPU ++ bool "Force round-robin CPU selection for unbound work items" ++ depends on DEBUG_KERNEL ++ default n ++ help ++ Workqueue used to implicitly guarantee that work items queued ++ without explicit CPU specified are put on the local CPU. This ++ guarantee is no longer true and while local CPU is still ++ preferred work items may be put on foreign CPUs. Kernel ++ parameter "workqueue.debug_force_rr_cpu" is added to force ++ round-robin CPU selection to flush out usages which depend on the ++ now broken guarantee. This config option enables the debug ++ feature by default. When enabled, memory and cache locality will ++ be impacted. ++ ++config DEBUG_BLOCK_EXT_DEVT ++ bool "Force extended block device numbers and spread them" ++ depends on DEBUG_KERNEL ++ depends on BLOCK ++ default n ++ help ++ BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON ++ SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT ++ YOU ARE DOING. Distros, please enable this and fix whatever ++ is broken. ++ ++ Conventionally, block device numbers are allocated from ++ predetermined contiguous area. However, extended block area ++ may introduce non-contiguous block device numbers. 
This ++ option forces most block device numbers to be allocated from ++ the extended space and spreads them to discover kernel or ++ userland code paths which assume predetermined contiguous ++ device number allocation. ++ ++ Note that turning on this debug option shuffles all the ++ device numbers for all IDE and SCSI devices including libata ++ ones, so root partition specified using device number ++ directly (via rdev or root=MAJ:MIN) won't work anymore. ++ Textual device names (root=/dev/sdXn) will continue to work. ++ ++ Say N if you are unsure. ++ ++config CPU_HOTPLUG_STATE_CONTROL ++ bool "Enable CPU hotplug state control" ++ depends on DEBUG_KERNEL ++ depends on HOTPLUG_CPU ++ default n ++ help ++ Allows to write steps between "offline" and "online" to the CPUs ++ sysfs target file so states can be stepped granular. This is a debug ++ option for now as the hotplug machinery cannot be stopped and ++ restarted at arbitrary points yet. ++ ++ Say N if your are unsure. ++ ++config NOTIFIER_ERROR_INJECTION ++ tristate "Notifier error injection" ++ depends on DEBUG_KERNEL ++ select DEBUG_FS ++ help ++ This option provides the ability to inject artificial errors to ++ specified notifier chain callbacks. It is useful to test the error ++ handling of notifier call chain failures. ++ ++ Say N if unsure. ++ ++config PM_NOTIFIER_ERROR_INJECT ++ tristate "PM notifier error injection module" ++ depends on PM && NOTIFIER_ERROR_INJECTION ++ default m if PM_DEBUG ++ help ++ This option provides the ability to inject artificial errors to ++ PM notifier chain callbacks. It is controlled through debugfs ++ interface /sys/kernel/debug/notifier-error-inject/pm ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ Example: Inject PM suspend error (-12 = -ENOMEM) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/pm/ ++ # echo -12 > actions/PM_SUSPEND_PREPARE/error ++ # echo mem > /sys/power/state ++ bash: echo: write error: Cannot allocate memory ++ ++ To compile this code as a module, choose M here: the module will ++ be called pm-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config OF_RECONFIG_NOTIFIER_ERROR_INJECT ++ tristate "OF reconfig notifier error injection module" ++ depends on OF_DYNAMIC && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ OF reconfig notifier chain callbacks. It is controlled ++ through debugfs interface under ++ /sys/kernel/debug/notifier-error-inject/OF-reconfig/ ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". ++ ++ To compile this code as a module, choose M here: the module will ++ be called of-reconfig-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config NETDEV_NOTIFIER_ERROR_INJECT ++ tristate "Netdev notifier error injection module" ++ depends on NET && NOTIFIER_ERROR_INJECTION ++ help ++ This option provides the ability to inject artificial errors to ++ netdevice notifier chain callbacks. It is controlled through debugfs ++ interface /sys/kernel/debug/notifier-error-inject/netdev ++ ++ If the notifier call chain should be failed with some events ++ notified, write the error code to "actions//error". 
++ ++ Example: Inject netdevice mtu change error (-22 = -EINVAL) ++ ++ # cd /sys/kernel/debug/notifier-error-inject/netdev ++ # echo -22 > actions/NETDEV_CHANGEMTU/error ++ # ip link set eth0 mtu 1024 ++ RTNETLINK answers: Invalid argument ++ ++ To compile this code as a module, choose M here: the module will ++ be called netdev-notifier-error-inject. ++ ++ If unsure, say N. ++ ++config FUNCTION_ERROR_INJECTION ++ def_bool y ++ depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES ++ ++config FAULT_INJECTION ++ bool "Fault-injection framework" ++ depends on DEBUG_KERNEL ++ help ++ Provide fault-injection framework. ++ For more details, see Documentation/fault-injection/. ++ ++config FAILSLAB ++ bool "Fault-injection capability for kmalloc" ++ depends on FAULT_INJECTION ++ depends on SLAB || SLUB ++ help ++ Provide fault-injection capability for kmalloc. ++ ++config FAIL_PAGE_ALLOC ++ bool "Fault-injection capabilitiy for alloc_pages()" ++ depends on FAULT_INJECTION ++ help ++ Provide fault-injection capability for alloc_pages(). ++ ++config FAIL_MAKE_REQUEST ++ bool "Fault-injection capability for disk IO" ++ depends on FAULT_INJECTION && BLOCK ++ help ++ Provide fault-injection capability for disk IO. ++ ++config FAIL_IO_TIMEOUT ++ bool "Fault-injection capability for faking disk interrupts" ++ depends on FAULT_INJECTION && BLOCK ++ help ++ Provide fault-injection capability on end IO handling. This ++ will make the block layer "forget" an interrupt as configured, ++ thus exercising the error handling. ++ ++ Only works with drivers that use the generic timeout handling, ++ for others it wont do anything. ++ ++config FAIL_FUTEX ++ bool "Fault-injection capability for futexes" ++ select DEBUG_FS ++ depends on FAULT_INJECTION && FUTEX ++ help ++ Provide fault-injection capability for futexes. ++ ++config FAULT_INJECTION_DEBUG_FS ++ bool "Debugfs entries for fault-injection capabilities" ++ depends on FAULT_INJECTION && SYSFS && DEBUG_FS ++ help ++ Enable configuration of fault-injection capabilities via debugfs. ++ ++config FAIL_FUNCTION ++ bool "Fault-injection capability for functions" ++ depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION ++ help ++ Provide function-based fault-injection capability. ++ This will allow you to override a specific function with a return ++ with given return value. As a result, function caller will see ++ an error value and have to handle it. This is useful to test the ++ error handling in various subsystems. ++ ++config FAIL_MMC_REQUEST ++ bool "Fault-injection capability for MMC IO" ++ depends on FAULT_INJECTION_DEBUG_FS && MMC ++ help ++ Provide fault-injection capability for MMC IO. ++ This will make the mmc core return data errors. This is ++ useful to test the error handling in the mmc block device ++ and to test how the mmc host driver handles retries from ++ the block device. 
++ ++config FAULT_INJECTION_STACKTRACE_FILTER ++ bool "stacktrace filter for fault-injection capabilities" ++ depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT ++ depends on !X86_64 ++ select STACKTRACE ++ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 ++ help ++ Provide stacktrace filter for fault-injection capabilities ++ ++config LATENCYTOP ++ bool "Latency measuring infrastructure" ++ depends on DEBUG_KERNEL ++ depends on STACKTRACE_SUPPORT ++ depends on PROC_FS ++ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 ++ select KALLSYMS ++ select KALLSYMS_ALL ++ select STACKTRACE ++ select SCHEDSTATS ++ select SCHED_DEBUG ++ help ++ Enable this option if you want to use the LatencyTOP tool ++ to find out which userspace is blocking on what kernel operations. ++ ++source kernel/trace/Kconfig ++ ++config PROVIDE_OHCI1394_DMA_INIT ++ bool "Remote debugging over FireWire early on boot" ++ depends on PCI && X86 ++ help ++ If you want to debug problems which hang or crash the kernel early ++ on boot and the crashing machine has a FireWire port, you can use ++ this feature to remotely access the memory of the crashed machine ++ over FireWire. This employs remote DMA as part of the OHCI1394 ++ specification which is now the standard for FireWire controllers. ++ ++ With remote DMA, you can monitor the printk buffer remotely using ++ firescope and access all memory below 4GB using fireproxy from gdb. ++ Even controlling a kernel debugger is possible using remote DMA. ++ ++ Usage: ++ ++ If ohci1394_dma=early is used as boot parameter, it will initialize ++ all OHCI1394 controllers which are found in the PCI config space. ++ ++ As all changes to the FireWire bus such as enabling and disabling ++ devices cause a bus reset and thereby disable remote DMA for all ++ devices, be sure to have the cable plugged and FireWire enabled on ++ the debugging host before booting the debug target for debugging. ++ ++ This code (~1k) is freed after boot. By then, the firewire stack ++ in charge of the OHCI-1394 controllers should be used instead. ++ ++ See Documentation/debugging-via-ohci1394.txt for more information. ++ ++config DMA_API_DEBUG ++ bool "Enable debugging of DMA-API usage" ++ select NEED_DMA_MAP_STATE ++ help ++ Enable this option to debug the use of the DMA API by device drivers. ++ With this option you will be able to detect common bugs in device ++ drivers like double-freeing of DMA mappings or freeing mappings that ++ were never allocated. ++ ++ This also attempts to catch cases where a page owned by DMA is ++ accessed by the cpu in a way that could cause data corruption. For ++ example, this enables cow_user_page() to check that the source page is ++ not undergoing DMA. ++ ++ This option causes a performance degradation. Use only if you want to ++ debug device drivers and dma interactions. ++ ++ If unsure, say N. ++ ++config DMA_API_DEBUG_SG ++ bool "Debug DMA scatter-gather usage" ++ default y ++ depends on DMA_API_DEBUG ++ help ++ Perform extra checking that callers of dma_map_sg() have respected the ++ appropriate segment length/boundary limits for the given device when ++ preparing DMA scatterlists. 
++ ++ This is particularly likely to have been overlooked in cases where the ++ dma_map_sg() API is used for general bulk mapping of pages rather than ++ preparing literal scatter-gather descriptors, where there is a risk of ++ unexpected behaviour from DMA API implementations if the scatterlist ++ is technically out-of-spec. ++ ++ If unsure, say N. ++ ++menuconfig RUNTIME_TESTING_MENU ++ bool "Runtime Testing" ++ def_bool y ++ ++if RUNTIME_TESTING_MENU ++ ++config LKDTM ++ tristate "Linux Kernel Dump Test Tool Module" ++ depends on DEBUG_FS ++ depends on BLOCK ++ help ++ This module enables testing of the different dumping mechanisms by ++ inducing system failures at predefined crash points. ++ If you don't need it: say N ++ Choose M here to compile this code as a module. The module will be ++ called lkdtm. ++ ++ Documentation on how to use the module can be found in ++ Documentation/fault-injection/provoke-crashes.txt ++ ++config TEST_LIST_SORT ++ tristate "Linked list sorting test" ++ depends on DEBUG_KERNEL || m ++ help ++ Enable this to turn on 'list_sort()' function test. This test is ++ executed only once during system boot (so affects only boot time), ++ or at module load time. ++ ++ If unsure, say N. ++ ++config TEST_SORT ++ tristate "Array-based sort test" ++ depends on DEBUG_KERNEL || m ++ help ++ This option enables the self-test function of 'sort()' at boot, ++ or at module load time. ++ ++ If unsure, say N. ++ ++config KPROBES_SANITY_TEST ++ bool "Kprobes sanity tests" ++ depends on DEBUG_KERNEL ++ depends on KPROBES ++ help ++ This option provides for testing basic kprobes functionality on ++ boot. Samples of kprobe and kretprobe are inserted and ++ verified for functionality. ++ ++ Say N if you are unsure. ++ ++config BACKTRACE_SELF_TEST ++ tristate "Self test for the backtrace code" ++ depends on DEBUG_KERNEL ++ help ++ This option provides a kernel module that can be used to test ++ the kernel stack backtrace code. This option is not useful ++ for distributions or general kernels, but only for kernel ++ developers working on architecture code. ++ ++ Note that if you want to also test saved backtraces, you will ++ have to enable STACKTRACE as well. ++ ++ Say N if you are unsure. ++ ++config RBTREE_TEST ++ tristate "Red-Black tree test" ++ depends on DEBUG_KERNEL ++ help ++ A benchmark measuring the performance of the rbtree library. ++ Also includes rbtree invariant checks. ++ ++config INTERVAL_TREE_TEST ++ tristate "Interval tree test" ++ depends on DEBUG_KERNEL ++ select INTERVAL_TREE ++ help ++ A benchmark measuring the performance of the interval tree library ++ ++config PERCPU_TEST ++ tristate "Per cpu operations test" ++ depends on m && DEBUG_KERNEL ++ help ++ Enable this option to build test module which validates per-cpu ++ operations. ++ ++ If unsure, say N. ++ ++config ATOMIC64_SELFTEST ++ tristate "Perform an atomic64_t self-test" ++ help ++ Enable this option to test the atomic64_t functions at boot or ++ at module load time. ++ ++ If unsure, say N. ++ ++config ASYNC_RAID6_TEST ++ tristate "Self test for hardware accelerated raid6 recovery" ++ depends on ASYNC_RAID6_RECOV ++ select ASYNC_MEMCPY ++ ---help--- ++ This is a one-shot self test that permutes through the ++ recovery of all the possible two disk failure scenarios for a ++ N-disk array. Recovery is performed with the asynchronous ++ raid6 recovery routines, and will optionally use an offload ++ engine if one is available. ++ ++ If unsure, say N. 
++ ++config TEST_HEXDUMP ++ tristate "Test functions located in the hexdump module at runtime" ++ ++config TEST_STRING_HELPERS ++ tristate "Test functions located in the string_helpers module at runtime" ++ ++config TEST_KSTRTOX ++ tristate "Test kstrto*() family of functions at runtime" ++ ++config TEST_PRINTF ++ tristate "Test printf() family of functions at runtime" ++ ++config TEST_BITMAP ++ tristate "Test bitmap_*() family of functions at runtime" ++ help ++ Enable this option to test the bitmap functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_BITFIELD ++ tristate "Test bitfield functions at runtime" ++ help ++ Enable this option to test the bitfield functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_UUID ++ tristate "Test functions located in the uuid module at runtime" ++ ++config TEST_OVERFLOW ++ tristate "Test check_*_overflow() functions at runtime" ++ ++config TEST_RHASHTABLE ++ tristate "Perform selftest on resizable hash table" ++ help ++ Enable this option to test the rhashtable functions at boot. ++ ++ If unsure, say N. ++ ++config TEST_HASH ++ tristate "Perform selftest on hash functions" ++ help ++ Enable this option to test the kernel's integer (), ++ string (), and siphash () ++ hash functions on boot (or module load). ++ ++ This is intended to help people writing architecture-specific ++ optimized versions. If unsure, say N. ++ ++config TEST_IDA ++ tristate "Perform selftest on IDA functions" ++ ++config TEST_PARMAN ++ tristate "Perform selftest on priority array manager" ++ depends on PARMAN ++ help ++ Enable this option to test priority array manager on boot ++ (or module load). ++ ++ If unsure, say N. ++ ++config TEST_LKM ++ tristate "Test module loading with 'hello world' module" ++ depends on m ++ help ++ This builds the "test_module" module that emits "Hello, world" ++ on printk when loaded. It is designed to be used for basic ++ evaluation of the module loading subsystem (for example when ++ validating module verification). It lacks any extra dependencies, ++ and will not normally be loaded by the system unless explicitly ++ requested by name. ++ ++ If unsure, say N. ++ ++config TEST_USER_COPY ++ tristate "Test user/kernel boundary protections" ++ depends on m ++ help ++ This builds the "test_user_copy" module that runs sanity checks ++ on the copy_to/from_user infrastructure, making sure basic ++ user/kernel boundary testing is working. If it fails to load, ++ a regression has been detected in the user/kernel memory boundary ++ protections. ++ ++ If unsure, say N. ++ ++config TEST_BPF ++ tristate "Test BPF filter functionality" ++ depends on m && NET ++ help ++ This builds the "test_bpf" module that runs various test vectors ++ against the BPF interpreter or BPF JIT compiler depending on the ++ current setting. This is in particular useful for BPF JIT compiler ++ development, but also to run regression tests against changes in ++ the interpreter code. It also enables test stubs for eBPF maps and ++ verifier used by user space verifier testsuite. ++ ++ If unsure, say N. ++ ++config FIND_BIT_BENCHMARK ++ tristate "Test find_bit functions" ++ help ++ This builds the "test_find_bit" module that measure find_*_bit() ++ functions performance. ++ ++ If unsure, say N. ++ ++config TEST_FIRMWARE ++ tristate "Test firmware loading via userspace interface" ++ depends on FW_LOADER ++ help ++ This builds the "test_firmware" module that creates a userspace ++ interface for testing firmware loading. 
This can be used to ++ control the triggering of firmware loading without needing an ++ actual firmware-using device. The contents can be rechecked by ++ userspace. ++ ++ If unsure, say N. ++ ++config TEST_SYSCTL ++ tristate "sysctl test driver" ++ depends on PROC_SYSCTL ++ help ++ This builds the "test_sysctl" module. This driver enables to test the ++ proc sysctl interfaces available to drivers safely without affecting ++ production knobs which might alter system functionality. ++ ++ If unsure, say N. ++ ++config TEST_UDELAY ++ tristate "udelay test driver" ++ help ++ This builds the "udelay_test" module that helps to make sure ++ that udelay() is working properly. ++ ++ If unsure, say N. ++ ++config TEST_STATIC_KEYS ++ tristate "Test static keys" ++ depends on m ++ help ++ Test the static key interfaces. ++ ++ If unsure, say N. ++ ++config TEST_KMOD ++ tristate "kmod stress tester" ++ depends on m ++ depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS ++ depends on NETDEVICES && NET_CORE && INET # for TUN ++ depends on BLOCK ++ select TEST_LKM ++ select XFS_FS ++ select TUN ++ select BTRFS_FS ++ help ++ Test the kernel's module loading mechanism: kmod. kmod implements ++ support to load modules using the Linux kernel's usermode helper. ++ This test provides a series of tests against kmod. ++ ++ Although technically you can either build test_kmod as a module or ++ into the kernel we disallow building it into the kernel since ++ it stress tests request_module() and this will very likely cause ++ some issues by taking over precious threads available from other ++ module load requests, ultimately this could be fatal. ++ ++ To run tests run: ++ ++ tools/testing/selftests/kmod/kmod.sh --help ++ ++ If unsure, say N. ++ ++config TEST_DEBUG_VIRTUAL ++ tristate "Test CONFIG_DEBUG_VIRTUAL feature" ++ depends on DEBUG_VIRTUAL ++ help ++ Test the kernel's ability to detect incorrect calls to ++ virt_to_phys() done against the non-linear part of the ++ kernel's virtual address map. ++ ++ If unsure, say N. ++ ++endif # RUNTIME_TESTING_MENU ++ ++config MEMTEST ++ bool "Memtest" ++ depends on HAVE_MEMBLOCK ++ ---help--- ++ This option adds a kernel parameter 'memtest', which allows memtest ++ to be set. ++ memtest=0, mean disabled; -- default ++ memtest=1, mean do 1 test pattern; ++ ... ++ memtest=17, mean do 17 test patterns. ++ If you are unsure how to answer this question, answer N. ++ ++config BUG_ON_DATA_CORRUPTION ++ bool "Trigger a BUG when data corruption is detected" ++ select DEBUG_LIST ++ help ++ Select this option if the kernel should BUG when it encounters ++ data corruption in kernel memory structures when they get checked ++ for validity. ++ ++ If unsure, say N. ++ ++source "samples/Kconfig" ++ ++source "lib/Kconfig.kgdb" ++ ++source "lib/Kconfig.ubsan" ++ ++config ARCH_HAS_DEVMEM_IS_ALLOWED ++ bool ++ ++config STRICT_DEVMEM ++ bool "Filter access to /dev/mem" ++ depends on MMU && DEVMEM ++ depends on ARCH_HAS_DEVMEM_IS_ALLOWED ++ default y if PPC || X86 || ARM64 ++ ---help--- ++ If this option is disabled, you allow userspace (root) access to all ++ of memory, including kernel and userspace memory. Accidental ++ access to this is obviously disastrous, but specific access can ++ be used by people debugging the kernel. Note that with PAT support ++ enabled, even in this case there are restrictions on /dev/mem ++ use due to the cache aliasing requirements. 
++ ++ If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem ++ file only allows userspace access to PCI space and the BIOS code and ++ data regions. This is sufficient for dosemu and X and all common ++ users of /dev/mem. ++ ++ If in doubt, say Y. ++ ++config IO_STRICT_DEVMEM ++ bool "Filter I/O access to /dev/mem" ++ depends on STRICT_DEVMEM ++ ---help--- ++ If this option is disabled, you allow userspace (root) access to all ++ io-memory regardless of whether a driver is actively using that ++ range. Accidental access to this is obviously disastrous, but ++ specific access can be used by people debugging kernel drivers. ++ ++ If this option is switched on, the /dev/mem file only allows ++ userspace access to *idle* io-memory ranges (see /proc/iomem) This ++ may break traditional users of /dev/mem (dosemu, legacy X, etc...) ++ if the driver using a given range cannot be disabled. ++ ++ If in doubt, say Y. ++ ++source "arch/$(SRCARCH)/Kconfig.debug" ++ ++endmenu # Kernel hacking +diff -uprN kernel/lib/smp_processor_id.c kernel_new/lib/smp_processor_id.c +--- kernel/lib/smp_processor_id.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/lib/smp_processor_id.c 2021-04-01 18:28:07.816863108 +0800 +@@ -6,12 +6,19 @@ + */ + #include + #include ++#include + + notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) + { + int this_cpu = raw_smp_processor_id(); + ++ if (hard_irqs_disabled()) ++ goto out; ++ ++ if (!ipipe_root_p) ++ goto out; ++ + if (likely(preempt_count())) + goto out; + +diff -uprN kernel/mm/memory.c kernel_new/mm/memory.c +--- kernel/mm/memory.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/memory.c 2021-04-02 10:22:40.734935492 +0800 +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -141,6 +142,9 @@ EXPORT_SYMBOL(zero_pfn); + + unsigned long highest_memmap_pfn __read_mostly; + ++static inline bool cow_user_page(struct page *dst, struct page *src, ++ struct vm_fault *vmf); ++ + /* + * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() + */ +@@ -710,8 +714,8 @@ out: + + static inline unsigned long + copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, +- unsigned long addr, int *rss) ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss, struct page *uncow_page) + { + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; +@@ -789,6 +793,24 @@ copy_one_pte(struct mm_struct *dst_mm, s + * in the parent and the child + */ + if (is_cow_mapping(vm_flags) && pte_write(pte)) { ++#ifdef CONFIG_IPIPE ++ if (uncow_page) { ++ struct page *old_page = vm_normal_page(vma, addr, pte); ++ struct vm_fault *vmf; ++ vmf->vma = vma; ++ vmf->address = addr; ++ cow_user_page(uncow_page, old_page, vmf); ++ pte = mk_pte(uncow_page, vma->vm_page_prot); ++ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page_add_new_anon_rmap(uncow_page, vma, addr, false); ++ rss[!!PageAnon(uncow_page)]++; ++ goto out_set_pte; ++ } ++#endif /* CONFIG_IPIPE */ + ptep_set_wrprotect(src_mm, addr, src_pte); + pte = pte_wrprotect(pte); + } +@@ -836,13 +858,27 @@ static int copy_pte_range(struct mm_stru + int progress = 0; + int rss[NR_MM_COUNTERS]; + swp_entry_t entry = (swp_entry_t){0}; +- ++ struct page *uncow_page = NULL; ++#ifdef CONFIG_IPIPE ++ int do_cow_break = 0; ++again: ++ if (do_cow_break) { ++ uncow_page = 
alloc_page_vma(GFP_HIGHUSER, vma, addr); ++ if (uncow_page == NULL) ++ return -ENOMEM; ++ do_cow_break = 0; ++ } ++#else + again: ++#endif + init_rss_vec(rss); + + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); +- if (!dst_pte) ++ if (!dst_pte) { ++ if (uncow_page) ++ put_page(uncow_page); + return -ENOMEM; ++ } + src_pte = pte_offset_map(src_pmd, addr); + src_ptl = pte_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); +@@ -865,8 +901,25 @@ again: + progress++; + continue; + } ++#ifdef CONFIG_IPIPE ++ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { ++ if (is_cow_mapping(vma->vm_flags) && ++ test_bit(MMF_VM_PINNED, &src_mm->flags) && ++ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap(src_pte); ++ add_mm_rss_vec(dst_mm, rss); ++ pte_unmap_unlock(dst_pte, dst_ptl); ++ cond_resched(); ++ do_cow_break = 1; ++ goto again; ++ } ++ } ++#endif + entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, +- vma, addr, rss); ++ vma, addr, rss, uncow_page); ++ uncow_page = NULL; + if (entry.val) + break; + progress += 8; +@@ -4678,6 +4731,41 @@ long copy_huge_page_from_user(struct pag + } + #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + ++#ifdef CONFIG_IPIPE ++ ++int __ipipe_disable_ondemand_mappings(struct task_struct *tsk) ++{ ++ struct vm_area_struct *vma; ++ struct mm_struct *mm; ++ int result = 0; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return -EPERM; ++ ++ down_write(&mm->mmap_sem); ++ if (test_bit(MMF_VM_PINNED, &mm->flags)) ++ goto done_mm; ++ ++ for (vma = mm->mmap; vma; vma = vma->vm_next) { ++ if (is_cow_mapping(vma->vm_flags) && ++ (vma->vm_flags & VM_WRITE)) { ++ result = __ipipe_pin_vma(mm, vma); ++ if (result < 0) ++ goto done_mm; ++ } ++ } ++ set_bit(MMF_VM_PINNED, &mm->flags); ++ ++ done_mm: ++ up_write(&mm->mmap_sem); ++ mmput(mm); ++ return result; ++} ++EXPORT_SYMBOL_GPL(__ipipe_disable_ondemand_mappings); ++ ++#endif /* CONFIG_IPIPE */ ++ + #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS + + static struct kmem_cache *page_ptl_cachep; +diff -uprN kernel/mm/memory.c.orig kernel_new/mm/memory.c.orig +--- kernel/mm/memory.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/memory.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,4706 @@ ++/* ++ * linux/mm/memory.c ++ * ++ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds ++ */ ++ ++/* ++ * demand-loading started 01.12.91 - seems it is high on the list of ++ * things wanted, and it should be easy to implement. - Linus ++ */ ++ ++/* ++ * Ok, demand-loading was easy, shared pages a little bit tricker. Shared ++ * pages started 02.12.91, seems to work. - Linus. ++ * ++ * Tested sharing by executing about 30 /bin/sh: under the old kernel it ++ * would have taken more than the 6M I have free, but it worked well as ++ * far as I could see. ++ * ++ * Also corrected some "invalidate()"s - I wasn't doing enough of them. ++ */ ++ ++/* ++ * Real VM (paging to/from disk) started 18.12.91. Much more work and ++ * thought has to go into this. Oh, well.. ++ * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. ++ * Found it. Everything seems to work now. ++ * 20.12.91 - Ok, making the swap-device changeable like the root. ++ */ ++ ++/* ++ * 05.04.94 - Multi-page memory management added for v1.1. 
++ * Idea by Alex Bligh (alex@cconcepts.co.uk) ++ * ++ * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG ++ * (Gerhard.Wichert@pdb.siemens.de) ++ * ++ * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) ++#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. ++#endif ++ ++#ifndef CONFIG_NEED_MULTIPLE_NODES ++/* use the per-pgdat data instead for discontigmem - mbligh */ ++unsigned long max_mapnr; ++EXPORT_SYMBOL(max_mapnr); ++ ++struct page *mem_map; ++EXPORT_SYMBOL(mem_map); ++#endif ++ ++/* ++ * A number of key systems in x86 including ioremap() rely on the assumption ++ * that high_memory defines the upper bound on direct map memory, then end ++ * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and ++ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL ++ * and ZONE_HIGHMEM. ++ */ ++void *high_memory; ++EXPORT_SYMBOL(high_memory); ++ ++/* ++ * Randomize the address space (stacks, mmaps, brk, etc.). ++ * ++ * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, ++ * as ancient (libc5 based) binaries can segfault. ) ++ */ ++int randomize_va_space __read_mostly = ++#ifdef CONFIG_COMPAT_BRK ++ 1; ++#else ++ 2; ++#endif ++ ++#ifndef arch_faults_on_old_pte ++static inline bool arch_faults_on_old_pte(void) ++{ ++ /* ++ * Those arches which don't have hw access flag feature need to ++ * implement their own helper. By default, "true" means pagefault ++ * will be hit on old pte. 
++ */ ++ return true; ++} ++#endif ++ ++static int __init disable_randmaps(char *s) ++{ ++ randomize_va_space = 0; ++ return 1; ++} ++__setup("norandmaps", disable_randmaps); ++ ++unsigned long zero_pfn __read_mostly; ++EXPORT_SYMBOL(zero_pfn); ++ ++unsigned long highest_memmap_pfn __read_mostly; ++ ++/* ++ * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() ++ */ ++static int __init init_zero_pfn(void) ++{ ++ zero_pfn = page_to_pfn(ZERO_PAGE(0)); ++ return 0; ++} ++core_initcall(init_zero_pfn); ++ ++ ++#if defined(SPLIT_RSS_COUNTING) ++ ++void sync_mm_rss(struct mm_struct *mm) ++{ ++ int i; ++ ++ for (i = 0; i < NR_MM_COUNTERS; i++) { ++ if (current->rss_stat.count[i]) { ++ add_mm_counter(mm, i, current->rss_stat.count[i]); ++ current->rss_stat.count[i] = 0; ++ } ++ } ++ current->rss_stat.events = 0; ++} ++ ++static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) ++{ ++ struct task_struct *task = current; ++ ++ if (likely(task->mm == mm)) ++ task->rss_stat.count[member] += val; ++ else ++ add_mm_counter(mm, member, val); ++} ++#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) ++#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) ++ ++/* sync counter once per 64 page faults */ ++#define TASK_RSS_EVENTS_THRESH (64) ++static void check_sync_rss_stat(struct task_struct *task) ++{ ++ if (unlikely(task != current)) ++ return; ++ if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) ++ sync_mm_rss(task->mm); ++} ++#else /* SPLIT_RSS_COUNTING */ ++ ++#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) ++#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) ++ ++static void check_sync_rss_stat(struct task_struct *task) ++{ ++} ++ ++#endif /* SPLIT_RSS_COUNTING */ ++ ++/* ++ * Note: this doesn't free the actual pages themselves. That ++ * has been handled earlier when unmapping all the memory regions. 
++ */ ++static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, ++ unsigned long addr) ++{ ++ pgtable_t token = pmd_pgtable(*pmd); ++ pmd_clear(pmd); ++ pte_free_tlb(tlb, token, addr); ++ mm_dec_nr_ptes(tlb->mm); ++} ++ ++static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ free_pte_range(tlb, pmd, addr); ++ } while (pmd++, addr = next, addr != end); ++ ++ start &= PUD_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= PUD_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ pmd = pmd_offset(pud, start); ++ pud_clear(pud); ++ pmd_free_tlb(tlb, pmd, start); ++ mm_dec_nr_pmds(tlb->mm); ++} ++ ++static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pud_t *pud; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ free_pmd_range(tlb, pud, addr, next, floor, ceiling); ++ } while (pud++, addr = next, addr != end); ++ ++ start &= P4D_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= P4D_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ pud = pud_offset(p4d, start); ++ p4d_clear(p4d); ++ pud_free_tlb(tlb, pud, start); ++ mm_dec_nr_puds(tlb->mm); ++} ++ ++static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ unsigned long start; ++ ++ start = addr; ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ free_pud_range(tlb, p4d, addr, next, floor, ceiling); ++ } while (p4d++, addr = next, addr != end); ++ ++ start &= PGDIR_MASK; ++ if (start < floor) ++ return; ++ if (ceiling) { ++ ceiling &= PGDIR_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ return; ++ ++ p4d = p4d_offset(pgd, start); ++ pgd_clear(pgd); ++ p4d_free_tlb(tlb, p4d, start); ++} ++ ++/* ++ * This function frees user-level page tables of a process. ++ */ ++void free_pgd_range(struct mmu_gather *tlb, ++ unsigned long addr, unsigned long end, ++ unsigned long floor, unsigned long ceiling) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ /* ++ * The next few lines have given us lots of grief... ++ * ++ * Why are we testing PMD* at this top level? Because often ++ * there will be no work to do at all, and we'd prefer not to ++ * go all the way down to the bottom just to discover that. ++ * ++ * Why all these "- 1"s? Because 0 represents both the bottom ++ * of the address space and the top of it (using -1 for the ++ * top wouldn't help much: the masks would do the wrong thing). ++ * The rule is that addr 0 and floor 0 refer to the bottom of ++ * the address space, but end 0 and ceiling 0 refer to the top ++ * Comparisons need to use "end - 1" and "ceiling - 1" (though ++ * that end 0 case should be mythical). 
++ * ++ * Wherever addr is brought up or ceiling brought down, we must ++ * be careful to reject "the opposite 0" before it confuses the ++ * subsequent tests. But what about where end is brought down ++ * by PMD_SIZE below? no, end can't go down to 0 there. ++ * ++ * Whereas we round start (addr) and ceiling down, by different ++ * masks at different levels, in order to test whether a table ++ * now has no other vmas using it, so can be freed, we don't ++ * bother to round floor or end up - the tests don't need that. ++ */ ++ ++ addr &= PMD_MASK; ++ if (addr < floor) { ++ addr += PMD_SIZE; ++ if (!addr) ++ return; ++ } ++ if (ceiling) { ++ ceiling &= PMD_MASK; ++ if (!ceiling) ++ return; ++ } ++ if (end - 1 > ceiling - 1) ++ end -= PMD_SIZE; ++ if (addr > end - 1) ++ return; ++ /* ++ * We add page table cache pages with PAGE_SIZE, ++ * (see pte_free_tlb()), flush the tlb if we need ++ */ ++ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); ++ pgd = pgd_offset(tlb->mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ free_p4d_range(tlb, pgd, addr, next, floor, ceiling); ++ } while (pgd++, addr = next, addr != end); ++} ++ ++void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, ++ unsigned long floor, unsigned long ceiling) ++{ ++ while (vma) { ++ struct vm_area_struct *next = vma->vm_next; ++ unsigned long addr = vma->vm_start; ++ ++ /* ++ * Hide vma from rmap and truncate_pagecache before freeing ++ * pgtables ++ */ ++ unlink_anon_vmas(vma); ++ unlink_file_vma(vma); ++ ++ if (is_vm_hugetlb_page(vma)) { ++ hugetlb_free_pgd_range(tlb, addr, vma->vm_end, ++ floor, next ? next->vm_start : ceiling); ++ } else { ++ /* ++ * Optimization: gather nearby vmas into one call down ++ */ ++ while (next && next->vm_start <= vma->vm_end + PMD_SIZE ++ && !is_vm_hugetlb_page(next)) { ++ vma = next; ++ next = vma->vm_next; ++ unlink_anon_vmas(vma); ++ unlink_file_vma(vma); ++ } ++ free_pgd_range(tlb, addr, vma->vm_end, ++ floor, next ? next->vm_start : ceiling); ++ } ++ vma = next; ++ } ++} ++ ++int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) ++{ ++ spinlock_t *ptl; ++ pgtable_t new = pte_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ /* ++ * Ensure all pte setup (eg. pte page lock and page clearing) are ++ * visible before the pte is made visible to other CPUs by being ++ * put into page tables. ++ * ++ * The other side of the story is the pointer chasing in the page ++ * table walking code (when walking the page table without locking; ++ * ie. most of the time). Fortunately, these data accesses consist ++ * of a chain of data-dependent loads, meaning most CPUs (alpha ++ * being the notable exception) will already guarantee loads are ++ * seen in-order. See the alpha page table accessors for the ++ * smp_read_barrier_depends() barriers in page table walking code. ++ */ ++ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ++ ++ ptl = pmd_lock(mm, pmd); ++ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ ++ mm_inc_nr_ptes(mm); ++ pmd_populate(mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock(ptl); ++ if (new) ++ pte_free(mm, new); ++ return 0; ++} ++ ++int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) ++{ ++ pte_t *new = pte_alloc_one_kernel(&init_mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&init_mm.page_table_lock); ++ if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ ++ pmd_populate_kernel(&init_mm, pmd, new); ++ new = NULL; ++ } ++ spin_unlock(&init_mm.page_table_lock); ++ if (new) ++ pte_free_kernel(&init_mm, new); ++ return 0; ++} ++ ++static inline void init_rss_vec(int *rss) ++{ ++ memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); ++} ++ ++static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) ++{ ++ int i; ++ ++ if (current->mm == mm) ++ sync_mm_rss(mm); ++ for (i = 0; i < NR_MM_COUNTERS; i++) ++ if (rss[i]) ++ add_mm_counter(mm, i, rss[i]); ++} ++ ++/* ++ * This function is called to print an error when a bad pte ++ * is found. For example, we might have a PFN-mapped pte in ++ * a region that doesn't allow it. ++ * ++ * The calling function must still handle the error. ++ */ ++static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte, struct page *page) ++{ ++ pgd_t *pgd = pgd_offset(vma->vm_mm, addr); ++ p4d_t *p4d = p4d_offset(pgd, addr); ++ pud_t *pud = pud_offset(p4d, addr); ++ pmd_t *pmd = pmd_offset(pud, addr); ++ struct address_space *mapping; ++ pgoff_t index; ++ static unsigned long resume; ++ static unsigned long nr_shown; ++ static unsigned long nr_unshown; ++ ++ /* ++ * Allow a burst of 60 reports, then keep quiet for that minute; ++ * or allow a steady drip of one report per second. ++ */ ++ if (nr_shown == 60) { ++ if (time_before(jiffies, resume)) { ++ nr_unshown++; ++ return; ++ } ++ if (nr_unshown) { ++ pr_alert("BUG: Bad page map: %lu messages suppressed\n", ++ nr_unshown); ++ nr_unshown = 0; ++ } ++ nr_shown = 0; ++ } ++ if (nr_shown++ == 0) ++ resume = jiffies + 60 * HZ; ++ ++ mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; ++ index = linear_page_index(vma, addr); ++ ++ pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", ++ current->comm, ++ (long long)pte_val(pte), (long long)pmd_val(*pmd)); ++ if (page) ++ dump_page(page, "bad pte"); ++ pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", ++ (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); ++ pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", ++ vma->vm_file, ++ vma->vm_ops ? vma->vm_ops->fault : NULL, ++ vma->vm_file ? vma->vm_file->f_op->mmap : NULL, ++ mapping ? mapping->a_ops->readpage : NULL); ++ dump_stack(); ++ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); ++} ++ ++/* ++ * vm_normal_page -- This function gets the "struct page" associated with a pte. ++ * ++ * "Special" mappings do not wish to be associated with a "struct page" (either ++ * it doesn't exist, or it exists but they don't want to touch it). In this ++ * case, NULL is returned here. "Normal" mappings do have a struct page. ++ * ++ * There are 2 broad cases. Firstly, an architecture may define a pte_special() ++ * pte bit, in which case this function is trivial. Secondly, an architecture ++ * may not have a spare pte bit, which requires a more complicated scheme, ++ * described below. ++ * ++ * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a ++ * special mapping (even if there are underlying and valid "struct pages"). ++ * COWed pages of a VM_PFNMAP are always normal. ++ * ++ * The way we recognize COWed pages within VM_PFNMAP mappings is through the ++ * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit ++ * set, and the vm_pgoff will point to the first PFN mapped: thus every special ++ * mapping will always honor the rule ++ * ++ * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) ++ * ++ * And for normal mappings this is false. 
++ * ++ * This restricts such mappings to be a linear translation from virtual address ++ * to pfn. To get around this restriction, we allow arbitrary mappings so long ++ * as the vma is not a COW mapping; in that case, we know that all ptes are ++ * special (because none can have been COWed). ++ * ++ * ++ * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. ++ * ++ * VM_MIXEDMAP mappings can likewise contain memory with or without "struct ++ * page" backing, however the difference is that _all_ pages with a struct ++ * page (that is, those where pfn_valid is true) are refcounted and considered ++ * normal pages by the VM. The disadvantage is that pages are refcounted ++ * (which can be slower and simply not an option for some PFNMAP users). The ++ * advantage is that we don't have to follow the strict linearity rule of ++ * PFNMAP mappings in order to support COWable mappings. ++ * ++ */ ++struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, ++ pte_t pte, bool with_public_device) ++{ ++ unsigned long pfn = pte_pfn(pte); ++ ++ if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { ++ if (likely(!pte_special(pte))) ++ goto check_pfn; ++ if (vma->vm_ops && vma->vm_ops->find_special_page) ++ return vma->vm_ops->find_special_page(vma, addr); ++ if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) ++ return NULL; ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ ++ /* ++ * Device public pages are special pages (they are ZONE_DEVICE ++ * pages but different from persistent memory). They behave ++ * allmost like normal pages. The difference is that they are ++ * not on the lru and thus should never be involve with any- ++ * thing that involve lru manipulation (mlock, numa balancing, ++ * ...). ++ * ++ * This is why we still want to return NULL for such page from ++ * vm_normal_page() so that we do not have to special case all ++ * call site of vm_normal_page(). ++ */ ++ if (likely(pfn <= highest_memmap_pfn)) { ++ struct page *page = pfn_to_page(pfn); ++ ++ if (is_device_public_page(page)) { ++ if (with_public_device) ++ return page; ++ return NULL; ++ } ++ } ++ ++ if (pte_devmap(pte)) ++ return NULL; ++ ++ print_bad_pte(vma, addr, pte, NULL); ++ return NULL; ++ } ++ ++ /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ ++ ++ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { ++ if (vma->vm_flags & VM_MIXEDMAP) { ++ if (!pfn_valid(pfn)) ++ return NULL; ++ goto out; ++ } else { ++ unsigned long off; ++ off = (addr - vma->vm_start) >> PAGE_SHIFT; ++ if (pfn == vma->vm_pgoff + off) ++ return NULL; ++ if (!is_cow_mapping(vma->vm_flags)) ++ return NULL; ++ } ++ } ++ ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ ++check_pfn: ++ if (unlikely(pfn > highest_memmap_pfn)) { ++ print_bad_pte(vma, addr, pte, NULL); ++ return NULL; ++ } ++ ++ /* ++ * NOTE! We still have PageReserved() pages in the page tables. ++ * eg. VDSO mappings can cause them to exist. ++ */ ++out: ++ return pfn_to_page(pfn); ++} ++ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, ++ pmd_t pmd) ++{ ++ unsigned long pfn = pmd_pfn(pmd); ++ ++ /* ++ * There is no pmd_special() but there may be special pmds, e.g. ++ * in a direct-access (dax) mapping, so let's just replicate the ++ * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. 
++ */ ++ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { ++ if (vma->vm_flags & VM_MIXEDMAP) { ++ if (!pfn_valid(pfn)) ++ return NULL; ++ goto out; ++ } else { ++ unsigned long off; ++ off = (addr - vma->vm_start) >> PAGE_SHIFT; ++ if (pfn == vma->vm_pgoff + off) ++ return NULL; ++ if (!is_cow_mapping(vma->vm_flags)) ++ return NULL; ++ } ++ } ++ ++ if (pmd_devmap(pmd)) ++ return NULL; ++ if (is_zero_pfn(pfn)) ++ return NULL; ++ if (unlikely(pfn > highest_memmap_pfn)) ++ return NULL; ++ ++ /* ++ * NOTE! We still have PageReserved() pages in the page tables. ++ * eg. VDSO mappings can cause them to exist. ++ */ ++out: ++ return pfn_to_page(pfn); ++} ++#endif ++ ++/* ++ * copy one vm_area from one task to the other. Assumes the page tables ++ * already present in the new task to be cleared in the whole range ++ * covered by this vma. ++ */ ++ ++static inline unsigned long ++copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, ++ unsigned long addr, int *rss) ++{ ++ unsigned long vm_flags = vma->vm_flags; ++ pte_t pte = *src_pte; ++ struct page *page; ++ ++ /* pte contains position in swap or file, so copy. */ ++ if (unlikely(!pte_present(pte))) { ++ swp_entry_t entry = pte_to_swp_entry(pte); ++ ++ if (likely(!non_swap_entry(entry))) { ++ if (swap_duplicate(entry) < 0) ++ return entry.val; ++ ++ /* make sure dst_mm is on swapoff's mmlist. */ ++ if (unlikely(list_empty(&dst_mm->mmlist))) { ++ spin_lock(&mmlist_lock); ++ if (list_empty(&dst_mm->mmlist)) ++ list_add(&dst_mm->mmlist, ++ &src_mm->mmlist); ++ spin_unlock(&mmlist_lock); ++ } ++ rss[MM_SWAPENTS]++; ++ } else if (is_migration_entry(entry)) { ++ page = migration_entry_to_page(entry); ++ ++ rss[mm_counter(page)]++; ++ ++ if (is_write_migration_entry(entry) && ++ is_cow_mapping(vm_flags)) { ++ /* ++ * COW mappings require pages in both ++ * parent and child to be set to read. ++ */ ++ make_migration_entry_read(&entry); ++ pte = swp_entry_to_pte(entry); ++ if (pte_swp_soft_dirty(*src_pte)) ++ pte = pte_swp_mksoft_dirty(pte); ++ set_pte_at(src_mm, addr, src_pte, pte); ++ } ++ } else if (is_device_private_entry(entry)) { ++ page = device_private_entry_to_page(entry); ++ ++ /* ++ * Update rss count even for unaddressable pages, as ++ * they should treated just like normal pages in this ++ * respect. ++ * ++ * We will likely want to have some new rss counters ++ * for unaddressable pages, at some point. But for now ++ * keep things as they are. ++ */ ++ get_page(page); ++ rss[mm_counter(page)]++; ++ page_dup_rmap(page, false); ++ ++ /* ++ * We do not preserve soft-dirty information, because so ++ * far, checkpoint/restore is the only feature that ++ * requires that. And checkpoint/restore does not work ++ * when a device driver is involved (you cannot easily ++ * save and restore device driver state). 
++ */ ++ if (is_write_device_private_entry(entry) && ++ is_cow_mapping(vm_flags)) { ++ make_device_private_entry_read(&entry); ++ pte = swp_entry_to_pte(entry); ++ set_pte_at(src_mm, addr, src_pte, pte); ++ } ++ } ++ goto out_set_pte; ++ } ++ ++ /* ++ * If it's a COW mapping, write protect it both ++ * in the parent and the child ++ */ ++ if (is_cow_mapping(vm_flags) && pte_write(pte)) { ++ ptep_set_wrprotect(src_mm, addr, src_pte); ++ pte = pte_wrprotect(pte); ++ } ++ ++ /* ++ * If it's a shared mapping, mark it clean in ++ * the child ++ */ ++ if (vm_flags & VM_SHARED) ++ pte = pte_mkclean(pte); ++ pte = pte_mkold(pte); ++ ++ page = vm_normal_page(vma, addr, pte); ++ if (page) { ++ get_page(page); ++ page_dup_rmap(page, false); ++ rss[mm_counter(page)]++; ++ } else if (pte_devmap(pte)) { ++ page = pte_page(pte); ++ ++ /* ++ * Cache coherent device memory behave like regular page and ++ * not like persistent memory page. For more informations see ++ * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h ++ */ ++ if (is_device_public_page(page)) { ++ get_page(page); ++ page_dup_rmap(page, false); ++ rss[mm_counter(page)]++; ++ } ++ } ++ ++out_set_pte: ++ set_pte_at(dst_mm, addr, dst_pte, pte); ++ return 0; ++} ++ ++static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pte_t *orig_src_pte, *orig_dst_pte; ++ pte_t *src_pte, *dst_pte; ++ spinlock_t *src_ptl, *dst_ptl; ++ int progress = 0; ++ int rss[NR_MM_COUNTERS]; ++ swp_entry_t entry = (swp_entry_t){0}; ++ ++again: ++ init_rss_vec(rss); ++ ++ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); ++ if (!dst_pte) ++ return -ENOMEM; ++ src_pte = pte_offset_map(src_pmd, addr); ++ src_ptl = pte_lockptr(src_mm, src_pmd); ++ spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ++ orig_src_pte = src_pte; ++ orig_dst_pte = dst_pte; ++ arch_enter_lazy_mmu_mode(); ++ ++ do { ++ /* ++ * We are holding two locks at this point - either of them ++ * could generate latencies in another task on another CPU. 
++ */ ++ if (progress >= 32) { ++ progress = 0; ++ if (need_resched() || ++ spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) ++ break; ++ } ++ if (pte_none(*src_pte)) { ++ progress++; ++ continue; ++ } ++ entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, ++ vma, addr, rss); ++ if (entry.val) ++ break; ++ progress += 8; ++ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); ++ ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(src_ptl); ++ pte_unmap(orig_src_pte); ++ add_mm_rss_vec(dst_mm, rss); ++ pte_unmap_unlock(orig_dst_pte, dst_ptl); ++ cond_resched(); ++ ++ if (entry.val) { ++ if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) ++ return -ENOMEM; ++ progress = 0; ++ } ++ if (addr != end) ++ goto again; ++ return 0; ++} ++ ++static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pmd_t *src_pmd, *dst_pmd; ++ unsigned long next; ++ ++ dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); ++ if (!dst_pmd) ++ return -ENOMEM; ++ src_pmd = pmd_offset(src_pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) ++ || pmd_devmap(*src_pmd)) { ++ int err; ++ VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); ++ err = copy_huge_pmd(dst_mm, src_mm, ++ dst_pmd, src_pmd, addr, vma); ++ if (err == -ENOMEM) ++ return -ENOMEM; ++ if (!err) ++ continue; ++ /* fall through */ ++ } ++ if (pmd_none_or_clear_bad(src_pmd)) ++ continue; ++ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_pmd++, src_pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ pud_t *src_pud, *dst_pud; ++ unsigned long next; ++ ++ dst_pud = pud_alloc(dst_mm, dst_p4d, addr); ++ if (!dst_pud) ++ return -ENOMEM; ++ src_pud = pud_offset(src_p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { ++ int err; ++ ++ VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma); ++ err = copy_huge_pud(dst_mm, src_mm, ++ dst_pud, src_pud, addr, vma); ++ if (err == -ENOMEM) ++ return -ENOMEM; ++ if (!err) ++ continue; ++ /* fall through */ ++ } ++ if (pud_none_or_clear_bad(src_pud)) ++ continue; ++ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_pud++, src_pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end) ++{ ++ p4d_t *src_p4d, *dst_p4d; ++ unsigned long next; ++ ++ dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); ++ if (!dst_p4d) ++ return -ENOMEM; ++ src_p4d = p4d_offset(src_pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(src_p4d)) ++ continue; ++ if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, ++ vma, addr, next)) ++ return -ENOMEM; ++ } while (dst_p4d++, src_p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, ++ struct vm_area_struct *vma) ++{ ++ pgd_t *src_pgd, *dst_pgd; ++ unsigned long next; ++ unsigned long addr = vma->vm_start; ++ unsigned long end = vma->vm_end; ++ 
unsigned long mmun_start; /* For mmu_notifiers */ ++ unsigned long mmun_end; /* For mmu_notifiers */ ++ bool is_cow; ++ int ret; ++ ++ /* ++ * Don't copy ptes where a page fault will fill them correctly. ++ * Fork becomes much lighter when there are big shared or private ++ * readonly mappings. The tradeoff is that copy_page_range is more ++ * efficient than faulting. ++ */ ++ if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && ++ !vma->anon_vma) ++ return 0; ++ ++ if (is_vm_hugetlb_page(vma)) ++ return copy_hugetlb_page_range(dst_mm, src_mm, vma); ++ ++ if (unlikely(vma->vm_flags & VM_PFNMAP)) { ++ /* ++ * We do not free on error cases below as remove_vma ++ * gets called on error from higher level routine ++ */ ++ ret = track_pfn_copy(vma); ++ if (ret) ++ return ret; ++ } ++ ++ /* ++ * We need to invalidate the secondary MMU mappings only when ++ * there could be a permission downgrade on the ptes of the ++ * parent mm. And a permission downgrade will only happen if ++ * is_cow_mapping() returns true. ++ */ ++ is_cow = is_cow_mapping(vma->vm_flags); ++ mmun_start = addr; ++ mmun_end = end; ++ if (is_cow) ++ mmu_notifier_invalidate_range_start(src_mm, mmun_start, ++ mmun_end); ++ ++ ret = 0; ++ dst_pgd = pgd_offset(dst_mm, addr); ++ src_pgd = pgd_offset(src_mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(src_pgd)) ++ continue; ++ if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, ++ vma, addr, next))) { ++ ret = -ENOMEM; ++ break; ++ } ++ } while (dst_pgd++, src_pgd++, addr = next, addr != end); ++ ++ if (is_cow) ++ mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end); ++ return ret; ++} ++ ++static unsigned long zap_pte_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ struct mm_struct *mm = tlb->mm; ++ int force_flush = 0; ++ int rss[NR_MM_COUNTERS]; ++ spinlock_t *ptl; ++ pte_t *start_pte; ++ pte_t *pte; ++ swp_entry_t entry; ++ ++ tlb_remove_check_page_size_change(tlb, PAGE_SIZE); ++again: ++ init_rss_vec(rss); ++ start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ pte = start_pte; ++ flush_tlb_batched_pending(mm); ++ arch_enter_lazy_mmu_mode(); ++ do { ++ pte_t ptent = *pte; ++ if (pte_none(ptent)) ++ continue; ++ ++ if (pte_present(ptent)) { ++ struct page *page; ++ ++ page = _vm_normal_page(vma, addr, ptent, true); ++ if (unlikely(details) && page) { ++ /* ++ * unmap_shared_mapping_pages() wants to ++ * invalidate cache without truncating: ++ * unmap shared but keep private pages. 
++ */ ++ if (details->check_mapping && ++ details->check_mapping != page_rmapping(page)) ++ continue; ++ } ++ ptent = ptep_get_and_clear_full(mm, addr, pte, ++ tlb->fullmm); ++ tlb_remove_tlb_entry(tlb, pte, addr); ++ if (unlikely(!page)) ++ continue; ++ ++ if (!PageAnon(page)) { ++ if (pte_dirty(ptent)) { ++ force_flush = 1; ++ set_page_dirty(page); ++ } ++ if (pte_young(ptent) && ++ likely(!(vma->vm_flags & VM_SEQ_READ))) ++ mark_page_accessed(page); ++ } ++ rss[mm_counter(page)]--; ++ page_remove_rmap(page, false); ++ if (unlikely(page_mapcount(page) < 0)) ++ print_bad_pte(vma, addr, ptent, page); ++ if (unlikely(__tlb_remove_page(tlb, page))) { ++ force_flush = 1; ++ addr += PAGE_SIZE; ++ break; ++ } ++ continue; ++ } ++ ++ entry = pte_to_swp_entry(ptent); ++ if (non_swap_entry(entry) && is_device_private_entry(entry)) { ++ struct page *page = device_private_entry_to_page(entry); ++ ++ if (unlikely(details && details->check_mapping)) { ++ /* ++ * unmap_shared_mapping_pages() wants to ++ * invalidate cache without truncating: ++ * unmap shared but keep private pages. ++ */ ++ if (details->check_mapping != ++ page_rmapping(page)) ++ continue; ++ } ++ ++ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); ++ rss[mm_counter(page)]--; ++ page_remove_rmap(page, false); ++ put_page(page); ++ continue; ++ } ++ ++ /* If details->check_mapping, we leave swap entries. */ ++ if (unlikely(details)) ++ continue; ++ ++ entry = pte_to_swp_entry(ptent); ++ if (!non_swap_entry(entry)) ++ rss[MM_SWAPENTS]--; ++ else if (is_migration_entry(entry)) { ++ struct page *page; ++ ++ page = migration_entry_to_page(entry); ++ rss[mm_counter(page)]--; ++ } ++ if (unlikely(!free_swap_and_cache(entry))) ++ print_bad_pte(vma, addr, ptent, NULL); ++ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ ++ add_mm_rss_vec(mm, rss); ++ arch_leave_lazy_mmu_mode(); ++ ++ /* Do the actual TLB flush before dropping ptl */ ++ if (force_flush) ++ tlb_flush_mmu_tlbonly(tlb); ++ pte_unmap_unlock(start_pte, ptl); ++ ++ /* ++ * If we forced a TLB flush (either due to running out of ++ * batch buffers or because we needed to flush dirty TLB ++ * entries before releasing the ptl), free the batched ++ * memory too. Restart if we didn't do everything. ++ */ ++ if (force_flush) { ++ force_flush = 0; ++ tlb_flush_mmu_free(tlb); ++ if (addr != end) ++ goto again; ++ } ++ ++ return addr; ++} ++ ++static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { ++ if (next - addr != HPAGE_PMD_SIZE) ++ __split_huge_pmd(vma, pmd, addr, false, NULL); ++ else if (zap_huge_pmd(tlb, vma, pmd, addr)) ++ goto next; ++ /* fall through */ ++ } ++ /* ++ * Here there can be other concurrent MADV_DONTNEED or ++ * trans huge page faults running, and if the pmd is ++ * none or trans huge it can change under us. This is ++ * because MADV_DONTNEED holds the mmap_sem in read ++ * mode. 
++ */ ++ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) ++ goto next; ++ next = zap_pte_range(tlb, vma, pmd, addr, next, details); ++next: ++ cond_resched(); ++ } while (pmd++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++static inline unsigned long zap_pud_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_trans_huge(*pud) || pud_devmap(*pud)) { ++ if (next - addr != HPAGE_PUD_SIZE) { ++ VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma); ++ split_huge_pud(vma, pud, addr); ++ } else if (zap_huge_pud(tlb, vma, pud, addr)) ++ goto next; ++ /* fall through */ ++ } ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ next = zap_pmd_range(tlb, vma, pud, addr, next, details); ++next: ++ cond_resched(); ++ } while (pud++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ next = zap_pud_range(tlb, vma, p4d, addr, next, details); ++ } while (p4d++, addr = next, addr != end); ++ ++ return addr; ++} ++ ++void unmap_page_range(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end, ++ struct zap_details *details) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ BUG_ON(addr >= end); ++ tlb_start_vma(tlb, vma); ++ pgd = pgd_offset(vma->vm_mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ next = zap_p4d_range(tlb, vma, pgd, addr, next, details); ++ } while (pgd++, addr = next, addr != end); ++ tlb_end_vma(tlb, vma); ++} ++ ++ ++static void unmap_single_vma(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, unsigned long start_addr, ++ unsigned long end_addr, ++ struct zap_details *details) ++{ ++ unsigned long start = max(vma->vm_start, start_addr); ++ unsigned long end; ++ ++ if (start >= vma->vm_end) ++ return; ++ end = min(vma->vm_end, end_addr); ++ if (end <= vma->vm_start) ++ return; ++ ++ if (vma->vm_file) ++ uprobe_munmap(vma, start, end); ++ ++ if (unlikely(vma->vm_flags & VM_PFNMAP)) ++ untrack_pfn(vma, 0, 0); ++ ++ if (start != end) { ++ if (unlikely(is_vm_hugetlb_page(vma))) { ++ /* ++ * It is undesirable to test vma->vm_file as it ++ * should be non-null for valid hugetlb area. ++ * However, vm_file will be NULL in the error ++ * cleanup path of mmap_region. When ++ * hugetlbfs ->mmap method fails, ++ * mmap_region() nullifies vma->vm_file ++ * before calling this function to clean up. ++ * Since no pte has actually been setup, it is ++ * safe to do nothing in this case. 
++ */ ++ if (vma->vm_file) { ++ i_mmap_lock_write(vma->vm_file->f_mapping); ++ __unmap_hugepage_range_final(tlb, vma, start, end, NULL); ++ i_mmap_unlock_write(vma->vm_file->f_mapping); ++ } ++ } else ++ unmap_page_range(tlb, vma, start, end, details); ++ } ++} ++ ++/** ++ * unmap_vmas - unmap a range of memory covered by a list of vma's ++ * @tlb: address of the caller's struct mmu_gather ++ * @vma: the starting vma ++ * @start_addr: virtual address at which to start unmapping ++ * @end_addr: virtual address at which to end unmapping ++ * ++ * Unmap all pages in the vma list. ++ * ++ * Only addresses between `start' and `end' will be unmapped. ++ * ++ * The VMA list must be sorted in ascending virtual address order. ++ * ++ * unmap_vmas() assumes that the caller will flush the whole unmapped address ++ * range after unmap_vmas() returns. So the only responsibility here is to ++ * ensure that any thus-far unmapped pages are flushed before unmap_vmas() ++ * drops the lock and schedules. ++ */ ++void unmap_vmas(struct mmu_gather *tlb, ++ struct vm_area_struct *vma, unsigned long start_addr, ++ unsigned long end_addr) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ ++ mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); ++ for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) ++ unmap_single_vma(tlb, vma, start_addr, end_addr, NULL); ++ mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); ++} ++ ++/** ++ * zap_page_range - remove user pages in a given range ++ * @vma: vm_area_struct holding the applicable pages ++ * @start: starting address of pages to zap ++ * @size: number of bytes to zap ++ * ++ * Caller must protect the VMA list ++ */ ++void zap_page_range(struct vm_area_struct *vma, unsigned long start, ++ unsigned long size) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ struct mmu_gather tlb; ++ unsigned long end = start + size; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, start, end); ++ update_hiwater_rss(mm); ++ mmu_notifier_invalidate_range_start(mm, start, end); ++ for ( ; vma && vma->vm_start < end; vma = vma->vm_next) ++ unmap_single_vma(&tlb, vma, start, end, NULL); ++ mmu_notifier_invalidate_range_end(mm, start, end); ++ tlb_finish_mmu(&tlb, start, end); ++} ++ ++/** ++ * zap_page_range_single - remove user pages in a given range ++ * @vma: vm_area_struct holding the applicable pages ++ * @address: starting address of pages to zap ++ * @size: number of bytes to zap ++ * @details: details of shared cache invalidation ++ * ++ * The range must fit into one VMA. ++ */ ++static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, ++ unsigned long size, struct zap_details *details) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ struct mmu_gather tlb; ++ unsigned long end = address + size; ++ ++ lru_add_drain(); ++ tlb_gather_mmu(&tlb, mm, address, end); ++ update_hiwater_rss(mm); ++ mmu_notifier_invalidate_range_start(mm, address, end); ++ unmap_single_vma(&tlb, vma, address, end, details); ++ mmu_notifier_invalidate_range_end(mm, address, end); ++ tlb_finish_mmu(&tlb, address, end); ++} ++ ++/** ++ * zap_vma_ptes - remove ptes mapping the vma ++ * @vma: vm_area_struct holding ptes to be zapped ++ * @address: starting address of pages to zap ++ * @size: number of bytes to zap ++ * ++ * This function only unmaps ptes assigned to VM_PFNMAP vmas. ++ * ++ * The entire address range must be fully contained within the vma. 
++ * ++ */ ++void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, ++ unsigned long size) ++{ ++ if (address < vma->vm_start || address + size > vma->vm_end || ++ !(vma->vm_flags & VM_PFNMAP)) ++ return; ++ ++ zap_page_range_single(vma, address, size, NULL); ++} ++EXPORT_SYMBOL_GPL(zap_vma_ptes); ++ ++pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, ++ spinlock_t **ptl) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ ++ pgd = pgd_offset(mm, addr); ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return NULL; ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return NULL; ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return NULL; ++ ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ return pte_alloc_map_lock(mm, pmd, addr, ptl); ++} ++ ++/* ++ * This is the old fallback for page remapping. ++ * ++ * For historical reasons, it only allows reserved pages. Only ++ * old drivers should use this, and they needed to mark their ++ * pages reserved for the old functions anyway. ++ */ ++static int insert_page(struct vm_area_struct *vma, unsigned long addr, ++ struct page *page, pgprot_t prot) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ int retval; ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ retval = -EINVAL; ++ if (PageAnon(page)) ++ goto out; ++ retval = -ENOMEM; ++ flush_dcache_page(page); ++ pte = get_locked_pte(mm, addr, &ptl); ++ if (!pte) ++ goto out; ++ retval = -EBUSY; ++ if (!pte_none(*pte)) ++ goto out_unlock; ++ ++ /* Ok, finally just insert the thing.. */ ++ get_page(page); ++ inc_mm_counter_fast(mm, mm_counter_file(page)); ++ page_add_file_rmap(page, false); ++ set_pte_at(mm, addr, pte, mk_pte(page, prot)); ++ ++ retval = 0; ++ pte_unmap_unlock(pte, ptl); ++ return retval; ++out_unlock: ++ pte_unmap_unlock(pte, ptl); ++out: ++ return retval; ++} ++ ++/** ++ * vm_insert_page - insert single page into user vma ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @page: source kernel page ++ * ++ * This allows drivers to insert individual pages they've allocated ++ * into a user vma. ++ * ++ * The page has to be a nice clean _individual_ kernel allocation. ++ * If you allocate a compound page, you need to have marked it as ++ * such (__GFP_COMP), or manually just split the page up yourself ++ * (see split_page()). ++ * ++ * NOTE! Traditionally this was done with "remap_pfn_range()" which ++ * took an arbitrary page protection parameter. This doesn't allow ++ * that. Your vma protection will have to be set up correctly, which ++ * means that if you want a shared writable mapping, you'd better ++ * ask for a shared writable mapping! ++ * ++ * The page does not need to be reserved. ++ * ++ * Usually this function is called from f_op->mmap() handler ++ * under mm->mmap_sem write-lock, so it can change vma->vm_flags. ++ * Caller must set VM_MIXEDMAP on vma if it wants to call this ++ * function from other places, for example from page-fault handler. 
++ */ ++int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, ++ struct page *page) ++{ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ if (!page_count(page)) ++ return -EINVAL; ++ if (!(vma->vm_flags & VM_MIXEDMAP)) { ++ BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); ++ BUG_ON(vma->vm_flags & VM_PFNMAP); ++ vma->vm_flags |= VM_MIXEDMAP; ++ } ++ return insert_page(vma, addr, page, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_insert_page); ++ ++static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn, pgprot_t prot, bool mkwrite) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ int retval; ++ pte_t *pte, entry; ++ spinlock_t *ptl; ++ ++ retval = -ENOMEM; ++ pte = get_locked_pte(mm, addr, &ptl); ++ if (!pte) ++ goto out; ++ retval = -EBUSY; ++ if (!pte_none(*pte)) { ++ if (mkwrite) { ++ /* ++ * For read faults on private mappings the PFN passed ++ * in may not match the PFN we have mapped if the ++ * mapped PFN is a writeable COW page. In the mkwrite ++ * case we are creating a writable PTE for a shared ++ * mapping and we expect the PFNs to match. If they ++ * don't match, we are likely racing with block ++ * allocation and mapping invalidation so just skip the ++ * update. ++ */ ++ if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { ++ WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); ++ goto out_unlock; ++ } ++ entry = pte_mkyoung(*pte); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ if (ptep_set_access_flags(vma, addr, pte, entry, 1)) ++ update_mmu_cache(vma, addr, pte); ++ } ++ goto out_unlock; ++ } ++ ++ /* Ok, finally just insert the thing.. */ ++ if (pfn_t_devmap(pfn)) ++ entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); ++ else ++ entry = pte_mkspecial(pfn_t_pte(pfn, prot)); ++ ++ if (mkwrite) { ++ entry = pte_mkyoung(entry); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ } ++ ++ set_pte_at(mm, addr, pte, entry); ++ update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ ++ ++ retval = 0; ++out_unlock: ++ pte_unmap_unlock(pte, ptl); ++out: ++ return retval; ++} ++ ++/** ++ * vm_insert_pfn - insert single pfn into user vma ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @pfn: source kernel pfn ++ * ++ * Similar to vm_insert_page, this allows drivers to insert individual pages ++ * they've allocated into a user vma. Same comments apply. ++ * ++ * This function should only be called from a vm_ops->fault handler, and ++ * in that case the handler should return NULL. ++ * ++ * vma cannot be a COW mapping. ++ * ++ * As this is called only for pages that do not currently exist, we ++ * do not need to flush old virtual caches or the TLB. ++ */ ++int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn) ++{ ++ return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_insert_pfn); ++ ++/** ++ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot ++ * @vma: user vma to map to ++ * @addr: target user address of this page ++ * @pfn: source kernel pfn ++ * @pgprot: pgprot flags for the inserted page ++ * ++ * This is exactly like vm_insert_pfn, except that it allows drivers to ++ * to override pgprot on a per-page basis. ++ * ++ * This only makes sense for IO mappings, and it makes no sense for ++ * cow mappings. In general, using multiple vmas is preferable; ++ * vm_insert_pfn_prot should only be used if using multiple VMAs is ++ * impractical. 
++ */ ++int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn, pgprot_t pgprot) ++{ ++ int ret; ++ /* ++ * Technically, architectures with pte_special can avoid all these ++ * restrictions (same for remap_pfn_range). However we would like ++ * consistency in testing and feature parity among all, so we should ++ * try to keep these invariants in place for everybody. ++ */ ++ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); ++ BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == ++ (VM_PFNMAP|VM_MIXEDMAP)); ++ BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); ++ BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); ++ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ ++ if (!pfn_modify_allowed(pfn, pgprot)) ++ return -EACCES; ++ ++ track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); ++ ++ ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, ++ false); ++ ++ return ret; ++} ++EXPORT_SYMBOL(vm_insert_pfn_prot); ++ ++static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) ++{ ++ /* these checks mirror the abort conditions in vm_normal_page */ ++ if (vma->vm_flags & VM_MIXEDMAP) ++ return true; ++ if (pfn_t_devmap(pfn)) ++ return true; ++ if (pfn_t_special(pfn)) ++ return true; ++ if (is_zero_pfn(pfn_t_to_pfn(pfn))) ++ return true; ++ return false; ++} ++ ++static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn, bool mkwrite) ++{ ++ pgprot_t pgprot = vma->vm_page_prot; ++ ++ BUG_ON(!vm_mixed_ok(vma, pfn)); ++ ++ if (addr < vma->vm_start || addr >= vma->vm_end) ++ return -EFAULT; ++ ++ track_pfn_insert(vma, &pgprot, pfn); ++ ++ if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) ++ return -EACCES; ++ ++ /* ++ * If we don't have pte special, then we have to use the pfn_valid() ++ * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* ++ * refcount the page if pfn_valid is true (hence insert_page rather ++ * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP ++ * without pte special, it would there be refcounted as a normal page. ++ */ ++ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && ++ !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { ++ struct page *page; ++ ++ /* ++ * At this point we are committed to insert_page() ++ * regardless of whether the caller specified flags that ++ * result in pfn_t_has_page() == false. ++ */ ++ page = pfn_to_page(pfn_t_to_pfn(pfn)); ++ return insert_page(vma, addr, page, pgprot); ++ } ++ return insert_pfn(vma, addr, pfn, pgprot, mkwrite); ++} ++ ++int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, ++ pfn_t pfn) ++{ ++ return __vm_insert_mixed(vma, addr, pfn, false); ++ ++} ++EXPORT_SYMBOL(vm_insert_mixed); ++ ++/* ++ * If the insertion of PTE failed because someone else already added a ++ * different entry in the mean time, we treat that as success as we assume ++ * the same entry was actually inserted. ++ */ ++ ++vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, ++ unsigned long addr, pfn_t pfn) ++{ ++ int err; ++ ++ err = __vm_insert_mixed(vma, addr, pfn, true); ++ if (err == -ENOMEM) ++ return VM_FAULT_OOM; ++ if (err < 0 && err != -EBUSY) ++ return VM_FAULT_SIGBUS; ++ return VM_FAULT_NOPAGE; ++} ++EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); ++ ++/* ++ * maps a range of physical memory into the requested pages. the old ++ * mappings are removed. 
any references to nonexistent pages results ++ * in null mappings (currently treated as "copy-on-access") ++ */ ++static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pte_t *pte; ++ spinlock_t *ptl; ++ int err = 0; ++ ++ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ return -ENOMEM; ++ arch_enter_lazy_mmu_mode(); ++ do { ++ BUG_ON(!pte_none(*pte)); ++ if (!pfn_modify_allowed(pfn, prot)) { ++ err = -EACCES; ++ break; ++ } ++ set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); ++ pfn++; ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(pte - 1, ptl); ++ return err; ++} ++ ++static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ do { ++ next = pmd_addr_end(addr, end); ++ err = remap_pte_range(mm, pmd, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ pud_t *pud; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ err = remap_pmd_range(mm, pud, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ unsigned long pfn, pgprot_t prot) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ int err; ++ ++ pfn -= addr >> PAGE_SHIFT; ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ err = remap_pud_range(mm, p4d, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ return err; ++ } while (p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++/** ++ * remap_pfn_range - remap kernel memory to userspace ++ * @vma: user vma to map to ++ * @addr: target user address to start at ++ * @pfn: physical address of kernel memory ++ * @size: size of map area ++ * @prot: page protection flags for this mapping ++ * ++ * Note: this is only safe if the mm semaphore is held when called. ++ */ ++int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long pfn, unsigned long size, pgprot_t prot) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long end = addr + PAGE_ALIGN(size); ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long remap_pfn = pfn; ++ int err; ++ ++ /* ++ * Physically remapped pages are special. Tell the ++ * rest of the world about it: ++ * VM_IO tells people not to look at these pages ++ * (accesses can have side effects). ++ * VM_PFNMAP tells the core MM that the base pages are just ++ * raw PFN mappings, and do not have a "struct page" associated ++ * with them. ++ * VM_DONTEXPAND ++ * Disable vma merging and expanding with mremap(). ++ * VM_DONTDUMP ++ * Omit vma from core dump, even when VM_IO turned off. 
++ * ++ * There's a horrible special case to handle copy-on-write ++ * behaviour that some programs depend on. We mark the "original" ++ * un-COW'ed pages by matching them up with "vma->vm_pgoff". ++ * See vm_normal_page() for details. ++ */ ++ if (is_cow_mapping(vma->vm_flags)) { ++ if (addr != vma->vm_start || end != vma->vm_end) ++ return -EINVAL; ++ vma->vm_pgoff = pfn; ++ } ++ ++ err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size)); ++ if (err) ++ return -EINVAL; ++ ++ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; ++ ++ BUG_ON(addr >= end); ++ pfn -= addr >> PAGE_SHIFT; ++ pgd = pgd_offset(mm, addr); ++ flush_cache_range(vma, addr, end); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = remap_p4d_range(mm, pgd, addr, next, ++ pfn + (addr >> PAGE_SHIFT), prot); ++ if (err) ++ break; ++ } while (pgd++, addr = next, addr != end); ++ ++ if (err) ++ untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size)); ++ ++ return err; ++} ++EXPORT_SYMBOL(remap_pfn_range); ++ ++/** ++ * vm_iomap_memory - remap memory to userspace ++ * @vma: user vma to map to ++ * @start: start of area ++ * @len: size of area ++ * ++ * This is a simplified io_remap_pfn_range() for common driver use. The ++ * driver just needs to give us the physical memory range to be mapped, ++ * we'll figure out the rest from the vma information. ++ * ++ * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get ++ * whatever write-combining details or similar. ++ */ ++int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) ++{ ++ unsigned long vm_len, pfn, pages; ++ ++ /* Check that the physical memory area passed in looks valid */ ++ if (start + len < start) ++ return -EINVAL; ++ /* ++ * You *really* shouldn't map things that aren't page-aligned, ++ * but we've historically allowed it because IO memory might ++ * just have smaller alignment. ++ */ ++ len += start & ~PAGE_MASK; ++ pfn = start >> PAGE_SHIFT; ++ pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; ++ if (pfn + pages < pfn) ++ return -EINVAL; ++ ++ /* We start the mapping 'vm_pgoff' pages into the area */ ++ if (vma->vm_pgoff > pages) ++ return -EINVAL; ++ pfn += vma->vm_pgoff; ++ pages -= vma->vm_pgoff; ++ ++ /* Can we fit all of the mapping? */ ++ vm_len = vma->vm_end - vma->vm_start; ++ if (vm_len >> PAGE_SHIFT > pages) ++ return -EINVAL; ++ ++ /* Ok, let it rip */ ++ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); ++} ++EXPORT_SYMBOL(vm_iomap_memory); ++ ++static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pte_t *pte; ++ int err; ++ pgtable_t token; ++ spinlock_t *uninitialized_var(ptl); ++ ++ pte = (mm == &init_mm) ? 
++ pte_alloc_kernel(pmd, addr) : ++ pte_alloc_map_lock(mm, pmd, addr, &ptl); ++ if (!pte) ++ return -ENOMEM; ++ ++ BUG_ON(pmd_huge(*pmd)); ++ ++ arch_enter_lazy_mmu_mode(); ++ ++ token = pmd_pgtable(*pmd); ++ ++ do { ++ err = fn(pte++, token, addr, data); ++ if (err) ++ break; ++ } while (addr += PAGE_SIZE, addr != end); ++ ++ arch_leave_lazy_mmu_mode(); ++ ++ if (mm != &init_mm) ++ pte_unmap_unlock(pte-1, ptl); ++ return err; ++} ++ ++static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ int err; ++ ++ BUG_ON(pud_huge(*pud)); ++ ++ pmd = pmd_alloc(mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ do { ++ next = pmd_addr_end(addr, end); ++ err = apply_to_pte_range(mm, pmd, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pmd++, addr = next, addr != end); ++ return err; ++} ++ ++static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ pud_t *pud; ++ unsigned long next; ++ int err; ++ ++ pud = pud_alloc(mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ err = apply_to_pmd_range(mm, pud, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pud++, addr = next, addr != end); ++ return err; ++} ++ ++static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, ++ unsigned long addr, unsigned long end, ++ pte_fn_t fn, void *data) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ int err; ++ ++ p4d = p4d_alloc(mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ err = apply_to_pud_range(mm, p4d, addr, next, fn, data); ++ if (err) ++ break; ++ } while (p4d++, addr = next, addr != end); ++ return err; ++} ++ ++/* ++ * Scan a region of virtual memory, filling in page tables as necessary ++ * and calling a provided function on each leaf page table. ++ */ ++int apply_to_page_range(struct mm_struct *mm, unsigned long addr, ++ unsigned long size, pte_fn_t fn, void *data) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long end = addr + size; ++ int err; ++ ++ if (WARN_ON(addr >= end)) ++ return -EINVAL; ++ ++ pgd = pgd_offset(mm, addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); ++ if (err) ++ break; ++ } while (pgd++, addr = next, addr != end); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(apply_to_page_range); ++ ++/* ++ * handle_pte_fault chooses page fault handler according to an entry which was ++ * read non-atomically. Before making any commitment, on those architectures ++ * or configurations (e.g. i386 with PAE) which might give a mix of unmatched ++ * parts, do_swap_page must check under lock before unmapping the pte and ++ * proceeding (but do_wp_page is only called after already making such a check; ++ * and do_anonymous_page can safely check later on). 
++ */ ++static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, ++ pte_t *page_table, pte_t orig_pte) ++{ ++ int same = 1; ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) ++ if (sizeof(pte_t) > sizeof(unsigned long)) { ++ spinlock_t *ptl = pte_lockptr(mm, pmd); ++ spin_lock(ptl); ++ same = pte_same(*page_table, orig_pte); ++ spin_unlock(ptl); ++ } ++#endif ++ pte_unmap(page_table); ++ return same; ++} ++ ++static inline bool cow_user_page(struct page *dst, struct page *src, ++ struct vm_fault *vmf) ++{ ++ bool ret; ++ void *kaddr; ++ void __user *uaddr; ++ bool locked = false; ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long addr = vmf->address; ++ ++ debug_dma_assert_idle(src); ++ ++ if (likely(src)) { ++ copy_user_highpage(dst, src, addr, vma); ++ return true; ++ } ++ ++ /* ++ * If the source page was a PFN mapping, we don't have ++ * a "struct page" for it. We do a best-effort copy by ++ * just copying from the original user address. If that ++ * fails, we just zero-fill it. Live with it. ++ */ ++ kaddr = kmap_atomic(dst); ++ uaddr = (void __user *)(addr & PAGE_MASK); ++ ++ /* ++ * On architectures with software "accessed" bits, we would ++ * take a double page fault, so mark it accessed here. ++ */ ++ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { ++ pte_t entry; ++ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); ++ locked = true; ++ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ /* ++ * Other thread has already handled the fault ++ * and we don't need to do anything. If it's ++ * not the case, the fault will be triggered ++ * again on the same address. ++ */ ++ ret = false; ++ goto pte_unlock; ++ } ++ ++ entry = pte_mkyoung(vmf->orig_pte); ++ if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) ++ update_mmu_cache(vma, addr, vmf->pte); ++ } ++ ++ /* ++ * This really shouldn't fail, because the page is there ++ * in the page tables. But it might just be unreadable, ++ * in which case we just give up and fill the result with ++ * zeroes. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { ++ if (locked) ++ goto warn; ++ ++ /* Re-validate under PTL if the page is still mapped */ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); ++ locked = true; ++ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ /* The PTE changed under us. Retry page fault. */ ++ ret = false; ++ goto pte_unlock; ++ } ++ ++ /* ++ * The same page can be mapped back since last copy attampt. ++ * Try to copy again under PTL. ++ */ ++ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { ++ /* ++ * Give a warn in case there can be some obscure ++ * use-case ++ */ ++warn: ++ WARN_ON_ONCE(1); ++ clear_page(kaddr); ++ } ++ } ++ ++ ret = true; ++ ++pte_unlock: ++ if (locked) ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ kunmap_atomic(kaddr); ++ flush_dcache_page(dst); ++ ++ return ret; ++} ++ ++static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) ++{ ++ struct file *vm_file = vma->vm_file; ++ ++ if (vm_file) ++ return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; ++ ++ /* ++ * Special mappings (e.g. VDSO) do not have any file so fake ++ * a default GFP_KERNEL for them. ++ */ ++ return GFP_KERNEL; ++} ++ ++/* ++ * Notify the address space that the page is about to become writable so that ++ * it can prohibit this or wait for the page to get into an appropriate state. ++ * ++ * We do this without the lock held, so that it can sleep if it needs to. 
++ */ ++static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ++{ ++ vm_fault_t ret; ++ struct page *page = vmf->page; ++ unsigned int old_flags = vmf->flags; ++ ++ vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; ++ ++ ret = vmf->vma->vm_ops->page_mkwrite(vmf); ++ /* Restore original flags so that caller is not surprised */ ++ vmf->flags = old_flags; ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) ++ return ret; ++ if (unlikely(!(ret & VM_FAULT_LOCKED))) { ++ lock_page(page); ++ if (!page->mapping) { ++ unlock_page(page); ++ return 0; /* retry */ ++ } ++ ret |= VM_FAULT_LOCKED; ++ } else ++ VM_BUG_ON_PAGE(!PageLocked(page), page); ++ return ret; ++} ++ ++/* ++ * Handle dirtying of a page in shared file mapping on a write fault. ++ * ++ * The function expects the page to be locked and unlocks it. ++ */ ++static void fault_dirty_shared_page(struct vm_area_struct *vma, ++ struct page *page) ++{ ++ struct address_space *mapping; ++ bool dirtied; ++ bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; ++ ++ dirtied = set_page_dirty(page); ++ VM_BUG_ON_PAGE(PageAnon(page), page); ++ /* ++ * Take a local copy of the address_space - page.mapping may be zeroed ++ * by truncate after unlock_page(). The address_space itself remains ++ * pinned by vma->vm_file's reference. We rely on unlock_page()'s ++ * release semantics to prevent the compiler from undoing this copying. ++ */ ++ mapping = page_rmapping(page); ++ unlock_page(page); ++ ++ if ((dirtied || page_mkwrite) && mapping) { ++ /* ++ * Some device drivers do not set page.mapping ++ * but still dirty their pages ++ */ ++ balance_dirty_pages_ratelimited(mapping); ++ } ++ ++ if (!page_mkwrite) ++ file_update_time(vma->vm_file); ++} ++ ++/* ++ * Handle write page faults for pages that can be reused in the current vma ++ * ++ * This can happen either due to the mapping being with the VM_SHARED flag, ++ * or due to us being the last reference standing to the page. In either ++ * case, all we need to do here is to mark the page as writable and update ++ * any related book-keeping. ++ */ ++static inline void wp_page_reuse(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = vmf->page; ++ pte_t entry; ++ /* ++ * Clear the pages cpupid information as the existing ++ * information potentially belongs to a now completely ++ * unrelated process. ++ */ ++ if (page) ++ page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); ++ ++ flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); ++ entry = pte_mkyoung(vmf->orig_pte); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++} ++ ++/* ++ * Handle the case of a page which we actually need to copy to a new page. ++ * ++ * Called with mmap_sem locked and the old page referenced, but ++ * without the ptl held. ++ * ++ * High level logic flow: ++ * ++ * - Allocate a page, copy the content of the old page to the new one. ++ * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. ++ * - Take the PTL. If the pte changed, bail out and release the allocated page ++ * - If the pte is still the way we remember it, update the page table and all ++ * relevant references. This includes dropping the reference the page-table ++ * held to the old page, as well as updating the rmap. ++ * - In any case, unlock the PTL and drop the reference we took to the old page. 
++ */ ++static vm_fault_t wp_page_copy(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *mm = vma->vm_mm; ++ struct page *old_page = vmf->page; ++ struct page *new_page = NULL; ++ pte_t entry; ++ int page_copied = 0; ++ const unsigned long mmun_start = vmf->address & PAGE_MASK; ++ const unsigned long mmun_end = mmun_start + PAGE_SIZE; ++ struct mem_cgroup *memcg; ++ ++ if (unlikely(anon_vma_prepare(vma))) ++ goto oom; ++ ++ if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { ++ new_page = alloc_zeroed_user_highpage_movable(vma, ++ vmf->address); ++ if (!new_page) ++ goto oom; ++ } else { ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, ++ vmf->address); ++ if (!new_page) ++ goto oom; ++ ++ if (!cow_user_page(new_page, old_page, vmf)) { ++ /* ++ * COW failed, if the fault was solved by other, ++ * it's fine. If not, userspace would re-fault on ++ * the same address and we will handle the fault ++ * from the second attempt. ++ */ ++ put_page(new_page); ++ if (old_page) ++ put_page(old_page); ++ return 0; ++ } ++ } ++ ++ if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) ++ goto oom_free_new; ++ ++ __SetPageUptodate(new_page); ++ ++ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); ++ ++ /* ++ * Re-check the pte - we dropped the lock ++ */ ++ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); ++ if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { ++ if (old_page) { ++ if (!PageAnon(old_page)) { ++ dec_mm_counter_fast(mm, ++ mm_counter_file(old_page)); ++ inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } ++ } else { ++ inc_mm_counter_fast(mm, MM_ANONPAGES); ++ } ++ flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); ++ entry = mk_pte(new_page, vma->vm_page_prot); ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ /* ++ * Clear the pte entry and flush it first, before updating the ++ * pte with the new entry. This will avoid a race condition ++ * seen in the presence of one thread doing SMC and another ++ * thread doing COW. ++ */ ++ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); ++ page_add_new_anon_rmap(new_page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(new_page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(new_page, vma); ++ /* ++ * We call the notify macro here because, when using secondary ++ * mmu page tables (such as kvm shadow page tables), we want the ++ * new page to be mapped directly into the secondary page table. ++ */ ++ set_pte_at_notify(mm, vmf->address, vmf->pte, entry); ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ if (old_page) { ++ /* ++ * Only after switching the pte to the new page may ++ * we remove the mapcount here. Otherwise another ++ * process may come and find the rmap count decremented ++ * before the pte is switched to the new page, and ++ * "reuse" the old page writing into it while our pte ++ * here still points into it and can be read by other ++ * threads. ++ * ++ * The critical issue is to order this ++ * page_remove_rmap with the ptp_clear_flush above. ++ * Those stores are ordered by (if nothing else,) ++ * the barrier present in the atomic_add_negative ++ * in page_remove_rmap. ++ * ++ * Then the TLB flush in ptep_clear_flush ensures that ++ * no process can access the old page before the ++ * decremented mapcount is visible. And the old page ++ * cannot be reused until after the decremented ++ * mapcount is visible. So transitively, TLBs to ++ * old page will be flushed before it can be reused. 
++ */ ++ page_remove_rmap(old_page, false); ++ } ++ ++ /* Free the old page.. */ ++ new_page = old_page; ++ page_copied = 1; ++ } else { ++ mem_cgroup_cancel_charge(new_page, memcg, false); ++ } ++ ++ if (new_page) ++ put_page(new_page); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ /* ++ * No need to double call mmu_notifier->invalidate_range() callback as ++ * the above ptep_clear_flush_notify() did already call it. ++ */ ++ mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end); ++ if (old_page) { ++ /* ++ * Don't let another task, with possibly unlocked vma, ++ * keep the mlocked page. ++ */ ++ if (page_copied && (vma->vm_flags & VM_LOCKED)) { ++ lock_page(old_page); /* LRU manipulation */ ++ if (PageMlocked(old_page)) ++ munlock_vma_page(old_page); ++ unlock_page(old_page); ++ } ++ put_page(old_page); ++ } ++ return page_copied ? VM_FAULT_WRITE : 0; ++oom_free_new: ++ put_page(new_page); ++oom: ++ if (old_page) ++ put_page(old_page); ++ return VM_FAULT_OOM; ++} ++ ++/** ++ * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE ++ * writeable once the page is prepared ++ * ++ * @vmf: structure describing the fault ++ * ++ * This function handles all that is needed to finish a write page fault in a ++ * shared mapping due to PTE being read-only once the mapped page is prepared. ++ * It handles locking of PTE and modifying it. The function returns ++ * VM_FAULT_WRITE on success, 0 when PTE got changed before we acquired PTE ++ * lock. ++ * ++ * The function expects the page to be locked or other protection against ++ * concurrent faults / writeback (such as DAX radix tree locks). ++ */ ++vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) ++{ ++ WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); ++ vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ /* ++ * We might have raced with another page fault while we released the ++ * pte_offset_map_lock. ++ */ ++ if (!pte_same(*vmf->pte, vmf->orig_pte)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return VM_FAULT_NOPAGE; ++ } ++ wp_page_reuse(vmf); ++ return 0; ++} ++ ++/* ++ * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED ++ * mapping ++ */ ++static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { ++ vm_fault_t ret; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ vmf->flags |= FAULT_FLAG_MKWRITE; ++ ret = vma->vm_ops->pfn_mkwrite(vmf); ++ if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) ++ return ret; ++ return finish_mkwrite_fault(vmf); ++ } ++ wp_page_reuse(vmf); ++ return VM_FAULT_WRITE; ++} ++ ++static vm_fault_t wp_page_shared(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ get_page(vmf->page); ++ ++ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { ++ vm_fault_t tmp; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ tmp = do_page_mkwrite(vmf); ++ if (unlikely(!tmp || (tmp & ++ (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { ++ put_page(vmf->page); ++ return tmp; ++ } ++ tmp = finish_mkwrite_fault(vmf); ++ if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ return tmp; ++ } ++ } else { ++ wp_page_reuse(vmf); ++ lock_page(vmf->page); ++ } ++ fault_dirty_shared_page(vma, vmf->page); ++ put_page(vmf->page); ++ ++ return VM_FAULT_WRITE; ++} ++ ++/* ++ * This routine handles present pages, when users try to write ++ * to a shared page. 
It is done by copying the page to a new address ++ * and decrementing the shared-page counter for the old page. ++ * ++ * Note that this routine assumes that the protection checks have been ++ * done by the caller (the low-level page fault routine in most cases). ++ * Thus we can safely just mark it writable once we've done any necessary ++ * COW. ++ * ++ * We also mark the page dirty at this point even though the page will ++ * change only once the write actually happens. This avoids a few races, ++ * and potentially makes it more efficient. ++ * ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), with pte both mapped and locked. ++ * We return with mmap_sem still held, but pte unmapped and unlocked. ++ */ ++static vm_fault_t do_wp_page(struct vm_fault *vmf) ++ __releases(vmf->ptl) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); ++ if (!vmf->page) { ++ /* ++ * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a ++ * VM_PFNMAP VMA. ++ * ++ * We should not cow pages in a shared writeable mapping. ++ * Just mark the pages writable and/or call ops->pfn_mkwrite. ++ */ ++ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == ++ (VM_WRITE|VM_SHARED)) ++ return wp_pfn_shared(vmf); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return wp_page_copy(vmf); ++ } ++ ++ /* ++ * Take out anonymous pages first, anonymous shared vmas are ++ * not dirty accountable. ++ */ ++ if (PageAnon(vmf->page) && !PageKsm(vmf->page)) { ++ int total_map_swapcount; ++ if (!trylock_page(vmf->page)) { ++ get_page(vmf->page); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ lock_page(vmf->page); ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (!pte_same(*vmf->pte, vmf->orig_pte)) { ++ unlock_page(vmf->page); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ put_page(vmf->page); ++ return 0; ++ } ++ put_page(vmf->page); ++ } ++ if (reuse_swap_page(vmf->page, &total_map_swapcount)) { ++ if (total_map_swapcount == 1) { ++ /* ++ * The page is all ours. Move it to ++ * our anon_vma so the rmap code will ++ * not search our parent or siblings. ++ * Protected against the rmap code by ++ * the page lock. ++ */ ++ page_move_anon_rmap(vmf->page, vma); ++ } ++ unlock_page(vmf->page); ++ wp_page_reuse(vmf); ++ return VM_FAULT_WRITE; ++ } ++ unlock_page(vmf->page); ++ } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == ++ (VM_WRITE|VM_SHARED))) { ++ return wp_page_shared(vmf); ++ } ++ ++ /* ++ * Ok, we need to copy. Oh, well.. 
++ */ ++ get_page(vmf->page); ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return wp_page_copy(vmf); ++} ++ ++static void unmap_mapping_range_vma(struct vm_area_struct *vma, ++ unsigned long start_addr, unsigned long end_addr, ++ struct zap_details *details) ++{ ++ zap_page_range_single(vma, start_addr, end_addr - start_addr, details); ++} ++ ++static inline void unmap_mapping_range_tree(struct rb_root_cached *root, ++ struct zap_details *details) ++{ ++ struct vm_area_struct *vma; ++ pgoff_t vba, vea, zba, zea; ++ ++ vma_interval_tree_foreach(vma, root, ++ details->first_index, details->last_index) { ++ ++ vba = vma->vm_pgoff; ++ vea = vba + vma_pages(vma) - 1; ++ zba = details->first_index; ++ if (zba < vba) ++ zba = vba; ++ zea = details->last_index; ++ if (zea > vea) ++ zea = vea; ++ ++ unmap_mapping_range_vma(vma, ++ ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ++ ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, ++ details); ++ } ++} ++ ++/** ++ * unmap_mapping_pages() - Unmap pages from processes. ++ * @mapping: The address space containing pages to be unmapped. ++ * @start: Index of first page to be unmapped. ++ * @nr: Number of pages to be unmapped. 0 to unmap to end of file. ++ * @even_cows: Whether to unmap even private COWed pages. ++ * ++ * Unmap the pages in this address space from any userspace process which ++ * has them mmaped. Generally, you want to remove COWed pages as well when ++ * a file is being truncated, but not when invalidating pages from the page ++ * cache. ++ */ ++void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, ++ pgoff_t nr, bool even_cows) ++{ ++ struct zap_details details = { }; ++ ++ details.check_mapping = even_cows ? NULL : mapping; ++ details.first_index = start; ++ details.last_index = start + nr - 1; ++ if (details.last_index < details.first_index) ++ details.last_index = ULONG_MAX; ++ ++ i_mmap_lock_write(mapping); ++ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) ++ unmap_mapping_range_tree(&mapping->i_mmap, &details); ++ i_mmap_unlock_write(mapping); ++} ++ ++/** ++ * unmap_mapping_range - unmap the portion of all mmaps in the specified ++ * address_space corresponding to the specified byte range in the underlying ++ * file. ++ * ++ * @mapping: the address space containing mmaps to be unmapped. ++ * @holebegin: byte in first page to unmap, relative to the start of ++ * the underlying file. This will be rounded down to a PAGE_SIZE ++ * boundary. Note that this is different from truncate_pagecache(), which ++ * must keep the partial page. In contrast, we must get rid of ++ * partial pages. ++ * @holelen: size of prospective hole in bytes. This will be rounded ++ * up to a PAGE_SIZE boundary. A holelen of zero truncates to the ++ * end of the file. ++ * @even_cows: 1 when truncating a file, unmap even private COWed pages; ++ * but 0 when invalidating pagecache, don't throw away private data. ++ */ ++void unmap_mapping_range(struct address_space *mapping, ++ loff_t const holebegin, loff_t const holelen, int even_cows) ++{ ++ pgoff_t hba = holebegin >> PAGE_SHIFT; ++ pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ /* Check for overflow. 
*/ ++ if (sizeof(holelen) > sizeof(hlen)) { ++ long long holeend = ++ (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ if (holeend & ~(long long)ULONG_MAX) ++ hlen = ULONG_MAX - hba + 1; ++ } ++ ++ unmap_mapping_pages(mapping, hba, hlen, even_cows); ++} ++EXPORT_SYMBOL(unmap_mapping_range); ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), and pte mapped but not yet locked. ++ * We return with pte unmapped and unlocked. ++ * ++ * We return with the mmap_sem locked or unlocked in the same cases ++ * as does filemap_fault(). ++ */ ++vm_fault_t do_swap_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = NULL, *swapcache; ++ struct mem_cgroup *memcg; ++ swp_entry_t entry; ++ pte_t pte; ++ int locked; ++ int exclusive = 0; ++ vm_fault_t ret = 0; ++ ++ if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) ++ goto out; ++ ++ entry = pte_to_swp_entry(vmf->orig_pte); ++ if (unlikely(non_swap_entry(entry))) { ++ if (is_migration_entry(entry)) { ++ migration_entry_wait(vma->vm_mm, vmf->pmd, ++ vmf->address); ++ } else if (is_device_private_entry(entry)) { ++ /* ++ * For un-addressable device memory we call the pgmap ++ * fault handler callback. The callback must migrate ++ * the page back to some CPU accessible page. ++ */ ++ ret = device_private_entry_fault(vma, vmf->address, entry, ++ vmf->flags, vmf->pmd); ++ } else if (is_hwpoison_entry(entry)) { ++ ret = VM_FAULT_HWPOISON; ++ } else { ++ print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ++ ret = VM_FAULT_SIGBUS; ++ } ++ goto out; ++ } ++ ++ ++ delayacct_set_flag(DELAYACCT_PF_SWAPIN); ++ page = lookup_swap_cache(entry, vma, vmf->address); ++ swapcache = page; ++ ++ if (!page) { ++ struct swap_info_struct *si = swp_swap_info(entry); ++ ++ if (si->flags & SWP_SYNCHRONOUS_IO && ++ __swap_count(entry) == 1) { ++ /* skip swapcache */ ++ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, ++ vmf->address); ++ if (page) { ++ __SetPageLocked(page); ++ __SetPageSwapBacked(page); ++ set_page_private(page, entry.val); ++ lru_cache_add_anon(page); ++ swap_readpage(page, true); ++ } ++ } else { ++ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, ++ vmf); ++ swapcache = page; ++ } ++ ++ if (!page) { ++ /* ++ * Back out if somebody else faulted in this pte ++ * while we released the pte lock. ++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ++ ret = VM_FAULT_OOM; ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ goto unlock; ++ } ++ ++ /* Had to read the page from swap area: Major fault */ ++ ret = VM_FAULT_MAJOR; ++ count_vm_event(PGMAJFAULT); ++ count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); ++ } else if (PageHWPoison(page)) { ++ /* ++ * hwpoisoned dirty swapcache pages are kept for killing ++ * owner processes (which may be unknown at hwpoison time) ++ */ ++ ret = VM_FAULT_HWPOISON; ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ goto out_release; ++ } ++ ++ locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); ++ ++ delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ++ if (!locked) { ++ ret |= VM_FAULT_RETRY; ++ goto out_release; ++ } ++ ++ /* ++ * Make sure try_to_free_swap or reuse_swap_page or swapoff did not ++ * release the swapcache from under us. The page pin, and pte_same ++ * test below, are not enough to exclude that. Even if it is still ++ * swapcache, we need to check that the page's swap has not changed. 
++ */ ++ if (unlikely((!PageSwapCache(page) || ++ page_private(page) != entry.val)) && swapcache) ++ goto out_page; ++ ++ page = ksm_might_need_to_copy(page, vma, vmf->address); ++ if (unlikely(!page)) { ++ ret = VM_FAULT_OOM; ++ page = swapcache; ++ goto out_page; ++ } ++ ++ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, ++ &memcg, false)) { ++ ret = VM_FAULT_OOM; ++ goto out_page; ++ } ++ ++ /* ++ * Back out if somebody else already faulted in this pte. ++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) ++ goto out_nomap; ++ ++ if (unlikely(!PageUptodate(page))) { ++ ret = VM_FAULT_SIGBUS; ++ goto out_nomap; ++ } ++ ++ /* ++ * The page isn't present yet, go ahead with the fault. ++ * ++ * Be careful about the sequence of operations here. ++ * To get its accounting right, reuse_swap_page() must be called ++ * while the page is counted on swap but not yet in mapcount i.e. ++ * before page_add_anon_rmap() and swap_free(); try_to_free_swap() ++ * must be called after the swap_free(), or it will never succeed. ++ */ ++ ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); ++ pte = mk_pte(page, vma->vm_page_prot); ++ if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { ++ pte = maybe_mkwrite(pte_mkdirty(pte), vma); ++ vmf->flags &= ~FAULT_FLAG_WRITE; ++ ret |= VM_FAULT_WRITE; ++ exclusive = RMAP_EXCLUSIVE; ++ } ++ flush_icache_page(vma, page); ++ if (pte_swp_soft_dirty(vmf->orig_pte)) ++ pte = pte_mksoft_dirty(pte); ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); ++ arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); ++ vmf->orig_pte = pte; ++ ++ /* ksm created a completely new copy */ ++ if (unlikely(page != swapcache && swapcache)) { ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++ } else { ++ do_page_add_anon_rmap(page, vma, vmf->address, exclusive); ++ mem_cgroup_commit_charge(page, memcg, true, false); ++ activate_page(page); ++ } ++ ++ swap_free(entry); ++ if (mem_cgroup_swap_full(page) || ++ (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) ++ try_to_free_swap(page); ++ unlock_page(page); ++ if (page != swapcache && swapcache) { ++ /* ++ * Hold the lock to avoid the swap entry to be reused ++ * until we take the PT lock for the pte_same() check ++ * (to avoid false positives from pte_same). For ++ * further safety release the lock after the swap_free ++ * so that the swap count won't change under a ++ * parallel locked swapcache. ++ */ ++ unlock_page(swapcache); ++ put_page(swapcache); ++ } ++ ++ if (vmf->flags & FAULT_FLAG_WRITE) { ++ ret |= do_wp_page(vmf); ++ if (ret & VM_FAULT_ERROR) ++ ret &= VM_FAULT_ERROR; ++ goto out; ++ } ++ ++ /* No need to invalidate - it was non-present before */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out: ++ return ret; ++out_nomap: ++ mem_cgroup_cancel_charge(page, memcg, false); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out_page: ++ unlock_page(page); ++out_release: ++ put_page(page); ++ if (page != swapcache && swapcache) { ++ unlock_page(swapcache); ++ put_page(swapcache); ++ } ++ return ret; ++} ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults), and pte mapped but not yet locked. 
++ * We return with mmap_sem still held, but pte unmapped and unlocked. ++ */ ++static vm_fault_t do_anonymous_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mem_cgroup *memcg; ++ struct page *page; ++ vm_fault_t ret = 0; ++ pte_t entry; ++ ++ /* File mapping without ->vm_ops ? */ ++ if (vma->vm_flags & VM_SHARED) ++ return VM_FAULT_SIGBUS; ++ ++ /* ++ * Use pte_alloc() instead of pte_alloc_map(). We can't run ++ * pte_offset_map() on pmds where a huge pmd might be created ++ * from a different thread. ++ * ++ * pte_alloc_map() is safe to use under down_write(mmap_sem) or when ++ * parallel threads are excluded by other means. ++ * ++ * Here we only have down_read(mmap_sem). ++ */ ++ if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address)) ++ return VM_FAULT_OOM; ++ ++ /* See the comment in pte_alloc_one_map() */ ++ if (unlikely(pmd_trans_unstable(vmf->pmd))) ++ return 0; ++ ++ /* Use the zero-page for reads */ ++ if (!(vmf->flags & FAULT_FLAG_WRITE) && ++ !mm_forbids_zeropage(vma->vm_mm)) { ++ entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), ++ vma->vm_page_prot)); ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, ++ vmf->address, &vmf->ptl); ++ if (!pte_none(*vmf->pte)) ++ goto unlock; ++ ret = check_stable_address_space(vma->vm_mm); ++ if (ret) ++ goto unlock; ++ /* Deliver the page fault to userland, check inside PT lock */ ++ if (userfaultfd_missing(vma)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return handle_userfault(vmf, VM_UFFD_MISSING); ++ } ++ goto setpte; ++ } ++ ++ /* Allocate our own private page. */ ++ if (unlikely(anon_vma_prepare(vma))) ++ goto oom; ++ page = alloc_zeroed_user_highpage_movable(vma, vmf->address); ++ if (!page) ++ goto oom; ++ ++ if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, ++ false)) ++ goto oom_free_page; ++ ++ /* ++ * The memory barrier inside __SetPageUptodate makes sure that ++ * preceeding stores to the page contents become visible before ++ * the set_pte_at() write. ++ */ ++ __SetPageUptodate(page); ++ ++ entry = mk_pte(page, vma->vm_page_prot); ++ if (vma->vm_flags & VM_WRITE) ++ entry = pte_mkwrite(pte_mkdirty(entry)); ++ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ if (!pte_none(*vmf->pte)) ++ goto release; ++ ++ ret = check_stable_address_space(vma->vm_mm); ++ if (ret) ++ goto release; ++ ++ /* Deliver the page fault to userland, check inside PT lock */ ++ if (userfaultfd_missing(vma)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ mem_cgroup_cancel_charge(page, memcg, false); ++ put_page(page); ++ return handle_userfault(vmf, VM_UFFD_MISSING); ++ } ++ ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++setpte: ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); ++ ++ /* No need to invalidate - it was non-present before */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return ret; ++release: ++ mem_cgroup_cancel_charge(page, memcg, false); ++ put_page(page); ++ goto unlock; ++oom_free_page: ++ put_page(page); ++oom: ++ return VM_FAULT_OOM; ++} ++ ++/* ++ * The mmap_sem must have been held on entry, and may have been ++ * released depending on flags and vma->vm_ops->fault() return value. ++ * See filemap_fault() and __lock_page_retry(). 
++ */ ++static vm_fault_t __do_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret; ++ ++ /* ++ * Preallocate pte before we take page_lock because this might lead to ++ * deadlocks for memcg reclaim which waits for pages under writeback: ++ * lock_page(A) ++ * SetPageWriteback(A) ++ * unlock_page(A) ++ * lock_page(B) ++ * lock_page(B) ++ * pte_alloc_pne ++ * shrink_page_list ++ * wait_on_page_writeback(A) ++ * SetPageWriteback(B) ++ * unlock_page(B) ++ * # flush A, B to clear the writeback ++ */ ++ if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { ++ vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, ++ vmf->address); ++ if (!vmf->prealloc_pte) ++ return VM_FAULT_OOM; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ ret = vma->vm_ops->fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | ++ VM_FAULT_DONE_COW))) ++ return ret; ++ ++ if (unlikely(PageHWPoison(vmf->page))) { ++ if (ret & VM_FAULT_LOCKED) ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ vmf->page = NULL; ++ return VM_FAULT_HWPOISON; ++ } ++ ++ if (unlikely(!(ret & VM_FAULT_LOCKED))) ++ lock_page(vmf->page); ++ else ++ VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); ++ ++ return ret; ++} ++ ++/* ++ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. ++ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check ++ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly ++ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. ++ */ ++static int pmd_devmap_trans_unstable(pmd_t *pmd) ++{ ++ return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); ++} ++ ++static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ if (!pmd_none(*vmf->pmd)) ++ goto map_pte; ++ if (vmf->prealloc_pte) { ++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ++ if (unlikely(!pmd_none(*vmf->pmd))) { ++ spin_unlock(vmf->ptl); ++ goto map_pte; ++ } ++ ++ mm_inc_nr_ptes(vma->vm_mm); ++ pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); ++ spin_unlock(vmf->ptl); ++ vmf->prealloc_pte = NULL; ++ } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))) { ++ return VM_FAULT_OOM; ++ } ++map_pte: ++ /* ++ * If a huge pmd materialized under us just retry later. Use ++ * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of ++ * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge ++ * under us and then back to pmd_none, as a result of MADV_DONTNEED ++ * running immediately after a huge pmd fault in a different thread of ++ * this mm, in turn leading to a misleading pmd_trans_huge() retval. ++ * All we have to ensure is that it is a regular pmd that we can walk ++ * with pte_offset_map() and we can do that through an atomic read in ++ * C, which is what pmd_trans_unstable() provides. ++ */ ++ if (pmd_devmap_trans_unstable(vmf->pmd)) ++ return VM_FAULT_NOPAGE; ++ ++ /* ++ * At this point we know that our vmf->pmd points to a page of ptes ++ * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() ++ * for the duration of the fault. If a racing MADV_DONTNEED runs and ++ * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still ++ * be valid and we will re-check to make sure the vmf->pte isn't ++ * pte_none() under vmf->ptl protection when we return to ++ * alloc_set_pte(). 
++ */ ++ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, ++ &vmf->ptl); ++ return 0; ++} ++ ++#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE ++ ++#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1) ++static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, ++ unsigned long haddr) ++{ ++ if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) != ++ (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK)) ++ return false; ++ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) ++ return false; ++ return true; ++} ++ ++static void deposit_prealloc_pte(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ ++ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); ++ /* ++ * We are going to consume the prealloc table, ++ * count that as nr_ptes. ++ */ ++ mm_inc_nr_ptes(vma->vm_mm); ++ vmf->prealloc_pte = NULL; ++} ++ ++static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ bool write = vmf->flags & FAULT_FLAG_WRITE; ++ unsigned long haddr = vmf->address & HPAGE_PMD_MASK; ++ pmd_t entry; ++ int i; ++ vm_fault_t ret; ++ ++ if (!transhuge_vma_suitable(vma, haddr)) ++ return VM_FAULT_FALLBACK; ++ ++ ret = VM_FAULT_FALLBACK; ++ page = compound_head(page); ++ ++ /* ++ * Archs like ppc64 need additonal space to store information ++ * related to pte entry. Use the preallocated table for that. ++ */ ++ if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { ++ vmf->prealloc_pte = pte_alloc_one(vma->vm_mm, vmf->address); ++ if (!vmf->prealloc_pte) ++ return VM_FAULT_OOM; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ++ if (unlikely(!pmd_none(*vmf->pmd))) ++ goto out; ++ ++ for (i = 0; i < HPAGE_PMD_NR; i++) ++ flush_icache_page(vma, page + i); ++ ++ entry = mk_huge_pmd(page, vma->vm_page_prot); ++ if (write) ++ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); ++ ++ add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); ++ page_add_file_rmap(page, true); ++ /* ++ * deposit and withdraw with pmd lock held ++ */ ++ if (arch_needs_pgtable_deposit()) ++ deposit_prealloc_pte(vmf); ++ ++ set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); ++ ++ update_mmu_cache_pmd(vma, haddr, vmf->pmd); ++ ++ /* fault is handled */ ++ ret = 0; ++ count_vm_event(THP_FILE_MAPPED); ++out: ++ spin_unlock(vmf->ptl); ++ return ret; ++} ++#else ++static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) ++{ ++ BUILD_BUG(); ++ return 0; ++} ++#endif ++ ++/** ++ * alloc_set_pte - setup new PTE entry for given page and add reverse page ++ * mapping. If needed, the fucntion allocates page table or use pre-allocated. ++ * ++ * @vmf: fault environment ++ * @memcg: memcg to charge page (only for private mappings) ++ * @page: page to map ++ * ++ * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on ++ * return. ++ * ++ * Target users are page handler itself and implementations of ++ * vm_ops->map_pages. ++ */ ++vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, ++ struct page *page) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ bool write = vmf->flags & FAULT_FLAG_WRITE; ++ pte_t entry; ++ vm_fault_t ret; ++ ++ if (pmd_none(*vmf->pmd) && PageTransCompound(page) && ++ IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { ++ /* THP on COW? 
*/ ++ VM_BUG_ON_PAGE(memcg, page); ++ ++ ret = do_set_pmd(vmf, page); ++ if (ret != VM_FAULT_FALLBACK) ++ return ret; ++ } ++ ++ if (!vmf->pte) { ++ ret = pte_alloc_one_map(vmf); ++ if (ret) ++ return ret; ++ } ++ ++ /* Re-check under ptl */ ++ if (unlikely(!pte_none(*vmf->pte))) ++ return VM_FAULT_NOPAGE; ++ ++ flush_icache_page(vma, page); ++ entry = mk_pte(page, vma->vm_page_prot); ++ if (write) ++ entry = maybe_mkwrite(pte_mkdirty(entry), vma); ++ /* copy-on-write page */ ++ if (write && !(vma->vm_flags & VM_SHARED)) { ++ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); ++ page_add_new_anon_rmap(page, vma, vmf->address, false); ++ mem_cgroup_commit_charge(page, memcg, false, false); ++ lru_cache_add_active_or_unevictable(page, vma); ++ } else { ++ inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); ++ page_add_file_rmap(page, false); ++ } ++ set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); ++ ++ /* no need to invalidate: a not-present page won't be cached */ ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ ++ return 0; ++} ++ ++ ++/** ++ * finish_fault - finish page fault once we have prepared the page to fault ++ * ++ * @vmf: structure describing the fault ++ * ++ * This function handles all that is needed to finish a page fault once the ++ * page to fault in is prepared. It handles locking of PTEs, inserts PTE for ++ * given page, adds reverse page mapping, handles memcg charges and LRU ++ * addition. The function returns 0 on success, VM_FAULT_ code in case of ++ * error. ++ * ++ * The function expects the page to be locked and on success it consumes a ++ * reference of a page being mapped (for the PTE which maps it). ++ */ ++vm_fault_t finish_fault(struct vm_fault *vmf) ++{ ++ struct page *page; ++ vm_fault_t ret = 0; ++ ++ /* Did we COW the page? */ ++ if ((vmf->flags & FAULT_FLAG_WRITE) && ++ !(vmf->vma->vm_flags & VM_SHARED)) ++ page = vmf->cow_page; ++ else ++ page = vmf->page; ++ ++ /* ++ * check even for read faults because we might have lost our CoWed ++ * page ++ */ ++ if (!(vmf->vma->vm_flags & VM_SHARED)) ++ ret = check_stable_address_space(vmf->vma->vm_mm); ++ if (!ret) ++ ret = alloc_set_pte(vmf, vmf->memcg, page); ++ if (vmf->pte) ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return ret; ++} ++ ++static unsigned long fault_around_bytes __read_mostly = ++ rounddown_pow_of_two(65536); ++ ++#ifdef CONFIG_DEBUG_FS ++static int fault_around_bytes_get(void *data, u64 *val) ++{ ++ *val = fault_around_bytes; ++ return 0; ++} ++ ++/* ++ * fault_around_bytes must be rounded down to the nearest page order as it's ++ * what do_fault_around() expects to see. ++ */ ++static int fault_around_bytes_set(void *data, u64 val) ++{ ++ if (val / PAGE_SIZE > PTRS_PER_PTE) ++ return -EINVAL; ++ if (val > PAGE_SIZE) ++ fault_around_bytes = rounddown_pow_of_two(val); ++ else ++ fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */ ++ return 0; ++} ++DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops, ++ fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); ++ ++static int __init fault_around_debugfs(void) ++{ ++ void *ret; ++ ++ ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL, ++ &fault_around_bytes_fops); ++ if (!ret) ++ pr_warn("Failed to create fault_around_bytes in debugfs"); ++ return 0; ++} ++late_initcall(fault_around_debugfs); ++#endif ++ ++/* ++ * do_fault_around() tries to map few pages around the fault address. The hope ++ * is that the pages will be needed soon and this will lower the number of ++ * faults to handle. 
++ * ++ * It uses vm_ops->map_pages() to map the pages, which skips the page if it's ++ * not ready to be mapped: not up-to-date, locked, etc. ++ * ++ * This function is called with the page table lock taken. In the split ptlock ++ * case the page table lock only protects only those entries which belong to ++ * the page table corresponding to the fault address. ++ * ++ * This function doesn't cross the VMA boundaries, in order to call map_pages() ++ * only once. ++ * ++ * fault_around_bytes defines how many bytes we'll try to map. ++ * do_fault_around() expects it to be set to a power of two less than or equal ++ * to PTRS_PER_PTE. ++ * ++ * The virtual address of the area that we map is naturally aligned to ++ * fault_around_bytes rounded down to the machine page size ++ * (and therefore to page order). This way it's easier to guarantee ++ * that we don't cross page table boundaries. ++ */ ++static vm_fault_t do_fault_around(struct vm_fault *vmf) ++{ ++ unsigned long address = vmf->address, nr_pages, mask; ++ pgoff_t start_pgoff = vmf->pgoff; ++ pgoff_t end_pgoff; ++ int off; ++ vm_fault_t ret = 0; ++ ++ nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; ++ mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; ++ ++ vmf->address = max(address & mask, vmf->vma->vm_start); ++ off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); ++ start_pgoff -= off; ++ ++ /* ++ * end_pgoff is either the end of the page table, the end of ++ * the vma or nr_pages from start_pgoff, depending what is nearest. ++ */ ++ end_pgoff = start_pgoff - ++ ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + ++ PTRS_PER_PTE - 1; ++ end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, ++ start_pgoff + nr_pages - 1); ++ ++ if (pmd_none(*vmf->pmd)) { ++ vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm, ++ vmf->address); ++ if (!vmf->prealloc_pte) ++ goto out; ++ smp_wmb(); /* See comment in __pte_alloc() */ ++ } ++ ++ vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); ++ ++ /* Huge page is mapped? Page fault is solved */ ++ if (pmd_trans_huge(*vmf->pmd)) { ++ ret = VM_FAULT_NOPAGE; ++ goto out; ++ } ++ ++ /* ->map_pages() haven't done anything useful. Cold page cache? */ ++ if (!vmf->pte) ++ goto out; ++ ++ /* check if the page fault is solved */ ++ vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); ++ if (!pte_none(*vmf->pte)) ++ ret = VM_FAULT_NOPAGE; ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++out: ++ vmf->address = address; ++ vmf->pte = NULL; ++ return ret; ++} ++ ++static vm_fault_t do_read_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret = 0; ++ ++ /* ++ * Let's call ->map_pages() first and use ->fault() as fallback ++ * if page by the offset is not ready to be mapped (cold cache or ++ * something). 
++ */ ++ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { ++ ret = do_fault_around(vmf); ++ if (ret) ++ return ret; ++ } ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ return ret; ++ ++ ret |= finish_fault(vmf); ++ unlock_page(vmf->page); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ put_page(vmf->page); ++ return ret; ++} ++ ++static vm_fault_t do_cow_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret; ++ ++ if (unlikely(anon_vma_prepare(vma))) ++ return VM_FAULT_OOM; ++ ++ vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); ++ if (!vmf->cow_page) ++ return VM_FAULT_OOM; ++ ++ if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL, ++ &vmf->memcg, false)) { ++ put_page(vmf->cow_page); ++ return VM_FAULT_OOM; ++ } ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ goto uncharge_out; ++ if (ret & VM_FAULT_DONE_COW) ++ return ret; ++ ++ copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); ++ __SetPageUptodate(vmf->cow_page); ++ ++ ret |= finish_fault(vmf); ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ goto uncharge_out; ++ return ret; ++uncharge_out: ++ mem_cgroup_cancel_charge(vmf->cow_page, vmf->memcg, false); ++ put_page(vmf->cow_page); ++ return ret; ++} ++ ++static vm_fault_t do_shared_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ vm_fault_t ret, tmp; ++ ++ ret = __do_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) ++ return ret; ++ ++ /* ++ * Check if the backing address space wants to know that the page is ++ * about to become writable ++ */ ++ if (vma->vm_ops->page_mkwrite) { ++ unlock_page(vmf->page); ++ tmp = do_page_mkwrite(vmf); ++ if (unlikely(!tmp || ++ (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { ++ put_page(vmf->page); ++ return tmp; ++ } ++ } ++ ++ ret |= finish_fault(vmf); ++ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | ++ VM_FAULT_RETRY))) { ++ unlock_page(vmf->page); ++ put_page(vmf->page); ++ return ret; ++ } ++ ++ fault_dirty_shared_page(vma, vmf->page); ++ return ret; ++} ++ ++/* ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, ++ * but allow concurrent faults). ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). ++ * If mmap_sem is released, vma may become invalid (for example ++ * by other thread calling munmap()). ++ */ ++static vm_fault_t do_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct mm_struct *vm_mm = vma->vm_mm; ++ vm_fault_t ret; ++ ++ /* ++ * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND ++ */ ++ if (!vma->vm_ops->fault) { ++ /* ++ * If we find a migration pmd entry or a none pmd entry, which ++ * should never happen, return SIGBUS ++ */ ++ if (unlikely(!pmd_present(*vmf->pmd))) ++ ret = VM_FAULT_SIGBUS; ++ else { ++ vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, ++ vmf->pmd, ++ vmf->address, ++ &vmf->ptl); ++ /* ++ * Make sure this is not a temporary clearing of pte ++ * by holding ptl and checking again. A R/M/W update ++ * of pte involves: take ptl, clearing the pte so that ++ * we don't have concurrent modification by hardware ++ * followed by an update. 
++ */ ++ if (unlikely(pte_none(*vmf->pte))) ++ ret = VM_FAULT_SIGBUS; ++ else ++ ret = VM_FAULT_NOPAGE; ++ ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ } ++ } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ++ ret = do_read_fault(vmf); ++ else if (!(vma->vm_flags & VM_SHARED)) ++ ret = do_cow_fault(vmf); ++ else ++ ret = do_shared_fault(vmf); ++ ++ /* preallocated pagetable is unused: free it */ ++ if (vmf->prealloc_pte) { ++ pte_free(vm_mm, vmf->prealloc_pte); ++ vmf->prealloc_pte = NULL; ++ } ++ return ret; ++} ++ ++static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, ++ unsigned long addr, int page_nid, ++ int *flags) ++{ ++ get_page(page); ++ ++ count_vm_numa_event(NUMA_HINT_FAULTS); ++ if (page_nid == numa_node_id()) { ++ count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); ++ *flags |= TNF_FAULT_LOCAL; ++ } ++ ++ return mpol_misplaced(page, vma, addr); ++} ++ ++static vm_fault_t do_numa_page(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct page *page = NULL; ++ int page_nid = -1; ++ int last_cpupid; ++ int target_nid; ++ bool migrated = false; ++ pte_t pte; ++ bool was_writable = pte_savedwrite(vmf->orig_pte); ++ int flags = 0; ++ ++ /* ++ * The "pte" at this point cannot be used safely without ++ * validation through pte_unmap_same(). It's of NUMA type but ++ * the pfn may be screwed if the read is non atomic. ++ */ ++ vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); ++ spin_lock(vmf->ptl); ++ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ goto out; ++ } ++ ++ /* ++ * Make it present again, Depending on how arch implementes non ++ * accessible ptes, some can allow access by kernel mode. ++ */ ++ pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte); ++ pte = pte_modify(pte, vma->vm_page_prot); ++ pte = pte_mkyoung(pte); ++ if (was_writable) ++ pte = pte_mkwrite(pte); ++ ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte); ++ update_mmu_cache(vma, vmf->address, vmf->pte); ++ ++ page = vm_normal_page(vma, vmf->address, pte); ++ if (!page) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++ } ++ ++ /* TODO: handle PTE-mapped THP */ ++ if (PageCompound(page)) { ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++ } ++ ++ /* ++ * Avoid grouping on RO pages in general. RO pages shouldn't hurt as ++ * much anyway since they can be in shared cache state. This misses ++ * the case where a mapping is writable but the process never writes ++ * to it but pte_write gets cleared during protection updates and ++ * pte_dirty has unpredictable behaviour between PTE scan updates, ++ * background writeback, dirty balancing and application behaviour. ++ */ ++ if (!pte_write(pte)) ++ flags |= TNF_NO_GROUP; ++ ++ /* ++ * Flag if the page is shared between multiple address spaces. 
This ++ * is later used when determining whether to group tasks together ++ */ ++ if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) ++ flags |= TNF_SHARED; ++ ++ last_cpupid = page_cpupid_last(page); ++ page_nid = page_to_nid(page); ++ target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, ++ &flags); ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ if (target_nid == -1) { ++ put_page(page); ++ goto out; ++ } ++ ++ /* Migrate to the requested node */ ++ migrated = migrate_misplaced_page(page, vma, target_nid); ++ if (migrated) { ++ page_nid = target_nid; ++ flags |= TNF_MIGRATED; ++ } else ++ flags |= TNF_MIGRATE_FAIL; ++ ++out: ++ if (page_nid != -1) ++ task_numa_fault(last_cpupid, page_nid, 1, flags); ++ return 0; ++} ++ ++static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) ++{ ++ if (vma_is_anonymous(vmf->vma)) ++ return do_huge_pmd_anonymous_page(vmf); ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); ++ return VM_FAULT_FALLBACK; ++} ++ ++/* `inline' is required to avoid gcc 4.1.2 build error */ ++static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) ++{ ++ if (vma_is_anonymous(vmf->vma)) ++ return do_huge_pmd_wp_page(vmf, orig_pmd); ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); ++ ++ /* COW handled on pte level: split pmd */ ++ VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); ++ __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); ++ ++ return VM_FAULT_FALLBACK; ++} ++ ++static inline bool vma_is_accessible(struct vm_area_struct *vma) ++{ ++ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); ++} ++ ++static vm_fault_t create_huge_pud(struct vm_fault *vmf) ++{ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ /* No support for anonymous transparent PUD pages yet */ ++ if (vma_is_anonymous(vmf->vma)) ++ return VM_FAULT_FALLBACK; ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++ return VM_FAULT_FALLBACK; ++} ++ ++static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) ++{ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ /* No support for anonymous transparent PUD pages yet */ ++ if (vma_is_anonymous(vmf->vma)) ++ return VM_FAULT_FALLBACK; ++ if (vmf->vma->vm_ops->huge_fault) ++ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++ return VM_FAULT_FALLBACK; ++} ++ ++/* ++ * These routines also need to handle stuff like marking pages dirty ++ * and/or accessed for architectures that don't do it in hardware (most ++ * RISC architectures). The early dirtying is also good on the i386. ++ * ++ * There is also a hook called "update_mmu_cache()" that architectures ++ * with external mmu caches can use to update those (ie the Sparc or ++ * PowerPC hashed page tables that act as extended TLBs). ++ * ++ * We enter with non-exclusive mmap_sem (to exclude vma changes, but allow ++ * concurrent faults). ++ * ++ * The mmap_sem may have been released depending on flags and our return value. ++ * See filemap_fault() and __lock_page_or_retry(). ++ */ ++static vm_fault_t handle_pte_fault(struct vm_fault *vmf) ++{ ++ pte_t entry; ++ ++ if (unlikely(pmd_none(*vmf->pmd))) { ++ /* ++ * Leave __pte_alloc() until later: because vm_ops->fault may ++ * want to allocate huge page, and if we expose page table ++ * for an instant, it will be difficult to retract from ++ * concurrent faults and from rmap lookups. 
++ */ ++ vmf->pte = NULL; ++ } else { ++ /* See comment in pte_alloc_one_map() */ ++ if (pmd_devmap_trans_unstable(vmf->pmd)) ++ return 0; ++ /* ++ * A regular pmd is established and it can't morph into a huge ++ * pmd from under us anymore at this point because we hold the ++ * mmap_sem read mode and khugepaged takes it in write mode. ++ * So now it's safe to run pte_offset_map(). ++ */ ++ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); ++ vmf->orig_pte = *vmf->pte; ++ ++ /* ++ * some architectures can have larger ptes than wordsize, ++ * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and ++ * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic ++ * accesses. The code below just needs a consistent view ++ * for the ifs and we later double check anyway with the ++ * ptl lock held. So here a barrier will do. ++ */ ++ barrier(); ++ if (pte_none(vmf->orig_pte)) { ++ pte_unmap(vmf->pte); ++ vmf->pte = NULL; ++ } ++ } ++ ++ if (!vmf->pte) { ++ if (vma_is_anonymous(vmf->vma)) ++ return do_anonymous_page(vmf); ++ else ++ return do_fault(vmf); ++ } ++ ++ if (!pte_present(vmf->orig_pte)) ++ return do_swap_page(vmf); ++ ++ if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) ++ return do_numa_page(vmf); ++ ++ vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); ++ spin_lock(vmf->ptl); ++ entry = vmf->orig_pte; ++ if (unlikely(!pte_same(*vmf->pte, entry))) ++ goto unlock; ++ if (vmf->flags & FAULT_FLAG_WRITE) { ++ if (!pte_write(entry)) ++ return do_wp_page(vmf); ++ entry = pte_mkdirty(entry); ++ } ++ entry = pte_mkyoung(entry); ++ if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, ++ vmf->flags & FAULT_FLAG_WRITE)) { ++ update_mmu_cache(vmf->vma, vmf->address, vmf->pte); ++ } else { ++ /* ++ * This is needed only for protection faults but the arch code ++ * is not yet telling us if this is a protection fault or not. ++ * This still avoids useless tlb flushes for .text page faults ++ * with threads. ++ */ ++ if (vmf->flags & FAULT_FLAG_WRITE) ++ flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); ++ } ++unlock: ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ return 0; ++} ++ ++/* ++ * By the time we get here, we already hold the mm semaphore ++ * ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). 
++ */ ++static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags) ++{ ++ struct vm_fault vmf = { ++ .vma = vma, ++ .address = address & PAGE_MASK, ++ .flags = flags, ++ .pgoff = linear_page_index(vma, address), ++ .gfp_mask = __get_fault_gfp_mask(vma), ++ }; ++ unsigned int dirty = flags & FAULT_FLAG_WRITE; ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ p4d_t *p4d; ++ vm_fault_t ret; ++ ++ pgd = pgd_offset(mm, address); ++ p4d = p4d_alloc(mm, pgd, address); ++ if (!p4d) ++ return VM_FAULT_OOM; ++ ++ vmf.pud = pud_alloc(mm, p4d, address); ++ if (!vmf.pud) ++ return VM_FAULT_OOM; ++ if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { ++ ret = create_huge_pud(&vmf); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ pud_t orig_pud = *vmf.pud; ++ ++ barrier(); ++ if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { ++ ++ /* NUMA case for anonymous PUDs would go here */ ++ ++ if (dirty && !pud_write(orig_pud)) { ++ ret = wp_huge_pud(&vmf, orig_pud); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ huge_pud_set_accessed(&vmf, orig_pud); ++ return 0; ++ } ++ } ++ } ++ ++ vmf.pmd = pmd_alloc(mm, vmf.pud, address); ++ if (!vmf.pmd) ++ return VM_FAULT_OOM; ++ if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { ++ ret = create_huge_pmd(&vmf); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ pmd_t orig_pmd = *vmf.pmd; ++ ++ barrier(); ++ if (unlikely(is_swap_pmd(orig_pmd))) { ++ VM_BUG_ON(thp_migration_supported() && ++ !is_pmd_migration_entry(orig_pmd)); ++ if (is_pmd_migration_entry(orig_pmd)) ++ pmd_migration_entry_wait(mm, vmf.pmd); ++ return 0; ++ } ++ if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { ++ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) ++ return do_huge_pmd_numa_page(&vmf, orig_pmd); ++ ++ if (dirty && !pmd_write(orig_pmd)) { ++ ret = wp_huge_pmd(&vmf, orig_pmd); ++ if (!(ret & VM_FAULT_FALLBACK)) ++ return ret; ++ } else { ++ huge_pmd_set_accessed(&vmf, orig_pmd); ++ return 0; ++ } ++ } ++ } ++ ++ return handle_pte_fault(&vmf); ++} ++ ++/* ++ * By the time we get here, we already hold the mm semaphore ++ * ++ * The mmap_sem may have been released depending on flags and our ++ * return value. See filemap_fault() and __lock_page_or_retry(). ++ */ ++vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, ++ unsigned int flags) ++{ ++ vm_fault_t ret; ++ ++ __set_current_state(TASK_RUNNING); ++ ++ count_vm_event(PGFAULT); ++ count_memcg_event_mm(vma->vm_mm, PGFAULT); ++ ++ /* do counter updates before entering really critical section. */ ++ check_sync_rss_stat(current); ++ ++ if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, ++ flags & FAULT_FLAG_INSTRUCTION, ++ flags & FAULT_FLAG_REMOTE)) ++ return VM_FAULT_SIGSEGV; ++ ++ /* ++ * Enable the memcg OOM handling for faults triggered in user ++ * space. Kernel faults are handled more gracefully. ++ */ ++ if (flags & FAULT_FLAG_USER) ++ mem_cgroup_enter_user_fault(); ++ ++ if (unlikely(is_vm_hugetlb_page(vma))) ++ ret = hugetlb_fault(vma->vm_mm, vma, address, flags); ++ else ++ ret = __handle_mm_fault(vma, address, flags); ++ ++ if (flags & FAULT_FLAG_USER) { ++ mem_cgroup_exit_user_fault(); ++ /* ++ * The task may have entered a memcg OOM situation but ++ * if the allocation error was handled gracefully (no ++ * VM_FAULT_OOM), there is no need to kill anything. ++ * Just clean up the OOM state peacefully. 
++ */ ++ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) ++ mem_cgroup_oom_synchronize(false); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(handle_mm_fault); ++ ++#ifndef __PAGETABLE_P4D_FOLDED ++/* ++ * Allocate p4d page table. ++ * We've already handled the fast-path in-line. ++ */ ++int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) ++{ ++ p4d_t *new = p4d_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&mm->page_table_lock); ++ if (pgd_present(*pgd)) /* Another has populated it */ ++ p4d_free(mm, new); ++ else ++ pgd_populate(mm, pgd, new); ++ spin_unlock(&mm->page_table_lock); ++ return 0; ++} ++#endif /* __PAGETABLE_P4D_FOLDED */ ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++/* ++ * Allocate page upper directory. ++ * We've already handled the fast-path in-line. ++ */ ++int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) ++{ ++ pud_t *new = pud_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ spin_lock(&mm->page_table_lock); ++#ifndef __ARCH_HAS_5LEVEL_HACK ++ if (!p4d_present(*p4d)) { ++ mm_inc_nr_puds(mm); ++ p4d_populate(mm, p4d, new); ++ } else /* Another has populated it */ ++ pud_free(mm, new); ++#else ++ if (!pgd_present(*p4d)) { ++ mm_inc_nr_puds(mm); ++ pgd_populate(mm, p4d, new); ++ } else /* Another has populated it */ ++ pud_free(mm, new); ++#endif /* __ARCH_HAS_5LEVEL_HACK */ ++ spin_unlock(&mm->page_table_lock); ++ return 0; ++} ++#endif /* __PAGETABLE_PUD_FOLDED */ ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++/* ++ * Allocate page middle directory. ++ * We've already handled the fast-path in-line. ++ */ ++int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) ++{ ++ spinlock_t *ptl; ++ pmd_t *new = pmd_alloc_one(mm, address); ++ if (!new) ++ return -ENOMEM; ++ ++ smp_wmb(); /* See comment in __pte_alloc */ ++ ++ ptl = pud_lock(mm, pud); ++#ifndef __ARCH_HAS_4LEVEL_HACK ++ if (!pud_present(*pud)) { ++ mm_inc_nr_pmds(mm); ++ pud_populate(mm, pud, new); ++ } else /* Another has populated it */ ++ pmd_free(mm, new); ++#else ++ if (!pgd_present(*pud)) { ++ mm_inc_nr_pmds(mm); ++ pgd_populate(mm, pud, new); ++ } else /* Another has populated it */ ++ pmd_free(mm, new); ++#endif /* __ARCH_HAS_4LEVEL_HACK */ ++ spin_unlock(ptl); ++ return 0; ++} ++#endif /* __PAGETABLE_PMD_FOLDED */ ++ ++static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, ++ unsigned long *start, unsigned long *end, ++ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) ++{ ++ pgd_t *pgd; ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep; ++ ++ pgd = pgd_offset(mm, address); ++ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) ++ goto out; ++ ++ p4d = p4d_offset(pgd, address); ++ if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) ++ goto out; ++ ++ pud = pud_offset(p4d, address); ++ if (pud_none(*pud) || unlikely(pud_bad(*pud))) ++ goto out; ++ ++ pmd = pmd_offset(pud, address); ++ VM_BUG_ON(pmd_trans_huge(*pmd)); ++ ++ if (pmd_huge(*pmd)) { ++ if (!pmdpp) ++ goto out; ++ ++ if (start && end) { ++ *start = address & PMD_MASK; ++ *end = *start + PMD_SIZE; ++ mmu_notifier_invalidate_range_start(mm, *start, *end); ++ } ++ *ptlp = pmd_lock(mm, pmd); ++ if (pmd_huge(*pmd)) { ++ *pmdpp = pmd; ++ return 0; ++ } ++ spin_unlock(*ptlp); ++ if (start && end) ++ mmu_notifier_invalidate_range_end(mm, *start, *end); ++ } ++ ++ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) ++ goto out; ++ ++ if (start && end) { 
++ *start = address & PAGE_MASK; ++ *end = *start + PAGE_SIZE; ++ mmu_notifier_invalidate_range_start(mm, *start, *end); ++ } ++ ptep = pte_offset_map_lock(mm, pmd, address, ptlp); ++ if (!pte_present(*ptep)) ++ goto unlock; ++ *ptepp = ptep; ++ return 0; ++unlock: ++ pte_unmap_unlock(ptep, *ptlp); ++ if (start && end) ++ mmu_notifier_invalidate_range_end(mm, *start, *end); ++out: ++ return -EINVAL; ++} ++ ++static inline int follow_pte(struct mm_struct *mm, unsigned long address, ++ pte_t **ptepp, spinlock_t **ptlp) ++{ ++ int res; ++ ++ /* (void) is needed to make gcc happy */ ++ (void) __cond_lock(*ptlp, ++ !(res = __follow_pte_pmd(mm, address, NULL, NULL, ++ ptepp, NULL, ptlp))); ++ return res; ++} ++ ++int follow_pte_pmd(struct mm_struct *mm, unsigned long address, ++ unsigned long *start, unsigned long *end, ++ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) ++{ ++ int res; ++ ++ /* (void) is needed to make gcc happy */ ++ (void) __cond_lock(*ptlp, ++ !(res = __follow_pte_pmd(mm, address, start, end, ++ ptepp, pmdpp, ptlp))); ++ return res; ++} ++EXPORT_SYMBOL(follow_pte_pmd); ++ ++/** ++ * follow_pfn - look up PFN at a user virtual address ++ * @vma: memory mapping ++ * @address: user virtual address ++ * @pfn: location to store found PFN ++ * ++ * Only IO mappings and raw PFN mappings are allowed. ++ * ++ * Returns zero and the pfn at @pfn on success, -ve otherwise. ++ */ ++int follow_pfn(struct vm_area_struct *vma, unsigned long address, ++ unsigned long *pfn) ++{ ++ int ret = -EINVAL; ++ spinlock_t *ptl; ++ pte_t *ptep; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ return ret; ++ ++ ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); ++ if (ret) ++ return ret; ++ *pfn = pte_pfn(*ptep); ++ pte_unmap_unlock(ptep, ptl); ++ return 0; ++} ++EXPORT_SYMBOL(follow_pfn); ++ ++#ifdef CONFIG_HAVE_IOREMAP_PROT ++int follow_phys(struct vm_area_struct *vma, ++ unsigned long address, unsigned int flags, ++ unsigned long *prot, resource_size_t *phys) ++{ ++ int ret = -EINVAL; ++ pte_t *ptep, pte; ++ spinlock_t *ptl; ++ ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) ++ goto out; ++ ++ if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) ++ goto out; ++ pte = *ptep; ++ ++ if ((flags & FOLL_WRITE) && !pte_write(pte)) ++ goto unlock; ++ ++ *prot = pgprot_val(pte_pgprot(pte)); ++ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ++ ++ ret = 0; ++unlock: ++ pte_unmap_unlock(ptep, ptl); ++out: ++ return ret; ++} ++ ++int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, ++ void *buf, int len, int write) ++{ ++ resource_size_t phys_addr; ++ unsigned long prot = 0; ++ void __iomem *maddr; ++ int offset = addr & (PAGE_SIZE-1); ++ ++ if (follow_phys(vma, addr, write, &prot, &phys_addr)) ++ return -EINVAL; ++ ++ maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); ++ if (!maddr) ++ return -ENOMEM; ++ ++ if (write) ++ memcpy_toio(maddr + offset, buf, len); ++ else ++ memcpy_fromio(buf, maddr + offset, len); ++ iounmap(maddr); ++ ++ return len; ++} ++EXPORT_SYMBOL_GPL(generic_access_phys); ++#endif ++ ++/* ++ * Access another process' address space as given in mm. If non-NULL, use the ++ * given task for page fault accounting. 
++ */ ++int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, ++ unsigned long addr, void *buf, int len, unsigned int gup_flags) ++{ ++ struct vm_area_struct *vma; ++ void *old_buf = buf; ++ int write = gup_flags & FOLL_WRITE; ++ ++ if (down_read_killable(&mm->mmap_sem)) ++ return 0; ++ ++ /* ignore errors, just check how much was successfully transferred */ ++ while (len) { ++ int bytes, ret, offset; ++ void *maddr; ++ struct page *page = NULL; ++ ++ ret = get_user_pages_remote(tsk, mm, addr, 1, ++ gup_flags, &page, &vma, NULL); ++ if (ret <= 0) { ++#ifndef CONFIG_HAVE_IOREMAP_PROT ++ break; ++#else ++ /* ++ * Check if this is a VM_IO | VM_PFNMAP VMA, which ++ * we can access using slightly different code. ++ */ ++ vma = find_vma(mm, addr); ++ if (!vma || vma->vm_start > addr) ++ break; ++ if (vma->vm_ops && vma->vm_ops->access) ++ ret = vma->vm_ops->access(vma, addr, buf, ++ len, write); ++ if (ret <= 0) ++ break; ++ bytes = ret; ++#endif ++ } else { ++ bytes = len; ++ offset = addr & (PAGE_SIZE-1); ++ if (bytes > PAGE_SIZE-offset) ++ bytes = PAGE_SIZE-offset; ++ ++ maddr = kmap(page); ++ if (write) { ++ copy_to_user_page(vma, page, addr, ++ maddr + offset, buf, bytes); ++ set_page_dirty_lock(page); ++ } else { ++ copy_from_user_page(vma, page, addr, ++ buf, maddr + offset, bytes); ++ } ++ kunmap(page); ++ put_page(page); ++ } ++ len -= bytes; ++ buf += bytes; ++ addr += bytes; ++ } ++ up_read(&mm->mmap_sem); ++ ++ return buf - old_buf; ++} ++ ++/** ++ * access_remote_vm - access another process' address space ++ * @mm: the mm_struct of the target address space ++ * @addr: start address to access ++ * @buf: source or destination buffer ++ * @len: number of bytes to transfer ++ * @gup_flags: flags modifying lookup behaviour ++ * ++ * The caller must hold a reference on @mm. ++ */ ++int access_remote_vm(struct mm_struct *mm, unsigned long addr, ++ void *buf, int len, unsigned int gup_flags) ++{ ++ return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); ++} ++ ++/* ++ * Access another process' address space. ++ * Source/target buffer must be kernel space, ++ * Do not walk the page table directly, use get_user_pages ++ */ ++int access_process_vm(struct task_struct *tsk, unsigned long addr, ++ void *buf, int len, unsigned int gup_flags) ++{ ++ struct mm_struct *mm; ++ int ret; ++ ++ mm = get_task_mm(tsk); ++ if (!mm) ++ return 0; ++ ++ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); ++ ++ mmput(mm); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(access_process_vm); ++ ++/* ++ * Print the name of a VMA. 
++ */ ++void print_vma_addr(char *prefix, unsigned long ip) ++{ ++ struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma; ++ ++ /* ++ * we might be running from an atomic context so we cannot sleep ++ */ ++ if (!down_read_trylock(&mm->mmap_sem)) ++ return; ++ ++ vma = find_vma(mm, ip); ++ if (vma && vma->vm_file) { ++ struct file *f = vma->vm_file; ++ char *buf = (char *)__get_free_page(GFP_NOWAIT); ++ if (buf) { ++ char *p; ++ ++ p = file_path(f, buf, PAGE_SIZE); ++ if (IS_ERR(p)) ++ p = "?"; ++ printk("%s%s[%lx+%lx]", prefix, kbasename(p), ++ vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ free_page((unsigned long)buf); ++ } ++ } ++ up_read(&mm->mmap_sem); ++} ++ ++#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) ++void __might_fault(const char *file, int line) ++{ ++ /* ++ * Some code (nfs/sunrpc) uses socket ops on kernel memory while ++ * holding the mmap_sem, this is safe because kernel memory doesn't ++ * get paged out, therefore we'll never actually fault, and the ++ * below annotations will generate false positives. ++ */ ++ if (uaccess_kernel()) ++ return; ++ if (pagefault_disabled()) ++ return; ++ __might_sleep(file, line, 0); ++#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) ++ if (current->mm) ++ might_lock_read(¤t->mm->mmap_sem); ++#endif ++} ++EXPORT_SYMBOL(__might_fault); ++#endif ++ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) ++/* ++ * Process all subpages of the specified huge page with the specified ++ * operation. The target subpage will be processed last to keep its ++ * cache lines hot. ++ */ ++static inline void process_huge_page( ++ unsigned long addr_hint, unsigned int pages_per_huge_page, ++ void (*process_subpage)(unsigned long addr, int idx, void *arg), ++ void *arg) ++{ ++ int i, n, base, l; ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ ++ /* Process target subpage last to keep its cache lines hot */ ++ might_sleep(); ++ n = (addr_hint - addr) / PAGE_SIZE; ++ if (2 * n <= pages_per_huge_page) { ++ /* If target subpage in first half of huge page */ ++ base = 0; ++ l = n; ++ /* Process subpages at the end of huge page */ ++ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { ++ cond_resched(); ++ process_subpage(addr + i * PAGE_SIZE, i, arg); ++ } ++ } else { ++ /* If target subpage in second half of huge page */ ++ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); ++ l = pages_per_huge_page - n; ++ /* Process subpages at the begin of huge page */ ++ for (i = 0; i < base; i++) { ++ cond_resched(); ++ process_subpage(addr + i * PAGE_SIZE, i, arg); ++ } ++ } ++ /* ++ * Process remaining subpages in left-right-left-right pattern ++ * towards the target subpage ++ */ ++ for (i = 0; i < l; i++) { ++ int left_idx = base + i; ++ int right_idx = base + 2 * l - 1 - i; ++ ++ cond_resched(); ++ process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); ++ cond_resched(); ++ process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); ++ } ++} ++ ++struct cgp_args { ++ struct page *base_page; ++ unsigned long addr; ++}; ++ ++static int clear_gigantic_page_chunk(unsigned long start, unsigned long end, ++ struct cgp_args *args) ++{ ++ struct page *base_page = args->base_page; ++ struct page *p = base_page; ++ unsigned long addr = args->addr; ++ unsigned long i; ++ ++ might_sleep(); ++ for (i = start; i < end; ++i) { ++ cond_resched(); ++ clear_user_highpage(p, addr + i * PAGE_SIZE); ++ ++ p = mem_map_next(p, base_page, i); ++ } ++ ++ return 
KTASK_RETURN_SUCCESS; ++} ++ ++static void clear_subpage(unsigned long addr, int idx, void *arg) ++{ ++ struct page *page = arg; ++ ++ clear_user_highpage(page + idx, addr); ++} ++ ++void clear_huge_page(struct page *page, ++ unsigned long addr_hint, unsigned int pages_per_huge_page) ++{ ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ ++ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { ++ struct cgp_args args = {page, addr}; ++ struct ktask_node node = {0, pages_per_huge_page, ++ page_to_nid(page)}; ++ DEFINE_KTASK_CTL(ctl, clear_gigantic_page_chunk, &args, ++ KTASK_PTE_MINCHUNK); ++ ++ ktask_run_numa(&node, 1, &ctl); ++ return; ++ } ++ ++ process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); ++} ++ ++static void copy_user_gigantic_page(struct page *dst, struct page *src, ++ unsigned long addr, ++ struct vm_area_struct *vma, ++ unsigned int pages_per_huge_page) ++{ ++ int i; ++ struct page *dst_base = dst; ++ struct page *src_base = src; ++ ++ for (i = 0; i < pages_per_huge_page; ) { ++ cond_resched(); ++ copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); ++ ++ i++; ++ dst = mem_map_next(dst, dst_base, i); ++ src = mem_map_next(src, src_base, i); ++ } ++} ++ ++struct copy_subpage_arg { ++ struct page *dst; ++ struct page *src; ++ struct vm_area_struct *vma; ++}; ++ ++static void copy_subpage(unsigned long addr, int idx, void *arg) ++{ ++ struct copy_subpage_arg *copy_arg = arg; ++ ++ copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, ++ addr, copy_arg->vma); ++} ++ ++void copy_user_huge_page(struct page *dst, struct page *src, ++ unsigned long addr_hint, struct vm_area_struct *vma, ++ unsigned int pages_per_huge_page) ++{ ++ unsigned long addr = addr_hint & ++ ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); ++ struct copy_subpage_arg arg = { ++ .dst = dst, ++ .src = src, ++ .vma = vma, ++ }; ++ ++ if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { ++ copy_user_gigantic_page(dst, src, addr, vma, ++ pages_per_huge_page); ++ return; ++ } ++ ++ process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); ++} ++ ++long copy_huge_page_from_user(struct page *dst_page, ++ const void __user *usr_src, ++ unsigned int pages_per_huge_page, ++ bool allow_pagefault) ++{ ++ void *src = (void *)usr_src; ++ void *page_kaddr; ++ unsigned long i, rc = 0; ++ unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; ++ ++ for (i = 0; i < pages_per_huge_page; i++) { ++ if (allow_pagefault) ++ page_kaddr = kmap(dst_page + i); ++ else ++ page_kaddr = kmap_atomic(dst_page + i); ++ rc = copy_from_user(page_kaddr, ++ (const void __user *)(src + i * PAGE_SIZE), ++ PAGE_SIZE); ++ if (allow_pagefault) ++ kunmap(dst_page + i); ++ else ++ kunmap_atomic(page_kaddr); ++ ++ ret_val -= (PAGE_SIZE - rc); ++ if (rc) ++ break; ++ ++ cond_resched(); ++ } ++ return ret_val; ++} ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ ++ ++#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS ++ ++static struct kmem_cache *page_ptl_cachep; ++ ++void __init ptlock_cache_init(void) ++{ ++ page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, ++ SLAB_PANIC, NULL); ++} ++ ++bool ptlock_alloc(struct page *page) ++{ ++ spinlock_t *ptl; ++ ++ ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); ++ if (!ptl) ++ return false; ++ page->ptl = ptl; ++ return true; ++} ++ ++void ptlock_free(struct page *page) ++{ ++ kmem_cache_free(page_ptl_cachep, page->ptl); ++} ++#endif +diff -uprN kernel/mm/mlock.c 
kernel_new/mm/mlock.c +--- kernel/mm/mlock.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mlock.c 2021-04-01 18:28:07.816863108 +0800 +@@ -867,3 +867,29 @@ void user_shm_unlock(size_t size, struct + spin_unlock(&shmlock_user_lock); + free_uid(user); + } ++ ++#ifdef CONFIG_IPIPE ++int __ipipe_pin_vma(struct mm_struct *mm, struct vm_area_struct *vma) ++{ ++ unsigned int gup_flags = 0; ++ int ret, len; ++ ++ if (vma->vm_flags & (VM_IO | VM_PFNMAP)) ++ return 0; ++ ++ if (!((vma->vm_flags & VM_DONTEXPAND) || ++ is_vm_hugetlb_page(vma) || vma == get_gate_vma(mm))) { ++ ret = populate_vma_page_range(vma, vma->vm_start, vma->vm_end, ++ NULL); ++ return ret < 0 ? ret : 0; ++ } ++ ++ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) ++ gup_flags |= FOLL_WRITE; ++ len = DIV_ROUND_UP(vma->vm_end, PAGE_SIZE) - vma->vm_start/PAGE_SIZE; ++ ret = get_user_pages_locked(vma->vm_start, len, gup_flags, NULL, NULL); ++ if (ret < 0) ++ return ret; ++ return ret == len ? 0 : -EFAULT; ++} ++#endif +diff -uprN kernel/mm/mlock.c.orig kernel_new/mm/mlock.c.orig +--- kernel/mm/mlock.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/mlock.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,869 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * linux/mm/mlock.c ++ * ++ * (C) Copyright 1995 Linus Torvalds ++ * (C) Copyright 2002 Christoph Hellwig ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++bool can_do_mlock(void) ++{ ++ if (rlimit(RLIMIT_MEMLOCK) != 0) ++ return true; ++ if (capable(CAP_IPC_LOCK)) ++ return true; ++ return false; ++} ++EXPORT_SYMBOL(can_do_mlock); ++ ++/* ++ * Mlocked pages are marked with PageMlocked() flag for efficient testing ++ * in vmscan and, possibly, the fault path; and to support semi-accurate ++ * statistics. ++ * ++ * An mlocked page [PageMlocked(page)] is unevictable. As such, it will ++ * be placed on the LRU "unevictable" list, rather than the [in]active lists. ++ * The unevictable list is an LRU sibling list to the [in]active lists. ++ * PageUnevictable is set to indicate the unevictable state. ++ * ++ * When lazy mlocking via vmscan, it is important to ensure that the ++ * vma's VM_LOCKED status is not concurrently being modified, otherwise we ++ * may have mlocked a page that is being munlocked. So lazy mlock must take ++ * the mmap_sem for read, and verify that the vma really is locked ++ * (see mm/rmap.c). ++ */ ++ ++/* ++ * LRU accounting for clear_page_mlock() ++ */ ++void clear_page_mlock(struct page *page) ++{ ++ if (!TestClearPageMlocked(page)) ++ return; ++ ++ mod_zone_page_state(page_zone(page), NR_MLOCK, ++ -hpage_nr_pages(page)); ++ count_vm_event(UNEVICTABLE_PGCLEARED); ++ /* ++ * The previous TestClearPageMlocked() corresponds to the smp_mb() ++ * in __pagevec_lru_add_fn(). ++ * ++ * See __pagevec_lru_add_fn for more explanation. ++ */ ++ if (!isolate_lru_page(page)) { ++ putback_lru_page(page); ++ } else { ++ /* ++ * We lost the race. the page already moved to evictable list. ++ */ ++ if (PageUnevictable(page)) ++ count_vm_event(UNEVICTABLE_PGSTRANDED); ++ } ++} ++ ++/* ++ * Mark page as mlocked if not already. ++ * If page on LRU, isolate and putback to move to unevictable list. 
++ */ ++void mlock_vma_page(struct page *page) ++{ ++ /* Serialize with page migration */ ++ BUG_ON(!PageLocked(page)); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); ++ ++ if (!TestSetPageMlocked(page)) { ++ mod_zone_page_state(page_zone(page), NR_MLOCK, ++ hpage_nr_pages(page)); ++ count_vm_event(UNEVICTABLE_PGMLOCKED); ++ if (!isolate_lru_page(page)) ++ putback_lru_page(page); ++ } ++} ++ ++/* ++ * Isolate a page from LRU with optional get_page() pin. ++ * Assumes lru_lock already held and page already pinned. ++ */ ++static bool __munlock_isolate_lru_page(struct page *page, bool getpage) ++{ ++ if (PageLRU(page)) { ++ struct lruvec *lruvec; ++ ++ lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); ++ if (getpage) ++ get_page(page); ++ ClearPageLRU(page); ++ del_page_from_lru_list(page, lruvec, page_lru(page)); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Finish munlock after successful page isolation ++ * ++ * Page must be locked. This is a wrapper for try_to_munlock() ++ * and putback_lru_page() with munlock accounting. ++ */ ++static void __munlock_isolated_page(struct page *page) ++{ ++ /* ++ * Optimization: if the page was mapped just once, that's our mapping ++ * and we don't need to check all the other vmas. ++ */ ++ if (page_mapcount(page) > 1) ++ try_to_munlock(page); ++ ++ /* Did try_to_unlock() succeed or punt? */ ++ if (!PageMlocked(page)) ++ count_vm_event(UNEVICTABLE_PGMUNLOCKED); ++ ++ putback_lru_page(page); ++} ++ ++/* ++ * Accounting for page isolation fail during munlock ++ * ++ * Performs accounting when page isolation fails in munlock. There is nothing ++ * else to do because it means some other task has already removed the page ++ * from the LRU. putback_lru_page() will take care of removing the page from ++ * the unevictable list, if necessary. vmscan [page_referenced()] will move ++ * the page back to the unevictable list if some other vma has it mlocked. ++ */ ++static void __munlock_isolation_failed(struct page *page) ++{ ++ if (PageUnevictable(page)) ++ __count_vm_event(UNEVICTABLE_PGSTRANDED); ++ else ++ __count_vm_event(UNEVICTABLE_PGMUNLOCKED); ++} ++ ++/** ++ * munlock_vma_page - munlock a vma page ++ * @page: page to be unlocked, either a normal page or THP page head ++ * ++ * returns the size of the page as a page mask (0 for normal page, ++ * HPAGE_PMD_NR - 1 for THP head page) ++ * ++ * called from munlock()/munmap() path with page supposedly on the LRU. ++ * When we munlock a page, because the vma where we found the page is being ++ * munlock()ed or munmap()ed, we want to check whether other vmas hold the ++ * page locked so that we can leave it on the unevictable lru list and not ++ * bother vmscan with it. However, to walk the page's rmap list in ++ * try_to_munlock() we must isolate the page from the LRU. If some other ++ * task has removed the page from the LRU, we won't be able to do that. ++ * So we clear the PageMlocked as we might not get another chance. If we ++ * can't isolate the page, we leave it for putback_lru_page() and vmscan ++ * [page_referenced()/try_to_unmap()] to deal with. 
++ */ ++unsigned int munlock_vma_page(struct page *page) ++{ ++ int nr_pages; ++ struct zone *zone = page_zone(page); ++ ++ /* For try_to_munlock() and to serialize with page migration */ ++ BUG_ON(!PageLocked(page)); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ ++ /* ++ * Serialize with any parallel __split_huge_page_refcount() which ++ * might otherwise copy PageMlocked to part of the tail pages before ++ * we clear it in the head page. It also stabilizes hpage_nr_pages(). ++ */ ++ spin_lock_irq(zone_lru_lock(zone)); ++ ++ if (!TestClearPageMlocked(page)) { ++ /* Potentially, PTE-mapped THP: do not skip the rest PTEs */ ++ nr_pages = 1; ++ goto unlock_out; ++ } ++ ++ nr_pages = hpage_nr_pages(page); ++ __mod_zone_page_state(zone, NR_MLOCK, -nr_pages); ++ ++ if (__munlock_isolate_lru_page(page, true)) { ++ spin_unlock_irq(zone_lru_lock(zone)); ++ __munlock_isolated_page(page); ++ goto out; ++ } ++ __munlock_isolation_failed(page); ++ ++unlock_out: ++ spin_unlock_irq(zone_lru_lock(zone)); ++ ++out: ++ return nr_pages - 1; ++} ++ ++/* ++ * convert get_user_pages() return value to posix mlock() error ++ */ ++static int __mlock_posix_error_return(long retval) ++{ ++ if (retval == -EFAULT) ++ retval = -ENOMEM; ++ else if (retval == -ENOMEM) ++ retval = -EAGAIN; ++ return retval; ++} ++ ++/* ++ * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec() ++ * ++ * The fast path is available only for evictable pages with single mapping. ++ * Then we can bypass the per-cpu pvec and get better performance. ++ * when mapcount > 1 we need try_to_munlock() which can fail. ++ * when !page_evictable(), we need the full redo logic of putback_lru_page to ++ * avoid leaving evictable page in unevictable list. ++ * ++ * In case of success, @page is added to @pvec and @pgrescued is incremented ++ * in case that the page was previously unevictable. @page is also unlocked. ++ */ ++static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, ++ int *pgrescued) ++{ ++ VM_BUG_ON_PAGE(PageLRU(page), page); ++ VM_BUG_ON_PAGE(!PageLocked(page), page); ++ ++ if (page_mapcount(page) <= 1 && page_evictable(page)) { ++ pagevec_add(pvec, page); ++ if (TestClearPageUnevictable(page)) ++ (*pgrescued)++; ++ unlock_page(page); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Putback multiple evictable pages to the LRU ++ * ++ * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of ++ * the pages might have meanwhile become unevictable but that is OK. ++ */ ++static void __putback_lru_fast(struct pagevec *pvec, int pgrescued) ++{ ++ count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec)); ++ /* ++ *__pagevec_lru_add() calls release_pages() so we don't call ++ * put_page() explicitly ++ */ ++ __pagevec_lru_add(pvec); ++ count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); ++} ++ ++/* ++ * Munlock a batch of pages from the same zone ++ * ++ * The work is split to two main phases. First phase clears the Mlocked flag ++ * and attempts to isolate the pages, all under a single zone lru lock. ++ * The second phase finishes the munlock only for pages where isolation ++ * succeeded. ++ * ++ * Note that the pagevec may be modified during the process. 
++ */ ++static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) ++{ ++ int i; ++ int nr = pagevec_count(pvec); ++ int delta_munlocked = -nr; ++ struct pagevec pvec_putback; ++ int pgrescued = 0; ++ ++ pagevec_init(&pvec_putback); ++ ++ /* Phase 1: page isolation */ ++ spin_lock_irq(zone_lru_lock(zone)); ++ for (i = 0; i < nr; i++) { ++ struct page *page = pvec->pages[i]; ++ ++ if (TestClearPageMlocked(page)) { ++ /* ++ * We already have pin from follow_page_mask() ++ * so we can spare the get_page() here. ++ */ ++ if (__munlock_isolate_lru_page(page, false)) ++ continue; ++ else ++ __munlock_isolation_failed(page); ++ } else { ++ delta_munlocked++; ++ } ++ ++ /* ++ * We won't be munlocking this page in the next phase ++ * but we still need to release the follow_page_mask() ++ * pin. We cannot do it under lru_lock however. If it's ++ * the last pin, __page_cache_release() would deadlock. ++ */ ++ pagevec_add(&pvec_putback, pvec->pages[i]); ++ pvec->pages[i] = NULL; ++ } ++ __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); ++ spin_unlock_irq(zone_lru_lock(zone)); ++ ++ /* Now we can release pins of pages that we are not munlocking */ ++ pagevec_release(&pvec_putback); ++ ++ /* Phase 2: page munlock */ ++ for (i = 0; i < nr; i++) { ++ struct page *page = pvec->pages[i]; ++ ++ if (page) { ++ lock_page(page); ++ if (!__putback_lru_fast_prepare(page, &pvec_putback, ++ &pgrescued)) { ++ /* ++ * Slow path. We don't want to lose the last ++ * pin before unlock_page() ++ */ ++ get_page(page); /* for putback_lru_page() */ ++ __munlock_isolated_page(page); ++ unlock_page(page); ++ put_page(page); /* from follow_page_mask() */ ++ } ++ } ++ } ++ ++ /* ++ * Phase 3: page putback for pages that qualified for the fast path ++ * This will also call put_page() to return pin from follow_page_mask() ++ */ ++ if (pagevec_count(&pvec_putback)) ++ __putback_lru_fast(&pvec_putback, pgrescued); ++} ++ ++/* ++ * Fill up pagevec for __munlock_pagevec using pte walk ++ * ++ * The function expects that the struct page corresponding to @start address is ++ * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone. ++ * ++ * The rest of @pvec is filled by subsequent pages within the same pmd and same ++ * zone, as long as the pte's are present and vm_normal_page() succeeds. These ++ * pages also get pinned. ++ * ++ * Returns the address of the next page that should be scanned. This equals ++ * @start + PAGE_SIZE when no page could be added by the pte walk. ++ */ ++static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, ++ struct vm_area_struct *vma, struct zone *zone, ++ unsigned long start, unsigned long end) ++{ ++ pte_t *pte; ++ spinlock_t *ptl; ++ ++ /* ++ * Initialize pte walk starting at the already pinned page where we ++ * are sure that there is a pte, as it was pinned under the same ++ * mmap_sem write op. 
++ */ ++ pte = get_locked_pte(vma->vm_mm, start, &ptl); ++ /* Make sure we do not cross the page table boundary */ ++ end = pgd_addr_end(start, end); ++ end = p4d_addr_end(start, end); ++ end = pud_addr_end(start, end); ++ end = pmd_addr_end(start, end); ++ ++ /* The page next to the pinned page is the first we will try to get */ ++ start += PAGE_SIZE; ++ while (start < end) { ++ struct page *page = NULL; ++ pte++; ++ if (pte_present(*pte)) ++ page = vm_normal_page(vma, start, *pte); ++ /* ++ * Break if page could not be obtained or the page's node+zone does not ++ * match ++ */ ++ if (!page || page_zone(page) != zone) ++ break; ++ ++ /* ++ * Do not use pagevec for PTE-mapped THP, ++ * munlock_vma_pages_range() will handle them. ++ */ ++ if (PageTransCompound(page)) ++ break; ++ ++ get_page(page); ++ /* ++ * Increase the address that will be returned *before* the ++ * eventual break due to pvec becoming full by adding the page ++ */ ++ start += PAGE_SIZE; ++ if (pagevec_add(pvec, page) == 0) ++ break; ++ } ++ pte_unmap_unlock(pte, ptl); ++ return start; ++} ++ ++/* ++ * munlock_vma_pages_range() - munlock all pages in the vma range.' ++ * @vma - vma containing range to be munlock()ed. ++ * @start - start address in @vma of the range ++ * @end - end of range in @vma. ++ * ++ * For mremap(), munmap() and exit(). ++ * ++ * Called with @vma VM_LOCKED. ++ * ++ * Returns with VM_LOCKED cleared. Callers must be prepared to ++ * deal with this. ++ * ++ * We don't save and restore VM_LOCKED here because pages are ++ * still on lru. In unmap path, pages might be scanned by reclaim ++ * and re-mlocked by try_to_{munlock|unmap} before we unmap and ++ * free them. This will result in freeing mlocked pages. ++ */ ++void munlock_vma_pages_range(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) ++{ ++ vma->vm_flags &= VM_LOCKED_CLEAR_MASK; ++ ++ while (start < end) { ++ struct page *page; ++ unsigned int page_mask = 0; ++ unsigned long page_increm; ++ struct pagevec pvec; ++ struct zone *zone; ++ ++ pagevec_init(&pvec); ++ /* ++ * Although FOLL_DUMP is intended for get_dump_page(), ++ * it just so happens that its special treatment of the ++ * ZERO_PAGE (returning an error instead of doing get_page) ++ * suits munlock very well (and if somehow an abnormal page ++ * has sneaked into the range, we won't oops here: great). ++ */ ++ page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); ++ ++ if (page && !IS_ERR(page)) { ++ if (PageTransTail(page)) { ++ VM_BUG_ON_PAGE(PageMlocked(page), page); ++ put_page(page); /* follow_page_mask() */ ++ } else if (PageTransHuge(page)) { ++ lock_page(page); ++ /* ++ * Any THP page found by follow_page_mask() may ++ * have gotten split before reaching ++ * munlock_vma_page(), so we need to compute ++ * the page_mask here instead. ++ */ ++ page_mask = munlock_vma_page(page); ++ unlock_page(page); ++ put_page(page); /* follow_page_mask() */ ++ } else { ++ /* ++ * Non-huge pages are handled in batches via ++ * pagevec. The pin from follow_page_mask() ++ * prevents them from collapsing by THP. ++ */ ++ pagevec_add(&pvec, page); ++ zone = page_zone(page); ++ ++ /* ++ * Try to fill the rest of pagevec using fast ++ * pte walk. This will also update start to ++ * the next page to process. Then munlock the ++ * pagevec. 
++ */ ++ start = __munlock_pagevec_fill(&pvec, vma, ++ zone, start, end); ++ __munlock_pagevec(&pvec, zone); ++ goto next; ++ } ++ } ++ page_increm = 1 + page_mask; ++ start += page_increm * PAGE_SIZE; ++next: ++ cond_resched(); ++ } ++} ++ ++/* ++ * mlock_fixup - handle mlock[all]/munlock[all] requests. ++ * ++ * Filters out "special" vmas -- VM_LOCKED never gets set for these, and ++ * munlock is a no-op. However, for some special vmas, we go ahead and ++ * populate the ptes. ++ * ++ * For vmas that pass the filters, merge/split as appropriate. ++ */ ++static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, ++ unsigned long start, unsigned long end, vm_flags_t newflags) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgoff_t pgoff; ++ int nr_pages; ++ int ret = 0; ++ int lock = !!(newflags & VM_LOCKED); ++ vm_flags_t old_flags = vma->vm_flags; ++ ++ if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || ++ is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || ++ vma_is_dax(vma)) ++ /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ ++ goto out; ++ ++ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); ++ *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, ++ vma->vm_file, pgoff, vma_policy(vma), ++ vma->vm_userfaultfd_ctx); ++ if (*prev) { ++ vma = *prev; ++ goto success; ++ } ++ ++ if (start != vma->vm_start) { ++ ret = split_vma(mm, vma, start, 1); ++ if (ret) ++ goto out; ++ } ++ ++ if (end != vma->vm_end) { ++ ret = split_vma(mm, vma, end, 0); ++ if (ret) ++ goto out; ++ } ++ ++success: ++ /* ++ * Keep track of amount of locked VM. ++ */ ++ nr_pages = (end - start) >> PAGE_SHIFT; ++ if (!lock) ++ nr_pages = -nr_pages; ++ else if (old_flags & VM_LOCKED) ++ nr_pages = 0; ++ atomic_long_add(nr_pages, &mm->locked_vm); ++ ++ /* ++ * vm_flags is protected by the mmap_sem held in write mode. ++ * It's okay if try_to_unmap_one unmaps a page just after we ++ * set VM_LOCKED, populate_vma_page_range will bring it back. ++ */ ++ ++ if (lock) ++ vma->vm_flags = newflags; ++ else ++ munlock_vma_pages_range(vma, start, end); ++ ++out: ++ *prev = vma; ++ return ret; ++} ++ ++static int apply_vma_lock_flags(unsigned long start, size_t len, ++ vm_flags_t flags) ++{ ++ unsigned long nstart, end, tmp; ++ struct vm_area_struct * vma, * prev; ++ int error; ++ ++ VM_BUG_ON(offset_in_page(start)); ++ VM_BUG_ON(len != PAGE_ALIGN(len)); ++ end = start + len; ++ if (end < start) ++ return -EINVAL; ++ if (end == start) ++ return 0; ++ vma = find_vma(current->mm, start); ++ if (!vma || vma->vm_start > start) ++ return -ENOMEM; ++ ++ prev = vma->vm_prev; ++ if (start > vma->vm_start) ++ prev = vma; ++ ++ for (nstart = start ; ; ) { ++ vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; ++ ++ newflags |= flags; ++ ++ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ ++ tmp = vma->vm_end; ++ if (tmp > end) ++ tmp = end; ++ error = mlock_fixup(vma, &prev, nstart, tmp, newflags); ++ if (error) ++ break; ++ nstart = tmp; ++ if (nstart < prev->vm_end) ++ nstart = prev->vm_end; ++ if (nstart >= end) ++ break; ++ ++ vma = prev->vm_next; ++ if (!vma || vma->vm_start != nstart) { ++ error = -ENOMEM; ++ break; ++ } ++ } ++ return error; ++} ++ ++/* ++ * Go through vma areas and sum size of mlocked ++ * vma pages, as return value. ++ * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT) ++ * is also counted. 
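++ * Only the part of each VM_LOCKED vma that intersects [start, start + len)
++ * contributes to the sum.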
++ * Return value: previously mlocked page counts ++ */ ++static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm, ++ unsigned long start, size_t len) ++{ ++ struct vm_area_struct *vma; ++ unsigned long count = 0; ++ ++ if (mm == NULL) ++ mm = current->mm; ++ ++ vma = find_vma(mm, start); ++ if (vma == NULL) ++ vma = mm->mmap; ++ ++ for (; vma ; vma = vma->vm_next) { ++ if (start >= vma->vm_end) ++ continue; ++ if (start + len <= vma->vm_start) ++ break; ++ if (vma->vm_flags & VM_LOCKED) { ++ if (start > vma->vm_start) ++ count -= (start - vma->vm_start); ++ if (start + len < vma->vm_end) { ++ count += start + len - vma->vm_start; ++ break; ++ } ++ count += vma->vm_end - vma->vm_start; ++ } ++ } ++ ++ return count >> PAGE_SHIFT; ++} ++ ++static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) ++{ ++ unsigned long locked; ++ unsigned long lock_limit; ++ int error = -ENOMEM; ++ ++ if (!can_do_mlock()) ++ return -EPERM; ++ ++ len = PAGE_ALIGN(len + (offset_in_page(start))); ++ start &= PAGE_MASK; ++ ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ lock_limit >>= PAGE_SHIFT; ++ locked = len >> PAGE_SHIFT; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ locked += atomic_long_read(¤t->mm->locked_vm); ++ if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { ++ /* ++ * It is possible that the regions requested intersect with ++ * previously mlocked areas, that part area in "mm->locked_vm" ++ * should not be counted to new mlock increment count. So check ++ * and adjust locked count if necessary. ++ */ ++ locked -= count_mm_mlocked_page_nr(current->mm, ++ start, len); ++ } ++ ++ /* check against resource limits */ ++ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) ++ error = apply_vma_lock_flags(start, len, flags); ++ ++ up_write(¤t->mm->mmap_sem); ++ if (error) ++ return error; ++ ++ error = __mm_populate(start, len, 0); ++ if (error) ++ return __mlock_posix_error_return(error); ++ return 0; ++} ++ ++SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) ++{ ++ return do_mlock(start, len, VM_LOCKED); ++} ++ ++SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) ++{ ++ vm_flags_t vm_flags = VM_LOCKED; ++ ++ if (flags & ~MLOCK_ONFAULT) ++ return -EINVAL; ++ ++ if (flags & MLOCK_ONFAULT) ++ vm_flags |= VM_LOCKONFAULT; ++ ++ return do_mlock(start, len, vm_flags); ++} ++ ++SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) ++{ ++ int ret; ++ ++ len = PAGE_ALIGN(len + (offset_in_page(start))); ++ start &= PAGE_MASK; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ret = apply_vma_lock_flags(start, len, 0); ++ up_write(¤t->mm->mmap_sem); ++ ++ return ret; ++} ++ ++/* ++ * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) ++ * and translate into the appropriate modifications to mm->def_flags and/or the ++ * flags for all current VMAs. ++ * ++ * There are a couple of subtleties with this. If mlockall() is called multiple ++ * times with different flags, the values do not necessarily stack. If mlockall ++ * is called once including the MCL_FUTURE flag and then a second time without ++ * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. 
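++ *
++ * Note that MCL_ONFAULT on its own is rejected with -EINVAL by the mlockall()
++ * syscall; it only modifies the behaviour of MCL_CURRENT and MCL_FUTURE.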
++ */ ++static int apply_mlockall_flags(int flags) ++{ ++ struct vm_area_struct * vma, * prev = NULL; ++ vm_flags_t to_add = 0; ++ ++ current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; ++ if (flags & MCL_FUTURE) { ++ current->mm->def_flags |= VM_LOCKED; ++ ++ if (flags & MCL_ONFAULT) ++ current->mm->def_flags |= VM_LOCKONFAULT; ++ ++ if (!(flags & MCL_CURRENT)) ++ goto out; ++ } ++ ++ if (flags & MCL_CURRENT) { ++ to_add |= VM_LOCKED; ++ if (flags & MCL_ONFAULT) ++ to_add |= VM_LOCKONFAULT; ++ } ++ ++ for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { ++ vm_flags_t newflags; ++ ++ newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; ++ newflags |= to_add; ++ ++ /* Ignore errors */ ++ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); ++ cond_resched(); ++ } ++out: ++ return 0; ++} ++ ++SYSCALL_DEFINE1(mlockall, int, flags) ++{ ++ unsigned long lock_limit; ++ int ret; ++ ++ if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || ++ flags == MCL_ONFAULT) ++ return -EINVAL; ++ ++ if (!can_do_mlock()) ++ return -EPERM; ++ ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ lock_limit >>= PAGE_SHIFT; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ ret = -ENOMEM; ++ if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || ++ capable(CAP_IPC_LOCK)) ++ ret = apply_mlockall_flags(flags); ++ up_write(¤t->mm->mmap_sem); ++ if (!ret && (flags & MCL_CURRENT)) ++ mm_populate(0, TASK_SIZE); ++ ++ return ret; ++} ++ ++SYSCALL_DEFINE0(munlockall) ++{ ++ int ret; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ret = apply_mlockall_flags(0); ++ up_write(¤t->mm->mmap_sem); ++ return ret; ++} ++ ++/* ++ * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB ++ * shm segments) get accounted against the user_struct instead. 
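++ * The per-user locked_shm count is protected by shmlock_user_lock below.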
++ */ ++static DEFINE_SPINLOCK(shmlock_user_lock); ++ ++int user_shm_lock(size_t size, struct user_struct *user) ++{ ++ unsigned long lock_limit, locked; ++ int allowed = 0; ++ ++ locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ lock_limit = rlimit(RLIMIT_MEMLOCK); ++ if (lock_limit == RLIM_INFINITY) ++ allowed = 1; ++ lock_limit >>= PAGE_SHIFT; ++ spin_lock(&shmlock_user_lock); ++ if (!allowed && ++ locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK)) ++ goto out; ++ get_uid(user); ++ user->locked_shm += locked; ++ allowed = 1; ++out: ++ spin_unlock(&shmlock_user_lock); ++ return allowed; ++} ++ ++void user_shm_unlock(size_t size, struct user_struct *user) ++{ ++ spin_lock(&shmlock_user_lock); ++ user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ spin_unlock(&shmlock_user_lock); ++ free_uid(user); ++} +diff -uprN kernel/mm/mmu_context.c kernel_new/mm/mmu_context.c +--- kernel/mm/mmu_context.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mmu_context.c 2021-04-01 18:28:07.816863108 +0800 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include + +@@ -23,15 +24,18 @@ void use_mm(struct mm_struct *mm) + { + struct mm_struct *active_mm; + struct task_struct *tsk = current; ++ unsigned long flags; + + task_lock(tsk); + active_mm = tsk->active_mm; ++ ipipe_mm_switch_protect(flags); + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); ++ ipipe_mm_switch_unprotect(flags); + task_unlock(tsk); + #ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +diff -uprN kernel/mm/mprotect.c kernel_new/mm/mprotect.c +--- kernel/mm/mprotect.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/mprotect.c 2021-04-01 18:28:07.817863107 +0800 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -42,7 +43,7 @@ static unsigned long change_pte_range(st + struct mm_struct *mm = vma->vm_mm; + pte_t *pte, oldpte; + spinlock_t *ptl; +- unsigned long pages = 0; ++ unsigned long pages = 0, flags; + int target_node = NUMA_NO_NODE; + + /* +@@ -110,6 +111,7 @@ static unsigned long change_pte_range(st + continue; + } + ++ flags = hard_local_irq_save(); + ptent = ptep_modify_prot_start(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + if (preserve_write) +@@ -122,6 +124,7 @@ static unsigned long change_pte_range(st + ptent = pte_mkwrite(ptent); + } + ptep_modify_prot_commit(mm, addr, pte, ptent); ++ hard_local_irq_restore(flags); + pages++; + } else if (IS_ENABLED(CONFIG_MIGRATION)) { + swp_entry_t entry = pte_to_swp_entry(oldpte); +@@ -336,6 +339,12 @@ unsigned long change_protection(struct v + pages = hugetlb_change_protection(vma, start, end, newprot); + else + pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); ++#ifdef CONFIG_IPIPE ++ if (test_bit(MMF_VM_PINNED, &vma->vm_mm->flags) && ++ ((vma->vm_flags | vma->vm_mm->def_flags) & VM_LOCKED) && ++ (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) ++ __ipipe_pin_vma(vma->vm_mm, vma); ++#endif + + return pages; + } +diff -uprN kernel/mm/mprotect.c.orig kernel_new/mm/mprotect.c.orig +--- kernel/mm/mprotect.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/mprotect.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,670 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * mm/mprotect.c ++ * ++ * (C) Copyright 1994 Linus Torvalds ++ * (C) Copyright 2002 Christoph Hellwig ++ * ++ * Address space accounting code ++ * (C) Copyright 2002 Red Hat Inc, 
All Rights Reserved ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ++ unsigned long addr, unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pte_t *pte, oldpte; ++ spinlock_t *ptl; ++ unsigned long pages = 0; ++ int target_node = NUMA_NO_NODE; ++ ++ /* ++ * Can be called with only the mmap_sem for reading by ++ * prot_numa so we must check the pmd isn't constantly ++ * changing from under us from pmd_none to pmd_trans_huge ++ * and/or the other way around. ++ */ ++ if (pmd_trans_unstable(pmd)) ++ return 0; ++ ++ /* ++ * The pmd points to a regular pte so the pmd can't change ++ * from under us even if the mmap_sem is only hold for ++ * reading. ++ */ ++ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); ++ ++ /* Get target node for single threaded private VMAs */ ++ if (prot_numa && !(vma->vm_flags & VM_SHARED) && ++ atomic_read(&vma->vm_mm->mm_users) == 1) ++ target_node = numa_node_id(); ++ ++ flush_tlb_batched_pending(vma->vm_mm); ++ arch_enter_lazy_mmu_mode(); ++ do { ++ oldpte = *pte; ++ if (pte_present(oldpte)) { ++ pte_t ptent; ++ bool preserve_write = prot_numa && pte_write(oldpte); ++ ++ /* ++ * Avoid trapping faults against the zero or KSM ++ * pages. See similar comment in change_huge_pmd. ++ */ ++ if (prot_numa) { ++ struct page *page; ++ ++ page = vm_normal_page(vma, addr, oldpte); ++ if (!page || PageKsm(page)) ++ continue; ++ ++ /* Also skip shared copy-on-write pages */ ++ if (is_cow_mapping(vma->vm_flags) && ++ page_mapcount(page) != 1) ++ continue; ++ ++ /* ++ * While migration can move some dirty pages, ++ * it cannot move them all from MIGRATE_ASYNC ++ * context. ++ */ ++ if (page_is_file_cache(page) && PageDirty(page)) ++ continue; ++ ++ /* Avoid TLB flush if possible */ ++ if (pte_protnone(oldpte)) ++ continue; ++ ++ /* ++ * Don't mess with PTEs if page is already on the node ++ * a single-threaded process is running on. ++ */ ++ if (target_node == page_to_nid(page)) ++ continue; ++ } ++ ++ ptent = ptep_modify_prot_start(mm, addr, pte); ++ ptent = pte_modify(ptent, newprot); ++ if (preserve_write) ++ ptent = pte_mk_savedwrite(ptent); ++ ++ /* Avoid taking write faults for known dirty pages */ ++ if (dirty_accountable && pte_dirty(ptent) && ++ (pte_soft_dirty(ptent) || ++ !(vma->vm_flags & VM_SOFTDIRTY))) { ++ ptent = pte_mkwrite(ptent); ++ } ++ ptep_modify_prot_commit(mm, addr, pte, ptent); ++ pages++; ++ } else if (IS_ENABLED(CONFIG_MIGRATION)) { ++ swp_entry_t entry = pte_to_swp_entry(oldpte); ++ ++ if (is_write_migration_entry(entry)) { ++ pte_t newpte; ++ /* ++ * A protection check is difficult so ++ * just be safe and disable write ++ */ ++ make_migration_entry_read(&entry); ++ newpte = swp_entry_to_pte(entry); ++ if (pte_swp_soft_dirty(oldpte)) ++ newpte = pte_swp_mksoft_dirty(newpte); ++ set_pte_at(mm, addr, pte, newpte); ++ ++ pages++; ++ } ++ ++ if (is_write_device_private_entry(entry)) { ++ pte_t newpte; ++ ++ /* ++ * We do not preserve soft-dirtiness. See ++ * copy_one_pte() for explanation. 
++ */ ++ make_device_private_entry_read(&entry); ++ newpte = swp_entry_to_pte(entry); ++ set_pte_at(mm, addr, pte, newpte); ++ ++ pages++; ++ } ++ } ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(pte - 1, ptl); ++ ++ return pages; ++} ++ ++/* ++ * Used when setting automatic NUMA hinting protection where it is ++ * critical that a numa hinting PMD is not confused with a bad PMD. ++ */ ++static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) ++{ ++ pmd_t pmdval = pmd_read_atomic(pmd); ++ ++ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ barrier(); ++#endif ++ ++ if (pmd_none(pmdval)) ++ return 1; ++ if (pmd_trans_huge(pmdval)) ++ return 0; ++ if (unlikely(pmd_bad(pmdval))) { ++ pmd_clear_bad(pmd); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static inline unsigned long change_pmd_range(struct vm_area_struct *vma, ++ pud_t *pud, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, int prot_numa) ++{ ++ pmd_t *pmd; ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long next; ++ unsigned long pages = 0; ++ unsigned long nr_huge_updates = 0; ++ unsigned long mni_start = 0; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ unsigned long this_pages; ++ ++ next = pmd_addr_end(addr, end); ++ ++ /* ++ * Automatic NUMA balancing walks the tables with mmap_sem ++ * held for read. It's possible a parallel update to occur ++ * between pmd_trans_huge() and a pmd_none_or_clear_bad() ++ * check leading to a false positive and clearing. ++ * Hence, it's necessary to atomically read the PMD value ++ * for all the checks. ++ */ ++ if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && ++ pmd_none_or_clear_bad_unless_trans_huge(pmd)) ++ goto next; ++ ++ /* invoke the mmu notifier if the pmd is populated */ ++ if (!mni_start) { ++ mni_start = addr; ++ mmu_notifier_invalidate_range_start(mm, mni_start, end); ++ } ++ ++ if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { ++ if (next - addr != HPAGE_PMD_SIZE) { ++ __split_huge_pmd(vma, pmd, addr, false, NULL); ++ } else { ++ int nr_ptes = change_huge_pmd(vma, pmd, addr, ++ newprot, prot_numa); ++ ++ if (nr_ptes) { ++ if (nr_ptes == HPAGE_PMD_NR) { ++ pages += HPAGE_PMD_NR; ++ nr_huge_updates++; ++ } ++ ++ /* huge pmd was handled */ ++ goto next; ++ } ++ } ++ /* fall through, the trans huge pmd just split */ ++ } ++ this_pages = change_pte_range(vma, pmd, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ pages += this_pages; ++next: ++ cond_resched(); ++ } while (pmd++, addr = next, addr != end); ++ ++ if (mni_start) ++ mmu_notifier_invalidate_range_end(mm, mni_start, end); ++ ++ if (nr_huge_updates) ++ count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); ++ return pages; ++} ++ ++static inline unsigned long change_pud_range(struct vm_area_struct *vma, ++ p4d_t *p4d, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, int prot_numa) ++{ ++ pud_t *pud; ++ unsigned long next; ++ unsigned long pages = 0; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ pages += change_pmd_range(vma, pud, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (pud++, addr = next, addr != end); ++ ++ return pages; ++} ++ ++static inline unsigned long change_p4d_range(struct vm_area_struct *vma, ++ pgd_t *pgd, unsigned long addr, unsigned long end, ++ pgprot_t newprot, int dirty_accountable, 
int prot_numa) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ unsigned long pages = 0; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ pages += change_pud_range(vma, p4d, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (p4d++, addr = next, addr != end); ++ ++ return pages; ++} ++ ++static unsigned long change_protection_range(struct vm_area_struct *vma, ++ unsigned long addr, unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long start = addr; ++ unsigned long pages = 0; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset(mm, addr); ++ flush_cache_range(vma, addr, end); ++ inc_tlb_flush_pending(mm); ++ do { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ pages += change_p4d_range(vma, pgd, addr, next, newprot, ++ dirty_accountable, prot_numa); ++ } while (pgd++, addr = next, addr != end); ++ ++ /* Only flush the TLB if we actually modified any entries: */ ++ if (pages) ++ flush_tlb_range(vma, start, end); ++ dec_tlb_flush_pending(mm); ++ ++ return pages; ++} ++ ++unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, pgprot_t newprot, ++ int dirty_accountable, int prot_numa) ++{ ++ unsigned long pages; ++ ++ if (is_vm_hugetlb_page(vma)) ++ pages = hugetlb_change_protection(vma, start, end, newprot); ++ else ++ pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); ++ ++ return pages; ++} ++ ++static int prot_none_pte_entry(pte_t *pte, unsigned long addr, ++ unsigned long next, struct mm_walk *walk) ++{ ++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? ++ 0 : -EACCES; ++} ++ ++static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, ++ unsigned long addr, unsigned long next, ++ struct mm_walk *walk) ++{ ++ return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? ++ 0 : -EACCES; ++} ++ ++static int prot_none_test(unsigned long addr, unsigned long next, ++ struct mm_walk *walk) ++{ ++ return 0; ++} ++ ++static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, ++ unsigned long end, unsigned long newflags) ++{ ++ pgprot_t new_pgprot = vm_get_page_prot(newflags); ++ struct mm_walk prot_none_walk = { ++ .pte_entry = prot_none_pte_entry, ++ .hugetlb_entry = prot_none_hugetlb_entry, ++ .test_walk = prot_none_test, ++ .mm = current->mm, ++ .private = &new_pgprot, ++ }; ++ ++ return walk_page_range(start, end, &prot_none_walk); ++} ++ ++int ++mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, ++ unsigned long start, unsigned long end, unsigned long newflags) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ unsigned long oldflags = vma->vm_flags; ++ long nrpages = (end - start) >> PAGE_SHIFT; ++ unsigned long charged = 0; ++ pgoff_t pgoff; ++ int error; ++ int dirty_accountable = 0; ++ ++ if (newflags == oldflags) { ++ *pprev = vma; ++ return 0; ++ } ++ ++ /* ++ * Do PROT_NONE PFN permission checks here when we can still ++ * bail out without undoing a lot of state. This is a rather ++ * uncommon case, so doesn't need to be very optimized. 
++ */ ++ if (arch_has_pfn_modify_check() && ++ (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && ++ (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { ++ error = prot_none_walk(vma, start, end, newflags); ++ if (error) ++ return error; ++ } ++ ++ /* ++ * If we make a private mapping writable we increase our commit; ++ * but (without finer accounting) cannot reduce our commit if we ++ * make it unwritable again. hugetlb mapping were accounted for ++ * even if read-only so there is no need to account for them here ++ */ ++ if (newflags & VM_WRITE) { ++ /* Check space limits when area turns into data. */ ++ if (!may_expand_vm(mm, newflags, nrpages) && ++ may_expand_vm(mm, oldflags, nrpages)) ++ return -ENOMEM; ++ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| ++ VM_SHARED|VM_NORESERVE))) { ++ charged = nrpages; ++ if (security_vm_enough_memory_mm(mm, charged)) ++ return -ENOMEM; ++ newflags |= VM_ACCOUNT; ++ } ++ } ++ ++ /* ++ * First try to merge with previous and/or next vma. ++ */ ++ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); ++ *pprev = vma_merge(mm, *pprev, start, end, newflags, ++ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ++ vma->vm_userfaultfd_ctx); ++ if (*pprev) { ++ vma = *pprev; ++ VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); ++ goto success; ++ } ++ ++ *pprev = vma; ++ ++ if (start != vma->vm_start) { ++ error = split_vma(mm, vma, start, 1); ++ if (error) ++ goto fail; ++ } ++ ++ if (end != vma->vm_end) { ++ error = split_vma(mm, vma, end, 0); ++ if (error) ++ goto fail; ++ } ++ ++success: ++ /* ++ * vm_flags and vm_page_prot are protected by the mmap_sem ++ * held in write mode. ++ */ ++ vma->vm_flags = newflags; ++ dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); ++ vma_set_page_prot(vma); ++ ++ change_protection(vma, start, end, vma->vm_page_prot, ++ dirty_accountable, 0); ++ ++ /* ++ * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major ++ * fault on access. ++ */ ++ if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && ++ (newflags & VM_WRITE)) { ++ populate_vma_page_range(vma, start, end, NULL); ++ } ++ ++ vm_stat_account(mm, oldflags, -nrpages); ++ vm_stat_account(mm, newflags, nrpages); ++ perf_event_mmap(vma); ++ return 0; ++ ++fail: ++ vm_unacct_memory(charged); ++ return error; ++} ++ ++/* ++ * pkey==-1 when doing a legacy mprotect() ++ */ ++static int do_mprotect_pkey(unsigned long start, size_t len, ++ unsigned long prot, int pkey) ++{ ++ unsigned long nstart, end, tmp, reqprot; ++ struct vm_area_struct *vma, *prev; ++ int error = -EINVAL; ++ const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); ++ const bool rier = (current->personality & READ_IMPLIES_EXEC) && ++ (prot & PROT_READ); ++ ++ prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); ++ if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ ++ return -EINVAL; ++ ++ if (start & ~PAGE_MASK) ++ return -EINVAL; ++ if (!len) ++ return 0; ++ len = PAGE_ALIGN(len); ++ end = start + len; ++ if (end <= start) ++ return -ENOMEM; ++ if (!arch_validate_prot(prot, start)) ++ return -EINVAL; ++ ++ reqprot = prot; ++ ++ if (down_write_killable(¤t->mm->mmap_sem)) ++ return -EINTR; ++ ++ /* ++ * If userspace did not allocate the pkey, do not let ++ * them use it here. 
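++	 * A pkey of -1 denotes a plain mprotect() call and always passes
++	 * this check.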
++ */ ++ error = -EINVAL; ++ if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey)) ++ goto out; ++ ++ vma = find_vma(current->mm, start); ++ error = -ENOMEM; ++ if (!vma) ++ goto out; ++ prev = vma->vm_prev; ++ if (unlikely(grows & PROT_GROWSDOWN)) { ++ if (vma->vm_start >= end) ++ goto out; ++ start = vma->vm_start; ++ error = -EINVAL; ++ if (!(vma->vm_flags & VM_GROWSDOWN)) ++ goto out; ++ } else { ++ if (vma->vm_start > start) ++ goto out; ++ if (unlikely(grows & PROT_GROWSUP)) { ++ end = vma->vm_end; ++ error = -EINVAL; ++ if (!(vma->vm_flags & VM_GROWSUP)) ++ goto out; ++ } ++ } ++ if (start > vma->vm_start) ++ prev = vma; ++ ++ for (nstart = start ; ; ) { ++ unsigned long mask_off_old_flags; ++ unsigned long newflags; ++ int new_vma_pkey; ++ ++ /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ ++ ++ /* Does the application expect PROT_READ to imply PROT_EXEC */ ++ if (rier && (vma->vm_flags & VM_MAYEXEC)) ++ prot |= PROT_EXEC; ++ ++ /* ++ * Each mprotect() call explicitly passes r/w/x permissions. ++ * If a permission is not passed to mprotect(), it must be ++ * cleared from the VMA. ++ */ ++ mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC | ++ VM_FLAGS_CLEAR; ++ ++ new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); ++ newflags = calc_vm_prot_bits(prot, new_vma_pkey); ++ newflags |= (vma->vm_flags & ~mask_off_old_flags); ++ ++ /* newflags >> 4 shift VM_MAY% in place of VM_% */ ++ if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { ++ error = -EACCES; ++ goto out; ++ } ++ ++ error = security_file_mprotect(vma, reqprot, prot); ++ if (error) ++ goto out; ++ ++ tmp = vma->vm_end; ++ if (tmp > end) ++ tmp = end; ++ error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); ++ if (error) ++ goto out; ++ nstart = tmp; ++ ++ if (nstart < prev->vm_end) ++ nstart = prev->vm_end; ++ if (nstart >= end) ++ goto out; ++ ++ vma = prev->vm_next; ++ if (!vma || vma->vm_start != nstart) { ++ error = -ENOMEM; ++ goto out; ++ } ++ prot = reqprot; ++ } ++out: ++ up_write(¤t->mm->mmap_sem); ++ return error; ++} ++ ++SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, ++ unsigned long, prot) ++{ ++ return do_mprotect_pkey(start, len, prot, -1); ++} ++ ++#ifdef CONFIG_ARCH_HAS_PKEYS ++ ++SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, ++ unsigned long, prot, int, pkey) ++{ ++ return do_mprotect_pkey(start, len, prot, pkey); ++} ++ ++SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val) ++{ ++ int pkey; ++ int ret; ++ ++ /* No flags supported yet. */ ++ if (flags) ++ return -EINVAL; ++ /* check for unsupported init values */ ++ if (init_val & ~PKEY_ACCESS_MASK) ++ return -EINVAL; ++ ++ down_write(¤t->mm->mmap_sem); ++ pkey = mm_pkey_alloc(current->mm); ++ ++ ret = -ENOSPC; ++ if (pkey == -1) ++ goto out; ++ ++ ret = arch_set_user_pkey_access(current, pkey, init_val); ++ if (ret) { ++ mm_pkey_free(current->mm, pkey); ++ goto out; ++ } ++ ret = pkey; ++out: ++ up_write(¤t->mm->mmap_sem); ++ return ret; ++} ++ ++SYSCALL_DEFINE1(pkey_free, int, pkey) ++{ ++ int ret; ++ ++ down_write(¤t->mm->mmap_sem); ++ ret = mm_pkey_free(current->mm, pkey); ++ up_write(¤t->mm->mmap_sem); ++ ++ /* ++ * We could provie warnings or errors if any VMA still ++ * has the pkey set here. 
++ */ ++ return ret; ++} ++ ++#endif /* CONFIG_ARCH_HAS_PKEYS */ +diff -uprN kernel/mm/vmalloc.c kernel_new/mm/vmalloc.c +--- kernel/mm/vmalloc.c 2020-12-21 21:59:22.000000000 +0800 ++++ kernel_new/mm/vmalloc.c 2021-04-01 18:28:07.817863107 +0800 +@@ -233,6 +233,8 @@ static int vmap_page_range_noflush(unsig + return err; + } while (pgd++, addr = next, addr != end); + ++ __ipipe_pin_mapping_globally(start, end); ++ + return nr; + } + +diff -uprN kernel/mm/vmalloc.c.orig kernel_new/mm/vmalloc.c.orig +--- kernel/mm/vmalloc.c.orig 1970-01-01 08:00:00.000000000 +0800 ++++ kernel_new/mm/vmalloc.c.orig 2020-12-21 21:59:22.000000000 +0800 +@@ -0,0 +1,2778 @@ ++/* ++ * linux/mm/vmalloc.c ++ * ++ * Copyright (C) 1993 Linus Torvalds ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian , May 2000 ++ * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002 ++ * Numa awareness, Christoph Lameter, SGI, June 2005 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++struct vfree_deferred { ++ struct llist_head list; ++ struct work_struct wq; ++}; ++static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred); ++ ++static void __vunmap(const void *, int); ++ ++static void free_work(struct work_struct *w) ++{ ++ struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq); ++ struct llist_node *t, *llnode; ++ ++ llist_for_each_safe(llnode, t, llist_del_all(&p->list)) ++ __vunmap((void *)llnode, 1); ++} ++ ++/*** Page table manipulation functions ***/ ++ ++static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) ++{ ++ pte_t *pte; ++ ++ pte = pte_offset_kernel(pmd, addr); ++ do { ++ pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); ++ WARN_ON(!pte_none(ptent) && !pte_present(ptent)); ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++} ++ ++static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_offset(pud, addr); ++ do { ++ next = pmd_addr_end(addr, end); ++ if (pmd_clear_huge(pmd)) ++ continue; ++ if (pmd_none_or_clear_bad(pmd)) ++ continue; ++ vunmap_pte_range(pmd, addr, next); ++ } while (pmd++, addr = next, addr != end); ++} ++ ++static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_offset(p4d, addr); ++ do { ++ next = pud_addr_end(addr, end); ++ if (pud_clear_huge(pud)) ++ continue; ++ if (pud_none_or_clear_bad(pud)) ++ continue; ++ vunmap_pmd_range(pud, addr, next); ++ } while (pud++, addr = next, addr != end); ++} ++ ++static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_offset(pgd, addr); ++ do { ++ next = p4d_addr_end(addr, end); ++ if (p4d_clear_huge(p4d)) ++ continue; ++ if (p4d_none_or_clear_bad(p4d)) ++ continue; ++ vunmap_pud_range(p4d, addr, next); ++ } while (p4d++, addr = next, addr != end); ++} ++ ++static void vunmap_page_range(unsigned long addr, unsigned long end) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset_k(addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ if 
(pgd_none_or_clear_bad(pgd)) ++ continue; ++ vunmap_p4d_range(pgd, addr, next); ++ } while (pgd++, addr = next, addr != end); ++} ++ ++static int vmap_pte_range(pmd_t *pmd, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pte_t *pte; ++ ++ /* ++ * nr is a running index into the array which helps higher level ++ * callers keep track of where we're up to. ++ */ ++ ++ pte = pte_alloc_kernel(pmd, addr); ++ if (!pte) ++ return -ENOMEM; ++ do { ++ struct page *page = pages[*nr]; ++ ++ if (WARN_ON(!pte_none(*pte))) ++ return -EBUSY; ++ if (WARN_ON(!page)) ++ return -ENOMEM; ++ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); ++ (*nr)++; ++ } while (pte++, addr += PAGE_SIZE, addr != end); ++ return 0; ++} ++ ++static int vmap_pmd_range(pud_t *pud, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pmd_t *pmd; ++ unsigned long next; ++ ++ pmd = pmd_alloc(&init_mm, pud, addr); ++ if (!pmd) ++ return -ENOMEM; ++ do { ++ next = pmd_addr_end(addr, end); ++ if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (pmd++, addr = next, addr != end); ++ return 0; ++} ++ ++static int vmap_pud_range(p4d_t *p4d, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ pud_t *pud; ++ unsigned long next; ++ ++ pud = pud_alloc(&init_mm, p4d, addr); ++ if (!pud) ++ return -ENOMEM; ++ do { ++ next = pud_addr_end(addr, end); ++ if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (pud++, addr = next, addr != end); ++ return 0; ++} ++ ++static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, ++ unsigned long end, pgprot_t prot, struct page **pages, int *nr) ++{ ++ p4d_t *p4d; ++ unsigned long next; ++ ++ p4d = p4d_alloc(&init_mm, pgd, addr); ++ if (!p4d) ++ return -ENOMEM; ++ do { ++ next = p4d_addr_end(addr, end); ++ if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) ++ return -ENOMEM; ++ } while (p4d++, addr = next, addr != end); ++ return 0; ++} ++ ++/* ++ * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and ++ * will have pfns corresponding to the "pages" array. ++ * ++ * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] ++ */ ++static int vmap_page_range_noflush(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ unsigned long addr = start; ++ int err = 0; ++ int nr = 0; ++ ++ BUG_ON(addr >= end); ++ pgd = pgd_offset_k(addr); ++ do { ++ next = pgd_addr_end(addr, end); ++ err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); ++ if (err) ++ return err; ++ } while (pgd++, addr = next, addr != end); ++ ++ return nr; ++} ++ ++static int vmap_page_range(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) ++{ ++ int ret; ++ ++ ret = vmap_page_range_noflush(start, end, prot, pages); ++ flush_cache_vmap(start, end); ++ return ret; ++} ++ ++int is_vmalloc_or_module_addr(const void *x) ++{ ++ /* ++ * ARM, x86-64 and sparc64 put modules in a special place, ++ * and fall back on vmalloc() if that fails. Others ++ * just put it in the vmalloc space. ++ */ ++#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) ++ unsigned long addr = (unsigned long)x; ++ if (addr >= MODULES_VADDR && addr < MODULES_END) ++ return 1; ++#endif ++ return is_vmalloc_addr(x); ++} ++ ++/* ++ * Walk a vmap address to the struct page it maps. 
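++ * Returns NULL when nothing is currently mapped at the address (including
++ * huge mappings, see the comment inside the function).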
++ */ ++struct page *vmalloc_to_page(const void *vmalloc_addr) ++{ ++ unsigned long addr = (unsigned long) vmalloc_addr; ++ struct page *page = NULL; ++ pgd_t *pgd = pgd_offset_k(addr); ++ p4d_t *p4d; ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *ptep, pte; ++ ++ /* ++ * XXX we might need to change this if we add VIRTUAL_BUG_ON for ++ * architectures that do not vmalloc module space ++ */ ++ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); ++ ++ if (pgd_none(*pgd)) ++ return NULL; ++ p4d = p4d_offset(pgd, addr); ++ if (p4d_none(*p4d)) ++ return NULL; ++ pud = pud_offset(p4d, addr); ++ ++ /* ++ * Don't dereference bad PUD or PMD (below) entries. This will also ++ * identify huge mappings, which we may encounter on architectures ++ * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be ++ * identified as vmalloc addresses by is_vmalloc_addr(), but are ++ * not [unambiguously] associated with a struct page, so there is ++ * no correct value to return for them. ++ */ ++ WARN_ON_ONCE(pud_bad(*pud)); ++ if (pud_none(*pud) || pud_bad(*pud)) ++ return NULL; ++ pmd = pmd_offset(pud, addr); ++ WARN_ON_ONCE(pmd_bad(*pmd)); ++ if (pmd_none(*pmd) || pmd_bad(*pmd)) ++ return NULL; ++ ++ ptep = pte_offset_map(pmd, addr); ++ pte = *ptep; ++ if (pte_present(pte)) ++ page = pte_page(pte); ++ pte_unmap(ptep); ++ return page; ++} ++EXPORT_SYMBOL(vmalloc_to_page); ++ ++/* ++ * Map a vmalloc()-space virtual address to the physical page frame number. ++ */ ++unsigned long vmalloc_to_pfn(const void *vmalloc_addr) ++{ ++ return page_to_pfn(vmalloc_to_page(vmalloc_addr)); ++} ++EXPORT_SYMBOL(vmalloc_to_pfn); ++ ++ ++/*** Global kva allocator ***/ ++ ++#define VM_LAZY_FREE 0x02 ++#define VM_VM_AREA 0x04 ++ ++static DEFINE_SPINLOCK(vmap_area_lock); ++/* Export for kexec only */ ++LIST_HEAD(vmap_area_list); ++static LLIST_HEAD(vmap_purge_list); ++static struct rb_root vmap_area_root = RB_ROOT; ++ ++/* The vmap cache globals are protected by vmap_area_lock */ ++static struct rb_node *free_vmap_cache; ++static unsigned long cached_hole_size; ++static unsigned long cached_vstart; ++static unsigned long cached_align; ++ ++static unsigned long vmap_area_pcpu_hole; ++ ++static struct vmap_area *__find_vmap_area(unsigned long addr) ++{ ++ struct rb_node *n = vmap_area_root.rb_node; ++ ++ while (n) { ++ struct vmap_area *va; ++ ++ va = rb_entry(n, struct vmap_area, rb_node); ++ if (addr < va->va_start) ++ n = n->rb_left; ++ else if (addr >= va->va_end) ++ n = n->rb_right; ++ else ++ return va; ++ } ++ ++ return NULL; ++} ++ ++static void __insert_vmap_area(struct vmap_area *va) ++{ ++ struct rb_node **p = &vmap_area_root.rb_node; ++ struct rb_node *parent = NULL; ++ struct rb_node *tmp; ++ ++ while (*p) { ++ struct vmap_area *tmp_va; ++ ++ parent = *p; ++ tmp_va = rb_entry(parent, struct vmap_area, rb_node); ++ if (va->va_start < tmp_va->va_end) ++ p = &(*p)->rb_left; ++ else if (va->va_end > tmp_va->va_start) ++ p = &(*p)->rb_right; ++ else ++ BUG(); ++ } ++ ++ rb_link_node(&va->rb_node, parent, p); ++ rb_insert_color(&va->rb_node, &vmap_area_root); ++ ++ /* address-sort this list */ ++ tmp = rb_prev(&va->rb_node); ++ if (tmp) { ++ struct vmap_area *prev; ++ prev = rb_entry(tmp, struct vmap_area, rb_node); ++ list_add_rcu(&va->list, &prev->list); ++ } else ++ list_add_rcu(&va->list, &vmap_area_list); ++} ++ ++static void purge_vmap_area_lazy(void); ++ ++static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); ++ ++/* ++ * Allocate a region of KVA of the specified size and alignment, within the ++ * vstart and vend. 
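++ * Returns the new vmap_area, or an ERR_PTR() value when no KVA could be
++ * allocated.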
++ */ ++static struct vmap_area *alloc_vmap_area(unsigned long size, ++ unsigned long align, ++ unsigned long vstart, unsigned long vend, ++ int node, gfp_t gfp_mask) ++{ ++ struct vmap_area *va; ++ struct rb_node *n; ++ unsigned long addr; ++ int purged = 0; ++ struct vmap_area *first; ++ ++ BUG_ON(!size); ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(!is_power_of_2(align)); ++ ++ might_sleep(); ++ ++ va = kmalloc_node(sizeof(struct vmap_area), ++ gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!va)) ++ return ERR_PTR(-ENOMEM); ++ ++ /* ++ * Only scan the relevant parts containing pointers to other objects ++ * to avoid false negatives. ++ */ ++ kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); ++ ++retry: ++ spin_lock(&vmap_area_lock); ++ /* ++ * Invalidate cache if we have more permissive parameters. ++ * cached_hole_size notes the largest hole noticed _below_ ++ * the vmap_area cached in free_vmap_cache: if size fits ++ * into that hole, we want to scan from vstart to reuse ++ * the hole instead of allocating above free_vmap_cache. ++ * Note that __free_vmap_area may update free_vmap_cache ++ * without updating cached_hole_size or cached_align. ++ */ ++ if (!free_vmap_cache || ++ size < cached_hole_size || ++ vstart < cached_vstart || ++ align < cached_align) { ++nocache: ++ cached_hole_size = 0; ++ free_vmap_cache = NULL; ++ } ++ /* record if we encounter less permissive parameters */ ++ cached_vstart = vstart; ++ cached_align = align; ++ ++ /* find starting point for our search */ ++ if (free_vmap_cache) { ++ first = rb_entry(free_vmap_cache, struct vmap_area, rb_node); ++ addr = ALIGN(first->va_end, align); ++ if (addr < vstart) ++ goto nocache; ++ if (addr + size < addr) ++ goto overflow; ++ ++ } else { ++ addr = ALIGN(vstart, align); ++ if (addr + size < addr) ++ goto overflow; ++ ++ n = vmap_area_root.rb_node; ++ first = NULL; ++ ++ while (n) { ++ struct vmap_area *tmp; ++ tmp = rb_entry(n, struct vmap_area, rb_node); ++ if (tmp->va_end >= addr) { ++ first = tmp; ++ if (tmp->va_start <= addr) ++ break; ++ n = n->rb_left; ++ } else ++ n = n->rb_right; ++ } ++ ++ if (!first) ++ goto found; ++ } ++ ++ /* from the starting point, walk areas until a suitable hole is found */ ++ while (addr + size > first->va_start && addr + size <= vend) { ++ if (addr + cached_hole_size < first->va_start) ++ cached_hole_size = first->va_start - addr; ++ addr = ALIGN(first->va_end, align); ++ if (addr + size < addr) ++ goto overflow; ++ ++ if (list_is_last(&first->list, &vmap_area_list)) ++ goto found; ++ ++ first = list_next_entry(first, list); ++ } ++ ++found: ++ /* ++ * Check also calculated address against the vstart, ++ * because it can be 0 because of big align request. 
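++	 * (ALIGN() may wrap the candidate address around to 0 for a very
++	 * large alignment; the addr < vstart test catches that case.)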
++ */ ++ if (addr + size > vend || addr < vstart) ++ goto overflow; ++ ++ va->va_start = addr; ++ va->va_end = addr + size; ++ va->flags = 0; ++ __insert_vmap_area(va); ++ free_vmap_cache = &va->rb_node; ++ spin_unlock(&vmap_area_lock); ++ ++ BUG_ON(!IS_ALIGNED(va->va_start, align)); ++ BUG_ON(va->va_start < vstart); ++ BUG_ON(va->va_end > vend); ++ ++ return va; ++ ++overflow: ++ spin_unlock(&vmap_area_lock); ++ if (!purged) { ++ purge_vmap_area_lazy(); ++ purged = 1; ++ goto retry; ++ } ++ ++ if (gfpflags_allow_blocking(gfp_mask)) { ++ unsigned long freed = 0; ++ blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); ++ if (freed > 0) { ++ purged = 0; ++ goto retry; ++ } ++ } ++ ++ if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) ++ pr_warn("vmap allocation for size %lu failed: use vmalloc= to increase size\n", ++ size); ++ kfree(va); ++ return ERR_PTR(-EBUSY); ++} ++ ++int register_vmap_purge_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_register(&vmap_notify_list, nb); ++} ++EXPORT_SYMBOL_GPL(register_vmap_purge_notifier); ++ ++int unregister_vmap_purge_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_unregister(&vmap_notify_list, nb); ++} ++EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier); ++ ++static void __free_vmap_area(struct vmap_area *va) ++{ ++ BUG_ON(RB_EMPTY_NODE(&va->rb_node)); ++ ++ if (free_vmap_cache) { ++ if (va->va_end < cached_vstart) { ++ free_vmap_cache = NULL; ++ } else { ++ struct vmap_area *cache; ++ cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node); ++ if (va->va_start <= cache->va_start) { ++ free_vmap_cache = rb_prev(&va->rb_node); ++ /* ++ * We don't try to update cached_hole_size or ++ * cached_align, but it won't go very wrong. ++ */ ++ } ++ } ++ } ++ rb_erase(&va->rb_node, &vmap_area_root); ++ RB_CLEAR_NODE(&va->rb_node); ++ list_del_rcu(&va->list); ++ ++ /* ++ * Track the highest possible candidate for pcpu area ++ * allocation. Areas outside of vmalloc area can be returned ++ * here too, consider only end addresses which fall inside ++ * vmalloc area proper. ++ */ ++ if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) ++ vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); ++ ++ kfree_rcu(va, rcu_head); ++} ++ ++/* ++ * Free a region of KVA allocated by alloc_vmap_area ++ */ ++static void free_vmap_area(struct vmap_area *va) ++{ ++ spin_lock(&vmap_area_lock); ++ __free_vmap_area(va); ++ spin_unlock(&vmap_area_lock); ++} ++ ++/* ++ * Clear the pagetable entries of a given vmap_area ++ */ ++static void unmap_vmap_area(struct vmap_area *va) ++{ ++ vunmap_page_range(va->va_start, va->va_end); ++} ++ ++/* ++ * lazy_max_pages is the maximum amount of virtual address space we gather up ++ * before attempting to purge with a TLB flush. ++ * ++ * There is a tradeoff here: a larger number will cover more kernel page tables ++ * and take slightly longer to purge, but it will linearly reduce the number of ++ * global TLB flushes that must be performed. It would seem natural to scale ++ * this number up linearly with the number of CPUs (because vmapping activity ++ * could also scale linearly with the number of CPUs), however it is likely ++ * that in practice, workloads might be constrained in other ways that mean ++ * vmap activity will not scale linearly with CPUs. Also, I want to be ++ * conservative and not introduce a big latency on huge systems, so go with ++ * a less aggressive log scale. 
It will still be an improvement over the old ++ * code, and it will be simple to change the scale factor if we find that it ++ * becomes a problem on bigger systems. ++ */ ++static unsigned long lazy_max_pages(void) ++{ ++ unsigned int log; ++ ++ log = fls(num_online_cpus()); ++ ++ return log * (32UL * 1024 * 1024 / PAGE_SIZE); ++} ++ ++static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); ++ ++/* ++ * Serialize vmap purging. There is no actual criticial section protected ++ * by this look, but we want to avoid concurrent calls for performance ++ * reasons and to make the pcpu_get_vm_areas more deterministic. ++ */ ++static DEFINE_MUTEX(vmap_purge_lock); ++ ++/* for per-CPU blocks */ ++static void purge_fragmented_blocks_allcpus(void); ++ ++/* ++ * called before a call to iounmap() if the caller wants vm_area_struct's ++ * immediately freed. ++ */ ++void set_iounmap_nonlazy(void) ++{ ++ atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); ++} ++ ++/* ++ * Purges all lazily-freed vmap areas. ++ */ ++static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) ++{ ++ struct llist_node *valist; ++ struct vmap_area *va; ++ struct vmap_area *n_va; ++ bool do_free = false; ++ ++ lockdep_assert_held(&vmap_purge_lock); ++ ++ valist = llist_del_all(&vmap_purge_list); ++ llist_for_each_entry(va, valist, purge_list) { ++ if (va->va_start < start) ++ start = va->va_start; ++ if (va->va_end > end) ++ end = va->va_end; ++ do_free = true; ++ } ++ ++ if (!do_free) ++ return false; ++ ++ flush_tlb_kernel_range(start, end); ++ ++ spin_lock(&vmap_area_lock); ++ llist_for_each_entry_safe(va, n_va, valist, purge_list) { ++ int nr = (va->va_end - va->va_start) >> PAGE_SHIFT; ++ ++ __free_vmap_area(va); ++ atomic_sub(nr, &vmap_lazy_nr); ++ cond_resched_lock(&vmap_area_lock); ++ } ++ spin_unlock(&vmap_area_lock); ++ return true; ++} ++ ++/* ++ * Kick off a purge of the outstanding lazy areas. Don't bother if somebody ++ * is already purging. ++ */ ++static void try_purge_vmap_area_lazy(void) ++{ ++ if (mutex_trylock(&vmap_purge_lock)) { ++ __purge_vmap_area_lazy(ULONG_MAX, 0); ++ mutex_unlock(&vmap_purge_lock); ++ } ++} ++ ++/* ++ * Kick off a purge of the outstanding lazy areas. ++ */ ++static void purge_vmap_area_lazy(void) ++{ ++ mutex_lock(&vmap_purge_lock); ++ purge_fragmented_blocks_allcpus(); ++ __purge_vmap_area_lazy(ULONG_MAX, 0); ++ mutex_unlock(&vmap_purge_lock); ++} ++ ++/* ++ * Free a vmap area, caller ensuring that the area has been unmapped ++ * and flush_cache_vunmap had been called for the correct range ++ * previously. 
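++ * The area is queued on vmap_purge_list; the TLB flush and the actual
++ * freeing are deferred to __purge_vmap_area_lazy().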
++ */ ++static void free_vmap_area_noflush(struct vmap_area *va) ++{ ++ int nr_lazy; ++ ++ nr_lazy = atomic_add_return((va->va_end - va->va_start) >> PAGE_SHIFT, ++ &vmap_lazy_nr); ++ ++ /* After this point, we may free va at any time */ ++ llist_add(&va->purge_list, &vmap_purge_list); ++ ++ if (unlikely(nr_lazy > lazy_max_pages())) ++ try_purge_vmap_area_lazy(); ++} ++ ++/* ++ * Free and unmap a vmap area ++ */ ++static void free_unmap_vmap_area(struct vmap_area *va) ++{ ++ flush_cache_vunmap(va->va_start, va->va_end); ++ unmap_vmap_area(va); ++ if (debug_pagealloc_enabled()) ++ flush_tlb_kernel_range(va->va_start, va->va_end); ++ ++ free_vmap_area_noflush(va); ++} ++ ++static struct vmap_area *find_vmap_area(unsigned long addr) ++{ ++ struct vmap_area *va; ++ ++ spin_lock(&vmap_area_lock); ++ va = __find_vmap_area(addr); ++ spin_unlock(&vmap_area_lock); ++ ++ return va; ++} ++ ++/*** Per cpu kva allocator ***/ ++ ++/* ++ * vmap space is limited especially on 32 bit architectures. Ensure there is ++ * room for at least 16 percpu vmap blocks per CPU. ++ */ ++/* ++ * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able ++ * to #define VMALLOC_SPACE (VMALLOC_END-VMALLOC_START). Guess ++ * instead (we just need a rough idea) ++ */ ++#if BITS_PER_LONG == 32 ++#define VMALLOC_SPACE (128UL*1024*1024) ++#else ++#define VMALLOC_SPACE (128UL*1024*1024*1024) ++#endif ++ ++#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE) ++#define VMAP_MAX_ALLOC BITS_PER_LONG /* 256K with 4K pages */ ++#define VMAP_BBMAP_BITS_MAX 1024 /* 4MB with 4K pages */ ++#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) ++#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ ++#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ ++#define VMAP_BBMAP_BITS \ ++ VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ ++ VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ ++ VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16)) ++ ++#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) ++ ++static bool vmap_initialized __read_mostly = false; ++ ++struct vmap_block_queue { ++ spinlock_t lock; ++ struct list_head free; ++}; ++ ++struct vmap_block { ++ spinlock_t lock; ++ struct vmap_area *va; ++ unsigned long free, dirty; ++ unsigned long dirty_min, dirty_max; /*< dirty range */ ++ struct list_head free_list; ++ struct rcu_head rcu_head; ++ struct list_head purge; ++}; ++ ++/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ ++static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); ++ ++/* ++ * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block ++ * in the free path. Could get rid of this if we change the API to return a ++ * "cookie" from alloc, to be passed to free. But no big deal yet. ++ */ ++static DEFINE_SPINLOCK(vmap_block_tree_lock); ++static RADIX_TREE(vmap_block_tree, GFP_ATOMIC); ++ ++/* ++ * We should probably have a fallback mechanism to allocate virtual memory ++ * out of partially filled vmap blocks. However vmap block sizing should be ++ * fairly reasonable according to the vmalloc size, so it shouldn't be a ++ * big problem. 
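++ * (Each vmap_block covers VMAP_BLOCK_SIZE bytes of KVA, i.e. VMAP_BBMAP_BITS
++ * pages.)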
++ */ ++ ++static unsigned long addr_to_vb_idx(unsigned long addr) ++{ ++ addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); ++ addr /= VMAP_BLOCK_SIZE; ++ return addr; ++} ++ ++static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off) ++{ ++ unsigned long addr; ++ ++ addr = va_start + (pages_off << PAGE_SHIFT); ++ BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start)); ++ return (void *)addr; ++} ++ ++/** ++ * new_vmap_block - allocates new vmap_block and occupies 2^order pages in this ++ * block. Of course pages number can't exceed VMAP_BBMAP_BITS ++ * @order: how many 2^order pages should be occupied in newly allocated block ++ * @gfp_mask: flags for the page level allocator ++ * ++ * Returns: virtual address in a newly allocated block or ERR_PTR(-errno) ++ */ ++static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) ++{ ++ struct vmap_block_queue *vbq; ++ struct vmap_block *vb; ++ struct vmap_area *va; ++ unsigned long vb_idx; ++ int node, err; ++ void *vaddr; ++ ++ node = numa_node_id(); ++ ++ vb = kmalloc_node(sizeof(struct vmap_block), ++ gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!vb)) ++ return ERR_PTR(-ENOMEM); ++ ++ va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, ++ VMALLOC_START, VMALLOC_END, ++ node, gfp_mask); ++ if (IS_ERR(va)) { ++ kfree(vb); ++ return ERR_CAST(va); ++ } ++ ++ err = radix_tree_preload(gfp_mask); ++ if (unlikely(err)) { ++ kfree(vb); ++ free_vmap_area(va); ++ return ERR_PTR(err); ++ } ++ ++ vaddr = vmap_block_vaddr(va->va_start, 0); ++ spin_lock_init(&vb->lock); ++ vb->va = va; ++ /* At least something should be left free */ ++ BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); ++ vb->free = VMAP_BBMAP_BITS - (1UL << order); ++ vb->dirty = 0; ++ vb->dirty_min = VMAP_BBMAP_BITS; ++ vb->dirty_max = 0; ++ INIT_LIST_HEAD(&vb->free_list); ++ ++ vb_idx = addr_to_vb_idx(va->va_start); ++ spin_lock(&vmap_block_tree_lock); ++ err = radix_tree_insert(&vmap_block_tree, vb_idx, vb); ++ spin_unlock(&vmap_block_tree_lock); ++ BUG_ON(err); ++ radix_tree_preload_end(); ++ ++ vbq = &get_cpu_var(vmap_block_queue); ++ spin_lock(&vbq->lock); ++ list_add_tail_rcu(&vb->free_list, &vbq->free); ++ spin_unlock(&vbq->lock); ++ put_cpu_var(vmap_block_queue); ++ ++ return vaddr; ++} ++ ++static void free_vmap_block(struct vmap_block *vb) ++{ ++ struct vmap_block *tmp; ++ unsigned long vb_idx; ++ ++ vb_idx = addr_to_vb_idx(vb->va->va_start); ++ spin_lock(&vmap_block_tree_lock); ++ tmp = radix_tree_delete(&vmap_block_tree, vb_idx); ++ spin_unlock(&vmap_block_tree_lock); ++ BUG_ON(tmp != vb); ++ ++ free_vmap_area_noflush(vb->va); ++ kfree_rcu(vb, rcu_head); ++} ++ ++static void purge_fragmented_blocks(int cpu) ++{ ++ LIST_HEAD(purge); ++ struct vmap_block *vb; ++ struct vmap_block *n_vb; ++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ ++ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) ++ continue; ++ ++ spin_lock(&vb->lock); ++ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { ++ vb->free = 0; /* prevent further allocs after releasing lock */ ++ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ ++ vb->dirty_min = 0; ++ vb->dirty_max = VMAP_BBMAP_BITS; ++ spin_lock(&vbq->lock); ++ list_del_rcu(&vb->free_list); ++ spin_unlock(&vbq->lock); ++ spin_unlock(&vb->lock); ++ list_add_tail(&vb->purge, &purge); ++ } else ++ spin_unlock(&vb->lock); ++ } ++ rcu_read_unlock(); ++ ++ 
list_for_each_entry_safe(vb, n_vb, &purge, purge) { ++ list_del(&vb->purge); ++ free_vmap_block(vb); ++ } ++} ++ ++static void purge_fragmented_blocks_allcpus(void) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ purge_fragmented_blocks(cpu); ++} ++ ++static void *vb_alloc(unsigned long size, gfp_t gfp_mask) ++{ ++ struct vmap_block_queue *vbq; ++ struct vmap_block *vb; ++ void *vaddr = NULL; ++ unsigned int order; ++ ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); ++ if (WARN_ON(size == 0)) { ++ /* ++ * Allocating 0 bytes isn't what caller wants since ++ * get_order(0) returns funny result. Just warn and terminate ++ * early. ++ */ ++ return NULL; ++ } ++ order = get_order(size); ++ ++ rcu_read_lock(); ++ vbq = &get_cpu_var(vmap_block_queue); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ unsigned long pages_off; ++ ++ spin_lock(&vb->lock); ++ if (vb->free < (1UL << order)) { ++ spin_unlock(&vb->lock); ++ continue; ++ } ++ ++ pages_off = VMAP_BBMAP_BITS - vb->free; ++ vaddr = vmap_block_vaddr(vb->va->va_start, pages_off); ++ vb->free -= 1UL << order; ++ if (vb->free == 0) { ++ spin_lock(&vbq->lock); ++ list_del_rcu(&vb->free_list); ++ spin_unlock(&vbq->lock); ++ } ++ ++ spin_unlock(&vb->lock); ++ break; ++ } ++ ++ put_cpu_var(vmap_block_queue); ++ rcu_read_unlock(); ++ ++ /* Allocate new block if nothing was found */ ++ if (!vaddr) ++ vaddr = new_vmap_block(order, gfp_mask); ++ ++ return vaddr; ++} ++ ++static void vb_free(const void *addr, unsigned long size) ++{ ++ unsigned long offset; ++ unsigned long vb_idx; ++ unsigned int order; ++ struct vmap_block *vb; ++ ++ BUG_ON(offset_in_page(size)); ++ BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); ++ ++ flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); ++ ++ order = get_order(size); ++ ++ offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); ++ offset >>= PAGE_SHIFT; ++ ++ vb_idx = addr_to_vb_idx((unsigned long)addr); ++ rcu_read_lock(); ++ vb = radix_tree_lookup(&vmap_block_tree, vb_idx); ++ rcu_read_unlock(); ++ BUG_ON(!vb); ++ ++ vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); ++ ++ if (debug_pagealloc_enabled()) ++ flush_tlb_kernel_range((unsigned long)addr, ++ (unsigned long)addr + size); ++ ++ spin_lock(&vb->lock); ++ ++ /* Expand dirty range */ ++ vb->dirty_min = min(vb->dirty_min, offset); ++ vb->dirty_max = max(vb->dirty_max, offset + (1UL << order)); ++ ++ vb->dirty += 1UL << order; ++ if (vb->dirty == VMAP_BBMAP_BITS) { ++ BUG_ON(vb->free); ++ spin_unlock(&vb->lock); ++ free_vmap_block(vb); ++ } else ++ spin_unlock(&vb->lock); ++} ++ ++/** ++ * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer ++ * ++ * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily ++ * to amortize TLB flushing overheads. What this means is that any page you ++ * have now, may, in a former life, have been mapped into kernel virtual ++ * address by the vmap layer and so there might be some CPUs with TLB entries ++ * still referencing that page (additional to the regular 1:1 kernel mapping). ++ * ++ * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can ++ * be sure that none of the pages we have control over will have any aliases ++ * from the vmap layer. 
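++ *
++ * May sleep: it takes vmap_purge_lock and purges the outstanding lazy areas.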
++ */ ++void vm_unmap_aliases(void) ++{ ++ unsigned long start = ULONG_MAX, end = 0; ++ int cpu; ++ int flush = 0; ++ ++ if (unlikely(!vmap_initialized)) ++ return; ++ ++ might_sleep(); ++ ++ for_each_possible_cpu(cpu) { ++ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); ++ struct vmap_block *vb; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(vb, &vbq->free, free_list) { ++ spin_lock(&vb->lock); ++ if (vb->dirty) { ++ unsigned long va_start = vb->va->va_start; ++ unsigned long s, e; ++ ++ s = va_start + (vb->dirty_min << PAGE_SHIFT); ++ e = va_start + (vb->dirty_max << PAGE_SHIFT); ++ ++ start = min(s, start); ++ end = max(e, end); ++ ++ flush = 1; ++ } ++ spin_unlock(&vb->lock); ++ } ++ rcu_read_unlock(); ++ } ++ ++ mutex_lock(&vmap_purge_lock); ++ purge_fragmented_blocks_allcpus(); ++ if (!__purge_vmap_area_lazy(start, end) && flush) ++ flush_tlb_kernel_range(start, end); ++ mutex_unlock(&vmap_purge_lock); ++} ++EXPORT_SYMBOL_GPL(vm_unmap_aliases); ++ ++/** ++ * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram ++ * @mem: the pointer returned by vm_map_ram ++ * @count: the count passed to that vm_map_ram call (cannot unmap partial) ++ */ ++void vm_unmap_ram(const void *mem, unsigned int count) ++{ ++ unsigned long size = (unsigned long)count << PAGE_SHIFT; ++ unsigned long addr = (unsigned long)mem; ++ struct vmap_area *va; ++ ++ might_sleep(); ++ BUG_ON(!addr); ++ BUG_ON(addr < VMALLOC_START); ++ BUG_ON(addr > VMALLOC_END); ++ BUG_ON(!PAGE_ALIGNED(addr)); ++ ++ if (likely(count <= VMAP_MAX_ALLOC)) { ++ debug_check_no_locks_freed(mem, size); ++ vb_free(mem, size); ++ return; ++ } ++ ++ va = find_vmap_area(addr); ++ BUG_ON(!va); ++ debug_check_no_locks_freed((void *)va->va_start, ++ (va->va_end - va->va_start)); ++ free_unmap_vmap_area(va); ++} ++EXPORT_SYMBOL(vm_unmap_ram); ++ ++/** ++ * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) ++ * @pages: an array of pointers to the pages to be mapped ++ * @count: number of pages ++ * @node: prefer to allocate data structures on this node ++ * @prot: memory protection to use. PAGE_KERNEL for regular RAM ++ * ++ * If you use this function for less than VMAP_MAX_ALLOC pages, it could be ++ * faster than vmap so it's good. But if you mix long-life and short-life ++ * objects with vm_map_ram(), it could consume lots of address space through ++ * fragmentation (especially on a 32bit machine). You could see failures in ++ * the end. Please use this function for short-lived objects. 
++ * ++ * Returns: a pointer to the address that has been mapped, or %NULL on failure ++ */ ++void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) ++{ ++ unsigned long size = (unsigned long)count << PAGE_SHIFT; ++ unsigned long addr; ++ void *mem; ++ ++ if (likely(count <= VMAP_MAX_ALLOC)) { ++ mem = vb_alloc(size, GFP_KERNEL); ++ if (IS_ERR(mem)) ++ return NULL; ++ addr = (unsigned long)mem; ++ } else { ++ struct vmap_area *va; ++ va = alloc_vmap_area(size, PAGE_SIZE, ++ VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); ++ if (IS_ERR(va)) ++ return NULL; ++ ++ addr = va->va_start; ++ mem = (void *)addr; ++ } ++ if (vmap_page_range(addr, addr + size, prot, pages) < 0) { ++ vm_unmap_ram(mem, count); ++ return NULL; ++ } ++ return mem; ++} ++EXPORT_SYMBOL(vm_map_ram); ++ ++static struct vm_struct *vmlist __initdata; ++/** ++ * vm_area_add_early - add vmap area early during boot ++ * @vm: vm_struct to add ++ * ++ * This function is used to add fixed kernel vm area to vmlist before ++ * vmalloc_init() is called. @vm->addr, @vm->size, and @vm->flags ++ * should contain proper values and the other fields should be zero. ++ * ++ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. ++ */ ++void __init vm_area_add_early(struct vm_struct *vm) ++{ ++ struct vm_struct *tmp, **p; ++ ++ BUG_ON(vmap_initialized); ++ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { ++ if (tmp->addr >= vm->addr) { ++ BUG_ON(tmp->addr < vm->addr + vm->size); ++ break; ++ } else ++ BUG_ON(tmp->addr + tmp->size > vm->addr); ++ } ++ vm->next = *p; ++ *p = vm; ++} ++ ++/** ++ * vm_area_register_early - register vmap area early during boot ++ * @vm: vm_struct to register ++ * @align: requested alignment ++ * ++ * This function is used to register kernel vm area before ++ * vmalloc_init() is called. @vm->size and @vm->flags should contain ++ * proper values on entry and other fields should be zero. On return, ++ * vm->addr contains the allocated address. ++ * ++ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. ++ */ ++void __init vm_area_register_early(struct vm_struct *vm, size_t align) ++{ ++ static size_t vm_init_off __initdata; ++ unsigned long addr; ++ ++ addr = ALIGN(VMALLOC_START + vm_init_off, align); ++ vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; ++ ++ vm->addr = (void *)addr; ++ ++ vm_area_add_early(vm); ++} ++ ++void __init vmalloc_init(void) ++{ ++ struct vmap_area *va; ++ struct vm_struct *tmp; ++ int i; ++ ++ for_each_possible_cpu(i) { ++ struct vmap_block_queue *vbq; ++ struct vfree_deferred *p; ++ ++ vbq = &per_cpu(vmap_block_queue, i); ++ spin_lock_init(&vbq->lock); ++ INIT_LIST_HEAD(&vbq->free); ++ p = &per_cpu(vfree_deferred, i); ++ init_llist_head(&p->list); ++ INIT_WORK(&p->wq, free_work); ++ } ++ ++ /* Import existing vmlist entries. */ ++ for (tmp = vmlist; tmp; tmp = tmp->next) { ++ va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); ++ va->flags = VM_VM_AREA; ++ va->va_start = (unsigned long)tmp->addr; ++ va->va_end = va->va_start + tmp->size; ++ va->vm = tmp; ++ __insert_vmap_area(va); ++ } ++ ++ vmap_area_pcpu_hole = VMALLOC_END; ++ ++ vmap_initialized = true; ++} ++ ++/** ++ * map_kernel_range_noflush - map kernel VM area with the specified pages ++ * @addr: start of the VM area to map ++ * @size: size of the VM area to map ++ * @prot: page protection flags to use ++ * @pages: pages to map ++ * ++ * Map PFN_UP(@size) pages at @addr. 
The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vmap() on to-be-mapped areas ++ * before calling this function. ++ * ++ * RETURNS: ++ * The number of pages mapped on success, -errno on failure. ++ */ ++int map_kernel_range_noflush(unsigned long addr, unsigned long size, ++ pgprot_t prot, struct page **pages) ++{ ++ return vmap_page_range_noflush(addr, addr + size, prot, pages); ++} ++ ++/** ++ * unmap_kernel_range_noflush - unmap kernel VM area ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vunmap() on to-be-mapped areas ++ * before calling this function and flush_tlb_kernel_range() after. ++ */ ++void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) ++{ ++ vunmap_page_range(addr, addr + size); ++} ++EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush); ++ ++/** ++ * unmap_kernel_range - unmap kernel VM area and flush cache and TLB ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Similar to unmap_kernel_range_noflush() but flushes vcache before ++ * the unmapping and tlb after. ++ */ ++void unmap_kernel_range(unsigned long addr, unsigned long size) ++{ ++ unsigned long end = addr + size; ++ ++ flush_cache_vunmap(addr, end); ++ vunmap_page_range(addr, end); ++ flush_tlb_kernel_range(addr, end); ++} ++EXPORT_SYMBOL_GPL(unmap_kernel_range); ++ ++int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) ++{ ++ unsigned long addr = (unsigned long)area->addr; ++ unsigned long end = addr + get_vm_area_size(area); ++ int err; ++ ++ err = vmap_page_range(addr, end, prot, pages); ++ ++ return err > 0 ? 0 : err; ++} ++EXPORT_SYMBOL_GPL(map_vm_area); ++ ++static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, ++ unsigned long flags, const void *caller) ++{ ++ spin_lock(&vmap_area_lock); ++ vm->flags = flags; ++ vm->addr = (void *)va->va_start; ++ vm->size = va->va_end - va->va_start; ++ vm->caller = caller; ++ va->vm = vm; ++ va->flags |= VM_VM_AREA; ++ spin_unlock(&vmap_area_lock); ++} ++ ++static void clear_vm_uninitialized_flag(struct vm_struct *vm) ++{ ++ /* ++ * Before removing VM_UNINITIALIZED, ++ * we should make sure that vm has proper values. ++ * Pair with smp_rmb() in show_numa_info(). 
++ */ ++ smp_wmb(); ++ vm->flags &= ~VM_UNINITIALIZED; ++} ++ ++static struct vm_struct *__get_vm_area_node(unsigned long size, ++ unsigned long align, unsigned long flags, unsigned long start, ++ unsigned long end, int node, gfp_t gfp_mask, const void *caller) ++{ ++ struct vmap_area *va; ++ struct vm_struct *area; ++ ++ BUG_ON(in_interrupt()); ++ size = PAGE_ALIGN(size); ++ if (unlikely(!size)) ++ return NULL; ++ ++ if (flags & VM_IOREMAP) ++ align = 1ul << clamp_t(int, get_count_order_long(size), ++ PAGE_SHIFT, IOREMAP_MAX_ORDER); ++ ++ area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); ++ if (unlikely(!area)) ++ return NULL; ++ ++ if (!(flags & VM_NO_GUARD)) ++ size += PAGE_SIZE; ++ ++ va = alloc_vmap_area(size, align, start, end, node, gfp_mask); ++ if (IS_ERR(va)) { ++ kfree(area); ++ return NULL; ++ } ++ ++ setup_vmalloc_vm(area, va, flags, caller); ++ ++ return area; ++} ++ ++struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, ++ unsigned long start, unsigned long end) ++{ ++ return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, ++ GFP_KERNEL, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL_GPL(__get_vm_area); ++ ++struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, ++ unsigned long start, unsigned long end, ++ const void *caller) ++{ ++ return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, ++ GFP_KERNEL, caller); ++} ++ ++/** ++ * get_vm_area - reserve a contiguous kernel virtual area ++ * @size: size of the area ++ * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC ++ * ++ * Search an area of @size in the kernel virtual mapping area, ++ * and reserved it for out purposes. Returns the area descriptor ++ * on success or %NULL on failure. ++ */ ++struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) ++{ ++ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, ++ NUMA_NO_NODE, GFP_KERNEL, ++ __builtin_return_address(0)); ++} ++ ++struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, ++ const void *caller) ++{ ++ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, ++ NUMA_NO_NODE, GFP_KERNEL, caller); ++} ++ ++/** ++ * find_vm_area - find a continuous kernel virtual area ++ * @addr: base address ++ * ++ * Search for the kernel VM area starting at @addr, and return it. ++ * It is up to the caller to do all required locking to keep the returned ++ * pointer valid. ++ */ ++struct vm_struct *find_vm_area(const void *addr) ++{ ++ struct vmap_area *va; ++ ++ va = find_vmap_area((unsigned long)addr); ++ if (va && va->flags & VM_VM_AREA) ++ return va->vm; ++ ++ return NULL; ++} ++ ++/** ++ * remove_vm_area - find and remove a continuous kernel virtual area ++ * @addr: base address ++ * ++ * Search for the kernel VM area starting at @addr, and remove it. ++ * This function returns the found VM area, but using it is NOT safe ++ * on SMP machines, except for its size or flags. 
++ */ ++struct vm_struct *remove_vm_area(const void *addr) ++{ ++ struct vmap_area *va; ++ ++ might_sleep(); ++ ++ va = find_vmap_area((unsigned long)addr); ++ if (va && va->flags & VM_VM_AREA) { ++ struct vm_struct *vm = va->vm; ++ ++ spin_lock(&vmap_area_lock); ++ va->vm = NULL; ++ va->flags &= ~VM_VM_AREA; ++ va->flags |= VM_LAZY_FREE; ++ spin_unlock(&vmap_area_lock); ++ ++ kasan_free_shadow(vm); ++ free_unmap_vmap_area(va); ++ ++ return vm; ++ } ++ return NULL; ++} ++ ++static void __vunmap(const void *addr, int deallocate_pages) ++{ ++ struct vm_struct *area; ++ ++ if (!addr) ++ return; ++ ++ if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n", ++ addr)) ++ return; ++ ++ area = find_vm_area(addr); ++ if (unlikely(!area)) { ++ WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", ++ addr); ++ return; ++ } ++ ++ debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); ++ debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); ++ ++ remove_vm_area(addr); ++ if (deallocate_pages) { ++ int i; ++ ++ for (i = 0; i < area->nr_pages; i++) { ++ struct page *page = area->pages[i]; ++ ++ BUG_ON(!page); ++ __free_pages(page, 0); ++ } ++ ++ kvfree(area->pages); ++ } ++ ++ kfree(area); ++ return; ++} ++ ++static inline void __vfree_deferred(const void *addr) ++{ ++ /* ++ * Use raw_cpu_ptr() because this can be called from preemptible ++ * context. Preemption is absolutely fine here, because the llist_add() ++ * implementation is lockless, so it works even if we are adding to ++ * nother cpu's list. schedule_work() should be fine with this too. ++ */ ++ struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred); ++ ++ if (llist_add((struct llist_node *)addr, &p->list)) ++ schedule_work(&p->wq); ++} ++ ++/** ++ * vfree_atomic - release memory allocated by vmalloc() ++ * @addr: memory base address ++ * ++ * This one is just like vfree() but can be called in any atomic context ++ * except NMIs. ++ */ ++void vfree_atomic(const void *addr) ++{ ++ BUG_ON(in_nmi()); ++ ++ kmemleak_free(addr); ++ ++ if (!addr) ++ return; ++ __vfree_deferred(addr); ++} ++ ++/** ++ * vfree - release memory allocated by vmalloc() ++ * @addr: memory base address ++ * ++ * Free the virtually continuous memory area starting at @addr, as ++ * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is ++ * NULL, no operation is performed. ++ * ++ * Must not be called in NMI context (strictly speaking, only if we don't ++ * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling ++ * conventions for vfree() arch-depenedent would be a really bad idea) ++ * ++ * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node) ++ */ ++void vfree(const void *addr) ++{ ++ BUG_ON(in_nmi()); ++ ++ kmemleak_free(addr); ++ ++ if (!addr) ++ return; ++ if (unlikely(in_interrupt())) ++ __vfree_deferred(addr); ++ else ++ __vunmap(addr, 1); ++} ++EXPORT_SYMBOL(vfree); ++ ++/** ++ * vunmap - release virtual mapping obtained by vmap() ++ * @addr: memory base address ++ * ++ * Free the virtually contiguous memory area starting at @addr, ++ * which was created from the page array passed to vmap(). ++ * ++ * Must not be called in interrupt context. 
++ */ ++void vunmap(const void *addr) ++{ ++ BUG_ON(in_interrupt()); ++ might_sleep(); ++ if (addr) ++ __vunmap(addr, 0); ++} ++EXPORT_SYMBOL(vunmap); ++ ++/** ++ * vmap - map an array of pages into virtually contiguous space ++ * @pages: array of page pointers ++ * @count: number of pages to map ++ * @flags: vm_area->flags ++ * @prot: page protection for the mapping ++ * ++ * Maps @count pages from @pages into contiguous kernel virtual ++ * space. ++ */ ++void *vmap(struct page **pages, unsigned int count, ++ unsigned long flags, pgprot_t prot) ++{ ++ struct vm_struct *area; ++ unsigned long size; /* In bytes */ ++ ++ might_sleep(); ++ ++ if (count > totalram_pages) ++ return NULL; ++ ++ size = (unsigned long)count << PAGE_SHIFT; ++ area = get_vm_area_caller(size, flags, __builtin_return_address(0)); ++ if (!area) ++ return NULL; ++ ++ if (map_vm_area(area, prot, pages)) { ++ vunmap(area->addr); ++ return NULL; ++ } ++ ++ return area->addr; ++} ++EXPORT_SYMBOL(vmap); ++ ++static void *__vmalloc_node(unsigned long size, unsigned long align, ++ gfp_t gfp_mask, pgprot_t prot, ++ int node, const void *caller); ++static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, ++ pgprot_t prot, int node) ++{ ++ struct page **pages; ++ unsigned int nr_pages, array_size, i; ++ const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; ++ const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; ++ const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ? ++ 0 : ++ __GFP_HIGHMEM; ++ ++ nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; ++ array_size = (nr_pages * sizeof(struct page *)); ++ ++ /* Please note that the recursion is strictly bounded. */ ++ if (array_size > PAGE_SIZE) { ++ pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, ++ PAGE_KERNEL, node, area->caller); ++ } else { ++ pages = kmalloc_node(array_size, nested_gfp, node); ++ } ++ ++ if (!pages) { ++ remove_vm_area(area->addr); ++ kfree(area); ++ return NULL; ++ } ++ ++ area->pages = pages; ++ area->nr_pages = nr_pages; ++ ++ for (i = 0; i < area->nr_pages; i++) { ++ struct page *page; ++ ++ if (node == NUMA_NO_NODE) ++ page = alloc_page(alloc_mask|highmem_mask); ++ else ++ page = alloc_pages_node(node, alloc_mask|highmem_mask, 0); ++ ++ if (unlikely(!page)) { ++ /* Successfully allocated i pages, free them in __vunmap() */ ++ area->nr_pages = i; ++ goto fail; ++ } ++ area->pages[i] = page; ++ if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) ++ cond_resched(); ++ } ++ ++ if (map_vm_area(area, prot, pages)) ++ goto fail; ++ return area->addr; ++ ++fail: ++ warn_alloc(gfp_mask, NULL, ++ "vmalloc: allocation failure, allocated %ld of %ld bytes", ++ (area->nr_pages*PAGE_SIZE), area->size); ++ vfree(area->addr); ++ return NULL; ++} ++ ++/** ++ * __vmalloc_node_range - allocate virtually contiguous memory ++ * @size: allocation size ++ * @align: desired alignment ++ * @start: vm area range start ++ * @end: vm area range end ++ * @gfp_mask: flags for the page level allocator ++ * @prot: protection mask for the allocated pages ++ * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) ++ * @node: node to use for allocation or NUMA_NO_NODE ++ * @caller: caller's return address ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator with @gfp_mask flags. Map them into contiguous ++ * kernel virtual space, using a pagetable protection of @prot. 
++ */ ++void *__vmalloc_node_range(unsigned long size, unsigned long align, ++ unsigned long start, unsigned long end, gfp_t gfp_mask, ++ pgprot_t prot, unsigned long vm_flags, int node, ++ const void *caller) ++{ ++ struct vm_struct *area; ++ void *addr; ++ unsigned long real_size = size; ++ ++ size = PAGE_ALIGN(size); ++ if (!size || (size >> PAGE_SHIFT) > totalram_pages) ++ goto fail; ++ ++ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | ++ vm_flags, start, end, node, gfp_mask, caller); ++ if (!area) ++ goto fail; ++ ++ addr = __vmalloc_area_node(area, gfp_mask, prot, node); ++ if (!addr) ++ return NULL; ++ ++ /* ++ * First make sure the mappings are removed from all page-tables ++ * before they are freed. ++ */ ++ vmalloc_sync_unmappings(); ++ ++ /* ++ * In this function, newly allocated vm_struct has VM_UNINITIALIZED ++ * flag. It means that vm_struct is not fully initialized. ++ * Now, it is fully initialized, so remove this flag here. ++ */ ++ clear_vm_uninitialized_flag(area); ++ ++ kmemleak_vmalloc(area, size, gfp_mask); ++ ++ return addr; ++ ++fail: ++ warn_alloc(gfp_mask, NULL, ++ "vmalloc: allocation failure: %lu bytes", real_size); ++ return NULL; ++} ++ ++/** ++ * __vmalloc_node - allocate virtually contiguous memory ++ * @size: allocation size ++ * @align: desired alignment ++ * @gfp_mask: flags for the page level allocator ++ * @prot: protection mask for the allocated pages ++ * @node: node to use for allocation or NUMA_NO_NODE ++ * @caller: caller's return address ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator with @gfp_mask flags. Map them into contiguous ++ * kernel virtual space, using a pagetable protection of @prot. ++ * ++ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL ++ * and __GFP_NOFAIL are not supported ++ * ++ * Any use of gfp flags outside of GFP_KERNEL should be consulted ++ * with mm people. ++ * ++ */ ++static void *__vmalloc_node(unsigned long size, unsigned long align, ++ gfp_t gfp_mask, pgprot_t prot, ++ int node, const void *caller) ++{ ++ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, ++ gfp_mask, prot, 0, node, caller); ++} ++ ++void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) ++{ ++ return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE, ++ __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(__vmalloc); ++ ++static inline void *__vmalloc_node_flags(unsigned long size, ++ int node, gfp_t flags) ++{ ++ return __vmalloc_node(size, 1, flags, PAGE_KERNEL, ++ node, __builtin_return_address(0)); ++} ++ ++ ++void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, ++ void *caller) ++{ ++ return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); ++} ++ ++/** ++ * vmalloc - allocate virtually contiguous memory ++ * @size: allocation size ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vmalloc(unsigned long size) ++{ ++ return __vmalloc_node_flags(size, NUMA_NO_NODE, ++ GFP_KERNEL); ++} ++EXPORT_SYMBOL(vmalloc); ++ ++/** ++ * vzalloc - allocate virtually contiguous memory with zero fill ++ * @size: allocation size ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * The memory allocated is set to zero. 
++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vzalloc(unsigned long size) ++{ ++ return __vmalloc_node_flags(size, NUMA_NO_NODE, ++ GFP_KERNEL | __GFP_ZERO); ++} ++EXPORT_SYMBOL(vzalloc); ++ ++/** ++ * vmalloc_user - allocate zeroed virtually contiguous memory for userspace ++ * @size: allocation size ++ * ++ * The resulting memory area is zeroed so it can be mapped to userspace ++ * without leaking data. ++ */ ++void *vmalloc_user(unsigned long size) ++{ ++ struct vm_struct *area; ++ void *ret; ++ ++ ret = __vmalloc_node(size, SHMLBA, ++ GFP_KERNEL | __GFP_ZERO, ++ PAGE_KERNEL, NUMA_NO_NODE, ++ __builtin_return_address(0)); ++ if (ret) { ++ area = find_vm_area(ret); ++ area->flags |= VM_USERMAP; ++ } ++ return ret; ++} ++EXPORT_SYMBOL(vmalloc_user); ++ ++/** ++ * vmalloc_node - allocate memory on a specific node ++ * @size: allocation size ++ * @node: numa node ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++void *vmalloc_node(unsigned long size, int node) ++{ ++ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, ++ node, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(vmalloc_node); ++ ++/** ++ * vzalloc_node - allocate memory on a specific node with zero fill ++ * @size: allocation size ++ * @node: numa node ++ * ++ * Allocate enough pages to cover @size from the page level ++ * allocator and map them into contiguous kernel virtual space. ++ * The memory allocated is set to zero. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc_node() instead. ++ */ ++void *vzalloc_node(unsigned long size, int node) ++{ ++ return __vmalloc_node_flags(size, node, ++ GFP_KERNEL | __GFP_ZERO); ++} ++EXPORT_SYMBOL(vzalloc_node); ++ ++/** ++ * vmalloc_exec - allocate virtually contiguous, executable memory ++ * @size: allocation size ++ * ++ * Kernel-internal function to allocate enough pages to cover @size ++ * the page level allocator and map them into contiguous and ++ * executable kernel virtual space. ++ * ++ * For tight control over page level allocator and protection flags ++ * use __vmalloc() instead. ++ */ ++ ++void *vmalloc_exec(unsigned long size) ++{ ++ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++} ++ ++#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) ++#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) ++#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) ++#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) ++#else ++/* ++ * 64b systems should always have either DMA or DMA32 zones. For others ++ * GFP_DMA32 should do the right thing and use the normal zone. ++ */ ++#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL ++#endif ++ ++/** ++ * vmalloc_32 - allocate virtually contiguous memory (32bit addressable) ++ * @size: allocation size ++ * ++ * Allocate enough 32bit PA addressable pages to cover @size from the ++ * page level allocator and map them into contiguous kernel virtual space. 
++ */ ++void *vmalloc_32(unsigned long size) ++{ ++ return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(vmalloc_32); ++ ++/** ++ * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory ++ * @size: allocation size ++ * ++ * The resulting memory area is 32bit addressable and zeroed so it can be ++ * mapped to userspace without leaking data. ++ */ ++void *vmalloc_32_user(unsigned long size) ++{ ++ struct vm_struct *area; ++ void *ret; ++ ++ ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, ++ NUMA_NO_NODE, __builtin_return_address(0)); ++ if (ret) { ++ area = find_vm_area(ret); ++ area->flags |= VM_USERMAP; ++ } ++ return ret; ++} ++EXPORT_SYMBOL(vmalloc_32_user); ++ ++/* ++ * small helper routine , copy contents to buf from addr. ++ * If the page is not present, fill zero. ++ */ ++ ++static int aligned_vread(char *buf, char *addr, unsigned long count) ++{ ++ struct page *p; ++ int copied = 0; ++ ++ while (count) { ++ unsigned long offset, length; ++ ++ offset = offset_in_page(addr); ++ length = PAGE_SIZE - offset; ++ if (length > count) ++ length = count; ++ p = vmalloc_to_page(addr); ++ /* ++ * To do safe access to this _mapped_ area, we need ++ * lock. But adding lock here means that we need to add ++ * overhead of vmalloc()/vfree() calles for this _debug_ ++ * interface, rarely used. Instead of that, we'll use ++ * kmap() and get small overhead in this access function. ++ */ ++ if (p) { ++ /* ++ * we can expect USER0 is not used (see vread/vwrite's ++ * function description) ++ */ ++ void *map = kmap_atomic(p); ++ memcpy(buf, map + offset, length); ++ kunmap_atomic(map); ++ } else ++ memset(buf, 0, length); ++ ++ addr += length; ++ buf += length; ++ copied += length; ++ count -= length; ++ } ++ return copied; ++} ++ ++static int aligned_vwrite(char *buf, char *addr, unsigned long count) ++{ ++ struct page *p; ++ int copied = 0; ++ ++ while (count) { ++ unsigned long offset, length; ++ ++ offset = offset_in_page(addr); ++ length = PAGE_SIZE - offset; ++ if (length > count) ++ length = count; ++ p = vmalloc_to_page(addr); ++ /* ++ * To do safe access to this _mapped_ area, we need ++ * lock. But adding lock here means that we need to add ++ * overhead of vmalloc()/vfree() calles for this _debug_ ++ * interface, rarely used. Instead of that, we'll use ++ * kmap() and get small overhead in this access function. ++ */ ++ if (p) { ++ /* ++ * we can expect USER0 is not used (see vread/vwrite's ++ * function description) ++ */ ++ void *map = kmap_atomic(p); ++ memcpy(map + offset, buf, length); ++ kunmap_atomic(map); ++ } ++ addr += length; ++ buf += length; ++ copied += length; ++ count -= length; ++ } ++ return copied; ++} ++ ++/** ++ * vread() - read vmalloc area in a safe way. ++ * @buf: buffer for reading data ++ * @addr: vm address. ++ * @count: number of bytes to be read. ++ * ++ * Returns # of bytes which addr and buf should be increased. ++ * (same number to @count). Returns 0 if [addr...addr+count) doesn't ++ * includes any intersect with alive vmalloc area. ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * copy data from that area to a given buffer. If the given memory range ++ * of [addr...addr+count) includes some valid address, data is copied to ++ * proper area of @buf. If there are memory holes, they'll be zero-filled. ++ * IOREMAP area is treated as memory hole and no copy is done. 
++ * ++ * If [addr...addr+count) doesn't includes any intersects with alive ++ * vm_struct area, returns 0. @buf should be kernel's buffer. ++ * ++ * Note: In usual ops, vread() is never necessary because the caller ++ * should know vmalloc() area is valid and can use memcpy(). ++ * This is for routines which have to access vmalloc area without ++ * any informaion, as /dev/kmem. ++ * ++ */ ++ ++long vread(char *buf, char *addr, unsigned long count) ++{ ++ struct vmap_area *va; ++ struct vm_struct *vm; ++ char *vaddr, *buf_start = buf; ++ unsigned long buflen = count; ++ unsigned long n; ++ ++ /* Don't allow overflow */ ++ if ((unsigned long) addr + count < count) ++ count = -(unsigned long) addr; ++ ++ spin_lock(&vmap_area_lock); ++ list_for_each_entry(va, &vmap_area_list, list) { ++ if (!count) ++ break; ++ ++ if (!(va->flags & VM_VM_AREA)) ++ continue; ++ ++ vm = va->vm; ++ vaddr = (char *) vm->addr; ++ if (addr >= vaddr + get_vm_area_size(vm)) ++ continue; ++ while (addr < vaddr) { ++ if (count == 0) ++ goto finished; ++ *buf = '\0'; ++ buf++; ++ addr++; ++ count--; ++ } ++ n = vaddr + get_vm_area_size(vm) - addr; ++ if (n > count) ++ n = count; ++ if (!(vm->flags & VM_IOREMAP)) ++ aligned_vread(buf, addr, n); ++ else /* IOREMAP area is treated as memory hole */ ++ memset(buf, 0, n); ++ buf += n; ++ addr += n; ++ count -= n; ++ } ++finished: ++ spin_unlock(&vmap_area_lock); ++ ++ if (buf == buf_start) ++ return 0; ++ /* zero-fill memory holes */ ++ if (buf != buf_start + buflen) ++ memset(buf, 0, buflen - (buf - buf_start)); ++ ++ return buflen; ++} ++ ++/** ++ * vwrite() - write vmalloc area in a safe way. ++ * @buf: buffer for source data ++ * @addr: vm address. ++ * @count: number of bytes to be read. ++ * ++ * Returns # of bytes which addr and buf should be incresed. ++ * (same number to @count). ++ * If [addr...addr+count) doesn't includes any intersect with valid ++ * vmalloc area, returns 0. ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * copy data from a buffer to the given addr. If specified range of ++ * [addr...addr+count) includes some valid address, data is copied from ++ * proper area of @buf. If there are memory holes, no copy to hole. ++ * IOREMAP area is treated as memory hole and no copy is done. ++ * ++ * If [addr...addr+count) doesn't includes any intersects with alive ++ * vm_struct area, returns 0. @buf should be kernel's buffer. ++ * ++ * Note: In usual ops, vwrite() is never necessary because the caller ++ * should know vmalloc() area is valid and can use memcpy(). ++ * This is for routines which have to access vmalloc area without ++ * any informaion, as /dev/kmem. 
++ */ ++ ++long vwrite(char *buf, char *addr, unsigned long count) ++{ ++ struct vmap_area *va; ++ struct vm_struct *vm; ++ char *vaddr; ++ unsigned long n, buflen; ++ int copied = 0; ++ ++ /* Don't allow overflow */ ++ if ((unsigned long) addr + count < count) ++ count = -(unsigned long) addr; ++ buflen = count; ++ ++ spin_lock(&vmap_area_lock); ++ list_for_each_entry(va, &vmap_area_list, list) { ++ if (!count) ++ break; ++ ++ if (!(va->flags & VM_VM_AREA)) ++ continue; ++ ++ vm = va->vm; ++ vaddr = (char *) vm->addr; ++ if (addr >= vaddr + get_vm_area_size(vm)) ++ continue; ++ while (addr < vaddr) { ++ if (count == 0) ++ goto finished; ++ buf++; ++ addr++; ++ count--; ++ } ++ n = vaddr + get_vm_area_size(vm) - addr; ++ if (n > count) ++ n = count; ++ if (!(vm->flags & VM_IOREMAP)) { ++ aligned_vwrite(buf, addr, n); ++ copied++; ++ } ++ buf += n; ++ addr += n; ++ count -= n; ++ } ++finished: ++ spin_unlock(&vmap_area_lock); ++ if (!copied) ++ return 0; ++ return buflen; ++} ++ ++/** ++ * remap_vmalloc_range_partial - map vmalloc pages to userspace ++ * @vma: vma to cover ++ * @uaddr: target user address to start at ++ * @kaddr: virtual address of vmalloc kernel memory ++ * @pgoff: offset from @kaddr to start at ++ * @size: size of map area ++ * ++ * Returns: 0 for success, -Exxx on failure ++ * ++ * This function checks that @kaddr is a valid vmalloc'ed area, ++ * and that it is big enough to cover the range starting at ++ * @uaddr in @vma. Will return failure if that criteria isn't ++ * met. ++ * ++ * Similar to remap_pfn_range() (see mm/memory.c) ++ */ ++int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, ++ void *kaddr, unsigned long pgoff, ++ unsigned long size) ++{ ++ struct vm_struct *area; ++ unsigned long off; ++ unsigned long end_index; ++ ++ if (check_shl_overflow(pgoff, PAGE_SHIFT, &off)) ++ return -EINVAL; ++ ++ size = PAGE_ALIGN(size); ++ ++ if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr)) ++ return -EINVAL; ++ ++ area = find_vm_area(kaddr); ++ if (!area) ++ return -EINVAL; ++ ++ if (!(area->flags & VM_USERMAP)) ++ return -EINVAL; ++ ++ if (check_add_overflow(size, off, &end_index) || ++ end_index > get_vm_area_size(area)) ++ return -EINVAL; ++ kaddr += off; ++ ++ do { ++ struct page *page = vmalloc_to_page(kaddr); ++ int ret; ++ ++ ret = vm_insert_page(vma, uaddr, page); ++ if (ret) ++ return ret; ++ ++ uaddr += PAGE_SIZE; ++ kaddr += PAGE_SIZE; ++ size -= PAGE_SIZE; ++ } while (size > 0); ++ ++ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ++ ++ return 0; ++} ++EXPORT_SYMBOL(remap_vmalloc_range_partial); ++ ++/** ++ * remap_vmalloc_range - map vmalloc pages to userspace ++ * @vma: vma to cover (map full range of vma) ++ * @addr: vmalloc memory ++ * @pgoff: number of pages into addr before first page to map ++ * ++ * Returns: 0 for success, -Exxx on failure ++ * ++ * This function checks that addr is a valid vmalloc'ed area, and ++ * that it is big enough to cover the vma. Will return failure if ++ * that criteria isn't met. ++ * ++ * Similar to remap_pfn_range() (see mm/memory.c) ++ */ ++int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, ++ unsigned long pgoff) ++{ ++ return remap_vmalloc_range_partial(vma, vma->vm_start, ++ addr, pgoff, ++ vma->vm_end - vma->vm_start); ++} ++EXPORT_SYMBOL(remap_vmalloc_range); ++ ++/* ++ * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose ++ * not to have one. 
++ * ++ * The purpose of this function is to make sure the vmalloc area ++ * mappings are identical in all page-tables in the system. ++ */ ++void __weak vmalloc_sync_mappings(void) ++{ ++} ++ ++void __weak vmalloc_sync_unmappings(void) ++{ ++} ++ ++static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) ++{ ++ pte_t ***p = data; ++ ++ if (p) { ++ *(*p) = pte; ++ (*p)++; ++ } ++ return 0; ++} ++ ++/** ++ * alloc_vm_area - allocate a range of kernel address space ++ * @size: size of the area ++ * @ptes: returns the PTEs for the address space ++ * ++ * Returns: NULL on failure, vm_struct on success ++ * ++ * This function reserves a range of kernel address space, and ++ * allocates pagetables to map that range. No actual mappings ++ * are created. ++ * ++ * If @ptes is non-NULL, pointers to the PTEs (in init_mm) ++ * allocated for the VM area are returned. ++ */ ++struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes) ++{ ++ struct vm_struct *area; ++ ++ area = get_vm_area_caller(size, VM_IOREMAP, ++ __builtin_return_address(0)); ++ if (area == NULL) ++ return NULL; ++ ++ /* ++ * This ensures that page tables are constructed for this region ++ * of kernel virtual address space and mapped into init_mm. ++ */ ++ if (apply_to_page_range(&init_mm, (unsigned long)area->addr, ++ size, f, ptes ? &ptes : NULL)) { ++ free_vm_area(area); ++ return NULL; ++ } ++ ++ return area; ++} ++EXPORT_SYMBOL_GPL(alloc_vm_area); ++ ++void free_vm_area(struct vm_struct *area) ++{ ++ struct vm_struct *ret; ++ ret = remove_vm_area(area->addr); ++ BUG_ON(ret != area); ++ kfree(area); ++} ++EXPORT_SYMBOL_GPL(free_vm_area); ++ ++#ifdef CONFIG_SMP ++static struct vmap_area *node_to_va(struct rb_node *n) ++{ ++ return rb_entry_safe(n, struct vmap_area, rb_node); ++} ++ ++/** ++ * pvm_find_next_prev - find the next and prev vmap_area surrounding @end ++ * @end: target address ++ * @pnext: out arg for the next vmap_area ++ * @pprev: out arg for the previous vmap_area ++ * ++ * Returns: %true if either or both of next and prev are found, ++ * %false if no vmap_area exists ++ * ++ * Find vmap_areas end addresses of which enclose @end. ie. if not ++ * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. ++ */ ++static bool pvm_find_next_prev(unsigned long end, ++ struct vmap_area **pnext, ++ struct vmap_area **pprev) ++{ ++ struct rb_node *n = vmap_area_root.rb_node; ++ struct vmap_area *va = NULL; ++ ++ while (n) { ++ va = rb_entry(n, struct vmap_area, rb_node); ++ if (end < va->va_end) ++ n = n->rb_left; ++ else if (end > va->va_end) ++ n = n->rb_right; ++ else ++ break; ++ } ++ ++ if (!va) ++ return false; ++ ++ if (va->va_end > end) { ++ *pnext = va; ++ *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); ++ } else { ++ *pprev = va; ++ *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); ++ } ++ return true; ++} ++ ++/** ++ * pvm_determine_end - find the highest aligned address between two vmap_areas ++ * @pnext: in/out arg for the next vmap_area ++ * @pprev: in/out arg for the previous vmap_area ++ * @align: alignment ++ * ++ * Returns: determined end address ++ * ++ * Find the highest aligned address between *@pnext and *@pprev below ++ * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned ++ * down address is between the end addresses of the two vmap_areas. ++ * ++ * Please note that the address returned by this function may fall ++ * inside *@pnext vmap_area. The caller is responsible for checking ++ * that. 
++ */ ++static unsigned long pvm_determine_end(struct vmap_area **pnext, ++ struct vmap_area **pprev, ++ unsigned long align) ++{ ++ const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); ++ unsigned long addr; ++ ++ if (*pnext) ++ addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); ++ else ++ addr = vmalloc_end; ++ ++ while (*pprev && (*pprev)->va_end > addr) { ++ *pnext = *pprev; ++ *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); ++ } ++ ++ return addr; ++} ++ ++/** ++ * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator ++ * @offsets: array containing offset of each area ++ * @sizes: array containing size of each area ++ * @nr_vms: the number of areas to allocate ++ * @align: alignment, all entries in @offsets and @sizes must be aligned to this ++ * ++ * Returns: kmalloc'd vm_struct pointer array pointing to allocated ++ * vm_structs on success, %NULL on failure ++ * ++ * Percpu allocator wants to use congruent vm areas so that it can ++ * maintain the offsets among percpu areas. This function allocates ++ * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to ++ * be scattered pretty far, distance between two areas easily going up ++ * to gigabytes. To avoid interacting with regular vmallocs, these ++ * areas are allocated from top. ++ * ++ * Despite its complicated look, this allocator is rather simple. It ++ * does everything top-down and scans areas from the end looking for ++ * matching slot. While scanning, if any of the areas overlaps with ++ * existing vmap_area, the base address is pulled down to fit the ++ * area. Scanning is repeated till all the areas fit and then all ++ * necessary data structures are inserted and the result is returned. ++ */ ++struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, ++ const size_t *sizes, int nr_vms, ++ size_t align) ++{ ++ const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); ++ const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); ++ struct vmap_area **vas, *prev, *next; ++ struct vm_struct **vms; ++ int area, area2, last_area, term_area; ++ unsigned long base, start, end, last_end; ++ bool purged = false; ++ ++ /* verify parameters and allocate data structures */ ++ BUG_ON(offset_in_page(align) || !is_power_of_2(align)); ++ for (last_area = 0, area = 0; area < nr_vms; area++) { ++ start = offsets[area]; ++ end = start + sizes[area]; ++ ++ /* is everything aligned properly? 
*/ ++ BUG_ON(!IS_ALIGNED(offsets[area], align)); ++ BUG_ON(!IS_ALIGNED(sizes[area], align)); ++ ++ /* detect the area with the highest address */ ++ if (start > offsets[last_area]) ++ last_area = area; ++ ++ for (area2 = area + 1; area2 < nr_vms; area2++) { ++ unsigned long start2 = offsets[area2]; ++ unsigned long end2 = start2 + sizes[area2]; ++ ++ BUG_ON(start2 < end && start < end2); ++ } ++ } ++ last_end = offsets[last_area] + sizes[last_area]; ++ ++ if (vmalloc_end - vmalloc_start < last_end) { ++ WARN_ON(true); ++ return NULL; ++ } ++ ++ vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL); ++ vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL); ++ if (!vas || !vms) ++ goto err_free2; ++ ++ for (area = 0; area < nr_vms; area++) { ++ vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL); ++ vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); ++ if (!vas[area] || !vms[area]) ++ goto err_free; ++ } ++retry: ++ spin_lock(&vmap_area_lock); ++ ++ /* start scanning - we scan from the top, begin with the last area */ ++ area = term_area = last_area; ++ start = offsets[area]; ++ end = start + sizes[area]; ++ ++ if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { ++ base = vmalloc_end - last_end; ++ goto found; ++ } ++ base = pvm_determine_end(&next, &prev, align) - end; ++ ++ while (true) { ++ BUG_ON(next && next->va_end <= base + end); ++ BUG_ON(prev && prev->va_end > base + end); ++ ++ /* ++ * base might have underflowed, add last_end before ++ * comparing. ++ */ ++ if (base + last_end < vmalloc_start + last_end) { ++ spin_unlock(&vmap_area_lock); ++ if (!purged) { ++ purge_vmap_area_lazy(); ++ purged = true; ++ goto retry; ++ } ++ goto err_free; ++ } ++ ++ /* ++ * If next overlaps, move base downwards so that it's ++ * right below next and then recheck. ++ */ ++ if (next && next->va_start < base + end) { ++ base = pvm_determine_end(&next, &prev, align) - end; ++ term_area = area; ++ continue; ++ } ++ ++ /* ++ * If prev overlaps, shift down next and prev and move ++ * base so that it's right below new next and then ++ * recheck. ++ */ ++ if (prev && prev->va_end > base + start) { ++ next = prev; ++ prev = node_to_va(rb_prev(&next->rb_node)); ++ base = pvm_determine_end(&next, &prev, align) - end; ++ term_area = area; ++ continue; ++ } ++ ++ /* ++ * This area fits, move on to the previous one. If ++ * the previous one is the terminal one, we're done. 
++ */ ++ area = (area + nr_vms - 1) % nr_vms; ++ if (area == term_area) ++ break; ++ start = offsets[area]; ++ end = start + sizes[area]; ++ pvm_find_next_prev(base + end, &next, &prev); ++ } ++found: ++ /* we've found a fitting base, insert all va's */ ++ for (area = 0; area < nr_vms; area++) { ++ struct vmap_area *va = vas[area]; ++ ++ va->va_start = base + offsets[area]; ++ va->va_end = va->va_start + sizes[area]; ++ __insert_vmap_area(va); ++ } ++ ++ vmap_area_pcpu_hole = base + offsets[last_area]; ++ ++ spin_unlock(&vmap_area_lock); ++ ++ /* insert all vm's */ ++ for (area = 0; area < nr_vms; area++) ++ setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC, ++ pcpu_get_vm_areas); ++ ++ kfree(vas); ++ return vms; ++ ++err_free: ++ for (area = 0; area < nr_vms; area++) { ++ kfree(vas[area]); ++ kfree(vms[area]); ++ } ++err_free2: ++ kfree(vas); ++ kfree(vms); ++ return NULL; ++} ++ ++/** ++ * pcpu_free_vm_areas - free vmalloc areas for percpu allocator ++ * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() ++ * @nr_vms: the number of allocated areas ++ * ++ * Free vm_structs and the array allocated by pcpu_get_vm_areas(). ++ */ ++void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) ++{ ++ int i; ++ ++ for (i = 0; i < nr_vms; i++) ++ free_vm_area(vms[i]); ++ kfree(vms); ++} ++#endif /* CONFIG_SMP */ ++ ++#ifdef CONFIG_PROC_FS ++static void *s_start(struct seq_file *m, loff_t *pos) ++ __acquires(&vmap_area_lock) ++{ ++ spin_lock(&vmap_area_lock); ++ return seq_list_start(&vmap_area_list, *pos); ++} ++ ++static void *s_next(struct seq_file *m, void *p, loff_t *pos) ++{ ++ return seq_list_next(p, &vmap_area_list, pos); ++} ++ ++static void s_stop(struct seq_file *m, void *p) ++ __releases(&vmap_area_lock) ++{ ++ spin_unlock(&vmap_area_lock); ++} ++ ++static void show_numa_info(struct seq_file *m, struct vm_struct *v) ++{ ++ if (IS_ENABLED(CONFIG_NUMA)) { ++ unsigned int nr, *counters = m->private; ++ ++ if (!counters) ++ return; ++ ++ if (v->flags & VM_UNINITIALIZED) ++ return; ++ /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ ++ smp_rmb(); ++ ++ memset(counters, 0, nr_node_ids * sizeof(unsigned int)); ++ ++ for (nr = 0; nr < v->nr_pages; nr++) ++ counters[page_to_nid(v->pages[nr])]++; ++ ++ for_each_node_state(nr, N_HIGH_MEMORY) ++ if (counters[nr]) ++ seq_printf(m, " N%u=%u", nr, counters[nr]); ++ } ++} ++ ++static int s_show(struct seq_file *m, void *p) ++{ ++ struct vmap_area *va; ++ struct vm_struct *v; ++ ++ va = list_entry(p, struct vmap_area, list); ++ ++ /* ++ * s_show can encounter race with remove_vm_area, !VM_VM_AREA on ++ * behalf of vmap area is being tear down or vm_map_ram allocation. ++ */ ++ if (!(va->flags & VM_VM_AREA)) { ++ seq_printf(m, "0x%pK-0x%pK %7ld %s\n", ++ (void *)va->va_start, (void *)va->va_end, ++ va->va_end - va->va_start, ++ va->flags & VM_LAZY_FREE ? 
"unpurged vm_area" : "vm_map_ram"); ++ ++ return 0; ++ } ++ ++ v = va->vm; ++ ++ seq_printf(m, "0x%pK-0x%pK %7ld", ++ v->addr, v->addr + v->size, v->size); ++ ++ if (v->caller) ++ seq_printf(m, " %pS", v->caller); ++ ++ if (v->nr_pages) ++ seq_printf(m, " pages=%d", v->nr_pages); ++ ++ if (v->phys_addr) ++ seq_printf(m, " phys=%pa", &v->phys_addr); ++ ++ if (v->flags & VM_IOREMAP) ++ seq_puts(m, " ioremap"); ++ ++ if (v->flags & VM_ALLOC) ++ seq_puts(m, " vmalloc"); ++ ++ if (v->flags & VM_MAP) ++ seq_puts(m, " vmap"); ++ ++ if (v->flags & VM_USERMAP) ++ seq_puts(m, " user"); ++ ++ if (is_vmalloc_addr(v->pages)) ++ seq_puts(m, " vpages"); ++ ++ show_numa_info(m, v); ++ seq_putc(m, '\n'); ++ return 0; ++} ++ ++static const struct seq_operations vmalloc_op = { ++ .start = s_start, ++ .next = s_next, ++ .stop = s_stop, ++ .show = s_show, ++}; ++ ++static int __init proc_vmalloc_init(void) ++{ ++ if (IS_ENABLED(CONFIG_NUMA)) ++ proc_create_seq_private("vmallocinfo", 0400, NULL, ++ &vmalloc_op, ++ nr_node_ids * sizeof(unsigned int), NULL); ++ else ++ proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op); ++ return 0; ++} ++module_init(proc_vmalloc_init); ++ ++#endif ++ diff --git a/kernel.spec b/kernel.spec index cbe324ac4cc75dc5f2e2ce693cd3dcf8f27e9fd8..40d13d4fd07e9a10f21abec2c46436a23308a0da 100644 --- a/kernel.spec +++ b/kernel.spec @@ -24,7 +24,7 @@ Name: kernel-xeno Version: 4.19.90 -Release: %{hulkrelease}.0053.3.xeno3.1 +Release: %{hulkrelease}.0053.4.xeno3.1 Summary: Linux Kernel License: GPLv2 URL: http://www.kernel.org/ @@ -36,13 +36,20 @@ Source0: linux-%{version}.tar.gz#/kernel.tar.gz Source10: sign-modules Source11: x509.genkey Source12: extra_certificates -Patch0: ipipe-core-4.19.55-oe1.patch -Patch1: enable_irq.patch -Patch2: cobalt-core-3.1-4.19.90.patch -Patch3: cobalt-core-3.1-4.19.90-oe1.patch +Patch0: ipipe-core-4.19.55-oe1_arm64.patch +Patch1: enable_irq_arm64.patch +Patch2: cobalt-core-3.1-4.19.90_arm64.patch +Patch3: cobalt-core-3.1-4.19.90-oe1_arm64.patch Patch4: openeuler_defconfig_arm64.patch Patch5: openeuler_defconfig_arm64_2.patch +Patch1000: ipipe-core-4.19.90-oe1_x86.patch +Patch1001: cobalt-core-3.1-4.19.90_x86.patch +Patch1002: cobalt-core-3.1-4.19.90-oe1_x86.patch +Patch1003: openeuler_defconfig_x86.patch +Patch1004: openeuler_defconfig_x86_2.patch + + %if 0%{?with_kabichk} Source18: check-kabi Source20: Module.kabi_aarch64 @@ -263,12 +270,22 @@ Applypatches() Applypatches series.conf %{_builddir}/kernel-%{version}/linux-%{KernelVer} %endif +%ifarch aarch64 %patch0 -p1 %patch1 -p1 %patch2 -p1 %patch3 -p1 %patch4 -p1 %patch5 -p1 +%endif + +%ifarch x86_64 +%patch1000 -p1 +%patch1001 -p1 +%patch1002 -p1 +%patch1003 -p1 +%patch1004 -p1 +%endif touch .scmversion @@ -815,6 +832,8 @@ fi %endif %changelog +* Tue Jun 1 2021 dinglili - 4.19.90-2012.4.0.0053.4 +- modify src to support multi arch:arm x86 * Tue May 11 2021 dinglili - 4.19.90-2012.4.0.0053.2 - openeuler_defconfig:update the config to support xenomai - cobalt diff --git a/openeuler_defconfig_x86.patch b/openeuler_defconfig_x86.patch new file mode 100755 index 0000000000000000000000000000000000000000..6a4ef2a51b6312300eaf48da3c685001c179621b --- /dev/null +++ b/openeuler_defconfig_x86.patch @@ -0,0 +1,7264 @@ +--- kernel/arch/x86/configs/openeuler_defconfig 2020-12-21 21:59:17.000000000 +0800 ++++ kernel-new/arch/x86/configs/openeuler_defconfig 2021-04-29 14:39:17.056189482 +0800 +@@ -1,135 +1,24 @@ +-CONFIG_CC_IS_GCC=y +-CONFIG_GCC_VERSION=50400 +-CONFIG_CLANG_VERSION=0 +-CONFIG_CC_HAS_ASM_GOTO=y 
+-CONFIG_IRQ_WORK=y +-CONFIG_BUILDTIME_EXTABLE_SORT=y +-CONFIG_THREAD_INFO_IN_TASK=y +- +-# +-# General setup +-# +-CONFIG_INIT_ENV_ARG_LIMIT=32 +-# CONFIG_COMPILE_TEST is not set +-CONFIG_LOCALVERSION="" + # CONFIG_LOCALVERSION_AUTO is not set +-CONFIG_BUILD_SALT="" +-CONFIG_HAVE_KERNEL_GZIP=y +-CONFIG_HAVE_KERNEL_BZIP2=y +-CONFIG_HAVE_KERNEL_LZMA=y +-CONFIG_HAVE_KERNEL_XZ=y +-CONFIG_HAVE_KERNEL_LZO=y +-CONFIG_HAVE_KERNEL_LZ4=y +-CONFIG_KERNEL_GZIP=y +-# CONFIG_KERNEL_BZIP2 is not set +-# CONFIG_KERNEL_LZMA is not set +-# CONFIG_KERNEL_XZ is not set +-# CONFIG_KERNEL_LZO is not set +-# CONFIG_KERNEL_LZ4 is not set +-CONFIG_DEFAULT_HOSTNAME="(none)" +-CONFIG_SWAP=y + CONFIG_SYSVIPC=y +-CONFIG_SYSVIPC_SYSCTL=y + CONFIG_POSIX_MQUEUE=y +-CONFIG_POSIX_MQUEUE_SYSCTL=y +-CONFIG_CROSS_MEMORY_ATTACH=y + # CONFIG_USELIB is not set +-CONFIG_AUDIT=y +-CONFIG_HAVE_ARCH_AUDITSYSCALL=y +-CONFIG_AUDITSYSCALL=y +-CONFIG_AUDIT_WATCH=y +-CONFIG_AUDIT_TREE=y + # CONFIG_KTASK is not set +- +-# +-# IRQ subsystem +-# +-CONFIG_GENERIC_IRQ_PROBE=y +-CONFIG_GENERIC_IRQ_SHOW=y +-CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y +-CONFIG_GENERIC_PENDING_IRQ=y +-CONFIG_GENERIC_IRQ_MIGRATION=y +-CONFIG_IRQ_DOMAIN=y +-CONFIG_IRQ_DOMAIN_HIERARCHY=y +-CONFIG_GENERIC_MSI_IRQ=y +-CONFIG_GENERIC_MSI_IRQ_DOMAIN=y +-CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y +-CONFIG_GENERIC_IRQ_RESERVATION_MODE=y +-CONFIG_IRQ_FORCED_THREADING=y +-CONFIG_SPARSE_IRQ=y +-# CONFIG_GENERIC_IRQ_DEBUGFS is not set +-CONFIG_CLOCKSOURCE_WATCHDOG=y +-CONFIG_ARCH_CLOCKSOURCE_DATA=y +-CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y +-CONFIG_GENERIC_TIME_VSYSCALL=y +-CONFIG_GENERIC_CLOCKEVENTS=y +-CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +-CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y +-CONFIG_GENERIC_CMOS_UPDATE=y +- +-# +-# Timers subsystem +-# +-CONFIG_TICK_ONESHOT=y +-CONFIG_NO_HZ_COMMON=y +-# CONFIG_HZ_PERIODIC is not set +-# CONFIG_NO_HZ_IDLE is not set +-CONFIG_NO_HZ_FULL=y + CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y +-# CONFIG_PREEMPT_NONE is not set +-CONFIG_PREEMPT_VOLUNTARY=y +-# CONFIG_PREEMPT is not set +- +-# +-# CPU/Task time and stats accounting +-# +-CONFIG_VIRT_CPU_ACCOUNTING=y +-CONFIG_VIRT_CPU_ACCOUNTING_GEN=y ++CONFIG_PREEMPT=y + CONFIG_IRQ_TIME_ACCOUNTING=y +-CONFIG_HAVE_SCHED_AVG_IRQ=y + CONFIG_BSD_PROCESS_ACCT=y + CONFIG_BSD_PROCESS_ACCT_V3=y +-CONFIG_TASKSTATS=y +-CONFIG_TASK_DELAY_ACCT=y + CONFIG_TASK_XACCT=y + CONFIG_TASK_IO_ACCOUNTING=y +-CONFIG_CPU_ISOLATION=y +- +-# +-# RCU Subsystem +-# +-CONFIG_TREE_RCU=y +-# CONFIG_RCU_EXPERT is not set +-CONFIG_SRCU=y +-CONFIG_TREE_SRCU=y +-CONFIG_RCU_STALL_COMMON=y +-CONFIG_RCU_NEED_SEGCBLIST=y +-CONFIG_CONTEXT_TRACKING=y +-# CONFIG_CONTEXT_TRACKING_FORCE is not set +-CONFIG_RCU_NOCB_CPU=y +-CONFIG_BUILD_BIN2C=y +-# CONFIG_IKCONFIG is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y + CONFIG_LOG_BUF_SHIFT=20 +-CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 +-CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 +-CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y +-CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y +-CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y +-CONFIG_ARCH_SUPPORTS_INT128=y +-CONFIG_NUMA_BALANCING=y +-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y + CONFIG_CGROUPS=y +-CONFIG_PAGE_COUNTER=y + CONFIG_MEMCG=y + CONFIG_MEMCG_SWAP=y +-CONFIG_MEMCG_SWAP_ENABLED=y +-CONFIG_MEMCG_KMEM=y + CONFIG_BLK_CGROUP=y +-# CONFIG_DEBUG_BLK_CGROUP is not set +-CONFIG_CGROUP_WRITEBACK=y + CONFIG_CGROUP_SCHED=y +-CONFIG_FAIR_GROUP_SCHED=y + CONFIG_CFS_BANDWIDTH=y + CONFIG_RT_GROUP_SCHED=y + CONFIG_CGROUP_PIDS=y +@@ -137,913 +26,167 @@ + CONFIG_CGROUP_FREEZER=y + 
CONFIG_CGROUP_HUGETLB=y + CONFIG_CPUSETS=y +-CONFIG_PROC_PID_CPUSET=y + CONFIG_CGROUP_DEVICE=y + CONFIG_CGROUP_CPUACCT=y + CONFIG_CGROUP_PERF=y + CONFIG_CGROUP_BPF=y +-# CONFIG_CGROUP_DEBUG is not set +-CONFIG_SOCK_CGROUP_DATA=y +-# CONFIG_CGROUP_FILES is not set +-CONFIG_NAMESPACES=y +-CONFIG_UTS_NS=y +-CONFIG_IPC_NS=y + CONFIG_USER_NS=y +-CONFIG_PID_NS=y +-CONFIG_NET_NS=y + CONFIG_CHECKPOINT_RESTORE=y +-CONFIG_SCHED_AUTOGROUP=y +-# CONFIG_SYSFS_DEPRECATED is not set +-CONFIG_RELAY=y + CONFIG_BLK_DEV_INITRD=y +-CONFIG_INITRAMFS_SOURCE="" +-CONFIG_RD_GZIP=y +-CONFIG_RD_BZIP2=y +-CONFIG_RD_LZMA=y +-CONFIG_RD_XZ=y +-CONFIG_RD_LZO=y +-CONFIG_RD_LZ4=y +-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +-CONFIG_SYSCTL=y +-CONFIG_ANON_INODES=y +-CONFIG_HAVE_UID16=y +-CONFIG_SYSCTL_EXCEPTION_TRACE=y +-CONFIG_HAVE_PCSPKR_PLATFORM=y +-CONFIG_BPF=y +-# CONFIG_EXPERT is not set +-CONFIG_UID16=y +-CONFIG_MULTIUSER=y +-CONFIG_SGETMASK_SYSCALL=y +-CONFIG_SYSFS_SYSCALL=y +-CONFIG_FHANDLE=y +-CONFIG_POSIX_TIMERS=y +-CONFIG_PRINTK=y +-CONFIG_PRINTK_NMI=y +-CONFIG_BUG=y +-CONFIG_ELF_CORE=y +-CONFIG_PCSPKR_PLATFORM=y +-CONFIG_BASE_FULL=y +-CONFIG_FUTEX=y +-CONFIG_FUTEX_PI=y +-CONFIG_EPOLL=y +-CONFIG_SIGNALFD=y +-CONFIG_TIMERFD=y +-CONFIG_EVENTFD=y +-CONFIG_SHMEM=y +-CONFIG_AIO=y +-CONFIG_ADVISE_SYSCALLS=y +-CONFIG_MEMBARRIER=y +-CONFIG_KALLSYMS=y + CONFIG_KALLSYMS_ALL=y +-CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y +-CONFIG_KALLSYMS_BASE_RELATIVE=y + CONFIG_BPF_SYSCALL=y + CONFIG_BPF_JIT_ALWAYS_ON=y + CONFIG_USERFAULTFD=y +-CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y +-CONFIG_RSEQ=y +-# CONFIG_EMBEDDED is not set +-CONFIG_HAVE_PERF_EVENTS=y +- +-# +-# Kernel Performance Events And Counters +-# +-CONFIG_PERF_EVENTS=y +-# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +-CONFIG_VM_EVENT_COUNTERS=y +-CONFIG_SLUB_DEBUG=y + # CONFIG_COMPAT_BRK is not set +-# CONFIG_SLAB is not set +-CONFIG_SLUB=y +-CONFIG_SLAB_MERGE_DEFAULT=y + CONFIG_SLAB_FREELIST_RANDOM=y +-# CONFIG_SLAB_FREELIST_HARDENED is not set +-CONFIG_SLUB_CPU_PARTIAL=y +-CONFIG_SYSTEM_DATA_VERIFICATION=y + CONFIG_PROFILING=y +-CONFIG_TRACEPOINTS=y +-CONFIG_64BIT=y +-CONFIG_X86_64=y +-CONFIG_X86=y +-CONFIG_INSTRUCTION_DECODER=y +-CONFIG_OUTPUT_FORMAT="elf64-x86-64" +-CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" +-CONFIG_LOCKDEP_SUPPORT=y +-CONFIG_STACKTRACE_SUPPORT=y +-CONFIG_MMU=y +-CONFIG_ARCH_MMAP_RND_BITS_MIN=28 +-CONFIG_ARCH_MMAP_RND_BITS_MAX=32 +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 +-CONFIG_GENERIC_ISA_DMA=y +-CONFIG_GENERIC_BUG=y +-CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y +-CONFIG_GENERIC_HWEIGHT=y +-CONFIG_ARCH_MAY_HAVE_PC_FDC=y +-CONFIG_RWSEM_XCHGADD_ALGORITHM=y +-CONFIG_GENERIC_CALIBRATE_DELAY=y +-CONFIG_ARCH_HAS_CPU_RELAX=y +-CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +-CONFIG_ARCH_HAS_FILTER_PGPROT=y +-CONFIG_HAVE_SETUP_PER_CPU_AREA=y +-CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y +-CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y +-CONFIG_ARCH_HIBERNATION_POSSIBLE=y +-CONFIG_ARCH_SUSPEND_POSSIBLE=y +-CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y +-CONFIG_ARCH_WANT_GENERAL_HUGETLB=y +-CONFIG_ZONE_DMA32=y +-CONFIG_AUDIT_ARCH=y +-CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +-CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y +-CONFIG_HAVE_INTEL_TXT=y +-CONFIG_X86_64_SMP=y +-CONFIG_ARCH_SUPPORTS_UPROBES=y +-CONFIG_FIX_EARLYCON_MEM=y +-CONFIG_DYNAMIC_PHYSICAL_MASK=y +-CONFIG_PGTABLE_LEVELS=5 +-CONFIG_CC_HAS_SANE_STACKPROTECTOR=y +- +-# +-# Processor type and features +-# +-CONFIG_ZONE_DMA=y + CONFIG_SMP=y 
+-CONFIG_X86_FEATURE_NAMES=y + CONFIG_X86_X2APIC=y +-CONFIG_X86_MPPARSE=y +-# CONFIG_GOLDFISH is not set +-CONFIG_RETPOLINE=y +-# CONFIG_INTEL_RDT is not set +-CONFIG_X86_EXTENDED_PLATFORM=y +-# CONFIG_X86_NUMACHIP is not set +-# CONFIG_X86_VSMP is not set +-CONFIG_X86_UV=y +-# CONFIG_X86_GOLDFISH is not set +-# CONFIG_X86_INTEL_MID is not set ++CONFIG_INTEL_RDT=y + CONFIG_X86_INTEL_LPSS=y + CONFIG_X86_AMD_PLATFORM_DEVICE=y +-CONFIG_IOSF_MBI=y +-# CONFIG_IOSF_MBI_DEBUG is not set +-CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y +-CONFIG_SCHED_OMIT_FRAME_POINTER=y + CONFIG_HYPERVISOR_GUEST=y +-CONFIG_PARAVIRT=y +-# CONFIG_PARAVIRT_DEBUG is not set +-CONFIG_PARAVIRT_SPINLOCKS=y +-# CONFIG_QUEUED_LOCK_STAT is not set +-CONFIG_XEN=y +-# CONFIG_XEN_PV is not set +-CONFIG_XEN_PVHVM=y +-CONFIG_XEN_PVHVM_SMP=y +-CONFIG_XEN_SAVE_RESTORE=y +-# CONFIG_XEN_DEBUG_FS is not set +-# CONFIG_XEN_PVH is not set +-CONFIG_KVM_GUEST=y +-# CONFIG_KVM_DEBUG_FS is not set +-CONFIG_PARAVIRT_TIME_ACCOUNTING=y +-CONFIG_PARAVIRT_CLOCK=y +-# CONFIG_JAILHOUSE_GUEST is not set +-CONFIG_NO_BOOTMEM=y +-# CONFIG_MK8 is not set +-# CONFIG_MPSC is not set +-# CONFIG_MCORE2 is not set +-# CONFIG_MATOM is not set +-CONFIG_GENERIC_CPU=y +-CONFIG_X86_INTERNODE_CACHE_SHIFT=6 +-CONFIG_X86_L1_CACHE_SHIFT=6 +-CONFIG_X86_TSC=y +-CONFIG_X86_CMPXCHG64=y +-CONFIG_X86_CMOV=y +-CONFIG_X86_MINIMUM_CPU_FAMILY=64 +-CONFIG_X86_DEBUGCTLMSR=y +-CONFIG_CPU_SUP_INTEL=y +-CONFIG_CPU_SUP_AMD=y +-CONFIG_CPU_SUP_CENTAUR=y +-CONFIG_HPET_TIMER=y +-CONFIG_HPET_EMULATE_RTC=y +-CONFIG_DMI=y +-# CONFIG_GART_IOMMU is not set +-# CONFIG_CALGARY_IOMMU is not set +-CONFIG_MAXSMP=y +-CONFIG_NR_CPUS_RANGE_BEGIN=8192 +-CONFIG_NR_CPUS_RANGE_END=8192 +-CONFIG_NR_CPUS_DEFAULT=8192 +-CONFIG_NR_CPUS=8192 +-CONFIG_SCHED_SMT=y +-CONFIG_SCHED_MC=y +-CONFIG_SCHED_MC_PRIO=y +-CONFIG_X86_LOCAL_APIC=y +-CONFIG_X86_IO_APIC=y ++CONFIG_MCORE2=y ++# CONFIG_SCHED_MC_PRIO is not set + CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +-CONFIG_X86_MCE=y + CONFIG_X86_MCELOG_LEGACY=y +-CONFIG_X86_MCE_INTEL=y +-CONFIG_X86_MCE_AMD=y +-CONFIG_X86_MCE_THRESHOLD=y + CONFIG_X86_MCE_INJECT=m +-CONFIG_X86_THERMAL_VECTOR=y +- +-# +-# Performance monitoring +-# + CONFIG_PERF_EVENTS_INTEL_UNCORE=m + CONFIG_PERF_EVENTS_INTEL_RAPL=m + CONFIG_PERF_EVENTS_INTEL_CSTATE=m + CONFIG_PERF_EVENTS_AMD_POWER=m +-CONFIG_X86_16BIT=y +-CONFIG_X86_ESPFIX64=y +-CONFIG_X86_VSYSCALL_EMULATION=y + CONFIG_I8K=m +-CONFIG_MICROCODE=y +-CONFIG_MICROCODE_INTEL=y + CONFIG_MICROCODE_AMD=y +-CONFIG_MICROCODE_OLD_INTERFACE=y + CONFIG_X86_MSR=y + CONFIG_X86_CPUID=y + CONFIG_X86_5LEVEL=y +-CONFIG_X86_DIRECT_GBPAGES=y +-CONFIG_ARCH_HAS_MEM_ENCRYPT=y + CONFIG_AMD_MEM_ENCRYPT=y + # CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set + CONFIG_NUMA=y +-# CONFIG_NUMA_AWARE_SPINLOCKS is not set +-CONFIG_AMD_NUMA=y +-CONFIG_X86_64_ACPI_NUMA=y +-CONFIG_NODES_SPAN_OTHER_NODES=y + CONFIG_NUMA_EMU=y + CONFIG_NODES_SHIFT=10 +-CONFIG_ARCH_SPARSEMEM_ENABLE=y +-CONFIG_ARCH_SPARSEMEM_DEFAULT=y +-CONFIG_ARCH_SELECT_MEMORY_MODEL=y +-# CONFIG_ARCH_MEMORY_PROBE is not set +-CONFIG_ARCH_PROC_KCORE_TEXT=y +-CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 +-CONFIG_X86_PMEM_LEGACY_DEVICE=y + CONFIG_X86_PMEM_LEGACY=m + CONFIG_X86_CHECK_BIOS_CORRUPTION=y + # CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK is not set +-CONFIG_X86_RESERVE_LOW=64 +-CONFIG_MTRR=y +-CONFIG_MTRR_SANITIZER=y + CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 +-CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 +-CONFIG_X86_PAT=y +-CONFIG_ARCH_USES_PG_UNCACHED=y +-CONFIG_ARCH_RANDOM=y +-CONFIG_X86_SMAP=y 
+-CONFIG_X86_INTEL_UMIP=y +-# CONFIG_X86_INTEL_MPX is not set +-CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y +-CONFIG_X86_INTEL_TSX_MODE_OFF=y +-# CONFIG_X86_INTEL_TSX_MODE_ON is not set +-# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set + CONFIG_EFI=y + CONFIG_EFI_STUB=y + CONFIG_EFI_MIXED=y +-CONFIG_SECCOMP=y +-# CONFIG_HZ_100 is not set +-# CONFIG_HZ_250 is not set +-# CONFIG_HZ_300 is not set +-CONFIG_HZ_1000=y +-CONFIG_HZ=1000 +-CONFIG_SCHED_HRTICK=y + CONFIG_KEXEC=y + CONFIG_KEXEC_FILE=y +-CONFIG_ARCH_HAS_KEXEC_PURGATORY=y + CONFIG_KEXEC_VERIFY_SIG=y + CONFIG_KEXEC_BZIMAGE_VERIFY_SIG=y + CONFIG_CRASH_DUMP=y + CONFIG_KEXEC_JUMP=y +-CONFIG_PHYSICAL_START=0x1000000 +-CONFIG_RELOCATABLE=y +-CONFIG_RANDOMIZE_BASE=y +-CONFIG_X86_NEED_RELOCS=y +-CONFIG_PHYSICAL_ALIGN=0x200000 +-CONFIG_DYNAMIC_MEMORY_LAYOUT=y +-CONFIG_RANDOMIZE_MEMORY=y +-CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa +-CONFIG_HOTPLUG_CPU=y + CONFIG_BOOTPARAM_HOTPLUG_CPU0=y +-# CONFIG_DEBUG_HOTPLUG_CPU0 is not set +-# CONFIG_COMPAT_VDSO is not set +-CONFIG_LEGACY_VSYSCALL_EMULATE=y +-# CONFIG_LEGACY_VSYSCALL_NONE is not set +-# CONFIG_CMDLINE_BOOL is not set +-CONFIG_MODIFY_LDT_SYSCALL=y +-CONFIG_HAVE_LIVEPATCH_FTRACE=y +-CONFIG_HAVE_LIVEPATCH_WO_FTRACE=y +- +-# +-# Enable Livepatch +-# + CONFIG_LIVEPATCH=y +-# CONFIG_LIVEPATCH_FTRACE is not set +-CONFIG_LIVEPATCH_WO_FTRACE=y +-CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY=y +-# CONFIG_LIVEPATCH_STACK is not set +-CONFIG_LIVEPATCH_RESTRICT_KPROBE=y +-CONFIG_ARCH_HAS_ADD_PAGES=y +-CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +-CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +-CONFIG_USE_PERCPU_NUMA_NODE_ID=y +-CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y +-CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y +-CONFIG_ARCH_ENABLE_THP_MIGRATION=y +- +-# +-# Power management and ACPI options +-# +-CONFIG_ARCH_HIBERNATION_HEADER=y +-CONFIG_SUSPEND=y +-CONFIG_SUSPEND_FREEZER=y +-CONFIG_HIBERNATE_CALLBACKS=y + CONFIG_HIBERNATION=y +-CONFIG_PM_STD_PARTITION="" +-CONFIG_PM_SLEEP=y +-CONFIG_PM_SLEEP_SMP=y +-# CONFIG_PM_AUTOSLEEP is not set +-# CONFIG_PM_WAKELOCKS is not set +-CONFIG_PM=y + CONFIG_PM_DEBUG=y +-# CONFIG_PM_ADVANCED_DEBUG is not set +-# CONFIG_PM_TEST_SUSPEND is not set +-CONFIG_PM_SLEEP_DEBUG=y +-# CONFIG_PM_TRACE_RTC is not set +-CONFIG_PM_CLK=y +-CONFIG_PM_GENERIC_DOMAINS=y +-# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +-CONFIG_PM_GENERIC_DOMAINS_SLEEP=y +-CONFIG_ARCH_SUPPORTS_ACPI=y +-CONFIG_ACPI=y +-CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y +-CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y +-CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y +-# CONFIG_ACPI_DEBUGGER is not set +-CONFIG_ACPI_SPCR_TABLE=y +-CONFIG_ACPI_LPIT=y +-CONFIG_ACPI_SLEEP=y +-# CONFIG_ACPI_PROCFS_POWER is not set +-CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y + CONFIG_ACPI_EC_DEBUGFS=m +-CONFIG_ACPI_AC=y +-CONFIG_ACPI_BATTERY=y +-CONFIG_ACPI_BUTTON=y +-CONFIG_ACPI_VIDEO=m +-CONFIG_ACPI_FAN=y + CONFIG_ACPI_TAD=m + CONFIG_ACPI_DOCK=y +-CONFIG_ACPI_CPU_FREQ_PSS=y +-CONFIG_ACPI_PROCESSOR_CSTATE=y +-CONFIG_ACPI_PROCESSOR_IDLE=y +-CONFIG_ACPI_CPPC_LIB=y +-CONFIG_ACPI_PROCESSOR=y ++# CONFIG_ACPI_PROCESSOR is not set + CONFIG_ACPI_IPMI=m +-CONFIG_ACPI_HOTPLUG_CPU=y +-CONFIG_ACPI_PROCESSOR_AGGREGATOR=m +-CONFIG_ACPI_THERMAL=y +-CONFIG_ACPI_NUMA=y +-CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y +-CONFIG_ACPI_TABLE_UPGRADE=y +-# CONFIG_ACPI_DEBUG is not set + CONFIG_ACPI_PCI_SLOT=y +-CONFIG_ACPI_CONTAINER=y + CONFIG_ACPI_HOTPLUG_MEMORY=y +-CONFIG_ACPI_HOTPLUG_IOAPIC=y + CONFIG_ACPI_SBS=m +-CONFIG_ACPI_HED=y +-# CONFIG_ACPI_CUSTOM_METHOD is not set + CONFIG_ACPI_BGRT=y + CONFIG_ACPI_NFIT=m 
+-CONFIG_HAVE_ACPI_APEI=y +-CONFIG_HAVE_ACPI_APEI_NMI=y + CONFIG_ACPI_APEI=y + CONFIG_ACPI_APEI_GHES=y + CONFIG_ACPI_APEI_PCIEAER=y + CONFIG_ACPI_APEI_MEMORY_FAILURE=y + CONFIG_ACPI_APEI_EINJ=m +-# CONFIG_ACPI_APEI_ERST_DEBUG is not set + CONFIG_DPTF_POWER=m +-CONFIG_ACPI_WATCHDOG=y + CONFIG_ACPI_EXTLOG=m + CONFIG_PMIC_OPREGION=y +-# CONFIG_ACPI_CONFIGFS is not set +-CONFIG_X86_PM_TIMER=y + CONFIG_SFI=y +- +-# +-# CPU Frequency scaling +-# +-CONFIG_CPU_FREQ=y +-CONFIG_CPU_FREQ_GOV_ATTR_SET=y +-CONFIG_CPU_FREQ_GOV_COMMON=y +-CONFIG_CPU_FREQ_STAT=y +-CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +-# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +-# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set +-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +-CONFIG_CPU_FREQ_GOV_POWERSAVE=y +-CONFIG_CPU_FREQ_GOV_USERSPACE=y +-CONFIG_CPU_FREQ_GOV_ONDEMAND=y +-CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +-# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set +- +-# +-# CPU frequency scaling drivers +-# +-CONFIG_X86_INTEL_PSTATE=y +-# CONFIG_X86_PCC_CPUFREQ is not set +-CONFIG_X86_ACPI_CPUFREQ=m +-CONFIG_X86_ACPI_CPUFREQ_CPB=y +-CONFIG_X86_POWERNOW_K8=m +-CONFIG_X86_AMD_FREQ_SENSITIVITY=m +-# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +-CONFIG_X86_P4_CLOCKMOD=m +- +-# +-# shared options +-# +-CONFIG_X86_SPEEDSTEP_LIB=m +- +-# +-# CPU Idle +-# +-CONFIG_CPU_IDLE=y +-# CONFIG_CPU_IDLE_GOV_LADDER is not set +-CONFIG_CPU_IDLE_GOV_MENU=y +-CONFIG_INTEL_IDLE=y +- +-# +-# Bus options (PCI etc.) +-# +-CONFIG_PCI=y +-CONFIG_PCI_DIRECT=y +-CONFIG_PCI_MMCONFIG=y +-CONFIG_PCI_XEN=y +-CONFIG_PCI_DOMAINS=y +-CONFIG_MMCONF_FAM10H=y ++# CONFIG_CPU_IDLE is not set + CONFIG_PCIEPORTBUS=y + CONFIG_HOTPLUG_PCI_PCIE=y +-CONFIG_PCIEAER=y + CONFIG_PCIEAER_INJECT=m + CONFIG_PCIE_ECRC=y +-CONFIG_PCIEASPM=y +-# CONFIG_PCIEASPM_DEBUG is not set +-CONFIG_PCIEASPM_DEFAULT=y +-# CONFIG_PCIEASPM_POWERSAVE is not set +-# CONFIG_PCIEASPM_POWER_SUPERSAVE is not set +-# CONFIG_PCIEASPM_PERFORMANCE is not set +-CONFIG_PCIE_PME=y + CONFIG_PCIE_DPC=y +-# CONFIG_PCIE_PTM is not set +-CONFIG_PCI_MSI=y +-CONFIG_PCI_MSI_IRQ_DOMAIN=y +-CONFIG_PCI_QUIRKS=y +-# CONFIG_PCI_DEBUG is not set +-# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set + CONFIG_PCI_STUB=y + CONFIG_PCI_PF_STUB=m +-# CONFIG_XEN_PCIDEV_FRONTEND is not set +-CONFIG_PCI_ATS=y +-CONFIG_PCI_LOCKLESS_CONFIG=y +-CONFIG_PCI_IOV=y +-CONFIG_PCI_PRI=y +-CONFIG_PCI_PASID=y +-CONFIG_PCI_LABEL=y +-CONFIG_PCI_HYPERV=m + CONFIG_HOTPLUG_PCI=y + CONFIG_HOTPLUG_PCI_ACPI=y + CONFIG_HOTPLUG_PCI_ACPI_IBM=m +-# CONFIG_HOTPLUG_PCI_CPCI is not set + CONFIG_HOTPLUG_PCI_SHPC=y +- +-# +-# PCI controller drivers +-# +- +-# +-# Cadence PCIe controllers support +-# + CONFIG_VMD=y +- +-# +-# DesignWare PCI Core Support +-# +-# CONFIG_PCIE_DW_PLAT_HOST is not set +-# CONFIG_HISILICON_PCIE_CAE is not set +- +-# +-# PCI Endpoint +-# +-# CONFIG_PCI_ENDPOINT is not set +- +-# +-# PCI switch controller drivers +-# +-# CONFIG_PCI_SW_SWITCHTEC is not set +-CONFIG_ISA_DMA_API=y +-CONFIG_AMD_NB=y + CONFIG_PCCARD=y +-# CONFIG_PCMCIA is not set +-CONFIG_CARDBUS=y +- +-# +-# PC-card bridges +-# ++CONFIG_PCMCIA=m + CONFIG_YENTA=m +-CONFIG_YENTA_O2=y +-CONFIG_YENTA_RICOH=y +-CONFIG_YENTA_TI=y +-CONFIG_YENTA_ENE_TUNE=y +-CONFIG_YENTA_TOSHIBA=y +-# CONFIG_RAPIDIO is not set +-# CONFIG_X86_SYSFB is not set +- +-# +-# Binary Emulations +-# + CONFIG_IA32_EMULATION=y +-# CONFIG_X86_X32 is not set +-CONFIG_COMPAT_32=y 
+-CONFIG_COMPAT=y +-CONFIG_COMPAT_FOR_U64_ALIGNMENT=y +-CONFIG_SYSVIPC_COMPAT=y +-CONFIG_X86_DEV_DMA_OPS=y +-CONFIG_HAVE_GENERIC_GUP=y +- +-# +-# Firmware Drivers +-# + CONFIG_EDD=m +-# CONFIG_EDD_OFF is not set +-CONFIG_FIRMWARE_MEMMAP=y + CONFIG_DELL_RBU=m + CONFIG_DCDBAS=m +-CONFIG_DMIID=y + CONFIG_DMI_SYSFS=y +-CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y +-CONFIG_ISCSI_IBFT_FIND=y + CONFIG_ISCSI_IBFT=m + CONFIG_FW_CFG_SYSFS=y +-# CONFIG_FW_CFG_SYSFS_CMDLINE is not set +-# CONFIG_GOOGLE_FIRMWARE is not set +- +-# +-# EFI (Extensible Firmware Interface) Support +-# +-#CONFIG_EFI_VARS is not set +-CONFIG_EFI_ESRT=y +-CONFIG_EFI_RUNTIME_MAP=y +-# CONFIG_EFI_FAKE_MEMMAP is not set +-CONFIG_EFI_RUNTIME_WRAPPERS=y +-# CONFIG_EFI_BOOTLOADER_CONTROL is not set +-# CONFIG_EFI_CAPSULE_LOADER is not set +-# CONFIG_EFI_TEST is not set +-CONFIG_APPLE_PROPERTIES=y +-# CONFIG_RESET_ATTACK_MITIGATION is not set +-CONFIG_UEFI_CPER=y +-CONFIG_UEFI_CPER_X86=y +-CONFIG_EFI_DEV_PATH_PARSER=y +- +-# +-# Tegra firmware driver +-# +-CONFIG_HAVE_KVM=y +-CONFIG_HAVE_KVM_IRQCHIP=y +-CONFIG_HAVE_KVM_IRQFD=y +-CONFIG_HAVE_KVM_IRQ_ROUTING=y +-CONFIG_HAVE_KVM_EVENTFD=y +-CONFIG_KVM_MMIO=y +-CONFIG_KVM_ASYNC_PF=y +-CONFIG_HAVE_KVM_MSI=y +-CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y +-CONFIG_KVM_VFIO=y +-CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y +-CONFIG_KVM_COMPAT=y +-CONFIG_HAVE_KVM_IRQ_BYPASS=y +-CONFIG_VIRTUALIZATION=y ++CONFIG_EFI_VARS=y ++CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE=y + CONFIG_KVM=m + CONFIG_KVM_INTEL=m + CONFIG_KVM_AMD=m +-CONFIG_KVM_AMD_SEV=y + CONFIG_KVM_MMU_AUDIT=y + CONFIG_VHOST_NET=m +-# CONFIG_VHOST_SCSI is not set + CONFIG_VHOST_VSOCK=m +-CONFIG_VHOST=m +-# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set +- +-# +-# General architecture-dependent options +-# +-CONFIG_CRASH_CORE=y +-CONFIG_KEXEC_CORE=y +-CONFIG_HOTPLUG_SMT=y + CONFIG_OPROFILE=m + CONFIG_OPROFILE_EVENT_MULTIPLEX=y +-CONFIG_HAVE_OPROFILE=y +-CONFIG_OPROFILE_NMI_TIMER=y + CONFIG_KPROBES=y +-CONFIG_JUMP_LABEL=y +-# CONFIG_STATIC_KEYS_SELFTEST is not set +-CONFIG_OPTPROBES=y +-CONFIG_KPROBES_ON_FTRACE=y +-CONFIG_UPROBES=y +-CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +-CONFIG_ARCH_USE_BUILTIN_BSWAP=y +-CONFIG_KRETPROBES=y +-CONFIG_USER_RETURN_NOTIFIER=y +-CONFIG_HAVE_IOREMAP_PROT=y +-CONFIG_HAVE_KPROBES=y +-CONFIG_HAVE_KRETPROBES=y +-CONFIG_HAVE_OPTPROBES=y +-CONFIG_HAVE_KPROBES_ON_FTRACE=y +-CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y +-CONFIG_HAVE_NMI=y +-CONFIG_HAVE_ARCH_TRACEHOOK=y +-CONFIG_HAVE_DMA_CONTIGUOUS=y +-CONFIG_GENERIC_SMP_IDLE_THREAD=y +-CONFIG_ARCH_HAS_FORTIFY_SOURCE=y +-CONFIG_ARCH_HAS_SET_MEMORY=y +-CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y +-CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y +-CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +-CONFIG_HAVE_RSEQ=y +-CONFIG_HAVE_CLK=y +-CONFIG_HAVE_HW_BREAKPOINT=y +-CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y +-CONFIG_HAVE_USER_RETURN_NOTIFIER=y +-CONFIG_HAVE_PERF_EVENTS_NMI=y +-CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y +-CONFIG_HAVE_PERF_REGS=y +-CONFIG_HAVE_PERF_USER_STACK_DUMP=y +-CONFIG_HAVE_ARCH_JUMP_LABEL=y +-CONFIG_HAVE_RCU_TABLE_FREE=y +-CONFIG_HAVE_RCU_TABLE_INVALIDATE=y +-CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y +-CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y +-CONFIG_HAVE_CMPXCHG_LOCAL=y +-CONFIG_HAVE_CMPXCHG_DOUBLE=y +-CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y +-CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y +-CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +-CONFIG_SECCOMP_FILTER=y +-CONFIG_HAVE_STACKPROTECTOR=y +-CONFIG_CC_HAS_STACKPROTECTOR_NONE=y +-CONFIG_STACKPROTECTOR=y +-CONFIG_STACKPROTECTOR_STRONG=y 
+-CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y +-CONFIG_HAVE_CONTEXT_TRACKING=y +-CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y +-CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +-CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y +-CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y +-CONFIG_HAVE_ARCH_HUGE_VMAP=y +-CONFIG_HAVE_ARCH_SOFT_DIRTY=y +-CONFIG_HAVE_MOD_ARCH_SPECIFIC=y +-CONFIG_MODULES_USE_ELF_RELA=y +-CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y +-CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +-CONFIG_HAVE_ARCH_MMAP_RND_BITS=y +-CONFIG_HAVE_EXIT_THREAD=y +-CONFIG_ARCH_MMAP_RND_BITS=28 +-CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y +-CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 +-CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y +-CONFIG_HAVE_COPY_THREAD_TLS=y +-CONFIG_HAVE_STACK_VALIDATION=y +-CONFIG_HAVE_RELIABLE_STACKTRACE=y +-CONFIG_OLD_SIGSUSPEND3=y +-CONFIG_COMPAT_OLD_SIGACTION=y +-CONFIG_COMPAT_32BIT_TIME=y +-CONFIG_HAVE_ARCH_VMAP_STACK=y +-CONFIG_VMAP_STACK=y +-CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y +-CONFIG_STRICT_KERNEL_RWX=y +-CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y +-CONFIG_STRICT_MODULE_RWX=y +-CONFIG_ARCH_HAS_REFCOUNT=y +-# CONFIG_REFCOUNT_FULL is not set +-CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y +-CONFIG_ARCH_USE_MEMREMAP_PROT=y +- +-# +-# GCOV-based kernel profiling +-# +-# CONFIG_GCOV_KERNEL is not set +-CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y +-CONFIG_PLUGIN_HOSTCC="g++" +-CONFIG_HAVE_GCC_PLUGINS=y +-# CONFIG_GCC_PLUGINS is not set +-CONFIG_RT_MUTEXES=y +-CONFIG_BASE_SMALL=0 + CONFIG_MODULES=y + CONFIG_MODULE_FORCE_LOAD=y + CONFIG_MODULE_UNLOAD=y +-# CONFIG_MODULE_FORCE_UNLOAD is not set + CONFIG_MODVERSIONS=y + CONFIG_MODULE_SRCVERSION_ALL=y + CONFIG_MODULE_SIG=y +-# CONFIG_MODULE_SIG_FORCE is not set +-CONFIG_MODULE_SIG_ALL=y +-# CONFIG_MODULE_SIG_SHA1 is not set +-# CONFIG_MODULE_SIG_SHA224 is not set + CONFIG_MODULE_SIG_SHA256=y +-# CONFIG_MODULE_SIG_SHA384 is not set +-# CONFIG_MODULE_SIG_SHA512 is not set +-CONFIG_MODULE_SIG_HASH="sha256" +-# CONFIG_MODULE_COMPRESS is not set +-# CONFIG_TRIM_UNUSED_KSYMS is not set +-CONFIG_MODULES_TREE_LOOKUP=y +-CONFIG_BLOCK=y +-CONFIG_BLK_SCSI_REQUEST=y +-CONFIG_BLK_DEV_BSG=y +-CONFIG_BLK_DEV_BSGLIB=y +-CONFIG_BLK_DEV_INTEGRITY=y +-# CONFIG_BLK_DEV_ZONED is not set + CONFIG_BLK_DEV_THROTTLING=y +-# CONFIG_BLK_DEV_THROTTLING_LOW is not set +-# CONFIG_BLK_CMDLINE_PARSER is not set + CONFIG_BLK_WBT=y +-# CONFIG_BLK_CGROUP_IOLATENCY is not set +-# CONFIG_BLK_WBT_SQ is not set +-CONFIG_BLK_WBT_MQ=y +-CONFIG_BLK_DEBUG_FS=y +-# CONFIG_BLK_SED_OPAL is not set +- +-# +-# Partition Types +-# + CONFIG_PARTITION_ADVANCED=y +-# CONFIG_ACORN_PARTITION is not set +-# CONFIG_AIX_PARTITION is not set + CONFIG_OSF_PARTITION=y + CONFIG_AMIGA_PARTITION=y +-# CONFIG_ATARI_PARTITION is not set + CONFIG_MAC_PARTITION=y +-CONFIG_MSDOS_PARTITION=y + CONFIG_BSD_DISKLABEL=y + CONFIG_MINIX_SUBPARTITION=y + CONFIG_SOLARIS_X86_PARTITION=y + CONFIG_UNIXWARE_DISKLABEL=y +-# CONFIG_LDM_PARTITION is not set + CONFIG_SGI_PARTITION=y +-# CONFIG_ULTRIX_PARTITION is not set + CONFIG_SUN_PARTITION=y + CONFIG_KARMA_PARTITION=y +-CONFIG_EFI_PARTITION=y +-# CONFIG_SYSV68_PARTITION is not set +-# CONFIG_CMDLINE_PARTITION is not set +-CONFIG_BLOCK_COMPAT=y +-CONFIG_BLK_MQ_PCI=y +-CONFIG_BLK_MQ_VIRTIO=y +-CONFIG_BLK_MQ_RDMA=y +- +-# +-# IO Schedulers +-# +-CONFIG_IOSCHED_NOOP=y +-CONFIG_IOSCHED_DEADLINE=y +-CONFIG_IOSCHED_CFQ=y +-CONFIG_CFQ_GROUP_IOSCHED=y +-# CONFIG_DEFAULT_DEADLINE is not set +-CONFIG_DEFAULT_CFQ=y +-# CONFIG_DEFAULT_NOOP is not set +-CONFIG_DEFAULT_IOSCHED="cfq" +-CONFIG_MQ_IOSCHED_DEADLINE=y +-CONFIG_MQ_IOSCHED_KYBER=y + CONFIG_IOSCHED_BFQ=y + 
CONFIG_BFQ_GROUP_IOSCHED=y +-CONFIG_PREEMPT_NOTIFIERS=y +-CONFIG_PADATA=y +-CONFIG_ASN1=y +-CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +-CONFIG_INLINE_READ_UNLOCK=y +-CONFIG_INLINE_READ_UNLOCK_IRQ=y +-CONFIG_INLINE_WRITE_UNLOCK=y +-CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +-CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y +-CONFIG_MUTEX_SPIN_ON_OWNER=y +-CONFIG_RWSEM_SPIN_ON_OWNER=y +-CONFIG_LOCK_SPIN_ON_OWNER=y +-CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y +-CONFIG_QUEUED_SPINLOCKS=y +-CONFIG_ARCH_USE_QUEUED_RWLOCKS=y +-CONFIG_QUEUED_RWLOCKS=y +-CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y +-CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y +-CONFIG_FREEZER=y +- +-# +-# Executable file formats +-# +-CONFIG_BINFMT_ELF=y +-CONFIG_COMPAT_BINFMT_ELF=y +-CONFIG_ELFCORE=y +-CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +-CONFIG_BINFMT_SCRIPT=y ++CONFIG_XENO_DRIVERS_RTIPC=y + CONFIG_BINFMT_MISC=m +-CONFIG_COREDUMP=y +- +-# +-# Memory Management options +-# +-CONFIG_SELECT_MEMORY_MODEL=y +-CONFIG_SPARSEMEM_MANUAL=y +-CONFIG_SPARSEMEM=y +-CONFIG_NEED_MULTIPLE_NODES=y +-CONFIG_HAVE_MEMORY_PRESENT=y +-CONFIG_SPARSEMEM_EXTREME=y +-CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +-CONFIG_SPARSEMEM_VMEMMAP=y +-CONFIG_HAVE_MEMBLOCK=y +-CONFIG_HAVE_MEMBLOCK_NODE_MAP=y +-CONFIG_ARCH_DISCARD_MEMBLOCK=y +-CONFIG_MEMORY_ISOLATION=y +-CONFIG_HAVE_BOOTMEM_INFO_NODE=y + CONFIG_MEMORY_HOTPLUG=y +-CONFIG_MEMORY_HOTPLUG_SPARSE=y +-# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set +-CONFIG_MEMORY_HOTREMOVE=y +-CONFIG_SPLIT_PTLOCK_CPUS=4 +-CONFIG_MEMORY_BALLOON=y +-CONFIG_BALLOON_COMPACTION=y +-CONFIG_COMPACTION=y +-CONFIG_MIGRATION=y +-CONFIG_PHYS_ADDR_T_64BIT=y +-CONFIG_BOUNCE=y +-CONFIG_VIRT_TO_BUS=y +-CONFIG_MMU_NOTIFIER=y ++# CONFIG_COMPACTION is not set ++# CONFIG_MIGRATION is not set + CONFIG_KSM=y +-CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +-CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y + CONFIG_MEMORY_FAILURE=y + CONFIG_HWPOISON_INJECT=m +-CONFIG_TRANSPARENT_HUGEPAGE=y +-CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y +-# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set +-CONFIG_ARCH_WANTS_THP_SWAP=y +-CONFIG_THP_SWAP=y +-CONFIG_TRANSPARENT_HUGE_PAGECACHE=y + CONFIG_CLEANCACHE=y + CONFIG_FRONTSWAP=y +-# CONFIG_SHRINK_PAGECACHE is not set +-# CONFIG_CMA is not set + CONFIG_MEM_SOFT_DIRTY=y + CONFIG_ZSWAP=y +-CONFIG_ZPOOL=y + CONFIG_ZBUD=y +-# CONFIG_Z3FOLD is not set + CONFIG_ZSMALLOC=y +-# CONFIG_PGTABLE_MAPPING is not set + CONFIG_ZSMALLOC_STAT=y +-CONFIG_GENERIC_EARLY_IOREMAP=y + CONFIG_DEFERRED_STRUCT_PAGE_INIT=y + CONFIG_IDLE_PAGE_TRACKING=y +-CONFIG_ARCH_HAS_ZONE_DEVICE=y +-CONFIG_ZONE_DEVICE=y +-CONFIG_ARCH_HAS_HMM=y +-CONFIG_MIGRATE_VMA_HELPER=y +-CONFIG_DEV_PAGEMAP_OPS=y +-CONFIG_HMM=y +-CONFIG_HMM_MIRROR=y +-CONFIG_DEVICE_PRIVATE=y +-CONFIG_DEVICE_PUBLIC=y +-CONFIG_FRAME_VECTOR=y +-CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y +-CONFIG_ARCH_HAS_PKEYS=y +-# CONFIG_PERCPU_STATS is not set +-# CONFIG_GUP_BENCHMARK is not set +-CONFIG_ARCH_HAS_PTE_SPECIAL=y + CONFIG_NET=y +-CONFIG_NET_INGRESS=y +-CONFIG_NET_EGRESS=y +- +-# +-# Networking options +-# + CONFIG_PACKET=y + CONFIG_PACKET_DIAG=m + CONFIG_UNIX=y + CONFIG_UNIX_DIAG=m + CONFIG_TLS=m + CONFIG_TLS_DEVICE=y +-CONFIG_XFRM=y +-CONFIG_XFRM_OFFLOAD=y +-CONFIG_XFRM_ALGO=y + CONFIG_XFRM_USER=y + CONFIG_XFRM_INTERFACE=m + CONFIG_XFRM_SUB_POLICY=y +-CONFIG_XFRM_MIGRATE=y + CONFIG_XFRM_STATISTICS=y +-CONFIG_XFRM_IPCOMP=m + CONFIG_NET_KEY=m + CONFIG_NET_KEY_MIGRATE=y +-# CONFIG_SMC is not set + CONFIG_XDP_SOCKETS=y + CONFIG_INET=y + CONFIG_IP_MULTICAST=y +@@ -1052,45 +195,28 @@ + CONFIG_IP_MULTIPLE_TABLES=y + CONFIG_IP_ROUTE_MULTIPATH=y + CONFIG_IP_ROUTE_VERBOSE=y 
+-CONFIG_IP_ROUTE_CLASSID=y +-# CONFIG_IP_PNP is not set + CONFIG_NET_IPIP=m + CONFIG_NET_IPGRE_DEMUX=m +-CONFIG_NET_IP_TUNNEL=m + CONFIG_NET_IPGRE=m + CONFIG_NET_IPGRE_BROADCAST=y +-CONFIG_IP_MROUTE_COMMON=y + CONFIG_IP_MROUTE=y + CONFIG_IP_MROUTE_MULTIPLE_TABLES=y + CONFIG_IP_PIMSM_V1=y + CONFIG_IP_PIMSM_V2=y +-CONFIG_SYN_COOKIES=y + CONFIG_NET_IPVTI=m +-CONFIG_NET_UDP_TUNNEL=m +-# CONFIG_NET_FOU is not set +-# CONFIG_NET_FOU_IP_TUNNELS is not set + CONFIG_INET_AH=m + CONFIG_INET_ESP=m + CONFIG_INET_ESP_OFFLOAD=m + CONFIG_INET_IPCOMP=m +-CONFIG_INET_XFRM_TUNNEL=m +-CONFIG_INET_TUNNEL=m + CONFIG_INET_XFRM_MODE_TRANSPORT=m + CONFIG_INET_XFRM_MODE_TUNNEL=m + CONFIG_INET_XFRM_MODE_BEET=m + CONFIG_INET_DIAG=m +-CONFIG_INET_TCP_DIAG=m + CONFIG_INET_UDP_DIAG=m + CONFIG_INET_RAW_DIAG=m +-# CONFIG_INET_DIAG_DESTROY is not set + CONFIG_TCP_CONG_ADVANCED=y +-CONFIG_TCP_CONG_BIC=m +-CONFIG_TCP_CONG_CUBIC=y +-CONFIG_TCP_CONG_WESTWOOD=m +-CONFIG_TCP_CONG_HTCP=m + CONFIG_TCP_CONG_HSTCP=m + CONFIG_TCP_CONG_HYBLA=m +-CONFIG_TCP_CONG_VEGAS=m + CONFIG_TCP_CONG_NV=m + CONFIG_TCP_CONG_SCALABLE=m + CONFIG_TCP_CONG_LP=m +@@ -1098,13 +224,8 @@ + CONFIG_TCP_CONG_YEAH=m + CONFIG_TCP_CONG_ILLINOIS=m + CONFIG_TCP_CONG_DCTCP=m +-# CONFIG_TCP_CONG_CDG is not set + CONFIG_TCP_CONG_BBR=m +-CONFIG_DEFAULT_CUBIC=y +-# CONFIG_DEFAULT_RENO is not set +-CONFIG_DEFAULT_TCP_CONG="cubic" + CONFIG_TCP_MD5SIG=y +-CONFIG_IPV6=y + CONFIG_IPV6_ROUTER_PREF=y + CONFIG_IPV6_ROUTE_INFO=y + CONFIG_IPV6_OPTIMISTIC_DAD=y +@@ -1113,9 +234,6 @@ + CONFIG_INET6_ESP_OFFLOAD=m + CONFIG_INET6_IPCOMP=m + CONFIG_IPV6_MIP6=m +-# CONFIG_IPV6_ILA is not set +-CONFIG_INET6_XFRM_TUNNEL=m +-CONFIG_INET6_TUNNEL=m + CONFIG_INET6_XFRM_MODE_TRANSPORT=m + CONFIG_INET6_XFRM_MODE_TUNNEL=m + CONFIG_INET6_XFRM_MODE_BEET=m +@@ -1123,56 +241,25 @@ + CONFIG_IPV6_VTI=m + CONFIG_IPV6_SIT=m + CONFIG_IPV6_SIT_6RD=y +-CONFIG_IPV6_NDISC_NODETYPE=y +-CONFIG_IPV6_TUNNEL=m + CONFIG_IPV6_GRE=m + CONFIG_IPV6_MULTIPLE_TABLES=y +-# CONFIG_IPV6_SUBTREES is not set + CONFIG_IPV6_MROUTE=y + CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y + CONFIG_IPV6_PIMSM_V2=y +-# CONFIG_IPV6_SEG6_LWTUNNEL is not set +-# CONFIG_IPV6_SEG6_HMAC is not set + CONFIG_NETLABEL=y +-CONFIG_NETWORK_SECMARK=y +-CONFIG_NET_PTP_CLASSIFY=y + CONFIG_NETWORK_PHY_TIMESTAMPING=y + CONFIG_NETFILTER=y +-CONFIG_NETFILTER_ADVANCED=y +-CONFIG_BRIDGE_NETFILTER=m +- +-# +-# Core Netfilter Configuration +-# +-CONFIG_NETFILTER_INGRESS=y +-CONFIG_NETFILTER_NETLINK=m +-CONFIG_NETFILTER_FAMILY_BRIDGE=y +-CONFIG_NETFILTER_FAMILY_ARP=y +-# CONFIG_NETFILTER_NETLINK_ACCT is not set +-CONFIG_NETFILTER_NETLINK_QUEUE=m +-CONFIG_NETFILTER_NETLINK_LOG=m +-CONFIG_NETFILTER_NETLINK_OSF=m + CONFIG_NF_CONNTRACK=m +-CONFIG_NF_LOG_COMMON=m + CONFIG_NF_LOG_NETDEV=m +-CONFIG_NETFILTER_CONNCOUNT=m +-CONFIG_NF_CONNTRACK_MARK=y + CONFIG_NF_CONNTRACK_SECMARK=y + CONFIG_NF_CONNTRACK_ZONES=y +-CONFIG_NF_CONNTRACK_PROCFS=y + CONFIG_NF_CONNTRACK_EVENTS=y + CONFIG_NF_CONNTRACK_TIMEOUT=y + CONFIG_NF_CONNTRACK_TIMESTAMP=y +-CONFIG_NF_CONNTRACK_LABELS=y +-CONFIG_NF_CT_PROTO_DCCP=y +-CONFIG_NF_CT_PROTO_GRE=m +-CONFIG_NF_CT_PROTO_SCTP=y +-CONFIG_NF_CT_PROTO_UDPLITE=y + CONFIG_NF_CONNTRACK_AMANDA=m + CONFIG_NF_CONNTRACK_FTP=m + CONFIG_NF_CONNTRACK_H323=m + CONFIG_NF_CONNTRACK_IRC=m +-CONFIG_NF_CONNTRACK_BROADCAST=m + CONFIG_NF_CONNTRACK_NETBIOS_NS=m + CONFIG_NF_CONNTRACK_SNMP=m + CONFIG_NF_CONNTRACK_PPTP=m +@@ -1183,18 +270,6 @@ + CONFIG_NF_CT_NETLINK_TIMEOUT=m + CONFIG_NF_CT_NETLINK_HELPER=m + CONFIG_NETFILTER_NETLINK_GLUE_CT=y +-CONFIG_NF_NAT=m 
+-CONFIG_NF_NAT_NEEDED=y +-CONFIG_NF_NAT_PROTO_DCCP=y +-CONFIG_NF_NAT_PROTO_UDPLITE=y +-CONFIG_NF_NAT_PROTO_SCTP=y +-CONFIG_NF_NAT_AMANDA=m +-CONFIG_NF_NAT_FTP=m +-CONFIG_NF_NAT_IRC=m +-CONFIG_NF_NAT_SIP=m +-CONFIG_NF_NAT_TFTP=m +-CONFIG_NF_NAT_REDIRECT=y +-CONFIG_NETFILTER_SYNPROXY=m + CONFIG_NF_TABLES=m + CONFIG_NF_TABLES_SET=m + CONFIG_NF_TABLES_INET=y +@@ -1208,66 +283,37 @@ + CONFIG_NFT_MASQ=m + CONFIG_NFT_REDIR=m + CONFIG_NFT_NAT=m +-# CONFIG_NFT_TUNNEL is not set + CONFIG_NFT_OBJREF=m + CONFIG_NFT_QUEUE=m + CONFIG_NFT_QUOTA=m + CONFIG_NFT_REJECT=m +-CONFIG_NFT_REJECT_INET=m + CONFIG_NFT_COMPAT=m + CONFIG_NFT_HASH=m +-CONFIG_NFT_FIB=m + CONFIG_NFT_FIB_INET=m +-# CONFIG_NFT_SOCKET is not set +-# CONFIG_NFT_OSF is not set +-# CONFIG_NFT_TPROXY is not set +-CONFIG_NF_DUP_NETDEV=m + CONFIG_NFT_DUP_NETDEV=m + CONFIG_NFT_FWD_NETDEV=m + CONFIG_NFT_FIB_NETDEV=m +-# CONFIG_NF_FLOW_TABLE is not set + CONFIG_NETFILTER_XTABLES=y +- +-# +-# Xtables combined modules +-# +-CONFIG_NETFILTER_XT_MARK=m +-CONFIG_NETFILTER_XT_CONNMARK=m + CONFIG_NETFILTER_XT_SET=m +- +-# +-# Xtables targets +-# + CONFIG_NETFILTER_XT_TARGET_AUDIT=m + CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m + CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m + CONFIG_NETFILTER_XT_TARGET_CONNMARK=m + CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +-CONFIG_NETFILTER_XT_TARGET_CT=m + CONFIG_NETFILTER_XT_TARGET_DSCP=m +-CONFIG_NETFILTER_XT_TARGET_HL=m + CONFIG_NETFILTER_XT_TARGET_HMARK=m + CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +-# CONFIG_NETFILTER_XT_TARGET_LED is not set + CONFIG_NETFILTER_XT_TARGET_LOG=m + CONFIG_NETFILTER_XT_TARGET_MARK=m +-CONFIG_NETFILTER_XT_NAT=m +-CONFIG_NETFILTER_XT_TARGET_NETMAP=m + CONFIG_NETFILTER_XT_TARGET_NFLOG=m + CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m + CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +-CONFIG_NETFILTER_XT_TARGET_RATEEST=m +-CONFIG_NETFILTER_XT_TARGET_REDIRECT=m + CONFIG_NETFILTER_XT_TARGET_TEE=m + CONFIG_NETFILTER_XT_TARGET_TPROXY=m + CONFIG_NETFILTER_XT_TARGET_TRACE=m + CONFIG_NETFILTER_XT_TARGET_SECMARK=m + CONFIG_NETFILTER_XT_TARGET_TCPMSS=m + CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +- +-# +-# Xtables matches +-# + CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m + CONFIG_NETFILTER_XT_MATCH_BPF=m + CONFIG_NETFILTER_XT_MATCH_CGROUP=m +@@ -1282,12 +328,9 @@ + CONFIG_NETFILTER_XT_MATCH_DCCP=m + CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m + CONFIG_NETFILTER_XT_MATCH_DSCP=m +-CONFIG_NETFILTER_XT_MATCH_ECN=m + CONFIG_NETFILTER_XT_MATCH_ESP=m + CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + CONFIG_NETFILTER_XT_MATCH_HELPER=m +-CONFIG_NETFILTER_XT_MATCH_HL=m +-# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set + CONFIG_NETFILTER_XT_MATCH_IPRANGE=m + CONFIG_NETFILTER_XT_MATCH_IPVS=m + # CONFIG_NETFILTER_XT_MATCH_L2TP is not set +@@ -1296,7 +339,6 @@ + CONFIG_NETFILTER_XT_MATCH_MAC=m + CONFIG_NETFILTER_XT_MATCH_MARK=m + CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +-# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set + CONFIG_NETFILTER_XT_MATCH_OSF=m + CONFIG_NETFILTER_XT_MATCH_OWNER=m + CONFIG_NETFILTER_XT_MATCH_POLICY=m +@@ -1306,16 +348,12 @@ + CONFIG_NETFILTER_XT_MATCH_RATEEST=m + CONFIG_NETFILTER_XT_MATCH_REALM=m + CONFIG_NETFILTER_XT_MATCH_RECENT=m +-CONFIG_NETFILTER_XT_MATCH_SCTP=m + CONFIG_NETFILTER_XT_MATCH_SOCKET=m + CONFIG_NETFILTER_XT_MATCH_STATE=m + CONFIG_NETFILTER_XT_MATCH_STATISTIC=m + CONFIG_NETFILTER_XT_MATCH_STRING=m + CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +-# CONFIG_NETFILTER_XT_MATCH_TIME is not set +-# CONFIG_NETFILTER_XT_MATCH_U32 is not set + CONFIG_IP_SET=m +-CONFIG_IP_SET_MAX=256 + CONFIG_IP_SET_BITMAP_IP=m + CONFIG_IP_SET_BITMAP_IPMAC=m + 
CONFIG_IP_SET_BITMAP_PORT=m +@@ -1334,22 +372,11 @@ + CONFIG_IP_SET_LIST_SET=m + CONFIG_IP_VS=m + CONFIG_IP_VS_IPV6=y +-# CONFIG_IP_VS_DEBUG is not set +-CONFIG_IP_VS_TAB_BITS=12 +- +-# +-# IPVS transport protocol load balancing support +-# + CONFIG_IP_VS_PROTO_TCP=y + CONFIG_IP_VS_PROTO_UDP=y +-CONFIG_IP_VS_PROTO_AH_ESP=y + CONFIG_IP_VS_PROTO_ESP=y + CONFIG_IP_VS_PROTO_AH=y + CONFIG_IP_VS_PROTO_SCTP=y +- +-# +-# IPVS scheduler +-# + CONFIG_IP_VS_RR=m + CONFIG_IP_VS_WRR=m + CONFIG_IP_VS_LC=m +@@ -1360,52 +387,18 @@ + CONFIG_IP_VS_LBLCR=m + CONFIG_IP_VS_DH=m + CONFIG_IP_VS_SH=m +-# CONFIG_IP_VS_MH is not set + CONFIG_IP_VS_SED=m + CONFIG_IP_VS_NQ=m +- +-# +-# IPVS SH scheduler +-# +-CONFIG_IP_VS_SH_TAB_BITS=8 +- +-# +-# IPVS MH scheduler +-# +-CONFIG_IP_VS_MH_TAB_INDEX=12 +- +-# +-# IPVS application helper +-# + CONFIG_IP_VS_FTP=m +-CONFIG_IP_VS_NFCT=y + CONFIG_IP_VS_PE_SIP=m +- +-# +-# IP: Netfilter Configuration +-# +-CONFIG_NF_DEFRAG_IPV4=m +-CONFIG_NF_SOCKET_IPV4=m +-CONFIG_NF_TPROXY_IPV4=m +-CONFIG_NF_TABLES_IPV4=y + CONFIG_NFT_CHAIN_ROUTE_IPV4=m +-CONFIG_NFT_REJECT_IPV4=m + CONFIG_NFT_DUP_IPV4=m + CONFIG_NFT_FIB_IPV4=m + CONFIG_NF_TABLES_ARP=y +-CONFIG_NF_DUP_IPV4=m + CONFIG_NF_LOG_ARP=m +-CONFIG_NF_LOG_IPV4=m +-CONFIG_NF_REJECT_IPV4=m +-CONFIG_NF_NAT_IPV4=m +-CONFIG_NF_NAT_MASQUERADE_IPV4=y + CONFIG_NFT_CHAIN_NAT_IPV4=m + CONFIG_NFT_MASQ_IPV4=m + CONFIG_NFT_REDIR_IPV4=m +-CONFIG_NF_NAT_SNMP_BASIC=m +-CONFIG_NF_NAT_PROTO_GRE=m +-CONFIG_NF_NAT_PPTP=m +-CONFIG_NF_NAT_H323=m + CONFIG_IP_NF_IPTABLES=m + CONFIG_IP_NF_MATCH_AH=m + CONFIG_IP_NF_MATCH_ECN=m +@@ -1419,7 +412,6 @@ + CONFIG_IP_NF_TARGET_NETMAP=m + CONFIG_IP_NF_TARGET_REDIRECT=m + CONFIG_IP_NF_MANGLE=m +-# CONFIG_IP_NF_TARGET_CLUSTERIP is not set + CONFIG_IP_NF_TARGET_ECN=m + CONFIG_IP_NF_TARGET_TTL=m + CONFIG_IP_NF_RAW=m +@@ -1427,26 +419,12 @@ + CONFIG_IP_NF_ARPTABLES=m + CONFIG_IP_NF_ARPFILTER=m + CONFIG_IP_NF_ARP_MANGLE=m +- +-# +-# IPv6: Netfilter Configuration +-# +-CONFIG_NF_SOCKET_IPV6=m +-CONFIG_NF_TPROXY_IPV6=m +-CONFIG_NF_TABLES_IPV6=y + CONFIG_NFT_CHAIN_ROUTE_IPV6=m + CONFIG_NFT_CHAIN_NAT_IPV6=m + CONFIG_NFT_MASQ_IPV6=m + CONFIG_NFT_REDIR_IPV6=m +-CONFIG_NFT_REJECT_IPV6=m + CONFIG_NFT_DUP_IPV6=m + CONFIG_NFT_FIB_IPV6=m +-CONFIG_NF_DUP_IPV6=m +-CONFIG_NF_REJECT_IPV6=m +-CONFIG_NF_LOG_IPV6=m +-CONFIG_NF_NAT_IPV6=m +-CONFIG_NF_NAT_MASQUERADE_IPV6=y +-CONFIG_IP6_NF_IPTABLES=m + CONFIG_IP6_NF_MATCH_AH=m + CONFIG_IP6_NF_MATCH_EUI64=m + CONFIG_IP6_NF_MATCH_FRAG=m +@@ -1456,8 +434,6 @@ + CONFIG_IP6_NF_MATCH_MH=m + CONFIG_IP6_NF_MATCH_RPFILTER=m + CONFIG_IP6_NF_MATCH_RT=m +-# CONFIG_IP6_NF_MATCH_SRH is not set +-# CONFIG_IP6_NF_TARGET_HL is not set + CONFIG_IP6_NF_FILTER=m + CONFIG_IP6_NF_TARGET_REJECT=m + CONFIG_IP6_NF_TARGET_SYNPROXY=m +@@ -1467,7 +443,6 @@ + CONFIG_IP6_NF_NAT=m + CONFIG_IP6_NF_TARGET_MASQUERADE=m + CONFIG_IP6_NF_TARGET_NPT=m +-CONFIG_NF_DEFRAG_IPV6=m + CONFIG_NF_TABLES_BRIDGE=y + CONFIG_NFT_BRIDGE_REJECT=m + CONFIG_NF_LOG_BRIDGE=m +@@ -1492,64 +467,30 @@ + CONFIG_BRIDGE_EBT_SNAT=m + CONFIG_BRIDGE_EBT_LOG=m + CONFIG_BRIDGE_EBT_NFLOG=m +-# CONFIG_BPFILTER is not set +-# CONFIG_IP_DCCP is not set +-CONFIG_IP_SCTP=m +-# CONFIG_SCTP_DBG_OBJCNT is not set +-# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5 is not set + CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y +-# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set + CONFIG_SCTP_COOKIE_HMAC_MD5=y +-CONFIG_SCTP_COOKIE_HMAC_SHA1=y +-CONFIG_INET_SCTP_DIAG=m +-# CONFIG_RDS is not set + CONFIG_TIPC=m + CONFIG_TIPC_MEDIA_IB=y +-CONFIG_TIPC_MEDIA_UDP=y +-CONFIG_TIPC_DIAG=m + CONFIG_ATM=m 
+ CONFIG_ATM_CLIP=m +-# CONFIG_ATM_CLIP_NO_ICMP is not set + CONFIG_ATM_LANE=m +-# CONFIG_ATM_MPOA is not set + CONFIG_ATM_BR2684=m +-# CONFIG_ATM_BR2684_IPFILTER is not set + CONFIG_L2TP=m + CONFIG_L2TP_DEBUGFS=m + CONFIG_L2TP_V3=y + CONFIG_L2TP_IP=m + CONFIG_L2TP_ETH=m +-CONFIG_STP=m +-CONFIG_GARP=m +-CONFIG_MRP=m + CONFIG_BRIDGE=m +-CONFIG_BRIDGE_IGMP_SNOOPING=y + CONFIG_BRIDGE_VLAN_FILTERING=y +-CONFIG_HAVE_NET_DSA=y +-# CONFIG_NET_DSA is not set + CONFIG_VLAN_8021Q=m + CONFIG_VLAN_8021Q_GVRP=y + CONFIG_VLAN_8021Q_MVRP=y +-# CONFIG_DECNET is not set +-CONFIG_LLC=m +-# CONFIG_LLC2 is not set +-# CONFIG_ATALK is not set +-# CONFIG_X25 is not set +-# CONFIG_LAPB is not set +-# CONFIG_PHONET is not set + CONFIG_6LOWPAN=m +-# CONFIG_6LOWPAN_DEBUGFS is not set + # CONFIG_6LOWPAN_NHC is not set + CONFIG_IEEE802154=m +-# CONFIG_IEEE802154_NL802154_EXPERIMENTAL is not set +-CONFIG_IEEE802154_SOCKET=m + CONFIG_IEEE802154_6LOWPAN=m + CONFIG_MAC802154=m + CONFIG_NET_SCHED=y +- +-# +-# Queueing/Scheduling +-# + CONFIG_NET_SCH_CBQ=m + CONFIG_NET_SCH_HTB=m + CONFIG_NET_SCH_HFSC=m +@@ -1561,36 +502,22 @@ + CONFIG_NET_SCH_SFQ=m + CONFIG_NET_SCH_TEQL=m + CONFIG_NET_SCH_TBF=m +-# CONFIG_NET_SCH_CBS is not set +-# CONFIG_NET_SCH_ETF is not set + CONFIG_NET_SCH_GRED=m + CONFIG_NET_SCH_DSMARK=m + CONFIG_NET_SCH_NETEM=m + CONFIG_NET_SCH_DRR=m + CONFIG_NET_SCH_MQPRIO=m +-# CONFIG_NET_SCH_SKBPRIO is not set + CONFIG_NET_SCH_CHOKE=m + CONFIG_NET_SCH_QFQ=m + CONFIG_NET_SCH_CODEL=m + CONFIG_NET_SCH_FQ_CODEL=y +-# CONFIG_NET_SCH_CAKE is not set + CONFIG_NET_SCH_FQ=m + CONFIG_NET_SCH_HHF=m + CONFIG_NET_SCH_PIE=m + CONFIG_NET_SCH_INGRESS=m + CONFIG_NET_SCH_PLUG=m + CONFIG_NET_SCH_DEFAULT=y +-# CONFIG_DEFAULT_FQ is not set +-# CONFIG_DEFAULT_CODEL is not set + CONFIG_DEFAULT_FQ_CODEL=y +-# CONFIG_DEFAULT_SFQ is not set +-# CONFIG_DEFAULT_PFIFO_FAST is not set +-CONFIG_DEFAULT_NET_SCH="fq_codel" +- +-# +-# Classification +-# +-CONFIG_NET_CLS=y + CONFIG_NET_CLS_BASIC=m + CONFIG_NET_CLS_TCINDEX=m + CONFIG_NET_CLS_ROUTE4=m +@@ -1606,22 +533,18 @@ + CONFIG_NET_CLS_FLOWER=m + CONFIG_NET_CLS_MATCHALL=m + CONFIG_NET_EMATCH=y +-CONFIG_NET_EMATCH_STACK=32 + CONFIG_NET_EMATCH_CMP=m + CONFIG_NET_EMATCH_NBYTE=m + CONFIG_NET_EMATCH_U32=m + CONFIG_NET_EMATCH_META=m + CONFIG_NET_EMATCH_TEXT=m +-# CONFIG_NET_EMATCH_CANID is not set + CONFIG_NET_EMATCH_IPSET=m +-# CONFIG_NET_EMATCH_IPT is not set + CONFIG_NET_CLS_ACT=y + CONFIG_NET_ACT_POLICE=m + CONFIG_NET_ACT_GACT=m + CONFIG_GACT_PROB=y + CONFIG_NET_ACT_MIRRED=m + CONFIG_NET_ACT_SAMPLE=m +-# CONFIG_NET_ACT_IPT is not set + CONFIG_NET_ACT_NAT=m + CONFIG_NET_ACT_PEDIT=m + CONFIG_NET_ACT_SIMP=m +@@ -1629,105 +552,47 @@ + CONFIG_NET_ACT_CSUM=m + CONFIG_NET_ACT_VLAN=m + CONFIG_NET_ACT_BPF=m +-# CONFIG_NET_ACT_CONNMARK is not set + CONFIG_NET_ACT_SKBMOD=m +-# CONFIG_NET_ACT_IFE is not set + CONFIG_NET_ACT_TUNNEL_KEY=m + CONFIG_NET_CLS_IND=y +-CONFIG_NET_SCH_FIFO=y + CONFIG_DCB=y +-CONFIG_DNS_RESOLVER=m +-# CONFIG_BATMAN_ADV is not set + CONFIG_OPENVSWITCH=m +-CONFIG_OPENVSWITCH_GRE=m +-CONFIG_OPENVSWITCH_VXLAN=m +-CONFIG_OPENVSWITCH_GENEVE=m + CONFIG_VSOCKETS=m +-CONFIG_VSOCKETS_DIAG=m + CONFIG_VMWARE_VMCI_VSOCKETS=m + CONFIG_VIRTIO_VSOCKETS=m +-CONFIG_VIRTIO_VSOCKETS_COMMON=m +-CONFIG_HYPERV_VSOCKETS=m + CONFIG_NETLINK_DIAG=m +-CONFIG_MPLS=y + CONFIG_NET_MPLS_GSO=y + CONFIG_MPLS_ROUTING=m + CONFIG_MPLS_IPTUNNEL=m + CONFIG_NET_NSH=y +-# CONFIG_HSR is not set ++CONFIG_HSR=m + CONFIG_NET_SWITCHDEV=y +-CONFIG_NET_L3_MASTER_DEV=y +-# CONFIG_NET_NCSI is not set +-CONFIG_RPS=y 
+-CONFIG_RFS_ACCEL=y +-CONFIG_XPS=y + CONFIG_CGROUP_NET_PRIO=y +-CONFIG_CGROUP_NET_CLASSID=y +-CONFIG_NET_RX_BUSY_POLL=y +-CONFIG_BQL=y + CONFIG_BPF_JIT=y + CONFIG_BPF_STREAM_PARSER=y +-CONFIG_NET_FLOW_LIMIT=y +- +-# +-# Network testing +-# + CONFIG_NET_PKTGEN=m + CONFIG_NET_DROP_MONITOR=y +-# CONFIG_HAMRADIO is not set + CONFIG_CAN=m +-CONFIG_CAN_RAW=m +-CONFIG_CAN_BCM=m +-CONFIG_CAN_GW=m +-# CONFIG_CAN_J1939 is not set +- +-# +-# CAN Device Drivers +-# + CONFIG_CAN_VCAN=m +-# CONFIG_CAN_VXCAN is not set + CONFIG_CAN_SLCAN=m +-CONFIG_CAN_DEV=m +-CONFIG_CAN_CALC_BITTIMING=y + CONFIG_CAN_C_CAN=m + CONFIG_CAN_C_CAN_PLATFORM=m + CONFIG_CAN_C_CAN_PCI=m + CONFIG_CAN_CC770=m +-# CONFIG_CAN_CC770_ISA is not set + CONFIG_CAN_CC770_PLATFORM=m +-# CONFIG_CAN_IFI_CANFD is not set +-# CONFIG_CAN_M_CAN is not set +-# CONFIG_CAN_PEAK_PCIEFD is not set + CONFIG_CAN_SJA1000=m +-# CONFIG_CAN_SJA1000_ISA is not set + CONFIG_CAN_SJA1000_PLATFORM=m + CONFIG_CAN_EMS_PCI=m + CONFIG_CAN_PEAK_PCI=m +-CONFIG_CAN_PEAK_PCIEC=y + CONFIG_CAN_KVASER_PCI=m + CONFIG_CAN_PLX_PCI=m + CONFIG_CAN_SOFTING=m +- +-# +-# CAN SPI interfaces +-# +-# CONFIG_CAN_HI311X is not set +-# CONFIG_CAN_MCP251X is not set +- +-# +-# CAN USB interfaces +-# + CONFIG_CAN_8DEV_USB=m + CONFIG_CAN_EMS_USB=m + CONFIG_CAN_ESD_USB2=m +-# CONFIG_CAN_GS_USB is not set + CONFIG_CAN_KVASER_USB=m +-# CONFIG_CAN_MCBA_USB is not set + CONFIG_CAN_PEAK_USB=m +-# CONFIG_CAN_UCAN is not set +-# CONFIG_CAN_DEBUG_DEVICES is not set + CONFIG_BT=m +-CONFIG_BT_BREDR=y + CONFIG_BT_RFCOMM=m + CONFIG_BT_RFCOMM_TTY=y + CONFIG_BT_BNEP=m +@@ -1736,30 +601,12 @@ + CONFIG_BT_CMTP=m + CONFIG_BT_HIDP=m + CONFIG_BT_HS=y +-CONFIG_BT_LE=y +-# CONFIG_BT_6LOWPAN is not set +-# CONFIG_BT_LEDS is not set +-# CONFIG_BT_SELFTEST is not set +-CONFIG_BT_DEBUGFS=y +- +-# +-# Bluetooth device drivers +-# +-CONFIG_BT_INTEL=m +-CONFIG_BT_BCM=m +-CONFIG_BT_RTL=m + CONFIG_BT_HCIBTUSB=m + CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y +-CONFIG_BT_HCIBTUSB_BCM=y +-CONFIG_BT_HCIBTUSB_RTL=y + CONFIG_BT_HCIBTSDIO=m + CONFIG_BT_HCIUART=m +-CONFIG_BT_HCIUART_H4=y + CONFIG_BT_HCIUART_BCSP=y + CONFIG_BT_HCIUART_ATH3K=y +-# CONFIG_BT_HCIUART_INTEL is not set +-# CONFIG_BT_HCIUART_AG6XX is not set +-# CONFIG_BT_HCIUART_MRVL is not set + CONFIG_BT_HCIBCM203X=m + CONFIG_BT_HCIBPA10X=m + CONFIG_BT_HCIBFUSB=m +@@ -1767,232 +614,38 @@ + CONFIG_BT_MRVL=m + CONFIG_BT_MRVL_SDIO=m + CONFIG_BT_ATH3K=m +-# CONFIG_AF_RXRPC is not set +-# CONFIG_AF_KCM is not set +-CONFIG_STREAM_PARSER=y +-CONFIG_FIB_RULES=y +-CONFIG_WIRELESS=y + CONFIG_CFG80211=m +-# CONFIG_NL80211_TESTMODE is not set +-# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set +-CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y +-CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y +-CONFIG_CFG80211_DEFAULT_PS=y +-# CONFIG_CFG80211_DEBUGFS is not set +-CONFIG_CFG80211_CRDA_SUPPORT=y +-# CONFIG_CFG80211_WEXT is not set + CONFIG_MAC80211=m +-CONFIG_MAC80211_HAS_RC=y +-CONFIG_MAC80211_RC_MINSTREL=y +-CONFIG_MAC80211_RC_MINSTREL_HT=y +-CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +-CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" +-# CONFIG_MAC80211_MESH is not set +-CONFIG_MAC80211_LEDS=y +-CONFIG_MAC80211_DEBUGFS=y +-# CONFIG_MAC80211_MESSAGE_TRACING is not set +-# CONFIG_MAC80211_DEBUG_MENU is not set +-CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 +-# CONFIG_WIMAX is not set + CONFIG_RFKILL=m +-CONFIG_RFKILL_LEDS=y +-CONFIG_RFKILL_INPUT=y +-# CONFIG_RFKILL_GPIO is not set +-# CONFIG_NET_9P is not set +-# CONFIG_CAIF is not set +-CONFIG_CEPH_LIB=m +-# CONFIG_CEPH_LIB_PRETTYDEBUG is not set + 
CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y +-# CONFIG_NFC is not set +-CONFIG_PSAMPLE=m +-# CONFIG_NET_IFE is not set + CONFIG_LWTUNNEL=y +-CONFIG_LWTUNNEL_BPF=y +-CONFIG_DST_CACHE=y +-CONFIG_GRO_CELLS=y +-CONFIG_SOCK_VALIDATE_XMIT=y +-CONFIG_NET_DEVLINK=y +-CONFIG_MAY_USE_DEVLINK=y +-CONFIG_PAGE_POOL=y +-CONFIG_FAILOVER=m +-CONFIG_HAVE_EBPF_JIT=y +- +-# +-# Device Drivers +-# +- +-# +-# Generic Driver Options +-# ++CONFIG_NET_DEVLINK=m + # CONFIG_UEVENT_HELPER is not set + CONFIG_DEVTMPFS=y + CONFIG_DEVTMPFS_MOUNT=y +-CONFIG_STANDALONE=y +-CONFIG_PREVENT_FIRMWARE_BUILD=y +- +-# +-# Firmware loader +-# +-CONFIG_FW_LOADER=y +-CONFIG_EXTRA_FIRMWARE="" +-CONFIG_FW_LOADER_USER_HELPER=y +-# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set +-CONFIG_WANT_DEV_COREDUMP=y +-CONFIG_ALLOW_DEV_COREDUMP=y +-CONFIG_DEV_COREDUMP=y +-# CONFIG_DEBUG_DRIVER is not set +-# CONFIG_DEBUG_DEVRES is not set +-# CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set +-# CONFIG_TEST_ASYNC_DRIVER_PROBE is not set +-CONFIG_SYS_HYPERVISOR=y +-CONFIG_GENERIC_CPU_AUTOPROBE=y +-CONFIG_GENERIC_CPU_VULNERABILITIES=y +-CONFIG_REGMAP=y +-CONFIG_REGMAP_I2C=y +-CONFIG_REGMAP_SPI=y +-CONFIG_DMA_SHARED_BUFFER=y +-# CONFIG_DMA_FENCE_TRACE is not set +- +-# +-# Bus devices +-# + CONFIG_CONNECTOR=y +-CONFIG_PROC_EVENTS=y +-# CONFIG_GNSS is not set + CONFIG_MTD=m +-# CONFIG_MTD_TESTS is not set +-# CONFIG_MTD_REDBOOT_PARTS is not set +-# CONFIG_MTD_CMDLINE_PARTS is not set +-# CONFIG_MTD_AR7_PARTS is not set +- +-# +-# Partition parsers +-# +- +-# +-# User Modules And Translation Layers +-# +-CONFIG_MTD_BLKDEVS=m + CONFIG_MTD_BLOCK=m +-# CONFIG_MTD_BLOCK_RO is not set +-# CONFIG_FTL is not set +-# CONFIG_NFTL is not set +-# CONFIG_INFTL is not set +-# CONFIG_RFD_FTL is not set +-# CONFIG_SSFDC is not set +-# CONFIG_SM_FTL is not set +-# CONFIG_MTD_OOPS is not set +-# CONFIG_MTD_SWAP is not set +-# CONFIG_MTD_PARTITIONED_MASTER is not set +- +-# +-# RAM/ROM/Flash chip drivers +-# +-# CONFIG_MTD_CFI is not set +-# CONFIG_MTD_JEDECPROBE is not set +-CONFIG_MTD_MAP_BANK_WIDTH_1=y +-CONFIG_MTD_MAP_BANK_WIDTH_2=y +-CONFIG_MTD_MAP_BANK_WIDTH_4=y +-CONFIG_MTD_CFI_I1=y +-CONFIG_MTD_CFI_I2=y +-# CONFIG_MTD_RAM is not set +-# CONFIG_MTD_ROM is not set +-# CONFIG_MTD_ABSENT is not set +- +-# +-# Mapping drivers for chip access +-# +-# CONFIG_MTD_COMPLEX_MAPPINGS is not set +-# CONFIG_MTD_INTEL_VR_NOR is not set +-# CONFIG_MTD_PLATRAM is not set +- +-# +-# Self-contained MTD device drivers +-# +-# CONFIG_MTD_PMC551 is not set +-# CONFIG_MTD_DATAFLASH is not set +-# CONFIG_MTD_MCHP23K256 is not set +-# CONFIG_MTD_SST25L is not set +-# CONFIG_MTD_SLRAM is not set +-# CONFIG_MTD_PHRAM is not set +-# CONFIG_MTD_MTDRAM is not set +-# CONFIG_MTD_BLOCK2MTD is not set +- +-# +-# Disk-On-Chip Device Drivers +-# +-# CONFIG_MTD_DOCG3 is not set +-# CONFIG_MTD_ONENAND is not set +-# CONFIG_MTD_NAND is not set +-# CONFIG_MTD_SPI_NAND is not set +- +-# +-# LPDDR & LPDDR2 PCM memory drivers +-# +-# CONFIG_MTD_LPDDR is not set +-# CONFIG_MTD_SPI_NOR is not set + CONFIG_MTD_UBI=m +-CONFIG_MTD_UBI_WL_THRESHOLD=4096 +-CONFIG_MTD_UBI_BEB_LIMIT=20 +-# CONFIG_MTD_UBI_FASTMAP is not set +-# CONFIG_MTD_UBI_GLUEBI is not set +-# CONFIG_MTD_UBI_BLOCK is not set +-CONFIG_MTD_HISILICON_SFC=m +-# CONFIG_OF is not set +-CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y + CONFIG_PARPORT=m + CONFIG_PARPORT_PC=m + CONFIG_PARPORT_SERIAL=m +-# CONFIG_PARPORT_PC_FIFO is not set +-# CONFIG_PARPORT_PC_SUPERIO is not set +-# CONFIG_PARPORT_AX88796 is not set + CONFIG_PARPORT_1284=y +-CONFIG_PARPORT_NOT_PC=y 
+-CONFIG_PNP=y + # CONFIG_PNP_DEBUG_MESSAGES is not set +- +-# +-# Protocols +-# +-CONFIG_PNPACPI=y +-CONFIG_BLK_DEV=y + CONFIG_BLK_DEV_NULL_BLK=m +-# CONFIG_BLK_DEV_FD is not set +-CONFIG_CDROM=m +-# CONFIG_PARIDE is not set +-# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set ++CONFIG_BLK_DEV_FD=m + CONFIG_ZRAM=m + CONFIG_ZRAM_WRITEBACK=y +-# CONFIG_ZRAM_MEMORY_TRACKING is not set +-# CONFIG_BLK_DEV_DAC960 is not set +-# CONFIG_BLK_DEV_UMEM is not set + CONFIG_BLK_DEV_LOOP=m + CONFIG_BLK_DEV_LOOP_MIN_COUNT=0 +-# CONFIG_BLK_DEV_CRYPTOLOOP is not set +-# CONFIG_BLK_DEV_DRBD is not set + CONFIG_BLK_DEV_NBD=m +-# CONFIG_BLK_DEV_SKD is not set +-# CONFIG_BLK_DEV_SX8 is not set + CONFIG_BLK_DEV_RAM=m +-CONFIG_BLK_DEV_RAM_COUNT=16 + CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_CDROM_PKTCDVD=m +-CONFIG_CDROM_PKTCDVD_BUFFERS=8 +-# CONFIG_CDROM_PKTCDVD_WCACHE is not set +-# CONFIG_ATA_OVER_ETH is not set +-CONFIG_XEN_BLKDEV_FRONTEND=m + CONFIG_VIRTIO_BLK=m +-# CONFIG_VIRTIO_BLK_SCSI is not set + CONFIG_BLK_DEV_RBD=m +-# CONFIG_BLK_DEV_RSXX is not set +- +-# +-# NVME Support +-# +-CONFIG_NVME_CORE=m + CONFIG_BLK_DEV_NVME=m + CONFIG_NVME_MULTIPATH=y +-CONFIG_NVME_FABRICS=m + CONFIG_NVME_RDMA=m + CONFIG_NVME_FC=m + CONFIG_NVME_TARGET=m +@@ -2000,127 +653,26 @@ + CONFIG_NVME_TARGET_RDMA=m + CONFIG_NVME_TARGET_FC=m + CONFIG_NVME_TARGET_FCLOOP=m +- +-# +-# Misc devices +-# +-CONFIG_SENSORS_LIS3LV02D=m +-# CONFIG_AD525X_DPOT is not set +-# CONFIG_DUMMY_IRQ is not set +-# CONFIG_IBM_ASM is not set +-# CONFIG_PHANTOM is not set + CONFIG_SGI_IOC4=m +-CONFIG_TIFM_CORE=m +-CONFIG_TIFM_7XX1=m +-# CONFIG_ICS932S401 is not set + CONFIG_ENCLOSURE_SERVICES=m +-CONFIG_SGI_XP=m + CONFIG_HP_ILO=m +-CONFIG_SGI_GRU=m +-# CONFIG_SGI_GRU_DEBUG is not set + CONFIG_APDS9802ALS=m + CONFIG_ISL29003=m + CONFIG_ISL29020=m + CONFIG_SENSORS_TSL2550=m + CONFIG_SENSORS_BH1770=m + CONFIG_SENSORS_APDS990X=m +-# CONFIG_HMC6352 is not set +-# CONFIG_DS1682 is not set + CONFIG_VMWARE_BALLOON=m +-# CONFIG_USB_SWITCH_FSA9480 is not set +-# CONFIG_LATTICE_ECP3_CONFIG is not set +-# CONFIG_SRAM is not set +-# CONFIG_PCI_ENDPOINT_TEST is not set +-CONFIG_MISC_RTSX=m +-# CONFIG_C2PORT is not set +- +-# +-# EEPROM support +-# +-# CONFIG_EEPROM_AT24 is not set +-# CONFIG_EEPROM_AT25 is not set + CONFIG_EEPROM_LEGACY=m + CONFIG_EEPROM_MAX6875=m +-CONFIG_EEPROM_93CX6=m +-# CONFIG_EEPROM_93XX46 is not set +-# CONFIG_EEPROM_IDT_89HPESX is not set +-CONFIG_CB710_CORE=m +-# CONFIG_CB710_DEBUG is not set +-CONFIG_CB710_DEBUG_ASSUMPTIONS=y +- +-# +-# Texas Instruments shared transport line discipline +-# +-# CONFIG_TI_ST is not set + CONFIG_SENSORS_LIS3_I2C=m +-CONFIG_ALTERA_STAPL=m +-CONFIG_INTEL_MEI=m + CONFIG_INTEL_MEI_ME=m +-# CONFIG_INTEL_MEI_TXE is not set + CONFIG_VMWARE_VMCI=m +- +-# +-# Intel MIC & related support +-# +- +-# +-# Intel MIC Bus Driver +-# +-# CONFIG_INTEL_MIC_BUS is not set +- +-# +-# SCIF Bus Driver +-# +-# CONFIG_SCIF_BUS is not set +- +-# +-# VOP Bus Driver +-# +-# CONFIG_VOP_BUS is not set +- +-# +-# Intel MIC Host Driver +-# +- +-# +-# Intel MIC Card Driver +-# +- +-# +-# SCIF Driver +-# +- +-# +-# Intel MIC Coprocessor State Management (COSM) Drivers +-# +- +-# +-# VOP Driver +-# +-# CONFIG_GENWQE is not set +-# CONFIG_ECHO is not set + CONFIG_MISC_RTSX_PCI=m + CONFIG_MISC_RTSX_USB=m +-CONFIG_HAVE_IDE=y +-# CONFIG_IDE is not set +- +-# +-# SCSI device support +-# +-CONFIG_SCSI_MOD=y +-CONFIG_RAID_ATTRS=m + CONFIG_SCSI=y +-CONFIG_SCSI_DMA=y +-CONFIG_SCSI_NETLINK=y +-CONFIG_SCSI_MQ_DEFAULT=y +-CONFIG_SCSI_PROC_FS=y +- +-# +-# SCSI 
support type (disk, tape, CD-ROM) +-# + CONFIG_BLK_DEV_SD=m + CONFIG_CHR_DEV_ST=m +-# CONFIG_CHR_DEV_OSST is not set + CONFIG_BLK_DEV_SR=m + CONFIG_BLK_DEV_SR_VENDOR=y + CONFIG_CHR_DEV_SG=m +@@ -2129,88 +681,29 @@ + CONFIG_SCSI_CONSTANTS=y + CONFIG_SCSI_LOGGING=y + CONFIG_SCSI_SCAN_ASYNC=y +- +-# +-# SCSI Transports +-# +-CONFIG_SCSI_SPI_ATTRS=m + CONFIG_SCSI_FC_ATTRS=m +-CONFIG_SCSI_ISCSI_ATTRS=m +-CONFIG_SCSI_SAS_ATTRS=m + CONFIG_SCSI_SAS_LIBSAS=m + CONFIG_SCSI_SAS_ATA=y +-CONFIG_SCSI_SAS_HOST_SMP=y +-CONFIG_SCSI_SRP_ATTRS=m +-CONFIG_SCSI_LOWLEVEL=y + CONFIG_ISCSI_TCP=m +-CONFIG_ISCSI_BOOT_SYSFS=m +-# CONFIG_SCSI_CXGB3_ISCSI is not set + CONFIG_SCSI_CXGB4_ISCSI=m + CONFIG_SCSI_BNX2_ISCSI=m + CONFIG_SCSI_BNX2X_FCOE=m + CONFIG_BE2ISCSI=m +-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set + CONFIG_SCSI_HPSA=m +-# CONFIG_SCSI_3W_9XXX is not set +-# CONFIG_SCSI_3W_SAS is not set +-# CONFIG_SCSI_ACARD is not set + CONFIG_SCSI_AACRAID=m +-# CONFIG_SCSI_AIC7XXX is not set +-# CONFIG_SCSI_AIC79XX is not set +-# CONFIG_SCSI_AIC94XX is not set +-# CONFIG_SCSI_MVSAS is not set +-# CONFIG_SCSI_MVUMI is not set +-# CONFIG_SCSI_DPT_I2O is not set +-# CONFIG_SCSI_ADVANSYS is not set +-# CONFIG_SCSI_ARCMSR is not set +-# CONFIG_SCSI_ESAS2R is not set +-# CONFIG_MEGARAID_NEWGEN is not set +-# CONFIG_MEGARAID_LEGACY is not set + CONFIG_MEGARAID_SAS=m +-CONFIG_SCSI_MPT3SAS=m +-CONFIG_SCSI_MPT2SAS_MAX_SGE=128 +-CONFIG_SCSI_MPT3SAS_MAX_SGE=128 + CONFIG_SCSI_MPT2SAS=m + CONFIG_SCSI_SMARTPQI=m +-# CONFIG_SCSI_UFSHCD is not set +-# CONFIG_SCSI_HPTIOP is not set +-# CONFIG_SCSI_BUSLOGIC is not set + CONFIG_VMWARE_PVSCSI=m +-# CONFIG_XEN_SCSI_FRONTEND is not set +-CONFIG_HYPERV_STORAGE=m + CONFIG_LIBFC=m + CONFIG_LIBFCOE=m +-CONFIG_FCOE=m + CONFIG_FCOE_FNIC=m +-# CONFIG_SCSI_SNIC is not set +-# CONFIG_SCSI_DMX3191D is not set +-# CONFIG_SCSI_GDTH is not set +-# CONFIG_SCSI_ISCI is not set +-# CONFIG_SCSI_IPS is not set +-# CONFIG_SCSI_INITIO is not set +-# CONFIG_SCSI_INIA100 is not set +-# CONFIG_SCSI_PPA is not set +-# CONFIG_SCSI_IMM is not set +-# CONFIG_SCSI_STEX is not set +-# CONFIG_SCSI_SYM53C8XX_2 is not set +-# CONFIG_SCSI_IPR is not set +-# CONFIG_SCSI_QLOGIC_1280 is not set + CONFIG_SCSI_QLA_FC=m +-# CONFIG_TCM_QLA2XXX is not set + CONFIG_SCSI_QLA_ISCSI=m + CONFIG_QEDI=m + CONFIG_QEDF=m + CONFIG_SCSI_LPFC=m +-# CONFIG_SCSI_LPFC_DEBUG_FS is not set +-CONFIG_SCSI_HUAWEI_FC=m +-CONFIG_SCSI_FC_HIFC=m +-# CONFIG_SCSI_DC395x is not set +-# CONFIG_SCSI_AM53C974 is not set +-# CONFIG_SCSI_WD719X is not set + CONFIG_SCSI_DEBUG=m +-# CONFIG_SCSI_PMCRAID is not set +-# CONFIG_SCSI_PM8001 is not set +-# CONFIG_SCSI_BFA_FC is not set + CONFIG_SCSI_VIRTIO=m + CONFIG_SCSI_CHELSIO_FCOE=m + CONFIG_SCSI_DH=y +@@ -2218,125 +711,28 @@ + CONFIG_SCSI_DH_HP_SW=y + CONFIG_SCSI_DH_EMC=y + CONFIG_SCSI_DH_ALUA=y +-# CONFIG_SCSI_OSD_INITIATOR is not set + CONFIG_ATA=m +-CONFIG_ATA_VERBOSE_ERROR=y +-CONFIG_ATA_ACPI=y +-# CONFIG_SATA_ZPODD is not set +-CONFIG_SATA_PMP=y +- +-# +-# Controllers with non-SFF native interface +-# + CONFIG_SATA_AHCI=m +-CONFIG_SATA_MOBILE_LPM_POLICY=0 + CONFIG_SATA_AHCI_PLATFORM=m +-# CONFIG_SATA_INIC162X is not set +-# CONFIG_SATA_ACARD_AHCI is not set +-# CONFIG_SATA_SIL24 is not set +-CONFIG_ATA_SFF=y +- +-# +-# SFF controllers with custom DMA interface +-# +-# CONFIG_PDC_ADMA is not set +-# CONFIG_SATA_QSTOR is not set +-# CONFIG_SATA_SX4 is not set +-CONFIG_ATA_BMDMA=y +- +-# +-# SATA SFF controllers with BMDMA +-# + CONFIG_ATA_PIIX=m +-# CONFIG_SATA_DWC is not set +-# CONFIG_SATA_MV is not set +-# 
CONFIG_SATA_NV is not set +-# CONFIG_SATA_PROMISE is not set +-# CONFIG_SATA_SIL is not set +-# CONFIG_SATA_SIS is not set +-# CONFIG_SATA_SVW is not set +-# CONFIG_SATA_ULI is not set +-# CONFIG_SATA_VIA is not set +-# CONFIG_SATA_VITESSE is not set +- +-# +-# PATA SFF controllers with BMDMA +-# +-# CONFIG_PATA_ALI is not set +-# CONFIG_PATA_AMD is not set +-# CONFIG_PATA_ARTOP is not set +-# CONFIG_PATA_ATIIXP is not set +-# CONFIG_PATA_ATP867X is not set +-# CONFIG_PATA_CMD64X is not set +-# CONFIG_PATA_CYPRESS is not set +-# CONFIG_PATA_EFAR is not set +-# CONFIG_PATA_HPT366 is not set +-# CONFIG_PATA_HPT37X is not set +-# CONFIG_PATA_HPT3X2N is not set +-# CONFIG_PATA_HPT3X3 is not set +-# CONFIG_PATA_IT8213 is not set +-# CONFIG_PATA_IT821X is not set +-# CONFIG_PATA_JMICRON is not set +-# CONFIG_PATA_MARVELL is not set +-# CONFIG_PATA_NETCELL is not set +-# CONFIG_PATA_NINJA32 is not set +-# CONFIG_PATA_NS87415 is not set +-# CONFIG_PATA_OLDPIIX is not set +-# CONFIG_PATA_OPTIDMA is not set +-# CONFIG_PATA_PDC2027X is not set +-# CONFIG_PATA_PDC_OLD is not set +-# CONFIG_PATA_RADISYS is not set +-# CONFIG_PATA_RDC is not set +-# CONFIG_PATA_SCH is not set +-# CONFIG_PATA_SERVERWORKS is not set +-# CONFIG_PATA_SIL680 is not set +-# CONFIG_PATA_SIS is not set +-# CONFIG_PATA_TOSHIBA is not set +-# CONFIG_PATA_TRIFLEX is not set +-# CONFIG_PATA_VIA is not set +-# CONFIG_PATA_WINBOND is not set +- +-# +-# PIO-only SFF controllers +-# +-# CONFIG_PATA_CMD640_PCI is not set +-# CONFIG_PATA_MPIIX is not set +-# CONFIG_PATA_NS87410 is not set +-# CONFIG_PATA_OPTI is not set +-# CONFIG_PATA_RZ1000 is not set +- +-# +-# Generic fallback / legacy drivers +-# +-# CONFIG_PATA_ACPI is not set ++CONFIG_PATA_ALI=m ++CONFIG_PATA_AMD=m ++CONFIG_PATA_ARTOP=m ++CONFIG_PATA_ATIIXP=m ++CONFIG_PATA_ATP867X=m ++CONFIG_PATA_CMD64X=m ++CONFIG_PATA_PCMCIA=m + CONFIG_ATA_GENERIC=m +-# CONFIG_PATA_LEGACY is not set + CONFIG_MD=y + CONFIG_BLK_DEV_MD=y +-CONFIG_MD_AUTODETECT=y + CONFIG_MD_LINEAR=m +-CONFIG_MD_RAID0=m +-CONFIG_MD_RAID1=m +-CONFIG_MD_RAID10=m +-CONFIG_MD_RAID456=m +-# CONFIG_MD_MULTIPATH is not set + CONFIG_MD_FAULTY=m +-# CONFIG_MD_CLUSTER is not set +-# CONFIG_BCACHE is not set +-CONFIG_BLK_DEV_DM_BUILTIN=y + CONFIG_BLK_DEV_DM=m +-# CONFIG_DM_MQ_DEFAULT is not set + CONFIG_DM_DEBUG=y +-CONFIG_DM_BUFIO=m +-# CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING is not set +-CONFIG_DM_BIO_PRISON=m +-CONFIG_DM_PERSISTENT_DATA=m +-# CONFIG_DM_UNSTRIPED is not set + CONFIG_DM_CRYPT=m + CONFIG_DM_SNAPSHOT=m + CONFIG_DM_THIN_PROVISIONING=m + CONFIG_DM_CACHE=m +-CONFIG_DM_CACHE_SMQ=m + CONFIG_DM_WRITECACHE=m + CONFIG_DM_ERA=m + CONFIG_DM_MIRROR=m +@@ -2350,7 +746,6 @@ + CONFIG_DM_UEVENT=y + CONFIG_DM_FLAKEY=m + CONFIG_DM_VERITY=m +-# CONFIG_DM_VERITY_FEC is not set + CONFIG_DM_SWITCH=m + CONFIG_DM_LOG_WRITES=m + CONFIG_DM_INTEGRITY=m +@@ -2360,34 +755,22 @@ + CONFIG_TCM_PSCSI=m + CONFIG_TCM_USER2=m + CONFIG_LOOPBACK_TARGET=m +-# CONFIG_TCM_FC is not set ++CONFIG_TCM_FC=m + CONFIG_ISCSI_TARGET=m + CONFIG_ISCSI_TARGET_CXGB4=m +-# CONFIG_SBP_TARGET is not set ++CONFIG_SBP_TARGET=m + CONFIG_FUSION=y + CONFIG_FUSION_SPI=m +-# CONFIG_FUSION_FC is not set + CONFIG_FUSION_SAS=m +-CONFIG_FUSION_MAX_SGE=128 +-# CONFIG_FUSION_CTL is not set + CONFIG_FUSION_LOGGING=y +- +-# +-# IEEE 1394 (FireWire) support +-# + CONFIG_FIREWIRE=m + CONFIG_FIREWIRE_OHCI=m + CONFIG_FIREWIRE_SBP2=m + CONFIG_FIREWIRE_NET=m +-# CONFIG_FIREWIRE_NOSY is not set + CONFIG_MACINTOSH_DRIVERS=y + CONFIG_MAC_EMUMOUSEBTN=y +-CONFIG_NETDEVICES=y +-CONFIG_MII=m 
+-CONFIG_NET_CORE=y + CONFIG_BONDING=m + CONFIG_DUMMY=m +-# CONFIG_EQUALIZER is not set + CONFIG_NET_FC=y + CONFIG_IFB=m + CONFIG_NET_TEAM=m +@@ -2400,126 +783,53 @@ + CONFIG_MACVTAP=m + CONFIG_IPVLAN=m + CONFIG_IPVTAP=m +-CONFIG_VXLAN=m + CONFIG_GENEVE=m +-# CONFIG_GTP is not set + CONFIG_MACSEC=m + CONFIG_NETCONSOLE=m + CONFIG_NETCONSOLE_DYNAMIC=y +-CONFIG_NETPOLL=y +-CONFIG_NET_POLL_CONTROLLER=y + CONFIG_TUN=m +-CONFIG_TAP=m +-# CONFIG_TUN_VNET_CROSS_LE is not set + CONFIG_VETH=m + CONFIG_VIRTIO_NET=m + CONFIG_NLMON=m + CONFIG_NET_VRF=m + CONFIG_VSOCKMON=m +-# CONFIG_ARCNET is not set + # CONFIG_ATM_DRIVERS is not set +- +-# +-# CAIF transport drivers +-# +- +-# +-# Distributed Switch Architecture drivers +-# +-CONFIG_ETHERNET=y +-CONFIG_MDIO=m + # CONFIG_NET_VENDOR_3COM is not set + # CONFIG_NET_VENDOR_ADAPTEC is not set + # CONFIG_NET_VENDOR_AGERE is not set + # CONFIG_NET_VENDOR_ALACRITECH is not set + # CONFIG_NET_VENDOR_ALTEON is not set +-# CONFIG_ALTERA_TSE is not set +-CONFIG_NET_VENDOR_AMAZON=y + CONFIG_ENA_ETHERNET=m +-CONFIG_NET_VENDOR_AMD=y +-# CONFIG_AMD8111_ETH is not set +-# CONFIG_PCNET32 is not set + CONFIG_AMD_XGBE=m +-# CONFIG_AMD_XGBE_DCB is not set +-CONFIG_AMD_XGBE_HAVE_ECC=y +-CONFIG_NET_VENDOR_AQUANTIA=y + CONFIG_AQTION=m + # CONFIG_NET_VENDOR_ARC is not set +-CONFIG_NET_VENDOR_ATHEROS=y + CONFIG_ATL2=m + CONFIG_ATL1=m + CONFIG_ATL1E=m + CONFIG_ATL1C=m + CONFIG_ALX=m + # CONFIG_NET_VENDOR_AURORA is not set +-CONFIG_NET_VENDOR_BROADCOM=y +-# CONFIG_B44 is not set +-# CONFIG_BCMGENET is not set +-CONFIG_BNX2=m +-CONFIG_CNIC=m + CONFIG_TIGON3=m +-CONFIG_TIGON3_HWMON=y + CONFIG_BNX2X=m +-CONFIG_BNX2X_SRIOV=y +-# CONFIG_SYSTEMPORT is not set +-CONFIG_BNXT=m +-CONFIG_BNXT_SRIOV=y +-CONFIG_BNXT_FLOWER_OFFLOAD=y + CONFIG_BNXT_DCB=y +-CONFIG_BNXT_HWMON=y +-CONFIG_NET_VENDOR_BROCADE=y +-# CONFIG_BNA is not set +-CONFIG_NET_VENDOR_CADENCE=y +-# CONFIG_MACB is not set +-CONFIG_NET_VENDOR_CAVIUM=y +-# CONFIG_THUNDER_NIC_PF is not set +-# CONFIG_THUNDER_NIC_VF is not set +-# CONFIG_THUNDER_NIC_BGX is not set +-# CONFIG_THUNDER_NIC_RGX is not set +-CONFIG_CAVIUM_PTP=y + CONFIG_LIQUIDIO=m + CONFIG_LIQUIDIO_VF=m +-CONFIG_NET_VENDOR_CHELSIO=y +-# CONFIG_CHELSIO_T1 is not set +-# CONFIG_CHELSIO_T3 is not set +-CONFIG_CHELSIO_T4=m +-# CONFIG_CHELSIO_T4_DCB is not set + CONFIG_CHELSIO_T4VF=m +-CONFIG_CHELSIO_LIB=m +-CONFIG_NET_VENDOR_CISCO=y +-CONFIG_ENIC=m + # CONFIG_NET_VENDOR_CORTINA is not set +-# CONFIG_CX_ECAT is not set + CONFIG_DNET=m +-CONFIG_NET_VENDOR_DEC=y +-# CONFIG_NET_TULIP is not set +-CONFIG_NET_VENDOR_DLINK=y + CONFIG_DL2K=m +-# CONFIG_SUNDANCE is not set +-CONFIG_NET_VENDOR_EMULEX=y + CONFIG_BE2NET=m +-CONFIG_BE2NET_HWMON=y + # CONFIG_BE2NET_BE2 is not set + # CONFIG_BE2NET_BE3 is not set +-CONFIG_BE2NET_LANCER=y +-CONFIG_BE2NET_SKYHAWK=y + # CONFIG_NET_VENDOR_EZCHIP is not set + # CONFIG_NET_VENDOR_HP is not set ++CONFIG_HINIC=m + # CONFIG_NET_VENDOR_I825XX is not set +-CONFIG_NET_VENDOR_INTEL=y +-# CONFIG_E100 is not set + CONFIG_E1000=m + CONFIG_E1000E=m +-CONFIG_E1000E_HWTS=y +-CONFIG_HINIC=m + CONFIG_IGB=m +-CONFIG_IGB_HWMON=y +-CONFIG_IGB_DCA=y + CONFIG_IGBVF=m +-# CONFIG_IXGB is not set + CONFIG_IXGBE=m +-CONFIG_IXGBE_HWMON=y +-CONFIG_IXGBE_DCA=y + CONFIG_IXGBE_DCB=y + CONFIG_IXGBEVF=m + CONFIG_I40E=m +@@ -2527,96 +837,40 @@ + CONFIG_I40EVF=m + CONFIG_ICE=m + CONFIG_FM10K=m +-# CONFIG_JME is not set + # CONFIG_NET_VENDOR_MARVELL is not set +-CONFIG_NET_VENDOR_MELLANOX=y + CONFIG_MLX4_EN=m +-CONFIG_MLX4_EN_DCB=y +-CONFIG_MLX4_CORE=m +-CONFIG_MLX4_DEBUG=y + # 
CONFIG_MLX4_CORE_GEN2 is not set + CONFIG_MLX5_CORE=m +-CONFIG_MLX5_ACCEL=y + CONFIG_MLX5_FPGA=y + CONFIG_MLX5_CORE_EN=y +-CONFIG_MLX5_EN_ARFS=y +-CONFIG_MLX5_EN_RXNFC=y +-CONFIG_MLX5_MPFS=y +-CONFIG_MLX5_ESWITCH=y +-CONFIG_MLX5_CORE_EN_DCB=y + CONFIG_MLX5_CORE_IPOIB=y + CONFIG_MLX5_EN_IPSEC=y +-# CONFIG_MLX5_EN_TLS is not set + CONFIG_MLXSW_CORE=m +-CONFIG_MLXSW_CORE_HWMON=y +-CONFIG_MLXSW_CORE_THERMAL=y +-CONFIG_MLXSW_PCI=m +-CONFIG_MLXSW_I2C=m +-CONFIG_MLXSW_SWITCHIB=m +-CONFIG_MLXSW_SWITCHX2=m +-CONFIG_MLXSW_SPECTRUM=m +-CONFIG_MLXSW_SPECTRUM_DCB=y +-CONFIG_MLXSW_MINIMAL=m +-CONFIG_MLXFW=m + # CONFIG_NET_VENDOR_MICREL is not set + # CONFIG_NET_VENDOR_MICROCHIP is not set + # CONFIG_NET_VENDOR_MICROSEMI is not set +-CONFIG_NET_VENDOR_MYRI=y + CONFIG_MYRI10GE=m +-CONFIG_MYRI10GE_DCA=y +-# CONFIG_FEALNX is not set + # CONFIG_NET_VENDOR_NATSEMI is not set +-CONFIG_NET_VENDOR_NETERION=y +-# CONFIG_S2IO is not set +-# CONFIG_VXGE is not set +-CONFIG_NET_VENDOR_NETRONOME=y + CONFIG_NFP=m +-CONFIG_NFP_APP_FLOWER=y +-CONFIG_NFP_APP_ABM_NIC=y +-# CONFIG_NFP_DEBUG is not set + # CONFIG_NET_VENDOR_NI is not set + # CONFIG_NET_VENDOR_NVIDIA is not set +-CONFIG_NET_VENDOR_OKI=y + CONFIG_ETHOC=m +-CONFIG_NET_VENDOR_PACKET_ENGINES=y +-# CONFIG_HAMACHI is not set +-# CONFIG_YELLOWFIN is not set +-CONFIG_NET_VENDOR_QLOGIC=y + CONFIG_QLA3XXX=m +-# CONFIG_QLCNIC is not set +-# CONFIG_QLGE is not set + CONFIG_NETXEN_NIC=m + CONFIG_QED=m +-CONFIG_QED_LL2=y +-CONFIG_QED_SRIOV=y + CONFIG_QEDE=m +-CONFIG_QED_RDMA=y +-CONFIG_QED_ISCSI=y +-CONFIG_QED_FCOE=y +-CONFIG_QED_OOO=y + # CONFIG_NET_VENDOR_QUALCOMM is not set + # CONFIG_NET_VENDOR_RDC is not set +-CONFIG_NET_VENDOR_REALTEK=y +-# CONFIG_ATP is not set + CONFIG_8139CP=m + CONFIG_8139TOO=m + # CONFIG_8139TOO_PIO is not set +-# CONFIG_8139TOO_TUNE_TWISTER is not set + CONFIG_8139TOO_8129=y +-# CONFIG_8139_OLD_RX_RESET is not set + CONFIG_R8169=m + # CONFIG_NET_VENDOR_RENESAS is not set +-CONFIG_NET_VENDOR_ROCKER=y + CONFIG_ROCKER=m + # CONFIG_NET_VENDOR_SAMSUNG is not set + # CONFIG_NET_VENDOR_SEEQ is not set +-CONFIG_NET_VENDOR_SOLARFLARE=y + CONFIG_SFC=m +-CONFIG_SFC_MTD=y +-CONFIG_SFC_MCDI_MON=y +-CONFIG_SFC_SRIOV=y +-CONFIG_SFC_MCDI_LOGGING=y +-# CONFIG_SFC_FALCON is not set + # CONFIG_NET_VENDOR_SILAN is not set + # CONFIG_NET_VENDOR_SIS is not set + # CONFIG_NET_VENDOR_SMSC is not set +@@ -2628,31 +882,16 @@ + # CONFIG_NET_VENDOR_TI is not set + # CONFIG_NET_VENDOR_VIA is not set + # CONFIG_NET_VENDOR_WIZNET is not set +-# CONFIG_FDDI is not set +-# CONFIG_HIPPI is not set +-# CONFIG_NET_SB1000 is not set +-CONFIG_MDIO_DEVICE=y +-CONFIG_MDIO_BUS=y +-# CONFIG_MDIO_BCM_UNIMAC is not set + CONFIG_MDIO_BITBANG=m +-CONFIG_MDIO_CAVIUM=m +-# CONFIG_MDIO_GPIO is not set + CONFIG_MDIO_MSCC_MIIM=m + CONFIG_MDIO_THUNDER=m + CONFIG_PHYLIB=y +-CONFIG_SWPHY=y + CONFIG_LED_TRIGGER_PHY=y +- +-# +-# MII PHY device drivers +-# + CONFIG_AMD_PHY=m + CONFIG_AQUANTIA_PHY=m +-# CONFIG_AX88796B_PHY is not set + CONFIG_AT803X_PHY=m + CONFIG_BCM7XXX_PHY=m + CONFIG_BCM87XX_PHY=m +-CONFIG_BCM_NET_PHYLIB=m + CONFIG_BROADCOM_PHY=m + CONFIG_CICADA_PHY=m + CONFIG_CORTINA_PHY=m +@@ -2669,12 +908,10 @@ + CONFIG_MARVELL_PHY=m + CONFIG_MARVELL_10G_PHY=m + CONFIG_MICREL_PHY=m +-CONFIG_MICROCHIP_PHY=m + CONFIG_MICROCHIP_T1_PHY=m + CONFIG_MICROSEMI_PHY=m + CONFIG_NATIONAL_PHY=m + CONFIG_QSEMI_PHY=m +-CONFIG_REALTEK_PHY=m + CONFIG_RENESAS_PHY=m + CONFIG_ROCKCHIP_PHY=m + CONFIG_SMSC_PHY=m +@@ -2683,7 +920,7 @@ + CONFIG_VITESSE_PHY=m + CONFIG_XILINX_GMII2RGMII=m + CONFIG_MICREL_KS8995MA=m +-# 
CONFIG_PLIP is not set ++CONFIG_PLIP=m + CONFIG_PPP=m + CONFIG_PPP_BSDCOMP=m + CONFIG_PPP_DEFLATE=m +@@ -2697,11 +934,8 @@ + CONFIG_PPP_ASYNC=m + CONFIG_PPP_SYNC_TTY=m + CONFIG_SLIP=m +-CONFIG_SLHC=m + CONFIG_SLIP_COMPRESSED=y + CONFIG_SLIP_SMART=y +-# CONFIG_SLIP_MODE_SLIP6 is not set +-CONFIG_USB_NET_DRIVERS=y + CONFIG_USB_CATC=m + CONFIG_USB_KAWETH=m + CONFIG_USB_PEGASUS=m +@@ -2709,32 +943,20 @@ + CONFIG_USB_RTL8152=m + CONFIG_USB_LAN78XX=m + CONFIG_USB_USBNET=m +-CONFIG_USB_NET_AX8817X=m +-CONFIG_USB_NET_AX88179_178A=m +-CONFIG_USB_NET_CDCETHER=m + CONFIG_USB_NET_CDC_EEM=m +-CONFIG_USB_NET_CDC_NCM=m + CONFIG_USB_NET_HUAWEI_CDC_NCM=m + CONFIG_USB_NET_CDC_MBIM=m + CONFIG_USB_NET_DM9601=m +-# CONFIG_USB_NET_SR9700 is not set +-# CONFIG_USB_NET_SR9800 is not set + CONFIG_USB_NET_SMSC75XX=m + CONFIG_USB_NET_SMSC95XX=m + CONFIG_USB_NET_GL620A=m +-CONFIG_USB_NET_NET1080=m + CONFIG_USB_NET_PLUSB=m + CONFIG_USB_NET_MCS7830=m + CONFIG_USB_NET_RNDIS_HOST=m +-CONFIG_USB_NET_CDC_SUBSET_ENABLE=m +-CONFIG_USB_NET_CDC_SUBSET=m + CONFIG_USB_ALI_M5632=y + CONFIG_USB_AN2720=y +-CONFIG_USB_BELKIN=y +-CONFIG_USB_ARMLINUX=y + CONFIG_USB_EPSON2888=y + CONFIG_USB_KC2190=y +-CONFIG_USB_NET_ZAURUS=m + CONFIG_USB_NET_CX82310_ETH=m + CONFIG_USB_NET_KALMIA=m + CONFIG_USB_NET_QMI_WWAN=m +@@ -2744,130 +966,44 @@ + CONFIG_USB_SIERRA_NET=m + CONFIG_USB_VL600=m + CONFIG_USB_NET_CH9200=m +-CONFIG_WLAN=y + # CONFIG_WLAN_VENDOR_ADMTEK is not set +-CONFIG_ATH_COMMON=m +-CONFIG_WLAN_VENDOR_ATH=y +-# CONFIG_ATH_DEBUG is not set +-# CONFIG_ATH5K is not set +-# CONFIG_ATH5K_PCI is not set +-CONFIG_ATH9K_HW=m +-CONFIG_ATH9K_COMMON=m +-CONFIG_ATH9K_COMMON_DEBUG=y +-CONFIG_ATH9K_BTCOEX_SUPPORT=y + CONFIG_ATH9K=m +-CONFIG_ATH9K_PCI=y + CONFIG_ATH9K_AHB=y + CONFIG_ATH9K_DEBUGFS=y +-# CONFIG_ATH9K_STATION_STATISTICS is not set +-# CONFIG_ATH9K_DYNACK is not set + CONFIG_ATH9K_WOW=y +-CONFIG_ATH9K_RFKILL=y +-# CONFIG_ATH9K_CHANNEL_CONTEXT is not set +-CONFIG_ATH9K_PCOEM=y + CONFIG_ATH9K_HTC=m +-# CONFIG_ATH9K_HTC_DEBUGFS is not set +-# CONFIG_ATH9K_HWRNG is not set +-# CONFIG_ATH9K_COMMON_SPECTRAL is not set +-# CONFIG_CARL9170 is not set +-# CONFIG_ATH6KL is not set +-# CONFIG_AR5523 is not set +-# CONFIG_WIL6210 is not set + CONFIG_ATH10K=m +-CONFIG_ATH10K_CE=y + CONFIG_ATH10K_PCI=m +-# CONFIG_ATH10K_SDIO is not set +-# CONFIG_ATH10K_USB is not set +-# CONFIG_ATH10K_DEBUG is not set + CONFIG_ATH10K_DEBUGFS=y +-# CONFIG_ATH10K_SPECTRAL is not set +-# CONFIG_ATH10K_TRACING is not set +-# CONFIG_WCN36XX is not set +-# CONFIG_WLAN_VENDOR_ATMEL is not set +-CONFIG_WLAN_VENDOR_BROADCOM=y +-# CONFIG_B43 is not set +-# CONFIG_B43LEGACY is not set +-CONFIG_BRCMUTIL=m ++CONFIG_ATMEL=m ++CONFIG_PCI_ATMEL=m ++CONFIG_PCMCIA_ATMEL=m + CONFIG_BRCMSMAC=m + CONFIG_BRCMFMAC=m +-CONFIG_BRCMFMAC_PROTO_BCDC=y +-CONFIG_BRCMFMAC_PROTO_MSGBUF=y +-CONFIG_BRCMFMAC_SDIO=y + CONFIG_BRCMFMAC_USB=y + CONFIG_BRCMFMAC_PCIE=y +-# CONFIG_BRCM_TRACING is not set +-# CONFIG_BRCMDBG is not set + # CONFIG_WLAN_VENDOR_CISCO is not set +-CONFIG_WLAN_VENDOR_INTEL=y +-# CONFIG_IPW2100 is not set +-# CONFIG_IPW2200 is not set +-# CONFIG_IWL4965 is not set +-# CONFIG_IWL3945 is not set + CONFIG_IWLWIFI=m +-CONFIG_IWLWIFI_LEDS=y + CONFIG_IWLDVM=m + CONFIG_IWLMVM=m +-CONFIG_IWLWIFI_OPMODE_MODULAR=y +-# CONFIG_IWLWIFI_BCAST_FILTERING is not set +- +-# +-# Debugging Options +-# +-# CONFIG_IWLWIFI_DEBUG is not set + CONFIG_IWLWIFI_DEBUGFS=y + # CONFIG_IWLWIFI_DEVICE_TRACING is not set + # CONFIG_WLAN_VENDOR_INTERSIL is not set +-CONFIG_WLAN_VENDOR_MARVELL=y +-# CONFIG_LIBERTAS 
is not set +-# CONFIG_LIBERTAS_THINFIRM is not set + CONFIG_MWIFIEX=m + CONFIG_MWIFIEX_SDIO=m + CONFIG_MWIFIEX_PCIE=m + CONFIG_MWIFIEX_USB=m +-# CONFIG_MWL8K is not set +-CONFIG_WLAN_VENDOR_MEDIATEK=y + CONFIG_MT7601U=m +-CONFIG_MT76_CORE=m +-CONFIG_MT76_LEDS=y +-CONFIG_MT76_USB=m +-CONFIG_MT76x2_COMMON=m + CONFIG_MT76x0U=m +-# CONFIG_MT76x2E is not set + CONFIG_MT76x2U=m +-CONFIG_WLAN_VENDOR_RALINK=y + CONFIG_RT2X00=m +-# CONFIG_RT2400PCI is not set +-# CONFIG_RT2500PCI is not set +-# CONFIG_RT61PCI is not set + CONFIG_RT2800PCI=m +-CONFIG_RT2800PCI_RT33XX=y +-CONFIG_RT2800PCI_RT35XX=y +-CONFIG_RT2800PCI_RT53XX=y +-CONFIG_RT2800PCI_RT3290=y +-# CONFIG_RT2500USB is not set +-# CONFIG_RT73USB is not set + CONFIG_RT2800USB=m +-CONFIG_RT2800USB_RT33XX=y +-CONFIG_RT2800USB_RT35XX=y + CONFIG_RT2800USB_RT3573=y + CONFIG_RT2800USB_RT53XX=y + CONFIG_RT2800USB_RT55XX=y + CONFIG_RT2800USB_UNKNOWN=y +-CONFIG_RT2800_LIB=m +-CONFIG_RT2800_LIB_MMIO=m +-CONFIG_RT2X00_LIB_MMIO=m +-CONFIG_RT2X00_LIB_PCI=m +-CONFIG_RT2X00_LIB_USB=m +-CONFIG_RT2X00_LIB=m +-CONFIG_RT2X00_LIB_FIRMWARE=y +-CONFIG_RT2X00_LIB_CRYPTO=y +-CONFIG_RT2X00_LIB_LEDS=y + CONFIG_RT2X00_LIB_DEBUGFS=y +-# CONFIG_RT2X00_DEBUG is not set +-CONFIG_WLAN_VENDOR_REALTEK=y +-# CONFIG_RTL8180 is not set +-# CONFIG_RTL8187 is not set +-CONFIG_RTL_CARDS=m + CONFIG_RTL8192CE=m + CONFIG_RTL8192SE=m + CONFIG_RTL8192DE=m +@@ -2877,91 +1013,34 @@ + CONFIG_RTL8192EE=m + CONFIG_RTL8821AE=m + CONFIG_RTL8192CU=m +-CONFIG_RTLWIFI=m +-CONFIG_RTLWIFI_PCI=m +-CONFIG_RTLWIFI_USB=m + # CONFIG_RTLWIFI_DEBUG is not set +-CONFIG_RTL8192C_COMMON=m +-CONFIG_RTL8723_COMMON=m +-CONFIG_RTLBTCOEXIST=m + CONFIG_RTL8XXXU=m +-# CONFIG_RTL8XXXU_UNTESTED is not set + # CONFIG_WLAN_VENDOR_RSI is not set + # CONFIG_WLAN_VENDOR_ST is not set + # CONFIG_WLAN_VENDOR_TI is not set + # CONFIG_WLAN_VENDOR_ZYDAS is not set +-CONFIG_WLAN_VENDOR_QUANTENNA=y +-# CONFIG_QTNFMAC_PEARL_PCIE is not set + CONFIG_MAC80211_HWSIM=m +-# CONFIG_USB_NET_RNDIS_WLAN is not set +- +-# +-# Enable WiMAX (Networking options) to see the WiMAX drivers +-# + CONFIG_WAN=y +-# CONFIG_LANMEDIA is not set + CONFIG_HDLC=m + CONFIG_HDLC_RAW=m +-# CONFIG_HDLC_RAW_ETH is not set + CONFIG_HDLC_CISCO=m + CONFIG_HDLC_FR=m + CONFIG_HDLC_PPP=m +- +-# +-# X.25/LAPB support is disabled +-# +-# CONFIG_PCI200SYN is not set +-# CONFIG_WANXL is not set +-# CONFIG_PC300TOO is not set +-# CONFIG_FARSYNC is not set +-# CONFIG_DSCC4 is not set + CONFIG_DLCI=m +-CONFIG_DLCI_MAX=8 +-# CONFIG_SBNI is not set +-CONFIG_IEEE802154_DRIVERS=m + CONFIG_IEEE802154_FAKELB=m +-# CONFIG_IEEE802154_AT86RF230 is not set +-# CONFIG_IEEE802154_MRF24J40 is not set +-# CONFIG_IEEE802154_CC2520 is not set +-# CONFIG_IEEE802154_ATUSB is not set +-# CONFIG_IEEE802154_ADF7242 is not set +-# CONFIG_IEEE802154_CA8210 is not set +-# CONFIG_IEEE802154_MCR20A is not set +-# CONFIG_IEEE802154_HWSIM is not set +-CONFIG_XEN_NETDEV_FRONTEND=m + CONFIG_VMXNET3=m + CONFIG_FUJITSU_ES=m + CONFIG_THUNDERBOLT_NET=m +-CONFIG_HYPERV_NET=m + CONFIG_NETDEVSIM=m +-CONFIG_NET_FAILOVER=m + CONFIG_ISDN=y +-CONFIG_ISDN_I4L=m + CONFIG_ISDN_PPP=y + CONFIG_ISDN_PPP_VJ=y + CONFIG_ISDN_MPP=y + CONFIG_IPPP_FILTER=y +-# CONFIG_ISDN_PPP_BSDCOMP is not set + CONFIG_ISDN_AUDIO=y + CONFIG_ISDN_TTY_FAX=y +- +-# +-# ISDN feature submodules +-# + CONFIG_ISDN_DIVERSION=m +- +-# +-# ISDN4Linux hardware drivers +-# +- +-# +-# Passive cards +-# + CONFIG_ISDN_DRV_HISAX=m +- +-# +-# D-channel protocol features +-# + CONFIG_HISAX_EURO=y + CONFIG_DE_AOC=y + CONFIG_HISAX_NO_SENDCOMPLETE=y +@@ -2969,11 
+1048,6 @@ + CONFIG_HISAX_NO_KEYPAD=y + CONFIG_HISAX_1TR6=y + CONFIG_HISAX_NI1=y +-CONFIG_HISAX_MAX_CARDS=8 +- +-# +-# HiSax supported cards +-# + CONFIG_HISAX_16_3=y + CONFIG_HISAX_TELESPCI=y + CONFIG_HISAX_S0BOX=y +@@ -2992,17 +1066,7 @@ + CONFIG_HISAX_W6692=y + CONFIG_HISAX_HFC_SX=y + CONFIG_HISAX_ENTERNOW_PCI=y +-# CONFIG_HISAX_DEBUG is not set +- +-# +-# HiSax PCMCIA card service modules +-# +- +-# +-# HiSax sub driver modules +-# + CONFIG_HISAX_ST5481=m +-# CONFIG_HISAX_HFCUSB is not set + CONFIG_HISAX_HFC4S8S=m + CONFIG_HISAX_FRITZ_PCIPNP=m + CONFIG_ISDN_CAPI=m +@@ -3011,31 +1075,20 @@ + CONFIG_ISDN_CAPI_MIDDLEWARE=y + CONFIG_ISDN_CAPI_CAPIDRV=m + CONFIG_ISDN_CAPI_CAPIDRV_VERBOSE=y +- +-# +-# CAPI hardware drivers +-# + CONFIG_CAPI_AVM=y + CONFIG_ISDN_DRV_AVMB1_B1PCI=m + CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y + CONFIG_ISDN_DRV_AVMB1_T1PCI=m + CONFIG_ISDN_DRV_AVMB1_C4=m +-# CONFIG_CAPI_EICON is not set + CONFIG_ISDN_DRV_GIGASET=m +-CONFIG_GIGASET_CAPI=y + CONFIG_GIGASET_BASE=m + CONFIG_GIGASET_M105=m + CONFIG_GIGASET_M101=m +-# CONFIG_GIGASET_DEBUG is not set + CONFIG_HYSDN=m + CONFIG_HYSDN_CAPI=y + CONFIG_MISDN=m + CONFIG_MISDN_DSP=m + CONFIG_MISDN_L1OIP=m +- +-# +-# mISDN hardware drivers +-# + CONFIG_MISDN_HFCPCI=m + CONFIG_MISDN_HFCMULTI=m + CONFIG_MISDN_HFCUSB=m +@@ -3044,314 +1097,81 @@ + CONFIG_MISDN_INFINEON=m + CONFIG_MISDN_W6692=m + CONFIG_MISDN_NETJET=m +-CONFIG_MISDN_IPAC=m +-CONFIG_MISDN_ISAR=m +-CONFIG_ISDN_HDLC=m +-# CONFIG_NVM is not set +- +-# +-# Input device support +-# +-CONFIG_INPUT=y +-CONFIG_INPUT_LEDS=y +-CONFIG_INPUT_FF_MEMLESS=m +-CONFIG_INPUT_POLLDEV=m +-CONFIG_INPUT_SPARSEKMAP=m +-# CONFIG_INPUT_MATRIXKMAP is not set +- +-# +-# Userland interfaces +-# + CONFIG_INPUT_MOUSEDEV=y +-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + CONFIG_INPUT_JOYDEV=m + CONFIG_INPUT_EVDEV=y +-# CONFIG_INPUT_EVBUG is not set +- +-# +-# Input Device Drivers +-# +-CONFIG_INPUT_KEYBOARD=y +-# CONFIG_KEYBOARD_ADC is not set +-# CONFIG_KEYBOARD_ADP5588 is not set +-# CONFIG_KEYBOARD_ADP5589 is not set +-CONFIG_KEYBOARD_ATKBD=y +-# CONFIG_KEYBOARD_QT1070 is not set +-# CONFIG_KEYBOARD_QT2160 is not set +-# CONFIG_KEYBOARD_DLINK_DIR685 is not set +-# CONFIG_KEYBOARD_LKKBD is not set +-# CONFIG_KEYBOARD_GPIO is not set +-# CONFIG_KEYBOARD_GPIO_POLLED is not set +-# CONFIG_KEYBOARD_TCA6416 is not set +-# CONFIG_KEYBOARD_TCA8418 is not set +-# CONFIG_KEYBOARD_MATRIX is not set +-# CONFIG_KEYBOARD_LM8323 is not set +-# CONFIG_KEYBOARD_LM8333 is not set +-# CONFIG_KEYBOARD_MAX7359 is not set +-# CONFIG_KEYBOARD_MCS is not set +-# CONFIG_KEYBOARD_MPR121 is not set +-# CONFIG_KEYBOARD_NEWTON is not set +-# CONFIG_KEYBOARD_OPENCORES is not set +-# CONFIG_KEYBOARD_SAMSUNG is not set +-# CONFIG_KEYBOARD_STOWAWAY is not set +-# CONFIG_KEYBOARD_SUNKBD is not set +-# CONFIG_KEYBOARD_TM2_TOUCHKEY is not set +-# CONFIG_KEYBOARD_XTKBD is not set +-CONFIG_INPUT_MOUSE=y +-CONFIG_MOUSE_PS2=y +-CONFIG_MOUSE_PS2_ALPS=y +-CONFIG_MOUSE_PS2_BYD=y +-CONFIG_MOUSE_PS2_LOGIPS2PP=y +-CONFIG_MOUSE_PS2_SYNAPTICS=y +-CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS=y +-CONFIG_MOUSE_PS2_CYPRESS=y +-CONFIG_MOUSE_PS2_LIFEBOOK=y +-CONFIG_MOUSE_PS2_TRACKPOINT=y ++CONFIG_KEYBOARD_GPIO=m ++CONFIG_KEYBOARD_GPIO_POLLED=m ++CONFIG_MOUSE_PS2=m + CONFIG_MOUSE_PS2_ELANTECH=y +-CONFIG_MOUSE_PS2_ELANTECH_SMBUS=y + CONFIG_MOUSE_PS2_SENTELIC=y +-# CONFIG_MOUSE_PS2_TOUCHKIT is not set +-CONFIG_MOUSE_PS2_FOCALTECH=y + CONFIG_MOUSE_PS2_VMMOUSE=y +-CONFIG_MOUSE_PS2_SMBUS=y + 
CONFIG_MOUSE_SERIAL=m + CONFIG_MOUSE_APPLETOUCH=m + CONFIG_MOUSE_BCM5974=m + CONFIG_MOUSE_CYAPA=m + CONFIG_MOUSE_ELAN_I2C=m +-CONFIG_MOUSE_ELAN_I2C_I2C=y + CONFIG_MOUSE_ELAN_I2C_SMBUS=y + CONFIG_MOUSE_VSXXXAA=m +-# CONFIG_MOUSE_GPIO is not set + CONFIG_MOUSE_SYNAPTICS_I2C=m + CONFIG_MOUSE_SYNAPTICS_USB=m +-# CONFIG_INPUT_JOYSTICK is not set + CONFIG_INPUT_TABLET=y + CONFIG_TABLET_USB_ACECAD=m + CONFIG_TABLET_USB_AIPTEK=m + CONFIG_TABLET_USB_GTCO=m +-# CONFIG_TABLET_USB_HANWANG is not set + CONFIG_TABLET_USB_KBTAB=m +-# CONFIG_TABLET_USB_PEGASUS is not set + CONFIG_TABLET_SERIAL_WACOM4=m + CONFIG_INPUT_TOUCHSCREEN=y +-CONFIG_TOUCHSCREEN_PROPERTIES=y +-# CONFIG_TOUCHSCREEN_ADS7846 is not set +-# CONFIG_TOUCHSCREEN_AD7877 is not set +-# CONFIG_TOUCHSCREEN_AD7879 is not set +-# CONFIG_TOUCHSCREEN_ADC is not set +-# CONFIG_TOUCHSCREEN_ATMEL_MXT is not set +-# CONFIG_TOUCHSCREEN_AUO_PIXCIR is not set +-# CONFIG_TOUCHSCREEN_BU21013 is not set +-# CONFIG_TOUCHSCREEN_BU21029 is not set +-# CONFIG_TOUCHSCREEN_CHIPONE_ICN8505 is not set +-# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set +-# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set +-# CONFIG_TOUCHSCREEN_CYTTSP4_CORE is not set +-# CONFIG_TOUCHSCREEN_DYNAPRO is not set +-# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set +-# CONFIG_TOUCHSCREEN_EETI is not set +-# CONFIG_TOUCHSCREEN_EGALAX_SERIAL is not set +-# CONFIG_TOUCHSCREEN_EXC3000 is not set +-# CONFIG_TOUCHSCREEN_FUJITSU is not set +-# CONFIG_TOUCHSCREEN_GOODIX is not set +-# CONFIG_TOUCHSCREEN_HIDEEP is not set +-# CONFIG_TOUCHSCREEN_ILI210X is not set +-# CONFIG_TOUCHSCREEN_S6SY761 is not set +-# CONFIG_TOUCHSCREEN_GUNZE is not set +-# CONFIG_TOUCHSCREEN_EKTF2127 is not set +-# CONFIG_TOUCHSCREEN_ELAN is not set + CONFIG_TOUCHSCREEN_ELO=m + CONFIG_TOUCHSCREEN_WACOM_W8001=m + CONFIG_TOUCHSCREEN_WACOM_I2C=m +-# CONFIG_TOUCHSCREEN_MAX11801 is not set +-# CONFIG_TOUCHSCREEN_MCS5000 is not set +-# CONFIG_TOUCHSCREEN_MMS114 is not set +-# CONFIG_TOUCHSCREEN_MELFAS_MIP4 is not set +-# CONFIG_TOUCHSCREEN_MTOUCH is not set +-# CONFIG_TOUCHSCREEN_INEXIO is not set +-# CONFIG_TOUCHSCREEN_MK712 is not set +-# CONFIG_TOUCHSCREEN_PENMOUNT is not set +-# CONFIG_TOUCHSCREEN_EDT_FT5X06 is not set +-# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +-# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +-# CONFIG_TOUCHSCREEN_PIXCIR is not set +-# CONFIG_TOUCHSCREEN_WDT87XX_I2C is not set +-# CONFIG_TOUCHSCREEN_WM97XX is not set +-# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +-# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +-# CONFIG_TOUCHSCREEN_TSC_SERIO is not set +-# CONFIG_TOUCHSCREEN_TSC2004 is not set +-# CONFIG_TOUCHSCREEN_TSC2005 is not set +-# CONFIG_TOUCHSCREEN_TSC2007 is not set +-# CONFIG_TOUCHSCREEN_RM_TS is not set +-# CONFIG_TOUCHSCREEN_SILEAD is not set +-# CONFIG_TOUCHSCREEN_SIS_I2C is not set +-# CONFIG_TOUCHSCREEN_ST1232 is not set +-# CONFIG_TOUCHSCREEN_STMFTS is not set +-# CONFIG_TOUCHSCREEN_SUR40 is not set +-# CONFIG_TOUCHSCREEN_SURFACE3_SPI is not set +-# CONFIG_TOUCHSCREEN_SX8654 is not set +-# CONFIG_TOUCHSCREEN_TPS6507X is not set +-# CONFIG_TOUCHSCREEN_ZET6223 is not set +-# CONFIG_TOUCHSCREEN_ZFORCE is not set +-# CONFIG_TOUCHSCREEN_ROHM_BU21023 is not set + CONFIG_INPUT_MISC=y +-# CONFIG_INPUT_AD714X is not set +-# CONFIG_INPUT_BMA150 is not set +-# CONFIG_INPUT_E3X0_BUTTON is not set + CONFIG_INPUT_PCSPKR=m +-# CONFIG_INPUT_MMA8450 is not set + CONFIG_INPUT_APANEL=m + CONFIG_INPUT_GP2A=m +-# CONFIG_INPUT_GPIO_BEEPER is not set +-# CONFIG_INPUT_GPIO_DECODER is not set + CONFIG_INPUT_ATLAS_BTNS=m + 
CONFIG_INPUT_ATI_REMOTE2=m + CONFIG_INPUT_KEYSPAN_REMOTE=m +-# CONFIG_INPUT_KXTJ9 is not set + CONFIG_INPUT_POWERMATE=m + CONFIG_INPUT_YEALINK=m + CONFIG_INPUT_CM109=m + CONFIG_INPUT_UINPUT=m +-# CONFIG_INPUT_PCF8574 is not set +-# CONFIG_INPUT_PWM_BEEPER is not set +-# CONFIG_INPUT_PWM_VIBRA is not set + CONFIG_INPUT_GPIO_ROTARY_ENCODER=m +-# CONFIG_INPUT_ADXL34X is not set +-# CONFIG_INPUT_IMS_PCU is not set +-# CONFIG_INPUT_CMA3000 is not set +-CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m +-# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set +-# CONFIG_INPUT_DRV260X_HAPTICS is not set +-# CONFIG_INPUT_DRV2665_HAPTICS is not set +-# CONFIG_INPUT_DRV2667_HAPTICS is not set +-CONFIG_RMI4_CORE=m + CONFIG_RMI4_I2C=m + CONFIG_RMI4_SPI=m + CONFIG_RMI4_SMB=m +-CONFIG_RMI4_F03=y +-CONFIG_RMI4_F03_SERIO=m +-CONFIG_RMI4_2D_SENSOR=y +-CONFIG_RMI4_F11=y +-CONFIG_RMI4_F12=y +-CONFIG_RMI4_F30=y + CONFIG_RMI4_F34=y +-# CONFIG_RMI4_F54 is not set + CONFIG_RMI4_F55=y +- +-# +-# Hardware I/O ports +-# +-CONFIG_SERIO=y +-CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y +-CONFIG_SERIO_I8042=y +-CONFIG_SERIO_SERPORT=y +-# CONFIG_SERIO_CT82C710 is not set +-# CONFIG_SERIO_PARKBD is not set +-# CONFIG_SERIO_PCIPS2 is not set +-CONFIG_SERIO_LIBPS2=y + CONFIG_SERIO_RAW=m + CONFIG_SERIO_ALTERA_PS2=m +-# CONFIG_SERIO_PS2MULT is not set + CONFIG_SERIO_ARC_PS2=m +-CONFIG_HYPERV_KEYBOARD=m +-# CONFIG_SERIO_GPIO_PS2 is not set +-# CONFIG_USERIO is not set +-# CONFIG_GAMEPORT is not set +- +-# +-# Character devices +-# +-CONFIG_TTY=y +-CONFIG_VT=y +-CONFIG_CONSOLE_TRANSLATIONS=y +-CONFIG_VT_CONSOLE=y +-CONFIG_VT_CONSOLE_SLEEP=y +-CONFIG_HW_CONSOLE=y +-CONFIG_VT_HW_CONSOLE_BINDING=y +-CONFIG_UNIX98_PTYS=y + # CONFIG_LEGACY_PTYS is not set + CONFIG_SERIAL_NONSTANDARD=y +-# CONFIG_ROCKETPORT is not set + CONFIG_CYCLADES=m +-# CONFIG_CYZ_INTR is not set +-# CONFIG_MOXA_INTELLIO is not set +-# CONFIG_MOXA_SMARTIO is not set + CONFIG_SYNCLINK=m + CONFIG_SYNCLINKMP=m + CONFIG_SYNCLINK_GT=m + CONFIG_NOZOMI=m +-# CONFIG_ISI is not set + CONFIG_N_HDLC=m + CONFIG_N_GSM=m +-# CONFIG_TRACE_SINK is not set +-CONFIG_LDISC_AUTOLOAD=y +-CONFIG_DEVMEM=y +-# CONFIG_DEVKMEM is not set +- +-# +-# Serial drivers +-# +-CONFIG_SERIAL_EARLYCON=y + CONFIG_SERIAL_8250=y + # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +-CONFIG_SERIAL_8250_PNP=y +-# CONFIG_SERIAL_8250_FINTEK is not set + CONFIG_SERIAL_8250_CONSOLE=y +-CONFIG_SERIAL_8250_DMA=y +-CONFIG_SERIAL_8250_PCI=y +-CONFIG_SERIAL_8250_EXAR=y + CONFIG_SERIAL_8250_NR_UARTS=32 +-CONFIG_SERIAL_8250_RUNTIME_UARTS=4 + CONFIG_SERIAL_8250_EXTENDED=y + CONFIG_SERIAL_8250_MANY_PORTS=y + CONFIG_SERIAL_8250_SHARE_IRQ=y +-# CONFIG_SERIAL_8250_DETECT_IRQ is not set + CONFIG_SERIAL_8250_RSA=y + CONFIG_SERIAL_8250_DW=y +-# CONFIG_SERIAL_8250_RT288X is not set +-CONFIG_SERIAL_8250_LPSS=y +-CONFIG_SERIAL_8250_MID=y +-# CONFIG_SERIAL_8250_MOXA is not set +- +-# +-# Non-8250 serial port support +-# +-# CONFIG_SERIAL_KGDB_NMI is not set +-# CONFIG_SERIAL_MAX3100 is not set +-# CONFIG_SERIAL_MAX310X is not set +-# CONFIG_SERIAL_UARTLITE is not set +-CONFIG_SERIAL_CORE=y +-CONFIG_SERIAL_CORE_CONSOLE=y +-CONFIG_CONSOLE_POLL=y + CONFIG_SERIAL_JSM=m +-# CONFIG_SERIAL_SCCNXP is not set +-# CONFIG_SERIAL_SC16IS7XX is not set +-# CONFIG_SERIAL_ALTERA_JTAGUART is not set +-# CONFIG_SERIAL_ALTERA_UART is not set +-# CONFIG_SERIAL_IFX6X60 is not set + CONFIG_SERIAL_ARC=m +-CONFIG_SERIAL_ARC_NR_PORTS=1 +-# CONFIG_SERIAL_RP2 is not set +-# CONFIG_SERIAL_FSL_LPUART is not set +-# CONFIG_SERIAL_DEV_BUS is not set + CONFIG_PRINTER=m +-# CONFIG_LP_CONSOLE is 
not set + CONFIG_PPDEV=m +-CONFIG_HVC_DRIVER=y +-CONFIG_HVC_IRQ=y +-CONFIG_HVC_XEN=y +-CONFIG_HVC_XEN_FRONTEND=y + CONFIG_VIRTIO_CONSOLE=m + CONFIG_IPMI_HANDLER=m +-CONFIG_IPMI_DMI_DECODE=y + CONFIG_IPMI_PANIC_EVENT=y + CONFIG_IPMI_PANIC_STRING=y + CONFIG_IPMI_DEVICE_INTERFACE=m +-CONFIG_IPMI_SI=m + CONFIG_IPMI_SSIF=m + CONFIG_IPMI_WATCHDOG=m + CONFIG_IPMI_POWEROFF=m +@@ -3362,192 +1182,52 @@ + CONFIG_HW_RANDOM_VIA=m + CONFIG_HW_RANDOM_VIRTIO=y + CONFIG_NVRAM=y +-# CONFIG_APPLICOM is not set +-# CONFIG_MWAVE is not set + CONFIG_RAW_DRIVER=y + CONFIG_MAX_RAW_DEVS=8192 + CONFIG_HPET=y +-CONFIG_HPET_MMAP=y + # CONFIG_HPET_MMAP_DEFAULT is not set + CONFIG_HANGCHECK_TIMER=m +-CONFIG_UV_MMTIMER=m +-CONFIG_TCG_TPM=y +-CONFIG_HW_RANDOM_TPM=y +-CONFIG_TCG_TIS_CORE=y +-CONFIG_TCG_TIS=y +-# CONFIG_TCG_TIS_SPI is not set + CONFIG_TCG_TIS_I2C_ATMEL=m + CONFIG_TCG_TIS_I2C_INFINEON=m + CONFIG_TCG_TIS_I2C_NUVOTON=m + CONFIG_TCG_NSC=m + CONFIG_TCG_ATMEL=m + CONFIG_TCG_INFINEON=m +-# CONFIG_TCG_XEN is not set +-CONFIG_TCG_CRB=y +-# CONFIG_TCG_VTPM_PROXY is not set +-CONFIG_TCG_TIS_ST33ZP24=m + CONFIG_TCG_TIS_ST33ZP24_I2C=m +-# CONFIG_TCG_TIS_ST33ZP24_SPI is not set + CONFIG_TELCLOCK=m +-CONFIG_DEVPORT=y +-# CONFIG_XILLYBUS is not set +-# CONFIG_RANDOM_TRUST_CPU is not set +- +-# +-# I2C support +-# + CONFIG_I2C=y +-CONFIG_ACPI_I2C_OPREGION=y +-CONFIG_I2C_BOARDINFO=y +-CONFIG_I2C_COMPAT=y + CONFIG_I2C_CHARDEV=m +-CONFIG_I2C_MUX=m +- +-# +-# Multiplexer I2C Chip support +-# +-# CONFIG_I2C_MUX_GPIO is not set +-# CONFIG_I2C_MUX_LTC4306 is not set +-# CONFIG_I2C_MUX_PCA9541 is not set +-# CONFIG_I2C_MUX_PCA954x is not set +-# CONFIG_I2C_MUX_REG is not set + CONFIG_I2C_MUX_MLXCPLD=m +-CONFIG_I2C_HELPER_AUTO=y +-CONFIG_I2C_SMBUS=m +-CONFIG_I2C_ALGOBIT=m +-CONFIG_I2C_ALGOPCA=m +- +-# +-# I2C Hardware Bus support +-# +- +-# +-# PC SMBus host controller drivers +-# +-# CONFIG_I2C_ALI1535 is not set +-# CONFIG_I2C_ALI1563 is not set +-# CONFIG_I2C_ALI15X3 is not set + CONFIG_I2C_AMD756=m + CONFIG_I2C_AMD756_S4882=m + CONFIG_I2C_AMD8111=m +-CONFIG_I2C_I801=m + CONFIG_I2C_ISCH=m + CONFIG_I2C_ISMT=m + CONFIG_I2C_PIIX4=m + CONFIG_I2C_NFORCE2=m + CONFIG_I2C_NFORCE2_S4985=m +-# CONFIG_I2C_SIS5595 is not set +-# CONFIG_I2C_SIS630 is not set + CONFIG_I2C_SIS96X=m + CONFIG_I2C_VIA=m + CONFIG_I2C_VIAPRO=m +- +-# +-# ACPI drivers +-# + CONFIG_I2C_SCMI=m +- +-# +-# I2C system bus drivers (mostly embedded / system-on-chip) +-# +-# CONFIG_I2C_CBUS_GPIO is not set +-CONFIG_I2C_DESIGNWARE_CORE=m + CONFIG_I2C_DESIGNWARE_PLATFORM=m +-# CONFIG_I2C_DESIGNWARE_SLAVE is not set +-# CONFIG_I2C_DESIGNWARE_PCI is not set + CONFIG_I2C_DESIGNWARE_BAYTRAIL=y +-# CONFIG_I2C_EMEV2 is not set +-# CONFIG_I2C_GPIO is not set +-# CONFIG_I2C_OCORES is not set + CONFIG_I2C_PCA_PLATFORM=m + CONFIG_I2C_SIMTEC=m +-# CONFIG_I2C_XILINX is not set +- +-# +-# External I2C/SMBus adapter drivers +-# + CONFIG_I2C_DIOLAN_U2C=m + CONFIG_I2C_PARPORT=m + CONFIG_I2C_PARPORT_LIGHT=m +-# CONFIG_I2C_ROBOTFUZZ_OSIF is not set +-# CONFIG_I2C_TAOS_EVM is not set + CONFIG_I2C_TINY_USB=m + CONFIG_I2C_VIPERBOARD=m +- +-# +-# Other I2C/SMBus bus drivers +-# + CONFIG_I2C_MLXCPLD=m + CONFIG_I2C_STUB=m +-# CONFIG_I2C_SLAVE is not set +-# CONFIG_I2C_DEBUG_CORE is not set +-# CONFIG_I2C_DEBUG_ALGO is not set +-# CONFIG_I2C_DEBUG_BUS is not set + CONFIG_SPI=y +-# CONFIG_SPI_DEBUG is not set +-CONFIG_SPI_MASTER=y +-# CONFIG_SPI_MEM is not set +- +-# +-# SPI Master Controller Drivers +-# +-# CONFIG_SPI_ALTERA is not set +-# CONFIG_SPI_AXI_SPI_ENGINE is not set +-# CONFIG_SPI_BITBANG 
is not set +-# CONFIG_SPI_BUTTERFLY is not set +-# CONFIG_SPI_CADENCE is not set +-# CONFIG_SPI_DESIGNWARE is not set +-# CONFIG_SPI_GPIO is not set +-# CONFIG_SPI_LM70_LLP is not set +-# CONFIG_SPI_OC_TINY is not set +-# CONFIG_SPI_PXA2XX is not set +-# CONFIG_SPI_ROCKCHIP is not set +-# CONFIG_SPI_SC18IS602 is not set +-# CONFIG_SPI_XCOMM is not set +-# CONFIG_SPI_XILINX is not set +-# CONFIG_SPI_ZYNQMP_GQSPI is not set +- +-# +-# SPI Protocol Masters +-# +-# CONFIG_SPI_SPIDEV is not set +-# CONFIG_SPI_LOOPBACK_TEST is not set +-# CONFIG_SPI_TLE62X0 is not set +-# CONFIG_SPI_SLAVE is not set +-# CONFIG_SPMI is not set +-# CONFIG_HSI is not set +-CONFIG_PPS=y +-# CONFIG_PPS_DEBUG is not set +- +-# +-# PPS clients support +-# +-# CONFIG_PPS_CLIENT_KTIMER is not set + CONFIG_PPS_CLIENT_LDISC=m + CONFIG_PPS_CLIENT_PARPORT=m + CONFIG_PPS_CLIENT_GPIO=m +- +-# +-# PPS generators support +-# +- +-# +-# PTP clock support +-# +-CONFIG_PTP_1588_CLOCK=y + CONFIG_DP83640_PHY=m +-CONFIG_PTP_1588_CLOCK_KVM=m +-CONFIG_PINCTRL=y +-CONFIG_PINMUX=y +-CONFIG_PINCONF=y +-CONFIG_GENERIC_PINCONF=y +-# CONFIG_DEBUG_PINCTRL is not set + CONFIG_PINCTRL_AMD=m +-# CONFIG_PINCTRL_MCP23S08 is not set +-# CONFIG_PINCTRL_SX150X is not set + CONFIG_PINCTRL_BAYTRAIL=y +-# CONFIG_PINCTRL_CHERRYVIEW is not set +-CONFIG_PINCTRL_INTEL=m + CONFIG_PINCTRL_BROXTON=m + CONFIG_PINCTRL_CANNONLAKE=m + CONFIG_PINCTRL_CEDARFORK=m +@@ -3556,113 +1236,14 @@ + CONFIG_PINCTRL_ICELAKE=m + CONFIG_PINCTRL_LEWISBURG=m + CONFIG_PINCTRL_SUNRISEPOINT=m +-CONFIG_GPIOLIB=y +-CONFIG_GPIOLIB_FASTPATH_LIMIT=512 +-CONFIG_GPIO_ACPI=y +-CONFIG_GPIOLIB_IRQCHIP=y +-# CONFIG_DEBUG_GPIO is not set + CONFIG_GPIO_SYSFS=y +-CONFIG_GPIO_GENERIC=m +- +-# +-# Memory mapped GPIO drivers +-# + CONFIG_GPIO_AMDPT=m +-# CONFIG_GPIO_DWAPB is not set +-# CONFIG_GPIO_EXAR is not set +-# CONFIG_GPIO_GENERIC_PLATFORM is not set + CONFIG_GPIO_ICH=m +-# CONFIG_GPIO_LYNXPOINT is not set +-# CONFIG_GPIO_MB86S7X is not set +-# CONFIG_GPIO_MOCKUP is not set +-# CONFIG_GPIO_VX855 is not set +- +-# +-# Port-mapped I/O GPIO drivers +-# +-# CONFIG_GPIO_F7188X is not set +-# CONFIG_GPIO_IT87 is not set +-# CONFIG_GPIO_SCH is not set +-# CONFIG_GPIO_SCH311X is not set +-# CONFIG_GPIO_WINBOND is not set +-# CONFIG_GPIO_WS16C48 is not set +- +-# +-# I2C GPIO expanders +-# +-# CONFIG_GPIO_ADP5588 is not set +-# CONFIG_GPIO_MAX7300 is not set +-# CONFIG_GPIO_MAX732X is not set +-# CONFIG_GPIO_PCA953X is not set +-# CONFIG_GPIO_PCF857X is not set +-# CONFIG_GPIO_TPIC2810 is not set +- +-# +-# MFD GPIO expanders +-# +- +-# +-# PCI GPIO expanders +-# +-# CONFIG_GPIO_AMD8111 is not set +-# CONFIG_GPIO_ML_IOH is not set +-# CONFIG_GPIO_PCI_IDIO_16 is not set +-# CONFIG_GPIO_PCIE_IDIO_24 is not set +-# CONFIG_GPIO_RDC321X is not set +- +-# +-# SPI GPIO expanders +-# +-# CONFIG_GPIO_MAX3191X is not set +-# CONFIG_GPIO_MAX7301 is not set +-# CONFIG_GPIO_MC33880 is not set +-# CONFIG_GPIO_PISOSR is not set +-# CONFIG_GPIO_XRA1403 is not set +- +-# +-# USB GPIO expanders +-# + CONFIG_GPIO_VIPERBOARD=m +-# CONFIG_W1 is not set +-# CONFIG_POWER_AVS is not set + CONFIG_POWER_RESET=y +-# CONFIG_POWER_RESET_RESTART is not set +-CONFIG_POWER_SUPPLY=y +-# CONFIG_POWER_SUPPLY_DEBUG is not set +-# CONFIG_PDA_POWER is not set +-# CONFIG_GENERIC_ADC_BATTERY is not set +-# CONFIG_TEST_POWER is not set +-# CONFIG_CHARGER_ADP5061 is not set +-# CONFIG_BATTERY_DS2780 is not set +-# CONFIG_BATTERY_DS2781 is not set +-# CONFIG_BATTERY_DS2782 is not set +-# CONFIG_BATTERY_SBS is not set +-# CONFIG_CHARGER_SBS is 
not set +-# CONFIG_MANAGER_SBS is not set +-# CONFIG_BATTERY_BQ27XXX is not set +-# CONFIG_BATTERY_MAX17040 is not set +-# CONFIG_BATTERY_MAX17042 is not set +-# CONFIG_CHARGER_MAX8903 is not set +-# CONFIG_CHARGER_LP8727 is not set +-# CONFIG_CHARGER_GPIO is not set +-# CONFIG_CHARGER_LTC3651 is not set +-# CONFIG_CHARGER_BQ2415X is not set +-# CONFIG_CHARGER_BQ24257 is not set +-# CONFIG_CHARGER_BQ24735 is not set +-# CONFIG_CHARGER_BQ25890 is not set + CONFIG_CHARGER_SMB347=m +-# CONFIG_BATTERY_GAUGE_LTC2941 is not set +-# CONFIG_CHARGER_RT9455 is not set +-CONFIG_HWMON=y +-CONFIG_HWMON_VID=m +-# CONFIG_HWMON_DEBUG_CHIP is not set +- +-# +-# Native drivers +-# + CONFIG_SENSORS_ABITUGURU=m + CONFIG_SENSORS_ABITUGURU3=m +-# CONFIG_SENSORS_AD7314 is not set + CONFIG_SENSORS_AD7414=m + CONFIG_SENSORS_AD7418=m + CONFIG_SENSORS_ADM1021=m +@@ -3671,8 +1252,6 @@ + CONFIG_SENSORS_ADM1029=m + CONFIG_SENSORS_ADM1031=m + CONFIG_SENSORS_ADM9240=m +-CONFIG_SENSORS_ADT7X10=m +-# CONFIG_SENSORS_ADT7310 is not set + CONFIG_SENSORS_ADT7410=m + CONFIG_SENSORS_ADT7411=m + CONFIG_SENSORS_ADT7462=m +@@ -3684,57 +1263,38 @@ + CONFIG_SENSORS_FAM15H_POWER=m + CONFIG_SENSORS_APPLESMC=m + CONFIG_SENSORS_ASB100=m +-# CONFIG_SENSORS_ASPEED is not set + CONFIG_SENSORS_ATXP1=m + CONFIG_SENSORS_DS620=m + CONFIG_SENSORS_DS1621=m +-CONFIG_SENSORS_DELL_SMM=m + CONFIG_SENSORS_I5K_AMB=m + CONFIG_SENSORS_F71805F=m + CONFIG_SENSORS_F71882FG=m + CONFIG_SENSORS_F75375S=m + CONFIG_SENSORS_FSCHMD=m +-# CONFIG_SENSORS_FTSTEUTATES is not set + CONFIG_SENSORS_GL518SM=m + CONFIG_SENSORS_GL520SM=m + CONFIG_SENSORS_G760A=m +-# CONFIG_SENSORS_G762 is not set +-# CONFIG_SENSORS_HIH6130 is not set + CONFIG_SENSORS_IBMAEM=m + CONFIG_SENSORS_IBMPEX=m +-# CONFIG_SENSORS_IIO_HWMON is not set + CONFIG_SENSORS_I5500=m + CONFIG_SENSORS_CORETEMP=m + CONFIG_SENSORS_IT87=m + CONFIG_SENSORS_JC42=m +-# CONFIG_SENSORS_POWR1220 is not set + CONFIG_SENSORS_LINEAGE=m +-# CONFIG_SENSORS_LTC2945 is not set +-# CONFIG_SENSORS_LTC2990 is not set + CONFIG_SENSORS_LTC4151=m + CONFIG_SENSORS_LTC4215=m +-# CONFIG_SENSORS_LTC4222 is not set + CONFIG_SENSORS_LTC4245=m +-# CONFIG_SENSORS_LTC4260 is not set + CONFIG_SENSORS_LTC4261=m +-# CONFIG_SENSORS_MAX1111 is not set + CONFIG_SENSORS_MAX16065=m + CONFIG_SENSORS_MAX1619=m + CONFIG_SENSORS_MAX1668=m + CONFIG_SENSORS_MAX197=m +-# CONFIG_SENSORS_MAX31722 is not set +-# CONFIG_SENSORS_MAX6621 is not set + CONFIG_SENSORS_MAX6639=m + CONFIG_SENSORS_MAX6642=m + CONFIG_SENSORS_MAX6650=m + CONFIG_SENSORS_MAX6697=m +-# CONFIG_SENSORS_MAX31790 is not set + CONFIG_SENSORS_MCP3021=m +-# CONFIG_SENSORS_MLXREG_FAN is not set +-# CONFIG_SENSORS_TC654 is not set +-# CONFIG_SENSORS_ADCXX is not set + CONFIG_SENSORS_LM63=m +-# CONFIG_SENSORS_LM70 is not set + CONFIG_SENSORS_LM73=m + CONFIG_SENSORS_LM75=m + CONFIG_SENSORS_LM77=m +@@ -3752,140 +1312,69 @@ + CONFIG_SENSORS_PC87360=m + CONFIG_SENSORS_PC87427=m + CONFIG_SENSORS_NTC_THERMISTOR=m +-# CONFIG_SENSORS_NCT6683 is not set + CONFIG_SENSORS_NCT6775=m +-# CONFIG_SENSORS_NCT7802 is not set +-# CONFIG_SENSORS_NCT7904 is not set +-# CONFIG_SENSORS_NPCM7XX is not set + CONFIG_SENSORS_PCF8591=m + CONFIG_PMBUS=m +-CONFIG_SENSORS_PMBUS=m + CONFIG_SENSORS_ADM1275=m +-# CONFIG_SENSORS_IBM_CFFPS is not set +-# CONFIG_SENSORS_IR35221 is not set + CONFIG_SENSORS_LM25066=m + CONFIG_SENSORS_LTC2978=m +-# CONFIG_SENSORS_LTC3815 is not set + CONFIG_SENSORS_MAX16064=m +-# CONFIG_SENSORS_MAX20751 is not set +-# CONFIG_SENSORS_MAX31785 is not set + CONFIG_SENSORS_MAX34440=m + 
CONFIG_SENSORS_MAX8688=m +-# CONFIG_SENSORS_TPS40422 is not set +-# CONFIG_SENSORS_TPS53679 is not set + CONFIG_SENSORS_UCD9000=m + CONFIG_SENSORS_UCD9200=m + CONFIG_SENSORS_ZL6100=m + CONFIG_SENSORS_SHT15=m + CONFIG_SENSORS_SHT21=m +-# CONFIG_SENSORS_SHT3x is not set +-# CONFIG_SENSORS_SHTC1 is not set + CONFIG_SENSORS_SIS5595=m + CONFIG_SENSORS_DME1737=m + CONFIG_SENSORS_EMC1403=m +-# CONFIG_SENSORS_EMC2103 is not set + CONFIG_SENSORS_EMC6W201=m + CONFIG_SENSORS_SMSC47M1=m + CONFIG_SENSORS_SMSC47M192=m + CONFIG_SENSORS_SMSC47B397=m +-CONFIG_SENSORS_SCH56XX_COMMON=m + CONFIG_SENSORS_SCH5627=m + CONFIG_SENSORS_SCH5636=m +-# CONFIG_SENSORS_STTS751 is not set +-# CONFIG_SENSORS_SMM665 is not set +-# CONFIG_SENSORS_ADC128D818 is not set + CONFIG_SENSORS_ADS1015=m + CONFIG_SENSORS_ADS7828=m +-# CONFIG_SENSORS_ADS7871 is not set + CONFIG_SENSORS_AMC6821=m + CONFIG_SENSORS_INA209=m + CONFIG_SENSORS_INA2XX=m +-# CONFIG_SENSORS_INA3221 is not set +-# CONFIG_SENSORS_TC74 is not set + CONFIG_SENSORS_THMC50=m + CONFIG_SENSORS_TMP102=m +-# CONFIG_SENSORS_TMP103 is not set +-# CONFIG_SENSORS_TMP108 is not set + CONFIG_SENSORS_TMP401=m + CONFIG_SENSORS_TMP421=m + CONFIG_SENSORS_VIA_CPUTEMP=m + CONFIG_SENSORS_VIA686A=m + CONFIG_SENSORS_VT1211=m + CONFIG_SENSORS_VT8231=m +-# CONFIG_SENSORS_W83773G is not set + CONFIG_SENSORS_W83781D=m + CONFIG_SENSORS_W83791D=m + CONFIG_SENSORS_W83792D=m + CONFIG_SENSORS_W83793=m + CONFIG_SENSORS_W83795=m +-# CONFIG_SENSORS_W83795_FANCTRL is not set + CONFIG_SENSORS_W83L785TS=m + CONFIG_SENSORS_W83L786NG=m + CONFIG_SENSORS_W83627HF=m + CONFIG_SENSORS_W83627EHF=m +-# CONFIG_SENSORS_XGENE is not set +- +-# +-# ACPI drivers +-# + CONFIG_SENSORS_ACPI_POWER=m + CONFIG_SENSORS_ATK0110=m + CONFIG_THERMAL=y +-# CONFIG_THERMAL_STATISTICS is not set +-CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 +-CONFIG_THERMAL_HWMON=y +-CONFIG_THERMAL_WRITABLE_TRIPS=y +-CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y +-# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set +-# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set +-# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set + CONFIG_THERMAL_GOV_FAIR_SHARE=y +-CONFIG_THERMAL_GOV_STEP_WISE=y +-CONFIG_THERMAL_GOV_BANG_BANG=y +-CONFIG_THERMAL_GOV_USER_SPACE=y +-# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set +-# CONFIG_THERMAL_EMULATION is not set + CONFIG_INTEL_POWERCLAMP=m +-CONFIG_X86_PKG_TEMP_THERMAL=m +-CONFIG_INTEL_SOC_DTS_IOSF_CORE=m +-# CONFIG_INTEL_SOC_DTS_THERMAL is not set +- +-# +-# ACPI INT340X thermal drivers +-# + CONFIG_INT340X_THERMAL=m +-CONFIG_ACPI_THERMAL_REL=m +-# CONFIG_INT3406_THERMAL is not set + CONFIG_INTEL_PCH_THERMAL=m +-# CONFIG_GENERIC_ADC_THERMAL is not set + CONFIG_WATCHDOG=y +-CONFIG_WATCHDOG_CORE=y +-# CONFIG_WATCHDOG_NOWAYOUT is not set +-CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y + CONFIG_WATCHDOG_SYSFS=y +- +-# +-# Watchdog Device Drivers +-# + CONFIG_SOFT_WATCHDOG=m + CONFIG_WDAT_WDT=m +-# CONFIG_XILINX_WATCHDOG is not set +-# CONFIG_ZIIRAVE_WATCHDOG is not set +-# CONFIG_CADENCE_WATCHDOG is not set +-# CONFIG_DW_WATCHDOG is not set +-# CONFIG_MAX63XX_WATCHDOG is not set +-# CONFIG_ACQUIRE_WDT is not set +-# CONFIG_ADVANTECH_WDT is not set + CONFIG_ALIM1535_WDT=m + CONFIG_ALIM7101_WDT=m +-# CONFIG_EBC_C384_WDT is not set + CONFIG_F71808E_WDT=m + CONFIG_SP5100_TCO=m + CONFIG_SBC_FITPC2_WATCHDOG=m +-# CONFIG_EUROTECH_WDT is not set + CONFIG_IB700_WDT=m + CONFIG_IBMASR=m +-# CONFIG_WAFER_WDT is not set + CONFIG_I6300ESB_WDT=m + CONFIG_IE6XX_WDT=m + CONFIG_ITCO_WDT=m +@@ -3893,146 +1382,26 @@ + CONFIG_IT8712F_WDT=m + 
CONFIG_IT87_WDT=m + CONFIG_HP_WATCHDOG=m +-CONFIG_HPWDT_NMI_DECODING=y +-# CONFIG_SC1200_WDT is not set +-# CONFIG_PC87413_WDT is not set + CONFIG_NV_TCO=m +-# CONFIG_60XX_WDT is not set +-# CONFIG_CPU5_WDT is not set + CONFIG_SMSC_SCH311X_WDT=m +-# CONFIG_SMSC37B787_WDT is not set + CONFIG_VIA_WDT=m + CONFIG_W83627HF_WDT=m + CONFIG_W83877F_WDT=m + CONFIG_W83977F_WDT=m + CONFIG_MACHZ_WDT=m +-# CONFIG_SBC_EPX_C3_WATCHDOG is not set + CONFIG_INTEL_MEI_WDT=m +-# CONFIG_NI903X_WDT is not set +-# CONFIG_NIC7018_WDT is not set +-# CONFIG_MEN_A21_WDT is not set +-CONFIG_XEN_WDT=m +- +-# +-# PCI-based Watchdog Cards +-# + CONFIG_PCIPCWATCHDOG=m + CONFIG_WDTPCI=m +- +-# +-# USB-based Watchdog Cards +-# + CONFIG_USBPCWATCHDOG=m +- +-# +-# Watchdog Pretimeout Governors +-# +-# CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set +-CONFIG_SSB_POSSIBLE=y +-# CONFIG_SSB is not set +-CONFIG_BCMA_POSSIBLE=y +-CONFIG_BCMA=m +-CONFIG_BCMA_HOST_PCI_POSSIBLE=y +-CONFIG_BCMA_HOST_PCI=y +-# CONFIG_BCMA_HOST_SOC is not set +-CONFIG_BCMA_DRIVER_PCI=y + CONFIG_BCMA_DRIVER_GMAC_CMN=y + CONFIG_BCMA_DRIVER_GPIO=y +-# CONFIG_BCMA_DEBUG is not set +- +-# +-# Multifunction device drivers +-# +-CONFIG_MFD_CORE=y +-# CONFIG_MFD_AS3711 is not set +-# CONFIG_PMIC_ADP5520 is not set +-# CONFIG_MFD_AAT2870_CORE is not set +-# CONFIG_MFD_BCM590XX is not set +-# CONFIG_MFD_BD9571MWV is not set +-# CONFIG_MFD_AXP20X_I2C is not set +-# CONFIG_MFD_CROS_EC is not set +-# CONFIG_MFD_MADERA is not set +-# CONFIG_PMIC_DA903X is not set +-# CONFIG_MFD_DA9052_SPI is not set +-# CONFIG_MFD_DA9052_I2C is not set +-# CONFIG_MFD_DA9055 is not set +-# CONFIG_MFD_DA9062 is not set +-# CONFIG_MFD_DA9063 is not set +-# CONFIG_MFD_DA9150 is not set +-# CONFIG_MFD_DLN2 is not set +-# CONFIG_MFD_MC13XXX_SPI is not set +-# CONFIG_MFD_MC13XXX_I2C is not set +-# CONFIG_HTC_PASIC3 is not set +-# CONFIG_HTC_I2CPLD is not set +-# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set +-CONFIG_LPC_ICH=m +-CONFIG_LPC_SCH=m +-# CONFIG_INTEL_SOC_PMIC_CHTDC_TI is not set +-CONFIG_MFD_INTEL_LPSS=y + CONFIG_MFD_INTEL_LPSS_ACPI=y + CONFIG_MFD_INTEL_LPSS_PCI=y +-# CONFIG_MFD_JANZ_CMODIO is not set +-# CONFIG_MFD_KEMPLD is not set +-# CONFIG_MFD_88PM800 is not set +-# CONFIG_MFD_88PM805 is not set +-# CONFIG_MFD_88PM860X is not set +-# CONFIG_MFD_MAX14577 is not set +-# CONFIG_MFD_MAX77693 is not set +-# CONFIG_MFD_MAX77843 is not set +-# CONFIG_MFD_MAX8907 is not set +-# CONFIG_MFD_MAX8925 is not set +-# CONFIG_MFD_MAX8997 is not set +-# CONFIG_MFD_MAX8998 is not set +-# CONFIG_MFD_MT6397 is not set +-# CONFIG_MFD_MENF21BMC is not set +-# CONFIG_EZX_PCAP is not set + CONFIG_MFD_VIPERBOARD=m +-# CONFIG_MFD_RETU is not set +-# CONFIG_MFD_PCF50633 is not set +-# CONFIG_UCB1400_CORE is not set +-# CONFIG_MFD_RDC321X is not set +-# CONFIG_MFD_RT5033 is not set +-# CONFIG_MFD_RC5T583 is not set +-# CONFIG_MFD_SEC_CORE is not set +-# CONFIG_MFD_SI476X_CORE is not set + CONFIG_MFD_SM501=m + CONFIG_MFD_SM501_GPIO=y +-# CONFIG_MFD_SKY81452 is not set +-# CONFIG_MFD_SMSC is not set +-# CONFIG_ABX500_CORE is not set +-# CONFIG_MFD_SYSCON is not set +-# CONFIG_MFD_TI_AM335X_TSCADC is not set +-# CONFIG_MFD_LP3943 is not set +-# CONFIG_MFD_LP8788 is not set +-# CONFIG_MFD_TI_LMU is not set +-# CONFIG_MFD_PALMAS is not set +-# CONFIG_TPS6105X is not set +-# CONFIG_TPS65010 is not set +-# CONFIG_TPS6507X is not set +-# CONFIG_MFD_TPS65086 is not set +-# CONFIG_MFD_TPS65090 is not set +-# CONFIG_MFD_TI_LP873X is not set +-# CONFIG_MFD_TPS6586X is not set +-# CONFIG_MFD_TPS65910 is not set +-# 
CONFIG_MFD_TPS65912_I2C is not set +-# CONFIG_MFD_TPS65912_SPI is not set +-# CONFIG_MFD_TPS80031 is not set +-# CONFIG_TWL4030_CORE is not set +-# CONFIG_TWL6040_CORE is not set +-# CONFIG_MFD_WL1273_CORE is not set +-# CONFIG_MFD_LM3533 is not set + CONFIG_MFD_VX855=m +-# CONFIG_MFD_ARIZONA_I2C is not set +-# CONFIG_MFD_ARIZONA_SPI is not set +-# CONFIG_MFD_WM8400 is not set +-# CONFIG_MFD_WM831X_I2C is not set +-# CONFIG_MFD_WM831X_SPI is not set +-# CONFIG_MFD_WM8350_I2C is not set +-# CONFIG_MFD_WM8994 is not set +-# CONFIG_REGULATOR is not set +-CONFIG_CEC_CORE=y + CONFIG_RC_CORE=m +-CONFIG_RC_MAP=m + CONFIG_LIRC=y + CONFIG_RC_DECODERS=y + CONFIG_IR_NEC_DECODER=m +@@ -4041,9 +1410,7 @@ + CONFIG_IR_JVC_DECODER=m + CONFIG_IR_SONY_DECODER=m + CONFIG_IR_SANYO_DECODER=m +-# CONFIG_IR_SHARP_DECODER is not set + CONFIG_IR_MCE_KBD_DECODER=m +-# CONFIG_IR_XMP_DECODER is not set + CONFIG_IR_IMON_DECODER=m + CONFIG_RC_DEVICES=y + CONFIG_RC_ATI_REMOTE=m +@@ -4057,65 +1424,31 @@ + CONFIG_IR_REDRAT3=m + CONFIG_IR_STREAMZAP=m + CONFIG_IR_WINBOND_CIR=m +-# CONFIG_IR_IGORPLUGUSB is not set + CONFIG_IR_IGUANA=m + CONFIG_IR_TTUSBIR=m +-# CONFIG_RC_LOOPBACK is not set + CONFIG_IR_SERIAL=m + CONFIG_IR_SERIAL_TRANSMITTER=y + CONFIG_IR_SIR=m + CONFIG_MEDIA_SUPPORT=m +- +-# +-# Multimedia core support +-# + CONFIG_MEDIA_CAMERA_SUPPORT=y + CONFIG_MEDIA_ANALOG_TV_SUPPORT=y + CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y + CONFIG_MEDIA_RADIO_SUPPORT=y +-# CONFIG_MEDIA_SDR_SUPPORT is not set + CONFIG_MEDIA_CEC_SUPPORT=y +-# CONFIG_MEDIA_CONTROLLER is not set +-CONFIG_VIDEO_DEV=m +-CONFIG_VIDEO_V4L2=m +-# CONFIG_VIDEO_ADV_DEBUG is not set +-# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +-CONFIG_VIDEO_TUNER=m +-CONFIG_VIDEOBUF_GEN=m +-CONFIG_VIDEOBUF_DMA_SG=m +-CONFIG_VIDEOBUF_VMALLOC=m +-CONFIG_DVB_CORE=m +-# CONFIG_DVB_MMAP is not set +-CONFIG_DVB_NET=y +-CONFIG_TTPCI_EEPROM=m + CONFIG_DVB_MAX_ADAPTERS=8 + CONFIG_DVB_DYNAMIC_MINORS=y +-# CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set +-# CONFIG_DVB_ULE_DEBUG is not set +- +-# +-# Media drivers +-# + CONFIG_MEDIA_USB_SUPPORT=y +- +-# +-# Webcam devices +-# + CONFIG_USB_VIDEO_CLASS=m +-CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y +-CONFIG_USB_GSPCA=m + CONFIG_USB_M5602=m + CONFIG_USB_STV06XX=m + CONFIG_USB_GL860=m + CONFIG_USB_GSPCA_BENQ=m + CONFIG_USB_GSPCA_CONEX=m + CONFIG_USB_GSPCA_CPIA1=m +-# CONFIG_USB_GSPCA_DTCS033 is not set + CONFIG_USB_GSPCA_ETOMS=m + CONFIG_USB_GSPCA_FINEPIX=m + CONFIG_USB_GSPCA_JEILINJ=m + CONFIG_USB_GSPCA_JL2005BCD=m +-# CONFIG_USB_GSPCA_KINECT is not set + CONFIG_USB_GSPCA_KONICA=m + CONFIG_USB_GSPCA_MARS=m + CONFIG_USB_GSPCA_MR97310A=m +@@ -4142,61 +1475,25 @@ + CONFIG_USB_GSPCA_SQ905C=m + CONFIG_USB_GSPCA_SQ930X=m + CONFIG_USB_GSPCA_STK014=m +-# CONFIG_USB_GSPCA_STK1135 is not set + CONFIG_USB_GSPCA_STV0680=m + CONFIG_USB_GSPCA_SUNPLUS=m + CONFIG_USB_GSPCA_T613=m + CONFIG_USB_GSPCA_TOPRO=m +-# CONFIG_USB_GSPCA_TOUPTEK is not set + CONFIG_USB_GSPCA_TV8532=m + CONFIG_USB_GSPCA_VC032X=m + CONFIG_USB_GSPCA_VICAM=m + CONFIG_USB_GSPCA_XIRLINK_CIT=m + CONFIG_USB_GSPCA_ZC3XX=m + CONFIG_USB_PWC=m +-# CONFIG_USB_PWC_DEBUG is not set +-CONFIG_USB_PWC_INPUT_EVDEV=y +-# CONFIG_VIDEO_CPIA2 is not set +-CONFIG_USB_ZR364XX=m + CONFIG_USB_STKWEBCAM=m + CONFIG_USB_S2255=m +-# CONFIG_VIDEO_USBTV is not set +- +-# +-# Analog TV USB devices +-# + CONFIG_VIDEO_PVRUSB2=m +-CONFIG_VIDEO_PVRUSB2_SYSFS=y +-CONFIG_VIDEO_PVRUSB2_DVB=y +-# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set + CONFIG_VIDEO_HDPVR=m + CONFIG_VIDEO_USBVISION=m +-# CONFIG_VIDEO_STK1160_COMMON is not set +-# 
CONFIG_VIDEO_GO7007 is not set +- +-# +-# Analog/digital TV USB devices +-# + CONFIG_VIDEO_AU0828=m +-CONFIG_VIDEO_AU0828_V4L2=y +-# CONFIG_VIDEO_AU0828_RC is not set +-CONFIG_VIDEO_CX231XX=m +-CONFIG_VIDEO_CX231XX_RC=y +-CONFIG_VIDEO_CX231XX_ALSA=m +-CONFIG_VIDEO_CX231XX_DVB=m +-CONFIG_VIDEO_TM6000=m +-CONFIG_VIDEO_TM6000_ALSA=m +-CONFIG_VIDEO_TM6000_DVB=m +- +-# +-# Digital TV USB devices +-# + CONFIG_DVB_USB=m +-# CONFIG_DVB_USB_DEBUG is not set +-CONFIG_DVB_USB_DIB3000MC=m + CONFIG_DVB_USB_A800=m + CONFIG_DVB_USB_DIBUSB_MB=m +-# CONFIG_DVB_USB_DIBUSB_MB_FAULTY is not set + CONFIG_DVB_USB_DIBUSB_MC=m + CONFIG_DVB_USB_DIB0700=m + CONFIG_DVB_USB_UMT_010=m +@@ -4230,491 +1527,58 @@ + CONFIG_DVB_USB_LME2510=m + CONFIG_DVB_USB_MXL111SF=m + CONFIG_DVB_USB_RTL28XXU=m +-# CONFIG_DVB_USB_DVBSKY is not set +-# CONFIG_DVB_USB_ZD1301 is not set + CONFIG_DVB_TTUSB_BUDGET=m + CONFIG_DVB_TTUSB_DEC=m + CONFIG_SMS_USB_DRV=m + CONFIG_DVB_B2C2_FLEXCOP_USB=m +-# CONFIG_DVB_B2C2_FLEXCOP_USB_DEBUG is not set +-# CONFIG_DVB_AS102 is not set +- +-# +-# Webcam, TV (analog/digital) USB devices +-# + CONFIG_VIDEO_EM28XX=m +-# CONFIG_VIDEO_EM28XX_V4L2 is not set + CONFIG_VIDEO_EM28XX_ALSA=m + CONFIG_VIDEO_EM28XX_DVB=m +-CONFIG_VIDEO_EM28XX_RC=m +- +-# +-# USB HDMI CEC adapters +-# + CONFIG_USB_PULSE8_CEC=m + CONFIG_USB_RAINSHADOW_CEC=m + CONFIG_MEDIA_PCI_SUPPORT=y +- +-# +-# Media capture support +-# +-# CONFIG_VIDEO_MEYE is not set +-# CONFIG_VIDEO_SOLO6X10 is not set +-# CONFIG_VIDEO_TW5864 is not set +-# CONFIG_VIDEO_TW68 is not set +-# CONFIG_VIDEO_TW686X is not set +- +-# +-# Media capture/analog TV support +-# + CONFIG_VIDEO_IVTV=m +-# CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS is not set +-# CONFIG_VIDEO_IVTV_ALSA is not set + CONFIG_VIDEO_FB_IVTV=m +-# CONFIG_VIDEO_HEXIUM_GEMINI is not set +-# CONFIG_VIDEO_HEXIUM_ORION is not set +-# CONFIG_VIDEO_MXB is not set +-# CONFIG_VIDEO_DT3155 is not set +- +-# +-# Media capture/analog/hybrid TV support +-# +-CONFIG_VIDEO_CX18=m +-CONFIG_VIDEO_CX18_ALSA=m + CONFIG_VIDEO_CX23885=m + CONFIG_MEDIA_ALTERA_CI=m +-# CONFIG_VIDEO_CX25821 is not set + CONFIG_VIDEO_CX88=m + CONFIG_VIDEO_CX88_ALSA=m + CONFIG_VIDEO_CX88_BLACKBIRD=m + CONFIG_VIDEO_CX88_DVB=m + # CONFIG_VIDEO_CX88_ENABLE_VP3054 is not set +-CONFIG_VIDEO_CX88_MPEG=m +-CONFIG_VIDEO_BT848=m +-CONFIG_DVB_BT8XX=m + CONFIG_VIDEO_SAA7134=m + CONFIG_VIDEO_SAA7134_ALSA=m +-CONFIG_VIDEO_SAA7134_RC=y + CONFIG_VIDEO_SAA7134_DVB=m + CONFIG_VIDEO_SAA7164=m +- +-# +-# Media digital TV PCI Adapters +-# +-CONFIG_DVB_AV7110_IR=y +-CONFIG_DVB_AV7110=m +-CONFIG_DVB_AV7110_OSD=y + CONFIG_DVB_BUDGET_CORE=m + CONFIG_DVB_BUDGET=m + CONFIG_DVB_BUDGET_CI=m +-CONFIG_DVB_BUDGET_AV=m +-CONFIG_DVB_BUDGET_PATCH=m + CONFIG_DVB_B2C2_FLEXCOP_PCI=m +-# CONFIG_DVB_B2C2_FLEXCOP_PCI_DEBUG is not set + CONFIG_DVB_PLUTO2=m + CONFIG_DVB_DM1105=m + CONFIG_DVB_PT1=m +-# CONFIG_DVB_PT3 is not set + CONFIG_MANTIS_CORE=m + CONFIG_DVB_MANTIS=m + CONFIG_DVB_HOPPER=m + CONFIG_DVB_NGENE=m + CONFIG_DVB_DDBRIDGE=m +-# CONFIG_DVB_DDBRIDGE_MSIENABLE is not set +-# CONFIG_DVB_SMIPCIE is not set +-# CONFIG_DVB_NETUP_UNIDVB is not set +-# CONFIG_V4L_PLATFORM_DRIVERS is not set +-# CONFIG_V4L_MEM2MEM_DRIVERS is not set +-# CONFIG_V4L_TEST_DRIVERS is not set +-# CONFIG_DVB_PLATFORM_DRIVERS is not set + CONFIG_CEC_PLATFORM_DRIVERS=y +- +-# +-# Supported MMC/SDIO adapters +-# + CONFIG_SMS_SDIO_DRV=m +-CONFIG_RADIO_ADAPTERS=y +-CONFIG_RADIO_TEA575X=m +-# CONFIG_RADIO_SI470X is not set +-# CONFIG_RADIO_SI4713 is not set +-# CONFIG_USB_MR800 is not set +-# CONFIG_USB_DSBR is not 
set +-# CONFIG_RADIO_MAXIRADIO is not set +-# CONFIG_RADIO_SHARK is not set +-# CONFIG_RADIO_SHARK2 is not set +-# CONFIG_USB_KEENE is not set +-# CONFIG_USB_RAREMONO is not set +-# CONFIG_USB_MA901 is not set +-# CONFIG_RADIO_TEA5764 is not set +-# CONFIG_RADIO_SAA7706H is not set +-# CONFIG_RADIO_TEF6862 is not set +-# CONFIG_RADIO_WL1273 is not set +- +-# +-# Texas Instruments WL128x FM driver (ST based) +-# +- +-# +-# Supported FireWire (IEEE 1394) Adapters +-# + CONFIG_DVB_FIREDTV=m +-CONFIG_DVB_FIREDTV_INPUT=y +-CONFIG_MEDIA_COMMON_OPTIONS=y +- +-# +-# common driver options +-# +-CONFIG_VIDEO_CX2341X=m +-CONFIG_VIDEO_TVEEPROM=m +-CONFIG_CYPRESS_FIRMWARE=m +-CONFIG_VIDEOBUF2_CORE=m +-CONFIG_VIDEOBUF2_V4L2=m +-CONFIG_VIDEOBUF2_MEMOPS=m +-CONFIG_VIDEOBUF2_VMALLOC=m +-CONFIG_VIDEOBUF2_DMA_SG=m +-CONFIG_VIDEOBUF2_DVB=m +-CONFIG_DVB_B2C2_FLEXCOP=m +-CONFIG_VIDEO_SAA7146=m +-CONFIG_VIDEO_SAA7146_VV=m +-CONFIG_SMS_SIANO_MDTV=m +-CONFIG_SMS_SIANO_RC=y +-# CONFIG_SMS_SIANO_DEBUGFS is not set +- +-# +-# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) +-# +-CONFIG_MEDIA_SUBDRV_AUTOSELECT=y +-CONFIG_MEDIA_ATTACH=y +-CONFIG_VIDEO_IR_I2C=m +- +-# +-# Audio decoders, processors and mixers +-# +-CONFIG_VIDEO_TVAUDIO=m +-CONFIG_VIDEO_TDA7432=m +-CONFIG_VIDEO_MSP3400=m +-CONFIG_VIDEO_CS3308=m +-CONFIG_VIDEO_CS5345=m +-CONFIG_VIDEO_CS53L32A=m +-CONFIG_VIDEO_WM8775=m +-CONFIG_VIDEO_WM8739=m +-CONFIG_VIDEO_VP27SMPX=m +- +-# +-# RDS decoders +-# +-CONFIG_VIDEO_SAA6588=m +- +-# +-# Video decoders +-# +-CONFIG_VIDEO_SAA711X=m +- +-# +-# Video and audio decoders +-# +-CONFIG_VIDEO_SAA717X=m +-CONFIG_VIDEO_CX25840=m +- +-# +-# Video encoders +-# +-CONFIG_VIDEO_SAA7127=m +- +-# +-# Camera sensor devices +-# +- +-# +-# Flash devices +-# +- +-# +-# Video improvement chips +-# +-CONFIG_VIDEO_UPD64031A=m +-CONFIG_VIDEO_UPD64083=m +- +-# +-# Audio/Video compression chips +-# +-CONFIG_VIDEO_SAA6752HS=m +- +-# +-# SDR tuner chips +-# +- +-# +-# Miscellaneous helper chips +-# +-CONFIG_VIDEO_M52790=m +- +-# +-# Sensors used on soc_camera driver +-# +- +-# +-# Media SPI Adapters +-# +-# CONFIG_CXD2880_SPI_DRV is not set +-CONFIG_MEDIA_TUNER=m +-CONFIG_MEDIA_TUNER_SIMPLE=m +-CONFIG_MEDIA_TUNER_TDA18250=m +-CONFIG_MEDIA_TUNER_TDA8290=m +-CONFIG_MEDIA_TUNER_TDA827X=m +-CONFIG_MEDIA_TUNER_TDA18271=m +-CONFIG_MEDIA_TUNER_TDA9887=m +-CONFIG_MEDIA_TUNER_TEA5761=m +-CONFIG_MEDIA_TUNER_TEA5767=m +-CONFIG_MEDIA_TUNER_MT20XX=m +-CONFIG_MEDIA_TUNER_MT2060=m +-CONFIG_MEDIA_TUNER_MT2063=m +-CONFIG_MEDIA_TUNER_MT2266=m +-CONFIG_MEDIA_TUNER_MT2131=m +-CONFIG_MEDIA_TUNER_QT1010=m +-CONFIG_MEDIA_TUNER_XC2028=m +-CONFIG_MEDIA_TUNER_XC5000=m +-CONFIG_MEDIA_TUNER_XC4000=m +-CONFIG_MEDIA_TUNER_MXL5005S=m +-CONFIG_MEDIA_TUNER_MXL5007T=m +-CONFIG_MEDIA_TUNER_MC44S803=m +-CONFIG_MEDIA_TUNER_MAX2165=m +-CONFIG_MEDIA_TUNER_TDA18218=m +-CONFIG_MEDIA_TUNER_FC0011=m +-CONFIG_MEDIA_TUNER_FC0012=m +-CONFIG_MEDIA_TUNER_FC0013=m +-CONFIG_MEDIA_TUNER_TDA18212=m +-CONFIG_MEDIA_TUNER_E4000=m +-CONFIG_MEDIA_TUNER_FC2580=m +-CONFIG_MEDIA_TUNER_M88RS6000T=m +-CONFIG_MEDIA_TUNER_TUA9001=m +-CONFIG_MEDIA_TUNER_SI2157=m +-CONFIG_MEDIA_TUNER_IT913X=m +-CONFIG_MEDIA_TUNER_R820T=m +-CONFIG_MEDIA_TUNER_QM1D1C0042=m +-CONFIG_MEDIA_TUNER_QM1D1B0004=m +- +-# +-# Multistandard (satellite) frontends +-# +-CONFIG_DVB_STB0899=m +-CONFIG_DVB_STB6100=m +-CONFIG_DVB_STV090x=m +-CONFIG_DVB_STV0910=m +-CONFIG_DVB_STV6110x=m +-CONFIG_DVB_STV6111=m +-CONFIG_DVB_MXL5XX=m +-CONFIG_DVB_M88DS3103=m +- +-# +-# Multistandard (cable + terrestrial) frontends +-# 
+-CONFIG_DVB_DRXK=m +-CONFIG_DVB_TDA18271C2DD=m +-CONFIG_DVB_SI2165=m +-CONFIG_DVB_MN88472=m +-CONFIG_DVB_MN88473=m +- +-# +-# DVB-S (satellite) frontends +-# +-CONFIG_DVB_CX24110=m +-CONFIG_DVB_CX24123=m +-CONFIG_DVB_MT312=m +-CONFIG_DVB_ZL10036=m +-CONFIG_DVB_ZL10039=m +-CONFIG_DVB_S5H1420=m +-CONFIG_DVB_STV0288=m +-CONFIG_DVB_STB6000=m +-CONFIG_DVB_STV0299=m +-CONFIG_DVB_STV6110=m +-CONFIG_DVB_STV0900=m +-CONFIG_DVB_TDA8083=m +-CONFIG_DVB_TDA10086=m +-CONFIG_DVB_TDA8261=m +-CONFIG_DVB_VES1X93=m +-CONFIG_DVB_TUNER_ITD1000=m +-CONFIG_DVB_TUNER_CX24113=m +-CONFIG_DVB_TDA826X=m +-CONFIG_DVB_TUA6100=m +-CONFIG_DVB_CX24116=m +-CONFIG_DVB_CX24117=m +-CONFIG_DVB_CX24120=m +-CONFIG_DVB_SI21XX=m +-CONFIG_DVB_TS2020=m +-CONFIG_DVB_DS3000=m +-CONFIG_DVB_MB86A16=m +-CONFIG_DVB_TDA10071=m +- +-# +-# DVB-T (terrestrial) frontends +-# +-CONFIG_DVB_SP8870=m +-CONFIG_DVB_SP887X=m +-CONFIG_DVB_CX22700=m +-CONFIG_DVB_CX22702=m +-CONFIG_DVB_DRXD=m +-CONFIG_DVB_L64781=m +-CONFIG_DVB_TDA1004X=m +-CONFIG_DVB_NXT6000=m +-CONFIG_DVB_MT352=m +-CONFIG_DVB_ZL10353=m +-CONFIG_DVB_DIB3000MB=m +-CONFIG_DVB_DIB3000MC=m +-CONFIG_DVB_DIB7000M=m +-CONFIG_DVB_DIB7000P=m +-CONFIG_DVB_TDA10048=m +-CONFIG_DVB_AF9013=m +-CONFIG_DVB_EC100=m +-CONFIG_DVB_STV0367=m +-CONFIG_DVB_CXD2820R=m +-CONFIG_DVB_CXD2841ER=m +-CONFIG_DVB_RTL2830=m +-CONFIG_DVB_RTL2832=m +-CONFIG_DVB_SI2168=m +-CONFIG_DVB_GP8PSK_FE=m +- +-# +-# DVB-C (cable) frontends +-# +-CONFIG_DVB_VES1820=m +-CONFIG_DVB_TDA10021=m +-CONFIG_DVB_TDA10023=m +-CONFIG_DVB_STV0297=m +- +-# +-# ATSC (North American/Korean Terrestrial/Cable DTV) frontends +-# +-CONFIG_DVB_NXT200X=m +-CONFIG_DVB_OR51211=m +-CONFIG_DVB_OR51132=m +-CONFIG_DVB_BCM3510=m +-CONFIG_DVB_LGDT330X=m +-CONFIG_DVB_LGDT3305=m +-CONFIG_DVB_LGDT3306A=m +-CONFIG_DVB_LG2160=m +-CONFIG_DVB_S5H1409=m +-CONFIG_DVB_AU8522=m +-CONFIG_DVB_AU8522_DTV=m +-CONFIG_DVB_AU8522_V4L=m +-CONFIG_DVB_S5H1411=m +- +-# +-# ISDB-T (terrestrial) frontends +-# +-CONFIG_DVB_S921=m +-CONFIG_DVB_DIB8000=m +-CONFIG_DVB_MB86A20S=m +- +-# +-# ISDB-S (satellite) & ISDB-T (terrestrial) frontends +-# +-CONFIG_DVB_TC90522=m +- +-# +-# Digital terrestrial only tuners/PLL +-# +-CONFIG_DVB_PLL=m +-CONFIG_DVB_TUNER_DIB0070=m +-CONFIG_DVB_TUNER_DIB0090=m +- +-# +-# SEC control devices for DVB-S +-# +-CONFIG_DVB_DRX39XYJ=m +-CONFIG_DVB_LNBH25=m +-CONFIG_DVB_LNBP21=m +-CONFIG_DVB_LNBP22=m +-CONFIG_DVB_ISL6405=m +-CONFIG_DVB_ISL6421=m +-CONFIG_DVB_ISL6423=m +-CONFIG_DVB_A8293=m +-CONFIG_DVB_LGS8GXX=m +-CONFIG_DVB_ATBM8830=m +-CONFIG_DVB_TDA665x=m +-CONFIG_DVB_IX2505V=m +-CONFIG_DVB_M88RS2000=m +-CONFIG_DVB_AF9033=m +- +-# +-# Common Interface (EN50221) controller drivers +-# +-CONFIG_DVB_CXD2099=m +- +-# +-# Tools to develop new frontends +-# +-CONFIG_DVB_DUMMY_FE=m +- +-# +-# Graphics support +-# +-# CONFIG_AGP is not set +-CONFIG_INTEL_GTT=m +-CONFIG_VGA_ARB=y + CONFIG_VGA_ARB_MAX_GPUS=64 + CONFIG_VGA_SWITCHEROO=y + CONFIG_DRM=m +-CONFIG_DRM_MIPI_DSI=y + CONFIG_DRM_DP_AUX_CHARDEV=y +-# CONFIG_DRM_DEBUG_SELFTEST is not set +-CONFIG_DRM_KMS_HELPER=m +-CONFIG_DRM_KMS_FB_HELPER=y +-CONFIG_DRM_FBDEV_EMULATION=y +-CONFIG_DRM_FBDEV_OVERALLOC=100 + CONFIG_DRM_LOAD_EDID_FIRMWARE=y + CONFIG_DRM_DP_CEC=y +-CONFIG_DRM_TTM=m +-CONFIG_DRM_VM=y +-CONFIG_DRM_SCHED=m +- +-# +-# I2C encoder or helper chips +-# +-CONFIG_DRM_I2C_CH7006=m +-CONFIG_DRM_I2C_SIL164=m +-# CONFIG_DRM_I2C_NXP_TDA998X is not set +-# CONFIG_DRM_I2C_NXP_TDA9950 is not set + CONFIG_DRM_RADEON=m + CONFIG_DRM_RADEON_USERPTR=y + CONFIG_DRM_AMDGPU=m +-# CONFIG_DRM_AMDGPU_SI is not set +-# 
CONFIG_DRM_AMDGPU_CIK is not set +-# CONFIG_DRM_AMDGPU_USERPTR is not set +-# CONFIG_DRM_AMDGPU_GART_DEBUGFS is not set +- +-# +-# ACP (Audio CoProcessor) Configuration +-# + CONFIG_DRM_AMD_ACP=y +- +-# +-# Display Engine Configuration +-# +-CONFIG_DRM_AMD_DC=y +-CONFIG_DRM_AMD_DC_DCN1_0=y +-# CONFIG_DEBUG_KERNEL_DC is not set +- +-# +-# AMD Library routines +-# +-CONFIG_CHASH=m +-# CONFIG_CHASH_STATS is not set +-# CONFIG_CHASH_SELFTEST is not set + CONFIG_DRM_NOUVEAU=m +-CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT=y +-CONFIG_NOUVEAU_DEBUG=5 +-CONFIG_NOUVEAU_DEBUG_DEFAULT=3 +-# CONFIG_NOUVEAU_DEBUG_MMU is not set +-CONFIG_DRM_NOUVEAU_BACKLIGHT=y + CONFIG_DRM_I915=m +-# CONFIG_DRM_I915_ALPHA_SUPPORT is not set +-CONFIG_DRM_I915_CAPTURE_ERROR=y +-CONFIG_DRM_I915_COMPRESS_ERROR=y +-CONFIG_DRM_I915_USERPTR=y + CONFIG_DRM_I915_GVT=y + CONFIG_DRM_I915_GVT_KVMGT=m +-# CONFIG_DRM_VGEM is not set + CONFIG_DRM_VKMS=m + CONFIG_DRM_VMWGFX=m + CONFIG_DRM_VMWGFX_FBCON=y +@@ -4728,204 +1592,37 @@ + CONFIG_DRM_QXL=m + CONFIG_DRM_BOCHS=m + CONFIG_DRM_VIRTIO_GPU=m +-CONFIG_DRM_PANEL=y +- +-# +-# Display Panels +-# +-# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +-CONFIG_DRM_BRIDGE=y +-CONFIG_DRM_PANEL_BRIDGE=y +- +-# +-# Display Interface Bridges +-# +-# CONFIG_DRM_ANALOGIX_ANX78XX is not set + CONFIG_HSA_AMD=m +-# CONFIG_DRM_HISI_HIBMC is not set +-# CONFIG_DRM_TINYDRM is not set +-# CONFIG_DRM_XEN is not set +-# CONFIG_DRM_LEGACY is not set +-CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=y +- +-# +-# Frame buffer Devices +-# +-CONFIG_FB_CMDLINE=y +-CONFIG_FB_NOTIFY=y + CONFIG_FB=y +-# CONFIG_FIRMWARE_EDID is not set +-CONFIG_FB_BOOT_VESA_SUPPORT=y +-CONFIG_FB_CFB_FILLRECT=y +-CONFIG_FB_CFB_COPYAREA=y +-CONFIG_FB_CFB_IMAGEBLIT=y +-CONFIG_FB_SYS_FILLRECT=m +-CONFIG_FB_SYS_COPYAREA=m +-CONFIG_FB_SYS_IMAGEBLIT=m +-# CONFIG_FB_FOREIGN_ENDIAN is not set +-CONFIG_FB_SYS_FOPS=m +-CONFIG_FB_DEFERRED_IO=y +-CONFIG_FB_BACKLIGHT=y +-# CONFIG_FB_MODE_HELPERS is not set + CONFIG_FB_TILEBLITTING=y +- +-# +-# Frame buffer hardware drivers +-# +-# CONFIG_FB_CIRRUS is not set +-# CONFIG_FB_PM2 is not set +-# CONFIG_FB_CYBER2000 is not set +-# CONFIG_FB_ARC is not set +-# CONFIG_FB_ASILIANT is not set +-# CONFIG_FB_IMSTT is not set +-# CONFIG_FB_VGA16 is not set +-# CONFIG_FB_UVESA is not set ++CONFIG_FB_VGA16=m + CONFIG_FB_VESA=y + CONFIG_FB_EFI=y +-# CONFIG_FB_N411 is not set +-# CONFIG_FB_HGA is not set +-# CONFIG_FB_OPENCORES is not set +-# CONFIG_FB_S1D13XXX is not set +-# CONFIG_FB_NVIDIA is not set +-# CONFIG_FB_RIVA is not set +-# CONFIG_FB_I740 is not set +-# CONFIG_FB_LE80578 is not set +-# CONFIG_FB_MATROX is not set +-# CONFIG_FB_RADEON is not set +-# CONFIG_FB_ATY128 is not set +-# CONFIG_FB_ATY is not set +-# CONFIG_FB_S3 is not set +-# CONFIG_FB_SAVAGE is not set +-# CONFIG_FB_SIS is not set +-# CONFIG_FB_VIA is not set +-# CONFIG_FB_NEOMAGIC is not set +-# CONFIG_FB_KYRO is not set +-# CONFIG_FB_3DFX is not set +-# CONFIG_FB_VOODOO1 is not set +-# CONFIG_FB_VT8623 is not set +-# CONFIG_FB_TRIDENT is not set +-# CONFIG_FB_ARK is not set +-# CONFIG_FB_PM3 is not set +-# CONFIG_FB_CARMINE is not set +-# CONFIG_FB_SM501 is not set +-# CONFIG_FB_SMSCUFX is not set +-# CONFIG_FB_UDL is not set +-# CONFIG_FB_IBM_GXT4500 is not set +-# CONFIG_FB_VIRTUAL is not set +-# CONFIG_XEN_FBDEV_FRONTEND is not set +-# CONFIG_FB_METRONOME is not set +-# CONFIG_FB_MB862XX is not set +-# CONFIG_FB_BROADSHEET is not set +-CONFIG_FB_HYPERV=m +-# CONFIG_FB_SIMPLE is not set +-# CONFIG_FB_SM712 is not set +-CONFIG_BACKLIGHT_LCD_SUPPORT=y 
+-CONFIG_LCD_CLASS_DEVICE=m +-# CONFIG_LCD_L4F00242T03 is not set +-# CONFIG_LCD_LMS283GF05 is not set +-# CONFIG_LCD_LTV350QV is not set +-# CONFIG_LCD_ILI922X is not set +-# CONFIG_LCD_ILI9320 is not set +-# CONFIG_LCD_TDO24M is not set +-# CONFIG_LCD_VGG2432A4 is not set + CONFIG_LCD_PLATFORM=m +-# CONFIG_LCD_S6E63M0 is not set +-# CONFIG_LCD_LD9040 is not set +-# CONFIG_LCD_AMS369FG06 is not set +-# CONFIG_LCD_LMS501KF03 is not set +-# CONFIG_LCD_HX8357 is not set +-# CONFIG_LCD_OTM3225A is not set +-CONFIG_BACKLIGHT_CLASS_DEVICE=y + # CONFIG_BACKLIGHT_GENERIC is not set +-# CONFIG_BACKLIGHT_PWM is not set + CONFIG_BACKLIGHT_APPLE=m +-# CONFIG_BACKLIGHT_PM8941_WLED is not set +-# CONFIG_BACKLIGHT_SAHARA is not set +-# CONFIG_BACKLIGHT_ADP8860 is not set +-# CONFIG_BACKLIGHT_ADP8870 is not set +-# CONFIG_BACKLIGHT_LM3630A is not set +-# CONFIG_BACKLIGHT_LM3639 is not set + CONFIG_BACKLIGHT_LP855X=m +-# CONFIG_BACKLIGHT_GPIO is not set +-# CONFIG_BACKLIGHT_LV5207LP is not set +-# CONFIG_BACKLIGHT_BD6107 is not set +-# CONFIG_BACKLIGHT_ARCXCNN is not set +-CONFIG_HDMI=y +- +-# +-# Console display driver support +-# +-CONFIG_VGA_CONSOLE=y + CONFIG_VGACON_SOFT_SCROLLBACK=y +-CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64 +-# CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT is not set +-CONFIG_DUMMY_CONSOLE=y +-CONFIG_DUMMY_CONSOLE_COLUMNS=80 +-CONFIG_DUMMY_CONSOLE_ROWS=25 +-CONFIG_FRAMEBUFFER_CONSOLE=y +-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +-CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +-# CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set + CONFIG_LOGO=y + # CONFIG_LOGO_LINUX_MONO is not set + # CONFIG_LOGO_LINUX_VGA16 is not set +-CONFIG_LOGO_LINUX_CLUT224=y + CONFIG_SOUND=m +-CONFIG_SOUND_OSS_CORE=y +-CONFIG_SOUND_OSS_CORE_PRECLAIM=y + CONFIG_SND=m +-CONFIG_SND_TIMER=m +-CONFIG_SND_PCM=m +-CONFIG_SND_PCM_ELD=y +-CONFIG_SND_HWDEP=m +-CONFIG_SND_SEQ_DEVICE=m +-CONFIG_SND_RAWMIDI=m +-CONFIG_SND_COMPRESS_OFFLOAD=m +-CONFIG_SND_JACK=y +-CONFIG_SND_JACK_INPUT_DEV=y + CONFIG_SND_OSSEMUL=y +-# CONFIG_SND_MIXER_OSS is not set +-# CONFIG_SND_PCM_OSS is not set +-CONFIG_SND_PCM_TIMER=y + CONFIG_SND_HRTIMER=m +-CONFIG_SND_DYNAMIC_MINORS=y +-CONFIG_SND_MAX_CARDS=32 + # CONFIG_SND_SUPPORT_OLD_API is not set +-CONFIG_SND_PROC_FS=y +-CONFIG_SND_VERBOSE_PROCFS=y +-# CONFIG_SND_VERBOSE_PRINTK is not set +-# CONFIG_SND_DEBUG is not set +-CONFIG_SND_VMASTER=y +-CONFIG_SND_DMA_SGBUF=y + CONFIG_SND_SEQUENCER=m + CONFIG_SND_SEQ_DUMMY=m + CONFIG_SND_SEQUENCER_OSS=m +-CONFIG_SND_SEQ_HRTIMER_DEFAULT=y +-CONFIG_SND_SEQ_MIDI_EVENT=m +-CONFIG_SND_SEQ_MIDI=m +-CONFIG_SND_SEQ_MIDI_EMUL=m +-CONFIG_SND_SEQ_VIRMIDI=m +-CONFIG_SND_MPU401_UART=m +-CONFIG_SND_OPL3_LIB=m +-CONFIG_SND_OPL3_LIB_SEQ=m +-CONFIG_SND_VX_LIB=m +-CONFIG_SND_AC97_CODEC=m +-CONFIG_SND_DRIVERS=y + CONFIG_SND_PCSP=m + CONFIG_SND_DUMMY=m + CONFIG_SND_ALOOP=m + CONFIG_SND_VIRMIDI=m + CONFIG_SND_MTPAV=m +-# CONFIG_SND_MTS64 is not set +-# CONFIG_SND_SERIAL_U16550 is not set + CONFIG_SND_MPU401=m +-# CONFIG_SND_PORTMAN2X4 is not set + CONFIG_SND_AC97_POWER_SAVE=y + CONFIG_SND_AC97_POWER_SAVE_DEFAULT=5 +-CONFIG_SND_PCI=y + CONFIG_SND_AD1889=m +-# CONFIG_SND_ALS300 is not set +-# CONFIG_SND_ALS4000 is not set + CONFIG_SND_ALI5451=m + CONFIG_SND_ASIHPI=m + CONFIG_SND_ATIIXP=m +@@ -4933,17 +1630,11 @@ + CONFIG_SND_AU8810=m + CONFIG_SND_AU8820=m + CONFIG_SND_AU8830=m +-# CONFIG_SND_AW2 is not set +-# CONFIG_SND_AZT3328 is not set + CONFIG_SND_BT87X=m +-# CONFIG_SND_BT87X_OVERCLOCK is not set + CONFIG_SND_CA0106=m + CONFIG_SND_CMIPCI=m 
+-CONFIG_SND_OXYGEN_LIB=m + CONFIG_SND_OXYGEN=m +-# CONFIG_SND_CS4281 is not set + CONFIG_SND_CS46XX=m +-CONFIG_SND_CS46XX_NEW_DSP=y + CONFIG_SND_CTXFI=m + CONFIG_SND_DARLA20=m + CONFIG_SND_GINA20=m +@@ -4960,15 +1651,12 @@ + CONFIG_SND_INDIGOIOX=m + CONFIG_SND_INDIGODJX=m + CONFIG_SND_EMU10K1=m +-CONFIG_SND_EMU10K1_SEQ=m + CONFIG_SND_EMU10K1X=m + CONFIG_SND_ENS1370=m + CONFIG_SND_ENS1371=m +-# CONFIG_SND_ES1938 is not set + CONFIG_SND_ES1968=m + CONFIG_SND_ES1968_INPUT=y + CONFIG_SND_ES1968_RADIO=y +-# CONFIG_SND_FM801 is not set + CONFIG_SND_HDSP=m + CONFIG_SND_HDSPM=m + CONFIG_SND_ICE1712=m +@@ -4981,27 +1669,17 @@ + CONFIG_SND_MAESTRO3=m + CONFIG_SND_MAESTRO3_INPUT=y + CONFIG_SND_MIXART=m +-# CONFIG_SND_NM256 is not set + CONFIG_SND_PCXHR=m +-# CONFIG_SND_RIPTIDE is not set + CONFIG_SND_RME32=m + CONFIG_SND_RME96=m + CONFIG_SND_RME9652=m +-# CONFIG_SND_SONICVIBES is not set + CONFIG_SND_TRIDENT=m + CONFIG_SND_VIA82XX=m + CONFIG_SND_VIA82XX_MODEM=m + CONFIG_SND_VIRTUOSO=m + CONFIG_SND_VX222=m +-# CONFIG_SND_YMFPCI is not set +- +-# +-# HD-Audio +-# +-CONFIG_SND_HDA=m + CONFIG_SND_HDA_INTEL=m + CONFIG_SND_HDA_HWDEP=y +-CONFIG_SND_HDA_RECONFIG=y + CONFIG_SND_HDA_INPUT_BEEP=y + CONFIG_SND_HDA_INPUT_BEEP_MODE=0 + CONFIG_SND_HDA_PATCH_LOADER=y +@@ -5017,16 +1695,8 @@ + CONFIG_SND_HDA_CODEC_CA0132_DSP=y + CONFIG_SND_HDA_CODEC_CMEDIA=m + CONFIG_SND_HDA_CODEC_SI3054=m +-CONFIG_SND_HDA_GENERIC=m +-CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 +-CONFIG_SND_HDA_CORE=m +-CONFIG_SND_HDA_DSP_LOADER=y +-CONFIG_SND_HDA_COMPONENT=y +-CONFIG_SND_HDA_I915=y +-CONFIG_SND_HDA_EXT_CORE=m + CONFIG_SND_HDA_PREALLOC_SIZE=512 + # CONFIG_SND_SPI is not set +-CONFIG_SND_USB=y + CONFIG_SND_USB_AUDIO=m + CONFIG_SND_USB_UA101=m + CONFIG_SND_USB_USX2Y=m +@@ -5036,13 +1706,10 @@ + CONFIG_SND_USB_6FIRE=m + CONFIG_SND_USB_HIFACE=m + CONFIG_SND_BCD2000=m +-CONFIG_SND_USB_LINE6=m + CONFIG_SND_USB_POD=m + CONFIG_SND_USB_PODHD=m + CONFIG_SND_USB_TONEPORT=m + CONFIG_SND_USB_VARIAX=m +-CONFIG_SND_FIREWIRE=y +-CONFIG_SND_FIREWIRE_LIB=m + CONFIG_SND_DICE=m + CONFIG_SND_OXFW=m + CONFIG_SND_ISIGHT=m +@@ -5053,45 +1720,8 @@ + CONFIG_SND_FIREWIRE_MOTU=m + CONFIG_SND_FIREFACE=m + CONFIG_SND_SOC=m +-CONFIG_SND_SOC_COMPRESS=y +-CONFIG_SND_SOC_TOPOLOGY=y +-CONFIG_SND_SOC_ACPI=m +-# CONFIG_SND_SOC_AMD_ACP is not set +-# CONFIG_SND_ATMEL_SOC is not set +-# CONFIG_SND_DESIGNWARE_I2S is not set +- +-# +-# SoC Audio for Freescale CPUs +-# +- +-# +-# Common SoC Audio options for Freescale CPUs: +-# +-# CONFIG_SND_SOC_FSL_ASRC is not set +-# CONFIG_SND_SOC_FSL_SAI is not set +-# CONFIG_SND_SOC_FSL_SSI is not set +-# CONFIG_SND_SOC_FSL_SPDIF is not set +-# CONFIG_SND_SOC_FSL_ESAI is not set +-# CONFIG_SND_SOC_IMX_AUDMUX is not set +-# CONFIG_SND_I2S_HI6210_I2S is not set +-# CONFIG_SND_SOC_IMG is not set +-CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y +-CONFIG_SND_SST_IPC=m +-CONFIG_SND_SST_IPC_ACPI=m +-CONFIG_SND_SOC_INTEL_SST_ACPI=m +-CONFIG_SND_SOC_INTEL_SST=m +-CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m + CONFIG_SND_SOC_INTEL_HASWELL=m +-CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m +-# CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI is not set +-CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m +-CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m + CONFIG_SND_SOC_INTEL_SKYLAKE=m +-CONFIG_SND_SOC_ACPI_INTEL_MATCH=m +-CONFIG_SND_SOC_INTEL_MACH=y +-# CONFIG_SND_SOC_INTEL_HASWELL_MACH is not set +-# CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH is not set +-# CONFIG_SND_SOC_INTEL_BROADWELL_MACH is not set + CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m + CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m + 
CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m +@@ -5109,172 +1739,12 @@ + CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m + CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m + CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m +-# CONFIG_SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH is not set +- +-# +-# STMicroelectronics STM32 SOC audio support +-# +-# CONFIG_SND_SOC_XTFPGA_I2S is not set +-# CONFIG_ZX_TDM is not set +-CONFIG_SND_SOC_I2C_AND_SPI=m +- +-# +-# CODEC drivers +-# +-# CONFIG_SND_SOC_AC97_CODEC is not set +-# CONFIG_SND_SOC_ADAU1701 is not set +-# CONFIG_SND_SOC_ADAU1761_I2C is not set +-# CONFIG_SND_SOC_ADAU1761_SPI is not set +-# CONFIG_SND_SOC_ADAU7002 is not set +-# CONFIG_SND_SOC_AK4104 is not set +-# CONFIG_SND_SOC_AK4458 is not set +-# CONFIG_SND_SOC_AK4554 is not set +-# CONFIG_SND_SOC_AK4613 is not set +-# CONFIG_SND_SOC_AK4642 is not set +-# CONFIG_SND_SOC_AK5386 is not set +-# CONFIG_SND_SOC_AK5558 is not set +-# CONFIG_SND_SOC_ALC5623 is not set +-# CONFIG_SND_SOC_BD28623 is not set +-# CONFIG_SND_SOC_BT_SCO is not set +-# CONFIG_SND_SOC_CS35L32 is not set +-# CONFIG_SND_SOC_CS35L33 is not set +-# CONFIG_SND_SOC_CS35L34 is not set +-# CONFIG_SND_SOC_CS35L35 is not set +-# CONFIG_SND_SOC_CS42L42 is not set +-# CONFIG_SND_SOC_CS42L51_I2C is not set +-# CONFIG_SND_SOC_CS42L52 is not set +-# CONFIG_SND_SOC_CS42L56 is not set +-# CONFIG_SND_SOC_CS42L73 is not set +-# CONFIG_SND_SOC_CS4265 is not set +-# CONFIG_SND_SOC_CS4270 is not set +-# CONFIG_SND_SOC_CS4271_I2C is not set +-# CONFIG_SND_SOC_CS4271_SPI is not set +-# CONFIG_SND_SOC_CS42XX8_I2C is not set +-# CONFIG_SND_SOC_CS43130 is not set +-# CONFIG_SND_SOC_CS4349 is not set +-# CONFIG_SND_SOC_CS53L30 is not set +-CONFIG_SND_SOC_DA7213=m +-CONFIG_SND_SOC_DA7219=m +-CONFIG_SND_SOC_DMIC=m +-# CONFIG_SND_SOC_ES7134 is not set +-# CONFIG_SND_SOC_ES7241 is not set +-CONFIG_SND_SOC_ES8316=m +-# CONFIG_SND_SOC_ES8328_I2C is not set +-# CONFIG_SND_SOC_ES8328_SPI is not set +-# CONFIG_SND_SOC_GTM601 is not set +-CONFIG_SND_SOC_HDAC_HDMI=m +-# CONFIG_SND_SOC_INNO_RK3036 is not set +-CONFIG_SND_SOC_MAX98090=m +-CONFIG_SND_SOC_MAX98357A=m +-# CONFIG_SND_SOC_MAX98504 is not set +-# CONFIG_SND_SOC_MAX9867 is not set +-CONFIG_SND_SOC_MAX98927=m +-# CONFIG_SND_SOC_MAX98373 is not set +-# CONFIG_SND_SOC_MAX9860 is not set +-# CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set +-# CONFIG_SND_SOC_PCM1681 is not set +-# CONFIG_SND_SOC_PCM1789_I2C is not set +-# CONFIG_SND_SOC_PCM179X_I2C is not set +-# CONFIG_SND_SOC_PCM179X_SPI is not set +-# CONFIG_SND_SOC_PCM186X_I2C is not set +-# CONFIG_SND_SOC_PCM186X_SPI is not set +-# CONFIG_SND_SOC_PCM3168A_I2C is not set +-# CONFIG_SND_SOC_PCM3168A_SPI is not set +-# CONFIG_SND_SOC_PCM512x_I2C is not set +-# CONFIG_SND_SOC_PCM512x_SPI is not set +-CONFIG_SND_SOC_RL6231=m +-CONFIG_SND_SOC_RL6347A=m +-CONFIG_SND_SOC_RT286=m +-CONFIG_SND_SOC_RT298=m +-CONFIG_SND_SOC_RT5514=m +-CONFIG_SND_SOC_RT5514_SPI=m +-# CONFIG_SND_SOC_RT5616 is not set +-# CONFIG_SND_SOC_RT5631 is not set +-CONFIG_SND_SOC_RT5640=m +-CONFIG_SND_SOC_RT5645=m +-CONFIG_SND_SOC_RT5651=m +-CONFIG_SND_SOC_RT5663=m +-CONFIG_SND_SOC_RT5670=m +-# CONFIG_SND_SOC_SGTL5000 is not set +-# CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set +-# CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set +-# CONFIG_SND_SOC_SPDIF is not set +-# CONFIG_SND_SOC_SSM2305 is not set +-# CONFIG_SND_SOC_SSM2602_SPI is not set +-# CONFIG_SND_SOC_SSM2602_I2C is not set +-CONFIG_SND_SOC_SSM4567=m +-# CONFIG_SND_SOC_STA32X is not set +-# CONFIG_SND_SOC_STA350 is not set +-# 
CONFIG_SND_SOC_STI_SAS is not set +-# CONFIG_SND_SOC_TAS2552 is not set +-# CONFIG_SND_SOC_TAS5086 is not set +-# CONFIG_SND_SOC_TAS571X is not set +-# CONFIG_SND_SOC_TAS5720 is not set +-# CONFIG_SND_SOC_TAS6424 is not set +-# CONFIG_SND_SOC_TDA7419 is not set +-# CONFIG_SND_SOC_TFA9879 is not set +-# CONFIG_SND_SOC_TLV320AIC23_I2C is not set +-# CONFIG_SND_SOC_TLV320AIC23_SPI is not set +-# CONFIG_SND_SOC_TLV320AIC31XX is not set +-# CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set +-# CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set +-# CONFIG_SND_SOC_TLV320AIC3X is not set +-CONFIG_SND_SOC_TS3A227E=m +-# CONFIG_SND_SOC_TSCS42XX is not set +-# CONFIG_SND_SOC_TSCS454 is not set +-# CONFIG_SND_SOC_WM8510 is not set +-# CONFIG_SND_SOC_WM8523 is not set +-# CONFIG_SND_SOC_WM8524 is not set +-# CONFIG_SND_SOC_WM8580 is not set +-# CONFIG_SND_SOC_WM8711 is not set +-# CONFIG_SND_SOC_WM8728 is not set +-# CONFIG_SND_SOC_WM8731 is not set +-# CONFIG_SND_SOC_WM8737 is not set +-# CONFIG_SND_SOC_WM8741 is not set +-# CONFIG_SND_SOC_WM8750 is not set +-# CONFIG_SND_SOC_WM8753 is not set +-# CONFIG_SND_SOC_WM8770 is not set +-# CONFIG_SND_SOC_WM8776 is not set +-# CONFIG_SND_SOC_WM8782 is not set +-# CONFIG_SND_SOC_WM8804_I2C is not set +-# CONFIG_SND_SOC_WM8804_SPI is not set +-# CONFIG_SND_SOC_WM8903 is not set +-# CONFIG_SND_SOC_WM8960 is not set +-# CONFIG_SND_SOC_WM8962 is not set +-# CONFIG_SND_SOC_WM8974 is not set +-# CONFIG_SND_SOC_WM8978 is not set +-# CONFIG_SND_SOC_WM8985 is not set +-# CONFIG_SND_SOC_ZX_AUD96P22 is not set +-# CONFIG_SND_SOC_MAX9759 is not set +-# CONFIG_SND_SOC_MT6351 is not set +-# CONFIG_SND_SOC_NAU8540 is not set +-# CONFIG_SND_SOC_NAU8810 is not set +-CONFIG_SND_SOC_NAU8824=m +-CONFIG_SND_SOC_NAU8825=m +-# CONFIG_SND_SOC_TPA6130A2 is not set +-# CONFIG_SND_SIMPLE_CARD is not set +-CONFIG_SND_X86=y + CONFIG_HDMI_LPE_AUDIO=m +-CONFIG_SND_SYNTH_EMUX=m +-CONFIG_SND_XEN_FRONTEND=m +-CONFIG_AC97_BUS=m +- +-# +-# HID support +-# +-CONFIG_HID=y + CONFIG_HID_BATTERY_STRENGTH=y + CONFIG_HIDRAW=y + CONFIG_UHID=m +-CONFIG_HID_GENERIC=y +- +-# +-# Special HID drivers +-# + CONFIG_HID_A4TECH=m +-# CONFIG_HID_ACCUTOUCH is not set + CONFIG_HID_ACRUX=m +-# CONFIG_HID_ACRUX_FF is not set + CONFIG_HID_APPLE=m + CONFIG_HID_APPLEIR=m + CONFIG_HID_ASUS=m +@@ -5284,14 +1754,10 @@ + CONFIG_HID_CHERRY=m + CONFIG_HID_CHICONY=m + CONFIG_HID_CORSAIR=m +-# CONFIG_HID_COUGAR is not set + CONFIG_HID_PRODIKEYS=m + CONFIG_HID_CMEDIA=m +-# CONFIG_HID_CP2112 is not set + CONFIG_HID_CYPRESS=m + CONFIG_HID_DRAGONRISE=m +-# CONFIG_DRAGONRISE_FF is not set +-# CONFIG_HID_EMS_FF is not set + CONFIG_HID_ELAN=m + CONFIG_HID_ELECOM=m + CONFIG_HID_ELO=m +@@ -5299,8 +1765,6 @@ + CONFIG_HID_GEMBIRD=m + CONFIG_HID_GFRM=m + CONFIG_HID_HOLTEK=m +-# CONFIG_HOLTEK_FF is not set +-# CONFIG_HID_GOOGLE_HAMMER is not set + CONFIG_HID_GT683R=m + CONFIG_HID_KEYTOUCH=m + CONFIG_HID_KYE=m +@@ -5313,17 +1777,10 @@ + CONFIG_HID_TWINHAN=m + CONFIG_HID_KENSINGTON=m + CONFIG_HID_LCPOWER=m +-CONFIG_HID_LED=m + CONFIG_HID_LENOVO=m + CONFIG_HID_LOGITECH=m + CONFIG_HID_LOGITECH_DJ=m +-CONFIG_HID_LOGITECH_HIDPP=m +-# CONFIG_LOGITECH_FF is not set +-# CONFIG_LOGIRUMBLEPAD2_FF is not set +-# CONFIG_LOGIG940_FF is not set +-# CONFIG_LOGIWHEELS_FF is not set + CONFIG_HID_MAGICMOUSE=y +-# CONFIG_HID_MAYFLASH is not set + # CONFIG_HID_REDRAGON is not set + CONFIG_HID_MICROSOFT=m + CONFIG_HID_MONTEREY=m +@@ -5332,134 +1789,54 @@ + CONFIG_HID_NTRIG=y + CONFIG_HID_ORTEK=m + CONFIG_HID_PANTHERLORD=m +-# CONFIG_PANTHERLORD_FF is not set + 
CONFIG_HID_PENMOUNT=m + CONFIG_HID_PETALYNX=m + CONFIG_HID_PICOLCD=m +-CONFIG_HID_PICOLCD_FB=y +-CONFIG_HID_PICOLCD_BACKLIGHT=y +-CONFIG_HID_PICOLCD_LCD=y +-CONFIG_HID_PICOLCD_LEDS=y +-CONFIG_HID_PICOLCD_CIR=y + CONFIG_HID_PLANTRONICS=m + CONFIG_HID_PRIMAX=m +-# CONFIG_HID_RETRODE is not set + CONFIG_HID_ROCCAT=m + CONFIG_HID_SAITEK=m + CONFIG_HID_SAMSUNG=m + CONFIG_HID_SONY=m + CONFIG_SONY_FF=y + CONFIG_HID_SPEEDLINK=m +-# CONFIG_HID_STEAM is not set + CONFIG_HID_STEELSERIES=m + CONFIG_HID_SUNPLUS=m + CONFIG_HID_RMI=m + CONFIG_HID_GREENASIA=m +-# CONFIG_GREENASIA_FF is not set +-CONFIG_HID_HYPERV_MOUSE=m + CONFIG_HID_SMARTJOYPLUS=m +-# CONFIG_SMARTJOYPLUS_FF is not set + CONFIG_HID_TIVO=m + CONFIG_HID_TOPSEED=m + CONFIG_HID_THINGM=m + CONFIG_HID_THRUSTMASTER=m +-# CONFIG_THRUSTMASTER_FF is not set +-# CONFIG_HID_UDRAW_PS3 is not set + CONFIG_HID_WACOM=m + CONFIG_HID_WIIMOTE=m + CONFIG_HID_XINMO=m + CONFIG_HID_ZEROPLUS=m +-# CONFIG_ZEROPLUS_FF is not set + CONFIG_HID_ZYDACRON=m + CONFIG_HID_SENSOR_HUB=y + CONFIG_HID_SENSOR_CUSTOM_SENSOR=m + CONFIG_HID_ALPS=m +- +-# +-# USB HID support +-# +-CONFIG_USB_HID=y + CONFIG_HID_PID=y + CONFIG_USB_HIDDEV=y +- +-# +-# I2C HID support +-# + CONFIG_I2C_HID=m +- +-# +-# Intel ISH HID support +-# + CONFIG_INTEL_ISH_HID=m +-CONFIG_USB_OHCI_LITTLE_ENDIAN=y +-CONFIG_USB_SUPPORT=y +-CONFIG_USB_COMMON=y +-CONFIG_USB_ARCH_HAS_HCD=y + CONFIG_USB=y +-CONFIG_USB_PCI=y + CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +- +-# +-# Miscellaneous USB options +-# +-CONFIG_USB_DEFAULT_PERSIST=y +-# CONFIG_USB_DYNAMIC_MINORS is not set +-# CONFIG_USB_OTG is not set +-# CONFIG_USB_OTG_WHITELIST is not set + CONFIG_USB_LEDS_TRIGGER_USBPORT=m + CONFIG_USB_MON=y +-CONFIG_USB_WUSB=m + CONFIG_USB_WUSB_CBAF=m +-# CONFIG_USB_WUSB_CBAF_DEBUG is not set +- +-# +-# USB Host Controller Drivers +-# +-# CONFIG_USB_C67X00_HCD is not set + CONFIG_USB_XHCI_HCD=y + CONFIG_USB_XHCI_DBGCAP=y +-CONFIG_USB_XHCI_PCI=y +-# CONFIG_USB_XHCI_PLATFORM is not set + CONFIG_USB_EHCI_HCD=y + CONFIG_USB_EHCI_ROOT_HUB_TT=y +-CONFIG_USB_EHCI_TT_NEWSCHED=y +-CONFIG_USB_EHCI_PCI=y +-# CONFIG_USB_EHCI_HCD_PLATFORM is not set +-# CONFIG_USB_OXU210HP_HCD is not set +-# CONFIG_USB_ISP116X_HCD is not set +-# CONFIG_USB_FOTG210_HCD is not set +-# CONFIG_USB_MAX3421_HCD is not set + CONFIG_USB_OHCI_HCD=y +-CONFIG_USB_OHCI_HCD_PCI=y +-# CONFIG_USB_OHCI_HCD_PLATFORM is not set + CONFIG_USB_UHCI_HCD=y +-# CONFIG_USB_U132_HCD is not set +-# CONFIG_USB_SL811_HCD is not set +-# CONFIG_USB_R8A66597_HCD is not set +-# CONFIG_USB_WHCI_HCD is not set + CONFIG_USB_HWA_HCD=m +-# CONFIG_USB_HCD_BCMA is not set +-# CONFIG_USB_HCD_TEST_MODE is not set +- +-# +-# USB Device Class drivers +-# +-CONFIG_USB_ACM=m + CONFIG_USB_PRINTER=m +-CONFIG_USB_WDM=m + CONFIG_USB_TMC=m +- +-# +-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +-# +- +-# +-# also be needed; see USB_STORAGE Help for more info +-# + CONFIG_USB_STORAGE=m +-# CONFIG_USB_STORAGE_DEBUG is not set + CONFIG_USB_STORAGE_REALTEK=m +-CONFIG_REALTEK_AUTOPM=y + CONFIG_USB_STORAGE_DATAFAB=m + CONFIG_USB_STORAGE_FREECOM=m + CONFIG_USB_STORAGE_ISD200=m +@@ -5473,27 +1850,12 @@ + CONFIG_USB_STORAGE_CYPRESS_ATACB=m + CONFIG_USB_STORAGE_ENE_UB6250=m + CONFIG_USB_UAS=m +- +-# +-# USB Imaging devices +-# + CONFIG_USB_MDC800=m + CONFIG_USB_MICROTEK=m +-# CONFIG_USBIP_CORE is not set +-# CONFIG_USB_MUSB_HDRC is not set +-# CONFIG_USB_DWC3 is not set +-# CONFIG_USB_DWC2 is not set +-# CONFIG_USB_CHIPIDEA is not set +-# CONFIG_USB_ISP1760 is not set +- +-# +-# USB port drivers +-# + 
CONFIG_USB_USS720=m + CONFIG_USB_SERIAL=y + CONFIG_USB_SERIAL_CONSOLE=y + CONFIG_USB_SERIAL_GENERIC=y +-# CONFIG_USB_SERIAL_SIMPLE is not set + CONFIG_USB_SERIAL_AIRCABLE=m + CONFIG_USB_SERIAL_ARK3116=m + CONFIG_USB_SERIAL_BELKIN=m +@@ -5509,7 +1871,6 @@ + CONFIG_USB_SERIAL_IR=m + CONFIG_USB_SERIAL_EDGEPORT=m + CONFIG_USB_SERIAL_EDGEPORT_TI=m +-# CONFIG_USB_SERIAL_F81232 is not set + CONFIG_USB_SERIAL_F8153X=m + CONFIG_USB_SERIAL_GARMIN=m + CONFIG_USB_SERIAL_IPW=m +@@ -5519,7 +1880,6 @@ + CONFIG_USB_SERIAL_KLSI=m + CONFIG_USB_SERIAL_KOBIL_SCT=m + CONFIG_USB_SERIAL_MCT_U232=m +-# CONFIG_USB_SERIAL_METRO is not set + CONFIG_USB_SERIAL_MOS7720=m + CONFIG_USB_SERIAL_MOS7715_PARPORT=y + CONFIG_USB_SERIAL_MOS7840=m +@@ -5537,219 +1897,99 @@ + CONFIG_USB_SERIAL_TI=m + CONFIG_USB_SERIAL_CYBERJACK=m + CONFIG_USB_SERIAL_XIRCOM=m +-CONFIG_USB_SERIAL_WWAN=m + CONFIG_USB_SERIAL_OPTION=m + CONFIG_USB_SERIAL_OMNINET=m + CONFIG_USB_SERIAL_OPTICON=m + CONFIG_USB_SERIAL_XSENS_MT=m +-# CONFIG_USB_SERIAL_WISHBONE is not set + CONFIG_USB_SERIAL_SSU100=m + CONFIG_USB_SERIAL_QT2=m + CONFIG_USB_SERIAL_UPD78F0730=m + CONFIG_USB_SERIAL_DEBUG=m +- +-# +-# USB Miscellaneous drivers +-# + CONFIG_USB_EMI62=m + CONFIG_USB_EMI26=m + CONFIG_USB_ADUTUX=m + CONFIG_USB_SEVSEG=m + CONFIG_USB_LEGOTOWER=m + CONFIG_USB_LCD=m +-# CONFIG_USB_CYPRESS_CY7C63 is not set +-# CONFIG_USB_CYTHERM is not set + CONFIG_USB_IDMOUSE=m + CONFIG_USB_FTDI_ELAN=m + CONFIG_USB_APPLEDISPLAY=m + CONFIG_USB_SISUSBVGA=m + CONFIG_USB_SISUSBVGA_CON=y + CONFIG_USB_LD=m +-# CONFIG_USB_TRANCEVIBRATOR is not set + CONFIG_USB_IOWARRIOR=m +-# CONFIG_USB_TEST is not set +-# CONFIG_USB_EHSET_TEST_FIXTURE is not set + CONFIG_USB_ISIGHTFW=m +-# CONFIG_USB_YUREX is not set +-CONFIG_USB_EZUSB_FX2=m +-# CONFIG_USB_HUB_USB251XB is not set + CONFIG_USB_HSIC_USB3503=m +-# CONFIG_USB_HSIC_USB4604 is not set +-# CONFIG_USB_LINK_LAYER_TEST is not set +-# CONFIG_USB_CHAOSKEY is not set + CONFIG_USB_ATM=m + CONFIG_USB_SPEEDTOUCH=m + CONFIG_USB_CXACRU=m + CONFIG_USB_UEAGLEATM=m + CONFIG_USB_XUSBATM=m +- +-# +-# USB Physical Layer drivers +-# +-# CONFIG_NOP_USB_XCEIV is not set +-# CONFIG_USB_GPIO_VBUS is not set +-# CONFIG_USB_ISP1301 is not set +-# CONFIG_USB_GADGET is not set + CONFIG_TYPEC=y + CONFIG_TYPEC_TCPM=y +-CONFIG_TYPEC_TCPCI=y + CONFIG_TYPEC_RT1711H=y + CONFIG_TYPEC_FUSB302=m + CONFIG_TYPEC_UCSI=y + CONFIG_UCSI_ACPI=y + CONFIG_TYPEC_TPS6598X=m +- +-# +-# USB Type-C Multiplexer/DeMultiplexer Switch support +-# + CONFIG_TYPEC_MUX_PI3USB30532=m +- +-# +-# USB Type-C Alternate Mode drivers +-# + CONFIG_TYPEC_DP_ALTMODE=y +-CONFIG_USB_ROLE_SWITCH=y + CONFIG_USB_ROLES_INTEL_XHCI=y + CONFIG_USB_LED_TRIG=y +-# CONFIG_USB_ULPI_BUS is not set + CONFIG_UWB=m +-CONFIG_UWB_HWA=m + CONFIG_UWB_WHCI=m + CONFIG_UWB_I1480U=m + CONFIG_MMC=m +-CONFIG_MMC_BLOCK=m +-CONFIG_MMC_BLOCK_MINORS=8 + CONFIG_SDIO_UART=m +-# CONFIG_MMC_TEST is not set +- +-# +-# MMC/SD/SDIO Host Controller Drivers +-# +-# CONFIG_MMC_DEBUG is not set + CONFIG_MMC_SDHCI=m + CONFIG_MMC_SDHCI_PCI=m +-CONFIG_MMC_RICOH_MMC=y + CONFIG_MMC_SDHCI_ACPI=m + CONFIG_MMC_SDHCI_PLTFM=m +-# CONFIG_MMC_SDHCI_F_SDH30 is not set +-# CONFIG_MMC_WBSD is not set + CONFIG_MMC_TIFM_SD=m +-# CONFIG_MMC_SPI is not set + CONFIG_MMC_CB710=m + CONFIG_MMC_VIA_SDMMC=m + CONFIG_MMC_VUB300=m + CONFIG_MMC_USHC=m +-# CONFIG_MMC_USDHI6ROL0 is not set + CONFIG_MMC_REALTEK_PCI=m + CONFIG_MMC_REALTEK_USB=m +-CONFIG_MMC_CQHCI=m +-# CONFIG_MMC_TOSHIBA_PCI is not set +-# CONFIG_MMC_MTK is not set +-# CONFIG_MMC_SDHCI_XENON is not set + 
CONFIG_MEMSTICK=m +-# CONFIG_MEMSTICK_DEBUG is not set +- +-# +-# MemoryStick drivers +-# +-# CONFIG_MEMSTICK_UNSAFE_RESUME is not set + CONFIG_MSPRO_BLOCK=m +-# CONFIG_MS_BLOCK is not set +- +-# +-# MemoryStick Host Controller Drivers +-# + CONFIG_MEMSTICK_TIFM_MS=m + CONFIG_MEMSTICK_JMICRON_38X=m + CONFIG_MEMSTICK_R592=m + CONFIG_MEMSTICK_REALTEK_PCI=m + CONFIG_MEMSTICK_REALTEK_USB=m +-CONFIG_NEW_LEDS=y + CONFIG_LEDS_CLASS=y +-# CONFIG_LEDS_CLASS_FLASH is not set +-# CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set +- +-# +-# LED drivers +-# +-# CONFIG_LEDS_APU is not set + CONFIG_LEDS_LM3530=m +-# CONFIG_LEDS_LM3642 is not set +-# CONFIG_LEDS_PCA9532 is not set +-# CONFIG_LEDS_GPIO is not set + CONFIG_LEDS_LP3944=m +-# CONFIG_LEDS_LP3952 is not set +-CONFIG_LEDS_LP55XX_COMMON=m + CONFIG_LEDS_LP5521=m + CONFIG_LEDS_LP5523=m + CONFIG_LEDS_LP5562=m +-# CONFIG_LEDS_LP8501 is not set + CONFIG_LEDS_CLEVO_MAIL=m +-# CONFIG_LEDS_PCA955X is not set +-# CONFIG_LEDS_PCA963X is not set +-# CONFIG_LEDS_DAC124S085 is not set +-# CONFIG_LEDS_PWM is not set +-# CONFIG_LEDS_BD2802 is not set + CONFIG_LEDS_INTEL_SS4200=m + CONFIG_LEDS_LT3593=m +-# CONFIG_LEDS_TCA6507 is not set +-# CONFIG_LEDS_TLC591XX is not set +-# CONFIG_LEDS_LM355x is not set +- +-# +-# LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM) +-# + CONFIG_LEDS_BLINKM=m + CONFIG_LEDS_MLXCPLD=m +-# CONFIG_LEDS_MLXREG is not set +-# CONFIG_LEDS_USER is not set +-# CONFIG_LEDS_NIC78BX is not set +- +-# +-# LED Triggers +-# +-CONFIG_LEDS_TRIGGERS=y + CONFIG_LEDS_TRIGGER_TIMER=m + CONFIG_LEDS_TRIGGER_ONESHOT=m + CONFIG_LEDS_TRIGGER_DISK=y +-# CONFIG_LEDS_TRIGGER_MTD is not set + CONFIG_LEDS_TRIGGER_HEARTBEAT=m + CONFIG_LEDS_TRIGGER_BACKLIGHT=m +-# CONFIG_LEDS_TRIGGER_CPU is not set +-# CONFIG_LEDS_TRIGGER_ACTIVITY is not set + CONFIG_LEDS_TRIGGER_GPIO=m + CONFIG_LEDS_TRIGGER_DEFAULT_ON=m +- +-# +-# iptables trigger is under Netfilter config (LED target) +-# + CONFIG_LEDS_TRIGGER_TRANSIENT=m + CONFIG_LEDS_TRIGGER_CAMERA=m +-# CONFIG_LEDS_TRIGGER_PANIC is not set +-# CONFIG_LEDS_TRIGGER_NETDEV is not set +-# CONFIG_ACCESSIBILITY is not set + CONFIG_INFINIBAND=m + CONFIG_INFINIBAND_USER_MAD=m + CONFIG_INFINIBAND_USER_ACCESS=m +-# CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI is not set +-CONFIG_INFINIBAND_USER_MEM=y +-CONFIG_INFINIBAND_ON_DEMAND_PAGING=y +-CONFIG_INFINIBAND_ADDR_TRANS=y +-CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y +-# CONFIG_INFINIBAND_MTHCA is not set +-# CONFIG_INFINIBAND_QIB is not set + CONFIG_INFINIBAND_CXGB4=m + CONFIG_INFINIBAND_I40IW=m + CONFIG_MLX4_INFINIBAND=m + CONFIG_MLX5_INFINIBAND=m +-# CONFIG_INFINIBAND_NES is not set +-# CONFIG_INFINIBAND_OCRDMA is not set + CONFIG_INFINIBAND_VMWARE_PVRDMA=m + CONFIG_INFINIBAND_USNIC=m + CONFIG_INFINIBAND_IPOIB=m + CONFIG_INFINIBAND_IPOIB_CM=y +-CONFIG_INFINIBAND_IPOIB_DEBUG=y +-# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set + CONFIG_INFINIBAND_SRP=m + CONFIG_INFINIBAND_SRPT=m + CONFIG_INFINIBAND_ISER=m +@@ -5758,19 +1998,12 @@ + CONFIG_INFINIBAND_RDMAVT=m + CONFIG_RDMA_RXE=m + CONFIG_INFINIBAND_HFI1=m +-# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set +-# CONFIG_SDMA_VERBOSITY is not set + CONFIG_INFINIBAND_QEDR=m + CONFIG_INFINIBAND_BNXT_RE=m +-CONFIG_EDAC_ATOMIC_SCRUB=y +-CONFIG_EDAC_SUPPORT=y + CONFIG_EDAC=y +-CONFIG_EDAC_LEGACY_SYSFS=y +-# CONFIG_EDAC_DEBUG is not set + CONFIG_EDAC_DECODE_MCE=m + CONFIG_EDAC_GHES=y + CONFIG_EDAC_AMD64=m +-# CONFIG_EDAC_AMD64_ERROR_INJECTION is not set + CONFIG_EDAC_E752X=m + CONFIG_EDAC_I82975X=m + CONFIG_EDAC_I3000=m +@@ -5785,33 
+2018,10 @@ + CONFIG_EDAC_SBRIDGE=m + CONFIG_EDAC_SKX=m + CONFIG_EDAC_PND2=m +-CONFIG_RTC_LIB=y +-CONFIG_RTC_MC146818_LIB=y + CONFIG_RTC_CLASS=y +-CONFIG_RTC_HCTOSYS=y +-CONFIG_RTC_HCTOSYS_DEVICE="rtc0" + # CONFIG_RTC_SYSTOHC is not set +-# CONFIG_RTC_DEBUG is not set +-CONFIG_RTC_NVMEM=y +- +-# +-# RTC interfaces +-# +-CONFIG_RTC_INTF_SYSFS=y +-CONFIG_RTC_INTF_PROC=y +-CONFIG_RTC_INTF_DEV=y +-# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +-# CONFIG_RTC_DRV_TEST is not set +- +-# +-# I2C RTC drivers +-# +-# CONFIG_RTC_DRV_ABB5ZES3 is not set +-# CONFIG_RTC_DRV_ABX80X is not set + CONFIG_RTC_DRV_DS1307=m +-# CONFIG_RTC_DRV_DS1307_CENTURY is not set + CONFIG_RTC_DRV_DS1374=m +-# CONFIG_RTC_DRV_DS1374_WDT is not set + CONFIG_RTC_DRV_DS1672=m + CONFIG_RTC_DRV_MAX6900=m + CONFIG_RTC_DRV_RS5C372=m +@@ -5819,187 +2029,56 @@ + CONFIG_RTC_DRV_ISL12022=m + CONFIG_RTC_DRV_X1205=m + CONFIG_RTC_DRV_PCF8523=m +-# CONFIG_RTC_DRV_PCF85063 is not set +-# CONFIG_RTC_DRV_PCF85363 is not set + CONFIG_RTC_DRV_PCF8563=m + CONFIG_RTC_DRV_PCF8583=m + CONFIG_RTC_DRV_M41T80=m + CONFIG_RTC_DRV_M41T80_WDT=y + CONFIG_RTC_DRV_BQ32K=m +-# CONFIG_RTC_DRV_S35390A is not set + CONFIG_RTC_DRV_FM3130=m +-# CONFIG_RTC_DRV_RX8010 is not set + CONFIG_RTC_DRV_RX8581=m + CONFIG_RTC_DRV_RX8025=m + CONFIG_RTC_DRV_EM3027=m +-# CONFIG_RTC_DRV_RV8803 is not set +- +-# +-# SPI RTC drivers +-# +-# CONFIG_RTC_DRV_M41T93 is not set +-# CONFIG_RTC_DRV_M41T94 is not set +-# CONFIG_RTC_DRV_DS1302 is not set +-# CONFIG_RTC_DRV_DS1305 is not set +-# CONFIG_RTC_DRV_DS1343 is not set +-# CONFIG_RTC_DRV_DS1347 is not set +-# CONFIG_RTC_DRV_DS1390 is not set +-# CONFIG_RTC_DRV_MAX6916 is not set +-# CONFIG_RTC_DRV_R9701 is not set + CONFIG_RTC_DRV_RX4581=m +-# CONFIG_RTC_DRV_RX6110 is not set +-# CONFIG_RTC_DRV_RS5C348 is not set +-# CONFIG_RTC_DRV_MAX6902 is not set +-# CONFIG_RTC_DRV_PCF2123 is not set +-# CONFIG_RTC_DRV_MCP795 is not set +-CONFIG_RTC_I2C_AND_SPI=y +- +-# +-# SPI and I2C RTC drivers +-# + CONFIG_RTC_DRV_DS3232=m +-CONFIG_RTC_DRV_DS3232_HWMON=y +-# CONFIG_RTC_DRV_PCF2127 is not set + CONFIG_RTC_DRV_RV3029C2=m + # CONFIG_RTC_DRV_RV3029_HWMON is not set +- +-# +-# Platform RTC drivers +-# +-CONFIG_RTC_DRV_CMOS=y + CONFIG_RTC_DRV_DS1286=m + CONFIG_RTC_DRV_DS1511=m + CONFIG_RTC_DRV_DS1553=m +-# CONFIG_RTC_DRV_DS1685_FAMILY is not set + CONFIG_RTC_DRV_DS1742=m + CONFIG_RTC_DRV_DS2404=m + CONFIG_RTC_DRV_STK17TA8=m +-# CONFIG_RTC_DRV_M48T86 is not set + CONFIG_RTC_DRV_M48T35=m + CONFIG_RTC_DRV_M48T59=m + CONFIG_RTC_DRV_MSM6242=m + CONFIG_RTC_DRV_BQ4802=m + CONFIG_RTC_DRV_RP5C01=m + CONFIG_RTC_DRV_V3020=m +- +-# +-# on-CPU RTC drivers +-# +-# CONFIG_RTC_DRV_FTRTC010 is not set +- +-# +-# HID Sensor RTC drivers +-# +-# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set +-CONFIG_DMADEVICES=y +-# CONFIG_DMADEVICES_DEBUG is not set +- +-# +-# DMA Devices +-# +-CONFIG_DMA_ENGINE=y +-CONFIG_DMA_VIRTUAL_CHANNELS=y +-CONFIG_DMA_ACPI=y +-# CONFIG_ALTERA_MSGDMA is not set + CONFIG_INTEL_IDMA64=m + CONFIG_INTEL_IOATDMA=m +-# CONFIG_QCOM_HIDMA_MGMT is not set +-# CONFIG_QCOM_HIDMA is not set +-CONFIG_DW_DMAC_CORE=y + CONFIG_DW_DMAC=m +-CONFIG_DW_DMAC_PCI=y +-CONFIG_HSU_DMA=y +- +-# +-# DMA Clients +-# + CONFIG_ASYNC_TX_DMA=y +-# CONFIG_DMATEST is not set +-CONFIG_DMA_ENGINE_RAID=y +- +-# +-# DMABUF options +-# +-CONFIG_SYNC_FILE=y +-# CONFIG_SW_SYNC is not set +-CONFIG_DCA=m +-# CONFIG_AUXDISPLAY is not set +-# CONFIG_PANEL is not set +-CONFIG_UIO=m + CONFIG_UIO_CIF=m + CONFIG_UIO_PDRV_GENIRQ=m +-# CONFIG_UIO_DMEM_GENIRQ is not set + CONFIG_UIO_AEC=m + 
CONFIG_UIO_SERCOS3=m + CONFIG_UIO_PCI_GENERIC=m +-# CONFIG_UIO_NETX is not set +-# CONFIG_UIO_PRUSS is not set +-# CONFIG_UIO_MF624 is not set +-CONFIG_UIO_HV_GENERIC=m +-CONFIG_VFIO_IOMMU_TYPE1=m +-CONFIG_VFIO_VIRQFD=m + CONFIG_VFIO=m + CONFIG_VFIO_NOIOMMU=y + CONFIG_VFIO_PCI=m +-# CONFIG_VFIO_PCI_VGA is not set +-CONFIG_VFIO_PCI_MMAP=y +-CONFIG_VFIO_PCI_INTX=y + # CONFIG_VFIO_PCI_IGD is not set + CONFIG_VFIO_MDEV=m + CONFIG_VFIO_MDEV_DEVICE=m +-CONFIG_IRQ_BYPASS_MANAGER=m +-# CONFIG_VIRT_DRIVERS is not set +-CONFIG_VIRTIO=y +-CONFIG_VIRTIO_MENU=y + CONFIG_VIRTIO_PCI=y +-CONFIG_VIRTIO_PCI_LEGACY=y + CONFIG_VIRTIO_BALLOON=m + CONFIG_VIRTIO_INPUT=m +-# CONFIG_VIRTIO_MMIO is not set +- +-# +-# Microsoft Hyper-V guest support +-# +-CONFIG_HYPERV=m +-CONFIG_HYPERV_TSCPAGE=y +-CONFIG_HYPERV_UTILS=m +-CONFIG_HYPERV_BALLOON=m +- +-# +-# Xen driver support +-# +-CONFIG_XEN_BALLOON=y +-# CONFIG_XEN_SELFBALLOONING is not set +-# CONFIG_XEN_BALLOON_MEMORY_HOTPLUG is not set +-CONFIG_XEN_SCRUB_PAGES_DEFAULT=y +-CONFIG_XEN_DEV_EVTCHN=m +-CONFIG_XENFS=m +-CONFIG_XEN_COMPAT_XENFS=y +-CONFIG_XEN_SYS_HYPERVISOR=y +-CONFIG_XEN_XENBUS_FRONTEND=y +-# CONFIG_XEN_GNTDEV is not set +-# CONFIG_XEN_GRANT_DEV_ALLOC is not set +-# CONFIG_XEN_GRANT_DMA_ALLOC is not set +-CONFIG_SWIOTLB_XEN=y +-CONFIG_XEN_TMEM=m +-# CONFIG_XEN_PVCALLS_FRONTEND is not set +-CONFIG_XEN_PRIVCMD=m +-CONFIG_XEN_EFI=y +-CONFIG_XEN_AUTO_XLATE=y +-CONFIG_XEN_ACPI=y +-# CONFIG_STAGING is not set +-CONFIG_X86_PLATFORM_DEVICES=y + CONFIG_ACER_WMI=m +-# CONFIG_ACER_WIRELESS is not set + CONFIG_ACERHDF=m +-# CONFIG_ALIENWARE_WMI is not set + CONFIG_ASUS_LAPTOP=m + CONFIG_DELL_SMBIOS=m +-CONFIG_DELL_SMBIOS_WMI=y + # CONFIG_DELL_SMBIOS_SMM is not set + CONFIG_DELL_LAPTOP=m + CONFIG_DELL_WMI=m +-CONFIG_DELL_WMI_DESCRIPTOR=m + CONFIG_DELL_WMI_AIO=m + CONFIG_DELL_WMI_LED=m + CONFIG_DELL_SMO8800=m +@@ -6007,7 +2086,6 @@ + CONFIG_FUJITSU_LAPTOP=m + CONFIG_FUJITSU_TABLET=m + CONFIG_AMILO_RFKILL=m +-# CONFIG_GPD_POCKET_FAN is not set + CONFIG_HP_ACCEL=m + CONFIG_HP_WIRELESS=m + CONFIG_HP_WMI=m +@@ -6017,808 +2095,131 @@ + CONFIG_SONY_LAPTOP=m + CONFIG_SONYPI_COMPAT=y + CONFIG_IDEAPAD_LAPTOP=m +-# CONFIG_SURFACE3_WMI is not set + CONFIG_THINKPAD_ACPI=m +-CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y +-# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set +-# CONFIG_THINKPAD_ACPI_DEBUG is not set +-# CONFIG_THINKPAD_ACPI_UNSAFE_LEDS is not set +-CONFIG_THINKPAD_ACPI_VIDEO=y +-CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y + CONFIG_SENSORS_HDAPS=m +-# CONFIG_INTEL_MENLOW is not set + CONFIG_EEEPC_LAPTOP=m + CONFIG_ASUS_WMI=m + CONFIG_ASUS_NB_WMI=m + CONFIG_EEEPC_WMI=m +-# CONFIG_ASUS_WIRELESS is not set +-CONFIG_ACPI_WMI=m +-CONFIG_WMI_BMOF=m + CONFIG_INTEL_WMI_THUNDERBOLT=m + CONFIG_MSI_WMI=m +-# CONFIG_PEAQ_WMI is not set + CONFIG_TOPSTAR_LAPTOP=m +-# CONFIG_ACPI_TOSHIBA is not set + CONFIG_TOSHIBA_BT_RFKILL=m +-# CONFIG_TOSHIBA_HAPS is not set +-# CONFIG_TOSHIBA_WMI is not set + CONFIG_ACPI_CMPC=m +-# CONFIG_INTEL_INT0002_VGPIO is not set + CONFIG_INTEL_HID_EVENT=m + CONFIG_INTEL_VBTN=m + CONFIG_INTEL_IPS=m + CONFIG_INTEL_PMC_CORE=m +-# CONFIG_IBM_RTL is not set + CONFIG_SAMSUNG_LAPTOP=m +-CONFIG_MXM_WMI=m + CONFIG_INTEL_OAKTRAIL=m + CONFIG_SAMSUNG_Q10=m + CONFIG_APPLE_GMUX=m + CONFIG_INTEL_RST=m +-# CONFIG_INTEL_SMARTCONNECT is not set + CONFIG_PVPANIC=y +-# CONFIG_INTEL_PMC_IPC is not set +-# CONFIG_SURFACE_PRO3_BUTTON is not set +-# CONFIG_INTEL_PUNIT_IPC is not set + CONFIG_MLX_PLATFORM=m +-CONFIG_INTEL_TURBO_MAX_3=y +-# CONFIG_I2C_MULTI_INSTANTIATE is not set +-# 
CONFIG_INTEL_ATOMISP2_PM is not set +-CONFIG_PMC_ATOM=y +-# CONFIG_CHROME_PLATFORMS is not set + CONFIG_MELLANOX_PLATFORM=y + CONFIG_MLXREG_HOTPLUG=m +-# CONFIG_MLXREG_IO is not set +-CONFIG_CLKDEV_LOOKUP=y +-CONFIG_HAVE_CLK_PREPARE=y +-CONFIG_COMMON_CLK=y +- +-# +-# Common Clock Framework +-# +-# CONFIG_COMMON_CLK_MAX9485 is not set +-# CONFIG_COMMON_CLK_SI5351 is not set +-# CONFIG_COMMON_CLK_SI544 is not set +-# CONFIG_COMMON_CLK_CDCE706 is not set +-# CONFIG_COMMON_CLK_CS2000_CP is not set +-# CONFIG_COMMON_CLK_PWM is not set + CONFIG_HWSPINLOCK=y +- +-# +-# Clock Source drivers +-# +-CONFIG_CLKEVT_I8253=y +-CONFIG_I8253_LOCK=y +-CONFIG_CLKBLD_I8253=y + CONFIG_MAILBOX=y + CONFIG_PCC=y +-# CONFIG_ALTERA_MBOX is not set +-CONFIG_IOMMU_API=y +-CONFIG_IOMMU_SUPPORT=y +- +-# +-# Generic IOMMU Pagetable Support +-# +- +-# +-# Generic PASID table support +-# +-# CONFIG_IOMMU_DEBUGFS is not set + CONFIG_IOMMU_DEFAULT_PASSTHROUGH=y +-CONFIG_IOMMU_IOVA=y + CONFIG_AMD_IOMMU=y +-CONFIG_AMD_IOMMU_V2=m +-CONFIG_DMAR_TABLE=y + CONFIG_INTEL_IOMMU=y +-# CONFIG_INTEL_IOMMU_SVM is not set + # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set +-CONFIG_INTEL_IOMMU_FLOPPY_WA=y + CONFIG_IRQ_REMAP=y +-# CONFIG_SMMU_BYPASS_DEV is not set +- +-# +-# Remoteproc drivers +-# +-# CONFIG_REMOTEPROC is not set +- +-# +-# Rpmsg drivers +-# +-# CONFIG_RPMSG_QCOM_GLINK_RPM is not set +-# CONFIG_RPMSG_VIRTIO is not set +-# CONFIG_SOUNDWIRE is not set +- +-# +-# SOC (System On Chip) specific Drivers +-# +- +-# +-# Amlogic SoC drivers +-# +- +-# +-# Broadcom SoC drivers +-# +- +-# +-# NXP/Freescale QorIQ SoC drivers +-# +- +-# +-# i.MX SoC drivers +-# +- +-# +-# Qualcomm SoC drivers +-# +-# CONFIG_SOC_TI is not set +- +-# +-# Xilinx SoC drivers +-# +-# CONFIG_XILINX_VCU is not set +-CONFIG_SOC_HISILICON_LBC=m +-CONFIG_SOC_HISILICON_SYSCTL=m +-# CONFIG_PM_DEVFREQ is not set +-# CONFIG_EXTCON is not set +-# CONFIG_MEMORY is not set + CONFIG_IIO=m +-CONFIG_IIO_BUFFER=y +-# CONFIG_IIO_BUFFER_CB is not set +-# CONFIG_IIO_BUFFER_HW_CONSUMER is not set +-CONFIG_IIO_KFIFO_BUF=m +-CONFIG_IIO_TRIGGERED_BUFFER=m +-# CONFIG_IIO_CONFIGFS is not set +-CONFIG_IIO_TRIGGER=y +-CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 +-# CONFIG_IIO_SW_DEVICE is not set +-# CONFIG_IIO_SW_TRIGGER is not set +- +-# +-# Accelerometers +-# +-# CONFIG_ADIS16201 is not set +-# CONFIG_ADIS16209 is not set +-# CONFIG_ADXL345_I2C is not set +-# CONFIG_ADXL345_SPI is not set +-# CONFIG_BMA180 is not set +-# CONFIG_BMA220 is not set +-# CONFIG_BMC150_ACCEL is not set +-# CONFIG_DA280 is not set +-# CONFIG_DA311 is not set +-# CONFIG_DMARD09 is not set +-# CONFIG_DMARD10 is not set + CONFIG_HID_SENSOR_ACCEL_3D=m +-# CONFIG_IIO_CROS_EC_ACCEL_LEGACY is not set +-# CONFIG_IIO_ST_ACCEL_3AXIS is not set +-# CONFIG_KXSD9 is not set +-# CONFIG_KXCJK1013 is not set +-# CONFIG_MC3230 is not set +-# CONFIG_MMA7455_I2C is not set +-# CONFIG_MMA7455_SPI is not set +-# CONFIG_MMA7660 is not set +-# CONFIG_MMA8452 is not set +-# CONFIG_MMA9551 is not set +-# CONFIG_MMA9553 is not set +-# CONFIG_MXC4005 is not set +-# CONFIG_MXC6255 is not set +-# CONFIG_SCA3000 is not set +-# CONFIG_STK8312 is not set +-# CONFIG_STK8BA50 is not set +- +-# +-# Analog to digital converters +-# +-# CONFIG_AD7266 is not set +-# CONFIG_AD7291 is not set +-# CONFIG_AD7298 is not set +-# CONFIG_AD7476 is not set +-# CONFIG_AD7766 is not set +-# CONFIG_AD7791 is not set +-# CONFIG_AD7793 is not set +-# CONFIG_AD7887 is not set +-# CONFIG_AD7923 is not set +-# CONFIG_AD799X is not set +-# CONFIG_HI8435 is not set +-# 
CONFIG_HX711 is not set +-# CONFIG_INA2XX_ADC is not set +-# CONFIG_LTC2471 is not set +-# CONFIG_LTC2485 is not set +-# CONFIG_LTC2497 is not set +-# CONFIG_MAX1027 is not set +-# CONFIG_MAX11100 is not set +-# CONFIG_MAX1118 is not set +-# CONFIG_MAX1363 is not set +-# CONFIG_MAX9611 is not set +-# CONFIG_MCP320X is not set +-# CONFIG_MCP3422 is not set +-# CONFIG_NAU7802 is not set +-# CONFIG_TI_ADC081C is not set +-# CONFIG_TI_ADC0832 is not set +-# CONFIG_TI_ADC084S021 is not set +-# CONFIG_TI_ADC12138 is not set +-# CONFIG_TI_ADC108S102 is not set +-# CONFIG_TI_ADC128S052 is not set +-# CONFIG_TI_ADC161S626 is not set +-# CONFIG_TI_ADS1015 is not set +-# CONFIG_TI_ADS7950 is not set +-# CONFIG_TI_TLC4541 is not set +-# CONFIG_VIPERBOARD_ADC is not set +- +-# +-# Analog Front Ends +-# +- +-# +-# Amplifiers +-# +-# CONFIG_AD8366 is not set +- +-# +-# Chemical Sensors +-# +-# CONFIG_ATLAS_PH_SENSOR is not set +-# CONFIG_BME680 is not set +-# CONFIG_CCS811 is not set +-# CONFIG_IAQCORE is not set +-# CONFIG_VZ89X is not set +- +-# +-# Hid Sensor IIO Common +-# +-CONFIG_HID_SENSOR_IIO_COMMON=m +-CONFIG_HID_SENSOR_IIO_TRIGGER=m +- +-# +-# SSP Sensor Common +-# +-# CONFIG_IIO_SSP_SENSORHUB is not set +- +-# +-# Counters +-# +- +-# +-# Digital to analog converters +-# +-# CONFIG_AD5064 is not set +-# CONFIG_AD5360 is not set +-# CONFIG_AD5380 is not set +-# CONFIG_AD5421 is not set +-# CONFIG_AD5446 is not set +-# CONFIG_AD5449 is not set +-# CONFIG_AD5592R is not set +-# CONFIG_AD5593R is not set +-# CONFIG_AD5504 is not set +-# CONFIG_AD5624R_SPI is not set +-# CONFIG_LTC2632 is not set +-# CONFIG_AD5686_SPI is not set +-# CONFIG_AD5696_I2C is not set +-# CONFIG_AD5755 is not set +-# CONFIG_AD5758 is not set +-# CONFIG_AD5761 is not set +-# CONFIG_AD5764 is not set +-# CONFIG_AD5791 is not set +-# CONFIG_AD7303 is not set +-# CONFIG_AD8801 is not set +-# CONFIG_DS4424 is not set +-# CONFIG_M62332 is not set +-# CONFIG_MAX517 is not set +-# CONFIG_MCP4725 is not set +-# CONFIG_MCP4922 is not set +-# CONFIG_TI_DAC082S085 is not set +-# CONFIG_TI_DAC5571 is not set +- +-# +-# IIO dummy driver +-# +- +-# +-# Frequency Synthesizers DDS/PLL +-# +- +-# +-# Clock Generator/Distribution +-# +-# CONFIG_AD9523 is not set +- +-# +-# Phase-Locked Loop (PLL) frequency synthesizers +-# +-# CONFIG_ADF4350 is not set +- +-# +-# Digital gyroscope sensors +-# +-# CONFIG_ADIS16080 is not set +-# CONFIG_ADIS16130 is not set +-# CONFIG_ADIS16136 is not set +-# CONFIG_ADIS16260 is not set +-# CONFIG_ADXRS450 is not set +-# CONFIG_BMG160 is not set + CONFIG_HID_SENSOR_GYRO_3D=m +-# CONFIG_MPU3050_I2C is not set +-# CONFIG_IIO_ST_GYRO_3AXIS is not set +-# CONFIG_ITG3200 is not set +- +-# +-# Health Sensors +-# +- +-# +-# Heart Rate Monitors +-# +-# CONFIG_AFE4403 is not set +-# CONFIG_AFE4404 is not set +-# CONFIG_MAX30100 is not set +-# CONFIG_MAX30102 is not set +- +-# +-# Humidity sensors +-# +-# CONFIG_AM2315 is not set +-# CONFIG_DHT11 is not set +-# CONFIG_HDC100X is not set + CONFIG_HID_SENSOR_HUMIDITY=m +-# CONFIG_HTS221 is not set +-# CONFIG_HTU21 is not set +-# CONFIG_SI7005 is not set +-# CONFIG_SI7020 is not set +- +-# +-# Inertial measurement units +-# +-# CONFIG_ADIS16400 is not set +-# CONFIG_ADIS16480 is not set +-# CONFIG_BMI160_I2C is not set +-# CONFIG_BMI160_SPI is not set +-# CONFIG_KMX61 is not set +-# CONFIG_INV_MPU6050_I2C is not set +-# CONFIG_INV_MPU6050_SPI is not set +-# CONFIG_IIO_ST_LSM6DSX is not set +- +-# +-# Light sensors +-# +-# CONFIG_ACPI_ALS is not set +-# CONFIG_ADJD_S311 is 
not set +-# CONFIG_AL3320A is not set +-# CONFIG_APDS9300 is not set +-# CONFIG_APDS9960 is not set +-# CONFIG_BH1750 is not set +-# CONFIG_BH1780 is not set +-# CONFIG_CM32181 is not set +-# CONFIG_CM3232 is not set +-# CONFIG_CM3323 is not set +-# CONFIG_CM36651 is not set +-# CONFIG_GP2AP020A00F is not set +-# CONFIG_SENSORS_ISL29018 is not set +-# CONFIG_SENSORS_ISL29028 is not set +-# CONFIG_ISL29125 is not set + CONFIG_HID_SENSOR_ALS=m + CONFIG_HID_SENSOR_PROX=m +-# CONFIG_JSA1212 is not set +-# CONFIG_RPR0521 is not set +-# CONFIG_LTR501 is not set +-# CONFIG_LV0104CS is not set +-# CONFIG_MAX44000 is not set +-# CONFIG_OPT3001 is not set +-# CONFIG_PA12203001 is not set +-# CONFIG_SI1133 is not set +-# CONFIG_SI1145 is not set +-# CONFIG_STK3310 is not set +-# CONFIG_ST_UVIS25 is not set +-# CONFIG_TCS3414 is not set +-# CONFIG_TCS3472 is not set +-# CONFIG_SENSORS_TSL2563 is not set +-# CONFIG_TSL2583 is not set +-# CONFIG_TSL2772 is not set +-# CONFIG_TSL4531 is not set +-# CONFIG_US5182D is not set +-# CONFIG_VCNL4000 is not set +-# CONFIG_VEML6070 is not set +-# CONFIG_VL6180 is not set +-# CONFIG_ZOPT2201 is not set +- +-# +-# Magnetometer sensors +-# +-# CONFIG_AK8975 is not set +-# CONFIG_AK09911 is not set +-# CONFIG_BMC150_MAGN_I2C is not set +-# CONFIG_BMC150_MAGN_SPI is not set +-# CONFIG_MAG3110 is not set + CONFIG_HID_SENSOR_MAGNETOMETER_3D=m +-# CONFIG_MMC35240 is not set +-# CONFIG_IIO_ST_MAGN_3AXIS is not set +-# CONFIG_SENSORS_HMC5843_I2C is not set +-# CONFIG_SENSORS_HMC5843_SPI is not set +- +-# +-# Multiplexers +-# +- +-# +-# Inclinometer sensors +-# + CONFIG_HID_SENSOR_INCLINOMETER_3D=m + CONFIG_HID_SENSOR_DEVICE_ROTATION=m +- +-# +-# Triggers - standalone +-# +-# CONFIG_IIO_INTERRUPT_TRIGGER is not set +-# CONFIG_IIO_SYSFS_TRIGGER is not set +- +-# +-# Digital potentiometers +-# +-# CONFIG_AD5272 is not set +-# CONFIG_DS1803 is not set +-# CONFIG_MAX5481 is not set +-# CONFIG_MAX5487 is not set +-# CONFIG_MCP4018 is not set +-# CONFIG_MCP4131 is not set +-# CONFIG_MCP4531 is not set +-# CONFIG_TPL0102 is not set +- +-# +-# Digital potentiostats +-# +-# CONFIG_LMP91000 is not set +- +-# +-# Pressure sensors +-# +-# CONFIG_ABP060MG is not set +-# CONFIG_BMP280 is not set + CONFIG_HID_SENSOR_PRESS=m +-# CONFIG_HP03 is not set +-# CONFIG_MPL115_I2C is not set +-# CONFIG_MPL115_SPI is not set +-# CONFIG_MPL3115 is not set +-# CONFIG_MS5611 is not set +-# CONFIG_MS5637 is not set +-# CONFIG_IIO_ST_PRESS is not set +-# CONFIG_T5403 is not set +-# CONFIG_HP206C is not set +-# CONFIG_ZPA2326 is not set +- +-# +-# Lightning sensors +-# +-# CONFIG_AS3935 is not set +- +-# +-# Proximity and distance sensors +-# +-# CONFIG_ISL29501 is not set +-# CONFIG_LIDAR_LITE_V2 is not set +-# CONFIG_RFD77402 is not set +-# CONFIG_SRF04 is not set +-# CONFIG_SX9500 is not set +-# CONFIG_SRF08 is not set +- +-# +-# Resolver to digital converters +-# +-# CONFIG_AD2S1200 is not set +- +-# +-# Temperature sensors +-# +-# CONFIG_MAXIM_THERMOCOUPLE is not set + CONFIG_HID_SENSOR_TEMP=m +-# CONFIG_MLX90614 is not set +-# CONFIG_MLX90632 is not set +-# CONFIG_TMP006 is not set +-# CONFIG_TMP007 is not set +-# CONFIG_TSYS01 is not set +-# CONFIG_TSYS02D is not set + CONFIG_NTB=m +-# CONFIG_NTB_AMD is not set +-# CONFIG_NTB_IDT is not set +-# CONFIG_NTB_INTEL is not set +-# CONFIG_NTB_SWITCHTEC is not set +-# CONFIG_NTB_PINGPONG is not set +-# CONFIG_NTB_TOOL is not set +-# CONFIG_NTB_PERF is not set +-# CONFIG_NTB_TRANSPORT is not set +-# CONFIG_VME_BUS is not set + CONFIG_PWM=y 
+-CONFIG_PWM_SYSFS=y +-CONFIG_PWM_LPSS=m + CONFIG_PWM_LPSS_PCI=m + CONFIG_PWM_LPSS_PLATFORM=m +-# CONFIG_PWM_PCA9685 is not set +- +-# +-# IRQ chip support +-# +-CONFIG_ARM_GIC_MAX_NR=1 +-# CONFIG_IPACK_BUS is not set +-# CONFIG_RESET_CONTROLLER is not set +-# CONFIG_FMC is not set +- +-# +-# PHY Subsystem +-# +-# CONFIG_GENERIC_PHY is not set +-# CONFIG_BCM_KONA_USB2_PHY is not set +-# CONFIG_PHY_PXA_28NM_HSIC is not set +-# CONFIG_PHY_PXA_28NM_USB2 is not set +-# CONFIG_PHY_CPCAP_USB is not set + CONFIG_POWERCAP=y + CONFIG_INTEL_RAPL=m +-# CONFIG_IDLE_INJECT is not set +-# CONFIG_MCB is not set +- +-# +-# Performance monitor support +-# +-CONFIG_RAS=y +-# CONFIG_RAS_CEC is not set ++CONFIG_RAS_CEC=y + CONFIG_THUNDERBOLT=y +- +-# +-# Android +-# +-# CONFIG_ANDROID is not set +-CONFIG_LIBNVDIMM=m +-CONFIG_BLK_DEV_PMEM=m +-CONFIG_ND_BLK=m +-CONFIG_ND_CLAIM=y +-CONFIG_ND_BTT=m +-CONFIG_BTT=y +-CONFIG_ND_PFN=m +-CONFIG_NVDIMM_PFN=y +-CONFIG_NVDIMM_DAX=y +-CONFIG_DAX_DRIVER=y +-CONFIG_DAX=y +-CONFIG_DEV_DAX=m +-CONFIG_DEV_DAX_PMEM=m +-CONFIG_NVMEM=y +- +-# +-# HW tracing support +-# +-# CONFIG_STM is not set +-# CONFIG_INTEL_TH is not set +-# CONFIG_FPGA is not set +-# CONFIG_UNISYS_VISORBUS is not set +-# CONFIG_SIOX is not set +-# CONFIG_UACCE is not set +-# CONFIG_SLIMBUS is not set +- +-# +-# File systems +-# +-CONFIG_DCACHE_WORD_ACCESS=y +-CONFIG_FS_IOMAP=y +-# CONFIG_EXT2_FS is not set +-# CONFIG_EXT3_FS is not set + CONFIG_EXT4_FS=m +-CONFIG_EXT4_USE_FOR_EXT2=y + CONFIG_EXT4_FS_POSIX_ACL=y + CONFIG_EXT4_FS_SECURITY=y +-# CONFIG_EXT4_ENCRYPTION is not set +-# CONFIG_EXT4_DEBUG is not set +-CONFIG_JBD2=m +-# CONFIG_JBD2_DEBUG is not set +-CONFIG_FS_MBCACHE=m +-# CONFIG_REISERFS_FS is not set +-# CONFIG_JFS_FS is not set ++CONFIG_REISERFS_FS=m ++CONFIG_REISERFS_PROC_INFO=y ++CONFIG_JFS_FS=m + CONFIG_XFS_FS=m + CONFIG_XFS_QUOTA=y + CONFIG_XFS_POSIX_ACL=y +-# CONFIG_XFS_RT is not set +-# CONFIG_XFS_ONLINE_SCRUB is not set +-# CONFIG_XFS_WARN is not set +-# CONFIG_XFS_DEBUG is not set + CONFIG_GFS2_FS=m + CONFIG_GFS2_FS_LOCKING_DLM=y +-# CONFIG_OCFS2_FS is not set +-# CONFIG_BTRFS_FS is not set +-# CONFIG_NILFS2_FS is not set +-# CONFIG_F2FS_FS is not set ++CONFIG_BTRFS_FS=m ++CONFIG_NILFS2_FS=m ++CONFIG_F2FS_FS=m + CONFIG_FS_DAX=y +-CONFIG_FS_DAX_PMD=y +-CONFIG_FS_POSIX_ACL=y +-CONFIG_EXPORTFS=y +-CONFIG_EXPORTFS_BLOCK_OPS=y +-CONFIG_FILE_LOCKING=y + # CONFIG_MANDATORY_FILE_LOCKING is not set +-# CONFIG_FS_ENCRYPTION is not set +-CONFIG_FSNOTIFY=y +-CONFIG_DNOTIFY=y +-CONFIG_INOTIFY_USER=y + CONFIG_FANOTIFY=y + CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y + CONFIG_QUOTA=y + CONFIG_QUOTA_NETLINK_INTERFACE=y +-CONFIG_PRINT_QUOTA_WARNING=y +-# CONFIG_QUOTA_DEBUG is not set +-CONFIG_QUOTA_TREE=y +-# CONFIG_QFMT_V1 is not set + CONFIG_QFMT_V2=y +-CONFIG_QUOTACTL=y +-CONFIG_QUOTACTL_COMPAT=y + CONFIG_AUTOFS4_FS=y +-CONFIG_AUTOFS_FS=y + CONFIG_FUSE_FS=m + CONFIG_CUSE=m + CONFIG_OVERLAY_FS=m +-# CONFIG_OVERLAY_FS_REDIRECT_DIR is not set + # CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW is not set +-# CONFIG_OVERLAY_FS_INDEX is not set +-# CONFIG_OVERLAY_FS_XINO_AUTO is not set +-# CONFIG_OVERLAY_FS_METACOPY is not set +- +-# +-# Caches +-# + CONFIG_FSCACHE=m + CONFIG_FSCACHE_STATS=y +-# CONFIG_FSCACHE_HISTOGRAM is not set +-# CONFIG_FSCACHE_DEBUG is not set +-# CONFIG_FSCACHE_OBJECT_LIST is not set + CONFIG_CACHEFILES=m +-# CONFIG_CACHEFILES_DEBUG is not set +-# CONFIG_CACHEFILES_HISTOGRAM is not set +- +-# +-# CD-ROM/DVD Filesystems +-# + CONFIG_ISO9660_FS=m + CONFIG_JOLIET=y + CONFIG_ZISOFS=y + CONFIG_UDF_FS=m 
+- +-# +-# DOS/FAT/NT Filesystems +-# +-CONFIG_FAT_FS=m + CONFIG_MSDOS_FS=m + CONFIG_VFAT_FS=m +-CONFIG_FAT_DEFAULT_CODEPAGE=437 + CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +-# CONFIG_FAT_DEFAULT_UTF8 is not set +- +-# +-# Pseudo filesystems +-# +-CONFIG_PROC_FS=y + CONFIG_PROC_KCORE=y +-CONFIG_PROC_VMCORE=y + CONFIG_PROC_VMCORE_DEVICE_DUMP=y +-CONFIG_PROC_SYSCTL=y +-CONFIG_PROC_PAGE_MONITOR=y +-CONFIG_PROC_CHILDREN=y +-CONFIG_KERNFS=y +-CONFIG_SYSFS=y +-CONFIG_TMPFS=y + CONFIG_TMPFS_POSIX_ACL=y +-CONFIG_TMPFS_XATTR=y + CONFIG_HUGETLBFS=y +-CONFIG_HUGETLB_PAGE=y +-CONFIG_MEMFD_CREATE=y +-CONFIG_ARCH_HAS_GIGANTIC_PAGE=y + CONFIG_CONFIGFS_FS=y + CONFIG_EFIVAR_FS=y +-CONFIG_MISC_FILESYSTEMS=y +-# CONFIG_ORANGEFS_FS is not set +-# CONFIG_ADFS_FS is not set +-# CONFIG_AFFS_FS is not set +-# CONFIG_ECRYPT_FS is not set +-# CONFIG_HFS_FS is not set +-# CONFIG_HFSPLUS_FS is not set +-# CONFIG_BEFS_FS is not set +-# CONFIG_BFS_FS is not set +-# CONFIG_EFS_FS is not set +-# CONFIG_JFFS2_FS is not set +-# CONFIG_UBIFS_FS is not set ++CONFIG_ECRYPT_FS=y ++CONFIG_ECRYPT_FS_MESSAGING=y + CONFIG_CRAMFS=m +-CONFIG_CRAMFS_BLOCKDEV=y +-# CONFIG_CRAMFS_MTD is not set + CONFIG_SQUASHFS=m +-# CONFIG_SQUASHFS_FILE_CACHE is not set + CONFIG_SQUASHFS_FILE_DIRECT=y +-# CONFIG_SQUASHFS_DECOMP_SINGLE is not set +-# CONFIG_SQUASHFS_DECOMP_MULTI is not set + CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y + CONFIG_SQUASHFS_XATTR=y +-CONFIG_SQUASHFS_ZLIB=y +-# CONFIG_SQUASHFS_LZ4 is not set + CONFIG_SQUASHFS_LZO=y + CONFIG_SQUASHFS_XZ=y +-# CONFIG_SQUASHFS_ZSTD is not set +-# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set +-# CONFIG_SQUASHFS_EMBEDDED is not set +-CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 +-# CONFIG_VXFS_FS is not set +-# CONFIG_MINIX_FS is not set +-# CONFIG_OMFS_FS is not set +-# CONFIG_HPFS_FS is not set +-# CONFIG_QNX4FS_FS is not set +-# CONFIG_QNX6FS_FS is not set +-# CONFIG_ROMFS_FS is not set +-CONFIG_PSTORE=y +-CONFIG_PSTORE_DEFLATE_COMPRESS=y +-# CONFIG_PSTORE_LZO_COMPRESS is not set +-# CONFIG_PSTORE_LZ4_COMPRESS is not set +-# CONFIG_PSTORE_LZ4HC_COMPRESS is not set +-# CONFIG_PSTORE_842_COMPRESS is not set +-# CONFIG_PSTORE_ZSTD_COMPRESS is not set +-CONFIG_PSTORE_COMPRESS=y +-CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y +-CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" +-# CONFIG_PSTORE_CONSOLE is not set +-# CONFIG_PSTORE_PMSG is not set +-# CONFIG_PSTORE_FTRACE is not set + CONFIG_PSTORE_RAM=m +-# CONFIG_SYSV_FS is not set +-# CONFIG_UFS_FS is not set +-CONFIG_NETWORK_FILESYSTEMS=y + CONFIG_NFS_FS=m + # CONFIG_NFS_V2 is not set +-CONFIG_NFS_V3=m + CONFIG_NFS_V3_ACL=y + CONFIG_NFS_V4=m +-# CONFIG_NFS_SWAP is not set + CONFIG_NFS_V4_1=y + CONFIG_NFS_V4_2=y +-CONFIG_PNFS_FILE_LAYOUT=m +-CONFIG_PNFS_BLOCK=m +-CONFIG_PNFS_FLEXFILE_LAYOUT=m +-CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" +-# CONFIG_NFS_V4_1_MIGRATION is not set +-CONFIG_NFS_V4_SECURITY_LABEL=y + CONFIG_NFS_FSCACHE=y +-# CONFIG_NFS_USE_LEGACY_DNS is not set +-CONFIG_NFS_USE_KERNEL_DNS=y +-CONFIG_NFS_DEBUG=y + CONFIG_NFSD=m +-CONFIG_NFSD_V2_ACL=y +-CONFIG_NFSD_V3=y + CONFIG_NFSD_V3_ACL=y + CONFIG_NFSD_V4=y +-CONFIG_NFSD_PNFS=y +-# CONFIG_NFSD_BLOCKLAYOUT is not set + CONFIG_NFSD_SCSILAYOUT=y +-# CONFIG_NFSD_FLEXFILELAYOUT is not set + CONFIG_NFSD_V4_SECURITY_LABEL=y +-# CONFIG_NFSD_FAULT_INJECTION is not set +-CONFIG_GRACE_PERIOD=m +-CONFIG_LOCKD=m +-CONFIG_LOCKD_V4=y +-CONFIG_NFS_ACL_SUPPORT=m +-CONFIG_NFS_COMMON=y +-CONFIG_SUNRPC=m +-CONFIG_SUNRPC_GSS=m +-CONFIG_SUNRPC_BACKCHANNEL=y +-CONFIG_RPCSEC_GSS_KRB5=m + CONFIG_SUNRPC_DEBUG=y 
+-CONFIG_SUNRPC_XPRT_RDMA=m + CONFIG_CEPH_FS=m +-# CONFIG_CEPH_FSCACHE is not set + CONFIG_CEPH_FS_POSIX_ACL=y + CONFIG_CIFS=m +-# CONFIG_CIFS_STATS2 is not set +-CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y + CONFIG_CIFS_WEAK_PW_HASH=y + CONFIG_CIFS_UPCALL=y + CONFIG_CIFS_XATTR=y + CONFIG_CIFS_POSIX=y + CONFIG_CIFS_ACL=y +-CONFIG_CIFS_DEBUG=y +-# CONFIG_CIFS_DEBUG2 is not set +-# CONFIG_CIFS_DEBUG_DUMP_KEYS is not set + CONFIG_CIFS_DFS_UPCALL=y +-# CONFIG_CIFS_SMB_DIRECT is not set +-# CONFIG_CIFS_FSCACHE is not set +-# CONFIG_CODA_FS is not set +-# CONFIG_AFS_FS is not set +-CONFIG_NLS=y + CONFIG_NLS_DEFAULT="utf8" + CONFIG_NLS_CODEPAGE_437=y + CONFIG_NLS_CODEPAGE_737=m +@@ -6871,668 +2272,145 @@ + CONFIG_NLS_UTF8=m + CONFIG_DLM=m + CONFIG_DLM_DEBUG=y +- +-# +-# Security options +-# +-CONFIG_KEYS=y +-CONFIG_KEYS_COMPAT=y + CONFIG_PERSISTENT_KEYRINGS=y + CONFIG_BIG_KEYS=y + CONFIG_TRUSTED_KEYS=y +-CONFIG_ENCRYPTED_KEYS=y +-# CONFIG_KEY_DH_OPERATIONS is not set +-# CONFIG_SECURITY_DMESG_RESTRICT is not set + CONFIG_SECURITY=y +-CONFIG_SECURITY_WRITABLE_HOOKS=y +-CONFIG_SECURITYFS=y +-CONFIG_SECURITY_NETWORK=y +-CONFIG_PAGE_TABLE_ISOLATION=y + CONFIG_SECURITY_INFINIBAND=y + CONFIG_SECURITY_NETWORK_XFRM=y +-# CONFIG_SECURITY_PATH is not set + CONFIG_INTEL_TXT=y + CONFIG_LSM_MMAP_MIN_ADDR=65535 +-CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y + CONFIG_HARDENED_USERCOPY=y +-CONFIG_HARDENED_USERCOPY_FALLBACK=y + CONFIG_FORTIFY_SOURCE=y +-# CONFIG_STATIC_USERMODEHELPER is not set + CONFIG_SECURITY_SELINUX=y + CONFIG_SECURITY_SELINUX_BOOTPARAM=y +-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +-CONFIG_SECURITY_SELINUX_DISABLE=y +-CONFIG_SECURITY_SELINUX_DEVELOP=y +-CONFIG_SECURITY_SELINUX_AVC_STATS=y +-CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +-# CONFIG_SECURITY_SMACK is not set +-# CONFIG_SECURITY_TOMOYO is not set +-# CONFIG_SECURITY_APPARMOR is not set +-# CONFIG_SECURITY_LOADPIN is not set +-CONFIG_SECURITY_YAMA=y +-CONFIG_INTEGRITY=y ++CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 ++CONFIG_SECURITY_APPARMOR=y + CONFIG_INTEGRITY_SIGNATURE=y + CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +-CONFIG_INTEGRITY_TRUSTED_KEYRING=y +-CONFIG_INTEGRITY_AUDIT=y + CONFIG_IMA=y +-CONFIG_IMA_MEASURE_PCR_IDX=10 +-CONFIG_IMA_LSM_RULES=y +-# CONFIG_IMA_TEMPLATE is not set +-CONFIG_IMA_NG_TEMPLATE=y +-# CONFIG_IMA_SIG_TEMPLATE is not set +-CONFIG_IMA_DEFAULT_TEMPLATE="ima-ng" +-CONFIG_IMA_DEFAULT_HASH_SHA1=y +-# CONFIG_IMA_DEFAULT_HASH_SHA256 is not set +-CONFIG_IMA_DEFAULT_HASH="sha1" +-# CONFIG_IMA_WRITE_POLICY is not set +-# CONFIG_IMA_READ_POLICY is not set + CONFIG_IMA_APPRAISE=y +-# CONFIG_IMA_APPRAISE_BUILD_POLICY is not set +-CONFIG_IMA_APPRAISE_BOOTPARAM=y +-CONFIG_IMA_TRUSTED_KEYRING=y +-# CONFIG_IMA_BLACKLIST_KEYRING is not set +-# CONFIG_IMA_LOAD_X509 is not set + CONFIG_EVM=y +-CONFIG_EVM_ATTR_FSUUID=y +-# CONFIG_EVM_ADD_XATTRS is not set +-# CONFIG_EVM_LOAD_X509 is not set +-CONFIG_DEFAULT_SECURITY_SELINUX=y +-# CONFIG_DEFAULT_SECURITY_DAC is not set +-CONFIG_DEFAULT_SECURITY="selinux" +-CONFIG_XOR_BLOCKS=m +-CONFIG_ASYNC_CORE=m +-CONFIG_ASYNC_MEMCPY=m +-CONFIG_ASYNC_XOR=m +-CONFIG_ASYNC_PQ=m +-CONFIG_ASYNC_RAID6_RECOV=m +-CONFIG_CRYPTO=y +- +-# +-# Crypto core or helper +-# ++CONFIG_DEFAULT_SECURITY_DAC=y + CONFIG_CRYPTO_FIPS=y +-CONFIG_CRYPTO_ALGAPI=y +-CONFIG_CRYPTO_ALGAPI2=y +-CONFIG_CRYPTO_AEAD=y +-CONFIG_CRYPTO_AEAD2=y +-CONFIG_CRYPTO_BLKCIPHER=y +-CONFIG_CRYPTO_BLKCIPHER2=y +-CONFIG_CRYPTO_HASH=y +-CONFIG_CRYPTO_HASH2=y +-CONFIG_CRYPTO_RNG=y +-CONFIG_CRYPTO_RNG2=y +-CONFIG_CRYPTO_RNG_DEFAULT=y 
+-CONFIG_CRYPTO_AKCIPHER2=y +-CONFIG_CRYPTO_AKCIPHER=y +-CONFIG_CRYPTO_KPP2=y +-CONFIG_CRYPTO_KPP=m +-CONFIG_CRYPTO_ACOMP2=y +-CONFIG_CRYPTO_RSA=y +-CONFIG_CRYPTO_DH=m +-CONFIG_CRYPTO_ECDH=m +-CONFIG_CRYPTO_MANAGER=y +-CONFIG_CRYPTO_MANAGER2=y + CONFIG_CRYPTO_USER=m + # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +-CONFIG_CRYPTO_GF128MUL=y +-CONFIG_CRYPTO_NULL=y +-CONFIG_CRYPTO_NULL2=y + CONFIG_CRYPTO_PCRYPT=m +-CONFIG_CRYPTO_WORKQUEUE=y +-CONFIG_CRYPTO_CRYPTD=y +-CONFIG_CRYPTO_AUTHENC=m + CONFIG_CRYPTO_TEST=m +-CONFIG_CRYPTO_SIMD=y +-CONFIG_CRYPTO_GLUE_HELPER_X86=y +- +-# +-# Authenticated Encryption with Associated Data +-# +-CONFIG_CRYPTO_CCM=m +-CONFIG_CRYPTO_GCM=y + CONFIG_CRYPTO_CHACHA20POLY1305=m +-# CONFIG_CRYPTO_AEGIS128 is not set +-# CONFIG_CRYPTO_AEGIS128L is not set +-# CONFIG_CRYPTO_AEGIS256 is not set +-# CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_AEGIS256_AESNI_SSE2 is not set +-# CONFIG_CRYPTO_MORUS640 is not set +-# CONFIG_CRYPTO_MORUS640_SSE2 is not set +-# CONFIG_CRYPTO_MORUS1280 is not set +-# CONFIG_CRYPTO_MORUS1280_SSE2 is not set +-# CONFIG_CRYPTO_MORUS1280_AVX2 is not set +-CONFIG_CRYPTO_SEQIV=y +-CONFIG_CRYPTO_ECHAINIV=m +- +-# +-# Block modes +-# +-CONFIG_CRYPTO_CBC=y + CONFIG_CRYPTO_CFB=y +-CONFIG_CRYPTO_CTR=y + CONFIG_CRYPTO_CTS=m +-CONFIG_CRYPTO_ECB=y + CONFIG_CRYPTO_LRW=m + CONFIG_CRYPTO_PCBC=m +-CONFIG_CRYPTO_XTS=m +-# CONFIG_CRYPTO_KEYWRAP is not set +- +-# +-# Hash modes +-# +-CONFIG_CRYPTO_CMAC=m +-CONFIG_CRYPTO_HMAC=y + CONFIG_CRYPTO_XCBC=m + CONFIG_CRYPTO_VMAC=m +- +-# +-# Digest +-# + CONFIG_CRYPTO_CRC32C=y +-CONFIG_CRYPTO_CRC32C_INTEL=m +-CONFIG_CRYPTO_CRC32=m + CONFIG_CRYPTO_CRC32_PCLMUL=m +-CONFIG_CRYPTO_CRCT10DIF=y + CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m +-CONFIG_CRYPTO_GHASH=y +-CONFIG_CRYPTO_POLY1305=m + CONFIG_CRYPTO_POLY1305_X86_64=m +-CONFIG_CRYPTO_MD4=m +-CONFIG_CRYPTO_MD5=y + CONFIG_CRYPTO_MICHAEL_MIC=m + CONFIG_CRYPTO_RMD128=m + CONFIG_CRYPTO_RMD160=m + CONFIG_CRYPTO_RMD256=m + CONFIG_CRYPTO_RMD320=m +-CONFIG_CRYPTO_SHA1=y + CONFIG_CRYPTO_SHA1_SSSE3=y + CONFIG_CRYPTO_SHA256_SSSE3=y + CONFIG_CRYPTO_SHA512_SSSE3=m +-CONFIG_CRYPTO_SHA256=y +-CONFIG_CRYPTO_SHA512=m ++CONFIG_CRYPTO_SHA1_MB=m ++CONFIG_CRYPTO_SHA256_MB=m ++CONFIG_CRYPTO_SHA512_MB=m + CONFIG_CRYPTO_SHA3=m +-# CONFIG_CRYPTO_SM3 is not set ++CONFIG_CRYPTO_SM3=m + CONFIG_CRYPTO_TGR192=m + CONFIG_CRYPTO_WP512=m + CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m +- +-# +-# Ciphers +-# +-CONFIG_CRYPTO_AES=y +-# CONFIG_CRYPTO_AES_TI is not set +-CONFIG_CRYPTO_AES_X86_64=y + CONFIG_CRYPTO_AES_NI_INTEL=y + CONFIG_CRYPTO_ANUBIS=m +-CONFIG_CRYPTO_ARC4=m + CONFIG_CRYPTO_BLOWFISH=m +-CONFIG_CRYPTO_BLOWFISH_COMMON=m + CONFIG_CRYPTO_BLOWFISH_X86_64=m + CONFIG_CRYPTO_CAMELLIA=m +-CONFIG_CRYPTO_CAMELLIA_X86_64=m +-CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m + CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m +-CONFIG_CRYPTO_CAST_COMMON=m +-CONFIG_CRYPTO_CAST5=m + CONFIG_CRYPTO_CAST5_AVX_X86_64=m +-CONFIG_CRYPTO_CAST6=m + CONFIG_CRYPTO_CAST6_AVX_X86_64=m +-CONFIG_CRYPTO_DES=m + CONFIG_CRYPTO_DES3_EDE_X86_64=m + CONFIG_CRYPTO_FCRYPT=m + CONFIG_CRYPTO_KHAZAD=m + CONFIG_CRYPTO_SALSA20=m +-CONFIG_CRYPTO_CHACHA20=m + CONFIG_CRYPTO_CHACHA20_X86_64=m + CONFIG_CRYPTO_SEED=m +-CONFIG_CRYPTO_SERPENT=m + CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m +-CONFIG_CRYPTO_SERPENT_AVX_X86_64=m + CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m +-# CONFIG_CRYPTO_SM4 is not set + CONFIG_CRYPTO_TEA=m + CONFIG_CRYPTO_TWOFISH=m +-CONFIG_CRYPTO_TWOFISH_COMMON=m +-CONFIG_CRYPTO_TWOFISH_X86_64=m 
+-CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m + CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m +- +-# +-# Compression +-# +-CONFIG_CRYPTO_DEFLATE=y +-CONFIG_CRYPTO_LZO=y +-# CONFIG_CRYPTO_842 is not set +-# CONFIG_CRYPTO_LZ4 is not set +-# CONFIG_CRYPTO_LZ4HC is not set +-# CONFIG_CRYPTO_ZSTD is not set +- +-# +-# Random Number Generation +-# + CONFIG_CRYPTO_ANSI_CPRNG=m +-CONFIG_CRYPTO_DRBG_MENU=y +-CONFIG_CRYPTO_DRBG_HMAC=y + CONFIG_CRYPTO_DRBG_HASH=y + CONFIG_CRYPTO_DRBG_CTR=y +-CONFIG_CRYPTO_DRBG=y +-CONFIG_CRYPTO_JITTERENTROPY=y +-CONFIG_CRYPTO_USER_API=y + CONFIG_CRYPTO_USER_API_HASH=y + CONFIG_CRYPTO_USER_API_SKCIPHER=y + CONFIG_CRYPTO_USER_API_RNG=y + CONFIG_CRYPTO_USER_API_AEAD=y +-CONFIG_CRYPTO_HASH_INFO=y +-CONFIG_CRYPTO_HW=y + CONFIG_CRYPTO_DEV_PADLOCK=m + CONFIG_CRYPTO_DEV_PADLOCK_AES=m + CONFIG_CRYPTO_DEV_PADLOCK_SHA=m + CONFIG_CRYPTO_DEV_CCP=y +-CONFIG_CRYPTO_DEV_CCP_DD=m +-CONFIG_CRYPTO_DEV_SP_CCP=y +-CONFIG_CRYPTO_DEV_CCP_CRYPTO=m +-CONFIG_CRYPTO_DEV_SP_PSP=y +-CONFIG_CRYPTO_DEV_QAT=m + CONFIG_CRYPTO_DEV_QAT_DH895xCC=m + CONFIG_CRYPTO_DEV_QAT_C3XXX=m + CONFIG_CRYPTO_DEV_QAT_C62X=m + CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m + CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m + CONFIG_CRYPTO_DEV_QAT_C62XVF=m +-CONFIG_CRYPTO_DEV_NITROX=m + CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m + CONFIG_CRYPTO_DEV_CHELSIO=m + CONFIG_CHELSIO_IPSEC_INLINE=y +-# CONFIG_CRYPTO_DEV_CHELSIO_TLS is not set +-# CONFIG_CRYPTO_DEV_VIRTIO is not set +-CONFIG_ASYMMETRIC_KEY_TYPE=y +-CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y +-CONFIG_X509_CERTIFICATE_PARSER=y +-CONFIG_PKCS7_MESSAGE_PARSER=y +-# CONFIG_PKCS7_TEST_KEY is not set + CONFIG_SIGNED_PE_FILE_VERIFICATION=y +- +-# +-# Certificates for signature checking +-# +-CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" +-CONFIG_SYSTEM_TRUSTED_KEYRING=y +-CONFIG_SYSTEM_TRUSTED_KEYS="" +-# CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set +-# CONFIG_SECONDARY_TRUSTED_KEYRING is not set + CONFIG_SYSTEM_BLACKLIST_KEYRING=y +-CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" +-CONFIG_BINARY_PRINTF=y +- +-# +-# Library routines +-# +-CONFIG_RAID6_PQ=m +-CONFIG_BITREVERSE=y +-CONFIG_RATIONAL=y +-CONFIG_GENERIC_STRNCPY_FROM_USER=y +-CONFIG_GENERIC_STRNLEN_USER=y +-CONFIG_GENERIC_NET_UTILS=y +-CONFIG_GENERIC_FIND_FIRST_BIT=y +-CONFIG_GENERIC_PCI_IOMAP=y +-CONFIG_GENERIC_IOMAP=y +-CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y +-CONFIG_ARCH_HAS_FAST_MULTIPLIER=y +-CONFIG_CRC_CCITT=y + CONFIG_CRC16=y +-CONFIG_CRC_T10DIF=y +-CONFIG_CRC_ITU_T=m +-CONFIG_CRC32=y +-# CONFIG_CRC32_SELFTEST is not set +-CONFIG_CRC32_SLICEBY8=y +-# CONFIG_CRC32_SLICEBY4 is not set +-# CONFIG_CRC32_SARWATE is not set +-# CONFIG_CRC32_BIT is not set +-# CONFIG_CRC64 is not set +-# CONFIG_CRC4 is not set + CONFIG_CRC7=m +-CONFIG_LIBCRC32C=m +-CONFIG_CRC8=m +-CONFIG_XXHASH=y +-# CONFIG_RANDOM32_SELFTEST is not set +-CONFIG_ZLIB_INFLATE=y +-CONFIG_ZLIB_DEFLATE=y +-CONFIG_LZO_COMPRESS=y +-CONFIG_LZO_DECOMPRESS=y +-CONFIG_LZ4_DECOMPRESS=y +-CONFIG_XZ_DEC=y +-CONFIG_XZ_DEC_X86=y +-CONFIG_XZ_DEC_POWERPC=y +-CONFIG_XZ_DEC_IA64=y +-CONFIG_XZ_DEC_ARM=y +-CONFIG_XZ_DEC_ARMTHUMB=y +-CONFIG_XZ_DEC_SPARC=y +-CONFIG_XZ_DEC_BCJ=y +-# CONFIG_XZ_DEC_TEST is not set +-CONFIG_DECOMPRESS_GZIP=y +-CONFIG_DECOMPRESS_BZIP2=y +-CONFIG_DECOMPRESS_LZMA=y +-CONFIG_DECOMPRESS_XZ=y +-CONFIG_DECOMPRESS_LZO=y +-CONFIG_DECOMPRESS_LZ4=y +-CONFIG_GENERIC_ALLOCATOR=y +-CONFIG_REED_SOLOMON=m +-CONFIG_REED_SOLOMON_ENC8=y +-CONFIG_REED_SOLOMON_DEC8=y +-CONFIG_TEXTSEARCH=y +-CONFIG_TEXTSEARCH_KMP=m +-CONFIG_TEXTSEARCH_BM=m +-CONFIG_TEXTSEARCH_FSM=m +-CONFIG_BTREE=y +-CONFIG_INTERVAL_TREE=y 
+-CONFIG_RADIX_TREE_MULTIORDER=y +-CONFIG_ASSOCIATIVE_ARRAY=y +-CONFIG_HAS_IOMEM=y +-CONFIG_HAS_IOPORT_MAP=y +-CONFIG_HAS_DMA=y +-CONFIG_NEED_SG_DMA_LENGTH=y +-CONFIG_NEED_DMA_MAP_STATE=y +-CONFIG_ARCH_DMA_ADDR_T_64BIT=y +-CONFIG_DMA_DIRECT_OPS=y +-CONFIG_DMA_VIRT_OPS=y +-CONFIG_SWIOTLB=y +-CONFIG_SGL_ALLOC=y +-CONFIG_CHECK_SIGNATURE=y +-CONFIG_CPUMASK_OFFSTACK=y +-CONFIG_CPU_RMAP=y +-CONFIG_DQL=y +-CONFIG_GLOB=y +-# CONFIG_GLOB_SELFTEST is not set +-CONFIG_NLATTR=y +-CONFIG_CLZ_TAB=y +-CONFIG_CORDIC=m +-# CONFIG_DDR is not set +-CONFIG_IRQ_POLL=y +-CONFIG_MPILIB=y +-CONFIG_SIGNATURE=y +-CONFIG_OID_REGISTRY=y +-CONFIG_UCS2_STRING=y +-CONFIG_FONT_SUPPORT=y +-# CONFIG_FONTS is not set ++CONFIG_FONTS=y + CONFIG_FONT_8x8=y +-CONFIG_FONT_8x16=y +-CONFIG_SG_POOL=y +-CONFIG_ARCH_HAS_SG_CHAIN=y +-CONFIG_ARCH_HAS_PMEM_API=y +-CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y +-CONFIG_ARCH_HAS_UACCESS_MCSAFE=y +-CONFIG_SBITMAP=y +-CONFIG_PARMAN=m +-# CONFIG_STRING_SELFTEST is not set +- +-# +-# Kernel hacking +-# +- +-# +-# printk and dmesg options +-# + CONFIG_PRINTK_TIME=y +-CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 +-CONFIG_CONSOLE_LOGLEVEL_QUIET=4 +-CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 + CONFIG_BOOT_PRINTK_DELAY=y + CONFIG_DYNAMIC_DEBUG=y +- +-# +-# Compile-time checks and compiler options +-# + CONFIG_DEBUG_INFO=y +-# CONFIG_DEBUG_INFO_REDUCED is not set +-# CONFIG_DEBUG_INFO_SPLIT is not set + CONFIG_DEBUG_INFO_DWARF4=y +-# CONFIG_GDB_SCRIPTS is not set +-CONFIG_ENABLE_MUST_CHECK=y +-CONFIG_FRAME_WARN=2048 + CONFIG_STRIP_ASM_SYMS=y +-# CONFIG_READABLE_ASM is not set + # CONFIG_UNUSED_SYMBOLS is not set +-# CONFIG_PAGE_OWNER is not set +-CONFIG_DEBUG_FS=y + CONFIG_HEADERS_CHECK=y + CONFIG_DEBUG_SECTION_MISMATCH=y +-CONFIG_SECTION_MISMATCH_WARN_ONLY=y +-CONFIG_STACK_VALIDATION=y +-# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +-CONFIG_MAGIC_SYSRQ=y +-CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 +-CONFIG_MAGIC_SYSRQ_SERIAL=y + CONFIG_DEBUG_KERNEL=y +- +-# +-# Memory Debugging +-# +-# CONFIG_PAGE_EXTENSION is not set +-# CONFIG_DEBUG_PAGEALLOC is not set +-# CONFIG_PAGE_POISONING is not set +-# CONFIG_DEBUG_PAGE_REF is not set +-# CONFIG_DEBUG_RODATA_TEST is not set +-# CONFIG_DEBUG_OBJECTS is not set +-# CONFIG_SLUB_DEBUG_ON is not set +-# CONFIG_SLUB_STATS is not set +-CONFIG_HAVE_DEBUG_KMEMLEAK=y +-# CONFIG_DEBUG_KMEMLEAK is not set +-# CONFIG_DEBUG_STACK_USAGE is not set +-# CONFIG_DEBUG_VM is not set +-CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y +-# CONFIG_DEBUG_VIRTUAL is not set +-CONFIG_DEBUG_MEMORY_INIT=y +-# CONFIG_DEBUG_PER_CPU_MAPS is not set +-CONFIG_HAVE_DEBUG_STACKOVERFLOW=y + CONFIG_DEBUG_STACKOVERFLOW=y +-CONFIG_HAVE_ARCH_KASAN=y +-# CONFIG_KASAN is not set +-CONFIG_ARCH_HAS_KCOV=y + CONFIG_DEBUG_SHIRQ=y +- +-# +-# Debug Lockups and Hangs +-# +-CONFIG_LOCKUP_DETECTOR=y +-CONFIG_SOFTLOCKUP_DETECTOR=y +-# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +-CONFIG_HARDLOCKUP_DETECTOR_PERF=y +-CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y + CONFIG_HARDLOCKUP_DETECTOR=y + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +-CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=1 +-CONFIG_DETECT_HUNG_TASK=y +-CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 +-# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +-CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +-# CONFIG_WQ_WATCHDOG is not set + CONFIG_PANIC_ON_OOPS=y +-CONFIG_PANIC_ON_OOPS_VALUE=1 +-CONFIG_PANIC_TIMEOUT=0 +-CONFIG_SCHED_DEBUG=y +-CONFIG_SCHED_INFO=y + CONFIG_SCHEDSTATS=y +-# CONFIG_SCHED_STACK_END_CHECK is not set +-# CONFIG_DEBUG_TIMEKEEPING is not set +- +-# +-# Lock Debugging 
(spinlocks, mutexes, etc...) +-# +-CONFIG_LOCK_DEBUGGING_SUPPORT=y +-# CONFIG_PROVE_LOCKING is not set +-# CONFIG_LOCK_STAT is not set +-# CONFIG_DEBUG_RT_MUTEXES is not set +-# CONFIG_DEBUG_SPINLOCK is not set +-# CONFIG_DEBUG_MUTEXES is not set +-# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set +-# CONFIG_DEBUG_RWSEMS is not set +-# CONFIG_DEBUG_LOCK_ALLOC is not set +-# CONFIG_DEBUG_ATOMIC_SLEEP is not set +-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +-# CONFIG_LOCK_TORTURE_TEST is not set +-# CONFIG_WW_MUTEX_SELFTEST is not set +-CONFIG_STACKTRACE=y +-# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set +-# CONFIG_DEBUG_KOBJECT is not set +-CONFIG_DEBUG_BUGVERBOSE=y + CONFIG_DEBUG_LIST=y +-# CONFIG_DEBUG_PI_LIST is not set +-# CONFIG_DEBUG_SG is not set +-# CONFIG_DEBUG_NOTIFIERS is not set +-# CONFIG_DEBUG_CREDENTIALS is not set +- +-# +-# RCU Debugging +-# +-# CONFIG_RCU_PERF_TEST is not set +-# CONFIG_RCU_TORTURE_TEST is not set + CONFIG_RCU_CPU_STALL_TIMEOUT=60 +-# CONFIG_RCU_TRACE is not set +-# CONFIG_RCU_EQS_DEBUG is not set +-# CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set +-# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +-# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set +-# CONFIG_NOTIFIER_ERROR_INJECTION is not set +-CONFIG_FUNCTION_ERROR_INJECTION=y +-# CONFIG_FAULT_INJECTION is not set +-# CONFIG_LATENCYTOP is not set +-CONFIG_USER_STACKTRACE_SUPPORT=y +-CONFIG_NOP_TRACER=y +-CONFIG_HAVE_FUNCTION_TRACER=y +-CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +-CONFIG_HAVE_DYNAMIC_FTRACE=y +-CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y +-CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +-CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +-CONFIG_HAVE_FENTRY=y +-CONFIG_HAVE_C_RECORDMCOUNT=y +-CONFIG_TRACER_MAX_TRACE=y +-CONFIG_TRACE_CLOCK=y +-CONFIG_RING_BUFFER=y +-CONFIG_EVENT_TRACING=y +-CONFIG_CONTEXT_SWITCH_TRACER=y +-CONFIG_RING_BUFFER_ALLOW_SWAP=y +-CONFIG_TRACING=y +-CONFIG_GENERIC_TRACER=y +-CONFIG_TRACING_SUPPORT=y +-CONFIG_FTRACE=y +-CONFIG_FUNCTION_TRACER=y +-CONFIG_FUNCTION_GRAPH_TRACER=y +-# CONFIG_PREEMPTIRQ_EVENTS is not set +-# CONFIG_IRQSOFF_TRACER is not set + CONFIG_SCHED_TRACER=y + CONFIG_HWLAT_TRACER=y + CONFIG_FTRACE_SYSCALLS=y +-CONFIG_TRACER_SNAPSHOT=y +-# CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP is not set +-CONFIG_BRANCH_PROFILE_NONE=y +-# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set + CONFIG_STACK_TRACER=y + CONFIG_BLK_DEV_IO_TRACE=y +-CONFIG_KPROBE_EVENTS=y +-# CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set +-CONFIG_UPROBE_EVENTS=y +-CONFIG_BPF_EVENTS=y +-CONFIG_PROBE_EVENTS=y +-CONFIG_DYNAMIC_FTRACE=y +-CONFIG_DYNAMIC_FTRACE_WITH_REGS=y + CONFIG_FUNCTION_PROFILER=y +-# CONFIG_BPF_KPROBE_OVERRIDE is not set +-CONFIG_FTRACE_MCOUNT_RECORD=y +-# CONFIG_FTRACE_STARTUP_TEST is not set +-# CONFIG_MMIOTRACE is not set +-CONFIG_TRACING_MAP=y + CONFIG_HIST_TRIGGERS=y +-# CONFIG_TRACEPOINT_BENCHMARK is not set + CONFIG_RING_BUFFER_BENCHMARK=m +-# CONFIG_RING_BUFFER_STARTUP_TEST is not set +-# CONFIG_PREEMPTIRQ_DELAY_TEST is not set +-# CONFIG_TRACE_EVAL_MAP_FILE is not set + # CONFIG_TRACING_EVENTS_GPIO is not set + CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +-# CONFIG_DMA_API_DEBUG is not set +-CONFIG_RUNTIME_TESTING_MENU=y +-# CONFIG_LKDTM is not set +-# CONFIG_TEST_LIST_SORT is not set +-# CONFIG_TEST_SORT is not set +-# CONFIG_KPROBES_SANITY_TEST is not set +-# CONFIG_BACKTRACE_SELF_TEST is not set +-# CONFIG_RBTREE_TEST is not set +-# CONFIG_INTERVAL_TREE_TEST is not set +-# CONFIG_PERCPU_TEST is not set + CONFIG_ATOMIC64_SELFTEST=y + CONFIG_ASYNC_RAID6_TEST=m +-# CONFIG_TEST_HEXDUMP is not set +-# CONFIG_TEST_STRING_HELPERS is not set + 
CONFIG_TEST_KSTRTOX=y +-# CONFIG_TEST_PRINTF is not set +-# CONFIG_TEST_BITMAP is not set +-# CONFIG_TEST_BITFIELD is not set +-# CONFIG_TEST_UUID is not set +-# CONFIG_TEST_OVERFLOW is not set +-# CONFIG_TEST_RHASHTABLE is not set +-# CONFIG_TEST_HASH is not set +-# CONFIG_TEST_IDA is not set +-# CONFIG_TEST_PARMAN is not set +-# CONFIG_TEST_LKM is not set +-# CONFIG_TEST_USER_COPY is not set +-# CONFIG_TEST_BPF is not set +-# CONFIG_FIND_BIT_BENCHMARK is not set +-# CONFIG_TEST_FIRMWARE is not set +-# CONFIG_TEST_SYSCTL is not set +-# CONFIG_TEST_UDELAY is not set +-# CONFIG_TEST_STATIC_KEYS is not set +-# CONFIG_TEST_KMOD is not set +-# CONFIG_MEMTEST is not set +-# CONFIG_BUG_ON_DATA_CORRUPTION is not set +-# CONFIG_SAMPLES is not set +-CONFIG_HAVE_ARCH_KGDB=y + CONFIG_KGDB=y +-CONFIG_KGDB_SERIAL_CONSOLE=y + CONFIG_KGDB_TESTS=y +-# CONFIG_KGDB_TESTS_ON_BOOT is not set + CONFIG_KGDB_LOW_LEVEL_TRAP=y + CONFIG_KGDB_KDB=y + CONFIG_KDB_DEFAULT_ENABLE=0x0 + CONFIG_KDB_KEYBOARD=y +-CONFIG_KDB_CONTINUE_CATASTROPHIC=0 +-CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y +-# CONFIG_UBSAN is not set +-CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y +-CONFIG_STRICT_DEVMEM=y +-# CONFIG_IO_STRICT_DEVMEM is not set +-CONFIG_TRACE_IRQFLAGS_SUPPORT=y +-CONFIG_EARLY_PRINTK_USB=y + # CONFIG_X86_VERBOSE_BOOTUP is not set +-CONFIG_EARLY_PRINTK=y + CONFIG_EARLY_PRINTK_DBGP=y + CONFIG_EARLY_PRINTK_EFI=y + CONFIG_EARLY_PRINTK_USB_XDBC=y +-# CONFIG_X86_PTDUMP is not set +-# CONFIG_EFI_PGT_DUMP is not set +-# CONFIG_DEBUG_WX is not set +-CONFIG_DOUBLEFAULT=y +-# CONFIG_DEBUG_TLBFLUSH is not set +-CONFIG_HAVE_MMIOTRACE_SUPPORT=y + CONFIG_X86_DECODER_SELFTEST=y +-CONFIG_IO_DELAY_TYPE_0X80=0 +-CONFIG_IO_DELAY_TYPE_0XED=1 +-CONFIG_IO_DELAY_TYPE_UDELAY=2 +-CONFIG_IO_DELAY_TYPE_NONE=3 +-CONFIG_IO_DELAY_0X80=y +-# CONFIG_IO_DELAY_0XED is not set +-# CONFIG_IO_DELAY_UDELAY is not set +-# CONFIG_IO_DELAY_NONE is not set +-CONFIG_DEFAULT_IO_DELAY_TYPE=0 + CONFIG_DEBUG_BOOT_PARAMS=y +-# CONFIG_CPA_DEBUG is not set + CONFIG_OPTIMIZE_INLINING=y +-# CONFIG_DEBUG_ENTRY is not set +-# CONFIG_DEBUG_NMI_SELFTEST is not set + # CONFIG_X86_DEBUG_FPU is not set +-# CONFIG_PUNIT_ATOM_DEBUG is not set +-CONFIG_UNWINDER_ORC=y +-# CONFIG_UNWINDER_FRAME_POINTER is not set diff --git a/openeuler_defconfig_x86_2.patch b/openeuler_defconfig_x86_2.patch new file mode 100644 index 0000000000000000000000000000000000000000..1da3ce725805cc4a39ce77ed6b27f560fb9f6e29 --- /dev/null +++ b/openeuler_defconfig_x86_2.patch @@ -0,0 +1,58 @@ +--- kernel/arch/x86/configs/openeuler_defconfig 2021-05-11 15:32:22.729752809 +0800 ++++ kernel-1/arch/x86/configs/openeuler_defconfig 2021-05-11 15:57:55.840614246 +0800 +@@ -157,7 +157,20 @@ + CONFIG_KARMA_PARTITION=y + CONFIG_IOSCHED_BFQ=y + CONFIG_BFQ_GROUP_IOSCHED=y ++CONFIG_XENO_DRIVERS_16550A=m ++CONFIG_XENO_DRIVERS_16550A_ANY=y ++CONFIG_XENO_DRIVERS_CAN=y ++CONFIG_XENO_DRIVERS_CAN_LOOPBACK=y ++CONFIG_XENO_DRIVERS_CAN_VIRT=y ++CONFIG_XENO_DRIVERS_NET=m ++CONFIG_XENO_DRIVERS_NET_ETH_P_ALL=y ++CONFIG_XENO_DRIVERS_NET_RTWLAN=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_NETROUTING=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_ROUTER=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_DEBUG=y ++CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP=m + CONFIG_XENO_DRIVERS_RTIPC=y ++CONFIG_XENO_DRIVERS_GPIO=y + CONFIG_BINFMT_MISC=m + CONFIG_MEMORY_HOTPLUG=y + # CONFIG_COMPACTION is not set +@@ -174,7 +187,6 @@ + CONFIG_ZSMALLOC_STAT=y + CONFIG_DEFERRED_STRUCT_PAGE_INIT=y + CONFIG_IDLE_PAGE_TRACKING=y +-CONFIG_NET=y + CONFIG_PACKET=y + CONFIG_PACKET_DIAG=m + CONFIG_UNIX=y +@@ -1154,15 +1166,15 @@ 
+ CONFIG_NOZOMI=m + CONFIG_N_HDLC=m + CONFIG_N_GSM=m +-CONFIG_SERIAL_8250=y ++CONFIG_SERIAL_8250=m + # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +-CONFIG_SERIAL_8250_CONSOLE=y ++CONFIG_SERIAL_8250_CS=m + CONFIG_SERIAL_8250_NR_UARTS=32 + CONFIG_SERIAL_8250_EXTENDED=y + CONFIG_SERIAL_8250_MANY_PORTS=y + CONFIG_SERIAL_8250_SHARE_IRQ=y + CONFIG_SERIAL_8250_RSA=y +-CONFIG_SERIAL_8250_DW=y ++CONFIG_SERIAL_8250_DW=m + CONFIG_SERIAL_JSM=m + CONFIG_SERIAL_ARC=m + CONFIG_PRINTER=m +@@ -2057,6 +2069,7 @@ + CONFIG_INTEL_IDMA64=m + CONFIG_INTEL_IOATDMA=m + CONFIG_DW_DMAC=m ++CONFIG_DW_DMAC_PCI=y + CONFIG_ASYNC_TX_DMA=y + CONFIG_UIO_CIF=m + CONFIG_UIO_PDRV_GENIRQ=m